Text Generation
Transformers
Safetensors
qwen3
llama-factory
full
Generated from Trainer
conversational
text-generation-inference
Instructions to use DCAgent/a1-agenttuning_mind2web with libraries, inference providers, notebooks, and local apps. Follow these links to get started.
- Libraries
- Transformers
How to use DCAgent/a1-agenttuning_mind2web with Transformers:
# Use a pipeline as a high-level helper from transformers import pipeline pipe = pipeline("text-generation", model="DCAgent/a1-agenttuning_mind2web") messages = [ {"role": "user", "content": "Who are you?"}, ] pipe(messages)# Load model directly from transformers import AutoTokenizer, AutoModelForCausalLM tokenizer = AutoTokenizer.from_pretrained("DCAgent/a1-agenttuning_mind2web") model = AutoModelForCausalLM.from_pretrained("DCAgent/a1-agenttuning_mind2web") messages = [ {"role": "user", "content": "Who are you?"}, ] inputs = tokenizer.apply_chat_template( messages, add_generation_prompt=True, tokenize=True, return_dict=True, return_tensors="pt", ).to(model.device) outputs = model.generate(**inputs, max_new_tokens=40) print(tokenizer.decode(outputs[0][inputs["input_ids"].shape[-1]:])) - Inference
- Notebooks
- Google Colab
- Kaggle
- Local Apps
- vLLM
How to use DCAgent/a1-agenttuning_mind2web with vLLM:
Install from pip and serve model
# Install vLLM from pip: pip install vllm # Start the vLLM server: vllm serve "DCAgent/a1-agenttuning_mind2web" # Call the server using curl (OpenAI-compatible API): curl -X POST "http://localhost:8000/v1/chat/completions" \ -H "Content-Type: application/json" \ --data '{ "model": "DCAgent/a1-agenttuning_mind2web", "messages": [ { "role": "user", "content": "What is the capital of France?" } ] }'Use Docker
docker model run hf.co/DCAgent/a1-agenttuning_mind2web
- SGLang
How to use DCAgent/a1-agenttuning_mind2web with SGLang:
Install from pip and serve model
# Install SGLang from pip: pip install sglang # Start the SGLang server: python3 -m sglang.launch_server \ --model-path "DCAgent/a1-agenttuning_mind2web" \ --host 0.0.0.0 \ --port 30000 # Call the server using curl (OpenAI-compatible API): curl -X POST "http://localhost:30000/v1/chat/completions" \ -H "Content-Type: application/json" \ --data '{ "model": "DCAgent/a1-agenttuning_mind2web", "messages": [ { "role": "user", "content": "What is the capital of France?" } ] }'Use Docker images
docker run --gpus all \ --shm-size 32g \ -p 30000:30000 \ -v ~/.cache/huggingface:/root/.cache/huggingface \ --env "HF_TOKEN=<secret>" \ --ipc=host \ lmsysorg/sglang:latest \ python3 -m sglang.launch_server \ --model-path "DCAgent/a1-agenttuning_mind2web" \ --host 0.0.0.0 \ --port 30000 # Call the server using curl (OpenAI-compatible API): curl -X POST "http://localhost:30000/v1/chat/completions" \ -H "Content-Type: application/json" \ --data '{ "model": "DCAgent/a1-agenttuning_mind2web", "messages": [ { "role": "user", "content": "What is the capital of France?" } ] }' - Docker Model Runner
How to use DCAgent/a1-agenttuning_mind2web with Docker Model Runner:
docker model run hf.co/DCAgent/a1-agenttuning_mind2web
| { | |
| "best_global_step": null, | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 7.0, | |
| "eval_steps": 500, | |
| "global_step": 4375, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.008, | |
| "grad_norm": 12.374207826720879, | |
| "learning_rate": 3.6529680365296803e-07, | |
| "loss": 0.673, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.6549139022827148, | |
| "step": 5, | |
| "valid_targets_mean": 3321.2, | |
| "valid_targets_min": 857 | |
| }, | |
| { | |
| "epoch": 0.016, | |
| "grad_norm": 11.43057305648331, | |
| "learning_rate": 8.219178082191781e-07, | |
| "loss": 0.6633, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.6954354643821716, | |
| "step": 10, | |
| "valid_targets_mean": 2652.1, | |
| "valid_targets_min": 682 | |
| }, | |
| { | |
| "epoch": 0.024, | |
| "grad_norm": 9.570235812016593, | |
| "learning_rate": 1.278538812785388e-06, | |
| "loss": 0.6271, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.6770514249801636, | |
| "step": 15, | |
| "valid_targets_mean": 3358.0, | |
| "valid_targets_min": 739 | |
| }, | |
| { | |
| "epoch": 0.032, | |
| "grad_norm": 8.170405598762851, | |
| "learning_rate": 1.7351598173515982e-06, | |
| "loss": 0.6202, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.5899783372879028, | |
| "step": 20, | |
| "valid_targets_mean": 4322.4, | |
| "valid_targets_min": 743 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "grad_norm": 3.6087283340453133, | |
| "learning_rate": 2.191780821917808e-06, | |
| "loss": 0.5571, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.4666231572628021, | |
| "step": 25, | |
| "valid_targets_mean": 5854.6, | |
| "valid_targets_min": 718 | |
| }, | |
| { | |
| "epoch": 0.048, | |
| "grad_norm": 2.6527603461614553, | |
| "learning_rate": 2.6484018264840183e-06, | |
| "loss": 0.5838, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.5448369979858398, | |
| "step": 30, | |
| "valid_targets_mean": 4481.5, | |
| "valid_targets_min": 920 | |
| }, | |
| { | |
| "epoch": 0.056, | |
| "grad_norm": 2.027006540805368, | |
| "learning_rate": 3.1050228310502285e-06, | |
| "loss": 0.5264, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.5310207605361938, | |
| "step": 35, | |
| "valid_targets_mean": 2902.4, | |
| "valid_targets_min": 828 | |
| }, | |
| { | |
| "epoch": 0.064, | |
| "grad_norm": 1.8084836411086191, | |
| "learning_rate": 3.5616438356164386e-06, | |
| "loss": 0.5147, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.5529466867446899, | |
| "step": 40, | |
| "valid_targets_mean": 2200.1, | |
| "valid_targets_min": 732 | |
| }, | |
| { | |
| "epoch": 0.072, | |
| "grad_norm": 1.0819999335224122, | |
| "learning_rate": 4.018264840182649e-06, | |
| "loss": 0.4949, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.45418739318847656, | |
| "step": 45, | |
| "valid_targets_mean": 5406.0, | |
| "valid_targets_min": 864 | |
| }, | |
| { | |
| "epoch": 0.08, | |
| "grad_norm": 0.9962749166883843, | |
| "learning_rate": 4.4748858447488585e-06, | |
| "loss": 0.5261, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.5496567487716675, | |
| "step": 50, | |
| "valid_targets_mean": 3678.8, | |
| "valid_targets_min": 902 | |
| }, | |
| { | |
| "epoch": 0.088, | |
| "grad_norm": 0.8013005822835737, | |
| "learning_rate": 4.931506849315069e-06, | |
| "loss": 0.4898, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.5123369693756104, | |
| "step": 55, | |
| "valid_targets_mean": 4873.1, | |
| "valid_targets_min": 950 | |
| }, | |
| { | |
| "epoch": 0.096, | |
| "grad_norm": 0.6575479902865903, | |
| "learning_rate": 5.388127853881279e-06, | |
| "loss": 0.4628, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.41041916608810425, | |
| "step": 60, | |
| "valid_targets_mean": 4923.4, | |
| "valid_targets_min": 887 | |
| }, | |
| { | |
| "epoch": 0.104, | |
| "grad_norm": 0.7006327660151911, | |
| "learning_rate": 5.8447488584474885e-06, | |
| "loss": 0.476, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.4419603943824768, | |
| "step": 65, | |
| "valid_targets_mean": 3550.9, | |
| "valid_targets_min": 954 | |
| }, | |
| { | |
| "epoch": 0.112, | |
| "grad_norm": 0.6222761889218557, | |
| "learning_rate": 6.301369863013699e-06, | |
| "loss": 0.4573, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.45139503479003906, | |
| "step": 70, | |
| "valid_targets_mean": 3806.9, | |
| "valid_targets_min": 779 | |
| }, | |
| { | |
| "epoch": 0.12, | |
| "grad_norm": 0.5620156952847125, | |
| "learning_rate": 6.757990867579909e-06, | |
| "loss": 0.4339, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.44665512442588806, | |
| "step": 75, | |
| "valid_targets_mean": 4951.6, | |
| "valid_targets_min": 994 | |
| }, | |
| { | |
| "epoch": 0.128, | |
| "grad_norm": 0.49883881664127766, | |
| "learning_rate": 7.214611872146119e-06, | |
| "loss": 0.4414, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.42222359776496887, | |
| "step": 80, | |
| "valid_targets_mean": 5721.9, | |
| "valid_targets_min": 909 | |
| }, | |
| { | |
| "epoch": 0.136, | |
| "grad_norm": 0.5538426247192075, | |
| "learning_rate": 7.671232876712329e-06, | |
| "loss": 0.4276, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.4493791460990906, | |
| "step": 85, | |
| "valid_targets_mean": 4876.8, | |
| "valid_targets_min": 1047 | |
| }, | |
| { | |
| "epoch": 0.144, | |
| "grad_norm": 0.5791700438161751, | |
| "learning_rate": 8.127853881278539e-06, | |
| "loss": 0.4333, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.4152202606201172, | |
| "step": 90, | |
| "valid_targets_mean": 3532.4, | |
| "valid_targets_min": 959 | |
| }, | |
| { | |
| "epoch": 0.152, | |
| "grad_norm": 0.5477499942337496, | |
| "learning_rate": 8.584474885844748e-06, | |
| "loss": 0.4003, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.37116068601608276, | |
| "step": 95, | |
| "valid_targets_mean": 3519.7, | |
| "valid_targets_min": 1179 | |
| }, | |
| { | |
| "epoch": 0.16, | |
| "grad_norm": 0.5775964980586745, | |
| "learning_rate": 9.04109589041096e-06, | |
| "loss": 0.4339, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.40842100977897644, | |
| "step": 100, | |
| "valid_targets_mean": 3587.7, | |
| "valid_targets_min": 875 | |
| }, | |
| { | |
| "epoch": 0.168, | |
| "grad_norm": 0.6430856727704666, | |
| "learning_rate": 9.49771689497717e-06, | |
| "loss": 0.4225, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.47565746307373047, | |
| "step": 105, | |
| "valid_targets_mean": 3647.5, | |
| "valid_targets_min": 887 | |
| }, | |
| { | |
| "epoch": 0.176, | |
| "grad_norm": 0.716966298704087, | |
| "learning_rate": 9.95433789954338e-06, | |
| "loss": 0.3843, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.42896223068237305, | |
| "step": 110, | |
| "valid_targets_mean": 2842.8, | |
| "valid_targets_min": 921 | |
| }, | |
| { | |
| "epoch": 0.184, | |
| "grad_norm": 0.5301806019840736, | |
| "learning_rate": 1.0410958904109589e-05, | |
| "loss": 0.3547, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3746205270290375, | |
| "step": 115, | |
| "valid_targets_mean": 3999.1, | |
| "valid_targets_min": 1007 | |
| }, | |
| { | |
| "epoch": 0.192, | |
| "grad_norm": 0.48241364109719725, | |
| "learning_rate": 1.08675799086758e-05, | |
| "loss": 0.3868, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.33790087699890137, | |
| "step": 120, | |
| "valid_targets_mean": 5485.6, | |
| "valid_targets_min": 709 | |
| }, | |
| { | |
| "epoch": 0.2, | |
| "grad_norm": 0.38455061918249317, | |
| "learning_rate": 1.132420091324201e-05, | |
| "loss": 0.3826, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3305814862251282, | |
| "step": 125, | |
| "valid_targets_mean": 7719.6, | |
| "valid_targets_min": 865 | |
| }, | |
| { | |
| "epoch": 0.208, | |
| "grad_norm": 0.4907444754258652, | |
| "learning_rate": 1.178082191780822e-05, | |
| "loss": 0.3677, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.401222288608551, | |
| "step": 130, | |
| "valid_targets_mean": 4818.7, | |
| "valid_targets_min": 961 | |
| }, | |
| { | |
| "epoch": 0.216, | |
| "grad_norm": 0.45743331436009144, | |
| "learning_rate": 1.223744292237443e-05, | |
| "loss": 0.3565, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3574488162994385, | |
| "step": 135, | |
| "valid_targets_mean": 5390.3, | |
| "valid_targets_min": 978 | |
| }, | |
| { | |
| "epoch": 0.224, | |
| "grad_norm": 0.511011558834825, | |
| "learning_rate": 1.2694063926940641e-05, | |
| "loss": 0.3764, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3379029631614685, | |
| "step": 140, | |
| "valid_targets_mean": 5142.8, | |
| "valid_targets_min": 893 | |
| }, | |
| { | |
| "epoch": 0.232, | |
| "grad_norm": 0.5822963240467418, | |
| "learning_rate": 1.3150684931506849e-05, | |
| "loss": 0.3659, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.34744590520858765, | |
| "step": 145, | |
| "valid_targets_mean": 5302.1, | |
| "valid_targets_min": 880 | |
| }, | |
| { | |
| "epoch": 0.24, | |
| "grad_norm": 0.6216847277162149, | |
| "learning_rate": 1.360730593607306e-05, | |
| "loss": 0.3759, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3552771210670471, | |
| "step": 150, | |
| "valid_targets_mean": 4948.8, | |
| "valid_targets_min": 814 | |
| }, | |
| { | |
| "epoch": 0.248, | |
| "grad_norm": 0.574367767908384, | |
| "learning_rate": 1.406392694063927e-05, | |
| "loss": 0.3622, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3638104498386383, | |
| "step": 155, | |
| "valid_targets_mean": 3668.3, | |
| "valid_targets_min": 763 | |
| }, | |
| { | |
| "epoch": 0.256, | |
| "grad_norm": 0.5264337459661648, | |
| "learning_rate": 1.4520547945205482e-05, | |
| "loss": 0.3625, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3273163437843323, | |
| "step": 160, | |
| "valid_targets_mean": 4477.4, | |
| "valid_targets_min": 926 | |
| }, | |
| { | |
| "epoch": 0.264, | |
| "grad_norm": 0.49616555200759516, | |
| "learning_rate": 1.497716894977169e-05, | |
| "loss": 0.3732, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3494306802749634, | |
| "step": 165, | |
| "valid_targets_mean": 4903.5, | |
| "valid_targets_min": 766 | |
| }, | |
| { | |
| "epoch": 0.272, | |
| "grad_norm": 0.5183306288028923, | |
| "learning_rate": 1.54337899543379e-05, | |
| "loss": 0.3495, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.32361409068107605, | |
| "step": 170, | |
| "valid_targets_mean": 4151.4, | |
| "valid_targets_min": 979 | |
| }, | |
| { | |
| "epoch": 0.28, | |
| "grad_norm": 0.6625899937361988, | |
| "learning_rate": 1.589041095890411e-05, | |
| "loss": 0.3497, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3405327796936035, | |
| "step": 175, | |
| "valid_targets_mean": 2944.8, | |
| "valid_targets_min": 843 | |
| }, | |
| { | |
| "epoch": 0.288, | |
| "grad_norm": 0.521897584371646, | |
| "learning_rate": 1.634703196347032e-05, | |
| "loss": 0.3227, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3742603063583374, | |
| "step": 180, | |
| "valid_targets_mean": 4572.4, | |
| "valid_targets_min": 872 | |
| }, | |
| { | |
| "epoch": 0.296, | |
| "grad_norm": 0.49053163267667843, | |
| "learning_rate": 1.680365296803653e-05, | |
| "loss": 0.3486, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.30405259132385254, | |
| "step": 185, | |
| "valid_targets_mean": 5678.8, | |
| "valid_targets_min": 629 | |
| }, | |
| { | |
| "epoch": 0.304, | |
| "grad_norm": 0.9074873265259921, | |
| "learning_rate": 1.726027397260274e-05, | |
| "loss": 0.3689, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.5085344314575195, | |
| "step": 190, | |
| "valid_targets_mean": 2015.6, | |
| "valid_targets_min": 976 | |
| }, | |
| { | |
| "epoch": 0.312, | |
| "grad_norm": 0.7231403812681834, | |
| "learning_rate": 1.771689497716895e-05, | |
| "loss": 0.3531, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.4101080000400543, | |
| "step": 195, | |
| "valid_targets_mean": 3050.3, | |
| "valid_targets_min": 869 | |
| }, | |
| { | |
| "epoch": 0.32, | |
| "grad_norm": 0.4901983123951643, | |
| "learning_rate": 1.8173515981735163e-05, | |
| "loss": 0.3366, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3015870749950409, | |
| "step": 200, | |
| "valid_targets_mean": 6477.4, | |
| "valid_targets_min": 788 | |
| }, | |
| { | |
| "epoch": 0.328, | |
| "grad_norm": 0.7091813307576407, | |
| "learning_rate": 1.863013698630137e-05, | |
| "loss": 0.3278, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.36794477701187134, | |
| "step": 205, | |
| "valid_targets_mean": 2819.5, | |
| "valid_targets_min": 648 | |
| }, | |
| { | |
| "epoch": 0.336, | |
| "grad_norm": 0.6242515345896389, | |
| "learning_rate": 1.9086757990867582e-05, | |
| "loss": 0.3378, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.38683176040649414, | |
| "step": 210, | |
| "valid_targets_mean": 3260.6, | |
| "valid_targets_min": 822 | |
| }, | |
| { | |
| "epoch": 0.344, | |
| "grad_norm": 0.5367951319766895, | |
| "learning_rate": 1.954337899543379e-05, | |
| "loss": 0.3027, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.31571871042251587, | |
| "step": 215, | |
| "valid_targets_mean": 4805.6, | |
| "valid_targets_min": 1114 | |
| }, | |
| { | |
| "epoch": 0.352, | |
| "grad_norm": 0.7441017340177637, | |
| "learning_rate": 2e-05, | |
| "loss": 0.3545, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.35355275869369507, | |
| "step": 220, | |
| "valid_targets_mean": 2396.6, | |
| "valid_targets_min": 889 | |
| }, | |
| { | |
| "epoch": 0.36, | |
| "grad_norm": 0.5463780408548322, | |
| "learning_rate": 2.045662100456621e-05, | |
| "loss": 0.3252, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2769712805747986, | |
| "step": 225, | |
| "valid_targets_mean": 4953.7, | |
| "valid_targets_min": 938 | |
| }, | |
| { | |
| "epoch": 0.368, | |
| "grad_norm": 0.5117792360088306, | |
| "learning_rate": 2.0913242009132424e-05, | |
| "loss": 0.334, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3192422389984131, | |
| "step": 230, | |
| "valid_targets_mean": 5398.4, | |
| "valid_targets_min": 917 | |
| }, | |
| { | |
| "epoch": 0.376, | |
| "grad_norm": 0.6081591338406857, | |
| "learning_rate": 2.1369863013698632e-05, | |
| "loss": 0.3315, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3515390157699585, | |
| "step": 235, | |
| "valid_targets_mean": 3282.2, | |
| "valid_targets_min": 1082 | |
| }, | |
| { | |
| "epoch": 0.384, | |
| "grad_norm": 0.5361233007075954, | |
| "learning_rate": 2.182648401826484e-05, | |
| "loss": 0.327, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.33629757165908813, | |
| "step": 240, | |
| "valid_targets_mean": 5654.5, | |
| "valid_targets_min": 794 | |
| }, | |
| { | |
| "epoch": 0.392, | |
| "grad_norm": 0.4879945295289328, | |
| "learning_rate": 2.2283105022831052e-05, | |
| "loss": 0.3186, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2991241216659546, | |
| "step": 245, | |
| "valid_targets_mean": 6910.0, | |
| "valid_targets_min": 693 | |
| }, | |
| { | |
| "epoch": 0.4, | |
| "grad_norm": 0.5818121087256863, | |
| "learning_rate": 2.2739726027397263e-05, | |
| "loss": 0.3211, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2698156237602234, | |
| "step": 250, | |
| "valid_targets_mean": 5727.4, | |
| "valid_targets_min": 837 | |
| }, | |
| { | |
| "epoch": 0.408, | |
| "grad_norm": 0.5484701967234875, | |
| "learning_rate": 2.3196347031963475e-05, | |
| "loss": 0.3167, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2877916991710663, | |
| "step": 255, | |
| "valid_targets_mean": 3935.9, | |
| "valid_targets_min": 908 | |
| }, | |
| { | |
| "epoch": 0.416, | |
| "grad_norm": 0.7598295746278937, | |
| "learning_rate": 2.3652968036529683e-05, | |
| "loss": 0.3256, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.379886269569397, | |
| "step": 260, | |
| "valid_targets_mean": 3017.8, | |
| "valid_targets_min": 929 | |
| }, | |
| { | |
| "epoch": 0.424, | |
| "grad_norm": 0.6292837735711766, | |
| "learning_rate": 2.410958904109589e-05, | |
| "loss": 0.3291, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3254546821117401, | |
| "step": 265, | |
| "valid_targets_mean": 3521.9, | |
| "valid_targets_min": 1022 | |
| }, | |
| { | |
| "epoch": 0.432, | |
| "grad_norm": 0.507874716177431, | |
| "learning_rate": 2.4566210045662106e-05, | |
| "loss": 0.2991, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2930123805999756, | |
| "step": 270, | |
| "valid_targets_mean": 4279.1, | |
| "valid_targets_min": 845 | |
| }, | |
| { | |
| "epoch": 0.44, | |
| "grad_norm": 0.4869811841188768, | |
| "learning_rate": 2.5022831050228314e-05, | |
| "loss": 0.3159, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.295903742313385, | |
| "step": 275, | |
| "valid_targets_mean": 5994.6, | |
| "valid_targets_min": 807 | |
| }, | |
| { | |
| "epoch": 0.448, | |
| "grad_norm": 0.618514883028784, | |
| "learning_rate": 2.547945205479452e-05, | |
| "loss": 0.3316, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.32905834913253784, | |
| "step": 280, | |
| "valid_targets_mean": 4779.1, | |
| "valid_targets_min": 1148 | |
| }, | |
| { | |
| "epoch": 0.456, | |
| "grad_norm": 0.6790060720865024, | |
| "learning_rate": 2.593607305936073e-05, | |
| "loss": 0.3311, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3841496706008911, | |
| "step": 285, | |
| "valid_targets_mean": 3403.9, | |
| "valid_targets_min": 723 | |
| }, | |
| { | |
| "epoch": 0.464, | |
| "grad_norm": 1.3404239047851452, | |
| "learning_rate": 2.6392694063926944e-05, | |
| "loss": 0.3167, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.29200658202171326, | |
| "step": 290, | |
| "valid_targets_mean": 4245.9, | |
| "valid_targets_min": 657 | |
| }, | |
| { | |
| "epoch": 0.472, | |
| "grad_norm": 0.5708709470409948, | |
| "learning_rate": 2.6849315068493153e-05, | |
| "loss": 0.3095, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.30335745215415955, | |
| "step": 295, | |
| "valid_targets_mean": 4115.2, | |
| "valid_targets_min": 1058 | |
| }, | |
| { | |
| "epoch": 0.48, | |
| "grad_norm": 0.601194220459844, | |
| "learning_rate": 2.7305936073059364e-05, | |
| "loss": 0.3138, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.4048035740852356, | |
| "step": 300, | |
| "valid_targets_mean": 5644.8, | |
| "valid_targets_min": 1055 | |
| }, | |
| { | |
| "epoch": 0.488, | |
| "grad_norm": 0.5494984385782556, | |
| "learning_rate": 2.7762557077625572e-05, | |
| "loss": 0.2913, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.29575061798095703, | |
| "step": 305, | |
| "valid_targets_mean": 5261.0, | |
| "valid_targets_min": 734 | |
| }, | |
| { | |
| "epoch": 0.496, | |
| "grad_norm": 0.5098156256599904, | |
| "learning_rate": 2.8219178082191783e-05, | |
| "loss": 0.3329, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3082813024520874, | |
| "step": 310, | |
| "valid_targets_mean": 6082.6, | |
| "valid_targets_min": 826 | |
| }, | |
| { | |
| "epoch": 0.504, | |
| "grad_norm": 0.5159281940057138, | |
| "learning_rate": 2.8675799086757995e-05, | |
| "loss": 0.3099, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3101263642311096, | |
| "step": 315, | |
| "valid_targets_mean": 5008.8, | |
| "valid_targets_min": 806 | |
| }, | |
| { | |
| "epoch": 0.512, | |
| "grad_norm": 0.6804451743074025, | |
| "learning_rate": 2.9132420091324203e-05, | |
| "loss": 0.3051, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.29619288444519043, | |
| "step": 320, | |
| "valid_targets_mean": 4585.7, | |
| "valid_targets_min": 1016 | |
| }, | |
| { | |
| "epoch": 0.52, | |
| "grad_norm": 0.5850935505815877, | |
| "learning_rate": 2.958904109589041e-05, | |
| "loss": 0.3008, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.28627079725265503, | |
| "step": 325, | |
| "valid_targets_mean": 3302.2, | |
| "valid_targets_min": 966 | |
| }, | |
| { | |
| "epoch": 0.528, | |
| "grad_norm": 0.965007545709362, | |
| "learning_rate": 3.0045662100456626e-05, | |
| "loss": 0.3114, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.35180073976516724, | |
| "step": 330, | |
| "valid_targets_mean": 3250.3, | |
| "valid_targets_min": 829 | |
| }, | |
| { | |
| "epoch": 0.536, | |
| "grad_norm": 0.5381764485997723, | |
| "learning_rate": 3.0502283105022834e-05, | |
| "loss": 0.3031, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2976144552230835, | |
| "step": 335, | |
| "valid_targets_mean": 4887.6, | |
| "valid_targets_min": 827 | |
| }, | |
| { | |
| "epoch": 0.544, | |
| "grad_norm": 0.648507415849347, | |
| "learning_rate": 3.0958904109589045e-05, | |
| "loss": 0.319, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3534129858016968, | |
| "step": 340, | |
| "valid_targets_mean": 4146.6, | |
| "valid_targets_min": 1034 | |
| }, | |
| { | |
| "epoch": 0.552, | |
| "grad_norm": 0.5712482499204495, | |
| "learning_rate": 3.141552511415525e-05, | |
| "loss": 0.2799, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.25627678632736206, | |
| "step": 345, | |
| "valid_targets_mean": 4196.8, | |
| "valid_targets_min": 917 | |
| }, | |
| { | |
| "epoch": 0.56, | |
| "grad_norm": 0.44280064147997866, | |
| "learning_rate": 3.187214611872147e-05, | |
| "loss": 0.2947, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.29131901264190674, | |
| "step": 350, | |
| "valid_targets_mean": 6720.6, | |
| "valid_targets_min": 1054 | |
| }, | |
| { | |
| "epoch": 0.568, | |
| "grad_norm": 0.5998227591824579, | |
| "learning_rate": 3.2328767123287676e-05, | |
| "loss": 0.294, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2890249192714691, | |
| "step": 355, | |
| "valid_targets_mean": 3408.9, | |
| "valid_targets_min": 973 | |
| }, | |
| { | |
| "epoch": 0.576, | |
| "grad_norm": 0.7483944088873048, | |
| "learning_rate": 3.2785388127853884e-05, | |
| "loss": 0.3097, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3844132721424103, | |
| "step": 360, | |
| "valid_targets_mean": 4073.4, | |
| "valid_targets_min": 893 | |
| }, | |
| { | |
| "epoch": 0.584, | |
| "grad_norm": 0.5805802611531444, | |
| "learning_rate": 3.324200913242009e-05, | |
| "loss": 0.2985, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.31355297565460205, | |
| "step": 365, | |
| "valid_targets_mean": 3970.4, | |
| "valid_targets_min": 796 | |
| }, | |
| { | |
| "epoch": 0.592, | |
| "grad_norm": 0.6042096610158246, | |
| "learning_rate": 3.369863013698631e-05, | |
| "loss": 0.3084, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2948343753814697, | |
| "step": 370, | |
| "valid_targets_mean": 3084.6, | |
| "valid_targets_min": 677 | |
| }, | |
| { | |
| "epoch": 0.6, | |
| "grad_norm": 0.6461668885744746, | |
| "learning_rate": 3.4155251141552515e-05, | |
| "loss": 0.2984, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2847023010253906, | |
| "step": 375, | |
| "valid_targets_mean": 3152.9, | |
| "valid_targets_min": 827 | |
| }, | |
| { | |
| "epoch": 0.608, | |
| "grad_norm": 0.6252813642439993, | |
| "learning_rate": 3.461187214611872e-05, | |
| "loss": 0.3071, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.31125932931900024, | |
| "step": 380, | |
| "valid_targets_mean": 3398.4, | |
| "valid_targets_min": 714 | |
| }, | |
| { | |
| "epoch": 0.616, | |
| "grad_norm": 0.5513266421188043, | |
| "learning_rate": 3.506849315068493e-05, | |
| "loss": 0.302, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3118017315864563, | |
| "step": 385, | |
| "valid_targets_mean": 4706.7, | |
| "valid_targets_min": 870 | |
| }, | |
| { | |
| "epoch": 0.624, | |
| "grad_norm": 0.5782519741863913, | |
| "learning_rate": 3.5525114155251146e-05, | |
| "loss": 0.3088, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.272940993309021, | |
| "step": 390, | |
| "valid_targets_mean": 3489.2, | |
| "valid_targets_min": 855 | |
| }, | |
| { | |
| "epoch": 0.632, | |
| "grad_norm": 0.4835190489595145, | |
| "learning_rate": 3.5981735159817354e-05, | |
| "loss": 0.2907, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.24451082944869995, | |
| "step": 395, | |
| "valid_targets_mean": 6700.7, | |
| "valid_targets_min": 865 | |
| }, | |
| { | |
| "epoch": 0.64, | |
| "grad_norm": 0.5678926388355849, | |
| "learning_rate": 3.643835616438356e-05, | |
| "loss": 0.3022, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.27626603841781616, | |
| "step": 400, | |
| "valid_targets_mean": 3785.8, | |
| "valid_targets_min": 662 | |
| }, | |
| { | |
| "epoch": 0.648, | |
| "grad_norm": 0.5159242200400144, | |
| "learning_rate": 3.689497716894977e-05, | |
| "loss": 0.3, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.26561927795410156, | |
| "step": 405, | |
| "valid_targets_mean": 4753.9, | |
| "valid_targets_min": 870 | |
| }, | |
| { | |
| "epoch": 0.656, | |
| "grad_norm": 0.6417729066274248, | |
| "learning_rate": 3.7351598173515985e-05, | |
| "loss": 0.3159, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.36154353618621826, | |
| "step": 410, | |
| "valid_targets_mean": 3772.2, | |
| "valid_targets_min": 1060 | |
| }, | |
| { | |
| "epoch": 0.664, | |
| "grad_norm": 0.5052939522988528, | |
| "learning_rate": 3.780821917808219e-05, | |
| "loss": 0.2794, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2737759053707123, | |
| "step": 415, | |
| "valid_targets_mean": 6162.9, | |
| "valid_targets_min": 978 | |
| }, | |
| { | |
| "epoch": 0.672, | |
| "grad_norm": 0.41783612659228964, | |
| "learning_rate": 3.82648401826484e-05, | |
| "loss": 0.2938, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2618584930896759, | |
| "step": 420, | |
| "valid_targets_mean": 9499.0, | |
| "valid_targets_min": 1090 | |
| }, | |
| { | |
| "epoch": 0.68, | |
| "grad_norm": 1.0340429120195325, | |
| "learning_rate": 3.8721461187214615e-05, | |
| "loss": 0.311, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3576170802116394, | |
| "step": 425, | |
| "valid_targets_mean": 3117.8, | |
| "valid_targets_min": 1019 | |
| }, | |
| { | |
| "epoch": 0.688, | |
| "grad_norm": 0.6432463334293257, | |
| "learning_rate": 3.9178082191780823e-05, | |
| "loss": 0.2938, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.31897398829460144, | |
| "step": 430, | |
| "valid_targets_mean": 2963.1, | |
| "valid_targets_min": 845 | |
| }, | |
| { | |
| "epoch": 0.696, | |
| "grad_norm": 0.6510917417085519, | |
| "learning_rate": 3.963470319634704e-05, | |
| "loss": 0.3034, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3051530718803406, | |
| "step": 435, | |
| "valid_targets_mean": 3365.6, | |
| "valid_targets_min": 974 | |
| }, | |
| { | |
| "epoch": 0.704, | |
| "grad_norm": 0.5340165987215613, | |
| "learning_rate": 3.99999936325009e-05, | |
| "loss": 0.3129, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3342752456665039, | |
| "step": 440, | |
| "valid_targets_mean": 4853.4, | |
| "valid_targets_min": 1068 | |
| }, | |
| { | |
| "epoch": 0.712, | |
| "grad_norm": 0.5677728076782123, | |
| "learning_rate": 3.9999770770457856e-05, | |
| "loss": 0.3128, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2853566110134125, | |
| "step": 445, | |
| "valid_targets_mean": 4938.3, | |
| "valid_targets_min": 910 | |
| }, | |
| { | |
| "epoch": 0.72, | |
| "grad_norm": 0.566650322028894, | |
| "learning_rate": 3.9999229537513936e-05, | |
| "loss": 0.2989, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.27975791692733765, | |
| "step": 450, | |
| "valid_targets_mean": 3907.8, | |
| "valid_targets_min": 793 | |
| }, | |
| { | |
| "epoch": 0.728, | |
| "grad_norm": 0.512996855287856, | |
| "learning_rate": 3.999836994228487e-05, | |
| "loss": 0.2823, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2654213011264801, | |
| "step": 455, | |
| "valid_targets_mean": 3657.9, | |
| "valid_targets_min": 976 | |
| }, | |
| { | |
| "epoch": 0.736, | |
| "grad_norm": 0.5712508862988912, | |
| "learning_rate": 3.999719199845432e-05, | |
| "loss": 0.2871, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.28030380606651306, | |
| "step": 460, | |
| "valid_targets_mean": 3826.2, | |
| "valid_targets_min": 791 | |
| }, | |
| { | |
| "epoch": 0.744, | |
| "grad_norm": 0.550763618650829, | |
| "learning_rate": 3.999569572477366e-05, | |
| "loss": 0.3016, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2925485074520111, | |
| "step": 465, | |
| "valid_targets_mean": 3767.2, | |
| "valid_targets_min": 1074 | |
| }, | |
| { | |
| "epoch": 0.752, | |
| "grad_norm": 0.4844778100988879, | |
| "learning_rate": 3.999388114506166e-05, | |
| "loss": 0.2779, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.28359127044677734, | |
| "step": 470, | |
| "valid_targets_mean": 5441.2, | |
| "valid_targets_min": 1040 | |
| }, | |
| { | |
| "epoch": 0.76, | |
| "grad_norm": 0.5223336482223219, | |
| "learning_rate": 3.999174828820413e-05, | |
| "loss": 0.3165, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2604113817214966, | |
| "step": 475, | |
| "valid_targets_mean": 4254.1, | |
| "valid_targets_min": 662 | |
| }, | |
| { | |
| "epoch": 0.768, | |
| "grad_norm": 0.7085323476959958, | |
| "learning_rate": 3.998929718815341e-05, | |
| "loss": 0.3046, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.366795152425766, | |
| "step": 480, | |
| "valid_targets_mean": 3101.3, | |
| "valid_targets_min": 941 | |
| }, | |
| { | |
| "epoch": 0.776, | |
| "grad_norm": 0.5280273815145139, | |
| "learning_rate": 3.998652788392792e-05, | |
| "loss": 0.2805, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2906476855278015, | |
| "step": 485, | |
| "valid_targets_mean": 4255.8, | |
| "valid_targets_min": 968 | |
| }, | |
| { | |
| "epoch": 0.784, | |
| "grad_norm": 0.45090956227053314, | |
| "learning_rate": 3.9983440419611445e-05, | |
| "loss": 0.2803, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.23735715448856354, | |
| "step": 490, | |
| "valid_targets_mean": 4642.8, | |
| "valid_targets_min": 868 | |
| }, | |
| { | |
| "epoch": 0.792, | |
| "grad_norm": 0.4683162495835124, | |
| "learning_rate": 3.9980034844352494e-05, | |
| "loss": 0.3064, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.268685519695282, | |
| "step": 495, | |
| "valid_targets_mean": 6424.1, | |
| "valid_targets_min": 1279 | |
| }, | |
| { | |
| "epoch": 0.8, | |
| "grad_norm": 0.517351165125786, | |
| "learning_rate": 3.9976311212363495e-05, | |
| "loss": 0.2838, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3032742142677307, | |
| "step": 500, | |
| "valid_targets_mean": 4640.5, | |
| "valid_targets_min": 738 | |
| }, | |
| { | |
| "epoch": 0.808, | |
| "grad_norm": 0.5071613397785332, | |
| "learning_rate": 3.997226958291992e-05, | |
| "loss": 0.3027, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.26866069436073303, | |
| "step": 505, | |
| "valid_targets_mean": 5655.1, | |
| "valid_targets_min": 905 | |
| }, | |
| { | |
| "epoch": 0.816, | |
| "grad_norm": 0.6466486162647562, | |
| "learning_rate": 3.996791002035937e-05, | |
| "loss": 0.3158, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.4299527406692505, | |
| "step": 510, | |
| "valid_targets_mean": 3593.1, | |
| "valid_targets_min": 859 | |
| }, | |
| { | |
| "epoch": 0.824, | |
| "grad_norm": 0.5443624306552559, | |
| "learning_rate": 3.996323259408055e-05, | |
| "loss": 0.3062, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2630675435066223, | |
| "step": 515, | |
| "valid_targets_mean": 7815.6, | |
| "valid_targets_min": 532 | |
| }, | |
| { | |
| "epoch": 0.832, | |
| "grad_norm": 0.6243451089128813, | |
| "learning_rate": 3.995823737854211e-05, | |
| "loss": 0.2925, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3227846622467041, | |
| "step": 520, | |
| "valid_targets_mean": 3138.6, | |
| "valid_targets_min": 757 | |
| }, | |
| { | |
| "epoch": 0.84, | |
| "grad_norm": 0.6022810456897381, | |
| "learning_rate": 3.9952924453261534e-05, | |
| "loss": 0.3032, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3212684988975525, | |
| "step": 525, | |
| "valid_targets_mean": 3576.4, | |
| "valid_targets_min": 914 | |
| }, | |
| { | |
| "epoch": 0.848, | |
| "grad_norm": 0.5723242679917651, | |
| "learning_rate": 3.994729390281384e-05, | |
| "loss": 0.2769, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2988251745700836, | |
| "step": 530, | |
| "valid_targets_mean": 4023.6, | |
| "valid_targets_min": 722 | |
| }, | |
| { | |
| "epoch": 0.856, | |
| "grad_norm": 0.5462336841915116, | |
| "learning_rate": 3.994134581683021e-05, | |
| "loss": 0.2911, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2816080152988434, | |
| "step": 535, | |
| "valid_targets_mean": 3982.4, | |
| "valid_targets_min": 1090 | |
| }, | |
| { | |
| "epoch": 0.864, | |
| "grad_norm": 0.4918215735498511, | |
| "learning_rate": 3.9935080289996626e-05, | |
| "loss": 0.298, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.26696616411209106, | |
| "step": 540, | |
| "valid_targets_mean": 5292.1, | |
| "valid_targets_min": 839 | |
| }, | |
| { | |
| "epoch": 0.872, | |
| "grad_norm": 0.6349909083508697, | |
| "learning_rate": 3.992849742205228e-05, | |
| "loss": 0.281, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.32703348994255066, | |
| "step": 545, | |
| "valid_targets_mean": 3155.8, | |
| "valid_targets_min": 712 | |
| }, | |
| { | |
| "epoch": 0.88, | |
| "grad_norm": 0.455853550279736, | |
| "learning_rate": 3.9921597317788065e-05, | |
| "loss": 0.276, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2557429075241089, | |
| "step": 550, | |
| "valid_targets_mean": 4919.1, | |
| "valid_targets_min": 767 | |
| }, | |
| { | |
| "epoch": 0.888, | |
| "grad_norm": 0.6086037931430966, | |
| "learning_rate": 3.991438008704486e-05, | |
| "loss": 0.281, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.33778220415115356, | |
| "step": 555, | |
| "valid_targets_mean": 3085.8, | |
| "valid_targets_min": 820 | |
| }, | |
| { | |
| "epoch": 0.896, | |
| "grad_norm": 0.5213582280989516, | |
| "learning_rate": 3.990684584471179e-05, | |
| "loss": 0.2946, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.29489654302597046, | |
| "step": 560, | |
| "valid_targets_mean": 4436.3, | |
| "valid_targets_min": 779 | |
| }, | |
| { | |
| "epoch": 0.904, | |
| "grad_norm": 0.5843714438083748, | |
| "learning_rate": 3.989899471072441e-05, | |
| "loss": 0.2991, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.26970839500427246, | |
| "step": 565, | |
| "valid_targets_mean": 2767.6, | |
| "valid_targets_min": 835 | |
| }, | |
| { | |
| "epoch": 0.912, | |
| "grad_norm": 0.541008479057164, | |
| "learning_rate": 3.9890826810062784e-05, | |
| "loss": 0.2861, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.25051432847976685, | |
| "step": 570, | |
| "valid_targets_mean": 3267.8, | |
| "valid_targets_min": 901 | |
| }, | |
| { | |
| "epoch": 0.92, | |
| "grad_norm": 0.44648718657516523, | |
| "learning_rate": 3.988234227274949e-05, | |
| "loss": 0.2902, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.28003740310668945, | |
| "step": 575, | |
| "valid_targets_mean": 5206.3, | |
| "valid_targets_min": 616 | |
| }, | |
| { | |
| "epoch": 0.928, | |
| "grad_norm": 0.5963913155091053, | |
| "learning_rate": 3.987354123384757e-05, | |
| "loss": 0.2813, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.27831757068634033, | |
| "step": 580, | |
| "valid_targets_mean": 3726.2, | |
| "valid_targets_min": 1063 | |
| }, | |
| { | |
| "epoch": 0.936, | |
| "grad_norm": 0.5529862065194503, | |
| "learning_rate": 3.9864423833458364e-05, | |
| "loss": 0.2803, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.33845236897468567, | |
| "step": 585, | |
| "valid_targets_mean": 3352.5, | |
| "valid_targets_min": 1010 | |
| }, | |
| { | |
| "epoch": 0.944, | |
| "grad_norm": 0.5276685630923462, | |
| "learning_rate": 3.9854990216719285e-05, | |
| "loss": 0.2734, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.30608147382736206, | |
| "step": 590, | |
| "valid_targets_mean": 3963.6, | |
| "valid_targets_min": 960 | |
| }, | |
| { | |
| "epoch": 0.952, | |
| "grad_norm": 0.5761410220991169, | |
| "learning_rate": 3.98452405338015e-05, | |
| "loss": 0.2941, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3611443042755127, | |
| "step": 595, | |
| "valid_targets_mean": 3870.4, | |
| "valid_targets_min": 670 | |
| }, | |
| { | |
| "epoch": 0.96, | |
| "grad_norm": 0.43161899888992733, | |
| "learning_rate": 3.983517493990756e-05, | |
| "loss": 0.3008, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2567683458328247, | |
| "step": 600, | |
| "valid_targets_mean": 5753.8, | |
| "valid_targets_min": 1022 | |
| }, | |
| { | |
| "epoch": 0.968, | |
| "grad_norm": 0.5174699444690888, | |
| "learning_rate": 3.982479359526892e-05, | |
| "loss": 0.2846, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.31545543670654297, | |
| "step": 605, | |
| "valid_targets_mean": 4297.7, | |
| "valid_targets_min": 1026 | |
| }, | |
| { | |
| "epoch": 0.976, | |
| "grad_norm": 0.5582649393151906, | |
| "learning_rate": 3.981409666514336e-05, | |
| "loss": 0.287, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.28723669052124023, | |
| "step": 610, | |
| "valid_targets_mean": 3499.9, | |
| "valid_targets_min": 730 | |
| }, | |
| { | |
| "epoch": 0.984, | |
| "grad_norm": 0.4569144879632991, | |
| "learning_rate": 3.98030843198124e-05, | |
| "loss": 0.2793, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.26559823751449585, | |
| "step": 615, | |
| "valid_targets_mean": 3593.3, | |
| "valid_targets_min": 764 | |
| }, | |
| { | |
| "epoch": 0.992, | |
| "grad_norm": 0.5364468262792286, | |
| "learning_rate": 3.979175673457858e-05, | |
| "loss": 0.2979, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2658482491970062, | |
| "step": 620, | |
| "valid_targets_mean": 3494.4, | |
| "valid_targets_min": 670 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "grad_norm": 0.589932569473527, | |
| "learning_rate": 3.9780114089762616e-05, | |
| "loss": 0.2845, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2865822911262512, | |
| "step": 625, | |
| "valid_targets_mean": 3167.1, | |
| "valid_targets_min": 924 | |
| }, | |
| { | |
| "epoch": 1.008, | |
| "grad_norm": 0.4223733533410392, | |
| "learning_rate": 3.976815657070062e-05, | |
| "loss": 0.2863, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.24453572928905487, | |
| "step": 630, | |
| "valid_targets_mean": 5350.5, | |
| "valid_targets_min": 909 | |
| }, | |
| { | |
| "epoch": 1.016, | |
| "grad_norm": 0.5617173103201819, | |
| "learning_rate": 3.975588436774107e-05, | |
| "loss": 0.2779, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2646633982658386, | |
| "step": 635, | |
| "valid_targets_mean": 3123.4, | |
| "valid_targets_min": 1007 | |
| }, | |
| { | |
| "epoch": 1.024, | |
| "grad_norm": 0.5490235878628557, | |
| "learning_rate": 3.9743297676241826e-05, | |
| "loss": 0.2662, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2878129184246063, | |
| "step": 640, | |
| "valid_targets_mean": 4515.7, | |
| "valid_targets_min": 784 | |
| }, | |
| { | |
| "epoch": 1.032, | |
| "grad_norm": 0.8694303583221871, | |
| "learning_rate": 3.9730396696566994e-05, | |
| "loss": 0.299, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.39216941595077515, | |
| "step": 645, | |
| "valid_targets_mean": 1941.7, | |
| "valid_targets_min": 1014 | |
| }, | |
| { | |
| "epoch": 1.04, | |
| "grad_norm": 0.5176313613959357, | |
| "learning_rate": 3.971718163408375e-05, | |
| "loss": 0.2728, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2930067777633667, | |
| "step": 650, | |
| "valid_targets_mean": 4818.8, | |
| "valid_targets_min": 1106 | |
| }, | |
| { | |
| "epoch": 1.048, | |
| "grad_norm": 0.44358594950793484, | |
| "learning_rate": 3.9703652699159093e-05, | |
| "loss": 0.2825, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.30579259991645813, | |
| "step": 655, | |
| "valid_targets_mean": 5172.5, | |
| "valid_targets_min": 871 | |
| }, | |
| { | |
| "epoch": 1.056, | |
| "grad_norm": 0.5740468844331686, | |
| "learning_rate": 3.9689810107156425e-05, | |
| "loss": 0.2827, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3474939167499542, | |
| "step": 660, | |
| "valid_targets_mean": 4049.9, | |
| "valid_targets_min": 821 | |
| }, | |
| { | |
| "epoch": 1.064, | |
| "grad_norm": 0.5556529187566539, | |
| "learning_rate": 3.967565407843222e-05, | |
| "loss": 0.2977, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.29836633801460266, | |
| "step": 665, | |
| "valid_targets_mean": 4204.5, | |
| "valid_targets_min": 1348 | |
| }, | |
| { | |
| "epoch": 1.072, | |
| "grad_norm": 0.5024698593720891, | |
| "learning_rate": 3.966118483833242e-05, | |
| "loss": 0.2738, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.26958394050598145, | |
| "step": 670, | |
| "valid_targets_mean": 4268.8, | |
| "valid_targets_min": 1033 | |
| }, | |
| { | |
| "epoch": 1.08, | |
| "grad_norm": 0.4842800380286951, | |
| "learning_rate": 3.964640261718893e-05, | |
| "loss": 0.2664, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.26958194375038147, | |
| "step": 675, | |
| "valid_targets_mean": 3595.6, | |
| "valid_targets_min": 862 | |
| }, | |
| { | |
| "epoch": 1.088, | |
| "grad_norm": 0.5273262657978705, | |
| "learning_rate": 3.963130765031589e-05, | |
| "loss": 0.2808, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2689908444881439, | |
| "step": 680, | |
| "valid_targets_mean": 3801.2, | |
| "valid_targets_min": 1022 | |
| }, | |
| { | |
| "epoch": 1.096, | |
| "grad_norm": 0.5751221396165925, | |
| "learning_rate": 3.961590017800598e-05, | |
| "loss": 0.2775, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2882736027240753, | |
| "step": 685, | |
| "valid_targets_mean": 3101.3, | |
| "valid_targets_min": 664 | |
| }, | |
| { | |
| "epoch": 1.104, | |
| "grad_norm": 0.5471508339834354, | |
| "learning_rate": 3.960018044552653e-05, | |
| "loss": 0.2994, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.31060564517974854, | |
| "step": 690, | |
| "valid_targets_mean": 3763.3, | |
| "valid_targets_min": 846 | |
| }, | |
| { | |
| "epoch": 1.112, | |
| "grad_norm": 0.5792478778548963, | |
| "learning_rate": 3.9584148703115704e-05, | |
| "loss": 0.2829, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2743430435657501, | |
| "step": 695, | |
| "valid_targets_mean": 2966.4, | |
| "valid_targets_min": 1021 | |
| }, | |
| { | |
| "epoch": 1.12, | |
| "grad_norm": 0.5701845098447167, | |
| "learning_rate": 3.956780520597842e-05, | |
| "loss": 0.2719, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.31023383140563965, | |
| "step": 700, | |
| "valid_targets_mean": 3593.6, | |
| "valid_targets_min": 748 | |
| }, | |
| { | |
| "epoch": 1.1280000000000001, | |
| "grad_norm": 0.4243313433223685, | |
| "learning_rate": 3.955115021428236e-05, | |
| "loss": 0.2655, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2508736550807953, | |
| "step": 705, | |
| "valid_targets_mean": 5902.2, | |
| "valid_targets_min": 912 | |
| }, | |
| { | |
| "epoch": 1.1360000000000001, | |
| "grad_norm": 0.4790109609572266, | |
| "learning_rate": 3.95341839931538e-05, | |
| "loss": 0.2804, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2982293367385864, | |
| "step": 710, | |
| "valid_targets_mean": 5286.7, | |
| "valid_targets_min": 1022 | |
| }, | |
| { | |
| "epoch": 1.144, | |
| "grad_norm": 0.5585199616419411, | |
| "learning_rate": 3.95169068126734e-05, | |
| "loss": 0.2816, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2889254093170166, | |
| "step": 715, | |
| "valid_targets_mean": 2680.4, | |
| "valid_targets_min": 822 | |
| }, | |
| { | |
| "epoch": 1.152, | |
| "grad_norm": 0.393666834541853, | |
| "learning_rate": 3.949931894787187e-05, | |
| "loss": 0.243, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.22171318531036377, | |
| "step": 720, | |
| "valid_targets_mean": 6140.5, | |
| "valid_targets_min": 913 | |
| }, | |
| { | |
| "epoch": 1.16, | |
| "grad_norm": 0.4554560378266813, | |
| "learning_rate": 3.948142067872565e-05, | |
| "loss": 0.2743, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.276266485452652, | |
| "step": 725, | |
| "valid_targets_mean": 4809.1, | |
| "valid_targets_min": 1074 | |
| }, | |
| { | |
| "epoch": 1.168, | |
| "grad_norm": 0.4943688708217394, | |
| "learning_rate": 3.946321229015241e-05, | |
| "loss": 0.2716, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2835407853126526, | |
| "step": 730, | |
| "valid_targets_mean": 4429.9, | |
| "valid_targets_min": 1050 | |
| }, | |
| { | |
| "epoch": 1.176, | |
| "grad_norm": 0.4477093216825828, | |
| "learning_rate": 3.944469407200652e-05, | |
| "loss": 0.2788, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.23854275047779083, | |
| "step": 735, | |
| "valid_targets_mean": 4396.1, | |
| "valid_targets_min": 855 | |
| }, | |
| { | |
| "epoch": 1.184, | |
| "grad_norm": 0.5896727157094116, | |
| "learning_rate": 3.942586631907444e-05, | |
| "loss": 0.2705, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2517150044441223, | |
| "step": 740, | |
| "valid_targets_mean": 3115.4, | |
| "valid_targets_min": 850 | |
| }, | |
| { | |
| "epoch": 1.192, | |
| "grad_norm": 0.5289038060630349, | |
| "learning_rate": 3.9406729331070054e-05, | |
| "loss": 0.2763, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2541031539440155, | |
| "step": 745, | |
| "valid_targets_mean": 3815.3, | |
| "valid_targets_min": 962 | |
| }, | |
| { | |
| "epoch": 1.2, | |
| "grad_norm": 0.44733647620635336, | |
| "learning_rate": 3.938728341262985e-05, | |
| "loss": 0.291, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2868531048297882, | |
| "step": 750, | |
| "valid_targets_mean": 4895.9, | |
| "valid_targets_min": 623 | |
| }, | |
| { | |
| "epoch": 1.208, | |
| "grad_norm": 0.7202988131820695, | |
| "learning_rate": 3.936752887330812e-05, | |
| "loss": 0.289, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3202763497829437, | |
| "step": 755, | |
| "valid_targets_mean": 2302.6, | |
| "valid_targets_min": 929 | |
| }, | |
| { | |
| "epoch": 1.216, | |
| "grad_norm": 0.5140036991263218, | |
| "learning_rate": 3.9347466027571975e-05, | |
| "loss": 0.2653, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2578458786010742, | |
| "step": 760, | |
| "valid_targets_mean": 4010.8, | |
| "valid_targets_min": 832 | |
| }, | |
| { | |
| "epoch": 1.224, | |
| "grad_norm": 0.5537418130011722, | |
| "learning_rate": 3.932709519479639e-05, | |
| "loss": 0.2653, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2952079176902771, | |
| "step": 765, | |
| "valid_targets_mean": 3206.0, | |
| "valid_targets_min": 800 | |
| }, | |
| { | |
| "epoch": 1.232, | |
| "grad_norm": 0.49323600062823847, | |
| "learning_rate": 3.930641669925911e-05, | |
| "loss": 0.2759, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.264598548412323, | |
| "step": 770, | |
| "valid_targets_mean": 3837.5, | |
| "valid_targets_min": 878 | |
| }, | |
| { | |
| "epoch": 1.24, | |
| "grad_norm": 0.46711652952383553, | |
| "learning_rate": 3.928543087013546e-05, | |
| "loss": 0.2778, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.27722471952438354, | |
| "step": 775, | |
| "valid_targets_mean": 4181.8, | |
| "valid_targets_min": 877 | |
| }, | |
| { | |
| "epoch": 1.248, | |
| "grad_norm": 0.43556151715805724, | |
| "learning_rate": 3.926413804149315e-05, | |
| "loss": 0.2631, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.21522939205169678, | |
| "step": 780, | |
| "valid_targets_mean": 4802.7, | |
| "valid_targets_min": 978 | |
| }, | |
| { | |
| "epoch": 1.256, | |
| "grad_norm": 0.6366361986050708, | |
| "learning_rate": 3.9242538552286894e-05, | |
| "loss": 0.2921, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.32596951723098755, | |
| "step": 785, | |
| "valid_targets_mean": 2695.1, | |
| "valid_targets_min": 707 | |
| }, | |
| { | |
| "epoch": 1.264, | |
| "grad_norm": 0.5512190834498045, | |
| "learning_rate": 3.9220632746353096e-05, | |
| "loss": 0.2994, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3354596495628357, | |
| "step": 790, | |
| "valid_targets_mean": 3169.8, | |
| "valid_targets_min": 714 | |
| }, | |
| { | |
| "epoch": 1.272, | |
| "grad_norm": 0.4994124792250944, | |
| "learning_rate": 3.91984209724043e-05, | |
| "loss": 0.253, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.24309590458869934, | |
| "step": 795, | |
| "valid_targets_mean": 3187.5, | |
| "valid_targets_min": 717 | |
| }, | |
| { | |
| "epoch": 1.28, | |
| "grad_norm": 0.5058818429479062, | |
| "learning_rate": 3.917590358402369e-05, | |
| "loss": 0.2748, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2896243929862976, | |
| "step": 800, | |
| "valid_targets_mean": 4368.1, | |
| "valid_targets_min": 662 | |
| }, | |
| { | |
| "epoch": 1.288, | |
| "grad_norm": 0.5741606754229182, | |
| "learning_rate": 3.915308093965943e-05, | |
| "loss": 0.2857, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.31103113293647766, | |
| "step": 805, | |
| "valid_targets_mean": 3463.2, | |
| "valid_targets_min": 1093 | |
| }, | |
| { | |
| "epoch": 1.296, | |
| "grad_norm": 0.5269169994703332, | |
| "learning_rate": 3.9129953402618976e-05, | |
| "loss": 0.2764, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.28437525033950806, | |
| "step": 810, | |
| "valid_targets_mean": 3127.5, | |
| "valid_targets_min": 768 | |
| }, | |
| { | |
| "epoch": 1.304, | |
| "grad_norm": 0.5105272259938182, | |
| "learning_rate": 3.91065213410633e-05, | |
| "loss": 0.2647, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.27728912234306335, | |
| "step": 815, | |
| "valid_targets_mean": 3164.8, | |
| "valid_targets_min": 954 | |
| }, | |
| { | |
| "epoch": 1.312, | |
| "grad_norm": 0.7068657863624991, | |
| "learning_rate": 3.908278512800098e-05, | |
| "loss": 0.2749, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.30154961347579956, | |
| "step": 820, | |
| "valid_targets_mean": 2021.1, | |
| "valid_targets_min": 771 | |
| }, | |
| { | |
| "epoch": 1.32, | |
| "grad_norm": 0.43262404009468025, | |
| "learning_rate": 3.905874514128235e-05, | |
| "loss": 0.2574, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.22265702486038208, | |
| "step": 825, | |
| "valid_targets_mean": 5069.6, | |
| "valid_targets_min": 1046 | |
| }, | |
| { | |
| "epoch": 1.328, | |
| "grad_norm": 0.4833800812123599, | |
| "learning_rate": 3.903440176359338e-05, | |
| "loss": 0.2652, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2597518265247345, | |
| "step": 830, | |
| "valid_targets_mean": 4273.6, | |
| "valid_targets_min": 642 | |
| }, | |
| { | |
| "epoch": 1.336, | |
| "grad_norm": 0.495986902645752, | |
| "learning_rate": 3.90097553824497e-05, | |
| "loss": 0.2649, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.26148658990859985, | |
| "step": 835, | |
| "valid_targets_mean": 3473.2, | |
| "valid_targets_min": 558 | |
| }, | |
| { | |
| "epoch": 1.3439999999999999, | |
| "grad_norm": 0.51848462208148, | |
| "learning_rate": 3.8984806390190304e-05, | |
| "loss": 0.2636, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.30521005392074585, | |
| "step": 840, | |
| "valid_targets_mean": 3706.1, | |
| "valid_targets_min": 1051 | |
| }, | |
| { | |
| "epoch": 1.3519999999999999, | |
| "grad_norm": 0.557804275665896, | |
| "learning_rate": 3.895955518397141e-05, | |
| "loss": 0.264, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2697344124317169, | |
| "step": 845, | |
| "valid_targets_mean": 2746.5, | |
| "valid_targets_min": 976 | |
| }, | |
| { | |
| "epoch": 1.3599999999999999, | |
| "grad_norm": 0.4866612431488525, | |
| "learning_rate": 3.893400216576011e-05, | |
| "loss": 0.2739, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.24589639902114868, | |
| "step": 850, | |
| "valid_targets_mean": 3582.2, | |
| "valid_targets_min": 765 | |
| }, | |
| { | |
| "epoch": 1.3679999999999999, | |
| "grad_norm": 0.46312511696658815, | |
| "learning_rate": 3.89081477423279e-05, | |
| "loss": 0.2593, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.25814926624298096, | |
| "step": 855, | |
| "valid_targets_mean": 4407.3, | |
| "valid_targets_min": 820 | |
| }, | |
| { | |
| "epoch": 1.376, | |
| "grad_norm": 0.4559151598342351, | |
| "learning_rate": 3.888199232524434e-05, | |
| "loss": 0.2737, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.27635207772254944, | |
| "step": 860, | |
| "valid_targets_mean": 4211.1, | |
| "valid_targets_min": 840 | |
| }, | |
| { | |
| "epoch": 1.384, | |
| "grad_norm": 0.42662571052348236, | |
| "learning_rate": 3.8855536330870354e-05, | |
| "loss": 0.262, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.27323490381240845, | |
| "step": 865, | |
| "valid_targets_mean": 5150.4, | |
| "valid_targets_min": 851 | |
| }, | |
| { | |
| "epoch": 1.392, | |
| "grad_norm": 0.43724344437427815, | |
| "learning_rate": 3.882878018035173e-05, | |
| "loss": 0.2682, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.298825204372406, | |
| "step": 870, | |
| "valid_targets_mean": 4823.4, | |
| "valid_targets_min": 648 | |
| }, | |
| { | |
| "epoch": 1.4, | |
| "grad_norm": 0.4513041544448632, | |
| "learning_rate": 3.880172429961232e-05, | |
| "loss": 0.2641, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2363213747739792, | |
| "step": 875, | |
| "valid_targets_mean": 4356.6, | |
| "valid_targets_min": 676 | |
| }, | |
| { | |
| "epoch": 1.408, | |
| "grad_norm": 0.4023862024488909, | |
| "learning_rate": 3.877436911934733e-05, | |
| "loss": 0.2825, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.26997482776641846, | |
| "step": 880, | |
| "valid_targets_mean": 5533.5, | |
| "valid_targets_min": 904 | |
| }, | |
| { | |
| "epoch": 1.416, | |
| "grad_norm": 0.7080890847938507, | |
| "learning_rate": 3.874671507501641e-05, | |
| "loss": 0.2796, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3377278745174408, | |
| "step": 885, | |
| "valid_targets_mean": 2200.1, | |
| "valid_targets_min": 912 | |
| }, | |
| { | |
| "epoch": 1.424, | |
| "grad_norm": 0.5267587721617124, | |
| "learning_rate": 3.871876260683677e-05, | |
| "loss": 0.2809, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2512984275817871, | |
| "step": 890, | |
| "valid_targets_mean": 3473.6, | |
| "valid_targets_min": 810 | |
| }, | |
| { | |
| "epoch": 1.432, | |
| "grad_norm": 0.5790695758130487, | |
| "learning_rate": 3.869051215977612e-05, | |
| "loss": 0.2884, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.32350438833236694, | |
| "step": 895, | |
| "valid_targets_mean": 3247.3, | |
| "valid_targets_min": 822 | |
| }, | |
| { | |
| "epoch": 1.44, | |
| "grad_norm": 0.4803005137644847, | |
| "learning_rate": 3.8661964183545634e-05, | |
| "loss": 0.2766, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.27841246128082275, | |
| "step": 900, | |
| "valid_targets_mean": 4475.9, | |
| "valid_targets_min": 948 | |
| }, | |
| { | |
| "epoch": 1.448, | |
| "grad_norm": 0.475901655735222, | |
| "learning_rate": 3.863311913259276e-05, | |
| "loss": 0.2797, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.33764156699180603, | |
| "step": 905, | |
| "valid_targets_mean": 5242.4, | |
| "valid_targets_min": 826 | |
| }, | |
| { | |
| "epoch": 1.456, | |
| "grad_norm": 0.748149672688827, | |
| "learning_rate": 3.860397746609402e-05, | |
| "loss": 0.306, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.31807559728622437, | |
| "step": 910, | |
| "valid_targets_mean": 2950.9, | |
| "valid_targets_min": 816 | |
| }, | |
| { | |
| "epoch": 1.464, | |
| "grad_norm": 0.37580443450106377, | |
| "learning_rate": 3.857453964794764e-05, | |
| "loss": 0.2834, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2712574005126953, | |
| "step": 915, | |
| "valid_targets_mean": 6658.7, | |
| "valid_targets_min": 1186 | |
| }, | |
| { | |
| "epoch": 1.472, | |
| "grad_norm": 0.45030105000964843, | |
| "learning_rate": 3.854480614676624e-05, | |
| "loss": 0.2789, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.272754043340683, | |
| "step": 920, | |
| "valid_targets_mean": 3925.5, | |
| "valid_targets_min": 788 | |
| }, | |
| { | |
| "epoch": 1.48, | |
| "grad_norm": 0.34923108926716045, | |
| "learning_rate": 3.851477743586932e-05, | |
| "loss": 0.2515, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2357773333787918, | |
| "step": 925, | |
| "valid_targets_mean": 6845.4, | |
| "valid_targets_min": 998 | |
| }, | |
| { | |
| "epoch": 1.488, | |
| "grad_norm": 0.5115811479757503, | |
| "learning_rate": 3.8484453993275746e-05, | |
| "loss": 0.2768, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.23984485864639282, | |
| "step": 930, | |
| "valid_targets_mean": 3133.7, | |
| "valid_targets_min": 774 | |
| }, | |
| { | |
| "epoch": 1.496, | |
| "grad_norm": 0.48452858019317047, | |
| "learning_rate": 3.8453836301696134e-05, | |
| "loss": 0.2732, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2921665608882904, | |
| "step": 935, | |
| "valid_targets_mean": 3872.1, | |
| "valid_targets_min": 830 | |
| }, | |
| { | |
| "epoch": 1.504, | |
| "grad_norm": 0.5141883495932179, | |
| "learning_rate": 3.842292484852518e-05, | |
| "loss": 0.2772, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2862679958343506, | |
| "step": 940, | |
| "valid_targets_mean": 4214.1, | |
| "valid_targets_min": 898 | |
| }, | |
| { | |
| "epoch": 1.512, | |
| "grad_norm": 0.46283592158032705, | |
| "learning_rate": 3.8391720125833875e-05, | |
| "loss": 0.282, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2581455707550049, | |
| "step": 945, | |
| "valid_targets_mean": 3914.6, | |
| "valid_targets_min": 987 | |
| }, | |
| { | |
| "epoch": 1.52, | |
| "grad_norm": 0.48706765716515343, | |
| "learning_rate": 3.83602226303617e-05, | |
| "loss": 0.2863, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2788928747177124, | |
| "step": 950, | |
| "valid_targets_mean": 4213.4, | |
| "valid_targets_min": 839 | |
| }, | |
| { | |
| "epoch": 1.528, | |
| "grad_norm": 0.585464765294358, | |
| "learning_rate": 3.83284328635087e-05, | |
| "loss": 0.2678, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2879747152328491, | |
| "step": 955, | |
| "valid_targets_mean": 3057.9, | |
| "valid_targets_min": 932 | |
| }, | |
| { | |
| "epoch": 1.536, | |
| "grad_norm": 0.5315853015293113, | |
| "learning_rate": 3.829635133132751e-05, | |
| "loss": 0.2663, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2610795199871063, | |
| "step": 960, | |
| "valid_targets_mean": 3284.8, | |
| "valid_targets_min": 733 | |
| }, | |
| { | |
| "epoch": 1.544, | |
| "grad_norm": 0.5296944336561442, | |
| "learning_rate": 3.8263978544515304e-05, | |
| "loss": 0.2836, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.26374924182891846, | |
| "step": 965, | |
| "valid_targets_mean": 3174.6, | |
| "valid_targets_min": 824 | |
| }, | |
| { | |
| "epoch": 1.552, | |
| "grad_norm": 0.4659744673324122, | |
| "learning_rate": 3.823131501840565e-05, | |
| "loss": 0.2616, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.27271947264671326, | |
| "step": 970, | |
| "valid_targets_mean": 3992.8, | |
| "valid_targets_min": 786 | |
| }, | |
| { | |
| "epoch": 1.56, | |
| "grad_norm": 0.41130937508304954, | |
| "learning_rate": 3.819836127296032e-05, | |
| "loss": 0.2874, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.26125800609588623, | |
| "step": 975, | |
| "valid_targets_mean": 4572.8, | |
| "valid_targets_min": 587 | |
| }, | |
| { | |
| "epoch": 1.568, | |
| "grad_norm": 0.5146658247796757, | |
| "learning_rate": 3.8165117832761016e-05, | |
| "loss": 0.2861, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3052479326725006, | |
| "step": 980, | |
| "valid_targets_mean": 3267.8, | |
| "valid_targets_min": 963 | |
| }, | |
| { | |
| "epoch": 1.576, | |
| "grad_norm": 0.5707618643836874, | |
| "learning_rate": 3.813158522700098e-05, | |
| "loss": 0.2592, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3040449619293213, | |
| "step": 985, | |
| "valid_targets_mean": 3492.4, | |
| "valid_targets_min": 995 | |
| }, | |
| { | |
| "epoch": 1.584, | |
| "grad_norm": 0.45611771016792524, | |
| "learning_rate": 3.809776398947665e-05, | |
| "loss": 0.2721, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2837957441806793, | |
| "step": 990, | |
| "valid_targets_mean": 4701.9, | |
| "valid_targets_min": 1074 | |
| }, | |
| { | |
| "epoch": 1.592, | |
| "grad_norm": 0.4630680235085982, | |
| "learning_rate": 3.806365465857908e-05, | |
| "loss": 0.2701, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2922239601612091, | |
| "step": 995, | |
| "valid_targets_mean": 3976.1, | |
| "valid_targets_min": 918 | |
| }, | |
| { | |
| "epoch": 1.6, | |
| "grad_norm": 0.399096117534781, | |
| "learning_rate": 3.802925777728541e-05, | |
| "loss": 0.2633, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2673601806163788, | |
| "step": 1000, | |
| "valid_targets_mean": 5512.1, | |
| "valid_targets_min": 840 | |
| }, | |
| { | |
| "epoch": 1.608, | |
| "grad_norm": 0.41627336969619316, | |
| "learning_rate": 3.799457389315023e-05, | |
| "loss": 0.2574, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2412080317735672, | |
| "step": 1005, | |
| "valid_targets_mean": 4379.9, | |
| "valid_targets_min": 1023 | |
| }, | |
| { | |
| "epoch": 1.616, | |
| "grad_norm": 0.440295852006296, | |
| "learning_rate": 3.795960355829683e-05, | |
| "loss": 0.2708, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2329067587852478, | |
| "step": 1010, | |
| "valid_targets_mean": 4429.0, | |
| "valid_targets_min": 1099 | |
| }, | |
| { | |
| "epoch": 1.624, | |
| "grad_norm": 0.5318475478532778, | |
| "learning_rate": 3.7924347329408444e-05, | |
| "loss": 0.2674, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2775961756706238, | |
| "step": 1015, | |
| "valid_targets_mean": 2742.1, | |
| "valid_targets_min": 857 | |
| }, | |
| { | |
| "epoch": 1.6320000000000001, | |
| "grad_norm": 0.4161727759675333, | |
| "learning_rate": 3.788880576771937e-05, | |
| "loss": 0.2617, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.24313268065452576, | |
| "step": 1020, | |
| "valid_targets_mean": 5492.8, | |
| "valid_targets_min": 978 | |
| }, | |
| { | |
| "epoch": 1.6400000000000001, | |
| "grad_norm": 0.7999546972605147, | |
| "learning_rate": 3.785297943900605e-05, | |
| "loss": 0.2627, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.28334081172943115, | |
| "step": 1025, | |
| "valid_targets_mean": 5762.0, | |
| "valid_targets_min": 852 | |
| }, | |
| { | |
| "epoch": 1.6480000000000001, | |
| "grad_norm": 0.48331284388571666, | |
| "learning_rate": 3.7816868913578044e-05, | |
| "loss": 0.2624, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2545091509819031, | |
| "step": 1030, | |
| "valid_targets_mean": 3178.3, | |
| "valid_targets_min": 728 | |
| }, | |
| { | |
| "epoch": 1.6560000000000001, | |
| "grad_norm": 0.45991402061064773, | |
| "learning_rate": 3.778047476626897e-05, | |
| "loss": 0.2936, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.25272148847579956, | |
| "step": 1035, | |
| "valid_targets_mean": 4338.1, | |
| "valid_targets_min": 1049 | |
| }, | |
| { | |
| "epoch": 1.6640000000000001, | |
| "grad_norm": 0.3451764939977074, | |
| "learning_rate": 3.7743797576427335e-05, | |
| "loss": 0.2611, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2401534467935562, | |
| "step": 1040, | |
| "valid_targets_mean": 6253.1, | |
| "valid_targets_min": 712 | |
| }, | |
| { | |
| "epoch": 1.6720000000000002, | |
| "grad_norm": 0.4111940706574221, | |
| "learning_rate": 3.770683792790733e-05, | |
| "loss": 0.2651, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2934751510620117, | |
| "step": 1045, | |
| "valid_targets_mean": 4744.1, | |
| "valid_targets_min": 1059 | |
| }, | |
| { | |
| "epoch": 1.6800000000000002, | |
| "grad_norm": 0.4087263278002059, | |
| "learning_rate": 3.766959640905954e-05, | |
| "loss": 0.2562, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2707015872001648, | |
| "step": 1050, | |
| "valid_targets_mean": 5591.9, | |
| "valid_targets_min": 710 | |
| }, | |
| { | |
| "epoch": 1.688, | |
| "grad_norm": 0.5089485422227342, | |
| "learning_rate": 3.763207361272153e-05, | |
| "loss": 0.2569, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.27094948291778564, | |
| "step": 1055, | |
| "valid_targets_mean": 3115.1, | |
| "valid_targets_min": 787 | |
| }, | |
| { | |
| "epoch": 1.696, | |
| "grad_norm": 0.39668167250416864, | |
| "learning_rate": 3.759427013620849e-05, | |
| "loss": 0.2525, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.23788487911224365, | |
| "step": 1060, | |
| "valid_targets_mean": 4781.2, | |
| "valid_targets_min": 1059 | |
| }, | |
| { | |
| "epoch": 1.704, | |
| "grad_norm": 0.4307058651279445, | |
| "learning_rate": 3.755618658130366e-05, | |
| "loss": 0.2426, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2379121482372284, | |
| "step": 1065, | |
| "valid_targets_mean": 3575.8, | |
| "valid_targets_min": 730 | |
| }, | |
| { | |
| "epoch": 1.712, | |
| "grad_norm": 0.6248314289543657, | |
| "learning_rate": 3.751782355424877e-05, | |
| "loss": 0.3129, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.33632132411003113, | |
| "step": 1070, | |
| "valid_targets_mean": 2413.9, | |
| "valid_targets_min": 919 | |
| }, | |
| { | |
| "epoch": 1.72, | |
| "grad_norm": 0.3832687956684591, | |
| "learning_rate": 3.7479181665734395e-05, | |
| "loss": 0.2513, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.27428677678108215, | |
| "step": 1075, | |
| "valid_targets_mean": 5823.8, | |
| "valid_targets_min": 795 | |
| }, | |
| { | |
| "epoch": 1.728, | |
| "grad_norm": 0.4146423424953804, | |
| "learning_rate": 3.7440261530890213e-05, | |
| "loss": 0.2665, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.24523381888866425, | |
| "step": 1080, | |
| "valid_targets_mean": 5357.3, | |
| "valid_targets_min": 1045 | |
| }, | |
| { | |
| "epoch": 1.736, | |
| "grad_norm": 0.4881475237391791, | |
| "learning_rate": 3.740106376927527e-05, | |
| "loss": 0.2525, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.25967293977737427, | |
| "step": 1085, | |
| "valid_targets_mean": 3166.9, | |
| "valid_targets_min": 756 | |
| }, | |
| { | |
| "epoch": 1.744, | |
| "grad_norm": 0.4001288366939507, | |
| "learning_rate": 3.7361589004868035e-05, | |
| "loss": 0.2772, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2417476922273636, | |
| "step": 1090, | |
| "valid_targets_mean": 4840.1, | |
| "valid_targets_min": 693 | |
| }, | |
| { | |
| "epoch": 1.752, | |
| "grad_norm": 0.4845852844999974, | |
| "learning_rate": 3.7321837866056535e-05, | |
| "loss": 0.2736, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.28427213430404663, | |
| "step": 1095, | |
| "valid_targets_mean": 3742.1, | |
| "valid_targets_min": 993 | |
| }, | |
| { | |
| "epoch": 1.76, | |
| "grad_norm": 0.44706609607667686, | |
| "learning_rate": 3.728181098562831e-05, | |
| "loss": 0.2547, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.23905465006828308, | |
| "step": 1100, | |
| "valid_targets_mean": 4081.2, | |
| "valid_targets_min": 888 | |
| }, | |
| { | |
| "epoch": 1.768, | |
| "grad_norm": 0.4606642272123344, | |
| "learning_rate": 3.7241509000760355e-05, | |
| "loss": 0.2919, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2809835374355316, | |
| "step": 1105, | |
| "valid_targets_mean": 4572.8, | |
| "valid_targets_min": 790 | |
| }, | |
| { | |
| "epoch": 1.776, | |
| "grad_norm": 0.44374131291191704, | |
| "learning_rate": 3.720093255300899e-05, | |
| "loss": 0.2622, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.23165518045425415, | |
| "step": 1110, | |
| "valid_targets_mean": 5182.4, | |
| "valid_targets_min": 898 | |
| }, | |
| { | |
| "epoch": 1.784, | |
| "grad_norm": 0.39542261654137617, | |
| "learning_rate": 3.7160082288299645e-05, | |
| "loss": 0.2495, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.26467621326446533, | |
| "step": 1115, | |
| "valid_targets_mean": 6130.7, | |
| "valid_targets_min": 1011 | |
| }, | |
| { | |
| "epoch": 1.792, | |
| "grad_norm": 0.40912132064625895, | |
| "learning_rate": 3.7118958856916534e-05, | |
| "loss": 0.2694, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.25465548038482666, | |
| "step": 1120, | |
| "valid_targets_mean": 6377.2, | |
| "valid_targets_min": 1032 | |
| }, | |
| { | |
| "epoch": 1.8, | |
| "grad_norm": 0.39755902551807576, | |
| "learning_rate": 3.707756291349237e-05, | |
| "loss": 0.2684, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2527580261230469, | |
| "step": 1125, | |
| "valid_targets_mean": 5014.4, | |
| "valid_targets_min": 1094 | |
| }, | |
| { | |
| "epoch": 1.808, | |
| "grad_norm": 0.514457815285411, | |
| "learning_rate": 3.703589511699787e-05, | |
| "loss": 0.265, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2838781476020813, | |
| "step": 1130, | |
| "valid_targets_mean": 3433.5, | |
| "valid_targets_min": 975 | |
| }, | |
| { | |
| "epoch": 1.8159999999999998, | |
| "grad_norm": 0.44550270673299985, | |
| "learning_rate": 3.6993956130731355e-05, | |
| "loss": 0.2586, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2567026615142822, | |
| "step": 1135, | |
| "valid_targets_mean": 3949.9, | |
| "valid_targets_min": 976 | |
| }, | |
| { | |
| "epoch": 1.8239999999999998, | |
| "grad_norm": 0.5422815583872076, | |
| "learning_rate": 3.6951746622308106e-05, | |
| "loss": 0.2691, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2764151692390442, | |
| "step": 1140, | |
| "valid_targets_mean": 2868.9, | |
| "valid_targets_min": 1072 | |
| }, | |
| { | |
| "epoch": 1.8319999999999999, | |
| "grad_norm": 0.41869577320813534, | |
| "learning_rate": 3.69092672636498e-05, | |
| "loss": 0.2932, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.28278565406799316, | |
| "step": 1145, | |
| "valid_targets_mean": 5837.7, | |
| "valid_targets_min": 1035 | |
| }, | |
| { | |
| "epoch": 1.8399999999999999, | |
| "grad_norm": 0.3757713903865787, | |
| "learning_rate": 3.686651873097375e-05, | |
| "loss": 0.2985, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.23112526535987854, | |
| "step": 1150, | |
| "valid_targets_mean": 6685.0, | |
| "valid_targets_min": 932 | |
| }, | |
| { | |
| "epoch": 1.8479999999999999, | |
| "grad_norm": 0.4409326357854815, | |
| "learning_rate": 3.682350170478223e-05, | |
| "loss": 0.2569, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.23602645099163055, | |
| "step": 1155, | |
| "valid_targets_mean": 5068.7, | |
| "valid_targets_min": 1197 | |
| }, | |
| { | |
| "epoch": 1.8559999999999999, | |
| "grad_norm": 0.40528589174751617, | |
| "learning_rate": 3.678021686985153e-05, | |
| "loss": 0.2737, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2743171453475952, | |
| "step": 1160, | |
| "valid_targets_mean": 4716.9, | |
| "valid_targets_min": 1127 | |
| }, | |
| { | |
| "epoch": 1.8639999999999999, | |
| "grad_norm": 0.41676048150188805, | |
| "learning_rate": 3.6736664915221144e-05, | |
| "loss": 0.2609, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.23870986700057983, | |
| "step": 1165, | |
| "valid_targets_mean": 4469.6, | |
| "valid_targets_min": 843 | |
| }, | |
| { | |
| "epoch": 1.8719999999999999, | |
| "grad_norm": 0.5284619471071941, | |
| "learning_rate": 3.669284653418278e-05, | |
| "loss": 0.2659, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.29566067457199097, | |
| "step": 1170, | |
| "valid_targets_mean": 3079.8, | |
| "valid_targets_min": 710 | |
| }, | |
| { | |
| "epoch": 1.88, | |
| "grad_norm": 0.47907263465195665, | |
| "learning_rate": 3.6648762424269306e-05, | |
| "loss": 0.2626, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2716890275478363, | |
| "step": 1175, | |
| "valid_targets_mean": 3397.4, | |
| "valid_targets_min": 884 | |
| }, | |
| { | |
| "epoch": 1.888, | |
| "grad_norm": 0.41124099809929116, | |
| "learning_rate": 3.660441328724365e-05, | |
| "loss": 0.2859, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.26922544836997986, | |
| "step": 1180, | |
| "valid_targets_mean": 5981.5, | |
| "valid_targets_min": 1078 | |
| }, | |
| { | |
| "epoch": 1.896, | |
| "grad_norm": 0.46088125238526284, | |
| "learning_rate": 3.655979982908764e-05, | |
| "loss": 0.2667, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.25594204664230347, | |
| "step": 1185, | |
| "valid_targets_mean": 3616.8, | |
| "valid_targets_min": 719 | |
| }, | |
| { | |
| "epoch": 1.904, | |
| "grad_norm": 0.4543017272146403, | |
| "learning_rate": 3.6514922759990756e-05, | |
| "loss": 0.2744, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.26674598455429077, | |
| "step": 1190, | |
| "valid_targets_mean": 3739.4, | |
| "valid_targets_min": 1079 | |
| }, | |
| { | |
| "epoch": 1.912, | |
| "grad_norm": 0.4192846645781652, | |
| "learning_rate": 3.646978279433883e-05, | |
| "loss": 0.2673, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2927936911582947, | |
| "step": 1195, | |
| "valid_targets_mean": 5007.6, | |
| "valid_targets_min": 938 | |
| }, | |
| { | |
| "epoch": 1.92, | |
| "grad_norm": 0.4436459924254246, | |
| "learning_rate": 3.6424380650702685e-05, | |
| "loss": 0.2573, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.26739734411239624, | |
| "step": 1200, | |
| "valid_targets_mean": 4250.5, | |
| "valid_targets_min": 721 | |
| }, | |
| { | |
| "epoch": 1.928, | |
| "grad_norm": 0.4240018692969433, | |
| "learning_rate": 3.637871705182667e-05, | |
| "loss": 0.2787, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.24744366109371185, | |
| "step": 1205, | |
| "valid_targets_mean": 5123.5, | |
| "valid_targets_min": 968 | |
| }, | |
| { | |
| "epoch": 1.936, | |
| "grad_norm": 0.36924402152142755, | |
| "learning_rate": 3.633279272461717e-05, | |
| "loss": 0.2657, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.23109649121761322, | |
| "step": 1210, | |
| "valid_targets_mean": 5121.6, | |
| "valid_targets_min": 710 | |
| }, | |
| { | |
| "epoch": 1.944, | |
| "grad_norm": 0.3870106636312311, | |
| "learning_rate": 3.628660840013102e-05, | |
| "loss": 0.2727, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.24079151451587677, | |
| "step": 1215, | |
| "valid_targets_mean": 5143.0, | |
| "valid_targets_min": 916 | |
| }, | |
| { | |
| "epoch": 1.952, | |
| "grad_norm": 0.47018664339123123, | |
| "learning_rate": 3.624016481356392e-05, | |
| "loss": 0.2725, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.28172844648361206, | |
| "step": 1220, | |
| "valid_targets_mean": 3450.4, | |
| "valid_targets_min": 616 | |
| }, | |
| { | |
| "epoch": 1.96, | |
| "grad_norm": 0.46397233622758055, | |
| "learning_rate": 3.619346270423866e-05, | |
| "loss": 0.2552, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2602039575576782, | |
| "step": 1225, | |
| "valid_targets_mean": 3486.5, | |
| "valid_targets_min": 815 | |
| }, | |
| { | |
| "epoch": 1.968, | |
| "grad_norm": 0.43784857356480683, | |
| "learning_rate": 3.6146502815593384e-05, | |
| "loss": 0.2578, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.26954376697540283, | |
| "step": 1230, | |
| "valid_targets_mean": 4244.4, | |
| "valid_targets_min": 897 | |
| }, | |
| { | |
| "epoch": 1.976, | |
| "grad_norm": 0.3574426307276786, | |
| "learning_rate": 3.609928589516977e-05, | |
| "loss": 0.2739, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2643759846687317, | |
| "step": 1235, | |
| "valid_targets_mean": 5850.1, | |
| "valid_targets_min": 1066 | |
| }, | |
| { | |
| "epoch": 1.984, | |
| "grad_norm": 0.38025251874050797, | |
| "learning_rate": 3.6051812694601114e-05, | |
| "loss": 0.2714, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2867104411125183, | |
| "step": 1240, | |
| "valid_targets_mean": 6052.6, | |
| "valid_targets_min": 796 | |
| }, | |
| { | |
| "epoch": 1.992, | |
| "grad_norm": 0.4749876308252262, | |
| "learning_rate": 3.6004083969600346e-05, | |
| "loss": 0.2653, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2491377741098404, | |
| "step": 1245, | |
| "valid_targets_mean": 3096.4, | |
| "valid_targets_min": 881 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "grad_norm": 0.39416119445325465, | |
| "learning_rate": 3.595610047994804e-05, | |
| "loss": 0.2393, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.21893836557865143, | |
| "step": 1250, | |
| "valid_targets_mean": 4368.7, | |
| "valid_targets_min": 685 | |
| }, | |
| { | |
| "epoch": 2.008, | |
| "grad_norm": 0.4997159356103167, | |
| "learning_rate": 3.5907862989480285e-05, | |
| "loss": 0.2607, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.24703270196914673, | |
| "step": 1255, | |
| "valid_targets_mean": 3216.5, | |
| "valid_targets_min": 943 | |
| }, | |
| { | |
| "epoch": 2.016, | |
| "grad_norm": 0.537153879112119, | |
| "learning_rate": 3.585937226607656e-05, | |
| "loss": 0.2395, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.24673984944820404, | |
| "step": 1260, | |
| "valid_targets_mean": 3123.9, | |
| "valid_targets_min": 626 | |
| }, | |
| { | |
| "epoch": 2.024, | |
| "grad_norm": 0.5137006043744009, | |
| "learning_rate": 3.5810629081647476e-05, | |
| "loss": 0.2712, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.25879064202308655, | |
| "step": 1265, | |
| "valid_targets_mean": 2930.7, | |
| "valid_targets_min": 830 | |
| }, | |
| { | |
| "epoch": 2.032, | |
| "grad_norm": 0.39071865921634885, | |
| "learning_rate": 3.576163421212249e-05, | |
| "loss": 0.2366, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.21474619209766388, | |
| "step": 1270, | |
| "valid_targets_mean": 5110.3, | |
| "valid_targets_min": 879 | |
| }, | |
| { | |
| "epoch": 2.04, | |
| "grad_norm": 0.42220001994538664, | |
| "learning_rate": 3.5712388437437576e-05, | |
| "loss": 0.2688, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2964683473110199, | |
| "step": 1275, | |
| "valid_targets_mean": 5167.3, | |
| "valid_targets_min": 918 | |
| }, | |
| { | |
| "epoch": 2.048, | |
| "grad_norm": 0.49759776720417487, | |
| "learning_rate": 3.566289254152283e-05, | |
| "loss": 0.2405, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.25483906269073486, | |
| "step": 1280, | |
| "valid_targets_mean": 3432.9, | |
| "valid_targets_min": 850 | |
| }, | |
| { | |
| "epoch": 2.056, | |
| "grad_norm": 0.43733797744170083, | |
| "learning_rate": 3.56131473122899e-05, | |
| "loss": 0.2627, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2655543386936188, | |
| "step": 1285, | |
| "valid_targets_mean": 4495.1, | |
| "valid_targets_min": 973 | |
| }, | |
| { | |
| "epoch": 2.064, | |
| "grad_norm": 0.4421965923645115, | |
| "learning_rate": 3.556315354161955e-05, | |
| "loss": 0.2412, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.23384560644626617, | |
| "step": 1290, | |
| "valid_targets_mean": 4117.4, | |
| "valid_targets_min": 828 | |
| }, | |
| { | |
| "epoch": 2.072, | |
| "grad_norm": 0.3725666055720055, | |
| "learning_rate": 3.551291202534899e-05, | |
| "loss": 0.2543, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2299247682094574, | |
| "step": 1295, | |
| "valid_targets_mean": 5426.9, | |
| "valid_targets_min": 1115 | |
| }, | |
| { | |
| "epoch": 2.08, | |
| "grad_norm": 0.6713941175950688, | |
| "learning_rate": 3.546242356325922e-05, | |
| "loss": 0.2612, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.29278409481048584, | |
| "step": 1300, | |
| "valid_targets_mean": 2013.2, | |
| "valid_targets_min": 782 | |
| }, | |
| { | |
| "epoch": 2.088, | |
| "grad_norm": 0.557278656484483, | |
| "learning_rate": 3.5411688959062323e-05, | |
| "loss": 0.2458, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2544538080692291, | |
| "step": 1305, | |
| "valid_targets_mean": 3579.6, | |
| "valid_targets_min": 762 | |
| }, | |
| { | |
| "epoch": 2.096, | |
| "grad_norm": 0.3687842744740229, | |
| "learning_rate": 3.5360709020388625e-05, | |
| "loss": 0.2418, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.25263142585754395, | |
| "step": 1310, | |
| "valid_targets_mean": 6351.1, | |
| "valid_targets_min": 905 | |
| }, | |
| { | |
| "epoch": 2.104, | |
| "grad_norm": 0.3793263690857251, | |
| "learning_rate": 3.530948455877388e-05, | |
| "loss": 0.2659, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.24786485731601715, | |
| "step": 1315, | |
| "valid_targets_mean": 5880.1, | |
| "valid_targets_min": 910 | |
| }, | |
| { | |
| "epoch": 2.112, | |
| "grad_norm": 0.47916929111479617, | |
| "learning_rate": 3.525801638964634e-05, | |
| "loss": 0.2435, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2757444679737091, | |
| "step": 1320, | |
| "valid_targets_mean": 3739.0, | |
| "valid_targets_min": 685 | |
| }, | |
| { | |
| "epoch": 2.12, | |
| "grad_norm": 0.5444709991468958, | |
| "learning_rate": 3.520630533231376e-05, | |
| "loss": 0.2636, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3086904287338257, | |
| "step": 1325, | |
| "valid_targets_mean": 3999.2, | |
| "valid_targets_min": 974 | |
| }, | |
| { | |
| "epoch": 2.128, | |
| "grad_norm": 0.48962862421008274, | |
| "learning_rate": 3.5154352209950376e-05, | |
| "loss": 0.2585, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.27979689836502075, | |
| "step": 1330, | |
| "valid_targets_mean": 3676.9, | |
| "valid_targets_min": 825 | |
| }, | |
| { | |
| "epoch": 2.136, | |
| "grad_norm": 0.45083380728163236, | |
| "learning_rate": 3.510215784958376e-05, | |
| "loss": 0.2491, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2559383511543274, | |
| "step": 1335, | |
| "valid_targets_mean": 4485.2, | |
| "valid_targets_min": 779 | |
| }, | |
| { | |
| "epoch": 2.144, | |
| "grad_norm": 0.33484746429649287, | |
| "learning_rate": 3.5049723082081755e-05, | |
| "loss": 0.244, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2397380918264389, | |
| "step": 1340, | |
| "valid_targets_mean": 6896.4, | |
| "valid_targets_min": 629 | |
| }, | |
| { | |
| "epoch": 2.152, | |
| "grad_norm": 0.5512883869808385, | |
| "learning_rate": 3.49970487421391e-05, | |
| "loss": 0.252, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2993601858615875, | |
| "step": 1345, | |
| "valid_targets_mean": 3205.4, | |
| "valid_targets_min": 988 | |
| }, | |
| { | |
| "epoch": 2.16, | |
| "grad_norm": 0.42190102820629805, | |
| "learning_rate": 3.494413566826427e-05, | |
| "loss": 0.2541, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2174837589263916, | |
| "step": 1350, | |
| "valid_targets_mean": 4768.2, | |
| "valid_targets_min": 972 | |
| }, | |
| { | |
| "epoch": 2.168, | |
| "grad_norm": 0.5081581527596065, | |
| "learning_rate": 3.489098470276608e-05, | |
| "loss": 0.2626, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.23775967955589294, | |
| "step": 1355, | |
| "valid_targets_mean": 2642.8, | |
| "valid_targets_min": 845 | |
| }, | |
| { | |
| "epoch": 2.176, | |
| "grad_norm": 0.40826096131920026, | |
| "learning_rate": 3.483759669174024e-05, | |
| "loss": 0.2624, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.27968931198120117, | |
| "step": 1360, | |
| "valid_targets_mean": 4708.7, | |
| "valid_targets_min": 797 | |
| }, | |
| { | |
| "epoch": 2.184, | |
| "grad_norm": 0.4485915411385824, | |
| "learning_rate": 3.478397248505598e-05, | |
| "loss": 0.2486, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2608921527862549, | |
| "step": 1365, | |
| "valid_targets_mean": 4368.9, | |
| "valid_targets_min": 1052 | |
| }, | |
| { | |
| "epoch": 2.192, | |
| "grad_norm": 0.5512019565130977, | |
| "learning_rate": 3.473011293634241e-05, | |
| "loss": 0.2669, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2932088375091553, | |
| "step": 1370, | |
| "valid_targets_mean": 3872.3, | |
| "valid_targets_min": 1022 | |
| }, | |
| { | |
| "epoch": 2.2, | |
| "grad_norm": 0.42038314886774936, | |
| "learning_rate": 3.467601890297502e-05, | |
| "loss": 0.2451, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.23290091753005981, | |
| "step": 1375, | |
| "valid_targets_mean": 3820.8, | |
| "valid_targets_min": 862 | |
| }, | |
| { | |
| "epoch": 2.208, | |
| "grad_norm": 0.486361040619312, | |
| "learning_rate": 3.4621691246061976e-05, | |
| "loss": 0.2465, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2654815912246704, | |
| "step": 1380, | |
| "valid_targets_mean": 2967.8, | |
| "valid_targets_min": 748 | |
| }, | |
| { | |
| "epoch": 2.216, | |
| "grad_norm": 0.3689995882810626, | |
| "learning_rate": 3.456713083043046e-05, | |
| "loss": 0.2575, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2481921762228012, | |
| "step": 1385, | |
| "valid_targets_mean": 6700.3, | |
| "valid_targets_min": 766 | |
| }, | |
| { | |
| "epoch": 2.224, | |
| "grad_norm": 0.3741844661161578, | |
| "learning_rate": 3.451233852461285e-05, | |
| "loss": 0.253, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2775578498840332, | |
| "step": 1390, | |
| "valid_targets_mean": 6166.1, | |
| "valid_targets_min": 1012 | |
| }, | |
| { | |
| "epoch": 2.232, | |
| "grad_norm": 0.4690171509050791, | |
| "learning_rate": 3.4457315200832935e-05, | |
| "loss": 0.2537, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.27017349004745483, | |
| "step": 1395, | |
| "valid_targets_mean": 3758.4, | |
| "valid_targets_min": 916 | |
| }, | |
| { | |
| "epoch": 2.24, | |
| "grad_norm": 0.46502707551837297, | |
| "learning_rate": 3.440206173499201e-05, | |
| "loss": 0.2647, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.24808195233345032, | |
| "step": 1400, | |
| "valid_targets_mean": 3311.0, | |
| "valid_targets_min": 919 | |
| }, | |
| { | |
| "epoch": 2.248, | |
| "grad_norm": 0.5358873234497143, | |
| "learning_rate": 3.4346579006654945e-05, | |
| "loss": 0.2398, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.28968346118927, | |
| "step": 1405, | |
| "valid_targets_mean": 3115.9, | |
| "valid_targets_min": 556 | |
| }, | |
| { | |
| "epoch": 2.2560000000000002, | |
| "grad_norm": 0.42084243548194467, | |
| "learning_rate": 3.4290867899036166e-05, | |
| "loss": 0.2563, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.22804833948612213, | |
| "step": 1410, | |
| "valid_targets_mean": 3692.8, | |
| "valid_targets_min": 753 | |
| }, | |
| { | |
| "epoch": 2.2640000000000002, | |
| "grad_norm": 0.4001618784605517, | |
| "learning_rate": 3.4234929298985614e-05, | |
| "loss": 0.2656, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.23770037293434143, | |
| "step": 1415, | |
| "valid_targets_mean": 5107.0, | |
| "valid_targets_min": 1186 | |
| }, | |
| { | |
| "epoch": 2.2720000000000002, | |
| "grad_norm": 0.5080158679760888, | |
| "learning_rate": 3.417876409697463e-05, | |
| "loss": 0.2382, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.20453549921512604, | |
| "step": 1420, | |
| "valid_targets_mean": 2835.4, | |
| "valid_targets_min": 690 | |
| }, | |
| { | |
| "epoch": 2.2800000000000002, | |
| "grad_norm": 0.4168243110819609, | |
| "learning_rate": 3.412237318708175e-05, | |
| "loss": 0.2374, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.23663721978664398, | |
| "step": 1425, | |
| "valid_targets_mean": 4147.2, | |
| "valid_targets_min": 957 | |
| }, | |
| { | |
| "epoch": 2.288, | |
| "grad_norm": 0.45954399327365, | |
| "learning_rate": 3.4065757466978504e-05, | |
| "loss": 0.2596, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2507503926753998, | |
| "step": 1430, | |
| "valid_targets_mean": 3781.1, | |
| "valid_targets_min": 808 | |
| }, | |
| { | |
| "epoch": 2.296, | |
| "grad_norm": 0.4702539653734292, | |
| "learning_rate": 3.400891783791511e-05, | |
| "loss": 0.2671, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.27482253313064575, | |
| "step": 1435, | |
| "valid_targets_mean": 3705.0, | |
| "valid_targets_min": 1032 | |
| }, | |
| { | |
| "epoch": 2.304, | |
| "grad_norm": 0.40026868215420613, | |
| "learning_rate": 3.395185520470614e-05, | |
| "loss": 0.2681, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.24075943231582642, | |
| "step": 1440, | |
| "valid_targets_mean": 5237.6, | |
| "valid_targets_min": 760 | |
| }, | |
| { | |
| "epoch": 2.312, | |
| "grad_norm": 0.3905513051922005, | |
| "learning_rate": 3.38945704757161e-05, | |
| "loss": 0.2522, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.22074247896671295, | |
| "step": 1445, | |
| "valid_targets_mean": 4402.3, | |
| "valid_targets_min": 756 | |
| }, | |
| { | |
| "epoch": 2.32, | |
| "grad_norm": 0.4882416307667647, | |
| "learning_rate": 3.383706456284498e-05, | |
| "loss": 0.2442, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2848014533519745, | |
| "step": 1450, | |
| "valid_targets_mean": 3364.2, | |
| "valid_targets_min": 901 | |
| }, | |
| { | |
| "epoch": 2.328, | |
| "grad_norm": 0.45445708291864306, | |
| "learning_rate": 3.377933838151374e-05, | |
| "loss": 0.2603, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.27066075801849365, | |
| "step": 1455, | |
| "valid_targets_mean": 3707.1, | |
| "valid_targets_min": 1017 | |
| }, | |
| { | |
| "epoch": 2.336, | |
| "grad_norm": 0.6608392796917465, | |
| "learning_rate": 3.3721392850649714e-05, | |
| "loss": 0.2744, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.33999860286712646, | |
| "step": 1460, | |
| "valid_targets_mean": 2394.0, | |
| "valid_targets_min": 1063 | |
| }, | |
| { | |
| "epoch": 2.344, | |
| "grad_norm": 0.47360622747404885, | |
| "learning_rate": 3.3663228892672034e-05, | |
| "loss": 0.2536, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2844492197036743, | |
| "step": 1465, | |
| "valid_targets_mean": 4209.1, | |
| "valid_targets_min": 1020 | |
| }, | |
| { | |
| "epoch": 2.352, | |
| "grad_norm": 0.37445807515235896, | |
| "learning_rate": 3.36048474334769e-05, | |
| "loss": 0.2717, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.21594926714897156, | |
| "step": 1470, | |
| "valid_targets_mean": 4527.0, | |
| "valid_targets_min": 1016 | |
| }, | |
| { | |
| "epoch": 2.36, | |
| "grad_norm": 0.4066613926869203, | |
| "learning_rate": 3.3546249402422834e-05, | |
| "loss": 0.2637, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2440742552280426, | |
| "step": 1475, | |
| "valid_targets_mean": 4281.1, | |
| "valid_targets_min": 884 | |
| }, | |
| { | |
| "epoch": 2.368, | |
| "grad_norm": 0.450872690024147, | |
| "learning_rate": 3.3487435732315944e-05, | |
| "loss": 0.25, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.23967817425727844, | |
| "step": 1480, | |
| "valid_targets_mean": 3973.4, | |
| "valid_targets_min": 815 | |
| }, | |
| { | |
| "epoch": 2.376, | |
| "grad_norm": 0.4373336501281426, | |
| "learning_rate": 3.342840735939501e-05, | |
| "loss": 0.2588, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.28060564398765564, | |
| "step": 1485, | |
| "valid_targets_mean": 3881.0, | |
| "valid_targets_min": 1006 | |
| }, | |
| { | |
| "epoch": 2.384, | |
| "grad_norm": 0.41616555215578727, | |
| "learning_rate": 3.33691652233166e-05, | |
| "loss": 0.2397, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.23200851678848267, | |
| "step": 1490, | |
| "valid_targets_mean": 4087.1, | |
| "valid_targets_min": 682 | |
| }, | |
| { | |
| "epoch": 2.392, | |
| "grad_norm": 0.4741632763884423, | |
| "learning_rate": 3.330971026714016e-05, | |
| "loss": 0.2632, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2454630434513092, | |
| "step": 1495, | |
| "valid_targets_mean": 3751.9, | |
| "valid_targets_min": 1012 | |
| }, | |
| { | |
| "epoch": 2.4, | |
| "grad_norm": 0.44777751707599694, | |
| "learning_rate": 3.325004343731292e-05, | |
| "loss": 0.267, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2585783004760742, | |
| "step": 1500, | |
| "valid_targets_mean": 4000.0, | |
| "valid_targets_min": 897 | |
| }, | |
| { | |
| "epoch": 2.408, | |
| "grad_norm": 0.4838887067897237, | |
| "learning_rate": 3.3190165683654885e-05, | |
| "loss": 0.2516, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.27823567390441895, | |
| "step": 1505, | |
| "valid_targets_mean": 3297.0, | |
| "valid_targets_min": 1109 | |
| }, | |
| { | |
| "epoch": 2.416, | |
| "grad_norm": 0.42450262258661076, | |
| "learning_rate": 3.31300779593437e-05, | |
| "loss": 0.2528, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.25310438871383667, | |
| "step": 1510, | |
| "valid_targets_mean": 4192.0, | |
| "valid_targets_min": 1057 | |
| }, | |
| { | |
| "epoch": 2.424, | |
| "grad_norm": 0.4114223292664404, | |
| "learning_rate": 3.306978122089948e-05, | |
| "loss": 0.2499, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.23483526706695557, | |
| "step": 1515, | |
| "valid_targets_mean": 4129.9, | |
| "valid_targets_min": 785 | |
| }, | |
| { | |
| "epoch": 2.432, | |
| "grad_norm": 0.4382457699711022, | |
| "learning_rate": 3.300927642816957e-05, | |
| "loss": 0.2611, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.25855690240859985, | |
| "step": 1520, | |
| "valid_targets_mean": 3935.2, | |
| "valid_targets_min": 765 | |
| }, | |
| { | |
| "epoch": 2.44, | |
| "grad_norm": 0.37454574276197367, | |
| "learning_rate": 3.294856454431328e-05, | |
| "loss": 0.2628, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2558472752571106, | |
| "step": 1525, | |
| "valid_targets_mean": 4478.7, | |
| "valid_targets_min": 845 | |
| }, | |
| { | |
| "epoch": 2.448, | |
| "grad_norm": 0.6185855420387542, | |
| "learning_rate": 3.288764653578653e-05, | |
| "loss": 0.2724, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3244587182998657, | |
| "step": 1530, | |
| "valid_targets_mean": 2447.6, | |
| "valid_targets_min": 946 | |
| }, | |
| { | |
| "epoch": 2.456, | |
| "grad_norm": 0.41449538190334706, | |
| "learning_rate": 3.2826523372326516e-05, | |
| "loss": 0.2464, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.22788169980049133, | |
| "step": 1535, | |
| "valid_targets_mean": 4480.9, | |
| "valid_targets_min": 930 | |
| }, | |
| { | |
| "epoch": 2.464, | |
| "grad_norm": 0.3844395691586439, | |
| "learning_rate": 3.276519602693621e-05, | |
| "loss": 0.2502, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2364940345287323, | |
| "step": 1540, | |
| "valid_targets_mean": 4955.6, | |
| "valid_targets_min": 1039 | |
| }, | |
| { | |
| "epoch": 2.472, | |
| "grad_norm": 0.37710049485157016, | |
| "learning_rate": 3.270366547586892e-05, | |
| "loss": 0.2606, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.24367119371891022, | |
| "step": 1545, | |
| "valid_targets_mean": 5767.8, | |
| "valid_targets_min": 1061 | |
| }, | |
| { | |
| "epoch": 2.48, | |
| "grad_norm": 0.38894525582252293, | |
| "learning_rate": 3.2641932698612715e-05, | |
| "loss": 0.2541, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.23194950819015503, | |
| "step": 1550, | |
| "valid_targets_mean": 4335.3, | |
| "valid_targets_min": 726 | |
| }, | |
| { | |
| "epoch": 2.488, | |
| "grad_norm": 0.3982220790804564, | |
| "learning_rate": 3.2579998677874855e-05, | |
| "loss": 0.2463, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.23282426595687866, | |
| "step": 1555, | |
| "valid_targets_mean": 4442.9, | |
| "valid_targets_min": 890 | |
| }, | |
| { | |
| "epoch": 2.496, | |
| "grad_norm": 0.3920266130118298, | |
| "learning_rate": 3.251786439956614e-05, | |
| "loss": 0.2535, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.22500693798065186, | |
| "step": 1560, | |
| "valid_targets_mean": 3975.3, | |
| "valid_targets_min": 638 | |
| }, | |
| { | |
| "epoch": 2.504, | |
| "grad_norm": 0.5374661523331783, | |
| "learning_rate": 3.2455530852785206e-05, | |
| "loss": 0.2727, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3037063479423523, | |
| "step": 1565, | |
| "valid_targets_mean": 3909.3, | |
| "valid_targets_min": 1022 | |
| }, | |
| { | |
| "epoch": 2.512, | |
| "grad_norm": 0.42133581754576677, | |
| "learning_rate": 3.239299902980281e-05, | |
| "loss": 0.2591, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.27413496375083923, | |
| "step": 1570, | |
| "valid_targets_mean": 4875.9, | |
| "valid_targets_min": 1016 | |
| }, | |
| { | |
| "epoch": 2.52, | |
| "grad_norm": 0.4540812695017237, | |
| "learning_rate": 3.2330269926046e-05, | |
| "loss": 0.2616, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3003402054309845, | |
| "step": 1575, | |
| "valid_targets_mean": 4698.6, | |
| "valid_targets_min": 1056 | |
| }, | |
| { | |
| "epoch": 2.528, | |
| "grad_norm": 0.48721669960386377, | |
| "learning_rate": 3.2267344540082284e-05, | |
| "loss": 0.2773, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2769826650619507, | |
| "step": 1580, | |
| "valid_targets_mean": 3544.6, | |
| "valid_targets_min": 959 | |
| }, | |
| { | |
| "epoch": 2.536, | |
| "grad_norm": 0.44779354854153475, | |
| "learning_rate": 3.220422387360373e-05, | |
| "loss": 0.2413, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.23960761725902557, | |
| "step": 1585, | |
| "valid_targets_mean": 4160.4, | |
| "valid_targets_min": 1206 | |
| }, | |
| { | |
| "epoch": 2.544, | |
| "grad_norm": 0.36624305776785404, | |
| "learning_rate": 3.2140908931411026e-05, | |
| "loss": 0.2491, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.24884971976280212, | |
| "step": 1590, | |
| "valid_targets_mean": 5886.5, | |
| "valid_targets_min": 777 | |
| }, | |
| { | |
| "epoch": 2.552, | |
| "grad_norm": 0.4205442975727069, | |
| "learning_rate": 3.207740072139748e-05, | |
| "loss": 0.2641, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2376871556043625, | |
| "step": 1595, | |
| "valid_targets_mean": 4554.3, | |
| "valid_targets_min": 807 | |
| }, | |
| { | |
| "epoch": 2.56, | |
| "grad_norm": 0.4390164861743431, | |
| "learning_rate": 3.2013700254532996e-05, | |
| "loss": 0.2626, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2751956582069397, | |
| "step": 1600, | |
| "valid_targets_mean": 3900.7, | |
| "valid_targets_min": 722 | |
| }, | |
| { | |
| "epoch": 2.568, | |
| "grad_norm": 0.45059040166393205, | |
| "learning_rate": 3.194980854484794e-05, | |
| "loss": 0.2556, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.24145694077014923, | |
| "step": 1605, | |
| "valid_targets_mean": 3108.7, | |
| "valid_targets_min": 1069 | |
| }, | |
| { | |
| "epoch": 2.576, | |
| "grad_norm": 0.38498385041200234, | |
| "learning_rate": 3.188572660941702e-05, | |
| "loss": 0.2393, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.25530481338500977, | |
| "step": 1610, | |
| "valid_targets_mean": 4882.4, | |
| "valid_targets_min": 630 | |
| }, | |
| { | |
| "epoch": 2.584, | |
| "grad_norm": 0.45078053611552554, | |
| "learning_rate": 3.182145546834311e-05, | |
| "loss": 0.2521, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.23659245669841766, | |
| "step": 1615, | |
| "valid_targets_mean": 3979.6, | |
| "valid_targets_min": 933 | |
| }, | |
| { | |
| "epoch": 2.592, | |
| "grad_norm": 0.4873505068621663, | |
| "learning_rate": 3.1756996144740994e-05, | |
| "loss": 0.2731, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.24679982662200928, | |
| "step": 1620, | |
| "valid_targets_mean": 2844.8, | |
| "valid_targets_min": 728 | |
| }, | |
| { | |
| "epoch": 2.6, | |
| "grad_norm": 0.375889816303558, | |
| "learning_rate": 3.1692349664721074e-05, | |
| "loss": 0.2752, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.25449809432029724, | |
| "step": 1625, | |
| "valid_targets_mean": 5291.6, | |
| "valid_targets_min": 1192 | |
| }, | |
| { | |
| "epoch": 2.608, | |
| "grad_norm": 0.43710999498519076, | |
| "learning_rate": 3.1627517057373046e-05, | |
| "loss": 0.259, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2557065486907959, | |
| "step": 1630, | |
| "valid_targets_mean": 4127.9, | |
| "valid_targets_min": 906 | |
| }, | |
| { | |
| "epoch": 2.616, | |
| "grad_norm": 0.47479745259245676, | |
| "learning_rate": 3.156249935474953e-05, | |
| "loss": 0.2684, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2876622974872589, | |
| "step": 1635, | |
| "valid_targets_mean": 4105.4, | |
| "valid_targets_min": 739 | |
| }, | |
| { | |
| "epoch": 2.624, | |
| "grad_norm": 0.38433518760476604, | |
| "learning_rate": 3.1497297591849614e-05, | |
| "loss": 0.2629, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.25629955530166626, | |
| "step": 1640, | |
| "valid_targets_mean": 5574.8, | |
| "valid_targets_min": 858 | |
| }, | |
| { | |
| "epoch": 2.632, | |
| "grad_norm": 0.43299417778940696, | |
| "learning_rate": 3.143191280660238e-05, | |
| "loss": 0.2445, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.23174050450325012, | |
| "step": 1645, | |
| "valid_targets_mean": 3752.2, | |
| "valid_targets_min": 1012 | |
| }, | |
| { | |
| "epoch": 2.64, | |
| "grad_norm": 0.4042914030109211, | |
| "learning_rate": 3.1366346039850424e-05, | |
| "loss": 0.2468, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2770325541496277, | |
| "step": 1650, | |
| "valid_targets_mean": 4572.9, | |
| "valid_targets_min": 955 | |
| }, | |
| { | |
| "epoch": 2.648, | |
| "grad_norm": 0.4000943004214781, | |
| "learning_rate": 3.130059833533323e-05, | |
| "loss": 0.2506, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2539942264556885, | |
| "step": 1655, | |
| "valid_targets_mean": 4367.9, | |
| "valid_targets_min": 851 | |
| }, | |
| { | |
| "epoch": 2.656, | |
| "grad_norm": 0.5246478390502145, | |
| "learning_rate": 3.123467073967059e-05, | |
| "loss": 0.2646, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.25003015995025635, | |
| "step": 1660, | |
| "valid_targets_mean": 2662.1, | |
| "valid_targets_min": 935 | |
| }, | |
| { | |
| "epoch": 2.664, | |
| "grad_norm": 0.48721529047895606, | |
| "learning_rate": 3.116856430234594e-05, | |
| "loss": 0.2472, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.24807119369506836, | |
| "step": 1665, | |
| "valid_targets_mean": 2776.6, | |
| "valid_targets_min": 830 | |
| }, | |
| { | |
| "epoch": 2.672, | |
| "grad_norm": 0.4011766685019322, | |
| "learning_rate": 3.110228007568963e-05, | |
| "loss": 0.2532, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.25203073024749756, | |
| "step": 1670, | |
| "valid_targets_mean": 4795.0, | |
| "valid_targets_min": 1091 | |
| }, | |
| { | |
| "epoch": 2.68, | |
| "grad_norm": 0.4108384417120924, | |
| "learning_rate": 3.103581911486221e-05, | |
| "loss": 0.244, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2413668930530548, | |
| "step": 1675, | |
| "valid_targets_mean": 4554.4, | |
| "valid_targets_min": 807 | |
| }, | |
| { | |
| "epoch": 2.6879999999999997, | |
| "grad_norm": 0.4318489284689886, | |
| "learning_rate": 3.0969182477837604e-05, | |
| "loss": 0.2747, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2699677348136902, | |
| "step": 1680, | |
| "valid_targets_mean": 4299.4, | |
| "valid_targets_min": 951 | |
| }, | |
| { | |
| "epoch": 2.6959999999999997, | |
| "grad_norm": 0.46195534616572037, | |
| "learning_rate": 3.090237122538628e-05, | |
| "loss": 0.2711, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2878229022026062, | |
| "step": 1685, | |
| "valid_targets_mean": 3651.4, | |
| "valid_targets_min": 882 | |
| }, | |
| { | |
| "epoch": 2.7039999999999997, | |
| "grad_norm": 0.4064409277711358, | |
| "learning_rate": 3.0835386421058345e-05, | |
| "loss": 0.2432, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.252693772315979, | |
| "step": 1690, | |
| "valid_targets_mean": 4166.9, | |
| "valid_targets_min": 1026 | |
| }, | |
| { | |
| "epoch": 2.7119999999999997, | |
| "grad_norm": 0.3315855719749269, | |
| "learning_rate": 3.0768229131166664e-05, | |
| "loss": 0.261, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.22979766130447388, | |
| "step": 1695, | |
| "valid_targets_mean": 6155.2, | |
| "valid_targets_min": 971 | |
| }, | |
| { | |
| "epoch": 2.7199999999999998, | |
| "grad_norm": 0.46914934540874326, | |
| "learning_rate": 3.070090042476983e-05, | |
| "loss": 0.2458, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.23643366992473602, | |
| "step": 1700, | |
| "valid_targets_mean": 3444.9, | |
| "valid_targets_min": 850 | |
| }, | |
| { | |
| "epoch": 2.7279999999999998, | |
| "grad_norm": 0.6284719316531368, | |
| "learning_rate": 3.063340137365517e-05, | |
| "loss": 0.27, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2869764566421509, | |
| "step": 1705, | |
| "valid_targets_mean": 2083.1, | |
| "valid_targets_min": 837 | |
| }, | |
| { | |
| "epoch": 2.7359999999999998, | |
| "grad_norm": 0.38536559734095, | |
| "learning_rate": 3.0565733052321674e-05, | |
| "loss": 0.2549, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.25470173358917236, | |
| "step": 1710, | |
| "valid_targets_mean": 5245.9, | |
| "valid_targets_min": 1287 | |
| }, | |
| { | |
| "epoch": 2.7439999999999998, | |
| "grad_norm": 0.4111284973330911, | |
| "learning_rate": 3.0497896537962924e-05, | |
| "loss": 0.2464, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2471093237400055, | |
| "step": 1715, | |
| "valid_targets_mean": 4554.9, | |
| "valid_targets_min": 863 | |
| }, | |
| { | |
| "epoch": 2.752, | |
| "grad_norm": 0.340479711041136, | |
| "learning_rate": 3.042989291044991e-05, | |
| "loss": 0.2501, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.255589097738266, | |
| "step": 1720, | |
| "valid_targets_mean": 8342.3, | |
| "valid_targets_min": 843 | |
| }, | |
| { | |
| "epoch": 2.76, | |
| "grad_norm": 0.4997516270386313, | |
| "learning_rate": 3.036172325231383e-05, | |
| "loss": 0.2614, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.26695573329925537, | |
| "step": 1725, | |
| "valid_targets_mean": 2913.8, | |
| "valid_targets_min": 928 | |
| }, | |
| { | |
| "epoch": 2.768, | |
| "grad_norm": 0.4160259337644636, | |
| "learning_rate": 3.0293388648728908e-05, | |
| "loss": 0.2608, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.23652175068855286, | |
| "step": 1730, | |
| "valid_targets_mean": 4476.8, | |
| "valid_targets_min": 982 | |
| }, | |
| { | |
| "epoch": 2.776, | |
| "grad_norm": 0.39927985153665424, | |
| "learning_rate": 3.022489018749508e-05, | |
| "loss": 0.246, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.24088355898857117, | |
| "step": 1735, | |
| "valid_targets_mean": 3952.5, | |
| "valid_targets_min": 1032 | |
| }, | |
| { | |
| "epoch": 2.784, | |
| "grad_norm": 0.4550887207976117, | |
| "learning_rate": 3.015622895902068e-05, | |
| "loss": 0.2423, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2190483808517456, | |
| "step": 1740, | |
| "valid_targets_mean": 3110.4, | |
| "valid_targets_min": 1039 | |
| }, | |
| { | |
| "epoch": 2.792, | |
| "grad_norm": 0.33861046318017596, | |
| "learning_rate": 3.008740605630508e-05, | |
| "loss": 0.2541, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.24156677722930908, | |
| "step": 1745, | |
| "valid_targets_mean": 6048.4, | |
| "valid_targets_min": 748 | |
| }, | |
| { | |
| "epoch": 2.8, | |
| "grad_norm": 0.36617432315449255, | |
| "learning_rate": 3.0018422574921337e-05, | |
| "loss": 0.2392, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.24149316549301147, | |
| "step": 1750, | |
| "valid_targets_mean": 5241.0, | |
| "valid_targets_min": 1079 | |
| }, | |
| { | |
| "epoch": 2.808, | |
| "grad_norm": 0.3464726951761244, | |
| "learning_rate": 2.9949279612998673e-05, | |
| "loss": 0.2423, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2470303475856781, | |
| "step": 1755, | |
| "valid_targets_mean": 6875.5, | |
| "valid_targets_min": 1210 | |
| }, | |
| { | |
| "epoch": 2.816, | |
| "grad_norm": 0.3450155466545645, | |
| "learning_rate": 2.9879978271205064e-05, | |
| "loss": 0.2374, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2277037501335144, | |
| "step": 1760, | |
| "valid_targets_mean": 5934.5, | |
| "valid_targets_min": 912 | |
| }, | |
| { | |
| "epoch": 2.824, | |
| "grad_norm": 0.4016568882747598, | |
| "learning_rate": 2.9810519652729692e-05, | |
| "loss": 0.2793, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2241845428943634, | |
| "step": 1765, | |
| "valid_targets_mean": 4558.4, | |
| "valid_targets_min": 973 | |
| }, | |
| { | |
| "epoch": 2.832, | |
| "grad_norm": 0.41748302286864053, | |
| "learning_rate": 2.9740904863265378e-05, | |
| "loss": 0.2462, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.263439804315567, | |
| "step": 1770, | |
| "valid_targets_mean": 4341.7, | |
| "valid_targets_min": 1283 | |
| }, | |
| { | |
| "epoch": 2.84, | |
| "grad_norm": 0.48850050096135555, | |
| "learning_rate": 2.967113501099097e-05, | |
| "loss": 0.2458, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.23303407430648804, | |
| "step": 1775, | |
| "valid_targets_mean": 2772.5, | |
| "valid_targets_min": 787 | |
| }, | |
| { | |
| "epoch": 2.848, | |
| "grad_norm": 0.47900204436253896, | |
| "learning_rate": 2.9601211206553745e-05, | |
| "loss": 0.28, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.32549360394477844, | |
| "step": 1780, | |
| "valid_targets_mean": 3978.5, | |
| "valid_targets_min": 851 | |
| }, | |
| { | |
| "epoch": 2.856, | |
| "grad_norm": 0.39202466712624395, | |
| "learning_rate": 2.9531134563051686e-05, | |
| "loss": 0.2521, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.24673870205879211, | |
| "step": 1785, | |
| "valid_targets_mean": 4397.2, | |
| "valid_targets_min": 789 | |
| }, | |
| { | |
| "epoch": 2.864, | |
| "grad_norm": 0.454753156477103, | |
| "learning_rate": 2.946090619601579e-05, | |
| "loss": 0.255, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2228916883468628, | |
| "step": 1790, | |
| "valid_targets_mean": 3641.2, | |
| "valid_targets_min": 801 | |
| }, | |
| { | |
| "epoch": 2.872, | |
| "grad_norm": 0.5239133569031957, | |
| "learning_rate": 2.9390527223392292e-05, | |
| "loss": 0.2483, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.26477718353271484, | |
| "step": 1795, | |
| "valid_targets_mean": 2819.3, | |
| "valid_targets_min": 764 | |
| }, | |
| { | |
| "epoch": 2.88, | |
| "grad_norm": 0.43565748091817924, | |
| "learning_rate": 2.931999876552488e-05, | |
| "loss": 0.2698, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2930182218551636, | |
| "step": 1800, | |
| "valid_targets_mean": 4048.8, | |
| "valid_targets_min": 1123 | |
| }, | |
| { | |
| "epoch": 2.888, | |
| "grad_norm": 0.4126016381922162, | |
| "learning_rate": 2.9249321945136854e-05, | |
| "loss": 0.2456, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.26448121666908264, | |
| "step": 1805, | |
| "valid_targets_mean": 4604.4, | |
| "valid_targets_min": 948 | |
| }, | |
| { | |
| "epoch": 2.896, | |
| "grad_norm": 0.4216543598725713, | |
| "learning_rate": 2.9178497887313257e-05, | |
| "loss": 0.2589, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.27048546075820923, | |
| "step": 1810, | |
| "valid_targets_mean": 4440.7, | |
| "valid_targets_min": 926 | |
| }, | |
| { | |
| "epoch": 2.904, | |
| "grad_norm": 0.339972934761611, | |
| "learning_rate": 2.9107527719482968e-05, | |
| "loss": 0.2497, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.24039791524410248, | |
| "step": 1815, | |
| "valid_targets_mean": 5725.7, | |
| "valid_targets_min": 1038 | |
| }, | |
| { | |
| "epoch": 2.912, | |
| "grad_norm": 0.4284574874009162, | |
| "learning_rate": 2.9036412571400747e-05, | |
| "loss": 0.2348, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.25202876329421997, | |
| "step": 1820, | |
| "valid_targets_mean": 3956.6, | |
| "valid_targets_min": 841 | |
| }, | |
| { | |
| "epoch": 2.92, | |
| "grad_norm": 0.3887923193278144, | |
| "learning_rate": 2.8965153575129255e-05, | |
| "loss": 0.2383, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.26022517681121826, | |
| "step": 1825, | |
| "valid_targets_mean": 5180.9, | |
| "valid_targets_min": 1022 | |
| }, | |
| { | |
| "epoch": 2.928, | |
| "grad_norm": 0.42207586149145204, | |
| "learning_rate": 2.8893751865021044e-05, | |
| "loss": 0.2454, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.24122312664985657, | |
| "step": 1830, | |
| "valid_targets_mean": 3972.8, | |
| "valid_targets_min": 946 | |
| }, | |
| { | |
| "epoch": 2.936, | |
| "grad_norm": 0.38545070788876795, | |
| "learning_rate": 2.8822208577700473e-05, | |
| "loss": 0.2401, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2604748606681824, | |
| "step": 1835, | |
| "valid_targets_mean": 4398.5, | |
| "valid_targets_min": 782 | |
| }, | |
| { | |
| "epoch": 2.944, | |
| "grad_norm": 0.5068190010044846, | |
| "learning_rate": 2.8750524852045642e-05, | |
| "loss": 0.2548, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.26114439964294434, | |
| "step": 1840, | |
| "valid_targets_mean": 2930.1, | |
| "valid_targets_min": 790 | |
| }, | |
| { | |
| "epoch": 2.952, | |
| "grad_norm": 0.38331449055182676, | |
| "learning_rate": 2.867870182917024e-05, | |
| "loss": 0.244, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.23407770693302155, | |
| "step": 1845, | |
| "valid_targets_mean": 4040.6, | |
| "valid_targets_min": 758 | |
| }, | |
| { | |
| "epoch": 2.96, | |
| "grad_norm": 0.40079080583130816, | |
| "learning_rate": 2.8606740652405394e-05, | |
| "loss": 0.2779, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2599574327468872, | |
| "step": 1850, | |
| "valid_targets_mean": 4798.5, | |
| "valid_targets_min": 934 | |
| }, | |
| { | |
| "epoch": 2.968, | |
| "grad_norm": 0.3479930057967736, | |
| "learning_rate": 2.853464246728147e-05, | |
| "loss": 0.2447, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.23751023411750793, | |
| "step": 1855, | |
| "valid_targets_mean": 5672.2, | |
| "valid_targets_min": 832 | |
| }, | |
| { | |
| "epoch": 2.976, | |
| "grad_norm": 0.3369578047927962, | |
| "learning_rate": 2.846240842150984e-05, | |
| "loss": 0.2385, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.20889130234718323, | |
| "step": 1860, | |
| "valid_targets_mean": 5308.2, | |
| "valid_targets_min": 788 | |
| }, | |
| { | |
| "epoch": 2.984, | |
| "grad_norm": 0.4291039444647585, | |
| "learning_rate": 2.839003966496458e-05, | |
| "loss": 0.2477, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2270471304655075, | |
| "step": 1865, | |
| "valid_targets_mean": 3958.4, | |
| "valid_targets_min": 897 | |
| }, | |
| { | |
| "epoch": 2.992, | |
| "grad_norm": 0.43281841130812354, | |
| "learning_rate": 2.8317537349664215e-05, | |
| "loss": 0.2807, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2587190866470337, | |
| "step": 1870, | |
| "valid_targets_mean": 4151.9, | |
| "valid_targets_min": 950 | |
| }, | |
| { | |
| "epoch": 3.0, | |
| "grad_norm": 0.3978270265787385, | |
| "learning_rate": 2.824490262975334e-05, | |
| "loss": 0.2441, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2422841191291809, | |
| "step": 1875, | |
| "valid_targets_mean": 4473.6, | |
| "valid_targets_min": 931 | |
| }, | |
| { | |
| "epoch": 3.008, | |
| "grad_norm": 0.3686194912968534, | |
| "learning_rate": 2.817213666148427e-05, | |
| "loss": 0.2345, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.20717495679855347, | |
| "step": 1880, | |
| "valid_targets_mean": 4454.7, | |
| "valid_targets_min": 1031 | |
| }, | |
| { | |
| "epoch": 3.016, | |
| "grad_norm": 0.4391451892820445, | |
| "learning_rate": 2.809924060319862e-05, | |
| "loss": 0.2248, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2463517189025879, | |
| "step": 1885, | |
| "valid_targets_mean": 4455.8, | |
| "valid_targets_min": 850 | |
| }, | |
| { | |
| "epoch": 3.024, | |
| "grad_norm": 0.37945227439492357, | |
| "learning_rate": 2.802621561530888e-05, | |
| "loss": 0.2377, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.23665887117385864, | |
| "step": 1890, | |
| "valid_targets_mean": 4614.1, | |
| "valid_targets_min": 801 | |
| }, | |
| { | |
| "epoch": 3.032, | |
| "grad_norm": 0.5047817941563999, | |
| "learning_rate": 2.7953062860279937e-05, | |
| "loss": 0.2438, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.24539366364479065, | |
| "step": 1895, | |
| "valid_targets_mean": 2838.2, | |
| "valid_targets_min": 945 | |
| }, | |
| { | |
| "epoch": 3.04, | |
| "grad_norm": 0.640395323678212, | |
| "learning_rate": 2.7879783502610557e-05, | |
| "loss": 0.2587, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.33286240696907043, | |
| "step": 1900, | |
| "valid_targets_mean": 2273.1, | |
| "valid_targets_min": 937 | |
| }, | |
| { | |
| "epoch": 3.048, | |
| "grad_norm": 0.42850604085756194, | |
| "learning_rate": 2.7806378708814875e-05, | |
| "loss": 0.2476, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.24500463902950287, | |
| "step": 1905, | |
| "valid_targets_mean": 4063.4, | |
| "valid_targets_min": 1026 | |
| }, | |
| { | |
| "epoch": 3.056, | |
| "grad_norm": 0.4303493596218602, | |
| "learning_rate": 2.773284964740379e-05, | |
| "loss": 0.2396, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.23514248430728912, | |
| "step": 1910, | |
| "valid_targets_mean": 3806.9, | |
| "valid_targets_min": 1057 | |
| }, | |
| { | |
| "epoch": 3.064, | |
| "grad_norm": 0.5376670517442778, | |
| "learning_rate": 2.7659197488866403e-05, | |
| "loss": 0.2436, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.24571284651756287, | |
| "step": 1915, | |
| "valid_targets_mean": 2978.4, | |
| "valid_targets_min": 835 | |
| }, | |
| { | |
| "epoch": 3.072, | |
| "grad_norm": 0.4856549540142223, | |
| "learning_rate": 2.7585423405651347e-05, | |
| "loss": 0.2459, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2590499818325043, | |
| "step": 1920, | |
| "valid_targets_mean": 3158.0, | |
| "valid_targets_min": 817 | |
| }, | |
| { | |
| "epoch": 3.08, | |
| "grad_norm": 0.41265678997745975, | |
| "learning_rate": 2.7511528572148153e-05, | |
| "loss": 0.2285, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.24581827223300934, | |
| "step": 1925, | |
| "valid_targets_mean": 3845.9, | |
| "valid_targets_min": 779 | |
| }, | |
| { | |
| "epoch": 3.088, | |
| "grad_norm": 0.39148774769841904, | |
| "learning_rate": 2.7437514164668536e-05, | |
| "loss": 0.2211, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2122671902179718, | |
| "step": 1930, | |
| "valid_targets_mean": 4021.6, | |
| "valid_targets_min": 857 | |
| }, | |
| { | |
| "epoch": 3.096, | |
| "grad_norm": 0.42384683209642465, | |
| "learning_rate": 2.7363381361427692e-05, | |
| "loss": 0.2509, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2753984332084656, | |
| "step": 1935, | |
| "valid_targets_mean": 4518.8, | |
| "valid_targets_min": 996 | |
| }, | |
| { | |
| "epoch": 3.104, | |
| "grad_norm": 0.5156696168531341, | |
| "learning_rate": 2.72891313425255e-05, | |
| "loss": 0.2448, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2671349346637726, | |
| "step": 1940, | |
| "valid_targets_mean": 3111.5, | |
| "valid_targets_min": 856 | |
| }, | |
| { | |
| "epoch": 3.112, | |
| "grad_norm": 0.40374314761107644, | |
| "learning_rate": 2.7214765289927777e-05, | |
| "loss": 0.2362, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.22792913019657135, | |
| "step": 1945, | |
| "valid_targets_mean": 4236.8, | |
| "valid_targets_min": 1042 | |
| }, | |
| { | |
| "epoch": 3.12, | |
| "grad_norm": 0.41400421213473976, | |
| "learning_rate": 2.714028438744746e-05, | |
| "loss": 0.2624, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.28113529086112976, | |
| "step": 1950, | |
| "valid_targets_mean": 4941.1, | |
| "valid_targets_min": 945 | |
| }, | |
| { | |
| "epoch": 3.128, | |
| "grad_norm": 0.3753847058592898, | |
| "learning_rate": 2.706568982072573e-05, | |
| "loss": 0.2403, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.22099116444587708, | |
| "step": 1955, | |
| "valid_targets_mean": 4709.8, | |
| "valid_targets_min": 898 | |
| }, | |
| { | |
| "epoch": 3.136, | |
| "grad_norm": 0.4414960074804987, | |
| "learning_rate": 2.6990982777213174e-05, | |
| "loss": 0.2336, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.24348290264606476, | |
| "step": 1960, | |
| "valid_targets_mean": 4199.9, | |
| "valid_targets_min": 833 | |
| }, | |
| { | |
| "epoch": 3.144, | |
| "grad_norm": 0.39031860273125923, | |
| "learning_rate": 2.691616444615085e-05, | |
| "loss": 0.2565, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2442023903131485, | |
| "step": 1965, | |
| "valid_targets_mean": 4948.6, | |
| "valid_targets_min": 662 | |
| }, | |
| { | |
| "epoch": 3.152, | |
| "grad_norm": 0.42869791654686545, | |
| "learning_rate": 2.6841236018551402e-05, | |
| "loss": 0.2524, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.24929338693618774, | |
| "step": 1970, | |
| "valid_targets_mean": 4449.2, | |
| "valid_targets_min": 1184 | |
| }, | |
| { | |
| "epoch": 3.16, | |
| "grad_norm": 0.3724581951765585, | |
| "learning_rate": 2.6766198687180028e-05, | |
| "loss": 0.2418, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2496282011270523, | |
| "step": 1975, | |
| "valid_targets_mean": 5392.3, | |
| "valid_targets_min": 909 | |
| }, | |
| { | |
| "epoch": 3.168, | |
| "grad_norm": 0.3179361729676042, | |
| "learning_rate": 2.6691053646535564e-05, | |
| "loss": 0.2281, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.20636862516403198, | |
| "step": 1980, | |
| "valid_targets_mean": 6938.2, | |
| "valid_targets_min": 787 | |
| }, | |
| { | |
| "epoch": 3.176, | |
| "grad_norm": 0.46664739794355814, | |
| "learning_rate": 2.6615802092831446e-05, | |
| "loss": 0.2712, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.22112616896629333, | |
| "step": 1985, | |
| "valid_targets_mean": 2945.5, | |
| "valid_targets_min": 884 | |
| }, | |
| { | |
| "epoch": 3.184, | |
| "grad_norm": 0.4339184924410943, | |
| "learning_rate": 2.6540445223976637e-05, | |
| "loss": 0.2509, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2532888352870941, | |
| "step": 1990, | |
| "valid_targets_mean": 4107.5, | |
| "valid_targets_min": 1002 | |
| }, | |
| { | |
| "epoch": 3.192, | |
| "grad_norm": 0.3729189208408062, | |
| "learning_rate": 2.6464984239556602e-05, | |
| "loss": 0.2458, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.24716854095458984, | |
| "step": 1995, | |
| "valid_targets_mean": 5853.9, | |
| "valid_targets_min": 932 | |
| }, | |
| { | |
| "epoch": 3.2, | |
| "grad_norm": 0.3663312807394943, | |
| "learning_rate": 2.63894203408142e-05, | |
| "loss": 0.2449, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2260250449180603, | |
| "step": 2000, | |
| "valid_targets_mean": 5386.6, | |
| "valid_targets_min": 760 | |
| }, | |
| { | |
| "epoch": 3.208, | |
| "grad_norm": 0.43597269696745533, | |
| "learning_rate": 2.6313754730630528e-05, | |
| "loss": 0.2339, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.23965974152088165, | |
| "step": 2005, | |
| "valid_targets_mean": 3921.7, | |
| "valid_targets_min": 1011 | |
| }, | |
| { | |
| "epoch": 3.216, | |
| "grad_norm": 0.38031686943560017, | |
| "learning_rate": 2.623798861350582e-05, | |
| "loss": 0.2377, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2457212209701538, | |
| "step": 2010, | |
| "valid_targets_mean": 5434.2, | |
| "valid_targets_min": 793 | |
| }, | |
| { | |
| "epoch": 3.224, | |
| "grad_norm": 0.37852046127420746, | |
| "learning_rate": 2.6162123195540247e-05, | |
| "loss": 0.2391, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.24134644865989685, | |
| "step": 2015, | |
| "valid_targets_mean": 4507.3, | |
| "valid_targets_min": 965 | |
| }, | |
| { | |
| "epoch": 3.232, | |
| "grad_norm": 0.3774539132405667, | |
| "learning_rate": 2.6086159684414726e-05, | |
| "loss": 0.2626, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.20981399714946747, | |
| "step": 2020, | |
| "valid_targets_mean": 5689.2, | |
| "valid_targets_min": 983 | |
| }, | |
| { | |
| "epoch": 3.24, | |
| "grad_norm": 0.5876396358898935, | |
| "learning_rate": 2.6010099289371694e-05, | |
| "loss": 0.2517, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.24734702706336975, | |
| "step": 2025, | |
| "valid_targets_mean": 3616.8, | |
| "valid_targets_min": 962 | |
| }, | |
| { | |
| "epoch": 3.248, | |
| "grad_norm": 0.41025472682008307, | |
| "learning_rate": 2.5933943221195844e-05, | |
| "loss": 0.2369, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2263985127210617, | |
| "step": 2030, | |
| "valid_targets_mean": 4079.0, | |
| "valid_targets_min": 976 | |
| }, | |
| { | |
| "epoch": 3.2560000000000002, | |
| "grad_norm": 0.3585820544990285, | |
| "learning_rate": 2.5857692692194884e-05, | |
| "loss": 0.2358, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.24530109763145447, | |
| "step": 2035, | |
| "valid_targets_mean": 5752.3, | |
| "valid_targets_min": 902 | |
| }, | |
| { | |
| "epoch": 3.2640000000000002, | |
| "grad_norm": 0.4016499873533317, | |
| "learning_rate": 2.5781348916180195e-05, | |
| "loss": 0.2618, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.26491880416870117, | |
| "step": 2040, | |
| "valid_targets_mean": 4694.8, | |
| "valid_targets_min": 918 | |
| }, | |
| { | |
| "epoch": 3.2720000000000002, | |
| "grad_norm": 0.40307560980296747, | |
| "learning_rate": 2.570491310844755e-05, | |
| "loss": 0.2409, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2063388228416443, | |
| "step": 2045, | |
| "valid_targets_mean": 3911.2, | |
| "valid_targets_min": 688 | |
| }, | |
| { | |
| "epoch": 3.2800000000000002, | |
| "grad_norm": 0.44852837331808115, | |
| "learning_rate": 2.562838648575774e-05, | |
| "loss": 0.2376, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.21985195577144623, | |
| "step": 2050, | |
| "valid_targets_mean": 3346.2, | |
| "valid_targets_min": 963 | |
| }, | |
| { | |
| "epoch": 3.288, | |
| "grad_norm": 0.4285370827323687, | |
| "learning_rate": 2.5551770266317224e-05, | |
| "loss": 0.2427, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.24515582621097565, | |
| "step": 2055, | |
| "valid_targets_mean": 4142.2, | |
| "valid_targets_min": 830 | |
| }, | |
| { | |
| "epoch": 3.296, | |
| "grad_norm": 0.3469454440141706, | |
| "learning_rate": 2.5475065669758713e-05, | |
| "loss": 0.2292, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2313218116760254, | |
| "step": 2060, | |
| "valid_targets_mean": 6451.2, | |
| "valid_targets_min": 1134 | |
| }, | |
| { | |
| "epoch": 3.304, | |
| "grad_norm": 0.4660419550497371, | |
| "learning_rate": 2.5398273917121786e-05, | |
| "loss": 0.2366, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.23948527872562408, | |
| "step": 2065, | |
| "valid_targets_mean": 3834.3, | |
| "valid_targets_min": 840 | |
| }, | |
| { | |
| "epoch": 3.312, | |
| "grad_norm": 0.46020726031423426, | |
| "learning_rate": 2.532139623083342e-05, | |
| "loss": 0.2305, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.22267141938209534, | |
| "step": 2070, | |
| "valid_targets_mean": 3291.5, | |
| "valid_targets_min": 709 | |
| }, | |
| { | |
| "epoch": 3.32, | |
| "grad_norm": 0.4066475864337174, | |
| "learning_rate": 2.5244433834688552e-05, | |
| "loss": 0.2259, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.24701744318008423, | |
| "step": 2075, | |
| "valid_targets_mean": 4166.8, | |
| "valid_targets_min": 1174 | |
| }, | |
| { | |
| "epoch": 3.328, | |
| "grad_norm": 0.3389260313456441, | |
| "learning_rate": 2.5167387953830602e-05, | |
| "loss": 0.2393, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2435072362422943, | |
| "step": 2080, | |
| "valid_targets_mean": 6527.8, | |
| "valid_targets_min": 881 | |
| }, | |
| { | |
| "epoch": 3.336, | |
| "grad_norm": 0.44452524791300885, | |
| "learning_rate": 2.5090259814731946e-05, | |
| "loss": 0.2564, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.23047269880771637, | |
| "step": 2085, | |
| "valid_targets_mean": 4016.4, | |
| "valid_targets_min": 882 | |
| }, | |
| { | |
| "epoch": 3.344, | |
| "grad_norm": 0.5236654293524927, | |
| "learning_rate": 2.5013050645174414e-05, | |
| "loss": 0.2606, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.25171908736228943, | |
| "step": 2090, | |
| "valid_targets_mean": 2928.3, | |
| "valid_targets_min": 945 | |
| }, | |
| { | |
| "epoch": 3.352, | |
| "grad_norm": 0.46686168548498463, | |
| "learning_rate": 2.4935761674229735e-05, | |
| "loss": 0.2738, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.25355401635169983, | |
| "step": 2095, | |
| "valid_targets_mean": 3444.8, | |
| "valid_targets_min": 788 | |
| }, | |
| { | |
| "epoch": 3.36, | |
| "grad_norm": 0.4658436781904886, | |
| "learning_rate": 2.4858394132239982e-05, | |
| "loss": 0.2506, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.24720919132232666, | |
| "step": 2100, | |
| "valid_targets_mean": 3369.1, | |
| "valid_targets_min": 870 | |
| }, | |
| { | |
| "epoch": 3.368, | |
| "grad_norm": 0.41610717299525096, | |
| "learning_rate": 2.4780949250797964e-05, | |
| "loss": 0.2536, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.25919485092163086, | |
| "step": 2105, | |
| "valid_targets_mean": 4384.9, | |
| "valid_targets_min": 895 | |
| }, | |
| { | |
| "epoch": 3.376, | |
| "grad_norm": 0.3776558528507944, | |
| "learning_rate": 2.4703428262727656e-05, | |
| "loss": 0.2243, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2172096073627472, | |
| "step": 2110, | |
| "valid_targets_mean": 4115.6, | |
| "valid_targets_min": 758 | |
| }, | |
| { | |
| "epoch": 3.384, | |
| "grad_norm": 0.4332689527083962, | |
| "learning_rate": 2.4625832402064525e-05, | |
| "loss": 0.2438, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.24866342544555664, | |
| "step": 2115, | |
| "valid_targets_mean": 4320.1, | |
| "valid_targets_min": 908 | |
| }, | |
| { | |
| "epoch": 3.392, | |
| "grad_norm": 0.34298883302266486, | |
| "learning_rate": 2.454816290403595e-05, | |
| "loss": 0.2395, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2340373992919922, | |
| "step": 2120, | |
| "valid_targets_mean": 5510.9, | |
| "valid_targets_min": 914 | |
| }, | |
| { | |
| "epoch": 3.4, | |
| "grad_norm": 0.39746900839974997, | |
| "learning_rate": 2.4470421005041492e-05, | |
| "loss": 0.2437, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.27413320541381836, | |
| "step": 2125, | |
| "valid_targets_mean": 4856.7, | |
| "valid_targets_min": 1080 | |
| }, | |
| { | |
| "epoch": 3.408, | |
| "grad_norm": 0.3693795848606551, | |
| "learning_rate": 2.4392607942633263e-05, | |
| "loss": 0.2315, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.20374570786952972, | |
| "step": 2130, | |
| "valid_targets_mean": 4384.4, | |
| "valid_targets_min": 682 | |
| }, | |
| { | |
| "epoch": 3.416, | |
| "grad_norm": 0.6248740105863385, | |
| "learning_rate": 2.43147249554962e-05, | |
| "loss": 0.2438, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.27008944749832153, | |
| "step": 2135, | |
| "valid_targets_mean": 2089.4, | |
| "valid_targets_min": 819 | |
| }, | |
| { | |
| "epoch": 3.424, | |
| "grad_norm": 0.4780329930048178, | |
| "learning_rate": 2.423677328342835e-05, | |
| "loss": 0.2344, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.22337397933006287, | |
| "step": 2140, | |
| "valid_targets_mean": 3094.6, | |
| "valid_targets_min": 780 | |
| }, | |
| { | |
| "epoch": 3.432, | |
| "grad_norm": 0.3654477042526744, | |
| "learning_rate": 2.415875416732113e-05, | |
| "loss": 0.2525, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2606598734855652, | |
| "step": 2145, | |
| "valid_targets_mean": 5555.8, | |
| "valid_targets_min": 774 | |
| }, | |
| { | |
| "epoch": 3.44, | |
| "grad_norm": 0.36116589317728864, | |
| "learning_rate": 2.4080668849139603e-05, | |
| "loss": 0.2493, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.23566177487373352, | |
| "step": 2150, | |
| "valid_targets_mean": 5559.3, | |
| "valid_targets_min": 682 | |
| }, | |
| { | |
| "epoch": 3.448, | |
| "grad_norm": 0.4284496251086486, | |
| "learning_rate": 2.4002518571902665e-05, | |
| "loss": 0.2391, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.27861011028289795, | |
| "step": 2155, | |
| "valid_targets_mean": 5020.9, | |
| "valid_targets_min": 1002 | |
| }, | |
| { | |
| "epoch": 3.456, | |
| "grad_norm": 0.3785079045493894, | |
| "learning_rate": 2.392430457966328e-05, | |
| "loss": 0.2495, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.21359242498874664, | |
| "step": 2160, | |
| "valid_targets_mean": 4796.9, | |
| "valid_targets_min": 778 | |
| }, | |
| { | |
| "epoch": 3.464, | |
| "grad_norm": 0.3289768949204519, | |
| "learning_rate": 2.3846028117488686e-05, | |
| "loss": 0.2373, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.23382601141929626, | |
| "step": 2165, | |
| "valid_targets_mean": 6570.4, | |
| "valid_targets_min": 1014 | |
| }, | |
| { | |
| "epoch": 3.472, | |
| "grad_norm": 0.3147676115980224, | |
| "learning_rate": 2.3767690431440533e-05, | |
| "loss": 0.2402, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2314678281545639, | |
| "step": 2170, | |
| "valid_targets_mean": 7406.5, | |
| "valid_targets_min": 1083 | |
| }, | |
| { | |
| "epoch": 3.48, | |
| "grad_norm": 0.3671848999318846, | |
| "learning_rate": 2.368929276855512e-05, | |
| "loss": 0.2266, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.24486009776592255, | |
| "step": 2175, | |
| "valid_targets_mean": 4844.6, | |
| "valid_targets_min": 952 | |
| }, | |
| { | |
| "epoch": 3.488, | |
| "grad_norm": 0.39604902765788624, | |
| "learning_rate": 2.361083637682347e-05, | |
| "loss": 0.2222, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.23706820607185364, | |
| "step": 2180, | |
| "valid_targets_mean": 4694.9, | |
| "valid_targets_min": 744 | |
| }, | |
| { | |
| "epoch": 3.496, | |
| "grad_norm": 0.5655798668270807, | |
| "learning_rate": 2.3532322505171502e-05, | |
| "loss": 0.2616, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.27846089005470276, | |
| "step": 2185, | |
| "valid_targets_mean": 2487.4, | |
| "valid_targets_min": 670 | |
| }, | |
| { | |
| "epoch": 3.504, | |
| "grad_norm": 0.37720888972750033, | |
| "learning_rate": 2.3453752403440147e-05, | |
| "loss": 0.237, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2432319074869156, | |
| "step": 2190, | |
| "valid_targets_mean": 4674.2, | |
| "valid_targets_min": 1020 | |
| }, | |
| { | |
| "epoch": 3.512, | |
| "grad_norm": 0.38338363036309026, | |
| "learning_rate": 2.337512732236545e-05, | |
| "loss": 0.2537, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2563523054122925, | |
| "step": 2195, | |
| "valid_targets_mean": 4757.8, | |
| "valid_targets_min": 859 | |
| }, | |
| { | |
| "epoch": 3.52, | |
| "grad_norm": 0.3383403543256627, | |
| "learning_rate": 2.3296448513558628e-05, | |
| "loss": 0.2329, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.230048269033432, | |
| "step": 2200, | |
| "valid_targets_mean": 6312.8, | |
| "valid_targets_min": 906 | |
| }, | |
| { | |
| "epoch": 3.528, | |
| "grad_norm": 0.42174030998567286, | |
| "learning_rate": 2.321771722948622e-05, | |
| "loss": 0.2442, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.23159737884998322, | |
| "step": 2205, | |
| "valid_targets_mean": 3522.6, | |
| "valid_targets_min": 777 | |
| }, | |
| { | |
| "epoch": 3.536, | |
| "grad_norm": 0.3441421244493924, | |
| "learning_rate": 2.3138934723450074e-05, | |
| "loss": 0.243, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.23105758428573608, | |
| "step": 2210, | |
| "valid_targets_mean": 5247.6, | |
| "valid_targets_min": 639 | |
| }, | |
| { | |
| "epoch": 3.544, | |
| "grad_norm": 0.4237690704094179, | |
| "learning_rate": 2.306010224956744e-05, | |
| "loss": 0.2323, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.20057953894138336, | |
| "step": 2215, | |
| "valid_targets_mean": 3344.2, | |
| "valid_targets_min": 777 | |
| }, | |
| { | |
| "epoch": 3.552, | |
| "grad_norm": 0.40314828900451494, | |
| "learning_rate": 2.2981221062750986e-05, | |
| "loss": 0.2432, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.23921722173690796, | |
| "step": 2220, | |
| "valid_targets_mean": 4463.3, | |
| "valid_targets_min": 897 | |
| }, | |
| { | |
| "epoch": 3.56, | |
| "grad_norm": 0.35913421571712756, | |
| "learning_rate": 2.290229241868882e-05, | |
| "loss": 0.2336, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.22847603261470795, | |
| "step": 2225, | |
| "valid_targets_mean": 4817.8, | |
| "valid_targets_min": 973 | |
| }, | |
| { | |
| "epoch": 3.568, | |
| "grad_norm": 0.4300249539977357, | |
| "learning_rate": 2.282331757382454e-05, | |
| "loss": 0.2462, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2206239104270935, | |
| "step": 2230, | |
| "valid_targets_mean": 3468.5, | |
| "valid_targets_min": 913 | |
| }, | |
| { | |
| "epoch": 3.576, | |
| "grad_norm": 0.44969351210373343, | |
| "learning_rate": 2.2744297785337155e-05, | |
| "loss": 0.2287, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.21605494618415833, | |
| "step": 2235, | |
| "valid_targets_mean": 3286.3, | |
| "valid_targets_min": 842 | |
| }, | |
| { | |
| "epoch": 3.584, | |
| "grad_norm": 0.42301687473892546, | |
| "learning_rate": 2.2665234311121155e-05, | |
| "loss": 0.264, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.23820188641548157, | |
| "step": 2240, | |
| "valid_targets_mean": 3650.9, | |
| "valid_targets_min": 587 | |
| }, | |
| { | |
| "epoch": 3.592, | |
| "grad_norm": 0.392543464599928, | |
| "learning_rate": 2.258612840976645e-05, | |
| "loss": 0.2346, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.24136680364608765, | |
| "step": 2245, | |
| "valid_targets_mean": 4852.1, | |
| "valid_targets_min": 1183 | |
| }, | |
| { | |
| "epoch": 3.6, | |
| "grad_norm": 0.367395547542587, | |
| "learning_rate": 2.2506981340538315e-05, | |
| "loss": 0.2661, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2347022294998169, | |
| "step": 2250, | |
| "valid_targets_mean": 5126.2, | |
| "valid_targets_min": 765 | |
| }, | |
| { | |
| "epoch": 3.608, | |
| "grad_norm": 0.3205045384740095, | |
| "learning_rate": 2.2427794363357384e-05, | |
| "loss": 0.2351, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2343018651008606, | |
| "step": 2255, | |
| "valid_targets_mean": 6760.8, | |
| "valid_targets_min": 957 | |
| }, | |
| { | |
| "epoch": 3.616, | |
| "grad_norm": 0.4272334380835925, | |
| "learning_rate": 2.2348568738779566e-05, | |
| "loss": 0.2501, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.23038670420646667, | |
| "step": 2260, | |
| "valid_targets_mean": 4056.7, | |
| "valid_targets_min": 877 | |
| }, | |
| { | |
| "epoch": 3.624, | |
| "grad_norm": 0.40730070006221614, | |
| "learning_rate": 2.2269305727975993e-05, | |
| "loss": 0.2498, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.26035311818122864, | |
| "step": 2265, | |
| "valid_targets_mean": 4897.9, | |
| "valid_targets_min": 770 | |
| }, | |
| { | |
| "epoch": 3.632, | |
| "grad_norm": 0.39008805670033025, | |
| "learning_rate": 2.2190006592712927e-05, | |
| "loss": 0.2422, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2496114820241928, | |
| "step": 2270, | |
| "valid_targets_mean": 4502.3, | |
| "valid_targets_min": 956 | |
| }, | |
| { | |
| "epoch": 3.64, | |
| "grad_norm": 0.4098580625905869, | |
| "learning_rate": 2.2110672595331698e-05, | |
| "loss": 0.2327, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2622841000556946, | |
| "step": 2275, | |
| "valid_targets_mean": 4436.5, | |
| "valid_targets_min": 1137 | |
| }, | |
| { | |
| "epoch": 3.648, | |
| "grad_norm": 0.42996882915875295, | |
| "learning_rate": 2.2031304998728587e-05, | |
| "loss": 0.2452, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2525962293148041, | |
| "step": 2280, | |
| "valid_targets_mean": 4247.2, | |
| "valid_targets_min": 711 | |
| }, | |
| { | |
| "epoch": 3.656, | |
| "grad_norm": 0.3824484509543543, | |
| "learning_rate": 2.1951905066334737e-05, | |
| "loss": 0.2218, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.23063942790031433, | |
| "step": 2285, | |
| "valid_targets_mean": 4258.1, | |
| "valid_targets_min": 959 | |
| }, | |
| { | |
| "epoch": 3.664, | |
| "grad_norm": 0.40350526206019344, | |
| "learning_rate": 2.1872474062096046e-05, | |
| "loss": 0.2241, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.23316706717014313, | |
| "step": 2290, | |
| "valid_targets_mean": 4255.7, | |
| "valid_targets_min": 698 | |
| }, | |
| { | |
| "epoch": 3.672, | |
| "grad_norm": 0.44596553563121094, | |
| "learning_rate": 2.179301325045301e-05, | |
| "loss": 0.2716, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.26697400212287903, | |
| "step": 2295, | |
| "valid_targets_mean": 3889.1, | |
| "valid_targets_min": 764 | |
| }, | |
| { | |
| "epoch": 3.68, | |
| "grad_norm": 0.3479970961360567, | |
| "learning_rate": 2.1713523896320647e-05, | |
| "loss": 0.2378, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.21883633732795715, | |
| "step": 2300, | |
| "valid_targets_mean": 4919.6, | |
| "valid_targets_min": 1008 | |
| }, | |
| { | |
| "epoch": 3.6879999999999997, | |
| "grad_norm": 0.35571201991914386, | |
| "learning_rate": 2.163400726506832e-05, | |
| "loss": 0.2542, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.23191869258880615, | |
| "step": 2305, | |
| "valid_targets_mean": 5307.3, | |
| "valid_targets_min": 1096 | |
| }, | |
| { | |
| "epoch": 3.6959999999999997, | |
| "grad_norm": 0.3604714079928107, | |
| "learning_rate": 2.155446462249961e-05, | |
| "loss": 0.247, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2559710144996643, | |
| "step": 2310, | |
| "valid_targets_mean": 5540.6, | |
| "valid_targets_min": 728 | |
| }, | |
| { | |
| "epoch": 3.7039999999999997, | |
| "grad_norm": 0.32612870359060286, | |
| "learning_rate": 2.147489723483217e-05, | |
| "loss": 0.2389, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2424880862236023, | |
| "step": 2315, | |
| "valid_targets_mean": 6959.5, | |
| "valid_targets_min": 1094 | |
| }, | |
| { | |
| "epoch": 3.7119999999999997, | |
| "grad_norm": 0.41493912008225486, | |
| "learning_rate": 2.139530636867757e-05, | |
| "loss": 0.2396, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.24148619174957275, | |
| "step": 2320, | |
| "valid_targets_mean": 4001.1, | |
| "valid_targets_min": 1041 | |
| }, | |
| { | |
| "epoch": 3.7199999999999998, | |
| "grad_norm": 0.4110902151701686, | |
| "learning_rate": 2.1315693291021114e-05, | |
| "loss": 0.2473, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.22682532668113708, | |
| "step": 2325, | |
| "valid_targets_mean": 3857.5, | |
| "valid_targets_min": 532 | |
| }, | |
| { | |
| "epoch": 3.7279999999999998, | |
| "grad_norm": 0.3771443937450319, | |
| "learning_rate": 2.1236059269201686e-05, | |
| "loss": 0.2264, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2078881859779358, | |
| "step": 2330, | |
| "valid_targets_mean": 3960.9, | |
| "valid_targets_min": 1015 | |
| }, | |
| { | |
| "epoch": 3.7359999999999998, | |
| "grad_norm": 0.4361587523350003, | |
| "learning_rate": 2.1156405570891584e-05, | |
| "loss": 0.2493, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.240316241979599, | |
| "step": 2335, | |
| "valid_targets_mean": 3572.1, | |
| "valid_targets_min": 996 | |
| }, | |
| { | |
| "epoch": 3.7439999999999998, | |
| "grad_norm": 0.4089677660324536, | |
| "learning_rate": 2.1076733464076322e-05, | |
| "loss": 0.2405, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.22675375640392303, | |
| "step": 2340, | |
| "valid_targets_mean": 3776.9, | |
| "valid_targets_min": 874 | |
| }, | |
| { | |
| "epoch": 3.752, | |
| "grad_norm": 0.42692556018849953, | |
| "learning_rate": 2.0997044217034462e-05, | |
| "loss": 0.2487, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2749752402305603, | |
| "step": 2345, | |
| "valid_targets_mean": 4011.2, | |
| "valid_targets_min": 1087 | |
| }, | |
| { | |
| "epoch": 3.76, | |
| "grad_norm": 0.3607657231192886, | |
| "learning_rate": 2.0917339098317405e-05, | |
| "loss": 0.2291, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.23275761306285858, | |
| "step": 2350, | |
| "valid_targets_mean": 5583.9, | |
| "valid_targets_min": 887 | |
| }, | |
| { | |
| "epoch": 3.768, | |
| "grad_norm": 0.53112047115276, | |
| "learning_rate": 2.083761937672922e-05, | |
| "loss": 0.2346, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.23606562614440918, | |
| "step": 2355, | |
| "valid_targets_mean": 2382.6, | |
| "valid_targets_min": 941 | |
| }, | |
| { | |
| "epoch": 3.776, | |
| "grad_norm": 0.44795554427006484, | |
| "learning_rate": 2.0757886321306433e-05, | |
| "loss": 0.2545, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.30794838070869446, | |
| "step": 2360, | |
| "valid_targets_mean": 4058.8, | |
| "valid_targets_min": 709 | |
| }, | |
| { | |
| "epoch": 3.784, | |
| "grad_norm": 0.38096232285290094, | |
| "learning_rate": 2.0678141201297827e-05, | |
| "loss": 0.2339, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2388550341129303, | |
| "step": 2365, | |
| "valid_targets_mean": 4521.2, | |
| "valid_targets_min": 976 | |
| }, | |
| { | |
| "epoch": 3.792, | |
| "grad_norm": 0.3677287419628772, | |
| "learning_rate": 2.059838528614423e-05, | |
| "loss": 0.2483, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2527107894420624, | |
| "step": 2370, | |
| "valid_targets_mean": 4927.9, | |
| "valid_targets_min": 717 | |
| }, | |
| { | |
| "epoch": 3.8, | |
| "grad_norm": 0.4603649045230554, | |
| "learning_rate": 2.0518619845458322e-05, | |
| "loss": 0.2357, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2524024546146393, | |
| "step": 2375, | |
| "valid_targets_mean": 3487.9, | |
| "valid_targets_min": 900 | |
| }, | |
| { | |
| "epoch": 3.808, | |
| "grad_norm": 0.49340867750085365, | |
| "learning_rate": 2.0438846149004426e-05, | |
| "loss": 0.2266, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2245899885892868, | |
| "step": 2380, | |
| "valid_targets_mean": 3973.9, | |
| "valid_targets_min": 1047 | |
| }, | |
| { | |
| "epoch": 3.816, | |
| "grad_norm": 0.3391628456601482, | |
| "learning_rate": 2.0359065466678268e-05, | |
| "loss": 0.234, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.22324374318122864, | |
| "step": 2385, | |
| "valid_targets_mean": 5603.8, | |
| "valid_targets_min": 1077 | |
| }, | |
| { | |
| "epoch": 3.824, | |
| "grad_norm": 0.4064978135894322, | |
| "learning_rate": 2.0279279068486795e-05, | |
| "loss": 0.2499, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.24098357558250427, | |
| "step": 2390, | |
| "valid_targets_mean": 4030.3, | |
| "valid_targets_min": 836 | |
| }, | |
| { | |
| "epoch": 3.832, | |
| "grad_norm": 0.45574421310954694, | |
| "learning_rate": 2.019948822452794e-05, | |
| "loss": 0.2584, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2887115776538849, | |
| "step": 2395, | |
| "valid_targets_mean": 3871.5, | |
| "valid_targets_min": 593 | |
| }, | |
| { | |
| "epoch": 3.84, | |
| "grad_norm": 0.42912425327908477, | |
| "learning_rate": 2.0119694204970393e-05, | |
| "loss": 0.2347, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2643599510192871, | |
| "step": 2400, | |
| "valid_targets_mean": 3953.9, | |
| "valid_targets_min": 752 | |
| }, | |
| { | |
| "epoch": 3.848, | |
| "grad_norm": 0.5452664048943783, | |
| "learning_rate": 2.0039898280033414e-05, | |
| "loss": 0.2447, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.25383543968200684, | |
| "step": 2405, | |
| "valid_targets_mean": 2670.8, | |
| "valid_targets_min": 879 | |
| }, | |
| { | |
| "epoch": 3.856, | |
| "grad_norm": 0.35547888248847065, | |
| "learning_rate": 1.9960101719966592e-05, | |
| "loss": 0.2536, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.25882688164711, | |
| "step": 2410, | |
| "valid_targets_mean": 6267.4, | |
| "valid_targets_min": 1061 | |
| }, | |
| { | |
| "epoch": 3.864, | |
| "grad_norm": 0.4236770102857227, | |
| "learning_rate": 1.9880305795029617e-05, | |
| "loss": 0.2552, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.23290708661079407, | |
| "step": 2415, | |
| "valid_targets_mean": 3531.1, | |
| "valid_targets_min": 830 | |
| }, | |
| { | |
| "epoch": 3.872, | |
| "grad_norm": 0.5053989332284283, | |
| "learning_rate": 1.980051177547207e-05, | |
| "loss": 0.2442, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2855120897293091, | |
| "step": 2420, | |
| "valid_targets_mean": 3139.5, | |
| "valid_targets_min": 883 | |
| }, | |
| { | |
| "epoch": 3.88, | |
| "grad_norm": 0.40363417243530514, | |
| "learning_rate": 1.9720720931513212e-05, | |
| "loss": 0.2504, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2508298456668854, | |
| "step": 2425, | |
| "valid_targets_mean": 4059.1, | |
| "valid_targets_min": 614 | |
| }, | |
| { | |
| "epoch": 3.888, | |
| "grad_norm": 0.43924888297351233, | |
| "learning_rate": 1.9640934533321735e-05, | |
| "loss": 0.2542, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2625318765640259, | |
| "step": 2430, | |
| "valid_targets_mean": 3693.1, | |
| "valid_targets_min": 948 | |
| }, | |
| { | |
| "epoch": 3.896, | |
| "grad_norm": 0.3419350657709466, | |
| "learning_rate": 1.9561153850995577e-05, | |
| "loss": 0.2407, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.21997395157814026, | |
| "step": 2435, | |
| "valid_targets_mean": 5387.0, | |
| "valid_targets_min": 979 | |
| }, | |
| { | |
| "epoch": 3.904, | |
| "grad_norm": 0.4260255800186257, | |
| "learning_rate": 1.948138015454168e-05, | |
| "loss": 0.2381, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.23484918475151062, | |
| "step": 2440, | |
| "valid_targets_mean": 3805.9, | |
| "valid_targets_min": 1012 | |
| }, | |
| { | |
| "epoch": 3.912, | |
| "grad_norm": 0.4706145084246985, | |
| "learning_rate": 1.9401614713855775e-05, | |
| "loss": 0.2499, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.30133742094039917, | |
| "step": 2445, | |
| "valid_targets_mean": 3473.2, | |
| "valid_targets_min": 666 | |
| }, | |
| { | |
| "epoch": 3.92, | |
| "grad_norm": 0.42155693267392574, | |
| "learning_rate": 1.932185879870218e-05, | |
| "loss": 0.2392, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.23585715889930725, | |
| "step": 2450, | |
| "valid_targets_mean": 4632.4, | |
| "valid_targets_min": 946 | |
| }, | |
| { | |
| "epoch": 3.928, | |
| "grad_norm": 0.42216004508529464, | |
| "learning_rate": 1.924211367869357e-05, | |
| "loss": 0.2731, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.25942063331604004, | |
| "step": 2455, | |
| "valid_targets_mean": 4232.3, | |
| "valid_targets_min": 1077 | |
| }, | |
| { | |
| "epoch": 3.936, | |
| "grad_norm": 0.43356255890123696, | |
| "learning_rate": 1.9162380623270783e-05, | |
| "loss": 0.2306, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.24051374197006226, | |
| "step": 2460, | |
| "valid_targets_mean": 3791.2, | |
| "valid_targets_min": 655 | |
| }, | |
| { | |
| "epoch": 3.944, | |
| "grad_norm": 0.41613078686108457, | |
| "learning_rate": 1.90826609016826e-05, | |
| "loss": 0.2323, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2255561798810959, | |
| "step": 2465, | |
| "valid_targets_mean": 3539.2, | |
| "valid_targets_min": 785 | |
| }, | |
| { | |
| "epoch": 3.952, | |
| "grad_norm": 0.38294039222353565, | |
| "learning_rate": 1.9002955782965548e-05, | |
| "loss": 0.2417, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2199365645647049, | |
| "step": 2470, | |
| "valid_targets_mean": 4334.4, | |
| "valid_targets_min": 791 | |
| }, | |
| { | |
| "epoch": 3.96, | |
| "grad_norm": 0.45878262976054157, | |
| "learning_rate": 1.8923266535923688e-05, | |
| "loss": 0.2528, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.27829837799072266, | |
| "step": 2475, | |
| "valid_targets_mean": 3980.3, | |
| "valid_targets_min": 1038 | |
| }, | |
| { | |
| "epoch": 3.968, | |
| "grad_norm": 0.42297288882981043, | |
| "learning_rate": 1.8843594429108426e-05, | |
| "loss": 0.2469, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.29415255784988403, | |
| "step": 2480, | |
| "valid_targets_mean": 4948.5, | |
| "valid_targets_min": 932 | |
| }, | |
| { | |
| "epoch": 3.976, | |
| "grad_norm": 0.4638883460903558, | |
| "learning_rate": 1.8763940730798324e-05, | |
| "loss": 0.2375, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2466743290424347, | |
| "step": 2485, | |
| "valid_targets_mean": 3243.7, | |
| "valid_targets_min": 712 | |
| }, | |
| { | |
| "epoch": 3.984, | |
| "grad_norm": 0.4897364741478554, | |
| "learning_rate": 1.8684306708978896e-05, | |
| "loss": 0.2458, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.25233909487724304, | |
| "step": 2490, | |
| "valid_targets_mean": 3177.6, | |
| "valid_targets_min": 850 | |
| }, | |
| { | |
| "epoch": 3.992, | |
| "grad_norm": 0.46252614216792853, | |
| "learning_rate": 1.8604693631322433e-05, | |
| "loss": 0.2521, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.23966452479362488, | |
| "step": 2495, | |
| "valid_targets_mean": 3006.2, | |
| "valid_targets_min": 860 | |
| }, | |
| { | |
| "epoch": 4.0, | |
| "grad_norm": 0.31710051876751844, | |
| "learning_rate": 1.852510276516783e-05, | |
| "loss": 0.2467, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.20360557734966278, | |
| "step": 2500, | |
| "valid_targets_mean": 5554.4, | |
| "valid_targets_min": 762 | |
| }, | |
| { | |
| "epoch": 4.008, | |
| "grad_norm": 0.4182463285312466, | |
| "learning_rate": 1.8445535377500393e-05, | |
| "loss": 0.2262, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2686900496482849, | |
| "step": 2505, | |
| "valid_targets_mean": 4283.1, | |
| "valid_targets_min": 780 | |
| }, | |
| { | |
| "epoch": 4.016, | |
| "grad_norm": 0.34215268944260163, | |
| "learning_rate": 1.8365992734931686e-05, | |
| "loss": 0.2386, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.24253295361995697, | |
| "step": 2510, | |
| "valid_targets_mean": 6879.1, | |
| "valid_targets_min": 993 | |
| }, | |
| { | |
| "epoch": 4.024, | |
| "grad_norm": 0.38956003361720304, | |
| "learning_rate": 1.8286476103679356e-05, | |
| "loss": 0.2283, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.21489191055297852, | |
| "step": 2515, | |
| "valid_targets_mean": 4114.4, | |
| "valid_targets_min": 1045 | |
| }, | |
| { | |
| "epoch": 4.032, | |
| "grad_norm": 0.3507035259915641, | |
| "learning_rate": 1.8206986749546992e-05, | |
| "loss": 0.234, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.22584864497184753, | |
| "step": 2520, | |
| "valid_targets_mean": 5080.9, | |
| "valid_targets_min": 841 | |
| }, | |
| { | |
| "epoch": 4.04, | |
| "grad_norm": 0.3812064932824899, | |
| "learning_rate": 1.8127525937903957e-05, | |
| "loss": 0.2355, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.23593032360076904, | |
| "step": 2525, | |
| "valid_targets_mean": 4911.8, | |
| "valid_targets_min": 760 | |
| }, | |
| { | |
| "epoch": 4.048, | |
| "grad_norm": 0.5267876933020568, | |
| "learning_rate": 1.8048094933665262e-05, | |
| "loss": 0.2375, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2608511745929718, | |
| "step": 2530, | |
| "valid_targets_mean": 2954.8, | |
| "valid_targets_min": 969 | |
| }, | |
| { | |
| "epoch": 4.056, | |
| "grad_norm": 0.3449942876056951, | |
| "learning_rate": 1.7968695001271416e-05, | |
| "loss": 0.2466, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2336217761039734, | |
| "step": 2535, | |
| "valid_targets_mean": 6292.1, | |
| "valid_targets_min": 790 | |
| }, | |
| { | |
| "epoch": 4.064, | |
| "grad_norm": 0.4355115464048802, | |
| "learning_rate": 1.7889327404668316e-05, | |
| "loss": 0.2195, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2266024351119995, | |
| "step": 2540, | |
| "valid_targets_mean": 3884.9, | |
| "valid_targets_min": 879 | |
| }, | |
| { | |
| "epoch": 4.072, | |
| "grad_norm": 0.3081848481333973, | |
| "learning_rate": 1.7809993407287083e-05, | |
| "loss": 0.2544, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.22978955507278442, | |
| "step": 2545, | |
| "valid_targets_mean": 7871.2, | |
| "valid_targets_min": 797 | |
| }, | |
| { | |
| "epoch": 4.08, | |
| "grad_norm": 0.5481512251233728, | |
| "learning_rate": 1.7730694272024018e-05, | |
| "loss": 0.2451, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.30004870891571045, | |
| "step": 2550, | |
| "valid_targets_mean": 3161.5, | |
| "valid_targets_min": 804 | |
| }, | |
| { | |
| "epoch": 4.088, | |
| "grad_norm": 0.48046326523783284, | |
| "learning_rate": 1.765143126122044e-05, | |
| "loss": 0.2554, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2633433938026428, | |
| "step": 2555, | |
| "valid_targets_mean": 3137.6, | |
| "valid_targets_min": 913 | |
| }, | |
| { | |
| "epoch": 4.096, | |
| "grad_norm": 0.45306035014956936, | |
| "learning_rate": 1.7572205636642622e-05, | |
| "loss": 0.2404, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.27019399404525757, | |
| "step": 2560, | |
| "valid_targets_mean": 4373.4, | |
| "valid_targets_min": 793 | |
| }, | |
| { | |
| "epoch": 4.104, | |
| "grad_norm": 0.3583844187285849, | |
| "learning_rate": 1.749301865946169e-05, | |
| "loss": 0.2278, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.21709087491035461, | |
| "step": 2565, | |
| "valid_targets_mean": 5196.9, | |
| "valid_targets_min": 823 | |
| }, | |
| { | |
| "epoch": 4.112, | |
| "grad_norm": 0.4402520016943769, | |
| "learning_rate": 1.7413871590233557e-05, | |
| "loss": 0.2571, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.26503556966781616, | |
| "step": 2570, | |
| "valid_targets_mean": 4493.4, | |
| "valid_targets_min": 809 | |
| }, | |
| { | |
| "epoch": 4.12, | |
| "grad_norm": 0.3555706054314649, | |
| "learning_rate": 1.7334765688878848e-05, | |
| "loss": 0.2263, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2207963764667511, | |
| "step": 2575, | |
| "valid_targets_mean": 5484.2, | |
| "valid_targets_min": 690 | |
| }, | |
| { | |
| "epoch": 4.128, | |
| "grad_norm": 0.4611646375770736, | |
| "learning_rate": 1.7255702214662852e-05, | |
| "loss": 0.2404, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.24676471948623657, | |
| "step": 2580, | |
| "valid_targets_mean": 3392.4, | |
| "valid_targets_min": 784 | |
| }, | |
| { | |
| "epoch": 4.136, | |
| "grad_norm": 0.47243039431807954, | |
| "learning_rate": 1.7176682426175468e-05, | |
| "loss": 0.2566, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2714911103248596, | |
| "step": 2585, | |
| "valid_targets_mean": 3790.6, | |
| "valid_targets_min": 996 | |
| }, | |
| { | |
| "epoch": 4.144, | |
| "grad_norm": 0.3937112234840623, | |
| "learning_rate": 1.709770758131118e-05, | |
| "loss": 0.2487, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.25096097588539124, | |
| "step": 2590, | |
| "valid_targets_mean": 4663.3, | |
| "valid_targets_min": 653 | |
| }, | |
| { | |
| "epoch": 4.152, | |
| "grad_norm": 0.4070587446512345, | |
| "learning_rate": 1.7018778937249017e-05, | |
| "loss": 0.2175, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2075790911912918, | |
| "step": 2595, | |
| "valid_targets_mean": 3767.4, | |
| "valid_targets_min": 858 | |
| }, | |
| { | |
| "epoch": 4.16, | |
| "grad_norm": 0.36994325328075317, | |
| "learning_rate": 1.6939897750432562e-05, | |
| "loss": 0.2354, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.22925224900245667, | |
| "step": 2600, | |
| "valid_targets_mean": 5491.9, | |
| "valid_targets_min": 719 | |
| }, | |
| { | |
| "epoch": 4.168, | |
| "grad_norm": 0.45041384359435777, | |
| "learning_rate": 1.6861065276549933e-05, | |
| "loss": 0.238, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.24161088466644287, | |
| "step": 2605, | |
| "valid_targets_mean": 4071.9, | |
| "valid_targets_min": 855 | |
| }, | |
| { | |
| "epoch": 4.176, | |
| "grad_norm": 0.4581954398994809, | |
| "learning_rate": 1.6782282770513788e-05, | |
| "loss": 0.2421, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2478228509426117, | |
| "step": 2610, | |
| "valid_targets_mean": 3686.6, | |
| "valid_targets_min": 709 | |
| }, | |
| { | |
| "epoch": 4.184, | |
| "grad_norm": 0.4007037738747935, | |
| "learning_rate": 1.6703551486441382e-05, | |
| "loss": 0.2359, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2544166147708893, | |
| "step": 2615, | |
| "valid_targets_mean": 4708.0, | |
| "valid_targets_min": 835 | |
| }, | |
| { | |
| "epoch": 4.192, | |
| "grad_norm": 0.35558467476339845, | |
| "learning_rate": 1.6624872677634565e-05, | |
| "loss": 0.2191, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.22993795573711395, | |
| "step": 2620, | |
| "valid_targets_mean": 5626.6, | |
| "valid_targets_min": 1024 | |
| }, | |
| { | |
| "epoch": 4.2, | |
| "grad_norm": 0.4362176145931464, | |
| "learning_rate": 1.654624759655986e-05, | |
| "loss": 0.2327, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.23500782251358032, | |
| "step": 2625, | |
| "valid_targets_mean": 4308.1, | |
| "valid_targets_min": 908 | |
| }, | |
| { | |
| "epoch": 4.208, | |
| "grad_norm": 0.43269722980811254, | |
| "learning_rate": 1.64676774948285e-05, | |
| "loss": 0.2531, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.25792813301086426, | |
| "step": 2630, | |
| "valid_targets_mean": 4154.8, | |
| "valid_targets_min": 1168 | |
| }, | |
| { | |
| "epoch": 4.216, | |
| "grad_norm": 0.362438370317793, | |
| "learning_rate": 1.6389163623176536e-05, | |
| "loss": 0.2188, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.23212498426437378, | |
| "step": 2635, | |
| "valid_targets_mean": 5189.7, | |
| "valid_targets_min": 857 | |
| }, | |
| { | |
| "epoch": 4.224, | |
| "grad_norm": 0.6495031296425801, | |
| "learning_rate": 1.6310707231444884e-05, | |
| "loss": 0.2496, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.33409446477890015, | |
| "step": 2640, | |
| "valid_targets_mean": 2262.6, | |
| "valid_targets_min": 923 | |
| }, | |
| { | |
| "epoch": 4.232, | |
| "grad_norm": 0.3044482725544302, | |
| "learning_rate": 1.623230956855947e-05, | |
| "loss": 0.2257, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1921289712190628, | |
| "step": 2645, | |
| "valid_targets_mean": 6717.5, | |
| "valid_targets_min": 1050 | |
| }, | |
| { | |
| "epoch": 4.24, | |
| "grad_norm": 0.4021251156140196, | |
| "learning_rate": 1.6153971882511324e-05, | |
| "loss": 0.2388, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.23349186778068542, | |
| "step": 2650, | |
| "valid_targets_mean": 4582.3, | |
| "valid_targets_min": 722 | |
| }, | |
| { | |
| "epoch": 4.248, | |
| "grad_norm": 0.3445640898802388, | |
| "learning_rate": 1.6075695420336724e-05, | |
| "loss": 0.2186, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.24022488296031952, | |
| "step": 2655, | |
| "valid_targets_mean": 5738.2, | |
| "valid_targets_min": 859 | |
| }, | |
| { | |
| "epoch": 4.256, | |
| "grad_norm": 0.3634851638301243, | |
| "learning_rate": 1.5997481428097338e-05, | |
| "loss": 0.2211, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.18648147583007812, | |
| "step": 2660, | |
| "valid_targets_mean": 5074.1, | |
| "valid_targets_min": 879 | |
| }, | |
| { | |
| "epoch": 4.264, | |
| "grad_norm": 0.4641862374466239, | |
| "learning_rate": 1.5919331150860396e-05, | |
| "loss": 0.2343, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.23830462992191315, | |
| "step": 2665, | |
| "valid_targets_mean": 3713.1, | |
| "valid_targets_min": 823 | |
| }, | |
| { | |
| "epoch": 4.272, | |
| "grad_norm": 0.5391929747868811, | |
| "learning_rate": 1.5841245832678873e-05, | |
| "loss": 0.2384, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2308681607246399, | |
| "step": 2670, | |
| "valid_targets_mean": 3575.1, | |
| "valid_targets_min": 748 | |
| }, | |
| { | |
| "epoch": 4.28, | |
| "grad_norm": 0.35324812073512407, | |
| "learning_rate": 1.576322671657166e-05, | |
| "loss": 0.2296, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.22632098197937012, | |
| "step": 2675, | |
| "valid_targets_mean": 5186.4, | |
| "valid_targets_min": 764 | |
| }, | |
| { | |
| "epoch": 4.288, | |
| "grad_norm": 0.36772896338506605, | |
| "learning_rate": 1.5685275044503804e-05, | |
| "loss": 0.2296, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.20238947868347168, | |
| "step": 2680, | |
| "valid_targets_mean": 4375.9, | |
| "valid_targets_min": 998 | |
| }, | |
| { | |
| "epoch": 4.296, | |
| "grad_norm": 0.5917343917305425, | |
| "learning_rate": 1.560739205736674e-05, | |
| "loss": 0.2441, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2685689628124237, | |
| "step": 2685, | |
| "valid_targets_mean": 2465.2, | |
| "valid_targets_min": 745 | |
| }, | |
| { | |
| "epoch": 4.304, | |
| "grad_norm": 0.38800770329149276, | |
| "learning_rate": 1.552957899495851e-05, | |
| "loss": 0.2386, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.22670996189117432, | |
| "step": 2690, | |
| "valid_targets_mean": 4851.9, | |
| "valid_targets_min": 1051 | |
| }, | |
| { | |
| "epoch": 4.312, | |
| "grad_norm": 0.4348192390077996, | |
| "learning_rate": 1.5451837095964054e-05, | |
| "loss": 0.2312, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.23623718321323395, | |
| "step": 2695, | |
| "valid_targets_mean": 4257.4, | |
| "valid_targets_min": 741 | |
| }, | |
| { | |
| "epoch": 4.32, | |
| "grad_norm": 0.3150520299968283, | |
| "learning_rate": 1.5374167597935478e-05, | |
| "loss": 0.2113, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.18955495953559875, | |
| "step": 2700, | |
| "valid_targets_mean": 5891.4, | |
| "valid_targets_min": 845 | |
| }, | |
| { | |
| "epoch": 4.328, | |
| "grad_norm": 0.4228257855789774, | |
| "learning_rate": 1.5296571737272354e-05, | |
| "loss": 0.2296, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.22553138434886932, | |
| "step": 2705, | |
| "valid_targets_mean": 3918.9, | |
| "valid_targets_min": 788 | |
| }, | |
| { | |
| "epoch": 4.336, | |
| "grad_norm": 0.4441377948845305, | |
| "learning_rate": 1.5219050749202037e-05, | |
| "loss": 0.221, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.22206555306911469, | |
| "step": 2710, | |
| "valid_targets_mean": 3918.2, | |
| "valid_targets_min": 916 | |
| }, | |
| { | |
| "epoch": 4.344, | |
| "grad_norm": 0.5003922378983456, | |
| "learning_rate": 1.5141605867760021e-05, | |
| "loss": 0.2539, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2991781234741211, | |
| "step": 2715, | |
| "valid_targets_mean": 3582.6, | |
| "valid_targets_min": 670 | |
| }, | |
| { | |
| "epoch": 4.352, | |
| "grad_norm": 0.3967867101584086, | |
| "learning_rate": 1.5064238325770267e-05, | |
| "loss": 0.2226, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.24074117839336395, | |
| "step": 2720, | |
| "valid_targets_mean": 4480.3, | |
| "valid_targets_min": 945 | |
| }, | |
| { | |
| "epoch": 4.36, | |
| "grad_norm": 0.40832709295548075, | |
| "learning_rate": 1.498694935482559e-05, | |
| "loss": 0.236, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2141822874546051, | |
| "step": 2725, | |
| "valid_targets_mean": 3890.5, | |
| "valid_targets_min": 894 | |
| }, | |
| { | |
| "epoch": 4.368, | |
| "grad_norm": 0.5353662529083422, | |
| "learning_rate": 1.4909740185268056e-05, | |
| "loss": 0.2433, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2768443822860718, | |
| "step": 2730, | |
| "valid_targets_mean": 3256.1, | |
| "valid_targets_min": 732 | |
| }, | |
| { | |
| "epoch": 4.376, | |
| "grad_norm": 0.3903407215377192, | |
| "learning_rate": 1.4832612046169408e-05, | |
| "loss": 0.2425, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.25160282850265503, | |
| "step": 2735, | |
| "valid_targets_mean": 5186.7, | |
| "valid_targets_min": 898 | |
| }, | |
| { | |
| "epoch": 4.384, | |
| "grad_norm": 0.3610608999407683, | |
| "learning_rate": 1.4755566165311455e-05, | |
| "loss": 0.2223, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.21602243185043335, | |
| "step": 2740, | |
| "valid_targets_mean": 5826.9, | |
| "valid_targets_min": 899 | |
| }, | |
| { | |
| "epoch": 4.392, | |
| "grad_norm": 0.3705361464347009, | |
| "learning_rate": 1.4678603769166591e-05, | |
| "loss": 0.2323, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2093980610370636, | |
| "step": 2745, | |
| "valid_targets_mean": 4775.3, | |
| "valid_targets_min": 685 | |
| }, | |
| { | |
| "epoch": 4.4, | |
| "grad_norm": 0.5672264522039926, | |
| "learning_rate": 1.4601726082878226e-05, | |
| "loss": 0.2265, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.23798605799674988, | |
| "step": 2750, | |
| "valid_targets_mean": 2565.5, | |
| "valid_targets_min": 851 | |
| }, | |
| { | |
| "epoch": 4.408, | |
| "grad_norm": 0.3839516251209785, | |
| "learning_rate": 1.4524934330241292e-05, | |
| "loss": 0.2155, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.20655933022499084, | |
| "step": 2755, | |
| "valid_targets_mean": 4338.2, | |
| "valid_targets_min": 710 | |
| }, | |
| { | |
| "epoch": 4.416, | |
| "grad_norm": 0.6594074981447611, | |
| "learning_rate": 1.4448229733682784e-05, | |
| "loss": 0.235, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.24626955389976501, | |
| "step": 2760, | |
| "valid_targets_mean": 2719.0, | |
| "valid_targets_min": 717 | |
| }, | |
| { | |
| "epoch": 4.424, | |
| "grad_norm": 0.5159254418985769, | |
| "learning_rate": 1.4371613514242264e-05, | |
| "loss": 0.2307, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.24901220202445984, | |
| "step": 2765, | |
| "valid_targets_mean": 3019.8, | |
| "valid_targets_min": 881 | |
| }, | |
| { | |
| "epoch": 4.432, | |
| "grad_norm": 0.3594039502636306, | |
| "learning_rate": 1.4295086891552457e-05, | |
| "loss": 0.2197, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2654702663421631, | |
| "step": 2770, | |
| "valid_targets_mean": 6056.7, | |
| "valid_targets_min": 1036 | |
| }, | |
| { | |
| "epoch": 4.44, | |
| "grad_norm": 0.34691201685012074, | |
| "learning_rate": 1.4218651083819811e-05, | |
| "loss": 0.2217, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.21709834039211273, | |
| "step": 2775, | |
| "valid_targets_mean": 5528.9, | |
| "valid_targets_min": 1091 | |
| }, | |
| { | |
| "epoch": 4.448, | |
| "grad_norm": 0.4454510808301901, | |
| "learning_rate": 1.4142307307805125e-05, | |
| "loss": 0.2361, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2332070767879486, | |
| "step": 2780, | |
| "valid_targets_mean": 3425.8, | |
| "valid_targets_min": 626 | |
| }, | |
| { | |
| "epoch": 4.456, | |
| "grad_norm": 0.33114532349140574, | |
| "learning_rate": 1.406605677880416e-05, | |
| "loss": 0.225, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.216552734375, | |
| "step": 2785, | |
| "valid_targets_mean": 5557.9, | |
| "valid_targets_min": 686 | |
| }, | |
| { | |
| "epoch": 4.464, | |
| "grad_norm": 0.36774664936117085, | |
| "learning_rate": 1.3989900710628313e-05, | |
| "loss": 0.239, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.21505959331989288, | |
| "step": 2790, | |
| "valid_targets_mean": 4693.8, | |
| "valid_targets_min": 990 | |
| }, | |
| { | |
| "epoch": 4.4719999999999995, | |
| "grad_norm": 0.3545900301698533, | |
| "learning_rate": 1.3913840315585279e-05, | |
| "loss": 0.238, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.21788690984249115, | |
| "step": 2795, | |
| "valid_targets_mean": 5270.3, | |
| "valid_targets_min": 830 | |
| }, | |
| { | |
| "epoch": 4.48, | |
| "grad_norm": 0.4302379594475467, | |
| "learning_rate": 1.3837876804459765e-05, | |
| "loss": 0.2277, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.24874816834926605, | |
| "step": 2800, | |
| "valid_targets_mean": 4287.2, | |
| "valid_targets_min": 1103 | |
| }, | |
| { | |
| "epoch": 4.4879999999999995, | |
| "grad_norm": 0.367392918189934, | |
| "learning_rate": 1.3762011386494191e-05, | |
| "loss": 0.2332, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.22634848952293396, | |
| "step": 2805, | |
| "valid_targets_mean": 4934.0, | |
| "valid_targets_min": 917 | |
| }, | |
| { | |
| "epoch": 4.496, | |
| "grad_norm": 0.3355554630083534, | |
| "learning_rate": 1.3686245269369485e-05, | |
| "loss": 0.2392, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.21311712265014648, | |
| "step": 2810, | |
| "valid_targets_mean": 5740.8, | |
| "valid_targets_min": 991 | |
| }, | |
| { | |
| "epoch": 4.504, | |
| "grad_norm": 0.3839903505349243, | |
| "learning_rate": 1.3610579659185809e-05, | |
| "loss": 0.2291, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.22502079606056213, | |
| "step": 2815, | |
| "valid_targets_mean": 4504.8, | |
| "valid_targets_min": 955 | |
| }, | |
| { | |
| "epoch": 4.5120000000000005, | |
| "grad_norm": 0.3433561120934192, | |
| "learning_rate": 1.35350157604434e-05, | |
| "loss": 0.2361, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.23318302631378174, | |
| "step": 2820, | |
| "valid_targets_mean": 6099.9, | |
| "valid_targets_min": 830 | |
| }, | |
| { | |
| "epoch": 4.52, | |
| "grad_norm": 0.3924173308347554, | |
| "learning_rate": 1.345955477602337e-05, | |
| "loss": 0.232, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.22661542892456055, | |
| "step": 2825, | |
| "valid_targets_mean": 4413.6, | |
| "valid_targets_min": 1228 | |
| }, | |
| { | |
| "epoch": 4.5280000000000005, | |
| "grad_norm": 0.3671450930398099, | |
| "learning_rate": 1.3384197907168561e-05, | |
| "loss": 0.2323, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2506842613220215, | |
| "step": 2830, | |
| "valid_targets_mean": 5014.2, | |
| "valid_targets_min": 865 | |
| }, | |
| { | |
| "epoch": 4.536, | |
| "grad_norm": 0.4471447410885578, | |
| "learning_rate": 1.3308946353464438e-05, | |
| "loss": 0.2395, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.25687381625175476, | |
| "step": 2835, | |
| "valid_targets_mean": 3734.6, | |
| "valid_targets_min": 973 | |
| }, | |
| { | |
| "epoch": 4.5440000000000005, | |
| "grad_norm": 0.3665444810496819, | |
| "learning_rate": 1.3233801312819979e-05, | |
| "loss": 0.2364, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.24433283507823944, | |
| "step": 2840, | |
| "valid_targets_mean": 5373.4, | |
| "valid_targets_min": 967 | |
| }, | |
| { | |
| "epoch": 4.552, | |
| "grad_norm": 0.38498330932928676, | |
| "learning_rate": 1.3158763981448606e-05, | |
| "loss": 0.2491, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.23016983270645142, | |
| "step": 2845, | |
| "valid_targets_mean": 4454.9, | |
| "valid_targets_min": 1088 | |
| }, | |
| { | |
| "epoch": 4.5600000000000005, | |
| "grad_norm": 0.3993486182470705, | |
| "learning_rate": 1.3083835553849148e-05, | |
| "loss": 0.2384, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.24869374930858612, | |
| "step": 2850, | |
| "valid_targets_mean": 4435.8, | |
| "valid_targets_min": 739 | |
| }, | |
| { | |
| "epoch": 4.568, | |
| "grad_norm": 0.4530152443374673, | |
| "learning_rate": 1.3009017222786828e-05, | |
| "loss": 0.2405, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2325194925069809, | |
| "step": 2855, | |
| "valid_targets_mean": 3650.1, | |
| "valid_targets_min": 1149 | |
| }, | |
| { | |
| "epoch": 4.576, | |
| "grad_norm": 0.45145189985347955, | |
| "learning_rate": 1.2934310179274269e-05, | |
| "loss": 0.2342, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.24084368348121643, | |
| "step": 2860, | |
| "valid_targets_mean": 3644.5, | |
| "valid_targets_min": 704 | |
| }, | |
| { | |
| "epoch": 4.584, | |
| "grad_norm": 0.4475782725112683, | |
| "learning_rate": 1.2859715612552541e-05, | |
| "loss": 0.2372, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.21897542476654053, | |
| "step": 2865, | |
| "valid_targets_mean": 3397.4, | |
| "valid_targets_min": 1067 | |
| }, | |
| { | |
| "epoch": 4.592, | |
| "grad_norm": 0.4612371616890256, | |
| "learning_rate": 1.278523471007223e-05, | |
| "loss": 0.2474, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.22148343920707703, | |
| "step": 2870, | |
| "valid_targets_mean": 2955.4, | |
| "valid_targets_min": 777 | |
| }, | |
| { | |
| "epoch": 4.6, | |
| "grad_norm": 0.3387602746131469, | |
| "learning_rate": 1.271086865747451e-05, | |
| "loss": 0.2464, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.24698293209075928, | |
| "step": 2875, | |
| "valid_targets_mean": 6360.4, | |
| "valid_targets_min": 1383 | |
| }, | |
| { | |
| "epoch": 4.608, | |
| "grad_norm": 0.3839373637303783, | |
| "learning_rate": 1.2636618638572316e-05, | |
| "loss": 0.2216, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2379951924085617, | |
| "step": 2880, | |
| "valid_targets_mean": 5022.4, | |
| "valid_targets_min": 829 | |
| }, | |
| { | |
| "epoch": 4.616, | |
| "grad_norm": 0.3197611013771196, | |
| "learning_rate": 1.2562485835331466e-05, | |
| "loss": 0.2396, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.23118649423122406, | |
| "step": 2885, | |
| "valid_targets_mean": 7057.1, | |
| "valid_targets_min": 1038 | |
| }, | |
| { | |
| "epoch": 4.624, | |
| "grad_norm": 0.43290245672745614, | |
| "learning_rate": 1.2488471427851852e-05, | |
| "loss": 0.2271, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.23651964962482452, | |
| "step": 2890, | |
| "valid_targets_mean": 3611.8, | |
| "valid_targets_min": 851 | |
| }, | |
| { | |
| "epoch": 4.632, | |
| "grad_norm": 0.4331683426323707, | |
| "learning_rate": 1.241457659434866e-05, | |
| "loss": 0.2371, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.22483578324317932, | |
| "step": 2895, | |
| "valid_targets_mean": 3447.9, | |
| "valid_targets_min": 978 | |
| }, | |
| { | |
| "epoch": 4.64, | |
| "grad_norm": 0.3988096882137287, | |
| "learning_rate": 1.2340802511133605e-05, | |
| "loss": 0.2381, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2243075966835022, | |
| "step": 2900, | |
| "valid_targets_mean": 4385.4, | |
| "valid_targets_min": 863 | |
| }, | |
| { | |
| "epoch": 4.648, | |
| "grad_norm": 0.40557367557155005, | |
| "learning_rate": 1.2267150352596216e-05, | |
| "loss": 0.2268, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.22742396593093872, | |
| "step": 2905, | |
| "valid_targets_mean": 4335.8, | |
| "valid_targets_min": 941 | |
| }, | |
| { | |
| "epoch": 4.656, | |
| "grad_norm": 0.41903497071079515, | |
| "learning_rate": 1.2193621291185132e-05, | |
| "loss": 0.2272, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.23160028457641602, | |
| "step": 2910, | |
| "valid_targets_mean": 3808.8, | |
| "valid_targets_min": 916 | |
| }, | |
| { | |
| "epoch": 4.664, | |
| "grad_norm": 0.3333342358026764, | |
| "learning_rate": 1.2120216497389446e-05, | |
| "loss": 0.2259, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.21926115453243256, | |
| "step": 2915, | |
| "valid_targets_mean": 5564.4, | |
| "valid_targets_min": 898 | |
| }, | |
| { | |
| "epoch": 4.672, | |
| "grad_norm": 0.35498213383205174, | |
| "learning_rate": 1.2046937139720068e-05, | |
| "loss": 0.2202, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.23465262353420258, | |
| "step": 2920, | |
| "valid_targets_mean": 5612.9, | |
| "valid_targets_min": 901 | |
| }, | |
| { | |
| "epoch": 4.68, | |
| "grad_norm": 0.43676624123660573, | |
| "learning_rate": 1.1973784384691121e-05, | |
| "loss": 0.2355, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.22778140008449554, | |
| "step": 2925, | |
| "valid_targets_mean": 3652.6, | |
| "valid_targets_min": 744 | |
| }, | |
| { | |
| "epoch": 4.688, | |
| "grad_norm": 0.43153283817401733, | |
| "learning_rate": 1.1900759396801382e-05, | |
| "loss": 0.2533, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.23726201057434082, | |
| "step": 2930, | |
| "valid_targets_mean": 3769.8, | |
| "valid_targets_min": 1104 | |
| }, | |
| { | |
| "epoch": 4.696, | |
| "grad_norm": 0.4041357939241981, | |
| "learning_rate": 1.1827863338515741e-05, | |
| "loss": 0.2352, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.23281404376029968, | |
| "step": 2935, | |
| "valid_targets_mean": 4432.1, | |
| "valid_targets_min": 897 | |
| }, | |
| { | |
| "epoch": 4.704, | |
| "grad_norm": 0.3514377881831837, | |
| "learning_rate": 1.1755097370246669e-05, | |
| "loss": 0.224, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.220592200756073, | |
| "step": 2940, | |
| "valid_targets_mean": 5495.7, | |
| "valid_targets_min": 793 | |
| }, | |
| { | |
| "epoch": 4.712, | |
| "grad_norm": 0.4214613254086311, | |
| "learning_rate": 1.1682462650335791e-05, | |
| "loss": 0.2244, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2115774154663086, | |
| "step": 2945, | |
| "valid_targets_mean": 3903.4, | |
| "valid_targets_min": 785 | |
| }, | |
| { | |
| "epoch": 4.72, | |
| "grad_norm": 0.5099284948600926, | |
| "learning_rate": 1.1609960335035423e-05, | |
| "loss": 0.2211, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.22030048072338104, | |
| "step": 2950, | |
| "valid_targets_mean": 2719.6, | |
| "valid_targets_min": 771 | |
| }, | |
| { | |
| "epoch": 4.728, | |
| "grad_norm": 0.3655229242181005, | |
| "learning_rate": 1.1537591578490165e-05, | |
| "loss": 0.237, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.20158278942108154, | |
| "step": 2955, | |
| "valid_targets_mean": 4298.5, | |
| "valid_targets_min": 793 | |
| }, | |
| { | |
| "epoch": 4.736, | |
| "grad_norm": 0.46139845446107813, | |
| "learning_rate": 1.146535753271853e-05, | |
| "loss": 0.2321, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.24375522136688232, | |
| "step": 2960, | |
| "valid_targets_mean": 3361.3, | |
| "valid_targets_min": 902 | |
| }, | |
| { | |
| "epoch": 4.744, | |
| "grad_norm": 0.4688790022851686, | |
| "learning_rate": 1.139325934759461e-05, | |
| "loss": 0.2485, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.25852593779563904, | |
| "step": 2965, | |
| "valid_targets_mean": 3596.0, | |
| "valid_targets_min": 968 | |
| }, | |
| { | |
| "epoch": 4.752, | |
| "grad_norm": 0.5227422410545948, | |
| "learning_rate": 1.1321298170829768e-05, | |
| "loss": 0.2668, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.24697157740592957, | |
| "step": 2970, | |
| "valid_targets_mean": 2877.5, | |
| "valid_targets_min": 887 | |
| }, | |
| { | |
| "epoch": 4.76, | |
| "grad_norm": 0.4038881666049419, | |
| "learning_rate": 1.1249475147954363e-05, | |
| "loss": 0.2329, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.21838301420211792, | |
| "step": 2975, | |
| "valid_targets_mean": 4123.2, | |
| "valid_targets_min": 777 | |
| }, | |
| { | |
| "epoch": 4.768, | |
| "grad_norm": 0.5184660299912265, | |
| "learning_rate": 1.1177791422299528e-05, | |
| "loss": 0.2531, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.27220386266708374, | |
| "step": 2980, | |
| "valid_targets_mean": 3015.9, | |
| "valid_targets_min": 630 | |
| }, | |
| { | |
| "epoch": 4.776, | |
| "grad_norm": 0.3654819987069819, | |
| "learning_rate": 1.1106248134978959e-05, | |
| "loss": 0.2352, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.22408969700336456, | |
| "step": 2985, | |
| "valid_targets_mean": 5349.6, | |
| "valid_targets_min": 1141 | |
| }, | |
| { | |
| "epoch": 4.784, | |
| "grad_norm": 0.3660126606227132, | |
| "learning_rate": 1.1034846424870744e-05, | |
| "loss": 0.2417, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2300996333360672, | |
| "step": 2990, | |
| "valid_targets_mean": 4971.9, | |
| "valid_targets_min": 978 | |
| }, | |
| { | |
| "epoch": 4.792, | |
| "grad_norm": 0.3756476057343205, | |
| "learning_rate": 1.0963587428599256e-05, | |
| "loss": 0.2314, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2119663953781128, | |
| "step": 2995, | |
| "valid_targets_mean": 4842.7, | |
| "valid_targets_min": 732 | |
| }, | |
| { | |
| "epoch": 4.8, | |
| "grad_norm": 0.4613837764914878, | |
| "learning_rate": 1.089247228051704e-05, | |
| "loss": 0.2306, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.24079769849777222, | |
| "step": 3000, | |
| "valid_targets_mean": 3716.3, | |
| "valid_targets_min": 759 | |
| }, | |
| { | |
| "epoch": 4.808, | |
| "grad_norm": 0.28943767912823337, | |
| "learning_rate": 1.0821502112686753e-05, | |
| "loss": 0.2298, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.19776251912117004, | |
| "step": 3005, | |
| "valid_targets_mean": 6197.8, | |
| "valid_targets_min": 782 | |
| }, | |
| { | |
| "epoch": 4.816, | |
| "grad_norm": 0.4161037969654231, | |
| "learning_rate": 1.0750678054863158e-05, | |
| "loss": 0.2321, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.22658786177635193, | |
| "step": 3010, | |
| "valid_targets_mean": 3665.1, | |
| "valid_targets_min": 752 | |
| }, | |
| { | |
| "epoch": 4.824, | |
| "grad_norm": 0.31379216309346025, | |
| "learning_rate": 1.0680001234475127e-05, | |
| "loss": 0.2212, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.20140352845191956, | |
| "step": 3015, | |
| "valid_targets_mean": 6177.3, | |
| "valid_targets_min": 887 | |
| }, | |
| { | |
| "epoch": 4.832, | |
| "grad_norm": 0.3780126118328765, | |
| "learning_rate": 1.0609472776607715e-05, | |
| "loss": 0.2296, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.24119669198989868, | |
| "step": 3020, | |
| "valid_targets_mean": 6278.8, | |
| "valid_targets_min": 1106 | |
| }, | |
| { | |
| "epoch": 4.84, | |
| "grad_norm": 0.3373262706812363, | |
| "learning_rate": 1.0539093803984217e-05, | |
| "loss": 0.2142, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.20110008120536804, | |
| "step": 3025, | |
| "valid_targets_mean": 5751.3, | |
| "valid_targets_min": 1033 | |
| }, | |
| { | |
| "epoch": 4.848, | |
| "grad_norm": 0.37649695603656197, | |
| "learning_rate": 1.046886543694832e-05, | |
| "loss": 0.2338, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2516307532787323, | |
| "step": 3030, | |
| "valid_targets_mean": 5042.4, | |
| "valid_targets_min": 1139 | |
| }, | |
| { | |
| "epoch": 4.856, | |
| "grad_norm": 0.38263029301997636, | |
| "learning_rate": 1.0398788793446263e-05, | |
| "loss": 0.2327, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.207616925239563, | |
| "step": 3035, | |
| "valid_targets_mean": 4312.8, | |
| "valid_targets_min": 1036 | |
| }, | |
| { | |
| "epoch": 4.864, | |
| "grad_norm": 0.42458541451939813, | |
| "learning_rate": 1.0328864989009037e-05, | |
| "loss": 0.229, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.22665390372276306, | |
| "step": 3040, | |
| "valid_targets_mean": 3687.1, | |
| "valid_targets_min": 969 | |
| }, | |
| { | |
| "epoch": 4.872, | |
| "grad_norm": 0.3892606340058019, | |
| "learning_rate": 1.0259095136734634e-05, | |
| "loss": 0.2207, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.23070894181728363, | |
| "step": 3045, | |
| "valid_targets_mean": 4354.3, | |
| "valid_targets_min": 918 | |
| }, | |
| { | |
| "epoch": 4.88, | |
| "grad_norm": 0.40230384154567744, | |
| "learning_rate": 1.0189480347270311e-05, | |
| "loss": 0.232, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.24141496419906616, | |
| "step": 3050, | |
| "valid_targets_mean": 4812.8, | |
| "valid_targets_min": 942 | |
| }, | |
| { | |
| "epoch": 4.888, | |
| "grad_norm": 0.4084264186589575, | |
| "learning_rate": 1.0120021728794938e-05, | |
| "loss": 0.2204, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.21485230326652527, | |
| "step": 3055, | |
| "valid_targets_mean": 4172.5, | |
| "valid_targets_min": 969 | |
| }, | |
| { | |
| "epoch": 4.896, | |
| "grad_norm": 0.5082884887305914, | |
| "learning_rate": 1.0050720387001334e-05, | |
| "loss": 0.2296, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2505829930305481, | |
| "step": 3060, | |
| "valid_targets_mean": 2977.1, | |
| "valid_targets_min": 690 | |
| }, | |
| { | |
| "epoch": 4.904, | |
| "grad_norm": 0.38678833449761596, | |
| "learning_rate": 9.981577425078672e-06, | |
| "loss": 0.2343, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2366374433040619, | |
| "step": 3065, | |
| "valid_targets_mean": 4762.4, | |
| "valid_targets_min": 1002 | |
| }, | |
| { | |
| "epoch": 4.912, | |
| "grad_norm": 0.38418961481494723, | |
| "learning_rate": 9.912593943694924e-06, | |
| "loss": 0.2319, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1991523802280426, | |
| "step": 3070, | |
| "valid_targets_mean": 4543.4, | |
| "valid_targets_min": 1056 | |
| }, | |
| { | |
| "epoch": 4.92, | |
| "grad_norm": 0.4832708684343786, | |
| "learning_rate": 9.843771040979328e-06, | |
| "loss": 0.2498, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.24161699414253235, | |
| "step": 3075, | |
| "valid_targets_mean": 3462.6, | |
| "valid_targets_min": 763 | |
| }, | |
| { | |
| "epoch": 4.928, | |
| "grad_norm": 0.4309087603014352, | |
| "learning_rate": 9.775109812504922e-06, | |
| "loss": 0.2606, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.20923660695552826, | |
| "step": 3080, | |
| "valid_targets_mean": 3484.4, | |
| "valid_targets_min": 664 | |
| }, | |
| { | |
| "epoch": 4.936, | |
| "grad_norm": 0.3788722471104869, | |
| "learning_rate": 9.706611351271088e-06, | |
| "loss": 0.2319, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.22105582058429718, | |
| "step": 3085, | |
| "valid_targets_mean": 4319.1, | |
| "valid_targets_min": 721 | |
| }, | |
| { | |
| "epoch": 4.944, | |
| "grad_norm": 0.4355369088708729, | |
| "learning_rate": 9.638276747686169e-06, | |
| "loss": 0.2428, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.24673102796077728, | |
| "step": 3090, | |
| "valid_targets_mean": 3661.9, | |
| "valid_targets_min": 881 | |
| }, | |
| { | |
| "epoch": 4.952, | |
| "grad_norm": 0.40667695740023535, | |
| "learning_rate": 9.570107089550091e-06, | |
| "loss": 0.2351, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.23325340449810028, | |
| "step": 3095, | |
| "valid_targets_mean": 4411.1, | |
| "valid_targets_min": 902 | |
| }, | |
| { | |
| "epoch": 4.96, | |
| "grad_norm": 0.3942359450360996, | |
| "learning_rate": 9.502103462037074e-06, | |
| "loss": 0.2253, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2507432699203491, | |
| "step": 3100, | |
| "valid_targets_mean": 4678.1, | |
| "valid_targets_min": 899 | |
| }, | |
| { | |
| "epoch": 4.968, | |
| "grad_norm": 0.395308435273449, | |
| "learning_rate": 9.434266947678326e-06, | |
| "loss": 0.2292, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.22862330079078674, | |
| "step": 3105, | |
| "valid_targets_mean": 4401.1, | |
| "valid_targets_min": 1048 | |
| }, | |
| { | |
| "epoch": 4.976, | |
| "grad_norm": 0.35534927093483687, | |
| "learning_rate": 9.366598626344836e-06, | |
| "loss": 0.2347, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.24637477099895477, | |
| "step": 3110, | |
| "valid_targets_mean": 5403.4, | |
| "valid_targets_min": 942 | |
| }, | |
| { | |
| "epoch": 4.984, | |
| "grad_norm": 0.3598184673475776, | |
| "learning_rate": 9.299099575230172e-06, | |
| "loss": 0.2528, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.23107300698757172, | |
| "step": 3115, | |
| "valid_targets_mean": 5155.9, | |
| "valid_targets_min": 1085 | |
| }, | |
| { | |
| "epoch": 4.992, | |
| "grad_norm": 0.5088527769647312, | |
| "learning_rate": 9.231770868833334e-06, | |
| "loss": 0.2464, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.26669660210609436, | |
| "step": 3120, | |
| "valid_targets_mean": 3198.4, | |
| "valid_targets_min": 982 | |
| }, | |
| { | |
| "epoch": 5.0, | |
| "grad_norm": 0.34198183838948926, | |
| "learning_rate": 9.164613578941652e-06, | |
| "loss": 0.2254, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.21536295115947723, | |
| "step": 3125, | |
| "valid_targets_mean": 5529.9, | |
| "valid_targets_min": 896 | |
| }, | |
| { | |
| "epoch": 5.008, | |
| "grad_norm": 0.4912006951999863, | |
| "learning_rate": 9.097628774613732e-06, | |
| "loss": 0.2251, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2081344723701477, | |
| "step": 3130, | |
| "valid_targets_mean": 2727.4, | |
| "valid_targets_min": 743 | |
| }, | |
| { | |
| "epoch": 5.016, | |
| "grad_norm": 0.4084241000168698, | |
| "learning_rate": 9.030817522162403e-06, | |
| "loss": 0.23, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2399434745311737, | |
| "step": 3135, | |
| "valid_targets_mean": 4014.1, | |
| "valid_targets_min": 664 | |
| }, | |
| { | |
| "epoch": 5.024, | |
| "grad_norm": 0.43709779953742833, | |
| "learning_rate": 8.964180885137797e-06, | |
| "loss": 0.2182, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.22896772623062134, | |
| "step": 3140, | |
| "valid_targets_mean": 4365.4, | |
| "valid_targets_min": 1046 | |
| }, | |
| { | |
| "epoch": 5.032, | |
| "grad_norm": 0.305518599208397, | |
| "learning_rate": 8.897719924310375e-06, | |
| "loss": 0.2348, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.20471172034740448, | |
| "step": 3145, | |
| "valid_targets_mean": 6402.6, | |
| "valid_targets_min": 931 | |
| }, | |
| { | |
| "epoch": 5.04, | |
| "grad_norm": 0.37990073340945496, | |
| "learning_rate": 8.831435697654068e-06, | |
| "loss": 0.2202, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.19448421895503998, | |
| "step": 3150, | |
| "valid_targets_mean": 4079.8, | |
| "valid_targets_min": 902 | |
| }, | |
| { | |
| "epoch": 5.048, | |
| "grad_norm": 0.31479459501943424, | |
| "learning_rate": 8.765329260329413e-06, | |
| "loss": 0.2219, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.19626212120056152, | |
| "step": 3155, | |
| "valid_targets_mean": 6214.5, | |
| "valid_targets_min": 951 | |
| }, | |
| { | |
| "epoch": 5.056, | |
| "grad_norm": 0.3740116945023727, | |
| "learning_rate": 8.699401664666774e-06, | |
| "loss": 0.2321, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.19557811319828033, | |
| "step": 3160, | |
| "valid_targets_mean": 4366.0, | |
| "valid_targets_min": 788 | |
| }, | |
| { | |
| "epoch": 5.064, | |
| "grad_norm": 0.4192074460438228, | |
| "learning_rate": 8.633653960149579e-06, | |
| "loss": 0.2312, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.26347237825393677, | |
| "step": 3165, | |
| "valid_targets_mean": 4294.9, | |
| "valid_targets_min": 869 | |
| }, | |
| { | |
| "epoch": 5.072, | |
| "grad_norm": 0.3170004260395546, | |
| "learning_rate": 8.56808719339762e-06, | |
| "loss": 0.2366, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.20620077848434448, | |
| "step": 3170, | |
| "valid_targets_mean": 6649.5, | |
| "valid_targets_min": 888 | |
| }, | |
| { | |
| "epoch": 5.08, | |
| "grad_norm": 0.3841314180170002, | |
| "learning_rate": 8.502702408150391e-06, | |
| "loss": 0.2197, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.24174794554710388, | |
| "step": 3175, | |
| "valid_targets_mean": 4925.1, | |
| "valid_targets_min": 909 | |
| }, | |
| { | |
| "epoch": 5.088, | |
| "grad_norm": 0.34188326135593766, | |
| "learning_rate": 8.43750064525047e-06, | |
| "loss": 0.2337, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.19666939973831177, | |
| "step": 3180, | |
| "valid_targets_mean": 5542.1, | |
| "valid_targets_min": 1072 | |
| }, | |
| { | |
| "epoch": 5.096, | |
| "grad_norm": 0.48888208213056195, | |
| "learning_rate": 8.372482942626952e-06, | |
| "loss": 0.2289, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.25927528738975525, | |
| "step": 3185, | |
| "valid_targets_mean": 3291.2, | |
| "valid_targets_min": 950 | |
| }, | |
| { | |
| "epoch": 5.104, | |
| "grad_norm": 0.5266810833672644, | |
| "learning_rate": 8.307650335278927e-06, | |
| "loss": 0.2403, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2526220381259918, | |
| "step": 3190, | |
| "valid_targets_mean": 3052.0, | |
| "valid_targets_min": 842 | |
| }, | |
| { | |
| "epoch": 5.112, | |
| "grad_norm": 0.3489078895076499, | |
| "learning_rate": 8.243003855259015e-06, | |
| "loss": 0.2394, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.23310494422912598, | |
| "step": 3195, | |
| "valid_targets_mean": 6052.8, | |
| "valid_targets_min": 1104 | |
| }, | |
| { | |
| "epoch": 5.12, | |
| "grad_norm": 0.4273931588420199, | |
| "learning_rate": 8.178544531656897e-06, | |
| "loss": 0.2196, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2163240909576416, | |
| "step": 3200, | |
| "valid_targets_mean": 4048.2, | |
| "valid_targets_min": 747 | |
| }, | |
| { | |
| "epoch": 5.128, | |
| "grad_norm": 0.4287189766428896, | |
| "learning_rate": 8.11427339058299e-06, | |
| "loss": 0.2492, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.25983962416648865, | |
| "step": 3205, | |
| "valid_targets_mean": 4372.1, | |
| "valid_targets_min": 1021 | |
| }, | |
| { | |
| "epoch": 5.136, | |
| "grad_norm": 0.4697741992206112, | |
| "learning_rate": 8.050191455152072e-06, | |
| "loss": 0.2228, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.23713898658752441, | |
| "step": 3210, | |
| "valid_targets_mean": 3780.0, | |
| "valid_targets_min": 833 | |
| }, | |
| { | |
| "epoch": 5.144, | |
| "grad_norm": 0.3426204752362461, | |
| "learning_rate": 7.986299745467013e-06, | |
| "loss": 0.2124, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2151990830898285, | |
| "step": 3215, | |
| "valid_targets_mean": 5340.3, | |
| "valid_targets_min": 1017 | |
| }, | |
| { | |
| "epoch": 5.152, | |
| "grad_norm": 0.3476898053916093, | |
| "learning_rate": 7.922599278602524e-06, | |
| "loss": 0.2271, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2065230756998062, | |
| "step": 3220, | |
| "valid_targets_mean": 5802.0, | |
| "valid_targets_min": 987 | |
| }, | |
| { | |
| "epoch": 5.16, | |
| "grad_norm": 0.34997024370706975, | |
| "learning_rate": 7.859091068588987e-06, | |
| "loss": 0.223, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.22452686727046967, | |
| "step": 3225, | |
| "valid_targets_mean": 5912.6, | |
| "valid_targets_min": 711 | |
| }, | |
| { | |
| "epoch": 5.168, | |
| "grad_norm": 0.5145349413640553, | |
| "learning_rate": 7.795776126396284e-06, | |
| "loss": 0.2294, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.26052239537239075, | |
| "step": 3230, | |
| "valid_targets_mean": 3026.5, | |
| "valid_targets_min": 938 | |
| }, | |
| { | |
| "epoch": 5.176, | |
| "grad_norm": 0.44391793492379567, | |
| "learning_rate": 7.732655459917726e-06, | |
| "loss": 0.2162, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.23127031326293945, | |
| "step": 3235, | |
| "valid_targets_mean": 3623.2, | |
| "valid_targets_min": 646 | |
| }, | |
| { | |
| "epoch": 5.184, | |
| "grad_norm": 0.3630611195177078, | |
| "learning_rate": 7.669730073954005e-06, | |
| "loss": 0.2373, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.21552309393882751, | |
| "step": 3240, | |
| "valid_targets_mean": 5270.8, | |
| "valid_targets_min": 901 | |
| }, | |
| { | |
| "epoch": 5.192, | |
| "grad_norm": 0.4102333340497864, | |
| "learning_rate": 7.607000970197194e-06, | |
| "loss": 0.2317, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.24429859220981598, | |
| "step": 3245, | |
| "valid_targets_mean": 4244.9, | |
| "valid_targets_min": 891 | |
| }, | |
| { | |
| "epoch": 5.2, | |
| "grad_norm": 0.36913473264732094, | |
| "learning_rate": 7.544469147214797e-06, | |
| "loss": 0.2254, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.22578775882720947, | |
| "step": 3250, | |
| "valid_targets_mean": 5226.1, | |
| "valid_targets_min": 973 | |
| }, | |
| { | |
| "epoch": 5.208, | |
| "grad_norm": 0.40820878543241174, | |
| "learning_rate": 7.482135600433868e-06, | |
| "loss": 0.239, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.25584137439727783, | |
| "step": 3255, | |
| "valid_targets_mean": 4932.1, | |
| "valid_targets_min": 1008 | |
| }, | |
| { | |
| "epoch": 5.216, | |
| "grad_norm": 0.4684722429375052, | |
| "learning_rate": 7.420001322125156e-06, | |
| "loss": 0.2441, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.24265044927597046, | |
| "step": 3260, | |
| "valid_targets_mean": 3623.2, | |
| "valid_targets_min": 874 | |
| }, | |
| { | |
| "epoch": 5.224, | |
| "grad_norm": 0.44692307822276434, | |
| "learning_rate": 7.3580673013872946e-06, | |
| "loss": 0.2271, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.20505103468894958, | |
| "step": 3265, | |
| "valid_targets_mean": 3615.4, | |
| "valid_targets_min": 750 | |
| }, | |
| { | |
| "epoch": 5.232, | |
| "grad_norm": 0.3654288352288813, | |
| "learning_rate": 7.2963345241310904e-06, | |
| "loss": 0.2253, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.19597743451595306, | |
| "step": 3270, | |
| "valid_targets_mean": 4878.7, | |
| "valid_targets_min": 867 | |
| }, | |
| { | |
| "epoch": 5.24, | |
| "grad_norm": 0.3915593147251127, | |
| "learning_rate": 7.234803973063797e-06, | |
| "loss": 0.226, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2212725579738617, | |
| "step": 3275, | |
| "valid_targets_mean": 4959.9, | |
| "valid_targets_min": 723 | |
| }, | |
| { | |
| "epoch": 5.248, | |
| "grad_norm": 0.4748719949243943, | |
| "learning_rate": 7.173476627673492e-06, | |
| "loss": 0.2375, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2473698854446411, | |
| "step": 3280, | |
| "valid_targets_mean": 3323.0, | |
| "valid_targets_min": 1015 | |
| }, | |
| { | |
| "epoch": 5.256, | |
| "grad_norm": 0.3588612065200158, | |
| "learning_rate": 7.112353464213477e-06, | |
| "loss": 0.2362, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2438439577817917, | |
| "step": 3285, | |
| "valid_targets_mean": 6511.6, | |
| "valid_targets_min": 770 | |
| }, | |
| { | |
| "epoch": 5.264, | |
| "grad_norm": 0.4663585945976981, | |
| "learning_rate": 7.051435455686735e-06, | |
| "loss": 0.2226, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.23258927464485168, | |
| "step": 3290, | |
| "valid_targets_mean": 3390.1, | |
| "valid_targets_min": 782 | |
| }, | |
| { | |
| "epoch": 5.272, | |
| "grad_norm": 0.4205026890280819, | |
| "learning_rate": 6.990723571830438e-06, | |
| "loss": 0.2073, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1906987726688385, | |
| "step": 3295, | |
| "valid_targets_mean": 3564.7, | |
| "valid_targets_min": 630 | |
| }, | |
| { | |
| "epoch": 5.28, | |
| "grad_norm": 0.426317166162649, | |
| "learning_rate": 6.93021877910052e-06, | |
| "loss": 0.2271, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.20793795585632324, | |
| "step": 3300, | |
| "valid_targets_mean": 3830.4, | |
| "valid_targets_min": 945 | |
| }, | |
| { | |
| "epoch": 5.288, | |
| "grad_norm": 0.42620206171289865, | |
| "learning_rate": 6.8699220406562985e-06, | |
| "loss": 0.2272, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2456003874540329, | |
| "step": 3305, | |
| "valid_targets_mean": 4204.2, | |
| "valid_targets_min": 830 | |
| }, | |
| { | |
| "epoch": 5.296, | |
| "grad_norm": 0.3345250802407866, | |
| "learning_rate": 6.809834316345117e-06, | |
| "loss": 0.22, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.20420578122138977, | |
| "step": 3310, | |
| "valid_targets_mean": 5562.4, | |
| "valid_targets_min": 842 | |
| }, | |
| { | |
| "epoch": 5.304, | |
| "grad_norm": 0.43008399094177285, | |
| "learning_rate": 6.749956562687083e-06, | |
| "loss": 0.2405, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.19601237773895264, | |
| "step": 3315, | |
| "valid_targets_mean": 3619.1, | |
| "valid_targets_min": 857 | |
| }, | |
| { | |
| "epoch": 5.312, | |
| "grad_norm": 0.44306372764221147, | |
| "learning_rate": 6.690289732859841e-06, | |
| "loss": 0.2356, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.22992250323295593, | |
| "step": 3320, | |
| "valid_targets_mean": 3904.2, | |
| "valid_targets_min": 887 | |
| }, | |
| { | |
| "epoch": 5.32, | |
| "grad_norm": 0.349409240727027, | |
| "learning_rate": 6.630834776683403e-06, | |
| "loss": 0.2284, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.20814573764801025, | |
| "step": 3325, | |
| "valid_targets_mean": 5271.5, | |
| "valid_targets_min": 835 | |
| }, | |
| { | |
| "epoch": 5.328, | |
| "grad_norm": 0.4377279904884754, | |
| "learning_rate": 6.571592640605e-06, | |
| "loss": 0.2308, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.24027813971042633, | |
| "step": 3330, | |
| "valid_targets_mean": 4167.5, | |
| "valid_targets_min": 1091 | |
| }, | |
| { | |
| "epoch": 5.336, | |
| "grad_norm": 0.40618186734048567, | |
| "learning_rate": 6.512564267684061e-06, | |
| "loss": 0.2156, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.21228620409965515, | |
| "step": 3335, | |
| "valid_targets_mean": 4030.2, | |
| "valid_targets_min": 951 | |
| }, | |
| { | |
| "epoch": 5.344, | |
| "grad_norm": 0.30357992280217233, | |
| "learning_rate": 6.453750597577167e-06, | |
| "loss": 0.2388, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2015833854675293, | |
| "step": 3340, | |
| "valid_targets_mean": 6969.1, | |
| "valid_targets_min": 971 | |
| }, | |
| { | |
| "epoch": 5.352, | |
| "grad_norm": 0.773767098694673, | |
| "learning_rate": 6.395152566523106e-06, | |
| "loss": 0.2418, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.27393215894699097, | |
| "step": 3345, | |
| "valid_targets_mean": 1623.4, | |
| "valid_targets_min": 897 | |
| }, | |
| { | |
| "epoch": 5.36, | |
| "grad_norm": 0.33428819684027417, | |
| "learning_rate": 6.336771107327966e-06, | |
| "loss": 0.2175, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.19410398602485657, | |
| "step": 3350, | |
| "valid_targets_mean": 5683.6, | |
| "valid_targets_min": 797 | |
| }, | |
| { | |
| "epoch": 5.368, | |
| "grad_norm": 0.45328327875390717, | |
| "learning_rate": 6.278607149350289e-06, | |
| "loss": 0.2259, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.23492465913295746, | |
| "step": 3355, | |
| "valid_targets_mean": 3600.2, | |
| "valid_targets_min": 841 | |
| }, | |
| { | |
| "epoch": 5.376, | |
| "grad_norm": 0.42792886228681065, | |
| "learning_rate": 6.220661618486268e-06, | |
| "loss": 0.2189, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.21100068092346191, | |
| "step": 3360, | |
| "valid_targets_mean": 4031.2, | |
| "valid_targets_min": 978 | |
| }, | |
| { | |
| "epoch": 5.384, | |
| "grad_norm": 0.3557291761519541, | |
| "learning_rate": 6.162935437155024e-06, | |
| "loss": 0.2156, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.19593030214309692, | |
| "step": 3365, | |
| "valid_targets_mean": 5009.3, | |
| "valid_targets_min": 932 | |
| }, | |
| { | |
| "epoch": 5.392, | |
| "grad_norm": 0.4833225610526371, | |
| "learning_rate": 6.105429524283901e-06, | |
| "loss": 0.231, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.218770831823349, | |
| "step": 3370, | |
| "valid_targets_mean": 3252.6, | |
| "valid_targets_min": 1030 | |
| }, | |
| { | |
| "epoch": 5.4, | |
| "grad_norm": 0.39045313632453454, | |
| "learning_rate": 6.04814479529386e-06, | |
| "loss": 0.228, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.23024097084999084, | |
| "step": 3375, | |
| "valid_targets_mean": 5329.8, | |
| "valid_targets_min": 898 | |
| }, | |
| { | |
| "epoch": 5.408, | |
| "grad_norm": 0.45340949213189335, | |
| "learning_rate": 5.991082162084889e-06, | |
| "loss": 0.2385, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2215794175863266, | |
| "step": 3380, | |
| "valid_targets_mean": 3607.6, | |
| "valid_targets_min": 845 | |
| }, | |
| { | |
| "epoch": 5.416, | |
| "grad_norm": 0.4468511461282984, | |
| "learning_rate": 5.934242533021499e-06, | |
| "loss": 0.2203, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.23574653267860413, | |
| "step": 3385, | |
| "valid_targets_mean": 3929.5, | |
| "valid_targets_min": 850 | |
| }, | |
| { | |
| "epoch": 5.424, | |
| "grad_norm": 0.45802290494827136, | |
| "learning_rate": 5.877626812918258e-06, | |
| "loss": 0.2204, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2201964259147644, | |
| "step": 3390, | |
| "valid_targets_mean": 3413.6, | |
| "valid_targets_min": 973 | |
| }, | |
| { | |
| "epoch": 5.432, | |
| "grad_norm": 0.3481392608973216, | |
| "learning_rate": 5.821235903025378e-06, | |
| "loss": 0.2336, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.18317848443984985, | |
| "step": 3395, | |
| "valid_targets_mean": 4914.8, | |
| "valid_targets_min": 774 | |
| }, | |
| { | |
| "epoch": 5.44, | |
| "grad_norm": 0.46295264230395117, | |
| "learning_rate": 5.765070701014391e-06, | |
| "loss": 0.229, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.27391642332077026, | |
| "step": 3400, | |
| "valid_targets_mean": 3862.6, | |
| "valid_targets_min": 952 | |
| }, | |
| { | |
| "epoch": 5.448, | |
| "grad_norm": 0.44931670150343195, | |
| "learning_rate": 5.709132100963841e-06, | |
| "loss": 0.2213, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.21906086802482605, | |
| "step": 3405, | |
| "valid_targets_mean": 3500.1, | |
| "valid_targets_min": 987 | |
| }, | |
| { | |
| "epoch": 5.456, | |
| "grad_norm": 0.34292041400051865, | |
| "learning_rate": 5.653420993345062e-06, | |
| "loss": 0.2155, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.21244516968727112, | |
| "step": 3410, | |
| "valid_targets_mean": 5683.7, | |
| "valid_targets_min": 887 | |
| }, | |
| { | |
| "epoch": 5.464, | |
| "grad_norm": 0.5428548488630459, | |
| "learning_rate": 5.597938265007994e-06, | |
| "loss": 0.2312, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.23990648984909058, | |
| "step": 3415, | |
| "valid_targets_mean": 2892.4, | |
| "valid_targets_min": 951 | |
| }, | |
| { | |
| "epoch": 5.4719999999999995, | |
| "grad_norm": 0.4706595198308062, | |
| "learning_rate": 5.542684799167069e-06, | |
| "loss": 0.235, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.249759703874588, | |
| "step": 3420, | |
| "valid_targets_mean": 3540.8, | |
| "valid_targets_min": 758 | |
| }, | |
| { | |
| "epoch": 5.48, | |
| "grad_norm": 0.33282202476267, | |
| "learning_rate": 5.487661475387152e-06, | |
| "loss": 0.2242, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.19662153720855713, | |
| "step": 3425, | |
| "valid_targets_mean": 5487.8, | |
| "valid_targets_min": 666 | |
| }, | |
| { | |
| "epoch": 5.4879999999999995, | |
| "grad_norm": 0.4006934508283925, | |
| "learning_rate": 5.432869169569541e-06, | |
| "loss": 0.2188, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.20843669772148132, | |
| "step": 3430, | |
| "valid_targets_mean": 4598.9, | |
| "valid_targets_min": 997 | |
| }, | |
| { | |
| "epoch": 5.496, | |
| "grad_norm": 0.44192500022237174, | |
| "learning_rate": 5.378308753938024e-06, | |
| "loss": 0.2173, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.25620633363723755, | |
| "step": 3435, | |
| "valid_targets_mean": 3980.2, | |
| "valid_targets_min": 815 | |
| }, | |
| { | |
| "epoch": 5.504, | |
| "grad_norm": 0.3681471863242707, | |
| "learning_rate": 5.323981097024986e-06, | |
| "loss": 0.2311, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.22333069145679474, | |
| "step": 3440, | |
| "valid_targets_mean": 4963.6, | |
| "valid_targets_min": 879 | |
| }, | |
| { | |
| "epoch": 5.5120000000000005, | |
| "grad_norm": 0.356019133477824, | |
| "learning_rate": 5.269887063657595e-06, | |
| "loss": 0.2241, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2231229841709137, | |
| "step": 3445, | |
| "valid_targets_mean": 5360.4, | |
| "valid_targets_min": 810 | |
| }, | |
| { | |
| "epoch": 5.52, | |
| "grad_norm": 0.36879369128157946, | |
| "learning_rate": 5.216027514944027e-06, | |
| "loss": 0.2192, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.18978220224380493, | |
| "step": 3450, | |
| "valid_targets_mean": 4393.9, | |
| "valid_targets_min": 674 | |
| }, | |
| { | |
| "epoch": 5.5280000000000005, | |
| "grad_norm": 0.3875347538251794, | |
| "learning_rate": 5.162403308259767e-06, | |
| "loss": 0.2251, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.289705753326416, | |
| "step": 3455, | |
| "valid_targets_mean": 5434.2, | |
| "valid_targets_min": 1041 | |
| }, | |
| { | |
| "epoch": 5.536, | |
| "grad_norm": 0.4285607405336305, | |
| "learning_rate": 5.109015297233935e-06, | |
| "loss": 0.2226, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.21771425008773804, | |
| "step": 3460, | |
| "valid_targets_mean": 3872.1, | |
| "valid_targets_min": 534 | |
| }, | |
| { | |
| "epoch": 5.5440000000000005, | |
| "grad_norm": 0.32801945020364764, | |
| "learning_rate": 5.055864331735736e-06, | |
| "loss": 0.2627, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2591525912284851, | |
| "step": 3465, | |
| "valid_targets_mean": 6929.5, | |
| "valid_targets_min": 1136 | |
| }, | |
| { | |
| "epoch": 5.552, | |
| "grad_norm": 0.41166443102851746, | |
| "learning_rate": 5.002951257860909e-06, | |
| "loss": 0.2319, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.24956223368644714, | |
| "step": 3470, | |
| "valid_targets_mean": 4287.4, | |
| "valid_targets_min": 888 | |
| }, | |
| { | |
| "epoch": 5.5600000000000005, | |
| "grad_norm": 0.4554295252372924, | |
| "learning_rate": 4.950276917918256e-06, | |
| "loss": 0.2328, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.256850004196167, | |
| "step": 3475, | |
| "valid_targets_mean": 4103.8, | |
| "valid_targets_min": 845 | |
| }, | |
| { | |
| "epoch": 5.568, | |
| "grad_norm": 0.3506746288784145, | |
| "learning_rate": 4.8978421504162385e-06, | |
| "loss": 0.2297, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2253202199935913, | |
| "step": 3480, | |
| "valid_targets_mean": 5456.3, | |
| "valid_targets_min": 745 | |
| }, | |
| { | |
| "epoch": 5.576, | |
| "grad_norm": 0.33485422936124876, | |
| "learning_rate": 4.845647790049634e-06, | |
| "loss": 0.2278, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.21637138724327087, | |
| "step": 3485, | |
| "valid_targets_mean": 6199.0, | |
| "valid_targets_min": 982 | |
| }, | |
| { | |
| "epoch": 5.584, | |
| "grad_norm": 0.35162902696087944, | |
| "learning_rate": 4.793694667686244e-06, | |
| "loss": 0.2388, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.24226173758506775, | |
| "step": 3490, | |
| "valid_targets_mean": 5727.0, | |
| "valid_targets_min": 1047 | |
| }, | |
| { | |
| "epoch": 5.592, | |
| "grad_norm": 0.37080133618018396, | |
| "learning_rate": 4.741983610353664e-06, | |
| "loss": 0.2093, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1949070692062378, | |
| "step": 3495, | |
| "valid_targets_mean": 4252.1, | |
| "valid_targets_min": 919 | |
| }, | |
| { | |
| "epoch": 5.6, | |
| "grad_norm": 0.3999253776520656, | |
| "learning_rate": 4.690515441226122e-06, | |
| "loss": 0.2123, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1985628753900528, | |
| "step": 3500, | |
| "valid_targets_mean": 3772.6, | |
| "valid_targets_min": 733 | |
| }, | |
| { | |
| "epoch": 5.608, | |
| "grad_norm": 0.4528901320681747, | |
| "learning_rate": 4.639290979611379e-06, | |
| "loss": 0.2291, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.23670107126235962, | |
| "step": 3505, | |
| "valid_targets_mean": 3763.6, | |
| "valid_targets_min": 771 | |
| }, | |
| { | |
| "epoch": 5.616, | |
| "grad_norm": 0.44302870753176904, | |
| "learning_rate": 4.588311040937683e-06, | |
| "loss": 0.2271, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.24087044596672058, | |
| "step": 3510, | |
| "valid_targets_mean": 3990.2, | |
| "valid_targets_min": 932 | |
| }, | |
| { | |
| "epoch": 5.624, | |
| "grad_norm": 0.3201486197160802, | |
| "learning_rate": 4.537576436740783e-06, | |
| "loss": 0.2246, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2149900496006012, | |
| "step": 3515, | |
| "valid_targets_mean": 6031.9, | |
| "valid_targets_min": 817 | |
| }, | |
| { | |
| "epoch": 5.632, | |
| "grad_norm": 0.46436580014316425, | |
| "learning_rate": 4.487087974651016e-06, | |
| "loss": 0.2267, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.25911879539489746, | |
| "step": 3520, | |
| "valid_targets_mean": 4031.8, | |
| "valid_targets_min": 809 | |
| }, | |
| { | |
| "epoch": 5.64, | |
| "grad_norm": 0.525866298269229, | |
| "learning_rate": 4.436846458380455e-06, | |
| "loss": 0.2166, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.21722733974456787, | |
| "step": 3525, | |
| "valid_targets_mean": 4505.7, | |
| "valid_targets_min": 942 | |
| }, | |
| { | |
| "epoch": 5.648, | |
| "grad_norm": 0.43572904873432394, | |
| "learning_rate": 4.386852687710104e-06, | |
| "loss": 0.2361, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.23047277331352234, | |
| "step": 3530, | |
| "valid_targets_mean": 3991.1, | |
| "valid_targets_min": 790 | |
| }, | |
| { | |
| "epoch": 5.656, | |
| "grad_norm": 0.44158709967770554, | |
| "learning_rate": 4.337107458477177e-06, | |
| "loss": 0.2352, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.21699798107147217, | |
| "step": 3535, | |
| "valid_targets_mean": 4153.1, | |
| "valid_targets_min": 850 | |
| }, | |
| { | |
| "epoch": 5.664, | |
| "grad_norm": 0.3868164489125616, | |
| "learning_rate": 4.287611562562422e-06, | |
| "loss": 0.226, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2065228521823883, | |
| "step": 3540, | |
| "valid_targets_mean": 4349.3, | |
| "valid_targets_min": 587 | |
| }, | |
| { | |
| "epoch": 5.672, | |
| "grad_norm": 0.3324271889745773, | |
| "learning_rate": 4.238365787877516e-06, | |
| "loss": 0.2109, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2171509563922882, | |
| "step": 3545, | |
| "valid_targets_mean": 6169.1, | |
| "valid_targets_min": 662 | |
| }, | |
| { | |
| "epoch": 5.68, | |
| "grad_norm": 0.35264056082201267, | |
| "learning_rate": 4.189370918352531e-06, | |
| "loss": 0.2445, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2608416676521301, | |
| "step": 3550, | |
| "valid_targets_mean": 5970.1, | |
| "valid_targets_min": 799 | |
| }, | |
| { | |
| "epoch": 5.688, | |
| "grad_norm": 0.35571995306346826, | |
| "learning_rate": 4.140627733923439e-06, | |
| "loss": 0.2235, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.21136212348937988, | |
| "step": 3555, | |
| "valid_targets_mean": 5257.9, | |
| "valid_targets_min": 1041 | |
| }, | |
| { | |
| "epoch": 5.696, | |
| "grad_norm": 0.5361387745629723, | |
| "learning_rate": 4.092137010519712e-06, | |
| "loss": 0.2284, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.27192410826683044, | |
| "step": 3560, | |
| "valid_targets_mean": 2914.6, | |
| "valid_targets_min": 778 | |
| }, | |
| { | |
| "epoch": 5.704, | |
| "grad_norm": 0.3505859733083266, | |
| "learning_rate": 4.043899520051964e-06, | |
| "loss": 0.2171, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2221640646457672, | |
| "step": 3565, | |
| "valid_targets_mean": 5925.2, | |
| "valid_targets_min": 1229 | |
| }, | |
| { | |
| "epoch": 5.712, | |
| "grad_norm": 0.49758496384659806, | |
| "learning_rate": 3.995916030399658e-06, | |
| "loss": 0.2272, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.27344635128974915, | |
| "step": 3570, | |
| "valid_targets_mean": 3491.2, | |
| "valid_targets_min": 991 | |
| }, | |
| { | |
| "epoch": 5.72, | |
| "grad_norm": 0.45141540037088984, | |
| "learning_rate": 3.948187305398892e-06, | |
| "loss": 0.2227, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.21061620116233826, | |
| "step": 3575, | |
| "valid_targets_mean": 3213.4, | |
| "valid_targets_min": 744 | |
| }, | |
| { | |
| "epoch": 5.728, | |
| "grad_norm": 0.4562178096296842, | |
| "learning_rate": 3.90071410483023e-06, | |
| "loss": 0.2238, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2696354389190674, | |
| "step": 3580, | |
| "valid_targets_mean": 4130.9, | |
| "valid_targets_min": 898 | |
| }, | |
| { | |
| "epoch": 5.736, | |
| "grad_norm": 0.5457573577288195, | |
| "learning_rate": 3.853497184406623e-06, | |
| "loss": 0.2356, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2726416289806366, | |
| "step": 3585, | |
| "valid_targets_mean": 2820.8, | |
| "valid_targets_min": 960 | |
| }, | |
| { | |
| "epoch": 5.744, | |
| "grad_norm": 0.38517110136124666, | |
| "learning_rate": 3.80653729576135e-06, | |
| "loss": 0.212, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2264452874660492, | |
| "step": 3590, | |
| "valid_targets_mean": 4501.1, | |
| "valid_targets_min": 704 | |
| }, | |
| { | |
| "epoch": 5.752, | |
| "grad_norm": 0.3357747814802782, | |
| "learning_rate": 3.7598351864360872e-06, | |
| "loss": 0.2315, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.23266029357910156, | |
| "step": 3595, | |
| "valid_targets_mean": 6337.8, | |
| "valid_targets_min": 714 | |
| }, | |
| { | |
| "epoch": 5.76, | |
| "grad_norm": 0.5020746707062533, | |
| "learning_rate": 3.713391599868985e-06, | |
| "loss": 0.2274, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2000608742237091, | |
| "step": 3600, | |
| "valid_targets_mean": 2661.7, | |
| "valid_targets_min": 791 | |
| }, | |
| { | |
| "epoch": 5.768, | |
| "grad_norm": 0.3763969776767038, | |
| "learning_rate": 3.6672072753828424e-06, | |
| "loss": 0.2275, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.21968242526054382, | |
| "step": 3605, | |
| "valid_targets_mean": 5032.1, | |
| "valid_targets_min": 1116 | |
| }, | |
| { | |
| "epoch": 5.776, | |
| "grad_norm": 0.41289477559493815, | |
| "learning_rate": 3.6212829481733368e-06, | |
| "loss": 0.2142, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2321629524230957, | |
| "step": 3610, | |
| "valid_targets_mean": 4038.8, | |
| "valid_targets_min": 805 | |
| }, | |
| { | |
| "epoch": 5.784, | |
| "grad_norm": 0.5445568651515015, | |
| "learning_rate": 3.575619349297317e-06, | |
| "loss": 0.237, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.25765252113342285, | |
| "step": 3615, | |
| "valid_targets_mean": 2809.0, | |
| "valid_targets_min": 739 | |
| }, | |
| { | |
| "epoch": 5.792, | |
| "grad_norm": 0.5472578411820961, | |
| "learning_rate": 3.5302172056611682e-06, | |
| "loss": 0.2371, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.25557756423950195, | |
| "step": 3620, | |
| "valid_targets_mean": 2783.1, | |
| "valid_targets_min": 883 | |
| }, | |
| { | |
| "epoch": 5.8, | |
| "grad_norm": 0.38982773078778155, | |
| "learning_rate": 3.485077240009247e-06, | |
| "loss": 0.2176, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.22403252124786377, | |
| "step": 3625, | |
| "valid_targets_mean": 4643.8, | |
| "valid_targets_min": 939 | |
| }, | |
| { | |
| "epoch": 5.808, | |
| "grad_norm": 0.5233043026636898, | |
| "learning_rate": 3.4402001709123643e-06, | |
| "loss": 0.2175, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.24592408537864685, | |
| "step": 3630, | |
| "valid_targets_mean": 2974.1, | |
| "valid_targets_min": 979 | |
| }, | |
| { | |
| "epoch": 5.816, | |
| "grad_norm": 0.45922405516006465, | |
| "learning_rate": 3.3955867127563515e-06, | |
| "loss": 0.227, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.24016621708869934, | |
| "step": 3635, | |
| "valid_targets_mean": 3544.6, | |
| "valid_targets_min": 832 | |
| }, | |
| { | |
| "epoch": 5.824, | |
| "grad_norm": 0.4404664235271227, | |
| "learning_rate": 3.351237575730695e-06, | |
| "loss": 0.2175, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.19508153200149536, | |
| "step": 3640, | |
| "valid_targets_mean": 3297.2, | |
| "valid_targets_min": 922 | |
| }, | |
| { | |
| "epoch": 5.832, | |
| "grad_norm": 0.4375315952651967, | |
| "learning_rate": 3.307153465817219e-06, | |
| "loss": 0.2313, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.24130797386169434, | |
| "step": 3645, | |
| "valid_targets_mean": 4176.2, | |
| "valid_targets_min": 1140 | |
| }, | |
| { | |
| "epoch": 5.84, | |
| "grad_norm": 0.342391410159785, | |
| "learning_rate": 3.263335084778856e-06, | |
| "loss": 0.2123, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.21955269575119019, | |
| "step": 3650, | |
| "valid_targets_mean": 5674.6, | |
| "valid_targets_min": 916 | |
| }, | |
| { | |
| "epoch": 5.848, | |
| "grad_norm": 0.45569732049327233, | |
| "learning_rate": 3.2197831301484816e-06, | |
| "loss": 0.2237, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.23995280265808105, | |
| "step": 3655, | |
| "valid_targets_mean": 3858.6, | |
| "valid_targets_min": 1042 | |
| }, | |
| { | |
| "epoch": 5.856, | |
| "grad_norm": 0.43878393630957296, | |
| "learning_rate": 3.1764982952177805e-06, | |
| "loss": 0.2281, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2358689308166504, | |
| "step": 3660, | |
| "valid_targets_mean": 4056.5, | |
| "valid_targets_min": 815 | |
| }, | |
| { | |
| "epoch": 5.864, | |
| "grad_norm": 0.4303963408010303, | |
| "learning_rate": 3.1334812690262507e-06, | |
| "loss": 0.223, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.247379869222641, | |
| "step": 3665, | |
| "valid_targets_mean": 4625.3, | |
| "valid_targets_min": 1003 | |
| }, | |
| { | |
| "epoch": 5.872, | |
| "grad_norm": 0.45709638886257675, | |
| "learning_rate": 3.0907327363502084e-06, | |
| "loss": 0.2256, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.21598784625530243, | |
| "step": 3670, | |
| "valid_targets_mean": 3449.9, | |
| "valid_targets_min": 1054 | |
| }, | |
| { | |
| "epoch": 5.88, | |
| "grad_norm": 0.4280225439458205, | |
| "learning_rate": 3.0482533776918987e-06, | |
| "loss": 0.2411, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.25824448466300964, | |
| "step": 3675, | |
| "valid_targets_mean": 4414.6, | |
| "valid_targets_min": 875 | |
| }, | |
| { | |
| "epoch": 5.888, | |
| "grad_norm": 0.3077675965104222, | |
| "learning_rate": 3.0060438692686533e-06, | |
| "loss": 0.2277, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1975744068622589, | |
| "step": 3680, | |
| "valid_targets_mean": 5987.2, | |
| "valid_targets_min": 793 | |
| }, | |
| { | |
| "epoch": 5.896, | |
| "grad_norm": 0.3403603704565209, | |
| "learning_rate": 2.964104883002139e-06, | |
| "loss": 0.2161, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.21167896687984467, | |
| "step": 3685, | |
| "valid_targets_mean": 5536.9, | |
| "valid_targets_min": 871 | |
| }, | |
| { | |
| "epoch": 5.904, | |
| "grad_norm": 0.4054315157161449, | |
| "learning_rate": 2.9224370865076457e-06, | |
| "loss": 0.2213, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.25502288341522217, | |
| "step": 3690, | |
| "valid_targets_mean": 4805.2, | |
| "valid_targets_min": 822 | |
| }, | |
| { | |
| "epoch": 5.912, | |
| "grad_norm": 0.35789980590099835, | |
| "learning_rate": 2.8810411430834716e-06, | |
| "loss": 0.229, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.19632747769355774, | |
| "step": 3695, | |
| "valid_targets_mean": 5478.2, | |
| "valid_targets_min": 712 | |
| }, | |
| { | |
| "epoch": 5.92, | |
| "grad_norm": 0.6164021351090805, | |
| "learning_rate": 2.8399177117003595e-06, | |
| "loss": 0.2297, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2608773410320282, | |
| "step": 3700, | |
| "valid_targets_mean": 2676.1, | |
| "valid_targets_min": 944 | |
| }, | |
| { | |
| "epoch": 5.928, | |
| "grad_norm": 0.45745995980330256, | |
| "learning_rate": 2.7990674469910085e-06, | |
| "loss": 0.2337, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.21105098724365234, | |
| "step": 3705, | |
| "valid_targets_mean": 3218.9, | |
| "valid_targets_min": 639 | |
| }, | |
| { | |
| "epoch": 5.936, | |
| "grad_norm": 0.3710919492928431, | |
| "learning_rate": 2.7584909992396515e-06, | |
| "loss": 0.2286, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.25090497732162476, | |
| "step": 3710, | |
| "valid_targets_mean": 5523.6, | |
| "valid_targets_min": 952 | |
| }, | |
| { | |
| "epoch": 5.944, | |
| "grad_norm": 0.3323094252823238, | |
| "learning_rate": 2.7181890143716995e-06, | |
| "loss": 0.2288, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.20140644907951355, | |
| "step": 3715, | |
| "valid_targets_mean": 6081.9, | |
| "valid_targets_min": 1116 | |
| }, | |
| { | |
| "epoch": 5.952, | |
| "grad_norm": 0.4718363509817398, | |
| "learning_rate": 2.6781621339434717e-06, | |
| "loss": 0.234, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.20775261521339417, | |
| "step": 3720, | |
| "valid_targets_mean": 3344.9, | |
| "valid_targets_min": 937 | |
| }, | |
| { | |
| "epoch": 5.96, | |
| "grad_norm": 0.3964056160212036, | |
| "learning_rate": 2.638410995131966e-06, | |
| "loss": 0.2396, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2471739947795868, | |
| "step": 3725, | |
| "valid_targets_mean": 4901.1, | |
| "valid_targets_min": 930 | |
| }, | |
| { | |
| "epoch": 5.968, | |
| "grad_norm": 0.3725995303239783, | |
| "learning_rate": 2.5989362307247313e-06, | |
| "loss": 0.2127, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.20092034339904785, | |
| "step": 3730, | |
| "valid_targets_mean": 4411.7, | |
| "valid_targets_min": 770 | |
| }, | |
| { | |
| "epoch": 5.976, | |
| "grad_norm": 0.38039589975141935, | |
| "learning_rate": 2.5597384691097847e-06, | |
| "loss": 0.2314, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.21905504167079926, | |
| "step": 3735, | |
| "valid_targets_mean": 4735.8, | |
| "valid_targets_min": 717 | |
| }, | |
| { | |
| "epoch": 5.984, | |
| "grad_norm": 0.5842176629456968, | |
| "learning_rate": 2.520818334265611e-06, | |
| "loss": 0.2326, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2322233021259308, | |
| "step": 3740, | |
| "valid_targets_mean": 2426.8, | |
| "valid_targets_min": 762 | |
| }, | |
| { | |
| "epoch": 5.992, | |
| "grad_norm": 0.3974036678792113, | |
| "learning_rate": 2.482176445751232e-06, | |
| "loss": 0.2361, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.23819592595100403, | |
| "step": 3745, | |
| "valid_targets_mean": 5103.4, | |
| "valid_targets_min": 806 | |
| }, | |
| { | |
| "epoch": 6.0, | |
| "grad_norm": 0.5999847162525445, | |
| "learning_rate": 2.4438134186963415e-06, | |
| "loss": 0.2451, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.26903235912323, | |
| "step": 3750, | |
| "valid_targets_mean": 5103.5, | |
| "valid_targets_min": 786 | |
| }, | |
| { | |
| "epoch": 6.008, | |
| "grad_norm": 0.3349477545342433, | |
| "learning_rate": 2.4057298637915105e-06, | |
| "loss": 0.2223, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.21318253874778748, | |
| "step": 3755, | |
| "valid_targets_mean": 6269.4, | |
| "valid_targets_min": 896 | |
| }, | |
| { | |
| "epoch": 6.016, | |
| "grad_norm": 0.4317583641741853, | |
| "learning_rate": 2.3679263872784717e-06, | |
| "loss": 0.2476, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.26914650201797485, | |
| "step": 3760, | |
| "valid_targets_mean": 4306.1, | |
| "valid_targets_min": 810 | |
| }, | |
| { | |
| "epoch": 6.024, | |
| "grad_norm": 0.41472298145715747, | |
| "learning_rate": 2.330403590940471e-06, | |
| "loss": 0.2305, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.24200578033924103, | |
| "step": 3765, | |
| "valid_targets_mean": 4969.2, | |
| "valid_targets_min": 864 | |
| }, | |
| { | |
| "epoch": 6.032, | |
| "grad_norm": 0.40539356972798346, | |
| "learning_rate": 2.2931620720926717e-06, | |
| "loss": 0.2365, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2599494457244873, | |
| "step": 3770, | |
| "valid_targets_mean": 5092.1, | |
| "valid_targets_min": 1093 | |
| }, | |
| { | |
| "epoch": 6.04, | |
| "grad_norm": 0.5024307919648837, | |
| "learning_rate": 2.256202423572669e-06, | |
| "loss": 0.228, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.24911504983901978, | |
| "step": 3775, | |
| "valid_targets_mean": 3418.2, | |
| "valid_targets_min": 819 | |
| }, | |
| { | |
| "epoch": 6.048, | |
| "grad_norm": 0.4439148698555944, | |
| "learning_rate": 2.219525233731035e-06, | |
| "loss": 0.2339, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.22976058721542358, | |
| "step": 3780, | |
| "valid_targets_mean": 3652.3, | |
| "valid_targets_min": 878 | |
| }, | |
| { | |
| "epoch": 6.056, | |
| "grad_norm": 0.4025990629518132, | |
| "learning_rate": 2.183131086421961e-06, | |
| "loss": 0.2193, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2216932326555252, | |
| "step": 3785, | |
| "valid_targets_mean": 4467.1, | |
| "valid_targets_min": 651 | |
| }, | |
| { | |
| "epoch": 6.064, | |
| "grad_norm": 0.34119261650968813, | |
| "learning_rate": 2.1470205609939533e-06, | |
| "loss": 0.2269, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.22316013276576996, | |
| "step": 3790, | |
| "valid_targets_mean": 5872.5, | |
| "valid_targets_min": 743 | |
| }, | |
| { | |
| "epoch": 6.072, | |
| "grad_norm": 0.44619413022846166, | |
| "learning_rate": 2.1111942322806335e-06, | |
| "loss": 0.2155, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.22817836701869965, | |
| "step": 3795, | |
| "valid_targets_mean": 3575.1, | |
| "valid_targets_min": 782 | |
| }, | |
| { | |
| "epoch": 6.08, | |
| "grad_norm": 0.3574078861297622, | |
| "learning_rate": 2.0756526705915635e-06, | |
| "loss": 0.2151, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.24410027265548706, | |
| "step": 3800, | |
| "valid_targets_mean": 5989.6, | |
| "valid_targets_min": 1000 | |
| }, | |
| { | |
| "epoch": 6.088, | |
| "grad_norm": 0.6009272336278385, | |
| "learning_rate": 2.0403964417031764e-06, | |
| "loss": 0.233, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.23332129418849945, | |
| "step": 3805, | |
| "valid_targets_mean": 2330.4, | |
| "valid_targets_min": 616 | |
| }, | |
| { | |
| "epoch": 6.096, | |
| "grad_norm": 0.36144075081920424, | |
| "learning_rate": 2.0054261068497773e-06, | |
| "loss": 0.2198, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.20843176543712616, | |
| "step": 3810, | |
| "valid_targets_mean": 5203.1, | |
| "valid_targets_min": 1022 | |
| }, | |
| { | |
| "epoch": 6.104, | |
| "grad_norm": 0.38552202621236137, | |
| "learning_rate": 1.9707422227145922e-06, | |
| "loss": 0.233, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.22240760922431946, | |
| "step": 3815, | |
| "valid_targets_mean": 4687.2, | |
| "valid_targets_min": 1028 | |
| }, | |
| { | |
| "epoch": 6.112, | |
| "grad_norm": 0.41879689644483564, | |
| "learning_rate": 1.936345341420924e-06, | |
| "loss": 0.24, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2170860469341278, | |
| "step": 3820, | |
| "valid_targets_mean": 4666.2, | |
| "valid_targets_min": 1176 | |
| }, | |
| { | |
| "epoch": 6.12, | |
| "grad_norm": 0.6687266732348552, | |
| "learning_rate": 1.9022360105233507e-06, | |
| "loss": 0.2228, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2556125521659851, | |
| "step": 3825, | |
| "valid_targets_mean": 2104.1, | |
| "valid_targets_min": 1037 | |
| }, | |
| { | |
| "epoch": 6.128, | |
| "grad_norm": 0.34492206038582207, | |
| "learning_rate": 1.8684147729990188e-06, | |
| "loss": 0.2382, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2455054074525833, | |
| "step": 3830, | |
| "valid_targets_mean": 6336.3, | |
| "valid_targets_min": 1050 | |
| }, | |
| { | |
| "epoch": 6.136, | |
| "grad_norm": 0.4159150879186752, | |
| "learning_rate": 1.8348821672389893e-06, | |
| "loss": 0.2252, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.225693017244339, | |
| "step": 3835, | |
| "valid_targets_mean": 4592.3, | |
| "valid_targets_min": 1017 | |
| }, | |
| { | |
| "epoch": 6.144, | |
| "grad_norm": 0.38859249778164073, | |
| "learning_rate": 1.8016387270396784e-06, | |
| "loss": 0.2413, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.23034977912902832, | |
| "step": 3840, | |
| "valid_targets_mean": 5167.3, | |
| "valid_targets_min": 881 | |
| }, | |
| { | |
| "epoch": 6.152, | |
| "grad_norm": 0.3203760106705756, | |
| "learning_rate": 1.7686849815943486e-06, | |
| "loss": 0.2198, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.21037934720516205, | |
| "step": 3845, | |
| "valid_targets_mean": 6393.3, | |
| "valid_targets_min": 919 | |
| }, | |
| { | |
| "epoch": 6.16, | |
| "grad_norm": 0.3983227918706389, | |
| "learning_rate": 1.7360214554847e-06, | |
| "loss": 0.2147, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2141357660293579, | |
| "step": 3850, | |
| "valid_targets_mean": 4325.4, | |
| "valid_targets_min": 920 | |
| }, | |
| { | |
| "epoch": 6.168, | |
| "grad_norm": 0.3289919821089591, | |
| "learning_rate": 1.703648668672495e-06, | |
| "loss": 0.2274, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.21512682735919952, | |
| "step": 3855, | |
| "valid_targets_mean": 6087.9, | |
| "valid_targets_min": 903 | |
| }, | |
| { | |
| "epoch": 6.176, | |
| "grad_norm": 0.43853553740948986, | |
| "learning_rate": 1.6715671364913077e-06, | |
| "loss": 0.2452, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.21846427023410797, | |
| "step": 3860, | |
| "valid_targets_mean": 3917.2, | |
| "valid_targets_min": 899 | |
| }, | |
| { | |
| "epoch": 6.184, | |
| "grad_norm": 0.38127578812735524, | |
| "learning_rate": 1.6397773696383091e-06, | |
| "loss": 0.2136, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.21105417609214783, | |
| "step": 3865, | |
| "valid_targets_mean": 4652.4, | |
| "valid_targets_min": 787 | |
| }, | |
| { | |
| "epoch": 6.192, | |
| "grad_norm": 0.3396436270940679, | |
| "learning_rate": 1.6082798741661321e-06, | |
| "loss": 0.2313, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.20317316055297852, | |
| "step": 3870, | |
| "valid_targets_mean": 5430.1, | |
| "valid_targets_min": 893 | |
| }, | |
| { | |
| "epoch": 6.2, | |
| "grad_norm": 0.38228587817606946, | |
| "learning_rate": 1.5770751514748273e-06, | |
| "loss": 0.2181, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.21367615461349487, | |
| "step": 3875, | |
| "valid_targets_mean": 4672.6, | |
| "valid_targets_min": 1012 | |
| }, | |
| { | |
| "epoch": 6.208, | |
| "grad_norm": 0.47820322237336815, | |
| "learning_rate": 1.5461636983038686e-06, | |
| "loss": 0.2155, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.21911242604255676, | |
| "step": 3880, | |
| "valid_targets_mean": 3371.2, | |
| "valid_targets_min": 997 | |
| }, | |
| { | |
| "epoch": 6.216, | |
| "grad_norm": 0.4399583273624092, | |
| "learning_rate": 1.5155460067242578e-06, | |
| "loss": 0.2418, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.23615899682044983, | |
| "step": 3885, | |
| "valid_targets_mean": 4043.5, | |
| "valid_targets_min": 745 | |
| }, | |
| { | |
| "epoch": 6.224, | |
| "grad_norm": 0.3398925492873628, | |
| "learning_rate": 1.4852225641306816e-06, | |
| "loss": 0.2255, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2137679159641266, | |
| "step": 3890, | |
| "valid_targets_mean": 5939.5, | |
| "valid_targets_min": 830 | |
| }, | |
| { | |
| "epoch": 6.232, | |
| "grad_norm": 0.4733714407702554, | |
| "learning_rate": 1.4551938532337607e-06, | |
| "loss": 0.2363, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2328024059534073, | |
| "step": 3895, | |
| "valid_targets_mean": 3431.8, | |
| "valid_targets_min": 972 | |
| }, | |
| { | |
| "epoch": 6.24, | |
| "grad_norm": 0.561178279992971, | |
| "learning_rate": 1.4254603520523614e-06, | |
| "loss": 0.2294, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.21961773931980133, | |
| "step": 3900, | |
| "valid_targets_mean": 2525.5, | |
| "valid_targets_min": 969 | |
| }, | |
| { | |
| "epoch": 6.248, | |
| "grad_norm": 0.4741052980704381, | |
| "learning_rate": 1.3960225339059875e-06, | |
| "loss": 0.2233, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.23817236721515656, | |
| "step": 3905, | |
| "valid_targets_mean": 4482.2, | |
| "valid_targets_min": 693 | |
| }, | |
| { | |
| "epoch": 6.256, | |
| "grad_norm": 0.4411532668736603, | |
| "learning_rate": 1.3668808674072409e-06, | |
| "loss": 0.2242, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2441355437040329, | |
| "step": 3910, | |
| "valid_targets_mean": 3981.3, | |
| "valid_targets_min": 984 | |
| }, | |
| { | |
| "epoch": 6.264, | |
| "grad_norm": 0.4003229501442681, | |
| "learning_rate": 1.338035816454375e-06, | |
| "loss": 0.2119, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.21280062198638916, | |
| "step": 3915, | |
| "valid_targets_mean": 4549.2, | |
| "valid_targets_min": 1135 | |
| }, | |
| { | |
| "epoch": 6.272, | |
| "grad_norm": 0.4571069597331237, | |
| "learning_rate": 1.3094878402238887e-06, | |
| "loss": 0.2265, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2197241187095642, | |
| "step": 3920, | |
| "valid_targets_mean": 3564.8, | |
| "valid_targets_min": 762 | |
| }, | |
| { | |
| "epoch": 6.28, | |
| "grad_norm": 0.36182902012131324, | |
| "learning_rate": 1.2812373931632371e-06, | |
| "loss": 0.2133, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.23633159697055817, | |
| "step": 3925, | |
| "valid_targets_mean": 5213.5, | |
| "valid_targets_min": 938 | |
| }, | |
| { | |
| "epoch": 6.288, | |
| "grad_norm": 0.34785431888006046, | |
| "learning_rate": 1.2532849249835932e-06, | |
| "loss": 0.2183, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.20579376816749573, | |
| "step": 3930, | |
| "valid_targets_mean": 5596.8, | |
| "valid_targets_min": 776 | |
| }, | |
| { | |
| "epoch": 6.296, | |
| "grad_norm": 0.4114868588415092, | |
| "learning_rate": 1.2256308806526774e-06, | |
| "loss": 0.2244, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.22689789533615112, | |
| "step": 3935, | |
| "valid_targets_mean": 4300.3, | |
| "valid_targets_min": 813 | |
| }, | |
| { | |
| "epoch": 6.304, | |
| "grad_norm": 0.39604646648569675, | |
| "learning_rate": 1.1982757003876855e-06, | |
| "loss": 0.2337, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.24964919686317444, | |
| "step": 3940, | |
| "valid_targets_mean": 5153.9, | |
| "valid_targets_min": 711 | |
| }, | |
| { | |
| "epoch": 6.312, | |
| "grad_norm": 0.3849772647568709, | |
| "learning_rate": 1.1712198196482793e-06, | |
| "loss": 0.224, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.26009365916252136, | |
| "step": 3945, | |
| "valid_targets_mean": 5058.6, | |
| "valid_targets_min": 980 | |
| }, | |
| { | |
| "epoch": 6.32, | |
| "grad_norm": 0.48211470656475425, | |
| "learning_rate": 1.1444636691296518e-06, | |
| "loss": 0.2226, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.21652325987815857, | |
| "step": 3950, | |
| "valid_targets_mean": 3130.9, | |
| "valid_targets_min": 798 | |
| }, | |
| { | |
| "epoch": 6.328, | |
| "grad_norm": 0.5644365548848527, | |
| "learning_rate": 1.11800767475567e-06, | |
| "loss": 0.246, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.27526530623435974, | |
| "step": 3955, | |
| "valid_targets_mean": 2978.4, | |
| "valid_targets_min": 849 | |
| }, | |
| { | |
| "epoch": 6.336, | |
| "grad_norm": 0.40333014592575334, | |
| "learning_rate": 1.0918522576721014e-06, | |
| "loss": 0.228, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.27383342385292053, | |
| "step": 3960, | |
| "valid_targets_mean": 5273.9, | |
| "valid_targets_min": 630 | |
| }, | |
| { | |
| "epoch": 6.344, | |
| "grad_norm": 0.4321759599915463, | |
| "learning_rate": 1.0659978342399003e-06, | |
| "loss": 0.2148, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.24329343438148499, | |
| "step": 3965, | |
| "valid_targets_mean": 4204.4, | |
| "valid_targets_min": 827 | |
| }, | |
| { | |
| "epoch": 6.352, | |
| "grad_norm": 0.4489831786268062, | |
| "learning_rate": 1.0404448160285897e-06, | |
| "loss": 0.2281, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.20862233638763428, | |
| "step": 3970, | |
| "valid_targets_mean": 3446.6, | |
| "valid_targets_min": 936 | |
| }, | |
| { | |
| "epoch": 6.36, | |
| "grad_norm": 0.6568484958919774, | |
| "learning_rate": 1.0151936098097015e-06, | |
| "loss": 0.2307, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2893434762954712, | |
| "step": 3975, | |
| "valid_targets_mean": 2431.0, | |
| "valid_targets_min": 865 | |
| }, | |
| { | |
| "epoch": 6.368, | |
| "grad_norm": 0.436806643140305, | |
| "learning_rate": 9.902446175503089e-07, | |
| "loss": 0.2075, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.21049389243125916, | |
| "step": 3980, | |
| "valid_targets_mean": 3591.6, | |
| "valid_targets_min": 558 | |
| }, | |
| { | |
| "epoch": 6.376, | |
| "grad_norm": 0.3733718504908191, | |
| "learning_rate": 9.655982364066197e-07, | |
| "loss": 0.2208, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.19698584079742432, | |
| "step": 3985, | |
| "valid_targets_mean": 4745.1, | |
| "valid_targets_min": 1147 | |
| }, | |
| { | |
| "epoch": 6.384, | |
| "grad_norm": 0.56021488352525, | |
| "learning_rate": 9.412548587176595e-07, | |
| "loss": 0.2313, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.21981269121170044, | |
| "step": 3990, | |
| "valid_targets_mean": 2582.5, | |
| "valid_targets_min": 679 | |
| }, | |
| { | |
| "epoch": 6.392, | |
| "grad_norm": 0.4255541904342452, | |
| "learning_rate": 9.172148719990237e-07, | |
| "loss": 0.2134, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.23671914637088776, | |
| "step": 3995, | |
| "valid_targets_mean": 4313.3, | |
| "valid_targets_min": 752 | |
| }, | |
| { | |
| "epoch": 6.4, | |
| "grad_norm": 0.4102539193396227, | |
| "learning_rate": 8.934786589367106e-07, | |
| "loss": 0.2198, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2218114733695984, | |
| "step": 4000, | |
| "valid_targets_mean": 4082.2, | |
| "valid_targets_min": 787 | |
| }, | |
| { | |
| "epoch": 6.408, | |
| "grad_norm": 0.34989430636633106, | |
| "learning_rate": 8.700465973810246e-07, | |
| "loss": 0.2215, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1899658888578415, | |
| "step": 4005, | |
| "valid_targets_mean": 4683.2, | |
| "valid_targets_min": 963 | |
| }, | |
| { | |
| "epoch": 6.416, | |
| "grad_norm": 0.5084082139951963, | |
| "learning_rate": 8.469190603405719e-07, | |
| "loss": 0.2362, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.22666466236114502, | |
| "step": 4010, | |
| "valid_targets_mean": 2856.0, | |
| "valid_targets_min": 836 | |
| }, | |
| { | |
| "epoch": 6.424, | |
| "grad_norm": 0.38334389778158073, | |
| "learning_rate": 8.240964159763121e-07, | |
| "loss": 0.2276, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.20574325323104858, | |
| "step": 4015, | |
| "valid_targets_mean": 4797.1, | |
| "valid_targets_min": 989 | |
| }, | |
| { | |
| "epoch": 6.432, | |
| "grad_norm": 0.43778396216540555, | |
| "learning_rate": 8.015790275957003e-07, | |
| "loss": 0.2379, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2721363604068756, | |
| "step": 4020, | |
| "valid_targets_mean": 4351.1, | |
| "valid_targets_min": 1141 | |
| }, | |
| { | |
| "epoch": 6.44, | |
| "grad_norm": 0.42691802245050525, | |
| "learning_rate": 7.793672536469077e-07, | |
| "loss": 0.2179, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.236424520611763, | |
| "step": 4025, | |
| "valid_targets_mean": 3813.6, | |
| "valid_targets_min": 820 | |
| }, | |
| { | |
| "epoch": 6.448, | |
| "grad_norm": 0.36072477649308493, | |
| "learning_rate": 7.574614477131081e-07, | |
| "loss": 0.2267, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.18866382539272308, | |
| "step": 4030, | |
| "valid_targets_mean": 4367.6, | |
| "valid_targets_min": 843 | |
| }, | |
| { | |
| "epoch": 6.456, | |
| "grad_norm": 0.41123091937206413, | |
| "learning_rate": 7.358619585068583e-07, | |
| "loss": 0.2313, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.23328976333141327, | |
| "step": 4035, | |
| "valid_targets_mean": 4417.2, | |
| "valid_targets_min": 939 | |
| }, | |
| { | |
| "epoch": 6.464, | |
| "grad_norm": 0.4254203784811884, | |
| "learning_rate": 7.145691298645419e-07, | |
| "loss": 0.2186, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2179802656173706, | |
| "step": 4040, | |
| "valid_targets_mean": 3939.2, | |
| "valid_targets_min": 946 | |
| }, | |
| { | |
| "epoch": 6.4719999999999995, | |
| "grad_norm": 0.39330349529070346, | |
| "learning_rate": 6.935833007408965e-07, | |
| "loss": 0.2158, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.22284376621246338, | |
| "step": 4045, | |
| "valid_targets_mean": 4508.6, | |
| "valid_targets_min": 623 | |
| }, | |
| { | |
| "epoch": 6.48, | |
| "grad_norm": 0.44865937037871895, | |
| "learning_rate": 6.729048052036136e-07, | |
| "loss": 0.2339, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.21400213241577148, | |
| "step": 4050, | |
| "valid_targets_mean": 3500.1, | |
| "valid_targets_min": 858 | |
| }, | |
| { | |
| "epoch": 6.4879999999999995, | |
| "grad_norm": 0.48021795524832595, | |
| "learning_rate": 6.52533972428031e-07, | |
| "loss": 0.2178, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.24885594844818115, | |
| "step": 4055, | |
| "valid_targets_mean": 3483.8, | |
| "valid_targets_min": 971 | |
| }, | |
| { | |
| "epoch": 6.496, | |
| "grad_norm": 0.34882215560284985, | |
| "learning_rate": 6.324711266918826e-07, | |
| "loss": 0.2136, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.21410886943340302, | |
| "step": 4060, | |
| "valid_targets_mean": 5634.1, | |
| "valid_targets_min": 991 | |
| }, | |
| { | |
| "epoch": 6.504, | |
| "grad_norm": 0.5126446488160206, | |
| "learning_rate": 6.127165873701457e-07, | |
| "loss": 0.2081, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.21240456402301788, | |
| "step": 4065, | |
| "valid_targets_mean": 2864.9, | |
| "valid_targets_min": 985 | |
| }, | |
| { | |
| "epoch": 6.5120000000000005, | |
| "grad_norm": 0.3140688580026172, | |
| "learning_rate": 5.932706689299461e-07, | |
| "loss": 0.2239, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.21024207770824432, | |
| "step": 4070, | |
| "valid_targets_mean": 6588.2, | |
| "valid_targets_min": 1101 | |
| }, | |
| { | |
| "epoch": 6.52, | |
| "grad_norm": 0.4986798403758234, | |
| "learning_rate": 5.741336809255615e-07, | |
| "loss": 0.2246, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.22286343574523926, | |
| "step": 4075, | |
| "valid_targets_mean": 3069.7, | |
| "valid_targets_min": 1026 | |
| }, | |
| { | |
| "epoch": 6.5280000000000005, | |
| "grad_norm": 0.3921933421532105, | |
| "learning_rate": 5.553059279934902e-07, | |
| "loss": 0.2241, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2469090223312378, | |
| "step": 4080, | |
| "valid_targets_mean": 4846.6, | |
| "valid_targets_min": 741 | |
| }, | |
| { | |
| "epoch": 6.536, | |
| "grad_norm": 1.4197403594404054, | |
| "learning_rate": 5.36787709847597e-07, | |
| "loss": 0.218, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.21249955892562866, | |
| "step": 4085, | |
| "valid_targets_mean": 3518.8, | |
| "valid_targets_min": 947 | |
| }, | |
| { | |
| "epoch": 6.5440000000000005, | |
| "grad_norm": 0.39790011129897357, | |
| "learning_rate": 5.185793212743529e-07, | |
| "loss": 0.225, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2455723136663437, | |
| "step": 4090, | |
| "valid_targets_mean": 4974.9, | |
| "valid_targets_min": 833 | |
| }, | |
| { | |
| "epoch": 6.552, | |
| "grad_norm": 0.506846713782852, | |
| "learning_rate": 5.006810521281335e-07, | |
| "loss": 0.2219, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.22944439947605133, | |
| "step": 4095, | |
| "valid_targets_mean": 3162.4, | |
| "valid_targets_min": 889 | |
| }, | |
| { | |
| "epoch": 6.5600000000000005, | |
| "grad_norm": 0.5304911335607064, | |
| "learning_rate": 4.830931873266065e-07, | |
| "loss": 0.2261, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.228239044547081, | |
| "step": 4100, | |
| "valid_targets_mean": 2786.3, | |
| "valid_targets_min": 655 | |
| }, | |
| { | |
| "epoch": 6.568, | |
| "grad_norm": 0.42229465032601937, | |
| "learning_rate": 4.658160068462025e-07, | |
| "loss": 0.2342, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.22529050707817078, | |
| "step": 4105, | |
| "valid_targets_mean": 4120.8, | |
| "valid_targets_min": 924 | |
| }, | |
| { | |
| "epoch": 6.576, | |
| "grad_norm": 0.32368728463004987, | |
| "learning_rate": 4.488497857176466e-07, | |
| "loss": 0.2132, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.21758395433425903, | |
| "step": 4110, | |
| "valid_targets_mean": 6052.4, | |
| "valid_targets_min": 890 | |
| }, | |
| { | |
| "epoch": 6.584, | |
| "grad_norm": 0.3876726339635795, | |
| "learning_rate": 4.321947940215898e-07, | |
| "loss": 0.2271, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.27703386545181274, | |
| "step": 4115, | |
| "valid_targets_mean": 5807.9, | |
| "valid_targets_min": 955 | |
| }, | |
| { | |
| "epoch": 6.592, | |
| "grad_norm": 0.5109155715461828, | |
| "learning_rate": 4.1585129688430425e-07, | |
| "loss": 0.2481, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2829819321632385, | |
| "step": 4120, | |
| "valid_targets_mean": 3333.1, | |
| "valid_targets_min": 867 | |
| }, | |
| { | |
| "epoch": 6.6, | |
| "grad_norm": 0.4470302028073575, | |
| "learning_rate": 3.998195544734706e-07, | |
| "loss": 0.2337, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.24119625985622406, | |
| "step": 4125, | |
| "valid_targets_mean": 4098.7, | |
| "valid_targets_min": 1055 | |
| }, | |
| { | |
| "epoch": 6.608, | |
| "grad_norm": 0.3879001559853112, | |
| "learning_rate": 3.840998219940284e-07, | |
| "loss": 0.2266, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.23125238716602325, | |
| "step": 4130, | |
| "valid_targets_mean": 4793.9, | |
| "valid_targets_min": 995 | |
| }, | |
| { | |
| "epoch": 6.616, | |
| "grad_norm": 0.3390815157643552, | |
| "learning_rate": 3.6869234968411214e-07, | |
| "loss": 0.2191, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.21983827650547028, | |
| "step": 4135, | |
| "valid_targets_mean": 5686.7, | |
| "valid_targets_min": 931 | |
| }, | |
| { | |
| "epoch": 6.624, | |
| "grad_norm": 0.48742691434984764, | |
| "learning_rate": 3.5359738281107504e-07, | |
| "loss": 0.2315, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.23688867688179016, | |
| "step": 4140, | |
| "valid_targets_mean": 3576.5, | |
| "valid_targets_min": 877 | |
| }, | |
| { | |
| "epoch": 6.632, | |
| "grad_norm": 0.5825891651795995, | |
| "learning_rate": 3.38815161667585e-07, | |
| "loss": 0.2227, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2347678393125534, | |
| "step": 4145, | |
| "valid_targets_mean": 2693.6, | |
| "valid_targets_min": 857 | |
| }, | |
| { | |
| "epoch": 6.64, | |
| "grad_norm": 0.33240965337871686, | |
| "learning_rate": 3.24345921567788e-07, | |
| "loss": 0.2247, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.22942331433296204, | |
| "step": 4150, | |
| "valid_targets_mean": 6286.4, | |
| "valid_targets_min": 1070 | |
| }, | |
| { | |
| "epoch": 6.648, | |
| "grad_norm": 0.47442097754914475, | |
| "learning_rate": 3.101898928435754e-07, | |
| "loss": 0.2112, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2143860161304474, | |
| "step": 4155, | |
| "valid_targets_mean": 2954.4, | |
| "valid_targets_min": 1049 | |
| }, | |
| { | |
| "epoch": 6.656, | |
| "grad_norm": 0.47947322659482045, | |
| "learning_rate": 2.9634730084091343e-07, | |
| "loss": 0.2222, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.22327253222465515, | |
| "step": 4160, | |
| "valid_targets_mean": 3591.6, | |
| "valid_targets_min": 942 | |
| }, | |
| { | |
| "epoch": 6.664, | |
| "grad_norm": 0.4454312428655186, | |
| "learning_rate": 2.8281836591624865e-07, | |
| "loss": 0.2137, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.22166194021701813, | |
| "step": 4165, | |
| "valid_targets_mean": 3687.9, | |
| "valid_targets_min": 710 | |
| }, | |
| { | |
| "epoch": 6.672, | |
| "grad_norm": 0.4171522266163402, | |
| "learning_rate": 2.6960330343301033e-07, | |
| "loss": 0.2166, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.23376452922821045, | |
| "step": 4170, | |
| "valid_targets_mean": 4033.3, | |
| "valid_targets_min": 978 | |
| }, | |
| { | |
| "epoch": 6.68, | |
| "grad_norm": 0.4021870631095099, | |
| "learning_rate": 2.5670232375817784e-07, | |
| "loss": 0.2115, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.19755598902702332, | |
| "step": 4175, | |
| "valid_targets_mean": 4376.8, | |
| "valid_targets_min": 690 | |
| }, | |
| { | |
| "epoch": 6.688, | |
| "grad_norm": 0.4432006157292327, | |
| "learning_rate": 2.441156322589322e-07, | |
| "loss": 0.2094, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2194088101387024, | |
| "step": 4180, | |
| "valid_targets_mean": 3696.2, | |
| "valid_targets_min": 937 | |
| }, | |
| { | |
| "epoch": 6.696, | |
| "grad_norm": 0.3533442609966585, | |
| "learning_rate": 2.318434292993832e-07, | |
| "loss": 0.2187, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.22602006793022156, | |
| "step": 4185, | |
| "valid_targets_mean": 5390.4, | |
| "valid_targets_min": 952 | |
| }, | |
| { | |
| "epoch": 6.704, | |
| "grad_norm": 0.5451719228606399, | |
| "learning_rate": 2.1988591023738514e-07, | |
| "loss": 0.2148, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.23980574309825897, | |
| "step": 4190, | |
| "valid_targets_mean": 2911.3, | |
| "valid_targets_min": 756 | |
| }, | |
| { | |
| "epoch": 6.712, | |
| "grad_norm": 0.3356047849602916, | |
| "learning_rate": 2.0824326542142835e-07, | |
| "loss": 0.213, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.18571820855140686, | |
| "step": 4195, | |
| "valid_targets_mean": 5152.4, | |
| "valid_targets_min": 1057 | |
| }, | |
| { | |
| "epoch": 6.72, | |
| "grad_norm": 0.5229315558444865, | |
| "learning_rate": 1.9691568018759931e-07, | |
| "loss": 0.233, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.277045339345932, | |
| "step": 4200, | |
| "valid_targets_mean": 3369.2, | |
| "valid_targets_min": 805 | |
| }, | |
| { | |
| "epoch": 6.728, | |
| "grad_norm": 0.3325315865260218, | |
| "learning_rate": 1.8590333485664525e-07, | |
| "loss": 0.2141, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2092788815498352, | |
| "step": 4205, | |
| "valid_targets_mean": 5736.8, | |
| "valid_targets_min": 972 | |
| }, | |
| { | |
| "epoch": 6.736, | |
| "grad_norm": 0.467439774714686, | |
| "learning_rate": 1.752064047310853e-07, | |
| "loss": 0.2197, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.23705033957958221, | |
| "step": 4210, | |
| "valid_targets_mean": 3690.5, | |
| "valid_targets_min": 852 | |
| }, | |
| { | |
| "epoch": 6.744, | |
| "grad_norm": 0.6183840075760885, | |
| "learning_rate": 1.6482506009243949e-07, | |
| "loss": 0.2336, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.22905892133712769, | |
| "step": 4215, | |
| "valid_targets_mean": 2088.2, | |
| "valid_targets_min": 810 | |
| }, | |
| { | |
| "epoch": 6.752, | |
| "grad_norm": 0.3342422145961255, | |
| "learning_rate": 1.5475946619850192e-07, | |
| "loss": 0.2206, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.20969925820827484, | |
| "step": 4220, | |
| "valid_targets_mean": 5836.8, | |
| "valid_targets_min": 1020 | |
| }, | |
| { | |
| "epoch": 6.76, | |
| "grad_norm": 0.34624668633463707, | |
| "learning_rate": 1.4500978328071845e-07, | |
| "loss": 0.2122, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.22905424237251282, | |
| "step": 4225, | |
| "valid_targets_mean": 5463.2, | |
| "valid_targets_min": 732 | |
| }, | |
| { | |
| "epoch": 6.768, | |
| "grad_norm": 0.4420870260003288, | |
| "learning_rate": 1.3557616654163775e-07, | |
| "loss": 0.2302, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.223080113530159, | |
| "step": 4230, | |
| "valid_targets_mean": 3664.9, | |
| "valid_targets_min": 826 | |
| }, | |
| { | |
| "epoch": 6.776, | |
| "grad_norm": 0.5518275111575168, | |
| "learning_rate": 1.264587661524308e-07, | |
| "loss": 0.2196, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.22578661143779755, | |
| "step": 4235, | |
| "valid_targets_mean": 2675.6, | |
| "valid_targets_min": 966 | |
| }, | |
| { | |
| "epoch": 6.784, | |
| "grad_norm": 0.4049368287917327, | |
| "learning_rate": 1.1765772725051084e-07, | |
| "loss": 0.2131, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.21148250997066498, | |
| "step": 4240, | |
| "valid_targets_mean": 4096.6, | |
| "valid_targets_min": 877 | |
| }, | |
| { | |
| "epoch": 6.792, | |
| "grad_norm": 0.4040442168660199, | |
| "learning_rate": 1.0917318993721726e-07, | |
| "loss": 0.2192, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.21757608652114868, | |
| "step": 4245, | |
| "valid_targets_mean": 4106.6, | |
| "valid_targets_min": 657 | |
| }, | |
| { | |
| "epoch": 6.8, | |
| "grad_norm": 0.4656238469376599, | |
| "learning_rate": 1.0100528927558861e-07, | |
| "loss": 0.2405, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.20843830704689026, | |
| "step": 4250, | |
| "valid_targets_mean": 3208.7, | |
| "valid_targets_min": 952 | |
| }, | |
| { | |
| "epoch": 6.808, | |
| "grad_norm": 0.42442223312996796, | |
| "learning_rate": 9.31541552882087e-08, | |
| "loss": 0.2299, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.25054430961608887, | |
| "step": 4255, | |
| "valid_targets_mean": 4431.4, | |
| "valid_targets_min": 932 | |
| }, | |
| { | |
| "epoch": 6.816, | |
| "grad_norm": 0.5254292888858535, | |
| "learning_rate": 8.561991295514161e-08, | |
| "loss": 0.2288, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.23713023960590363, | |
| "step": 4260, | |
| "valid_targets_mean": 3043.6, | |
| "valid_targets_min": 793 | |
| }, | |
| { | |
| "epoch": 6.824, | |
| "grad_norm": 0.4371339395015146, | |
| "learning_rate": 7.840268221193548e-08, | |
| "loss": 0.2259, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.23466908931732178, | |
| "step": 4265, | |
| "valid_targets_mean": 3946.0, | |
| "valid_targets_min": 874 | |
| }, | |
| { | |
| "epoch": 6.832, | |
| "grad_norm": 0.35973979810814993, | |
| "learning_rate": 7.150257794772186e-08, | |
| "loss": 0.2068, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.21603873372077942, | |
| "step": 4270, | |
| "valid_targets_mean": 5114.9, | |
| "valid_targets_min": 1057 | |
| }, | |
| { | |
| "epoch": 6.84, | |
| "grad_norm": 0.3711213623529721, | |
| "learning_rate": 6.491971000337938e-08, | |
| "loss": 0.2251, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.21824629604816437, | |
| "step": 4275, | |
| "valid_targets_mean": 4986.2, | |
| "valid_targets_min": 934 | |
| }, | |
| { | |
| "epoch": 6.848, | |
| "grad_norm": 0.4906868870974589, | |
| "learning_rate": 5.8654183169788435e-08, | |
| "loss": 0.2244, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.23453839123249054, | |
| "step": 4280, | |
| "valid_targets_mean": 3274.4, | |
| "valid_targets_min": 1037 | |
| }, | |
| { | |
| "epoch": 6.856, | |
| "grad_norm": 0.5003819660470424, | |
| "learning_rate": 5.270609718616593e-08, | |
| "loss": 0.2243, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.23117989301681519, | |
| "step": 4285, | |
| "valid_targets_mean": 3046.3, | |
| "valid_targets_min": 887 | |
| }, | |
| { | |
| "epoch": 6.864, | |
| "grad_norm": 0.5538836489378799, | |
| "learning_rate": 4.70755467384687e-08, | |
| "loss": 0.2238, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.22990688681602478, | |
| "step": 4290, | |
| "valid_targets_mean": 2669.9, | |
| "valid_targets_min": 722 | |
| }, | |
| { | |
| "epoch": 6.872, | |
| "grad_norm": 0.3339220377327871, | |
| "learning_rate": 4.176262145789478e-08, | |
| "loss": 0.2123, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.19378703832626343, | |
| "step": 4295, | |
| "valid_targets_mean": 5727.6, | |
| "valid_targets_min": 1068 | |
| }, | |
| { | |
| "epoch": 6.88, | |
| "grad_norm": 0.36779708232161507, | |
| "learning_rate": 3.676740591945782e-08, | |
| "loss": 0.2218, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2007201462984085, | |
| "step": 4300, | |
| "valid_targets_mean": 4660.1, | |
| "valid_targets_min": 587 | |
| }, | |
| { | |
| "epoch": 6.888, | |
| "grad_norm": 0.416473109709361, | |
| "learning_rate": 3.208997964062821e-08, | |
| "loss": 0.2258, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.23714400827884674, | |
| "step": 4305, | |
| "valid_targets_mean": 4501.4, | |
| "valid_targets_min": 1129 | |
| }, | |
| { | |
| "epoch": 6.896, | |
| "grad_norm": 0.3816242071018597, | |
| "learning_rate": 2.773041708008295e-08, | |
| "loss": 0.2202, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2064952254295349, | |
| "step": 4310, | |
| "valid_targets_mean": 4667.4, | |
| "valid_targets_min": 712 | |
| }, | |
| { | |
| "epoch": 6.904, | |
| "grad_norm": 0.3988685178845027, | |
| "learning_rate": 2.3688787636511057e-08, | |
| "loss": 0.219, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2150478959083557, | |
| "step": 4315, | |
| "valid_targets_mean": 3976.9, | |
| "valid_targets_min": 942 | |
| }, | |
| { | |
| "epoch": 6.912, | |
| "grad_norm": 0.4016474848255541, | |
| "learning_rate": 1.9965155647507782e-08, | |
| "loss": 0.2295, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2176404893398285, | |
| "step": 4320, | |
| "valid_targets_mean": 4303.1, | |
| "valid_targets_min": 921 | |
| }, | |
| { | |
| "epoch": 6.92, | |
| "grad_norm": 0.37224492037629603, | |
| "learning_rate": 1.655958038855765e-08, | |
| "loss": 0.2221, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.19105198979377747, | |
| "step": 4325, | |
| "valid_targets_mean": 4631.8, | |
| "valid_targets_min": 1050 | |
| }, | |
| { | |
| "epoch": 6.928, | |
| "grad_norm": 0.4229429441256482, | |
| "learning_rate": 1.3472116072084096e-08, | |
| "loss": 0.221, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.22886265814304352, | |
| "step": 4330, | |
| "valid_targets_mean": 4466.5, | |
| "valid_targets_min": 1187 | |
| }, | |
| { | |
| "epoch": 6.936, | |
| "grad_norm": 0.3238044486735016, | |
| "learning_rate": 1.0702811846590167e-08, | |
| "loss": 0.2135, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.20526066422462463, | |
| "step": 4335, | |
| "valid_targets_mean": 6032.8, | |
| "valid_targets_min": 770 | |
| }, | |
| { | |
| "epoch": 6.944, | |
| "grad_norm": 0.405352461983432, | |
| "learning_rate": 8.251711795876916e-09, | |
| "loss": 0.2273, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.20454303920269012, | |
| "step": 4340, | |
| "valid_targets_mean": 4101.2, | |
| "valid_targets_min": 1081 | |
| }, | |
| { | |
| "epoch": 6.952, | |
| "grad_norm": 0.5254957752257481, | |
| "learning_rate": 6.1188549383373044e-09, | |
| "loss": 0.2125, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.20966967940330505, | |
| "step": 4345, | |
| "valid_targets_mean": 2603.4, | |
| "valid_targets_min": 856 | |
| }, | |
| { | |
| "epoch": 6.96, | |
| "grad_norm": 0.4987226166041217, | |
| "learning_rate": 4.304275226338916e-09, | |
| "loss": 0.2181, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.20376208424568176, | |
| "step": 4350, | |
| "valid_targets_mean": 3057.9, | |
| "valid_targets_min": 852 | |
| }, | |
| { | |
| "epoch": 6.968, | |
| "grad_norm": 0.4147630934098896, | |
| "learning_rate": 2.8080015456799503e-09, | |
| "loss": 0.2128, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.22926411032676697, | |
| "step": 4355, | |
| "valid_targets_mean": 4151.9, | |
| "valid_targets_min": 662 | |
| }, | |
| { | |
| "epoch": 6.976, | |
| "grad_norm": 0.4327182545700865, | |
| "learning_rate": 1.6300577151340257e-09, | |
| "loss": 0.2168, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.21291124820709229, | |
| "step": 4360, | |
| "valid_targets_mean": 3751.6, | |
| "valid_targets_min": 860 | |
| }, | |
| { | |
| "epoch": 6.984, | |
| "grad_norm": 0.3961055234395137, | |
| "learning_rate": 7.70462486070489e-10, | |
| "loss": 0.2178, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.21084696054458618, | |
| "step": 4365, | |
| "valid_targets_mean": 4619.8, | |
| "valid_targets_min": 988 | |
| }, | |
| { | |
| "epoch": 6.992, | |
| "grad_norm": 0.4419544854529956, | |
| "learning_rate": 2.2922954214799065e-10, | |
| "loss": 0.2288, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.23672346770763397, | |
| "step": 4370, | |
| "valid_targets_mean": 3912.4, | |
| "valid_targets_min": 955 | |
| }, | |
| { | |
| "epoch": 7.0, | |
| "grad_norm": 0.4194586140813968, | |
| "learning_rate": 6.367499107984288e-12, | |
| "loss": 0.2104, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.24523112177848816, | |
| "step": 4375, | |
| "valid_targets_mean": 4153.5, | |
| "valid_targets_min": 836 | |
| }, | |
| { | |
| "epoch": 7.0, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.24523112177848816, | |
| "step": 4375, | |
| "total_flos": 785288333426688.0, | |
| "train_loss": 0.2574095404761178, | |
| "train_runtime": 17825.8409, | |
| "train_samples_per_second": 3.926, | |
| "train_steps_per_second": 0.245, | |
| "valid_targets_mean": 4153.5, | |
| "valid_targets_min": 836 | |
| } | |
| ], | |
| "logging_steps": 5, | |
| "max_steps": 4375, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 7, | |
| "save_steps": 1500, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": true | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 785288333426688.0, | |
| "train_batch_size": 1, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |