Text Generation
Transformers
Safetensors
qwen3
llama-factory
full
Generated from Trainer
conversational
text-generation-inference
Instructions to use laion/coderforge-10000__Qwen3-8B with libraries, inference providers, notebooks, and local apps. Follow these links to get started.
- Libraries
- Transformers
How to use laion/coderforge-10000__Qwen3-8B with Transformers:
# Use a pipeline as a high-level helper
from transformers import pipeline

pipe = pipeline("text-generation", model="laion/coderforge-10000__Qwen3-8B")
messages = [
    {"role": "user", "content": "Who are you?"},
]
pipe(messages)

# Load model directly
from transformers import AutoTokenizer, AutoModelForCausalLM

tokenizer = AutoTokenizer.from_pretrained("laion/coderforge-10000__Qwen3-8B")
model = AutoModelForCausalLM.from_pretrained("laion/coderforge-10000__Qwen3-8B")
messages = [
    {"role": "user", "content": "Who are you?"},
]
inputs = tokenizer.apply_chat_template(
    messages,
    add_generation_prompt=True,
    tokenize=True,
    return_dict=True,
    return_tensors="pt",
).to(model.device)

outputs = model.generate(**inputs, max_new_tokens=40)
print(tokenizer.decode(outputs[0][inputs["input_ids"].shape[-1]:]))
- Notebooks
- Google Colab
- Kaggle
- Local Apps
- vLLM
How to use laion/coderforge-10000__Qwen3-8B with vLLM:
Install from pip and serve model
# Install vLLM from pip:
pip install vllm

# Start the vLLM server:
vllm serve "laion/coderforge-10000__Qwen3-8B"

# Call the server using curl (OpenAI-compatible API):
curl -X POST "http://localhost:8000/v1/chat/completions" \
    -H "Content-Type: application/json" \
    --data '{
        "model": "laion/coderforge-10000__Qwen3-8B",
        "messages": [
            {
                "role": "user",
                "content": "What is the capital of France?"
            }
        ]
    }'
Use Docker
docker model run hf.co/laion/coderforge-10000__Qwen3-8B
- SGLang
How to use laion/coderforge-10000__Qwen3-8B with SGLang:
Install from pip and serve model
# Install SGLang from pip:
pip install sglang

# Start the SGLang server:
python3 -m sglang.launch_server \
    --model-path "laion/coderforge-10000__Qwen3-8B" \
    --host 0.0.0.0 \
    --port 30000

# Call the server using curl (OpenAI-compatible API):
curl -X POST "http://localhost:30000/v1/chat/completions" \
    -H "Content-Type: application/json" \
    --data '{
        "model": "laion/coderforge-10000__Qwen3-8B",
        "messages": [
            {
                "role": "user",
                "content": "What is the capital of France?"
            }
        ]
    }'
Use Docker images
docker run --gpus all \
    --shm-size 32g \
    -p 30000:30000 \
    -v ~/.cache/huggingface:/root/.cache/huggingface \
    --env "HF_TOKEN=<secret>" \
    --ipc=host \
    lmsysorg/sglang:latest \
    python3 -m sglang.launch_server \
        --model-path "laion/coderforge-10000__Qwen3-8B" \
        --host 0.0.0.0 \
        --port 30000

# Call the server using curl (OpenAI-compatible API):
curl -X POST "http://localhost:30000/v1/chat/completions" \
    -H "Content-Type: application/json" \
    --data '{
        "model": "laion/coderforge-10000__Qwen3-8B",
        "messages": [
            {
                "role": "user",
                "content": "What is the capital of France?"
            }
        ]
    }'
- Docker Model Runner
How to use laion/coderforge-10000__Qwen3-8B with Docker Model Runner:
docker model run hf.co/laion/coderforge-10000__Qwen3-8B
| { | |
| "best_global_step": null, | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 7.0, | |
| "eval_steps": 500, | |
| "global_step": 735, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.04792332268370607, | |
| "grad_norm": 8.982871202392836, | |
| "learning_rate": 2.1621621621621623e-06, | |
| "loss": 0.4936, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1592181921005249, | |
| "step": 5, | |
| "valid_targets_mean": 8838.0, | |
| "valid_targets_min": 2815 | |
| }, | |
| { | |
| "epoch": 0.09584664536741214, | |
| "grad_norm": 4.869040341382149, | |
| "learning_rate": 4.864864864864866e-06, | |
| "loss": 0.4591, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.14799830317497253, | |
| "step": 10, | |
| "valid_targets_mean": 8579.1, | |
| "valid_targets_min": 2498 | |
| }, | |
| { | |
| "epoch": 0.14376996805111822, | |
| "grad_norm": 1.7556018862586495, | |
| "learning_rate": 7.567567567567569e-06, | |
| "loss": 0.4137, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.13692015409469604, | |
| "step": 15, | |
| "valid_targets_mean": 8986.4, | |
| "valid_targets_min": 3372 | |
| }, | |
| { | |
| "epoch": 0.19169329073482427, | |
| "grad_norm": 1.0418919912451101, | |
| "learning_rate": 1.027027027027027e-05, | |
| "loss": 0.3792, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.11497873812913895, | |
| "step": 20, | |
| "valid_targets_mean": 8493.1, | |
| "valid_targets_min": 1758 | |
| }, | |
| { | |
| "epoch": 0.23961661341853036, | |
| "grad_norm": 0.621330940276866, | |
| "learning_rate": 1.2972972972972975e-05, | |
| "loss": 0.3418, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.10854463279247284, | |
| "step": 25, | |
| "valid_targets_mean": 7958.0, | |
| "valid_targets_min": 2019 | |
| }, | |
| { | |
| "epoch": 0.28753993610223644, | |
| "grad_norm": 0.4964510949959199, | |
| "learning_rate": 1.5675675675675676e-05, | |
| "loss": 0.3153, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.11034172028303146, | |
| "step": 30, | |
| "valid_targets_mean": 9250.5, | |
| "valid_targets_min": 1615 | |
| }, | |
| { | |
| "epoch": 0.3354632587859425, | |
| "grad_norm": 0.3600917256903157, | |
| "learning_rate": 1.8378378378378383e-05, | |
| "loss": 0.2935, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.09477648138999939, | |
| "step": 35, | |
| "valid_targets_mean": 8652.3, | |
| "valid_targets_min": 3949 | |
| }, | |
| { | |
| "epoch": 0.38338658146964855, | |
| "grad_norm": 0.30301686045879966, | |
| "learning_rate": 2.1081081081081082e-05, | |
| "loss": 0.2636, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.07665778696537018, | |
| "step": 40, | |
| "valid_targets_mean": 8658.3, | |
| "valid_targets_min": 2634 | |
| }, | |
| { | |
| "epoch": 0.43130990415335463, | |
| "grad_norm": 0.2505991304110098, | |
| "learning_rate": 2.378378378378379e-05, | |
| "loss": 0.242, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.07965461909770966, | |
| "step": 45, | |
| "valid_targets_mean": 8387.6, | |
| "valid_targets_min": 1267 | |
| }, | |
| { | |
| "epoch": 0.4792332268370607, | |
| "grad_norm": 0.2026378613321421, | |
| "learning_rate": 2.6486486486486488e-05, | |
| "loss": 0.2276, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.06772664189338684, | |
| "step": 50, | |
| "valid_targets_mean": 8567.6, | |
| "valid_targets_min": 2215 | |
| }, | |
| { | |
| "epoch": 0.5271565495207667, | |
| "grad_norm": 0.18063483795315363, | |
| "learning_rate": 2.918918918918919e-05, | |
| "loss": 0.2204, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.07574020326137543, | |
| "step": 55, | |
| "valid_targets_mean": 9203.6, | |
| "valid_targets_min": 2349 | |
| }, | |
| { | |
| "epoch": 0.5750798722044729, | |
| "grad_norm": 0.18260057468719235, | |
| "learning_rate": 3.1891891891891894e-05, | |
| "loss": 0.2114, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.0646074041724205, | |
| "step": 60, | |
| "valid_targets_mean": 7918.9, | |
| "valid_targets_min": 1966 | |
| }, | |
| { | |
| "epoch": 0.6230031948881789, | |
| "grad_norm": 0.1952944182498782, | |
| "learning_rate": 3.45945945945946e-05, | |
| "loss": 0.2049, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.06607531756162643, | |
| "step": 65, | |
| "valid_targets_mean": 8708.2, | |
| "valid_targets_min": 4730 | |
| }, | |
| { | |
| "epoch": 0.670926517571885, | |
| "grad_norm": 0.1596272851527542, | |
| "learning_rate": 3.72972972972973e-05, | |
| "loss": 0.195, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.06611350178718567, | |
| "step": 70, | |
| "valid_targets_mean": 9198.4, | |
| "valid_targets_min": 2913 | |
| }, | |
| { | |
| "epoch": 0.7188498402555911, | |
| "grad_norm": 0.18104797378120346, | |
| "learning_rate": 4e-05, | |
| "loss": 0.1905, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.06400163471698761, | |
| "step": 75, | |
| "valid_targets_mean": 7796.8, | |
| "valid_targets_min": 2499 | |
| }, | |
| { | |
| "epoch": 0.7667731629392971, | |
| "grad_norm": 0.16498793730423283, | |
| "learning_rate": 3.999435301808432e-05, | |
| "loss": 0.192, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.05754277855157852, | |
| "step": 80, | |
| "valid_targets_mean": 7951.4, | |
| "valid_targets_min": 3044 | |
| }, | |
| { | |
| "epoch": 0.8146964856230032, | |
| "grad_norm": 0.16971530607334578, | |
| "learning_rate": 3.997741526117775e-05, | |
| "loss": 0.1862, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.06384151428937912, | |
| "step": 85, | |
| "valid_targets_mean": 8073.7, | |
| "valid_targets_min": 1933 | |
| }, | |
| { | |
| "epoch": 0.8626198083067093, | |
| "grad_norm": 0.16016325817445584, | |
| "learning_rate": 3.994919629400098e-05, | |
| "loss": 0.1776, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.06105092912912369, | |
| "step": 90, | |
| "valid_targets_mean": 8430.1, | |
| "valid_targets_min": 3147 | |
| }, | |
| { | |
| "epoch": 0.9105431309904153, | |
| "grad_norm": 0.15777170003961027, | |
| "learning_rate": 3.990971205175375e-05, | |
| "loss": 0.1806, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.057581670582294464, | |
| "step": 95, | |
| "valid_targets_mean": 8585.2, | |
| "valid_targets_min": 2840 | |
| }, | |
| { | |
| "epoch": 0.9584664536741214, | |
| "grad_norm": 0.18646622741641677, | |
| "learning_rate": 3.985898483111624e-05, | |
| "loss": 0.1755, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.05864211171865463, | |
| "step": 100, | |
| "valid_targets_mean": 8463.8, | |
| "valid_targets_min": 3838 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "grad_norm": 0.2511356329717353, | |
| "learning_rate": 3.979704327765823e-05, | |
| "loss": 0.1729, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.16539210081100464, | |
| "step": 105, | |
| "valid_targets_mean": 8758.9, | |
| "valid_targets_min": 3781 | |
| }, | |
| { | |
| "epoch": 1.0479233226837061, | |
| "grad_norm": 0.16279705622118554, | |
| "learning_rate": 3.972392236966291e-05, | |
| "loss": 0.1706, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.053829021751880646, | |
| "step": 110, | |
| "valid_targets_mean": 8902.6, | |
| "valid_targets_min": 1913 | |
| }, | |
| { | |
| "epoch": 1.095846645367412, | |
| "grad_norm": 0.17163665454737814, | |
| "learning_rate": 3.963966339837482e-05, | |
| "loss": 0.1714, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.05452179163694382, | |
| "step": 115, | |
| "valid_targets_mean": 8963.6, | |
| "valid_targets_min": 4027 | |
| }, | |
| { | |
| "epoch": 1.1437699680511182, | |
| "grad_norm": 0.15502231795052268, | |
| "learning_rate": 3.954431394468266e-05, | |
| "loss": 0.1709, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.061408303678035736, | |
| "step": 120, | |
| "valid_targets_mean": 9384.2, | |
| "valid_targets_min": 4126 | |
| }, | |
| { | |
| "epoch": 1.1916932907348243, | |
| "grad_norm": 0.26911312131506865, | |
| "learning_rate": 3.943792785225049e-05, | |
| "loss": 0.1666, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.053546734154224396, | |
| "step": 125, | |
| "valid_targets_mean": 8582.1, | |
| "valid_targets_min": 1267 | |
| }, | |
| { | |
| "epoch": 1.2396166134185305, | |
| "grad_norm": 0.1632755315156534, | |
| "learning_rate": 3.932056519711232e-05, | |
| "loss": 0.1631, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.052455998957157135, | |
| "step": 130, | |
| "valid_targets_mean": 9007.6, | |
| "valid_targets_min": 1489 | |
| }, | |
| { | |
| "epoch": 1.2875399361022364, | |
| "grad_norm": 0.17105174576530605, | |
| "learning_rate": 3.919229225374726e-05, | |
| "loss": 0.1656, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.04397791996598244, | |
| "step": 135, | |
| "valid_targets_mean": 7291.8, | |
| "valid_targets_min": 941 | |
| }, | |
| { | |
| "epoch": 1.3354632587859425, | |
| "grad_norm": 0.17198756870105245, | |
| "learning_rate": 3.9053181457654465e-05, | |
| "loss": 0.1625, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.05939662456512451, | |
| "step": 140, | |
| "valid_targets_mean": 9653.4, | |
| "valid_targets_min": 2045 | |
| }, | |
| { | |
| "epoch": 1.3833865814696487, | |
| "grad_norm": 0.15757402041255247, | |
| "learning_rate": 3.89033113644489e-05, | |
| "loss": 0.1584, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.05102524161338806, | |
| "step": 145, | |
| "valid_targets_mean": 9247.4, | |
| "valid_targets_min": 2404 | |
| }, | |
| { | |
| "epoch": 1.4313099041533546, | |
| "grad_norm": 0.1669340893696013, | |
| "learning_rate": 3.874276660550119e-05, | |
| "loss": 0.1621, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.054257169365882874, | |
| "step": 150, | |
| "valid_targets_mean": 8428.7, | |
| "valid_targets_min": 1817 | |
| }, | |
| { | |
| "epoch": 1.4792332268370607, | |
| "grad_norm": 0.17930744302420296, | |
| "learning_rate": 3.857163784014636e-05, | |
| "loss": 0.1632, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.05627815052866936, | |
| "step": 155, | |
| "valid_targets_mean": 9194.1, | |
| "valid_targets_min": 1771 | |
| }, | |
| { | |
| "epoch": 1.5271565495207668, | |
| "grad_norm": 0.16977270176183656, | |
| "learning_rate": 3.8390021704488735e-05, | |
| "loss": 0.1613, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.05593028664588928, | |
| "step": 160, | |
| "valid_targets_mean": 8621.9, | |
| "valid_targets_min": 3475 | |
| }, | |
| { | |
| "epoch": 1.5750798722044728, | |
| "grad_norm": 0.17445879345229784, | |
| "learning_rate": 3.8198020756831694e-05, | |
| "loss": 0.1595, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.055247336626052856, | |
| "step": 165, | |
| "valid_targets_mean": 8989.5, | |
| "valid_targets_min": 3428 | |
| }, | |
| { | |
| "epoch": 1.623003194888179, | |
| "grad_norm": 0.17465115639380263, | |
| "learning_rate": 3.799574341976314e-05, | |
| "loss": 0.1546, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.05567849799990654, | |
| "step": 170, | |
| "valid_targets_mean": 9782.6, | |
| "valid_targets_min": 2450 | |
| }, | |
| { | |
| "epoch": 1.670926517571885, | |
| "grad_norm": 0.1734995913932172, | |
| "learning_rate": 3.778330391892952e-05, | |
| "loss": 0.1593, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.053123295307159424, | |
| "step": 175, | |
| "valid_targets_mean": 8914.3, | |
| "valid_targets_min": 2891 | |
| }, | |
| { | |
| "epoch": 1.718849840255591, | |
| "grad_norm": 0.15804349256301092, | |
| "learning_rate": 3.7560822218532774e-05, | |
| "loss": 0.1572, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.050238993018865585, | |
| "step": 180, | |
| "valid_targets_mean": 8918.0, | |
| "valid_targets_min": 4581 | |
| }, | |
| { | |
| "epoch": 1.766773162939297, | |
| "grad_norm": 0.18475759093632027, | |
| "learning_rate": 3.732842395358677e-05, | |
| "loss": 0.1562, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.050228629261255264, | |
| "step": 185, | |
| "valid_targets_mean": 8194.7, | |
| "valid_targets_min": 3799 | |
| }, | |
| { | |
| "epoch": 1.8146964856230032, | |
| "grad_norm": 0.17207806449259455, | |
| "learning_rate": 3.708624035897144e-05, | |
| "loss": 0.1561, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.05894997715950012, | |
| "step": 190, | |
| "valid_targets_mean": 8873.7, | |
| "valid_targets_min": 3497 | |
| }, | |
| { | |
| "epoch": 1.8626198083067091, | |
| "grad_norm": 0.160383974551531, | |
| "learning_rate": 3.68344081953247e-05, | |
| "loss": 0.1558, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.05507740378379822, | |
| "step": 195, | |
| "valid_targets_mean": 9449.2, | |
| "valid_targets_min": 2927 | |
| }, | |
| { | |
| "epoch": 1.9105431309904153, | |
| "grad_norm": 0.1688923848526839, | |
| "learning_rate": 3.657306967181394e-05, | |
| "loss": 0.1574, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.051344458013772964, | |
| "step": 200, | |
| "valid_targets_mean": 7713.0, | |
| "valid_targets_min": 2578 | |
| }, | |
| { | |
| "epoch": 1.9584664536741214, | |
| "grad_norm": 0.17407301357558894, | |
| "learning_rate": 3.630237236583077e-05, | |
| "loss": 0.153, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.056261416524648666, | |
| "step": 205, | |
| "valid_targets_mean": 8383.5, | |
| "valid_targets_min": 4391 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "grad_norm": 0.24633784795849772, | |
| "learning_rate": 3.6022469139654345e-05, | |
| "loss": 0.157, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.15247748792171478, | |
| "step": 210, | |
| "valid_targets_mean": 8627.0, | |
| "valid_targets_min": 2270 | |
| }, | |
| { | |
| "epoch": 2.047923322683706, | |
| "grad_norm": 0.17399949855938723, | |
| "learning_rate": 3.57335180541303e-05, | |
| "loss": 0.1475, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.04970664530992508, | |
| "step": 215, | |
| "valid_targets_mean": 9049.2, | |
| "valid_targets_min": 2971 | |
| }, | |
| { | |
| "epoch": 2.0958466453674123, | |
| "grad_norm": 0.22794184274320198, | |
| "learning_rate": 3.543568227941408e-05, | |
| "loss": 0.1454, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.05050533637404442, | |
| "step": 220, | |
| "valid_targets_mean": 8644.0, | |
| "valid_targets_min": 3723 | |
| }, | |
| { | |
| "epoch": 2.143769968051118, | |
| "grad_norm": 0.18806892774190778, | |
| "learning_rate": 3.512913000282905e-05, | |
| "loss": 0.1489, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.05213526636362076, | |
| "step": 225, | |
| "valid_targets_mean": 8811.9, | |
| "valid_targets_min": 1989 | |
| }, | |
| { | |
| "epoch": 2.191693290734824, | |
| "grad_norm": 0.1555212602218601, | |
| "learning_rate": 3.481403433389142e-05, | |
| "loss": 0.1477, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.04308829829096794, | |
| "step": 230, | |
| "valid_targets_mean": 8940.4, | |
| "valid_targets_min": 1986 | |
| }, | |
| { | |
| "epoch": 2.2396166134185305, | |
| "grad_norm": 0.15895262335449095, | |
| "learning_rate": 3.449057320655561e-05, | |
| "loss": 0.1477, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.048327021300792694, | |
| "step": 235, | |
| "valid_targets_mean": 9288.6, | |
| "valid_targets_min": 2617 | |
| }, | |
| { | |
| "epoch": 2.2875399361022364, | |
| "grad_norm": 0.15793093041554393, | |
| "learning_rate": 3.415892927873527e-05, | |
| "loss": 0.1515, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.04683384671807289, | |
| "step": 240, | |
| "valid_targets_mean": 8370.8, | |
| "valid_targets_min": 1504 | |
| }, | |
| { | |
| "epoch": 2.3354632587859427, | |
| "grad_norm": 0.14678394997302097, | |
| "learning_rate": 3.381928982915668e-05, | |
| "loss": 0.1453, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.04515903443098068, | |
| "step": 245, | |
| "valid_targets_mean": 9194.8, | |
| "valid_targets_min": 4400 | |
| }, | |
| { | |
| "epoch": 2.3833865814696487, | |
| "grad_norm": 0.1603781979087474, | |
| "learning_rate": 3.3471846651602815e-05, | |
| "loss": 0.1474, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.05253840982913971, | |
| "step": 250, | |
| "valid_targets_mean": 8766.1, | |
| "valid_targets_min": 3267 | |
| }, | |
| { | |
| "epoch": 2.4313099041533546, | |
| "grad_norm": 0.15879085539353155, | |
| "learning_rate": 3.31167959466077e-05, | |
| "loss": 0.1473, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.05009625852108002, | |
| "step": 255, | |
| "valid_targets_mean": 8309.4, | |
| "valid_targets_min": 4013 | |
| }, | |
| { | |
| "epoch": 2.479233226837061, | |
| "grad_norm": 0.14424018717194137, | |
| "learning_rate": 3.275433821066237e-05, | |
| "loss": 0.1461, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.04807288199663162, | |
| "step": 260, | |
| "valid_targets_mean": 9051.2, | |
| "valid_targets_min": 2047 | |
| }, | |
| { | |
| "epoch": 2.527156549520767, | |
| "grad_norm": 0.159320759658969, | |
| "learning_rate": 3.238467812299483e-05, | |
| "loss": 0.1491, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.04604367911815643, | |
| "step": 265, | |
| "valid_targets_mean": 8164.8, | |
| "valid_targets_min": 1963 | |
| }, | |
| { | |
| "epoch": 2.5750798722044728, | |
| "grad_norm": 0.16789223040102477, | |
| "learning_rate": 3.200802442998807e-05, | |
| "loss": 0.1459, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.048190124332904816, | |
| "step": 270, | |
| "valid_targets_mean": 9103.1, | |
| "valid_targets_min": 1210 | |
| }, | |
| { | |
| "epoch": 2.623003194888179, | |
| "grad_norm": 0.17379236030377787, | |
| "learning_rate": 3.1624589827301395e-05, | |
| "loss": 0.1463, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.05079440772533417, | |
| "step": 275, | |
| "valid_targets_mean": 7991.2, | |
| "valid_targets_min": 3517 | |
| }, | |
| { | |
| "epoch": 2.670926517571885, | |
| "grad_norm": 0.18305013542111226, | |
| "learning_rate": 3.123459083976152e-05, | |
| "loss": 0.1463, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.04407097026705742, | |
| "step": 280, | |
| "valid_targets_mean": 8356.0, | |
| "valid_targets_min": 2326 | |
| }, | |
| { | |
| "epoch": 2.718849840255591, | |
| "grad_norm": 0.1664728450197544, | |
| "learning_rate": 3.083824769909142e-05, | |
| "loss": 0.1482, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.04852219298481941, | |
| "step": 285, | |
| "valid_targets_mean": 9024.8, | |
| "valid_targets_min": 2931 | |
| }, | |
| { | |
| "epoch": 2.7667731629392973, | |
| "grad_norm": 0.1578734610961178, | |
| "learning_rate": 3.0435784219545872e-05, | |
| "loss": 0.1493, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.047358766198158264, | |
| "step": 290, | |
| "valid_targets_mean": 8802.1, | |
| "valid_targets_min": 2204 | |
| }, | |
| { | |
| "epoch": 2.8146964856230032, | |
| "grad_norm": 0.22755109653603342, | |
| "learning_rate": 3.0027427671523957e-05, | |
| "loss": 0.1465, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.0490507110953331, | |
| "step": 295, | |
| "valid_targets_mean": 8245.7, | |
| "valid_targets_min": 3666 | |
| }, | |
| { | |
| "epoch": 2.862619808306709, | |
| "grad_norm": 0.16269867408736244, | |
| "learning_rate": 2.961340865322984e-05, | |
| "loss": 0.1459, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.04793532192707062, | |
| "step": 300, | |
| "valid_targets_mean": 8419.0, | |
| "valid_targets_min": 973 | |
| }, | |
| { | |
| "epoch": 2.9105431309904155, | |
| "grad_norm": 0.1528249668702955, | |
| "learning_rate": 2.9193960960454446e-05, | |
| "loss": 0.1467, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.043026067316532135, | |
| "step": 305, | |
| "valid_targets_mean": 8449.1, | |
| "valid_targets_min": 2645 | |
| }, | |
| { | |
| "epoch": 2.9584664536741214, | |
| "grad_norm": 0.18558974597394937, | |
| "learning_rate": 2.8769321454551327e-05, | |
| "loss": 0.147, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.044257231056690216, | |
| "step": 310, | |
| "valid_targets_mean": 8645.2, | |
| "valid_targets_min": 3405 | |
| }, | |
| { | |
| "epoch": 3.0, | |
| "grad_norm": 0.2546650444205805, | |
| "learning_rate": 2.833972992868154e-05, | |
| "loss": 0.1413, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.13152235746383667, | |
| "step": 315, | |
| "valid_targets_mean": 9198.5, | |
| "valid_targets_min": 2013 | |
| }, | |
| { | |
| "epoch": 3.047923322683706, | |
| "grad_norm": 0.16032522655906664, | |
| "learning_rate": 2.7905428972402872e-05, | |
| "loss": 0.1403, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.04698067158460617, | |
| "step": 320, | |
| "valid_targets_mean": 8638.6, | |
| "valid_targets_min": 2123 | |
| }, | |
| { | |
| "epoch": 3.0958466453674123, | |
| "grad_norm": 0.15952495144596782, | |
| "learning_rate": 2.7466663834679905e-05, | |
| "loss": 0.141, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.05140436440706253, | |
| "step": 325, | |
| "valid_targets_mean": 9252.7, | |
| "valid_targets_min": 4005 | |
| }, | |
| { | |
| "epoch": 3.143769968051118, | |
| "grad_norm": 0.17042242626456597, | |
| "learning_rate": 2.7023682285392445e-05, | |
| "loss": 0.1389, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.047651275992393494, | |
| "step": 330, | |
| "valid_targets_mean": 8656.8, | |
| "valid_targets_min": 2922 | |
| }, | |
| { | |
| "epoch": 3.191693290734824, | |
| "grad_norm": 0.16125983283276313, | |
| "learning_rate": 2.657673447542028e-05, | |
| "loss": 0.1428, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.048372238874435425, | |
| "step": 335, | |
| "valid_targets_mean": 9040.1, | |
| "valid_targets_min": 3603 | |
| }, | |
| { | |
| "epoch": 3.2396166134185305, | |
| "grad_norm": 0.15799040110371987, | |
| "learning_rate": 2.6126072795383416e-05, | |
| "loss": 0.1367, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.047681644558906555, | |
| "step": 340, | |
| "valid_targets_mean": 9583.6, | |
| "valid_targets_min": 3097 | |
| }, | |
| { | |
| "epoch": 3.2875399361022364, | |
| "grad_norm": 0.14805208338883794, | |
| "learning_rate": 2.5671951733117587e-05, | |
| "loss": 0.1415, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.050001755356788635, | |
| "step": 345, | |
| "valid_targets_mean": 9660.5, | |
| "valid_targets_min": 4266 | |
| }, | |
| { | |
| "epoch": 3.3354632587859427, | |
| "grad_norm": 0.16843665921293052, | |
| "learning_rate": 2.5214627729965396e-05, | |
| "loss": 0.1398, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.04726799577474594, | |
| "step": 350, | |
| "valid_targets_mean": 9226.8, | |
| "valid_targets_min": 5487 | |
| }, | |
| { | |
| "epoch": 3.3833865814696487, | |
| "grad_norm": 0.19266516280316792, | |
| "learning_rate": 2.47543590359644e-05, | |
| "loss": 0.1386, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.04282751679420471, | |
| "step": 355, | |
| "valid_targets_mean": 7584.4, | |
| "valid_targets_min": 2293 | |
| }, | |
| { | |
| "epoch": 3.4313099041533546, | |
| "grad_norm": 0.16156169603076698, | |
| "learning_rate": 2.4291405564013727e-05, | |
| "loss": 0.1398, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.04496710002422333, | |
| "step": 360, | |
| "valid_targets_mean": 9012.9, | |
| "valid_targets_min": 2869 | |
| }, | |
| { | |
| "epoch": 3.479233226837061, | |
| "grad_norm": 0.16770901380790357, | |
| "learning_rate": 2.3826028743101763e-05, | |
| "loss": 0.1415, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.043900176882743835, | |
| "step": 365, | |
| "valid_targets_mean": 7966.8, | |
| "valid_targets_min": 2050 | |
| }, | |
| { | |
| "epoch": 3.527156549520767, | |
| "grad_norm": 0.14984841759414239, | |
| "learning_rate": 2.3358491370677693e-05, | |
| "loss": 0.1386, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.04852374643087387, | |
| "step": 370, | |
| "valid_targets_mean": 9445.3, | |
| "valid_targets_min": 4423 | |
| }, | |
| { | |
| "epoch": 3.5750798722044728, | |
| "grad_norm": 0.1599146740589782, | |
| "learning_rate": 2.2889057464250196e-05, | |
| "loss": 0.1384, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.04656771942973137, | |
| "step": 375, | |
| "valid_targets_mean": 9080.2, | |
| "valid_targets_min": 1597 | |
| }, | |
| { | |
| "epoch": 3.623003194888179, | |
| "grad_norm": 0.15143561107067025, | |
| "learning_rate": 2.2417992112297293e-05, | |
| "loss": 0.1407, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.04388592019677162, | |
| "step": 380, | |
| "valid_targets_mean": 8641.7, | |
| "valid_targets_min": 3392 | |
| }, | |
| { | |
| "epoch": 3.670926517571885, | |
| "grad_norm": 0.15149756386513208, | |
| "learning_rate": 2.1945561324571366e-05, | |
| "loss": 0.1379, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.05352652445435524, | |
| "step": 385, | |
| "valid_targets_mean": 9100.2, | |
| "valid_targets_min": 2539 | |
| }, | |
| { | |
| "epoch": 3.718849840255591, | |
| "grad_norm": 0.1665088787853794, | |
| "learning_rate": 2.1472031881883856e-05, | |
| "loss": 0.1394, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.05265359953045845, | |
| "step": 390, | |
| "valid_targets_mean": 9276.7, | |
| "valid_targets_min": 3839 | |
| }, | |
| { | |
| "epoch": 3.7667731629392973, | |
| "grad_norm": 0.14824283461917753, | |
| "learning_rate": 2.0997671185454714e-05, | |
| "loss": 0.1396, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.046166472136974335, | |
| "step": 395, | |
| "valid_targets_mean": 8789.5, | |
| "valid_targets_min": 3494 | |
| }, | |
| { | |
| "epoch": 3.8146964856230032, | |
| "grad_norm": 0.15960953666875605, | |
| "learning_rate": 2.0522747105911378e-05, | |
| "loss": 0.1381, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.04279767721891403, | |
| "step": 400, | |
| "valid_targets_mean": 7870.9, | |
| "valid_targets_min": 2682 | |
| }, | |
| { | |
| "epoch": 3.862619808306709, | |
| "grad_norm": 0.16843602717950168, | |
| "learning_rate": 2.0047527832022674e-05, | |
| "loss": 0.1378, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.0413513258099556, | |
| "step": 405, | |
| "valid_targets_mean": 8123.9, | |
| "valid_targets_min": 1989 | |
| }, | |
| { | |
| "epoch": 3.9105431309904155, | |
| "grad_norm": 0.16005031955715257, | |
| "learning_rate": 1.9572281719253186e-05, | |
| "loss": 0.1373, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.04267306998372078, | |
| "step": 410, | |
| "valid_targets_mean": 8465.4, | |
| "valid_targets_min": 2045 | |
| }, | |
| { | |
| "epoch": 3.9584664536741214, | |
| "grad_norm": 0.15809782385997903, | |
| "learning_rate": 1.909727713822333e-05, | |
| "loss": 0.1379, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.04258957877755165, | |
| "step": 415, | |
| "valid_targets_mean": 7733.6, | |
| "valid_targets_min": 2233 | |
| }, | |
| { | |
| "epoch": 4.0, | |
| "grad_norm": 0.24269592632341946, | |
| "learning_rate": 1.8622782323161014e-05, | |
| "loss": 0.1343, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1285635381937027, | |
| "step": 420, | |
| "valid_targets_mean": 8452.1, | |
| "valid_targets_min": 2045 | |
| }, | |
| { | |
| "epoch": 4.047923322683706, | |
| "grad_norm": 0.15907993684353738, | |
| "learning_rate": 1.8149065220430197e-05, | |
| "loss": 0.1358, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.04193146899342537, | |
| "step": 425, | |
| "valid_targets_mean": 8624.6, | |
| "valid_targets_min": 4420 | |
| }, | |
| { | |
| "epoch": 4.095846645367412, | |
| "grad_norm": 0.18925345564072582, | |
| "learning_rate": 1.7676393337222115e-05, | |
| "loss": 0.1355, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.04186321049928665, | |
| "step": 430, | |
| "valid_targets_mean": 7756.4, | |
| "valid_targets_min": 1221 | |
| }, | |
| { | |
| "epoch": 4.143769968051118, | |
| "grad_norm": 0.16711423931591982, | |
| "learning_rate": 1.7205033590494426e-05, | |
| "loss": 0.1363, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.047112271189689636, | |
| "step": 435, | |
| "valid_targets_mean": 9224.7, | |
| "valid_targets_min": 1487 | |
| }, | |
| { | |
| "epoch": 4.1916932907348246, | |
| "grad_norm": 0.17036236145352987, | |
| "learning_rate": 1.6735252156243675e-05, | |
| "loss": 0.1337, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.04689479246735573, | |
| "step": 440, | |
| "valid_targets_mean": 8922.7, | |
| "valid_targets_min": 3916 | |
| }, | |
| { | |
| "epoch": 4.23961661341853, | |
| "grad_norm": 0.1608190220970628, | |
| "learning_rate": 1.6267314319196215e-05, | |
| "loss": 0.1324, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.04334461688995361, | |
| "step": 445, | |
| "valid_targets_mean": 8931.1, | |
| "valid_targets_min": 3494 | |
| }, | |
| { | |
| "epoch": 4.287539936102236, | |
| "grad_norm": 0.15484652532421975, | |
| "learning_rate": 1.580148432300241e-05, | |
| "loss": 0.1363, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.04521436244249344, | |
| "step": 450, | |
| "valid_targets_mean": 8411.7, | |
| "valid_targets_min": 2459 | |
| }, | |
| { | |
| "epoch": 4.335463258785943, | |
| "grad_norm": 0.15640888798523264, | |
| "learning_rate": 1.5338025221018668e-05, | |
| "loss": 0.1356, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.04313844442367554, | |
| "step": 455, | |
| "valid_targets_mean": 7779.2, | |
| "valid_targets_min": 2404 | |
| }, | |
| { | |
| "epoch": 4.383386581469648, | |
| "grad_norm": 0.17735167228744403, | |
| "learning_rate": 1.4877198727761748e-05, | |
| "loss": 0.1337, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.04428839683532715, | |
| "step": 460, | |
| "valid_targets_mean": 8799.6, | |
| "valid_targets_min": 3400 | |
| }, | |
| { | |
| "epoch": 4.431309904153355, | |
| "grad_norm": 0.15431579302592524, | |
| "learning_rate": 1.4419265071119038e-05, | |
| "loss": 0.1379, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.04533102735877037, | |
| "step": 465, | |
| "valid_targets_mean": 8351.8, | |
| "valid_targets_min": 1615 | |
| }, | |
| { | |
| "epoch": 4.479233226837061, | |
| "grad_norm": 0.1569404780015899, | |
| "learning_rate": 1.3964482845398281e-05, | |
| "loss": 0.1331, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.04367567598819733, | |
| "step": 470, | |
| "valid_targets_mean": 8522.4, | |
| "valid_targets_min": 2404 | |
| }, | |
| { | |
| "epoch": 4.527156549520766, | |
| "grad_norm": 0.1780710965354458, | |
| "learning_rate": 1.3513108865299907e-05, | |
| "loss": 0.1326, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.043544650077819824, | |
| "step": 475, | |
| "valid_targets_mean": 9276.5, | |
| "valid_targets_min": 1840 | |
| }, | |
| { | |
| "epoch": 4.575079872204473, | |
| "grad_norm": 0.15959810688800943, | |
| "learning_rate": 1.3065398020894202e-05, | |
| "loss": 0.1337, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.04500932991504669, | |
| "step": 480, | |
| "valid_targets_mean": 8126.5, | |
| "valid_targets_min": 2047 | |
| }, | |
| { | |
| "epoch": 4.623003194888179, | |
| "grad_norm": 0.17116807313960172, | |
| "learning_rate": 1.2621603133685343e-05, | |
| "loss": 0.1333, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.04575769975781441, | |
| "step": 485, | |
| "valid_targets_mean": 8706.4, | |
| "valid_targets_min": 1597 | |
| }, | |
| { | |
| "epoch": 4.6709265175718855, | |
| "grad_norm": 0.17715013972093857, | |
| "learning_rate": 1.218197481384356e-05, | |
| "loss": 0.134, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.048376478254795074, | |
| "step": 490, | |
| "valid_targets_mean": 8893.8, | |
| "valid_targets_min": 4343 | |
| }, | |
| { | |
| "epoch": 4.718849840255591, | |
| "grad_norm": 0.1570645103662247, | |
| "learning_rate": 1.1746761318686044e-05, | |
| "loss": 0.1355, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.044359538704156876, | |
| "step": 495, | |
| "valid_targets_mean": 9139.7, | |
| "valid_targets_min": 3056 | |
| }, | |
| { | |
| "epoch": 4.766773162939297, | |
| "grad_norm": 0.15423551749596906, | |
| "learning_rate": 1.1316208412486443e-05, | |
| "loss": 0.1313, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.042892444878816605, | |
| "step": 500, | |
| "valid_targets_mean": 8499.4, | |
| "valid_targets_min": 1884 | |
| }, | |
| { | |
| "epoch": 4.814696485623003, | |
| "grad_norm": 0.16453480642681878, | |
| "learning_rate": 1.0890559227692265e-05, | |
| "loss": 0.1364, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.04749216139316559, | |
| "step": 505, | |
| "valid_targets_mean": 8283.1, | |
| "valid_targets_min": 2513 | |
| }, | |
| { | |
| "epoch": 4.862619808306709, | |
| "grad_norm": 0.1664740998586081, | |
| "learning_rate": 1.0470054127628411e-05, | |
| "loss": 0.1322, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.04286997765302658, | |
| "step": 510, | |
| "valid_targets_mean": 8387.6, | |
| "valid_targets_min": 1679 | |
| }, | |
| { | |
| "epoch": 4.9105431309904155, | |
| "grad_norm": 0.15516781277136965, | |
| "learning_rate": 1.0054930570764427e-05, | |
| "loss": 0.1327, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.045819588005542755, | |
| "step": 515, | |
| "valid_targets_mean": 9220.8, | |
| "valid_targets_min": 3475 | |
| }, | |
| { | |
| "epoch": 4.958466453674122, | |
| "grad_norm": 0.1452771278632104, | |
| "learning_rate": 9.645422976622154e-06, | |
| "loss": 0.1313, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.04815547540783882, | |
| "step": 520, | |
| "valid_targets_mean": 9208.9, | |
| "valid_targets_min": 3189 | |
| }, | |
| { | |
| "epoch": 5.0, | |
| "grad_norm": 0.2436229369532187, | |
| "learning_rate": 9.241762593399437e-06, | |
| "loss": 0.1322, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1331784427165985, | |
| "step": 525, | |
| "valid_targets_mean": 8954.4, | |
| "valid_targets_min": 2293 | |
| }, | |
| { | |
| "epoch": 5.047923322683706, | |
| "grad_norm": 0.1569691647374951, | |
| "learning_rate": 8.844177367384689e-06, | |
| "loss": 0.1287, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.04233275726437569, | |
| "step": 530, | |
| "valid_targets_mean": 9024.9, | |
| "valid_targets_min": 3046 | |
| }, | |
| { | |
| "epoch": 5.095846645367412, | |
| "grad_norm": 0.1516535037265207, | |
| "learning_rate": 8.452891814236037e-06, | |
| "loss": 0.1319, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.04785648733377457, | |
| "step": 535, | |
| "valid_targets_mean": 8453.7, | |
| "valid_targets_min": 1823 | |
| }, | |
| { | |
| "epoch": 5.143769968051118, | |
| "grad_norm": 0.1545110306606347, | |
| "learning_rate": 8.068126892197728e-06, | |
| "loss": 0.1327, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.04464545473456383, | |
| "step": 540, | |
| "valid_targets_mean": 8877.0, | |
| "valid_targets_min": 4783 | |
| }, | |
| { | |
| "epoch": 5.1916932907348246, | |
| "grad_norm": 0.15588257021266194, | |
| "learning_rate": 7.690099877325419e-06, | |
| "loss": 0.1317, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.04059495031833649, | |
| "step": 545, | |
| "valid_targets_mean": 8231.1, | |
| "valid_targets_min": 4328 | |
| }, | |
| { | |
| "epoch": 5.23961661341853, | |
| "grad_norm": 0.16029539285883082, | |
| "learning_rate": 7.319024240790768e-06, | |
| "loss": 0.1323, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.04444342479109764, | |
| "step": 550, | |
| "valid_targets_mean": 8885.3, | |
| "valid_targets_min": 1621 | |
| }, | |
| { | |
| "epoch": 5.287539936102236, | |
| "grad_norm": 0.15541690395010793, | |
| "learning_rate": 6.955109528334667e-06, | |
| "loss": 0.1295, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.04558786749839783, | |
| "step": 555, | |
| "valid_targets_mean": 8818.6, | |
| "valid_targets_min": 4804 | |
| }, | |
| { | |
| "epoch": 5.335463258785943, | |
| "grad_norm": 0.1632813232184168, | |
| "learning_rate": 6.59856124193712e-06, | |
| "loss": 0.1307, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.04429105669260025, | |
| "step": 560, | |
| "valid_targets_mean": 8736.4, | |
| "valid_targets_min": 3477 | |
| }, | |
| { | |
| "epoch": 5.383386581469648, | |
| "grad_norm": 0.15319164306426114, | |
| "learning_rate": 6.249580723770665e-06, | |
| "loss": 0.1295, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.040787823498249054, | |
| "step": 565, | |
| "valid_targets_mean": 8843.0, | |
| "valid_targets_min": 2326 | |
| }, | |
| { | |
| "epoch": 5.431309904153355, | |
| "grad_norm": 0.15092410861061442, | |
| "learning_rate": 5.908365042502801e-06, | |
| "loss": 0.1314, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.04534231498837471, | |
| "step": 570, | |
| "valid_targets_mean": 9223.4, | |
| "valid_targets_min": 3669 | |
| }, | |
| { | |
| "epoch": 5.479233226837061, | |
| "grad_norm": 0.16472723067001716, | |
| "learning_rate": 5.5751068820116784e-06, | |
| "loss": 0.1307, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.04310306906700134, | |
| "step": 575, | |
| "valid_targets_mean": 8187.3, | |
| "valid_targets_min": 3898 | |
| }, | |
| { | |
| "epoch": 5.527156549520766, | |
| "grad_norm": 0.15749687446681768, | |
| "learning_rate": 5.24999443257785e-06, | |
| "loss": 0.128, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.04516970366239548, | |
| "step": 580, | |
| "valid_targets_mean": 8733.0, | |
| "valid_targets_min": 4982 | |
| }, | |
| { | |
| "epoch": 5.575079872204473, | |
| "grad_norm": 0.16061448332897996, | |
| "learning_rate": 4.9332112846135664e-06, | |
| "loss": 0.1329, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.04158013314008713, | |
| "step": 585, | |
| "valid_targets_mean": 8391.5, | |
| "valid_targets_min": 2404 | |
| }, | |
| { | |
| "epoch": 5.623003194888179, | |
| "grad_norm": 0.1521419599103487, | |
| "learning_rate": 4.624936324989602e-06, | |
| "loss": 0.1321, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.04744524508714676, | |
| "step": 590, | |
| "valid_targets_mean": 8968.4, | |
| "valid_targets_min": 3648 | |
| }, | |
| { | |
| "epoch": 5.6709265175718855, | |
| "grad_norm": 0.1570326671883014, | |
| "learning_rate": 4.325343636018165e-06, | |
| "loss": 0.129, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.04405239224433899, | |
| "step": 595, | |
| "valid_targets_mean": 8272.5, | |
| "valid_targets_min": 2861 | |
| }, | |
| { | |
| "epoch": 5.718849840255591, | |
| "grad_norm": 0.1553455532119064, | |
| "learning_rate": 4.0346023971489215e-06, | |
| "loss": 0.1348, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.040806423872709274, | |
| "step": 600, | |
| "valid_targets_mean": 7626.4, | |
| "valid_targets_min": 2787 | |
| }, | |
| { | |
| "epoch": 5.766773162939297, | |
| "grad_norm": 0.16542431175580907, | |
| "learning_rate": 3.752876789433677e-06, | |
| "loss": 0.1289, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.041984084993600845, | |
| "step": 605, | |
| "valid_targets_mean": 7940.9, | |
| "valid_targets_min": 1813 | |
| }, | |
| { | |
| "epoch": 5.814696485623003, | |
| "grad_norm": 0.15600260550531508, | |
| "learning_rate": 3.480325902813624e-06, | |
| "loss": 0.1301, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.042893338948488235, | |
| "step": 610, | |
| "valid_targets_mean": 9183.9, | |
| "valid_targets_min": 2349 | |
| }, | |
| { | |
| "epoch": 5.862619808306709, | |
| "grad_norm": 0.14503907630842894, | |
| "learning_rate": 3.2171036462815563e-06, | |
| "loss": 0.1296, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.04696850851178169, | |
| "step": 615, | |
| "valid_targets_mean": 9935.0, | |
| "valid_targets_min": 3645 | |
| }, | |
| { | |
| "epoch": 5.9105431309904155, | |
| "grad_norm": 0.1460369617623546, | |
| "learning_rate": 2.9633586609697086e-06, | |
| "loss": 0.133, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.04132133349776268, | |
| "step": 620, | |
| "valid_targets_mean": 8354.1, | |
| "valid_targets_min": 1863 | |
| }, | |
| { | |
| "epoch": 5.958466453674122, | |
| "grad_norm": 0.14429256697612128, | |
| "learning_rate": 2.7192342362124048e-06, | |
| "loss": 0.1318, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.04253305494785309, | |
| "step": 625, | |
| "valid_targets_mean": 8797.8, | |
| "valid_targets_min": 2888 | |
| }, | |
| { | |
| "epoch": 6.0, | |
| "grad_norm": 0.24017089407840048, | |
| "learning_rate": 2.4848682286308346e-06, | |
| "loss": 0.1291, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.11856501549482346, | |
| "step": 630, | |
| "valid_targets_mean": 9350.8, | |
| "valid_targets_min": 2204 | |
| }, | |
| { | |
| "epoch": 6.047923322683706, | |
| "grad_norm": 0.14604381915322243, | |
| "learning_rate": 2.260392984285633e-06, | |
| "loss": 0.1264, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.04177611321210861, | |
| "step": 635, | |
| "valid_targets_mean": 8590.9, | |
| "valid_targets_min": 1520 | |
| }, | |
| { | |
| "epoch": 6.095846645367412, | |
| "grad_norm": 0.15105207764952777, | |
| "learning_rate": 2.0459352639413343e-06, | |
| "loss": 0.1307, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.036490269005298615, | |
| "step": 640, | |
| "valid_targets_mean": 8303.5, | |
| "valid_targets_min": 983 | |
| }, | |
| { | |
| "epoch": 6.143769968051118, | |
| "grad_norm": 0.14365857813801858, | |
| "learning_rate": 1.841616171484797e-06, | |
| "loss": 0.1289, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.03588128834962845, | |
| "step": 645, | |
| "valid_targets_mean": 7843.5, | |
| "valid_targets_min": 2037 | |
| }, | |
| { | |
| "epoch": 6.1916932907348246, | |
| "grad_norm": 0.17040842356447042, | |
| "learning_rate": 1.6475510855380195e-06, | |
| "loss": 0.1281, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.03724232316017151, | |
| "step": 650, | |
| "valid_targets_mean": 8363.8, | |
| "valid_targets_min": 2404 | |
| }, | |
| { | |
| "epoch": 6.23961661341853, | |
| "grad_norm": 0.14563485589355804, | |
| "learning_rate": 1.4638495943040854e-06, | |
| "loss": 0.1319, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.0440392792224884, | |
| "step": 655, | |
| "valid_targets_mean": 9282.5, | |
| "valid_targets_min": 1989 | |
| }, | |
| { | |
| "epoch": 6.287539936102236, | |
| "grad_norm": 0.15135495829792323, | |
| "learning_rate": 1.2906154336828913e-06, | |
| "loss": 0.1294, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.044929858297109604, | |
| "step": 660, | |
| "valid_targets_mean": 9016.3, | |
| "valid_targets_min": 3426 | |
| }, | |
| { | |
| "epoch": 6.335463258785943, | |
| "grad_norm": 0.1512474724189636, | |
| "learning_rate": 1.1279464286916508e-06, | |
| "loss": 0.1272, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.043425947427749634, | |
| "step": 665, | |
| "valid_targets_mean": 8946.5, | |
| "valid_targets_min": 1626 | |
| }, | |
| { | |
| "epoch": 6.383386581469648, | |
| "grad_norm": 0.16216185730198185, | |
| "learning_rate": 9.759344382233048e-07, | |
| "loss": 0.1299, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.046256884932518005, | |
| "step": 670, | |
| "valid_targets_mean": 7957.6, | |
| "valid_targets_min": 2969 | |
| }, | |
| { | |
| "epoch": 6.431309904153355, | |
| "grad_norm": 0.15052277362683217, | |
| "learning_rate": 8.34665303173976e-07, | |
| "loss": 0.1296, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.04201345890760422, | |
| "step": 675, | |
| "valid_targets_mean": 8329.0, | |
| "valid_targets_min": 2717 | |
| }, | |
| { | |
| "epoch": 6.479233226837061, | |
| "grad_norm": 0.1465602776227053, | |
| "learning_rate": 7.042187979687432e-07, | |
| "loss": 0.1302, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.03988049924373627, | |
| "step": 680, | |
| "valid_targets_mean": 8283.2, | |
| "valid_targets_min": 2349 | |
| }, | |
| { | |
| "epoch": 6.527156549520766, | |
| "grad_norm": 0.14185496391585614, | |
| "learning_rate": 5.846685855131929e-07, | |
| "loss": 0.1301, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.040278732776641846, | |
| "step": 685, | |
| "valid_targets_mean": 9115.4, | |
| "valid_targets_min": 3979 | |
| }, | |
| { | |
| "epoch": 6.575079872204473, | |
| "grad_norm": 0.1561692679171069, | |
| "learning_rate": 4.760821755961065e-07, | |
| "loss": 0.131, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.04538102447986603, | |
| "step": 690, | |
| "valid_targets_mean": 8077.0, | |
| "valid_targets_min": 3672 | |
| }, | |
| { | |
| "epoch": 6.623003194888179, | |
| "grad_norm": 0.1505424403302318, | |
| "learning_rate": 3.7852088676678665e-07, | |
| "loss": 0.1305, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.04282670468091965, | |
| "step": 695, | |
| "valid_targets_mean": 8324.1, | |
| "valid_targets_min": 2674 | |
| }, | |
| { | |
| "epoch": 6.6709265175718855, | |
| "grad_norm": 0.1530107403327867, | |
| "learning_rate": 2.920398117086043e-07, | |
| "loss": 0.1277, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.04476252198219299, | |
| "step": 700, | |
| "valid_targets_mean": 8553.5, | |
| "valid_targets_min": 3077 | |
| }, | |
| { | |
| "epoch": 6.718849840255591, | |
| "grad_norm": 0.1451392180151871, | |
| "learning_rate": 2.1668778612825347e-07, | |
| "loss": 0.1289, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.04205950349569321, | |
| "step": 705, | |
| "valid_targets_mean": 8100.0, | |
| "valid_targets_min": 2628 | |
| }, | |
| { | |
| "epoch": 6.766773162939297, | |
| "grad_norm": 0.1540835400434045, | |
| "learning_rate": 1.5250736117830455e-07, | |
| "loss": 0.1307, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.04908519983291626, | |
| "step": 710, | |
| "valid_targets_mean": 8852.7, | |
| "valid_targets_min": 1823 | |
| }, | |
| { | |
| "epoch": 6.814696485623003, | |
| "grad_norm": 0.1511755346652257, | |
| "learning_rate": 9.953477942866052e-08, | |
| "loss": 0.1336, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.047512397170066833, | |
| "step": 715, | |
| "valid_targets_mean": 8784.2, | |
| "valid_targets_min": 2941 | |
| }, | |
| { | |
| "epoch": 6.862619808306709, | |
| "grad_norm": 0.14155551457486815, | |
| "learning_rate": 5.779995440044594e-08, | |
| "loss": 0.1285, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.04104035720229149, | |
| "step": 720, | |
| "valid_targets_mean": 9481.8, | |
| "valid_targets_min": 3244 | |
| }, | |
| { | |
| "epoch": 6.9105431309904155, | |
| "grad_norm": 0.13812752928008018, | |
| "learning_rate": 2.7326453673872653e-08, | |
| "loss": 0.1312, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.04090867191553116, | |
| "step": 725, | |
| "valid_targets_mean": 8866.8, | |
| "valid_targets_min": 3547 | |
| }, | |
| { | |
| "epoch": 6.958466453674122, | |
| "grad_norm": 0.1527802388811017, | |
| "learning_rate": 8.131485579692121e-09, | |
| "loss": 0.1275, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.04462620988488197, | |
| "step": 730, | |
| "valid_targets_mean": 9477.2, | |
| "valid_targets_min": 4703 | |
| }, | |
| { | |
| "epoch": 7.0, | |
| "grad_norm": 0.27520263662765626, | |
| "learning_rate": 2.2588948167756586e-10, | |
| "loss": 0.1299, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.13444499671459198, | |
| "step": 735, | |
| "valid_targets_mean": 8868.2, | |
| "valid_targets_min": 4152 | |
| }, | |
| { | |
| "epoch": 7.0, | |
| "step": 735, | |
| "total_flos": 5.441373116175483e+18, | |
| "train_loss": 0.0, | |
| "train_runtime": 1.1873, | |
| "train_samples_per_second": 58956.554, | |
| "train_steps_per_second": 619.044 | |
| } | |
| ], | |
| "logging_steps": 5, | |
| "max_steps": 735, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 7, | |
| "save_steps": 300, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": true | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 5.441373116175483e+18, | |
| "train_batch_size": 1, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |