Text Generation
Transformers
Safetensors
lfm2_moe
Generated from Trainer
trl
sft
unsloth
conversational
compressed-tensors
Instructions to use Ba2han/augment-nvfp4a16 with libraries, inference providers, notebooks, and local apps. Follow these links to get started.
- Libraries
- Transformers
How to use Ba2han/augment-nvfp4a16 with Transformers:
# Use a pipeline as a high-level helper
from transformers import pipeline

pipe = pipeline("text-generation", model="Ba2han/augment-nvfp4a16")
messages = [
    {"role": "user", "content": "Who are you?"},
]
pipe(messages)

# Load model directly
from transformers import AutoTokenizer, AutoModelForCausalLM

tokenizer = AutoTokenizer.from_pretrained("Ba2han/augment-nvfp4a16")
model = AutoModelForCausalLM.from_pretrained("Ba2han/augment-nvfp4a16")
messages = [
    {"role": "user", "content": "Who are you?"},
]
inputs = tokenizer.apply_chat_template(
    messages,
    add_generation_prompt=True,
    tokenize=True,
    return_dict=True,
    return_tensors="pt",
).to(model.device)

outputs = model.generate(**inputs, max_new_tokens=40)
print(tokenizer.decode(outputs[0][inputs["input_ids"].shape[-1]:]))
- Notebooks
- Google Colab
- Kaggle
- Local Apps
- vLLM
How to use Ba2han/augment-nvfp4a16 with vLLM:
Install from pip and serve model
# Install vLLM from pip:
pip install vllm
# Start the vLLM server:
vllm serve "Ba2han/augment-nvfp4a16"
# Call the server using curl (OpenAI-compatible API):
curl -X POST "http://localhost:8000/v1/chat/completions" \
  -H "Content-Type: application/json" \
  --data '{
    "model": "Ba2han/augment-nvfp4a16",
    "messages": [
      {
        "role": "user",
        "content": "What is the capital of France?"
      }
    ]
  }'
Use Docker
docker model run hf.co/Ba2han/augment-nvfp4a16
- SGLang
How to use Ba2han/augment-nvfp4a16 with SGLang:
Install from pip and serve model
# Install SGLang from pip:
pip install sglang
# Start the SGLang server:
python3 -m sglang.launch_server \
  --model-path "Ba2han/augment-nvfp4a16" \
  --host 0.0.0.0 \
  --port 30000
# Call the server using curl (OpenAI-compatible API):
curl -X POST "http://localhost:30000/v1/chat/completions" \
  -H "Content-Type: application/json" \
  --data '{
    "model": "Ba2han/augment-nvfp4a16",
    "messages": [
      {
        "role": "user",
        "content": "What is the capital of France?"
      }
    ]
  }'
Use Docker images
docker run --gpus all \
  --shm-size 32g \
  -p 30000:30000 \
  -v ~/.cache/huggingface:/root/.cache/huggingface \
  --env "HF_TOKEN=<secret>" \
  --ipc=host \
  lmsysorg/sglang:latest \
  python3 -m sglang.launch_server \
    --model-path "Ba2han/augment-nvfp4a16" \
    --host 0.0.0.0 \
    --port 30000
# Call the server using curl (OpenAI-compatible API):
curl -X POST "http://localhost:30000/v1/chat/completions" \
  -H "Content-Type: application/json" \
  --data '{
    "model": "Ba2han/augment-nvfp4a16",
    "messages": [
      {
        "role": "user",
        "content": "What is the capital of France?"
      }
    ]
  }'
- Unsloth Studio new
How to use Ba2han/augment-nvfp4a16 with Unsloth Studio:
Install Unsloth Studio (macOS, Linux, WSL)
curl -fsSL https://unsloth.ai/install.sh | sh
# Run unsloth studio
unsloth studio -H 0.0.0.0 -p 8888
# Then open http://localhost:8888 in your browser
# Search for Ba2han/augment-nvfp4a16 to start chatting
Install Unsloth Studio (Windows)
irm https://unsloth.ai/install.ps1 | iex
# Run unsloth studio
unsloth studio -H 0.0.0.0 -p 8888
# Then open http://localhost:8888 in your browser
# Search for Ba2han/augment-nvfp4a16 to start chatting
Using HuggingFace Spaces for Unsloth
# No setup required
# Open https://huggingface.co/spaces/unsloth/studio in your browser
# Search for Ba2han/augment-nvfp4a16 to start chatting
Load model with FastModel
pip install unsloth

from unsloth import FastModel

model, tokenizer = FastModel.from_pretrained(
    model_name="Ba2han/augment-nvfp4a16",
    max_seq_length=2048,
)
- Docker Model Runner
How to use Ba2han/augment-nvfp4a16 with Docker Model Runner:
docker model run hf.co/Ba2han/augment-nvfp4a16
| { | |
| "best_global_step": null, | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 1.1000694927032661, | |
| "eval_steps": 500, | |
| "global_step": 2375, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.000463284688441047, | |
| "grad_norm": 1.1484375, | |
| "learning_rate": 0.0, | |
| "loss": 1.3776911497116089, | |
| "step": 1 | |
| }, | |
| { | |
| "epoch": 0.000926569376882094, | |
| "grad_norm": 1.90625, | |
| "learning_rate": 6.31578947368421e-07, | |
| "loss": 1.3288359642028809, | |
| "step": 2 | |
| }, | |
| { | |
| "epoch": 0.001389854065323141, | |
| "grad_norm": 1.4765625, | |
| "learning_rate": 1.263157894736842e-06, | |
| "loss": 1.3400298357009888, | |
| "step": 3 | |
| }, | |
| { | |
| "epoch": 0.001853138753764188, | |
| "grad_norm": 1.1796875, | |
| "learning_rate": 1.8947368421052632e-06, | |
| "loss": 1.2718102931976318, | |
| "step": 4 | |
| }, | |
| { | |
| "epoch": 0.0023164234422052353, | |
| "grad_norm": 1.53125, | |
| "learning_rate": 2.526315789473684e-06, | |
| "loss": 1.3158093690872192, | |
| "step": 5 | |
| }, | |
| { | |
| "epoch": 0.002779708130646282, | |
| "grad_norm": 1.53125, | |
| "learning_rate": 3.157894736842105e-06, | |
| "loss": 1.3021347522735596, | |
| "step": 6 | |
| }, | |
| { | |
| "epoch": 0.0032429928190873293, | |
| "grad_norm": 1.4921875, | |
| "learning_rate": 3.7894736842105264e-06, | |
| "loss": 1.3042571544647217, | |
| "step": 7 | |
| }, | |
| { | |
| "epoch": 0.003706277507528376, | |
| "grad_norm": 1.515625, | |
| "learning_rate": 4.4210526315789476e-06, | |
| "loss": 1.5282930135726929, | |
| "step": 8 | |
| }, | |
| { | |
| "epoch": 0.004169562195969423, | |
| "grad_norm": 1.4921875, | |
| "learning_rate": 5.052631578947368e-06, | |
| "loss": 1.1924694776535034, | |
| "step": 9 | |
| }, | |
| { | |
| "epoch": 0.0046328468844104706, | |
| "grad_norm": 1.5625, | |
| "learning_rate": 5.684210526315789e-06, | |
| "loss": 1.2525532245635986, | |
| "step": 10 | |
| }, | |
| { | |
| "epoch": 0.005096131572851517, | |
| "grad_norm": 1.2890625, | |
| "learning_rate": 6.31578947368421e-06, | |
| "loss": 1.354798436164856, | |
| "step": 11 | |
| }, | |
| { | |
| "epoch": 0.005559416261292564, | |
| "grad_norm": 1.359375, | |
| "learning_rate": 6.947368421052632e-06, | |
| "loss": 1.2614648342132568, | |
| "step": 12 | |
| }, | |
| { | |
| "epoch": 0.006022700949733611, | |
| "grad_norm": 1.59375, | |
| "learning_rate": 7.578947368421053e-06, | |
| "loss": 1.441209077835083, | |
| "step": 13 | |
| }, | |
| { | |
| "epoch": 0.006485985638174659, | |
| "grad_norm": 1.0546875, | |
| "learning_rate": 8.210526315789475e-06, | |
| "loss": 1.2802681922912598, | |
| "step": 14 | |
| }, | |
| { | |
| "epoch": 0.006949270326615705, | |
| "grad_norm": 1.265625, | |
| "learning_rate": 8.842105263157895e-06, | |
| "loss": 1.3814109563827515, | |
| "step": 15 | |
| }, | |
| { | |
| "epoch": 0.007412555015056752, | |
| "grad_norm": 1.2421875, | |
| "learning_rate": 9.473684210526315e-06, | |
| "loss": 1.0409232378005981, | |
| "step": 16 | |
| }, | |
| { | |
| "epoch": 0.007875839703497799, | |
| "grad_norm": 1.015625, | |
| "learning_rate": 1.0105263157894736e-05, | |
| "loss": 1.2293620109558105, | |
| "step": 17 | |
| }, | |
| { | |
| "epoch": 0.008339124391938846, | |
| "grad_norm": 1.0390625, | |
| "learning_rate": 1.0736842105263158e-05, | |
| "loss": 1.2468554973602295, | |
| "step": 18 | |
| }, | |
| { | |
| "epoch": 0.008802409080379893, | |
| "grad_norm": 0.91796875, | |
| "learning_rate": 1.1368421052631578e-05, | |
| "loss": 1.2527501583099365, | |
| "step": 19 | |
| }, | |
| { | |
| "epoch": 0.009265693768820941, | |
| "grad_norm": 1.140625, | |
| "learning_rate": 1.2e-05, | |
| "loss": 1.2057502269744873, | |
| "step": 20 | |
| }, | |
| { | |
| "epoch": 0.009728978457261988, | |
| "grad_norm": 1.078125, | |
| "learning_rate": 1.263157894736842e-05, | |
| "loss": 1.190750241279602, | |
| "step": 21 | |
| }, | |
| { | |
| "epoch": 0.010192263145703035, | |
| "grad_norm": 1.0, | |
| "learning_rate": 1.3263157894736844e-05, | |
| "loss": 1.2279465198516846, | |
| "step": 22 | |
| }, | |
| { | |
| "epoch": 0.010655547834144082, | |
| "grad_norm": 0.8828125, | |
| "learning_rate": 1.3894736842105265e-05, | |
| "loss": 1.2643662691116333, | |
| "step": 23 | |
| }, | |
| { | |
| "epoch": 0.011118832522585128, | |
| "grad_norm": 1.0390625, | |
| "learning_rate": 1.4526315789473685e-05, | |
| "loss": 1.1729130744934082, | |
| "step": 24 | |
| }, | |
| { | |
| "epoch": 0.011582117211026175, | |
| "grad_norm": 1.0, | |
| "learning_rate": 1.5157894736842105e-05, | |
| "loss": 1.249855875968933, | |
| "step": 25 | |
| }, | |
| { | |
| "epoch": 0.012045401899467222, | |
| "grad_norm": 0.828125, | |
| "learning_rate": 1.578947368421053e-05, | |
| "loss": 1.2158374786376953, | |
| "step": 26 | |
| }, | |
| { | |
| "epoch": 0.01250868658790827, | |
| "grad_norm": 0.89453125, | |
| "learning_rate": 1.642105263157895e-05, | |
| "loss": 1.1104485988616943, | |
| "step": 27 | |
| }, | |
| { | |
| "epoch": 0.012971971276349317, | |
| "grad_norm": 0.828125, | |
| "learning_rate": 1.705263157894737e-05, | |
| "loss": 1.1513713598251343, | |
| "step": 28 | |
| }, | |
| { | |
| "epoch": 0.013435255964790364, | |
| "grad_norm": 0.89453125, | |
| "learning_rate": 1.768421052631579e-05, | |
| "loss": 1.1879022121429443, | |
| "step": 29 | |
| }, | |
| { | |
| "epoch": 0.01389854065323141, | |
| "grad_norm": 0.83984375, | |
| "learning_rate": 1.831578947368421e-05, | |
| "loss": 1.250070333480835, | |
| "step": 30 | |
| }, | |
| { | |
| "epoch": 0.014361825341672458, | |
| "grad_norm": 0.859375, | |
| "learning_rate": 1.894736842105263e-05, | |
| "loss": 1.1055482625961304, | |
| "step": 31 | |
| }, | |
| { | |
| "epoch": 0.014825110030113504, | |
| "grad_norm": 0.7890625, | |
| "learning_rate": 1.957894736842105e-05, | |
| "loss": 1.1359180212020874, | |
| "step": 32 | |
| }, | |
| { | |
| "epoch": 0.015288394718554551, | |
| "grad_norm": 0.83203125, | |
| "learning_rate": 2.0210526315789472e-05, | |
| "loss": 1.426615595817566, | |
| "step": 33 | |
| }, | |
| { | |
| "epoch": 0.015751679406995598, | |
| "grad_norm": 0.828125, | |
| "learning_rate": 2.0842105263157895e-05, | |
| "loss": 1.1893213987350464, | |
| "step": 34 | |
| }, | |
| { | |
| "epoch": 0.016214964095436647, | |
| "grad_norm": 0.84765625, | |
| "learning_rate": 2.1473684210526316e-05, | |
| "loss": 1.2659728527069092, | |
| "step": 35 | |
| }, | |
| { | |
| "epoch": 0.01667824878387769, | |
| "grad_norm": 0.7734375, | |
| "learning_rate": 2.2105263157894736e-05, | |
| "loss": 1.2023910284042358, | |
| "step": 36 | |
| }, | |
| { | |
| "epoch": 0.01714153347231874, | |
| "grad_norm": 0.8203125, | |
| "learning_rate": 2.2736842105263157e-05, | |
| "loss": 1.4574058055877686, | |
| "step": 37 | |
| }, | |
| { | |
| "epoch": 0.017604818160759785, | |
| "grad_norm": 0.90625, | |
| "learning_rate": 2.336842105263158e-05, | |
| "loss": 0.9998283386230469, | |
| "step": 38 | |
| }, | |
| { | |
| "epoch": 0.018068102849200834, | |
| "grad_norm": 0.81640625, | |
| "learning_rate": 2.4e-05, | |
| "loss": 1.094596266746521, | |
| "step": 39 | |
| }, | |
| { | |
| "epoch": 0.018531387537641882, | |
| "grad_norm": 0.80859375, | |
| "learning_rate": 2.4631578947368424e-05, | |
| "loss": 1.2685940265655518, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 0.018994672226082927, | |
| "grad_norm": 0.86328125, | |
| "learning_rate": 2.526315789473684e-05, | |
| "loss": 1.370650053024292, | |
| "step": 41 | |
| }, | |
| { | |
| "epoch": 0.019457956914523976, | |
| "grad_norm": 0.9140625, | |
| "learning_rate": 2.5894736842105265e-05, | |
| "loss": 1.0709939002990723, | |
| "step": 42 | |
| }, | |
| { | |
| "epoch": 0.01992124160296502, | |
| "grad_norm": 0.73046875, | |
| "learning_rate": 2.652631578947369e-05, | |
| "loss": 1.2332732677459717, | |
| "step": 43 | |
| }, | |
| { | |
| "epoch": 0.02038452629140607, | |
| "grad_norm": 0.81640625, | |
| "learning_rate": 2.7157894736842106e-05, | |
| "loss": 1.1283347606658936, | |
| "step": 44 | |
| }, | |
| { | |
| "epoch": 0.020847810979847115, | |
| "grad_norm": 0.87109375, | |
| "learning_rate": 2.778947368421053e-05, | |
| "loss": 1.050850749015808, | |
| "step": 45 | |
| }, | |
| { | |
| "epoch": 0.021311095668288163, | |
| "grad_norm": 0.74609375, | |
| "learning_rate": 2.8421052631578946e-05, | |
| "loss": 1.1958825588226318, | |
| "step": 46 | |
| }, | |
| { | |
| "epoch": 0.02177438035672921, | |
| "grad_norm": 0.7265625, | |
| "learning_rate": 2.905263157894737e-05, | |
| "loss": 1.439139485359192, | |
| "step": 47 | |
| }, | |
| { | |
| "epoch": 0.022237665045170257, | |
| "grad_norm": 0.7109375, | |
| "learning_rate": 2.968421052631579e-05, | |
| "loss": 1.2316217422485352, | |
| "step": 48 | |
| }, | |
| { | |
| "epoch": 0.022700949733611305, | |
| "grad_norm": 0.79296875, | |
| "learning_rate": 3.031578947368421e-05, | |
| "loss": 1.177676796913147, | |
| "step": 49 | |
| }, | |
| { | |
| "epoch": 0.02316423442205235, | |
| "grad_norm": 0.74609375, | |
| "learning_rate": 3.094736842105263e-05, | |
| "loss": 1.1291377544403076, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 0.0236275191104934, | |
| "grad_norm": 0.83984375, | |
| "learning_rate": 3.157894736842106e-05, | |
| "loss": 1.2162253856658936, | |
| "step": 51 | |
| }, | |
| { | |
| "epoch": 0.024090803798934444, | |
| "grad_norm": 0.875, | |
| "learning_rate": 3.221052631578947e-05, | |
| "loss": 1.4145865440368652, | |
| "step": 52 | |
| }, | |
| { | |
| "epoch": 0.024554088487375492, | |
| "grad_norm": 0.859375, | |
| "learning_rate": 3.28421052631579e-05, | |
| "loss": 1.2053899765014648, | |
| "step": 53 | |
| }, | |
| { | |
| "epoch": 0.02501737317581654, | |
| "grad_norm": 0.8671875, | |
| "learning_rate": 3.347368421052631e-05, | |
| "loss": 1.406412959098816, | |
| "step": 54 | |
| }, | |
| { | |
| "epoch": 0.025480657864257586, | |
| "grad_norm": 0.828125, | |
| "learning_rate": 3.410526315789474e-05, | |
| "loss": 1.3238595724105835, | |
| "step": 55 | |
| }, | |
| { | |
| "epoch": 0.025943942552698634, | |
| "grad_norm": 0.8203125, | |
| "learning_rate": 3.473684210526316e-05, | |
| "loss": 1.089475154876709, | |
| "step": 56 | |
| }, | |
| { | |
| "epoch": 0.02640722724113968, | |
| "grad_norm": 0.85546875, | |
| "learning_rate": 3.536842105263158e-05, | |
| "loss": 0.9788758754730225, | |
| "step": 57 | |
| }, | |
| { | |
| "epoch": 0.026870511929580728, | |
| "grad_norm": 0.93359375, | |
| "learning_rate": 3.6e-05, | |
| "loss": 1.3209675550460815, | |
| "step": 58 | |
| }, | |
| { | |
| "epoch": 0.027333796618021773, | |
| "grad_norm": 0.73828125, | |
| "learning_rate": 3.663157894736842e-05, | |
| "loss": 1.0189337730407715, | |
| "step": 59 | |
| }, | |
| { | |
| "epoch": 0.02779708130646282, | |
| "grad_norm": 0.92578125, | |
| "learning_rate": 3.726315789473684e-05, | |
| "loss": 1.2801414728164673, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 0.028260365994903867, | |
| "grad_norm": 0.765625, | |
| "learning_rate": 3.789473684210526e-05, | |
| "loss": 1.1465449333190918, | |
| "step": 61 | |
| }, | |
| { | |
| "epoch": 0.028723650683344915, | |
| "grad_norm": 0.87890625, | |
| "learning_rate": 3.852631578947369e-05, | |
| "loss": 1.259995937347412, | |
| "step": 62 | |
| }, | |
| { | |
| "epoch": 0.029186935371785964, | |
| "grad_norm": 0.8125, | |
| "learning_rate": 3.91578947368421e-05, | |
| "loss": 1.0937800407409668, | |
| "step": 63 | |
| }, | |
| { | |
| "epoch": 0.02965022006022701, | |
| "grad_norm": 0.8046875, | |
| "learning_rate": 3.978947368421053e-05, | |
| "loss": 1.218725562095642, | |
| "step": 64 | |
| }, | |
| { | |
| "epoch": 0.030113504748668057, | |
| "grad_norm": 0.7265625, | |
| "learning_rate": 4.0421052631578943e-05, | |
| "loss": 1.2543126344680786, | |
| "step": 65 | |
| }, | |
| { | |
| "epoch": 0.030576789437109102, | |
| "grad_norm": 0.75, | |
| "learning_rate": 4.105263157894737e-05, | |
| "loss": 1.2053662538528442, | |
| "step": 66 | |
| }, | |
| { | |
| "epoch": 0.03104007412555015, | |
| "grad_norm": 0.78515625, | |
| "learning_rate": 4.168421052631579e-05, | |
| "loss": 1.1383905410766602, | |
| "step": 67 | |
| }, | |
| { | |
| "epoch": 0.031503358813991196, | |
| "grad_norm": 0.76953125, | |
| "learning_rate": 4.231578947368421e-05, | |
| "loss": 1.1001615524291992, | |
| "step": 68 | |
| }, | |
| { | |
| "epoch": 0.031966643502432245, | |
| "grad_norm": 0.86328125, | |
| "learning_rate": 4.294736842105263e-05, | |
| "loss": 1.2164722681045532, | |
| "step": 69 | |
| }, | |
| { | |
| "epoch": 0.03242992819087329, | |
| "grad_norm": 0.78125, | |
| "learning_rate": 4.357894736842106e-05, | |
| "loss": 1.1911834478378296, | |
| "step": 70 | |
| }, | |
| { | |
| "epoch": 0.03289321287931434, | |
| "grad_norm": 0.9375, | |
| "learning_rate": 4.421052631578947e-05, | |
| "loss": 1.1898846626281738, | |
| "step": 71 | |
| }, | |
| { | |
| "epoch": 0.03335649756775538, | |
| "grad_norm": 0.79296875, | |
| "learning_rate": 4.48421052631579e-05, | |
| "loss": 1.3878438472747803, | |
| "step": 72 | |
| }, | |
| { | |
| "epoch": 0.03381978225619643, | |
| "grad_norm": 0.8515625, | |
| "learning_rate": 4.547368421052631e-05, | |
| "loss": 1.1823328733444214, | |
| "step": 73 | |
| }, | |
| { | |
| "epoch": 0.03428306694463748, | |
| "grad_norm": 0.7265625, | |
| "learning_rate": 4.610526315789474e-05, | |
| "loss": 1.1236375570297241, | |
| "step": 74 | |
| }, | |
| { | |
| "epoch": 0.03474635163307853, | |
| "grad_norm": 0.73046875, | |
| "learning_rate": 4.673684210526316e-05, | |
| "loss": 1.2263376712799072, | |
| "step": 75 | |
| }, | |
| { | |
| "epoch": 0.03520963632151957, | |
| "grad_norm": 0.7734375, | |
| "learning_rate": 4.736842105263158e-05, | |
| "loss": 1.0898189544677734, | |
| "step": 76 | |
| }, | |
| { | |
| "epoch": 0.03567292100996062, | |
| "grad_norm": 0.72265625, | |
| "learning_rate": 4.8e-05, | |
| "loss": 1.1455436944961548, | |
| "step": 77 | |
| }, | |
| { | |
| "epoch": 0.03613620569840167, | |
| "grad_norm": 0.765625, | |
| "learning_rate": 4.799997759200591e-05, | |
| "loss": 1.3289111852645874, | |
| "step": 78 | |
| }, | |
| { | |
| "epoch": 0.036599490386842716, | |
| "grad_norm": 0.7890625, | |
| "learning_rate": 4.799991036806548e-05, | |
| "loss": 1.103787899017334, | |
| "step": 79 | |
| }, | |
| { | |
| "epoch": 0.037062775075283765, | |
| "grad_norm": 0.7890625, | |
| "learning_rate": 4.799979832830423e-05, | |
| "loss": 1.1579338312149048, | |
| "step": 80 | |
| }, | |
| { | |
| "epoch": 0.037526059763724806, | |
| "grad_norm": 0.7734375, | |
| "learning_rate": 4.799964147293139e-05, | |
| "loss": 1.3283060789108276, | |
| "step": 81 | |
| }, | |
| { | |
| "epoch": 0.037989344452165855, | |
| "grad_norm": 0.86328125, | |
| "learning_rate": 4.799943980223985e-05, | |
| "loss": 0.9260512590408325, | |
| "step": 82 | |
| }, | |
| { | |
| "epoch": 0.0384526291406069, | |
| "grad_norm": 0.76171875, | |
| "learning_rate": 4.7999193316606205e-05, | |
| "loss": 1.1989682912826538, | |
| "step": 83 | |
| }, | |
| { | |
| "epoch": 0.03891591382904795, | |
| "grad_norm": 0.8046875, | |
| "learning_rate": 4.799890201649072e-05, | |
| "loss": 1.2116103172302246, | |
| "step": 84 | |
| }, | |
| { | |
| "epoch": 0.039379198517489, | |
| "grad_norm": 0.86328125, | |
| "learning_rate": 4.7998565902437354e-05, | |
| "loss": 1.2562410831451416, | |
| "step": 85 | |
| }, | |
| { | |
| "epoch": 0.03984248320593004, | |
| "grad_norm": 0.80078125, | |
| "learning_rate": 4.799818497507374e-05, | |
| "loss": 1.1803618669509888, | |
| "step": 86 | |
| }, | |
| { | |
| "epoch": 0.04030576789437109, | |
| "grad_norm": 0.78125, | |
| "learning_rate": 4.79977592351112e-05, | |
| "loss": 1.193110466003418, | |
| "step": 87 | |
| }, | |
| { | |
| "epoch": 0.04076905258281214, | |
| "grad_norm": 0.74609375, | |
| "learning_rate": 4.799728868334472e-05, | |
| "loss": 1.1182022094726562, | |
| "step": 88 | |
| }, | |
| { | |
| "epoch": 0.04123233727125319, | |
| "grad_norm": 0.78515625, | |
| "learning_rate": 4.799677332065299e-05, | |
| "loss": 1.052176833152771, | |
| "step": 89 | |
| }, | |
| { | |
| "epoch": 0.04169562195969423, | |
| "grad_norm": 0.80078125, | |
| "learning_rate": 4.799621314799836e-05, | |
| "loss": 1.3748915195465088, | |
| "step": 90 | |
| }, | |
| { | |
| "epoch": 0.04215890664813528, | |
| "grad_norm": 0.75, | |
| "learning_rate": 4.799560816642687e-05, | |
| "loss": 1.1480522155761719, | |
| "step": 91 | |
| }, | |
| { | |
| "epoch": 0.042622191336576326, | |
| "grad_norm": 0.73046875, | |
| "learning_rate": 4.79949583770682e-05, | |
| "loss": 1.0132099390029907, | |
| "step": 92 | |
| }, | |
| { | |
| "epoch": 0.043085476025017375, | |
| "grad_norm": 0.83203125, | |
| "learning_rate": 4.799426378113573e-05, | |
| "loss": 1.1310032606124878, | |
| "step": 93 | |
| }, | |
| { | |
| "epoch": 0.04354876071345842, | |
| "grad_norm": 0.9296875, | |
| "learning_rate": 4.799352437992651e-05, | |
| "loss": 1.1506468057632446, | |
| "step": 94 | |
| }, | |
| { | |
| "epoch": 0.044012045401899465, | |
| "grad_norm": 0.76953125, | |
| "learning_rate": 4.7992740174821246e-05, | |
| "loss": 0.833928108215332, | |
| "step": 95 | |
| }, | |
| { | |
| "epoch": 0.04447533009034051, | |
| "grad_norm": 0.734375, | |
| "learning_rate": 4.79919111672843e-05, | |
| "loss": 1.064217209815979, | |
| "step": 96 | |
| }, | |
| { | |
| "epoch": 0.04493861477878156, | |
| "grad_norm": 0.80859375, | |
| "learning_rate": 4.799103735886371e-05, | |
| "loss": 1.0427517890930176, | |
| "step": 97 | |
| }, | |
| { | |
| "epoch": 0.04540189946722261, | |
| "grad_norm": 0.90625, | |
| "learning_rate": 4.7990118751191185e-05, | |
| "loss": 1.0797550678253174, | |
| "step": 98 | |
| }, | |
| { | |
| "epoch": 0.04586518415566365, | |
| "grad_norm": 1.0234375, | |
| "learning_rate": 4.798915534598205e-05, | |
| "loss": 1.0864336490631104, | |
| "step": 99 | |
| }, | |
| { | |
| "epoch": 0.0463284688441047, | |
| "grad_norm": 0.9609375, | |
| "learning_rate": 4.79881471450353e-05, | |
| "loss": 1.1602739095687866, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.04679175353254575, | |
| "grad_norm": 0.82421875, | |
| "learning_rate": 4.79870941502336e-05, | |
| "loss": 1.045765995979309, | |
| "step": 101 | |
| }, | |
| { | |
| "epoch": 0.0472550382209868, | |
| "grad_norm": 0.77734375, | |
| "learning_rate": 4.798599636354323e-05, | |
| "loss": 1.2531412839889526, | |
| "step": 102 | |
| }, | |
| { | |
| "epoch": 0.047718322909427846, | |
| "grad_norm": 0.8125, | |
| "learning_rate": 4.7984853787014124e-05, | |
| "loss": 1.208916425704956, | |
| "step": 103 | |
| }, | |
| { | |
| "epoch": 0.04818160759786889, | |
| "grad_norm": 0.81640625, | |
| "learning_rate": 4.798366642277986e-05, | |
| "loss": 1.238208532333374, | |
| "step": 104 | |
| }, | |
| { | |
| "epoch": 0.048644892286309936, | |
| "grad_norm": 0.8828125, | |
| "learning_rate": 4.7982434273057635e-05, | |
| "loss": 0.9851164817810059, | |
| "step": 105 | |
| }, | |
| { | |
| "epoch": 0.049108176974750985, | |
| "grad_norm": 0.875, | |
| "learning_rate": 4.798115734014828e-05, | |
| "loss": 1.0408838987350464, | |
| "step": 106 | |
| }, | |
| { | |
| "epoch": 0.04957146166319203, | |
| "grad_norm": 0.83984375, | |
| "learning_rate": 4.7979835626436254e-05, | |
| "loss": 1.191272497177124, | |
| "step": 107 | |
| }, | |
| { | |
| "epoch": 0.05003474635163308, | |
| "grad_norm": 0.828125, | |
| "learning_rate": 4.797846913438965e-05, | |
| "loss": 0.9605915546417236, | |
| "step": 108 | |
| }, | |
| { | |
| "epoch": 0.05049803104007412, | |
| "grad_norm": 0.73828125, | |
| "learning_rate": 4.797705786656015e-05, | |
| "loss": 1.1408090591430664, | |
| "step": 109 | |
| }, | |
| { | |
| "epoch": 0.05096131572851517, | |
| "grad_norm": 0.796875, | |
| "learning_rate": 4.797560182558307e-05, | |
| "loss": 1.277418613433838, | |
| "step": 110 | |
| }, | |
| { | |
| "epoch": 0.05142460041695622, | |
| "grad_norm": 0.859375, | |
| "learning_rate": 4.797410101417731e-05, | |
| "loss": 1.1940449476242065, | |
| "step": 111 | |
| }, | |
| { | |
| "epoch": 0.05188788510539727, | |
| "grad_norm": 0.703125, | |
| "learning_rate": 4.7972555435145395e-05, | |
| "loss": 1.1584206819534302, | |
| "step": 112 | |
| }, | |
| { | |
| "epoch": 0.05235116979383831, | |
| "grad_norm": 0.8125, | |
| "learning_rate": 4.7970965091373425e-05, | |
| "loss": 1.314054250717163, | |
| "step": 113 | |
| }, | |
| { | |
| "epoch": 0.05281445448227936, | |
| "grad_norm": 0.765625, | |
| "learning_rate": 4.796932998583113e-05, | |
| "loss": 1.155271053314209, | |
| "step": 114 | |
| }, | |
| { | |
| "epoch": 0.05327773917072041, | |
| "grad_norm": 0.87890625, | |
| "learning_rate": 4.7967650121571754e-05, | |
| "loss": 1.0596888065338135, | |
| "step": 115 | |
| }, | |
| { | |
| "epoch": 0.053741023859161456, | |
| "grad_norm": 0.8203125, | |
| "learning_rate": 4.796592550173219e-05, | |
| "loss": 1.0339300632476807, | |
| "step": 116 | |
| }, | |
| { | |
| "epoch": 0.054204308547602505, | |
| "grad_norm": 0.81640625, | |
| "learning_rate": 4.7964156129532876e-05, | |
| "loss": 1.0572959184646606, | |
| "step": 117 | |
| }, | |
| { | |
| "epoch": 0.054667593236043546, | |
| "grad_norm": 1.15625, | |
| "learning_rate": 4.796234200827781e-05, | |
| "loss": 1.2064818143844604, | |
| "step": 118 | |
| }, | |
| { | |
| "epoch": 0.055130877924484595, | |
| "grad_norm": 0.7890625, | |
| "learning_rate": 4.796048314135457e-05, | |
| "loss": 1.0486756563186646, | |
| "step": 119 | |
| }, | |
| { | |
| "epoch": 0.05559416261292564, | |
| "grad_norm": 0.78125, | |
| "learning_rate": 4.7958579532234265e-05, | |
| "loss": 1.2740678787231445, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 0.05605744730136669, | |
| "grad_norm": 0.76171875, | |
| "learning_rate": 4.795663118447158e-05, | |
| "loss": 1.2558701038360596, | |
| "step": 121 | |
| }, | |
| { | |
| "epoch": 0.05652073198980773, | |
| "grad_norm": 0.76953125, | |
| "learning_rate": 4.7954638101704724e-05, | |
| "loss": 1.0604870319366455, | |
| "step": 122 | |
| }, | |
| { | |
| "epoch": 0.05698401667824878, | |
| "grad_norm": 0.77734375, | |
| "learning_rate": 4.7952600287655444e-05, | |
| "loss": 1.0435048341751099, | |
| "step": 123 | |
| }, | |
| { | |
| "epoch": 0.05744730136668983, | |
| "grad_norm": 0.703125, | |
| "learning_rate": 4.795051774612902e-05, | |
| "loss": 1.0488735437393188, | |
| "step": 124 | |
| }, | |
| { | |
| "epoch": 0.05791058605513088, | |
| "grad_norm": 0.8359375, | |
| "learning_rate": 4.7948390481014245e-05, | |
| "loss": 1.0075401067733765, | |
| "step": 125 | |
| }, | |
| { | |
| "epoch": 0.05837387074357193, | |
| "grad_norm": 0.7890625, | |
| "learning_rate": 4.7946218496283435e-05, | |
| "loss": 1.1233757734298706, | |
| "step": 126 | |
| }, | |
| { | |
| "epoch": 0.05883715543201297, | |
| "grad_norm": 0.83203125, | |
| "learning_rate": 4.794400179599242e-05, | |
| "loss": 1.1474615335464478, | |
| "step": 127 | |
| }, | |
| { | |
| "epoch": 0.05930044012045402, | |
| "grad_norm": 0.76171875, | |
| "learning_rate": 4.794174038428049e-05, | |
| "loss": 1.0086901187896729, | |
| "step": 128 | |
| }, | |
| { | |
| "epoch": 0.059763724808895066, | |
| "grad_norm": 0.76171875, | |
| "learning_rate": 4.793943426537048e-05, | |
| "loss": 1.119909644126892, | |
| "step": 129 | |
| }, | |
| { | |
| "epoch": 0.060227009497336115, | |
| "grad_norm": 0.765625, | |
| "learning_rate": 4.793708344356867e-05, | |
| "loss": 1.0933876037597656, | |
| "step": 130 | |
| }, | |
| { | |
| "epoch": 0.06069029418577716, | |
| "grad_norm": 0.73828125, | |
| "learning_rate": 4.793468792326482e-05, | |
| "loss": 1.424509882926941, | |
| "step": 131 | |
| }, | |
| { | |
| "epoch": 0.061153578874218205, | |
| "grad_norm": 0.8125, | |
| "learning_rate": 4.7932247708932184e-05, | |
| "loss": 1.086850643157959, | |
| "step": 132 | |
| }, | |
| { | |
| "epoch": 0.06161686356265925, | |
| "grad_norm": 0.76171875, | |
| "learning_rate": 4.7929762805127435e-05, | |
| "loss": 1.4302482604980469, | |
| "step": 133 | |
| }, | |
| { | |
| "epoch": 0.0620801482511003, | |
| "grad_norm": 0.8125, | |
| "learning_rate": 4.7927233216490726e-05, | |
| "loss": 0.9927620887756348, | |
| "step": 134 | |
| }, | |
| { | |
| "epoch": 0.06254343293954134, | |
| "grad_norm": 0.8359375, | |
| "learning_rate": 4.792465894774563e-05, | |
| "loss": 1.1411983966827393, | |
| "step": 135 | |
| }, | |
| { | |
| "epoch": 0.06300671762798239, | |
| "grad_norm": 0.76171875, | |
| "learning_rate": 4.792204000369917e-05, | |
| "loss": 1.3219720125198364, | |
| "step": 136 | |
| }, | |
| { | |
| "epoch": 0.06347000231642344, | |
| "grad_norm": 0.75390625, | |
| "learning_rate": 4.791937638924179e-05, | |
| "loss": 1.2182328701019287, | |
| "step": 137 | |
| }, | |
| { | |
| "epoch": 0.06393328700486449, | |
| "grad_norm": 0.85546875, | |
| "learning_rate": 4.7916668109347346e-05, | |
| "loss": 1.2830442190170288, | |
| "step": 138 | |
| }, | |
| { | |
| "epoch": 0.06439657169330554, | |
| "grad_norm": 0.85546875, | |
| "learning_rate": 4.791391516907309e-05, | |
| "loss": 1.1010041236877441, | |
| "step": 139 | |
| }, | |
| { | |
| "epoch": 0.06485985638174659, | |
| "grad_norm": 0.8359375, | |
| "learning_rate": 4.7911117573559676e-05, | |
| "loss": 1.0438331365585327, | |
| "step": 140 | |
| }, | |
| { | |
| "epoch": 0.06532314107018763, | |
| "grad_norm": 0.74609375, | |
| "learning_rate": 4.7908275328031156e-05, | |
| "loss": 1.039322853088379, | |
| "step": 141 | |
| }, | |
| { | |
| "epoch": 0.06578642575862868, | |
| "grad_norm": 0.82421875, | |
| "learning_rate": 4.7905388437794946e-05, | |
| "loss": 1.1718674898147583, | |
| "step": 142 | |
| }, | |
| { | |
| "epoch": 0.06624971044706972, | |
| "grad_norm": 0.75, | |
| "learning_rate": 4.7902456908241836e-05, | |
| "loss": 1.0182360410690308, | |
| "step": 143 | |
| }, | |
| { | |
| "epoch": 0.06671299513551077, | |
| "grad_norm": 0.76171875, | |
| "learning_rate": 4.789948074484594e-05, | |
| "loss": 0.9300652742385864, | |
| "step": 144 | |
| }, | |
| { | |
| "epoch": 0.06717627982395181, | |
| "grad_norm": 0.796875, | |
| "learning_rate": 4.7896459953164785e-05, | |
| "loss": 1.1588186025619507, | |
| "step": 145 | |
| }, | |
| { | |
| "epoch": 0.06763956451239286, | |
| "grad_norm": 0.8203125, | |
| "learning_rate": 4.7893394538839164e-05, | |
| "loss": 1.1683034896850586, | |
| "step": 146 | |
| }, | |
| { | |
| "epoch": 0.06810284920083391, | |
| "grad_norm": 0.73828125, | |
| "learning_rate": 4.7890284507593236e-05, | |
| "loss": 1.2006157636642456, | |
| "step": 147 | |
| }, | |
| { | |
| "epoch": 0.06856613388927496, | |
| "grad_norm": 0.83203125, | |
| "learning_rate": 4.788712986523447e-05, | |
| "loss": 1.1696548461914062, | |
| "step": 148 | |
| }, | |
| { | |
| "epoch": 0.06902941857771601, | |
| "grad_norm": 0.80078125, | |
| "learning_rate": 4.788393061765363e-05, | |
| "loss": 1.0099486112594604, | |
| "step": 149 | |
| }, | |
| { | |
| "epoch": 0.06949270326615706, | |
| "grad_norm": 0.76171875, | |
| "learning_rate": 4.7880686770824775e-05, | |
| "loss": 1.000266671180725, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 0.0699559879545981, | |
| "grad_norm": 0.92578125, | |
| "learning_rate": 4.7877398330805246e-05, | |
| "loss": 1.1522239446640015, | |
| "step": 151 | |
| }, | |
| { | |
| "epoch": 0.07041927264303914, | |
| "grad_norm": 0.83203125, | |
| "learning_rate": 4.7874065303735655e-05, | |
| "loss": 1.11775803565979, | |
| "step": 152 | |
| }, | |
| { | |
| "epoch": 0.07088255733148019, | |
| "grad_norm": 0.76171875, | |
| "learning_rate": 4.787068769583987e-05, | |
| "loss": 1.0676116943359375, | |
| "step": 153 | |
| }, | |
| { | |
| "epoch": 0.07134584201992124, | |
| "grad_norm": 0.7890625, | |
| "learning_rate": 4.786726551342502e-05, | |
| "loss": 0.9372677206993103, | |
| "step": 154 | |
| }, | |
| { | |
| "epoch": 0.07180912670836229, | |
| "grad_norm": 0.78125, | |
| "learning_rate": 4.7863798762881446e-05, | |
| "loss": 1.0489038228988647, | |
| "step": 155 | |
| }, | |
| { | |
| "epoch": 0.07227241139680333, | |
| "grad_norm": 0.7890625, | |
| "learning_rate": 4.7860287450682735e-05, | |
| "loss": 1.2821038961410522, | |
| "step": 156 | |
| }, | |
| { | |
| "epoch": 0.07273569608524438, | |
| "grad_norm": 0.7265625, | |
| "learning_rate": 4.7856731583385665e-05, | |
| "loss": 1.3503544330596924, | |
| "step": 157 | |
| }, | |
| { | |
| "epoch": 0.07319898077368543, | |
| "grad_norm": 0.74609375, | |
| "learning_rate": 4.7853131167630235e-05, | |
| "loss": 1.1046172380447388, | |
| "step": 158 | |
| }, | |
| { | |
| "epoch": 0.07366226546212648, | |
| "grad_norm": 0.73828125, | |
| "learning_rate": 4.7849486210139616e-05, | |
| "loss": 1.5007928609848022, | |
| "step": 159 | |
| }, | |
| { | |
| "epoch": 0.07412555015056753, | |
| "grad_norm": 0.7734375, | |
| "learning_rate": 4.784579671772015e-05, | |
| "loss": 1.1768280267715454, | |
| "step": 160 | |
| }, | |
| { | |
| "epoch": 0.07458883483900858, | |
| "grad_norm": 0.703125, | |
| "learning_rate": 4.784206269726136e-05, | |
| "loss": 1.2257065773010254, | |
| "step": 161 | |
| }, | |
| { | |
| "epoch": 0.07505211952744961, | |
| "grad_norm": 0.76953125, | |
| "learning_rate": 4.7838284155735886e-05, | |
| "loss": 1.2349004745483398, | |
| "step": 162 | |
| }, | |
| { | |
| "epoch": 0.07551540421589066, | |
| "grad_norm": 0.84375, | |
| "learning_rate": 4.783446110019954e-05, | |
| "loss": 1.1083492040634155, | |
| "step": 163 | |
| }, | |
| { | |
| "epoch": 0.07597868890433171, | |
| "grad_norm": 0.83984375, | |
| "learning_rate": 4.7830593537791244e-05, | |
| "loss": 1.0492440462112427, | |
| "step": 164 | |
| }, | |
| { | |
| "epoch": 0.07644197359277276, | |
| "grad_norm": 0.8359375, | |
| "learning_rate": 4.7826681475733e-05, | |
| "loss": 1.0901589393615723, | |
| "step": 165 | |
| }, | |
| { | |
| "epoch": 0.0769052582812138, | |
| "grad_norm": 0.8359375, | |
| "learning_rate": 4.7822724921329945e-05, | |
| "loss": 1.196974515914917, | |
| "step": 166 | |
| }, | |
| { | |
| "epoch": 0.07736854296965485, | |
| "grad_norm": 0.84375, | |
| "learning_rate": 4.781872388197029e-05, | |
| "loss": 1.2492700815200806, | |
| "step": 167 | |
| }, | |
| { | |
| "epoch": 0.0778318276580959, | |
| "grad_norm": 0.890625, | |
| "learning_rate": 4.781467836512529e-05, | |
| "loss": 0.9922595620155334, | |
| "step": 168 | |
| }, | |
| { | |
| "epoch": 0.07829511234653695, | |
| "grad_norm": 0.73828125, | |
| "learning_rate": 4.781058837834929e-05, | |
| "loss": 1.2686748504638672, | |
| "step": 169 | |
| }, | |
| { | |
| "epoch": 0.078758397034978, | |
| "grad_norm": 0.77734375, | |
| "learning_rate": 4.780645392927964e-05, | |
| "loss": 0.9617519378662109, | |
| "step": 170 | |
| }, | |
| { | |
| "epoch": 0.07922168172341904, | |
| "grad_norm": 0.76953125, | |
| "learning_rate": 4.780227502563674e-05, | |
| "loss": 1.0572490692138672, | |
| "step": 171 | |
| }, | |
| { | |
| "epoch": 0.07968496641186008, | |
| "grad_norm": 0.7734375, | |
| "learning_rate": 4.7798051675223994e-05, | |
| "loss": 1.1447961330413818, | |
| "step": 172 | |
| }, | |
| { | |
| "epoch": 0.08014825110030113, | |
| "grad_norm": 0.765625, | |
| "learning_rate": 4.77937838859278e-05, | |
| "loss": 0.9723523855209351, | |
| "step": 173 | |
| }, | |
| { | |
| "epoch": 0.08061153578874218, | |
| "grad_norm": 0.83984375, | |
| "learning_rate": 4.778947166571755e-05, | |
| "loss": 1.1690819263458252, | |
| "step": 174 | |
| }, | |
| { | |
| "epoch": 0.08107482047718323, | |
| "grad_norm": 0.859375, | |
| "learning_rate": 4.778511502264559e-05, | |
| "loss": 1.043947458267212, | |
| "step": 175 | |
| }, | |
| { | |
| "epoch": 0.08153810516562428, | |
| "grad_norm": 0.8515625, | |
| "learning_rate": 4.778071396484721e-05, | |
| "loss": 1.0934100151062012, | |
| "step": 176 | |
| }, | |
| { | |
| "epoch": 0.08200138985406533, | |
| "grad_norm": 0.79296875, | |
| "learning_rate": 4.777626850054067e-05, | |
| "loss": 1.1645115613937378, | |
| "step": 177 | |
| }, | |
| { | |
| "epoch": 0.08246467454250637, | |
| "grad_norm": 0.72265625, | |
| "learning_rate": 4.7771778638027116e-05, | |
| "loss": 1.0093110799789429, | |
| "step": 178 | |
| }, | |
| { | |
| "epoch": 0.08292795923094742, | |
| "grad_norm": 0.7734375, | |
| "learning_rate": 4.7767244385690624e-05, | |
| "loss": 1.2085744142532349, | |
| "step": 179 | |
| }, | |
| { | |
| "epoch": 0.08339124391938846, | |
| "grad_norm": 0.69140625, | |
| "learning_rate": 4.776266575199815e-05, | |
| "loss": 1.048790693283081, | |
| "step": 180 | |
| }, | |
| { | |
| "epoch": 0.0838545286078295, | |
| "grad_norm": 0.734375, | |
| "learning_rate": 4.775804274549953e-05, | |
| "loss": 1.0067102909088135, | |
| "step": 181 | |
| }, | |
| { | |
| "epoch": 0.08431781329627056, | |
| "grad_norm": 0.73828125, | |
| "learning_rate": 4.775337537482744e-05, | |
| "loss": 1.0322071313858032, | |
| "step": 182 | |
| }, | |
| { | |
| "epoch": 0.0847810979847116, | |
| "grad_norm": 0.70703125, | |
| "learning_rate": 4.7748663648697436e-05, | |
| "loss": 0.8763373494148254, | |
| "step": 183 | |
| }, | |
| { | |
| "epoch": 0.08524438267315265, | |
| "grad_norm": 0.890625, | |
| "learning_rate": 4.774390757590787e-05, | |
| "loss": 1.1351971626281738, | |
| "step": 184 | |
| }, | |
| { | |
| "epoch": 0.0857076673615937, | |
| "grad_norm": 0.7734375, | |
| "learning_rate": 4.773910716533992e-05, | |
| "loss": 1.21125066280365, | |
| "step": 185 | |
| }, | |
| { | |
| "epoch": 0.08617095205003475, | |
| "grad_norm": 0.8046875, | |
| "learning_rate": 4.773426242595754e-05, | |
| "loss": 1.0625823736190796, | |
| "step": 186 | |
| }, | |
| { | |
| "epoch": 0.0866342367384758, | |
| "grad_norm": 0.80859375, | |
| "learning_rate": 4.772937336680748e-05, | |
| "loss": 1.2072420120239258, | |
| "step": 187 | |
| }, | |
| { | |
| "epoch": 0.08709752142691685, | |
| "grad_norm": 0.72265625, | |
| "learning_rate": 4.772443999701922e-05, | |
| "loss": 1.1252281665802002, | |
| "step": 188 | |
| }, | |
| { | |
| "epoch": 0.08756080611535788, | |
| "grad_norm": 0.81640625, | |
| "learning_rate": 4.771946232580503e-05, | |
| "loss": 1.1829332113265991, | |
| "step": 189 | |
| }, | |
| { | |
| "epoch": 0.08802409080379893, | |
| "grad_norm": 0.83984375, | |
| "learning_rate": 4.771444036245987e-05, | |
| "loss": 1.177690029144287, | |
| "step": 190 | |
| }, | |
| { | |
| "epoch": 0.08848737549223998, | |
| "grad_norm": 0.70703125, | |
| "learning_rate": 4.7709374116361405e-05, | |
| "loss": 1.025864839553833, | |
| "step": 191 | |
| }, | |
| { | |
| "epoch": 0.08895066018068103, | |
| "grad_norm": 1.046875, | |
| "learning_rate": 4.770426359697001e-05, | |
| "loss": 1.2552249431610107, | |
| "step": 192 | |
| }, | |
| { | |
| "epoch": 0.08941394486912208, | |
| "grad_norm": 0.91796875, | |
| "learning_rate": 4.7699108813828735e-05, | |
| "loss": 1.1388694047927856, | |
| "step": 193 | |
| }, | |
| { | |
| "epoch": 0.08987722955756312, | |
| "grad_norm": 0.7578125, | |
| "learning_rate": 4.769390977656328e-05, | |
| "loss": 1.1736036539077759, | |
| "step": 194 | |
| }, | |
| { | |
| "epoch": 0.09034051424600417, | |
| "grad_norm": 0.84765625, | |
| "learning_rate": 4.768866649488196e-05, | |
| "loss": 1.1389501094818115, | |
| "step": 195 | |
| }, | |
| { | |
| "epoch": 0.09080379893444522, | |
| "grad_norm": 0.81640625, | |
| "learning_rate": 4.768337897857572e-05, | |
| "loss": 1.0693917274475098, | |
| "step": 196 | |
| }, | |
| { | |
| "epoch": 0.09126708362288627, | |
| "grad_norm": 1.1640625, | |
| "learning_rate": 4.767804723751814e-05, | |
| "loss": 1.139711856842041, | |
| "step": 197 | |
| }, | |
| { | |
| "epoch": 0.0917303683113273, | |
| "grad_norm": 0.8125, | |
| "learning_rate": 4.767267128166534e-05, | |
| "loss": 1.021757960319519, | |
| "step": 198 | |
| }, | |
| { | |
| "epoch": 0.09219365299976835, | |
| "grad_norm": 0.8828125, | |
| "learning_rate": 4.766725112105602e-05, | |
| "loss": 1.2109063863754272, | |
| "step": 199 | |
| }, | |
| { | |
| "epoch": 0.0926569376882094, | |
| "grad_norm": 0.78515625, | |
| "learning_rate": 4.7661786765811425e-05, | |
| "loss": 1.1550836563110352, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.09312022237665045, | |
| "grad_norm": 0.88671875, | |
| "learning_rate": 4.765627822613532e-05, | |
| "loss": 1.1337045431137085, | |
| "step": 201 | |
| }, | |
| { | |
| "epoch": 0.0935835070650915, | |
| "grad_norm": 0.734375, | |
| "learning_rate": 4.7650725512313996e-05, | |
| "loss": 1.1244243383407593, | |
| "step": 202 | |
| }, | |
| { | |
| "epoch": 0.09404679175353255, | |
| "grad_norm": 0.76171875, | |
| "learning_rate": 4.76451286347162e-05, | |
| "loss": 1.1743513345718384, | |
| "step": 203 | |
| }, | |
| { | |
| "epoch": 0.0945100764419736, | |
| "grad_norm": 0.74609375, | |
| "learning_rate": 4.763948760379319e-05, | |
| "loss": 1.1148113012313843, | |
| "step": 204 | |
| }, | |
| { | |
| "epoch": 0.09497336113041464, | |
| "grad_norm": 0.75, | |
| "learning_rate": 4.763380243007862e-05, | |
| "loss": 0.9122455716133118, | |
| "step": 205 | |
| }, | |
| { | |
| "epoch": 0.09543664581885569, | |
| "grad_norm": 0.76171875, | |
| "learning_rate": 4.7628073124188615e-05, | |
| "loss": 1.0933022499084473, | |
| "step": 206 | |
| }, | |
| { | |
| "epoch": 0.09589993050729674, | |
| "grad_norm": 0.8359375, | |
| "learning_rate": 4.7622299696821693e-05, | |
| "loss": 1.0184919834136963, | |
| "step": 207 | |
| }, | |
| { | |
| "epoch": 0.09636321519573778, | |
| "grad_norm": 0.76953125, | |
| "learning_rate": 4.7616482158758773e-05, | |
| "loss": 1.1238012313842773, | |
| "step": 208 | |
| }, | |
| { | |
| "epoch": 0.09682649988417882, | |
| "grad_norm": 0.71875, | |
| "learning_rate": 4.761062052086313e-05, | |
| "loss": 1.22682523727417, | |
| "step": 209 | |
| }, | |
| { | |
| "epoch": 0.09728978457261987, | |
| "grad_norm": 0.875, | |
| "learning_rate": 4.760471479408038e-05, | |
| "loss": 1.1153074502944946, | |
| "step": 210 | |
| }, | |
| { | |
| "epoch": 0.09775306926106092, | |
| "grad_norm": 0.7890625, | |
| "learning_rate": 4.7598764989438495e-05, | |
| "loss": 1.0884509086608887, | |
| "step": 211 | |
| }, | |
| { | |
| "epoch": 0.09821635394950197, | |
| "grad_norm": 0.7109375, | |
| "learning_rate": 4.7592771118047746e-05, | |
| "loss": 0.9598002433776855, | |
| "step": 212 | |
| }, | |
| { | |
| "epoch": 0.09867963863794302, | |
| "grad_norm": 0.75, | |
| "learning_rate": 4.758673319110067e-05, | |
| "loss": 1.1340510845184326, | |
| "step": 213 | |
| }, | |
| { | |
| "epoch": 0.09914292332638407, | |
| "grad_norm": 0.7890625, | |
| "learning_rate": 4.75806512198721e-05, | |
| "loss": 0.9966357946395874, | |
| "step": 214 | |
| }, | |
| { | |
| "epoch": 0.09960620801482511, | |
| "grad_norm": 0.765625, | |
| "learning_rate": 4.757452521571909e-05, | |
| "loss": 1.0271143913269043, | |
| "step": 215 | |
| }, | |
| { | |
| "epoch": 0.10006949270326616, | |
| "grad_norm": 0.78125, | |
| "learning_rate": 4.7568355190080936e-05, | |
| "loss": 0.938353419303894, | |
| "step": 216 | |
| }, | |
| { | |
| "epoch": 0.1005327773917072, | |
| "grad_norm": 0.890625, | |
| "learning_rate": 4.756214115447912e-05, | |
| "loss": 1.047834873199463, | |
| "step": 217 | |
| }, | |
| { | |
| "epoch": 0.10099606208014825, | |
| "grad_norm": 0.85546875, | |
| "learning_rate": 4.7555883120517335e-05, | |
| "loss": 1.1204979419708252, | |
| "step": 218 | |
| }, | |
| { | |
| "epoch": 0.1014593467685893, | |
| "grad_norm": 0.76953125, | |
| "learning_rate": 4.7549581099881384e-05, | |
| "loss": 1.2204188108444214, | |
| "step": 219 | |
| }, | |
| { | |
| "epoch": 0.10192263145703034, | |
| "grad_norm": 0.73828125, | |
| "learning_rate": 4.7543235104339265e-05, | |
| "loss": 1.1481391191482544, | |
| "step": 220 | |
| }, | |
| { | |
| "epoch": 0.10238591614547139, | |
| "grad_norm": 0.71484375, | |
| "learning_rate": 4.753684514574105e-05, | |
| "loss": 1.201314091682434, | |
| "step": 221 | |
| }, | |
| { | |
| "epoch": 0.10284920083391244, | |
| "grad_norm": 0.7109375, | |
| "learning_rate": 4.753041123601891e-05, | |
| "loss": 1.159132480621338, | |
| "step": 222 | |
| }, | |
| { | |
| "epoch": 0.10331248552235349, | |
| "grad_norm": 0.79296875, | |
| "learning_rate": 4.752393338718712e-05, | |
| "loss": 1.1852577924728394, | |
| "step": 223 | |
| }, | |
| { | |
| "epoch": 0.10377577021079454, | |
| "grad_norm": 0.8203125, | |
| "learning_rate": 4.7517411611341954e-05, | |
| "loss": 1.0710164308547974, | |
| "step": 224 | |
| }, | |
| { | |
| "epoch": 0.10423905489923559, | |
| "grad_norm": 0.8828125, | |
| "learning_rate": 4.7510845920661756e-05, | |
| "loss": 1.097131371498108, | |
| "step": 225 | |
| }, | |
| { | |
| "epoch": 0.10470233958767662, | |
| "grad_norm": 0.76171875, | |
| "learning_rate": 4.7504236327406854e-05, | |
| "loss": 0.9716182351112366, | |
| "step": 226 | |
| }, | |
| { | |
| "epoch": 0.10516562427611767, | |
| "grad_norm": 0.8828125, | |
| "learning_rate": 4.749758284391955e-05, | |
| "loss": 1.2137223482131958, | |
| "step": 227 | |
| }, | |
| { | |
| "epoch": 0.10562890896455872, | |
| "grad_norm": 0.7421875, | |
| "learning_rate": 4.7490885482624115e-05, | |
| "loss": 0.9825916886329651, | |
| "step": 228 | |
| }, | |
| { | |
| "epoch": 0.10609219365299977, | |
| "grad_norm": 0.73046875, | |
| "learning_rate": 4.748414425602676e-05, | |
| "loss": 1.0940011739730835, | |
| "step": 229 | |
| }, | |
| { | |
| "epoch": 0.10655547834144082, | |
| "grad_norm": 0.859375, | |
| "learning_rate": 4.7477359176715584e-05, | |
| "loss": 0.9418008923530579, | |
| "step": 230 | |
| }, | |
| { | |
| "epoch": 0.10701876302988186, | |
| "grad_norm": 0.7109375, | |
| "learning_rate": 4.747053025736061e-05, | |
| "loss": 0.9472661018371582, | |
| "step": 231 | |
| }, | |
| { | |
| "epoch": 0.10748204771832291, | |
| "grad_norm": 0.7890625, | |
| "learning_rate": 4.746365751071368e-05, | |
| "loss": 1.2847800254821777, | |
| "step": 232 | |
| }, | |
| { | |
| "epoch": 0.10794533240676396, | |
| "grad_norm": 0.76171875, | |
| "learning_rate": 4.745674094960851e-05, | |
| "loss": 1.0107430219650269, | |
| "step": 233 | |
| }, | |
| { | |
| "epoch": 0.10840861709520501, | |
| "grad_norm": 0.79296875, | |
| "learning_rate": 4.744978058696062e-05, | |
| "loss": 1.1128199100494385, | |
| "step": 234 | |
| }, | |
| { | |
| "epoch": 0.10887190178364604, | |
| "grad_norm": 1.1171875, | |
| "learning_rate": 4.744277643576733e-05, | |
| "loss": 0.9219973087310791, | |
| "step": 235 | |
| }, | |
| { | |
| "epoch": 0.10933518647208709, | |
| "grad_norm": 0.86328125, | |
| "learning_rate": 4.743572850910772e-05, | |
| "loss": 1.2181633710861206, | |
| "step": 236 | |
| }, | |
| { | |
| "epoch": 0.10979847116052814, | |
| "grad_norm": 0.765625, | |
| "learning_rate": 4.74286368201426e-05, | |
| "loss": 1.2480499744415283, | |
| "step": 237 | |
| }, | |
| { | |
| "epoch": 0.11026175584896919, | |
| "grad_norm": 0.78125, | |
| "learning_rate": 4.7421501382114536e-05, | |
| "loss": 1.171923279762268, | |
| "step": 238 | |
| }, | |
| { | |
| "epoch": 0.11072504053741024, | |
| "grad_norm": 0.78515625, | |
| "learning_rate": 4.741432220834775e-05, | |
| "loss": 1.0495820045471191, | |
| "step": 239 | |
| }, | |
| { | |
| "epoch": 0.11118832522585129, | |
| "grad_norm": 0.84765625, | |
| "learning_rate": 4.740709931224815e-05, | |
| "loss": 1.1960190534591675, | |
| "step": 240 | |
| }, | |
| { | |
| "epoch": 0.11165160991429234, | |
| "grad_norm": 0.75390625, | |
| "learning_rate": 4.73998327073033e-05, | |
| "loss": 1.1030462980270386, | |
| "step": 241 | |
| }, | |
| { | |
| "epoch": 0.11211489460273338, | |
| "grad_norm": 0.796875, | |
| "learning_rate": 4.739252240708236e-05, | |
| "loss": 1.1221550703048706, | |
| "step": 242 | |
| }, | |
| { | |
| "epoch": 0.11257817929117443, | |
| "grad_norm": 1.109375, | |
| "learning_rate": 4.738516842523609e-05, | |
| "loss": 1.2760341167449951, | |
| "step": 243 | |
| }, | |
| { | |
| "epoch": 0.11304146397961547, | |
| "grad_norm": 0.76953125, | |
| "learning_rate": 4.737777077549683e-05, | |
| "loss": 1.1421096324920654, | |
| "step": 244 | |
| }, | |
| { | |
| "epoch": 0.11350474866805652, | |
| "grad_norm": 0.734375, | |
| "learning_rate": 4.737032947167845e-05, | |
| "loss": 1.06126070022583, | |
| "step": 245 | |
| }, | |
| { | |
| "epoch": 0.11396803335649756, | |
| "grad_norm": 0.7734375, | |
| "learning_rate": 4.7362844527676346e-05, | |
| "loss": 1.1655036211013794, | |
| "step": 246 | |
| }, | |
| { | |
| "epoch": 0.11443131804493861, | |
| "grad_norm": 0.796875, | |
| "learning_rate": 4.735531595746739e-05, | |
| "loss": 1.069222092628479, | |
| "step": 247 | |
| }, | |
| { | |
| "epoch": 0.11489460273337966, | |
| "grad_norm": 0.7265625, | |
| "learning_rate": 4.7347743775109935e-05, | |
| "loss": 1.2415424585342407, | |
| "step": 248 | |
| }, | |
| { | |
| "epoch": 0.11535788742182071, | |
| "grad_norm": 0.79296875, | |
| "learning_rate": 4.734012799474377e-05, | |
| "loss": 0.9880377650260925, | |
| "step": 249 | |
| }, | |
| { | |
| "epoch": 0.11582117211026176, | |
| "grad_norm": 0.84375, | |
| "learning_rate": 4.733246863059008e-05, | |
| "loss": 1.1792749166488647, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 0.1162844567987028, | |
| "grad_norm": 0.75, | |
| "learning_rate": 4.732476569695146e-05, | |
| "loss": 1.2084414958953857, | |
| "step": 251 | |
| }, | |
| { | |
| "epoch": 0.11674774148714386, | |
| "grad_norm": 0.71484375, | |
| "learning_rate": 4.731701920821184e-05, | |
| "loss": 1.2508437633514404, | |
| "step": 252 | |
| }, | |
| { | |
| "epoch": 0.1172110261755849, | |
| "grad_norm": 0.78125, | |
| "learning_rate": 4.73092291788365e-05, | |
| "loss": 0.9441017508506775, | |
| "step": 253 | |
| }, | |
| { | |
| "epoch": 0.11767431086402594, | |
| "grad_norm": 0.80078125, | |
| "learning_rate": 4.7301395623372014e-05, | |
| "loss": 1.1250604391098022, | |
| "step": 254 | |
| }, | |
| { | |
| "epoch": 0.11813759555246699, | |
| "grad_norm": 0.75390625, | |
| "learning_rate": 4.729351855644624e-05, | |
| "loss": 1.2054286003112793, | |
| "step": 255 | |
| }, | |
| { | |
| "epoch": 0.11860088024090804, | |
| "grad_norm": 0.79296875, | |
| "learning_rate": 4.7285597992768285e-05, | |
| "loss": 1.2127487659454346, | |
| "step": 256 | |
| }, | |
| { | |
| "epoch": 0.11906416492934908, | |
| "grad_norm": 0.70703125, | |
| "learning_rate": 4.727763394712847e-05, | |
| "loss": 1.1345865726470947, | |
| "step": 257 | |
| }, | |
| { | |
| "epoch": 0.11952744961779013, | |
| "grad_norm": 0.77734375, | |
| "learning_rate": 4.726962643439833e-05, | |
| "loss": 1.1558208465576172, | |
| "step": 258 | |
| }, | |
| { | |
| "epoch": 0.11999073430623118, | |
| "grad_norm": 0.80078125, | |
| "learning_rate": 4.726157546953055e-05, | |
| "loss": 1.0446807146072388, | |
| "step": 259 | |
| }, | |
| { | |
| "epoch": 0.12045401899467223, | |
| "grad_norm": 0.83203125, | |
| "learning_rate": 4.7253481067558954e-05, | |
| "loss": 1.1157430410385132, | |
| "step": 260 | |
| }, | |
| { | |
| "epoch": 0.12091730368311328, | |
| "grad_norm": 0.828125, | |
| "learning_rate": 4.72453432435985e-05, | |
| "loss": 1.026274561882019, | |
| "step": 261 | |
| }, | |
| { | |
| "epoch": 0.12138058837155433, | |
| "grad_norm": 0.69140625, | |
| "learning_rate": 4.7237162012845206e-05, | |
| "loss": 0.9912748336791992, | |
| "step": 262 | |
| }, | |
| { | |
| "epoch": 0.12184387305999536, | |
| "grad_norm": 0.7421875, | |
| "learning_rate": 4.7228937390576154e-05, | |
| "loss": 1.0849391222000122, | |
| "step": 263 | |
| }, | |
| { | |
| "epoch": 0.12230715774843641, | |
| "grad_norm": 0.78125, | |
| "learning_rate": 4.722066939214945e-05, | |
| "loss": 1.133726716041565, | |
| "step": 264 | |
| }, | |
| { | |
| "epoch": 0.12277044243687746, | |
| "grad_norm": 0.765625, | |
| "learning_rate": 4.72123580330042e-05, | |
| "loss": 1.0994298458099365, | |
| "step": 265 | |
| }, | |
| { | |
| "epoch": 0.1232337271253185, | |
| "grad_norm": 0.73828125, | |
| "learning_rate": 4.720400332866047e-05, | |
| "loss": 1.2458348274230957, | |
| "step": 266 | |
| }, | |
| { | |
| "epoch": 0.12369701181375956, | |
| "grad_norm": 0.7890625, | |
| "learning_rate": 4.7195605294719286e-05, | |
| "loss": 0.8411968946456909, | |
| "step": 267 | |
| }, | |
| { | |
| "epoch": 0.1241602965022006, | |
| "grad_norm": 0.8671875, | |
| "learning_rate": 4.718716394686257e-05, | |
| "loss": 1.0096313953399658, | |
| "step": 268 | |
| }, | |
| { | |
| "epoch": 0.12462358119064165, | |
| "grad_norm": 0.70703125, | |
| "learning_rate": 4.7178679300853125e-05, | |
| "loss": 0.9048255085945129, | |
| "step": 269 | |
| }, | |
| { | |
| "epoch": 0.1250868658790827, | |
| "grad_norm": 0.703125, | |
| "learning_rate": 4.7170151372534615e-05, | |
| "loss": 1.0907902717590332, | |
| "step": 270 | |
| }, | |
| { | |
| "epoch": 0.12555015056752375, | |
| "grad_norm": 0.83203125, | |
| "learning_rate": 4.71615801778315e-05, | |
| "loss": 1.1986042261123657, | |
| "step": 271 | |
| }, | |
| { | |
| "epoch": 0.12601343525596478, | |
| "grad_norm": 0.7890625, | |
| "learning_rate": 4.7152965732749085e-05, | |
| "loss": 0.93548184633255, | |
| "step": 272 | |
| }, | |
| { | |
| "epoch": 0.12647671994440585, | |
| "grad_norm": 0.78515625, | |
| "learning_rate": 4.714430805337338e-05, | |
| "loss": 0.8795110583305359, | |
| "step": 273 | |
| }, | |
| { | |
| "epoch": 0.12694000463284688, | |
| "grad_norm": 0.78515625, | |
| "learning_rate": 4.713560715587117e-05, | |
| "loss": 1.1542648077011108, | |
| "step": 274 | |
| }, | |
| { | |
| "epoch": 0.12740328932128794, | |
| "grad_norm": 0.85546875, | |
| "learning_rate": 4.7126863056489925e-05, | |
| "loss": 1.1123528480529785, | |
| "step": 275 | |
| }, | |
| { | |
| "epoch": 0.12786657400972898, | |
| "grad_norm": 0.76953125, | |
| "learning_rate": 4.7118075771557775e-05, | |
| "loss": 0.946189820766449, | |
| "step": 276 | |
| }, | |
| { | |
| "epoch": 0.12832985869817, | |
| "grad_norm": 0.78515625, | |
| "learning_rate": 4.710924531748352e-05, | |
| "loss": 1.0332181453704834, | |
| "step": 277 | |
| }, | |
| { | |
| "epoch": 0.12879314338661108, | |
| "grad_norm": 0.828125, | |
| "learning_rate": 4.7100371710756555e-05, | |
| "loss": 1.1407872438430786, | |
| "step": 278 | |
| }, | |
| { | |
| "epoch": 0.1292564280750521, | |
| "grad_norm": 0.79296875, | |
| "learning_rate": 4.709145496794685e-05, | |
| "loss": 1.0078046321868896, | |
| "step": 279 | |
| }, | |
| { | |
| "epoch": 0.12971971276349317, | |
| "grad_norm": 0.828125, | |
| "learning_rate": 4.7082495105704936e-05, | |
| "loss": 1.1784926652908325, | |
| "step": 280 | |
| }, | |
| { | |
| "epoch": 0.1301829974519342, | |
| "grad_norm": 0.81640625, | |
| "learning_rate": 4.707349214076186e-05, | |
| "loss": 1.055182695388794, | |
| "step": 281 | |
| }, | |
| { | |
| "epoch": 0.13064628214037527, | |
| "grad_norm": 0.7578125, | |
| "learning_rate": 4.706444608992915e-05, | |
| "loss": 1.2529042959213257, | |
| "step": 282 | |
| }, | |
| { | |
| "epoch": 0.1311095668288163, | |
| "grad_norm": 0.81640625, | |
| "learning_rate": 4.70553569700988e-05, | |
| "loss": 1.1405866146087646, | |
| "step": 283 | |
| }, | |
| { | |
| "epoch": 0.13157285151725737, | |
| "grad_norm": 0.7890625, | |
| "learning_rate": 4.7046224798243215e-05, | |
| "loss": 1.025738000869751, | |
| "step": 284 | |
| }, | |
| { | |
| "epoch": 0.1320361362056984, | |
| "grad_norm": 1.0078125, | |
| "learning_rate": 4.7037049591415213e-05, | |
| "loss": 1.2285195589065552, | |
| "step": 285 | |
| }, | |
| { | |
| "epoch": 0.13249942089413944, | |
| "grad_norm": 0.8984375, | |
| "learning_rate": 4.702783136674794e-05, | |
| "loss": 1.0521762371063232, | |
| "step": 286 | |
| }, | |
| { | |
| "epoch": 0.1329627055825805, | |
| "grad_norm": 0.69140625, | |
| "learning_rate": 4.70185701414549e-05, | |
| "loss": 1.0171458721160889, | |
| "step": 287 | |
| }, | |
| { | |
| "epoch": 0.13342599027102153, | |
| "grad_norm": 4.5625, | |
| "learning_rate": 4.700926593282988e-05, | |
| "loss": 1.017797589302063, | |
| "step": 288 | |
| }, | |
| { | |
| "epoch": 0.1338892749594626, | |
| "grad_norm": 0.7578125, | |
| "learning_rate": 4.699991875824693e-05, | |
| "loss": 1.098080039024353, | |
| "step": 289 | |
| }, | |
| { | |
| "epoch": 0.13435255964790363, | |
| "grad_norm": 0.87109375, | |
| "learning_rate": 4.6990528635160354e-05, | |
| "loss": 1.069311261177063, | |
| "step": 290 | |
| }, | |
| { | |
| "epoch": 0.1348158443363447, | |
| "grad_norm": 0.7265625, | |
| "learning_rate": 4.6981095581104625e-05, | |
| "loss": 1.1987462043762207, | |
| "step": 291 | |
| }, | |
| { | |
| "epoch": 0.13527912902478573, | |
| "grad_norm": 0.78515625, | |
| "learning_rate": 4.697161961369438e-05, | |
| "loss": 0.9862013459205627, | |
| "step": 292 | |
| }, | |
| { | |
| "epoch": 0.1357424137132268, | |
| "grad_norm": 0.83984375, | |
| "learning_rate": 4.696210075062443e-05, | |
| "loss": 1.101189136505127, | |
| "step": 293 | |
| }, | |
| { | |
| "epoch": 0.13620569840166782, | |
| "grad_norm": 0.796875, | |
| "learning_rate": 4.695253900966965e-05, | |
| "loss": 1.0801221132278442, | |
| "step": 294 | |
| }, | |
| { | |
| "epoch": 0.13666898309010886, | |
| "grad_norm": 0.80859375, | |
| "learning_rate": 4.694293440868499e-05, | |
| "loss": 1.0982296466827393, | |
| "step": 295 | |
| }, | |
| { | |
| "epoch": 0.13713226777854992, | |
| "grad_norm": 0.8359375, | |
| "learning_rate": 4.693328696560544e-05, | |
| "loss": 1.1208291053771973, | |
| "step": 296 | |
| }, | |
| { | |
| "epoch": 0.13759555246699096, | |
| "grad_norm": 0.78515625, | |
| "learning_rate": 4.692359669844599e-05, | |
| "loss": 1.137648344039917, | |
| "step": 297 | |
| }, | |
| { | |
| "epoch": 0.13805883715543202, | |
| "grad_norm": 0.76171875, | |
| "learning_rate": 4.691386362530158e-05, | |
| "loss": 1.225368618965149, | |
| "step": 298 | |
| }, | |
| { | |
| "epoch": 0.13852212184387305, | |
| "grad_norm": 0.8046875, | |
| "learning_rate": 4.690408776434712e-05, | |
| "loss": 1.021425485610962, | |
| "step": 299 | |
| }, | |
| { | |
| "epoch": 0.13898540653231412, | |
| "grad_norm": 0.78515625, | |
| "learning_rate": 4.6894269133837377e-05, | |
| "loss": 1.244565486907959, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 0.13944869122075515, | |
| "grad_norm": 0.7890625, | |
| "learning_rate": 4.688440775210701e-05, | |
| "loss": 1.11764657497406, | |
| "step": 301 | |
| }, | |
| { | |
| "epoch": 0.1399119759091962, | |
| "grad_norm": 0.82421875, | |
| "learning_rate": 4.6874503637570496e-05, | |
| "loss": 1.2240279912948608, | |
| "step": 302 | |
| }, | |
| { | |
| "epoch": 0.14037526059763725, | |
| "grad_norm": 0.76171875, | |
| "learning_rate": 4.6864556808722126e-05, | |
| "loss": 1.058721899986267, | |
| "step": 303 | |
| }, | |
| { | |
| "epoch": 0.14083854528607828, | |
| "grad_norm": 0.7734375, | |
| "learning_rate": 4.685456728413593e-05, | |
| "loss": 0.9557834267616272, | |
| "step": 304 | |
| }, | |
| { | |
| "epoch": 0.14130182997451934, | |
| "grad_norm": 0.7734375, | |
| "learning_rate": 4.684453508246567e-05, | |
| "loss": 1.1231224536895752, | |
| "step": 305 | |
| }, | |
| { | |
| "epoch": 0.14176511466296038, | |
| "grad_norm": 0.7109375, | |
| "learning_rate": 4.683446022244482e-05, | |
| "loss": 1.1800084114074707, | |
| "step": 306 | |
| }, | |
| { | |
| "epoch": 0.14222839935140144, | |
| "grad_norm": 0.76953125, | |
| "learning_rate": 4.682434272288649e-05, | |
| "loss": 0.9980816841125488, | |
| "step": 307 | |
| }, | |
| { | |
| "epoch": 0.14269168403984248, | |
| "grad_norm": 0.77734375, | |
| "learning_rate": 4.681418260268341e-05, | |
| "loss": 1.141348958015442, | |
| "step": 308 | |
| }, | |
| { | |
| "epoch": 0.14315496872828354, | |
| "grad_norm": 0.69140625, | |
| "learning_rate": 4.680397988080792e-05, | |
| "loss": 1.0376156568527222, | |
| "step": 309 | |
| }, | |
| { | |
| "epoch": 0.14361825341672457, | |
| "grad_norm": 0.83203125, | |
| "learning_rate": 4.67937345763119e-05, | |
| "loss": 0.9319735765457153, | |
| "step": 310 | |
| }, | |
| { | |
| "epoch": 0.14408153810516564, | |
| "grad_norm": 0.80078125, | |
| "learning_rate": 4.678344670832673e-05, | |
| "loss": 1.093515157699585, | |
| "step": 311 | |
| }, | |
| { | |
| "epoch": 0.14454482279360667, | |
| "grad_norm": 0.8125, | |
| "learning_rate": 4.677311629606328e-05, | |
| "loss": 0.9726182222366333, | |
| "step": 312 | |
| }, | |
| { | |
| "epoch": 0.1450081074820477, | |
| "grad_norm": 0.84765625, | |
| "learning_rate": 4.6762743358811894e-05, | |
| "loss": 1.2113114595413208, | |
| "step": 313 | |
| }, | |
| { | |
| "epoch": 0.14547139217048877, | |
| "grad_norm": 0.72265625, | |
| "learning_rate": 4.675232791594227e-05, | |
| "loss": 1.0208406448364258, | |
| "step": 314 | |
| }, | |
| { | |
| "epoch": 0.1459346768589298, | |
| "grad_norm": 0.72265625, | |
| "learning_rate": 4.674186998690353e-05, | |
| "loss": 0.9950704574584961, | |
| "step": 315 | |
| }, | |
| { | |
| "epoch": 0.14639796154737086, | |
| "grad_norm": 0.7890625, | |
| "learning_rate": 4.673136959122409e-05, | |
| "loss": 1.0458511114120483, | |
| "step": 316 | |
| }, | |
| { | |
| "epoch": 0.1468612462358119, | |
| "grad_norm": 0.7265625, | |
| "learning_rate": 4.672082674851169e-05, | |
| "loss": 1.0969946384429932, | |
| "step": 317 | |
| }, | |
| { | |
| "epoch": 0.14732453092425296, | |
| "grad_norm": 0.81640625, | |
| "learning_rate": 4.6710241478453334e-05, | |
| "loss": 1.00065016746521, | |
| "step": 318 | |
| }, | |
| { | |
| "epoch": 0.147787815612694, | |
| "grad_norm": 0.71875, | |
| "learning_rate": 4.669961380081523e-05, | |
| "loss": 0.9182780981063843, | |
| "step": 319 | |
| }, | |
| { | |
| "epoch": 0.14825110030113506, | |
| "grad_norm": 0.83203125, | |
| "learning_rate": 4.6688943735442805e-05, | |
| "loss": 1.0130627155303955, | |
| "step": 320 | |
| }, | |
| { | |
| "epoch": 0.1487143849895761, | |
| "grad_norm": 0.78515625, | |
| "learning_rate": 4.667823130226061e-05, | |
| "loss": 1.091713547706604, | |
| "step": 321 | |
| }, | |
| { | |
| "epoch": 0.14917766967801716, | |
| "grad_norm": 0.80078125, | |
| "learning_rate": 4.666747652127233e-05, | |
| "loss": 1.0551024675369263, | |
| "step": 322 | |
| }, | |
| { | |
| "epoch": 0.1496409543664582, | |
| "grad_norm": 0.74609375, | |
| "learning_rate": 4.665667941256072e-05, | |
| "loss": 1.0696836709976196, | |
| "step": 323 | |
| }, | |
| { | |
| "epoch": 0.15010423905489922, | |
| "grad_norm": 0.74609375, | |
| "learning_rate": 4.664583999628757e-05, | |
| "loss": 1.0591177940368652, | |
| "step": 324 | |
| }, | |
| { | |
| "epoch": 0.1505675237433403, | |
| "grad_norm": 0.8359375, | |
| "learning_rate": 4.663495829269368e-05, | |
| "loss": 1.1695055961608887, | |
| "step": 325 | |
| }, | |
| { | |
| "epoch": 0.15103080843178132, | |
| "grad_norm": 0.8203125, | |
| "learning_rate": 4.662403432209882e-05, | |
| "loss": 1.1184823513031006, | |
| "step": 326 | |
| }, | |
| { | |
| "epoch": 0.15149409312022238, | |
| "grad_norm": 0.671875, | |
| "learning_rate": 4.661306810490168e-05, | |
| "loss": 1.0364640951156616, | |
| "step": 327 | |
| }, | |
| { | |
| "epoch": 0.15195737780866342, | |
| "grad_norm": 0.78125, | |
| "learning_rate": 4.660205966157982e-05, | |
| "loss": 1.2171732187271118, | |
| "step": 328 | |
| }, | |
| { | |
| "epoch": 0.15242066249710448, | |
| "grad_norm": 0.8515625, | |
| "learning_rate": 4.6591009012689685e-05, | |
| "loss": 1.1131620407104492, | |
| "step": 329 | |
| }, | |
| { | |
| "epoch": 0.15288394718554552, | |
| "grad_norm": 0.75, | |
| "learning_rate": 4.6579916178866506e-05, | |
| "loss": 0.9144288301467896, | |
| "step": 330 | |
| }, | |
| { | |
| "epoch": 0.15334723187398658, | |
| "grad_norm": 0.8125, | |
| "learning_rate": 4.6568781180824304e-05, | |
| "loss": 1.180692434310913, | |
| "step": 331 | |
| }, | |
| { | |
| "epoch": 0.1538105165624276, | |
| "grad_norm": 0.8125, | |
| "learning_rate": 4.655760403935581e-05, | |
| "loss": 1.1063326597213745, | |
| "step": 332 | |
| }, | |
| { | |
| "epoch": 0.15427380125086865, | |
| "grad_norm": 0.77734375, | |
| "learning_rate": 4.654638477533249e-05, | |
| "loss": 1.1317967176437378, | |
| "step": 333 | |
| }, | |
| { | |
| "epoch": 0.1547370859393097, | |
| "grad_norm": 0.8671875, | |
| "learning_rate": 4.653512340970443e-05, | |
| "loss": 1.0568040609359741, | |
| "step": 334 | |
| }, | |
| { | |
| "epoch": 0.15520037062775074, | |
| "grad_norm": 0.73828125, | |
| "learning_rate": 4.6523819963500345e-05, | |
| "loss": 1.0148340463638306, | |
| "step": 335 | |
| }, | |
| { | |
| "epoch": 0.1556636553161918, | |
| "grad_norm": 0.8046875, | |
| "learning_rate": 4.651247445782754e-05, | |
| "loss": 0.9750385284423828, | |
| "step": 336 | |
| }, | |
| { | |
| "epoch": 0.15612694000463284, | |
| "grad_norm": 0.92578125, | |
| "learning_rate": 4.650108691387185e-05, | |
| "loss": 1.0633890628814697, | |
| "step": 337 | |
| }, | |
| { | |
| "epoch": 0.1565902246930739, | |
| "grad_norm": 0.81640625, | |
| "learning_rate": 4.648965735289761e-05, | |
| "loss": 1.223706603050232, | |
| "step": 338 | |
| }, | |
| { | |
| "epoch": 0.15705350938151494, | |
| "grad_norm": 0.78125, | |
| "learning_rate": 4.647818579624761e-05, | |
| "loss": 1.2269283533096313, | |
| "step": 339 | |
| }, | |
| { | |
| "epoch": 0.157516794069956, | |
| "grad_norm": 0.7109375, | |
| "learning_rate": 4.6466672265343056e-05, | |
| "loss": 0.990770697593689, | |
| "step": 340 | |
| }, | |
| { | |
| "epoch": 0.15798007875839704, | |
| "grad_norm": 0.765625, | |
| "learning_rate": 4.645511678168356e-05, | |
| "loss": 1.1373369693756104, | |
| "step": 341 | |
| }, | |
| { | |
| "epoch": 0.15844336344683807, | |
| "grad_norm": 0.90625, | |
| "learning_rate": 4.644351936684705e-05, | |
| "loss": 1.106075406074524, | |
| "step": 342 | |
| }, | |
| { | |
| "epoch": 0.15890664813527913, | |
| "grad_norm": 0.74609375, | |
| "learning_rate": 4.643188004248975e-05, | |
| "loss": 0.9842250943183899, | |
| "step": 343 | |
| }, | |
| { | |
| "epoch": 0.15936993282372017, | |
| "grad_norm": 0.81640625, | |
| "learning_rate": 4.642019883034617e-05, | |
| "loss": 1.1008222103118896, | |
| "step": 344 | |
| }, | |
| { | |
| "epoch": 0.15983321751216123, | |
| "grad_norm": 0.75, | |
| "learning_rate": 4.640847575222901e-05, | |
| "loss": 1.127953052520752, | |
| "step": 345 | |
| }, | |
| { | |
| "epoch": 0.16029650220060226, | |
| "grad_norm": 0.95703125, | |
| "learning_rate": 4.6396710830029164e-05, | |
| "loss": 1.2000129222869873, | |
| "step": 346 | |
| }, | |
| { | |
| "epoch": 0.16075978688904333, | |
| "grad_norm": 0.96484375, | |
| "learning_rate": 4.638490408571564e-05, | |
| "loss": 1.0962949991226196, | |
| "step": 347 | |
| }, | |
| { | |
| "epoch": 0.16122307157748436, | |
| "grad_norm": 0.77734375, | |
| "learning_rate": 4.637305554133559e-05, | |
| "loss": 1.0607415437698364, | |
| "step": 348 | |
| }, | |
| { | |
| "epoch": 0.16168635626592542, | |
| "grad_norm": 0.71875, | |
| "learning_rate": 4.636116521901417e-05, | |
| "loss": 1.0603266954421997, | |
| "step": 349 | |
| }, | |
| { | |
| "epoch": 0.16214964095436646, | |
| "grad_norm": 0.80078125, | |
| "learning_rate": 4.6349233140954573e-05, | |
| "loss": 1.084631085395813, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 0.1626129256428075, | |
| "grad_norm": 0.7421875, | |
| "learning_rate": 4.633725932943795e-05, | |
| "loss": 1.1179983615875244, | |
| "step": 351 | |
| }, | |
| { | |
| "epoch": 0.16307621033124856, | |
| "grad_norm": 0.7578125, | |
| "learning_rate": 4.632524380682341e-05, | |
| "loss": 0.9485760927200317, | |
| "step": 352 | |
| }, | |
| { | |
| "epoch": 0.1635394950196896, | |
| "grad_norm": 0.83984375, | |
| "learning_rate": 4.631318659554793e-05, | |
| "loss": 0.8660714626312256, | |
| "step": 353 | |
| }, | |
| { | |
| "epoch": 0.16400277970813065, | |
| "grad_norm": 0.90234375, | |
| "learning_rate": 4.6301087718126324e-05, | |
| "loss": 1.2922559976577759, | |
| "step": 354 | |
| }, | |
| { | |
| "epoch": 0.1644660643965717, | |
| "grad_norm": 0.8984375, | |
| "learning_rate": 4.628894719715124e-05, | |
| "loss": 1.1782947778701782, | |
| "step": 355 | |
| }, | |
| { | |
| "epoch": 0.16492934908501275, | |
| "grad_norm": 0.7890625, | |
| "learning_rate": 4.627676505529306e-05, | |
| "loss": 1.32277512550354, | |
| "step": 356 | |
| }, | |
| { | |
| "epoch": 0.16539263377345378, | |
| "grad_norm": 0.8359375, | |
| "learning_rate": 4.62645413152999e-05, | |
| "loss": 1.2258048057556152, | |
| "step": 357 | |
| }, | |
| { | |
| "epoch": 0.16585591846189485, | |
| "grad_norm": 0.84375, | |
| "learning_rate": 4.6252275999997546e-05, | |
| "loss": 1.1965945959091187, | |
| "step": 358 | |
| }, | |
| { | |
| "epoch": 0.16631920315033588, | |
| "grad_norm": 0.75390625, | |
| "learning_rate": 4.6239969132289436e-05, | |
| "loss": 1.1148847341537476, | |
| "step": 359 | |
| }, | |
| { | |
| "epoch": 0.16678248783877692, | |
| "grad_norm": 0.87109375, | |
| "learning_rate": 4.622762073515658e-05, | |
| "loss": 1.1224826574325562, | |
| "step": 360 | |
| }, | |
| { | |
| "epoch": 0.16724577252721798, | |
| "grad_norm": 0.796875, | |
| "learning_rate": 4.621523083165755e-05, | |
| "loss": 1.1334441900253296, | |
| "step": 361 | |
| }, | |
| { | |
| "epoch": 0.167709057215659, | |
| "grad_norm": 0.78515625, | |
| "learning_rate": 4.620279944492841e-05, | |
| "loss": 1.0212844610214233, | |
| "step": 362 | |
| }, | |
| { | |
| "epoch": 0.16817234190410008, | |
| "grad_norm": 0.78125, | |
| "learning_rate": 4.619032659818271e-05, | |
| "loss": 1.0923480987548828, | |
| "step": 363 | |
| }, | |
| { | |
| "epoch": 0.1686356265925411, | |
| "grad_norm": 0.828125, | |
| "learning_rate": 4.61778123147114e-05, | |
| "loss": 1.0250271558761597, | |
| "step": 364 | |
| }, | |
| { | |
| "epoch": 0.16909891128098217, | |
| "grad_norm": 0.765625, | |
| "learning_rate": 4.616525661788281e-05, | |
| "loss": 1.1997897624969482, | |
| "step": 365 | |
| }, | |
| { | |
| "epoch": 0.1695621959694232, | |
| "grad_norm": 0.7578125, | |
| "learning_rate": 4.6152659531142605e-05, | |
| "loss": 1.0714635848999023, | |
| "step": 366 | |
| }, | |
| { | |
| "epoch": 0.17002548065786427, | |
| "grad_norm": 0.875, | |
| "learning_rate": 4.614002107801375e-05, | |
| "loss": 1.1388036012649536, | |
| "step": 367 | |
| }, | |
| { | |
| "epoch": 0.1704887653463053, | |
| "grad_norm": 0.7421875, | |
| "learning_rate": 4.612734128209643e-05, | |
| "loss": 1.0413213968276978, | |
| "step": 368 | |
| }, | |
| { | |
| "epoch": 0.17095205003474634, | |
| "grad_norm": 0.72265625, | |
| "learning_rate": 4.6114620167068055e-05, | |
| "loss": 1.0464006662368774, | |
| "step": 369 | |
| }, | |
| { | |
| "epoch": 0.1714153347231874, | |
| "grad_norm": 0.8125, | |
| "learning_rate": 4.610185775668317e-05, | |
| "loss": 0.9965865015983582, | |
| "step": 370 | |
| }, | |
| { | |
| "epoch": 0.17187861941162844, | |
| "grad_norm": 0.7890625, | |
| "learning_rate": 4.6089054074773446e-05, | |
| "loss": 1.0935486555099487, | |
| "step": 371 | |
| }, | |
| { | |
| "epoch": 0.1723419041000695, | |
| "grad_norm": 0.8046875, | |
| "learning_rate": 4.6076209145247627e-05, | |
| "loss": 1.158833384513855, | |
| "step": 372 | |
| }, | |
| { | |
| "epoch": 0.17280518878851053, | |
| "grad_norm": 0.7421875, | |
| "learning_rate": 4.606332299209146e-05, | |
| "loss": 0.897361695766449, | |
| "step": 373 | |
| }, | |
| { | |
| "epoch": 0.1732684734769516, | |
| "grad_norm": 0.83984375, | |
| "learning_rate": 4.60503956393677e-05, | |
| "loss": 1.0273572206497192, | |
| "step": 374 | |
| }, | |
| { | |
| "epoch": 0.17373175816539263, | |
| "grad_norm": 0.82421875, | |
| "learning_rate": 4.603742711121599e-05, | |
| "loss": 1.222002387046814, | |
| "step": 375 | |
| }, | |
| { | |
| "epoch": 0.1741950428538337, | |
| "grad_norm": 0.89453125, | |
| "learning_rate": 4.602441743185291e-05, | |
| "loss": 1.1995201110839844, | |
| "step": 376 | |
| }, | |
| { | |
| "epoch": 0.17465832754227473, | |
| "grad_norm": 0.7421875, | |
| "learning_rate": 4.601136662557185e-05, | |
| "loss": 1.0347135066986084, | |
| "step": 377 | |
| }, | |
| { | |
| "epoch": 0.17512161223071576, | |
| "grad_norm": 0.859375, | |
| "learning_rate": 4.599827471674302e-05, | |
| "loss": 1.1969027519226074, | |
| "step": 378 | |
| }, | |
| { | |
| "epoch": 0.17558489691915682, | |
| "grad_norm": 0.7265625, | |
| "learning_rate": 4.5985141729813366e-05, | |
| "loss": 1.1000288724899292, | |
| "step": 379 | |
| }, | |
| { | |
| "epoch": 0.17604818160759786, | |
| "grad_norm": 0.80859375, | |
| "learning_rate": 4.5971967689306545e-05, | |
| "loss": 1.0385537147521973, | |
| "step": 380 | |
| }, | |
| { | |
| "epoch": 0.17651146629603892, | |
| "grad_norm": 0.7421875, | |
| "learning_rate": 4.595875261982288e-05, | |
| "loss": 0.8807584643363953, | |
| "step": 381 | |
| }, | |
| { | |
| "epoch": 0.17697475098447996, | |
| "grad_norm": 0.77734375, | |
| "learning_rate": 4.5945496546039286e-05, | |
| "loss": 0.9811716675758362, | |
| "step": 382 | |
| }, | |
| { | |
| "epoch": 0.17743803567292102, | |
| "grad_norm": 0.875, | |
| "learning_rate": 4.59321994927093e-05, | |
| "loss": 1.0565159320831299, | |
| "step": 383 | |
| }, | |
| { | |
| "epoch": 0.17790132036136205, | |
| "grad_norm": 0.765625, | |
| "learning_rate": 4.5918861484662906e-05, | |
| "loss": 1.0541253089904785, | |
| "step": 384 | |
| }, | |
| { | |
| "epoch": 0.17836460504980312, | |
| "grad_norm": 0.78515625, | |
| "learning_rate": 4.5905482546806626e-05, | |
| "loss": 1.1991245746612549, | |
| "step": 385 | |
| }, | |
| { | |
| "epoch": 0.17882788973824415, | |
| "grad_norm": 0.75390625, | |
| "learning_rate": 4.58920627041234e-05, | |
| "loss": 0.9940633177757263, | |
| "step": 386 | |
| }, | |
| { | |
| "epoch": 0.17929117442668518, | |
| "grad_norm": 0.8046875, | |
| "learning_rate": 4.587860198167252e-05, | |
| "loss": 1.0880647897720337, | |
| "step": 387 | |
| }, | |
| { | |
| "epoch": 0.17975445911512625, | |
| "grad_norm": 0.77734375, | |
| "learning_rate": 4.586510040458965e-05, | |
| "loss": 0.9566104412078857, | |
| "step": 388 | |
| }, | |
| { | |
| "epoch": 0.18021774380356728, | |
| "grad_norm": 0.875, | |
| "learning_rate": 4.585155799808672e-05, | |
| "loss": 1.0409622192382812, | |
| "step": 389 | |
| }, | |
| { | |
| "epoch": 0.18068102849200834, | |
| "grad_norm": 0.75390625, | |
| "learning_rate": 4.583797478745191e-05, | |
| "loss": 1.0287699699401855, | |
| "step": 390 | |
| }, | |
| { | |
| "epoch": 0.18114431318044938, | |
| "grad_norm": 0.76953125, | |
| "learning_rate": 4.58243507980496e-05, | |
| "loss": 1.1182043552398682, | |
| "step": 391 | |
| }, | |
| { | |
| "epoch": 0.18160759786889044, | |
| "grad_norm": 0.765625, | |
| "learning_rate": 4.581068605532031e-05, | |
| "loss": 1.040753722190857, | |
| "step": 392 | |
| }, | |
| { | |
| "epoch": 0.18207088255733148, | |
| "grad_norm": 0.7265625, | |
| "learning_rate": 4.5796980584780665e-05, | |
| "loss": 1.105460524559021, | |
| "step": 393 | |
| }, | |
| { | |
| "epoch": 0.18253416724577254, | |
| "grad_norm": 0.859375, | |
| "learning_rate": 4.578323441202334e-05, | |
| "loss": 0.9269900918006897, | |
| "step": 394 | |
| }, | |
| { | |
| "epoch": 0.18299745193421357, | |
| "grad_norm": 0.7890625, | |
| "learning_rate": 4.5769447562717005e-05, | |
| "loss": 1.0313459634780884, | |
| "step": 395 | |
| }, | |
| { | |
| "epoch": 0.1834607366226546, | |
| "grad_norm": 0.7265625, | |
| "learning_rate": 4.5755620062606313e-05, | |
| "loss": 0.9970820546150208, | |
| "step": 396 | |
| }, | |
| { | |
| "epoch": 0.18392402131109567, | |
| "grad_norm": 0.83203125, | |
| "learning_rate": 4.5741751937511796e-05, | |
| "loss": 1.0869134664535522, | |
| "step": 397 | |
| }, | |
| { | |
| "epoch": 0.1843873059995367, | |
| "grad_norm": 0.77734375, | |
| "learning_rate": 4.572784321332987e-05, | |
| "loss": 1.0493508577346802, | |
| "step": 398 | |
| }, | |
| { | |
| "epoch": 0.18485059068797777, | |
| "grad_norm": 0.80859375, | |
| "learning_rate": 4.571389391603275e-05, | |
| "loss": 0.9378384947776794, | |
| "step": 399 | |
| }, | |
| { | |
| "epoch": 0.1853138753764188, | |
| "grad_norm": 0.76953125, | |
| "learning_rate": 4.56999040716684e-05, | |
| "loss": 0.9289635419845581, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 0.18577716006485986, | |
| "grad_norm": 0.84375, | |
| "learning_rate": 4.568587370636055e-05, | |
| "loss": 1.065589427947998, | |
| "step": 401 | |
| }, | |
| { | |
| "epoch": 0.1862404447533009, | |
| "grad_norm": 0.7734375, | |
| "learning_rate": 4.567180284630853e-05, | |
| "loss": 0.9970924258232117, | |
| "step": 402 | |
| }, | |
| { | |
| "epoch": 0.18670372944174196, | |
| "grad_norm": 0.85546875, | |
| "learning_rate": 4.565769151778733e-05, | |
| "loss": 1.1094486713409424, | |
| "step": 403 | |
| }, | |
| { | |
| "epoch": 0.187167014130183, | |
| "grad_norm": 0.73046875, | |
| "learning_rate": 4.5643539747147506e-05, | |
| "loss": 1.0456472635269165, | |
| "step": 404 | |
| }, | |
| { | |
| "epoch": 0.18763029881862406, | |
| "grad_norm": 0.796875, | |
| "learning_rate": 4.562934756081511e-05, | |
| "loss": 1.055879831314087, | |
| "step": 405 | |
| }, | |
| { | |
| "epoch": 0.1880935835070651, | |
| "grad_norm": 4.5, | |
| "learning_rate": 4.5615114985291684e-05, | |
| "loss": 1.4064699411392212, | |
| "step": 406 | |
| }, | |
| { | |
| "epoch": 0.18855686819550613, | |
| "grad_norm": 0.890625, | |
| "learning_rate": 4.5600842047154176e-05, | |
| "loss": 1.0524810552597046, | |
| "step": 407 | |
| }, | |
| { | |
| "epoch": 0.1890201528839472, | |
| "grad_norm": 0.77734375, | |
| "learning_rate": 4.558652877305494e-05, | |
| "loss": 1.052716851234436, | |
| "step": 408 | |
| }, | |
| { | |
| "epoch": 0.18948343757238822, | |
| "grad_norm": 0.8046875, | |
| "learning_rate": 4.5572175189721586e-05, | |
| "loss": 1.1580179929733276, | |
| "step": 409 | |
| }, | |
| { | |
| "epoch": 0.1899467222608293, | |
| "grad_norm": 0.80078125, | |
| "learning_rate": 4.5557781323957055e-05, | |
| "loss": 1.0525214672088623, | |
| "step": 410 | |
| }, | |
| { | |
| "epoch": 0.19041000694927032, | |
| "grad_norm": 0.80078125, | |
| "learning_rate": 4.5543347202639477e-05, | |
| "loss": 0.9145269989967346, | |
| "step": 411 | |
| }, | |
| { | |
| "epoch": 0.19087329163771138, | |
| "grad_norm": 0.80078125, | |
| "learning_rate": 4.5528872852722156e-05, | |
| "loss": 0.9920161366462708, | |
| "step": 412 | |
| }, | |
| { | |
| "epoch": 0.19133657632615242, | |
| "grad_norm": 0.77734375, | |
| "learning_rate": 4.551435830123353e-05, | |
| "loss": 1.0678218603134155, | |
| "step": 413 | |
| }, | |
| { | |
| "epoch": 0.19179986101459348, | |
| "grad_norm": 0.796875, | |
| "learning_rate": 4.54998035752771e-05, | |
| "loss": 1.0776031017303467, | |
| "step": 414 | |
| }, | |
| { | |
| "epoch": 0.19226314570303452, | |
| "grad_norm": 0.76171875, | |
| "learning_rate": 4.5485208702031374e-05, | |
| "loss": 1.1205060482025146, | |
| "step": 415 | |
| }, | |
| { | |
| "epoch": 0.19272643039147555, | |
| "grad_norm": 0.84375, | |
| "learning_rate": 4.547057370874984e-05, | |
| "loss": 1.0913411378860474, | |
| "step": 416 | |
| }, | |
| { | |
| "epoch": 0.1931897150799166, | |
| "grad_norm": 0.8125, | |
| "learning_rate": 4.545589862276091e-05, | |
| "loss": 1.0345311164855957, | |
| "step": 417 | |
| }, | |
| { | |
| "epoch": 0.19365299976835765, | |
| "grad_norm": 0.80859375, | |
| "learning_rate": 4.544118347146784e-05, | |
| "loss": 1.1177470684051514, | |
| "step": 418 | |
| }, | |
| { | |
| "epoch": 0.1941162844567987, | |
| "grad_norm": 0.86328125, | |
| "learning_rate": 4.542642828234873e-05, | |
| "loss": 1.1447192430496216, | |
| "step": 419 | |
| }, | |
| { | |
| "epoch": 0.19457956914523974, | |
| "grad_norm": 0.77734375, | |
| "learning_rate": 4.5411633082956416e-05, | |
| "loss": 1.1146210432052612, | |
| "step": 420 | |
| }, | |
| { | |
| "epoch": 0.1950428538336808, | |
| "grad_norm": 0.8671875, | |
| "learning_rate": 4.539679790091847e-05, | |
| "loss": 1.0338633060455322, | |
| "step": 421 | |
| }, | |
| { | |
| "epoch": 0.19550613852212184, | |
| "grad_norm": 0.78515625, | |
| "learning_rate": 4.538192276393712e-05, | |
| "loss": 1.0040104389190674, | |
| "step": 422 | |
| }, | |
| { | |
| "epoch": 0.1959694232105629, | |
| "grad_norm": 0.81640625, | |
| "learning_rate": 4.536700769978918e-05, | |
| "loss": 1.1796895265579224, | |
| "step": 423 | |
| }, | |
| { | |
| "epoch": 0.19643270789900394, | |
| "grad_norm": 0.8125, | |
| "learning_rate": 4.535205273632605e-05, | |
| "loss": 1.0307509899139404, | |
| "step": 424 | |
| }, | |
| { | |
| "epoch": 0.19689599258744497, | |
| "grad_norm": 0.6953125, | |
| "learning_rate": 4.533705790147362e-05, | |
| "loss": 0.9913015365600586, | |
| "step": 425 | |
| }, | |
| { | |
| "epoch": 0.19735927727588604, | |
| "grad_norm": 0.80859375, | |
| "learning_rate": 4.532202322323224e-05, | |
| "loss": 1.1445434093475342, | |
| "step": 426 | |
| }, | |
| { | |
| "epoch": 0.19782256196432707, | |
| "grad_norm": 0.83984375, | |
| "learning_rate": 4.530694872967666e-05, | |
| "loss": 1.3544632196426392, | |
| "step": 427 | |
| }, | |
| { | |
| "epoch": 0.19828584665276813, | |
| "grad_norm": 1.1484375, | |
| "learning_rate": 4.5291834448955975e-05, | |
| "loss": 0.9704390168190002, | |
| "step": 428 | |
| }, | |
| { | |
| "epoch": 0.19874913134120917, | |
| "grad_norm": 0.84375, | |
| "learning_rate": 4.5276680409293576e-05, | |
| "loss": 0.9586291909217834, | |
| "step": 429 | |
| }, | |
| { | |
| "epoch": 0.19921241602965023, | |
| "grad_norm": 0.7890625, | |
| "learning_rate": 4.52614866389871e-05, | |
| "loss": 1.1021863222122192, | |
| "step": 430 | |
| }, | |
| { | |
| "epoch": 0.19967570071809126, | |
| "grad_norm": 0.8125, | |
| "learning_rate": 4.5246253166408376e-05, | |
| "loss": 1.0716869831085205, | |
| "step": 431 | |
| }, | |
| { | |
| "epoch": 0.20013898540653233, | |
| "grad_norm": 0.875, | |
| "learning_rate": 4.523098002000336e-05, | |
| "loss": 0.9597651958465576, | |
| "step": 432 | |
| }, | |
| { | |
| "epoch": 0.20060227009497336, | |
| "grad_norm": 0.6875, | |
| "learning_rate": 4.5215667228292114e-05, | |
| "loss": 0.910536527633667, | |
| "step": 433 | |
| }, | |
| { | |
| "epoch": 0.2010655547834144, | |
| "grad_norm": 0.7734375, | |
| "learning_rate": 4.52003148198687e-05, | |
| "loss": 1.0383825302124023, | |
| "step": 434 | |
| }, | |
| { | |
| "epoch": 0.20152883947185546, | |
| "grad_norm": 0.71484375, | |
| "learning_rate": 4.51849228234012e-05, | |
| "loss": 1.1084293127059937, | |
| "step": 435 | |
| }, | |
| { | |
| "epoch": 0.2019921241602965, | |
| "grad_norm": 0.796875, | |
| "learning_rate": 4.516949126763156e-05, | |
| "loss": 1.1191846132278442, | |
| "step": 436 | |
| }, | |
| { | |
| "epoch": 0.20245540884873756, | |
| "grad_norm": 0.71484375, | |
| "learning_rate": 4.515402018137565e-05, | |
| "loss": 0.9708357453346252, | |
| "step": 437 | |
| }, | |
| { | |
| "epoch": 0.2029186935371786, | |
| "grad_norm": 0.84765625, | |
| "learning_rate": 4.513850959352314e-05, | |
| "loss": 1.1482406854629517, | |
| "step": 438 | |
| }, | |
| { | |
| "epoch": 0.20338197822561965, | |
| "grad_norm": 0.6953125, | |
| "learning_rate": 4.512295953303746e-05, | |
| "loss": 0.9390287399291992, | |
| "step": 439 | |
| }, | |
| { | |
| "epoch": 0.2038452629140607, | |
| "grad_norm": 0.78125, | |
| "learning_rate": 4.510737002895574e-05, | |
| "loss": 1.125487208366394, | |
| "step": 440 | |
| }, | |
| { | |
| "epoch": 0.20430854760250175, | |
| "grad_norm": 0.76171875, | |
| "learning_rate": 4.5091741110388775e-05, | |
| "loss": 1.0969908237457275, | |
| "step": 441 | |
| }, | |
| { | |
| "epoch": 0.20477183229094278, | |
| "grad_norm": 0.94140625, | |
| "learning_rate": 4.507607280652096e-05, | |
| "loss": 0.9356522560119629, | |
| "step": 442 | |
| }, | |
| { | |
| "epoch": 0.20523511697938382, | |
| "grad_norm": 0.76953125, | |
| "learning_rate": 4.506036514661024e-05, | |
| "loss": 1.131638526916504, | |
| "step": 443 | |
| }, | |
| { | |
| "epoch": 0.20569840166782488, | |
| "grad_norm": 0.76953125, | |
| "learning_rate": 4.504461815998803e-05, | |
| "loss": 1.0650880336761475, | |
| "step": 444 | |
| }, | |
| { | |
| "epoch": 0.20616168635626592, | |
| "grad_norm": 0.7890625, | |
| "learning_rate": 4.502883187605921e-05, | |
| "loss": 1.0747191905975342, | |
| "step": 445 | |
| }, | |
| { | |
| "epoch": 0.20662497104470698, | |
| "grad_norm": 0.859375, | |
| "learning_rate": 4.5013006324302014e-05, | |
| "loss": 1.0188624858856201, | |
| "step": 446 | |
| }, | |
| { | |
| "epoch": 0.207088255733148, | |
| "grad_norm": 0.73046875, | |
| "learning_rate": 4.4997141534268026e-05, | |
| "loss": 1.117804765701294, | |
| "step": 447 | |
| }, | |
| { | |
| "epoch": 0.20755154042158908, | |
| "grad_norm": 0.80859375, | |
| "learning_rate": 4.498123753558208e-05, | |
| "loss": 0.9615070819854736, | |
| "step": 448 | |
| }, | |
| { | |
| "epoch": 0.2080148251100301, | |
| "grad_norm": 0.8984375, | |
| "learning_rate": 4.496529435794224e-05, | |
| "loss": 1.1164673566818237, | |
| "step": 449 | |
| }, | |
| { | |
| "epoch": 0.20847810979847117, | |
| "grad_norm": 1.0078125, | |
| "learning_rate": 4.494931203111972e-05, | |
| "loss": 1.2451194524765015, | |
| "step": 450 | |
| }, | |
| { | |
| "epoch": 0.2089413944869122, | |
| "grad_norm": 0.82421875, | |
| "learning_rate": 4.493329058495885e-05, | |
| "loss": 1.0730493068695068, | |
| "step": 451 | |
| }, | |
| { | |
| "epoch": 0.20940467917535324, | |
| "grad_norm": 0.8203125, | |
| "learning_rate": 4.491723004937699e-05, | |
| "loss": 1.1245779991149902, | |
| "step": 452 | |
| }, | |
| { | |
| "epoch": 0.2098679638637943, | |
| "grad_norm": 0.73828125, | |
| "learning_rate": 4.490113045436454e-05, | |
| "loss": 1.1549571752548218, | |
| "step": 453 | |
| }, | |
| { | |
| "epoch": 0.21033124855223534, | |
| "grad_norm": 0.80859375, | |
| "learning_rate": 4.488499182998475e-05, | |
| "loss": 1.1194530725479126, | |
| "step": 454 | |
| }, | |
| { | |
| "epoch": 0.2107945332406764, | |
| "grad_norm": 0.86328125, | |
| "learning_rate": 4.486881420637385e-05, | |
| "loss": 0.9296231865882874, | |
| "step": 455 | |
| }, | |
| { | |
| "epoch": 0.21125781792911744, | |
| "grad_norm": 0.75, | |
| "learning_rate": 4.4852597613740826e-05, | |
| "loss": 0.9777655601501465, | |
| "step": 456 | |
| }, | |
| { | |
| "epoch": 0.2117211026175585, | |
| "grad_norm": 0.8984375, | |
| "learning_rate": 4.4836342082367454e-05, | |
| "loss": 1.2194538116455078, | |
| "step": 457 | |
| }, | |
| { | |
| "epoch": 0.21218438730599953, | |
| "grad_norm": 0.7578125, | |
| "learning_rate": 4.482004764260822e-05, | |
| "loss": 1.1565169095993042, | |
| "step": 458 | |
| }, | |
| { | |
| "epoch": 0.2126476719944406, | |
| "grad_norm": 0.6796875, | |
| "learning_rate": 4.4803714324890286e-05, | |
| "loss": 1.0961380004882812, | |
| "step": 459 | |
| }, | |
| { | |
| "epoch": 0.21311095668288163, | |
| "grad_norm": 0.83984375, | |
| "learning_rate": 4.478734215971337e-05, | |
| "loss": 0.9595807790756226, | |
| "step": 460 | |
| }, | |
| { | |
| "epoch": 0.21357424137132267, | |
| "grad_norm": 0.74609375, | |
| "learning_rate": 4.477093117764976e-05, | |
| "loss": 1.0418174266815186, | |
| "step": 461 | |
| }, | |
| { | |
| "epoch": 0.21403752605976373, | |
| "grad_norm": 0.8125, | |
| "learning_rate": 4.4754481409344225e-05, | |
| "loss": 1.1094303131103516, | |
| "step": 462 | |
| }, | |
| { | |
| "epoch": 0.21450081074820476, | |
| "grad_norm": 0.765625, | |
| "learning_rate": 4.4737992885513955e-05, | |
| "loss": 0.8423942923545837, | |
| "step": 463 | |
| }, | |
| { | |
| "epoch": 0.21496409543664582, | |
| "grad_norm": 0.84765625, | |
| "learning_rate": 4.472146563694851e-05, | |
| "loss": 1.148449182510376, | |
| "step": 464 | |
| }, | |
| { | |
| "epoch": 0.21542738012508686, | |
| "grad_norm": 0.78515625, | |
| "learning_rate": 4.470489969450977e-05, | |
| "loss": 0.9094064831733704, | |
| "step": 465 | |
| }, | |
| { | |
| "epoch": 0.21589066481352792, | |
| "grad_norm": 0.78515625, | |
| "learning_rate": 4.4688295089131864e-05, | |
| "loss": 1.0065088272094727, | |
| "step": 466 | |
| }, | |
| { | |
| "epoch": 0.21635394950196896, | |
| "grad_norm": 0.69921875, | |
| "learning_rate": 4.46716518518211e-05, | |
| "loss": 1.0104409456253052, | |
| "step": 467 | |
| }, | |
| { | |
| "epoch": 0.21681723419041002, | |
| "grad_norm": 0.75390625, | |
| "learning_rate": 4.465497001365598e-05, | |
| "loss": 1.0070152282714844, | |
| "step": 468 | |
| }, | |
| { | |
| "epoch": 0.21728051887885105, | |
| "grad_norm": 0.76953125, | |
| "learning_rate": 4.4638249605787e-05, | |
| "loss": 1.160359263420105, | |
| "step": 469 | |
| }, | |
| { | |
| "epoch": 0.2177438035672921, | |
| "grad_norm": 0.796875, | |
| "learning_rate": 4.462149065943676e-05, | |
| "loss": 1.0029304027557373, | |
| "step": 470 | |
| }, | |
| { | |
| "epoch": 0.21820708825573315, | |
| "grad_norm": 0.859375, | |
| "learning_rate": 4.4604693205899775e-05, | |
| "loss": 1.1525189876556396, | |
| "step": 471 | |
| }, | |
| { | |
| "epoch": 0.21867037294417419, | |
| "grad_norm": 0.83984375, | |
| "learning_rate": 4.458785727654249e-05, | |
| "loss": 0.9948219060897827, | |
| "step": 472 | |
| }, | |
| { | |
| "epoch": 0.21913365763261525, | |
| "grad_norm": 0.859375, | |
| "learning_rate": 4.457098290280319e-05, | |
| "loss": 1.1646232604980469, | |
| "step": 473 | |
| }, | |
| { | |
| "epoch": 0.21959694232105628, | |
| "grad_norm": 0.7265625, | |
| "learning_rate": 4.455407011619194e-05, | |
| "loss": 1.1069519519805908, | |
| "step": 474 | |
| }, | |
| { | |
| "epoch": 0.22006022700949734, | |
| "grad_norm": 0.828125, | |
| "learning_rate": 4.4537118948290546e-05, | |
| "loss": 1.155336618423462, | |
| "step": 475 | |
| }, | |
| { | |
| "epoch": 0.22052351169793838, | |
| "grad_norm": 0.78515625, | |
| "learning_rate": 4.4520129430752487e-05, | |
| "loss": 0.8776676058769226, | |
| "step": 476 | |
| }, | |
| { | |
| "epoch": 0.22098679638637944, | |
| "grad_norm": 0.79296875, | |
| "learning_rate": 4.4503101595302826e-05, | |
| "loss": 1.0383992195129395, | |
| "step": 477 | |
| }, | |
| { | |
| "epoch": 0.22145008107482048, | |
| "grad_norm": 0.734375, | |
| "learning_rate": 4.448603547373822e-05, | |
| "loss": 1.0792429447174072, | |
| "step": 478 | |
| }, | |
| { | |
| "epoch": 0.2219133657632615, | |
| "grad_norm": 0.8671875, | |
| "learning_rate": 4.4468931097926796e-05, | |
| "loss": 1.2104331254959106, | |
| "step": 479 | |
| }, | |
| { | |
| "epoch": 0.22237665045170257, | |
| "grad_norm": 0.7578125, | |
| "learning_rate": 4.44517884998081e-05, | |
| "loss": 1.1502063274383545, | |
| "step": 480 | |
| }, | |
| { | |
| "epoch": 0.2228399351401436, | |
| "grad_norm": 0.8359375, | |
| "learning_rate": 4.443460771139309e-05, | |
| "loss": 1.0817224979400635, | |
| "step": 481 | |
| }, | |
| { | |
| "epoch": 0.22330321982858467, | |
| "grad_norm": 0.859375, | |
| "learning_rate": 4.441738876476401e-05, | |
| "loss": 0.880224347114563, | |
| "step": 482 | |
| }, | |
| { | |
| "epoch": 0.2237665045170257, | |
| "grad_norm": 0.765625, | |
| "learning_rate": 4.4400131692074355e-05, | |
| "loss": 0.9947736263275146, | |
| "step": 483 | |
| }, | |
| { | |
| "epoch": 0.22422978920546677, | |
| "grad_norm": 1.1328125, | |
| "learning_rate": 4.438283652554883e-05, | |
| "loss": 1.0493063926696777, | |
| "step": 484 | |
| }, | |
| { | |
| "epoch": 0.2246930738939078, | |
| "grad_norm": 0.80859375, | |
| "learning_rate": 4.436550329748328e-05, | |
| "loss": 1.156111240386963, | |
| "step": 485 | |
| }, | |
| { | |
| "epoch": 0.22515635858234886, | |
| "grad_norm": 0.75, | |
| "learning_rate": 4.4348132040244586e-05, | |
| "loss": 1.1266316175460815, | |
| "step": 486 | |
| }, | |
| { | |
| "epoch": 0.2256196432707899, | |
| "grad_norm": 0.75, | |
| "learning_rate": 4.4330722786270686e-05, | |
| "loss": 0.9672824740409851, | |
| "step": 487 | |
| }, | |
| { | |
| "epoch": 0.22608292795923093, | |
| "grad_norm": 0.71875, | |
| "learning_rate": 4.431327556807044e-05, | |
| "loss": 1.073356032371521, | |
| "step": 488 | |
| }, | |
| { | |
| "epoch": 0.226546212647672, | |
| "grad_norm": 0.69921875, | |
| "learning_rate": 4.429579041822362e-05, | |
| "loss": 1.0501450300216675, | |
| "step": 489 | |
| }, | |
| { | |
| "epoch": 0.22700949733611303, | |
| "grad_norm": 1.0546875, | |
| "learning_rate": 4.427826736938081e-05, | |
| "loss": 1.253738522529602, | |
| "step": 490 | |
| }, | |
| { | |
| "epoch": 0.2274727820245541, | |
| "grad_norm": 0.76171875, | |
| "learning_rate": 4.426070645426339e-05, | |
| "loss": 0.9925602078437805, | |
| "step": 491 | |
| }, | |
| { | |
| "epoch": 0.22793606671299513, | |
| "grad_norm": 1.1875, | |
| "learning_rate": 4.424310770566343e-05, | |
| "loss": 1.0422255992889404, | |
| "step": 492 | |
| }, | |
| { | |
| "epoch": 0.2283993514014362, | |
| "grad_norm": 0.77734375, | |
| "learning_rate": 4.4225471156443644e-05, | |
| "loss": 0.9284833669662476, | |
| "step": 493 | |
| }, | |
| { | |
| "epoch": 0.22886263608987722, | |
| "grad_norm": 0.71875, | |
| "learning_rate": 4.420779683953734e-05, | |
| "loss": 0.9178367853164673, | |
| "step": 494 | |
| }, | |
| { | |
| "epoch": 0.2293259207783183, | |
| "grad_norm": 0.890625, | |
| "learning_rate": 4.419008478794835e-05, | |
| "loss": 1.1734018325805664, | |
| "step": 495 | |
| }, | |
| { | |
| "epoch": 0.22978920546675932, | |
| "grad_norm": 0.7734375, | |
| "learning_rate": 4.4172335034750976e-05, | |
| "loss": 1.1945644617080688, | |
| "step": 496 | |
| }, | |
| { | |
| "epoch": 0.23025249015520038, | |
| "grad_norm": 0.9921875, | |
| "learning_rate": 4.415454761308991e-05, | |
| "loss": 1.0181314945220947, | |
| "step": 497 | |
| }, | |
| { | |
| "epoch": 0.23071577484364142, | |
| "grad_norm": 0.97265625, | |
| "learning_rate": 4.413672255618019e-05, | |
| "loss": 1.0368403196334839, | |
| "step": 498 | |
| }, | |
| { | |
| "epoch": 0.23117905953208245, | |
| "grad_norm": 0.78125, | |
| "learning_rate": 4.411885989730713e-05, | |
| "loss": 1.0601047277450562, | |
| "step": 499 | |
| }, | |
| { | |
| "epoch": 0.23164234422052352, | |
| "grad_norm": 0.77734375, | |
| "learning_rate": 4.410095966982626e-05, | |
| "loss": 0.901918888092041, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.23210562890896455, | |
| "grad_norm": 0.91796875, | |
| "learning_rate": 4.408302190716327e-05, | |
| "loss": 1.1393412351608276, | |
| "step": 501 | |
| }, | |
| { | |
| "epoch": 0.2325689135974056, | |
| "grad_norm": 0.8984375, | |
| "learning_rate": 4.406504664281392e-05, | |
| "loss": 1.2997722625732422, | |
| "step": 502 | |
| }, | |
| { | |
| "epoch": 0.23303219828584665, | |
| "grad_norm": 0.7890625, | |
| "learning_rate": 4.4047033910344015e-05, | |
| "loss": 0.889095664024353, | |
| "step": 503 | |
| }, | |
| { | |
| "epoch": 0.2334954829742877, | |
| "grad_norm": 0.8359375, | |
| "learning_rate": 4.4028983743389327e-05, | |
| "loss": 1.096193790435791, | |
| "step": 504 | |
| }, | |
| { | |
| "epoch": 0.23395876766272874, | |
| "grad_norm": 0.8125, | |
| "learning_rate": 4.4010896175655516e-05, | |
| "loss": 0.8893133997917175, | |
| "step": 505 | |
| }, | |
| { | |
| "epoch": 0.2344220523511698, | |
| "grad_norm": 0.8359375, | |
| "learning_rate": 4.39927712409181e-05, | |
| "loss": 1.0893774032592773, | |
| "step": 506 | |
| }, | |
| { | |
| "epoch": 0.23488533703961084, | |
| "grad_norm": 0.80859375, | |
| "learning_rate": 4.397460897302237e-05, | |
| "loss": 1.2915987968444824, | |
| "step": 507 | |
| }, | |
| { | |
| "epoch": 0.23534862172805188, | |
| "grad_norm": 0.83984375, | |
| "learning_rate": 4.395640940588332e-05, | |
| "loss": 1.1468744277954102, | |
| "step": 508 | |
| }, | |
| { | |
| "epoch": 0.23581190641649294, | |
| "grad_norm": 0.875, | |
| "learning_rate": 4.3938172573485584e-05, | |
| "loss": 1.081978678703308, | |
| "step": 509 | |
| }, | |
| { | |
| "epoch": 0.23627519110493397, | |
| "grad_norm": 0.921875, | |
| "learning_rate": 4.391989850988342e-05, | |
| "loss": 1.1137498617172241, | |
| "step": 510 | |
| }, | |
| { | |
| "epoch": 0.23673847579337504, | |
| "grad_norm": 0.78125, | |
| "learning_rate": 4.390158724920058e-05, | |
| "loss": 0.9725139141082764, | |
| "step": 511 | |
| }, | |
| { | |
| "epoch": 0.23720176048181607, | |
| "grad_norm": 0.78125, | |
| "learning_rate": 4.388323882563028e-05, | |
| "loss": 1.2336326837539673, | |
| "step": 512 | |
| }, | |
| { | |
| "epoch": 0.23766504517025713, | |
| "grad_norm": 0.734375, | |
| "learning_rate": 4.3864853273435136e-05, | |
| "loss": 0.9616613984107971, | |
| "step": 513 | |
| }, | |
| { | |
| "epoch": 0.23812832985869817, | |
| "grad_norm": 0.734375, | |
| "learning_rate": 4.384643062694709e-05, | |
| "loss": 0.9157605767250061, | |
| "step": 514 | |
| }, | |
| { | |
| "epoch": 0.23859161454713923, | |
| "grad_norm": 0.91015625, | |
| "learning_rate": 4.382797092056735e-05, | |
| "loss": 1.1036900281906128, | |
| "step": 515 | |
| }, | |
| { | |
| "epoch": 0.23905489923558026, | |
| "grad_norm": 0.87109375, | |
| "learning_rate": 4.380947418876636e-05, | |
| "loss": 0.9066743850708008, | |
| "step": 516 | |
| }, | |
| { | |
| "epoch": 0.2395181839240213, | |
| "grad_norm": 0.8515625, | |
| "learning_rate": 4.379094046608364e-05, | |
| "loss": 1.0424668788909912, | |
| "step": 517 | |
| }, | |
| { | |
| "epoch": 0.23998146861246236, | |
| "grad_norm": 0.8515625, | |
| "learning_rate": 4.3772369787127826e-05, | |
| "loss": 1.0981203317642212, | |
| "step": 518 | |
| }, | |
| { | |
| "epoch": 0.2404447533009034, | |
| "grad_norm": 0.8125, | |
| "learning_rate": 4.3753762186576575e-05, | |
| "loss": 1.098775863647461, | |
| "step": 519 | |
| }, | |
| { | |
| "epoch": 0.24090803798934446, | |
| "grad_norm": 0.78515625, | |
| "learning_rate": 4.3735117699176455e-05, | |
| "loss": 0.8571038246154785, | |
| "step": 520 | |
| }, | |
| { | |
| "epoch": 0.2413713226777855, | |
| "grad_norm": 0.80859375, | |
| "learning_rate": 4.3716436359742935e-05, | |
| "loss": 0.991769552230835, | |
| "step": 521 | |
| }, | |
| { | |
| "epoch": 0.24183460736622656, | |
| "grad_norm": 0.80078125, | |
| "learning_rate": 4.369771820316029e-05, | |
| "loss": 1.2347557544708252, | |
| "step": 522 | |
| }, | |
| { | |
| "epoch": 0.2422978920546676, | |
| "grad_norm": 0.796875, | |
| "learning_rate": 4.367896326438155e-05, | |
| "loss": 0.941724956035614, | |
| "step": 523 | |
| }, | |
| { | |
| "epoch": 0.24276117674310865, | |
| "grad_norm": 0.8046875, | |
| "learning_rate": 4.366017157842844e-05, | |
| "loss": 0.9241411685943604, | |
| "step": 524 | |
| }, | |
| { | |
| "epoch": 0.2432244614315497, | |
| "grad_norm": 0.8359375, | |
| "learning_rate": 4.3641343180391275e-05, | |
| "loss": 1.107820987701416, | |
| "step": 525 | |
| }, | |
| { | |
| "epoch": 0.24368774611999072, | |
| "grad_norm": 0.83984375, | |
| "learning_rate": 4.362247810542894e-05, | |
| "loss": 1.052571177482605, | |
| "step": 526 | |
| }, | |
| { | |
| "epoch": 0.24415103080843178, | |
| "grad_norm": 0.75, | |
| "learning_rate": 4.360357638876883e-05, | |
| "loss": 0.8987835645675659, | |
| "step": 527 | |
| }, | |
| { | |
| "epoch": 0.24461431549687282, | |
| "grad_norm": 0.83203125, | |
| "learning_rate": 4.3584638065706724e-05, | |
| "loss": 1.1791050434112549, | |
| "step": 528 | |
| }, | |
| { | |
| "epoch": 0.24507760018531388, | |
| "grad_norm": 0.77734375, | |
| "learning_rate": 4.356566317160677e-05, | |
| "loss": 1.173535704612732, | |
| "step": 529 | |
| }, | |
| { | |
| "epoch": 0.24554088487375492, | |
| "grad_norm": 0.87890625, | |
| "learning_rate": 4.354665174190142e-05, | |
| "loss": 0.8905298709869385, | |
| "step": 530 | |
| }, | |
| { | |
| "epoch": 0.24600416956219598, | |
| "grad_norm": 0.92578125, | |
| "learning_rate": 4.352760381209135e-05, | |
| "loss": 1.2305561304092407, | |
| "step": 531 | |
| }, | |
| { | |
| "epoch": 0.246467454250637, | |
| "grad_norm": 0.75390625, | |
| "learning_rate": 4.350851941774537e-05, | |
| "loss": 1.010733723640442, | |
| "step": 532 | |
| }, | |
| { | |
| "epoch": 0.24693073893907808, | |
| "grad_norm": 0.76953125, | |
| "learning_rate": 4.34893985945004e-05, | |
| "loss": 1.0408154726028442, | |
| "step": 533 | |
| }, | |
| { | |
| "epoch": 0.2473940236275191, | |
| "grad_norm": 0.9375, | |
| "learning_rate": 4.347024137806139e-05, | |
| "loss": 1.157252550125122, | |
| "step": 534 | |
| }, | |
| { | |
| "epoch": 0.24785730831596015, | |
| "grad_norm": 0.859375, | |
| "learning_rate": 4.345104780420122e-05, | |
| "loss": 1.2410048246383667, | |
| "step": 535 | |
| }, | |
| { | |
| "epoch": 0.2483205930044012, | |
| "grad_norm": 0.8359375, | |
| "learning_rate": 4.34318179087607e-05, | |
| "loss": 1.0325219631195068, | |
| "step": 536 | |
| }, | |
| { | |
| "epoch": 0.24878387769284224, | |
| "grad_norm": 0.859375, | |
| "learning_rate": 4.3412551727648435e-05, | |
| "loss": 1.166888952255249, | |
| "step": 537 | |
| }, | |
| { | |
| "epoch": 0.2492471623812833, | |
| "grad_norm": 0.8046875, | |
| "learning_rate": 4.33932492968408e-05, | |
| "loss": 1.1053187847137451, | |
| "step": 538 | |
| }, | |
| { | |
| "epoch": 0.24971044706972434, | |
| "grad_norm": 0.796875, | |
| "learning_rate": 4.337391065238187e-05, | |
| "loss": 1.1279836893081665, | |
| "step": 539 | |
| }, | |
| { | |
| "epoch": 0.2501737317581654, | |
| "grad_norm": 0.72265625, | |
| "learning_rate": 4.335453583038331e-05, | |
| "loss": 1.048471450805664, | |
| "step": 540 | |
| }, | |
| { | |
| "epoch": 0.25063701644660646, | |
| "grad_norm": 0.828125, | |
| "learning_rate": 4.333512486702438e-05, | |
| "loss": 0.949547290802002, | |
| "step": 541 | |
| }, | |
| { | |
| "epoch": 0.2511003011350475, | |
| "grad_norm": 0.80859375, | |
| "learning_rate": 4.331567779855179e-05, | |
| "loss": 1.026901364326477, | |
| "step": 542 | |
| }, | |
| { | |
| "epoch": 0.25156358582348853, | |
| "grad_norm": 0.88671875, | |
| "learning_rate": 4.3296194661279704e-05, | |
| "loss": 0.979106605052948, | |
| "step": 543 | |
| }, | |
| { | |
| "epoch": 0.25202687051192957, | |
| "grad_norm": 0.8046875, | |
| "learning_rate": 4.327667549158962e-05, | |
| "loss": 0.9530601501464844, | |
| "step": 544 | |
| }, | |
| { | |
| "epoch": 0.2524901552003706, | |
| "grad_norm": 0.9375, | |
| "learning_rate": 4.325712032593032e-05, | |
| "loss": 1.4264435768127441, | |
| "step": 545 | |
| }, | |
| { | |
| "epoch": 0.2529534398888117, | |
| "grad_norm": 0.875, | |
| "learning_rate": 4.3237529200817824e-05, | |
| "loss": 1.0840469598770142, | |
| "step": 546 | |
| }, | |
| { | |
| "epoch": 0.2534167245772527, | |
| "grad_norm": 0.8359375, | |
| "learning_rate": 4.321790215283526e-05, | |
| "loss": 0.9668251872062683, | |
| "step": 547 | |
| }, | |
| { | |
| "epoch": 0.25388000926569376, | |
| "grad_norm": 0.72265625, | |
| "learning_rate": 4.3198239218632874e-05, | |
| "loss": 1.0909249782562256, | |
| "step": 548 | |
| }, | |
| { | |
| "epoch": 0.2543432939541348, | |
| "grad_norm": 0.73828125, | |
| "learning_rate": 4.317854043492791e-05, | |
| "loss": 1.1680148839950562, | |
| "step": 549 | |
| }, | |
| { | |
| "epoch": 0.2548065786425759, | |
| "grad_norm": 0.82421875, | |
| "learning_rate": 4.315880583850454e-05, | |
| "loss": 1.0644400119781494, | |
| "step": 550 | |
| }, | |
| { | |
| "epoch": 0.2552698633310169, | |
| "grad_norm": 0.76171875, | |
| "learning_rate": 4.313903546621384e-05, | |
| "loss": 1.0424561500549316, | |
| "step": 551 | |
| }, | |
| { | |
| "epoch": 0.25573314801945796, | |
| "grad_norm": 0.80859375, | |
| "learning_rate": 4.3119229354973664e-05, | |
| "loss": 0.989732563495636, | |
| "step": 552 | |
| }, | |
| { | |
| "epoch": 0.256196432707899, | |
| "grad_norm": 0.82421875, | |
| "learning_rate": 4.309938754176862e-05, | |
| "loss": 1.0276066064834595, | |
| "step": 553 | |
| }, | |
| { | |
| "epoch": 0.25665971739634, | |
| "grad_norm": 0.8125, | |
| "learning_rate": 4.307951006364998e-05, | |
| "loss": 1.0524067878723145, | |
| "step": 554 | |
| }, | |
| { | |
| "epoch": 0.2571230020847811, | |
| "grad_norm": 0.984375, | |
| "learning_rate": 4.3059596957735606e-05, | |
| "loss": 1.1999335289001465, | |
| "step": 555 | |
| }, | |
| { | |
| "epoch": 0.25758628677322215, | |
| "grad_norm": 0.74609375, | |
| "learning_rate": 4.3039648261209896e-05, | |
| "loss": 1.0140695571899414, | |
| "step": 556 | |
| }, | |
| { | |
| "epoch": 0.2580495714616632, | |
| "grad_norm": 0.82421875, | |
| "learning_rate": 4.3019664011323705e-05, | |
| "loss": 1.0452879667282104, | |
| "step": 557 | |
| }, | |
| { | |
| "epoch": 0.2585128561501042, | |
| "grad_norm": 0.79296875, | |
| "learning_rate": 4.2999644245394275e-05, | |
| "loss": 1.1013998985290527, | |
| "step": 558 | |
| }, | |
| { | |
| "epoch": 0.2589761408385453, | |
| "grad_norm": 0.87890625, | |
| "learning_rate": 4.297958900080519e-05, | |
| "loss": 0.9173800945281982, | |
| "step": 559 | |
| }, | |
| { | |
| "epoch": 0.25943942552698634, | |
| "grad_norm": 0.7734375, | |
| "learning_rate": 4.295949831500624e-05, | |
| "loss": 1.2523088455200195, | |
| "step": 560 | |
| }, | |
| { | |
| "epoch": 0.2599027102154274, | |
| "grad_norm": 0.78125, | |
| "learning_rate": 4.293937222551345e-05, | |
| "loss": 0.9910227060317993, | |
| "step": 561 | |
| }, | |
| { | |
| "epoch": 0.2603659949038684, | |
| "grad_norm": 0.83203125, | |
| "learning_rate": 4.2919210769908905e-05, | |
| "loss": 1.015892744064331, | |
| "step": 562 | |
| }, | |
| { | |
| "epoch": 0.26082927959230945, | |
| "grad_norm": 0.8046875, | |
| "learning_rate": 4.289901398584077e-05, | |
| "loss": 1.1399426460266113, | |
| "step": 563 | |
| }, | |
| { | |
| "epoch": 0.26129256428075054, | |
| "grad_norm": 0.796875, | |
| "learning_rate": 4.287878191102316e-05, | |
| "loss": 0.9163965582847595, | |
| "step": 564 | |
| }, | |
| { | |
| "epoch": 0.2617558489691916, | |
| "grad_norm": 0.69921875, | |
| "learning_rate": 4.285851458323608e-05, | |
| "loss": 1.0406631231307983, | |
| "step": 565 | |
| }, | |
| { | |
| "epoch": 0.2622191336576326, | |
| "grad_norm": 0.87109375, | |
| "learning_rate": 4.283821204032539e-05, | |
| "loss": 0.952318549156189, | |
| "step": 566 | |
| }, | |
| { | |
| "epoch": 0.26268241834607364, | |
| "grad_norm": 0.921875, | |
| "learning_rate": 4.281787432020269e-05, | |
| "loss": 1.0432265996932983, | |
| "step": 567 | |
| }, | |
| { | |
| "epoch": 0.26314570303451473, | |
| "grad_norm": 0.8359375, | |
| "learning_rate": 4.279750146084527e-05, | |
| "loss": 1.1223399639129639, | |
| "step": 568 | |
| }, | |
| { | |
| "epoch": 0.26360898772295577, | |
| "grad_norm": 0.80859375, | |
| "learning_rate": 4.2777093500296055e-05, | |
| "loss": 1.0468631982803345, | |
| "step": 569 | |
| }, | |
| { | |
| "epoch": 0.2640722724113968, | |
| "grad_norm": 0.76953125, | |
| "learning_rate": 4.2756650476663475e-05, | |
| "loss": 1.0525509119033813, | |
| "step": 570 | |
| }, | |
| { | |
| "epoch": 0.26453555709983784, | |
| "grad_norm": 0.80078125, | |
| "learning_rate": 4.273617242812147e-05, | |
| "loss": 0.9959677457809448, | |
| "step": 571 | |
| }, | |
| { | |
| "epoch": 0.26499884178827887, | |
| "grad_norm": 0.8359375, | |
| "learning_rate": 4.271565939290939e-05, | |
| "loss": 0.8867281675338745, | |
| "step": 572 | |
| }, | |
| { | |
| "epoch": 0.26546212647671996, | |
| "grad_norm": 0.79296875, | |
| "learning_rate": 4.269511140933187e-05, | |
| "loss": 1.0435187816619873, | |
| "step": 573 | |
| }, | |
| { | |
| "epoch": 0.265925411165161, | |
| "grad_norm": 0.74609375, | |
| "learning_rate": 4.267452851575886e-05, | |
| "loss": 0.9710588455200195, | |
| "step": 574 | |
| }, | |
| { | |
| "epoch": 0.26638869585360203, | |
| "grad_norm": 0.828125, | |
| "learning_rate": 4.2653910750625455e-05, | |
| "loss": 1.3287699222564697, | |
| "step": 575 | |
| }, | |
| { | |
| "epoch": 0.26685198054204307, | |
| "grad_norm": 0.80859375, | |
| "learning_rate": 4.2633258152431896e-05, | |
| "loss": 1.0961614847183228, | |
| "step": 576 | |
| }, | |
| { | |
| "epoch": 0.26731526523048416, | |
| "grad_norm": 0.77734375, | |
| "learning_rate": 4.261257075974345e-05, | |
| "loss": 0.9179559946060181, | |
| "step": 577 | |
| }, | |
| { | |
| "epoch": 0.2677785499189252, | |
| "grad_norm": 0.98828125, | |
| "learning_rate": 4.2591848611190364e-05, | |
| "loss": 1.1007611751556396, | |
| "step": 578 | |
| }, | |
| { | |
| "epoch": 0.2682418346073662, | |
| "grad_norm": 0.75, | |
| "learning_rate": 4.257109174546781e-05, | |
| "loss": 1.0560678243637085, | |
| "step": 579 | |
| }, | |
| { | |
| "epoch": 0.26870511929580726, | |
| "grad_norm": 0.74609375, | |
| "learning_rate": 4.2550300201335725e-05, | |
| "loss": 1.0741382837295532, | |
| "step": 580 | |
| }, | |
| { | |
| "epoch": 0.2691684039842483, | |
| "grad_norm": 0.76171875, | |
| "learning_rate": 4.252947401761887e-05, | |
| "loss": 0.9691828489303589, | |
| "step": 581 | |
| }, | |
| { | |
| "epoch": 0.2696316886726894, | |
| "grad_norm": 0.76953125, | |
| "learning_rate": 4.250861323320666e-05, | |
| "loss": 1.2666388750076294, | |
| "step": 582 | |
| }, | |
| { | |
| "epoch": 0.2700949733611304, | |
| "grad_norm": 0.8203125, | |
| "learning_rate": 4.248771788705311e-05, | |
| "loss": 0.8697996735572815, | |
| "step": 583 | |
| }, | |
| { | |
| "epoch": 0.27055825804957145, | |
| "grad_norm": 0.80078125, | |
| "learning_rate": 4.24667880181768e-05, | |
| "loss": 0.9978750348091125, | |
| "step": 584 | |
| }, | |
| { | |
| "epoch": 0.2710215427380125, | |
| "grad_norm": 0.8203125, | |
| "learning_rate": 4.244582366566075e-05, | |
| "loss": 1.2406501770019531, | |
| "step": 585 | |
| }, | |
| { | |
| "epoch": 0.2714848274264536, | |
| "grad_norm": 0.7890625, | |
| "learning_rate": 4.24248248686524e-05, | |
| "loss": 1.0365958213806152, | |
| "step": 586 | |
| }, | |
| { | |
| "epoch": 0.2719481121148946, | |
| "grad_norm": 0.8359375, | |
| "learning_rate": 4.240379166636347e-05, | |
| "loss": 0.9354648590087891, | |
| "step": 587 | |
| }, | |
| { | |
| "epoch": 0.27241139680333565, | |
| "grad_norm": 0.796875, | |
| "learning_rate": 4.238272409806997e-05, | |
| "loss": 1.082112431526184, | |
| "step": 588 | |
| }, | |
| { | |
| "epoch": 0.2728746814917767, | |
| "grad_norm": 0.828125, | |
| "learning_rate": 4.2361622203112054e-05, | |
| "loss": 1.1324368715286255, | |
| "step": 589 | |
| }, | |
| { | |
| "epoch": 0.2733379661802177, | |
| "grad_norm": 0.76953125, | |
| "learning_rate": 4.234048602089398e-05, | |
| "loss": 0.9812889695167542, | |
| "step": 590 | |
| }, | |
| { | |
| "epoch": 0.2738012508686588, | |
| "grad_norm": 0.7890625, | |
| "learning_rate": 4.231931559088404e-05, | |
| "loss": 0.9894756078720093, | |
| "step": 591 | |
| }, | |
| { | |
| "epoch": 0.27426453555709984, | |
| "grad_norm": 0.8125, | |
| "learning_rate": 4.2298110952614474e-05, | |
| "loss": 1.006495475769043, | |
| "step": 592 | |
| }, | |
| { | |
| "epoch": 0.2747278202455409, | |
| "grad_norm": 0.87109375, | |
| "learning_rate": 4.22768721456814e-05, | |
| "loss": 1.0490379333496094, | |
| "step": 593 | |
| }, | |
| { | |
| "epoch": 0.2751911049339819, | |
| "grad_norm": 0.77734375, | |
| "learning_rate": 4.225559920974473e-05, | |
| "loss": 1.0940771102905273, | |
| "step": 594 | |
| }, | |
| { | |
| "epoch": 0.275654389622423, | |
| "grad_norm": 0.84765625, | |
| "learning_rate": 4.223429218452812e-05, | |
| "loss": 1.1644221544265747, | |
| "step": 595 | |
| }, | |
| { | |
| "epoch": 0.27611767431086404, | |
| "grad_norm": 0.75390625, | |
| "learning_rate": 4.2212951109818895e-05, | |
| "loss": 0.9143954515457153, | |
| "step": 596 | |
| }, | |
| { | |
| "epoch": 0.27658095899930507, | |
| "grad_norm": 0.7734375, | |
| "learning_rate": 4.219157602546792e-05, | |
| "loss": 0.9301037192344666, | |
| "step": 597 | |
| }, | |
| { | |
| "epoch": 0.2770442436877461, | |
| "grad_norm": 0.69921875, | |
| "learning_rate": 4.217016697138961e-05, | |
| "loss": 0.889419436454773, | |
| "step": 598 | |
| }, | |
| { | |
| "epoch": 0.27750752837618714, | |
| "grad_norm": 0.83203125, | |
| "learning_rate": 4.2148723987561786e-05, | |
| "loss": 1.0732734203338623, | |
| "step": 599 | |
| }, | |
| { | |
| "epoch": 0.27797081306462823, | |
| "grad_norm": 0.76953125, | |
| "learning_rate": 4.212724711402563e-05, | |
| "loss": 1.0122696161270142, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 0.27843409775306927, | |
| "grad_norm": 0.83203125, | |
| "learning_rate": 4.2105736390885625e-05, | |
| "loss": 0.8314121961593628, | |
| "step": 601 | |
| }, | |
| { | |
| "epoch": 0.2788973824415103, | |
| "grad_norm": 0.75, | |
| "learning_rate": 4.208419185830945e-05, | |
| "loss": 1.0255941152572632, | |
| "step": 602 | |
| }, | |
| { | |
| "epoch": 0.27936066712995133, | |
| "grad_norm": 0.7265625, | |
| "learning_rate": 4.206261355652791e-05, | |
| "loss": 1.0456650257110596, | |
| "step": 603 | |
| }, | |
| { | |
| "epoch": 0.2798239518183924, | |
| "grad_norm": 0.74609375, | |
| "learning_rate": 4.204100152583488e-05, | |
| "loss": 0.9093706607818604, | |
| "step": 604 | |
| }, | |
| { | |
| "epoch": 0.28028723650683346, | |
| "grad_norm": 2.3125, | |
| "learning_rate": 4.201935580658723e-05, | |
| "loss": 1.0478147268295288, | |
| "step": 605 | |
| }, | |
| { | |
| "epoch": 0.2807505211952745, | |
| "grad_norm": 0.8359375, | |
| "learning_rate": 4.199767643920469e-05, | |
| "loss": 1.1493206024169922, | |
| "step": 606 | |
| }, | |
| { | |
| "epoch": 0.28121380588371553, | |
| "grad_norm": 0.75, | |
| "learning_rate": 4.197596346416988e-05, | |
| "loss": 1.017486810684204, | |
| "step": 607 | |
| }, | |
| { | |
| "epoch": 0.28167709057215656, | |
| "grad_norm": 0.84765625, | |
| "learning_rate": 4.195421692202815e-05, | |
| "loss": 1.1652302742004395, | |
| "step": 608 | |
| }, | |
| { | |
| "epoch": 0.28214037526059765, | |
| "grad_norm": 0.84375, | |
| "learning_rate": 4.1932436853387514e-05, | |
| "loss": 0.9417747259140015, | |
| "step": 609 | |
| }, | |
| { | |
| "epoch": 0.2826036599490387, | |
| "grad_norm": 0.74609375, | |
| "learning_rate": 4.191062329891863e-05, | |
| "loss": 0.8607147932052612, | |
| "step": 610 | |
| }, | |
| { | |
| "epoch": 0.2830669446374797, | |
| "grad_norm": 0.92578125, | |
| "learning_rate": 4.1888776299354656e-05, | |
| "loss": 1.025602102279663, | |
| "step": 611 | |
| }, | |
| { | |
| "epoch": 0.28353022932592076, | |
| "grad_norm": 0.8046875, | |
| "learning_rate": 4.186689589549121e-05, | |
| "loss": 1.0558090209960938, | |
| "step": 612 | |
| }, | |
| { | |
| "epoch": 0.28399351401436185, | |
| "grad_norm": 0.8828125, | |
| "learning_rate": 4.1844982128186294e-05, | |
| "loss": 0.9318227171897888, | |
| "step": 613 | |
| }, | |
| { | |
| "epoch": 0.2844567987028029, | |
| "grad_norm": 0.8828125, | |
| "learning_rate": 4.18230350383602e-05, | |
| "loss": 0.9264402389526367, | |
| "step": 614 | |
| }, | |
| { | |
| "epoch": 0.2849200833912439, | |
| "grad_norm": 0.80859375, | |
| "learning_rate": 4.1801054666995453e-05, | |
| "loss": 1.160361647605896, | |
| "step": 615 | |
| }, | |
| { | |
| "epoch": 0.28538336807968495, | |
| "grad_norm": 0.87890625, | |
| "learning_rate": 4.177904105513673e-05, | |
| "loss": 0.902491569519043, | |
| "step": 616 | |
| }, | |
| { | |
| "epoch": 0.285846652768126, | |
| "grad_norm": 0.81640625, | |
| "learning_rate": 4.175699424389075e-05, | |
| "loss": 1.1254316568374634, | |
| "step": 617 | |
| }, | |
| { | |
| "epoch": 0.2863099374565671, | |
| "grad_norm": 0.7421875, | |
| "learning_rate": 4.173491427442627e-05, | |
| "loss": 0.8220522999763489, | |
| "step": 618 | |
| }, | |
| { | |
| "epoch": 0.2867732221450081, | |
| "grad_norm": 0.859375, | |
| "learning_rate": 4.1712801187973925e-05, | |
| "loss": 1.1775267124176025, | |
| "step": 619 | |
| }, | |
| { | |
| "epoch": 0.28723650683344915, | |
| "grad_norm": 0.8203125, | |
| "learning_rate": 4.1690655025826225e-05, | |
| "loss": 1.0950840711593628, | |
| "step": 620 | |
| }, | |
| { | |
| "epoch": 0.2876997915218902, | |
| "grad_norm": 0.7734375, | |
| "learning_rate": 4.166847582933741e-05, | |
| "loss": 1.2453440427780151, | |
| "step": 621 | |
| }, | |
| { | |
| "epoch": 0.28816307621033127, | |
| "grad_norm": 0.79296875, | |
| "learning_rate": 4.164626363992343e-05, | |
| "loss": 0.9733505845069885, | |
| "step": 622 | |
| }, | |
| { | |
| "epoch": 0.2886263608987723, | |
| "grad_norm": 0.8984375, | |
| "learning_rate": 4.162401849906183e-05, | |
| "loss": 1.0875972509384155, | |
| "step": 623 | |
| }, | |
| { | |
| "epoch": 0.28908964558721334, | |
| "grad_norm": 0.8359375, | |
| "learning_rate": 4.16017404482917e-05, | |
| "loss": 1.1624879837036133, | |
| "step": 624 | |
| }, | |
| { | |
| "epoch": 0.2895529302756544, | |
| "grad_norm": 1.125, | |
| "learning_rate": 4.1579429529213564e-05, | |
| "loss": 1.1447054147720337, | |
| "step": 625 | |
| }, | |
| { | |
| "epoch": 0.2900162149640954, | |
| "grad_norm": 0.70703125, | |
| "learning_rate": 4.155708578348935e-05, | |
| "loss": 0.92429119348526, | |
| "step": 626 | |
| }, | |
| { | |
| "epoch": 0.2904794996525365, | |
| "grad_norm": 0.8125, | |
| "learning_rate": 4.1534709252842254e-05, | |
| "loss": 0.9543266296386719, | |
| "step": 627 | |
| }, | |
| { | |
| "epoch": 0.29094278434097753, | |
| "grad_norm": 0.89453125, | |
| "learning_rate": 4.151229997905672e-05, | |
| "loss": 1.0998059511184692, | |
| "step": 628 | |
| }, | |
| { | |
| "epoch": 0.29140606902941857, | |
| "grad_norm": 0.77734375, | |
| "learning_rate": 4.148985800397831e-05, | |
| "loss": 0.9753661155700684, | |
| "step": 629 | |
| }, | |
| { | |
| "epoch": 0.2918693537178596, | |
| "grad_norm": 0.84375, | |
| "learning_rate": 4.146738336951367e-05, | |
| "loss": 0.9926996231079102, | |
| "step": 630 | |
| }, | |
| { | |
| "epoch": 0.2923326384063007, | |
| "grad_norm": 0.671875, | |
| "learning_rate": 4.144487611763041e-05, | |
| "loss": 0.9744971394538879, | |
| "step": 631 | |
| }, | |
| { | |
| "epoch": 0.29279592309474173, | |
| "grad_norm": 0.84375, | |
| "learning_rate": 4.142233629035706e-05, | |
| "loss": 1.1101515293121338, | |
| "step": 632 | |
| }, | |
| { | |
| "epoch": 0.29325920778318276, | |
| "grad_norm": 0.7421875, | |
| "learning_rate": 4.1399763929783e-05, | |
| "loss": 1.1098037958145142, | |
| "step": 633 | |
| }, | |
| { | |
| "epoch": 0.2937224924716238, | |
| "grad_norm": 0.83984375, | |
| "learning_rate": 4.137715907805832e-05, | |
| "loss": 1.0720516443252563, | |
| "step": 634 | |
| }, | |
| { | |
| "epoch": 0.2941857771600649, | |
| "grad_norm": 0.7734375, | |
| "learning_rate": 4.135452177739382e-05, | |
| "loss": 1.0267348289489746, | |
| "step": 635 | |
| }, | |
| { | |
| "epoch": 0.2946490618485059, | |
| "grad_norm": 0.76171875, | |
| "learning_rate": 4.133185207006086e-05, | |
| "loss": 0.9987479448318481, | |
| "step": 636 | |
| }, | |
| { | |
| "epoch": 0.29511234653694696, | |
| "grad_norm": 0.74609375, | |
| "learning_rate": 4.130914999839133e-05, | |
| "loss": 0.9802069664001465, | |
| "step": 637 | |
| }, | |
| { | |
| "epoch": 0.295575631225388, | |
| "grad_norm": 0.84765625, | |
| "learning_rate": 4.128641560477756e-05, | |
| "loss": 1.0791590213775635, | |
| "step": 638 | |
| }, | |
| { | |
| "epoch": 0.296038915913829, | |
| "grad_norm": 0.7109375, | |
| "learning_rate": 4.1263648931672234e-05, | |
| "loss": 0.8927035927772522, | |
| "step": 639 | |
| }, | |
| { | |
| "epoch": 0.2965022006022701, | |
| "grad_norm": 0.75, | |
| "learning_rate": 4.124085002158829e-05, | |
| "loss": 0.9619215726852417, | |
| "step": 640 | |
| }, | |
| { | |
| "epoch": 0.29696548529071115, | |
| "grad_norm": 0.953125, | |
| "learning_rate": 4.12180189170989e-05, | |
| "loss": 1.0131444931030273, | |
| "step": 641 | |
| }, | |
| { | |
| "epoch": 0.2974287699791522, | |
| "grad_norm": 0.8203125, | |
| "learning_rate": 4.119515566083733e-05, | |
| "loss": 1.0167940855026245, | |
| "step": 642 | |
| }, | |
| { | |
| "epoch": 0.2978920546675932, | |
| "grad_norm": 0.94140625, | |
| "learning_rate": 4.117226029549689e-05, | |
| "loss": 1.1122088432312012, | |
| "step": 643 | |
| }, | |
| { | |
| "epoch": 0.2983553393560343, | |
| "grad_norm": 0.8125, | |
| "learning_rate": 4.114933286383084e-05, | |
| "loss": 0.8055898547172546, | |
| "step": 644 | |
| }, | |
| { | |
| "epoch": 0.29881862404447534, | |
| "grad_norm": 0.8515625, | |
| "learning_rate": 4.112637340865234e-05, | |
| "loss": 1.1266543865203857, | |
| "step": 645 | |
| }, | |
| { | |
| "epoch": 0.2992819087329164, | |
| "grad_norm": 0.79296875, | |
| "learning_rate": 4.110338197283431e-05, | |
| "loss": 1.0558011531829834, | |
| "step": 646 | |
| }, | |
| { | |
| "epoch": 0.2997451934213574, | |
| "grad_norm": 0.9140625, | |
| "learning_rate": 4.108035859930944e-05, | |
| "loss": 1.0644391775131226, | |
| "step": 647 | |
| }, | |
| { | |
| "epoch": 0.30020847810979845, | |
| "grad_norm": 0.7578125, | |
| "learning_rate": 4.105730333107003e-05, | |
| "loss": 1.043839454650879, | |
| "step": 648 | |
| }, | |
| { | |
| "epoch": 0.30067176279823954, | |
| "grad_norm": 0.9375, | |
| "learning_rate": 4.1034216211167914e-05, | |
| "loss": 1.147243857383728, | |
| "step": 649 | |
| }, | |
| { | |
| "epoch": 0.3011350474866806, | |
| "grad_norm": 0.76953125, | |
| "learning_rate": 4.1011097282714454e-05, | |
| "loss": 1.051954984664917, | |
| "step": 650 | |
| }, | |
| { | |
| "epoch": 0.3015983321751216, | |
| "grad_norm": 0.82421875, | |
| "learning_rate": 4.0987946588880385e-05, | |
| "loss": 1.002161979675293, | |
| "step": 651 | |
| }, | |
| { | |
| "epoch": 0.30206161686356264, | |
| "grad_norm": 0.8359375, | |
| "learning_rate": 4.096476417289574e-05, | |
| "loss": 0.9687187671661377, | |
| "step": 652 | |
| }, | |
| { | |
| "epoch": 0.30252490155200373, | |
| "grad_norm": 0.796875, | |
| "learning_rate": 4.094155007804981e-05, | |
| "loss": 1.1040300130844116, | |
| "step": 653 | |
| }, | |
| { | |
| "epoch": 0.30298818624044477, | |
| "grad_norm": 0.83203125, | |
| "learning_rate": 4.091830434769105e-05, | |
| "loss": 1.0147384405136108, | |
| "step": 654 | |
| }, | |
| { | |
| "epoch": 0.3034514709288858, | |
| "grad_norm": 0.70703125, | |
| "learning_rate": 4.089502702522696e-05, | |
| "loss": 0.8908687233924866, | |
| "step": 655 | |
| }, | |
| { | |
| "epoch": 0.30391475561732684, | |
| "grad_norm": 0.9140625, | |
| "learning_rate": 4.087171815412406e-05, | |
| "loss": 1.1389329433441162, | |
| "step": 656 | |
| }, | |
| { | |
| "epoch": 0.30437804030576787, | |
| "grad_norm": 0.78515625, | |
| "learning_rate": 4.0848377777907765e-05, | |
| "loss": 1.061093807220459, | |
| "step": 657 | |
| }, | |
| { | |
| "epoch": 0.30484132499420896, | |
| "grad_norm": 0.81640625, | |
| "learning_rate": 4.0825005940162326e-05, | |
| "loss": 1.0024491548538208, | |
| "step": 658 | |
| }, | |
| { | |
| "epoch": 0.30530460968265, | |
| "grad_norm": 0.81640625, | |
| "learning_rate": 4.080160268453075e-05, | |
| "loss": 1.2541595697402954, | |
| "step": 659 | |
| }, | |
| { | |
| "epoch": 0.30576789437109103, | |
| "grad_norm": 0.75390625, | |
| "learning_rate": 4.07781680547147e-05, | |
| "loss": 0.9933417439460754, | |
| "step": 660 | |
| }, | |
| { | |
| "epoch": 0.30623117905953207, | |
| "grad_norm": 0.828125, | |
| "learning_rate": 4.075470209447442e-05, | |
| "loss": 1.053157091140747, | |
| "step": 661 | |
| }, | |
| { | |
| "epoch": 0.30669446374797316, | |
| "grad_norm": 0.91015625, | |
| "learning_rate": 4.073120484762868e-05, | |
| "loss": 1.1531765460968018, | |
| "step": 662 | |
| }, | |
| { | |
| "epoch": 0.3071577484364142, | |
| "grad_norm": 0.81640625, | |
| "learning_rate": 4.070767635805466e-05, | |
| "loss": 1.125023603439331, | |
| "step": 663 | |
| }, | |
| { | |
| "epoch": 0.3076210331248552, | |
| "grad_norm": 0.8046875, | |
| "learning_rate": 4.068411666968788e-05, | |
| "loss": 0.8804372549057007, | |
| "step": 664 | |
| }, | |
| { | |
| "epoch": 0.30808431781329626, | |
| "grad_norm": 0.91015625, | |
| "learning_rate": 4.066052582652213e-05, | |
| "loss": 1.0438697338104248, | |
| "step": 665 | |
| }, | |
| { | |
| "epoch": 0.3085476025017373, | |
| "grad_norm": 0.796875, | |
| "learning_rate": 4.0636903872609336e-05, | |
| "loss": 0.9848630428314209, | |
| "step": 666 | |
| }, | |
| { | |
| "epoch": 0.3090108871901784, | |
| "grad_norm": 0.7578125, | |
| "learning_rate": 4.061325085205958e-05, | |
| "loss": 1.0336278676986694, | |
| "step": 667 | |
| }, | |
| { | |
| "epoch": 0.3094741718786194, | |
| "grad_norm": 0.80078125, | |
| "learning_rate": 4.058956680904091e-05, | |
| "loss": 1.0741722583770752, | |
| "step": 668 | |
| }, | |
| { | |
| "epoch": 0.30993745656706045, | |
| "grad_norm": 1.03125, | |
| "learning_rate": 4.0565851787779316e-05, | |
| "loss": 1.1203691959381104, | |
| "step": 669 | |
| }, | |
| { | |
| "epoch": 0.3104007412555015, | |
| "grad_norm": 1.046875, | |
| "learning_rate": 4.054210583255864e-05, | |
| "loss": 1.099678874015808, | |
| "step": 670 | |
| }, | |
| { | |
| "epoch": 0.3108640259439426, | |
| "grad_norm": 0.73046875, | |
| "learning_rate": 4.051832898772048e-05, | |
| "loss": 1.0561059713363647, | |
| "step": 671 | |
| }, | |
| { | |
| "epoch": 0.3113273106323836, | |
| "grad_norm": 0.79296875, | |
| "learning_rate": 4.049452129766413e-05, | |
| "loss": 1.186478853225708, | |
| "step": 672 | |
| }, | |
| { | |
| "epoch": 0.31179059532082465, | |
| "grad_norm": 0.73828125, | |
| "learning_rate": 4.047068280684646e-05, | |
| "loss": 0.9179085493087769, | |
| "step": 673 | |
| }, | |
| { | |
| "epoch": 0.3122538800092657, | |
| "grad_norm": 0.83203125, | |
| "learning_rate": 4.044681355978187e-05, | |
| "loss": 0.9686939716339111, | |
| "step": 674 | |
| }, | |
| { | |
| "epoch": 0.3127171646977067, | |
| "grad_norm": 0.875, | |
| "learning_rate": 4.042291360104219e-05, | |
| "loss": 1.121710181236267, | |
| "step": 675 | |
| }, | |
| { | |
| "epoch": 0.3131804493861478, | |
| "grad_norm": 0.85546875, | |
| "learning_rate": 4.03989829752566e-05, | |
| "loss": 1.0991014242172241, | |
| "step": 676 | |
| }, | |
| { | |
| "epoch": 0.31364373407458884, | |
| "grad_norm": 0.75, | |
| "learning_rate": 4.0375021727111543e-05, | |
| "loss": 1.0645341873168945, | |
| "step": 677 | |
| }, | |
| { | |
| "epoch": 0.3141070187630299, | |
| "grad_norm": 0.78125, | |
| "learning_rate": 4.0351029901350636e-05, | |
| "loss": 1.1132837533950806, | |
| "step": 678 | |
| }, | |
| { | |
| "epoch": 0.3145703034514709, | |
| "grad_norm": 0.7265625, | |
| "learning_rate": 4.032700754277461e-05, | |
| "loss": 1.0442454814910889, | |
| "step": 679 | |
| }, | |
| { | |
| "epoch": 0.315033588139912, | |
| "grad_norm": 0.7578125, | |
| "learning_rate": 4.0302954696241206e-05, | |
| "loss": 0.9802740812301636, | |
| "step": 680 | |
| }, | |
| { | |
| "epoch": 0.31549687282835304, | |
| "grad_norm": 0.984375, | |
| "learning_rate": 4.02788714066651e-05, | |
| "loss": 0.9827386140823364, | |
| "step": 681 | |
| }, | |
| { | |
| "epoch": 0.31596015751679407, | |
| "grad_norm": 0.8515625, | |
| "learning_rate": 4.025475771901779e-05, | |
| "loss": 1.1004528999328613, | |
| "step": 682 | |
| }, | |
| { | |
| "epoch": 0.3164234422052351, | |
| "grad_norm": 0.8203125, | |
| "learning_rate": 4.023061367832757e-05, | |
| "loss": 1.0051753520965576, | |
| "step": 683 | |
| }, | |
| { | |
| "epoch": 0.31688672689367614, | |
| "grad_norm": 0.83984375, | |
| "learning_rate": 4.02064393296794e-05, | |
| "loss": 0.9524490833282471, | |
| "step": 684 | |
| }, | |
| { | |
| "epoch": 0.31735001158211723, | |
| "grad_norm": 0.828125, | |
| "learning_rate": 4.018223471821483e-05, | |
| "loss": 1.079671859741211, | |
| "step": 685 | |
| }, | |
| { | |
| "epoch": 0.31781329627055827, | |
| "grad_norm": 0.7578125, | |
| "learning_rate": 4.0157999889131936e-05, | |
| "loss": 0.912105917930603, | |
| "step": 686 | |
| }, | |
| { | |
| "epoch": 0.3182765809589993, | |
| "grad_norm": 0.82421875, | |
| "learning_rate": 4.01337348876852e-05, | |
| "loss": 1.2040618658065796, | |
| "step": 687 | |
| }, | |
| { | |
| "epoch": 0.31873986564744033, | |
| "grad_norm": 0.7578125, | |
| "learning_rate": 4.0109439759185465e-05, | |
| "loss": 0.8994999527931213, | |
| "step": 688 | |
| }, | |
| { | |
| "epoch": 0.3192031503358814, | |
| "grad_norm": 0.75390625, | |
| "learning_rate": 4.0085114548999816e-05, | |
| "loss": 1.244059681892395, | |
| "step": 689 | |
| }, | |
| { | |
| "epoch": 0.31966643502432246, | |
| "grad_norm": 0.71875, | |
| "learning_rate": 4.006075930255152e-05, | |
| "loss": 1.126865029335022, | |
| "step": 690 | |
| }, | |
| { | |
| "epoch": 0.3201297197127635, | |
| "grad_norm": 0.8203125, | |
| "learning_rate": 4.003637406531992e-05, | |
| "loss": 1.0775233507156372, | |
| "step": 691 | |
| }, | |
| { | |
| "epoch": 0.32059300440120453, | |
| "grad_norm": 0.94140625, | |
| "learning_rate": 4.001195888284037e-05, | |
| "loss": 1.0177921056747437, | |
| "step": 692 | |
| }, | |
| { | |
| "epoch": 0.32105628908964556, | |
| "grad_norm": 0.87890625, | |
| "learning_rate": 3.998751380070416e-05, | |
| "loss": 1.057099461555481, | |
| "step": 693 | |
| }, | |
| { | |
| "epoch": 0.32151957377808665, | |
| "grad_norm": 0.85546875, | |
| "learning_rate": 3.9963038864558385e-05, | |
| "loss": 0.9934321045875549, | |
| "step": 694 | |
| }, | |
| { | |
| "epoch": 0.3219828584665277, | |
| "grad_norm": 0.7734375, | |
| "learning_rate": 3.993853412010589e-05, | |
| "loss": 0.9183391332626343, | |
| "step": 695 | |
| }, | |
| { | |
| "epoch": 0.3224461431549687, | |
| "grad_norm": 0.75390625, | |
| "learning_rate": 3.9913999613105204e-05, | |
| "loss": 0.9654147624969482, | |
| "step": 696 | |
| }, | |
| { | |
| "epoch": 0.32290942784340976, | |
| "grad_norm": 0.76171875, | |
| "learning_rate": 3.98894353893704e-05, | |
| "loss": 1.0339151620864868, | |
| "step": 697 | |
| }, | |
| { | |
| "epoch": 0.32337271253185085, | |
| "grad_norm": 0.89453125, | |
| "learning_rate": 3.986484149477107e-05, | |
| "loss": 0.9817367792129517, | |
| "step": 698 | |
| }, | |
| { | |
| "epoch": 0.3238359972202919, | |
| "grad_norm": 0.8515625, | |
| "learning_rate": 3.98402179752322e-05, | |
| "loss": 0.9361385703086853, | |
| "step": 699 | |
| }, | |
| { | |
| "epoch": 0.3242992819087329, | |
| "grad_norm": 0.92578125, | |
| "learning_rate": 3.981556487673409e-05, | |
| "loss": 0.9315399527549744, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 0.32476256659717395, | |
| "grad_norm": 0.7734375, | |
| "learning_rate": 3.979088224531229e-05, | |
| "loss": 1.005590796470642, | |
| "step": 701 | |
| }, | |
| { | |
| "epoch": 0.325225851285615, | |
| "grad_norm": 0.84765625, | |
| "learning_rate": 3.976617012705749e-05, | |
| "loss": 0.8338845372200012, | |
| "step": 702 | |
| }, | |
| { | |
| "epoch": 0.3256891359740561, | |
| "grad_norm": 0.75, | |
| "learning_rate": 3.9741428568115435e-05, | |
| "loss": 1.0329554080963135, | |
| "step": 703 | |
| }, | |
| { | |
| "epoch": 0.3261524206624971, | |
| "grad_norm": 0.9140625, | |
| "learning_rate": 3.9716657614686844e-05, | |
| "loss": 0.8598560094833374, | |
| "step": 704 | |
| }, | |
| { | |
| "epoch": 0.32661570535093815, | |
| "grad_norm": 0.796875, | |
| "learning_rate": 3.9691857313027335e-05, | |
| "loss": 0.9257340431213379, | |
| "step": 705 | |
| }, | |
| { | |
| "epoch": 0.3270789900393792, | |
| "grad_norm": 0.8359375, | |
| "learning_rate": 3.966702770944734e-05, | |
| "loss": 0.8521995544433594, | |
| "step": 706 | |
| }, | |
| { | |
| "epoch": 0.32754227472782027, | |
| "grad_norm": 0.94140625, | |
| "learning_rate": 3.964216885031197e-05, | |
| "loss": 1.1843841075897217, | |
| "step": 707 | |
| }, | |
| { | |
| "epoch": 0.3280055594162613, | |
| "grad_norm": 0.90234375, | |
| "learning_rate": 3.961728078204101e-05, | |
| "loss": 1.1602882146835327, | |
| "step": 708 | |
| }, | |
| { | |
| "epoch": 0.32846884410470234, | |
| "grad_norm": 0.765625, | |
| "learning_rate": 3.9592363551108756e-05, | |
| "loss": 1.020529866218567, | |
| "step": 709 | |
| }, | |
| { | |
| "epoch": 0.3289321287931434, | |
| "grad_norm": 0.72265625, | |
| "learning_rate": 3.956741720404397e-05, | |
| "loss": 0.9926280975341797, | |
| "step": 710 | |
| }, | |
| { | |
| "epoch": 0.3293954134815844, | |
| "grad_norm": 0.75390625, | |
| "learning_rate": 3.9542441787429795e-05, | |
| "loss": 0.7993087768554688, | |
| "step": 711 | |
| }, | |
| { | |
| "epoch": 0.3298586981700255, | |
| "grad_norm": 0.77734375, | |
| "learning_rate": 3.9517437347903635e-05, | |
| "loss": 0.9188562631607056, | |
| "step": 712 | |
| }, | |
| { | |
| "epoch": 0.33032198285846653, | |
| "grad_norm": 0.75390625, | |
| "learning_rate": 3.949240393215711e-05, | |
| "loss": 0.9771900177001953, | |
| "step": 713 | |
| }, | |
| { | |
| "epoch": 0.33078526754690757, | |
| "grad_norm": 0.8203125, | |
| "learning_rate": 3.9467341586935936e-05, | |
| "loss": 0.989328145980835, | |
| "step": 714 | |
| }, | |
| { | |
| "epoch": 0.3312485522353486, | |
| "grad_norm": 0.82421875, | |
| "learning_rate": 3.9442250359039855e-05, | |
| "loss": 1.002003788948059, | |
| "step": 715 | |
| }, | |
| { | |
| "epoch": 0.3317118369237897, | |
| "grad_norm": 0.8203125, | |
| "learning_rate": 3.941713029532253e-05, | |
| "loss": 1.0104445219039917, | |
| "step": 716 | |
| }, | |
| { | |
| "epoch": 0.33217512161223073, | |
| "grad_norm": 0.71484375, | |
| "learning_rate": 3.93919814426915e-05, | |
| "loss": 1.0114507675170898, | |
| "step": 717 | |
| }, | |
| { | |
| "epoch": 0.33263840630067176, | |
| "grad_norm": 0.75390625, | |
| "learning_rate": 3.936680384810803e-05, | |
| "loss": 0.9771013855934143, | |
| "step": 718 | |
| }, | |
| { | |
| "epoch": 0.3331016909891128, | |
| "grad_norm": 0.98046875, | |
| "learning_rate": 3.934159755858707e-05, | |
| "loss": 1.0455206632614136, | |
| "step": 719 | |
| }, | |
| { | |
| "epoch": 0.33356497567755383, | |
| "grad_norm": 0.953125, | |
| "learning_rate": 3.931636262119716e-05, | |
| "loss": 0.875360369682312, | |
| "step": 720 | |
| }, | |
| { | |
| "epoch": 0.3340282603659949, | |
| "grad_norm": 0.671875, | |
| "learning_rate": 3.929109908306032e-05, | |
| "loss": 0.9550399780273438, | |
| "step": 721 | |
| }, | |
| { | |
| "epoch": 0.33449154505443596, | |
| "grad_norm": 0.86328125, | |
| "learning_rate": 3.9265806991351995e-05, | |
| "loss": 1.1120067834854126, | |
| "step": 722 | |
| }, | |
| { | |
| "epoch": 0.334954829742877, | |
| "grad_norm": 0.765625, | |
| "learning_rate": 3.9240486393300924e-05, | |
| "loss": 0.9513478875160217, | |
| "step": 723 | |
| }, | |
| { | |
| "epoch": 0.335418114431318, | |
| "grad_norm": 0.86328125, | |
| "learning_rate": 3.9215137336189096e-05, | |
| "loss": 0.9390691518783569, | |
| "step": 724 | |
| }, | |
| { | |
| "epoch": 0.3358813991197591, | |
| "grad_norm": 0.8125, | |
| "learning_rate": 3.918975986735164e-05, | |
| "loss": 1.0198416709899902, | |
| "step": 725 | |
| }, | |
| { | |
| "epoch": 0.33634468380820015, | |
| "grad_norm": 0.7734375, | |
| "learning_rate": 3.916435403417674e-05, | |
| "loss": 0.9613708257675171, | |
| "step": 726 | |
| }, | |
| { | |
| "epoch": 0.3368079684966412, | |
| "grad_norm": 0.8359375, | |
| "learning_rate": 3.913891988410554e-05, | |
| "loss": 0.9495355486869812, | |
| "step": 727 | |
| }, | |
| { | |
| "epoch": 0.3372712531850822, | |
| "grad_norm": 0.76953125, | |
| "learning_rate": 3.9113457464632056e-05, | |
| "loss": 0.9577147960662842, | |
| "step": 728 | |
| }, | |
| { | |
| "epoch": 0.33773453787352326, | |
| "grad_norm": 0.9765625, | |
| "learning_rate": 3.9087966823303105e-05, | |
| "loss": 0.9388977885246277, | |
| "step": 729 | |
| }, | |
| { | |
| "epoch": 0.33819782256196435, | |
| "grad_norm": 0.75390625, | |
| "learning_rate": 3.906244800771821e-05, | |
| "loss": 0.9760944247245789, | |
| "step": 730 | |
| }, | |
| { | |
| "epoch": 0.3386611072504054, | |
| "grad_norm": 0.9609375, | |
| "learning_rate": 3.903690106552948e-05, | |
| "loss": 1.0838488340377808, | |
| "step": 731 | |
| }, | |
| { | |
| "epoch": 0.3391243919388464, | |
| "grad_norm": 0.88671875, | |
| "learning_rate": 3.9011326044441564e-05, | |
| "loss": 0.937881350517273, | |
| "step": 732 | |
| }, | |
| { | |
| "epoch": 0.33958767662728745, | |
| "grad_norm": 0.8125, | |
| "learning_rate": 3.898572299221153e-05, | |
| "loss": 1.1330440044403076, | |
| "step": 733 | |
| }, | |
| { | |
| "epoch": 0.34005096131572854, | |
| "grad_norm": 1.109375, | |
| "learning_rate": 3.896009195664882e-05, | |
| "loss": 1.1508278846740723, | |
| "step": 734 | |
| }, | |
| { | |
| "epoch": 0.3405142460041696, | |
| "grad_norm": 0.79296875, | |
| "learning_rate": 3.893443298561508e-05, | |
| "loss": 0.9706493020057678, | |
| "step": 735 | |
| }, | |
| { | |
| "epoch": 0.3409775306926106, | |
| "grad_norm": 0.8515625, | |
| "learning_rate": 3.890874612702417e-05, | |
| "loss": 0.9598948955535889, | |
| "step": 736 | |
| }, | |
| { | |
| "epoch": 0.34144081538105164, | |
| "grad_norm": 0.87109375, | |
| "learning_rate": 3.8883031428842e-05, | |
| "loss": 1.0114010572433472, | |
| "step": 737 | |
| }, | |
| { | |
| "epoch": 0.3419041000694927, | |
| "grad_norm": 0.73046875, | |
| "learning_rate": 3.8857288939086474e-05, | |
| "loss": 1.0451589822769165, | |
| "step": 738 | |
| }, | |
| { | |
| "epoch": 0.34236738475793377, | |
| "grad_norm": 0.7421875, | |
| "learning_rate": 3.8831518705827376e-05, | |
| "loss": 1.100400686264038, | |
| "step": 739 | |
| }, | |
| { | |
| "epoch": 0.3428306694463748, | |
| "grad_norm": 0.73046875, | |
| "learning_rate": 3.8805720777186314e-05, | |
| "loss": 0.907010555267334, | |
| "step": 740 | |
| }, | |
| { | |
| "epoch": 0.34329395413481584, | |
| "grad_norm": 0.72265625, | |
| "learning_rate": 3.877989520133662e-05, | |
| "loss": 1.0915554761886597, | |
| "step": 741 | |
| }, | |
| { | |
| "epoch": 0.34375723882325687, | |
| "grad_norm": 0.7890625, | |
| "learning_rate": 3.8754042026503224e-05, | |
| "loss": 1.011785626411438, | |
| "step": 742 | |
| }, | |
| { | |
| "epoch": 0.34422052351169796, | |
| "grad_norm": 0.8671875, | |
| "learning_rate": 3.872816130096261e-05, | |
| "loss": 1.0811213254928589, | |
| "step": 743 | |
| }, | |
| { | |
| "epoch": 0.344683808200139, | |
| "grad_norm": 0.85546875, | |
| "learning_rate": 3.8702253073042716e-05, | |
| "loss": 0.834938645362854, | |
| "step": 744 | |
| }, | |
| { | |
| "epoch": 0.34514709288858003, | |
| "grad_norm": 0.8203125, | |
| "learning_rate": 3.8676317391122824e-05, | |
| "loss": 0.9235035181045532, | |
| "step": 745 | |
| }, | |
| { | |
| "epoch": 0.34561037757702107, | |
| "grad_norm": 0.765625, | |
| "learning_rate": 3.865035430363348e-05, | |
| "loss": 1.0086536407470703, | |
| "step": 746 | |
| }, | |
| { | |
| "epoch": 0.3460736622654621, | |
| "grad_norm": 0.86328125, | |
| "learning_rate": 3.862436385905641e-05, | |
| "loss": 0.987399697303772, | |
| "step": 747 | |
| }, | |
| { | |
| "epoch": 0.3465369469539032, | |
| "grad_norm": 0.73046875, | |
| "learning_rate": 3.859834610592443e-05, | |
| "loss": 1.1993310451507568, | |
| "step": 748 | |
| }, | |
| { | |
| "epoch": 0.3470002316423442, | |
| "grad_norm": 0.828125, | |
| "learning_rate": 3.857230109282134e-05, | |
| "loss": 1.0457353591918945, | |
| "step": 749 | |
| }, | |
| { | |
| "epoch": 0.34746351633078526, | |
| "grad_norm": 0.91015625, | |
| "learning_rate": 3.854622886838185e-05, | |
| "loss": 1.139293909072876, | |
| "step": 750 | |
| }, | |
| { | |
| "epoch": 0.3479268010192263, | |
| "grad_norm": 0.78125, | |
| "learning_rate": 3.852012948129148e-05, | |
| "loss": 1.0585147142410278, | |
| "step": 751 | |
| }, | |
| { | |
| "epoch": 0.3483900857076674, | |
| "grad_norm": 0.92578125, | |
| "learning_rate": 3.849400298028647e-05, | |
| "loss": 0.9727704524993896, | |
| "step": 752 | |
| }, | |
| { | |
| "epoch": 0.3488533703961084, | |
| "grad_norm": 0.71875, | |
| "learning_rate": 3.846784941415371e-05, | |
| "loss": 0.9992061257362366, | |
| "step": 753 | |
| }, | |
| { | |
| "epoch": 0.34931665508454945, | |
| "grad_norm": 0.75390625, | |
| "learning_rate": 3.8441668831730586e-05, | |
| "loss": 1.1475231647491455, | |
| "step": 754 | |
| }, | |
| { | |
| "epoch": 0.3497799397729905, | |
| "grad_norm": 0.875, | |
| "learning_rate": 3.8415461281904984e-05, | |
| "loss": 1.036689281463623, | |
| "step": 755 | |
| }, | |
| { | |
| "epoch": 0.3502432244614315, | |
| "grad_norm": 0.81640625, | |
| "learning_rate": 3.83892268136151e-05, | |
| "loss": 0.87852543592453, | |
| "step": 756 | |
| }, | |
| { | |
| "epoch": 0.3507065091498726, | |
| "grad_norm": 0.91015625, | |
| "learning_rate": 3.8362965475849445e-05, | |
| "loss": 0.9990617036819458, | |
| "step": 757 | |
| }, | |
| { | |
| "epoch": 0.35116979383831365, | |
| "grad_norm": 0.96484375, | |
| "learning_rate": 3.833667731764665e-05, | |
| "loss": 1.1460075378417969, | |
| "step": 758 | |
| }, | |
| { | |
| "epoch": 0.3516330785267547, | |
| "grad_norm": 0.75, | |
| "learning_rate": 3.831036238809548e-05, | |
| "loss": 0.9850847721099854, | |
| "step": 759 | |
| }, | |
| { | |
| "epoch": 0.3520963632151957, | |
| "grad_norm": 0.7734375, | |
| "learning_rate": 3.828402073633464e-05, | |
| "loss": 0.9239014387130737, | |
| "step": 760 | |
| }, | |
| { | |
| "epoch": 0.3525596479036368, | |
| "grad_norm": 0.80078125, | |
| "learning_rate": 3.825765241155279e-05, | |
| "loss": 0.9535107016563416, | |
| "step": 761 | |
| }, | |
| { | |
| "epoch": 0.35302293259207784, | |
| "grad_norm": 0.74609375, | |
| "learning_rate": 3.8231257462988355e-05, | |
| "loss": 0.9702818989753723, | |
| "step": 762 | |
| }, | |
| { | |
| "epoch": 0.3534862172805189, | |
| "grad_norm": 0.83984375, | |
| "learning_rate": 3.820483593992948e-05, | |
| "loss": 0.9807397127151489, | |
| "step": 763 | |
| }, | |
| { | |
| "epoch": 0.3539495019689599, | |
| "grad_norm": 0.859375, | |
| "learning_rate": 3.817838789171397e-05, | |
| "loss": 0.9782893061637878, | |
| "step": 764 | |
| }, | |
| { | |
| "epoch": 0.35441278665740095, | |
| "grad_norm": 0.87109375, | |
| "learning_rate": 3.815191336772911e-05, | |
| "loss": 1.050409197807312, | |
| "step": 765 | |
| }, | |
| { | |
| "epoch": 0.35487607134584204, | |
| "grad_norm": 0.79296875, | |
| "learning_rate": 3.812541241741164e-05, | |
| "loss": 0.9888057112693787, | |
| "step": 766 | |
| }, | |
| { | |
| "epoch": 0.35533935603428307, | |
| "grad_norm": 0.89453125, | |
| "learning_rate": 3.809888509024769e-05, | |
| "loss": 0.9265248775482178, | |
| "step": 767 | |
| }, | |
| { | |
| "epoch": 0.3558026407227241, | |
| "grad_norm": 0.80078125, | |
| "learning_rate": 3.807233143577258e-05, | |
| "loss": 0.9314517378807068, | |
| "step": 768 | |
| }, | |
| { | |
| "epoch": 0.35626592541116514, | |
| "grad_norm": 0.80078125, | |
| "learning_rate": 3.804575150357084e-05, | |
| "loss": 1.0255682468414307, | |
| "step": 769 | |
| }, | |
| { | |
| "epoch": 0.35672921009960623, | |
| "grad_norm": 0.79296875, | |
| "learning_rate": 3.8019145343276026e-05, | |
| "loss": 1.2007423639297485, | |
| "step": 770 | |
| }, | |
| { | |
| "epoch": 0.35719249478804727, | |
| "grad_norm": 0.7734375, | |
| "learning_rate": 3.799251300457071e-05, | |
| "loss": 1.0465315580368042, | |
| "step": 771 | |
| }, | |
| { | |
| "epoch": 0.3576557794764883, | |
| "grad_norm": 0.84375, | |
| "learning_rate": 3.796585453718635e-05, | |
| "loss": 0.9410252571105957, | |
| "step": 772 | |
| }, | |
| { | |
| "epoch": 0.35811906416492933, | |
| "grad_norm": 0.76171875, | |
| "learning_rate": 3.7939169990903146e-05, | |
| "loss": 0.8860654830932617, | |
| "step": 773 | |
| }, | |
| { | |
| "epoch": 0.35858234885337037, | |
| "grad_norm": 0.8203125, | |
| "learning_rate": 3.791245941555004e-05, | |
| "loss": 0.9542768001556396, | |
| "step": 774 | |
| }, | |
| { | |
| "epoch": 0.35904563354181146, | |
| "grad_norm": 0.98046875, | |
| "learning_rate": 3.788572286100457e-05, | |
| "loss": 1.121732473373413, | |
| "step": 775 | |
| }, | |
| { | |
| "epoch": 0.3595089182302525, | |
| "grad_norm": 0.796875, | |
| "learning_rate": 3.785896037719278e-05, | |
| "loss": 1.0113410949707031, | |
| "step": 776 | |
| }, | |
| { | |
| "epoch": 0.35997220291869353, | |
| "grad_norm": 0.84765625, | |
| "learning_rate": 3.7832172014089136e-05, | |
| "loss": 0.9060476422309875, | |
| "step": 777 | |
| }, | |
| { | |
| "epoch": 0.36043548760713456, | |
| "grad_norm": 0.76953125, | |
| "learning_rate": 3.780535782171643e-05, | |
| "loss": 0.9059662818908691, | |
| "step": 778 | |
| }, | |
| { | |
| "epoch": 0.36089877229557565, | |
| "grad_norm": 0.765625, | |
| "learning_rate": 3.777851785014569e-05, | |
| "loss": 1.008833646774292, | |
| "step": 779 | |
| }, | |
| { | |
| "epoch": 0.3613620569840167, | |
| "grad_norm": 0.75390625, | |
| "learning_rate": 3.775165214949605e-05, | |
| "loss": 0.9600525498390198, | |
| "step": 780 | |
| }, | |
| { | |
| "epoch": 0.3618253416724577, | |
| "grad_norm": 0.890625, | |
| "learning_rate": 3.772476076993474e-05, | |
| "loss": 1.1373387575149536, | |
| "step": 781 | |
| }, | |
| { | |
| "epoch": 0.36228862636089876, | |
| "grad_norm": 0.8203125, | |
| "learning_rate": 3.769784376167691e-05, | |
| "loss": 0.9134207367897034, | |
| "step": 782 | |
| }, | |
| { | |
| "epoch": 0.3627519110493398, | |
| "grad_norm": 0.8046875, | |
| "learning_rate": 3.767090117498558e-05, | |
| "loss": 0.9469197988510132, | |
| "step": 783 | |
| }, | |
| { | |
| "epoch": 0.3632151957377809, | |
| "grad_norm": 0.79296875, | |
| "learning_rate": 3.764393306017151e-05, | |
| "loss": 1.1006484031677246, | |
| "step": 784 | |
| }, | |
| { | |
| "epoch": 0.3636784804262219, | |
| "grad_norm": 0.79296875, | |
| "learning_rate": 3.761693946759315e-05, | |
| "loss": 0.9012340307235718, | |
| "step": 785 | |
| }, | |
| { | |
| "epoch": 0.36414176511466295, | |
| "grad_norm": 0.8359375, | |
| "learning_rate": 3.758992044765654e-05, | |
| "loss": 1.0260119438171387, | |
| "step": 786 | |
| }, | |
| { | |
| "epoch": 0.364605049803104, | |
| "grad_norm": 0.82421875, | |
| "learning_rate": 3.756287605081517e-05, | |
| "loss": 1.1195753812789917, | |
| "step": 787 | |
| }, | |
| { | |
| "epoch": 0.3650683344915451, | |
| "grad_norm": 0.75, | |
| "learning_rate": 3.753580632756993e-05, | |
| "loss": 0.9033543467521667, | |
| "step": 788 | |
| }, | |
| { | |
| "epoch": 0.3655316191799861, | |
| "grad_norm": 0.9375, | |
| "learning_rate": 3.7508711328469e-05, | |
| "loss": 0.8747038841247559, | |
| "step": 789 | |
| }, | |
| { | |
| "epoch": 0.36599490386842715, | |
| "grad_norm": 0.83203125, | |
| "learning_rate": 3.7481591104107775e-05, | |
| "loss": 1.0821847915649414, | |
| "step": 790 | |
| }, | |
| { | |
| "epoch": 0.3664581885568682, | |
| "grad_norm": 0.83203125, | |
| "learning_rate": 3.745444570512872e-05, | |
| "loss": 1.023503303527832, | |
| "step": 791 | |
| }, | |
| { | |
| "epoch": 0.3669214732453092, | |
| "grad_norm": 0.90625, | |
| "learning_rate": 3.7427275182221356e-05, | |
| "loss": 1.1093895435333252, | |
| "step": 792 | |
| }, | |
| { | |
| "epoch": 0.3673847579337503, | |
| "grad_norm": 0.78125, | |
| "learning_rate": 3.740007958612207e-05, | |
| "loss": 1.0260508060455322, | |
| "step": 793 | |
| }, | |
| { | |
| "epoch": 0.36784804262219134, | |
| "grad_norm": 0.8203125, | |
| "learning_rate": 3.73728589676141e-05, | |
| "loss": 0.961272120475769, | |
| "step": 794 | |
| }, | |
| { | |
| "epoch": 0.3683113273106324, | |
| "grad_norm": 0.77734375, | |
| "learning_rate": 3.734561337752741e-05, | |
| "loss": 1.2031164169311523, | |
| "step": 795 | |
| }, | |
| { | |
| "epoch": 0.3687746119990734, | |
| "grad_norm": 0.8515625, | |
| "learning_rate": 3.7318342866738565e-05, | |
| "loss": 0.998257577419281, | |
| "step": 796 | |
| }, | |
| { | |
| "epoch": 0.3692378966875145, | |
| "grad_norm": 0.9296875, | |
| "learning_rate": 3.72910474861707e-05, | |
| "loss": 1.0234450101852417, | |
| "step": 797 | |
| }, | |
| { | |
| "epoch": 0.36970118137595553, | |
| "grad_norm": 0.80859375, | |
| "learning_rate": 3.726372728679338e-05, | |
| "loss": 1.0095072984695435, | |
| "step": 798 | |
| }, | |
| { | |
| "epoch": 0.37016446606439657, | |
| "grad_norm": 0.85546875, | |
| "learning_rate": 3.7236382319622494e-05, | |
| "loss": 1.0547491312026978, | |
| "step": 799 | |
| }, | |
| { | |
| "epoch": 0.3706277507528376, | |
| "grad_norm": 0.84375, | |
| "learning_rate": 3.720901263572021e-05, | |
| "loss": 1.1043885946273804, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 0.37109103544127864, | |
| "grad_norm": 0.91015625, | |
| "learning_rate": 3.7181618286194834e-05, | |
| "loss": 1.0135180950164795, | |
| "step": 801 | |
| }, | |
| { | |
| "epoch": 0.37155432012971973, | |
| "grad_norm": 0.71875, | |
| "learning_rate": 3.715419932220074e-05, | |
| "loss": 0.9376970529556274, | |
| "step": 802 | |
| }, | |
| { | |
| "epoch": 0.37201760481816076, | |
| "grad_norm": 0.953125, | |
| "learning_rate": 3.7126755794938255e-05, | |
| "loss": 1.0911214351654053, | |
| "step": 803 | |
| }, | |
| { | |
| "epoch": 0.3724808895066018, | |
| "grad_norm": 0.80859375, | |
| "learning_rate": 3.7099287755653566e-05, | |
| "loss": 1.1035547256469727, | |
| "step": 804 | |
| }, | |
| { | |
| "epoch": 0.37294417419504283, | |
| "grad_norm": 0.87890625, | |
| "learning_rate": 3.707179525563866e-05, | |
| "loss": 1.1932406425476074, | |
| "step": 805 | |
| }, | |
| { | |
| "epoch": 0.3734074588834839, | |
| "grad_norm": 0.78515625, | |
| "learning_rate": 3.704427834623118e-05, | |
| "loss": 1.0412805080413818, | |
| "step": 806 | |
| }, | |
| { | |
| "epoch": 0.37387074357192496, | |
| "grad_norm": 0.90625, | |
| "learning_rate": 3.7016737078814365e-05, | |
| "loss": 1.1123768091201782, | |
| "step": 807 | |
| }, | |
| { | |
| "epoch": 0.374334028260366, | |
| "grad_norm": 0.765625, | |
| "learning_rate": 3.698917150481692e-05, | |
| "loss": 0.9360041618347168, | |
| "step": 808 | |
| }, | |
| { | |
| "epoch": 0.374797312948807, | |
| "grad_norm": 0.77734375, | |
| "learning_rate": 3.696158167571294e-05, | |
| "loss": 0.9965537190437317, | |
| "step": 809 | |
| }, | |
| { | |
| "epoch": 0.3752605976372481, | |
| "grad_norm": 0.8125, | |
| "learning_rate": 3.693396764302183e-05, | |
| "loss": 0.9210027456283569, | |
| "step": 810 | |
| }, | |
| { | |
| "epoch": 0.37572388232568915, | |
| "grad_norm": 0.8203125, | |
| "learning_rate": 3.690632945830817e-05, | |
| "loss": 0.9936932325363159, | |
| "step": 811 | |
| }, | |
| { | |
| "epoch": 0.3761871670141302, | |
| "grad_norm": 0.95703125, | |
| "learning_rate": 3.687866717318166e-05, | |
| "loss": 1.0398387908935547, | |
| "step": 812 | |
| }, | |
| { | |
| "epoch": 0.3766504517025712, | |
| "grad_norm": 0.8984375, | |
| "learning_rate": 3.685098083929699e-05, | |
| "loss": 0.9467533826828003, | |
| "step": 813 | |
| }, | |
| { | |
| "epoch": 0.37711373639101226, | |
| "grad_norm": 0.8359375, | |
| "learning_rate": 3.682327050835376e-05, | |
| "loss": 1.156292200088501, | |
| "step": 814 | |
| }, | |
| { | |
| "epoch": 0.37757702107945335, | |
| "grad_norm": 0.83203125, | |
| "learning_rate": 3.6795536232096374e-05, | |
| "loss": 0.986288845539093, | |
| "step": 815 | |
| }, | |
| { | |
| "epoch": 0.3780403057678944, | |
| "grad_norm": 0.8671875, | |
| "learning_rate": 3.676777806231396e-05, | |
| "loss": 1.123473882675171, | |
| "step": 816 | |
| }, | |
| { | |
| "epoch": 0.3785035904563354, | |
| "grad_norm": 0.88671875, | |
| "learning_rate": 3.673999605084028e-05, | |
| "loss": 1.0756930112838745, | |
| "step": 817 | |
| }, | |
| { | |
| "epoch": 0.37896687514477645, | |
| "grad_norm": 0.88671875, | |
| "learning_rate": 3.671219024955357e-05, | |
| "loss": 1.0483829975128174, | |
| "step": 818 | |
| }, | |
| { | |
| "epoch": 0.37943015983321754, | |
| "grad_norm": 0.90625, | |
| "learning_rate": 3.668436071037653e-05, | |
| "loss": 0.9522889852523804, | |
| "step": 819 | |
| }, | |
| { | |
| "epoch": 0.3798934445216586, | |
| "grad_norm": 0.7890625, | |
| "learning_rate": 3.665650748527616e-05, | |
| "loss": 1.056382179260254, | |
| "step": 820 | |
| }, | |
| { | |
| "epoch": 0.3803567292100996, | |
| "grad_norm": 0.78125, | |
| "learning_rate": 3.662863062626371e-05, | |
| "loss": 1.141240119934082, | |
| "step": 821 | |
| }, | |
| { | |
| "epoch": 0.38082001389854064, | |
| "grad_norm": 0.8828125, | |
| "learning_rate": 3.660073018539456e-05, | |
| "loss": 0.861331582069397, | |
| "step": 822 | |
| }, | |
| { | |
| "epoch": 0.3812832985869817, | |
| "grad_norm": 0.85546875, | |
| "learning_rate": 3.657280621476811e-05, | |
| "loss": 1.1151138544082642, | |
| "step": 823 | |
| }, | |
| { | |
| "epoch": 0.38174658327542277, | |
| "grad_norm": 0.69140625, | |
| "learning_rate": 3.654485876652772e-05, | |
| "loss": 0.9755687117576599, | |
| "step": 824 | |
| }, | |
| { | |
| "epoch": 0.3822098679638638, | |
| "grad_norm": 0.734375, | |
| "learning_rate": 3.651688789286056e-05, | |
| "loss": 0.9543071985244751, | |
| "step": 825 | |
| }, | |
| { | |
| "epoch": 0.38267315265230484, | |
| "grad_norm": 0.73046875, | |
| "learning_rate": 3.6488893645997575e-05, | |
| "loss": 0.9777738451957703, | |
| "step": 826 | |
| }, | |
| { | |
| "epoch": 0.3831364373407459, | |
| "grad_norm": 0.75390625, | |
| "learning_rate": 3.646087607821333e-05, | |
| "loss": 1.010209321975708, | |
| "step": 827 | |
| }, | |
| { | |
| "epoch": 0.38359972202918696, | |
| "grad_norm": 0.81640625, | |
| "learning_rate": 3.6432835241825965e-05, | |
| "loss": 1.0441359281539917, | |
| "step": 828 | |
| }, | |
| { | |
| "epoch": 0.384063006717628, | |
| "grad_norm": 0.83203125, | |
| "learning_rate": 3.640477118919705e-05, | |
| "loss": 0.8406580090522766, | |
| "step": 829 | |
| }, | |
| { | |
| "epoch": 0.38452629140606903, | |
| "grad_norm": 0.90234375, | |
| "learning_rate": 3.637668397273149e-05, | |
| "loss": 1.0144675970077515, | |
| "step": 830 | |
| }, | |
| { | |
| "epoch": 0.38498957609451007, | |
| "grad_norm": 0.8828125, | |
| "learning_rate": 3.6348573644877495e-05, | |
| "loss": 1.2290412187576294, | |
| "step": 831 | |
| }, | |
| { | |
| "epoch": 0.3854528607829511, | |
| "grad_norm": 0.76171875, | |
| "learning_rate": 3.63204402581264e-05, | |
| "loss": 0.8894533514976501, | |
| "step": 832 | |
| }, | |
| { | |
| "epoch": 0.3859161454713922, | |
| "grad_norm": 0.8046875, | |
| "learning_rate": 3.629228386501259e-05, | |
| "loss": 1.1188613176345825, | |
| "step": 833 | |
| }, | |
| { | |
| "epoch": 0.3863794301598332, | |
| "grad_norm": 0.8203125, | |
| "learning_rate": 3.626410451811342e-05, | |
| "loss": 0.9740458726882935, | |
| "step": 834 | |
| }, | |
| { | |
| "epoch": 0.38684271484827426, | |
| "grad_norm": 0.8046875, | |
| "learning_rate": 3.623590227004913e-05, | |
| "loss": 0.7910479307174683, | |
| "step": 835 | |
| }, | |
| { | |
| "epoch": 0.3873059995367153, | |
| "grad_norm": 0.828125, | |
| "learning_rate": 3.620767717348268e-05, | |
| "loss": 0.9454694986343384, | |
| "step": 836 | |
| }, | |
| { | |
| "epoch": 0.3877692842251564, | |
| "grad_norm": 0.875, | |
| "learning_rate": 3.617942928111973e-05, | |
| "loss": 1.0109909772872925, | |
| "step": 837 | |
| }, | |
| { | |
| "epoch": 0.3882325689135974, | |
| "grad_norm": 0.84765625, | |
| "learning_rate": 3.615115864570851e-05, | |
| "loss": 0.9801681041717529, | |
| "step": 838 | |
| }, | |
| { | |
| "epoch": 0.38869585360203845, | |
| "grad_norm": 0.7890625, | |
| "learning_rate": 3.612286532003969e-05, | |
| "loss": 1.106335163116455, | |
| "step": 839 | |
| }, | |
| { | |
| "epoch": 0.3891591382904795, | |
| "grad_norm": 0.83203125, | |
| "learning_rate": 3.609454935694634e-05, | |
| "loss": 0.9830104112625122, | |
| "step": 840 | |
| }, | |
| { | |
| "epoch": 0.3896224229789205, | |
| "grad_norm": 0.90234375, | |
| "learning_rate": 3.606621080930376e-05, | |
| "loss": 1.0451645851135254, | |
| "step": 841 | |
| }, | |
| { | |
| "epoch": 0.3900857076673616, | |
| "grad_norm": 0.80078125, | |
| "learning_rate": 3.603784973002948e-05, | |
| "loss": 0.980257511138916, | |
| "step": 842 | |
| }, | |
| { | |
| "epoch": 0.39054899235580265, | |
| "grad_norm": 0.78515625, | |
| "learning_rate": 3.600946617208306e-05, | |
| "loss": 0.9005157351493835, | |
| "step": 843 | |
| }, | |
| { | |
| "epoch": 0.3910122770442437, | |
| "grad_norm": 0.8203125, | |
| "learning_rate": 3.5981060188466055e-05, | |
| "loss": 0.9599143266677856, | |
| "step": 844 | |
| }, | |
| { | |
| "epoch": 0.3914755617326847, | |
| "grad_norm": 0.7734375, | |
| "learning_rate": 3.5952631832221895e-05, | |
| "loss": 0.9783821702003479, | |
| "step": 845 | |
| }, | |
| { | |
| "epoch": 0.3919388464211258, | |
| "grad_norm": 0.95703125, | |
| "learning_rate": 3.592418115643576e-05, | |
| "loss": 0.9757992625236511, | |
| "step": 846 | |
| }, | |
| { | |
| "epoch": 0.39240213110956684, | |
| "grad_norm": 0.7734375, | |
| "learning_rate": 3.589570821423457e-05, | |
| "loss": 0.9026694297790527, | |
| "step": 847 | |
| }, | |
| { | |
| "epoch": 0.3928654157980079, | |
| "grad_norm": 0.84765625, | |
| "learning_rate": 3.586721305878676e-05, | |
| "loss": 0.9864629507064819, | |
| "step": 848 | |
| }, | |
| { | |
| "epoch": 0.3933287004864489, | |
| "grad_norm": 0.88671875, | |
| "learning_rate": 3.583869574330227e-05, | |
| "loss": 0.9566922783851624, | |
| "step": 849 | |
| }, | |
| { | |
| "epoch": 0.39379198517488995, | |
| "grad_norm": 0.77734375, | |
| "learning_rate": 3.5810156321032424e-05, | |
| "loss": 1.0206118822097778, | |
| "step": 850 | |
| }, | |
| { | |
| "epoch": 0.39425526986333104, | |
| "grad_norm": 0.76953125, | |
| "learning_rate": 3.5781594845269824e-05, | |
| "loss": 1.2455644607543945, | |
| "step": 851 | |
| }, | |
| { | |
| "epoch": 0.39471855455177207, | |
| "grad_norm": 0.8203125, | |
| "learning_rate": 3.575301136934825e-05, | |
| "loss": 0.9965265393257141, | |
| "step": 852 | |
| }, | |
| { | |
| "epoch": 0.3951818392402131, | |
| "grad_norm": 0.80078125, | |
| "learning_rate": 3.5724405946642565e-05, | |
| "loss": 1.058623194694519, | |
| "step": 853 | |
| }, | |
| { | |
| "epoch": 0.39564512392865414, | |
| "grad_norm": 0.71484375, | |
| "learning_rate": 3.569577863056861e-05, | |
| "loss": 0.9021344184875488, | |
| "step": 854 | |
| }, | |
| { | |
| "epoch": 0.39610840861709523, | |
| "grad_norm": 0.8984375, | |
| "learning_rate": 3.5667129474583116e-05, | |
| "loss": 1.1672606468200684, | |
| "step": 855 | |
| }, | |
| { | |
| "epoch": 0.39657169330553627, | |
| "grad_norm": 0.7109375, | |
| "learning_rate": 3.5638458532183604e-05, | |
| "loss": 0.8217394351959229, | |
| "step": 856 | |
| }, | |
| { | |
| "epoch": 0.3970349779939773, | |
| "grad_norm": 0.75390625, | |
| "learning_rate": 3.5609765856908244e-05, | |
| "loss": 1.0040171146392822, | |
| "step": 857 | |
| }, | |
| { | |
| "epoch": 0.39749826268241834, | |
| "grad_norm": 0.81640625, | |
| "learning_rate": 3.5581051502335834e-05, | |
| "loss": 1.053956389427185, | |
| "step": 858 | |
| }, | |
| { | |
| "epoch": 0.39796154737085937, | |
| "grad_norm": 0.8359375, | |
| "learning_rate": 3.555231552208561e-05, | |
| "loss": 1.0706506967544556, | |
| "step": 859 | |
| }, | |
| { | |
| "epoch": 0.39842483205930046, | |
| "grad_norm": 0.84375, | |
| "learning_rate": 3.5523557969817226e-05, | |
| "loss": 0.8872452974319458, | |
| "step": 860 | |
| }, | |
| { | |
| "epoch": 0.3988881167477415, | |
| "grad_norm": 0.8671875, | |
| "learning_rate": 3.5494778899230605e-05, | |
| "loss": 0.9684238433837891, | |
| "step": 861 | |
| }, | |
| { | |
| "epoch": 0.39935140143618253, | |
| "grad_norm": 0.75, | |
| "learning_rate": 3.5465978364065835e-05, | |
| "loss": 1.1116052865982056, | |
| "step": 862 | |
| }, | |
| { | |
| "epoch": 0.39981468612462356, | |
| "grad_norm": 0.99609375, | |
| "learning_rate": 3.543715641810312e-05, | |
| "loss": 0.9077733159065247, | |
| "step": 863 | |
| }, | |
| { | |
| "epoch": 0.40027797081306465, | |
| "grad_norm": 1.0234375, | |
| "learning_rate": 3.540831311516261e-05, | |
| "loss": 1.0570735931396484, | |
| "step": 864 | |
| }, | |
| { | |
| "epoch": 0.4007412555015057, | |
| "grad_norm": 0.76171875, | |
| "learning_rate": 3.537944850910436e-05, | |
| "loss": 1.1230758428573608, | |
| "step": 865 | |
| }, | |
| { | |
| "epoch": 0.4012045401899467, | |
| "grad_norm": 0.7578125, | |
| "learning_rate": 3.5350562653828204e-05, | |
| "loss": 0.9723849296569824, | |
| "step": 866 | |
| }, | |
| { | |
| "epoch": 0.40166782487838776, | |
| "grad_norm": 0.8515625, | |
| "learning_rate": 3.532165560327364e-05, | |
| "loss": 0.9751421213150024, | |
| "step": 867 | |
| }, | |
| { | |
| "epoch": 0.4021311095668288, | |
| "grad_norm": 0.796875, | |
| "learning_rate": 3.529272741141974e-05, | |
| "loss": 0.9020988941192627, | |
| "step": 868 | |
| }, | |
| { | |
| "epoch": 0.4025943942552699, | |
| "grad_norm": 0.8046875, | |
| "learning_rate": 3.5263778132285085e-05, | |
| "loss": 0.9929109811782837, | |
| "step": 869 | |
| }, | |
| { | |
| "epoch": 0.4030576789437109, | |
| "grad_norm": 0.79296875, | |
| "learning_rate": 3.5234807819927625e-05, | |
| "loss": 1.088818073272705, | |
| "step": 870 | |
| }, | |
| { | |
| "epoch": 0.40352096363215195, | |
| "grad_norm": 0.86328125, | |
| "learning_rate": 3.520581652844454e-05, | |
| "loss": 1.1746731996536255, | |
| "step": 871 | |
| }, | |
| { | |
| "epoch": 0.403984248320593, | |
| "grad_norm": 0.796875, | |
| "learning_rate": 3.517680431197226e-05, | |
| "loss": 1.0509936809539795, | |
| "step": 872 | |
| }, | |
| { | |
| "epoch": 0.4044475330090341, | |
| "grad_norm": 0.91015625, | |
| "learning_rate": 3.514777122468621e-05, | |
| "loss": 1.023998737335205, | |
| "step": 873 | |
| }, | |
| { | |
| "epoch": 0.4049108176974751, | |
| "grad_norm": 0.87109375, | |
| "learning_rate": 3.511871732080087e-05, | |
| "loss": 0.9446095824241638, | |
| "step": 874 | |
| }, | |
| { | |
| "epoch": 0.40537410238591615, | |
| "grad_norm": 0.68359375, | |
| "learning_rate": 3.508964265456951e-05, | |
| "loss": 0.9351980686187744, | |
| "step": 875 | |
| }, | |
| { | |
| "epoch": 0.4058373870743572, | |
| "grad_norm": 0.6953125, | |
| "learning_rate": 3.506054728028423e-05, | |
| "loss": 0.9516130685806274, | |
| "step": 876 | |
| }, | |
| { | |
| "epoch": 0.4063006717627982, | |
| "grad_norm": 0.8359375, | |
| "learning_rate": 3.503143125227577e-05, | |
| "loss": 1.006507158279419, | |
| "step": 877 | |
| }, | |
| { | |
| "epoch": 0.4067639564512393, | |
| "grad_norm": 0.6953125, | |
| "learning_rate": 3.500229462491346e-05, | |
| "loss": 0.8910001516342163, | |
| "step": 878 | |
| }, | |
| { | |
| "epoch": 0.40722724113968034, | |
| "grad_norm": 0.76953125, | |
| "learning_rate": 3.497313745260507e-05, | |
| "loss": 0.8634387850761414, | |
| "step": 879 | |
| }, | |
| { | |
| "epoch": 0.4076905258281214, | |
| "grad_norm": 0.71484375, | |
| "learning_rate": 3.494395978979673e-05, | |
| "loss": 1.022470235824585, | |
| "step": 880 | |
| }, | |
| { | |
| "epoch": 0.4081538105165624, | |
| "grad_norm": 0.74609375, | |
| "learning_rate": 3.491476169097288e-05, | |
| "loss": 1.0753809213638306, | |
| "step": 881 | |
| }, | |
| { | |
| "epoch": 0.4086170952050035, | |
| "grad_norm": 0.7578125, | |
| "learning_rate": 3.488554321065606e-05, | |
| "loss": 0.8573417067527771, | |
| "step": 882 | |
| }, | |
| { | |
| "epoch": 0.40908037989344453, | |
| "grad_norm": 0.9296875, | |
| "learning_rate": 3.485630440340692e-05, | |
| "loss": 0.9140716195106506, | |
| "step": 883 | |
| }, | |
| { | |
| "epoch": 0.40954366458188557, | |
| "grad_norm": 0.7421875, | |
| "learning_rate": 3.482704532382404e-05, | |
| "loss": 0.8698415756225586, | |
| "step": 884 | |
| }, | |
| { | |
| "epoch": 0.4100069492703266, | |
| "grad_norm": 0.95703125, | |
| "learning_rate": 3.479776602654384e-05, | |
| "loss": 0.9196599721908569, | |
| "step": 885 | |
| }, | |
| { | |
| "epoch": 0.41047023395876764, | |
| "grad_norm": 0.8125, | |
| "learning_rate": 3.476846656624054e-05, | |
| "loss": 1.182805061340332, | |
| "step": 886 | |
| }, | |
| { | |
| "epoch": 0.41093351864720873, | |
| "grad_norm": 0.92578125, | |
| "learning_rate": 3.4739146997625966e-05, | |
| "loss": 1.1990854740142822, | |
| "step": 887 | |
| }, | |
| { | |
| "epoch": 0.41139680333564976, | |
| "grad_norm": 0.76171875, | |
| "learning_rate": 3.4709807375449526e-05, | |
| "loss": 0.9600467681884766, | |
| "step": 888 | |
| }, | |
| { | |
| "epoch": 0.4118600880240908, | |
| "grad_norm": 0.72265625, | |
| "learning_rate": 3.468044775449804e-05, | |
| "loss": 0.9062017202377319, | |
| "step": 889 | |
| }, | |
| { | |
| "epoch": 0.41232337271253183, | |
| "grad_norm": 0.83203125, | |
| "learning_rate": 3.4651068189595725e-05, | |
| "loss": 1.0649828910827637, | |
| "step": 890 | |
| }, | |
| { | |
| "epoch": 0.4127866574009729, | |
| "grad_norm": 0.79296875, | |
| "learning_rate": 3.4621668735603974e-05, | |
| "loss": 0.8955351710319519, | |
| "step": 891 | |
| }, | |
| { | |
| "epoch": 0.41324994208941396, | |
| "grad_norm": 0.82421875, | |
| "learning_rate": 3.459224944742137e-05, | |
| "loss": 0.8985044360160828, | |
| "step": 892 | |
| }, | |
| { | |
| "epoch": 0.413713226777855, | |
| "grad_norm": 0.765625, | |
| "learning_rate": 3.4562810379983515e-05, | |
| "loss": 0.9573203921318054, | |
| "step": 893 | |
| }, | |
| { | |
| "epoch": 0.414176511466296, | |
| "grad_norm": 0.8515625, | |
| "learning_rate": 3.453335158826294e-05, | |
| "loss": 0.8726100921630859, | |
| "step": 894 | |
| }, | |
| { | |
| "epoch": 0.41463979615473706, | |
| "grad_norm": 0.796875, | |
| "learning_rate": 3.450387312726902e-05, | |
| "loss": 0.9778281450271606, | |
| "step": 895 | |
| }, | |
| { | |
| "epoch": 0.41510308084317815, | |
| "grad_norm": 0.87109375, | |
| "learning_rate": 3.447437505204785e-05, | |
| "loss": 0.9495804905891418, | |
| "step": 896 | |
| }, | |
| { | |
| "epoch": 0.4155663655316192, | |
| "grad_norm": 0.8125, | |
| "learning_rate": 3.444485741768216e-05, | |
| "loss": 1.0823774337768555, | |
| "step": 897 | |
| }, | |
| { | |
| "epoch": 0.4160296502200602, | |
| "grad_norm": 0.80078125, | |
| "learning_rate": 3.441532027929119e-05, | |
| "loss": 1.0764063596725464, | |
| "step": 898 | |
| }, | |
| { | |
| "epoch": 0.41649293490850126, | |
| "grad_norm": 0.95703125, | |
| "learning_rate": 3.438576369203061e-05, | |
| "loss": 0.9195699691772461, | |
| "step": 899 | |
| }, | |
| { | |
| "epoch": 0.41695621959694235, | |
| "grad_norm": 0.95703125, | |
| "learning_rate": 3.435618771109241e-05, | |
| "loss": 1.0315985679626465, | |
| "step": 900 | |
| }, | |
| { | |
| "epoch": 0.4174195042853834, | |
| "grad_norm": 0.9609375, | |
| "learning_rate": 3.43265923917048e-05, | |
| "loss": 0.9629949331283569, | |
| "step": 901 | |
| }, | |
| { | |
| "epoch": 0.4178827889738244, | |
| "grad_norm": 0.86328125, | |
| "learning_rate": 3.4296977789132076e-05, | |
| "loss": 0.9754863977432251, | |
| "step": 902 | |
| }, | |
| { | |
| "epoch": 0.41834607366226545, | |
| "grad_norm": 0.85546875, | |
| "learning_rate": 3.4267343958674553e-05, | |
| "loss": 1.0928244590759277, | |
| "step": 903 | |
| }, | |
| { | |
| "epoch": 0.4188093583507065, | |
| "grad_norm": 0.7421875, | |
| "learning_rate": 3.423769095566848e-05, | |
| "loss": 0.9829870462417603, | |
| "step": 904 | |
| }, | |
| { | |
| "epoch": 0.4192726430391476, | |
| "grad_norm": 0.78515625, | |
| "learning_rate": 3.420801883548586e-05, | |
| "loss": 1.0500094890594482, | |
| "step": 905 | |
| }, | |
| { | |
| "epoch": 0.4197359277275886, | |
| "grad_norm": 0.80859375, | |
| "learning_rate": 3.417832765353443e-05, | |
| "loss": 1.0949947834014893, | |
| "step": 906 | |
| }, | |
| { | |
| "epoch": 0.42019921241602964, | |
| "grad_norm": 0.74609375, | |
| "learning_rate": 3.4148617465257505e-05, | |
| "loss": 0.9589704275131226, | |
| "step": 907 | |
| }, | |
| { | |
| "epoch": 0.4206624971044707, | |
| "grad_norm": 0.703125, | |
| "learning_rate": 3.41188883261339e-05, | |
| "loss": 0.9546459913253784, | |
| "step": 908 | |
| }, | |
| { | |
| "epoch": 0.42112578179291177, | |
| "grad_norm": 0.828125, | |
| "learning_rate": 3.40891402916778e-05, | |
| "loss": 1.1055099964141846, | |
| "step": 909 | |
| }, | |
| { | |
| "epoch": 0.4215890664813528, | |
| "grad_norm": 0.84765625, | |
| "learning_rate": 3.40593734174387e-05, | |
| "loss": 0.9318978786468506, | |
| "step": 910 | |
| }, | |
| { | |
| "epoch": 0.42205235116979384, | |
| "grad_norm": 0.79296875, | |
| "learning_rate": 3.402958775900126e-05, | |
| "loss": 0.8830047249794006, | |
| "step": 911 | |
| }, | |
| { | |
| "epoch": 0.4225156358582349, | |
| "grad_norm": 0.7734375, | |
| "learning_rate": 3.399978337198521e-05, | |
| "loss": 0.9814854264259338, | |
| "step": 912 | |
| }, | |
| { | |
| "epoch": 0.4229789205466759, | |
| "grad_norm": 0.7890625, | |
| "learning_rate": 3.3969960312045276e-05, | |
| "loss": 0.9006556272506714, | |
| "step": 913 | |
| }, | |
| { | |
| "epoch": 0.423442205235117, | |
| "grad_norm": 0.77734375, | |
| "learning_rate": 3.394011863487102e-05, | |
| "loss": 1.1782516241073608, | |
| "step": 914 | |
| }, | |
| { | |
| "epoch": 0.42390548992355803, | |
| "grad_norm": 0.80078125, | |
| "learning_rate": 3.39102583961868e-05, | |
| "loss": 0.9220030903816223, | |
| "step": 915 | |
| }, | |
| { | |
| "epoch": 0.42436877461199907, | |
| "grad_norm": 0.78515625, | |
| "learning_rate": 3.388037965175161e-05, | |
| "loss": 1.0260250568389893, | |
| "step": 916 | |
| }, | |
| { | |
| "epoch": 0.4248320593004401, | |
| "grad_norm": 0.71875, | |
| "learning_rate": 3.385048245735901e-05, | |
| "loss": 0.9909316301345825, | |
| "step": 917 | |
| }, | |
| { | |
| "epoch": 0.4252953439888812, | |
| "grad_norm": 0.7421875, | |
| "learning_rate": 3.3820566868837025e-05, | |
| "loss": 0.9483840465545654, | |
| "step": 918 | |
| }, | |
| { | |
| "epoch": 0.4257586286773222, | |
| "grad_norm": 0.7890625, | |
| "learning_rate": 3.3790632942048e-05, | |
| "loss": 0.9133286476135254, | |
| "step": 919 | |
| }, | |
| { | |
| "epoch": 0.42622191336576326, | |
| "grad_norm": 0.7265625, | |
| "learning_rate": 3.376068073288856e-05, | |
| "loss": 0.8733887076377869, | |
| "step": 920 | |
| }, | |
| { | |
| "epoch": 0.4266851980542043, | |
| "grad_norm": 0.8046875, | |
| "learning_rate": 3.373071029728942e-05, | |
| "loss": 0.9942792654037476, | |
| "step": 921 | |
| }, | |
| { | |
| "epoch": 0.42714848274264533, | |
| "grad_norm": 0.75, | |
| "learning_rate": 3.370072169121539e-05, | |
| "loss": 1.031928539276123, | |
| "step": 922 | |
| }, | |
| { | |
| "epoch": 0.4276117674310864, | |
| "grad_norm": 0.859375, | |
| "learning_rate": 3.367071497066516e-05, | |
| "loss": 1.044718861579895, | |
| "step": 923 | |
| }, | |
| { | |
| "epoch": 0.42807505211952745, | |
| "grad_norm": 0.7421875, | |
| "learning_rate": 3.364069019167127e-05, | |
| "loss": 1.10916268825531, | |
| "step": 924 | |
| }, | |
| { | |
| "epoch": 0.4285383368079685, | |
| "grad_norm": 0.9453125, | |
| "learning_rate": 3.361064741029997e-05, | |
| "loss": 1.1920192241668701, | |
| "step": 925 | |
| }, | |
| { | |
| "epoch": 0.4290016214964095, | |
| "grad_norm": 0.796875, | |
| "learning_rate": 3.3580586682651144e-05, | |
| "loss": 0.8233553171157837, | |
| "step": 926 | |
| }, | |
| { | |
| "epoch": 0.4294649061848506, | |
| "grad_norm": 0.80078125, | |
| "learning_rate": 3.3550508064858165e-05, | |
| "loss": 0.8930643796920776, | |
| "step": 927 | |
| }, | |
| { | |
| "epoch": 0.42992819087329165, | |
| "grad_norm": 0.81640625, | |
| "learning_rate": 3.352041161308782e-05, | |
| "loss": 0.9572421312332153, | |
| "step": 928 | |
| }, | |
| { | |
| "epoch": 0.4303914755617327, | |
| "grad_norm": 0.76171875, | |
| "learning_rate": 3.349029738354023e-05, | |
| "loss": 1.018913984298706, | |
| "step": 929 | |
| }, | |
| { | |
| "epoch": 0.4308547602501737, | |
| "grad_norm": 0.83984375, | |
| "learning_rate": 3.346016543244865e-05, | |
| "loss": 0.9118576049804688, | |
| "step": 930 | |
| }, | |
| { | |
| "epoch": 0.43131804493861475, | |
| "grad_norm": 0.76171875, | |
| "learning_rate": 3.343001581607949e-05, | |
| "loss": 0.8083831071853638, | |
| "step": 931 | |
| }, | |
| { | |
| "epoch": 0.43178132962705584, | |
| "grad_norm": 0.87109375, | |
| "learning_rate": 3.339984859073209e-05, | |
| "loss": 0.8767201900482178, | |
| "step": 932 | |
| }, | |
| { | |
| "epoch": 0.4322446143154969, | |
| "grad_norm": 0.8046875, | |
| "learning_rate": 3.3369663812738717e-05, | |
| "loss": 0.9277627468109131, | |
| "step": 933 | |
| }, | |
| { | |
| "epoch": 0.4327078990039379, | |
| "grad_norm": 1.0, | |
| "learning_rate": 3.333946153846441e-05, | |
| "loss": 1.1319029331207275, | |
| "step": 934 | |
| }, | |
| { | |
| "epoch": 0.43317118369237895, | |
| "grad_norm": 0.78515625, | |
| "learning_rate": 3.330924182430684e-05, | |
| "loss": 0.8661171197891235, | |
| "step": 935 | |
| }, | |
| { | |
| "epoch": 0.43363446838082004, | |
| "grad_norm": 0.7890625, | |
| "learning_rate": 3.327900472669629e-05, | |
| "loss": 0.9028452634811401, | |
| "step": 936 | |
| }, | |
| { | |
| "epoch": 0.43409775306926107, | |
| "grad_norm": 1.2421875, | |
| "learning_rate": 3.324875030209549e-05, | |
| "loss": 1.1232396364212036, | |
| "step": 937 | |
| }, | |
| { | |
| "epoch": 0.4345610377577021, | |
| "grad_norm": 0.94921875, | |
| "learning_rate": 3.32184786069995e-05, | |
| "loss": 1.0140894651412964, | |
| "step": 938 | |
| }, | |
| { | |
| "epoch": 0.43502432244614314, | |
| "grad_norm": 0.94140625, | |
| "learning_rate": 3.318818969793567e-05, | |
| "loss": 1.0171058177947998, | |
| "step": 939 | |
| }, | |
| { | |
| "epoch": 0.4354876071345842, | |
| "grad_norm": 0.83984375, | |
| "learning_rate": 3.3157883631463465e-05, | |
| "loss": 1.07037353515625, | |
| "step": 940 | |
| }, | |
| { | |
| "epoch": 0.43595089182302527, | |
| "grad_norm": 0.86328125, | |
| "learning_rate": 3.312756046417441e-05, | |
| "loss": 1.1801575422286987, | |
| "step": 941 | |
| }, | |
| { | |
| "epoch": 0.4364141765114663, | |
| "grad_norm": 0.85546875, | |
| "learning_rate": 3.309722025269193e-05, | |
| "loss": 1.1600738763809204, | |
| "step": 942 | |
| }, | |
| { | |
| "epoch": 0.43687746119990734, | |
| "grad_norm": 0.84765625, | |
| "learning_rate": 3.306686305367132e-05, | |
| "loss": 0.9927069544792175, | |
| "step": 943 | |
| }, | |
| { | |
| "epoch": 0.43734074588834837, | |
| "grad_norm": 0.84765625, | |
| "learning_rate": 3.303648892379956e-05, | |
| "loss": 1.0282708406448364, | |
| "step": 944 | |
| }, | |
| { | |
| "epoch": 0.43780403057678946, | |
| "grad_norm": 0.86328125, | |
| "learning_rate": 3.300609791979526e-05, | |
| "loss": 1.0274934768676758, | |
| "step": 945 | |
| }, | |
| { | |
| "epoch": 0.4382673152652305, | |
| "grad_norm": 0.8671875, | |
| "learning_rate": 3.2975690098408555e-05, | |
| "loss": 0.9179637432098389, | |
| "step": 946 | |
| }, | |
| { | |
| "epoch": 0.43873059995367153, | |
| "grad_norm": 0.78125, | |
| "learning_rate": 3.2945265516420954e-05, | |
| "loss": 0.9191789627075195, | |
| "step": 947 | |
| }, | |
| { | |
| "epoch": 0.43919388464211256, | |
| "grad_norm": 0.890625, | |
| "learning_rate": 3.291482423064528e-05, | |
| "loss": 1.0582070350646973, | |
| "step": 948 | |
| }, | |
| { | |
| "epoch": 0.4396571693305536, | |
| "grad_norm": 0.8671875, | |
| "learning_rate": 3.2884366297925543e-05, | |
| "loss": 0.9630937576293945, | |
| "step": 949 | |
| }, | |
| { | |
| "epoch": 0.4401204540189947, | |
| "grad_norm": 0.6875, | |
| "learning_rate": 3.2853891775136854e-05, | |
| "loss": 1.039337158203125, | |
| "step": 950 | |
| }, | |
| { | |
| "epoch": 0.4405837387074357, | |
| "grad_norm": 0.92578125, | |
| "learning_rate": 3.2823400719185286e-05, | |
| "loss": 1.1467114686965942, | |
| "step": 951 | |
| }, | |
| { | |
| "epoch": 0.44104702339587676, | |
| "grad_norm": 0.73046875, | |
| "learning_rate": 3.279289318700778e-05, | |
| "loss": 1.0572043657302856, | |
| "step": 952 | |
| }, | |
| { | |
| "epoch": 0.4415103080843178, | |
| "grad_norm": 0.81640625, | |
| "learning_rate": 3.276236923557206e-05, | |
| "loss": 1.015242099761963, | |
| "step": 953 | |
| }, | |
| { | |
| "epoch": 0.4419735927727589, | |
| "grad_norm": 0.83984375, | |
| "learning_rate": 3.27318289218765e-05, | |
| "loss": 1.0399034023284912, | |
| "step": 954 | |
| }, | |
| { | |
| "epoch": 0.4424368774611999, | |
| "grad_norm": 0.79296875, | |
| "learning_rate": 3.2701272302950036e-05, | |
| "loss": 1.0193357467651367, | |
| "step": 955 | |
| }, | |
| { | |
| "epoch": 0.44290016214964095, | |
| "grad_norm": 0.8203125, | |
| "learning_rate": 3.2670699435852034e-05, | |
| "loss": 0.9832947850227356, | |
| "step": 956 | |
| }, | |
| { | |
| "epoch": 0.443363446838082, | |
| "grad_norm": 0.890625, | |
| "learning_rate": 3.2640110377672225e-05, | |
| "loss": 0.9701854586601257, | |
| "step": 957 | |
| }, | |
| { | |
| "epoch": 0.443826731526523, | |
| "grad_norm": 0.8203125, | |
| "learning_rate": 3.260950518553056e-05, | |
| "loss": 1.1149054765701294, | |
| "step": 958 | |
| }, | |
| { | |
| "epoch": 0.4442900162149641, | |
| "grad_norm": 1.0546875, | |
| "learning_rate": 3.257888391657711e-05, | |
| "loss": 0.949036180973053, | |
| "step": 959 | |
| }, | |
| { | |
| "epoch": 0.44475330090340515, | |
| "grad_norm": 0.796875, | |
| "learning_rate": 3.254824662799199e-05, | |
| "loss": 0.976753294467926, | |
| "step": 960 | |
| }, | |
| { | |
| "epoch": 0.4452165855918462, | |
| "grad_norm": 0.8125, | |
| "learning_rate": 3.2517593376985216e-05, | |
| "loss": 0.9391505122184753, | |
| "step": 961 | |
| }, | |
| { | |
| "epoch": 0.4456798702802872, | |
| "grad_norm": 0.8671875, | |
| "learning_rate": 3.248692422079659e-05, | |
| "loss": 1.0805474519729614, | |
| "step": 962 | |
| }, | |
| { | |
| "epoch": 0.4461431549687283, | |
| "grad_norm": 0.88671875, | |
| "learning_rate": 3.245623921669565e-05, | |
| "loss": 0.8896968364715576, | |
| "step": 963 | |
| }, | |
| { | |
| "epoch": 0.44660643965716934, | |
| "grad_norm": 0.76171875, | |
| "learning_rate": 3.2425538421981515e-05, | |
| "loss": 0.9105522036552429, | |
| "step": 964 | |
| }, | |
| { | |
| "epoch": 0.4470697243456104, | |
| "grad_norm": 0.85546875, | |
| "learning_rate": 3.2394821893982765e-05, | |
| "loss": 1.0720794200897217, | |
| "step": 965 | |
| }, | |
| { | |
| "epoch": 0.4475330090340514, | |
| "grad_norm": 0.84765625, | |
| "learning_rate": 3.2364089690057414e-05, | |
| "loss": 0.9761070013046265, | |
| "step": 966 | |
| }, | |
| { | |
| "epoch": 0.44799629372249244, | |
| "grad_norm": 0.78515625, | |
| "learning_rate": 3.2333341867592697e-05, | |
| "loss": 0.9775373339653015, | |
| "step": 967 | |
| }, | |
| { | |
| "epoch": 0.44845957841093353, | |
| "grad_norm": 0.7578125, | |
| "learning_rate": 3.230257848400503e-05, | |
| "loss": 1.045255184173584, | |
| "step": 968 | |
| }, | |
| { | |
| "epoch": 0.44892286309937457, | |
| "grad_norm": 0.90234375, | |
| "learning_rate": 3.22717995967399e-05, | |
| "loss": 0.9985790848731995, | |
| "step": 969 | |
| }, | |
| { | |
| "epoch": 0.4493861477878156, | |
| "grad_norm": 0.69140625, | |
| "learning_rate": 3.224100526327173e-05, | |
| "loss": 0.8842822909355164, | |
| "step": 970 | |
| }, | |
| { | |
| "epoch": 0.44984943247625664, | |
| "grad_norm": 1.15625, | |
| "learning_rate": 3.221019554110378e-05, | |
| "loss": 1.080345630645752, | |
| "step": 971 | |
| }, | |
| { | |
| "epoch": 0.45031271716469773, | |
| "grad_norm": 0.82421875, | |
| "learning_rate": 3.2179370487768067e-05, | |
| "loss": 1.0478893518447876, | |
| "step": 972 | |
| }, | |
| { | |
| "epoch": 0.45077600185313876, | |
| "grad_norm": 0.6953125, | |
| "learning_rate": 3.214853016082523e-05, | |
| "loss": 0.9434666037559509, | |
| "step": 973 | |
| }, | |
| { | |
| "epoch": 0.4512392865415798, | |
| "grad_norm": 0.91015625, | |
| "learning_rate": 3.211767461786441e-05, | |
| "loss": 0.9274519681930542, | |
| "step": 974 | |
| }, | |
| { | |
| "epoch": 0.45170257123002083, | |
| "grad_norm": 0.796875, | |
| "learning_rate": 3.208680391650319e-05, | |
| "loss": 1.0596266984939575, | |
| "step": 975 | |
| }, | |
| { | |
| "epoch": 0.45216585591846187, | |
| "grad_norm": 0.8828125, | |
| "learning_rate": 3.205591811438744e-05, | |
| "loss": 0.9719846844673157, | |
| "step": 976 | |
| }, | |
| { | |
| "epoch": 0.45262914060690296, | |
| "grad_norm": 0.91796875, | |
| "learning_rate": 3.2025017269191223e-05, | |
| "loss": 1.1913756132125854, | |
| "step": 977 | |
| }, | |
| { | |
| "epoch": 0.453092425295344, | |
| "grad_norm": 0.7578125, | |
| "learning_rate": 3.199410143861671e-05, | |
| "loss": 0.8659987449645996, | |
| "step": 978 | |
| }, | |
| { | |
| "epoch": 0.453555709983785, | |
| "grad_norm": 0.94140625, | |
| "learning_rate": 3.196317068039405e-05, | |
| "loss": 1.1136746406555176, | |
| "step": 979 | |
| }, | |
| { | |
| "epoch": 0.45401899467222606, | |
| "grad_norm": 0.80859375, | |
| "learning_rate": 3.193222505228125e-05, | |
| "loss": 0.954369068145752, | |
| "step": 980 | |
| }, | |
| { | |
| "epoch": 0.45448227936066715, | |
| "grad_norm": 0.85546875, | |
| "learning_rate": 3.1901264612064124e-05, | |
| "loss": 1.025739073753357, | |
| "step": 981 | |
| }, | |
| { | |
| "epoch": 0.4549455640491082, | |
| "grad_norm": 0.77734375, | |
| "learning_rate": 3.1870289417556095e-05, | |
| "loss": 1.1050188541412354, | |
| "step": 982 | |
| }, | |
| { | |
| "epoch": 0.4554088487375492, | |
| "grad_norm": 0.9765625, | |
| "learning_rate": 3.1839299526598156e-05, | |
| "loss": 0.9074011445045471, | |
| "step": 983 | |
| }, | |
| { | |
| "epoch": 0.45587213342599026, | |
| "grad_norm": 0.80078125, | |
| "learning_rate": 3.180829499705879e-05, | |
| "loss": 0.8634052872657776, | |
| "step": 984 | |
| }, | |
| { | |
| "epoch": 0.4563354181144313, | |
| "grad_norm": 0.8203125, | |
| "learning_rate": 3.1777275886833714e-05, | |
| "loss": 0.9090867638587952, | |
| "step": 985 | |
| }, | |
| { | |
| "epoch": 0.4567987028028724, | |
| "grad_norm": 0.71875, | |
| "learning_rate": 3.1746242253845975e-05, | |
| "loss": 0.90366530418396, | |
| "step": 986 | |
| }, | |
| { | |
| "epoch": 0.4572619874913134, | |
| "grad_norm": 0.9296875, | |
| "learning_rate": 3.1715194156045676e-05, | |
| "loss": 0.9696229696273804, | |
| "step": 987 | |
| }, | |
| { | |
| "epoch": 0.45772527217975445, | |
| "grad_norm": 0.73828125, | |
| "learning_rate": 3.168413165140996e-05, | |
| "loss": 0.9196410179138184, | |
| "step": 988 | |
| }, | |
| { | |
| "epoch": 0.4581885568681955, | |
| "grad_norm": 0.765625, | |
| "learning_rate": 3.165305479794285e-05, | |
| "loss": 1.135054111480713, | |
| "step": 989 | |
| }, | |
| { | |
| "epoch": 0.4586518415566366, | |
| "grad_norm": 0.87890625, | |
| "learning_rate": 3.162196365367518e-05, | |
| "loss": 0.9602132439613342, | |
| "step": 990 | |
| }, | |
| { | |
| "epoch": 0.4591151262450776, | |
| "grad_norm": 0.8671875, | |
| "learning_rate": 3.1590858276664475e-05, | |
| "loss": 0.875511884689331, | |
| "step": 991 | |
| }, | |
| { | |
| "epoch": 0.45957841093351864, | |
| "grad_norm": 0.7578125, | |
| "learning_rate": 3.155973872499481e-05, | |
| "loss": 0.9517480731010437, | |
| "step": 992 | |
| }, | |
| { | |
| "epoch": 0.4600416956219597, | |
| "grad_norm": 0.9140625, | |
| "learning_rate": 3.152860505677676e-05, | |
| "loss": 1.126634120941162, | |
| "step": 993 | |
| }, | |
| { | |
| "epoch": 0.46050498031040077, | |
| "grad_norm": 0.74609375, | |
| "learning_rate": 3.149745733014724e-05, | |
| "loss": 1.0964951515197754, | |
| "step": 994 | |
| }, | |
| { | |
| "epoch": 0.4609682649988418, | |
| "grad_norm": 0.8671875, | |
| "learning_rate": 3.146629560326942e-05, | |
| "loss": 1.0599464178085327, | |
| "step": 995 | |
| }, | |
| { | |
| "epoch": 0.46143154968728284, | |
| "grad_norm": 0.8828125, | |
| "learning_rate": 3.143511993433263e-05, | |
| "loss": 0.894392728805542, | |
| "step": 996 | |
| }, | |
| { | |
| "epoch": 0.4618948343757239, | |
| "grad_norm": 0.7109375, | |
| "learning_rate": 3.140393038155219e-05, | |
| "loss": 0.9883759021759033, | |
| "step": 997 | |
| }, | |
| { | |
| "epoch": 0.4623581190641649, | |
| "grad_norm": 0.81640625, | |
| "learning_rate": 3.1372727003169414e-05, | |
| "loss": 1.1054998636245728, | |
| "step": 998 | |
| }, | |
| { | |
| "epoch": 0.462821403752606, | |
| "grad_norm": 0.73828125, | |
| "learning_rate": 3.1341509857451374e-05, | |
| "loss": 0.9076305031776428, | |
| "step": 999 | |
| }, | |
| { | |
| "epoch": 0.46328468844104703, | |
| "grad_norm": 0.91796875, | |
| "learning_rate": 3.131027900269087e-05, | |
| "loss": 1.0859215259552002, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 0.46374797312948807, | |
| "grad_norm": 0.78515625, | |
| "learning_rate": 3.12790344972063e-05, | |
| "loss": 0.8842138648033142, | |
| "step": 1001 | |
| }, | |
| { | |
| "epoch": 0.4642112578179291, | |
| "grad_norm": 0.96875, | |
| "learning_rate": 3.1247776399341574e-05, | |
| "loss": 0.953213632106781, | |
| "step": 1002 | |
| }, | |
| { | |
| "epoch": 0.4646745425063702, | |
| "grad_norm": 0.77734375, | |
| "learning_rate": 3.121650476746595e-05, | |
| "loss": 0.9613085389137268, | |
| "step": 1003 | |
| }, | |
| { | |
| "epoch": 0.4651378271948112, | |
| "grad_norm": 0.7578125, | |
| "learning_rate": 3.1185219659973974e-05, | |
| "loss": 1.0341384410858154, | |
| "step": 1004 | |
| }, | |
| { | |
| "epoch": 0.46560111188325226, | |
| "grad_norm": 0.85546875, | |
| "learning_rate": 3.115392113528536e-05, | |
| "loss": 0.8540540337562561, | |
| "step": 1005 | |
| }, | |
| { | |
| "epoch": 0.4660643965716933, | |
| "grad_norm": 0.80859375, | |
| "learning_rate": 3.112260925184487e-05, | |
| "loss": 0.8874945044517517, | |
| "step": 1006 | |
| }, | |
| { | |
| "epoch": 0.46652768126013433, | |
| "grad_norm": 0.83984375, | |
| "learning_rate": 3.1091284068122206e-05, | |
| "loss": 0.9373153448104858, | |
| "step": 1007 | |
| }, | |
| { | |
| "epoch": 0.4669909659485754, | |
| "grad_norm": 0.83984375, | |
| "learning_rate": 3.1059945642611913e-05, | |
| "loss": 0.9182353615760803, | |
| "step": 1008 | |
| }, | |
| { | |
| "epoch": 0.46745425063701646, | |
| "grad_norm": 0.84765625, | |
| "learning_rate": 3.1028594033833274e-05, | |
| "loss": 0.9852114319801331, | |
| "step": 1009 | |
| }, | |
| { | |
| "epoch": 0.4679175353254575, | |
| "grad_norm": 0.9609375, | |
| "learning_rate": 3.099722930033017e-05, | |
| "loss": 1.0278407335281372, | |
| "step": 1010 | |
| }, | |
| { | |
| "epoch": 0.4683808200138985, | |
| "grad_norm": 0.79296875, | |
| "learning_rate": 3.0965851500670984e-05, | |
| "loss": 0.9236195683479309, | |
| "step": 1011 | |
| }, | |
| { | |
| "epoch": 0.4688441047023396, | |
| "grad_norm": 0.7421875, | |
| "learning_rate": 3.093446069344854e-05, | |
| "loss": 0.8953359127044678, | |
| "step": 1012 | |
| }, | |
| { | |
| "epoch": 0.46930738939078065, | |
| "grad_norm": 0.81640625, | |
| "learning_rate": 3.09030569372799e-05, | |
| "loss": 0.8945424556732178, | |
| "step": 1013 | |
| }, | |
| { | |
| "epoch": 0.4697706740792217, | |
| "grad_norm": 0.86328125, | |
| "learning_rate": 3.087164029080634e-05, | |
| "loss": 0.9093310236930847, | |
| "step": 1014 | |
| }, | |
| { | |
| "epoch": 0.4702339587676627, | |
| "grad_norm": 0.81640625, | |
| "learning_rate": 3.084021081269319e-05, | |
| "loss": 1.0238938331604004, | |
| "step": 1015 | |
| }, | |
| { | |
| "epoch": 0.47069724345610375, | |
| "grad_norm": 0.76171875, | |
| "learning_rate": 3.080876856162976e-05, | |
| "loss": 0.9495306015014648, | |
| "step": 1016 | |
| }, | |
| { | |
| "epoch": 0.47116052814454484, | |
| "grad_norm": 0.77734375, | |
| "learning_rate": 3.0777313596329175e-05, | |
| "loss": 0.8257219791412354, | |
| "step": 1017 | |
| }, | |
| { | |
| "epoch": 0.4716238128329859, | |
| "grad_norm": 0.7890625, | |
| "learning_rate": 3.074584597552834e-05, | |
| "loss": 1.2120308876037598, | |
| "step": 1018 | |
| }, | |
| { | |
| "epoch": 0.4720870975214269, | |
| "grad_norm": 0.85546875, | |
| "learning_rate": 3.071436575798779e-05, | |
| "loss": 0.8118107318878174, | |
| "step": 1019 | |
| }, | |
| { | |
| "epoch": 0.47255038220986795, | |
| "grad_norm": 0.75, | |
| "learning_rate": 3.068287300249154e-05, | |
| "loss": 0.9661107659339905, | |
| "step": 1020 | |
| }, | |
| { | |
| "epoch": 0.47301366689830904, | |
| "grad_norm": 0.73828125, | |
| "learning_rate": 3.065136776784706e-05, | |
| "loss": 0.9633262753486633, | |
| "step": 1021 | |
| }, | |
| { | |
| "epoch": 0.47347695158675007, | |
| "grad_norm": 0.8046875, | |
| "learning_rate": 3.061985011288511e-05, | |
| "loss": 0.8450291752815247, | |
| "step": 1022 | |
| }, | |
| { | |
| "epoch": 0.4739402362751911, | |
| "grad_norm": 0.8671875, | |
| "learning_rate": 3.0588320096459646e-05, | |
| "loss": 0.8301048278808594, | |
| "step": 1023 | |
| }, | |
| { | |
| "epoch": 0.47440352096363214, | |
| "grad_norm": 0.87890625, | |
| "learning_rate": 3.0556777777447695e-05, | |
| "loss": 1.1722376346588135, | |
| "step": 1024 | |
| }, | |
| { | |
| "epoch": 0.4748668056520732, | |
| "grad_norm": 0.80078125, | |
| "learning_rate": 3.0525223214749266e-05, | |
| "loss": 1.0436758995056152, | |
| "step": 1025 | |
| }, | |
| { | |
| "epoch": 0.47533009034051427, | |
| "grad_norm": 0.8515625, | |
| "learning_rate": 3.0493656467287242e-05, | |
| "loss": 1.0242863893508911, | |
| "step": 1026 | |
| }, | |
| { | |
| "epoch": 0.4757933750289553, | |
| "grad_norm": 0.84375, | |
| "learning_rate": 3.046207759400723e-05, | |
| "loss": 1.0861283540725708, | |
| "step": 1027 | |
| }, | |
| { | |
| "epoch": 0.47625665971739634, | |
| "grad_norm": 0.83984375, | |
| "learning_rate": 3.043048665387751e-05, | |
| "loss": 1.0913084745407104, | |
| "step": 1028 | |
| }, | |
| { | |
| "epoch": 0.47671994440583737, | |
| "grad_norm": 0.828125, | |
| "learning_rate": 3.0398883705888867e-05, | |
| "loss": 0.9872074723243713, | |
| "step": 1029 | |
| }, | |
| { | |
| "epoch": 0.47718322909427846, | |
| "grad_norm": 0.80859375, | |
| "learning_rate": 3.0367268809054554e-05, | |
| "loss": 0.9497346878051758, | |
| "step": 1030 | |
| }, | |
| { | |
| "epoch": 0.4776465137827195, | |
| "grad_norm": 0.78515625, | |
| "learning_rate": 3.0335642022410072e-05, | |
| "loss": 0.9556658864021301, | |
| "step": 1031 | |
| }, | |
| { | |
| "epoch": 0.47810979847116053, | |
| "grad_norm": 1.34375, | |
| "learning_rate": 3.0304003405013176e-05, | |
| "loss": 1.1097627878189087, | |
| "step": 1032 | |
| }, | |
| { | |
| "epoch": 0.47857308315960156, | |
| "grad_norm": 0.89453125, | |
| "learning_rate": 3.0272353015943694e-05, | |
| "loss": 1.0165081024169922, | |
| "step": 1033 | |
| }, | |
| { | |
| "epoch": 0.4790363678480426, | |
| "grad_norm": 0.7890625, | |
| "learning_rate": 3.024069091430343e-05, | |
| "loss": 0.957397997379303, | |
| "step": 1034 | |
| }, | |
| { | |
| "epoch": 0.4794996525364837, | |
| "grad_norm": 0.84375, | |
| "learning_rate": 3.0209017159216076e-05, | |
| "loss": 0.8591142892837524, | |
| "step": 1035 | |
| }, | |
| { | |
| "epoch": 0.4799629372249247, | |
| "grad_norm": 0.75390625, | |
| "learning_rate": 3.0177331809827064e-05, | |
| "loss": 0.8509551286697388, | |
| "step": 1036 | |
| }, | |
| { | |
| "epoch": 0.48042622191336576, | |
| "grad_norm": 0.95703125, | |
| "learning_rate": 3.0145634925303502e-05, | |
| "loss": 0.9679578542709351, | |
| "step": 1037 | |
| }, | |
| { | |
| "epoch": 0.4808895066018068, | |
| "grad_norm": 0.9296875, | |
| "learning_rate": 3.011392656483401e-05, | |
| "loss": 1.2282400131225586, | |
| "step": 1038 | |
| }, | |
| { | |
| "epoch": 0.4813527912902479, | |
| "grad_norm": 0.765625, | |
| "learning_rate": 3.0082206787628658e-05, | |
| "loss": 0.8720540404319763, | |
| "step": 1039 | |
| }, | |
| { | |
| "epoch": 0.4818160759786889, | |
| "grad_norm": 0.734375, | |
| "learning_rate": 3.005047565291882e-05, | |
| "loss": 1.1174383163452148, | |
| "step": 1040 | |
| }, | |
| { | |
| "epoch": 0.48227936066712995, | |
| "grad_norm": 0.80078125, | |
| "learning_rate": 3.0018733219957094e-05, | |
| "loss": 0.9691076278686523, | |
| "step": 1041 | |
| }, | |
| { | |
| "epoch": 0.482742645355571, | |
| "grad_norm": 0.81640625, | |
| "learning_rate": 2.998697954801717e-05, | |
| "loss": 1.0722172260284424, | |
| "step": 1042 | |
| }, | |
| { | |
| "epoch": 0.483205930044012, | |
| "grad_norm": 0.9765625, | |
| "learning_rate": 2.9955214696393707e-05, | |
| "loss": 1.0778554677963257, | |
| "step": 1043 | |
| }, | |
| { | |
| "epoch": 0.4836692147324531, | |
| "grad_norm": 0.84375, | |
| "learning_rate": 2.9923438724402278e-05, | |
| "loss": 0.9753159880638123, | |
| "step": 1044 | |
| }, | |
| { | |
| "epoch": 0.48413249942089415, | |
| "grad_norm": 0.8046875, | |
| "learning_rate": 2.989165169137918e-05, | |
| "loss": 0.9562061429023743, | |
| "step": 1045 | |
| }, | |
| { | |
| "epoch": 0.4845957841093352, | |
| "grad_norm": 0.75, | |
| "learning_rate": 2.9859853656681395e-05, | |
| "loss": 0.8260341882705688, | |
| "step": 1046 | |
| }, | |
| { | |
| "epoch": 0.4850590687977762, | |
| "grad_norm": 0.87109375, | |
| "learning_rate": 2.982804467968643e-05, | |
| "loss": 1.0546704530715942, | |
| "step": 1047 | |
| }, | |
| { | |
| "epoch": 0.4855223534862173, | |
| "grad_norm": 0.83984375, | |
| "learning_rate": 2.979622481979224e-05, | |
| "loss": 0.9112711548805237, | |
| "step": 1048 | |
| }, | |
| { | |
| "epoch": 0.48598563817465834, | |
| "grad_norm": 0.79296875, | |
| "learning_rate": 2.9764394136417088e-05, | |
| "loss": 0.9864487648010254, | |
| "step": 1049 | |
| }, | |
| { | |
| "epoch": 0.4864489228630994, | |
| "grad_norm": 0.75390625, | |
| "learning_rate": 2.973255268899945e-05, | |
| "loss": 0.9202637076377869, | |
| "step": 1050 | |
| }, | |
| { | |
| "epoch": 0.4869122075515404, | |
| "grad_norm": 0.90625, | |
| "learning_rate": 2.970070053699792e-05, | |
| "loss": 1.0404424667358398, | |
| "step": 1051 | |
| }, | |
| { | |
| "epoch": 0.48737549223998144, | |
| "grad_norm": 0.89453125, | |
| "learning_rate": 2.9668837739891063e-05, | |
| "loss": 1.0003234148025513, | |
| "step": 1052 | |
| }, | |
| { | |
| "epoch": 0.48783877692842254, | |
| "grad_norm": 0.890625, | |
| "learning_rate": 2.9636964357177317e-05, | |
| "loss": 0.9466184377670288, | |
| "step": 1053 | |
| }, | |
| { | |
| "epoch": 0.48830206161686357, | |
| "grad_norm": 0.828125, | |
| "learning_rate": 2.9605080448374903e-05, | |
| "loss": 1.1342371702194214, | |
| "step": 1054 | |
| }, | |
| { | |
| "epoch": 0.4887653463053046, | |
| "grad_norm": 0.8828125, | |
| "learning_rate": 2.9573186073021696e-05, | |
| "loss": 1.0444382429122925, | |
| "step": 1055 | |
| }, | |
| { | |
| "epoch": 0.48922863099374564, | |
| "grad_norm": 0.67578125, | |
| "learning_rate": 2.95412812906751e-05, | |
| "loss": 0.9123279452323914, | |
| "step": 1056 | |
| }, | |
| { | |
| "epoch": 0.48969191568218673, | |
| "grad_norm": 0.9453125, | |
| "learning_rate": 2.9509366160911977e-05, | |
| "loss": 0.9924875497817993, | |
| "step": 1057 | |
| }, | |
| { | |
| "epoch": 0.49015520037062776, | |
| "grad_norm": 0.703125, | |
| "learning_rate": 2.9477440743328484e-05, | |
| "loss": 0.9442932605743408, | |
| "step": 1058 | |
| }, | |
| { | |
| "epoch": 0.4906184850590688, | |
| "grad_norm": 0.8984375, | |
| "learning_rate": 2.944550509754e-05, | |
| "loss": 0.9229353666305542, | |
| "step": 1059 | |
| }, | |
| { | |
| "epoch": 0.49108176974750983, | |
| "grad_norm": 0.84765625, | |
| "learning_rate": 2.9413559283181028e-05, | |
| "loss": 0.8888018727302551, | |
| "step": 1060 | |
| }, | |
| { | |
| "epoch": 0.49154505443595087, | |
| "grad_norm": 0.7265625, | |
| "learning_rate": 2.9381603359905006e-05, | |
| "loss": 0.8846039175987244, | |
| "step": 1061 | |
| }, | |
| { | |
| "epoch": 0.49200833912439196, | |
| "grad_norm": 0.9921875, | |
| "learning_rate": 2.9349637387384297e-05, | |
| "loss": 1.0494961738586426, | |
| "step": 1062 | |
| }, | |
| { | |
| "epoch": 0.492471623812833, | |
| "grad_norm": 0.8515625, | |
| "learning_rate": 2.9317661425310004e-05, | |
| "loss": 0.9032423496246338, | |
| "step": 1063 | |
| }, | |
| { | |
| "epoch": 0.492934908501274, | |
| "grad_norm": 0.81640625, | |
| "learning_rate": 2.928567553339189e-05, | |
| "loss": 1.0323213338851929, | |
| "step": 1064 | |
| }, | |
| { | |
| "epoch": 0.49339819318971506, | |
| "grad_norm": 0.734375, | |
| "learning_rate": 2.925367977135828e-05, | |
| "loss": 1.0107799768447876, | |
| "step": 1065 | |
| }, | |
| { | |
| "epoch": 0.49386147787815615, | |
| "grad_norm": 0.77734375, | |
| "learning_rate": 2.9221674198955883e-05, | |
| "loss": 1.0235852003097534, | |
| "step": 1066 | |
| }, | |
| { | |
| "epoch": 0.4943247625665972, | |
| "grad_norm": 0.9609375, | |
| "learning_rate": 2.9189658875949772e-05, | |
| "loss": 1.0340992212295532, | |
| "step": 1067 | |
| }, | |
| { | |
| "epoch": 0.4947880472550382, | |
| "grad_norm": 0.8515625, | |
| "learning_rate": 2.9157633862123216e-05, | |
| "loss": 0.9333528280258179, | |
| "step": 1068 | |
| }, | |
| { | |
| "epoch": 0.49525133194347926, | |
| "grad_norm": 0.98828125, | |
| "learning_rate": 2.9125599217277566e-05, | |
| "loss": 1.2312861680984497, | |
| "step": 1069 | |
| }, | |
| { | |
| "epoch": 0.4957146166319203, | |
| "grad_norm": 0.796875, | |
| "learning_rate": 2.9093555001232157e-05, | |
| "loss": 0.8942990303039551, | |
| "step": 1070 | |
| }, | |
| { | |
| "epoch": 0.4961779013203614, | |
| "grad_norm": 0.78125, | |
| "learning_rate": 2.9061501273824226e-05, | |
| "loss": 0.9628137350082397, | |
| "step": 1071 | |
| }, | |
| { | |
| "epoch": 0.4966411860088024, | |
| "grad_norm": 0.73046875, | |
| "learning_rate": 2.902943809490874e-05, | |
| "loss": 0.9911953806877136, | |
| "step": 1072 | |
| }, | |
| { | |
| "epoch": 0.49710447069724345, | |
| "grad_norm": 0.77734375, | |
| "learning_rate": 2.8997365524358323e-05, | |
| "loss": 0.8831644058227539, | |
| "step": 1073 | |
| }, | |
| { | |
| "epoch": 0.4975677553856845, | |
| "grad_norm": 0.8359375, | |
| "learning_rate": 2.8965283622063147e-05, | |
| "loss": 1.0550167560577393, | |
| "step": 1074 | |
| }, | |
| { | |
| "epoch": 0.4980310400741256, | |
| "grad_norm": 0.890625, | |
| "learning_rate": 2.89331924479308e-05, | |
| "loss": 0.9322593212127686, | |
| "step": 1075 | |
| }, | |
| { | |
| "epoch": 0.4984943247625666, | |
| "grad_norm": 0.81640625, | |
| "learning_rate": 2.8901092061886184e-05, | |
| "loss": 0.9539169073104858, | |
| "step": 1076 | |
| }, | |
| { | |
| "epoch": 0.49895760945100764, | |
| "grad_norm": 0.8203125, | |
| "learning_rate": 2.88689825238714e-05, | |
| "loss": 0.9450077414512634, | |
| "step": 1077 | |
| }, | |
| { | |
| "epoch": 0.4994208941394487, | |
| "grad_norm": 0.734375, | |
| "learning_rate": 2.883686389384565e-05, | |
| "loss": 0.9067424535751343, | |
| "step": 1078 | |
| }, | |
| { | |
| "epoch": 0.4998841788278897, | |
| "grad_norm": 0.7421875, | |
| "learning_rate": 2.8804736231785098e-05, | |
| "loss": 0.8575161695480347, | |
| "step": 1079 | |
| }, | |
| { | |
| "epoch": 0.5003474635163307, | |
| "grad_norm": 0.88671875, | |
| "learning_rate": 2.87725995976828e-05, | |
| "loss": 0.9551107287406921, | |
| "step": 1080 | |
| }, | |
| { | |
| "epoch": 0.5008107482047718, | |
| "grad_norm": 0.984375, | |
| "learning_rate": 2.8740454051548526e-05, | |
| "loss": 1.1050188541412354, | |
| "step": 1081 | |
| }, | |
| { | |
| "epoch": 0.5012740328932129, | |
| "grad_norm": 0.86328125, | |
| "learning_rate": 2.8708299653408722e-05, | |
| "loss": 0.9398777484893799, | |
| "step": 1082 | |
| }, | |
| { | |
| "epoch": 0.501737317581654, | |
| "grad_norm": 0.75390625, | |
| "learning_rate": 2.8676136463306354e-05, | |
| "loss": 0.9366370439529419, | |
| "step": 1083 | |
| }, | |
| { | |
| "epoch": 0.502200602270095, | |
| "grad_norm": 0.79296875, | |
| "learning_rate": 2.8643964541300793e-05, | |
| "loss": 0.916124701499939, | |
| "step": 1084 | |
| }, | |
| { | |
| "epoch": 0.502663886958536, | |
| "grad_norm": 0.7890625, | |
| "learning_rate": 2.861178394746774e-05, | |
| "loss": 0.94605952501297, | |
| "step": 1085 | |
| }, | |
| { | |
| "epoch": 0.5031271716469771, | |
| "grad_norm": 0.81640625, | |
| "learning_rate": 2.8579594741899052e-05, | |
| "loss": 0.812589168548584, | |
| "step": 1086 | |
| }, | |
| { | |
| "epoch": 0.5035904563354181, | |
| "grad_norm": 0.87109375, | |
| "learning_rate": 2.8547396984702716e-05, | |
| "loss": 1.3426378965377808, | |
| "step": 1087 | |
| }, | |
| { | |
| "epoch": 0.5040537410238591, | |
| "grad_norm": 0.71875, | |
| "learning_rate": 2.851519073600265e-05, | |
| "loss": 0.992534875869751, | |
| "step": 1088 | |
| }, | |
| { | |
| "epoch": 0.5045170257123002, | |
| "grad_norm": 0.75, | |
| "learning_rate": 2.8482976055938628e-05, | |
| "loss": 1.0410428047180176, | |
| "step": 1089 | |
| }, | |
| { | |
| "epoch": 0.5049803104007412, | |
| "grad_norm": 0.81640625, | |
| "learning_rate": 2.8450753004666203e-05, | |
| "loss": 0.9198648929595947, | |
| "step": 1090 | |
| }, | |
| { | |
| "epoch": 0.5054435950891824, | |
| "grad_norm": 0.8828125, | |
| "learning_rate": 2.841852164235652e-05, | |
| "loss": 1.0291709899902344, | |
| "step": 1091 | |
| }, | |
| { | |
| "epoch": 0.5059068797776234, | |
| "grad_norm": 1.21875, | |
| "learning_rate": 2.8386282029196264e-05, | |
| "loss": 1.1194522380828857, | |
| "step": 1092 | |
| }, | |
| { | |
| "epoch": 0.5063701644660644, | |
| "grad_norm": 0.73828125, | |
| "learning_rate": 2.8354034225387525e-05, | |
| "loss": 0.843272864818573, | |
| "step": 1093 | |
| }, | |
| { | |
| "epoch": 0.5068334491545055, | |
| "grad_norm": 0.8359375, | |
| "learning_rate": 2.832177829114769e-05, | |
| "loss": 0.8743926882743835, | |
| "step": 1094 | |
| }, | |
| { | |
| "epoch": 0.5072967338429465, | |
| "grad_norm": 0.96484375, | |
| "learning_rate": 2.8289514286709315e-05, | |
| "loss": 1.130947470664978, | |
| "step": 1095 | |
| }, | |
| { | |
| "epoch": 0.5077600185313875, | |
| "grad_norm": 0.859375, | |
| "learning_rate": 2.825724227232004e-05, | |
| "loss": 0.961344838142395, | |
| "step": 1096 | |
| }, | |
| { | |
| "epoch": 0.5082233032198286, | |
| "grad_norm": 0.796875, | |
| "learning_rate": 2.8224962308242467e-05, | |
| "loss": 0.9592534303665161, | |
| "step": 1097 | |
| }, | |
| { | |
| "epoch": 0.5086865879082696, | |
| "grad_norm": 0.921875, | |
| "learning_rate": 2.8192674454754016e-05, | |
| "loss": 0.9232071042060852, | |
| "step": 1098 | |
| }, | |
| { | |
| "epoch": 0.5091498725967106, | |
| "grad_norm": 0.828125, | |
| "learning_rate": 2.8160378772146868e-05, | |
| "loss": 0.9333939552307129, | |
| "step": 1099 | |
| }, | |
| { | |
| "epoch": 0.5096131572851518, | |
| "grad_norm": 0.78125, | |
| "learning_rate": 2.8128075320727803e-05, | |
| "loss": 0.8948829174041748, | |
| "step": 1100 | |
| }, | |
| { | |
| "epoch": 0.5100764419735928, | |
| "grad_norm": 0.87109375, | |
| "learning_rate": 2.8095764160818127e-05, | |
| "loss": 1.1355596780776978, | |
| "step": 1101 | |
| }, | |
| { | |
| "epoch": 0.5105397266620338, | |
| "grad_norm": 0.93359375, | |
| "learning_rate": 2.8063445352753518e-05, | |
| "loss": 0.9624730944633484, | |
| "step": 1102 | |
| }, | |
| { | |
| "epoch": 0.5110030113504749, | |
| "grad_norm": 0.8515625, | |
| "learning_rate": 2.8031118956883958e-05, | |
| "loss": 1.002945899963379, | |
| "step": 1103 | |
| }, | |
| { | |
| "epoch": 0.5114662960389159, | |
| "grad_norm": 0.875, | |
| "learning_rate": 2.7998785033573583e-05, | |
| "loss": 0.8556519746780396, | |
| "step": 1104 | |
| }, | |
| { | |
| "epoch": 0.511929580727357, | |
| "grad_norm": 1.1875, | |
| "learning_rate": 2.7966443643200585e-05, | |
| "loss": 0.9405410289764404, | |
| "step": 1105 | |
| }, | |
| { | |
| "epoch": 0.512392865415798, | |
| "grad_norm": 0.875, | |
| "learning_rate": 2.7934094846157126e-05, | |
| "loss": 1.1397373676300049, | |
| "step": 1106 | |
| }, | |
| { | |
| "epoch": 0.512856150104239, | |
| "grad_norm": 0.78125, | |
| "learning_rate": 2.7901738702849144e-05, | |
| "loss": 0.7816023230552673, | |
| "step": 1107 | |
| }, | |
| { | |
| "epoch": 0.51331943479268, | |
| "grad_norm": 0.84375, | |
| "learning_rate": 2.7869375273696355e-05, | |
| "loss": 0.8463333249092102, | |
| "step": 1108 | |
| }, | |
| { | |
| "epoch": 0.5137827194811212, | |
| "grad_norm": 0.7890625, | |
| "learning_rate": 2.7837004619132037e-05, | |
| "loss": 0.8399050831794739, | |
| "step": 1109 | |
| }, | |
| { | |
| "epoch": 0.5142460041695622, | |
| "grad_norm": 0.76953125, | |
| "learning_rate": 2.7804626799602988e-05, | |
| "loss": 1.0282368659973145, | |
| "step": 1110 | |
| }, | |
| { | |
| "epoch": 0.5147092888580033, | |
| "grad_norm": 0.8515625, | |
| "learning_rate": 2.7772241875569362e-05, | |
| "loss": 0.944800853729248, | |
| "step": 1111 | |
| }, | |
| { | |
| "epoch": 0.5151725735464443, | |
| "grad_norm": 0.90234375, | |
| "learning_rate": 2.77398499075046e-05, | |
| "loss": 0.9259645938873291, | |
| "step": 1112 | |
| }, | |
| { | |
| "epoch": 0.5156358582348853, | |
| "grad_norm": 0.859375, | |
| "learning_rate": 2.7707450955895287e-05, | |
| "loss": 1.0247880220413208, | |
| "step": 1113 | |
| }, | |
| { | |
| "epoch": 0.5160991429233264, | |
| "grad_norm": 0.83984375, | |
| "learning_rate": 2.7675045081241056e-05, | |
| "loss": 0.8392752408981323, | |
| "step": 1114 | |
| }, | |
| { | |
| "epoch": 0.5165624276117674, | |
| "grad_norm": 0.7890625, | |
| "learning_rate": 2.7642632344054442e-05, | |
| "loss": 0.9830076098442078, | |
| "step": 1115 | |
| }, | |
| { | |
| "epoch": 0.5170257123002084, | |
| "grad_norm": 0.83984375, | |
| "learning_rate": 2.761021280486082e-05, | |
| "loss": 0.8453760147094727, | |
| "step": 1116 | |
| }, | |
| { | |
| "epoch": 0.5174889969886495, | |
| "grad_norm": 0.81640625, | |
| "learning_rate": 2.7577786524198273e-05, | |
| "loss": 0.9899021983146667, | |
| "step": 1117 | |
| }, | |
| { | |
| "epoch": 0.5179522816770906, | |
| "grad_norm": 0.66796875, | |
| "learning_rate": 2.7545353562617444e-05, | |
| "loss": 0.8067998290061951, | |
| "step": 1118 | |
| }, | |
| { | |
| "epoch": 0.5184155663655317, | |
| "grad_norm": 0.9609375, | |
| "learning_rate": 2.7512913980681483e-05, | |
| "loss": 1.0544780492782593, | |
| "step": 1119 | |
| }, | |
| { | |
| "epoch": 0.5188788510539727, | |
| "grad_norm": 0.83203125, | |
| "learning_rate": 2.7480467838965872e-05, | |
| "loss": 1.0132243633270264, | |
| "step": 1120 | |
| }, | |
| { | |
| "epoch": 0.5193421357424137, | |
| "grad_norm": 0.765625, | |
| "learning_rate": 2.744801519805837e-05, | |
| "loss": 0.9769008755683899, | |
| "step": 1121 | |
| }, | |
| { | |
| "epoch": 0.5198054204308548, | |
| "grad_norm": 0.82421875, | |
| "learning_rate": 2.7415556118558848e-05, | |
| "loss": 0.9457086324691772, | |
| "step": 1122 | |
| }, | |
| { | |
| "epoch": 0.5202687051192958, | |
| "grad_norm": 0.8984375, | |
| "learning_rate": 2.7383090661079215e-05, | |
| "loss": 0.9511300921440125, | |
| "step": 1123 | |
| }, | |
| { | |
| "epoch": 0.5207319898077368, | |
| "grad_norm": 0.80859375, | |
| "learning_rate": 2.7350618886243286e-05, | |
| "loss": 0.9526143670082092, | |
| "step": 1124 | |
| }, | |
| { | |
| "epoch": 0.5211952744961779, | |
| "grad_norm": 0.80078125, | |
| "learning_rate": 2.731814085468667e-05, | |
| "loss": 0.8233035206794739, | |
| "step": 1125 | |
| }, | |
| { | |
| "epoch": 0.5216585591846189, | |
| "grad_norm": 0.90625, | |
| "learning_rate": 2.7285656627056673e-05, | |
| "loss": 0.8465626239776611, | |
| "step": 1126 | |
| }, | |
| { | |
| "epoch": 0.52212184387306, | |
| "grad_norm": 0.8203125, | |
| "learning_rate": 2.7253166264012142e-05, | |
| "loss": 0.8446078896522522, | |
| "step": 1127 | |
| }, | |
| { | |
| "epoch": 0.5225851285615011, | |
| "grad_norm": 0.8359375, | |
| "learning_rate": 2.7220669826223418e-05, | |
| "loss": 0.8863167762756348, | |
| "step": 1128 | |
| }, | |
| { | |
| "epoch": 0.5230484132499421, | |
| "grad_norm": 0.8984375, | |
| "learning_rate": 2.7188167374372155e-05, | |
| "loss": 0.9343461394309998, | |
| "step": 1129 | |
| }, | |
| { | |
| "epoch": 0.5235116979383831, | |
| "grad_norm": 0.85546875, | |
| "learning_rate": 2.715565896915125e-05, | |
| "loss": 0.9686758518218994, | |
| "step": 1130 | |
| }, | |
| { | |
| "epoch": 0.5239749826268242, | |
| "grad_norm": 0.84375, | |
| "learning_rate": 2.7123144671264715e-05, | |
| "loss": 0.9815382361412048, | |
| "step": 1131 | |
| }, | |
| { | |
| "epoch": 0.5244382673152652, | |
| "grad_norm": 0.80859375, | |
| "learning_rate": 2.7090624541427566e-05, | |
| "loss": 1.1032721996307373, | |
| "step": 1132 | |
| }, | |
| { | |
| "epoch": 0.5249015520037063, | |
| "grad_norm": 0.76171875, | |
| "learning_rate": 2.7058098640365716e-05, | |
| "loss": 0.9356403350830078, | |
| "step": 1133 | |
| }, | |
| { | |
| "epoch": 0.5253648366921473, | |
| "grad_norm": 0.8515625, | |
| "learning_rate": 2.702556702881584e-05, | |
| "loss": 0.8864633440971375, | |
| "step": 1134 | |
| }, | |
| { | |
| "epoch": 0.5258281213805883, | |
| "grad_norm": 0.75, | |
| "learning_rate": 2.6993029767525295e-05, | |
| "loss": 0.8282592296600342, | |
| "step": 1135 | |
| }, | |
| { | |
| "epoch": 0.5262914060690295, | |
| "grad_norm": 0.82421875, | |
| "learning_rate": 2.6960486917251967e-05, | |
| "loss": 0.9786428213119507, | |
| "step": 1136 | |
| }, | |
| { | |
| "epoch": 0.5267546907574705, | |
| "grad_norm": 0.91796875, | |
| "learning_rate": 2.6927938538764197e-05, | |
| "loss": 0.8919640183448792, | |
| "step": 1137 | |
| }, | |
| { | |
| "epoch": 0.5272179754459115, | |
| "grad_norm": 0.8359375, | |
| "learning_rate": 2.6895384692840634e-05, | |
| "loss": 1.1693425178527832, | |
| "step": 1138 | |
| }, | |
| { | |
| "epoch": 0.5276812601343526, | |
| "grad_norm": 0.83984375, | |
| "learning_rate": 2.6862825440270143e-05, | |
| "loss": 0.961223304271698, | |
| "step": 1139 | |
| }, | |
| { | |
| "epoch": 0.5281445448227936, | |
| "grad_norm": 0.9453125, | |
| "learning_rate": 2.683026084185169e-05, | |
| "loss": 1.0943102836608887, | |
| "step": 1140 | |
| }, | |
| { | |
| "epoch": 0.5286078295112346, | |
| "grad_norm": 0.77734375, | |
| "learning_rate": 2.679769095839422e-05, | |
| "loss": 0.9912340641021729, | |
| "step": 1141 | |
| }, | |
| { | |
| "epoch": 0.5290711141996757, | |
| "grad_norm": 0.85546875, | |
| "learning_rate": 2.6765115850716548e-05, | |
| "loss": 1.1055166721343994, | |
| "step": 1142 | |
| }, | |
| { | |
| "epoch": 0.5295343988881167, | |
| "grad_norm": 0.76171875, | |
| "learning_rate": 2.673253557964724e-05, | |
| "loss": 0.9419571161270142, | |
| "step": 1143 | |
| }, | |
| { | |
| "epoch": 0.5299976835765577, | |
| "grad_norm": 0.8828125, | |
| "learning_rate": 2.66999502060245e-05, | |
| "loss": 0.949909508228302, | |
| "step": 1144 | |
| }, | |
| { | |
| "epoch": 0.5304609682649989, | |
| "grad_norm": 0.953125, | |
| "learning_rate": 2.6667359790696074e-05, | |
| "loss": 0.9741145372390747, | |
| "step": 1145 | |
| }, | |
| { | |
| "epoch": 0.5309242529534399, | |
| "grad_norm": 0.87109375, | |
| "learning_rate": 2.6634764394519106e-05, | |
| "loss": 0.9066289663314819, | |
| "step": 1146 | |
| }, | |
| { | |
| "epoch": 0.531387537641881, | |
| "grad_norm": 0.94921875, | |
| "learning_rate": 2.660216407836006e-05, | |
| "loss": 1.090651512145996, | |
| "step": 1147 | |
| }, | |
| { | |
| "epoch": 0.531850822330322, | |
| "grad_norm": 0.80859375, | |
| "learning_rate": 2.656955890309457e-05, | |
| "loss": 1.0257591009140015, | |
| "step": 1148 | |
| }, | |
| { | |
| "epoch": 0.532314107018763, | |
| "grad_norm": 0.92578125, | |
| "learning_rate": 2.6536948929607355e-05, | |
| "loss": 1.2080005407333374, | |
| "step": 1149 | |
| }, | |
| { | |
| "epoch": 0.5327773917072041, | |
| "grad_norm": 0.796875, | |
| "learning_rate": 2.6504334218792082e-05, | |
| "loss": 0.9306067228317261, | |
| "step": 1150 | |
| }, | |
| { | |
| "epoch": 0.5332406763956451, | |
| "grad_norm": 0.796875, | |
| "learning_rate": 2.6471714831551293e-05, | |
| "loss": 1.0611542463302612, | |
| "step": 1151 | |
| }, | |
| { | |
| "epoch": 0.5337039610840861, | |
| "grad_norm": 0.80859375, | |
| "learning_rate": 2.6439090828796214e-05, | |
| "loss": 0.9698439240455627, | |
| "step": 1152 | |
| }, | |
| { | |
| "epoch": 0.5341672457725272, | |
| "grad_norm": 0.8515625, | |
| "learning_rate": 2.6406462271446732e-05, | |
| "loss": 0.8757132887840271, | |
| "step": 1153 | |
| }, | |
| { | |
| "epoch": 0.5346305304609683, | |
| "grad_norm": 0.703125, | |
| "learning_rate": 2.6373829220431218e-05, | |
| "loss": 0.7780014872550964, | |
| "step": 1154 | |
| }, | |
| { | |
| "epoch": 0.5350938151494093, | |
| "grad_norm": 1.0390625, | |
| "learning_rate": 2.6341191736686438e-05, | |
| "loss": 1.0552921295166016, | |
| "step": 1155 | |
| }, | |
| { | |
| "epoch": 0.5355570998378504, | |
| "grad_norm": 0.84765625, | |
| "learning_rate": 2.6308549881157446e-05, | |
| "loss": 0.8510526418685913, | |
| "step": 1156 | |
| }, | |
| { | |
| "epoch": 0.5360203845262914, | |
| "grad_norm": 0.7265625, | |
| "learning_rate": 2.6275903714797442e-05, | |
| "loss": 1.0102177858352661, | |
| "step": 1157 | |
| }, | |
| { | |
| "epoch": 0.5364836692147325, | |
| "grad_norm": 0.78515625, | |
| "learning_rate": 2.624325329856769e-05, | |
| "loss": 0.8171606063842773, | |
| "step": 1158 | |
| }, | |
| { | |
| "epoch": 0.5369469539031735, | |
| "grad_norm": 0.76171875, | |
| "learning_rate": 2.621059869343739e-05, | |
| "loss": 0.8785892128944397, | |
| "step": 1159 | |
| }, | |
| { | |
| "epoch": 0.5374102385916145, | |
| "grad_norm": 0.8125, | |
| "learning_rate": 2.6177939960383546e-05, | |
| "loss": 0.7128728628158569, | |
| "step": 1160 | |
| }, | |
| { | |
| "epoch": 0.5378735232800556, | |
| "grad_norm": 0.77734375, | |
| "learning_rate": 2.6145277160390888e-05, | |
| "loss": 0.9567815661430359, | |
| "step": 1161 | |
| }, | |
| { | |
| "epoch": 0.5383368079684966, | |
| "grad_norm": 0.83984375, | |
| "learning_rate": 2.611261035445173e-05, | |
| "loss": 1.065600872039795, | |
| "step": 1162 | |
| }, | |
| { | |
| "epoch": 0.5388000926569377, | |
| "grad_norm": 0.87109375, | |
| "learning_rate": 2.6079939603565884e-05, | |
| "loss": 0.9629444479942322, | |
| "step": 1163 | |
| }, | |
| { | |
| "epoch": 0.5392633773453788, | |
| "grad_norm": 0.7265625, | |
| "learning_rate": 2.6047264968740505e-05, | |
| "loss": 1.0867857933044434, | |
| "step": 1164 | |
| }, | |
| { | |
| "epoch": 0.5397266620338198, | |
| "grad_norm": 0.83203125, | |
| "learning_rate": 2.6014586510990015e-05, | |
| "loss": 1.0058200359344482, | |
| "step": 1165 | |
| }, | |
| { | |
| "epoch": 0.5401899467222608, | |
| "grad_norm": 0.91015625, | |
| "learning_rate": 2.598190429133598e-05, | |
| "loss": 1.0715608596801758, | |
| "step": 1166 | |
| }, | |
| { | |
| "epoch": 0.5406532314107019, | |
| "grad_norm": 0.7890625, | |
| "learning_rate": 2.5949218370806967e-05, | |
| "loss": 0.9926679730415344, | |
| "step": 1167 | |
| }, | |
| { | |
| "epoch": 0.5411165160991429, | |
| "grad_norm": 0.703125, | |
| "learning_rate": 2.591652881043848e-05, | |
| "loss": 0.9041029810905457, | |
| "step": 1168 | |
| }, | |
| { | |
| "epoch": 0.5415798007875839, | |
| "grad_norm": 0.72265625, | |
| "learning_rate": 2.5883835671272798e-05, | |
| "loss": 0.8764104843139648, | |
| "step": 1169 | |
| }, | |
| { | |
| "epoch": 0.542043085476025, | |
| "grad_norm": 0.83203125, | |
| "learning_rate": 2.5851139014358903e-05, | |
| "loss": 0.8071422576904297, | |
| "step": 1170 | |
| }, | |
| { | |
| "epoch": 0.542506370164466, | |
| "grad_norm": 0.76171875, | |
| "learning_rate": 2.581843890075233e-05, | |
| "loss": 1.064335584640503, | |
| "step": 1171 | |
| }, | |
| { | |
| "epoch": 0.5429696548529072, | |
| "grad_norm": 0.8125, | |
| "learning_rate": 2.5785735391515083e-05, | |
| "loss": 0.9307787418365479, | |
| "step": 1172 | |
| }, | |
| { | |
| "epoch": 0.5434329395413482, | |
| "grad_norm": 0.73046875, | |
| "learning_rate": 2.5753028547715487e-05, | |
| "loss": 0.8384194374084473, | |
| "step": 1173 | |
| }, | |
| { | |
| "epoch": 0.5438962242297892, | |
| "grad_norm": 0.78515625, | |
| "learning_rate": 2.572031843042813e-05, | |
| "loss": 0.9470226764678955, | |
| "step": 1174 | |
| }, | |
| { | |
| "epoch": 0.5443595089182303, | |
| "grad_norm": 0.8671875, | |
| "learning_rate": 2.5687605100733652e-05, | |
| "loss": 1.0152946710586548, | |
| "step": 1175 | |
| }, | |
| { | |
| "epoch": 0.5448227936066713, | |
| "grad_norm": 0.8046875, | |
| "learning_rate": 2.5654888619718762e-05, | |
| "loss": 0.8778582811355591, | |
| "step": 1176 | |
| }, | |
| { | |
| "epoch": 0.5452860782951123, | |
| "grad_norm": 0.80078125, | |
| "learning_rate": 2.5622169048475996e-05, | |
| "loss": 1.002034306526184, | |
| "step": 1177 | |
| }, | |
| { | |
| "epoch": 0.5457493629835534, | |
| "grad_norm": 0.77734375, | |
| "learning_rate": 2.5589446448103687e-05, | |
| "loss": 1.00825834274292, | |
| "step": 1178 | |
| }, | |
| { | |
| "epoch": 0.5462126476719944, | |
| "grad_norm": 0.734375, | |
| "learning_rate": 2.5556720879705834e-05, | |
| "loss": 0.8981512784957886, | |
| "step": 1179 | |
| }, | |
| { | |
| "epoch": 0.5466759323604354, | |
| "grad_norm": 1.1484375, | |
| "learning_rate": 2.552399240439195e-05, | |
| "loss": 0.9638339281082153, | |
| "step": 1180 | |
| }, | |
| { | |
| "epoch": 0.5471392170488766, | |
| "grad_norm": 0.82421875, | |
| "learning_rate": 2.5491261083277014e-05, | |
| "loss": 0.832276463508606, | |
| "step": 1181 | |
| }, | |
| { | |
| "epoch": 0.5476025017373176, | |
| "grad_norm": 0.8046875, | |
| "learning_rate": 2.545852697748127e-05, | |
| "loss": 0.7502810955047607, | |
| "step": 1182 | |
| }, | |
| { | |
| "epoch": 0.5480657864257586, | |
| "grad_norm": 0.66796875, | |
| "learning_rate": 2.5425790148130212e-05, | |
| "loss": 0.8867089748382568, | |
| "step": 1183 | |
| }, | |
| { | |
| "epoch": 0.5485290711141997, | |
| "grad_norm": 0.7421875, | |
| "learning_rate": 2.5393050656354378e-05, | |
| "loss": 1.0290521383285522, | |
| "step": 1184 | |
| }, | |
| { | |
| "epoch": 0.5489923558026407, | |
| "grad_norm": 0.828125, | |
| "learning_rate": 2.5360308563289304e-05, | |
| "loss": 0.9290564060211182, | |
| "step": 1185 | |
| }, | |
| { | |
| "epoch": 0.5494556404910818, | |
| "grad_norm": 0.84765625, | |
| "learning_rate": 2.5327563930075376e-05, | |
| "loss": 1.1053215265274048, | |
| "step": 1186 | |
| }, | |
| { | |
| "epoch": 0.5499189251795228, | |
| "grad_norm": 0.9140625, | |
| "learning_rate": 2.5294816817857718e-05, | |
| "loss": 0.9043922424316406, | |
| "step": 1187 | |
| }, | |
| { | |
| "epoch": 0.5503822098679638, | |
| "grad_norm": 0.8359375, | |
| "learning_rate": 2.5262067287786105e-05, | |
| "loss": 0.8395538926124573, | |
| "step": 1188 | |
| }, | |
| { | |
| "epoch": 0.5508454945564049, | |
| "grad_norm": 0.9296875, | |
| "learning_rate": 2.5229315401014793e-05, | |
| "loss": 0.9732888340950012, | |
| "step": 1189 | |
| }, | |
| { | |
| "epoch": 0.551308779244846, | |
| "grad_norm": 0.90625, | |
| "learning_rate": 2.5196561218702462e-05, | |
| "loss": 0.9241658449172974, | |
| "step": 1190 | |
| }, | |
| { | |
| "epoch": 0.551772063933287, | |
| "grad_norm": 0.82421875, | |
| "learning_rate": 2.5163804802012067e-05, | |
| "loss": 0.810673713684082, | |
| "step": 1191 | |
| }, | |
| { | |
| "epoch": 0.5522353486217281, | |
| "grad_norm": 0.84765625, | |
| "learning_rate": 2.5131046212110748e-05, | |
| "loss": 0.9831432700157166, | |
| "step": 1192 | |
| }, | |
| { | |
| "epoch": 0.5526986333101691, | |
| "grad_norm": 0.8359375, | |
| "learning_rate": 2.509828551016969e-05, | |
| "loss": 0.9300224781036377, | |
| "step": 1193 | |
| }, | |
| { | |
| "epoch": 0.5531619179986101, | |
| "grad_norm": 0.87890625, | |
| "learning_rate": 2.5065522757364026e-05, | |
| "loss": 1.0542097091674805, | |
| "step": 1194 | |
| }, | |
| { | |
| "epoch": 0.5536252026870512, | |
| "grad_norm": 0.890625, | |
| "learning_rate": 2.5032758014872737e-05, | |
| "loss": 1.0396784543991089, | |
| "step": 1195 | |
| }, | |
| { | |
| "epoch": 0.5540884873754922, | |
| "grad_norm": 0.765625, | |
| "learning_rate": 2.4999991343878476e-05, | |
| "loss": 0.999907910823822, | |
| "step": 1196 | |
| }, | |
| { | |
| "epoch": 0.5545517720639332, | |
| "grad_norm": 0.875, | |
| "learning_rate": 2.496722280556755e-05, | |
| "loss": 1.0439246892929077, | |
| "step": 1197 | |
| }, | |
| { | |
| "epoch": 0.5550150567523743, | |
| "grad_norm": 0.8984375, | |
| "learning_rate": 2.4934452461129705e-05, | |
| "loss": 1.1016960144042969, | |
| "step": 1198 | |
| }, | |
| { | |
| "epoch": 0.5554783414408154, | |
| "grad_norm": 0.87109375, | |
| "learning_rate": 2.49016803717581e-05, | |
| "loss": 1.0953267812728882, | |
| "step": 1199 | |
| }, | |
| { | |
| "epoch": 0.5559416261292565, | |
| "grad_norm": 0.78125, | |
| "learning_rate": 2.486890659864912e-05, | |
| "loss": 1.0697580575942993, | |
| "step": 1200 | |
| }, | |
| { | |
| "epoch": 0.5564049108176975, | |
| "grad_norm": 0.77734375, | |
| "learning_rate": 2.4836131203002307e-05, | |
| "loss": 0.9801178574562073, | |
| "step": 1201 | |
| }, | |
| { | |
| "epoch": 0.5568681955061385, | |
| "grad_norm": 0.83203125, | |
| "learning_rate": 2.4803354246020246e-05, | |
| "loss": 0.8415432572364807, | |
| "step": 1202 | |
| }, | |
| { | |
| "epoch": 0.5573314801945796, | |
| "grad_norm": 0.88671875, | |
| "learning_rate": 2.4770575788908413e-05, | |
| "loss": 0.8861055374145508, | |
| "step": 1203 | |
| }, | |
| { | |
| "epoch": 0.5577947648830206, | |
| "grad_norm": 0.77734375, | |
| "learning_rate": 2.4737795892875114e-05, | |
| "loss": 0.8379161357879639, | |
| "step": 1204 | |
| }, | |
| { | |
| "epoch": 0.5582580495714616, | |
| "grad_norm": 0.76171875, | |
| "learning_rate": 2.4705014619131302e-05, | |
| "loss": 0.7862236499786377, | |
| "step": 1205 | |
| }, | |
| { | |
| "epoch": 0.5587213342599027, | |
| "grad_norm": 0.921875, | |
| "learning_rate": 2.4672232028890544e-05, | |
| "loss": 1.1108901500701904, | |
| "step": 1206 | |
| }, | |
| { | |
| "epoch": 0.5591846189483437, | |
| "grad_norm": 0.91015625, | |
| "learning_rate": 2.4639448183368848e-05, | |
| "loss": 0.9187421798706055, | |
| "step": 1207 | |
| }, | |
| { | |
| "epoch": 0.5596479036367848, | |
| "grad_norm": 0.80859375, | |
| "learning_rate": 2.4606663143784546e-05, | |
| "loss": 0.9021953344345093, | |
| "step": 1208 | |
| }, | |
| { | |
| "epoch": 0.5601111883252259, | |
| "grad_norm": 1.0546875, | |
| "learning_rate": 2.457387697135824e-05, | |
| "loss": 0.8863352537155151, | |
| "step": 1209 | |
| }, | |
| { | |
| "epoch": 0.5605744730136669, | |
| "grad_norm": 0.7421875, | |
| "learning_rate": 2.454108972731261e-05, | |
| "loss": 0.9452582597732544, | |
| "step": 1210 | |
| }, | |
| { | |
| "epoch": 0.561037757702108, | |
| "grad_norm": 0.859375, | |
| "learning_rate": 2.4508301472872372e-05, | |
| "loss": 0.9802470803260803, | |
| "step": 1211 | |
| }, | |
| { | |
| "epoch": 0.561501042390549, | |
| "grad_norm": 0.828125, | |
| "learning_rate": 2.4475512269264102e-05, | |
| "loss": 1.040086269378662, | |
| "step": 1212 | |
| }, | |
| { | |
| "epoch": 0.56196432707899, | |
| "grad_norm": 0.8125, | |
| "learning_rate": 2.4442722177716155e-05, | |
| "loss": 0.8640920519828796, | |
| "step": 1213 | |
| }, | |
| { | |
| "epoch": 0.5624276117674311, | |
| "grad_norm": 0.96484375, | |
| "learning_rate": 2.4409931259458534e-05, | |
| "loss": 1.000259518623352, | |
| "step": 1214 | |
| }, | |
| { | |
| "epoch": 0.5628908964558721, | |
| "grad_norm": 0.88671875, | |
| "learning_rate": 2.4377139575722816e-05, | |
| "loss": 1.037365198135376, | |
| "step": 1215 | |
| }, | |
| { | |
| "epoch": 0.5633541811443131, | |
| "grad_norm": 0.84375, | |
| "learning_rate": 2.4344347187741992e-05, | |
| "loss": 0.8693150281906128, | |
| "step": 1216 | |
| }, | |
| { | |
| "epoch": 0.5638174658327543, | |
| "grad_norm": 1.09375, | |
| "learning_rate": 2.4311554156750342e-05, | |
| "loss": 1.0040010213851929, | |
| "step": 1217 | |
| }, | |
| { | |
| "epoch": 0.5642807505211953, | |
| "grad_norm": 0.87890625, | |
| "learning_rate": 2.427876054398339e-05, | |
| "loss": 1.1392626762390137, | |
| "step": 1218 | |
| }, | |
| { | |
| "epoch": 0.5647440352096363, | |
| "grad_norm": 1.2265625, | |
| "learning_rate": 2.424596641067772e-05, | |
| "loss": 1.005986213684082, | |
| "step": 1219 | |
| }, | |
| { | |
| "epoch": 0.5652073198980774, | |
| "grad_norm": 0.890625, | |
| "learning_rate": 2.4213171818070896e-05, | |
| "loss": 0.8679553270339966, | |
| "step": 1220 | |
| }, | |
| { | |
| "epoch": 0.5656706045865184, | |
| "grad_norm": 0.97265625, | |
| "learning_rate": 2.4180376827401326e-05, | |
| "loss": 1.0555665493011475, | |
| "step": 1221 | |
| }, | |
| { | |
| "epoch": 0.5661338892749594, | |
| "grad_norm": 0.7421875, | |
| "learning_rate": 2.4147581499908194e-05, | |
| "loss": 0.7991371154785156, | |
| "step": 1222 | |
| }, | |
| { | |
| "epoch": 0.5665971739634005, | |
| "grad_norm": 1.546875, | |
| "learning_rate": 2.4114785896831272e-05, | |
| "loss": 1.079115390777588, | |
| "step": 1223 | |
| }, | |
| { | |
| "epoch": 0.5670604586518415, | |
| "grad_norm": 0.7890625, | |
| "learning_rate": 2.4081990079410877e-05, | |
| "loss": 0.9234097599983215, | |
| "step": 1224 | |
| }, | |
| { | |
| "epoch": 0.5675237433402825, | |
| "grad_norm": 0.79296875, | |
| "learning_rate": 2.4049194108887712e-05, | |
| "loss": 0.8750408291816711, | |
| "step": 1225 | |
| }, | |
| { | |
| "epoch": 0.5679870280287237, | |
| "grad_norm": 0.8359375, | |
| "learning_rate": 2.401639804650277e-05, | |
| "loss": 0.959878146648407, | |
| "step": 1226 | |
| }, | |
| { | |
| "epoch": 0.5684503127171647, | |
| "grad_norm": 1.0234375, | |
| "learning_rate": 2.398360195349723e-05, | |
| "loss": 0.8520339131355286, | |
| "step": 1227 | |
| }, | |
| { | |
| "epoch": 0.5689135974056058, | |
| "grad_norm": 1.1484375, | |
| "learning_rate": 2.3950805891112282e-05, | |
| "loss": 0.9484376907348633, | |
| "step": 1228 | |
| }, | |
| { | |
| "epoch": 0.5693768820940468, | |
| "grad_norm": 0.796875, | |
| "learning_rate": 2.391800992058913e-05, | |
| "loss": 1.069106101989746, | |
| "step": 1229 | |
| }, | |
| { | |
| "epoch": 0.5698401667824878, | |
| "grad_norm": 0.76171875, | |
| "learning_rate": 2.3885214103168733e-05, | |
| "loss": 0.8645256161689758, | |
| "step": 1230 | |
| }, | |
| { | |
| "epoch": 0.5703034514709289, | |
| "grad_norm": 0.74609375, | |
| "learning_rate": 2.3852418500091814e-05, | |
| "loss": 0.8843850493431091, | |
| "step": 1231 | |
| }, | |
| { | |
| "epoch": 0.5707667361593699, | |
| "grad_norm": 0.77734375, | |
| "learning_rate": 2.3819623172598675e-05, | |
| "loss": 0.848798394203186, | |
| "step": 1232 | |
| }, | |
| { | |
| "epoch": 0.5712300208478109, | |
| "grad_norm": 0.75390625, | |
| "learning_rate": 2.378682818192911e-05, | |
| "loss": 0.867169201374054, | |
| "step": 1233 | |
| }, | |
| { | |
| "epoch": 0.571693305536252, | |
| "grad_norm": 0.87109375, | |
| "learning_rate": 2.375403358932229e-05, | |
| "loss": 0.9214097857475281, | |
| "step": 1234 | |
| }, | |
| { | |
| "epoch": 0.5721565902246931, | |
| "grad_norm": 1.0234375, | |
| "learning_rate": 2.372123945601661e-05, | |
| "loss": 1.0255508422851562, | |
| "step": 1235 | |
| }, | |
| { | |
| "epoch": 0.5726198749131342, | |
| "grad_norm": 0.83203125, | |
| "learning_rate": 2.3688445843249663e-05, | |
| "loss": 1.0348937511444092, | |
| "step": 1236 | |
| }, | |
| { | |
| "epoch": 0.5730831596015752, | |
| "grad_norm": 0.83203125, | |
| "learning_rate": 2.3655652812258016e-05, | |
| "loss": 0.9052950143814087, | |
| "step": 1237 | |
| }, | |
| { | |
| "epoch": 0.5735464442900162, | |
| "grad_norm": 0.8046875, | |
| "learning_rate": 2.3622860424277185e-05, | |
| "loss": 1.0120303630828857, | |
| "step": 1238 | |
| }, | |
| { | |
| "epoch": 0.5740097289784573, | |
| "grad_norm": 0.8984375, | |
| "learning_rate": 2.359006874054147e-05, | |
| "loss": 1.0887019634246826, | |
| "step": 1239 | |
| }, | |
| { | |
| "epoch": 0.5744730136668983, | |
| "grad_norm": 0.921875, | |
| "learning_rate": 2.3557277822283853e-05, | |
| "loss": 0.9740076065063477, | |
| "step": 1240 | |
| }, | |
| { | |
| "epoch": 0.5749362983553393, | |
| "grad_norm": 0.83984375, | |
| "learning_rate": 2.352448773073591e-05, | |
| "loss": 0.9325964450836182, | |
| "step": 1241 | |
| }, | |
| { | |
| "epoch": 0.5753995830437804, | |
| "grad_norm": 0.8828125, | |
| "learning_rate": 2.3491698527127626e-05, | |
| "loss": 0.9349949955940247, | |
| "step": 1242 | |
| }, | |
| { | |
| "epoch": 0.5758628677322214, | |
| "grad_norm": 0.984375, | |
| "learning_rate": 2.345891027268739e-05, | |
| "loss": 0.8547556400299072, | |
| "step": 1243 | |
| }, | |
| { | |
| "epoch": 0.5763261524206625, | |
| "grad_norm": 0.87109375, | |
| "learning_rate": 2.3426123028641764e-05, | |
| "loss": 0.8496311902999878, | |
| "step": 1244 | |
| }, | |
| { | |
| "epoch": 0.5767894371091036, | |
| "grad_norm": 0.828125, | |
| "learning_rate": 2.339333685621546e-05, | |
| "loss": 0.8979066610336304, | |
| "step": 1245 | |
| }, | |
| { | |
| "epoch": 0.5772527217975446, | |
| "grad_norm": 0.87890625, | |
| "learning_rate": 2.3360551816631163e-05, | |
| "loss": 0.9577649831771851, | |
| "step": 1246 | |
| }, | |
| { | |
| "epoch": 0.5777160064859856, | |
| "grad_norm": 0.921875, | |
| "learning_rate": 2.3327767971109457e-05, | |
| "loss": 0.844273567199707, | |
| "step": 1247 | |
| }, | |
| { | |
| "epoch": 0.5781792911744267, | |
| "grad_norm": 0.890625, | |
| "learning_rate": 2.32949853808687e-05, | |
| "loss": 0.9688032269477844, | |
| "step": 1248 | |
| }, | |
| { | |
| "epoch": 0.5786425758628677, | |
| "grad_norm": 0.828125, | |
| "learning_rate": 2.326220410712489e-05, | |
| "loss": 0.8966050148010254, | |
| "step": 1249 | |
| }, | |
| { | |
| "epoch": 0.5791058605513087, | |
| "grad_norm": 0.78125, | |
| "learning_rate": 2.322942421109159e-05, | |
| "loss": 0.8237859010696411, | |
| "step": 1250 | |
| }, | |
| { | |
| "epoch": 0.5795691452397498, | |
| "grad_norm": 0.8515625, | |
| "learning_rate": 2.319664575397975e-05, | |
| "loss": 0.998030424118042, | |
| "step": 1251 | |
| }, | |
| { | |
| "epoch": 0.5800324299281908, | |
| "grad_norm": 0.66796875, | |
| "learning_rate": 2.3163868796997697e-05, | |
| "loss": 0.8741839528083801, | |
| "step": 1252 | |
| }, | |
| { | |
| "epoch": 0.580495714616632, | |
| "grad_norm": 0.79296875, | |
| "learning_rate": 2.3131093401350886e-05, | |
| "loss": 0.8840314745903015, | |
| "step": 1253 | |
| }, | |
| { | |
| "epoch": 0.580958999305073, | |
| "grad_norm": 0.8046875, | |
| "learning_rate": 2.309831962824191e-05, | |
| "loss": 0.8226364850997925, | |
| "step": 1254 | |
| }, | |
| { | |
| "epoch": 0.581422283993514, | |
| "grad_norm": 0.7734375, | |
| "learning_rate": 2.30655475388703e-05, | |
| "loss": 1.046274185180664, | |
| "step": 1255 | |
| }, | |
| { | |
| "epoch": 0.5818855686819551, | |
| "grad_norm": 0.76171875, | |
| "learning_rate": 2.3032777194432454e-05, | |
| "loss": 0.962317943572998, | |
| "step": 1256 | |
| }, | |
| { | |
| "epoch": 0.5823488533703961, | |
| "grad_norm": 0.8203125, | |
| "learning_rate": 2.3000008656121528e-05, | |
| "loss": 1.0184528827667236, | |
| "step": 1257 | |
| }, | |
| { | |
| "epoch": 0.5828121380588371, | |
| "grad_norm": 0.7578125, | |
| "learning_rate": 2.2967241985127264e-05, | |
| "loss": 0.9843693375587463, | |
| "step": 1258 | |
| }, | |
| { | |
| "epoch": 0.5832754227472782, | |
| "grad_norm": 0.765625, | |
| "learning_rate": 2.2934477242635975e-05, | |
| "loss": 0.8737512826919556, | |
| "step": 1259 | |
| }, | |
| { | |
| "epoch": 0.5837387074357192, | |
| "grad_norm": 0.78515625, | |
| "learning_rate": 2.2901714489830313e-05, | |
| "loss": 0.9993818998336792, | |
| "step": 1260 | |
| }, | |
| { | |
| "epoch": 0.5842019921241604, | |
| "grad_norm": 0.76171875, | |
| "learning_rate": 2.2868953787889257e-05, | |
| "loss": 0.8710107207298279, | |
| "step": 1261 | |
| }, | |
| { | |
| "epoch": 0.5846652768126014, | |
| "grad_norm": 0.765625, | |
| "learning_rate": 2.283619519798794e-05, | |
| "loss": 0.9011490345001221, | |
| "step": 1262 | |
| }, | |
| { | |
| "epoch": 0.5851285615010424, | |
| "grad_norm": 0.8671875, | |
| "learning_rate": 2.2803438781297542e-05, | |
| "loss": 1.043053150177002, | |
| "step": 1263 | |
| }, | |
| { | |
| "epoch": 0.5855918461894835, | |
| "grad_norm": 0.8515625, | |
| "learning_rate": 2.2770684598985215e-05, | |
| "loss": 0.9350489377975464, | |
| "step": 1264 | |
| }, | |
| { | |
| "epoch": 0.5860551308779245, | |
| "grad_norm": 0.8125, | |
| "learning_rate": 2.2737932712213897e-05, | |
| "loss": 0.9060037136077881, | |
| "step": 1265 | |
| }, | |
| { | |
| "epoch": 0.5865184155663655, | |
| "grad_norm": 0.796875, | |
| "learning_rate": 2.2705183182142283e-05, | |
| "loss": 1.0310790538787842, | |
| "step": 1266 | |
| }, | |
| { | |
| "epoch": 0.5869817002548066, | |
| "grad_norm": 0.90234375, | |
| "learning_rate": 2.2672436069924625e-05, | |
| "loss": 1.060436725616455, | |
| "step": 1267 | |
| }, | |
| { | |
| "epoch": 0.5874449849432476, | |
| "grad_norm": 0.8359375, | |
| "learning_rate": 2.2639691436710704e-05, | |
| "loss": 0.9399782419204712, | |
| "step": 1268 | |
| }, | |
| { | |
| "epoch": 0.5879082696316886, | |
| "grad_norm": 0.67578125, | |
| "learning_rate": 2.2606949343645633e-05, | |
| "loss": 0.7602555155754089, | |
| "step": 1269 | |
| }, | |
| { | |
| "epoch": 0.5883715543201298, | |
| "grad_norm": 0.8203125, | |
| "learning_rate": 2.2574209851869796e-05, | |
| "loss": 0.8838884830474854, | |
| "step": 1270 | |
| }, | |
| { | |
| "epoch": 0.5888348390085708, | |
| "grad_norm": 0.7578125, | |
| "learning_rate": 2.2541473022518734e-05, | |
| "loss": 0.9206914305686951, | |
| "step": 1271 | |
| }, | |
| { | |
| "epoch": 0.5892981236970118, | |
| "grad_norm": 0.96484375, | |
| "learning_rate": 2.250873891672299e-05, | |
| "loss": 1.0698550939559937, | |
| "step": 1272 | |
| }, | |
| { | |
| "epoch": 0.5897614083854529, | |
| "grad_norm": 0.95703125, | |
| "learning_rate": 2.247600759560805e-05, | |
| "loss": 1.186781406402588, | |
| "step": 1273 | |
| }, | |
| { | |
| "epoch": 0.5902246930738939, | |
| "grad_norm": 0.88671875, | |
| "learning_rate": 2.2443279120294167e-05, | |
| "loss": 0.859512209892273, | |
| "step": 1274 | |
| }, | |
| { | |
| "epoch": 0.590687977762335, | |
| "grad_norm": 1.0, | |
| "learning_rate": 2.2410553551896318e-05, | |
| "loss": 1.1249911785125732, | |
| "step": 1275 | |
| }, | |
| { | |
| "epoch": 0.591151262450776, | |
| "grad_norm": 1.015625, | |
| "learning_rate": 2.2377830951524016e-05, | |
| "loss": 0.8921389579772949, | |
| "step": 1276 | |
| }, | |
| { | |
| "epoch": 0.591614547139217, | |
| "grad_norm": 0.7578125, | |
| "learning_rate": 2.2345111380281246e-05, | |
| "loss": 1.017206072807312, | |
| "step": 1277 | |
| }, | |
| { | |
| "epoch": 0.592077831827658, | |
| "grad_norm": 0.85546875, | |
| "learning_rate": 2.231239489926635e-05, | |
| "loss": 0.8455703258514404, | |
| "step": 1278 | |
| }, | |
| { | |
| "epoch": 0.5925411165160992, | |
| "grad_norm": 0.82421875, | |
| "learning_rate": 2.2279681569571873e-05, | |
| "loss": 1.062213659286499, | |
| "step": 1279 | |
| }, | |
| { | |
| "epoch": 0.5930044012045402, | |
| "grad_norm": 0.8515625, | |
| "learning_rate": 2.2246971452284514e-05, | |
| "loss": 0.9728783965110779, | |
| "step": 1280 | |
| }, | |
| { | |
| "epoch": 0.5934676858929813, | |
| "grad_norm": 0.76171875, | |
| "learning_rate": 2.221426460848491e-05, | |
| "loss": 0.9838144779205322, | |
| "step": 1281 | |
| }, | |
| { | |
| "epoch": 0.5939309705814223, | |
| "grad_norm": 0.8125, | |
| "learning_rate": 2.218156109924767e-05, | |
| "loss": 0.996467649936676, | |
| "step": 1282 | |
| }, | |
| { | |
| "epoch": 0.5943942552698633, | |
| "grad_norm": 0.83984375, | |
| "learning_rate": 2.2148860985641098e-05, | |
| "loss": 0.9236133694648743, | |
| "step": 1283 | |
| }, | |
| { | |
| "epoch": 0.5948575399583044, | |
| "grad_norm": 0.92578125, | |
| "learning_rate": 2.2116164328727207e-05, | |
| "loss": 0.9230520129203796, | |
| "step": 1284 | |
| }, | |
| { | |
| "epoch": 0.5953208246467454, | |
| "grad_norm": 0.8515625, | |
| "learning_rate": 2.2083471189561532e-05, | |
| "loss": 1.163870096206665, | |
| "step": 1285 | |
| }, | |
| { | |
| "epoch": 0.5957841093351864, | |
| "grad_norm": 0.8359375, | |
| "learning_rate": 2.2050781629193035e-05, | |
| "loss": 0.9241350293159485, | |
| "step": 1286 | |
| }, | |
| { | |
| "epoch": 0.5962473940236275, | |
| "grad_norm": 0.81640625, | |
| "learning_rate": 2.2018095708664025e-05, | |
| "loss": 0.9975556135177612, | |
| "step": 1287 | |
| }, | |
| { | |
| "epoch": 0.5967106787120686, | |
| "grad_norm": 0.96875, | |
| "learning_rate": 2.198541348900998e-05, | |
| "loss": 0.9642297625541687, | |
| "step": 1288 | |
| }, | |
| { | |
| "epoch": 0.5971739634005097, | |
| "grad_norm": 0.74609375, | |
| "learning_rate": 2.19527350312595e-05, | |
| "loss": 1.0175257921218872, | |
| "step": 1289 | |
| }, | |
| { | |
| "epoch": 0.5976372480889507, | |
| "grad_norm": 0.84765625, | |
| "learning_rate": 2.1920060396434114e-05, | |
| "loss": 0.8721986413002014, | |
| "step": 1290 | |
| }, | |
| { | |
| "epoch": 0.5981005327773917, | |
| "grad_norm": 0.8359375, | |
| "learning_rate": 2.188738964554827e-05, | |
| "loss": 0.9678272008895874, | |
| "step": 1291 | |
| }, | |
| { | |
| "epoch": 0.5985638174658328, | |
| "grad_norm": 0.91015625, | |
| "learning_rate": 2.185472283960912e-05, | |
| "loss": 0.8769973516464233, | |
| "step": 1292 | |
| }, | |
| { | |
| "epoch": 0.5990271021542738, | |
| "grad_norm": 0.84765625, | |
| "learning_rate": 2.182206003961646e-05, | |
| "loss": 0.8562041521072388, | |
| "step": 1293 | |
| }, | |
| { | |
| "epoch": 0.5994903868427148, | |
| "grad_norm": 0.765625, | |
| "learning_rate": 2.1789401306562614e-05, | |
| "loss": 0.9333268404006958, | |
| "step": 1294 | |
| }, | |
| { | |
| "epoch": 0.5999536715311559, | |
| "grad_norm": 0.76171875, | |
| "learning_rate": 2.1756746701432304e-05, | |
| "loss": 1.0222218036651611, | |
| "step": 1295 | |
| }, | |
| { | |
| "epoch": 0.6004169562195969, | |
| "grad_norm": 0.80859375, | |
| "learning_rate": 2.172409628520256e-05, | |
| "loss": 0.9753302931785583, | |
| "step": 1296 | |
| }, | |
| { | |
| "epoch": 0.600880240908038, | |
| "grad_norm": 0.78125, | |
| "learning_rate": 2.169145011884255e-05, | |
| "loss": 0.7853343486785889, | |
| "step": 1297 | |
| }, | |
| { | |
| "epoch": 0.6013435255964791, | |
| "grad_norm": 0.890625, | |
| "learning_rate": 2.1658808263313563e-05, | |
| "loss": 0.9388110637664795, | |
| "step": 1298 | |
| }, | |
| { | |
| "epoch": 0.6018068102849201, | |
| "grad_norm": 0.8203125, | |
| "learning_rate": 2.162617077956879e-05, | |
| "loss": 1.0297507047653198, | |
| "step": 1299 | |
| }, | |
| { | |
| "epoch": 0.6022700949733611, | |
| "grad_norm": 0.890625, | |
| "learning_rate": 2.1593537728553272e-05, | |
| "loss": 1.123502492904663, | |
| "step": 1300 | |
| }, | |
| { | |
| "epoch": 0.6027333796618022, | |
| "grad_norm": 0.85546875, | |
| "learning_rate": 2.156090917120379e-05, | |
| "loss": 0.987675130367279, | |
| "step": 1301 | |
| }, | |
| { | |
| "epoch": 0.6031966643502432, | |
| "grad_norm": 0.8046875, | |
| "learning_rate": 2.152828516844871e-05, | |
| "loss": 0.9241386651992798, | |
| "step": 1302 | |
| }, | |
| { | |
| "epoch": 0.6036599490386843, | |
| "grad_norm": 0.765625, | |
| "learning_rate": 2.149566578120792e-05, | |
| "loss": 1.0078705549240112, | |
| "step": 1303 | |
| }, | |
| { | |
| "epoch": 0.6041232337271253, | |
| "grad_norm": 0.80078125, | |
| "learning_rate": 2.1463051070392643e-05, | |
| "loss": 1.0738126039505005, | |
| "step": 1304 | |
| }, | |
| { | |
| "epoch": 0.6045865184155663, | |
| "grad_norm": 0.75390625, | |
| "learning_rate": 2.1430441096905437e-05, | |
| "loss": 0.9489339590072632, | |
| "step": 1305 | |
| }, | |
| { | |
| "epoch": 0.6050498031040075, | |
| "grad_norm": 0.9375, | |
| "learning_rate": 2.139783592163994e-05, | |
| "loss": 0.7799573540687561, | |
| "step": 1306 | |
| }, | |
| { | |
| "epoch": 0.6055130877924485, | |
| "grad_norm": 0.80859375, | |
| "learning_rate": 2.13652356054809e-05, | |
| "loss": 0.957494854927063, | |
| "step": 1307 | |
| }, | |
| { | |
| "epoch": 0.6059763724808895, | |
| "grad_norm": 0.89453125, | |
| "learning_rate": 2.1332640209303938e-05, | |
| "loss": 0.939851701259613, | |
| "step": 1308 | |
| }, | |
| { | |
| "epoch": 0.6064396571693306, | |
| "grad_norm": 0.70703125, | |
| "learning_rate": 2.1300049793975505e-05, | |
| "loss": 0.8805224895477295, | |
| "step": 1309 | |
| }, | |
| { | |
| "epoch": 0.6069029418577716, | |
| "grad_norm": 0.79296875, | |
| "learning_rate": 2.126746442035277e-05, | |
| "loss": 0.8852795958518982, | |
| "step": 1310 | |
| }, | |
| { | |
| "epoch": 0.6073662265462126, | |
| "grad_norm": 1.046875, | |
| "learning_rate": 2.1234884149283453e-05, | |
| "loss": 1.1347432136535645, | |
| "step": 1311 | |
| }, | |
| { | |
| "epoch": 0.6078295112346537, | |
| "grad_norm": 0.8515625, | |
| "learning_rate": 2.1202309041605784e-05, | |
| "loss": 0.8818974494934082, | |
| "step": 1312 | |
| }, | |
| { | |
| "epoch": 0.6082927959230947, | |
| "grad_norm": 0.90625, | |
| "learning_rate": 2.1169739158148307e-05, | |
| "loss": 1.2017409801483154, | |
| "step": 1313 | |
| }, | |
| { | |
| "epoch": 0.6087560806115357, | |
| "grad_norm": 0.73828125, | |
| "learning_rate": 2.113717455972986e-05, | |
| "loss": 0.8007137179374695, | |
| "step": 1314 | |
| }, | |
| { | |
| "epoch": 0.6092193652999769, | |
| "grad_norm": 0.85546875, | |
| "learning_rate": 2.1104615307159377e-05, | |
| "loss": 1.1176743507385254, | |
| "step": 1315 | |
| }, | |
| { | |
| "epoch": 0.6096826499884179, | |
| "grad_norm": 0.72265625, | |
| "learning_rate": 2.1072061461235807e-05, | |
| "loss": 1.1391681432724, | |
| "step": 1316 | |
| }, | |
| { | |
| "epoch": 0.610145934676859, | |
| "grad_norm": 1.0625, | |
| "learning_rate": 2.1039513082748037e-05, | |
| "loss": 1.0258846282958984, | |
| "step": 1317 | |
| }, | |
| { | |
| "epoch": 0.6106092193653, | |
| "grad_norm": 0.7578125, | |
| "learning_rate": 2.1006970232474706e-05, | |
| "loss": 0.8150526881217957, | |
| "step": 1318 | |
| }, | |
| { | |
| "epoch": 0.611072504053741, | |
| "grad_norm": 0.796875, | |
| "learning_rate": 2.0974432971184163e-05, | |
| "loss": 0.9746679067611694, | |
| "step": 1319 | |
| }, | |
| { | |
| "epoch": 0.6115357887421821, | |
| "grad_norm": 0.74609375, | |
| "learning_rate": 2.0941901359634282e-05, | |
| "loss": 0.9087226390838623, | |
| "step": 1320 | |
| }, | |
| { | |
| "epoch": 0.6119990734306231, | |
| "grad_norm": 0.75, | |
| "learning_rate": 2.0909375458572435e-05, | |
| "loss": 0.7601380348205566, | |
| "step": 1321 | |
| }, | |
| { | |
| "epoch": 0.6124623581190641, | |
| "grad_norm": 0.71875, | |
| "learning_rate": 2.0876855328735293e-05, | |
| "loss": 0.8713633418083191, | |
| "step": 1322 | |
| }, | |
| { | |
| "epoch": 0.6129256428075052, | |
| "grad_norm": 0.9296875, | |
| "learning_rate": 2.0844341030848755e-05, | |
| "loss": 0.8106034398078918, | |
| "step": 1323 | |
| }, | |
| { | |
| "epoch": 0.6133889274959463, | |
| "grad_norm": 0.890625, | |
| "learning_rate": 2.0811832625627853e-05, | |
| "loss": 0.9880518913269043, | |
| "step": 1324 | |
| }, | |
| { | |
| "epoch": 0.6138522121843873, | |
| "grad_norm": 1.0, | |
| "learning_rate": 2.0779330173776586e-05, | |
| "loss": 1.0225927829742432, | |
| "step": 1325 | |
| }, | |
| { | |
| "epoch": 0.6143154968728284, | |
| "grad_norm": 0.80078125, | |
| "learning_rate": 2.074683373598786e-05, | |
| "loss": 0.8777214288711548, | |
| "step": 1326 | |
| }, | |
| { | |
| "epoch": 0.6147787815612694, | |
| "grad_norm": 0.81640625, | |
| "learning_rate": 2.0714343372943328e-05, | |
| "loss": 0.9182271957397461, | |
| "step": 1327 | |
| }, | |
| { | |
| "epoch": 0.6152420662497105, | |
| "grad_norm": 0.87109375, | |
| "learning_rate": 2.0681859145313334e-05, | |
| "loss": 1.0506292581558228, | |
| "step": 1328 | |
| }, | |
| { | |
| "epoch": 0.6157053509381515, | |
| "grad_norm": 0.78125, | |
| "learning_rate": 2.0649381113756725e-05, | |
| "loss": 0.9353680610656738, | |
| "step": 1329 | |
| }, | |
| { | |
| "epoch": 0.6161686356265925, | |
| "grad_norm": 0.85546875, | |
| "learning_rate": 2.0616909338920793e-05, | |
| "loss": 0.9634753465652466, | |
| "step": 1330 | |
| }, | |
| { | |
| "epoch": 0.6166319203150336, | |
| "grad_norm": 0.77734375, | |
| "learning_rate": 2.058444388144116e-05, | |
| "loss": 0.8841714859008789, | |
| "step": 1331 | |
| }, | |
| { | |
| "epoch": 0.6170952050034746, | |
| "grad_norm": 0.7265625, | |
| "learning_rate": 2.0551984801941635e-05, | |
| "loss": 0.8491650819778442, | |
| "step": 1332 | |
| }, | |
| { | |
| "epoch": 0.6175584896919157, | |
| "grad_norm": 0.86328125, | |
| "learning_rate": 2.051953216103413e-05, | |
| "loss": 1.1438350677490234, | |
| "step": 1333 | |
| }, | |
| { | |
| "epoch": 0.6180217743803568, | |
| "grad_norm": 0.80859375, | |
| "learning_rate": 2.048708601931852e-05, | |
| "loss": 0.9793174862861633, | |
| "step": 1334 | |
| }, | |
| { | |
| "epoch": 0.6184850590687978, | |
| "grad_norm": 0.9375, | |
| "learning_rate": 2.0454646437382557e-05, | |
| "loss": 0.933502197265625, | |
| "step": 1335 | |
| }, | |
| { | |
| "epoch": 0.6189483437572388, | |
| "grad_norm": 0.734375, | |
| "learning_rate": 2.0422213475801728e-05, | |
| "loss": 1.0236554145812988, | |
| "step": 1336 | |
| }, | |
| { | |
| "epoch": 0.6194116284456799, | |
| "grad_norm": 0.984375, | |
| "learning_rate": 2.0389787195139183e-05, | |
| "loss": 1.0013253688812256, | |
| "step": 1337 | |
| }, | |
| { | |
| "epoch": 0.6198749131341209, | |
| "grad_norm": 0.84375, | |
| "learning_rate": 2.035736765594557e-05, | |
| "loss": 0.8655804991722107, | |
| "step": 1338 | |
| }, | |
| { | |
| "epoch": 0.6203381978225619, | |
| "grad_norm": 0.8515625, | |
| "learning_rate": 2.0324954918758952e-05, | |
| "loss": 0.9141555428504944, | |
| "step": 1339 | |
| }, | |
| { | |
| "epoch": 0.620801482511003, | |
| "grad_norm": 0.79296875, | |
| "learning_rate": 2.0292549044104714e-05, | |
| "loss": 0.8629526495933533, | |
| "step": 1340 | |
| }, | |
| { | |
| "epoch": 0.621264767199444, | |
| "grad_norm": 0.84765625, | |
| "learning_rate": 2.0260150092495398e-05, | |
| "loss": 0.8775175213813782, | |
| "step": 1341 | |
| }, | |
| { | |
| "epoch": 0.6217280518878852, | |
| "grad_norm": 0.75390625, | |
| "learning_rate": 2.0227758124430642e-05, | |
| "loss": 0.9920527935028076, | |
| "step": 1342 | |
| }, | |
| { | |
| "epoch": 0.6221913365763262, | |
| "grad_norm": 0.8359375, | |
| "learning_rate": 2.019537320039701e-05, | |
| "loss": 0.9502109289169312, | |
| "step": 1343 | |
| }, | |
| { | |
| "epoch": 0.6226546212647672, | |
| "grad_norm": 0.73828125, | |
| "learning_rate": 2.0162995380867968e-05, | |
| "loss": 0.9974714517593384, | |
| "step": 1344 | |
| }, | |
| { | |
| "epoch": 0.6231179059532083, | |
| "grad_norm": 0.890625, | |
| "learning_rate": 2.0130624726303653e-05, | |
| "loss": 0.8903458714485168, | |
| "step": 1345 | |
| }, | |
| { | |
| "epoch": 0.6235811906416493, | |
| "grad_norm": 0.79296875, | |
| "learning_rate": 2.0098261297150857e-05, | |
| "loss": 0.8124538660049438, | |
| "step": 1346 | |
| }, | |
| { | |
| "epoch": 0.6240444753300903, | |
| "grad_norm": 0.94140625, | |
| "learning_rate": 2.0065905153842885e-05, | |
| "loss": 0.925495982170105, | |
| "step": 1347 | |
| }, | |
| { | |
| "epoch": 0.6245077600185314, | |
| "grad_norm": 0.796875, | |
| "learning_rate": 2.0033556356799412e-05, | |
| "loss": 1.0063080787658691, | |
| "step": 1348 | |
| }, | |
| { | |
| "epoch": 0.6249710447069724, | |
| "grad_norm": 0.78125, | |
| "learning_rate": 2.0001214966426418e-05, | |
| "loss": 0.857069730758667, | |
| "step": 1349 | |
| }, | |
| { | |
| "epoch": 0.6254343293954134, | |
| "grad_norm": 0.8046875, | |
| "learning_rate": 1.9968881043116043e-05, | |
| "loss": 1.0339258909225464, | |
| "step": 1350 | |
| }, | |
| { | |
| "epoch": 0.6258976140838546, | |
| "grad_norm": 0.84375, | |
| "learning_rate": 1.993655464724649e-05, | |
| "loss": 0.8328740000724792, | |
| "step": 1351 | |
| }, | |
| { | |
| "epoch": 0.6263608987722956, | |
| "grad_norm": 0.75390625, | |
| "learning_rate": 1.9904235839181884e-05, | |
| "loss": 1.000057578086853, | |
| "step": 1352 | |
| }, | |
| { | |
| "epoch": 0.6268241834607367, | |
| "grad_norm": 0.84765625, | |
| "learning_rate": 1.98719246792722e-05, | |
| "loss": 0.9434694647789001, | |
| "step": 1353 | |
| }, | |
| { | |
| "epoch": 0.6272874681491777, | |
| "grad_norm": 0.84765625, | |
| "learning_rate": 1.983962122785314e-05, | |
| "loss": 0.8397997617721558, | |
| "step": 1354 | |
| }, | |
| { | |
| "epoch": 0.6277507528376187, | |
| "grad_norm": 0.90625, | |
| "learning_rate": 1.9807325545245985e-05, | |
| "loss": 1.1510478258132935, | |
| "step": 1355 | |
| }, | |
| { | |
| "epoch": 0.6282140375260598, | |
| "grad_norm": 0.8203125, | |
| "learning_rate": 1.9775037691757538e-05, | |
| "loss": 0.9587162733078003, | |
| "step": 1356 | |
| }, | |
| { | |
| "epoch": 0.6286773222145008, | |
| "grad_norm": 0.984375, | |
| "learning_rate": 1.9742757727679956e-05, | |
| "loss": 0.8667926788330078, | |
| "step": 1357 | |
| }, | |
| { | |
| "epoch": 0.6291406069029418, | |
| "grad_norm": 0.7890625, | |
| "learning_rate": 1.971048571329069e-05, | |
| "loss": 0.8282247185707092, | |
| "step": 1358 | |
| }, | |
| { | |
| "epoch": 0.6296038915913829, | |
| "grad_norm": 0.94921875, | |
| "learning_rate": 1.967822170885231e-05, | |
| "loss": 1.0788094997406006, | |
| "step": 1359 | |
| }, | |
| { | |
| "epoch": 0.630067176279824, | |
| "grad_norm": 0.8046875, | |
| "learning_rate": 1.964596577461248e-05, | |
| "loss": 0.9873729348182678, | |
| "step": 1360 | |
| }, | |
| { | |
| "epoch": 0.630530460968265, | |
| "grad_norm": 0.8046875, | |
| "learning_rate": 1.9613717970803744e-05, | |
| "loss": 0.8831279873847961, | |
| "step": 1361 | |
| }, | |
| { | |
| "epoch": 0.6309937456567061, | |
| "grad_norm": 0.7890625, | |
| "learning_rate": 1.9581478357643482e-05, | |
| "loss": 0.9821964502334595, | |
| "step": 1362 | |
| }, | |
| { | |
| "epoch": 0.6314570303451471, | |
| "grad_norm": 0.83984375, | |
| "learning_rate": 1.9549246995333805e-05, | |
| "loss": 0.9710639119148254, | |
| "step": 1363 | |
| }, | |
| { | |
| "epoch": 0.6319203150335881, | |
| "grad_norm": 0.828125, | |
| "learning_rate": 1.951702394406137e-05, | |
| "loss": 0.8819524645805359, | |
| "step": 1364 | |
| }, | |
| { | |
| "epoch": 0.6323835997220292, | |
| "grad_norm": 0.765625, | |
| "learning_rate": 1.948480926399736e-05, | |
| "loss": 0.8822925090789795, | |
| "step": 1365 | |
| }, | |
| { | |
| "epoch": 0.6328468844104702, | |
| "grad_norm": 0.87109375, | |
| "learning_rate": 1.9452603015297282e-05, | |
| "loss": 0.9902225136756897, | |
| "step": 1366 | |
| }, | |
| { | |
| "epoch": 0.6333101690989112, | |
| "grad_norm": 0.87890625, | |
| "learning_rate": 1.942040525810095e-05, | |
| "loss": 0.877446174621582, | |
| "step": 1367 | |
| }, | |
| { | |
| "epoch": 0.6337734537873523, | |
| "grad_norm": 0.8515625, | |
| "learning_rate": 1.938821605253227e-05, | |
| "loss": 1.0983047485351562, | |
| "step": 1368 | |
| }, | |
| { | |
| "epoch": 0.6342367384757934, | |
| "grad_norm": 0.88671875, | |
| "learning_rate": 1.9356035458699208e-05, | |
| "loss": 1.289228916168213, | |
| "step": 1369 | |
| }, | |
| { | |
| "epoch": 0.6347000231642345, | |
| "grad_norm": 0.8984375, | |
| "learning_rate": 1.9323863536693654e-05, | |
| "loss": 0.9632030129432678, | |
| "step": 1370 | |
| }, | |
| { | |
| "epoch": 0.6351633078526755, | |
| "grad_norm": 0.90625, | |
| "learning_rate": 1.929170034659128e-05, | |
| "loss": 0.8725213408470154, | |
| "step": 1371 | |
| }, | |
| { | |
| "epoch": 0.6356265925411165, | |
| "grad_norm": 0.91796875, | |
| "learning_rate": 1.925954594845148e-05, | |
| "loss": 1.0300366878509521, | |
| "step": 1372 | |
| }, | |
| { | |
| "epoch": 0.6360898772295576, | |
| "grad_norm": 0.80859375, | |
| "learning_rate": 1.9227400402317202e-05, | |
| "loss": 1.045912504196167, | |
| "step": 1373 | |
| }, | |
| { | |
| "epoch": 0.6365531619179986, | |
| "grad_norm": 0.72265625, | |
| "learning_rate": 1.9195263768214903e-05, | |
| "loss": 0.9979844093322754, | |
| "step": 1374 | |
| }, | |
| { | |
| "epoch": 0.6370164466064396, | |
| "grad_norm": 0.765625, | |
| "learning_rate": 1.9163136106154358e-05, | |
| "loss": 0.8099144697189331, | |
| "step": 1375 | |
| }, | |
| { | |
| "epoch": 0.6374797312948807, | |
| "grad_norm": 0.89453125, | |
| "learning_rate": 1.9131017476128604e-05, | |
| "loss": 0.9197705388069153, | |
| "step": 1376 | |
| }, | |
| { | |
| "epoch": 0.6379430159833217, | |
| "grad_norm": 0.72265625, | |
| "learning_rate": 1.9098907938113824e-05, | |
| "loss": 0.8932551741600037, | |
| "step": 1377 | |
| }, | |
| { | |
| "epoch": 0.6384063006717628, | |
| "grad_norm": 0.88671875, | |
| "learning_rate": 1.90668075520692e-05, | |
| "loss": 0.8447968363761902, | |
| "step": 1378 | |
| }, | |
| { | |
| "epoch": 0.6388695853602039, | |
| "grad_norm": 1.125, | |
| "learning_rate": 1.9034716377936855e-05, | |
| "loss": 1.2065844535827637, | |
| "step": 1379 | |
| }, | |
| { | |
| "epoch": 0.6393328700486449, | |
| "grad_norm": 0.87109375, | |
| "learning_rate": 1.9002634475641678e-05, | |
| "loss": 0.8183916211128235, | |
| "step": 1380 | |
| }, | |
| { | |
| "epoch": 0.639796154737086, | |
| "grad_norm": 0.86328125, | |
| "learning_rate": 1.8970561905091263e-05, | |
| "loss": 0.875165581703186, | |
| "step": 1381 | |
| }, | |
| { | |
| "epoch": 0.640259439425527, | |
| "grad_norm": 0.7421875, | |
| "learning_rate": 1.8938498726175775e-05, | |
| "loss": 0.9768784642219543, | |
| "step": 1382 | |
| }, | |
| { | |
| "epoch": 0.640722724113968, | |
| "grad_norm": 0.87109375, | |
| "learning_rate": 1.8906444998767847e-05, | |
| "loss": 0.8940538763999939, | |
| "step": 1383 | |
| }, | |
| { | |
| "epoch": 0.6411860088024091, | |
| "grad_norm": 0.80859375, | |
| "learning_rate": 1.887440078272245e-05, | |
| "loss": 0.8721079230308533, | |
| "step": 1384 | |
| }, | |
| { | |
| "epoch": 0.6416492934908501, | |
| "grad_norm": 0.828125, | |
| "learning_rate": 1.884236613787679e-05, | |
| "loss": 0.8797422051429749, | |
| "step": 1385 | |
| }, | |
| { | |
| "epoch": 0.6421125781792911, | |
| "grad_norm": 0.98046875, | |
| "learning_rate": 1.881034112405023e-05, | |
| "loss": 0.978660523891449, | |
| "step": 1386 | |
| }, | |
| { | |
| "epoch": 0.6425758628677323, | |
| "grad_norm": 0.73828125, | |
| "learning_rate": 1.8778325801044118e-05, | |
| "loss": 0.9717423319816589, | |
| "step": 1387 | |
| }, | |
| { | |
| "epoch": 0.6430391475561733, | |
| "grad_norm": 0.8125, | |
| "learning_rate": 1.8746320228641726e-05, | |
| "loss": 0.9432107210159302, | |
| "step": 1388 | |
| }, | |
| { | |
| "epoch": 0.6435024322446143, | |
| "grad_norm": 0.78515625, | |
| "learning_rate": 1.8714324466608103e-05, | |
| "loss": 0.7963858246803284, | |
| "step": 1389 | |
| }, | |
| { | |
| "epoch": 0.6439657169330554, | |
| "grad_norm": 0.8359375, | |
| "learning_rate": 1.8682338574690004e-05, | |
| "loss": 1.0017811059951782, | |
| "step": 1390 | |
| }, | |
| { | |
| "epoch": 0.6444290016214964, | |
| "grad_norm": 0.73046875, | |
| "learning_rate": 1.8650362612615715e-05, | |
| "loss": 0.9127390384674072, | |
| "step": 1391 | |
| }, | |
| { | |
| "epoch": 0.6448922863099374, | |
| "grad_norm": 0.94140625, | |
| "learning_rate": 1.8618396640095e-05, | |
| "loss": 1.0621755123138428, | |
| "step": 1392 | |
| }, | |
| { | |
| "epoch": 0.6453555709983785, | |
| "grad_norm": 0.78125, | |
| "learning_rate": 1.8586440716818984e-05, | |
| "loss": 0.8622088432312012, | |
| "step": 1393 | |
| }, | |
| { | |
| "epoch": 0.6458188556868195, | |
| "grad_norm": 0.75390625, | |
| "learning_rate": 1.8554494902459995e-05, | |
| "loss": 0.8669024705886841, | |
| "step": 1394 | |
| }, | |
| { | |
| "epoch": 0.6462821403752605, | |
| "grad_norm": 0.734375, | |
| "learning_rate": 1.852255925667152e-05, | |
| "loss": 1.1172361373901367, | |
| "step": 1395 | |
| }, | |
| { | |
| "epoch": 0.6467454250637017, | |
| "grad_norm": 0.8046875, | |
| "learning_rate": 1.8490633839088025e-05, | |
| "loss": 0.879069447517395, | |
| "step": 1396 | |
| }, | |
| { | |
| "epoch": 0.6472087097521427, | |
| "grad_norm": 0.765625, | |
| "learning_rate": 1.84587187093249e-05, | |
| "loss": 0.9635634422302246, | |
| "step": 1397 | |
| }, | |
| { | |
| "epoch": 0.6476719944405838, | |
| "grad_norm": 0.85546875, | |
| "learning_rate": 1.8426813926978312e-05, | |
| "loss": 1.092876672744751, | |
| "step": 1398 | |
| }, | |
| { | |
| "epoch": 0.6481352791290248, | |
| "grad_norm": 0.71875, | |
| "learning_rate": 1.83949195516251e-05, | |
| "loss": 0.7953818440437317, | |
| "step": 1399 | |
| }, | |
| { | |
| "epoch": 0.6485985638174658, | |
| "grad_norm": 0.87890625, | |
| "learning_rate": 1.836303564282269e-05, | |
| "loss": 0.9496350288391113, | |
| "step": 1400 | |
| }, | |
| { | |
| "epoch": 0.6490618485059069, | |
| "grad_norm": 0.796875, | |
| "learning_rate": 1.8331162260108945e-05, | |
| "loss": 0.9301480054855347, | |
| "step": 1401 | |
| }, | |
| { | |
| "epoch": 0.6495251331943479, | |
| "grad_norm": 0.8515625, | |
| "learning_rate": 1.8299299463002083e-05, | |
| "loss": 0.8314633965492249, | |
| "step": 1402 | |
| }, | |
| { | |
| "epoch": 0.6499884178827889, | |
| "grad_norm": 0.87890625, | |
| "learning_rate": 1.826744731100055e-05, | |
| "loss": 0.7800553441047668, | |
| "step": 1403 | |
| }, | |
| { | |
| "epoch": 0.65045170257123, | |
| "grad_norm": 0.86328125, | |
| "learning_rate": 1.823560586358292e-05, | |
| "loss": 0.8455618619918823, | |
| "step": 1404 | |
| }, | |
| { | |
| "epoch": 0.6509149872596711, | |
| "grad_norm": 0.8203125, | |
| "learning_rate": 1.8203775180207772e-05, | |
| "loss": 0.7878734469413757, | |
| "step": 1405 | |
| }, | |
| { | |
| "epoch": 0.6513782719481122, | |
| "grad_norm": 0.81640625, | |
| "learning_rate": 1.8171955320313575e-05, | |
| "loss": 0.8364105820655823, | |
| "step": 1406 | |
| }, | |
| { | |
| "epoch": 0.6518415566365532, | |
| "grad_norm": 0.828125, | |
| "learning_rate": 1.814014634331861e-05, | |
| "loss": 0.9706379771232605, | |
| "step": 1407 | |
| }, | |
| { | |
| "epoch": 0.6523048413249942, | |
| "grad_norm": 0.82421875, | |
| "learning_rate": 1.8108348308620824e-05, | |
| "loss": 0.9855102300643921, | |
| "step": 1408 | |
| }, | |
| { | |
| "epoch": 0.6527681260134353, | |
| "grad_norm": 0.7265625, | |
| "learning_rate": 1.8076561275597727e-05, | |
| "loss": 0.8652253746986389, | |
| "step": 1409 | |
| }, | |
| { | |
| "epoch": 0.6532314107018763, | |
| "grad_norm": 0.85546875, | |
| "learning_rate": 1.8044785303606288e-05, | |
| "loss": 0.8510443568229675, | |
| "step": 1410 | |
| }, | |
| { | |
| "epoch": 0.6536946953903173, | |
| "grad_norm": 0.8828125, | |
| "learning_rate": 1.8013020451982835e-05, | |
| "loss": 0.9615334868431091, | |
| "step": 1411 | |
| }, | |
| { | |
| "epoch": 0.6541579800787584, | |
| "grad_norm": 0.82421875, | |
| "learning_rate": 1.7981266780042904e-05, | |
| "loss": 0.8480163812637329, | |
| "step": 1412 | |
| }, | |
| { | |
| "epoch": 0.6546212647671994, | |
| "grad_norm": 0.81640625, | |
| "learning_rate": 1.7949524347081187e-05, | |
| "loss": 0.9262988567352295, | |
| "step": 1413 | |
| }, | |
| { | |
| "epoch": 0.6550845494556405, | |
| "grad_norm": 1.078125, | |
| "learning_rate": 1.7917793212371354e-05, | |
| "loss": 1.2862350940704346, | |
| "step": 1414 | |
| }, | |
| { | |
| "epoch": 0.6555478341440816, | |
| "grad_norm": 0.87109375, | |
| "learning_rate": 1.7886073435165996e-05, | |
| "loss": 1.0281234979629517, | |
| "step": 1415 | |
| }, | |
| { | |
| "epoch": 0.6560111188325226, | |
| "grad_norm": 0.890625, | |
| "learning_rate": 1.785436507469651e-05, | |
| "loss": 0.8532906770706177, | |
| "step": 1416 | |
| }, | |
| { | |
| "epoch": 0.6564744035209636, | |
| "grad_norm": 0.95703125, | |
| "learning_rate": 1.7822668190172938e-05, | |
| "loss": 0.8838380575180054, | |
| "step": 1417 | |
| }, | |
| { | |
| "epoch": 0.6569376882094047, | |
| "grad_norm": 0.86328125, | |
| "learning_rate": 1.779098284078393e-05, | |
| "loss": 0.8856160044670105, | |
| "step": 1418 | |
| }, | |
| { | |
| "epoch": 0.6574009728978457, | |
| "grad_norm": 0.859375, | |
| "learning_rate": 1.775930908569657e-05, | |
| "loss": 0.8010708093643188, | |
| "step": 1419 | |
| }, | |
| { | |
| "epoch": 0.6578642575862867, | |
| "grad_norm": 0.91015625, | |
| "learning_rate": 1.772764698405631e-05, | |
| "loss": 0.8682946562767029, | |
| "step": 1420 | |
| }, | |
| { | |
| "epoch": 0.6583275422747278, | |
| "grad_norm": 0.78515625, | |
| "learning_rate": 1.7695996594986836e-05, | |
| "loss": 1.0009998083114624, | |
| "step": 1421 | |
| }, | |
| { | |
| "epoch": 0.6587908269631688, | |
| "grad_norm": 0.953125, | |
| "learning_rate": 1.7664357977589932e-05, | |
| "loss": 0.9323142170906067, | |
| "step": 1422 | |
| }, | |
| { | |
| "epoch": 0.65925411165161, | |
| "grad_norm": 0.82421875, | |
| "learning_rate": 1.7632731190945454e-05, | |
| "loss": 0.9634347558021545, | |
| "step": 1423 | |
| }, | |
| { | |
| "epoch": 0.659717396340051, | |
| "grad_norm": 0.859375, | |
| "learning_rate": 1.7601116294111127e-05, | |
| "loss": 1.1587272882461548, | |
| "step": 1424 | |
| }, | |
| { | |
| "epoch": 0.660180681028492, | |
| "grad_norm": 0.87890625, | |
| "learning_rate": 1.7569513346122498e-05, | |
| "loss": 1.0130343437194824, | |
| "step": 1425 | |
| }, | |
| { | |
| "epoch": 0.6606439657169331, | |
| "grad_norm": 0.95703125, | |
| "learning_rate": 1.7537922405992772e-05, | |
| "loss": 0.904670000076294, | |
| "step": 1426 | |
| }, | |
| { | |
| "epoch": 0.6611072504053741, | |
| "grad_norm": 0.96875, | |
| "learning_rate": 1.7506343532712762e-05, | |
| "loss": 1.015366554260254, | |
| "step": 1427 | |
| }, | |
| { | |
| "epoch": 0.6615705350938151, | |
| "grad_norm": 0.828125, | |
| "learning_rate": 1.7474776785250742e-05, | |
| "loss": 1.0041009187698364, | |
| "step": 1428 | |
| }, | |
| { | |
| "epoch": 0.6620338197822562, | |
| "grad_norm": 0.84375, | |
| "learning_rate": 1.744322222255231e-05, | |
| "loss": 0.8108413815498352, | |
| "step": 1429 | |
| }, | |
| { | |
| "epoch": 0.6624971044706972, | |
| "grad_norm": 0.80859375, | |
| "learning_rate": 1.7411679903540366e-05, | |
| "loss": 0.8150144815444946, | |
| "step": 1430 | |
| }, | |
| { | |
| "epoch": 0.6629603891591382, | |
| "grad_norm": 0.81640625, | |
| "learning_rate": 1.7380149887114892e-05, | |
| "loss": 0.9165526032447815, | |
| "step": 1431 | |
| }, | |
| { | |
| "epoch": 0.6634236738475794, | |
| "grad_norm": 0.86328125, | |
| "learning_rate": 1.7348632232152943e-05, | |
| "loss": 0.8653296828269958, | |
| "step": 1432 | |
| }, | |
| { | |
| "epoch": 0.6638869585360204, | |
| "grad_norm": 0.765625, | |
| "learning_rate": 1.7317126997508464e-05, | |
| "loss": 0.8013156652450562, | |
| "step": 1433 | |
| }, | |
| { | |
| "epoch": 0.6643502432244615, | |
| "grad_norm": 0.765625, | |
| "learning_rate": 1.7285634242012216e-05, | |
| "loss": 0.800615668296814, | |
| "step": 1434 | |
| }, | |
| { | |
| "epoch": 0.6648135279129025, | |
| "grad_norm": 0.71875, | |
| "learning_rate": 1.725415402447165e-05, | |
| "loss": 0.8959740996360779, | |
| "step": 1435 | |
| }, | |
| { | |
| "epoch": 0.6652768126013435, | |
| "grad_norm": 0.77734375, | |
| "learning_rate": 1.722268640367083e-05, | |
| "loss": 0.9206319451332092, | |
| "step": 1436 | |
| }, | |
| { | |
| "epoch": 0.6657400972897846, | |
| "grad_norm": 0.8203125, | |
| "learning_rate": 1.719123143837025e-05, | |
| "loss": 0.9993884563446045, | |
| "step": 1437 | |
| }, | |
| { | |
| "epoch": 0.6662033819782256, | |
| "grad_norm": 0.97265625, | |
| "learning_rate": 1.715978918730681e-05, | |
| "loss": 0.9378257393836975, | |
| "step": 1438 | |
| }, | |
| { | |
| "epoch": 0.6666666666666666, | |
| "grad_norm": 0.87109375, | |
| "learning_rate": 1.7128359709193664e-05, | |
| "loss": 1.0335369110107422, | |
| "step": 1439 | |
| }, | |
| { | |
| "epoch": 0.6671299513551077, | |
| "grad_norm": 0.75, | |
| "learning_rate": 1.70969430627201e-05, | |
| "loss": 0.843043327331543, | |
| "step": 1440 | |
| }, | |
| { | |
| "epoch": 0.6675932360435488, | |
| "grad_norm": 0.8359375, | |
| "learning_rate": 1.7065539306551467e-05, | |
| "loss": 0.9367977380752563, | |
| "step": 1441 | |
| }, | |
| { | |
| "epoch": 0.6680565207319898, | |
| "grad_norm": 0.8984375, | |
| "learning_rate": 1.7034148499329014e-05, | |
| "loss": 0.9247215390205383, | |
| "step": 1442 | |
| }, | |
| { | |
| "epoch": 0.6685198054204309, | |
| "grad_norm": 0.8125, | |
| "learning_rate": 1.700277069966984e-05, | |
| "loss": 0.9778249859809875, | |
| "step": 1443 | |
| }, | |
| { | |
| "epoch": 0.6689830901088719, | |
| "grad_norm": 0.8125, | |
| "learning_rate": 1.6971405966166737e-05, | |
| "loss": 0.7912523150444031, | |
| "step": 1444 | |
| }, | |
| { | |
| "epoch": 0.669446374797313, | |
| "grad_norm": 0.734375, | |
| "learning_rate": 1.6940054357388088e-05, | |
| "loss": 0.891732394695282, | |
| "step": 1445 | |
| }, | |
| { | |
| "epoch": 0.669909659485754, | |
| "grad_norm": 0.81640625, | |
| "learning_rate": 1.6908715931877802e-05, | |
| "loss": 1.0751419067382812, | |
| "step": 1446 | |
| }, | |
| { | |
| "epoch": 0.670372944174195, | |
| "grad_norm": 0.82421875, | |
| "learning_rate": 1.6877390748155137e-05, | |
| "loss": 0.952225923538208, | |
| "step": 1447 | |
| }, | |
| { | |
| "epoch": 0.670836228862636, | |
| "grad_norm": 1.0703125, | |
| "learning_rate": 1.6846078864714642e-05, | |
| "loss": 0.9902151823043823, | |
| "step": 1448 | |
| }, | |
| { | |
| "epoch": 0.6712995135510771, | |
| "grad_norm": 0.8515625, | |
| "learning_rate": 1.6814780340026027e-05, | |
| "loss": 0.9151983261108398, | |
| "step": 1449 | |
| }, | |
| { | |
| "epoch": 0.6717627982395182, | |
| "grad_norm": 0.8125, | |
| "learning_rate": 1.6783495232534053e-05, | |
| "loss": 0.81586754322052, | |
| "step": 1450 | |
| }, | |
| { | |
| "epoch": 0.6722260829279593, | |
| "grad_norm": 0.8203125, | |
| "learning_rate": 1.6752223600658437e-05, | |
| "loss": 0.8691772818565369, | |
| "step": 1451 | |
| }, | |
| { | |
| "epoch": 0.6726893676164003, | |
| "grad_norm": 0.94921875, | |
| "learning_rate": 1.67209655027937e-05, | |
| "loss": 0.7808753848075867, | |
| "step": 1452 | |
| }, | |
| { | |
| "epoch": 0.6731526523048413, | |
| "grad_norm": 0.9453125, | |
| "learning_rate": 1.6689720997309144e-05, | |
| "loss": 0.9605461955070496, | |
| "step": 1453 | |
| }, | |
| { | |
| "epoch": 0.6736159369932824, | |
| "grad_norm": 0.859375, | |
| "learning_rate": 1.6658490142548634e-05, | |
| "loss": 0.8777569532394409, | |
| "step": 1454 | |
| }, | |
| { | |
| "epoch": 0.6740792216817234, | |
| "grad_norm": 0.75390625, | |
| "learning_rate": 1.6627272996830594e-05, | |
| "loss": 0.8905514478683472, | |
| "step": 1455 | |
| }, | |
| { | |
| "epoch": 0.6745425063701644, | |
| "grad_norm": 0.80078125, | |
| "learning_rate": 1.659606961844781e-05, | |
| "loss": 1.0816737413406372, | |
| "step": 1456 | |
| }, | |
| { | |
| "epoch": 0.6750057910586055, | |
| "grad_norm": 0.9296875, | |
| "learning_rate": 1.656488006566738e-05, | |
| "loss": 0.9656401872634888, | |
| "step": 1457 | |
| }, | |
| { | |
| "epoch": 0.6754690757470465, | |
| "grad_norm": 0.78125, | |
| "learning_rate": 1.6533704396730586e-05, | |
| "loss": 0.9407315850257874, | |
| "step": 1458 | |
| }, | |
| { | |
| "epoch": 0.6759323604354877, | |
| "grad_norm": 0.80078125, | |
| "learning_rate": 1.6502542669852762e-05, | |
| "loss": 0.9171674847602844, | |
| "step": 1459 | |
| }, | |
| { | |
| "epoch": 0.6763956451239287, | |
| "grad_norm": 0.8671875, | |
| "learning_rate": 1.647139494322325e-05, | |
| "loss": 0.8651185035705566, | |
| "step": 1460 | |
| }, | |
| { | |
| "epoch": 0.6768589298123697, | |
| "grad_norm": 0.9140625, | |
| "learning_rate": 1.644026127500519e-05, | |
| "loss": 0.9177205562591553, | |
| "step": 1461 | |
| }, | |
| { | |
| "epoch": 0.6773222145008108, | |
| "grad_norm": 0.85546875, | |
| "learning_rate": 1.6409141723335533e-05, | |
| "loss": 0.8896920680999756, | |
| "step": 1462 | |
| }, | |
| { | |
| "epoch": 0.6777854991892518, | |
| "grad_norm": 0.82421875, | |
| "learning_rate": 1.637803634632482e-05, | |
| "loss": 0.9356686472892761, | |
| "step": 1463 | |
| }, | |
| { | |
| "epoch": 0.6782487838776928, | |
| "grad_norm": 0.86328125, | |
| "learning_rate": 1.6346945202057156e-05, | |
| "loss": 0.9953092336654663, | |
| "step": 1464 | |
| }, | |
| { | |
| "epoch": 0.6787120685661339, | |
| "grad_norm": 0.85546875, | |
| "learning_rate": 1.6315868348590043e-05, | |
| "loss": 0.9929192066192627, | |
| "step": 1465 | |
| }, | |
| { | |
| "epoch": 0.6791753532545749, | |
| "grad_norm": 0.8359375, | |
| "learning_rate": 1.6284805843954325e-05, | |
| "loss": 0.9500339031219482, | |
| "step": 1466 | |
| }, | |
| { | |
| "epoch": 0.6796386379430159, | |
| "grad_norm": 0.84375, | |
| "learning_rate": 1.6253757746154036e-05, | |
| "loss": 0.932491660118103, | |
| "step": 1467 | |
| }, | |
| { | |
| "epoch": 0.6801019226314571, | |
| "grad_norm": 0.8203125, | |
| "learning_rate": 1.622272411316629e-05, | |
| "loss": 0.9832372665405273, | |
| "step": 1468 | |
| }, | |
| { | |
| "epoch": 0.6805652073198981, | |
| "grad_norm": 0.80078125, | |
| "learning_rate": 1.6191705002941227e-05, | |
| "loss": 0.8932191133499146, | |
| "step": 1469 | |
| }, | |
| { | |
| "epoch": 0.6810284920083391, | |
| "grad_norm": 0.765625, | |
| "learning_rate": 1.6160700473401838e-05, | |
| "loss": 0.9749459624290466, | |
| "step": 1470 | |
| }, | |
| { | |
| "epoch": 0.6814917766967802, | |
| "grad_norm": 1.0390625, | |
| "learning_rate": 1.6129710582443913e-05, | |
| "loss": 1.025862216949463, | |
| "step": 1471 | |
| }, | |
| { | |
| "epoch": 0.6819550613852212, | |
| "grad_norm": 0.78515625, | |
| "learning_rate": 1.609873538793588e-05, | |
| "loss": 0.7363277077674866, | |
| "step": 1472 | |
| }, | |
| { | |
| "epoch": 0.6824183460736623, | |
| "grad_norm": 0.9296875, | |
| "learning_rate": 1.6067774947718745e-05, | |
| "loss": 0.9932308793067932, | |
| "step": 1473 | |
| }, | |
| { | |
| "epoch": 0.6828816307621033, | |
| "grad_norm": 0.984375, | |
| "learning_rate": 1.6036829319605963e-05, | |
| "loss": 1.0912789106369019, | |
| "step": 1474 | |
| }, | |
| { | |
| "epoch": 0.6833449154505443, | |
| "grad_norm": 0.8046875, | |
| "learning_rate": 1.6005898561383296e-05, | |
| "loss": 0.8973681330680847, | |
| "step": 1475 | |
| }, | |
| { | |
| "epoch": 0.6838082001389854, | |
| "grad_norm": 0.88671875, | |
| "learning_rate": 1.5974982730808785e-05, | |
| "loss": 0.9494137167930603, | |
| "step": 1476 | |
| }, | |
| { | |
| "epoch": 0.6842714848274265, | |
| "grad_norm": 0.7578125, | |
| "learning_rate": 1.5944081885612567e-05, | |
| "loss": 1.014467477798462, | |
| "step": 1477 | |
| }, | |
| { | |
| "epoch": 0.6847347695158675, | |
| "grad_norm": 0.859375, | |
| "learning_rate": 1.5913196083496813e-05, | |
| "loss": 0.8571305871009827, | |
| "step": 1478 | |
| }, | |
| { | |
| "epoch": 0.6851980542043086, | |
| "grad_norm": 0.90625, | |
| "learning_rate": 1.588232538213559e-05, | |
| "loss": 1.0085415840148926, | |
| "step": 1479 | |
| }, | |
| { | |
| "epoch": 0.6856613388927496, | |
| "grad_norm": 0.8671875, | |
| "learning_rate": 1.5851469839174773e-05, | |
| "loss": 0.9112807512283325, | |
| "step": 1480 | |
| }, | |
| { | |
| "epoch": 0.6861246235811906, | |
| "grad_norm": 0.7109375, | |
| "learning_rate": 1.5820629512231938e-05, | |
| "loss": 0.7599020004272461, | |
| "step": 1481 | |
| }, | |
| { | |
| "epoch": 0.6865879082696317, | |
| "grad_norm": 0.81640625, | |
| "learning_rate": 1.578980445889622e-05, | |
| "loss": 1.0665782690048218, | |
| "step": 1482 | |
| }, | |
| { | |
| "epoch": 0.6870511929580727, | |
| "grad_norm": 1.046875, | |
| "learning_rate": 1.5758994736728285e-05, | |
| "loss": 0.9333577752113342, | |
| "step": 1483 | |
| }, | |
| { | |
| "epoch": 0.6875144776465137, | |
| "grad_norm": 0.84375, | |
| "learning_rate": 1.5728200403260104e-05, | |
| "loss": 1.0278582572937012, | |
| "step": 1484 | |
| }, | |
| { | |
| "epoch": 0.6879777623349548, | |
| "grad_norm": 0.890625, | |
| "learning_rate": 1.5697421515994976e-05, | |
| "loss": 0.8942716121673584, | |
| "step": 1485 | |
| }, | |
| { | |
| "epoch": 0.6884410470233959, | |
| "grad_norm": 0.8203125, | |
| "learning_rate": 1.5666658132407308e-05, | |
| "loss": 0.8894409537315369, | |
| "step": 1486 | |
| }, | |
| { | |
| "epoch": 0.688904331711837, | |
| "grad_norm": 0.90234375, | |
| "learning_rate": 1.563591030994259e-05, | |
| "loss": 0.9284682273864746, | |
| "step": 1487 | |
| }, | |
| { | |
| "epoch": 0.689367616400278, | |
| "grad_norm": 0.8984375, | |
| "learning_rate": 1.560517810601723e-05, | |
| "loss": 0.9897350072860718, | |
| "step": 1488 | |
| }, | |
| { | |
| "epoch": 0.689830901088719, | |
| "grad_norm": 0.8671875, | |
| "learning_rate": 1.5574461578018493e-05, | |
| "loss": 0.9923799633979797, | |
| "step": 1489 | |
| }, | |
| { | |
| "epoch": 0.6902941857771601, | |
| "grad_norm": 0.90234375, | |
| "learning_rate": 1.554376078330436e-05, | |
| "loss": 0.8437385559082031, | |
| "step": 1490 | |
| }, | |
| { | |
| "epoch": 0.6907574704656011, | |
| "grad_norm": 0.8984375, | |
| "learning_rate": 1.5513075779203417e-05, | |
| "loss": 1.065632700920105, | |
| "step": 1491 | |
| }, | |
| { | |
| "epoch": 0.6912207551540421, | |
| "grad_norm": 0.7890625, | |
| "learning_rate": 1.5482406623014795e-05, | |
| "loss": 0.9187345504760742, | |
| "step": 1492 | |
| }, | |
| { | |
| "epoch": 0.6916840398424832, | |
| "grad_norm": 0.76171875, | |
| "learning_rate": 1.545175337200801e-05, | |
| "loss": 0.8924624919891357, | |
| "step": 1493 | |
| }, | |
| { | |
| "epoch": 0.6921473245309242, | |
| "grad_norm": 0.94921875, | |
| "learning_rate": 1.5421116083422887e-05, | |
| "loss": 0.9492448568344116, | |
| "step": 1494 | |
| }, | |
| { | |
| "epoch": 0.6926106092193653, | |
| "grad_norm": 0.80859375, | |
| "learning_rate": 1.539049481446944e-05, | |
| "loss": 0.738587498664856, | |
| "step": 1495 | |
| }, | |
| { | |
| "epoch": 0.6930738939078064, | |
| "grad_norm": 0.76953125, | |
| "learning_rate": 1.5359889622327773e-05, | |
| "loss": 1.0237951278686523, | |
| "step": 1496 | |
| }, | |
| { | |
| "epoch": 0.6935371785962474, | |
| "grad_norm": 0.82421875, | |
| "learning_rate": 1.5329300564147974e-05, | |
| "loss": 0.9008263349533081, | |
| "step": 1497 | |
| }, | |
| { | |
| "epoch": 0.6940004632846885, | |
| "grad_norm": 1.046875, | |
| "learning_rate": 1.529872769704997e-05, | |
| "loss": 1.008281946182251, | |
| "step": 1498 | |
| }, | |
| { | |
| "epoch": 0.6944637479731295, | |
| "grad_norm": 0.98828125, | |
| "learning_rate": 1.5268171078123503e-05, | |
| "loss": 1.0567467212677002, | |
| "step": 1499 | |
| }, | |
| { | |
| "epoch": 0.6949270326615705, | |
| "grad_norm": 0.90234375, | |
| "learning_rate": 1.5237630764427945e-05, | |
| "loss": 0.8660717606544495, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 0.6953903173500116, | |
| "grad_norm": 0.8984375, | |
| "learning_rate": 1.5207106812992225e-05, | |
| "loss": 0.9844351410865784, | |
| "step": 1501 | |
| }, | |
| { | |
| "epoch": 0.6958536020384526, | |
| "grad_norm": 0.953125, | |
| "learning_rate": 1.5176599280814716e-05, | |
| "loss": 0.9823206067085266, | |
| "step": 1502 | |
| }, | |
| { | |
| "epoch": 0.6963168867268936, | |
| "grad_norm": 0.84765625, | |
| "learning_rate": 1.5146108224863147e-05, | |
| "loss": 0.8877736330032349, | |
| "step": 1503 | |
| }, | |
| { | |
| "epoch": 0.6967801714153348, | |
| "grad_norm": 0.765625, | |
| "learning_rate": 1.5115633702074463e-05, | |
| "loss": 0.8826972842216492, | |
| "step": 1504 | |
| }, | |
| { | |
| "epoch": 0.6972434561037758, | |
| "grad_norm": 0.9375, | |
| "learning_rate": 1.5085175769354723e-05, | |
| "loss": 0.9058219194412231, | |
| "step": 1505 | |
| }, | |
| { | |
| "epoch": 0.6977067407922168, | |
| "grad_norm": 0.8125, | |
| "learning_rate": 1.5054734483579058e-05, | |
| "loss": 0.9776875972747803, | |
| "step": 1506 | |
| }, | |
| { | |
| "epoch": 0.6981700254806579, | |
| "grad_norm": 0.89453125, | |
| "learning_rate": 1.5024309901591453e-05, | |
| "loss": 0.8491601347923279, | |
| "step": 1507 | |
| }, | |
| { | |
| "epoch": 0.6986333101690989, | |
| "grad_norm": 0.76171875, | |
| "learning_rate": 1.4993902080204744e-05, | |
| "loss": 0.9605410099029541, | |
| "step": 1508 | |
| }, | |
| { | |
| "epoch": 0.6990965948575399, | |
| "grad_norm": 0.87890625, | |
| "learning_rate": 1.4963511076200446e-05, | |
| "loss": 0.8161846995353699, | |
| "step": 1509 | |
| }, | |
| { | |
| "epoch": 0.699559879545981, | |
| "grad_norm": 0.93359375, | |
| "learning_rate": 1.4933136946328686e-05, | |
| "loss": 0.992790699005127, | |
| "step": 1510 | |
| }, | |
| { | |
| "epoch": 0.700023164234422, | |
| "grad_norm": 0.80078125, | |
| "learning_rate": 1.4902779747308069e-05, | |
| "loss": 0.75239497423172, | |
| "step": 1511 | |
| }, | |
| { | |
| "epoch": 0.700486448922863, | |
| "grad_norm": 0.82421875, | |
| "learning_rate": 1.4872439535825595e-05, | |
| "loss": 0.8581488132476807, | |
| "step": 1512 | |
| }, | |
| { | |
| "epoch": 0.7009497336113042, | |
| "grad_norm": 1.015625, | |
| "learning_rate": 1.484211636853654e-05, | |
| "loss": 0.9911954998970032, | |
| "step": 1513 | |
| }, | |
| { | |
| "epoch": 0.7014130182997452, | |
| "grad_norm": 0.8203125, | |
| "learning_rate": 1.4811810302064333e-05, | |
| "loss": 0.9692468047142029, | |
| "step": 1514 | |
| }, | |
| { | |
| "epoch": 0.7018763029881863, | |
| "grad_norm": 0.875, | |
| "learning_rate": 1.4781521393000504e-05, | |
| "loss": 0.9069231152534485, | |
| "step": 1515 | |
| }, | |
| { | |
| "epoch": 0.7023395876766273, | |
| "grad_norm": 0.85546875, | |
| "learning_rate": 1.4751249697904517e-05, | |
| "loss": 0.8558336496353149, | |
| "step": 1516 | |
| }, | |
| { | |
| "epoch": 0.7028028723650683, | |
| "grad_norm": 0.828125, | |
| "learning_rate": 1.4720995273303713e-05, | |
| "loss": 0.8646467328071594, | |
| "step": 1517 | |
| }, | |
| { | |
| "epoch": 0.7032661570535094, | |
| "grad_norm": 0.765625, | |
| "learning_rate": 1.4690758175693161e-05, | |
| "loss": 0.9294448494911194, | |
| "step": 1518 | |
| }, | |
| { | |
| "epoch": 0.7037294417419504, | |
| "grad_norm": 0.8984375, | |
| "learning_rate": 1.46605384615356e-05, | |
| "loss": 1.5108270645141602, | |
| "step": 1519 | |
| }, | |
| { | |
| "epoch": 0.7041927264303914, | |
| "grad_norm": 0.765625, | |
| "learning_rate": 1.463033618726129e-05, | |
| "loss": 0.7721649408340454, | |
| "step": 1520 | |
| }, | |
| { | |
| "epoch": 0.7046560111188325, | |
| "grad_norm": 0.921875, | |
| "learning_rate": 1.4600151409267915e-05, | |
| "loss": 0.9843413233757019, | |
| "step": 1521 | |
| }, | |
| { | |
| "epoch": 0.7051192958072736, | |
| "grad_norm": 0.77734375, | |
| "learning_rate": 1.4569984183920527e-05, | |
| "loss": 1.0829393863677979, | |
| "step": 1522 | |
| }, | |
| { | |
| "epoch": 0.7055825804957147, | |
| "grad_norm": 0.84375, | |
| "learning_rate": 1.453983456755135e-05, | |
| "loss": 0.8989307284355164, | |
| "step": 1523 | |
| }, | |
| { | |
| "epoch": 0.7060458651841557, | |
| "grad_norm": 0.89453125, | |
| "learning_rate": 1.4509702616459779e-05, | |
| "loss": 1.061563491821289, | |
| "step": 1524 | |
| }, | |
| { | |
| "epoch": 0.7065091498725967, | |
| "grad_norm": 0.7265625, | |
| "learning_rate": 1.4479588386912172e-05, | |
| "loss": 0.9384868741035461, | |
| "step": 1525 | |
| }, | |
| { | |
| "epoch": 0.7069724345610378, | |
| "grad_norm": 0.75, | |
| "learning_rate": 1.4449491935141836e-05, | |
| "loss": 0.7927857637405396, | |
| "step": 1526 | |
| }, | |
| { | |
| "epoch": 0.7074357192494788, | |
| "grad_norm": 0.9140625, | |
| "learning_rate": 1.4419413317348868e-05, | |
| "loss": 0.7902013063430786, | |
| "step": 1527 | |
| }, | |
| { | |
| "epoch": 0.7078990039379198, | |
| "grad_norm": 0.859375, | |
| "learning_rate": 1.4389352589700028e-05, | |
| "loss": 0.8877607583999634, | |
| "step": 1528 | |
| }, | |
| { | |
| "epoch": 0.7083622886263609, | |
| "grad_norm": 0.85546875, | |
| "learning_rate": 1.4359309808328738e-05, | |
| "loss": 0.8904527425765991, | |
| "step": 1529 | |
| }, | |
| { | |
| "epoch": 0.7088255733148019, | |
| "grad_norm": 0.9453125, | |
| "learning_rate": 1.4329285029334844e-05, | |
| "loss": 0.9153241515159607, | |
| "step": 1530 | |
| }, | |
| { | |
| "epoch": 0.709288858003243, | |
| "grad_norm": 0.76171875, | |
| "learning_rate": 1.4299278308784614e-05, | |
| "loss": 0.8823608756065369, | |
| "step": 1531 | |
| }, | |
| { | |
| "epoch": 0.7097521426916841, | |
| "grad_norm": 0.66796875, | |
| "learning_rate": 1.4269289702710575e-05, | |
| "loss": 0.8942029476165771, | |
| "step": 1532 | |
| }, | |
| { | |
| "epoch": 0.7102154273801251, | |
| "grad_norm": 0.78125, | |
| "learning_rate": 1.4239319267111447e-05, | |
| "loss": 0.9542202353477478, | |
| "step": 1533 | |
| }, | |
| { | |
| "epoch": 0.7106787120685661, | |
| "grad_norm": 0.9296875, | |
| "learning_rate": 1.4209367057952005e-05, | |
| "loss": 0.834882915019989, | |
| "step": 1534 | |
| }, | |
| { | |
| "epoch": 0.7111419967570072, | |
| "grad_norm": 0.80859375, | |
| "learning_rate": 1.4179433131162978e-05, | |
| "loss": 0.8554872870445251, | |
| "step": 1535 | |
| }, | |
| { | |
| "epoch": 0.7116052814454482, | |
| "grad_norm": 0.88671875, | |
| "learning_rate": 1.4149517542640996e-05, | |
| "loss": 0.9996330738067627, | |
| "step": 1536 | |
| }, | |
| { | |
| "epoch": 0.7120685661338892, | |
| "grad_norm": 0.953125, | |
| "learning_rate": 1.4119620348248392e-05, | |
| "loss": 0.8751652240753174, | |
| "step": 1537 | |
| }, | |
| { | |
| "epoch": 0.7125318508223303, | |
| "grad_norm": 0.8984375, | |
| "learning_rate": 1.4089741603813209e-05, | |
| "loss": 0.9856979846954346, | |
| "step": 1538 | |
| }, | |
| { | |
| "epoch": 0.7129951355107713, | |
| "grad_norm": 0.9765625, | |
| "learning_rate": 1.4059881365128982e-05, | |
| "loss": 0.9445462226867676, | |
| "step": 1539 | |
| }, | |
| { | |
| "epoch": 0.7134584201992125, | |
| "grad_norm": 0.92578125, | |
| "learning_rate": 1.4030039687954728e-05, | |
| "loss": 0.7988513112068176, | |
| "step": 1540 | |
| }, | |
| { | |
| "epoch": 0.7139217048876535, | |
| "grad_norm": 0.8359375, | |
| "learning_rate": 1.4000216628014782e-05, | |
| "loss": 0.8697713017463684, | |
| "step": 1541 | |
| }, | |
| { | |
| "epoch": 0.7143849895760945, | |
| "grad_norm": 0.80078125, | |
| "learning_rate": 1.3970412240998741e-05, | |
| "loss": 1.1426811218261719, | |
| "step": 1542 | |
| }, | |
| { | |
| "epoch": 0.7148482742645356, | |
| "grad_norm": 0.7734375, | |
| "learning_rate": 1.3940626582561308e-05, | |
| "loss": 0.932056188583374, | |
| "step": 1543 | |
| }, | |
| { | |
| "epoch": 0.7153115589529766, | |
| "grad_norm": 0.84765625, | |
| "learning_rate": 1.3910859708322204e-05, | |
| "loss": 0.9006248116493225, | |
| "step": 1544 | |
| }, | |
| { | |
| "epoch": 0.7157748436414176, | |
| "grad_norm": 0.73046875, | |
| "learning_rate": 1.3881111673866106e-05, | |
| "loss": 0.869684636592865, | |
| "step": 1545 | |
| }, | |
| { | |
| "epoch": 0.7162381283298587, | |
| "grad_norm": 0.84765625, | |
| "learning_rate": 1.3851382534742493e-05, | |
| "loss": 1.0497184991836548, | |
| "step": 1546 | |
| }, | |
| { | |
| "epoch": 0.7167014130182997, | |
| "grad_norm": 0.80078125, | |
| "learning_rate": 1.3821672346465575e-05, | |
| "loss": 0.9837194681167603, | |
| "step": 1547 | |
| }, | |
| { | |
| "epoch": 0.7171646977067407, | |
| "grad_norm": 0.9453125, | |
| "learning_rate": 1.3791981164514141e-05, | |
| "loss": 0.9810088872909546, | |
| "step": 1548 | |
| }, | |
| { | |
| "epoch": 0.7176279823951819, | |
| "grad_norm": 0.8828125, | |
| "learning_rate": 1.376230904433153e-05, | |
| "loss": 0.9475647211074829, | |
| "step": 1549 | |
| }, | |
| { | |
| "epoch": 0.7180912670836229, | |
| "grad_norm": 0.87109375, | |
| "learning_rate": 1.3732656041325448e-05, | |
| "loss": 0.7182843089103699, | |
| "step": 1550 | |
| }, | |
| { | |
| "epoch": 0.718554551772064, | |
| "grad_norm": 0.78515625, | |
| "learning_rate": 1.370302221086793e-05, | |
| "loss": 0.8560316562652588, | |
| "step": 1551 | |
| }, | |
| { | |
| "epoch": 0.719017836460505, | |
| "grad_norm": 0.8125, | |
| "learning_rate": 1.3673407608295208e-05, | |
| "loss": 0.9808245301246643, | |
| "step": 1552 | |
| }, | |
| { | |
| "epoch": 0.719481121148946, | |
| "grad_norm": 0.796875, | |
| "learning_rate": 1.3643812288907586e-05, | |
| "loss": 0.8075791597366333, | |
| "step": 1553 | |
| }, | |
| { | |
| "epoch": 0.7199444058373871, | |
| "grad_norm": 0.7109375, | |
| "learning_rate": 1.3614236307969388e-05, | |
| "loss": 0.8571614027023315, | |
| "step": 1554 | |
| }, | |
| { | |
| "epoch": 0.7204076905258281, | |
| "grad_norm": 0.94140625, | |
| "learning_rate": 1.3584679720708808e-05, | |
| "loss": 1.0778717994689941, | |
| "step": 1555 | |
| }, | |
| { | |
| "epoch": 0.7208709752142691, | |
| "grad_norm": 0.703125, | |
| "learning_rate": 1.3555142582317846e-05, | |
| "loss": 0.726076602935791, | |
| "step": 1556 | |
| }, | |
| { | |
| "epoch": 0.7213342599027102, | |
| "grad_norm": 0.87109375, | |
| "learning_rate": 1.352562494795216e-05, | |
| "loss": 0.8657900094985962, | |
| "step": 1557 | |
| }, | |
| { | |
| "epoch": 0.7217975445911513, | |
| "grad_norm": 0.77734375, | |
| "learning_rate": 1.3496126872730989e-05, | |
| "loss": 1.003196120262146, | |
| "step": 1558 | |
| }, | |
| { | |
| "epoch": 0.7222608292795923, | |
| "grad_norm": 0.8046875, | |
| "learning_rate": 1.3466648411737065e-05, | |
| "loss": 0.990123450756073, | |
| "step": 1559 | |
| }, | |
| { | |
| "epoch": 0.7227241139680334, | |
| "grad_norm": 0.890625, | |
| "learning_rate": 1.3437189620016487e-05, | |
| "loss": 0.8534030914306641, | |
| "step": 1560 | |
| }, | |
| { | |
| "epoch": 0.7231873986564744, | |
| "grad_norm": 0.78515625, | |
| "learning_rate": 1.3407750552578635e-05, | |
| "loss": 0.9646372199058533, | |
| "step": 1561 | |
| }, | |
| { | |
| "epoch": 0.7236506833449154, | |
| "grad_norm": 0.8359375, | |
| "learning_rate": 1.3378331264396029e-05, | |
| "loss": 0.7858661413192749, | |
| "step": 1562 | |
| }, | |
| { | |
| "epoch": 0.7241139680333565, | |
| "grad_norm": 0.73828125, | |
| "learning_rate": 1.3348931810404288e-05, | |
| "loss": 0.7996460795402527, | |
| "step": 1563 | |
| }, | |
| { | |
| "epoch": 0.7245772527217975, | |
| "grad_norm": 0.91796875, | |
| "learning_rate": 1.331955224550195e-05, | |
| "loss": 0.9326428174972534, | |
| "step": 1564 | |
| }, | |
| { | |
| "epoch": 0.7250405374102386, | |
| "grad_norm": 0.85546875, | |
| "learning_rate": 1.329019262455048e-05, | |
| "loss": 1.1491047143936157, | |
| "step": 1565 | |
| }, | |
| { | |
| "epoch": 0.7255038220986796, | |
| "grad_norm": 0.8359375, | |
| "learning_rate": 1.3260853002374042e-05, | |
| "loss": 0.8331834077835083, | |
| "step": 1566 | |
| }, | |
| { | |
| "epoch": 0.7259671067871207, | |
| "grad_norm": 0.92578125, | |
| "learning_rate": 1.323153343375947e-05, | |
| "loss": 0.920341432094574, | |
| "step": 1567 | |
| }, | |
| { | |
| "epoch": 0.7264303914755618, | |
| "grad_norm": 0.85546875, | |
| "learning_rate": 1.3202233973456163e-05, | |
| "loss": 1.0477654933929443, | |
| "step": 1568 | |
| }, | |
| { | |
| "epoch": 0.7268936761640028, | |
| "grad_norm": 0.89453125, | |
| "learning_rate": 1.3172954676175968e-05, | |
| "loss": 0.877657949924469, | |
| "step": 1569 | |
| }, | |
| { | |
| "epoch": 0.7273569608524438, | |
| "grad_norm": 0.91796875, | |
| "learning_rate": 1.3143695596593085e-05, | |
| "loss": 0.8888318538665771, | |
| "step": 1570 | |
| }, | |
| { | |
| "epoch": 0.7278202455408849, | |
| "grad_norm": 0.7890625, | |
| "learning_rate": 1.3114456789343936e-05, | |
| "loss": 1.0591609477996826, | |
| "step": 1571 | |
| }, | |
| { | |
| "epoch": 0.7282835302293259, | |
| "grad_norm": 0.7421875, | |
| "learning_rate": 1.308523830902713e-05, | |
| "loss": 0.9316011071205139, | |
| "step": 1572 | |
| }, | |
| { | |
| "epoch": 0.7287468149177669, | |
| "grad_norm": 0.93359375, | |
| "learning_rate": 1.3056040210203272e-05, | |
| "loss": 0.9526464343070984, | |
| "step": 1573 | |
| }, | |
| { | |
| "epoch": 0.729210099606208, | |
| "grad_norm": 0.86328125, | |
| "learning_rate": 1.302686254739494e-05, | |
| "loss": 0.900551438331604, | |
| "step": 1574 | |
| }, | |
| { | |
| "epoch": 0.729673384294649, | |
| "grad_norm": 0.88671875, | |
| "learning_rate": 1.299770537508655e-05, | |
| "loss": 0.9370485544204712, | |
| "step": 1575 | |
| }, | |
| { | |
| "epoch": 0.7301366689830902, | |
| "grad_norm": 0.890625, | |
| "learning_rate": 1.2968568747724228e-05, | |
| "loss": 0.9385664463043213, | |
| "step": 1576 | |
| }, | |
| { | |
| "epoch": 0.7305999536715312, | |
| "grad_norm": 0.82421875, | |
| "learning_rate": 1.2939452719715771e-05, | |
| "loss": 0.8683898448944092, | |
| "step": 1577 | |
| }, | |
| { | |
| "epoch": 0.7310632383599722, | |
| "grad_norm": 0.78515625, | |
| "learning_rate": 1.291035734543049e-05, | |
| "loss": 0.8029768466949463, | |
| "step": 1578 | |
| }, | |
| { | |
| "epoch": 0.7315265230484133, | |
| "grad_norm": 0.890625, | |
| "learning_rate": 1.2881282679199143e-05, | |
| "loss": 0.9224568605422974, | |
| "step": 1579 | |
| }, | |
| { | |
| "epoch": 0.7319898077368543, | |
| "grad_norm": 0.75, | |
| "learning_rate": 1.2852228775313794e-05, | |
| "loss": 0.8857653737068176, | |
| "step": 1580 | |
| }, | |
| { | |
| "epoch": 0.7324530924252953, | |
| "grad_norm": 0.8046875, | |
| "learning_rate": 1.2823195688027754e-05, | |
| "loss": 0.9866558313369751, | |
| "step": 1581 | |
| }, | |
| { | |
| "epoch": 0.7329163771137364, | |
| "grad_norm": 0.83203125, | |
| "learning_rate": 1.2794183471555458e-05, | |
| "loss": 0.8291232585906982, | |
| "step": 1582 | |
| }, | |
| { | |
| "epoch": 0.7333796618021774, | |
| "grad_norm": 0.84765625, | |
| "learning_rate": 1.2765192180072382e-05, | |
| "loss": 1.0451363325119019, | |
| "step": 1583 | |
| }, | |
| { | |
| "epoch": 0.7338429464906184, | |
| "grad_norm": 0.7734375, | |
| "learning_rate": 1.2736221867714914e-05, | |
| "loss": 0.8132802248001099, | |
| "step": 1584 | |
| }, | |
| { | |
| "epoch": 0.7343062311790596, | |
| "grad_norm": 0.82421875, | |
| "learning_rate": 1.2707272588580259e-05, | |
| "loss": 0.9321390390396118, | |
| "step": 1585 | |
| }, | |
| { | |
| "epoch": 0.7347695158675006, | |
| "grad_norm": 0.74609375, | |
| "learning_rate": 1.2678344396726367e-05, | |
| "loss": 0.8528720140457153, | |
| "step": 1586 | |
| }, | |
| { | |
| "epoch": 0.7352328005559416, | |
| "grad_norm": 0.7578125, | |
| "learning_rate": 1.2649437346171806e-05, | |
| "loss": 0.9147552847862244, | |
| "step": 1587 | |
| }, | |
| { | |
| "epoch": 0.7356960852443827, | |
| "grad_norm": 0.8671875, | |
| "learning_rate": 1.262055149089564e-05, | |
| "loss": 0.938007652759552, | |
| "step": 1588 | |
| }, | |
| { | |
| "epoch": 0.7361593699328237, | |
| "grad_norm": 0.83984375, | |
| "learning_rate": 1.2591686884837398e-05, | |
| "loss": 0.9420212507247925, | |
| "step": 1589 | |
| }, | |
| { | |
| "epoch": 0.7366226546212647, | |
| "grad_norm": 0.93359375, | |
| "learning_rate": 1.2562843581896892e-05, | |
| "loss": 0.8320150375366211, | |
| "step": 1590 | |
| }, | |
| { | |
| "epoch": 0.7370859393097058, | |
| "grad_norm": 0.9375, | |
| "learning_rate": 1.2534021635934168e-05, | |
| "loss": 0.8456387519836426, | |
| "step": 1591 | |
| }, | |
| { | |
| "epoch": 0.7375492239981468, | |
| "grad_norm": 0.87890625, | |
| "learning_rate": 1.25052211007694e-05, | |
| "loss": 0.8855749368667603, | |
| "step": 1592 | |
| }, | |
| { | |
| "epoch": 0.7380125086865879, | |
| "grad_norm": 0.91015625, | |
| "learning_rate": 1.2476442030182779e-05, | |
| "loss": 0.9768263101577759, | |
| "step": 1593 | |
| }, | |
| { | |
| "epoch": 0.738475793375029, | |
| "grad_norm": 0.70703125, | |
| "learning_rate": 1.244768447791439e-05, | |
| "loss": 0.7757108211517334, | |
| "step": 1594 | |
| }, | |
| { | |
| "epoch": 0.73893907806347, | |
| "grad_norm": 0.91796875, | |
| "learning_rate": 1.2418948497664178e-05, | |
| "loss": 0.7897142767906189, | |
| "step": 1595 | |
| }, | |
| { | |
| "epoch": 0.7394023627519111, | |
| "grad_norm": 0.9140625, | |
| "learning_rate": 1.2390234143091761e-05, | |
| "loss": 1.009446620941162, | |
| "step": 1596 | |
| }, | |
| { | |
| "epoch": 0.7398656474403521, | |
| "grad_norm": 0.83203125, | |
| "learning_rate": 1.2361541467816402e-05, | |
| "loss": 0.7995726466178894, | |
| "step": 1597 | |
| }, | |
| { | |
| "epoch": 0.7403289321287931, | |
| "grad_norm": 0.92578125, | |
| "learning_rate": 1.2332870525416888e-05, | |
| "loss": 0.9182596206665039, | |
| "step": 1598 | |
| }, | |
| { | |
| "epoch": 0.7407922168172342, | |
| "grad_norm": 0.89453125, | |
| "learning_rate": 1.2304221369431394e-05, | |
| "loss": 0.8457068800926208, | |
| "step": 1599 | |
| }, | |
| { | |
| "epoch": 0.7412555015056752, | |
| "grad_norm": 0.8046875, | |
| "learning_rate": 1.227559405335744e-05, | |
| "loss": 1.0693312883377075, | |
| "step": 1600 | |
| }, | |
| { | |
| "epoch": 0.7417187861941162, | |
| "grad_norm": 0.90234375, | |
| "learning_rate": 1.2246988630651752e-05, | |
| "loss": 0.9207885265350342, | |
| "step": 1601 | |
| }, | |
| { | |
| "epoch": 0.7421820708825573, | |
| "grad_norm": 0.79296875, | |
| "learning_rate": 1.2218405154730182e-05, | |
| "loss": 0.9426785111427307, | |
| "step": 1602 | |
| }, | |
| { | |
| "epoch": 0.7426453555709984, | |
| "grad_norm": 0.79296875, | |
| "learning_rate": 1.2189843678967586e-05, | |
| "loss": 0.8506474494934082, | |
| "step": 1603 | |
| }, | |
| { | |
| "epoch": 0.7431086402594395, | |
| "grad_norm": 0.89453125, | |
| "learning_rate": 1.2161304256697735e-05, | |
| "loss": 0.8817695379257202, | |
| "step": 1604 | |
| }, | |
| { | |
| "epoch": 0.7435719249478805, | |
| "grad_norm": 0.8828125, | |
| "learning_rate": 1.2132786941213243e-05, | |
| "loss": 0.9216139316558838, | |
| "step": 1605 | |
| }, | |
| { | |
| "epoch": 0.7440352096363215, | |
| "grad_norm": 0.79296875, | |
| "learning_rate": 1.2104291785765427e-05, | |
| "loss": 0.9422940611839294, | |
| "step": 1606 | |
| }, | |
| { | |
| "epoch": 0.7444984943247626, | |
| "grad_norm": 0.7578125, | |
| "learning_rate": 1.2075818843564235e-05, | |
| "loss": 0.8162217140197754, | |
| "step": 1607 | |
| }, | |
| { | |
| "epoch": 0.7449617790132036, | |
| "grad_norm": 0.85546875, | |
| "learning_rate": 1.2047368167778111e-05, | |
| "loss": 0.8420689105987549, | |
| "step": 1608 | |
| }, | |
| { | |
| "epoch": 0.7454250637016446, | |
| "grad_norm": 0.80859375, | |
| "learning_rate": 1.2018939811533943e-05, | |
| "loss": 0.8831995129585266, | |
| "step": 1609 | |
| }, | |
| { | |
| "epoch": 0.7458883483900857, | |
| "grad_norm": 0.87109375, | |
| "learning_rate": 1.1990533827916945e-05, | |
| "loss": 0.9695085287094116, | |
| "step": 1610 | |
| }, | |
| { | |
| "epoch": 0.7463516330785267, | |
| "grad_norm": 0.83984375, | |
| "learning_rate": 1.1962150269970522e-05, | |
| "loss": 0.7822751402854919, | |
| "step": 1611 | |
| }, | |
| { | |
| "epoch": 0.7468149177669678, | |
| "grad_norm": 0.99609375, | |
| "learning_rate": 1.1933789190696248e-05, | |
| "loss": 1.0877046585083008, | |
| "step": 1612 | |
| }, | |
| { | |
| "epoch": 0.7472782024554089, | |
| "grad_norm": 1.0078125, | |
| "learning_rate": 1.1905450643053673e-05, | |
| "loss": 0.9393079280853271, | |
| "step": 1613 | |
| }, | |
| { | |
| "epoch": 0.7477414871438499, | |
| "grad_norm": 0.83984375, | |
| "learning_rate": 1.187713467996031e-05, | |
| "loss": 0.8477166295051575, | |
| "step": 1614 | |
| }, | |
| { | |
| "epoch": 0.748204771832291, | |
| "grad_norm": 0.86328125, | |
| "learning_rate": 1.1848841354291486e-05, | |
| "loss": 1.0002130270004272, | |
| "step": 1615 | |
| }, | |
| { | |
| "epoch": 0.748668056520732, | |
| "grad_norm": 0.76953125, | |
| "learning_rate": 1.1820570718880265e-05, | |
| "loss": 0.8591433167457581, | |
| "step": 1616 | |
| }, | |
| { | |
| "epoch": 0.749131341209173, | |
| "grad_norm": 0.80859375, | |
| "learning_rate": 1.179232282651732e-05, | |
| "loss": 0.8319763541221619, | |
| "step": 1617 | |
| }, | |
| { | |
| "epoch": 0.749594625897614, | |
| "grad_norm": 0.78515625, | |
| "learning_rate": 1.1764097729950881e-05, | |
| "loss": 0.8276806473731995, | |
| "step": 1618 | |
| }, | |
| { | |
| "epoch": 0.7500579105860551, | |
| "grad_norm": 0.81640625, | |
| "learning_rate": 1.1735895481886583e-05, | |
| "loss": 0.940848708152771, | |
| "step": 1619 | |
| }, | |
| { | |
| "epoch": 0.7505211952744962, | |
| "grad_norm": 0.84765625, | |
| "learning_rate": 1.1707716134987416e-05, | |
| "loss": 0.8151571154594421, | |
| "step": 1620 | |
| }, | |
| { | |
| "epoch": 0.7509844799629373, | |
| "grad_norm": 0.81640625, | |
| "learning_rate": 1.167955974187361e-05, | |
| "loss": 0.8742914795875549, | |
| "step": 1621 | |
| }, | |
| { | |
| "epoch": 0.7514477646513783, | |
| "grad_norm": 0.8125, | |
| "learning_rate": 1.1651426355122506e-05, | |
| "loss": 0.8702387809753418, | |
| "step": 1622 | |
| }, | |
| { | |
| "epoch": 0.7519110493398193, | |
| "grad_norm": 1.0703125, | |
| "learning_rate": 1.1623316027268506e-05, | |
| "loss": 0.8356806039810181, | |
| "step": 1623 | |
| }, | |
| { | |
| "epoch": 0.7523743340282604, | |
| "grad_norm": 0.828125, | |
| "learning_rate": 1.1595228810802956e-05, | |
| "loss": 0.8744298815727234, | |
| "step": 1624 | |
| }, | |
| { | |
| "epoch": 0.7528376187167014, | |
| "grad_norm": 0.9140625, | |
| "learning_rate": 1.156716475817404e-05, | |
| "loss": 0.8392553329467773, | |
| "step": 1625 | |
| }, | |
| { | |
| "epoch": 0.7533009034051424, | |
| "grad_norm": 0.859375, | |
| "learning_rate": 1.1539123921786677e-05, | |
| "loss": 0.9593334197998047, | |
| "step": 1626 | |
| }, | |
| { | |
| "epoch": 0.7537641880935835, | |
| "grad_norm": 0.8359375, | |
| "learning_rate": 1.1511106354002433e-05, | |
| "loss": 0.9393202066421509, | |
| "step": 1627 | |
| }, | |
| { | |
| "epoch": 0.7542274727820245, | |
| "grad_norm": 0.88671875, | |
| "learning_rate": 1.1483112107139444e-05, | |
| "loss": 0.8775235414505005, | |
| "step": 1628 | |
| }, | |
| { | |
| "epoch": 0.7546907574704657, | |
| "grad_norm": 0.796875, | |
| "learning_rate": 1.1455141233472282e-05, | |
| "loss": 0.9231534004211426, | |
| "step": 1629 | |
| }, | |
| { | |
| "epoch": 0.7551540421589067, | |
| "grad_norm": 0.8984375, | |
| "learning_rate": 1.1427193785231894e-05, | |
| "loss": 1.0438674688339233, | |
| "step": 1630 | |
| }, | |
| { | |
| "epoch": 0.7556173268473477, | |
| "grad_norm": 1.0625, | |
| "learning_rate": 1.1399269814605442e-05, | |
| "loss": 1.0147441625595093, | |
| "step": 1631 | |
| }, | |
| { | |
| "epoch": 0.7560806115357888, | |
| "grad_norm": 0.8671875, | |
| "learning_rate": 1.1371369373736287e-05, | |
| "loss": 1.0305769443511963, | |
| "step": 1632 | |
| }, | |
| { | |
| "epoch": 0.7565438962242298, | |
| "grad_norm": 0.9375, | |
| "learning_rate": 1.1343492514723849e-05, | |
| "loss": 0.9958128333091736, | |
| "step": 1633 | |
| }, | |
| { | |
| "epoch": 0.7570071809126708, | |
| "grad_norm": 0.75390625, | |
| "learning_rate": 1.131563928962348e-05, | |
| "loss": 0.997496485710144, | |
| "step": 1634 | |
| }, | |
| { | |
| "epoch": 0.7574704656011119, | |
| "grad_norm": 0.921875, | |
| "learning_rate": 1.128780975044644e-05, | |
| "loss": 0.885054886341095, | |
| "step": 1635 | |
| }, | |
| { | |
| "epoch": 0.7579337502895529, | |
| "grad_norm": 0.7734375, | |
| "learning_rate": 1.1260003949159729e-05, | |
| "loss": 0.9202278256416321, | |
| "step": 1636 | |
| }, | |
| { | |
| "epoch": 0.7583970349779939, | |
| "grad_norm": 0.73046875, | |
| "learning_rate": 1.1232221937686033e-05, | |
| "loss": 0.7514294385910034, | |
| "step": 1637 | |
| }, | |
| { | |
| "epoch": 0.7588603196664351, | |
| "grad_norm": 0.75390625, | |
| "learning_rate": 1.1204463767903624e-05, | |
| "loss": 0.7889110445976257, | |
| "step": 1638 | |
| }, | |
| { | |
| "epoch": 0.7593236043548761, | |
| "grad_norm": 0.7890625, | |
| "learning_rate": 1.1176729491646248e-05, | |
| "loss": 0.8639605045318604, | |
| "step": 1639 | |
| }, | |
| { | |
| "epoch": 0.7597868890433171, | |
| "grad_norm": 0.87890625, | |
| "learning_rate": 1.1149019160703012e-05, | |
| "loss": 0.9009301662445068, | |
| "step": 1640 | |
| }, | |
| { | |
| "epoch": 0.7602501737317582, | |
| "grad_norm": 0.84765625, | |
| "learning_rate": 1.1121332826818346e-05, | |
| "loss": 0.8193938732147217, | |
| "step": 1641 | |
| }, | |
| { | |
| "epoch": 0.7607134584201992, | |
| "grad_norm": 0.890625, | |
| "learning_rate": 1.1093670541691834e-05, | |
| "loss": 0.9459896683692932, | |
| "step": 1642 | |
| }, | |
| { | |
| "epoch": 0.7611767431086403, | |
| "grad_norm": 0.8125, | |
| "learning_rate": 1.1066032356978173e-05, | |
| "loss": 1.0018055438995361, | |
| "step": 1643 | |
| }, | |
| { | |
| "epoch": 0.7616400277970813, | |
| "grad_norm": 0.72265625, | |
| "learning_rate": 1.1038418324287065e-05, | |
| "loss": 0.9474573731422424, | |
| "step": 1644 | |
| }, | |
| { | |
| "epoch": 0.7621033124855223, | |
| "grad_norm": 0.8671875, | |
| "learning_rate": 1.1010828495183086e-05, | |
| "loss": 0.8761516213417053, | |
| "step": 1645 | |
| }, | |
| { | |
| "epoch": 0.7625665971739634, | |
| "grad_norm": 0.83203125, | |
| "learning_rate": 1.0983262921185635e-05, | |
| "loss": 0.8465573191642761, | |
| "step": 1646 | |
| }, | |
| { | |
| "epoch": 0.7630298818624045, | |
| "grad_norm": 0.9921875, | |
| "learning_rate": 1.095572165376881e-05, | |
| "loss": 1.064576268196106, | |
| "step": 1647 | |
| }, | |
| { | |
| "epoch": 0.7634931665508455, | |
| "grad_norm": 0.86328125, | |
| "learning_rate": 1.0928204744361344e-05, | |
| "loss": 0.9846107363700867, | |
| "step": 1648 | |
| }, | |
| { | |
| "epoch": 0.7639564512392866, | |
| "grad_norm": 0.78515625, | |
| "learning_rate": 1.0900712244346447e-05, | |
| "loss": 0.9933382272720337, | |
| "step": 1649 | |
| }, | |
| { | |
| "epoch": 0.7644197359277276, | |
| "grad_norm": 0.96875, | |
| "learning_rate": 1.087324420506176e-05, | |
| "loss": 1.019972324371338, | |
| "step": 1650 | |
| }, | |
| { | |
| "epoch": 0.7648830206161686, | |
| "grad_norm": 0.7890625, | |
| "learning_rate": 1.0845800677799265e-05, | |
| "loss": 0.8567442893981934, | |
| "step": 1651 | |
| }, | |
| { | |
| "epoch": 0.7653463053046097, | |
| "grad_norm": 0.8359375, | |
| "learning_rate": 1.0818381713805164e-05, | |
| "loss": 0.833297610282898, | |
| "step": 1652 | |
| }, | |
| { | |
| "epoch": 0.7658095899930507, | |
| "grad_norm": 0.75, | |
| "learning_rate": 1.0790987364279792e-05, | |
| "loss": 1.0111546516418457, | |
| "step": 1653 | |
| }, | |
| { | |
| "epoch": 0.7662728746814917, | |
| "grad_norm": 0.79296875, | |
| "learning_rate": 1.0763617680377507e-05, | |
| "loss": 0.9289141893386841, | |
| "step": 1654 | |
| }, | |
| { | |
| "epoch": 0.7667361593699328, | |
| "grad_norm": 0.859375, | |
| "learning_rate": 1.0736272713206621e-05, | |
| "loss": 0.8431038856506348, | |
| "step": 1655 | |
| }, | |
| { | |
| "epoch": 0.7671994440583739, | |
| "grad_norm": 0.78125, | |
| "learning_rate": 1.0708952513829302e-05, | |
| "loss": 0.911212682723999, | |
| "step": 1656 | |
| }, | |
| { | |
| "epoch": 0.767662728746815, | |
| "grad_norm": 1.109375, | |
| "learning_rate": 1.0681657133261436e-05, | |
| "loss": 1.0007601976394653, | |
| "step": 1657 | |
| }, | |
| { | |
| "epoch": 0.768126013435256, | |
| "grad_norm": 0.76171875, | |
| "learning_rate": 1.0654386622472605e-05, | |
| "loss": 0.7746074795722961, | |
| "step": 1658 | |
| }, | |
| { | |
| "epoch": 0.768589298123697, | |
| "grad_norm": 0.84765625, | |
| "learning_rate": 1.0627141032385904e-05, | |
| "loss": 0.8235775828361511, | |
| "step": 1659 | |
| }, | |
| { | |
| "epoch": 0.7690525828121381, | |
| "grad_norm": 0.84375, | |
| "learning_rate": 1.0599920413877935e-05, | |
| "loss": 1.0395088195800781, | |
| "step": 1660 | |
| }, | |
| { | |
| "epoch": 0.7695158675005791, | |
| "grad_norm": 0.796875, | |
| "learning_rate": 1.0572724817778647e-05, | |
| "loss": 0.9071087837219238, | |
| "step": 1661 | |
| }, | |
| { | |
| "epoch": 0.7699791521890201, | |
| "grad_norm": 0.86328125, | |
| "learning_rate": 1.0545554294871282e-05, | |
| "loss": 0.836544930934906, | |
| "step": 1662 | |
| }, | |
| { | |
| "epoch": 0.7704424368774612, | |
| "grad_norm": 0.765625, | |
| "learning_rate": 1.051840889589224e-05, | |
| "loss": 0.8565930724143982, | |
| "step": 1663 | |
| }, | |
| { | |
| "epoch": 0.7709057215659022, | |
| "grad_norm": 0.80078125, | |
| "learning_rate": 1.0491288671531e-05, | |
| "loss": 0.9731587171554565, | |
| "step": 1664 | |
| }, | |
| { | |
| "epoch": 0.7713690062543433, | |
| "grad_norm": 0.91796875, | |
| "learning_rate": 1.0464193672430078e-05, | |
| "loss": 0.9530751705169678, | |
| "step": 1665 | |
| }, | |
| { | |
| "epoch": 0.7718322909427844, | |
| "grad_norm": 0.75390625, | |
| "learning_rate": 1.0437123949184834e-05, | |
| "loss": 0.9852890968322754, | |
| "step": 1666 | |
| }, | |
| { | |
| "epoch": 0.7722955756312254, | |
| "grad_norm": 1.0625, | |
| "learning_rate": 1.0410079552343468e-05, | |
| "loss": 1.010793924331665, | |
| "step": 1667 | |
| }, | |
| { | |
| "epoch": 0.7727588603196665, | |
| "grad_norm": 0.67578125, | |
| "learning_rate": 1.0383060532406851e-05, | |
| "loss": 0.8825767636299133, | |
| "step": 1668 | |
| }, | |
| { | |
| "epoch": 0.7732221450081075, | |
| "grad_norm": 0.87109375, | |
| "learning_rate": 1.0356066939828494e-05, | |
| "loss": 0.7823730707168579, | |
| "step": 1669 | |
| }, | |
| { | |
| "epoch": 0.7736854296965485, | |
| "grad_norm": 1.03125, | |
| "learning_rate": 1.0329098825014424e-05, | |
| "loss": 0.8294221758842468, | |
| "step": 1670 | |
| }, | |
| { | |
| "epoch": 0.7741487143849896, | |
| "grad_norm": 1.5234375, | |
| "learning_rate": 1.0302156238323093e-05, | |
| "loss": 0.9734241962432861, | |
| "step": 1671 | |
| }, | |
| { | |
| "epoch": 0.7746119990734306, | |
| "grad_norm": 0.8671875, | |
| "learning_rate": 1.0275239230065266e-05, | |
| "loss": 1.0100374221801758, | |
| "step": 1672 | |
| }, | |
| { | |
| "epoch": 0.7750752837618716, | |
| "grad_norm": 0.75, | |
| "learning_rate": 1.0248347850503954e-05, | |
| "loss": 1.160994529724121, | |
| "step": 1673 | |
| }, | |
| { | |
| "epoch": 0.7755385684503128, | |
| "grad_norm": 0.79296875, | |
| "learning_rate": 1.0221482149854319e-05, | |
| "loss": 1.0030722618103027, | |
| "step": 1674 | |
| }, | |
| { | |
| "epoch": 0.7760018531387538, | |
| "grad_norm": 0.8515625, | |
| "learning_rate": 1.0194642178283568e-05, | |
| "loss": 0.9101303219795227, | |
| "step": 1675 | |
| }, | |
| { | |
| "epoch": 0.7764651378271948, | |
| "grad_norm": 0.8046875, | |
| "learning_rate": 1.0167827985910865e-05, | |
| "loss": 0.9005215167999268, | |
| "step": 1676 | |
| }, | |
| { | |
| "epoch": 0.7769284225156359, | |
| "grad_norm": 0.86328125, | |
| "learning_rate": 1.014103962280722e-05, | |
| "loss": 0.935279369354248, | |
| "step": 1677 | |
| }, | |
| { | |
| "epoch": 0.7773917072040769, | |
| "grad_norm": 0.859375, | |
| "learning_rate": 1.0114277138995428e-05, | |
| "loss": 0.8451523780822754, | |
| "step": 1678 | |
| }, | |
| { | |
| "epoch": 0.7778549918925179, | |
| "grad_norm": 0.93359375, | |
| "learning_rate": 1.0087540584449966e-05, | |
| "loss": 0.8488246202468872, | |
| "step": 1679 | |
| }, | |
| { | |
| "epoch": 0.778318276580959, | |
| "grad_norm": 0.859375, | |
| "learning_rate": 1.0060830009096858e-05, | |
| "loss": 0.9785677194595337, | |
| "step": 1680 | |
| }, | |
| { | |
| "epoch": 0.7787815612694, | |
| "grad_norm": 0.8828125, | |
| "learning_rate": 1.0034145462813665e-05, | |
| "loss": 1.1581677198410034, | |
| "step": 1681 | |
| }, | |
| { | |
| "epoch": 0.779244845957841, | |
| "grad_norm": 0.87109375, | |
| "learning_rate": 1.000748699542929e-05, | |
| "loss": 0.9785441160202026, | |
| "step": 1682 | |
| }, | |
| { | |
| "epoch": 0.7797081306462822, | |
| "grad_norm": 0.7578125, | |
| "learning_rate": 9.980854656723977e-06, | |
| "loss": 0.9519538879394531, | |
| "step": 1683 | |
| }, | |
| { | |
| "epoch": 0.7801714153347232, | |
| "grad_norm": 0.921875, | |
| "learning_rate": 9.954248496429166e-06, | |
| "loss": 0.8969386219978333, | |
| "step": 1684 | |
| }, | |
| { | |
| "epoch": 0.7806347000231643, | |
| "grad_norm": 0.78125, | |
| "learning_rate": 9.927668564227422e-06, | |
| "loss": 0.8716106414794922, | |
| "step": 1685 | |
| }, | |
| { | |
| "epoch": 0.7810979847116053, | |
| "grad_norm": 0.8359375, | |
| "learning_rate": 9.901114909752323e-06, | |
| "loss": 0.8693481087684631, | |
| "step": 1686 | |
| }, | |
| { | |
| "epoch": 0.7815612694000463, | |
| "grad_norm": 0.76953125, | |
| "learning_rate": 9.874587582588353e-06, | |
| "loss": 0.9671769142150879, | |
| "step": 1687 | |
| }, | |
| { | |
| "epoch": 0.7820245540884874, | |
| "grad_norm": 0.6875, | |
| "learning_rate": 9.848086632270901e-06, | |
| "loss": 0.8428149223327637, | |
| "step": 1688 | |
| }, | |
| { | |
| "epoch": 0.7824878387769284, | |
| "grad_norm": 0.78125, | |
| "learning_rate": 9.821612108286036e-06, | |
| "loss": 1.0409609079360962, | |
| "step": 1689 | |
| }, | |
| { | |
| "epoch": 0.7829511234653694, | |
| "grad_norm": 0.87109375, | |
| "learning_rate": 9.795164060070523e-06, | |
| "loss": 0.9725620746612549, | |
| "step": 1690 | |
| }, | |
| { | |
| "epoch": 0.7834144081538105, | |
| "grad_norm": 0.9609375, | |
| "learning_rate": 9.768742537011652e-06, | |
| "loss": 0.9622225165367126, | |
| "step": 1691 | |
| }, | |
| { | |
| "epoch": 0.7838776928422516, | |
| "grad_norm": 0.6796875, | |
| "learning_rate": 9.74234758844721e-06, | |
| "loss": 0.762078583240509, | |
| "step": 1692 | |
| }, | |
| { | |
| "epoch": 0.7843409775306927, | |
| "grad_norm": 0.8046875, | |
| "learning_rate": 9.715979263665355e-06, | |
| "loss": 0.8382387161254883, | |
| "step": 1693 | |
| }, | |
| { | |
| "epoch": 0.7848042622191337, | |
| "grad_norm": 0.76171875, | |
| "learning_rate": 9.689637611904528e-06, | |
| "loss": 0.8127785921096802, | |
| "step": 1694 | |
| }, | |
| { | |
| "epoch": 0.7852675469075747, | |
| "grad_norm": 0.77734375, | |
| "learning_rate": 9.663322682353359e-06, | |
| "loss": 0.7931768894195557, | |
| "step": 1695 | |
| }, | |
| { | |
| "epoch": 0.7857308315960158, | |
| "grad_norm": 0.734375, | |
| "learning_rate": 9.637034524150567e-06, | |
| "loss": 0.9206767678260803, | |
| "step": 1696 | |
| }, | |
| { | |
| "epoch": 0.7861941162844568, | |
| "grad_norm": 0.97265625, | |
| "learning_rate": 9.610773186384898e-06, | |
| "loss": 0.9987959265708923, | |
| "step": 1697 | |
| }, | |
| { | |
| "epoch": 0.7866574009728978, | |
| "grad_norm": 0.89453125, | |
| "learning_rate": 9.584538718095019e-06, | |
| "loss": 0.9304317235946655, | |
| "step": 1698 | |
| }, | |
| { | |
| "epoch": 0.7871206856613389, | |
| "grad_norm": 0.875, | |
| "learning_rate": 9.558331168269418e-06, | |
| "loss": 0.8821865320205688, | |
| "step": 1699 | |
| }, | |
| { | |
| "epoch": 0.7875839703497799, | |
| "grad_norm": 0.7109375, | |
| "learning_rate": 9.532150585846297e-06, | |
| "loss": 0.879505455493927, | |
| "step": 1700 | |
| }, | |
| { | |
| "epoch": 0.788047255038221, | |
| "grad_norm": 0.97265625, | |
| "learning_rate": 9.505997019713527e-06, | |
| "loss": 0.9673975110054016, | |
| "step": 1701 | |
| }, | |
| { | |
| "epoch": 0.7885105397266621, | |
| "grad_norm": 0.78515625, | |
| "learning_rate": 9.479870518708525e-06, | |
| "loss": 0.9111735820770264, | |
| "step": 1702 | |
| }, | |
| { | |
| "epoch": 0.7889738244151031, | |
| "grad_norm": 0.8515625, | |
| "learning_rate": 9.453771131618154e-06, | |
| "loss": 1.1614623069763184, | |
| "step": 1703 | |
| }, | |
| { | |
| "epoch": 0.7894371091035441, | |
| "grad_norm": 0.73046875, | |
| "learning_rate": 9.42769890717867e-06, | |
| "loss": 0.8665764331817627, | |
| "step": 1704 | |
| }, | |
| { | |
| "epoch": 0.7899003937919852, | |
| "grad_norm": 0.86328125, | |
| "learning_rate": 9.401653894075576e-06, | |
| "loss": 0.9338579177856445, | |
| "step": 1705 | |
| }, | |
| { | |
| "epoch": 0.7903636784804262, | |
| "grad_norm": 0.8046875, | |
| "learning_rate": 9.375636140943591e-06, | |
| "loss": 1.0184825658798218, | |
| "step": 1706 | |
| }, | |
| { | |
| "epoch": 0.7908269631688672, | |
| "grad_norm": 0.890625, | |
| "learning_rate": 9.349645696366522e-06, | |
| "loss": 0.8786851167678833, | |
| "step": 1707 | |
| }, | |
| { | |
| "epoch": 0.7912902478573083, | |
| "grad_norm": 0.94921875, | |
| "learning_rate": 9.32368260887718e-06, | |
| "loss": 1.002317190170288, | |
| "step": 1708 | |
| }, | |
| { | |
| "epoch": 0.7917535325457493, | |
| "grad_norm": 0.80859375, | |
| "learning_rate": 9.29774692695729e-06, | |
| "loss": 0.9711103439331055, | |
| "step": 1709 | |
| }, | |
| { | |
| "epoch": 0.7922168172341905, | |
| "grad_norm": 0.86328125, | |
| "learning_rate": 9.271838699037386e-06, | |
| "loss": 0.8316183090209961, | |
| "step": 1710 | |
| }, | |
| { | |
| "epoch": 0.7926801019226315, | |
| "grad_norm": 0.86328125, | |
| "learning_rate": 9.24595797349678e-06, | |
| "loss": 1.0970137119293213, | |
| "step": 1711 | |
| }, | |
| { | |
| "epoch": 0.7931433866110725, | |
| "grad_norm": 0.87109375, | |
| "learning_rate": 9.220104798663383e-06, | |
| "loss": 0.8820261359214783, | |
| "step": 1712 | |
| }, | |
| { | |
| "epoch": 0.7936066712995136, | |
| "grad_norm": 0.9609375, | |
| "learning_rate": 9.194279222813689e-06, | |
| "loss": 0.9609889388084412, | |
| "step": 1713 | |
| }, | |
| { | |
| "epoch": 0.7940699559879546, | |
| "grad_norm": 0.875, | |
| "learning_rate": 9.168481294172628e-06, | |
| "loss": 0.8790441751480103, | |
| "step": 1714 | |
| }, | |
| { | |
| "epoch": 0.7945332406763956, | |
| "grad_norm": 0.9375, | |
| "learning_rate": 9.142711060913529e-06, | |
| "loss": 0.8385621309280396, | |
| "step": 1715 | |
| }, | |
| { | |
| "epoch": 0.7949965253648367, | |
| "grad_norm": 0.78515625, | |
| "learning_rate": 9.116968571158004e-06, | |
| "loss": 0.7539405822753906, | |
| "step": 1716 | |
| }, | |
| { | |
| "epoch": 0.7954598100532777, | |
| "grad_norm": 0.86328125, | |
| "learning_rate": 9.09125387297583e-06, | |
| "loss": 0.8373897075653076, | |
| "step": 1717 | |
| }, | |
| { | |
| "epoch": 0.7959230947417187, | |
| "grad_norm": 0.88671875, | |
| "learning_rate": 9.065567014384927e-06, | |
| "loss": 0.8968495726585388, | |
| "step": 1718 | |
| }, | |
| { | |
| "epoch": 0.7963863794301599, | |
| "grad_norm": 0.94921875, | |
| "learning_rate": 9.03990804335119e-06, | |
| "loss": 0.779243528842926, | |
| "step": 1719 | |
| }, | |
| { | |
| "epoch": 0.7968496641186009, | |
| "grad_norm": 0.859375, | |
| "learning_rate": 9.014277007788471e-06, | |
| "loss": 0.8106821775436401, | |
| "step": 1720 | |
| }, | |
| { | |
| "epoch": 0.797312948807042, | |
| "grad_norm": 0.75390625, | |
| "learning_rate": 8.988673955558443e-06, | |
| "loss": 0.8644953370094299, | |
| "step": 1721 | |
| }, | |
| { | |
| "epoch": 0.797776233495483, | |
| "grad_norm": 0.76171875, | |
| "learning_rate": 8.96309893447053e-06, | |
| "loss": 0.9038248658180237, | |
| "step": 1722 | |
| }, | |
| { | |
| "epoch": 0.798239518183924, | |
| "grad_norm": 0.93359375, | |
| "learning_rate": 8.937551992281796e-06, | |
| "loss": 0.996218204498291, | |
| "step": 1723 | |
| }, | |
| { | |
| "epoch": 0.7987028028723651, | |
| "grad_norm": 0.859375, | |
| "learning_rate": 8.912033176696893e-06, | |
| "loss": 0.8666702508926392, | |
| "step": 1724 | |
| }, | |
| { | |
| "epoch": 0.7991660875608061, | |
| "grad_norm": 0.85546875, | |
| "learning_rate": 8.886542535367954e-06, | |
| "loss": 0.8723835349082947, | |
| "step": 1725 | |
| }, | |
| { | |
| "epoch": 0.7996293722492471, | |
| "grad_norm": 0.8515625, | |
| "learning_rate": 8.861080115894469e-06, | |
| "loss": 0.9064018130302429, | |
| "step": 1726 | |
| }, | |
| { | |
| "epoch": 0.8000926569376882, | |
| "grad_norm": 0.86328125, | |
| "learning_rate": 8.83564596582327e-06, | |
| "loss": 0.8690557479858398, | |
| "step": 1727 | |
| }, | |
| { | |
| "epoch": 0.8005559416261293, | |
| "grad_norm": 0.9453125, | |
| "learning_rate": 8.810240132648365e-06, | |
| "loss": 0.9081324934959412, | |
| "step": 1728 | |
| }, | |
| { | |
| "epoch": 0.8010192263145703, | |
| "grad_norm": 1.0, | |
| "learning_rate": 8.784862663810909e-06, | |
| "loss": 0.8451979160308838, | |
| "step": 1729 | |
| }, | |
| { | |
| "epoch": 0.8014825110030114, | |
| "grad_norm": 0.88671875, | |
| "learning_rate": 8.759513606699077e-06, | |
| "loss": 1.040283203125, | |
| "step": 1730 | |
| }, | |
| { | |
| "epoch": 0.8019457956914524, | |
| "grad_norm": 0.7734375, | |
| "learning_rate": 8.734193008648011e-06, | |
| "loss": 1.03599214553833, | |
| "step": 1731 | |
| }, | |
| { | |
| "epoch": 0.8024090803798934, | |
| "grad_norm": 0.7578125, | |
| "learning_rate": 8.708900916939685e-06, | |
| "loss": 1.0581880807876587, | |
| "step": 1732 | |
| }, | |
| { | |
| "epoch": 0.8028723650683345, | |
| "grad_norm": 1.0390625, | |
| "learning_rate": 8.683637378802835e-06, | |
| "loss": 0.9091055989265442, | |
| "step": 1733 | |
| }, | |
| { | |
| "epoch": 0.8033356497567755, | |
| "grad_norm": 0.76953125, | |
| "learning_rate": 8.658402441412928e-06, | |
| "loss": 0.7468848824501038, | |
| "step": 1734 | |
| }, | |
| { | |
| "epoch": 0.8037989344452166, | |
| "grad_norm": 0.99609375, | |
| "learning_rate": 8.63319615189197e-06, | |
| "loss": 1.0149157047271729, | |
| "step": 1735 | |
| }, | |
| { | |
| "epoch": 0.8042622191336576, | |
| "grad_norm": 0.8203125, | |
| "learning_rate": 8.608018557308506e-06, | |
| "loss": 0.825045645236969, | |
| "step": 1736 | |
| }, | |
| { | |
| "epoch": 0.8047255038220987, | |
| "grad_norm": 0.92578125, | |
| "learning_rate": 8.58286970467747e-06, | |
| "loss": 0.7038781642913818, | |
| "step": 1737 | |
| }, | |
| { | |
| "epoch": 0.8051887885105398, | |
| "grad_norm": 0.8515625, | |
| "learning_rate": 8.55774964096015e-06, | |
| "loss": 1.1522748470306396, | |
| "step": 1738 | |
| }, | |
| { | |
| "epoch": 0.8056520731989808, | |
| "grad_norm": 0.83203125, | |
| "learning_rate": 8.53265841306407e-06, | |
| "loss": 0.949603259563446, | |
| "step": 1739 | |
| }, | |
| { | |
| "epoch": 0.8061153578874218, | |
| "grad_norm": 0.875, | |
| "learning_rate": 8.507596067842894e-06, | |
| "loss": 0.9490684866905212, | |
| "step": 1740 | |
| }, | |
| { | |
| "epoch": 0.8065786425758629, | |
| "grad_norm": 0.81640625, | |
| "learning_rate": 8.482562652096375e-06, | |
| "loss": 0.874863862991333, | |
| "step": 1741 | |
| }, | |
| { | |
| "epoch": 0.8070419272643039, | |
| "grad_norm": 0.8203125, | |
| "learning_rate": 8.457558212570205e-06, | |
| "loss": 0.9082040190696716, | |
| "step": 1742 | |
| }, | |
| { | |
| "epoch": 0.8075052119527449, | |
| "grad_norm": 0.8671875, | |
| "learning_rate": 8.432582795956032e-06, | |
| "loss": 0.9235297441482544, | |
| "step": 1743 | |
| }, | |
| { | |
| "epoch": 0.807968496641186, | |
| "grad_norm": 0.765625, | |
| "learning_rate": 8.407636448891245e-06, | |
| "loss": 0.9147178530693054, | |
| "step": 1744 | |
| }, | |
| { | |
| "epoch": 0.808431781329627, | |
| "grad_norm": 0.796875, | |
| "learning_rate": 8.382719217958996e-06, | |
| "loss": 1.0070385932922363, | |
| "step": 1745 | |
| }, | |
| { | |
| "epoch": 0.8088950660180682, | |
| "grad_norm": 0.79296875, | |
| "learning_rate": 8.35783114968803e-06, | |
| "loss": 0.8351930975914001, | |
| "step": 1746 | |
| }, | |
| { | |
| "epoch": 0.8093583507065092, | |
| "grad_norm": 0.77734375, | |
| "learning_rate": 8.332972290552663e-06, | |
| "loss": 0.8390335440635681, | |
| "step": 1747 | |
| }, | |
| { | |
| "epoch": 0.8098216353949502, | |
| "grad_norm": 0.796875, | |
| "learning_rate": 8.308142686972666e-06, | |
| "loss": 0.9591008424758911, | |
| "step": 1748 | |
| }, | |
| { | |
| "epoch": 0.8102849200833913, | |
| "grad_norm": 0.87890625, | |
| "learning_rate": 8.28334238531316e-06, | |
| "loss": 1.086327075958252, | |
| "step": 1749 | |
| }, | |
| { | |
| "epoch": 0.8107482047718323, | |
| "grad_norm": 1.109375, | |
| "learning_rate": 8.258571431884575e-06, | |
| "loss": 0.960287868976593, | |
| "step": 1750 | |
| }, | |
| { | |
| "epoch": 0.8112114894602733, | |
| "grad_norm": 0.796875, | |
| "learning_rate": 8.233829872942513e-06, | |
| "loss": 0.9874426126480103, | |
| "step": 1751 | |
| }, | |
| { | |
| "epoch": 0.8116747741487144, | |
| "grad_norm": 0.859375, | |
| "learning_rate": 8.209117754687708e-06, | |
| "loss": 0.9690349102020264, | |
| "step": 1752 | |
| }, | |
| { | |
| "epoch": 0.8121380588371554, | |
| "grad_norm": 0.82421875, | |
| "learning_rate": 8.184435123265906e-06, | |
| "loss": 1.0509775876998901, | |
| "step": 1753 | |
| }, | |
| { | |
| "epoch": 0.8126013435255964, | |
| "grad_norm": 0.94140625, | |
| "learning_rate": 8.159782024767808e-06, | |
| "loss": 1.0827224254608154, | |
| "step": 1754 | |
| }, | |
| { | |
| "epoch": 0.8130646282140376, | |
| "grad_norm": 0.921875, | |
| "learning_rate": 8.13515850522894e-06, | |
| "loss": 1.030488133430481, | |
| "step": 1755 | |
| }, | |
| { | |
| "epoch": 0.8135279129024786, | |
| "grad_norm": 0.90234375, | |
| "learning_rate": 8.110564610629599e-06, | |
| "loss": 0.920238733291626, | |
| "step": 1756 | |
| }, | |
| { | |
| "epoch": 0.8139911975909196, | |
| "grad_norm": 0.83203125, | |
| "learning_rate": 8.086000386894804e-06, | |
| "loss": 0.8451364040374756, | |
| "step": 1757 | |
| }, | |
| { | |
| "epoch": 0.8144544822793607, | |
| "grad_norm": 0.96484375, | |
| "learning_rate": 8.061465879894107e-06, | |
| "loss": 0.893768846988678, | |
| "step": 1758 | |
| }, | |
| { | |
| "epoch": 0.8149177669678017, | |
| "grad_norm": 0.84765625, | |
| "learning_rate": 8.036961135441621e-06, | |
| "loss": 0.9750687479972839, | |
| "step": 1759 | |
| }, | |
| { | |
| "epoch": 0.8153810516562428, | |
| "grad_norm": 0.7578125, | |
| "learning_rate": 8.01248619929584e-06, | |
| "loss": 1.0192598104476929, | |
| "step": 1760 | |
| }, | |
| { | |
| "epoch": 0.8158443363446838, | |
| "grad_norm": 0.7421875, | |
| "learning_rate": 7.988041117159626e-06, | |
| "loss": 1.0063856840133667, | |
| "step": 1761 | |
| }, | |
| { | |
| "epoch": 0.8163076210331248, | |
| "grad_norm": 0.890625, | |
| "learning_rate": 7.96362593468009e-06, | |
| "loss": 0.8969213366508484, | |
| "step": 1762 | |
| }, | |
| { | |
| "epoch": 0.8167709057215659, | |
| "grad_norm": 0.83203125, | |
| "learning_rate": 7.939240697448489e-06, | |
| "loss": 1.0545355081558228, | |
| "step": 1763 | |
| }, | |
| { | |
| "epoch": 0.817234190410007, | |
| "grad_norm": 0.796875, | |
| "learning_rate": 7.914885451000196e-06, | |
| "loss": 0.8668513894081116, | |
| "step": 1764 | |
| }, | |
| { | |
| "epoch": 0.817697475098448, | |
| "grad_norm": 0.9375, | |
| "learning_rate": 7.890560240814532e-06, | |
| "loss": 0.9358338713645935, | |
| "step": 1765 | |
| }, | |
| { | |
| "epoch": 0.8181607597868891, | |
| "grad_norm": 0.77734375, | |
| "learning_rate": 7.866265112314799e-06, | |
| "loss": 0.8214500546455383, | |
| "step": 1766 | |
| }, | |
| { | |
| "epoch": 0.8186240444753301, | |
| "grad_norm": 0.86328125, | |
| "learning_rate": 7.842000110868062e-06, | |
| "loss": 0.970549464225769, | |
| "step": 1767 | |
| }, | |
| { | |
| "epoch": 0.8190873291637711, | |
| "grad_norm": 0.86328125, | |
| "learning_rate": 7.81776528178517e-06, | |
| "loss": 0.7393088936805725, | |
| "step": 1768 | |
| }, | |
| { | |
| "epoch": 0.8195506138522122, | |
| "grad_norm": 0.84765625, | |
| "learning_rate": 7.793560670320604e-06, | |
| "loss": 1.0626767873764038, | |
| "step": 1769 | |
| }, | |
| { | |
| "epoch": 0.8200138985406532, | |
| "grad_norm": 0.984375, | |
| "learning_rate": 7.769386321672433e-06, | |
| "loss": 0.9659113883972168, | |
| "step": 1770 | |
| }, | |
| { | |
| "epoch": 0.8204771832290942, | |
| "grad_norm": 0.83984375, | |
| "learning_rate": 7.74524228098222e-06, | |
| "loss": 0.9227487444877625, | |
| "step": 1771 | |
| }, | |
| { | |
| "epoch": 0.8209404679175353, | |
| "grad_norm": 0.88671875, | |
| "learning_rate": 7.72112859333491e-06, | |
| "loss": 1.034658670425415, | |
| "step": 1772 | |
| }, | |
| { | |
| "epoch": 0.8214037526059764, | |
| "grad_norm": 0.984375, | |
| "learning_rate": 7.6970453037588e-06, | |
| "loss": 0.90932297706604, | |
| "step": 1773 | |
| }, | |
| { | |
| "epoch": 0.8218670372944175, | |
| "grad_norm": 0.76171875, | |
| "learning_rate": 7.672992457225394e-06, | |
| "loss": 0.9367461204528809, | |
| "step": 1774 | |
| }, | |
| { | |
| "epoch": 0.8223303219828585, | |
| "grad_norm": 0.890625, | |
| "learning_rate": 7.648970098649369e-06, | |
| "loss": 1.1317241191864014, | |
| "step": 1775 | |
| }, | |
| { | |
| "epoch": 0.8227936066712995, | |
| "grad_norm": 0.82421875, | |
| "learning_rate": 7.6249782728884594e-06, | |
| "loss": 1.149112343788147, | |
| "step": 1776 | |
| }, | |
| { | |
| "epoch": 0.8232568913597406, | |
| "grad_norm": 0.88671875, | |
| "learning_rate": 7.601017024743406e-06, | |
| "loss": 0.9359456300735474, | |
| "step": 1777 | |
| }, | |
| { | |
| "epoch": 0.8237201760481816, | |
| "grad_norm": 0.80078125, | |
| "learning_rate": 7.57708639895781e-06, | |
| "loss": 0.9038832783699036, | |
| "step": 1778 | |
| }, | |
| { | |
| "epoch": 0.8241834607366226, | |
| "grad_norm": 0.8828125, | |
| "learning_rate": 7.553186440218129e-06, | |
| "loss": 0.9364471435546875, | |
| "step": 1779 | |
| }, | |
| { | |
| "epoch": 0.8246467454250637, | |
| "grad_norm": 0.83984375, | |
| "learning_rate": 7.529317193153543e-06, | |
| "loss": 0.8946930170059204, | |
| "step": 1780 | |
| }, | |
| { | |
| "epoch": 0.8251100301135047, | |
| "grad_norm": 0.71484375, | |
| "learning_rate": 7.505478702335871e-06, | |
| "loss": 0.9415825009346008, | |
| "step": 1781 | |
| }, | |
| { | |
| "epoch": 0.8255733148019458, | |
| "grad_norm": 0.83203125, | |
| "learning_rate": 7.481671012279523e-06, | |
| "loss": 0.9467945694923401, | |
| "step": 1782 | |
| }, | |
| { | |
| "epoch": 0.8260365994903869, | |
| "grad_norm": 0.90234375, | |
| "learning_rate": 7.457894167441365e-06, | |
| "loss": 0.9999266266822815, | |
| "step": 1783 | |
| }, | |
| { | |
| "epoch": 0.8264998841788279, | |
| "grad_norm": 0.875, | |
| "learning_rate": 7.434148212220688e-06, | |
| "loss": 0.854033350944519, | |
| "step": 1784 | |
| }, | |
| { | |
| "epoch": 0.826963168867269, | |
| "grad_norm": 0.71484375, | |
| "learning_rate": 7.4104331909591e-06, | |
| "loss": 0.8315075039863586, | |
| "step": 1785 | |
| }, | |
| { | |
| "epoch": 0.82742645355571, | |
| "grad_norm": 0.86328125, | |
| "learning_rate": 7.3867491479404256e-06, | |
| "loss": 0.7753373980522156, | |
| "step": 1786 | |
| }, | |
| { | |
| "epoch": 0.827889738244151, | |
| "grad_norm": 1.015625, | |
| "learning_rate": 7.363096127390672e-06, | |
| "loss": 1.0225822925567627, | |
| "step": 1787 | |
| }, | |
| { | |
| "epoch": 0.828353022932592, | |
| "grad_norm": 0.7578125, | |
| "learning_rate": 7.339474173477875e-06, | |
| "loss": 0.8440317511558533, | |
| "step": 1788 | |
| }, | |
| { | |
| "epoch": 0.8288163076210331, | |
| "grad_norm": 0.734375, | |
| "learning_rate": 7.315883330312121e-06, | |
| "loss": 0.8816229701042175, | |
| "step": 1789 | |
| }, | |
| { | |
| "epoch": 0.8292795923094741, | |
| "grad_norm": 0.79296875, | |
| "learning_rate": 7.292323641945339e-06, | |
| "loss": 0.9245011210441589, | |
| "step": 1790 | |
| }, | |
| { | |
| "epoch": 0.8297428769979153, | |
| "grad_norm": 0.8515625, | |
| "learning_rate": 7.268795152371322e-06, | |
| "loss": 0.919562816619873, | |
| "step": 1791 | |
| }, | |
| { | |
| "epoch": 0.8302061616863563, | |
| "grad_norm": 0.7890625, | |
| "learning_rate": 7.245297905525582e-06, | |
| "loss": 0.9542215466499329, | |
| "step": 1792 | |
| }, | |
| { | |
| "epoch": 0.8306694463747973, | |
| "grad_norm": 0.8046875, | |
| "learning_rate": 7.2218319452853055e-06, | |
| "loss": 0.840879499912262, | |
| "step": 1793 | |
| }, | |
| { | |
| "epoch": 0.8311327310632384, | |
| "grad_norm": 0.80078125, | |
| "learning_rate": 7.198397315469257e-06, | |
| "loss": 1.072795033454895, | |
| "step": 1794 | |
| }, | |
| { | |
| "epoch": 0.8315960157516794, | |
| "grad_norm": 0.7578125, | |
| "learning_rate": 7.174994059837673e-06, | |
| "loss": 0.9962195754051208, | |
| "step": 1795 | |
| }, | |
| { | |
| "epoch": 0.8320593004401204, | |
| "grad_norm": 0.7890625, | |
| "learning_rate": 7.1516222220922425e-06, | |
| "loss": 0.8247233033180237, | |
| "step": 1796 | |
| }, | |
| { | |
| "epoch": 0.8325225851285615, | |
| "grad_norm": 0.87109375, | |
| "learning_rate": 7.128281845875946e-06, | |
| "loss": 0.8432327508926392, | |
| "step": 1797 | |
| }, | |
| { | |
| "epoch": 0.8329858698170025, | |
| "grad_norm": 0.953125, | |
| "learning_rate": 7.104972974773042e-06, | |
| "loss": 1.0490100383758545, | |
| "step": 1798 | |
| }, | |
| { | |
| "epoch": 0.8334491545054435, | |
| "grad_norm": 0.86328125, | |
| "learning_rate": 7.081695652308952e-06, | |
| "loss": 1.007668375968933, | |
| "step": 1799 | |
| }, | |
| { | |
| "epoch": 0.8339124391938847, | |
| "grad_norm": 0.99609375, | |
| "learning_rate": 7.058449921950193e-06, | |
| "loss": 0.9167599081993103, | |
| "step": 1800 | |
| }, | |
| { | |
| "epoch": 0.8343757238823257, | |
| "grad_norm": 0.81640625, | |
| "learning_rate": 7.035235827104265e-06, | |
| "loss": 0.930167019367218, | |
| "step": 1801 | |
| }, | |
| { | |
| "epoch": 0.8348390085707668, | |
| "grad_norm": 1.1328125, | |
| "learning_rate": 7.012053411119619e-06, | |
| "loss": 0.8546901941299438, | |
| "step": 1802 | |
| }, | |
| { | |
| "epoch": 0.8353022932592078, | |
| "grad_norm": 0.8515625, | |
| "learning_rate": 6.988902717285545e-06, | |
| "loss": 0.9504755139350891, | |
| "step": 1803 | |
| }, | |
| { | |
| "epoch": 0.8357655779476488, | |
| "grad_norm": 0.84375, | |
| "learning_rate": 6.9657837888320815e-06, | |
| "loss": 0.9580096006393433, | |
| "step": 1804 | |
| }, | |
| { | |
| "epoch": 0.8362288626360899, | |
| "grad_norm": 0.796875, | |
| "learning_rate": 6.94269666892998e-06, | |
| "loss": 0.7468809485435486, | |
| "step": 1805 | |
| }, | |
| { | |
| "epoch": 0.8366921473245309, | |
| "grad_norm": 0.88671875, | |
| "learning_rate": 6.919641400690559e-06, | |
| "loss": 0.8644688129425049, | |
| "step": 1806 | |
| }, | |
| { | |
| "epoch": 0.8371554320129719, | |
| "grad_norm": 0.83984375, | |
| "learning_rate": 6.896618027165684e-06, | |
| "loss": 0.9328237771987915, | |
| "step": 1807 | |
| }, | |
| { | |
| "epoch": 0.837618716701413, | |
| "grad_norm": 0.97265625, | |
| "learning_rate": 6.873626591347671e-06, | |
| "loss": 1.1448862552642822, | |
| "step": 1808 | |
| }, | |
| { | |
| "epoch": 0.8380820013898541, | |
| "grad_norm": 0.84765625, | |
| "learning_rate": 6.850667136169164e-06, | |
| "loss": 0.8983963131904602, | |
| "step": 1809 | |
| }, | |
| { | |
| "epoch": 0.8385452860782951, | |
| "grad_norm": 0.91015625, | |
| "learning_rate": 6.8277397045031205e-06, | |
| "loss": 0.8409160375595093, | |
| "step": 1810 | |
| }, | |
| { | |
| "epoch": 0.8390085707667362, | |
| "grad_norm": 0.73046875, | |
| "learning_rate": 6.804844339162666e-06, | |
| "loss": 0.8944919109344482, | |
| "step": 1811 | |
| }, | |
| { | |
| "epoch": 0.8394718554551772, | |
| "grad_norm": 1.140625, | |
| "learning_rate": 6.781981082901101e-06, | |
| "loss": 0.9417099952697754, | |
| "step": 1812 | |
| }, | |
| { | |
| "epoch": 0.8399351401436183, | |
| "grad_norm": 0.91796875, | |
| "learning_rate": 6.759149978411709e-06, | |
| "loss": 0.8804126977920532, | |
| "step": 1813 | |
| }, | |
| { | |
| "epoch": 0.8403984248320593, | |
| "grad_norm": 0.796875, | |
| "learning_rate": 6.736351068327776e-06, | |
| "loss": 0.8906807899475098, | |
| "step": 1814 | |
| }, | |
| { | |
| "epoch": 0.8408617095205003, | |
| "grad_norm": 0.89453125, | |
| "learning_rate": 6.713584395222441e-06, | |
| "loss": 0.9027203917503357, | |
| "step": 1815 | |
| }, | |
| { | |
| "epoch": 0.8413249942089414, | |
| "grad_norm": 0.8046875, | |
| "learning_rate": 6.690850001608671e-06, | |
| "loss": 0.8666508793830872, | |
| "step": 1816 | |
| }, | |
| { | |
| "epoch": 0.8417882788973824, | |
| "grad_norm": 1.296875, | |
| "learning_rate": 6.668147929939147e-06, | |
| "loss": 0.8996185064315796, | |
| "step": 1817 | |
| }, | |
| { | |
| "epoch": 0.8422515635858235, | |
| "grad_norm": 0.8515625, | |
| "learning_rate": 6.645478222606184e-06, | |
| "loss": 0.8643486499786377, | |
| "step": 1818 | |
| }, | |
| { | |
| "epoch": 0.8427148482742646, | |
| "grad_norm": 0.78515625, | |
| "learning_rate": 6.622840921941684e-06, | |
| "loss": 0.8814486265182495, | |
| "step": 1819 | |
| }, | |
| { | |
| "epoch": 0.8431781329627056, | |
| "grad_norm": 0.81640625, | |
| "learning_rate": 6.600236070216997e-06, | |
| "loss": 0.9077647924423218, | |
| "step": 1820 | |
| }, | |
| { | |
| "epoch": 0.8436414176511466, | |
| "grad_norm": 0.796875, | |
| "learning_rate": 6.577663709642938e-06, | |
| "loss": 0.9069101810455322, | |
| "step": 1821 | |
| }, | |
| { | |
| "epoch": 0.8441047023395877, | |
| "grad_norm": 0.84765625, | |
| "learning_rate": 6.555123882369596e-06, | |
| "loss": 0.7405815720558167, | |
| "step": 1822 | |
| }, | |
| { | |
| "epoch": 0.8445679870280287, | |
| "grad_norm": 0.92578125, | |
| "learning_rate": 6.532616630486341e-06, | |
| "loss": 0.929868757724762, | |
| "step": 1823 | |
| }, | |
| { | |
| "epoch": 0.8450312717164697, | |
| "grad_norm": 0.89453125, | |
| "learning_rate": 6.5101419960216925e-06, | |
| "loss": 0.9069142937660217, | |
| "step": 1824 | |
| }, | |
| { | |
| "epoch": 0.8454945564049108, | |
| "grad_norm": 0.99609375, | |
| "learning_rate": 6.48770002094328e-06, | |
| "loss": 0.9167734980583191, | |
| "step": 1825 | |
| }, | |
| { | |
| "epoch": 0.8459578410933518, | |
| "grad_norm": 0.83984375, | |
| "learning_rate": 6.465290747157745e-06, | |
| "loss": 0.8654367923736572, | |
| "step": 1826 | |
| }, | |
| { | |
| "epoch": 0.846421125781793, | |
| "grad_norm": 0.7578125, | |
| "learning_rate": 6.442914216510651e-06, | |
| "loss": 0.7541770935058594, | |
| "step": 1827 | |
| }, | |
| { | |
| "epoch": 0.846884410470234, | |
| "grad_norm": 0.84765625, | |
| "learning_rate": 6.420570470786438e-06, | |
| "loss": 0.8677940964698792, | |
| "step": 1828 | |
| }, | |
| { | |
| "epoch": 0.847347695158675, | |
| "grad_norm": 0.8671875, | |
| "learning_rate": 6.3982595517083064e-06, | |
| "loss": 0.8606151342391968, | |
| "step": 1829 | |
| }, | |
| { | |
| "epoch": 0.8478109798471161, | |
| "grad_norm": 0.890625, | |
| "learning_rate": 6.375981500938173e-06, | |
| "loss": 0.9796140193939209, | |
| "step": 1830 | |
| }, | |
| { | |
| "epoch": 0.8482742645355571, | |
| "grad_norm": 0.921875, | |
| "learning_rate": 6.353736360076578e-06, | |
| "loss": 0.921321451663971, | |
| "step": 1831 | |
| }, | |
| { | |
| "epoch": 0.8487375492239981, | |
| "grad_norm": 0.921875, | |
| "learning_rate": 6.3315241706625946e-06, | |
| "loss": 0.9960170388221741, | |
| "step": 1832 | |
| }, | |
| { | |
| "epoch": 0.8492008339124392, | |
| "grad_norm": 0.73046875, | |
| "learning_rate": 6.309344974173784e-06, | |
| "loss": 0.783862292766571, | |
| "step": 1833 | |
| }, | |
| { | |
| "epoch": 0.8496641186008802, | |
| "grad_norm": 0.83203125, | |
| "learning_rate": 6.287198812026068e-06, | |
| "loss": 0.9817046523094177, | |
| "step": 1834 | |
| }, | |
| { | |
| "epoch": 0.8501274032893212, | |
| "grad_norm": 0.90625, | |
| "learning_rate": 6.265085725573732e-06, | |
| "loss": 0.9676863551139832, | |
| "step": 1835 | |
| }, | |
| { | |
| "epoch": 0.8505906879777624, | |
| "grad_norm": 0.68359375, | |
| "learning_rate": 6.243005756109246e-06, | |
| "loss": 0.926174521446228, | |
| "step": 1836 | |
| }, | |
| { | |
| "epoch": 0.8510539726662034, | |
| "grad_norm": 0.8828125, | |
| "learning_rate": 6.220958944863276e-06, | |
| "loss": 0.889807939529419, | |
| "step": 1837 | |
| }, | |
| { | |
| "epoch": 0.8515172573546445, | |
| "grad_norm": 0.98828125, | |
| "learning_rate": 6.198945333004545e-06, | |
| "loss": 0.9411275386810303, | |
| "step": 1838 | |
| }, | |
| { | |
| "epoch": 0.8519805420430855, | |
| "grad_norm": 0.83203125, | |
| "learning_rate": 6.176964961639795e-06, | |
| "loss": 0.8610736131668091, | |
| "step": 1839 | |
| }, | |
| { | |
| "epoch": 0.8524438267315265, | |
| "grad_norm": 0.76171875, | |
| "learning_rate": 6.1550178718137095e-06, | |
| "loss": 0.8684600591659546, | |
| "step": 1840 | |
| }, | |
| { | |
| "epoch": 0.8529071114199676, | |
| "grad_norm": 0.83984375, | |
| "learning_rate": 6.13310410450879e-06, | |
| "loss": 0.8963911533355713, | |
| "step": 1841 | |
| }, | |
| { | |
| "epoch": 0.8533703961084086, | |
| "grad_norm": 0.91015625, | |
| "learning_rate": 6.111223700645352e-06, | |
| "loss": 1.0184478759765625, | |
| "step": 1842 | |
| }, | |
| { | |
| "epoch": 0.8538336807968496, | |
| "grad_norm": 0.8984375, | |
| "learning_rate": 6.089376701081368e-06, | |
| "loss": 0.9415737390518188, | |
| "step": 1843 | |
| }, | |
| { | |
| "epoch": 0.8542969654852907, | |
| "grad_norm": 0.83203125, | |
| "learning_rate": 6.067563146612489e-06, | |
| "loss": 0.9422698020935059, | |
| "step": 1844 | |
| }, | |
| { | |
| "epoch": 0.8547602501737318, | |
| "grad_norm": 0.921875, | |
| "learning_rate": 6.045783077971863e-06, | |
| "loss": 0.8872048854827881, | |
| "step": 1845 | |
| }, | |
| { | |
| "epoch": 0.8552235348621728, | |
| "grad_norm": 0.82421875, | |
| "learning_rate": 6.024036535830124e-06, | |
| "loss": 0.7562741041183472, | |
| "step": 1846 | |
| }, | |
| { | |
| "epoch": 0.8556868195506139, | |
| "grad_norm": 0.93359375, | |
| "learning_rate": 6.002323560795314e-06, | |
| "loss": 0.8720545172691345, | |
| "step": 1847 | |
| }, | |
| { | |
| "epoch": 0.8561501042390549, | |
| "grad_norm": 0.82421875, | |
| "learning_rate": 5.980644193412778e-06, | |
| "loss": 0.9384455680847168, | |
| "step": 1848 | |
| }, | |
| { | |
| "epoch": 0.856613388927496, | |
| "grad_norm": 0.765625, | |
| "learning_rate": 5.958998474165121e-06, | |
| "loss": 0.8861362934112549, | |
| "step": 1849 | |
| }, | |
| { | |
| "epoch": 0.857076673615937, | |
| "grad_norm": 0.87890625, | |
| "learning_rate": 5.937386443472092e-06, | |
| "loss": 1.1345970630645752, | |
| "step": 1850 | |
| }, | |
| { | |
| "epoch": 0.857539958304378, | |
| "grad_norm": 0.734375, | |
| "learning_rate": 5.915808141690556e-06, | |
| "loss": 0.8685821294784546, | |
| "step": 1851 | |
| }, | |
| { | |
| "epoch": 0.858003242992819, | |
| "grad_norm": 0.83984375, | |
| "learning_rate": 5.894263609114378e-06, | |
| "loss": 0.996828019618988, | |
| "step": 1852 | |
| }, | |
| { | |
| "epoch": 0.8584665276812601, | |
| "grad_norm": 0.86328125, | |
| "learning_rate": 5.872752885974371e-06, | |
| "loss": 0.9989946484565735, | |
| "step": 1853 | |
| }, | |
| { | |
| "epoch": 0.8589298123697012, | |
| "grad_norm": 0.87890625, | |
| "learning_rate": 5.851276012438224e-06, | |
| "loss": 0.7909801006317139, | |
| "step": 1854 | |
| }, | |
| { | |
| "epoch": 0.8593930970581423, | |
| "grad_norm": 0.8046875, | |
| "learning_rate": 5.829833028610395e-06, | |
| "loss": 1.1088160276412964, | |
| "step": 1855 | |
| }, | |
| { | |
| "epoch": 0.8598563817465833, | |
| "grad_norm": 0.80859375, | |
| "learning_rate": 5.80842397453208e-06, | |
| "loss": 0.8807237148284912, | |
| "step": 1856 | |
| }, | |
| { | |
| "epoch": 0.8603196664350243, | |
| "grad_norm": 0.8359375, | |
| "learning_rate": 5.787048890181105e-06, | |
| "loss": 0.8608243465423584, | |
| "step": 1857 | |
| }, | |
| { | |
| "epoch": 0.8607829511234654, | |
| "grad_norm": 0.8203125, | |
| "learning_rate": 5.765707815471878e-06, | |
| "loss": 0.9504100680351257, | |
| "step": 1858 | |
| }, | |
| { | |
| "epoch": 0.8612462358119064, | |
| "grad_norm": 0.9296875, | |
| "learning_rate": 5.744400790255271e-06, | |
| "loss": 0.9452154040336609, | |
| "step": 1859 | |
| }, | |
| { | |
| "epoch": 0.8617095205003474, | |
| "grad_norm": 0.82421875, | |
| "learning_rate": 5.72312785431861e-06, | |
| "loss": 0.8969765305519104, | |
| "step": 1860 | |
| }, | |
| { | |
| "epoch": 0.8621728051887885, | |
| "grad_norm": 0.9453125, | |
| "learning_rate": 5.701889047385529e-06, | |
| "loss": 0.996848464012146, | |
| "step": 1861 | |
| }, | |
| { | |
| "epoch": 0.8626360898772295, | |
| "grad_norm": 0.8984375, | |
| "learning_rate": 5.68068440911596e-06, | |
| "loss": 0.8867621421813965, | |
| "step": 1862 | |
| }, | |
| { | |
| "epoch": 0.8630993745656707, | |
| "grad_norm": 0.875, | |
| "learning_rate": 5.6595139791060246e-06, | |
| "loss": 1.0145070552825928, | |
| "step": 1863 | |
| }, | |
| { | |
| "epoch": 0.8635626592541117, | |
| "grad_norm": 0.8515625, | |
| "learning_rate": 5.63837779688795e-06, | |
| "loss": 0.8787609934806824, | |
| "step": 1864 | |
| }, | |
| { | |
| "epoch": 0.8640259439425527, | |
| "grad_norm": 0.84375, | |
| "learning_rate": 5.617275901930037e-06, | |
| "loss": 0.9451928734779358, | |
| "step": 1865 | |
| }, | |
| { | |
| "epoch": 0.8644892286309938, | |
| "grad_norm": 0.80078125, | |
| "learning_rate": 5.596208333636525e-06, | |
| "loss": 0.9350622892379761, | |
| "step": 1866 | |
| }, | |
| { | |
| "epoch": 0.8649525133194348, | |
| "grad_norm": 0.86328125, | |
| "learning_rate": 5.5751751313476055e-06, | |
| "loss": 0.7591818571090698, | |
| "step": 1867 | |
| }, | |
| { | |
| "epoch": 0.8654157980078758, | |
| "grad_norm": 0.77734375, | |
| "learning_rate": 5.554176334339251e-06, | |
| "loss": 0.995162844657898, | |
| "step": 1868 | |
| }, | |
| { | |
| "epoch": 0.8658790826963169, | |
| "grad_norm": 0.97265625, | |
| "learning_rate": 5.533211981823204e-06, | |
| "loss": 1.090124487876892, | |
| "step": 1869 | |
| }, | |
| { | |
| "epoch": 0.8663423673847579, | |
| "grad_norm": 0.84765625, | |
| "learning_rate": 5.512282112946889e-06, | |
| "loss": 0.8698755502700806, | |
| "step": 1870 | |
| }, | |
| { | |
| "epoch": 0.8668056520731989, | |
| "grad_norm": 0.7734375, | |
| "learning_rate": 5.4913867667933405e-06, | |
| "loss": 0.8936692476272583, | |
| "step": 1871 | |
| }, | |
| { | |
| "epoch": 0.8672689367616401, | |
| "grad_norm": 0.80859375, | |
| "learning_rate": 5.470525982381133e-06, | |
| "loss": 0.9556330442428589, | |
| "step": 1872 | |
| }, | |
| { | |
| "epoch": 0.8677322214500811, | |
| "grad_norm": 0.7734375, | |
| "learning_rate": 5.449699798664276e-06, | |
| "loss": 0.7762373089790344, | |
| "step": 1873 | |
| }, | |
| { | |
| "epoch": 0.8681955061385221, | |
| "grad_norm": 0.84765625, | |
| "learning_rate": 5.428908254532204e-06, | |
| "loss": 0.8480014204978943, | |
| "step": 1874 | |
| }, | |
| { | |
| "epoch": 0.8686587908269632, | |
| "grad_norm": 0.85546875, | |
| "learning_rate": 5.4081513888096335e-06, | |
| "loss": 1.0431307554244995, | |
| "step": 1875 | |
| }, | |
| { | |
| "epoch": 0.8691220755154042, | |
| "grad_norm": 0.85546875, | |
| "learning_rate": 5.3874292402565515e-06, | |
| "loss": 0.9694292545318604, | |
| "step": 1876 | |
| }, | |
| { | |
| "epoch": 0.8695853602038452, | |
| "grad_norm": 0.93359375, | |
| "learning_rate": 5.366741847568112e-06, | |
| "loss": 0.9653794765472412, | |
| "step": 1877 | |
| }, | |
| { | |
| "epoch": 0.8700486448922863, | |
| "grad_norm": 0.77734375, | |
| "learning_rate": 5.346089249374549e-06, | |
| "loss": 0.8758606314659119, | |
| "step": 1878 | |
| }, | |
| { | |
| "epoch": 0.8705119295807273, | |
| "grad_norm": 0.76953125, | |
| "learning_rate": 5.325471484241144e-06, | |
| "loss": 0.8884270191192627, | |
| "step": 1879 | |
| }, | |
| { | |
| "epoch": 0.8709752142691684, | |
| "grad_norm": 0.8046875, | |
| "learning_rate": 5.304888590668126e-06, | |
| "loss": 0.9315338730812073, | |
| "step": 1880 | |
| }, | |
| { | |
| "epoch": 0.8714384989576095, | |
| "grad_norm": 0.79296875, | |
| "learning_rate": 5.284340607090616e-06, | |
| "loss": 0.874808132648468, | |
| "step": 1881 | |
| }, | |
| { | |
| "epoch": 0.8719017836460505, | |
| "grad_norm": 0.890625, | |
| "learning_rate": 5.263827571878527e-06, | |
| "loss": 0.95280921459198, | |
| "step": 1882 | |
| }, | |
| { | |
| "epoch": 0.8723650683344916, | |
| "grad_norm": 0.80859375, | |
| "learning_rate": 5.243349523336532e-06, | |
| "loss": 0.7937729954719543, | |
| "step": 1883 | |
| }, | |
| { | |
| "epoch": 0.8728283530229326, | |
| "grad_norm": 0.8359375, | |
| "learning_rate": 5.222906499703955e-06, | |
| "loss": 0.8925231695175171, | |
| "step": 1884 | |
| }, | |
| { | |
| "epoch": 0.8732916377113736, | |
| "grad_norm": 0.9375, | |
| "learning_rate": 5.20249853915473e-06, | |
| "loss": 0.7958294153213501, | |
| "step": 1885 | |
| }, | |
| { | |
| "epoch": 0.8737549223998147, | |
| "grad_norm": 0.78125, | |
| "learning_rate": 5.1821256797973185e-06, | |
| "loss": 0.8661794066429138, | |
| "step": 1886 | |
| }, | |
| { | |
| "epoch": 0.8742182070882557, | |
| "grad_norm": 0.83203125, | |
| "learning_rate": 5.1617879596746155e-06, | |
| "loss": 0.8948233723640442, | |
| "step": 1887 | |
| }, | |
| { | |
| "epoch": 0.8746814917766967, | |
| "grad_norm": 1.0703125, | |
| "learning_rate": 5.141485416763928e-06, | |
| "loss": 0.9799279570579529, | |
| "step": 1888 | |
| }, | |
| { | |
| "epoch": 0.8751447764651378, | |
| "grad_norm": 0.77734375, | |
| "learning_rate": 5.121218088976843e-06, | |
| "loss": 0.8897976279258728, | |
| "step": 1889 | |
| }, | |
| { | |
| "epoch": 0.8756080611535789, | |
| "grad_norm": 0.8515625, | |
| "learning_rate": 5.1009860141592314e-06, | |
| "loss": 1.034111738204956, | |
| "step": 1890 | |
| }, | |
| { | |
| "epoch": 0.87607134584202, | |
| "grad_norm": 0.80078125, | |
| "learning_rate": 5.080789230091099e-06, | |
| "loss": 0.8257846236228943, | |
| "step": 1891 | |
| }, | |
| { | |
| "epoch": 0.876534630530461, | |
| "grad_norm": 0.734375, | |
| "learning_rate": 5.060627774486557e-06, | |
| "loss": 0.9181721210479736, | |
| "step": 1892 | |
| }, | |
| { | |
| "epoch": 0.876997915218902, | |
| "grad_norm": 0.828125, | |
| "learning_rate": 5.04050168499376e-06, | |
| "loss": 0.8174453377723694, | |
| "step": 1893 | |
| }, | |
| { | |
| "epoch": 0.8774611999073431, | |
| "grad_norm": 0.93359375, | |
| "learning_rate": 5.020410999194815e-06, | |
| "loss": 0.9571653604507446, | |
| "step": 1894 | |
| }, | |
| { | |
| "epoch": 0.8779244845957841, | |
| "grad_norm": 0.8828125, | |
| "learning_rate": 5.0003557546057275e-06, | |
| "loss": 0.8483561277389526, | |
| "step": 1895 | |
| }, | |
| { | |
| "epoch": 0.8783877692842251, | |
| "grad_norm": 0.90234375, | |
| "learning_rate": 4.9803359886763e-06, | |
| "loss": 0.9033476114273071, | |
| "step": 1896 | |
| }, | |
| { | |
| "epoch": 0.8788510539726662, | |
| "grad_norm": 0.875, | |
| "learning_rate": 4.960351738790113e-06, | |
| "loss": 0.9668432474136353, | |
| "step": 1897 | |
| }, | |
| { | |
| "epoch": 0.8793143386611072, | |
| "grad_norm": 0.83203125, | |
| "learning_rate": 4.9404030422644e-06, | |
| "loss": 0.8885450959205627, | |
| "step": 1898 | |
| }, | |
| { | |
| "epoch": 0.8797776233495483, | |
| "grad_norm": 1.0078125, | |
| "learning_rate": 4.92048993635002e-06, | |
| "loss": 0.7515835762023926, | |
| "step": 1899 | |
| }, | |
| { | |
| "epoch": 0.8802409080379894, | |
| "grad_norm": 0.8828125, | |
| "learning_rate": 4.9006124582313825e-06, | |
| "loss": 0.8569181561470032, | |
| "step": 1900 | |
| }, | |
| { | |
| "epoch": 0.8807041927264304, | |
| "grad_norm": 0.80078125, | |
| "learning_rate": 4.880770645026336e-06, | |
| "loss": 0.8861024975776672, | |
| "step": 1901 | |
| }, | |
| { | |
| "epoch": 0.8811674774148714, | |
| "grad_norm": 0.84375, | |
| "learning_rate": 4.8609645337861615e-06, | |
| "loss": 0.884182333946228, | |
| "step": 1902 | |
| }, | |
| { | |
| "epoch": 0.8816307621033125, | |
| "grad_norm": 0.72265625, | |
| "learning_rate": 4.841194161495456e-06, | |
| "loss": 0.8558884263038635, | |
| "step": 1903 | |
| }, | |
| { | |
| "epoch": 0.8820940467917535, | |
| "grad_norm": 0.76953125, | |
| "learning_rate": 4.8214595650720945e-06, | |
| "loss": 0.9538986682891846, | |
| "step": 1904 | |
| }, | |
| { | |
| "epoch": 0.8825573314801946, | |
| "grad_norm": 0.83203125, | |
| "learning_rate": 4.8017607813671255e-06, | |
| "loss": 0.9202069640159607, | |
| "step": 1905 | |
| }, | |
| { | |
| "epoch": 0.8830206161686356, | |
| "grad_norm": 0.78515625, | |
| "learning_rate": 4.782097847164745e-06, | |
| "loss": 0.8663555383682251, | |
| "step": 1906 | |
| }, | |
| { | |
| "epoch": 0.8834839008570766, | |
| "grad_norm": 0.83203125, | |
| "learning_rate": 4.762470799182182e-06, | |
| "loss": 0.8842315673828125, | |
| "step": 1907 | |
| }, | |
| { | |
| "epoch": 0.8839471855455178, | |
| "grad_norm": 0.90625, | |
| "learning_rate": 4.7428796740696775e-06, | |
| "loss": 1.0153151750564575, | |
| "step": 1908 | |
| }, | |
| { | |
| "epoch": 0.8844104702339588, | |
| "grad_norm": 0.76953125, | |
| "learning_rate": 4.723324508410386e-06, | |
| "loss": 0.8607885837554932, | |
| "step": 1909 | |
| }, | |
| { | |
| "epoch": 0.8848737549223998, | |
| "grad_norm": 0.86328125, | |
| "learning_rate": 4.703805338720301e-06, | |
| "loss": 0.8658420443534851, | |
| "step": 1910 | |
| }, | |
| { | |
| "epoch": 0.8853370396108409, | |
| "grad_norm": 0.7578125, | |
| "learning_rate": 4.684322201448219e-06, | |
| "loss": 0.814765453338623, | |
| "step": 1911 | |
| }, | |
| { | |
| "epoch": 0.8858003242992819, | |
| "grad_norm": 0.88671875, | |
| "learning_rate": 4.664875132975623e-06, | |
| "loss": 1.0322341918945312, | |
| "step": 1912 | |
| }, | |
| { | |
| "epoch": 0.8862636089877229, | |
| "grad_norm": 0.8046875, | |
| "learning_rate": 4.645464169616691e-06, | |
| "loss": 0.8110833168029785, | |
| "step": 1913 | |
| }, | |
| { | |
| "epoch": 0.886726893676164, | |
| "grad_norm": 0.8203125, | |
| "learning_rate": 4.6260893476181384e-06, | |
| "loss": 0.836736798286438, | |
| "step": 1914 | |
| }, | |
| { | |
| "epoch": 0.887190178364605, | |
| "grad_norm": 0.97265625, | |
| "learning_rate": 4.606750703159197e-06, | |
| "loss": 0.9669207334518433, | |
| "step": 1915 | |
| }, | |
| { | |
| "epoch": 0.887653463053046, | |
| "grad_norm": 0.95703125, | |
| "learning_rate": 4.587448272351564e-06, | |
| "loss": 0.8839113116264343, | |
| "step": 1916 | |
| }, | |
| { | |
| "epoch": 0.8881167477414872, | |
| "grad_norm": 0.9296875, | |
| "learning_rate": 4.568182091239298e-06, | |
| "loss": 0.8875067234039307, | |
| "step": 1917 | |
| }, | |
| { | |
| "epoch": 0.8885800324299282, | |
| "grad_norm": 0.75390625, | |
| "learning_rate": 4.548952195798783e-06, | |
| "loss": 0.8245463967323303, | |
| "step": 1918 | |
| }, | |
| { | |
| "epoch": 0.8890433171183693, | |
| "grad_norm": 1.0234375, | |
| "learning_rate": 4.529758621938616e-06, | |
| "loss": 0.9345543384552002, | |
| "step": 1919 | |
| }, | |
| { | |
| "epoch": 0.8895066018068103, | |
| "grad_norm": 0.8515625, | |
| "learning_rate": 4.510601405499605e-06, | |
| "loss": 0.841739296913147, | |
| "step": 1920 | |
| }, | |
| { | |
| "epoch": 0.8899698864952513, | |
| "grad_norm": 0.8671875, | |
| "learning_rate": 4.491480582254634e-06, | |
| "loss": 0.8611487746238708, | |
| "step": 1921 | |
| }, | |
| { | |
| "epoch": 0.8904331711836924, | |
| "grad_norm": 0.90234375, | |
| "learning_rate": 4.472396187908652e-06, | |
| "loss": 0.7870234251022339, | |
| "step": 1922 | |
| }, | |
| { | |
| "epoch": 0.8908964558721334, | |
| "grad_norm": 0.95703125, | |
| "learning_rate": 4.453348258098582e-06, | |
| "loss": 1.1207804679870605, | |
| "step": 1923 | |
| }, | |
| { | |
| "epoch": 0.8913597405605744, | |
| "grad_norm": 0.796875, | |
| "learning_rate": 4.434336828393233e-06, | |
| "loss": 1.0150240659713745, | |
| "step": 1924 | |
| }, | |
| { | |
| "epoch": 0.8918230252490155, | |
| "grad_norm": 0.96484375, | |
| "learning_rate": 4.415361934293283e-06, | |
| "loss": 1.0550124645233154, | |
| "step": 1925 | |
| }, | |
| { | |
| "epoch": 0.8922863099374566, | |
| "grad_norm": 0.80078125, | |
| "learning_rate": 4.396423611231171e-06, | |
| "loss": 0.8985774517059326, | |
| "step": 1926 | |
| }, | |
| { | |
| "epoch": 0.8927495946258976, | |
| "grad_norm": 0.8515625, | |
| "learning_rate": 4.377521894571057e-06, | |
| "loss": 0.9433072209358215, | |
| "step": 1927 | |
| }, | |
| { | |
| "epoch": 0.8932128793143387, | |
| "grad_norm": 0.82421875, | |
| "learning_rate": 4.358656819608728e-06, | |
| "loss": 0.9555040001869202, | |
| "step": 1928 | |
| }, | |
| { | |
| "epoch": 0.8936761640027797, | |
| "grad_norm": 1.015625, | |
| "learning_rate": 4.339828421571566e-06, | |
| "loss": 0.9342141151428223, | |
| "step": 1929 | |
| }, | |
| { | |
| "epoch": 0.8941394486912208, | |
| "grad_norm": 0.90625, | |
| "learning_rate": 4.321036735618446e-06, | |
| "loss": 0.9744370579719543, | |
| "step": 1930 | |
| }, | |
| { | |
| "epoch": 0.8946027333796618, | |
| "grad_norm": 0.8203125, | |
| "learning_rate": 4.302281796839706e-06, | |
| "loss": 0.8748108744621277, | |
| "step": 1931 | |
| }, | |
| { | |
| "epoch": 0.8950660180681028, | |
| "grad_norm": 0.83984375, | |
| "learning_rate": 4.283563640257069e-06, | |
| "loss": 0.9074385762214661, | |
| "step": 1932 | |
| }, | |
| { | |
| "epoch": 0.8955293027565439, | |
| "grad_norm": 0.8125, | |
| "learning_rate": 4.2648823008235475e-06, | |
| "loss": 0.8706763982772827, | |
| "step": 1933 | |
| }, | |
| { | |
| "epoch": 0.8959925874449849, | |
| "grad_norm": 0.8359375, | |
| "learning_rate": 4.246237813423425e-06, | |
| "loss": 0.9113630652427673, | |
| "step": 1934 | |
| }, | |
| { | |
| "epoch": 0.896455872133426, | |
| "grad_norm": 0.796875, | |
| "learning_rate": 4.227630212872168e-06, | |
| "loss": 1.0912119150161743, | |
| "step": 1935 | |
| }, | |
| { | |
| "epoch": 0.8969191568218671, | |
| "grad_norm": 0.94921875, | |
| "learning_rate": 4.2090595339163665e-06, | |
| "loss": 0.8499932289123535, | |
| "step": 1936 | |
| }, | |
| { | |
| "epoch": 0.8973824415103081, | |
| "grad_norm": 0.953125, | |
| "learning_rate": 4.190525811233652e-06, | |
| "loss": 0.9822698831558228, | |
| "step": 1937 | |
| }, | |
| { | |
| "epoch": 0.8978457261987491, | |
| "grad_norm": 1.09375, | |
| "learning_rate": 4.172029079432648e-06, | |
| "loss": 0.9892884492874146, | |
| "step": 1938 | |
| }, | |
| { | |
| "epoch": 0.8983090108871902, | |
| "grad_norm": 0.8046875, | |
| "learning_rate": 4.153569373052913e-06, | |
| "loss": 1.048028588294983, | |
| "step": 1939 | |
| }, | |
| { | |
| "epoch": 0.8987722955756312, | |
| "grad_norm": 0.73046875, | |
| "learning_rate": 4.135146726564865e-06, | |
| "loss": 0.7311965227127075, | |
| "step": 1940 | |
| }, | |
| { | |
| "epoch": 0.8992355802640722, | |
| "grad_norm": 0.76171875, | |
| "learning_rate": 4.116761174369723e-06, | |
| "loss": 0.967644989490509, | |
| "step": 1941 | |
| }, | |
| { | |
| "epoch": 0.8996988649525133, | |
| "grad_norm": 0.8046875, | |
| "learning_rate": 4.098412750799421e-06, | |
| "loss": 0.9470330476760864, | |
| "step": 1942 | |
| }, | |
| { | |
| "epoch": 0.9001621496409543, | |
| "grad_norm": 0.9921875, | |
| "learning_rate": 4.080101490116581e-06, | |
| "loss": 0.9476629495620728, | |
| "step": 1943 | |
| }, | |
| { | |
| "epoch": 0.9006254343293955, | |
| "grad_norm": 0.85546875, | |
| "learning_rate": 4.061827426514416e-06, | |
| "loss": 0.8855443000793457, | |
| "step": 1944 | |
| }, | |
| { | |
| "epoch": 0.9010887190178365, | |
| "grad_norm": 0.74609375, | |
| "learning_rate": 4.043590594116685e-06, | |
| "loss": 1.0060893297195435, | |
| "step": 1945 | |
| }, | |
| { | |
| "epoch": 0.9015520037062775, | |
| "grad_norm": 0.8671875, | |
| "learning_rate": 4.025391026977633e-06, | |
| "loss": 0.9967565536499023, | |
| "step": 1946 | |
| }, | |
| { | |
| "epoch": 0.9020152883947186, | |
| "grad_norm": 0.91015625, | |
| "learning_rate": 4.007228759081898e-06, | |
| "loss": 1.0718729496002197, | |
| "step": 1947 | |
| }, | |
| { | |
| "epoch": 0.9024785730831596, | |
| "grad_norm": 0.84375, | |
| "learning_rate": 3.989103824344483e-06, | |
| "loss": 0.7362527251243591, | |
| "step": 1948 | |
| }, | |
| { | |
| "epoch": 0.9029418577716006, | |
| "grad_norm": 0.80078125, | |
| "learning_rate": 3.971016256610675e-06, | |
| "loss": 0.7745991945266724, | |
| "step": 1949 | |
| }, | |
| { | |
| "epoch": 0.9034051424600417, | |
| "grad_norm": 0.78515625, | |
| "learning_rate": 3.95296608965599e-06, | |
| "loss": 0.9183497428894043, | |
| "step": 1950 | |
| }, | |
| { | |
| "epoch": 0.9038684271484827, | |
| "grad_norm": 0.77734375, | |
| "learning_rate": 3.934953357186084e-06, | |
| "loss": 0.9153457880020142, | |
| "step": 1951 | |
| }, | |
| { | |
| "epoch": 0.9043317118369237, | |
| "grad_norm": 0.8671875, | |
| "learning_rate": 3.916978092836737e-06, | |
| "loss": 0.9395539164543152, | |
| "step": 1952 | |
| }, | |
| { | |
| "epoch": 0.9047949965253649, | |
| "grad_norm": 0.88671875, | |
| "learning_rate": 3.899040330173741e-06, | |
| "loss": 0.9092686176300049, | |
| "step": 1953 | |
| }, | |
| { | |
| "epoch": 0.9052582812138059, | |
| "grad_norm": 0.828125, | |
| "learning_rate": 3.881140102692869e-06, | |
| "loss": 0.957666277885437, | |
| "step": 1954 | |
| }, | |
| { | |
| "epoch": 0.905721565902247, | |
| "grad_norm": 0.99609375, | |
| "learning_rate": 3.863277443819814e-06, | |
| "loss": 0.8853251934051514, | |
| "step": 1955 | |
| }, | |
| { | |
| "epoch": 0.906184850590688, | |
| "grad_norm": 0.79296875, | |
| "learning_rate": 3.845452386910094e-06, | |
| "loss": 0.9027367234230042, | |
| "step": 1956 | |
| }, | |
| { | |
| "epoch": 0.906648135279129, | |
| "grad_norm": 0.91015625, | |
| "learning_rate": 3.827664965249025e-06, | |
| "loss": 0.8307380080223083, | |
| "step": 1957 | |
| }, | |
| { | |
| "epoch": 0.90711141996757, | |
| "grad_norm": 0.7578125, | |
| "learning_rate": 3.8099152120516485e-06, | |
| "loss": 0.9248343110084534, | |
| "step": 1958 | |
| }, | |
| { | |
| "epoch": 0.9075747046560111, | |
| "grad_norm": 0.94140625, | |
| "learning_rate": 3.792203160462667e-06, | |
| "loss": 0.9491377472877502, | |
| "step": 1959 | |
| }, | |
| { | |
| "epoch": 0.9080379893444521, | |
| "grad_norm": 0.953125, | |
| "learning_rate": 3.7745288435563653e-06, | |
| "loss": 0.9673945307731628, | |
| "step": 1960 | |
| }, | |
| { | |
| "epoch": 0.9085012740328932, | |
| "grad_norm": 1.0234375, | |
| "learning_rate": 3.7568922943365755e-06, | |
| "loss": 0.9584711790084839, | |
| "step": 1961 | |
| }, | |
| { | |
| "epoch": 0.9089645587213343, | |
| "grad_norm": 0.87109375, | |
| "learning_rate": 3.7392935457366088e-06, | |
| "loss": 0.9805790781974792, | |
| "step": 1962 | |
| }, | |
| { | |
| "epoch": 0.9094278434097753, | |
| "grad_norm": 0.9453125, | |
| "learning_rate": 3.7217326306191865e-06, | |
| "loss": 0.8713183403015137, | |
| "step": 1963 | |
| }, | |
| { | |
| "epoch": 0.9098911280982164, | |
| "grad_norm": 0.8125, | |
| "learning_rate": 3.704209581776387e-06, | |
| "loss": 0.9644355177879333, | |
| "step": 1964 | |
| }, | |
| { | |
| "epoch": 0.9103544127866574, | |
| "grad_norm": 0.87890625, | |
| "learning_rate": 3.686724431929563e-06, | |
| "loss": 0.9322176575660706, | |
| "step": 1965 | |
| }, | |
| { | |
| "epoch": 0.9108176974750984, | |
| "grad_norm": 0.75, | |
| "learning_rate": 3.6692772137293233e-06, | |
| "loss": 0.8894251585006714, | |
| "step": 1966 | |
| }, | |
| { | |
| "epoch": 0.9112809821635395, | |
| "grad_norm": 0.74609375, | |
| "learning_rate": 3.6518679597554196e-06, | |
| "loss": 0.8077326416969299, | |
| "step": 1967 | |
| }, | |
| { | |
| "epoch": 0.9117442668519805, | |
| "grad_norm": 0.79296875, | |
| "learning_rate": 3.634496702516724e-06, | |
| "loss": 0.8675779104232788, | |
| "step": 1968 | |
| }, | |
| { | |
| "epoch": 0.9122075515404215, | |
| "grad_norm": 1.046875, | |
| "learning_rate": 3.6171634744511705e-06, | |
| "loss": 1.0307801961898804, | |
| "step": 1969 | |
| }, | |
| { | |
| "epoch": 0.9126708362288626, | |
| "grad_norm": 0.80859375, | |
| "learning_rate": 3.5998683079256456e-06, | |
| "loss": 0.8632080554962158, | |
| "step": 1970 | |
| }, | |
| { | |
| "epoch": 0.9131341209173037, | |
| "grad_norm": 1.3046875, | |
| "learning_rate": 3.5826112352359906e-06, | |
| "loss": 0.9748227596282959, | |
| "step": 1971 | |
| }, | |
| { | |
| "epoch": 0.9135974056057448, | |
| "grad_norm": 0.7734375, | |
| "learning_rate": 3.5653922886069056e-06, | |
| "loss": 1.0766937732696533, | |
| "step": 1972 | |
| }, | |
| { | |
| "epoch": 0.9140606902941858, | |
| "grad_norm": 0.8125, | |
| "learning_rate": 3.548211500191897e-06, | |
| "loss": 0.9421984553337097, | |
| "step": 1973 | |
| }, | |
| { | |
| "epoch": 0.9145239749826268, | |
| "grad_norm": 0.890625, | |
| "learning_rate": 3.5310689020732137e-06, | |
| "loss": 0.9485040903091431, | |
| "step": 1974 | |
| }, | |
| { | |
| "epoch": 0.9149872596710679, | |
| "grad_norm": 0.9375, | |
| "learning_rate": 3.513964526261783e-06, | |
| "loss": 0.7310665249824524, | |
| "step": 1975 | |
| }, | |
| { | |
| "epoch": 0.9154505443595089, | |
| "grad_norm": 0.79296875, | |
| "learning_rate": 3.496898404697176e-06, | |
| "loss": 0.8767428398132324, | |
| "step": 1976 | |
| }, | |
| { | |
| "epoch": 0.9159138290479499, | |
| "grad_norm": 0.875, | |
| "learning_rate": 3.4798705692475193e-06, | |
| "loss": 0.7951971292495728, | |
| "step": 1977 | |
| }, | |
| { | |
| "epoch": 0.916377113736391, | |
| "grad_norm": 0.80859375, | |
| "learning_rate": 3.4628810517094586e-06, | |
| "loss": 1.0523958206176758, | |
| "step": 1978 | |
| }, | |
| { | |
| "epoch": 0.9168403984248321, | |
| "grad_norm": 0.88671875, | |
| "learning_rate": 3.445929883808061e-06, | |
| "loss": 0.9560039639472961, | |
| "step": 1979 | |
| }, | |
| { | |
| "epoch": 0.9173036831132731, | |
| "grad_norm": 0.7578125, | |
| "learning_rate": 3.4290170971968125e-06, | |
| "loss": 0.9428179860115051, | |
| "step": 1980 | |
| }, | |
| { | |
| "epoch": 0.9177669678017142, | |
| "grad_norm": 0.7734375, | |
| "learning_rate": 3.4121427234575058e-06, | |
| "loss": 0.8995485901832581, | |
| "step": 1981 | |
| }, | |
| { | |
| "epoch": 0.9182302524901552, | |
| "grad_norm": 0.9921875, | |
| "learning_rate": 3.3953067941002262e-06, | |
| "loss": 0.8385268449783325, | |
| "step": 1982 | |
| }, | |
| { | |
| "epoch": 0.9186935371785963, | |
| "grad_norm": 0.78125, | |
| "learning_rate": 3.3785093405632497e-06, | |
| "loss": 0.8148472309112549, | |
| "step": 1983 | |
| }, | |
| { | |
| "epoch": 0.9191568218670373, | |
| "grad_norm": 0.83984375, | |
| "learning_rate": 3.3617503942130034e-06, | |
| "loss": 0.8488112688064575, | |
| "step": 1984 | |
| }, | |
| { | |
| "epoch": 0.9196201065554783, | |
| "grad_norm": 0.875, | |
| "learning_rate": 3.34502998634403e-06, | |
| "loss": 0.9861905574798584, | |
| "step": 1985 | |
| }, | |
| { | |
| "epoch": 0.9200833912439194, | |
| "grad_norm": 0.890625, | |
| "learning_rate": 3.3283481481788926e-06, | |
| "loss": 0.9687526226043701, | |
| "step": 1986 | |
| }, | |
| { | |
| "epoch": 0.9205466759323604, | |
| "grad_norm": 0.6953125, | |
| "learning_rate": 3.3117049108681425e-06, | |
| "loss": 0.7825883030891418, | |
| "step": 1987 | |
| }, | |
| { | |
| "epoch": 0.9210099606208015, | |
| "grad_norm": 0.76171875, | |
| "learning_rate": 3.295100305490232e-06, | |
| "loss": 0.89034503698349, | |
| "step": 1988 | |
| }, | |
| { | |
| "epoch": 0.9214732453092426, | |
| "grad_norm": 0.7578125, | |
| "learning_rate": 3.2785343630514944e-06, | |
| "loss": 0.8503515720367432, | |
| "step": 1989 | |
| }, | |
| { | |
| "epoch": 0.9219365299976836, | |
| "grad_norm": 0.76953125, | |
| "learning_rate": 3.2620071144860517e-06, | |
| "loss": 0.887080192565918, | |
| "step": 1990 | |
| }, | |
| { | |
| "epoch": 0.9223998146861246, | |
| "grad_norm": 0.82421875, | |
| "learning_rate": 3.2455185906557792e-06, | |
| "loss": 0.9105535745620728, | |
| "step": 1991 | |
| }, | |
| { | |
| "epoch": 0.9228630993745657, | |
| "grad_norm": 0.7578125, | |
| "learning_rate": 3.2290688223502485e-06, | |
| "loss": 0.9910966157913208, | |
| "step": 1992 | |
| }, | |
| { | |
| "epoch": 0.9233263840630067, | |
| "grad_norm": 0.83203125, | |
| "learning_rate": 3.212657840286637e-06, | |
| "loss": 0.9041646122932434, | |
| "step": 1993 | |
| }, | |
| { | |
| "epoch": 0.9237896687514477, | |
| "grad_norm": 0.8203125, | |
| "learning_rate": 3.196285675109717e-06, | |
| "loss": 0.8051247596740723, | |
| "step": 1994 | |
| }, | |
| { | |
| "epoch": 0.9242529534398888, | |
| "grad_norm": 0.7734375, | |
| "learning_rate": 3.1799523573917726e-06, | |
| "loss": 0.9509384036064148, | |
| "step": 1995 | |
| }, | |
| { | |
| "epoch": 0.9247162381283298, | |
| "grad_norm": 0.7265625, | |
| "learning_rate": 3.1636579176325494e-06, | |
| "loss": 0.8491644859313965, | |
| "step": 1996 | |
| }, | |
| { | |
| "epoch": 0.925179522816771, | |
| "grad_norm": 0.8671875, | |
| "learning_rate": 3.1474023862591808e-06, | |
| "loss": 0.9533796906471252, | |
| "step": 1997 | |
| }, | |
| { | |
| "epoch": 0.925642807505212, | |
| "grad_norm": 0.8984375, | |
| "learning_rate": 3.1311857936261537e-06, | |
| "loss": 0.9359503984451294, | |
| "step": 1998 | |
| }, | |
| { | |
| "epoch": 0.926106092193653, | |
| "grad_norm": 0.7890625, | |
| "learning_rate": 3.115008170015248e-06, | |
| "loss": 0.9185171723365784, | |
| "step": 1999 | |
| }, | |
| { | |
| "epoch": 0.9265693768820941, | |
| "grad_norm": 0.8046875, | |
| "learning_rate": 3.098869545635469e-06, | |
| "loss": 0.9881656169891357, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 0.9270326615705351, | |
| "grad_norm": 0.80078125, | |
| "learning_rate": 3.082769950623007e-06, | |
| "loss": 0.7174080610275269, | |
| "step": 2001 | |
| }, | |
| { | |
| "epoch": 0.9274959462589761, | |
| "grad_norm": 0.81640625, | |
| "learning_rate": 3.066709415041155e-06, | |
| "loss": 0.8646811246871948, | |
| "step": 2002 | |
| }, | |
| { | |
| "epoch": 0.9279592309474172, | |
| "grad_norm": 0.82421875, | |
| "learning_rate": 3.0506879688802826e-06, | |
| "loss": 0.6516255736351013, | |
| "step": 2003 | |
| }, | |
| { | |
| "epoch": 0.9284225156358582, | |
| "grad_norm": 0.765625, | |
| "learning_rate": 3.0347056420577633e-06, | |
| "loss": 0.8709309101104736, | |
| "step": 2004 | |
| }, | |
| { | |
| "epoch": 0.9288858003242992, | |
| "grad_norm": 0.83984375, | |
| "learning_rate": 3.0187624644179235e-06, | |
| "loss": 0.9930815696716309, | |
| "step": 2005 | |
| }, | |
| { | |
| "epoch": 0.9293490850127404, | |
| "grad_norm": 0.984375, | |
| "learning_rate": 3.002858465731981e-06, | |
| "loss": 0.8484547138214111, | |
| "step": 2006 | |
| }, | |
| { | |
| "epoch": 0.9298123697011814, | |
| "grad_norm": 0.9296875, | |
| "learning_rate": 2.9869936756979873e-06, | |
| "loss": 0.9493208527565002, | |
| "step": 2007 | |
| }, | |
| { | |
| "epoch": 0.9302756543896225, | |
| "grad_norm": 0.92578125, | |
| "learning_rate": 2.9711681239407924e-06, | |
| "loss": 0.8300023078918457, | |
| "step": 2008 | |
| }, | |
| { | |
| "epoch": 0.9307389390780635, | |
| "grad_norm": 0.94140625, | |
| "learning_rate": 2.9553818400119676e-06, | |
| "loss": 1.0500361919403076, | |
| "step": 2009 | |
| }, | |
| { | |
| "epoch": 0.9312022237665045, | |
| "grad_norm": 1.0, | |
| "learning_rate": 2.939634853389765e-06, | |
| "loss": 1.172685980796814, | |
| "step": 2010 | |
| }, | |
| { | |
| "epoch": 0.9316655084549456, | |
| "grad_norm": 0.8984375, | |
| "learning_rate": 2.923927193479039e-06, | |
| "loss": 0.966960608959198, | |
| "step": 2011 | |
| }, | |
| { | |
| "epoch": 0.9321287931433866, | |
| "grad_norm": 0.875, | |
| "learning_rate": 2.908258889611223e-06, | |
| "loss": 0.9223648905754089, | |
| "step": 2012 | |
| }, | |
| { | |
| "epoch": 0.9325920778318276, | |
| "grad_norm": 0.9921875, | |
| "learning_rate": 2.892629971044265e-06, | |
| "loss": 1.0637054443359375, | |
| "step": 2013 | |
| }, | |
| { | |
| "epoch": 0.9330553625202687, | |
| "grad_norm": 0.890625, | |
| "learning_rate": 2.8770404669625426e-06, | |
| "loss": 0.9099516272544861, | |
| "step": 2014 | |
| }, | |
| { | |
| "epoch": 0.9335186472087098, | |
| "grad_norm": 0.859375, | |
| "learning_rate": 2.8614904064768603e-06, | |
| "loss": 0.8674840927124023, | |
| "step": 2015 | |
| }, | |
| { | |
| "epoch": 0.9339819318971508, | |
| "grad_norm": 0.91796875, | |
| "learning_rate": 2.8459798186243478e-06, | |
| "loss": 0.9098578691482544, | |
| "step": 2016 | |
| }, | |
| { | |
| "epoch": 0.9344452165855919, | |
| "grad_norm": 0.78125, | |
| "learning_rate": 2.8305087323684396e-06, | |
| "loss": 0.9073185920715332, | |
| "step": 2017 | |
| }, | |
| { | |
| "epoch": 0.9349085012740329, | |
| "grad_norm": 0.84765625, | |
| "learning_rate": 2.8150771765988054e-06, | |
| "loss": 1.0626872777938843, | |
| "step": 2018 | |
| }, | |
| { | |
| "epoch": 0.935371785962474, | |
| "grad_norm": 1.265625, | |
| "learning_rate": 2.799685180131296e-06, | |
| "loss": 1.0506995916366577, | |
| "step": 2019 | |
| }, | |
| { | |
| "epoch": 0.935835070650915, | |
| "grad_norm": 0.8359375, | |
| "learning_rate": 2.7843327717078906e-06, | |
| "loss": 0.9011653065681458, | |
| "step": 2020 | |
| }, | |
| { | |
| "epoch": 0.936298355339356, | |
| "grad_norm": 1.015625, | |
| "learning_rate": 2.7690199799966412e-06, | |
| "loss": 0.8372233510017395, | |
| "step": 2021 | |
| }, | |
| { | |
| "epoch": 0.936761640027797, | |
| "grad_norm": 0.9296875, | |
| "learning_rate": 2.7537468335916275e-06, | |
| "loss": 0.8183858394622803, | |
| "step": 2022 | |
| }, | |
| { | |
| "epoch": 0.9372249247162381, | |
| "grad_norm": 0.83984375, | |
| "learning_rate": 2.7385133610129018e-06, | |
| "loss": 0.9930511713027954, | |
| "step": 2023 | |
| }, | |
| { | |
| "epoch": 0.9376882094046792, | |
| "grad_norm": 0.8671875, | |
| "learning_rate": 2.7233195907064297e-06, | |
| "loss": 0.8853211998939514, | |
| "step": 2024 | |
| }, | |
| { | |
| "epoch": 0.9381514940931203, | |
| "grad_norm": 1.265625, | |
| "learning_rate": 2.708165551044031e-06, | |
| "loss": 0.8356503844261169, | |
| "step": 2025 | |
| }, | |
| { | |
| "epoch": 0.9386147787815613, | |
| "grad_norm": 0.921875, | |
| "learning_rate": 2.6930512703233423e-06, | |
| "loss": 1.003953218460083, | |
| "step": 2026 | |
| }, | |
| { | |
| "epoch": 0.9390780634700023, | |
| "grad_norm": 0.8515625, | |
| "learning_rate": 2.677976776767765e-06, | |
| "loss": 0.950794517993927, | |
| "step": 2027 | |
| }, | |
| { | |
| "epoch": 0.9395413481584434, | |
| "grad_norm": 0.84765625, | |
| "learning_rate": 2.6629420985263856e-06, | |
| "loss": 0.9656116962432861, | |
| "step": 2028 | |
| }, | |
| { | |
| "epoch": 0.9400046328468844, | |
| "grad_norm": 0.86328125, | |
| "learning_rate": 2.6479472636739592e-06, | |
| "loss": 0.825056791305542, | |
| "step": 2029 | |
| }, | |
| { | |
| "epoch": 0.9404679175353254, | |
| "grad_norm": 0.81640625, | |
| "learning_rate": 2.632992300210825e-06, | |
| "loss": 0.9464682340621948, | |
| "step": 2030 | |
| }, | |
| { | |
| "epoch": 0.9409312022237665, | |
| "grad_norm": 0.91796875, | |
| "learning_rate": 2.6180772360628837e-06, | |
| "loss": 0.8668578863143921, | |
| "step": 2031 | |
| }, | |
| { | |
| "epoch": 0.9413944869122075, | |
| "grad_norm": 0.84765625, | |
| "learning_rate": 2.6032020990815257e-06, | |
| "loss": 0.887437105178833, | |
| "step": 2032 | |
| }, | |
| { | |
| "epoch": 0.9418577716006487, | |
| "grad_norm": 0.8359375, | |
| "learning_rate": 2.588366917043583e-06, | |
| "loss": 0.9934899806976318, | |
| "step": 2033 | |
| }, | |
| { | |
| "epoch": 0.9423210562890897, | |
| "grad_norm": 0.98046875, | |
| "learning_rate": 2.5735717176512722e-06, | |
| "loss": 0.9330331683158875, | |
| "step": 2034 | |
| }, | |
| { | |
| "epoch": 0.9427843409775307, | |
| "grad_norm": 0.84375, | |
| "learning_rate": 2.5588165285321597e-06, | |
| "loss": 0.8642808198928833, | |
| "step": 2035 | |
| }, | |
| { | |
| "epoch": 0.9432476256659718, | |
| "grad_norm": 0.83984375, | |
| "learning_rate": 2.5441013772390964e-06, | |
| "loss": 1.014452576637268, | |
| "step": 2036 | |
| }, | |
| { | |
| "epoch": 0.9437109103544128, | |
| "grad_norm": 0.765625, | |
| "learning_rate": 2.5294262912501636e-06, | |
| "loss": 1.0355772972106934, | |
| "step": 2037 | |
| }, | |
| { | |
| "epoch": 0.9441741950428538, | |
| "grad_norm": 0.890625, | |
| "learning_rate": 2.5147912979686352e-06, | |
| "loss": 0.9281973838806152, | |
| "step": 2038 | |
| }, | |
| { | |
| "epoch": 0.9446374797312949, | |
| "grad_norm": 0.74609375, | |
| "learning_rate": 2.5001964247229074e-06, | |
| "loss": 1.0396404266357422, | |
| "step": 2039 | |
| }, | |
| { | |
| "epoch": 0.9451007644197359, | |
| "grad_norm": 0.921875, | |
| "learning_rate": 2.4856416987664723e-06, | |
| "loss": 0.8407167196273804, | |
| "step": 2040 | |
| }, | |
| { | |
| "epoch": 0.9455640491081769, | |
| "grad_norm": 1.140625, | |
| "learning_rate": 2.471127147277846e-06, | |
| "loss": 0.9255853891372681, | |
| "step": 2041 | |
| }, | |
| { | |
| "epoch": 0.9460273337966181, | |
| "grad_norm": 0.8828125, | |
| "learning_rate": 2.4566527973605314e-06, | |
| "loss": 0.9377841353416443, | |
| "step": 2042 | |
| }, | |
| { | |
| "epoch": 0.9464906184850591, | |
| "grad_norm": 0.86328125, | |
| "learning_rate": 2.4422186760429565e-06, | |
| "loss": 0.8124240636825562, | |
| "step": 2043 | |
| }, | |
| { | |
| "epoch": 0.9469539031735001, | |
| "grad_norm": 0.73828125, | |
| "learning_rate": 2.4278248102784187e-06, | |
| "loss": 0.7741151452064514, | |
| "step": 2044 | |
| }, | |
| { | |
| "epoch": 0.9474171878619412, | |
| "grad_norm": 0.78515625, | |
| "learning_rate": 2.4134712269450693e-06, | |
| "loss": 0.8896522521972656, | |
| "step": 2045 | |
| }, | |
| { | |
| "epoch": 0.9478804725503822, | |
| "grad_norm": 0.75390625, | |
| "learning_rate": 2.3991579528458198e-06, | |
| "loss": 0.8712372779846191, | |
| "step": 2046 | |
| }, | |
| { | |
| "epoch": 0.9483437572388232, | |
| "grad_norm": 0.87890625, | |
| "learning_rate": 2.3848850147083223e-06, | |
| "loss": 0.8915302753448486, | |
| "step": 2047 | |
| }, | |
| { | |
| "epoch": 0.9488070419272643, | |
| "grad_norm": 0.875, | |
| "learning_rate": 2.3706524391848946e-06, | |
| "loss": 0.9843184947967529, | |
| "step": 2048 | |
| }, | |
| { | |
| "epoch": 0.9492703266157053, | |
| "grad_norm": 0.85546875, | |
| "learning_rate": 2.3564602528524985e-06, | |
| "loss": 1.0059175491333008, | |
| "step": 2049 | |
| }, | |
| { | |
| "epoch": 0.9497336113041464, | |
| "grad_norm": 1.125, | |
| "learning_rate": 2.3423084822126735e-06, | |
| "loss": 0.9545824527740479, | |
| "step": 2050 | |
| }, | |
| { | |
| "epoch": 0.9501968959925875, | |
| "grad_norm": 0.9765625, | |
| "learning_rate": 2.3281971536914734e-06, | |
| "loss": 1.022434115409851, | |
| "step": 2051 | |
| }, | |
| { | |
| "epoch": 0.9506601806810285, | |
| "grad_norm": 0.85546875, | |
| "learning_rate": 2.3141262936394595e-06, | |
| "loss": 0.8228369355201721, | |
| "step": 2052 | |
| }, | |
| { | |
| "epoch": 0.9511234653694696, | |
| "grad_norm": 0.96875, | |
| "learning_rate": 2.3000959283315955e-06, | |
| "loss": 0.9236583709716797, | |
| "step": 2053 | |
| }, | |
| { | |
| "epoch": 0.9515867500579106, | |
| "grad_norm": 0.796875, | |
| "learning_rate": 2.2861060839672546e-06, | |
| "loss": 0.8788405656814575, | |
| "step": 2054 | |
| }, | |
| { | |
| "epoch": 0.9520500347463516, | |
| "grad_norm": 0.87109375, | |
| "learning_rate": 2.27215678667013e-06, | |
| "loss": 0.7980551719665527, | |
| "step": 2055 | |
| }, | |
| { | |
| "epoch": 0.9525133194347927, | |
| "grad_norm": 1.0078125, | |
| "learning_rate": 2.258248062488206e-06, | |
| "loss": 0.8565947413444519, | |
| "step": 2056 | |
| }, | |
| { | |
| "epoch": 0.9529766041232337, | |
| "grad_norm": 0.84375, | |
| "learning_rate": 2.244379937393691e-06, | |
| "loss": 0.8685353994369507, | |
| "step": 2057 | |
| }, | |
| { | |
| "epoch": 0.9534398888116747, | |
| "grad_norm": 0.796875, | |
| "learning_rate": 2.230552437282996e-06, | |
| "loss": 0.7926799654960632, | |
| "step": 2058 | |
| }, | |
| { | |
| "epoch": 0.9539031735001158, | |
| "grad_norm": 0.78125, | |
| "learning_rate": 2.2167655879766687e-06, | |
| "loss": 0.9113929271697998, | |
| "step": 2059 | |
| }, | |
| { | |
| "epoch": 0.9543664581885569, | |
| "grad_norm": 0.828125, | |
| "learning_rate": 2.20301941521934e-06, | |
| "loss": 0.8047410845756531, | |
| "step": 2060 | |
| }, | |
| { | |
| "epoch": 0.954829742876998, | |
| "grad_norm": 0.8125, | |
| "learning_rate": 2.1893139446796958e-06, | |
| "loss": 0.718073844909668, | |
| "step": 2061 | |
| }, | |
| { | |
| "epoch": 0.955293027565439, | |
| "grad_norm": 0.9140625, | |
| "learning_rate": 2.175649201950405e-06, | |
| "loss": 0.822567880153656, | |
| "step": 2062 | |
| }, | |
| { | |
| "epoch": 0.95575631225388, | |
| "grad_norm": 0.8046875, | |
| "learning_rate": 2.1620252125480936e-06, | |
| "loss": 0.8844413757324219, | |
| "step": 2063 | |
| }, | |
| { | |
| "epoch": 0.9562195969423211, | |
| "grad_norm": 0.875, | |
| "learning_rate": 2.1484420019132813e-06, | |
| "loss": 0.9247415065765381, | |
| "step": 2064 | |
| }, | |
| { | |
| "epoch": 0.9566828816307621, | |
| "grad_norm": 0.78125, | |
| "learning_rate": 2.134899595410353e-06, | |
| "loss": 0.9057773351669312, | |
| "step": 2065 | |
| }, | |
| { | |
| "epoch": 0.9571461663192031, | |
| "grad_norm": 0.8515625, | |
| "learning_rate": 2.1213980183274828e-06, | |
| "loss": 0.8755348324775696, | |
| "step": 2066 | |
| }, | |
| { | |
| "epoch": 0.9576094510076442, | |
| "grad_norm": 0.8046875, | |
| "learning_rate": 2.1079372958766046e-06, | |
| "loss": 0.9061083197593689, | |
| "step": 2067 | |
| }, | |
| { | |
| "epoch": 0.9580727356960852, | |
| "grad_norm": 0.6953125, | |
| "learning_rate": 2.0945174531933697e-06, | |
| "loss": 0.8380372524261475, | |
| "step": 2068 | |
| }, | |
| { | |
| "epoch": 0.9585360203845263, | |
| "grad_norm": 0.82421875, | |
| "learning_rate": 2.0811385153370924e-06, | |
| "loss": 1.0740100145339966, | |
| "step": 2069 | |
| }, | |
| { | |
| "epoch": 0.9589993050729674, | |
| "grad_norm": 0.84765625, | |
| "learning_rate": 2.0678005072907108e-06, | |
| "loss": 0.7675211429595947, | |
| "step": 2070 | |
| }, | |
| { | |
| "epoch": 0.9594625897614084, | |
| "grad_norm": 0.80859375, | |
| "learning_rate": 2.0545034539607104e-06, | |
| "loss": 0.8730876445770264, | |
| "step": 2071 | |
| }, | |
| { | |
| "epoch": 0.9599258744498494, | |
| "grad_norm": 0.99609375, | |
| "learning_rate": 2.0412473801771247e-06, | |
| "loss": 0.9389110207557678, | |
| "step": 2072 | |
| }, | |
| { | |
| "epoch": 0.9603891591382905, | |
| "grad_norm": 1.0078125, | |
| "learning_rate": 2.0280323106934574e-06, | |
| "loss": 0.9151057600975037, | |
| "step": 2073 | |
| }, | |
| { | |
| "epoch": 0.9608524438267315, | |
| "grad_norm": 0.8671875, | |
| "learning_rate": 2.0148582701866327e-06, | |
| "loss": 0.8602491617202759, | |
| "step": 2074 | |
| }, | |
| { | |
| "epoch": 0.9613157285151726, | |
| "grad_norm": 0.921875, | |
| "learning_rate": 2.0017252832569802e-06, | |
| "loss": 0.7774481773376465, | |
| "step": 2075 | |
| }, | |
| { | |
| "epoch": 0.9617790132036136, | |
| "grad_norm": 0.80859375, | |
| "learning_rate": 1.9886333744281473e-06, | |
| "loss": 0.9065883755683899, | |
| "step": 2076 | |
| }, | |
| { | |
| "epoch": 0.9622422978920546, | |
| "grad_norm": 0.8515625, | |
| "learning_rate": 1.9755825681470903e-06, | |
| "loss": 0.842190146446228, | |
| "step": 2077 | |
| }, | |
| { | |
| "epoch": 0.9627055825804958, | |
| "grad_norm": 0.82421875, | |
| "learning_rate": 1.962572888784009e-06, | |
| "loss": 0.7789183259010315, | |
| "step": 2078 | |
| }, | |
| { | |
| "epoch": 0.9631688672689368, | |
| "grad_norm": 0.83984375, | |
| "learning_rate": 1.9496043606323098e-06, | |
| "loss": 0.9603561162948608, | |
| "step": 2079 | |
| }, | |
| { | |
| "epoch": 0.9636321519573778, | |
| "grad_norm": 0.81640625, | |
| "learning_rate": 1.936677007908539e-06, | |
| "loss": 0.9474771022796631, | |
| "step": 2080 | |
| }, | |
| { | |
| "epoch": 0.9640954366458189, | |
| "grad_norm": 0.82421875, | |
| "learning_rate": 1.9237908547523742e-06, | |
| "loss": 0.8616044521331787, | |
| "step": 2081 | |
| }, | |
| { | |
| "epoch": 0.9645587213342599, | |
| "grad_norm": 0.84375, | |
| "learning_rate": 1.910945925226553e-06, | |
| "loss": 1.0991566181182861, | |
| "step": 2082 | |
| }, | |
| { | |
| "epoch": 0.9650220060227009, | |
| "grad_norm": 1.4140625, | |
| "learning_rate": 1.8981422433168307e-06, | |
| "loss": 0.8651741147041321, | |
| "step": 2083 | |
| }, | |
| { | |
| "epoch": 0.965485290711142, | |
| "grad_norm": 0.83203125, | |
| "learning_rate": 1.8853798329319515e-06, | |
| "loss": 0.9337427616119385, | |
| "step": 2084 | |
| }, | |
| { | |
| "epoch": 0.965948575399583, | |
| "grad_norm": 0.84375, | |
| "learning_rate": 1.872658717903569e-06, | |
| "loss": 0.7772614359855652, | |
| "step": 2085 | |
| }, | |
| { | |
| "epoch": 0.966411860088024, | |
| "grad_norm": 1.0546875, | |
| "learning_rate": 1.8599789219862499e-06, | |
| "loss": 0.7997364401817322, | |
| "step": 2086 | |
| }, | |
| { | |
| "epoch": 0.9668751447764652, | |
| "grad_norm": 0.77734375, | |
| "learning_rate": 1.8473404688573876e-06, | |
| "loss": 0.7531715631484985, | |
| "step": 2087 | |
| }, | |
| { | |
| "epoch": 0.9673384294649062, | |
| "grad_norm": 0.84375, | |
| "learning_rate": 1.8347433821171917e-06, | |
| "loss": 0.8086212277412415, | |
| "step": 2088 | |
| }, | |
| { | |
| "epoch": 0.9678017141533473, | |
| "grad_norm": 1.0078125, | |
| "learning_rate": 1.822187685288606e-06, | |
| "loss": 0.8939145803451538, | |
| "step": 2089 | |
| }, | |
| { | |
| "epoch": 0.9682649988417883, | |
| "grad_norm": 0.84375, | |
| "learning_rate": 1.809673401817289e-06, | |
| "loss": 0.9865738749504089, | |
| "step": 2090 | |
| }, | |
| { | |
| "epoch": 0.9687282835302293, | |
| "grad_norm": 0.83984375, | |
| "learning_rate": 1.7972005550715907e-06, | |
| "loss": 0.9553055763244629, | |
| "step": 2091 | |
| }, | |
| { | |
| "epoch": 0.9691915682186704, | |
| "grad_norm": 0.75, | |
| "learning_rate": 1.7847691683424535e-06, | |
| "loss": 0.9253486394882202, | |
| "step": 2092 | |
| }, | |
| { | |
| "epoch": 0.9696548529071114, | |
| "grad_norm": 0.96875, | |
| "learning_rate": 1.7723792648434237e-06, | |
| "loss": 0.8802915811538696, | |
| "step": 2093 | |
| }, | |
| { | |
| "epoch": 0.9701181375955524, | |
| "grad_norm": 0.828125, | |
| "learning_rate": 1.760030867710567e-06, | |
| "loss": 0.8802918791770935, | |
| "step": 2094 | |
| }, | |
| { | |
| "epoch": 0.9705814222839935, | |
| "grad_norm": 0.8203125, | |
| "learning_rate": 1.7477240000024547e-06, | |
| "loss": 0.843493640422821, | |
| "step": 2095 | |
| }, | |
| { | |
| "epoch": 0.9710447069724346, | |
| "grad_norm": 0.9296875, | |
| "learning_rate": 1.7354586847001068e-06, | |
| "loss": 0.9937857389450073, | |
| "step": 2096 | |
| }, | |
| { | |
| "epoch": 0.9715079916608756, | |
| "grad_norm": 0.83984375, | |
| "learning_rate": 1.7232349447069462e-06, | |
| "loss": 0.8251986503601074, | |
| "step": 2097 | |
| }, | |
| { | |
| "epoch": 0.9719712763493167, | |
| "grad_norm": 0.890625, | |
| "learning_rate": 1.7110528028487676e-06, | |
| "loss": 0.9000308513641357, | |
| "step": 2098 | |
| }, | |
| { | |
| "epoch": 0.9724345610377577, | |
| "grad_norm": 0.79296875, | |
| "learning_rate": 1.6989122818736754e-06, | |
| "loss": 1.0388299226760864, | |
| "step": 2099 | |
| }, | |
| { | |
| "epoch": 0.9728978457261988, | |
| "grad_norm": 0.87890625, | |
| "learning_rate": 1.6868134044520744e-06, | |
| "loss": 1.0854367017745972, | |
| "step": 2100 | |
| }, | |
| { | |
| "epoch": 0.9733611304146398, | |
| "grad_norm": 0.87890625, | |
| "learning_rate": 1.674756193176588e-06, | |
| "loss": 1.0435681343078613, | |
| "step": 2101 | |
| }, | |
| { | |
| "epoch": 0.9738244151030808, | |
| "grad_norm": 0.8203125, | |
| "learning_rate": 1.6627406705620516e-06, | |
| "loss": 0.972065806388855, | |
| "step": 2102 | |
| }, | |
| { | |
| "epoch": 0.9742876997915219, | |
| "grad_norm": 1.03125, | |
| "learning_rate": 1.6507668590454375e-06, | |
| "loss": 1.1784340143203735, | |
| "step": 2103 | |
| }, | |
| { | |
| "epoch": 0.9747509844799629, | |
| "grad_norm": 0.73046875, | |
| "learning_rate": 1.6388347809858335e-06, | |
| "loss": 0.9283071756362915, | |
| "step": 2104 | |
| }, | |
| { | |
| "epoch": 0.975214269168404, | |
| "grad_norm": 0.83203125, | |
| "learning_rate": 1.6269444586644113e-06, | |
| "loss": 0.8603734970092773, | |
| "step": 2105 | |
| }, | |
| { | |
| "epoch": 0.9756775538568451, | |
| "grad_norm": 0.8046875, | |
| "learning_rate": 1.6150959142843543e-06, | |
| "loss": 0.9831134080886841, | |
| "step": 2106 | |
| }, | |
| { | |
| "epoch": 0.9761408385452861, | |
| "grad_norm": 0.79296875, | |
| "learning_rate": 1.6032891699708412e-06, | |
| "loss": 0.986380934715271, | |
| "step": 2107 | |
| }, | |
| { | |
| "epoch": 0.9766041232337271, | |
| "grad_norm": 1.0234375, | |
| "learning_rate": 1.591524247770991e-06, | |
| "loss": 0.9720912575721741, | |
| "step": 2108 | |
| }, | |
| { | |
| "epoch": 0.9770674079221682, | |
| "grad_norm": 0.8125, | |
| "learning_rate": 1.5798011696538277e-06, | |
| "loss": 0.8218910098075867, | |
| "step": 2109 | |
| }, | |
| { | |
| "epoch": 0.9775306926106092, | |
| "grad_norm": 0.953125, | |
| "learning_rate": 1.568119957510243e-06, | |
| "loss": 0.9122781753540039, | |
| "step": 2110 | |
| }, | |
| { | |
| "epoch": 0.9779939772990502, | |
| "grad_norm": 0.83203125, | |
| "learning_rate": 1.5564806331529538e-06, | |
| "loss": 1.0062001943588257, | |
| "step": 2111 | |
| }, | |
| { | |
| "epoch": 0.9784572619874913, | |
| "grad_norm": 0.8125, | |
| "learning_rate": 1.5448832183164436e-06, | |
| "loss": 0.9109072685241699, | |
| "step": 2112 | |
| }, | |
| { | |
| "epoch": 0.9789205466759323, | |
| "grad_norm": 0.9140625, | |
| "learning_rate": 1.5333277346569414e-06, | |
| "loss": 0.9397470951080322, | |
| "step": 2113 | |
| }, | |
| { | |
| "epoch": 0.9793838313643735, | |
| "grad_norm": 0.8203125, | |
| "learning_rate": 1.5218142037523973e-06, | |
| "loss": 0.8526613712310791, | |
| "step": 2114 | |
| }, | |
| { | |
| "epoch": 0.9798471160528145, | |
| "grad_norm": 0.828125, | |
| "learning_rate": 1.5103426471023944e-06, | |
| "loss": 0.9518367052078247, | |
| "step": 2115 | |
| }, | |
| { | |
| "epoch": 0.9803104007412555, | |
| "grad_norm": 1.0859375, | |
| "learning_rate": 1.4989130861281527e-06, | |
| "loss": 1.002701997756958, | |
| "step": 2116 | |
| }, | |
| { | |
| "epoch": 0.9807736854296966, | |
| "grad_norm": 0.8671875, | |
| "learning_rate": 1.4875255421724579e-06, | |
| "loss": 0.9528016448020935, | |
| "step": 2117 | |
| }, | |
| { | |
| "epoch": 0.9812369701181376, | |
| "grad_norm": 0.78125, | |
| "learning_rate": 1.4761800364996524e-06, | |
| "loss": 0.870173454284668, | |
| "step": 2118 | |
| }, | |
| { | |
| "epoch": 0.9817002548065786, | |
| "grad_norm": 0.7734375, | |
| "learning_rate": 1.4648765902955763e-06, | |
| "loss": 0.8694907426834106, | |
| "step": 2119 | |
| }, | |
| { | |
| "epoch": 0.9821635394950197, | |
| "grad_norm": 1.0078125, | |
| "learning_rate": 1.453615224667513e-06, | |
| "loss": 0.9634788036346436, | |
| "step": 2120 | |
| }, | |
| { | |
| "epoch": 0.9826268241834607, | |
| "grad_norm": 0.85546875, | |
| "learning_rate": 1.4423959606441911e-06, | |
| "loss": 0.8832241296768188, | |
| "step": 2121 | |
| }, | |
| { | |
| "epoch": 0.9830901088719017, | |
| "grad_norm": 0.84375, | |
| "learning_rate": 1.4312188191757027e-06, | |
| "loss": 0.9558946490287781, | |
| "step": 2122 | |
| }, | |
| { | |
| "epoch": 0.9835533935603429, | |
| "grad_norm": 1.0703125, | |
| "learning_rate": 1.4200838211334962e-06, | |
| "loss": 0.9503135085105896, | |
| "step": 2123 | |
| }, | |
| { | |
| "epoch": 0.9840166782487839, | |
| "grad_norm": 0.86328125, | |
| "learning_rate": 1.4089909873103181e-06, | |
| "loss": 1.1282013654708862, | |
| "step": 2124 | |
| }, | |
| { | |
| "epoch": 0.984479962937225, | |
| "grad_norm": 0.9140625, | |
| "learning_rate": 1.3979403384201828e-06, | |
| "loss": 0.8594451546669006, | |
| "step": 2125 | |
| }, | |
| { | |
| "epoch": 0.984943247625666, | |
| "grad_norm": 0.83984375, | |
| "learning_rate": 1.3869318950983276e-06, | |
| "loss": 0.9713156819343567, | |
| "step": 2126 | |
| }, | |
| { | |
| "epoch": 0.985406532314107, | |
| "grad_norm": 0.80078125, | |
| "learning_rate": 1.3759656779011786e-06, | |
| "loss": 0.8094725012779236, | |
| "step": 2127 | |
| }, | |
| { | |
| "epoch": 0.985869817002548, | |
| "grad_norm": 1.1328125, | |
| "learning_rate": 1.3650417073063208e-06, | |
| "loss": 0.8664292097091675, | |
| "step": 2128 | |
| }, | |
| { | |
| "epoch": 0.9863331016909891, | |
| "grad_norm": 0.7734375, | |
| "learning_rate": 1.3541600037124343e-06, | |
| "loss": 0.85650235414505, | |
| "step": 2129 | |
| }, | |
| { | |
| "epoch": 0.9867963863794301, | |
| "grad_norm": 0.80859375, | |
| "learning_rate": 1.3433205874392886e-06, | |
| "loss": 0.9315167665481567, | |
| "step": 2130 | |
| }, | |
| { | |
| "epoch": 0.9872596710678712, | |
| "grad_norm": 0.828125, | |
| "learning_rate": 1.3325234787276746e-06, | |
| "loss": 0.9233248233795166, | |
| "step": 2131 | |
| }, | |
| { | |
| "epoch": 0.9877229557563123, | |
| "grad_norm": 0.75390625, | |
| "learning_rate": 1.321768697739392e-06, | |
| "loss": 0.8415129780769348, | |
| "step": 2132 | |
| }, | |
| { | |
| "epoch": 0.9881862404447533, | |
| "grad_norm": 0.78515625, | |
| "learning_rate": 1.3110562645571954e-06, | |
| "loss": 1.0400590896606445, | |
| "step": 2133 | |
| }, | |
| { | |
| "epoch": 0.9886495251331944, | |
| "grad_norm": 0.78515625, | |
| "learning_rate": 1.3003861991847687e-06, | |
| "loss": 0.8513540029525757, | |
| "step": 2134 | |
| }, | |
| { | |
| "epoch": 0.9891128098216354, | |
| "grad_norm": 0.80859375, | |
| "learning_rate": 1.2897585215466699e-06, | |
| "loss": 0.8044668436050415, | |
| "step": 2135 | |
| }, | |
| { | |
| "epoch": 0.9895760945100764, | |
| "grad_norm": 0.96875, | |
| "learning_rate": 1.2791732514883067e-06, | |
| "loss": 0.9986090660095215, | |
| "step": 2136 | |
| }, | |
| { | |
| "epoch": 0.9900393791985175, | |
| "grad_norm": 0.76171875, | |
| "learning_rate": 1.2686304087759108e-06, | |
| "loss": 0.756338894367218, | |
| "step": 2137 | |
| }, | |
| { | |
| "epoch": 0.9905026638869585, | |
| "grad_norm": 0.7734375, | |
| "learning_rate": 1.2581300130964728e-06, | |
| "loss": 0.7462416887283325, | |
| "step": 2138 | |
| }, | |
| { | |
| "epoch": 0.9909659485753995, | |
| "grad_norm": 0.78125, | |
| "learning_rate": 1.2476720840577294e-06, | |
| "loss": 0.981809675693512, | |
| "step": 2139 | |
| }, | |
| { | |
| "epoch": 0.9914292332638406, | |
| "grad_norm": 0.7890625, | |
| "learning_rate": 1.23725664118811e-06, | |
| "loss": 0.9728780388832092, | |
| "step": 2140 | |
| }, | |
| { | |
| "epoch": 0.9918925179522817, | |
| "grad_norm": 0.95703125, | |
| "learning_rate": 1.226883703936716e-06, | |
| "loss": 1.043047308921814, | |
| "step": 2141 | |
| }, | |
| { | |
| "epoch": 0.9923558026407228, | |
| "grad_norm": 0.734375, | |
| "learning_rate": 1.2165532916732768e-06, | |
| "loss": 0.8684824705123901, | |
| "step": 2142 | |
| }, | |
| { | |
| "epoch": 0.9928190873291638, | |
| "grad_norm": 0.85546875, | |
| "learning_rate": 1.206265423688106e-06, | |
| "loss": 0.9691533446311951, | |
| "step": 2143 | |
| }, | |
| { | |
| "epoch": 0.9932823720176048, | |
| "grad_norm": 0.7734375, | |
| "learning_rate": 1.196020119192082e-06, | |
| "loss": 0.8577444553375244, | |
| "step": 2144 | |
| }, | |
| { | |
| "epoch": 0.9937456567060459, | |
| "grad_norm": 0.7578125, | |
| "learning_rate": 1.1858173973165886e-06, | |
| "loss": 0.824654757976532, | |
| "step": 2145 | |
| }, | |
| { | |
| "epoch": 0.9942089413944869, | |
| "grad_norm": 0.8203125, | |
| "learning_rate": 1.1756572771135146e-06, | |
| "loss": 1.0085506439208984, | |
| "step": 2146 | |
| }, | |
| { | |
| "epoch": 0.9946722260829279, | |
| "grad_norm": 0.83203125, | |
| "learning_rate": 1.165539777555182e-06, | |
| "loss": 0.8890453577041626, | |
| "step": 2147 | |
| }, | |
| { | |
| "epoch": 0.995135510771369, | |
| "grad_norm": 0.85546875, | |
| "learning_rate": 1.1554649175343316e-06, | |
| "loss": 0.8785421848297119, | |
| "step": 2148 | |
| }, | |
| { | |
| "epoch": 0.99559879545981, | |
| "grad_norm": 0.76171875, | |
| "learning_rate": 1.1454327158640743e-06, | |
| "loss": 0.9128347039222717, | |
| "step": 2149 | |
| }, | |
| { | |
| "epoch": 0.9960620801482511, | |
| "grad_norm": 0.796875, | |
| "learning_rate": 1.1354431912778758e-06, | |
| "loss": 0.7984659671783447, | |
| "step": 2150 | |
| }, | |
| { | |
| "epoch": 0.9965253648366922, | |
| "grad_norm": 0.94921875, | |
| "learning_rate": 1.1254963624295052e-06, | |
| "loss": 0.8814120292663574, | |
| "step": 2151 | |
| }, | |
| { | |
| "epoch": 0.9969886495251332, | |
| "grad_norm": 0.91796875, | |
| "learning_rate": 1.1155922478929928e-06, | |
| "loss": 0.9477824568748474, | |
| "step": 2152 | |
| }, | |
| { | |
| "epoch": 0.9974519342135743, | |
| "grad_norm": 0.83984375, | |
| "learning_rate": 1.10573086616263e-06, | |
| "loss": 0.9535715579986572, | |
| "step": 2153 | |
| }, | |
| { | |
| "epoch": 0.9979152189020153, | |
| "grad_norm": 1.546875, | |
| "learning_rate": 1.0959122356528868e-06, | |
| "loss": 0.9569465517997742, | |
| "step": 2154 | |
| }, | |
| { | |
| "epoch": 0.9983785035904563, | |
| "grad_norm": 0.9609375, | |
| "learning_rate": 1.0861363746984196e-06, | |
| "loss": 0.8506355285644531, | |
| "step": 2155 | |
| }, | |
| { | |
| "epoch": 0.9988417882788974, | |
| "grad_norm": 0.88671875, | |
| "learning_rate": 1.0764033015540182e-06, | |
| "loss": 0.9563447833061218, | |
| "step": 2156 | |
| }, | |
| { | |
| "epoch": 0.9993050729673384, | |
| "grad_norm": 0.8515625, | |
| "learning_rate": 1.0667130343945627e-06, | |
| "loss": 0.8507230281829834, | |
| "step": 2157 | |
| }, | |
| { | |
| "epoch": 0.9997683576557794, | |
| "grad_norm": 0.90234375, | |
| "learning_rate": 1.0570655913150135e-06, | |
| "loss": 0.9006310701370239, | |
| "step": 2158 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "grad_norm": 1.1171875, | |
| "learning_rate": 1.0474609903303493e-06, | |
| "loss": 1.0268394947052002, | |
| "step": 2159 | |
| }, | |
| { | |
| "epoch": 1.0004632846884411, | |
| "grad_norm": 0.86328125, | |
| "learning_rate": 1.0378992493755704e-06, | |
| "loss": 0.6635380387306213, | |
| "step": 2160 | |
| }, | |
| { | |
| "epoch": 1.000926569376882, | |
| "grad_norm": 0.87890625, | |
| "learning_rate": 1.0283803863056181e-06, | |
| "loss": 0.6334539651870728, | |
| "step": 2161 | |
| }, | |
| { | |
| "epoch": 1.0013898540653232, | |
| "grad_norm": 0.953125, | |
| "learning_rate": 1.0189044188953833e-06, | |
| "loss": 0.7979657053947449, | |
| "step": 2162 | |
| }, | |
| { | |
| "epoch": 1.0018531387537641, | |
| "grad_norm": 0.80078125, | |
| "learning_rate": 1.0094713648396478e-06, | |
| "loss": 0.7369372844696045, | |
| "step": 2163 | |
| }, | |
| { | |
| "epoch": 1.0023164234422053, | |
| "grad_norm": 0.82421875, | |
| "learning_rate": 1.0000812417530654e-06, | |
| "loss": 0.7056367993354797, | |
| "step": 2164 | |
| }, | |
| { | |
| "epoch": 1.0027797081306462, | |
| "grad_norm": 0.86328125, | |
| "learning_rate": 9.907340671701244e-07, | |
| "loss": 0.6825569868087769, | |
| "step": 2165 | |
| }, | |
| { | |
| "epoch": 1.0032429928190874, | |
| "grad_norm": 0.84765625, | |
| "learning_rate": 9.81429858545103e-07, | |
| "loss": 0.7204247117042542, | |
| "step": 2166 | |
| }, | |
| { | |
| "epoch": 1.0037062775075283, | |
| "grad_norm": 0.8828125, | |
| "learning_rate": 9.721686332520658e-07, | |
| "loss": 0.8663711547851562, | |
| "step": 2167 | |
| }, | |
| { | |
| "epoch": 1.0041695621959694, | |
| "grad_norm": 0.8359375, | |
| "learning_rate": 9.629504085847903e-07, | |
| "loss": 0.8728551864624023, | |
| "step": 2168 | |
| }, | |
| { | |
| "epoch": 1.0046328468844106, | |
| "grad_norm": 0.75390625, | |
| "learning_rate": 9.537752017567814e-07, | |
| "loss": 0.7198824882507324, | |
| "step": 2169 | |
| }, | |
| { | |
| "epoch": 1.0050961315728515, | |
| "grad_norm": 0.890625, | |
| "learning_rate": 9.446430299011981e-07, | |
| "loss": 0.9775660037994385, | |
| "step": 2170 | |
| }, | |
| { | |
| "epoch": 1.0055594162612926, | |
| "grad_norm": 0.80859375, | |
| "learning_rate": 9.355539100708504e-07, | |
| "loss": 0.7355836033821106, | |
| "step": 2171 | |
| }, | |
| { | |
| "epoch": 1.0060227009497336, | |
| "grad_norm": 0.80078125, | |
| "learning_rate": 9.265078592381402e-07, | |
| "loss": 0.8342987298965454, | |
| "step": 2172 | |
| }, | |
| { | |
| "epoch": 1.0064859856381747, | |
| "grad_norm": 1.0, | |
| "learning_rate": 9.175048942950647e-07, | |
| "loss": 0.8058943748474121, | |
| "step": 2173 | |
| }, | |
| { | |
| "epoch": 1.0069492703266156, | |
| "grad_norm": 0.9453125, | |
| "learning_rate": 9.08545032053155e-07, | |
| "loss": 0.7417223453521729, | |
| "step": 2174 | |
| }, | |
| { | |
| "epoch": 1.0074125550150568, | |
| "grad_norm": 0.734375, | |
| "learning_rate": 8.996282892434513e-07, | |
| "loss": 0.6943836212158203, | |
| "step": 2175 | |
| }, | |
| { | |
| "epoch": 1.0078758397034977, | |
| "grad_norm": 0.83203125, | |
| "learning_rate": 8.907546825164854e-07, | |
| "loss": 0.8752128481864929, | |
| "step": 2176 | |
| }, | |
| { | |
| "epoch": 1.0083391243919388, | |
| "grad_norm": 0.765625, | |
| "learning_rate": 8.819242284422267e-07, | |
| "loss": 0.7245913743972778, | |
| "step": 2177 | |
| }, | |
| { | |
| "epoch": 1.00880240908038, | |
| "grad_norm": 0.75, | |
| "learning_rate": 8.731369435100796e-07, | |
| "loss": 0.7406237125396729, | |
| "step": 2178 | |
| }, | |
| { | |
| "epoch": 1.009265693768821, | |
| "grad_norm": 0.81640625, | |
| "learning_rate": 8.643928441288331e-07, | |
| "loss": 0.7131381034851074, | |
| "step": 2179 | |
| }, | |
| { | |
| "epoch": 1.009728978457262, | |
| "grad_norm": 0.9921875, | |
| "learning_rate": 8.556919466266182e-07, | |
| "loss": 0.865633487701416, | |
| "step": 2180 | |
| }, | |
| { | |
| "epoch": 1.010192263145703, | |
| "grad_norm": 0.87109375, | |
| "learning_rate": 8.470342672509208e-07, | |
| "loss": 0.8932554721832275, | |
| "step": 2181 | |
| }, | |
| { | |
| "epoch": 1.0106555478341441, | |
| "grad_norm": 0.9296875, | |
| "learning_rate": 8.384198221684942e-07, | |
| "loss": 0.827540397644043, | |
| "step": 2182 | |
| }, | |
| { | |
| "epoch": 1.011118832522585, | |
| "grad_norm": 0.86328125, | |
| "learning_rate": 8.298486274653935e-07, | |
| "loss": 0.7067131996154785, | |
| "step": 2183 | |
| }, | |
| { | |
| "epoch": 1.0115821172110262, | |
| "grad_norm": 0.80078125, | |
| "learning_rate": 8.213206991468747e-07, | |
| "loss": 0.7793404459953308, | |
| "step": 2184 | |
| }, | |
| { | |
| "epoch": 1.0120454018994671, | |
| "grad_norm": 0.8828125, | |
| "learning_rate": 8.128360531374313e-07, | |
| "loss": 0.6501293182373047, | |
| "step": 2185 | |
| }, | |
| { | |
| "epoch": 1.0125086865879083, | |
| "grad_norm": 0.859375, | |
| "learning_rate": 8.043947052807124e-07, | |
| "loss": 0.8105236887931824, | |
| "step": 2186 | |
| }, | |
| { | |
| "epoch": 1.0129719712763494, | |
| "grad_norm": 0.82421875, | |
| "learning_rate": 7.959966713395304e-07, | |
| "loss": 0.6882289052009583, | |
| "step": 2187 | |
| }, | |
| { | |
| "epoch": 1.0134352559647903, | |
| "grad_norm": 0.859375, | |
| "learning_rate": 7.876419669958077e-07, | |
| "loss": 0.6595849394798279, | |
| "step": 2188 | |
| }, | |
| { | |
| "epoch": 1.0138985406532315, | |
| "grad_norm": 0.859375, | |
| "learning_rate": 7.793306078505529e-07, | |
| "loss": 0.8440772294998169, | |
| "step": 2189 | |
| }, | |
| { | |
| "epoch": 1.0143618253416724, | |
| "grad_norm": 0.796875, | |
| "learning_rate": 7.710626094238498e-07, | |
| "loss": 0.685444712638855, | |
| "step": 2190 | |
| }, | |
| { | |
| "epoch": 1.0148251100301136, | |
| "grad_norm": 0.83984375, | |
| "learning_rate": 7.628379871547937e-07, | |
| "loss": 0.7895556688308716, | |
| "step": 2191 | |
| }, | |
| { | |
| "epoch": 1.0152883947185545, | |
| "grad_norm": 0.85546875, | |
| "learning_rate": 7.546567564014994e-07, | |
| "loss": 0.6867796182632446, | |
| "step": 2192 | |
| }, | |
| { | |
| "epoch": 1.0157516794069956, | |
| "grad_norm": 0.890625, | |
| "learning_rate": 7.465189324410427e-07, | |
| "loss": 0.7532359957695007, | |
| "step": 2193 | |
| }, | |
| { | |
| "epoch": 1.0162149640954365, | |
| "grad_norm": 0.8828125, | |
| "learning_rate": 7.384245304694544e-07, | |
| "loss": 0.9113466739654541, | |
| "step": 2194 | |
| }, | |
| { | |
| "epoch": 1.0166782487838777, | |
| "grad_norm": 0.80859375, | |
| "learning_rate": 7.303735656016705e-07, | |
| "loss": 0.830723226070404, | |
| "step": 2195 | |
| }, | |
| { | |
| "epoch": 1.0171415334723188, | |
| "grad_norm": 0.796875, | |
| "learning_rate": 7.223660528715268e-07, | |
| "loss": 0.712505578994751, | |
| "step": 2196 | |
| }, | |
| { | |
| "epoch": 1.0176048181607598, | |
| "grad_norm": 0.875, | |
| "learning_rate": 7.144020072317181e-07, | |
| "loss": 0.7443718314170837, | |
| "step": 2197 | |
| }, | |
| { | |
| "epoch": 1.018068102849201, | |
| "grad_norm": 0.95703125, | |
| "learning_rate": 7.064814435537592e-07, | |
| "loss": 0.7996273040771484, | |
| "step": 2198 | |
| }, | |
| { | |
| "epoch": 1.0185313875376418, | |
| "grad_norm": 0.70703125, | |
| "learning_rate": 6.98604376627987e-07, | |
| "loss": 0.783358097076416, | |
| "step": 2199 | |
| }, | |
| { | |
| "epoch": 1.018994672226083, | |
| "grad_norm": 0.84375, | |
| "learning_rate": 6.907708211635022e-07, | |
| "loss": 0.917162299156189, | |
| "step": 2200 | |
| }, | |
| { | |
| "epoch": 1.019457956914524, | |
| "grad_norm": 0.83203125, | |
| "learning_rate": 6.829807917881609e-07, | |
| "loss": 0.7591511607170105, | |
| "step": 2201 | |
| }, | |
| { | |
| "epoch": 1.019921241602965, | |
| "grad_norm": 0.7578125, | |
| "learning_rate": 6.752343030485433e-07, | |
| "loss": 0.8324123620986938, | |
| "step": 2202 | |
| }, | |
| { | |
| "epoch": 1.020384526291406, | |
| "grad_norm": 0.84765625, | |
| "learning_rate": 6.675313694099208e-07, | |
| "loss": 0.7730574607849121, | |
| "step": 2203 | |
| }, | |
| { | |
| "epoch": 1.0208478109798471, | |
| "grad_norm": 0.890625, | |
| "learning_rate": 6.598720052562328e-07, | |
| "loss": 0.8956565856933594, | |
| "step": 2204 | |
| }, | |
| { | |
| "epoch": 1.0213110956682883, | |
| "grad_norm": 0.890625, | |
| "learning_rate": 6.522562248900652e-07, | |
| "loss": 0.7237347364425659, | |
| "step": 2205 | |
| }, | |
| { | |
| "epoch": 1.0217743803567292, | |
| "grad_norm": 0.76171875, | |
| "learning_rate": 6.446840425326128e-07, | |
| "loss": 0.9014825224876404, | |
| "step": 2206 | |
| }, | |
| { | |
| "epoch": 1.0222376650451703, | |
| "grad_norm": 0.828125, | |
| "learning_rate": 6.371554723236583e-07, | |
| "loss": 0.8383098244667053, | |
| "step": 2207 | |
| }, | |
| { | |
| "epoch": 1.0227009497336113, | |
| "grad_norm": 0.7890625, | |
| "learning_rate": 6.296705283215509e-07, | |
| "loss": 0.5707777142524719, | |
| "step": 2208 | |
| }, | |
| { | |
| "epoch": 1.0231642344220524, | |
| "grad_norm": 0.93359375, | |
| "learning_rate": 6.222292245031715e-07, | |
| "loss": 0.7602838277816772, | |
| "step": 2209 | |
| }, | |
| { | |
| "epoch": 1.0236275191104933, | |
| "grad_norm": 0.9765625, | |
| "learning_rate": 6.14831574763909e-07, | |
| "loss": 0.8218472003936768, | |
| "step": 2210 | |
| }, | |
| { | |
| "epoch": 1.0240908037989345, | |
| "grad_norm": 0.83984375, | |
| "learning_rate": 6.074775929176442e-07, | |
| "loss": 0.8414373397827148, | |
| "step": 2211 | |
| }, | |
| { | |
| "epoch": 1.0245540884873754, | |
| "grad_norm": 0.87890625, | |
| "learning_rate": 6.001672926967015e-07, | |
| "loss": 0.8075520396232605, | |
| "step": 2212 | |
| }, | |
| { | |
| "epoch": 1.0250173731758165, | |
| "grad_norm": 0.8125, | |
| "learning_rate": 5.929006877518494e-07, | |
| "loss": 0.757814347743988, | |
| "step": 2213 | |
| }, | |
| { | |
| "epoch": 1.0254806578642577, | |
| "grad_norm": 0.796875, | |
| "learning_rate": 5.856777916522526e-07, | |
| "loss": 0.7999564409255981, | |
| "step": 2214 | |
| }, | |
| { | |
| "epoch": 1.0259439425526986, | |
| "grad_norm": 0.79296875, | |
| "learning_rate": 5.784986178854688e-07, | |
| "loss": 0.8249316215515137, | |
| "step": 2215 | |
| }, | |
| { | |
| "epoch": 1.0264072272411398, | |
| "grad_norm": 1.234375, | |
| "learning_rate": 5.713631798574008e-07, | |
| "loss": 0.8913872241973877, | |
| "step": 2216 | |
| }, | |
| { | |
| "epoch": 1.0268705119295807, | |
| "grad_norm": 0.98046875, | |
| "learning_rate": 5.642714908922866e-07, | |
| "loss": 0.7781076431274414, | |
| "step": 2217 | |
| }, | |
| { | |
| "epoch": 1.0273337966180218, | |
| "grad_norm": 0.8671875, | |
| "learning_rate": 5.572235642326718e-07, | |
| "loss": 0.7786509990692139, | |
| "step": 2218 | |
| }, | |
| { | |
| "epoch": 1.0277970813064627, | |
| "grad_norm": 0.79296875, | |
| "learning_rate": 5.502194130393807e-07, | |
| "loss": 0.6516589522361755, | |
| "step": 2219 | |
| }, | |
| { | |
| "epoch": 1.028260365994904, | |
| "grad_norm": 0.953125, | |
| "learning_rate": 5.432590503914954e-07, | |
| "loss": 0.8788017630577087, | |
| "step": 2220 | |
| }, | |
| { | |
| "epoch": 1.0287236506833448, | |
| "grad_norm": 0.80078125, | |
| "learning_rate": 5.363424892863255e-07, | |
| "loss": 0.793433666229248, | |
| "step": 2221 | |
| }, | |
| { | |
| "epoch": 1.029186935371786, | |
| "grad_norm": 0.91796875, | |
| "learning_rate": 5.294697426393986e-07, | |
| "loss": 0.7199576497077942, | |
| "step": 2222 | |
| }, | |
| { | |
| "epoch": 1.029650220060227, | |
| "grad_norm": 0.9921875, | |
| "learning_rate": 5.22640823284414e-07, | |
| "loss": 0.6732587218284607, | |
| "step": 2223 | |
| }, | |
| { | |
| "epoch": 1.030113504748668, | |
| "grad_norm": 0.8203125, | |
| "learning_rate": 5.158557439732432e-07, | |
| "loss": 0.7081973552703857, | |
| "step": 2224 | |
| }, | |
| { | |
| "epoch": 1.0305767894371092, | |
| "grad_norm": 0.8515625, | |
| "learning_rate": 5.091145173758873e-07, | |
| "loss": 0.7844109535217285, | |
| "step": 2225 | |
| }, | |
| { | |
| "epoch": 1.03104007412555, | |
| "grad_norm": 0.953125, | |
| "learning_rate": 5.024171560804529e-07, | |
| "loss": 0.7437909841537476, | |
| "step": 2226 | |
| }, | |
| { | |
| "epoch": 1.0315033588139912, | |
| "grad_norm": 0.73046875, | |
| "learning_rate": 4.957636725931493e-07, | |
| "loss": 0.8256717920303345, | |
| "step": 2227 | |
| }, | |
| { | |
| "epoch": 1.0319666435024322, | |
| "grad_norm": 0.98828125, | |
| "learning_rate": 4.891540793382436e-07, | |
| "loss": 0.872868001461029, | |
| "step": 2228 | |
| }, | |
| { | |
| "epoch": 1.0324299281908733, | |
| "grad_norm": 0.86328125, | |
| "learning_rate": 4.825883886580469e-07, | |
| "loss": 0.8539603352546692, | |
| "step": 2229 | |
| }, | |
| { | |
| "epoch": 1.0328932128793142, | |
| "grad_norm": 0.859375, | |
| "learning_rate": 4.7606661281288523e-07, | |
| "loss": 0.9318234920501709, | |
| "step": 2230 | |
| }, | |
| { | |
| "epoch": 1.0333564975677554, | |
| "grad_norm": 0.81640625, | |
| "learning_rate": 4.695887639810916e-07, | |
| "loss": 0.7605723142623901, | |
| "step": 2231 | |
| }, | |
| { | |
| "epoch": 1.0338197822561965, | |
| "grad_norm": 0.76171875, | |
| "learning_rate": 4.631548542589581e-07, | |
| "loss": 0.6197681427001953, | |
| "step": 2232 | |
| }, | |
| { | |
| "epoch": 1.0342830669446375, | |
| "grad_norm": 0.91796875, | |
| "learning_rate": 4.567648956607382e-07, | |
| "loss": 0.8866512775421143, | |
| "step": 2233 | |
| }, | |
| { | |
| "epoch": 1.0347463516330786, | |
| "grad_norm": 0.8359375, | |
| "learning_rate": 4.5041890011861517e-07, | |
| "loss": 0.7138317823410034, | |
| "step": 2234 | |
| }, | |
| { | |
| "epoch": 1.0352096363215195, | |
| "grad_norm": 0.80078125, | |
| "learning_rate": 4.4411687948267266e-07, | |
| "loss": 0.7621663212776184, | |
| "step": 2235 | |
| }, | |
| { | |
| "epoch": 1.0356729210099607, | |
| "grad_norm": 0.87890625, | |
| "learning_rate": 4.3785884552087854e-07, | |
| "loss": 0.720496654510498, | |
| "step": 2236 | |
| }, | |
| { | |
| "epoch": 1.0361362056984016, | |
| "grad_norm": 0.8515625, | |
| "learning_rate": 4.316448099190664e-07, | |
| "loss": 0.7380560636520386, | |
| "step": 2237 | |
| }, | |
| { | |
| "epoch": 1.0365994903868427, | |
| "grad_norm": 1.078125, | |
| "learning_rate": 4.254747842809117e-07, | |
| "loss": 0.8146321773529053, | |
| "step": 2238 | |
| }, | |
| { | |
| "epoch": 1.0370627750752837, | |
| "grad_norm": 0.85546875, | |
| "learning_rate": 4.193487801279021e-07, | |
| "loss": 0.8686697483062744, | |
| "step": 2239 | |
| }, | |
| { | |
| "epoch": 1.0375260597637248, | |
| "grad_norm": 0.80078125, | |
| "learning_rate": 4.132668088993299e-07, | |
| "loss": 0.8822686672210693, | |
| "step": 2240 | |
| }, | |
| { | |
| "epoch": 1.037989344452166, | |
| "grad_norm": 0.82421875, | |
| "learning_rate": 4.0722888195225693e-07, | |
| "loss": 0.6353955864906311, | |
| "step": 2241 | |
| }, | |
| { | |
| "epoch": 1.0384526291406069, | |
| "grad_norm": 0.78515625, | |
| "learning_rate": 4.012350105615017e-07, | |
| "loss": 0.692233681678772, | |
| "step": 2242 | |
| }, | |
| { | |
| "epoch": 1.038915913829048, | |
| "grad_norm": 1.0546875, | |
| "learning_rate": 3.9528520591962305e-07, | |
| "loss": 0.8289405703544617, | |
| "step": 2243 | |
| }, | |
| { | |
| "epoch": 1.039379198517489, | |
| "grad_norm": 0.82421875, | |
| "learning_rate": 3.8937947913688034e-07, | |
| "loss": 0.7374635934829712, | |
| "step": 2244 | |
| }, | |
| { | |
| "epoch": 1.03984248320593, | |
| "grad_norm": 0.7421875, | |
| "learning_rate": 3.8351784124123343e-07, | |
| "loss": 0.6225665807723999, | |
| "step": 2245 | |
| }, | |
| { | |
| "epoch": 1.040305767894371, | |
| "grad_norm": 0.8671875, | |
| "learning_rate": 3.777003031783055e-07, | |
| "loss": 0.9105774164199829, | |
| "step": 2246 | |
| }, | |
| { | |
| "epoch": 1.0407690525828122, | |
| "grad_norm": 0.8203125, | |
| "learning_rate": 3.7192687581138807e-07, | |
| "loss": 0.7380335927009583, | |
| "step": 2247 | |
| }, | |
| { | |
| "epoch": 1.041232337271253, | |
| "grad_norm": 0.80859375, | |
| "learning_rate": 3.661975699213853e-07, | |
| "loss": 0.8130779266357422, | |
| "step": 2248 | |
| }, | |
| { | |
| "epoch": 1.0416956219596942, | |
| "grad_norm": 0.79296875, | |
| "learning_rate": 3.6051239620681665e-07, | |
| "loss": 0.682541012763977, | |
| "step": 2249 | |
| }, | |
| { | |
| "epoch": 1.0421589066481354, | |
| "grad_norm": 0.7578125, | |
| "learning_rate": 3.548713652837954e-07, | |
| "loss": 0.8330915570259094, | |
| "step": 2250 | |
| }, | |
| { | |
| "epoch": 1.0426221913365763, | |
| "grad_norm": 0.9609375, | |
| "learning_rate": 3.4927448768600476e-07, | |
| "loss": 0.8342495560646057, | |
| "step": 2251 | |
| }, | |
| { | |
| "epoch": 1.0430854760250174, | |
| "grad_norm": 0.80859375, | |
| "learning_rate": 3.4372177386467673e-07, | |
| "loss": 0.7605646848678589, | |
| "step": 2252 | |
| }, | |
| { | |
| "epoch": 1.0435487607134584, | |
| "grad_norm": 0.93359375, | |
| "learning_rate": 3.3821323418857843e-07, | |
| "loss": 0.7368906140327454, | |
| "step": 2253 | |
| }, | |
| { | |
| "epoch": 1.0440120454018995, | |
| "grad_norm": 0.859375, | |
| "learning_rate": 3.32748878943983e-07, | |
| "loss": 0.8219422698020935, | |
| "step": 2254 | |
| }, | |
| { | |
| "epoch": 1.0444753300903404, | |
| "grad_norm": 0.91015625, | |
| "learning_rate": 3.2732871833466427e-07, | |
| "loss": 0.8227874040603638, | |
| "step": 2255 | |
| }, | |
| { | |
| "epoch": 1.0449386147787816, | |
| "grad_norm": 0.8359375, | |
| "learning_rate": 3.219527624818621e-07, | |
| "loss": 0.780153751373291, | |
| "step": 2256 | |
| }, | |
| { | |
| "epoch": 1.0454018994672225, | |
| "grad_norm": 0.828125, | |
| "learning_rate": 3.1662102142427974e-07, | |
| "loss": 0.6628552079200745, | |
| "step": 2257 | |
| }, | |
| { | |
| "epoch": 1.0458651841556637, | |
| "grad_norm": 0.73828125, | |
| "learning_rate": 3.1133350511804905e-07, | |
| "loss": 0.8577846884727478, | |
| "step": 2258 | |
| }, | |
| { | |
| "epoch": 1.0463284688441048, | |
| "grad_norm": 0.7890625, | |
| "learning_rate": 3.06090223436728e-07, | |
| "loss": 0.721747875213623, | |
| "step": 2259 | |
| }, | |
| { | |
| "epoch": 1.0467917535325457, | |
| "grad_norm": 0.80859375, | |
| "learning_rate": 3.0089118617126064e-07, | |
| "loss": 0.8224932551383972, | |
| "step": 2260 | |
| }, | |
| { | |
| "epoch": 1.0472550382209869, | |
| "grad_norm": 0.79296875, | |
| "learning_rate": 2.957364030299852e-07, | |
| "loss": 0.8007409572601318, | |
| "step": 2261 | |
| }, | |
| { | |
| "epoch": 1.0477183229094278, | |
| "grad_norm": 0.78125, | |
| "learning_rate": 2.9062588363859645e-07, | |
| "loss": 0.756182849407196, | |
| "step": 2262 | |
| }, | |
| { | |
| "epoch": 1.048181607597869, | |
| "grad_norm": 0.8671875, | |
| "learning_rate": 2.855596375401381e-07, | |
| "loss": 0.7461752891540527, | |
| "step": 2263 | |
| }, | |
| { | |
| "epoch": 1.0486448922863099, | |
| "grad_norm": 0.89453125, | |
| "learning_rate": 2.8053767419497076e-07, | |
| "loss": 0.8917368054389954, | |
| "step": 2264 | |
| }, | |
| { | |
| "epoch": 1.049108176974751, | |
| "grad_norm": 0.953125, | |
| "learning_rate": 2.755600029807797e-07, | |
| "loss": 0.9534367918968201, | |
| "step": 2265 | |
| }, | |
| { | |
| "epoch": 1.049571461663192, | |
| "grad_norm": 0.7734375, | |
| "learning_rate": 2.706266331925269e-07, | |
| "loss": 0.8556983470916748, | |
| "step": 2266 | |
| }, | |
| { | |
| "epoch": 1.050034746351633, | |
| "grad_norm": 0.9296875, | |
| "learning_rate": 2.657375740424621e-07, | |
| "loss": 0.8406637907028198, | |
| "step": 2267 | |
| }, | |
| { | |
| "epoch": 1.0504980310400742, | |
| "grad_norm": 0.90625, | |
| "learning_rate": 2.6089283466007987e-07, | |
| "loss": 0.765261173248291, | |
| "step": 2268 | |
| }, | |
| { | |
| "epoch": 1.0509613157285151, | |
| "grad_norm": 0.81640625, | |
| "learning_rate": 2.560924240921221e-07, | |
| "loss": 0.7666542530059814, | |
| "step": 2269 | |
| }, | |
| { | |
| "epoch": 1.0514246004169563, | |
| "grad_norm": 0.73046875, | |
| "learning_rate": 2.513363513025597e-07, | |
| "loss": 0.6919992566108704, | |
| "step": 2270 | |
| }, | |
| { | |
| "epoch": 1.0518878851053972, | |
| "grad_norm": 0.82421875, | |
| "learning_rate": 2.466246251725579e-07, | |
| "loss": 0.7279144525527954, | |
| "step": 2271 | |
| }, | |
| { | |
| "epoch": 1.0523511697938384, | |
| "grad_norm": 0.86328125, | |
| "learning_rate": 2.4195725450047865e-07, | |
| "loss": 0.7998260855674744, | |
| "step": 2272 | |
| }, | |
| { | |
| "epoch": 1.0528144544822793, | |
| "grad_norm": 0.82421875, | |
| "learning_rate": 2.373342480018543e-07, | |
| "loss": 0.9555582404136658, | |
| "step": 2273 | |
| }, | |
| { | |
| "epoch": 1.0532777391707204, | |
| "grad_norm": 0.9453125, | |
| "learning_rate": 2.3275561430937942e-07, | |
| "loss": 0.7288169860839844, | |
| "step": 2274 | |
| }, | |
| { | |
| "epoch": 1.0537410238591614, | |
| "grad_norm": 0.86328125, | |
| "learning_rate": 2.282213619728868e-07, | |
| "loss": 0.8385715484619141, | |
| "step": 2275 | |
| }, | |
| { | |
| "epoch": 1.0542043085476025, | |
| "grad_norm": 0.796875, | |
| "learning_rate": 2.2373149945933423e-07, | |
| "loss": 0.8468941450119019, | |
| "step": 2276 | |
| }, | |
| { | |
| "epoch": 1.0546675932360436, | |
| "grad_norm": 0.83984375, | |
| "learning_rate": 2.1928603515279388e-07, | |
| "loss": 0.6845361590385437, | |
| "step": 2277 | |
| }, | |
| { | |
| "epoch": 1.0551308779244846, | |
| "grad_norm": 0.94140625, | |
| "learning_rate": 2.148849773544175e-07, | |
| "loss": 0.8109369277954102, | |
| "step": 2278 | |
| }, | |
| { | |
| "epoch": 1.0555941626129257, | |
| "grad_norm": 0.7890625, | |
| "learning_rate": 2.1052833428245244e-07, | |
| "loss": 0.7616801857948303, | |
| "step": 2279 | |
| }, | |
| { | |
| "epoch": 1.0560574473013666, | |
| "grad_norm": 0.80078125, | |
| "learning_rate": 2.062161140721992e-07, | |
| "loss": 0.8856765031814575, | |
| "step": 2280 | |
| }, | |
| { | |
| "epoch": 1.0565207319898078, | |
| "grad_norm": 0.8671875, | |
| "learning_rate": 2.0194832477600856e-07, | |
| "loss": 0.6820222735404968, | |
| "step": 2281 | |
| }, | |
| { | |
| "epoch": 1.0569840166782487, | |
| "grad_norm": 0.83984375, | |
| "learning_rate": 1.97724974363263e-07, | |
| "loss": 0.7769261598587036, | |
| "step": 2282 | |
| }, | |
| { | |
| "epoch": 1.0574473013666899, | |
| "grad_norm": 0.87890625, | |
| "learning_rate": 1.9354607072036335e-07, | |
| "loss": 0.8834435343742371, | |
| "step": 2283 | |
| }, | |
| { | |
| "epoch": 1.0579105860551308, | |
| "grad_norm": 0.99609375, | |
| "learning_rate": 1.8941162165071557e-07, | |
| "loss": 0.8411494493484497, | |
| "step": 2284 | |
| }, | |
| { | |
| "epoch": 1.058373870743572, | |
| "grad_norm": 0.9375, | |
| "learning_rate": 1.85321634874712e-07, | |
| "loss": 0.7289970517158508, | |
| "step": 2285 | |
| }, | |
| { | |
| "epoch": 1.058837155432013, | |
| "grad_norm": 0.8203125, | |
| "learning_rate": 1.8127611802971534e-07, | |
| "loss": 0.7000952959060669, | |
| "step": 2286 | |
| }, | |
| { | |
| "epoch": 1.059300440120454, | |
| "grad_norm": 0.765625, | |
| "learning_rate": 1.7727507867005343e-07, | |
| "loss": 0.7273321151733398, | |
| "step": 2287 | |
| }, | |
| { | |
| "epoch": 1.0597637248088951, | |
| "grad_norm": 0.76171875, | |
| "learning_rate": 1.7331852426700057e-07, | |
| "loss": 0.629833459854126, | |
| "step": 2288 | |
| }, | |
| { | |
| "epoch": 1.060227009497336, | |
| "grad_norm": 0.8046875, | |
| "learning_rate": 1.694064622087641e-07, | |
| "loss": 0.9701935052871704, | |
| "step": 2289 | |
| }, | |
| { | |
| "epoch": 1.0606902941857772, | |
| "grad_norm": 0.859375, | |
| "learning_rate": 1.6553889980045788e-07, | |
| "loss": 0.8223315477371216, | |
| "step": 2290 | |
| }, | |
| { | |
| "epoch": 1.0611535788742181, | |
| "grad_norm": 0.8671875, | |
| "learning_rate": 1.617158442641129e-07, | |
| "loss": 0.8183466196060181, | |
| "step": 2291 | |
| }, | |
| { | |
| "epoch": 1.0616168635626593, | |
| "grad_norm": 0.8828125, | |
| "learning_rate": 1.5793730273864527e-07, | |
| "loss": 0.6966171264648438, | |
| "step": 2292 | |
| }, | |
| { | |
| "epoch": 1.0620801482511002, | |
| "grad_norm": 0.953125, | |
| "learning_rate": 1.542032822798509e-07, | |
| "loss": 0.8441136479377747, | |
| "step": 2293 | |
| }, | |
| { | |
| "epoch": 1.0625434329395413, | |
| "grad_norm": 0.80078125, | |
| "learning_rate": 1.50513789860387e-07, | |
| "loss": 0.9092806577682495, | |
| "step": 2294 | |
| }, | |
| { | |
| "epoch": 1.0630067176279825, | |
| "grad_norm": 0.9296875, | |
| "learning_rate": 1.4686883236976378e-07, | |
| "loss": 0.6524341702461243, | |
| "step": 2295 | |
| }, | |
| { | |
| "epoch": 1.0634700023164234, | |
| "grad_norm": 0.83984375, | |
| "learning_rate": 1.432684166143341e-07, | |
| "loss": 0.759156346321106, | |
| "step": 2296 | |
| }, | |
| { | |
| "epoch": 1.0639332870048646, | |
| "grad_norm": 1.15625, | |
| "learning_rate": 1.3971254931726663e-07, | |
| "loss": 0.7915992736816406, | |
| "step": 2297 | |
| }, | |
| { | |
| "epoch": 1.0643965716933055, | |
| "grad_norm": 0.85546875, | |
| "learning_rate": 1.362012371185539e-07, | |
| "loss": 0.9682400226593018, | |
| "step": 2298 | |
| }, | |
| { | |
| "epoch": 1.0648598563817466, | |
| "grad_norm": 0.87109375, | |
| "learning_rate": 1.3273448657498044e-07, | |
| "loss": 0.7774080038070679, | |
| "step": 2299 | |
| }, | |
| { | |
| "epoch": 1.0653231410701876, | |
| "grad_norm": 1.0625, | |
| "learning_rate": 1.2931230416012785e-07, | |
| "loss": 0.8881627917289734, | |
| "step": 2300 | |
| }, | |
| { | |
| "epoch": 1.0657864257586287, | |
| "grad_norm": 0.9140625, | |
| "learning_rate": 1.2593469626434573e-07, | |
| "loss": 0.6603068709373474, | |
| "step": 2301 | |
| }, | |
| { | |
| "epoch": 1.0662497104470696, | |
| "grad_norm": 0.87890625, | |
| "learning_rate": 1.2260166919475423e-07, | |
| "loss": 0.953567624092102, | |
| "step": 2302 | |
| }, | |
| { | |
| "epoch": 1.0667129951355108, | |
| "grad_norm": 0.8125, | |
| "learning_rate": 1.1931322917522548e-07, | |
| "loss": 0.7024879455566406, | |
| "step": 2303 | |
| }, | |
| { | |
| "epoch": 1.067176279823952, | |
| "grad_norm": 0.859375, | |
| "learning_rate": 1.160693823463701e-07, | |
| "loss": 0.7515615224838257, | |
| "step": 2304 | |
| }, | |
| { | |
| "epoch": 1.0676395645123928, | |
| "grad_norm": 1.4375, | |
| "learning_rate": 1.1287013476552943e-07, | |
| "loss": 0.7856634855270386, | |
| "step": 2305 | |
| }, | |
| { | |
| "epoch": 1.068102849200834, | |
| "grad_norm": 0.82421875, | |
| "learning_rate": 1.0971549240676204e-07, | |
| "loss": 0.8335432410240173, | |
| "step": 2306 | |
| }, | |
| { | |
| "epoch": 1.068566133889275, | |
| "grad_norm": 0.90234375, | |
| "learning_rate": 1.0660546116083847e-07, | |
| "loss": 0.7041558027267456, | |
| "step": 2307 | |
| }, | |
| { | |
| "epoch": 1.069029418577716, | |
| "grad_norm": 0.8984375, | |
| "learning_rate": 1.0354004683522256e-07, | |
| "loss": 0.7460358738899231, | |
| "step": 2308 | |
| }, | |
| { | |
| "epoch": 1.069492703266157, | |
| "grad_norm": 0.8515625, | |
| "learning_rate": 1.0051925515405813e-07, | |
| "loss": 0.7642945647239685, | |
| "step": 2309 | |
| }, | |
| { | |
| "epoch": 1.0699559879545981, | |
| "grad_norm": 0.73046875, | |
| "learning_rate": 9.754309175817166e-08, | |
| "loss": 0.603370189666748, | |
| "step": 2310 | |
| }, | |
| { | |
| "epoch": 1.070419272643039, | |
| "grad_norm": 1.0, | |
| "learning_rate": 9.461156220505363e-08, | |
| "loss": 0.7628229856491089, | |
| "step": 2311 | |
| }, | |
| { | |
| "epoch": 1.0708825573314802, | |
| "grad_norm": 0.92578125, | |
| "learning_rate": 9.172467196884249e-08, | |
| "loss": 0.7144607901573181, | |
| "step": 2312 | |
| }, | |
| { | |
| "epoch": 1.0713458420199213, | |
| "grad_norm": 0.79296875, | |
| "learning_rate": 8.888242644032207e-08, | |
| "loss": 0.7531965970993042, | |
| "step": 2313 | |
| }, | |
| { | |
| "epoch": 1.0718091267083623, | |
| "grad_norm": 0.796875, | |
| "learning_rate": 8.608483092691355e-08, | |
| "loss": 0.7036784887313843, | |
| "step": 2314 | |
| }, | |
| { | |
| "epoch": 1.0722724113968034, | |
| "grad_norm": 0.96484375, | |
| "learning_rate": 8.333189065265679e-08, | |
| "loss": 0.9241542220115662, | |
| "step": 2315 | |
| }, | |
| { | |
| "epoch": 1.0727356960852443, | |
| "grad_norm": 0.72265625, | |
| "learning_rate": 8.062361075821034e-08, | |
| "loss": 0.6981607675552368, | |
| "step": 2316 | |
| }, | |
| { | |
| "epoch": 1.0731989807736855, | |
| "grad_norm": 0.74609375, | |
| "learning_rate": 7.795999630083017e-08, | |
| "loss": 0.8028992414474487, | |
| "step": 2317 | |
| }, | |
| { | |
| "epoch": 1.0736622654621264, | |
| "grad_norm": 0.73828125, | |
| "learning_rate": 7.534105225437493e-08, | |
| "loss": 0.9001232385635376, | |
| "step": 2318 | |
| }, | |
| { | |
| "epoch": 1.0741255501505675, | |
| "grad_norm": 0.8046875, | |
| "learning_rate": 7.276678350927934e-08, | |
| "loss": 0.778639554977417, | |
| "step": 2319 | |
| }, | |
| { | |
| "epoch": 1.0745888348390085, | |
| "grad_norm": 0.91015625, | |
| "learning_rate": 7.023719487256752e-08, | |
| "loss": 0.8548743724822998, | |
| "step": 2320 | |
| }, | |
| { | |
| "epoch": 1.0750521195274496, | |
| "grad_norm": 0.96484375, | |
| "learning_rate": 6.775229106781832e-08, | |
| "loss": 0.8301196098327637, | |
| "step": 2321 | |
| }, | |
| { | |
| "epoch": 1.0755154042158908, | |
| "grad_norm": 0.84765625, | |
| "learning_rate": 6.531207673517603e-08, | |
| "loss": 0.6911361813545227, | |
| "step": 2322 | |
| }, | |
| { | |
| "epoch": 1.0759786889043317, | |
| "grad_norm": 0.859375, | |
| "learning_rate": 6.291655643133165e-08, | |
| "loss": 0.872600257396698, | |
| "step": 2323 | |
| }, | |
| { | |
| "epoch": 1.0764419735927728, | |
| "grad_norm": 0.81640625, | |
| "learning_rate": 6.056573462952032e-08, | |
| "loss": 0.8005569577217102, | |
| "step": 2324 | |
| }, | |
| { | |
| "epoch": 1.0769052582812138, | |
| "grad_norm": 0.85546875, | |
| "learning_rate": 5.825961571950789e-08, | |
| "loss": 0.8710095882415771, | |
| "step": 2325 | |
| }, | |
| { | |
| "epoch": 1.077368542969655, | |
| "grad_norm": 0.87890625, | |
| "learning_rate": 5.5998204007585705e-08, | |
| "loss": 0.8382307291030884, | |
| "step": 2326 | |
| }, | |
| { | |
| "epoch": 1.0778318276580958, | |
| "grad_norm": 0.765625, | |
| "learning_rate": 5.378150371656254e-08, | |
| "loss": 0.7570433616638184, | |
| "step": 2327 | |
| }, | |
| { | |
| "epoch": 1.078295112346537, | |
| "grad_norm": 0.796875, | |
| "learning_rate": 5.160951898575661e-08, | |
| "loss": 0.8490878343582153, | |
| "step": 2328 | |
| }, | |
| { | |
| "epoch": 1.078758397034978, | |
| "grad_norm": 0.8046875, | |
| "learning_rate": 4.9482253870984925e-08, | |
| "loss": 0.7219923734664917, | |
| "step": 2329 | |
| }, | |
| { | |
| "epoch": 1.079221681723419, | |
| "grad_norm": 0.953125, | |
| "learning_rate": 4.7399712344560643e-08, | |
| "loss": 0.8974231481552124, | |
| "step": 2330 | |
| }, | |
| { | |
| "epoch": 1.0796849664118602, | |
| "grad_norm": 0.80859375, | |
| "learning_rate": 4.536189829528237e-08, | |
| "loss": 0.797977089881897, | |
| "step": 2331 | |
| }, | |
| { | |
| "epoch": 1.080148251100301, | |
| "grad_norm": 0.79296875, | |
| "learning_rate": 4.3368815528423536e-08, | |
| "loss": 0.802757203578949, | |
| "step": 2332 | |
| }, | |
| { | |
| "epoch": 1.0806115357887423, | |
| "grad_norm": 0.921875, | |
| "learning_rate": 4.142046776573771e-08, | |
| "loss": 0.753846287727356, | |
| "step": 2333 | |
| }, | |
| { | |
| "epoch": 1.0810748204771832, | |
| "grad_norm": 0.83984375, | |
| "learning_rate": 3.95168586454373e-08, | |
| "loss": 0.7284574508666992, | |
| "step": 2334 | |
| }, | |
| { | |
| "epoch": 1.0815381051656243, | |
| "grad_norm": 0.75390625, | |
| "learning_rate": 3.7657991722190865e-08, | |
| "loss": 0.7171909213066101, | |
| "step": 2335 | |
| }, | |
| { | |
| "epoch": 1.0820013898540652, | |
| "grad_norm": 0.84765625, | |
| "learning_rate": 3.5843870467125784e-08, | |
| "loss": 0.7073402404785156, | |
| "step": 2336 | |
| }, | |
| { | |
| "epoch": 1.0824646745425064, | |
| "grad_norm": 0.8125, | |
| "learning_rate": 3.4074498267809614e-08, | |
| "loss": 0.8231452107429504, | |
| "step": 2337 | |
| }, | |
| { | |
| "epoch": 1.0829279592309473, | |
| "grad_norm": 0.8359375, | |
| "learning_rate": 3.234987842824744e-08, | |
| "loss": 0.8806526064872742, | |
| "step": 2338 | |
| }, | |
| { | |
| "epoch": 1.0833912439193885, | |
| "grad_norm": 0.87109375, | |
| "learning_rate": 3.067001416887916e-08, | |
| "loss": 0.8457326292991638, | |
| "step": 2339 | |
| }, | |
| { | |
| "epoch": 1.0838545286078296, | |
| "grad_norm": 0.91796875, | |
| "learning_rate": 2.9034908626571545e-08, | |
| "loss": 0.6565863490104675, | |
| "step": 2340 | |
| }, | |
| { | |
| "epoch": 1.0843178132962705, | |
| "grad_norm": 0.87890625, | |
| "learning_rate": 2.7444564854607558e-08, | |
| "loss": 0.8216748237609863, | |
| "step": 2341 | |
| }, | |
| { | |
| "epoch": 1.0847810979847117, | |
| "grad_norm": 0.96875, | |
| "learning_rate": 2.5898985822694343e-08, | |
| "loss": 0.9930620789527893, | |
| "step": 2342 | |
| }, | |
| { | |
| "epoch": 1.0852443826731526, | |
| "grad_norm": 0.80859375, | |
| "learning_rate": 2.4398174416936593e-08, | |
| "loss": 0.8227108716964722, | |
| "step": 2343 | |
| }, | |
| { | |
| "epoch": 1.0857076673615937, | |
| "grad_norm": 0.84765625, | |
| "learning_rate": 2.294213343985252e-08, | |
| "loss": 0.8761816024780273, | |
| "step": 2344 | |
| }, | |
| { | |
| "epoch": 1.0861709520500347, | |
| "grad_norm": 1.0078125, | |
| "learning_rate": 2.1530865610352555e-08, | |
| "loss": 0.6705437302589417, | |
| "step": 2345 | |
| }, | |
| { | |
| "epoch": 1.0866342367384758, | |
| "grad_norm": 1.0625, | |
| "learning_rate": 2.0164373563744675e-08, | |
| "loss": 0.8744497895240784, | |
| "step": 2346 | |
| }, | |
| { | |
| "epoch": 1.0870975214269167, | |
| "grad_norm": 0.9765625, | |
| "learning_rate": 1.8842659851723732e-08, | |
| "loss": 0.8473079800605774, | |
| "step": 2347 | |
| }, | |
| { | |
| "epoch": 1.0875608061153579, | |
| "grad_norm": 0.79296875, | |
| "learning_rate": 1.7565726942371464e-08, | |
| "loss": 0.6917478442192078, | |
| "step": 2348 | |
| }, | |
| { | |
| "epoch": 1.088024090803799, | |
| "grad_norm": 0.91796875, | |
| "learning_rate": 1.633357722014317e-08, | |
| "loss": 0.6745292544364929, | |
| "step": 2349 | |
| }, | |
| { | |
| "epoch": 1.08848737549224, | |
| "grad_norm": 0.82421875, | |
| "learning_rate": 1.5146212985875706e-08, | |
| "loss": 0.7799302935600281, | |
| "step": 2350 | |
| }, | |
| { | |
| "epoch": 1.088950660180681, | |
| "grad_norm": 0.9296875, | |
| "learning_rate": 1.4003636456771496e-08, | |
| "loss": 0.6096100807189941, | |
| "step": 2351 | |
| }, | |
| { | |
| "epoch": 1.089413944869122, | |
| "grad_norm": 0.78515625, | |
| "learning_rate": 1.2905849766401189e-08, | |
| "loss": 0.6739075183868408, | |
| "step": 2352 | |
| }, | |
| { | |
| "epoch": 1.0898772295575632, | |
| "grad_norm": 0.8125, | |
| "learning_rate": 1.1852854964698346e-08, | |
| "loss": 0.6615080833435059, | |
| "step": 2353 | |
| }, | |
| { | |
| "epoch": 1.090340514246004, | |
| "grad_norm": 0.90625, | |
| "learning_rate": 1.0844654017956757e-08, | |
| "loss": 0.8795668482780457, | |
| "step": 2354 | |
| }, | |
| { | |
| "epoch": 1.0908037989344452, | |
| "grad_norm": 0.84765625, | |
| "learning_rate": 9.881248808817134e-09, | |
| "loss": 0.6835682392120361, | |
| "step": 2355 | |
| }, | |
| { | |
| "epoch": 1.0912670836228862, | |
| "grad_norm": 0.953125, | |
| "learning_rate": 8.962641136285754e-09, | |
| "loss": 0.8570435047149658, | |
| "step": 2356 | |
| }, | |
| { | |
| "epoch": 1.0917303683113273, | |
| "grad_norm": 0.6875, | |
| "learning_rate": 8.088832715702487e-09, | |
| "loss": 0.653952956199646, | |
| "step": 2357 | |
| }, | |
| { | |
| "epoch": 1.0921936529997684, | |
| "grad_norm": 0.8359375, | |
| "learning_rate": 7.259825178759449e-09, | |
| "loss": 0.7819563150405884, | |
| "step": 2358 | |
| }, | |
| { | |
| "epoch": 1.0926569376882094, | |
| "grad_norm": 0.8125, | |
| "learning_rate": 6.475620073493005e-09, | |
| "loss": 0.7712017893791199, | |
| "step": 2359 | |
| }, | |
| { | |
| "epoch": 1.0931202223766505, | |
| "grad_norm": 0.921875, | |
| "learning_rate": 5.736218864273113e-09, | |
| "loss": 0.7736300230026245, | |
| "step": 2360 | |
| }, | |
| { | |
| "epoch": 1.0935835070650914, | |
| "grad_norm": 0.80859375, | |
| "learning_rate": 5.041622931805989e-09, | |
| "loss": 0.8023624420166016, | |
| "step": 2361 | |
| }, | |
| { | |
| "epoch": 1.0940467917535326, | |
| "grad_norm": 0.9375, | |
| "learning_rate": 4.391833573136772e-09, | |
| "loss": 0.7673137187957764, | |
| "step": 2362 | |
| }, | |
| { | |
| "epoch": 1.0945100764419735, | |
| "grad_norm": 0.92578125, | |
| "learning_rate": 3.786852001636198e-09, | |
| "loss": 0.7695264220237732, | |
| "step": 2363 | |
| }, | |
| { | |
| "epoch": 1.0949733611304147, | |
| "grad_norm": 0.90234375, | |
| "learning_rate": 3.2266793470085986e-09, | |
| "loss": 0.8211096525192261, | |
| "step": 2364 | |
| }, | |
| { | |
| "epoch": 1.0954366458188556, | |
| "grad_norm": 0.85546875, | |
| "learning_rate": 2.7113166552812375e-09, | |
| "loss": 0.7800441980361938, | |
| "step": 2365 | |
| }, | |
| { | |
| "epoch": 1.0958999305072967, | |
| "grad_norm": 0.875, | |
| "learning_rate": 2.2407648888069786e-09, | |
| "loss": 1.0416970252990723, | |
| "step": 2366 | |
| }, | |
| { | |
| "epoch": 1.0963632151957379, | |
| "grad_norm": 0.84375, | |
| "learning_rate": 1.8150249262616214e-09, | |
| "loss": 0.6914156675338745, | |
| "step": 2367 | |
| }, | |
| { | |
| "epoch": 1.0968264998841788, | |
| "grad_norm": 0.859375, | |
| "learning_rate": 1.4340975626465635e-09, | |
| "loss": 0.8113836050033569, | |
| "step": 2368 | |
| }, | |
| { | |
| "epoch": 1.09728978457262, | |
| "grad_norm": 0.8203125, | |
| "learning_rate": 1.0979835092808087e-09, | |
| "loss": 0.7729677557945251, | |
| "step": 2369 | |
| }, | |
| { | |
| "epoch": 1.0977530692610609, | |
| "grad_norm": 0.921875, | |
| "learning_rate": 8.066833937956375e-10, | |
| "loss": 0.6993372440338135, | |
| "step": 2370 | |
| }, | |
| { | |
| "epoch": 1.098216353949502, | |
| "grad_norm": 0.81640625, | |
| "learning_rate": 5.601977601505936e-10, | |
| "loss": 0.8089796304702759, | |
| "step": 2371 | |
| }, | |
| { | |
| "epoch": 1.098679638637943, | |
| "grad_norm": 0.80078125, | |
| "learning_rate": 3.585270686121689e-10, | |
| "loss": 0.7753397822380066, | |
| "step": 2372 | |
| }, | |
| { | |
| "epoch": 1.099142923326384, | |
| "grad_norm": 0.79296875, | |
| "learning_rate": 2.0167169577245402e-10, | |
| "loss": 0.9297423362731934, | |
| "step": 2373 | |
| }, | |
| { | |
| "epoch": 1.099606208014825, | |
| "grad_norm": 0.86328125, | |
| "learning_rate": 8.963193452515838e-11, | |
| "loss": 0.8522340655326843, | |
| "step": 2374 | |
| }, | |
| { | |
| "epoch": 1.1000694927032661, | |
| "grad_norm": 0.80859375, | |
| "learning_rate": 2.2407994092255023e-11, | |
| "loss": 0.8018955588340759, | |
| "step": 2375 | |
| } | |
| ], | |
| "logging_steps": 1, | |
| "max_steps": 2375, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 2, | |
| "save_steps": 500, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": true | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 4.087985967887275e+18, | |
| "train_batch_size": 2, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |