Instructions to use Ba2han/lfm-cpt with libraries, inference providers, notebooks, and local apps. Follow these links to get started.
- Libraries
- Transformers
How to use Ba2han/lfm-cpt with Transformers:
# Use a pipeline as a high-level helper from transformers import pipeline pipe = pipeline("image-text-to-text", model="Ba2han/lfm-cpt") messages = [ { "role": "user", "content": [ {"type": "image", "url": "https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/p-blog/candy.JPG"}, {"type": "text", "text": "What animal is on the candy?"} ] }, ] pipe(text=messages)# Load model directly from transformers import AutoProcessor, AutoModelForMultimodalLM processor = AutoProcessor.from_pretrained("Ba2han/lfm-cpt") model = AutoModelForMultimodalLM.from_pretrained("Ba2han/lfm-cpt") messages = [ { "role": "user", "content": [ {"type": "image", "url": "https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/p-blog/candy.JPG"}, {"type": "text", "text": "What animal is on the candy?"} ] }, ] inputs = processor.apply_chat_template( messages, add_generation_prompt=True, tokenize=True, return_dict=True, return_tensors="pt", ).to(model.device) outputs = model.generate(**inputs, max_new_tokens=40) print(processor.decode(outputs[0][inputs["input_ids"].shape[-1]:])) - Notebooks
- Google Colab
- Kaggle
- Local Apps Settings
- vLLM
How to use Ba2han/lfm-cpt with vLLM:
Install from pip and serve model
# Install vLLM from pip: pip install vllm # Start the vLLM server: vllm serve "Ba2han/lfm-cpt" # Call the server using curl (OpenAI-compatible API): curl -X POST "http://localhost:8000/v1/chat/completions" \ -H "Content-Type: application/json" \ --data '{ "model": "Ba2han/lfm-cpt", "messages": [ { "role": "user", "content": [ { "type": "text", "text": "Describe this image in one sentence." }, { "type": "image_url", "image_url": { "url": "https://cdn.britannica.com/61/93061-050-99147DCE/Statue-of-Liberty-Island-New-York-Bay.jpg" } } ] } ] }'Use Docker
docker model run hf.co/Ba2han/lfm-cpt
- SGLang
How to use Ba2han/lfm-cpt with SGLang:
Install from pip and serve model
# Install SGLang from pip: pip install sglang # Start the SGLang server: python3 -m sglang.launch_server \ --model-path "Ba2han/lfm-cpt" \ --host 0.0.0.0 \ --port 30000 # Call the server using curl (OpenAI-compatible API): curl -X POST "http://localhost:30000/v1/chat/completions" \ -H "Content-Type: application/json" \ --data '{ "model": "Ba2han/lfm-cpt", "messages": [ { "role": "user", "content": [ { "type": "text", "text": "Describe this image in one sentence." }, { "type": "image_url", "image_url": { "url": "https://cdn.britannica.com/61/93061-050-99147DCE/Statue-of-Liberty-Island-New-York-Bay.jpg" } } ] } ] }'Use Docker images
docker run --gpus all \ --shm-size 32g \ -p 30000:30000 \ -v ~/.cache/huggingface:/root/.cache/huggingface \ --env "HF_TOKEN=<secret>" \ --ipc=host \ lmsysorg/sglang:latest \ python3 -m sglang.launch_server \ --model-path "Ba2han/lfm-cpt" \ --host 0.0.0.0 \ --port 30000 # Call the server using curl (OpenAI-compatible API): curl -X POST "http://localhost:30000/v1/chat/completions" \ -H "Content-Type: application/json" \ --data '{ "model": "Ba2han/lfm-cpt", "messages": [ { "role": "user", "content": [ { "type": "text", "text": "Describe this image in one sentence." }, { "type": "image_url", "image_url": { "url": "https://cdn.britannica.com/61/93061-050-99147DCE/Statue-of-Liberty-Island-New-York-Bay.jpg" } } ] } ] }' - Unsloth Studio
How to use Ba2han/lfm-cpt with Unsloth Studio:
Install Unsloth Studio (macOS, Linux, WSL)
curl -fsSL https://unsloth.ai/install.sh | sh # Run unsloth studio unsloth studio -H 0.0.0.0 -p 8888 # Then open http://localhost:8888 in your browser # Search for Ba2han/lfm-cpt to start chatting
Install Unsloth Studio (Windows)
irm https://unsloth.ai/install.ps1 | iex # Run unsloth studio unsloth studio -H 0.0.0.0 -p 8888 # Then open http://localhost:8888 in your browser # Search for Ba2han/lfm-cpt to start chatting
Using HuggingFace Spaces for Unsloth
# No setup required # Open https://huggingface.co/spaces/unsloth/studio in your browser # Search for Ba2han/lfm-cpt to start chatting
Load model with FastModel
pip install unsloth from unsloth import FastModel model, tokenizer = FastModel.from_pretrained( model_name="Ba2han/lfm-cpt", max_seq_length=2048, ) - Docker Model Runner
How to use Ba2han/lfm-cpt with Docker Model Runner:
docker model run hf.co/Ba2han/lfm-cpt
| { | |
| "best_global_step": null, | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 1.0, | |
| "eval_steps": 500, | |
| "global_step": 4205, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.0004756949606065111, | |
| "grad_norm": 17.125, | |
| "learning_rate": 3.80952380952381e-07, | |
| "loss": 3.0223777294158936, | |
| "step": 2 | |
| }, | |
| { | |
| "epoch": 0.0009513899212130222, | |
| "grad_norm": 18.875, | |
| "learning_rate": 1.142857142857143e-06, | |
| "loss": 3.0833311080932617, | |
| "step": 4 | |
| }, | |
| { | |
| "epoch": 0.0014270848818195333, | |
| "grad_norm": 17.5, | |
| "learning_rate": 1.904761904761905e-06, | |
| "loss": 3.0257577896118164, | |
| "step": 6 | |
| }, | |
| { | |
| "epoch": 0.0019027798424260443, | |
| "grad_norm": 14.375, | |
| "learning_rate": 2.666666666666667e-06, | |
| "loss": 2.894188642501831, | |
| "step": 8 | |
| }, | |
| { | |
| "epoch": 0.002378474803032555, | |
| "grad_norm": 11.6875, | |
| "learning_rate": 3.428571428571429e-06, | |
| "loss": 2.8602077960968018, | |
| "step": 10 | |
| }, | |
| { | |
| "epoch": 0.0028541697636390666, | |
| "grad_norm": 9.1875, | |
| "learning_rate": 4.190476190476191e-06, | |
| "loss": 2.8444406986236572, | |
| "step": 12 | |
| }, | |
| { | |
| "epoch": 0.0033298647242455777, | |
| "grad_norm": 5.75, | |
| "learning_rate": 4.952380952380953e-06, | |
| "loss": 2.7274436950683594, | |
| "step": 14 | |
| }, | |
| { | |
| "epoch": 0.0038055596848520887, | |
| "grad_norm": 3.96875, | |
| "learning_rate": 5.7142857142857145e-06, | |
| "loss": 2.6735000610351562, | |
| "step": 16 | |
| }, | |
| { | |
| "epoch": 0.0042812546454586, | |
| "grad_norm": 3.46875, | |
| "learning_rate": 6.476190476190477e-06, | |
| "loss": 2.7295286655426025, | |
| "step": 18 | |
| }, | |
| { | |
| "epoch": 0.00475694960606511, | |
| "grad_norm": 3.390625, | |
| "learning_rate": 7.238095238095239e-06, | |
| "loss": 2.674283027648926, | |
| "step": 20 | |
| }, | |
| { | |
| "epoch": 0.005232644566671622, | |
| "grad_norm": 2.375, | |
| "learning_rate": 8.000000000000001e-06, | |
| "loss": 2.604132652282715, | |
| "step": 22 | |
| }, | |
| { | |
| "epoch": 0.005708339527278133, | |
| "grad_norm": 1.984375, | |
| "learning_rate": 8.761904761904763e-06, | |
| "loss": 2.5608315467834473, | |
| "step": 24 | |
| }, | |
| { | |
| "epoch": 0.006184034487884644, | |
| "grad_norm": 1.6484375, | |
| "learning_rate": 9.523809523809525e-06, | |
| "loss": 2.4369235038757324, | |
| "step": 26 | |
| }, | |
| { | |
| "epoch": 0.006659729448491155, | |
| "grad_norm": 1.3125, | |
| "learning_rate": 1.0285714285714285e-05, | |
| "loss": 2.526822090148926, | |
| "step": 28 | |
| }, | |
| { | |
| "epoch": 0.007135424409097666, | |
| "grad_norm": 1.1015625, | |
| "learning_rate": 1.104761904761905e-05, | |
| "loss": 2.478888511657715, | |
| "step": 30 | |
| }, | |
| { | |
| "epoch": 0.007611119369704177, | |
| "grad_norm": 1.015625, | |
| "learning_rate": 1.180952380952381e-05, | |
| "loss": 2.4330101013183594, | |
| "step": 32 | |
| }, | |
| { | |
| "epoch": 0.008086814330310688, | |
| "grad_norm": 0.88671875, | |
| "learning_rate": 1.2571428571428572e-05, | |
| "loss": 2.412613868713379, | |
| "step": 34 | |
| }, | |
| { | |
| "epoch": 0.0085625092909172, | |
| "grad_norm": 0.828125, | |
| "learning_rate": 1.3333333333333333e-05, | |
| "loss": 2.4466609954833984, | |
| "step": 36 | |
| }, | |
| { | |
| "epoch": 0.009038204251523711, | |
| "grad_norm": 0.9140625, | |
| "learning_rate": 1.4095238095238097e-05, | |
| "loss": 2.3658487796783447, | |
| "step": 38 | |
| }, | |
| { | |
| "epoch": 0.00951389921213022, | |
| "grad_norm": 0.84375, | |
| "learning_rate": 1.4857142857142858e-05, | |
| "loss": 2.355985164642334, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 0.009989594172736732, | |
| "grad_norm": 0.796875, | |
| "learning_rate": 1.5619047619047622e-05, | |
| "loss": 2.2941904067993164, | |
| "step": 42 | |
| }, | |
| { | |
| "epoch": 0.010465289133343244, | |
| "grad_norm": 0.80078125, | |
| "learning_rate": 1.6380952380952384e-05, | |
| "loss": 2.2010486125946045, | |
| "step": 44 | |
| }, | |
| { | |
| "epoch": 0.010940984093949755, | |
| "grad_norm": 0.890625, | |
| "learning_rate": 1.7142857142857142e-05, | |
| "loss": 2.2753114700317383, | |
| "step": 46 | |
| }, | |
| { | |
| "epoch": 0.011416679054556267, | |
| "grad_norm": 0.79296875, | |
| "learning_rate": 1.7904761904761907e-05, | |
| "loss": 2.248267650604248, | |
| "step": 48 | |
| }, | |
| { | |
| "epoch": 0.011892374015162776, | |
| "grad_norm": 0.76953125, | |
| "learning_rate": 1.866666666666667e-05, | |
| "loss": 2.219675064086914, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 0.012368068975769288, | |
| "grad_norm": 0.77734375, | |
| "learning_rate": 1.942857142857143e-05, | |
| "loss": 2.195375919342041, | |
| "step": 52 | |
| }, | |
| { | |
| "epoch": 0.0128437639363758, | |
| "grad_norm": 0.8203125, | |
| "learning_rate": 2.0190476190476192e-05, | |
| "loss": 2.113194227218628, | |
| "step": 54 | |
| }, | |
| { | |
| "epoch": 0.01331945889698231, | |
| "grad_norm": 0.89453125, | |
| "learning_rate": 2.0952380952380954e-05, | |
| "loss": 2.1435773372650146, | |
| "step": 56 | |
| }, | |
| { | |
| "epoch": 0.01379515385758882, | |
| "grad_norm": 0.9609375, | |
| "learning_rate": 2.1714285714285715e-05, | |
| "loss": 2.1617753505706787, | |
| "step": 58 | |
| }, | |
| { | |
| "epoch": 0.014270848818195332, | |
| "grad_norm": 0.9140625, | |
| "learning_rate": 2.2476190476190477e-05, | |
| "loss": 2.0906386375427246, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 0.014746543778801843, | |
| "grad_norm": 0.890625, | |
| "learning_rate": 2.3238095238095242e-05, | |
| "loss": 2.13519024848938, | |
| "step": 62 | |
| }, | |
| { | |
| "epoch": 0.015222238739408355, | |
| "grad_norm": 0.74609375, | |
| "learning_rate": 2.4e-05, | |
| "loss": 2.0451605319976807, | |
| "step": 64 | |
| }, | |
| { | |
| "epoch": 0.015697933700014866, | |
| "grad_norm": 0.703125, | |
| "learning_rate": 2.4761904761904766e-05, | |
| "loss": 2.18241548538208, | |
| "step": 66 | |
| }, | |
| { | |
| "epoch": 0.016173628660621376, | |
| "grad_norm": 0.734375, | |
| "learning_rate": 2.5523809523809524e-05, | |
| "loss": 2.0362777709960938, | |
| "step": 68 | |
| }, | |
| { | |
| "epoch": 0.01664932362122789, | |
| "grad_norm": 0.68359375, | |
| "learning_rate": 2.628571428571429e-05, | |
| "loss": 2.0474891662597656, | |
| "step": 70 | |
| }, | |
| { | |
| "epoch": 0.0171250185818344, | |
| "grad_norm": 0.81640625, | |
| "learning_rate": 2.704761904761905e-05, | |
| "loss": 2.0943374633789062, | |
| "step": 72 | |
| }, | |
| { | |
| "epoch": 0.01760071354244091, | |
| "grad_norm": 0.703125, | |
| "learning_rate": 2.780952380952381e-05, | |
| "loss": 2.007606029510498, | |
| "step": 74 | |
| }, | |
| { | |
| "epoch": 0.018076408503047422, | |
| "grad_norm": 0.79296875, | |
| "learning_rate": 2.8571428571428574e-05, | |
| "loss": 2.010784149169922, | |
| "step": 76 | |
| }, | |
| { | |
| "epoch": 0.01855210346365393, | |
| "grad_norm": 0.859375, | |
| "learning_rate": 2.9333333333333333e-05, | |
| "loss": 2.0787105560302734, | |
| "step": 78 | |
| }, | |
| { | |
| "epoch": 0.01902779842426044, | |
| "grad_norm": 0.78515625, | |
| "learning_rate": 3.0095238095238098e-05, | |
| "loss": 2.0407357215881348, | |
| "step": 80 | |
| }, | |
| { | |
| "epoch": 0.019503493384866954, | |
| "grad_norm": 0.75390625, | |
| "learning_rate": 3.085714285714286e-05, | |
| "loss": 1.9725031852722168, | |
| "step": 82 | |
| }, | |
| { | |
| "epoch": 0.019979188345473464, | |
| "grad_norm": 0.8671875, | |
| "learning_rate": 3.161904761904762e-05, | |
| "loss": 1.9424755573272705, | |
| "step": 84 | |
| }, | |
| { | |
| "epoch": 0.020454883306079977, | |
| "grad_norm": 0.9921875, | |
| "learning_rate": 3.2380952380952386e-05, | |
| "loss": 2.0050058364868164, | |
| "step": 86 | |
| }, | |
| { | |
| "epoch": 0.020930578266686487, | |
| "grad_norm": 0.7421875, | |
| "learning_rate": 3.314285714285715e-05, | |
| "loss": 1.9476414918899536, | |
| "step": 88 | |
| }, | |
| { | |
| "epoch": 0.021406273227292997, | |
| "grad_norm": 0.94140625, | |
| "learning_rate": 3.390476190476191e-05, | |
| "loss": 1.9908151626586914, | |
| "step": 90 | |
| }, | |
| { | |
| "epoch": 0.02188196818789951, | |
| "grad_norm": 0.81640625, | |
| "learning_rate": 3.466666666666667e-05, | |
| "loss": 1.9584457874298096, | |
| "step": 92 | |
| }, | |
| { | |
| "epoch": 0.02235766314850602, | |
| "grad_norm": 0.9609375, | |
| "learning_rate": 3.542857142857143e-05, | |
| "loss": 1.9743244647979736, | |
| "step": 94 | |
| }, | |
| { | |
| "epoch": 0.022833358109112533, | |
| "grad_norm": 1.1015625, | |
| "learning_rate": 3.6190476190476195e-05, | |
| "loss": 1.9396190643310547, | |
| "step": 96 | |
| }, | |
| { | |
| "epoch": 0.023309053069719043, | |
| "grad_norm": 0.93359375, | |
| "learning_rate": 3.6952380952380956e-05, | |
| "loss": 1.8824760913848877, | |
| "step": 98 | |
| }, | |
| { | |
| "epoch": 0.023784748030325552, | |
| "grad_norm": 0.859375, | |
| "learning_rate": 3.771428571428572e-05, | |
| "loss": 1.8970260620117188, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.024260442990932066, | |
| "grad_norm": 0.7890625, | |
| "learning_rate": 3.847619047619048e-05, | |
| "loss": 1.9105536937713623, | |
| "step": 102 | |
| }, | |
| { | |
| "epoch": 0.024736137951538575, | |
| "grad_norm": 0.84765625, | |
| "learning_rate": 3.923809523809524e-05, | |
| "loss": 1.8593096733093262, | |
| "step": 104 | |
| }, | |
| { | |
| "epoch": 0.02521183291214509, | |
| "grad_norm": 0.76953125, | |
| "learning_rate": 4e-05, | |
| "loss": 1.8682916164398193, | |
| "step": 106 | |
| }, | |
| { | |
| "epoch": 0.0256875278727516, | |
| "grad_norm": 0.83984375, | |
| "learning_rate": 4.0761904761904765e-05, | |
| "loss": 1.9372856616973877, | |
| "step": 108 | |
| }, | |
| { | |
| "epoch": 0.026163222833358108, | |
| "grad_norm": 1.2890625, | |
| "learning_rate": 4.1523809523809533e-05, | |
| "loss": 1.9114850759506226, | |
| "step": 110 | |
| }, | |
| { | |
| "epoch": 0.02663891779396462, | |
| "grad_norm": 1.0625, | |
| "learning_rate": 4.228571428571429e-05, | |
| "loss": 1.8821630477905273, | |
| "step": 112 | |
| }, | |
| { | |
| "epoch": 0.02711461275457113, | |
| "grad_norm": 1.015625, | |
| "learning_rate": 4.304761904761905e-05, | |
| "loss": 1.8732749223709106, | |
| "step": 114 | |
| }, | |
| { | |
| "epoch": 0.02759030771517764, | |
| "grad_norm": 0.7890625, | |
| "learning_rate": 4.380952380952382e-05, | |
| "loss": 1.8635611534118652, | |
| "step": 116 | |
| }, | |
| { | |
| "epoch": 0.028066002675784154, | |
| "grad_norm": 0.90625, | |
| "learning_rate": 4.4571428571428574e-05, | |
| "loss": 1.8261184692382812, | |
| "step": 118 | |
| }, | |
| { | |
| "epoch": 0.028541697636390664, | |
| "grad_norm": 0.9140625, | |
| "learning_rate": 4.5333333333333335e-05, | |
| "loss": 1.8533995151519775, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 0.029017392596997177, | |
| "grad_norm": 1.03125, | |
| "learning_rate": 4.60952380952381e-05, | |
| "loss": 1.816650390625, | |
| "step": 122 | |
| }, | |
| { | |
| "epoch": 0.029493087557603687, | |
| "grad_norm": 0.87890625, | |
| "learning_rate": 4.6857142857142865e-05, | |
| "loss": 1.823215365409851, | |
| "step": 124 | |
| }, | |
| { | |
| "epoch": 0.029968782518210196, | |
| "grad_norm": 0.83984375, | |
| "learning_rate": 4.761904761904762e-05, | |
| "loss": 1.8113462924957275, | |
| "step": 126 | |
| }, | |
| { | |
| "epoch": 0.03044447747881671, | |
| "grad_norm": 0.8046875, | |
| "learning_rate": 4.838095238095238e-05, | |
| "loss": 1.7880184650421143, | |
| "step": 128 | |
| }, | |
| { | |
| "epoch": 0.03092017243942322, | |
| "grad_norm": 0.97265625, | |
| "learning_rate": 4.914285714285715e-05, | |
| "loss": 1.8012118339538574, | |
| "step": 130 | |
| }, | |
| { | |
| "epoch": 0.03139586740002973, | |
| "grad_norm": 1.046875, | |
| "learning_rate": 4.990476190476191e-05, | |
| "loss": 1.783468246459961, | |
| "step": 132 | |
| }, | |
| { | |
| "epoch": 0.03187156236063624, | |
| "grad_norm": 1.015625, | |
| "learning_rate": 5.066666666666667e-05, | |
| "loss": 1.7474174499511719, | |
| "step": 134 | |
| }, | |
| { | |
| "epoch": 0.03234725732124275, | |
| "grad_norm": 0.95703125, | |
| "learning_rate": 5.1428571428571436e-05, | |
| "loss": 1.8491697311401367, | |
| "step": 136 | |
| }, | |
| { | |
| "epoch": 0.03282295228184926, | |
| "grad_norm": 0.984375, | |
| "learning_rate": 5.21904761904762e-05, | |
| "loss": 1.77945077419281, | |
| "step": 138 | |
| }, | |
| { | |
| "epoch": 0.03329864724245578, | |
| "grad_norm": 1.046875, | |
| "learning_rate": 5.295238095238095e-05, | |
| "loss": 1.7462689876556396, | |
| "step": 140 | |
| }, | |
| { | |
| "epoch": 0.03377434220306229, | |
| "grad_norm": 1.25, | |
| "learning_rate": 5.3714285714285714e-05, | |
| "loss": 1.77305006980896, | |
| "step": 142 | |
| }, | |
| { | |
| "epoch": 0.0342500371636688, | |
| "grad_norm": 0.91015625, | |
| "learning_rate": 5.447619047619048e-05, | |
| "loss": 1.7020612955093384, | |
| "step": 144 | |
| }, | |
| { | |
| "epoch": 0.03472573212427531, | |
| "grad_norm": 0.921875, | |
| "learning_rate": 5.5238095238095244e-05, | |
| "loss": 1.8065619468688965, | |
| "step": 146 | |
| }, | |
| { | |
| "epoch": 0.03520142708488182, | |
| "grad_norm": 0.8828125, | |
| "learning_rate": 5.6e-05, | |
| "loss": 1.6848450899124146, | |
| "step": 148 | |
| }, | |
| { | |
| "epoch": 0.035677122045488334, | |
| "grad_norm": 0.80859375, | |
| "learning_rate": 5.676190476190477e-05, | |
| "loss": 1.747304916381836, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 0.036152817006094844, | |
| "grad_norm": 1.1015625, | |
| "learning_rate": 5.752380952380953e-05, | |
| "loss": 1.7690556049346924, | |
| "step": 152 | |
| }, | |
| { | |
| "epoch": 0.03662851196670135, | |
| "grad_norm": 1.03125, | |
| "learning_rate": 5.828571428571429e-05, | |
| "loss": 1.7610713243484497, | |
| "step": 154 | |
| }, | |
| { | |
| "epoch": 0.03710420692730786, | |
| "grad_norm": 1.1640625, | |
| "learning_rate": 5.904761904761905e-05, | |
| "loss": 1.6751768589019775, | |
| "step": 156 | |
| }, | |
| { | |
| "epoch": 0.03757990188791437, | |
| "grad_norm": 0.9375, | |
| "learning_rate": 5.9809523809523814e-05, | |
| "loss": 1.6568293571472168, | |
| "step": 158 | |
| }, | |
| { | |
| "epoch": 0.03805559684852088, | |
| "grad_norm": 0.84765625, | |
| "learning_rate": 6.0571428571428576e-05, | |
| "loss": 1.7163995504379272, | |
| "step": 160 | |
| }, | |
| { | |
| "epoch": 0.0385312918091274, | |
| "grad_norm": 1.1875, | |
| "learning_rate": 6.133333333333334e-05, | |
| "loss": 1.7115540504455566, | |
| "step": 162 | |
| }, | |
| { | |
| "epoch": 0.03900698676973391, | |
| "grad_norm": 0.9453125, | |
| "learning_rate": 6.20952380952381e-05, | |
| "loss": 1.6549824476242065, | |
| "step": 164 | |
| }, | |
| { | |
| "epoch": 0.03948268173034042, | |
| "grad_norm": 0.828125, | |
| "learning_rate": 6.285714285714286e-05, | |
| "loss": 1.6670048236846924, | |
| "step": 166 | |
| }, | |
| { | |
| "epoch": 0.03995837669094693, | |
| "grad_norm": 0.89453125, | |
| "learning_rate": 6.361904761904762e-05, | |
| "loss": 1.6787292957305908, | |
| "step": 168 | |
| }, | |
| { | |
| "epoch": 0.04043407165155344, | |
| "grad_norm": 0.87890625, | |
| "learning_rate": 6.438095238095238e-05, | |
| "loss": 1.6751407384872437, | |
| "step": 170 | |
| }, | |
| { | |
| "epoch": 0.040909766612159955, | |
| "grad_norm": 0.78125, | |
| "learning_rate": 6.514285714285715e-05, | |
| "loss": 1.679162621498108, | |
| "step": 172 | |
| }, | |
| { | |
| "epoch": 0.041385461572766465, | |
| "grad_norm": 0.75, | |
| "learning_rate": 6.590476190476191e-05, | |
| "loss": 1.6422595977783203, | |
| "step": 174 | |
| }, | |
| { | |
| "epoch": 0.041861156533372974, | |
| "grad_norm": 0.97265625, | |
| "learning_rate": 6.666666666666667e-05, | |
| "loss": 1.693905234336853, | |
| "step": 176 | |
| }, | |
| { | |
| "epoch": 0.042336851493979484, | |
| "grad_norm": 0.99609375, | |
| "learning_rate": 6.742857142857143e-05, | |
| "loss": 1.7319214344024658, | |
| "step": 178 | |
| }, | |
| { | |
| "epoch": 0.042812546454585994, | |
| "grad_norm": 1.1171875, | |
| "learning_rate": 6.81904761904762e-05, | |
| "loss": 1.7077994346618652, | |
| "step": 180 | |
| }, | |
| { | |
| "epoch": 0.04328824141519251, | |
| "grad_norm": 0.87890625, | |
| "learning_rate": 6.895238095238095e-05, | |
| "loss": 1.6633131504058838, | |
| "step": 182 | |
| }, | |
| { | |
| "epoch": 0.04376393637579902, | |
| "grad_norm": 1.3515625, | |
| "learning_rate": 6.971428571428572e-05, | |
| "loss": 1.6569929122924805, | |
| "step": 184 | |
| }, | |
| { | |
| "epoch": 0.04423963133640553, | |
| "grad_norm": 1.078125, | |
| "learning_rate": 7.047619047619048e-05, | |
| "loss": 1.6756895780563354, | |
| "step": 186 | |
| }, | |
| { | |
| "epoch": 0.04471532629701204, | |
| "grad_norm": 1.296875, | |
| "learning_rate": 7.123809523809524e-05, | |
| "loss": 1.7126249074935913, | |
| "step": 188 | |
| }, | |
| { | |
| "epoch": 0.04519102125761855, | |
| "grad_norm": 0.9609375, | |
| "learning_rate": 7.2e-05, | |
| "loss": 1.6484733819961548, | |
| "step": 190 | |
| }, | |
| { | |
| "epoch": 0.045666716218225066, | |
| "grad_norm": 1.1015625, | |
| "learning_rate": 7.276190476190476e-05, | |
| "loss": 1.71817147731781, | |
| "step": 192 | |
| }, | |
| { | |
| "epoch": 0.046142411178831576, | |
| "grad_norm": 0.96484375, | |
| "learning_rate": 7.352380952380953e-05, | |
| "loss": 1.7061476707458496, | |
| "step": 194 | |
| }, | |
| { | |
| "epoch": 0.046618106139438086, | |
| "grad_norm": 0.9375, | |
| "learning_rate": 7.42857142857143e-05, | |
| "loss": 1.654850959777832, | |
| "step": 196 | |
| }, | |
| { | |
| "epoch": 0.047093801100044595, | |
| "grad_norm": 0.8984375, | |
| "learning_rate": 7.504761904761905e-05, | |
| "loss": 1.6332194805145264, | |
| "step": 198 | |
| }, | |
| { | |
| "epoch": 0.047569496060651105, | |
| "grad_norm": 0.890625, | |
| "learning_rate": 7.580952380952381e-05, | |
| "loss": 1.6425645351409912, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.04804519102125762, | |
| "grad_norm": 0.80078125, | |
| "learning_rate": 7.657142857142859e-05, | |
| "loss": 1.6112370491027832, | |
| "step": 202 | |
| }, | |
| { | |
| "epoch": 0.04852088598186413, | |
| "grad_norm": 0.875, | |
| "learning_rate": 7.733333333333333e-05, | |
| "loss": 1.6736791133880615, | |
| "step": 204 | |
| }, | |
| { | |
| "epoch": 0.04899658094247064, | |
| "grad_norm": 0.90234375, | |
| "learning_rate": 7.80952380952381e-05, | |
| "loss": 1.5582149028778076, | |
| "step": 206 | |
| }, | |
| { | |
| "epoch": 0.04947227590307715, | |
| "grad_norm": 0.859375, | |
| "learning_rate": 7.885714285714287e-05, | |
| "loss": 1.605231523513794, | |
| "step": 208 | |
| }, | |
| { | |
| "epoch": 0.04994797086368366, | |
| "grad_norm": 0.95703125, | |
| "learning_rate": 7.961904761904763e-05, | |
| "loss": 1.6272740364074707, | |
| "step": 210 | |
| }, | |
| { | |
| "epoch": 0.05042366582429018, | |
| "grad_norm": 1.0234375, | |
| "learning_rate": 8e-05, | |
| "loss": 1.6227126121520996, | |
| "step": 212 | |
| }, | |
| { | |
| "epoch": 0.05089936078489669, | |
| "grad_norm": 0.99609375, | |
| "learning_rate": 8e-05, | |
| "loss": 1.7201282978057861, | |
| "step": 214 | |
| }, | |
| { | |
| "epoch": 0.0513750557455032, | |
| "grad_norm": 0.8203125, | |
| "learning_rate": 8e-05, | |
| "loss": 1.671586036682129, | |
| "step": 216 | |
| }, | |
| { | |
| "epoch": 0.051850750706109706, | |
| "grad_norm": 0.82421875, | |
| "learning_rate": 8e-05, | |
| "loss": 1.596938133239746, | |
| "step": 218 | |
| }, | |
| { | |
| "epoch": 0.052326445666716216, | |
| "grad_norm": 0.71875, | |
| "learning_rate": 8e-05, | |
| "loss": 1.5416910648345947, | |
| "step": 220 | |
| }, | |
| { | |
| "epoch": 0.052802140627322726, | |
| "grad_norm": 0.93359375, | |
| "learning_rate": 8e-05, | |
| "loss": 1.6337580680847168, | |
| "step": 222 | |
| }, | |
| { | |
| "epoch": 0.05327783558792924, | |
| "grad_norm": 0.89453125, | |
| "learning_rate": 8e-05, | |
| "loss": 1.694180965423584, | |
| "step": 224 | |
| }, | |
| { | |
| "epoch": 0.05375353054853575, | |
| "grad_norm": 1.1015625, | |
| "learning_rate": 8e-05, | |
| "loss": 1.5831806659698486, | |
| "step": 226 | |
| }, | |
| { | |
| "epoch": 0.05422922550914226, | |
| "grad_norm": 1.328125, | |
| "learning_rate": 8e-05, | |
| "loss": 1.6646983623504639, | |
| "step": 228 | |
| }, | |
| { | |
| "epoch": 0.05470492046974877, | |
| "grad_norm": 1.53125, | |
| "learning_rate": 8e-05, | |
| "loss": 1.632063627243042, | |
| "step": 230 | |
| }, | |
| { | |
| "epoch": 0.05518061543035528, | |
| "grad_norm": 1.3515625, | |
| "learning_rate": 8e-05, | |
| "loss": 1.6186381578445435, | |
| "step": 232 | |
| }, | |
| { | |
| "epoch": 0.0556563103909618, | |
| "grad_norm": 0.9296875, | |
| "learning_rate": 8e-05, | |
| "loss": 1.5822536945343018, | |
| "step": 234 | |
| }, | |
| { | |
| "epoch": 0.05613200535156831, | |
| "grad_norm": 0.9765625, | |
| "learning_rate": 8e-05, | |
| "loss": 1.598821759223938, | |
| "step": 236 | |
| }, | |
| { | |
| "epoch": 0.05660770031217482, | |
| "grad_norm": 0.8203125, | |
| "learning_rate": 8e-05, | |
| "loss": 1.6583571434020996, | |
| "step": 238 | |
| }, | |
| { | |
| "epoch": 0.05708339527278133, | |
| "grad_norm": 0.94921875, | |
| "learning_rate": 8e-05, | |
| "loss": 1.6493302583694458, | |
| "step": 240 | |
| }, | |
| { | |
| "epoch": 0.05755909023338784, | |
| "grad_norm": 0.9296875, | |
| "learning_rate": 8e-05, | |
| "loss": 1.5849549770355225, | |
| "step": 242 | |
| }, | |
| { | |
| "epoch": 0.058034785193994354, | |
| "grad_norm": 0.71875, | |
| "learning_rate": 8e-05, | |
| "loss": 1.5187630653381348, | |
| "step": 244 | |
| }, | |
| { | |
| "epoch": 0.058510480154600863, | |
| "grad_norm": 0.8046875, | |
| "learning_rate": 8e-05, | |
| "loss": 1.6261816024780273, | |
| "step": 246 | |
| }, | |
| { | |
| "epoch": 0.05898617511520737, | |
| "grad_norm": 0.94140625, | |
| "learning_rate": 8e-05, | |
| "loss": 1.5440542697906494, | |
| "step": 248 | |
| }, | |
| { | |
| "epoch": 0.05946187007581388, | |
| "grad_norm": 0.80859375, | |
| "learning_rate": 8e-05, | |
| "loss": 1.5579140186309814, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 0.05993756503642039, | |
| "grad_norm": 0.671875, | |
| "learning_rate": 8e-05, | |
| "loss": 1.5661745071411133, | |
| "step": 252 | |
| }, | |
| { | |
| "epoch": 0.06041325999702691, | |
| "grad_norm": 0.76171875, | |
| "learning_rate": 8e-05, | |
| "loss": 1.5916748046875, | |
| "step": 254 | |
| }, | |
| { | |
| "epoch": 0.06088895495763342, | |
| "grad_norm": 0.765625, | |
| "learning_rate": 8e-05, | |
| "loss": 1.582345724105835, | |
| "step": 256 | |
| }, | |
| { | |
| "epoch": 0.06136464991823993, | |
| "grad_norm": 0.78125, | |
| "learning_rate": 8e-05, | |
| "loss": 1.6371424198150635, | |
| "step": 258 | |
| }, | |
| { | |
| "epoch": 0.06184034487884644, | |
| "grad_norm": 0.7265625, | |
| "learning_rate": 8e-05, | |
| "loss": 1.5874426364898682, | |
| "step": 260 | |
| }, | |
| { | |
| "epoch": 0.06231603983945295, | |
| "grad_norm": 0.6796875, | |
| "learning_rate": 8e-05, | |
| "loss": 1.5973892211914062, | |
| "step": 262 | |
| }, | |
| { | |
| "epoch": 0.06279173480005946, | |
| "grad_norm": 0.859375, | |
| "learning_rate": 8e-05, | |
| "loss": 1.6360384225845337, | |
| "step": 264 | |
| }, | |
| { | |
| "epoch": 0.06326742976066597, | |
| "grad_norm": 0.703125, | |
| "learning_rate": 8e-05, | |
| "loss": 1.4682174921035767, | |
| "step": 266 | |
| }, | |
| { | |
| "epoch": 0.06374312472127248, | |
| "grad_norm": 0.69140625, | |
| "learning_rate": 8e-05, | |
| "loss": 1.5434261560440063, | |
| "step": 268 | |
| }, | |
| { | |
| "epoch": 0.064218819681879, | |
| "grad_norm": 0.671875, | |
| "learning_rate": 8e-05, | |
| "loss": 1.5429248809814453, | |
| "step": 270 | |
| }, | |
| { | |
| "epoch": 0.0646945146424855, | |
| "grad_norm": 0.6328125, | |
| "learning_rate": 8e-05, | |
| "loss": 1.520768404006958, | |
| "step": 272 | |
| }, | |
| { | |
| "epoch": 0.06517020960309201, | |
| "grad_norm": 0.63671875, | |
| "learning_rate": 8e-05, | |
| "loss": 1.6287932395935059, | |
| "step": 274 | |
| }, | |
| { | |
| "epoch": 0.06564590456369852, | |
| "grad_norm": 0.75, | |
| "learning_rate": 8e-05, | |
| "loss": 1.574143409729004, | |
| "step": 276 | |
| }, | |
| { | |
| "epoch": 0.06612159952430503, | |
| "grad_norm": 0.64453125, | |
| "learning_rate": 8e-05, | |
| "loss": 1.5749611854553223, | |
| "step": 278 | |
| }, | |
| { | |
| "epoch": 0.06659729448491156, | |
| "grad_norm": 0.66796875, | |
| "learning_rate": 8e-05, | |
| "loss": 1.511296033859253, | |
| "step": 280 | |
| }, | |
| { | |
| "epoch": 0.06707298944551807, | |
| "grad_norm": 0.6171875, | |
| "learning_rate": 8e-05, | |
| "loss": 1.5300962924957275, | |
| "step": 282 | |
| }, | |
| { | |
| "epoch": 0.06754868440612458, | |
| "grad_norm": 0.59375, | |
| "learning_rate": 8e-05, | |
| "loss": 1.5132012367248535, | |
| "step": 284 | |
| }, | |
| { | |
| "epoch": 0.06802437936673109, | |
| "grad_norm": 0.59375, | |
| "learning_rate": 8e-05, | |
| "loss": 1.5248513221740723, | |
| "step": 286 | |
| }, | |
| { | |
| "epoch": 0.0685000743273376, | |
| "grad_norm": 0.64453125, | |
| "learning_rate": 8e-05, | |
| "loss": 1.4714152812957764, | |
| "step": 288 | |
| }, | |
| { | |
| "epoch": 0.0689757692879441, | |
| "grad_norm": 0.5859375, | |
| "learning_rate": 8e-05, | |
| "loss": 1.536270022392273, | |
| "step": 290 | |
| }, | |
| { | |
| "epoch": 0.06945146424855062, | |
| "grad_norm": 0.66796875, | |
| "learning_rate": 8e-05, | |
| "loss": 1.5859988927841187, | |
| "step": 292 | |
| }, | |
| { | |
| "epoch": 0.06992715920915712, | |
| "grad_norm": 0.640625, | |
| "learning_rate": 8e-05, | |
| "loss": 1.6076054573059082, | |
| "step": 294 | |
| }, | |
| { | |
| "epoch": 0.07040285416976363, | |
| "grad_norm": 0.65234375, | |
| "learning_rate": 8e-05, | |
| "loss": 1.537914752960205, | |
| "step": 296 | |
| }, | |
| { | |
| "epoch": 0.07087854913037014, | |
| "grad_norm": 0.68359375, | |
| "learning_rate": 8e-05, | |
| "loss": 1.530918002128601, | |
| "step": 298 | |
| }, | |
| { | |
| "epoch": 0.07135424409097667, | |
| "grad_norm": 0.63671875, | |
| "learning_rate": 8e-05, | |
| "loss": 1.5722606182098389, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 0.07182993905158318, | |
| "grad_norm": 0.6328125, | |
| "learning_rate": 8e-05, | |
| "loss": 1.5380263328552246, | |
| "step": 302 | |
| }, | |
| { | |
| "epoch": 0.07230563401218969, | |
| "grad_norm": 0.62109375, | |
| "learning_rate": 8e-05, | |
| "loss": 1.543945074081421, | |
| "step": 304 | |
| }, | |
| { | |
| "epoch": 0.0727813289727962, | |
| "grad_norm": 0.671875, | |
| "learning_rate": 8e-05, | |
| "loss": 1.4990713596343994, | |
| "step": 306 | |
| }, | |
| { | |
| "epoch": 0.0732570239334027, | |
| "grad_norm": 0.5703125, | |
| "learning_rate": 8e-05, | |
| "loss": 1.5049118995666504, | |
| "step": 308 | |
| }, | |
| { | |
| "epoch": 0.07373271889400922, | |
| "grad_norm": 0.546875, | |
| "learning_rate": 8e-05, | |
| "loss": 1.5481094121932983, | |
| "step": 310 | |
| }, | |
| { | |
| "epoch": 0.07420841385461573, | |
| "grad_norm": 0.6015625, | |
| "learning_rate": 8e-05, | |
| "loss": 1.5431925058364868, | |
| "step": 312 | |
| }, | |
| { | |
| "epoch": 0.07468410881522224, | |
| "grad_norm": 0.59375, | |
| "learning_rate": 8e-05, | |
| "loss": 1.5000509023666382, | |
| "step": 314 | |
| }, | |
| { | |
| "epoch": 0.07515980377582875, | |
| "grad_norm": 0.640625, | |
| "learning_rate": 8e-05, | |
| "loss": 1.453176498413086, | |
| "step": 316 | |
| }, | |
| { | |
| "epoch": 0.07563549873643526, | |
| "grad_norm": 0.59375, | |
| "learning_rate": 8e-05, | |
| "loss": 1.5792964696884155, | |
| "step": 318 | |
| }, | |
| { | |
| "epoch": 0.07611119369704177, | |
| "grad_norm": 0.6171875, | |
| "learning_rate": 8e-05, | |
| "loss": 1.5420498847961426, | |
| "step": 320 | |
| }, | |
| { | |
| "epoch": 0.07658688865764829, | |
| "grad_norm": 0.578125, | |
| "learning_rate": 8e-05, | |
| "loss": 1.5112196207046509, | |
| "step": 322 | |
| }, | |
| { | |
| "epoch": 0.0770625836182548, | |
| "grad_norm": 0.5859375, | |
| "learning_rate": 8e-05, | |
| "loss": 1.5195896625518799, | |
| "step": 324 | |
| }, | |
| { | |
| "epoch": 0.07753827857886131, | |
| "grad_norm": 0.578125, | |
| "learning_rate": 8e-05, | |
| "loss": 1.4650981426239014, | |
| "step": 326 | |
| }, | |
| { | |
| "epoch": 0.07801397353946782, | |
| "grad_norm": 0.6015625, | |
| "learning_rate": 8e-05, | |
| "loss": 1.4874310493469238, | |
| "step": 328 | |
| }, | |
| { | |
| "epoch": 0.07848966850007433, | |
| "grad_norm": 0.5546875, | |
| "learning_rate": 8e-05, | |
| "loss": 1.4700895547866821, | |
| "step": 330 | |
| }, | |
| { | |
| "epoch": 0.07896536346068084, | |
| "grad_norm": 0.59765625, | |
| "learning_rate": 8e-05, | |
| "loss": 1.4657363891601562, | |
| "step": 332 | |
| }, | |
| { | |
| "epoch": 0.07944105842128735, | |
| "grad_norm": 0.63671875, | |
| "learning_rate": 8e-05, | |
| "loss": 1.5102603435516357, | |
| "step": 334 | |
| }, | |
| { | |
| "epoch": 0.07991675338189386, | |
| "grad_norm": 0.63671875, | |
| "learning_rate": 8e-05, | |
| "loss": 1.48199462890625, | |
| "step": 336 | |
| }, | |
| { | |
| "epoch": 0.08039244834250037, | |
| "grad_norm": 0.58984375, | |
| "learning_rate": 8e-05, | |
| "loss": 1.573965072631836, | |
| "step": 338 | |
| }, | |
| { | |
| "epoch": 0.08086814330310688, | |
| "grad_norm": 0.5859375, | |
| "learning_rate": 8e-05, | |
| "loss": 1.552716851234436, | |
| "step": 340 | |
| }, | |
| { | |
| "epoch": 0.0813438382637134, | |
| "grad_norm": 0.59765625, | |
| "learning_rate": 8e-05, | |
| "loss": 1.5109150409698486, | |
| "step": 342 | |
| }, | |
| { | |
| "epoch": 0.08181953322431991, | |
| "grad_norm": 0.62890625, | |
| "learning_rate": 8e-05, | |
| "loss": 1.478667974472046, | |
| "step": 344 | |
| }, | |
| { | |
| "epoch": 0.08229522818492642, | |
| "grad_norm": 0.55859375, | |
| "learning_rate": 8e-05, | |
| "loss": 1.4951369762420654, | |
| "step": 346 | |
| }, | |
| { | |
| "epoch": 0.08277092314553293, | |
| "grad_norm": 0.5234375, | |
| "learning_rate": 8e-05, | |
| "loss": 1.4743764400482178, | |
| "step": 348 | |
| }, | |
| { | |
| "epoch": 0.08324661810613944, | |
| "grad_norm": 0.59375, | |
| "learning_rate": 8e-05, | |
| "loss": 1.5435149669647217, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 0.08372231306674595, | |
| "grad_norm": 0.5546875, | |
| "learning_rate": 8e-05, | |
| "loss": 1.4307265281677246, | |
| "step": 352 | |
| }, | |
| { | |
| "epoch": 0.08419800802735246, | |
| "grad_norm": 0.60546875, | |
| "learning_rate": 8e-05, | |
| "loss": 1.5382444858551025, | |
| "step": 354 | |
| }, | |
| { | |
| "epoch": 0.08467370298795897, | |
| "grad_norm": 0.56640625, | |
| "learning_rate": 8e-05, | |
| "loss": 1.4578557014465332, | |
| "step": 356 | |
| }, | |
| { | |
| "epoch": 0.08514939794856548, | |
| "grad_norm": 0.5625, | |
| "learning_rate": 8e-05, | |
| "loss": 1.5319006443023682, | |
| "step": 358 | |
| }, | |
| { | |
| "epoch": 0.08562509290917199, | |
| "grad_norm": 0.52734375, | |
| "learning_rate": 8e-05, | |
| "loss": 1.511313796043396, | |
| "step": 360 | |
| }, | |
| { | |
| "epoch": 0.08610078786977851, | |
| "grad_norm": 0.55078125, | |
| "learning_rate": 8e-05, | |
| "loss": 1.4577925205230713, | |
| "step": 362 | |
| }, | |
| { | |
| "epoch": 0.08657648283038502, | |
| "grad_norm": 0.54296875, | |
| "learning_rate": 8e-05, | |
| "loss": 1.5221188068389893, | |
| "step": 364 | |
| }, | |
| { | |
| "epoch": 0.08705217779099153, | |
| "grad_norm": 0.515625, | |
| "learning_rate": 8e-05, | |
| "loss": 1.4162304401397705, | |
| "step": 366 | |
| }, | |
| { | |
| "epoch": 0.08752787275159804, | |
| "grad_norm": 0.53515625, | |
| "learning_rate": 8e-05, | |
| "loss": 1.4972211122512817, | |
| "step": 368 | |
| }, | |
| { | |
| "epoch": 0.08800356771220455, | |
| "grad_norm": 0.53515625, | |
| "learning_rate": 8e-05, | |
| "loss": 1.5569958686828613, | |
| "step": 370 | |
| }, | |
| { | |
| "epoch": 0.08847926267281106, | |
| "grad_norm": 0.54296875, | |
| "learning_rate": 8e-05, | |
| "loss": 1.467348337173462, | |
| "step": 372 | |
| }, | |
| { | |
| "epoch": 0.08895495763341757, | |
| "grad_norm": 0.53515625, | |
| "learning_rate": 8e-05, | |
| "loss": 1.4739539623260498, | |
| "step": 374 | |
| }, | |
| { | |
| "epoch": 0.08943065259402408, | |
| "grad_norm": 0.546875, | |
| "learning_rate": 8e-05, | |
| "loss": 1.478308916091919, | |
| "step": 376 | |
| }, | |
| { | |
| "epoch": 0.08990634755463059, | |
| "grad_norm": 0.5078125, | |
| "learning_rate": 8e-05, | |
| "loss": 1.4767718315124512, | |
| "step": 378 | |
| }, | |
| { | |
| "epoch": 0.0903820425152371, | |
| "grad_norm": 0.52734375, | |
| "learning_rate": 8e-05, | |
| "loss": 1.51509690284729, | |
| "step": 380 | |
| }, | |
| { | |
| "epoch": 0.09085773747584361, | |
| "grad_norm": 0.5234375, | |
| "learning_rate": 8e-05, | |
| "loss": 1.4501855373382568, | |
| "step": 382 | |
| }, | |
| { | |
| "epoch": 0.09133343243645013, | |
| "grad_norm": 0.5390625, | |
| "learning_rate": 8e-05, | |
| "loss": 1.45088529586792, | |
| "step": 384 | |
| }, | |
| { | |
| "epoch": 0.09180912739705664, | |
| "grad_norm": 0.52734375, | |
| "learning_rate": 8e-05, | |
| "loss": 1.4980132579803467, | |
| "step": 386 | |
| }, | |
| { | |
| "epoch": 0.09228482235766315, | |
| "grad_norm": 0.51953125, | |
| "learning_rate": 8e-05, | |
| "loss": 1.5103974342346191, | |
| "step": 388 | |
| }, | |
| { | |
| "epoch": 0.09276051731826966, | |
| "grad_norm": 0.58203125, | |
| "learning_rate": 8e-05, | |
| "loss": 1.4751368761062622, | |
| "step": 390 | |
| }, | |
| { | |
| "epoch": 0.09323621227887617, | |
| "grad_norm": 0.62109375, | |
| "learning_rate": 8e-05, | |
| "loss": 1.494489073753357, | |
| "step": 392 | |
| }, | |
| { | |
| "epoch": 0.09371190723948268, | |
| "grad_norm": 0.61328125, | |
| "learning_rate": 8e-05, | |
| "loss": 1.497837781906128, | |
| "step": 394 | |
| }, | |
| { | |
| "epoch": 0.09418760220008919, | |
| "grad_norm": 0.55859375, | |
| "learning_rate": 8e-05, | |
| "loss": 1.5514724254608154, | |
| "step": 396 | |
| }, | |
| { | |
| "epoch": 0.0946632971606957, | |
| "grad_norm": 0.86328125, | |
| "learning_rate": 8e-05, | |
| "loss": 1.5110323429107666, | |
| "step": 398 | |
| }, | |
| { | |
| "epoch": 0.09513899212130221, | |
| "grad_norm": 0.63671875, | |
| "learning_rate": 8e-05, | |
| "loss": 1.4153172969818115, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 0.09561468708190872, | |
| "grad_norm": 0.51953125, | |
| "learning_rate": 8e-05, | |
| "loss": 1.4518225193023682, | |
| "step": 402 | |
| }, | |
| { | |
| "epoch": 0.09609038204251524, | |
| "grad_norm": 0.578125, | |
| "learning_rate": 8e-05, | |
| "loss": 1.4477956295013428, | |
| "step": 404 | |
| }, | |
| { | |
| "epoch": 0.09656607700312175, | |
| "grad_norm": 0.62109375, | |
| "learning_rate": 8e-05, | |
| "loss": 1.4921960830688477, | |
| "step": 406 | |
| }, | |
| { | |
| "epoch": 0.09704177196372826, | |
| "grad_norm": 0.6328125, | |
| "learning_rate": 8e-05, | |
| "loss": 1.4914698600769043, | |
| "step": 408 | |
| }, | |
| { | |
| "epoch": 0.09751746692433477, | |
| "grad_norm": 0.59375, | |
| "learning_rate": 8e-05, | |
| "loss": 1.4477533102035522, | |
| "step": 410 | |
| }, | |
| { | |
| "epoch": 0.09799316188494128, | |
| "grad_norm": 0.546875, | |
| "learning_rate": 8e-05, | |
| "loss": 1.499894142150879, | |
| "step": 412 | |
| }, | |
| { | |
| "epoch": 0.09846885684554779, | |
| "grad_norm": 0.5859375, | |
| "learning_rate": 8e-05, | |
| "loss": 1.5634784698486328, | |
| "step": 414 | |
| }, | |
| { | |
| "epoch": 0.0989445518061543, | |
| "grad_norm": 0.515625, | |
| "learning_rate": 8e-05, | |
| "loss": 1.4383997917175293, | |
| "step": 416 | |
| }, | |
| { | |
| "epoch": 0.09942024676676081, | |
| "grad_norm": 0.55859375, | |
| "learning_rate": 8e-05, | |
| "loss": 1.4373674392700195, | |
| "step": 418 | |
| }, | |
| { | |
| "epoch": 0.09989594172736732, | |
| "grad_norm": 0.5703125, | |
| "learning_rate": 8e-05, | |
| "loss": 1.475003719329834, | |
| "step": 420 | |
| }, | |
| { | |
| "epoch": 0.10037163668797383, | |
| "grad_norm": 0.5859375, | |
| "learning_rate": 8e-05, | |
| "loss": 1.4678364992141724, | |
| "step": 422 | |
| }, | |
| { | |
| "epoch": 0.10084733164858035, | |
| "grad_norm": 0.5390625, | |
| "learning_rate": 8e-05, | |
| "loss": 1.45807945728302, | |
| "step": 424 | |
| }, | |
| { | |
| "epoch": 0.10132302660918686, | |
| "grad_norm": 0.54296875, | |
| "learning_rate": 8e-05, | |
| "loss": 1.5057690143585205, | |
| "step": 426 | |
| }, | |
| { | |
| "epoch": 0.10179872156979337, | |
| "grad_norm": 0.50390625, | |
| "learning_rate": 8e-05, | |
| "loss": 1.4296057224273682, | |
| "step": 428 | |
| }, | |
| { | |
| "epoch": 0.10227441653039988, | |
| "grad_norm": 0.54296875, | |
| "learning_rate": 8e-05, | |
| "loss": 1.5102698802947998, | |
| "step": 430 | |
| }, | |
| { | |
| "epoch": 0.1027501114910064, | |
| "grad_norm": 0.5078125, | |
| "learning_rate": 8e-05, | |
| "loss": 1.37981116771698, | |
| "step": 432 | |
| }, | |
| { | |
| "epoch": 0.1032258064516129, | |
| "grad_norm": 0.52734375, | |
| "learning_rate": 8e-05, | |
| "loss": 1.4619908332824707, | |
| "step": 434 | |
| }, | |
| { | |
| "epoch": 0.10370150141221941, | |
| "grad_norm": 0.55859375, | |
| "learning_rate": 8e-05, | |
| "loss": 1.4256863594055176, | |
| "step": 436 | |
| }, | |
| { | |
| "epoch": 0.10417719637282592, | |
| "grad_norm": 0.6171875, | |
| "learning_rate": 8e-05, | |
| "loss": 1.4363038539886475, | |
| "step": 438 | |
| }, | |
| { | |
| "epoch": 0.10465289133343243, | |
| "grad_norm": 0.58203125, | |
| "learning_rate": 8e-05, | |
| "loss": 1.3884978294372559, | |
| "step": 440 | |
| }, | |
| { | |
| "epoch": 0.10512858629403894, | |
| "grad_norm": 0.62109375, | |
| "learning_rate": 8e-05, | |
| "loss": 1.4545469284057617, | |
| "step": 442 | |
| }, | |
| { | |
| "epoch": 0.10560428125464545, | |
| "grad_norm": 0.5703125, | |
| "learning_rate": 8e-05, | |
| "loss": 1.4439201354980469, | |
| "step": 444 | |
| }, | |
| { | |
| "epoch": 0.10607997621525198, | |
| "grad_norm": 0.58984375, | |
| "learning_rate": 8e-05, | |
| "loss": 1.5349268913269043, | |
| "step": 446 | |
| }, | |
| { | |
| "epoch": 0.10655567117585849, | |
| "grad_norm": 0.609375, | |
| "learning_rate": 8e-05, | |
| "loss": 1.591422438621521, | |
| "step": 448 | |
| }, | |
| { | |
| "epoch": 0.107031366136465, | |
| "grad_norm": 0.6640625, | |
| "learning_rate": 8e-05, | |
| "loss": 1.4320346117019653, | |
| "step": 450 | |
| }, | |
| { | |
| "epoch": 0.1075070610970715, | |
| "grad_norm": 0.62109375, | |
| "learning_rate": 8e-05, | |
| "loss": 1.4518539905548096, | |
| "step": 452 | |
| }, | |
| { | |
| "epoch": 0.10798275605767801, | |
| "grad_norm": 0.5859375, | |
| "learning_rate": 8e-05, | |
| "loss": 1.5056893825531006, | |
| "step": 454 | |
| }, | |
| { | |
| "epoch": 0.10845845101828452, | |
| "grad_norm": 0.54296875, | |
| "learning_rate": 8e-05, | |
| "loss": 1.3535287380218506, | |
| "step": 456 | |
| }, | |
| { | |
| "epoch": 0.10893414597889103, | |
| "grad_norm": 0.57421875, | |
| "learning_rate": 8e-05, | |
| "loss": 1.4125394821166992, | |
| "step": 458 | |
| }, | |
| { | |
| "epoch": 0.10940984093949754, | |
| "grad_norm": 0.51171875, | |
| "learning_rate": 8e-05, | |
| "loss": 1.3990814685821533, | |
| "step": 460 | |
| }, | |
| { | |
| "epoch": 0.10988553590010405, | |
| "grad_norm": 0.5546875, | |
| "learning_rate": 8e-05, | |
| "loss": 1.4865885972976685, | |
| "step": 462 | |
| }, | |
| { | |
| "epoch": 0.11036123086071056, | |
| "grad_norm": 0.5234375, | |
| "learning_rate": 8e-05, | |
| "loss": 1.415689468383789, | |
| "step": 464 | |
| }, | |
| { | |
| "epoch": 0.11083692582131709, | |
| "grad_norm": 0.53515625, | |
| "learning_rate": 8e-05, | |
| "loss": 1.453460931777954, | |
| "step": 466 | |
| }, | |
| { | |
| "epoch": 0.1113126207819236, | |
| "grad_norm": 0.56640625, | |
| "learning_rate": 8e-05, | |
| "loss": 1.4493913650512695, | |
| "step": 468 | |
| }, | |
| { | |
| "epoch": 0.1117883157425301, | |
| "grad_norm": 0.578125, | |
| "learning_rate": 8e-05, | |
| "loss": 1.4510160684585571, | |
| "step": 470 | |
| }, | |
| { | |
| "epoch": 0.11226401070313662, | |
| "grad_norm": 0.5859375, | |
| "learning_rate": 8e-05, | |
| "loss": 1.5191359519958496, | |
| "step": 472 | |
| }, | |
| { | |
| "epoch": 0.11273970566374313, | |
| "grad_norm": 0.59375, | |
| "learning_rate": 8e-05, | |
| "loss": 1.5257389545440674, | |
| "step": 474 | |
| }, | |
| { | |
| "epoch": 0.11321540062434964, | |
| "grad_norm": 0.54296875, | |
| "learning_rate": 8e-05, | |
| "loss": 1.4620857238769531, | |
| "step": 476 | |
| }, | |
| { | |
| "epoch": 0.11369109558495614, | |
| "grad_norm": 0.5390625, | |
| "learning_rate": 8e-05, | |
| "loss": 1.4049038887023926, | |
| "step": 478 | |
| }, | |
| { | |
| "epoch": 0.11416679054556265, | |
| "grad_norm": 0.54296875, | |
| "learning_rate": 8e-05, | |
| "loss": 1.387476921081543, | |
| "step": 480 | |
| }, | |
| { | |
| "epoch": 0.11464248550616916, | |
| "grad_norm": 0.53125, | |
| "learning_rate": 8e-05, | |
| "loss": 1.4375786781311035, | |
| "step": 482 | |
| }, | |
| { | |
| "epoch": 0.11511818046677567, | |
| "grad_norm": 0.5, | |
| "learning_rate": 8e-05, | |
| "loss": 1.4502665996551514, | |
| "step": 484 | |
| }, | |
| { | |
| "epoch": 0.1155938754273822, | |
| "grad_norm": 0.51171875, | |
| "learning_rate": 8e-05, | |
| "loss": 1.4172968864440918, | |
| "step": 486 | |
| }, | |
| { | |
| "epoch": 0.11606957038798871, | |
| "grad_norm": 0.51171875, | |
| "learning_rate": 8e-05, | |
| "loss": 1.3814518451690674, | |
| "step": 488 | |
| }, | |
| { | |
| "epoch": 0.11654526534859522, | |
| "grad_norm": 0.546875, | |
| "learning_rate": 8e-05, | |
| "loss": 1.4727611541748047, | |
| "step": 490 | |
| }, | |
| { | |
| "epoch": 0.11702096030920173, | |
| "grad_norm": 0.55859375, | |
| "learning_rate": 8e-05, | |
| "loss": 1.4043948650360107, | |
| "step": 492 | |
| }, | |
| { | |
| "epoch": 0.11749665526980824, | |
| "grad_norm": 0.53125, | |
| "learning_rate": 8e-05, | |
| "loss": 1.4327163696289062, | |
| "step": 494 | |
| }, | |
| { | |
| "epoch": 0.11797235023041475, | |
| "grad_norm": 0.578125, | |
| "learning_rate": 8e-05, | |
| "loss": 1.4427610635757446, | |
| "step": 496 | |
| }, | |
| { | |
| "epoch": 0.11844804519102126, | |
| "grad_norm": 0.5234375, | |
| "learning_rate": 8e-05, | |
| "loss": 1.4240474700927734, | |
| "step": 498 | |
| }, | |
| { | |
| "epoch": 0.11892374015162777, | |
| "grad_norm": 0.578125, | |
| "learning_rate": 8e-05, | |
| "loss": 1.471658706665039, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.11939943511223428, | |
| "grad_norm": 0.63671875, | |
| "learning_rate": 8e-05, | |
| "loss": 1.4233098030090332, | |
| "step": 502 | |
| }, | |
| { | |
| "epoch": 0.11987513007284079, | |
| "grad_norm": 0.5859375, | |
| "learning_rate": 8e-05, | |
| "loss": 1.3341891765594482, | |
| "step": 504 | |
| }, | |
| { | |
| "epoch": 0.1203508250334473, | |
| "grad_norm": 0.5390625, | |
| "learning_rate": 8e-05, | |
| "loss": 1.3719563484191895, | |
| "step": 506 | |
| }, | |
| { | |
| "epoch": 0.12082651999405382, | |
| "grad_norm": 0.6171875, | |
| "learning_rate": 8e-05, | |
| "loss": 1.4592832326889038, | |
| "step": 508 | |
| }, | |
| { | |
| "epoch": 0.12130221495466033, | |
| "grad_norm": 0.6015625, | |
| "learning_rate": 8e-05, | |
| "loss": 1.4251080751419067, | |
| "step": 510 | |
| }, | |
| { | |
| "epoch": 0.12177790991526684, | |
| "grad_norm": 0.54296875, | |
| "learning_rate": 8e-05, | |
| "loss": 1.4636266231536865, | |
| "step": 512 | |
| }, | |
| { | |
| "epoch": 0.12225360487587335, | |
| "grad_norm": 0.5703125, | |
| "learning_rate": 8e-05, | |
| "loss": 1.3918344974517822, | |
| "step": 514 | |
| }, | |
| { | |
| "epoch": 0.12272929983647986, | |
| "grad_norm": 0.5625, | |
| "learning_rate": 8e-05, | |
| "loss": 1.4410995244979858, | |
| "step": 516 | |
| }, | |
| { | |
| "epoch": 0.12320499479708637, | |
| "grad_norm": 0.53125, | |
| "learning_rate": 8e-05, | |
| "loss": 1.4553332328796387, | |
| "step": 518 | |
| }, | |
| { | |
| "epoch": 0.12368068975769288, | |
| "grad_norm": 0.48828125, | |
| "learning_rate": 8e-05, | |
| "loss": 1.4932277202606201, | |
| "step": 520 | |
| }, | |
| { | |
| "epoch": 0.12415638471829939, | |
| "grad_norm": 0.5234375, | |
| "learning_rate": 8e-05, | |
| "loss": 1.4398219585418701, | |
| "step": 522 | |
| }, | |
| { | |
| "epoch": 0.1246320796789059, | |
| "grad_norm": 0.5234375, | |
| "learning_rate": 8e-05, | |
| "loss": 1.4100382328033447, | |
| "step": 524 | |
| }, | |
| { | |
| "epoch": 0.12510777463951242, | |
| "grad_norm": 0.52734375, | |
| "learning_rate": 8e-05, | |
| "loss": 1.4194281101226807, | |
| "step": 526 | |
| }, | |
| { | |
| "epoch": 0.12558346960011893, | |
| "grad_norm": 0.50390625, | |
| "learning_rate": 8e-05, | |
| "loss": 1.4007759094238281, | |
| "step": 528 | |
| }, | |
| { | |
| "epoch": 0.12605916456072544, | |
| "grad_norm": 0.515625, | |
| "learning_rate": 8e-05, | |
| "loss": 1.3943548202514648, | |
| "step": 530 | |
| }, | |
| { | |
| "epoch": 0.12653485952133195, | |
| "grad_norm": 0.5, | |
| "learning_rate": 8e-05, | |
| "loss": 1.3672170639038086, | |
| "step": 532 | |
| }, | |
| { | |
| "epoch": 0.12701055448193846, | |
| "grad_norm": 0.58203125, | |
| "learning_rate": 8e-05, | |
| "loss": 1.3937108516693115, | |
| "step": 534 | |
| }, | |
| { | |
| "epoch": 0.12748624944254497, | |
| "grad_norm": 0.59375, | |
| "learning_rate": 8e-05, | |
| "loss": 1.4582862854003906, | |
| "step": 536 | |
| }, | |
| { | |
| "epoch": 0.12796194440315148, | |
| "grad_norm": 0.52734375, | |
| "learning_rate": 8e-05, | |
| "loss": 1.417186975479126, | |
| "step": 538 | |
| }, | |
| { | |
| "epoch": 0.128437639363758, | |
| "grad_norm": 0.5, | |
| "learning_rate": 8e-05, | |
| "loss": 1.3944048881530762, | |
| "step": 540 | |
| }, | |
| { | |
| "epoch": 0.1289133343243645, | |
| "grad_norm": 0.515625, | |
| "learning_rate": 8e-05, | |
| "loss": 1.4348058700561523, | |
| "step": 542 | |
| }, | |
| { | |
| "epoch": 0.129389029284971, | |
| "grad_norm": 0.56640625, | |
| "learning_rate": 8e-05, | |
| "loss": 1.4025098085403442, | |
| "step": 544 | |
| }, | |
| { | |
| "epoch": 0.12986472424557752, | |
| "grad_norm": 0.5859375, | |
| "learning_rate": 8e-05, | |
| "loss": 1.4666318893432617, | |
| "step": 546 | |
| }, | |
| { | |
| "epoch": 0.13034041920618403, | |
| "grad_norm": 0.50390625, | |
| "learning_rate": 8e-05, | |
| "loss": 1.3959743976593018, | |
| "step": 548 | |
| }, | |
| { | |
| "epoch": 0.13081611416679054, | |
| "grad_norm": 0.515625, | |
| "learning_rate": 8e-05, | |
| "loss": 1.435917615890503, | |
| "step": 550 | |
| }, | |
| { | |
| "epoch": 0.13129180912739705, | |
| "grad_norm": 0.5234375, | |
| "learning_rate": 8e-05, | |
| "loss": 1.4515659809112549, | |
| "step": 552 | |
| }, | |
| { | |
| "epoch": 0.13176750408800356, | |
| "grad_norm": 0.51953125, | |
| "learning_rate": 8e-05, | |
| "loss": 1.4431695938110352, | |
| "step": 554 | |
| }, | |
| { | |
| "epoch": 0.13224319904861007, | |
| "grad_norm": 0.5703125, | |
| "learning_rate": 8e-05, | |
| "loss": 1.3696998357772827, | |
| "step": 556 | |
| }, | |
| { | |
| "epoch": 0.1327188940092166, | |
| "grad_norm": 0.515625, | |
| "learning_rate": 8e-05, | |
| "loss": 1.4764920473098755, | |
| "step": 558 | |
| }, | |
| { | |
| "epoch": 0.1331945889698231, | |
| "grad_norm": 0.484375, | |
| "learning_rate": 8e-05, | |
| "loss": 1.3894143104553223, | |
| "step": 560 | |
| }, | |
| { | |
| "epoch": 0.13367028393042962, | |
| "grad_norm": 0.51171875, | |
| "learning_rate": 8e-05, | |
| "loss": 1.4079980850219727, | |
| "step": 562 | |
| }, | |
| { | |
| "epoch": 0.13414597889103613, | |
| "grad_norm": 0.49609375, | |
| "learning_rate": 8e-05, | |
| "loss": 1.3896784782409668, | |
| "step": 564 | |
| }, | |
| { | |
| "epoch": 0.13462167385164264, | |
| "grad_norm": 0.498046875, | |
| "learning_rate": 8e-05, | |
| "loss": 1.4342916011810303, | |
| "step": 566 | |
| }, | |
| { | |
| "epoch": 0.13509736881224915, | |
| "grad_norm": 0.5078125, | |
| "learning_rate": 8e-05, | |
| "loss": 1.4243568181991577, | |
| "step": 568 | |
| }, | |
| { | |
| "epoch": 0.13557306377285566, | |
| "grad_norm": 0.47265625, | |
| "learning_rate": 8e-05, | |
| "loss": 1.4043259620666504, | |
| "step": 570 | |
| }, | |
| { | |
| "epoch": 0.13604875873346217, | |
| "grad_norm": 0.474609375, | |
| "learning_rate": 8e-05, | |
| "loss": 1.4198546409606934, | |
| "step": 572 | |
| }, | |
| { | |
| "epoch": 0.13652445369406868, | |
| "grad_norm": 0.453125, | |
| "learning_rate": 8e-05, | |
| "loss": 1.3509985208511353, | |
| "step": 574 | |
| }, | |
| { | |
| "epoch": 0.1370001486546752, | |
| "grad_norm": 0.48046875, | |
| "learning_rate": 8e-05, | |
| "loss": 1.3983509540557861, | |
| "step": 576 | |
| }, | |
| { | |
| "epoch": 0.1374758436152817, | |
| "grad_norm": 0.4921875, | |
| "learning_rate": 8e-05, | |
| "loss": 1.4067437648773193, | |
| "step": 578 | |
| }, | |
| { | |
| "epoch": 0.1379515385758882, | |
| "grad_norm": 0.515625, | |
| "learning_rate": 8e-05, | |
| "loss": 1.4863321781158447, | |
| "step": 580 | |
| }, | |
| { | |
| "epoch": 0.13842723353649472, | |
| "grad_norm": 0.48828125, | |
| "learning_rate": 8e-05, | |
| "loss": 1.4052914381027222, | |
| "step": 582 | |
| }, | |
| { | |
| "epoch": 0.13890292849710123, | |
| "grad_norm": 0.54296875, | |
| "learning_rate": 8e-05, | |
| "loss": 1.408928394317627, | |
| "step": 584 | |
| }, | |
| { | |
| "epoch": 0.13937862345770774, | |
| "grad_norm": 0.55859375, | |
| "learning_rate": 8e-05, | |
| "loss": 1.4460136890411377, | |
| "step": 586 | |
| }, | |
| { | |
| "epoch": 0.13985431841831425, | |
| "grad_norm": 0.51171875, | |
| "learning_rate": 8e-05, | |
| "loss": 1.4335639476776123, | |
| "step": 588 | |
| }, | |
| { | |
| "epoch": 0.14033001337892076, | |
| "grad_norm": 0.53125, | |
| "learning_rate": 8e-05, | |
| "loss": 1.3965034484863281, | |
| "step": 590 | |
| }, | |
| { | |
| "epoch": 0.14080570833952727, | |
| "grad_norm": 0.53125, | |
| "learning_rate": 8e-05, | |
| "loss": 1.4012255668640137, | |
| "step": 592 | |
| }, | |
| { | |
| "epoch": 0.14128140330013378, | |
| "grad_norm": 0.53515625, | |
| "learning_rate": 8e-05, | |
| "loss": 1.4261143207550049, | |
| "step": 594 | |
| }, | |
| { | |
| "epoch": 0.1417570982607403, | |
| "grad_norm": 0.515625, | |
| "learning_rate": 8e-05, | |
| "loss": 1.4008715152740479, | |
| "step": 596 | |
| }, | |
| { | |
| "epoch": 0.1422327932213468, | |
| "grad_norm": 0.52734375, | |
| "learning_rate": 8e-05, | |
| "loss": 1.4034451246261597, | |
| "step": 598 | |
| }, | |
| { | |
| "epoch": 0.14270848818195334, | |
| "grad_norm": 0.5, | |
| "learning_rate": 8e-05, | |
| "loss": 1.4082181453704834, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 0.14318418314255985, | |
| "grad_norm": 0.48828125, | |
| "learning_rate": 8e-05, | |
| "loss": 1.3725682497024536, | |
| "step": 602 | |
| }, | |
| { | |
| "epoch": 0.14365987810316636, | |
| "grad_norm": 0.52734375, | |
| "learning_rate": 8e-05, | |
| "loss": 1.4293782711029053, | |
| "step": 604 | |
| }, | |
| { | |
| "epoch": 0.14413557306377286, | |
| "grad_norm": 0.5, | |
| "learning_rate": 8e-05, | |
| "loss": 1.4520360231399536, | |
| "step": 606 | |
| }, | |
| { | |
| "epoch": 0.14461126802437937, | |
| "grad_norm": 0.48828125, | |
| "learning_rate": 8e-05, | |
| "loss": 1.3525224924087524, | |
| "step": 608 | |
| }, | |
| { | |
| "epoch": 0.14508696298498588, | |
| "grad_norm": 0.5078125, | |
| "learning_rate": 8e-05, | |
| "loss": 1.4208955764770508, | |
| "step": 610 | |
| }, | |
| { | |
| "epoch": 0.1455626579455924, | |
| "grad_norm": 0.48828125, | |
| "learning_rate": 8e-05, | |
| "loss": 1.3334312438964844, | |
| "step": 612 | |
| }, | |
| { | |
| "epoch": 0.1460383529061989, | |
| "grad_norm": 0.53125, | |
| "learning_rate": 8e-05, | |
| "loss": 1.3503882884979248, | |
| "step": 614 | |
| }, | |
| { | |
| "epoch": 0.1465140478668054, | |
| "grad_norm": 0.54296875, | |
| "learning_rate": 8e-05, | |
| "loss": 1.4191619157791138, | |
| "step": 616 | |
| }, | |
| { | |
| "epoch": 0.14698974282741192, | |
| "grad_norm": 0.470703125, | |
| "learning_rate": 8e-05, | |
| "loss": 1.3381874561309814, | |
| "step": 618 | |
| }, | |
| { | |
| "epoch": 0.14746543778801843, | |
| "grad_norm": 0.48828125, | |
| "learning_rate": 8e-05, | |
| "loss": 1.3882290124893188, | |
| "step": 620 | |
| }, | |
| { | |
| "epoch": 0.14794113274862494, | |
| "grad_norm": 0.486328125, | |
| "learning_rate": 8e-05, | |
| "loss": 1.4173054695129395, | |
| "step": 622 | |
| }, | |
| { | |
| "epoch": 0.14841682770923145, | |
| "grad_norm": 0.49609375, | |
| "learning_rate": 8e-05, | |
| "loss": 1.3113012313842773, | |
| "step": 624 | |
| }, | |
| { | |
| "epoch": 0.14889252266983796, | |
| "grad_norm": 0.5390625, | |
| "learning_rate": 8e-05, | |
| "loss": 1.407628059387207, | |
| "step": 626 | |
| }, | |
| { | |
| "epoch": 0.14936821763044447, | |
| "grad_norm": 0.5390625, | |
| "learning_rate": 8e-05, | |
| "loss": 1.447212815284729, | |
| "step": 628 | |
| }, | |
| { | |
| "epoch": 0.14984391259105098, | |
| "grad_norm": 0.51171875, | |
| "learning_rate": 8e-05, | |
| "loss": 1.3606481552124023, | |
| "step": 630 | |
| }, | |
| { | |
| "epoch": 0.1503196075516575, | |
| "grad_norm": 0.55078125, | |
| "learning_rate": 8e-05, | |
| "loss": 1.4575624465942383, | |
| "step": 632 | |
| }, | |
| { | |
| "epoch": 0.150795302512264, | |
| "grad_norm": 0.56640625, | |
| "learning_rate": 8e-05, | |
| "loss": 1.355147123336792, | |
| "step": 634 | |
| }, | |
| { | |
| "epoch": 0.1512709974728705, | |
| "grad_norm": 0.498046875, | |
| "learning_rate": 8e-05, | |
| "loss": 1.37825345993042, | |
| "step": 636 | |
| }, | |
| { | |
| "epoch": 0.15174669243347702, | |
| "grad_norm": 0.50390625, | |
| "learning_rate": 8e-05, | |
| "loss": 1.4053802490234375, | |
| "step": 638 | |
| }, | |
| { | |
| "epoch": 0.15222238739408353, | |
| "grad_norm": 0.478515625, | |
| "learning_rate": 8e-05, | |
| "loss": 1.3817956447601318, | |
| "step": 640 | |
| }, | |
| { | |
| "epoch": 0.15269808235469007, | |
| "grad_norm": 0.498046875, | |
| "learning_rate": 8e-05, | |
| "loss": 1.3938934803009033, | |
| "step": 642 | |
| }, | |
| { | |
| "epoch": 0.15317377731529658, | |
| "grad_norm": 0.53125, | |
| "learning_rate": 8e-05, | |
| "loss": 1.35261869430542, | |
| "step": 644 | |
| }, | |
| { | |
| "epoch": 0.1536494722759031, | |
| "grad_norm": 0.51171875, | |
| "learning_rate": 8e-05, | |
| "loss": 1.3819756507873535, | |
| "step": 646 | |
| }, | |
| { | |
| "epoch": 0.1541251672365096, | |
| "grad_norm": 0.48828125, | |
| "learning_rate": 8e-05, | |
| "loss": 1.4077363014221191, | |
| "step": 648 | |
| }, | |
| { | |
| "epoch": 0.1546008621971161, | |
| "grad_norm": 0.50390625, | |
| "learning_rate": 8e-05, | |
| "loss": 1.4303150177001953, | |
| "step": 650 | |
| }, | |
| { | |
| "epoch": 0.15507655715772262, | |
| "grad_norm": 0.49609375, | |
| "learning_rate": 8e-05, | |
| "loss": 1.3727548122406006, | |
| "step": 652 | |
| }, | |
| { | |
| "epoch": 0.15555225211832913, | |
| "grad_norm": 0.4921875, | |
| "learning_rate": 8e-05, | |
| "loss": 1.4012013673782349, | |
| "step": 654 | |
| }, | |
| { | |
| "epoch": 0.15602794707893564, | |
| "grad_norm": 0.48046875, | |
| "learning_rate": 8e-05, | |
| "loss": 1.3778249025344849, | |
| "step": 656 | |
| }, | |
| { | |
| "epoch": 0.15650364203954215, | |
| "grad_norm": 0.470703125, | |
| "learning_rate": 8e-05, | |
| "loss": 1.384866714477539, | |
| "step": 658 | |
| }, | |
| { | |
| "epoch": 0.15697933700014866, | |
| "grad_norm": 0.51171875, | |
| "learning_rate": 8e-05, | |
| "loss": 1.4081860780715942, | |
| "step": 660 | |
| }, | |
| { | |
| "epoch": 0.15745503196075517, | |
| "grad_norm": 0.482421875, | |
| "learning_rate": 8e-05, | |
| "loss": 1.3876349925994873, | |
| "step": 662 | |
| }, | |
| { | |
| "epoch": 0.15793072692136167, | |
| "grad_norm": 0.52734375, | |
| "learning_rate": 8e-05, | |
| "loss": 1.420417070388794, | |
| "step": 664 | |
| }, | |
| { | |
| "epoch": 0.15840642188196818, | |
| "grad_norm": 0.50390625, | |
| "learning_rate": 8e-05, | |
| "loss": 1.427546501159668, | |
| "step": 666 | |
| }, | |
| { | |
| "epoch": 0.1588821168425747, | |
| "grad_norm": 0.484375, | |
| "learning_rate": 8e-05, | |
| "loss": 1.404707431793213, | |
| "step": 668 | |
| }, | |
| { | |
| "epoch": 0.1593578118031812, | |
| "grad_norm": 0.458984375, | |
| "learning_rate": 8e-05, | |
| "loss": 1.4167988300323486, | |
| "step": 670 | |
| }, | |
| { | |
| "epoch": 0.1598335067637877, | |
| "grad_norm": 0.482421875, | |
| "learning_rate": 8e-05, | |
| "loss": 1.36492919921875, | |
| "step": 672 | |
| }, | |
| { | |
| "epoch": 0.16030920172439422, | |
| "grad_norm": 0.546875, | |
| "learning_rate": 8e-05, | |
| "loss": 1.4290658235549927, | |
| "step": 674 | |
| }, | |
| { | |
| "epoch": 0.16078489668500073, | |
| "grad_norm": 0.50390625, | |
| "learning_rate": 8e-05, | |
| "loss": 1.3939204216003418, | |
| "step": 676 | |
| }, | |
| { | |
| "epoch": 0.16126059164560724, | |
| "grad_norm": 0.47265625, | |
| "learning_rate": 8e-05, | |
| "loss": 1.368532419204712, | |
| "step": 678 | |
| }, | |
| { | |
| "epoch": 0.16173628660621375, | |
| "grad_norm": 0.466796875, | |
| "learning_rate": 8e-05, | |
| "loss": 1.4039356708526611, | |
| "step": 680 | |
| }, | |
| { | |
| "epoch": 0.1622119815668203, | |
| "grad_norm": 0.453125, | |
| "learning_rate": 8e-05, | |
| "loss": 1.4631690979003906, | |
| "step": 682 | |
| }, | |
| { | |
| "epoch": 0.1626876765274268, | |
| "grad_norm": 0.458984375, | |
| "learning_rate": 8e-05, | |
| "loss": 1.3834668397903442, | |
| "step": 684 | |
| }, | |
| { | |
| "epoch": 0.1631633714880333, | |
| "grad_norm": 0.46484375, | |
| "learning_rate": 8e-05, | |
| "loss": 1.373947262763977, | |
| "step": 686 | |
| }, | |
| { | |
| "epoch": 0.16363906644863982, | |
| "grad_norm": 0.451171875, | |
| "learning_rate": 8e-05, | |
| "loss": 1.4164583683013916, | |
| "step": 688 | |
| }, | |
| { | |
| "epoch": 0.16411476140924633, | |
| "grad_norm": 0.474609375, | |
| "learning_rate": 8e-05, | |
| "loss": 1.4322106838226318, | |
| "step": 690 | |
| }, | |
| { | |
| "epoch": 0.16459045636985284, | |
| "grad_norm": 0.48828125, | |
| "learning_rate": 8e-05, | |
| "loss": 1.3455379009246826, | |
| "step": 692 | |
| }, | |
| { | |
| "epoch": 0.16506615133045935, | |
| "grad_norm": 0.482421875, | |
| "learning_rate": 8e-05, | |
| "loss": 1.34842050075531, | |
| "step": 694 | |
| }, | |
| { | |
| "epoch": 0.16554184629106586, | |
| "grad_norm": 0.490234375, | |
| "learning_rate": 8e-05, | |
| "loss": 1.428257942199707, | |
| "step": 696 | |
| }, | |
| { | |
| "epoch": 0.16601754125167237, | |
| "grad_norm": 0.5, | |
| "learning_rate": 8e-05, | |
| "loss": 1.417719841003418, | |
| "step": 698 | |
| }, | |
| { | |
| "epoch": 0.16649323621227888, | |
| "grad_norm": 0.453125, | |
| "learning_rate": 8e-05, | |
| "loss": 1.4131088256835938, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 0.1669689311728854, | |
| "grad_norm": 0.5078125, | |
| "learning_rate": 8e-05, | |
| "loss": 1.356804370880127, | |
| "step": 702 | |
| }, | |
| { | |
| "epoch": 0.1674446261334919, | |
| "grad_norm": 0.5, | |
| "learning_rate": 8e-05, | |
| "loss": 1.355417251586914, | |
| "step": 704 | |
| }, | |
| { | |
| "epoch": 0.1679203210940984, | |
| "grad_norm": 0.54296875, | |
| "learning_rate": 8e-05, | |
| "loss": 1.4380789995193481, | |
| "step": 706 | |
| }, | |
| { | |
| "epoch": 0.16839601605470492, | |
| "grad_norm": 0.4921875, | |
| "learning_rate": 8e-05, | |
| "loss": 1.3789442777633667, | |
| "step": 708 | |
| }, | |
| { | |
| "epoch": 0.16887171101531143, | |
| "grad_norm": 0.474609375, | |
| "learning_rate": 8e-05, | |
| "loss": 1.3488481044769287, | |
| "step": 710 | |
| }, | |
| { | |
| "epoch": 0.16934740597591794, | |
| "grad_norm": 0.48828125, | |
| "learning_rate": 8e-05, | |
| "loss": 1.3990561962127686, | |
| "step": 712 | |
| }, | |
| { | |
| "epoch": 0.16982310093652445, | |
| "grad_norm": 0.490234375, | |
| "learning_rate": 8e-05, | |
| "loss": 1.3976104259490967, | |
| "step": 714 | |
| }, | |
| { | |
| "epoch": 0.17029879589713096, | |
| "grad_norm": 0.494140625, | |
| "learning_rate": 8e-05, | |
| "loss": 1.430433750152588, | |
| "step": 716 | |
| }, | |
| { | |
| "epoch": 0.17077449085773747, | |
| "grad_norm": 0.462890625, | |
| "learning_rate": 8e-05, | |
| "loss": 1.359434723854065, | |
| "step": 718 | |
| }, | |
| { | |
| "epoch": 0.17125018581834398, | |
| "grad_norm": 0.515625, | |
| "learning_rate": 8e-05, | |
| "loss": 1.4045766592025757, | |
| "step": 720 | |
| }, | |
| { | |
| "epoch": 0.17172588077895048, | |
| "grad_norm": 0.49609375, | |
| "learning_rate": 8e-05, | |
| "loss": 1.3606858253479004, | |
| "step": 722 | |
| }, | |
| { | |
| "epoch": 0.17220157573955702, | |
| "grad_norm": 0.466796875, | |
| "learning_rate": 8e-05, | |
| "loss": 1.4614171981811523, | |
| "step": 724 | |
| }, | |
| { | |
| "epoch": 0.17267727070016353, | |
| "grad_norm": 0.47265625, | |
| "learning_rate": 8e-05, | |
| "loss": 1.416619062423706, | |
| "step": 726 | |
| }, | |
| { | |
| "epoch": 0.17315296566077004, | |
| "grad_norm": 0.44921875, | |
| "learning_rate": 8e-05, | |
| "loss": 1.2924635410308838, | |
| "step": 728 | |
| }, | |
| { | |
| "epoch": 0.17362866062137655, | |
| "grad_norm": 0.47265625, | |
| "learning_rate": 8e-05, | |
| "loss": 1.3354673385620117, | |
| "step": 730 | |
| }, | |
| { | |
| "epoch": 0.17410435558198306, | |
| "grad_norm": 0.4765625, | |
| "learning_rate": 8e-05, | |
| "loss": 1.3578845262527466, | |
| "step": 732 | |
| }, | |
| { | |
| "epoch": 0.17458005054258957, | |
| "grad_norm": 0.49609375, | |
| "learning_rate": 8e-05, | |
| "loss": 1.4009724855422974, | |
| "step": 734 | |
| }, | |
| { | |
| "epoch": 0.17505574550319608, | |
| "grad_norm": 0.48046875, | |
| "learning_rate": 8e-05, | |
| "loss": 1.4139372110366821, | |
| "step": 736 | |
| }, | |
| { | |
| "epoch": 0.1755314404638026, | |
| "grad_norm": 0.45703125, | |
| "learning_rate": 8e-05, | |
| "loss": 1.3427128791809082, | |
| "step": 738 | |
| }, | |
| { | |
| "epoch": 0.1760071354244091, | |
| "grad_norm": 0.51171875, | |
| "learning_rate": 8e-05, | |
| "loss": 1.3915586471557617, | |
| "step": 740 | |
| }, | |
| { | |
| "epoch": 0.1764828303850156, | |
| "grad_norm": 0.48046875, | |
| "learning_rate": 8e-05, | |
| "loss": 1.3710131645202637, | |
| "step": 742 | |
| }, | |
| { | |
| "epoch": 0.17695852534562212, | |
| "grad_norm": 0.48046875, | |
| "learning_rate": 8e-05, | |
| "loss": 1.3700971603393555, | |
| "step": 744 | |
| }, | |
| { | |
| "epoch": 0.17743422030622863, | |
| "grad_norm": 0.486328125, | |
| "learning_rate": 8e-05, | |
| "loss": 1.3621227741241455, | |
| "step": 746 | |
| }, | |
| { | |
| "epoch": 0.17790991526683514, | |
| "grad_norm": 0.5, | |
| "learning_rate": 8e-05, | |
| "loss": 1.38692307472229, | |
| "step": 748 | |
| }, | |
| { | |
| "epoch": 0.17838561022744165, | |
| "grad_norm": 0.44921875, | |
| "learning_rate": 8e-05, | |
| "loss": 1.352332592010498, | |
| "step": 750 | |
| }, | |
| { | |
| "epoch": 0.17886130518804816, | |
| "grad_norm": 0.515625, | |
| "learning_rate": 8e-05, | |
| "loss": 1.4046599864959717, | |
| "step": 752 | |
| }, | |
| { | |
| "epoch": 0.17933700014865467, | |
| "grad_norm": 0.5390625, | |
| "learning_rate": 8e-05, | |
| "loss": 1.3857762813568115, | |
| "step": 754 | |
| }, | |
| { | |
| "epoch": 0.17981269510926118, | |
| "grad_norm": 0.546875, | |
| "learning_rate": 8e-05, | |
| "loss": 1.3184947967529297, | |
| "step": 756 | |
| }, | |
| { | |
| "epoch": 0.1802883900698677, | |
| "grad_norm": 0.5078125, | |
| "learning_rate": 8e-05, | |
| "loss": 1.3482776880264282, | |
| "step": 758 | |
| }, | |
| { | |
| "epoch": 0.1807640850304742, | |
| "grad_norm": 0.51953125, | |
| "learning_rate": 8e-05, | |
| "loss": 1.434415340423584, | |
| "step": 760 | |
| }, | |
| { | |
| "epoch": 0.1812397799910807, | |
| "grad_norm": 0.490234375, | |
| "learning_rate": 8e-05, | |
| "loss": 1.3801504373550415, | |
| "step": 762 | |
| }, | |
| { | |
| "epoch": 0.18171547495168722, | |
| "grad_norm": 0.486328125, | |
| "learning_rate": 8e-05, | |
| "loss": 1.3628723621368408, | |
| "step": 764 | |
| }, | |
| { | |
| "epoch": 0.18219116991229375, | |
| "grad_norm": 0.46484375, | |
| "learning_rate": 8e-05, | |
| "loss": 1.3805229663848877, | |
| "step": 766 | |
| }, | |
| { | |
| "epoch": 0.18266686487290026, | |
| "grad_norm": 0.494140625, | |
| "learning_rate": 8e-05, | |
| "loss": 1.3568819761276245, | |
| "step": 768 | |
| }, | |
| { | |
| "epoch": 0.18314255983350677, | |
| "grad_norm": 0.451171875, | |
| "learning_rate": 8e-05, | |
| "loss": 1.3049235343933105, | |
| "step": 770 | |
| }, | |
| { | |
| "epoch": 0.18361825479411328, | |
| "grad_norm": 0.46875, | |
| "learning_rate": 8e-05, | |
| "loss": 1.3783180713653564, | |
| "step": 772 | |
| }, | |
| { | |
| "epoch": 0.1840939497547198, | |
| "grad_norm": 0.48046875, | |
| "learning_rate": 8e-05, | |
| "loss": 1.3785371780395508, | |
| "step": 774 | |
| }, | |
| { | |
| "epoch": 0.1845696447153263, | |
| "grad_norm": 0.455078125, | |
| "learning_rate": 8e-05, | |
| "loss": 1.4072458744049072, | |
| "step": 776 | |
| }, | |
| { | |
| "epoch": 0.1850453396759328, | |
| "grad_norm": 0.47265625, | |
| "learning_rate": 8e-05, | |
| "loss": 1.3426545858383179, | |
| "step": 778 | |
| }, | |
| { | |
| "epoch": 0.18552103463653932, | |
| "grad_norm": 0.453125, | |
| "learning_rate": 8e-05, | |
| "loss": 1.352428674697876, | |
| "step": 780 | |
| }, | |
| { | |
| "epoch": 0.18599672959714583, | |
| "grad_norm": 0.46875, | |
| "learning_rate": 8e-05, | |
| "loss": 1.3136948347091675, | |
| "step": 782 | |
| }, | |
| { | |
| "epoch": 0.18647242455775234, | |
| "grad_norm": 0.5, | |
| "learning_rate": 8e-05, | |
| "loss": 1.3649238348007202, | |
| "step": 784 | |
| }, | |
| { | |
| "epoch": 0.18694811951835885, | |
| "grad_norm": 0.50390625, | |
| "learning_rate": 8e-05, | |
| "loss": 1.4003939628601074, | |
| "step": 786 | |
| }, | |
| { | |
| "epoch": 0.18742381447896536, | |
| "grad_norm": 0.470703125, | |
| "learning_rate": 8e-05, | |
| "loss": 1.3522775173187256, | |
| "step": 788 | |
| }, | |
| { | |
| "epoch": 0.18789950943957187, | |
| "grad_norm": 0.478515625, | |
| "learning_rate": 8e-05, | |
| "loss": 1.353920578956604, | |
| "step": 790 | |
| }, | |
| { | |
| "epoch": 0.18837520440017838, | |
| "grad_norm": 0.458984375, | |
| "learning_rate": 8e-05, | |
| "loss": 1.4120471477508545, | |
| "step": 792 | |
| }, | |
| { | |
| "epoch": 0.1888508993607849, | |
| "grad_norm": 0.451171875, | |
| "learning_rate": 8e-05, | |
| "loss": 1.425047755241394, | |
| "step": 794 | |
| }, | |
| { | |
| "epoch": 0.1893265943213914, | |
| "grad_norm": 0.4375, | |
| "learning_rate": 8e-05, | |
| "loss": 1.3698722124099731, | |
| "step": 796 | |
| }, | |
| { | |
| "epoch": 0.1898022892819979, | |
| "grad_norm": 0.451171875, | |
| "learning_rate": 8e-05, | |
| "loss": 1.3825695514678955, | |
| "step": 798 | |
| }, | |
| { | |
| "epoch": 0.19027798424260442, | |
| "grad_norm": 0.466796875, | |
| "learning_rate": 8e-05, | |
| "loss": 1.384330153465271, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 0.19075367920321093, | |
| "grad_norm": 0.4765625, | |
| "learning_rate": 8e-05, | |
| "loss": 1.365710735321045, | |
| "step": 802 | |
| }, | |
| { | |
| "epoch": 0.19122937416381744, | |
| "grad_norm": 0.466796875, | |
| "learning_rate": 8e-05, | |
| "loss": 1.351928472518921, | |
| "step": 804 | |
| }, | |
| { | |
| "epoch": 0.19170506912442398, | |
| "grad_norm": 0.458984375, | |
| "learning_rate": 8e-05, | |
| "loss": 1.364558458328247, | |
| "step": 806 | |
| }, | |
| { | |
| "epoch": 0.1921807640850305, | |
| "grad_norm": 0.466796875, | |
| "learning_rate": 8e-05, | |
| "loss": 1.4033458232879639, | |
| "step": 808 | |
| }, | |
| { | |
| "epoch": 0.192656459045637, | |
| "grad_norm": 0.494140625, | |
| "learning_rate": 8e-05, | |
| "loss": 1.378347635269165, | |
| "step": 810 | |
| }, | |
| { | |
| "epoch": 0.1931321540062435, | |
| "grad_norm": 0.48828125, | |
| "learning_rate": 8e-05, | |
| "loss": 1.364283561706543, | |
| "step": 812 | |
| }, | |
| { | |
| "epoch": 0.19360784896685002, | |
| "grad_norm": 0.53125, | |
| "learning_rate": 8e-05, | |
| "loss": 1.414649248123169, | |
| "step": 814 | |
| }, | |
| { | |
| "epoch": 0.19408354392745653, | |
| "grad_norm": 0.4453125, | |
| "learning_rate": 8e-05, | |
| "loss": 1.3634638786315918, | |
| "step": 816 | |
| }, | |
| { | |
| "epoch": 0.19455923888806304, | |
| "grad_norm": 0.55859375, | |
| "learning_rate": 8e-05, | |
| "loss": 1.4743528366088867, | |
| "step": 818 | |
| }, | |
| { | |
| "epoch": 0.19503493384866954, | |
| "grad_norm": 0.52734375, | |
| "learning_rate": 8e-05, | |
| "loss": 1.3400163650512695, | |
| "step": 820 | |
| }, | |
| { | |
| "epoch": 0.19551062880927605, | |
| "grad_norm": 0.48828125, | |
| "learning_rate": 8e-05, | |
| "loss": 1.3880252838134766, | |
| "step": 822 | |
| }, | |
| { | |
| "epoch": 0.19598632376988256, | |
| "grad_norm": 0.5, | |
| "learning_rate": 8e-05, | |
| "loss": 1.395135521888733, | |
| "step": 824 | |
| }, | |
| { | |
| "epoch": 0.19646201873048907, | |
| "grad_norm": 0.4609375, | |
| "learning_rate": 8e-05, | |
| "loss": 1.3433012962341309, | |
| "step": 826 | |
| }, | |
| { | |
| "epoch": 0.19693771369109558, | |
| "grad_norm": 0.462890625, | |
| "learning_rate": 8e-05, | |
| "loss": 1.317229986190796, | |
| "step": 828 | |
| }, | |
| { | |
| "epoch": 0.1974134086517021, | |
| "grad_norm": 0.431640625, | |
| "learning_rate": 8e-05, | |
| "loss": 1.3179906606674194, | |
| "step": 830 | |
| }, | |
| { | |
| "epoch": 0.1978891036123086, | |
| "grad_norm": 0.451171875, | |
| "learning_rate": 8e-05, | |
| "loss": 1.3207850456237793, | |
| "step": 832 | |
| }, | |
| { | |
| "epoch": 0.1983647985729151, | |
| "grad_norm": 0.484375, | |
| "learning_rate": 8e-05, | |
| "loss": 1.4160897731781006, | |
| "step": 834 | |
| }, | |
| { | |
| "epoch": 0.19884049353352162, | |
| "grad_norm": 0.51953125, | |
| "learning_rate": 8e-05, | |
| "loss": 1.34122633934021, | |
| "step": 836 | |
| }, | |
| { | |
| "epoch": 0.19931618849412813, | |
| "grad_norm": 0.486328125, | |
| "learning_rate": 8e-05, | |
| "loss": 1.3531912565231323, | |
| "step": 838 | |
| }, | |
| { | |
| "epoch": 0.19979188345473464, | |
| "grad_norm": 0.482421875, | |
| "learning_rate": 8e-05, | |
| "loss": 1.3703558444976807, | |
| "step": 840 | |
| }, | |
| { | |
| "epoch": 0.20026757841534115, | |
| "grad_norm": 0.46875, | |
| "learning_rate": 8e-05, | |
| "loss": 1.3876454830169678, | |
| "step": 842 | |
| }, | |
| { | |
| "epoch": 0.20074327337594766, | |
| "grad_norm": 0.453125, | |
| "learning_rate": 8e-05, | |
| "loss": 1.3795206546783447, | |
| "step": 844 | |
| }, | |
| { | |
| "epoch": 0.20121896833655417, | |
| "grad_norm": 0.482421875, | |
| "learning_rate": 8e-05, | |
| "loss": 1.3170604705810547, | |
| "step": 846 | |
| }, | |
| { | |
| "epoch": 0.2016946632971607, | |
| "grad_norm": 0.5078125, | |
| "learning_rate": 8e-05, | |
| "loss": 1.3548598289489746, | |
| "step": 848 | |
| }, | |
| { | |
| "epoch": 0.20217035825776722, | |
| "grad_norm": 0.5546875, | |
| "learning_rate": 8e-05, | |
| "loss": 1.359254002571106, | |
| "step": 850 | |
| }, | |
| { | |
| "epoch": 0.20264605321837373, | |
| "grad_norm": 0.46484375, | |
| "learning_rate": 8e-05, | |
| "loss": 1.389087438583374, | |
| "step": 852 | |
| }, | |
| { | |
| "epoch": 0.20312174817898024, | |
| "grad_norm": 0.447265625, | |
| "learning_rate": 8e-05, | |
| "loss": 1.3296732902526855, | |
| "step": 854 | |
| }, | |
| { | |
| "epoch": 0.20359744313958675, | |
| "grad_norm": 0.486328125, | |
| "learning_rate": 8e-05, | |
| "loss": 1.3714617490768433, | |
| "step": 856 | |
| }, | |
| { | |
| "epoch": 0.20407313810019326, | |
| "grad_norm": 0.4765625, | |
| "learning_rate": 8e-05, | |
| "loss": 1.3371829986572266, | |
| "step": 858 | |
| }, | |
| { | |
| "epoch": 0.20454883306079977, | |
| "grad_norm": 0.498046875, | |
| "learning_rate": 8e-05, | |
| "loss": 1.386389970779419, | |
| "step": 860 | |
| }, | |
| { | |
| "epoch": 0.20502452802140628, | |
| "grad_norm": 0.46484375, | |
| "learning_rate": 8e-05, | |
| "loss": 1.338608741760254, | |
| "step": 862 | |
| }, | |
| { | |
| "epoch": 0.2055002229820128, | |
| "grad_norm": 0.515625, | |
| "learning_rate": 8e-05, | |
| "loss": 1.328315019607544, | |
| "step": 864 | |
| }, | |
| { | |
| "epoch": 0.2059759179426193, | |
| "grad_norm": 0.4921875, | |
| "learning_rate": 8e-05, | |
| "loss": 1.3264660835266113, | |
| "step": 866 | |
| }, | |
| { | |
| "epoch": 0.2064516129032258, | |
| "grad_norm": 0.484375, | |
| "learning_rate": 8e-05, | |
| "loss": 1.4002896547317505, | |
| "step": 868 | |
| }, | |
| { | |
| "epoch": 0.20692730786383232, | |
| "grad_norm": 0.5, | |
| "learning_rate": 8e-05, | |
| "loss": 1.3713188171386719, | |
| "step": 870 | |
| }, | |
| { | |
| "epoch": 0.20740300282443883, | |
| "grad_norm": 0.5078125, | |
| "learning_rate": 8e-05, | |
| "loss": 1.350874900817871, | |
| "step": 872 | |
| }, | |
| { | |
| "epoch": 0.20787869778504534, | |
| "grad_norm": 0.5078125, | |
| "learning_rate": 8e-05, | |
| "loss": 1.3784689903259277, | |
| "step": 874 | |
| }, | |
| { | |
| "epoch": 0.20835439274565185, | |
| "grad_norm": 0.59765625, | |
| "learning_rate": 8e-05, | |
| "loss": 1.3428910970687866, | |
| "step": 876 | |
| }, | |
| { | |
| "epoch": 0.20883008770625835, | |
| "grad_norm": 0.48828125, | |
| "learning_rate": 8e-05, | |
| "loss": 1.3714317083358765, | |
| "step": 878 | |
| }, | |
| { | |
| "epoch": 0.20930578266686486, | |
| "grad_norm": 0.494140625, | |
| "learning_rate": 8e-05, | |
| "loss": 1.3759992122650146, | |
| "step": 880 | |
| }, | |
| { | |
| "epoch": 0.20978147762747137, | |
| "grad_norm": 0.484375, | |
| "learning_rate": 8e-05, | |
| "loss": 1.2990326881408691, | |
| "step": 882 | |
| }, | |
| { | |
| "epoch": 0.21025717258807788, | |
| "grad_norm": 0.462890625, | |
| "learning_rate": 8e-05, | |
| "loss": 1.3624963760375977, | |
| "step": 884 | |
| }, | |
| { | |
| "epoch": 0.2107328675486844, | |
| "grad_norm": 0.44140625, | |
| "learning_rate": 8e-05, | |
| "loss": 1.3324933052062988, | |
| "step": 886 | |
| }, | |
| { | |
| "epoch": 0.2112085625092909, | |
| "grad_norm": 0.44921875, | |
| "learning_rate": 8e-05, | |
| "loss": 1.4013808965682983, | |
| "step": 888 | |
| }, | |
| { | |
| "epoch": 0.21168425746989744, | |
| "grad_norm": 0.4765625, | |
| "learning_rate": 8e-05, | |
| "loss": 1.338510274887085, | |
| "step": 890 | |
| }, | |
| { | |
| "epoch": 0.21215995243050395, | |
| "grad_norm": 0.5078125, | |
| "learning_rate": 8e-05, | |
| "loss": 1.4126381874084473, | |
| "step": 892 | |
| }, | |
| { | |
| "epoch": 0.21263564739111046, | |
| "grad_norm": 0.484375, | |
| "learning_rate": 8e-05, | |
| "loss": 1.371493935585022, | |
| "step": 894 | |
| }, | |
| { | |
| "epoch": 0.21311134235171697, | |
| "grad_norm": 0.486328125, | |
| "learning_rate": 8e-05, | |
| "loss": 1.383441686630249, | |
| "step": 896 | |
| }, | |
| { | |
| "epoch": 0.21358703731232348, | |
| "grad_norm": 0.51953125, | |
| "learning_rate": 8e-05, | |
| "loss": 1.3659964799880981, | |
| "step": 898 | |
| }, | |
| { | |
| "epoch": 0.21406273227293, | |
| "grad_norm": 0.484375, | |
| "learning_rate": 8e-05, | |
| "loss": 1.2908456325531006, | |
| "step": 900 | |
| }, | |
| { | |
| "epoch": 0.2145384272335365, | |
| "grad_norm": 0.498046875, | |
| "learning_rate": 8e-05, | |
| "loss": 1.3610074520111084, | |
| "step": 902 | |
| }, | |
| { | |
| "epoch": 0.215014122194143, | |
| "grad_norm": 0.48828125, | |
| "learning_rate": 8e-05, | |
| "loss": 1.3580766916275024, | |
| "step": 904 | |
| }, | |
| { | |
| "epoch": 0.21548981715474952, | |
| "grad_norm": 0.4921875, | |
| "learning_rate": 8e-05, | |
| "loss": 1.458742618560791, | |
| "step": 906 | |
| }, | |
| { | |
| "epoch": 0.21596551211535603, | |
| "grad_norm": 0.4921875, | |
| "learning_rate": 8e-05, | |
| "loss": 1.2720565795898438, | |
| "step": 908 | |
| }, | |
| { | |
| "epoch": 0.21644120707596254, | |
| "grad_norm": 0.44140625, | |
| "learning_rate": 8e-05, | |
| "loss": 1.3226542472839355, | |
| "step": 910 | |
| }, | |
| { | |
| "epoch": 0.21691690203656905, | |
| "grad_norm": 0.46875, | |
| "learning_rate": 8e-05, | |
| "loss": 1.3100987672805786, | |
| "step": 912 | |
| }, | |
| { | |
| "epoch": 0.21739259699717556, | |
| "grad_norm": 0.482421875, | |
| "learning_rate": 8e-05, | |
| "loss": 1.3754308223724365, | |
| "step": 914 | |
| }, | |
| { | |
| "epoch": 0.21786829195778207, | |
| "grad_norm": 0.53515625, | |
| "learning_rate": 8e-05, | |
| "loss": 1.3694303035736084, | |
| "step": 916 | |
| }, | |
| { | |
| "epoch": 0.21834398691838858, | |
| "grad_norm": 0.515625, | |
| "learning_rate": 8e-05, | |
| "loss": 1.394423007965088, | |
| "step": 918 | |
| }, | |
| { | |
| "epoch": 0.2188196818789951, | |
| "grad_norm": 0.4609375, | |
| "learning_rate": 8e-05, | |
| "loss": 1.3577532768249512, | |
| "step": 920 | |
| }, | |
| { | |
| "epoch": 0.2192953768396016, | |
| "grad_norm": 0.453125, | |
| "learning_rate": 8e-05, | |
| "loss": 1.2522318363189697, | |
| "step": 922 | |
| }, | |
| { | |
| "epoch": 0.2197710718002081, | |
| "grad_norm": 0.466796875, | |
| "learning_rate": 8e-05, | |
| "loss": 1.3532583713531494, | |
| "step": 924 | |
| }, | |
| { | |
| "epoch": 0.22024676676081462, | |
| "grad_norm": 0.5078125, | |
| "learning_rate": 8e-05, | |
| "loss": 1.3816845417022705, | |
| "step": 926 | |
| }, | |
| { | |
| "epoch": 0.22072246172142113, | |
| "grad_norm": 0.494140625, | |
| "learning_rate": 8e-05, | |
| "loss": 1.362253189086914, | |
| "step": 928 | |
| }, | |
| { | |
| "epoch": 0.22119815668202766, | |
| "grad_norm": 0.494140625, | |
| "learning_rate": 8e-05, | |
| "loss": 1.3231050968170166, | |
| "step": 930 | |
| }, | |
| { | |
| "epoch": 0.22167385164263417, | |
| "grad_norm": 0.486328125, | |
| "learning_rate": 8e-05, | |
| "loss": 1.3791143894195557, | |
| "step": 932 | |
| }, | |
| { | |
| "epoch": 0.22214954660324068, | |
| "grad_norm": 0.470703125, | |
| "learning_rate": 8e-05, | |
| "loss": 1.3647040128707886, | |
| "step": 934 | |
| }, | |
| { | |
| "epoch": 0.2226252415638472, | |
| "grad_norm": 0.45703125, | |
| "learning_rate": 8e-05, | |
| "loss": 1.3556348085403442, | |
| "step": 936 | |
| }, | |
| { | |
| "epoch": 0.2231009365244537, | |
| "grad_norm": 0.435546875, | |
| "learning_rate": 8e-05, | |
| "loss": 1.3153495788574219, | |
| "step": 938 | |
| }, | |
| { | |
| "epoch": 0.2235766314850602, | |
| "grad_norm": 0.3984375, | |
| "learning_rate": 8e-05, | |
| "loss": 1.2898294925689697, | |
| "step": 940 | |
| }, | |
| { | |
| "epoch": 0.22405232644566672, | |
| "grad_norm": 0.419921875, | |
| "learning_rate": 8e-05, | |
| "loss": 1.3297260999679565, | |
| "step": 942 | |
| }, | |
| { | |
| "epoch": 0.22452802140627323, | |
| "grad_norm": 0.4375, | |
| "learning_rate": 8e-05, | |
| "loss": 1.3505053520202637, | |
| "step": 944 | |
| }, | |
| { | |
| "epoch": 0.22500371636687974, | |
| "grad_norm": 0.439453125, | |
| "learning_rate": 8e-05, | |
| "loss": 1.3983497619628906, | |
| "step": 946 | |
| }, | |
| { | |
| "epoch": 0.22547941132748625, | |
| "grad_norm": 0.431640625, | |
| "learning_rate": 8e-05, | |
| "loss": 1.3593679666519165, | |
| "step": 948 | |
| }, | |
| { | |
| "epoch": 0.22595510628809276, | |
| "grad_norm": 0.4296875, | |
| "learning_rate": 8e-05, | |
| "loss": 1.336477518081665, | |
| "step": 950 | |
| }, | |
| { | |
| "epoch": 0.22643080124869927, | |
| "grad_norm": 0.453125, | |
| "learning_rate": 8e-05, | |
| "loss": 1.3708462715148926, | |
| "step": 952 | |
| }, | |
| { | |
| "epoch": 0.22690649620930578, | |
| "grad_norm": 0.439453125, | |
| "learning_rate": 8e-05, | |
| "loss": 1.4065918922424316, | |
| "step": 954 | |
| }, | |
| { | |
| "epoch": 0.2273821911699123, | |
| "grad_norm": 0.484375, | |
| "learning_rate": 8e-05, | |
| "loss": 1.361413836479187, | |
| "step": 956 | |
| }, | |
| { | |
| "epoch": 0.2278578861305188, | |
| "grad_norm": 0.49609375, | |
| "learning_rate": 8e-05, | |
| "loss": 1.3337655067443848, | |
| "step": 958 | |
| }, | |
| { | |
| "epoch": 0.2283335810911253, | |
| "grad_norm": 0.443359375, | |
| "learning_rate": 8e-05, | |
| "loss": 1.2802634239196777, | |
| "step": 960 | |
| }, | |
| { | |
| "epoch": 0.22880927605173182, | |
| "grad_norm": 0.46484375, | |
| "learning_rate": 8e-05, | |
| "loss": 1.3333477973937988, | |
| "step": 962 | |
| }, | |
| { | |
| "epoch": 0.22928497101233833, | |
| "grad_norm": 0.48046875, | |
| "learning_rate": 8e-05, | |
| "loss": 1.3998594284057617, | |
| "step": 964 | |
| }, | |
| { | |
| "epoch": 0.22976066597294484, | |
| "grad_norm": 0.439453125, | |
| "learning_rate": 8e-05, | |
| "loss": 1.3377106189727783, | |
| "step": 966 | |
| }, | |
| { | |
| "epoch": 0.23023636093355135, | |
| "grad_norm": 0.458984375, | |
| "learning_rate": 8e-05, | |
| "loss": 1.2901934385299683, | |
| "step": 968 | |
| }, | |
| { | |
| "epoch": 0.23071205589415786, | |
| "grad_norm": 0.498046875, | |
| "learning_rate": 8e-05, | |
| "loss": 1.3435245752334595, | |
| "step": 970 | |
| }, | |
| { | |
| "epoch": 0.2311877508547644, | |
| "grad_norm": 0.486328125, | |
| "learning_rate": 8e-05, | |
| "loss": 1.3464173078536987, | |
| "step": 972 | |
| }, | |
| { | |
| "epoch": 0.2316634458153709, | |
| "grad_norm": 0.453125, | |
| "learning_rate": 8e-05, | |
| "loss": 1.3196808099746704, | |
| "step": 974 | |
| }, | |
| { | |
| "epoch": 0.23213914077597742, | |
| "grad_norm": 0.462890625, | |
| "learning_rate": 8e-05, | |
| "loss": 1.3490209579467773, | |
| "step": 976 | |
| }, | |
| { | |
| "epoch": 0.23261483573658392, | |
| "grad_norm": 0.474609375, | |
| "learning_rate": 8e-05, | |
| "loss": 1.3755543231964111, | |
| "step": 978 | |
| }, | |
| { | |
| "epoch": 0.23309053069719043, | |
| "grad_norm": 0.455078125, | |
| "learning_rate": 8e-05, | |
| "loss": 1.3209686279296875, | |
| "step": 980 | |
| }, | |
| { | |
| "epoch": 0.23356622565779694, | |
| "grad_norm": 0.474609375, | |
| "learning_rate": 8e-05, | |
| "loss": 1.3944644927978516, | |
| "step": 982 | |
| }, | |
| { | |
| "epoch": 0.23404192061840345, | |
| "grad_norm": 0.455078125, | |
| "learning_rate": 8e-05, | |
| "loss": 1.3737695217132568, | |
| "step": 984 | |
| }, | |
| { | |
| "epoch": 0.23451761557900996, | |
| "grad_norm": 0.447265625, | |
| "learning_rate": 8e-05, | |
| "loss": 1.352348804473877, | |
| "step": 986 | |
| }, | |
| { | |
| "epoch": 0.23499331053961647, | |
| "grad_norm": 0.42578125, | |
| "learning_rate": 8e-05, | |
| "loss": 1.2491270303726196, | |
| "step": 988 | |
| }, | |
| { | |
| "epoch": 0.23546900550022298, | |
| "grad_norm": 0.478515625, | |
| "learning_rate": 8e-05, | |
| "loss": 1.4017226696014404, | |
| "step": 990 | |
| }, | |
| { | |
| "epoch": 0.2359447004608295, | |
| "grad_norm": 0.46875, | |
| "learning_rate": 8e-05, | |
| "loss": 1.3329687118530273, | |
| "step": 992 | |
| }, | |
| { | |
| "epoch": 0.236420395421436, | |
| "grad_norm": 0.44921875, | |
| "learning_rate": 8e-05, | |
| "loss": 1.2956058979034424, | |
| "step": 994 | |
| }, | |
| { | |
| "epoch": 0.2368960903820425, | |
| "grad_norm": 0.462890625, | |
| "learning_rate": 8e-05, | |
| "loss": 1.376798391342163, | |
| "step": 996 | |
| }, | |
| { | |
| "epoch": 0.23737178534264902, | |
| "grad_norm": 0.486328125, | |
| "learning_rate": 8e-05, | |
| "loss": 1.3533029556274414, | |
| "step": 998 | |
| }, | |
| { | |
| "epoch": 0.23784748030325553, | |
| "grad_norm": 0.4453125, | |
| "learning_rate": 8e-05, | |
| "loss": 1.3347084522247314, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 0.23832317526386204, | |
| "grad_norm": 0.4296875, | |
| "learning_rate": 8e-05, | |
| "loss": 1.3693647384643555, | |
| "step": 1002 | |
| }, | |
| { | |
| "epoch": 0.23879887022446855, | |
| "grad_norm": 0.4453125, | |
| "learning_rate": 8e-05, | |
| "loss": 1.3653826713562012, | |
| "step": 1004 | |
| }, | |
| { | |
| "epoch": 0.23927456518507506, | |
| "grad_norm": 0.427734375, | |
| "learning_rate": 8e-05, | |
| "loss": 1.345597505569458, | |
| "step": 1006 | |
| }, | |
| { | |
| "epoch": 0.23975026014568157, | |
| "grad_norm": 0.443359375, | |
| "learning_rate": 8e-05, | |
| "loss": 1.2825236320495605, | |
| "step": 1008 | |
| }, | |
| { | |
| "epoch": 0.24022595510628808, | |
| "grad_norm": 0.44140625, | |
| "learning_rate": 8e-05, | |
| "loss": 1.3502631187438965, | |
| "step": 1010 | |
| }, | |
| { | |
| "epoch": 0.2407016500668946, | |
| "grad_norm": 0.41015625, | |
| "learning_rate": 8e-05, | |
| "loss": 1.3079496622085571, | |
| "step": 1012 | |
| }, | |
| { | |
| "epoch": 0.24117734502750113, | |
| "grad_norm": 0.423828125, | |
| "learning_rate": 8e-05, | |
| "loss": 1.3474180698394775, | |
| "step": 1014 | |
| }, | |
| { | |
| "epoch": 0.24165303998810764, | |
| "grad_norm": 0.4765625, | |
| "learning_rate": 8e-05, | |
| "loss": 1.3570088148117065, | |
| "step": 1016 | |
| }, | |
| { | |
| "epoch": 0.24212873494871415, | |
| "grad_norm": 0.466796875, | |
| "learning_rate": 8e-05, | |
| "loss": 1.3702566623687744, | |
| "step": 1018 | |
| }, | |
| { | |
| "epoch": 0.24260442990932066, | |
| "grad_norm": 0.453125, | |
| "learning_rate": 8e-05, | |
| "loss": 1.3773030042648315, | |
| "step": 1020 | |
| }, | |
| { | |
| "epoch": 0.24308012486992717, | |
| "grad_norm": 0.470703125, | |
| "learning_rate": 8e-05, | |
| "loss": 1.333245873451233, | |
| "step": 1022 | |
| }, | |
| { | |
| "epoch": 0.24355581983053368, | |
| "grad_norm": 0.4609375, | |
| "learning_rate": 8e-05, | |
| "loss": 1.3305965662002563, | |
| "step": 1024 | |
| }, | |
| { | |
| "epoch": 0.24403151479114019, | |
| "grad_norm": 0.435546875, | |
| "learning_rate": 8e-05, | |
| "loss": 1.3149254322052002, | |
| "step": 1026 | |
| }, | |
| { | |
| "epoch": 0.2445072097517467, | |
| "grad_norm": 0.423828125, | |
| "learning_rate": 8e-05, | |
| "loss": 1.34800124168396, | |
| "step": 1028 | |
| }, | |
| { | |
| "epoch": 0.2449829047123532, | |
| "grad_norm": 0.443359375, | |
| "learning_rate": 8e-05, | |
| "loss": 1.3632348775863647, | |
| "step": 1030 | |
| }, | |
| { | |
| "epoch": 0.24545859967295972, | |
| "grad_norm": 0.482421875, | |
| "learning_rate": 8e-05, | |
| "loss": 1.3612074851989746, | |
| "step": 1032 | |
| }, | |
| { | |
| "epoch": 0.24593429463356622, | |
| "grad_norm": 0.439453125, | |
| "learning_rate": 8e-05, | |
| "loss": 1.3488757610321045, | |
| "step": 1034 | |
| }, | |
| { | |
| "epoch": 0.24640998959417273, | |
| "grad_norm": 0.451171875, | |
| "learning_rate": 8e-05, | |
| "loss": 1.3559046983718872, | |
| "step": 1036 | |
| }, | |
| { | |
| "epoch": 0.24688568455477924, | |
| "grad_norm": 0.453125, | |
| "learning_rate": 8e-05, | |
| "loss": 1.3708908557891846, | |
| "step": 1038 | |
| }, | |
| { | |
| "epoch": 0.24736137951538575, | |
| "grad_norm": 0.44140625, | |
| "learning_rate": 8e-05, | |
| "loss": 1.33760666847229, | |
| "step": 1040 | |
| }, | |
| { | |
| "epoch": 0.24783707447599226, | |
| "grad_norm": 0.427734375, | |
| "learning_rate": 8e-05, | |
| "loss": 1.2863125801086426, | |
| "step": 1042 | |
| }, | |
| { | |
| "epoch": 0.24831276943659877, | |
| "grad_norm": 0.435546875, | |
| "learning_rate": 8e-05, | |
| "loss": 1.3174580335617065, | |
| "step": 1044 | |
| }, | |
| { | |
| "epoch": 0.24878846439720528, | |
| "grad_norm": 0.45703125, | |
| "learning_rate": 8e-05, | |
| "loss": 1.3641953468322754, | |
| "step": 1046 | |
| }, | |
| { | |
| "epoch": 0.2492641593578118, | |
| "grad_norm": 0.443359375, | |
| "learning_rate": 8e-05, | |
| "loss": 1.3469069004058838, | |
| "step": 1048 | |
| }, | |
| { | |
| "epoch": 0.2497398543184183, | |
| "grad_norm": 0.455078125, | |
| "learning_rate": 8e-05, | |
| "loss": 1.3538458347320557, | |
| "step": 1050 | |
| }, | |
| { | |
| "epoch": 0.25021554927902484, | |
| "grad_norm": 0.48046875, | |
| "learning_rate": 8e-05, | |
| "loss": 1.354750633239746, | |
| "step": 1052 | |
| }, | |
| { | |
| "epoch": 0.25069124423963135, | |
| "grad_norm": 0.4609375, | |
| "learning_rate": 8e-05, | |
| "loss": 1.3567293882369995, | |
| "step": 1054 | |
| }, | |
| { | |
| "epoch": 0.25116693920023786, | |
| "grad_norm": 0.4921875, | |
| "learning_rate": 8e-05, | |
| "loss": 1.3444650173187256, | |
| "step": 1056 | |
| }, | |
| { | |
| "epoch": 0.25164263416084437, | |
| "grad_norm": 0.4296875, | |
| "learning_rate": 8e-05, | |
| "loss": 1.3146984577178955, | |
| "step": 1058 | |
| }, | |
| { | |
| "epoch": 0.2521183291214509, | |
| "grad_norm": 0.486328125, | |
| "learning_rate": 8e-05, | |
| "loss": 1.3101708889007568, | |
| "step": 1060 | |
| }, | |
| { | |
| "epoch": 0.2525940240820574, | |
| "grad_norm": 0.48046875, | |
| "learning_rate": 8e-05, | |
| "loss": 1.3136630058288574, | |
| "step": 1062 | |
| }, | |
| { | |
| "epoch": 0.2530697190426639, | |
| "grad_norm": 0.462890625, | |
| "learning_rate": 8e-05, | |
| "loss": 1.35286545753479, | |
| "step": 1064 | |
| }, | |
| { | |
| "epoch": 0.2535454140032704, | |
| "grad_norm": 0.462890625, | |
| "learning_rate": 8e-05, | |
| "loss": 1.319314956665039, | |
| "step": 1066 | |
| }, | |
| { | |
| "epoch": 0.2540211089638769, | |
| "grad_norm": 0.455078125, | |
| "learning_rate": 8e-05, | |
| "loss": 1.3844151496887207, | |
| "step": 1068 | |
| }, | |
| { | |
| "epoch": 0.25449680392448343, | |
| "grad_norm": 0.421875, | |
| "learning_rate": 8e-05, | |
| "loss": 1.2962524890899658, | |
| "step": 1070 | |
| }, | |
| { | |
| "epoch": 0.25497249888508994, | |
| "grad_norm": 0.44921875, | |
| "learning_rate": 8e-05, | |
| "loss": 1.3922219276428223, | |
| "step": 1072 | |
| }, | |
| { | |
| "epoch": 0.25544819384569645, | |
| "grad_norm": 0.435546875, | |
| "learning_rate": 8e-05, | |
| "loss": 1.3131260871887207, | |
| "step": 1074 | |
| }, | |
| { | |
| "epoch": 0.25592388880630296, | |
| "grad_norm": 0.423828125, | |
| "learning_rate": 8e-05, | |
| "loss": 1.322244644165039, | |
| "step": 1076 | |
| }, | |
| { | |
| "epoch": 0.25639958376690947, | |
| "grad_norm": 0.462890625, | |
| "learning_rate": 8e-05, | |
| "loss": 1.3516499996185303, | |
| "step": 1078 | |
| }, | |
| { | |
| "epoch": 0.256875278727516, | |
| "grad_norm": 0.470703125, | |
| "learning_rate": 8e-05, | |
| "loss": 1.3630871772766113, | |
| "step": 1080 | |
| }, | |
| { | |
| "epoch": 0.2573509736881225, | |
| "grad_norm": 0.431640625, | |
| "learning_rate": 8e-05, | |
| "loss": 1.3094751834869385, | |
| "step": 1082 | |
| }, | |
| { | |
| "epoch": 0.257826668648729, | |
| "grad_norm": 0.484375, | |
| "learning_rate": 8e-05, | |
| "loss": 1.306774616241455, | |
| "step": 1084 | |
| }, | |
| { | |
| "epoch": 0.2583023636093355, | |
| "grad_norm": 0.443359375, | |
| "learning_rate": 8e-05, | |
| "loss": 1.3254430294036865, | |
| "step": 1086 | |
| }, | |
| { | |
| "epoch": 0.258778058569942, | |
| "grad_norm": 0.45703125, | |
| "learning_rate": 8e-05, | |
| "loss": 1.306220293045044, | |
| "step": 1088 | |
| }, | |
| { | |
| "epoch": 0.2592537535305485, | |
| "grad_norm": 0.431640625, | |
| "learning_rate": 8e-05, | |
| "loss": 1.337794303894043, | |
| "step": 1090 | |
| }, | |
| { | |
| "epoch": 0.25972944849115503, | |
| "grad_norm": 0.42578125, | |
| "learning_rate": 8e-05, | |
| "loss": 1.324033260345459, | |
| "step": 1092 | |
| }, | |
| { | |
| "epoch": 0.26020514345176154, | |
| "grad_norm": 0.46484375, | |
| "learning_rate": 8e-05, | |
| "loss": 1.3060619831085205, | |
| "step": 1094 | |
| }, | |
| { | |
| "epoch": 0.26068083841236805, | |
| "grad_norm": 0.46875, | |
| "learning_rate": 8e-05, | |
| "loss": 1.411613941192627, | |
| "step": 1096 | |
| }, | |
| { | |
| "epoch": 0.26115653337297456, | |
| "grad_norm": 0.451171875, | |
| "learning_rate": 8e-05, | |
| "loss": 1.3333206176757812, | |
| "step": 1098 | |
| }, | |
| { | |
| "epoch": 0.2616322283335811, | |
| "grad_norm": 0.494140625, | |
| "learning_rate": 8e-05, | |
| "loss": 1.3340492248535156, | |
| "step": 1100 | |
| }, | |
| { | |
| "epoch": 0.2621079232941876, | |
| "grad_norm": 0.5234375, | |
| "learning_rate": 8e-05, | |
| "loss": 1.3538923263549805, | |
| "step": 1102 | |
| }, | |
| { | |
| "epoch": 0.2625836182547941, | |
| "grad_norm": 0.443359375, | |
| "learning_rate": 8e-05, | |
| "loss": 1.2844221591949463, | |
| "step": 1104 | |
| }, | |
| { | |
| "epoch": 0.2630593132154006, | |
| "grad_norm": 0.41015625, | |
| "learning_rate": 8e-05, | |
| "loss": 1.2786577939987183, | |
| "step": 1106 | |
| }, | |
| { | |
| "epoch": 0.2635350081760071, | |
| "grad_norm": 0.408203125, | |
| "learning_rate": 8e-05, | |
| "loss": 1.3393871784210205, | |
| "step": 1108 | |
| }, | |
| { | |
| "epoch": 0.2640107031366136, | |
| "grad_norm": 0.451171875, | |
| "learning_rate": 8e-05, | |
| "loss": 1.3317300081253052, | |
| "step": 1110 | |
| }, | |
| { | |
| "epoch": 0.26448639809722013, | |
| "grad_norm": 0.470703125, | |
| "learning_rate": 8e-05, | |
| "loss": 1.329606056213379, | |
| "step": 1112 | |
| }, | |
| { | |
| "epoch": 0.26496209305782664, | |
| "grad_norm": 0.455078125, | |
| "learning_rate": 8e-05, | |
| "loss": 1.3412857055664062, | |
| "step": 1114 | |
| }, | |
| { | |
| "epoch": 0.2654377880184332, | |
| "grad_norm": 0.44140625, | |
| "learning_rate": 8e-05, | |
| "loss": 1.3141382932662964, | |
| "step": 1116 | |
| }, | |
| { | |
| "epoch": 0.2659134829790397, | |
| "grad_norm": 0.44921875, | |
| "learning_rate": 8e-05, | |
| "loss": 1.3066372871398926, | |
| "step": 1118 | |
| }, | |
| { | |
| "epoch": 0.2663891779396462, | |
| "grad_norm": 0.416015625, | |
| "learning_rate": 8e-05, | |
| "loss": 1.336663007736206, | |
| "step": 1120 | |
| }, | |
| { | |
| "epoch": 0.26686487290025274, | |
| "grad_norm": 0.396484375, | |
| "learning_rate": 8e-05, | |
| "loss": 1.3393672704696655, | |
| "step": 1122 | |
| }, | |
| { | |
| "epoch": 0.26734056786085925, | |
| "grad_norm": 0.4296875, | |
| "learning_rate": 8e-05, | |
| "loss": 1.338028907775879, | |
| "step": 1124 | |
| }, | |
| { | |
| "epoch": 0.26781626282146576, | |
| "grad_norm": 0.447265625, | |
| "learning_rate": 8e-05, | |
| "loss": 1.25938880443573, | |
| "step": 1126 | |
| }, | |
| { | |
| "epoch": 0.26829195778207227, | |
| "grad_norm": 0.5, | |
| "learning_rate": 8e-05, | |
| "loss": 1.3836978673934937, | |
| "step": 1128 | |
| }, | |
| { | |
| "epoch": 0.2687676527426788, | |
| "grad_norm": 0.45703125, | |
| "learning_rate": 8e-05, | |
| "loss": 1.3326656818389893, | |
| "step": 1130 | |
| }, | |
| { | |
| "epoch": 0.2692433477032853, | |
| "grad_norm": 0.470703125, | |
| "learning_rate": 8e-05, | |
| "loss": 1.2927348613739014, | |
| "step": 1132 | |
| }, | |
| { | |
| "epoch": 0.2697190426638918, | |
| "grad_norm": 0.52734375, | |
| "learning_rate": 8e-05, | |
| "loss": 1.4277849197387695, | |
| "step": 1134 | |
| }, | |
| { | |
| "epoch": 0.2701947376244983, | |
| "grad_norm": 0.498046875, | |
| "learning_rate": 8e-05, | |
| "loss": 1.3989144563674927, | |
| "step": 1136 | |
| }, | |
| { | |
| "epoch": 0.2706704325851048, | |
| "grad_norm": 0.45703125, | |
| "learning_rate": 8e-05, | |
| "loss": 1.3444643020629883, | |
| "step": 1138 | |
| }, | |
| { | |
| "epoch": 0.2711461275457113, | |
| "grad_norm": 0.42578125, | |
| "learning_rate": 8e-05, | |
| "loss": 1.2833266258239746, | |
| "step": 1140 | |
| }, | |
| { | |
| "epoch": 0.27162182250631783, | |
| "grad_norm": 0.455078125, | |
| "learning_rate": 8e-05, | |
| "loss": 1.358147382736206, | |
| "step": 1142 | |
| }, | |
| { | |
| "epoch": 0.27209751746692434, | |
| "grad_norm": 0.443359375, | |
| "learning_rate": 8e-05, | |
| "loss": 1.3105173110961914, | |
| "step": 1144 | |
| }, | |
| { | |
| "epoch": 0.27257321242753085, | |
| "grad_norm": 0.478515625, | |
| "learning_rate": 8e-05, | |
| "loss": 1.3114371299743652, | |
| "step": 1146 | |
| }, | |
| { | |
| "epoch": 0.27304890738813736, | |
| "grad_norm": 0.443359375, | |
| "learning_rate": 8e-05, | |
| "loss": 1.2814993858337402, | |
| "step": 1148 | |
| }, | |
| { | |
| "epoch": 0.2735246023487439, | |
| "grad_norm": 0.455078125, | |
| "learning_rate": 8e-05, | |
| "loss": 1.3203294277191162, | |
| "step": 1150 | |
| }, | |
| { | |
| "epoch": 0.2740002973093504, | |
| "grad_norm": 0.431640625, | |
| "learning_rate": 8e-05, | |
| "loss": 1.2979755401611328, | |
| "step": 1152 | |
| }, | |
| { | |
| "epoch": 0.2744759922699569, | |
| "grad_norm": 0.42578125, | |
| "learning_rate": 8e-05, | |
| "loss": 1.3081634044647217, | |
| "step": 1154 | |
| }, | |
| { | |
| "epoch": 0.2749516872305634, | |
| "grad_norm": 0.423828125, | |
| "learning_rate": 8e-05, | |
| "loss": 1.3281807899475098, | |
| "step": 1156 | |
| }, | |
| { | |
| "epoch": 0.2754273821911699, | |
| "grad_norm": 0.42578125, | |
| "learning_rate": 8e-05, | |
| "loss": 1.3202593326568604, | |
| "step": 1158 | |
| }, | |
| { | |
| "epoch": 0.2759030771517764, | |
| "grad_norm": 0.43359375, | |
| "learning_rate": 8e-05, | |
| "loss": 1.3806310892105103, | |
| "step": 1160 | |
| }, | |
| { | |
| "epoch": 0.27637877211238293, | |
| "grad_norm": 0.43359375, | |
| "learning_rate": 8e-05, | |
| "loss": 1.3173789978027344, | |
| "step": 1162 | |
| }, | |
| { | |
| "epoch": 0.27685446707298944, | |
| "grad_norm": 0.41796875, | |
| "learning_rate": 8e-05, | |
| "loss": 1.3223962783813477, | |
| "step": 1164 | |
| }, | |
| { | |
| "epoch": 0.27733016203359595, | |
| "grad_norm": 0.421875, | |
| "learning_rate": 8e-05, | |
| "loss": 1.3117542266845703, | |
| "step": 1166 | |
| }, | |
| { | |
| "epoch": 0.27780585699420246, | |
| "grad_norm": 0.416015625, | |
| "learning_rate": 8e-05, | |
| "loss": 1.3533828258514404, | |
| "step": 1168 | |
| }, | |
| { | |
| "epoch": 0.27828155195480897, | |
| "grad_norm": 0.427734375, | |
| "learning_rate": 8e-05, | |
| "loss": 1.3214187622070312, | |
| "step": 1170 | |
| }, | |
| { | |
| "epoch": 0.2787572469154155, | |
| "grad_norm": 0.458984375, | |
| "learning_rate": 8e-05, | |
| "loss": 1.3294625282287598, | |
| "step": 1172 | |
| }, | |
| { | |
| "epoch": 0.279232941876022, | |
| "grad_norm": 0.42578125, | |
| "learning_rate": 8e-05, | |
| "loss": 1.3171840906143188, | |
| "step": 1174 | |
| }, | |
| { | |
| "epoch": 0.2797086368366285, | |
| "grad_norm": 0.439453125, | |
| "learning_rate": 8e-05, | |
| "loss": 1.3028992414474487, | |
| "step": 1176 | |
| }, | |
| { | |
| "epoch": 0.280184331797235, | |
| "grad_norm": 0.47265625, | |
| "learning_rate": 8e-05, | |
| "loss": 1.3446723222732544, | |
| "step": 1178 | |
| }, | |
| { | |
| "epoch": 0.2806600267578415, | |
| "grad_norm": 0.462890625, | |
| "learning_rate": 8e-05, | |
| "loss": 1.296749472618103, | |
| "step": 1180 | |
| }, | |
| { | |
| "epoch": 0.28113572171844803, | |
| "grad_norm": 0.5078125, | |
| "learning_rate": 8e-05, | |
| "loss": 1.3563461303710938, | |
| "step": 1182 | |
| }, | |
| { | |
| "epoch": 0.28161141667905454, | |
| "grad_norm": 0.5, | |
| "learning_rate": 8e-05, | |
| "loss": 1.3181467056274414, | |
| "step": 1184 | |
| }, | |
| { | |
| "epoch": 0.28208711163966105, | |
| "grad_norm": 0.48046875, | |
| "learning_rate": 8e-05, | |
| "loss": 1.3786540031433105, | |
| "step": 1186 | |
| }, | |
| { | |
| "epoch": 0.28256280660026756, | |
| "grad_norm": 0.439453125, | |
| "learning_rate": 8e-05, | |
| "loss": 1.3328609466552734, | |
| "step": 1188 | |
| }, | |
| { | |
| "epoch": 0.28303850156087407, | |
| "grad_norm": 0.439453125, | |
| "learning_rate": 8e-05, | |
| "loss": 1.3462462425231934, | |
| "step": 1190 | |
| }, | |
| { | |
| "epoch": 0.2835141965214806, | |
| "grad_norm": 0.455078125, | |
| "learning_rate": 8e-05, | |
| "loss": 1.3369724750518799, | |
| "step": 1192 | |
| }, | |
| { | |
| "epoch": 0.2839898914820871, | |
| "grad_norm": 0.43359375, | |
| "learning_rate": 8e-05, | |
| "loss": 1.3191611766815186, | |
| "step": 1194 | |
| }, | |
| { | |
| "epoch": 0.2844655864426936, | |
| "grad_norm": 0.443359375, | |
| "learning_rate": 8e-05, | |
| "loss": 1.3197510242462158, | |
| "step": 1196 | |
| }, | |
| { | |
| "epoch": 0.2849412814033001, | |
| "grad_norm": 0.45703125, | |
| "learning_rate": 8e-05, | |
| "loss": 1.3486484289169312, | |
| "step": 1198 | |
| }, | |
| { | |
| "epoch": 0.28541697636390667, | |
| "grad_norm": 0.44140625, | |
| "learning_rate": 8e-05, | |
| "loss": 1.3362209796905518, | |
| "step": 1200 | |
| }, | |
| { | |
| "epoch": 0.2858926713245132, | |
| "grad_norm": 0.427734375, | |
| "learning_rate": 8e-05, | |
| "loss": 1.322070837020874, | |
| "step": 1202 | |
| }, | |
| { | |
| "epoch": 0.2863683662851197, | |
| "grad_norm": 0.431640625, | |
| "learning_rate": 8e-05, | |
| "loss": 1.3231661319732666, | |
| "step": 1204 | |
| }, | |
| { | |
| "epoch": 0.2868440612457262, | |
| "grad_norm": 0.421875, | |
| "learning_rate": 8e-05, | |
| "loss": 1.2671769857406616, | |
| "step": 1206 | |
| }, | |
| { | |
| "epoch": 0.2873197562063327, | |
| "grad_norm": 0.451171875, | |
| "learning_rate": 8e-05, | |
| "loss": 1.3193705081939697, | |
| "step": 1208 | |
| }, | |
| { | |
| "epoch": 0.2877954511669392, | |
| "grad_norm": 0.42578125, | |
| "learning_rate": 8e-05, | |
| "loss": 1.3454172611236572, | |
| "step": 1210 | |
| }, | |
| { | |
| "epoch": 0.28827114612754573, | |
| "grad_norm": 0.4375, | |
| "learning_rate": 8e-05, | |
| "loss": 1.351203441619873, | |
| "step": 1212 | |
| }, | |
| { | |
| "epoch": 0.28874684108815224, | |
| "grad_norm": 0.43359375, | |
| "learning_rate": 8e-05, | |
| "loss": 1.3364512920379639, | |
| "step": 1214 | |
| }, | |
| { | |
| "epoch": 0.28922253604875875, | |
| "grad_norm": 0.46875, | |
| "learning_rate": 8e-05, | |
| "loss": 1.3189178705215454, | |
| "step": 1216 | |
| }, | |
| { | |
| "epoch": 0.28969823100936526, | |
| "grad_norm": 0.48828125, | |
| "learning_rate": 8e-05, | |
| "loss": 1.2867472171783447, | |
| "step": 1218 | |
| }, | |
| { | |
| "epoch": 0.29017392596997177, | |
| "grad_norm": 0.46484375, | |
| "learning_rate": 8e-05, | |
| "loss": 1.3088388442993164, | |
| "step": 1220 | |
| }, | |
| { | |
| "epoch": 0.2906496209305783, | |
| "grad_norm": 0.431640625, | |
| "learning_rate": 8e-05, | |
| "loss": 1.2755553722381592, | |
| "step": 1222 | |
| }, | |
| { | |
| "epoch": 0.2911253158911848, | |
| "grad_norm": 0.44140625, | |
| "learning_rate": 8e-05, | |
| "loss": 1.3116247653961182, | |
| "step": 1224 | |
| }, | |
| { | |
| "epoch": 0.2916010108517913, | |
| "grad_norm": 0.4453125, | |
| "learning_rate": 8e-05, | |
| "loss": 1.2760411500930786, | |
| "step": 1226 | |
| }, | |
| { | |
| "epoch": 0.2920767058123978, | |
| "grad_norm": 0.435546875, | |
| "learning_rate": 8e-05, | |
| "loss": 1.3090481758117676, | |
| "step": 1228 | |
| }, | |
| { | |
| "epoch": 0.2925524007730043, | |
| "grad_norm": 0.439453125, | |
| "learning_rate": 8e-05, | |
| "loss": 1.3445281982421875, | |
| "step": 1230 | |
| }, | |
| { | |
| "epoch": 0.2930280957336108, | |
| "grad_norm": 0.427734375, | |
| "learning_rate": 8e-05, | |
| "loss": 1.3193836212158203, | |
| "step": 1232 | |
| }, | |
| { | |
| "epoch": 0.29350379069421734, | |
| "grad_norm": 0.3984375, | |
| "learning_rate": 8e-05, | |
| "loss": 1.20814847946167, | |
| "step": 1234 | |
| }, | |
| { | |
| "epoch": 0.29397948565482385, | |
| "grad_norm": 0.419921875, | |
| "learning_rate": 8e-05, | |
| "loss": 1.266977310180664, | |
| "step": 1236 | |
| }, | |
| { | |
| "epoch": 0.29445518061543036, | |
| "grad_norm": 0.486328125, | |
| "learning_rate": 8e-05, | |
| "loss": 1.3388676643371582, | |
| "step": 1238 | |
| }, | |
| { | |
| "epoch": 0.29493087557603687, | |
| "grad_norm": 0.46484375, | |
| "learning_rate": 8e-05, | |
| "loss": 1.3216158151626587, | |
| "step": 1240 | |
| }, | |
| { | |
| "epoch": 0.2954065705366434, | |
| "grad_norm": 0.4609375, | |
| "learning_rate": 8e-05, | |
| "loss": 1.3377256393432617, | |
| "step": 1242 | |
| }, | |
| { | |
| "epoch": 0.2958822654972499, | |
| "grad_norm": 0.45703125, | |
| "learning_rate": 8e-05, | |
| "loss": 1.3120627403259277, | |
| "step": 1244 | |
| }, | |
| { | |
| "epoch": 0.2963579604578564, | |
| "grad_norm": 0.482421875, | |
| "learning_rate": 8e-05, | |
| "loss": 1.38155198097229, | |
| "step": 1246 | |
| }, | |
| { | |
| "epoch": 0.2968336554184629, | |
| "grad_norm": 0.44921875, | |
| "learning_rate": 8e-05, | |
| "loss": 1.3631592988967896, | |
| "step": 1248 | |
| }, | |
| { | |
| "epoch": 0.2973093503790694, | |
| "grad_norm": 0.466796875, | |
| "learning_rate": 8e-05, | |
| "loss": 1.342321515083313, | |
| "step": 1250 | |
| }, | |
| { | |
| "epoch": 0.2977850453396759, | |
| "grad_norm": 0.478515625, | |
| "learning_rate": 8e-05, | |
| "loss": 1.391056776046753, | |
| "step": 1252 | |
| }, | |
| { | |
| "epoch": 0.29826074030028243, | |
| "grad_norm": 0.447265625, | |
| "learning_rate": 8e-05, | |
| "loss": 1.275477409362793, | |
| "step": 1254 | |
| }, | |
| { | |
| "epoch": 0.29873643526088894, | |
| "grad_norm": 0.462890625, | |
| "learning_rate": 8e-05, | |
| "loss": 1.3247549533843994, | |
| "step": 1256 | |
| }, | |
| { | |
| "epoch": 0.29921213022149545, | |
| "grad_norm": 0.439453125, | |
| "learning_rate": 8e-05, | |
| "loss": 1.3454852104187012, | |
| "step": 1258 | |
| }, | |
| { | |
| "epoch": 0.29968782518210196, | |
| "grad_norm": 0.46875, | |
| "learning_rate": 8e-05, | |
| "loss": 1.3124552965164185, | |
| "step": 1260 | |
| }, | |
| { | |
| "epoch": 0.3001635201427085, | |
| "grad_norm": 0.43359375, | |
| "learning_rate": 8e-05, | |
| "loss": 1.309496521949768, | |
| "step": 1262 | |
| }, | |
| { | |
| "epoch": 0.300639215103315, | |
| "grad_norm": 0.412109375, | |
| "learning_rate": 8e-05, | |
| "loss": 1.3047943115234375, | |
| "step": 1264 | |
| }, | |
| { | |
| "epoch": 0.3011149100639215, | |
| "grad_norm": 0.427734375, | |
| "learning_rate": 8e-05, | |
| "loss": 1.348783016204834, | |
| "step": 1266 | |
| }, | |
| { | |
| "epoch": 0.301590605024528, | |
| "grad_norm": 0.4453125, | |
| "learning_rate": 8e-05, | |
| "loss": 1.2833664417266846, | |
| "step": 1268 | |
| }, | |
| { | |
| "epoch": 0.3020662999851345, | |
| "grad_norm": 0.44921875, | |
| "learning_rate": 8e-05, | |
| "loss": 1.3276420831680298, | |
| "step": 1270 | |
| }, | |
| { | |
| "epoch": 0.302541994945741, | |
| "grad_norm": 0.427734375, | |
| "learning_rate": 8e-05, | |
| "loss": 1.3440744876861572, | |
| "step": 1272 | |
| }, | |
| { | |
| "epoch": 0.30301768990634753, | |
| "grad_norm": 0.421875, | |
| "learning_rate": 8e-05, | |
| "loss": 1.2924749851226807, | |
| "step": 1274 | |
| }, | |
| { | |
| "epoch": 0.30349338486695404, | |
| "grad_norm": 0.4140625, | |
| "learning_rate": 8e-05, | |
| "loss": 1.3171639442443848, | |
| "step": 1276 | |
| }, | |
| { | |
| "epoch": 0.30396907982756055, | |
| "grad_norm": 0.435546875, | |
| "learning_rate": 8e-05, | |
| "loss": 1.3555333614349365, | |
| "step": 1278 | |
| }, | |
| { | |
| "epoch": 0.30444477478816706, | |
| "grad_norm": 0.421875, | |
| "learning_rate": 8e-05, | |
| "loss": 1.2927653789520264, | |
| "step": 1280 | |
| }, | |
| { | |
| "epoch": 0.3049204697487736, | |
| "grad_norm": 0.4296875, | |
| "learning_rate": 8e-05, | |
| "loss": 1.34412682056427, | |
| "step": 1282 | |
| }, | |
| { | |
| "epoch": 0.30539616470938014, | |
| "grad_norm": 0.42578125, | |
| "learning_rate": 8e-05, | |
| "loss": 1.3178520202636719, | |
| "step": 1284 | |
| }, | |
| { | |
| "epoch": 0.30587185966998665, | |
| "grad_norm": 0.4296875, | |
| "learning_rate": 8e-05, | |
| "loss": 1.278599739074707, | |
| "step": 1286 | |
| }, | |
| { | |
| "epoch": 0.30634755463059316, | |
| "grad_norm": 0.404296875, | |
| "learning_rate": 8e-05, | |
| "loss": 1.254746437072754, | |
| "step": 1288 | |
| }, | |
| { | |
| "epoch": 0.30682324959119966, | |
| "grad_norm": 0.4296875, | |
| "learning_rate": 8e-05, | |
| "loss": 1.3239991664886475, | |
| "step": 1290 | |
| }, | |
| { | |
| "epoch": 0.3072989445518062, | |
| "grad_norm": 0.4140625, | |
| "learning_rate": 8e-05, | |
| "loss": 1.2205564975738525, | |
| "step": 1292 | |
| }, | |
| { | |
| "epoch": 0.3077746395124127, | |
| "grad_norm": 0.412109375, | |
| "learning_rate": 8e-05, | |
| "loss": 1.301189661026001, | |
| "step": 1294 | |
| }, | |
| { | |
| "epoch": 0.3082503344730192, | |
| "grad_norm": 0.404296875, | |
| "learning_rate": 8e-05, | |
| "loss": 1.3148789405822754, | |
| "step": 1296 | |
| }, | |
| { | |
| "epoch": 0.3087260294336257, | |
| "grad_norm": 0.412109375, | |
| "learning_rate": 8e-05, | |
| "loss": 1.3096203804016113, | |
| "step": 1298 | |
| }, | |
| { | |
| "epoch": 0.3092017243942322, | |
| "grad_norm": 0.41015625, | |
| "learning_rate": 8e-05, | |
| "loss": 1.3105592727661133, | |
| "step": 1300 | |
| }, | |
| { | |
| "epoch": 0.3096774193548387, | |
| "grad_norm": 0.41796875, | |
| "learning_rate": 8e-05, | |
| "loss": 1.312976360321045, | |
| "step": 1302 | |
| }, | |
| { | |
| "epoch": 0.31015311431544523, | |
| "grad_norm": 0.412109375, | |
| "learning_rate": 8e-05, | |
| "loss": 1.2987452745437622, | |
| "step": 1304 | |
| }, | |
| { | |
| "epoch": 0.31062880927605174, | |
| "grad_norm": 0.416015625, | |
| "learning_rate": 8e-05, | |
| "loss": 1.3321504592895508, | |
| "step": 1306 | |
| }, | |
| { | |
| "epoch": 0.31110450423665825, | |
| "grad_norm": 0.41796875, | |
| "learning_rate": 8e-05, | |
| "loss": 1.2754004001617432, | |
| "step": 1308 | |
| }, | |
| { | |
| "epoch": 0.31158019919726476, | |
| "grad_norm": 0.4375, | |
| "learning_rate": 8e-05, | |
| "loss": 1.3137989044189453, | |
| "step": 1310 | |
| }, | |
| { | |
| "epoch": 0.31205589415787127, | |
| "grad_norm": 0.41796875, | |
| "learning_rate": 8e-05, | |
| "loss": 1.311619520187378, | |
| "step": 1312 | |
| }, | |
| { | |
| "epoch": 0.3125315891184778, | |
| "grad_norm": 0.44140625, | |
| "learning_rate": 8e-05, | |
| "loss": 1.3559669256210327, | |
| "step": 1314 | |
| }, | |
| { | |
| "epoch": 0.3130072840790843, | |
| "grad_norm": 0.43359375, | |
| "learning_rate": 8e-05, | |
| "loss": 1.3143202066421509, | |
| "step": 1316 | |
| }, | |
| { | |
| "epoch": 0.3134829790396908, | |
| "grad_norm": 0.431640625, | |
| "learning_rate": 8e-05, | |
| "loss": 1.27866530418396, | |
| "step": 1318 | |
| }, | |
| { | |
| "epoch": 0.3139586740002973, | |
| "grad_norm": 0.43359375, | |
| "learning_rate": 8e-05, | |
| "loss": 1.3592901229858398, | |
| "step": 1320 | |
| }, | |
| { | |
| "epoch": 0.3144343689609038, | |
| "grad_norm": 0.41796875, | |
| "learning_rate": 8e-05, | |
| "loss": 1.2884161472320557, | |
| "step": 1322 | |
| }, | |
| { | |
| "epoch": 0.31491006392151033, | |
| "grad_norm": 0.423828125, | |
| "learning_rate": 8e-05, | |
| "loss": 1.3565433025360107, | |
| "step": 1324 | |
| }, | |
| { | |
| "epoch": 0.31538575888211684, | |
| "grad_norm": 0.4140625, | |
| "learning_rate": 8e-05, | |
| "loss": 1.237494945526123, | |
| "step": 1326 | |
| }, | |
| { | |
| "epoch": 0.31586145384272335, | |
| "grad_norm": 0.431640625, | |
| "learning_rate": 8e-05, | |
| "loss": 1.2647333145141602, | |
| "step": 1328 | |
| }, | |
| { | |
| "epoch": 0.31633714880332986, | |
| "grad_norm": 0.451171875, | |
| "learning_rate": 8e-05, | |
| "loss": 1.303452968597412, | |
| "step": 1330 | |
| }, | |
| { | |
| "epoch": 0.31681284376393637, | |
| "grad_norm": 0.4140625, | |
| "learning_rate": 8e-05, | |
| "loss": 1.297559142112732, | |
| "step": 1332 | |
| }, | |
| { | |
| "epoch": 0.3172885387245429, | |
| "grad_norm": 0.42578125, | |
| "learning_rate": 8e-05, | |
| "loss": 1.2743000984191895, | |
| "step": 1334 | |
| }, | |
| { | |
| "epoch": 0.3177642336851494, | |
| "grad_norm": 0.4453125, | |
| "learning_rate": 8e-05, | |
| "loss": 1.3135099411010742, | |
| "step": 1336 | |
| }, | |
| { | |
| "epoch": 0.3182399286457559, | |
| "grad_norm": 0.400390625, | |
| "learning_rate": 8e-05, | |
| "loss": 1.3139266967773438, | |
| "step": 1338 | |
| }, | |
| { | |
| "epoch": 0.3187156236063624, | |
| "grad_norm": 0.419921875, | |
| "learning_rate": 8e-05, | |
| "loss": 1.2548094987869263, | |
| "step": 1340 | |
| }, | |
| { | |
| "epoch": 0.3191913185669689, | |
| "grad_norm": 0.43359375, | |
| "learning_rate": 8e-05, | |
| "loss": 1.324897289276123, | |
| "step": 1342 | |
| }, | |
| { | |
| "epoch": 0.3196670135275754, | |
| "grad_norm": 0.427734375, | |
| "learning_rate": 8e-05, | |
| "loss": 1.256618618965149, | |
| "step": 1344 | |
| }, | |
| { | |
| "epoch": 0.32014270848818194, | |
| "grad_norm": 0.455078125, | |
| "learning_rate": 8e-05, | |
| "loss": 1.3173911571502686, | |
| "step": 1346 | |
| }, | |
| { | |
| "epoch": 0.32061840344878845, | |
| "grad_norm": 0.462890625, | |
| "learning_rate": 8e-05, | |
| "loss": 1.3058103322982788, | |
| "step": 1348 | |
| }, | |
| { | |
| "epoch": 0.32109409840939496, | |
| "grad_norm": 0.435546875, | |
| "learning_rate": 8e-05, | |
| "loss": 1.2872593402862549, | |
| "step": 1350 | |
| }, | |
| { | |
| "epoch": 0.32156979337000147, | |
| "grad_norm": 0.451171875, | |
| "learning_rate": 8e-05, | |
| "loss": 1.3403403759002686, | |
| "step": 1352 | |
| }, | |
| { | |
| "epoch": 0.322045488330608, | |
| "grad_norm": 0.396484375, | |
| "learning_rate": 8e-05, | |
| "loss": 1.320298433303833, | |
| "step": 1354 | |
| }, | |
| { | |
| "epoch": 0.3225211832912145, | |
| "grad_norm": 0.396484375, | |
| "learning_rate": 8e-05, | |
| "loss": 1.2507272958755493, | |
| "step": 1356 | |
| }, | |
| { | |
| "epoch": 0.322996878251821, | |
| "grad_norm": 0.400390625, | |
| "learning_rate": 8e-05, | |
| "loss": 1.2136597633361816, | |
| "step": 1358 | |
| }, | |
| { | |
| "epoch": 0.3234725732124275, | |
| "grad_norm": 0.447265625, | |
| "learning_rate": 8e-05, | |
| "loss": 1.302070140838623, | |
| "step": 1360 | |
| }, | |
| { | |
| "epoch": 0.323948268173034, | |
| "grad_norm": 0.4140625, | |
| "learning_rate": 8e-05, | |
| "loss": 1.3372619152069092, | |
| "step": 1362 | |
| }, | |
| { | |
| "epoch": 0.3244239631336406, | |
| "grad_norm": 0.44140625, | |
| "learning_rate": 8e-05, | |
| "loss": 1.2919752597808838, | |
| "step": 1364 | |
| }, | |
| { | |
| "epoch": 0.3248996580942471, | |
| "grad_norm": 0.41796875, | |
| "learning_rate": 8e-05, | |
| "loss": 1.3035356998443604, | |
| "step": 1366 | |
| }, | |
| { | |
| "epoch": 0.3253753530548536, | |
| "grad_norm": 0.427734375, | |
| "learning_rate": 8e-05, | |
| "loss": 1.3196332454681396, | |
| "step": 1368 | |
| }, | |
| { | |
| "epoch": 0.3258510480154601, | |
| "grad_norm": 0.44921875, | |
| "learning_rate": 8e-05, | |
| "loss": 1.2436224222183228, | |
| "step": 1370 | |
| }, | |
| { | |
| "epoch": 0.3263267429760666, | |
| "grad_norm": 0.423828125, | |
| "learning_rate": 8e-05, | |
| "loss": 1.3428776264190674, | |
| "step": 1372 | |
| }, | |
| { | |
| "epoch": 0.32680243793667313, | |
| "grad_norm": 0.421875, | |
| "learning_rate": 8e-05, | |
| "loss": 1.2375438213348389, | |
| "step": 1374 | |
| }, | |
| { | |
| "epoch": 0.32727813289727964, | |
| "grad_norm": 0.4609375, | |
| "learning_rate": 8e-05, | |
| "loss": 1.2973229885101318, | |
| "step": 1376 | |
| }, | |
| { | |
| "epoch": 0.32775382785788615, | |
| "grad_norm": 0.5078125, | |
| "learning_rate": 8e-05, | |
| "loss": 1.327859878540039, | |
| "step": 1378 | |
| }, | |
| { | |
| "epoch": 0.32822952281849266, | |
| "grad_norm": 0.47265625, | |
| "learning_rate": 8e-05, | |
| "loss": 1.286755919456482, | |
| "step": 1380 | |
| }, | |
| { | |
| "epoch": 0.32870521777909917, | |
| "grad_norm": 0.462890625, | |
| "learning_rate": 8e-05, | |
| "loss": 1.2941248416900635, | |
| "step": 1382 | |
| }, | |
| { | |
| "epoch": 0.3291809127397057, | |
| "grad_norm": 0.4609375, | |
| "learning_rate": 8e-05, | |
| "loss": 1.304626226425171, | |
| "step": 1384 | |
| }, | |
| { | |
| "epoch": 0.3296566077003122, | |
| "grad_norm": 0.419921875, | |
| "learning_rate": 8e-05, | |
| "loss": 1.2754319906234741, | |
| "step": 1386 | |
| }, | |
| { | |
| "epoch": 0.3301323026609187, | |
| "grad_norm": 0.431640625, | |
| "learning_rate": 8e-05, | |
| "loss": 1.2561947107315063, | |
| "step": 1388 | |
| }, | |
| { | |
| "epoch": 0.3306079976215252, | |
| "grad_norm": 0.431640625, | |
| "learning_rate": 8e-05, | |
| "loss": 1.2895267009735107, | |
| "step": 1390 | |
| }, | |
| { | |
| "epoch": 0.3310836925821317, | |
| "grad_norm": 0.474609375, | |
| "learning_rate": 8e-05, | |
| "loss": 1.386023759841919, | |
| "step": 1392 | |
| }, | |
| { | |
| "epoch": 0.3315593875427382, | |
| "grad_norm": 0.466796875, | |
| "learning_rate": 8e-05, | |
| "loss": 1.2892621755599976, | |
| "step": 1394 | |
| }, | |
| { | |
| "epoch": 0.33203508250334474, | |
| "grad_norm": 0.466796875, | |
| "learning_rate": 8e-05, | |
| "loss": 1.2891567945480347, | |
| "step": 1396 | |
| }, | |
| { | |
| "epoch": 0.33251077746395125, | |
| "grad_norm": 0.5078125, | |
| "learning_rate": 8e-05, | |
| "loss": 1.322417140007019, | |
| "step": 1398 | |
| }, | |
| { | |
| "epoch": 0.33298647242455776, | |
| "grad_norm": 0.439453125, | |
| "learning_rate": 8e-05, | |
| "loss": 1.3004422187805176, | |
| "step": 1400 | |
| }, | |
| { | |
| "epoch": 0.33346216738516427, | |
| "grad_norm": 0.43359375, | |
| "learning_rate": 8e-05, | |
| "loss": 1.2988290786743164, | |
| "step": 1402 | |
| }, | |
| { | |
| "epoch": 0.3339378623457708, | |
| "grad_norm": 0.43359375, | |
| "learning_rate": 8e-05, | |
| "loss": 1.3143279552459717, | |
| "step": 1404 | |
| }, | |
| { | |
| "epoch": 0.3344135573063773, | |
| "grad_norm": 0.43359375, | |
| "learning_rate": 8e-05, | |
| "loss": 1.281175136566162, | |
| "step": 1406 | |
| }, | |
| { | |
| "epoch": 0.3348892522669838, | |
| "grad_norm": 0.44921875, | |
| "learning_rate": 8e-05, | |
| "loss": 1.289182424545288, | |
| "step": 1408 | |
| }, | |
| { | |
| "epoch": 0.3353649472275903, | |
| "grad_norm": 0.435546875, | |
| "learning_rate": 8e-05, | |
| "loss": 1.2937148809432983, | |
| "step": 1410 | |
| }, | |
| { | |
| "epoch": 0.3358406421881968, | |
| "grad_norm": 0.427734375, | |
| "learning_rate": 8e-05, | |
| "loss": 1.2338749170303345, | |
| "step": 1412 | |
| }, | |
| { | |
| "epoch": 0.3363163371488033, | |
| "grad_norm": 0.42578125, | |
| "learning_rate": 8e-05, | |
| "loss": 1.2975019216537476, | |
| "step": 1414 | |
| }, | |
| { | |
| "epoch": 0.33679203210940983, | |
| "grad_norm": 0.427734375, | |
| "learning_rate": 8e-05, | |
| "loss": 1.2488545179367065, | |
| "step": 1416 | |
| }, | |
| { | |
| "epoch": 0.33726772707001634, | |
| "grad_norm": 0.4140625, | |
| "learning_rate": 8e-05, | |
| "loss": 1.289847493171692, | |
| "step": 1418 | |
| }, | |
| { | |
| "epoch": 0.33774342203062285, | |
| "grad_norm": 0.423828125, | |
| "learning_rate": 8e-05, | |
| "loss": 1.2936429977416992, | |
| "step": 1420 | |
| }, | |
| { | |
| "epoch": 0.33821911699122936, | |
| "grad_norm": 0.419921875, | |
| "learning_rate": 8e-05, | |
| "loss": 1.2761449813842773, | |
| "step": 1422 | |
| }, | |
| { | |
| "epoch": 0.3386948119518359, | |
| "grad_norm": 0.412109375, | |
| "learning_rate": 8e-05, | |
| "loss": 1.2870736122131348, | |
| "step": 1424 | |
| }, | |
| { | |
| "epoch": 0.3391705069124424, | |
| "grad_norm": 0.408203125, | |
| "learning_rate": 8e-05, | |
| "loss": 1.33278489112854, | |
| "step": 1426 | |
| }, | |
| { | |
| "epoch": 0.3396462018730489, | |
| "grad_norm": 0.40625, | |
| "learning_rate": 8e-05, | |
| "loss": 1.2804453372955322, | |
| "step": 1428 | |
| }, | |
| { | |
| "epoch": 0.3401218968336554, | |
| "grad_norm": 0.443359375, | |
| "learning_rate": 8e-05, | |
| "loss": 1.303015112876892, | |
| "step": 1430 | |
| }, | |
| { | |
| "epoch": 0.3405975917942619, | |
| "grad_norm": 0.419921875, | |
| "learning_rate": 8e-05, | |
| "loss": 1.333322286605835, | |
| "step": 1432 | |
| }, | |
| { | |
| "epoch": 0.3410732867548684, | |
| "grad_norm": 0.447265625, | |
| "learning_rate": 8e-05, | |
| "loss": 1.320211410522461, | |
| "step": 1434 | |
| }, | |
| { | |
| "epoch": 0.34154898171547493, | |
| "grad_norm": 0.423828125, | |
| "learning_rate": 8e-05, | |
| "loss": 1.3331108093261719, | |
| "step": 1436 | |
| }, | |
| { | |
| "epoch": 0.34202467667608144, | |
| "grad_norm": 0.435546875, | |
| "learning_rate": 8e-05, | |
| "loss": 1.26707923412323, | |
| "step": 1438 | |
| }, | |
| { | |
| "epoch": 0.34250037163668795, | |
| "grad_norm": 0.439453125, | |
| "learning_rate": 8e-05, | |
| "loss": 1.3269259929656982, | |
| "step": 1440 | |
| }, | |
| { | |
| "epoch": 0.34297606659729446, | |
| "grad_norm": 0.44921875, | |
| "learning_rate": 8e-05, | |
| "loss": 1.3051103353500366, | |
| "step": 1442 | |
| }, | |
| { | |
| "epoch": 0.34345176155790097, | |
| "grad_norm": 0.416015625, | |
| "learning_rate": 8e-05, | |
| "loss": 1.307328701019287, | |
| "step": 1444 | |
| }, | |
| { | |
| "epoch": 0.3439274565185075, | |
| "grad_norm": 0.6171875, | |
| "learning_rate": 8e-05, | |
| "loss": 1.3046774864196777, | |
| "step": 1446 | |
| }, | |
| { | |
| "epoch": 0.34440315147911404, | |
| "grad_norm": 0.412109375, | |
| "learning_rate": 8e-05, | |
| "loss": 1.3187592029571533, | |
| "step": 1448 | |
| }, | |
| { | |
| "epoch": 0.34487884643972055, | |
| "grad_norm": 0.421875, | |
| "learning_rate": 8e-05, | |
| "loss": 1.289937973022461, | |
| "step": 1450 | |
| }, | |
| { | |
| "epoch": 0.34535454140032706, | |
| "grad_norm": 0.42578125, | |
| "learning_rate": 8e-05, | |
| "loss": 1.300656795501709, | |
| "step": 1452 | |
| }, | |
| { | |
| "epoch": 0.3458302363609336, | |
| "grad_norm": 0.423828125, | |
| "learning_rate": 8e-05, | |
| "loss": 1.328467607498169, | |
| "step": 1454 | |
| }, | |
| { | |
| "epoch": 0.3463059313215401, | |
| "grad_norm": 0.44140625, | |
| "learning_rate": 8e-05, | |
| "loss": 1.3533457517623901, | |
| "step": 1456 | |
| }, | |
| { | |
| "epoch": 0.3467816262821466, | |
| "grad_norm": 0.416015625, | |
| "learning_rate": 8e-05, | |
| "loss": 1.2739849090576172, | |
| "step": 1458 | |
| }, | |
| { | |
| "epoch": 0.3472573212427531, | |
| "grad_norm": 0.419921875, | |
| "learning_rate": 8e-05, | |
| "loss": 1.3364741802215576, | |
| "step": 1460 | |
| }, | |
| { | |
| "epoch": 0.3477330162033596, | |
| "grad_norm": 0.427734375, | |
| "learning_rate": 8e-05, | |
| "loss": 1.2574357986450195, | |
| "step": 1462 | |
| }, | |
| { | |
| "epoch": 0.3482087111639661, | |
| "grad_norm": 0.427734375, | |
| "learning_rate": 8e-05, | |
| "loss": 1.3151860237121582, | |
| "step": 1464 | |
| }, | |
| { | |
| "epoch": 0.34868440612457263, | |
| "grad_norm": 0.431640625, | |
| "learning_rate": 8e-05, | |
| "loss": 1.3361979722976685, | |
| "step": 1466 | |
| }, | |
| { | |
| "epoch": 0.34916010108517914, | |
| "grad_norm": 0.4296875, | |
| "learning_rate": 8e-05, | |
| "loss": 1.3134095668792725, | |
| "step": 1468 | |
| }, | |
| { | |
| "epoch": 0.34963579604578565, | |
| "grad_norm": 0.419921875, | |
| "learning_rate": 8e-05, | |
| "loss": 1.2602635622024536, | |
| "step": 1470 | |
| }, | |
| { | |
| "epoch": 0.35011149100639216, | |
| "grad_norm": 0.423828125, | |
| "learning_rate": 8e-05, | |
| "loss": 1.2942792177200317, | |
| "step": 1472 | |
| }, | |
| { | |
| "epoch": 0.35058718596699867, | |
| "grad_norm": 0.4453125, | |
| "learning_rate": 8e-05, | |
| "loss": 1.3020391464233398, | |
| "step": 1474 | |
| }, | |
| { | |
| "epoch": 0.3510628809276052, | |
| "grad_norm": 0.443359375, | |
| "learning_rate": 8e-05, | |
| "loss": 1.2818697690963745, | |
| "step": 1476 | |
| }, | |
| { | |
| "epoch": 0.3515385758882117, | |
| "grad_norm": 0.484375, | |
| "learning_rate": 8e-05, | |
| "loss": 1.357285976409912, | |
| "step": 1478 | |
| }, | |
| { | |
| "epoch": 0.3520142708488182, | |
| "grad_norm": 0.4453125, | |
| "learning_rate": 8e-05, | |
| "loss": 1.256792664527893, | |
| "step": 1480 | |
| }, | |
| { | |
| "epoch": 0.3524899658094247, | |
| "grad_norm": 0.41796875, | |
| "learning_rate": 8e-05, | |
| "loss": 1.2922112941741943, | |
| "step": 1482 | |
| }, | |
| { | |
| "epoch": 0.3529656607700312, | |
| "grad_norm": 0.408203125, | |
| "learning_rate": 8e-05, | |
| "loss": 1.313403606414795, | |
| "step": 1484 | |
| }, | |
| { | |
| "epoch": 0.35344135573063773, | |
| "grad_norm": 0.41015625, | |
| "learning_rate": 8e-05, | |
| "loss": 1.3349361419677734, | |
| "step": 1486 | |
| }, | |
| { | |
| "epoch": 0.35391705069124424, | |
| "grad_norm": 0.38671875, | |
| "learning_rate": 8e-05, | |
| "loss": 1.2362300157546997, | |
| "step": 1488 | |
| }, | |
| { | |
| "epoch": 0.35439274565185075, | |
| "grad_norm": 0.423828125, | |
| "learning_rate": 8e-05, | |
| "loss": 1.2810988426208496, | |
| "step": 1490 | |
| }, | |
| { | |
| "epoch": 0.35486844061245726, | |
| "grad_norm": 0.4765625, | |
| "learning_rate": 8e-05, | |
| "loss": 1.3440229892730713, | |
| "step": 1492 | |
| }, | |
| { | |
| "epoch": 0.35534413557306377, | |
| "grad_norm": 0.4375, | |
| "learning_rate": 8e-05, | |
| "loss": 1.3103101253509521, | |
| "step": 1494 | |
| }, | |
| { | |
| "epoch": 0.3558198305336703, | |
| "grad_norm": 0.447265625, | |
| "learning_rate": 8e-05, | |
| "loss": 1.2558884620666504, | |
| "step": 1496 | |
| }, | |
| { | |
| "epoch": 0.3562955254942768, | |
| "grad_norm": 0.447265625, | |
| "learning_rate": 8e-05, | |
| "loss": 1.3156042098999023, | |
| "step": 1498 | |
| }, | |
| { | |
| "epoch": 0.3567712204548833, | |
| "grad_norm": 0.46484375, | |
| "learning_rate": 8e-05, | |
| "loss": 1.294625997543335, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 0.3572469154154898, | |
| "grad_norm": 0.4140625, | |
| "learning_rate": 8e-05, | |
| "loss": 1.2535991668701172, | |
| "step": 1502 | |
| }, | |
| { | |
| "epoch": 0.3577226103760963, | |
| "grad_norm": 0.416015625, | |
| "learning_rate": 8e-05, | |
| "loss": 1.281435489654541, | |
| "step": 1504 | |
| }, | |
| { | |
| "epoch": 0.3581983053367028, | |
| "grad_norm": 0.4375, | |
| "learning_rate": 8e-05, | |
| "loss": 1.3105072975158691, | |
| "step": 1506 | |
| }, | |
| { | |
| "epoch": 0.35867400029730934, | |
| "grad_norm": 0.40625, | |
| "learning_rate": 8e-05, | |
| "loss": 1.3389477729797363, | |
| "step": 1508 | |
| }, | |
| { | |
| "epoch": 0.35914969525791585, | |
| "grad_norm": 0.41015625, | |
| "learning_rate": 8e-05, | |
| "loss": 1.3460373878479004, | |
| "step": 1510 | |
| }, | |
| { | |
| "epoch": 0.35962539021852236, | |
| "grad_norm": 0.42578125, | |
| "learning_rate": 8e-05, | |
| "loss": 1.2376234531402588, | |
| "step": 1512 | |
| }, | |
| { | |
| "epoch": 0.36010108517912887, | |
| "grad_norm": 0.416015625, | |
| "learning_rate": 8e-05, | |
| "loss": 1.3217864036560059, | |
| "step": 1514 | |
| }, | |
| { | |
| "epoch": 0.3605767801397354, | |
| "grad_norm": 0.40625, | |
| "learning_rate": 8e-05, | |
| "loss": 1.2773277759552002, | |
| "step": 1516 | |
| }, | |
| { | |
| "epoch": 0.3610524751003419, | |
| "grad_norm": 0.41796875, | |
| "learning_rate": 8e-05, | |
| "loss": 1.2766404151916504, | |
| "step": 1518 | |
| }, | |
| { | |
| "epoch": 0.3615281700609484, | |
| "grad_norm": 0.390625, | |
| "learning_rate": 8e-05, | |
| "loss": 1.2760717868804932, | |
| "step": 1520 | |
| }, | |
| { | |
| "epoch": 0.3620038650215549, | |
| "grad_norm": 0.416015625, | |
| "learning_rate": 8e-05, | |
| "loss": 1.3625264167785645, | |
| "step": 1522 | |
| }, | |
| { | |
| "epoch": 0.3624795599821614, | |
| "grad_norm": 0.396484375, | |
| "learning_rate": 8e-05, | |
| "loss": 1.247987985610962, | |
| "step": 1524 | |
| }, | |
| { | |
| "epoch": 0.3629552549427679, | |
| "grad_norm": 0.40625, | |
| "learning_rate": 8e-05, | |
| "loss": 1.2980639934539795, | |
| "step": 1526 | |
| }, | |
| { | |
| "epoch": 0.36343094990337443, | |
| "grad_norm": 0.392578125, | |
| "learning_rate": 8e-05, | |
| "loss": 1.2997593879699707, | |
| "step": 1528 | |
| }, | |
| { | |
| "epoch": 0.363906644863981, | |
| "grad_norm": 0.384765625, | |
| "learning_rate": 8e-05, | |
| "loss": 1.2603178024291992, | |
| "step": 1530 | |
| }, | |
| { | |
| "epoch": 0.3643823398245875, | |
| "grad_norm": 0.384765625, | |
| "learning_rate": 8e-05, | |
| "loss": 1.302708625793457, | |
| "step": 1532 | |
| }, | |
| { | |
| "epoch": 0.364858034785194, | |
| "grad_norm": 0.416015625, | |
| "learning_rate": 8e-05, | |
| "loss": 1.2671213150024414, | |
| "step": 1534 | |
| }, | |
| { | |
| "epoch": 0.36533372974580053, | |
| "grad_norm": 0.4140625, | |
| "learning_rate": 8e-05, | |
| "loss": 1.3167269229888916, | |
| "step": 1536 | |
| }, | |
| { | |
| "epoch": 0.36580942470640704, | |
| "grad_norm": 0.443359375, | |
| "learning_rate": 8e-05, | |
| "loss": 1.3100299835205078, | |
| "step": 1538 | |
| }, | |
| { | |
| "epoch": 0.36628511966701355, | |
| "grad_norm": 0.380859375, | |
| "learning_rate": 8e-05, | |
| "loss": 1.2652174234390259, | |
| "step": 1540 | |
| }, | |
| { | |
| "epoch": 0.36676081462762006, | |
| "grad_norm": 0.408203125, | |
| "learning_rate": 8e-05, | |
| "loss": 1.3316433429718018, | |
| "step": 1542 | |
| }, | |
| { | |
| "epoch": 0.36723650958822657, | |
| "grad_norm": 0.421875, | |
| "learning_rate": 8e-05, | |
| "loss": 1.2940750122070312, | |
| "step": 1544 | |
| }, | |
| { | |
| "epoch": 0.3677122045488331, | |
| "grad_norm": 0.427734375, | |
| "learning_rate": 8e-05, | |
| "loss": 1.3008698225021362, | |
| "step": 1546 | |
| }, | |
| { | |
| "epoch": 0.3681878995094396, | |
| "grad_norm": 0.43359375, | |
| "learning_rate": 8e-05, | |
| "loss": 1.2859610319137573, | |
| "step": 1548 | |
| }, | |
| { | |
| "epoch": 0.3686635944700461, | |
| "grad_norm": 0.4609375, | |
| "learning_rate": 8e-05, | |
| "loss": 1.2531521320343018, | |
| "step": 1550 | |
| }, | |
| { | |
| "epoch": 0.3691392894306526, | |
| "grad_norm": 0.431640625, | |
| "learning_rate": 8e-05, | |
| "loss": 1.2496728897094727, | |
| "step": 1552 | |
| }, | |
| { | |
| "epoch": 0.3696149843912591, | |
| "grad_norm": 0.462890625, | |
| "learning_rate": 8e-05, | |
| "loss": 1.33748459815979, | |
| "step": 1554 | |
| }, | |
| { | |
| "epoch": 0.3700906793518656, | |
| "grad_norm": 0.47265625, | |
| "learning_rate": 8e-05, | |
| "loss": 1.2866451740264893, | |
| "step": 1556 | |
| }, | |
| { | |
| "epoch": 0.37056637431247214, | |
| "grad_norm": 0.46484375, | |
| "learning_rate": 8e-05, | |
| "loss": 1.34792160987854, | |
| "step": 1558 | |
| }, | |
| { | |
| "epoch": 0.37104206927307865, | |
| "grad_norm": 0.52734375, | |
| "learning_rate": 8e-05, | |
| "loss": 1.2783215045928955, | |
| "step": 1560 | |
| }, | |
| { | |
| "epoch": 0.37151776423368515, | |
| "grad_norm": 0.50390625, | |
| "learning_rate": 8e-05, | |
| "loss": 1.2765138149261475, | |
| "step": 1562 | |
| }, | |
| { | |
| "epoch": 0.37199345919429166, | |
| "grad_norm": 0.458984375, | |
| "learning_rate": 8e-05, | |
| "loss": 1.3496522903442383, | |
| "step": 1564 | |
| }, | |
| { | |
| "epoch": 0.3724691541548982, | |
| "grad_norm": 0.435546875, | |
| "learning_rate": 8e-05, | |
| "loss": 1.3075356483459473, | |
| "step": 1566 | |
| }, | |
| { | |
| "epoch": 0.3729448491155047, | |
| "grad_norm": 0.494140625, | |
| "learning_rate": 8e-05, | |
| "loss": 1.2998372316360474, | |
| "step": 1568 | |
| }, | |
| { | |
| "epoch": 0.3734205440761112, | |
| "grad_norm": 0.45703125, | |
| "learning_rate": 8e-05, | |
| "loss": 1.2793023586273193, | |
| "step": 1570 | |
| }, | |
| { | |
| "epoch": 0.3738962390367177, | |
| "grad_norm": 0.4375, | |
| "learning_rate": 8e-05, | |
| "loss": 1.2992515563964844, | |
| "step": 1572 | |
| }, | |
| { | |
| "epoch": 0.3743719339973242, | |
| "grad_norm": 0.423828125, | |
| "learning_rate": 8e-05, | |
| "loss": 1.2910690307617188, | |
| "step": 1574 | |
| }, | |
| { | |
| "epoch": 0.3748476289579307, | |
| "grad_norm": 0.455078125, | |
| "learning_rate": 8e-05, | |
| "loss": 1.2138452529907227, | |
| "step": 1576 | |
| }, | |
| { | |
| "epoch": 0.37532332391853723, | |
| "grad_norm": 0.45703125, | |
| "learning_rate": 8e-05, | |
| "loss": 1.2589681148529053, | |
| "step": 1578 | |
| }, | |
| { | |
| "epoch": 0.37579901887914374, | |
| "grad_norm": 0.431640625, | |
| "learning_rate": 8e-05, | |
| "loss": 1.3131399154663086, | |
| "step": 1580 | |
| }, | |
| { | |
| "epoch": 0.37627471383975025, | |
| "grad_norm": 0.43359375, | |
| "learning_rate": 8e-05, | |
| "loss": 1.2620333433151245, | |
| "step": 1582 | |
| }, | |
| { | |
| "epoch": 0.37675040880035676, | |
| "grad_norm": 0.4453125, | |
| "learning_rate": 8e-05, | |
| "loss": 1.2692234516143799, | |
| "step": 1584 | |
| }, | |
| { | |
| "epoch": 0.37722610376096327, | |
| "grad_norm": 0.45703125, | |
| "learning_rate": 8e-05, | |
| "loss": 1.3276705741882324, | |
| "step": 1586 | |
| }, | |
| { | |
| "epoch": 0.3777017987215698, | |
| "grad_norm": 0.408203125, | |
| "learning_rate": 8e-05, | |
| "loss": 1.2581058740615845, | |
| "step": 1588 | |
| }, | |
| { | |
| "epoch": 0.3781774936821763, | |
| "grad_norm": 0.431640625, | |
| "learning_rate": 8e-05, | |
| "loss": 1.3049333095550537, | |
| "step": 1590 | |
| }, | |
| { | |
| "epoch": 0.3786531886427828, | |
| "grad_norm": 0.439453125, | |
| "learning_rate": 8e-05, | |
| "loss": 1.2860021591186523, | |
| "step": 1592 | |
| }, | |
| { | |
| "epoch": 0.3791288836033893, | |
| "grad_norm": 0.408203125, | |
| "learning_rate": 8e-05, | |
| "loss": 1.3235461711883545, | |
| "step": 1594 | |
| }, | |
| { | |
| "epoch": 0.3796045785639958, | |
| "grad_norm": 0.412109375, | |
| "learning_rate": 8e-05, | |
| "loss": 1.2726843357086182, | |
| "step": 1596 | |
| }, | |
| { | |
| "epoch": 0.38008027352460233, | |
| "grad_norm": 0.41796875, | |
| "learning_rate": 8e-05, | |
| "loss": 1.2600030899047852, | |
| "step": 1598 | |
| }, | |
| { | |
| "epoch": 0.38055596848520884, | |
| "grad_norm": 0.427734375, | |
| "learning_rate": 8e-05, | |
| "loss": 1.3035047054290771, | |
| "step": 1600 | |
| }, | |
| { | |
| "epoch": 0.38103166344581535, | |
| "grad_norm": 0.43359375, | |
| "learning_rate": 8e-05, | |
| "loss": 1.2810437679290771, | |
| "step": 1602 | |
| }, | |
| { | |
| "epoch": 0.38150735840642186, | |
| "grad_norm": 0.455078125, | |
| "learning_rate": 8e-05, | |
| "loss": 1.3053010702133179, | |
| "step": 1604 | |
| }, | |
| { | |
| "epoch": 0.38198305336702837, | |
| "grad_norm": 0.4375, | |
| "learning_rate": 8e-05, | |
| "loss": 1.285233497619629, | |
| "step": 1606 | |
| }, | |
| { | |
| "epoch": 0.3824587483276349, | |
| "grad_norm": 0.474609375, | |
| "learning_rate": 8e-05, | |
| "loss": 1.328747034072876, | |
| "step": 1608 | |
| }, | |
| { | |
| "epoch": 0.3829344432882414, | |
| "grad_norm": 0.4140625, | |
| "learning_rate": 8e-05, | |
| "loss": 1.3264154195785522, | |
| "step": 1610 | |
| }, | |
| { | |
| "epoch": 0.38341013824884795, | |
| "grad_norm": 0.40625, | |
| "learning_rate": 8e-05, | |
| "loss": 1.2244741916656494, | |
| "step": 1612 | |
| }, | |
| { | |
| "epoch": 0.38388583320945446, | |
| "grad_norm": 0.40625, | |
| "learning_rate": 8e-05, | |
| "loss": 1.247675895690918, | |
| "step": 1614 | |
| }, | |
| { | |
| "epoch": 0.384361528170061, | |
| "grad_norm": 0.419921875, | |
| "learning_rate": 8e-05, | |
| "loss": 1.289712905883789, | |
| "step": 1616 | |
| }, | |
| { | |
| "epoch": 0.3848372231306675, | |
| "grad_norm": 0.42578125, | |
| "learning_rate": 8e-05, | |
| "loss": 1.299727439880371, | |
| "step": 1618 | |
| }, | |
| { | |
| "epoch": 0.385312918091274, | |
| "grad_norm": 0.404296875, | |
| "learning_rate": 8e-05, | |
| "loss": 1.229569911956787, | |
| "step": 1620 | |
| }, | |
| { | |
| "epoch": 0.3857886130518805, | |
| "grad_norm": 0.4296875, | |
| "learning_rate": 8e-05, | |
| "loss": 1.322393774986267, | |
| "step": 1622 | |
| }, | |
| { | |
| "epoch": 0.386264308012487, | |
| "grad_norm": 0.404296875, | |
| "learning_rate": 8e-05, | |
| "loss": 1.2661751508712769, | |
| "step": 1624 | |
| }, | |
| { | |
| "epoch": 0.3867400029730935, | |
| "grad_norm": 0.39453125, | |
| "learning_rate": 8e-05, | |
| "loss": 1.2912821769714355, | |
| "step": 1626 | |
| }, | |
| { | |
| "epoch": 0.38721569793370003, | |
| "grad_norm": 0.40625, | |
| "learning_rate": 8e-05, | |
| "loss": 1.2871348857879639, | |
| "step": 1628 | |
| }, | |
| { | |
| "epoch": 0.38769139289430654, | |
| "grad_norm": 0.396484375, | |
| "learning_rate": 8e-05, | |
| "loss": 1.2939109802246094, | |
| "step": 1630 | |
| }, | |
| { | |
| "epoch": 0.38816708785491305, | |
| "grad_norm": 0.396484375, | |
| "learning_rate": 8e-05, | |
| "loss": 1.2797859907150269, | |
| "step": 1632 | |
| }, | |
| { | |
| "epoch": 0.38864278281551956, | |
| "grad_norm": 0.396484375, | |
| "learning_rate": 8e-05, | |
| "loss": 1.2935690879821777, | |
| "step": 1634 | |
| }, | |
| { | |
| "epoch": 0.38911847777612607, | |
| "grad_norm": 0.400390625, | |
| "learning_rate": 8e-05, | |
| "loss": 1.2815215587615967, | |
| "step": 1636 | |
| }, | |
| { | |
| "epoch": 0.3895941727367326, | |
| "grad_norm": 0.3984375, | |
| "learning_rate": 8e-05, | |
| "loss": 1.2466614246368408, | |
| "step": 1638 | |
| }, | |
| { | |
| "epoch": 0.3900698676973391, | |
| "grad_norm": 0.39453125, | |
| "learning_rate": 8e-05, | |
| "loss": 1.3364429473876953, | |
| "step": 1640 | |
| }, | |
| { | |
| "epoch": 0.3905455626579456, | |
| "grad_norm": 0.396484375, | |
| "learning_rate": 8e-05, | |
| "loss": 1.2810378074645996, | |
| "step": 1642 | |
| }, | |
| { | |
| "epoch": 0.3910212576185521, | |
| "grad_norm": 0.390625, | |
| "learning_rate": 8e-05, | |
| "loss": 1.2879207134246826, | |
| "step": 1644 | |
| }, | |
| { | |
| "epoch": 0.3914969525791586, | |
| "grad_norm": 0.396484375, | |
| "learning_rate": 8e-05, | |
| "loss": 1.258134365081787, | |
| "step": 1646 | |
| }, | |
| { | |
| "epoch": 0.39197264753976513, | |
| "grad_norm": 0.4140625, | |
| "learning_rate": 8e-05, | |
| "loss": 1.2918128967285156, | |
| "step": 1648 | |
| }, | |
| { | |
| "epoch": 0.39244834250037164, | |
| "grad_norm": 0.416015625, | |
| "learning_rate": 8e-05, | |
| "loss": 1.2425835132598877, | |
| "step": 1650 | |
| }, | |
| { | |
| "epoch": 0.39292403746097815, | |
| "grad_norm": 0.400390625, | |
| "learning_rate": 8e-05, | |
| "loss": 1.213120698928833, | |
| "step": 1652 | |
| }, | |
| { | |
| "epoch": 0.39339973242158466, | |
| "grad_norm": 0.419921875, | |
| "learning_rate": 8e-05, | |
| "loss": 1.2344098091125488, | |
| "step": 1654 | |
| }, | |
| { | |
| "epoch": 0.39387542738219117, | |
| "grad_norm": 0.40625, | |
| "learning_rate": 8e-05, | |
| "loss": 1.2660351991653442, | |
| "step": 1656 | |
| }, | |
| { | |
| "epoch": 0.3943511223427977, | |
| "grad_norm": 0.421875, | |
| "learning_rate": 8e-05, | |
| "loss": 1.330662488937378, | |
| "step": 1658 | |
| }, | |
| { | |
| "epoch": 0.3948268173034042, | |
| "grad_norm": 0.41796875, | |
| "learning_rate": 8e-05, | |
| "loss": 1.2738041877746582, | |
| "step": 1660 | |
| }, | |
| { | |
| "epoch": 0.3953025122640107, | |
| "grad_norm": 0.41796875, | |
| "learning_rate": 8e-05, | |
| "loss": 1.2888221740722656, | |
| "step": 1662 | |
| }, | |
| { | |
| "epoch": 0.3957782072246172, | |
| "grad_norm": 0.41796875, | |
| "learning_rate": 8e-05, | |
| "loss": 1.288313865661621, | |
| "step": 1664 | |
| }, | |
| { | |
| "epoch": 0.3962539021852237, | |
| "grad_norm": 0.40234375, | |
| "learning_rate": 8e-05, | |
| "loss": 1.290661096572876, | |
| "step": 1666 | |
| }, | |
| { | |
| "epoch": 0.3967295971458302, | |
| "grad_norm": 0.380859375, | |
| "learning_rate": 8e-05, | |
| "loss": 1.249962568283081, | |
| "step": 1668 | |
| }, | |
| { | |
| "epoch": 0.39720529210643674, | |
| "grad_norm": 0.37890625, | |
| "learning_rate": 8e-05, | |
| "loss": 1.2480006217956543, | |
| "step": 1670 | |
| }, | |
| { | |
| "epoch": 0.39768098706704325, | |
| "grad_norm": 0.36328125, | |
| "learning_rate": 8e-05, | |
| "loss": 1.2179031372070312, | |
| "step": 1672 | |
| }, | |
| { | |
| "epoch": 0.39815668202764976, | |
| "grad_norm": 0.388671875, | |
| "learning_rate": 8e-05, | |
| "loss": 1.3344948291778564, | |
| "step": 1674 | |
| }, | |
| { | |
| "epoch": 0.39863237698825627, | |
| "grad_norm": 0.41796875, | |
| "learning_rate": 8e-05, | |
| "loss": 1.2962273359298706, | |
| "step": 1676 | |
| }, | |
| { | |
| "epoch": 0.3991080719488628, | |
| "grad_norm": 0.400390625, | |
| "learning_rate": 8e-05, | |
| "loss": 1.266413688659668, | |
| "step": 1678 | |
| }, | |
| { | |
| "epoch": 0.3995837669094693, | |
| "grad_norm": 0.390625, | |
| "learning_rate": 8e-05, | |
| "loss": 1.2850923538208008, | |
| "step": 1680 | |
| }, | |
| { | |
| "epoch": 0.4000594618700758, | |
| "grad_norm": 0.40234375, | |
| "learning_rate": 8e-05, | |
| "loss": 1.2746737003326416, | |
| "step": 1682 | |
| }, | |
| { | |
| "epoch": 0.4005351568306823, | |
| "grad_norm": 0.412109375, | |
| "learning_rate": 8e-05, | |
| "loss": 1.2807002067565918, | |
| "step": 1684 | |
| }, | |
| { | |
| "epoch": 0.4010108517912888, | |
| "grad_norm": 0.4140625, | |
| "learning_rate": 8e-05, | |
| "loss": 1.2683579921722412, | |
| "step": 1686 | |
| }, | |
| { | |
| "epoch": 0.4014865467518953, | |
| "grad_norm": 0.4140625, | |
| "learning_rate": 8e-05, | |
| "loss": 1.2930378913879395, | |
| "step": 1688 | |
| }, | |
| { | |
| "epoch": 0.40196224171250183, | |
| "grad_norm": 0.408203125, | |
| "learning_rate": 8e-05, | |
| "loss": 1.2729506492614746, | |
| "step": 1690 | |
| }, | |
| { | |
| "epoch": 0.40243793667310834, | |
| "grad_norm": 0.443359375, | |
| "learning_rate": 8e-05, | |
| "loss": 1.2865461111068726, | |
| "step": 1692 | |
| }, | |
| { | |
| "epoch": 0.40291363163371485, | |
| "grad_norm": 0.421875, | |
| "learning_rate": 8e-05, | |
| "loss": 1.249645709991455, | |
| "step": 1694 | |
| }, | |
| { | |
| "epoch": 0.4033893265943214, | |
| "grad_norm": 0.3671875, | |
| "learning_rate": 8e-05, | |
| "loss": 1.2383712530136108, | |
| "step": 1696 | |
| }, | |
| { | |
| "epoch": 0.4038650215549279, | |
| "grad_norm": 0.3828125, | |
| "learning_rate": 8e-05, | |
| "loss": 1.246313452720642, | |
| "step": 1698 | |
| }, | |
| { | |
| "epoch": 0.40434071651553444, | |
| "grad_norm": 0.392578125, | |
| "learning_rate": 8e-05, | |
| "loss": 1.2288974523544312, | |
| "step": 1700 | |
| }, | |
| { | |
| "epoch": 0.40481641147614095, | |
| "grad_norm": 0.38671875, | |
| "learning_rate": 8e-05, | |
| "loss": 1.2907836437225342, | |
| "step": 1702 | |
| }, | |
| { | |
| "epoch": 0.40529210643674746, | |
| "grad_norm": 0.390625, | |
| "learning_rate": 8e-05, | |
| "loss": 1.2730671167373657, | |
| "step": 1704 | |
| }, | |
| { | |
| "epoch": 0.40576780139735397, | |
| "grad_norm": 0.40234375, | |
| "learning_rate": 8e-05, | |
| "loss": 1.3130565881729126, | |
| "step": 1706 | |
| }, | |
| { | |
| "epoch": 0.4062434963579605, | |
| "grad_norm": 0.404296875, | |
| "learning_rate": 8e-05, | |
| "loss": 1.2681382894515991, | |
| "step": 1708 | |
| }, | |
| { | |
| "epoch": 0.406719191318567, | |
| "grad_norm": 0.40234375, | |
| "learning_rate": 8e-05, | |
| "loss": 1.296158790588379, | |
| "step": 1710 | |
| }, | |
| { | |
| "epoch": 0.4071948862791735, | |
| "grad_norm": 0.3828125, | |
| "learning_rate": 8e-05, | |
| "loss": 1.247192144393921, | |
| "step": 1712 | |
| }, | |
| { | |
| "epoch": 0.40767058123978, | |
| "grad_norm": 0.41015625, | |
| "learning_rate": 8e-05, | |
| "loss": 1.2639563083648682, | |
| "step": 1714 | |
| }, | |
| { | |
| "epoch": 0.4081462762003865, | |
| "grad_norm": 0.435546875, | |
| "learning_rate": 8e-05, | |
| "loss": 1.2656540870666504, | |
| "step": 1716 | |
| }, | |
| { | |
| "epoch": 0.408621971160993, | |
| "grad_norm": 0.392578125, | |
| "learning_rate": 8e-05, | |
| "loss": 1.205491542816162, | |
| "step": 1718 | |
| }, | |
| { | |
| "epoch": 0.40909766612159953, | |
| "grad_norm": 0.40234375, | |
| "learning_rate": 8e-05, | |
| "loss": 1.277217984199524, | |
| "step": 1720 | |
| }, | |
| { | |
| "epoch": 0.40957336108220604, | |
| "grad_norm": 0.443359375, | |
| "learning_rate": 8e-05, | |
| "loss": 1.3079639673233032, | |
| "step": 1722 | |
| }, | |
| { | |
| "epoch": 0.41004905604281255, | |
| "grad_norm": 0.458984375, | |
| "learning_rate": 8e-05, | |
| "loss": 1.2672646045684814, | |
| "step": 1724 | |
| }, | |
| { | |
| "epoch": 0.41052475100341906, | |
| "grad_norm": 0.474609375, | |
| "learning_rate": 8e-05, | |
| "loss": 1.2784157991409302, | |
| "step": 1726 | |
| }, | |
| { | |
| "epoch": 0.4110004459640256, | |
| "grad_norm": 0.484375, | |
| "learning_rate": 8e-05, | |
| "loss": 1.2682194709777832, | |
| "step": 1728 | |
| }, | |
| { | |
| "epoch": 0.4114761409246321, | |
| "grad_norm": 0.423828125, | |
| "learning_rate": 8e-05, | |
| "loss": 1.2786941528320312, | |
| "step": 1730 | |
| }, | |
| { | |
| "epoch": 0.4119518358852386, | |
| "grad_norm": 0.3828125, | |
| "learning_rate": 8e-05, | |
| "loss": 1.281358003616333, | |
| "step": 1732 | |
| }, | |
| { | |
| "epoch": 0.4124275308458451, | |
| "grad_norm": 0.384765625, | |
| "learning_rate": 8e-05, | |
| "loss": 1.246321678161621, | |
| "step": 1734 | |
| }, | |
| { | |
| "epoch": 0.4129032258064516, | |
| "grad_norm": 0.396484375, | |
| "learning_rate": 8e-05, | |
| "loss": 1.2259719371795654, | |
| "step": 1736 | |
| }, | |
| { | |
| "epoch": 0.4133789207670581, | |
| "grad_norm": 0.390625, | |
| "learning_rate": 8e-05, | |
| "loss": 1.3222472667694092, | |
| "step": 1738 | |
| }, | |
| { | |
| "epoch": 0.41385461572766463, | |
| "grad_norm": 0.384765625, | |
| "learning_rate": 8e-05, | |
| "loss": 1.216452956199646, | |
| "step": 1740 | |
| }, | |
| { | |
| "epoch": 0.41433031068827114, | |
| "grad_norm": 0.396484375, | |
| "learning_rate": 8e-05, | |
| "loss": 1.3299109935760498, | |
| "step": 1742 | |
| }, | |
| { | |
| "epoch": 0.41480600564887765, | |
| "grad_norm": 0.40234375, | |
| "learning_rate": 8e-05, | |
| "loss": 1.250302791595459, | |
| "step": 1744 | |
| }, | |
| { | |
| "epoch": 0.41528170060948416, | |
| "grad_norm": 0.400390625, | |
| "learning_rate": 8e-05, | |
| "loss": 1.2744085788726807, | |
| "step": 1746 | |
| }, | |
| { | |
| "epoch": 0.41575739557009067, | |
| "grad_norm": 0.40625, | |
| "learning_rate": 8e-05, | |
| "loss": 1.2700858116149902, | |
| "step": 1748 | |
| }, | |
| { | |
| "epoch": 0.4162330905306972, | |
| "grad_norm": 0.38671875, | |
| "learning_rate": 8e-05, | |
| "loss": 1.2627267837524414, | |
| "step": 1750 | |
| }, | |
| { | |
| "epoch": 0.4167087854913037, | |
| "grad_norm": 0.39453125, | |
| "learning_rate": 8e-05, | |
| "loss": 1.269378423690796, | |
| "step": 1752 | |
| }, | |
| { | |
| "epoch": 0.4171844804519102, | |
| "grad_norm": 0.384765625, | |
| "learning_rate": 8e-05, | |
| "loss": 1.3320337533950806, | |
| "step": 1754 | |
| }, | |
| { | |
| "epoch": 0.4176601754125167, | |
| "grad_norm": 0.380859375, | |
| "learning_rate": 8e-05, | |
| "loss": 1.2717125415802002, | |
| "step": 1756 | |
| }, | |
| { | |
| "epoch": 0.4181358703731232, | |
| "grad_norm": 0.3984375, | |
| "learning_rate": 8e-05, | |
| "loss": 1.2660107612609863, | |
| "step": 1758 | |
| }, | |
| { | |
| "epoch": 0.41861156533372973, | |
| "grad_norm": 0.3984375, | |
| "learning_rate": 8e-05, | |
| "loss": 1.2718021869659424, | |
| "step": 1760 | |
| }, | |
| { | |
| "epoch": 0.41908726029433624, | |
| "grad_norm": 0.38671875, | |
| "learning_rate": 8e-05, | |
| "loss": 1.2778680324554443, | |
| "step": 1762 | |
| }, | |
| { | |
| "epoch": 0.41956295525494275, | |
| "grad_norm": 0.3828125, | |
| "learning_rate": 8e-05, | |
| "loss": 1.247718334197998, | |
| "step": 1764 | |
| }, | |
| { | |
| "epoch": 0.42003865021554926, | |
| "grad_norm": 0.4140625, | |
| "learning_rate": 8e-05, | |
| "loss": 1.2594034671783447, | |
| "step": 1766 | |
| }, | |
| { | |
| "epoch": 0.42051434517615577, | |
| "grad_norm": 0.400390625, | |
| "learning_rate": 8e-05, | |
| "loss": 1.2796156406402588, | |
| "step": 1768 | |
| }, | |
| { | |
| "epoch": 0.4209900401367623, | |
| "grad_norm": 0.388671875, | |
| "learning_rate": 8e-05, | |
| "loss": 1.2971086502075195, | |
| "step": 1770 | |
| }, | |
| { | |
| "epoch": 0.4214657350973688, | |
| "grad_norm": 0.431640625, | |
| "learning_rate": 8e-05, | |
| "loss": 1.2875339984893799, | |
| "step": 1772 | |
| }, | |
| { | |
| "epoch": 0.4219414300579753, | |
| "grad_norm": 0.416015625, | |
| "learning_rate": 8e-05, | |
| "loss": 1.2802563905715942, | |
| "step": 1774 | |
| }, | |
| { | |
| "epoch": 0.4224171250185818, | |
| "grad_norm": 0.455078125, | |
| "learning_rate": 8e-05, | |
| "loss": 1.297211766242981, | |
| "step": 1776 | |
| }, | |
| { | |
| "epoch": 0.4228928199791884, | |
| "grad_norm": 0.4296875, | |
| "learning_rate": 8e-05, | |
| "loss": 1.1946825981140137, | |
| "step": 1778 | |
| }, | |
| { | |
| "epoch": 0.4233685149397949, | |
| "grad_norm": 0.421875, | |
| "learning_rate": 8e-05, | |
| "loss": 1.256436824798584, | |
| "step": 1780 | |
| }, | |
| { | |
| "epoch": 0.4238442099004014, | |
| "grad_norm": 0.380859375, | |
| "learning_rate": 8e-05, | |
| "loss": 1.2787179946899414, | |
| "step": 1782 | |
| }, | |
| { | |
| "epoch": 0.4243199048610079, | |
| "grad_norm": 0.392578125, | |
| "learning_rate": 8e-05, | |
| "loss": 1.2537882328033447, | |
| "step": 1784 | |
| }, | |
| { | |
| "epoch": 0.4247955998216144, | |
| "grad_norm": 0.380859375, | |
| "learning_rate": 8e-05, | |
| "loss": 1.2897108793258667, | |
| "step": 1786 | |
| }, | |
| { | |
| "epoch": 0.4252712947822209, | |
| "grad_norm": 0.40625, | |
| "learning_rate": 8e-05, | |
| "loss": 1.2501720190048218, | |
| "step": 1788 | |
| }, | |
| { | |
| "epoch": 0.42574698974282743, | |
| "grad_norm": 0.39453125, | |
| "learning_rate": 8e-05, | |
| "loss": 1.2753134965896606, | |
| "step": 1790 | |
| }, | |
| { | |
| "epoch": 0.42622268470343394, | |
| "grad_norm": 0.46875, | |
| "learning_rate": 8e-05, | |
| "loss": 1.2809417247772217, | |
| "step": 1792 | |
| }, | |
| { | |
| "epoch": 0.42669837966404045, | |
| "grad_norm": 0.51171875, | |
| "learning_rate": 8e-05, | |
| "loss": 1.2722747325897217, | |
| "step": 1794 | |
| }, | |
| { | |
| "epoch": 0.42717407462464696, | |
| "grad_norm": 0.435546875, | |
| "learning_rate": 8e-05, | |
| "loss": 1.294141411781311, | |
| "step": 1796 | |
| }, | |
| { | |
| "epoch": 0.42764976958525347, | |
| "grad_norm": 0.435546875, | |
| "learning_rate": 8e-05, | |
| "loss": 1.2701992988586426, | |
| "step": 1798 | |
| }, | |
| { | |
| "epoch": 0.42812546454586, | |
| "grad_norm": 0.44140625, | |
| "learning_rate": 8e-05, | |
| "loss": 1.2379639148712158, | |
| "step": 1800 | |
| }, | |
| { | |
| "epoch": 0.4286011595064665, | |
| "grad_norm": 0.443359375, | |
| "learning_rate": 8e-05, | |
| "loss": 1.262845516204834, | |
| "step": 1802 | |
| }, | |
| { | |
| "epoch": 0.429076854467073, | |
| "grad_norm": 0.400390625, | |
| "learning_rate": 8e-05, | |
| "loss": 1.213247299194336, | |
| "step": 1804 | |
| }, | |
| { | |
| "epoch": 0.4295525494276795, | |
| "grad_norm": 0.421875, | |
| "learning_rate": 8e-05, | |
| "loss": 1.270270824432373, | |
| "step": 1806 | |
| }, | |
| { | |
| "epoch": 0.430028244388286, | |
| "grad_norm": 0.396484375, | |
| "learning_rate": 8e-05, | |
| "loss": 1.1981269121170044, | |
| "step": 1808 | |
| }, | |
| { | |
| "epoch": 0.43050393934889253, | |
| "grad_norm": 0.4296875, | |
| "learning_rate": 8e-05, | |
| "loss": 1.2772140502929688, | |
| "step": 1810 | |
| }, | |
| { | |
| "epoch": 0.43097963430949904, | |
| "grad_norm": 0.4296875, | |
| "learning_rate": 8e-05, | |
| "loss": 1.2868304252624512, | |
| "step": 1812 | |
| }, | |
| { | |
| "epoch": 0.43145532927010555, | |
| "grad_norm": 0.41015625, | |
| "learning_rate": 8e-05, | |
| "loss": 1.2759490013122559, | |
| "step": 1814 | |
| }, | |
| { | |
| "epoch": 0.43193102423071206, | |
| "grad_norm": 0.4140625, | |
| "learning_rate": 8e-05, | |
| "loss": 1.292006015777588, | |
| "step": 1816 | |
| }, | |
| { | |
| "epoch": 0.43240671919131857, | |
| "grad_norm": 0.435546875, | |
| "learning_rate": 8e-05, | |
| "loss": 1.2290836572647095, | |
| "step": 1818 | |
| }, | |
| { | |
| "epoch": 0.4328824141519251, | |
| "grad_norm": 0.3984375, | |
| "learning_rate": 8e-05, | |
| "loss": 1.2799795866012573, | |
| "step": 1820 | |
| }, | |
| { | |
| "epoch": 0.4333581091125316, | |
| "grad_norm": 0.3984375, | |
| "learning_rate": 8e-05, | |
| "loss": 1.2700903415679932, | |
| "step": 1822 | |
| }, | |
| { | |
| "epoch": 0.4338338040731381, | |
| "grad_norm": 0.400390625, | |
| "learning_rate": 8e-05, | |
| "loss": 1.276972770690918, | |
| "step": 1824 | |
| }, | |
| { | |
| "epoch": 0.4343094990337446, | |
| "grad_norm": 0.376953125, | |
| "learning_rate": 8e-05, | |
| "loss": 1.2546138763427734, | |
| "step": 1826 | |
| }, | |
| { | |
| "epoch": 0.4347851939943511, | |
| "grad_norm": 0.42578125, | |
| "learning_rate": 8e-05, | |
| "loss": 1.2350144386291504, | |
| "step": 1828 | |
| }, | |
| { | |
| "epoch": 0.4352608889549576, | |
| "grad_norm": 0.404296875, | |
| "learning_rate": 8e-05, | |
| "loss": 1.255623459815979, | |
| "step": 1830 | |
| }, | |
| { | |
| "epoch": 0.43573658391556414, | |
| "grad_norm": 0.3984375, | |
| "learning_rate": 8e-05, | |
| "loss": 1.3046503067016602, | |
| "step": 1832 | |
| }, | |
| { | |
| "epoch": 0.43621227887617064, | |
| "grad_norm": 0.400390625, | |
| "learning_rate": 8e-05, | |
| "loss": 1.2888526916503906, | |
| "step": 1834 | |
| }, | |
| { | |
| "epoch": 0.43668797383677715, | |
| "grad_norm": 0.384765625, | |
| "learning_rate": 8e-05, | |
| "loss": 1.2555067539215088, | |
| "step": 1836 | |
| }, | |
| { | |
| "epoch": 0.43716366879738366, | |
| "grad_norm": 0.423828125, | |
| "learning_rate": 8e-05, | |
| "loss": 1.3039183616638184, | |
| "step": 1838 | |
| }, | |
| { | |
| "epoch": 0.4376393637579902, | |
| "grad_norm": 0.3828125, | |
| "learning_rate": 8e-05, | |
| "loss": 1.2199631929397583, | |
| "step": 1840 | |
| }, | |
| { | |
| "epoch": 0.4381150587185967, | |
| "grad_norm": 0.40234375, | |
| "learning_rate": 8e-05, | |
| "loss": 1.2994630336761475, | |
| "step": 1842 | |
| }, | |
| { | |
| "epoch": 0.4385907536792032, | |
| "grad_norm": 0.392578125, | |
| "learning_rate": 8e-05, | |
| "loss": 1.241573452949524, | |
| "step": 1844 | |
| }, | |
| { | |
| "epoch": 0.4390664486398097, | |
| "grad_norm": 0.396484375, | |
| "learning_rate": 8e-05, | |
| "loss": 1.2570286989212036, | |
| "step": 1846 | |
| }, | |
| { | |
| "epoch": 0.4395421436004162, | |
| "grad_norm": 0.41015625, | |
| "learning_rate": 8e-05, | |
| "loss": 1.2594881057739258, | |
| "step": 1848 | |
| }, | |
| { | |
| "epoch": 0.4400178385610227, | |
| "grad_norm": 0.423828125, | |
| "learning_rate": 8e-05, | |
| "loss": 1.2928112745285034, | |
| "step": 1850 | |
| }, | |
| { | |
| "epoch": 0.44049353352162923, | |
| "grad_norm": 0.40234375, | |
| "learning_rate": 8e-05, | |
| "loss": 1.2726936340332031, | |
| "step": 1852 | |
| }, | |
| { | |
| "epoch": 0.44096922848223574, | |
| "grad_norm": 0.404296875, | |
| "learning_rate": 8e-05, | |
| "loss": 1.32316255569458, | |
| "step": 1854 | |
| }, | |
| { | |
| "epoch": 0.44144492344284225, | |
| "grad_norm": 0.396484375, | |
| "learning_rate": 8e-05, | |
| "loss": 1.2438604831695557, | |
| "step": 1856 | |
| }, | |
| { | |
| "epoch": 0.44192061840344876, | |
| "grad_norm": 0.396484375, | |
| "learning_rate": 8e-05, | |
| "loss": 1.293677568435669, | |
| "step": 1858 | |
| }, | |
| { | |
| "epoch": 0.4423963133640553, | |
| "grad_norm": 0.408203125, | |
| "learning_rate": 8e-05, | |
| "loss": 1.3002068996429443, | |
| "step": 1860 | |
| }, | |
| { | |
| "epoch": 0.44287200832466184, | |
| "grad_norm": 0.45703125, | |
| "learning_rate": 8e-05, | |
| "loss": 1.294407844543457, | |
| "step": 1862 | |
| }, | |
| { | |
| "epoch": 0.44334770328526835, | |
| "grad_norm": 0.400390625, | |
| "learning_rate": 8e-05, | |
| "loss": 1.242573857307434, | |
| "step": 1864 | |
| }, | |
| { | |
| "epoch": 0.44382339824587486, | |
| "grad_norm": 0.396484375, | |
| "learning_rate": 8e-05, | |
| "loss": 1.2922019958496094, | |
| "step": 1866 | |
| }, | |
| { | |
| "epoch": 0.44429909320648137, | |
| "grad_norm": 0.408203125, | |
| "learning_rate": 8e-05, | |
| "loss": 1.2213215827941895, | |
| "step": 1868 | |
| }, | |
| { | |
| "epoch": 0.4447747881670879, | |
| "grad_norm": 0.40625, | |
| "learning_rate": 8e-05, | |
| "loss": 1.2707006931304932, | |
| "step": 1870 | |
| }, | |
| { | |
| "epoch": 0.4452504831276944, | |
| "grad_norm": 0.39453125, | |
| "learning_rate": 8e-05, | |
| "loss": 1.2314999103546143, | |
| "step": 1872 | |
| }, | |
| { | |
| "epoch": 0.4457261780883009, | |
| "grad_norm": 0.3984375, | |
| "learning_rate": 8e-05, | |
| "loss": 1.3218889236450195, | |
| "step": 1874 | |
| }, | |
| { | |
| "epoch": 0.4462018730489074, | |
| "grad_norm": 0.3984375, | |
| "learning_rate": 8e-05, | |
| "loss": 1.2159051895141602, | |
| "step": 1876 | |
| }, | |
| { | |
| "epoch": 0.4466775680095139, | |
| "grad_norm": 0.390625, | |
| "learning_rate": 8e-05, | |
| "loss": 1.3076913356781006, | |
| "step": 1878 | |
| }, | |
| { | |
| "epoch": 0.4471532629701204, | |
| "grad_norm": 0.37890625, | |
| "learning_rate": 8e-05, | |
| "loss": 1.2779921293258667, | |
| "step": 1880 | |
| }, | |
| { | |
| "epoch": 0.44762895793072693, | |
| "grad_norm": 0.40234375, | |
| "learning_rate": 8e-05, | |
| "loss": 1.2604446411132812, | |
| "step": 1882 | |
| }, | |
| { | |
| "epoch": 0.44810465289133344, | |
| "grad_norm": 0.376953125, | |
| "learning_rate": 8e-05, | |
| "loss": 1.2382320165634155, | |
| "step": 1884 | |
| }, | |
| { | |
| "epoch": 0.44858034785193995, | |
| "grad_norm": 0.40234375, | |
| "learning_rate": 8e-05, | |
| "loss": 1.3830101490020752, | |
| "step": 1886 | |
| }, | |
| { | |
| "epoch": 0.44905604281254646, | |
| "grad_norm": 0.396484375, | |
| "learning_rate": 8e-05, | |
| "loss": 1.228447675704956, | |
| "step": 1888 | |
| }, | |
| { | |
| "epoch": 0.449531737773153, | |
| "grad_norm": 0.43359375, | |
| "learning_rate": 8e-05, | |
| "loss": 1.2446924448013306, | |
| "step": 1890 | |
| }, | |
| { | |
| "epoch": 0.4500074327337595, | |
| "grad_norm": 0.455078125, | |
| "learning_rate": 8e-05, | |
| "loss": 1.2444430589675903, | |
| "step": 1892 | |
| }, | |
| { | |
| "epoch": 0.450483127694366, | |
| "grad_norm": 0.41015625, | |
| "learning_rate": 8e-05, | |
| "loss": 1.1985334157943726, | |
| "step": 1894 | |
| }, | |
| { | |
| "epoch": 0.4509588226549725, | |
| "grad_norm": 0.3984375, | |
| "learning_rate": 8e-05, | |
| "loss": 1.2600021362304688, | |
| "step": 1896 | |
| }, | |
| { | |
| "epoch": 0.451434517615579, | |
| "grad_norm": 0.4375, | |
| "learning_rate": 8e-05, | |
| "loss": 1.2708806991577148, | |
| "step": 1898 | |
| }, | |
| { | |
| "epoch": 0.4519102125761855, | |
| "grad_norm": 0.408203125, | |
| "learning_rate": 8e-05, | |
| "loss": 1.273937702178955, | |
| "step": 1900 | |
| }, | |
| { | |
| "epoch": 0.45238590753679203, | |
| "grad_norm": 0.41015625, | |
| "learning_rate": 8e-05, | |
| "loss": 1.2232747077941895, | |
| "step": 1902 | |
| }, | |
| { | |
| "epoch": 0.45286160249739854, | |
| "grad_norm": 0.400390625, | |
| "learning_rate": 8e-05, | |
| "loss": 1.2665836811065674, | |
| "step": 1904 | |
| }, | |
| { | |
| "epoch": 0.45333729745800505, | |
| "grad_norm": 0.40234375, | |
| "learning_rate": 8e-05, | |
| "loss": 1.2973511219024658, | |
| "step": 1906 | |
| }, | |
| { | |
| "epoch": 0.45381299241861156, | |
| "grad_norm": 0.39453125, | |
| "learning_rate": 8e-05, | |
| "loss": 1.239840030670166, | |
| "step": 1908 | |
| }, | |
| { | |
| "epoch": 0.45428868737921807, | |
| "grad_norm": 0.396484375, | |
| "learning_rate": 8e-05, | |
| "loss": 1.2163124084472656, | |
| "step": 1910 | |
| }, | |
| { | |
| "epoch": 0.4547643823398246, | |
| "grad_norm": 0.3828125, | |
| "learning_rate": 8e-05, | |
| "loss": 1.245084285736084, | |
| "step": 1912 | |
| }, | |
| { | |
| "epoch": 0.4552400773004311, | |
| "grad_norm": 0.40234375, | |
| "learning_rate": 8e-05, | |
| "loss": 1.3053221702575684, | |
| "step": 1914 | |
| }, | |
| { | |
| "epoch": 0.4557157722610376, | |
| "grad_norm": 0.41015625, | |
| "learning_rate": 8e-05, | |
| "loss": 1.2788605690002441, | |
| "step": 1916 | |
| }, | |
| { | |
| "epoch": 0.4561914672216441, | |
| "grad_norm": 0.404296875, | |
| "learning_rate": 8e-05, | |
| "loss": 1.2649834156036377, | |
| "step": 1918 | |
| }, | |
| { | |
| "epoch": 0.4566671621822506, | |
| "grad_norm": 0.38671875, | |
| "learning_rate": 8e-05, | |
| "loss": 1.2921392917633057, | |
| "step": 1920 | |
| }, | |
| { | |
| "epoch": 0.45714285714285713, | |
| "grad_norm": 0.40234375, | |
| "learning_rate": 8e-05, | |
| "loss": 1.2440087795257568, | |
| "step": 1922 | |
| }, | |
| { | |
| "epoch": 0.45761855210346364, | |
| "grad_norm": 0.36328125, | |
| "learning_rate": 8e-05, | |
| "loss": 1.2340590953826904, | |
| "step": 1924 | |
| }, | |
| { | |
| "epoch": 0.45809424706407015, | |
| "grad_norm": 0.390625, | |
| "learning_rate": 8e-05, | |
| "loss": 1.2697665691375732, | |
| "step": 1926 | |
| }, | |
| { | |
| "epoch": 0.45856994202467666, | |
| "grad_norm": 0.41796875, | |
| "learning_rate": 8e-05, | |
| "loss": 1.2789232730865479, | |
| "step": 1928 | |
| }, | |
| { | |
| "epoch": 0.45904563698528317, | |
| "grad_norm": 0.4375, | |
| "learning_rate": 8e-05, | |
| "loss": 1.262975811958313, | |
| "step": 1930 | |
| }, | |
| { | |
| "epoch": 0.4595213319458897, | |
| "grad_norm": 0.404296875, | |
| "learning_rate": 8e-05, | |
| "loss": 1.286508321762085, | |
| "step": 1932 | |
| }, | |
| { | |
| "epoch": 0.4599970269064962, | |
| "grad_norm": 0.41015625, | |
| "learning_rate": 8e-05, | |
| "loss": 1.2848749160766602, | |
| "step": 1934 | |
| }, | |
| { | |
| "epoch": 0.4604727218671027, | |
| "grad_norm": 0.4296875, | |
| "learning_rate": 8e-05, | |
| "loss": 1.253462314605713, | |
| "step": 1936 | |
| }, | |
| { | |
| "epoch": 0.4609484168277092, | |
| "grad_norm": 0.41796875, | |
| "learning_rate": 8e-05, | |
| "loss": 1.2408068180084229, | |
| "step": 1938 | |
| }, | |
| { | |
| "epoch": 0.4614241117883157, | |
| "grad_norm": 0.427734375, | |
| "learning_rate": 8e-05, | |
| "loss": 1.2684673070907593, | |
| "step": 1940 | |
| }, | |
| { | |
| "epoch": 0.4618998067489222, | |
| "grad_norm": 0.39453125, | |
| "learning_rate": 8e-05, | |
| "loss": 1.2076165676116943, | |
| "step": 1942 | |
| }, | |
| { | |
| "epoch": 0.4623755017095288, | |
| "grad_norm": 0.392578125, | |
| "learning_rate": 8e-05, | |
| "loss": 1.2525750398635864, | |
| "step": 1944 | |
| }, | |
| { | |
| "epoch": 0.4628511966701353, | |
| "grad_norm": 0.392578125, | |
| "learning_rate": 8e-05, | |
| "loss": 1.2482715845108032, | |
| "step": 1946 | |
| }, | |
| { | |
| "epoch": 0.4633268916307418, | |
| "grad_norm": 0.41796875, | |
| "learning_rate": 8e-05, | |
| "loss": 1.2954301834106445, | |
| "step": 1948 | |
| }, | |
| { | |
| "epoch": 0.4638025865913483, | |
| "grad_norm": 0.4375, | |
| "learning_rate": 8e-05, | |
| "loss": 1.2526676654815674, | |
| "step": 1950 | |
| }, | |
| { | |
| "epoch": 0.46427828155195483, | |
| "grad_norm": 0.408203125, | |
| "learning_rate": 8e-05, | |
| "loss": 1.270956039428711, | |
| "step": 1952 | |
| }, | |
| { | |
| "epoch": 0.46475397651256134, | |
| "grad_norm": 0.40234375, | |
| "learning_rate": 8e-05, | |
| "loss": 1.2305779457092285, | |
| "step": 1954 | |
| }, | |
| { | |
| "epoch": 0.46522967147316785, | |
| "grad_norm": 0.41015625, | |
| "learning_rate": 8e-05, | |
| "loss": 1.3026628494262695, | |
| "step": 1956 | |
| }, | |
| { | |
| "epoch": 0.46570536643377436, | |
| "grad_norm": 0.42578125, | |
| "learning_rate": 8e-05, | |
| "loss": 1.276360273361206, | |
| "step": 1958 | |
| }, | |
| { | |
| "epoch": 0.46618106139438087, | |
| "grad_norm": 0.3984375, | |
| "learning_rate": 8e-05, | |
| "loss": 1.2271491289138794, | |
| "step": 1960 | |
| }, | |
| { | |
| "epoch": 0.4666567563549874, | |
| "grad_norm": 0.39453125, | |
| "learning_rate": 8e-05, | |
| "loss": 1.27445650100708, | |
| "step": 1962 | |
| }, | |
| { | |
| "epoch": 0.4671324513155939, | |
| "grad_norm": 0.40625, | |
| "learning_rate": 8e-05, | |
| "loss": 1.2661042213439941, | |
| "step": 1964 | |
| }, | |
| { | |
| "epoch": 0.4676081462762004, | |
| "grad_norm": 0.390625, | |
| "learning_rate": 8e-05, | |
| "loss": 1.2653608322143555, | |
| "step": 1966 | |
| }, | |
| { | |
| "epoch": 0.4680838412368069, | |
| "grad_norm": 0.388671875, | |
| "learning_rate": 8e-05, | |
| "loss": 1.2979223728179932, | |
| "step": 1968 | |
| }, | |
| { | |
| "epoch": 0.4685595361974134, | |
| "grad_norm": 0.376953125, | |
| "learning_rate": 8e-05, | |
| "loss": 1.2312049865722656, | |
| "step": 1970 | |
| }, | |
| { | |
| "epoch": 0.4690352311580199, | |
| "grad_norm": 0.3984375, | |
| "learning_rate": 8e-05, | |
| "loss": 1.3057024478912354, | |
| "step": 1972 | |
| }, | |
| { | |
| "epoch": 0.46951092611862644, | |
| "grad_norm": 0.400390625, | |
| "learning_rate": 8e-05, | |
| "loss": 1.2951257228851318, | |
| "step": 1974 | |
| }, | |
| { | |
| "epoch": 0.46998662107923295, | |
| "grad_norm": 0.404296875, | |
| "learning_rate": 8e-05, | |
| "loss": 1.2718441486358643, | |
| "step": 1976 | |
| }, | |
| { | |
| "epoch": 0.47046231603983946, | |
| "grad_norm": 0.41796875, | |
| "learning_rate": 8e-05, | |
| "loss": 1.2305831909179688, | |
| "step": 1978 | |
| }, | |
| { | |
| "epoch": 0.47093801100044597, | |
| "grad_norm": 0.392578125, | |
| "learning_rate": 8e-05, | |
| "loss": 1.2023491859436035, | |
| "step": 1980 | |
| }, | |
| { | |
| "epoch": 0.4714137059610525, | |
| "grad_norm": 0.400390625, | |
| "learning_rate": 8e-05, | |
| "loss": 1.219580888748169, | |
| "step": 1982 | |
| }, | |
| { | |
| "epoch": 0.471889400921659, | |
| "grad_norm": 0.41796875, | |
| "learning_rate": 8e-05, | |
| "loss": 1.247983455657959, | |
| "step": 1984 | |
| }, | |
| { | |
| "epoch": 0.4723650958822655, | |
| "grad_norm": 0.41015625, | |
| "learning_rate": 8e-05, | |
| "loss": 1.2529908418655396, | |
| "step": 1986 | |
| }, | |
| { | |
| "epoch": 0.472840790842872, | |
| "grad_norm": 0.38671875, | |
| "learning_rate": 8e-05, | |
| "loss": 1.2776696681976318, | |
| "step": 1988 | |
| }, | |
| { | |
| "epoch": 0.4733164858034785, | |
| "grad_norm": 0.390625, | |
| "learning_rate": 8e-05, | |
| "loss": 1.249483585357666, | |
| "step": 1990 | |
| }, | |
| { | |
| "epoch": 0.473792180764085, | |
| "grad_norm": 0.412109375, | |
| "learning_rate": 8e-05, | |
| "loss": 1.3180161714553833, | |
| "step": 1992 | |
| }, | |
| { | |
| "epoch": 0.47426787572469153, | |
| "grad_norm": 0.36328125, | |
| "learning_rate": 8e-05, | |
| "loss": 1.2910526990890503, | |
| "step": 1994 | |
| }, | |
| { | |
| "epoch": 0.47474357068529804, | |
| "grad_norm": 0.390625, | |
| "learning_rate": 8e-05, | |
| "loss": 1.2458434104919434, | |
| "step": 1996 | |
| }, | |
| { | |
| "epoch": 0.47521926564590455, | |
| "grad_norm": 0.3984375, | |
| "learning_rate": 8e-05, | |
| "loss": 1.3033870458602905, | |
| "step": 1998 | |
| }, | |
| { | |
| "epoch": 0.47569496060651106, | |
| "grad_norm": 0.396484375, | |
| "learning_rate": 8e-05, | |
| "loss": 1.2500510215759277, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 0.4761706555671176, | |
| "grad_norm": 0.396484375, | |
| "learning_rate": 8e-05, | |
| "loss": 1.2625623941421509, | |
| "step": 2002 | |
| }, | |
| { | |
| "epoch": 0.4766463505277241, | |
| "grad_norm": 0.384765625, | |
| "learning_rate": 8e-05, | |
| "loss": 1.2688162326812744, | |
| "step": 2004 | |
| }, | |
| { | |
| "epoch": 0.4771220454883306, | |
| "grad_norm": 0.39453125, | |
| "learning_rate": 8e-05, | |
| "loss": 1.2744543552398682, | |
| "step": 2006 | |
| }, | |
| { | |
| "epoch": 0.4775977404489371, | |
| "grad_norm": 0.3984375, | |
| "learning_rate": 8e-05, | |
| "loss": 1.28806471824646, | |
| "step": 2008 | |
| }, | |
| { | |
| "epoch": 0.4780734354095436, | |
| "grad_norm": 0.392578125, | |
| "learning_rate": 8e-05, | |
| "loss": 1.2513093948364258, | |
| "step": 2010 | |
| }, | |
| { | |
| "epoch": 0.4785491303701501, | |
| "grad_norm": 0.396484375, | |
| "learning_rate": 8e-05, | |
| "loss": 1.2578754425048828, | |
| "step": 2012 | |
| }, | |
| { | |
| "epoch": 0.47902482533075663, | |
| "grad_norm": 0.396484375, | |
| "learning_rate": 8e-05, | |
| "loss": 1.2690682411193848, | |
| "step": 2014 | |
| }, | |
| { | |
| "epoch": 0.47950052029136314, | |
| "grad_norm": 0.3828125, | |
| "learning_rate": 8e-05, | |
| "loss": 1.244710922241211, | |
| "step": 2016 | |
| }, | |
| { | |
| "epoch": 0.47997621525196965, | |
| "grad_norm": 0.396484375, | |
| "learning_rate": 8e-05, | |
| "loss": 1.2703763246536255, | |
| "step": 2018 | |
| }, | |
| { | |
| "epoch": 0.48045191021257616, | |
| "grad_norm": 0.392578125, | |
| "learning_rate": 8e-05, | |
| "loss": 1.289364218711853, | |
| "step": 2020 | |
| }, | |
| { | |
| "epoch": 0.48092760517318267, | |
| "grad_norm": 0.39453125, | |
| "learning_rate": 8e-05, | |
| "loss": 1.2618646621704102, | |
| "step": 2022 | |
| }, | |
| { | |
| "epoch": 0.4814033001337892, | |
| "grad_norm": 0.39453125, | |
| "learning_rate": 8e-05, | |
| "loss": 1.2179932594299316, | |
| "step": 2024 | |
| }, | |
| { | |
| "epoch": 0.48187899509439575, | |
| "grad_norm": 0.41015625, | |
| "learning_rate": 8e-05, | |
| "loss": 1.2760300636291504, | |
| "step": 2026 | |
| }, | |
| { | |
| "epoch": 0.48235469005500226, | |
| "grad_norm": 0.40625, | |
| "learning_rate": 8e-05, | |
| "loss": 1.2909011840820312, | |
| "step": 2028 | |
| }, | |
| { | |
| "epoch": 0.48283038501560877, | |
| "grad_norm": 0.400390625, | |
| "learning_rate": 8e-05, | |
| "loss": 1.2532756328582764, | |
| "step": 2030 | |
| }, | |
| { | |
| "epoch": 0.4833060799762153, | |
| "grad_norm": 0.37890625, | |
| "learning_rate": 8e-05, | |
| "loss": 1.2610164880752563, | |
| "step": 2032 | |
| }, | |
| { | |
| "epoch": 0.4837817749368218, | |
| "grad_norm": 0.376953125, | |
| "learning_rate": 8e-05, | |
| "loss": 1.2327613830566406, | |
| "step": 2034 | |
| }, | |
| { | |
| "epoch": 0.4842574698974283, | |
| "grad_norm": 0.40625, | |
| "learning_rate": 8e-05, | |
| "loss": 1.2339394092559814, | |
| "step": 2036 | |
| }, | |
| { | |
| "epoch": 0.4847331648580348, | |
| "grad_norm": 0.3828125, | |
| "learning_rate": 8e-05, | |
| "loss": 1.2110052108764648, | |
| "step": 2038 | |
| }, | |
| { | |
| "epoch": 0.4852088598186413, | |
| "grad_norm": 0.3671875, | |
| "learning_rate": 8e-05, | |
| "loss": 1.2191238403320312, | |
| "step": 2040 | |
| }, | |
| { | |
| "epoch": 0.4856845547792478, | |
| "grad_norm": 0.37109375, | |
| "learning_rate": 8e-05, | |
| "loss": 1.2192617654800415, | |
| "step": 2042 | |
| }, | |
| { | |
| "epoch": 0.48616024973985433, | |
| "grad_norm": 0.380859375, | |
| "learning_rate": 8e-05, | |
| "loss": 1.2636759281158447, | |
| "step": 2044 | |
| }, | |
| { | |
| "epoch": 0.48663594470046084, | |
| "grad_norm": 0.41015625, | |
| "learning_rate": 8e-05, | |
| "loss": 1.207831621170044, | |
| "step": 2046 | |
| }, | |
| { | |
| "epoch": 0.48711163966106735, | |
| "grad_norm": 0.419921875, | |
| "learning_rate": 8e-05, | |
| "loss": 1.1997129917144775, | |
| "step": 2048 | |
| }, | |
| { | |
| "epoch": 0.48758733462167386, | |
| "grad_norm": 0.455078125, | |
| "learning_rate": 8e-05, | |
| "loss": 1.2564668655395508, | |
| "step": 2050 | |
| }, | |
| { | |
| "epoch": 0.48806302958228037, | |
| "grad_norm": 0.416015625, | |
| "learning_rate": 8e-05, | |
| "loss": 1.2735317945480347, | |
| "step": 2052 | |
| }, | |
| { | |
| "epoch": 0.4885387245428869, | |
| "grad_norm": 0.427734375, | |
| "learning_rate": 8e-05, | |
| "loss": 1.2732312679290771, | |
| "step": 2054 | |
| }, | |
| { | |
| "epoch": 0.4890144195034934, | |
| "grad_norm": 0.408203125, | |
| "learning_rate": 8e-05, | |
| "loss": 1.2548601627349854, | |
| "step": 2056 | |
| }, | |
| { | |
| "epoch": 0.4894901144640999, | |
| "grad_norm": 0.396484375, | |
| "learning_rate": 8e-05, | |
| "loss": 1.2576682567596436, | |
| "step": 2058 | |
| }, | |
| { | |
| "epoch": 0.4899658094247064, | |
| "grad_norm": 0.373046875, | |
| "learning_rate": 8e-05, | |
| "loss": 1.2264012098312378, | |
| "step": 2060 | |
| }, | |
| { | |
| "epoch": 0.4904415043853129, | |
| "grad_norm": 0.39453125, | |
| "learning_rate": 8e-05, | |
| "loss": 1.2854325771331787, | |
| "step": 2062 | |
| }, | |
| { | |
| "epoch": 0.49091719934591943, | |
| "grad_norm": 0.390625, | |
| "learning_rate": 8e-05, | |
| "loss": 1.3236126899719238, | |
| "step": 2064 | |
| }, | |
| { | |
| "epoch": 0.49139289430652594, | |
| "grad_norm": 0.41796875, | |
| "learning_rate": 8e-05, | |
| "loss": 1.2489556074142456, | |
| "step": 2066 | |
| }, | |
| { | |
| "epoch": 0.49186858926713245, | |
| "grad_norm": 0.416015625, | |
| "learning_rate": 8e-05, | |
| "loss": 1.2574775218963623, | |
| "step": 2068 | |
| }, | |
| { | |
| "epoch": 0.49234428422773896, | |
| "grad_norm": 0.41015625, | |
| "learning_rate": 8e-05, | |
| "loss": 1.2095019817352295, | |
| "step": 2070 | |
| }, | |
| { | |
| "epoch": 0.49281997918834547, | |
| "grad_norm": 0.36328125, | |
| "learning_rate": 8e-05, | |
| "loss": 1.2634193897247314, | |
| "step": 2072 | |
| }, | |
| { | |
| "epoch": 0.493295674148952, | |
| "grad_norm": 0.404296875, | |
| "learning_rate": 8e-05, | |
| "loss": 1.2667688131332397, | |
| "step": 2074 | |
| }, | |
| { | |
| "epoch": 0.4937713691095585, | |
| "grad_norm": 0.37890625, | |
| "learning_rate": 8e-05, | |
| "loss": 1.2418756484985352, | |
| "step": 2076 | |
| }, | |
| { | |
| "epoch": 0.494247064070165, | |
| "grad_norm": 0.390625, | |
| "learning_rate": 8e-05, | |
| "loss": 1.2592451572418213, | |
| "step": 2078 | |
| }, | |
| { | |
| "epoch": 0.4947227590307715, | |
| "grad_norm": 0.39453125, | |
| "learning_rate": 8e-05, | |
| "loss": 1.2803057432174683, | |
| "step": 2080 | |
| }, | |
| { | |
| "epoch": 0.495198453991378, | |
| "grad_norm": 0.44921875, | |
| "learning_rate": 8e-05, | |
| "loss": 1.2541866302490234, | |
| "step": 2082 | |
| }, | |
| { | |
| "epoch": 0.49567414895198453, | |
| "grad_norm": 0.3984375, | |
| "learning_rate": 8e-05, | |
| "loss": 1.2359161376953125, | |
| "step": 2084 | |
| }, | |
| { | |
| "epoch": 0.49614984391259104, | |
| "grad_norm": 0.41015625, | |
| "learning_rate": 8e-05, | |
| "loss": 1.2950963973999023, | |
| "step": 2086 | |
| }, | |
| { | |
| "epoch": 0.49662553887319755, | |
| "grad_norm": 0.400390625, | |
| "learning_rate": 8e-05, | |
| "loss": 1.283148169517517, | |
| "step": 2088 | |
| }, | |
| { | |
| "epoch": 0.49710123383380406, | |
| "grad_norm": 0.416015625, | |
| "learning_rate": 8e-05, | |
| "loss": 1.2806365489959717, | |
| "step": 2090 | |
| }, | |
| { | |
| "epoch": 0.49757692879441057, | |
| "grad_norm": 0.404296875, | |
| "learning_rate": 8e-05, | |
| "loss": 1.21940279006958, | |
| "step": 2092 | |
| }, | |
| { | |
| "epoch": 0.4980526237550171, | |
| "grad_norm": 0.41796875, | |
| "learning_rate": 8e-05, | |
| "loss": 1.243825912475586, | |
| "step": 2094 | |
| }, | |
| { | |
| "epoch": 0.4985283187156236, | |
| "grad_norm": 0.392578125, | |
| "learning_rate": 8e-05, | |
| "loss": 1.271630048751831, | |
| "step": 2096 | |
| }, | |
| { | |
| "epoch": 0.4990040136762301, | |
| "grad_norm": 0.38671875, | |
| "learning_rate": 8e-05, | |
| "loss": 1.2829158306121826, | |
| "step": 2098 | |
| }, | |
| { | |
| "epoch": 0.4994797086368366, | |
| "grad_norm": 0.40625, | |
| "learning_rate": 8e-05, | |
| "loss": 1.244653582572937, | |
| "step": 2100 | |
| }, | |
| { | |
| "epoch": 0.4999554035974431, | |
| "grad_norm": 0.3984375, | |
| "learning_rate": 8e-05, | |
| "loss": 1.2149741649627686, | |
| "step": 2102 | |
| }, | |
| { | |
| "epoch": 0.5004310985580497, | |
| "grad_norm": 0.404296875, | |
| "learning_rate": 8e-05, | |
| "loss": 1.2326526641845703, | |
| "step": 2104 | |
| }, | |
| { | |
| "epoch": 0.5009067935186562, | |
| "grad_norm": 0.392578125, | |
| "learning_rate": 8e-05, | |
| "loss": 1.2960628271102905, | |
| "step": 2106 | |
| }, | |
| { | |
| "epoch": 0.5013824884792627, | |
| "grad_norm": 0.40625, | |
| "learning_rate": 8e-05, | |
| "loss": 1.2571380138397217, | |
| "step": 2108 | |
| }, | |
| { | |
| "epoch": 0.5018581834398692, | |
| "grad_norm": 0.39453125, | |
| "learning_rate": 8e-05, | |
| "loss": 1.1806836128234863, | |
| "step": 2110 | |
| }, | |
| { | |
| "epoch": 0.5023338784004757, | |
| "grad_norm": 0.384765625, | |
| "learning_rate": 8e-05, | |
| "loss": 1.2098257541656494, | |
| "step": 2112 | |
| }, | |
| { | |
| "epoch": 0.5028095733610822, | |
| "grad_norm": 0.384765625, | |
| "learning_rate": 8e-05, | |
| "loss": 1.245821237564087, | |
| "step": 2114 | |
| }, | |
| { | |
| "epoch": 0.5032852683216887, | |
| "grad_norm": 0.37109375, | |
| "learning_rate": 8e-05, | |
| "loss": 1.2121808528900146, | |
| "step": 2116 | |
| }, | |
| { | |
| "epoch": 0.5037609632822952, | |
| "grad_norm": 0.380859375, | |
| "learning_rate": 8e-05, | |
| "loss": 1.271606683731079, | |
| "step": 2118 | |
| }, | |
| { | |
| "epoch": 0.5042366582429018, | |
| "grad_norm": 0.37109375, | |
| "learning_rate": 8e-05, | |
| "loss": 1.2480086088180542, | |
| "step": 2120 | |
| }, | |
| { | |
| "epoch": 0.5047123532035083, | |
| "grad_norm": 0.384765625, | |
| "learning_rate": 8e-05, | |
| "loss": 1.2532334327697754, | |
| "step": 2122 | |
| }, | |
| { | |
| "epoch": 0.5051880481641148, | |
| "grad_norm": 0.376953125, | |
| "learning_rate": 8e-05, | |
| "loss": 1.2482119798660278, | |
| "step": 2124 | |
| }, | |
| { | |
| "epoch": 0.5056637431247213, | |
| "grad_norm": 0.3828125, | |
| "learning_rate": 8e-05, | |
| "loss": 1.2053478956222534, | |
| "step": 2126 | |
| }, | |
| { | |
| "epoch": 0.5061394380853278, | |
| "grad_norm": 0.376953125, | |
| "learning_rate": 8e-05, | |
| "loss": 1.2333269119262695, | |
| "step": 2128 | |
| }, | |
| { | |
| "epoch": 0.5066151330459343, | |
| "grad_norm": 0.427734375, | |
| "learning_rate": 8e-05, | |
| "loss": 1.2575441598892212, | |
| "step": 2130 | |
| }, | |
| { | |
| "epoch": 0.5070908280065408, | |
| "grad_norm": 0.443359375, | |
| "learning_rate": 8e-05, | |
| "loss": 1.239387035369873, | |
| "step": 2132 | |
| }, | |
| { | |
| "epoch": 0.5075665229671473, | |
| "grad_norm": 0.435546875, | |
| "learning_rate": 8e-05, | |
| "loss": 1.2529371976852417, | |
| "step": 2134 | |
| }, | |
| { | |
| "epoch": 0.5080422179277538, | |
| "grad_norm": 0.439453125, | |
| "learning_rate": 8e-05, | |
| "loss": 1.2657462358474731, | |
| "step": 2136 | |
| }, | |
| { | |
| "epoch": 0.5085179128883603, | |
| "grad_norm": 0.390625, | |
| "learning_rate": 8e-05, | |
| "loss": 1.2482880353927612, | |
| "step": 2138 | |
| }, | |
| { | |
| "epoch": 0.5089936078489669, | |
| "grad_norm": 0.404296875, | |
| "learning_rate": 8e-05, | |
| "loss": 1.2640414237976074, | |
| "step": 2140 | |
| }, | |
| { | |
| "epoch": 0.5094693028095734, | |
| "grad_norm": 0.396484375, | |
| "learning_rate": 8e-05, | |
| "loss": 1.2489564418792725, | |
| "step": 2142 | |
| }, | |
| { | |
| "epoch": 0.5099449977701799, | |
| "grad_norm": 0.380859375, | |
| "learning_rate": 8e-05, | |
| "loss": 1.1927671432495117, | |
| "step": 2144 | |
| }, | |
| { | |
| "epoch": 0.5104206927307864, | |
| "grad_norm": 0.375, | |
| "learning_rate": 8e-05, | |
| "loss": 1.2743709087371826, | |
| "step": 2146 | |
| }, | |
| { | |
| "epoch": 0.5108963876913929, | |
| "grad_norm": 0.375, | |
| "learning_rate": 8e-05, | |
| "loss": 1.2421848773956299, | |
| "step": 2148 | |
| }, | |
| { | |
| "epoch": 0.5113720826519994, | |
| "grad_norm": 0.37890625, | |
| "learning_rate": 8e-05, | |
| "loss": 1.2825573682785034, | |
| "step": 2150 | |
| }, | |
| { | |
| "epoch": 0.5118477776126059, | |
| "grad_norm": 0.396484375, | |
| "learning_rate": 8e-05, | |
| "loss": 1.2068567276000977, | |
| "step": 2152 | |
| }, | |
| { | |
| "epoch": 0.5123234725732124, | |
| "grad_norm": 0.3828125, | |
| "learning_rate": 8e-05, | |
| "loss": 1.2677295207977295, | |
| "step": 2154 | |
| }, | |
| { | |
| "epoch": 0.5127991675338189, | |
| "grad_norm": 0.384765625, | |
| "learning_rate": 8e-05, | |
| "loss": 1.3184689283370972, | |
| "step": 2156 | |
| }, | |
| { | |
| "epoch": 0.5132748624944254, | |
| "grad_norm": 0.3671875, | |
| "learning_rate": 8e-05, | |
| "loss": 1.2172642946243286, | |
| "step": 2158 | |
| }, | |
| { | |
| "epoch": 0.513750557455032, | |
| "grad_norm": 0.412109375, | |
| "learning_rate": 8e-05, | |
| "loss": 1.2620975971221924, | |
| "step": 2160 | |
| }, | |
| { | |
| "epoch": 0.5142262524156385, | |
| "grad_norm": 0.396484375, | |
| "learning_rate": 8e-05, | |
| "loss": 1.1976842880249023, | |
| "step": 2162 | |
| }, | |
| { | |
| "epoch": 0.514701947376245, | |
| "grad_norm": 0.412109375, | |
| "learning_rate": 8e-05, | |
| "loss": 1.2858420610427856, | |
| "step": 2164 | |
| }, | |
| { | |
| "epoch": 0.5151776423368515, | |
| "grad_norm": 0.408203125, | |
| "learning_rate": 8e-05, | |
| "loss": 1.1963508129119873, | |
| "step": 2166 | |
| }, | |
| { | |
| "epoch": 0.515653337297458, | |
| "grad_norm": 0.439453125, | |
| "learning_rate": 8e-05, | |
| "loss": 1.2777037620544434, | |
| "step": 2168 | |
| }, | |
| { | |
| "epoch": 0.5161290322580645, | |
| "grad_norm": 0.4140625, | |
| "learning_rate": 8e-05, | |
| "loss": 1.2379100322723389, | |
| "step": 2170 | |
| }, | |
| { | |
| "epoch": 0.516604727218671, | |
| "grad_norm": 0.421875, | |
| "learning_rate": 8e-05, | |
| "loss": 1.3012006282806396, | |
| "step": 2172 | |
| }, | |
| { | |
| "epoch": 0.5170804221792775, | |
| "grad_norm": 0.41796875, | |
| "learning_rate": 8e-05, | |
| "loss": 1.2644760608673096, | |
| "step": 2174 | |
| }, | |
| { | |
| "epoch": 0.517556117139884, | |
| "grad_norm": 0.3984375, | |
| "learning_rate": 8e-05, | |
| "loss": 1.2243112325668335, | |
| "step": 2176 | |
| }, | |
| { | |
| "epoch": 0.5180318121004905, | |
| "grad_norm": 0.38671875, | |
| "learning_rate": 8e-05, | |
| "loss": 1.282224416732788, | |
| "step": 2178 | |
| }, | |
| { | |
| "epoch": 0.518507507061097, | |
| "grad_norm": 0.384765625, | |
| "learning_rate": 8e-05, | |
| "loss": 1.2459665536880493, | |
| "step": 2180 | |
| }, | |
| { | |
| "epoch": 0.5189832020217036, | |
| "grad_norm": 0.3828125, | |
| "learning_rate": 8e-05, | |
| "loss": 1.2327321767807007, | |
| "step": 2182 | |
| }, | |
| { | |
| "epoch": 0.5194588969823101, | |
| "grad_norm": 0.376953125, | |
| "learning_rate": 8e-05, | |
| "loss": 1.2502856254577637, | |
| "step": 2184 | |
| }, | |
| { | |
| "epoch": 0.5199345919429166, | |
| "grad_norm": 0.37890625, | |
| "learning_rate": 8e-05, | |
| "loss": 1.2643475532531738, | |
| "step": 2186 | |
| }, | |
| { | |
| "epoch": 0.5204102869035231, | |
| "grad_norm": 0.380859375, | |
| "learning_rate": 8e-05, | |
| "loss": 1.2353267669677734, | |
| "step": 2188 | |
| }, | |
| { | |
| "epoch": 0.5208859818641296, | |
| "grad_norm": 0.392578125, | |
| "learning_rate": 8e-05, | |
| "loss": 1.2246006727218628, | |
| "step": 2190 | |
| }, | |
| { | |
| "epoch": 0.5213616768247361, | |
| "grad_norm": 0.380859375, | |
| "learning_rate": 8e-05, | |
| "loss": 1.2746915817260742, | |
| "step": 2192 | |
| }, | |
| { | |
| "epoch": 0.5218373717853426, | |
| "grad_norm": 0.384765625, | |
| "learning_rate": 8e-05, | |
| "loss": 1.2547008991241455, | |
| "step": 2194 | |
| }, | |
| { | |
| "epoch": 0.5223130667459491, | |
| "grad_norm": 0.3984375, | |
| "learning_rate": 8e-05, | |
| "loss": 1.2143683433532715, | |
| "step": 2196 | |
| }, | |
| { | |
| "epoch": 0.5227887617065556, | |
| "grad_norm": 0.376953125, | |
| "learning_rate": 8e-05, | |
| "loss": 1.2223198413848877, | |
| "step": 2198 | |
| }, | |
| { | |
| "epoch": 0.5232644566671621, | |
| "grad_norm": 0.396484375, | |
| "learning_rate": 8e-05, | |
| "loss": 1.2719308137893677, | |
| "step": 2200 | |
| }, | |
| { | |
| "epoch": 0.5237401516277687, | |
| "grad_norm": 0.3984375, | |
| "learning_rate": 8e-05, | |
| "loss": 1.1987100839614868, | |
| "step": 2202 | |
| }, | |
| { | |
| "epoch": 0.5242158465883752, | |
| "grad_norm": 0.404296875, | |
| "learning_rate": 8e-05, | |
| "loss": 1.2595856189727783, | |
| "step": 2204 | |
| }, | |
| { | |
| "epoch": 0.5246915415489817, | |
| "grad_norm": 0.376953125, | |
| "learning_rate": 8e-05, | |
| "loss": 1.2550652027130127, | |
| "step": 2206 | |
| }, | |
| { | |
| "epoch": 0.5251672365095882, | |
| "grad_norm": 0.40234375, | |
| "learning_rate": 8e-05, | |
| "loss": 1.2664339542388916, | |
| "step": 2208 | |
| }, | |
| { | |
| "epoch": 0.5256429314701947, | |
| "grad_norm": 0.390625, | |
| "learning_rate": 8e-05, | |
| "loss": 1.2030470371246338, | |
| "step": 2210 | |
| }, | |
| { | |
| "epoch": 0.5261186264308012, | |
| "grad_norm": 0.384765625, | |
| "learning_rate": 8e-05, | |
| "loss": 1.257197618484497, | |
| "step": 2212 | |
| }, | |
| { | |
| "epoch": 0.5265943213914077, | |
| "grad_norm": 0.38671875, | |
| "learning_rate": 8e-05, | |
| "loss": 1.2173645496368408, | |
| "step": 2214 | |
| }, | |
| { | |
| "epoch": 0.5270700163520142, | |
| "grad_norm": 0.412109375, | |
| "learning_rate": 8e-05, | |
| "loss": 1.22458815574646, | |
| "step": 2216 | |
| }, | |
| { | |
| "epoch": 0.5275457113126207, | |
| "grad_norm": 0.455078125, | |
| "learning_rate": 8e-05, | |
| "loss": 1.2425655126571655, | |
| "step": 2218 | |
| }, | |
| { | |
| "epoch": 0.5280214062732272, | |
| "grad_norm": 0.427734375, | |
| "learning_rate": 8e-05, | |
| "loss": 1.2677123546600342, | |
| "step": 2220 | |
| }, | |
| { | |
| "epoch": 0.5284971012338338, | |
| "grad_norm": 0.416015625, | |
| "learning_rate": 8e-05, | |
| "loss": 1.203295111656189, | |
| "step": 2222 | |
| }, | |
| { | |
| "epoch": 0.5289727961944403, | |
| "grad_norm": 0.4296875, | |
| "learning_rate": 8e-05, | |
| "loss": 1.260411262512207, | |
| "step": 2224 | |
| }, | |
| { | |
| "epoch": 0.5294484911550468, | |
| "grad_norm": 0.40234375, | |
| "learning_rate": 8e-05, | |
| "loss": 1.270648717880249, | |
| "step": 2226 | |
| }, | |
| { | |
| "epoch": 0.5299241861156533, | |
| "grad_norm": 0.416015625, | |
| "learning_rate": 8e-05, | |
| "loss": 1.2358521223068237, | |
| "step": 2228 | |
| }, | |
| { | |
| "epoch": 0.5303998810762598, | |
| "grad_norm": 0.4140625, | |
| "learning_rate": 8e-05, | |
| "loss": 1.2879903316497803, | |
| "step": 2230 | |
| }, | |
| { | |
| "epoch": 0.5308755760368664, | |
| "grad_norm": 0.396484375, | |
| "learning_rate": 8e-05, | |
| "loss": 1.2541499137878418, | |
| "step": 2232 | |
| }, | |
| { | |
| "epoch": 0.5313512709974729, | |
| "grad_norm": 0.392578125, | |
| "learning_rate": 8e-05, | |
| "loss": 1.1964361667633057, | |
| "step": 2234 | |
| }, | |
| { | |
| "epoch": 0.5318269659580794, | |
| "grad_norm": 0.384765625, | |
| "learning_rate": 8e-05, | |
| "loss": 1.2229571342468262, | |
| "step": 2236 | |
| }, | |
| { | |
| "epoch": 0.5323026609186859, | |
| "grad_norm": 0.3828125, | |
| "learning_rate": 8e-05, | |
| "loss": 1.173935890197754, | |
| "step": 2238 | |
| }, | |
| { | |
| "epoch": 0.5327783558792925, | |
| "grad_norm": 0.392578125, | |
| "learning_rate": 8e-05, | |
| "loss": 1.2346327304840088, | |
| "step": 2240 | |
| }, | |
| { | |
| "epoch": 0.533254050839899, | |
| "grad_norm": 0.38671875, | |
| "learning_rate": 8e-05, | |
| "loss": 1.2555508613586426, | |
| "step": 2242 | |
| }, | |
| { | |
| "epoch": 0.5337297458005055, | |
| "grad_norm": 0.400390625, | |
| "learning_rate": 8e-05, | |
| "loss": 1.2254197597503662, | |
| "step": 2244 | |
| }, | |
| { | |
| "epoch": 0.534205440761112, | |
| "grad_norm": 0.38671875, | |
| "learning_rate": 8e-05, | |
| "loss": 1.2302241325378418, | |
| "step": 2246 | |
| }, | |
| { | |
| "epoch": 0.5346811357217185, | |
| "grad_norm": 0.37890625, | |
| "learning_rate": 8e-05, | |
| "loss": 1.2882341146469116, | |
| "step": 2248 | |
| }, | |
| { | |
| "epoch": 0.535156830682325, | |
| "grad_norm": 0.3671875, | |
| "learning_rate": 8e-05, | |
| "loss": 1.2494275569915771, | |
| "step": 2250 | |
| }, | |
| { | |
| "epoch": 0.5356325256429315, | |
| "grad_norm": 0.37890625, | |
| "learning_rate": 8e-05, | |
| "loss": 1.2567485570907593, | |
| "step": 2252 | |
| }, | |
| { | |
| "epoch": 0.536108220603538, | |
| "grad_norm": 0.388671875, | |
| "learning_rate": 8e-05, | |
| "loss": 1.2737491130828857, | |
| "step": 2254 | |
| }, | |
| { | |
| "epoch": 0.5365839155641445, | |
| "grad_norm": 0.3984375, | |
| "learning_rate": 8e-05, | |
| "loss": 1.2650439739227295, | |
| "step": 2256 | |
| }, | |
| { | |
| "epoch": 0.537059610524751, | |
| "grad_norm": 0.3828125, | |
| "learning_rate": 8e-05, | |
| "loss": 1.2500125169754028, | |
| "step": 2258 | |
| }, | |
| { | |
| "epoch": 0.5375353054853576, | |
| "grad_norm": 0.37109375, | |
| "learning_rate": 8e-05, | |
| "loss": 1.2558541297912598, | |
| "step": 2260 | |
| }, | |
| { | |
| "epoch": 0.5380110004459641, | |
| "grad_norm": 0.390625, | |
| "learning_rate": 8e-05, | |
| "loss": 1.2639573812484741, | |
| "step": 2262 | |
| }, | |
| { | |
| "epoch": 0.5384866954065706, | |
| "grad_norm": 0.380859375, | |
| "learning_rate": 8e-05, | |
| "loss": 1.2046759128570557, | |
| "step": 2264 | |
| }, | |
| { | |
| "epoch": 0.5389623903671771, | |
| "grad_norm": 0.380859375, | |
| "learning_rate": 8e-05, | |
| "loss": 1.3005247116088867, | |
| "step": 2266 | |
| }, | |
| { | |
| "epoch": 0.5394380853277836, | |
| "grad_norm": 0.3828125, | |
| "learning_rate": 8e-05, | |
| "loss": 1.2678768634796143, | |
| "step": 2268 | |
| }, | |
| { | |
| "epoch": 0.5399137802883901, | |
| "grad_norm": 0.396484375, | |
| "learning_rate": 8e-05, | |
| "loss": 1.2596511840820312, | |
| "step": 2270 | |
| }, | |
| { | |
| "epoch": 0.5403894752489966, | |
| "grad_norm": 0.384765625, | |
| "learning_rate": 8e-05, | |
| "loss": 1.2320735454559326, | |
| "step": 2272 | |
| }, | |
| { | |
| "epoch": 0.5408651702096031, | |
| "grad_norm": 0.423828125, | |
| "learning_rate": 8e-05, | |
| "loss": 1.2533533573150635, | |
| "step": 2274 | |
| }, | |
| { | |
| "epoch": 0.5413408651702096, | |
| "grad_norm": 0.40625, | |
| "learning_rate": 8e-05, | |
| "loss": 1.2232404947280884, | |
| "step": 2276 | |
| }, | |
| { | |
| "epoch": 0.5418165601308161, | |
| "grad_norm": 0.412109375, | |
| "learning_rate": 8e-05, | |
| "loss": 1.2465150356292725, | |
| "step": 2278 | |
| }, | |
| { | |
| "epoch": 0.5422922550914226, | |
| "grad_norm": 0.38671875, | |
| "learning_rate": 8e-05, | |
| "loss": 1.2359066009521484, | |
| "step": 2280 | |
| }, | |
| { | |
| "epoch": 0.5427679500520292, | |
| "grad_norm": 0.373046875, | |
| "learning_rate": 8e-05, | |
| "loss": 1.179032325744629, | |
| "step": 2282 | |
| }, | |
| { | |
| "epoch": 0.5432436450126357, | |
| "grad_norm": 0.37890625, | |
| "learning_rate": 8e-05, | |
| "loss": 1.2295031547546387, | |
| "step": 2284 | |
| }, | |
| { | |
| "epoch": 0.5437193399732422, | |
| "grad_norm": 0.392578125, | |
| "learning_rate": 8e-05, | |
| "loss": 1.2600789070129395, | |
| "step": 2286 | |
| }, | |
| { | |
| "epoch": 0.5441950349338487, | |
| "grad_norm": 0.3984375, | |
| "learning_rate": 8e-05, | |
| "loss": 1.2444316148757935, | |
| "step": 2288 | |
| }, | |
| { | |
| "epoch": 0.5446707298944552, | |
| "grad_norm": 0.3671875, | |
| "learning_rate": 8e-05, | |
| "loss": 1.1947736740112305, | |
| "step": 2290 | |
| }, | |
| { | |
| "epoch": 0.5451464248550617, | |
| "grad_norm": 0.3671875, | |
| "learning_rate": 8e-05, | |
| "loss": 1.2450361251831055, | |
| "step": 2292 | |
| }, | |
| { | |
| "epoch": 0.5456221198156682, | |
| "grad_norm": 0.3671875, | |
| "learning_rate": 8e-05, | |
| "loss": 1.189820647239685, | |
| "step": 2294 | |
| }, | |
| { | |
| "epoch": 0.5460978147762747, | |
| "grad_norm": 0.380859375, | |
| "learning_rate": 8e-05, | |
| "loss": 1.2700152397155762, | |
| "step": 2296 | |
| }, | |
| { | |
| "epoch": 0.5465735097368812, | |
| "grad_norm": 0.3828125, | |
| "learning_rate": 8e-05, | |
| "loss": 1.1997301578521729, | |
| "step": 2298 | |
| }, | |
| { | |
| "epoch": 0.5470492046974877, | |
| "grad_norm": 0.37109375, | |
| "learning_rate": 8e-05, | |
| "loss": 1.2718980312347412, | |
| "step": 2300 | |
| }, | |
| { | |
| "epoch": 0.5475248996580943, | |
| "grad_norm": 0.408203125, | |
| "learning_rate": 8e-05, | |
| "loss": 1.2360846996307373, | |
| "step": 2302 | |
| }, | |
| { | |
| "epoch": 0.5480005946187008, | |
| "grad_norm": 0.419921875, | |
| "learning_rate": 8e-05, | |
| "loss": 1.2985812425613403, | |
| "step": 2304 | |
| }, | |
| { | |
| "epoch": 0.5484762895793073, | |
| "grad_norm": 0.40625, | |
| "learning_rate": 8e-05, | |
| "loss": 1.2672054767608643, | |
| "step": 2306 | |
| }, | |
| { | |
| "epoch": 0.5489519845399138, | |
| "grad_norm": 0.416015625, | |
| "learning_rate": 8e-05, | |
| "loss": 1.1932951211929321, | |
| "step": 2308 | |
| }, | |
| { | |
| "epoch": 0.5494276795005203, | |
| "grad_norm": 0.416015625, | |
| "learning_rate": 8e-05, | |
| "loss": 1.2733159065246582, | |
| "step": 2310 | |
| }, | |
| { | |
| "epoch": 0.5499033744611268, | |
| "grad_norm": 0.3984375, | |
| "learning_rate": 8e-05, | |
| "loss": 1.3332585096359253, | |
| "step": 2312 | |
| }, | |
| { | |
| "epoch": 0.5503790694217333, | |
| "grad_norm": 0.400390625, | |
| "learning_rate": 8e-05, | |
| "loss": 1.267357587814331, | |
| "step": 2314 | |
| }, | |
| { | |
| "epoch": 0.5508547643823398, | |
| "grad_norm": 0.388671875, | |
| "learning_rate": 8e-05, | |
| "loss": 1.2183654308319092, | |
| "step": 2316 | |
| }, | |
| { | |
| "epoch": 0.5513304593429463, | |
| "grad_norm": 0.376953125, | |
| "learning_rate": 8e-05, | |
| "loss": 1.244970440864563, | |
| "step": 2318 | |
| }, | |
| { | |
| "epoch": 0.5518061543035528, | |
| "grad_norm": 0.376953125, | |
| "learning_rate": 8e-05, | |
| "loss": 1.2315990924835205, | |
| "step": 2320 | |
| }, | |
| { | |
| "epoch": 0.5522818492641594, | |
| "grad_norm": 0.388671875, | |
| "learning_rate": 8e-05, | |
| "loss": 1.251598834991455, | |
| "step": 2322 | |
| }, | |
| { | |
| "epoch": 0.5527575442247659, | |
| "grad_norm": 0.380859375, | |
| "learning_rate": 8e-05, | |
| "loss": 1.277734637260437, | |
| "step": 2324 | |
| }, | |
| { | |
| "epoch": 0.5532332391853724, | |
| "grad_norm": 0.4140625, | |
| "learning_rate": 8e-05, | |
| "loss": 1.27321195602417, | |
| "step": 2326 | |
| }, | |
| { | |
| "epoch": 0.5537089341459789, | |
| "grad_norm": 0.40625, | |
| "learning_rate": 8e-05, | |
| "loss": 1.2716056108474731, | |
| "step": 2328 | |
| }, | |
| { | |
| "epoch": 0.5541846291065854, | |
| "grad_norm": 0.373046875, | |
| "learning_rate": 8e-05, | |
| "loss": 1.2450883388519287, | |
| "step": 2330 | |
| }, | |
| { | |
| "epoch": 0.5546603240671919, | |
| "grad_norm": 0.39453125, | |
| "learning_rate": 8e-05, | |
| "loss": 1.2274010181427002, | |
| "step": 2332 | |
| }, | |
| { | |
| "epoch": 0.5551360190277984, | |
| "grad_norm": 0.427734375, | |
| "learning_rate": 8e-05, | |
| "loss": 1.2627809047698975, | |
| "step": 2334 | |
| }, | |
| { | |
| "epoch": 0.5556117139884049, | |
| "grad_norm": 0.404296875, | |
| "learning_rate": 8e-05, | |
| "loss": 1.1710472106933594, | |
| "step": 2336 | |
| }, | |
| { | |
| "epoch": 0.5560874089490114, | |
| "grad_norm": 0.388671875, | |
| "learning_rate": 8e-05, | |
| "loss": 1.2715725898742676, | |
| "step": 2338 | |
| }, | |
| { | |
| "epoch": 0.5565631039096179, | |
| "grad_norm": 0.369140625, | |
| "learning_rate": 8e-05, | |
| "loss": 1.2005977630615234, | |
| "step": 2340 | |
| }, | |
| { | |
| "epoch": 0.5570387988702244, | |
| "grad_norm": 0.388671875, | |
| "learning_rate": 8e-05, | |
| "loss": 1.254575252532959, | |
| "step": 2342 | |
| }, | |
| { | |
| "epoch": 0.557514493830831, | |
| "grad_norm": 0.37109375, | |
| "learning_rate": 8e-05, | |
| "loss": 1.2813735008239746, | |
| "step": 2344 | |
| }, | |
| { | |
| "epoch": 0.5579901887914375, | |
| "grad_norm": 0.400390625, | |
| "learning_rate": 8e-05, | |
| "loss": 1.2456145286560059, | |
| "step": 2346 | |
| }, | |
| { | |
| "epoch": 0.558465883752044, | |
| "grad_norm": 0.3828125, | |
| "learning_rate": 8e-05, | |
| "loss": 1.2311184406280518, | |
| "step": 2348 | |
| }, | |
| { | |
| "epoch": 0.5589415787126505, | |
| "grad_norm": 0.384765625, | |
| "learning_rate": 8e-05, | |
| "loss": 1.2404234409332275, | |
| "step": 2350 | |
| }, | |
| { | |
| "epoch": 0.559417273673257, | |
| "grad_norm": 0.404296875, | |
| "learning_rate": 8e-05, | |
| "loss": 1.280110239982605, | |
| "step": 2352 | |
| }, | |
| { | |
| "epoch": 0.5598929686338635, | |
| "grad_norm": 0.380859375, | |
| "learning_rate": 8e-05, | |
| "loss": 1.250441551208496, | |
| "step": 2354 | |
| }, | |
| { | |
| "epoch": 0.56036866359447, | |
| "grad_norm": 0.380859375, | |
| "learning_rate": 8e-05, | |
| "loss": 1.2462382316589355, | |
| "step": 2356 | |
| }, | |
| { | |
| "epoch": 0.5608443585550765, | |
| "grad_norm": 0.375, | |
| "learning_rate": 8e-05, | |
| "loss": 1.2737480401992798, | |
| "step": 2358 | |
| }, | |
| { | |
| "epoch": 0.561320053515683, | |
| "grad_norm": 0.380859375, | |
| "learning_rate": 8e-05, | |
| "loss": 1.276233434677124, | |
| "step": 2360 | |
| }, | |
| { | |
| "epoch": 0.5617957484762895, | |
| "grad_norm": 0.3828125, | |
| "learning_rate": 8e-05, | |
| "loss": 1.2580044269561768, | |
| "step": 2362 | |
| }, | |
| { | |
| "epoch": 0.5622714434368961, | |
| "grad_norm": 0.380859375, | |
| "learning_rate": 8e-05, | |
| "loss": 1.247071385383606, | |
| "step": 2364 | |
| }, | |
| { | |
| "epoch": 0.5627471383975026, | |
| "grad_norm": 0.373046875, | |
| "learning_rate": 8e-05, | |
| "loss": 1.2175970077514648, | |
| "step": 2366 | |
| }, | |
| { | |
| "epoch": 0.5632228333581091, | |
| "grad_norm": 0.359375, | |
| "learning_rate": 8e-05, | |
| "loss": 1.217498540878296, | |
| "step": 2368 | |
| }, | |
| { | |
| "epoch": 0.5636985283187156, | |
| "grad_norm": 0.37890625, | |
| "learning_rate": 8e-05, | |
| "loss": 1.242640495300293, | |
| "step": 2370 | |
| }, | |
| { | |
| "epoch": 0.5641742232793221, | |
| "grad_norm": 0.39453125, | |
| "learning_rate": 8e-05, | |
| "loss": 1.2354379892349243, | |
| "step": 2372 | |
| }, | |
| { | |
| "epoch": 0.5646499182399286, | |
| "grad_norm": 0.400390625, | |
| "learning_rate": 8e-05, | |
| "loss": 1.3069782257080078, | |
| "step": 2374 | |
| }, | |
| { | |
| "epoch": 0.5651256132005351, | |
| "grad_norm": 0.388671875, | |
| "learning_rate": 8e-05, | |
| "loss": 1.2084176540374756, | |
| "step": 2376 | |
| }, | |
| { | |
| "epoch": 0.5656013081611416, | |
| "grad_norm": 0.39453125, | |
| "learning_rate": 8e-05, | |
| "loss": 1.2104275226593018, | |
| "step": 2378 | |
| }, | |
| { | |
| "epoch": 0.5660770031217481, | |
| "grad_norm": 0.396484375, | |
| "learning_rate": 8e-05, | |
| "loss": 1.2741985321044922, | |
| "step": 2380 | |
| }, | |
| { | |
| "epoch": 0.5665526980823546, | |
| "grad_norm": 0.39453125, | |
| "learning_rate": 8e-05, | |
| "loss": 1.230583906173706, | |
| "step": 2382 | |
| }, | |
| { | |
| "epoch": 0.5670283930429612, | |
| "grad_norm": 0.3984375, | |
| "learning_rate": 8e-05, | |
| "loss": 1.1865020990371704, | |
| "step": 2384 | |
| }, | |
| { | |
| "epoch": 0.5675040880035677, | |
| "grad_norm": 0.375, | |
| "learning_rate": 8e-05, | |
| "loss": 1.2901579141616821, | |
| "step": 2386 | |
| }, | |
| { | |
| "epoch": 0.5679797829641742, | |
| "grad_norm": 0.40625, | |
| "learning_rate": 8e-05, | |
| "loss": 1.2829887866973877, | |
| "step": 2388 | |
| }, | |
| { | |
| "epoch": 0.5684554779247807, | |
| "grad_norm": 0.412109375, | |
| "learning_rate": 8e-05, | |
| "loss": 1.2711780071258545, | |
| "step": 2390 | |
| }, | |
| { | |
| "epoch": 0.5689311728853872, | |
| "grad_norm": 0.380859375, | |
| "learning_rate": 8e-05, | |
| "loss": 1.2897326946258545, | |
| "step": 2392 | |
| }, | |
| { | |
| "epoch": 0.5694068678459937, | |
| "grad_norm": 0.37890625, | |
| "learning_rate": 8e-05, | |
| "loss": 1.2700235843658447, | |
| "step": 2394 | |
| }, | |
| { | |
| "epoch": 0.5698825628066002, | |
| "grad_norm": 0.35546875, | |
| "learning_rate": 8e-05, | |
| "loss": 1.2746386528015137, | |
| "step": 2396 | |
| }, | |
| { | |
| "epoch": 0.5703582577672068, | |
| "grad_norm": 0.349609375, | |
| "learning_rate": 8e-05, | |
| "loss": 1.2726595401763916, | |
| "step": 2398 | |
| }, | |
| { | |
| "epoch": 0.5708339527278133, | |
| "grad_norm": 0.376953125, | |
| "learning_rate": 8e-05, | |
| "loss": 1.2205541133880615, | |
| "step": 2400 | |
| }, | |
| { | |
| "epoch": 0.5713096476884199, | |
| "grad_norm": 0.380859375, | |
| "learning_rate": 8e-05, | |
| "loss": 1.2636919021606445, | |
| "step": 2402 | |
| }, | |
| { | |
| "epoch": 0.5717853426490264, | |
| "grad_norm": 0.390625, | |
| "learning_rate": 8e-05, | |
| "loss": 1.2315757274627686, | |
| "step": 2404 | |
| }, | |
| { | |
| "epoch": 0.5722610376096329, | |
| "grad_norm": 0.3671875, | |
| "learning_rate": 8e-05, | |
| "loss": 1.2510037422180176, | |
| "step": 2406 | |
| }, | |
| { | |
| "epoch": 0.5727367325702394, | |
| "grad_norm": 0.380859375, | |
| "learning_rate": 8e-05, | |
| "loss": 1.2190053462982178, | |
| "step": 2408 | |
| }, | |
| { | |
| "epoch": 0.5732124275308459, | |
| "grad_norm": 0.357421875, | |
| "learning_rate": 8e-05, | |
| "loss": 1.2241978645324707, | |
| "step": 2410 | |
| }, | |
| { | |
| "epoch": 0.5736881224914524, | |
| "grad_norm": 0.376953125, | |
| "learning_rate": 8e-05, | |
| "loss": 1.2733025550842285, | |
| "step": 2412 | |
| }, | |
| { | |
| "epoch": 0.5741638174520589, | |
| "grad_norm": 0.3671875, | |
| "learning_rate": 8e-05, | |
| "loss": 1.1712183952331543, | |
| "step": 2414 | |
| }, | |
| { | |
| "epoch": 0.5746395124126654, | |
| "grad_norm": 0.37890625, | |
| "learning_rate": 8e-05, | |
| "loss": 1.2777495384216309, | |
| "step": 2416 | |
| }, | |
| { | |
| "epoch": 0.5751152073732719, | |
| "grad_norm": 0.37109375, | |
| "learning_rate": 8e-05, | |
| "loss": 1.272843837738037, | |
| "step": 2418 | |
| }, | |
| { | |
| "epoch": 0.5755909023338784, | |
| "grad_norm": 0.3671875, | |
| "learning_rate": 8e-05, | |
| "loss": 1.2414473295211792, | |
| "step": 2420 | |
| }, | |
| { | |
| "epoch": 0.576066597294485, | |
| "grad_norm": 0.390625, | |
| "learning_rate": 8e-05, | |
| "loss": 1.2244441509246826, | |
| "step": 2422 | |
| }, | |
| { | |
| "epoch": 0.5765422922550915, | |
| "grad_norm": 0.404296875, | |
| "learning_rate": 8e-05, | |
| "loss": 1.2712140083312988, | |
| "step": 2424 | |
| }, | |
| { | |
| "epoch": 0.577017987215698, | |
| "grad_norm": 0.40234375, | |
| "learning_rate": 8e-05, | |
| "loss": 1.2499234676361084, | |
| "step": 2426 | |
| }, | |
| { | |
| "epoch": 0.5774936821763045, | |
| "grad_norm": 0.404296875, | |
| "learning_rate": 8e-05, | |
| "loss": 1.286064624786377, | |
| "step": 2428 | |
| }, | |
| { | |
| "epoch": 0.577969377136911, | |
| "grad_norm": 0.373046875, | |
| "learning_rate": 8e-05, | |
| "loss": 1.2061920166015625, | |
| "step": 2430 | |
| }, | |
| { | |
| "epoch": 0.5784450720975175, | |
| "grad_norm": 0.396484375, | |
| "learning_rate": 8e-05, | |
| "loss": 1.2993005514144897, | |
| "step": 2432 | |
| }, | |
| { | |
| "epoch": 0.578920767058124, | |
| "grad_norm": 0.3828125, | |
| "learning_rate": 8e-05, | |
| "loss": 1.26462721824646, | |
| "step": 2434 | |
| }, | |
| { | |
| "epoch": 0.5793964620187305, | |
| "grad_norm": 0.3984375, | |
| "learning_rate": 8e-05, | |
| "loss": 1.2716201543807983, | |
| "step": 2436 | |
| }, | |
| { | |
| "epoch": 0.579872156979337, | |
| "grad_norm": 0.376953125, | |
| "learning_rate": 8e-05, | |
| "loss": 1.2104942798614502, | |
| "step": 2438 | |
| }, | |
| { | |
| "epoch": 0.5803478519399435, | |
| "grad_norm": 0.3828125, | |
| "learning_rate": 8e-05, | |
| "loss": 1.2860119342803955, | |
| "step": 2440 | |
| }, | |
| { | |
| "epoch": 0.58082354690055, | |
| "grad_norm": 0.421875, | |
| "learning_rate": 8e-05, | |
| "loss": 1.3008224964141846, | |
| "step": 2442 | |
| }, | |
| { | |
| "epoch": 0.5812992418611566, | |
| "grad_norm": 0.41796875, | |
| "learning_rate": 8e-05, | |
| "loss": 1.2540391683578491, | |
| "step": 2444 | |
| }, | |
| { | |
| "epoch": 0.5817749368217631, | |
| "grad_norm": 0.427734375, | |
| "learning_rate": 8e-05, | |
| "loss": 1.2667243480682373, | |
| "step": 2446 | |
| }, | |
| { | |
| "epoch": 0.5822506317823696, | |
| "grad_norm": 0.416015625, | |
| "learning_rate": 8e-05, | |
| "loss": 1.2277895212173462, | |
| "step": 2448 | |
| }, | |
| { | |
| "epoch": 0.5827263267429761, | |
| "grad_norm": 0.416015625, | |
| "learning_rate": 8e-05, | |
| "loss": 1.2290725708007812, | |
| "step": 2450 | |
| }, | |
| { | |
| "epoch": 0.5832020217035826, | |
| "grad_norm": 0.40234375, | |
| "learning_rate": 8e-05, | |
| "loss": 1.227077841758728, | |
| "step": 2452 | |
| }, | |
| { | |
| "epoch": 0.5836777166641891, | |
| "grad_norm": 0.3671875, | |
| "learning_rate": 8e-05, | |
| "loss": 1.2680089473724365, | |
| "step": 2454 | |
| }, | |
| { | |
| "epoch": 0.5841534116247956, | |
| "grad_norm": 0.36328125, | |
| "learning_rate": 8e-05, | |
| "loss": 1.236987590789795, | |
| "step": 2456 | |
| }, | |
| { | |
| "epoch": 0.5846291065854021, | |
| "grad_norm": 0.37890625, | |
| "learning_rate": 8e-05, | |
| "loss": 1.255210518836975, | |
| "step": 2458 | |
| }, | |
| { | |
| "epoch": 0.5851048015460086, | |
| "grad_norm": 0.388671875, | |
| "learning_rate": 8e-05, | |
| "loss": 1.26492440700531, | |
| "step": 2460 | |
| }, | |
| { | |
| "epoch": 0.5855804965066151, | |
| "grad_norm": 0.38671875, | |
| "learning_rate": 8e-05, | |
| "loss": 1.2221426963806152, | |
| "step": 2462 | |
| }, | |
| { | |
| "epoch": 0.5860561914672217, | |
| "grad_norm": 0.41796875, | |
| "learning_rate": 8e-05, | |
| "loss": 1.2715880870819092, | |
| "step": 2464 | |
| }, | |
| { | |
| "epoch": 0.5865318864278282, | |
| "grad_norm": 0.3671875, | |
| "learning_rate": 8e-05, | |
| "loss": 1.2061941623687744, | |
| "step": 2466 | |
| }, | |
| { | |
| "epoch": 0.5870075813884347, | |
| "grad_norm": 0.384765625, | |
| "learning_rate": 8e-05, | |
| "loss": 1.1762068271636963, | |
| "step": 2468 | |
| }, | |
| { | |
| "epoch": 0.5874832763490412, | |
| "grad_norm": 0.41015625, | |
| "learning_rate": 8e-05, | |
| "loss": 1.2648086547851562, | |
| "step": 2470 | |
| }, | |
| { | |
| "epoch": 0.5879589713096477, | |
| "grad_norm": 0.376953125, | |
| "learning_rate": 8e-05, | |
| "loss": 1.2263320684432983, | |
| "step": 2472 | |
| }, | |
| { | |
| "epoch": 0.5884346662702542, | |
| "grad_norm": 0.400390625, | |
| "learning_rate": 8e-05, | |
| "loss": 1.2949507236480713, | |
| "step": 2474 | |
| }, | |
| { | |
| "epoch": 0.5889103612308607, | |
| "grad_norm": 0.38671875, | |
| "learning_rate": 8e-05, | |
| "loss": 1.2580509185791016, | |
| "step": 2476 | |
| }, | |
| { | |
| "epoch": 0.5893860561914672, | |
| "grad_norm": 0.392578125, | |
| "learning_rate": 8e-05, | |
| "loss": 1.2583489418029785, | |
| "step": 2478 | |
| }, | |
| { | |
| "epoch": 0.5898617511520737, | |
| "grad_norm": 0.369140625, | |
| "learning_rate": 8e-05, | |
| "loss": 1.2290489673614502, | |
| "step": 2480 | |
| }, | |
| { | |
| "epoch": 0.5903374461126802, | |
| "grad_norm": 0.35546875, | |
| "learning_rate": 8e-05, | |
| "loss": 1.1821609735488892, | |
| "step": 2482 | |
| }, | |
| { | |
| "epoch": 0.5908131410732868, | |
| "grad_norm": 0.37109375, | |
| "learning_rate": 8e-05, | |
| "loss": 1.265315294265747, | |
| "step": 2484 | |
| }, | |
| { | |
| "epoch": 0.5912888360338933, | |
| "grad_norm": 0.3671875, | |
| "learning_rate": 8e-05, | |
| "loss": 1.2531765699386597, | |
| "step": 2486 | |
| }, | |
| { | |
| "epoch": 0.5917645309944998, | |
| "grad_norm": 0.373046875, | |
| "learning_rate": 8e-05, | |
| "loss": 1.2487919330596924, | |
| "step": 2488 | |
| }, | |
| { | |
| "epoch": 0.5922402259551063, | |
| "grad_norm": 0.3671875, | |
| "learning_rate": 8e-05, | |
| "loss": 1.2386280298233032, | |
| "step": 2490 | |
| }, | |
| { | |
| "epoch": 0.5927159209157128, | |
| "grad_norm": 0.3828125, | |
| "learning_rate": 8e-05, | |
| "loss": 1.2352405786514282, | |
| "step": 2492 | |
| }, | |
| { | |
| "epoch": 0.5931916158763193, | |
| "grad_norm": 0.408203125, | |
| "learning_rate": 8e-05, | |
| "loss": 1.2241952419281006, | |
| "step": 2494 | |
| }, | |
| { | |
| "epoch": 0.5936673108369258, | |
| "grad_norm": 0.40234375, | |
| "learning_rate": 8e-05, | |
| "loss": 1.2149953842163086, | |
| "step": 2496 | |
| }, | |
| { | |
| "epoch": 0.5941430057975323, | |
| "grad_norm": 0.41796875, | |
| "learning_rate": 8e-05, | |
| "loss": 1.2526779174804688, | |
| "step": 2498 | |
| }, | |
| { | |
| "epoch": 0.5946187007581388, | |
| "grad_norm": 0.40625, | |
| "learning_rate": 8e-05, | |
| "loss": 1.2221908569335938, | |
| "step": 2500 | |
| }, | |
| { | |
| "epoch": 0.5950943957187453, | |
| "grad_norm": 0.373046875, | |
| "learning_rate": 8e-05, | |
| "loss": 1.2482354640960693, | |
| "step": 2502 | |
| }, | |
| { | |
| "epoch": 0.5955700906793518, | |
| "grad_norm": 0.40625, | |
| "learning_rate": 8e-05, | |
| "loss": 1.189880609512329, | |
| "step": 2504 | |
| }, | |
| { | |
| "epoch": 0.5960457856399584, | |
| "grad_norm": 0.380859375, | |
| "learning_rate": 8e-05, | |
| "loss": 1.2685422897338867, | |
| "step": 2506 | |
| }, | |
| { | |
| "epoch": 0.5965214806005649, | |
| "grad_norm": 0.365234375, | |
| "learning_rate": 8e-05, | |
| "loss": 1.2144076824188232, | |
| "step": 2508 | |
| }, | |
| { | |
| "epoch": 0.5969971755611714, | |
| "grad_norm": 0.37890625, | |
| "learning_rate": 8e-05, | |
| "loss": 1.1419060230255127, | |
| "step": 2510 | |
| }, | |
| { | |
| "epoch": 0.5974728705217779, | |
| "grad_norm": 0.40234375, | |
| "learning_rate": 8e-05, | |
| "loss": 1.2288837432861328, | |
| "step": 2512 | |
| }, | |
| { | |
| "epoch": 0.5979485654823844, | |
| "grad_norm": 0.39453125, | |
| "learning_rate": 8e-05, | |
| "loss": 1.2173268795013428, | |
| "step": 2514 | |
| }, | |
| { | |
| "epoch": 0.5984242604429909, | |
| "grad_norm": 0.408203125, | |
| "learning_rate": 8e-05, | |
| "loss": 1.2456581592559814, | |
| "step": 2516 | |
| }, | |
| { | |
| "epoch": 0.5988999554035974, | |
| "grad_norm": 0.412109375, | |
| "learning_rate": 8e-05, | |
| "loss": 1.310453176498413, | |
| "step": 2518 | |
| }, | |
| { | |
| "epoch": 0.5993756503642039, | |
| "grad_norm": 0.431640625, | |
| "learning_rate": 8e-05, | |
| "loss": 1.271226406097412, | |
| "step": 2520 | |
| }, | |
| { | |
| "epoch": 0.5998513453248104, | |
| "grad_norm": 0.3984375, | |
| "learning_rate": 8e-05, | |
| "loss": 1.2474400997161865, | |
| "step": 2522 | |
| }, | |
| { | |
| "epoch": 0.600327040285417, | |
| "grad_norm": 0.384765625, | |
| "learning_rate": 8e-05, | |
| "loss": 1.2638752460479736, | |
| "step": 2524 | |
| }, | |
| { | |
| "epoch": 0.6008027352460235, | |
| "grad_norm": 0.404296875, | |
| "learning_rate": 8e-05, | |
| "loss": 1.2449238300323486, | |
| "step": 2526 | |
| }, | |
| { | |
| "epoch": 0.60127843020663, | |
| "grad_norm": 0.41796875, | |
| "learning_rate": 8e-05, | |
| "loss": 1.2357442378997803, | |
| "step": 2528 | |
| }, | |
| { | |
| "epoch": 0.6017541251672365, | |
| "grad_norm": 0.392578125, | |
| "learning_rate": 8e-05, | |
| "loss": 1.2108559608459473, | |
| "step": 2530 | |
| }, | |
| { | |
| "epoch": 0.602229820127843, | |
| "grad_norm": 0.388671875, | |
| "learning_rate": 8e-05, | |
| "loss": 1.2793264389038086, | |
| "step": 2532 | |
| }, | |
| { | |
| "epoch": 0.6027055150884495, | |
| "grad_norm": 0.3828125, | |
| "learning_rate": 8e-05, | |
| "loss": 1.2260792255401611, | |
| "step": 2534 | |
| }, | |
| { | |
| "epoch": 0.603181210049056, | |
| "grad_norm": 0.369140625, | |
| "learning_rate": 8e-05, | |
| "loss": 1.2476468086242676, | |
| "step": 2536 | |
| }, | |
| { | |
| "epoch": 0.6036569050096625, | |
| "grad_norm": 0.380859375, | |
| "learning_rate": 8e-05, | |
| "loss": 1.2396502494812012, | |
| "step": 2538 | |
| }, | |
| { | |
| "epoch": 0.604132599970269, | |
| "grad_norm": 0.37890625, | |
| "learning_rate": 8e-05, | |
| "loss": 1.2091515064239502, | |
| "step": 2540 | |
| }, | |
| { | |
| "epoch": 0.6046082949308755, | |
| "grad_norm": 0.3671875, | |
| "learning_rate": 8e-05, | |
| "loss": 1.2249176502227783, | |
| "step": 2542 | |
| }, | |
| { | |
| "epoch": 0.605083989891482, | |
| "grad_norm": 0.392578125, | |
| "learning_rate": 8e-05, | |
| "loss": 1.274390459060669, | |
| "step": 2544 | |
| }, | |
| { | |
| "epoch": 0.6055596848520886, | |
| "grad_norm": 0.380859375, | |
| "learning_rate": 8e-05, | |
| "loss": 1.2077170610427856, | |
| "step": 2546 | |
| }, | |
| { | |
| "epoch": 0.6060353798126951, | |
| "grad_norm": 0.384765625, | |
| "learning_rate": 8e-05, | |
| "loss": 1.2330358028411865, | |
| "step": 2548 | |
| }, | |
| { | |
| "epoch": 0.6065110747733016, | |
| "grad_norm": 0.38671875, | |
| "learning_rate": 8e-05, | |
| "loss": 1.2728490829467773, | |
| "step": 2550 | |
| }, | |
| { | |
| "epoch": 0.6069867697339081, | |
| "grad_norm": 0.35546875, | |
| "learning_rate": 8e-05, | |
| "loss": 1.1577916145324707, | |
| "step": 2552 | |
| }, | |
| { | |
| "epoch": 0.6074624646945146, | |
| "grad_norm": 0.3828125, | |
| "learning_rate": 8e-05, | |
| "loss": 1.2705035209655762, | |
| "step": 2554 | |
| }, | |
| { | |
| "epoch": 0.6079381596551211, | |
| "grad_norm": 0.369140625, | |
| "learning_rate": 8e-05, | |
| "loss": 1.2194724082946777, | |
| "step": 2556 | |
| }, | |
| { | |
| "epoch": 0.6084138546157276, | |
| "grad_norm": 0.36328125, | |
| "learning_rate": 8e-05, | |
| "loss": 1.223502278327942, | |
| "step": 2558 | |
| }, | |
| { | |
| "epoch": 0.6088895495763341, | |
| "grad_norm": 0.365234375, | |
| "learning_rate": 8e-05, | |
| "loss": 1.2284711599349976, | |
| "step": 2560 | |
| }, | |
| { | |
| "epoch": 0.6093652445369407, | |
| "grad_norm": 0.3828125, | |
| "learning_rate": 8e-05, | |
| "loss": 1.2612671852111816, | |
| "step": 2562 | |
| }, | |
| { | |
| "epoch": 0.6098409394975473, | |
| "grad_norm": 0.38671875, | |
| "learning_rate": 8e-05, | |
| "loss": 1.2475411891937256, | |
| "step": 2564 | |
| }, | |
| { | |
| "epoch": 0.6103166344581538, | |
| "grad_norm": 0.40234375, | |
| "learning_rate": 8e-05, | |
| "loss": 1.2346876859664917, | |
| "step": 2566 | |
| }, | |
| { | |
| "epoch": 0.6107923294187603, | |
| "grad_norm": 0.37890625, | |
| "learning_rate": 8e-05, | |
| "loss": 1.1875958442687988, | |
| "step": 2568 | |
| }, | |
| { | |
| "epoch": 0.6112680243793668, | |
| "grad_norm": 0.373046875, | |
| "learning_rate": 8e-05, | |
| "loss": 1.2490816116333008, | |
| "step": 2570 | |
| }, | |
| { | |
| "epoch": 0.6117437193399733, | |
| "grad_norm": 0.423828125, | |
| "learning_rate": 8e-05, | |
| "loss": 1.2314293384552002, | |
| "step": 2572 | |
| }, | |
| { | |
| "epoch": 0.6122194143005798, | |
| "grad_norm": 0.416015625, | |
| "learning_rate": 8e-05, | |
| "loss": 1.2150312662124634, | |
| "step": 2574 | |
| }, | |
| { | |
| "epoch": 0.6126951092611863, | |
| "grad_norm": 0.380859375, | |
| "learning_rate": 8e-05, | |
| "loss": 1.2333028316497803, | |
| "step": 2576 | |
| }, | |
| { | |
| "epoch": 0.6131708042217928, | |
| "grad_norm": 0.376953125, | |
| "learning_rate": 8e-05, | |
| "loss": 1.2211978435516357, | |
| "step": 2578 | |
| }, | |
| { | |
| "epoch": 0.6136464991823993, | |
| "grad_norm": 0.38671875, | |
| "learning_rate": 8e-05, | |
| "loss": 1.2382583618164062, | |
| "step": 2580 | |
| }, | |
| { | |
| "epoch": 0.6141221941430058, | |
| "grad_norm": 0.416015625, | |
| "learning_rate": 8e-05, | |
| "loss": 1.2812304496765137, | |
| "step": 2582 | |
| }, | |
| { | |
| "epoch": 0.6145978891036123, | |
| "grad_norm": 0.400390625, | |
| "learning_rate": 8e-05, | |
| "loss": 1.2513656616210938, | |
| "step": 2584 | |
| }, | |
| { | |
| "epoch": 0.6150735840642189, | |
| "grad_norm": 0.4140625, | |
| "learning_rate": 8e-05, | |
| "loss": 1.2187235355377197, | |
| "step": 2586 | |
| }, | |
| { | |
| "epoch": 0.6155492790248254, | |
| "grad_norm": 0.38671875, | |
| "learning_rate": 8e-05, | |
| "loss": 1.2611520290374756, | |
| "step": 2588 | |
| }, | |
| { | |
| "epoch": 0.6160249739854319, | |
| "grad_norm": 0.376953125, | |
| "learning_rate": 8e-05, | |
| "loss": 1.2477173805236816, | |
| "step": 2590 | |
| }, | |
| { | |
| "epoch": 0.6165006689460384, | |
| "grad_norm": 0.35546875, | |
| "learning_rate": 8e-05, | |
| "loss": 1.1624467372894287, | |
| "step": 2592 | |
| }, | |
| { | |
| "epoch": 0.6169763639066449, | |
| "grad_norm": 0.359375, | |
| "learning_rate": 8e-05, | |
| "loss": 1.2253656387329102, | |
| "step": 2594 | |
| }, | |
| { | |
| "epoch": 0.6174520588672514, | |
| "grad_norm": 0.373046875, | |
| "learning_rate": 8e-05, | |
| "loss": 1.2614085674285889, | |
| "step": 2596 | |
| }, | |
| { | |
| "epoch": 0.6179277538278579, | |
| "grad_norm": 0.369140625, | |
| "learning_rate": 8e-05, | |
| "loss": 1.1892552375793457, | |
| "step": 2598 | |
| }, | |
| { | |
| "epoch": 0.6184034487884644, | |
| "grad_norm": 0.365234375, | |
| "learning_rate": 8e-05, | |
| "loss": 1.304673671722412, | |
| "step": 2600 | |
| }, | |
| { | |
| "epoch": 0.6188791437490709, | |
| "grad_norm": 0.3671875, | |
| "learning_rate": 8e-05, | |
| "loss": 1.1897804737091064, | |
| "step": 2602 | |
| }, | |
| { | |
| "epoch": 0.6193548387096774, | |
| "grad_norm": 0.3671875, | |
| "learning_rate": 8e-05, | |
| "loss": 1.2602784633636475, | |
| "step": 2604 | |
| }, | |
| { | |
| "epoch": 0.619830533670284, | |
| "grad_norm": 0.35546875, | |
| "learning_rate": 8e-05, | |
| "loss": 1.1673520803451538, | |
| "step": 2606 | |
| }, | |
| { | |
| "epoch": 0.6203062286308905, | |
| "grad_norm": 0.36328125, | |
| "learning_rate": 8e-05, | |
| "loss": 1.2243266105651855, | |
| "step": 2608 | |
| }, | |
| { | |
| "epoch": 0.620781923591497, | |
| "grad_norm": 0.392578125, | |
| "learning_rate": 8e-05, | |
| "loss": 1.2490243911743164, | |
| "step": 2610 | |
| }, | |
| { | |
| "epoch": 0.6212576185521035, | |
| "grad_norm": 0.373046875, | |
| "learning_rate": 8e-05, | |
| "loss": 1.1751642227172852, | |
| "step": 2612 | |
| }, | |
| { | |
| "epoch": 0.62173331351271, | |
| "grad_norm": 0.390625, | |
| "learning_rate": 8e-05, | |
| "loss": 1.2147471904754639, | |
| "step": 2614 | |
| }, | |
| { | |
| "epoch": 0.6222090084733165, | |
| "grad_norm": 0.3671875, | |
| "learning_rate": 8e-05, | |
| "loss": 1.2540574073791504, | |
| "step": 2616 | |
| }, | |
| { | |
| "epoch": 0.622684703433923, | |
| "grad_norm": 0.380859375, | |
| "learning_rate": 8e-05, | |
| "loss": 1.2662967443466187, | |
| "step": 2618 | |
| }, | |
| { | |
| "epoch": 0.6231603983945295, | |
| "grad_norm": 0.373046875, | |
| "learning_rate": 8e-05, | |
| "loss": 1.2161445617675781, | |
| "step": 2620 | |
| }, | |
| { | |
| "epoch": 0.623636093355136, | |
| "grad_norm": 0.390625, | |
| "learning_rate": 8e-05, | |
| "loss": 1.2706882953643799, | |
| "step": 2622 | |
| }, | |
| { | |
| "epoch": 0.6241117883157425, | |
| "grad_norm": 0.375, | |
| "learning_rate": 8e-05, | |
| "loss": 1.2533507347106934, | |
| "step": 2624 | |
| }, | |
| { | |
| "epoch": 0.624587483276349, | |
| "grad_norm": 0.375, | |
| "learning_rate": 8e-05, | |
| "loss": 1.2325465679168701, | |
| "step": 2626 | |
| }, | |
| { | |
| "epoch": 0.6250631782369556, | |
| "grad_norm": 0.396484375, | |
| "learning_rate": 8e-05, | |
| "loss": 1.2847120761871338, | |
| "step": 2628 | |
| }, | |
| { | |
| "epoch": 0.6255388731975621, | |
| "grad_norm": 0.359375, | |
| "learning_rate": 8e-05, | |
| "loss": 1.2086182832717896, | |
| "step": 2630 | |
| }, | |
| { | |
| "epoch": 0.6260145681581686, | |
| "grad_norm": 0.388671875, | |
| "learning_rate": 8e-05, | |
| "loss": 1.2454938888549805, | |
| "step": 2632 | |
| }, | |
| { | |
| "epoch": 0.6264902631187751, | |
| "grad_norm": 0.37890625, | |
| "learning_rate": 8e-05, | |
| "loss": 1.253279685974121, | |
| "step": 2634 | |
| }, | |
| { | |
| "epoch": 0.6269659580793816, | |
| "grad_norm": 0.40234375, | |
| "learning_rate": 8e-05, | |
| "loss": 1.200972318649292, | |
| "step": 2636 | |
| }, | |
| { | |
| "epoch": 0.6274416530399881, | |
| "grad_norm": 0.416015625, | |
| "learning_rate": 8e-05, | |
| "loss": 1.2435922622680664, | |
| "step": 2638 | |
| }, | |
| { | |
| "epoch": 0.6279173480005946, | |
| "grad_norm": 0.3984375, | |
| "learning_rate": 8e-05, | |
| "loss": 1.2706129550933838, | |
| "step": 2640 | |
| }, | |
| { | |
| "epoch": 0.6283930429612011, | |
| "grad_norm": 0.38671875, | |
| "learning_rate": 8e-05, | |
| "loss": 1.2585011720657349, | |
| "step": 2642 | |
| }, | |
| { | |
| "epoch": 0.6288687379218076, | |
| "grad_norm": 0.39453125, | |
| "learning_rate": 8e-05, | |
| "loss": 1.2510229349136353, | |
| "step": 2644 | |
| }, | |
| { | |
| "epoch": 0.6293444328824142, | |
| "grad_norm": 0.37890625, | |
| "learning_rate": 8e-05, | |
| "loss": 1.2418980598449707, | |
| "step": 2646 | |
| }, | |
| { | |
| "epoch": 0.6298201278430207, | |
| "grad_norm": 0.39453125, | |
| "learning_rate": 8e-05, | |
| "loss": 1.2494986057281494, | |
| "step": 2648 | |
| }, | |
| { | |
| "epoch": 0.6302958228036272, | |
| "grad_norm": 0.384765625, | |
| "learning_rate": 8e-05, | |
| "loss": 1.2714817523956299, | |
| "step": 2650 | |
| }, | |
| { | |
| "epoch": 0.6307715177642337, | |
| "grad_norm": 0.38671875, | |
| "learning_rate": 8e-05, | |
| "loss": 1.2182557582855225, | |
| "step": 2652 | |
| }, | |
| { | |
| "epoch": 0.6312472127248402, | |
| "grad_norm": 0.37890625, | |
| "learning_rate": 8e-05, | |
| "loss": 1.2318391799926758, | |
| "step": 2654 | |
| }, | |
| { | |
| "epoch": 0.6317229076854467, | |
| "grad_norm": 0.39453125, | |
| "learning_rate": 8e-05, | |
| "loss": 1.2475109100341797, | |
| "step": 2656 | |
| }, | |
| { | |
| "epoch": 0.6321986026460532, | |
| "grad_norm": 0.390625, | |
| "learning_rate": 8e-05, | |
| "loss": 1.2712761163711548, | |
| "step": 2658 | |
| }, | |
| { | |
| "epoch": 0.6326742976066597, | |
| "grad_norm": 0.380859375, | |
| "learning_rate": 8e-05, | |
| "loss": 1.2394472360610962, | |
| "step": 2660 | |
| }, | |
| { | |
| "epoch": 0.6331499925672662, | |
| "grad_norm": 0.3828125, | |
| "learning_rate": 8e-05, | |
| "loss": 1.2201759815216064, | |
| "step": 2662 | |
| }, | |
| { | |
| "epoch": 0.6336256875278727, | |
| "grad_norm": 0.369140625, | |
| "learning_rate": 8e-05, | |
| "loss": 1.1588757038116455, | |
| "step": 2664 | |
| }, | |
| { | |
| "epoch": 0.6341013824884792, | |
| "grad_norm": 0.349609375, | |
| "learning_rate": 8e-05, | |
| "loss": 1.1701884269714355, | |
| "step": 2666 | |
| }, | |
| { | |
| "epoch": 0.6345770774490858, | |
| "grad_norm": 0.359375, | |
| "learning_rate": 8e-05, | |
| "loss": 1.2547426223754883, | |
| "step": 2668 | |
| }, | |
| { | |
| "epoch": 0.6350527724096923, | |
| "grad_norm": 0.3671875, | |
| "learning_rate": 8e-05, | |
| "loss": 1.2501137256622314, | |
| "step": 2670 | |
| }, | |
| { | |
| "epoch": 0.6355284673702988, | |
| "grad_norm": 0.396484375, | |
| "learning_rate": 8e-05, | |
| "loss": 1.2706825733184814, | |
| "step": 2672 | |
| }, | |
| { | |
| "epoch": 0.6360041623309053, | |
| "grad_norm": 0.3671875, | |
| "learning_rate": 8e-05, | |
| "loss": 1.2127528190612793, | |
| "step": 2674 | |
| }, | |
| { | |
| "epoch": 0.6364798572915118, | |
| "grad_norm": 0.3828125, | |
| "learning_rate": 8e-05, | |
| "loss": 1.2191646099090576, | |
| "step": 2676 | |
| }, | |
| { | |
| "epoch": 0.6369555522521183, | |
| "grad_norm": 0.369140625, | |
| "learning_rate": 8e-05, | |
| "loss": 1.2211954593658447, | |
| "step": 2678 | |
| }, | |
| { | |
| "epoch": 0.6374312472127248, | |
| "grad_norm": 0.380859375, | |
| "learning_rate": 8e-05, | |
| "loss": 1.2068610191345215, | |
| "step": 2680 | |
| }, | |
| { | |
| "epoch": 0.6379069421733313, | |
| "grad_norm": 0.376953125, | |
| "learning_rate": 8e-05, | |
| "loss": 1.2012649774551392, | |
| "step": 2682 | |
| }, | |
| { | |
| "epoch": 0.6383826371339378, | |
| "grad_norm": 0.390625, | |
| "learning_rate": 8e-05, | |
| "loss": 1.2034168243408203, | |
| "step": 2684 | |
| }, | |
| { | |
| "epoch": 0.6388583320945443, | |
| "grad_norm": 0.359375, | |
| "learning_rate": 8e-05, | |
| "loss": 1.1512229442596436, | |
| "step": 2686 | |
| }, | |
| { | |
| "epoch": 0.6393340270551509, | |
| "grad_norm": 0.380859375, | |
| "learning_rate": 8e-05, | |
| "loss": 1.273275375366211, | |
| "step": 2688 | |
| }, | |
| { | |
| "epoch": 0.6398097220157574, | |
| "grad_norm": 0.380859375, | |
| "learning_rate": 8e-05, | |
| "loss": 1.2607433795928955, | |
| "step": 2690 | |
| }, | |
| { | |
| "epoch": 0.6402854169763639, | |
| "grad_norm": 0.375, | |
| "learning_rate": 8e-05, | |
| "loss": 1.2025877237319946, | |
| "step": 2692 | |
| }, | |
| { | |
| "epoch": 0.6407611119369704, | |
| "grad_norm": 0.375, | |
| "learning_rate": 8e-05, | |
| "loss": 1.237597942352295, | |
| "step": 2694 | |
| }, | |
| { | |
| "epoch": 0.6412368068975769, | |
| "grad_norm": 0.384765625, | |
| "learning_rate": 8e-05, | |
| "loss": 1.2055954933166504, | |
| "step": 2696 | |
| }, | |
| { | |
| "epoch": 0.6417125018581834, | |
| "grad_norm": 0.359375, | |
| "learning_rate": 8e-05, | |
| "loss": 1.2079732418060303, | |
| "step": 2698 | |
| }, | |
| { | |
| "epoch": 0.6421881968187899, | |
| "grad_norm": 0.3515625, | |
| "learning_rate": 8e-05, | |
| "loss": 1.2707421779632568, | |
| "step": 2700 | |
| }, | |
| { | |
| "epoch": 0.6426638917793964, | |
| "grad_norm": 0.388671875, | |
| "learning_rate": 8e-05, | |
| "loss": 1.229077696800232, | |
| "step": 2702 | |
| }, | |
| { | |
| "epoch": 0.6431395867400029, | |
| "grad_norm": 0.388671875, | |
| "learning_rate": 8e-05, | |
| "loss": 1.171201229095459, | |
| "step": 2704 | |
| }, | |
| { | |
| "epoch": 0.6436152817006094, | |
| "grad_norm": 0.349609375, | |
| "learning_rate": 8e-05, | |
| "loss": 1.2386970520019531, | |
| "step": 2706 | |
| }, | |
| { | |
| "epoch": 0.644090976661216, | |
| "grad_norm": 0.373046875, | |
| "learning_rate": 8e-05, | |
| "loss": 1.2508089542388916, | |
| "step": 2708 | |
| }, | |
| { | |
| "epoch": 0.6445666716218225, | |
| "grad_norm": 0.361328125, | |
| "learning_rate": 8e-05, | |
| "loss": 1.2166051864624023, | |
| "step": 2710 | |
| }, | |
| { | |
| "epoch": 0.645042366582429, | |
| "grad_norm": 0.36328125, | |
| "learning_rate": 8e-05, | |
| "loss": 1.1718792915344238, | |
| "step": 2712 | |
| }, | |
| { | |
| "epoch": 0.6455180615430355, | |
| "grad_norm": 0.380859375, | |
| "learning_rate": 8e-05, | |
| "loss": 1.208460807800293, | |
| "step": 2714 | |
| }, | |
| { | |
| "epoch": 0.645993756503642, | |
| "grad_norm": 0.357421875, | |
| "learning_rate": 8e-05, | |
| "loss": 1.2658112049102783, | |
| "step": 2716 | |
| }, | |
| { | |
| "epoch": 0.6464694514642485, | |
| "grad_norm": 0.359375, | |
| "learning_rate": 8e-05, | |
| "loss": 1.2218315601348877, | |
| "step": 2718 | |
| }, | |
| { | |
| "epoch": 0.646945146424855, | |
| "grad_norm": 0.36328125, | |
| "learning_rate": 8e-05, | |
| "loss": 1.2742373943328857, | |
| "step": 2720 | |
| }, | |
| { | |
| "epoch": 0.6474208413854615, | |
| "grad_norm": 0.392578125, | |
| "learning_rate": 8e-05, | |
| "loss": 1.2009773254394531, | |
| "step": 2722 | |
| }, | |
| { | |
| "epoch": 0.647896536346068, | |
| "grad_norm": 0.375, | |
| "learning_rate": 8e-05, | |
| "loss": 1.2288341522216797, | |
| "step": 2724 | |
| }, | |
| { | |
| "epoch": 0.6483722313066745, | |
| "grad_norm": 0.388671875, | |
| "learning_rate": 8e-05, | |
| "loss": 1.2737244367599487, | |
| "step": 2726 | |
| }, | |
| { | |
| "epoch": 0.6488479262672812, | |
| "grad_norm": 0.3828125, | |
| "learning_rate": 8e-05, | |
| "loss": 1.289430856704712, | |
| "step": 2728 | |
| }, | |
| { | |
| "epoch": 0.6493236212278877, | |
| "grad_norm": 0.4140625, | |
| "learning_rate": 8e-05, | |
| "loss": 1.2465755939483643, | |
| "step": 2730 | |
| }, | |
| { | |
| "epoch": 0.6497993161884942, | |
| "grad_norm": 0.39453125, | |
| "learning_rate": 8e-05, | |
| "loss": 1.2071137428283691, | |
| "step": 2732 | |
| }, | |
| { | |
| "epoch": 0.6502750111491007, | |
| "grad_norm": 0.38671875, | |
| "learning_rate": 8e-05, | |
| "loss": 1.194615125656128, | |
| "step": 2734 | |
| }, | |
| { | |
| "epoch": 0.6507507061097072, | |
| "grad_norm": 0.396484375, | |
| "learning_rate": 8e-05, | |
| "loss": 1.2709908485412598, | |
| "step": 2736 | |
| }, | |
| { | |
| "epoch": 0.6512264010703137, | |
| "grad_norm": 0.392578125, | |
| "learning_rate": 8e-05, | |
| "loss": 1.2470756769180298, | |
| "step": 2738 | |
| }, | |
| { | |
| "epoch": 0.6517020960309202, | |
| "grad_norm": 0.380859375, | |
| "learning_rate": 8e-05, | |
| "loss": 1.2120921611785889, | |
| "step": 2740 | |
| }, | |
| { | |
| "epoch": 0.6521777909915267, | |
| "grad_norm": 0.373046875, | |
| "learning_rate": 8e-05, | |
| "loss": 1.2401468753814697, | |
| "step": 2742 | |
| }, | |
| { | |
| "epoch": 0.6526534859521332, | |
| "grad_norm": 0.376953125, | |
| "learning_rate": 8e-05, | |
| "loss": 1.2416322231292725, | |
| "step": 2744 | |
| }, | |
| { | |
| "epoch": 0.6531291809127397, | |
| "grad_norm": 0.376953125, | |
| "learning_rate": 8e-05, | |
| "loss": 1.1909356117248535, | |
| "step": 2746 | |
| }, | |
| { | |
| "epoch": 0.6536048758733463, | |
| "grad_norm": 0.37109375, | |
| "learning_rate": 8e-05, | |
| "loss": 1.2629019021987915, | |
| "step": 2748 | |
| }, | |
| { | |
| "epoch": 0.6540805708339528, | |
| "grad_norm": 0.390625, | |
| "learning_rate": 8e-05, | |
| "loss": 1.2401649951934814, | |
| "step": 2750 | |
| }, | |
| { | |
| "epoch": 0.6545562657945593, | |
| "grad_norm": 0.390625, | |
| "learning_rate": 8e-05, | |
| "loss": 1.2681682109832764, | |
| "step": 2752 | |
| }, | |
| { | |
| "epoch": 0.6550319607551658, | |
| "grad_norm": 0.380859375, | |
| "learning_rate": 8e-05, | |
| "loss": 1.2728466987609863, | |
| "step": 2754 | |
| }, | |
| { | |
| "epoch": 0.6555076557157723, | |
| "grad_norm": 0.361328125, | |
| "learning_rate": 8e-05, | |
| "loss": 1.223940372467041, | |
| "step": 2756 | |
| }, | |
| { | |
| "epoch": 0.6559833506763788, | |
| "grad_norm": 0.373046875, | |
| "learning_rate": 8e-05, | |
| "loss": 1.2075097560882568, | |
| "step": 2758 | |
| }, | |
| { | |
| "epoch": 0.6564590456369853, | |
| "grad_norm": 0.38671875, | |
| "learning_rate": 8e-05, | |
| "loss": 1.266689419746399, | |
| "step": 2760 | |
| }, | |
| { | |
| "epoch": 0.6569347405975918, | |
| "grad_norm": 0.408203125, | |
| "learning_rate": 8e-05, | |
| "loss": 1.2576415538787842, | |
| "step": 2762 | |
| }, | |
| { | |
| "epoch": 0.6574104355581983, | |
| "grad_norm": 0.390625, | |
| "learning_rate": 8e-05, | |
| "loss": 1.2286550998687744, | |
| "step": 2764 | |
| }, | |
| { | |
| "epoch": 0.6578861305188048, | |
| "grad_norm": 0.388671875, | |
| "learning_rate": 8e-05, | |
| "loss": 1.2462449073791504, | |
| "step": 2766 | |
| }, | |
| { | |
| "epoch": 0.6583618254794114, | |
| "grad_norm": 0.38671875, | |
| "learning_rate": 8e-05, | |
| "loss": 1.2215523719787598, | |
| "step": 2768 | |
| }, | |
| { | |
| "epoch": 0.6588375204400179, | |
| "grad_norm": 0.37109375, | |
| "learning_rate": 8e-05, | |
| "loss": 1.200398325920105, | |
| "step": 2770 | |
| }, | |
| { | |
| "epoch": 0.6593132154006244, | |
| "grad_norm": 0.373046875, | |
| "learning_rate": 8e-05, | |
| "loss": 1.2600317001342773, | |
| "step": 2772 | |
| }, | |
| { | |
| "epoch": 0.6597889103612309, | |
| "grad_norm": 0.38671875, | |
| "learning_rate": 8e-05, | |
| "loss": 1.2789270877838135, | |
| "step": 2774 | |
| }, | |
| { | |
| "epoch": 0.6602646053218374, | |
| "grad_norm": 0.38671875, | |
| "learning_rate": 8e-05, | |
| "loss": 1.252886176109314, | |
| "step": 2776 | |
| }, | |
| { | |
| "epoch": 0.6607403002824439, | |
| "grad_norm": 0.408203125, | |
| "learning_rate": 8e-05, | |
| "loss": 1.1967723369598389, | |
| "step": 2778 | |
| }, | |
| { | |
| "epoch": 0.6612159952430504, | |
| "grad_norm": 0.38671875, | |
| "learning_rate": 8e-05, | |
| "loss": 1.171466588973999, | |
| "step": 2780 | |
| }, | |
| { | |
| "epoch": 0.6616916902036569, | |
| "grad_norm": 0.380859375, | |
| "learning_rate": 8e-05, | |
| "loss": 1.2854139804840088, | |
| "step": 2782 | |
| }, | |
| { | |
| "epoch": 0.6621673851642634, | |
| "grad_norm": 0.39453125, | |
| "learning_rate": 8e-05, | |
| "loss": 1.259742021560669, | |
| "step": 2784 | |
| }, | |
| { | |
| "epoch": 0.6626430801248699, | |
| "grad_norm": 0.380859375, | |
| "learning_rate": 8e-05, | |
| "loss": 1.2996937036514282, | |
| "step": 2786 | |
| }, | |
| { | |
| "epoch": 0.6631187750854765, | |
| "grad_norm": 0.369140625, | |
| "learning_rate": 8e-05, | |
| "loss": 1.2012677192687988, | |
| "step": 2788 | |
| }, | |
| { | |
| "epoch": 0.663594470046083, | |
| "grad_norm": 0.400390625, | |
| "learning_rate": 8e-05, | |
| "loss": 1.2383891344070435, | |
| "step": 2790 | |
| }, | |
| { | |
| "epoch": 0.6640701650066895, | |
| "grad_norm": 0.373046875, | |
| "learning_rate": 8e-05, | |
| "loss": 1.2412991523742676, | |
| "step": 2792 | |
| }, | |
| { | |
| "epoch": 0.664545859967296, | |
| "grad_norm": 0.431640625, | |
| "learning_rate": 8e-05, | |
| "loss": 1.2173049449920654, | |
| "step": 2794 | |
| }, | |
| { | |
| "epoch": 0.6650215549279025, | |
| "grad_norm": 0.40234375, | |
| "learning_rate": 8e-05, | |
| "loss": 1.2093093395233154, | |
| "step": 2796 | |
| }, | |
| { | |
| "epoch": 0.665497249888509, | |
| "grad_norm": 0.37109375, | |
| "learning_rate": 8e-05, | |
| "loss": 1.2188637256622314, | |
| "step": 2798 | |
| }, | |
| { | |
| "epoch": 0.6659729448491155, | |
| "grad_norm": 0.3984375, | |
| "learning_rate": 8e-05, | |
| "loss": 1.2203969955444336, | |
| "step": 2800 | |
| }, | |
| { | |
| "epoch": 0.666448639809722, | |
| "grad_norm": 0.384765625, | |
| "learning_rate": 8e-05, | |
| "loss": 1.2513853311538696, | |
| "step": 2802 | |
| }, | |
| { | |
| "epoch": 0.6669243347703285, | |
| "grad_norm": 0.40625, | |
| "learning_rate": 8e-05, | |
| "loss": 1.1890287399291992, | |
| "step": 2804 | |
| }, | |
| { | |
| "epoch": 0.667400029730935, | |
| "grad_norm": 0.40234375, | |
| "learning_rate": 8e-05, | |
| "loss": 1.2430322170257568, | |
| "step": 2806 | |
| }, | |
| { | |
| "epoch": 0.6678757246915416, | |
| "grad_norm": 0.388671875, | |
| "learning_rate": 8e-05, | |
| "loss": 1.2834746837615967, | |
| "step": 2808 | |
| }, | |
| { | |
| "epoch": 0.6683514196521481, | |
| "grad_norm": 0.388671875, | |
| "learning_rate": 8e-05, | |
| "loss": 1.2333581447601318, | |
| "step": 2810 | |
| }, | |
| { | |
| "epoch": 0.6688271146127546, | |
| "grad_norm": 0.39453125, | |
| "learning_rate": 8e-05, | |
| "loss": 1.2029738426208496, | |
| "step": 2812 | |
| }, | |
| { | |
| "epoch": 0.6693028095733611, | |
| "grad_norm": 0.3984375, | |
| "learning_rate": 8e-05, | |
| "loss": 1.2190194129943848, | |
| "step": 2814 | |
| }, | |
| { | |
| "epoch": 0.6697785045339676, | |
| "grad_norm": 0.4140625, | |
| "learning_rate": 8e-05, | |
| "loss": 1.2467260360717773, | |
| "step": 2816 | |
| }, | |
| { | |
| "epoch": 0.6702541994945741, | |
| "grad_norm": 0.412109375, | |
| "learning_rate": 8e-05, | |
| "loss": 1.2383447885513306, | |
| "step": 2818 | |
| }, | |
| { | |
| "epoch": 0.6707298944551806, | |
| "grad_norm": 0.40234375, | |
| "learning_rate": 8e-05, | |
| "loss": 1.2235246896743774, | |
| "step": 2820 | |
| }, | |
| { | |
| "epoch": 0.6712055894157871, | |
| "grad_norm": 0.373046875, | |
| "learning_rate": 8e-05, | |
| "loss": 1.2301299571990967, | |
| "step": 2822 | |
| }, | |
| { | |
| "epoch": 0.6716812843763936, | |
| "grad_norm": 0.373046875, | |
| "learning_rate": 8e-05, | |
| "loss": 1.2388970851898193, | |
| "step": 2824 | |
| }, | |
| { | |
| "epoch": 0.6721569793370001, | |
| "grad_norm": 0.392578125, | |
| "learning_rate": 8e-05, | |
| "loss": 1.2599682807922363, | |
| "step": 2826 | |
| }, | |
| { | |
| "epoch": 0.6726326742976066, | |
| "grad_norm": 0.390625, | |
| "learning_rate": 8e-05, | |
| "loss": 1.2594590187072754, | |
| "step": 2828 | |
| }, | |
| { | |
| "epoch": 0.6731083692582132, | |
| "grad_norm": 0.3828125, | |
| "learning_rate": 8e-05, | |
| "loss": 1.2377604246139526, | |
| "step": 2830 | |
| }, | |
| { | |
| "epoch": 0.6735840642188197, | |
| "grad_norm": 0.396484375, | |
| "learning_rate": 8e-05, | |
| "loss": 1.2168340682983398, | |
| "step": 2832 | |
| }, | |
| { | |
| "epoch": 0.6740597591794262, | |
| "grad_norm": 0.365234375, | |
| "learning_rate": 8e-05, | |
| "loss": 1.1372761726379395, | |
| "step": 2834 | |
| }, | |
| { | |
| "epoch": 0.6745354541400327, | |
| "grad_norm": 0.373046875, | |
| "learning_rate": 8e-05, | |
| "loss": 1.17765212059021, | |
| "step": 2836 | |
| }, | |
| { | |
| "epoch": 0.6750111491006392, | |
| "grad_norm": 0.39453125, | |
| "learning_rate": 8e-05, | |
| "loss": 1.235781192779541, | |
| "step": 2838 | |
| }, | |
| { | |
| "epoch": 0.6754868440612457, | |
| "grad_norm": 0.3984375, | |
| "learning_rate": 8e-05, | |
| "loss": 1.243680715560913, | |
| "step": 2840 | |
| }, | |
| { | |
| "epoch": 0.6759625390218522, | |
| "grad_norm": 0.375, | |
| "learning_rate": 8e-05, | |
| "loss": 1.2039899826049805, | |
| "step": 2842 | |
| }, | |
| { | |
| "epoch": 0.6764382339824587, | |
| "grad_norm": 0.380859375, | |
| "learning_rate": 8e-05, | |
| "loss": 1.273780345916748, | |
| "step": 2844 | |
| }, | |
| { | |
| "epoch": 0.6769139289430652, | |
| "grad_norm": 0.40234375, | |
| "learning_rate": 8e-05, | |
| "loss": 1.1936399936676025, | |
| "step": 2846 | |
| }, | |
| { | |
| "epoch": 0.6773896239036717, | |
| "grad_norm": 0.37890625, | |
| "learning_rate": 8e-05, | |
| "loss": 1.1913855075836182, | |
| "step": 2848 | |
| }, | |
| { | |
| "epoch": 0.6778653188642783, | |
| "grad_norm": 0.392578125, | |
| "learning_rate": 8e-05, | |
| "loss": 1.2655634880065918, | |
| "step": 2850 | |
| }, | |
| { | |
| "epoch": 0.6783410138248848, | |
| "grad_norm": 0.40234375, | |
| "learning_rate": 8e-05, | |
| "loss": 1.229090690612793, | |
| "step": 2852 | |
| }, | |
| { | |
| "epoch": 0.6788167087854913, | |
| "grad_norm": 0.388671875, | |
| "learning_rate": 8e-05, | |
| "loss": 1.229933738708496, | |
| "step": 2854 | |
| }, | |
| { | |
| "epoch": 0.6792924037460978, | |
| "grad_norm": 0.390625, | |
| "learning_rate": 8e-05, | |
| "loss": 1.2448334693908691, | |
| "step": 2856 | |
| }, | |
| { | |
| "epoch": 0.6797680987067043, | |
| "grad_norm": 0.38671875, | |
| "learning_rate": 8e-05, | |
| "loss": 1.1777703762054443, | |
| "step": 2858 | |
| }, | |
| { | |
| "epoch": 0.6802437936673108, | |
| "grad_norm": 0.390625, | |
| "learning_rate": 8e-05, | |
| "loss": 1.1988234519958496, | |
| "step": 2860 | |
| }, | |
| { | |
| "epoch": 0.6807194886279173, | |
| "grad_norm": 0.357421875, | |
| "learning_rate": 8e-05, | |
| "loss": 1.2633662223815918, | |
| "step": 2862 | |
| }, | |
| { | |
| "epoch": 0.6811951835885238, | |
| "grad_norm": 0.396484375, | |
| "learning_rate": 8e-05, | |
| "loss": 1.1914260387420654, | |
| "step": 2864 | |
| }, | |
| { | |
| "epoch": 0.6816708785491303, | |
| "grad_norm": 0.357421875, | |
| "learning_rate": 8e-05, | |
| "loss": 1.1301052570343018, | |
| "step": 2866 | |
| }, | |
| { | |
| "epoch": 0.6821465735097368, | |
| "grad_norm": 0.37890625, | |
| "learning_rate": 8e-05, | |
| "loss": 1.2647099494934082, | |
| "step": 2868 | |
| }, | |
| { | |
| "epoch": 0.6826222684703434, | |
| "grad_norm": 0.392578125, | |
| "learning_rate": 8e-05, | |
| "loss": 1.2434825897216797, | |
| "step": 2870 | |
| }, | |
| { | |
| "epoch": 0.6830979634309499, | |
| "grad_norm": 0.369140625, | |
| "learning_rate": 8e-05, | |
| "loss": 1.1619213819503784, | |
| "step": 2872 | |
| }, | |
| { | |
| "epoch": 0.6835736583915564, | |
| "grad_norm": 0.361328125, | |
| "learning_rate": 8e-05, | |
| "loss": 1.272236943244934, | |
| "step": 2874 | |
| }, | |
| { | |
| "epoch": 0.6840493533521629, | |
| "grad_norm": 0.373046875, | |
| "learning_rate": 8e-05, | |
| "loss": 1.2712485790252686, | |
| "step": 2876 | |
| }, | |
| { | |
| "epoch": 0.6845250483127694, | |
| "grad_norm": 0.380859375, | |
| "learning_rate": 8e-05, | |
| "loss": 1.2562975883483887, | |
| "step": 2878 | |
| }, | |
| { | |
| "epoch": 0.6850007432733759, | |
| "grad_norm": 0.384765625, | |
| "learning_rate": 8e-05, | |
| "loss": 1.2557085752487183, | |
| "step": 2880 | |
| }, | |
| { | |
| "epoch": 0.6854764382339824, | |
| "grad_norm": 0.400390625, | |
| "learning_rate": 8e-05, | |
| "loss": 1.2001773118972778, | |
| "step": 2882 | |
| }, | |
| { | |
| "epoch": 0.6859521331945889, | |
| "grad_norm": 0.384765625, | |
| "learning_rate": 8e-05, | |
| "loss": 1.2661209106445312, | |
| "step": 2884 | |
| }, | |
| { | |
| "epoch": 0.6864278281551954, | |
| "grad_norm": 0.392578125, | |
| "learning_rate": 8e-05, | |
| "loss": 1.2700567245483398, | |
| "step": 2886 | |
| }, | |
| { | |
| "epoch": 0.6869035231158019, | |
| "grad_norm": 0.40234375, | |
| "learning_rate": 8e-05, | |
| "loss": 1.201700210571289, | |
| "step": 2888 | |
| }, | |
| { | |
| "epoch": 0.6873792180764084, | |
| "grad_norm": 0.423828125, | |
| "learning_rate": 8e-05, | |
| "loss": 1.2309627532958984, | |
| "step": 2890 | |
| }, | |
| { | |
| "epoch": 0.687854913037015, | |
| "grad_norm": 0.455078125, | |
| "learning_rate": 8e-05, | |
| "loss": 1.2442858219146729, | |
| "step": 2892 | |
| }, | |
| { | |
| "epoch": 0.6883306079976216, | |
| "grad_norm": 0.416015625, | |
| "learning_rate": 8e-05, | |
| "loss": 1.2312313318252563, | |
| "step": 2894 | |
| }, | |
| { | |
| "epoch": 0.6888063029582281, | |
| "grad_norm": 0.384765625, | |
| "learning_rate": 8e-05, | |
| "loss": 1.168707013130188, | |
| "step": 2896 | |
| }, | |
| { | |
| "epoch": 0.6892819979188346, | |
| "grad_norm": 0.39453125, | |
| "learning_rate": 8e-05, | |
| "loss": 1.2480907440185547, | |
| "step": 2898 | |
| }, | |
| { | |
| "epoch": 0.6897576928794411, | |
| "grad_norm": 0.36328125, | |
| "learning_rate": 8e-05, | |
| "loss": 1.2292897701263428, | |
| "step": 2900 | |
| }, | |
| { | |
| "epoch": 0.6902333878400476, | |
| "grad_norm": 0.373046875, | |
| "learning_rate": 8e-05, | |
| "loss": 1.2023284435272217, | |
| "step": 2902 | |
| }, | |
| { | |
| "epoch": 0.6907090828006541, | |
| "grad_norm": 0.3671875, | |
| "learning_rate": 8e-05, | |
| "loss": 1.2282081842422485, | |
| "step": 2904 | |
| }, | |
| { | |
| "epoch": 0.6911847777612606, | |
| "grad_norm": 0.408203125, | |
| "learning_rate": 8e-05, | |
| "loss": 1.2390121221542358, | |
| "step": 2906 | |
| }, | |
| { | |
| "epoch": 0.6916604727218671, | |
| "grad_norm": 0.38671875, | |
| "learning_rate": 8e-05, | |
| "loss": 1.255518913269043, | |
| "step": 2908 | |
| }, | |
| { | |
| "epoch": 0.6921361676824737, | |
| "grad_norm": 0.365234375, | |
| "learning_rate": 8e-05, | |
| "loss": 1.1897988319396973, | |
| "step": 2910 | |
| }, | |
| { | |
| "epoch": 0.6926118626430802, | |
| "grad_norm": 0.376953125, | |
| "learning_rate": 8e-05, | |
| "loss": 1.1889443397521973, | |
| "step": 2912 | |
| }, | |
| { | |
| "epoch": 0.6930875576036867, | |
| "grad_norm": 0.373046875, | |
| "learning_rate": 8e-05, | |
| "loss": 1.2725920677185059, | |
| "step": 2914 | |
| }, | |
| { | |
| "epoch": 0.6935632525642932, | |
| "grad_norm": 0.345703125, | |
| "learning_rate": 8e-05, | |
| "loss": 1.2496650218963623, | |
| "step": 2916 | |
| }, | |
| { | |
| "epoch": 0.6940389475248997, | |
| "grad_norm": 0.35546875, | |
| "learning_rate": 8e-05, | |
| "loss": 1.1894876956939697, | |
| "step": 2918 | |
| }, | |
| { | |
| "epoch": 0.6945146424855062, | |
| "grad_norm": 0.361328125, | |
| "learning_rate": 8e-05, | |
| "loss": 1.2089958190917969, | |
| "step": 2920 | |
| }, | |
| { | |
| "epoch": 0.6949903374461127, | |
| "grad_norm": 0.3671875, | |
| "learning_rate": 8e-05, | |
| "loss": 1.2501626014709473, | |
| "step": 2922 | |
| }, | |
| { | |
| "epoch": 0.6954660324067192, | |
| "grad_norm": 0.365234375, | |
| "learning_rate": 8e-05, | |
| "loss": 1.221423625946045, | |
| "step": 2924 | |
| }, | |
| { | |
| "epoch": 0.6959417273673257, | |
| "grad_norm": 0.375, | |
| "learning_rate": 8e-05, | |
| "loss": 1.2127522230148315, | |
| "step": 2926 | |
| }, | |
| { | |
| "epoch": 0.6964174223279322, | |
| "grad_norm": 0.3984375, | |
| "learning_rate": 8e-05, | |
| "loss": 1.2586814165115356, | |
| "step": 2928 | |
| }, | |
| { | |
| "epoch": 0.6968931172885388, | |
| "grad_norm": 0.365234375, | |
| "learning_rate": 8e-05, | |
| "loss": 1.2253403663635254, | |
| "step": 2930 | |
| }, | |
| { | |
| "epoch": 0.6973688122491453, | |
| "grad_norm": 0.357421875, | |
| "learning_rate": 8e-05, | |
| "loss": 1.2009187936782837, | |
| "step": 2932 | |
| }, | |
| { | |
| "epoch": 0.6978445072097518, | |
| "grad_norm": 0.345703125, | |
| "learning_rate": 8e-05, | |
| "loss": 1.179222583770752, | |
| "step": 2934 | |
| }, | |
| { | |
| "epoch": 0.6983202021703583, | |
| "grad_norm": 0.36328125, | |
| "learning_rate": 8e-05, | |
| "loss": 1.1949589252471924, | |
| "step": 2936 | |
| }, | |
| { | |
| "epoch": 0.6987958971309648, | |
| "grad_norm": 0.365234375, | |
| "learning_rate": 8e-05, | |
| "loss": 1.2387232780456543, | |
| "step": 2938 | |
| }, | |
| { | |
| "epoch": 0.6992715920915713, | |
| "grad_norm": 0.36328125, | |
| "learning_rate": 8e-05, | |
| "loss": 1.184262990951538, | |
| "step": 2940 | |
| }, | |
| { | |
| "epoch": 0.6997472870521778, | |
| "grad_norm": 0.35546875, | |
| "learning_rate": 8e-05, | |
| "loss": 1.1451635360717773, | |
| "step": 2942 | |
| }, | |
| { | |
| "epoch": 0.7002229820127843, | |
| "grad_norm": 0.41796875, | |
| "learning_rate": 8e-05, | |
| "loss": 1.2731480598449707, | |
| "step": 2944 | |
| }, | |
| { | |
| "epoch": 0.7006986769733908, | |
| "grad_norm": 0.431640625, | |
| "learning_rate": 7.999950424154985e-05, | |
| "loss": 1.2006233930587769, | |
| "step": 2946 | |
| }, | |
| { | |
| "epoch": 0.7011743719339973, | |
| "grad_norm": 0.408203125, | |
| "learning_rate": 7.999801697848817e-05, | |
| "loss": 1.2164214849472046, | |
| "step": 2948 | |
| }, | |
| { | |
| "epoch": 0.7016500668946039, | |
| "grad_norm": 0.390625, | |
| "learning_rate": 7.999553824768115e-05, | |
| "loss": 1.2032701969146729, | |
| "step": 2950 | |
| }, | |
| { | |
| "epoch": 0.7021257618552104, | |
| "grad_norm": 0.3671875, | |
| "learning_rate": 7.999206811057136e-05, | |
| "loss": 1.184319257736206, | |
| "step": 2952 | |
| }, | |
| { | |
| "epoch": 0.7026014568158169, | |
| "grad_norm": 0.388671875, | |
| "learning_rate": 7.998760665317632e-05, | |
| "loss": 1.1767771244049072, | |
| "step": 2954 | |
| }, | |
| { | |
| "epoch": 0.7030771517764234, | |
| "grad_norm": 0.365234375, | |
| "learning_rate": 7.998215398608625e-05, | |
| "loss": 1.1959552764892578, | |
| "step": 2956 | |
| }, | |
| { | |
| "epoch": 0.7035528467370299, | |
| "grad_norm": 0.353515625, | |
| "learning_rate": 7.997571024446146e-05, | |
| "loss": 1.1779606342315674, | |
| "step": 2958 | |
| }, | |
| { | |
| "epoch": 0.7040285416976364, | |
| "grad_norm": 0.37109375, | |
| "learning_rate": 7.996827558802894e-05, | |
| "loss": 1.1682159900665283, | |
| "step": 2960 | |
| }, | |
| { | |
| "epoch": 0.7045042366582429, | |
| "grad_norm": 0.375, | |
| "learning_rate": 7.995985020107833e-05, | |
| "loss": 1.181810736656189, | |
| "step": 2962 | |
| }, | |
| { | |
| "epoch": 0.7049799316188494, | |
| "grad_norm": 0.345703125, | |
| "learning_rate": 7.995043429245751e-05, | |
| "loss": 1.2362987995147705, | |
| "step": 2964 | |
| }, | |
| { | |
| "epoch": 0.7054556265794559, | |
| "grad_norm": 0.353515625, | |
| "learning_rate": 7.994002809556727e-05, | |
| "loss": 1.1899755001068115, | |
| "step": 2966 | |
| }, | |
| { | |
| "epoch": 0.7059313215400624, | |
| "grad_norm": 0.369140625, | |
| "learning_rate": 7.992863186835562e-05, | |
| "loss": 1.223832607269287, | |
| "step": 2968 | |
| }, | |
| { | |
| "epoch": 0.706407016500669, | |
| "grad_norm": 0.375, | |
| "learning_rate": 7.991624589331135e-05, | |
| "loss": 1.2033984661102295, | |
| "step": 2970 | |
| }, | |
| { | |
| "epoch": 0.7068827114612755, | |
| "grad_norm": 0.3984375, | |
| "learning_rate": 7.990287047745706e-05, | |
| "loss": 1.2263352870941162, | |
| "step": 2972 | |
| }, | |
| { | |
| "epoch": 0.707358406421882, | |
| "grad_norm": 0.36328125, | |
| "learning_rate": 7.988850595234152e-05, | |
| "loss": 1.204215168952942, | |
| "step": 2974 | |
| }, | |
| { | |
| "epoch": 0.7078341013824885, | |
| "grad_norm": 0.359375, | |
| "learning_rate": 7.987315267403146e-05, | |
| "loss": 1.2107601165771484, | |
| "step": 2976 | |
| }, | |
| { | |
| "epoch": 0.708309796343095, | |
| "grad_norm": 0.376953125, | |
| "learning_rate": 7.985681102310276e-05, | |
| "loss": 1.2664358615875244, | |
| "step": 2978 | |
| }, | |
| { | |
| "epoch": 0.7087854913037015, | |
| "grad_norm": 0.33984375, | |
| "learning_rate": 7.983948140463098e-05, | |
| "loss": 1.1956796646118164, | |
| "step": 2980 | |
| }, | |
| { | |
| "epoch": 0.709261186264308, | |
| "grad_norm": 0.369140625, | |
| "learning_rate": 7.982116424818139e-05, | |
| "loss": 1.2163138389587402, | |
| "step": 2982 | |
| }, | |
| { | |
| "epoch": 0.7097368812249145, | |
| "grad_norm": 0.34375, | |
| "learning_rate": 7.980186000779822e-05, | |
| "loss": 1.1702892780303955, | |
| "step": 2984 | |
| }, | |
| { | |
| "epoch": 0.710212576185521, | |
| "grad_norm": 0.3671875, | |
| "learning_rate": 7.978156916199348e-05, | |
| "loss": 1.2452645301818848, | |
| "step": 2986 | |
| }, | |
| { | |
| "epoch": 0.7106882711461275, | |
| "grad_norm": 0.376953125, | |
| "learning_rate": 7.976029221373511e-05, | |
| "loss": 1.1621694564819336, | |
| "step": 2988 | |
| }, | |
| { | |
| "epoch": 0.711163966106734, | |
| "grad_norm": 0.353515625, | |
| "learning_rate": 7.973802969043444e-05, | |
| "loss": 1.2300595045089722, | |
| "step": 2990 | |
| }, | |
| { | |
| "epoch": 0.7116396610673406, | |
| "grad_norm": 0.3515625, | |
| "learning_rate": 7.971478214393316e-05, | |
| "loss": 1.1861531734466553, | |
| "step": 2992 | |
| }, | |
| { | |
| "epoch": 0.7121153560279471, | |
| "grad_norm": 0.359375, | |
| "learning_rate": 7.969055015048968e-05, | |
| "loss": 1.2321807146072388, | |
| "step": 2994 | |
| }, | |
| { | |
| "epoch": 0.7125910509885536, | |
| "grad_norm": 0.3828125, | |
| "learning_rate": 7.966533431076474e-05, | |
| "loss": 1.197440266609192, | |
| "step": 2996 | |
| }, | |
| { | |
| "epoch": 0.7130667459491601, | |
| "grad_norm": 0.353515625, | |
| "learning_rate": 7.963913524980666e-05, | |
| "loss": 1.1787972450256348, | |
| "step": 2998 | |
| }, | |
| { | |
| "epoch": 0.7135424409097666, | |
| "grad_norm": 0.353515625, | |
| "learning_rate": 7.961195361703569e-05, | |
| "loss": 1.2083191871643066, | |
| "step": 3000 | |
| }, | |
| { | |
| "epoch": 0.7140181358703731, | |
| "grad_norm": 0.38671875, | |
| "learning_rate": 7.958379008622808e-05, | |
| "loss": 1.178969144821167, | |
| "step": 3002 | |
| }, | |
| { | |
| "epoch": 0.7144938308309796, | |
| "grad_norm": 0.37890625, | |
| "learning_rate": 7.955464535549922e-05, | |
| "loss": 1.2047157287597656, | |
| "step": 3004 | |
| }, | |
| { | |
| "epoch": 0.7149695257915861, | |
| "grad_norm": 0.375, | |
| "learning_rate": 7.952452014728645e-05, | |
| "loss": 1.1746503114700317, | |
| "step": 3006 | |
| }, | |
| { | |
| "epoch": 0.7154452207521926, | |
| "grad_norm": 0.38671875, | |
| "learning_rate": 7.949341520833109e-05, | |
| "loss": 1.1968495845794678, | |
| "step": 3008 | |
| }, | |
| { | |
| "epoch": 0.7159209157127991, | |
| "grad_norm": 0.34765625, | |
| "learning_rate": 7.946133130965995e-05, | |
| "loss": 1.1814994812011719, | |
| "step": 3010 | |
| }, | |
| { | |
| "epoch": 0.7163966106734057, | |
| "grad_norm": 0.373046875, | |
| "learning_rate": 7.942826924656624e-05, | |
| "loss": 1.2259728908538818, | |
| "step": 3012 | |
| }, | |
| { | |
| "epoch": 0.7168723056340122, | |
| "grad_norm": 0.3828125, | |
| "learning_rate": 7.939422983858982e-05, | |
| "loss": 1.2128264904022217, | |
| "step": 3014 | |
| }, | |
| { | |
| "epoch": 0.7173480005946187, | |
| "grad_norm": 0.353515625, | |
| "learning_rate": 7.935921392949688e-05, | |
| "loss": 1.1720407009124756, | |
| "step": 3016 | |
| }, | |
| { | |
| "epoch": 0.7178236955552252, | |
| "grad_norm": 0.357421875, | |
| "learning_rate": 7.932322238725907e-05, | |
| "loss": 1.187741994857788, | |
| "step": 3018 | |
| }, | |
| { | |
| "epoch": 0.7182993905158317, | |
| "grad_norm": 0.359375, | |
| "learning_rate": 7.928625610403196e-05, | |
| "loss": 1.2031012773513794, | |
| "step": 3020 | |
| }, | |
| { | |
| "epoch": 0.7187750854764382, | |
| "grad_norm": 0.36328125, | |
| "learning_rate": 7.924831599613289e-05, | |
| "loss": 1.2213904857635498, | |
| "step": 3022 | |
| }, | |
| { | |
| "epoch": 0.7192507804370447, | |
| "grad_norm": 0.384765625, | |
| "learning_rate": 7.920940300401832e-05, | |
| "loss": 1.2365423440933228, | |
| "step": 3024 | |
| }, | |
| { | |
| "epoch": 0.7197264753976512, | |
| "grad_norm": 0.40234375, | |
| "learning_rate": 7.91695180922605e-05, | |
| "loss": 1.2173717021942139, | |
| "step": 3026 | |
| }, | |
| { | |
| "epoch": 0.7202021703582577, | |
| "grad_norm": 0.3828125, | |
| "learning_rate": 7.912866224952352e-05, | |
| "loss": 1.1911011934280396, | |
| "step": 3028 | |
| }, | |
| { | |
| "epoch": 0.7206778653188642, | |
| "grad_norm": 0.38671875, | |
| "learning_rate": 7.908683648853886e-05, | |
| "loss": 1.1721656322479248, | |
| "step": 3030 | |
| }, | |
| { | |
| "epoch": 0.7211535602794708, | |
| "grad_norm": 0.384765625, | |
| "learning_rate": 7.904404184608021e-05, | |
| "loss": 1.2273123264312744, | |
| "step": 3032 | |
| }, | |
| { | |
| "epoch": 0.7216292552400773, | |
| "grad_norm": 0.3671875, | |
| "learning_rate": 7.900027938293788e-05, | |
| "loss": 1.1623331308364868, | |
| "step": 3034 | |
| }, | |
| { | |
| "epoch": 0.7221049502006838, | |
| "grad_norm": 0.365234375, | |
| "learning_rate": 7.895555018389241e-05, | |
| "loss": 1.1802709102630615, | |
| "step": 3036 | |
| }, | |
| { | |
| "epoch": 0.7225806451612903, | |
| "grad_norm": 0.369140625, | |
| "learning_rate": 7.890985535768771e-05, | |
| "loss": 1.2304480075836182, | |
| "step": 3038 | |
| }, | |
| { | |
| "epoch": 0.7230563401218968, | |
| "grad_norm": 0.35546875, | |
| "learning_rate": 7.88631960370036e-05, | |
| "loss": 1.182260513305664, | |
| "step": 3040 | |
| }, | |
| { | |
| "epoch": 0.7235320350825033, | |
| "grad_norm": 0.361328125, | |
| "learning_rate": 7.881557337842769e-05, | |
| "loss": 1.2020962238311768, | |
| "step": 3042 | |
| }, | |
| { | |
| "epoch": 0.7240077300431098, | |
| "grad_norm": 0.38671875, | |
| "learning_rate": 7.876698856242677e-05, | |
| "loss": 1.1832443475723267, | |
| "step": 3044 | |
| }, | |
| { | |
| "epoch": 0.7244834250037163, | |
| "grad_norm": 0.392578125, | |
| "learning_rate": 7.871744279331747e-05, | |
| "loss": 1.223937749862671, | |
| "step": 3046 | |
| }, | |
| { | |
| "epoch": 0.7249591199643228, | |
| "grad_norm": 0.412109375, | |
| "learning_rate": 7.866693729923651e-05, | |
| "loss": 1.2505052089691162, | |
| "step": 3048 | |
| }, | |
| { | |
| "epoch": 0.7254348149249293, | |
| "grad_norm": 0.39453125, | |
| "learning_rate": 7.861547333211014e-05, | |
| "loss": 1.2611567974090576, | |
| "step": 3050 | |
| }, | |
| { | |
| "epoch": 0.7259105098855358, | |
| "grad_norm": 0.41015625, | |
| "learning_rate": 7.85630521676232e-05, | |
| "loss": 1.2265489101409912, | |
| "step": 3052 | |
| }, | |
| { | |
| "epoch": 0.7263862048461424, | |
| "grad_norm": 0.37109375, | |
| "learning_rate": 7.850967510518743e-05, | |
| "loss": 1.2124598026275635, | |
| "step": 3054 | |
| }, | |
| { | |
| "epoch": 0.7268618998067489, | |
| "grad_norm": 0.3828125, | |
| "learning_rate": 7.845534346790934e-05, | |
| "loss": 1.1696916818618774, | |
| "step": 3056 | |
| }, | |
| { | |
| "epoch": 0.7273375947673555, | |
| "grad_norm": 0.390625, | |
| "learning_rate": 7.840005860255733e-05, | |
| "loss": 1.2019386291503906, | |
| "step": 3058 | |
| }, | |
| { | |
| "epoch": 0.727813289727962, | |
| "grad_norm": 0.375, | |
| "learning_rate": 7.834382187952839e-05, | |
| "loss": 1.2763334512710571, | |
| "step": 3060 | |
| }, | |
| { | |
| "epoch": 0.7282889846885685, | |
| "grad_norm": 0.37890625, | |
| "learning_rate": 7.828663469281401e-05, | |
| "loss": 1.2215170860290527, | |
| "step": 3062 | |
| }, | |
| { | |
| "epoch": 0.728764679649175, | |
| "grad_norm": 0.3828125, | |
| "learning_rate": 7.822849845996578e-05, | |
| "loss": 1.2576022148132324, | |
| "step": 3064 | |
| }, | |
| { | |
| "epoch": 0.7292403746097815, | |
| "grad_norm": 0.369140625, | |
| "learning_rate": 7.81694146220601e-05, | |
| "loss": 1.2104671001434326, | |
| "step": 3066 | |
| }, | |
| { | |
| "epoch": 0.729716069570388, | |
| "grad_norm": 0.369140625, | |
| "learning_rate": 7.810938464366258e-05, | |
| "loss": 1.2412121295928955, | |
| "step": 3068 | |
| }, | |
| { | |
| "epoch": 0.7301917645309945, | |
| "grad_norm": 0.365234375, | |
| "learning_rate": 7.804841001279169e-05, | |
| "loss": 1.2783949375152588, | |
| "step": 3070 | |
| }, | |
| { | |
| "epoch": 0.7306674594916011, | |
| "grad_norm": 0.359375, | |
| "learning_rate": 7.798649224088184e-05, | |
| "loss": 1.3144667148590088, | |
| "step": 3072 | |
| }, | |
| { | |
| "epoch": 0.7311431544522076, | |
| "grad_norm": 0.37109375, | |
| "learning_rate": 7.792363286274595e-05, | |
| "loss": 1.1726528406143188, | |
| "step": 3074 | |
| }, | |
| { | |
| "epoch": 0.7316188494128141, | |
| "grad_norm": 0.38671875, | |
| "learning_rate": 7.785983343653742e-05, | |
| "loss": 1.2941590547561646, | |
| "step": 3076 | |
| }, | |
| { | |
| "epoch": 0.7320945443734206, | |
| "grad_norm": 0.36328125, | |
| "learning_rate": 7.779509554371152e-05, | |
| "loss": 1.184098482131958, | |
| "step": 3078 | |
| }, | |
| { | |
| "epoch": 0.7325702393340271, | |
| "grad_norm": 0.359375, | |
| "learning_rate": 7.772942078898607e-05, | |
| "loss": 1.202735424041748, | |
| "step": 3080 | |
| }, | |
| { | |
| "epoch": 0.7330459342946336, | |
| "grad_norm": 0.36328125, | |
| "learning_rate": 7.766281080030182e-05, | |
| "loss": 1.2427330017089844, | |
| "step": 3082 | |
| }, | |
| { | |
| "epoch": 0.7335216292552401, | |
| "grad_norm": 0.3828125, | |
| "learning_rate": 7.7595267228782e-05, | |
| "loss": 1.2848570346832275, | |
| "step": 3084 | |
| }, | |
| { | |
| "epoch": 0.7339973242158466, | |
| "grad_norm": 0.376953125, | |
| "learning_rate": 7.752679174869145e-05, | |
| "loss": 1.2101168632507324, | |
| "step": 3086 | |
| }, | |
| { | |
| "epoch": 0.7344730191764531, | |
| "grad_norm": 0.373046875, | |
| "learning_rate": 7.745738605739504e-05, | |
| "loss": 1.2171400785446167, | |
| "step": 3088 | |
| }, | |
| { | |
| "epoch": 0.7349487141370596, | |
| "grad_norm": 0.40234375, | |
| "learning_rate": 7.738705187531568e-05, | |
| "loss": 1.2718677520751953, | |
| "step": 3090 | |
| }, | |
| { | |
| "epoch": 0.7354244090976662, | |
| "grad_norm": 0.4140625, | |
| "learning_rate": 7.731579094589161e-05, | |
| "loss": 1.219995141029358, | |
| "step": 3092 | |
| }, | |
| { | |
| "epoch": 0.7359001040582727, | |
| "grad_norm": 0.41015625, | |
| "learning_rate": 7.724360503553326e-05, | |
| "loss": 1.2589280605316162, | |
| "step": 3094 | |
| }, | |
| { | |
| "epoch": 0.7363757990188792, | |
| "grad_norm": 0.400390625, | |
| "learning_rate": 7.717049593357937e-05, | |
| "loss": 1.2229852676391602, | |
| "step": 3096 | |
| }, | |
| { | |
| "epoch": 0.7368514939794857, | |
| "grad_norm": 0.375, | |
| "learning_rate": 7.709646545225266e-05, | |
| "loss": 1.2284798622131348, | |
| "step": 3098 | |
| }, | |
| { | |
| "epoch": 0.7373271889400922, | |
| "grad_norm": 0.384765625, | |
| "learning_rate": 7.7021515426615e-05, | |
| "loss": 1.2732608318328857, | |
| "step": 3100 | |
| }, | |
| { | |
| "epoch": 0.7378028839006987, | |
| "grad_norm": 0.404296875, | |
| "learning_rate": 7.694564771452179e-05, | |
| "loss": 1.215606927871704, | |
| "step": 3102 | |
| }, | |
| { | |
| "epoch": 0.7382785788613052, | |
| "grad_norm": 0.416015625, | |
| "learning_rate": 7.686886419657603e-05, | |
| "loss": 1.194861650466919, | |
| "step": 3104 | |
| }, | |
| { | |
| "epoch": 0.7387542738219117, | |
| "grad_norm": 0.3984375, | |
| "learning_rate": 7.67911667760816e-05, | |
| "loss": 1.1924793720245361, | |
| "step": 3106 | |
| }, | |
| { | |
| "epoch": 0.7392299687825182, | |
| "grad_norm": 0.3984375, | |
| "learning_rate": 7.671255737899613e-05, | |
| "loss": 1.196773648262024, | |
| "step": 3108 | |
| }, | |
| { | |
| "epoch": 0.7397056637431247, | |
| "grad_norm": 0.39453125, | |
| "learning_rate": 7.663303795388326e-05, | |
| "loss": 1.2454726696014404, | |
| "step": 3110 | |
| }, | |
| { | |
| "epoch": 0.7401813587037313, | |
| "grad_norm": 0.384765625, | |
| "learning_rate": 7.655261047186437e-05, | |
| "loss": 1.2148265838623047, | |
| "step": 3112 | |
| }, | |
| { | |
| "epoch": 0.7406570536643378, | |
| "grad_norm": 0.40234375, | |
| "learning_rate": 7.647127692656961e-05, | |
| "loss": 1.2220816612243652, | |
| "step": 3114 | |
| }, | |
| { | |
| "epoch": 0.7411327486249443, | |
| "grad_norm": 0.404296875, | |
| "learning_rate": 7.638903933408862e-05, | |
| "loss": 1.186138391494751, | |
| "step": 3116 | |
| }, | |
| { | |
| "epoch": 0.7416084435855508, | |
| "grad_norm": 0.392578125, | |
| "learning_rate": 7.630589973292046e-05, | |
| "loss": 1.1910457611083984, | |
| "step": 3118 | |
| }, | |
| { | |
| "epoch": 0.7420841385461573, | |
| "grad_norm": 0.427734375, | |
| "learning_rate": 7.622186018392313e-05, | |
| "loss": 1.182339072227478, | |
| "step": 3120 | |
| }, | |
| { | |
| "epoch": 0.7425598335067638, | |
| "grad_norm": 0.39453125, | |
| "learning_rate": 7.613692277026247e-05, | |
| "loss": 1.2056699991226196, | |
| "step": 3122 | |
| }, | |
| { | |
| "epoch": 0.7430355284673703, | |
| "grad_norm": 0.3828125, | |
| "learning_rate": 7.605108959736048e-05, | |
| "loss": 1.228093147277832, | |
| "step": 3124 | |
| }, | |
| { | |
| "epoch": 0.7435112234279768, | |
| "grad_norm": 0.380859375, | |
| "learning_rate": 7.596436279284322e-05, | |
| "loss": 1.2399944067001343, | |
| "step": 3126 | |
| }, | |
| { | |
| "epoch": 0.7439869183885833, | |
| "grad_norm": 0.373046875, | |
| "learning_rate": 7.587674450648798e-05, | |
| "loss": 1.2229018211364746, | |
| "step": 3128 | |
| }, | |
| { | |
| "epoch": 0.7444626133491898, | |
| "grad_norm": 0.34375, | |
| "learning_rate": 7.578823691017007e-05, | |
| "loss": 1.2147870063781738, | |
| "step": 3130 | |
| }, | |
| { | |
| "epoch": 0.7449383083097963, | |
| "grad_norm": 0.349609375, | |
| "learning_rate": 7.569884219780893e-05, | |
| "loss": 1.18184494972229, | |
| "step": 3132 | |
| }, | |
| { | |
| "epoch": 0.7454140032704029, | |
| "grad_norm": 0.36328125, | |
| "learning_rate": 7.560856258531374e-05, | |
| "loss": 1.2729527950286865, | |
| "step": 3134 | |
| }, | |
| { | |
| "epoch": 0.7458896982310094, | |
| "grad_norm": 0.369140625, | |
| "learning_rate": 7.551740031052857e-05, | |
| "loss": 1.2199832201004028, | |
| "step": 3136 | |
| }, | |
| { | |
| "epoch": 0.7463653931916159, | |
| "grad_norm": 0.384765625, | |
| "learning_rate": 7.54253576331768e-05, | |
| "loss": 1.2424662113189697, | |
| "step": 3138 | |
| }, | |
| { | |
| "epoch": 0.7468410881522224, | |
| "grad_norm": 0.3515625, | |
| "learning_rate": 7.53324368348052e-05, | |
| "loss": 1.1974238157272339, | |
| "step": 3140 | |
| }, | |
| { | |
| "epoch": 0.7473167831128289, | |
| "grad_norm": 0.365234375, | |
| "learning_rate": 7.52386402187273e-05, | |
| "loss": 1.2078831195831299, | |
| "step": 3142 | |
| }, | |
| { | |
| "epoch": 0.7477924780734354, | |
| "grad_norm": 0.36328125, | |
| "learning_rate": 7.514397010996637e-05, | |
| "loss": 1.2366812229156494, | |
| "step": 3144 | |
| }, | |
| { | |
| "epoch": 0.7482681730340419, | |
| "grad_norm": 0.37109375, | |
| "learning_rate": 7.504842885519771e-05, | |
| "loss": 1.2229359149932861, | |
| "step": 3146 | |
| }, | |
| { | |
| "epoch": 0.7487438679946484, | |
| "grad_norm": 0.37109375, | |
| "learning_rate": 7.495201882269055e-05, | |
| "loss": 1.2356886863708496, | |
| "step": 3148 | |
| }, | |
| { | |
| "epoch": 0.7492195629552549, | |
| "grad_norm": 0.36328125, | |
| "learning_rate": 7.485474240224932e-05, | |
| "loss": 1.2112306356430054, | |
| "step": 3150 | |
| }, | |
| { | |
| "epoch": 0.7496952579158614, | |
| "grad_norm": 0.375, | |
| "learning_rate": 7.475660200515437e-05, | |
| "loss": 1.1738417148590088, | |
| "step": 3152 | |
| }, | |
| { | |
| "epoch": 0.750170952876468, | |
| "grad_norm": 0.376953125, | |
| "learning_rate": 7.465760006410228e-05, | |
| "loss": 1.197131633758545, | |
| "step": 3154 | |
| }, | |
| { | |
| "epoch": 0.7506466478370745, | |
| "grad_norm": 0.376953125, | |
| "learning_rate": 7.455773903314544e-05, | |
| "loss": 1.1941673755645752, | |
| "step": 3156 | |
| }, | |
| { | |
| "epoch": 0.751122342797681, | |
| "grad_norm": 0.375, | |
| "learning_rate": 7.445702138763142e-05, | |
| "loss": 1.2553668022155762, | |
| "step": 3158 | |
| }, | |
| { | |
| "epoch": 0.7515980377582875, | |
| "grad_norm": 0.361328125, | |
| "learning_rate": 7.435544962414136e-05, | |
| "loss": 1.1946885585784912, | |
| "step": 3160 | |
| }, | |
| { | |
| "epoch": 0.752073732718894, | |
| "grad_norm": 0.37890625, | |
| "learning_rate": 7.425302626042829e-05, | |
| "loss": 1.2392586469650269, | |
| "step": 3162 | |
| }, | |
| { | |
| "epoch": 0.7525494276795005, | |
| "grad_norm": 0.369140625, | |
| "learning_rate": 7.41497538353546e-05, | |
| "loss": 1.1681256294250488, | |
| "step": 3164 | |
| }, | |
| { | |
| "epoch": 0.753025122640107, | |
| "grad_norm": 0.3515625, | |
| "learning_rate": 7.404563490882917e-05, | |
| "loss": 1.1748747825622559, | |
| "step": 3166 | |
| }, | |
| { | |
| "epoch": 0.7535008176007135, | |
| "grad_norm": 0.369140625, | |
| "learning_rate": 7.394067206174386e-05, | |
| "loss": 1.1887366771697998, | |
| "step": 3168 | |
| }, | |
| { | |
| "epoch": 0.75397651256132, | |
| "grad_norm": 0.36328125, | |
| "learning_rate": 7.383486789590961e-05, | |
| "loss": 1.1796954870224, | |
| "step": 3170 | |
| }, | |
| { | |
| "epoch": 0.7544522075219265, | |
| "grad_norm": 0.375, | |
| "learning_rate": 7.372822503399188e-05, | |
| "loss": 1.1664338111877441, | |
| "step": 3172 | |
| }, | |
| { | |
| "epoch": 0.754927902482533, | |
| "grad_norm": 0.37890625, | |
| "learning_rate": 7.362074611944566e-05, | |
| "loss": 1.235155463218689, | |
| "step": 3174 | |
| }, | |
| { | |
| "epoch": 0.7554035974431396, | |
| "grad_norm": 0.353515625, | |
| "learning_rate": 7.351243381644998e-05, | |
| "loss": 1.1678838729858398, | |
| "step": 3176 | |
| }, | |
| { | |
| "epoch": 0.7558792924037461, | |
| "grad_norm": 0.3671875, | |
| "learning_rate": 7.340329080984177e-05, | |
| "loss": 1.2551286220550537, | |
| "step": 3178 | |
| }, | |
| { | |
| "epoch": 0.7563549873643526, | |
| "grad_norm": 0.361328125, | |
| "learning_rate": 7.329331980504947e-05, | |
| "loss": 1.200148105621338, | |
| "step": 3180 | |
| }, | |
| { | |
| "epoch": 0.7568306823249591, | |
| "grad_norm": 0.353515625, | |
| "learning_rate": 7.318252352802579e-05, | |
| "loss": 1.255072832107544, | |
| "step": 3182 | |
| }, | |
| { | |
| "epoch": 0.7573063772855656, | |
| "grad_norm": 0.34375, | |
| "learning_rate": 7.307090472518026e-05, | |
| "loss": 1.1907069683074951, | |
| "step": 3184 | |
| }, | |
| { | |
| "epoch": 0.7577820722461721, | |
| "grad_norm": 0.3671875, | |
| "learning_rate": 7.295846616331113e-05, | |
| "loss": 1.202185034751892, | |
| "step": 3186 | |
| }, | |
| { | |
| "epoch": 0.7582577672067786, | |
| "grad_norm": 0.341796875, | |
| "learning_rate": 7.284521062953675e-05, | |
| "loss": 1.169918179512024, | |
| "step": 3188 | |
| }, | |
| { | |
| "epoch": 0.7587334621673851, | |
| "grad_norm": 0.34765625, | |
| "learning_rate": 7.27311409312265e-05, | |
| "loss": 1.1812635660171509, | |
| "step": 3190 | |
| }, | |
| { | |
| "epoch": 0.7592091571279916, | |
| "grad_norm": 0.341796875, | |
| "learning_rate": 7.261625989593127e-05, | |
| "loss": 1.184064507484436, | |
| "step": 3192 | |
| }, | |
| { | |
| "epoch": 0.7596848520885982, | |
| "grad_norm": 0.359375, | |
| "learning_rate": 7.250057037131322e-05, | |
| "loss": 1.1607537269592285, | |
| "step": 3194 | |
| }, | |
| { | |
| "epoch": 0.7601605470492047, | |
| "grad_norm": 0.359375, | |
| "learning_rate": 7.238407522507533e-05, | |
| "loss": 1.2583791017532349, | |
| "step": 3196 | |
| }, | |
| { | |
| "epoch": 0.7606362420098112, | |
| "grad_norm": 0.353515625, | |
| "learning_rate": 7.226677734489026e-05, | |
| "loss": 1.2102004289627075, | |
| "step": 3198 | |
| }, | |
| { | |
| "epoch": 0.7611119369704177, | |
| "grad_norm": 0.357421875, | |
| "learning_rate": 7.214867963832877e-05, | |
| "loss": 1.2008968591690063, | |
| "step": 3200 | |
| }, | |
| { | |
| "epoch": 0.7615876319310242, | |
| "grad_norm": 0.37890625, | |
| "learning_rate": 7.202978503278766e-05, | |
| "loss": 1.1674326658248901, | |
| "step": 3202 | |
| }, | |
| { | |
| "epoch": 0.7620633268916307, | |
| "grad_norm": 0.373046875, | |
| "learning_rate": 7.191009647541721e-05, | |
| "loss": 1.168144941329956, | |
| "step": 3204 | |
| }, | |
| { | |
| "epoch": 0.7625390218522372, | |
| "grad_norm": 0.345703125, | |
| "learning_rate": 7.178961693304809e-05, | |
| "loss": 1.1678907871246338, | |
| "step": 3206 | |
| }, | |
| { | |
| "epoch": 0.7630147168128437, | |
| "grad_norm": 0.345703125, | |
| "learning_rate": 7.166834939211786e-05, | |
| "loss": 1.1986507177352905, | |
| "step": 3208 | |
| }, | |
| { | |
| "epoch": 0.7634904117734502, | |
| "grad_norm": 0.32421875, | |
| "learning_rate": 7.154629685859694e-05, | |
| "loss": 1.1866064071655273, | |
| "step": 3210 | |
| }, | |
| { | |
| "epoch": 0.7639661067340567, | |
| "grad_norm": 0.34375, | |
| "learning_rate": 7.142346235791406e-05, | |
| "loss": 1.1903237104415894, | |
| "step": 3212 | |
| }, | |
| { | |
| "epoch": 0.7644418016946632, | |
| "grad_norm": 0.365234375, | |
| "learning_rate": 7.129984893488132e-05, | |
| "loss": 1.177189826965332, | |
| "step": 3214 | |
| }, | |
| { | |
| "epoch": 0.7649174966552698, | |
| "grad_norm": 0.365234375, | |
| "learning_rate": 7.117545965361866e-05, | |
| "loss": 1.1988158226013184, | |
| "step": 3216 | |
| }, | |
| { | |
| "epoch": 0.7653931916158763, | |
| "grad_norm": 0.34375, | |
| "learning_rate": 7.105029759747794e-05, | |
| "loss": 1.1733431816101074, | |
| "step": 3218 | |
| }, | |
| { | |
| "epoch": 0.7658688865764828, | |
| "grad_norm": 0.3671875, | |
| "learning_rate": 7.092436586896653e-05, | |
| "loss": 1.287745714187622, | |
| "step": 3220 | |
| }, | |
| { | |
| "epoch": 0.7663445815370893, | |
| "grad_norm": 0.359375, | |
| "learning_rate": 7.079766758967032e-05, | |
| "loss": 1.1643383502960205, | |
| "step": 3222 | |
| }, | |
| { | |
| "epoch": 0.7668202764976959, | |
| "grad_norm": 0.34375, | |
| "learning_rate": 7.067020590017648e-05, | |
| "loss": 1.1338480710983276, | |
| "step": 3224 | |
| }, | |
| { | |
| "epoch": 0.7672959714583024, | |
| "grad_norm": 0.359375, | |
| "learning_rate": 7.054198395999546e-05, | |
| "loss": 1.1828383207321167, | |
| "step": 3226 | |
| }, | |
| { | |
| "epoch": 0.7677716664189089, | |
| "grad_norm": 0.333984375, | |
| "learning_rate": 7.04130049474828e-05, | |
| "loss": 1.215213656425476, | |
| "step": 3228 | |
| }, | |
| { | |
| "epoch": 0.7682473613795154, | |
| "grad_norm": 0.341796875, | |
| "learning_rate": 7.028327205976026e-05, | |
| "loss": 1.2250659465789795, | |
| "step": 3230 | |
| }, | |
| { | |
| "epoch": 0.768723056340122, | |
| "grad_norm": 0.3515625, | |
| "learning_rate": 7.01527885126366e-05, | |
| "loss": 1.2371430397033691, | |
| "step": 3232 | |
| }, | |
| { | |
| "epoch": 0.7691987513007285, | |
| "grad_norm": 0.349609375, | |
| "learning_rate": 7.002155754052789e-05, | |
| "loss": 1.202965497970581, | |
| "step": 3234 | |
| }, | |
| { | |
| "epoch": 0.769674446261335, | |
| "grad_norm": 0.3515625, | |
| "learning_rate": 6.988958239637727e-05, | |
| "loss": 1.1786177158355713, | |
| "step": 3236 | |
| }, | |
| { | |
| "epoch": 0.7701501412219415, | |
| "grad_norm": 0.33203125, | |
| "learning_rate": 6.975686635157441e-05, | |
| "loss": 1.1610124111175537, | |
| "step": 3238 | |
| }, | |
| { | |
| "epoch": 0.770625836182548, | |
| "grad_norm": 0.33984375, | |
| "learning_rate": 6.962341269587436e-05, | |
| "loss": 1.2252613306045532, | |
| "step": 3240 | |
| }, | |
| { | |
| "epoch": 0.7711015311431545, | |
| "grad_norm": 0.365234375, | |
| "learning_rate": 6.948922473731594e-05, | |
| "loss": 1.2469508647918701, | |
| "step": 3242 | |
| }, | |
| { | |
| "epoch": 0.771577226103761, | |
| "grad_norm": 0.35546875, | |
| "learning_rate": 6.935430580213993e-05, | |
| "loss": 1.1859698295593262, | |
| "step": 3244 | |
| }, | |
| { | |
| "epoch": 0.7720529210643675, | |
| "grad_norm": 0.373046875, | |
| "learning_rate": 6.92186592347064e-05, | |
| "loss": 1.21319580078125, | |
| "step": 3246 | |
| }, | |
| { | |
| "epoch": 0.772528616024974, | |
| "grad_norm": 0.359375, | |
| "learning_rate": 6.908228839741198e-05, | |
| "loss": 1.145960807800293, | |
| "step": 3248 | |
| }, | |
| { | |
| "epoch": 0.7730043109855805, | |
| "grad_norm": 0.333984375, | |
| "learning_rate": 6.894519667060638e-05, | |
| "loss": 1.2456450462341309, | |
| "step": 3250 | |
| }, | |
| { | |
| "epoch": 0.773480005946187, | |
| "grad_norm": 0.359375, | |
| "learning_rate": 6.880738745250872e-05, | |
| "loss": 1.186368703842163, | |
| "step": 3252 | |
| }, | |
| { | |
| "epoch": 0.7739557009067936, | |
| "grad_norm": 0.359375, | |
| "learning_rate": 6.866886415912325e-05, | |
| "loss": 1.185645580291748, | |
| "step": 3254 | |
| }, | |
| { | |
| "epoch": 0.7744313958674001, | |
| "grad_norm": 0.34765625, | |
| "learning_rate": 6.852963022415458e-05, | |
| "loss": 1.2109339237213135, | |
| "step": 3256 | |
| }, | |
| { | |
| "epoch": 0.7749070908280066, | |
| "grad_norm": 0.34375, | |
| "learning_rate": 6.838968909892272e-05, | |
| "loss": 1.2080646753311157, | |
| "step": 3258 | |
| }, | |
| { | |
| "epoch": 0.7753827857886131, | |
| "grad_norm": 0.35546875, | |
| "learning_rate": 6.824904425227746e-05, | |
| "loss": 1.23634934425354, | |
| "step": 3260 | |
| }, | |
| { | |
| "epoch": 0.7758584807492196, | |
| "grad_norm": 0.33984375, | |
| "learning_rate": 6.810769917051233e-05, | |
| "loss": 1.1664297580718994, | |
| "step": 3262 | |
| }, | |
| { | |
| "epoch": 0.7763341757098261, | |
| "grad_norm": 0.341796875, | |
| "learning_rate": 6.796565735727829e-05, | |
| "loss": 1.176924467086792, | |
| "step": 3264 | |
| }, | |
| { | |
| "epoch": 0.7768098706704326, | |
| "grad_norm": 0.341796875, | |
| "learning_rate": 6.782292233349676e-05, | |
| "loss": 1.2261974811553955, | |
| "step": 3266 | |
| }, | |
| { | |
| "epoch": 0.7772855656310391, | |
| "grad_norm": 0.33203125, | |
| "learning_rate": 6.767949763727251e-05, | |
| "loss": 1.2133498191833496, | |
| "step": 3268 | |
| }, | |
| { | |
| "epoch": 0.7777612605916456, | |
| "grad_norm": 0.349609375, | |
| "learning_rate": 6.753538682380573e-05, | |
| "loss": 1.2278404235839844, | |
| "step": 3270 | |
| }, | |
| { | |
| "epoch": 0.7782369555522521, | |
| "grad_norm": 0.34375, | |
| "learning_rate": 6.739059346530412e-05, | |
| "loss": 1.176490306854248, | |
| "step": 3272 | |
| }, | |
| { | |
| "epoch": 0.7787126505128587, | |
| "grad_norm": 0.33203125, | |
| "learning_rate": 6.724512115089426e-05, | |
| "loss": 1.223867654800415, | |
| "step": 3274 | |
| }, | |
| { | |
| "epoch": 0.7791883454734652, | |
| "grad_norm": 0.337890625, | |
| "learning_rate": 6.709897348653258e-05, | |
| "loss": 1.1769992113113403, | |
| "step": 3276 | |
| }, | |
| { | |
| "epoch": 0.7796640404340717, | |
| "grad_norm": 0.359375, | |
| "learning_rate": 6.695215409491605e-05, | |
| "loss": 1.1771578788757324, | |
| "step": 3278 | |
| }, | |
| { | |
| "epoch": 0.7801397353946782, | |
| "grad_norm": 0.345703125, | |
| "learning_rate": 6.68046666153924e-05, | |
| "loss": 1.2103866338729858, | |
| "step": 3280 | |
| }, | |
| { | |
| "epoch": 0.7806154303552847, | |
| "grad_norm": 0.33203125, | |
| "learning_rate": 6.66565147038698e-05, | |
| "loss": 1.1617302894592285, | |
| "step": 3282 | |
| }, | |
| { | |
| "epoch": 0.7810911253158912, | |
| "grad_norm": 0.333984375, | |
| "learning_rate": 6.65077020327264e-05, | |
| "loss": 1.2142266035079956, | |
| "step": 3284 | |
| }, | |
| { | |
| "epoch": 0.7815668202764977, | |
| "grad_norm": 0.359375, | |
| "learning_rate": 6.635823229071915e-05, | |
| "loss": 1.2032921314239502, | |
| "step": 3286 | |
| }, | |
| { | |
| "epoch": 0.7820425152371042, | |
| "grad_norm": 0.361328125, | |
| "learning_rate": 6.620810918289241e-05, | |
| "loss": 1.1510361433029175, | |
| "step": 3288 | |
| }, | |
| { | |
| "epoch": 0.7825182101977107, | |
| "grad_norm": 0.3515625, | |
| "learning_rate": 6.605733643048615e-05, | |
| "loss": 1.209721326828003, | |
| "step": 3290 | |
| }, | |
| { | |
| "epoch": 0.7829939051583172, | |
| "grad_norm": 0.359375, | |
| "learning_rate": 6.590591777084368e-05, | |
| "loss": 1.1635715961456299, | |
| "step": 3292 | |
| }, | |
| { | |
| "epoch": 0.7834696001189237, | |
| "grad_norm": 0.33203125, | |
| "learning_rate": 6.575385695731902e-05, | |
| "loss": 1.1776684522628784, | |
| "step": 3294 | |
| }, | |
| { | |
| "epoch": 0.7839452950795303, | |
| "grad_norm": 0.32421875, | |
| "learning_rate": 6.560115775918379e-05, | |
| "loss": 1.1247327327728271, | |
| "step": 3296 | |
| }, | |
| { | |
| "epoch": 0.7844209900401368, | |
| "grad_norm": 0.33984375, | |
| "learning_rate": 6.544782396153392e-05, | |
| "loss": 1.270646572113037, | |
| "step": 3298 | |
| }, | |
| { | |
| "epoch": 0.7848966850007433, | |
| "grad_norm": 0.345703125, | |
| "learning_rate": 6.529385936519568e-05, | |
| "loss": 1.1621270179748535, | |
| "step": 3300 | |
| }, | |
| { | |
| "epoch": 0.7853723799613498, | |
| "grad_norm": 0.341796875, | |
| "learning_rate": 6.513926778663156e-05, | |
| "loss": 1.1540793180465698, | |
| "step": 3302 | |
| }, | |
| { | |
| "epoch": 0.7858480749219563, | |
| "grad_norm": 0.337890625, | |
| "learning_rate": 6.498405305784562e-05, | |
| "loss": 1.1824688911437988, | |
| "step": 3304 | |
| }, | |
| { | |
| "epoch": 0.7863237698825628, | |
| "grad_norm": 0.34375, | |
| "learning_rate": 6.482821902628857e-05, | |
| "loss": 1.182361125946045, | |
| "step": 3306 | |
| }, | |
| { | |
| "epoch": 0.7867994648431693, | |
| "grad_norm": 0.353515625, | |
| "learning_rate": 6.467176955476224e-05, | |
| "loss": 1.2419183254241943, | |
| "step": 3308 | |
| }, | |
| { | |
| "epoch": 0.7872751598037758, | |
| "grad_norm": 0.326171875, | |
| "learning_rate": 6.451470852132409e-05, | |
| "loss": 1.198357105255127, | |
| "step": 3310 | |
| }, | |
| { | |
| "epoch": 0.7877508547643823, | |
| "grad_norm": 0.36328125, | |
| "learning_rate": 6.435703981919077e-05, | |
| "loss": 1.1796178817749023, | |
| "step": 3312 | |
| }, | |
| { | |
| "epoch": 0.7882265497249888, | |
| "grad_norm": 0.3359375, | |
| "learning_rate": 6.419876735664188e-05, | |
| "loss": 1.1940312385559082, | |
| "step": 3314 | |
| }, | |
| { | |
| "epoch": 0.7887022446855954, | |
| "grad_norm": 0.3359375, | |
| "learning_rate": 6.403989505692296e-05, | |
| "loss": 1.1873643398284912, | |
| "step": 3316 | |
| }, | |
| { | |
| "epoch": 0.7891779396462019, | |
| "grad_norm": 0.341796875, | |
| "learning_rate": 6.388042685814827e-05, | |
| "loss": 1.1884150505065918, | |
| "step": 3318 | |
| }, | |
| { | |
| "epoch": 0.7896536346068084, | |
| "grad_norm": 0.345703125, | |
| "learning_rate": 6.372036671320315e-05, | |
| "loss": 1.1984798908233643, | |
| "step": 3320 | |
| }, | |
| { | |
| "epoch": 0.7901293295674149, | |
| "grad_norm": 0.326171875, | |
| "learning_rate": 6.355971858964607e-05, | |
| "loss": 1.191229224205017, | |
| "step": 3322 | |
| }, | |
| { | |
| "epoch": 0.7906050245280214, | |
| "grad_norm": 0.34375, | |
| "learning_rate": 6.339848646961029e-05, | |
| "loss": 1.1361331939697266, | |
| "step": 3324 | |
| }, | |
| { | |
| "epoch": 0.7910807194886279, | |
| "grad_norm": 0.349609375, | |
| "learning_rate": 6.323667434970508e-05, | |
| "loss": 1.2309892177581787, | |
| "step": 3326 | |
| }, | |
| { | |
| "epoch": 0.7915564144492344, | |
| "grad_norm": 0.349609375, | |
| "learning_rate": 6.307428624091674e-05, | |
| "loss": 1.1435422897338867, | |
| "step": 3328 | |
| }, | |
| { | |
| "epoch": 0.7920321094098409, | |
| "grad_norm": 0.373046875, | |
| "learning_rate": 6.291132616850912e-05, | |
| "loss": 1.181205153465271, | |
| "step": 3330 | |
| }, | |
| { | |
| "epoch": 0.7925078043704474, | |
| "grad_norm": 0.37109375, | |
| "learning_rate": 6.274779817192389e-05, | |
| "loss": 1.1939911842346191, | |
| "step": 3332 | |
| }, | |
| { | |
| "epoch": 0.7929834993310539, | |
| "grad_norm": 0.341796875, | |
| "learning_rate": 6.258370630468032e-05, | |
| "loss": 1.2611286640167236, | |
| "step": 3334 | |
| }, | |
| { | |
| "epoch": 0.7934591942916605, | |
| "grad_norm": 0.33984375, | |
| "learning_rate": 6.241905463427493e-05, | |
| "loss": 1.1541907787322998, | |
| "step": 3336 | |
| }, | |
| { | |
| "epoch": 0.793934889252267, | |
| "grad_norm": 0.33984375, | |
| "learning_rate": 6.225384724208056e-05, | |
| "loss": 1.2033154964447021, | |
| "step": 3338 | |
| }, | |
| { | |
| "epoch": 0.7944105842128735, | |
| "grad_norm": 0.349609375, | |
| "learning_rate": 6.208808822324524e-05, | |
| "loss": 1.191408634185791, | |
| "step": 3340 | |
| }, | |
| { | |
| "epoch": 0.79488627917348, | |
| "grad_norm": 0.36328125, | |
| "learning_rate": 6.192178168659069e-05, | |
| "loss": 1.1633325815200806, | |
| "step": 3342 | |
| }, | |
| { | |
| "epoch": 0.7953619741340865, | |
| "grad_norm": 0.3359375, | |
| "learning_rate": 6.175493175451045e-05, | |
| "loss": 1.130890965461731, | |
| "step": 3344 | |
| }, | |
| { | |
| "epoch": 0.795837669094693, | |
| "grad_norm": 0.34765625, | |
| "learning_rate": 6.15875425628677e-05, | |
| "loss": 1.2087476253509521, | |
| "step": 3346 | |
| }, | |
| { | |
| "epoch": 0.7963133640552995, | |
| "grad_norm": 0.34765625, | |
| "learning_rate": 6.141961826089276e-05, | |
| "loss": 1.2083730697631836, | |
| "step": 3348 | |
| }, | |
| { | |
| "epoch": 0.796789059015906, | |
| "grad_norm": 0.341796875, | |
| "learning_rate": 6.125116301108021e-05, | |
| "loss": 1.1795260906219482, | |
| "step": 3350 | |
| }, | |
| { | |
| "epoch": 0.7972647539765125, | |
| "grad_norm": 0.37109375, | |
| "learning_rate": 6.108218098908573e-05, | |
| "loss": 1.160348892211914, | |
| "step": 3352 | |
| }, | |
| { | |
| "epoch": 0.797740448937119, | |
| "grad_norm": 0.404296875, | |
| "learning_rate": 6.0912676383622595e-05, | |
| "loss": 1.2218070030212402, | |
| "step": 3354 | |
| }, | |
| { | |
| "epoch": 0.7982161438977255, | |
| "grad_norm": 0.380859375, | |
| "learning_rate": 6.074265339635782e-05, | |
| "loss": 1.2201728820800781, | |
| "step": 3356 | |
| }, | |
| { | |
| "epoch": 0.7986918388583321, | |
| "grad_norm": 0.345703125, | |
| "learning_rate": 6.057211624180803e-05, | |
| "loss": 1.2353184223175049, | |
| "step": 3358 | |
| }, | |
| { | |
| "epoch": 0.7991675338189386, | |
| "grad_norm": 0.330078125, | |
| "learning_rate": 6.0401069147235016e-05, | |
| "loss": 1.199735403060913, | |
| "step": 3360 | |
| }, | |
| { | |
| "epoch": 0.7996432287795451, | |
| "grad_norm": 0.33984375, | |
| "learning_rate": 6.02295163525409e-05, | |
| "loss": 1.1990015506744385, | |
| "step": 3362 | |
| }, | |
| { | |
| "epoch": 0.8001189237401516, | |
| "grad_norm": 0.36328125, | |
| "learning_rate": 6.0057462110163054e-05, | |
| "loss": 1.2302005290985107, | |
| "step": 3364 | |
| }, | |
| { | |
| "epoch": 0.8005946187007581, | |
| "grad_norm": 0.345703125, | |
| "learning_rate": 5.9884910684968704e-05, | |
| "loss": 1.1892058849334717, | |
| "step": 3366 | |
| }, | |
| { | |
| "epoch": 0.8010703136613646, | |
| "grad_norm": 0.384765625, | |
| "learning_rate": 5.9711866354149205e-05, | |
| "loss": 1.1621990203857422, | |
| "step": 3368 | |
| }, | |
| { | |
| "epoch": 0.8015460086219711, | |
| "grad_norm": 0.3515625, | |
| "learning_rate": 5.953833340711404e-05, | |
| "loss": 1.191482663154602, | |
| "step": 3370 | |
| }, | |
| { | |
| "epoch": 0.8020217035825776, | |
| "grad_norm": 0.337890625, | |
| "learning_rate": 5.9364316145384424e-05, | |
| "loss": 1.2096929550170898, | |
| "step": 3372 | |
| }, | |
| { | |
| "epoch": 0.8024973985431841, | |
| "grad_norm": 0.34375, | |
| "learning_rate": 5.918981888248679e-05, | |
| "loss": 1.1668099164962769, | |
| "step": 3374 | |
| }, | |
| { | |
| "epoch": 0.8029730935037906, | |
| "grad_norm": 0.318359375, | |
| "learning_rate": 5.901484594384574e-05, | |
| "loss": 1.2103668451309204, | |
| "step": 3376 | |
| }, | |
| { | |
| "epoch": 0.8034487884643972, | |
| "grad_norm": 0.349609375, | |
| "learning_rate": 5.883940166667692e-05, | |
| "loss": 1.208052396774292, | |
| "step": 3378 | |
| }, | |
| { | |
| "epoch": 0.8039244834250037, | |
| "grad_norm": 0.349609375, | |
| "learning_rate": 5.866349039987949e-05, | |
| "loss": 1.1915090084075928, | |
| "step": 3380 | |
| }, | |
| { | |
| "epoch": 0.8044001783856102, | |
| "grad_norm": 0.3359375, | |
| "learning_rate": 5.8487116503928294e-05, | |
| "loss": 1.1815118789672852, | |
| "step": 3382 | |
| }, | |
| { | |
| "epoch": 0.8048758733462167, | |
| "grad_norm": 0.337890625, | |
| "learning_rate": 5.8310284350765796e-05, | |
| "loss": 1.1728663444519043, | |
| "step": 3384 | |
| }, | |
| { | |
| "epoch": 0.8053515683068232, | |
| "grad_norm": 0.330078125, | |
| "learning_rate": 5.813299832369371e-05, | |
| "loss": 1.1404354572296143, | |
| "step": 3386 | |
| }, | |
| { | |
| "epoch": 0.8058272632674297, | |
| "grad_norm": 0.3515625, | |
| "learning_rate": 5.7955262817264333e-05, | |
| "loss": 1.2187399864196777, | |
| "step": 3388 | |
| }, | |
| { | |
| "epoch": 0.8063029582280363, | |
| "grad_norm": 0.35546875, | |
| "learning_rate": 5.777708223717162e-05, | |
| "loss": 1.1979572772979736, | |
| "step": 3390 | |
| }, | |
| { | |
| "epoch": 0.8067786531886428, | |
| "grad_norm": 0.333984375, | |
| "learning_rate": 5.7598461000142e-05, | |
| "loss": 1.181311011314392, | |
| "step": 3392 | |
| }, | |
| { | |
| "epoch": 0.8072543481492493, | |
| "grad_norm": 0.333984375, | |
| "learning_rate": 5.7419403533824825e-05, | |
| "loss": 1.1990816593170166, | |
| "step": 3394 | |
| }, | |
| { | |
| "epoch": 0.8077300431098559, | |
| "grad_norm": 0.333984375, | |
| "learning_rate": 5.7239914276682735e-05, | |
| "loss": 1.176539659500122, | |
| "step": 3396 | |
| }, | |
| { | |
| "epoch": 0.8082057380704624, | |
| "grad_norm": 0.326171875, | |
| "learning_rate": 5.7059997677881495e-05, | |
| "loss": 1.1944094896316528, | |
| "step": 3398 | |
| }, | |
| { | |
| "epoch": 0.8086814330310689, | |
| "grad_norm": 0.322265625, | |
| "learning_rate": 5.687965819717982e-05, | |
| "loss": 1.1964213848114014, | |
| "step": 3400 | |
| }, | |
| { | |
| "epoch": 0.8091571279916754, | |
| "grad_norm": 0.3359375, | |
| "learning_rate": 5.66989003048188e-05, | |
| "loss": 1.1884610652923584, | |
| "step": 3402 | |
| }, | |
| { | |
| "epoch": 0.8096328229522819, | |
| "grad_norm": 0.337890625, | |
| "learning_rate": 5.651772848141104e-05, | |
| "loss": 1.2424553632736206, | |
| "step": 3404 | |
| }, | |
| { | |
| "epoch": 0.8101085179128884, | |
| "grad_norm": 0.32421875, | |
| "learning_rate": 5.633614721782968e-05, | |
| "loss": 1.1997463703155518, | |
| "step": 3406 | |
| }, | |
| { | |
| "epoch": 0.8105842128734949, | |
| "grad_norm": 0.3203125, | |
| "learning_rate": 5.6154161015096985e-05, | |
| "loss": 1.1804287433624268, | |
| "step": 3408 | |
| }, | |
| { | |
| "epoch": 0.8110599078341014, | |
| "grad_norm": 0.333984375, | |
| "learning_rate": 5.5971774384272875e-05, | |
| "loss": 1.2427394390106201, | |
| "step": 3410 | |
| }, | |
| { | |
| "epoch": 0.8115356027947079, | |
| "grad_norm": 0.326171875, | |
| "learning_rate": 5.5788991846343e-05, | |
| "loss": 1.2132554054260254, | |
| "step": 3412 | |
| }, | |
| { | |
| "epoch": 0.8120112977553144, | |
| "grad_norm": 0.337890625, | |
| "learning_rate": 5.5605817932106757e-05, | |
| "loss": 1.2068378925323486, | |
| "step": 3414 | |
| }, | |
| { | |
| "epoch": 0.812486992715921, | |
| "grad_norm": 0.3203125, | |
| "learning_rate": 5.542225718206494e-05, | |
| "loss": 1.2010424137115479, | |
| "step": 3416 | |
| }, | |
| { | |
| "epoch": 0.8129626876765275, | |
| "grad_norm": 0.32421875, | |
| "learning_rate": 5.523831414630719e-05, | |
| "loss": 1.1713800430297852, | |
| "step": 3418 | |
| }, | |
| { | |
| "epoch": 0.813438382637134, | |
| "grad_norm": 0.330078125, | |
| "learning_rate": 5.505399338439922e-05, | |
| "loss": 1.1795239448547363, | |
| "step": 3420 | |
| }, | |
| { | |
| "epoch": 0.8139140775977405, | |
| "grad_norm": 0.337890625, | |
| "learning_rate": 5.48692994652698e-05, | |
| "loss": 1.2366602420806885, | |
| "step": 3422 | |
| }, | |
| { | |
| "epoch": 0.814389772558347, | |
| "grad_norm": 0.3515625, | |
| "learning_rate": 5.4684236967097475e-05, | |
| "loss": 1.178973913192749, | |
| "step": 3424 | |
| }, | |
| { | |
| "epoch": 0.8148654675189535, | |
| "grad_norm": 0.341796875, | |
| "learning_rate": 5.449881047719713e-05, | |
| "loss": 1.186044454574585, | |
| "step": 3426 | |
| }, | |
| { | |
| "epoch": 0.81534116247956, | |
| "grad_norm": 0.3359375, | |
| "learning_rate": 5.431302459190621e-05, | |
| "loss": 1.2068400382995605, | |
| "step": 3428 | |
| }, | |
| { | |
| "epoch": 0.8158168574401665, | |
| "grad_norm": 0.32421875, | |
| "learning_rate": 5.412688391647084e-05, | |
| "loss": 1.155308723449707, | |
| "step": 3430 | |
| }, | |
| { | |
| "epoch": 0.816292552400773, | |
| "grad_norm": 0.3203125, | |
| "learning_rate": 5.394039306493167e-05, | |
| "loss": 1.1697208881378174, | |
| "step": 3432 | |
| }, | |
| { | |
| "epoch": 0.8167682473613795, | |
| "grad_norm": 0.337890625, | |
| "learning_rate": 5.3753556660009475e-05, | |
| "loss": 1.160557746887207, | |
| "step": 3434 | |
| }, | |
| { | |
| "epoch": 0.817243942321986, | |
| "grad_norm": 0.34375, | |
| "learning_rate": 5.356637933299057e-05, | |
| "loss": 1.1798973083496094, | |
| "step": 3436 | |
| }, | |
| { | |
| "epoch": 0.8177196372825926, | |
| "grad_norm": 0.330078125, | |
| "learning_rate": 5.337886572361205e-05, | |
| "loss": 1.1533775329589844, | |
| "step": 3438 | |
| }, | |
| { | |
| "epoch": 0.8181953322431991, | |
| "grad_norm": 0.322265625, | |
| "learning_rate": 5.319102047994672e-05, | |
| "loss": 1.1831254959106445, | |
| "step": 3440 | |
| }, | |
| { | |
| "epoch": 0.8186710272038056, | |
| "grad_norm": 0.322265625, | |
| "learning_rate": 5.300284825828793e-05, | |
| "loss": 1.1955242156982422, | |
| "step": 3442 | |
| }, | |
| { | |
| "epoch": 0.8191467221644121, | |
| "grad_norm": 0.326171875, | |
| "learning_rate": 5.2814353723034126e-05, | |
| "loss": 1.188542127609253, | |
| "step": 3444 | |
| }, | |
| { | |
| "epoch": 0.8196224171250186, | |
| "grad_norm": 0.326171875, | |
| "learning_rate": 5.262554154657324e-05, | |
| "loss": 1.2146074771881104, | |
| "step": 3446 | |
| }, | |
| { | |
| "epoch": 0.8200981120856251, | |
| "grad_norm": 0.3046875, | |
| "learning_rate": 5.2436416409166884e-05, | |
| "loss": 1.1553959846496582, | |
| "step": 3448 | |
| }, | |
| { | |
| "epoch": 0.8205738070462316, | |
| "grad_norm": 0.3359375, | |
| "learning_rate": 5.2246982998834276e-05, | |
| "loss": 1.1827256679534912, | |
| "step": 3450 | |
| }, | |
| { | |
| "epoch": 0.8210495020068381, | |
| "grad_norm": 0.3125, | |
| "learning_rate": 5.205724601123614e-05, | |
| "loss": 1.1618741750717163, | |
| "step": 3452 | |
| }, | |
| { | |
| "epoch": 0.8215251969674446, | |
| "grad_norm": 0.337890625, | |
| "learning_rate": 5.186721014955822e-05, | |
| "loss": 1.2132587432861328, | |
| "step": 3454 | |
| }, | |
| { | |
| "epoch": 0.8220008919280511, | |
| "grad_norm": 0.33203125, | |
| "learning_rate": 5.167688012439472e-05, | |
| "loss": 1.1444640159606934, | |
| "step": 3456 | |
| }, | |
| { | |
| "epoch": 0.8224765868886577, | |
| "grad_norm": 0.328125, | |
| "learning_rate": 5.1486260653631554e-05, | |
| "loss": 1.1991591453552246, | |
| "step": 3458 | |
| }, | |
| { | |
| "epoch": 0.8229522818492642, | |
| "grad_norm": 0.33203125, | |
| "learning_rate": 5.129535646232941e-05, | |
| "loss": 1.1526660919189453, | |
| "step": 3460 | |
| }, | |
| { | |
| "epoch": 0.8234279768098707, | |
| "grad_norm": 0.3359375, | |
| "learning_rate": 5.110417228260657e-05, | |
| "loss": 1.1603717803955078, | |
| "step": 3462 | |
| }, | |
| { | |
| "epoch": 0.8239036717704772, | |
| "grad_norm": 0.333984375, | |
| "learning_rate": 5.091271285352167e-05, | |
| "loss": 1.154017448425293, | |
| "step": 3464 | |
| }, | |
| { | |
| "epoch": 0.8243793667310837, | |
| "grad_norm": 0.3359375, | |
| "learning_rate": 5.07209829209562e-05, | |
| "loss": 1.1853911876678467, | |
| "step": 3466 | |
| }, | |
| { | |
| "epoch": 0.8248550616916902, | |
| "grad_norm": 0.357421875, | |
| "learning_rate": 5.0528987237496866e-05, | |
| "loss": 1.2097725868225098, | |
| "step": 3468 | |
| }, | |
| { | |
| "epoch": 0.8253307566522967, | |
| "grad_norm": 0.3515625, | |
| "learning_rate": 5.033673056231781e-05, | |
| "loss": 1.200005054473877, | |
| "step": 3470 | |
| }, | |
| { | |
| "epoch": 0.8258064516129032, | |
| "grad_norm": 0.326171875, | |
| "learning_rate": 5.0144217661062574e-05, | |
| "loss": 1.2073945999145508, | |
| "step": 3472 | |
| }, | |
| { | |
| "epoch": 0.8262821465735097, | |
| "grad_norm": 0.32421875, | |
| "learning_rate": 4.9951453305726055e-05, | |
| "loss": 1.1431573629379272, | |
| "step": 3474 | |
| }, | |
| { | |
| "epoch": 0.8267578415341162, | |
| "grad_norm": 0.33984375, | |
| "learning_rate": 4.975844227453615e-05, | |
| "loss": 1.2093596458435059, | |
| "step": 3476 | |
| }, | |
| { | |
| "epoch": 0.8272335364947228, | |
| "grad_norm": 0.328125, | |
| "learning_rate": 4.9565189351835336e-05, | |
| "loss": 1.1971302032470703, | |
| "step": 3478 | |
| }, | |
| { | |
| "epoch": 0.8277092314553293, | |
| "grad_norm": 0.326171875, | |
| "learning_rate": 4.93716993279621e-05, | |
| "loss": 1.1951606273651123, | |
| "step": 3480 | |
| }, | |
| { | |
| "epoch": 0.8281849264159358, | |
| "grad_norm": 0.3359375, | |
| "learning_rate": 4.917797699913215e-05, | |
| "loss": 1.1961910724639893, | |
| "step": 3482 | |
| }, | |
| { | |
| "epoch": 0.8286606213765423, | |
| "grad_norm": 0.33984375, | |
| "learning_rate": 4.8984027167319566e-05, | |
| "loss": 1.1884233951568604, | |
| "step": 3484 | |
| }, | |
| { | |
| "epoch": 0.8291363163371488, | |
| "grad_norm": 0.3203125, | |
| "learning_rate": 4.8789854640137736e-05, | |
| "loss": 1.1898481845855713, | |
| "step": 3486 | |
| }, | |
| { | |
| "epoch": 0.8296120112977553, | |
| "grad_norm": 0.314453125, | |
| "learning_rate": 4.859546423072023e-05, | |
| "loss": 1.1624311208724976, | |
| "step": 3488 | |
| }, | |
| { | |
| "epoch": 0.8300877062583618, | |
| "grad_norm": 0.3359375, | |
| "learning_rate": 4.840086075760146e-05, | |
| "loss": 1.1634624004364014, | |
| "step": 3490 | |
| }, | |
| { | |
| "epoch": 0.8305634012189683, | |
| "grad_norm": 0.306640625, | |
| "learning_rate": 4.820604904459722e-05, | |
| "loss": 1.1898113489151, | |
| "step": 3492 | |
| }, | |
| { | |
| "epoch": 0.8310390961795748, | |
| "grad_norm": 0.318359375, | |
| "learning_rate": 4.801103392068516e-05, | |
| "loss": 1.2224345207214355, | |
| "step": 3494 | |
| }, | |
| { | |
| "epoch": 0.8315147911401813, | |
| "grad_norm": 0.30859375, | |
| "learning_rate": 4.781582021988507e-05, | |
| "loss": 1.1299514770507812, | |
| "step": 3496 | |
| }, | |
| { | |
| "epoch": 0.8319904861007879, | |
| "grad_norm": 0.322265625, | |
| "learning_rate": 4.762041278113902e-05, | |
| "loss": 1.2070683240890503, | |
| "step": 3498 | |
| }, | |
| { | |
| "epoch": 0.8324661810613944, | |
| "grad_norm": 0.32421875, | |
| "learning_rate": 4.742481644819148e-05, | |
| "loss": 1.1668651103973389, | |
| "step": 3500 | |
| }, | |
| { | |
| "epoch": 0.8329418760220009, | |
| "grad_norm": 0.314453125, | |
| "learning_rate": 4.7229036069469193e-05, | |
| "loss": 1.1788852214813232, | |
| "step": 3502 | |
| }, | |
| { | |
| "epoch": 0.8334175709826074, | |
| "grad_norm": 0.310546875, | |
| "learning_rate": 4.703307649796099e-05, | |
| "loss": 1.128293752670288, | |
| "step": 3504 | |
| }, | |
| { | |
| "epoch": 0.8338932659432139, | |
| "grad_norm": 0.33203125, | |
| "learning_rate": 4.683694259109757e-05, | |
| "loss": 1.1507880687713623, | |
| "step": 3506 | |
| }, | |
| { | |
| "epoch": 0.8343689609038204, | |
| "grad_norm": 0.314453125, | |
| "learning_rate": 4.664063921063101e-05, | |
| "loss": 1.1574411392211914, | |
| "step": 3508 | |
| }, | |
| { | |
| "epoch": 0.8348446558644269, | |
| "grad_norm": 0.330078125, | |
| "learning_rate": 4.644417122251428e-05, | |
| "loss": 1.1994435787200928, | |
| "step": 3510 | |
| }, | |
| { | |
| "epoch": 0.8353203508250334, | |
| "grad_norm": 0.322265625, | |
| "learning_rate": 4.6247543496780675e-05, | |
| "loss": 1.1481845378875732, | |
| "step": 3512 | |
| }, | |
| { | |
| "epoch": 0.8357960457856399, | |
| "grad_norm": 0.31640625, | |
| "learning_rate": 4.605076090742299e-05, | |
| "loss": 1.184557557106018, | |
| "step": 3514 | |
| }, | |
| { | |
| "epoch": 0.8362717407462464, | |
| "grad_norm": 0.310546875, | |
| "learning_rate": 4.585382833227281e-05, | |
| "loss": 1.1902873516082764, | |
| "step": 3516 | |
| }, | |
| { | |
| "epoch": 0.836747435706853, | |
| "grad_norm": 0.32421875, | |
| "learning_rate": 4.565675065287956e-05, | |
| "loss": 1.2748725414276123, | |
| "step": 3518 | |
| }, | |
| { | |
| "epoch": 0.8372231306674595, | |
| "grad_norm": 0.322265625, | |
| "learning_rate": 4.545953275438947e-05, | |
| "loss": 1.1273387670516968, | |
| "step": 3520 | |
| }, | |
| { | |
| "epoch": 0.837698825628066, | |
| "grad_norm": 0.31640625, | |
| "learning_rate": 4.526217952542456e-05, | |
| "loss": 1.1241960525512695, | |
| "step": 3522 | |
| }, | |
| { | |
| "epoch": 0.8381745205886725, | |
| "grad_norm": 0.326171875, | |
| "learning_rate": 4.506469585796133e-05, | |
| "loss": 1.1555461883544922, | |
| "step": 3524 | |
| }, | |
| { | |
| "epoch": 0.838650215549279, | |
| "grad_norm": 0.3359375, | |
| "learning_rate": 4.486708664720965e-05, | |
| "loss": 1.2142927646636963, | |
| "step": 3526 | |
| }, | |
| { | |
| "epoch": 0.8391259105098855, | |
| "grad_norm": 0.326171875, | |
| "learning_rate": 4.466935679149131e-05, | |
| "loss": 1.1009758710861206, | |
| "step": 3528 | |
| }, | |
| { | |
| "epoch": 0.839601605470492, | |
| "grad_norm": 0.3125, | |
| "learning_rate": 4.4471511192118666e-05, | |
| "loss": 1.1688785552978516, | |
| "step": 3530 | |
| }, | |
| { | |
| "epoch": 0.8400773004310985, | |
| "grad_norm": 0.33203125, | |
| "learning_rate": 4.427355475327309e-05, | |
| "loss": 1.1534974575042725, | |
| "step": 3532 | |
| }, | |
| { | |
| "epoch": 0.840552995391705, | |
| "grad_norm": 0.30859375, | |
| "learning_rate": 4.407549238188346e-05, | |
| "loss": 1.150222659111023, | |
| "step": 3534 | |
| }, | |
| { | |
| "epoch": 0.8410286903523115, | |
| "grad_norm": 0.314453125, | |
| "learning_rate": 4.387732898750448e-05, | |
| "loss": 1.207751750946045, | |
| "step": 3536 | |
| }, | |
| { | |
| "epoch": 0.841504385312918, | |
| "grad_norm": 0.318359375, | |
| "learning_rate": 4.367906948219502e-05, | |
| "loss": 1.1927155256271362, | |
| "step": 3538 | |
| }, | |
| { | |
| "epoch": 0.8419800802735246, | |
| "grad_norm": 0.33984375, | |
| "learning_rate": 4.348071878039633e-05, | |
| "loss": 1.1655819416046143, | |
| "step": 3540 | |
| }, | |
| { | |
| "epoch": 0.8424557752341311, | |
| "grad_norm": 0.345703125, | |
| "learning_rate": 4.3282281798810256e-05, | |
| "loss": 1.1812100410461426, | |
| "step": 3542 | |
| }, | |
| { | |
| "epoch": 0.8429314701947376, | |
| "grad_norm": 0.3359375, | |
| "learning_rate": 4.308376345627728e-05, | |
| "loss": 1.2032802104949951, | |
| "step": 3544 | |
| }, | |
| { | |
| "epoch": 0.8434071651553441, | |
| "grad_norm": 0.322265625, | |
| "learning_rate": 4.288516867365474e-05, | |
| "loss": 1.1608192920684814, | |
| "step": 3546 | |
| }, | |
| { | |
| "epoch": 0.8438828601159506, | |
| "grad_norm": 0.322265625, | |
| "learning_rate": 4.2686502373694684e-05, | |
| "loss": 1.2154037952423096, | |
| "step": 3548 | |
| }, | |
| { | |
| "epoch": 0.8443585550765571, | |
| "grad_norm": 0.333984375, | |
| "learning_rate": 4.248776948092197e-05, | |
| "loss": 1.152782917022705, | |
| "step": 3550 | |
| }, | |
| { | |
| "epoch": 0.8448342500371636, | |
| "grad_norm": 0.318359375, | |
| "learning_rate": 4.228897492151213e-05, | |
| "loss": 1.176882028579712, | |
| "step": 3552 | |
| }, | |
| { | |
| "epoch": 0.8453099449977702, | |
| "grad_norm": 0.31640625, | |
| "learning_rate": 4.209012362316934e-05, | |
| "loss": 1.1599602699279785, | |
| "step": 3554 | |
| }, | |
| { | |
| "epoch": 0.8457856399583767, | |
| "grad_norm": 0.318359375, | |
| "learning_rate": 4.1891220515004114e-05, | |
| "loss": 1.2112061977386475, | |
| "step": 3556 | |
| }, | |
| { | |
| "epoch": 0.8462613349189833, | |
| "grad_norm": 0.322265625, | |
| "learning_rate": 4.169227052741134e-05, | |
| "loss": 1.1296908855438232, | |
| "step": 3558 | |
| }, | |
| { | |
| "epoch": 0.8467370298795898, | |
| "grad_norm": 0.318359375, | |
| "learning_rate": 4.1493278591947855e-05, | |
| "loss": 1.1762603521347046, | |
| "step": 3560 | |
| }, | |
| { | |
| "epoch": 0.8472127248401963, | |
| "grad_norm": 0.3125, | |
| "learning_rate": 4.1294249641210354e-05, | |
| "loss": 1.2208728790283203, | |
| "step": 3562 | |
| }, | |
| { | |
| "epoch": 0.8476884198008028, | |
| "grad_norm": 0.326171875, | |
| "learning_rate": 4.109518860871305e-05, | |
| "loss": 1.2221901416778564, | |
| "step": 3564 | |
| }, | |
| { | |
| "epoch": 0.8481641147614093, | |
| "grad_norm": 0.32421875, | |
| "learning_rate": 4.089610042876537e-05, | |
| "loss": 1.1988012790679932, | |
| "step": 3566 | |
| }, | |
| { | |
| "epoch": 0.8486398097220158, | |
| "grad_norm": 0.310546875, | |
| "learning_rate": 4.069699003634972e-05, | |
| "loss": 1.1596108675003052, | |
| "step": 3568 | |
| }, | |
| { | |
| "epoch": 0.8491155046826223, | |
| "grad_norm": 0.3125, | |
| "learning_rate": 4.0497862366999034e-05, | |
| "loss": 1.1585445404052734, | |
| "step": 3570 | |
| }, | |
| { | |
| "epoch": 0.8495911996432288, | |
| "grad_norm": 0.3125, | |
| "learning_rate": 4.0298722356674584e-05, | |
| "loss": 1.1766672134399414, | |
| "step": 3572 | |
| }, | |
| { | |
| "epoch": 0.8500668946038353, | |
| "grad_norm": 0.32421875, | |
| "learning_rate": 4.0099574941643506e-05, | |
| "loss": 1.1228039264678955, | |
| "step": 3574 | |
| }, | |
| { | |
| "epoch": 0.8505425895644418, | |
| "grad_norm": 0.31640625, | |
| "learning_rate": 3.990042505835651e-05, | |
| "loss": 1.1494994163513184, | |
| "step": 3576 | |
| }, | |
| { | |
| "epoch": 0.8510182845250484, | |
| "grad_norm": 0.328125, | |
| "learning_rate": 3.9701277643325416e-05, | |
| "loss": 1.202513575553894, | |
| "step": 3578 | |
| }, | |
| { | |
| "epoch": 0.8514939794856549, | |
| "grad_norm": 0.314453125, | |
| "learning_rate": 3.950213763300097e-05, | |
| "loss": 1.179110050201416, | |
| "step": 3580 | |
| }, | |
| { | |
| "epoch": 0.8519696744462614, | |
| "grad_norm": 0.328125, | |
| "learning_rate": 3.9303009963650306e-05, | |
| "loss": 1.1852927207946777, | |
| "step": 3582 | |
| }, | |
| { | |
| "epoch": 0.8524453694068679, | |
| "grad_norm": 0.326171875, | |
| "learning_rate": 3.910389957123464e-05, | |
| "loss": 1.1301989555358887, | |
| "step": 3584 | |
| }, | |
| { | |
| "epoch": 0.8529210643674744, | |
| "grad_norm": 0.32421875, | |
| "learning_rate": 3.890481139128696e-05, | |
| "loss": 1.2232120037078857, | |
| "step": 3586 | |
| }, | |
| { | |
| "epoch": 0.8533967593280809, | |
| "grad_norm": 0.310546875, | |
| "learning_rate": 3.8705750358789646e-05, | |
| "loss": 1.1268978118896484, | |
| "step": 3588 | |
| }, | |
| { | |
| "epoch": 0.8538724542886874, | |
| "grad_norm": 0.32421875, | |
| "learning_rate": 3.850672140805216e-05, | |
| "loss": 1.2016334533691406, | |
| "step": 3590 | |
| }, | |
| { | |
| "epoch": 0.8543481492492939, | |
| "grad_norm": 0.310546875, | |
| "learning_rate": 3.830772947258869e-05, | |
| "loss": 1.2152290344238281, | |
| "step": 3592 | |
| }, | |
| { | |
| "epoch": 0.8548238442099004, | |
| "grad_norm": 0.32421875, | |
| "learning_rate": 3.810877948499589e-05, | |
| "loss": 1.209730625152588, | |
| "step": 3594 | |
| }, | |
| { | |
| "epoch": 0.8552995391705069, | |
| "grad_norm": 0.314453125, | |
| "learning_rate": 3.790987637683069e-05, | |
| "loss": 1.1957197189331055, | |
| "step": 3596 | |
| }, | |
| { | |
| "epoch": 0.8557752341311134, | |
| "grad_norm": 0.306640625, | |
| "learning_rate": 3.7711025078487876e-05, | |
| "loss": 1.1268858909606934, | |
| "step": 3598 | |
| }, | |
| { | |
| "epoch": 0.85625092909172, | |
| "grad_norm": 0.310546875, | |
| "learning_rate": 3.751223051907805e-05, | |
| "loss": 1.2362475395202637, | |
| "step": 3600 | |
| }, | |
| { | |
| "epoch": 0.8567266240523265, | |
| "grad_norm": 0.30859375, | |
| "learning_rate": 3.731349762630534e-05, | |
| "loss": 1.172964096069336, | |
| "step": 3602 | |
| }, | |
| { | |
| "epoch": 0.857202319012933, | |
| "grad_norm": 0.310546875, | |
| "learning_rate": 3.711483132634527e-05, | |
| "loss": 1.2133592367172241, | |
| "step": 3604 | |
| }, | |
| { | |
| "epoch": 0.8576780139735395, | |
| "grad_norm": 0.30078125, | |
| "learning_rate": 3.691623654372272e-05, | |
| "loss": 1.1895489692687988, | |
| "step": 3606 | |
| }, | |
| { | |
| "epoch": 0.858153708934146, | |
| "grad_norm": 0.31640625, | |
| "learning_rate": 3.671771820118975e-05, | |
| "loss": 1.1736524105072021, | |
| "step": 3608 | |
| }, | |
| { | |
| "epoch": 0.8586294038947525, | |
| "grad_norm": 0.306640625, | |
| "learning_rate": 3.6519281219603675e-05, | |
| "loss": 1.1844290494918823, | |
| "step": 3610 | |
| }, | |
| { | |
| "epoch": 0.859105098855359, | |
| "grad_norm": 0.306640625, | |
| "learning_rate": 3.632093051780498e-05, | |
| "loss": 1.1735870838165283, | |
| "step": 3612 | |
| }, | |
| { | |
| "epoch": 0.8595807938159655, | |
| "grad_norm": 0.30078125, | |
| "learning_rate": 3.6122671012495524e-05, | |
| "loss": 1.1634467840194702, | |
| "step": 3614 | |
| }, | |
| { | |
| "epoch": 0.860056488776572, | |
| "grad_norm": 0.310546875, | |
| "learning_rate": 3.592450761811656e-05, | |
| "loss": 1.178370714187622, | |
| "step": 3616 | |
| }, | |
| { | |
| "epoch": 0.8605321837371785, | |
| "grad_norm": 0.310546875, | |
| "learning_rate": 3.5726445246726915e-05, | |
| "loss": 1.153395414352417, | |
| "step": 3618 | |
| }, | |
| { | |
| "epoch": 0.8610078786977851, | |
| "grad_norm": 0.298828125, | |
| "learning_rate": 3.5528488807881354e-05, | |
| "loss": 1.1781080961227417, | |
| "step": 3620 | |
| }, | |
| { | |
| "epoch": 0.8614835736583916, | |
| "grad_norm": 0.330078125, | |
| "learning_rate": 3.53306432085087e-05, | |
| "loss": 1.1583459377288818, | |
| "step": 3622 | |
| }, | |
| { | |
| "epoch": 0.8619592686189981, | |
| "grad_norm": 0.314453125, | |
| "learning_rate": 3.513291335279036e-05, | |
| "loss": 1.2509000301361084, | |
| "step": 3624 | |
| }, | |
| { | |
| "epoch": 0.8624349635796046, | |
| "grad_norm": 0.314453125, | |
| "learning_rate": 3.4935304142038686e-05, | |
| "loss": 1.1476457118988037, | |
| "step": 3626 | |
| }, | |
| { | |
| "epoch": 0.8629106585402111, | |
| "grad_norm": 0.3046875, | |
| "learning_rate": 3.4737820474575456e-05, | |
| "loss": 1.1432411670684814, | |
| "step": 3628 | |
| }, | |
| { | |
| "epoch": 0.8633863535008176, | |
| "grad_norm": 0.314453125, | |
| "learning_rate": 3.4540467245610534e-05, | |
| "loss": 1.1552605628967285, | |
| "step": 3630 | |
| }, | |
| { | |
| "epoch": 0.8638620484614241, | |
| "grad_norm": 0.3203125, | |
| "learning_rate": 3.4343249347120445e-05, | |
| "loss": 1.2122418880462646, | |
| "step": 3632 | |
| }, | |
| { | |
| "epoch": 0.8643377434220306, | |
| "grad_norm": 0.302734375, | |
| "learning_rate": 3.41461716677272e-05, | |
| "loss": 1.1323740482330322, | |
| "step": 3634 | |
| }, | |
| { | |
| "epoch": 0.8648134383826371, | |
| "grad_norm": 0.328125, | |
| "learning_rate": 3.394923909257704e-05, | |
| "loss": 1.2014985084533691, | |
| "step": 3636 | |
| }, | |
| { | |
| "epoch": 0.8652891333432436, | |
| "grad_norm": 0.3125, | |
| "learning_rate": 3.375245650321934e-05, | |
| "loss": 1.188545823097229, | |
| "step": 3638 | |
| }, | |
| { | |
| "epoch": 0.8657648283038502, | |
| "grad_norm": 0.318359375, | |
| "learning_rate": 3.3555828777485726e-05, | |
| "loss": 1.178330898284912, | |
| "step": 3640 | |
| }, | |
| { | |
| "epoch": 0.8662405232644567, | |
| "grad_norm": 0.326171875, | |
| "learning_rate": 3.335936078936899e-05, | |
| "loss": 1.1636848449707031, | |
| "step": 3642 | |
| }, | |
| { | |
| "epoch": 0.8667162182250632, | |
| "grad_norm": 0.310546875, | |
| "learning_rate": 3.3163057408902435e-05, | |
| "loss": 1.1589958667755127, | |
| "step": 3644 | |
| }, | |
| { | |
| "epoch": 0.8671919131856697, | |
| "grad_norm": 0.310546875, | |
| "learning_rate": 3.296692350203902e-05, | |
| "loss": 1.1450896263122559, | |
| "step": 3646 | |
| }, | |
| { | |
| "epoch": 0.8676676081462762, | |
| "grad_norm": 0.310546875, | |
| "learning_rate": 3.277096393053082e-05, | |
| "loss": 1.123741626739502, | |
| "step": 3648 | |
| }, | |
| { | |
| "epoch": 0.8681433031068827, | |
| "grad_norm": 0.3203125, | |
| "learning_rate": 3.257518355180853e-05, | |
| "loss": 1.187320351600647, | |
| "step": 3650 | |
| }, | |
| { | |
| "epoch": 0.8686189980674892, | |
| "grad_norm": 0.3125, | |
| "learning_rate": 3.2379587218860976e-05, | |
| "loss": 1.16719651222229, | |
| "step": 3652 | |
| }, | |
| { | |
| "epoch": 0.8690946930280957, | |
| "grad_norm": 0.318359375, | |
| "learning_rate": 3.2184179780114944e-05, | |
| "loss": 1.196395993232727, | |
| "step": 3654 | |
| }, | |
| { | |
| "epoch": 0.8695703879887022, | |
| "grad_norm": 0.31640625, | |
| "learning_rate": 3.198896607931485e-05, | |
| "loss": 1.2043986320495605, | |
| "step": 3656 | |
| }, | |
| { | |
| "epoch": 0.8700460829493087, | |
| "grad_norm": 0.306640625, | |
| "learning_rate": 3.179395095540279e-05, | |
| "loss": 1.1552737951278687, | |
| "step": 3658 | |
| }, | |
| { | |
| "epoch": 0.8705217779099153, | |
| "grad_norm": 0.3046875, | |
| "learning_rate": 3.1599139242398556e-05, | |
| "loss": 1.180349588394165, | |
| "step": 3660 | |
| }, | |
| { | |
| "epoch": 0.8709974728705218, | |
| "grad_norm": 0.30078125, | |
| "learning_rate": 3.1404535769279764e-05, | |
| "loss": 1.1361455917358398, | |
| "step": 3662 | |
| }, | |
| { | |
| "epoch": 0.8714731678311283, | |
| "grad_norm": 0.3046875, | |
| "learning_rate": 3.121014535986227e-05, | |
| "loss": 1.1576318740844727, | |
| "step": 3664 | |
| }, | |
| { | |
| "epoch": 0.8719488627917348, | |
| "grad_norm": 0.29296875, | |
| "learning_rate": 3.1015972832680454e-05, | |
| "loss": 1.083686113357544, | |
| "step": 3666 | |
| }, | |
| { | |
| "epoch": 0.8724245577523413, | |
| "grad_norm": 0.30859375, | |
| "learning_rate": 3.0822023000867863e-05, | |
| "loss": 1.1516526937484741, | |
| "step": 3668 | |
| }, | |
| { | |
| "epoch": 0.8729002527129478, | |
| "grad_norm": 0.296875, | |
| "learning_rate": 3.062830067203792e-05, | |
| "loss": 1.1149940490722656, | |
| "step": 3670 | |
| }, | |
| { | |
| "epoch": 0.8733759476735543, | |
| "grad_norm": 0.3125, | |
| "learning_rate": 3.043481064816467e-05, | |
| "loss": 1.1872518062591553, | |
| "step": 3672 | |
| }, | |
| { | |
| "epoch": 0.8738516426341608, | |
| "grad_norm": 0.32421875, | |
| "learning_rate": 3.0241557725463866e-05, | |
| "loss": 1.133741021156311, | |
| "step": 3674 | |
| }, | |
| { | |
| "epoch": 0.8743273375947673, | |
| "grad_norm": 0.306640625, | |
| "learning_rate": 3.0048546694273965e-05, | |
| "loss": 1.1402521133422852, | |
| "step": 3676 | |
| }, | |
| { | |
| "epoch": 0.8748030325553738, | |
| "grad_norm": 0.3125, | |
| "learning_rate": 2.9855782338937432e-05, | |
| "loss": 1.2263612747192383, | |
| "step": 3678 | |
| }, | |
| { | |
| "epoch": 0.8752787275159803, | |
| "grad_norm": 0.3125, | |
| "learning_rate": 2.9663269437682208e-05, | |
| "loss": 1.1547777652740479, | |
| "step": 3680 | |
| }, | |
| { | |
| "epoch": 0.8757544224765869, | |
| "grad_norm": 0.40625, | |
| "learning_rate": 2.9471012762503134e-05, | |
| "loss": 1.1414549350738525, | |
| "step": 3682 | |
| }, | |
| { | |
| "epoch": 0.8762301174371934, | |
| "grad_norm": 0.30859375, | |
| "learning_rate": 2.9279017079043816e-05, | |
| "loss": 1.206810474395752, | |
| "step": 3684 | |
| }, | |
| { | |
| "epoch": 0.8767058123977999, | |
| "grad_norm": 0.3046875, | |
| "learning_rate": 2.908728714647834e-05, | |
| "loss": 1.1493148803710938, | |
| "step": 3686 | |
| }, | |
| { | |
| "epoch": 0.8771815073584064, | |
| "grad_norm": 0.310546875, | |
| "learning_rate": 2.8895827717393446e-05, | |
| "loss": 1.1840794086456299, | |
| "step": 3688 | |
| }, | |
| { | |
| "epoch": 0.8776572023190129, | |
| "grad_norm": 0.310546875, | |
| "learning_rate": 2.8704643537670603e-05, | |
| "loss": 1.1903091669082642, | |
| "step": 3690 | |
| }, | |
| { | |
| "epoch": 0.8781328972796194, | |
| "grad_norm": 0.30078125, | |
| "learning_rate": 2.8513739346368443e-05, | |
| "loss": 1.1483159065246582, | |
| "step": 3692 | |
| }, | |
| { | |
| "epoch": 0.8786085922402259, | |
| "grad_norm": 0.296875, | |
| "learning_rate": 2.8323119875605288e-05, | |
| "loss": 1.1400749683380127, | |
| "step": 3694 | |
| }, | |
| { | |
| "epoch": 0.8790842872008324, | |
| "grad_norm": 0.302734375, | |
| "learning_rate": 2.813278985044178e-05, | |
| "loss": 1.1304882764816284, | |
| "step": 3696 | |
| }, | |
| { | |
| "epoch": 0.8795599821614389, | |
| "grad_norm": 0.30078125, | |
| "learning_rate": 2.794275398876386e-05, | |
| "loss": 1.1478686332702637, | |
| "step": 3698 | |
| }, | |
| { | |
| "epoch": 0.8800356771220454, | |
| "grad_norm": 0.302734375, | |
| "learning_rate": 2.7753017001165737e-05, | |
| "loss": 1.1680241823196411, | |
| "step": 3700 | |
| }, | |
| { | |
| "epoch": 0.880511372082652, | |
| "grad_norm": 0.306640625, | |
| "learning_rate": 2.7563583590833133e-05, | |
| "loss": 1.1892788410186768, | |
| "step": 3702 | |
| }, | |
| { | |
| "epoch": 0.8809870670432585, | |
| "grad_norm": 0.306640625, | |
| "learning_rate": 2.737445845342677e-05, | |
| "loss": 1.1995958089828491, | |
| "step": 3704 | |
| }, | |
| { | |
| "epoch": 0.881462762003865, | |
| "grad_norm": 0.30078125, | |
| "learning_rate": 2.718564627696588e-05, | |
| "loss": 1.1075689792633057, | |
| "step": 3706 | |
| }, | |
| { | |
| "epoch": 0.8819384569644715, | |
| "grad_norm": 0.296875, | |
| "learning_rate": 2.6997151741712087e-05, | |
| "loss": 1.1438966989517212, | |
| "step": 3708 | |
| }, | |
| { | |
| "epoch": 0.882414151925078, | |
| "grad_norm": 0.310546875, | |
| "learning_rate": 2.680897952005329e-05, | |
| "loss": 1.209947109222412, | |
| "step": 3710 | |
| }, | |
| { | |
| "epoch": 0.8828898468856845, | |
| "grad_norm": 0.29296875, | |
| "learning_rate": 2.662113427638796e-05, | |
| "loss": 1.116198182106018, | |
| "step": 3712 | |
| }, | |
| { | |
| "epoch": 0.883365541846291, | |
| "grad_norm": 0.30859375, | |
| "learning_rate": 2.6433620667009442e-05, | |
| "loss": 1.1661490201950073, | |
| "step": 3714 | |
| }, | |
| { | |
| "epoch": 0.8838412368068975, | |
| "grad_norm": 0.310546875, | |
| "learning_rate": 2.6246443339990532e-05, | |
| "loss": 1.1473069190979004, | |
| "step": 3716 | |
| }, | |
| { | |
| "epoch": 0.884316931767504, | |
| "grad_norm": 0.302734375, | |
| "learning_rate": 2.605960693506834e-05, | |
| "loss": 1.1723562479019165, | |
| "step": 3718 | |
| }, | |
| { | |
| "epoch": 0.8847926267281107, | |
| "grad_norm": 0.296875, | |
| "learning_rate": 2.5873116083529173e-05, | |
| "loss": 1.1769287586212158, | |
| "step": 3720 | |
| }, | |
| { | |
| "epoch": 0.8852683216887172, | |
| "grad_norm": 0.30859375, | |
| "learning_rate": 2.56869754080938e-05, | |
| "loss": 1.1576387882232666, | |
| "step": 3722 | |
| }, | |
| { | |
| "epoch": 0.8857440166493237, | |
| "grad_norm": 0.302734375, | |
| "learning_rate": 2.550118952280288e-05, | |
| "loss": 1.0645157098770142, | |
| "step": 3724 | |
| }, | |
| { | |
| "epoch": 0.8862197116099302, | |
| "grad_norm": 0.298828125, | |
| "learning_rate": 2.531576303290253e-05, | |
| "loss": 1.1478241682052612, | |
| "step": 3726 | |
| }, | |
| { | |
| "epoch": 0.8866954065705367, | |
| "grad_norm": 0.310546875, | |
| "learning_rate": 2.5130700534730215e-05, | |
| "loss": 1.1812896728515625, | |
| "step": 3728 | |
| }, | |
| { | |
| "epoch": 0.8871711015311432, | |
| "grad_norm": 0.314453125, | |
| "learning_rate": 2.494600661560079e-05, | |
| "loss": 1.223722219467163, | |
| "step": 3730 | |
| }, | |
| { | |
| "epoch": 0.8876467964917497, | |
| "grad_norm": 0.294921875, | |
| "learning_rate": 2.4761685853692825e-05, | |
| "loss": 1.1464184522628784, | |
| "step": 3732 | |
| }, | |
| { | |
| "epoch": 0.8881224914523562, | |
| "grad_norm": 0.302734375, | |
| "learning_rate": 2.4577742817935077e-05, | |
| "loss": 1.167757511138916, | |
| "step": 3734 | |
| }, | |
| { | |
| "epoch": 0.8885981864129627, | |
| "grad_norm": 0.30078125, | |
| "learning_rate": 2.4394182067893243e-05, | |
| "loss": 1.1267993450164795, | |
| "step": 3736 | |
| }, | |
| { | |
| "epoch": 0.8890738813735692, | |
| "grad_norm": 0.302734375, | |
| "learning_rate": 2.421100815365701e-05, | |
| "loss": 1.1455817222595215, | |
| "step": 3738 | |
| }, | |
| { | |
| "epoch": 0.8895495763341758, | |
| "grad_norm": 0.30078125, | |
| "learning_rate": 2.4028225615727145e-05, | |
| "loss": 1.1717948913574219, | |
| "step": 3740 | |
| }, | |
| { | |
| "epoch": 0.8900252712947823, | |
| "grad_norm": 0.298828125, | |
| "learning_rate": 2.384583898490302e-05, | |
| "loss": 1.1518162488937378, | |
| "step": 3742 | |
| }, | |
| { | |
| "epoch": 0.8905009662553888, | |
| "grad_norm": 0.294921875, | |
| "learning_rate": 2.3663852782170336e-05, | |
| "loss": 1.147728443145752, | |
| "step": 3744 | |
| }, | |
| { | |
| "epoch": 0.8909766612159953, | |
| "grad_norm": 0.3125, | |
| "learning_rate": 2.3482271518588967e-05, | |
| "loss": 1.1500670909881592, | |
| "step": 3746 | |
| }, | |
| { | |
| "epoch": 0.8914523561766018, | |
| "grad_norm": 0.3046875, | |
| "learning_rate": 2.330109969518122e-05, | |
| "loss": 1.1796722412109375, | |
| "step": 3748 | |
| }, | |
| { | |
| "epoch": 0.8919280511372083, | |
| "grad_norm": 0.296875, | |
| "learning_rate": 2.3120341802820197e-05, | |
| "loss": 1.1131136417388916, | |
| "step": 3750 | |
| }, | |
| { | |
| "epoch": 0.8924037460978148, | |
| "grad_norm": 0.296875, | |
| "learning_rate": 2.2940002322118518e-05, | |
| "loss": 1.1635349988937378, | |
| "step": 3752 | |
| }, | |
| { | |
| "epoch": 0.8928794410584213, | |
| "grad_norm": 0.294921875, | |
| "learning_rate": 2.2760085723317285e-05, | |
| "loss": 1.1256214380264282, | |
| "step": 3754 | |
| }, | |
| { | |
| "epoch": 0.8933551360190278, | |
| "grad_norm": 0.302734375, | |
| "learning_rate": 2.258059646617517e-05, | |
| "loss": 1.1560603380203247, | |
| "step": 3756 | |
| }, | |
| { | |
| "epoch": 0.8938308309796343, | |
| "grad_norm": 0.294921875, | |
| "learning_rate": 2.240153899985802e-05, | |
| "loss": 1.186435580253601, | |
| "step": 3758 | |
| }, | |
| { | |
| "epoch": 0.8943065259402408, | |
| "grad_norm": 0.30078125, | |
| "learning_rate": 2.222291776282838e-05, | |
| "loss": 1.2056632041931152, | |
| "step": 3760 | |
| }, | |
| { | |
| "epoch": 0.8947822209008474, | |
| "grad_norm": 0.306640625, | |
| "learning_rate": 2.204473718273568e-05, | |
| "loss": 1.1999526023864746, | |
| "step": 3762 | |
| }, | |
| { | |
| "epoch": 0.8952579158614539, | |
| "grad_norm": 0.306640625, | |
| "learning_rate": 2.1867001676306306e-05, | |
| "loss": 1.209770917892456, | |
| "step": 3764 | |
| }, | |
| { | |
| "epoch": 0.8957336108220604, | |
| "grad_norm": 0.296875, | |
| "learning_rate": 2.1689715649234208e-05, | |
| "loss": 1.110062599182129, | |
| "step": 3766 | |
| }, | |
| { | |
| "epoch": 0.8962093057826669, | |
| "grad_norm": 0.302734375, | |
| "learning_rate": 2.1512883496071715e-05, | |
| "loss": 1.195483922958374, | |
| "step": 3768 | |
| }, | |
| { | |
| "epoch": 0.8966850007432734, | |
| "grad_norm": 0.310546875, | |
| "learning_rate": 2.1336509600120508e-05, | |
| "loss": 1.226474642753601, | |
| "step": 3770 | |
| }, | |
| { | |
| "epoch": 0.8971606957038799, | |
| "grad_norm": 0.294921875, | |
| "learning_rate": 2.1160598333323087e-05, | |
| "loss": 1.1339728832244873, | |
| "step": 3772 | |
| }, | |
| { | |
| "epoch": 0.8976363906644864, | |
| "grad_norm": 0.294921875, | |
| "learning_rate": 2.0985154056154274e-05, | |
| "loss": 1.2141457796096802, | |
| "step": 3774 | |
| }, | |
| { | |
| "epoch": 0.8981120856250929, | |
| "grad_norm": 0.3046875, | |
| "learning_rate": 2.0810181117513215e-05, | |
| "loss": 1.1662113666534424, | |
| "step": 3776 | |
| }, | |
| { | |
| "epoch": 0.8985877805856994, | |
| "grad_norm": 0.298828125, | |
| "learning_rate": 2.0635683854615576e-05, | |
| "loss": 1.0973902940750122, | |
| "step": 3778 | |
| }, | |
| { | |
| "epoch": 0.899063475546306, | |
| "grad_norm": 0.2890625, | |
| "learning_rate": 2.0461666592885974e-05, | |
| "loss": 1.1171178817749023, | |
| "step": 3780 | |
| }, | |
| { | |
| "epoch": 0.8995391705069125, | |
| "grad_norm": 0.296875, | |
| "learning_rate": 2.0288133645850808e-05, | |
| "loss": 1.1062219142913818, | |
| "step": 3782 | |
| }, | |
| { | |
| "epoch": 0.900014865467519, | |
| "grad_norm": 0.322265625, | |
| "learning_rate": 2.0115089315031323e-05, | |
| "loss": 1.1549062728881836, | |
| "step": 3784 | |
| }, | |
| { | |
| "epoch": 0.9004905604281255, | |
| "grad_norm": 0.294921875, | |
| "learning_rate": 1.9942537889836963e-05, | |
| "loss": 1.1845629215240479, | |
| "step": 3786 | |
| }, | |
| { | |
| "epoch": 0.900966255388732, | |
| "grad_norm": 0.291015625, | |
| "learning_rate": 1.9770483647459117e-05, | |
| "loss": 1.1162179708480835, | |
| "step": 3788 | |
| }, | |
| { | |
| "epoch": 0.9014419503493385, | |
| "grad_norm": 0.302734375, | |
| "learning_rate": 1.9598930852764987e-05, | |
| "loss": 1.1066762208938599, | |
| "step": 3790 | |
| }, | |
| { | |
| "epoch": 0.901917645309945, | |
| "grad_norm": 0.28515625, | |
| "learning_rate": 1.942788375819198e-05, | |
| "loss": 1.1973916292190552, | |
| "step": 3792 | |
| }, | |
| { | |
| "epoch": 0.9023933402705515, | |
| "grad_norm": 0.302734375, | |
| "learning_rate": 1.9257346603642203e-05, | |
| "loss": 1.1700313091278076, | |
| "step": 3794 | |
| }, | |
| { | |
| "epoch": 0.902869035231158, | |
| "grad_norm": 0.298828125, | |
| "learning_rate": 1.9087323616377414e-05, | |
| "loss": 1.1440091133117676, | |
| "step": 3796 | |
| }, | |
| { | |
| "epoch": 0.9033447301917645, | |
| "grad_norm": 0.296875, | |
| "learning_rate": 1.8917819010914283e-05, | |
| "loss": 1.1456643342971802, | |
| "step": 3798 | |
| }, | |
| { | |
| "epoch": 0.903820425152371, | |
| "grad_norm": 0.30078125, | |
| "learning_rate": 1.8748836988919793e-05, | |
| "loss": 1.1695044040679932, | |
| "step": 3800 | |
| }, | |
| { | |
| "epoch": 0.9042961201129776, | |
| "grad_norm": 0.291015625, | |
| "learning_rate": 1.8580381739107252e-05, | |
| "loss": 1.1730451583862305, | |
| "step": 3802 | |
| }, | |
| { | |
| "epoch": 0.9047718150735841, | |
| "grad_norm": 0.296875, | |
| "learning_rate": 1.8412457437132318e-05, | |
| "loss": 1.1789326667785645, | |
| "step": 3804 | |
| }, | |
| { | |
| "epoch": 0.9052475100341906, | |
| "grad_norm": 0.291015625, | |
| "learning_rate": 1.824506824548956e-05, | |
| "loss": 1.1460459232330322, | |
| "step": 3806 | |
| }, | |
| { | |
| "epoch": 0.9057232049947971, | |
| "grad_norm": 0.30859375, | |
| "learning_rate": 1.8078218313409324e-05, | |
| "loss": 1.1638338565826416, | |
| "step": 3808 | |
| }, | |
| { | |
| "epoch": 0.9061988999554036, | |
| "grad_norm": 0.291015625, | |
| "learning_rate": 1.7911911776754756e-05, | |
| "loss": 1.1171094179153442, | |
| "step": 3810 | |
| }, | |
| { | |
| "epoch": 0.9066745949160101, | |
| "grad_norm": 0.306640625, | |
| "learning_rate": 1.7746152757919445e-05, | |
| "loss": 1.2183301448822021, | |
| "step": 3812 | |
| }, | |
| { | |
| "epoch": 0.9071502898766166, | |
| "grad_norm": 0.294921875, | |
| "learning_rate": 1.758094536572508e-05, | |
| "loss": 1.141022801399231, | |
| "step": 3814 | |
| }, | |
| { | |
| "epoch": 0.9076259848372231, | |
| "grad_norm": 0.296875, | |
| "learning_rate": 1.741629369531968e-05, | |
| "loss": 1.1439030170440674, | |
| "step": 3816 | |
| }, | |
| { | |
| "epoch": 0.9081016797978296, | |
| "grad_norm": 0.2890625, | |
| "learning_rate": 1.7252201828076126e-05, | |
| "loss": 1.1290979385375977, | |
| "step": 3818 | |
| }, | |
| { | |
| "epoch": 0.9085773747584361, | |
| "grad_norm": 0.294921875, | |
| "learning_rate": 1.7088673831490893e-05, | |
| "loss": 1.1221880912780762, | |
| "step": 3820 | |
| }, | |
| { | |
| "epoch": 0.9090530697190427, | |
| "grad_norm": 0.298828125, | |
| "learning_rate": 1.6925713759083282e-05, | |
| "loss": 1.1449179649353027, | |
| "step": 3822 | |
| }, | |
| { | |
| "epoch": 0.9095287646796492, | |
| "grad_norm": 0.294921875, | |
| "learning_rate": 1.6763325650294933e-05, | |
| "loss": 1.148937702178955, | |
| "step": 3824 | |
| }, | |
| { | |
| "epoch": 0.9100044596402557, | |
| "grad_norm": 0.29296875, | |
| "learning_rate": 1.6601513530389727e-05, | |
| "loss": 1.12366783618927, | |
| "step": 3826 | |
| }, | |
| { | |
| "epoch": 0.9104801546008622, | |
| "grad_norm": 0.298828125, | |
| "learning_rate": 1.644028141035394e-05, | |
| "loss": 1.12631356716156, | |
| "step": 3828 | |
| }, | |
| { | |
| "epoch": 0.9109558495614687, | |
| "grad_norm": 0.287109375, | |
| "learning_rate": 1.627963328679686e-05, | |
| "loss": 1.1116429567337036, | |
| "step": 3830 | |
| }, | |
| { | |
| "epoch": 0.9114315445220752, | |
| "grad_norm": 0.298828125, | |
| "learning_rate": 1.6119573141851747e-05, | |
| "loss": 1.1646809577941895, | |
| "step": 3832 | |
| }, | |
| { | |
| "epoch": 0.9119072394826817, | |
| "grad_norm": 0.29296875, | |
| "learning_rate": 1.5960104943077045e-05, | |
| "loss": 1.0913721323013306, | |
| "step": 3834 | |
| }, | |
| { | |
| "epoch": 0.9123829344432882, | |
| "grad_norm": 0.296875, | |
| "learning_rate": 1.5801232643358134e-05, | |
| "loss": 1.1654855012893677, | |
| "step": 3836 | |
| }, | |
| { | |
| "epoch": 0.9128586294038947, | |
| "grad_norm": 0.294921875, | |
| "learning_rate": 1.5642960180809255e-05, | |
| "loss": 1.1685070991516113, | |
| "step": 3838 | |
| }, | |
| { | |
| "epoch": 0.9133343243645012, | |
| "grad_norm": 0.302734375, | |
| "learning_rate": 1.5485291478675928e-05, | |
| "loss": 1.1893408298492432, | |
| "step": 3840 | |
| }, | |
| { | |
| "epoch": 0.9138100193251077, | |
| "grad_norm": 0.298828125, | |
| "learning_rate": 1.5328230445237758e-05, | |
| "loss": 1.1577904224395752, | |
| "step": 3842 | |
| }, | |
| { | |
| "epoch": 0.9142857142857143, | |
| "grad_norm": 0.2890625, | |
| "learning_rate": 1.517178097371144e-05, | |
| "loss": 1.1701260805130005, | |
| "step": 3844 | |
| }, | |
| { | |
| "epoch": 0.9147614092463208, | |
| "grad_norm": 0.296875, | |
| "learning_rate": 1.5015946942154375e-05, | |
| "loss": 1.1752269268035889, | |
| "step": 3846 | |
| }, | |
| { | |
| "epoch": 0.9152371042069273, | |
| "grad_norm": 0.294921875, | |
| "learning_rate": 1.4860732213368452e-05, | |
| "loss": 1.158857822418213, | |
| "step": 3848 | |
| }, | |
| { | |
| "epoch": 0.9157127991675338, | |
| "grad_norm": 0.30078125, | |
| "learning_rate": 1.4706140634804325e-05, | |
| "loss": 1.163185954093933, | |
| "step": 3850 | |
| }, | |
| { | |
| "epoch": 0.9161884941281403, | |
| "grad_norm": 0.287109375, | |
| "learning_rate": 1.455217603846609e-05, | |
| "loss": 1.1203261613845825, | |
| "step": 3852 | |
| }, | |
| { | |
| "epoch": 0.9166641890887468, | |
| "grad_norm": 0.296875, | |
| "learning_rate": 1.4398842240816207e-05, | |
| "loss": 1.128927230834961, | |
| "step": 3854 | |
| }, | |
| { | |
| "epoch": 0.9171398840493533, | |
| "grad_norm": 0.291015625, | |
| "learning_rate": 1.4246143042680989e-05, | |
| "loss": 1.1380681991577148, | |
| "step": 3856 | |
| }, | |
| { | |
| "epoch": 0.9176155790099598, | |
| "grad_norm": 0.298828125, | |
| "learning_rate": 1.4094082229156323e-05, | |
| "loss": 1.1902419328689575, | |
| "step": 3858 | |
| }, | |
| { | |
| "epoch": 0.9180912739705663, | |
| "grad_norm": 0.29296875, | |
| "learning_rate": 1.3942663569513864e-05, | |
| "loss": 1.1731154918670654, | |
| "step": 3860 | |
| }, | |
| { | |
| "epoch": 0.9185669689311728, | |
| "grad_norm": 0.294921875, | |
| "learning_rate": 1.3791890817107616e-05, | |
| "loss": 1.167722225189209, | |
| "step": 3862 | |
| }, | |
| { | |
| "epoch": 0.9190426638917794, | |
| "grad_norm": 0.29296875, | |
| "learning_rate": 1.3641767709280869e-05, | |
| "loss": 1.1482999324798584, | |
| "step": 3864 | |
| }, | |
| { | |
| "epoch": 0.9195183588523859, | |
| "grad_norm": 0.294921875, | |
| "learning_rate": 1.3492297967273609e-05, | |
| "loss": 1.1329618692398071, | |
| "step": 3866 | |
| }, | |
| { | |
| "epoch": 0.9199940538129924, | |
| "grad_norm": 0.291015625, | |
| "learning_rate": 1.3343485296130214e-05, | |
| "loss": 1.2048474550247192, | |
| "step": 3868 | |
| }, | |
| { | |
| "epoch": 0.9204697487735989, | |
| "grad_norm": 0.291015625, | |
| "learning_rate": 1.319533338460762e-05, | |
| "loss": 1.1382906436920166, | |
| "step": 3870 | |
| }, | |
| { | |
| "epoch": 0.9209454437342054, | |
| "grad_norm": 0.306640625, | |
| "learning_rate": 1.3047845905083966e-05, | |
| "loss": 1.1446309089660645, | |
| "step": 3872 | |
| }, | |
| { | |
| "epoch": 0.9214211386948119, | |
| "grad_norm": 0.296875, | |
| "learning_rate": 1.2901026513467434e-05, | |
| "loss": 1.1889190673828125, | |
| "step": 3874 | |
| }, | |
| { | |
| "epoch": 0.9218968336554184, | |
| "grad_norm": 0.29296875, | |
| "learning_rate": 1.2754878849105752e-05, | |
| "loss": 1.1595823764801025, | |
| "step": 3876 | |
| }, | |
| { | |
| "epoch": 0.9223725286160249, | |
| "grad_norm": 0.294921875, | |
| "learning_rate": 1.260940653469589e-05, | |
| "loss": 1.1825573444366455, | |
| "step": 3878 | |
| }, | |
| { | |
| "epoch": 0.9228482235766314, | |
| "grad_norm": 0.2890625, | |
| "learning_rate": 1.2464613176194283e-05, | |
| "loss": 1.113194465637207, | |
| "step": 3880 | |
| }, | |
| { | |
| "epoch": 0.9233239185372379, | |
| "grad_norm": 0.2890625, | |
| "learning_rate": 1.2320502362727518e-05, | |
| "loss": 1.0969769954681396, | |
| "step": 3882 | |
| }, | |
| { | |
| "epoch": 0.9237996134978445, | |
| "grad_norm": 0.283203125, | |
| "learning_rate": 1.2177077666503236e-05, | |
| "loss": 1.1694114208221436, | |
| "step": 3884 | |
| }, | |
| { | |
| "epoch": 0.9242753084584511, | |
| "grad_norm": 0.298828125, | |
| "learning_rate": 1.2034342642721723e-05, | |
| "loss": 1.190758228302002, | |
| "step": 3886 | |
| }, | |
| { | |
| "epoch": 0.9247510034190576, | |
| "grad_norm": 0.291015625, | |
| "learning_rate": 1.1892300829487678e-05, | |
| "loss": 1.136456847190857, | |
| "step": 3888 | |
| }, | |
| { | |
| "epoch": 0.9252266983796641, | |
| "grad_norm": 0.294921875, | |
| "learning_rate": 1.1750955747722546e-05, | |
| "loss": 1.1714725494384766, | |
| "step": 3890 | |
| }, | |
| { | |
| "epoch": 0.9257023933402706, | |
| "grad_norm": 0.3046875, | |
| "learning_rate": 1.161031090107728e-05, | |
| "loss": 1.1840903759002686, | |
| "step": 3892 | |
| }, | |
| { | |
| "epoch": 0.9261780883008771, | |
| "grad_norm": 0.291015625, | |
| "learning_rate": 1.1470369775845423e-05, | |
| "loss": 1.204842209815979, | |
| "step": 3894 | |
| }, | |
| { | |
| "epoch": 0.9266537832614836, | |
| "grad_norm": 0.296875, | |
| "learning_rate": 1.1331135840876764e-05, | |
| "loss": 1.1758289337158203, | |
| "step": 3896 | |
| }, | |
| { | |
| "epoch": 0.9271294782220901, | |
| "grad_norm": 0.30859375, | |
| "learning_rate": 1.119261254749128e-05, | |
| "loss": 1.1592724323272705, | |
| "step": 3898 | |
| }, | |
| { | |
| "epoch": 0.9276051731826966, | |
| "grad_norm": 0.29296875, | |
| "learning_rate": 1.1054803329393625e-05, | |
| "loss": 1.1884357929229736, | |
| "step": 3900 | |
| }, | |
| { | |
| "epoch": 0.9280808681433032, | |
| "grad_norm": 0.2890625, | |
| "learning_rate": 1.0917711602588037e-05, | |
| "loss": 1.1424968242645264, | |
| "step": 3902 | |
| }, | |
| { | |
| "epoch": 0.9285565631039097, | |
| "grad_norm": 0.291015625, | |
| "learning_rate": 1.0781340765293606e-05, | |
| "loss": 1.1715056896209717, | |
| "step": 3904 | |
| }, | |
| { | |
| "epoch": 0.9290322580645162, | |
| "grad_norm": 0.29296875, | |
| "learning_rate": 1.0645694197860084e-05, | |
| "loss": 1.1644243001937866, | |
| "step": 3906 | |
| }, | |
| { | |
| "epoch": 0.9295079530251227, | |
| "grad_norm": 0.28125, | |
| "learning_rate": 1.0510775262684056e-05, | |
| "loss": 1.1605405807495117, | |
| "step": 3908 | |
| }, | |
| { | |
| "epoch": 0.9299836479857292, | |
| "grad_norm": 0.28515625, | |
| "learning_rate": 1.0376587304125656e-05, | |
| "loss": 1.1060264110565186, | |
| "step": 3910 | |
| }, | |
| { | |
| "epoch": 0.9304593429463357, | |
| "grad_norm": 0.29296875, | |
| "learning_rate": 1.0243133648425595e-05, | |
| "loss": 1.0869121551513672, | |
| "step": 3912 | |
| }, | |
| { | |
| "epoch": 0.9309350379069422, | |
| "grad_norm": 0.2890625, | |
| "learning_rate": 1.0110417603622733e-05, | |
| "loss": 1.1413328647613525, | |
| "step": 3914 | |
| }, | |
| { | |
| "epoch": 0.9314107328675487, | |
| "grad_norm": 0.2890625, | |
| "learning_rate": 9.978442459472127e-06, | |
| "loss": 1.1426079273223877, | |
| "step": 3916 | |
| }, | |
| { | |
| "epoch": 0.9318864278281552, | |
| "grad_norm": 0.2890625, | |
| "learning_rate": 9.847211487363401e-06, | |
| "loss": 1.1142783164978027, | |
| "step": 3918 | |
| }, | |
| { | |
| "epoch": 0.9323621227887617, | |
| "grad_norm": 0.2890625, | |
| "learning_rate": 9.71672794023975e-06, | |
| "loss": 1.158155083656311, | |
| "step": 3920 | |
| }, | |
| { | |
| "epoch": 0.9328378177493682, | |
| "grad_norm": 0.298828125, | |
| "learning_rate": 9.586995052517208e-06, | |
| "loss": 1.2047823667526245, | |
| "step": 3922 | |
| }, | |
| { | |
| "epoch": 0.9333135127099748, | |
| "grad_norm": 0.29296875, | |
| "learning_rate": 9.458016040004541e-06, | |
| "loss": 1.1312339305877686, | |
| "step": 3924 | |
| }, | |
| { | |
| "epoch": 0.9337892076705813, | |
| "grad_norm": 0.287109375, | |
| "learning_rate": 9.329794099823531e-06, | |
| "loss": 1.1283931732177734, | |
| "step": 3926 | |
| }, | |
| { | |
| "epoch": 0.9342649026311878, | |
| "grad_norm": 0.28515625, | |
| "learning_rate": 9.202332410329676e-06, | |
| "loss": 1.1590964794158936, | |
| "step": 3928 | |
| }, | |
| { | |
| "epoch": 0.9347405975917943, | |
| "grad_norm": 0.296875, | |
| "learning_rate": 9.075634131033481e-06, | |
| "loss": 1.196352243423462, | |
| "step": 3930 | |
| }, | |
| { | |
| "epoch": 0.9352162925524008, | |
| "grad_norm": 0.29296875, | |
| "learning_rate": 8.949702402522065e-06, | |
| "loss": 1.1239594221115112, | |
| "step": 3932 | |
| }, | |
| { | |
| "epoch": 0.9356919875130073, | |
| "grad_norm": 0.310546875, | |
| "learning_rate": 8.824540346381343e-06, | |
| "loss": 1.1666662693023682, | |
| "step": 3934 | |
| }, | |
| { | |
| "epoch": 0.9361676824736138, | |
| "grad_norm": 0.28515625, | |
| "learning_rate": 8.700151065118683e-06, | |
| "loss": 1.2102231979370117, | |
| "step": 3936 | |
| }, | |
| { | |
| "epoch": 0.9366433774342203, | |
| "grad_norm": 0.279296875, | |
| "learning_rate": 8.576537642085934e-06, | |
| "loss": 1.1497886180877686, | |
| "step": 3938 | |
| }, | |
| { | |
| "epoch": 0.9371190723948268, | |
| "grad_norm": 0.29296875, | |
| "learning_rate": 8.453703141403062e-06, | |
| "loss": 1.1418395042419434, | |
| "step": 3940 | |
| }, | |
| { | |
| "epoch": 0.9375947673554333, | |
| "grad_norm": 0.29296875, | |
| "learning_rate": 8.331650607882146e-06, | |
| "loss": 1.1689965724945068, | |
| "step": 3942 | |
| }, | |
| { | |
| "epoch": 0.9380704623160399, | |
| "grad_norm": 0.283203125, | |
| "learning_rate": 8.210383066951926e-06, | |
| "loss": 1.1347894668579102, | |
| "step": 3944 | |
| }, | |
| { | |
| "epoch": 0.9385461572766464, | |
| "grad_norm": 0.30078125, | |
| "learning_rate": 8.08990352458281e-06, | |
| "loss": 1.1696358919143677, | |
| "step": 3946 | |
| }, | |
| { | |
| "epoch": 0.9390218522372529, | |
| "grad_norm": 0.2890625, | |
| "learning_rate": 7.970214967212349e-06, | |
| "loss": 1.2054082155227661, | |
| "step": 3948 | |
| }, | |
| { | |
| "epoch": 0.9394975471978594, | |
| "grad_norm": 0.291015625, | |
| "learning_rate": 7.851320361671244e-06, | |
| "loss": 1.2238609790802002, | |
| "step": 3950 | |
| }, | |
| { | |
| "epoch": 0.9399732421584659, | |
| "grad_norm": 0.2890625, | |
| "learning_rate": 7.733222655109758e-06, | |
| "loss": 1.1731221675872803, | |
| "step": 3952 | |
| }, | |
| { | |
| "epoch": 0.9404489371190724, | |
| "grad_norm": 0.28515625, | |
| "learning_rate": 7.615924774924681e-06, | |
| "loss": 1.1514570713043213, | |
| "step": 3954 | |
| }, | |
| { | |
| "epoch": 0.9409246320796789, | |
| "grad_norm": 0.28515625, | |
| "learning_rate": 7.499429628686794e-06, | |
| "loss": 1.1528222560882568, | |
| "step": 3956 | |
| }, | |
| { | |
| "epoch": 0.9414003270402854, | |
| "grad_norm": 0.302734375, | |
| "learning_rate": 7.383740104068735e-06, | |
| "loss": 1.119846224784851, | |
| "step": 3958 | |
| }, | |
| { | |
| "epoch": 0.9418760220008919, | |
| "grad_norm": 0.2890625, | |
| "learning_rate": 7.268859068773495e-06, | |
| "loss": 1.1658766269683838, | |
| "step": 3960 | |
| }, | |
| { | |
| "epoch": 0.9423517169614984, | |
| "grad_norm": 0.29296875, | |
| "learning_rate": 7.154789370463256e-06, | |
| "loss": 1.1100010871887207, | |
| "step": 3962 | |
| }, | |
| { | |
| "epoch": 0.942827411922105, | |
| "grad_norm": 0.30859375, | |
| "learning_rate": 7.041533836688881e-06, | |
| "loss": 1.1952953338623047, | |
| "step": 3964 | |
| }, | |
| { | |
| "epoch": 0.9433031068827115, | |
| "grad_norm": 0.287109375, | |
| "learning_rate": 6.9290952748197524e-06, | |
| "loss": 1.1750929355621338, | |
| "step": 3966 | |
| }, | |
| { | |
| "epoch": 0.943778801843318, | |
| "grad_norm": 0.294921875, | |
| "learning_rate": 6.81747647197422e-06, | |
| "loss": 1.143003225326538, | |
| "step": 3968 | |
| }, | |
| { | |
| "epoch": 0.9442544968039245, | |
| "grad_norm": 0.27734375, | |
| "learning_rate": 6.706680194950541e-06, | |
| "loss": 1.1299149990081787, | |
| "step": 3970 | |
| }, | |
| { | |
| "epoch": 0.944730191764531, | |
| "grad_norm": 0.291015625, | |
| "learning_rate": 6.596709190158224e-06, | |
| "loss": 1.157487154006958, | |
| "step": 3972 | |
| }, | |
| { | |
| "epoch": 0.9452058867251375, | |
| "grad_norm": 0.294921875, | |
| "learning_rate": 6.4875661835500295e-06, | |
| "loss": 1.124016523361206, | |
| "step": 3974 | |
| }, | |
| { | |
| "epoch": 0.945681581685744, | |
| "grad_norm": 0.2890625, | |
| "learning_rate": 6.379253880554337e-06, | |
| "loss": 1.1457756757736206, | |
| "step": 3976 | |
| }, | |
| { | |
| "epoch": 0.9461572766463505, | |
| "grad_norm": 0.2890625, | |
| "learning_rate": 6.271774966008117e-06, | |
| "loss": 1.1654269695281982, | |
| "step": 3978 | |
| }, | |
| { | |
| "epoch": 0.946632971606957, | |
| "grad_norm": 0.294921875, | |
| "learning_rate": 6.1651321040903946e-06, | |
| "loss": 1.2042397260665894, | |
| "step": 3980 | |
| }, | |
| { | |
| "epoch": 0.9471086665675635, | |
| "grad_norm": 0.296875, | |
| "learning_rate": 6.059327938256148e-06, | |
| "loss": 1.1625417470932007, | |
| "step": 3982 | |
| }, | |
| { | |
| "epoch": 0.94758436152817, | |
| "grad_norm": 0.2890625, | |
| "learning_rate": 5.954365091170848e-06, | |
| "loss": 1.1616830825805664, | |
| "step": 3984 | |
| }, | |
| { | |
| "epoch": 0.9480600564887766, | |
| "grad_norm": 0.30078125, | |
| "learning_rate": 5.850246164645414e-06, | |
| "loss": 1.2000601291656494, | |
| "step": 3986 | |
| }, | |
| { | |
| "epoch": 0.9485357514493831, | |
| "grad_norm": 0.296875, | |
| "learning_rate": 5.746973739571719e-06, | |
| "loss": 1.1334123611450195, | |
| "step": 3988 | |
| }, | |
| { | |
| "epoch": 0.9490114464099896, | |
| "grad_norm": 0.283203125, | |
| "learning_rate": 5.6445503758586485e-06, | |
| "loss": 1.129727840423584, | |
| "step": 3990 | |
| }, | |
| { | |
| "epoch": 0.9494871413705961, | |
| "grad_norm": 0.30078125, | |
| "learning_rate": 5.542978612368588e-06, | |
| "loss": 1.142544150352478, | |
| "step": 3992 | |
| }, | |
| { | |
| "epoch": 0.9499628363312026, | |
| "grad_norm": 0.310546875, | |
| "learning_rate": 5.442260966854563e-06, | |
| "loss": 1.1486105918884277, | |
| "step": 3994 | |
| }, | |
| { | |
| "epoch": 0.9504385312918091, | |
| "grad_norm": 0.283203125, | |
| "learning_rate": 5.342399935897748e-06, | |
| "loss": 1.0392706394195557, | |
| "step": 3996 | |
| }, | |
| { | |
| "epoch": 0.9509142262524156, | |
| "grad_norm": 0.294921875, | |
| "learning_rate": 5.2433979948456385e-06, | |
| "loss": 1.1802358627319336, | |
| "step": 3998 | |
| }, | |
| { | |
| "epoch": 0.9513899212130221, | |
| "grad_norm": 0.296875, | |
| "learning_rate": 5.1452575977506905e-06, | |
| "loss": 1.1869316101074219, | |
| "step": 4000 | |
| }, | |
| { | |
| "epoch": 0.9518656161736286, | |
| "grad_norm": 0.287109375, | |
| "learning_rate": 5.047981177309447e-06, | |
| "loss": 1.1039962768554688, | |
| "step": 4002 | |
| }, | |
| { | |
| "epoch": 0.9523413111342351, | |
| "grad_norm": 0.28125, | |
| "learning_rate": 4.9515711448022966e-06, | |
| "loss": 1.108412504196167, | |
| "step": 4004 | |
| }, | |
| { | |
| "epoch": 0.9528170060948417, | |
| "grad_norm": 0.298828125, | |
| "learning_rate": 4.856029890033647e-06, | |
| "loss": 1.1982967853546143, | |
| "step": 4006 | |
| }, | |
| { | |
| "epoch": 0.9532927010554482, | |
| "grad_norm": 0.298828125, | |
| "learning_rate": 4.761359781272705e-06, | |
| "loss": 1.1908378601074219, | |
| "step": 4008 | |
| }, | |
| { | |
| "epoch": 0.9537683960160547, | |
| "grad_norm": 0.3125, | |
| "learning_rate": 4.667563165194815e-06, | |
| "loss": 1.2247347831726074, | |
| "step": 4010 | |
| }, | |
| { | |
| "epoch": 0.9542440909766612, | |
| "grad_norm": 0.296875, | |
| "learning_rate": 4.574642366823199e-06, | |
| "loss": 1.174034595489502, | |
| "step": 4012 | |
| }, | |
| { | |
| "epoch": 0.9547197859372677, | |
| "grad_norm": 0.29296875, | |
| "learning_rate": 4.482599689471437e-06, | |
| "loss": 1.1458334922790527, | |
| "step": 4014 | |
| }, | |
| { | |
| "epoch": 0.9551954808978742, | |
| "grad_norm": 0.287109375, | |
| "learning_rate": 4.391437414686261e-06, | |
| "loss": 1.1437745094299316, | |
| "step": 4016 | |
| }, | |
| { | |
| "epoch": 0.9556711758584807, | |
| "grad_norm": 0.294921875, | |
| "learning_rate": 4.301157802191078e-06, | |
| "loss": 1.1791338920593262, | |
| "step": 4018 | |
| }, | |
| { | |
| "epoch": 0.9561468708190872, | |
| "grad_norm": 0.30078125, | |
| "learning_rate": 4.211763089829934e-06, | |
| "loss": 1.2103009223937988, | |
| "step": 4020 | |
| }, | |
| { | |
| "epoch": 0.9566225657796937, | |
| "grad_norm": 0.28515625, | |
| "learning_rate": 4.123255493512028e-06, | |
| "loss": 1.1193060874938965, | |
| "step": 4022 | |
| }, | |
| { | |
| "epoch": 0.9570982607403002, | |
| "grad_norm": 0.29296875, | |
| "learning_rate": 4.035637207156798e-06, | |
| "loss": 1.1846659183502197, | |
| "step": 4024 | |
| }, | |
| { | |
| "epoch": 0.9575739557009068, | |
| "grad_norm": 0.2890625, | |
| "learning_rate": 3.94891040263953e-06, | |
| "loss": 1.1607009172439575, | |
| "step": 4026 | |
| }, | |
| { | |
| "epoch": 0.9580496506615133, | |
| "grad_norm": 0.29296875, | |
| "learning_rate": 3.863077229737546e-06, | |
| "loss": 1.1519575119018555, | |
| "step": 4028 | |
| }, | |
| { | |
| "epoch": 0.9585253456221198, | |
| "grad_norm": 0.29296875, | |
| "learning_rate": 3.778139816076878e-06, | |
| "loss": 1.1820671558380127, | |
| "step": 4030 | |
| }, | |
| { | |
| "epoch": 0.9590010405827263, | |
| "grad_norm": 0.30078125, | |
| "learning_rate": 3.694100267079548e-06, | |
| "loss": 1.1689975261688232, | |
| "step": 4032 | |
| }, | |
| { | |
| "epoch": 0.9594767355433328, | |
| "grad_norm": 0.306640625, | |
| "learning_rate": 3.610960665911396e-06, | |
| "loss": 1.187016248703003, | |
| "step": 4034 | |
| }, | |
| { | |
| "epoch": 0.9599524305039393, | |
| "grad_norm": 0.291015625, | |
| "learning_rate": 3.5287230734304002e-06, | |
| "loss": 1.1339020729064941, | |
| "step": 4036 | |
| }, | |
| { | |
| "epoch": 0.9604281254645458, | |
| "grad_norm": 0.28515625, | |
| "learning_rate": 3.4473895281356497e-06, | |
| "loss": 1.1432700157165527, | |
| "step": 4038 | |
| }, | |
| { | |
| "epoch": 0.9609038204251523, | |
| "grad_norm": 0.302734375, | |
| "learning_rate": 3.3669620461167464e-06, | |
| "loss": 1.1758100986480713, | |
| "step": 4040 | |
| }, | |
| { | |
| "epoch": 0.9613795153857588, | |
| "grad_norm": 0.298828125, | |
| "learning_rate": 3.2874426210038802e-06, | |
| "loss": 1.1896083354949951, | |
| "step": 4042 | |
| }, | |
| { | |
| "epoch": 0.9618552103463653, | |
| "grad_norm": 0.287109375, | |
| "learning_rate": 3.208833223918415e-06, | |
| "loss": 1.169938564300537, | |
| "step": 4044 | |
| }, | |
| { | |
| "epoch": 0.9623309053069719, | |
| "grad_norm": 0.29296875, | |
| "learning_rate": 3.1311358034239725e-06, | |
| "loss": 1.24098539352417, | |
| "step": 4046 | |
| }, | |
| { | |
| "epoch": 0.9628066002675784, | |
| "grad_norm": 0.287109375, | |
| "learning_rate": 3.0543522854782127e-06, | |
| "loss": 1.1295160055160522, | |
| "step": 4048 | |
| }, | |
| { | |
| "epoch": 0.963282295228185, | |
| "grad_norm": 0.29296875, | |
| "learning_rate": 2.9784845733850144e-06, | |
| "loss": 1.193390130996704, | |
| "step": 4050 | |
| }, | |
| { | |
| "epoch": 0.9637579901887915, | |
| "grad_norm": 0.29296875, | |
| "learning_rate": 2.9035345477473485e-06, | |
| "loss": 1.1334125995635986, | |
| "step": 4052 | |
| }, | |
| { | |
| "epoch": 0.964233685149398, | |
| "grad_norm": 0.283203125, | |
| "learning_rate": 2.8295040664206454e-06, | |
| "loss": 1.156846284866333, | |
| "step": 4054 | |
| }, | |
| { | |
| "epoch": 0.9647093801100045, | |
| "grad_norm": 0.29296875, | |
| "learning_rate": 2.7563949644667354e-06, | |
| "loss": 1.1609504222869873, | |
| "step": 4056 | |
| }, | |
| { | |
| "epoch": 0.965185075070611, | |
| "grad_norm": 0.29296875, | |
| "learning_rate": 2.6842090541083775e-06, | |
| "loss": 1.1681158542633057, | |
| "step": 4058 | |
| }, | |
| { | |
| "epoch": 0.9656607700312175, | |
| "grad_norm": 0.28515625, | |
| "learning_rate": 2.6129481246843248e-06, | |
| "loss": 1.1730051040649414, | |
| "step": 4060 | |
| }, | |
| { | |
| "epoch": 0.966136464991824, | |
| "grad_norm": 0.29296875, | |
| "learning_rate": 2.542613942604968e-06, | |
| "loss": 1.2059528827667236, | |
| "step": 4062 | |
| }, | |
| { | |
| "epoch": 0.9666121599524305, | |
| "grad_norm": 0.302734375, | |
| "learning_rate": 2.4732082513085587e-06, | |
| "loss": 1.1665153503417969, | |
| "step": 4064 | |
| }, | |
| { | |
| "epoch": 0.9670878549130371, | |
| "grad_norm": 0.287109375, | |
| "learning_rate": 2.404732771218008e-06, | |
| "loss": 1.146468162536621, | |
| "step": 4066 | |
| }, | |
| { | |
| "epoch": 0.9675635498736436, | |
| "grad_norm": 0.291015625, | |
| "learning_rate": 2.3371891996982e-06, | |
| "loss": 1.1147561073303223, | |
| "step": 4068 | |
| }, | |
| { | |
| "epoch": 0.9680392448342501, | |
| "grad_norm": 0.296875, | |
| "learning_rate": 2.27057921101395e-06, | |
| "loss": 1.1539335250854492, | |
| "step": 4070 | |
| }, | |
| { | |
| "epoch": 0.9685149397948566, | |
| "grad_norm": 0.30078125, | |
| "learning_rate": 2.204904456288497e-06, | |
| "loss": 1.1748045682907104, | |
| "step": 4072 | |
| }, | |
| { | |
| "epoch": 0.9689906347554631, | |
| "grad_norm": 0.29296875, | |
| "learning_rate": 2.1401665634625823e-06, | |
| "loss": 1.141796588897705, | |
| "step": 4074 | |
| }, | |
| { | |
| "epoch": 0.9694663297160696, | |
| "grad_norm": 0.279296875, | |
| "learning_rate": 2.0763671372540585e-06, | |
| "loss": 1.0855543613433838, | |
| "step": 4076 | |
| }, | |
| { | |
| "epoch": 0.9699420246766761, | |
| "grad_norm": 0.28515625, | |
| "learning_rate": 2.013507759118176e-06, | |
| "loss": 1.103421688079834, | |
| "step": 4078 | |
| }, | |
| { | |
| "epoch": 0.9704177196372826, | |
| "grad_norm": 0.29296875, | |
| "learning_rate": 1.95158998720832e-06, | |
| "loss": 1.1640735864639282, | |
| "step": 4080 | |
| }, | |
| { | |
| "epoch": 0.9708934145978891, | |
| "grad_norm": 0.283203125, | |
| "learning_rate": 1.8906153563374196e-06, | |
| "loss": 1.1282706260681152, | |
| "step": 4082 | |
| }, | |
| { | |
| "epoch": 0.9713691095584956, | |
| "grad_norm": 0.2890625, | |
| "learning_rate": 1.8305853779399108e-06, | |
| "loss": 1.0961542129516602, | |
| "step": 4084 | |
| }, | |
| { | |
| "epoch": 0.9718448045191022, | |
| "grad_norm": 0.28515625, | |
| "learning_rate": 1.7715015400342305e-06, | |
| "loss": 1.1879502534866333, | |
| "step": 4086 | |
| }, | |
| { | |
| "epoch": 0.9723204994797087, | |
| "grad_norm": 0.291015625, | |
| "learning_rate": 1.7133653071859947e-06, | |
| "loss": 1.1628968715667725, | |
| "step": 4088 | |
| }, | |
| { | |
| "epoch": 0.9727961944403152, | |
| "grad_norm": 0.30078125, | |
| "learning_rate": 1.656178120471621e-06, | |
| "loss": 1.1832327842712402, | |
| "step": 4090 | |
| }, | |
| { | |
| "epoch": 0.9732718894009217, | |
| "grad_norm": 0.291015625, | |
| "learning_rate": 1.5999413974426658e-06, | |
| "loss": 1.2111151218414307, | |
| "step": 4092 | |
| }, | |
| { | |
| "epoch": 0.9737475843615282, | |
| "grad_norm": 0.306640625, | |
| "learning_rate": 1.5446565320906692e-06, | |
| "loss": 1.1401962041854858, | |
| "step": 4094 | |
| }, | |
| { | |
| "epoch": 0.9742232793221347, | |
| "grad_norm": 0.294921875, | |
| "learning_rate": 1.4903248948125782e-06, | |
| "loss": 1.1747379302978516, | |
| "step": 4096 | |
| }, | |
| { | |
| "epoch": 0.9746989742827412, | |
| "grad_norm": 0.287109375, | |
| "learning_rate": 1.4369478323768183e-06, | |
| "loss": 1.2249683141708374, | |
| "step": 4098 | |
| }, | |
| { | |
| "epoch": 0.9751746692433477, | |
| "grad_norm": 0.28125, | |
| "learning_rate": 1.3845266678898673e-06, | |
| "loss": 1.1771612167358398, | |
| "step": 4100 | |
| }, | |
| { | |
| "epoch": 0.9756503642039542, | |
| "grad_norm": 0.291015625, | |
| "learning_rate": 1.3330627007634943e-06, | |
| "loss": 1.1556856632232666, | |
| "step": 4102 | |
| }, | |
| { | |
| "epoch": 0.9761260591645607, | |
| "grad_norm": 0.28515625, | |
| "learning_rate": 1.2825572066825288e-06, | |
| "loss": 1.1458361148834229, | |
| "step": 4104 | |
| }, | |
| { | |
| "epoch": 0.9766017541251673, | |
| "grad_norm": 0.287109375, | |
| "learning_rate": 1.233011437573244e-06, | |
| "loss": 1.1212427616119385, | |
| "step": 4106 | |
| }, | |
| { | |
| "epoch": 0.9770774490857738, | |
| "grad_norm": 0.287109375, | |
| "learning_rate": 1.184426621572321e-06, | |
| "loss": 1.1551880836486816, | |
| "step": 4108 | |
| }, | |
| { | |
| "epoch": 0.9775531440463803, | |
| "grad_norm": 0.29296875, | |
| "learning_rate": 1.1368039629964155e-06, | |
| "loss": 1.1765400171279907, | |
| "step": 4110 | |
| }, | |
| { | |
| "epoch": 0.9780288390069868, | |
| "grad_norm": 0.283203125, | |
| "learning_rate": 1.0901446423123007e-06, | |
| "loss": 1.1351805925369263, | |
| "step": 4112 | |
| }, | |
| { | |
| "epoch": 0.9785045339675933, | |
| "grad_norm": 0.28125, | |
| "learning_rate": 1.0444498161075977e-06, | |
| "loss": 1.1993989944458008, | |
| "step": 4114 | |
| }, | |
| { | |
| "epoch": 0.9789802289281998, | |
| "grad_norm": 0.291015625, | |
| "learning_rate": 9.997206170621187e-07, | |
| "loss": 1.148155689239502, | |
| "step": 4116 | |
| }, | |
| { | |
| "epoch": 0.9794559238888063, | |
| "grad_norm": 0.279296875, | |
| "learning_rate": 9.559581539197916e-07, | |
| "loss": 1.0902024507522583, | |
| "step": 4118 | |
| }, | |
| { | |
| "epoch": 0.9799316188494128, | |
| "grad_norm": 0.291015625, | |
| "learning_rate": 9.131635114611481e-07, | |
| "loss": 1.1051156520843506, | |
| "step": 4120 | |
| }, | |
| { | |
| "epoch": 0.9804073138100193, | |
| "grad_norm": 0.302734375, | |
| "learning_rate": 8.713377504764797e-07, | |
| "loss": 1.170903205871582, | |
| "step": 4122 | |
| }, | |
| { | |
| "epoch": 0.9808830087706258, | |
| "grad_norm": 0.294921875, | |
| "learning_rate": 8.304819077395065e-07, | |
| "loss": 1.185584545135498, | |
| "step": 4124 | |
| }, | |
| { | |
| "epoch": 0.9813587037312324, | |
| "grad_norm": 0.302734375, | |
| "learning_rate": 7.905969959816828e-07, | |
| "loss": 1.1473748683929443, | |
| "step": 4126 | |
| }, | |
| { | |
| "epoch": 0.9818343986918389, | |
| "grad_norm": 0.296875, | |
| "learning_rate": 7.51684003867128e-07, | |
| "loss": 1.1639072895050049, | |
| "step": 4128 | |
| }, | |
| { | |
| "epoch": 0.9823100936524454, | |
| "grad_norm": 0.29296875, | |
| "learning_rate": 7.137438959680554e-07, | |
| "loss": 1.234483003616333, | |
| "step": 4130 | |
| }, | |
| { | |
| "epoch": 0.9827857886130519, | |
| "grad_norm": 0.3046875, | |
| "learning_rate": 6.767776127409375e-07, | |
| "loss": 1.1430094242095947, | |
| "step": 4132 | |
| }, | |
| { | |
| "epoch": 0.9832614835736584, | |
| "grad_norm": 0.287109375, | |
| "learning_rate": 6.407860705031299e-07, | |
| "loss": 1.1307320594787598, | |
| "step": 4134 | |
| }, | |
| { | |
| "epoch": 0.9837371785342649, | |
| "grad_norm": 0.279296875, | |
| "learning_rate": 6.057701614101862e-07, | |
| "loss": 1.2102608680725098, | |
| "step": 4136 | |
| }, | |
| { | |
| "epoch": 0.9842128734948714, | |
| "grad_norm": 0.291015625, | |
| "learning_rate": 5.717307534337613e-07, | |
| "loss": 1.1357035636901855, | |
| "step": 4138 | |
| }, | |
| { | |
| "epoch": 0.9846885684554779, | |
| "grad_norm": 0.29296875, | |
| "learning_rate": 5.386686903400496e-07, | |
| "loss": 1.1917630434036255, | |
| "step": 4140 | |
| }, | |
| { | |
| "epoch": 0.9851642634160844, | |
| "grad_norm": 0.37109375, | |
| "learning_rate": 5.065847916689226e-07, | |
| "loss": 1.145763635635376, | |
| "step": 4142 | |
| }, | |
| { | |
| "epoch": 0.9856399583766909, | |
| "grad_norm": 0.28515625, | |
| "learning_rate": 4.754798527135629e-07, | |
| "loss": 1.123291015625, | |
| "step": 4144 | |
| }, | |
| { | |
| "epoch": 0.9861156533372974, | |
| "grad_norm": 0.287109375, | |
| "learning_rate": 4.4535464450079056e-07, | |
| "loss": 1.19578218460083, | |
| "step": 4146 | |
| }, | |
| { | |
| "epoch": 0.986591348297904, | |
| "grad_norm": 0.287109375, | |
| "learning_rate": 4.162099137719322e-07, | |
| "loss": 1.1768969297409058, | |
| "step": 4148 | |
| }, | |
| { | |
| "epoch": 0.9870670432585105, | |
| "grad_norm": 0.28515625, | |
| "learning_rate": 3.880463829643155e-07, | |
| "loss": 1.1089352369308472, | |
| "step": 4150 | |
| }, | |
| { | |
| "epoch": 0.987542738219117, | |
| "grad_norm": 0.296875, | |
| "learning_rate": 3.608647501933549e-07, | |
| "loss": 1.1268953084945679, | |
| "step": 4152 | |
| }, | |
| { | |
| "epoch": 0.9880184331797235, | |
| "grad_norm": 0.298828125, | |
| "learning_rate": 3.346656892352673e-07, | |
| "loss": 1.2365374565124512, | |
| "step": 4154 | |
| }, | |
| { | |
| "epoch": 0.98849412814033, | |
| "grad_norm": 0.29296875, | |
| "learning_rate": 3.0944984951033485e-07, | |
| "loss": 1.1548500061035156, | |
| "step": 4156 | |
| }, | |
| { | |
| "epoch": 0.9889698231009365, | |
| "grad_norm": 0.29296875, | |
| "learning_rate": 2.8521785606684616e-07, | |
| "loss": 1.1455793380737305, | |
| "step": 4158 | |
| }, | |
| { | |
| "epoch": 0.989445518061543, | |
| "grad_norm": 0.298828125, | |
| "learning_rate": 2.619703095655712e-07, | |
| "loss": 1.156882882118225, | |
| "step": 4160 | |
| }, | |
| { | |
| "epoch": 0.9899212130221495, | |
| "grad_norm": 0.294921875, | |
| "learning_rate": 2.397077862648978e-07, | |
| "loss": 1.1094558238983154, | |
| "step": 4162 | |
| }, | |
| { | |
| "epoch": 0.990396907982756, | |
| "grad_norm": 0.283203125, | |
| "learning_rate": 2.1843083800652255e-07, | |
| "loss": 1.1157076358795166, | |
| "step": 4164 | |
| }, | |
| { | |
| "epoch": 0.9908726029433625, | |
| "grad_norm": 0.2890625, | |
| "learning_rate": 1.9813999220179125e-07, | |
| "loss": 1.1705288887023926, | |
| "step": 4166 | |
| }, | |
| { | |
| "epoch": 0.9913482979039691, | |
| "grad_norm": 0.30078125, | |
| "learning_rate": 1.7883575181862012e-07, | |
| "loss": 1.1335409879684448, | |
| "step": 4168 | |
| }, | |
| { | |
| "epoch": 0.9918239928645756, | |
| "grad_norm": 0.294921875, | |
| "learning_rate": 1.6051859536902136e-07, | |
| "loss": 1.1639494895935059, | |
| "step": 4170 | |
| }, | |
| { | |
| "epoch": 0.9922996878251821, | |
| "grad_norm": 0.29296875, | |
| "learning_rate": 1.4318897689725053e-07, | |
| "loss": 1.145524024963379, | |
| "step": 4172 | |
| }, | |
| { | |
| "epoch": 0.9927753827857886, | |
| "grad_norm": 0.283203125, | |
| "learning_rate": 1.2684732596854876e-07, | |
| "loss": 1.1588659286499023, | |
| "step": 4174 | |
| }, | |
| { | |
| "epoch": 0.9932510777463951, | |
| "grad_norm": 0.28515625, | |
| "learning_rate": 1.1149404765848915e-07, | |
| "loss": 1.138121485710144, | |
| "step": 4176 | |
| }, | |
| { | |
| "epoch": 0.9937267727070016, | |
| "grad_norm": 0.296875, | |
| "learning_rate": 9.712952254294471e-08, | |
| "loss": 1.1140878200531006, | |
| "step": 4178 | |
| }, | |
| { | |
| "epoch": 0.9942024676676081, | |
| "grad_norm": 0.30078125, | |
| "learning_rate": 8.375410668865602e-08, | |
| "loss": 1.1625972986221313, | |
| "step": 4180 | |
| }, | |
| { | |
| "epoch": 0.9946781626282146, | |
| "grad_norm": 0.2890625, | |
| "learning_rate": 7.136813164438927e-08, | |
| "loss": 1.140109896659851, | |
| "step": 4182 | |
| }, | |
| { | |
| "epoch": 0.9951538575888211, | |
| "grad_norm": 0.29296875, | |
| "learning_rate": 5.997190443274292e-08, | |
| "loss": 1.1538417339324951, | |
| "step": 4184 | |
| }, | |
| { | |
| "epoch": 0.9956295525494276, | |
| "grad_norm": 0.291015625, | |
| "learning_rate": 4.9565707542500454e-08, | |
| "loss": 1.1804558038711548, | |
| "step": 4186 | |
| }, | |
| { | |
| "epoch": 0.9961052475100342, | |
| "grad_norm": 0.2890625, | |
| "learning_rate": 4.014979892167592e-08, | |
| "loss": 1.1386924982070923, | |
| "step": 4188 | |
| }, | |
| { | |
| "epoch": 0.9965809424706407, | |
| "grad_norm": 0.287109375, | |
| "learning_rate": 3.172441197107468e-08, | |
| "loss": 1.1561048030853271, | |
| "step": 4190 | |
| }, | |
| { | |
| "epoch": 0.9970566374312472, | |
| "grad_norm": 0.294921875, | |
| "learning_rate": 2.4289755538537962e-08, | |
| "loss": 1.165192723274231, | |
| "step": 4192 | |
| }, | |
| { | |
| "epoch": 0.9975323323918537, | |
| "grad_norm": 0.294921875, | |
| "learning_rate": 1.7846013913755957e-08, | |
| "loss": 1.1334145069122314, | |
| "step": 4194 | |
| }, | |
| { | |
| "epoch": 0.9980080273524602, | |
| "grad_norm": 0.294921875, | |
| "learning_rate": 1.2393346823693641e-08, | |
| "loss": 1.139329195022583, | |
| "step": 4196 | |
| }, | |
| { | |
| "epoch": 0.9984837223130667, | |
| "grad_norm": 0.29296875, | |
| "learning_rate": 7.93188942864287e-09, | |
| "loss": 1.1700010299682617, | |
| "step": 4198 | |
| }, | |
| { | |
| "epoch": 0.9989594172736732, | |
| "grad_norm": 0.283203125, | |
| "learning_rate": 4.461752318860591e-09, | |
| "loss": 1.1264121532440186, | |
| "step": 4200 | |
| }, | |
| { | |
| "epoch": 0.9994351122342797, | |
| "grad_norm": 0.3046875, | |
| "learning_rate": 1.9830215118377128e-09, | |
| "loss": 1.1346487998962402, | |
| "step": 4202 | |
| }, | |
| { | |
| "epoch": 0.9999108071948862, | |
| "grad_norm": 0.287109375, | |
| "learning_rate": 4.957584501674717e-10, | |
| "loss": 1.121924877166748, | |
| "step": 4204 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "step": 4205, | |
| "total_flos": 1.6807134688362627e+19, | |
| "train_loss": 1.3080670000681838, | |
| "train_runtime": 67307.8983, | |
| "train_samples_per_second": 7.995, | |
| "train_steps_per_second": 0.062 | |
| } | |
| ], | |
| "logging_steps": 2, | |
| "max_steps": 4205, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 1, | |
| "save_steps": 1051, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": true | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 1.6807134688362627e+19, | |
| "train_batch_size": 4, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |