Image-Text-to-Text
Transformers
Safetensors
qwen3_5
llama-factory
full
Generated from Trainer
conversational
Instructions to use furproxy/9b-117 with libraries, inference providers, notebooks, and local apps. Follow these links to get started.
- Libraries
- Transformers
How to use furproxy/9b-117 with Transformers:
```python
# Use a pipeline as a high-level helper
from transformers import pipeline

pipe = pipeline("image-text-to-text", model="furproxy/9b-117")
messages = [
    {
        "role": "user",
        "content": [
            {"type": "image", "url": "https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/p-blog/candy.JPG"},
            {"type": "text", "text": "What animal is on the candy?"}
        ]
    },
]
pipe(text=messages)
```

```python
# Load model directly
from transformers import AutoProcessor, AutoModelForImageTextToText

processor = AutoProcessor.from_pretrained("furproxy/9b-117")
model = AutoModelForImageTextToText.from_pretrained("furproxy/9b-117")
messages = [
    {
        "role": "user",
        "content": [
            {"type": "image", "url": "https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/p-blog/candy.JPG"},
            {"type": "text", "text": "What animal is on the candy?"}
        ]
    },
]
inputs = processor.apply_chat_template(
    messages,
    add_generation_prompt=True,
    tokenize=True,
    return_dict=True,
    return_tensors="pt",
).to(model.device)

outputs = model.generate(**inputs, max_new_tokens=40)
print(processor.decode(outputs[0][inputs["input_ids"].shape[-1]:]))
```
- Notebooks
- Google Colab
- Kaggle
- Local Apps Settings
- vLLM
How to use furproxy/9b-117 with vLLM:
Install from pip and serve model
```bash
# Install vLLM from pip:
pip install vllm

# Start the vLLM server:
vllm serve "furproxy/9b-117"

# Call the server using curl (OpenAI-compatible API):
curl -X POST "http://localhost:8000/v1/chat/completions" \
    -H "Content-Type: application/json" \
    --data '{
        "model": "furproxy/9b-117",
        "messages": [
            {
                "role": "user",
                "content": [
                    {
                        "type": "text",
                        "text": "Describe this image in one sentence."
                    },
                    {
                        "type": "image_url",
                        "image_url": {
                            "url": "https://cdn.britannica.com/61/93061-050-99147DCE/Statue-of-Liberty-Island-New-York-Bay.jpg"
                        }
                    }
                ]
            }
        ]
    }'
```
Use Docker
docker model run hf.co/furproxy/9b-117
- SGLang
How to use furproxy/9b-117 with SGLang:
Install from pip and serve model
```bash
# Install SGLang from pip:
pip install sglang

# Start the SGLang server:
python3 -m sglang.launch_server \
    --model-path "furproxy/9b-117" \
    --host 0.0.0.0 \
    --port 30000

# Call the server using curl (OpenAI-compatible API):
curl -X POST "http://localhost:30000/v1/chat/completions" \
    -H "Content-Type: application/json" \
    --data '{
        "model": "furproxy/9b-117",
        "messages": [
            {
                "role": "user",
                "content": [
                    {
                        "type": "text",
                        "text": "Describe this image in one sentence."
                    },
                    {
                        "type": "image_url",
                        "image_url": {
                            "url": "https://cdn.britannica.com/61/93061-050-99147DCE/Statue-of-Liberty-Island-New-York-Bay.jpg"
                        }
                    }
                ]
            }
        ]
    }'
```
Use Docker images
```bash
docker run --gpus all \
    --shm-size 32g \
    -p 30000:30000 \
    -v ~/.cache/huggingface:/root/.cache/huggingface \
    --env "HF_TOKEN=<secret>" \
    --ipc=host \
    lmsysorg/sglang:latest \
    python3 -m sglang.launch_server \
        --model-path "furproxy/9b-117" \
        --host 0.0.0.0 \
        --port 30000

# Call the server using curl (OpenAI-compatible API):
curl -X POST "http://localhost:30000/v1/chat/completions" \
    -H "Content-Type: application/json" \
    --data '{
        "model": "furproxy/9b-117",
        "messages": [
            {
                "role": "user",
                "content": [
                    {
                        "type": "text",
                        "text": "Describe this image in one sentence."
                    },
                    {
                        "type": "image_url",
                        "image_url": {
                            "url": "https://cdn.britannica.com/61/93061-050-99147DCE/Statue-of-Liberty-Island-New-York-Bay.jpg"
                        }
                    }
                ]
            }
        ]
    }'
```
- Docker Model Runner
How to use furproxy/9b-117 with Docker Model Runner:
docker model run hf.co/furproxy/9b-117
| { | |
| "best_global_step": null, | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 4.0, | |
| "eval_steps": 500, | |
| "global_step": 3276, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.002442002442002442, | |
| "grad_norm": 2.5787229537963867, | |
| "learning_rate": 1.829268292682927e-08, | |
| "loss": 1.9266729354858398, | |
| "step": 2 | |
| }, | |
| { | |
| "epoch": 0.004884004884004884, | |
| "grad_norm": 2.9470772743225098, | |
| "learning_rate": 5.48780487804878e-08, | |
| "loss": 2.098057270050049, | |
| "step": 4 | |
| }, | |
| { | |
| "epoch": 0.007326007326007326, | |
| "grad_norm": 24.35196304321289, | |
| "learning_rate": 9.146341463414634e-08, | |
| "loss": 2.419158458709717, | |
| "step": 6 | |
| }, | |
| { | |
| "epoch": 0.009768009768009768, | |
| "grad_norm": 13.176837921142578, | |
| "learning_rate": 1.2804878048780488e-07, | |
| "loss": 1.9603787660598755, | |
| "step": 8 | |
| }, | |
| { | |
| "epoch": 0.01221001221001221, | |
| "grad_norm": 3.178710460662842, | |
| "learning_rate": 1.6463414634146343e-07, | |
| "loss": 2.1654319763183594, | |
| "step": 10 | |
| }, | |
| { | |
| "epoch": 0.014652014652014652, | |
| "grad_norm": 6.474632263183594, | |
| "learning_rate": 2.0121951219512198e-07, | |
| "loss": 1.944001317024231, | |
| "step": 12 | |
| }, | |
| { | |
| "epoch": 0.017094017094017096, | |
| "grad_norm": 2.9987783432006836, | |
| "learning_rate": 2.378048780487805e-07, | |
| "loss": 1.6851489543914795, | |
| "step": 14 | |
| }, | |
| { | |
| "epoch": 0.019536019536019536, | |
| "grad_norm": 16.205787658691406, | |
| "learning_rate": 2.74390243902439e-07, | |
| "loss": 2.074493169784546, | |
| "step": 16 | |
| }, | |
| { | |
| "epoch": 0.02197802197802198, | |
| "grad_norm": 28.914915084838867, | |
| "learning_rate": 3.1097560975609756e-07, | |
| "loss": 2.760730504989624, | |
| "step": 18 | |
| }, | |
| { | |
| "epoch": 0.02442002442002442, | |
| "grad_norm": 5.000414848327637, | |
| "learning_rate": 3.475609756097561e-07, | |
| "loss": 1.890244960784912, | |
| "step": 20 | |
| }, | |
| { | |
| "epoch": 0.026862026862026864, | |
| "grad_norm": 12.402643203735352, | |
| "learning_rate": 3.8414634146341466e-07, | |
| "loss": 2.127265691757202, | |
| "step": 22 | |
| }, | |
| { | |
| "epoch": 0.029304029304029304, | |
| "grad_norm": 2.161700487136841, | |
| "learning_rate": 4.207317073170732e-07, | |
| "loss": 2.077669143676758, | |
| "step": 24 | |
| }, | |
| { | |
| "epoch": 0.031746031746031744, | |
| "grad_norm": 2.237544059753418, | |
| "learning_rate": 4.573170731707317e-07, | |
| "loss": 1.9492441415786743, | |
| "step": 26 | |
| }, | |
| { | |
| "epoch": 0.03418803418803419, | |
| "grad_norm": 3.4599592685699463, | |
| "learning_rate": 4.939024390243903e-07, | |
| "loss": 2.0288307666778564, | |
| "step": 28 | |
| }, | |
| { | |
| "epoch": 0.03663003663003663, | |
| "grad_norm": 13.558135032653809, | |
| "learning_rate": 5.304878048780488e-07, | |
| "loss": 2.0438828468322754, | |
| "step": 30 | |
| }, | |
| { | |
| "epoch": 0.03907203907203907, | |
| "grad_norm": 1.968347430229187, | |
| "learning_rate": 5.670731707317073e-07, | |
| "loss": 1.4716084003448486, | |
| "step": 32 | |
| }, | |
| { | |
| "epoch": 0.04151404151404151, | |
| "grad_norm": 2.296098470687866, | |
| "learning_rate": 6.036585365853659e-07, | |
| "loss": 1.916358470916748, | |
| "step": 34 | |
| }, | |
| { | |
| "epoch": 0.04395604395604396, | |
| "grad_norm": 4.799354553222656, | |
| "learning_rate": 6.402439024390244e-07, | |
| "loss": 1.500044345855713, | |
| "step": 36 | |
| }, | |
| { | |
| "epoch": 0.0463980463980464, | |
| "grad_norm": 3.422872543334961, | |
| "learning_rate": 6.768292682926829e-07, | |
| "loss": 1.9229261875152588, | |
| "step": 38 | |
| }, | |
| { | |
| "epoch": 0.04884004884004884, | |
| "grad_norm": 5.7842278480529785, | |
| "learning_rate": 7.134146341463414e-07, | |
| "loss": 1.8417783975601196, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 0.05128205128205128, | |
| "grad_norm": 1.7502555847167969, | |
| "learning_rate": 7.5e-07, | |
| "loss": 1.8713977336883545, | |
| "step": 42 | |
| }, | |
| { | |
| "epoch": 0.05372405372405373, | |
| "grad_norm": 3.2419745922088623, | |
| "learning_rate": 7.865853658536586e-07, | |
| "loss": 1.8525761365890503, | |
| "step": 44 | |
| }, | |
| { | |
| "epoch": 0.05616605616605617, | |
| "grad_norm": 4.0050272941589355, | |
| "learning_rate": 8.231707317073171e-07, | |
| "loss": 1.7802633047103882, | |
| "step": 46 | |
| }, | |
| { | |
| "epoch": 0.05860805860805861, | |
| "grad_norm": 9.282805442810059, | |
| "learning_rate": 8.597560975609755e-07, | |
| "loss": 1.744933009147644, | |
| "step": 48 | |
| }, | |
| { | |
| "epoch": 0.06105006105006105, | |
| "grad_norm": 11.279670715332031, | |
| "learning_rate": 8.963414634146341e-07, | |
| "loss": 1.6575506925582886, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 0.06349206349206349, | |
| "grad_norm": 4.024263381958008, | |
| "learning_rate": 9.329268292682927e-07, | |
| "loss": 1.7523654699325562, | |
| "step": 52 | |
| }, | |
| { | |
| "epoch": 0.06593406593406594, | |
| "grad_norm": 9.84774398803711, | |
| "learning_rate": 9.695121951219512e-07, | |
| "loss": 1.5431915521621704, | |
| "step": 54 | |
| }, | |
| { | |
| "epoch": 0.06837606837606838, | |
| "grad_norm": 2.255718469619751, | |
| "learning_rate": 1.0060975609756098e-06, | |
| "loss": 1.6816049814224243, | |
| "step": 56 | |
| }, | |
| { | |
| "epoch": 0.07081807081807082, | |
| "grad_norm": 6.438508033752441, | |
| "learning_rate": 1.0426829268292682e-06, | |
| "loss": 1.6770493984222412, | |
| "step": 58 | |
| }, | |
| { | |
| "epoch": 0.07326007326007326, | |
| "grad_norm": 3.231410026550293, | |
| "learning_rate": 1.0792682926829268e-06, | |
| "loss": 1.7219147682189941, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 0.0757020757020757, | |
| "grad_norm": 4.533494472503662, | |
| "learning_rate": 1.1158536585365854e-06, | |
| "loss": 1.3120664358139038, | |
| "step": 62 | |
| }, | |
| { | |
| "epoch": 0.07814407814407814, | |
| "grad_norm": 1.4741380214691162, | |
| "learning_rate": 1.152439024390244e-06, | |
| "loss": 1.839809536933899, | |
| "step": 64 | |
| }, | |
| { | |
| "epoch": 0.08058608058608059, | |
| "grad_norm": 5.4159135818481445, | |
| "learning_rate": 1.1890243902439024e-06, | |
| "loss": 1.4975742101669312, | |
| "step": 66 | |
| }, | |
| { | |
| "epoch": 0.08302808302808302, | |
| "grad_norm": 1.5194345712661743, | |
| "learning_rate": 1.225609756097561e-06, | |
| "loss": 1.2997660636901855, | |
| "step": 68 | |
| }, | |
| { | |
| "epoch": 0.08547008547008547, | |
| "grad_norm": 1.0652521848678589, | |
| "learning_rate": 1.2621951219512194e-06, | |
| "loss": 1.325761318206787, | |
| "step": 70 | |
| }, | |
| { | |
| "epoch": 0.08791208791208792, | |
| "grad_norm": 1.5348917245864868, | |
| "learning_rate": 1.298780487804878e-06, | |
| "loss": 1.6478363275527954, | |
| "step": 72 | |
| }, | |
| { | |
| "epoch": 0.09035409035409035, | |
| "grad_norm": 3.0826334953308105, | |
| "learning_rate": 1.3353658536585366e-06, | |
| "loss": 1.5671182870864868, | |
| "step": 74 | |
| }, | |
| { | |
| "epoch": 0.0927960927960928, | |
| "grad_norm": 3.705549478530884, | |
| "learning_rate": 1.3719512195121952e-06, | |
| "loss": 1.528605341911316, | |
| "step": 76 | |
| }, | |
| { | |
| "epoch": 0.09523809523809523, | |
| "grad_norm": 1.423170804977417, | |
| "learning_rate": 1.4085365853658536e-06, | |
| "loss": 1.433777928352356, | |
| "step": 78 | |
| }, | |
| { | |
| "epoch": 0.09768009768009768, | |
| "grad_norm": 5.42734432220459, | |
| "learning_rate": 1.4451219512195122e-06, | |
| "loss": 1.3220820426940918, | |
| "step": 80 | |
| }, | |
| { | |
| "epoch": 0.10012210012210013, | |
| "grad_norm": 13.278860092163086, | |
| "learning_rate": 1.4817073170731708e-06, | |
| "loss": 1.230900526046753, | |
| "step": 82 | |
| }, | |
| { | |
| "epoch": 0.10256410256410256, | |
| "grad_norm": 9.673623085021973, | |
| "learning_rate": 1.5182926829268292e-06, | |
| "loss": 1.2397403717041016, | |
| "step": 84 | |
| }, | |
| { | |
| "epoch": 0.10500610500610501, | |
| "grad_norm": 2.096724510192871, | |
| "learning_rate": 1.5548780487804878e-06, | |
| "loss": 1.429734230041504, | |
| "step": 86 | |
| }, | |
| { | |
| "epoch": 0.10744810744810745, | |
| "grad_norm": 2.136627435684204, | |
| "learning_rate": 1.5914634146341464e-06, | |
| "loss": 1.6148204803466797, | |
| "step": 88 | |
| }, | |
| { | |
| "epoch": 0.10989010989010989, | |
| "grad_norm": 0.8591102361679077, | |
| "learning_rate": 1.628048780487805e-06, | |
| "loss": 1.2777928113937378, | |
| "step": 90 | |
| }, | |
| { | |
| "epoch": 0.11233211233211234, | |
| "grad_norm": 2.6411468982696533, | |
| "learning_rate": 1.6646341463414637e-06, | |
| "loss": 1.6813223361968994, | |
| "step": 92 | |
| }, | |
| { | |
| "epoch": 0.11477411477411477, | |
| "grad_norm": 9.360551834106445, | |
| "learning_rate": 1.7012195121951218e-06, | |
| "loss": 1.2510548830032349, | |
| "step": 94 | |
| }, | |
| { | |
| "epoch": 0.11721611721611722, | |
| "grad_norm": 1.5671329498291016, | |
| "learning_rate": 1.7378048780487804e-06, | |
| "loss": 1.549704909324646, | |
| "step": 96 | |
| }, | |
| { | |
| "epoch": 0.11965811965811966, | |
| "grad_norm": 2.817790985107422, | |
| "learning_rate": 1.774390243902439e-06, | |
| "loss": 1.4941961765289307, | |
| "step": 98 | |
| }, | |
| { | |
| "epoch": 0.1221001221001221, | |
| "grad_norm": 1.9618436098098755, | |
| "learning_rate": 1.8109756097560976e-06, | |
| "loss": 1.208415150642395, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.12454212454212454, | |
| "grad_norm": 2.0582926273345947, | |
| "learning_rate": 1.847560975609756e-06, | |
| "loss": 1.4008541107177734, | |
| "step": 102 | |
| }, | |
| { | |
| "epoch": 0.12698412698412698, | |
| "grad_norm": 1.3583874702453613, | |
| "learning_rate": 1.8841463414634146e-06, | |
| "loss": 1.0804157257080078, | |
| "step": 104 | |
| }, | |
| { | |
| "epoch": 0.12942612942612944, | |
| "grad_norm": 0.7717381119728088, | |
| "learning_rate": 1.9207317073170733e-06, | |
| "loss": 0.9862698316574097, | |
| "step": 106 | |
| }, | |
| { | |
| "epoch": 0.13186813186813187, | |
| "grad_norm": 4.132621765136719, | |
| "learning_rate": 1.9573170731707316e-06, | |
| "loss": 1.4770206212997437, | |
| "step": 108 | |
| }, | |
| { | |
| "epoch": 0.1343101343101343, | |
| "grad_norm": 3.617838144302368, | |
| "learning_rate": 1.9939024390243905e-06, | |
| "loss": 1.7098721265792847, | |
| "step": 110 | |
| }, | |
| { | |
| "epoch": 0.13675213675213677, | |
| "grad_norm": 1.887754201889038, | |
| "learning_rate": 2.030487804878049e-06, | |
| "loss": 1.4937952756881714, | |
| "step": 112 | |
| }, | |
| { | |
| "epoch": 0.1391941391941392, | |
| "grad_norm": 1.5456370115280151, | |
| "learning_rate": 2.0670731707317072e-06, | |
| "loss": 1.2423112392425537, | |
| "step": 114 | |
| }, | |
| { | |
| "epoch": 0.14163614163614163, | |
| "grad_norm": 1.8167121410369873, | |
| "learning_rate": 2.1036585365853656e-06, | |
| "loss": 1.4288495779037476, | |
| "step": 116 | |
| }, | |
| { | |
| "epoch": 0.14407814407814407, | |
| "grad_norm": 2.20339035987854, | |
| "learning_rate": 2.1402439024390245e-06, | |
| "loss": 1.3045105934143066, | |
| "step": 118 | |
| }, | |
| { | |
| "epoch": 0.14652014652014653, | |
| "grad_norm": 1.4721053838729858, | |
| "learning_rate": 2.176829268292683e-06, | |
| "loss": 1.4094347953796387, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 0.14896214896214896, | |
| "grad_norm": 2.694047451019287, | |
| "learning_rate": 2.2134146341463417e-06, | |
| "loss": 1.4338178634643555, | |
| "step": 122 | |
| }, | |
| { | |
| "epoch": 0.1514041514041514, | |
| "grad_norm": 2.1025125980377197, | |
| "learning_rate": 2.25e-06, | |
| "loss": 1.388542652130127, | |
| "step": 124 | |
| }, | |
| { | |
| "epoch": 0.15384615384615385, | |
| "grad_norm": 2.5838522911071777, | |
| "learning_rate": 2.2865853658536584e-06, | |
| "loss": 1.2388739585876465, | |
| "step": 126 | |
| }, | |
| { | |
| "epoch": 0.1562881562881563, | |
| "grad_norm": 1.6586570739746094, | |
| "learning_rate": 2.3231707317073173e-06, | |
| "loss": 1.4730716943740845, | |
| "step": 128 | |
| }, | |
| { | |
| "epoch": 0.15873015873015872, | |
| "grad_norm": 1.9007089138031006, | |
| "learning_rate": 2.3597560975609757e-06, | |
| "loss": 1.3335144519805908, | |
| "step": 130 | |
| }, | |
| { | |
| "epoch": 0.16117216117216118, | |
| "grad_norm": 2.044327974319458, | |
| "learning_rate": 2.3963414634146345e-06, | |
| "loss": 1.1245752573013306, | |
| "step": 132 | |
| }, | |
| { | |
| "epoch": 0.16361416361416362, | |
| "grad_norm": 4.774345874786377, | |
| "learning_rate": 2.432926829268293e-06, | |
| "loss": 1.1957733631134033, | |
| "step": 134 | |
| }, | |
| { | |
| "epoch": 0.16605616605616605, | |
| "grad_norm": 3.8634209632873535, | |
| "learning_rate": 2.4695121951219513e-06, | |
| "loss": 1.208402395248413, | |
| "step": 136 | |
| }, | |
| { | |
| "epoch": 0.1684981684981685, | |
| "grad_norm": 2.314042329788208, | |
| "learning_rate": 2.5060975609756097e-06, | |
| "loss": 1.1570374965667725, | |
| "step": 138 | |
| }, | |
| { | |
| "epoch": 0.17094017094017094, | |
| "grad_norm": 2.155163526535034, | |
| "learning_rate": 2.5426829268292685e-06, | |
| "loss": 1.1594734191894531, | |
| "step": 140 | |
| }, | |
| { | |
| "epoch": 0.17338217338217338, | |
| "grad_norm": 1.5824326276779175, | |
| "learning_rate": 2.579268292682927e-06, | |
| "loss": 1.420199990272522, | |
| "step": 142 | |
| }, | |
| { | |
| "epoch": 0.17582417582417584, | |
| "grad_norm": 1.5222316980361938, | |
| "learning_rate": 2.6158536585365853e-06, | |
| "loss": 1.1563900709152222, | |
| "step": 144 | |
| }, | |
| { | |
| "epoch": 0.17826617826617827, | |
| "grad_norm": 2.792137384414673, | |
| "learning_rate": 2.652439024390244e-06, | |
| "loss": 1.0181738138198853, | |
| "step": 146 | |
| }, | |
| { | |
| "epoch": 0.1807081807081807, | |
| "grad_norm": 3.344496965408325, | |
| "learning_rate": 2.6890243902439025e-06, | |
| "loss": 1.031973958015442, | |
| "step": 148 | |
| }, | |
| { | |
| "epoch": 0.18315018315018314, | |
| "grad_norm": 1.3484665155410767, | |
| "learning_rate": 2.7256097560975613e-06, | |
| "loss": 1.370530366897583, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 0.1855921855921856, | |
| "grad_norm": 3.3972597122192383, | |
| "learning_rate": 2.7621951219512197e-06, | |
| "loss": 1.4999196529388428, | |
| "step": 152 | |
| }, | |
| { | |
| "epoch": 0.18803418803418803, | |
| "grad_norm": 1.831746220588684, | |
| "learning_rate": 2.798780487804878e-06, | |
| "loss": 1.3834564685821533, | |
| "step": 154 | |
| }, | |
| { | |
| "epoch": 0.19047619047619047, | |
| "grad_norm": 5.442370414733887, | |
| "learning_rate": 2.8353658536585365e-06, | |
| "loss": 1.667514443397522, | |
| "step": 156 | |
| }, | |
| { | |
| "epoch": 0.19291819291819293, | |
| "grad_norm": 2.3951525688171387, | |
| "learning_rate": 2.8719512195121953e-06, | |
| "loss": 1.1068520545959473, | |
| "step": 158 | |
| }, | |
| { | |
| "epoch": 0.19536019536019536, | |
| "grad_norm": 1.1164201498031616, | |
| "learning_rate": 2.9085365853658537e-06, | |
| "loss": 0.9049057960510254, | |
| "step": 160 | |
| }, | |
| { | |
| "epoch": 0.1978021978021978, | |
| "grad_norm": 1.9392452239990234, | |
| "learning_rate": 2.945121951219512e-06, | |
| "loss": 1.3560075759887695, | |
| "step": 162 | |
| }, | |
| { | |
| "epoch": 0.20024420024420025, | |
| "grad_norm": 1.669164776802063, | |
| "learning_rate": 2.981707317073171e-06, | |
| "loss": 1.2463322877883911, | |
| "step": 164 | |
| }, | |
| { | |
| "epoch": 0.2026862026862027, | |
| "grad_norm": 1.7429471015930176, | |
| "learning_rate": 3.0182926829268293e-06, | |
| "loss": 1.3172338008880615, | |
| "step": 166 | |
| }, | |
| { | |
| "epoch": 0.20512820512820512, | |
| "grad_norm": 2.3932223320007324, | |
| "learning_rate": 3.054878048780488e-06, | |
| "loss": 1.1728577613830566, | |
| "step": 168 | |
| }, | |
| { | |
| "epoch": 0.20757020757020758, | |
| "grad_norm": 2.543950080871582, | |
| "learning_rate": 3.0914634146341465e-06, | |
| "loss": 0.945122480392456, | |
| "step": 170 | |
| }, | |
| { | |
| "epoch": 0.21001221001221002, | |
| "grad_norm": 1.3421686887741089, | |
| "learning_rate": 3.128048780487805e-06, | |
| "loss": 1.0504443645477295, | |
| "step": 172 | |
| }, | |
| { | |
| "epoch": 0.21245421245421245, | |
| "grad_norm": 1.8919559717178345, | |
| "learning_rate": 3.1646341463414637e-06, | |
| "loss": 1.332210659980774, | |
| "step": 174 | |
| }, | |
| { | |
| "epoch": 0.2148962148962149, | |
| "grad_norm": 2.054534673690796, | |
| "learning_rate": 3.201219512195122e-06, | |
| "loss": 1.2866244316101074, | |
| "step": 176 | |
| }, | |
| { | |
| "epoch": 0.21733821733821734, | |
| "grad_norm": 1.249266505241394, | |
| "learning_rate": 3.237804878048781e-06, | |
| "loss": 1.387539029121399, | |
| "step": 178 | |
| }, | |
| { | |
| "epoch": 0.21978021978021978, | |
| "grad_norm": 1.3586231470108032, | |
| "learning_rate": 3.2743902439024393e-06, | |
| "loss": 0.9814241528511047, | |
| "step": 180 | |
| }, | |
| { | |
| "epoch": 0.2222222222222222, | |
| "grad_norm": 5.887375354766846, | |
| "learning_rate": 3.310975609756098e-06, | |
| "loss": 0.9087549448013306, | |
| "step": 182 | |
| }, | |
| { | |
| "epoch": 0.22466422466422467, | |
| "grad_norm": 1.151898741722107, | |
| "learning_rate": 3.3475609756097565e-06, | |
| "loss": 1.3200916051864624, | |
| "step": 184 | |
| }, | |
| { | |
| "epoch": 0.2271062271062271, | |
| "grad_norm": 5.016763210296631, | |
| "learning_rate": 3.3841463414634145e-06, | |
| "loss": 1.3280826807022095, | |
| "step": 186 | |
| }, | |
| { | |
| "epoch": 0.22954822954822954, | |
| "grad_norm": 1.9523217678070068, | |
| "learning_rate": 3.420731707317073e-06, | |
| "loss": 1.3782356977462769, | |
| "step": 188 | |
| }, | |
| { | |
| "epoch": 0.231990231990232, | |
| "grad_norm": 2.2894701957702637, | |
| "learning_rate": 3.4573170731707317e-06, | |
| "loss": 1.3895543813705444, | |
| "step": 190 | |
| }, | |
| { | |
| "epoch": 0.23443223443223443, | |
| "grad_norm": 2.341043710708618, | |
| "learning_rate": 3.49390243902439e-06, | |
| "loss": 1.3415981531143188, | |
| "step": 192 | |
| }, | |
| { | |
| "epoch": 0.23687423687423687, | |
| "grad_norm": 2.133942127227783, | |
| "learning_rate": 3.5304878048780485e-06, | |
| "loss": 1.3378491401672363, | |
| "step": 194 | |
| }, | |
| { | |
| "epoch": 0.23931623931623933, | |
| "grad_norm": 8.46426773071289, | |
| "learning_rate": 3.5670731707317073e-06, | |
| "loss": 1.0105526447296143, | |
| "step": 196 | |
| }, | |
| { | |
| "epoch": 0.24175824175824176, | |
| "grad_norm": 2.380206823348999, | |
| "learning_rate": 3.6036585365853657e-06, | |
| "loss": 1.3137314319610596, | |
| "step": 198 | |
| }, | |
| { | |
| "epoch": 0.2442002442002442, | |
| "grad_norm": 2.650566816329956, | |
| "learning_rate": 3.6402439024390245e-06, | |
| "loss": 1.4053981304168701, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.24664224664224665, | |
| "grad_norm": 1.5482836961746216, | |
| "learning_rate": 3.676829268292683e-06, | |
| "loss": 1.3324224948883057, | |
| "step": 202 | |
| }, | |
| { | |
| "epoch": 0.2490842490842491, | |
| "grad_norm": 2.6213765144348145, | |
| "learning_rate": 3.7134146341463417e-06, | |
| "loss": 1.0133942365646362, | |
| "step": 204 | |
| }, | |
| { | |
| "epoch": 0.2515262515262515, | |
| "grad_norm": 2.176896095275879, | |
| "learning_rate": 3.75e-06, | |
| "loss": 1.2807339429855347, | |
| "step": 206 | |
| }, | |
| { | |
| "epoch": 0.25396825396825395, | |
| "grad_norm": 1.500716209411621, | |
| "learning_rate": 3.7865853658536585e-06, | |
| "loss": 1.1330087184906006, | |
| "step": 208 | |
| }, | |
| { | |
| "epoch": 0.2564102564102564, | |
| "grad_norm": 6.454537868499756, | |
| "learning_rate": 3.823170731707317e-06, | |
| "loss": 0.9686428904533386, | |
| "step": 210 | |
| }, | |
| { | |
| "epoch": 0.2588522588522589, | |
| "grad_norm": 3.82650089263916, | |
| "learning_rate": 3.859756097560976e-06, | |
| "loss": 0.6518009901046753, | |
| "step": 212 | |
| }, | |
| { | |
| "epoch": 0.2612942612942613, | |
| "grad_norm": 1.4536477327346802, | |
| "learning_rate": 3.896341463414634e-06, | |
| "loss": 1.1154720783233643, | |
| "step": 214 | |
| }, | |
| { | |
| "epoch": 0.26373626373626374, | |
| "grad_norm": 2.921241283416748, | |
| "learning_rate": 3.932926829268293e-06, | |
| "loss": 1.2995545864105225, | |
| "step": 216 | |
| }, | |
| { | |
| "epoch": 0.2661782661782662, | |
| "grad_norm": 2.156456232070923, | |
| "learning_rate": 3.969512195121952e-06, | |
| "loss": 1.1956074237823486, | |
| "step": 218 | |
| }, | |
| { | |
| "epoch": 0.2686202686202686, | |
| "grad_norm": 1.802549123764038, | |
| "learning_rate": 4.00609756097561e-06, | |
| "loss": 1.3290941715240479, | |
| "step": 220 | |
| }, | |
| { | |
| "epoch": 0.27106227106227104, | |
| "grad_norm": 2.224459648132324, | |
| "learning_rate": 4.0426829268292685e-06, | |
| "loss": 0.9570510387420654, | |
| "step": 222 | |
| }, | |
| { | |
| "epoch": 0.27350427350427353, | |
| "grad_norm": 1.6716811656951904, | |
| "learning_rate": 4.079268292682927e-06, | |
| "loss": 0.9313968420028687, | |
| "step": 224 | |
| }, | |
| { | |
| "epoch": 0.27594627594627597, | |
| "grad_norm": 1.8278779983520508, | |
| "learning_rate": 4.115853658536586e-06, | |
| "loss": 1.3462107181549072, | |
| "step": 226 | |
| }, | |
| { | |
| "epoch": 0.2783882783882784, | |
| "grad_norm": 2.975511312484741, | |
| "learning_rate": 4.152439024390244e-06, | |
| "loss": 1.3946359157562256, | |
| "step": 228 | |
| }, | |
| { | |
| "epoch": 0.28083028083028083, | |
| "grad_norm": 1.9801675081253052, | |
| "learning_rate": 4.189024390243902e-06, | |
| "loss": 1.3629244565963745, | |
| "step": 230 | |
| }, | |
| { | |
| "epoch": 0.28327228327228327, | |
| "grad_norm": 12.177047729492188, | |
| "learning_rate": 4.2256097560975605e-06, | |
| "loss": 0.9982041120529175, | |
| "step": 232 | |
| }, | |
| { | |
| "epoch": 0.2857142857142857, | |
| "grad_norm": 3.7087788581848145, | |
| "learning_rate": 4.26219512195122e-06, | |
| "loss": 1.0232059955596924, | |
| "step": 234 | |
| }, | |
| { | |
| "epoch": 0.28815628815628813, | |
| "grad_norm": 3.9933722019195557, | |
| "learning_rate": 4.298780487804878e-06, | |
| "loss": 1.2940337657928467, | |
| "step": 236 | |
| }, | |
| { | |
| "epoch": 0.2905982905982906, | |
| "grad_norm": 1.5372240543365479, | |
| "learning_rate": 4.3353658536585365e-06, | |
| "loss": 1.266202688217163, | |
| "step": 238 | |
| }, | |
| { | |
| "epoch": 0.29304029304029305, | |
| "grad_norm": 1.9267133474349976, | |
| "learning_rate": 4.371951219512195e-06, | |
| "loss": 1.1469693183898926, | |
| "step": 240 | |
| }, | |
| { | |
| "epoch": 0.2954822954822955, | |
| "grad_norm": 2.542509078979492, | |
| "learning_rate": 4.408536585365853e-06, | |
| "loss": 0.9890923500061035, | |
| "step": 242 | |
| }, | |
| { | |
| "epoch": 0.2979242979242979, | |
| "grad_norm": 1.5382633209228516, | |
| "learning_rate": 4.4451219512195125e-06, | |
| "loss": 1.3382046222686768, | |
| "step": 244 | |
| }, | |
| { | |
| "epoch": 0.30036630036630035, | |
| "grad_norm": 1.2644575834274292, | |
| "learning_rate": 4.481707317073171e-06, | |
| "loss": 0.774017333984375, | |
| "step": 246 | |
| }, | |
| { | |
| "epoch": 0.3028083028083028, | |
| "grad_norm": 5.0008769035339355, | |
| "learning_rate": 4.518292682926829e-06, | |
| "loss": 1.216355323791504, | |
| "step": 248 | |
| }, | |
| { | |
| "epoch": 0.3052503052503053, | |
| "grad_norm": 2.470137596130371, | |
| "learning_rate": 4.554878048780488e-06, | |
| "loss": 0.9469658136367798, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 0.3076923076923077, | |
| "grad_norm": 2.0575084686279297, | |
| "learning_rate": 4.591463414634147e-06, | |
| "loss": 1.045383334159851, | |
| "step": 252 | |
| }, | |
| { | |
| "epoch": 0.31013431013431014, | |
| "grad_norm": 2.9232146739959717, | |
| "learning_rate": 4.628048780487805e-06, | |
| "loss": 1.3018403053283691, | |
| "step": 254 | |
| }, | |
| { | |
| "epoch": 0.3125763125763126, | |
| "grad_norm": 1.5343422889709473, | |
| "learning_rate": 4.664634146341464e-06, | |
| "loss": 1.282840609550476, | |
| "step": 256 | |
| }, | |
| { | |
| "epoch": 0.315018315018315, | |
| "grad_norm": 2.349189043045044, | |
| "learning_rate": 4.701219512195122e-06, | |
| "loss": 1.33484947681427, | |
| "step": 258 | |
| }, | |
| { | |
| "epoch": 0.31746031746031744, | |
| "grad_norm": 2.369356393814087, | |
| "learning_rate": 4.7378048780487805e-06, | |
| "loss": 1.2917380332946777, | |
| "step": 260 | |
| }, | |
| { | |
| "epoch": 0.3199023199023199, | |
| "grad_norm": 5.888512134552002, | |
| "learning_rate": 4.77439024390244e-06, | |
| "loss": 1.5345417261123657, | |
| "step": 262 | |
| }, | |
| { | |
| "epoch": 0.32234432234432236, | |
| "grad_norm": 5.536413669586182, | |
| "learning_rate": 4.810975609756098e-06, | |
| "loss": 0.6992707252502441, | |
| "step": 264 | |
| }, | |
| { | |
| "epoch": 0.3247863247863248, | |
| "grad_norm": 3.4682443141937256, | |
| "learning_rate": 4.8475609756097565e-06, | |
| "loss": 1.0846771001815796, | |
| "step": 266 | |
| }, | |
| { | |
| "epoch": 0.32722832722832723, | |
| "grad_norm": 2.0366370677948, | |
| "learning_rate": 4.884146341463414e-06, | |
| "loss": 1.337081789970398, | |
| "step": 268 | |
| }, | |
| { | |
| "epoch": 0.32967032967032966, | |
| "grad_norm": 5.923638343811035, | |
| "learning_rate": 4.920731707317073e-06, | |
| "loss": 0.9941681027412415, | |
| "step": 270 | |
| }, | |
| { | |
| "epoch": 0.3321123321123321, | |
| "grad_norm": 1.7837181091308594, | |
| "learning_rate": 4.957317073170732e-06, | |
| "loss": 1.2944457530975342, | |
| "step": 272 | |
| }, | |
| { | |
| "epoch": 0.33455433455433453, | |
| "grad_norm": 4.933067321777344, | |
| "learning_rate": 4.99390243902439e-06, | |
| "loss": 0.9657446146011353, | |
| "step": 274 | |
| }, | |
| { | |
| "epoch": 0.336996336996337, | |
| "grad_norm": 4.120548725128174, | |
| "learning_rate": 5.0304878048780485e-06, | |
| "loss": 1.0117385387420654, | |
| "step": 276 | |
| }, | |
| { | |
| "epoch": 0.33943833943833945, | |
| "grad_norm": 8.162941932678223, | |
| "learning_rate": 5.067073170731707e-06, | |
| "loss": 1.4234728813171387, | |
| "step": 278 | |
| }, | |
| { | |
| "epoch": 0.3418803418803419, | |
| "grad_norm": 1.219032883644104, | |
| "learning_rate": 5.103658536585366e-06, | |
| "loss": 1.3093342781066895, | |
| "step": 280 | |
| }, | |
| { | |
| "epoch": 0.3443223443223443, | |
| "grad_norm": 2.771096706390381, | |
| "learning_rate": 5.1402439024390245e-06, | |
| "loss": 1.2924383878707886, | |
| "step": 282 | |
| }, | |
| { | |
| "epoch": 0.34676434676434675, | |
| "grad_norm": 1.962974190711975, | |
| "learning_rate": 5.176829268292683e-06, | |
| "loss": 1.3206619024276733, | |
| "step": 284 | |
| }, | |
| { | |
| "epoch": 0.3492063492063492, | |
| "grad_norm": 3.3968982696533203, | |
| "learning_rate": 5.213414634146341e-06, | |
| "loss": 1.1162430047988892, | |
| "step": 286 | |
| }, | |
| { | |
| "epoch": 0.3516483516483517, | |
| "grad_norm": 3.5822386741638184, | |
| "learning_rate": 5.2500000000000006e-06, | |
| "loss": 1.673839807510376, | |
| "step": 288 | |
| }, | |
| { | |
| "epoch": 0.3540903540903541, | |
| "grad_norm": 7.095156192779541, | |
| "learning_rate": 5.286585365853659e-06, | |
| "loss": 0.9515596628189087, | |
| "step": 290 | |
| }, | |
| { | |
| "epoch": 0.35653235653235654, | |
| "grad_norm": 2.219644784927368, | |
| "learning_rate": 5.323170731707317e-06, | |
| "loss": 1.309720516204834, | |
| "step": 292 | |
| }, | |
| { | |
| "epoch": 0.358974358974359, | |
| "grad_norm": 1.6270936727523804, | |
| "learning_rate": 5.359756097560976e-06, | |
| "loss": 1.0194257497787476, | |
| "step": 294 | |
| }, | |
| { | |
| "epoch": 0.3614163614163614, | |
| "grad_norm": 1.4493879079818726, | |
| "learning_rate": 5.396341463414634e-06, | |
| "loss": 1.3390767574310303, | |
| "step": 296 | |
| }, | |
| { | |
| "epoch": 0.36385836385836384, | |
| "grad_norm": 17.245960235595703, | |
| "learning_rate": 5.432926829268293e-06, | |
| "loss": 1.389981746673584, | |
| "step": 298 | |
| }, | |
| { | |
| "epoch": 0.3663003663003663, | |
| "grad_norm": 2.71707820892334, | |
| "learning_rate": 5.469512195121952e-06, | |
| "loss": 1.2446229457855225, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 0.36874236874236876, | |
| "grad_norm": 3.7474751472473145, | |
| "learning_rate": 5.50609756097561e-06, | |
| "loss": 0.9005425572395325, | |
| "step": 302 | |
| }, | |
| { | |
| "epoch": 0.3711843711843712, | |
| "grad_norm": 2.9197535514831543, | |
| "learning_rate": 5.5426829268292685e-06, | |
| "loss": 1.202176570892334, | |
| "step": 304 | |
| }, | |
| { | |
| "epoch": 0.37362637362637363, | |
| "grad_norm": 2.3822195529937744, | |
| "learning_rate": 5.579268292682927e-06, | |
| "loss": 1.329774260520935, | |
| "step": 306 | |
| }, | |
| { | |
| "epoch": 0.37606837606837606, | |
| "grad_norm": 1.2802468538284302, | |
| "learning_rate": 5.615853658536586e-06, | |
| "loss": 1.3126323223114014, | |
| "step": 308 | |
| }, | |
| { | |
| "epoch": 0.3785103785103785, | |
| "grad_norm": 1.8697741031646729, | |
| "learning_rate": 5.652439024390244e-06, | |
| "loss": 1.345770239830017, | |
| "step": 310 | |
| }, | |
| { | |
| "epoch": 0.38095238095238093, | |
| "grad_norm": 4.083343505859375, | |
| "learning_rate": 5.689024390243902e-06, | |
| "loss": 0.9912192821502686, | |
| "step": 312 | |
| }, | |
| { | |
| "epoch": 0.3833943833943834, | |
| "grad_norm": 6.874935150146484, | |
| "learning_rate": 5.7256097560975605e-06, | |
| "loss": 1.3861531019210815, | |
| "step": 314 | |
| }, | |
| { | |
| "epoch": 0.38583638583638585, | |
| "grad_norm": 2.4264907836914062, | |
| "learning_rate": 5.76219512195122e-06, | |
| "loss": 0.6633478999137878, | |
| "step": 316 | |
| }, | |
| { | |
| "epoch": 0.3882783882783883, | |
| "grad_norm": 4.533822536468506, | |
| "learning_rate": 5.798780487804878e-06, | |
| "loss": 0.9467814564704895, | |
| "step": 318 | |
| }, | |
| { | |
| "epoch": 0.3907203907203907, | |
| "grad_norm": 2.7442786693573, | |
| "learning_rate": 5.8353658536585365e-06, | |
| "loss": 1.3469103574752808, | |
| "step": 320 | |
| }, | |
| { | |
| "epoch": 0.39316239316239315, | |
| "grad_norm": 4.531688213348389, | |
| "learning_rate": 5.871951219512195e-06, | |
| "loss": 1.3630614280700684, | |
| "step": 322 | |
| }, | |
| { | |
| "epoch": 0.3956043956043956, | |
| "grad_norm": 2.2029385566711426, | |
| "learning_rate": 5.908536585365853e-06, | |
| "loss": 1.3367185592651367, | |
| "step": 324 | |
| }, | |
| { | |
| "epoch": 0.398046398046398, | |
| "grad_norm": 2.591531276702881, | |
| "learning_rate": 5.9451219512195126e-06, | |
| "loss": 1.2546979188919067, | |
| "step": 326 | |
| }, | |
| { | |
| "epoch": 0.4004884004884005, | |
| "grad_norm": 3.0475008487701416, | |
| "learning_rate": 5.981707317073171e-06, | |
| "loss": 1.2662358283996582, | |
| "step": 328 | |
| }, | |
| { | |
| "epoch": 0.40293040293040294, | |
| "grad_norm": 3.1861870288848877, | |
| "learning_rate": 5.9999984668716506e-06, | |
| "loss": 0.9014570116996765, | |
| "step": 330 | |
| }, | |
| { | |
| "epoch": 0.4053724053724054, | |
| "grad_norm": 5.928226470947266, | |
| "learning_rate": 5.9999862018553e-06, | |
| "loss": 0.6253970861434937, | |
| "step": 332 | |
| }, | |
| { | |
| "epoch": 0.4078144078144078, | |
| "grad_norm": 1.3099563121795654, | |
| "learning_rate": 5.9999616718783144e-06, | |
| "loss": 1.0681229829788208, | |
| "step": 334 | |
| }, | |
| { | |
| "epoch": 0.41025641025641024, | |
| "grad_norm": 2.3551321029663086, | |
| "learning_rate": 5.999924877052124e-06, | |
| "loss": 1.3677372932434082, | |
| "step": 336 | |
| }, | |
| { | |
| "epoch": 0.4126984126984127, | |
| "grad_norm": 1.5692805051803589, | |
| "learning_rate": 5.999875817543871e-06, | |
| "loss": 0.9468737840652466, | |
| "step": 338 | |
| }, | |
| { | |
| "epoch": 0.41514041514041516, | |
| "grad_norm": 25.2011661529541, | |
| "learning_rate": 5.999814493576417e-06, | |
| "loss": 1.0896673202514648, | |
| "step": 340 | |
| }, | |
| { | |
| "epoch": 0.4175824175824176, | |
| "grad_norm": 2.671222686767578, | |
| "learning_rate": 5.999740905428328e-06, | |
| "loss": 1.6776376962661743, | |
| "step": 342 | |
| }, | |
| { | |
| "epoch": 0.42002442002442003, | |
| "grad_norm": 1.7033958435058594, | |
| "learning_rate": 5.999655053433888e-06, | |
| "loss": 0.9407004117965698, | |
| "step": 344 | |
| }, | |
| { | |
| "epoch": 0.42246642246642246, | |
| "grad_norm": 2.3176705837249756, | |
| "learning_rate": 5.999556937983088e-06, | |
| "loss": 1.1251733303070068, | |
| "step": 346 | |
| }, | |
| { | |
| "epoch": 0.4249084249084249, | |
| "grad_norm": 2.094940662384033, | |
| "learning_rate": 5.999446559521627e-06, | |
| "loss": 1.3193105459213257, | |
| "step": 348 | |
| }, | |
| { | |
| "epoch": 0.42735042735042733, | |
| "grad_norm": 3.736506462097168, | |
| "learning_rate": 5.999323918550911e-06, | |
| "loss": 0.863933801651001, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 0.4297924297924298, | |
| "grad_norm": 1.4415168762207031, | |
| "learning_rate": 5.999189015628049e-06, | |
| "loss": 1.0673351287841797, | |
| "step": 352 | |
| }, | |
| { | |
| "epoch": 0.43223443223443225, | |
| "grad_norm": 2.9786107540130615, | |
| "learning_rate": 5.999041851365851e-06, | |
| "loss": 1.2724120616912842, | |
| "step": 354 | |
| }, | |
| { | |
| "epoch": 0.4346764346764347, | |
| "grad_norm": 1.8949273824691772, | |
| "learning_rate": 5.998882426432824e-06, | |
| "loss": 0.8991950154304504, | |
| "step": 356 | |
| }, | |
| { | |
| "epoch": 0.4371184371184371, | |
| "grad_norm": 3.24233078956604, | |
| "learning_rate": 5.998710741553175e-06, | |
| "loss": 1.1615791320800781, | |
| "step": 358 | |
| }, | |
| { | |
| "epoch": 0.43956043956043955, | |
| "grad_norm": 1.7800151109695435, | |
| "learning_rate": 5.9985267975067975e-06, | |
| "loss": 1.2164958715438843, | |
| "step": 360 | |
| }, | |
| { | |
| "epoch": 0.442002442002442, | |
| "grad_norm": 6.551607608795166, | |
| "learning_rate": 5.998330595129277e-06, | |
| "loss": 1.063248634338379, | |
| "step": 362 | |
| }, | |
| { | |
| "epoch": 0.4444444444444444, | |
| "grad_norm": 1.5826555490493774, | |
| "learning_rate": 5.9981221353118815e-06, | |
| "loss": 1.1169027090072632, | |
| "step": 364 | |
| }, | |
| { | |
| "epoch": 0.4468864468864469, | |
| "grad_norm": 2.175288200378418, | |
| "learning_rate": 5.9979014190015636e-06, | |
| "loss": 1.3833471536636353, | |
| "step": 366 | |
| }, | |
| { | |
| "epoch": 0.44932844932844934, | |
| "grad_norm": 1.5917333364486694, | |
| "learning_rate": 5.9976684472009465e-06, | |
| "loss": 1.027613878250122, | |
| "step": 368 | |
| }, | |
| { | |
| "epoch": 0.4517704517704518, | |
| "grad_norm": 2.8905162811279297, | |
| "learning_rate": 5.997423220968328e-06, | |
| "loss": 1.0340604782104492, | |
| "step": 370 | |
| }, | |
| { | |
| "epoch": 0.4542124542124542, | |
| "grad_norm": 1.940250277519226, | |
| "learning_rate": 5.997165741417676e-06, | |
| "loss": 1.3127802610397339, | |
| "step": 372 | |
| }, | |
| { | |
| "epoch": 0.45665445665445664, | |
| "grad_norm": 2.4297854900360107, | |
| "learning_rate": 5.9968960097186155e-06, | |
| "loss": 0.8877848386764526, | |
| "step": 374 | |
| }, | |
| { | |
| "epoch": 0.4590964590964591, | |
| "grad_norm": 2.4118640422821045, | |
| "learning_rate": 5.996614027096429e-06, | |
| "loss": 1.2320910692214966, | |
| "step": 376 | |
| }, | |
| { | |
| "epoch": 0.46153846153846156, | |
| "grad_norm": 4.578921318054199, | |
| "learning_rate": 5.996319794832055e-06, | |
| "loss": 0.9276782870292664, | |
| "step": 378 | |
| }, | |
| { | |
| "epoch": 0.463980463980464, | |
| "grad_norm": 2.2359204292297363, | |
| "learning_rate": 5.996013314262069e-06, | |
| "loss": 0.8818802833557129, | |
| "step": 380 | |
| }, | |
| { | |
| "epoch": 0.46642246642246643, | |
| "grad_norm": 1.6342476606369019, | |
| "learning_rate": 5.995694586778692e-06, | |
| "loss": 1.2811360359191895, | |
| "step": 382 | |
| }, | |
| { | |
| "epoch": 0.46886446886446886, | |
| "grad_norm": 1.650488257408142, | |
| "learning_rate": 5.995363613829776e-06, | |
| "loss": 1.0443174839019775, | |
| "step": 384 | |
| }, | |
| { | |
| "epoch": 0.4713064713064713, | |
| "grad_norm": 4.0077738761901855, | |
| "learning_rate": 5.995020396918799e-06, | |
| "loss": 1.2094539403915405, | |
| "step": 386 | |
| }, | |
| { | |
| "epoch": 0.47374847374847373, | |
| "grad_norm": 3.2971303462982178, | |
| "learning_rate": 5.994664937604858e-06, | |
| "loss": 1.602729320526123, | |
| "step": 388 | |
| }, | |
| { | |
| "epoch": 0.47619047619047616, | |
| "grad_norm": 1.8406124114990234, | |
| "learning_rate": 5.994297237502663e-06, | |
| "loss": 0.7992144227027893, | |
| "step": 390 | |
| }, | |
| { | |
| "epoch": 0.47863247863247865, | |
| "grad_norm": 3.1878228187561035, | |
| "learning_rate": 5.993917298282529e-06, | |
| "loss": 0.85492342710495, | |
| "step": 392 | |
| }, | |
| { | |
| "epoch": 0.4810744810744811, | |
| "grad_norm": 1.531112790107727, | |
| "learning_rate": 5.993525121670369e-06, | |
| "loss": 0.9317103624343872, | |
| "step": 394 | |
| }, | |
| { | |
| "epoch": 0.4835164835164835, | |
| "grad_norm": 1.492789626121521, | |
| "learning_rate": 5.993120709447684e-06, | |
| "loss": 1.2386692762374878, | |
| "step": 396 | |
| }, | |
| { | |
| "epoch": 0.48595848595848595, | |
| "grad_norm": 1.2817524671554565, | |
| "learning_rate": 5.9927040634515595e-06, | |
| "loss": 1.2882177829742432, | |
| "step": 398 | |
| }, | |
| { | |
| "epoch": 0.4884004884004884, | |
| "grad_norm": 2.322162389755249, | |
| "learning_rate": 5.9922751855746495e-06, | |
| "loss": 0.5464396476745605, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 0.4908424908424908, | |
| "grad_norm": 2.907667636871338, | |
| "learning_rate": 5.991834077765176e-06, | |
| "loss": 1.2339115142822266, | |
| "step": 402 | |
| }, | |
| { | |
| "epoch": 0.4932844932844933, | |
| "grad_norm": 1.4344274997711182, | |
| "learning_rate": 5.991380742026916e-06, | |
| "loss": 1.337497591972351, | |
| "step": 404 | |
| }, | |
| { | |
| "epoch": 0.49572649572649574, | |
| "grad_norm": 3.8610916137695312, | |
| "learning_rate": 5.990915180419194e-06, | |
| "loss": 0.6470708250999451, | |
| "step": 406 | |
| }, | |
| { | |
| "epoch": 0.4981684981684982, | |
| "grad_norm": 8.258731842041016, | |
| "learning_rate": 5.9904373950568695e-06, | |
| "loss": 0.8996188640594482, | |
| "step": 408 | |
| }, | |
| { | |
| "epoch": 0.5006105006105006, | |
| "grad_norm": 1.479630470275879, | |
| "learning_rate": 5.989947388110331e-06, | |
| "loss": 1.2700155973434448, | |
| "step": 410 | |
| }, | |
| { | |
| "epoch": 0.503052503052503, | |
| "grad_norm": 3.2398624420166016, | |
| "learning_rate": 5.9894451618054856e-06, | |
| "loss": 0.6065315008163452, | |
| "step": 412 | |
| }, | |
| { | |
| "epoch": 0.5054945054945055, | |
| "grad_norm": 1.5686074495315552, | |
| "learning_rate": 5.9889307184237445e-06, | |
| "loss": 1.3284578323364258, | |
| "step": 414 | |
| }, | |
| { | |
| "epoch": 0.5079365079365079, | |
| "grad_norm": 2.6033775806427, | |
| "learning_rate": 5.988404060302022e-06, | |
| "loss": 1.281160593032837, | |
| "step": 416 | |
| }, | |
| { | |
| "epoch": 0.5103785103785103, | |
| "grad_norm": 1.3352185487747192, | |
| "learning_rate": 5.987865189832714e-06, | |
| "loss": 1.219364881515503, | |
| "step": 418 | |
| }, | |
| { | |
| "epoch": 0.5128205128205128, | |
| "grad_norm": 1.3756248950958252, | |
| "learning_rate": 5.987314109463697e-06, | |
| "loss": 1.2490348815917969, | |
| "step": 420 | |
| }, | |
| { | |
| "epoch": 0.5152625152625152, | |
| "grad_norm": 1.8197388648986816, | |
| "learning_rate": 5.986750821698305e-06, | |
| "loss": 1.2416220903396606, | |
| "step": 422 | |
| }, | |
| { | |
| "epoch": 0.5177045177045178, | |
| "grad_norm": 2.0134706497192383, | |
| "learning_rate": 5.986175329095333e-06, | |
| "loss": 1.2700915336608887, | |
| "step": 424 | |
| }, | |
| { | |
| "epoch": 0.5201465201465202, | |
| "grad_norm": 2.1429672241210938, | |
| "learning_rate": 5.985587634269013e-06, | |
| "loss": 1.0763859748840332, | |
| "step": 426 | |
| }, | |
| { | |
| "epoch": 0.5225885225885226, | |
| "grad_norm": 2.1284050941467285, | |
| "learning_rate": 5.9849877398890085e-06, | |
| "loss": 1.2657442092895508, | |
| "step": 428 | |
| }, | |
| { | |
| "epoch": 0.525030525030525, | |
| "grad_norm": 3.2052974700927734, | |
| "learning_rate": 5.984375648680401e-06, | |
| "loss": 0.9026008248329163, | |
| "step": 430 | |
| }, | |
| { | |
| "epoch": 0.5274725274725275, | |
| "grad_norm": 8.207277297973633, | |
| "learning_rate": 5.983751363423675e-06, | |
| "loss": 0.5902690887451172, | |
| "step": 432 | |
| }, | |
| { | |
| "epoch": 0.5299145299145299, | |
| "grad_norm": 6.7014594078063965, | |
| "learning_rate": 5.983114886954711e-06, | |
| "loss": 1.2768715620040894, | |
| "step": 434 | |
| }, | |
| { | |
| "epoch": 0.5323565323565324, | |
| "grad_norm": 1.3282718658447266, | |
| "learning_rate": 5.982466222164768e-06, | |
| "loss": 1.230344295501709, | |
| "step": 436 | |
| }, | |
| { | |
| "epoch": 0.5347985347985348, | |
| "grad_norm": 1.8573893308639526, | |
| "learning_rate": 5.981805372000472e-06, | |
| "loss": 1.258583664894104, | |
| "step": 438 | |
| }, | |
| { | |
| "epoch": 0.5372405372405372, | |
| "grad_norm": 15.122943878173828, | |
| "learning_rate": 5.981132339463799e-06, | |
| "loss": 1.2159916162490845, | |
| "step": 440 | |
| }, | |
| { | |
| "epoch": 0.5396825396825397, | |
| "grad_norm": 4.852167129516602, | |
| "learning_rate": 5.98044712761207e-06, | |
| "loss": 1.0581938028335571, | |
| "step": 442 | |
| }, | |
| { | |
| "epoch": 0.5421245421245421, | |
| "grad_norm": 2.0339410305023193, | |
| "learning_rate": 5.9797497395579285e-06, | |
| "loss": 0.8306668996810913, | |
| "step": 444 | |
| }, | |
| { | |
| "epoch": 0.5445665445665445, | |
| "grad_norm": 6.8808488845825195, | |
| "learning_rate": 5.97904017846933e-06, | |
| "loss": 0.9840553998947144, | |
| "step": 446 | |
| }, | |
| { | |
| "epoch": 0.5470085470085471, | |
| "grad_norm": 1.4884564876556396, | |
| "learning_rate": 5.978318447569527e-06, | |
| "loss": 1.1001070737838745, | |
| "step": 448 | |
| }, | |
| { | |
| "epoch": 0.5494505494505495, | |
| "grad_norm": 3.1171839237213135, | |
| "learning_rate": 5.977584550137057e-06, | |
| "loss": 0.9998340606689453, | |
| "step": 450 | |
| }, | |
| { | |
| "epoch": 0.5518925518925519, | |
| "grad_norm": 4.206235408782959, | |
| "learning_rate": 5.976838489505721e-06, | |
| "loss": 0.7569481730461121, | |
| "step": 452 | |
| }, | |
| { | |
| "epoch": 0.5543345543345544, | |
| "grad_norm": 1.3813527822494507, | |
| "learning_rate": 5.9760802690645775e-06, | |
| "loss": 1.2668659687042236, | |
| "step": 454 | |
| }, | |
| { | |
| "epoch": 0.5567765567765568, | |
| "grad_norm": 1.965630054473877, | |
| "learning_rate": 5.975309892257918e-06, | |
| "loss": 1.2358734607696533, | |
| "step": 456 | |
| }, | |
| { | |
| "epoch": 0.5592185592185592, | |
| "grad_norm": 0.8634360432624817, | |
| "learning_rate": 5.974527362585258e-06, | |
| "loss": 0.6871505379676819, | |
| "step": 458 | |
| }, | |
| { | |
| "epoch": 0.5616605616605617, | |
| "grad_norm": 5.055013656616211, | |
| "learning_rate": 5.973732683601318e-06, | |
| "loss": 1.1548914909362793, | |
| "step": 460 | |
| }, | |
| { | |
| "epoch": 0.5641025641025641, | |
| "grad_norm": 2.3719708919525146, | |
| "learning_rate": 5.972925858916006e-06, | |
| "loss": 1.1725798845291138, | |
| "step": 462 | |
| }, | |
| { | |
| "epoch": 0.5665445665445665, | |
| "grad_norm": 14.694830894470215, | |
| "learning_rate": 5.972106892194404e-06, | |
| "loss": 0.5542956590652466, | |
| "step": 464 | |
| }, | |
| { | |
| "epoch": 0.568986568986569, | |
| "grad_norm": 1.5816266536712646, | |
| "learning_rate": 5.9712757871567556e-06, | |
| "loss": 1.4963278770446777, | |
| "step": 466 | |
| }, | |
| { | |
| "epoch": 0.5714285714285714, | |
| "grad_norm": 1.5916846990585327, | |
| "learning_rate": 5.9704325475784355e-06, | |
| "loss": 1.3050663471221924, | |
| "step": 468 | |
| }, | |
| { | |
| "epoch": 0.5738705738705738, | |
| "grad_norm": 2.6258790493011475, | |
| "learning_rate": 5.969577177289948e-06, | |
| "loss": 1.0056266784667969, | |
| "step": 470 | |
| }, | |
| { | |
| "epoch": 0.5763125763125763, | |
| "grad_norm": 7.421305179595947, | |
| "learning_rate": 5.968709680176896e-06, | |
| "loss": 1.2716175317764282, | |
| "step": 472 | |
| }, | |
| { | |
| "epoch": 0.5787545787545788, | |
| "grad_norm": 4.030231952667236, | |
| "learning_rate": 5.967830060179975e-06, | |
| "loss": 0.8685106039047241, | |
| "step": 474 | |
| }, | |
| { | |
| "epoch": 0.5811965811965812, | |
| "grad_norm": 3.304006576538086, | |
| "learning_rate": 5.966938321294947e-06, | |
| "loss": 1.0120376348495483, | |
| "step": 476 | |
| }, | |
| { | |
| "epoch": 0.5836385836385837, | |
| "grad_norm": 1.996673583984375, | |
| "learning_rate": 5.966034467572626e-06, | |
| "loss": 1.166401982307434, | |
| "step": 478 | |
| }, | |
| { | |
| "epoch": 0.5860805860805861, | |
| "grad_norm": 2.437133550643921, | |
| "learning_rate": 5.965118503118861e-06, | |
| "loss": 1.2958388328552246, | |
| "step": 480 | |
| }, | |
| { | |
| "epoch": 0.5885225885225885, | |
| "grad_norm": 1.4910950660705566, | |
| "learning_rate": 5.964190432094512e-06, | |
| "loss": 0.523586094379425, | |
| "step": 482 | |
| }, | |
| { | |
| "epoch": 0.590964590964591, | |
| "grad_norm": 0.8698585629463196, | |
| "learning_rate": 5.963250258715435e-06, | |
| "loss": 0.954540491104126, | |
| "step": 484 | |
| }, | |
| { | |
| "epoch": 0.5934065934065934, | |
| "grad_norm": 0.9387339949607849, | |
| "learning_rate": 5.962297987252463e-06, | |
| "loss": 1.014317274093628, | |
| "step": 486 | |
| }, | |
| { | |
| "epoch": 0.5958485958485958, | |
| "grad_norm": 5.339751720428467, | |
| "learning_rate": 5.961333622031385e-06, | |
| "loss": 0.9420929551124573, | |
| "step": 488 | |
| }, | |
| { | |
| "epoch": 0.5982905982905983, | |
| "grad_norm": 2.236091136932373, | |
| "learning_rate": 5.9603571674329294e-06, | |
| "loss": 0.9940929412841797, | |
| "step": 490 | |
| }, | |
| { | |
| "epoch": 0.6007326007326007, | |
| "grad_norm": 2.2979278564453125, | |
| "learning_rate": 5.959368627892738e-06, | |
| "loss": 0.7589210271835327, | |
| "step": 492 | |
| }, | |
| { | |
| "epoch": 0.6031746031746031, | |
| "grad_norm": 1.245233178138733, | |
| "learning_rate": 5.958368007901353e-06, | |
| "loss": 0.9640493988990784, | |
| "step": 494 | |
| }, | |
| { | |
| "epoch": 0.6056166056166056, | |
| "grad_norm": 3.310840368270874, | |
| "learning_rate": 5.9573553120041916e-06, | |
| "loss": 1.2264776229858398, | |
| "step": 496 | |
| }, | |
| { | |
| "epoch": 0.608058608058608, | |
| "grad_norm": 5.373754024505615, | |
| "learning_rate": 5.956330544801528e-06, | |
| "loss": 0.5740343332290649, | |
| "step": 498 | |
| }, | |
| { | |
| "epoch": 0.6105006105006106, | |
| "grad_norm": 1.4315255880355835, | |
| "learning_rate": 5.95529371094847e-06, | |
| "loss": 0.8721321821212769, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.612942612942613, | |
| "grad_norm": 1.5925936698913574, | |
| "learning_rate": 5.954244815154941e-06, | |
| "loss": 1.222565770149231, | |
| "step": 502 | |
| }, | |
| { | |
| "epoch": 0.6153846153846154, | |
| "grad_norm": 1.4787653684616089, | |
| "learning_rate": 5.953183862185656e-06, | |
| "loss": 1.238010048866272, | |
| "step": 504 | |
| }, | |
| { | |
| "epoch": 0.6178266178266179, | |
| "grad_norm": 2.44746994972229, | |
| "learning_rate": 5.9521108568601015e-06, | |
| "loss": 1.2495218515396118, | |
| "step": 506 | |
| }, | |
| { | |
| "epoch": 0.6202686202686203, | |
| "grad_norm": 2.0077357292175293, | |
| "learning_rate": 5.951025804052512e-06, | |
| "loss": 1.2221843004226685, | |
| "step": 508 | |
| }, | |
| { | |
| "epoch": 0.6227106227106227, | |
| "grad_norm": 2.2311477661132812, | |
| "learning_rate": 5.949928708691852e-06, | |
| "loss": 1.2433747053146362, | |
| "step": 510 | |
| }, | |
| { | |
| "epoch": 0.6251526251526252, | |
| "grad_norm": 1.3252453804016113, | |
| "learning_rate": 5.948819575761785e-06, | |
| "loss": 1.070593237876892, | |
| "step": 512 | |
| }, | |
| { | |
| "epoch": 0.6275946275946276, | |
| "grad_norm": 1.3435025215148926, | |
| "learning_rate": 5.947698410300662e-06, | |
| "loss": 1.0740947723388672, | |
| "step": 514 | |
| }, | |
| { | |
| "epoch": 0.63003663003663, | |
| "grad_norm": 1.2309428453445435, | |
| "learning_rate": 5.94656521740149e-06, | |
| "loss": 0.9620496034622192, | |
| "step": 516 | |
| }, | |
| { | |
| "epoch": 0.6324786324786325, | |
| "grad_norm": 1.51472008228302, | |
| "learning_rate": 5.945420002211912e-06, | |
| "loss": 1.2541457414627075, | |
| "step": 518 | |
| }, | |
| { | |
| "epoch": 0.6349206349206349, | |
| "grad_norm": 2.0422983169555664, | |
| "learning_rate": 5.944262769934184e-06, | |
| "loss": 0.7579051852226257, | |
| "step": 520 | |
| }, | |
| { | |
| "epoch": 0.6373626373626373, | |
| "grad_norm": 10.48106575012207, | |
| "learning_rate": 5.9430935258251516e-06, | |
| "loss": 0.9636937975883484, | |
| "step": 522 | |
| }, | |
| { | |
| "epoch": 0.6398046398046398, | |
| "grad_norm": 12.823934555053711, | |
| "learning_rate": 5.941912275196223e-06, | |
| "loss": 1.010987639427185, | |
| "step": 524 | |
| }, | |
| { | |
| "epoch": 0.6422466422466423, | |
| "grad_norm": 2.1455140113830566, | |
| "learning_rate": 5.94071902341335e-06, | |
| "loss": 1.2731401920318604, | |
| "step": 526 | |
| }, | |
| { | |
| "epoch": 0.6446886446886447, | |
| "grad_norm": 1.989286184310913, | |
| "learning_rate": 5.9395137758969996e-06, | |
| "loss": 1.321770191192627, | |
| "step": 528 | |
| }, | |
| { | |
| "epoch": 0.6471306471306472, | |
| "grad_norm": 1.7523471117019653, | |
| "learning_rate": 5.938296538122131e-06, | |
| "loss": 1.0642083883285522, | |
| "step": 530 | |
| }, | |
| { | |
| "epoch": 0.6495726495726496, | |
| "grad_norm": 2.0933523178100586, | |
| "learning_rate": 5.9370673156181685e-06, | |
| "loss": 1.0688331127166748, | |
| "step": 532 | |
| }, | |
| { | |
| "epoch": 0.652014652014652, | |
| "grad_norm": 4.951768398284912, | |
| "learning_rate": 5.9358261139689804e-06, | |
| "loss": 1.530338168144226, | |
| "step": 534 | |
| }, | |
| { | |
| "epoch": 0.6544566544566545, | |
| "grad_norm": 1.5651706457138062, | |
| "learning_rate": 5.9345729388128516e-06, | |
| "loss": 0.8295682668685913, | |
| "step": 536 | |
| }, | |
| { | |
| "epoch": 0.6568986568986569, | |
| "grad_norm": 1.4690457582473755, | |
| "learning_rate": 5.9333077958424564e-06, | |
| "loss": 1.2547093629837036, | |
| "step": 538 | |
| }, | |
| { | |
| "epoch": 0.6593406593406593, | |
| "grad_norm": 1.7123324871063232, | |
| "learning_rate": 5.932030690804835e-06, | |
| "loss": 1.2759506702423096, | |
| "step": 540 | |
| }, | |
| { | |
| "epoch": 0.6617826617826618, | |
| "grad_norm": 2.439643621444702, | |
| "learning_rate": 5.9307416295013655e-06, | |
| "loss": 0.9515459537506104, | |
| "step": 542 | |
| }, | |
| { | |
| "epoch": 0.6642246642246642, | |
| "grad_norm": 1.4160524606704712, | |
| "learning_rate": 5.929440617787741e-06, | |
| "loss": 1.2433102130889893, | |
| "step": 544 | |
| }, | |
| { | |
| "epoch": 0.6666666666666666, | |
| "grad_norm": 6.129720211029053, | |
| "learning_rate": 5.928127661573937e-06, | |
| "loss": 1.1501506567001343, | |
| "step": 546 | |
| }, | |
| { | |
| "epoch": 0.6691086691086691, | |
| "grad_norm": 2.4493350982666016, | |
| "learning_rate": 5.9268027668241925e-06, | |
| "loss": 1.2322951555252075, | |
| "step": 548 | |
| }, | |
| { | |
| "epoch": 0.6715506715506715, | |
| "grad_norm": 5.7734575271606445, | |
| "learning_rate": 5.925465939556974e-06, | |
| "loss": 1.1571426391601562, | |
| "step": 550 | |
| }, | |
| { | |
| "epoch": 0.673992673992674, | |
| "grad_norm": 3.0620312690734863, | |
| "learning_rate": 5.924117185844955e-06, | |
| "loss": 1.010740876197815, | |
| "step": 552 | |
| }, | |
| { | |
| "epoch": 0.6764346764346765, | |
| "grad_norm": 1.3649016618728638, | |
| "learning_rate": 5.922756511814986e-06, | |
| "loss": 1.0325241088867188, | |
| "step": 554 | |
| }, | |
| { | |
| "epoch": 0.6788766788766789, | |
| "grad_norm": 3.0845694541931152, | |
| "learning_rate": 5.921383923648067e-06, | |
| "loss": 0.7497116923332214, | |
| "step": 556 | |
| }, | |
| { | |
| "epoch": 0.6813186813186813, | |
| "grad_norm": 3.1487061977386475, | |
| "learning_rate": 5.919999427579318e-06, | |
| "loss": 1.1814972162246704, | |
| "step": 558 | |
| }, | |
| { | |
| "epoch": 0.6837606837606838, | |
| "grad_norm": 0.6284694671630859, | |
| "learning_rate": 5.918603029897952e-06, | |
| "loss": 1.168214201927185, | |
| "step": 560 | |
| }, | |
| { | |
| "epoch": 0.6862026862026862, | |
| "grad_norm": 5.0077080726623535, | |
| "learning_rate": 5.917194736947248e-06, | |
| "loss": 1.011269211769104, | |
| "step": 562 | |
| }, | |
| { | |
| "epoch": 0.6886446886446886, | |
| "grad_norm": 1.6639766693115234, | |
| "learning_rate": 5.915774555124519e-06, | |
| "loss": 1.3079068660736084, | |
| "step": 564 | |
| }, | |
| { | |
| "epoch": 0.6910866910866911, | |
| "grad_norm": 1.6810539960861206, | |
| "learning_rate": 5.914342490881085e-06, | |
| "loss": 1.096473217010498, | |
| "step": 566 | |
| }, | |
| { | |
| "epoch": 0.6935286935286935, | |
| "grad_norm": 4.131659030914307, | |
| "learning_rate": 5.912898550722242e-06, | |
| "loss": 1.2962534427642822, | |
| "step": 568 | |
| }, | |
| { | |
| "epoch": 0.6959706959706959, | |
| "grad_norm": 1.2126718759536743, | |
| "learning_rate": 5.911442741207234e-06, | |
| "loss": 0.9613256454467773, | |
| "step": 570 | |
| }, | |
| { | |
| "epoch": 0.6984126984126984, | |
| "grad_norm": 2.5516252517700195, | |
| "learning_rate": 5.909975068949225e-06, | |
| "loss": 0.9773088693618774, | |
| "step": 572 | |
| }, | |
| { | |
| "epoch": 0.7008547008547008, | |
| "grad_norm": 2.4411351680755615, | |
| "learning_rate": 5.908495540615263e-06, | |
| "loss": 1.427431583404541, | |
| "step": 574 | |
| }, | |
| { | |
| "epoch": 0.7032967032967034, | |
| "grad_norm": 1.301823377609253, | |
| "learning_rate": 5.907004162926255e-06, | |
| "loss": 1.2127736806869507, | |
| "step": 576 | |
| }, | |
| { | |
| "epoch": 0.7057387057387058, | |
| "grad_norm": 1.3317594528198242, | |
| "learning_rate": 5.9055009426569374e-06, | |
| "loss": 1.190283179283142, | |
| "step": 578 | |
| }, | |
| { | |
| "epoch": 0.7081807081807082, | |
| "grad_norm": 2.30523681640625, | |
| "learning_rate": 5.903985886635838e-06, | |
| "loss": 1.1487045288085938, | |
| "step": 580 | |
| }, | |
| { | |
| "epoch": 0.7106227106227107, | |
| "grad_norm": 1.789974331855774, | |
| "learning_rate": 5.902459001745253e-06, | |
| "loss": 1.20576810836792, | |
| "step": 582 | |
| }, | |
| { | |
| "epoch": 0.7130647130647131, | |
| "grad_norm": 1.5357120037078857, | |
| "learning_rate": 5.9009202949212125e-06, | |
| "loss": 1.0540030002593994, | |
| "step": 584 | |
| }, | |
| { | |
| "epoch": 0.7155067155067155, | |
| "grad_norm": 1.0633633136749268, | |
| "learning_rate": 5.899369773153447e-06, | |
| "loss": 1.2952874898910522, | |
| "step": 586 | |
| }, | |
| { | |
| "epoch": 0.717948717948718, | |
| "grad_norm": 3.676633596420288, | |
| "learning_rate": 5.897807443485358e-06, | |
| "loss": 1.0641299486160278, | |
| "step": 588 | |
| }, | |
| { | |
| "epoch": 0.7203907203907204, | |
| "grad_norm": 1.8231834173202515, | |
| "learning_rate": 5.8962333130139875e-06, | |
| "loss": 0.9424347281455994, | |
| "step": 590 | |
| }, | |
| { | |
| "epoch": 0.7228327228327228, | |
| "grad_norm": 1.9420549869537354, | |
| "learning_rate": 5.894647388889982e-06, | |
| "loss": 0.8484236001968384, | |
| "step": 592 | |
| }, | |
| { | |
| "epoch": 0.7252747252747253, | |
| "grad_norm": 1.0987299680709839, | |
| "learning_rate": 5.893049678317563e-06, | |
| "loss": 1.2070276737213135, | |
| "step": 594 | |
| }, | |
| { | |
| "epoch": 0.7277167277167277, | |
| "grad_norm": 10.05299186706543, | |
| "learning_rate": 5.891440188554491e-06, | |
| "loss": 0.9539278745651245, | |
| "step": 596 | |
| }, | |
| { | |
| "epoch": 0.7301587301587301, | |
| "grad_norm": 1.7478141784667969, | |
| "learning_rate": 5.889818926912037e-06, | |
| "loss": 0.9240214824676514, | |
| "step": 598 | |
| }, | |
| { | |
| "epoch": 0.7326007326007326, | |
| "grad_norm": 1.9144340753555298, | |
| "learning_rate": 5.888185900754946e-06, | |
| "loss": 1.1822292804718018, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 0.7350427350427351, | |
| "grad_norm": 1.3329391479492188, | |
| "learning_rate": 5.886541117501406e-06, | |
| "loss": 1.3204286098480225, | |
| "step": 602 | |
| }, | |
| { | |
| "epoch": 0.7374847374847375, | |
| "grad_norm": 2.1655688285827637, | |
| "learning_rate": 5.884884584623009e-06, | |
| "loss": 1.1428865194320679, | |
| "step": 604 | |
| }, | |
| { | |
| "epoch": 0.73992673992674, | |
| "grad_norm": 1.6737140417099, | |
| "learning_rate": 5.883216309644725e-06, | |
| "loss": 1.4259554147720337, | |
| "step": 606 | |
| }, | |
| { | |
| "epoch": 0.7423687423687424, | |
| "grad_norm": 1.2776116132736206, | |
| "learning_rate": 5.881536300144861e-06, | |
| "loss": 1.2691248655319214, | |
| "step": 608 | |
| }, | |
| { | |
| "epoch": 0.7448107448107448, | |
| "grad_norm": 1.9943733215332031, | |
| "learning_rate": 5.879844563755031e-06, | |
| "loss": 0.8886740207672119, | |
| "step": 610 | |
| }, | |
| { | |
| "epoch": 0.7472527472527473, | |
| "grad_norm": 1.6197903156280518, | |
| "learning_rate": 5.878141108160117e-06, | |
| "loss": 1.2487074136734009, | |
| "step": 612 | |
| }, | |
| { | |
| "epoch": 0.7496947496947497, | |
| "grad_norm": 1.7818470001220703, | |
| "learning_rate": 5.876425941098242e-06, | |
| "loss": 1.2276005744934082, | |
| "step": 614 | |
| }, | |
| { | |
| "epoch": 0.7521367521367521, | |
| "grad_norm": 2.3711366653442383, | |
| "learning_rate": 5.8746990703607235e-06, | |
| "loss": 0.8518514633178711, | |
| "step": 616 | |
| }, | |
| { | |
| "epoch": 0.7545787545787546, | |
| "grad_norm": 1.3806644678115845, | |
| "learning_rate": 5.872960503792047e-06, | |
| "loss": 1.5368404388427734, | |
| "step": 618 | |
| }, | |
| { | |
| "epoch": 0.757020757020757, | |
| "grad_norm": 2.896021604537964, | |
| "learning_rate": 5.871210249289828e-06, | |
| "loss": 1.0128453969955444, | |
| "step": 620 | |
| }, | |
| { | |
| "epoch": 0.7594627594627594, | |
| "grad_norm": 4.764344215393066, | |
| "learning_rate": 5.869448314804772e-06, | |
| "loss": 0.9351692199707031, | |
| "step": 622 | |
| }, | |
| { | |
| "epoch": 0.7619047619047619, | |
| "grad_norm": 1.9629191160202026, | |
| "learning_rate": 5.867674708340647e-06, | |
| "loss": 1.2186893224716187, | |
| "step": 624 | |
| }, | |
| { | |
| "epoch": 0.7643467643467643, | |
| "grad_norm": 1.4997833967208862, | |
| "learning_rate": 5.86588943795424e-06, | |
| "loss": 1.2730145454406738, | |
| "step": 626 | |
| }, | |
| { | |
| "epoch": 0.7667887667887668, | |
| "grad_norm": 5.824042797088623, | |
| "learning_rate": 5.864092511755323e-06, | |
| "loss": 1.0189692974090576, | |
| "step": 628 | |
| }, | |
| { | |
| "epoch": 0.7692307692307693, | |
| "grad_norm": 1.6058506965637207, | |
| "learning_rate": 5.8622839379066135e-06, | |
| "loss": 1.2564375400543213, | |
| "step": 630 | |
| }, | |
| { | |
| "epoch": 0.7716727716727717, | |
| "grad_norm": 2.6748597621917725, | |
| "learning_rate": 5.860463724623742e-06, | |
| "loss": 0.8755788207054138, | |
| "step": 632 | |
| }, | |
| { | |
| "epoch": 0.7741147741147741, | |
| "grad_norm": 1.938493013381958, | |
| "learning_rate": 5.858631880175212e-06, | |
| "loss": 0.8998976945877075, | |
| "step": 634 | |
| }, | |
| { | |
| "epoch": 0.7765567765567766, | |
| "grad_norm": 1.6986690759658813, | |
| "learning_rate": 5.856788412882361e-06, | |
| "loss": 1.2397321462631226, | |
| "step": 636 | |
| }, | |
| { | |
| "epoch": 0.778998778998779, | |
| "grad_norm": 2.0110723972320557, | |
| "learning_rate": 5.854933331119328e-06, | |
| "loss": 1.24760103225708, | |
| "step": 638 | |
| }, | |
| { | |
| "epoch": 0.7814407814407814, | |
| "grad_norm": 2.173943042755127, | |
| "learning_rate": 5.853066643313007e-06, | |
| "loss": 0.8890904188156128, | |
| "step": 640 | |
| }, | |
| { | |
| "epoch": 0.7838827838827839, | |
| "grad_norm": 3.4975063800811768, | |
| "learning_rate": 5.851188357943019e-06, | |
| "loss": 1.00565767288208, | |
| "step": 642 | |
| }, | |
| { | |
| "epoch": 0.7863247863247863, | |
| "grad_norm": 1.0548205375671387, | |
| "learning_rate": 5.849298483541663e-06, | |
| "loss": 1.1251481771469116, | |
| "step": 644 | |
| }, | |
| { | |
| "epoch": 0.7887667887667887, | |
| "grad_norm": 1.5539727210998535, | |
| "learning_rate": 5.847397028693887e-06, | |
| "loss": 0.9152241945266724, | |
| "step": 646 | |
| }, | |
| { | |
| "epoch": 0.7912087912087912, | |
| "grad_norm": 1.818673014640808, | |
| "learning_rate": 5.845484002037241e-06, | |
| "loss": 1.19895601272583, | |
| "step": 648 | |
| }, | |
| { | |
| "epoch": 0.7936507936507936, | |
| "grad_norm": 1.390134572982788, | |
| "learning_rate": 5.843559412261842e-06, | |
| "loss": 0.5183348655700684, | |
| "step": 650 | |
| }, | |
| { | |
| "epoch": 0.796092796092796, | |
| "grad_norm": 1.5669151544570923, | |
| "learning_rate": 5.841623268110333e-06, | |
| "loss": 0.7738546133041382, | |
| "step": 652 | |
| }, | |
| { | |
| "epoch": 0.7985347985347986, | |
| "grad_norm": 3.074685573577881, | |
| "learning_rate": 5.839675578377848e-06, | |
| "loss": 1.1633089780807495, | |
| "step": 654 | |
| }, | |
| { | |
| "epoch": 0.800976800976801, | |
| "grad_norm": 1.443999171257019, | |
| "learning_rate": 5.837716351911962e-06, | |
| "loss": 1.2039074897766113, | |
| "step": 656 | |
| }, | |
| { | |
| "epoch": 0.8034188034188035, | |
| "grad_norm": 3.9893510341644287, | |
| "learning_rate": 5.83574559761266e-06, | |
| "loss": 0.4816018044948578, | |
| "step": 658 | |
| }, | |
| { | |
| "epoch": 0.8058608058608059, | |
| "grad_norm": 1.8784825801849365, | |
| "learning_rate": 5.833763324432294e-06, | |
| "loss": 1.053369402885437, | |
| "step": 660 | |
| }, | |
| { | |
| "epoch": 0.8083028083028083, | |
| "grad_norm": 2.2638516426086426, | |
| "learning_rate": 5.831769541375539e-06, | |
| "loss": 1.1880583763122559, | |
| "step": 662 | |
| }, | |
| { | |
| "epoch": 0.8107448107448108, | |
| "grad_norm": 1.5638916492462158, | |
| "learning_rate": 5.829764257499357e-06, | |
| "loss": 1.2887765169143677, | |
| "step": 664 | |
| }, | |
| { | |
| "epoch": 0.8131868131868132, | |
| "grad_norm": 1.4829241037368774, | |
| "learning_rate": 5.827747481912953e-06, | |
| "loss": 0.8841955065727234, | |
| "step": 666 | |
| }, | |
| { | |
| "epoch": 0.8156288156288156, | |
| "grad_norm": 2.4060006141662598, | |
| "learning_rate": 5.825719223777735e-06, | |
| "loss": 1.04268217086792, | |
| "step": 668 | |
| }, | |
| { | |
| "epoch": 0.818070818070818, | |
| "grad_norm": 1.3124898672103882, | |
| "learning_rate": 5.823679492307271e-06, | |
| "loss": 1.3766071796417236, | |
| "step": 670 | |
| }, | |
| { | |
| "epoch": 0.8205128205128205, | |
| "grad_norm": 1.4175595045089722, | |
| "learning_rate": 5.8216282967672454e-06, | |
| "loss": 1.2438055276870728, | |
| "step": 672 | |
| }, | |
| { | |
| "epoch": 0.8229548229548229, | |
| "grad_norm": 1.81400465965271, | |
| "learning_rate": 5.819565646475425e-06, | |
| "loss": 1.001070261001587, | |
| "step": 674 | |
| }, | |
| { | |
| "epoch": 0.8253968253968254, | |
| "grad_norm": 2.5499863624572754, | |
| "learning_rate": 5.817491550801603e-06, | |
| "loss": 0.8810223937034607, | |
| "step": 676 | |
| }, | |
| { | |
| "epoch": 0.8278388278388278, | |
| "grad_norm": 3.1251838207244873, | |
| "learning_rate": 5.815406019167574e-06, | |
| "loss": 1.2103674411773682, | |
| "step": 678 | |
| }, | |
| { | |
| "epoch": 0.8302808302808303, | |
| "grad_norm": 1.698996663093567, | |
| "learning_rate": 5.8133090610470735e-06, | |
| "loss": 1.2406985759735107, | |
| "step": 680 | |
| }, | |
| { | |
| "epoch": 0.8327228327228328, | |
| "grad_norm": 2.1086080074310303, | |
| "learning_rate": 5.811200685965747e-06, | |
| "loss": 1.1365587711334229, | |
| "step": 682 | |
| }, | |
| { | |
| "epoch": 0.8351648351648352, | |
| "grad_norm": 8.87724494934082, | |
| "learning_rate": 5.809080903501101e-06, | |
| "loss": 0.6978262662887573, | |
| "step": 684 | |
| }, | |
| { | |
| "epoch": 0.8376068376068376, | |
| "grad_norm": 1.3921948671340942, | |
| "learning_rate": 5.806949723282462e-06, | |
| "loss": 1.3278273344039917, | |
| "step": 686 | |
| }, | |
| { | |
| "epoch": 0.8400488400488401, | |
| "grad_norm": 2.7483460903167725, | |
| "learning_rate": 5.804807154990933e-06, | |
| "loss": 1.2896418571472168, | |
| "step": 688 | |
| }, | |
| { | |
| "epoch": 0.8424908424908425, | |
| "grad_norm": 5.320318698883057, | |
| "learning_rate": 5.802653208359347e-06, | |
| "loss": 0.9632086753845215, | |
| "step": 690 | |
| }, | |
| { | |
| "epoch": 0.8449328449328449, | |
| "grad_norm": 2.63219952583313, | |
| "learning_rate": 5.800487893172224e-06, | |
| "loss": 1.249232292175293, | |
| "step": 692 | |
| }, | |
| { | |
| "epoch": 0.8473748473748474, | |
| "grad_norm": 1.0053170919418335, | |
| "learning_rate": 5.798311219265727e-06, | |
| "loss": 0.9811009168624878, | |
| "step": 694 | |
| }, | |
| { | |
| "epoch": 0.8498168498168498, | |
| "grad_norm": 2.8511881828308105, | |
| "learning_rate": 5.796123196527619e-06, | |
| "loss": 1.1377757787704468, | |
| "step": 696 | |
| }, | |
| { | |
| "epoch": 0.8522588522588522, | |
| "grad_norm": 2.364295482635498, | |
| "learning_rate": 5.793923834897213e-06, | |
| "loss": 1.006821632385254, | |
| "step": 698 | |
| }, | |
| { | |
| "epoch": 0.8547008547008547, | |
| "grad_norm": 2.2293732166290283, | |
| "learning_rate": 5.791713144365334e-06, | |
| "loss": 1.0615328550338745, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 0.8571428571428571, | |
| "grad_norm": 3.746429204940796, | |
| "learning_rate": 5.789491134974266e-06, | |
| "loss": 1.0453742742538452, | |
| "step": 702 | |
| }, | |
| { | |
| "epoch": 0.8595848595848596, | |
| "grad_norm": 2.0390639305114746, | |
| "learning_rate": 5.787257816817712e-06, | |
| "loss": 0.9529613852500916, | |
| "step": 704 | |
| }, | |
| { | |
| "epoch": 0.8620268620268621, | |
| "grad_norm": 2.2642388343811035, | |
| "learning_rate": 5.785013200040747e-06, | |
| "loss": 0.39921677112579346, | |
| "step": 706 | |
| }, | |
| { | |
| "epoch": 0.8644688644688645, | |
| "grad_norm": 1.2055710554122925, | |
| "learning_rate": 5.782757294839766e-06, | |
| "loss": 1.1861385107040405, | |
| "step": 708 | |
| }, | |
| { | |
| "epoch": 0.8669108669108669, | |
| "grad_norm": 1.462980031967163, | |
| "learning_rate": 5.780490111462451e-06, | |
| "loss": 0.872178852558136, | |
| "step": 710 | |
| }, | |
| { | |
| "epoch": 0.8693528693528694, | |
| "grad_norm": 128.80056762695312, | |
| "learning_rate": 5.77821166020771e-06, | |
| "loss": 1.3495750427246094, | |
| "step": 712 | |
| }, | |
| { | |
| "epoch": 0.8717948717948718, | |
| "grad_norm": 2.8078572750091553, | |
| "learning_rate": 5.77592195142564e-06, | |
| "loss": 1.5414416790008545, | |
| "step": 714 | |
| }, | |
| { | |
| "epoch": 0.8742368742368742, | |
| "grad_norm": 2.91050124168396, | |
| "learning_rate": 5.773620995517472e-06, | |
| "loss": 1.402683138847351, | |
| "step": 716 | |
| }, | |
| { | |
| "epoch": 0.8766788766788767, | |
| "grad_norm": 1.1673686504364014, | |
| "learning_rate": 5.771308802935534e-06, | |
| "loss": 0.9155862927436829, | |
| "step": 718 | |
| }, | |
| { | |
| "epoch": 0.8791208791208791, | |
| "grad_norm": 3.338332414627075, | |
| "learning_rate": 5.768985384183194e-06, | |
| "loss": 1.1884852647781372, | |
| "step": 720 | |
| }, | |
| { | |
| "epoch": 0.8815628815628815, | |
| "grad_norm": 10.983979225158691, | |
| "learning_rate": 5.766650749814816e-06, | |
| "loss": 0.8515965938568115, | |
| "step": 722 | |
| }, | |
| { | |
| "epoch": 0.884004884004884, | |
| "grad_norm": 2.596907377243042, | |
| "learning_rate": 5.764304910435715e-06, | |
| "loss": 1.2161564826965332, | |
| "step": 724 | |
| }, | |
| { | |
| "epoch": 0.8864468864468864, | |
| "grad_norm": 1.6692111492156982, | |
| "learning_rate": 5.761947876702101e-06, | |
| "loss": 1.1635143756866455, | |
| "step": 726 | |
| }, | |
| { | |
| "epoch": 0.8888888888888888, | |
| "grad_norm": 1.1050961017608643, | |
| "learning_rate": 5.759579659321041e-06, | |
| "loss": 1.1528334617614746, | |
| "step": 728 | |
| }, | |
| { | |
| "epoch": 0.8913308913308914, | |
| "grad_norm": 2.1501071453094482, | |
| "learning_rate": 5.7572002690504e-06, | |
| "loss": 1.3882144689559937, | |
| "step": 730 | |
| }, | |
| { | |
| "epoch": 0.8937728937728938, | |
| "grad_norm": 4.581213474273682, | |
| "learning_rate": 5.7548097166988e-06, | |
| "loss": 1.1679284572601318, | |
| "step": 732 | |
| }, | |
| { | |
| "epoch": 0.8962148962148963, | |
| "grad_norm": 1.7328788042068481, | |
| "learning_rate": 5.752408013125568e-06, | |
| "loss": 0.9865559339523315, | |
| "step": 734 | |
| }, | |
| { | |
| "epoch": 0.8986568986568987, | |
| "grad_norm": 1.568103551864624, | |
| "learning_rate": 5.749995169240684e-06, | |
| "loss": 0.772152841091156, | |
| "step": 736 | |
| }, | |
| { | |
| "epoch": 0.9010989010989011, | |
| "grad_norm": 7.975260257720947, | |
| "learning_rate": 5.747571196004737e-06, | |
| "loss": 0.8893488645553589, | |
| "step": 738 | |
| }, | |
| { | |
| "epoch": 0.9035409035409036, | |
| "grad_norm": 6.543982982635498, | |
| "learning_rate": 5.7451361044288705e-06, | |
| "loss": 1.0185538530349731, | |
| "step": 740 | |
| }, | |
| { | |
| "epoch": 0.905982905982906, | |
| "grad_norm": 1.160656452178955, | |
| "learning_rate": 5.742689905574733e-06, | |
| "loss": 0.8777042031288147, | |
| "step": 742 | |
| }, | |
| { | |
| "epoch": 0.9084249084249084, | |
| "grad_norm": 2.757848024368286, | |
| "learning_rate": 5.740232610554433e-06, | |
| "loss": 0.8858698010444641, | |
| "step": 744 | |
| }, | |
| { | |
| "epoch": 0.9108669108669109, | |
| "grad_norm": 9.06092643737793, | |
| "learning_rate": 5.73776423053048e-06, | |
| "loss": 1.1307774782180786, | |
| "step": 746 | |
| }, | |
| { | |
| "epoch": 0.9133089133089133, | |
| "grad_norm": 2.1863319873809814, | |
| "learning_rate": 5.73528477671574e-06, | |
| "loss": 0.8516602516174316, | |
| "step": 748 | |
| }, | |
| { | |
| "epoch": 0.9157509157509157, | |
| "grad_norm": 1.554819941520691, | |
| "learning_rate": 5.732794260373384e-06, | |
| "loss": 1.333770751953125, | |
| "step": 750 | |
| }, | |
| { | |
| "epoch": 0.9181929181929182, | |
| "grad_norm": 3.050586223602295, | |
| "learning_rate": 5.730292692816835e-06, | |
| "loss": 1.222764492034912, | |
| "step": 752 | |
| }, | |
| { | |
| "epoch": 0.9206349206349206, | |
| "grad_norm": 1.5035123825073242, | |
| "learning_rate": 5.727780085409714e-06, | |
| "loss": 1.230542540550232, | |
| "step": 754 | |
| }, | |
| { | |
| "epoch": 0.9230769230769231, | |
| "grad_norm": 2.754014730453491, | |
| "learning_rate": 5.7252564495657985e-06, | |
| "loss": 1.2115579843521118, | |
| "step": 756 | |
| }, | |
| { | |
| "epoch": 0.9255189255189256, | |
| "grad_norm": 1.9241371154785156, | |
| "learning_rate": 5.722721796748957e-06, | |
| "loss": 1.2471449375152588, | |
| "step": 758 | |
| }, | |
| { | |
| "epoch": 0.927960927960928, | |
| "grad_norm": 2.088291645050049, | |
| "learning_rate": 5.720176138473106e-06, | |
| "loss": 0.7556540369987488, | |
| "step": 760 | |
| }, | |
| { | |
| "epoch": 0.9304029304029304, | |
| "grad_norm": 2.329336166381836, | |
| "learning_rate": 5.717619486302159e-06, | |
| "loss": 1.0959203243255615, | |
| "step": 762 | |
| }, | |
| { | |
| "epoch": 0.9328449328449329, | |
| "grad_norm": 1.3153835535049438, | |
| "learning_rate": 5.715051851849965e-06, | |
| "loss": 0.8686593770980835, | |
| "step": 764 | |
| }, | |
| { | |
| "epoch": 0.9352869352869353, | |
| "grad_norm": 6.791528224945068, | |
| "learning_rate": 5.712473246780264e-06, | |
| "loss": 0.5506616830825806, | |
| "step": 766 | |
| }, | |
| { | |
| "epoch": 0.9377289377289377, | |
| "grad_norm": 1.5540168285369873, | |
| "learning_rate": 5.7098836828066295e-06, | |
| "loss": 0.8565468788146973, | |
| "step": 768 | |
| }, | |
| { | |
| "epoch": 0.9401709401709402, | |
| "grad_norm": 5.5268754959106445, | |
| "learning_rate": 5.707283171692419e-06, | |
| "loss": 0.6587256789207458, | |
| "step": 770 | |
| }, | |
| { | |
| "epoch": 0.9426129426129426, | |
| "grad_norm": 1.8966621160507202, | |
| "learning_rate": 5.704671725250719e-06, | |
| "loss": 1.0538215637207031, | |
| "step": 772 | |
| }, | |
| { | |
| "epoch": 0.945054945054945, | |
| "grad_norm": 1.3468095064163208, | |
| "learning_rate": 5.702049355344287e-06, | |
| "loss": 1.2241626977920532, | |
| "step": 774 | |
| }, | |
| { | |
| "epoch": 0.9474969474969475, | |
| "grad_norm": 1.4727566242218018, | |
| "learning_rate": 5.699416073885503e-06, | |
| "loss": 1.1616984605789185, | |
| "step": 776 | |
| }, | |
| { | |
| "epoch": 0.9499389499389499, | |
| "grad_norm": 3.3165957927703857, | |
| "learning_rate": 5.6967718928363175e-06, | |
| "loss": 0.8437950611114502, | |
| "step": 778 | |
| }, | |
| { | |
| "epoch": 0.9523809523809523, | |
| "grad_norm": 1.1607977151870728, | |
| "learning_rate": 5.694116824208188e-06, | |
| "loss": 0.9664669632911682, | |
| "step": 780 | |
| }, | |
| { | |
| "epoch": 0.9548229548229549, | |
| "grad_norm": 2.6319656372070312, | |
| "learning_rate": 5.691450880062035e-06, | |
| "loss": 1.088547706604004, | |
| "step": 782 | |
| }, | |
| { | |
| "epoch": 0.9572649572649573, | |
| "grad_norm": 1.914591670036316, | |
| "learning_rate": 5.688774072508178e-06, | |
| "loss": 1.203494906425476, | |
| "step": 784 | |
| }, | |
| { | |
| "epoch": 0.9597069597069597, | |
| "grad_norm": 3.445983648300171, | |
| "learning_rate": 5.686086413706286e-06, | |
| "loss": 1.2426470518112183, | |
| "step": 786 | |
| }, | |
| { | |
| "epoch": 0.9621489621489622, | |
| "grad_norm": 4.115734100341797, | |
| "learning_rate": 5.683387915865322e-06, | |
| "loss": 1.3359789848327637, | |
| "step": 788 | |
| }, | |
| { | |
| "epoch": 0.9645909645909646, | |
| "grad_norm": 1.785029649734497, | |
| "learning_rate": 5.680678591243486e-06, | |
| "loss": 1.2645583152770996, | |
| "step": 790 | |
| }, | |
| { | |
| "epoch": 0.967032967032967, | |
| "grad_norm": 3.400036573410034, | |
| "learning_rate": 5.677958452148158e-06, | |
| "loss": 1.0313962697982788, | |
| "step": 792 | |
| }, | |
| { | |
| "epoch": 0.9694749694749695, | |
| "grad_norm": 3.4706273078918457, | |
| "learning_rate": 5.6752275109358456e-06, | |
| "loss": 1.2580047845840454, | |
| "step": 794 | |
| }, | |
| { | |
| "epoch": 0.9719169719169719, | |
| "grad_norm": 3.956390857696533, | |
| "learning_rate": 5.672485780012126e-06, | |
| "loss": 1.1567012071609497, | |
| "step": 796 | |
| }, | |
| { | |
| "epoch": 0.9743589743589743, | |
| "grad_norm": 1.5805697441101074, | |
| "learning_rate": 5.66973327183159e-06, | |
| "loss": 1.1679713726043701, | |
| "step": 798 | |
| }, | |
| { | |
| "epoch": 0.9768009768009768, | |
| "grad_norm": 1.9347152709960938, | |
| "learning_rate": 5.6669699988977845e-06, | |
| "loss": 1.2190362215042114, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 0.9792429792429792, | |
| "grad_norm": 3.247502565383911, | |
| "learning_rate": 5.664195973763155e-06, | |
| "loss": 0.8972685933113098, | |
| "step": 802 | |
| }, | |
| { | |
| "epoch": 0.9816849816849816, | |
| "grad_norm": 1.9501919746398926, | |
| "learning_rate": 5.661411209028994e-06, | |
| "loss": 1.1029160022735596, | |
| "step": 804 | |
| }, | |
| { | |
| "epoch": 0.9841269841269841, | |
| "grad_norm": 3.1804869174957275, | |
| "learning_rate": 5.658615717345374e-06, | |
| "loss": 1.500832438468933, | |
| "step": 806 | |
| }, | |
| { | |
| "epoch": 0.9865689865689866, | |
| "grad_norm": 2.697408676147461, | |
| "learning_rate": 5.655809511411103e-06, | |
| "loss": 0.88798987865448, | |
| "step": 808 | |
| }, | |
| { | |
| "epoch": 0.989010989010989, | |
| "grad_norm": 1.2146772146224976, | |
| "learning_rate": 5.652992603973652e-06, | |
| "loss": 1.2545315027236938, | |
| "step": 810 | |
| }, | |
| { | |
| "epoch": 0.9914529914529915, | |
| "grad_norm": 1.7389800548553467, | |
| "learning_rate": 5.650165007829109e-06, | |
| "loss": 1.2045774459838867, | |
| "step": 812 | |
| }, | |
| { | |
| "epoch": 0.9938949938949939, | |
| "grad_norm": 1.8084465265274048, | |
| "learning_rate": 5.647326735822117e-06, | |
| "loss": 1.2816147804260254, | |
| "step": 814 | |
| }, | |
| { | |
| "epoch": 0.9963369963369964, | |
| "grad_norm": 1.249566912651062, | |
| "learning_rate": 5.644477800845813e-06, | |
| "loss": 1.1596295833587646, | |
| "step": 816 | |
| }, | |
| { | |
| "epoch": 0.9987789987789988, | |
| "grad_norm": 2.499196767807007, | |
| "learning_rate": 5.641618215841772e-06, | |
| "loss": 1.0810341835021973, | |
| "step": 818 | |
| }, | |
| { | |
| "epoch": 1.0012210012210012, | |
| "grad_norm": 1.9975930452346802, | |
| "learning_rate": 5.63874799379995e-06, | |
| "loss": 0.9549547433853149, | |
| "step": 820 | |
| }, | |
| { | |
| "epoch": 1.0036630036630036, | |
| "grad_norm": 1.4195305109024048, | |
| "learning_rate": 5.635867147758619e-06, | |
| "loss": 1.1853629350662231, | |
| "step": 822 | |
| }, | |
| { | |
| "epoch": 1.006105006105006, | |
| "grad_norm": 4.497268199920654, | |
| "learning_rate": 5.632975690804315e-06, | |
| "loss": 0.4928017556667328, | |
| "step": 824 | |
| }, | |
| { | |
| "epoch": 1.0085470085470085, | |
| "grad_norm": 2.1255745887756348, | |
| "learning_rate": 5.630073636071774e-06, | |
| "loss": 1.0843961238861084, | |
| "step": 826 | |
| }, | |
| { | |
| "epoch": 1.010989010989011, | |
| "grad_norm": 34.958438873291016, | |
| "learning_rate": 5.627160996743874e-06, | |
| "loss": 0.8150432705879211, | |
| "step": 828 | |
| }, | |
| { | |
| "epoch": 1.0134310134310134, | |
| "grad_norm": 1.4282941818237305, | |
| "learning_rate": 5.624237786051572e-06, | |
| "loss": 0.857114851474762, | |
| "step": 830 | |
| }, | |
| { | |
| "epoch": 1.0158730158730158, | |
| "grad_norm": 33.52021789550781, | |
| "learning_rate": 5.621304017273851e-06, | |
| "loss": 0.8700833320617676, | |
| "step": 832 | |
| }, | |
| { | |
| "epoch": 1.0183150183150182, | |
| "grad_norm": 1.4326083660125732, | |
| "learning_rate": 5.618359703737651e-06, | |
| "loss": 0.8282259702682495, | |
| "step": 834 | |
| }, | |
| { | |
| "epoch": 1.0207570207570207, | |
| "grad_norm": 3.305655002593994, | |
| "learning_rate": 5.615404858817814e-06, | |
| "loss": 1.1669970750808716, | |
| "step": 836 | |
| }, | |
| { | |
| "epoch": 1.0231990231990231, | |
| "grad_norm": 2.0486841201782227, | |
| "learning_rate": 5.612439495937022e-06, | |
| "loss": 1.2340257167816162, | |
| "step": 838 | |
| }, | |
| { | |
| "epoch": 1.0256410256410255, | |
| "grad_norm": 2.4268410205841064, | |
| "learning_rate": 5.609463628565738e-06, | |
| "loss": 0.8751161694526672, | |
| "step": 840 | |
| }, | |
| { | |
| "epoch": 1.028083028083028, | |
| "grad_norm": 1.9834953546524048, | |
| "learning_rate": 5.60647727022214e-06, | |
| "loss": 0.8645414710044861, | |
| "step": 842 | |
| }, | |
| { | |
| "epoch": 1.0305250305250304, | |
| "grad_norm": 3.6179487705230713, | |
| "learning_rate": 5.603480434472062e-06, | |
| "loss": 1.101366400718689, | |
| "step": 844 | |
| }, | |
| { | |
| "epoch": 1.032967032967033, | |
| "grad_norm": 3.077362537384033, | |
| "learning_rate": 5.600473134928934e-06, | |
| "loss": 0.8427292704582214, | |
| "step": 846 | |
| }, | |
| { | |
| "epoch": 1.0354090354090355, | |
| "grad_norm": 1.3050235509872437, | |
| "learning_rate": 5.59745538525372e-06, | |
| "loss": 1.1196715831756592, | |
| "step": 848 | |
| }, | |
| { | |
| "epoch": 1.037851037851038, | |
| "grad_norm": 3.206444263458252, | |
| "learning_rate": 5.594427199154852e-06, | |
| "loss": 0.9106602072715759, | |
| "step": 850 | |
| }, | |
| { | |
| "epoch": 1.0402930402930404, | |
| "grad_norm": 4.214219570159912, | |
| "learning_rate": 5.591388590388172e-06, | |
| "loss": 0.6607577204704285, | |
| "step": 852 | |
| }, | |
| { | |
| "epoch": 1.0427350427350428, | |
| "grad_norm": 2.264488935470581, | |
| "learning_rate": 5.588339572756869e-06, | |
| "loss": 1.0989412069320679, | |
| "step": 854 | |
| }, | |
| { | |
| "epoch": 1.0451770451770452, | |
| "grad_norm": 5.119571208953857, | |
| "learning_rate": 5.5852801601114125e-06, | |
| "loss": 0.8569447994232178, | |
| "step": 856 | |
| }, | |
| { | |
| "epoch": 1.0476190476190477, | |
| "grad_norm": 1.9778999090194702, | |
| "learning_rate": 5.582210366349495e-06, | |
| "loss": 1.1780451536178589, | |
| "step": 858 | |
| }, | |
| { | |
| "epoch": 1.05006105006105, | |
| "grad_norm": 1.977215051651001, | |
| "learning_rate": 5.579130205415967e-06, | |
| "loss": 0.7887912392616272, | |
| "step": 860 | |
| }, | |
| { | |
| "epoch": 1.0525030525030525, | |
| "grad_norm": 1.1233766078948975, | |
| "learning_rate": 5.576039691302768e-06, | |
| "loss": 1.0990511178970337, | |
| "step": 862 | |
| }, | |
| { | |
| "epoch": 1.054945054945055, | |
| "grad_norm": 1.574094533920288, | |
| "learning_rate": 5.572938838048874e-06, | |
| "loss": 1.0752408504486084, | |
| "step": 864 | |
| }, | |
| { | |
| "epoch": 1.0573870573870574, | |
| "grad_norm": 4.407149791717529, | |
| "learning_rate": 5.569827659740223e-06, | |
| "loss": 1.4731539487838745, | |
| "step": 866 | |
| }, | |
| { | |
| "epoch": 1.0598290598290598, | |
| "grad_norm": 3.7272164821624756, | |
| "learning_rate": 5.5667061705096594e-06, | |
| "loss": 0.41928231716156006, | |
| "step": 868 | |
| }, | |
| { | |
| "epoch": 1.0622710622710623, | |
| "grad_norm": 1.6230946779251099, | |
| "learning_rate": 5.563574384536862e-06, | |
| "loss": 0.9580034017562866, | |
| "step": 870 | |
| }, | |
| { | |
| "epoch": 1.0647130647130647, | |
| "grad_norm": 3.506577253341675, | |
| "learning_rate": 5.560432316048287e-06, | |
| "loss": 0.9923728704452515, | |
| "step": 872 | |
| }, | |
| { | |
| "epoch": 1.0671550671550671, | |
| "grad_norm": 1.629608392715454, | |
| "learning_rate": 5.557279979317098e-06, | |
| "loss": 0.747920036315918, | |
| "step": 874 | |
| }, | |
| { | |
| "epoch": 1.0695970695970696, | |
| "grad_norm": 1.8776957988739014, | |
| "learning_rate": 5.554117388663104e-06, | |
| "loss": 1.0746995210647583, | |
| "step": 876 | |
| }, | |
| { | |
| "epoch": 1.072039072039072, | |
| "grad_norm": 10.093416213989258, | |
| "learning_rate": 5.550944558452692e-06, | |
| "loss": 1.016481876373291, | |
| "step": 878 | |
| }, | |
| { | |
| "epoch": 1.0744810744810744, | |
| "grad_norm": 2.4766669273376465, | |
| "learning_rate": 5.547761503098766e-06, | |
| "loss": 0.7514116764068604, | |
| "step": 880 | |
| }, | |
| { | |
| "epoch": 1.0769230769230769, | |
| "grad_norm": 2.3521194458007812, | |
| "learning_rate": 5.544568237060677e-06, | |
| "loss": 1.1210784912109375, | |
| "step": 882 | |
| }, | |
| { | |
| "epoch": 1.0793650793650793, | |
| "grad_norm": 9.225933074951172, | |
| "learning_rate": 5.541364774844158e-06, | |
| "loss": 0.7435236573219299, | |
| "step": 884 | |
| }, | |
| { | |
| "epoch": 1.0818070818070817, | |
| "grad_norm": 2.2107040882110596, | |
| "learning_rate": 5.538151131001262e-06, | |
| "loss": 0.681244432926178, | |
| "step": 886 | |
| }, | |
| { | |
| "epoch": 1.0842490842490842, | |
| "grad_norm": 1.9917274713516235, | |
| "learning_rate": 5.534927320130289e-06, | |
| "loss": 1.1450310945510864, | |
| "step": 888 | |
| }, | |
| { | |
| "epoch": 1.0866910866910866, | |
| "grad_norm": 1.4945895671844482, | |
| "learning_rate": 5.531693356875729e-06, | |
| "loss": 0.8382049202919006, | |
| "step": 890 | |
| }, | |
| { | |
| "epoch": 1.089133089133089, | |
| "grad_norm": 1.4547278881072998, | |
| "learning_rate": 5.5284492559281846e-06, | |
| "loss": 1.0185482501983643, | |
| "step": 892 | |
| }, | |
| { | |
| "epoch": 1.0915750915750915, | |
| "grad_norm": 6.223243713378906, | |
| "learning_rate": 5.525195032024317e-06, | |
| "loss": 1.1147940158843994, | |
| "step": 894 | |
| }, | |
| { | |
| "epoch": 1.0940170940170941, | |
| "grad_norm": 1.6152573823928833, | |
| "learning_rate": 5.521930699946763e-06, | |
| "loss": 1.0109710693359375, | |
| "step": 896 | |
| }, | |
| { | |
| "epoch": 1.0964590964590966, | |
| "grad_norm": 1.5743906497955322, | |
| "learning_rate": 5.5186562745240845e-06, | |
| "loss": 0.8494668006896973, | |
| "step": 898 | |
| }, | |
| { | |
| "epoch": 1.098901098901099, | |
| "grad_norm": 1.0578030347824097, | |
| "learning_rate": 5.51537177063069e-06, | |
| "loss": 0.7963146567344666, | |
| "step": 900 | |
| }, | |
| { | |
| "epoch": 1.1013431013431014, | |
| "grad_norm": 2.8918213844299316, | |
| "learning_rate": 5.51207720318677e-06, | |
| "loss": 0.5992125272750854, | |
| "step": 902 | |
| }, | |
| { | |
| "epoch": 1.1037851037851039, | |
| "grad_norm": 0.9322580099105835, | |
| "learning_rate": 5.50877258715823e-06, | |
| "loss": 0.6189204454421997, | |
| "step": 904 | |
| }, | |
| { | |
| "epoch": 1.1062271062271063, | |
| "grad_norm": 7.108280181884766, | |
| "learning_rate": 5.5054579375566246e-06, | |
| "loss": 0.9952929019927979, | |
| "step": 906 | |
| }, | |
| { | |
| "epoch": 1.1086691086691087, | |
| "grad_norm": 2.261410713195801, | |
| "learning_rate": 5.5021332694390855e-06, | |
| "loss": 1.095886468887329, | |
| "step": 908 | |
| }, | |
| { | |
| "epoch": 1.1111111111111112, | |
| "grad_norm": 1.4210469722747803, | |
| "learning_rate": 5.498798597908252e-06, | |
| "loss": 0.8107478022575378, | |
| "step": 910 | |
| }, | |
| { | |
| "epoch": 1.1135531135531136, | |
| "grad_norm": 1.540653944015503, | |
| "learning_rate": 5.495453938112209e-06, | |
| "loss": 1.1111472845077515, | |
| "step": 912 | |
| }, | |
| { | |
| "epoch": 1.115995115995116, | |
| "grad_norm": 4.085680961608887, | |
| "learning_rate": 5.492099305244411e-06, | |
| "loss": 0.7442325353622437, | |
| "step": 914 | |
| }, | |
| { | |
| "epoch": 1.1184371184371185, | |
| "grad_norm": 23.62496566772461, | |
| "learning_rate": 5.488734714543617e-06, | |
| "loss": 0.7848599553108215, | |
| "step": 916 | |
| }, | |
| { | |
| "epoch": 1.120879120879121, | |
| "grad_norm": 3.1475861072540283, | |
| "learning_rate": 5.4853601812938225e-06, | |
| "loss": 1.0528494119644165, | |
| "step": 918 | |
| }, | |
| { | |
| "epoch": 1.1233211233211233, | |
| "grad_norm": 3.1182055473327637, | |
| "learning_rate": 5.481975720824186e-06, | |
| "loss": 1.0924468040466309, | |
| "step": 920 | |
| }, | |
| { | |
| "epoch": 1.1257631257631258, | |
| "grad_norm": 5.0743727684021, | |
| "learning_rate": 5.478581348508961e-06, | |
| "loss": 0.9042542576789856, | |
| "step": 922 | |
| }, | |
| { | |
| "epoch": 1.1282051282051282, | |
| "grad_norm": 2.026230812072754, | |
| "learning_rate": 5.475177079767427e-06, | |
| "loss": 0.7077540755271912, | |
| "step": 924 | |
| }, | |
| { | |
| "epoch": 1.1306471306471306, | |
| "grad_norm": 2.2267894744873047, | |
| "learning_rate": 5.47176293006382e-06, | |
| "loss": 0.6727372407913208, | |
| "step": 926 | |
| }, | |
| { | |
| "epoch": 1.133089133089133, | |
| "grad_norm": 6.727903366088867, | |
| "learning_rate": 5.468338914907262e-06, | |
| "loss": 0.556076169013977, | |
| "step": 928 | |
| }, | |
| { | |
| "epoch": 1.1355311355311355, | |
| "grad_norm": 3.433774471282959, | |
| "learning_rate": 5.4649050498516865e-06, | |
| "loss": 1.0447014570236206, | |
| "step": 930 | |
| }, | |
| { | |
| "epoch": 1.137973137973138, | |
| "grad_norm": 5.706188201904297, | |
| "learning_rate": 5.461461350495773e-06, | |
| "loss": 1.1313908100128174, | |
| "step": 932 | |
| }, | |
| { | |
| "epoch": 1.1404151404151404, | |
| "grad_norm": 4.141596794128418, | |
| "learning_rate": 5.458007832482875e-06, | |
| "loss": 0.9544519782066345, | |
| "step": 934 | |
| }, | |
| { | |
| "epoch": 1.1428571428571428, | |
| "grad_norm": 13.47986125946045, | |
| "learning_rate": 5.454544511500949e-06, | |
| "loss": 0.944713830947876, | |
| "step": 936 | |
| }, | |
| { | |
| "epoch": 1.1452991452991452, | |
| "grad_norm": 1.946338415145874, | |
| "learning_rate": 5.45107140328248e-06, | |
| "loss": 1.0003188848495483, | |
| "step": 938 | |
| }, | |
| { | |
| "epoch": 1.1477411477411477, | |
| "grad_norm": 1.6882654428482056, | |
| "learning_rate": 5.447588523604413e-06, | |
| "loss": 1.104058861732483, | |
| "step": 940 | |
| }, | |
| { | |
| "epoch": 1.15018315018315, | |
| "grad_norm": 1.3386666774749756, | |
| "learning_rate": 5.444095888288082e-06, | |
| "loss": 1.0284168720245361, | |
| "step": 942 | |
| }, | |
| { | |
| "epoch": 1.1526251526251525, | |
| "grad_norm": 2.4667913913726807, | |
| "learning_rate": 5.440593513199136e-06, | |
| "loss": 0.8081319332122803, | |
| "step": 944 | |
| }, | |
| { | |
| "epoch": 1.155067155067155, | |
| "grad_norm": 1.5070215463638306, | |
| "learning_rate": 5.4370814142474705e-06, | |
| "loss": 0.7546210289001465, | |
| "step": 946 | |
| }, | |
| { | |
| "epoch": 1.1575091575091574, | |
| "grad_norm": 4.182633876800537, | |
| "learning_rate": 5.433559607387149e-06, | |
| "loss": 0.559450089931488, | |
| "step": 948 | |
| }, | |
| { | |
| "epoch": 1.1599511599511598, | |
| "grad_norm": 1.8414170742034912, | |
| "learning_rate": 5.430028108616334e-06, | |
| "loss": 1.0437990427017212, | |
| "step": 950 | |
| }, | |
| { | |
| "epoch": 1.1623931623931625, | |
| "grad_norm": 2.0820398330688477, | |
| "learning_rate": 5.42648693397722e-06, | |
| "loss": 1.0472098588943481, | |
| "step": 952 | |
| }, | |
| { | |
| "epoch": 1.164835164835165, | |
| "grad_norm": 2.6912789344787598, | |
| "learning_rate": 5.4229360995559475e-06, | |
| "loss": 0.8688966631889343, | |
| "step": 954 | |
| }, | |
| { | |
| "epoch": 1.1672771672771673, | |
| "grad_norm": 3.620826482772827, | |
| "learning_rate": 5.4193756214825425e-06, | |
| "loss": 0.9058981537818909, | |
| "step": 956 | |
| }, | |
| { | |
| "epoch": 1.1697191697191698, | |
| "grad_norm": 1.5990374088287354, | |
| "learning_rate": 5.415805515930839e-06, | |
| "loss": 0.7346622347831726, | |
| "step": 958 | |
| }, | |
| { | |
| "epoch": 1.1721611721611722, | |
| "grad_norm": 3.7363603115081787, | |
| "learning_rate": 5.4122257991184005e-06, | |
| "loss": 1.1916764974594116, | |
| "step": 960 | |
| }, | |
| { | |
| "epoch": 1.1746031746031746, | |
| "grad_norm": 2.6380817890167236, | |
| "learning_rate": 5.408636487306453e-06, | |
| "loss": 0.9414402842521667, | |
| "step": 962 | |
| }, | |
| { | |
| "epoch": 1.177045177045177, | |
| "grad_norm": 0.9934802055358887, | |
| "learning_rate": 5.4050375967998095e-06, | |
| "loss": 0.748233437538147, | |
| "step": 964 | |
| }, | |
| { | |
| "epoch": 1.1794871794871795, | |
| "grad_norm": 1.6639022827148438, | |
| "learning_rate": 5.401429143946796e-06, | |
| "loss": 1.1463696956634521, | |
| "step": 966 | |
| }, | |
| { | |
| "epoch": 1.181929181929182, | |
| "grad_norm": 2.9646964073181152, | |
| "learning_rate": 5.397811145139173e-06, | |
| "loss": 1.188360333442688, | |
| "step": 968 | |
| }, | |
| { | |
| "epoch": 1.1843711843711844, | |
| "grad_norm": 4.223168849945068, | |
| "learning_rate": 5.3941836168120655e-06, | |
| "loss": 0.7662742137908936, | |
| "step": 970 | |
| }, | |
| { | |
| "epoch": 1.1868131868131868, | |
| "grad_norm": 1.6976840496063232, | |
| "learning_rate": 5.3905465754438905e-06, | |
| "loss": 1.205042839050293, | |
| "step": 972 | |
| }, | |
| { | |
| "epoch": 1.1892551892551892, | |
| "grad_norm": 1.893657922744751, | |
| "learning_rate": 5.3869000375562746e-06, | |
| "loss": 0.8556756377220154, | |
| "step": 974 | |
| }, | |
| { | |
| "epoch": 1.1916971916971917, | |
| "grad_norm": 2.201516628265381, | |
| "learning_rate": 5.383244019713987e-06, | |
| "loss": 0.7409634590148926, | |
| "step": 976 | |
| }, | |
| { | |
| "epoch": 1.1941391941391941, | |
| "grad_norm": 2.08370041847229, | |
| "learning_rate": 5.379578538524857e-06, | |
| "loss": 0.8899902701377869, | |
| "step": 978 | |
| }, | |
| { | |
| "epoch": 1.1965811965811965, | |
| "grad_norm": 2.196622133255005, | |
| "learning_rate": 5.3759036106397045e-06, | |
| "loss": 1.10507071018219, | |
| "step": 980 | |
| }, | |
| { | |
| "epoch": 1.199023199023199, | |
| "grad_norm": 1.8301552534103394, | |
| "learning_rate": 5.372219252752263e-06, | |
| "loss": 1.1082658767700195, | |
| "step": 982 | |
| }, | |
| { | |
| "epoch": 1.2014652014652014, | |
| "grad_norm": 2.458317518234253, | |
| "learning_rate": 5.368525481599099e-06, | |
| "loss": 1.1760859489440918, | |
| "step": 984 | |
| }, | |
| { | |
| "epoch": 1.2039072039072038, | |
| "grad_norm": 7.877130508422852, | |
| "learning_rate": 5.3648223139595424e-06, | |
| "loss": 0.7288396954536438, | |
| "step": 986 | |
| }, | |
| { | |
| "epoch": 1.2063492063492063, | |
| "grad_norm": 8.319933891296387, | |
| "learning_rate": 5.361109766655608e-06, | |
| "loss": 0.7217950820922852, | |
| "step": 988 | |
| }, | |
| { | |
| "epoch": 1.2087912087912087, | |
| "grad_norm": 2.940162181854248, | |
| "learning_rate": 5.357387856551917e-06, | |
| "loss": 0.896350085735321, | |
| "step": 990 | |
| }, | |
| { | |
| "epoch": 1.2112332112332111, | |
| "grad_norm": 1.7916265726089478, | |
| "learning_rate": 5.353656600555624e-06, | |
| "loss": 1.1697304248809814, | |
| "step": 992 | |
| }, | |
| { | |
| "epoch": 1.2136752136752136, | |
| "grad_norm": 2.0972354412078857, | |
| "learning_rate": 5.3499160156163355e-06, | |
| "loss": 1.0300666093826294, | |
| "step": 994 | |
| }, | |
| { | |
| "epoch": 1.2161172161172162, | |
| "grad_norm": 5.365437984466553, | |
| "learning_rate": 5.346166118726038e-06, | |
| "loss": 1.0767557621002197, | |
| "step": 996 | |
| }, | |
| { | |
| "epoch": 1.2185592185592187, | |
| "grad_norm": 2.915233612060547, | |
| "learning_rate": 5.342406926919017e-06, | |
| "loss": 1.0875110626220703, | |
| "step": 998 | |
| }, | |
| { | |
| "epoch": 1.221001221001221, | |
| "grad_norm": 3.7588627338409424, | |
| "learning_rate": 5.338638457271783e-06, | |
| "loss": 0.754725992679596, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 1.2234432234432235, | |
| "grad_norm": 1.6952946186065674, | |
| "learning_rate": 5.33486072690299e-06, | |
| "loss": 0.8764317631721497, | |
| "step": 1002 | |
| }, | |
| { | |
| "epoch": 1.225885225885226, | |
| "grad_norm": 4.733268737792969, | |
| "learning_rate": 5.33107375297336e-06, | |
| "loss": 1.1004637479782104, | |
| "step": 1004 | |
| }, | |
| { | |
| "epoch": 1.2283272283272284, | |
| "grad_norm": 2.42063307762146, | |
| "learning_rate": 5.327277552685608e-06, | |
| "loss": 0.8175575733184814, | |
| "step": 1006 | |
| }, | |
| { | |
| "epoch": 1.2307692307692308, | |
| "grad_norm": 1.254707932472229, | |
| "learning_rate": 5.323472143284354e-06, | |
| "loss": 0.7044642567634583, | |
| "step": 1008 | |
| }, | |
| { | |
| "epoch": 1.2332112332112333, | |
| "grad_norm": 1.5242104530334473, | |
| "learning_rate": 5.319657542056059e-06, | |
| "loss": 1.1522612571716309, | |
| "step": 1010 | |
| }, | |
| { | |
| "epoch": 1.2356532356532357, | |
| "grad_norm": 0.8862828612327576, | |
| "learning_rate": 5.315833766328934e-06, | |
| "loss": 0.6255777478218079, | |
| "step": 1012 | |
| }, | |
| { | |
| "epoch": 1.2380952380952381, | |
| "grad_norm": 2.1223502159118652, | |
| "learning_rate": 5.312000833472867e-06, | |
| "loss": 1.1391960382461548, | |
| "step": 1014 | |
| }, | |
| { | |
| "epoch": 1.2405372405372406, | |
| "grad_norm": 3.2272346019744873, | |
| "learning_rate": 5.308158760899344e-06, | |
| "loss": 1.0914331674575806, | |
| "step": 1016 | |
| }, | |
| { | |
| "epoch": 1.242979242979243, | |
| "grad_norm": 1.3873460292816162, | |
| "learning_rate": 5.304307566061369e-06, | |
| "loss": 1.0861551761627197, | |
| "step": 1018 | |
| }, | |
| { | |
| "epoch": 1.2454212454212454, | |
| "grad_norm": 2.2356748580932617, | |
| "learning_rate": 5.300447266453383e-06, | |
| "loss": 0.7876569032669067, | |
| "step": 1020 | |
| }, | |
| { | |
| "epoch": 1.2478632478632479, | |
| "grad_norm": 12.627351760864258, | |
| "learning_rate": 5.296577879611189e-06, | |
| "loss": 0.8845806121826172, | |
| "step": 1022 | |
| }, | |
| { | |
| "epoch": 1.2503052503052503, | |
| "grad_norm": 1.6576119661331177, | |
| "learning_rate": 5.292699423111867e-06, | |
| "loss": 1.0714237689971924, | |
| "step": 1024 | |
| }, | |
| { | |
| "epoch": 1.2527472527472527, | |
| "grad_norm": 1.7833123207092285, | |
| "learning_rate": 5.2888119145736995e-06, | |
| "loss": 1.0774952173233032, | |
| "step": 1026 | |
| }, | |
| { | |
| "epoch": 1.2551892551892552, | |
| "grad_norm": 2.1097469329833984, | |
| "learning_rate": 5.284915371656087e-06, | |
| "loss": 1.020277500152588, | |
| "step": 1028 | |
| }, | |
| { | |
| "epoch": 1.2576312576312576, | |
| "grad_norm": 2.2238245010375977, | |
| "learning_rate": 5.281009812059471e-06, | |
| "loss": 0.9611441493034363, | |
| "step": 1030 | |
| }, | |
| { | |
| "epoch": 1.26007326007326, | |
| "grad_norm": 3.3395566940307617, | |
| "learning_rate": 5.277095253525251e-06, | |
| "loss": 1.1791523694992065, | |
| "step": 1032 | |
| }, | |
| { | |
| "epoch": 1.2625152625152625, | |
| "grad_norm": 8.59581470489502, | |
| "learning_rate": 5.273171713835705e-06, | |
| "loss": 1.2326103448867798, | |
| "step": 1034 | |
| }, | |
| { | |
| "epoch": 1.264957264957265, | |
| "grad_norm": 2.7946317195892334, | |
| "learning_rate": 5.269239210813909e-06, | |
| "loss": 0.7500050067901611, | |
| "step": 1036 | |
| }, | |
| { | |
| "epoch": 1.2673992673992673, | |
| "grad_norm": 1.8145307302474976, | |
| "learning_rate": 5.265297762323656e-06, | |
| "loss": 0.6441988348960876, | |
| "step": 1038 | |
| }, | |
| { | |
| "epoch": 1.2698412698412698, | |
| "grad_norm": 1.4063717126846313, | |
| "learning_rate": 5.261347386269376e-06, | |
| "loss": 0.7451719641685486, | |
| "step": 1040 | |
| }, | |
| { | |
| "epoch": 1.2722832722832722, | |
| "grad_norm": 1.3844637870788574, | |
| "learning_rate": 5.25738810059605e-06, | |
| "loss": 1.1439226865768433, | |
| "step": 1042 | |
| }, | |
| { | |
| "epoch": 1.2747252747252746, | |
| "grad_norm": 1.3475596904754639, | |
| "learning_rate": 5.253419923289135e-06, | |
| "loss": 0.8882352113723755, | |
| "step": 1044 | |
| }, | |
| { | |
| "epoch": 1.277167277167277, | |
| "grad_norm": 2.1434133052825928, | |
| "learning_rate": 5.24944287237448e-06, | |
| "loss": 1.088852882385254, | |
| "step": 1046 | |
| }, | |
| { | |
| "epoch": 1.2796092796092795, | |
| "grad_norm": 4.796410083770752, | |
| "learning_rate": 5.245456965918238e-06, | |
| "loss": 0.7765331268310547, | |
| "step": 1048 | |
| }, | |
| { | |
| "epoch": 1.282051282051282, | |
| "grad_norm": 3.4816200733184814, | |
| "learning_rate": 5.241462222026794e-06, | |
| "loss": 0.4130716323852539, | |
| "step": 1050 | |
| }, | |
| { | |
| "epoch": 1.2844932844932844, | |
| "grad_norm": 1.4668511152267456, | |
| "learning_rate": 5.237458658846679e-06, | |
| "loss": 1.024600625038147, | |
| "step": 1052 | |
| }, | |
| { | |
| "epoch": 1.2869352869352868, | |
| "grad_norm": 2.391469717025757, | |
| "learning_rate": 5.2334462945644816e-06, | |
| "loss": 0.8750287294387817, | |
| "step": 1054 | |
| }, | |
| { | |
| "epoch": 1.2893772893772895, | |
| "grad_norm": 9.1031494140625, | |
| "learning_rate": 5.229425147406772e-06, | |
| "loss": 0.7807760834693909, | |
| "step": 1056 | |
| }, | |
| { | |
| "epoch": 1.291819291819292, | |
| "grad_norm": 2.396406888961792, | |
| "learning_rate": 5.225395235640017e-06, | |
| "loss": 1.2667182683944702, | |
| "step": 1058 | |
| }, | |
| { | |
| "epoch": 1.2942612942612943, | |
| "grad_norm": 1.8769642114639282, | |
| "learning_rate": 5.221356577570503e-06, | |
| "loss": 1.1226063966751099, | |
| "step": 1060 | |
| }, | |
| { | |
| "epoch": 1.2967032967032968, | |
| "grad_norm": 3.907742500305176, | |
| "learning_rate": 5.217309191544241e-06, | |
| "loss": 0.6234646439552307, | |
| "step": 1062 | |
| }, | |
| { | |
| "epoch": 1.2991452991452992, | |
| "grad_norm": 1.400547742843628, | |
| "learning_rate": 5.213253095946892e-06, | |
| "loss": 1.0653111934661865, | |
| "step": 1064 | |
| }, | |
| { | |
| "epoch": 1.3015873015873016, | |
| "grad_norm": 3.261629104614258, | |
| "learning_rate": 5.209188309203678e-06, | |
| "loss": 0.7619715332984924, | |
| "step": 1066 | |
| }, | |
| { | |
| "epoch": 1.304029304029304, | |
| "grad_norm": 2.847996234893799, | |
| "learning_rate": 5.20511484977931e-06, | |
| "loss": 1.0668418407440186, | |
| "step": 1068 | |
| }, | |
| { | |
| "epoch": 1.3064713064713065, | |
| "grad_norm": 10.302043914794922, | |
| "learning_rate": 5.201032736177884e-06, | |
| "loss": 0.8345942497253418, | |
| "step": 1070 | |
| }, | |
| { | |
| "epoch": 1.308913308913309, | |
| "grad_norm": 3.0986499786376953, | |
| "learning_rate": 5.196941986942818e-06, | |
| "loss": 1.0029431581497192, | |
| "step": 1072 | |
| }, | |
| { | |
| "epoch": 1.3113553113553114, | |
| "grad_norm": 2.5847387313842773, | |
| "learning_rate": 5.192842620656753e-06, | |
| "loss": 0.7861806750297546, | |
| "step": 1074 | |
| }, | |
| { | |
| "epoch": 1.3137973137973138, | |
| "grad_norm": 7.488101005554199, | |
| "learning_rate": 5.1887346559414754e-06, | |
| "loss": 0.7227391600608826, | |
| "step": 1076 | |
| }, | |
| { | |
| "epoch": 1.3162393162393162, | |
| "grad_norm": 2.137644052505493, | |
| "learning_rate": 5.184618111457831e-06, | |
| "loss": 1.077176809310913, | |
| "step": 1078 | |
| }, | |
| { | |
| "epoch": 1.3186813186813187, | |
| "grad_norm": 1.8668829202651978, | |
| "learning_rate": 5.18049300590564e-06, | |
| "loss": 0.9580538868904114, | |
| "step": 1080 | |
| }, | |
| { | |
| "epoch": 1.321123321123321, | |
| "grad_norm": 2.09850811958313, | |
| "learning_rate": 5.176359358023612e-06, | |
| "loss": 1.007798433303833, | |
| "step": 1082 | |
| }, | |
| { | |
| "epoch": 1.3235653235653235, | |
| "grad_norm": 1.8359295129776, | |
| "learning_rate": 5.172217186589259e-06, | |
| "loss": 1.0966116189956665, | |
| "step": 1084 | |
| }, | |
| { | |
| "epoch": 1.326007326007326, | |
| "grad_norm": 3.7404189109802246, | |
| "learning_rate": 5.1680665104188175e-06, | |
| "loss": 1.103602647781372, | |
| "step": 1086 | |
| }, | |
| { | |
| "epoch": 1.3284493284493284, | |
| "grad_norm": 2.0235581398010254, | |
| "learning_rate": 5.163907348367152e-06, | |
| "loss": 1.0951250791549683, | |
| "step": 1088 | |
| }, | |
| { | |
| "epoch": 1.3308913308913308, | |
| "grad_norm": 1.9195114374160767, | |
| "learning_rate": 5.159739719327678e-06, | |
| "loss": 0.6674078702926636, | |
| "step": 1090 | |
| }, | |
| { | |
| "epoch": 1.3333333333333333, | |
| "grad_norm": 32.633548736572266, | |
| "learning_rate": 5.155563642232271e-06, | |
| "loss": 0.9193532466888428, | |
| "step": 1092 | |
| }, | |
| { | |
| "epoch": 1.3357753357753357, | |
| "grad_norm": 1.8298438787460327, | |
| "learning_rate": 5.151379136051185e-06, | |
| "loss": 1.1200990676879883, | |
| "step": 1094 | |
| }, | |
| { | |
| "epoch": 1.3382173382173383, | |
| "grad_norm": 2.3729898929595947, | |
| "learning_rate": 5.147186219792962e-06, | |
| "loss": 0.5541606545448303, | |
| "step": 1096 | |
| }, | |
| { | |
| "epoch": 1.3406593406593408, | |
| "grad_norm": 2.2689027786254883, | |
| "learning_rate": 5.142984912504351e-06, | |
| "loss": 1.203918695449829, | |
| "step": 1098 | |
| }, | |
| { | |
| "epoch": 1.3431013431013432, | |
| "grad_norm": 1.5877676010131836, | |
| "learning_rate": 5.138775233270214e-06, | |
| "loss": 1.0981378555297852, | |
| "step": 1100 | |
| }, | |
| { | |
| "epoch": 1.3455433455433456, | |
| "grad_norm": 1.9208431243896484, | |
| "learning_rate": 5.134557201213442e-06, | |
| "loss": 1.0556029081344604, | |
| "step": 1102 | |
| }, | |
| { | |
| "epoch": 1.347985347985348, | |
| "grad_norm": 3.21431827545166, | |
| "learning_rate": 5.130330835494878e-06, | |
| "loss": 0.8595597147941589, | |
| "step": 1104 | |
| }, | |
| { | |
| "epoch": 1.3504273504273505, | |
| "grad_norm": 1.5517840385437012, | |
| "learning_rate": 5.126096155313212e-06, | |
| "loss": 1.117148995399475, | |
| "step": 1106 | |
| }, | |
| { | |
| "epoch": 1.352869352869353, | |
| "grad_norm": 6.060091495513916, | |
| "learning_rate": 5.12185317990491e-06, | |
| "loss": 0.9276140928268433, | |
| "step": 1108 | |
| }, | |
| { | |
| "epoch": 1.3553113553113554, | |
| "grad_norm": 3.9731998443603516, | |
| "learning_rate": 5.117601928544116e-06, | |
| "loss": 1.1391878128051758, | |
| "step": 1110 | |
| }, | |
| { | |
| "epoch": 1.3577533577533578, | |
| "grad_norm": 1.3449074029922485, | |
| "learning_rate": 5.113342420542571e-06, | |
| "loss": 1.115125298500061, | |
| "step": 1112 | |
| }, | |
| { | |
| "epoch": 1.3601953601953602, | |
| "grad_norm": 2.480891466140747, | |
| "learning_rate": 5.109074675249519e-06, | |
| "loss": 1.0627572536468506, | |
| "step": 1114 | |
| }, | |
| { | |
| "epoch": 1.3626373626373627, | |
| "grad_norm": 7.083606719970703, | |
| "learning_rate": 5.104798712051628e-06, | |
| "loss": 0.46798741817474365, | |
| "step": 1116 | |
| }, | |
| { | |
| "epoch": 1.3650793650793651, | |
| "grad_norm": 2.1328299045562744, | |
| "learning_rate": 5.1005145503728895e-06, | |
| "loss": 1.190787672996521, | |
| "step": 1118 | |
| }, | |
| { | |
| "epoch": 1.3675213675213675, | |
| "grad_norm": 1.7239574193954468, | |
| "learning_rate": 5.096222209674545e-06, | |
| "loss": 1.1722108125686646, | |
| "step": 1120 | |
| }, | |
| { | |
| "epoch": 1.36996336996337, | |
| "grad_norm": 10.229917526245117, | |
| "learning_rate": 5.091921709454986e-06, | |
| "loss": 0.9836089015007019, | |
| "step": 1122 | |
| }, | |
| { | |
| "epoch": 1.3724053724053724, | |
| "grad_norm": 1.4455071687698364, | |
| "learning_rate": 5.087613069249669e-06, | |
| "loss": 0.9871035814285278, | |
| "step": 1124 | |
| }, | |
| { | |
| "epoch": 1.3748473748473748, | |
| "grad_norm": 3.920485734939575, | |
| "learning_rate": 5.08329630863103e-06, | |
| "loss": 1.271277904510498, | |
| "step": 1126 | |
| }, | |
| { | |
| "epoch": 1.3772893772893773, | |
| "grad_norm": 3.3860907554626465, | |
| "learning_rate": 5.078971447208389e-06, | |
| "loss": 1.0518757104873657, | |
| "step": 1128 | |
| }, | |
| { | |
| "epoch": 1.3797313797313797, | |
| "grad_norm": 1.6341384649276733, | |
| "learning_rate": 5.074638504627869e-06, | |
| "loss": 0.8103001117706299, | |
| "step": 1130 | |
| }, | |
| { | |
| "epoch": 1.3821733821733821, | |
| "grad_norm": 6.043010234832764, | |
| "learning_rate": 5.070297500572299e-06, | |
| "loss": 0.7450224757194519, | |
| "step": 1132 | |
| }, | |
| { | |
| "epoch": 1.3846153846153846, | |
| "grad_norm": 4.167097091674805, | |
| "learning_rate": 5.065948454761128e-06, | |
| "loss": 0.5298635363578796, | |
| "step": 1134 | |
| }, | |
| { | |
| "epoch": 1.387057387057387, | |
| "grad_norm": 1.658325433731079, | |
| "learning_rate": 5.06159138695034e-06, | |
| "loss": 0.8351241946220398, | |
| "step": 1136 | |
| }, | |
| { | |
| "epoch": 1.3894993894993894, | |
| "grad_norm": 2.1943113803863525, | |
| "learning_rate": 5.057226316932354e-06, | |
| "loss": 1.1314613819122314, | |
| "step": 1138 | |
| }, | |
| { | |
| "epoch": 1.3919413919413919, | |
| "grad_norm": 2.5029122829437256, | |
| "learning_rate": 5.052853264535944e-06, | |
| "loss": 0.8110060095787048, | |
| "step": 1140 | |
| }, | |
| { | |
| "epoch": 1.3943833943833943, | |
| "grad_norm": 5.180810928344727, | |
| "learning_rate": 5.048472249626143e-06, | |
| "loss": 0.941763162612915, | |
| "step": 1142 | |
| }, | |
| { | |
| "epoch": 1.3968253968253967, | |
| "grad_norm": 3.3859798908233643, | |
| "learning_rate": 5.044083292104154e-06, | |
| "loss": 0.6929471492767334, | |
| "step": 1144 | |
| }, | |
| { | |
| "epoch": 1.3992673992673992, | |
| "grad_norm": 1.9731738567352295, | |
| "learning_rate": 5.039686411907261e-06, | |
| "loss": 1.062778115272522, | |
| "step": 1146 | |
| }, | |
| { | |
| "epoch": 1.4017094017094016, | |
| "grad_norm": 9.248348236083984, | |
| "learning_rate": 5.035281629008738e-06, | |
| "loss": 0.907078742980957, | |
| "step": 1148 | |
| }, | |
| { | |
| "epoch": 1.404151404151404, | |
| "grad_norm": 1.8255606889724731, | |
| "learning_rate": 5.030868963417756e-06, | |
| "loss": 0.7409341931343079, | |
| "step": 1150 | |
| }, | |
| { | |
| "epoch": 1.4065934065934065, | |
| "grad_norm": 1.724865198135376, | |
| "learning_rate": 5.026448435179296e-06, | |
| "loss": 0.9270036816596985, | |
| "step": 1152 | |
| }, | |
| { | |
| "epoch": 1.409035409035409, | |
| "grad_norm": 2.166942596435547, | |
| "learning_rate": 5.022020064374055e-06, | |
| "loss": 0.9618323445320129, | |
| "step": 1154 | |
| }, | |
| { | |
| "epoch": 1.4114774114774113, | |
| "grad_norm": 6.376034736633301, | |
| "learning_rate": 5.017583871118354e-06, | |
| "loss": 0.7564554810523987, | |
| "step": 1156 | |
| }, | |
| { | |
| "epoch": 1.4139194139194138, | |
| "grad_norm": 2.646700859069824, | |
| "learning_rate": 5.013139875564054e-06, | |
| "loss": 0.9836897253990173, | |
| "step": 1158 | |
| }, | |
| { | |
| "epoch": 1.4163614163614164, | |
| "grad_norm": 3.115039825439453, | |
| "learning_rate": 5.008688097898451e-06, | |
| "loss": 0.9553764462471008, | |
| "step": 1160 | |
| }, | |
| { | |
| "epoch": 1.4188034188034189, | |
| "grad_norm": 1.7480710744857788, | |
| "learning_rate": 5.004228558344195e-06, | |
| "loss": 0.7161940932273865, | |
| "step": 1162 | |
| }, | |
| { | |
| "epoch": 1.4212454212454213, | |
| "grad_norm": 7.046204090118408, | |
| "learning_rate": 4.9997612771592e-06, | |
| "loss": 0.7383595705032349, | |
| "step": 1164 | |
| }, | |
| { | |
| "epoch": 1.4236874236874237, | |
| "grad_norm": 2.7713022232055664, | |
| "learning_rate": 4.9952862746365396e-06, | |
| "loss": 1.1871694326400757, | |
| "step": 1166 | |
| }, | |
| { | |
| "epoch": 1.4261294261294262, | |
| "grad_norm": 0.5916857123374939, | |
| "learning_rate": 4.990803571104367e-06, | |
| "loss": 1.031806230545044, | |
| "step": 1168 | |
| }, | |
| { | |
| "epoch": 1.4285714285714286, | |
| "grad_norm": 3.3403420448303223, | |
| "learning_rate": 4.9863131869258165e-06, | |
| "loss": 0.8283597230911255, | |
| "step": 1170 | |
| }, | |
| { | |
| "epoch": 1.431013431013431, | |
| "grad_norm": 2.6560847759246826, | |
| "learning_rate": 4.981815142498913e-06, | |
| "loss": 1.10054612159729, | |
| "step": 1172 | |
| }, | |
| { | |
| "epoch": 1.4334554334554335, | |
| "grad_norm": 1.0640076398849487, | |
| "learning_rate": 4.977309458256478e-06, | |
| "loss": 1.0439369678497314, | |
| "step": 1174 | |
| }, | |
| { | |
| "epoch": 1.435897435897436, | |
| "grad_norm": 2.3129022121429443, | |
| "learning_rate": 4.972796154666038e-06, | |
| "loss": 1.143113136291504, | |
| "step": 1176 | |
| }, | |
| { | |
| "epoch": 1.4383394383394383, | |
| "grad_norm": 3.9890077114105225, | |
| "learning_rate": 4.968275252229734e-06, | |
| "loss": 1.0084482431411743, | |
| "step": 1178 | |
| }, | |
| { | |
| "epoch": 1.4407814407814408, | |
| "grad_norm": 2.268794536590576, | |
| "learning_rate": 4.96374677148422e-06, | |
| "loss": 0.9494715929031372, | |
| "step": 1180 | |
| }, | |
| { | |
| "epoch": 1.4432234432234432, | |
| "grad_norm": 1.620401382446289, | |
| "learning_rate": 4.959210733000581e-06, | |
| "loss": 0.7894858717918396, | |
| "step": 1182 | |
| }, | |
| { | |
| "epoch": 1.4456654456654456, | |
| "grad_norm": 2.264465808868408, | |
| "learning_rate": 4.954667157384227e-06, | |
| "loss": 1.153456211090088, | |
| "step": 1184 | |
| }, | |
| { | |
| "epoch": 1.448107448107448, | |
| "grad_norm": 2.0204758644104004, | |
| "learning_rate": 4.9501160652748135e-06, | |
| "loss": 1.0387881994247437, | |
| "step": 1186 | |
| }, | |
| { | |
| "epoch": 1.4505494505494505, | |
| "grad_norm": 4.494890213012695, | |
| "learning_rate": 4.945557477346135e-06, | |
| "loss": 0.8388749957084656, | |
| "step": 1188 | |
| }, | |
| { | |
| "epoch": 1.452991452991453, | |
| "grad_norm": 5.138271808624268, | |
| "learning_rate": 4.940991414306042e-06, | |
| "loss": 1.0478739738464355, | |
| "step": 1190 | |
| }, | |
| { | |
| "epoch": 1.4554334554334554, | |
| "grad_norm": 1.7801316976547241, | |
| "learning_rate": 4.936417896896333e-06, | |
| "loss": 0.8928017616271973, | |
| "step": 1192 | |
| }, | |
| { | |
| "epoch": 1.4578754578754578, | |
| "grad_norm": 5.119191646575928, | |
| "learning_rate": 4.931836945892678e-06, | |
| "loss": 0.7815787196159363, | |
| "step": 1194 | |
| }, | |
| { | |
| "epoch": 1.4603174603174602, | |
| "grad_norm": 2.283468008041382, | |
| "learning_rate": 4.92724858210451e-06, | |
| "loss": 0.7689943909645081, | |
| "step": 1196 | |
| }, | |
| { | |
| "epoch": 1.462759462759463, | |
| "grad_norm": 1.3747724294662476, | |
| "learning_rate": 4.922652826374934e-06, | |
| "loss": 1.0252704620361328, | |
| "step": 1198 | |
| }, | |
| { | |
| "epoch": 1.4652014652014653, | |
| "grad_norm": 3.9742517471313477, | |
| "learning_rate": 4.918049699580636e-06, | |
| "loss": 1.176680326461792, | |
| "step": 1200 | |
| }, | |
| { | |
| "epoch": 1.4676434676434678, | |
| "grad_norm": 2.1025235652923584, | |
| "learning_rate": 4.913439222631789e-06, | |
| "loss": 1.0160706043243408, | |
| "step": 1202 | |
| }, | |
| { | |
| "epoch": 1.4700854700854702, | |
| "grad_norm": 4.321649074554443, | |
| "learning_rate": 4.9088214164719465e-06, | |
| "loss": 1.0500729084014893, | |
| "step": 1204 | |
| }, | |
| { | |
| "epoch": 1.4725274725274726, | |
| "grad_norm": 4.172434329986572, | |
| "learning_rate": 4.9041963020779625e-06, | |
| "loss": 0.9391635060310364, | |
| "step": 1206 | |
| }, | |
| { | |
| "epoch": 1.474969474969475, | |
| "grad_norm": 8.891393661499023, | |
| "learning_rate": 4.899563900459888e-06, | |
| "loss": 0.8474172949790955, | |
| "step": 1208 | |
| }, | |
| { | |
| "epoch": 1.4774114774114775, | |
| "grad_norm": 5.215544700622559, | |
| "learning_rate": 4.894924232660875e-06, | |
| "loss": 0.8866111040115356, | |
| "step": 1210 | |
| }, | |
| { | |
| "epoch": 1.47985347985348, | |
| "grad_norm": 3.0435311794281006, | |
| "learning_rate": 4.890277319757084e-06, | |
| "loss": 1.029344916343689, | |
| "step": 1212 | |
| }, | |
| { | |
| "epoch": 1.4822954822954824, | |
| "grad_norm": 1.7593703269958496, | |
| "learning_rate": 4.8856231828575886e-06, | |
| "loss": 1.1196095943450928, | |
| "step": 1214 | |
| }, | |
| { | |
| "epoch": 1.4847374847374848, | |
| "grad_norm": 2.1020376682281494, | |
| "learning_rate": 4.880961843104277e-06, | |
| "loss": 1.0347650051116943, | |
| "step": 1216 | |
| }, | |
| { | |
| "epoch": 1.4871794871794872, | |
| "grad_norm": 1.6005417108535767, | |
| "learning_rate": 4.876293321671754e-06, | |
| "loss": 0.2354688048362732, | |
| "step": 1218 | |
| }, | |
| { | |
| "epoch": 1.4896214896214897, | |
| "grad_norm": 1.6489475965499878, | |
| "learning_rate": 4.871617639767254e-06, | |
| "loss": 0.6391555070877075, | |
| "step": 1220 | |
| }, | |
| { | |
| "epoch": 1.492063492063492, | |
| "grad_norm": 2.582890748977661, | |
| "learning_rate": 4.8669348186305345e-06, | |
| "loss": 1.1244972944259644, | |
| "step": 1222 | |
| }, | |
| { | |
| "epoch": 1.4945054945054945, | |
| "grad_norm": 3.2143290042877197, | |
| "learning_rate": 4.862244879533784e-06, | |
| "loss": 0.8394033312797546, | |
| "step": 1224 | |
| }, | |
| { | |
| "epoch": 1.496947496947497, | |
| "grad_norm": 2.631812810897827, | |
| "learning_rate": 4.857547843781526e-06, | |
| "loss": 1.1298140287399292, | |
| "step": 1226 | |
| }, | |
| { | |
| "epoch": 1.4993894993894994, | |
| "grad_norm": 3.6919734477996826, | |
| "learning_rate": 4.8528437327105185e-06, | |
| "loss": 0.4192117154598236, | |
| "step": 1228 | |
| }, | |
| { | |
| "epoch": 1.5018315018315018, | |
| "grad_norm": 8.116545677185059, | |
| "learning_rate": 4.848132567689667e-06, | |
| "loss": 0.8103526830673218, | |
| "step": 1230 | |
| }, | |
| { | |
| "epoch": 1.5042735042735043, | |
| "grad_norm": 1.7116774320602417, | |
| "learning_rate": 4.843414370119911e-06, | |
| "loss": 0.6593791246414185, | |
| "step": 1232 | |
| }, | |
| { | |
| "epoch": 1.5067155067155067, | |
| "grad_norm": 3.8022818565368652, | |
| "learning_rate": 4.838689161434145e-06, | |
| "loss": 1.126307725906372, | |
| "step": 1234 | |
| }, | |
| { | |
| "epoch": 1.5091575091575091, | |
| "grad_norm": 1.9455960988998413, | |
| "learning_rate": 4.8339569630971035e-06, | |
| "loss": 0.9250009655952454, | |
| "step": 1236 | |
| }, | |
| { | |
| "epoch": 1.5115995115995116, | |
| "grad_norm": 1.9260501861572266, | |
| "learning_rate": 4.829217796605279e-06, | |
| "loss": 0.8760790228843689, | |
| "step": 1238 | |
| }, | |
| { | |
| "epoch": 1.514041514041514, | |
| "grad_norm": 2.5248501300811768, | |
| "learning_rate": 4.824471683486813e-06, | |
| "loss": 1.0439480543136597, | |
| "step": 1240 | |
| }, | |
| { | |
| "epoch": 1.5164835164835164, | |
| "grad_norm": 1.3928033113479614, | |
| "learning_rate": 4.819718645301406e-06, | |
| "loss": 1.0599212646484375, | |
| "step": 1242 | |
| }, | |
| { | |
| "epoch": 1.5189255189255189, | |
| "grad_norm": 6.743286609649658, | |
| "learning_rate": 4.814958703640217e-06, | |
| "loss": 0.6897165775299072, | |
| "step": 1244 | |
| }, | |
| { | |
| "epoch": 1.5213675213675213, | |
| "grad_norm": 3.9725565910339355, | |
| "learning_rate": 4.810191880125759e-06, | |
| "loss": 0.8072797060012817, | |
| "step": 1246 | |
| }, | |
| { | |
| "epoch": 1.5238095238095237, | |
| "grad_norm": 2.459388256072998, | |
| "learning_rate": 4.805418196411814e-06, | |
| "loss": 0.8643714785575867, | |
| "step": 1248 | |
| }, | |
| { | |
| "epoch": 1.5262515262515262, | |
| "grad_norm": 2.454529047012329, | |
| "learning_rate": 4.80063767418332e-06, | |
| "loss": 1.1347299814224243, | |
| "step": 1250 | |
| }, | |
| { | |
| "epoch": 1.5286935286935286, | |
| "grad_norm": 6.470121383666992, | |
| "learning_rate": 4.795850335156287e-06, | |
| "loss": 0.845814049243927, | |
| "step": 1252 | |
| }, | |
| { | |
| "epoch": 1.531135531135531, | |
| "grad_norm": 1.3302001953125, | |
| "learning_rate": 4.791056201077684e-06, | |
| "loss": 1.0788911581039429, | |
| "step": 1254 | |
| }, | |
| { | |
| "epoch": 1.5335775335775335, | |
| "grad_norm": 2.616361379623413, | |
| "learning_rate": 4.786255293725351e-06, | |
| "loss": 1.188575029373169, | |
| "step": 1256 | |
| }, | |
| { | |
| "epoch": 1.536019536019536, | |
| "grad_norm": 0.9566062092781067, | |
| "learning_rate": 4.781447634907898e-06, | |
| "loss": 0.3384564220905304, | |
| "step": 1258 | |
| }, | |
| { | |
| "epoch": 1.5384615384615383, | |
| "grad_norm": 4.599119186401367, | |
| "learning_rate": 4.776633246464597e-06, | |
| "loss": 0.7301375269889832, | |
| "step": 1260 | |
| }, | |
| { | |
| "epoch": 1.5409035409035408, | |
| "grad_norm": 3.47401762008667, | |
| "learning_rate": 4.7718121502653e-06, | |
| "loss": 0.5812518000602722, | |
| "step": 1262 | |
| }, | |
| { | |
| "epoch": 1.5433455433455432, | |
| "grad_norm": 2.5774216651916504, | |
| "learning_rate": 4.76698436821032e-06, | |
| "loss": 0.7762341499328613, | |
| "step": 1264 | |
| }, | |
| { | |
| "epoch": 1.5457875457875456, | |
| "grad_norm": 2.030524492263794, | |
| "learning_rate": 4.762149922230351e-06, | |
| "loss": 0.7970203757286072, | |
| "step": 1266 | |
| }, | |
| { | |
| "epoch": 1.5482295482295483, | |
| "grad_norm": 1.4194061756134033, | |
| "learning_rate": 4.757308834286349e-06, | |
| "loss": 1.1093299388885498, | |
| "step": 1268 | |
| }, | |
| { | |
| "epoch": 1.5506715506715507, | |
| "grad_norm": 13.08940601348877, | |
| "learning_rate": 4.752461126369447e-06, | |
| "loss": 0.5118930339813232, | |
| "step": 1270 | |
| }, | |
| { | |
| "epoch": 1.5531135531135531, | |
| "grad_norm": 1.599934697151184, | |
| "learning_rate": 4.747606820500848e-06, | |
| "loss": 1.1399681568145752, | |
| "step": 1272 | |
| }, | |
| { | |
| "epoch": 1.5555555555555556, | |
| "grad_norm": 3.1104736328125, | |
| "learning_rate": 4.7427459387317305e-06, | |
| "loss": 0.9280550479888916, | |
| "step": 1274 | |
| }, | |
| { | |
| "epoch": 1.557997557997558, | |
| "grad_norm": 2.4784369468688965, | |
| "learning_rate": 4.73787850314314e-06, | |
| "loss": 1.3923664093017578, | |
| "step": 1276 | |
| }, | |
| { | |
| "epoch": 1.5604395604395604, | |
| "grad_norm": 1.778232216835022, | |
| "learning_rate": 4.7330045358458965e-06, | |
| "loss": 0.9597633481025696, | |
| "step": 1278 | |
| }, | |
| { | |
| "epoch": 1.5628815628815629, | |
| "grad_norm": 1.8193713426589966, | |
| "learning_rate": 4.728124058980489e-06, | |
| "loss": 1.052099585533142, | |
| "step": 1280 | |
| }, | |
| { | |
| "epoch": 1.5653235653235653, | |
| "grad_norm": 3.915374279022217, | |
| "learning_rate": 4.723237094716978e-06, | |
| "loss": 0.6439841389656067, | |
| "step": 1282 | |
| }, | |
| { | |
| "epoch": 1.5677655677655677, | |
| "grad_norm": 1.5543129444122314, | |
| "learning_rate": 4.718343665254892e-06, | |
| "loss": 1.0367350578308105, | |
| "step": 1284 | |
| }, | |
| { | |
| "epoch": 1.5702075702075702, | |
| "grad_norm": 1.7215509414672852, | |
| "learning_rate": 4.713443792823132e-06, | |
| "loss": 1.059045672416687, | |
| "step": 1286 | |
| }, | |
| { | |
| "epoch": 1.5726495726495726, | |
| "grad_norm": 2.069295644760132, | |
| "learning_rate": 4.708537499679864e-06, | |
| "loss": 0.5439974069595337, | |
| "step": 1288 | |
| }, | |
| { | |
| "epoch": 1.575091575091575, | |
| "grad_norm": 2.160947561264038, | |
| "learning_rate": 4.7036248081124194e-06, | |
| "loss": 0.8789887428283691, | |
| "step": 1290 | |
| }, | |
| { | |
| "epoch": 1.5775335775335775, | |
| "grad_norm": 2.9418277740478516, | |
| "learning_rate": 4.698705740437199e-06, | |
| "loss": 0.36467528343200684, | |
| "step": 1292 | |
| }, | |
| { | |
| "epoch": 1.5799755799755801, | |
| "grad_norm": 1.783849835395813, | |
| "learning_rate": 4.693780318999566e-06, | |
| "loss": 0.7312822341918945, | |
| "step": 1294 | |
| }, | |
| { | |
| "epoch": 1.5824175824175826, | |
| "grad_norm": 1.2510713338851929, | |
| "learning_rate": 4.688848566173743e-06, | |
| "loss": 1.0698741674423218, | |
| "step": 1296 | |
| }, | |
| { | |
| "epoch": 1.584859584859585, | |
| "grad_norm": 1.7499157190322876, | |
| "learning_rate": 4.683910504362718e-06, | |
| "loss": 1.1414761543273926, | |
| "step": 1298 | |
| }, | |
| { | |
| "epoch": 1.5873015873015874, | |
| "grad_norm": 2.5625784397125244, | |
| "learning_rate": 4.678966155998138e-06, | |
| "loss": 0.963818371295929, | |
| "step": 1300 | |
| }, | |
| { | |
| "epoch": 1.5897435897435899, | |
| "grad_norm": 3.9518566131591797, | |
| "learning_rate": 4.674015543540204e-06, | |
| "loss": 1.1232004165649414, | |
| "step": 1302 | |
| }, | |
| { | |
| "epoch": 1.5921855921855923, | |
| "grad_norm": 4.137270450592041, | |
| "learning_rate": 4.669058689477577e-06, | |
| "loss": 1.0733479261398315, | |
| "step": 1304 | |
| }, | |
| { | |
| "epoch": 1.5946275946275947, | |
| "grad_norm": 1.5602595806121826, | |
| "learning_rate": 4.664095616327265e-06, | |
| "loss": 0.6966079473495483, | |
| "step": 1306 | |
| }, | |
| { | |
| "epoch": 1.5970695970695972, | |
| "grad_norm": 1.9313784837722778, | |
| "learning_rate": 4.6591263466345315e-06, | |
| "loss": 1.07318115234375, | |
| "step": 1308 | |
| }, | |
| { | |
| "epoch": 1.5995115995115996, | |
| "grad_norm": 3.182425022125244, | |
| "learning_rate": 4.654150902972787e-06, | |
| "loss": 0.9745839834213257, | |
| "step": 1310 | |
| }, | |
| { | |
| "epoch": 1.601953601953602, | |
| "grad_norm": 1.5878440141677856, | |
| "learning_rate": 4.649169307943489e-06, | |
| "loss": 1.0333638191223145, | |
| "step": 1312 | |
| }, | |
| { | |
| "epoch": 1.6043956043956045, | |
| "grad_norm": 2.5433406829833984, | |
| "learning_rate": 4.644181584176036e-06, | |
| "loss": 1.1256206035614014, | |
| "step": 1314 | |
| }, | |
| { | |
| "epoch": 1.606837606837607, | |
| "grad_norm": 2.123772621154785, | |
| "learning_rate": 4.639187754327669e-06, | |
| "loss": 1.1688880920410156, | |
| "step": 1316 | |
| }, | |
| { | |
| "epoch": 1.6092796092796093, | |
| "grad_norm": 2.0329692363739014, | |
| "learning_rate": 4.634187841083365e-06, | |
| "loss": 1.0112664699554443, | |
| "step": 1318 | |
| }, | |
| { | |
| "epoch": 1.6117216117216118, | |
| "grad_norm": 1.9080368280410767, | |
| "learning_rate": 4.629181867155735e-06, | |
| "loss": 1.2758021354675293, | |
| "step": 1320 | |
| }, | |
| { | |
| "epoch": 1.6141636141636142, | |
| "grad_norm": 1.2908200025558472, | |
| "learning_rate": 4.624169855284923e-06, | |
| "loss": 1.077506184577942, | |
| "step": 1322 | |
| }, | |
| { | |
| "epoch": 1.6166056166056166, | |
| "grad_norm": 4.5929975509643555, | |
| "learning_rate": 4.619151828238503e-06, | |
| "loss": 1.1034421920776367, | |
| "step": 1324 | |
| }, | |
| { | |
| "epoch": 1.619047619047619, | |
| "grad_norm": 3.1997547149658203, | |
| "learning_rate": 4.6141278088113674e-06, | |
| "loss": 0.7947360277175903, | |
| "step": 1326 | |
| }, | |
| { | |
| "epoch": 1.6214896214896215, | |
| "grad_norm": 1.4481651782989502, | |
| "learning_rate": 4.609097819825635e-06, | |
| "loss": 1.0906190872192383, | |
| "step": 1328 | |
| }, | |
| { | |
| "epoch": 1.623931623931624, | |
| "grad_norm": 1.5239222049713135, | |
| "learning_rate": 4.604061884130537e-06, | |
| "loss": 1.1238385438919067, | |
| "step": 1330 | |
| }, | |
| { | |
| "epoch": 1.6263736263736264, | |
| "grad_norm": 17.961843490600586, | |
| "learning_rate": 4.599020024602325e-06, | |
| "loss": 0.872590959072113, | |
| "step": 1332 | |
| }, | |
| { | |
| "epoch": 1.6288156288156288, | |
| "grad_norm": 2.320810556411743, | |
| "learning_rate": 4.593972264144154e-06, | |
| "loss": 1.0335891246795654, | |
| "step": 1334 | |
| }, | |
| { | |
| "epoch": 1.6312576312576312, | |
| "grad_norm": 1.0677226781845093, | |
| "learning_rate": 4.588918625685987e-06, | |
| "loss": 0.6070804595947266, | |
| "step": 1336 | |
| }, | |
| { | |
| "epoch": 1.6336996336996337, | |
| "grad_norm": 6.536888122558594, | |
| "learning_rate": 4.5838591321844895e-06, | |
| "loss": 1.0951392650604248, | |
| "step": 1338 | |
| }, | |
| { | |
| "epoch": 1.636141636141636, | |
| "grad_norm": 15.38590145111084, | |
| "learning_rate": 4.578793806622921e-06, | |
| "loss": 0.5872366428375244, | |
| "step": 1340 | |
| }, | |
| { | |
| "epoch": 1.6385836385836385, | |
| "grad_norm": 2.773411273956299, | |
| "learning_rate": 4.573722672011037e-06, | |
| "loss": 0.44960087537765503, | |
| "step": 1342 | |
| }, | |
| { | |
| "epoch": 1.641025641025641, | |
| "grad_norm": 1.476400375366211, | |
| "learning_rate": 4.568645751384979e-06, | |
| "loss": 1.1929024457931519, | |
| "step": 1344 | |
| }, | |
| { | |
| "epoch": 1.6434676434676434, | |
| "grad_norm": 1.6975760459899902, | |
| "learning_rate": 4.563563067807173e-06, | |
| "loss": 1.1604607105255127, | |
| "step": 1346 | |
| }, | |
| { | |
| "epoch": 1.6459096459096458, | |
| "grad_norm": 27.678466796875, | |
| "learning_rate": 4.558474644366225e-06, | |
| "loss": 0.6984640955924988, | |
| "step": 1348 | |
| }, | |
| { | |
| "epoch": 1.6483516483516483, | |
| "grad_norm": 4.300885200500488, | |
| "learning_rate": 4.553380504176812e-06, | |
| "loss": 0.7492251396179199, | |
| "step": 1350 | |
| }, | |
| { | |
| "epoch": 1.6507936507936507, | |
| "grad_norm": 1.742562174797058, | |
| "learning_rate": 4.548280670379582e-06, | |
| "loss": 1.089888334274292, | |
| "step": 1352 | |
| }, | |
| { | |
| "epoch": 1.6532356532356531, | |
| "grad_norm": 1.481261968612671, | |
| "learning_rate": 4.543175166141047e-06, | |
| "loss": 1.1312068700790405, | |
| "step": 1354 | |
| }, | |
| { | |
| "epoch": 1.6556776556776556, | |
| "grad_norm": 2.4384007453918457, | |
| "learning_rate": 4.538064014653475e-06, | |
| "loss": 1.1034740209579468, | |
| "step": 1356 | |
| }, | |
| { | |
| "epoch": 1.658119658119658, | |
| "grad_norm": 1.1855982542037964, | |
| "learning_rate": 4.53294723913479e-06, | |
| "loss": 0.5771256685256958, | |
| "step": 1358 | |
| }, | |
| { | |
| "epoch": 1.6605616605616604, | |
| "grad_norm": 2.1966373920440674, | |
| "learning_rate": 4.527824862828463e-06, | |
| "loss": 0.789630651473999, | |
| "step": 1360 | |
| }, | |
| { | |
| "epoch": 1.6630036630036629, | |
| "grad_norm": 2.3562395572662354, | |
| "learning_rate": 4.522696909003407e-06, | |
| "loss": 1.1317930221557617, | |
| "step": 1362 | |
| }, | |
| { | |
| "epoch": 1.6654456654456653, | |
| "grad_norm": 1.2977938652038574, | |
| "learning_rate": 4.517563400953871e-06, | |
| "loss": 1.0996814966201782, | |
| "step": 1364 | |
| }, | |
| { | |
| "epoch": 1.6678876678876677, | |
| "grad_norm": 2.646000385284424, | |
| "learning_rate": 4.512424361999335e-06, | |
| "loss": 1.086019515991211, | |
| "step": 1366 | |
| }, | |
| { | |
| "epoch": 1.6703296703296702, | |
| "grad_norm": 2.2506117820739746, | |
| "learning_rate": 4.507279815484404e-06, | |
| "loss": 1.0686463117599487, | |
| "step": 1368 | |
| }, | |
| { | |
| "epoch": 1.6727716727716728, | |
| "grad_norm": 3.5996057987213135, | |
| "learning_rate": 4.5021297847787e-06, | |
| "loss": 1.1250067949295044, | |
| "step": 1370 | |
| }, | |
| { | |
| "epoch": 1.6752136752136753, | |
| "grad_norm": 1.2507672309875488, | |
| "learning_rate": 4.496974293276761e-06, | |
| "loss": 0.6428678631782532, | |
| "step": 1372 | |
| }, | |
| { | |
| "epoch": 1.6776556776556777, | |
| "grad_norm": 16.312124252319336, | |
| "learning_rate": 4.491813364397928e-06, | |
| "loss": 0.3962022364139557, | |
| "step": 1374 | |
| }, | |
| { | |
| "epoch": 1.6800976800976801, | |
| "grad_norm": 4.418454647064209, | |
| "learning_rate": 4.486647021586242e-06, | |
| "loss": 1.1909222602844238, | |
| "step": 1376 | |
| }, | |
| { | |
| "epoch": 1.6825396825396826, | |
| "grad_norm": 1.5935609340667725, | |
| "learning_rate": 4.48147528831034e-06, | |
| "loss": 1.1969082355499268, | |
| "step": 1378 | |
| }, | |
| { | |
| "epoch": 1.684981684981685, | |
| "grad_norm": 0.7602794766426086, | |
| "learning_rate": 4.476298188063342e-06, | |
| "loss": 1.0184684991836548, | |
| "step": 1380 | |
| }, | |
| { | |
| "epoch": 1.6874236874236874, | |
| "grad_norm": 4.655670642852783, | |
| "learning_rate": 4.471115744362751e-06, | |
| "loss": 1.003674030303955, | |
| "step": 1382 | |
| }, | |
| { | |
| "epoch": 1.6898656898656899, | |
| "grad_norm": 2.1681458950042725, | |
| "learning_rate": 4.465927980750341e-06, | |
| "loss": 1.1820536851882935, | |
| "step": 1384 | |
| }, | |
| { | |
| "epoch": 1.6923076923076923, | |
| "grad_norm": 1.1264445781707764, | |
| "learning_rate": 4.460734920792055e-06, | |
| "loss": 1.1060798168182373, | |
| "step": 1386 | |
| }, | |
| { | |
| "epoch": 1.6947496947496947, | |
| "grad_norm": 1.4772337675094604, | |
| "learning_rate": 4.455536588077892e-06, | |
| "loss": 1.1331332921981812, | |
| "step": 1388 | |
| }, | |
| { | |
| "epoch": 1.6971916971916972, | |
| "grad_norm": 6.615632057189941, | |
| "learning_rate": 4.450333006221807e-06, | |
| "loss": 0.4286938011646271, | |
| "step": 1390 | |
| }, | |
| { | |
| "epoch": 1.6996336996336996, | |
| "grad_norm": 1.7158353328704834, | |
| "learning_rate": 4.445124198861595e-06, | |
| "loss": 1.0181334018707275, | |
| "step": 1392 | |
| }, | |
| { | |
| "epoch": 1.702075702075702, | |
| "grad_norm": 1.6327100992202759, | |
| "learning_rate": 4.439910189658791e-06, | |
| "loss": 0.9523332118988037, | |
| "step": 1394 | |
| }, | |
| { | |
| "epoch": 1.7045177045177047, | |
| "grad_norm": 2.332667350769043, | |
| "learning_rate": 4.43469100229856e-06, | |
| "loss": 1.1586647033691406, | |
| "step": 1396 | |
| }, | |
| { | |
| "epoch": 1.7069597069597071, | |
| "grad_norm": 5.861179828643799, | |
| "learning_rate": 4.4294666604895896e-06, | |
| "loss": 0.8811005353927612, | |
| "step": 1398 | |
| }, | |
| { | |
| "epoch": 1.7094017094017095, | |
| "grad_norm": 8.61947250366211, | |
| "learning_rate": 4.42423718796398e-06, | |
| "loss": 0.43517377972602844, | |
| "step": 1400 | |
| }, | |
| { | |
| "epoch": 1.711843711843712, | |
| "grad_norm": 3.1414754390716553, | |
| "learning_rate": 4.41900260847714e-06, | |
| "loss": 0.9462797045707703, | |
| "step": 1402 | |
| }, | |
| { | |
| "epoch": 1.7142857142857144, | |
| "grad_norm": 3.1354236602783203, | |
| "learning_rate": 4.413762945807678e-06, | |
| "loss": 1.1175096035003662, | |
| "step": 1404 | |
| }, | |
| { | |
| "epoch": 1.7167277167277168, | |
| "grad_norm": 3.440849542617798, | |
| "learning_rate": 4.40851822375729e-06, | |
| "loss": 0.5517919063568115, | |
| "step": 1406 | |
| }, | |
| { | |
| "epoch": 1.7191697191697193, | |
| "grad_norm": 2.9297659397125244, | |
| "learning_rate": 4.403268466150658e-06, | |
| "loss": 1.0800657272338867, | |
| "step": 1408 | |
| }, | |
| { | |
| "epoch": 1.7216117216117217, | |
| "grad_norm": 3.0104856491088867, | |
| "learning_rate": 4.398013696835337e-06, | |
| "loss": 0.7462184429168701, | |
| "step": 1410 | |
| }, | |
| { | |
| "epoch": 1.7240537240537241, | |
| "grad_norm": 7.846835136413574, | |
| "learning_rate": 4.39275393968165e-06, | |
| "loss": 0.8748076558113098, | |
| "step": 1412 | |
| }, | |
| { | |
| "epoch": 1.7264957264957266, | |
| "grad_norm": 1.4625284671783447, | |
| "learning_rate": 4.3874892185825736e-06, | |
| "loss": 1.1260775327682495, | |
| "step": 1414 | |
| }, | |
| { | |
| "epoch": 1.728937728937729, | |
| "grad_norm": 7.651988506317139, | |
| "learning_rate": 4.3822195574536385e-06, | |
| "loss": 0.911249041557312, | |
| "step": 1416 | |
| }, | |
| { | |
| "epoch": 1.7313797313797314, | |
| "grad_norm": 1.9895511865615845, | |
| "learning_rate": 4.3769449802328134e-06, | |
| "loss": 1.1256855726242065, | |
| "step": 1418 | |
| }, | |
| { | |
| "epoch": 1.7338217338217339, | |
| "grad_norm": 3.724893093109131, | |
| "learning_rate": 4.371665510880398e-06, | |
| "loss": 0.8602355122566223, | |
| "step": 1420 | |
| }, | |
| { | |
| "epoch": 1.7362637362637363, | |
| "grad_norm": 6.229957580566406, | |
| "learning_rate": 4.366381173378918e-06, | |
| "loss": 0.8753630518913269, | |
| "step": 1422 | |
| }, | |
| { | |
| "epoch": 1.7387057387057387, | |
| "grad_norm": 3.7382354736328125, | |
| "learning_rate": 4.36109199173301e-06, | |
| "loss": 0.8494032025337219, | |
| "step": 1424 | |
| }, | |
| { | |
| "epoch": 1.7411477411477412, | |
| "grad_norm": 2.8685145378112793, | |
| "learning_rate": 4.355797989969319e-06, | |
| "loss": 0.7920373678207397, | |
| "step": 1426 | |
| }, | |
| { | |
| "epoch": 1.7435897435897436, | |
| "grad_norm": 2.0900766849517822, | |
| "learning_rate": 4.350499192136381e-06, | |
| "loss": 1.128261685371399, | |
| "step": 1428 | |
| }, | |
| { | |
| "epoch": 1.746031746031746, | |
| "grad_norm": 1.9242794513702393, | |
| "learning_rate": 4.345195622304524e-06, | |
| "loss": 1.1854214668273926, | |
| "step": 1430 | |
| }, | |
| { | |
| "epoch": 1.7484737484737485, | |
| "grad_norm": 6.52469539642334, | |
| "learning_rate": 4.3398873045657505e-06, | |
| "loss": 0.7444266080856323, | |
| "step": 1432 | |
| }, | |
| { | |
| "epoch": 1.750915750915751, | |
| "grad_norm": 2.14506459236145, | |
| "learning_rate": 4.334574263033629e-06, | |
| "loss": 0.6873703002929688, | |
| "step": 1434 | |
| }, | |
| { | |
| "epoch": 1.7533577533577533, | |
| "grad_norm": 3.528667688369751, | |
| "learning_rate": 4.329256521843191e-06, | |
| "loss": 0.7260130643844604, | |
| "step": 1436 | |
| }, | |
| { | |
| "epoch": 1.7557997557997558, | |
| "grad_norm": 3.055670976638794, | |
| "learning_rate": 4.323934105150814e-06, | |
| "loss": 0.5219581723213196, | |
| "step": 1438 | |
| }, | |
| { | |
| "epoch": 1.7582417582417582, | |
| "grad_norm": 1.5880358219146729, | |
| "learning_rate": 4.318607037134113e-06, | |
| "loss": 1.0635778903961182, | |
| "step": 1440 | |
| }, | |
| { | |
| "epoch": 1.7606837606837606, | |
| "grad_norm": 1.6739574670791626, | |
| "learning_rate": 4.313275341991835e-06, | |
| "loss": 0.9275929927825928, | |
| "step": 1442 | |
| }, | |
| { | |
| "epoch": 1.763125763125763, | |
| "grad_norm": 2.3931827545166016, | |
| "learning_rate": 4.307939043943743e-06, | |
| "loss": 0.7328923940658569, | |
| "step": 1444 | |
| }, | |
| { | |
| "epoch": 1.7655677655677655, | |
| "grad_norm": 1.5773603916168213, | |
| "learning_rate": 4.302598167230514e-06, | |
| "loss": 1.1174166202545166, | |
| "step": 1446 | |
| }, | |
| { | |
| "epoch": 1.768009768009768, | |
| "grad_norm": 1.3908194303512573, | |
| "learning_rate": 4.297252736113619e-06, | |
| "loss": 1.104804277420044, | |
| "step": 1448 | |
| }, | |
| { | |
| "epoch": 1.7704517704517704, | |
| "grad_norm": 2.2045950889587402, | |
| "learning_rate": 4.291902774875222e-06, | |
| "loss": 1.0062525272369385, | |
| "step": 1450 | |
| }, | |
| { | |
| "epoch": 1.7728937728937728, | |
| "grad_norm": 4.392017841339111, | |
| "learning_rate": 4.2865483078180606e-06, | |
| "loss": 0.49355483055114746, | |
| "step": 1452 | |
| }, | |
| { | |
| "epoch": 1.7753357753357752, | |
| "grad_norm": 1.741620421409607, | |
| "learning_rate": 4.281189359265346e-06, | |
| "loss": 0.801204264163971, | |
| "step": 1454 | |
| }, | |
| { | |
| "epoch": 1.7777777777777777, | |
| "grad_norm": 4.615424633026123, | |
| "learning_rate": 4.275825953560643e-06, | |
| "loss": 0.5927553772926331, | |
| "step": 1456 | |
| }, | |
| { | |
| "epoch": 1.7802197802197801, | |
| "grad_norm": 1.8360828161239624, | |
| "learning_rate": 4.270458115067766e-06, | |
| "loss": 1.1054664850234985, | |
| "step": 1458 | |
| }, | |
| { | |
| "epoch": 1.7826617826617825, | |
| "grad_norm": 28.914661407470703, | |
| "learning_rate": 4.265085868170663e-06, | |
| "loss": 1.1111050844192505, | |
| "step": 1460 | |
| }, | |
| { | |
| "epoch": 1.785103785103785, | |
| "grad_norm": 2.3152248859405518, | |
| "learning_rate": 4.259709237273311e-06, | |
| "loss": 0.9055600762367249, | |
| "step": 1462 | |
| }, | |
| { | |
| "epoch": 1.7875457875457874, | |
| "grad_norm": 1.7210235595703125, | |
| "learning_rate": 4.2543282467995985e-06, | |
| "loss": 0.7722167372703552, | |
| "step": 1464 | |
| }, | |
| { | |
| "epoch": 1.7899877899877898, | |
| "grad_norm": 2.618022918701172, | |
| "learning_rate": 4.248942921193221e-06, | |
| "loss": 0.8464701175689697, | |
| "step": 1466 | |
| }, | |
| { | |
| "epoch": 1.7924297924297923, | |
| "grad_norm": 6.523756980895996, | |
| "learning_rate": 4.243553284917566e-06, | |
| "loss": 0.848709225654602, | |
| "step": 1468 | |
| }, | |
| { | |
| "epoch": 1.7948717948717947, | |
| "grad_norm": 2.5969009399414062, | |
| "learning_rate": 4.2381593624556e-06, | |
| "loss": 1.0547089576721191, | |
| "step": 1470 | |
| }, | |
| { | |
| "epoch": 1.7973137973137974, | |
| "grad_norm": 1.5815178155899048, | |
| "learning_rate": 4.232761178309761e-06, | |
| "loss": 1.0379605293273926, | |
| "step": 1472 | |
| }, | |
| { | |
| "epoch": 1.7997557997557998, | |
| "grad_norm": 2.534233570098877, | |
| "learning_rate": 4.22735875700185e-06, | |
| "loss": 0.7152073979377747, | |
| "step": 1474 | |
| }, | |
| { | |
| "epoch": 1.8021978021978022, | |
| "grad_norm": 2.298525810241699, | |
| "learning_rate": 4.22195212307291e-06, | |
| "loss": 0.691750168800354, | |
| "step": 1476 | |
| }, | |
| { | |
| "epoch": 1.8046398046398047, | |
| "grad_norm": 1.962921380996704, | |
| "learning_rate": 4.216541301083124e-06, | |
| "loss": 0.769491970539093, | |
| "step": 1478 | |
| }, | |
| { | |
| "epoch": 1.807081807081807, | |
| "grad_norm": 2.270589828491211, | |
| "learning_rate": 4.211126315611698e-06, | |
| "loss": 0.4893834888935089, | |
| "step": 1480 | |
| }, | |
| { | |
| "epoch": 1.8095238095238095, | |
| "grad_norm": 2.069971799850464, | |
| "learning_rate": 4.205707191256752e-06, | |
| "loss": 0.7786384224891663, | |
| "step": 1482 | |
| }, | |
| { | |
| "epoch": 1.811965811965812, | |
| "grad_norm": 1.248873233795166, | |
| "learning_rate": 4.200283952635208e-06, | |
| "loss": 1.2088333368301392, | |
| "step": 1484 | |
| }, | |
| { | |
| "epoch": 1.8144078144078144, | |
| "grad_norm": 1.4751837253570557, | |
| "learning_rate": 4.1948566243826756e-06, | |
| "loss": 1.0881723165512085, | |
| "step": 1486 | |
| }, | |
| { | |
| "epoch": 1.8168498168498168, | |
| "grad_norm": 2.016812562942505, | |
| "learning_rate": 4.189425231153342e-06, | |
| "loss": 0.6552821397781372, | |
| "step": 1488 | |
| }, | |
| { | |
| "epoch": 1.8192918192918193, | |
| "grad_norm": 1.4564069509506226, | |
| "learning_rate": 4.183989797619862e-06, | |
| "loss": 0.8445190191268921, | |
| "step": 1490 | |
| }, | |
| { | |
| "epoch": 1.8217338217338217, | |
| "grad_norm": 1.54276704788208, | |
| "learning_rate": 4.178550348473242e-06, | |
| "loss": 1.0718238353729248, | |
| "step": 1492 | |
| }, | |
| { | |
| "epoch": 1.8241758241758241, | |
| "grad_norm": 3.247328996658325, | |
| "learning_rate": 4.17310690842273e-06, | |
| "loss": 0.7224799990653992, | |
| "step": 1494 | |
| }, | |
| { | |
| "epoch": 1.8266178266178266, | |
| "grad_norm": 1.877454161643982, | |
| "learning_rate": 4.167659502195704e-06, | |
| "loss": 0.8526558876037598, | |
| "step": 1496 | |
| }, | |
| { | |
| "epoch": 1.8290598290598292, | |
| "grad_norm": 1.8572306632995605, | |
| "learning_rate": 4.162208154537559e-06, | |
| "loss": 1.5043039321899414, | |
| "step": 1498 | |
| }, | |
| { | |
| "epoch": 1.8315018315018317, | |
| "grad_norm": 2.081958055496216, | |
| "learning_rate": 4.156752890211592e-06, | |
| "loss": 1.2200380563735962, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 1.833943833943834, | |
| "grad_norm": 7.550018787384033, | |
| "learning_rate": 4.151293733998894e-06, | |
| "loss": 0.8529073596000671, | |
| "step": 1502 | |
| }, | |
| { | |
| "epoch": 1.8363858363858365, | |
| "grad_norm": 1.3198851346969604, | |
| "learning_rate": 4.145830710698235e-06, | |
| "loss": 0.8785728812217712, | |
| "step": 1504 | |
| }, | |
| { | |
| "epoch": 1.838827838827839, | |
| "grad_norm": 3.390364170074463, | |
| "learning_rate": 4.140363845125951e-06, | |
| "loss": 0.7647203803062439, | |
| "step": 1506 | |
| }, | |
| { | |
| "epoch": 1.8412698412698414, | |
| "grad_norm": 1.699028730392456, | |
| "learning_rate": 4.134893162115832e-06, | |
| "loss": 0.8751652836799622, | |
| "step": 1508 | |
| }, | |
| { | |
| "epoch": 1.8437118437118438, | |
| "grad_norm": 1.77727472782135, | |
| "learning_rate": 4.129418686519011e-06, | |
| "loss": 1.1023961305618286, | |
| "step": 1510 | |
| }, | |
| { | |
| "epoch": 1.8461538461538463, | |
| "grad_norm": 5.108009338378906, | |
| "learning_rate": 4.123940443203845e-06, | |
| "loss": 1.0676683187484741, | |
| "step": 1512 | |
| }, | |
| { | |
| "epoch": 1.8485958485958487, | |
| "grad_norm": 4.390260696411133, | |
| "learning_rate": 4.118458457055811e-06, | |
| "loss": 1.1111550331115723, | |
| "step": 1514 | |
| }, | |
| { | |
| "epoch": 1.8510378510378511, | |
| "grad_norm": 9.013320922851562, | |
| "learning_rate": 4.112972752977384e-06, | |
| "loss": 0.8402547836303711, | |
| "step": 1516 | |
| }, | |
| { | |
| "epoch": 1.8534798534798536, | |
| "grad_norm": 2.0569653511047363, | |
| "learning_rate": 4.107483355887933e-06, | |
| "loss": 1.160443902015686, | |
| "step": 1518 | |
| }, | |
| { | |
| "epoch": 1.855921855921856, | |
| "grad_norm": 2.105151891708374, | |
| "learning_rate": 4.101990290723597e-06, | |
| "loss": 0.9111926555633545, | |
| "step": 1520 | |
| }, | |
| { | |
| "epoch": 1.8583638583638584, | |
| "grad_norm": 2.9433350563049316, | |
| "learning_rate": 4.096493582437182e-06, | |
| "loss": 0.9556376338005066, | |
| "step": 1522 | |
| }, | |
| { | |
| "epoch": 1.8608058608058609, | |
| "grad_norm": 3.2840516567230225, | |
| "learning_rate": 4.090993255998042e-06, | |
| "loss": 0.953590989112854, | |
| "step": 1524 | |
| }, | |
| { | |
| "epoch": 1.8632478632478633, | |
| "grad_norm": 1.5776071548461914, | |
| "learning_rate": 4.0854893363919666e-06, | |
| "loss": 0.9239129424095154, | |
| "step": 1526 | |
| }, | |
| { | |
| "epoch": 1.8656898656898657, | |
| "grad_norm": 3.6965951919555664, | |
| "learning_rate": 4.079981848621067e-06, | |
| "loss": 0.7926357984542847, | |
| "step": 1528 | |
| }, | |
| { | |
| "epoch": 1.8681318681318682, | |
| "grad_norm": 1.4808310270309448, | |
| "learning_rate": 4.074470817703664e-06, | |
| "loss": 0.9207745790481567, | |
| "step": 1530 | |
| }, | |
| { | |
| "epoch": 1.8705738705738706, | |
| "grad_norm": 1.6555721759796143, | |
| "learning_rate": 4.0689562686741735e-06, | |
| "loss": 0.7711512446403503, | |
| "step": 1532 | |
| }, | |
| { | |
| "epoch": 1.873015873015873, | |
| "grad_norm": 1.7683871984481812, | |
| "learning_rate": 4.063438226582994e-06, | |
| "loss": 0.8535028696060181, | |
| "step": 1534 | |
| }, | |
| { | |
| "epoch": 1.8754578754578755, | |
| "grad_norm": 4.361166954040527, | |
| "learning_rate": 4.057916716496388e-06, | |
| "loss": 1.119881272315979, | |
| "step": 1536 | |
| }, | |
| { | |
| "epoch": 1.877899877899878, | |
| "grad_norm": 1.3381246328353882, | |
| "learning_rate": 4.052391763496376e-06, | |
| "loss": 0.9711114168167114, | |
| "step": 1538 | |
| }, | |
| { | |
| "epoch": 1.8803418803418803, | |
| "grad_norm": 1.1705869436264038, | |
| "learning_rate": 4.046863392680614e-06, | |
| "loss": 1.0517574548721313, | |
| "step": 1540 | |
| }, | |
| { | |
| "epoch": 1.8827838827838828, | |
| "grad_norm": 2.8563392162323, | |
| "learning_rate": 4.041331629162289e-06, | |
| "loss": 1.060086965560913, | |
| "step": 1542 | |
| }, | |
| { | |
| "epoch": 1.8852258852258852, | |
| "grad_norm": 1.8419415950775146, | |
| "learning_rate": 4.035796498069995e-06, | |
| "loss": 0.9262551069259644, | |
| "step": 1544 | |
| }, | |
| { | |
| "epoch": 1.8876678876678876, | |
| "grad_norm": 2.282602310180664, | |
| "learning_rate": 4.030258024547625e-06, | |
| "loss": 1.1138805150985718, | |
| "step": 1546 | |
| }, | |
| { | |
| "epoch": 1.89010989010989, | |
| "grad_norm": 1.8432765007019043, | |
| "learning_rate": 4.024716233754256e-06, | |
| "loss": 1.1018257141113281, | |
| "step": 1548 | |
| }, | |
| { | |
| "epoch": 1.8925518925518925, | |
| "grad_norm": 1.7539535760879517, | |
| "learning_rate": 4.019171150864034e-06, | |
| "loss": 0.6556220650672913, | |
| "step": 1550 | |
| }, | |
| { | |
| "epoch": 1.894993894993895, | |
| "grad_norm": 1.8477240800857544, | |
| "learning_rate": 4.013622801066059e-06, | |
| "loss": 1.1449788808822632, | |
| "step": 1552 | |
| }, | |
| { | |
| "epoch": 1.8974358974358974, | |
| "grad_norm": 6.399501323699951, | |
| "learning_rate": 4.008071209564272e-06, | |
| "loss": 0.8968675136566162, | |
| "step": 1554 | |
| }, | |
| { | |
| "epoch": 1.8998778998778998, | |
| "grad_norm": 2.375927448272705, | |
| "learning_rate": 4.00251640157734e-06, | |
| "loss": 1.178645372390747, | |
| "step": 1556 | |
| }, | |
| { | |
| "epoch": 1.9023199023199022, | |
| "grad_norm": 2.230632781982422, | |
| "learning_rate": 3.996958402338539e-06, | |
| "loss": 0.9704781174659729, | |
| "step": 1558 | |
| }, | |
| { | |
| "epoch": 1.9047619047619047, | |
| "grad_norm": 1.7731071710586548, | |
| "learning_rate": 3.991397237095644e-06, | |
| "loss": 0.6532866358757019, | |
| "step": 1560 | |
| }, | |
| { | |
| "epoch": 1.907203907203907, | |
| "grad_norm": 1.6434381008148193, | |
| "learning_rate": 3.985832931110811e-06, | |
| "loss": 1.0863354206085205, | |
| "step": 1562 | |
| }, | |
| { | |
| "epoch": 1.9096459096459095, | |
| "grad_norm": 1.637665867805481, | |
| "learning_rate": 3.980265509660464e-06, | |
| "loss": 0.777664065361023, | |
| "step": 1564 | |
| }, | |
| { | |
| "epoch": 1.912087912087912, | |
| "grad_norm": 1.7624237537384033, | |
| "learning_rate": 3.974694998035176e-06, | |
| "loss": 1.0450448989868164, | |
| "step": 1566 | |
| }, | |
| { | |
| "epoch": 1.9145299145299144, | |
| "grad_norm": 2.4146924018859863, | |
| "learning_rate": 3.969121421539562e-06, | |
| "loss": 0.9237840175628662, | |
| "step": 1568 | |
| }, | |
| { | |
| "epoch": 1.9169719169719168, | |
| "grad_norm": 1.4809205532073975, | |
| "learning_rate": 3.963544805492158e-06, | |
| "loss": 0.7602840662002563, | |
| "step": 1570 | |
| }, | |
| { | |
| "epoch": 1.9194139194139193, | |
| "grad_norm": 1.7811788320541382, | |
| "learning_rate": 3.957965175225305e-06, | |
| "loss": 1.1881275177001953, | |
| "step": 1572 | |
| }, | |
| { | |
| "epoch": 1.9218559218559217, | |
| "grad_norm": 6.949507236480713, | |
| "learning_rate": 3.95238255608504e-06, | |
| "loss": 0.7732429504394531, | |
| "step": 1574 | |
| }, | |
| { | |
| "epoch": 1.9242979242979243, | |
| "grad_norm": 7.414094924926758, | |
| "learning_rate": 3.946796973430975e-06, | |
| "loss": 1.035898208618164, | |
| "step": 1576 | |
| }, | |
| { | |
| "epoch": 1.9267399267399268, | |
| "grad_norm": 3.1300573348999023, | |
| "learning_rate": 3.941208452636183e-06, | |
| "loss": 0.7754787802696228, | |
| "step": 1578 | |
| }, | |
| { | |
| "epoch": 1.9291819291819292, | |
| "grad_norm": 3.253720998764038, | |
| "learning_rate": 3.935617019087088e-06, | |
| "loss": 1.0728245973587036, | |
| "step": 1580 | |
| }, | |
| { | |
| "epoch": 1.9316239316239316, | |
| "grad_norm": 3.1016621589660645, | |
| "learning_rate": 3.930022698183341e-06, | |
| "loss": 0.32128599286079407, | |
| "step": 1582 | |
| }, | |
| { | |
| "epoch": 1.934065934065934, | |
| "grad_norm": 4.842744827270508, | |
| "learning_rate": 3.924425515337713e-06, | |
| "loss": 0.5422120094299316, | |
| "step": 1584 | |
| }, | |
| { | |
| "epoch": 1.9365079365079365, | |
| "grad_norm": 3.4001028537750244, | |
| "learning_rate": 3.918825495975971e-06, | |
| "loss": 0.6693114638328552, | |
| "step": 1586 | |
| }, | |
| { | |
| "epoch": 1.938949938949939, | |
| "grad_norm": 1.329004168510437, | |
| "learning_rate": 3.913222665536772e-06, | |
| "loss": 1.1855244636535645, | |
| "step": 1588 | |
| }, | |
| { | |
| "epoch": 1.9413919413919414, | |
| "grad_norm": 1.492633581161499, | |
| "learning_rate": 3.90761704947154e-06, | |
| "loss": 1.1521289348602295, | |
| "step": 1590 | |
| }, | |
| { | |
| "epoch": 1.9438339438339438, | |
| "grad_norm": 2.899778127670288, | |
| "learning_rate": 3.902008673244353e-06, | |
| "loss": 1.049696445465088, | |
| "step": 1592 | |
| }, | |
| { | |
| "epoch": 1.9462759462759462, | |
| "grad_norm": 1.1973531246185303, | |
| "learning_rate": 3.896397562331828e-06, | |
| "loss": 0.781209409236908, | |
| "step": 1594 | |
| }, | |
| { | |
| "epoch": 1.9487179487179487, | |
| "grad_norm": 2.6766295433044434, | |
| "learning_rate": 3.890783742223003e-06, | |
| "loss": 0.6280114650726318, | |
| "step": 1596 | |
| }, | |
| { | |
| "epoch": 1.9511599511599511, | |
| "grad_norm": 1.4150158166885376, | |
| "learning_rate": 3.885167238419226e-06, | |
| "loss": 0.7452678680419922, | |
| "step": 1598 | |
| }, | |
| { | |
| "epoch": 1.9536019536019538, | |
| "grad_norm": 1.5957354307174683, | |
| "learning_rate": 3.879548076434033e-06, | |
| "loss": 0.7975012063980103, | |
| "step": 1600 | |
| }, | |
| { | |
| "epoch": 1.9560439560439562, | |
| "grad_norm": 1.440159559249878, | |
| "learning_rate": 3.873926281793037e-06, | |
| "loss": 1.0404465198516846, | |
| "step": 1602 | |
| }, | |
| { | |
| "epoch": 1.9584859584859586, | |
| "grad_norm": 2.519695997238159, | |
| "learning_rate": 3.868301880033808e-06, | |
| "loss": 1.2595645189285278, | |
| "step": 1604 | |
| }, | |
| { | |
| "epoch": 1.960927960927961, | |
| "grad_norm": 3.7468674182891846, | |
| "learning_rate": 3.862674896705762e-06, | |
| "loss": 1.4057308435440063, | |
| "step": 1606 | |
| }, | |
| { | |
| "epoch": 1.9633699633699635, | |
| "grad_norm": 1.4645074605941772, | |
| "learning_rate": 3.85704535737004e-06, | |
| "loss": 1.1884080171585083, | |
| "step": 1608 | |
| }, | |
| { | |
| "epoch": 1.965811965811966, | |
| "grad_norm": 1.430010199546814, | |
| "learning_rate": 3.851413287599392e-06, | |
| "loss": 1.058860182762146, | |
| "step": 1610 | |
| }, | |
| { | |
| "epoch": 1.9682539682539684, | |
| "grad_norm": 4.485745906829834, | |
| "learning_rate": 3.845778712978068e-06, | |
| "loss": 0.9613910913467407, | |
| "step": 1612 | |
| }, | |
| { | |
| "epoch": 1.9706959706959708, | |
| "grad_norm": 2.261005401611328, | |
| "learning_rate": 3.840141659101692e-06, | |
| "loss": 1.1407309770584106, | |
| "step": 1614 | |
| }, | |
| { | |
| "epoch": 1.9731379731379732, | |
| "grad_norm": 1.5672752857208252, | |
| "learning_rate": 3.834502151577152e-06, | |
| "loss": 0.93012934923172, | |
| "step": 1616 | |
| }, | |
| { | |
| "epoch": 1.9755799755799757, | |
| "grad_norm": 1.4915902614593506, | |
| "learning_rate": 3.828860216022482e-06, | |
| "loss": 1.0084210634231567, | |
| "step": 1618 | |
| }, | |
| { | |
| "epoch": 1.978021978021978, | |
| "grad_norm": 2.3556222915649414, | |
| "learning_rate": 3.8232158780667465e-06, | |
| "loss": 1.1114431619644165, | |
| "step": 1620 | |
| }, | |
| { | |
| "epoch": 1.9804639804639805, | |
| "grad_norm": 1.559486746788025, | |
| "learning_rate": 3.817569163349921e-06, | |
| "loss": 0.762478232383728, | |
| "step": 1622 | |
| }, | |
| { | |
| "epoch": 1.982905982905983, | |
| "grad_norm": 2.0981836318969727, | |
| "learning_rate": 3.8119200975227767e-06, | |
| "loss": 0.570152759552002, | |
| "step": 1624 | |
| }, | |
| { | |
| "epoch": 1.9853479853479854, | |
| "grad_norm": 3.3158349990844727, | |
| "learning_rate": 3.8062687062467704e-06, | |
| "loss": 0.8712815642356873, | |
| "step": 1626 | |
| }, | |
| { | |
| "epoch": 1.9877899877899878, | |
| "grad_norm": 6.709053039550781, | |
| "learning_rate": 3.8006150151939168e-06, | |
| "loss": 1.0017497539520264, | |
| "step": 1628 | |
| }, | |
| { | |
| "epoch": 1.9902319902319903, | |
| "grad_norm": 1.431030511856079, | |
| "learning_rate": 3.7949590500466817e-06, | |
| "loss": 1.154350757598877, | |
| "step": 1630 | |
| }, | |
| { | |
| "epoch": 1.9926739926739927, | |
| "grad_norm": 1.6267858743667603, | |
| "learning_rate": 3.7893008364978575e-06, | |
| "loss": 1.0441209077835083, | |
| "step": 1632 | |
| }, | |
| { | |
| "epoch": 1.9951159951159951, | |
| "grad_norm": 1.9177896976470947, | |
| "learning_rate": 3.783640400250453e-06, | |
| "loss": 0.8070008754730225, | |
| "step": 1634 | |
| }, | |
| { | |
| "epoch": 1.9975579975579976, | |
| "grad_norm": 1.5680376291275024, | |
| "learning_rate": 3.777977767017572e-06, | |
| "loss": 1.167231798171997, | |
| "step": 1636 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "grad_norm": 1.836150884628296, | |
| "learning_rate": 3.7723129625222997e-06, | |
| "loss": 1.096339464187622, | |
| "step": 1638 | |
| }, | |
| { | |
| "epoch": 2.0024420024420024, | |
| "grad_norm": 6.821539878845215, | |
| "learning_rate": 3.7666460124975845e-06, | |
| "loss": 0.9670911431312561, | |
| "step": 1640 | |
| }, | |
| { | |
| "epoch": 2.004884004884005, | |
| "grad_norm": 2.0258708000183105, | |
| "learning_rate": 3.76097694268612e-06, | |
| "loss": 0.6899262070655823, | |
| "step": 1642 | |
| }, | |
| { | |
| "epoch": 2.0073260073260073, | |
| "grad_norm": 1.1317782402038574, | |
| "learning_rate": 3.75530577884023e-06, | |
| "loss": 0.9158855080604553, | |
| "step": 1644 | |
| }, | |
| { | |
| "epoch": 2.0097680097680097, | |
| "grad_norm": 2.7936630249023438, | |
| "learning_rate": 3.7496325467217498e-06, | |
| "loss": 0.6676980257034302, | |
| "step": 1646 | |
| }, | |
| { | |
| "epoch": 2.012210012210012, | |
| "grad_norm": 1.8216701745986938, | |
| "learning_rate": 3.7439572721019104e-06, | |
| "loss": 0.7151694297790527, | |
| "step": 1648 | |
| }, | |
| { | |
| "epoch": 2.0146520146520146, | |
| "grad_norm": 26.17021369934082, | |
| "learning_rate": 3.738279980761222e-06, | |
| "loss": 0.6760666370391846, | |
| "step": 1650 | |
| }, | |
| { | |
| "epoch": 2.017094017094017, | |
| "grad_norm": 5.065937042236328, | |
| "learning_rate": 3.732600698489355e-06, | |
| "loss": 0.7696459293365479, | |
| "step": 1652 | |
| }, | |
| { | |
| "epoch": 2.0195360195360195, | |
| "grad_norm": 2.4799294471740723, | |
| "learning_rate": 3.726919451085024e-06, | |
| "loss": 1.1132228374481201, | |
| "step": 1654 | |
| }, | |
| { | |
| "epoch": 2.021978021978022, | |
| "grad_norm": 1.5045807361602783, | |
| "learning_rate": 3.721236264355871e-06, | |
| "loss": 0.5454859733581543, | |
| "step": 1656 | |
| }, | |
| { | |
| "epoch": 2.0244200244200243, | |
| "grad_norm": 2.9693870544433594, | |
| "learning_rate": 3.715551164118346e-06, | |
| "loss": 1.0050889253616333, | |
| "step": 1658 | |
| }, | |
| { | |
| "epoch": 2.0268620268620268, | |
| "grad_norm": 4.3840484619140625, | |
| "learning_rate": 3.709864176197592e-06, | |
| "loss": 0.969306230545044, | |
| "step": 1660 | |
| }, | |
| { | |
| "epoch": 2.029304029304029, | |
| "grad_norm": 2.8534724712371826, | |
| "learning_rate": 3.7041753264273282e-06, | |
| "loss": 1.00910222530365, | |
| "step": 1662 | |
| }, | |
| { | |
| "epoch": 2.0317460317460316, | |
| "grad_norm": 3.8745787143707275, | |
| "learning_rate": 3.6984846406497306e-06, | |
| "loss": 0.4947141706943512, | |
| "step": 1664 | |
| }, | |
| { | |
| "epoch": 2.034188034188034, | |
| "grad_norm": 10.617118835449219, | |
| "learning_rate": 3.692792144715316e-06, | |
| "loss": 0.6221736073493958, | |
| "step": 1666 | |
| }, | |
| { | |
| "epoch": 2.0366300366300365, | |
| "grad_norm": 1.7398251295089722, | |
| "learning_rate": 3.687097864482821e-06, | |
| "loss": 0.9439553022384644, | |
| "step": 1668 | |
| }, | |
| { | |
| "epoch": 2.039072039072039, | |
| "grad_norm": 2.146132469177246, | |
| "learning_rate": 3.681401825819092e-06, | |
| "loss": 0.6233932971954346, | |
| "step": 1670 | |
| }, | |
| { | |
| "epoch": 2.0415140415140414, | |
| "grad_norm": 2.6649765968322754, | |
| "learning_rate": 3.6757040545989615e-06, | |
| "loss": 0.9565668106079102, | |
| "step": 1672 | |
| }, | |
| { | |
| "epoch": 2.043956043956044, | |
| "grad_norm": 2.2828168869018555, | |
| "learning_rate": 3.6700045767051313e-06, | |
| "loss": 0.9368692636489868, | |
| "step": 1674 | |
| }, | |
| { | |
| "epoch": 2.0463980463980462, | |
| "grad_norm": 1.828999638557434, | |
| "learning_rate": 3.664303418028056e-06, | |
| "loss": 0.6456412076950073, | |
| "step": 1676 | |
| }, | |
| { | |
| "epoch": 2.0488400488400487, | |
| "grad_norm": 5.089777946472168, | |
| "learning_rate": 3.6586006044658275e-06, | |
| "loss": 1.019284725189209, | |
| "step": 1678 | |
| }, | |
| { | |
| "epoch": 2.051282051282051, | |
| "grad_norm": 3.3917853832244873, | |
| "learning_rate": 3.6528961619240525e-06, | |
| "loss": 1.0033758878707886, | |
| "step": 1680 | |
| }, | |
| { | |
| "epoch": 2.0537240537240535, | |
| "grad_norm": 2.755739450454712, | |
| "learning_rate": 3.647190116315739e-06, | |
| "loss": 0.6449636816978455, | |
| "step": 1682 | |
| }, | |
| { | |
| "epoch": 2.056166056166056, | |
| "grad_norm": 1.7534053325653076, | |
| "learning_rate": 3.6414824935611777e-06, | |
| "loss": 0.7625139355659485, | |
| "step": 1684 | |
| }, | |
| { | |
| "epoch": 2.0586080586080584, | |
| "grad_norm": 26.373376846313477, | |
| "learning_rate": 3.6357733195878213e-06, | |
| "loss": 0.8886415958404541, | |
| "step": 1686 | |
| }, | |
| { | |
| "epoch": 2.061050061050061, | |
| "grad_norm": 6.317865371704102, | |
| "learning_rate": 3.6300626203301706e-06, | |
| "loss": 0.8263817429542542, | |
| "step": 1688 | |
| }, | |
| { | |
| "epoch": 2.0634920634920633, | |
| "grad_norm": 4.633421421051025, | |
| "learning_rate": 3.6243504217296562e-06, | |
| "loss": 0.7733551859855652, | |
| "step": 1690 | |
| }, | |
| { | |
| "epoch": 2.065934065934066, | |
| "grad_norm": 0.8595446348190308, | |
| "learning_rate": 3.6186367497345175e-06, | |
| "loss": 0.6480957865715027, | |
| "step": 1692 | |
| }, | |
| { | |
| "epoch": 2.0683760683760686, | |
| "grad_norm": 1.6937971115112305, | |
| "learning_rate": 3.6129216302996884e-06, | |
| "loss": 0.746891975402832, | |
| "step": 1694 | |
| }, | |
| { | |
| "epoch": 2.070818070818071, | |
| "grad_norm": 1.8328354358673096, | |
| "learning_rate": 3.6072050893866774e-06, | |
| "loss": 0.960361659526825, | |
| "step": 1696 | |
| }, | |
| { | |
| "epoch": 2.0732600732600734, | |
| "grad_norm": 1.8361425399780273, | |
| "learning_rate": 3.6014871529634497e-06, | |
| "loss": 0.9544206261634827, | |
| "step": 1698 | |
| }, | |
| { | |
| "epoch": 2.075702075702076, | |
| "grad_norm": 4.454921722412109, | |
| "learning_rate": 3.5957678470043117e-06, | |
| "loss": 0.9407209753990173, | |
| "step": 1700 | |
| }, | |
| { | |
| "epoch": 2.0781440781440783, | |
| "grad_norm": 3.5452890396118164, | |
| "learning_rate": 3.5900471974897883e-06, | |
| "loss": 0.5001405477523804, | |
| "step": 1702 | |
| }, | |
| { | |
| "epoch": 2.0805860805860807, | |
| "grad_norm": 1.039163589477539, | |
| "learning_rate": 3.5843252304065106e-06, | |
| "loss": 0.5040726661682129, | |
| "step": 1704 | |
| }, | |
| { | |
| "epoch": 2.083028083028083, | |
| "grad_norm": 6.359805583953857, | |
| "learning_rate": 3.5786019717470907e-06, | |
| "loss": 0.4261728525161743, | |
| "step": 1706 | |
| }, | |
| { | |
| "epoch": 2.0854700854700856, | |
| "grad_norm": 4.610110759735107, | |
| "learning_rate": 3.5728774475100127e-06, | |
| "loss": 0.9747272729873657, | |
| "step": 1708 | |
| }, | |
| { | |
| "epoch": 2.087912087912088, | |
| "grad_norm": 2.2436630725860596, | |
| "learning_rate": 3.5671516836995057e-06, | |
| "loss": 0.6898809671401978, | |
| "step": 1710 | |
| }, | |
| { | |
| "epoch": 2.0903540903540905, | |
| "grad_norm": 12.148319244384766, | |
| "learning_rate": 3.561424706325434e-06, | |
| "loss": 0.6288307905197144, | |
| "step": 1712 | |
| }, | |
| { | |
| "epoch": 2.092796092796093, | |
| "grad_norm": 1.7018226385116577, | |
| "learning_rate": 3.555696541403169e-06, | |
| "loss": 0.620037317276001, | |
| "step": 1714 | |
| }, | |
| { | |
| "epoch": 2.0952380952380953, | |
| "grad_norm": 2.4741909503936768, | |
| "learning_rate": 3.549967214953482e-06, | |
| "loss": 0.6937116980552673, | |
| "step": 1716 | |
| }, | |
| { | |
| "epoch": 2.0976800976800978, | |
| "grad_norm": 2.7370715141296387, | |
| "learning_rate": 3.5442367530024167e-06, | |
| "loss": 0.9124626517295837, | |
| "step": 1718 | |
| }, | |
| { | |
| "epoch": 2.1001221001221, | |
| "grad_norm": 2.0893752574920654, | |
| "learning_rate": 3.5385051815811774e-06, | |
| "loss": 0.9028443694114685, | |
| "step": 1720 | |
| }, | |
| { | |
| "epoch": 2.1025641025641026, | |
| "grad_norm": 4.761720180511475, | |
| "learning_rate": 3.5327725267260076e-06, | |
| "loss": 1.0133492946624756, | |
| "step": 1722 | |
| }, | |
| { | |
| "epoch": 2.105006105006105, | |
| "grad_norm": 1.624951720237732, | |
| "learning_rate": 3.5270388144780724e-06, | |
| "loss": 0.6351829767227173, | |
| "step": 1724 | |
| }, | |
| { | |
| "epoch": 2.1074481074481075, | |
| "grad_norm": 16.627628326416016, | |
| "learning_rate": 3.5213040708833395e-06, | |
| "loss": 0.5484714508056641, | |
| "step": 1726 | |
| }, | |
| { | |
| "epoch": 2.10989010989011, | |
| "grad_norm": 1.2873952388763428, | |
| "learning_rate": 3.5155683219924626e-06, | |
| "loss": 0.5492695569992065, | |
| "step": 1728 | |
| }, | |
| { | |
| "epoch": 2.1123321123321124, | |
| "grad_norm": 2.2124252319335938, | |
| "learning_rate": 3.509831593860662e-06, | |
| "loss": 0.841966986656189, | |
| "step": 1730 | |
| }, | |
| { | |
| "epoch": 2.114774114774115, | |
| "grad_norm": 2.9345152378082275, | |
| "learning_rate": 3.5040939125476042e-06, | |
| "loss": 0.5698384642601013, | |
| "step": 1732 | |
| }, | |
| { | |
| "epoch": 2.1172161172161172, | |
| "grad_norm": 0.9468244910240173, | |
| "learning_rate": 3.4983553041172902e-06, | |
| "loss": 0.2970229685306549, | |
| "step": 1734 | |
| }, | |
| { | |
| "epoch": 2.1196581196581197, | |
| "grad_norm": 4.071929931640625, | |
| "learning_rate": 3.4926157946379273e-06, | |
| "loss": 0.9440501928329468, | |
| "step": 1736 | |
| }, | |
| { | |
| "epoch": 2.122100122100122, | |
| "grad_norm": 2.8341457843780518, | |
| "learning_rate": 3.4868754101818194e-06, | |
| "loss": 0.8834357261657715, | |
| "step": 1738 | |
| }, | |
| { | |
| "epoch": 2.1245421245421245, | |
| "grad_norm": 1.979362964630127, | |
| "learning_rate": 3.481134176825245e-06, | |
| "loss": 1.0380572080612183, | |
| "step": 1740 | |
| }, | |
| { | |
| "epoch": 2.126984126984127, | |
| "grad_norm": 7.127715587615967, | |
| "learning_rate": 3.4753921206483347e-06, | |
| "loss": 0.8644207715988159, | |
| "step": 1742 | |
| }, | |
| { | |
| "epoch": 2.1294261294261294, | |
| "grad_norm": 2.524230480194092, | |
| "learning_rate": 3.4696492677349627e-06, | |
| "loss": 0.6818966269493103, | |
| "step": 1744 | |
| }, | |
| { | |
| "epoch": 2.131868131868132, | |
| "grad_norm": 1.9305305480957031, | |
| "learning_rate": 3.463905644172619e-06, | |
| "loss": 0.9881414175033569, | |
| "step": 1746 | |
| }, | |
| { | |
| "epoch": 2.1343101343101343, | |
| "grad_norm": 3.519512891769409, | |
| "learning_rate": 3.4581612760522945e-06, | |
| "loss": 0.4184856712818146, | |
| "step": 1748 | |
| }, | |
| { | |
| "epoch": 2.1367521367521367, | |
| "grad_norm": 1.6307930946350098, | |
| "learning_rate": 3.4524161894683634e-06, | |
| "loss": 1.1337333917617798, | |
| "step": 1750 | |
| }, | |
| { | |
| "epoch": 2.139194139194139, | |
| "grad_norm": 2.0223984718322754, | |
| "learning_rate": 3.4466704105184634e-06, | |
| "loss": 0.8873319029808044, | |
| "step": 1752 | |
| }, | |
| { | |
| "epoch": 2.1416361416361416, | |
| "grad_norm": 3.159184694290161, | |
| "learning_rate": 3.4409239653033754e-06, | |
| "loss": 0.9011868834495544, | |
| "step": 1754 | |
| }, | |
| { | |
| "epoch": 2.144078144078144, | |
| "grad_norm": 1.4487037658691406, | |
| "learning_rate": 3.435176879926911e-06, | |
| "loss": 1.0243741273880005, | |
| "step": 1756 | |
| }, | |
| { | |
| "epoch": 2.1465201465201464, | |
| "grad_norm": 3.106032371520996, | |
| "learning_rate": 3.4294291804957846e-06, | |
| "loss": 0.4617737829685211, | |
| "step": 1758 | |
| }, | |
| { | |
| "epoch": 2.148962148962149, | |
| "grad_norm": 1.5856150388717651, | |
| "learning_rate": 3.4236808931195045e-06, | |
| "loss": 0.9131142497062683, | |
| "step": 1760 | |
| }, | |
| { | |
| "epoch": 2.1514041514041513, | |
| "grad_norm": 0.6227497458457947, | |
| "learning_rate": 3.417932043910248e-06, | |
| "loss": 0.6714634299278259, | |
| "step": 1762 | |
| }, | |
| { | |
| "epoch": 2.1538461538461537, | |
| "grad_norm": 3.6048402786254883, | |
| "learning_rate": 3.412182658982743e-06, | |
| "loss": 0.6586474776268005, | |
| "step": 1764 | |
| }, | |
| { | |
| "epoch": 2.156288156288156, | |
| "grad_norm": 1.531855583190918, | |
| "learning_rate": 3.4064327644541536e-06, | |
| "loss": 0.6113312244415283, | |
| "step": 1766 | |
| }, | |
| { | |
| "epoch": 2.1587301587301586, | |
| "grad_norm": 25.597637176513672, | |
| "learning_rate": 3.400682386443958e-06, | |
| "loss": 0.7350863218307495, | |
| "step": 1768 | |
| }, | |
| { | |
| "epoch": 2.161172161172161, | |
| "grad_norm": 5.0424346923828125, | |
| "learning_rate": 3.39493155107383e-06, | |
| "loss": 0.5671433806419373, | |
| "step": 1770 | |
| }, | |
| { | |
| "epoch": 2.1636141636141635, | |
| "grad_norm": 3.7835681438446045, | |
| "learning_rate": 3.389180284467522e-06, | |
| "loss": 0.7587950825691223, | |
| "step": 1772 | |
| }, | |
| { | |
| "epoch": 2.166056166056166, | |
| "grad_norm": 11.762646675109863, | |
| "learning_rate": 3.3834286127507445e-06, | |
| "loss": 0.8601915240287781, | |
| "step": 1774 | |
| }, | |
| { | |
| "epoch": 2.1684981684981683, | |
| "grad_norm": 1.298113226890564, | |
| "learning_rate": 3.3776765620510475e-06, | |
| "loss": 0.9438109397888184, | |
| "step": 1776 | |
| }, | |
| { | |
| "epoch": 2.1709401709401708, | |
| "grad_norm": 5.715275764465332, | |
| "learning_rate": 3.3719241584977056e-06, | |
| "loss": 0.6178626418113708, | |
| "step": 1778 | |
| }, | |
| { | |
| "epoch": 2.173382173382173, | |
| "grad_norm": 2.35664963722229, | |
| "learning_rate": 3.366171428221592e-06, | |
| "loss": 0.9353488087654114, | |
| "step": 1780 | |
| }, | |
| { | |
| "epoch": 2.1758241758241756, | |
| "grad_norm": 4.974782466888428, | |
| "learning_rate": 3.360418397355067e-06, | |
| "loss": 0.7427996397018433, | |
| "step": 1782 | |
| }, | |
| { | |
| "epoch": 2.178266178266178, | |
| "grad_norm": 1.669165849685669, | |
| "learning_rate": 3.3546650920318567e-06, | |
| "loss": 0.9448103308677673, | |
| "step": 1784 | |
| }, | |
| { | |
| "epoch": 2.1807081807081805, | |
| "grad_norm": 5.4477691650390625, | |
| "learning_rate": 3.3489115383869322e-06, | |
| "loss": 0.9031893014907837, | |
| "step": 1786 | |
| }, | |
| { | |
| "epoch": 2.183150183150183, | |
| "grad_norm": 3.41313099861145, | |
| "learning_rate": 3.3431577625563934e-06, | |
| "loss": 0.9105832576751709, | |
| "step": 1788 | |
| }, | |
| { | |
| "epoch": 2.185592185592186, | |
| "grad_norm": 2.2663333415985107, | |
| "learning_rate": 3.337403790677349e-06, | |
| "loss": 0.8743823766708374, | |
| "step": 1790 | |
| }, | |
| { | |
| "epoch": 2.1880341880341883, | |
| "grad_norm": 3.956235408782959, | |
| "learning_rate": 3.3316496488878e-06, | |
| "loss": 0.9286958575248718, | |
| "step": 1792 | |
| }, | |
| { | |
| "epoch": 2.1904761904761907, | |
| "grad_norm": 1.6148682832717896, | |
| "learning_rate": 3.3258953633265155e-06, | |
| "loss": 0.5856344699859619, | |
| "step": 1794 | |
| }, | |
| { | |
| "epoch": 2.192918192918193, | |
| "grad_norm": 1.766203761100769, | |
| "learning_rate": 3.320140960132923e-06, | |
| "loss": 0.6644143462181091, | |
| "step": 1796 | |
| }, | |
| { | |
| "epoch": 2.1953601953601956, | |
| "grad_norm": 1.4154376983642578, | |
| "learning_rate": 3.3143864654469796e-06, | |
| "loss": 0.7322801947593689, | |
| "step": 1798 | |
| }, | |
| { | |
| "epoch": 2.197802197802198, | |
| "grad_norm": 1.9144586324691772, | |
| "learning_rate": 3.3086319054090614e-06, | |
| "loss": 0.9201781749725342, | |
| "step": 1800 | |
| }, | |
| { | |
| "epoch": 2.2002442002442004, | |
| "grad_norm": 4.5645365715026855, | |
| "learning_rate": 3.302877306159839e-06, | |
| "loss": 0.9563385844230652, | |
| "step": 1802 | |
| }, | |
| { | |
| "epoch": 2.202686202686203, | |
| "grad_norm": 5.516610145568848, | |
| "learning_rate": 3.2971226938401616e-06, | |
| "loss": 0.6716048717498779, | |
| "step": 1804 | |
| }, | |
| { | |
| "epoch": 2.2051282051282053, | |
| "grad_norm": 2.037088632583618, | |
| "learning_rate": 3.2913680945909394e-06, | |
| "loss": 0.9080408215522766, | |
| "step": 1806 | |
| }, | |
| { | |
| "epoch": 2.2075702075702077, | |
| "grad_norm": 16.09855842590332, | |
| "learning_rate": 3.2856135345530203e-06, | |
| "loss": 1.0019384622573853, | |
| "step": 1808 | |
| }, | |
| { | |
| "epoch": 2.21001221001221, | |
| "grad_norm": 1.9463958740234375, | |
| "learning_rate": 3.279859039867078e-06, | |
| "loss": 0.6507654786109924, | |
| "step": 1810 | |
| }, | |
| { | |
| "epoch": 2.2124542124542126, | |
| "grad_norm": 1.6095830202102661, | |
| "learning_rate": 3.2741046366734852e-06, | |
| "loss": 0.7600730061531067, | |
| "step": 1812 | |
| }, | |
| { | |
| "epoch": 2.214896214896215, | |
| "grad_norm": 1.3766435384750366, | |
| "learning_rate": 3.2683503511122013e-06, | |
| "loss": 0.41193750500679016, | |
| "step": 1814 | |
| }, | |
| { | |
| "epoch": 2.2173382173382175, | |
| "grad_norm": 2.845604419708252, | |
| "learning_rate": 3.262596209322651e-06, | |
| "loss": 1.0406476259231567, | |
| "step": 1816 | |
| }, | |
| { | |
| "epoch": 2.21978021978022, | |
| "grad_norm": 1.6436043977737427, | |
| "learning_rate": 3.2568422374436078e-06, | |
| "loss": 0.9313133358955383, | |
| "step": 1818 | |
| }, | |
| { | |
| "epoch": 2.2222222222222223, | |
| "grad_norm": 1.925812005996704, | |
| "learning_rate": 3.2510884616130685e-06, | |
| "loss": 0.9973028898239136, | |
| "step": 1820 | |
| }, | |
| { | |
| "epoch": 2.2246642246642248, | |
| "grad_norm": 3.855332612991333, | |
| "learning_rate": 3.2453349079681436e-06, | |
| "loss": 0.4581510126590729, | |
| "step": 1822 | |
| }, | |
| { | |
| "epoch": 2.227106227106227, | |
| "grad_norm": 4.27201509475708, | |
| "learning_rate": 3.239581602644933e-06, | |
| "loss": 0.761246383190155, | |
| "step": 1824 | |
| }, | |
| { | |
| "epoch": 2.2295482295482296, | |
| "grad_norm": 10.0543794631958, | |
| "learning_rate": 3.2338285717784087e-06, | |
| "loss": 0.5892999172210693, | |
| "step": 1826 | |
| }, | |
| { | |
| "epoch": 2.231990231990232, | |
| "grad_norm": 4.678505897521973, | |
| "learning_rate": 3.228075841502295e-06, | |
| "loss": 0.5877086520195007, | |
| "step": 1828 | |
| }, | |
| { | |
| "epoch": 2.2344322344322345, | |
| "grad_norm": 3.173227310180664, | |
| "learning_rate": 3.2223234379489524e-06, | |
| "loss": 0.6748300790786743, | |
| "step": 1830 | |
| }, | |
| { | |
| "epoch": 2.236874236874237, | |
| "grad_norm": 1.8820854425430298, | |
| "learning_rate": 3.2165713872492563e-06, | |
| "loss": 0.9059861898422241, | |
| "step": 1832 | |
| }, | |
| { | |
| "epoch": 2.2393162393162394, | |
| "grad_norm": 1.2828683853149414, | |
| "learning_rate": 3.2108197155324782e-06, | |
| "loss": 0.9060832262039185, | |
| "step": 1834 | |
| }, | |
| { | |
| "epoch": 2.241758241758242, | |
| "grad_norm": 2.243968963623047, | |
| "learning_rate": 3.2050684489261707e-06, | |
| "loss": 0.8909225463867188, | |
| "step": 1836 | |
| }, | |
| { | |
| "epoch": 2.244200244200244, | |
| "grad_norm": 3.2561757564544678, | |
| "learning_rate": 3.199317613556042e-06, | |
| "loss": 0.9934486746788025, | |
| "step": 1838 | |
| }, | |
| { | |
| "epoch": 2.2466422466422467, | |
| "grad_norm": 3.1067919731140137, | |
| "learning_rate": 3.193567235545847e-06, | |
| "loss": 0.44754478335380554, | |
| "step": 1840 | |
| }, | |
| { | |
| "epoch": 2.249084249084249, | |
| "grad_norm": 1.4694087505340576, | |
| "learning_rate": 3.1878173410172574e-06, | |
| "loss": 0.9427831172943115, | |
| "step": 1842 | |
| }, | |
| { | |
| "epoch": 2.2515262515262515, | |
| "grad_norm": 1.6658433675765991, | |
| "learning_rate": 3.182067956089754e-06, | |
| "loss": 1.1688846349716187, | |
| "step": 1844 | |
| }, | |
| { | |
| "epoch": 2.253968253968254, | |
| "grad_norm": 17.416038513183594, | |
| "learning_rate": 3.1763191068804963e-06, | |
| "loss": 0.6192197799682617, | |
| "step": 1846 | |
| }, | |
| { | |
| "epoch": 2.2564102564102564, | |
| "grad_norm": 1.2540676593780518, | |
| "learning_rate": 3.170570819504217e-06, | |
| "loss": 0.6497691869735718, | |
| "step": 1848 | |
| }, | |
| { | |
| "epoch": 2.258852258852259, | |
| "grad_norm": 2.8082709312438965, | |
| "learning_rate": 3.1648231200730907e-06, | |
| "loss": 0.676390528678894, | |
| "step": 1850 | |
| }, | |
| { | |
| "epoch": 2.2612942612942613, | |
| "grad_norm": 12.676959037780762, | |
| "learning_rate": 3.1590760346966254e-06, | |
| "loss": 0.967694878578186, | |
| "step": 1852 | |
| }, | |
| { | |
| "epoch": 2.2637362637362637, | |
| "grad_norm": 22.334165573120117, | |
| "learning_rate": 3.1533295894815374e-06, | |
| "loss": 1.0598804950714111, | |
| "step": 1854 | |
| }, | |
| { | |
| "epoch": 2.266178266178266, | |
| "grad_norm": 2.7481689453125, | |
| "learning_rate": 3.1475838105316378e-06, | |
| "loss": 0.897810697555542, | |
| "step": 1856 | |
| }, | |
| { | |
| "epoch": 2.2686202686202686, | |
| "grad_norm": 5.33841609954834, | |
| "learning_rate": 3.1418387239477055e-06, | |
| "loss": 0.8070839643478394, | |
| "step": 1858 | |
| }, | |
| { | |
| "epoch": 2.271062271062271, | |
| "grad_norm": 3.1426467895507812, | |
| "learning_rate": 3.1360943558273812e-06, | |
| "loss": 1.0592520236968994, | |
| "step": 1860 | |
| }, | |
| { | |
| "epoch": 2.2735042735042734, | |
| "grad_norm": 3.4249966144561768, | |
| "learning_rate": 3.130350732265038e-06, | |
| "loss": 0.6386770009994507, | |
| "step": 1862 | |
| }, | |
| { | |
| "epoch": 2.275946275946276, | |
| "grad_norm": 2.3887171745300293, | |
| "learning_rate": 3.1246078793516656e-06, | |
| "loss": 0.6159527897834778, | |
| "step": 1864 | |
| }, | |
| { | |
| "epoch": 2.2783882783882783, | |
| "grad_norm": 2.1484906673431396, | |
| "learning_rate": 3.1188658231747566e-06, | |
| "loss": 0.8985686302185059, | |
| "step": 1866 | |
| }, | |
| { | |
| "epoch": 2.2808302808302807, | |
| "grad_norm": 2.638803005218506, | |
| "learning_rate": 3.11312458981818e-06, | |
| "loss": 0.9426345229148865, | |
| "step": 1868 | |
| }, | |
| { | |
| "epoch": 2.283272283272283, | |
| "grad_norm": 1.8906961679458618, | |
| "learning_rate": 3.1073842053620734e-06, | |
| "loss": 0.9898474812507629, | |
| "step": 1870 | |
| }, | |
| { | |
| "epoch": 2.2857142857142856, | |
| "grad_norm": 1.6906073093414307, | |
| "learning_rate": 3.1016446958827097e-06, | |
| "loss": 0.9557514786720276, | |
| "step": 1872 | |
| }, | |
| { | |
| "epoch": 2.288156288156288, | |
| "grad_norm": 4.265973091125488, | |
| "learning_rate": 3.0959060874523965e-06, | |
| "loss": 0.16929063200950623, | |
| "step": 1874 | |
| }, | |
| { | |
| "epoch": 2.2905982905982905, | |
| "grad_norm": 1.2474678754806519, | |
| "learning_rate": 3.0901684061393386e-06, | |
| "loss": 0.96248859167099, | |
| "step": 1876 | |
| }, | |
| { | |
| "epoch": 2.293040293040293, | |
| "grad_norm": 2.5638835430145264, | |
| "learning_rate": 3.0844316780075377e-06, | |
| "loss": 0.6548967957496643, | |
| "step": 1878 | |
| }, | |
| { | |
| "epoch": 2.2954822954822953, | |
| "grad_norm": 4.610739707946777, | |
| "learning_rate": 3.078695929116661e-06, | |
| "loss": 0.7896809577941895, | |
| "step": 1880 | |
| }, | |
| { | |
| "epoch": 2.2979242979242978, | |
| "grad_norm": 2.039637804031372, | |
| "learning_rate": 3.072961185521929e-06, | |
| "loss": 1.022554874420166, | |
| "step": 1882 | |
| }, | |
| { | |
| "epoch": 2.3003663003663, | |
| "grad_norm": 1.1219255924224854, | |
| "learning_rate": 3.067227473273993e-06, | |
| "loss": 0.6635209321975708, | |
| "step": 1884 | |
| }, | |
| { | |
| "epoch": 2.3028083028083026, | |
| "grad_norm": 1.9538935422897339, | |
| "learning_rate": 3.0614948184188238e-06, | |
| "loss": 1.2081166505813599, | |
| "step": 1886 | |
| }, | |
| { | |
| "epoch": 2.305250305250305, | |
| "grad_norm": 4.5801849365234375, | |
| "learning_rate": 3.0557632469975845e-06, | |
| "loss": 0.9475079774856567, | |
| "step": 1888 | |
| }, | |
| { | |
| "epoch": 2.3076923076923075, | |
| "grad_norm": 2.044386148452759, | |
| "learning_rate": 3.05003278504652e-06, | |
| "loss": 1.0499695539474487, | |
| "step": 1890 | |
| }, | |
| { | |
| "epoch": 2.31013431013431, | |
| "grad_norm": 2.1628005504608154, | |
| "learning_rate": 3.044303458596832e-06, | |
| "loss": 0.95216965675354, | |
| "step": 1892 | |
| }, | |
| { | |
| "epoch": 2.3125763125763124, | |
| "grad_norm": 5.278742790222168, | |
| "learning_rate": 3.0385752936745665e-06, | |
| "loss": 1.0832526683807373, | |
| "step": 1894 | |
| }, | |
| { | |
| "epoch": 2.315018315018315, | |
| "grad_norm": 1.9575459957122803, | |
| "learning_rate": 3.032848316300494e-06, | |
| "loss": 1.029971718788147, | |
| "step": 1896 | |
| }, | |
| { | |
| "epoch": 2.317460317460317, | |
| "grad_norm": 2.02197527885437, | |
| "learning_rate": 3.027122552489988e-06, | |
| "loss": 0.9606547951698303, | |
| "step": 1898 | |
| }, | |
| { | |
| "epoch": 2.3199023199023197, | |
| "grad_norm": 1.8357181549072266, | |
| "learning_rate": 3.02139802825291e-06, | |
| "loss": 0.9799268245697021, | |
| "step": 1900 | |
| }, | |
| { | |
| "epoch": 2.3223443223443225, | |
| "grad_norm": 3.0944628715515137, | |
| "learning_rate": 3.01567476959349e-06, | |
| "loss": 0.9936726093292236, | |
| "step": 1902 | |
| }, | |
| { | |
| "epoch": 2.324786324786325, | |
| "grad_norm": 4.7253265380859375, | |
| "learning_rate": 3.009952802510212e-06, | |
| "loss": 0.510124146938324, | |
| "step": 1904 | |
| }, | |
| { | |
| "epoch": 2.3272283272283274, | |
| "grad_norm": 3.323378324508667, | |
| "learning_rate": 3.004232152995688e-06, | |
| "loss": 0.4245060682296753, | |
| "step": 1906 | |
| }, | |
| { | |
| "epoch": 2.32967032967033, | |
| "grad_norm": 1.937008023262024, | |
| "learning_rate": 2.99851284703655e-06, | |
| "loss": 0.6247161626815796, | |
| "step": 1908 | |
| }, | |
| { | |
| "epoch": 2.3321123321123323, | |
| "grad_norm": 1.282409429550171, | |
| "learning_rate": 2.992794910613323e-06, | |
| "loss": 0.4279539883136749, | |
| "step": 1910 | |
| }, | |
| { | |
| "epoch": 2.3345543345543347, | |
| "grad_norm": 15.080961227416992, | |
| "learning_rate": 2.987078369700312e-06, | |
| "loss": 0.7808930277824402, | |
| "step": 1912 | |
| }, | |
| { | |
| "epoch": 2.336996336996337, | |
| "grad_norm": 1.419805884361267, | |
| "learning_rate": 2.981363250265483e-06, | |
| "loss": 0.8222759962081909, | |
| "step": 1914 | |
| }, | |
| { | |
| "epoch": 2.3394383394383396, | |
| "grad_norm": 2.3081250190734863, | |
| "learning_rate": 2.9756495782703445e-06, | |
| "loss": 0.9174440503120422, | |
| "step": 1916 | |
| }, | |
| { | |
| "epoch": 2.341880341880342, | |
| "grad_norm": 1.4504395723342896, | |
| "learning_rate": 2.9699373796698293e-06, | |
| "loss": 0.6940646171569824, | |
| "step": 1918 | |
| }, | |
| { | |
| "epoch": 2.3443223443223444, | |
| "grad_norm": 9.539470672607422, | |
| "learning_rate": 2.9642266804121804e-06, | |
| "loss": 0.27768972516059875, | |
| "step": 1920 | |
| }, | |
| { | |
| "epoch": 2.346764346764347, | |
| "grad_norm": 7.798355579376221, | |
| "learning_rate": 2.958517506438823e-06, | |
| "loss": 0.7792479991912842, | |
| "step": 1922 | |
| }, | |
| { | |
| "epoch": 2.3492063492063493, | |
| "grad_norm": 1.2411043643951416, | |
| "learning_rate": 2.9528098836842615e-06, | |
| "loss": 0.5147164463996887, | |
| "step": 1924 | |
| }, | |
| { | |
| "epoch": 2.3516483516483517, | |
| "grad_norm": 1.255823016166687, | |
| "learning_rate": 2.947103838075948e-06, | |
| "loss": 0.9378372430801392, | |
| "step": 1926 | |
| }, | |
| { | |
| "epoch": 2.354090354090354, | |
| "grad_norm": 2.5060043334960938, | |
| "learning_rate": 2.9413993955341732e-06, | |
| "loss": 0.4387669563293457, | |
| "step": 1928 | |
| }, | |
| { | |
| "epoch": 2.3565323565323566, | |
| "grad_norm": 1.6388636827468872, | |
| "learning_rate": 2.935696581971944e-06, | |
| "loss": 0.47966766357421875, | |
| "step": 1930 | |
| }, | |
| { | |
| "epoch": 2.358974358974359, | |
| "grad_norm": 2.1432793140411377, | |
| "learning_rate": 2.929995423294869e-06, | |
| "loss": 0.6485490798950195, | |
| "step": 1932 | |
| }, | |
| { | |
| "epoch": 2.3614163614163615, | |
| "grad_norm": 1.9438990354537964, | |
| "learning_rate": 2.9242959454010385e-06, | |
| "loss": 0.9063615798950195, | |
| "step": 1934 | |
| }, | |
| { | |
| "epoch": 2.363858363858364, | |
| "grad_norm": 1.496005654335022, | |
| "learning_rate": 2.918598174180907e-06, | |
| "loss": 0.6603356599807739, | |
| "step": 1936 | |
| }, | |
| { | |
| "epoch": 2.3663003663003663, | |
| "grad_norm": 1.7516268491744995, | |
| "learning_rate": 2.9129021355171796e-06, | |
| "loss": 0.9123400449752808, | |
| "step": 1938 | |
| }, | |
| { | |
| "epoch": 2.3687423687423688, | |
| "grad_norm": 23.957849502563477, | |
| "learning_rate": 2.907207855284685e-06, | |
| "loss": 0.7359025478363037, | |
| "step": 1940 | |
| }, | |
| { | |
| "epoch": 2.371184371184371, | |
| "grad_norm": 0.49576354026794434, | |
| "learning_rate": 2.9015153593502697e-06, | |
| "loss": 0.68361896276474, | |
| "step": 1942 | |
| }, | |
| { | |
| "epoch": 2.3736263736263736, | |
| "grad_norm": 3.961500883102417, | |
| "learning_rate": 2.8958246735726717e-06, | |
| "loss": 0.8321384787559509, | |
| "step": 1944 | |
| }, | |
| { | |
| "epoch": 2.376068376068376, | |
| "grad_norm": 3.9573895931243896, | |
| "learning_rate": 2.890135823802409e-06, | |
| "loss": 0.20095598697662354, | |
| "step": 1946 | |
| }, | |
| { | |
| "epoch": 2.3785103785103785, | |
| "grad_norm": 1.3203811645507812, | |
| "learning_rate": 2.8844488358816547e-06, | |
| "loss": 0.46073901653289795, | |
| "step": 1948 | |
| }, | |
| { | |
| "epoch": 2.380952380952381, | |
| "grad_norm": 4.666232585906982, | |
| "learning_rate": 2.8787637356441306e-06, | |
| "loss": 0.45975521206855774, | |
| "step": 1950 | |
| }, | |
| { | |
| "epoch": 2.3833943833943834, | |
| "grad_norm": 2.1029303073883057, | |
| "learning_rate": 2.8730805489149762e-06, | |
| "loss": 0.4719840884208679, | |
| "step": 1952 | |
| }, | |
| { | |
| "epoch": 2.385836385836386, | |
| "grad_norm": 14.054176330566406, | |
| "learning_rate": 2.867399301510646e-06, | |
| "loss": 0.8675229549407959, | |
| "step": 1954 | |
| }, | |
| { | |
| "epoch": 2.3882783882783882, | |
| "grad_norm": 2.523514986038208, | |
| "learning_rate": 2.861720019238778e-06, | |
| "loss": 0.9857223629951477, | |
| "step": 1956 | |
| }, | |
| { | |
| "epoch": 2.3907203907203907, | |
| "grad_norm": 5.2092132568359375, | |
| "learning_rate": 2.8560427278980903e-06, | |
| "loss": 0.6426673531532288, | |
| "step": 1958 | |
| }, | |
| { | |
| "epoch": 2.393162393162393, | |
| "grad_norm": 1.7665150165557861, | |
| "learning_rate": 2.850367453278251e-06, | |
| "loss": 1.0697640180587769, | |
| "step": 1960 | |
| }, | |
| { | |
| "epoch": 2.3956043956043955, | |
| "grad_norm": 2.8153493404388428, | |
| "learning_rate": 2.8446942211597713e-06, | |
| "loss": 0.6665030121803284, | |
| "step": 1962 | |
| }, | |
| { | |
| "epoch": 2.398046398046398, | |
| "grad_norm": 1.6511778831481934, | |
| "learning_rate": 2.83902305731388e-06, | |
| "loss": 0.9055560231208801, | |
| "step": 1964 | |
| }, | |
| { | |
| "epoch": 2.4004884004884004, | |
| "grad_norm": 2.6607825756073, | |
| "learning_rate": 2.833353987502415e-06, | |
| "loss": 0.5983095765113831, | |
| "step": 1966 | |
| }, | |
| { | |
| "epoch": 2.402930402930403, | |
| "grad_norm": 2.3817784786224365, | |
| "learning_rate": 2.8276870374777003e-06, | |
| "loss": 0.602225661277771, | |
| "step": 1968 | |
| }, | |
| { | |
| "epoch": 2.4053724053724053, | |
| "grad_norm": 8.560592651367188, | |
| "learning_rate": 2.822022232982428e-06, | |
| "loss": 0.4158218204975128, | |
| "step": 1970 | |
| }, | |
| { | |
| "epoch": 2.4078144078144077, | |
| "grad_norm": 9.470584869384766, | |
| "learning_rate": 2.816359599749548e-06, | |
| "loss": 0.20221352577209473, | |
| "step": 1972 | |
| }, | |
| { | |
| "epoch": 2.41025641025641, | |
| "grad_norm": 0.9254062175750732, | |
| "learning_rate": 2.810699163502143e-06, | |
| "loss": 0.18855728209018707, | |
| "step": 1974 | |
| }, | |
| { | |
| "epoch": 2.4126984126984126, | |
| "grad_norm": 3.3264057636260986, | |
| "learning_rate": 2.805040949953319e-06, | |
| "loss": 0.40398913621902466, | |
| "step": 1976 | |
| }, | |
| { | |
| "epoch": 2.415140415140415, | |
| "grad_norm": 4.203930854797363, | |
| "learning_rate": 2.7993849848060827e-06, | |
| "loss": 0.9113072752952576, | |
| "step": 1978 | |
| }, | |
| { | |
| "epoch": 2.4175824175824174, | |
| "grad_norm": 1.2429267168045044, | |
| "learning_rate": 2.7937312937532303e-06, | |
| "loss": 0.46490153670310974, | |
| "step": 1980 | |
| }, | |
| { | |
| "epoch": 2.42002442002442, | |
| "grad_norm": 5.597394943237305, | |
| "learning_rate": 2.7880799024772236e-06, | |
| "loss": 0.9845388531684875, | |
| "step": 1982 | |
| }, | |
| { | |
| "epoch": 2.4224664224664223, | |
| "grad_norm": 0.8237308263778687, | |
| "learning_rate": 2.7824308366500807e-06, | |
| "loss": 0.49944135546684265, | |
| "step": 1984 | |
| }, | |
| { | |
| "epoch": 2.4249084249084247, | |
| "grad_norm": 1.4086517095565796, | |
| "learning_rate": 2.776784121933254e-06, | |
| "loss": 1.1026017665863037, | |
| "step": 1986 | |
| }, | |
| { | |
| "epoch": 2.427350427350427, | |
| "grad_norm": 2.9017109870910645, | |
| "learning_rate": 2.7711397839775176e-06, | |
| "loss": 0.977454662322998, | |
| "step": 1988 | |
| }, | |
| { | |
| "epoch": 2.42979242979243, | |
| "grad_norm": 1.8910452127456665, | |
| "learning_rate": 2.765497848422848e-06, | |
| "loss": 1.0892168283462524, | |
| "step": 1990 | |
| }, | |
| { | |
| "epoch": 2.4322344322344325, | |
| "grad_norm": 5.9468464851379395, | |
| "learning_rate": 2.759858340898308e-06, | |
| "loss": 0.9162377715110779, | |
| "step": 1992 | |
| }, | |
| { | |
| "epoch": 2.434676434676435, | |
| "grad_norm": 2.0583372116088867, | |
| "learning_rate": 2.7542212870219323e-06, | |
| "loss": 0.9635336995124817, | |
| "step": 1994 | |
| }, | |
| { | |
| "epoch": 2.4371184371184373, | |
| "grad_norm": 3.0031464099884033, | |
| "learning_rate": 2.7485867124006084e-06, | |
| "loss": 0.8859571218490601, | |
| "step": 1996 | |
| }, | |
| { | |
| "epoch": 2.4395604395604398, | |
| "grad_norm": 1.2768526077270508, | |
| "learning_rate": 2.742954642629961e-06, | |
| "loss": 1.0290803909301758, | |
| "step": 1998 | |
| }, | |
| { | |
| "epoch": 2.442002442002442, | |
| "grad_norm": 2.5159692764282227, | |
| "learning_rate": 2.7373251032942382e-06, | |
| "loss": 0.6687625646591187, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 2.4444444444444446, | |
| "grad_norm": 1.5561347007751465, | |
| "learning_rate": 2.7316981199661915e-06, | |
| "loss": 0.9961126446723938, | |
| "step": 2002 | |
| }, | |
| { | |
| "epoch": 2.446886446886447, | |
| "grad_norm": 1.4128899574279785, | |
| "learning_rate": 2.726073718206963e-06, | |
| "loss": 0.6645013093948364, | |
| "step": 2004 | |
| }, | |
| { | |
| "epoch": 2.4493284493284495, | |
| "grad_norm": 2.9358339309692383, | |
| "learning_rate": 2.7204519235659677e-06, | |
| "loss": 0.717438280582428, | |
| "step": 2006 | |
| }, | |
| { | |
| "epoch": 2.451770451770452, | |
| "grad_norm": 2.506085157394409, | |
| "learning_rate": 2.7148327615807745e-06, | |
| "loss": 1.0376380681991577, | |
| "step": 2008 | |
| }, | |
| { | |
| "epoch": 2.4542124542124544, | |
| "grad_norm": 2.029444694519043, | |
| "learning_rate": 2.7092162577769975e-06, | |
| "loss": 0.5659610033035278, | |
| "step": 2010 | |
| }, | |
| { | |
| "epoch": 2.456654456654457, | |
| "grad_norm": 1.5038423538208008, | |
| "learning_rate": 2.7036024376681736e-06, | |
| "loss": 0.5658340454101562, | |
| "step": 2012 | |
| }, | |
| { | |
| "epoch": 2.4590964590964592, | |
| "grad_norm": 2.677021026611328, | |
| "learning_rate": 2.697991326755648e-06, | |
| "loss": 1.0553836822509766, | |
| "step": 2014 | |
| }, | |
| { | |
| "epoch": 2.4615384615384617, | |
| "grad_norm": 1.5988397598266602, | |
| "learning_rate": 2.692382950528461e-06, | |
| "loss": 0.9713739156723022, | |
| "step": 2016 | |
| }, | |
| { | |
| "epoch": 2.463980463980464, | |
| "grad_norm": 2.0507140159606934, | |
| "learning_rate": 2.6867773344632283e-06, | |
| "loss": 0.9353849291801453, | |
| "step": 2018 | |
| }, | |
| { | |
| "epoch": 2.4664224664224665, | |
| "grad_norm": 4.076539039611816, | |
| "learning_rate": 2.6811745040240296e-06, | |
| "loss": 0.7117088437080383, | |
| "step": 2020 | |
| }, | |
| { | |
| "epoch": 2.468864468864469, | |
| "grad_norm": 1.9486643075942993, | |
| "learning_rate": 2.6755744846622883e-06, | |
| "loss": 0.5674237012863159, | |
| "step": 2022 | |
| }, | |
| { | |
| "epoch": 2.4713064713064714, | |
| "grad_norm": 3.0820834636688232, | |
| "learning_rate": 2.6699773018166596e-06, | |
| "loss": 0.6507576704025269, | |
| "step": 2024 | |
| }, | |
| { | |
| "epoch": 2.473748473748474, | |
| "grad_norm": 8.020563125610352, | |
| "learning_rate": 2.6643829809129126e-06, | |
| "loss": 0.9150622487068176, | |
| "step": 2026 | |
| }, | |
| { | |
| "epoch": 2.4761904761904763, | |
| "grad_norm": 3.7450058460235596, | |
| "learning_rate": 2.6587915473638174e-06, | |
| "loss": 0.6196163892745972, | |
| "step": 2028 | |
| }, | |
| { | |
| "epoch": 2.4786324786324787, | |
| "grad_norm": 1.4104655981063843, | |
| "learning_rate": 2.653203026569026e-06, | |
| "loss": 0.920057475566864, | |
| "step": 2030 | |
| }, | |
| { | |
| "epoch": 2.481074481074481, | |
| "grad_norm": 3.1829826831817627, | |
| "learning_rate": 2.647617443914961e-06, | |
| "loss": 1.105471134185791, | |
| "step": 2032 | |
| }, | |
| { | |
| "epoch": 2.4835164835164836, | |
| "grad_norm": 4.825045585632324, | |
| "learning_rate": 2.6420348247746955e-06, | |
| "loss": 0.5420823693275452, | |
| "step": 2034 | |
| }, | |
| { | |
| "epoch": 2.485958485958486, | |
| "grad_norm": 3.713477373123169, | |
| "learning_rate": 2.6364551945078424e-06, | |
| "loss": 1.034119963645935, | |
| "step": 2036 | |
| }, | |
| { | |
| "epoch": 2.4884004884004884, | |
| "grad_norm": 1.7292916774749756, | |
| "learning_rate": 2.630878578460438e-06, | |
| "loss": 0.6454814076423645, | |
| "step": 2038 | |
| }, | |
| { | |
| "epoch": 2.490842490842491, | |
| "grad_norm": 6.596334457397461, | |
| "learning_rate": 2.6253050019648242e-06, | |
| "loss": 0.7713245153427124, | |
| "step": 2040 | |
| }, | |
| { | |
| "epoch": 2.4932844932844933, | |
| "grad_norm": 2.4643394947052, | |
| "learning_rate": 2.619734490339537e-06, | |
| "loss": 0.9619376063346863, | |
| "step": 2042 | |
| }, | |
| { | |
| "epoch": 2.4957264957264957, | |
| "grad_norm": 1.9008095264434814, | |
| "learning_rate": 2.614167068889189e-06, | |
| "loss": 0.7063919305801392, | |
| "step": 2044 | |
| }, | |
| { | |
| "epoch": 2.498168498168498, | |
| "grad_norm": 3.062089443206787, | |
| "learning_rate": 2.6086027629043567e-06, | |
| "loss": 0.5970872044563293, | |
| "step": 2046 | |
| }, | |
| { | |
| "epoch": 2.5006105006105006, | |
| "grad_norm": 1.492427110671997, | |
| "learning_rate": 2.603041597661461e-06, | |
| "loss": 0.6669678092002869, | |
| "step": 2048 | |
| }, | |
| { | |
| "epoch": 2.503052503052503, | |
| "grad_norm": 1.6604772806167603, | |
| "learning_rate": 2.5974835984226602e-06, | |
| "loss": 0.7523837089538574, | |
| "step": 2050 | |
| }, | |
| { | |
| "epoch": 2.5054945054945055, | |
| "grad_norm": 7.79230260848999, | |
| "learning_rate": 2.591928790435728e-06, | |
| "loss": 1.1376758813858032, | |
| "step": 2052 | |
| }, | |
| { | |
| "epoch": 2.507936507936508, | |
| "grad_norm": 1.4952296018600464, | |
| "learning_rate": 2.5863771989339402e-06, | |
| "loss": 0.6450526714324951, | |
| "step": 2054 | |
| }, | |
| { | |
| "epoch": 2.5103785103785103, | |
| "grad_norm": 1.8876447677612305, | |
| "learning_rate": 2.5808288491359664e-06, | |
| "loss": 0.926227331161499, | |
| "step": 2056 | |
| }, | |
| { | |
| "epoch": 2.5128205128205128, | |
| "grad_norm": 3.5542497634887695, | |
| "learning_rate": 2.575283766245744e-06, | |
| "loss": 0.4095945358276367, | |
| "step": 2058 | |
| }, | |
| { | |
| "epoch": 2.515262515262515, | |
| "grad_norm": 2.479278087615967, | |
| "learning_rate": 2.569741975452376e-06, | |
| "loss": 0.2681538462638855, | |
| "step": 2060 | |
| }, | |
| { | |
| "epoch": 2.5177045177045176, | |
| "grad_norm": 4.712648391723633, | |
| "learning_rate": 2.564203501930006e-06, | |
| "loss": 0.6008268594741821, | |
| "step": 2062 | |
| }, | |
| { | |
| "epoch": 2.52014652014652, | |
| "grad_norm": 2.5217740535736084, | |
| "learning_rate": 2.558668370837712e-06, | |
| "loss": 0.9255989789962769, | |
| "step": 2064 | |
| }, | |
| { | |
| "epoch": 2.5225885225885225, | |
| "grad_norm": 1.3460025787353516, | |
| "learning_rate": 2.5531366073193857e-06, | |
| "loss": 0.628387987613678, | |
| "step": 2066 | |
| }, | |
| { | |
| "epoch": 2.525030525030525, | |
| "grad_norm": 5.685485363006592, | |
| "learning_rate": 2.5476082365036257e-06, | |
| "loss": 0.7797712683677673, | |
| "step": 2068 | |
| }, | |
| { | |
| "epoch": 2.5274725274725274, | |
| "grad_norm": 1.2678775787353516, | |
| "learning_rate": 2.5420832835036126e-06, | |
| "loss": 1.1243481636047363, | |
| "step": 2070 | |
| }, | |
| { | |
| "epoch": 2.52991452991453, | |
| "grad_norm": 1.9160879850387573, | |
| "learning_rate": 2.5365617734170076e-06, | |
| "loss": 0.7206214666366577, | |
| "step": 2072 | |
| }, | |
| { | |
| "epoch": 2.5323565323565322, | |
| "grad_norm": 7.4982829093933105, | |
| "learning_rate": 2.531043731325827e-06, | |
| "loss": 0.9347074627876282, | |
| "step": 2074 | |
| }, | |
| { | |
| "epoch": 2.5347985347985347, | |
| "grad_norm": 2.430102586746216, | |
| "learning_rate": 2.5255291822963357e-06, | |
| "loss": 0.8390841484069824, | |
| "step": 2076 | |
| }, | |
| { | |
| "epoch": 2.537240537240537, | |
| "grad_norm": 4.842471599578857, | |
| "learning_rate": 2.5200181513789336e-06, | |
| "loss": 0.9063934683799744, | |
| "step": 2078 | |
| }, | |
| { | |
| "epoch": 2.5396825396825395, | |
| "grad_norm": 3.8723435401916504, | |
| "learning_rate": 2.5145106636080333e-06, | |
| "loss": 0.6532993316650391, | |
| "step": 2080 | |
| }, | |
| { | |
| "epoch": 2.542124542124542, | |
| "grad_norm": 2.0189385414123535, | |
| "learning_rate": 2.5090067440019583e-06, | |
| "loss": 0.9501420855522156, | |
| "step": 2082 | |
| }, | |
| { | |
| "epoch": 2.5445665445665444, | |
| "grad_norm": 2.7918736934661865, | |
| "learning_rate": 2.503506417562817e-06, | |
| "loss": 0.9153074026107788, | |
| "step": 2084 | |
| }, | |
| { | |
| "epoch": 2.547008547008547, | |
| "grad_norm": 1.4488838911056519, | |
| "learning_rate": 2.498009709276403e-06, | |
| "loss": 0.8253804445266724, | |
| "step": 2086 | |
| }, | |
| { | |
| "epoch": 2.5494505494505493, | |
| "grad_norm": 2.9465348720550537, | |
| "learning_rate": 2.492516644112067e-06, | |
| "loss": 0.9089316129684448, | |
| "step": 2088 | |
| }, | |
| { | |
| "epoch": 2.5518925518925517, | |
| "grad_norm": 3.4904873371124268, | |
| "learning_rate": 2.487027247022616e-06, | |
| "loss": 0.7865045666694641, | |
| "step": 2090 | |
| }, | |
| { | |
| "epoch": 2.554334554334554, | |
| "grad_norm": 2.272559642791748, | |
| "learning_rate": 2.4815415429441893e-06, | |
| "loss": 0.6603869199752808, | |
| "step": 2092 | |
| }, | |
| { | |
| "epoch": 2.5567765567765566, | |
| "grad_norm": 2.10170841217041, | |
| "learning_rate": 2.476059556796156e-06, | |
| "loss": 0.6427494287490845, | |
| "step": 2094 | |
| }, | |
| { | |
| "epoch": 2.559218559218559, | |
| "grad_norm": 1.574903130531311, | |
| "learning_rate": 2.4705813134809898e-06, | |
| "loss": 0.8682103157043457, | |
| "step": 2096 | |
| }, | |
| { | |
| "epoch": 2.5616605616605614, | |
| "grad_norm": 1.1644954681396484, | |
| "learning_rate": 2.4651068378841686e-06, | |
| "loss": 0.03524484112858772, | |
| "step": 2098 | |
| }, | |
| { | |
| "epoch": 2.564102564102564, | |
| "grad_norm": 2.8488667011260986, | |
| "learning_rate": 2.4596361548740497e-06, | |
| "loss": 0.5300520658493042, | |
| "step": 2100 | |
| }, | |
| { | |
| "epoch": 2.5665445665445663, | |
| "grad_norm": 1.7959370613098145, | |
| "learning_rate": 2.4541692893017665e-06, | |
| "loss": 0.8741487860679626, | |
| "step": 2102 | |
| }, | |
| { | |
| "epoch": 2.5689865689865687, | |
| "grad_norm": 4.016081809997559, | |
| "learning_rate": 2.4487062660011067e-06, | |
| "loss": 0.6247915029525757, | |
| "step": 2104 | |
| }, | |
| { | |
| "epoch": 2.571428571428571, | |
| "grad_norm": 2.1877832412719727, | |
| "learning_rate": 2.4432471097884097e-06, | |
| "loss": 0.869472861289978, | |
| "step": 2106 | |
| }, | |
| { | |
| "epoch": 2.5738705738705736, | |
| "grad_norm": 5.9354143142700195, | |
| "learning_rate": 2.437791845462442e-06, | |
| "loss": 0.5715877413749695, | |
| "step": 2108 | |
| }, | |
| { | |
| "epoch": 2.576312576312576, | |
| "grad_norm": 5.521259307861328, | |
| "learning_rate": 2.4323404978042953e-06, | |
| "loss": 0.5813296437263489, | |
| "step": 2110 | |
| }, | |
| { | |
| "epoch": 2.578754578754579, | |
| "grad_norm": 1.3393948078155518, | |
| "learning_rate": 2.4268930915772708e-06, | |
| "loss": 0.9946026802062988, | |
| "step": 2112 | |
| }, | |
| { | |
| "epoch": 2.5811965811965814, | |
| "grad_norm": 2.754868984222412, | |
| "learning_rate": 2.4214496515267584e-06, | |
| "loss": 0.27745679020881653, | |
| "step": 2114 | |
| }, | |
| { | |
| "epoch": 2.583638583638584, | |
| "grad_norm": 2.2351222038269043, | |
| "learning_rate": 2.4160102023801384e-06, | |
| "loss": 0.6880282163619995, | |
| "step": 2116 | |
| }, | |
| { | |
| "epoch": 2.586080586080586, | |
| "grad_norm": 2.2096810340881348, | |
| "learning_rate": 2.410574768846658e-06, | |
| "loss": 1.0098536014556885, | |
| "step": 2118 | |
| }, | |
| { | |
| "epoch": 2.5885225885225887, | |
| "grad_norm": 1.1249773502349854, | |
| "learning_rate": 2.4051433756173248e-06, | |
| "loss": 0.7218607068061829, | |
| "step": 2120 | |
| }, | |
| { | |
| "epoch": 2.590964590964591, | |
| "grad_norm": 5.084073543548584, | |
| "learning_rate": 2.399716047364792e-06, | |
| "loss": 0.9250280857086182, | |
| "step": 2122 | |
| }, | |
| { | |
| "epoch": 2.5934065934065935, | |
| "grad_norm": 1.690556526184082, | |
| "learning_rate": 2.394292808743248e-06, | |
| "loss": 0.837040364742279, | |
| "step": 2124 | |
| }, | |
| { | |
| "epoch": 2.595848595848596, | |
| "grad_norm": 2.8815386295318604, | |
| "learning_rate": 2.3888736843883023e-06, | |
| "loss": 0.4333546459674835, | |
| "step": 2126 | |
| }, | |
| { | |
| "epoch": 2.5982905982905984, | |
| "grad_norm": 4.304377555847168, | |
| "learning_rate": 2.383458698916877e-06, | |
| "loss": 1.0332998037338257, | |
| "step": 2128 | |
| }, | |
| { | |
| "epoch": 2.600732600732601, | |
| "grad_norm": 2.969372272491455, | |
| "learning_rate": 2.3780478769270907e-06, | |
| "loss": 0.6737803220748901, | |
| "step": 2130 | |
| }, | |
| { | |
| "epoch": 2.6031746031746033, | |
| "grad_norm": 1.7612605094909668, | |
| "learning_rate": 2.372641242998151e-06, | |
| "loss": 0.527601957321167, | |
| "step": 2132 | |
| }, | |
| { | |
| "epoch": 2.6056166056166057, | |
| "grad_norm": 1.2742631435394287, | |
| "learning_rate": 2.367238821690239e-06, | |
| "loss": 0.8370689749717712, | |
| "step": 2134 | |
| }, | |
| { | |
| "epoch": 2.608058608058608, | |
| "grad_norm": 6.330650329589844, | |
| "learning_rate": 2.361840637544402e-06, | |
| "loss": 0.5774081945419312, | |
| "step": 2136 | |
| }, | |
| { | |
| "epoch": 2.6105006105006106, | |
| "grad_norm": 5.526278972625732, | |
| "learning_rate": 2.3564467150824347e-06, | |
| "loss": 0.7380045652389526, | |
| "step": 2138 | |
| }, | |
| { | |
| "epoch": 2.612942612942613, | |
| "grad_norm": 1.5810257196426392, | |
| "learning_rate": 2.351057078806779e-06, | |
| "loss": 0.6179096698760986, | |
| "step": 2140 | |
| }, | |
| { | |
| "epoch": 2.6153846153846154, | |
| "grad_norm": 2.142446517944336, | |
| "learning_rate": 2.3456717532004014e-06, | |
| "loss": 0.8616040349006653, | |
| "step": 2142 | |
| }, | |
| { | |
| "epoch": 2.617826617826618, | |
| "grad_norm": 3.2861132621765137, | |
| "learning_rate": 2.340290762726691e-06, | |
| "loss": 0.8608450293540955, | |
| "step": 2144 | |
| }, | |
| { | |
| "epoch": 2.6202686202686203, | |
| "grad_norm": 3.9236459732055664, | |
| "learning_rate": 2.334914131829338e-06, | |
| "loss": 1.0178287029266357, | |
| "step": 2146 | |
| }, | |
| { | |
| "epoch": 2.6227106227106227, | |
| "grad_norm": 2.530313491821289, | |
| "learning_rate": 2.3295418849322353e-06, | |
| "loss": 0.6632241606712341, | |
| "step": 2148 | |
| }, | |
| { | |
| "epoch": 2.625152625152625, | |
| "grad_norm": 1.9764374494552612, | |
| "learning_rate": 2.3241740464393575e-06, | |
| "loss": 1.0255072116851807, | |
| "step": 2150 | |
| }, | |
| { | |
| "epoch": 2.6275946275946276, | |
| "grad_norm": 2.218528985977173, | |
| "learning_rate": 2.318810640734654e-06, | |
| "loss": 1.0632559061050415, | |
| "step": 2152 | |
| }, | |
| { | |
| "epoch": 2.63003663003663, | |
| "grad_norm": 2.4311797618865967, | |
| "learning_rate": 2.3134516921819398e-06, | |
| "loss": 1.0157952308654785, | |
| "step": 2154 | |
| }, | |
| { | |
| "epoch": 2.6324786324786325, | |
| "grad_norm": 1.8771414756774902, | |
| "learning_rate": 2.308097225124779e-06, | |
| "loss": 0.9064328074455261, | |
| "step": 2156 | |
| }, | |
| { | |
| "epoch": 2.634920634920635, | |
| "grad_norm": 2.203409433364868, | |
| "learning_rate": 2.302747263886381e-06, | |
| "loss": 0.7967960238456726, | |
| "step": 2158 | |
| }, | |
| { | |
| "epoch": 2.6373626373626373, | |
| "grad_norm": 2.97739315032959, | |
| "learning_rate": 2.2974018327694855e-06, | |
| "loss": 0.5664879083633423, | |
| "step": 2160 | |
| }, | |
| { | |
| "epoch": 2.6398046398046398, | |
| "grad_norm": 3.0944299697875977, | |
| "learning_rate": 2.2920609560562573e-06, | |
| "loss": 0.7291885018348694, | |
| "step": 2162 | |
| }, | |
| { | |
| "epoch": 2.642246642246642, | |
| "grad_norm": 6.805998802185059, | |
| "learning_rate": 2.286724658008166e-06, | |
| "loss": 0.5031374096870422, | |
| "step": 2164 | |
| }, | |
| { | |
| "epoch": 2.6446886446886446, | |
| "grad_norm": 2.784329414367676, | |
| "learning_rate": 2.281392962865888e-06, | |
| "loss": 0.88991379737854, | |
| "step": 2166 | |
| }, | |
| { | |
| "epoch": 2.647130647130647, | |
| "grad_norm": 1.5047616958618164, | |
| "learning_rate": 2.2760658948491872e-06, | |
| "loss": 0.6943953037261963, | |
| "step": 2168 | |
| }, | |
| { | |
| "epoch": 2.6495726495726495, | |
| "grad_norm": 1.8669089078903198, | |
| "learning_rate": 2.27074347815681e-06, | |
| "loss": 0.999853789806366, | |
| "step": 2170 | |
| }, | |
| { | |
| "epoch": 2.652014652014652, | |
| "grad_norm": 1.8199567794799805, | |
| "learning_rate": 2.265425736966371e-06, | |
| "loss": 1.0209516286849976, | |
| "step": 2172 | |
| }, | |
| { | |
| "epoch": 2.6544566544566544, | |
| "grad_norm": 2.209888219833374, | |
| "learning_rate": 2.2601126954342515e-06, | |
| "loss": 1.0429832935333252, | |
| "step": 2174 | |
| }, | |
| { | |
| "epoch": 2.656898656898657, | |
| "grad_norm": 3.112511157989502, | |
| "learning_rate": 2.2548043776954764e-06, | |
| "loss": 0.7639720439910889, | |
| "step": 2176 | |
| }, | |
| { | |
| "epoch": 2.659340659340659, | |
| "grad_norm": 2.3887898921966553, | |
| "learning_rate": 2.2495008078636194e-06, | |
| "loss": 0.8997243642807007, | |
| "step": 2178 | |
| }, | |
| { | |
| "epoch": 2.6617826617826617, | |
| "grad_norm": 6.595883846282959, | |
| "learning_rate": 2.244202010030682e-06, | |
| "loss": 1.002269983291626, | |
| "step": 2180 | |
| }, | |
| { | |
| "epoch": 2.664224664224664, | |
| "grad_norm": 2.121857166290283, | |
| "learning_rate": 2.2389080082669896e-06, | |
| "loss": 0.9555788636207581, | |
| "step": 2182 | |
| }, | |
| { | |
| "epoch": 2.6666666666666665, | |
| "grad_norm": 2.1821184158325195, | |
| "learning_rate": 2.2336188266210827e-06, | |
| "loss": 0.6721835732460022, | |
| "step": 2184 | |
| }, | |
| { | |
| "epoch": 2.669108669108669, | |
| "grad_norm": 2.1556482315063477, | |
| "learning_rate": 2.2283344891196015e-06, | |
| "loss": 0.9556472897529602, | |
| "step": 2186 | |
| }, | |
| { | |
| "epoch": 2.6715506715506714, | |
| "grad_norm": 2.405789613723755, | |
| "learning_rate": 2.2230550197671873e-06, | |
| "loss": 1.1528511047363281, | |
| "step": 2188 | |
| }, | |
| { | |
| "epoch": 2.6739926739926743, | |
| "grad_norm": 1.6833081245422363, | |
| "learning_rate": 2.217780442546361e-06, | |
| "loss": 0.974722683429718, | |
| "step": 2190 | |
| }, | |
| { | |
| "epoch": 2.6764346764346767, | |
| "grad_norm": 4.5762505531311035, | |
| "learning_rate": 2.2125107814174263e-06, | |
| "loss": 0.9278739094734192, | |
| "step": 2192 | |
| }, | |
| { | |
| "epoch": 2.678876678876679, | |
| "grad_norm": 9.507789611816406, | |
| "learning_rate": 2.20724606031835e-06, | |
| "loss": 0.32617825269699097, | |
| "step": 2194 | |
| }, | |
| { | |
| "epoch": 2.6813186813186816, | |
| "grad_norm": 1.8784890174865723, | |
| "learning_rate": 2.2019863031646634e-06, | |
| "loss": 1.0722205638885498, | |
| "step": 2196 | |
| }, | |
| { | |
| "epoch": 2.683760683760684, | |
| "grad_norm": 18.138153076171875, | |
| "learning_rate": 2.1967315338493424e-06, | |
| "loss": 0.8764669299125671, | |
| "step": 2198 | |
| }, | |
| { | |
| "epoch": 2.6862026862026864, | |
| "grad_norm": 2.8644320964813232, | |
| "learning_rate": 2.1914817762427106e-06, | |
| "loss": 0.9633726477622986, | |
| "step": 2200 | |
| }, | |
| { | |
| "epoch": 2.688644688644689, | |
| "grad_norm": 11.419971466064453, | |
| "learning_rate": 2.186237054192323e-06, | |
| "loss": 0.6935689449310303, | |
| "step": 2202 | |
| }, | |
| { | |
| "epoch": 2.6910866910866913, | |
| "grad_norm": 6.860535621643066, | |
| "learning_rate": 2.1809973915228602e-06, | |
| "loss": 0.7737261056900024, | |
| "step": 2204 | |
| }, | |
| { | |
| "epoch": 2.6935286935286937, | |
| "grad_norm": 3.685971260070801, | |
| "learning_rate": 2.17576281203602e-06, | |
| "loss": 0.41188710927963257, | |
| "step": 2206 | |
| }, | |
| { | |
| "epoch": 2.695970695970696, | |
| "grad_norm": 2.738652229309082, | |
| "learning_rate": 2.170533339510412e-06, | |
| "loss": 0.9475657939910889, | |
| "step": 2208 | |
| }, | |
| { | |
| "epoch": 2.6984126984126986, | |
| "grad_norm": 5.204038619995117, | |
| "learning_rate": 2.16530899770144e-06, | |
| "loss": 0.6033111810684204, | |
| "step": 2210 | |
| }, | |
| { | |
| "epoch": 2.700854700854701, | |
| "grad_norm": 2.083590507507324, | |
| "learning_rate": 2.1600898103412107e-06, | |
| "loss": 0.9223611354827881, | |
| "step": 2212 | |
| }, | |
| { | |
| "epoch": 2.7032967032967035, | |
| "grad_norm": 1.6157281398773193, | |
| "learning_rate": 2.1548758011384053e-06, | |
| "loss": 0.6177644729614258, | |
| "step": 2214 | |
| }, | |
| { | |
| "epoch": 2.705738705738706, | |
| "grad_norm": 3.5457189083099365, | |
| "learning_rate": 2.149666993778194e-06, | |
| "loss": 0.4836501181125641, | |
| "step": 2216 | |
| }, | |
| { | |
| "epoch": 2.7081807081807083, | |
| "grad_norm": 3.0739972591400146, | |
| "learning_rate": 2.1444634119221075e-06, | |
| "loss": 0.5934576988220215, | |
| "step": 2218 | |
| }, | |
| { | |
| "epoch": 2.7106227106227108, | |
| "grad_norm": 4.0983781814575195, | |
| "learning_rate": 2.1392650792079454e-06, | |
| "loss": 0.3090117573738098, | |
| "step": 2220 | |
| }, | |
| { | |
| "epoch": 2.713064713064713, | |
| "grad_norm": 5.047109603881836, | |
| "learning_rate": 2.1340720192496594e-06, | |
| "loss": 0.5931053757667542, | |
| "step": 2222 | |
| }, | |
| { | |
| "epoch": 2.7155067155067156, | |
| "grad_norm": 4.083251476287842, | |
| "learning_rate": 2.12888425563725e-06, | |
| "loss": 0.6489290595054626, | |
| "step": 2224 | |
| }, | |
| { | |
| "epoch": 2.717948717948718, | |
| "grad_norm": 1.615699291229248, | |
| "learning_rate": 2.123701811936659e-06, | |
| "loss": 0.7429230213165283, | |
| "step": 2226 | |
| }, | |
| { | |
| "epoch": 2.7203907203907205, | |
| "grad_norm": 4.504569053649902, | |
| "learning_rate": 2.118524711689661e-06, | |
| "loss": 0.693551778793335, | |
| "step": 2228 | |
| }, | |
| { | |
| "epoch": 2.722832722832723, | |
| "grad_norm": 1.5205539464950562, | |
| "learning_rate": 2.1133529784137585e-06, | |
| "loss": 1.0012576580047607, | |
| "step": 2230 | |
| }, | |
| { | |
| "epoch": 2.7252747252747254, | |
| "grad_norm": 3.8578217029571533, | |
| "learning_rate": 2.1081866356020726e-06, | |
| "loss": 0.6832969784736633, | |
| "step": 2232 | |
| }, | |
| { | |
| "epoch": 2.727716727716728, | |
| "grad_norm": 3.5652737617492676, | |
| "learning_rate": 2.103025706723239e-06, | |
| "loss": 1.0270230770111084, | |
| "step": 2234 | |
| }, | |
| { | |
| "epoch": 2.7301587301587302, | |
| "grad_norm": 10.974838256835938, | |
| "learning_rate": 2.0978702152212996e-06, | |
| "loss": 1.0029627084732056, | |
| "step": 2236 | |
| }, | |
| { | |
| "epoch": 2.7326007326007327, | |
| "grad_norm": 5.731579303741455, | |
| "learning_rate": 2.092720184515597e-06, | |
| "loss": 0.956536054611206, | |
| "step": 2238 | |
| }, | |
| { | |
| "epoch": 2.735042735042735, | |
| "grad_norm": 2.9073221683502197, | |
| "learning_rate": 2.0875756380006654e-06, | |
| "loss": 0.6759551167488098, | |
| "step": 2240 | |
| }, | |
| { | |
| "epoch": 2.7374847374847375, | |
| "grad_norm": 1.188262939453125, | |
| "learning_rate": 2.0824365990461295e-06, | |
| "loss": 1.0032782554626465, | |
| "step": 2242 | |
| }, | |
| { | |
| "epoch": 2.73992673992674, | |
| "grad_norm": 3.1204891204833984, | |
| "learning_rate": 2.077303090996594e-06, | |
| "loss": 0.6075339913368225, | |
| "step": 2244 | |
| }, | |
| { | |
| "epoch": 2.7423687423687424, | |
| "grad_norm": 2.2777647972106934, | |
| "learning_rate": 2.072175137171538e-06, | |
| "loss": 0.6075271964073181, | |
| "step": 2246 | |
| }, | |
| { | |
| "epoch": 2.744810744810745, | |
| "grad_norm": 4.017678737640381, | |
| "learning_rate": 2.067052760865211e-06, | |
| "loss": 0.7910500168800354, | |
| "step": 2248 | |
| }, | |
| { | |
| "epoch": 2.7472527472527473, | |
| "grad_norm": 2.2465343475341797, | |
| "learning_rate": 2.0619359853465268e-06, | |
| "loss": 0.8944975137710571, | |
| "step": 2250 | |
| }, | |
| { | |
| "epoch": 2.7496947496947497, | |
| "grad_norm": 2.802507162094116, | |
| "learning_rate": 2.0568248338589546e-06, | |
| "loss": 1.0018844604492188, | |
| "step": 2252 | |
| }, | |
| { | |
| "epoch": 2.752136752136752, | |
| "grad_norm": 1.5768181085586548, | |
| "learning_rate": 2.051719329620418e-06, | |
| "loss": 0.6374676823616028, | |
| "step": 2254 | |
| }, | |
| { | |
| "epoch": 2.7545787545787546, | |
| "grad_norm": 5.736408233642578, | |
| "learning_rate": 2.046619495823189e-06, | |
| "loss": 0.7124966979026794, | |
| "step": 2256 | |
| }, | |
| { | |
| "epoch": 2.757020757020757, | |
| "grad_norm": 1.7231215238571167, | |
| "learning_rate": 2.041525355633775e-06, | |
| "loss": 0.9689019918441772, | |
| "step": 2258 | |
| }, | |
| { | |
| "epoch": 2.7594627594627594, | |
| "grad_norm": 3.9329147338867188, | |
| "learning_rate": 2.0364369321928277e-06, | |
| "loss": 1.0872962474822998, | |
| "step": 2260 | |
| }, | |
| { | |
| "epoch": 2.761904761904762, | |
| "grad_norm": 1.9231138229370117, | |
| "learning_rate": 2.031354248615021e-06, | |
| "loss": 1.0194430351257324, | |
| "step": 2262 | |
| }, | |
| { | |
| "epoch": 2.7643467643467643, | |
| "grad_norm": 1.4887934923171997, | |
| "learning_rate": 2.0262773279889648e-06, | |
| "loss": 0.9722414612770081, | |
| "step": 2264 | |
| }, | |
| { | |
| "epoch": 2.7667887667887667, | |
| "grad_norm": 1.6510133743286133, | |
| "learning_rate": 2.0212061933770793e-06, | |
| "loss": 0.789564311504364, | |
| "step": 2266 | |
| }, | |
| { | |
| "epoch": 2.769230769230769, | |
| "grad_norm": 0.8453580737113953, | |
| "learning_rate": 2.016140867815512e-06, | |
| "loss": 0.655318021774292, | |
| "step": 2268 | |
| }, | |
| { | |
| "epoch": 2.7716727716727716, | |
| "grad_norm": 1.5889426469802856, | |
| "learning_rate": 2.0110813743140134e-06, | |
| "loss": 0.5706247091293335, | |
| "step": 2270 | |
| }, | |
| { | |
| "epoch": 2.774114774114774, | |
| "grad_norm": 1.9540982246398926, | |
| "learning_rate": 2.0060277358558474e-06, | |
| "loss": 0.16020342707633972, | |
| "step": 2272 | |
| }, | |
| { | |
| "epoch": 2.7765567765567765, | |
| "grad_norm": 1.7977136373519897, | |
| "learning_rate": 2.0009799753976755e-06, | |
| "loss": 1.0128493309020996, | |
| "step": 2274 | |
| }, | |
| { | |
| "epoch": 2.778998778998779, | |
| "grad_norm": 1.6063421964645386, | |
| "learning_rate": 1.995938115869464e-06, | |
| "loss": 0.9149772524833679, | |
| "step": 2276 | |
| }, | |
| { | |
| "epoch": 2.7814407814407813, | |
| "grad_norm": 1.2842918634414673, | |
| "learning_rate": 1.990902180174366e-06, | |
| "loss": 0.5837323665618896, | |
| "step": 2278 | |
| }, | |
| { | |
| "epoch": 2.7838827838827838, | |
| "grad_norm": 12.791586875915527, | |
| "learning_rate": 1.985872191188634e-06, | |
| "loss": 0.33608847856521606, | |
| "step": 2280 | |
| }, | |
| { | |
| "epoch": 2.786324786324786, | |
| "grad_norm": 1.533747911453247, | |
| "learning_rate": 1.9808481717614975e-06, | |
| "loss": 0.9576466679573059, | |
| "step": 2282 | |
| }, | |
| { | |
| "epoch": 2.7887667887667886, | |
| "grad_norm": 1.179326057434082, | |
| "learning_rate": 1.9758301447150778e-06, | |
| "loss": 0.9575373530387878, | |
| "step": 2284 | |
| }, | |
| { | |
| "epoch": 2.791208791208791, | |
| "grad_norm": 1.6882575750350952, | |
| "learning_rate": 1.9708181328442657e-06, | |
| "loss": 0.885867178440094, | |
| "step": 2286 | |
| }, | |
| { | |
| "epoch": 2.7936507936507935, | |
| "grad_norm": 1.46290922164917, | |
| "learning_rate": 1.9658121589166368e-06, | |
| "loss": 0.9062331318855286, | |
| "step": 2288 | |
| }, | |
| { | |
| "epoch": 2.796092796092796, | |
| "grad_norm": 6.2452216148376465, | |
| "learning_rate": 1.960812245672332e-06, | |
| "loss": 0.2752612829208374, | |
| "step": 2290 | |
| }, | |
| { | |
| "epoch": 2.7985347985347984, | |
| "grad_norm": 2.4964230060577393, | |
| "learning_rate": 1.955818415823965e-06, | |
| "loss": 0.7961699366569519, | |
| "step": 2292 | |
| }, | |
| { | |
| "epoch": 2.800976800976801, | |
| "grad_norm": 1.7693415880203247, | |
| "learning_rate": 1.950830692056512e-06, | |
| "loss": 0.781195342540741, | |
| "step": 2294 | |
| }, | |
| { | |
| "epoch": 2.8034188034188032, | |
| "grad_norm": 1.656353235244751, | |
| "learning_rate": 1.9458490970272132e-06, | |
| "loss": 0.9211829900741577, | |
| "step": 2296 | |
| }, | |
| { | |
| "epoch": 2.8058608058608057, | |
| "grad_norm": 5.0975494384765625, | |
| "learning_rate": 1.9408736533654693e-06, | |
| "loss": 0.43949294090270996, | |
| "step": 2298 | |
| }, | |
| { | |
| "epoch": 2.808302808302808, | |
| "grad_norm": 1.9063336849212646, | |
| "learning_rate": 1.9359043836727363e-06, | |
| "loss": 0.6156795024871826, | |
| "step": 2300 | |
| }, | |
| { | |
| "epoch": 2.8107448107448105, | |
| "grad_norm": 1.8999279737472534, | |
| "learning_rate": 1.9309413105224242e-06, | |
| "loss": 0.9818214774131775, | |
| "step": 2302 | |
| }, | |
| { | |
| "epoch": 2.813186813186813, | |
| "grad_norm": 0.6018390655517578, | |
| "learning_rate": 1.9259844564597955e-06, | |
| "loss": 0.6223698258399963, | |
| "step": 2304 | |
| }, | |
| { | |
| "epoch": 2.8156288156288154, | |
| "grad_norm": 1.268938660621643, | |
| "learning_rate": 1.9210338440018625e-06, | |
| "loss": 0.7103510499000549, | |
| "step": 2306 | |
| }, | |
| { | |
| "epoch": 2.818070818070818, | |
| "grad_norm": 1.6142417192459106, | |
| "learning_rate": 1.916089495637282e-06, | |
| "loss": 0.6377496123313904, | |
| "step": 2308 | |
| }, | |
| { | |
| "epoch": 2.8205128205128203, | |
| "grad_norm": 2.1147496700286865, | |
| "learning_rate": 1.911151433826258e-06, | |
| "loss": 0.995940089225769, | |
| "step": 2310 | |
| }, | |
| { | |
| "epoch": 2.8229548229548227, | |
| "grad_norm": 1.3524115085601807, | |
| "learning_rate": 1.9062196810004356e-06, | |
| "loss": 0.6259192824363708, | |
| "step": 2312 | |
| }, | |
| { | |
| "epoch": 2.825396825396825, | |
| "grad_norm": 7.68358039855957, | |
| "learning_rate": 1.9012942595628017e-06, | |
| "loss": 0.7580180764198303, | |
| "step": 2314 | |
| }, | |
| { | |
| "epoch": 2.8278388278388276, | |
| "grad_norm": 19.87650489807129, | |
| "learning_rate": 1.8963751918875811e-06, | |
| "loss": 0.7949362993240356, | |
| "step": 2316 | |
| }, | |
| { | |
| "epoch": 2.8302808302808304, | |
| "grad_norm": 2.42893123626709, | |
| "learning_rate": 1.8914625003201375e-06, | |
| "loss": 1.0362216234207153, | |
| "step": 2318 | |
| }, | |
| { | |
| "epoch": 2.832722832722833, | |
| "grad_norm": 2.1275134086608887, | |
| "learning_rate": 1.8865562071768689e-06, | |
| "loss": 0.6284617185592651, | |
| "step": 2320 | |
| }, | |
| { | |
| "epoch": 2.8351648351648353, | |
| "grad_norm": 4.848043918609619, | |
| "learning_rate": 1.8816563347451088e-06, | |
| "loss": 0.6487046480178833, | |
| "step": 2322 | |
| }, | |
| { | |
| "epoch": 2.8376068376068377, | |
| "grad_norm": 1.1281238794326782, | |
| "learning_rate": 1.8767629052830238e-06, | |
| "loss": 0.23647232353687286, | |
| "step": 2324 | |
| }, | |
| { | |
| "epoch": 2.84004884004884, | |
| "grad_norm": 1.886053442955017, | |
| "learning_rate": 1.8718759410195113e-06, | |
| "loss": 0.8259285092353821, | |
| "step": 2326 | |
| }, | |
| { | |
| "epoch": 2.8424908424908426, | |
| "grad_norm": 2.046250343322754, | |
| "learning_rate": 1.8669954641541034e-06, | |
| "loss": 0.7600533962249756, | |
| "step": 2328 | |
| }, | |
| { | |
| "epoch": 2.844932844932845, | |
| "grad_norm": 2.649106025695801, | |
| "learning_rate": 1.8621214968568598e-06, | |
| "loss": 0.7137502431869507, | |
| "step": 2330 | |
| }, | |
| { | |
| "epoch": 2.8473748473748475, | |
| "grad_norm": 2.0803020000457764, | |
| "learning_rate": 1.8572540612682694e-06, | |
| "loss": 0.965351402759552, | |
| "step": 2332 | |
| }, | |
| { | |
| "epoch": 2.84981684981685, | |
| "grad_norm": 2.1527020931243896, | |
| "learning_rate": 1.8523931794991518e-06, | |
| "loss": 0.10668709874153137, | |
| "step": 2334 | |
| }, | |
| { | |
| "epoch": 2.8522588522588523, | |
| "grad_norm": 1.98074209690094, | |
| "learning_rate": 1.8475388736305535e-06, | |
| "loss": 0.9549130797386169, | |
| "step": 2336 | |
| }, | |
| { | |
| "epoch": 2.8547008547008548, | |
| "grad_norm": 1.5347501039505005, | |
| "learning_rate": 1.8426911657136517e-06, | |
| "loss": 0.5754918456077576, | |
| "step": 2338 | |
| }, | |
| { | |
| "epoch": 2.857142857142857, | |
| "grad_norm": 2.49215030670166, | |
| "learning_rate": 1.8378500777696496e-06, | |
| "loss": 1.0284221172332764, | |
| "step": 2340 | |
| }, | |
| { | |
| "epoch": 2.8595848595848596, | |
| "grad_norm": 1.4292778968811035, | |
| "learning_rate": 1.8330156317896794e-06, | |
| "loss": 0.9731957912445068, | |
| "step": 2342 | |
| }, | |
| { | |
| "epoch": 2.862026862026862, | |
| "grad_norm": 5.513943195343018, | |
| "learning_rate": 1.8281878497347007e-06, | |
| "loss": 0.320040762424469, | |
| "step": 2344 | |
| }, | |
| { | |
| "epoch": 2.8644688644688645, | |
| "grad_norm": 5.377645969390869, | |
| "learning_rate": 1.8233667535354031e-06, | |
| "loss": 0.896674633026123, | |
| "step": 2346 | |
| }, | |
| { | |
| "epoch": 2.866910866910867, | |
| "grad_norm": 0.5098855495452881, | |
| "learning_rate": 1.8185523650921033e-06, | |
| "loss": 0.4520077109336853, | |
| "step": 2348 | |
| }, | |
| { | |
| "epoch": 2.8693528693528694, | |
| "grad_norm": 1.4593082666397095, | |
| "learning_rate": 1.813744706274649e-06, | |
| "loss": 0.5917829275131226, | |
| "step": 2350 | |
| }, | |
| { | |
| "epoch": 2.871794871794872, | |
| "grad_norm": 1.9374688863754272, | |
| "learning_rate": 1.8089437989223166e-06, | |
| "loss": 0.7799688577651978, | |
| "step": 2352 | |
| }, | |
| { | |
| "epoch": 2.8742368742368742, | |
| "grad_norm": 3.2081990242004395, | |
| "learning_rate": 1.8041496648437138e-06, | |
| "loss": 0.5761586427688599, | |
| "step": 2354 | |
| }, | |
| { | |
| "epoch": 2.8766788766788767, | |
| "grad_norm": 1.424476981163025, | |
| "learning_rate": 1.7993623258166794e-06, | |
| "loss": 0.8961036801338196, | |
| "step": 2356 | |
| }, | |
| { | |
| "epoch": 2.879120879120879, | |
| "grad_norm": 6.229803562164307, | |
| "learning_rate": 1.7945818035881866e-06, | |
| "loss": 0.676258385181427, | |
| "step": 2358 | |
| }, | |
| { | |
| "epoch": 2.8815628815628815, | |
| "grad_norm": 2.3079984188079834, | |
| "learning_rate": 1.789808119874241e-06, | |
| "loss": 0.5127954483032227, | |
| "step": 2360 | |
| }, | |
| { | |
| "epoch": 2.884004884004884, | |
| "grad_norm": 3.7900733947753906, | |
| "learning_rate": 1.7850412963597837e-06, | |
| "loss": 0.7495479583740234, | |
| "step": 2362 | |
| }, | |
| { | |
| "epoch": 2.8864468864468864, | |
| "grad_norm": 1.7501633167266846, | |
| "learning_rate": 1.7802813546985936e-06, | |
| "loss": 0.9721358418464661, | |
| "step": 2364 | |
| }, | |
| { | |
| "epoch": 2.888888888888889, | |
| "grad_norm": 1.494238257408142, | |
| "learning_rate": 1.7755283165131876e-06, | |
| "loss": 1.0429596900939941, | |
| "step": 2366 | |
| }, | |
| { | |
| "epoch": 2.8913308913308913, | |
| "grad_norm": 9.534012794494629, | |
| "learning_rate": 1.7707822033947214e-06, | |
| "loss": 0.7053688764572144, | |
| "step": 2368 | |
| }, | |
| { | |
| "epoch": 2.8937728937728937, | |
| "grad_norm": 1.7080990076065063, | |
| "learning_rate": 1.7660430369028975e-06, | |
| "loss": 1.0583832263946533, | |
| "step": 2370 | |
| }, | |
| { | |
| "epoch": 2.896214896214896, | |
| "grad_norm": 2.404010534286499, | |
| "learning_rate": 1.7613108385658554e-06, | |
| "loss": 0.9982654452323914, | |
| "step": 2372 | |
| }, | |
| { | |
| "epoch": 2.8986568986568986, | |
| "grad_norm": 1.0838375091552734, | |
| "learning_rate": 1.7565856298800887e-06, | |
| "loss": 0.767935037612915, | |
| "step": 2374 | |
| }, | |
| { | |
| "epoch": 2.901098901098901, | |
| "grad_norm": 2.0502405166625977, | |
| "learning_rate": 1.7518674323103325e-06, | |
| "loss": 0.8051881790161133, | |
| "step": 2376 | |
| }, | |
| { | |
| "epoch": 2.9035409035409034, | |
| "grad_norm": 2.304351806640625, | |
| "learning_rate": 1.7471562672894812e-06, | |
| "loss": 0.8764021396636963, | |
| "step": 2378 | |
| }, | |
| { | |
| "epoch": 2.905982905982906, | |
| "grad_norm": 3.1706900596618652, | |
| "learning_rate": 1.7424521562184747e-06, | |
| "loss": 0.686414897441864, | |
| "step": 2380 | |
| }, | |
| { | |
| "epoch": 2.9084249084249083, | |
| "grad_norm": 2.4662387371063232, | |
| "learning_rate": 1.7377551204662173e-06, | |
| "loss": 0.9558066129684448, | |
| "step": 2382 | |
| }, | |
| { | |
| "epoch": 2.9108669108669107, | |
| "grad_norm": 1.8197959661483765, | |
| "learning_rate": 1.7330651813694656e-06, | |
| "loss": 0.8915336728096008, | |
| "step": 2384 | |
| }, | |
| { | |
| "epoch": 2.913308913308913, | |
| "grad_norm": 1.8730552196502686, | |
| "learning_rate": 1.7283823602327467e-06, | |
| "loss": 0.9963794946670532, | |
| "step": 2386 | |
| }, | |
| { | |
| "epoch": 2.9157509157509156, | |
| "grad_norm": 3.505786418914795, | |
| "learning_rate": 1.7237066783282454e-06, | |
| "loss": 0.7860287427902222, | |
| "step": 2388 | |
| }, | |
| { | |
| "epoch": 2.918192918192918, | |
| "grad_norm": 10.96102523803711, | |
| "learning_rate": 1.719038156895725e-06, | |
| "loss": 0.5170654058456421, | |
| "step": 2390 | |
| }, | |
| { | |
| "epoch": 2.9206349206349205, | |
| "grad_norm": 2.723895311355591, | |
| "learning_rate": 1.7143768171424114e-06, | |
| "loss": 0.5465037822723389, | |
| "step": 2392 | |
| }, | |
| { | |
| "epoch": 2.9230769230769234, | |
| "grad_norm": 2.4230728149414062, | |
| "learning_rate": 1.709722680242917e-06, | |
| "loss": 0.8477396368980408, | |
| "step": 2394 | |
| }, | |
| { | |
| "epoch": 2.925518925518926, | |
| "grad_norm": 4.999912261962891, | |
| "learning_rate": 1.7050757673391256e-06, | |
| "loss": 0.5552250742912292, | |
| "step": 2396 | |
| }, | |
| { | |
| "epoch": 2.927960927960928, | |
| "grad_norm": 4.302630424499512, | |
| "learning_rate": 1.7004360995401131e-06, | |
| "loss": 0.6446186304092407, | |
| "step": 2398 | |
| }, | |
| { | |
| "epoch": 2.9304029304029307, | |
| "grad_norm": 11.290936470031738, | |
| "learning_rate": 1.6958036979220378e-06, | |
| "loss": 0.5395065546035767, | |
| "step": 2400 | |
| }, | |
| { | |
| "epoch": 2.932844932844933, | |
| "grad_norm": 2.418199062347412, | |
| "learning_rate": 1.6911785835280544e-06, | |
| "loss": 0.6396113634109497, | |
| "step": 2402 | |
| }, | |
| { | |
| "epoch": 2.9352869352869355, | |
| "grad_norm": 3.1459553241729736, | |
| "learning_rate": 1.686560777368212e-06, | |
| "loss": 0.3191334009170532, | |
| "step": 2404 | |
| }, | |
| { | |
| "epoch": 2.937728937728938, | |
| "grad_norm": 8.619038581848145, | |
| "learning_rate": 1.6819503004193633e-06, | |
| "loss": 1.0637816190719604, | |
| "step": 2406 | |
| }, | |
| { | |
| "epoch": 2.9401709401709404, | |
| "grad_norm": 2.521735191345215, | |
| "learning_rate": 1.6773471736250668e-06, | |
| "loss": 0.6778561472892761, | |
| "step": 2408 | |
| }, | |
| { | |
| "epoch": 2.942612942612943, | |
| "grad_norm": 2.928496837615967, | |
| "learning_rate": 1.672751417895491e-06, | |
| "loss": 0.6122907996177673, | |
| "step": 2410 | |
| }, | |
| { | |
| "epoch": 2.9450549450549453, | |
| "grad_norm": 3.0468804836273193, | |
| "learning_rate": 1.6681630541073222e-06, | |
| "loss": 0.9394041895866394, | |
| "step": 2412 | |
| }, | |
| { | |
| "epoch": 2.9474969474969477, | |
| "grad_norm": 2.0013132095336914, | |
| "learning_rate": 1.6635821031036671e-06, | |
| "loss": 0.9378724694252014, | |
| "step": 2414 | |
| }, | |
| { | |
| "epoch": 2.94993894993895, | |
| "grad_norm": 1.7880492210388184, | |
| "learning_rate": 1.6590085856939593e-06, | |
| "loss": 0.9468516111373901, | |
| "step": 2416 | |
| }, | |
| { | |
| "epoch": 2.9523809523809526, | |
| "grad_norm": 2.087956666946411, | |
| "learning_rate": 1.654442522653865e-06, | |
| "loss": 0.6785021424293518, | |
| "step": 2418 | |
| }, | |
| { | |
| "epoch": 2.954822954822955, | |
| "grad_norm": 1.5312798023223877, | |
| "learning_rate": 1.6498839347251873e-06, | |
| "loss": 1.0038193464279175, | |
| "step": 2420 | |
| }, | |
| { | |
| "epoch": 2.9572649572649574, | |
| "grad_norm": 1.5011390447616577, | |
| "learning_rate": 1.6453328426157738e-06, | |
| "loss": 0.8261772990226746, | |
| "step": 2422 | |
| }, | |
| { | |
| "epoch": 2.95970695970696, | |
| "grad_norm": 2.2196717262268066, | |
| "learning_rate": 1.6407892669994208e-06, | |
| "loss": 0.5461646914482117, | |
| "step": 2424 | |
| }, | |
| { | |
| "epoch": 2.9621489621489623, | |
| "grad_norm": 1.7109367847442627, | |
| "learning_rate": 1.63625322851578e-06, | |
| "loss": 0.9957700967788696, | |
| "step": 2426 | |
| }, | |
| { | |
| "epoch": 2.9645909645909647, | |
| "grad_norm": 15.042463302612305, | |
| "learning_rate": 1.6317247477702666e-06, | |
| "loss": 0.5697113871574402, | |
| "step": 2428 | |
| }, | |
| { | |
| "epoch": 2.967032967032967, | |
| "grad_norm": 2.1879959106445312, | |
| "learning_rate": 1.6272038453339615e-06, | |
| "loss": 0.9809165000915527, | |
| "step": 2430 | |
| }, | |
| { | |
| "epoch": 2.9694749694749696, | |
| "grad_norm": 2.687741756439209, | |
| "learning_rate": 1.6226905417435226e-06, | |
| "loss": 0.851597249507904, | |
| "step": 2432 | |
| }, | |
| { | |
| "epoch": 2.971916971916972, | |
| "grad_norm": 2.330556631088257, | |
| "learning_rate": 1.618184857501088e-06, | |
| "loss": 0.93310546875, | |
| "step": 2434 | |
| }, | |
| { | |
| "epoch": 2.9743589743589745, | |
| "grad_norm": 2.613746404647827, | |
| "learning_rate": 1.6136868130741836e-06, | |
| "loss": 0.4884783625602722, | |
| "step": 2436 | |
| }, | |
| { | |
| "epoch": 2.976800976800977, | |
| "grad_norm": 7.81420373916626, | |
| "learning_rate": 1.6091964288956337e-06, | |
| "loss": 0.5040377974510193, | |
| "step": 2438 | |
| }, | |
| { | |
| "epoch": 2.9792429792429793, | |
| "grad_norm": 1.7252824306488037, | |
| "learning_rate": 1.6047137253634603e-06, | |
| "loss": 0.934016227722168, | |
| "step": 2440 | |
| }, | |
| { | |
| "epoch": 2.9816849816849818, | |
| "grad_norm": 1.4692366123199463, | |
| "learning_rate": 1.600238722840801e-06, | |
| "loss": 0.9195917248725891, | |
| "step": 2442 | |
| }, | |
| { | |
| "epoch": 2.984126984126984, | |
| "grad_norm": 2.0103518962860107, | |
| "learning_rate": 1.595771441655804e-06, | |
| "loss": 0.9216701984405518, | |
| "step": 2444 | |
| }, | |
| { | |
| "epoch": 2.9865689865689866, | |
| "grad_norm": 2.632267951965332, | |
| "learning_rate": 1.5913119021015509e-06, | |
| "loss": 0.8953844308853149, | |
| "step": 2446 | |
| }, | |
| { | |
| "epoch": 2.989010989010989, | |
| "grad_norm": 1.5844495296478271, | |
| "learning_rate": 1.5868601244359465e-06, | |
| "loss": 0.9133291244506836, | |
| "step": 2448 | |
| }, | |
| { | |
| "epoch": 2.9914529914529915, | |
| "grad_norm": 8.298254013061523, | |
| "learning_rate": 1.5824161288816458e-06, | |
| "loss": 0.5930345058441162, | |
| "step": 2450 | |
| }, | |
| { | |
| "epoch": 2.993894993894994, | |
| "grad_norm": 1.963354229927063, | |
| "learning_rate": 1.5779799356259456e-06, | |
| "loss": 0.9954817891120911, | |
| "step": 2452 | |
| }, | |
| { | |
| "epoch": 2.9963369963369964, | |
| "grad_norm": 1.8422874212265015, | |
| "learning_rate": 1.5735515648207054e-06, | |
| "loss": 0.8938966989517212, | |
| "step": 2454 | |
| }, | |
| { | |
| "epoch": 2.998778998778999, | |
| "grad_norm": 1.3020809888839722, | |
| "learning_rate": 1.5691310365822446e-06, | |
| "loss": 0.4988337457180023, | |
| "step": 2456 | |
| }, | |
| { | |
| "epoch": 3.001221001221001, | |
| "grad_norm": 10.537405967712402, | |
| "learning_rate": 1.5647183709912631e-06, | |
| "loss": 0.6655434370040894, | |
| "step": 2458 | |
| }, | |
| { | |
| "epoch": 3.0036630036630036, | |
| "grad_norm": 3.749023199081421, | |
| "learning_rate": 1.560313588092739e-06, | |
| "loss": 0.5545458197593689, | |
| "step": 2460 | |
| }, | |
| { | |
| "epoch": 3.006105006105006, | |
| "grad_norm": 1.9818220138549805, | |
| "learning_rate": 1.5559167078958468e-06, | |
| "loss": 0.9352074265480042, | |
| "step": 2462 | |
| }, | |
| { | |
| "epoch": 3.0085470085470085, | |
| "grad_norm": 1.7658523321151733, | |
| "learning_rate": 1.5515277503738572e-06, | |
| "loss": 0.7911304235458374, | |
| "step": 2464 | |
| }, | |
| { | |
| "epoch": 3.010989010989011, | |
| "grad_norm": 3.5951449871063232, | |
| "learning_rate": 1.5471467354640563e-06, | |
| "loss": 0.4669531285762787, | |
| "step": 2466 | |
| }, | |
| { | |
| "epoch": 3.0134310134310134, | |
| "grad_norm": 1.7102817296981812, | |
| "learning_rate": 1.5427736830676453e-06, | |
| "loss": 0.7882851362228394, | |
| "step": 2468 | |
| }, | |
| { | |
| "epoch": 3.015873015873016, | |
| "grad_norm": 0.4721863567829132, | |
| "learning_rate": 1.53840861304966e-06, | |
| "loss": 0.2778557240962982, | |
| "step": 2470 | |
| }, | |
| { | |
| "epoch": 3.0183150183150182, | |
| "grad_norm": 9.13770580291748, | |
| "learning_rate": 1.534051545238872e-06, | |
| "loss": 0.35145601630210876, | |
| "step": 2472 | |
| }, | |
| { | |
| "epoch": 3.0207570207570207, | |
| "grad_norm": 2.7468252182006836, | |
| "learning_rate": 1.5297024994277019e-06, | |
| "loss": 0.8385328054428101, | |
| "step": 2474 | |
| }, | |
| { | |
| "epoch": 3.023199023199023, | |
| "grad_norm": 17.04658317565918, | |
| "learning_rate": 1.5253614953721315e-06, | |
| "loss": 0.506757915019989, | |
| "step": 2476 | |
| }, | |
| { | |
| "epoch": 3.0256410256410255, | |
| "grad_norm": 2.0790674686431885, | |
| "learning_rate": 1.521028552791611e-06, | |
| "loss": 0.5367285013198853, | |
| "step": 2478 | |
| }, | |
| { | |
| "epoch": 3.028083028083028, | |
| "grad_norm": 1.9070419073104858, | |
| "learning_rate": 1.5167036913689707e-06, | |
| "loss": 0.844150185585022, | |
| "step": 2480 | |
| }, | |
| { | |
| "epoch": 3.0305250305250304, | |
| "grad_norm": 7.032018184661865, | |
| "learning_rate": 1.5123869307503316e-06, | |
| "loss": 0.8483667969703674, | |
| "step": 2482 | |
| }, | |
| { | |
| "epoch": 3.032967032967033, | |
| "grad_norm": 2.8655481338500977, | |
| "learning_rate": 1.5080782905450147e-06, | |
| "loss": 0.46071624755859375, | |
| "step": 2484 | |
| }, | |
| { | |
| "epoch": 3.0354090354090353, | |
| "grad_norm": 2.500929594039917, | |
| "learning_rate": 1.5037777903254555e-06, | |
| "loss": 0.7813271880149841, | |
| "step": 2486 | |
| }, | |
| { | |
| "epoch": 3.0378510378510377, | |
| "grad_norm": 6.008790969848633, | |
| "learning_rate": 1.499485449627111e-06, | |
| "loss": 0.6232810616493225, | |
| "step": 2488 | |
| }, | |
| { | |
| "epoch": 3.04029304029304, | |
| "grad_norm": 2.7102444171905518, | |
| "learning_rate": 1.4952012879483739e-06, | |
| "loss": 0.8550852537155151, | |
| "step": 2490 | |
| }, | |
| { | |
| "epoch": 3.0427350427350426, | |
| "grad_norm": 3.5206353664398193, | |
| "learning_rate": 1.4909253247504813e-06, | |
| "loss": 0.3453124165534973, | |
| "step": 2492 | |
| }, | |
| { | |
| "epoch": 3.045177045177045, | |
| "grad_norm": 0.4782972037792206, | |
| "learning_rate": 1.4866575794574298e-06, | |
| "loss": 0.27831053733825684, | |
| "step": 2494 | |
| }, | |
| { | |
| "epoch": 3.0476190476190474, | |
| "grad_norm": 5.067359447479248, | |
| "learning_rate": 1.4823980714558842e-06, | |
| "loss": 0.6302961111068726, | |
| "step": 2496 | |
| }, | |
| { | |
| "epoch": 3.05006105006105, | |
| "grad_norm": 15.672896385192871, | |
| "learning_rate": 1.4781468200950903e-06, | |
| "loss": 0.3658387064933777, | |
| "step": 2498 | |
| }, | |
| { | |
| "epoch": 3.0525030525030523, | |
| "grad_norm": 1.5754889249801636, | |
| "learning_rate": 1.4739038446867882e-06, | |
| "loss": 0.4961889386177063, | |
| "step": 2500 | |
| }, | |
| { | |
| "epoch": 3.0549450549450547, | |
| "grad_norm": 8.665702819824219, | |
| "learning_rate": 1.4696691645051233e-06, | |
| "loss": 0.4041464328765869, | |
| "step": 2502 | |
| }, | |
| { | |
| "epoch": 3.057387057387057, | |
| "grad_norm": 2.04341459274292, | |
| "learning_rate": 1.4654427987865584e-06, | |
| "loss": 0.7974004745483398, | |
| "step": 2504 | |
| }, | |
| { | |
| "epoch": 3.0598290598290596, | |
| "grad_norm": 2.483825445175171, | |
| "learning_rate": 1.461224766729788e-06, | |
| "loss": 0.5479510426521301, | |
| "step": 2506 | |
| }, | |
| { | |
| "epoch": 3.062271062271062, | |
| "grad_norm": 1.7536518573760986, | |
| "learning_rate": 1.4570150874956491e-06, | |
| "loss": 0.6952835917472839, | |
| "step": 2508 | |
| }, | |
| { | |
| "epoch": 3.064713064713065, | |
| "grad_norm": 2.361480951309204, | |
| "learning_rate": 1.452813780207038e-06, | |
| "loss": 0.8374277949333191, | |
| "step": 2510 | |
| }, | |
| { | |
| "epoch": 3.0671550671550674, | |
| "grad_norm": 3.7606232166290283, | |
| "learning_rate": 1.4486208639488147e-06, | |
| "loss": 0.42150750756263733, | |
| "step": 2512 | |
| }, | |
| { | |
| "epoch": 3.06959706959707, | |
| "grad_norm": 2.443471670150757, | |
| "learning_rate": 1.44443635776773e-06, | |
| "loss": 0.7532929182052612, | |
| "step": 2514 | |
| }, | |
| { | |
| "epoch": 3.0720390720390722, | |
| "grad_norm": 3.036879301071167, | |
| "learning_rate": 1.4402602806723224e-06, | |
| "loss": 0.5805554389953613, | |
| "step": 2516 | |
| }, | |
| { | |
| "epoch": 3.0744810744810747, | |
| "grad_norm": 2.405412197113037, | |
| "learning_rate": 1.4360926516328486e-06, | |
| "loss": 0.1354294717311859, | |
| "step": 2518 | |
| }, | |
| { | |
| "epoch": 3.076923076923077, | |
| "grad_norm": 2.239072322845459, | |
| "learning_rate": 1.4319334895811822e-06, | |
| "loss": 0.4422219395637512, | |
| "step": 2520 | |
| }, | |
| { | |
| "epoch": 3.0793650793650795, | |
| "grad_norm": 1.6425889730453491, | |
| "learning_rate": 1.4277828134107404e-06, | |
| "loss": 0.6835185289382935, | |
| "step": 2522 | |
| }, | |
| { | |
| "epoch": 3.081807081807082, | |
| "grad_norm": 2.7413876056671143, | |
| "learning_rate": 1.4236406419763888e-06, | |
| "loss": 0.9346991777420044, | |
| "step": 2524 | |
| }, | |
| { | |
| "epoch": 3.0842490842490844, | |
| "grad_norm": 13.284109115600586, | |
| "learning_rate": 1.4195069940943602e-06, | |
| "loss": 0.5621562004089355, | |
| "step": 2526 | |
| }, | |
| { | |
| "epoch": 3.086691086691087, | |
| "grad_norm": 2.8617191314697266, | |
| "learning_rate": 1.4153818885421693e-06, | |
| "loss": 0.48587676882743835, | |
| "step": 2528 | |
| }, | |
| { | |
| "epoch": 3.0891330891330893, | |
| "grad_norm": 4.067028045654297, | |
| "learning_rate": 1.4112653440585253e-06, | |
| "loss": 0.06718393415212631, | |
| "step": 2530 | |
| }, | |
| { | |
| "epoch": 3.0915750915750917, | |
| "grad_norm": 2.965784788131714, | |
| "learning_rate": 1.4071573793432477e-06, | |
| "loss": 0.36484605073928833, | |
| "step": 2532 | |
| }, | |
| { | |
| "epoch": 3.094017094017094, | |
| "grad_norm": 6.96315336227417, | |
| "learning_rate": 1.4030580130571828e-06, | |
| "loss": 0.9084134101867676, | |
| "step": 2534 | |
| }, | |
| { | |
| "epoch": 3.0964590964590966, | |
| "grad_norm": 4.8996663093566895, | |
| "learning_rate": 1.3989672638221164e-06, | |
| "loss": 0.4888841509819031, | |
| "step": 2536 | |
| }, | |
| { | |
| "epoch": 3.098901098901099, | |
| "grad_norm": 15.704608917236328, | |
| "learning_rate": 1.3948851502206916e-06, | |
| "loss": 0.7055212259292603, | |
| "step": 2538 | |
| }, | |
| { | |
| "epoch": 3.1013431013431014, | |
| "grad_norm": 3.72160005569458, | |
| "learning_rate": 1.3908116907963219e-06, | |
| "loss": 0.9027807712554932, | |
| "step": 2540 | |
| }, | |
| { | |
| "epoch": 3.103785103785104, | |
| "grad_norm": 2.0347139835357666, | |
| "learning_rate": 1.3867469040531097e-06, | |
| "loss": 0.5173296928405762, | |
| "step": 2542 | |
| }, | |
| { | |
| "epoch": 3.1062271062271063, | |
| "grad_norm": 2.45859694480896, | |
| "learning_rate": 1.3826908084557599e-06, | |
| "loss": 0.515606701374054, | |
| "step": 2544 | |
| }, | |
| { | |
| "epoch": 3.1086691086691087, | |
| "grad_norm": 4.653799533843994, | |
| "learning_rate": 1.3786434224294974e-06, | |
| "loss": 0.09697027504444122, | |
| "step": 2546 | |
| }, | |
| { | |
| "epoch": 3.111111111111111, | |
| "grad_norm": 2.246595621109009, | |
| "learning_rate": 1.374604764359983e-06, | |
| "loss": 0.5722851753234863, | |
| "step": 2548 | |
| }, | |
| { | |
| "epoch": 3.1135531135531136, | |
| "grad_norm": 3.75177264213562, | |
| "learning_rate": 1.3705748525932296e-06, | |
| "loss": 0.6366295218467712, | |
| "step": 2550 | |
| }, | |
| { | |
| "epoch": 3.115995115995116, | |
| "grad_norm": 12.912460327148438, | |
| "learning_rate": 1.36655370543552e-06, | |
| "loss": 0.19046643376350403, | |
| "step": 2552 | |
| }, | |
| { | |
| "epoch": 3.1184371184371185, | |
| "grad_norm": 2.1945950984954834, | |
| "learning_rate": 1.3625413411533215e-06, | |
| "loss": 0.8602652549743652, | |
| "step": 2554 | |
| }, | |
| { | |
| "epoch": 3.120879120879121, | |
| "grad_norm": 14.562986373901367, | |
| "learning_rate": 1.3585377779732054e-06, | |
| "loss": 0.4864516258239746, | |
| "step": 2556 | |
| }, | |
| { | |
| "epoch": 3.1233211233211233, | |
| "grad_norm": 1.2465832233428955, | |
| "learning_rate": 1.3545430340817623e-06, | |
| "loss": 0.414315789937973, | |
| "step": 2558 | |
| }, | |
| { | |
| "epoch": 3.1257631257631258, | |
| "grad_norm": 2.1674346923828125, | |
| "learning_rate": 1.350557127625521e-06, | |
| "loss": 0.8689194321632385, | |
| "step": 2560 | |
| }, | |
| { | |
| "epoch": 3.128205128205128, | |
| "grad_norm": 4.005629062652588, | |
| "learning_rate": 1.3465800767108644e-06, | |
| "loss": 0.9341198801994324, | |
| "step": 2562 | |
| }, | |
| { | |
| "epoch": 3.1306471306471306, | |
| "grad_norm": 3.285874605178833, | |
| "learning_rate": 1.342611899403951e-06, | |
| "loss": 0.8401010632514954, | |
| "step": 2564 | |
| }, | |
| { | |
| "epoch": 3.133089133089133, | |
| "grad_norm": 4.772563934326172, | |
| "learning_rate": 1.338652613730625e-06, | |
| "loss": 0.5630367398262024, | |
| "step": 2566 | |
| }, | |
| { | |
| "epoch": 3.1355311355311355, | |
| "grad_norm": 3.344740629196167, | |
| "learning_rate": 1.334702237676345e-06, | |
| "loss": 0.7592120170593262, | |
| "step": 2568 | |
| }, | |
| { | |
| "epoch": 3.137973137973138, | |
| "grad_norm": 9.296796798706055, | |
| "learning_rate": 1.3307607891860916e-06, | |
| "loss": 0.48091375827789307, | |
| "step": 2570 | |
| }, | |
| { | |
| "epoch": 3.1404151404151404, | |
| "grad_norm": 3.7448983192443848, | |
| "learning_rate": 1.3268282861642965e-06, | |
| "loss": 0.523597776889801, | |
| "step": 2572 | |
| }, | |
| { | |
| "epoch": 3.142857142857143, | |
| "grad_norm": 2.7864246368408203, | |
| "learning_rate": 1.3229047464747492e-06, | |
| "loss": 0.8306266665458679, | |
| "step": 2574 | |
| }, | |
| { | |
| "epoch": 3.1452991452991452, | |
| "grad_norm": 4.969945430755615, | |
| "learning_rate": 1.3189901879405287e-06, | |
| "loss": 0.8894456028938293, | |
| "step": 2576 | |
| }, | |
| { | |
| "epoch": 3.1477411477411477, | |
| "grad_norm": 2.3445777893066406, | |
| "learning_rate": 1.3150846283439124e-06, | |
| "loss": 0.7661553621292114, | |
| "step": 2578 | |
| }, | |
| { | |
| "epoch": 3.15018315018315, | |
| "grad_norm": 5.691545486450195, | |
| "learning_rate": 1.3111880854263006e-06, | |
| "loss": 0.714350163936615, | |
| "step": 2580 | |
| }, | |
| { | |
| "epoch": 3.1526251526251525, | |
| "grad_norm": 10.326680183410645, | |
| "learning_rate": 1.3073005768881332e-06, | |
| "loss": 0.8273946046829224, | |
| "step": 2582 | |
| }, | |
| { | |
| "epoch": 3.155067155067155, | |
| "grad_norm": 15.851977348327637, | |
| "learning_rate": 1.3034221203888117e-06, | |
| "loss": 0.32498669624328613, | |
| "step": 2584 | |
| }, | |
| { | |
| "epoch": 3.1575091575091574, | |
| "grad_norm": 9.003250122070312, | |
| "learning_rate": 1.2995527335466172e-06, | |
| "loss": 0.5692976713180542, | |
| "step": 2586 | |
| }, | |
| { | |
| "epoch": 3.15995115995116, | |
| "grad_norm": 6.924123287200928, | |
| "learning_rate": 1.2956924339386318e-06, | |
| "loss": 0.3022007346153259, | |
| "step": 2588 | |
| }, | |
| { | |
| "epoch": 3.1623931623931623, | |
| "grad_norm": 4.58242130279541, | |
| "learning_rate": 1.291841239100656e-06, | |
| "loss": 0.29833054542541504, | |
| "step": 2590 | |
| }, | |
| { | |
| "epoch": 3.1648351648351647, | |
| "grad_norm": 2.056873083114624, | |
| "learning_rate": 1.287999166527133e-06, | |
| "loss": 0.7911575436592102, | |
| "step": 2592 | |
| }, | |
| { | |
| "epoch": 3.167277167277167, | |
| "grad_norm": 2.0765960216522217, | |
| "learning_rate": 1.2841662336710662e-06, | |
| "loss": 0.6330872774124146, | |
| "step": 2594 | |
| }, | |
| { | |
| "epoch": 3.1697191697191696, | |
| "grad_norm": 6.344074726104736, | |
| "learning_rate": 1.2803424579439411e-06, | |
| "loss": 0.5566316246986389, | |
| "step": 2596 | |
| }, | |
| { | |
| "epoch": 3.172161172161172, | |
| "grad_norm": 2.674805164337158, | |
| "learning_rate": 1.276527856715646e-06, | |
| "loss": 0.8628856539726257, | |
| "step": 2598 | |
| }, | |
| { | |
| "epoch": 3.1746031746031744, | |
| "grad_norm": 2.659057140350342, | |
| "learning_rate": 1.2727224473143933e-06, | |
| "loss": 0.7332071661949158, | |
| "step": 2600 | |
| }, | |
| { | |
| "epoch": 3.177045177045177, | |
| "grad_norm": 2.530466318130493, | |
| "learning_rate": 1.2689262470266403e-06, | |
| "loss": 0.8011347651481628, | |
| "step": 2602 | |
| }, | |
| { | |
| "epoch": 3.1794871794871793, | |
| "grad_norm": 3.901975154876709, | |
| "learning_rate": 1.2651392730970107e-06, | |
| "loss": 0.8330449461936951, | |
| "step": 2604 | |
| }, | |
| { | |
| "epoch": 3.1819291819291817, | |
| "grad_norm": 43.94447326660156, | |
| "learning_rate": 1.2613615427282177e-06, | |
| "loss": 0.676027774810791, | |
| "step": 2606 | |
| }, | |
| { | |
| "epoch": 3.1843711843711846, | |
| "grad_norm": 3.9286386966705322, | |
| "learning_rate": 1.2575930730809836e-06, | |
| "loss": 0.7716042399406433, | |
| "step": 2608 | |
| }, | |
| { | |
| "epoch": 3.186813186813187, | |
| "grad_norm": 3.0505211353302, | |
| "learning_rate": 1.253833881273963e-06, | |
| "loss": 0.6885058879852295, | |
| "step": 2610 | |
| }, | |
| { | |
| "epoch": 3.1892551892551895, | |
| "grad_norm": 1.979110598564148, | |
| "learning_rate": 1.250083984383665e-06, | |
| "loss": 0.8547409176826477, | |
| "step": 2612 | |
| }, | |
| { | |
| "epoch": 3.191697191697192, | |
| "grad_norm": 1.8880133628845215, | |
| "learning_rate": 1.2463433994443755e-06, | |
| "loss": 0.7033931612968445, | |
| "step": 2614 | |
| }, | |
| { | |
| "epoch": 3.1941391941391943, | |
| "grad_norm": 5.2559099197387695, | |
| "learning_rate": 1.2426121434480828e-06, | |
| "loss": 0.5308555960655212, | |
| "step": 2616 | |
| }, | |
| { | |
| "epoch": 3.1965811965811968, | |
| "grad_norm": 15.855555534362793, | |
| "learning_rate": 1.2388902333443913e-06, | |
| "loss": 0.5126664042472839, | |
| "step": 2618 | |
| }, | |
| { | |
| "epoch": 3.199023199023199, | |
| "grad_norm": 6.698633670806885, | |
| "learning_rate": 1.2351776860404577e-06, | |
| "loss": 0.2311715930700302, | |
| "step": 2620 | |
| }, | |
| { | |
| "epoch": 3.2014652014652016, | |
| "grad_norm": 2.60094952583313, | |
| "learning_rate": 1.2314745184009012e-06, | |
| "loss": 0.5172269344329834, | |
| "step": 2622 | |
| }, | |
| { | |
| "epoch": 3.203907203907204, | |
| "grad_norm": 6.668694496154785, | |
| "learning_rate": 1.2277807472477384e-06, | |
| "loss": 0.2329966425895691, | |
| "step": 2624 | |
| }, | |
| { | |
| "epoch": 3.2063492063492065, | |
| "grad_norm": 3.4587221145629883, | |
| "learning_rate": 1.2240963893602952e-06, | |
| "loss": 0.9453647136688232, | |
| "step": 2626 | |
| }, | |
| { | |
| "epoch": 3.208791208791209, | |
| "grad_norm": 10.398512840270996, | |
| "learning_rate": 1.2204214614751442e-06, | |
| "loss": 0.14541763067245483, | |
| "step": 2628 | |
| }, | |
| { | |
| "epoch": 3.2112332112332114, | |
| "grad_norm": 2.5785880088806152, | |
| "learning_rate": 1.2167559802860135e-06, | |
| "loss": 0.9672622084617615, | |
| "step": 2630 | |
| }, | |
| { | |
| "epoch": 3.213675213675214, | |
| "grad_norm": 6.969937801361084, | |
| "learning_rate": 1.213099962443726e-06, | |
| "loss": 0.556788980960846, | |
| "step": 2632 | |
| }, | |
| { | |
| "epoch": 3.2161172161172162, | |
| "grad_norm": 1.9294971227645874, | |
| "learning_rate": 1.2094534245561098e-06, | |
| "loss": 0.7081160545349121, | |
| "step": 2634 | |
| }, | |
| { | |
| "epoch": 3.2185592185592187, | |
| "grad_norm": 3.8238325119018555, | |
| "learning_rate": 1.205816383187935e-06, | |
| "loss": 0.610231876373291, | |
| "step": 2636 | |
| }, | |
| { | |
| "epoch": 3.221001221001221, | |
| "grad_norm": 8.861764907836914, | |
| "learning_rate": 1.2021888548608272e-06, | |
| "loss": 0.6349549889564514, | |
| "step": 2638 | |
| }, | |
| { | |
| "epoch": 3.2234432234432235, | |
| "grad_norm": 1.7522162199020386, | |
| "learning_rate": 1.1985708560532047e-06, | |
| "loss": 0.7171330451965332, | |
| "step": 2640 | |
| }, | |
| { | |
| "epoch": 3.225885225885226, | |
| "grad_norm": 2.3566970825195312, | |
| "learning_rate": 1.19496240320019e-06, | |
| "loss": 0.8892612457275391, | |
| "step": 2642 | |
| }, | |
| { | |
| "epoch": 3.2283272283272284, | |
| "grad_norm": 2.1190340518951416, | |
| "learning_rate": 1.1913635126935477e-06, | |
| "loss": 0.7615914344787598, | |
| "step": 2644 | |
| }, | |
| { | |
| "epoch": 3.230769230769231, | |
| "grad_norm": 1.7972434759140015, | |
| "learning_rate": 1.1877742008816002e-06, | |
| "loss": 0.7856362462043762, | |
| "step": 2646 | |
| }, | |
| { | |
| "epoch": 3.2332112332112333, | |
| "grad_norm": 2.146169662475586, | |
| "learning_rate": 1.1841944840691619e-06, | |
| "loss": 0.8134780526161194, | |
| "step": 2648 | |
| }, | |
| { | |
| "epoch": 3.2356532356532357, | |
| "grad_norm": 11.170672416687012, | |
| "learning_rate": 1.1806243785174574e-06, | |
| "loss": 0.25675392150878906, | |
| "step": 2650 | |
| }, | |
| { | |
| "epoch": 3.238095238095238, | |
| "grad_norm": 3.0035762786865234, | |
| "learning_rate": 1.177063900444053e-06, | |
| "loss": 0.7596380114555359, | |
| "step": 2652 | |
| }, | |
| { | |
| "epoch": 3.2405372405372406, | |
| "grad_norm": 2.9616620540618896, | |
| "learning_rate": 1.173513066022781e-06, | |
| "loss": 0.9850119352340698, | |
| "step": 2654 | |
| }, | |
| { | |
| "epoch": 3.242979242979243, | |
| "grad_norm": 2.4156839847564697, | |
| "learning_rate": 1.1699718913836657e-06, | |
| "loss": 0.5272483229637146, | |
| "step": 2656 | |
| }, | |
| { | |
| "epoch": 3.2454212454212454, | |
| "grad_norm": 6.468486785888672, | |
| "learning_rate": 1.1664403926128516e-06, | |
| "loss": 0.38367509841918945, | |
| "step": 2658 | |
| }, | |
| { | |
| "epoch": 3.247863247863248, | |
| "grad_norm": 7.794209957122803, | |
| "learning_rate": 1.1629185857525296e-06, | |
| "loss": 0.4385049343109131, | |
| "step": 2660 | |
| }, | |
| { | |
| "epoch": 3.2503052503052503, | |
| "grad_norm": 3.511357545852661, | |
| "learning_rate": 1.1594064868008634e-06, | |
| "loss": 1.021645188331604, | |
| "step": 2662 | |
| }, | |
| { | |
| "epoch": 3.2527472527472527, | |
| "grad_norm": 3.927022695541382, | |
| "learning_rate": 1.1559041117119183e-06, | |
| "loss": 0.6152275800704956, | |
| "step": 2664 | |
| }, | |
| { | |
| "epoch": 3.255189255189255, | |
| "grad_norm": 2.2393136024475098, | |
| "learning_rate": 1.1524114763955876e-06, | |
| "loss": 0.48958325386047363, | |
| "step": 2666 | |
| }, | |
| { | |
| "epoch": 3.2576312576312576, | |
| "grad_norm": 4.180426597595215, | |
| "learning_rate": 1.1489285967175207e-06, | |
| "loss": 0.8316766619682312, | |
| "step": 2668 | |
| }, | |
| { | |
| "epoch": 3.26007326007326, | |
| "grad_norm": 2.2399017810821533, | |
| "learning_rate": 1.145455488499051e-06, | |
| "loss": 0.8960772156715393, | |
| "step": 2670 | |
| }, | |
| { | |
| "epoch": 3.2625152625152625, | |
| "grad_norm": 8.40261459350586, | |
| "learning_rate": 1.1419921675171245e-06, | |
| "loss": 0.48031508922576904, | |
| "step": 2672 | |
| }, | |
| { | |
| "epoch": 3.264957264957265, | |
| "grad_norm": 4.309727191925049, | |
| "learning_rate": 1.1385386495042272e-06, | |
| "loss": 0.13968098163604736, | |
| "step": 2674 | |
| }, | |
| { | |
| "epoch": 3.2673992673992673, | |
| "grad_norm": 2.9554507732391357, | |
| "learning_rate": 1.1350949501483145e-06, | |
| "loss": 0.909633457660675, | |
| "step": 2676 | |
| }, | |
| { | |
| "epoch": 3.2698412698412698, | |
| "grad_norm": 3.3006162643432617, | |
| "learning_rate": 1.1316610850927388e-06, | |
| "loss": 0.7681498527526855, | |
| "step": 2678 | |
| }, | |
| { | |
| "epoch": 3.272283272283272, | |
| "grad_norm": 0.4983324706554413, | |
| "learning_rate": 1.12823706993618e-06, | |
| "loss": 0.42390134930610657, | |
| "step": 2680 | |
| }, | |
| { | |
| "epoch": 3.2747252747252746, | |
| "grad_norm": 5.958628177642822, | |
| "learning_rate": 1.1248229202325736e-06, | |
| "loss": 0.5118038654327393, | |
| "step": 2682 | |
| }, | |
| { | |
| "epoch": 3.277167277167277, | |
| "grad_norm": 3.1510939598083496, | |
| "learning_rate": 1.1214186514910398e-06, | |
| "loss": 0.4903255105018616, | |
| "step": 2684 | |
| }, | |
| { | |
| "epoch": 3.2796092796092795, | |
| "grad_norm": 3.030272960662842, | |
| "learning_rate": 1.118024279175814e-06, | |
| "loss": 0.9587437510490417, | |
| "step": 2686 | |
| }, | |
| { | |
| "epoch": 3.282051282051282, | |
| "grad_norm": 0.38044917583465576, | |
| "learning_rate": 1.1146398187061776e-06, | |
| "loss": 0.13130967319011688, | |
| "step": 2688 | |
| }, | |
| { | |
| "epoch": 3.2844932844932844, | |
| "grad_norm": 3.4803404808044434, | |
| "learning_rate": 1.1112652854563824e-06, | |
| "loss": 0.8778082132339478, | |
| "step": 2690 | |
| }, | |
| { | |
| "epoch": 3.286935286935287, | |
| "grad_norm": 2.587324619293213, | |
| "learning_rate": 1.10790069475559e-06, | |
| "loss": 0.7979317903518677, | |
| "step": 2692 | |
| }, | |
| { | |
| "epoch": 3.2893772893772892, | |
| "grad_norm": 0.5913499593734741, | |
| "learning_rate": 1.1045460618877912e-06, | |
| "loss": 0.06984243541955948, | |
| "step": 2694 | |
| }, | |
| { | |
| "epoch": 3.2918192918192917, | |
| "grad_norm": 0.8156366944313049, | |
| "learning_rate": 1.1012014020917484e-06, | |
| "loss": 0.2010909765958786, | |
| "step": 2696 | |
| }, | |
| { | |
| "epoch": 3.294261294261294, | |
| "grad_norm": 1.7881795167922974, | |
| "learning_rate": 1.0978667305609147e-06, | |
| "loss": 0.7086883783340454, | |
| "step": 2698 | |
| }, | |
| { | |
| "epoch": 3.2967032967032965, | |
| "grad_norm": 2.393721342086792, | |
| "learning_rate": 1.0945420624433758e-06, | |
| "loss": 0.8595046997070312, | |
| "step": 2700 | |
| }, | |
| { | |
| "epoch": 3.299145299145299, | |
| "grad_norm": 2.1565139293670654, | |
| "learning_rate": 1.0912274128417696e-06, | |
| "loss": 0.9005102515220642, | |
| "step": 2702 | |
| }, | |
| { | |
| "epoch": 3.3015873015873014, | |
| "grad_norm": 3.553394317626953, | |
| "learning_rate": 1.0879227968132316e-06, | |
| "loss": 0.7882212996482849, | |
| "step": 2704 | |
| }, | |
| { | |
| "epoch": 3.304029304029304, | |
| "grad_norm": 3.0672333240509033, | |
| "learning_rate": 1.084628229369311e-06, | |
| "loss": 0.7716812491416931, | |
| "step": 2706 | |
| }, | |
| { | |
| "epoch": 3.3064713064713063, | |
| "grad_norm": 2.4524006843566895, | |
| "learning_rate": 1.0813437254759165e-06, | |
| "loss": 0.5214710831642151, | |
| "step": 2708 | |
| }, | |
| { | |
| "epoch": 3.3089133089133087, | |
| "grad_norm": 2.5456132888793945, | |
| "learning_rate": 1.0780693000532372e-06, | |
| "loss": 0.6923003196716309, | |
| "step": 2710 | |
| }, | |
| { | |
| "epoch": 3.311355311355311, | |
| "grad_norm": 2.900570869445801, | |
| "learning_rate": 1.0748049679756843e-06, | |
| "loss": 0.8628150820732117, | |
| "step": 2712 | |
| }, | |
| { | |
| "epoch": 3.3137973137973136, | |
| "grad_norm": 3.343024253845215, | |
| "learning_rate": 1.0715507440718147e-06, | |
| "loss": 0.5487645864486694, | |
| "step": 2714 | |
| }, | |
| { | |
| "epoch": 3.316239316239316, | |
| "grad_norm": 3.0560784339904785, | |
| "learning_rate": 1.0683066431242717e-06, | |
| "loss": 0.8819491267204285, | |
| "step": 2716 | |
| }, | |
| { | |
| "epoch": 3.3186813186813184, | |
| "grad_norm": 4.643116474151611, | |
| "learning_rate": 1.0650726798697113e-06, | |
| "loss": 0.7186728715896606, | |
| "step": 2718 | |
| }, | |
| { | |
| "epoch": 3.3211233211233213, | |
| "grad_norm": 3.3299341201782227, | |
| "learning_rate": 1.061848868998739e-06, | |
| "loss": 0.8435643315315247, | |
| "step": 2720 | |
| }, | |
| { | |
| "epoch": 3.3235653235653237, | |
| "grad_norm": 3.4163544178009033, | |
| "learning_rate": 1.0586352251558425e-06, | |
| "loss": 0.5442382097244263, | |
| "step": 2722 | |
| }, | |
| { | |
| "epoch": 3.326007326007326, | |
| "grad_norm": 0.8344938158988953, | |
| "learning_rate": 1.0554317629393234e-06, | |
| "loss": 0.06635025888681412, | |
| "step": 2724 | |
| }, | |
| { | |
| "epoch": 3.3284493284493286, | |
| "grad_norm": 3.7720038890838623, | |
| "learning_rate": 1.052238496901234e-06, | |
| "loss": 0.4655779004096985, | |
| "step": 2726 | |
| }, | |
| { | |
| "epoch": 3.330891330891331, | |
| "grad_norm": 1.7745321989059448, | |
| "learning_rate": 1.0490554415473079e-06, | |
| "loss": 0.7798499464988708, | |
| "step": 2728 | |
| }, | |
| { | |
| "epoch": 3.3333333333333335, | |
| "grad_norm": 11.497058868408203, | |
| "learning_rate": 1.0458826113368965e-06, | |
| "loss": 0.7589215040206909, | |
| "step": 2730 | |
| }, | |
| { | |
| "epoch": 3.335775335775336, | |
| "grad_norm": 0.8510182499885559, | |
| "learning_rate": 1.042720020682902e-06, | |
| "loss": 0.5679793953895569, | |
| "step": 2732 | |
| }, | |
| { | |
| "epoch": 3.3382173382173383, | |
| "grad_norm": 3.5748512744903564, | |
| "learning_rate": 1.039567683951713e-06, | |
| "loss": 0.6550238132476807, | |
| "step": 2734 | |
| }, | |
| { | |
| "epoch": 3.340659340659341, | |
| "grad_norm": 1.754301905632019, | |
| "learning_rate": 1.036425615463138e-06, | |
| "loss": 0.7326245903968811, | |
| "step": 2736 | |
| }, | |
| { | |
| "epoch": 3.343101343101343, | |
| "grad_norm": 0.8980134129524231, | |
| "learning_rate": 1.0332938294903407e-06, | |
| "loss": 0.4887623190879822, | |
| "step": 2738 | |
| }, | |
| { | |
| "epoch": 3.3455433455433456, | |
| "grad_norm": 1.9559557437896729, | |
| "learning_rate": 1.0301723402597769e-06, | |
| "loss": 0.4706956148147583, | |
| "step": 2740 | |
| }, | |
| { | |
| "epoch": 3.347985347985348, | |
| "grad_norm": 10.616726875305176, | |
| "learning_rate": 1.0270611619511266e-06, | |
| "loss": 0.8596802353858948, | |
| "step": 2742 | |
| }, | |
| { | |
| "epoch": 3.3504273504273505, | |
| "grad_norm": 24.7109317779541, | |
| "learning_rate": 1.0239603086972323e-06, | |
| "loss": 0.584933340549469, | |
| "step": 2744 | |
| }, | |
| { | |
| "epoch": 3.352869352869353, | |
| "grad_norm": 2.8988747596740723, | |
| "learning_rate": 1.020869794584034e-06, | |
| "loss": 0.7101993560791016, | |
| "step": 2746 | |
| }, | |
| { | |
| "epoch": 3.3553113553113554, | |
| "grad_norm": 93.7816162109375, | |
| "learning_rate": 1.017789633650505e-06, | |
| "loss": 0.5839419960975647, | |
| "step": 2748 | |
| }, | |
| { | |
| "epoch": 3.357753357753358, | |
| "grad_norm": 5.284997463226318, | |
| "learning_rate": 1.014719839888588e-06, | |
| "loss": 0.3563012480735779, | |
| "step": 2750 | |
| }, | |
| { | |
| "epoch": 3.3601953601953602, | |
| "grad_norm": 1.8024640083312988, | |
| "learning_rate": 1.011660427243132e-06, | |
| "loss": 0.3267917037010193, | |
| "step": 2752 | |
| }, | |
| { | |
| "epoch": 3.3626373626373627, | |
| "grad_norm": 1.5629299879074097, | |
| "learning_rate": 1.0086114096118286e-06, | |
| "loss": 0.4166877269744873, | |
| "step": 2754 | |
| }, | |
| { | |
| "epoch": 3.365079365079365, | |
| "grad_norm": 3.879136323928833, | |
| "learning_rate": 1.005572800845148e-06, | |
| "loss": 0.7401219606399536, | |
| "step": 2756 | |
| }, | |
| { | |
| "epoch": 3.3675213675213675, | |
| "grad_norm": 2.1835029125213623, | |
| "learning_rate": 1.0025446147462803e-06, | |
| "loss": 0.8265122175216675, | |
| "step": 2758 | |
| }, | |
| { | |
| "epoch": 3.36996336996337, | |
| "grad_norm": 4.126181602478027, | |
| "learning_rate": 9.995268650710657e-07, | |
| "loss": 1.0622559785842896, | |
| "step": 2760 | |
| }, | |
| { | |
| "epoch": 3.3724053724053724, | |
| "grad_norm": 2.4242002964019775, | |
| "learning_rate": 9.965195655279379e-07, | |
| "loss": 0.4468766152858734, | |
| "step": 2762 | |
| }, | |
| { | |
| "epoch": 3.374847374847375, | |
| "grad_norm": 2.549320697784424, | |
| "learning_rate": 9.935227297778605e-07, | |
| "loss": 0.8873687386512756, | |
| "step": 2764 | |
| }, | |
| { | |
| "epoch": 3.3772893772893773, | |
| "grad_norm": 3.020458936691284, | |
| "learning_rate": 9.905363714342615e-07, | |
| "loss": 0.4191344082355499, | |
| "step": 2766 | |
| }, | |
| { | |
| "epoch": 3.3797313797313797, | |
| "grad_norm": 2.900737762451172, | |
| "learning_rate": 9.875605040629773e-07, | |
| "loss": 0.37247806787490845, | |
| "step": 2768 | |
| }, | |
| { | |
| "epoch": 3.382173382173382, | |
| "grad_norm": 5.969982624053955, | |
| "learning_rate": 9.845951411821863e-07, | |
| "loss": 0.44880709052085876, | |
| "step": 2770 | |
| }, | |
| { | |
| "epoch": 3.3846153846153846, | |
| "grad_norm": 5.617712497711182, | |
| "learning_rate": 9.816402962623497e-07, | |
| "loss": 0.8781357407569885, | |
| "step": 2772 | |
| }, | |
| { | |
| "epoch": 3.387057387057387, | |
| "grad_norm": 4.446506023406982, | |
| "learning_rate": 9.786959827261491e-07, | |
| "loss": 0.4436875283718109, | |
| "step": 2774 | |
| }, | |
| { | |
| "epoch": 3.3894993894993894, | |
| "grad_norm": 2.733320713043213, | |
| "learning_rate": 9.757622139484275e-07, | |
| "loss": 0.8267475962638855, | |
| "step": 2776 | |
| }, | |
| { | |
| "epoch": 3.391941391941392, | |
| "grad_norm": 1.7333195209503174, | |
| "learning_rate": 9.72839003256126e-07, | |
| "loss": 0.741727352142334, | |
| "step": 2778 | |
| }, | |
| { | |
| "epoch": 3.3943833943833943, | |
| "grad_norm": 0.6535694599151611, | |
| "learning_rate": 9.699263639282255e-07, | |
| "loss": 0.3470302224159241, | |
| "step": 2780 | |
| }, | |
| { | |
| "epoch": 3.3968253968253967, | |
| "grad_norm": 2.3038716316223145, | |
| "learning_rate": 9.67024309195685e-07, | |
| "loss": 0.538809061050415, | |
| "step": 2782 | |
| }, | |
| { | |
| "epoch": 3.399267399267399, | |
| "grad_norm": 2.897294044494629, | |
| "learning_rate": 9.641328522413813e-07, | |
| "loss": 0.20831730961799622, | |
| "step": 2784 | |
| }, | |
| { | |
| "epoch": 3.4017094017094016, | |
| "grad_norm": 5.283458709716797, | |
| "learning_rate": 9.61252006200051e-07, | |
| "loss": 0.4685713052749634, | |
| "step": 2786 | |
| }, | |
| { | |
| "epoch": 3.404151404151404, | |
| "grad_norm": 2.1215171813964844, | |
| "learning_rate": 9.583817841582285e-07, | |
| "loss": 0.9289071559906006, | |
| "step": 2788 | |
| }, | |
| { | |
| "epoch": 3.4065934065934065, | |
| "grad_norm": 2.2103238105773926, | |
| "learning_rate": 9.555221991541877e-07, | |
| "loss": 0.814532995223999, | |
| "step": 2790 | |
| }, | |
| { | |
| "epoch": 3.409035409035409, | |
| "grad_norm": 1.8449485301971436, | |
| "learning_rate": 9.526732641778836e-07, | |
| "loss": 0.4528883099555969, | |
| "step": 2792 | |
| }, | |
| { | |
| "epoch": 3.4114774114774113, | |
| "grad_norm": 1.9408596754074097, | |
| "learning_rate": 9.498349921708914e-07, | |
| "loss": 0.7237257957458496, | |
| "step": 2794 | |
| }, | |
| { | |
| "epoch": 3.413919413919414, | |
| "grad_norm": 2.033916711807251, | |
| "learning_rate": 9.470073960263484e-07, | |
| "loss": 0.8568973541259766, | |
| "step": 2796 | |
| }, | |
| { | |
| "epoch": 3.416361416361416, | |
| "grad_norm": 2.8969850540161133, | |
| "learning_rate": 9.441904885888982e-07, | |
| "loss": 0.4384755790233612, | |
| "step": 2798 | |
| }, | |
| { | |
| "epoch": 3.4188034188034186, | |
| "grad_norm": 0.7930925488471985, | |
| "learning_rate": 9.413842826546254e-07, | |
| "loss": 0.06476318836212158, | |
| "step": 2800 | |
| }, | |
| { | |
| "epoch": 3.421245421245421, | |
| "grad_norm": 2.1533210277557373, | |
| "learning_rate": 9.385887909710069e-07, | |
| "loss": 0.825805127620697, | |
| "step": 2802 | |
| }, | |
| { | |
| "epoch": 3.4236874236874235, | |
| "grad_norm": 0.8457545042037964, | |
| "learning_rate": 9.358040262368445e-07, | |
| "loss": 0.6605730652809143, | |
| "step": 2804 | |
| }, | |
| { | |
| "epoch": 3.426129426129426, | |
| "grad_norm": 2.488839626312256, | |
| "learning_rate": 9.330300011022163e-07, | |
| "loss": 0.5794928669929504, | |
| "step": 2806 | |
| }, | |
| { | |
| "epoch": 3.4285714285714284, | |
| "grad_norm": 2.972033739089966, | |
| "learning_rate": 9.302667281684098e-07, | |
| "loss": 0.8902238011360168, | |
| "step": 2808 | |
| }, | |
| { | |
| "epoch": 3.4310134310134313, | |
| "grad_norm": 3.6812944412231445, | |
| "learning_rate": 9.27514219987874e-07, | |
| "loss": 0.6202410459518433, | |
| "step": 2810 | |
| }, | |
| { | |
| "epoch": 3.4334554334554337, | |
| "grad_norm": 1.0500991344451904, | |
| "learning_rate": 9.247724890641543e-07, | |
| "loss": 0.10720151662826538, | |
| "step": 2812 | |
| }, | |
| { | |
| "epoch": 3.435897435897436, | |
| "grad_norm": 7.567105293273926, | |
| "learning_rate": 9.22041547851843e-07, | |
| "loss": 0.6870253682136536, | |
| "step": 2814 | |
| }, | |
| { | |
| "epoch": 3.4383394383394386, | |
| "grad_norm": 21.982929229736328, | |
| "learning_rate": 9.193214087565146e-07, | |
| "loss": 0.7681679725646973, | |
| "step": 2816 | |
| }, | |
| { | |
| "epoch": 3.440781440781441, | |
| "grad_norm": 3.4593942165374756, | |
| "learning_rate": 9.166120841346786e-07, | |
| "loss": 0.9266019463539124, | |
| "step": 2818 | |
| }, | |
| { | |
| "epoch": 3.4432234432234434, | |
| "grad_norm": 3.5219388008117676, | |
| "learning_rate": 9.139135862937139e-07, | |
| "loss": 0.7612605094909668, | |
| "step": 2820 | |
| }, | |
| { | |
| "epoch": 3.445665445665446, | |
| "grad_norm": 4.131385803222656, | |
| "learning_rate": 9.112259274918228e-07, | |
| "loss": 0.9317520260810852, | |
| "step": 2822 | |
| }, | |
| { | |
| "epoch": 3.4481074481074483, | |
| "grad_norm": 2.044438600540161, | |
| "learning_rate": 9.085491199379652e-07, | |
| "loss": 0.8517608046531677, | |
| "step": 2824 | |
| }, | |
| { | |
| "epoch": 3.4505494505494507, | |
| "grad_norm": 2.4360508918762207, | |
| "learning_rate": 9.058831757918119e-07, | |
| "loss": 0.6897678971290588, | |
| "step": 2826 | |
| }, | |
| { | |
| "epoch": 3.452991452991453, | |
| "grad_norm": 2.0640690326690674, | |
| "learning_rate": 9.032281071636829e-07, | |
| "loss": 0.8173401355743408, | |
| "step": 2828 | |
| }, | |
| { | |
| "epoch": 3.4554334554334556, | |
| "grad_norm": 16.34990692138672, | |
| "learning_rate": 9.00583926114497e-07, | |
| "loss": 0.5656797289848328, | |
| "step": 2830 | |
| }, | |
| { | |
| "epoch": 3.457875457875458, | |
| "grad_norm": 3.1184754371643066, | |
| "learning_rate": 8.979506446557141e-07, | |
| "loss": 0.49873775243759155, | |
| "step": 2832 | |
| }, | |
| { | |
| "epoch": 3.4603174603174605, | |
| "grad_norm": 1.7745251655578613, | |
| "learning_rate": 8.953282747492816e-07, | |
| "loss": 0.9596657752990723, | |
| "step": 2834 | |
| }, | |
| { | |
| "epoch": 3.462759462759463, | |
| "grad_norm": 7.131638050079346, | |
| "learning_rate": 8.927168283075807e-07, | |
| "loss": 0.5169793367385864, | |
| "step": 2836 | |
| }, | |
| { | |
| "epoch": 3.4652014652014653, | |
| "grad_norm": 101.0040283203125, | |
| "learning_rate": 8.901163171933706e-07, | |
| "loss": 0.4887714684009552, | |
| "step": 2838 | |
| }, | |
| { | |
| "epoch": 3.4676434676434678, | |
| "grad_norm": 2.2968623638153076, | |
| "learning_rate": 8.875267532197368e-07, | |
| "loss": 0.8718173503875732, | |
| "step": 2840 | |
| }, | |
| { | |
| "epoch": 3.47008547008547, | |
| "grad_norm": 1.8435990810394287, | |
| "learning_rate": 8.849481481500356e-07, | |
| "loss": 0.7854589819908142, | |
| "step": 2842 | |
| }, | |
| { | |
| "epoch": 3.4725274725274726, | |
| "grad_norm": 2.054354667663574, | |
| "learning_rate": 8.823805136978413e-07, | |
| "loss": 0.8386743068695068, | |
| "step": 2844 | |
| }, | |
| { | |
| "epoch": 3.474969474969475, | |
| "grad_norm": 1.7228000164031982, | |
| "learning_rate": 8.798238615268934e-07, | |
| "loss": 0.8005945682525635, | |
| "step": 2846 | |
| }, | |
| { | |
| "epoch": 3.4774114774114775, | |
| "grad_norm": 2.3333404064178467, | |
| "learning_rate": 8.772782032510439e-07, | |
| "loss": 0.7422125935554504, | |
| "step": 2848 | |
| }, | |
| { | |
| "epoch": 3.47985347985348, | |
| "grad_norm": 6.355716228485107, | |
| "learning_rate": 8.747435504342022e-07, | |
| "loss": 0.5096930861473083, | |
| "step": 2850 | |
| }, | |
| { | |
| "epoch": 3.4822954822954824, | |
| "grad_norm": 1.6502275466918945, | |
| "learning_rate": 8.722199145902857e-07, | |
| "loss": 0.7535804510116577, | |
| "step": 2852 | |
| }, | |
| { | |
| "epoch": 3.484737484737485, | |
| "grad_norm": 0.4867725968360901, | |
| "learning_rate": 8.697073071831658e-07, | |
| "loss": 0.14731785655021667, | |
| "step": 2854 | |
| }, | |
| { | |
| "epoch": 3.4871794871794872, | |
| "grad_norm": 7.8371686935424805, | |
| "learning_rate": 8.672057396266161e-07, | |
| "loss": 0.7259136438369751, | |
| "step": 2856 | |
| }, | |
| { | |
| "epoch": 3.4896214896214897, | |
| "grad_norm": 4.284936904907227, | |
| "learning_rate": 8.647152232842599e-07, | |
| "loss": 0.30577749013900757, | |
| "step": 2858 | |
| }, | |
| { | |
| "epoch": 3.492063492063492, | |
| "grad_norm": 1.8026747703552246, | |
| "learning_rate": 8.622357694695207e-07, | |
| "loss": 0.7950439453125, | |
| "step": 2860 | |
| }, | |
| { | |
| "epoch": 3.4945054945054945, | |
| "grad_norm": 10.06855583190918, | |
| "learning_rate": 8.597673894455675e-07, | |
| "loss": 0.7480056881904602, | |
| "step": 2862 | |
| }, | |
| { | |
| "epoch": 3.496947496947497, | |
| "grad_norm": 1.7933481931686401, | |
| "learning_rate": 8.573100944252662e-07, | |
| "loss": 0.4216347336769104, | |
| "step": 2864 | |
| }, | |
| { | |
| "epoch": 3.4993894993894994, | |
| "grad_norm": 1.7255240678787231, | |
| "learning_rate": 8.548638955711298e-07, | |
| "loss": 0.3292645812034607, | |
| "step": 2866 | |
| }, | |
| { | |
| "epoch": 3.501831501831502, | |
| "grad_norm": 3.3586931228637695, | |
| "learning_rate": 8.524288039952625e-07, | |
| "loss": 0.8613098859786987, | |
| "step": 2868 | |
| }, | |
| { | |
| "epoch": 3.5042735042735043, | |
| "grad_norm": 7.2187700271606445, | |
| "learning_rate": 8.500048307593161e-07, | |
| "loss": 0.49458879232406616, | |
| "step": 2870 | |
| }, | |
| { | |
| "epoch": 3.5067155067155067, | |
| "grad_norm": 13.894682884216309, | |
| "learning_rate": 8.475919868744322e-07, | |
| "loss": 0.39427265524864197, | |
| "step": 2872 | |
| }, | |
| { | |
| "epoch": 3.509157509157509, | |
| "grad_norm": 2.581885814666748, | |
| "learning_rate": 8.451902833012003e-07, | |
| "loss": 0.918152391910553, | |
| "step": 2874 | |
| }, | |
| { | |
| "epoch": 3.5115995115995116, | |
| "grad_norm": 3.96673321723938, | |
| "learning_rate": 8.427997309496005e-07, | |
| "loss": 0.30580323934555054, | |
| "step": 2876 | |
| }, | |
| { | |
| "epoch": 3.514041514041514, | |
| "grad_norm": 3.975219964981079, | |
| "learning_rate": 8.404203406789602e-07, | |
| "loss": 0.7915468215942383, | |
| "step": 2878 | |
| }, | |
| { | |
| "epoch": 3.5164835164835164, | |
| "grad_norm": 2.8507955074310303, | |
| "learning_rate": 8.380521232978992e-07, | |
| "loss": 0.6845412254333496, | |
| "step": 2880 | |
| }, | |
| { | |
| "epoch": 3.518925518925519, | |
| "grad_norm": 5.622760772705078, | |
| "learning_rate": 8.356950895642862e-07, | |
| "loss": 0.6595726013183594, | |
| "step": 2882 | |
| }, | |
| { | |
| "epoch": 3.5213675213675213, | |
| "grad_norm": 4.254746437072754, | |
| "learning_rate": 8.333492501851838e-07, | |
| "loss": 0.9753207564353943, | |
| "step": 2884 | |
| }, | |
| { | |
| "epoch": 3.5238095238095237, | |
| "grad_norm": 2.7020394802093506, | |
| "learning_rate": 8.310146158168066e-07, | |
| "loss": 0.9157518148422241, | |
| "step": 2886 | |
| }, | |
| { | |
| "epoch": 3.526251526251526, | |
| "grad_norm": 2.108231544494629, | |
| "learning_rate": 8.28691197064466e-07, | |
| "loss": 0.7969650030136108, | |
| "step": 2888 | |
| }, | |
| { | |
| "epoch": 3.5286935286935286, | |
| "grad_norm": 3.530876398086548, | |
| "learning_rate": 8.263790044825285e-07, | |
| "loss": 0.9127578735351562, | |
| "step": 2890 | |
| }, | |
| { | |
| "epoch": 3.531135531135531, | |
| "grad_norm": 2.557037830352783, | |
| "learning_rate": 8.240780485743608e-07, | |
| "loss": 0.7240651845932007, | |
| "step": 2892 | |
| }, | |
| { | |
| "epoch": 3.5335775335775335, | |
| "grad_norm": 6.49147891998291, | |
| "learning_rate": 8.217883397922905e-07, | |
| "loss": 0.3955632746219635, | |
| "step": 2894 | |
| }, | |
| { | |
| "epoch": 3.536019536019536, | |
| "grad_norm": 2.565272808074951, | |
| "learning_rate": 8.195098885375495e-07, | |
| "loss": 0.6618751287460327, | |
| "step": 2896 | |
| }, | |
| { | |
| "epoch": 3.5384615384615383, | |
| "grad_norm": 1.6365941762924194, | |
| "learning_rate": 8.172427051602347e-07, | |
| "loss": 0.7428027391433716, | |
| "step": 2898 | |
| }, | |
| { | |
| "epoch": 3.5409035409035408, | |
| "grad_norm": 2.283928394317627, | |
| "learning_rate": 8.149867999592548e-07, | |
| "loss": 0.7936429381370544, | |
| "step": 2900 | |
| }, | |
| { | |
| "epoch": 3.543345543345543, | |
| "grad_norm": 3.6295547485351562, | |
| "learning_rate": 8.127421831822881e-07, | |
| "loss": 0.12481510639190674, | |
| "step": 2902 | |
| }, | |
| { | |
| "epoch": 3.5457875457875456, | |
| "grad_norm": 1.7329978942871094, | |
| "learning_rate": 8.105088650257343e-07, | |
| "loss": 0.7854657769203186, | |
| "step": 2904 | |
| }, | |
| { | |
| "epoch": 3.548229548229548, | |
| "grad_norm": 3.5314977169036865, | |
| "learning_rate": 8.082868556346662e-07, | |
| "loss": 0.8098005056381226, | |
| "step": 2906 | |
| }, | |
| { | |
| "epoch": 3.5506715506715505, | |
| "grad_norm": 2.5535361766815186, | |
| "learning_rate": 8.060761651027867e-07, | |
| "loss": 0.9487941265106201, | |
| "step": 2908 | |
| }, | |
| { | |
| "epoch": 3.553113553113553, | |
| "grad_norm": 3.6165003776550293, | |
| "learning_rate": 8.038768034723816e-07, | |
| "loss": 0.52418053150177, | |
| "step": 2910 | |
| }, | |
| { | |
| "epoch": 3.5555555555555554, | |
| "grad_norm": 2.5802009105682373, | |
| "learning_rate": 8.016887807342732e-07, | |
| "loss": 0.3970736563205719, | |
| "step": 2912 | |
| }, | |
| { | |
| "epoch": 3.557997557997558, | |
| "grad_norm": 2.033656358718872, | |
| "learning_rate": 7.995121068277767e-07, | |
| "loss": 0.591442883014679, | |
| "step": 2914 | |
| }, | |
| { | |
| "epoch": 3.5604395604395602, | |
| "grad_norm": 1.6943453550338745, | |
| "learning_rate": 7.973467916406536e-07, | |
| "loss": 0.8024305701255798, | |
| "step": 2916 | |
| }, | |
| { | |
| "epoch": 3.5628815628815627, | |
| "grad_norm": 7.8730692863464355, | |
| "learning_rate": 7.951928450090673e-07, | |
| "loss": 0.6496803164482117, | |
| "step": 2918 | |
| }, | |
| { | |
| "epoch": 3.565323565323565, | |
| "grad_norm": 18.40151596069336, | |
| "learning_rate": 7.93050276717538e-07, | |
| "loss": 0.5158238410949707, | |
| "step": 2920 | |
| }, | |
| { | |
| "epoch": 3.5677655677655675, | |
| "grad_norm": 1.7893723249435425, | |
| "learning_rate": 7.909190964988997e-07, | |
| "loss": 0.826363742351532, | |
| "step": 2922 | |
| }, | |
| { | |
| "epoch": 3.57020757020757, | |
| "grad_norm": 9.468801498413086, | |
| "learning_rate": 7.887993140342538e-07, | |
| "loss": 0.5767655968666077, | |
| "step": 2924 | |
| }, | |
| { | |
| "epoch": 3.5726495726495724, | |
| "grad_norm": 2.3637073040008545, | |
| "learning_rate": 7.86690938952927e-07, | |
| "loss": 0.48466330766677856, | |
| "step": 2926 | |
| }, | |
| { | |
| "epoch": 3.575091575091575, | |
| "grad_norm": 2.659562587738037, | |
| "learning_rate": 7.845939808324267e-07, | |
| "loss": 0.8443353772163391, | |
| "step": 2928 | |
| }, | |
| { | |
| "epoch": 3.5775335775335773, | |
| "grad_norm": 2.3245725631713867, | |
| "learning_rate": 7.825084491983969e-07, | |
| "loss": 0.764642059803009, | |
| "step": 2930 | |
| }, | |
| { | |
| "epoch": 3.57997557997558, | |
| "grad_norm": 5.520932197570801, | |
| "learning_rate": 7.804343535245766e-07, | |
| "loss": 0.6558569669723511, | |
| "step": 2932 | |
| }, | |
| { | |
| "epoch": 3.5824175824175826, | |
| "grad_norm": 3.9519784450531006, | |
| "learning_rate": 7.78371703232755e-07, | |
| "loss": 0.6942023634910583, | |
| "step": 2934 | |
| }, | |
| { | |
| "epoch": 3.584859584859585, | |
| "grad_norm": 1.9188674688339233, | |
| "learning_rate": 7.763205076927297e-07, | |
| "loss": 0.46283501386642456, | |
| "step": 2936 | |
| }, | |
| { | |
| "epoch": 3.5873015873015874, | |
| "grad_norm": 0.12963134050369263, | |
| "learning_rate": 7.742807762222653e-07, | |
| "loss": 0.4600965678691864, | |
| "step": 2938 | |
| }, | |
| { | |
| "epoch": 3.58974358974359, | |
| "grad_norm": 2.5248825550079346, | |
| "learning_rate": 7.722525180870466e-07, | |
| "loss": 0.557766854763031, | |
| "step": 2940 | |
| }, | |
| { | |
| "epoch": 3.5921855921855923, | |
| "grad_norm": 2.4224584102630615, | |
| "learning_rate": 7.702357425006438e-07, | |
| "loss": 0.8053780794143677, | |
| "step": 2942 | |
| }, | |
| { | |
| "epoch": 3.5946275946275947, | |
| "grad_norm": 2.104917526245117, | |
| "learning_rate": 7.682304586244618e-07, | |
| "loss": 0.7758908271789551, | |
| "step": 2944 | |
| }, | |
| { | |
| "epoch": 3.597069597069597, | |
| "grad_norm": 3.0547473430633545, | |
| "learning_rate": 7.662366755677073e-07, | |
| "loss": 0.8851046562194824, | |
| "step": 2946 | |
| }, | |
| { | |
| "epoch": 3.5995115995115996, | |
| "grad_norm": 1.9801969528198242, | |
| "learning_rate": 7.642544023873402e-07, | |
| "loss": 0.5296459794044495, | |
| "step": 2948 | |
| }, | |
| { | |
| "epoch": 3.601953601953602, | |
| "grad_norm": 3.0353381633758545, | |
| "learning_rate": 7.622836480880383e-07, | |
| "loss": 0.5360254049301147, | |
| "step": 2950 | |
| }, | |
| { | |
| "epoch": 3.6043956043956045, | |
| "grad_norm": 8.00523567199707, | |
| "learning_rate": 7.603244216221524e-07, | |
| "loss": 0.20186254382133484, | |
| "step": 2952 | |
| }, | |
| { | |
| "epoch": 3.606837606837607, | |
| "grad_norm": 4.944789409637451, | |
| "learning_rate": 7.583767318896664e-07, | |
| "loss": 0.466735303401947, | |
| "step": 2954 | |
| }, | |
| { | |
| "epoch": 3.6092796092796093, | |
| "grad_norm": 5.835378646850586, | |
| "learning_rate": 7.564405877381587e-07, | |
| "loss": 0.6394435167312622, | |
| "step": 2956 | |
| }, | |
| { | |
| "epoch": 3.6117216117216118, | |
| "grad_norm": 2.267725944519043, | |
| "learning_rate": 7.545159979627594e-07, | |
| "loss": 0.6844528913497925, | |
| "step": 2958 | |
| }, | |
| { | |
| "epoch": 3.614163614163614, | |
| "grad_norm": 2.5582151412963867, | |
| "learning_rate": 7.52602971306113e-07, | |
| "loss": 0.7534583210945129, | |
| "step": 2960 | |
| }, | |
| { | |
| "epoch": 3.6166056166056166, | |
| "grad_norm": 5.141064167022705, | |
| "learning_rate": 7.507015164583367e-07, | |
| "loss": 0.5010976791381836, | |
| "step": 2962 | |
| }, | |
| { | |
| "epoch": 3.619047619047619, | |
| "grad_norm": 3.67368745803833, | |
| "learning_rate": 7.488116420569816e-07, | |
| "loss": 0.8287992477416992, | |
| "step": 2964 | |
| }, | |
| { | |
| "epoch": 3.6214896214896215, | |
| "grad_norm": 2.036011219024658, | |
| "learning_rate": 7.46933356686993e-07, | |
| "loss": 0.9010288715362549, | |
| "step": 2966 | |
| }, | |
| { | |
| "epoch": 3.623931623931624, | |
| "grad_norm": 2.759631633758545, | |
| "learning_rate": 7.450666688806727e-07, | |
| "loss": 0.9595599174499512, | |
| "step": 2968 | |
| }, | |
| { | |
| "epoch": 3.6263736263736264, | |
| "grad_norm": 1.9032822847366333, | |
| "learning_rate": 7.432115871176391e-07, | |
| "loss": 0.4801540672779083, | |
| "step": 2970 | |
| }, | |
| { | |
| "epoch": 3.628815628815629, | |
| "grad_norm": 2.809022903442383, | |
| "learning_rate": 7.413681198247886e-07, | |
| "loss": 0.4220638573169708, | |
| "step": 2972 | |
| }, | |
| { | |
| "epoch": 3.6312576312576312, | |
| "grad_norm": 3.1490209102630615, | |
| "learning_rate": 7.395362753762583e-07, | |
| "loss": 0.48024827241897583, | |
| "step": 2974 | |
| }, | |
| { | |
| "epoch": 3.6336996336996337, | |
| "grad_norm": 2.781191110610962, | |
| "learning_rate": 7.37716062093387e-07, | |
| "loss": 0.7418580055236816, | |
| "step": 2976 | |
| }, | |
| { | |
| "epoch": 3.636141636141636, | |
| "grad_norm": 7.007016658782959, | |
| "learning_rate": 7.359074882446775e-07, | |
| "loss": 0.3912465572357178, | |
| "step": 2978 | |
| }, | |
| { | |
| "epoch": 3.6385836385836385, | |
| "grad_norm": 1.995826005935669, | |
| "learning_rate": 7.341105620457597e-07, | |
| "loss": 0.9312414526939392, | |
| "step": 2980 | |
| }, | |
| { | |
| "epoch": 3.641025641025641, | |
| "grad_norm": 4.350949287414551, | |
| "learning_rate": 7.32325291659353e-07, | |
| "loss": 0.891286313533783, | |
| "step": 2982 | |
| }, | |
| { | |
| "epoch": 3.6434676434676434, | |
| "grad_norm": 2.1877682209014893, | |
| "learning_rate": 7.305516851952288e-07, | |
| "loss": 0.837358832359314, | |
| "step": 2984 | |
| }, | |
| { | |
| "epoch": 3.645909645909646, | |
| "grad_norm": 1.86771559715271, | |
| "learning_rate": 7.287897507101736e-07, | |
| "loss": 0.9316425919532776, | |
| "step": 2986 | |
| }, | |
| { | |
| "epoch": 3.6483516483516483, | |
| "grad_norm": 2.0179667472839355, | |
| "learning_rate": 7.270394962079537e-07, | |
| "loss": 0.8129684925079346, | |
| "step": 2988 | |
| }, | |
| { | |
| "epoch": 3.6507936507936507, | |
| "grad_norm": 5.377006530761719, | |
| "learning_rate": 7.253009296392767e-07, | |
| "loss": 0.902230978012085, | |
| "step": 2990 | |
| }, | |
| { | |
| "epoch": 3.653235653235653, | |
| "grad_norm": 2.3976590633392334, | |
| "learning_rate": 7.235740589017581e-07, | |
| "loss": 0.4519374966621399, | |
| "step": 2992 | |
| }, | |
| { | |
| "epoch": 3.6556776556776556, | |
| "grad_norm": 3.464656114578247, | |
| "learning_rate": 7.218588918398821e-07, | |
| "loss": 0.3994402587413788, | |
| "step": 2994 | |
| }, | |
| { | |
| "epoch": 3.658119658119658, | |
| "grad_norm": 1.6983356475830078, | |
| "learning_rate": 7.201554362449699e-07, | |
| "loss": 0.8617055416107178, | |
| "step": 2996 | |
| }, | |
| { | |
| "epoch": 3.6605616605616604, | |
| "grad_norm": 1.0819239616394043, | |
| "learning_rate": 7.184636998551395e-07, | |
| "loss": 0.44496503472328186, | |
| "step": 2998 | |
| }, | |
| { | |
| "epoch": 3.663003663003663, | |
| "grad_norm": 8.184094429016113, | |
| "learning_rate": 7.167836903552761e-07, | |
| "loss": 0.2283964604139328, | |
| "step": 3000 | |
| }, | |
| { | |
| "epoch": 3.6654456654456653, | |
| "grad_norm": 2.11317777633667, | |
| "learning_rate": 7.151154153769916e-07, | |
| "loss": 0.3644329309463501, | |
| "step": 3002 | |
| }, | |
| { | |
| "epoch": 3.6678876678876677, | |
| "grad_norm": 5.163084506988525, | |
| "learning_rate": 7.134588824985951e-07, | |
| "loss": 0.2088758945465088, | |
| "step": 3004 | |
| }, | |
| { | |
| "epoch": 3.67032967032967, | |
| "grad_norm": 1.7118715047836304, | |
| "learning_rate": 7.118140992450537e-07, | |
| "loss": 0.9201338887214661, | |
| "step": 3006 | |
| }, | |
| { | |
| "epoch": 3.672771672771673, | |
| "grad_norm": 6.890610694885254, | |
| "learning_rate": 7.101810730879629e-07, | |
| "loss": 0.5677740573883057, | |
| "step": 3008 | |
| }, | |
| { | |
| "epoch": 3.6752136752136755, | |
| "grad_norm": 0.9095839858055115, | |
| "learning_rate": 7.085598114455092e-07, | |
| "loss": 0.4820370078086853, | |
| "step": 3010 | |
| }, | |
| { | |
| "epoch": 3.677655677655678, | |
| "grad_norm": 1.6080018281936646, | |
| "learning_rate": 7.069503216824375e-07, | |
| "loss": 0.8840917348861694, | |
| "step": 3012 | |
| }, | |
| { | |
| "epoch": 3.6800976800976803, | |
| "grad_norm": 2.5662214756011963, | |
| "learning_rate": 7.053526111100178e-07, | |
| "loss": 0.8205640316009521, | |
| "step": 3014 | |
| }, | |
| { | |
| "epoch": 3.682539682539683, | |
| "grad_norm": 7.3714470863342285, | |
| "learning_rate": 7.037666869860122e-07, | |
| "loss": 0.6835364699363708, | |
| "step": 3016 | |
| }, | |
| { | |
| "epoch": 3.684981684981685, | |
| "grad_norm": 1.6335556507110596, | |
| "learning_rate": 7.021925565146419e-07, | |
| "loss": 0.48589709401130676, | |
| "step": 3018 | |
| }, | |
| { | |
| "epoch": 3.6874236874236876, | |
| "grad_norm": 3.9356753826141357, | |
| "learning_rate": 7.006302268465534e-07, | |
| "loss": 0.47200772166252136, | |
| "step": 3020 | |
| }, | |
| { | |
| "epoch": 3.68986568986569, | |
| "grad_norm": 1.7010576725006104, | |
| "learning_rate": 6.990797050787876e-07, | |
| "loss": 0.4901275038719177, | |
| "step": 3022 | |
| }, | |
| { | |
| "epoch": 3.6923076923076925, | |
| "grad_norm": 3.1634457111358643, | |
| "learning_rate": 6.975409982547468e-07, | |
| "loss": 0.982101559638977, | |
| "step": 3024 | |
| }, | |
| { | |
| "epoch": 3.694749694749695, | |
| "grad_norm": 0.936671793460846, | |
| "learning_rate": 6.960141133641622e-07, | |
| "loss": 0.5470721125602722, | |
| "step": 3026 | |
| }, | |
| { | |
| "epoch": 3.6971916971916974, | |
| "grad_norm": 5.059283256530762, | |
| "learning_rate": 6.94499057343063e-07, | |
| "loss": 0.5916606783866882, | |
| "step": 3028 | |
| }, | |
| { | |
| "epoch": 3.6996336996337, | |
| "grad_norm": 1.649591088294983, | |
| "learning_rate": 6.929958370737447e-07, | |
| "loss": 0.9363369941711426, | |
| "step": 3030 | |
| }, | |
| { | |
| "epoch": 3.7020757020757022, | |
| "grad_norm": 3.894197702407837, | |
| "learning_rate": 6.915044593847377e-07, | |
| "loss": 0.7085144519805908, | |
| "step": 3032 | |
| }, | |
| { | |
| "epoch": 3.7045177045177047, | |
| "grad_norm": 2.26426100730896, | |
| "learning_rate": 6.900249310507757e-07, | |
| "loss": 0.5215721726417542, | |
| "step": 3034 | |
| }, | |
| { | |
| "epoch": 3.706959706959707, | |
| "grad_norm": 1.1034021377563477, | |
| "learning_rate": 6.885572587927659e-07, | |
| "loss": 0.5011436939239502, | |
| "step": 3036 | |
| }, | |
| { | |
| "epoch": 3.7094017094017095, | |
| "grad_norm": 1.3475278615951538, | |
| "learning_rate": 6.871014492777585e-07, | |
| "loss": 0.504115641117096, | |
| "step": 3038 | |
| }, | |
| { | |
| "epoch": 3.711843711843712, | |
| "grad_norm": 6.192190647125244, | |
| "learning_rate": 6.856575091189154e-07, | |
| "loss": 0.9073065519332886, | |
| "step": 3040 | |
| }, | |
| { | |
| "epoch": 3.7142857142857144, | |
| "grad_norm": 2.2396318912506104, | |
| "learning_rate": 6.842254448754811e-07, | |
| "loss": 0.7537841796875, | |
| "step": 3042 | |
| }, | |
| { | |
| "epoch": 3.716727716727717, | |
| "grad_norm": 1.6438734531402588, | |
| "learning_rate": 6.828052630527522e-07, | |
| "loss": 0.5031915307044983, | |
| "step": 3044 | |
| }, | |
| { | |
| "epoch": 3.7191697191697193, | |
| "grad_norm": 1.9333980083465576, | |
| "learning_rate": 6.813969701020478e-07, | |
| "loss": 0.9249406456947327, | |
| "step": 3046 | |
| }, | |
| { | |
| "epoch": 3.7216117216117217, | |
| "grad_norm": 2.5807371139526367, | |
| "learning_rate": 6.800005724206831e-07, | |
| "loss": 0.5361707806587219, | |
| "step": 3048 | |
| }, | |
| { | |
| "epoch": 3.724053724053724, | |
| "grad_norm": 2.0083351135253906, | |
| "learning_rate": 6.786160763519335e-07, | |
| "loss": 0.741145133972168, | |
| "step": 3050 | |
| }, | |
| { | |
| "epoch": 3.7264957264957266, | |
| "grad_norm": 2.013129711151123, | |
| "learning_rate": 6.772434881850144e-07, | |
| "loss": 0.8314730525016785, | |
| "step": 3052 | |
| }, | |
| { | |
| "epoch": 3.728937728937729, | |
| "grad_norm": 4.733554840087891, | |
| "learning_rate": 6.75882814155045e-07, | |
| "loss": 0.4873526394367218, | |
| "step": 3054 | |
| }, | |
| { | |
| "epoch": 3.7313797313797314, | |
| "grad_norm": 3.0452795028686523, | |
| "learning_rate": 6.745340604430266e-07, | |
| "loss": 0.9440727233886719, | |
| "step": 3056 | |
| }, | |
| { | |
| "epoch": 3.733821733821734, | |
| "grad_norm": 2.133902072906494, | |
| "learning_rate": 6.731972331758076e-07, | |
| "loss": 0.6994129419326782, | |
| "step": 3058 | |
| }, | |
| { | |
| "epoch": 3.7362637362637363, | |
| "grad_norm": 7.259641647338867, | |
| "learning_rate": 6.718723384260628e-07, | |
| "loss": 0.43192028999328613, | |
| "step": 3060 | |
| }, | |
| { | |
| "epoch": 3.7387057387057387, | |
| "grad_norm": 2.170210599899292, | |
| "learning_rate": 6.705593822122592e-07, | |
| "loss": 0.46401798725128174, | |
| "step": 3062 | |
| }, | |
| { | |
| "epoch": 3.741147741147741, | |
| "grad_norm": 1.0304683446884155, | |
| "learning_rate": 6.692583704986346e-07, | |
| "loss": 0.3507814407348633, | |
| "step": 3064 | |
| }, | |
| { | |
| "epoch": 3.7435897435897436, | |
| "grad_norm": 3.386765718460083, | |
| "learning_rate": 6.679693091951654e-07, | |
| "loss": 0.5371965169906616, | |
| "step": 3066 | |
| }, | |
| { | |
| "epoch": 3.746031746031746, | |
| "grad_norm": 1.7468204498291016, | |
| "learning_rate": 6.666922041575441e-07, | |
| "loss": 0.8100628852844238, | |
| "step": 3068 | |
| }, | |
| { | |
| "epoch": 3.7484737484737485, | |
| "grad_norm": 2.1092400550842285, | |
| "learning_rate": 6.654270611871487e-07, | |
| "loss": 0.8633115887641907, | |
| "step": 3070 | |
| }, | |
| { | |
| "epoch": 3.750915750915751, | |
| "grad_norm": 5.546787261962891, | |
| "learning_rate": 6.641738860310198e-07, | |
| "loss": 0.5384469628334045, | |
| "step": 3072 | |
| }, | |
| { | |
| "epoch": 3.7533577533577533, | |
| "grad_norm": 1.0326019525527954, | |
| "learning_rate": 6.629326843818317e-07, | |
| "loss": 0.45187580585479736, | |
| "step": 3074 | |
| }, | |
| { | |
| "epoch": 3.755799755799756, | |
| "grad_norm": 4.467280864715576, | |
| "learning_rate": 6.617034618778696e-07, | |
| "loss": 0.855986475944519, | |
| "step": 3076 | |
| }, | |
| { | |
| "epoch": 3.758241758241758, | |
| "grad_norm": 3.7619566917419434, | |
| "learning_rate": 6.60486224103e-07, | |
| "loss": 0.7572266459465027, | |
| "step": 3078 | |
| }, | |
| { | |
| "epoch": 3.7606837606837606, | |
| "grad_norm": 6.183402061462402, | |
| "learning_rate": 6.592809765866497e-07, | |
| "loss": 0.527399480342865, | |
| "step": 3080 | |
| }, | |
| { | |
| "epoch": 3.763125763125763, | |
| "grad_norm": 17.207387924194336, | |
| "learning_rate": 6.580877248037769e-07, | |
| "loss": 0.8154140114784241, | |
| "step": 3082 | |
| }, | |
| { | |
| "epoch": 3.7655677655677655, | |
| "grad_norm": 1.9882452487945557, | |
| "learning_rate": 6.569064741748489e-07, | |
| "loss": 0.911526083946228, | |
| "step": 3084 | |
| }, | |
| { | |
| "epoch": 3.768009768009768, | |
| "grad_norm": 4.858429908752441, | |
| "learning_rate": 6.557372300658159e-07, | |
| "loss": 0.9475221633911133, | |
| "step": 3086 | |
| }, | |
| { | |
| "epoch": 3.7704517704517704, | |
| "grad_norm": 4.474874019622803, | |
| "learning_rate": 6.545799977880883e-07, | |
| "loss": 0.23142358660697937, | |
| "step": 3088 | |
| }, | |
| { | |
| "epoch": 3.772893772893773, | |
| "grad_norm": 3.1783666610717773, | |
| "learning_rate": 6.534347825985101e-07, | |
| "loss": 0.8077917098999023, | |
| "step": 3090 | |
| }, | |
| { | |
| "epoch": 3.7753357753357752, | |
| "grad_norm": 3.9100658893585205, | |
| "learning_rate": 6.523015896993381e-07, | |
| "loss": 0.8263789415359497, | |
| "step": 3092 | |
| }, | |
| { | |
| "epoch": 3.7777777777777777, | |
| "grad_norm": 1.6414414644241333, | |
| "learning_rate": 6.511804242382148e-07, | |
| "loss": 0.927480936050415, | |
| "step": 3094 | |
| }, | |
| { | |
| "epoch": 3.78021978021978, | |
| "grad_norm": 2.777785301208496, | |
| "learning_rate": 6.500712913081486e-07, | |
| "loss": 0.7517139315605164, | |
| "step": 3096 | |
| }, | |
| { | |
| "epoch": 3.7826617826617825, | |
| "grad_norm": 3.0369672775268555, | |
| "learning_rate": 6.489741959474876e-07, | |
| "loss": 0.5767747163772583, | |
| "step": 3098 | |
| }, | |
| { | |
| "epoch": 3.785103785103785, | |
| "grad_norm": 30.473583221435547, | |
| "learning_rate": 6.478891431398988e-07, | |
| "loss": 0.7141063213348389, | |
| "step": 3100 | |
| }, | |
| { | |
| "epoch": 3.7875457875457874, | |
| "grad_norm": 1.8245930671691895, | |
| "learning_rate": 6.468161378143442e-07, | |
| "loss": 0.8820724487304688, | |
| "step": 3102 | |
| }, | |
| { | |
| "epoch": 3.78998778998779, | |
| "grad_norm": 3.724031448364258, | |
| "learning_rate": 6.457551848450591e-07, | |
| "loss": 0.8075670599937439, | |
| "step": 3104 | |
| }, | |
| { | |
| "epoch": 3.7924297924297923, | |
| "grad_norm": 2.1518399715423584, | |
| "learning_rate": 6.447062890515303e-07, | |
| "loss": 0.8505239486694336, | |
| "step": 3106 | |
| }, | |
| { | |
| "epoch": 3.7948717948717947, | |
| "grad_norm": 10.370966911315918, | |
| "learning_rate": 6.436694551984721e-07, | |
| "loss": 0.4985530376434326, | |
| "step": 3108 | |
| }, | |
| { | |
| "epoch": 3.797313797313797, | |
| "grad_norm": 3.9164273738861084, | |
| "learning_rate": 6.426446879958085e-07, | |
| "loss": 0.5315531492233276, | |
| "step": 3110 | |
| }, | |
| { | |
| "epoch": 3.7997557997557996, | |
| "grad_norm": 3.175743818283081, | |
| "learning_rate": 6.416319920986471e-07, | |
| "loss": 0.5992456674575806, | |
| "step": 3112 | |
| }, | |
| { | |
| "epoch": 3.802197802197802, | |
| "grad_norm": 3.047924518585205, | |
| "learning_rate": 6.406313721072623e-07, | |
| "loss": 0.536521315574646, | |
| "step": 3114 | |
| }, | |
| { | |
| "epoch": 3.8046398046398044, | |
| "grad_norm": 8.903552055358887, | |
| "learning_rate": 6.396428325670712e-07, | |
| "loss": 0.34863823652267456, | |
| "step": 3116 | |
| }, | |
| { | |
| "epoch": 3.807081807081807, | |
| "grad_norm": 2.5433661937713623, | |
| "learning_rate": 6.386663779686147e-07, | |
| "loss": 0.5361265540122986, | |
| "step": 3118 | |
| }, | |
| { | |
| "epoch": 3.8095238095238093, | |
| "grad_norm": 7.335467338562012, | |
| "learning_rate": 6.377020127475377e-07, | |
| "loss": 0.9320744872093201, | |
| "step": 3120 | |
| }, | |
| { | |
| "epoch": 3.8119658119658117, | |
| "grad_norm": 40.987064361572266, | |
| "learning_rate": 6.367497412845654e-07, | |
| "loss": 0.6013365387916565, | |
| "step": 3122 | |
| }, | |
| { | |
| "epoch": 3.814407814407814, | |
| "grad_norm": 2.2245728969573975, | |
| "learning_rate": 6.358095679054882e-07, | |
| "loss": 0.6405077576637268, | |
| "step": 3124 | |
| }, | |
| { | |
| "epoch": 3.8168498168498166, | |
| "grad_norm": 2.7145586013793945, | |
| "learning_rate": 6.348814968811384e-07, | |
| "loss": 0.8977913856506348, | |
| "step": 3126 | |
| }, | |
| { | |
| "epoch": 3.819291819291819, | |
| "grad_norm": 5.800877094268799, | |
| "learning_rate": 6.33965532427373e-07, | |
| "loss": 0.4331892728805542, | |
| "step": 3128 | |
| }, | |
| { | |
| "epoch": 3.8217338217338215, | |
| "grad_norm": 2.0423827171325684, | |
| "learning_rate": 6.330616787050528e-07, | |
| "loss": 0.8526274561882019, | |
| "step": 3130 | |
| }, | |
| { | |
| "epoch": 3.824175824175824, | |
| "grad_norm": 0.7753233909606934, | |
| "learning_rate": 6.321699398200254e-07, | |
| "loss": 0.3737678825855255, | |
| "step": 3132 | |
| }, | |
| { | |
| "epoch": 3.8266178266178263, | |
| "grad_norm": 6.823727130889893, | |
| "learning_rate": 6.312903198231041e-07, | |
| "loss": 0.9473689198493958, | |
| "step": 3134 | |
| }, | |
| { | |
| "epoch": 3.8290598290598292, | |
| "grad_norm": 3.5573318004608154, | |
| "learning_rate": 6.304228227100528e-07, | |
| "loss": 0.8825540542602539, | |
| "step": 3136 | |
| }, | |
| { | |
| "epoch": 3.8315018315018317, | |
| "grad_norm": 2.7311320304870605, | |
| "learning_rate": 6.295674524215642e-07, | |
| "loss": 0.5429124236106873, | |
| "step": 3138 | |
| }, | |
| { | |
| "epoch": 3.833943833943834, | |
| "grad_norm": 2.064741611480713, | |
| "learning_rate": 6.287242128432448e-07, | |
| "loss": 0.4811321198940277, | |
| "step": 3140 | |
| }, | |
| { | |
| "epoch": 3.8363858363858365, | |
| "grad_norm": 4.351254463195801, | |
| "learning_rate": 6.278931078055954e-07, | |
| "loss": 0.6814494132995605, | |
| "step": 3142 | |
| }, | |
| { | |
| "epoch": 3.838827838827839, | |
| "grad_norm": 7.926380634307861, | |
| "learning_rate": 6.270741410839952e-07, | |
| "loss": 0.2247064709663391, | |
| "step": 3144 | |
| }, | |
| { | |
| "epoch": 3.8412698412698414, | |
| "grad_norm": 4.106725215911865, | |
| "learning_rate": 6.26267316398683e-07, | |
| "loss": 0.4575510025024414, | |
| "step": 3146 | |
| }, | |
| { | |
| "epoch": 3.843711843711844, | |
| "grad_norm": 0.16320517659187317, | |
| "learning_rate": 6.254726374147418e-07, | |
| "loss": 0.10908607393503189, | |
| "step": 3148 | |
| }, | |
| { | |
| "epoch": 3.8461538461538463, | |
| "grad_norm": 5.839258193969727, | |
| "learning_rate": 6.246901077420817e-07, | |
| "loss": 0.6451608538627625, | |
| "step": 3150 | |
| }, | |
| { | |
| "epoch": 3.8485958485958487, | |
| "grad_norm": 5.578673362731934, | |
| "learning_rate": 6.239197309354223e-07, | |
| "loss": 0.7072908282279968, | |
| "step": 3152 | |
| }, | |
| { | |
| "epoch": 3.851037851037851, | |
| "grad_norm": 3.6340432167053223, | |
| "learning_rate": 6.231615104942785e-07, | |
| "loss": 0.471763551235199, | |
| "step": 3154 | |
| }, | |
| { | |
| "epoch": 3.8534798534798536, | |
| "grad_norm": 1.6580287218093872, | |
| "learning_rate": 6.224154498629434e-07, | |
| "loss": 0.7280194163322449, | |
| "step": 3156 | |
| }, | |
| { | |
| "epoch": 3.855921855921856, | |
| "grad_norm": 1.7522375583648682, | |
| "learning_rate": 6.216815524304732e-07, | |
| "loss": 0.7933505773544312, | |
| "step": 3158 | |
| }, | |
| { | |
| "epoch": 3.8583638583638584, | |
| "grad_norm": 11.935617446899414, | |
| "learning_rate": 6.209598215306708e-07, | |
| "loss": 0.06976834684610367, | |
| "step": 3160 | |
| }, | |
| { | |
| "epoch": 3.860805860805861, | |
| "grad_norm": 2.845559597015381, | |
| "learning_rate": 6.202502604420724e-07, | |
| "loss": 0.3656473457813263, | |
| "step": 3162 | |
| }, | |
| { | |
| "epoch": 3.8632478632478633, | |
| "grad_norm": 1.7497023344039917, | |
| "learning_rate": 6.195528723879306e-07, | |
| "loss": 0.8561981320381165, | |
| "step": 3164 | |
| }, | |
| { | |
| "epoch": 3.8656898656898657, | |
| "grad_norm": 2.9220542907714844, | |
| "learning_rate": 6.188676605362014e-07, | |
| "loss": 0.7740746140480042, | |
| "step": 3166 | |
| }, | |
| { | |
| "epoch": 3.868131868131868, | |
| "grad_norm": 1.9189082384109497, | |
| "learning_rate": 6.181946279995291e-07, | |
| "loss": 0.7662597298622131, | |
| "step": 3168 | |
| }, | |
| { | |
| "epoch": 3.8705738705738706, | |
| "grad_norm": 4.2788310050964355, | |
| "learning_rate": 6.17533777835232e-07, | |
| "loss": 0.6690661907196045, | |
| "step": 3170 | |
| }, | |
| { | |
| "epoch": 3.873015873015873, | |
| "grad_norm": 6.616257190704346, | |
| "learning_rate": 6.168851130452888e-07, | |
| "loss": 0.5250207781791687, | |
| "step": 3172 | |
| }, | |
| { | |
| "epoch": 3.8754578754578755, | |
| "grad_norm": 2.653538227081299, | |
| "learning_rate": 6.162486365763252e-07, | |
| "loss": 0.5411117076873779, | |
| "step": 3174 | |
| }, | |
| { | |
| "epoch": 3.877899877899878, | |
| "grad_norm": 2.170850992202759, | |
| "learning_rate": 6.156243513196e-07, | |
| "loss": 0.8127894401550293, | |
| "step": 3176 | |
| }, | |
| { | |
| "epoch": 3.8803418803418803, | |
| "grad_norm": 3.3351333141326904, | |
| "learning_rate": 6.150122601109919e-07, | |
| "loss": 0.9016299843788147, | |
| "step": 3178 | |
| }, | |
| { | |
| "epoch": 3.8827838827838828, | |
| "grad_norm": 2.948333501815796, | |
| "learning_rate": 6.144123657309872e-07, | |
| "loss": 0.8240965604782104, | |
| "step": 3180 | |
| }, | |
| { | |
| "epoch": 3.885225885225885, | |
| "grad_norm": 1.4079304933547974, | |
| "learning_rate": 6.138246709046672e-07, | |
| "loss": 0.7397529482841492, | |
| "step": 3182 | |
| }, | |
| { | |
| "epoch": 3.8876678876678876, | |
| "grad_norm": 4.149062633514404, | |
| "learning_rate": 6.132491783016945e-07, | |
| "loss": 0.34523075819015503, | |
| "step": 3184 | |
| }, | |
| { | |
| "epoch": 3.89010989010989, | |
| "grad_norm": 2.017380714416504, | |
| "learning_rate": 6.126858905363034e-07, | |
| "loss": 0.8450878262519836, | |
| "step": 3186 | |
| }, | |
| { | |
| "epoch": 3.8925518925518925, | |
| "grad_norm": 1.749881386756897, | |
| "learning_rate": 6.121348101672848e-07, | |
| "loss": 0.36155635118484497, | |
| "step": 3188 | |
| }, | |
| { | |
| "epoch": 3.894993894993895, | |
| "grad_norm": 1.3024982213974, | |
| "learning_rate": 6.115959396979775e-07, | |
| "loss": 0.4078894257545471, | |
| "step": 3190 | |
| }, | |
| { | |
| "epoch": 3.8974358974358974, | |
| "grad_norm": 2.626460313796997, | |
| "learning_rate": 6.110692815762551e-07, | |
| "loss": 0.8648070693016052, | |
| "step": 3192 | |
| }, | |
| { | |
| "epoch": 3.8998778998779, | |
| "grad_norm": 5.356231689453125, | |
| "learning_rate": 6.105548381945153e-07, | |
| "loss": 0.8094540238380432, | |
| "step": 3194 | |
| }, | |
| { | |
| "epoch": 3.9023199023199022, | |
| "grad_norm": 25.908906936645508, | |
| "learning_rate": 6.100526118896693e-07, | |
| "loss": 0.5891746878623962, | |
| "step": 3196 | |
| }, | |
| { | |
| "epoch": 3.9047619047619047, | |
| "grad_norm": 5.511157035827637, | |
| "learning_rate": 6.09562604943131e-07, | |
| "loss": 0.07191010564565659, | |
| "step": 3198 | |
| }, | |
| { | |
| "epoch": 3.907203907203907, | |
| "grad_norm": 6.299953460693359, | |
| "learning_rate": 6.090848195808064e-07, | |
| "loss": 0.8390135765075684, | |
| "step": 3200 | |
| }, | |
| { | |
| "epoch": 3.9096459096459095, | |
| "grad_norm": 1.8649171590805054, | |
| "learning_rate": 6.086192579730838e-07, | |
| "loss": 0.5590109825134277, | |
| "step": 3202 | |
| }, | |
| { | |
| "epoch": 3.912087912087912, | |
| "grad_norm": 2.802351713180542, | |
| "learning_rate": 6.081659222348244e-07, | |
| "loss": 0.985985517501831, | |
| "step": 3204 | |
| }, | |
| { | |
| "epoch": 3.9145299145299144, | |
| "grad_norm": 2.154928207397461, | |
| "learning_rate": 6.07724814425351e-07, | |
| "loss": 0.9891282916069031, | |
| "step": 3206 | |
| }, | |
| { | |
| "epoch": 3.916971916971917, | |
| "grad_norm": 6.227573871612549, | |
| "learning_rate": 6.07295936548441e-07, | |
| "loss": 1.0907913446426392, | |
| "step": 3208 | |
| }, | |
| { | |
| "epoch": 3.9194139194139193, | |
| "grad_norm": 2.676931381225586, | |
| "learning_rate": 6.068792905523156e-07, | |
| "loss": 0.5496243238449097, | |
| "step": 3210 | |
| }, | |
| { | |
| "epoch": 3.9218559218559217, | |
| "grad_norm": 10.38682746887207, | |
| "learning_rate": 6.064748783296311e-07, | |
| "loss": 0.2229357361793518, | |
| "step": 3212 | |
| }, | |
| { | |
| "epoch": 3.9242979242979246, | |
| "grad_norm": 2.008678674697876, | |
| "learning_rate": 6.060827017174708e-07, | |
| "loss": 0.8315946459770203, | |
| "step": 3214 | |
| }, | |
| { | |
| "epoch": 3.926739926739927, | |
| "grad_norm": 4.7980146408081055, | |
| "learning_rate": 6.057027624973373e-07, | |
| "loss": 0.8503268957138062, | |
| "step": 3216 | |
| }, | |
| { | |
| "epoch": 3.9291819291819294, | |
| "grad_norm": 4.067953109741211, | |
| "learning_rate": 6.053350623951424e-07, | |
| "loss": 0.563690721988678, | |
| "step": 3218 | |
| }, | |
| { | |
| "epoch": 3.931623931623932, | |
| "grad_norm": 2.8761026859283447, | |
| "learning_rate": 6.049796030812015e-07, | |
| "loss": 0.6627534627914429, | |
| "step": 3220 | |
| }, | |
| { | |
| "epoch": 3.9340659340659343, | |
| "grad_norm": 7.254218101501465, | |
| "learning_rate": 6.046363861702239e-07, | |
| "loss": 0.5418643355369568, | |
| "step": 3222 | |
| }, | |
| { | |
| "epoch": 3.9365079365079367, | |
| "grad_norm": 1.3696813583374023, | |
| "learning_rate": 6.043054132213079e-07, | |
| "loss": 0.7259221076965332, | |
| "step": 3224 | |
| }, | |
| { | |
| "epoch": 3.938949938949939, | |
| "grad_norm": 5.845968246459961, | |
| "learning_rate": 6.039866857379309e-07, | |
| "loss": 0.4918738901615143, | |
| "step": 3226 | |
| }, | |
| { | |
| "epoch": 3.9413919413919416, | |
| "grad_norm": 1.7601078748703003, | |
| "learning_rate": 6.036802051679455e-07, | |
| "loss": 0.9122508764266968, | |
| "step": 3228 | |
| }, | |
| { | |
| "epoch": 3.943833943833944, | |
| "grad_norm": 2.2423689365386963, | |
| "learning_rate": 6.0338597290357e-07, | |
| "loss": 0.4838387668132782, | |
| "step": 3230 | |
| }, | |
| { | |
| "epoch": 3.9462759462759465, | |
| "grad_norm": 2.109056234359741, | |
| "learning_rate": 6.031039902813847e-07, | |
| "loss": 0.7914977073669434, | |
| "step": 3232 | |
| }, | |
| { | |
| "epoch": 3.948717948717949, | |
| "grad_norm": 4.0477375984191895, | |
| "learning_rate": 6.028342585823242e-07, | |
| "loss": 0.48134973645210266, | |
| "step": 3234 | |
| }, | |
| { | |
| "epoch": 3.9511599511599513, | |
| "grad_norm": 1.566765546798706, | |
| "learning_rate": 6.025767790316716e-07, | |
| "loss": 0.8497970104217529, | |
| "step": 3236 | |
| }, | |
| { | |
| "epoch": 3.9536019536019538, | |
| "grad_norm": 8.391966819763184, | |
| "learning_rate": 6.023315527990545e-07, | |
| "loss": 0.7990399599075317, | |
| "step": 3238 | |
| }, | |
| { | |
| "epoch": 3.956043956043956, | |
| "grad_norm": 3.6553773880004883, | |
| "learning_rate": 6.020985809984372e-07, | |
| "loss": 0.4292301535606384, | |
| "step": 3240 | |
| }, | |
| { | |
| "epoch": 3.9584859584859586, | |
| "grad_norm": 4.039731025695801, | |
| "learning_rate": 6.018778646881181e-07, | |
| "loss": 0.5612732172012329, | |
| "step": 3242 | |
| }, | |
| { | |
| "epoch": 3.960927960927961, | |
| "grad_norm": 1.7811068296432495, | |
| "learning_rate": 6.016694048707235e-07, | |
| "loss": 0.8350145816802979, | |
| "step": 3244 | |
| }, | |
| { | |
| "epoch": 3.9633699633699635, | |
| "grad_norm": 2.358139991760254, | |
| "learning_rate": 6.014732024932027e-07, | |
| "loss": 0.4561384320259094, | |
| "step": 3246 | |
| }, | |
| { | |
| "epoch": 3.965811965811966, | |
| "grad_norm": 1.5930027961730957, | |
| "learning_rate": 6.012892584468251e-07, | |
| "loss": 0.8480937480926514, | |
| "step": 3248 | |
| }, | |
| { | |
| "epoch": 3.9682539682539684, | |
| "grad_norm": 6.148706912994385, | |
| "learning_rate": 6.011175735671757e-07, | |
| "loss": 0.5076125860214233, | |
| "step": 3250 | |
| }, | |
| { | |
| "epoch": 3.970695970695971, | |
| "grad_norm": 16.231203079223633, | |
| "learning_rate": 6.009581486341496e-07, | |
| "loss": 0.18412072956562042, | |
| "step": 3252 | |
| }, | |
| { | |
| "epoch": 3.9731379731379732, | |
| "grad_norm": 2.707773447036743, | |
| "learning_rate": 6.008109843719513e-07, | |
| "loss": 0.8144515156745911, | |
| "step": 3254 | |
| }, | |
| { | |
| "epoch": 3.9755799755799757, | |
| "grad_norm": 2.6421728134155273, | |
| "learning_rate": 6.006760814490892e-07, | |
| "loss": 0.7697687745094299, | |
| "step": 3256 | |
| }, | |
| { | |
| "epoch": 3.978021978021978, | |
| "grad_norm": 1.7280405759811401, | |
| "learning_rate": 6.005534404783732e-07, | |
| "loss": 0.37729355692863464, | |
| "step": 3258 | |
| }, | |
| { | |
| "epoch": 3.9804639804639805, | |
| "grad_norm": 3.762451171875, | |
| "learning_rate": 6.004430620169124e-07, | |
| "loss": 0.4543501138687134, | |
| "step": 3260 | |
| }, | |
| { | |
| "epoch": 3.982905982905983, | |
| "grad_norm": 2.905902147293091, | |
| "learning_rate": 6.003449465661124e-07, | |
| "loss": 0.5729525089263916, | |
| "step": 3262 | |
| }, | |
| { | |
| "epoch": 3.9853479853479854, | |
| "grad_norm": 5.909022331237793, | |
| "learning_rate": 6.002590945716726e-07, | |
| "loss": 0.4344090521335602, | |
| "step": 3264 | |
| }, | |
| { | |
| "epoch": 3.987789987789988, | |
| "grad_norm": 50.008880615234375, | |
| "learning_rate": 6.001855064235843e-07, | |
| "loss": 0.8903095722198486, | |
| "step": 3266 | |
| }, | |
| { | |
| "epoch": 3.9902319902319903, | |
| "grad_norm": 2.487112283706665, | |
| "learning_rate": 6.001241824561287e-07, | |
| "loss": 0.9123827219009399, | |
| "step": 3268 | |
| }, | |
| { | |
| "epoch": 3.9926739926739927, | |
| "grad_norm": 2.6051173210144043, | |
| "learning_rate": 6.000751229478767e-07, | |
| "loss": 0.6394712924957275, | |
| "step": 3270 | |
| }, | |
| { | |
| "epoch": 3.995115995115995, | |
| "grad_norm": 3.082603693008423, | |
| "learning_rate": 6.000383281216857e-07, | |
| "loss": 0.8653581142425537, | |
| "step": 3272 | |
| }, | |
| { | |
| "epoch": 3.9975579975579976, | |
| "grad_norm": 1.5350114107131958, | |
| "learning_rate": 6.000137981446999e-07, | |
| "loss": 0.7252496480941772, | |
| "step": 3274 | |
| }, | |
| { | |
| "epoch": 4.0, | |
| "grad_norm": 2.818603992462158, | |
| "learning_rate": 6.000015331283497e-07, | |
| "loss": 0.47882503271102905, | |
| "step": 3276 | |
| }, | |
| { | |
| "epoch": 4.0, | |
| "step": 3276, | |
| "total_flos": 3.438047841308639e+18, | |
| "train_loss": 0.884952375524301, | |
| "train_runtime": 10014.0246, | |
| "train_samples_per_second": 5.234, | |
| "train_steps_per_second": 0.327 | |
| } | |
| ], | |
| "logging_steps": 2, | |
| "max_steps": 3276, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 4, | |
| "save_steps": 99999, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": false, | |
| "should_training_stop": false | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 3.438047841308639e+18, | |
| "train_batch_size": 2, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |