Image-Text-to-Text
Transformers
Safetensors
qwen3_5
llama-factory
full
Generated from Trainer
conversational
Instructions to use furproxy/9b-15 with libraries, inference providers, notebooks, and local apps. Follow these links to get started.
- Libraries
- Transformers
How to use furproxy/9b-15 with Transformers:
# Use a pipeline as a high-level helper from transformers import pipeline pipe = pipeline("image-text-to-text", model="furproxy/9b-15") messages = [ { "role": "user", "content": [ {"type": "image", "url": "https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/p-blog/candy.JPG"}, {"type": "text", "text": "What animal is on the candy?"} ] }, ] pipe(text=messages)# Load model directly from transformers import AutoProcessor, AutoModelForImageTextToText processor = AutoProcessor.from_pretrained("furproxy/9b-15") model = AutoModelForImageTextToText.from_pretrained("furproxy/9b-15") messages = [ { "role": "user", "content": [ {"type": "image", "url": "https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/p-blog/candy.JPG"}, {"type": "text", "text": "What animal is on the candy?"} ] }, ] inputs = processor.apply_chat_template( messages, add_generation_prompt=True, tokenize=True, return_dict=True, return_tensors="pt", ).to(model.device) outputs = model.generate(**inputs, max_new_tokens=40) print(processor.decode(outputs[0][inputs["input_ids"].shape[-1]:])) - Notebooks
- Google Colab
- Kaggle
- Local Apps Settings
- vLLM
How to use furproxy/9b-15 with vLLM:
Install from pip and serve model
# Install vLLM from pip: pip install vllm # Start the vLLM server: vllm serve "furproxy/9b-15" # Call the server using curl (OpenAI-compatible API): curl -X POST "http://localhost:8000/v1/chat/completions" \ -H "Content-Type: application/json" \ --data '{ "model": "furproxy/9b-15", "messages": [ { "role": "user", "content": [ { "type": "text", "text": "Describe this image in one sentence." }, { "type": "image_url", "image_url": { "url": "https://cdn.britannica.com/61/93061-050-99147DCE/Statue-of-Liberty-Island-New-York-Bay.jpg" } } ] } ] }'Use Docker
docker model run hf.co/furproxy/9b-15
- SGLang
How to use furproxy/9b-15 with SGLang:
Install from pip and serve model
# Install SGLang from pip: pip install sglang # Start the SGLang server: python3 -m sglang.launch_server \ --model-path "furproxy/9b-15" \ --host 0.0.0.0 \ --port 30000 # Call the server using curl (OpenAI-compatible API): curl -X POST "http://localhost:30000/v1/chat/completions" \ -H "Content-Type: application/json" \ --data '{ "model": "furproxy/9b-15", "messages": [ { "role": "user", "content": [ { "type": "text", "text": "Describe this image in one sentence." }, { "type": "image_url", "image_url": { "url": "https://cdn.britannica.com/61/93061-050-99147DCE/Statue-of-Liberty-Island-New-York-Bay.jpg" } } ] } ] }'Use Docker images
docker run --gpus all \ --shm-size 32g \ -p 30000:30000 \ -v ~/.cache/huggingface:/root/.cache/huggingface \ --env "HF_TOKEN=<secret>" \ --ipc=host \ lmsysorg/sglang:latest \ python3 -m sglang.launch_server \ --model-path "furproxy/9b-15" \ --host 0.0.0.0 \ --port 30000 # Call the server using curl (OpenAI-compatible API): curl -X POST "http://localhost:30000/v1/chat/completions" \ -H "Content-Type: application/json" \ --data '{ "model": "furproxy/9b-15", "messages": [ { "role": "user", "content": [ { "type": "text", "text": "Describe this image in one sentence." }, { "type": "image_url", "image_url": { "url": "https://cdn.britannica.com/61/93061-050-99147DCE/Statue-of-Liberty-Island-New-York-Bay.jpg" } } ] } ] }' - Docker Model Runner
How to use furproxy/9b-15 with Docker Model Runner:
docker model run hf.co/furproxy/9b-15
| { | |
| "best_global_step": null, | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 2.0, | |
| "eval_steps": 500, | |
| "global_step": 1370, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.00291970802919708, | |
| "grad_norm": 4.875, | |
| "learning_rate": 7.246376811594204e-08, | |
| "loss": 1.320786714553833, | |
| "step": 2 | |
| }, | |
| { | |
| "epoch": 0.00583941605839416, | |
| "grad_norm": 26.375, | |
| "learning_rate": 2.173913043478261e-07, | |
| "loss": 2.3353517055511475, | |
| "step": 4 | |
| }, | |
| { | |
| "epoch": 0.008759124087591242, | |
| "grad_norm": 5.125, | |
| "learning_rate": 3.623188405797102e-07, | |
| "loss": 1.9446890354156494, | |
| "step": 6 | |
| }, | |
| { | |
| "epoch": 0.01167883211678832, | |
| "grad_norm": 2.234375, | |
| "learning_rate": 5.072463768115942e-07, | |
| "loss": 1.6843594312667847, | |
| "step": 8 | |
| }, | |
| { | |
| "epoch": 0.014598540145985401, | |
| "grad_norm": 8.8125, | |
| "learning_rate": 6.521739130434783e-07, | |
| "loss": 1.8062303066253662, | |
| "step": 10 | |
| }, | |
| { | |
| "epoch": 0.017518248175182483, | |
| "grad_norm": 5.0, | |
| "learning_rate": 7.971014492753623e-07, | |
| "loss": 1.9280399084091187, | |
| "step": 12 | |
| }, | |
| { | |
| "epoch": 0.020437956204379562, | |
| "grad_norm": 3.015625, | |
| "learning_rate": 9.420289855072465e-07, | |
| "loss": 1.570988655090332, | |
| "step": 14 | |
| }, | |
| { | |
| "epoch": 0.02335766423357664, | |
| "grad_norm": 11.25, | |
| "learning_rate": 1.0869565217391306e-06, | |
| "loss": 1.7710015773773193, | |
| "step": 16 | |
| }, | |
| { | |
| "epoch": 0.026277372262773723, | |
| "grad_norm": 4.53125, | |
| "learning_rate": 1.2318840579710147e-06, | |
| "loss": 1.9166163206100464, | |
| "step": 18 | |
| }, | |
| { | |
| "epoch": 0.029197080291970802, | |
| "grad_norm": 23.5, | |
| "learning_rate": 1.3768115942028987e-06, | |
| "loss": 1.9079008102416992, | |
| "step": 20 | |
| }, | |
| { | |
| "epoch": 0.032116788321167884, | |
| "grad_norm": 6.15625, | |
| "learning_rate": 1.521739130434783e-06, | |
| "loss": 1.9891327619552612, | |
| "step": 22 | |
| }, | |
| { | |
| "epoch": 0.035036496350364967, | |
| "grad_norm": 8.6875, | |
| "learning_rate": 1.6666666666666667e-06, | |
| "loss": 1.8731980323791504, | |
| "step": 24 | |
| }, | |
| { | |
| "epoch": 0.03795620437956204, | |
| "grad_norm": 41.5, | |
| "learning_rate": 1.8115942028985508e-06, | |
| "loss": 1.996793508529663, | |
| "step": 26 | |
| }, | |
| { | |
| "epoch": 0.040875912408759124, | |
| "grad_norm": 16.125, | |
| "learning_rate": 1.956521739130435e-06, | |
| "loss": 2.4439406394958496, | |
| "step": 28 | |
| }, | |
| { | |
| "epoch": 0.043795620437956206, | |
| "grad_norm": 4.78125, | |
| "learning_rate": 2.101449275362319e-06, | |
| "loss": 1.4941191673278809, | |
| "step": 30 | |
| }, | |
| { | |
| "epoch": 0.04671532846715328, | |
| "grad_norm": 5.71875, | |
| "learning_rate": 2.246376811594203e-06, | |
| "loss": 1.9384567737579346, | |
| "step": 32 | |
| }, | |
| { | |
| "epoch": 0.049635036496350364, | |
| "grad_norm": 3.140625, | |
| "learning_rate": 2.391304347826087e-06, | |
| "loss": 2.106153964996338, | |
| "step": 34 | |
| }, | |
| { | |
| "epoch": 0.052554744525547446, | |
| "grad_norm": 25.875, | |
| "learning_rate": 2.5362318840579714e-06, | |
| "loss": 2.235496997833252, | |
| "step": 36 | |
| }, | |
| { | |
| "epoch": 0.05547445255474453, | |
| "grad_norm": 6.46875, | |
| "learning_rate": 2.6811594202898555e-06, | |
| "loss": 2.4106810092926025, | |
| "step": 38 | |
| }, | |
| { | |
| "epoch": 0.058394160583941604, | |
| "grad_norm": 4.375, | |
| "learning_rate": 2.8260869565217393e-06, | |
| "loss": 1.6466758251190186, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 0.061313868613138686, | |
| "grad_norm": 95.5, | |
| "learning_rate": 2.9710144927536235e-06, | |
| "loss": 1.9993230104446411, | |
| "step": 42 | |
| }, | |
| { | |
| "epoch": 0.06423357664233577, | |
| "grad_norm": 3.953125, | |
| "learning_rate": 3.1159420289855073e-06, | |
| "loss": 1.7203528881072998, | |
| "step": 44 | |
| }, | |
| { | |
| "epoch": 0.06715328467153285, | |
| "grad_norm": 13.5625, | |
| "learning_rate": 3.2608695652173914e-06, | |
| "loss": 2.5018796920776367, | |
| "step": 46 | |
| }, | |
| { | |
| "epoch": 0.07007299270072993, | |
| "grad_norm": 12.6875, | |
| "learning_rate": 3.4057971014492756e-06, | |
| "loss": 1.935620903968811, | |
| "step": 48 | |
| }, | |
| { | |
| "epoch": 0.072992700729927, | |
| "grad_norm": 4.125, | |
| "learning_rate": 3.55072463768116e-06, | |
| "loss": 1.9458433389663696, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 0.07591240875912408, | |
| "grad_norm": 2.171875, | |
| "learning_rate": 3.6956521739130436e-06, | |
| "loss": 1.321602702140808, | |
| "step": 52 | |
| }, | |
| { | |
| "epoch": 0.07883211678832117, | |
| "grad_norm": 3.578125, | |
| "learning_rate": 3.840579710144928e-06, | |
| "loss": 2.0101318359375, | |
| "step": 54 | |
| }, | |
| { | |
| "epoch": 0.08175182481751825, | |
| "grad_norm": 5.625, | |
| "learning_rate": 3.9855072463768115e-06, | |
| "loss": 2.0588250160217285, | |
| "step": 56 | |
| }, | |
| { | |
| "epoch": 0.08467153284671533, | |
| "grad_norm": 5.3125, | |
| "learning_rate": 4.130434782608696e-06, | |
| "loss": 1.860298752784729, | |
| "step": 58 | |
| }, | |
| { | |
| "epoch": 0.08759124087591241, | |
| "grad_norm": 5.9375, | |
| "learning_rate": 4.27536231884058e-06, | |
| "loss": 1.9684100151062012, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 0.0905109489051095, | |
| "grad_norm": 9.375, | |
| "learning_rate": 4.4202898550724645e-06, | |
| "loss": 1.980459213256836, | |
| "step": 62 | |
| }, | |
| { | |
| "epoch": 0.09343065693430656, | |
| "grad_norm": 4.90625, | |
| "learning_rate": 4.565217391304348e-06, | |
| "loss": 1.8493075370788574, | |
| "step": 64 | |
| }, | |
| { | |
| "epoch": 0.09635036496350365, | |
| "grad_norm": 2.609375, | |
| "learning_rate": 4.710144927536232e-06, | |
| "loss": 1.5537524223327637, | |
| "step": 66 | |
| }, | |
| { | |
| "epoch": 0.09927007299270073, | |
| "grad_norm": 4.46875, | |
| "learning_rate": 4.855072463768117e-06, | |
| "loss": 1.8475682735443115, | |
| "step": 68 | |
| }, | |
| { | |
| "epoch": 0.10218978102189781, | |
| "grad_norm": 3.734375, | |
| "learning_rate": 5e-06, | |
| "loss": 1.7411353588104248, | |
| "step": 70 | |
| }, | |
| { | |
| "epoch": 0.10510948905109489, | |
| "grad_norm": 29.875, | |
| "learning_rate": 4.999973760423467e-06, | |
| "loss": 2.0845284461975098, | |
| "step": 72 | |
| }, | |
| { | |
| "epoch": 0.10802919708029197, | |
| "grad_norm": 6.21875, | |
| "learning_rate": 4.99989504230588e-06, | |
| "loss": 1.5018064975738525, | |
| "step": 74 | |
| }, | |
| { | |
| "epoch": 0.11094890510948906, | |
| "grad_norm": 2.21875, | |
| "learning_rate": 4.999763847483267e-06, | |
| "loss": 1.464540958404541, | |
| "step": 76 | |
| }, | |
| { | |
| "epoch": 0.11386861313868613, | |
| "grad_norm": 4.53125, | |
| "learning_rate": 4.999580179015625e-06, | |
| "loss": 1.8232789039611816, | |
| "step": 78 | |
| }, | |
| { | |
| "epoch": 0.11678832116788321, | |
| "grad_norm": 1.7578125, | |
| "learning_rate": 4.999344041186848e-06, | |
| "loss": 1.096325159072876, | |
| "step": 80 | |
| }, | |
| { | |
| "epoch": 0.11970802919708029, | |
| "grad_norm": 3.328125, | |
| "learning_rate": 4.999055439504633e-06, | |
| "loss": 1.8037409782409668, | |
| "step": 82 | |
| }, | |
| { | |
| "epoch": 0.12262773722627737, | |
| "grad_norm": 3.84375, | |
| "learning_rate": 4.998714380700345e-06, | |
| "loss": 1.5575973987579346, | |
| "step": 84 | |
| }, | |
| { | |
| "epoch": 0.12554744525547445, | |
| "grad_norm": 4.1875, | |
| "learning_rate": 4.998320872728862e-06, | |
| "loss": 1.8613684177398682, | |
| "step": 86 | |
| }, | |
| { | |
| "epoch": 0.12846715328467154, | |
| "grad_norm": 5.15625, | |
| "learning_rate": 4.9978749247683895e-06, | |
| "loss": 1.732508897781372, | |
| "step": 88 | |
| }, | |
| { | |
| "epoch": 0.13138686131386862, | |
| "grad_norm": 2.59375, | |
| "learning_rate": 4.99737654722025e-06, | |
| "loss": 1.3435773849487305, | |
| "step": 90 | |
| }, | |
| { | |
| "epoch": 0.1343065693430657, | |
| "grad_norm": 3.25, | |
| "learning_rate": 4.996825751708635e-06, | |
| "loss": 1.7478176355361938, | |
| "step": 92 | |
| }, | |
| { | |
| "epoch": 0.13722627737226278, | |
| "grad_norm": 2.03125, | |
| "learning_rate": 4.996222551080337e-06, | |
| "loss": 1.4358994960784912, | |
| "step": 94 | |
| }, | |
| { | |
| "epoch": 0.14014598540145987, | |
| "grad_norm": 5.4375, | |
| "learning_rate": 4.9955669594044466e-06, | |
| "loss": 1.870757818222046, | |
| "step": 96 | |
| }, | |
| { | |
| "epoch": 0.14306569343065692, | |
| "grad_norm": 3.671875, | |
| "learning_rate": 4.994858991972031e-06, | |
| "loss": 1.6408865451812744, | |
| "step": 98 | |
| }, | |
| { | |
| "epoch": 0.145985401459854, | |
| "grad_norm": 3.375, | |
| "learning_rate": 4.994098665295768e-06, | |
| "loss": 1.4728097915649414, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.14890510948905109, | |
| "grad_norm": 7.4375, | |
| "learning_rate": 4.9932859971095705e-06, | |
| "loss": 1.7583755254745483, | |
| "step": 102 | |
| }, | |
| { | |
| "epoch": 0.15182481751824817, | |
| "grad_norm": 3.25, | |
| "learning_rate": 4.992421006368166e-06, | |
| "loss": 1.6836040019989014, | |
| "step": 104 | |
| }, | |
| { | |
| "epoch": 0.15474452554744525, | |
| "grad_norm": 26.25, | |
| "learning_rate": 4.991503713246659e-06, | |
| "loss": 1.9515830278396606, | |
| "step": 106 | |
| }, | |
| { | |
| "epoch": 0.15766423357664233, | |
| "grad_norm": 62.25, | |
| "learning_rate": 4.990534139140055e-06, | |
| "loss": 2.0257816314697266, | |
| "step": 108 | |
| }, | |
| { | |
| "epoch": 0.16058394160583941, | |
| "grad_norm": 2.640625, | |
| "learning_rate": 4.989512306662767e-06, | |
| "loss": 1.4182727336883545, | |
| "step": 110 | |
| }, | |
| { | |
| "epoch": 0.1635036496350365, | |
| "grad_norm": 6.6875, | |
| "learning_rate": 4.988438239648084e-06, | |
| "loss": 1.70530366897583, | |
| "step": 112 | |
| }, | |
| { | |
| "epoch": 0.16642335766423358, | |
| "grad_norm": 3.5625, | |
| "learning_rate": 4.98731196314762e-06, | |
| "loss": 1.5088133811950684, | |
| "step": 114 | |
| }, | |
| { | |
| "epoch": 0.16934306569343066, | |
| "grad_norm": 3.078125, | |
| "learning_rate": 4.986133503430724e-06, | |
| "loss": 1.6265062093734741, | |
| "step": 116 | |
| }, | |
| { | |
| "epoch": 0.17226277372262774, | |
| "grad_norm": 6.4375, | |
| "learning_rate": 4.98490288798387e-06, | |
| "loss": 1.402962327003479, | |
| "step": 118 | |
| }, | |
| { | |
| "epoch": 0.17518248175182483, | |
| "grad_norm": 4.125, | |
| "learning_rate": 4.983620145510017e-06, | |
| "loss": 1.8057794570922852, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 0.1781021897810219, | |
| "grad_norm": 6.875, | |
| "learning_rate": 4.982285305927937e-06, | |
| "loss": 1.9605462551116943, | |
| "step": 122 | |
| }, | |
| { | |
| "epoch": 0.181021897810219, | |
| "grad_norm": 3.625, | |
| "learning_rate": 4.980898400371521e-06, | |
| "loss": 1.8519611358642578, | |
| "step": 124 | |
| }, | |
| { | |
| "epoch": 0.18394160583941604, | |
| "grad_norm": 10.0625, | |
| "learning_rate": 4.9794594611890465e-06, | |
| "loss": 1.6692755222320557, | |
| "step": 126 | |
| }, | |
| { | |
| "epoch": 0.18686131386861313, | |
| "grad_norm": 6.1875, | |
| "learning_rate": 4.977968521942429e-06, | |
| "loss": 1.8997008800506592, | |
| "step": 128 | |
| }, | |
| { | |
| "epoch": 0.1897810218978102, | |
| "grad_norm": 1.8515625, | |
| "learning_rate": 4.97642561740644e-06, | |
| "loss": 1.8168402910232544, | |
| "step": 130 | |
| }, | |
| { | |
| "epoch": 0.1927007299270073, | |
| "grad_norm": 16.375, | |
| "learning_rate": 4.974830783567886e-06, | |
| "loss": 1.4727129936218262, | |
| "step": 132 | |
| }, | |
| { | |
| "epoch": 0.19562043795620437, | |
| "grad_norm": 7.71875, | |
| "learning_rate": 4.973184057624781e-06, | |
| "loss": 1.6138420104980469, | |
| "step": 134 | |
| }, | |
| { | |
| "epoch": 0.19854014598540146, | |
| "grad_norm": 3.5, | |
| "learning_rate": 4.971485477985474e-06, | |
| "loss": 1.6893023252487183, | |
| "step": 136 | |
| }, | |
| { | |
| "epoch": 0.20145985401459854, | |
| "grad_norm": 1.421875, | |
| "learning_rate": 4.969735084267752e-06, | |
| "loss": 1.3670828342437744, | |
| "step": 138 | |
| }, | |
| { | |
| "epoch": 0.20437956204379562, | |
| "grad_norm": 8.4375, | |
| "learning_rate": 4.967932917297915e-06, | |
| "loss": 1.6938685178756714, | |
| "step": 140 | |
| }, | |
| { | |
| "epoch": 0.2072992700729927, | |
| "grad_norm": 4.0625, | |
| "learning_rate": 4.966079019109831e-06, | |
| "loss": 2.2959558963775635, | |
| "step": 142 | |
| }, | |
| { | |
| "epoch": 0.21021897810218979, | |
| "grad_norm": 3.328125, | |
| "learning_rate": 4.964173432943946e-06, | |
| "loss": 1.6218578815460205, | |
| "step": 144 | |
| }, | |
| { | |
| "epoch": 0.21313868613138687, | |
| "grad_norm": 9.0625, | |
| "learning_rate": 4.962216203246281e-06, | |
| "loss": 2.592639446258545, | |
| "step": 146 | |
| }, | |
| { | |
| "epoch": 0.21605839416058395, | |
| "grad_norm": 3.3125, | |
| "learning_rate": 4.960207375667396e-06, | |
| "loss": 1.5585392713546753, | |
| "step": 148 | |
| }, | |
| { | |
| "epoch": 0.21897810218978103, | |
| "grad_norm": 3.96875, | |
| "learning_rate": 4.958146997061319e-06, | |
| "loss": 1.6422696113586426, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 0.22189781021897811, | |
| "grad_norm": 5.59375, | |
| "learning_rate": 4.956035115484465e-06, | |
| "loss": 1.7883186340332031, | |
| "step": 152 | |
| }, | |
| { | |
| "epoch": 0.22481751824817517, | |
| "grad_norm": 2.140625, | |
| "learning_rate": 4.953871780194501e-06, | |
| "loss": 1.657930612564087, | |
| "step": 154 | |
| }, | |
| { | |
| "epoch": 0.22773722627737225, | |
| "grad_norm": 24.125, | |
| "learning_rate": 4.951657041649206e-06, | |
| "loss": 1.7987116575241089, | |
| "step": 156 | |
| }, | |
| { | |
| "epoch": 0.23065693430656933, | |
| "grad_norm": 12.0, | |
| "learning_rate": 4.9493909515052944e-06, | |
| "loss": 2.016146659851074, | |
| "step": 158 | |
| }, | |
| { | |
| "epoch": 0.23357664233576642, | |
| "grad_norm": 7.90625, | |
| "learning_rate": 4.947073562617206e-06, | |
| "loss": 1.3612116575241089, | |
| "step": 160 | |
| }, | |
| { | |
| "epoch": 0.2364963503649635, | |
| "grad_norm": 3.8125, | |
| "learning_rate": 4.944704929035877e-06, | |
| "loss": 1.7367652654647827, | |
| "step": 162 | |
| }, | |
| { | |
| "epoch": 0.23941605839416058, | |
| "grad_norm": 2.875, | |
| "learning_rate": 4.942285106007477e-06, | |
| "loss": 1.3203725814819336, | |
| "step": 164 | |
| }, | |
| { | |
| "epoch": 0.24233576642335766, | |
| "grad_norm": 11.25, | |
| "learning_rate": 4.9398141499721246e-06, | |
| "loss": 1.7288057804107666, | |
| "step": 166 | |
| }, | |
| { | |
| "epoch": 0.24525547445255474, | |
| "grad_norm": 1.5625, | |
| "learning_rate": 4.937292118562566e-06, | |
| "loss": 1.383696436882019, | |
| "step": 168 | |
| }, | |
| { | |
| "epoch": 0.24817518248175183, | |
| "grad_norm": 12.5625, | |
| "learning_rate": 4.934719070602833e-06, | |
| "loss": 1.6433072090148926, | |
| "step": 170 | |
| }, | |
| { | |
| "epoch": 0.2510948905109489, | |
| "grad_norm": 3.109375, | |
| "learning_rate": 4.932095066106872e-06, | |
| "loss": 1.4025721549987793, | |
| "step": 172 | |
| }, | |
| { | |
| "epoch": 0.25401459854014596, | |
| "grad_norm": 4.1875, | |
| "learning_rate": 4.929420166277141e-06, | |
| "loss": 1.6988599300384521, | |
| "step": 174 | |
| }, | |
| { | |
| "epoch": 0.2569343065693431, | |
| "grad_norm": 3.65625, | |
| "learning_rate": 4.926694433503186e-06, | |
| "loss": 1.6042873859405518, | |
| "step": 176 | |
| }, | |
| { | |
| "epoch": 0.25985401459854013, | |
| "grad_norm": 1.6484375, | |
| "learning_rate": 4.923917931360185e-06, | |
| "loss": 1.2862474918365479, | |
| "step": 178 | |
| }, | |
| { | |
| "epoch": 0.26277372262773724, | |
| "grad_norm": 6.65625, | |
| "learning_rate": 4.9210907246074615e-06, | |
| "loss": 1.7310783863067627, | |
| "step": 180 | |
| }, | |
| { | |
| "epoch": 0.2656934306569343, | |
| "grad_norm": 4.5625, | |
| "learning_rate": 4.9182128791869796e-06, | |
| "loss": 1.5482988357543945, | |
| "step": 182 | |
| }, | |
| { | |
| "epoch": 0.2686131386861314, | |
| "grad_norm": 1.5078125, | |
| "learning_rate": 4.9152844622218e-06, | |
| "loss": 1.2439241409301758, | |
| "step": 184 | |
| }, | |
| { | |
| "epoch": 0.27153284671532846, | |
| "grad_norm": 4.3125, | |
| "learning_rate": 4.91230554201452e-06, | |
| "loss": 1.5766255855560303, | |
| "step": 186 | |
| }, | |
| { | |
| "epoch": 0.27445255474452557, | |
| "grad_norm": 3.90625, | |
| "learning_rate": 4.9092761880456764e-06, | |
| "loss": 1.311848759651184, | |
| "step": 188 | |
| }, | |
| { | |
| "epoch": 0.2773722627737226, | |
| "grad_norm": 39.75, | |
| "learning_rate": 4.906196470972128e-06, | |
| "loss": 1.5088813304901123, | |
| "step": 190 | |
| }, | |
| { | |
| "epoch": 0.28029197080291973, | |
| "grad_norm": 6.40625, | |
| "learning_rate": 4.903066462625405e-06, | |
| "loss": 1.6081913709640503, | |
| "step": 192 | |
| }, | |
| { | |
| "epoch": 0.2832116788321168, | |
| "grad_norm": 6.125, | |
| "learning_rate": 4.899886236010036e-06, | |
| "loss": 1.7471773624420166, | |
| "step": 194 | |
| }, | |
| { | |
| "epoch": 0.28613138686131384, | |
| "grad_norm": 4.09375, | |
| "learning_rate": 4.896655865301842e-06, | |
| "loss": 1.6127898693084717, | |
| "step": 196 | |
| }, | |
| { | |
| "epoch": 0.28905109489051095, | |
| "grad_norm": 3.1875, | |
| "learning_rate": 4.893375425846209e-06, | |
| "loss": 1.6075236797332764, | |
| "step": 198 | |
| }, | |
| { | |
| "epoch": 0.291970802919708, | |
| "grad_norm": 3.53125, | |
| "learning_rate": 4.890044994156331e-06, | |
| "loss": 1.712640643119812, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.2948905109489051, | |
| "grad_norm": 3.84375, | |
| "learning_rate": 4.886664647911422e-06, | |
| "loss": 1.5669183731079102, | |
| "step": 202 | |
| }, | |
| { | |
| "epoch": 0.29781021897810217, | |
| "grad_norm": 5.6875, | |
| "learning_rate": 4.883234465954909e-06, | |
| "loss": 1.7576971054077148, | |
| "step": 204 | |
| }, | |
| { | |
| "epoch": 0.3007299270072993, | |
| "grad_norm": 2.515625, | |
| "learning_rate": 4.879754528292588e-06, | |
| "loss": 1.5543663501739502, | |
| "step": 206 | |
| }, | |
| { | |
| "epoch": 0.30364963503649633, | |
| "grad_norm": 2.921875, | |
| "learning_rate": 4.876224916090762e-06, | |
| "loss": 1.9160549640655518, | |
| "step": 208 | |
| }, | |
| { | |
| "epoch": 0.30656934306569344, | |
| "grad_norm": 4.34375, | |
| "learning_rate": 4.872645711674348e-06, | |
| "loss": 1.646159291267395, | |
| "step": 210 | |
| }, | |
| { | |
| "epoch": 0.3094890510948905, | |
| "grad_norm": 1.625, | |
| "learning_rate": 4.8690169985249516e-06, | |
| "loss": 1.1048507690429688, | |
| "step": 212 | |
| }, | |
| { | |
| "epoch": 0.3124087591240876, | |
| "grad_norm": 1.5625, | |
| "learning_rate": 4.865338861278925e-06, | |
| "loss": 1.0736052989959717, | |
| "step": 214 | |
| }, | |
| { | |
| "epoch": 0.31532846715328466, | |
| "grad_norm": 3.59375, | |
| "learning_rate": 4.8616113857253925e-06, | |
| "loss": 1.2035229206085205, | |
| "step": 216 | |
| }, | |
| { | |
| "epoch": 0.3182481751824818, | |
| "grad_norm": 23.625, | |
| "learning_rate": 4.857834658804247e-06, | |
| "loss": 1.137906789779663, | |
| "step": 218 | |
| }, | |
| { | |
| "epoch": 0.32116788321167883, | |
| "grad_norm": 4.5625, | |
| "learning_rate": 4.8540087686041234e-06, | |
| "loss": 1.7008376121520996, | |
| "step": 220 | |
| }, | |
| { | |
| "epoch": 0.32408759124087594, | |
| "grad_norm": 8.75, | |
| "learning_rate": 4.850133804360346e-06, | |
| "loss": 1.6337850093841553, | |
| "step": 222 | |
| }, | |
| { | |
| "epoch": 0.327007299270073, | |
| "grad_norm": 3.984375, | |
| "learning_rate": 4.8462098564528455e-06, | |
| "loss": 1.1808865070343018, | |
| "step": 224 | |
| }, | |
| { | |
| "epoch": 0.32992700729927005, | |
| "grad_norm": 3.59375, | |
| "learning_rate": 4.842237016404048e-06, | |
| "loss": 1.5622849464416504, | |
| "step": 226 | |
| }, | |
| { | |
| "epoch": 0.33284671532846716, | |
| "grad_norm": 1.1875, | |
| "learning_rate": 4.838215376876744e-06, | |
| "loss": 1.1768817901611328, | |
| "step": 228 | |
| }, | |
| { | |
| "epoch": 0.3357664233576642, | |
| "grad_norm": 6.0, | |
| "learning_rate": 4.834145031671931e-06, | |
| "loss": 1.3726277351379395, | |
| "step": 230 | |
| }, | |
| { | |
| "epoch": 0.3386861313868613, | |
| "grad_norm": 28.375, | |
| "learning_rate": 4.830026075726615e-06, | |
| "loss": 1.1469438076019287, | |
| "step": 232 | |
| }, | |
| { | |
| "epoch": 0.3416058394160584, | |
| "grad_norm": 3.421875, | |
| "learning_rate": 4.8258586051116045e-06, | |
| "loss": 1.5012977123260498, | |
| "step": 234 | |
| }, | |
| { | |
| "epoch": 0.3445255474452555, | |
| "grad_norm": 12.9375, | |
| "learning_rate": 4.821642717029269e-06, | |
| "loss": 1.6817822456359863, | |
| "step": 236 | |
| }, | |
| { | |
| "epoch": 0.34744525547445254, | |
| "grad_norm": 5.0625, | |
| "learning_rate": 4.8173785098112675e-06, | |
| "loss": 1.525681495666504, | |
| "step": 238 | |
| }, | |
| { | |
| "epoch": 0.35036496350364965, | |
| "grad_norm": 15.4375, | |
| "learning_rate": 4.81306608291626e-06, | |
| "loss": 2.0758631229400635, | |
| "step": 240 | |
| }, | |
| { | |
| "epoch": 0.3532846715328467, | |
| "grad_norm": 3.25, | |
| "learning_rate": 4.808705536927586e-06, | |
| "loss": 1.4310352802276611, | |
| "step": 242 | |
| }, | |
| { | |
| "epoch": 0.3562043795620438, | |
| "grad_norm": 3.28125, | |
| "learning_rate": 4.804296973550915e-06, | |
| "loss": 1.6908133029937744, | |
| "step": 244 | |
| }, | |
| { | |
| "epoch": 0.35912408759124087, | |
| "grad_norm": 3.15625, | |
| "learning_rate": 4.799840495611879e-06, | |
| "loss": 1.2480230331420898, | |
| "step": 246 | |
| }, | |
| { | |
| "epoch": 0.362043795620438, | |
| "grad_norm": 2.75, | |
| "learning_rate": 4.795336207053674e-06, | |
| "loss": 1.5943894386291504, | |
| "step": 248 | |
| }, | |
| { | |
| "epoch": 0.36496350364963503, | |
| "grad_norm": 3.953125, | |
| "learning_rate": 4.790784212934631e-06, | |
| "loss": 1.1932544708251953, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 0.3678832116788321, | |
| "grad_norm": 5.53125, | |
| "learning_rate": 4.786184619425773e-06, | |
| "loss": 1.4538475275039673, | |
| "step": 252 | |
| }, | |
| { | |
| "epoch": 0.3708029197080292, | |
| "grad_norm": 6.1875, | |
| "learning_rate": 4.781537533808331e-06, | |
| "loss": 1.7138783931732178, | |
| "step": 254 | |
| }, | |
| { | |
| "epoch": 0.37372262773722625, | |
| "grad_norm": 1.609375, | |
| "learning_rate": 4.7768430644712435e-06, | |
| "loss": 1.37872314453125, | |
| "step": 256 | |
| }, | |
| { | |
| "epoch": 0.37664233576642336, | |
| "grad_norm": 6.25, | |
| "learning_rate": 4.772101320908636e-06, | |
| "loss": 1.4937684535980225, | |
| "step": 258 | |
| }, | |
| { | |
| "epoch": 0.3795620437956204, | |
| "grad_norm": 8.8125, | |
| "learning_rate": 4.767312413717256e-06, | |
| "loss": 1.4460338354110718, | |
| "step": 260 | |
| }, | |
| { | |
| "epoch": 0.38248175182481753, | |
| "grad_norm": 4.28125, | |
| "learning_rate": 4.7624764545939015e-06, | |
| "loss": 1.4206737279891968, | |
| "step": 262 | |
| }, | |
| { | |
| "epoch": 0.3854014598540146, | |
| "grad_norm": 2.671875, | |
| "learning_rate": 4.757593556332811e-06, | |
| "loss": 1.3555597066879272, | |
| "step": 264 | |
| }, | |
| { | |
| "epoch": 0.3883211678832117, | |
| "grad_norm": 3.1875, | |
| "learning_rate": 4.752663832823038e-06, | |
| "loss": 1.6055470705032349, | |
| "step": 266 | |
| }, | |
| { | |
| "epoch": 0.39124087591240875, | |
| "grad_norm": 4.09375, | |
| "learning_rate": 4.747687399045787e-06, | |
| "loss": 1.3127577304840088, | |
| "step": 268 | |
| }, | |
| { | |
| "epoch": 0.39416058394160586, | |
| "grad_norm": 5.40625, | |
| "learning_rate": 4.7426643710717386e-06, | |
| "loss": 1.6612601280212402, | |
| "step": 270 | |
| }, | |
| { | |
| "epoch": 0.3970802919708029, | |
| "grad_norm": 5.34375, | |
| "learning_rate": 4.737594866058339e-06, | |
| "loss": 1.2799599170684814, | |
| "step": 272 | |
| }, | |
| { | |
| "epoch": 0.4, | |
| "grad_norm": 6.71875, | |
| "learning_rate": 4.7324790022470675e-06, | |
| "loss": 1.9163275957107544, | |
| "step": 274 | |
| }, | |
| { | |
| "epoch": 0.4029197080291971, | |
| "grad_norm": 2.328125, | |
| "learning_rate": 4.727316898960681e-06, | |
| "loss": 1.4439561367034912, | |
| "step": 276 | |
| }, | |
| { | |
| "epoch": 0.4058394160583942, | |
| "grad_norm": 10.6875, | |
| "learning_rate": 4.722108676600427e-06, | |
| "loss": 1.2920876741409302, | |
| "step": 278 | |
| }, | |
| { | |
| "epoch": 0.40875912408759124, | |
| "grad_norm": 3.671875, | |
| "learning_rate": 4.7168544566432365e-06, | |
| "loss": 1.691207766532898, | |
| "step": 280 | |
| }, | |
| { | |
| "epoch": 0.4116788321167883, | |
| "grad_norm": 3.21875, | |
| "learning_rate": 4.711554361638896e-06, | |
| "loss": 1.527019739151001, | |
| "step": 282 | |
| }, | |
| { | |
| "epoch": 0.4145985401459854, | |
| "grad_norm": 3.1875, | |
| "learning_rate": 4.70620851520718e-06, | |
| "loss": 1.4309567213058472, | |
| "step": 284 | |
| }, | |
| { | |
| "epoch": 0.41751824817518246, | |
| "grad_norm": 2.390625, | |
| "learning_rate": 4.7008170420349746e-06, | |
| "loss": 1.2672343254089355, | |
| "step": 286 | |
| }, | |
| { | |
| "epoch": 0.42043795620437957, | |
| "grad_norm": 1.765625, | |
| "learning_rate": 4.695380067873368e-06, | |
| "loss": 1.3927721977233887, | |
| "step": 288 | |
| }, | |
| { | |
| "epoch": 0.4233576642335766, | |
| "grad_norm": 2.75, | |
| "learning_rate": 4.689897719534715e-06, | |
| "loss": 1.5347919464111328, | |
| "step": 290 | |
| }, | |
| { | |
| "epoch": 0.42627737226277373, | |
| "grad_norm": 4.5625, | |
| "learning_rate": 4.68437012488968e-06, | |
| "loss": 1.2839910984039307, | |
| "step": 292 | |
| }, | |
| { | |
| "epoch": 0.4291970802919708, | |
| "grad_norm": 48.25, | |
| "learning_rate": 4.678797412864258e-06, | |
| "loss": 1.3073639869689941, | |
| "step": 294 | |
| }, | |
| { | |
| "epoch": 0.4321167883211679, | |
| "grad_norm": 4.1875, | |
| "learning_rate": 4.673179713436762e-06, | |
| "loss": 1.5608128309249878, | |
| "step": 296 | |
| }, | |
| { | |
| "epoch": 0.43503649635036495, | |
| "grad_norm": 2.875, | |
| "learning_rate": 4.667517157634797e-06, | |
| "loss": 1.6924610137939453, | |
| "step": 298 | |
| }, | |
| { | |
| "epoch": 0.43795620437956206, | |
| "grad_norm": 3.515625, | |
| "learning_rate": 4.6618098775322e-06, | |
| "loss": 1.218139886856079, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 0.4408759124087591, | |
| "grad_norm": 5.34375, | |
| "learning_rate": 4.656058006245959e-06, | |
| "loss": 1.4968738555908203, | |
| "step": 302 | |
| }, | |
| { | |
| "epoch": 0.44379562043795623, | |
| "grad_norm": 6.59375, | |
| "learning_rate": 4.650261677933111e-06, | |
| "loss": 1.522092580795288, | |
| "step": 304 | |
| }, | |
| { | |
| "epoch": 0.4467153284671533, | |
| "grad_norm": 3.109375, | |
| "learning_rate": 4.644421027787614e-06, | |
| "loss": 1.15757155418396, | |
| "step": 306 | |
| }, | |
| { | |
| "epoch": 0.44963503649635034, | |
| "grad_norm": 2.5, | |
| "learning_rate": 4.638536192037186e-06, | |
| "loss": 1.0606379508972168, | |
| "step": 308 | |
| }, | |
| { | |
| "epoch": 0.45255474452554745, | |
| "grad_norm": 10.375, | |
| "learning_rate": 4.63260730794014e-06, | |
| "loss": 1.674492597579956, | |
| "step": 310 | |
| }, | |
| { | |
| "epoch": 0.4554744525547445, | |
| "grad_norm": 3.421875, | |
| "learning_rate": 4.62663451378217e-06, | |
| "loss": 1.4489834308624268, | |
| "step": 312 | |
| }, | |
| { | |
| "epoch": 0.4583941605839416, | |
| "grad_norm": 1.6640625, | |
| "learning_rate": 4.620617948873133e-06, | |
| "loss": 1.4036529064178467, | |
| "step": 314 | |
| }, | |
| { | |
| "epoch": 0.46131386861313867, | |
| "grad_norm": 4.21875, | |
| "learning_rate": 4.6145577535438004e-06, | |
| "loss": 1.482384204864502, | |
| "step": 316 | |
| }, | |
| { | |
| "epoch": 0.4642335766423358, | |
| "grad_norm": 2.8125, | |
| "learning_rate": 4.608454069142578e-06, | |
| "loss": 1.4590518474578857, | |
| "step": 318 | |
| }, | |
| { | |
| "epoch": 0.46715328467153283, | |
| "grad_norm": 4.53125, | |
| "learning_rate": 4.602307038032216e-06, | |
| "loss": 1.7169837951660156, | |
| "step": 320 | |
| }, | |
| { | |
| "epoch": 0.47007299270072994, | |
| "grad_norm": 4.75, | |
| "learning_rate": 4.596116803586487e-06, | |
| "loss": 1.5060232877731323, | |
| "step": 322 | |
| }, | |
| { | |
| "epoch": 0.472992700729927, | |
| "grad_norm": 2.828125, | |
| "learning_rate": 4.5898835101868415e-06, | |
| "loss": 1.4886112213134766, | |
| "step": 324 | |
| }, | |
| { | |
| "epoch": 0.4759124087591241, | |
| "grad_norm": 1.7265625, | |
| "learning_rate": 4.583607303219037e-06, | |
| "loss": 1.4076815843582153, | |
| "step": 326 | |
| }, | |
| { | |
| "epoch": 0.47883211678832116, | |
| "grad_norm": 10.4375, | |
| "learning_rate": 4.577288329069753e-06, | |
| "loss": 1.5618150234222412, | |
| "step": 328 | |
| }, | |
| { | |
| "epoch": 0.48175182481751827, | |
| "grad_norm": 4.75, | |
| "learning_rate": 4.570926735123171e-06, | |
| "loss": 1.274332046508789, | |
| "step": 330 | |
| }, | |
| { | |
| "epoch": 0.4846715328467153, | |
| "grad_norm": 4.4375, | |
| "learning_rate": 4.564522669757543e-06, | |
| "loss": 1.4747687578201294, | |
| "step": 332 | |
| }, | |
| { | |
| "epoch": 0.48759124087591244, | |
| "grad_norm": 6.40625, | |
| "learning_rate": 4.558076282341723e-06, | |
| "loss": 1.653844952583313, | |
| "step": 334 | |
| }, | |
| { | |
| "epoch": 0.4905109489051095, | |
| "grad_norm": 39.5, | |
| "learning_rate": 4.551587723231692e-06, | |
| "loss": 1.0735116004943848, | |
| "step": 336 | |
| }, | |
| { | |
| "epoch": 0.49343065693430654, | |
| "grad_norm": 36.0, | |
| "learning_rate": 4.545057143767042e-06, | |
| "loss": 1.6714699268341064, | |
| "step": 338 | |
| }, | |
| { | |
| "epoch": 0.49635036496350365, | |
| "grad_norm": 4.15625, | |
| "learning_rate": 4.538484696267453e-06, | |
| "loss": 1.4629170894622803, | |
| "step": 340 | |
| }, | |
| { | |
| "epoch": 0.4992700729927007, | |
| "grad_norm": 10.3125, | |
| "learning_rate": 4.5318705340291394e-06, | |
| "loss": 1.5702762603759766, | |
| "step": 342 | |
| }, | |
| { | |
| "epoch": 0.5021897810218978, | |
| "grad_norm": 4.96875, | |
| "learning_rate": 4.525214811321269e-06, | |
| "loss": 1.5001425743103027, | |
| "step": 344 | |
| }, | |
| { | |
| "epoch": 0.5051094890510949, | |
| "grad_norm": 5.0625, | |
| "learning_rate": 4.518517683382373e-06, | |
| "loss": 1.4789342880249023, | |
| "step": 346 | |
| }, | |
| { | |
| "epoch": 0.5080291970802919, | |
| "grad_norm": 4.15625, | |
| "learning_rate": 4.511779306416716e-06, | |
| "loss": 1.4476077556610107, | |
| "step": 348 | |
| }, | |
| { | |
| "epoch": 0.5109489051094891, | |
| "grad_norm": 1.703125, | |
| "learning_rate": 4.504999837590665e-06, | |
| "loss": 1.1996196508407593, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 0.5138686131386861, | |
| "grad_norm": 4.1875, | |
| "learning_rate": 4.49817943502901e-06, | |
| "loss": 1.532009482383728, | |
| "step": 352 | |
| }, | |
| { | |
| "epoch": 0.5167883211678832, | |
| "grad_norm": 1.65625, | |
| "learning_rate": 4.4913182578112815e-06, | |
| "loss": 1.2889015674591064, | |
| "step": 354 | |
| }, | |
| { | |
| "epoch": 0.5197080291970803, | |
| "grad_norm": 1.640625, | |
| "learning_rate": 4.484416465968049e-06, | |
| "loss": 1.3533192873001099, | |
| "step": 356 | |
| }, | |
| { | |
| "epoch": 0.5226277372262774, | |
| "grad_norm": 4.3125, | |
| "learning_rate": 4.477474220477172e-06, | |
| "loss": 1.4686871767044067, | |
| "step": 358 | |
| }, | |
| { | |
| "epoch": 0.5255474452554745, | |
| "grad_norm": 3.78125, | |
| "learning_rate": 4.470491683260056e-06, | |
| "loss": 1.4659610986709595, | |
| "step": 360 | |
| }, | |
| { | |
| "epoch": 0.5284671532846715, | |
| "grad_norm": 2.46875, | |
| "learning_rate": 4.463469017177876e-06, | |
| "loss": 1.487034797668457, | |
| "step": 362 | |
| }, | |
| { | |
| "epoch": 0.5313868613138686, | |
| "grad_norm": 3.3125, | |
| "learning_rate": 4.456406386027772e-06, | |
| "loss": 1.1844420433044434, | |
| "step": 364 | |
| }, | |
| { | |
| "epoch": 0.5343065693430656, | |
| "grad_norm": 7.34375, | |
| "learning_rate": 4.4493039545390345e-06, | |
| "loss": 1.5557405948638916, | |
| "step": 366 | |
| }, | |
| { | |
| "epoch": 0.5372262773722628, | |
| "grad_norm": 2.984375, | |
| "learning_rate": 4.442161888369258e-06, | |
| "loss": 1.3480842113494873, | |
| "step": 368 | |
| }, | |
| { | |
| "epoch": 0.5401459854014599, | |
| "grad_norm": 2.90625, | |
| "learning_rate": 4.43498035410048e-06, | |
| "loss": 1.2928515672683716, | |
| "step": 370 | |
| }, | |
| { | |
| "epoch": 0.5430656934306569, | |
| "grad_norm": 7.3125, | |
| "learning_rate": 4.427759519235294e-06, | |
| "loss": 1.7453609704971313, | |
| "step": 372 | |
| }, | |
| { | |
| "epoch": 0.545985401459854, | |
| "grad_norm": 2.640625, | |
| "learning_rate": 4.420499552192944e-06, | |
| "loss": 1.4482967853546143, | |
| "step": 374 | |
| }, | |
| { | |
| "epoch": 0.5489051094890511, | |
| "grad_norm": 2.0, | |
| "learning_rate": 4.413200622305395e-06, | |
| "loss": 1.6135839223861694, | |
| "step": 376 | |
| }, | |
| { | |
| "epoch": 0.5518248175182482, | |
| "grad_norm": 13.9375, | |
| "learning_rate": 4.405862899813384e-06, | |
| "loss": 1.570212483406067, | |
| "step": 378 | |
| }, | |
| { | |
| "epoch": 0.5547445255474452, | |
| "grad_norm": 1.3671875, | |
| "learning_rate": 4.398486555862451e-06, | |
| "loss": 1.298504114151001, | |
| "step": 380 | |
| }, | |
| { | |
| "epoch": 0.5576642335766423, | |
| "grad_norm": 7.8125, | |
| "learning_rate": 4.391071762498941e-06, | |
| "loss": 1.4520879983901978, | |
| "step": 382 | |
| }, | |
| { | |
| "epoch": 0.5605839416058395, | |
| "grad_norm": 14.8125, | |
| "learning_rate": 4.383618692666002e-06, | |
| "loss": 1.3408211469650269, | |
| "step": 384 | |
| }, | |
| { | |
| "epoch": 0.5635036496350365, | |
| "grad_norm": 3.375, | |
| "learning_rate": 4.376127520199541e-06, | |
| "loss": 1.4031929969787598, | |
| "step": 386 | |
| }, | |
| { | |
| "epoch": 0.5664233576642336, | |
| "grad_norm": 4.03125, | |
| "learning_rate": 4.3685984198241735e-06, | |
| "loss": 1.5412940979003906, | |
| "step": 388 | |
| }, | |
| { | |
| "epoch": 0.5693430656934306, | |
| "grad_norm": 6.78125, | |
| "learning_rate": 4.361031567149149e-06, | |
| "loss": 1.3730320930480957, | |
| "step": 390 | |
| }, | |
| { | |
| "epoch": 0.5722627737226277, | |
| "grad_norm": 7.28125, | |
| "learning_rate": 4.353427138664254e-06, | |
| "loss": 1.3442788124084473, | |
| "step": 392 | |
| }, | |
| { | |
| "epoch": 0.5751824817518248, | |
| "grad_norm": 6.90625, | |
| "learning_rate": 4.345785311735698e-06, | |
| "loss": 1.4140475988388062, | |
| "step": 394 | |
| }, | |
| { | |
| "epoch": 0.5781021897810219, | |
| "grad_norm": 6.25, | |
| "learning_rate": 4.3381062646019676e-06, | |
| "loss": 1.5376839637756348, | |
| "step": 396 | |
| }, | |
| { | |
| "epoch": 0.581021897810219, | |
| "grad_norm": 4.25, | |
| "learning_rate": 4.330390176369685e-06, | |
| "loss": 1.5938429832458496, | |
| "step": 398 | |
| }, | |
| { | |
| "epoch": 0.583941605839416, | |
| "grad_norm": 1.546875, | |
| "learning_rate": 4.322637227009414e-06, | |
| "loss": 1.1486091613769531, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 0.5868613138686132, | |
| "grad_norm": 3.578125, | |
| "learning_rate": 4.314847597351475e-06, | |
| "loss": 1.452984094619751, | |
| "step": 402 | |
| }, | |
| { | |
| "epoch": 0.5897810218978102, | |
| "grad_norm": 3.953125, | |
| "learning_rate": 4.3070214690817195e-06, | |
| "loss": 1.4647376537322998, | |
| "step": 404 | |
| }, | |
| { | |
| "epoch": 0.5927007299270073, | |
| "grad_norm": 2.203125, | |
| "learning_rate": 4.299159024737295e-06, | |
| "loss": 1.2110595703125, | |
| "step": 406 | |
| }, | |
| { | |
| "epoch": 0.5956204379562043, | |
| "grad_norm": 4.1875, | |
| "learning_rate": 4.291260447702389e-06, | |
| "loss": 1.3485263586044312, | |
| "step": 408 | |
| }, | |
| { | |
| "epoch": 0.5985401459854015, | |
| "grad_norm": 5.25, | |
| "learning_rate": 4.283325922203949e-06, | |
| "loss": 1.3334099054336548, | |
| "step": 410 | |
| }, | |
| { | |
| "epoch": 0.6014598540145986, | |
| "grad_norm": 2.0625, | |
| "learning_rate": 4.2753556333073875e-06, | |
| "loss": 1.2992541790008545, | |
| "step": 412 | |
| }, | |
| { | |
| "epoch": 0.6043795620437956, | |
| "grad_norm": 8.3125, | |
| "learning_rate": 4.267349766912266e-06, | |
| "loss": 1.3331689834594727, | |
| "step": 414 | |
| }, | |
| { | |
| "epoch": 0.6072992700729927, | |
| "grad_norm": 3.71875, | |
| "learning_rate": 4.259308509747955e-06, | |
| "loss": 1.4391039609909058, | |
| "step": 416 | |
| }, | |
| { | |
| "epoch": 0.6102189781021898, | |
| "grad_norm": 9.6875, | |
| "learning_rate": 4.251232049369287e-06, | |
| "loss": 1.145450472831726, | |
| "step": 418 | |
| }, | |
| { | |
| "epoch": 0.6131386861313869, | |
| "grad_norm": 10.875, | |
| "learning_rate": 4.243120574152169e-06, | |
| "loss": 1.5916063785552979, | |
| "step": 420 | |
| }, | |
| { | |
| "epoch": 0.6160583941605839, | |
| "grad_norm": 4.75, | |
| "learning_rate": 4.234974273289204e-06, | |
| "loss": 1.619133710861206, | |
| "step": 422 | |
| }, | |
| { | |
| "epoch": 0.618978102189781, | |
| "grad_norm": 4.375, | |
| "learning_rate": 4.226793336785265e-06, | |
| "loss": 1.4133093357086182, | |
| "step": 424 | |
| }, | |
| { | |
| "epoch": 0.621897810218978, | |
| "grad_norm": 6.03125, | |
| "learning_rate": 4.218577955453074e-06, | |
| "loss": 1.253399133682251, | |
| "step": 426 | |
| }, | |
| { | |
| "epoch": 0.6248175182481752, | |
| "grad_norm": 4.6875, | |
| "learning_rate": 4.210328320908744e-06, | |
| "loss": 1.4635814428329468, | |
| "step": 428 | |
| }, | |
| { | |
| "epoch": 0.6277372262773723, | |
| "grad_norm": 2.875, | |
| "learning_rate": 4.20204462556731e-06, | |
| "loss": 1.3652441501617432, | |
| "step": 430 | |
| }, | |
| { | |
| "epoch": 0.6306569343065693, | |
| "grad_norm": 8.9375, | |
| "learning_rate": 4.193727062638247e-06, | |
| "loss": 1.5560953617095947, | |
| "step": 432 | |
| }, | |
| { | |
| "epoch": 0.6335766423357664, | |
| "grad_norm": 3.53125, | |
| "learning_rate": 4.18537582612096e-06, | |
| "loss": 1.4227533340454102, | |
| "step": 434 | |
| }, | |
| { | |
| "epoch": 0.6364963503649635, | |
| "grad_norm": 3.265625, | |
| "learning_rate": 4.176991110800256e-06, | |
| "loss": 1.2683900594711304, | |
| "step": 436 | |
| }, | |
| { | |
| "epoch": 0.6394160583941606, | |
| "grad_norm": 14.1875, | |
| "learning_rate": 4.168573112241805e-06, | |
| "loss": 1.2102452516555786, | |
| "step": 438 | |
| }, | |
| { | |
| "epoch": 0.6423357664233577, | |
| "grad_norm": 4.84375, | |
| "learning_rate": 4.16012202678758e-06, | |
| "loss": 1.2587625980377197, | |
| "step": 440 | |
| }, | |
| { | |
| "epoch": 0.6452554744525547, | |
| "grad_norm": 5.46875, | |
| "learning_rate": 4.1516380515512705e-06, | |
| "loss": 1.410897970199585, | |
| "step": 442 | |
| }, | |
| { | |
| "epoch": 0.6481751824817519, | |
| "grad_norm": 1.78125, | |
| "learning_rate": 4.143121384413695e-06, | |
| "loss": 1.4373693466186523, | |
| "step": 444 | |
| }, | |
| { | |
| "epoch": 0.6510948905109489, | |
| "grad_norm": 2.78125, | |
| "learning_rate": 4.134572224018176e-06, | |
| "loss": 1.4430195093154907, | |
| "step": 446 | |
| }, | |
| { | |
| "epoch": 0.654014598540146, | |
| "grad_norm": 7.90625, | |
| "learning_rate": 4.125990769765911e-06, | |
| "loss": 1.4238855838775635, | |
| "step": 448 | |
| }, | |
| { | |
| "epoch": 0.656934306569343, | |
| "grad_norm": 2.25, | |
| "learning_rate": 4.117377221811324e-06, | |
| "loss": 1.4734668731689453, | |
| "step": 450 | |
| }, | |
| { | |
| "epoch": 0.6598540145985401, | |
| "grad_norm": 2.734375, | |
| "learning_rate": 4.108731781057393e-06, | |
| "loss": 1.5210154056549072, | |
| "step": 452 | |
| }, | |
| { | |
| "epoch": 0.6627737226277373, | |
| "grad_norm": 1.25, | |
| "learning_rate": 4.100054649150967e-06, | |
| "loss": 1.237725019454956, | |
| "step": 454 | |
| }, | |
| { | |
| "epoch": 0.6656934306569343, | |
| "grad_norm": 3.953125, | |
| "learning_rate": 4.091346028478059e-06, | |
| "loss": 1.4640438556671143, | |
| "step": 456 | |
| }, | |
| { | |
| "epoch": 0.6686131386861314, | |
| "grad_norm": 9.0, | |
| "learning_rate": 4.0826061221591326e-06, | |
| "loss": 1.105014681816101, | |
| "step": 458 | |
| }, | |
| { | |
| "epoch": 0.6715328467153284, | |
| "grad_norm": 42.25, | |
| "learning_rate": 4.073835134044356e-06, | |
| "loss": 1.4338090419769287, | |
| "step": 460 | |
| }, | |
| { | |
| "epoch": 0.6744525547445256, | |
| "grad_norm": 5.90625, | |
| "learning_rate": 4.065033268708854e-06, | |
| "loss": 1.3917622566223145, | |
| "step": 462 | |
| }, | |
| { | |
| "epoch": 0.6773722627737226, | |
| "grad_norm": 3.359375, | |
| "learning_rate": 4.056200731447929e-06, | |
| "loss": 1.0591514110565186, | |
| "step": 464 | |
| }, | |
| { | |
| "epoch": 0.6802919708029197, | |
| "grad_norm": 4.625, | |
| "learning_rate": 4.0473377282722845e-06, | |
| "loss": 1.4084625244140625, | |
| "step": 466 | |
| }, | |
| { | |
| "epoch": 0.6832116788321168, | |
| "grad_norm": 3.734375, | |
| "learning_rate": 4.038444465903208e-06, | |
| "loss": 1.4596691131591797, | |
| "step": 468 | |
| }, | |
| { | |
| "epoch": 0.6861313868613139, | |
| "grad_norm": 11.125, | |
| "learning_rate": 4.029521151767757e-06, | |
| "loss": 1.2422056198120117, | |
| "step": 470 | |
| }, | |
| { | |
| "epoch": 0.689051094890511, | |
| "grad_norm": 4.4375, | |
| "learning_rate": 4.0205679939939164e-06, | |
| "loss": 1.33591628074646, | |
| "step": 472 | |
| }, | |
| { | |
| "epoch": 0.691970802919708, | |
| "grad_norm": 2.21875, | |
| "learning_rate": 4.011585201405747e-06, | |
| "loss": 1.2504942417144775, | |
| "step": 474 | |
| }, | |
| { | |
| "epoch": 0.6948905109489051, | |
| "grad_norm": 3.6875, | |
| "learning_rate": 4.002572983518515e-06, | |
| "loss": 1.2631410360336304, | |
| "step": 476 | |
| }, | |
| { | |
| "epoch": 0.6978102189781021, | |
| "grad_norm": 5.8125, | |
| "learning_rate": 3.993531550533804e-06, | |
| "loss": 1.3914625644683838, | |
| "step": 478 | |
| }, | |
| { | |
| "epoch": 0.7007299270072993, | |
| "grad_norm": 20.0, | |
| "learning_rate": 3.98446111333461e-06, | |
| "loss": 1.288975715637207, | |
| "step": 480 | |
| }, | |
| { | |
| "epoch": 0.7036496350364964, | |
| "grad_norm": 3.234375, | |
| "learning_rate": 3.9753618834804295e-06, | |
| "loss": 1.4152731895446777, | |
| "step": 482 | |
| }, | |
| { | |
| "epoch": 0.7065693430656934, | |
| "grad_norm": 5.71875, | |
| "learning_rate": 3.966234073202316e-06, | |
| "loss": 1.316530466079712, | |
| "step": 484 | |
| }, | |
| { | |
| "epoch": 0.7094890510948905, | |
| "grad_norm": 56.5, | |
| "learning_rate": 3.957077895397941e-06, | |
| "loss": 1.3749709129333496, | |
| "step": 486 | |
| }, | |
| { | |
| "epoch": 0.7124087591240876, | |
| "grad_norm": 1.734375, | |
| "learning_rate": 3.947893563626615e-06, | |
| "loss": 1.2120707035064697, | |
| "step": 488 | |
| }, | |
| { | |
| "epoch": 0.7153284671532847, | |
| "grad_norm": 3.546875, | |
| "learning_rate": 3.93868129210432e-06, | |
| "loss": 1.4016718864440918, | |
| "step": 490 | |
| }, | |
| { | |
| "epoch": 0.7182481751824817, | |
| "grad_norm": 8.8125, | |
| "learning_rate": 3.929441295698702e-06, | |
| "loss": 1.154693841934204, | |
| "step": 492 | |
| }, | |
| { | |
| "epoch": 0.7211678832116788, | |
| "grad_norm": 3.640625, | |
| "learning_rate": 3.920173789924065e-06, | |
| "loss": 1.334530234336853, | |
| "step": 494 | |
| }, | |
| { | |
| "epoch": 0.724087591240876, | |
| "grad_norm": 1.921875, | |
| "learning_rate": 3.910878990936346e-06, | |
| "loss": 1.3103371858596802, | |
| "step": 496 | |
| }, | |
| { | |
| "epoch": 0.727007299270073, | |
| "grad_norm": 2.84375, | |
| "learning_rate": 3.901557115528069e-06, | |
| "loss": 1.244321584701538, | |
| "step": 498 | |
| }, | |
| { | |
| "epoch": 0.7299270072992701, | |
| "grad_norm": 4.40625, | |
| "learning_rate": 3.892208381123289e-06, | |
| "loss": 1.4268873929977417, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.7328467153284671, | |
| "grad_norm": 1.4765625, | |
| "learning_rate": 3.8828330057725225e-06, | |
| "loss": 1.3552806377410889, | |
| "step": 502 | |
| }, | |
| { | |
| "epoch": 0.7357664233576642, | |
| "grad_norm": 3.65625, | |
| "learning_rate": 3.873431208147664e-06, | |
| "loss": 1.6077991724014282, | |
| "step": 504 | |
| }, | |
| { | |
| "epoch": 0.7386861313868613, | |
| "grad_norm": 2.21875, | |
| "learning_rate": 3.864003207536879e-06, | |
| "loss": 1.2244906425476074, | |
| "step": 506 | |
| }, | |
| { | |
| "epoch": 0.7416058394160584, | |
| "grad_norm": 2.265625, | |
| "learning_rate": 3.854549223839497e-06, | |
| "loss": 1.0374276638031006, | |
| "step": 508 | |
| }, | |
| { | |
| "epoch": 0.7445255474452555, | |
| "grad_norm": 6.96875, | |
| "learning_rate": 3.845069477560876e-06, | |
| "loss": 1.547581434249878, | |
| "step": 510 | |
| }, | |
| { | |
| "epoch": 0.7474452554744525, | |
| "grad_norm": 2.203125, | |
| "learning_rate": 3.835564189807263e-06, | |
| "loss": 1.225568175315857, | |
| "step": 512 | |
| }, | |
| { | |
| "epoch": 0.7503649635036497, | |
| "grad_norm": 4.09375, | |
| "learning_rate": 3.826033582280635e-06, | |
| "loss": 1.2825735807418823, | |
| "step": 514 | |
| }, | |
| { | |
| "epoch": 0.7532846715328467, | |
| "grad_norm": 2.96875, | |
| "learning_rate": 3.816477877273533e-06, | |
| "loss": 1.430619716644287, | |
| "step": 516 | |
| }, | |
| { | |
| "epoch": 0.7562043795620438, | |
| "grad_norm": 10.9375, | |
| "learning_rate": 3.8068972976638703e-06, | |
| "loss": 1.489488124847412, | |
| "step": 518 | |
| }, | |
| { | |
| "epoch": 0.7591240875912408, | |
| "grad_norm": 4.3125, | |
| "learning_rate": 3.797292066909734e-06, | |
| "loss": 0.8555082082748413, | |
| "step": 520 | |
| }, | |
| { | |
| "epoch": 0.762043795620438, | |
| "grad_norm": 3.703125, | |
| "learning_rate": 3.787662409044184e-06, | |
| "loss": 1.3753139972686768, | |
| "step": 522 | |
| }, | |
| { | |
| "epoch": 0.7649635036496351, | |
| "grad_norm": 8.0, | |
| "learning_rate": 3.7780085486700126e-06, | |
| "loss": 1.6844412088394165, | |
| "step": 524 | |
| }, | |
| { | |
| "epoch": 0.7678832116788321, | |
| "grad_norm": 5.25, | |
| "learning_rate": 3.768330710954517e-06, | |
| "loss": 1.592594027519226, | |
| "step": 526 | |
| }, | |
| { | |
| "epoch": 0.7708029197080292, | |
| "grad_norm": 1.5, | |
| "learning_rate": 3.7586291216242433e-06, | |
| "loss": 1.2550559043884277, | |
| "step": 528 | |
| }, | |
| { | |
| "epoch": 0.7737226277372263, | |
| "grad_norm": 3.953125, | |
| "learning_rate": 3.748904006959719e-06, | |
| "loss": 1.1512435674667358, | |
| "step": 530 | |
| }, | |
| { | |
| "epoch": 0.7766423357664234, | |
| "grad_norm": 10.375, | |
| "learning_rate": 3.739155593790182e-06, | |
| "loss": 1.5256032943725586, | |
| "step": 532 | |
| }, | |
| { | |
| "epoch": 0.7795620437956204, | |
| "grad_norm": 10.75, | |
| "learning_rate": 3.729384109488282e-06, | |
| "loss": 1.6810424327850342, | |
| "step": 534 | |
| }, | |
| { | |
| "epoch": 0.7824817518248175, | |
| "grad_norm": 3.734375, | |
| "learning_rate": 3.719589781964787e-06, | |
| "loss": 1.4392688274383545, | |
| "step": 536 | |
| }, | |
| { | |
| "epoch": 0.7854014598540145, | |
| "grad_norm": 4.125, | |
| "learning_rate": 3.7097728396632555e-06, | |
| "loss": 1.4172781705856323, | |
| "step": 538 | |
| }, | |
| { | |
| "epoch": 0.7883211678832117, | |
| "grad_norm": 4.125, | |
| "learning_rate": 3.6999335115547185e-06, | |
| "loss": 1.401853322982788, | |
| "step": 540 | |
| }, | |
| { | |
| "epoch": 0.7912408759124088, | |
| "grad_norm": 6.375, | |
| "learning_rate": 3.690072027132335e-06, | |
| "loss": 1.534106731414795, | |
| "step": 542 | |
| }, | |
| { | |
| "epoch": 0.7941605839416058, | |
| "grad_norm": 5.0, | |
| "learning_rate": 3.680188616406037e-06, | |
| "loss": 1.629064679145813, | |
| "step": 544 | |
| }, | |
| { | |
| "epoch": 0.7970802919708029, | |
| "grad_norm": 3.5625, | |
| "learning_rate": 3.6702835098971706e-06, | |
| "loss": 1.5794017314910889, | |
| "step": 546 | |
| }, | |
| { | |
| "epoch": 0.8, | |
| "grad_norm": 7.90625, | |
| "learning_rate": 3.6603569386331122e-06, | |
| "loss": 1.556319236755371, | |
| "step": 548 | |
| }, | |
| { | |
| "epoch": 0.8029197080291971, | |
| "grad_norm": 5.125, | |
| "learning_rate": 3.6504091341418853e-06, | |
| "loss": 1.5984359979629517, | |
| "step": 550 | |
| }, | |
| { | |
| "epoch": 0.8058394160583942, | |
| "grad_norm": 4.5, | |
| "learning_rate": 3.640440328446759e-06, | |
| "loss": 1.5283421277999878, | |
| "step": 552 | |
| }, | |
| { | |
| "epoch": 0.8087591240875912, | |
| "grad_norm": 6.75, | |
| "learning_rate": 3.6304507540608357e-06, | |
| "loss": 1.383811116218567, | |
| "step": 554 | |
| }, | |
| { | |
| "epoch": 0.8116788321167884, | |
| "grad_norm": 3.640625, | |
| "learning_rate": 3.620440643981629e-06, | |
| "loss": 1.3146003484725952, | |
| "step": 556 | |
| }, | |
| { | |
| "epoch": 0.8145985401459854, | |
| "grad_norm": 4.125, | |
| "learning_rate": 3.6104102316856255e-06, | |
| "loss": 1.4131672382354736, | |
| "step": 558 | |
| }, | |
| { | |
| "epoch": 0.8175182481751825, | |
| "grad_norm": 13.25, | |
| "learning_rate": 3.600359751122845e-06, | |
| "loss": 1.549619197845459, | |
| "step": 560 | |
| }, | |
| { | |
| "epoch": 0.8204379562043795, | |
| "grad_norm": 2.796875, | |
| "learning_rate": 3.590289436711379e-06, | |
| "loss": 1.5269279479980469, | |
| "step": 562 | |
| }, | |
| { | |
| "epoch": 0.8233576642335766, | |
| "grad_norm": 3.046875, | |
| "learning_rate": 3.5801995233319265e-06, | |
| "loss": 1.3862372636795044, | |
| "step": 564 | |
| }, | |
| { | |
| "epoch": 0.8262773722627738, | |
| "grad_norm": 2.484375, | |
| "learning_rate": 3.5700902463223137e-06, | |
| "loss": 1.2330877780914307, | |
| "step": 566 | |
| }, | |
| { | |
| "epoch": 0.8291970802919708, | |
| "grad_norm": 7.125, | |
| "learning_rate": 3.559961841472005e-06, | |
| "loss": 1.4884552955627441, | |
| "step": 568 | |
| }, | |
| { | |
| "epoch": 0.8321167883211679, | |
| "grad_norm": 3.28125, | |
| "learning_rate": 3.5498145450166057e-06, | |
| "loss": 1.3787778615951538, | |
| "step": 570 | |
| }, | |
| { | |
| "epoch": 0.8350364963503649, | |
| "grad_norm": 3.609375, | |
| "learning_rate": 3.5396485936323456e-06, | |
| "loss": 1.3882396221160889, | |
| "step": 572 | |
| }, | |
| { | |
| "epoch": 0.8379562043795621, | |
| "grad_norm": 3.15625, | |
| "learning_rate": 3.529464224430568e-06, | |
| "loss": 1.3656411170959473, | |
| "step": 574 | |
| }, | |
| { | |
| "epoch": 0.8408759124087591, | |
| "grad_norm": 5.65625, | |
| "learning_rate": 3.5192616749521942e-06, | |
| "loss": 1.5140806436538696, | |
| "step": 576 | |
| }, | |
| { | |
| "epoch": 0.8437956204379562, | |
| "grad_norm": 4.5, | |
| "learning_rate": 3.5090411831621803e-06, | |
| "loss": 1.5188113451004028, | |
| "step": 578 | |
| }, | |
| { | |
| "epoch": 0.8467153284671532, | |
| "grad_norm": 2.671875, | |
| "learning_rate": 3.498802987443974e-06, | |
| "loss": 1.3665883541107178, | |
| "step": 580 | |
| }, | |
| { | |
| "epoch": 0.8496350364963504, | |
| "grad_norm": 5.25, | |
| "learning_rate": 3.4885473265939464e-06, | |
| "loss": 1.383296012878418, | |
| "step": 582 | |
| }, | |
| { | |
| "epoch": 0.8525547445255475, | |
| "grad_norm": 2.71875, | |
| "learning_rate": 3.478274439815831e-06, | |
| "loss": 1.2266430854797363, | |
| "step": 584 | |
| }, | |
| { | |
| "epoch": 0.8554744525547445, | |
| "grad_norm": 3.9375, | |
| "learning_rate": 3.467984566715137e-06, | |
| "loss": 1.5247292518615723, | |
| "step": 586 | |
| }, | |
| { | |
| "epoch": 0.8583941605839416, | |
| "grad_norm": 4.125, | |
| "learning_rate": 3.4576779472935644e-06, | |
| "loss": 1.4203873872756958, | |
| "step": 588 | |
| }, | |
| { | |
| "epoch": 0.8613138686131386, | |
| "grad_norm": 2.46875, | |
| "learning_rate": 3.447354821943407e-06, | |
| "loss": 1.222019076347351, | |
| "step": 590 | |
| }, | |
| { | |
| "epoch": 0.8642335766423358, | |
| "grad_norm": 4.8125, | |
| "learning_rate": 3.4370154314419395e-06, | |
| "loss": 1.2593979835510254, | |
| "step": 592 | |
| }, | |
| { | |
| "epoch": 0.8671532846715329, | |
| "grad_norm": 3.21875, | |
| "learning_rate": 3.4266600169458135e-06, | |
| "loss": 1.22776460647583, | |
| "step": 594 | |
| }, | |
| { | |
| "epoch": 0.8700729927007299, | |
| "grad_norm": 2.703125, | |
| "learning_rate": 3.4162888199854182e-06, | |
| "loss": 1.2717225551605225, | |
| "step": 596 | |
| }, | |
| { | |
| "epoch": 0.872992700729927, | |
| "grad_norm": 1.2890625, | |
| "learning_rate": 3.405902082459259e-06, | |
| "loss": 1.0713449716567993, | |
| "step": 598 | |
| }, | |
| { | |
| "epoch": 0.8759124087591241, | |
| "grad_norm": 3.453125, | |
| "learning_rate": 3.3955000466283073e-06, | |
| "loss": 1.2096487283706665, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 0.8788321167883212, | |
| "grad_norm": 2.03125, | |
| "learning_rate": 3.385082955110355e-06, | |
| "loss": 1.2699155807495117, | |
| "step": 602 | |
| }, | |
| { | |
| "epoch": 0.8817518248175182, | |
| "grad_norm": 2.328125, | |
| "learning_rate": 3.3746510508743533e-06, | |
| "loss": 1.3786303997039795, | |
| "step": 604 | |
| }, | |
| { | |
| "epoch": 0.8846715328467153, | |
| "grad_norm": 5.53125, | |
| "learning_rate": 3.3642045772347453e-06, | |
| "loss": 1.3685808181762695, | |
| "step": 606 | |
| }, | |
| { | |
| "epoch": 0.8875912408759125, | |
| "grad_norm": 9.0625, | |
| "learning_rate": 3.353743777845795e-06, | |
| "loss": 1.178727626800537, | |
| "step": 608 | |
| }, | |
| { | |
| "epoch": 0.8905109489051095, | |
| "grad_norm": 4.1875, | |
| "learning_rate": 3.343268896695897e-06, | |
| "loss": 1.383094310760498, | |
| "step": 610 | |
| }, | |
| { | |
| "epoch": 0.8934306569343066, | |
| "grad_norm": 3.359375, | |
| "learning_rate": 3.3327801781018925e-06, | |
| "loss": 1.4056508541107178, | |
| "step": 612 | |
| }, | |
| { | |
| "epoch": 0.8963503649635036, | |
| "grad_norm": 4.65625, | |
| "learning_rate": 3.322277866703367e-06, | |
| "loss": 1.5974513292312622, | |
| "step": 614 | |
| }, | |
| { | |
| "epoch": 0.8992700729927007, | |
| "grad_norm": 1.1875, | |
| "learning_rate": 3.3117622074569476e-06, | |
| "loss": 1.1610685586929321, | |
| "step": 616 | |
| }, | |
| { | |
| "epoch": 0.9021897810218978, | |
| "grad_norm": 10.75, | |
| "learning_rate": 3.3012334456305846e-06, | |
| "loss": 0.901719331741333, | |
| "step": 618 | |
| }, | |
| { | |
| "epoch": 0.9051094890510949, | |
| "grad_norm": 8.3125, | |
| "learning_rate": 3.2906918267978355e-06, | |
| "loss": 1.2409268617630005, | |
| "step": 620 | |
| }, | |
| { | |
| "epoch": 0.908029197080292, | |
| "grad_norm": 3.453125, | |
| "learning_rate": 3.2801375968321355e-06, | |
| "loss": 1.4349682331085205, | |
| "step": 622 | |
| }, | |
| { | |
| "epoch": 0.910948905109489, | |
| "grad_norm": 6.875, | |
| "learning_rate": 3.269571001901061e-06, | |
| "loss": 1.3277549743652344, | |
| "step": 624 | |
| }, | |
| { | |
| "epoch": 0.9138686131386862, | |
| "grad_norm": 5.1875, | |
| "learning_rate": 3.2589922884605924e-06, | |
| "loss": 1.3614181280136108, | |
| "step": 626 | |
| }, | |
| { | |
| "epoch": 0.9167883211678832, | |
| "grad_norm": 9.125, | |
| "learning_rate": 3.2484017032493615e-06, | |
| "loss": 1.705947756767273, | |
| "step": 628 | |
| }, | |
| { | |
| "epoch": 0.9197080291970803, | |
| "grad_norm": 4.0, | |
| "learning_rate": 3.237799493282897e-06, | |
| "loss": 1.3996449708938599, | |
| "step": 630 | |
| }, | |
| { | |
| "epoch": 0.9226277372262773, | |
| "grad_norm": 2.75, | |
| "learning_rate": 3.2271859058478666e-06, | |
| "loss": 1.4013357162475586, | |
| "step": 632 | |
| }, | |
| { | |
| "epoch": 0.9255474452554745, | |
| "grad_norm": 3.46875, | |
| "learning_rate": 3.2165611884963055e-06, | |
| "loss": 1.2193137407302856, | |
| "step": 634 | |
| }, | |
| { | |
| "epoch": 0.9284671532846716, | |
| "grad_norm": 2.421875, | |
| "learning_rate": 3.2059255890398445e-06, | |
| "loss": 0.9855245351791382, | |
| "step": 636 | |
| }, | |
| { | |
| "epoch": 0.9313868613138686, | |
| "grad_norm": 3.59375, | |
| "learning_rate": 3.1952793555439276e-06, | |
| "loss": 1.4272806644439697, | |
| "step": 638 | |
| }, | |
| { | |
| "epoch": 0.9343065693430657, | |
| "grad_norm": 3.421875, | |
| "learning_rate": 3.18462273632203e-06, | |
| "loss": 1.1866121292114258, | |
| "step": 640 | |
| }, | |
| { | |
| "epoch": 0.9372262773722628, | |
| "grad_norm": 6.84375, | |
| "learning_rate": 3.173955979929863e-06, | |
| "loss": 1.385930061340332, | |
| "step": 642 | |
| }, | |
| { | |
| "epoch": 0.9401459854014599, | |
| "grad_norm": 1.8125, | |
| "learning_rate": 3.163279335159578e-06, | |
| "loss": 1.283376932144165, | |
| "step": 644 | |
| }, | |
| { | |
| "epoch": 0.9430656934306569, | |
| "grad_norm": 5.0625, | |
| "learning_rate": 3.152593051033966e-06, | |
| "loss": 1.368044376373291, | |
| "step": 646 | |
| }, | |
| { | |
| "epoch": 0.945985401459854, | |
| "grad_norm": 14.0625, | |
| "learning_rate": 3.1418973768006424e-06, | |
| "loss": 0.6849503517150879, | |
| "step": 648 | |
| }, | |
| { | |
| "epoch": 0.948905109489051, | |
| "grad_norm": 2.140625, | |
| "learning_rate": 3.1311925619262417e-06, | |
| "loss": 1.3481240272521973, | |
| "step": 650 | |
| }, | |
| { | |
| "epoch": 0.9518248175182482, | |
| "grad_norm": 3.234375, | |
| "learning_rate": 3.1204788560905935e-06, | |
| "loss": 1.390141248703003, | |
| "step": 652 | |
| }, | |
| { | |
| "epoch": 0.9547445255474453, | |
| "grad_norm": 8.8125, | |
| "learning_rate": 3.1097565091809033e-06, | |
| "loss": 1.3187050819396973, | |
| "step": 654 | |
| }, | |
| { | |
| "epoch": 0.9576642335766423, | |
| "grad_norm": 12.125, | |
| "learning_rate": 3.0990257712859184e-06, | |
| "loss": 1.3746651411056519, | |
| "step": 656 | |
| }, | |
| { | |
| "epoch": 0.9605839416058394, | |
| "grad_norm": 7.09375, | |
| "learning_rate": 3.0882868926901e-06, | |
| "loss": 1.2352771759033203, | |
| "step": 658 | |
| }, | |
| { | |
| "epoch": 0.9635036496350365, | |
| "grad_norm": 3.46875, | |
| "learning_rate": 3.077540123867783e-06, | |
| "loss": 1.328325629234314, | |
| "step": 660 | |
| }, | |
| { | |
| "epoch": 0.9664233576642336, | |
| "grad_norm": 3.46875, | |
| "learning_rate": 3.066785715477334e-06, | |
| "loss": 1.2275207042694092, | |
| "step": 662 | |
| }, | |
| { | |
| "epoch": 0.9693430656934306, | |
| "grad_norm": 2.4375, | |
| "learning_rate": 3.056023918355307e-06, | |
| "loss": 1.335202693939209, | |
| "step": 664 | |
| }, | |
| { | |
| "epoch": 0.9722627737226277, | |
| "grad_norm": 6.5, | |
| "learning_rate": 3.0452549835105895e-06, | |
| "loss": 1.4829626083374023, | |
| "step": 666 | |
| }, | |
| { | |
| "epoch": 0.9751824817518249, | |
| "grad_norm": 34.0, | |
| "learning_rate": 3.03447916211855e-06, | |
| "loss": 1.5850169658660889, | |
| "step": 668 | |
| }, | |
| { | |
| "epoch": 0.9781021897810219, | |
| "grad_norm": 6.5, | |
| "learning_rate": 3.0236967055151804e-06, | |
| "loss": 1.671141266822815, | |
| "step": 670 | |
| }, | |
| { | |
| "epoch": 0.981021897810219, | |
| "grad_norm": 23.125, | |
| "learning_rate": 3.0129078651912317e-06, | |
| "loss": 1.300727128982544, | |
| "step": 672 | |
| }, | |
| { | |
| "epoch": 0.983941605839416, | |
| "grad_norm": 8.875, | |
| "learning_rate": 3.00211289278635e-06, | |
| "loss": 1.4001004695892334, | |
| "step": 674 | |
| }, | |
| { | |
| "epoch": 0.9868613138686131, | |
| "grad_norm": 8.875, | |
| "learning_rate": 2.991312040083206e-06, | |
| "loss": 0.47176289558410645, | |
| "step": 676 | |
| }, | |
| { | |
| "epoch": 0.9897810218978103, | |
| "grad_norm": 2.875, | |
| "learning_rate": 2.9805055590016225e-06, | |
| "loss": 1.2891722917556763, | |
| "step": 678 | |
| }, | |
| { | |
| "epoch": 0.9927007299270073, | |
| "grad_norm": 4.1875, | |
| "learning_rate": 2.9696937015926995e-06, | |
| "loss": 1.365147352218628, | |
| "step": 680 | |
| }, | |
| { | |
| "epoch": 0.9956204379562044, | |
| "grad_norm": 1.8828125, | |
| "learning_rate": 2.9588767200329348e-06, | |
| "loss": 1.2809860706329346, | |
| "step": 682 | |
| }, | |
| { | |
| "epoch": 0.9985401459854014, | |
| "grad_norm": 8.25, | |
| "learning_rate": 2.9480548666183427e-06, | |
| "loss": 1.6904196739196777, | |
| "step": 684 | |
| }, | |
| { | |
| "epoch": 1.0014598540145985, | |
| "grad_norm": 2.21875, | |
| "learning_rate": 2.9372283937585675e-06, | |
| "loss": 1.3279258012771606, | |
| "step": 686 | |
| }, | |
| { | |
| "epoch": 1.0043795620437956, | |
| "grad_norm": 4.34375, | |
| "learning_rate": 2.926397553970999e-06, | |
| "loss": 1.277381181716919, | |
| "step": 688 | |
| }, | |
| { | |
| "epoch": 1.0072992700729928, | |
| "grad_norm": 5.84375, | |
| "learning_rate": 2.915562599874882e-06, | |
| "loss": 1.500443935394287, | |
| "step": 690 | |
| }, | |
| { | |
| "epoch": 1.0102189781021897, | |
| "grad_norm": 9.875, | |
| "learning_rate": 2.904723784185422e-06, | |
| "loss": 1.2994956970214844, | |
| "step": 692 | |
| }, | |
| { | |
| "epoch": 1.013138686131387, | |
| "grad_norm": 10.6875, | |
| "learning_rate": 2.893881359707894e-06, | |
| "loss": 1.227457046508789, | |
| "step": 694 | |
| }, | |
| { | |
| "epoch": 1.0160583941605839, | |
| "grad_norm": 2.984375, | |
| "learning_rate": 2.883035579331744e-06, | |
| "loss": 1.2923262119293213, | |
| "step": 696 | |
| }, | |
| { | |
| "epoch": 1.018978102189781, | |
| "grad_norm": 4.0, | |
| "learning_rate": 2.8721866960246912e-06, | |
| "loss": 1.445424199104309, | |
| "step": 698 | |
| }, | |
| { | |
| "epoch": 1.0218978102189782, | |
| "grad_norm": 2.1875, | |
| "learning_rate": 2.861334962826828e-06, | |
| "loss": 1.1312172412872314, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 1.0248175182481751, | |
| "grad_norm": 3.734375, | |
| "learning_rate": 2.8504806328447177e-06, | |
| "loss": 1.4891958236694336, | |
| "step": 702 | |
| }, | |
| { | |
| "epoch": 1.0277372262773723, | |
| "grad_norm": 3.734375, | |
| "learning_rate": 2.8396239592454914e-06, | |
| "loss": 1.4066648483276367, | |
| "step": 704 | |
| }, | |
| { | |
| "epoch": 1.0306569343065692, | |
| "grad_norm": 4.21875, | |
| "learning_rate": 2.828765195250942e-06, | |
| "loss": 1.4027667045593262, | |
| "step": 706 | |
| }, | |
| { | |
| "epoch": 1.0335766423357664, | |
| "grad_norm": 3.828125, | |
| "learning_rate": 2.8179045941316214e-06, | |
| "loss": 1.3984425067901611, | |
| "step": 708 | |
| }, | |
| { | |
| "epoch": 1.0364963503649636, | |
| "grad_norm": 37.25, | |
| "learning_rate": 2.8070424092009264e-06, | |
| "loss": 1.5881340503692627, | |
| "step": 710 | |
| }, | |
| { | |
| "epoch": 1.0394160583941605, | |
| "grad_norm": 5.21875, | |
| "learning_rate": 2.7961788938091994e-06, | |
| "loss": 1.3652167320251465, | |
| "step": 712 | |
| }, | |
| { | |
| "epoch": 1.0423357664233577, | |
| "grad_norm": 9.0, | |
| "learning_rate": 2.785314301337811e-06, | |
| "loss": 1.4395644664764404, | |
| "step": 714 | |
| }, | |
| { | |
| "epoch": 1.0452554744525548, | |
| "grad_norm": 4.125, | |
| "learning_rate": 2.7744488851932568e-06, | |
| "loss": 1.3807083368301392, | |
| "step": 716 | |
| }, | |
| { | |
| "epoch": 1.0481751824817518, | |
| "grad_norm": 16.625, | |
| "learning_rate": 2.76358289880124e-06, | |
| "loss": 1.2562787532806396, | |
| "step": 718 | |
| }, | |
| { | |
| "epoch": 1.051094890510949, | |
| "grad_norm": 4.03125, | |
| "learning_rate": 2.752716595600768e-06, | |
| "loss": 1.2394318580627441, | |
| "step": 720 | |
| }, | |
| { | |
| "epoch": 1.054014598540146, | |
| "grad_norm": 8.625, | |
| "learning_rate": 2.7418502290382352e-06, | |
| "loss": 1.1047321557998657, | |
| "step": 722 | |
| }, | |
| { | |
| "epoch": 1.056934306569343, | |
| "grad_norm": 4.46875, | |
| "learning_rate": 2.7309840525615146e-06, | |
| "loss": 1.5514793395996094, | |
| "step": 724 | |
| }, | |
| { | |
| "epoch": 1.0598540145985402, | |
| "grad_norm": 3.234375, | |
| "learning_rate": 2.720118319614047e-06, | |
| "loss": 1.2009215354919434, | |
| "step": 726 | |
| }, | |
| { | |
| "epoch": 1.0627737226277372, | |
| "grad_norm": 2.65625, | |
| "learning_rate": 2.709253283628924e-06, | |
| "loss": 1.2573150396347046, | |
| "step": 728 | |
| }, | |
| { | |
| "epoch": 1.0656934306569343, | |
| "grad_norm": 8.9375, | |
| "learning_rate": 2.698389198022987e-06, | |
| "loss": 1.624213457107544, | |
| "step": 730 | |
| }, | |
| { | |
| "epoch": 1.0686131386861315, | |
| "grad_norm": 5.375, | |
| "learning_rate": 2.6875263161909054e-06, | |
| "loss": 1.3574187755584717, | |
| "step": 732 | |
| }, | |
| { | |
| "epoch": 1.0715328467153284, | |
| "grad_norm": 7.4375, | |
| "learning_rate": 2.676664891499275e-06, | |
| "loss": 1.2222844362258911, | |
| "step": 734 | |
| }, | |
| { | |
| "epoch": 1.0744525547445256, | |
| "grad_norm": 1.640625, | |
| "learning_rate": 2.6658051772807046e-06, | |
| "loss": 1.2617628574371338, | |
| "step": 736 | |
| }, | |
| { | |
| "epoch": 1.0773722627737226, | |
| "grad_norm": 8.0, | |
| "learning_rate": 2.6549474268279074e-06, | |
| "loss": 1.3748055696487427, | |
| "step": 738 | |
| }, | |
| { | |
| "epoch": 1.0802919708029197, | |
| "grad_norm": 8.5625, | |
| "learning_rate": 2.644091893387793e-06, | |
| "loss": 1.4741809368133545, | |
| "step": 740 | |
| }, | |
| { | |
| "epoch": 1.0832116788321169, | |
| "grad_norm": 7.1875, | |
| "learning_rate": 2.6332388301555615e-06, | |
| "loss": 1.3683550357818604, | |
| "step": 742 | |
| }, | |
| { | |
| "epoch": 1.0861313868613138, | |
| "grad_norm": 23.125, | |
| "learning_rate": 2.622388490268799e-06, | |
| "loss": 1.4302444458007812, | |
| "step": 744 | |
| }, | |
| { | |
| "epoch": 1.089051094890511, | |
| "grad_norm": 2.875, | |
| "learning_rate": 2.6115411268015716e-06, | |
| "loss": 1.3794375658035278, | |
| "step": 746 | |
| }, | |
| { | |
| "epoch": 1.091970802919708, | |
| "grad_norm": 3.5, | |
| "learning_rate": 2.6006969927585214e-06, | |
| "loss": 1.6521217823028564, | |
| "step": 748 | |
| }, | |
| { | |
| "epoch": 1.094890510948905, | |
| "grad_norm": 4.09375, | |
| "learning_rate": 2.589856341068969e-06, | |
| "loss": 1.380043625831604, | |
| "step": 750 | |
| }, | |
| { | |
| "epoch": 1.0978102189781023, | |
| "grad_norm": 2.84375, | |
| "learning_rate": 2.5790194245810125e-06, | |
| "loss": 1.2655432224273682, | |
| "step": 752 | |
| }, | |
| { | |
| "epoch": 1.1007299270072992, | |
| "grad_norm": 8.6875, | |
| "learning_rate": 2.568186496055628e-06, | |
| "loss": 1.4429633617401123, | |
| "step": 754 | |
| }, | |
| { | |
| "epoch": 1.1036496350364964, | |
| "grad_norm": 2.34375, | |
| "learning_rate": 2.5573578081607793e-06, | |
| "loss": 1.1212751865386963, | |
| "step": 756 | |
| }, | |
| { | |
| "epoch": 1.1065693430656935, | |
| "grad_norm": 2.71875, | |
| "learning_rate": 2.546533613465518e-06, | |
| "loss": 0.9118128418922424, | |
| "step": 758 | |
| }, | |
| { | |
| "epoch": 1.1094890510948905, | |
| "grad_norm": 2.9375, | |
| "learning_rate": 2.5357141644340966e-06, | |
| "loss": 1.3533203601837158, | |
| "step": 760 | |
| }, | |
| { | |
| "epoch": 1.1124087591240877, | |
| "grad_norm": 5.625, | |
| "learning_rate": 2.5248997134200833e-06, | |
| "loss": 1.2528855800628662, | |
| "step": 762 | |
| }, | |
| { | |
| "epoch": 1.1153284671532846, | |
| "grad_norm": 2.5, | |
| "learning_rate": 2.5140905126604677e-06, | |
| "loss": 1.244079351425171, | |
| "step": 764 | |
| }, | |
| { | |
| "epoch": 1.1182481751824818, | |
| "grad_norm": 5.71875, | |
| "learning_rate": 2.503286814269783e-06, | |
| "loss": 1.3053560256958008, | |
| "step": 766 | |
| }, | |
| { | |
| "epoch": 1.121167883211679, | |
| "grad_norm": 1.5546875, | |
| "learning_rate": 2.4924888702342266e-06, | |
| "loss": 1.2007651329040527, | |
| "step": 768 | |
| }, | |
| { | |
| "epoch": 1.1240875912408759, | |
| "grad_norm": 5.5625, | |
| "learning_rate": 2.481696932405779e-06, | |
| "loss": 1.3610585927963257, | |
| "step": 770 | |
| }, | |
| { | |
| "epoch": 1.127007299270073, | |
| "grad_norm": 2.59375, | |
| "learning_rate": 2.4709112524963326e-06, | |
| "loss": 1.3990166187286377, | |
| "step": 772 | |
| }, | |
| { | |
| "epoch": 1.12992700729927, | |
| "grad_norm": 3.484375, | |
| "learning_rate": 2.4601320820718196e-06, | |
| "loss": 1.3095015287399292, | |
| "step": 774 | |
| }, | |
| { | |
| "epoch": 1.1328467153284671, | |
| "grad_norm": 2.84375, | |
| "learning_rate": 2.4493596725463435e-06, | |
| "loss": 1.2231605052947998, | |
| "step": 776 | |
| }, | |
| { | |
| "epoch": 1.1357664233576643, | |
| "grad_norm": 5.875, | |
| "learning_rate": 2.438594275176318e-06, | |
| "loss": 1.3952467441558838, | |
| "step": 778 | |
| }, | |
| { | |
| "epoch": 1.1386861313868613, | |
| "grad_norm": 5.09375, | |
| "learning_rate": 2.4278361410546027e-06, | |
| "loss": 1.2288057804107666, | |
| "step": 780 | |
| }, | |
| { | |
| "epoch": 1.1416058394160584, | |
| "grad_norm": 7.15625, | |
| "learning_rate": 2.41708552110465e-06, | |
| "loss": 1.46846342086792, | |
| "step": 782 | |
| }, | |
| { | |
| "epoch": 1.1445255474452556, | |
| "grad_norm": 3.421875, | |
| "learning_rate": 2.4063426660746517e-06, | |
| "loss": 1.3782763481140137, | |
| "step": 784 | |
| }, | |
| { | |
| "epoch": 1.1474452554744525, | |
| "grad_norm": 9.375, | |
| "learning_rate": 2.3956078265316883e-06, | |
| "loss": 1.2458666563034058, | |
| "step": 786 | |
| }, | |
| { | |
| "epoch": 1.1503649635036497, | |
| "grad_norm": 3.59375, | |
| "learning_rate": 2.3848812528558887e-06, | |
| "loss": 1.2981244325637817, | |
| "step": 788 | |
| }, | |
| { | |
| "epoch": 1.1532846715328466, | |
| "grad_norm": 5.96875, | |
| "learning_rate": 2.374163195234586e-06, | |
| "loss": 1.3579144477844238, | |
| "step": 790 | |
| }, | |
| { | |
| "epoch": 1.1562043795620438, | |
| "grad_norm": 1.4765625, | |
| "learning_rate": 2.3634539036564853e-06, | |
| "loss": 1.2424495220184326, | |
| "step": 792 | |
| }, | |
| { | |
| "epoch": 1.159124087591241, | |
| "grad_norm": 3.78125, | |
| "learning_rate": 2.352753627905833e-06, | |
| "loss": 1.6642348766326904, | |
| "step": 794 | |
| }, | |
| { | |
| "epoch": 1.162043795620438, | |
| "grad_norm": 5.90625, | |
| "learning_rate": 2.3420626175565877e-06, | |
| "loss": 1.1931509971618652, | |
| "step": 796 | |
| }, | |
| { | |
| "epoch": 1.164963503649635, | |
| "grad_norm": 3.75, | |
| "learning_rate": 2.331381121966603e-06, | |
| "loss": 1.3377602100372314, | |
| "step": 798 | |
| }, | |
| { | |
| "epoch": 1.167883211678832, | |
| "grad_norm": 3.640625, | |
| "learning_rate": 2.3207093902718066e-06, | |
| "loss": 1.2145559787750244, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 1.1708029197080292, | |
| "grad_norm": 2.078125, | |
| "learning_rate": 2.3100476713803967e-06, | |
| "loss": 1.1511560678482056, | |
| "step": 802 | |
| }, | |
| { | |
| "epoch": 1.1737226277372264, | |
| "grad_norm": 4.75, | |
| "learning_rate": 2.2993962139670292e-06, | |
| "loss": 1.5985954999923706, | |
| "step": 804 | |
| }, | |
| { | |
| "epoch": 1.1766423357664233, | |
| "grad_norm": 6.71875, | |
| "learning_rate": 2.288755266467022e-06, | |
| "loss": 1.4606941938400269, | |
| "step": 806 | |
| }, | |
| { | |
| "epoch": 1.1795620437956205, | |
| "grad_norm": 7.75, | |
| "learning_rate": 2.2781250770705575e-06, | |
| "loss": 1.5486199855804443, | |
| "step": 808 | |
| }, | |
| { | |
| "epoch": 1.1824817518248176, | |
| "grad_norm": 4.1875, | |
| "learning_rate": 2.267505893716898e-06, | |
| "loss": 1.3502545356750488, | |
| "step": 810 | |
| }, | |
| { | |
| "epoch": 1.1854014598540146, | |
| "grad_norm": 7.40625, | |
| "learning_rate": 2.2568979640885964e-06, | |
| "loss": 1.5650737285614014, | |
| "step": 812 | |
| }, | |
| { | |
| "epoch": 1.1883211678832117, | |
| "grad_norm": 7.96875, | |
| "learning_rate": 2.246301535605726e-06, | |
| "loss": 1.6433610916137695, | |
| "step": 814 | |
| }, | |
| { | |
| "epoch": 1.1912408759124087, | |
| "grad_norm": 3.78125, | |
| "learning_rate": 2.2357168554201066e-06, | |
| "loss": 1.0836632251739502, | |
| "step": 816 | |
| }, | |
| { | |
| "epoch": 1.1941605839416058, | |
| "grad_norm": 3.796875, | |
| "learning_rate": 2.225144170409537e-06, | |
| "loss": 1.1502854824066162, | |
| "step": 818 | |
| }, | |
| { | |
| "epoch": 1.197080291970803, | |
| "grad_norm": 3.015625, | |
| "learning_rate": 2.2145837271720433e-06, | |
| "loss": 1.6808114051818848, | |
| "step": 820 | |
| }, | |
| { | |
| "epoch": 1.2, | |
| "grad_norm": 3.296875, | |
| "learning_rate": 2.204035772020121e-06, | |
| "loss": 1.3705600500106812, | |
| "step": 822 | |
| }, | |
| { | |
| "epoch": 1.2029197080291971, | |
| "grad_norm": 2.78125, | |
| "learning_rate": 2.1935005509749933e-06, | |
| "loss": 1.1946570873260498, | |
| "step": 824 | |
| }, | |
| { | |
| "epoch": 1.205839416058394, | |
| "grad_norm": 17.75, | |
| "learning_rate": 2.182978309760874e-06, | |
| "loss": 1.5363470315933228, | |
| "step": 826 | |
| }, | |
| { | |
| "epoch": 1.2087591240875912, | |
| "grad_norm": 3.78125, | |
| "learning_rate": 2.1724692937992313e-06, | |
| "loss": 1.4042502641677856, | |
| "step": 828 | |
| }, | |
| { | |
| "epoch": 1.2116788321167884, | |
| "grad_norm": 17.25, | |
| "learning_rate": 2.16197374820307e-06, | |
| "loss": 1.2589643001556396, | |
| "step": 830 | |
| }, | |
| { | |
| "epoch": 1.2145985401459853, | |
| "grad_norm": 3.359375, | |
| "learning_rate": 2.1514919177712085e-06, | |
| "loss": 1.6056280136108398, | |
| "step": 832 | |
| }, | |
| { | |
| "epoch": 1.2175182481751825, | |
| "grad_norm": 4.3125, | |
| "learning_rate": 2.141024046982573e-06, | |
| "loss": 1.3564906120300293, | |
| "step": 834 | |
| }, | |
| { | |
| "epoch": 1.2204379562043797, | |
| "grad_norm": 11.625, | |
| "learning_rate": 2.1305703799904947e-06, | |
| "loss": 0.9380712509155273, | |
| "step": 836 | |
| }, | |
| { | |
| "epoch": 1.2233576642335766, | |
| "grad_norm": 8.75, | |
| "learning_rate": 2.120131160617013e-06, | |
| "loss": 1.0530650615692139, | |
| "step": 838 | |
| }, | |
| { | |
| "epoch": 1.2262773722627738, | |
| "grad_norm": 8.4375, | |
| "learning_rate": 2.1097066323471897e-06, | |
| "loss": 0.7292347550392151, | |
| "step": 840 | |
| }, | |
| { | |
| "epoch": 1.2291970802919707, | |
| "grad_norm": 8.125, | |
| "learning_rate": 2.0992970383234336e-06, | |
| "loss": 0.9691898226737976, | |
| "step": 842 | |
| }, | |
| { | |
| "epoch": 1.2321167883211679, | |
| "grad_norm": 1.796875, | |
| "learning_rate": 2.088902621339823e-06, | |
| "loss": 1.152883768081665, | |
| "step": 844 | |
| }, | |
| { | |
| "epoch": 1.235036496350365, | |
| "grad_norm": 6.3125, | |
| "learning_rate": 2.078523623836446e-06, | |
| "loss": 1.4850080013275146, | |
| "step": 846 | |
| }, | |
| { | |
| "epoch": 1.237956204379562, | |
| "grad_norm": 7.3125, | |
| "learning_rate": 2.0681602878937472e-06, | |
| "loss": 1.3769371509552002, | |
| "step": 848 | |
| }, | |
| { | |
| "epoch": 1.2408759124087592, | |
| "grad_norm": 3.53125, | |
| "learning_rate": 2.057812855226879e-06, | |
| "loss": 1.103143334388733, | |
| "step": 850 | |
| }, | |
| { | |
| "epoch": 1.243795620437956, | |
| "grad_norm": 3.578125, | |
| "learning_rate": 2.0474815671800644e-06, | |
| "loss": 1.4019992351531982, | |
| "step": 852 | |
| }, | |
| { | |
| "epoch": 1.2467153284671533, | |
| "grad_norm": 5.40625, | |
| "learning_rate": 2.0371666647209694e-06, | |
| "loss": 1.1963081359863281, | |
| "step": 854 | |
| }, | |
| { | |
| "epoch": 1.2496350364963504, | |
| "grad_norm": 3.0625, | |
| "learning_rate": 2.0268683884350803e-06, | |
| "loss": 1.1888788938522339, | |
| "step": 856 | |
| }, | |
| { | |
| "epoch": 1.2525547445255474, | |
| "grad_norm": 13.6875, | |
| "learning_rate": 2.0165869785200938e-06, | |
| "loss": 1.2623980045318604, | |
| "step": 858 | |
| }, | |
| { | |
| "epoch": 1.2554744525547445, | |
| "grad_norm": 6.4375, | |
| "learning_rate": 2.0063226747803143e-06, | |
| "loss": 1.2596468925476074, | |
| "step": 860 | |
| }, | |
| { | |
| "epoch": 1.2583941605839417, | |
| "grad_norm": 3.859375, | |
| "learning_rate": 1.9960757166210596e-06, | |
| "loss": 1.333680272102356, | |
| "step": 862 | |
| }, | |
| { | |
| "epoch": 1.2613138686131387, | |
| "grad_norm": 3.71875, | |
| "learning_rate": 1.9858463430430807e-06, | |
| "loss": 1.1413600444793701, | |
| "step": 864 | |
| }, | |
| { | |
| "epoch": 1.2642335766423358, | |
| "grad_norm": 5.5625, | |
| "learning_rate": 1.9756347926369813e-06, | |
| "loss": 1.3728548288345337, | |
| "step": 866 | |
| }, | |
| { | |
| "epoch": 1.2671532846715328, | |
| "grad_norm": 4.15625, | |
| "learning_rate": 1.9654413035776585e-06, | |
| "loss": 1.449355125427246, | |
| "step": 868 | |
| }, | |
| { | |
| "epoch": 1.27007299270073, | |
| "grad_norm": 4.09375, | |
| "learning_rate": 1.9552661136187444e-06, | |
| "loss": 1.1183695793151855, | |
| "step": 870 | |
| }, | |
| { | |
| "epoch": 1.2729927007299269, | |
| "grad_norm": 4.40625, | |
| "learning_rate": 1.945109460087061e-06, | |
| "loss": 1.1493186950683594, | |
| "step": 872 | |
| }, | |
| { | |
| "epoch": 1.275912408759124, | |
| "grad_norm": 2.640625, | |
| "learning_rate": 1.934971579877088e-06, | |
| "loss": 1.3397104740142822, | |
| "step": 874 | |
| }, | |
| { | |
| "epoch": 1.2788321167883212, | |
| "grad_norm": 4.3125, | |
| "learning_rate": 1.9248527094454316e-06, | |
| "loss": 1.3082889318466187, | |
| "step": 876 | |
| }, | |
| { | |
| "epoch": 1.2817518248175181, | |
| "grad_norm": 8.4375, | |
| "learning_rate": 1.9147530848053152e-06, | |
| "loss": 1.563565731048584, | |
| "step": 878 | |
| }, | |
| { | |
| "epoch": 1.2846715328467153, | |
| "grad_norm": 8.25, | |
| "learning_rate": 1.9046729415210686e-06, | |
| "loss": 1.4606716632843018, | |
| "step": 880 | |
| }, | |
| { | |
| "epoch": 1.2875912408759125, | |
| "grad_norm": 4.65625, | |
| "learning_rate": 1.8946125147026427e-06, | |
| "loss": 1.3690614700317383, | |
| "step": 882 | |
| }, | |
| { | |
| "epoch": 1.2905109489051094, | |
| "grad_norm": 7.8125, | |
| "learning_rate": 1.8845720390001154e-06, | |
| "loss": 1.6756688356399536, | |
| "step": 884 | |
| }, | |
| { | |
| "epoch": 1.2934306569343066, | |
| "grad_norm": 3.21875, | |
| "learning_rate": 1.874551748598226e-06, | |
| "loss": 1.2701613903045654, | |
| "step": 886 | |
| }, | |
| { | |
| "epoch": 1.2963503649635038, | |
| "grad_norm": 4.78125, | |
| "learning_rate": 1.8645518772109077e-06, | |
| "loss": 1.5865097045898438, | |
| "step": 888 | |
| }, | |
| { | |
| "epoch": 1.2992700729927007, | |
| "grad_norm": 3.921875, | |
| "learning_rate": 1.8545726580758428e-06, | |
| "loss": 1.401726484298706, | |
| "step": 890 | |
| }, | |
| { | |
| "epoch": 1.3021897810218979, | |
| "grad_norm": 7.78125, | |
| "learning_rate": 1.8446143239490168e-06, | |
| "loss": 1.6153247356414795, | |
| "step": 892 | |
| }, | |
| { | |
| "epoch": 1.305109489051095, | |
| "grad_norm": 6.125, | |
| "learning_rate": 1.8346771070992914e-06, | |
| "loss": 1.4763232469558716, | |
| "step": 894 | |
| }, | |
| { | |
| "epoch": 1.308029197080292, | |
| "grad_norm": 1.984375, | |
| "learning_rate": 1.82476123930299e-06, | |
| "loss": 1.2044928073883057, | |
| "step": 896 | |
| }, | |
| { | |
| "epoch": 1.310948905109489, | |
| "grad_norm": 1.4296875, | |
| "learning_rate": 1.8148669518384862e-06, | |
| "loss": 1.0226365327835083, | |
| "step": 898 | |
| }, | |
| { | |
| "epoch": 1.313868613138686, | |
| "grad_norm": 2.1875, | |
| "learning_rate": 1.804994475480815e-06, | |
| "loss": 1.0369101762771606, | |
| "step": 900 | |
| }, | |
| { | |
| "epoch": 1.3167883211678832, | |
| "grad_norm": 1.6875, | |
| "learning_rate": 1.7951440404962856e-06, | |
| "loss": 1.1433358192443848, | |
| "step": 902 | |
| }, | |
| { | |
| "epoch": 1.3197080291970802, | |
| "grad_norm": 5.3125, | |
| "learning_rate": 1.7853158766371143e-06, | |
| "loss": 1.1160844564437866, | |
| "step": 904 | |
| }, | |
| { | |
| "epoch": 1.3226277372262774, | |
| "grad_norm": 10.1875, | |
| "learning_rate": 1.7755102131360639e-06, | |
| "loss": 1.3365674018859863, | |
| "step": 906 | |
| }, | |
| { | |
| "epoch": 1.3255474452554745, | |
| "grad_norm": 2.21875, | |
| "learning_rate": 1.7657272787010967e-06, | |
| "loss": 1.3394170999526978, | |
| "step": 908 | |
| }, | |
| { | |
| "epoch": 1.3284671532846715, | |
| "grad_norm": 14.0625, | |
| "learning_rate": 1.7559673015100405e-06, | |
| "loss": 1.2542470693588257, | |
| "step": 910 | |
| }, | |
| { | |
| "epoch": 1.3313868613138686, | |
| "grad_norm": 1.9453125, | |
| "learning_rate": 1.7462305092052676e-06, | |
| "loss": 1.2083182334899902, | |
| "step": 912 | |
| }, | |
| { | |
| "epoch": 1.3343065693430658, | |
| "grad_norm": 2.234375, | |
| "learning_rate": 1.7365171288883841e-06, | |
| "loss": 1.0745160579681396, | |
| "step": 914 | |
| }, | |
| { | |
| "epoch": 1.3372262773722627, | |
| "grad_norm": 5.5, | |
| "learning_rate": 1.7268273871149335e-06, | |
| "loss": 1.4868173599243164, | |
| "step": 916 | |
| }, | |
| { | |
| "epoch": 1.34014598540146, | |
| "grad_norm": 5.96875, | |
| "learning_rate": 1.7171615098891117e-06, | |
| "loss": 0.7804101705551147, | |
| "step": 918 | |
| }, | |
| { | |
| "epoch": 1.343065693430657, | |
| "grad_norm": 3.65625, | |
| "learning_rate": 1.7075197226584969e-06, | |
| "loss": 1.3761916160583496, | |
| "step": 920 | |
| }, | |
| { | |
| "epoch": 1.345985401459854, | |
| "grad_norm": 2.640625, | |
| "learning_rate": 1.6979022503087905e-06, | |
| "loss": 1.413581132888794, | |
| "step": 922 | |
| }, | |
| { | |
| "epoch": 1.348905109489051, | |
| "grad_norm": 8.125, | |
| "learning_rate": 1.688309317158572e-06, | |
| "loss": 1.6476316452026367, | |
| "step": 924 | |
| }, | |
| { | |
| "epoch": 1.3518248175182481, | |
| "grad_norm": 5.4375, | |
| "learning_rate": 1.6787411469540677e-06, | |
| "loss": 1.5541059970855713, | |
| "step": 926 | |
| }, | |
| { | |
| "epoch": 1.3547445255474453, | |
| "grad_norm": 6.125, | |
| "learning_rate": 1.6691979628639281e-06, | |
| "loss": 1.5634403228759766, | |
| "step": 928 | |
| }, | |
| { | |
| "epoch": 1.3576642335766422, | |
| "grad_norm": 2.65625, | |
| "learning_rate": 1.6596799874740294e-06, | |
| "loss": 1.2540359497070312, | |
| "step": 930 | |
| }, | |
| { | |
| "epoch": 1.3605839416058394, | |
| "grad_norm": 5.59375, | |
| "learning_rate": 1.6501874427822767e-06, | |
| "loss": 1.4849543571472168, | |
| "step": 932 | |
| }, | |
| { | |
| "epoch": 1.3635036496350366, | |
| "grad_norm": 6.40625, | |
| "learning_rate": 1.6407205501934285e-06, | |
| "loss": 1.141026496887207, | |
| "step": 934 | |
| }, | |
| { | |
| "epoch": 1.3664233576642335, | |
| "grad_norm": 2.375, | |
| "learning_rate": 1.6312795305139328e-06, | |
| "loss": 0.9827671647071838, | |
| "step": 936 | |
| }, | |
| { | |
| "epoch": 1.3693430656934307, | |
| "grad_norm": 5.5, | |
| "learning_rate": 1.6218646039467725e-06, | |
| "loss": 1.4801573753356934, | |
| "step": 938 | |
| }, | |
| { | |
| "epoch": 1.3722627737226278, | |
| "grad_norm": 3.109375, | |
| "learning_rate": 1.6124759900863365e-06, | |
| "loss": 1.6479110717773438, | |
| "step": 940 | |
| }, | |
| { | |
| "epoch": 1.3751824817518248, | |
| "grad_norm": 7.25, | |
| "learning_rate": 1.6031139079132933e-06, | |
| "loss": 1.2483787536621094, | |
| "step": 942 | |
| }, | |
| { | |
| "epoch": 1.378102189781022, | |
| "grad_norm": 1.453125, | |
| "learning_rate": 1.593778575789484e-06, | |
| "loss": 1.2027292251586914, | |
| "step": 944 | |
| }, | |
| { | |
| "epoch": 1.3810218978102191, | |
| "grad_norm": 3.859375, | |
| "learning_rate": 1.5844702114528315e-06, | |
| "loss": 1.5109983682632446, | |
| "step": 946 | |
| }, | |
| { | |
| "epoch": 1.383941605839416, | |
| "grad_norm": 5.34375, | |
| "learning_rate": 1.5751890320122568e-06, | |
| "loss": 1.3143746852874756, | |
| "step": 948 | |
| }, | |
| { | |
| "epoch": 1.3868613138686132, | |
| "grad_norm": 8.25, | |
| "learning_rate": 1.5659352539426215e-06, | |
| "loss": 1.2749611139297485, | |
| "step": 950 | |
| }, | |
| { | |
| "epoch": 1.3897810218978102, | |
| "grad_norm": 2.125, | |
| "learning_rate": 1.5567090930796746e-06, | |
| "loss": 1.244338035583496, | |
| "step": 952 | |
| }, | |
| { | |
| "epoch": 1.3927007299270073, | |
| "grad_norm": 4.3125, | |
| "learning_rate": 1.5475107646150203e-06, | |
| "loss": 1.3380858898162842, | |
| "step": 954 | |
| }, | |
| { | |
| "epoch": 1.3956204379562043, | |
| "grad_norm": 1.15625, | |
| "learning_rate": 1.5383404830910981e-06, | |
| "loss": 1.4054020643234253, | |
| "step": 956 | |
| }, | |
| { | |
| "epoch": 1.3985401459854014, | |
| "grad_norm": 10.5625, | |
| "learning_rate": 1.529198462396175e-06, | |
| "loss": 1.4239089488983154, | |
| "step": 958 | |
| }, | |
| { | |
| "epoch": 1.4014598540145986, | |
| "grad_norm": 9.25, | |
| "learning_rate": 1.5200849157593666e-06, | |
| "loss": 1.610469102859497, | |
| "step": 960 | |
| }, | |
| { | |
| "epoch": 1.4043795620437955, | |
| "grad_norm": 1.71875, | |
| "learning_rate": 1.5110000557456542e-06, | |
| "loss": 1.1694961786270142, | |
| "step": 962 | |
| }, | |
| { | |
| "epoch": 1.4072992700729927, | |
| "grad_norm": 5.625, | |
| "learning_rate": 1.5019440942509312e-06, | |
| "loss": 1.5139713287353516, | |
| "step": 964 | |
| }, | |
| { | |
| "epoch": 1.4102189781021899, | |
| "grad_norm": 3.953125, | |
| "learning_rate": 1.4929172424970576e-06, | |
| "loss": 1.376784324645996, | |
| "step": 966 | |
| }, | |
| { | |
| "epoch": 1.4131386861313868, | |
| "grad_norm": 2.34375, | |
| "learning_rate": 1.483919711026939e-06, | |
| "loss": 1.3103041648864746, | |
| "step": 968 | |
| }, | |
| { | |
| "epoch": 1.416058394160584, | |
| "grad_norm": 1.328125, | |
| "learning_rate": 1.4749517096996116e-06, | |
| "loss": 1.2476757764816284, | |
| "step": 970 | |
| }, | |
| { | |
| "epoch": 1.4189781021897812, | |
| "grad_norm": 3.703125, | |
| "learning_rate": 1.4660134476853485e-06, | |
| "loss": 1.3406193256378174, | |
| "step": 972 | |
| }, | |
| { | |
| "epoch": 1.421897810218978, | |
| "grad_norm": 4.375, | |
| "learning_rate": 1.4571051334607813e-06, | |
| "loss": 1.2700021266937256, | |
| "step": 974 | |
| }, | |
| { | |
| "epoch": 1.4248175182481753, | |
| "grad_norm": 2.90625, | |
| "learning_rate": 1.4482269748040358e-06, | |
| "loss": 1.2266380786895752, | |
| "step": 976 | |
| }, | |
| { | |
| "epoch": 1.4277372262773722, | |
| "grad_norm": 3.3125, | |
| "learning_rate": 1.4393791787898896e-06, | |
| "loss": 1.189935564994812, | |
| "step": 978 | |
| }, | |
| { | |
| "epoch": 1.4306569343065694, | |
| "grad_norm": 4.8125, | |
| "learning_rate": 1.430561951784938e-06, | |
| "loss": 1.4163111448287964, | |
| "step": 980 | |
| }, | |
| { | |
| "epoch": 1.4335766423357663, | |
| "grad_norm": 7.125, | |
| "learning_rate": 1.4217754994427844e-06, | |
| "loss": 1.6390494108200073, | |
| "step": 982 | |
| }, | |
| { | |
| "epoch": 1.4364963503649635, | |
| "grad_norm": 1.6640625, | |
| "learning_rate": 1.4130200266992408e-06, | |
| "loss": 1.1357786655426025, | |
| "step": 984 | |
| }, | |
| { | |
| "epoch": 1.4394160583941606, | |
| "grad_norm": 3.5625, | |
| "learning_rate": 1.4042957377675484e-06, | |
| "loss": 1.2841823101043701, | |
| "step": 986 | |
| }, | |
| { | |
| "epoch": 1.4423357664233576, | |
| "grad_norm": 7.34375, | |
| "learning_rate": 1.395602836133616e-06, | |
| "loss": 1.3807730674743652, | |
| "step": 988 | |
| }, | |
| { | |
| "epoch": 1.4452554744525548, | |
| "grad_norm": 1.421875, | |
| "learning_rate": 1.386941524551273e-06, | |
| "loss": 1.135375738143921, | |
| "step": 990 | |
| }, | |
| { | |
| "epoch": 1.448175182481752, | |
| "grad_norm": 2.875, | |
| "learning_rate": 1.37831200503754e-06, | |
| "loss": 1.1764510869979858, | |
| "step": 992 | |
| }, | |
| { | |
| "epoch": 1.4510948905109489, | |
| "grad_norm": 8.9375, | |
| "learning_rate": 1.3697144788679174e-06, | |
| "loss": 1.2467272281646729, | |
| "step": 994 | |
| }, | |
| { | |
| "epoch": 1.454014598540146, | |
| "grad_norm": 5.90625, | |
| "learning_rate": 1.3611491465716898e-06, | |
| "loss": 1.4708714485168457, | |
| "step": 996 | |
| }, | |
| { | |
| "epoch": 1.4569343065693432, | |
| "grad_norm": 3.71875, | |
| "learning_rate": 1.3526162079272495e-06, | |
| "loss": 1.402409553527832, | |
| "step": 998 | |
| }, | |
| { | |
| "epoch": 1.4598540145985401, | |
| "grad_norm": 4.59375, | |
| "learning_rate": 1.34411586195744e-06, | |
| "loss": 1.2477829456329346, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 1.4627737226277373, | |
| "grad_norm": 4.0625, | |
| "learning_rate": 1.3356483069249088e-06, | |
| "loss": 1.3877084255218506, | |
| "step": 1002 | |
| }, | |
| { | |
| "epoch": 1.4656934306569342, | |
| "grad_norm": 7.875, | |
| "learning_rate": 1.3272137403274844e-06, | |
| "loss": 1.555393934249878, | |
| "step": 1004 | |
| }, | |
| { | |
| "epoch": 1.4686131386861314, | |
| "grad_norm": 3.671875, | |
| "learning_rate": 1.318812358893572e-06, | |
| "loss": 1.3621551990509033, | |
| "step": 1006 | |
| }, | |
| { | |
| "epoch": 1.4715328467153284, | |
| "grad_norm": 4.59375, | |
| "learning_rate": 1.3104443585775642e-06, | |
| "loss": 1.3545817136764526, | |
| "step": 1008 | |
| }, | |
| { | |
| "epoch": 1.4744525547445255, | |
| "grad_norm": 3.9375, | |
| "learning_rate": 1.3021099345552695e-06, | |
| "loss": 1.4017988443374634, | |
| "step": 1010 | |
| }, | |
| { | |
| "epoch": 1.4773722627737227, | |
| "grad_norm": 6.21875, | |
| "learning_rate": 1.2938092812193615e-06, | |
| "loss": 1.3940372467041016, | |
| "step": 1012 | |
| }, | |
| { | |
| "epoch": 1.4802919708029196, | |
| "grad_norm": 3.1875, | |
| "learning_rate": 1.285542592174842e-06, | |
| "loss": 1.1765646934509277, | |
| "step": 1014 | |
| }, | |
| { | |
| "epoch": 1.4832116788321168, | |
| "grad_norm": 6.0, | |
| "learning_rate": 1.277310060234529e-06, | |
| "loss": 1.385852336883545, | |
| "step": 1016 | |
| }, | |
| { | |
| "epoch": 1.486131386861314, | |
| "grad_norm": 4.8125, | |
| "learning_rate": 1.2691118774145577e-06, | |
| "loss": 1.395111322402954, | |
| "step": 1018 | |
| }, | |
| { | |
| "epoch": 1.489051094890511, | |
| "grad_norm": 1.640625, | |
| "learning_rate": 1.2609482349299021e-06, | |
| "loss": 1.325355052947998, | |
| "step": 1020 | |
| }, | |
| { | |
| "epoch": 1.491970802919708, | |
| "grad_norm": 3.515625, | |
| "learning_rate": 1.2528193231899156e-06, | |
| "loss": 1.2050141096115112, | |
| "step": 1022 | |
| }, | |
| { | |
| "epoch": 1.4948905109489052, | |
| "grad_norm": 4.03125, | |
| "learning_rate": 1.2447253317938871e-06, | |
| "loss": 1.6511290073394775, | |
| "step": 1024 | |
| }, | |
| { | |
| "epoch": 1.4978102189781022, | |
| "grad_norm": 3.609375, | |
| "learning_rate": 1.236666449526623e-06, | |
| "loss": 1.28155517578125, | |
| "step": 1026 | |
| }, | |
| { | |
| "epoch": 1.5007299270072991, | |
| "grad_norm": 3.734375, | |
| "learning_rate": 1.2286428643540418e-06, | |
| "loss": 1.4207556247711182, | |
| "step": 1028 | |
| }, | |
| { | |
| "epoch": 1.5036496350364965, | |
| "grad_norm": 3.359375, | |
| "learning_rate": 1.22065476341879e-06, | |
| "loss": 1.3519251346588135, | |
| "step": 1030 | |
| }, | |
| { | |
| "epoch": 1.5065693430656935, | |
| "grad_norm": 5.84375, | |
| "learning_rate": 1.2127023330358777e-06, | |
| "loss": 1.396289587020874, | |
| "step": 1032 | |
| }, | |
| { | |
| "epoch": 1.5094890510948904, | |
| "grad_norm": 2.65625, | |
| "learning_rate": 1.204785758688331e-06, | |
| "loss": 1.3400771617889404, | |
| "step": 1034 | |
| }, | |
| { | |
| "epoch": 1.5124087591240876, | |
| "grad_norm": 31.25, | |
| "learning_rate": 1.1969052250228683e-06, | |
| "loss": 1.1934255361557007, | |
| "step": 1036 | |
| }, | |
| { | |
| "epoch": 1.5153284671532847, | |
| "grad_norm": 4.90625, | |
| "learning_rate": 1.1890609158455949e-06, | |
| "loss": 1.4513096809387207, | |
| "step": 1038 | |
| }, | |
| { | |
| "epoch": 1.5182481751824817, | |
| "grad_norm": 2.625, | |
| "learning_rate": 1.181253014117711e-06, | |
| "loss": 1.1264418363571167, | |
| "step": 1040 | |
| }, | |
| { | |
| "epoch": 1.5211678832116788, | |
| "grad_norm": 1.65625, | |
| "learning_rate": 1.1734817019512465e-06, | |
| "loss": 1.1497807502746582, | |
| "step": 1042 | |
| }, | |
| { | |
| "epoch": 1.524087591240876, | |
| "grad_norm": 7.8125, | |
| "learning_rate": 1.1657471606048157e-06, | |
| "loss": 1.6058242321014404, | |
| "step": 1044 | |
| }, | |
| { | |
| "epoch": 1.527007299270073, | |
| "grad_norm": 22.25, | |
| "learning_rate": 1.1580495704793874e-06, | |
| "loss": 1.4766197204589844, | |
| "step": 1046 | |
| }, | |
| { | |
| "epoch": 1.5299270072992701, | |
| "grad_norm": 3.75, | |
| "learning_rate": 1.1503891111140767e-06, | |
| "loss": 1.2432148456573486, | |
| "step": 1048 | |
| }, | |
| { | |
| "epoch": 1.5328467153284673, | |
| "grad_norm": 28.25, | |
| "learning_rate": 1.1427659611819604e-06, | |
| "loss": 1.1451390981674194, | |
| "step": 1050 | |
| }, | |
| { | |
| "epoch": 1.5357664233576642, | |
| "grad_norm": 3.734375, | |
| "learning_rate": 1.1351802984859045e-06, | |
| "loss": 1.3471091985702515, | |
| "step": 1052 | |
| }, | |
| { | |
| "epoch": 1.5386861313868612, | |
| "grad_norm": 1.640625, | |
| "learning_rate": 1.127632299954423e-06, | |
| "loss": 1.1958954334259033, | |
| "step": 1054 | |
| }, | |
| { | |
| "epoch": 1.5416058394160586, | |
| "grad_norm": 10.8125, | |
| "learning_rate": 1.1201221416375456e-06, | |
| "loss": 1.3556766510009766, | |
| "step": 1056 | |
| }, | |
| { | |
| "epoch": 1.5445255474452555, | |
| "grad_norm": 4.75, | |
| "learning_rate": 1.1126499987027172e-06, | |
| "loss": 1.6111273765563965, | |
| "step": 1058 | |
| }, | |
| { | |
| "epoch": 1.5474452554744524, | |
| "grad_norm": 12.5, | |
| "learning_rate": 1.1052160454307085e-06, | |
| "loss": 1.5189365148544312, | |
| "step": 1060 | |
| }, | |
| { | |
| "epoch": 1.5503649635036496, | |
| "grad_norm": 3.96875, | |
| "learning_rate": 1.0978204552115493e-06, | |
| "loss": 1.3763346672058105, | |
| "step": 1062 | |
| }, | |
| { | |
| "epoch": 1.5532846715328468, | |
| "grad_norm": 4.375, | |
| "learning_rate": 1.0904634005404902e-06, | |
| "loss": 1.450345754623413, | |
| "step": 1064 | |
| }, | |
| { | |
| "epoch": 1.5562043795620437, | |
| "grad_norm": 4.09375, | |
| "learning_rate": 1.0831450530139747e-06, | |
| "loss": 1.2109770774841309, | |
| "step": 1066 | |
| }, | |
| { | |
| "epoch": 1.5591240875912409, | |
| "grad_norm": 7.0, | |
| "learning_rate": 1.0758655833256381e-06, | |
| "loss": 1.2681195735931396, | |
| "step": 1068 | |
| }, | |
| { | |
| "epoch": 1.562043795620438, | |
| "grad_norm": 2.640625, | |
| "learning_rate": 1.0686251612623277e-06, | |
| "loss": 1.2694846391677856, | |
| "step": 1070 | |
| }, | |
| { | |
| "epoch": 1.564963503649635, | |
| "grad_norm": 8.6875, | |
| "learning_rate": 1.0614239557001389e-06, | |
| "loss": 1.5101749897003174, | |
| "step": 1072 | |
| }, | |
| { | |
| "epoch": 1.5678832116788322, | |
| "grad_norm": 3.171875, | |
| "learning_rate": 1.0542621346004806e-06, | |
| "loss": 1.313795566558838, | |
| "step": 1074 | |
| }, | |
| { | |
| "epoch": 1.5708029197080293, | |
| "grad_norm": 9.0, | |
| "learning_rate": 1.047139865006155e-06, | |
| "loss": 1.1664808988571167, | |
| "step": 1076 | |
| }, | |
| { | |
| "epoch": 1.5737226277372263, | |
| "grad_norm": 2.03125, | |
| "learning_rate": 1.0400573130374641e-06, | |
| "loss": 1.203639030456543, | |
| "step": 1078 | |
| }, | |
| { | |
| "epoch": 1.5766423357664232, | |
| "grad_norm": 3.265625, | |
| "learning_rate": 1.0330146438883304e-06, | |
| "loss": 1.5285131931304932, | |
| "step": 1080 | |
| }, | |
| { | |
| "epoch": 1.5795620437956206, | |
| "grad_norm": 6.5625, | |
| "learning_rate": 1.0260120218224485e-06, | |
| "loss": 1.516188144683838, | |
| "step": 1082 | |
| }, | |
| { | |
| "epoch": 1.5824817518248175, | |
| "grad_norm": 6.9375, | |
| "learning_rate": 1.019049610169452e-06, | |
| "loss": 1.3165411949157715, | |
| "step": 1084 | |
| }, | |
| { | |
| "epoch": 1.5854014598540145, | |
| "grad_norm": 4.6875, | |
| "learning_rate": 1.012127571321104e-06, | |
| "loss": 1.1730577945709229, | |
| "step": 1086 | |
| }, | |
| { | |
| "epoch": 1.5883211678832116, | |
| "grad_norm": 4.46875, | |
| "learning_rate": 1.0052460667275102e-06, | |
| "loss": 1.3837532997131348, | |
| "step": 1088 | |
| }, | |
| { | |
| "epoch": 1.5912408759124088, | |
| "grad_norm": 4.71875, | |
| "learning_rate": 9.984052568933507e-07, | |
| "loss": 1.342604398727417, | |
| "step": 1090 | |
| }, | |
| { | |
| "epoch": 1.5941605839416058, | |
| "grad_norm": 1.8046875, | |
| "learning_rate": 9.916053013741396e-07, | |
| "loss": 1.0345500707626343, | |
| "step": 1092 | |
| }, | |
| { | |
| "epoch": 1.597080291970803, | |
| "grad_norm": 3.578125, | |
| "learning_rate": 9.848463587725024e-07, | |
| "loss": 1.3031237125396729, | |
| "step": 1094 | |
| }, | |
| { | |
| "epoch": 1.6, | |
| "grad_norm": 2.6875, | |
| "learning_rate": 9.78128586734476e-07, | |
| "loss": 1.4126646518707275, | |
| "step": 1096 | |
| }, | |
| { | |
| "epoch": 1.602919708029197, | |
| "grad_norm": 2.796875, | |
| "learning_rate": 9.714521419458333e-07, | |
| "loss": 1.2036532163619995, | |
| "step": 1098 | |
| }, | |
| { | |
| "epoch": 1.6058394160583942, | |
| "grad_norm": 5.34375, | |
| "learning_rate": 9.648171801284254e-07, | |
| "loss": 1.3445477485656738, | |
| "step": 1100 | |
| }, | |
| { | |
| "epoch": 1.6087591240875914, | |
| "grad_norm": 6.875, | |
| "learning_rate": 9.582238560365534e-07, | |
| "loss": 1.4824466705322266, | |
| "step": 1102 | |
| }, | |
| { | |
| "epoch": 1.6116788321167883, | |
| "grad_norm": 2.171875, | |
| "learning_rate": 9.516723234533573e-07, | |
| "loss": 0.6945338845252991, | |
| "step": 1104 | |
| }, | |
| { | |
| "epoch": 1.6145985401459853, | |
| "grad_norm": 4.375, | |
| "learning_rate": 9.451627351872289e-07, | |
| "loss": 1.691240906715393, | |
| "step": 1106 | |
| }, | |
| { | |
| "epoch": 1.6175182481751826, | |
| "grad_norm": 5.0625, | |
| "learning_rate": 9.386952430682478e-07, | |
| "loss": 1.6143536567687988, | |
| "step": 1108 | |
| }, | |
| { | |
| "epoch": 1.6204379562043796, | |
| "grad_norm": 4.90625, | |
| "learning_rate": 9.322699979446395e-07, | |
| "loss": 1.0810116529464722, | |
| "step": 1110 | |
| }, | |
| { | |
| "epoch": 1.6233576642335765, | |
| "grad_norm": 3.953125, | |
| "learning_rate": 9.25887149679259e-07, | |
| "loss": 1.3443822860717773, | |
| "step": 1112 | |
| }, | |
| { | |
| "epoch": 1.6262773722627737, | |
| "grad_norm": 2.5, | |
| "learning_rate": 9.19546847146093e-07, | |
| "loss": 1.392272710800171, | |
| "step": 1114 | |
| }, | |
| { | |
| "epoch": 1.6291970802919709, | |
| "grad_norm": 2.890625, | |
| "learning_rate": 9.132492382267895e-07, | |
| "loss": 1.2860863208770752, | |
| "step": 1116 | |
| }, | |
| { | |
| "epoch": 1.6321167883211678, | |
| "grad_norm": 6.03125, | |
| "learning_rate": 9.069944698072071e-07, | |
| "loss": 1.4681463241577148, | |
| "step": 1118 | |
| }, | |
| { | |
| "epoch": 1.635036496350365, | |
| "grad_norm": 1.828125, | |
| "learning_rate": 9.0078268777399e-07, | |
| "loss": 1.1984715461730957, | |
| "step": 1120 | |
| }, | |
| { | |
| "epoch": 1.6379562043795621, | |
| "grad_norm": 3.328125, | |
| "learning_rate": 8.946140370111651e-07, | |
| "loss": 1.3620171546936035, | |
| "step": 1122 | |
| }, | |
| { | |
| "epoch": 1.640875912408759, | |
| "grad_norm": 2.5625, | |
| "learning_rate": 8.884886613967625e-07, | |
| "loss": 1.0197124481201172, | |
| "step": 1124 | |
| }, | |
| { | |
| "epoch": 1.6437956204379562, | |
| "grad_norm": 2.96875, | |
| "learning_rate": 8.824067037994597e-07, | |
| "loss": 1.2507963180541992, | |
| "step": 1126 | |
| }, | |
| { | |
| "epoch": 1.6467153284671534, | |
| "grad_norm": 7.59375, | |
| "learning_rate": 8.763683060752492e-07, | |
| "loss": 1.5034403800964355, | |
| "step": 1128 | |
| }, | |
| { | |
| "epoch": 1.6496350364963503, | |
| "grad_norm": 3.703125, | |
| "learning_rate": 8.703736090641302e-07, | |
| "loss": 1.250478744506836, | |
| "step": 1130 | |
| }, | |
| { | |
| "epoch": 1.6525547445255473, | |
| "grad_norm": 2.921875, | |
| "learning_rate": 8.644227525868238e-07, | |
| "loss": 1.2682870626449585, | |
| "step": 1132 | |
| }, | |
| { | |
| "epoch": 1.6554744525547447, | |
| "grad_norm": 8.5, | |
| "learning_rate": 8.585158754415114e-07, | |
| "loss": 1.5448431968688965, | |
| "step": 1134 | |
| }, | |
| { | |
| "epoch": 1.6583941605839416, | |
| "grad_norm": 5.65625, | |
| "learning_rate": 8.52653115400598e-07, | |
| "loss": 1.3879718780517578, | |
| "step": 1136 | |
| }, | |
| { | |
| "epoch": 1.6613138686131386, | |
| "grad_norm": 3.3125, | |
| "learning_rate": 8.468346092074961e-07, | |
| "loss": 1.3755671977996826, | |
| "step": 1138 | |
| }, | |
| { | |
| "epoch": 1.6642335766423357, | |
| "grad_norm": 2.75, | |
| "learning_rate": 8.410604925734411e-07, | |
| "loss": 1.1513915061950684, | |
| "step": 1140 | |
| }, | |
| { | |
| "epoch": 1.667153284671533, | |
| "grad_norm": 14.1875, | |
| "learning_rate": 8.35330900174322e-07, | |
| "loss": 1.5474663972854614, | |
| "step": 1142 | |
| }, | |
| { | |
| "epoch": 1.6700729927007298, | |
| "grad_norm": 3.515625, | |
| "learning_rate": 8.296459656475413e-07, | |
| "loss": 0.8504141569137573, | |
| "step": 1144 | |
| }, | |
| { | |
| "epoch": 1.672992700729927, | |
| "grad_norm": 5.78125, | |
| "learning_rate": 8.240058215888998e-07, | |
| "loss": 1.3289515972137451, | |
| "step": 1146 | |
| }, | |
| { | |
| "epoch": 1.6759124087591242, | |
| "grad_norm": 6.9375, | |
| "learning_rate": 8.184105995494998e-07, | |
| "loss": 0.9470740556716919, | |
| "step": 1148 | |
| }, | |
| { | |
| "epoch": 1.6788321167883211, | |
| "grad_norm": 3.359375, | |
| "learning_rate": 8.128604300326812e-07, | |
| "loss": 1.352350115776062, | |
| "step": 1150 | |
| }, | |
| { | |
| "epoch": 1.6817518248175183, | |
| "grad_norm": 4.78125, | |
| "learning_rate": 8.073554424909755e-07, | |
| "loss": 1.3660526275634766, | |
| "step": 1152 | |
| }, | |
| { | |
| "epoch": 1.6846715328467154, | |
| "grad_norm": 2.46875, | |
| "learning_rate": 8.01895765323087e-07, | |
| "loss": 1.2722463607788086, | |
| "step": 1154 | |
| }, | |
| { | |
| "epoch": 1.6875912408759124, | |
| "grad_norm": 6.21875, | |
| "learning_rate": 7.964815258708971e-07, | |
| "loss": 1.13301420211792, | |
| "step": 1156 | |
| }, | |
| { | |
| "epoch": 1.6905109489051093, | |
| "grad_norm": 2.03125, | |
| "learning_rate": 7.911128504164947e-07, | |
| "loss": 1.3945411443710327, | |
| "step": 1158 | |
| }, | |
| { | |
| "epoch": 1.6934306569343067, | |
| "grad_norm": 1.7421875, | |
| "learning_rate": 7.857898641792322e-07, | |
| "loss": 1.1629891395568848, | |
| "step": 1160 | |
| }, | |
| { | |
| "epoch": 1.6963503649635037, | |
| "grad_norm": 2.09375, | |
| "learning_rate": 7.805126913128018e-07, | |
| "loss": 1.1993281841278076, | |
| "step": 1162 | |
| }, | |
| { | |
| "epoch": 1.6992700729927006, | |
| "grad_norm": 3.0625, | |
| "learning_rate": 7.752814549023437e-07, | |
| "loss": 1.4611374139785767, | |
| "step": 1164 | |
| }, | |
| { | |
| "epoch": 1.7021897810218978, | |
| "grad_norm": 4.625, | |
| "learning_rate": 7.700962769615704e-07, | |
| "loss": 1.1919968128204346, | |
| "step": 1166 | |
| }, | |
| { | |
| "epoch": 1.705109489051095, | |
| "grad_norm": 2.515625, | |
| "learning_rate": 7.649572784299255e-07, | |
| "loss": 1.2250781059265137, | |
| "step": 1168 | |
| }, | |
| { | |
| "epoch": 1.7080291970802919, | |
| "grad_norm": 8.1875, | |
| "learning_rate": 7.598645791697601e-07, | |
| "loss": 1.3479260206222534, | |
| "step": 1170 | |
| }, | |
| { | |
| "epoch": 1.710948905109489, | |
| "grad_norm": 4.25, | |
| "learning_rate": 7.548182979635389e-07, | |
| "loss": 1.3197946548461914, | |
| "step": 1172 | |
| }, | |
| { | |
| "epoch": 1.7138686131386862, | |
| "grad_norm": 8.6875, | |
| "learning_rate": 7.49818552511068e-07, | |
| "loss": 1.1691796779632568, | |
| "step": 1174 | |
| }, | |
| { | |
| "epoch": 1.7167883211678832, | |
| "grad_norm": 3.203125, | |
| "learning_rate": 7.448654594267496e-07, | |
| "loss": 1.2978925704956055, | |
| "step": 1176 | |
| }, | |
| { | |
| "epoch": 1.7197080291970803, | |
| "grad_norm": 2.96875, | |
| "learning_rate": 7.399591342368644e-07, | |
| "loss": 1.174210786819458, | |
| "step": 1178 | |
| }, | |
| { | |
| "epoch": 1.7226277372262775, | |
| "grad_norm": 4.625, | |
| "learning_rate": 7.350996913768743e-07, | |
| "loss": 1.2740840911865234, | |
| "step": 1180 | |
| }, | |
| { | |
| "epoch": 1.7255474452554744, | |
| "grad_norm": 8.0625, | |
| "learning_rate": 7.302872441887562e-07, | |
| "loss": 1.1019668579101562, | |
| "step": 1182 | |
| }, | |
| { | |
| "epoch": 1.7284671532846714, | |
| "grad_norm": 2.84375, | |
| "learning_rate": 7.255219049183552e-07, | |
| "loss": 1.3885023593902588, | |
| "step": 1184 | |
| }, | |
| { | |
| "epoch": 1.7313868613138688, | |
| "grad_norm": 5.625, | |
| "learning_rate": 7.208037847127683e-07, | |
| "loss": 1.5192725658416748, | |
| "step": 1186 | |
| }, | |
| { | |
| "epoch": 1.7343065693430657, | |
| "grad_norm": 6.625, | |
| "learning_rate": 7.161329936177522e-07, | |
| "loss": 1.3260494470596313, | |
| "step": 1188 | |
| }, | |
| { | |
| "epoch": 1.7372262773722627, | |
| "grad_norm": 3.375, | |
| "learning_rate": 7.115096405751567e-07, | |
| "loss": 1.3762927055358887, | |
| "step": 1190 | |
| }, | |
| { | |
| "epoch": 1.7401459854014598, | |
| "grad_norm": 1.8515625, | |
| "learning_rate": 7.069338334203818e-07, | |
| "loss": 1.0026099681854248, | |
| "step": 1192 | |
| }, | |
| { | |
| "epoch": 1.743065693430657, | |
| "grad_norm": 1.1015625, | |
| "learning_rate": 7.024056788798658e-07, | |
| "loss": 1.1264629364013672, | |
| "step": 1194 | |
| }, | |
| { | |
| "epoch": 1.745985401459854, | |
| "grad_norm": 16.75, | |
| "learning_rate": 6.979252825685927e-07, | |
| "loss": 1.5443601608276367, | |
| "step": 1196 | |
| }, | |
| { | |
| "epoch": 1.748905109489051, | |
| "grad_norm": 1.8671875, | |
| "learning_rate": 6.934927489876312e-07, | |
| "loss": 1.0794442892074585, | |
| "step": 1198 | |
| }, | |
| { | |
| "epoch": 1.7518248175182483, | |
| "grad_norm": 6.90625, | |
| "learning_rate": 6.891081815216958e-07, | |
| "loss": 1.348907470703125, | |
| "step": 1200 | |
| }, | |
| { | |
| "epoch": 1.7547445255474452, | |
| "grad_norm": 3.140625, | |
| "learning_rate": 6.847716824367369e-07, | |
| "loss": 1.3414909839630127, | |
| "step": 1202 | |
| }, | |
| { | |
| "epoch": 1.7576642335766424, | |
| "grad_norm": 4.59375, | |
| "learning_rate": 6.804833528775531e-07, | |
| "loss": 1.4073083400726318, | |
| "step": 1204 | |
| }, | |
| { | |
| "epoch": 1.7605839416058395, | |
| "grad_norm": 3.671875, | |
| "learning_rate": 6.762432928654358e-07, | |
| "loss": 0.8366962671279907, | |
| "step": 1206 | |
| }, | |
| { | |
| "epoch": 1.7635036496350365, | |
| "grad_norm": 5.53125, | |
| "learning_rate": 6.720516012958325e-07, | |
| "loss": 1.3547214269638062, | |
| "step": 1208 | |
| }, | |
| { | |
| "epoch": 1.7664233576642334, | |
| "grad_norm": 5.21875, | |
| "learning_rate": 6.679083759360433e-07, | |
| "loss": 1.6114599704742432, | |
| "step": 1210 | |
| }, | |
| { | |
| "epoch": 1.7693430656934308, | |
| "grad_norm": 4.5, | |
| "learning_rate": 6.638137134229375e-07, | |
| "loss": 1.5248315334320068, | |
| "step": 1212 | |
| }, | |
| { | |
| "epoch": 1.7722627737226277, | |
| "grad_norm": 3.6875, | |
| "learning_rate": 6.597677092607025e-07, | |
| "loss": 1.093032956123352, | |
| "step": 1214 | |
| }, | |
| { | |
| "epoch": 1.7751824817518247, | |
| "grad_norm": 4.5, | |
| "learning_rate": 6.557704578186146e-07, | |
| "loss": 1.408461093902588, | |
| "step": 1216 | |
| }, | |
| { | |
| "epoch": 1.7781021897810219, | |
| "grad_norm": 9.9375, | |
| "learning_rate": 6.518220523288382e-07, | |
| "loss": 1.3268358707427979, | |
| "step": 1218 | |
| }, | |
| { | |
| "epoch": 1.781021897810219, | |
| "grad_norm": 4.75, | |
| "learning_rate": 6.479225848842523e-07, | |
| "loss": 1.544386386871338, | |
| "step": 1220 | |
| }, | |
| { | |
| "epoch": 1.783941605839416, | |
| "grad_norm": 5.9375, | |
| "learning_rate": 6.440721464362998e-07, | |
| "loss": 1.4272065162658691, | |
| "step": 1222 | |
| }, | |
| { | |
| "epoch": 1.7868613138686131, | |
| "grad_norm": 3.515625, | |
| "learning_rate": 6.402708267928694e-07, | |
| "loss": 1.3150466680526733, | |
| "step": 1224 | |
| }, | |
| { | |
| "epoch": 1.7897810218978103, | |
| "grad_norm": 5.0, | |
| "learning_rate": 6.365187146161991e-07, | |
| "loss": 1.2979998588562012, | |
| "step": 1226 | |
| }, | |
| { | |
| "epoch": 1.7927007299270072, | |
| "grad_norm": 4.75, | |
| "learning_rate": 6.32815897420809e-07, | |
| "loss": 1.6841963529586792, | |
| "step": 1228 | |
| }, | |
| { | |
| "epoch": 1.7956204379562044, | |
| "grad_norm": 5.0, | |
| "learning_rate": 6.29162461571459e-07, | |
| "loss": 1.6227900981903076, | |
| "step": 1230 | |
| }, | |
| { | |
| "epoch": 1.7985401459854016, | |
| "grad_norm": 11.6875, | |
| "learning_rate": 6.25558492281135e-07, | |
| "loss": 1.4919426441192627, | |
| "step": 1232 | |
| }, | |
| { | |
| "epoch": 1.8014598540145985, | |
| "grad_norm": 4.8125, | |
| "learning_rate": 6.220040736090617e-07, | |
| "loss": 1.3797836303710938, | |
| "step": 1234 | |
| }, | |
| { | |
| "epoch": 1.8043795620437955, | |
| "grad_norm": 4.09375, | |
| "learning_rate": 6.18499288458743e-07, | |
| "loss": 1.6902371644973755, | |
| "step": 1236 | |
| }, | |
| { | |
| "epoch": 1.8072992700729928, | |
| "grad_norm": 2.453125, | |
| "learning_rate": 6.150442185760258e-07, | |
| "loss": 1.2298048734664917, | |
| "step": 1238 | |
| }, | |
| { | |
| "epoch": 1.8102189781021898, | |
| "grad_norm": 4.53125, | |
| "learning_rate": 6.116389445471948e-07, | |
| "loss": 1.3514063358306885, | |
| "step": 1240 | |
| }, | |
| { | |
| "epoch": 1.8131386861313867, | |
| "grad_norm": 3.828125, | |
| "learning_rate": 6.082835457970935e-07, | |
| "loss": 1.3649213314056396, | |
| "step": 1242 | |
| }, | |
| { | |
| "epoch": 1.816058394160584, | |
| "grad_norm": 4.15625, | |
| "learning_rate": 6.0497810058727e-07, | |
| "loss": 1.3873786926269531, | |
| "step": 1244 | |
| }, | |
| { | |
| "epoch": 1.818978102189781, | |
| "grad_norm": 5.21875, | |
| "learning_rate": 6.017226860141535e-07, | |
| "loss": 1.6073391437530518, | |
| "step": 1246 | |
| }, | |
| { | |
| "epoch": 1.821897810218978, | |
| "grad_norm": 2.90625, | |
| "learning_rate": 5.985173780072558e-07, | |
| "loss": 1.333566427230835, | |
| "step": 1248 | |
| }, | |
| { | |
| "epoch": 1.8248175182481752, | |
| "grad_norm": 3.0625, | |
| "learning_rate": 5.953622513273977e-07, | |
| "loss": 1.3585089445114136, | |
| "step": 1250 | |
| }, | |
| { | |
| "epoch": 1.8277372262773723, | |
| "grad_norm": 3.953125, | |
| "learning_rate": 5.92257379564969e-07, | |
| "loss": 1.195847749710083, | |
| "step": 1252 | |
| }, | |
| { | |
| "epoch": 1.8306569343065693, | |
| "grad_norm": 4.84375, | |
| "learning_rate": 5.892028351382101e-07, | |
| "loss": 1.4418195486068726, | |
| "step": 1254 | |
| }, | |
| { | |
| "epoch": 1.8335766423357664, | |
| "grad_norm": 4.09375, | |
| "learning_rate": 5.861986892915227e-07, | |
| "loss": 1.384018063545227, | |
| "step": 1256 | |
| }, | |
| { | |
| "epoch": 1.8364963503649636, | |
| "grad_norm": 9.4375, | |
| "learning_rate": 5.832450120938093e-07, | |
| "loss": 1.3380024433135986, | |
| "step": 1258 | |
| }, | |
| { | |
| "epoch": 1.8394160583941606, | |
| "grad_norm": 6.46875, | |
| "learning_rate": 5.803418724368373e-07, | |
| "loss": 1.3088436126708984, | |
| "step": 1260 | |
| }, | |
| { | |
| "epoch": 1.8423357664233575, | |
| "grad_norm": 9.9375, | |
| "learning_rate": 5.774893380336338e-07, | |
| "loss": 1.5858633518218994, | |
| "step": 1262 | |
| }, | |
| { | |
| "epoch": 1.845255474452555, | |
| "grad_norm": 6.375, | |
| "learning_rate": 5.746874754169053e-07, | |
| "loss": 1.5293078422546387, | |
| "step": 1264 | |
| }, | |
| { | |
| "epoch": 1.8481751824817518, | |
| "grad_norm": 2.921875, | |
| "learning_rate": 5.719363499374861e-07, | |
| "loss": 1.1518256664276123, | |
| "step": 1266 | |
| }, | |
| { | |
| "epoch": 1.8510948905109488, | |
| "grad_norm": 7.6875, | |
| "learning_rate": 5.692360257628144e-07, | |
| "loss": 1.3224802017211914, | |
| "step": 1268 | |
| }, | |
| { | |
| "epoch": 1.854014598540146, | |
| "grad_norm": 4.28125, | |
| "learning_rate": 5.665865658754341e-07, | |
| "loss": 1.2233679294586182, | |
| "step": 1270 | |
| }, | |
| { | |
| "epoch": 1.856934306569343, | |
| "grad_norm": 6.34375, | |
| "learning_rate": 5.639880320715284e-07, | |
| "loss": 1.4993672370910645, | |
| "step": 1272 | |
| }, | |
| { | |
| "epoch": 1.85985401459854, | |
| "grad_norm": 3.703125, | |
| "learning_rate": 5.614404849594762e-07, | |
| "loss": 1.3802194595336914, | |
| "step": 1274 | |
| }, | |
| { | |
| "epoch": 1.8627737226277372, | |
| "grad_norm": 2.5625, | |
| "learning_rate": 5.589439839584404e-07, | |
| "loss": 1.0489559173583984, | |
| "step": 1276 | |
| }, | |
| { | |
| "epoch": 1.8656934306569344, | |
| "grad_norm": 1.40625, | |
| "learning_rate": 5.564985872969791e-07, | |
| "loss": 1.2326107025146484, | |
| "step": 1278 | |
| }, | |
| { | |
| "epoch": 1.8686131386861313, | |
| "grad_norm": 5.4375, | |
| "learning_rate": 5.541043520116912e-07, | |
| "loss": 1.1945993900299072, | |
| "step": 1280 | |
| }, | |
| { | |
| "epoch": 1.8715328467153285, | |
| "grad_norm": 2.625, | |
| "learning_rate": 5.517613339458832e-07, | |
| "loss": 1.2813007831573486, | |
| "step": 1282 | |
| }, | |
| { | |
| "epoch": 1.8744525547445257, | |
| "grad_norm": 4.46875, | |
| "learning_rate": 5.494695877482676e-07, | |
| "loss": 1.1684314012527466, | |
| "step": 1284 | |
| }, | |
| { | |
| "epoch": 1.8773722627737226, | |
| "grad_norm": 3.71875, | |
| "learning_rate": 5.472291668716893e-07, | |
| "loss": 1.222388505935669, | |
| "step": 1286 | |
| }, | |
| { | |
| "epoch": 1.8802919708029195, | |
| "grad_norm": 2.984375, | |
| "learning_rate": 5.450401235718762e-07, | |
| "loss": 1.2156729698181152, | |
| "step": 1288 | |
| }, | |
| { | |
| "epoch": 1.883211678832117, | |
| "grad_norm": 5.96875, | |
| "learning_rate": 5.42902508906224e-07, | |
| "loss": 1.311574935913086, | |
| "step": 1290 | |
| }, | |
| { | |
| "epoch": 1.8861313868613139, | |
| "grad_norm": 7.96875, | |
| "learning_rate": 5.408163727326021e-07, | |
| "loss": 1.34036123752594, | |
| "step": 1292 | |
| }, | |
| { | |
| "epoch": 1.8890510948905108, | |
| "grad_norm": 3.640625, | |
| "learning_rate": 5.387817637081928e-07, | |
| "loss": 1.1132798194885254, | |
| "step": 1294 | |
| }, | |
| { | |
| "epoch": 1.891970802919708, | |
| "grad_norm": 3.359375, | |
| "learning_rate": 5.367987292883554e-07, | |
| "loss": 1.3646128177642822, | |
| "step": 1296 | |
| }, | |
| { | |
| "epoch": 1.8948905109489051, | |
| "grad_norm": 5.1875, | |
| "learning_rate": 5.348673157255195e-07, | |
| "loss": 1.4554338455200195, | |
| "step": 1298 | |
| }, | |
| { | |
| "epoch": 1.897810218978102, | |
| "grad_norm": 3.96875, | |
| "learning_rate": 5.329875680681065e-07, | |
| "loss": 1.4109296798706055, | |
| "step": 1300 | |
| }, | |
| { | |
| "epoch": 1.9007299270072993, | |
| "grad_norm": 4.875, | |
| "learning_rate": 5.311595301594783e-07, | |
| "loss": 1.1961219310760498, | |
| "step": 1302 | |
| }, | |
| { | |
| "epoch": 1.9036496350364964, | |
| "grad_norm": 2.921875, | |
| "learning_rate": 5.293832446369158e-07, | |
| "loss": 0.6657427549362183, | |
| "step": 1304 | |
| }, | |
| { | |
| "epoch": 1.9065693430656934, | |
| "grad_norm": 10.4375, | |
| "learning_rate": 5.276587529306236e-07, | |
| "loss": 1.397131323814392, | |
| "step": 1306 | |
| }, | |
| { | |
| "epoch": 1.9094890510948905, | |
| "grad_norm": 6.5, | |
| "learning_rate": 5.25986095262763e-07, | |
| "loss": 1.323398470878601, | |
| "step": 1308 | |
| }, | |
| { | |
| "epoch": 1.9124087591240877, | |
| "grad_norm": 3.203125, | |
| "learning_rate": 5.243653106465157e-07, | |
| "loss": 1.3060777187347412, | |
| "step": 1310 | |
| }, | |
| { | |
| "epoch": 1.9153284671532846, | |
| "grad_norm": 5.71875, | |
| "learning_rate": 5.227964368851721e-07, | |
| "loss": 1.5433318614959717, | |
| "step": 1312 | |
| }, | |
| { | |
| "epoch": 1.9182481751824818, | |
| "grad_norm": 3.359375, | |
| "learning_rate": 5.212795105712508e-07, | |
| "loss": 1.4788509607315063, | |
| "step": 1314 | |
| }, | |
| { | |
| "epoch": 1.921167883211679, | |
| "grad_norm": 4.8125, | |
| "learning_rate": 5.198145670856438e-07, | |
| "loss": 1.3976120948791504, | |
| "step": 1316 | |
| }, | |
| { | |
| "epoch": 1.924087591240876, | |
| "grad_norm": 2.0625, | |
| "learning_rate": 5.184016405967931e-07, | |
| "loss": 1.1872693300247192, | |
| "step": 1318 | |
| }, | |
| { | |
| "epoch": 1.9270072992700729, | |
| "grad_norm": 2.296875, | |
| "learning_rate": 5.170407640598921e-07, | |
| "loss": 1.1601970195770264, | |
| "step": 1320 | |
| }, | |
| { | |
| "epoch": 1.92992700729927, | |
| "grad_norm": 3.5625, | |
| "learning_rate": 5.157319692161178e-07, | |
| "loss": 1.205195426940918, | |
| "step": 1322 | |
| }, | |
| { | |
| "epoch": 1.9328467153284672, | |
| "grad_norm": 3.734375, | |
| "learning_rate": 5.144752865918901e-07, | |
| "loss": 1.1591906547546387, | |
| "step": 1324 | |
| }, | |
| { | |
| "epoch": 1.9357664233576641, | |
| "grad_norm": 3.421875, | |
| "learning_rate": 5.132707454981602e-07, | |
| "loss": 1.3498120307922363, | |
| "step": 1326 | |
| }, | |
| { | |
| "epoch": 1.9386861313868613, | |
| "grad_norm": 3.796875, | |
| "learning_rate": 5.121183740297261e-07, | |
| "loss": 1.3916034698486328, | |
| "step": 1328 | |
| }, | |
| { | |
| "epoch": 1.9416058394160585, | |
| "grad_norm": 17.375, | |
| "learning_rate": 5.110181990645788e-07, | |
| "loss": 1.2117153406143188, | |
| "step": 1330 | |
| }, | |
| { | |
| "epoch": 1.9445255474452554, | |
| "grad_norm": 1.734375, | |
| "learning_rate": 5.099702462632737e-07, | |
| "loss": 1.19834566116333, | |
| "step": 1332 | |
| }, | |
| { | |
| "epoch": 1.9474452554744526, | |
| "grad_norm": 10.0625, | |
| "learning_rate": 5.089745400683333e-07, | |
| "loss": 0.8368179798126221, | |
| "step": 1334 | |
| }, | |
| { | |
| "epoch": 1.9503649635036497, | |
| "grad_norm": 5.625, | |
| "learning_rate": 5.080311037036767e-07, | |
| "loss": 1.314239263534546, | |
| "step": 1336 | |
| }, | |
| { | |
| "epoch": 1.9532846715328467, | |
| "grad_norm": 1.65625, | |
| "learning_rate": 5.071399591740777e-07, | |
| "loss": 1.216627597808838, | |
| "step": 1338 | |
| }, | |
| { | |
| "epoch": 1.9562043795620438, | |
| "grad_norm": 6.375, | |
| "learning_rate": 5.063011272646521e-07, | |
| "loss": 1.2274556159973145, | |
| "step": 1340 | |
| }, | |
| { | |
| "epoch": 1.959124087591241, | |
| "grad_norm": 2.546875, | |
| "learning_rate": 5.055146275403725e-07, | |
| "loss": 1.4812201261520386, | |
| "step": 1342 | |
| }, | |
| { | |
| "epoch": 1.962043795620438, | |
| "grad_norm": 5.71875, | |
| "learning_rate": 5.047804783456117e-07, | |
| "loss": 1.215821623802185, | |
| "step": 1344 | |
| }, | |
| { | |
| "epoch": 1.964963503649635, | |
| "grad_norm": 4.71875, | |
| "learning_rate": 5.040986968037157e-07, | |
| "loss": 1.318119764328003, | |
| "step": 1346 | |
| }, | |
| { | |
| "epoch": 1.967883211678832, | |
| "grad_norm": 2.953125, | |
| "learning_rate": 5.034692988166033e-07, | |
| "loss": 1.2136964797973633, | |
| "step": 1348 | |
| }, | |
| { | |
| "epoch": 1.9708029197080292, | |
| "grad_norm": 4.125, | |
| "learning_rate": 5.028922990643963e-07, | |
| "loss": 1.3341786861419678, | |
| "step": 1350 | |
| }, | |
| { | |
| "epoch": 1.9737226277372262, | |
| "grad_norm": 3.75, | |
| "learning_rate": 5.023677110050759e-07, | |
| "loss": 1.4188188314437866, | |
| "step": 1352 | |
| }, | |
| { | |
| "epoch": 1.9766423357664233, | |
| "grad_norm": 3.421875, | |
| "learning_rate": 5.018955468741701e-07, | |
| "loss": 1.608628511428833, | |
| "step": 1354 | |
| }, | |
| { | |
| "epoch": 1.9795620437956205, | |
| "grad_norm": 3.359375, | |
| "learning_rate": 5.014758176844665e-07, | |
| "loss": 1.5936325788497925, | |
| "step": 1356 | |
| }, | |
| { | |
| "epoch": 1.9824817518248175, | |
| "grad_norm": 2.796875, | |
| "learning_rate": 5.011085332257579e-07, | |
| "loss": 1.178612232208252, | |
| "step": 1358 | |
| }, | |
| { | |
| "epoch": 1.9854014598540146, | |
| "grad_norm": 7.1875, | |
| "learning_rate": 5.007937020646117e-07, | |
| "loss": 1.1231637001037598, | |
| "step": 1360 | |
| }, | |
| { | |
| "epoch": 1.9883211678832118, | |
| "grad_norm": 1.90625, | |
| "learning_rate": 5.005313315441716e-07, | |
| "loss": 0.6363063454627991, | |
| "step": 1362 | |
| }, | |
| { | |
| "epoch": 1.9912408759124087, | |
| "grad_norm": 5.5, | |
| "learning_rate": 5.003214277839851e-07, | |
| "loss": 1.3855026960372925, | |
| "step": 1364 | |
| }, | |
| { | |
| "epoch": 1.994160583941606, | |
| "grad_norm": 5.6875, | |
| "learning_rate": 5.00163995679862e-07, | |
| "loss": 1.346792459487915, | |
| "step": 1366 | |
| }, | |
| { | |
| "epoch": 1.997080291970803, | |
| "grad_norm": 8.1875, | |
| "learning_rate": 5.000590389037593e-07, | |
| "loss": 1.3148702383041382, | |
| "step": 1368 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "grad_norm": 4.0625, | |
| "learning_rate": 5.00006559903696e-07, | |
| "loss": 1.6425683498382568, | |
| "step": 1370 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "step": 1370, | |
| "total_flos": 1.984544544032555e+18, | |
| "train_loss": 1.409229011779284, | |
| "train_runtime": 8212.4061, | |
| "train_samples_per_second": 2.669, | |
| "train_steps_per_second": 0.167 | |
| } | |
| ], | |
| "logging_steps": 2, | |
| "max_steps": 1370, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 2, | |
| "save_steps": 9999999, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": true | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 1.984544544032555e+18, | |
| "train_batch_size": 1, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |