Image-Text-to-Text
Transformers
Safetensors
qwen3_5
llama-factory
full
Generated from Trainer
conversational
Instructions to use furproxy/9b-132 with libraries, inference providers, notebooks, and local apps. Follow these links to get started.
- Libraries
- Transformers
How to use furproxy/9b-132 with Transformers:
# Use a pipeline as a high-level helper from transformers import pipeline pipe = pipeline("image-text-to-text", model="furproxy/9b-132") messages = [ { "role": "user", "content": [ {"type": "image", "url": "https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/p-blog/candy.JPG"}, {"type": "text", "text": "What animal is on the candy?"} ] }, ] pipe(text=messages)# Load model directly from transformers import AutoProcessor, AutoModelForImageTextToText processor = AutoProcessor.from_pretrained("furproxy/9b-132") model = AutoModelForImageTextToText.from_pretrained("furproxy/9b-132") messages = [ { "role": "user", "content": [ {"type": "image", "url": "https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/p-blog/candy.JPG"}, {"type": "text", "text": "What animal is on the candy?"} ] }, ] inputs = processor.apply_chat_template( messages, add_generation_prompt=True, tokenize=True, return_dict=True, return_tensors="pt", ).to(model.device) outputs = model.generate(**inputs, max_new_tokens=40) print(processor.decode(outputs[0][inputs["input_ids"].shape[-1]:])) - Notebooks
- Google Colab
- Kaggle
- Local Apps Settings
- vLLM
How to use furproxy/9b-132 with vLLM:
Install from pip and serve model
# Install vLLM from pip: pip install vllm # Start the vLLM server: vllm serve "furproxy/9b-132" # Call the server using curl (OpenAI-compatible API): curl -X POST "http://localhost:8000/v1/chat/completions" \ -H "Content-Type: application/json" \ --data '{ "model": "furproxy/9b-132", "messages": [ { "role": "user", "content": [ { "type": "text", "text": "Describe this image in one sentence." }, { "type": "image_url", "image_url": { "url": "https://cdn.britannica.com/61/93061-050-99147DCE/Statue-of-Liberty-Island-New-York-Bay.jpg" } } ] } ] }'Use Docker
docker model run hf.co/furproxy/9b-132
- SGLang
How to use furproxy/9b-132 with SGLang:
Install from pip and serve model
# Install SGLang from pip: pip install sglang # Start the SGLang server: python3 -m sglang.launch_server \ --model-path "furproxy/9b-132" \ --host 0.0.0.0 \ --port 30000 # Call the server using curl (OpenAI-compatible API): curl -X POST "http://localhost:30000/v1/chat/completions" \ -H "Content-Type: application/json" \ --data '{ "model": "furproxy/9b-132", "messages": [ { "role": "user", "content": [ { "type": "text", "text": "Describe this image in one sentence." }, { "type": "image_url", "image_url": { "url": "https://cdn.britannica.com/61/93061-050-99147DCE/Statue-of-Liberty-Island-New-York-Bay.jpg" } } ] } ] }'Use Docker images
docker run --gpus all \ --shm-size 32g \ -p 30000:30000 \ -v ~/.cache/huggingface:/root/.cache/huggingface \ --env "HF_TOKEN=<secret>" \ --ipc=host \ lmsysorg/sglang:latest \ python3 -m sglang.launch_server \ --model-path "furproxy/9b-132" \ --host 0.0.0.0 \ --port 30000 # Call the server using curl (OpenAI-compatible API): curl -X POST "http://localhost:30000/v1/chat/completions" \ -H "Content-Type: application/json" \ --data '{ "model": "furproxy/9b-132", "messages": [ { "role": "user", "content": [ { "type": "text", "text": "Describe this image in one sentence." }, { "type": "image_url", "image_url": { "url": "https://cdn.britannica.com/61/93061-050-99147DCE/Statue-of-Liberty-Island-New-York-Bay.jpg" } } ] } ] }' - Docker Model Runner
How to use furproxy/9b-132 with Docker Model Runner:
docker model run hf.co/furproxy/9b-132
| { | |
| "best_global_step": null, | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 4.0, | |
| "eval_steps": 500, | |
| "global_step": 5284, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.001514004542013626, | |
| "grad_norm": 4.384895324707031, | |
| "learning_rate": 1.1320754716981132e-08, | |
| "loss": 1.7582024335861206, | |
| "step": 2 | |
| }, | |
| { | |
| "epoch": 0.003028009084027252, | |
| "grad_norm": 8.81789779663086, | |
| "learning_rate": 3.39622641509434e-08, | |
| "loss": 2.091010570526123, | |
| "step": 4 | |
| }, | |
| { | |
| "epoch": 0.004542013626040878, | |
| "grad_norm": 6.768231391906738, | |
| "learning_rate": 5.660377358490566e-08, | |
| "loss": 2.225084066390991, | |
| "step": 6 | |
| }, | |
| { | |
| "epoch": 0.006056018168054504, | |
| "grad_norm": 3.0249898433685303, | |
| "learning_rate": 7.924528301886792e-08, | |
| "loss": 1.693494200706482, | |
| "step": 8 | |
| }, | |
| { | |
| "epoch": 0.00757002271006813, | |
| "grad_norm": 16.95102882385254, | |
| "learning_rate": 1.0188679245283018e-07, | |
| "loss": 1.8492655754089355, | |
| "step": 10 | |
| }, | |
| { | |
| "epoch": 0.009084027252081756, | |
| "grad_norm": 4.063567161560059, | |
| "learning_rate": 1.2452830188679246e-07, | |
| "loss": 1.4832412004470825, | |
| "step": 12 | |
| }, | |
| { | |
| "epoch": 0.010598031794095382, | |
| "grad_norm": 31.606918334960938, | |
| "learning_rate": 1.4716981132075472e-07, | |
| "loss": 1.9249378442764282, | |
| "step": 14 | |
| }, | |
| { | |
| "epoch": 0.012112036336109008, | |
| "grad_norm": 9.38675308227539, | |
| "learning_rate": 1.6981132075471698e-07, | |
| "loss": 1.7762621641159058, | |
| "step": 16 | |
| }, | |
| { | |
| "epoch": 0.013626040878122634, | |
| "grad_norm": 6.414663314819336, | |
| "learning_rate": 1.9245283018867924e-07, | |
| "loss": 1.5769001245498657, | |
| "step": 18 | |
| }, | |
| { | |
| "epoch": 0.01514004542013626, | |
| "grad_norm": 2.645232677459717, | |
| "learning_rate": 2.150943396226415e-07, | |
| "loss": 1.8546013832092285, | |
| "step": 20 | |
| }, | |
| { | |
| "epoch": 0.016654049962149888, | |
| "grad_norm": 3.0886499881744385, | |
| "learning_rate": 2.3773584905660376e-07, | |
| "loss": 1.8705984354019165, | |
| "step": 22 | |
| }, | |
| { | |
| "epoch": 0.018168054504163512, | |
| "grad_norm": 3.312413454055786, | |
| "learning_rate": 2.60377358490566e-07, | |
| "loss": 1.6591613292694092, | |
| "step": 24 | |
| }, | |
| { | |
| "epoch": 0.01968205904617714, | |
| "grad_norm": 14.512092590332031, | |
| "learning_rate": 2.8301886792452833e-07, | |
| "loss": 1.6924680471420288, | |
| "step": 26 | |
| }, | |
| { | |
| "epoch": 0.021196063588190765, | |
| "grad_norm": 4.4867634773254395, | |
| "learning_rate": 3.056603773584906e-07, | |
| "loss": 1.6047476530075073, | |
| "step": 28 | |
| }, | |
| { | |
| "epoch": 0.022710068130204392, | |
| "grad_norm": 12.943336486816406, | |
| "learning_rate": 3.2830188679245285e-07, | |
| "loss": 1.809732437133789, | |
| "step": 30 | |
| }, | |
| { | |
| "epoch": 0.024224072672218017, | |
| "grad_norm": 5.657660007476807, | |
| "learning_rate": 3.509433962264151e-07, | |
| "loss": 1.2130705118179321, | |
| "step": 32 | |
| }, | |
| { | |
| "epoch": 0.025738077214231644, | |
| "grad_norm": 5.186927795410156, | |
| "learning_rate": 3.7358490566037737e-07, | |
| "loss": 1.8291529417037964, | |
| "step": 34 | |
| }, | |
| { | |
| "epoch": 0.02725208175624527, | |
| "grad_norm": 3.187878370285034, | |
| "learning_rate": 3.9622641509433963e-07, | |
| "loss": 1.8319251537322998, | |
| "step": 36 | |
| }, | |
| { | |
| "epoch": 0.028766086298258896, | |
| "grad_norm": 21.289854049682617, | |
| "learning_rate": 4.188679245283019e-07, | |
| "loss": 1.7879869937896729, | |
| "step": 38 | |
| }, | |
| { | |
| "epoch": 0.03028009084027252, | |
| "grad_norm": 2.181772232055664, | |
| "learning_rate": 4.4150943396226415e-07, | |
| "loss": 1.8917243480682373, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 0.03179409538228615, | |
| "grad_norm": 3.3542346954345703, | |
| "learning_rate": 4.641509433962264e-07, | |
| "loss": 1.4650280475616455, | |
| "step": 42 | |
| }, | |
| { | |
| "epoch": 0.033308099924299776, | |
| "grad_norm": 7.832064151763916, | |
| "learning_rate": 4.867924528301886e-07, | |
| "loss": 1.4264377355575562, | |
| "step": 44 | |
| }, | |
| { | |
| "epoch": 0.0348221044663134, | |
| "grad_norm": 4.87370491027832, | |
| "learning_rate": 5.094339622641509e-07, | |
| "loss": 0.8579185009002686, | |
| "step": 46 | |
| }, | |
| { | |
| "epoch": 0.036336109008327025, | |
| "grad_norm": 39.0529670715332, | |
| "learning_rate": 5.320754716981131e-07, | |
| "loss": 1.4417083263397217, | |
| "step": 48 | |
| }, | |
| { | |
| "epoch": 0.03785011355034065, | |
| "grad_norm": 2.6229774951934814, | |
| "learning_rate": 5.547169811320755e-07, | |
| "loss": 1.778635025024414, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 0.03936411809235428, | |
| "grad_norm": 7.941841125488281, | |
| "learning_rate": 5.773584905660378e-07, | |
| "loss": 1.0647987127304077, | |
| "step": 52 | |
| }, | |
| { | |
| "epoch": 0.0408781226343679, | |
| "grad_norm": 2.5582528114318848, | |
| "learning_rate": 6.000000000000001e-07, | |
| "loss": 0.8559340834617615, | |
| "step": 54 | |
| }, | |
| { | |
| "epoch": 0.04239212717638153, | |
| "grad_norm": 5.4595489501953125, | |
| "learning_rate": 6.226415094339623e-07, | |
| "loss": 0.7988621592521667, | |
| "step": 56 | |
| }, | |
| { | |
| "epoch": 0.04390613171839516, | |
| "grad_norm": 4.560713768005371, | |
| "learning_rate": 6.452830188679246e-07, | |
| "loss": 1.2707360982894897, | |
| "step": 58 | |
| }, | |
| { | |
| "epoch": 0.045420136260408785, | |
| "grad_norm": 2.874634027481079, | |
| "learning_rate": 6.679245283018868e-07, | |
| "loss": 1.9832544326782227, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 0.046934140802422405, | |
| "grad_norm": 6.723309516906738, | |
| "learning_rate": 6.905660377358491e-07, | |
| "loss": 1.3568415641784668, | |
| "step": 62 | |
| }, | |
| { | |
| "epoch": 0.04844814534443603, | |
| "grad_norm": 1.7419151067733765, | |
| "learning_rate": 7.132075471698113e-07, | |
| "loss": 1.6522427797317505, | |
| "step": 64 | |
| }, | |
| { | |
| "epoch": 0.04996214988644966, | |
| "grad_norm": 1.920200228691101, | |
| "learning_rate": 7.358490566037736e-07, | |
| "loss": 1.7422058582305908, | |
| "step": 66 | |
| }, | |
| { | |
| "epoch": 0.05147615442846329, | |
| "grad_norm": 1.8585798740386963, | |
| "learning_rate": 7.584905660377358e-07, | |
| "loss": 0.8603143095970154, | |
| "step": 68 | |
| }, | |
| { | |
| "epoch": 0.05299015897047691, | |
| "grad_norm": 2.7313153743743896, | |
| "learning_rate": 7.811320754716982e-07, | |
| "loss": 0.8468101620674133, | |
| "step": 70 | |
| }, | |
| { | |
| "epoch": 0.05450416351249054, | |
| "grad_norm": 2.7206826210021973, | |
| "learning_rate": 8.037735849056604e-07, | |
| "loss": 0.8229624629020691, | |
| "step": 72 | |
| }, | |
| { | |
| "epoch": 0.056018168054504165, | |
| "grad_norm": 2.820694923400879, | |
| "learning_rate": 8.264150943396227e-07, | |
| "loss": 0.8030911684036255, | |
| "step": 74 | |
| }, | |
| { | |
| "epoch": 0.05753217259651779, | |
| "grad_norm": 2.101903200149536, | |
| "learning_rate": 8.490566037735849e-07, | |
| "loss": 0.6831276416778564, | |
| "step": 76 | |
| }, | |
| { | |
| "epoch": 0.059046177138531414, | |
| "grad_norm": 3.765256881713867, | |
| "learning_rate": 8.716981132075472e-07, | |
| "loss": 0.7411392331123352, | |
| "step": 78 | |
| }, | |
| { | |
| "epoch": 0.06056018168054504, | |
| "grad_norm": 1.8168247938156128, | |
| "learning_rate": 8.943396226415094e-07, | |
| "loss": 1.0968608856201172, | |
| "step": 80 | |
| }, | |
| { | |
| "epoch": 0.06207418622255867, | |
| "grad_norm": 8.313712120056152, | |
| "learning_rate": 9.169811320754717e-07, | |
| "loss": 0.8532919883728027, | |
| "step": 82 | |
| }, | |
| { | |
| "epoch": 0.0635881907645723, | |
| "grad_norm": 1.691605567932129, | |
| "learning_rate": 9.396226415094339e-07, | |
| "loss": 1.2071592807769775, | |
| "step": 84 | |
| }, | |
| { | |
| "epoch": 0.06510219530658592, | |
| "grad_norm": 1.4280986785888672, | |
| "learning_rate": 9.622641509433961e-07, | |
| "loss": 1.0525227785110474, | |
| "step": 86 | |
| }, | |
| { | |
| "epoch": 0.06661619984859955, | |
| "grad_norm": 1.750461459159851, | |
| "learning_rate": 9.849056603773586e-07, | |
| "loss": 0.6976984739303589, | |
| "step": 88 | |
| }, | |
| { | |
| "epoch": 0.06813020439061317, | |
| "grad_norm": 2.1426968574523926, | |
| "learning_rate": 1.0075471698113208e-06, | |
| "loss": 1.06442391872406, | |
| "step": 90 | |
| }, | |
| { | |
| "epoch": 0.0696442089326268, | |
| "grad_norm": 2.3825888633728027, | |
| "learning_rate": 1.030188679245283e-06, | |
| "loss": 0.9887061715126038, | |
| "step": 92 | |
| }, | |
| { | |
| "epoch": 0.07115821347464042, | |
| "grad_norm": 5.285498142242432, | |
| "learning_rate": 1.0528301886792452e-06, | |
| "loss": 1.0551021099090576, | |
| "step": 94 | |
| }, | |
| { | |
| "epoch": 0.07267221801665405, | |
| "grad_norm": 2.1216835975646973, | |
| "learning_rate": 1.0754716981132076e-06, | |
| "loss": 0.7430307865142822, | |
| "step": 96 | |
| }, | |
| { | |
| "epoch": 0.07418622255866768, | |
| "grad_norm": 1.883683204650879, | |
| "learning_rate": 1.0981132075471698e-06, | |
| "loss": 0.6745927333831787, | |
| "step": 98 | |
| }, | |
| { | |
| "epoch": 0.0757002271006813, | |
| "grad_norm": 1.9999110698699951, | |
| "learning_rate": 1.120754716981132e-06, | |
| "loss": 1.1162917613983154, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.07721423164269493, | |
| "grad_norm": 1.4555745124816895, | |
| "learning_rate": 1.1433962264150944e-06, | |
| "loss": 1.495133399963379, | |
| "step": 102 | |
| }, | |
| { | |
| "epoch": 0.07872823618470856, | |
| "grad_norm": 2.054089307785034, | |
| "learning_rate": 1.1660377358490566e-06, | |
| "loss": 0.546085774898529, | |
| "step": 104 | |
| }, | |
| { | |
| "epoch": 0.08024224072672217, | |
| "grad_norm": 2.632761001586914, | |
| "learning_rate": 1.188679245283019e-06, | |
| "loss": 0.5810890197753906, | |
| "step": 106 | |
| }, | |
| { | |
| "epoch": 0.0817562452687358, | |
| "grad_norm": 3.3831615447998047, | |
| "learning_rate": 1.2113207547169813e-06, | |
| "loss": 0.9427996873855591, | |
| "step": 108 | |
| }, | |
| { | |
| "epoch": 0.08327024981074943, | |
| "grad_norm": 3.100712776184082, | |
| "learning_rate": 1.2339622641509435e-06, | |
| "loss": 0.9965349435806274, | |
| "step": 110 | |
| }, | |
| { | |
| "epoch": 0.08478425435276306, | |
| "grad_norm": 2.1988565921783447, | |
| "learning_rate": 1.2566037735849057e-06, | |
| "loss": 0.9902376532554626, | |
| "step": 112 | |
| }, | |
| { | |
| "epoch": 0.08629825889477669, | |
| "grad_norm": 7.167949199676514, | |
| "learning_rate": 1.279245283018868e-06, | |
| "loss": 0.5847588777542114, | |
| "step": 114 | |
| }, | |
| { | |
| "epoch": 0.08781226343679031, | |
| "grad_norm": 2.712419271469116, | |
| "learning_rate": 1.3018867924528303e-06, | |
| "loss": 1.145547866821289, | |
| "step": 116 | |
| }, | |
| { | |
| "epoch": 0.08932626797880394, | |
| "grad_norm": 4.570695877075195, | |
| "learning_rate": 1.3245283018867925e-06, | |
| "loss": 0.8823922276496887, | |
| "step": 118 | |
| }, | |
| { | |
| "epoch": 0.09084027252081757, | |
| "grad_norm": 1.8274328708648682, | |
| "learning_rate": 1.3471698113207547e-06, | |
| "loss": 0.8956850171089172, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 0.09235427706283118, | |
| "grad_norm": 1.4670203924179077, | |
| "learning_rate": 1.3698113207547171e-06, | |
| "loss": 0.5994776487350464, | |
| "step": 122 | |
| }, | |
| { | |
| "epoch": 0.09386828160484481, | |
| "grad_norm": 3.0260169506073, | |
| "learning_rate": 1.3924528301886793e-06, | |
| "loss": 0.608551025390625, | |
| "step": 124 | |
| }, | |
| { | |
| "epoch": 0.09538228614685844, | |
| "grad_norm": 2.6915009021759033, | |
| "learning_rate": 1.4150943396226415e-06, | |
| "loss": 1.432515263557434, | |
| "step": 126 | |
| }, | |
| { | |
| "epoch": 0.09689629068887207, | |
| "grad_norm": 1.6062544584274292, | |
| "learning_rate": 1.4377358490566038e-06, | |
| "loss": 1.4172536134719849, | |
| "step": 128 | |
| }, | |
| { | |
| "epoch": 0.0984102952308857, | |
| "grad_norm": 2.7345874309539795, | |
| "learning_rate": 1.4603773584905662e-06, | |
| "loss": 1.1050211191177368, | |
| "step": 130 | |
| }, | |
| { | |
| "epoch": 0.09992429977289932, | |
| "grad_norm": 1.3847036361694336, | |
| "learning_rate": 1.4830188679245284e-06, | |
| "loss": 0.807343602180481, | |
| "step": 132 | |
| }, | |
| { | |
| "epoch": 0.10143830431491295, | |
| "grad_norm": 2.7313413619995117, | |
| "learning_rate": 1.5056603773584906e-06, | |
| "loss": 0.752243161201477, | |
| "step": 134 | |
| }, | |
| { | |
| "epoch": 0.10295230885692658, | |
| "grad_norm": 2.1813056468963623, | |
| "learning_rate": 1.5283018867924528e-06, | |
| "loss": 0.8742433786392212, | |
| "step": 136 | |
| }, | |
| { | |
| "epoch": 0.10446631339894019, | |
| "grad_norm": 1.965896487236023, | |
| "learning_rate": 1.5509433962264152e-06, | |
| "loss": 1.0023077726364136, | |
| "step": 138 | |
| }, | |
| { | |
| "epoch": 0.10598031794095382, | |
| "grad_norm": 1.341701626777649, | |
| "learning_rate": 1.5735849056603774e-06, | |
| "loss": 0.4326239824295044, | |
| "step": 140 | |
| }, | |
| { | |
| "epoch": 0.10749432248296745, | |
| "grad_norm": 2.332487106323242, | |
| "learning_rate": 1.5962264150943396e-06, | |
| "loss": 1.0556713342666626, | |
| "step": 142 | |
| }, | |
| { | |
| "epoch": 0.10900832702498107, | |
| "grad_norm": 1.6127592325210571, | |
| "learning_rate": 1.6188679245283018e-06, | |
| "loss": 1.4404288530349731, | |
| "step": 144 | |
| }, | |
| { | |
| "epoch": 0.1105223315669947, | |
| "grad_norm": 1.9773815870285034, | |
| "learning_rate": 1.6415094339622643e-06, | |
| "loss": 0.7526131868362427, | |
| "step": 146 | |
| }, | |
| { | |
| "epoch": 0.11203633610900833, | |
| "grad_norm": 4.140753746032715, | |
| "learning_rate": 1.6641509433962265e-06, | |
| "loss": 0.8076651096343994, | |
| "step": 148 | |
| }, | |
| { | |
| "epoch": 0.11355034065102196, | |
| "grad_norm": 2.5584218502044678, | |
| "learning_rate": 1.6867924528301887e-06, | |
| "loss": 1.0231175422668457, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 0.11506434519303559, | |
| "grad_norm": 5.1710524559021, | |
| "learning_rate": 1.7094339622641509e-06, | |
| "loss": 0.7206178903579712, | |
| "step": 152 | |
| }, | |
| { | |
| "epoch": 0.1165783497350492, | |
| "grad_norm": 1.3863569498062134, | |
| "learning_rate": 1.7320754716981133e-06, | |
| "loss": 0.8312125205993652, | |
| "step": 154 | |
| }, | |
| { | |
| "epoch": 0.11809235427706283, | |
| "grad_norm": 5.277348518371582, | |
| "learning_rate": 1.7547169811320755e-06, | |
| "loss": 1.2960776090621948, | |
| "step": 156 | |
| }, | |
| { | |
| "epoch": 0.11960635881907646, | |
| "grad_norm": 2.767324924468994, | |
| "learning_rate": 1.7773584905660377e-06, | |
| "loss": 1.3986912965774536, | |
| "step": 158 | |
| }, | |
| { | |
| "epoch": 0.12112036336109008, | |
| "grad_norm": 3.0075347423553467, | |
| "learning_rate": 1.8e-06, | |
| "loss": 1.0369937419891357, | |
| "step": 160 | |
| }, | |
| { | |
| "epoch": 0.12263436790310371, | |
| "grad_norm": 2.4291977882385254, | |
| "learning_rate": 1.8226415094339623e-06, | |
| "loss": 1.1735918521881104, | |
| "step": 162 | |
| }, | |
| { | |
| "epoch": 0.12414837244511734, | |
| "grad_norm": 10.382956504821777, | |
| "learning_rate": 1.8452830188679245e-06, | |
| "loss": 0.5223852396011353, | |
| "step": 164 | |
| }, | |
| { | |
| "epoch": 0.12566237698713095, | |
| "grad_norm": 3.3174524307250977, | |
| "learning_rate": 1.8679245283018868e-06, | |
| "loss": 0.9646264314651489, | |
| "step": 166 | |
| }, | |
| { | |
| "epoch": 0.1271763815291446, | |
| "grad_norm": 3.2446677684783936, | |
| "learning_rate": 1.890566037735849e-06, | |
| "loss": 0.5242027640342712, | |
| "step": 168 | |
| }, | |
| { | |
| "epoch": 0.1286903860711582, | |
| "grad_norm": 2.5006420612335205, | |
| "learning_rate": 1.913207547169811e-06, | |
| "loss": 1.368085503578186, | |
| "step": 170 | |
| }, | |
| { | |
| "epoch": 0.13020439061317185, | |
| "grad_norm": 3.4297969341278076, | |
| "learning_rate": 1.9358490566037734e-06, | |
| "loss": 0.5971487164497375, | |
| "step": 172 | |
| }, | |
| { | |
| "epoch": 0.13171839515518546, | |
| "grad_norm": 2.7067747116088867, | |
| "learning_rate": 1.958490566037736e-06, | |
| "loss": 1.2271755933761597, | |
| "step": 174 | |
| }, | |
| { | |
| "epoch": 0.1332323996971991, | |
| "grad_norm": 1.2546025514602661, | |
| "learning_rate": 1.981132075471698e-06, | |
| "loss": 1.4489688873291016, | |
| "step": 176 | |
| }, | |
| { | |
| "epoch": 0.13474640423921272, | |
| "grad_norm": 6.79306697845459, | |
| "learning_rate": 2.0037735849056604e-06, | |
| "loss": 0.7281686663627625, | |
| "step": 178 | |
| }, | |
| { | |
| "epoch": 0.13626040878122633, | |
| "grad_norm": 2.328155994415283, | |
| "learning_rate": 2.0264150943396226e-06, | |
| "loss": 0.5355978608131409, | |
| "step": 180 | |
| }, | |
| { | |
| "epoch": 0.13777441332323997, | |
| "grad_norm": 1.7138910293579102, | |
| "learning_rate": 2.049056603773585e-06, | |
| "loss": 1.3829823732376099, | |
| "step": 182 | |
| }, | |
| { | |
| "epoch": 0.1392884178652536, | |
| "grad_norm": 1.7628329992294312, | |
| "learning_rate": 2.071698113207547e-06, | |
| "loss": 0.6152011156082153, | |
| "step": 184 | |
| }, | |
| { | |
| "epoch": 0.14080242240726723, | |
| "grad_norm": 5.342782020568848, | |
| "learning_rate": 2.0943396226415092e-06, | |
| "loss": 0.8484565019607544, | |
| "step": 186 | |
| }, | |
| { | |
| "epoch": 0.14231642694928084, | |
| "grad_norm": 1.8729349374771118, | |
| "learning_rate": 2.1169811320754715e-06, | |
| "loss": 1.1291441917419434, | |
| "step": 188 | |
| }, | |
| { | |
| "epoch": 0.14383043149129449, | |
| "grad_norm": 2.3892452716827393, | |
| "learning_rate": 2.139622641509434e-06, | |
| "loss": 0.5965837240219116, | |
| "step": 190 | |
| }, | |
| { | |
| "epoch": 0.1453444360333081, | |
| "grad_norm": 1.9884214401245117, | |
| "learning_rate": 2.1622641509433963e-06, | |
| "loss": 0.948914647102356, | |
| "step": 192 | |
| }, | |
| { | |
| "epoch": 0.1468584405753217, | |
| "grad_norm": 3.3960092067718506, | |
| "learning_rate": 2.1849056603773585e-06, | |
| "loss": 0.5472453832626343, | |
| "step": 194 | |
| }, | |
| { | |
| "epoch": 0.14837244511733536, | |
| "grad_norm": 1.738864541053772, | |
| "learning_rate": 2.2075471698113207e-06, | |
| "loss": 1.108245849609375, | |
| "step": 196 | |
| }, | |
| { | |
| "epoch": 0.14988644965934897, | |
| "grad_norm": 2.3850369453430176, | |
| "learning_rate": 2.230188679245283e-06, | |
| "loss": 0.9389583468437195, | |
| "step": 198 | |
| }, | |
| { | |
| "epoch": 0.1514004542013626, | |
| "grad_norm": 1.725578784942627, | |
| "learning_rate": 2.2528301886792455e-06, | |
| "loss": 0.8076001405715942, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.15291445874337622, | |
| "grad_norm": 2.7566914558410645, | |
| "learning_rate": 2.2754716981132078e-06, | |
| "loss": 0.9999346733093262, | |
| "step": 202 | |
| }, | |
| { | |
| "epoch": 0.15442846328538987, | |
| "grad_norm": 1.705634355545044, | |
| "learning_rate": 2.29811320754717e-06, | |
| "loss": 0.9279139041900635, | |
| "step": 204 | |
| }, | |
| { | |
| "epoch": 0.15594246782740348, | |
| "grad_norm": 9.00720500946045, | |
| "learning_rate": 2.320754716981132e-06, | |
| "loss": 0.987282931804657, | |
| "step": 206 | |
| }, | |
| { | |
| "epoch": 0.15745647236941712, | |
| "grad_norm": 8.06688117980957, | |
| "learning_rate": 2.3433962264150944e-06, | |
| "loss": 0.6445719599723816, | |
| "step": 208 | |
| }, | |
| { | |
| "epoch": 0.15897047691143074, | |
| "grad_norm": 6.1968183517456055, | |
| "learning_rate": 2.366037735849057e-06, | |
| "loss": 0.8678835034370422, | |
| "step": 210 | |
| }, | |
| { | |
| "epoch": 0.16048448145344435, | |
| "grad_norm": 10.432062149047852, | |
| "learning_rate": 2.388679245283019e-06, | |
| "loss": 0.7351512312889099, | |
| "step": 212 | |
| }, | |
| { | |
| "epoch": 0.161998485995458, | |
| "grad_norm": 1.7967216968536377, | |
| "learning_rate": 2.4113207547169814e-06, | |
| "loss": 1.169299840927124, | |
| "step": 214 | |
| }, | |
| { | |
| "epoch": 0.1635124905374716, | |
| "grad_norm": 3.4995172023773193, | |
| "learning_rate": 2.4339622641509436e-06, | |
| "loss": 0.5779087543487549, | |
| "step": 216 | |
| }, | |
| { | |
| "epoch": 0.16502649507948525, | |
| "grad_norm": 4.677812099456787, | |
| "learning_rate": 2.456603773584906e-06, | |
| "loss": 0.6416253447532654, | |
| "step": 218 | |
| }, | |
| { | |
| "epoch": 0.16654049962149886, | |
| "grad_norm": 2.300534725189209, | |
| "learning_rate": 2.479245283018868e-06, | |
| "loss": 1.3942409753799438, | |
| "step": 220 | |
| }, | |
| { | |
| "epoch": 0.1680545041635125, | |
| "grad_norm": 1.9117436408996582, | |
| "learning_rate": 2.5018867924528302e-06, | |
| "loss": 0.9447011947631836, | |
| "step": 222 | |
| }, | |
| { | |
| "epoch": 0.16956850870552612, | |
| "grad_norm": 4.164151191711426, | |
| "learning_rate": 2.5245283018867925e-06, | |
| "loss": 0.5729636549949646, | |
| "step": 224 | |
| }, | |
| { | |
| "epoch": 0.17108251324753973, | |
| "grad_norm": 2.2320473194122314, | |
| "learning_rate": 2.547169811320755e-06, | |
| "loss": 1.0322617292404175, | |
| "step": 226 | |
| }, | |
| { | |
| "epoch": 0.17259651778955337, | |
| "grad_norm": 1.339289665222168, | |
| "learning_rate": 2.5698113207547173e-06, | |
| "loss": 0.9149308204650879, | |
| "step": 228 | |
| }, | |
| { | |
| "epoch": 0.17411052233156699, | |
| "grad_norm": 3.060145616531372, | |
| "learning_rate": 2.5924528301886795e-06, | |
| "loss": 0.5762428045272827, | |
| "step": 230 | |
| }, | |
| { | |
| "epoch": 0.17562452687358063, | |
| "grad_norm": 3.563155174255371, | |
| "learning_rate": 2.6150943396226417e-06, | |
| "loss": 0.9495343565940857, | |
| "step": 232 | |
| }, | |
| { | |
| "epoch": 0.17713853141559424, | |
| "grad_norm": 2.58048677444458, | |
| "learning_rate": 2.637735849056604e-06, | |
| "loss": 0.556329607963562, | |
| "step": 234 | |
| }, | |
| { | |
| "epoch": 0.17865253595760788, | |
| "grad_norm": 14.05665397644043, | |
| "learning_rate": 2.660377358490566e-06, | |
| "loss": 0.45048168301582336, | |
| "step": 236 | |
| }, | |
| { | |
| "epoch": 0.1801665404996215, | |
| "grad_norm": 4.880133628845215, | |
| "learning_rate": 2.6830188679245283e-06, | |
| "loss": 0.6343849897384644, | |
| "step": 238 | |
| }, | |
| { | |
| "epoch": 0.18168054504163514, | |
| "grad_norm": 1.243223786354065, | |
| "learning_rate": 2.7056603773584905e-06, | |
| "loss": 0.48857587575912476, | |
| "step": 240 | |
| }, | |
| { | |
| "epoch": 0.18319454958364875, | |
| "grad_norm": 3.3495898246765137, | |
| "learning_rate": 2.728301886792453e-06, | |
| "loss": 0.9088899493217468, | |
| "step": 242 | |
| }, | |
| { | |
| "epoch": 0.18470855412566237, | |
| "grad_norm": 4.812596321105957, | |
| "learning_rate": 2.7509433962264154e-06, | |
| "loss": 0.5542827844619751, | |
| "step": 244 | |
| }, | |
| { | |
| "epoch": 0.186222558667676, | |
| "grad_norm": 1.5049998760223389, | |
| "learning_rate": 2.7735849056603776e-06, | |
| "loss": 1.1015349626541138, | |
| "step": 246 | |
| }, | |
| { | |
| "epoch": 0.18773656320968962, | |
| "grad_norm": 3.4939844608306885, | |
| "learning_rate": 2.7962264150943398e-06, | |
| "loss": 0.7408594489097595, | |
| "step": 248 | |
| }, | |
| { | |
| "epoch": 0.18925056775170326, | |
| "grad_norm": 2.150743246078491, | |
| "learning_rate": 2.818867924528302e-06, | |
| "loss": 1.0451587438583374, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 0.19076457229371688, | |
| "grad_norm": 2.757932662963867, | |
| "learning_rate": 2.841509433962264e-06, | |
| "loss": 0.9631202816963196, | |
| "step": 252 | |
| }, | |
| { | |
| "epoch": 0.19227857683573052, | |
| "grad_norm": 2.339393377304077, | |
| "learning_rate": 2.8641509433962264e-06, | |
| "loss": 1.1843230724334717, | |
| "step": 254 | |
| }, | |
| { | |
| "epoch": 0.19379258137774413, | |
| "grad_norm": 2.819859743118286, | |
| "learning_rate": 2.8867924528301886e-06, | |
| "loss": 1.3307970762252808, | |
| "step": 256 | |
| }, | |
| { | |
| "epoch": 0.19530658591975775, | |
| "grad_norm": 2.277029514312744, | |
| "learning_rate": 2.9094339622641512e-06, | |
| "loss": 1.314733862876892, | |
| "step": 258 | |
| }, | |
| { | |
| "epoch": 0.1968205904617714, | |
| "grad_norm": 1.6182972192764282, | |
| "learning_rate": 2.9320754716981135e-06, | |
| "loss": 0.8982797861099243, | |
| "step": 260 | |
| }, | |
| { | |
| "epoch": 0.198334595003785, | |
| "grad_norm": 0.9466566443443298, | |
| "learning_rate": 2.9547169811320757e-06, | |
| "loss": 0.49237707257270813, | |
| "step": 262 | |
| }, | |
| { | |
| "epoch": 0.19984859954579864, | |
| "grad_norm": 2.1412227153778076, | |
| "learning_rate": 2.977358490566038e-06, | |
| "loss": 1.3869421482086182, | |
| "step": 264 | |
| }, | |
| { | |
| "epoch": 0.20136260408781226, | |
| "grad_norm": 4.314519882202148, | |
| "learning_rate": 3e-06, | |
| "loss": 0.5500906705856323, | |
| "step": 266 | |
| }, | |
| { | |
| "epoch": 0.2028766086298259, | |
| "grad_norm": 2.2210774421691895, | |
| "learning_rate": 2.9999989421378913e-06, | |
| "loss": 1.1445879936218262, | |
| "step": 268 | |
| }, | |
| { | |
| "epoch": 0.2043906131718395, | |
| "grad_norm": 1.5420582294464111, | |
| "learning_rate": 2.999995768553224e-06, | |
| "loss": 0.8207805156707764, | |
| "step": 270 | |
| }, | |
| { | |
| "epoch": 0.20590461771385316, | |
| "grad_norm": 2.379532814025879, | |
| "learning_rate": 2.9999904792509703e-06, | |
| "loss": 1.310062289237976, | |
| "step": 272 | |
| }, | |
| { | |
| "epoch": 0.20741862225586677, | |
| "grad_norm": 4.023805141448975, | |
| "learning_rate": 2.999983074239421e-06, | |
| "loss": 0.9318987131118774, | |
| "step": 274 | |
| }, | |
| { | |
| "epoch": 0.20893262679788038, | |
| "grad_norm": 2.1359353065490723, | |
| "learning_rate": 2.99997355353018e-06, | |
| "loss": 0.9912564754486084, | |
| "step": 276 | |
| }, | |
| { | |
| "epoch": 0.21044663133989402, | |
| "grad_norm": 4.8719563484191895, | |
| "learning_rate": 2.9999619171381696e-06, | |
| "loss": 0.9644961953163147, | |
| "step": 278 | |
| }, | |
| { | |
| "epoch": 0.21196063588190764, | |
| "grad_norm": 4.153341770172119, | |
| "learning_rate": 2.9999481650816256e-06, | |
| "loss": 0.9800684452056885, | |
| "step": 280 | |
| }, | |
| { | |
| "epoch": 0.21347464042392128, | |
| "grad_norm": 2.0010852813720703, | |
| "learning_rate": 2.9999322973821e-06, | |
| "loss": 1.0485155582427979, | |
| "step": 282 | |
| }, | |
| { | |
| "epoch": 0.2149886449659349, | |
| "grad_norm": 3.15071702003479, | |
| "learning_rate": 2.9999143140644616e-06, | |
| "loss": 0.6141320466995239, | |
| "step": 284 | |
| }, | |
| { | |
| "epoch": 0.21650264950794854, | |
| "grad_norm": 2.1563243865966797, | |
| "learning_rate": 2.9998942151568927e-06, | |
| "loss": 0.68123859167099, | |
| "step": 286 | |
| }, | |
| { | |
| "epoch": 0.21801665404996215, | |
| "grad_norm": 2.4670424461364746, | |
| "learning_rate": 2.9998720006908934e-06, | |
| "loss": 0.880809485912323, | |
| "step": 288 | |
| }, | |
| { | |
| "epoch": 0.21953065859197576, | |
| "grad_norm": 2.3046791553497314, | |
| "learning_rate": 2.9998476707012776e-06, | |
| "loss": 0.7388229966163635, | |
| "step": 290 | |
| }, | |
| { | |
| "epoch": 0.2210446631339894, | |
| "grad_norm": 1.8321624994277954, | |
| "learning_rate": 2.999821225226176e-06, | |
| "loss": 0.9509535431861877, | |
| "step": 292 | |
| }, | |
| { | |
| "epoch": 0.22255866767600302, | |
| "grad_norm": 2.593620777130127, | |
| "learning_rate": 2.9997926643070335e-06, | |
| "loss": 0.8668114542961121, | |
| "step": 294 | |
| }, | |
| { | |
| "epoch": 0.22407267221801666, | |
| "grad_norm": 1.3647516965866089, | |
| "learning_rate": 2.999761987988611e-06, | |
| "loss": 0.5078874826431274, | |
| "step": 296 | |
| }, | |
| { | |
| "epoch": 0.22558667676003027, | |
| "grad_norm": 21.98381805419922, | |
| "learning_rate": 2.9997291963189844e-06, | |
| "loss": 0.6921195983886719, | |
| "step": 298 | |
| }, | |
| { | |
| "epoch": 0.22710068130204392, | |
| "grad_norm": 1.7081471681594849, | |
| "learning_rate": 2.9996942893495453e-06, | |
| "loss": 1.2570655345916748, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 0.22861468584405753, | |
| "grad_norm": 1.7225868701934814, | |
| "learning_rate": 2.999657267135e-06, | |
| "loss": 0.9504136443138123, | |
| "step": 302 | |
| }, | |
| { | |
| "epoch": 0.23012869038607117, | |
| "grad_norm": 15.755874633789062, | |
| "learning_rate": 2.9996181297333692e-06, | |
| "loss": 0.9759711027145386, | |
| "step": 304 | |
| }, | |
| { | |
| "epoch": 0.23164269492808479, | |
| "grad_norm": 1.7268153429031372, | |
| "learning_rate": 2.99957687720599e-06, | |
| "loss": 1.3015202283859253, | |
| "step": 306 | |
| }, | |
| { | |
| "epoch": 0.2331566994700984, | |
| "grad_norm": 2.12369704246521, | |
| "learning_rate": 2.999533509617513e-06, | |
| "loss": 0.6153337955474854, | |
| "step": 308 | |
| }, | |
| { | |
| "epoch": 0.23467070401211204, | |
| "grad_norm": 3.8299176692962646, | |
| "learning_rate": 2.9994880270359037e-06, | |
| "loss": 0.9555022120475769, | |
| "step": 310 | |
| }, | |
| { | |
| "epoch": 0.23618470855412566, | |
| "grad_norm": 4.080057144165039, | |
| "learning_rate": 2.999440429532443e-06, | |
| "loss": 0.5967095494270325, | |
| "step": 312 | |
| }, | |
| { | |
| "epoch": 0.2376987130961393, | |
| "grad_norm": 2.641312837600708, | |
| "learning_rate": 2.9993907171817255e-06, | |
| "loss": 0.5727935433387756, | |
| "step": 314 | |
| }, | |
| { | |
| "epoch": 0.2392127176381529, | |
| "grad_norm": 2.667450189590454, | |
| "learning_rate": 2.9993388900616615e-06, | |
| "loss": 0.8149162530899048, | |
| "step": 316 | |
| }, | |
| { | |
| "epoch": 0.24072672218016655, | |
| "grad_norm": 1.8847966194152832, | |
| "learning_rate": 2.9992849482534736e-06, | |
| "loss": 0.8662533760070801, | |
| "step": 318 | |
| }, | |
| { | |
| "epoch": 0.24224072672218017, | |
| "grad_norm": 3.9809248447418213, | |
| "learning_rate": 2.9992288918417003e-06, | |
| "loss": 0.9989725947380066, | |
| "step": 320 | |
| }, | |
| { | |
| "epoch": 0.24375473126419378, | |
| "grad_norm": 2.6532821655273438, | |
| "learning_rate": 2.999170720914192e-06, | |
| "loss": 1.308701992034912, | |
| "step": 322 | |
| }, | |
| { | |
| "epoch": 0.24526873580620742, | |
| "grad_norm": 4.844236373901367, | |
| "learning_rate": 2.999110435562116e-06, | |
| "loss": 0.4661545157432556, | |
| "step": 324 | |
| }, | |
| { | |
| "epoch": 0.24678274034822104, | |
| "grad_norm": 1.7858662605285645, | |
| "learning_rate": 2.999048035879951e-06, | |
| "loss": 0.8855746984481812, | |
| "step": 326 | |
| }, | |
| { | |
| "epoch": 0.24829674489023468, | |
| "grad_norm": 1.814077615737915, | |
| "learning_rate": 2.9989835219654903e-06, | |
| "loss": 1.0955607891082764, | |
| "step": 328 | |
| }, | |
| { | |
| "epoch": 0.2498107494322483, | |
| "grad_norm": 2.4321069717407227, | |
| "learning_rate": 2.99891689391984e-06, | |
| "loss": 1.3380382061004639, | |
| "step": 330 | |
| }, | |
| { | |
| "epoch": 0.2513247539742619, | |
| "grad_norm": 4.530355453491211, | |
| "learning_rate": 2.99884815184742e-06, | |
| "loss": 0.5787966251373291, | |
| "step": 332 | |
| }, | |
| { | |
| "epoch": 0.2528387585162756, | |
| "grad_norm": 2.016003131866455, | |
| "learning_rate": 2.9987772958559627e-06, | |
| "loss": 0.6646069288253784, | |
| "step": 334 | |
| }, | |
| { | |
| "epoch": 0.2543527630582892, | |
| "grad_norm": 2.79805064201355, | |
| "learning_rate": 2.998704326056514e-06, | |
| "loss": 1.3354549407958984, | |
| "step": 336 | |
| }, | |
| { | |
| "epoch": 0.2558667676003028, | |
| "grad_norm": 6.575490474700928, | |
| "learning_rate": 2.998629242563432e-06, | |
| "loss": 0.3947051763534546, | |
| "step": 338 | |
| }, | |
| { | |
| "epoch": 0.2573807721423164, | |
| "grad_norm": 4.710763931274414, | |
| "learning_rate": 2.998552045494389e-06, | |
| "loss": 0.4745977222919464, | |
| "step": 340 | |
| }, | |
| { | |
| "epoch": 0.25889477668433003, | |
| "grad_norm": 2.4593327045440674, | |
| "learning_rate": 2.998472734970367e-06, | |
| "loss": 0.5059724450111389, | |
| "step": 342 | |
| }, | |
| { | |
| "epoch": 0.2604087812263437, | |
| "grad_norm": 2.6149814128875732, | |
| "learning_rate": 2.998391311115663e-06, | |
| "loss": 0.7527101635932922, | |
| "step": 344 | |
| }, | |
| { | |
| "epoch": 0.2619227857683573, | |
| "grad_norm": 3.2358217239379883, | |
| "learning_rate": 2.9983077740578835e-06, | |
| "loss": 0.4932746887207031, | |
| "step": 346 | |
| }, | |
| { | |
| "epoch": 0.2634367903103709, | |
| "grad_norm": 2.8413195610046387, | |
| "learning_rate": 2.998222123927949e-06, | |
| "loss": 0.5023959875106812, | |
| "step": 348 | |
| }, | |
| { | |
| "epoch": 0.26495079485238454, | |
| "grad_norm": 2.284940719604492, | |
| "learning_rate": 2.9981343608600907e-06, | |
| "loss": 0.530271589756012, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 0.2664647993943982, | |
| "grad_norm": 1.8905975818634033, | |
| "learning_rate": 2.99804448499185e-06, | |
| "loss": 1.058239459991455, | |
| "step": 352 | |
| }, | |
| { | |
| "epoch": 0.2679788039364118, | |
| "grad_norm": 1.2502957582473755, | |
| "learning_rate": 2.9979524964640826e-06, | |
| "loss": 0.575753390789032, | |
| "step": 354 | |
| }, | |
| { | |
| "epoch": 0.26949280847842544, | |
| "grad_norm": 1.3402975797653198, | |
| "learning_rate": 2.997858395420951e-06, | |
| "loss": 0.7238829135894775, | |
| "step": 356 | |
| }, | |
| { | |
| "epoch": 0.27100681302043905, | |
| "grad_norm": 4.392447471618652, | |
| "learning_rate": 2.9977621820099316e-06, | |
| "loss": 1.221725583076477, | |
| "step": 358 | |
| }, | |
| { | |
| "epoch": 0.27252081756245267, | |
| "grad_norm": 1.6761703491210938, | |
| "learning_rate": 2.997663856381811e-06, | |
| "loss": 0.42734482884407043, | |
| "step": 360 | |
| }, | |
| { | |
| "epoch": 0.27403482210446634, | |
| "grad_norm": 2.298595905303955, | |
| "learning_rate": 2.997563418690685e-06, | |
| "loss": 0.9713137745857239, | |
| "step": 362 | |
| }, | |
| { | |
| "epoch": 0.27554882664647995, | |
| "grad_norm": 0.6969879269599915, | |
| "learning_rate": 2.997460869093959e-06, | |
| "loss": 0.31093937158584595, | |
| "step": 364 | |
| }, | |
| { | |
| "epoch": 0.27706283118849356, | |
| "grad_norm": 2.1157233715057373, | |
| "learning_rate": 2.9973562077523503e-06, | |
| "loss": 0.6549879908561707, | |
| "step": 366 | |
| }, | |
| { | |
| "epoch": 0.2785768357305072, | |
| "grad_norm": 1.964150071144104, | |
| "learning_rate": 2.9972494348298837e-06, | |
| "loss": 0.8664920330047607, | |
| "step": 368 | |
| }, | |
| { | |
| "epoch": 0.2800908402725208, | |
| "grad_norm": 2.272841215133667, | |
| "learning_rate": 2.9971405504938943e-06, | |
| "loss": 0.48350414633750916, | |
| "step": 370 | |
| }, | |
| { | |
| "epoch": 0.28160484481453446, | |
| "grad_norm": 1.9322324991226196, | |
| "learning_rate": 2.9970295549150265e-06, | |
| "loss": 0.9229208827018738, | |
| "step": 372 | |
| }, | |
| { | |
| "epoch": 0.2831188493565481, | |
| "grad_norm": 1.7196455001831055, | |
| "learning_rate": 2.996916448267232e-06, | |
| "loss": 0.9739328622817993, | |
| "step": 374 | |
| }, | |
| { | |
| "epoch": 0.2846328538985617, | |
| "grad_norm": 2.918416738510132, | |
| "learning_rate": 2.9968012307277723e-06, | |
| "loss": 0.48950931429862976, | |
| "step": 376 | |
| }, | |
| { | |
| "epoch": 0.2861468584405753, | |
| "grad_norm": 9.320439338684082, | |
| "learning_rate": 2.9966839024772165e-06, | |
| "loss": 0.8621902465820312, | |
| "step": 378 | |
| }, | |
| { | |
| "epoch": 0.28766086298258897, | |
| "grad_norm": 1.188773512840271, | |
| "learning_rate": 2.9965644636994427e-06, | |
| "loss": 1.0359840393066406, | |
| "step": 380 | |
| }, | |
| { | |
| "epoch": 0.2891748675246026, | |
| "grad_norm": 2.3963191509246826, | |
| "learning_rate": 2.996442914581634e-06, | |
| "loss": 0.5880563259124756, | |
| "step": 382 | |
| }, | |
| { | |
| "epoch": 0.2906888720666162, | |
| "grad_norm": 2.4038655757904053, | |
| "learning_rate": 2.996319255314284e-06, | |
| "loss": 0.5257418751716614, | |
| "step": 384 | |
| }, | |
| { | |
| "epoch": 0.2922028766086298, | |
| "grad_norm": 1.5974441766738892, | |
| "learning_rate": 2.996193486091192e-06, | |
| "loss": 1.2693966627120972, | |
| "step": 386 | |
| }, | |
| { | |
| "epoch": 0.2937168811506434, | |
| "grad_norm": 1.8292707204818726, | |
| "learning_rate": 2.9960656071094628e-06, | |
| "loss": 0.9720443487167358, | |
| "step": 388 | |
| }, | |
| { | |
| "epoch": 0.2952308856926571, | |
| "grad_norm": 1.8199464082717896, | |
| "learning_rate": 2.9959356185695096e-06, | |
| "loss": 1.135921835899353, | |
| "step": 390 | |
| }, | |
| { | |
| "epoch": 0.2967448902346707, | |
| "grad_norm": 0.9021064639091492, | |
| "learning_rate": 2.9958035206750504e-06, | |
| "loss": 1.1319067478179932, | |
| "step": 392 | |
| }, | |
| { | |
| "epoch": 0.2982588947766843, | |
| "grad_norm": 3.2225494384765625, | |
| "learning_rate": 2.9956693136331096e-06, | |
| "loss": 0.9472005367279053, | |
| "step": 394 | |
| }, | |
| { | |
| "epoch": 0.29977289931869794, | |
| "grad_norm": 4.63291597366333, | |
| "learning_rate": 2.995532997654017e-06, | |
| "loss": 0.99593186378479, | |
| "step": 396 | |
| }, | |
| { | |
| "epoch": 0.3012869038607116, | |
| "grad_norm": 1.8574267625808716, | |
| "learning_rate": 2.9953945729514073e-06, | |
| "loss": 0.5503549575805664, | |
| "step": 398 | |
| }, | |
| { | |
| "epoch": 0.3028009084027252, | |
| "grad_norm": 3.5578439235687256, | |
| "learning_rate": 2.99525403974222e-06, | |
| "loss": 0.8387674689292908, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 0.30431491294473884, | |
| "grad_norm": 6.638049602508545, | |
| "learning_rate": 2.9951113982466996e-06, | |
| "loss": 0.5031633377075195, | |
| "step": 402 | |
| }, | |
| { | |
| "epoch": 0.30582891748675245, | |
| "grad_norm": 2.846966505050659, | |
| "learning_rate": 2.9949666486883937e-06, | |
| "loss": 0.9956486225128174, | |
| "step": 404 | |
| }, | |
| { | |
| "epoch": 0.30734292202876606, | |
| "grad_norm": 1.3141142129898071, | |
| "learning_rate": 2.9948197912941546e-06, | |
| "loss": 0.288853257894516, | |
| "step": 406 | |
| }, | |
| { | |
| "epoch": 0.30885692657077973, | |
| "grad_norm": 3.5708980560302734, | |
| "learning_rate": 2.994670826294138e-06, | |
| "loss": 1.5125066041946411, | |
| "step": 408 | |
| }, | |
| { | |
| "epoch": 0.31037093111279335, | |
| "grad_norm": 7.3045125007629395, | |
| "learning_rate": 2.9945197539218017e-06, | |
| "loss": 0.6546447277069092, | |
| "step": 410 | |
| }, | |
| { | |
| "epoch": 0.31188493565480696, | |
| "grad_norm": 2.1153337955474854, | |
| "learning_rate": 2.9943665744139075e-06, | |
| "loss": 1.3320523500442505, | |
| "step": 412 | |
| }, | |
| { | |
| "epoch": 0.3133989401968206, | |
| "grad_norm": 4.322186470031738, | |
| "learning_rate": 2.9942112880105175e-06, | |
| "loss": 0.9307819604873657, | |
| "step": 414 | |
| }, | |
| { | |
| "epoch": 0.31491294473883424, | |
| "grad_norm": 2.4291980266571045, | |
| "learning_rate": 2.9940538949549984e-06, | |
| "loss": 0.8921429514884949, | |
| "step": 416 | |
| }, | |
| { | |
| "epoch": 0.31642694928084786, | |
| "grad_norm": 2.061569929122925, | |
| "learning_rate": 2.9938943954940167e-06, | |
| "loss": 0.9342135787010193, | |
| "step": 418 | |
| }, | |
| { | |
| "epoch": 0.31794095382286147, | |
| "grad_norm": 4.544994831085205, | |
| "learning_rate": 2.9937327898775406e-06, | |
| "loss": 0.5935999751091003, | |
| "step": 420 | |
| }, | |
| { | |
| "epoch": 0.3194549583648751, | |
| "grad_norm": 3.4589638710021973, | |
| "learning_rate": 2.9935690783588378e-06, | |
| "loss": 0.41867610812187195, | |
| "step": 422 | |
| }, | |
| { | |
| "epoch": 0.3209689629068887, | |
| "grad_norm": 2.5398411750793457, | |
| "learning_rate": 2.993403261194479e-06, | |
| "loss": 1.2433795928955078, | |
| "step": 424 | |
| }, | |
| { | |
| "epoch": 0.32248296744890237, | |
| "grad_norm": 1.7882115840911865, | |
| "learning_rate": 2.9932353386443325e-06, | |
| "loss": 0.7358930706977844, | |
| "step": 426 | |
| }, | |
| { | |
| "epoch": 0.323996971990916, | |
| "grad_norm": 2.736948013305664, | |
| "learning_rate": 2.993065310971568e-06, | |
| "loss": 0.954931378364563, | |
| "step": 428 | |
| }, | |
| { | |
| "epoch": 0.3255109765329296, | |
| "grad_norm": 10.501503944396973, | |
| "learning_rate": 2.992893178442652e-06, | |
| "loss": 1.289313793182373, | |
| "step": 430 | |
| }, | |
| { | |
| "epoch": 0.3270249810749432, | |
| "grad_norm": 2.1437489986419678, | |
| "learning_rate": 2.9927189413273517e-06, | |
| "loss": 0.5708879232406616, | |
| "step": 432 | |
| }, | |
| { | |
| "epoch": 0.3285389856169568, | |
| "grad_norm": 3.5188357830047607, | |
| "learning_rate": 2.9925425998987326e-06, | |
| "loss": 0.5585161447525024, | |
| "step": 434 | |
| }, | |
| { | |
| "epoch": 0.3300529901589705, | |
| "grad_norm": 1.5307719707489014, | |
| "learning_rate": 2.992364154433157e-06, | |
| "loss": 0.9125571250915527, | |
| "step": 436 | |
| }, | |
| { | |
| "epoch": 0.3315669947009841, | |
| "grad_norm": 2.313886880874634, | |
| "learning_rate": 2.9921836052102853e-06, | |
| "loss": 1.2311666011810303, | |
| "step": 438 | |
| }, | |
| { | |
| "epoch": 0.3330809992429977, | |
| "grad_norm": 1.5364906787872314, | |
| "learning_rate": 2.992000952513075e-06, | |
| "loss": 0.5001219511032104, | |
| "step": 440 | |
| }, | |
| { | |
| "epoch": 0.33459500378501134, | |
| "grad_norm": 2.821791648864746, | |
| "learning_rate": 2.991816196627779e-06, | |
| "loss": 0.5236578583717346, | |
| "step": 442 | |
| }, | |
| { | |
| "epoch": 0.336109008327025, | |
| "grad_norm": 2.0568583011627197, | |
| "learning_rate": 2.991629337843949e-06, | |
| "loss": 0.5407780408859253, | |
| "step": 444 | |
| }, | |
| { | |
| "epoch": 0.3376230128690386, | |
| "grad_norm": 1.2596296072006226, | |
| "learning_rate": 2.9914403764544296e-06, | |
| "loss": 1.2255700826644897, | |
| "step": 446 | |
| }, | |
| { | |
| "epoch": 0.33913701741105223, | |
| "grad_norm": 3.2330868244171143, | |
| "learning_rate": 2.991249312755362e-06, | |
| "loss": 0.9774052500724792, | |
| "step": 448 | |
| }, | |
| { | |
| "epoch": 0.34065102195306585, | |
| "grad_norm": 2.749786853790283, | |
| "learning_rate": 2.991056147046181e-06, | |
| "loss": 0.5227469205856323, | |
| "step": 450 | |
| }, | |
| { | |
| "epoch": 0.34216502649507946, | |
| "grad_norm": 1.334883213043213, | |
| "learning_rate": 2.990860879629618e-06, | |
| "loss": 1.231229543685913, | |
| "step": 452 | |
| }, | |
| { | |
| "epoch": 0.34367903103709313, | |
| "grad_norm": 4.1452860832214355, | |
| "learning_rate": 2.990663510811695e-06, | |
| "loss": 1.0209695100784302, | |
| "step": 454 | |
| }, | |
| { | |
| "epoch": 0.34519303557910674, | |
| "grad_norm": 1.3384249210357666, | |
| "learning_rate": 2.9904640409017305e-06, | |
| "loss": 0.9044679403305054, | |
| "step": 456 | |
| }, | |
| { | |
| "epoch": 0.34670704012112036, | |
| "grad_norm": 1.9722930192947388, | |
| "learning_rate": 2.9902624702123334e-06, | |
| "loss": 0.5573891997337341, | |
| "step": 458 | |
| }, | |
| { | |
| "epoch": 0.34822104466313397, | |
| "grad_norm": 3.713996648788452, | |
| "learning_rate": 2.9900587990594068e-06, | |
| "loss": 0.7835407853126526, | |
| "step": 460 | |
| }, | |
| { | |
| "epoch": 0.34973504920514764, | |
| "grad_norm": 6.265981197357178, | |
| "learning_rate": 2.989853027762144e-06, | |
| "loss": 0.9101991653442383, | |
| "step": 462 | |
| }, | |
| { | |
| "epoch": 0.35124905374716126, | |
| "grad_norm": 2.356494665145874, | |
| "learning_rate": 2.989645156643031e-06, | |
| "loss": 1.044858694076538, | |
| "step": 464 | |
| }, | |
| { | |
| "epoch": 0.35276305828917487, | |
| "grad_norm": 3.8169243335723877, | |
| "learning_rate": 2.9894351860278433e-06, | |
| "loss": 0.38478291034698486, | |
| "step": 466 | |
| }, | |
| { | |
| "epoch": 0.3542770628311885, | |
| "grad_norm": 3.984137535095215, | |
| "learning_rate": 2.989223116245648e-06, | |
| "loss": 0.4994244873523712, | |
| "step": 468 | |
| }, | |
| { | |
| "epoch": 0.3557910673732021, | |
| "grad_norm": 4.4269185066223145, | |
| "learning_rate": 2.9890089476288017e-06, | |
| "loss": 0.8007203340530396, | |
| "step": 470 | |
| }, | |
| { | |
| "epoch": 0.35730507191521577, | |
| "grad_norm": 4.96340799331665, | |
| "learning_rate": 2.98879268051295e-06, | |
| "loss": 0.8296163082122803, | |
| "step": 472 | |
| }, | |
| { | |
| "epoch": 0.3588190764572294, | |
| "grad_norm": 3.9115028381347656, | |
| "learning_rate": 2.9885743152370267e-06, | |
| "loss": 0.46959009766578674, | |
| "step": 474 | |
| }, | |
| { | |
| "epoch": 0.360333080999243, | |
| "grad_norm": 2.773907423019409, | |
| "learning_rate": 2.9883538521432557e-06, | |
| "loss": 0.9441350102424622, | |
| "step": 476 | |
| }, | |
| { | |
| "epoch": 0.3618470855412566, | |
| "grad_norm": 3.812221050262451, | |
| "learning_rate": 2.988131291577147e-06, | |
| "loss": 0.8799582123756409, | |
| "step": 478 | |
| }, | |
| { | |
| "epoch": 0.3633610900832703, | |
| "grad_norm": 5.634382247924805, | |
| "learning_rate": 2.9879066338874974e-06, | |
| "loss": 0.9772864580154419, | |
| "step": 480 | |
| }, | |
| { | |
| "epoch": 0.3648750946252839, | |
| "grad_norm": 5.383456707000732, | |
| "learning_rate": 2.9876798794263923e-06, | |
| "loss": 0.4553401470184326, | |
| "step": 482 | |
| }, | |
| { | |
| "epoch": 0.3663890991672975, | |
| "grad_norm": 1.5636881589889526, | |
| "learning_rate": 2.9874510285492013e-06, | |
| "loss": 1.2522468566894531, | |
| "step": 484 | |
| }, | |
| { | |
| "epoch": 0.3679031037093111, | |
| "grad_norm": 2.218456506729126, | |
| "learning_rate": 2.9872200816145807e-06, | |
| "loss": 1.2982913255691528, | |
| "step": 486 | |
| }, | |
| { | |
| "epoch": 0.36941710825132473, | |
| "grad_norm": 2.1322004795074463, | |
| "learning_rate": 2.986987038984471e-06, | |
| "loss": 0.6175529956817627, | |
| "step": 488 | |
| }, | |
| { | |
| "epoch": 0.3709311127933384, | |
| "grad_norm": 2.873570203781128, | |
| "learning_rate": 2.9867519010240975e-06, | |
| "loss": 0.5848088264465332, | |
| "step": 490 | |
| }, | |
| { | |
| "epoch": 0.372445117335352, | |
| "grad_norm": 2.3397469520568848, | |
| "learning_rate": 2.9865146681019688e-06, | |
| "loss": 1.2863105535507202, | |
| "step": 492 | |
| }, | |
| { | |
| "epoch": 0.37395912187736563, | |
| "grad_norm": 1.7062677145004272, | |
| "learning_rate": 2.986275340589877e-06, | |
| "loss": 0.7637373208999634, | |
| "step": 494 | |
| }, | |
| { | |
| "epoch": 0.37547312641937924, | |
| "grad_norm": 3.172126054763794, | |
| "learning_rate": 2.9860339188628978e-06, | |
| "loss": 1.1434704065322876, | |
| "step": 496 | |
| }, | |
| { | |
| "epoch": 0.37698713096139286, | |
| "grad_norm": 6.345958232879639, | |
| "learning_rate": 2.985790403299387e-06, | |
| "loss": 0.7667356729507446, | |
| "step": 498 | |
| }, | |
| { | |
| "epoch": 0.3785011355034065, | |
| "grad_norm": 1.734782099723816, | |
| "learning_rate": 2.9855447942809834e-06, | |
| "loss": 0.8624241352081299, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.38001514004542014, | |
| "grad_norm": 7.933587074279785, | |
| "learning_rate": 2.985297092192606e-06, | |
| "loss": 0.9229949116706848, | |
| "step": 502 | |
| }, | |
| { | |
| "epoch": 0.38152914458743376, | |
| "grad_norm": 2.319852113723755, | |
| "learning_rate": 2.9850472974224557e-06, | |
| "loss": 1.3553284406661987, | |
| "step": 504 | |
| }, | |
| { | |
| "epoch": 0.38304314912944737, | |
| "grad_norm": 1.4423457384109497, | |
| "learning_rate": 2.9847954103620095e-06, | |
| "loss": 0.8387634754180908, | |
| "step": 506 | |
| }, | |
| { | |
| "epoch": 0.38455715367146104, | |
| "grad_norm": 4.607706069946289, | |
| "learning_rate": 2.9845414314060265e-06, | |
| "loss": 0.6004860401153564, | |
| "step": 508 | |
| }, | |
| { | |
| "epoch": 0.38607115821347465, | |
| "grad_norm": 4.358503818511963, | |
| "learning_rate": 2.984285360952544e-06, | |
| "loss": 0.5657727122306824, | |
| "step": 510 | |
| }, | |
| { | |
| "epoch": 0.38758516275548827, | |
| "grad_norm": 1.6350818872451782, | |
| "learning_rate": 2.9840271994028754e-06, | |
| "loss": 0.4921954274177551, | |
| "step": 512 | |
| }, | |
| { | |
| "epoch": 0.3890991672975019, | |
| "grad_norm": 2.449856758117676, | |
| "learning_rate": 2.9837669471616124e-06, | |
| "loss": 0.6726453900337219, | |
| "step": 514 | |
| }, | |
| { | |
| "epoch": 0.3906131718395155, | |
| "grad_norm": 1.7032192945480347, | |
| "learning_rate": 2.9835046046366237e-06, | |
| "loss": 1.297200083732605, | |
| "step": 516 | |
| }, | |
| { | |
| "epoch": 0.39212717638152916, | |
| "grad_norm": 3.538943290710449, | |
| "learning_rate": 2.983240172239053e-06, | |
| "loss": 0.8581514358520508, | |
| "step": 518 | |
| }, | |
| { | |
| "epoch": 0.3936411809235428, | |
| "grad_norm": 6.8904194831848145, | |
| "learning_rate": 2.98297365038332e-06, | |
| "loss": 0.5082361698150635, | |
| "step": 520 | |
| }, | |
| { | |
| "epoch": 0.3951551854655564, | |
| "grad_norm": 3.4525930881500244, | |
| "learning_rate": 2.982705039487118e-06, | |
| "loss": 0.4789521098136902, | |
| "step": 522 | |
| }, | |
| { | |
| "epoch": 0.39666919000757, | |
| "grad_norm": 1.7367075681686401, | |
| "learning_rate": 2.9824343399714144e-06, | |
| "loss": 0.9578684568405151, | |
| "step": 524 | |
| }, | |
| { | |
| "epoch": 0.3981831945495837, | |
| "grad_norm": 1.2806156873703003, | |
| "learning_rate": 2.9821615522604515e-06, | |
| "loss": 1.3319694995880127, | |
| "step": 526 | |
| }, | |
| { | |
| "epoch": 0.3996971990915973, | |
| "grad_norm": 1.590675950050354, | |
| "learning_rate": 2.9818866767817425e-06, | |
| "loss": 0.8608264327049255, | |
| "step": 528 | |
| }, | |
| { | |
| "epoch": 0.4012112036336109, | |
| "grad_norm": 5.6035943031311035, | |
| "learning_rate": 2.981609713966073e-06, | |
| "loss": 0.46807175874710083, | |
| "step": 530 | |
| }, | |
| { | |
| "epoch": 0.4027252081756245, | |
| "grad_norm": 1.963212251663208, | |
| "learning_rate": 2.9813306642475005e-06, | |
| "loss": 1.2523926496505737, | |
| "step": 532 | |
| }, | |
| { | |
| "epoch": 0.40423921271763813, | |
| "grad_norm": 1.9907010793685913, | |
| "learning_rate": 2.9810495280633517e-06, | |
| "loss": 0.8681015372276306, | |
| "step": 534 | |
| }, | |
| { | |
| "epoch": 0.4057532172596518, | |
| "grad_norm": 3.108275890350342, | |
| "learning_rate": 2.980766305854225e-06, | |
| "loss": 0.8309670090675354, | |
| "step": 536 | |
| }, | |
| { | |
| "epoch": 0.4072672218016654, | |
| "grad_norm": 2.425701856613159, | |
| "learning_rate": 2.9804809980639865e-06, | |
| "loss": 0.8349297046661377, | |
| "step": 538 | |
| }, | |
| { | |
| "epoch": 0.408781226343679, | |
| "grad_norm": 2.3050215244293213, | |
| "learning_rate": 2.9801936051397717e-06, | |
| "loss": 0.5291258096694946, | |
| "step": 540 | |
| }, | |
| { | |
| "epoch": 0.41029523088569264, | |
| "grad_norm": 4.254032611846924, | |
| "learning_rate": 2.979904127531984e-06, | |
| "loss": 0.5817729234695435, | |
| "step": 542 | |
| }, | |
| { | |
| "epoch": 0.4118092354277063, | |
| "grad_norm": 2.6303694248199463, | |
| "learning_rate": 2.9796125656942925e-06, | |
| "loss": 0.6045780777931213, | |
| "step": 544 | |
| }, | |
| { | |
| "epoch": 0.4133232399697199, | |
| "grad_norm": 3.902345895767212, | |
| "learning_rate": 2.9793189200836356e-06, | |
| "loss": 0.6001653075218201, | |
| "step": 546 | |
| }, | |
| { | |
| "epoch": 0.41483724451173354, | |
| "grad_norm": 3.3723676204681396, | |
| "learning_rate": 2.9790231911602143e-06, | |
| "loss": 0.5705008506774902, | |
| "step": 548 | |
| }, | |
| { | |
| "epoch": 0.41635124905374715, | |
| "grad_norm": 3.0511634349823, | |
| "learning_rate": 2.9787253793874958e-06, | |
| "loss": 0.9024116396903992, | |
| "step": 550 | |
| }, | |
| { | |
| "epoch": 0.41786525359576077, | |
| "grad_norm": 3.0294415950775146, | |
| "learning_rate": 2.9784254852322125e-06, | |
| "loss": 0.6247788667678833, | |
| "step": 552 | |
| }, | |
| { | |
| "epoch": 0.41937925813777444, | |
| "grad_norm": 1.308551549911499, | |
| "learning_rate": 2.9781235091643587e-06, | |
| "loss": 0.8290607333183289, | |
| "step": 554 | |
| }, | |
| { | |
| "epoch": 0.42089326267978805, | |
| "grad_norm": 1.2458851337432861, | |
| "learning_rate": 2.977819451657193e-06, | |
| "loss": 0.5316109657287598, | |
| "step": 556 | |
| }, | |
| { | |
| "epoch": 0.42240726722180166, | |
| "grad_norm": 2.5829837322235107, | |
| "learning_rate": 2.9775133131872347e-06, | |
| "loss": 1.2767724990844727, | |
| "step": 558 | |
| }, | |
| { | |
| "epoch": 0.4239212717638153, | |
| "grad_norm": 3.8361194133758545, | |
| "learning_rate": 2.977205094234265e-06, | |
| "loss": 0.9774861335754395, | |
| "step": 560 | |
| }, | |
| { | |
| "epoch": 0.4254352763058289, | |
| "grad_norm": 2.603776693344116, | |
| "learning_rate": 2.976894795281326e-06, | |
| "loss": 0.85866379737854, | |
| "step": 562 | |
| }, | |
| { | |
| "epoch": 0.42694928084784256, | |
| "grad_norm": 1.0089654922485352, | |
| "learning_rate": 2.9765824168147184e-06, | |
| "loss": 0.7369099855422974, | |
| "step": 564 | |
| }, | |
| { | |
| "epoch": 0.4284632853898562, | |
| "grad_norm": 6.3280205726623535, | |
| "learning_rate": 2.9762679593240034e-06, | |
| "loss": 0.5556588768959045, | |
| "step": 566 | |
| }, | |
| { | |
| "epoch": 0.4299772899318698, | |
| "grad_norm": 1.7390072345733643, | |
| "learning_rate": 2.975951423302e-06, | |
| "loss": 1.0518251657485962, | |
| "step": 568 | |
| }, | |
| { | |
| "epoch": 0.4314912944738834, | |
| "grad_norm": 2.6690053939819336, | |
| "learning_rate": 2.9756328092447835e-06, | |
| "loss": 1.2721149921417236, | |
| "step": 570 | |
| }, | |
| { | |
| "epoch": 0.43300529901589707, | |
| "grad_norm": 0.7687424421310425, | |
| "learning_rate": 2.9753121176516876e-06, | |
| "loss": 0.6695957779884338, | |
| "step": 572 | |
| }, | |
| { | |
| "epoch": 0.4345193035579107, | |
| "grad_norm": 1.9535387754440308, | |
| "learning_rate": 2.9749893490253e-06, | |
| "loss": 1.1010643243789673, | |
| "step": 574 | |
| }, | |
| { | |
| "epoch": 0.4360333080999243, | |
| "grad_norm": 9.781461715698242, | |
| "learning_rate": 2.974664503871467e-06, | |
| "loss": 1.0317606925964355, | |
| "step": 576 | |
| }, | |
| { | |
| "epoch": 0.4375473126419379, | |
| "grad_norm": 4.649098873138428, | |
| "learning_rate": 2.9743375826992848e-06, | |
| "loss": 1.0596553087234497, | |
| "step": 578 | |
| }, | |
| { | |
| "epoch": 0.4390613171839515, | |
| "grad_norm": 2.2979838848114014, | |
| "learning_rate": 2.9740085860211066e-06, | |
| "loss": 1.2803912162780762, | |
| "step": 580 | |
| }, | |
| { | |
| "epoch": 0.4405753217259652, | |
| "grad_norm": 2.245401382446289, | |
| "learning_rate": 2.9736775143525363e-06, | |
| "loss": 0.9560354948043823, | |
| "step": 582 | |
| }, | |
| { | |
| "epoch": 0.4420893262679788, | |
| "grad_norm": 3.6574487686157227, | |
| "learning_rate": 2.9733443682124303e-06, | |
| "loss": 0.6721242666244507, | |
| "step": 584 | |
| }, | |
| { | |
| "epoch": 0.4436033308099924, | |
| "grad_norm": 2.0253052711486816, | |
| "learning_rate": 2.9730091481228967e-06, | |
| "loss": 1.2475272417068481, | |
| "step": 586 | |
| }, | |
| { | |
| "epoch": 0.44511733535200604, | |
| "grad_norm": 1.5434706211090088, | |
| "learning_rate": 2.9726718546092934e-06, | |
| "loss": 1.0386013984680176, | |
| "step": 588 | |
| }, | |
| { | |
| "epoch": 0.4466313398940197, | |
| "grad_norm": 3.6868698596954346, | |
| "learning_rate": 2.972332488200228e-06, | |
| "loss": 0.9095458388328552, | |
| "step": 590 | |
| }, | |
| { | |
| "epoch": 0.4481453444360333, | |
| "grad_norm": 2.4178566932678223, | |
| "learning_rate": 2.971991049427556e-06, | |
| "loss": 1.2065972089767456, | |
| "step": 592 | |
| }, | |
| { | |
| "epoch": 0.44965934897804694, | |
| "grad_norm": 2.159417152404785, | |
| "learning_rate": 2.9716475388263818e-06, | |
| "loss": 1.3285571336746216, | |
| "step": 594 | |
| }, | |
| { | |
| "epoch": 0.45117335352006055, | |
| "grad_norm": 0.5650960803031921, | |
| "learning_rate": 2.9713019569350568e-06, | |
| "loss": 0.6450245380401611, | |
| "step": 596 | |
| }, | |
| { | |
| "epoch": 0.45268735806207416, | |
| "grad_norm": 1.47118079662323, | |
| "learning_rate": 2.9709543042951764e-06, | |
| "loss": 0.5147643685340881, | |
| "step": 598 | |
| }, | |
| { | |
| "epoch": 0.45420136260408783, | |
| "grad_norm": 1.1347875595092773, | |
| "learning_rate": 2.970604581451585e-06, | |
| "loss": 0.4938359260559082, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 0.45571536714610145, | |
| "grad_norm": 1.2782500982284546, | |
| "learning_rate": 2.970252788952368e-06, | |
| "loss": 1.2716739177703857, | |
| "step": 602 | |
| }, | |
| { | |
| "epoch": 0.45722937168811506, | |
| "grad_norm": 3.3131613731384277, | |
| "learning_rate": 2.9698989273488553e-06, | |
| "loss": 0.5934652090072632, | |
| "step": 604 | |
| }, | |
| { | |
| "epoch": 0.4587433762301287, | |
| "grad_norm": 1.5987533330917358, | |
| "learning_rate": 2.9695429971956215e-06, | |
| "loss": 0.8995585441589355, | |
| "step": 606 | |
| }, | |
| { | |
| "epoch": 0.46025738077214234, | |
| "grad_norm": 1.3289687633514404, | |
| "learning_rate": 2.96918499905048e-06, | |
| "loss": 0.5343512892723083, | |
| "step": 608 | |
| }, | |
| { | |
| "epoch": 0.46177138531415596, | |
| "grad_norm": 2.6191699504852295, | |
| "learning_rate": 2.968824933474487e-06, | |
| "loss": 1.210081934928894, | |
| "step": 610 | |
| }, | |
| { | |
| "epoch": 0.46328538985616957, | |
| "grad_norm": 1.5671101808547974, | |
| "learning_rate": 2.968462801031939e-06, | |
| "loss": 0.75421142578125, | |
| "step": 612 | |
| }, | |
| { | |
| "epoch": 0.4647993943981832, | |
| "grad_norm": 1.4679361581802368, | |
| "learning_rate": 2.9680986022903703e-06, | |
| "loss": 0.8687835931777954, | |
| "step": 614 | |
| }, | |
| { | |
| "epoch": 0.4663133989401968, | |
| "grad_norm": 2.458467721939087, | |
| "learning_rate": 2.9677323378205546e-06, | |
| "loss": 0.5934958457946777, | |
| "step": 616 | |
| }, | |
| { | |
| "epoch": 0.46782740348221047, | |
| "grad_norm": 1.3534835577011108, | |
| "learning_rate": 2.967364008196503e-06, | |
| "loss": 0.9021729230880737, | |
| "step": 618 | |
| }, | |
| { | |
| "epoch": 0.4693414080242241, | |
| "grad_norm": 1.609513282775879, | |
| "learning_rate": 2.966993613995462e-06, | |
| "loss": 0.9018821716308594, | |
| "step": 620 | |
| }, | |
| { | |
| "epoch": 0.4708554125662377, | |
| "grad_norm": 1.3751788139343262, | |
| "learning_rate": 2.966621155797916e-06, | |
| "loss": 0.7877498269081116, | |
| "step": 622 | |
| }, | |
| { | |
| "epoch": 0.4723694171082513, | |
| "grad_norm": 1.6474313735961914, | |
| "learning_rate": 2.9662466341875814e-06, | |
| "loss": 1.106297492980957, | |
| "step": 624 | |
| }, | |
| { | |
| "epoch": 0.4738834216502649, | |
| "grad_norm": 14.291585922241211, | |
| "learning_rate": 2.96587004975141e-06, | |
| "loss": 0.5758960247039795, | |
| "step": 626 | |
| }, | |
| { | |
| "epoch": 0.4753974261922786, | |
| "grad_norm": 2.3951332569122314, | |
| "learning_rate": 2.965491403079586e-06, | |
| "loss": 0.9572596549987793, | |
| "step": 628 | |
| }, | |
| { | |
| "epoch": 0.4769114307342922, | |
| "grad_norm": 4.662050724029541, | |
| "learning_rate": 2.965110694765526e-06, | |
| "loss": 0.4738556742668152, | |
| "step": 630 | |
| }, | |
| { | |
| "epoch": 0.4784254352763058, | |
| "grad_norm": 2.5419774055480957, | |
| "learning_rate": 2.964727925405877e-06, | |
| "loss": 0.5583599805831909, | |
| "step": 632 | |
| }, | |
| { | |
| "epoch": 0.47993943981831944, | |
| "grad_norm": 3.2088208198547363, | |
| "learning_rate": 2.9643430956005166e-06, | |
| "loss": 0.7850269079208374, | |
| "step": 634 | |
| }, | |
| { | |
| "epoch": 0.4814534443603331, | |
| "grad_norm": 1.2255338430404663, | |
| "learning_rate": 2.963956205952551e-06, | |
| "loss": 0.5295414924621582, | |
| "step": 636 | |
| }, | |
| { | |
| "epoch": 0.4829674489023467, | |
| "grad_norm": 15.457282066345215, | |
| "learning_rate": 2.9635672570683145e-06, | |
| "loss": 0.8352210521697998, | |
| "step": 638 | |
| }, | |
| { | |
| "epoch": 0.48448145344436033, | |
| "grad_norm": 2.2116246223449707, | |
| "learning_rate": 2.9631762495573697e-06, | |
| "loss": 0.9833724498748779, | |
| "step": 640 | |
| }, | |
| { | |
| "epoch": 0.48599545798637395, | |
| "grad_norm": 1.815476655960083, | |
| "learning_rate": 2.9627831840325043e-06, | |
| "loss": 1.0128812789916992, | |
| "step": 642 | |
| }, | |
| { | |
| "epoch": 0.48750946252838756, | |
| "grad_norm": 1.7280386686325073, | |
| "learning_rate": 2.9623880611097323e-06, | |
| "loss": 0.9331883788108826, | |
| "step": 644 | |
| }, | |
| { | |
| "epoch": 0.48902346707040123, | |
| "grad_norm": 1.3087846040725708, | |
| "learning_rate": 2.961990881408291e-06, | |
| "loss": 1.2210347652435303, | |
| "step": 646 | |
| }, | |
| { | |
| "epoch": 0.49053747161241484, | |
| "grad_norm": 1.7594807147979736, | |
| "learning_rate": 2.9615916455506424e-06, | |
| "loss": 0.8923947215080261, | |
| "step": 648 | |
| }, | |
| { | |
| "epoch": 0.49205147615442846, | |
| "grad_norm": 7.217441082000732, | |
| "learning_rate": 2.9611903541624695e-06, | |
| "loss": 0.5282926559448242, | |
| "step": 650 | |
| }, | |
| { | |
| "epoch": 0.49356548069644207, | |
| "grad_norm": 2.2700917720794678, | |
| "learning_rate": 2.960787007872678e-06, | |
| "loss": 0.5946333408355713, | |
| "step": 652 | |
| }, | |
| { | |
| "epoch": 0.49507948523845574, | |
| "grad_norm": 2.591341018676758, | |
| "learning_rate": 2.960381607313393e-06, | |
| "loss": 0.729837954044342, | |
| "step": 654 | |
| }, | |
| { | |
| "epoch": 0.49659348978046935, | |
| "grad_norm": 2.293522834777832, | |
| "learning_rate": 2.959974153119959e-06, | |
| "loss": 0.908227801322937, | |
| "step": 656 | |
| }, | |
| { | |
| "epoch": 0.49810749432248297, | |
| "grad_norm": 1.8502575159072876, | |
| "learning_rate": 2.959564645930941e-06, | |
| "loss": 1.260167121887207, | |
| "step": 658 | |
| }, | |
| { | |
| "epoch": 0.4996214988644966, | |
| "grad_norm": 22.77242088317871, | |
| "learning_rate": 2.959153086388119e-06, | |
| "loss": 0.9404181241989136, | |
| "step": 660 | |
| }, | |
| { | |
| "epoch": 0.5011355034065103, | |
| "grad_norm": 1.890344262123108, | |
| "learning_rate": 2.9587394751364895e-06, | |
| "loss": 0.6529523730278015, | |
| "step": 662 | |
| }, | |
| { | |
| "epoch": 0.5026495079485238, | |
| "grad_norm": 2.7498531341552734, | |
| "learning_rate": 2.9583238128242673e-06, | |
| "loss": 0.7013969421386719, | |
| "step": 664 | |
| }, | |
| { | |
| "epoch": 0.5041635124905375, | |
| "grad_norm": 2.2211556434631348, | |
| "learning_rate": 2.9579061001028787e-06, | |
| "loss": 0.9186385273933411, | |
| "step": 666 | |
| }, | |
| { | |
| "epoch": 0.5056775170325511, | |
| "grad_norm": 6.754961967468262, | |
| "learning_rate": 2.9574863376269644e-06, | |
| "loss": 0.5539544820785522, | |
| "step": 668 | |
| }, | |
| { | |
| "epoch": 0.5071915215745647, | |
| "grad_norm": 3.343362331390381, | |
| "learning_rate": 2.9570645260543773e-06, | |
| "loss": 0.657001793384552, | |
| "step": 670 | |
| }, | |
| { | |
| "epoch": 0.5087055261165784, | |
| "grad_norm": 2.516889810562134, | |
| "learning_rate": 2.9566406660461816e-06, | |
| "loss": 0.6957874298095703, | |
| "step": 672 | |
| }, | |
| { | |
| "epoch": 0.5102195306585919, | |
| "grad_norm": 1.4724997282028198, | |
| "learning_rate": 2.956214758266653e-06, | |
| "loss": 1.0336772203445435, | |
| "step": 674 | |
| }, | |
| { | |
| "epoch": 0.5117335352006056, | |
| "grad_norm": 3.6221024990081787, | |
| "learning_rate": 2.955786803383275e-06, | |
| "loss": 0.8379154205322266, | |
| "step": 676 | |
| }, | |
| { | |
| "epoch": 0.5132475397426193, | |
| "grad_norm": 1.472934365272522, | |
| "learning_rate": 2.9553568020667393e-06, | |
| "loss": 1.3164961338043213, | |
| "step": 678 | |
| }, | |
| { | |
| "epoch": 0.5147615442846328, | |
| "grad_norm": 2.5690131187438965, | |
| "learning_rate": 2.9549247549909456e-06, | |
| "loss": 0.7076467275619507, | |
| "step": 680 | |
| }, | |
| { | |
| "epoch": 0.5162755488266465, | |
| "grad_norm": 2.820261240005493, | |
| "learning_rate": 2.954490662833e-06, | |
| "loss": 1.193648099899292, | |
| "step": 682 | |
| }, | |
| { | |
| "epoch": 0.5177895533686601, | |
| "grad_norm": 2.8086366653442383, | |
| "learning_rate": 2.954054526273213e-06, | |
| "loss": 0.5091046690940857, | |
| "step": 684 | |
| }, | |
| { | |
| "epoch": 0.5193035579106737, | |
| "grad_norm": 1.9126583337783813, | |
| "learning_rate": 2.9536163459950984e-06, | |
| "loss": 0.37770235538482666, | |
| "step": 686 | |
| }, | |
| { | |
| "epoch": 0.5208175624526874, | |
| "grad_norm": 3.101130247116089, | |
| "learning_rate": 2.953176122685374e-06, | |
| "loss": 1.3354110717773438, | |
| "step": 688 | |
| }, | |
| { | |
| "epoch": 0.522331566994701, | |
| "grad_norm": 2.8612306118011475, | |
| "learning_rate": 2.952733857033959e-06, | |
| "loss": 1.0210174322128296, | |
| "step": 690 | |
| }, | |
| { | |
| "epoch": 0.5238455715367146, | |
| "grad_norm": 1.8148362636566162, | |
| "learning_rate": 2.9522895497339746e-06, | |
| "loss": 1.0870827436447144, | |
| "step": 692 | |
| }, | |
| { | |
| "epoch": 0.5253595760787282, | |
| "grad_norm": 9.631368637084961, | |
| "learning_rate": 2.9518432014817396e-06, | |
| "loss": 0.4528180956840515, | |
| "step": 694 | |
| }, | |
| { | |
| "epoch": 0.5268735806207419, | |
| "grad_norm": 2.0368731021881104, | |
| "learning_rate": 2.951394812976772e-06, | |
| "loss": 0.6437329649925232, | |
| "step": 696 | |
| }, | |
| { | |
| "epoch": 0.5283875851627555, | |
| "grad_norm": 2.2827751636505127, | |
| "learning_rate": 2.9509443849217887e-06, | |
| "loss": 1.2784912586212158, | |
| "step": 698 | |
| }, | |
| { | |
| "epoch": 0.5299015897047691, | |
| "grad_norm": 2.5390939712524414, | |
| "learning_rate": 2.9504919180227014e-06, | |
| "loss": 0.5129702687263489, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 0.5314155942467828, | |
| "grad_norm": 4.679416656494141, | |
| "learning_rate": 2.9500374129886178e-06, | |
| "loss": 1.0204864740371704, | |
| "step": 702 | |
| }, | |
| { | |
| "epoch": 0.5329295987887964, | |
| "grad_norm": 2.2368252277374268, | |
| "learning_rate": 2.9495808705318392e-06, | |
| "loss": 0.8695347309112549, | |
| "step": 704 | |
| }, | |
| { | |
| "epoch": 0.53444360333081, | |
| "grad_norm": 5.981869697570801, | |
| "learning_rate": 2.9491222913678613e-06, | |
| "loss": 1.1520168781280518, | |
| "step": 706 | |
| }, | |
| { | |
| "epoch": 0.5359576078728236, | |
| "grad_norm": 2.0948941707611084, | |
| "learning_rate": 2.9486616762153697e-06, | |
| "loss": 1.0977720022201538, | |
| "step": 708 | |
| }, | |
| { | |
| "epoch": 0.5374716124148372, | |
| "grad_norm": 1.9340217113494873, | |
| "learning_rate": 2.948199025796242e-06, | |
| "loss": 0.8995184302330017, | |
| "step": 710 | |
| }, | |
| { | |
| "epoch": 0.5389856169568509, | |
| "grad_norm": 1.8215906620025635, | |
| "learning_rate": 2.9477343408355466e-06, | |
| "loss": 0.8296648263931274, | |
| "step": 712 | |
| }, | |
| { | |
| "epoch": 0.5404996214988645, | |
| "grad_norm": 2.6101953983306885, | |
| "learning_rate": 2.947267622061538e-06, | |
| "loss": 0.4829208254814148, | |
| "step": 714 | |
| }, | |
| { | |
| "epoch": 0.5420136260408781, | |
| "grad_norm": 8.983611106872559, | |
| "learning_rate": 2.9467988702056594e-06, | |
| "loss": 0.9006999135017395, | |
| "step": 716 | |
| }, | |
| { | |
| "epoch": 0.5435276305828918, | |
| "grad_norm": 1.1540857553482056, | |
| "learning_rate": 2.946328086002541e-06, | |
| "loss": 0.9905385375022888, | |
| "step": 718 | |
| }, | |
| { | |
| "epoch": 0.5450416351249053, | |
| "grad_norm": 1.1875252723693848, | |
| "learning_rate": 2.945855270189997e-06, | |
| "loss": 0.9678749442100525, | |
| "step": 720 | |
| }, | |
| { | |
| "epoch": 0.546555639666919, | |
| "grad_norm": 1.1591203212738037, | |
| "learning_rate": 2.9453804235090253e-06, | |
| "loss": 0.8857312202453613, | |
| "step": 722 | |
| }, | |
| { | |
| "epoch": 0.5480696442089327, | |
| "grad_norm": 2.282198667526245, | |
| "learning_rate": 2.944903546703807e-06, | |
| "loss": 0.6595865488052368, | |
| "step": 724 | |
| }, | |
| { | |
| "epoch": 0.5495836487509462, | |
| "grad_norm": 16.777631759643555, | |
| "learning_rate": 2.944424640521706e-06, | |
| "loss": 1.0174167156219482, | |
| "step": 726 | |
| }, | |
| { | |
| "epoch": 0.5510976532929599, | |
| "grad_norm": 1.7583130598068237, | |
| "learning_rate": 2.9439437057132643e-06, | |
| "loss": 0.9153175354003906, | |
| "step": 728 | |
| }, | |
| { | |
| "epoch": 0.5526116578349735, | |
| "grad_norm": 1.554577112197876, | |
| "learning_rate": 2.943460743032205e-06, | |
| "loss": 0.7978371381759644, | |
| "step": 730 | |
| }, | |
| { | |
| "epoch": 0.5541256623769871, | |
| "grad_norm": 6.742075443267822, | |
| "learning_rate": 2.942975753235429e-06, | |
| "loss": 0.8972716331481934, | |
| "step": 732 | |
| }, | |
| { | |
| "epoch": 0.5556396669190008, | |
| "grad_norm": 6.648641109466553, | |
| "learning_rate": 2.9424887370830134e-06, | |
| "loss": 0.5501348376274109, | |
| "step": 734 | |
| }, | |
| { | |
| "epoch": 0.5571536714610144, | |
| "grad_norm": 1.9982460737228394, | |
| "learning_rate": 2.9419996953382116e-06, | |
| "loss": 0.5124497413635254, | |
| "step": 736 | |
| }, | |
| { | |
| "epoch": 0.558667676003028, | |
| "grad_norm": 2.2183444499969482, | |
| "learning_rate": 2.9415086287674513e-06, | |
| "loss": 0.8203888535499573, | |
| "step": 738 | |
| }, | |
| { | |
| "epoch": 0.5601816805450416, | |
| "grad_norm": 3.337951421737671, | |
| "learning_rate": 2.941015538140334e-06, | |
| "loss": 0.47594571113586426, | |
| "step": 740 | |
| }, | |
| { | |
| "epoch": 0.5616956850870553, | |
| "grad_norm": 2.2672388553619385, | |
| "learning_rate": 2.940520424229632e-06, | |
| "loss": 0.9030762910842896, | |
| "step": 742 | |
| }, | |
| { | |
| "epoch": 0.5632096896290689, | |
| "grad_norm": 2.4289283752441406, | |
| "learning_rate": 2.94002328781129e-06, | |
| "loss": 1.2035784721374512, | |
| "step": 744 | |
| }, | |
| { | |
| "epoch": 0.5647236941710825, | |
| "grad_norm": 3.540351152420044, | |
| "learning_rate": 2.9395241296644217e-06, | |
| "loss": 1.004518747329712, | |
| "step": 746 | |
| }, | |
| { | |
| "epoch": 0.5662376987130961, | |
| "grad_norm": 1.4717613458633423, | |
| "learning_rate": 2.939022950571309e-06, | |
| "loss": 1.2319071292877197, | |
| "step": 748 | |
| }, | |
| { | |
| "epoch": 0.5677517032551098, | |
| "grad_norm": 1.7184895277023315, | |
| "learning_rate": 2.938519751317402e-06, | |
| "loss": 1.0665231943130493, | |
| "step": 750 | |
| }, | |
| { | |
| "epoch": 0.5692657077971234, | |
| "grad_norm": 1.8175034523010254, | |
| "learning_rate": 2.938014532691315e-06, | |
| "loss": 0.9953513145446777, | |
| "step": 752 | |
| }, | |
| { | |
| "epoch": 0.570779712339137, | |
| "grad_norm": 2.043348789215088, | |
| "learning_rate": 2.937507295484829e-06, | |
| "loss": 0.9582357406616211, | |
| "step": 754 | |
| }, | |
| { | |
| "epoch": 0.5722937168811506, | |
| "grad_norm": 3.9826252460479736, | |
| "learning_rate": 2.936998040492888e-06, | |
| "loss": 0.6116023063659668, | |
| "step": 756 | |
| }, | |
| { | |
| "epoch": 0.5738077214231643, | |
| "grad_norm": 1.8964965343475342, | |
| "learning_rate": 2.936486768513597e-06, | |
| "loss": 1.2680330276489258, | |
| "step": 758 | |
| }, | |
| { | |
| "epoch": 0.5753217259651779, | |
| "grad_norm": 2.7118351459503174, | |
| "learning_rate": 2.9359734803482244e-06, | |
| "loss": 0.7763157486915588, | |
| "step": 760 | |
| }, | |
| { | |
| "epoch": 0.5768357305071915, | |
| "grad_norm": 1.8754926919937134, | |
| "learning_rate": 2.935458176801196e-06, | |
| "loss": 0.5886606574058533, | |
| "step": 762 | |
| }, | |
| { | |
| "epoch": 0.5783497350492052, | |
| "grad_norm": 1.2424232959747314, | |
| "learning_rate": 2.9349408586800974e-06, | |
| "loss": 0.8996439576148987, | |
| "step": 764 | |
| }, | |
| { | |
| "epoch": 0.5798637395912187, | |
| "grad_norm": 4.870052337646484, | |
| "learning_rate": 2.9344215267956716e-06, | |
| "loss": 0.9547120332717896, | |
| "step": 766 | |
| }, | |
| { | |
| "epoch": 0.5813777441332324, | |
| "grad_norm": 1.3577274084091187, | |
| "learning_rate": 2.933900181961816e-06, | |
| "loss": 0.6967052221298218, | |
| "step": 768 | |
| }, | |
| { | |
| "epoch": 0.5828917486752461, | |
| "grad_norm": 1.5694098472595215, | |
| "learning_rate": 2.933376824995585e-06, | |
| "loss": 0.8374803066253662, | |
| "step": 770 | |
| }, | |
| { | |
| "epoch": 0.5844057532172596, | |
| "grad_norm": 3.961740255355835, | |
| "learning_rate": 2.932851456717185e-06, | |
| "loss": 1.0225903987884521, | |
| "step": 772 | |
| }, | |
| { | |
| "epoch": 0.5859197577592733, | |
| "grad_norm": 4.641229629516602, | |
| "learning_rate": 2.9323240779499744e-06, | |
| "loss": 0.866554319858551, | |
| "step": 774 | |
| }, | |
| { | |
| "epoch": 0.5874337623012869, | |
| "grad_norm": 2.3968756198883057, | |
| "learning_rate": 2.9317946895204634e-06, | |
| "loss": 0.960785984992981, | |
| "step": 776 | |
| }, | |
| { | |
| "epoch": 0.5889477668433005, | |
| "grad_norm": 1.2767549753189087, | |
| "learning_rate": 2.9312632922583108e-06, | |
| "loss": 1.190723180770874, | |
| "step": 778 | |
| }, | |
| { | |
| "epoch": 0.5904617713853142, | |
| "grad_norm": 1.090358018875122, | |
| "learning_rate": 2.930729886996324e-06, | |
| "loss": 0.5099388957023621, | |
| "step": 780 | |
| }, | |
| { | |
| "epoch": 0.5919757759273278, | |
| "grad_norm": 1.057666301727295, | |
| "learning_rate": 2.930194474570458e-06, | |
| "loss": 0.9268434047698975, | |
| "step": 782 | |
| }, | |
| { | |
| "epoch": 0.5934897804693414, | |
| "grad_norm": 1.2038636207580566, | |
| "learning_rate": 2.929657055819812e-06, | |
| "loss": 0.7505081295967102, | |
| "step": 784 | |
| }, | |
| { | |
| "epoch": 0.595003785011355, | |
| "grad_norm": 1.194632649421692, | |
| "learning_rate": 2.9291176315866315e-06, | |
| "loss": 0.45634379982948303, | |
| "step": 786 | |
| }, | |
| { | |
| "epoch": 0.5965177895533686, | |
| "grad_norm": 1.2533210515975952, | |
| "learning_rate": 2.928576202716302e-06, | |
| "loss": 0.7255032062530518, | |
| "step": 788 | |
| }, | |
| { | |
| "epoch": 0.5980317940953823, | |
| "grad_norm": 1.046190619468689, | |
| "learning_rate": 2.9280327700573545e-06, | |
| "loss": 0.9680758714675903, | |
| "step": 790 | |
| }, | |
| { | |
| "epoch": 0.5995457986373959, | |
| "grad_norm": 1.2196940183639526, | |
| "learning_rate": 2.9274873344614567e-06, | |
| "loss": 0.901178777217865, | |
| "step": 792 | |
| }, | |
| { | |
| "epoch": 0.6010598031794095, | |
| "grad_norm": 1.716521978378296, | |
| "learning_rate": 2.926939896783418e-06, | |
| "loss": 0.8521221876144409, | |
| "step": 794 | |
| }, | |
| { | |
| "epoch": 0.6025738077214232, | |
| "grad_norm": 1.507444143295288, | |
| "learning_rate": 2.926390457881185e-06, | |
| "loss": 0.8391146659851074, | |
| "step": 796 | |
| }, | |
| { | |
| "epoch": 0.6040878122634368, | |
| "grad_norm": 1.9206948280334473, | |
| "learning_rate": 2.9258390186158377e-06, | |
| "loss": 0.8905218839645386, | |
| "step": 798 | |
| }, | |
| { | |
| "epoch": 0.6056018168054504, | |
| "grad_norm": 1.6404706239700317, | |
| "learning_rate": 2.925285579851596e-06, | |
| "loss": 0.6892648339271545, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 0.607115821347464, | |
| "grad_norm": 2.493842840194702, | |
| "learning_rate": 2.924730142455809e-06, | |
| "loss": 0.8925967812538147, | |
| "step": 802 | |
| }, | |
| { | |
| "epoch": 0.6086298258894777, | |
| "grad_norm": 10.76797103881836, | |
| "learning_rate": 2.924172707298962e-06, | |
| "loss": 0.9718807935714722, | |
| "step": 804 | |
| }, | |
| { | |
| "epoch": 0.6101438304314913, | |
| "grad_norm": 7.091102123260498, | |
| "learning_rate": 2.9236132752546675e-06, | |
| "loss": 0.7837668061256409, | |
| "step": 806 | |
| }, | |
| { | |
| "epoch": 0.6116578349735049, | |
| "grad_norm": 2.373680353164673, | |
| "learning_rate": 2.9230518471996693e-06, | |
| "loss": 0.8521146774291992, | |
| "step": 808 | |
| }, | |
| { | |
| "epoch": 0.6131718395155186, | |
| "grad_norm": 4.460805892944336, | |
| "learning_rate": 2.922488424013839e-06, | |
| "loss": 0.7885128259658813, | |
| "step": 810 | |
| }, | |
| { | |
| "epoch": 0.6146858440575321, | |
| "grad_norm": 1.3457956314086914, | |
| "learning_rate": 2.9219230065801762e-06, | |
| "loss": 0.9336018562316895, | |
| "step": 812 | |
| }, | |
| { | |
| "epoch": 0.6161998485995458, | |
| "grad_norm": 12.52235221862793, | |
| "learning_rate": 2.9213555957848045e-06, | |
| "loss": 0.625677227973938, | |
| "step": 814 | |
| }, | |
| { | |
| "epoch": 0.6177138531415595, | |
| "grad_norm": 1.0090450048446655, | |
| "learning_rate": 2.920786192516971e-06, | |
| "loss": 0.4627359211444855, | |
| "step": 816 | |
| }, | |
| { | |
| "epoch": 0.619227857683573, | |
| "grad_norm": 6.27386999130249, | |
| "learning_rate": 2.920214797669047e-06, | |
| "loss": 0.48187127709388733, | |
| "step": 818 | |
| }, | |
| { | |
| "epoch": 0.6207418622255867, | |
| "grad_norm": 1.8286688327789307, | |
| "learning_rate": 2.9196414121365246e-06, | |
| "loss": 0.9612034559249878, | |
| "step": 820 | |
| }, | |
| { | |
| "epoch": 0.6222558667676003, | |
| "grad_norm": 2.0253140926361084, | |
| "learning_rate": 2.919066036818015e-06, | |
| "loss": 0.37848153710365295, | |
| "step": 822 | |
| }, | |
| { | |
| "epoch": 0.6237698713096139, | |
| "grad_norm": 2.8019373416900635, | |
| "learning_rate": 2.918488672615248e-06, | |
| "loss": 0.48960334062576294, | |
| "step": 824 | |
| }, | |
| { | |
| "epoch": 0.6252838758516276, | |
| "grad_norm": 3.38137149810791, | |
| "learning_rate": 2.9179093204330706e-06, | |
| "loss": 1.0333915948867798, | |
| "step": 826 | |
| }, | |
| { | |
| "epoch": 0.6267978803936411, | |
| "grad_norm": 3.585141658782959, | |
| "learning_rate": 2.917327981179446e-06, | |
| "loss": 0.8286981582641602, | |
| "step": 828 | |
| }, | |
| { | |
| "epoch": 0.6283118849356548, | |
| "grad_norm": 1.340408444404602, | |
| "learning_rate": 2.91674465576545e-06, | |
| "loss": 0.592296838760376, | |
| "step": 830 | |
| }, | |
| { | |
| "epoch": 0.6298258894776685, | |
| "grad_norm": 2.1734678745269775, | |
| "learning_rate": 2.9161593451052726e-06, | |
| "loss": 0.6662003397941589, | |
| "step": 832 | |
| }, | |
| { | |
| "epoch": 0.631339894019682, | |
| "grad_norm": 1.7065621614456177, | |
| "learning_rate": 2.9155720501162143e-06, | |
| "loss": 1.1774479150772095, | |
| "step": 834 | |
| }, | |
| { | |
| "epoch": 0.6328538985616957, | |
| "grad_norm": 0.9802259206771851, | |
| "learning_rate": 2.9149827717186858e-06, | |
| "loss": 0.7125468254089355, | |
| "step": 836 | |
| }, | |
| { | |
| "epoch": 0.6343679031037093, | |
| "grad_norm": 3.3598525524139404, | |
| "learning_rate": 2.914391510836206e-06, | |
| "loss": 1.0522104501724243, | |
| "step": 838 | |
| }, | |
| { | |
| "epoch": 0.6358819076457229, | |
| "grad_norm": 4.160059928894043, | |
| "learning_rate": 2.9137982683954005e-06, | |
| "loss": 0.7905111908912659, | |
| "step": 840 | |
| }, | |
| { | |
| "epoch": 0.6373959121877366, | |
| "grad_norm": 2.1964306831359863, | |
| "learning_rate": 2.9132030453260012e-06, | |
| "loss": 0.8976717591285706, | |
| "step": 842 | |
| }, | |
| { | |
| "epoch": 0.6389099167297502, | |
| "grad_norm": 1.4463465213775635, | |
| "learning_rate": 2.912605842560843e-06, | |
| "loss": 1.174971342086792, | |
| "step": 844 | |
| }, | |
| { | |
| "epoch": 0.6404239212717638, | |
| "grad_norm": 1.9015480279922485, | |
| "learning_rate": 2.9120066610358644e-06, | |
| "loss": 1.2245635986328125, | |
| "step": 846 | |
| }, | |
| { | |
| "epoch": 0.6419379258137774, | |
| "grad_norm": 3.132871150970459, | |
| "learning_rate": 2.911405501690104e-06, | |
| "loss": 0.5686792135238647, | |
| "step": 848 | |
| }, | |
| { | |
| "epoch": 0.6434519303557911, | |
| "grad_norm": 3.358302354812622, | |
| "learning_rate": 2.910802365465702e-06, | |
| "loss": 0.8344794511795044, | |
| "step": 850 | |
| }, | |
| { | |
| "epoch": 0.6449659348978047, | |
| "grad_norm": 2.254426956176758, | |
| "learning_rate": 2.9101972533078937e-06, | |
| "loss": 1.2486826181411743, | |
| "step": 852 | |
| }, | |
| { | |
| "epoch": 0.6464799394398183, | |
| "grad_norm": 1.7597107887268066, | |
| "learning_rate": 2.909590166165013e-06, | |
| "loss": 0.4052983820438385, | |
| "step": 854 | |
| }, | |
| { | |
| "epoch": 0.647993943981832, | |
| "grad_norm": 4.035180568695068, | |
| "learning_rate": 2.9089811049884896e-06, | |
| "loss": 0.548038125038147, | |
| "step": 856 | |
| }, | |
| { | |
| "epoch": 0.6495079485238455, | |
| "grad_norm": 2.2516682147979736, | |
| "learning_rate": 2.908370070732846e-06, | |
| "loss": 1.1047321557998657, | |
| "step": 858 | |
| }, | |
| { | |
| "epoch": 0.6510219530658592, | |
| "grad_norm": 1.6817784309387207, | |
| "learning_rate": 2.9077570643556967e-06, | |
| "loss": 0.9021655917167664, | |
| "step": 860 | |
| }, | |
| { | |
| "epoch": 0.6525359576078729, | |
| "grad_norm": 1.2909799814224243, | |
| "learning_rate": 2.9071420868177464e-06, | |
| "loss": 0.9059284925460815, | |
| "step": 862 | |
| }, | |
| { | |
| "epoch": 0.6540499621498864, | |
| "grad_norm": 2.6258788108825684, | |
| "learning_rate": 2.906525139082791e-06, | |
| "loss": 0.8191069960594177, | |
| "step": 864 | |
| }, | |
| { | |
| "epoch": 0.6555639666919001, | |
| "grad_norm": 1.954457402229309, | |
| "learning_rate": 2.9059062221177133e-06, | |
| "loss": 0.8419861793518066, | |
| "step": 866 | |
| }, | |
| { | |
| "epoch": 0.6570779712339136, | |
| "grad_norm": 2.6133012771606445, | |
| "learning_rate": 2.90528533689248e-06, | |
| "loss": 0.9011951088905334, | |
| "step": 868 | |
| }, | |
| { | |
| "epoch": 0.6585919757759273, | |
| "grad_norm": 2.973444700241089, | |
| "learning_rate": 2.904662484380146e-06, | |
| "loss": 0.5388523936271667, | |
| "step": 870 | |
| }, | |
| { | |
| "epoch": 0.660105980317941, | |
| "grad_norm": 3.0186350345611572, | |
| "learning_rate": 2.9040376655568473e-06, | |
| "loss": 0.9145660400390625, | |
| "step": 872 | |
| }, | |
| { | |
| "epoch": 0.6616199848599545, | |
| "grad_norm": 2.547288179397583, | |
| "learning_rate": 2.903410881401801e-06, | |
| "loss": 1.2741073369979858, | |
| "step": 874 | |
| }, | |
| { | |
| "epoch": 0.6631339894019682, | |
| "grad_norm": 1.5956040620803833, | |
| "learning_rate": 2.9027821328973073e-06, | |
| "loss": 0.88572096824646, | |
| "step": 876 | |
| }, | |
| { | |
| "epoch": 0.6646479939439819, | |
| "grad_norm": 2.2501113414764404, | |
| "learning_rate": 2.9021514210287405e-06, | |
| "loss": 0.5038325190544128, | |
| "step": 878 | |
| }, | |
| { | |
| "epoch": 0.6661619984859954, | |
| "grad_norm": 2.4447531700134277, | |
| "learning_rate": 2.9015187467845555e-06, | |
| "loss": 0.9085601568222046, | |
| "step": 880 | |
| }, | |
| { | |
| "epoch": 0.6676760030280091, | |
| "grad_norm": 1.6331331729888916, | |
| "learning_rate": 2.900884111156281e-06, | |
| "loss": 0.9052636027336121, | |
| "step": 882 | |
| }, | |
| { | |
| "epoch": 0.6691900075700227, | |
| "grad_norm": 1.715603232383728, | |
| "learning_rate": 2.90024751513852e-06, | |
| "loss": 0.9259544014930725, | |
| "step": 884 | |
| }, | |
| { | |
| "epoch": 0.6707040121120363, | |
| "grad_norm": 2.190739870071411, | |
| "learning_rate": 2.8996089597289474e-06, | |
| "loss": 1.2172551155090332, | |
| "step": 886 | |
| }, | |
| { | |
| "epoch": 0.67221801665405, | |
| "grad_norm": 1.130866527557373, | |
| "learning_rate": 2.8989684459283103e-06, | |
| "loss": 0.9008451700210571, | |
| "step": 888 | |
| }, | |
| { | |
| "epoch": 0.6737320211960636, | |
| "grad_norm": 1.3909481763839722, | |
| "learning_rate": 2.898325974740423e-06, | |
| "loss": 0.8698288202285767, | |
| "step": 890 | |
| }, | |
| { | |
| "epoch": 0.6752460257380772, | |
| "grad_norm": 1.2574244737625122, | |
| "learning_rate": 2.8976815471721686e-06, | |
| "loss": 0.7217528820037842, | |
| "step": 892 | |
| }, | |
| { | |
| "epoch": 0.6767600302800908, | |
| "grad_norm": 12.1749849319458, | |
| "learning_rate": 2.8970351642334953e-06, | |
| "loss": 0.6941770911216736, | |
| "step": 894 | |
| }, | |
| { | |
| "epoch": 0.6782740348221045, | |
| "grad_norm": 1.5021611452102661, | |
| "learning_rate": 2.896386826937418e-06, | |
| "loss": 1.2301819324493408, | |
| "step": 896 | |
| }, | |
| { | |
| "epoch": 0.6797880393641181, | |
| "grad_norm": 3.2311809062957764, | |
| "learning_rate": 2.895736536300012e-06, | |
| "loss": 0.5155861973762512, | |
| "step": 898 | |
| }, | |
| { | |
| "epoch": 0.6813020439061317, | |
| "grad_norm": 2.549818992614746, | |
| "learning_rate": 2.895084293340415e-06, | |
| "loss": 1.0740303993225098, | |
| "step": 900 | |
| }, | |
| { | |
| "epoch": 0.6828160484481454, | |
| "grad_norm": 2.2381906509399414, | |
| "learning_rate": 2.8944300990808232e-06, | |
| "loss": 0.7758005261421204, | |
| "step": 902 | |
| }, | |
| { | |
| "epoch": 0.6843300529901589, | |
| "grad_norm": 2.3603358268737793, | |
| "learning_rate": 2.8937739545464937e-06, | |
| "loss": 0.5189546346664429, | |
| "step": 904 | |
| }, | |
| { | |
| "epoch": 0.6858440575321726, | |
| "grad_norm": 2.0805881023406982, | |
| "learning_rate": 2.8931158607657375e-06, | |
| "loss": 0.6191146969795227, | |
| "step": 906 | |
| }, | |
| { | |
| "epoch": 0.6873580620741863, | |
| "grad_norm": 1.9956350326538086, | |
| "learning_rate": 2.89245581876992e-06, | |
| "loss": 0.506104588508606, | |
| "step": 908 | |
| }, | |
| { | |
| "epoch": 0.6888720666161998, | |
| "grad_norm": 3.0931084156036377, | |
| "learning_rate": 2.891793829593463e-06, | |
| "loss": 0.9202035069465637, | |
| "step": 910 | |
| }, | |
| { | |
| "epoch": 0.6903860711582135, | |
| "grad_norm": 1.566701054573059, | |
| "learning_rate": 2.8911298942738372e-06, | |
| "loss": 1.0531013011932373, | |
| "step": 912 | |
| }, | |
| { | |
| "epoch": 0.691900075700227, | |
| "grad_norm": 1.253186821937561, | |
| "learning_rate": 2.8904640138515636e-06, | |
| "loss": 0.8252142071723938, | |
| "step": 914 | |
| }, | |
| { | |
| "epoch": 0.6934140802422407, | |
| "grad_norm": 1.041472315788269, | |
| "learning_rate": 2.889796189370213e-06, | |
| "loss": 0.5106720924377441, | |
| "step": 916 | |
| }, | |
| { | |
| "epoch": 0.6949280847842544, | |
| "grad_norm": 2.2845821380615234, | |
| "learning_rate": 2.8891264218764013e-06, | |
| "loss": 0.8980287313461304, | |
| "step": 918 | |
| }, | |
| { | |
| "epoch": 0.6964420893262679, | |
| "grad_norm": 1.5488086938858032, | |
| "learning_rate": 2.8884547124197904e-06, | |
| "loss": 0.5322882533073425, | |
| "step": 920 | |
| }, | |
| { | |
| "epoch": 0.6979560938682816, | |
| "grad_norm": 0.5618732571601868, | |
| "learning_rate": 2.887781062053086e-06, | |
| "loss": 1.0926649570465088, | |
| "step": 922 | |
| }, | |
| { | |
| "epoch": 0.6994700984102953, | |
| "grad_norm": 1.8515206575393677, | |
| "learning_rate": 2.887105471832034e-06, | |
| "loss": 0.9032957553863525, | |
| "step": 924 | |
| }, | |
| { | |
| "epoch": 0.7009841029523088, | |
| "grad_norm": 3.249413251876831, | |
| "learning_rate": 2.8864279428154225e-06, | |
| "loss": 0.5452030897140503, | |
| "step": 926 | |
| }, | |
| { | |
| "epoch": 0.7024981074943225, | |
| "grad_norm": 5.527769088745117, | |
| "learning_rate": 2.8857484760650765e-06, | |
| "loss": 0.6838735938072205, | |
| "step": 928 | |
| }, | |
| { | |
| "epoch": 0.7040121120363361, | |
| "grad_norm": 1.556622862815857, | |
| "learning_rate": 2.885067072645859e-06, | |
| "loss": 0.7241641879081726, | |
| "step": 930 | |
| }, | |
| { | |
| "epoch": 0.7055261165783497, | |
| "grad_norm": 5.3245463371276855, | |
| "learning_rate": 2.8843837336256664e-06, | |
| "loss": 0.8361733555793762, | |
| "step": 932 | |
| }, | |
| { | |
| "epoch": 0.7070401211203634, | |
| "grad_norm": 2.6056675910949707, | |
| "learning_rate": 2.883698460075431e-06, | |
| "loss": 0.9540938138961792, | |
| "step": 934 | |
| }, | |
| { | |
| "epoch": 0.708554125662377, | |
| "grad_norm": 1.26792311668396, | |
| "learning_rate": 2.8830112530691147e-06, | |
| "loss": 1.242187261581421, | |
| "step": 936 | |
| }, | |
| { | |
| "epoch": 0.7100681302043906, | |
| "grad_norm": 5.294063091278076, | |
| "learning_rate": 2.8823221136837114e-06, | |
| "loss": 0.4748896062374115, | |
| "step": 938 | |
| }, | |
| { | |
| "epoch": 0.7115821347464042, | |
| "grad_norm": 3.2267799377441406, | |
| "learning_rate": 2.881631042999242e-06, | |
| "loss": 0.8570255637168884, | |
| "step": 940 | |
| }, | |
| { | |
| "epoch": 0.7130961392884179, | |
| "grad_norm": 1.6323168277740479, | |
| "learning_rate": 2.8809380420987547e-06, | |
| "loss": 0.8971255421638489, | |
| "step": 942 | |
| }, | |
| { | |
| "epoch": 0.7146101438304315, | |
| "grad_norm": 1.6141637563705444, | |
| "learning_rate": 2.8802431120683226e-06, | |
| "loss": 0.7287229299545288, | |
| "step": 944 | |
| }, | |
| { | |
| "epoch": 0.7161241483724451, | |
| "grad_norm": 2.4479527473449707, | |
| "learning_rate": 2.8795462539970417e-06, | |
| "loss": 1.0156718492507935, | |
| "step": 946 | |
| }, | |
| { | |
| "epoch": 0.7176381529144588, | |
| "grad_norm": 2.234118938446045, | |
| "learning_rate": 2.878847468977032e-06, | |
| "loss": 0.8655732870101929, | |
| "step": 948 | |
| }, | |
| { | |
| "epoch": 0.7191521574564723, | |
| "grad_norm": 1.285766839981079, | |
| "learning_rate": 2.8781467581034287e-06, | |
| "loss": 1.0937553644180298, | |
| "step": 950 | |
| }, | |
| { | |
| "epoch": 0.720666161998486, | |
| "grad_norm": 3.193481922149658, | |
| "learning_rate": 2.877444122474391e-06, | |
| "loss": 1.252207636833191, | |
| "step": 952 | |
| }, | |
| { | |
| "epoch": 0.7221801665404997, | |
| "grad_norm": 2.450623035430908, | |
| "learning_rate": 2.8767395631910893e-06, | |
| "loss": 1.2393505573272705, | |
| "step": 954 | |
| }, | |
| { | |
| "epoch": 0.7236941710825132, | |
| "grad_norm": 2.2160840034484863, | |
| "learning_rate": 2.8760330813577127e-06, | |
| "loss": 0.8748006820678711, | |
| "step": 956 | |
| }, | |
| { | |
| "epoch": 0.7252081756245269, | |
| "grad_norm": 9.748912811279297, | |
| "learning_rate": 2.8753246780814606e-06, | |
| "loss": 0.8926846981048584, | |
| "step": 958 | |
| }, | |
| { | |
| "epoch": 0.7267221801665406, | |
| "grad_norm": 1.8308805227279663, | |
| "learning_rate": 2.8746143544725454e-06, | |
| "loss": 0.5339508652687073, | |
| "step": 960 | |
| }, | |
| { | |
| "epoch": 0.7282361847085541, | |
| "grad_norm": 2.4011330604553223, | |
| "learning_rate": 2.8739021116441887e-06, | |
| "loss": 0.9131770133972168, | |
| "step": 962 | |
| }, | |
| { | |
| "epoch": 0.7297501892505678, | |
| "grad_norm": 1.7618752717971802, | |
| "learning_rate": 2.8731879507126194e-06, | |
| "loss": 0.8701857328414917, | |
| "step": 964 | |
| }, | |
| { | |
| "epoch": 0.7312641937925813, | |
| "grad_norm": 2.0772533416748047, | |
| "learning_rate": 2.872471872797073e-06, | |
| "loss": 0.7838905453681946, | |
| "step": 966 | |
| }, | |
| { | |
| "epoch": 0.732778198334595, | |
| "grad_norm": 1.310855746269226, | |
| "learning_rate": 2.8717538790197887e-06, | |
| "loss": 0.7258808016777039, | |
| "step": 968 | |
| }, | |
| { | |
| "epoch": 0.7342922028766087, | |
| "grad_norm": 3.5756638050079346, | |
| "learning_rate": 2.8710339705060085e-06, | |
| "loss": 0.7574349045753479, | |
| "step": 970 | |
| }, | |
| { | |
| "epoch": 0.7358062074186222, | |
| "grad_norm": 2.8669235706329346, | |
| "learning_rate": 2.870312148383976e-06, | |
| "loss": 0.857857346534729, | |
| "step": 972 | |
| }, | |
| { | |
| "epoch": 0.7373202119606359, | |
| "grad_norm": 5.559253215789795, | |
| "learning_rate": 2.8695884137849317e-06, | |
| "loss": 0.4777136445045471, | |
| "step": 974 | |
| }, | |
| { | |
| "epoch": 0.7388342165026495, | |
| "grad_norm": 7.590475082397461, | |
| "learning_rate": 2.868862767843116e-06, | |
| "loss": 1.1985186338424683, | |
| "step": 976 | |
| }, | |
| { | |
| "epoch": 0.7403482210446631, | |
| "grad_norm": 1.6530134677886963, | |
| "learning_rate": 2.868135211695763e-06, | |
| "loss": 1.2720890045166016, | |
| "step": 978 | |
| }, | |
| { | |
| "epoch": 0.7418622255866768, | |
| "grad_norm": 2.517655372619629, | |
| "learning_rate": 2.8674057464831016e-06, | |
| "loss": 0.8914427757263184, | |
| "step": 980 | |
| }, | |
| { | |
| "epoch": 0.7433762301286904, | |
| "grad_norm": 2.6291592121124268, | |
| "learning_rate": 2.866674373348351e-06, | |
| "loss": 0.7555364966392517, | |
| "step": 982 | |
| }, | |
| { | |
| "epoch": 0.744890234670704, | |
| "grad_norm": 2.1514620780944824, | |
| "learning_rate": 2.865941093437721e-06, | |
| "loss": 0.8511964678764343, | |
| "step": 984 | |
| }, | |
| { | |
| "epoch": 0.7464042392127176, | |
| "grad_norm": 1.976210355758667, | |
| "learning_rate": 2.865205907900412e-06, | |
| "loss": 0.9552498459815979, | |
| "step": 986 | |
| }, | |
| { | |
| "epoch": 0.7479182437547313, | |
| "grad_norm": 3.423999547958374, | |
| "learning_rate": 2.864468817888608e-06, | |
| "loss": 1.2477160692214966, | |
| "step": 988 | |
| }, | |
| { | |
| "epoch": 0.7494322482967449, | |
| "grad_norm": 2.068427562713623, | |
| "learning_rate": 2.863729824557479e-06, | |
| "loss": 1.0305664539337158, | |
| "step": 990 | |
| }, | |
| { | |
| "epoch": 0.7509462528387585, | |
| "grad_norm": 1.6632643938064575, | |
| "learning_rate": 2.862988929065177e-06, | |
| "loss": 1.1914719343185425, | |
| "step": 992 | |
| }, | |
| { | |
| "epoch": 0.7524602573807722, | |
| "grad_norm": 2.210796356201172, | |
| "learning_rate": 2.862246132572837e-06, | |
| "loss": 0.8302785158157349, | |
| "step": 994 | |
| }, | |
| { | |
| "epoch": 0.7539742619227857, | |
| "grad_norm": 7.937429904937744, | |
| "learning_rate": 2.8615014362445708e-06, | |
| "loss": 0.8746134638786316, | |
| "step": 996 | |
| }, | |
| { | |
| "epoch": 0.7554882664647994, | |
| "grad_norm": 1.960372805595398, | |
| "learning_rate": 2.860754841247469e-06, | |
| "loss": 1.107035756111145, | |
| "step": 998 | |
| }, | |
| { | |
| "epoch": 0.757002271006813, | |
| "grad_norm": 6.556978702545166, | |
| "learning_rate": 2.860006348751598e-06, | |
| "loss": 0.48335692286491394, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 0.7585162755488266, | |
| "grad_norm": 2.474748373031616, | |
| "learning_rate": 2.8592559599299976e-06, | |
| "loss": 0.5073326826095581, | |
| "step": 1002 | |
| }, | |
| { | |
| "epoch": 0.7600302800908403, | |
| "grad_norm": 1.2302364110946655, | |
| "learning_rate": 2.8585036759586796e-06, | |
| "loss": 0.6908177733421326, | |
| "step": 1004 | |
| }, | |
| { | |
| "epoch": 0.761544284632854, | |
| "grad_norm": 1.1481744050979614, | |
| "learning_rate": 2.857749498016625e-06, | |
| "loss": 0.6242103576660156, | |
| "step": 1006 | |
| }, | |
| { | |
| "epoch": 0.7630582891748675, | |
| "grad_norm": 1.667983889579773, | |
| "learning_rate": 2.856993427285784e-06, | |
| "loss": 0.5696890950202942, | |
| "step": 1008 | |
| }, | |
| { | |
| "epoch": 0.7645722937168812, | |
| "grad_norm": 2.3057103157043457, | |
| "learning_rate": 2.856235464951074e-06, | |
| "loss": 0.43427762389183044, | |
| "step": 1010 | |
| }, | |
| { | |
| "epoch": 0.7660862982588947, | |
| "grad_norm": 2.363940715789795, | |
| "learning_rate": 2.8554756122003755e-06, | |
| "loss": 0.8118288516998291, | |
| "step": 1012 | |
| }, | |
| { | |
| "epoch": 0.7676003028009084, | |
| "grad_norm": 2.5736377239227295, | |
| "learning_rate": 2.8547138702245316e-06, | |
| "loss": 1.203795313835144, | |
| "step": 1014 | |
| }, | |
| { | |
| "epoch": 0.7691143073429221, | |
| "grad_norm": 4.996185779571533, | |
| "learning_rate": 2.853950240217347e-06, | |
| "loss": 0.5433783531188965, | |
| "step": 1016 | |
| }, | |
| { | |
| "epoch": 0.7706283118849356, | |
| "grad_norm": 1.1869832277297974, | |
| "learning_rate": 2.8531847233755852e-06, | |
| "loss": 1.1779699325561523, | |
| "step": 1018 | |
| }, | |
| { | |
| "epoch": 0.7721423164269493, | |
| "grad_norm": 1.2899645566940308, | |
| "learning_rate": 2.852417320898966e-06, | |
| "loss": 1.1803244352340698, | |
| "step": 1020 | |
| }, | |
| { | |
| "epoch": 0.7736563209689629, | |
| "grad_norm": 1.5888104438781738, | |
| "learning_rate": 2.8516480339901663e-06, | |
| "loss": 0.6052057147026062, | |
| "step": 1022 | |
| }, | |
| { | |
| "epoch": 0.7751703255109765, | |
| "grad_norm": 1.4890227317810059, | |
| "learning_rate": 2.8508768638548135e-06, | |
| "loss": 1.1067696809768677, | |
| "step": 1024 | |
| }, | |
| { | |
| "epoch": 0.7766843300529902, | |
| "grad_norm": 4.891721248626709, | |
| "learning_rate": 2.850103811701488e-06, | |
| "loss": 0.5788875222206116, | |
| "step": 1026 | |
| }, | |
| { | |
| "epoch": 0.7781983345950038, | |
| "grad_norm": 2.0971038341522217, | |
| "learning_rate": 2.849328878741721e-06, | |
| "loss": 0.8433497548103333, | |
| "step": 1028 | |
| }, | |
| { | |
| "epoch": 0.7797123391370174, | |
| "grad_norm": 3.295856237411499, | |
| "learning_rate": 2.8485520661899875e-06, | |
| "loss": 0.9592798352241516, | |
| "step": 1030 | |
| }, | |
| { | |
| "epoch": 0.781226343679031, | |
| "grad_norm": 1.9466731548309326, | |
| "learning_rate": 2.8477733752637124e-06, | |
| "loss": 0.4877493977546692, | |
| "step": 1032 | |
| }, | |
| { | |
| "epoch": 0.7827403482210447, | |
| "grad_norm": 5.624363899230957, | |
| "learning_rate": 2.8469928071832622e-06, | |
| "loss": 0.5387091040611267, | |
| "step": 1034 | |
| }, | |
| { | |
| "epoch": 0.7842543527630583, | |
| "grad_norm": 1.9813792705535889, | |
| "learning_rate": 2.846210363171945e-06, | |
| "loss": 0.9394605159759521, | |
| "step": 1036 | |
| }, | |
| { | |
| "epoch": 0.7857683573050719, | |
| "grad_norm": 2.6155548095703125, | |
| "learning_rate": 2.8454260444560107e-06, | |
| "loss": 0.745268702507019, | |
| "step": 1038 | |
| }, | |
| { | |
| "epoch": 0.7872823618470856, | |
| "grad_norm": 3.142620325088501, | |
| "learning_rate": 2.8446398522646444e-06, | |
| "loss": 0.3901050388813019, | |
| "step": 1040 | |
| }, | |
| { | |
| "epoch": 0.7887963663890991, | |
| "grad_norm": 1.0682148933410645, | |
| "learning_rate": 2.8438517878299704e-06, | |
| "loss": 0.8102407455444336, | |
| "step": 1042 | |
| }, | |
| { | |
| "epoch": 0.7903103709311128, | |
| "grad_norm": 2.6520161628723145, | |
| "learning_rate": 2.8430618523870454e-06, | |
| "loss": 1.0556105375289917, | |
| "step": 1044 | |
| }, | |
| { | |
| "epoch": 0.7918243754731265, | |
| "grad_norm": 6.871322154998779, | |
| "learning_rate": 2.842270047173858e-06, | |
| "loss": 1.212335467338562, | |
| "step": 1046 | |
| }, | |
| { | |
| "epoch": 0.79333838001514, | |
| "grad_norm": 1.3800029754638672, | |
| "learning_rate": 2.841476373431328e-06, | |
| "loss": 1.054120421409607, | |
| "step": 1048 | |
| }, | |
| { | |
| "epoch": 0.7948523845571537, | |
| "grad_norm": 2.970756769180298, | |
| "learning_rate": 2.8406808324033043e-06, | |
| "loss": 1.2394263744354248, | |
| "step": 1050 | |
| }, | |
| { | |
| "epoch": 0.7963663890991673, | |
| "grad_norm": 2.5240118503570557, | |
| "learning_rate": 2.83988342533656e-06, | |
| "loss": 0.913371741771698, | |
| "step": 1052 | |
| }, | |
| { | |
| "epoch": 0.7978803936411809, | |
| "grad_norm": 1.9976874589920044, | |
| "learning_rate": 2.839084153480795e-06, | |
| "loss": 1.2547472715377808, | |
| "step": 1054 | |
| }, | |
| { | |
| "epoch": 0.7993943981831946, | |
| "grad_norm": 3.423712968826294, | |
| "learning_rate": 2.83828301808863e-06, | |
| "loss": 0.435365229845047, | |
| "step": 1056 | |
| }, | |
| { | |
| "epoch": 0.8009084027252081, | |
| "grad_norm": 5.868340015411377, | |
| "learning_rate": 2.837480020415607e-06, | |
| "loss": 1.0489962100982666, | |
| "step": 1058 | |
| }, | |
| { | |
| "epoch": 0.8024224072672218, | |
| "grad_norm": 1.535408854484558, | |
| "learning_rate": 2.836675161720187e-06, | |
| "loss": 1.2446261644363403, | |
| "step": 1060 | |
| }, | |
| { | |
| "epoch": 0.8039364118092355, | |
| "grad_norm": 1.8610411882400513, | |
| "learning_rate": 2.8358684432637464e-06, | |
| "loss": 1.2359105348587036, | |
| "step": 1062 | |
| }, | |
| { | |
| "epoch": 0.805450416351249, | |
| "grad_norm": 1.2122951745986938, | |
| "learning_rate": 2.8350598663105774e-06, | |
| "loss": 0.500268816947937, | |
| "step": 1064 | |
| }, | |
| { | |
| "epoch": 0.8069644208932627, | |
| "grad_norm": 1.37546706199646, | |
| "learning_rate": 2.834249432127884e-06, | |
| "loss": 0.9466779232025146, | |
| "step": 1066 | |
| }, | |
| { | |
| "epoch": 0.8084784254352763, | |
| "grad_norm": 5.66973876953125, | |
| "learning_rate": 2.833437141985781e-06, | |
| "loss": 0.6052334904670715, | |
| "step": 1068 | |
| }, | |
| { | |
| "epoch": 0.8099924299772899, | |
| "grad_norm": 2.347961902618408, | |
| "learning_rate": 2.832622997157292e-06, | |
| "loss": 0.858746349811554, | |
| "step": 1070 | |
| }, | |
| { | |
| "epoch": 0.8115064345193036, | |
| "grad_norm": 1.3671703338623047, | |
| "learning_rate": 2.831806998918348e-06, | |
| "loss": 1.2333590984344482, | |
| "step": 1072 | |
| }, | |
| { | |
| "epoch": 0.8130204390613172, | |
| "grad_norm": 2.0911505222320557, | |
| "learning_rate": 2.8309891485477835e-06, | |
| "loss": 0.7037978172302246, | |
| "step": 1074 | |
| }, | |
| { | |
| "epoch": 0.8145344436033308, | |
| "grad_norm": 1.9133065938949585, | |
| "learning_rate": 2.830169447327336e-06, | |
| "loss": 0.5428035259246826, | |
| "step": 1076 | |
| }, | |
| { | |
| "epoch": 0.8160484481453444, | |
| "grad_norm": 4.97536563873291, | |
| "learning_rate": 2.8293478965416444e-06, | |
| "loss": 0.8640028238296509, | |
| "step": 1078 | |
| }, | |
| { | |
| "epoch": 0.817562452687358, | |
| "grad_norm": 2.3419647216796875, | |
| "learning_rate": 2.8285244974782453e-06, | |
| "loss": 0.8723977208137512, | |
| "step": 1080 | |
| }, | |
| { | |
| "epoch": 0.8190764572293717, | |
| "grad_norm": 7.133307933807373, | |
| "learning_rate": 2.827699251427572e-06, | |
| "loss": 0.9312586784362793, | |
| "step": 1082 | |
| }, | |
| { | |
| "epoch": 0.8205904617713853, | |
| "grad_norm": 3.1311888694763184, | |
| "learning_rate": 2.8268721596829532e-06, | |
| "loss": 1.2033246755599976, | |
| "step": 1084 | |
| }, | |
| { | |
| "epoch": 0.822104466313399, | |
| "grad_norm": 1.5448412895202637, | |
| "learning_rate": 2.8260432235406094e-06, | |
| "loss": 0.7385362386703491, | |
| "step": 1086 | |
| }, | |
| { | |
| "epoch": 0.8236184708554126, | |
| "grad_norm": 2.094623565673828, | |
| "learning_rate": 2.825212444299652e-06, | |
| "loss": 0.4562510550022125, | |
| "step": 1088 | |
| }, | |
| { | |
| "epoch": 0.8251324753974262, | |
| "grad_norm": 1.6329617500305176, | |
| "learning_rate": 2.8243798232620807e-06, | |
| "loss": 0.9182811975479126, | |
| "step": 1090 | |
| }, | |
| { | |
| "epoch": 0.8266464799394398, | |
| "grad_norm": 4.220635890960693, | |
| "learning_rate": 2.823545361732782e-06, | |
| "loss": 0.520475447177887, | |
| "step": 1092 | |
| }, | |
| { | |
| "epoch": 0.8281604844814534, | |
| "grad_norm": 1.5116798877716064, | |
| "learning_rate": 2.8227090610195265e-06, | |
| "loss": 0.7769816517829895, | |
| "step": 1094 | |
| }, | |
| { | |
| "epoch": 0.8296744890234671, | |
| "grad_norm": 6.517539978027344, | |
| "learning_rate": 2.821870922432967e-06, | |
| "loss": 0.8503794074058533, | |
| "step": 1096 | |
| }, | |
| { | |
| "epoch": 0.8311884935654807, | |
| "grad_norm": 2.3296096324920654, | |
| "learning_rate": 2.8210309472866375e-06, | |
| "loss": 0.8540775179862976, | |
| "step": 1098 | |
| }, | |
| { | |
| "epoch": 0.8327024981074943, | |
| "grad_norm": 3.2018163204193115, | |
| "learning_rate": 2.820189136896949e-06, | |
| "loss": 1.1501890420913696, | |
| "step": 1100 | |
| }, | |
| { | |
| "epoch": 0.834216502649508, | |
| "grad_norm": 4.151829719543457, | |
| "learning_rate": 2.8193454925831903e-06, | |
| "loss": 0.52679842710495, | |
| "step": 1102 | |
| }, | |
| { | |
| "epoch": 0.8357305071915215, | |
| "grad_norm": 2.1522796154022217, | |
| "learning_rate": 2.818500015667523e-06, | |
| "loss": 0.6993886232376099, | |
| "step": 1104 | |
| }, | |
| { | |
| "epoch": 0.8372445117335352, | |
| "grad_norm": 3.550656318664551, | |
| "learning_rate": 2.8176527074749803e-06, | |
| "loss": 0.8634568452835083, | |
| "step": 1106 | |
| }, | |
| { | |
| "epoch": 0.8387585162755489, | |
| "grad_norm": 2.264739990234375, | |
| "learning_rate": 2.816803569333467e-06, | |
| "loss": 0.3652278780937195, | |
| "step": 1108 | |
| }, | |
| { | |
| "epoch": 0.8402725208175624, | |
| "grad_norm": 1.6804509162902832, | |
| "learning_rate": 2.815952602573755e-06, | |
| "loss": 0.8796547651290894, | |
| "step": 1110 | |
| }, | |
| { | |
| "epoch": 0.8417865253595761, | |
| "grad_norm": 2.1686923503875732, | |
| "learning_rate": 2.815099808529482e-06, | |
| "loss": 1.461799144744873, | |
| "step": 1112 | |
| }, | |
| { | |
| "epoch": 0.8433005299015897, | |
| "grad_norm": 11.775230407714844, | |
| "learning_rate": 2.81424518853715e-06, | |
| "loss": 0.8035557270050049, | |
| "step": 1114 | |
| }, | |
| { | |
| "epoch": 0.8448145344436033, | |
| "grad_norm": 1.5292541980743408, | |
| "learning_rate": 2.8133887439361213e-06, | |
| "loss": 0.8822787404060364, | |
| "step": 1116 | |
| }, | |
| { | |
| "epoch": 0.846328538985617, | |
| "grad_norm": 1.0312873125076294, | |
| "learning_rate": 2.8125304760686196e-06, | |
| "loss": 0.7623322010040283, | |
| "step": 1118 | |
| }, | |
| { | |
| "epoch": 0.8478425435276306, | |
| "grad_norm": 2.891306161880493, | |
| "learning_rate": 2.811670386279724e-06, | |
| "loss": 0.9173418879508972, | |
| "step": 1120 | |
| }, | |
| { | |
| "epoch": 0.8493565480696442, | |
| "grad_norm": 3.8095450401306152, | |
| "learning_rate": 2.8108084759173695e-06, | |
| "loss": 1.0241823196411133, | |
| "step": 1122 | |
| }, | |
| { | |
| "epoch": 0.8508705526116578, | |
| "grad_norm": 2.514314889907837, | |
| "learning_rate": 2.809944746332346e-06, | |
| "loss": 0.8152939081192017, | |
| "step": 1124 | |
| }, | |
| { | |
| "epoch": 0.8523845571536715, | |
| "grad_norm": 5.4192609786987305, | |
| "learning_rate": 2.8090791988782928e-06, | |
| "loss": 0.44812607765197754, | |
| "step": 1126 | |
| }, | |
| { | |
| "epoch": 0.8538985616956851, | |
| "grad_norm": 1.8970630168914795, | |
| "learning_rate": 2.808211834911698e-06, | |
| "loss": 0.7814205288887024, | |
| "step": 1128 | |
| }, | |
| { | |
| "epoch": 0.8554125662376987, | |
| "grad_norm": 1.624800205230713, | |
| "learning_rate": 2.8073426557918975e-06, | |
| "loss": 0.8644953966140747, | |
| "step": 1130 | |
| }, | |
| { | |
| "epoch": 0.8569265707797123, | |
| "grad_norm": 2.4981987476348877, | |
| "learning_rate": 2.806471662881072e-06, | |
| "loss": 0.5381126999855042, | |
| "step": 1132 | |
| }, | |
| { | |
| "epoch": 0.858440575321726, | |
| "grad_norm": 1.756712794303894, | |
| "learning_rate": 2.8055988575442435e-06, | |
| "loss": 0.5988097190856934, | |
| "step": 1134 | |
| }, | |
| { | |
| "epoch": 0.8599545798637396, | |
| "grad_norm": 3.4914777278900146, | |
| "learning_rate": 2.804724241149276e-06, | |
| "loss": 0.6631752848625183, | |
| "step": 1136 | |
| }, | |
| { | |
| "epoch": 0.8614685844057532, | |
| "grad_norm": 4.860958099365234, | |
| "learning_rate": 2.8038478150668704e-06, | |
| "loss": 1.2718474864959717, | |
| "step": 1138 | |
| }, | |
| { | |
| "epoch": 0.8629825889477668, | |
| "grad_norm": 2.0775246620178223, | |
| "learning_rate": 2.8029695806705645e-06, | |
| "loss": 0.8018662929534912, | |
| "step": 1140 | |
| }, | |
| { | |
| "epoch": 0.8644965934897805, | |
| "grad_norm": 2.704127311706543, | |
| "learning_rate": 2.8020895393367304e-06, | |
| "loss": 0.4317402243614197, | |
| "step": 1142 | |
| }, | |
| { | |
| "epoch": 0.8660105980317941, | |
| "grad_norm": 3.2498629093170166, | |
| "learning_rate": 2.8012076924445715e-06, | |
| "loss": 1.2137093544006348, | |
| "step": 1144 | |
| }, | |
| { | |
| "epoch": 0.8675246025738077, | |
| "grad_norm": 2.4548707008361816, | |
| "learning_rate": 2.8003240413761203e-06, | |
| "loss": 0.7217168211936951, | |
| "step": 1146 | |
| }, | |
| { | |
| "epoch": 0.8690386071158214, | |
| "grad_norm": 2.6822304725646973, | |
| "learning_rate": 2.7994385875162384e-06, | |
| "loss": 0.8042414784431458, | |
| "step": 1148 | |
| }, | |
| { | |
| "epoch": 0.8705526116578349, | |
| "grad_norm": 2.950601577758789, | |
| "learning_rate": 2.7985513322526113e-06, | |
| "loss": 1.1039682626724243, | |
| "step": 1150 | |
| }, | |
| { | |
| "epoch": 0.8720666161998486, | |
| "grad_norm": 1.850177526473999, | |
| "learning_rate": 2.797662276975749e-06, | |
| "loss": 0.7056236267089844, | |
| "step": 1152 | |
| }, | |
| { | |
| "epoch": 0.8735806207418623, | |
| "grad_norm": 6.22799825668335, | |
| "learning_rate": 2.7967714230789814e-06, | |
| "loss": 0.8468853235244751, | |
| "step": 1154 | |
| }, | |
| { | |
| "epoch": 0.8750946252838758, | |
| "grad_norm": 1.8293390274047852, | |
| "learning_rate": 2.7958787719584563e-06, | |
| "loss": 0.8229158520698547, | |
| "step": 1156 | |
| }, | |
| { | |
| "epoch": 0.8766086298258895, | |
| "grad_norm": 1.768035888671875, | |
| "learning_rate": 2.794984325013141e-06, | |
| "loss": 0.47428685426712036, | |
| "step": 1158 | |
| }, | |
| { | |
| "epoch": 0.878122634367903, | |
| "grad_norm": 2.19268798828125, | |
| "learning_rate": 2.7940880836448146e-06, | |
| "loss": 0.8314292430877686, | |
| "step": 1160 | |
| }, | |
| { | |
| "epoch": 0.8796366389099167, | |
| "grad_norm": 2.4109113216400146, | |
| "learning_rate": 2.7931900492580693e-06, | |
| "loss": 1.1764559745788574, | |
| "step": 1162 | |
| }, | |
| { | |
| "epoch": 0.8811506434519304, | |
| "grad_norm": 1.2788642644882202, | |
| "learning_rate": 2.7922902232603086e-06, | |
| "loss": 0.30559080839157104, | |
| "step": 1164 | |
| }, | |
| { | |
| "epoch": 0.882664647993944, | |
| "grad_norm": 2.033473014831543, | |
| "learning_rate": 2.7913886070617414e-06, | |
| "loss": 0.8132178783416748, | |
| "step": 1166 | |
| }, | |
| { | |
| "epoch": 0.8841786525359576, | |
| "grad_norm": 5.624773025512695, | |
| "learning_rate": 2.7904852020753835e-06, | |
| "loss": 0.8571276664733887, | |
| "step": 1168 | |
| }, | |
| { | |
| "epoch": 0.8856926570779712, | |
| "grad_norm": 1.4092870950698853, | |
| "learning_rate": 2.789580009717054e-06, | |
| "loss": 0.8549548983573914, | |
| "step": 1170 | |
| }, | |
| { | |
| "epoch": 0.8872066616199848, | |
| "grad_norm": 2.9512217044830322, | |
| "learning_rate": 2.788673031405374e-06, | |
| "loss": 0.5353968739509583, | |
| "step": 1172 | |
| }, | |
| { | |
| "epoch": 0.8887206661619985, | |
| "grad_norm": 4.28328800201416, | |
| "learning_rate": 2.787764268561762e-06, | |
| "loss": 0.731753408908844, | |
| "step": 1174 | |
| }, | |
| { | |
| "epoch": 0.8902346707040121, | |
| "grad_norm": 2.1515896320343018, | |
| "learning_rate": 2.7868537226104346e-06, | |
| "loss": 0.7792448997497559, | |
| "step": 1176 | |
| }, | |
| { | |
| "epoch": 0.8917486752460257, | |
| "grad_norm": 7.964438438415527, | |
| "learning_rate": 2.7859413949784013e-06, | |
| "loss": 0.47553539276123047, | |
| "step": 1178 | |
| }, | |
| { | |
| "epoch": 0.8932626797880394, | |
| "grad_norm": 4.39863395690918, | |
| "learning_rate": 2.7850272870954657e-06, | |
| "loss": 1.2436693906784058, | |
| "step": 1180 | |
| }, | |
| { | |
| "epoch": 0.894776684330053, | |
| "grad_norm": 1.8371373414993286, | |
| "learning_rate": 2.78411140039422e-06, | |
| "loss": 1.127423882484436, | |
| "step": 1182 | |
| }, | |
| { | |
| "epoch": 0.8962906888720666, | |
| "grad_norm": 2.7225122451782227, | |
| "learning_rate": 2.783193736310045e-06, | |
| "loss": 0.8037985563278198, | |
| "step": 1184 | |
| }, | |
| { | |
| "epoch": 0.8978046934140802, | |
| "grad_norm": 2.108687400817871, | |
| "learning_rate": 2.782274296281107e-06, | |
| "loss": 1.2393871545791626, | |
| "step": 1186 | |
| }, | |
| { | |
| "epoch": 0.8993186979560939, | |
| "grad_norm": 1.637842059135437, | |
| "learning_rate": 2.7813530817483537e-06, | |
| "loss": 1.1900055408477783, | |
| "step": 1188 | |
| }, | |
| { | |
| "epoch": 0.9008327024981075, | |
| "grad_norm": 4.827752113342285, | |
| "learning_rate": 2.780430094155517e-06, | |
| "loss": 0.8907873630523682, | |
| "step": 1190 | |
| }, | |
| { | |
| "epoch": 0.9023467070401211, | |
| "grad_norm": 4.129854679107666, | |
| "learning_rate": 2.7795053349491062e-06, | |
| "loss": 0.30467483401298523, | |
| "step": 1192 | |
| }, | |
| { | |
| "epoch": 0.9038607115821348, | |
| "grad_norm": 5.735848426818848, | |
| "learning_rate": 2.778578805578406e-06, | |
| "loss": 0.8214407563209534, | |
| "step": 1194 | |
| }, | |
| { | |
| "epoch": 0.9053747161241483, | |
| "grad_norm": 1.920812964439392, | |
| "learning_rate": 2.7776505074954756e-06, | |
| "loss": 0.8967646956443787, | |
| "step": 1196 | |
| }, | |
| { | |
| "epoch": 0.906888720666162, | |
| "grad_norm": 2.8142926692962646, | |
| "learning_rate": 2.776720442155148e-06, | |
| "loss": 0.5961228609085083, | |
| "step": 1198 | |
| }, | |
| { | |
| "epoch": 0.9084027252081757, | |
| "grad_norm": 1.3868736028671265, | |
| "learning_rate": 2.7757886110150234e-06, | |
| "loss": 1.00760817527771, | |
| "step": 1200 | |
| }, | |
| { | |
| "epoch": 0.9099167297501892, | |
| "grad_norm": 2.524928331375122, | |
| "learning_rate": 2.774855015535471e-06, | |
| "loss": 1.0168966054916382, | |
| "step": 1202 | |
| }, | |
| { | |
| "epoch": 0.9114307342922029, | |
| "grad_norm": 2.3753180503845215, | |
| "learning_rate": 2.7739196571796242e-06, | |
| "loss": 1.2498538494110107, | |
| "step": 1204 | |
| }, | |
| { | |
| "epoch": 0.9129447388342165, | |
| "grad_norm": 1.77561616897583, | |
| "learning_rate": 2.7729825374133805e-06, | |
| "loss": 0.7145369648933411, | |
| "step": 1206 | |
| }, | |
| { | |
| "epoch": 0.9144587433762301, | |
| "grad_norm": 2.117807149887085, | |
| "learning_rate": 2.7720436577053957e-06, | |
| "loss": 0.9042050838470459, | |
| "step": 1208 | |
| }, | |
| { | |
| "epoch": 0.9159727479182438, | |
| "grad_norm": 5.781267166137695, | |
| "learning_rate": 2.7711030195270854e-06, | |
| "loss": 0.7323153614997864, | |
| "step": 1210 | |
| }, | |
| { | |
| "epoch": 0.9174867524602573, | |
| "grad_norm": 3.5370779037475586, | |
| "learning_rate": 2.770160624352621e-06, | |
| "loss": 0.8055700063705444, | |
| "step": 1212 | |
| }, | |
| { | |
| "epoch": 0.919000757002271, | |
| "grad_norm": 1.6705321073532104, | |
| "learning_rate": 2.7692164736589273e-06, | |
| "loss": 0.850743293762207, | |
| "step": 1214 | |
| }, | |
| { | |
| "epoch": 0.9205147615442847, | |
| "grad_norm": 1.9116226434707642, | |
| "learning_rate": 2.7682705689256797e-06, | |
| "loss": 1.4134933948516846, | |
| "step": 1216 | |
| }, | |
| { | |
| "epoch": 0.9220287660862982, | |
| "grad_norm": 2.118795871734619, | |
| "learning_rate": 2.767322911635303e-06, | |
| "loss": 0.42531701922416687, | |
| "step": 1218 | |
| }, | |
| { | |
| "epoch": 0.9235427706283119, | |
| "grad_norm": 1.391668677330017, | |
| "learning_rate": 2.7663735032729687e-06, | |
| "loss": 0.945673406124115, | |
| "step": 1220 | |
| }, | |
| { | |
| "epoch": 0.9250567751703255, | |
| "grad_norm": 2.9070723056793213, | |
| "learning_rate": 2.7654223453265933e-06, | |
| "loss": 0.922468364238739, | |
| "step": 1222 | |
| }, | |
| { | |
| "epoch": 0.9265707797123391, | |
| "grad_norm": 1.8511205911636353, | |
| "learning_rate": 2.7644694392868335e-06, | |
| "loss": 0.6354098916053772, | |
| "step": 1224 | |
| }, | |
| { | |
| "epoch": 0.9280847842543528, | |
| "grad_norm": 4.253298282623291, | |
| "learning_rate": 2.7635147866470874e-06, | |
| "loss": 0.19364294409751892, | |
| "step": 1226 | |
| }, | |
| { | |
| "epoch": 0.9295987887963664, | |
| "grad_norm": 1.7090325355529785, | |
| "learning_rate": 2.7625583889034892e-06, | |
| "loss": 0.8719943761825562, | |
| "step": 1228 | |
| }, | |
| { | |
| "epoch": 0.93111279333838, | |
| "grad_norm": 3.77422833442688, | |
| "learning_rate": 2.7616002475549083e-06, | |
| "loss": 0.42701461911201477, | |
| "step": 1230 | |
| }, | |
| { | |
| "epoch": 0.9326267978803936, | |
| "grad_norm": 2.711357593536377, | |
| "learning_rate": 2.7606403641029477e-06, | |
| "loss": 1.1919710636138916, | |
| "step": 1232 | |
| }, | |
| { | |
| "epoch": 0.9341408024224073, | |
| "grad_norm": 1.9754915237426758, | |
| "learning_rate": 2.7596787400519383e-06, | |
| "loss": 1.2326306104660034, | |
| "step": 1234 | |
| }, | |
| { | |
| "epoch": 0.9356548069644209, | |
| "grad_norm": 11.081687927246094, | |
| "learning_rate": 2.7587153769089416e-06, | |
| "loss": 0.7123717069625854, | |
| "step": 1236 | |
| }, | |
| { | |
| "epoch": 0.9371688115064345, | |
| "grad_norm": 2.403883218765259, | |
| "learning_rate": 2.757750276183743e-06, | |
| "loss": 1.3490595817565918, | |
| "step": 1238 | |
| }, | |
| { | |
| "epoch": 0.9386828160484482, | |
| "grad_norm": 3.322733163833618, | |
| "learning_rate": 2.756783439388851e-06, | |
| "loss": 0.9039950370788574, | |
| "step": 1240 | |
| }, | |
| { | |
| "epoch": 0.9401968205904617, | |
| "grad_norm": 5.353653907775879, | |
| "learning_rate": 2.7558148680394947e-06, | |
| "loss": 0.4231591522693634, | |
| "step": 1242 | |
| }, | |
| { | |
| "epoch": 0.9417108251324754, | |
| "grad_norm": 2.372028350830078, | |
| "learning_rate": 2.7548445636536234e-06, | |
| "loss": 0.6393493413925171, | |
| "step": 1244 | |
| }, | |
| { | |
| "epoch": 0.9432248296744891, | |
| "grad_norm": 2.0998072624206543, | |
| "learning_rate": 2.753872527751901e-06, | |
| "loss": 1.0599191188812256, | |
| "step": 1246 | |
| }, | |
| { | |
| "epoch": 0.9447388342165026, | |
| "grad_norm": 1.9805744886398315, | |
| "learning_rate": 2.7528987618577047e-06, | |
| "loss": 1.1206618547439575, | |
| "step": 1248 | |
| }, | |
| { | |
| "epoch": 0.9462528387585163, | |
| "grad_norm": 1.0623162984848022, | |
| "learning_rate": 2.7519232674971233e-06, | |
| "loss": 0.6939067840576172, | |
| "step": 1250 | |
| }, | |
| { | |
| "epoch": 0.9477668433005298, | |
| "grad_norm": 4.080114841461182, | |
| "learning_rate": 2.750946046198955e-06, | |
| "loss": 0.902068555355072, | |
| "step": 1252 | |
| }, | |
| { | |
| "epoch": 0.9492808478425435, | |
| "grad_norm": 5.1340227127075195, | |
| "learning_rate": 2.749967099494704e-06, | |
| "loss": 0.8372121453285217, | |
| "step": 1254 | |
| }, | |
| { | |
| "epoch": 0.9507948523845572, | |
| "grad_norm": 3.9078993797302246, | |
| "learning_rate": 2.7489864289185786e-06, | |
| "loss": 0.8124945163726807, | |
| "step": 1256 | |
| }, | |
| { | |
| "epoch": 0.9523088569265707, | |
| "grad_norm": 7.938327789306641, | |
| "learning_rate": 2.7480040360074886e-06, | |
| "loss": 0.9052401185035706, | |
| "step": 1258 | |
| }, | |
| { | |
| "epoch": 0.9538228614685844, | |
| "grad_norm": 2.190640449523926, | |
| "learning_rate": 2.7470199223010446e-06, | |
| "loss": 0.9121738076210022, | |
| "step": 1260 | |
| }, | |
| { | |
| "epoch": 0.9553368660105981, | |
| "grad_norm": 2.5523154735565186, | |
| "learning_rate": 2.7460340893415503e-06, | |
| "loss": 0.682637631893158, | |
| "step": 1262 | |
| }, | |
| { | |
| "epoch": 0.9568508705526116, | |
| "grad_norm": 2.231675624847412, | |
| "learning_rate": 2.7450465386740074e-06, | |
| "loss": 0.8745209574699402, | |
| "step": 1264 | |
| }, | |
| { | |
| "epoch": 0.9583648750946253, | |
| "grad_norm": 1.344656229019165, | |
| "learning_rate": 2.744057271846109e-06, | |
| "loss": 1.2278828620910645, | |
| "step": 1266 | |
| }, | |
| { | |
| "epoch": 0.9598788796366389, | |
| "grad_norm": 1.938359260559082, | |
| "learning_rate": 2.743066290408236e-06, | |
| "loss": 0.76249760389328, | |
| "step": 1268 | |
| }, | |
| { | |
| "epoch": 0.9613928841786525, | |
| "grad_norm": 1.5976496934890747, | |
| "learning_rate": 2.7420735959134585e-06, | |
| "loss": 0.5155737400054932, | |
| "step": 1270 | |
| }, | |
| { | |
| "epoch": 0.9629068887206662, | |
| "grad_norm": 1.5426353216171265, | |
| "learning_rate": 2.74107918991753e-06, | |
| "loss": 1.2777304649353027, | |
| "step": 1272 | |
| }, | |
| { | |
| "epoch": 0.9644208932626798, | |
| "grad_norm": 3.2231552600860596, | |
| "learning_rate": 2.740083073978887e-06, | |
| "loss": 0.5381708741188049, | |
| "step": 1274 | |
| }, | |
| { | |
| "epoch": 0.9659348978046934, | |
| "grad_norm": 2.456082344055176, | |
| "learning_rate": 2.739085249658645e-06, | |
| "loss": 0.7773256301879883, | |
| "step": 1276 | |
| }, | |
| { | |
| "epoch": 0.967448902346707, | |
| "grad_norm": 6.216519832611084, | |
| "learning_rate": 2.738085718520598e-06, | |
| "loss": 0.39604809880256653, | |
| "step": 1278 | |
| }, | |
| { | |
| "epoch": 0.9689629068887207, | |
| "grad_norm": 2.6285789012908936, | |
| "learning_rate": 2.7370844821312133e-06, | |
| "loss": 0.7192943096160889, | |
| "step": 1280 | |
| }, | |
| { | |
| "epoch": 0.9704769114307343, | |
| "grad_norm": 1.3127273321151733, | |
| "learning_rate": 2.736081542059633e-06, | |
| "loss": 1.2801799774169922, | |
| "step": 1282 | |
| }, | |
| { | |
| "epoch": 0.9719909159727479, | |
| "grad_norm": 1.9447704553604126, | |
| "learning_rate": 2.7350768998776668e-06, | |
| "loss": 1.1748425960540771, | |
| "step": 1284 | |
| }, | |
| { | |
| "epoch": 0.9735049205147616, | |
| "grad_norm": 1.5830963850021362, | |
| "learning_rate": 2.7340705571597945e-06, | |
| "loss": 0.8880859017372131, | |
| "step": 1286 | |
| }, | |
| { | |
| "epoch": 0.9750189250567751, | |
| "grad_norm": 1.9534872770309448, | |
| "learning_rate": 2.733062515483158e-06, | |
| "loss": 0.6956273317337036, | |
| "step": 1288 | |
| }, | |
| { | |
| "epoch": 0.9765329295987888, | |
| "grad_norm": 1.4043753147125244, | |
| "learning_rate": 2.732052776427564e-06, | |
| "loss": 0.8173001408576965, | |
| "step": 1290 | |
| }, | |
| { | |
| "epoch": 0.9780469341408025, | |
| "grad_norm": 1.894389033317566, | |
| "learning_rate": 2.7310413415754785e-06, | |
| "loss": 0.7789499759674072, | |
| "step": 1292 | |
| }, | |
| { | |
| "epoch": 0.979560938682816, | |
| "grad_norm": 3.465914487838745, | |
| "learning_rate": 2.7300282125120255e-06, | |
| "loss": 0.5313485860824585, | |
| "step": 1294 | |
| }, | |
| { | |
| "epoch": 0.9810749432248297, | |
| "grad_norm": 9.61059856414795, | |
| "learning_rate": 2.7290133908249844e-06, | |
| "loss": 0.9159805774688721, | |
| "step": 1296 | |
| }, | |
| { | |
| "epoch": 0.9825889477668432, | |
| "grad_norm": 4.936594486236572, | |
| "learning_rate": 2.7279968781047867e-06, | |
| "loss": 0.8132763504981995, | |
| "step": 1298 | |
| }, | |
| { | |
| "epoch": 0.9841029523088569, | |
| "grad_norm": 2.389509916305542, | |
| "learning_rate": 2.726978675944514e-06, | |
| "loss": 1.0288949012756348, | |
| "step": 1300 | |
| }, | |
| { | |
| "epoch": 0.9856169568508706, | |
| "grad_norm": 1.7264068126678467, | |
| "learning_rate": 2.7259587859398954e-06, | |
| "loss": 0.837356686592102, | |
| "step": 1302 | |
| }, | |
| { | |
| "epoch": 0.9871309613928841, | |
| "grad_norm": 1.2041871547698975, | |
| "learning_rate": 2.7249372096893075e-06, | |
| "loss": 0.444491982460022, | |
| "step": 1304 | |
| }, | |
| { | |
| "epoch": 0.9886449659348978, | |
| "grad_norm": 1.9402670860290527, | |
| "learning_rate": 2.723913948793766e-06, | |
| "loss": 0.8788086175918579, | |
| "step": 1306 | |
| }, | |
| { | |
| "epoch": 0.9901589704769115, | |
| "grad_norm": 8.380989074707031, | |
| "learning_rate": 2.7228890048569294e-06, | |
| "loss": 0.5033178329467773, | |
| "step": 1308 | |
| }, | |
| { | |
| "epoch": 0.991672975018925, | |
| "grad_norm": 2.616488218307495, | |
| "learning_rate": 2.721862379485094e-06, | |
| "loss": 0.751565158367157, | |
| "step": 1310 | |
| }, | |
| { | |
| "epoch": 0.9931869795609387, | |
| "grad_norm": 1.3802661895751953, | |
| "learning_rate": 2.720834074287188e-06, | |
| "loss": 0.7129951119422913, | |
| "step": 1312 | |
| }, | |
| { | |
| "epoch": 0.9947009841029523, | |
| "grad_norm": 1.6147418022155762, | |
| "learning_rate": 2.719804090874776e-06, | |
| "loss": 0.7851198315620422, | |
| "step": 1314 | |
| }, | |
| { | |
| "epoch": 0.9962149886449659, | |
| "grad_norm": 1.4303407669067383, | |
| "learning_rate": 2.7187724308620507e-06, | |
| "loss": 0.8599988222122192, | |
| "step": 1316 | |
| }, | |
| { | |
| "epoch": 0.9977289931869796, | |
| "grad_norm": 3.953418254852295, | |
| "learning_rate": 2.7177390958658336e-06, | |
| "loss": 0.9210554957389832, | |
| "step": 1318 | |
| }, | |
| { | |
| "epoch": 0.9992429977289932, | |
| "grad_norm": 1.302258849143982, | |
| "learning_rate": 2.71670408750557e-06, | |
| "loss": 0.38900431990623474, | |
| "step": 1320 | |
| }, | |
| { | |
| "epoch": 1.0007570022710068, | |
| "grad_norm": 2.8431973457336426, | |
| "learning_rate": 2.715667407403328e-06, | |
| "loss": 0.6563358306884766, | |
| "step": 1322 | |
| }, | |
| { | |
| "epoch": 1.0022710068130205, | |
| "grad_norm": 1.8707140684127808, | |
| "learning_rate": 2.7146290571837965e-06, | |
| "loss": 0.7056888341903687, | |
| "step": 1324 | |
| }, | |
| { | |
| "epoch": 1.0037850113550342, | |
| "grad_norm": 1.6970131397247314, | |
| "learning_rate": 2.7135890384742804e-06, | |
| "loss": 0.8172030448913574, | |
| "step": 1326 | |
| }, | |
| { | |
| "epoch": 1.0052990158970476, | |
| "grad_norm": 2.909482717514038, | |
| "learning_rate": 2.712547352904701e-06, | |
| "loss": 0.4417012631893158, | |
| "step": 1328 | |
| }, | |
| { | |
| "epoch": 1.0068130204390613, | |
| "grad_norm": 1.567160964012146, | |
| "learning_rate": 2.7115040021075915e-06, | |
| "loss": 1.1346977949142456, | |
| "step": 1330 | |
| }, | |
| { | |
| "epoch": 1.008327024981075, | |
| "grad_norm": 1.576781988143921, | |
| "learning_rate": 2.710458987718094e-06, | |
| "loss": 0.7643612027168274, | |
| "step": 1332 | |
| }, | |
| { | |
| "epoch": 1.0098410295230886, | |
| "grad_norm": 2.6382930278778076, | |
| "learning_rate": 2.7094123113739576e-06, | |
| "loss": 0.6715695858001709, | |
| "step": 1334 | |
| }, | |
| { | |
| "epoch": 1.0113550340651023, | |
| "grad_norm": 2.5436923503875732, | |
| "learning_rate": 2.7083639747155376e-06, | |
| "loss": 0.3763667047023773, | |
| "step": 1336 | |
| }, | |
| { | |
| "epoch": 1.0128690386071157, | |
| "grad_norm": 3.2012388706207275, | |
| "learning_rate": 2.7073139793857908e-06, | |
| "loss": 0.6741540431976318, | |
| "step": 1338 | |
| }, | |
| { | |
| "epoch": 1.0143830431491294, | |
| "grad_norm": 3.748810291290283, | |
| "learning_rate": 2.706262327030272e-06, | |
| "loss": 0.6608448624610901, | |
| "step": 1340 | |
| }, | |
| { | |
| "epoch": 1.015897047691143, | |
| "grad_norm": 1.3386434316635132, | |
| "learning_rate": 2.705209019297135e-06, | |
| "loss": 0.47587254643440247, | |
| "step": 1342 | |
| }, | |
| { | |
| "epoch": 1.0174110522331568, | |
| "grad_norm": 2.3118233680725098, | |
| "learning_rate": 2.7041540578371273e-06, | |
| "loss": 0.801154375076294, | |
| "step": 1344 | |
| }, | |
| { | |
| "epoch": 1.0189250567751704, | |
| "grad_norm": 20.85770034790039, | |
| "learning_rate": 2.7030974443035864e-06, | |
| "loss": 0.8961002826690674, | |
| "step": 1346 | |
| }, | |
| { | |
| "epoch": 1.0204390613171839, | |
| "grad_norm": 1.6689656972885132, | |
| "learning_rate": 2.7020391803524415e-06, | |
| "loss": 0.6929575800895691, | |
| "step": 1348 | |
| }, | |
| { | |
| "epoch": 1.0219530658591975, | |
| "grad_norm": 1.3510750532150269, | |
| "learning_rate": 2.7009792676422067e-06, | |
| "loss": 0.7450751066207886, | |
| "step": 1350 | |
| }, | |
| { | |
| "epoch": 1.0234670704012112, | |
| "grad_norm": 0.8317976593971252, | |
| "learning_rate": 2.6999177078339807e-06, | |
| "loss": 0.6293408870697021, | |
| "step": 1352 | |
| }, | |
| { | |
| "epoch": 1.0249810749432249, | |
| "grad_norm": 3.2913503646850586, | |
| "learning_rate": 2.6988545025914437e-06, | |
| "loss": 0.841137707233429, | |
| "step": 1354 | |
| }, | |
| { | |
| "epoch": 1.0264950794852385, | |
| "grad_norm": 3.2313413619995117, | |
| "learning_rate": 2.697789653580853e-06, | |
| "loss": 0.7870294451713562, | |
| "step": 1356 | |
| }, | |
| { | |
| "epoch": 1.028009084027252, | |
| "grad_norm": 1.451443076133728, | |
| "learning_rate": 2.6967231624710447e-06, | |
| "loss": 0.9140100479125977, | |
| "step": 1358 | |
| }, | |
| { | |
| "epoch": 1.0295230885692657, | |
| "grad_norm": 2.742377996444702, | |
| "learning_rate": 2.695655030933426e-06, | |
| "loss": 0.9199535250663757, | |
| "step": 1360 | |
| }, | |
| { | |
| "epoch": 1.0310370931112793, | |
| "grad_norm": 9.10448169708252, | |
| "learning_rate": 2.694585260641977e-06, | |
| "loss": 0.6621928811073303, | |
| "step": 1362 | |
| }, | |
| { | |
| "epoch": 1.032551097653293, | |
| "grad_norm": 1.4048140048980713, | |
| "learning_rate": 2.6935138532732442e-06, | |
| "loss": 0.8045563697814941, | |
| "step": 1364 | |
| }, | |
| { | |
| "epoch": 1.0340651021953067, | |
| "grad_norm": 2.4908690452575684, | |
| "learning_rate": 2.6924408105063405e-06, | |
| "loss": 0.8018115758895874, | |
| "step": 1366 | |
| }, | |
| { | |
| "epoch": 1.0355791067373201, | |
| "grad_norm": 0.5607267022132874, | |
| "learning_rate": 2.6913661340229423e-06, | |
| "loss": 0.45020002126693726, | |
| "step": 1368 | |
| }, | |
| { | |
| "epoch": 1.0370931112793338, | |
| "grad_norm": 4.257814884185791, | |
| "learning_rate": 2.6902898255072865e-06, | |
| "loss": 0.6275112628936768, | |
| "step": 1370 | |
| }, | |
| { | |
| "epoch": 1.0386071158213475, | |
| "grad_norm": 3.537292242050171, | |
| "learning_rate": 2.6892118866461664e-06, | |
| "loss": 0.3463631570339203, | |
| "step": 1372 | |
| }, | |
| { | |
| "epoch": 1.0401211203633611, | |
| "grad_norm": 3.6382415294647217, | |
| "learning_rate": 2.6881323191289305e-06, | |
| "loss": 0.791259765625, | |
| "step": 1374 | |
| }, | |
| { | |
| "epoch": 1.0416351249053748, | |
| "grad_norm": 2.5053513050079346, | |
| "learning_rate": 2.6870511246474815e-06, | |
| "loss": 0.9765958786010742, | |
| "step": 1376 | |
| }, | |
| { | |
| "epoch": 1.0431491294473882, | |
| "grad_norm": 2.3160805702209473, | |
| "learning_rate": 2.685968304896271e-06, | |
| "loss": 0.7225064039230347, | |
| "step": 1378 | |
| }, | |
| { | |
| "epoch": 1.044663133989402, | |
| "grad_norm": 2.5423390865325928, | |
| "learning_rate": 2.6848838615722962e-06, | |
| "loss": 1.1174613237380981, | |
| "step": 1380 | |
| }, | |
| { | |
| "epoch": 1.0461771385314156, | |
| "grad_norm": 1.6330304145812988, | |
| "learning_rate": 2.683797796375101e-06, | |
| "loss": 0.741710364818573, | |
| "step": 1382 | |
| }, | |
| { | |
| "epoch": 1.0476911430734293, | |
| "grad_norm": 1.4876264333724976, | |
| "learning_rate": 2.68271011100677e-06, | |
| "loss": 0.7462827563285828, | |
| "step": 1384 | |
| }, | |
| { | |
| "epoch": 1.049205147615443, | |
| "grad_norm": 2.399090051651001, | |
| "learning_rate": 2.6816208071719274e-06, | |
| "loss": 1.0535266399383545, | |
| "step": 1386 | |
| }, | |
| { | |
| "epoch": 1.0507191521574564, | |
| "grad_norm": 1.310610055923462, | |
| "learning_rate": 2.680529886577733e-06, | |
| "loss": 1.0984599590301514, | |
| "step": 1388 | |
| }, | |
| { | |
| "epoch": 1.05223315669947, | |
| "grad_norm": 20.521469116210938, | |
| "learning_rate": 2.6794373509338812e-06, | |
| "loss": 0.746917724609375, | |
| "step": 1390 | |
| }, | |
| { | |
| "epoch": 1.0537471612414837, | |
| "grad_norm": 19.680225372314453, | |
| "learning_rate": 2.6783432019525967e-06, | |
| "loss": 0.4829588532447815, | |
| "step": 1392 | |
| }, | |
| { | |
| "epoch": 1.0552611657834974, | |
| "grad_norm": 1.8437694311141968, | |
| "learning_rate": 2.6772474413486345e-06, | |
| "loss": 0.7734227180480957, | |
| "step": 1394 | |
| }, | |
| { | |
| "epoch": 1.056775170325511, | |
| "grad_norm": 1.524431586265564, | |
| "learning_rate": 2.6761500708392727e-06, | |
| "loss": 0.7006011009216309, | |
| "step": 1396 | |
| }, | |
| { | |
| "epoch": 1.0582891748675245, | |
| "grad_norm": 5.695611000061035, | |
| "learning_rate": 2.675051092144315e-06, | |
| "loss": 0.2834855914115906, | |
| "step": 1398 | |
| }, | |
| { | |
| "epoch": 1.0598031794095382, | |
| "grad_norm": 2.3736777305603027, | |
| "learning_rate": 2.6739505069860835e-06, | |
| "loss": 0.8014317750930786, | |
| "step": 1400 | |
| }, | |
| { | |
| "epoch": 1.0613171839515518, | |
| "grad_norm": 1.2972900867462158, | |
| "learning_rate": 2.672848317089419e-06, | |
| "loss": 0.43378323316574097, | |
| "step": 1402 | |
| }, | |
| { | |
| "epoch": 1.0628311884935655, | |
| "grad_norm": 7.267664909362793, | |
| "learning_rate": 2.6717445241816766e-06, | |
| "loss": 0.41693636775016785, | |
| "step": 1404 | |
| }, | |
| { | |
| "epoch": 1.0643451930355792, | |
| "grad_norm": 2.0065371990203857, | |
| "learning_rate": 2.670639129992724e-06, | |
| "loss": 0.8223442435264587, | |
| "step": 1406 | |
| }, | |
| { | |
| "epoch": 1.0658591975775926, | |
| "grad_norm": 1.2673693895339966, | |
| "learning_rate": 2.669532136254939e-06, | |
| "loss": 0.8150830864906311, | |
| "step": 1408 | |
| }, | |
| { | |
| "epoch": 1.0673732021196063, | |
| "grad_norm": 4.826721668243408, | |
| "learning_rate": 2.668423544703205e-06, | |
| "loss": 1.1558277606964111, | |
| "step": 1410 | |
| }, | |
| { | |
| "epoch": 1.06888720666162, | |
| "grad_norm": 2.484870433807373, | |
| "learning_rate": 2.6673133570749107e-06, | |
| "loss": 0.986355721950531, | |
| "step": 1412 | |
| }, | |
| { | |
| "epoch": 1.0704012112036336, | |
| "grad_norm": 1.8060321807861328, | |
| "learning_rate": 2.666201575109945e-06, | |
| "loss": 1.1337124109268188, | |
| "step": 1414 | |
| }, | |
| { | |
| "epoch": 1.0719152157456473, | |
| "grad_norm": 2.5740833282470703, | |
| "learning_rate": 2.665088200550697e-06, | |
| "loss": 0.8420611619949341, | |
| "step": 1416 | |
| }, | |
| { | |
| "epoch": 1.073429220287661, | |
| "grad_norm": 3.167254686355591, | |
| "learning_rate": 2.6639732351420494e-06, | |
| "loss": 0.7230285406112671, | |
| "step": 1418 | |
| }, | |
| { | |
| "epoch": 1.0749432248296744, | |
| "grad_norm": 2.329456329345703, | |
| "learning_rate": 2.66285668063138e-06, | |
| "loss": 0.7629521489143372, | |
| "step": 1420 | |
| }, | |
| { | |
| "epoch": 1.076457229371688, | |
| "grad_norm": 3.7189221382141113, | |
| "learning_rate": 2.6617385387685576e-06, | |
| "loss": 0.4574144780635834, | |
| "step": 1422 | |
| }, | |
| { | |
| "epoch": 1.0779712339137018, | |
| "grad_norm": 3.426736354827881, | |
| "learning_rate": 2.6606188113059366e-06, | |
| "loss": 0.37381836771965027, | |
| "step": 1424 | |
| }, | |
| { | |
| "epoch": 1.0794852384557154, | |
| "grad_norm": 4.68638801574707, | |
| "learning_rate": 2.6594974999983575e-06, | |
| "loss": 0.6116821765899658, | |
| "step": 1426 | |
| }, | |
| { | |
| "epoch": 1.080999242997729, | |
| "grad_norm": 3.7828402519226074, | |
| "learning_rate": 2.6583746066031428e-06, | |
| "loss": 0.3239648938179016, | |
| "step": 1428 | |
| }, | |
| { | |
| "epoch": 1.0825132475397425, | |
| "grad_norm": 2.0054421424865723, | |
| "learning_rate": 2.6572501328800947e-06, | |
| "loss": 0.774564802646637, | |
| "step": 1430 | |
| }, | |
| { | |
| "epoch": 1.0840272520817562, | |
| "grad_norm": 2.6869266033172607, | |
| "learning_rate": 2.656124080591492e-06, | |
| "loss": 0.4418902099132538, | |
| "step": 1432 | |
| }, | |
| { | |
| "epoch": 1.0855412566237699, | |
| "grad_norm": 3.1833698749542236, | |
| "learning_rate": 2.654996451502087e-06, | |
| "loss": 0.5636769533157349, | |
| "step": 1434 | |
| }, | |
| { | |
| "epoch": 1.0870552611657835, | |
| "grad_norm": 1.5770076513290405, | |
| "learning_rate": 2.653867247379104e-06, | |
| "loss": 1.1258914470672607, | |
| "step": 1436 | |
| }, | |
| { | |
| "epoch": 1.0885692657077972, | |
| "grad_norm": 1.492074728012085, | |
| "learning_rate": 2.6527364699922356e-06, | |
| "loss": 0.6968456506729126, | |
| "step": 1438 | |
| }, | |
| { | |
| "epoch": 1.0900832702498107, | |
| "grad_norm": 2.156663656234741, | |
| "learning_rate": 2.651604121113639e-06, | |
| "loss": 0.42688336968421936, | |
| "step": 1440 | |
| }, | |
| { | |
| "epoch": 1.0915972747918243, | |
| "grad_norm": 1.730763554573059, | |
| "learning_rate": 2.6504702025179344e-06, | |
| "loss": 0.7748847007751465, | |
| "step": 1442 | |
| }, | |
| { | |
| "epoch": 1.093111279333838, | |
| "grad_norm": 4.515677452087402, | |
| "learning_rate": 2.6493347159822033e-06, | |
| "loss": 0.3128478527069092, | |
| "step": 1444 | |
| }, | |
| { | |
| "epoch": 1.0946252838758517, | |
| "grad_norm": 1.7783712148666382, | |
| "learning_rate": 2.6481976632859837e-06, | |
| "loss": 0.5876952409744263, | |
| "step": 1446 | |
| }, | |
| { | |
| "epoch": 1.0961392884178653, | |
| "grad_norm": 2.324995517730713, | |
| "learning_rate": 2.647059046211268e-06, | |
| "loss": 1.1297352313995361, | |
| "step": 1448 | |
| }, | |
| { | |
| "epoch": 1.0976532929598788, | |
| "grad_norm": 2.034179925918579, | |
| "learning_rate": 2.645918866542501e-06, | |
| "loss": 0.8161102533340454, | |
| "step": 1450 | |
| }, | |
| { | |
| "epoch": 1.0991672975018925, | |
| "grad_norm": 2.0529401302337646, | |
| "learning_rate": 2.6447771260665746e-06, | |
| "loss": 0.7258488535881042, | |
| "step": 1452 | |
| }, | |
| { | |
| "epoch": 1.1006813020439061, | |
| "grad_norm": 2.404632806777954, | |
| "learning_rate": 2.643633826572829e-06, | |
| "loss": 0.8145933747291565, | |
| "step": 1454 | |
| }, | |
| { | |
| "epoch": 1.1021953065859198, | |
| "grad_norm": 2.9968576431274414, | |
| "learning_rate": 2.6424889698530463e-06, | |
| "loss": 0.7694134712219238, | |
| "step": 1456 | |
| }, | |
| { | |
| "epoch": 1.1037093111279335, | |
| "grad_norm": 3.773648977279663, | |
| "learning_rate": 2.64134255770145e-06, | |
| "loss": 0.6372519135475159, | |
| "step": 1458 | |
| }, | |
| { | |
| "epoch": 1.105223315669947, | |
| "grad_norm": 1.944852590560913, | |
| "learning_rate": 2.6401945919147e-06, | |
| "loss": 0.7862814664840698, | |
| "step": 1460 | |
| }, | |
| { | |
| "epoch": 1.1067373202119606, | |
| "grad_norm": 1.8397468328475952, | |
| "learning_rate": 2.6390450742918934e-06, | |
| "loss": 0.852626621723175, | |
| "step": 1462 | |
| }, | |
| { | |
| "epoch": 1.1082513247539743, | |
| "grad_norm": 2.014392375946045, | |
| "learning_rate": 2.6378940066345563e-06, | |
| "loss": 0.8128108382225037, | |
| "step": 1464 | |
| }, | |
| { | |
| "epoch": 1.109765329295988, | |
| "grad_norm": 7.674124240875244, | |
| "learning_rate": 2.636741390746646e-06, | |
| "loss": 0.812615156173706, | |
| "step": 1466 | |
| }, | |
| { | |
| "epoch": 1.1112793338380016, | |
| "grad_norm": 1.6925760507583618, | |
| "learning_rate": 2.635587228434546e-06, | |
| "loss": 0.4845889210700989, | |
| "step": 1468 | |
| }, | |
| { | |
| "epoch": 1.112793338380015, | |
| "grad_norm": 1.3222905397415161, | |
| "learning_rate": 2.6344315215070623e-06, | |
| "loss": 0.6777662038803101, | |
| "step": 1470 | |
| }, | |
| { | |
| "epoch": 1.1143073429220287, | |
| "grad_norm": 5.2306013107299805, | |
| "learning_rate": 2.633274271775423e-06, | |
| "loss": 0.6973274350166321, | |
| "step": 1472 | |
| }, | |
| { | |
| "epoch": 1.1158213474640424, | |
| "grad_norm": 28.449199676513672, | |
| "learning_rate": 2.6321154810532736e-06, | |
| "loss": 1.161055564880371, | |
| "step": 1474 | |
| }, | |
| { | |
| "epoch": 1.117335352006056, | |
| "grad_norm": 6.230593204498291, | |
| "learning_rate": 2.6309551511566748e-06, | |
| "loss": 0.7826764583587646, | |
| "step": 1476 | |
| }, | |
| { | |
| "epoch": 1.1188493565480697, | |
| "grad_norm": 2.1054954528808594, | |
| "learning_rate": 2.629793283904098e-06, | |
| "loss": 0.6974689364433289, | |
| "step": 1478 | |
| }, | |
| { | |
| "epoch": 1.1203633610900834, | |
| "grad_norm": 10.107316970825195, | |
| "learning_rate": 2.628629881116427e-06, | |
| "loss": 0.9081476926803589, | |
| "step": 1480 | |
| }, | |
| { | |
| "epoch": 1.1218773656320968, | |
| "grad_norm": 3.2295544147491455, | |
| "learning_rate": 2.6274649446169484e-06, | |
| "loss": 0.28264302015304565, | |
| "step": 1482 | |
| }, | |
| { | |
| "epoch": 1.1233913701741105, | |
| "grad_norm": 3.182004690170288, | |
| "learning_rate": 2.6262984762313558e-06, | |
| "loss": 0.5937165021896362, | |
| "step": 1484 | |
| }, | |
| { | |
| "epoch": 1.1249053747161242, | |
| "grad_norm": 1.4822916984558105, | |
| "learning_rate": 2.625130477787741e-06, | |
| "loss": 0.6934922933578491, | |
| "step": 1486 | |
| }, | |
| { | |
| "epoch": 1.1264193792581378, | |
| "grad_norm": 5.625181674957275, | |
| "learning_rate": 2.623960951116596e-06, | |
| "loss": 0.41950473189353943, | |
| "step": 1488 | |
| }, | |
| { | |
| "epoch": 1.1279333838001513, | |
| "grad_norm": 1.72547447681427, | |
| "learning_rate": 2.622789898050805e-06, | |
| "loss": 0.5038779973983765, | |
| "step": 1490 | |
| }, | |
| { | |
| "epoch": 1.129447388342165, | |
| "grad_norm": 3.188124895095825, | |
| "learning_rate": 2.6216173204256478e-06, | |
| "loss": 0.6977412104606628, | |
| "step": 1492 | |
| }, | |
| { | |
| "epoch": 1.1309613928841786, | |
| "grad_norm": 1.6635669469833374, | |
| "learning_rate": 2.6204432200787906e-06, | |
| "loss": 0.36403128504753113, | |
| "step": 1494 | |
| }, | |
| { | |
| "epoch": 1.1324753974261923, | |
| "grad_norm": 2.5082645416259766, | |
| "learning_rate": 2.6192675988502873e-06, | |
| "loss": 0.5870634317398071, | |
| "step": 1496 | |
| }, | |
| { | |
| "epoch": 1.133989401968206, | |
| "grad_norm": 1.4929744005203247, | |
| "learning_rate": 2.6180904585825756e-06, | |
| "loss": 0.5060266256332397, | |
| "step": 1498 | |
| }, | |
| { | |
| "epoch": 1.1355034065102196, | |
| "grad_norm": 3.6979591846466064, | |
| "learning_rate": 2.6169118011204733e-06, | |
| "loss": 0.4492890536785126, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 1.137017411052233, | |
| "grad_norm": 1.9358099699020386, | |
| "learning_rate": 2.615731628311175e-06, | |
| "loss": 0.6397398114204407, | |
| "step": 1502 | |
| }, | |
| { | |
| "epoch": 1.1385314155942468, | |
| "grad_norm": 2.5439043045043945, | |
| "learning_rate": 2.614549942004253e-06, | |
| "loss": 0.7282018661499023, | |
| "step": 1504 | |
| }, | |
| { | |
| "epoch": 1.1400454201362604, | |
| "grad_norm": 8.694692611694336, | |
| "learning_rate": 2.613366744051648e-06, | |
| "loss": 0.6934101581573486, | |
| "step": 1506 | |
| }, | |
| { | |
| "epoch": 1.141559424678274, | |
| "grad_norm": 7.270405292510986, | |
| "learning_rate": 2.612182036307673e-06, | |
| "loss": 0.8005611300468445, | |
| "step": 1508 | |
| }, | |
| { | |
| "epoch": 1.1430734292202875, | |
| "grad_norm": 1.9914963245391846, | |
| "learning_rate": 2.6109958206290047e-06, | |
| "loss": 0.7519674897193909, | |
| "step": 1510 | |
| }, | |
| { | |
| "epoch": 1.1445874337623012, | |
| "grad_norm": 1.2573553323745728, | |
| "learning_rate": 2.6098080988746847e-06, | |
| "loss": 0.7651998996734619, | |
| "step": 1512 | |
| }, | |
| { | |
| "epoch": 1.1461014383043149, | |
| "grad_norm": 13.637552261352539, | |
| "learning_rate": 2.6086188729061137e-06, | |
| "loss": 0.3738044500350952, | |
| "step": 1514 | |
| }, | |
| { | |
| "epoch": 1.1476154428463285, | |
| "grad_norm": 2.9253766536712646, | |
| "learning_rate": 2.6074281445870504e-06, | |
| "loss": 0.7124196290969849, | |
| "step": 1516 | |
| }, | |
| { | |
| "epoch": 1.1491294473883422, | |
| "grad_norm": 2.5097551345825195, | |
| "learning_rate": 2.6062359157836085e-06, | |
| "loss": 1.0747299194335938, | |
| "step": 1518 | |
| }, | |
| { | |
| "epoch": 1.1506434519303559, | |
| "grad_norm": 2.58630633354187, | |
| "learning_rate": 2.6050421883642523e-06, | |
| "loss": 0.9966959357261658, | |
| "step": 1520 | |
| }, | |
| { | |
| "epoch": 1.1521574564723693, | |
| "grad_norm": 2.2582290172576904, | |
| "learning_rate": 2.603846964199795e-06, | |
| "loss": 0.5746228694915771, | |
| "step": 1522 | |
| }, | |
| { | |
| "epoch": 1.153671461014383, | |
| "grad_norm": 2.2150135040283203, | |
| "learning_rate": 2.602650245163396e-06, | |
| "loss": 1.1288074254989624, | |
| "step": 1524 | |
| }, | |
| { | |
| "epoch": 1.1551854655563967, | |
| "grad_norm": 1.7310847043991089, | |
| "learning_rate": 2.601452033130556e-06, | |
| "loss": 1.1471965312957764, | |
| "step": 1526 | |
| }, | |
| { | |
| "epoch": 1.1566994700984103, | |
| "grad_norm": 1.5647629499435425, | |
| "learning_rate": 2.6002523299791185e-06, | |
| "loss": 0.828460693359375, | |
| "step": 1528 | |
| }, | |
| { | |
| "epoch": 1.158213474640424, | |
| "grad_norm": 4.7715229988098145, | |
| "learning_rate": 2.599051137589261e-06, | |
| "loss": 0.9581698179244995, | |
| "step": 1530 | |
| }, | |
| { | |
| "epoch": 1.1597274791824375, | |
| "grad_norm": 2.2824630737304688, | |
| "learning_rate": 2.5978484578434956e-06, | |
| "loss": 0.3696361482143402, | |
| "step": 1532 | |
| }, | |
| { | |
| "epoch": 1.1612414837244511, | |
| "grad_norm": 2.047269344329834, | |
| "learning_rate": 2.5966442926266667e-06, | |
| "loss": 0.42837634682655334, | |
| "step": 1534 | |
| }, | |
| { | |
| "epoch": 1.1627554882664648, | |
| "grad_norm": 1.830245018005371, | |
| "learning_rate": 2.5954386438259446e-06, | |
| "loss": 0.3391342759132385, | |
| "step": 1536 | |
| }, | |
| { | |
| "epoch": 1.1642694928084785, | |
| "grad_norm": 4.029056549072266, | |
| "learning_rate": 2.5942315133308264e-06, | |
| "loss": 0.5852560997009277, | |
| "step": 1538 | |
| }, | |
| { | |
| "epoch": 1.1657834973504921, | |
| "grad_norm": 2.035017251968384, | |
| "learning_rate": 2.5930229030331323e-06, | |
| "loss": 0.8092902302742004, | |
| "step": 1540 | |
| }, | |
| { | |
| "epoch": 1.1672975018925056, | |
| "grad_norm": 9.123196601867676, | |
| "learning_rate": 2.5918128148269977e-06, | |
| "loss": 0.7252377271652222, | |
| "step": 1542 | |
| }, | |
| { | |
| "epoch": 1.1688115064345193, | |
| "grad_norm": 3.822357416152954, | |
| "learning_rate": 2.590601250608878e-06, | |
| "loss": 0.7166582942008972, | |
| "step": 1544 | |
| }, | |
| { | |
| "epoch": 1.170325510976533, | |
| "grad_norm": 2.210566997528076, | |
| "learning_rate": 2.58938821227754e-06, | |
| "loss": 0.9769903421401978, | |
| "step": 1546 | |
| }, | |
| { | |
| "epoch": 1.1718395155185466, | |
| "grad_norm": 8.526498794555664, | |
| "learning_rate": 2.588173701734061e-06, | |
| "loss": 0.3668955862522125, | |
| "step": 1548 | |
| }, | |
| { | |
| "epoch": 1.1733535200605603, | |
| "grad_norm": 3.4576306343078613, | |
| "learning_rate": 2.5869577208818264e-06, | |
| "loss": 0.7129625678062439, | |
| "step": 1550 | |
| }, | |
| { | |
| "epoch": 1.1748675246025737, | |
| "grad_norm": 2.7367055416107178, | |
| "learning_rate": 2.585740271626525e-06, | |
| "loss": 0.7673206329345703, | |
| "step": 1552 | |
| }, | |
| { | |
| "epoch": 1.1763815291445874, | |
| "grad_norm": 1.9763902425765991, | |
| "learning_rate": 2.5845213558761464e-06, | |
| "loss": 0.5425819158554077, | |
| "step": 1554 | |
| }, | |
| { | |
| "epoch": 1.177895533686601, | |
| "grad_norm": 1.5533753633499146, | |
| "learning_rate": 2.5833009755409798e-06, | |
| "loss": 0.8152801990509033, | |
| "step": 1556 | |
| }, | |
| { | |
| "epoch": 1.1794095382286147, | |
| "grad_norm": 1.6180309057235718, | |
| "learning_rate": 2.582079132533609e-06, | |
| "loss": 0.7211406826972961, | |
| "step": 1558 | |
| }, | |
| { | |
| "epoch": 1.1809235427706284, | |
| "grad_norm": 2.6173770427703857, | |
| "learning_rate": 2.58085582876891e-06, | |
| "loss": 0.7489441633224487, | |
| "step": 1560 | |
| }, | |
| { | |
| "epoch": 1.182437547312642, | |
| "grad_norm": 3.008863925933838, | |
| "learning_rate": 2.579631066164048e-06, | |
| "loss": 0.8485870957374573, | |
| "step": 1562 | |
| }, | |
| { | |
| "epoch": 1.1839515518546555, | |
| "grad_norm": 7.870699405670166, | |
| "learning_rate": 2.5784048466384754e-06, | |
| "loss": 0.46112170815467834, | |
| "step": 1564 | |
| }, | |
| { | |
| "epoch": 1.1854655563966692, | |
| "grad_norm": 3.498415470123291, | |
| "learning_rate": 2.577177172113927e-06, | |
| "loss": 0.39019960165023804, | |
| "step": 1566 | |
| }, | |
| { | |
| "epoch": 1.1869795609386828, | |
| "grad_norm": 3.178898572921753, | |
| "learning_rate": 2.5759480445144183e-06, | |
| "loss": 0.39782366156578064, | |
| "step": 1568 | |
| }, | |
| { | |
| "epoch": 1.1884935654806965, | |
| "grad_norm": 2.614898443222046, | |
| "learning_rate": 2.5747174657662415e-06, | |
| "loss": 0.33162638545036316, | |
| "step": 1570 | |
| }, | |
| { | |
| "epoch": 1.19000757002271, | |
| "grad_norm": 3.082942008972168, | |
| "learning_rate": 2.5734854377979643e-06, | |
| "loss": 0.557581901550293, | |
| "step": 1572 | |
| }, | |
| { | |
| "epoch": 1.1915215745647236, | |
| "grad_norm": 1.9442788362503052, | |
| "learning_rate": 2.572251962540424e-06, | |
| "loss": 0.8836531639099121, | |
| "step": 1574 | |
| }, | |
| { | |
| "epoch": 1.1930355791067373, | |
| "grad_norm": 2.360757827758789, | |
| "learning_rate": 2.571017041926727e-06, | |
| "loss": 0.37596502900123596, | |
| "step": 1576 | |
| }, | |
| { | |
| "epoch": 1.194549583648751, | |
| "grad_norm": 4.975614547729492, | |
| "learning_rate": 2.5697806778922442e-06, | |
| "loss": 0.39887678623199463, | |
| "step": 1578 | |
| }, | |
| { | |
| "epoch": 1.1960635881907646, | |
| "grad_norm": 1.4703550338745117, | |
| "learning_rate": 2.5685428723746106e-06, | |
| "loss": 0.6991298198699951, | |
| "step": 1580 | |
| }, | |
| { | |
| "epoch": 1.1975775927327783, | |
| "grad_norm": 2.316718816757202, | |
| "learning_rate": 2.567303627313718e-06, | |
| "loss": 0.671280562877655, | |
| "step": 1582 | |
| }, | |
| { | |
| "epoch": 1.1990915972747918, | |
| "grad_norm": 5.537595272064209, | |
| "learning_rate": 2.566062944651715e-06, | |
| "loss": 0.3257960081100464, | |
| "step": 1584 | |
| }, | |
| { | |
| "epoch": 1.2006056018168054, | |
| "grad_norm": 4.8405609130859375, | |
| "learning_rate": 2.5648208263330033e-06, | |
| "loss": 0.37777429819107056, | |
| "step": 1586 | |
| }, | |
| { | |
| "epoch": 1.202119606358819, | |
| "grad_norm": 2.0979316234588623, | |
| "learning_rate": 2.5635772743042354e-06, | |
| "loss": 0.8450133800506592, | |
| "step": 1588 | |
| }, | |
| { | |
| "epoch": 1.2036336109008328, | |
| "grad_norm": 3.63385272026062, | |
| "learning_rate": 2.562332290514309e-06, | |
| "loss": 0.4902435839176178, | |
| "step": 1590 | |
| }, | |
| { | |
| "epoch": 1.2051476154428462, | |
| "grad_norm": 2.6919708251953125, | |
| "learning_rate": 2.5610858769143673e-06, | |
| "loss": 0.7547534704208374, | |
| "step": 1592 | |
| }, | |
| { | |
| "epoch": 1.2066616199848599, | |
| "grad_norm": 3.5736734867095947, | |
| "learning_rate": 2.5598380354577934e-06, | |
| "loss": 1.0713350772857666, | |
| "step": 1594 | |
| }, | |
| { | |
| "epoch": 1.2081756245268735, | |
| "grad_norm": 2.7801170349121094, | |
| "learning_rate": 2.5585887681002077e-06, | |
| "loss": 1.0252490043640137, | |
| "step": 1596 | |
| }, | |
| { | |
| "epoch": 1.2096896290688872, | |
| "grad_norm": 5.497828960418701, | |
| "learning_rate": 2.5573380767994667e-06, | |
| "loss": 0.6267985105514526, | |
| "step": 1598 | |
| }, | |
| { | |
| "epoch": 1.2112036336109009, | |
| "grad_norm": 2.441645383834839, | |
| "learning_rate": 2.556085963515657e-06, | |
| "loss": 0.7817872166633606, | |
| "step": 1600 | |
| }, | |
| { | |
| "epoch": 1.2127176381529146, | |
| "grad_norm": 10.523898124694824, | |
| "learning_rate": 2.554832430211095e-06, | |
| "loss": 0.6640578508377075, | |
| "step": 1602 | |
| }, | |
| { | |
| "epoch": 1.214231642694928, | |
| "grad_norm": 5.218235015869141, | |
| "learning_rate": 2.5535774788503226e-06, | |
| "loss": 0.7082098722457886, | |
| "step": 1604 | |
| }, | |
| { | |
| "epoch": 1.2157456472369417, | |
| "grad_norm": 1.9254984855651855, | |
| "learning_rate": 2.552321111400102e-06, | |
| "loss": 1.0181281566619873, | |
| "step": 1606 | |
| }, | |
| { | |
| "epoch": 1.2172596517789553, | |
| "grad_norm": 2.4773366451263428, | |
| "learning_rate": 2.551063329829417e-06, | |
| "loss": 0.36115017533302307, | |
| "step": 1608 | |
| }, | |
| { | |
| "epoch": 1.218773656320969, | |
| "grad_norm": 3.622504234313965, | |
| "learning_rate": 2.5498041361094675e-06, | |
| "loss": 0.4864136278629303, | |
| "step": 1610 | |
| }, | |
| { | |
| "epoch": 1.2202876608629827, | |
| "grad_norm": 6.712879657745361, | |
| "learning_rate": 2.548543532213664e-06, | |
| "loss": 0.6208847165107727, | |
| "step": 1612 | |
| }, | |
| { | |
| "epoch": 1.2218016654049961, | |
| "grad_norm": 1.3727296590805054, | |
| "learning_rate": 2.5472815201176305e-06, | |
| "loss": 0.43101292848587036, | |
| "step": 1614 | |
| }, | |
| { | |
| "epoch": 1.2233156699470098, | |
| "grad_norm": 1.4034091234207153, | |
| "learning_rate": 2.5460181017991965e-06, | |
| "loss": 0.369363933801651, | |
| "step": 1616 | |
| }, | |
| { | |
| "epoch": 1.2248296744890235, | |
| "grad_norm": 1.6807384490966797, | |
| "learning_rate": 2.5447532792383934e-06, | |
| "loss": 1.2105756998062134, | |
| "step": 1618 | |
| }, | |
| { | |
| "epoch": 1.2263436790310371, | |
| "grad_norm": 4.796979904174805, | |
| "learning_rate": 2.5434870544174565e-06, | |
| "loss": 0.9204561114311218, | |
| "step": 1620 | |
| }, | |
| { | |
| "epoch": 1.2278576835730508, | |
| "grad_norm": 3.0788776874542236, | |
| "learning_rate": 2.542219429320816e-06, | |
| "loss": 0.6513817310333252, | |
| "step": 1622 | |
| }, | |
| { | |
| "epoch": 1.2293716881150643, | |
| "grad_norm": 2.194180488586426, | |
| "learning_rate": 2.5409504059350997e-06, | |
| "loss": 0.48548203706741333, | |
| "step": 1624 | |
| }, | |
| { | |
| "epoch": 1.230885692657078, | |
| "grad_norm": 1.6935890913009644, | |
| "learning_rate": 2.5396799862491234e-06, | |
| "loss": 0.8554729223251343, | |
| "step": 1626 | |
| }, | |
| { | |
| "epoch": 1.2323996971990916, | |
| "grad_norm": 1.9916133880615234, | |
| "learning_rate": 2.5384081722538944e-06, | |
| "loss": 0.9386757612228394, | |
| "step": 1628 | |
| }, | |
| { | |
| "epoch": 1.2339137017411053, | |
| "grad_norm": 1.5255463123321533, | |
| "learning_rate": 2.537134965942602e-06, | |
| "loss": 0.7088950276374817, | |
| "step": 1630 | |
| }, | |
| { | |
| "epoch": 1.235427706283119, | |
| "grad_norm": 2.5169317722320557, | |
| "learning_rate": 2.53586036931062e-06, | |
| "loss": 0.6848646998405457, | |
| "step": 1632 | |
| }, | |
| { | |
| "epoch": 1.2369417108251324, | |
| "grad_norm": 1.438494324684143, | |
| "learning_rate": 2.5345843843554997e-06, | |
| "loss": 0.7582930326461792, | |
| "step": 1634 | |
| }, | |
| { | |
| "epoch": 1.238455715367146, | |
| "grad_norm": 1.4521311521530151, | |
| "learning_rate": 2.5333070130769693e-06, | |
| "loss": 0.5319505929946899, | |
| "step": 1636 | |
| }, | |
| { | |
| "epoch": 1.2399697199091597, | |
| "grad_norm": 2.501084089279175, | |
| "learning_rate": 2.5320282574769286e-06, | |
| "loss": 0.7813730239868164, | |
| "step": 1638 | |
| }, | |
| { | |
| "epoch": 1.2414837244511734, | |
| "grad_norm": 1.1438665390014648, | |
| "learning_rate": 2.530748119559447e-06, | |
| "loss": 0.8257892727851868, | |
| "step": 1640 | |
| }, | |
| { | |
| "epoch": 1.242997728993187, | |
| "grad_norm": 16.8778076171875, | |
| "learning_rate": 2.5294666013307625e-06, | |
| "loss": 0.7767748832702637, | |
| "step": 1642 | |
| }, | |
| { | |
| "epoch": 1.2445117335352007, | |
| "grad_norm": 6.9293084144592285, | |
| "learning_rate": 2.528183704799272e-06, | |
| "loss": 0.3963207006454468, | |
| "step": 1644 | |
| }, | |
| { | |
| "epoch": 1.2460257380772142, | |
| "grad_norm": 4.275975227355957, | |
| "learning_rate": 2.5268994319755364e-06, | |
| "loss": 0.5166948437690735, | |
| "step": 1646 | |
| }, | |
| { | |
| "epoch": 1.2475397426192278, | |
| "grad_norm": 7.330279350280762, | |
| "learning_rate": 2.5256137848722716e-06, | |
| "loss": 0.4638979434967041, | |
| "step": 1648 | |
| }, | |
| { | |
| "epoch": 1.2490537471612415, | |
| "grad_norm": 1.5681062936782837, | |
| "learning_rate": 2.5243267655043485e-06, | |
| "loss": 0.7441158294677734, | |
| "step": 1650 | |
| }, | |
| { | |
| "epoch": 1.2505677517032552, | |
| "grad_norm": 8.350687980651855, | |
| "learning_rate": 2.523038375888787e-06, | |
| "loss": 0.7322012782096863, | |
| "step": 1652 | |
| }, | |
| { | |
| "epoch": 1.2520817562452686, | |
| "grad_norm": 1.7311092615127563, | |
| "learning_rate": 2.521748618044755e-06, | |
| "loss": 0.7636613249778748, | |
| "step": 1654 | |
| }, | |
| { | |
| "epoch": 1.2535957607872823, | |
| "grad_norm": 2.5470802783966064, | |
| "learning_rate": 2.520457493993566e-06, | |
| "loss": 0.714220404624939, | |
| "step": 1656 | |
| }, | |
| { | |
| "epoch": 1.255109765329296, | |
| "grad_norm": 2.575735330581665, | |
| "learning_rate": 2.519165005758674e-06, | |
| "loss": 0.8002930879592896, | |
| "step": 1658 | |
| }, | |
| { | |
| "epoch": 1.2566237698713096, | |
| "grad_norm": 2.6097710132598877, | |
| "learning_rate": 2.5178711553656694e-06, | |
| "loss": 1.2010271549224854, | |
| "step": 1660 | |
| }, | |
| { | |
| "epoch": 1.2581377744133233, | |
| "grad_norm": 1.4934430122375488, | |
| "learning_rate": 2.5165759448422783e-06, | |
| "loss": 0.2964455783367157, | |
| "step": 1662 | |
| }, | |
| { | |
| "epoch": 1.259651778955337, | |
| "grad_norm": 1.8192704916000366, | |
| "learning_rate": 2.5152793762183605e-06, | |
| "loss": 0.3605591356754303, | |
| "step": 1664 | |
| }, | |
| { | |
| "epoch": 1.2611657834973504, | |
| "grad_norm": 1.903306245803833, | |
| "learning_rate": 2.5139814515259005e-06, | |
| "loss": 0.2693943381309509, | |
| "step": 1666 | |
| }, | |
| { | |
| "epoch": 1.262679788039364, | |
| "grad_norm": 1.477151870727539, | |
| "learning_rate": 2.5126821727990115e-06, | |
| "loss": 1.16646409034729, | |
| "step": 1668 | |
| }, | |
| { | |
| "epoch": 1.2641937925813778, | |
| "grad_norm": 2.5828359127044678, | |
| "learning_rate": 2.5113815420739265e-06, | |
| "loss": 0.6955753564834595, | |
| "step": 1670 | |
| }, | |
| { | |
| "epoch": 1.2657077971233914, | |
| "grad_norm": 2.8693950176239014, | |
| "learning_rate": 2.5100795613889975e-06, | |
| "loss": 1.1036823987960815, | |
| "step": 1672 | |
| }, | |
| { | |
| "epoch": 1.2672218016654049, | |
| "grad_norm": 4.083218097686768, | |
| "learning_rate": 2.5087762327846932e-06, | |
| "loss": 0.8175725936889648, | |
| "step": 1674 | |
| }, | |
| { | |
| "epoch": 1.2687358062074185, | |
| "grad_norm": 4.969780445098877, | |
| "learning_rate": 2.5074715583035945e-06, | |
| "loss": 0.5654513835906982, | |
| "step": 1676 | |
| }, | |
| { | |
| "epoch": 1.2702498107494322, | |
| "grad_norm": 2.3143811225891113, | |
| "learning_rate": 2.506165539990391e-06, | |
| "loss": 0.4701712131500244, | |
| "step": 1678 | |
| }, | |
| { | |
| "epoch": 1.2717638152914459, | |
| "grad_norm": 1.1072319746017456, | |
| "learning_rate": 2.5048581798918786e-06, | |
| "loss": 0.7916725277900696, | |
| "step": 1680 | |
| }, | |
| { | |
| "epoch": 1.2732778198334596, | |
| "grad_norm": 7.131253719329834, | |
| "learning_rate": 2.5035494800569568e-06, | |
| "loss": 0.34638112783432007, | |
| "step": 1682 | |
| }, | |
| { | |
| "epoch": 1.2747918243754732, | |
| "grad_norm": 4.370595455169678, | |
| "learning_rate": 2.5022394425366226e-06, | |
| "loss": 0.29949814081192017, | |
| "step": 1684 | |
| }, | |
| { | |
| "epoch": 1.2763058289174867, | |
| "grad_norm": 3.430283546447754, | |
| "learning_rate": 2.5009280693839733e-06, | |
| "loss": 0.37807488441467285, | |
| "step": 1686 | |
| }, | |
| { | |
| "epoch": 1.2778198334595003, | |
| "grad_norm": 4.1393208503723145, | |
| "learning_rate": 2.4996153626541942e-06, | |
| "loss": 0.4743579924106598, | |
| "step": 1688 | |
| }, | |
| { | |
| "epoch": 1.279333838001514, | |
| "grad_norm": 1.6699633598327637, | |
| "learning_rate": 2.498301324404565e-06, | |
| "loss": 0.518422544002533, | |
| "step": 1690 | |
| }, | |
| { | |
| "epoch": 1.2808478425435277, | |
| "grad_norm": 1.5848231315612793, | |
| "learning_rate": 2.4969859566944504e-06, | |
| "loss": 0.5937182903289795, | |
| "step": 1692 | |
| }, | |
| { | |
| "epoch": 1.2823618470855411, | |
| "grad_norm": 4.64746618270874, | |
| "learning_rate": 2.495669261585299e-06, | |
| "loss": 0.5423171520233154, | |
| "step": 1694 | |
| }, | |
| { | |
| "epoch": 1.2838758516275548, | |
| "grad_norm": 2.7205491065979004, | |
| "learning_rate": 2.494351241140639e-06, | |
| "loss": 0.8974373936653137, | |
| "step": 1696 | |
| }, | |
| { | |
| "epoch": 1.2853898561695685, | |
| "grad_norm": 2.0516796112060547, | |
| "learning_rate": 2.493031897426076e-06, | |
| "loss": 0.3728986382484436, | |
| "step": 1698 | |
| }, | |
| { | |
| "epoch": 1.2869038607115821, | |
| "grad_norm": 3.630496025085449, | |
| "learning_rate": 2.4917112325092903e-06, | |
| "loss": 0.3708462715148926, | |
| "step": 1700 | |
| }, | |
| { | |
| "epoch": 1.2884178652535958, | |
| "grad_norm": 7.670022964477539, | |
| "learning_rate": 2.4903892484600315e-06, | |
| "loss": 0.45419633388519287, | |
| "step": 1702 | |
| }, | |
| { | |
| "epoch": 1.2899318697956095, | |
| "grad_norm": 3.7088091373443604, | |
| "learning_rate": 2.489065947350117e-06, | |
| "loss": 0.3158243000507355, | |
| "step": 1704 | |
| }, | |
| { | |
| "epoch": 1.2914458743376231, | |
| "grad_norm": 2.961735486984253, | |
| "learning_rate": 2.4877413312534293e-06, | |
| "loss": 0.33370882272720337, | |
| "step": 1706 | |
| }, | |
| { | |
| "epoch": 1.2929598788796366, | |
| "grad_norm": 2.4538347721099854, | |
| "learning_rate": 2.4864154022459104e-06, | |
| "loss": 0.794564962387085, | |
| "step": 1708 | |
| }, | |
| { | |
| "epoch": 1.2944738834216503, | |
| "grad_norm": 2.663095235824585, | |
| "learning_rate": 2.48508816240556e-06, | |
| "loss": 0.7713342905044556, | |
| "step": 1710 | |
| }, | |
| { | |
| "epoch": 1.295987887963664, | |
| "grad_norm": 2.7029004096984863, | |
| "learning_rate": 2.4837596138124327e-06, | |
| "loss": 0.6392582654953003, | |
| "step": 1712 | |
| }, | |
| { | |
| "epoch": 1.2975018925056774, | |
| "grad_norm": 2.8968873023986816, | |
| "learning_rate": 2.4824297585486346e-06, | |
| "loss": 0.5847026109695435, | |
| "step": 1714 | |
| }, | |
| { | |
| "epoch": 1.299015897047691, | |
| "grad_norm": 8.163978576660156, | |
| "learning_rate": 2.4810985986983177e-06, | |
| "loss": 0.6873840689659119, | |
| "step": 1716 | |
| }, | |
| { | |
| "epoch": 1.3005299015897047, | |
| "grad_norm": 4.473104953765869, | |
| "learning_rate": 2.479766136347682e-06, | |
| "loss": 0.792141318321228, | |
| "step": 1718 | |
| }, | |
| { | |
| "epoch": 1.3020439061317184, | |
| "grad_norm": 34.86464309692383, | |
| "learning_rate": 2.478432373584964e-06, | |
| "loss": 0.27867889404296875, | |
| "step": 1720 | |
| }, | |
| { | |
| "epoch": 1.303557910673732, | |
| "grad_norm": 4.670142650604248, | |
| "learning_rate": 2.477097312500444e-06, | |
| "loss": 0.7536966800689697, | |
| "step": 1722 | |
| }, | |
| { | |
| "epoch": 1.3050719152157457, | |
| "grad_norm": 1.702530026435852, | |
| "learning_rate": 2.4757609551864307e-06, | |
| "loss": 1.0583717823028564, | |
| "step": 1724 | |
| }, | |
| { | |
| "epoch": 1.3065859197577594, | |
| "grad_norm": 2.3609373569488525, | |
| "learning_rate": 2.4744233037372697e-06, | |
| "loss": 0.28235501050949097, | |
| "step": 1726 | |
| }, | |
| { | |
| "epoch": 1.3080999242997728, | |
| "grad_norm": 3.98068904876709, | |
| "learning_rate": 2.4730843602493315e-06, | |
| "loss": 1.0268464088439941, | |
| "step": 1728 | |
| }, | |
| { | |
| "epoch": 1.3096139288417865, | |
| "grad_norm": 2.23313307762146, | |
| "learning_rate": 2.4717441268210127e-06, | |
| "loss": 0.5403386354446411, | |
| "step": 1730 | |
| }, | |
| { | |
| "epoch": 1.3111279333838002, | |
| "grad_norm": 6.894443035125732, | |
| "learning_rate": 2.4704026055527315e-06, | |
| "loss": 0.6712117791175842, | |
| "step": 1732 | |
| }, | |
| { | |
| "epoch": 1.3126419379258139, | |
| "grad_norm": 1.8715323209762573, | |
| "learning_rate": 2.469059798546924e-06, | |
| "loss": 1.0742299556732178, | |
| "step": 1734 | |
| }, | |
| { | |
| "epoch": 1.3141559424678273, | |
| "grad_norm": 4.290128231048584, | |
| "learning_rate": 2.4677157079080418e-06, | |
| "loss": 1.2617570161819458, | |
| "step": 1736 | |
| }, | |
| { | |
| "epoch": 1.315669947009841, | |
| "grad_norm": 3.091566324234009, | |
| "learning_rate": 2.4663703357425475e-06, | |
| "loss": 1.0468664169311523, | |
| "step": 1738 | |
| }, | |
| { | |
| "epoch": 1.3171839515518546, | |
| "grad_norm": 3.078235149383545, | |
| "learning_rate": 2.4650236841589136e-06, | |
| "loss": 0.8127296566963196, | |
| "step": 1740 | |
| }, | |
| { | |
| "epoch": 1.3186979560938683, | |
| "grad_norm": 3.087818145751953, | |
| "learning_rate": 2.4636757552676146e-06, | |
| "loss": 1.1390008926391602, | |
| "step": 1742 | |
| }, | |
| { | |
| "epoch": 1.320211960635882, | |
| "grad_norm": 2.46710467338562, | |
| "learning_rate": 2.4623265511811316e-06, | |
| "loss": 0.5823556184768677, | |
| "step": 1744 | |
| }, | |
| { | |
| "epoch": 1.3217259651778956, | |
| "grad_norm": 3.8972294330596924, | |
| "learning_rate": 2.4609760740139393e-06, | |
| "loss": 0.612288236618042, | |
| "step": 1746 | |
| }, | |
| { | |
| "epoch": 1.323239969719909, | |
| "grad_norm": 3.4614830017089844, | |
| "learning_rate": 2.4596243258825107e-06, | |
| "loss": 0.3169248700141907, | |
| "step": 1748 | |
| }, | |
| { | |
| "epoch": 1.3247539742619228, | |
| "grad_norm": 1.4857923984527588, | |
| "learning_rate": 2.458271308905309e-06, | |
| "loss": 0.7403193712234497, | |
| "step": 1750 | |
| }, | |
| { | |
| "epoch": 1.3262679788039364, | |
| "grad_norm": 2.9274091720581055, | |
| "learning_rate": 2.4569170252027877e-06, | |
| "loss": 0.29984942078590393, | |
| "step": 1752 | |
| }, | |
| { | |
| "epoch": 1.32778198334595, | |
| "grad_norm": 2.223027467727661, | |
| "learning_rate": 2.4555614768973842e-06, | |
| "loss": 0.39937183260917664, | |
| "step": 1754 | |
| }, | |
| { | |
| "epoch": 1.3292959878879635, | |
| "grad_norm": 1.9060097932815552, | |
| "learning_rate": 2.454204666113517e-06, | |
| "loss": 0.1987786591053009, | |
| "step": 1756 | |
| }, | |
| { | |
| "epoch": 1.3308099924299772, | |
| "grad_norm": 2.1526589393615723, | |
| "learning_rate": 2.452846594977585e-06, | |
| "loss": 0.5061262845993042, | |
| "step": 1758 | |
| }, | |
| { | |
| "epoch": 1.3323239969719909, | |
| "grad_norm": 2.4836909770965576, | |
| "learning_rate": 2.451487265617962e-06, | |
| "loss": 0.9262844324111938, | |
| "step": 1760 | |
| }, | |
| { | |
| "epoch": 1.3338380015140046, | |
| "grad_norm": 3.6183834075927734, | |
| "learning_rate": 2.450126680164992e-06, | |
| "loss": 0.4936198890209198, | |
| "step": 1762 | |
| }, | |
| { | |
| "epoch": 1.3353520060560182, | |
| "grad_norm": 25.579692840576172, | |
| "learning_rate": 2.4487648407509897e-06, | |
| "loss": 0.7663915157318115, | |
| "step": 1764 | |
| }, | |
| { | |
| "epoch": 1.336866010598032, | |
| "grad_norm": 3.81601619720459, | |
| "learning_rate": 2.447401749510234e-06, | |
| "loss": 0.7129659056663513, | |
| "step": 1766 | |
| }, | |
| { | |
| "epoch": 1.3383800151400453, | |
| "grad_norm": 2.3081166744232178, | |
| "learning_rate": 2.446037408578965e-06, | |
| "loss": 0.358845055103302, | |
| "step": 1768 | |
| }, | |
| { | |
| "epoch": 1.339894019682059, | |
| "grad_norm": 3.634019136428833, | |
| "learning_rate": 2.444671820095383e-06, | |
| "loss": 0.8582119941711426, | |
| "step": 1770 | |
| }, | |
| { | |
| "epoch": 1.3414080242240727, | |
| "grad_norm": 2.7738685607910156, | |
| "learning_rate": 2.443304986199642e-06, | |
| "loss": 0.7216231822967529, | |
| "step": 1772 | |
| }, | |
| { | |
| "epoch": 1.3429220287660864, | |
| "grad_norm": 2.8546862602233887, | |
| "learning_rate": 2.4419369090338485e-06, | |
| "loss": 0.5570340156555176, | |
| "step": 1774 | |
| }, | |
| { | |
| "epoch": 1.3444360333080998, | |
| "grad_norm": 1.778089165687561, | |
| "learning_rate": 2.4405675907420575e-06, | |
| "loss": 1.1104695796966553, | |
| "step": 1776 | |
| }, | |
| { | |
| "epoch": 1.3459500378501135, | |
| "grad_norm": 2.5244617462158203, | |
| "learning_rate": 2.439197033470269e-06, | |
| "loss": 1.0705184936523438, | |
| "step": 1778 | |
| }, | |
| { | |
| "epoch": 1.3474640423921271, | |
| "grad_norm": 4.3281941413879395, | |
| "learning_rate": 2.437825239366424e-06, | |
| "loss": 0.7556070685386658, | |
| "step": 1780 | |
| }, | |
| { | |
| "epoch": 1.3489780469341408, | |
| "grad_norm": 2.8140037059783936, | |
| "learning_rate": 2.4364522105804026e-06, | |
| "loss": 1.0987634658813477, | |
| "step": 1782 | |
| }, | |
| { | |
| "epoch": 1.3504920514761545, | |
| "grad_norm": 9.171343803405762, | |
| "learning_rate": 2.4350779492640203e-06, | |
| "loss": 0.33399084210395813, | |
| "step": 1784 | |
| }, | |
| { | |
| "epoch": 1.3520060560181681, | |
| "grad_norm": 4.052097320556641, | |
| "learning_rate": 2.433702457571024e-06, | |
| "loss": 0.598379373550415, | |
| "step": 1786 | |
| }, | |
| { | |
| "epoch": 1.3535200605601818, | |
| "grad_norm": 2.5520999431610107, | |
| "learning_rate": 2.432325737657087e-06, | |
| "loss": 0.4708116054534912, | |
| "step": 1788 | |
| }, | |
| { | |
| "epoch": 1.3550340651021953, | |
| "grad_norm": 27.633031845092773, | |
| "learning_rate": 2.430947791679811e-06, | |
| "loss": 0.9031903147697449, | |
| "step": 1790 | |
| }, | |
| { | |
| "epoch": 1.356548069644209, | |
| "grad_norm": 3.972987651824951, | |
| "learning_rate": 2.4295686217987165e-06, | |
| "loss": 0.3332988917827606, | |
| "step": 1792 | |
| }, | |
| { | |
| "epoch": 1.3580620741862226, | |
| "grad_norm": 0.5608827471733093, | |
| "learning_rate": 2.4281882301752424e-06, | |
| "loss": 0.4873623251914978, | |
| "step": 1794 | |
| }, | |
| { | |
| "epoch": 1.359576078728236, | |
| "grad_norm": 2.9013092517852783, | |
| "learning_rate": 2.4268066189727427e-06, | |
| "loss": 0.40898972749710083, | |
| "step": 1796 | |
| }, | |
| { | |
| "epoch": 1.3610900832702497, | |
| "grad_norm": 2.2795474529266357, | |
| "learning_rate": 2.4254237903564834e-06, | |
| "loss": 1.0034469366073608, | |
| "step": 1798 | |
| }, | |
| { | |
| "epoch": 1.3626040878122634, | |
| "grad_norm": 9.1624755859375, | |
| "learning_rate": 2.424039746493638e-06, | |
| "loss": 0.3764766454696655, | |
| "step": 1800 | |
| }, | |
| { | |
| "epoch": 1.364118092354277, | |
| "grad_norm": 13.661250114440918, | |
| "learning_rate": 2.4226544895532837e-06, | |
| "loss": 0.7672209143638611, | |
| "step": 1802 | |
| }, | |
| { | |
| "epoch": 1.3656320968962907, | |
| "grad_norm": 1.79072904586792, | |
| "learning_rate": 2.4212680217063996e-06, | |
| "loss": 0.6887940764427185, | |
| "step": 1804 | |
| }, | |
| { | |
| "epoch": 1.3671461014383044, | |
| "grad_norm": 1.4073220491409302, | |
| "learning_rate": 2.4198803451258624e-06, | |
| "loss": 0.26766905188560486, | |
| "step": 1806 | |
| }, | |
| { | |
| "epoch": 1.368660105980318, | |
| "grad_norm": 3.8320226669311523, | |
| "learning_rate": 2.418491461986444e-06, | |
| "loss": 0.5123397707939148, | |
| "step": 1808 | |
| }, | |
| { | |
| "epoch": 1.3701741105223315, | |
| "grad_norm": 5.280299663543701, | |
| "learning_rate": 2.4171013744648053e-06, | |
| "loss": 0.7153005599975586, | |
| "step": 1810 | |
| }, | |
| { | |
| "epoch": 1.3716881150643452, | |
| "grad_norm": 1.7055562734603882, | |
| "learning_rate": 2.4157100847394964e-06, | |
| "loss": 0.8119074702262878, | |
| "step": 1812 | |
| }, | |
| { | |
| "epoch": 1.3732021196063589, | |
| "grad_norm": 2.1829960346221924, | |
| "learning_rate": 2.414317594990951e-06, | |
| "loss": 0.3074583113193512, | |
| "step": 1814 | |
| }, | |
| { | |
| "epoch": 1.3747161241483725, | |
| "grad_norm": 1.3141074180603027, | |
| "learning_rate": 2.412923907401483e-06, | |
| "loss": 1.1208783388137817, | |
| "step": 1816 | |
| }, | |
| { | |
| "epoch": 1.376230128690386, | |
| "grad_norm": 4.528420925140381, | |
| "learning_rate": 2.411529024155284e-06, | |
| "loss": 0.7128023505210876, | |
| "step": 1818 | |
| }, | |
| { | |
| "epoch": 1.3777441332323996, | |
| "grad_norm": 1.3782234191894531, | |
| "learning_rate": 2.41013294743842e-06, | |
| "loss": 0.7611986994743347, | |
| "step": 1820 | |
| }, | |
| { | |
| "epoch": 1.3792581377744133, | |
| "grad_norm": 4.164299964904785, | |
| "learning_rate": 2.408735679438825e-06, | |
| "loss": 0.16963380575180054, | |
| "step": 1822 | |
| }, | |
| { | |
| "epoch": 1.380772142316427, | |
| "grad_norm": 11.236958503723145, | |
| "learning_rate": 2.4073372223463043e-06, | |
| "loss": 0.8310223817825317, | |
| "step": 1824 | |
| }, | |
| { | |
| "epoch": 1.3822861468584406, | |
| "grad_norm": 5.005019187927246, | |
| "learning_rate": 2.405937578352523e-06, | |
| "loss": 0.8467041850090027, | |
| "step": 1826 | |
| }, | |
| { | |
| "epoch": 1.3838001514004543, | |
| "grad_norm": 2.2640397548675537, | |
| "learning_rate": 2.404536749651007e-06, | |
| "loss": 1.099280595779419, | |
| "step": 1828 | |
| }, | |
| { | |
| "epoch": 1.3853141559424678, | |
| "grad_norm": 2.679772138595581, | |
| "learning_rate": 2.40313473843714e-06, | |
| "loss": 1.1264312267303467, | |
| "step": 1830 | |
| }, | |
| { | |
| "epoch": 1.3868281604844814, | |
| "grad_norm": 1.491464376449585, | |
| "learning_rate": 2.4017315469081583e-06, | |
| "loss": 1.1441285610198975, | |
| "step": 1832 | |
| }, | |
| { | |
| "epoch": 1.388342165026495, | |
| "grad_norm": 4.6747541427612305, | |
| "learning_rate": 2.400327177263148e-06, | |
| "loss": 0.6041007041931152, | |
| "step": 1834 | |
| }, | |
| { | |
| "epoch": 1.3898561695685088, | |
| "grad_norm": 4.323363304138184, | |
| "learning_rate": 2.3989216317030422e-06, | |
| "loss": 0.7551705241203308, | |
| "step": 1836 | |
| }, | |
| { | |
| "epoch": 1.3913701741105222, | |
| "grad_norm": 1.805139422416687, | |
| "learning_rate": 2.3975149124306153e-06, | |
| "loss": 0.9166613817214966, | |
| "step": 1838 | |
| }, | |
| { | |
| "epoch": 1.3928841786525359, | |
| "grad_norm": 1.763720989227295, | |
| "learning_rate": 2.396107021650482e-06, | |
| "loss": 0.3972744047641754, | |
| "step": 1840 | |
| }, | |
| { | |
| "epoch": 1.3943981831945496, | |
| "grad_norm": 4.463486194610596, | |
| "learning_rate": 2.3946979615690946e-06, | |
| "loss": 0.8842801451683044, | |
| "step": 1842 | |
| }, | |
| { | |
| "epoch": 1.3959121877365632, | |
| "grad_norm": 2.2309823036193848, | |
| "learning_rate": 2.393287734394735e-06, | |
| "loss": 0.8152608275413513, | |
| "step": 1844 | |
| }, | |
| { | |
| "epoch": 1.397426192278577, | |
| "grad_norm": 1.9330055713653564, | |
| "learning_rate": 2.3918763423375162e-06, | |
| "loss": 0.7752976417541504, | |
| "step": 1846 | |
| }, | |
| { | |
| "epoch": 1.3989401968205906, | |
| "grad_norm": 2.6784393787384033, | |
| "learning_rate": 2.3904637876093765e-06, | |
| "loss": 0.4102567136287689, | |
| "step": 1848 | |
| }, | |
| { | |
| "epoch": 1.400454201362604, | |
| "grad_norm": 1.4778658151626587, | |
| "learning_rate": 2.3890500724240754e-06, | |
| "loss": 0.7943294048309326, | |
| "step": 1850 | |
| }, | |
| { | |
| "epoch": 1.4019682059046177, | |
| "grad_norm": 2.401735305786133, | |
| "learning_rate": 2.387635198997193e-06, | |
| "loss": 0.407833993434906, | |
| "step": 1852 | |
| }, | |
| { | |
| "epoch": 1.4034822104466314, | |
| "grad_norm": 2.32517409324646, | |
| "learning_rate": 2.386219169546122e-06, | |
| "loss": 1.1640748977661133, | |
| "step": 1854 | |
| }, | |
| { | |
| "epoch": 1.404996214988645, | |
| "grad_norm": 7.444009304046631, | |
| "learning_rate": 2.3848019862900684e-06, | |
| "loss": 0.5029972195625305, | |
| "step": 1856 | |
| }, | |
| { | |
| "epoch": 1.4065102195306585, | |
| "grad_norm": 2.249410390853882, | |
| "learning_rate": 2.383383651450047e-06, | |
| "loss": 0.738408088684082, | |
| "step": 1858 | |
| }, | |
| { | |
| "epoch": 1.4080242240726721, | |
| "grad_norm": 6.936520576477051, | |
| "learning_rate": 2.3819641672488756e-06, | |
| "loss": 0.6538509726524353, | |
| "step": 1860 | |
| }, | |
| { | |
| "epoch": 1.4095382286146858, | |
| "grad_norm": 3.3028481006622314, | |
| "learning_rate": 2.3805435359111753e-06, | |
| "loss": 0.5028783082962036, | |
| "step": 1862 | |
| }, | |
| { | |
| "epoch": 1.4110522331566995, | |
| "grad_norm": 4.275155544281006, | |
| "learning_rate": 2.379121759663363e-06, | |
| "loss": 0.8367002010345459, | |
| "step": 1864 | |
| }, | |
| { | |
| "epoch": 1.4125662376987131, | |
| "grad_norm": 2.1487843990325928, | |
| "learning_rate": 2.377698840733652e-06, | |
| "loss": 0.7150232791900635, | |
| "step": 1866 | |
| }, | |
| { | |
| "epoch": 1.4140802422407268, | |
| "grad_norm": 2.527843475341797, | |
| "learning_rate": 2.3762747813520437e-06, | |
| "loss": 0.8052014112472534, | |
| "step": 1868 | |
| }, | |
| { | |
| "epoch": 1.4155942467827405, | |
| "grad_norm": 1.5240310430526733, | |
| "learning_rate": 2.3748495837503302e-06, | |
| "loss": 1.0286638736724854, | |
| "step": 1870 | |
| }, | |
| { | |
| "epoch": 1.417108251324754, | |
| "grad_norm": 4.434390068054199, | |
| "learning_rate": 2.3734232501620843e-06, | |
| "loss": 1.0798012018203735, | |
| "step": 1872 | |
| }, | |
| { | |
| "epoch": 1.4186222558667676, | |
| "grad_norm": 13.43934440612793, | |
| "learning_rate": 2.371995782822661e-06, | |
| "loss": 0.3294588625431061, | |
| "step": 1874 | |
| }, | |
| { | |
| "epoch": 1.4201362604087813, | |
| "grad_norm": 3.540721893310547, | |
| "learning_rate": 2.3705671839691915e-06, | |
| "loss": 1.0713666677474976, | |
| "step": 1876 | |
| }, | |
| { | |
| "epoch": 1.4216502649507947, | |
| "grad_norm": 2.280219078063965, | |
| "learning_rate": 2.3691374558405806e-06, | |
| "loss": 0.7495984435081482, | |
| "step": 1878 | |
| }, | |
| { | |
| "epoch": 1.4231642694928084, | |
| "grad_norm": 1.9954382181167603, | |
| "learning_rate": 2.3677066006775023e-06, | |
| "loss": 0.6960147023200989, | |
| "step": 1880 | |
| }, | |
| { | |
| "epoch": 1.424678274034822, | |
| "grad_norm": 7.99771785736084, | |
| "learning_rate": 2.3662746207223975e-06, | |
| "loss": 1.105854868888855, | |
| "step": 1882 | |
| }, | |
| { | |
| "epoch": 1.4261922785768357, | |
| "grad_norm": 1.6709688901901245, | |
| "learning_rate": 2.36484151821947e-06, | |
| "loss": 0.3433768153190613, | |
| "step": 1884 | |
| }, | |
| { | |
| "epoch": 1.4277062831188494, | |
| "grad_norm": 2.90567684173584, | |
| "learning_rate": 2.363407295414681e-06, | |
| "loss": 0.28263920545578003, | |
| "step": 1886 | |
| }, | |
| { | |
| "epoch": 1.429220287660863, | |
| "grad_norm": 6.311897277832031, | |
| "learning_rate": 2.361971954555751e-06, | |
| "loss": 0.5564744472503662, | |
| "step": 1888 | |
| }, | |
| { | |
| "epoch": 1.4307342922028767, | |
| "grad_norm": 1.9530609846115112, | |
| "learning_rate": 2.3605354978921497e-06, | |
| "loss": 0.7947676777839661, | |
| "step": 1890 | |
| }, | |
| { | |
| "epoch": 1.4322482967448902, | |
| "grad_norm": 2.0929741859436035, | |
| "learning_rate": 2.359097927675097e-06, | |
| "loss": 1.120530366897583, | |
| "step": 1892 | |
| }, | |
| { | |
| "epoch": 1.4337623012869039, | |
| "grad_norm": 2.99371600151062, | |
| "learning_rate": 2.3576592461575562e-06, | |
| "loss": 0.656132698059082, | |
| "step": 1894 | |
| }, | |
| { | |
| "epoch": 1.4352763058289175, | |
| "grad_norm": 1.237682580947876, | |
| "learning_rate": 2.356219455594234e-06, | |
| "loss": 1.1159588098526, | |
| "step": 1896 | |
| }, | |
| { | |
| "epoch": 1.4367903103709312, | |
| "grad_norm": 2.7394959926605225, | |
| "learning_rate": 2.3547785582415757e-06, | |
| "loss": 0.6585239171981812, | |
| "step": 1898 | |
| }, | |
| { | |
| "epoch": 1.4383043149129446, | |
| "grad_norm": 3.580203056335449, | |
| "learning_rate": 2.353336556357759e-06, | |
| "loss": 0.28547555208206177, | |
| "step": 1900 | |
| }, | |
| { | |
| "epoch": 1.4398183194549583, | |
| "grad_norm": 1.8582044839859009, | |
| "learning_rate": 2.351893452202694e-06, | |
| "loss": 1.0720411539077759, | |
| "step": 1902 | |
| }, | |
| { | |
| "epoch": 1.441332323996972, | |
| "grad_norm": 1.8799811601638794, | |
| "learning_rate": 2.350449248038018e-06, | |
| "loss": 0.7256011366844177, | |
| "step": 1904 | |
| }, | |
| { | |
| "epoch": 1.4428463285389856, | |
| "grad_norm": 1.9906892776489258, | |
| "learning_rate": 2.349003946127093e-06, | |
| "loss": 0.6670038104057312, | |
| "step": 1906 | |
| }, | |
| { | |
| "epoch": 1.4443603330809993, | |
| "grad_norm": 7.704482078552246, | |
| "learning_rate": 2.3475575487349996e-06, | |
| "loss": 0.959648609161377, | |
| "step": 1908 | |
| }, | |
| { | |
| "epoch": 1.445874337623013, | |
| "grad_norm": 1.963994026184082, | |
| "learning_rate": 2.3461100581285374e-06, | |
| "loss": 0.7701181769371033, | |
| "step": 1910 | |
| }, | |
| { | |
| "epoch": 1.4473883421650264, | |
| "grad_norm": 3.128945827484131, | |
| "learning_rate": 2.344661476576217e-06, | |
| "loss": 0.6871995329856873, | |
| "step": 1912 | |
| }, | |
| { | |
| "epoch": 1.44890234670704, | |
| "grad_norm": 7.108216285705566, | |
| "learning_rate": 2.343211806348261e-06, | |
| "loss": 0.4328969120979309, | |
| "step": 1914 | |
| }, | |
| { | |
| "epoch": 1.4504163512490538, | |
| "grad_norm": 4.275369644165039, | |
| "learning_rate": 2.3417610497165965e-06, | |
| "loss": 0.42519325017929077, | |
| "step": 1916 | |
| }, | |
| { | |
| "epoch": 1.4519303557910674, | |
| "grad_norm": 2.9187445640563965, | |
| "learning_rate": 2.3403092089548533e-06, | |
| "loss": 0.41934680938720703, | |
| "step": 1918 | |
| }, | |
| { | |
| "epoch": 1.4534443603330809, | |
| "grad_norm": 2.5698537826538086, | |
| "learning_rate": 2.3388562863383623e-06, | |
| "loss": 0.6157204508781433, | |
| "step": 1920 | |
| }, | |
| { | |
| "epoch": 1.4549583648750946, | |
| "grad_norm": 2.6239511966705322, | |
| "learning_rate": 2.3374022841441473e-06, | |
| "loss": 0.7134471535682678, | |
| "step": 1922 | |
| }, | |
| { | |
| "epoch": 1.4564723694171082, | |
| "grad_norm": 1.3179781436920166, | |
| "learning_rate": 2.3359472046509254e-06, | |
| "loss": 0.6652729511260986, | |
| "step": 1924 | |
| }, | |
| { | |
| "epoch": 1.457986373959122, | |
| "grad_norm": 1.326392412185669, | |
| "learning_rate": 2.3344910501391012e-06, | |
| "loss": 0.306835412979126, | |
| "step": 1926 | |
| }, | |
| { | |
| "epoch": 1.4595003785011356, | |
| "grad_norm": 1.8047044277191162, | |
| "learning_rate": 2.3330338228907653e-06, | |
| "loss": 0.7175745964050293, | |
| "step": 1928 | |
| }, | |
| { | |
| "epoch": 1.4610143830431492, | |
| "grad_norm": 1.8204708099365234, | |
| "learning_rate": 2.3315755251896883e-06, | |
| "loss": 0.760126531124115, | |
| "step": 1930 | |
| }, | |
| { | |
| "epoch": 1.4625283875851627, | |
| "grad_norm": 4.059135913848877, | |
| "learning_rate": 2.3301161593213196e-06, | |
| "loss": 0.6605585813522339, | |
| "step": 1932 | |
| }, | |
| { | |
| "epoch": 1.4640423921271764, | |
| "grad_norm": 2.046104669570923, | |
| "learning_rate": 2.328655727572781e-06, | |
| "loss": 0.6077765226364136, | |
| "step": 1934 | |
| }, | |
| { | |
| "epoch": 1.46555639666919, | |
| "grad_norm": 7.639178276062012, | |
| "learning_rate": 2.327194232232866e-06, | |
| "loss": 0.2979111671447754, | |
| "step": 1936 | |
| }, | |
| { | |
| "epoch": 1.4670704012112037, | |
| "grad_norm": 6.912994861602783, | |
| "learning_rate": 2.3257316755920356e-06, | |
| "loss": 1.1609059572219849, | |
| "step": 1938 | |
| }, | |
| { | |
| "epoch": 1.4685844057532171, | |
| "grad_norm": 1.4372037649154663, | |
| "learning_rate": 2.3242680599424116e-06, | |
| "loss": 0.6751406192779541, | |
| "step": 1940 | |
| }, | |
| { | |
| "epoch": 1.4700984102952308, | |
| "grad_norm": 1.8448315858840942, | |
| "learning_rate": 2.3228033875777787e-06, | |
| "loss": 0.6058751344680786, | |
| "step": 1942 | |
| }, | |
| { | |
| "epoch": 1.4716124148372445, | |
| "grad_norm": 1.561957597732544, | |
| "learning_rate": 2.321337660793574e-06, | |
| "loss": 0.688287079334259, | |
| "step": 1944 | |
| }, | |
| { | |
| "epoch": 1.4731264193792581, | |
| "grad_norm": 3.6917073726654053, | |
| "learning_rate": 2.319870881886891e-06, | |
| "loss": 0.671501100063324, | |
| "step": 1946 | |
| }, | |
| { | |
| "epoch": 1.4746404239212718, | |
| "grad_norm": 2.6265804767608643, | |
| "learning_rate": 2.318403053156469e-06, | |
| "loss": 1.0544801950454712, | |
| "step": 1948 | |
| }, | |
| { | |
| "epoch": 1.4761544284632855, | |
| "grad_norm": 2.026118755340576, | |
| "learning_rate": 2.316934176902694e-06, | |
| "loss": 0.6782279014587402, | |
| "step": 1950 | |
| }, | |
| { | |
| "epoch": 1.4776684330052992, | |
| "grad_norm": 2.275644540786743, | |
| "learning_rate": 2.3154642554275942e-06, | |
| "loss": 0.3071862459182739, | |
| "step": 1952 | |
| }, | |
| { | |
| "epoch": 1.4791824375473126, | |
| "grad_norm": 1.539597988128662, | |
| "learning_rate": 2.313993291034834e-06, | |
| "loss": 0.6210603713989258, | |
| "step": 1954 | |
| }, | |
| { | |
| "epoch": 1.4806964420893263, | |
| "grad_norm": 1.7491732835769653, | |
| "learning_rate": 2.312521286029714e-06, | |
| "loss": 0.663459300994873, | |
| "step": 1956 | |
| }, | |
| { | |
| "epoch": 1.48221044663134, | |
| "grad_norm": 0.9735066890716553, | |
| "learning_rate": 2.3110482427191647e-06, | |
| "loss": 0.3123417794704437, | |
| "step": 1958 | |
| }, | |
| { | |
| "epoch": 1.4837244511733534, | |
| "grad_norm": 2.4855947494506836, | |
| "learning_rate": 2.309574163411745e-06, | |
| "loss": 0.6801527738571167, | |
| "step": 1960 | |
| }, | |
| { | |
| "epoch": 1.485238455715367, | |
| "grad_norm": 3.174555540084839, | |
| "learning_rate": 2.308099050417636e-06, | |
| "loss": 0.6908773183822632, | |
| "step": 1962 | |
| }, | |
| { | |
| "epoch": 1.4867524602573807, | |
| "grad_norm": 1.6514712572097778, | |
| "learning_rate": 2.3066229060486395e-06, | |
| "loss": 1.0852717161178589, | |
| "step": 1964 | |
| }, | |
| { | |
| "epoch": 1.4882664647993944, | |
| "grad_norm": 5.090445041656494, | |
| "learning_rate": 2.3051457326181727e-06, | |
| "loss": 0.5876284837722778, | |
| "step": 1966 | |
| }, | |
| { | |
| "epoch": 1.489780469341408, | |
| "grad_norm": 1.7408846616744995, | |
| "learning_rate": 2.303667532441268e-06, | |
| "loss": 0.6825834512710571, | |
| "step": 1968 | |
| }, | |
| { | |
| "epoch": 1.4912944738834217, | |
| "grad_norm": 1.8605716228485107, | |
| "learning_rate": 2.3021883078345644e-06, | |
| "loss": 1.0978846549987793, | |
| "step": 1970 | |
| }, | |
| { | |
| "epoch": 1.4928084784254354, | |
| "grad_norm": 3.411088466644287, | |
| "learning_rate": 2.3007080611163075e-06, | |
| "loss": 0.63323974609375, | |
| "step": 1972 | |
| }, | |
| { | |
| "epoch": 1.4943224829674489, | |
| "grad_norm": 1.4026780128479004, | |
| "learning_rate": 2.2992267946063442e-06, | |
| "loss": 0.6705458760261536, | |
| "step": 1974 | |
| }, | |
| { | |
| "epoch": 1.4958364875094625, | |
| "grad_norm": 1.7022696733474731, | |
| "learning_rate": 2.2977445106261203e-06, | |
| "loss": 0.6665899157524109, | |
| "step": 1976 | |
| }, | |
| { | |
| "epoch": 1.4973504920514762, | |
| "grad_norm": 2.7791049480438232, | |
| "learning_rate": 2.2962612114986766e-06, | |
| "loss": 1.0865336656570435, | |
| "step": 1978 | |
| }, | |
| { | |
| "epoch": 1.4988644965934899, | |
| "grad_norm": 1.447414517402649, | |
| "learning_rate": 2.2947768995486425e-06, | |
| "loss": 0.29112380743026733, | |
| "step": 1980 | |
| }, | |
| { | |
| "epoch": 1.5003785011355033, | |
| "grad_norm": 1.5562597513198853, | |
| "learning_rate": 2.293291577102238e-06, | |
| "loss": 1.0406157970428467, | |
| "step": 1982 | |
| }, | |
| { | |
| "epoch": 1.501892505677517, | |
| "grad_norm": 2.4863104820251465, | |
| "learning_rate": 2.291805246487264e-06, | |
| "loss": 0.8998382687568665, | |
| "step": 1984 | |
| }, | |
| { | |
| "epoch": 1.5034065102195306, | |
| "grad_norm": 2.075108289718628, | |
| "learning_rate": 2.2903179100331036e-06, | |
| "loss": 1.0479289293289185, | |
| "step": 1986 | |
| }, | |
| { | |
| "epoch": 1.5049205147615443, | |
| "grad_norm": 3.2120509147644043, | |
| "learning_rate": 2.2888295700707136e-06, | |
| "loss": 0.2061680257320404, | |
| "step": 1988 | |
| }, | |
| { | |
| "epoch": 1.506434519303558, | |
| "grad_norm": 1.16193425655365, | |
| "learning_rate": 2.287340228932626e-06, | |
| "loss": 0.8722069263458252, | |
| "step": 1990 | |
| }, | |
| { | |
| "epoch": 1.5079485238455717, | |
| "grad_norm": 4.635110378265381, | |
| "learning_rate": 2.2858498889529404e-06, | |
| "loss": 0.6780929565429688, | |
| "step": 1992 | |
| }, | |
| { | |
| "epoch": 1.5094625283875853, | |
| "grad_norm": 1.9462432861328125, | |
| "learning_rate": 2.284358552467323e-06, | |
| "loss": 1.0604909658432007, | |
| "step": 1994 | |
| }, | |
| { | |
| "epoch": 1.5109765329295988, | |
| "grad_norm": 3.5864200592041016, | |
| "learning_rate": 2.282866221813001e-06, | |
| "loss": 0.7383281588554382, | |
| "step": 1996 | |
| }, | |
| { | |
| "epoch": 1.5124905374716124, | |
| "grad_norm": 1.6705453395843506, | |
| "learning_rate": 2.2813728993287584e-06, | |
| "loss": 1.1641281843185425, | |
| "step": 1998 | |
| }, | |
| { | |
| "epoch": 1.5140045420136259, | |
| "grad_norm": 4.274797439575195, | |
| "learning_rate": 2.279878587354936e-06, | |
| "loss": 0.7858713865280151, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 1.5155185465556396, | |
| "grad_norm": 3.163729429244995, | |
| "learning_rate": 2.2783832882334237e-06, | |
| "loss": 0.5768117308616638, | |
| "step": 2002 | |
| }, | |
| { | |
| "epoch": 1.5170325510976532, | |
| "grad_norm": 1.7296491861343384, | |
| "learning_rate": 2.2768870043076593e-06, | |
| "loss": 0.46675315499305725, | |
| "step": 2004 | |
| }, | |
| { | |
| "epoch": 1.518546555639667, | |
| "grad_norm": 1.1777745485305786, | |
| "learning_rate": 2.2753897379226236e-06, | |
| "loss": 1.0568435192108154, | |
| "step": 2006 | |
| }, | |
| { | |
| "epoch": 1.5200605601816806, | |
| "grad_norm": 2.120549201965332, | |
| "learning_rate": 2.2738914914248375e-06, | |
| "loss": 0.3315390944480896, | |
| "step": 2008 | |
| }, | |
| { | |
| "epoch": 1.5215745647236942, | |
| "grad_norm": 1.4694416522979736, | |
| "learning_rate": 2.272392267162356e-06, | |
| "loss": 0.9205579161643982, | |
| "step": 2010 | |
| }, | |
| { | |
| "epoch": 1.523088569265708, | |
| "grad_norm": 4.467464447021484, | |
| "learning_rate": 2.27089206748477e-06, | |
| "loss": 0.2087993174791336, | |
| "step": 2012 | |
| }, | |
| { | |
| "epoch": 1.5246025738077216, | |
| "grad_norm": 1.785783290863037, | |
| "learning_rate": 2.269390894743196e-06, | |
| "loss": 0.7023442387580872, | |
| "step": 2014 | |
| }, | |
| { | |
| "epoch": 1.526116578349735, | |
| "grad_norm": 9.867462158203125, | |
| "learning_rate": 2.2678887512902772e-06, | |
| "loss": 0.27967381477355957, | |
| "step": 2016 | |
| }, | |
| { | |
| "epoch": 1.5276305828917487, | |
| "grad_norm": 1.9199988842010498, | |
| "learning_rate": 2.266385639480177e-06, | |
| "loss": 0.7091067433357239, | |
| "step": 2018 | |
| }, | |
| { | |
| "epoch": 1.5291445874337621, | |
| "grad_norm": 33.07830810546875, | |
| "learning_rate": 2.264881561668577e-06, | |
| "loss": 0.6637606620788574, | |
| "step": 2020 | |
| }, | |
| { | |
| "epoch": 1.5306585919757758, | |
| "grad_norm": 2.0145633220672607, | |
| "learning_rate": 2.263376520212673e-06, | |
| "loss": 0.7130411863327026, | |
| "step": 2022 | |
| }, | |
| { | |
| "epoch": 1.5321725965177895, | |
| "grad_norm": 2.2729172706604004, | |
| "learning_rate": 2.261870517471171e-06, | |
| "loss": 1.0831512212753296, | |
| "step": 2024 | |
| }, | |
| { | |
| "epoch": 1.5336866010598031, | |
| "grad_norm": 6.332074165344238, | |
| "learning_rate": 2.260363555804282e-06, | |
| "loss": 0.20880655944347382, | |
| "step": 2026 | |
| }, | |
| { | |
| "epoch": 1.5352006056018168, | |
| "grad_norm": 2.0282723903656006, | |
| "learning_rate": 2.2588556375737217e-06, | |
| "loss": 0.6106334328651428, | |
| "step": 2028 | |
| }, | |
| { | |
| "epoch": 1.5367146101438305, | |
| "grad_norm": 1.8590947389602661, | |
| "learning_rate": 2.2573467651427044e-06, | |
| "loss": 0.24480338394641876, | |
| "step": 2030 | |
| }, | |
| { | |
| "epoch": 1.5382286146858442, | |
| "grad_norm": 2.0381529331207275, | |
| "learning_rate": 2.2558369408759395e-06, | |
| "loss": 0.40811577439308167, | |
| "step": 2032 | |
| }, | |
| { | |
| "epoch": 1.5397426192278578, | |
| "grad_norm": 2.440659761428833, | |
| "learning_rate": 2.254326167139628e-06, | |
| "loss": 0.9319549798965454, | |
| "step": 2034 | |
| }, | |
| { | |
| "epoch": 1.5412566237698713, | |
| "grad_norm": 6.197136878967285, | |
| "learning_rate": 2.2528144463014607e-06, | |
| "loss": 0.6879948377609253, | |
| "step": 2036 | |
| }, | |
| { | |
| "epoch": 1.542770628311885, | |
| "grad_norm": 4.5604472160339355, | |
| "learning_rate": 2.2513017807306087e-06, | |
| "loss": 0.5867362022399902, | |
| "step": 2038 | |
| }, | |
| { | |
| "epoch": 1.5442846328538986, | |
| "grad_norm": 5.366823196411133, | |
| "learning_rate": 2.2497881727977283e-06, | |
| "loss": 1.0565264225006104, | |
| "step": 2040 | |
| }, | |
| { | |
| "epoch": 1.545798637395912, | |
| "grad_norm": 1.2136204242706299, | |
| "learning_rate": 2.24827362487495e-06, | |
| "loss": 0.34370917081832886, | |
| "step": 2042 | |
| }, | |
| { | |
| "epoch": 1.5473126419379257, | |
| "grad_norm": 2.217613697052002, | |
| "learning_rate": 2.246758139335878e-06, | |
| "loss": 0.501958966255188, | |
| "step": 2044 | |
| }, | |
| { | |
| "epoch": 1.5488266464799394, | |
| "grad_norm": 43.41156768798828, | |
| "learning_rate": 2.245241718555586e-06, | |
| "loss": 0.3205106854438782, | |
| "step": 2046 | |
| }, | |
| { | |
| "epoch": 1.550340651021953, | |
| "grad_norm": 8.517773628234863, | |
| "learning_rate": 2.2437243649106126e-06, | |
| "loss": 0.7185165286064148, | |
| "step": 2048 | |
| }, | |
| { | |
| "epoch": 1.5518546555639667, | |
| "grad_norm": 1.816855549812317, | |
| "learning_rate": 2.2422060807789602e-06, | |
| "loss": 0.5383141040802002, | |
| "step": 2050 | |
| }, | |
| { | |
| "epoch": 1.5533686601059804, | |
| "grad_norm": 1.642162561416626, | |
| "learning_rate": 2.240686868540088e-06, | |
| "loss": 0.3428835868835449, | |
| "step": 2052 | |
| }, | |
| { | |
| "epoch": 1.554882664647994, | |
| "grad_norm": 3.3189697265625, | |
| "learning_rate": 2.23916673057491e-06, | |
| "loss": 0.16990278661251068, | |
| "step": 2054 | |
| }, | |
| { | |
| "epoch": 1.5563966691900075, | |
| "grad_norm": 2.2869858741760254, | |
| "learning_rate": 2.2376456692657917e-06, | |
| "loss": 0.4399016499519348, | |
| "step": 2056 | |
| }, | |
| { | |
| "epoch": 1.5579106737320212, | |
| "grad_norm": 2.9362711906433105, | |
| "learning_rate": 2.2361236869965447e-06, | |
| "loss": 0.19548234343528748, | |
| "step": 2058 | |
| }, | |
| { | |
| "epoch": 1.5594246782740349, | |
| "grad_norm": 3.8211545944213867, | |
| "learning_rate": 2.234600786152425e-06, | |
| "loss": 1.248904824256897, | |
| "step": 2060 | |
| }, | |
| { | |
| "epoch": 1.5609386828160483, | |
| "grad_norm": 6.065986156463623, | |
| "learning_rate": 2.2330769691201267e-06, | |
| "loss": 0.7616989612579346, | |
| "step": 2062 | |
| }, | |
| { | |
| "epoch": 1.562452687358062, | |
| "grad_norm": 9.370930671691895, | |
| "learning_rate": 2.231552238287781e-06, | |
| "loss": 0.3147810399532318, | |
| "step": 2064 | |
| }, | |
| { | |
| "epoch": 1.5639666919000756, | |
| "grad_norm": 3.1894898414611816, | |
| "learning_rate": 2.230026596044951e-06, | |
| "loss": 0.34935462474823, | |
| "step": 2066 | |
| }, | |
| { | |
| "epoch": 1.5654806964420893, | |
| "grad_norm": 2.467068672180176, | |
| "learning_rate": 2.2285000447826276e-06, | |
| "loss": 0.7595210671424866, | |
| "step": 2068 | |
| }, | |
| { | |
| "epoch": 1.566994700984103, | |
| "grad_norm": 2.4427568912506104, | |
| "learning_rate": 2.2269725868932266e-06, | |
| "loss": 0.4047495424747467, | |
| "step": 2070 | |
| }, | |
| { | |
| "epoch": 1.5685087055261167, | |
| "grad_norm": 4.277834892272949, | |
| "learning_rate": 2.2254442247705855e-06, | |
| "loss": 0.626862645149231, | |
| "step": 2072 | |
| }, | |
| { | |
| "epoch": 1.5700227100681303, | |
| "grad_norm": 1.623204231262207, | |
| "learning_rate": 2.223914960809958e-06, | |
| "loss": 1.1122664213180542, | |
| "step": 2074 | |
| }, | |
| { | |
| "epoch": 1.571536714610144, | |
| "grad_norm": 1.6308976411819458, | |
| "learning_rate": 2.222384797408011e-06, | |
| "loss": 1.063917875289917, | |
| "step": 2076 | |
| }, | |
| { | |
| "epoch": 1.5730507191521574, | |
| "grad_norm": 3.854956865310669, | |
| "learning_rate": 2.220853736962821e-06, | |
| "loss": 0.5665196776390076, | |
| "step": 2078 | |
| }, | |
| { | |
| "epoch": 1.574564723694171, | |
| "grad_norm": 3.0789151191711426, | |
| "learning_rate": 2.2193217818738714e-06, | |
| "loss": 0.5702639818191528, | |
| "step": 2080 | |
| }, | |
| { | |
| "epoch": 1.5760787282361846, | |
| "grad_norm": 2.537348508834839, | |
| "learning_rate": 2.217788934542047e-06, | |
| "loss": 0.32417529821395874, | |
| "step": 2082 | |
| }, | |
| { | |
| "epoch": 1.5775927327781982, | |
| "grad_norm": 3.38523530960083, | |
| "learning_rate": 2.2162551973696306e-06, | |
| "loss": 0.35859784483909607, | |
| "step": 2084 | |
| }, | |
| { | |
| "epoch": 1.579106737320212, | |
| "grad_norm": 1.4633924961090088, | |
| "learning_rate": 2.2147205727603e-06, | |
| "loss": 0.6509751081466675, | |
| "step": 2086 | |
| }, | |
| { | |
| "epoch": 1.5806207418622256, | |
| "grad_norm": 1.6090911626815796, | |
| "learning_rate": 2.2131850631191237e-06, | |
| "loss": 0.7736411094665527, | |
| "step": 2088 | |
| }, | |
| { | |
| "epoch": 1.5821347464042392, | |
| "grad_norm": 2.0669453144073486, | |
| "learning_rate": 2.2116486708525576e-06, | |
| "loss": 0.7473424673080444, | |
| "step": 2090 | |
| }, | |
| { | |
| "epoch": 1.583648750946253, | |
| "grad_norm": 7.861624717712402, | |
| "learning_rate": 2.2101113983684397e-06, | |
| "loss": 0.3567483127117157, | |
| "step": 2092 | |
| }, | |
| { | |
| "epoch": 1.5851627554882666, | |
| "grad_norm": 2.284384250640869, | |
| "learning_rate": 2.208573248075989e-06, | |
| "loss": 0.7789891958236694, | |
| "step": 2094 | |
| }, | |
| { | |
| "epoch": 1.5866767600302802, | |
| "grad_norm": 5.964828968048096, | |
| "learning_rate": 2.2070342223857986e-06, | |
| "loss": 0.69113689661026, | |
| "step": 2096 | |
| }, | |
| { | |
| "epoch": 1.5881907645722937, | |
| "grad_norm": 1.429634690284729, | |
| "learning_rate": 2.205494323709835e-06, | |
| "loss": 0.4352221190929413, | |
| "step": 2098 | |
| }, | |
| { | |
| "epoch": 1.5897047691143074, | |
| "grad_norm": 10.728472709655762, | |
| "learning_rate": 2.2039535544614325e-06, | |
| "loss": 0.5330414772033691, | |
| "step": 2100 | |
| }, | |
| { | |
| "epoch": 1.5912187736563208, | |
| "grad_norm": 2.230041980743408, | |
| "learning_rate": 2.2024119170552886e-06, | |
| "loss": 0.15315233170986176, | |
| "step": 2102 | |
| }, | |
| { | |
| "epoch": 1.5927327781983345, | |
| "grad_norm": 2.4584739208221436, | |
| "learning_rate": 2.2008694139074623e-06, | |
| "loss": 0.2879572808742523, | |
| "step": 2104 | |
| }, | |
| { | |
| "epoch": 1.5942467827403481, | |
| "grad_norm": 8.412919998168945, | |
| "learning_rate": 2.19932604743537e-06, | |
| "loss": 0.6374467015266418, | |
| "step": 2106 | |
| }, | |
| { | |
| "epoch": 1.5957607872823618, | |
| "grad_norm": 1.1442950963974, | |
| "learning_rate": 2.19778182005778e-06, | |
| "loss": 1.0432395935058594, | |
| "step": 2108 | |
| }, | |
| { | |
| "epoch": 1.5972747918243755, | |
| "grad_norm": 2.137449026107788, | |
| "learning_rate": 2.1962367341948103e-06, | |
| "loss": 1.115038275718689, | |
| "step": 2110 | |
| }, | |
| { | |
| "epoch": 1.5987887963663892, | |
| "grad_norm": 5.937266826629639, | |
| "learning_rate": 2.194690792267925e-06, | |
| "loss": 0.8900066018104553, | |
| "step": 2112 | |
| }, | |
| { | |
| "epoch": 1.6003028009084028, | |
| "grad_norm": 3.706021785736084, | |
| "learning_rate": 2.1931439966999285e-06, | |
| "loss": 0.7339775562286377, | |
| "step": 2114 | |
| }, | |
| { | |
| "epoch": 1.6018168054504165, | |
| "grad_norm": 1.7120450735092163, | |
| "learning_rate": 2.191596349914964e-06, | |
| "loss": 0.5568338632583618, | |
| "step": 2116 | |
| }, | |
| { | |
| "epoch": 1.60333080999243, | |
| "grad_norm": 4.790015697479248, | |
| "learning_rate": 2.1900478543385073e-06, | |
| "loss": 0.6627650260925293, | |
| "step": 2118 | |
| }, | |
| { | |
| "epoch": 1.6048448145344436, | |
| "grad_norm": 1.9481124877929688, | |
| "learning_rate": 2.188498512397367e-06, | |
| "loss": 0.8415189981460571, | |
| "step": 2120 | |
| }, | |
| { | |
| "epoch": 1.6063588190764573, | |
| "grad_norm": 6.659837245941162, | |
| "learning_rate": 2.186948326519675e-06, | |
| "loss": 0.2886730134487152, | |
| "step": 2122 | |
| }, | |
| { | |
| "epoch": 1.6078728236184707, | |
| "grad_norm": 3.406694173812866, | |
| "learning_rate": 2.1853972991348895e-06, | |
| "loss": 0.1624942421913147, | |
| "step": 2124 | |
| }, | |
| { | |
| "epoch": 1.6093868281604844, | |
| "grad_norm": 1.9080997705459595, | |
| "learning_rate": 2.1838454326737836e-06, | |
| "loss": 0.4276244044303894, | |
| "step": 2126 | |
| }, | |
| { | |
| "epoch": 1.610900832702498, | |
| "grad_norm": 5.854907035827637, | |
| "learning_rate": 2.182292729568448e-06, | |
| "loss": 0.4545116424560547, | |
| "step": 2128 | |
| }, | |
| { | |
| "epoch": 1.6124148372445117, | |
| "grad_norm": 2.8690412044525146, | |
| "learning_rate": 2.180739192252284e-06, | |
| "loss": 0.7988955974578857, | |
| "step": 2130 | |
| }, | |
| { | |
| "epoch": 1.6139288417865254, | |
| "grad_norm": 8.291343688964844, | |
| "learning_rate": 2.1791848231600002e-06, | |
| "loss": 0.6213572025299072, | |
| "step": 2132 | |
| }, | |
| { | |
| "epoch": 1.615442846328539, | |
| "grad_norm": 0.862676203250885, | |
| "learning_rate": 2.1776296247276077e-06, | |
| "loss": 0.4094568192958832, | |
| "step": 2134 | |
| }, | |
| { | |
| "epoch": 1.6169568508705527, | |
| "grad_norm": 2.8311262130737305, | |
| "learning_rate": 2.1760735993924196e-06, | |
| "loss": 0.7716742157936096, | |
| "step": 2136 | |
| }, | |
| { | |
| "epoch": 1.6184708554125662, | |
| "grad_norm": 3.6992971897125244, | |
| "learning_rate": 2.174516749593044e-06, | |
| "loss": 0.7096973061561584, | |
| "step": 2138 | |
| }, | |
| { | |
| "epoch": 1.6199848599545799, | |
| "grad_norm": 2.3221044540405273, | |
| "learning_rate": 2.172959077769379e-06, | |
| "loss": 0.8890570402145386, | |
| "step": 2140 | |
| }, | |
| { | |
| "epoch": 1.6214988644965935, | |
| "grad_norm": 2.638880491256714, | |
| "learning_rate": 2.1714005863626143e-06, | |
| "loss": 0.5222177505493164, | |
| "step": 2142 | |
| }, | |
| { | |
| "epoch": 1.623012869038607, | |
| "grad_norm": 2.155319929122925, | |
| "learning_rate": 2.169841277815221e-06, | |
| "loss": 0.7492667436599731, | |
| "step": 2144 | |
| }, | |
| { | |
| "epoch": 1.6245268735806206, | |
| "grad_norm": 2.3153676986694336, | |
| "learning_rate": 2.168281154570954e-06, | |
| "loss": 0.9271564483642578, | |
| "step": 2146 | |
| }, | |
| { | |
| "epoch": 1.6260408781226343, | |
| "grad_norm": 3.7721846103668213, | |
| "learning_rate": 2.1667202190748423e-06, | |
| "loss": 0.3474150002002716, | |
| "step": 2148 | |
| }, | |
| { | |
| "epoch": 1.627554882664648, | |
| "grad_norm": 1.7081248760223389, | |
| "learning_rate": 2.165158473773189e-06, | |
| "loss": 0.2777302861213684, | |
| "step": 2150 | |
| }, | |
| { | |
| "epoch": 1.6290688872066617, | |
| "grad_norm": 3.3190102577209473, | |
| "learning_rate": 2.163595921113567e-06, | |
| "loss": 0.4995487928390503, | |
| "step": 2152 | |
| }, | |
| { | |
| "epoch": 1.6305828917486753, | |
| "grad_norm": 8.794017791748047, | |
| "learning_rate": 2.1620325635448127e-06, | |
| "loss": 0.7314831614494324, | |
| "step": 2154 | |
| }, | |
| { | |
| "epoch": 1.632096896290689, | |
| "grad_norm": 0.8514871001243591, | |
| "learning_rate": 2.1604684035170253e-06, | |
| "loss": 0.34603098034858704, | |
| "step": 2156 | |
| }, | |
| { | |
| "epoch": 1.6336109008327027, | |
| "grad_norm": 3.585616111755371, | |
| "learning_rate": 2.158903443481561e-06, | |
| "loss": 0.6739398837089539, | |
| "step": 2158 | |
| }, | |
| { | |
| "epoch": 1.635124905374716, | |
| "grad_norm": 4.3569865226745605, | |
| "learning_rate": 2.157337685891031e-06, | |
| "loss": 0.7613204717636108, | |
| "step": 2160 | |
| }, | |
| { | |
| "epoch": 1.6366389099167298, | |
| "grad_norm": 5.168086051940918, | |
| "learning_rate": 2.155771133199294e-06, | |
| "loss": 0.5170891880989075, | |
| "step": 2162 | |
| }, | |
| { | |
| "epoch": 1.6381529144587432, | |
| "grad_norm": 1.6620736122131348, | |
| "learning_rate": 2.154203787861458e-06, | |
| "loss": 0.5840808153152466, | |
| "step": 2164 | |
| }, | |
| { | |
| "epoch": 1.639666919000757, | |
| "grad_norm": 3.4618961811065674, | |
| "learning_rate": 2.1526356523338704e-06, | |
| "loss": 0.6698583364486694, | |
| "step": 2166 | |
| }, | |
| { | |
| "epoch": 1.6411809235427706, | |
| "grad_norm": 3.096137523651123, | |
| "learning_rate": 2.1510667290741183e-06, | |
| "loss": 0.43474137783050537, | |
| "step": 2168 | |
| }, | |
| { | |
| "epoch": 1.6426949280847842, | |
| "grad_norm": 13.721327781677246, | |
| "learning_rate": 2.149497020541023e-06, | |
| "loss": 0.6251839399337769, | |
| "step": 2170 | |
| }, | |
| { | |
| "epoch": 1.644208932626798, | |
| "grad_norm": 1.7023292779922485, | |
| "learning_rate": 2.1479265291946365e-06, | |
| "loss": 0.8443311452865601, | |
| "step": 2172 | |
| }, | |
| { | |
| "epoch": 1.6457229371688116, | |
| "grad_norm": 2.377385377883911, | |
| "learning_rate": 2.146355257496239e-06, | |
| "loss": 0.7048377990722656, | |
| "step": 2174 | |
| }, | |
| { | |
| "epoch": 1.6472369417108252, | |
| "grad_norm": 6.913028717041016, | |
| "learning_rate": 2.1447832079083306e-06, | |
| "loss": 0.3259809911251068, | |
| "step": 2176 | |
| }, | |
| { | |
| "epoch": 1.648750946252839, | |
| "grad_norm": 1.9534987211227417, | |
| "learning_rate": 2.1432103828946335e-06, | |
| "loss": 1.068019986152649, | |
| "step": 2178 | |
| }, | |
| { | |
| "epoch": 1.6502649507948524, | |
| "grad_norm": 2.9702231884002686, | |
| "learning_rate": 2.141636784920083e-06, | |
| "loss": 0.7385921478271484, | |
| "step": 2180 | |
| }, | |
| { | |
| "epoch": 1.651778955336866, | |
| "grad_norm": 3.8918232917785645, | |
| "learning_rate": 2.1400624164508283e-06, | |
| "loss": 0.6695861220359802, | |
| "step": 2182 | |
| }, | |
| { | |
| "epoch": 1.6532929598788795, | |
| "grad_norm": 3.2664449214935303, | |
| "learning_rate": 2.1384872799542233e-06, | |
| "loss": 0.5747061371803284, | |
| "step": 2184 | |
| }, | |
| { | |
| "epoch": 1.6548069644208931, | |
| "grad_norm": 9.48470687866211, | |
| "learning_rate": 2.1369113778988272e-06, | |
| "loss": 0.6012070178985596, | |
| "step": 2186 | |
| }, | |
| { | |
| "epoch": 1.6563209689629068, | |
| "grad_norm": 2.4145398139953613, | |
| "learning_rate": 2.135334712754399e-06, | |
| "loss": 0.2863891124725342, | |
| "step": 2188 | |
| }, | |
| { | |
| "epoch": 1.6578349735049205, | |
| "grad_norm": 4.188508033752441, | |
| "learning_rate": 2.1337572869918926e-06, | |
| "loss": 1.1720845699310303, | |
| "step": 2190 | |
| }, | |
| { | |
| "epoch": 1.6593489780469342, | |
| "grad_norm": 2.1696295738220215, | |
| "learning_rate": 2.132179103083455e-06, | |
| "loss": 1.126940131187439, | |
| "step": 2192 | |
| }, | |
| { | |
| "epoch": 1.6608629825889478, | |
| "grad_norm": 2.3900654315948486, | |
| "learning_rate": 2.1306001635024204e-06, | |
| "loss": 1.0954455137252808, | |
| "step": 2194 | |
| }, | |
| { | |
| "epoch": 1.6623769871309615, | |
| "grad_norm": 2.878375768661499, | |
| "learning_rate": 2.129020470723309e-06, | |
| "loss": 0.6001009345054626, | |
| "step": 2196 | |
| }, | |
| { | |
| "epoch": 1.6638909916729752, | |
| "grad_norm": 26.621519088745117, | |
| "learning_rate": 2.127440027221819e-06, | |
| "loss": 0.47328245639801025, | |
| "step": 2198 | |
| }, | |
| { | |
| "epoch": 1.6654049962149886, | |
| "grad_norm": 2.122884511947632, | |
| "learning_rate": 2.1258588354748273e-06, | |
| "loss": 0.3275623619556427, | |
| "step": 2200 | |
| }, | |
| { | |
| "epoch": 1.6669190007570023, | |
| "grad_norm": 1.6605173349380493, | |
| "learning_rate": 2.1242768979603817e-06, | |
| "loss": 0.28766143321990967, | |
| "step": 2202 | |
| }, | |
| { | |
| "epoch": 1.668433005299016, | |
| "grad_norm": 4.779479026794434, | |
| "learning_rate": 2.1226942171577e-06, | |
| "loss": 0.6301637887954712, | |
| "step": 2204 | |
| }, | |
| { | |
| "epoch": 1.6699470098410294, | |
| "grad_norm": 1.1953977346420288, | |
| "learning_rate": 2.1211107955471653e-06, | |
| "loss": 0.7062985897064209, | |
| "step": 2206 | |
| }, | |
| { | |
| "epoch": 1.671461014383043, | |
| "grad_norm": 2.85499906539917, | |
| "learning_rate": 2.1195266356103194e-06, | |
| "loss": 0.2579890191555023, | |
| "step": 2208 | |
| }, | |
| { | |
| "epoch": 1.6729750189250567, | |
| "grad_norm": 6.130537033081055, | |
| "learning_rate": 2.117941739829864e-06, | |
| "loss": 0.2943982183933258, | |
| "step": 2210 | |
| }, | |
| { | |
| "epoch": 1.6744890234670704, | |
| "grad_norm": 2.1135518550872803, | |
| "learning_rate": 2.116356110689652e-06, | |
| "loss": 0.5594744086265564, | |
| "step": 2212 | |
| }, | |
| { | |
| "epoch": 1.676003028009084, | |
| "grad_norm": 2.256420850753784, | |
| "learning_rate": 2.1147697506746865e-06, | |
| "loss": 0.6435813307762146, | |
| "step": 2214 | |
| }, | |
| { | |
| "epoch": 1.6775170325510977, | |
| "grad_norm": 430.08251953125, | |
| "learning_rate": 2.1131826622711157e-06, | |
| "loss": 0.3069487512111664, | |
| "step": 2216 | |
| }, | |
| { | |
| "epoch": 1.6790310370931114, | |
| "grad_norm": 1.5891367197036743, | |
| "learning_rate": 2.1115948479662303e-06, | |
| "loss": 0.7901256680488586, | |
| "step": 2218 | |
| }, | |
| { | |
| "epoch": 1.6805450416351249, | |
| "grad_norm": 3.6547927856445312, | |
| "learning_rate": 2.1100063102484567e-06, | |
| "loss": 0.4188877046108246, | |
| "step": 2220 | |
| }, | |
| { | |
| "epoch": 1.6820590461771385, | |
| "grad_norm": 1.7542855739593506, | |
| "learning_rate": 2.1084170516073566e-06, | |
| "loss": 0.2679920792579651, | |
| "step": 2222 | |
| }, | |
| { | |
| "epoch": 1.6835730507191522, | |
| "grad_norm": 5.140130996704102, | |
| "learning_rate": 2.106827074533622e-06, | |
| "loss": 0.24302098155021667, | |
| "step": 2224 | |
| }, | |
| { | |
| "epoch": 1.6850870552611656, | |
| "grad_norm": 3.187342405319214, | |
| "learning_rate": 2.1052363815190685e-06, | |
| "loss": 0.1624186784029007, | |
| "step": 2226 | |
| }, | |
| { | |
| "epoch": 1.6866010598031793, | |
| "grad_norm": 6.9088053703308105, | |
| "learning_rate": 2.1036449750566363e-06, | |
| "loss": 0.6162082552909851, | |
| "step": 2228 | |
| }, | |
| { | |
| "epoch": 1.688115064345193, | |
| "grad_norm": 1.30314040184021, | |
| "learning_rate": 2.102052857640381e-06, | |
| "loss": 0.2861323654651642, | |
| "step": 2230 | |
| }, | |
| { | |
| "epoch": 1.6896290688872067, | |
| "grad_norm": 5.282270908355713, | |
| "learning_rate": 2.1004600317654764e-06, | |
| "loss": 0.7010675668716431, | |
| "step": 2232 | |
| }, | |
| { | |
| "epoch": 1.6911430734292203, | |
| "grad_norm": 6.541689872741699, | |
| "learning_rate": 2.0988664999282025e-06, | |
| "loss": 0.17050687968730927, | |
| "step": 2234 | |
| }, | |
| { | |
| "epoch": 1.692657077971234, | |
| "grad_norm": 2.042179584503174, | |
| "learning_rate": 2.0972722646259476e-06, | |
| "loss": 1.1382827758789062, | |
| "step": 2236 | |
| }, | |
| { | |
| "epoch": 1.6941710825132477, | |
| "grad_norm": 2.3376784324645996, | |
| "learning_rate": 2.095677328357202e-06, | |
| "loss": 0.23813597857952118, | |
| "step": 2238 | |
| }, | |
| { | |
| "epoch": 1.6956850870552613, | |
| "grad_norm": 1.6474254131317139, | |
| "learning_rate": 2.0940816936215553e-06, | |
| "loss": 0.5763483047485352, | |
| "step": 2240 | |
| }, | |
| { | |
| "epoch": 1.6971990915972748, | |
| "grad_norm": 2.9063918590545654, | |
| "learning_rate": 2.0924853629196918e-06, | |
| "loss": 0.34474313259124756, | |
| "step": 2242 | |
| }, | |
| { | |
| "epoch": 1.6987130961392884, | |
| "grad_norm": 2.5557656288146973, | |
| "learning_rate": 2.090888338753385e-06, | |
| "loss": 0.21587520837783813, | |
| "step": 2244 | |
| }, | |
| { | |
| "epoch": 1.700227100681302, | |
| "grad_norm": 5.877704620361328, | |
| "learning_rate": 2.0892906236254966e-06, | |
| "loss": 0.621183454990387, | |
| "step": 2246 | |
| }, | |
| { | |
| "epoch": 1.7017411052233156, | |
| "grad_norm": 4.238709926605225, | |
| "learning_rate": 2.08769222003997e-06, | |
| "loss": 1.0777775049209595, | |
| "step": 2248 | |
| }, | |
| { | |
| "epoch": 1.7032551097653292, | |
| "grad_norm": 2.4976248741149902, | |
| "learning_rate": 2.08609313050183e-06, | |
| "loss": 0.6810624599456787, | |
| "step": 2250 | |
| }, | |
| { | |
| "epoch": 1.704769114307343, | |
| "grad_norm": 5.799039363861084, | |
| "learning_rate": 2.0844933575171725e-06, | |
| "loss": 0.6343029141426086, | |
| "step": 2252 | |
| }, | |
| { | |
| "epoch": 1.7062831188493566, | |
| "grad_norm": 1.8170959949493408, | |
| "learning_rate": 2.0828929035931685e-06, | |
| "loss": 0.7549158930778503, | |
| "step": 2254 | |
| }, | |
| { | |
| "epoch": 1.7077971233913702, | |
| "grad_norm": 4.467005252838135, | |
| "learning_rate": 2.0812917712380533e-06, | |
| "loss": 0.6830211281776428, | |
| "step": 2256 | |
| }, | |
| { | |
| "epoch": 1.709311127933384, | |
| "grad_norm": 0.8398280739784241, | |
| "learning_rate": 2.0796899629611274e-06, | |
| "loss": 0.6890422105789185, | |
| "step": 2258 | |
| }, | |
| { | |
| "epoch": 1.7108251324753976, | |
| "grad_norm": 1.046366572380066, | |
| "learning_rate": 2.078087481272749e-06, | |
| "loss": 0.4800116717815399, | |
| "step": 2260 | |
| }, | |
| { | |
| "epoch": 1.712339137017411, | |
| "grad_norm": 1.4331594705581665, | |
| "learning_rate": 2.0764843286843326e-06, | |
| "loss": 0.8580260276794434, | |
| "step": 2262 | |
| }, | |
| { | |
| "epoch": 1.7138531415594247, | |
| "grad_norm": 19.763986587524414, | |
| "learning_rate": 2.0748805077083444e-06, | |
| "loss": 0.6199619174003601, | |
| "step": 2264 | |
| }, | |
| { | |
| "epoch": 1.7153671461014381, | |
| "grad_norm": 17.90291976928711, | |
| "learning_rate": 2.0732760208582967e-06, | |
| "loss": 0.7525902390480042, | |
| "step": 2266 | |
| }, | |
| { | |
| "epoch": 1.7168811506434518, | |
| "grad_norm": 4.046377658843994, | |
| "learning_rate": 2.0716708706487476e-06, | |
| "loss": 0.7198786735534668, | |
| "step": 2268 | |
| }, | |
| { | |
| "epoch": 1.7183951551854655, | |
| "grad_norm": 9.317425727844238, | |
| "learning_rate": 2.0700650595952925e-06, | |
| "loss": 0.30152690410614014, | |
| "step": 2270 | |
| }, | |
| { | |
| "epoch": 1.7199091597274792, | |
| "grad_norm": 1.2944056987762451, | |
| "learning_rate": 2.0684585902145637e-06, | |
| "loss": 0.6058851480484009, | |
| "step": 2272 | |
| }, | |
| { | |
| "epoch": 1.7214231642694928, | |
| "grad_norm": 2.4493825435638428, | |
| "learning_rate": 2.0668514650242252e-06, | |
| "loss": 0.6572599411010742, | |
| "step": 2274 | |
| }, | |
| { | |
| "epoch": 1.7229371688115065, | |
| "grad_norm": 2.4681243896484375, | |
| "learning_rate": 2.0652436865429685e-06, | |
| "loss": 0.14060214161872864, | |
| "step": 2276 | |
| }, | |
| { | |
| "epoch": 1.7244511733535202, | |
| "grad_norm": 3.9299867153167725, | |
| "learning_rate": 2.0636352572905093e-06, | |
| "loss": 0.6264467239379883, | |
| "step": 2278 | |
| }, | |
| { | |
| "epoch": 1.7259651778955338, | |
| "grad_norm": 4.141777992248535, | |
| "learning_rate": 2.0620261797875824e-06, | |
| "loss": 0.2831032872200012, | |
| "step": 2280 | |
| }, | |
| { | |
| "epoch": 1.7274791824375473, | |
| "grad_norm": 2.3005599975585938, | |
| "learning_rate": 2.06041645655594e-06, | |
| "loss": 0.6381456851959229, | |
| "step": 2282 | |
| }, | |
| { | |
| "epoch": 1.728993186979561, | |
| "grad_norm": 1.5281527042388916, | |
| "learning_rate": 2.0588060901183444e-06, | |
| "loss": 0.6487545371055603, | |
| "step": 2284 | |
| }, | |
| { | |
| "epoch": 1.7305071915215746, | |
| "grad_norm": 2.2702927589416504, | |
| "learning_rate": 2.0571950829985673e-06, | |
| "loss": 0.810280978679657, | |
| "step": 2286 | |
| }, | |
| { | |
| "epoch": 1.732021196063588, | |
| "grad_norm": 1.50748610496521, | |
| "learning_rate": 2.0555834377213843e-06, | |
| "loss": 0.3577505648136139, | |
| "step": 2288 | |
| }, | |
| { | |
| "epoch": 1.7335352006056017, | |
| "grad_norm": 2.3487930297851562, | |
| "learning_rate": 2.0539711568125707e-06, | |
| "loss": 0.3521404564380646, | |
| "step": 2290 | |
| }, | |
| { | |
| "epoch": 1.7350492051476154, | |
| "grad_norm": 8.966695785522461, | |
| "learning_rate": 2.052358242798898e-06, | |
| "loss": 0.2824268341064453, | |
| "step": 2292 | |
| }, | |
| { | |
| "epoch": 1.736563209689629, | |
| "grad_norm": 1.4676111936569214, | |
| "learning_rate": 2.050744698208131e-06, | |
| "loss": 0.4738492965698242, | |
| "step": 2294 | |
| }, | |
| { | |
| "epoch": 1.7380772142316427, | |
| "grad_norm": 3.0044350624084473, | |
| "learning_rate": 2.0491305255690207e-06, | |
| "loss": 0.7092089653015137, | |
| "step": 2296 | |
| }, | |
| { | |
| "epoch": 1.7395912187736564, | |
| "grad_norm": 1.4396655559539795, | |
| "learning_rate": 2.047515727411304e-06, | |
| "loss": 0.6721881628036499, | |
| "step": 2298 | |
| }, | |
| { | |
| "epoch": 1.74110522331567, | |
| "grad_norm": 8.363606452941895, | |
| "learning_rate": 2.0459003062656975e-06, | |
| "loss": 0.2827519178390503, | |
| "step": 2300 | |
| }, | |
| { | |
| "epoch": 1.7426192278576835, | |
| "grad_norm": 2.10673451423645, | |
| "learning_rate": 2.0442842646638944e-06, | |
| "loss": 0.7053329944610596, | |
| "step": 2302 | |
| }, | |
| { | |
| "epoch": 1.7441332323996972, | |
| "grad_norm": 2.727703809738159, | |
| "learning_rate": 2.0426676051385603e-06, | |
| "loss": 0.22457894682884216, | |
| "step": 2304 | |
| }, | |
| { | |
| "epoch": 1.7456472369417109, | |
| "grad_norm": 3.643404960632324, | |
| "learning_rate": 2.041050330223328e-06, | |
| "loss": 0.8219473361968994, | |
| "step": 2306 | |
| }, | |
| { | |
| "epoch": 1.7471612414837243, | |
| "grad_norm": 3.9443776607513428, | |
| "learning_rate": 2.0394324424527976e-06, | |
| "loss": 0.2641485929489136, | |
| "step": 2308 | |
| }, | |
| { | |
| "epoch": 1.748675246025738, | |
| "grad_norm": 0.7181191444396973, | |
| "learning_rate": 2.0378139443625263e-06, | |
| "loss": 0.7425059080123901, | |
| "step": 2310 | |
| }, | |
| { | |
| "epoch": 1.7501892505677517, | |
| "grad_norm": 6.626009464263916, | |
| "learning_rate": 2.03619483848903e-06, | |
| "loss": 0.7613123655319214, | |
| "step": 2312 | |
| }, | |
| { | |
| "epoch": 1.7517032551097653, | |
| "grad_norm": 1.737387776374817, | |
| "learning_rate": 2.034575127369776e-06, | |
| "loss": 0.9189949035644531, | |
| "step": 2314 | |
| }, | |
| { | |
| "epoch": 1.753217259651779, | |
| "grad_norm": 3.098576068878174, | |
| "learning_rate": 2.0329548135431816e-06, | |
| "loss": 1.189211130142212, | |
| "step": 2316 | |
| }, | |
| { | |
| "epoch": 1.7547312641937927, | |
| "grad_norm": 2.3874411582946777, | |
| "learning_rate": 2.0313338995486073e-06, | |
| "loss": 0.3131447732448578, | |
| "step": 2318 | |
| }, | |
| { | |
| "epoch": 1.7562452687358063, | |
| "grad_norm": 2.1706600189208984, | |
| "learning_rate": 2.0297123879263546e-06, | |
| "loss": 1.1029870510101318, | |
| "step": 2320 | |
| }, | |
| { | |
| "epoch": 1.7577592732778198, | |
| "grad_norm": 1.2848676443099976, | |
| "learning_rate": 2.0280902812176607e-06, | |
| "loss": 0.7795432209968567, | |
| "step": 2322 | |
| }, | |
| { | |
| "epoch": 1.7592732778198334, | |
| "grad_norm": 1.6532896757125854, | |
| "learning_rate": 2.0264675819646977e-06, | |
| "loss": 1.0654683113098145, | |
| "step": 2324 | |
| }, | |
| { | |
| "epoch": 1.7607872823618471, | |
| "grad_norm": 1.1202808618545532, | |
| "learning_rate": 2.0248442927105635e-06, | |
| "loss": 1.0802593231201172, | |
| "step": 2326 | |
| }, | |
| { | |
| "epoch": 1.7623012869038606, | |
| "grad_norm": 2.4864604473114014, | |
| "learning_rate": 2.0232204159992833e-06, | |
| "loss": 0.6008468270301819, | |
| "step": 2328 | |
| }, | |
| { | |
| "epoch": 1.7638152914458742, | |
| "grad_norm": 1.4563044309616089, | |
| "learning_rate": 2.021595954375801e-06, | |
| "loss": 0.6921793222427368, | |
| "step": 2330 | |
| }, | |
| { | |
| "epoch": 1.765329295987888, | |
| "grad_norm": 1.7934669256210327, | |
| "learning_rate": 2.0199709103859784e-06, | |
| "loss": 0.28670886158943176, | |
| "step": 2332 | |
| }, | |
| { | |
| "epoch": 1.7668433005299016, | |
| "grad_norm": 1.6941759586334229, | |
| "learning_rate": 2.0183452865765893e-06, | |
| "loss": 1.050851821899414, | |
| "step": 2334 | |
| }, | |
| { | |
| "epoch": 1.7683573050719152, | |
| "grad_norm": 2.231344223022461, | |
| "learning_rate": 2.0167190854953167e-06, | |
| "loss": 0.1752273291349411, | |
| "step": 2336 | |
| }, | |
| { | |
| "epoch": 1.769871309613929, | |
| "grad_norm": 1.5495513677597046, | |
| "learning_rate": 2.0150923096907473e-06, | |
| "loss": 0.6756109595298767, | |
| "step": 2338 | |
| }, | |
| { | |
| "epoch": 1.7713853141559426, | |
| "grad_norm": 3.340039014816284, | |
| "learning_rate": 2.0134649617123697e-06, | |
| "loss": 1.0384888648986816, | |
| "step": 2340 | |
| }, | |
| { | |
| "epoch": 1.7728993186979563, | |
| "grad_norm": 7.249871253967285, | |
| "learning_rate": 2.011837044110569e-06, | |
| "loss": 0.9125099182128906, | |
| "step": 2342 | |
| }, | |
| { | |
| "epoch": 1.7744133232399697, | |
| "grad_norm": 2.0098390579223633, | |
| "learning_rate": 2.0102085594366227e-06, | |
| "loss": 1.1059890985488892, | |
| "step": 2344 | |
| }, | |
| { | |
| "epoch": 1.7759273277819834, | |
| "grad_norm": 2.3969979286193848, | |
| "learning_rate": 2.0085795102426962e-06, | |
| "loss": 0.3932843804359436, | |
| "step": 2346 | |
| }, | |
| { | |
| "epoch": 1.7774413323239968, | |
| "grad_norm": 3.099766254425049, | |
| "learning_rate": 2.0069498990818417e-06, | |
| "loss": 0.5124934911727905, | |
| "step": 2348 | |
| }, | |
| { | |
| "epoch": 1.7789553368660105, | |
| "grad_norm": 1.7208963632583618, | |
| "learning_rate": 2.00531972850799e-06, | |
| "loss": 0.593222439289093, | |
| "step": 2350 | |
| }, | |
| { | |
| "epoch": 1.7804693414080242, | |
| "grad_norm": 4.858328819274902, | |
| "learning_rate": 2.00368900107595e-06, | |
| "loss": 0.2706722617149353, | |
| "step": 2352 | |
| }, | |
| { | |
| "epoch": 1.7819833459500378, | |
| "grad_norm": 3.2357375621795654, | |
| "learning_rate": 2.0020577193414025e-06, | |
| "loss": 1.0699340105056763, | |
| "step": 2354 | |
| }, | |
| { | |
| "epoch": 1.7834973504920515, | |
| "grad_norm": 2.757415771484375, | |
| "learning_rate": 2.0004258858608973e-06, | |
| "loss": 0.650534451007843, | |
| "step": 2356 | |
| }, | |
| { | |
| "epoch": 1.7850113550340652, | |
| "grad_norm": 1.8998754024505615, | |
| "learning_rate": 1.9987935031918496e-06, | |
| "loss": 0.7294416427612305, | |
| "step": 2358 | |
| }, | |
| { | |
| "epoch": 1.7865253595760788, | |
| "grad_norm": 1.2810745239257812, | |
| "learning_rate": 1.997160573892534e-06, | |
| "loss": 0.6762998104095459, | |
| "step": 2360 | |
| }, | |
| { | |
| "epoch": 1.7880393641180925, | |
| "grad_norm": 1.3143075704574585, | |
| "learning_rate": 1.9955271005220826e-06, | |
| "loss": 1.1342246532440186, | |
| "step": 2362 | |
| }, | |
| { | |
| "epoch": 1.789553368660106, | |
| "grad_norm": 18.63542938232422, | |
| "learning_rate": 1.9938930856404796e-06, | |
| "loss": 0.4164232015609741, | |
| "step": 2364 | |
| }, | |
| { | |
| "epoch": 1.7910673732021196, | |
| "grad_norm": 7.862457752227783, | |
| "learning_rate": 1.9922585318085586e-06, | |
| "loss": 0.2773081958293915, | |
| "step": 2366 | |
| }, | |
| { | |
| "epoch": 1.7925813777441333, | |
| "grad_norm": 0.9495812058448792, | |
| "learning_rate": 1.990623441587998e-06, | |
| "loss": 0.15149936079978943, | |
| "step": 2368 | |
| }, | |
| { | |
| "epoch": 1.7940953822861467, | |
| "grad_norm": 2.372420072555542, | |
| "learning_rate": 1.988987817541315e-06, | |
| "loss": 1.0846363306045532, | |
| "step": 2370 | |
| }, | |
| { | |
| "epoch": 1.7956093868281604, | |
| "grad_norm": 1.3067657947540283, | |
| "learning_rate": 1.9873516622318655e-06, | |
| "loss": 0.5807230472564697, | |
| "step": 2372 | |
| }, | |
| { | |
| "epoch": 1.797123391370174, | |
| "grad_norm": 3.343245029449463, | |
| "learning_rate": 1.9857149782238376e-06, | |
| "loss": 0.45187991857528687, | |
| "step": 2374 | |
| }, | |
| { | |
| "epoch": 1.7986373959121877, | |
| "grad_norm": 1.814562439918518, | |
| "learning_rate": 1.9840777680822465e-06, | |
| "loss": 1.1235315799713135, | |
| "step": 2376 | |
| }, | |
| { | |
| "epoch": 1.8001514004542014, | |
| "grad_norm": 1.2937862873077393, | |
| "learning_rate": 1.982440034372934e-06, | |
| "loss": 0.9832783341407776, | |
| "step": 2378 | |
| }, | |
| { | |
| "epoch": 1.801665404996215, | |
| "grad_norm": 3.880544900894165, | |
| "learning_rate": 1.9808017796625614e-06, | |
| "loss": 0.31947067379951477, | |
| "step": 2380 | |
| }, | |
| { | |
| "epoch": 1.8031794095382288, | |
| "grad_norm": 2.1368255615234375, | |
| "learning_rate": 1.979163006518606e-06, | |
| "loss": 0.7589985728263855, | |
| "step": 2382 | |
| }, | |
| { | |
| "epoch": 1.8046934140802422, | |
| "grad_norm": 1.9387202262878418, | |
| "learning_rate": 1.977523717509359e-06, | |
| "loss": 0.7736470699310303, | |
| "step": 2384 | |
| }, | |
| { | |
| "epoch": 1.8062074186222559, | |
| "grad_norm": 2.6687638759613037, | |
| "learning_rate": 1.9758839152039183e-06, | |
| "loss": 0.5925498604774475, | |
| "step": 2386 | |
| }, | |
| { | |
| "epoch": 1.8077214231642695, | |
| "grad_norm": 1.7037615776062012, | |
| "learning_rate": 1.974243602172188e-06, | |
| "loss": 0.707631528377533, | |
| "step": 2388 | |
| }, | |
| { | |
| "epoch": 1.809235427706283, | |
| "grad_norm": 2.250582456588745, | |
| "learning_rate": 1.972602780984871e-06, | |
| "loss": 0.6910974979400635, | |
| "step": 2390 | |
| }, | |
| { | |
| "epoch": 1.8107494322482967, | |
| "grad_norm": 4.812062740325928, | |
| "learning_rate": 1.9709614542134684e-06, | |
| "loss": 0.6874426603317261, | |
| "step": 2392 | |
| }, | |
| { | |
| "epoch": 1.8122634367903103, | |
| "grad_norm": 1.8871315717697144, | |
| "learning_rate": 1.969319624430272e-06, | |
| "loss": 0.24301689863204956, | |
| "step": 2394 | |
| }, | |
| { | |
| "epoch": 1.813777441332324, | |
| "grad_norm": 12.313353538513184, | |
| "learning_rate": 1.9676772942083627e-06, | |
| "loss": 0.32100293040275574, | |
| "step": 2396 | |
| }, | |
| { | |
| "epoch": 1.8152914458743377, | |
| "grad_norm": 1.4709579944610596, | |
| "learning_rate": 1.9660344661216058e-06, | |
| "loss": 0.7021037340164185, | |
| "step": 2398 | |
| }, | |
| { | |
| "epoch": 1.8168054504163513, | |
| "grad_norm": 4.930418014526367, | |
| "learning_rate": 1.9643911427446458e-06, | |
| "loss": 0.22974015772342682, | |
| "step": 2400 | |
| }, | |
| { | |
| "epoch": 1.818319454958365, | |
| "grad_norm": 1.7876191139221191, | |
| "learning_rate": 1.9627473266529055e-06, | |
| "loss": 0.1252487748861313, | |
| "step": 2402 | |
| }, | |
| { | |
| "epoch": 1.8198334595003784, | |
| "grad_norm": 4.775298595428467, | |
| "learning_rate": 1.9611030204225776e-06, | |
| "loss": 0.6840951442718506, | |
| "step": 2404 | |
| }, | |
| { | |
| "epoch": 1.8213474640423921, | |
| "grad_norm": 3.2811765670776367, | |
| "learning_rate": 1.9594582266306244e-06, | |
| "loss": 0.8643097877502441, | |
| "step": 2406 | |
| }, | |
| { | |
| "epoch": 1.8228614685844058, | |
| "grad_norm": 3.3247106075286865, | |
| "learning_rate": 1.957812947854771e-06, | |
| "loss": 0.342410683631897, | |
| "step": 2408 | |
| }, | |
| { | |
| "epoch": 1.8243754731264192, | |
| "grad_norm": 14.739418029785156, | |
| "learning_rate": 1.9561671866735053e-06, | |
| "loss": 0.4793134331703186, | |
| "step": 2410 | |
| }, | |
| { | |
| "epoch": 1.825889477668433, | |
| "grad_norm": 1.838046669960022, | |
| "learning_rate": 1.954520945666068e-06, | |
| "loss": 0.6665730476379395, | |
| "step": 2412 | |
| }, | |
| { | |
| "epoch": 1.8274034822104466, | |
| "grad_norm": 6.243978500366211, | |
| "learning_rate": 1.9528742274124527e-06, | |
| "loss": 0.8040502071380615, | |
| "step": 2414 | |
| }, | |
| { | |
| "epoch": 1.8289174867524602, | |
| "grad_norm": 2.532534599304199, | |
| "learning_rate": 1.9512270344934027e-06, | |
| "loss": 0.8092038631439209, | |
| "step": 2416 | |
| }, | |
| { | |
| "epoch": 1.830431491294474, | |
| "grad_norm": 2.357820987701416, | |
| "learning_rate": 1.949579369490403e-06, | |
| "loss": 0.5882593393325806, | |
| "step": 2418 | |
| }, | |
| { | |
| "epoch": 1.8319454958364876, | |
| "grad_norm": 1.8288893699645996, | |
| "learning_rate": 1.9479312349856796e-06, | |
| "loss": 0.6844436526298523, | |
| "step": 2420 | |
| }, | |
| { | |
| "epoch": 1.8334595003785013, | |
| "grad_norm": 1.6580768823623657, | |
| "learning_rate": 1.946282633562194e-06, | |
| "loss": 0.8981972336769104, | |
| "step": 2422 | |
| }, | |
| { | |
| "epoch": 1.834973504920515, | |
| "grad_norm": 0.8140391111373901, | |
| "learning_rate": 1.94463356780364e-06, | |
| "loss": 0.7706785202026367, | |
| "step": 2424 | |
| }, | |
| { | |
| "epoch": 1.8364875094625284, | |
| "grad_norm": 4.756402969360352, | |
| "learning_rate": 1.942984040294438e-06, | |
| "loss": 0.7183281779289246, | |
| "step": 2426 | |
| }, | |
| { | |
| "epoch": 1.838001514004542, | |
| "grad_norm": 0.7826236486434937, | |
| "learning_rate": 1.9413340536197326e-06, | |
| "loss": 0.5470402240753174, | |
| "step": 2428 | |
| }, | |
| { | |
| "epoch": 1.8395155185465555, | |
| "grad_norm": 5.349993705749512, | |
| "learning_rate": 1.9396836103653883e-06, | |
| "loss": 0.6334415078163147, | |
| "step": 2430 | |
| }, | |
| { | |
| "epoch": 1.8410295230885692, | |
| "grad_norm": 1.5277384519577026, | |
| "learning_rate": 1.938032713117985e-06, | |
| "loss": 0.6904246211051941, | |
| "step": 2432 | |
| }, | |
| { | |
| "epoch": 1.8425435276305828, | |
| "grad_norm": 2.268113136291504, | |
| "learning_rate": 1.936381364464814e-06, | |
| "loss": 0.27258798480033875, | |
| "step": 2434 | |
| }, | |
| { | |
| "epoch": 1.8440575321725965, | |
| "grad_norm": 2.470331907272339, | |
| "learning_rate": 1.934729566993874e-06, | |
| "loss": 0.5896321535110474, | |
| "step": 2436 | |
| }, | |
| { | |
| "epoch": 1.8455715367146102, | |
| "grad_norm": 6.7304558753967285, | |
| "learning_rate": 1.9330773232938673e-06, | |
| "loss": 0.6072213053703308, | |
| "step": 2438 | |
| }, | |
| { | |
| "epoch": 1.8470855412566238, | |
| "grad_norm": 1.9519010782241821, | |
| "learning_rate": 1.931424635954195e-06, | |
| "loss": 0.8895407319068909, | |
| "step": 2440 | |
| }, | |
| { | |
| "epoch": 1.8485995457986375, | |
| "grad_norm": 1.34812593460083, | |
| "learning_rate": 1.9297715075649543e-06, | |
| "loss": 1.1164705753326416, | |
| "step": 2442 | |
| }, | |
| { | |
| "epoch": 1.8501135503406512, | |
| "grad_norm": 2.2477667331695557, | |
| "learning_rate": 1.928117940716933e-06, | |
| "loss": 0.34229204058647156, | |
| "step": 2444 | |
| }, | |
| { | |
| "epoch": 1.8516275548826646, | |
| "grad_norm": 4.346146106719971, | |
| "learning_rate": 1.9264639380016063e-06, | |
| "loss": 0.7773399949073792, | |
| "step": 2446 | |
| }, | |
| { | |
| "epoch": 1.8531415594246783, | |
| "grad_norm": 1.9627861976623535, | |
| "learning_rate": 1.9248095020111323e-06, | |
| "loss": 0.7164353728294373, | |
| "step": 2448 | |
| }, | |
| { | |
| "epoch": 1.8546555639666917, | |
| "grad_norm": 1.1922169923782349, | |
| "learning_rate": 1.923154635338348e-06, | |
| "loss": 0.7517849206924438, | |
| "step": 2450 | |
| }, | |
| { | |
| "epoch": 1.8561695685087054, | |
| "grad_norm": 3.457965850830078, | |
| "learning_rate": 1.921499340576766e-06, | |
| "loss": 0.5871737599372864, | |
| "step": 2452 | |
| }, | |
| { | |
| "epoch": 1.857683573050719, | |
| "grad_norm": 2.036007881164551, | |
| "learning_rate": 1.9198436203205694e-06, | |
| "loss": 0.6375160217285156, | |
| "step": 2454 | |
| }, | |
| { | |
| "epoch": 1.8591975775927327, | |
| "grad_norm": 3.6813805103302, | |
| "learning_rate": 1.9181874771646085e-06, | |
| "loss": 1.134217619895935, | |
| "step": 2456 | |
| }, | |
| { | |
| "epoch": 1.8607115821347464, | |
| "grad_norm": 3.64957594871521, | |
| "learning_rate": 1.916530913704395e-06, | |
| "loss": 0.7019612193107605, | |
| "step": 2458 | |
| }, | |
| { | |
| "epoch": 1.86222558667676, | |
| "grad_norm": 1.4620537757873535, | |
| "learning_rate": 1.9148739325361015e-06, | |
| "loss": 1.0680433511734009, | |
| "step": 2460 | |
| }, | |
| { | |
| "epoch": 1.8637395912187738, | |
| "grad_norm": 3.8369688987731934, | |
| "learning_rate": 1.913216536256553e-06, | |
| "loss": 0.7128046751022339, | |
| "step": 2462 | |
| }, | |
| { | |
| "epoch": 1.8652535957607874, | |
| "grad_norm": 5.886375427246094, | |
| "learning_rate": 1.9115587274632274e-06, | |
| "loss": 0.282992959022522, | |
| "step": 2464 | |
| }, | |
| { | |
| "epoch": 1.8667676003028009, | |
| "grad_norm": 1.4305994510650635, | |
| "learning_rate": 1.9099005087542467e-06, | |
| "loss": 0.9355074763298035, | |
| "step": 2466 | |
| }, | |
| { | |
| "epoch": 1.8682816048448145, | |
| "grad_norm": 5.087035179138184, | |
| "learning_rate": 1.9082418827283766e-06, | |
| "loss": 0.6677455902099609, | |
| "step": 2468 | |
| }, | |
| { | |
| "epoch": 1.8697956093868282, | |
| "grad_norm": 1.5539398193359375, | |
| "learning_rate": 1.9065828519850212e-06, | |
| "loss": 0.6931514739990234, | |
| "step": 2470 | |
| }, | |
| { | |
| "epoch": 1.8713096139288417, | |
| "grad_norm": 6.549385070800781, | |
| "learning_rate": 1.9049234191242185e-06, | |
| "loss": 0.5165091753005981, | |
| "step": 2472 | |
| }, | |
| { | |
| "epoch": 1.8728236184708553, | |
| "grad_norm": 1.6973838806152344, | |
| "learning_rate": 1.9032635867466376e-06, | |
| "loss": 1.1013144254684448, | |
| "step": 2474 | |
| }, | |
| { | |
| "epoch": 1.874337623012869, | |
| "grad_norm": 1.8635910749435425, | |
| "learning_rate": 1.9016033574535719e-06, | |
| "loss": 0.8190028667449951, | |
| "step": 2476 | |
| }, | |
| { | |
| "epoch": 1.8758516275548827, | |
| "grad_norm": 18.24675750732422, | |
| "learning_rate": 1.8999427338469386e-06, | |
| "loss": 0.5483837127685547, | |
| "step": 2478 | |
| }, | |
| { | |
| "epoch": 1.8773656320968963, | |
| "grad_norm": 4.019062519073486, | |
| "learning_rate": 1.8982817185292722e-06, | |
| "loss": 0.7344475984573364, | |
| "step": 2480 | |
| }, | |
| { | |
| "epoch": 1.87887963663891, | |
| "grad_norm": 7.511033535003662, | |
| "learning_rate": 1.896620314103721e-06, | |
| "loss": 0.28435125946998596, | |
| "step": 2482 | |
| }, | |
| { | |
| "epoch": 1.8803936411809237, | |
| "grad_norm": 5.958474636077881, | |
| "learning_rate": 1.894958523174043e-06, | |
| "loss": 0.6226410269737244, | |
| "step": 2484 | |
| }, | |
| { | |
| "epoch": 1.8819076457229371, | |
| "grad_norm": 2.613577365875244, | |
| "learning_rate": 1.8932963483446027e-06, | |
| "loss": 0.26969021558761597, | |
| "step": 2486 | |
| }, | |
| { | |
| "epoch": 1.8834216502649508, | |
| "grad_norm": 2.083977222442627, | |
| "learning_rate": 1.8916337922203647e-06, | |
| "loss": 0.6489355564117432, | |
| "step": 2488 | |
| }, | |
| { | |
| "epoch": 1.8849356548069645, | |
| "grad_norm": 1.8885645866394043, | |
| "learning_rate": 1.8899708574068928e-06, | |
| "loss": 0.8442902565002441, | |
| "step": 2490 | |
| }, | |
| { | |
| "epoch": 1.886449659348978, | |
| "grad_norm": 2.0844578742980957, | |
| "learning_rate": 1.8883075465103431e-06, | |
| "loss": 0.4130919277667999, | |
| "step": 2492 | |
| }, | |
| { | |
| "epoch": 1.8879636638909916, | |
| "grad_norm": 5.105222702026367, | |
| "learning_rate": 1.8866438621374628e-06, | |
| "loss": 0.18109102547168732, | |
| "step": 2494 | |
| }, | |
| { | |
| "epoch": 1.8894776684330052, | |
| "grad_norm": 12.537839889526367, | |
| "learning_rate": 1.8849798068955823e-06, | |
| "loss": 0.25919821858406067, | |
| "step": 2496 | |
| }, | |
| { | |
| "epoch": 1.890991672975019, | |
| "grad_norm": 1.3052566051483154, | |
| "learning_rate": 1.8833153833926146e-06, | |
| "loss": 0.6028334498405457, | |
| "step": 2498 | |
| }, | |
| { | |
| "epoch": 1.8925056775170326, | |
| "grad_norm": 1.4219228029251099, | |
| "learning_rate": 1.8816505942370496e-06, | |
| "loss": 0.47099339962005615, | |
| "step": 2500 | |
| }, | |
| { | |
| "epoch": 1.8940196820590463, | |
| "grad_norm": 1.4984897375106812, | |
| "learning_rate": 1.8799854420379487e-06, | |
| "loss": 0.6924835443496704, | |
| "step": 2502 | |
| }, | |
| { | |
| "epoch": 1.89553368660106, | |
| "grad_norm": 3.9320881366729736, | |
| "learning_rate": 1.8783199294049453e-06, | |
| "loss": 0.2342991828918457, | |
| "step": 2504 | |
| }, | |
| { | |
| "epoch": 1.8970476911430736, | |
| "grad_norm": 1.5038467645645142, | |
| "learning_rate": 1.8766540589482351e-06, | |
| "loss": 1.052242398262024, | |
| "step": 2506 | |
| }, | |
| { | |
| "epoch": 1.898561695685087, | |
| "grad_norm": 1.7633041143417358, | |
| "learning_rate": 1.8749878332785754e-06, | |
| "loss": 0.4072822034358978, | |
| "step": 2508 | |
| }, | |
| { | |
| "epoch": 1.9000757002271007, | |
| "grad_norm": 6.334680080413818, | |
| "learning_rate": 1.8733212550072807e-06, | |
| "loss": 0.6254635453224182, | |
| "step": 2510 | |
| }, | |
| { | |
| "epoch": 1.9015897047691142, | |
| "grad_norm": 4.005409240722656, | |
| "learning_rate": 1.8716543267462177e-06, | |
| "loss": 0.20865847170352936, | |
| "step": 2512 | |
| }, | |
| { | |
| "epoch": 1.9031037093111278, | |
| "grad_norm": 3.80173921585083, | |
| "learning_rate": 1.8699870511078017e-06, | |
| "loss": 0.22845837473869324, | |
| "step": 2514 | |
| }, | |
| { | |
| "epoch": 1.9046177138531415, | |
| "grad_norm": 6.341163158416748, | |
| "learning_rate": 1.8683194307049918e-06, | |
| "loss": 0.29452162981033325, | |
| "step": 2516 | |
| }, | |
| { | |
| "epoch": 1.9061317183951552, | |
| "grad_norm": 2.0832197666168213, | |
| "learning_rate": 1.866651468151288e-06, | |
| "loss": 0.9766835570335388, | |
| "step": 2518 | |
| }, | |
| { | |
| "epoch": 1.9076457229371688, | |
| "grad_norm": 1.7201058864593506, | |
| "learning_rate": 1.8649831660607262e-06, | |
| "loss": 0.7189306616783142, | |
| "step": 2520 | |
| }, | |
| { | |
| "epoch": 1.9091597274791825, | |
| "grad_norm": 12.217644691467285, | |
| "learning_rate": 1.8633145270478757e-06, | |
| "loss": 0.18444077670574188, | |
| "step": 2522 | |
| }, | |
| { | |
| "epoch": 1.9106737320211962, | |
| "grad_norm": 2.312544822692871, | |
| "learning_rate": 1.8616455537278318e-06, | |
| "loss": 0.6718014478683472, | |
| "step": 2524 | |
| }, | |
| { | |
| "epoch": 1.9121877365632098, | |
| "grad_norm": 4.253828048706055, | |
| "learning_rate": 1.8599762487162146e-06, | |
| "loss": 0.21305717527866364, | |
| "step": 2526 | |
| }, | |
| { | |
| "epoch": 1.9137017411052233, | |
| "grad_norm": 1.2430156469345093, | |
| "learning_rate": 1.858306614629165e-06, | |
| "loss": 1.0205436944961548, | |
| "step": 2528 | |
| }, | |
| { | |
| "epoch": 1.915215745647237, | |
| "grad_norm": 2.997448682785034, | |
| "learning_rate": 1.856636654083338e-06, | |
| "loss": 0.25120025873184204, | |
| "step": 2530 | |
| }, | |
| { | |
| "epoch": 1.9167297501892504, | |
| "grad_norm": 8.223458290100098, | |
| "learning_rate": 1.8549663696959016e-06, | |
| "loss": 0.7833942174911499, | |
| "step": 2532 | |
| }, | |
| { | |
| "epoch": 1.918243754731264, | |
| "grad_norm": 3.068028450012207, | |
| "learning_rate": 1.8532957640845296e-06, | |
| "loss": 1.1276060342788696, | |
| "step": 2534 | |
| }, | |
| { | |
| "epoch": 1.9197577592732777, | |
| "grad_norm": 8.644725799560547, | |
| "learning_rate": 1.851624839867402e-06, | |
| "loss": 0.21417191624641418, | |
| "step": 2536 | |
| }, | |
| { | |
| "epoch": 1.9212717638152914, | |
| "grad_norm": 2.2275826930999756, | |
| "learning_rate": 1.8499535996631946e-06, | |
| "loss": 1.0725288391113281, | |
| "step": 2538 | |
| }, | |
| { | |
| "epoch": 1.922785768357305, | |
| "grad_norm": 3.2605981826782227, | |
| "learning_rate": 1.8482820460910817e-06, | |
| "loss": 0.3155927062034607, | |
| "step": 2540 | |
| }, | |
| { | |
| "epoch": 1.9242997728993188, | |
| "grad_norm": 1.3210935592651367, | |
| "learning_rate": 1.8466101817707271e-06, | |
| "loss": 0.7417844533920288, | |
| "step": 2542 | |
| }, | |
| { | |
| "epoch": 1.9258137774413324, | |
| "grad_norm": 4.283865928649902, | |
| "learning_rate": 1.844938009322281e-06, | |
| "loss": 0.8048570156097412, | |
| "step": 2544 | |
| }, | |
| { | |
| "epoch": 1.927327781983346, | |
| "grad_norm": 1.1630911827087402, | |
| "learning_rate": 1.8432655313663771e-06, | |
| "loss": 1.1231316328048706, | |
| "step": 2546 | |
| }, | |
| { | |
| "epoch": 1.9288417865253595, | |
| "grad_norm": 1.1668163537979126, | |
| "learning_rate": 1.8415927505241298e-06, | |
| "loss": 1.1409462690353394, | |
| "step": 2548 | |
| }, | |
| { | |
| "epoch": 1.9303557910673732, | |
| "grad_norm": 3.072653293609619, | |
| "learning_rate": 1.8399196694171252e-06, | |
| "loss": 0.2875867187976837, | |
| "step": 2550 | |
| }, | |
| { | |
| "epoch": 1.9318697956093869, | |
| "grad_norm": 1.0892410278320312, | |
| "learning_rate": 1.838246290667421e-06, | |
| "loss": 0.7053228616714478, | |
| "step": 2552 | |
| }, | |
| { | |
| "epoch": 1.9333838001514003, | |
| "grad_norm": 3.109567165374756, | |
| "learning_rate": 1.8365726168975425e-06, | |
| "loss": 0.2398093193769455, | |
| "step": 2554 | |
| }, | |
| { | |
| "epoch": 1.934897804693414, | |
| "grad_norm": 3.3759360313415527, | |
| "learning_rate": 1.8348986507304757e-06, | |
| "loss": 0.6860714554786682, | |
| "step": 2556 | |
| }, | |
| { | |
| "epoch": 1.9364118092354277, | |
| "grad_norm": 1.5024205446243286, | |
| "learning_rate": 1.833224394789666e-06, | |
| "loss": 1.1165058612823486, | |
| "step": 2558 | |
| }, | |
| { | |
| "epoch": 1.9379258137774413, | |
| "grad_norm": 44.66451644897461, | |
| "learning_rate": 1.8315498516990123e-06, | |
| "loss": 0.609163761138916, | |
| "step": 2560 | |
| }, | |
| { | |
| "epoch": 1.939439818319455, | |
| "grad_norm": 0.7202508449554443, | |
| "learning_rate": 1.8298750240828638e-06, | |
| "loss": 0.5042010545730591, | |
| "step": 2562 | |
| }, | |
| { | |
| "epoch": 1.9409538228614687, | |
| "grad_norm": 1.71787428855896, | |
| "learning_rate": 1.828199914566016e-06, | |
| "loss": 0.5848456621170044, | |
| "step": 2564 | |
| }, | |
| { | |
| "epoch": 1.9424678274034823, | |
| "grad_norm": 1.383819341659546, | |
| "learning_rate": 1.826524525773705e-06, | |
| "loss": 1.1039332151412964, | |
| "step": 2566 | |
| }, | |
| { | |
| "epoch": 1.9439818319454958, | |
| "grad_norm": 3.2185161113739014, | |
| "learning_rate": 1.8248488603316063e-06, | |
| "loss": 0.8656116724014282, | |
| "step": 2568 | |
| }, | |
| { | |
| "epoch": 1.9454958364875095, | |
| "grad_norm": 1.7334600687026978, | |
| "learning_rate": 1.8231729208658271e-06, | |
| "loss": 0.597359299659729, | |
| "step": 2570 | |
| }, | |
| { | |
| "epoch": 1.9470098410295231, | |
| "grad_norm": 0.8499669432640076, | |
| "learning_rate": 1.821496710002905e-06, | |
| "loss": 0.66790372133255, | |
| "step": 2572 | |
| }, | |
| { | |
| "epoch": 1.9485238455715366, | |
| "grad_norm": 2.160719871520996, | |
| "learning_rate": 1.8198202303698038e-06, | |
| "loss": 0.7572706937789917, | |
| "step": 2574 | |
| }, | |
| { | |
| "epoch": 1.9500378501135502, | |
| "grad_norm": 2.4661171436309814, | |
| "learning_rate": 1.8181434845939077e-06, | |
| "loss": 0.7103179693222046, | |
| "step": 2576 | |
| }, | |
| { | |
| "epoch": 1.951551854655564, | |
| "grad_norm": 1.7464579343795776, | |
| "learning_rate": 1.8164664753030164e-06, | |
| "loss": 0.7086505889892578, | |
| "step": 2578 | |
| }, | |
| { | |
| "epoch": 1.9530658591975776, | |
| "grad_norm": 2.0125958919525146, | |
| "learning_rate": 1.8147892051253455e-06, | |
| "loss": 0.3641662001609802, | |
| "step": 2580 | |
| }, | |
| { | |
| "epoch": 1.9545798637395913, | |
| "grad_norm": 2.989818811416626, | |
| "learning_rate": 1.8131116766895169e-06, | |
| "loss": 0.700904369354248, | |
| "step": 2582 | |
| }, | |
| { | |
| "epoch": 1.956093868281605, | |
| "grad_norm": 1.8995088338851929, | |
| "learning_rate": 1.8114338926245596e-06, | |
| "loss": 0.6467546224594116, | |
| "step": 2584 | |
| }, | |
| { | |
| "epoch": 1.9576078728236186, | |
| "grad_norm": 1.562253713607788, | |
| "learning_rate": 1.8097558555599016e-06, | |
| "loss": 1.0711427927017212, | |
| "step": 2586 | |
| }, | |
| { | |
| "epoch": 1.9591218773656323, | |
| "grad_norm": 3.0294432640075684, | |
| "learning_rate": 1.8080775681253673e-06, | |
| "loss": 0.7762032747268677, | |
| "step": 2588 | |
| }, | |
| { | |
| "epoch": 1.9606358819076457, | |
| "grad_norm": 4.3492751121521, | |
| "learning_rate": 1.8063990329511749e-06, | |
| "loss": 0.41252315044403076, | |
| "step": 2590 | |
| }, | |
| { | |
| "epoch": 1.9621498864496594, | |
| "grad_norm": 1.1631908416748047, | |
| "learning_rate": 1.8047202526679291e-06, | |
| "loss": 0.5813542604446411, | |
| "step": 2592 | |
| }, | |
| { | |
| "epoch": 1.9636638909916728, | |
| "grad_norm": 9.60074520111084, | |
| "learning_rate": 1.8030412299066201e-06, | |
| "loss": 0.2214062511920929, | |
| "step": 2594 | |
| }, | |
| { | |
| "epoch": 1.9651778955336865, | |
| "grad_norm": 2.970233201980591, | |
| "learning_rate": 1.8013619672986173e-06, | |
| "loss": 0.24248868227005005, | |
| "step": 2596 | |
| }, | |
| { | |
| "epoch": 1.9666919000757002, | |
| "grad_norm": 2.1815083026885986, | |
| "learning_rate": 1.799682467475667e-06, | |
| "loss": 0.28807175159454346, | |
| "step": 2598 | |
| }, | |
| { | |
| "epoch": 1.9682059046177138, | |
| "grad_norm": 6.088417053222656, | |
| "learning_rate": 1.798002733069886e-06, | |
| "loss": 0.3555501699447632, | |
| "step": 2600 | |
| }, | |
| { | |
| "epoch": 1.9697199091597275, | |
| "grad_norm": 3.4696614742279053, | |
| "learning_rate": 1.796322766713759e-06, | |
| "loss": 0.2768677771091461, | |
| "step": 2602 | |
| }, | |
| { | |
| "epoch": 1.9712339137017412, | |
| "grad_norm": 4.333212852478027, | |
| "learning_rate": 1.7946425710401357e-06, | |
| "loss": 0.7167282700538635, | |
| "step": 2604 | |
| }, | |
| { | |
| "epoch": 1.9727479182437548, | |
| "grad_norm": 2.5988035202026367, | |
| "learning_rate": 1.7929621486822223e-06, | |
| "loss": 0.6456173062324524, | |
| "step": 2606 | |
| }, | |
| { | |
| "epoch": 1.9742619227857685, | |
| "grad_norm": 3.324312448501587, | |
| "learning_rate": 1.7912815022735837e-06, | |
| "loss": 0.1809239685535431, | |
| "step": 2608 | |
| }, | |
| { | |
| "epoch": 1.975775927327782, | |
| "grad_norm": 1.678682804107666, | |
| "learning_rate": 1.789600634448133e-06, | |
| "loss": 1.049579381942749, | |
| "step": 2610 | |
| }, | |
| { | |
| "epoch": 1.9772899318697956, | |
| "grad_norm": 2.530160665512085, | |
| "learning_rate": 1.7879195478401319e-06, | |
| "loss": 0.7081025838851929, | |
| "step": 2612 | |
| }, | |
| { | |
| "epoch": 1.978803936411809, | |
| "grad_norm": 2.570563793182373, | |
| "learning_rate": 1.7862382450841844e-06, | |
| "loss": 0.6838253140449524, | |
| "step": 2614 | |
| }, | |
| { | |
| "epoch": 1.9803179409538227, | |
| "grad_norm": 6.012343883514404, | |
| "learning_rate": 1.784556728815234e-06, | |
| "loss": 0.6574077010154724, | |
| "step": 2616 | |
| }, | |
| { | |
| "epoch": 1.9818319454958364, | |
| "grad_norm": 8.046130180358887, | |
| "learning_rate": 1.7828750016685576e-06, | |
| "loss": 0.17780661582946777, | |
| "step": 2618 | |
| }, | |
| { | |
| "epoch": 1.98334595003785, | |
| "grad_norm": 3.787870168685913, | |
| "learning_rate": 1.7811930662797638e-06, | |
| "loss": 0.7062051892280579, | |
| "step": 2620 | |
| }, | |
| { | |
| "epoch": 1.9848599545798638, | |
| "grad_norm": 1.1279329061508179, | |
| "learning_rate": 1.7795109252847867e-06, | |
| "loss": 0.7671316862106323, | |
| "step": 2622 | |
| }, | |
| { | |
| "epoch": 1.9863739591218774, | |
| "grad_norm": 4.606888294219971, | |
| "learning_rate": 1.7778285813198826e-06, | |
| "loss": 0.31346026062965393, | |
| "step": 2624 | |
| }, | |
| { | |
| "epoch": 1.987887963663891, | |
| "grad_norm": 2.6706228256225586, | |
| "learning_rate": 1.7761460370216267e-06, | |
| "loss": 0.5862433314323425, | |
| "step": 2626 | |
| }, | |
| { | |
| "epoch": 1.9894019682059048, | |
| "grad_norm": 2.2766308784484863, | |
| "learning_rate": 1.7744632950269075e-06, | |
| "loss": 1.0763511657714844, | |
| "step": 2628 | |
| }, | |
| { | |
| "epoch": 1.9909159727479182, | |
| "grad_norm": 5.0256667137146, | |
| "learning_rate": 1.772780357972924e-06, | |
| "loss": 0.13953058421611786, | |
| "step": 2630 | |
| }, | |
| { | |
| "epoch": 1.9924299772899319, | |
| "grad_norm": 5.517915725708008, | |
| "learning_rate": 1.7710972284971793e-06, | |
| "loss": 1.0178477764129639, | |
| "step": 2632 | |
| }, | |
| { | |
| "epoch": 1.9939439818319455, | |
| "grad_norm": 2.4863970279693604, | |
| "learning_rate": 1.7694139092374802e-06, | |
| "loss": 0.6464669108390808, | |
| "step": 2634 | |
| }, | |
| { | |
| "epoch": 1.995457986373959, | |
| "grad_norm": 4.220505714416504, | |
| "learning_rate": 1.7677304028319295e-06, | |
| "loss": 0.8648689985275269, | |
| "step": 2636 | |
| }, | |
| { | |
| "epoch": 1.9969719909159727, | |
| "grad_norm": 2.9062840938568115, | |
| "learning_rate": 1.7660467119189236e-06, | |
| "loss": 0.16142083704471588, | |
| "step": 2638 | |
| }, | |
| { | |
| "epoch": 1.9984859954579863, | |
| "grad_norm": 18.753829956054688, | |
| "learning_rate": 1.7643628391371484e-06, | |
| "loss": 0.3382264971733093, | |
| "step": 2640 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "grad_norm": 2.7218689918518066, | |
| "learning_rate": 1.762678787125574e-06, | |
| "loss": 0.719879150390625, | |
| "step": 2642 | |
| }, | |
| { | |
| "epoch": 2.0015140045420137, | |
| "grad_norm": 0.6632734537124634, | |
| "learning_rate": 1.7609945585234533e-06, | |
| "loss": 0.14925611019134521, | |
| "step": 2644 | |
| }, | |
| { | |
| "epoch": 2.0030280090840273, | |
| "grad_norm": 1.8668553829193115, | |
| "learning_rate": 1.7593101559703132e-06, | |
| "loss": 0.6265012621879578, | |
| "step": 2646 | |
| }, | |
| { | |
| "epoch": 2.004542013626041, | |
| "grad_norm": 1.4000589847564697, | |
| "learning_rate": 1.7576255821059549e-06, | |
| "loss": 0.5532234311103821, | |
| "step": 2648 | |
| }, | |
| { | |
| "epoch": 2.0060560181680547, | |
| "grad_norm": 1.3937102556228638, | |
| "learning_rate": 1.7559408395704483e-06, | |
| "loss": 0.12811285257339478, | |
| "step": 2650 | |
| }, | |
| { | |
| "epoch": 2.0075700227100683, | |
| "grad_norm": 2.235651969909668, | |
| "learning_rate": 1.7542559310041272e-06, | |
| "loss": 0.5081092119216919, | |
| "step": 2652 | |
| }, | |
| { | |
| "epoch": 2.0090840272520816, | |
| "grad_norm": 1.5715231895446777, | |
| "learning_rate": 1.7525708590475855e-06, | |
| "loss": 0.12974363565444946, | |
| "step": 2654 | |
| }, | |
| { | |
| "epoch": 2.0105980317940952, | |
| "grad_norm": 3.355414867401123, | |
| "learning_rate": 1.7508856263416728e-06, | |
| "loss": 0.17922165989875793, | |
| "step": 2656 | |
| }, | |
| { | |
| "epoch": 2.012112036336109, | |
| "grad_norm": 2.6334707736968994, | |
| "learning_rate": 1.7492002355274917e-06, | |
| "loss": 0.26148444414138794, | |
| "step": 2658 | |
| }, | |
| { | |
| "epoch": 2.0136260408781226, | |
| "grad_norm": 1.8044371604919434, | |
| "learning_rate": 1.7475146892463911e-06, | |
| "loss": 0.6592626571655273, | |
| "step": 2660 | |
| }, | |
| { | |
| "epoch": 2.0151400454201362, | |
| "grad_norm": 0.6419950127601624, | |
| "learning_rate": 1.7458289901399652e-06, | |
| "loss": 0.5145966410636902, | |
| "step": 2662 | |
| }, | |
| { | |
| "epoch": 2.01665404996215, | |
| "grad_norm": 3.272899627685547, | |
| "learning_rate": 1.7441431408500469e-06, | |
| "loss": 0.577723503112793, | |
| "step": 2664 | |
| }, | |
| { | |
| "epoch": 2.0181680545041636, | |
| "grad_norm": 2.162247896194458, | |
| "learning_rate": 1.7424571440187036e-06, | |
| "loss": 0.9184585809707642, | |
| "step": 2666 | |
| }, | |
| { | |
| "epoch": 2.0196820590461773, | |
| "grad_norm": 4.363552570343018, | |
| "learning_rate": 1.7407710022882353e-06, | |
| "loss": 0.7015078663825989, | |
| "step": 2668 | |
| }, | |
| { | |
| "epoch": 2.021196063588191, | |
| "grad_norm": 0.746873676776886, | |
| "learning_rate": 1.7390847183011696e-06, | |
| "loss": 0.14988543093204498, | |
| "step": 2670 | |
| }, | |
| { | |
| "epoch": 2.0227100681302046, | |
| "grad_norm": 1.4385411739349365, | |
| "learning_rate": 1.7373982947002545e-06, | |
| "loss": 0.5507326126098633, | |
| "step": 2672 | |
| }, | |
| { | |
| "epoch": 2.024224072672218, | |
| "grad_norm": 5.640541076660156, | |
| "learning_rate": 1.7357117341284586e-06, | |
| "loss": 0.5066704750061035, | |
| "step": 2674 | |
| }, | |
| { | |
| "epoch": 2.0257380772142315, | |
| "grad_norm": 1.6015201807022095, | |
| "learning_rate": 1.7340250392289654e-06, | |
| "loss": 0.58171147108078, | |
| "step": 2676 | |
| }, | |
| { | |
| "epoch": 2.027252081756245, | |
| "grad_norm": 1.6645236015319824, | |
| "learning_rate": 1.7323382126451683e-06, | |
| "loss": 1.0548933744430542, | |
| "step": 2678 | |
| }, | |
| { | |
| "epoch": 2.028766086298259, | |
| "grad_norm": 1.4560426473617554, | |
| "learning_rate": 1.7306512570206675e-06, | |
| "loss": 1.017156720161438, | |
| "step": 2680 | |
| }, | |
| { | |
| "epoch": 2.0302800908402725, | |
| "grad_norm": 2.0511622428894043, | |
| "learning_rate": 1.7289641749992642e-06, | |
| "loss": 0.5105241537094116, | |
| "step": 2682 | |
| }, | |
| { | |
| "epoch": 2.031794095382286, | |
| "grad_norm": 4.8046464920043945, | |
| "learning_rate": 1.7272769692249596e-06, | |
| "loss": 0.5566238164901733, | |
| "step": 2684 | |
| }, | |
| { | |
| "epoch": 2.0333080999243, | |
| "grad_norm": 18.07652473449707, | |
| "learning_rate": 1.7255896423419474e-06, | |
| "loss": 0.2835147976875305, | |
| "step": 2686 | |
| }, | |
| { | |
| "epoch": 2.0348221044663135, | |
| "grad_norm": 2.6613402366638184, | |
| "learning_rate": 1.7239021969946115e-06, | |
| "loss": 0.5674735307693481, | |
| "step": 2688 | |
| }, | |
| { | |
| "epoch": 2.036336109008327, | |
| "grad_norm": 1.3865885734558105, | |
| "learning_rate": 1.7222146358275214e-06, | |
| "loss": 0.15232010185718536, | |
| "step": 2690 | |
| }, | |
| { | |
| "epoch": 2.037850113550341, | |
| "grad_norm": 1.7241010665893555, | |
| "learning_rate": 1.720526961485429e-06, | |
| "loss": 0.8321764469146729, | |
| "step": 2692 | |
| }, | |
| { | |
| "epoch": 2.039364118092354, | |
| "grad_norm": 10.220908164978027, | |
| "learning_rate": 1.7188391766132618e-06, | |
| "loss": 0.22783613204956055, | |
| "step": 2694 | |
| }, | |
| { | |
| "epoch": 2.0408781226343677, | |
| "grad_norm": 1.8045783042907715, | |
| "learning_rate": 1.7171512838561219e-06, | |
| "loss": 0.9568790197372437, | |
| "step": 2696 | |
| }, | |
| { | |
| "epoch": 2.0423921271763814, | |
| "grad_norm": 1.4294806718826294, | |
| "learning_rate": 1.7154632858592804e-06, | |
| "loss": 0.6187859177589417, | |
| "step": 2698 | |
| }, | |
| { | |
| "epoch": 2.043906131718395, | |
| "grad_norm": 2.846625804901123, | |
| "learning_rate": 1.7137751852681728e-06, | |
| "loss": 0.5443164110183716, | |
| "step": 2700 | |
| }, | |
| { | |
| "epoch": 2.0454201362604087, | |
| "grad_norm": 4.185090065002441, | |
| "learning_rate": 1.7120869847283955e-06, | |
| "loss": 0.11699831485748291, | |
| "step": 2702 | |
| }, | |
| { | |
| "epoch": 2.0469341408024224, | |
| "grad_norm": 5.7305755615234375, | |
| "learning_rate": 1.7103986868857016e-06, | |
| "loss": 0.7156167030334473, | |
| "step": 2704 | |
| }, | |
| { | |
| "epoch": 2.048448145344436, | |
| "grad_norm": 7.5829267501831055, | |
| "learning_rate": 1.7087102943859973e-06, | |
| "loss": 0.5357162356376648, | |
| "step": 2706 | |
| }, | |
| { | |
| "epoch": 2.0499621498864498, | |
| "grad_norm": 1.4636871814727783, | |
| "learning_rate": 1.7070218098753363e-06, | |
| "loss": 0.4982175827026367, | |
| "step": 2708 | |
| }, | |
| { | |
| "epoch": 2.0514761544284634, | |
| "grad_norm": 4.640434741973877, | |
| "learning_rate": 1.7053332359999166e-06, | |
| "loss": 0.6078394055366516, | |
| "step": 2710 | |
| }, | |
| { | |
| "epoch": 2.052990158970477, | |
| "grad_norm": 1.874154806137085, | |
| "learning_rate": 1.7036445754060766e-06, | |
| "loss": 1.1511926651000977, | |
| "step": 2712 | |
| }, | |
| { | |
| "epoch": 2.0545041635124903, | |
| "grad_norm": 1.7623599767684937, | |
| "learning_rate": 1.7019558307402901e-06, | |
| "loss": 1.0208662748336792, | |
| "step": 2714 | |
| }, | |
| { | |
| "epoch": 2.056018168054504, | |
| "grad_norm": 1.8835748434066772, | |
| "learning_rate": 1.7002670046491641e-06, | |
| "loss": 0.5125632286071777, | |
| "step": 2716 | |
| }, | |
| { | |
| "epoch": 2.0575321725965177, | |
| "grad_norm": 2.5960309505462646, | |
| "learning_rate": 1.6985780997794308e-06, | |
| "loss": 0.35421526432037354, | |
| "step": 2718 | |
| }, | |
| { | |
| "epoch": 2.0590461771385313, | |
| "grad_norm": 0.24600274860858917, | |
| "learning_rate": 1.696889118777948e-06, | |
| "loss": 0.5041108727455139, | |
| "step": 2720 | |
| }, | |
| { | |
| "epoch": 2.060560181680545, | |
| "grad_norm": 3.7518346309661865, | |
| "learning_rate": 1.6952000642916918e-06, | |
| "loss": 0.4405469298362732, | |
| "step": 2722 | |
| }, | |
| { | |
| "epoch": 2.0620741862225587, | |
| "grad_norm": 3.009103536605835, | |
| "learning_rate": 1.6935109389677534e-06, | |
| "loss": 0.5766614079475403, | |
| "step": 2724 | |
| }, | |
| { | |
| "epoch": 2.0635881907645723, | |
| "grad_norm": 3.843696355819702, | |
| "learning_rate": 1.6918217454533359e-06, | |
| "loss": 0.260686457157135, | |
| "step": 2726 | |
| }, | |
| { | |
| "epoch": 2.065102195306586, | |
| "grad_norm": 5.3915605545043945, | |
| "learning_rate": 1.6901324863957482e-06, | |
| "loss": 0.25513988733291626, | |
| "step": 2728 | |
| }, | |
| { | |
| "epoch": 2.0666161998485997, | |
| "grad_norm": 0.4250639081001282, | |
| "learning_rate": 1.6884431644424022e-06, | |
| "loss": 0.16661809384822845, | |
| "step": 2730 | |
| }, | |
| { | |
| "epoch": 2.0681302043906133, | |
| "grad_norm": 2.3801870346069336, | |
| "learning_rate": 1.6867537822408093e-06, | |
| "loss": 0.5339866876602173, | |
| "step": 2732 | |
| }, | |
| { | |
| "epoch": 2.069644208932627, | |
| "grad_norm": 2.5381367206573486, | |
| "learning_rate": 1.6850643424385733e-06, | |
| "loss": 0.10374370217323303, | |
| "step": 2734 | |
| }, | |
| { | |
| "epoch": 2.0711582134746402, | |
| "grad_norm": 1.1726455688476562, | |
| "learning_rate": 1.6833748476833906e-06, | |
| "loss": 0.548637866973877, | |
| "step": 2736 | |
| }, | |
| { | |
| "epoch": 2.072672218016654, | |
| "grad_norm": 2.1821258068084717, | |
| "learning_rate": 1.6816853006230427e-06, | |
| "loss": 0.8682297468185425, | |
| "step": 2738 | |
| }, | |
| { | |
| "epoch": 2.0741862225586676, | |
| "grad_norm": 1.3762861490249634, | |
| "learning_rate": 1.6799957039053924e-06, | |
| "loss": 0.619299054145813, | |
| "step": 2740 | |
| }, | |
| { | |
| "epoch": 2.0757002271006812, | |
| "grad_norm": 4.402048110961914, | |
| "learning_rate": 1.6783060601783816e-06, | |
| "loss": 0.6997363567352295, | |
| "step": 2742 | |
| }, | |
| { | |
| "epoch": 2.077214231642695, | |
| "grad_norm": 1.7996996641159058, | |
| "learning_rate": 1.6766163720900242e-06, | |
| "loss": 0.43396300077438354, | |
| "step": 2744 | |
| }, | |
| { | |
| "epoch": 2.0787282361847086, | |
| "grad_norm": 1.5604248046875, | |
| "learning_rate": 1.674926642288406e-06, | |
| "loss": 0.15332743525505066, | |
| "step": 2746 | |
| }, | |
| { | |
| "epoch": 2.0802422407267223, | |
| "grad_norm": 2.172469139099121, | |
| "learning_rate": 1.6732368734216756e-06, | |
| "loss": 1.0345743894577026, | |
| "step": 2748 | |
| }, | |
| { | |
| "epoch": 2.081756245268736, | |
| "grad_norm": 3.688363790512085, | |
| "learning_rate": 1.6715470681380446e-06, | |
| "loss": 0.63254714012146, | |
| "step": 2750 | |
| }, | |
| { | |
| "epoch": 2.0832702498107496, | |
| "grad_norm": 1.5668286085128784, | |
| "learning_rate": 1.6698572290857814e-06, | |
| "loss": 0.5776516199111938, | |
| "step": 2752 | |
| }, | |
| { | |
| "epoch": 2.0847842543527633, | |
| "grad_norm": 3.8889572620391846, | |
| "learning_rate": 1.6681673589132063e-06, | |
| "loss": 0.4518263638019562, | |
| "step": 2754 | |
| }, | |
| { | |
| "epoch": 2.0862982588947765, | |
| "grad_norm": 4.702708721160889, | |
| "learning_rate": 1.6664774602686903e-06, | |
| "loss": 0.49554580450057983, | |
| "step": 2756 | |
| }, | |
| { | |
| "epoch": 2.08781226343679, | |
| "grad_norm": 3.964775323867798, | |
| "learning_rate": 1.6647875358006466e-06, | |
| "loss": 0.5103597044944763, | |
| "step": 2758 | |
| }, | |
| { | |
| "epoch": 2.089326267978804, | |
| "grad_norm": 3.2854743003845215, | |
| "learning_rate": 1.663097588157531e-06, | |
| "loss": 0.06786801666021347, | |
| "step": 2760 | |
| }, | |
| { | |
| "epoch": 2.0908402725208175, | |
| "grad_norm": 4.798826694488525, | |
| "learning_rate": 1.661407619987834e-06, | |
| "loss": 0.5872377157211304, | |
| "step": 2762 | |
| }, | |
| { | |
| "epoch": 2.092354277062831, | |
| "grad_norm": 2.028712272644043, | |
| "learning_rate": 1.6597176339400792e-06, | |
| "loss": 0.6830023527145386, | |
| "step": 2764 | |
| }, | |
| { | |
| "epoch": 2.093868281604845, | |
| "grad_norm": 2.520421028137207, | |
| "learning_rate": 1.6580276326628184e-06, | |
| "loss": 0.22064897418022156, | |
| "step": 2766 | |
| }, | |
| { | |
| "epoch": 2.0953822861468585, | |
| "grad_norm": 2.916041851043701, | |
| "learning_rate": 1.6563376188046265e-06, | |
| "loss": 0.1994413435459137, | |
| "step": 2768 | |
| }, | |
| { | |
| "epoch": 2.096896290688872, | |
| "grad_norm": 3.3804969787597656, | |
| "learning_rate": 1.6546475950140986e-06, | |
| "loss": 0.15971145033836365, | |
| "step": 2770 | |
| }, | |
| { | |
| "epoch": 2.098410295230886, | |
| "grad_norm": 3.459014654159546, | |
| "learning_rate": 1.6529575639398453e-06, | |
| "loss": 0.2711777985095978, | |
| "step": 2772 | |
| }, | |
| { | |
| "epoch": 2.0999242997728995, | |
| "grad_norm": 7.535019874572754, | |
| "learning_rate": 1.6512675282304884e-06, | |
| "loss": 0.30359020829200745, | |
| "step": 2774 | |
| }, | |
| { | |
| "epoch": 2.1014383043149127, | |
| "grad_norm": 2.6062686443328857, | |
| "learning_rate": 1.6495774905346575e-06, | |
| "loss": 0.08550383895635605, | |
| "step": 2776 | |
| }, | |
| { | |
| "epoch": 2.1029523088569264, | |
| "grad_norm": 2.4432120323181152, | |
| "learning_rate": 1.6478874535009847e-06, | |
| "loss": 1.0165009498596191, | |
| "step": 2778 | |
| }, | |
| { | |
| "epoch": 2.10446631339894, | |
| "grad_norm": 1.2791420221328735, | |
| "learning_rate": 1.6461974197781015e-06, | |
| "loss": 0.5231562256813049, | |
| "step": 2780 | |
| }, | |
| { | |
| "epoch": 2.1059803179409537, | |
| "grad_norm": 1.6797175407409668, | |
| "learning_rate": 1.6445073920146336e-06, | |
| "loss": 0.5763256549835205, | |
| "step": 2782 | |
| }, | |
| { | |
| "epoch": 2.1074943224829674, | |
| "grad_norm": 1.43915855884552, | |
| "learning_rate": 1.6428173728591981e-06, | |
| "loss": 0.9489549994468689, | |
| "step": 2784 | |
| }, | |
| { | |
| "epoch": 2.109008327024981, | |
| "grad_norm": 3.021160125732422, | |
| "learning_rate": 1.6411273649603988e-06, | |
| "loss": 0.9699093103408813, | |
| "step": 2786 | |
| }, | |
| { | |
| "epoch": 2.1105223315669948, | |
| "grad_norm": 1.4524022340774536, | |
| "learning_rate": 1.6394373709668207e-06, | |
| "loss": 0.5449516177177429, | |
| "step": 2788 | |
| }, | |
| { | |
| "epoch": 2.1120363361090084, | |
| "grad_norm": 1.5240285396575928, | |
| "learning_rate": 1.6377473935270272e-06, | |
| "loss": 0.5013771057128906, | |
| "step": 2790 | |
| }, | |
| { | |
| "epoch": 2.113550340651022, | |
| "grad_norm": 3.620814561843872, | |
| "learning_rate": 1.6360574352895573e-06, | |
| "loss": 0.9098644256591797, | |
| "step": 2792 | |
| }, | |
| { | |
| "epoch": 2.1150643451930358, | |
| "grad_norm": 1.660154938697815, | |
| "learning_rate": 1.6343674989029185e-06, | |
| "loss": 0.7111346125602722, | |
| "step": 2794 | |
| }, | |
| { | |
| "epoch": 2.116578349735049, | |
| "grad_norm": 1.55197012424469, | |
| "learning_rate": 1.632677587015584e-06, | |
| "loss": 0.9725939631462097, | |
| "step": 2796 | |
| }, | |
| { | |
| "epoch": 2.1180923542770627, | |
| "grad_norm": 2.468062400817871, | |
| "learning_rate": 1.6309877022759894e-06, | |
| "loss": 1.0302742719650269, | |
| "step": 2798 | |
| }, | |
| { | |
| "epoch": 2.1196063588190763, | |
| "grad_norm": 4.183055400848389, | |
| "learning_rate": 1.6292978473325269e-06, | |
| "loss": 0.3166213035583496, | |
| "step": 2800 | |
| }, | |
| { | |
| "epoch": 2.12112036336109, | |
| "grad_norm": 1.5126471519470215, | |
| "learning_rate": 1.627608024833543e-06, | |
| "loss": 0.48440665006637573, | |
| "step": 2802 | |
| }, | |
| { | |
| "epoch": 2.1226343679031037, | |
| "grad_norm": 1.1503214836120605, | |
| "learning_rate": 1.6259182374273325e-06, | |
| "loss": 0.5727757215499878, | |
| "step": 2804 | |
| }, | |
| { | |
| "epoch": 2.1241483724451173, | |
| "grad_norm": 6.117621898651123, | |
| "learning_rate": 1.6242284877621352e-06, | |
| "loss": 0.1801280677318573, | |
| "step": 2806 | |
| }, | |
| { | |
| "epoch": 2.125662376987131, | |
| "grad_norm": 2.113598585128784, | |
| "learning_rate": 1.6225387784861332e-06, | |
| "loss": 0.8976154327392578, | |
| "step": 2808 | |
| }, | |
| { | |
| "epoch": 2.1271763815291447, | |
| "grad_norm": 2.00311279296875, | |
| "learning_rate": 1.6208491122474423e-06, | |
| "loss": 0.14283889532089233, | |
| "step": 2810 | |
| }, | |
| { | |
| "epoch": 2.1286903860711583, | |
| "grad_norm": 16.498226165771484, | |
| "learning_rate": 1.6191594916941145e-06, | |
| "loss": 0.6590249538421631, | |
| "step": 2812 | |
| }, | |
| { | |
| "epoch": 2.130204390613172, | |
| "grad_norm": 2.0265419483184814, | |
| "learning_rate": 1.6174699194741276e-06, | |
| "loss": 0.6612314581871033, | |
| "step": 2814 | |
| }, | |
| { | |
| "epoch": 2.1317183951551852, | |
| "grad_norm": 3.4283297061920166, | |
| "learning_rate": 1.6157803982353844e-06, | |
| "loss": 0.6018260717391968, | |
| "step": 2816 | |
| }, | |
| { | |
| "epoch": 2.133232399697199, | |
| "grad_norm": 1.7489575147628784, | |
| "learning_rate": 1.6140909306257075e-06, | |
| "loss": 0.47798389196395874, | |
| "step": 2818 | |
| }, | |
| { | |
| "epoch": 2.1347464042392126, | |
| "grad_norm": 1.7350597381591797, | |
| "learning_rate": 1.6124015192928368e-06, | |
| "loss": 0.050837621092796326, | |
| "step": 2820 | |
| }, | |
| { | |
| "epoch": 2.1362604087812262, | |
| "grad_norm": 3.2042109966278076, | |
| "learning_rate": 1.6107121668844229e-06, | |
| "loss": 0.9809722900390625, | |
| "step": 2822 | |
| }, | |
| { | |
| "epoch": 2.13777441332324, | |
| "grad_norm": 1.8703489303588867, | |
| "learning_rate": 1.6090228760480233e-06, | |
| "loss": 0.33297398686408997, | |
| "step": 2824 | |
| }, | |
| { | |
| "epoch": 2.1392884178652536, | |
| "grad_norm": 2.5503528118133545, | |
| "learning_rate": 1.6073336494311e-06, | |
| "loss": 0.5941415429115295, | |
| "step": 2826 | |
| }, | |
| { | |
| "epoch": 2.1408024224072673, | |
| "grad_norm": 2.0041916370391846, | |
| "learning_rate": 1.605644489681015e-06, | |
| "loss": 0.6204849481582642, | |
| "step": 2828 | |
| }, | |
| { | |
| "epoch": 2.142316426949281, | |
| "grad_norm": 9.915755271911621, | |
| "learning_rate": 1.6039553994450242e-06, | |
| "loss": 0.3267604410648346, | |
| "step": 2830 | |
| }, | |
| { | |
| "epoch": 2.1438304314912946, | |
| "grad_norm": 2.206566572189331, | |
| "learning_rate": 1.602266381370275e-06, | |
| "loss": 0.5704283714294434, | |
| "step": 2832 | |
| }, | |
| { | |
| "epoch": 2.1453444360333083, | |
| "grad_norm": 2.92156982421875, | |
| "learning_rate": 1.6005774381038027e-06, | |
| "loss": 0.5793426036834717, | |
| "step": 2834 | |
| }, | |
| { | |
| "epoch": 2.146858440575322, | |
| "grad_norm": 3.446460247039795, | |
| "learning_rate": 1.5988885722925236e-06, | |
| "loss": 0.11840743571519852, | |
| "step": 2836 | |
| }, | |
| { | |
| "epoch": 2.148372445117335, | |
| "grad_norm": 2.32879900932312, | |
| "learning_rate": 1.5971997865832336e-06, | |
| "loss": 0.15414756536483765, | |
| "step": 2838 | |
| }, | |
| { | |
| "epoch": 2.149886449659349, | |
| "grad_norm": 3.7858409881591797, | |
| "learning_rate": 1.5955110836226026e-06, | |
| "loss": 0.6105750799179077, | |
| "step": 2840 | |
| }, | |
| { | |
| "epoch": 2.1514004542013625, | |
| "grad_norm": 2.2815473079681396, | |
| "learning_rate": 1.593822466057172e-06, | |
| "loss": 0.11312630772590637, | |
| "step": 2842 | |
| }, | |
| { | |
| "epoch": 2.152914458743376, | |
| "grad_norm": 9.054616928100586, | |
| "learning_rate": 1.592133936533348e-06, | |
| "loss": 0.3543716073036194, | |
| "step": 2844 | |
| }, | |
| { | |
| "epoch": 2.15442846328539, | |
| "grad_norm": 1.658771276473999, | |
| "learning_rate": 1.5904454976973997e-06, | |
| "loss": 0.10064493864774704, | |
| "step": 2846 | |
| }, | |
| { | |
| "epoch": 2.1559424678274035, | |
| "grad_norm": 2.0759246349334717, | |
| "learning_rate": 1.5887571521954526e-06, | |
| "loss": 0.97999507188797, | |
| "step": 2848 | |
| }, | |
| { | |
| "epoch": 2.157456472369417, | |
| "grad_norm": 2.34515118598938, | |
| "learning_rate": 1.5870689026734887e-06, | |
| "loss": 0.9874440431594849, | |
| "step": 2850 | |
| }, | |
| { | |
| "epoch": 2.158970476911431, | |
| "grad_norm": 2.011859655380249, | |
| "learning_rate": 1.5853807517773366e-06, | |
| "loss": 0.8331037759780884, | |
| "step": 2852 | |
| }, | |
| { | |
| "epoch": 2.1604844814534445, | |
| "grad_norm": 2.438368558883667, | |
| "learning_rate": 1.5836927021526724e-06, | |
| "loss": 0.20251956582069397, | |
| "step": 2854 | |
| }, | |
| { | |
| "epoch": 2.161998485995458, | |
| "grad_norm": 2.275050163269043, | |
| "learning_rate": 1.5820047564450122e-06, | |
| "loss": 1.0513229370117188, | |
| "step": 2856 | |
| }, | |
| { | |
| "epoch": 2.1635124905374714, | |
| "grad_norm": 1.618593692779541, | |
| "learning_rate": 1.5803169172997105e-06, | |
| "loss": 0.6685404777526855, | |
| "step": 2858 | |
| }, | |
| { | |
| "epoch": 2.165026495079485, | |
| "grad_norm": 3.4260945320129395, | |
| "learning_rate": 1.578629187361954e-06, | |
| "loss": 0.1660946160554886, | |
| "step": 2860 | |
| }, | |
| { | |
| "epoch": 2.1665404996214987, | |
| "grad_norm": 1.603978157043457, | |
| "learning_rate": 1.576941569276757e-06, | |
| "loss": 0.15563638508319855, | |
| "step": 2862 | |
| }, | |
| { | |
| "epoch": 2.1680545041635124, | |
| "grad_norm": 3.278381586074829, | |
| "learning_rate": 1.5752540656889617e-06, | |
| "loss": 0.17102839052677155, | |
| "step": 2864 | |
| }, | |
| { | |
| "epoch": 2.169568508705526, | |
| "grad_norm": 2.840925455093384, | |
| "learning_rate": 1.5735666792432283e-06, | |
| "loss": 0.14147916436195374, | |
| "step": 2866 | |
| }, | |
| { | |
| "epoch": 2.1710825132475398, | |
| "grad_norm": 2.1807408332824707, | |
| "learning_rate": 1.5718794125840328e-06, | |
| "loss": 0.5866944193840027, | |
| "step": 2868 | |
| }, | |
| { | |
| "epoch": 2.1725965177895534, | |
| "grad_norm": 2.3917977809906006, | |
| "learning_rate": 1.570192268355667e-06, | |
| "loss": 0.2330070585012436, | |
| "step": 2870 | |
| }, | |
| { | |
| "epoch": 2.174110522331567, | |
| "grad_norm": 2.4362566471099854, | |
| "learning_rate": 1.5685052492022274e-06, | |
| "loss": 0.24644072353839874, | |
| "step": 2872 | |
| }, | |
| { | |
| "epoch": 2.1756245268735808, | |
| "grad_norm": 2.0635030269622803, | |
| "learning_rate": 1.5668183577676157e-06, | |
| "loss": 0.53923499584198, | |
| "step": 2874 | |
| }, | |
| { | |
| "epoch": 2.1771385314155944, | |
| "grad_norm": 8.901561737060547, | |
| "learning_rate": 1.5651315966955332e-06, | |
| "loss": 0.662903904914856, | |
| "step": 2876 | |
| }, | |
| { | |
| "epoch": 2.1786525359576077, | |
| "grad_norm": 2.5459957122802734, | |
| "learning_rate": 1.5634449686294778e-06, | |
| "loss": 0.222152441740036, | |
| "step": 2878 | |
| }, | |
| { | |
| "epoch": 2.1801665404996213, | |
| "grad_norm": 1.7599998712539673, | |
| "learning_rate": 1.561758476212738e-06, | |
| "loss": 0.657718300819397, | |
| "step": 2880 | |
| }, | |
| { | |
| "epoch": 2.181680545041635, | |
| "grad_norm": 5.629086971282959, | |
| "learning_rate": 1.56007212208839e-06, | |
| "loss": 0.07736900448799133, | |
| "step": 2882 | |
| }, | |
| { | |
| "epoch": 2.1831945495836487, | |
| "grad_norm": 3.16239070892334, | |
| "learning_rate": 1.5583859088992927e-06, | |
| "loss": 0.4388081431388855, | |
| "step": 2884 | |
| }, | |
| { | |
| "epoch": 2.1847085541256623, | |
| "grad_norm": 1.404675006866455, | |
| "learning_rate": 1.5566998392880854e-06, | |
| "loss": 0.8538283109664917, | |
| "step": 2886 | |
| }, | |
| { | |
| "epoch": 2.186222558667676, | |
| "grad_norm": 2.138237714767456, | |
| "learning_rate": 1.5550139158971817e-06, | |
| "loss": 0.2884243428707123, | |
| "step": 2888 | |
| }, | |
| { | |
| "epoch": 2.1877365632096897, | |
| "grad_norm": 0.9016575813293457, | |
| "learning_rate": 1.553328141368765e-06, | |
| "loss": 0.5784789323806763, | |
| "step": 2890 | |
| }, | |
| { | |
| "epoch": 2.1892505677517033, | |
| "grad_norm": 2.4022958278656006, | |
| "learning_rate": 1.551642518344788e-06, | |
| "loss": 0.8029235601425171, | |
| "step": 2892 | |
| }, | |
| { | |
| "epoch": 2.190764572293717, | |
| "grad_norm": 28.471567153930664, | |
| "learning_rate": 1.5499570494669635e-06, | |
| "loss": 0.7160929441452026, | |
| "step": 2894 | |
| }, | |
| { | |
| "epoch": 2.1922785768357307, | |
| "grad_norm": 37.3333740234375, | |
| "learning_rate": 1.548271737376763e-06, | |
| "loss": 0.49402734637260437, | |
| "step": 2896 | |
| }, | |
| { | |
| "epoch": 2.1937925813777444, | |
| "grad_norm": 3.0640456676483154, | |
| "learning_rate": 1.5465865847154133e-06, | |
| "loss": 0.5062578320503235, | |
| "step": 2898 | |
| }, | |
| { | |
| "epoch": 2.1953065859197576, | |
| "grad_norm": 3.4620871543884277, | |
| "learning_rate": 1.5449015941238916e-06, | |
| "loss": 0.10250119119882584, | |
| "step": 2900 | |
| }, | |
| { | |
| "epoch": 2.1968205904617712, | |
| "grad_norm": 2.5394251346588135, | |
| "learning_rate": 1.5432167682429199e-06, | |
| "loss": 0.15818654000759125, | |
| "step": 2902 | |
| }, | |
| { | |
| "epoch": 2.198334595003785, | |
| "grad_norm": 0.4949427843093872, | |
| "learning_rate": 1.541532109712962e-06, | |
| "loss": 0.6417489051818848, | |
| "step": 2904 | |
| }, | |
| { | |
| "epoch": 2.1998485995457986, | |
| "grad_norm": 1.840445637702942, | |
| "learning_rate": 1.5398476211742212e-06, | |
| "loss": 0.6399612426757812, | |
| "step": 2906 | |
| }, | |
| { | |
| "epoch": 2.2013626040878123, | |
| "grad_norm": 1.615837574005127, | |
| "learning_rate": 1.5381633052666323e-06, | |
| "loss": 0.06168773025274277, | |
| "step": 2908 | |
| }, | |
| { | |
| "epoch": 2.202876608629826, | |
| "grad_norm": 1.5384738445281982, | |
| "learning_rate": 1.5364791646298612e-06, | |
| "loss": 0.9079161882400513, | |
| "step": 2910 | |
| }, | |
| { | |
| "epoch": 2.2043906131718396, | |
| "grad_norm": 4.141279697418213, | |
| "learning_rate": 1.5347952019032969e-06, | |
| "loss": 0.12153710424900055, | |
| "step": 2912 | |
| }, | |
| { | |
| "epoch": 2.2059046177138533, | |
| "grad_norm": 2.1120827198028564, | |
| "learning_rate": 1.533111419726053e-06, | |
| "loss": 0.5576128959655762, | |
| "step": 2914 | |
| }, | |
| { | |
| "epoch": 2.207418622255867, | |
| "grad_norm": 2.8576767444610596, | |
| "learning_rate": 1.5314278207369572e-06, | |
| "loss": 0.3549292981624603, | |
| "step": 2916 | |
| }, | |
| { | |
| "epoch": 2.20893262679788, | |
| "grad_norm": 2.535698413848877, | |
| "learning_rate": 1.5297444075745511e-06, | |
| "loss": 0.6953564882278442, | |
| "step": 2918 | |
| }, | |
| { | |
| "epoch": 2.210446631339894, | |
| "grad_norm": 4.89951229095459, | |
| "learning_rate": 1.5280611828770842e-06, | |
| "loss": 0.5466469526290894, | |
| "step": 2920 | |
| }, | |
| { | |
| "epoch": 2.2119606358819075, | |
| "grad_norm": 1.158060073852539, | |
| "learning_rate": 1.5263781492825134e-06, | |
| "loss": 0.5347949862480164, | |
| "step": 2922 | |
| }, | |
| { | |
| "epoch": 2.213474640423921, | |
| "grad_norm": 3.739546775817871, | |
| "learning_rate": 1.524695309428493e-06, | |
| "loss": 0.6225649118423462, | |
| "step": 2924 | |
| }, | |
| { | |
| "epoch": 2.214988644965935, | |
| "grad_norm": 1.4427951574325562, | |
| "learning_rate": 1.5230126659523748e-06, | |
| "loss": 0.4554247558116913, | |
| "step": 2926 | |
| }, | |
| { | |
| "epoch": 2.2165026495079485, | |
| "grad_norm": 2.3546080589294434, | |
| "learning_rate": 1.5213302214912033e-06, | |
| "loss": 0.32856038212776184, | |
| "step": 2928 | |
| }, | |
| { | |
| "epoch": 2.218016654049962, | |
| "grad_norm": 3.2373764514923096, | |
| "learning_rate": 1.5196479786817105e-06, | |
| "loss": 0.57108074426651, | |
| "step": 2930 | |
| }, | |
| { | |
| "epoch": 2.219530658591976, | |
| "grad_norm": 1.5101537704467773, | |
| "learning_rate": 1.517965940160313e-06, | |
| "loss": 0.4823313355445862, | |
| "step": 2932 | |
| }, | |
| { | |
| "epoch": 2.2210446631339895, | |
| "grad_norm": 2.8875467777252197, | |
| "learning_rate": 1.5162841085631062e-06, | |
| "loss": 0.12577395141124725, | |
| "step": 2934 | |
| }, | |
| { | |
| "epoch": 2.222558667676003, | |
| "grad_norm": 2.704087734222412, | |
| "learning_rate": 1.5146024865258626e-06, | |
| "loss": 1.092346429824829, | |
| "step": 2936 | |
| }, | |
| { | |
| "epoch": 2.224072672218017, | |
| "grad_norm": 1.7303626537322998, | |
| "learning_rate": 1.512921076684025e-06, | |
| "loss": 0.5065004229545593, | |
| "step": 2938 | |
| }, | |
| { | |
| "epoch": 2.22558667676003, | |
| "grad_norm": 6.826353549957275, | |
| "learning_rate": 1.5112398816727044e-06, | |
| "loss": 0.6223349571228027, | |
| "step": 2940 | |
| }, | |
| { | |
| "epoch": 2.2271006813020437, | |
| "grad_norm": 1.7305920124053955, | |
| "learning_rate": 1.5095589041266737e-06, | |
| "loss": 0.5957088470458984, | |
| "step": 2942 | |
| }, | |
| { | |
| "epoch": 2.2286146858440574, | |
| "grad_norm": 2.905937433242798, | |
| "learning_rate": 1.5078781466803683e-06, | |
| "loss": 0.29074761271476746, | |
| "step": 2944 | |
| }, | |
| { | |
| "epoch": 2.230128690386071, | |
| "grad_norm": 1.5235497951507568, | |
| "learning_rate": 1.5061976119678749e-06, | |
| "loss": 0.5921708345413208, | |
| "step": 2946 | |
| }, | |
| { | |
| "epoch": 2.2316426949280848, | |
| "grad_norm": 1.6646530628204346, | |
| "learning_rate": 1.5045173026229326e-06, | |
| "loss": 0.4309976398944855, | |
| "step": 2948 | |
| }, | |
| { | |
| "epoch": 2.2331566994700984, | |
| "grad_norm": 2.0782201290130615, | |
| "learning_rate": 1.502837221278929e-06, | |
| "loss": 0.243109330534935, | |
| "step": 2950 | |
| }, | |
| { | |
| "epoch": 2.234670704012112, | |
| "grad_norm": 3.6397688388824463, | |
| "learning_rate": 1.5011573705688922e-06, | |
| "loss": 0.8285622596740723, | |
| "step": 2952 | |
| }, | |
| { | |
| "epoch": 2.2361847085541258, | |
| "grad_norm": 2.441128969192505, | |
| "learning_rate": 1.4994777531254882e-06, | |
| "loss": 0.430575430393219, | |
| "step": 2954 | |
| }, | |
| { | |
| "epoch": 2.2376987130961394, | |
| "grad_norm": 1.0673562288284302, | |
| "learning_rate": 1.49779837158102e-06, | |
| "loss": 0.040843378752470016, | |
| "step": 2956 | |
| }, | |
| { | |
| "epoch": 2.239212717638153, | |
| "grad_norm": 1.7394381761550903, | |
| "learning_rate": 1.4961192285674194e-06, | |
| "loss": 0.6127630472183228, | |
| "step": 2958 | |
| }, | |
| { | |
| "epoch": 2.2407267221801668, | |
| "grad_norm": 1.1732391119003296, | |
| "learning_rate": 1.494440326716245e-06, | |
| "loss": 0.5040069222450256, | |
| "step": 2960 | |
| }, | |
| { | |
| "epoch": 2.24224072672218, | |
| "grad_norm": 3.5119881629943848, | |
| "learning_rate": 1.4927616686586755e-06, | |
| "loss": 0.4314347505569458, | |
| "step": 2962 | |
| }, | |
| { | |
| "epoch": 2.2437547312641937, | |
| "grad_norm": 21.22010612487793, | |
| "learning_rate": 1.4910832570255105e-06, | |
| "loss": 0.15195772051811218, | |
| "step": 2964 | |
| }, | |
| { | |
| "epoch": 2.2452687358062073, | |
| "grad_norm": 1.7871365547180176, | |
| "learning_rate": 1.489405094447162e-06, | |
| "loss": 0.07986627519130707, | |
| "step": 2966 | |
| }, | |
| { | |
| "epoch": 2.246782740348221, | |
| "grad_norm": 1.7326053380966187, | |
| "learning_rate": 1.4877271835536508e-06, | |
| "loss": 0.163385808467865, | |
| "step": 2968 | |
| }, | |
| { | |
| "epoch": 2.2482967448902347, | |
| "grad_norm": 4.657560348510742, | |
| "learning_rate": 1.486049526974604e-06, | |
| "loss": 0.07589483261108398, | |
| "step": 2970 | |
| }, | |
| { | |
| "epoch": 2.2498107494322483, | |
| "grad_norm": 2.992844343185425, | |
| "learning_rate": 1.4843721273392512e-06, | |
| "loss": 0.18186970055103302, | |
| "step": 2972 | |
| }, | |
| { | |
| "epoch": 2.251324753974262, | |
| "grad_norm": 1.3081936836242676, | |
| "learning_rate": 1.4826949872764181e-06, | |
| "loss": 0.4326886534690857, | |
| "step": 2974 | |
| }, | |
| { | |
| "epoch": 2.2528387585162757, | |
| "grad_norm": 4.688754081726074, | |
| "learning_rate": 1.4810181094145231e-06, | |
| "loss": 0.5936760306358337, | |
| "step": 2976 | |
| }, | |
| { | |
| "epoch": 2.2543527630582894, | |
| "grad_norm": 2.676875591278076, | |
| "learning_rate": 1.4793414963815745e-06, | |
| "loss": 0.20641954243183136, | |
| "step": 2978 | |
| }, | |
| { | |
| "epoch": 2.2558667676003026, | |
| "grad_norm": 2.0084316730499268, | |
| "learning_rate": 1.4776651508051667e-06, | |
| "loss": 0.47862863540649414, | |
| "step": 2980 | |
| }, | |
| { | |
| "epoch": 2.2573807721423162, | |
| "grad_norm": 1.0666364431381226, | |
| "learning_rate": 1.4759890753124724e-06, | |
| "loss": 0.7472076416015625, | |
| "step": 2982 | |
| }, | |
| { | |
| "epoch": 2.25889477668433, | |
| "grad_norm": 1.7119903564453125, | |
| "learning_rate": 1.4743132725302427e-06, | |
| "loss": 0.5262482166290283, | |
| "step": 2984 | |
| }, | |
| { | |
| "epoch": 2.2604087812263436, | |
| "grad_norm": 1.4337292909622192, | |
| "learning_rate": 1.472637745084801e-06, | |
| "loss": 0.4823380410671234, | |
| "step": 2986 | |
| }, | |
| { | |
| "epoch": 2.2619227857683573, | |
| "grad_norm": 2.332228183746338, | |
| "learning_rate": 1.47096249560204e-06, | |
| "loss": 0.983479917049408, | |
| "step": 2988 | |
| }, | |
| { | |
| "epoch": 2.263436790310371, | |
| "grad_norm": 9.281291961669922, | |
| "learning_rate": 1.469287526707415e-06, | |
| "loss": 0.6645342111587524, | |
| "step": 2990 | |
| }, | |
| { | |
| "epoch": 2.2649507948523846, | |
| "grad_norm": 11.439191818237305, | |
| "learning_rate": 1.467612841025942e-06, | |
| "loss": 0.43795791268348694, | |
| "step": 2992 | |
| }, | |
| { | |
| "epoch": 2.2664647993943983, | |
| "grad_norm": 0.34899380803108215, | |
| "learning_rate": 1.465938441182195e-06, | |
| "loss": 0.486656129360199, | |
| "step": 2994 | |
| }, | |
| { | |
| "epoch": 2.267978803936412, | |
| "grad_norm": 1.5586745738983154, | |
| "learning_rate": 1.4642643298002977e-06, | |
| "loss": 0.5811508893966675, | |
| "step": 2996 | |
| }, | |
| { | |
| "epoch": 2.2694928084784256, | |
| "grad_norm": 1.6818510293960571, | |
| "learning_rate": 1.4625905095039232e-06, | |
| "loss": 0.9852191209793091, | |
| "step": 2998 | |
| }, | |
| { | |
| "epoch": 2.2710068130204393, | |
| "grad_norm": 3.359684467315674, | |
| "learning_rate": 1.4609169829162866e-06, | |
| "loss": 0.5309067368507385, | |
| "step": 3000 | |
| }, | |
| { | |
| "epoch": 2.2725208175624525, | |
| "grad_norm": 2.7355499267578125, | |
| "learning_rate": 1.4592437526601462e-06, | |
| "loss": 0.03945651277899742, | |
| "step": 3002 | |
| }, | |
| { | |
| "epoch": 2.274034822104466, | |
| "grad_norm": 1.5781525373458862, | |
| "learning_rate": 1.4575708213577915e-06, | |
| "loss": 0.5300910472869873, | |
| "step": 3004 | |
| }, | |
| { | |
| "epoch": 2.27554882664648, | |
| "grad_norm": 2.344346523284912, | |
| "learning_rate": 1.4558981916310474e-06, | |
| "loss": 0.08178112655878067, | |
| "step": 3006 | |
| }, | |
| { | |
| "epoch": 2.2770628311884935, | |
| "grad_norm": 2.0909883975982666, | |
| "learning_rate": 1.4542258661012636e-06, | |
| "loss": 0.5931339859962463, | |
| "step": 3008 | |
| }, | |
| { | |
| "epoch": 2.278576835730507, | |
| "grad_norm": 4.088789939880371, | |
| "learning_rate": 1.4525538473893138e-06, | |
| "loss": 0.05056603625416756, | |
| "step": 3010 | |
| }, | |
| { | |
| "epoch": 2.280090840272521, | |
| "grad_norm": 2.683748245239258, | |
| "learning_rate": 1.4508821381155916e-06, | |
| "loss": 0.08106040209531784, | |
| "step": 3012 | |
| }, | |
| { | |
| "epoch": 2.2816048448145345, | |
| "grad_norm": 9.581048011779785, | |
| "learning_rate": 1.4492107409000037e-06, | |
| "loss": 0.23262442648410797, | |
| "step": 3014 | |
| }, | |
| { | |
| "epoch": 2.283118849356548, | |
| "grad_norm": 2.917015552520752, | |
| "learning_rate": 1.4475396583619706e-06, | |
| "loss": 0.31293708086013794, | |
| "step": 3016 | |
| }, | |
| { | |
| "epoch": 2.284632853898562, | |
| "grad_norm": 1.8726041316986084, | |
| "learning_rate": 1.445868893120417e-06, | |
| "loss": 0.9713987112045288, | |
| "step": 3018 | |
| }, | |
| { | |
| "epoch": 2.286146858440575, | |
| "grad_norm": 2.3735573291778564, | |
| "learning_rate": 1.444198447793772e-06, | |
| "loss": 0.9806248545646667, | |
| "step": 3020 | |
| }, | |
| { | |
| "epoch": 2.287660862982589, | |
| "grad_norm": 2.190314769744873, | |
| "learning_rate": 1.4425283249999626e-06, | |
| "loss": 0.7395898699760437, | |
| "step": 3022 | |
| }, | |
| { | |
| "epoch": 2.2891748675246024, | |
| "grad_norm": 1.5809588432312012, | |
| "learning_rate": 1.4408585273564101e-06, | |
| "loss": 0.5897817015647888, | |
| "step": 3024 | |
| }, | |
| { | |
| "epoch": 2.290688872066616, | |
| "grad_norm": 1.4036309719085693, | |
| "learning_rate": 1.4391890574800273e-06, | |
| "loss": 0.5153738260269165, | |
| "step": 3026 | |
| }, | |
| { | |
| "epoch": 2.2922028766086298, | |
| "grad_norm": 1.4983464479446411, | |
| "learning_rate": 1.4375199179872111e-06, | |
| "loss": 0.9634343981742859, | |
| "step": 3028 | |
| }, | |
| { | |
| "epoch": 2.2937168811506434, | |
| "grad_norm": 2.8257312774658203, | |
| "learning_rate": 1.435851111493844e-06, | |
| "loss": 0.27471813559532166, | |
| "step": 3030 | |
| }, | |
| { | |
| "epoch": 2.295230885692657, | |
| "grad_norm": 1.8934541940689087, | |
| "learning_rate": 1.434182640615284e-06, | |
| "loss": 0.5577218532562256, | |
| "step": 3032 | |
| }, | |
| { | |
| "epoch": 2.2967448902346708, | |
| "grad_norm": 3.0278127193450928, | |
| "learning_rate": 1.4325145079663634e-06, | |
| "loss": 0.13709710538387299, | |
| "step": 3034 | |
| }, | |
| { | |
| "epoch": 2.2982588947766844, | |
| "grad_norm": 1.5450032949447632, | |
| "learning_rate": 1.4308467161613854e-06, | |
| "loss": 0.4284347593784332, | |
| "step": 3036 | |
| }, | |
| { | |
| "epoch": 2.299772899318698, | |
| "grad_norm": 3.1318891048431396, | |
| "learning_rate": 1.4291792678141184e-06, | |
| "loss": 0.810684323310852, | |
| "step": 3038 | |
| }, | |
| { | |
| "epoch": 2.3012869038607118, | |
| "grad_norm": 6.301877021789551, | |
| "learning_rate": 1.4275121655377932e-06, | |
| "loss": 0.07751720398664474, | |
| "step": 3040 | |
| }, | |
| { | |
| "epoch": 2.302800908402725, | |
| "grad_norm": 3.4808292388916016, | |
| "learning_rate": 1.4258454119450961e-06, | |
| "loss": 0.61885666847229, | |
| "step": 3042 | |
| }, | |
| { | |
| "epoch": 2.3043149129447387, | |
| "grad_norm": 3.063957452774048, | |
| "learning_rate": 1.4241790096481704e-06, | |
| "loss": 0.4505324363708496, | |
| "step": 3044 | |
| }, | |
| { | |
| "epoch": 2.3058289174867523, | |
| "grad_norm": 4.3270039558410645, | |
| "learning_rate": 1.4225129612586064e-06, | |
| "loss": 0.6248122453689575, | |
| "step": 3046 | |
| }, | |
| { | |
| "epoch": 2.307342922028766, | |
| "grad_norm": 1.7819933891296387, | |
| "learning_rate": 1.4208472693874397e-06, | |
| "loss": 0.4963420331478119, | |
| "step": 3048 | |
| }, | |
| { | |
| "epoch": 2.3088569265707797, | |
| "grad_norm": 1.4675928354263306, | |
| "learning_rate": 1.4191819366451482e-06, | |
| "loss": 1.0235692262649536, | |
| "step": 3050 | |
| }, | |
| { | |
| "epoch": 2.3103709311127933, | |
| "grad_norm": 3.1224544048309326, | |
| "learning_rate": 1.4175169656416467e-06, | |
| "loss": 0.530079185962677, | |
| "step": 3052 | |
| }, | |
| { | |
| "epoch": 2.311884935654807, | |
| "grad_norm": 1.1855491399765015, | |
| "learning_rate": 1.4158523589862829e-06, | |
| "loss": 0.5151659250259399, | |
| "step": 3054 | |
| }, | |
| { | |
| "epoch": 2.3133989401968207, | |
| "grad_norm": 2.658396005630493, | |
| "learning_rate": 1.4141881192878332e-06, | |
| "loss": 0.17711491882801056, | |
| "step": 3056 | |
| }, | |
| { | |
| "epoch": 2.3149129447388344, | |
| "grad_norm": 0.5937647819519043, | |
| "learning_rate": 1.4125242491545e-06, | |
| "loss": 0.5193536877632141, | |
| "step": 3058 | |
| }, | |
| { | |
| "epoch": 2.316426949280848, | |
| "grad_norm": 1.0708814859390259, | |
| "learning_rate": 1.4108607511939053e-06, | |
| "loss": 0.39464443922042847, | |
| "step": 3060 | |
| }, | |
| { | |
| "epoch": 2.3179409538228617, | |
| "grad_norm": 3.7162861824035645, | |
| "learning_rate": 1.4091976280130884e-06, | |
| "loss": 0.0514480397105217, | |
| "step": 3062 | |
| }, | |
| { | |
| "epoch": 2.319454958364875, | |
| "grad_norm": 3.0497095584869385, | |
| "learning_rate": 1.4075348822185006e-06, | |
| "loss": 0.14000831544399261, | |
| "step": 3064 | |
| }, | |
| { | |
| "epoch": 2.3209689629068886, | |
| "grad_norm": 1.7098908424377441, | |
| "learning_rate": 1.4058725164160035e-06, | |
| "loss": 0.2299700379371643, | |
| "step": 3066 | |
| }, | |
| { | |
| "epoch": 2.3224829674489023, | |
| "grad_norm": 2.4175784587860107, | |
| "learning_rate": 1.404210533210861e-06, | |
| "loss": 0.7841640114784241, | |
| "step": 3068 | |
| }, | |
| { | |
| "epoch": 2.323996971990916, | |
| "grad_norm": 8.18373966217041, | |
| "learning_rate": 1.4025489352077387e-06, | |
| "loss": 0.6784352660179138, | |
| "step": 3070 | |
| }, | |
| { | |
| "epoch": 2.3255109765329296, | |
| "grad_norm": 1.799009919166565, | |
| "learning_rate": 1.4008877250106977e-06, | |
| "loss": 0.6317290663719177, | |
| "step": 3072 | |
| }, | |
| { | |
| "epoch": 2.3270249810749433, | |
| "grad_norm": 3.8978631496429443, | |
| "learning_rate": 1.399226905223193e-06, | |
| "loss": 1.0654164552688599, | |
| "step": 3074 | |
| }, | |
| { | |
| "epoch": 2.328538985616957, | |
| "grad_norm": 1.7303528785705566, | |
| "learning_rate": 1.3975664784480653e-06, | |
| "loss": 0.9460194110870361, | |
| "step": 3076 | |
| }, | |
| { | |
| "epoch": 2.3300529901589706, | |
| "grad_norm": 1.330672025680542, | |
| "learning_rate": 1.3959064472875406e-06, | |
| "loss": 0.11510738730430603, | |
| "step": 3078 | |
| }, | |
| { | |
| "epoch": 2.3315669947009843, | |
| "grad_norm": 3.222262144088745, | |
| "learning_rate": 1.3942468143432263e-06, | |
| "loss": 0.6555764675140381, | |
| "step": 3080 | |
| }, | |
| { | |
| "epoch": 2.3330809992429975, | |
| "grad_norm": 3.8371193408966064, | |
| "learning_rate": 1.3925875822161034e-06, | |
| "loss": 0.09196102619171143, | |
| "step": 3082 | |
| }, | |
| { | |
| "epoch": 2.334595003785011, | |
| "grad_norm": 2.885496139526367, | |
| "learning_rate": 1.3909287535065254e-06, | |
| "loss": 0.4630357623100281, | |
| "step": 3084 | |
| }, | |
| { | |
| "epoch": 2.336109008327025, | |
| "grad_norm": 1.3782941102981567, | |
| "learning_rate": 1.3892703308142144e-06, | |
| "loss": 0.39131051301956177, | |
| "step": 3086 | |
| }, | |
| { | |
| "epoch": 2.3376230128690385, | |
| "grad_norm": 1.547868251800537, | |
| "learning_rate": 1.3876123167382551e-06, | |
| "loss": 0.9046714901924133, | |
| "step": 3088 | |
| }, | |
| { | |
| "epoch": 2.339137017411052, | |
| "grad_norm": 1.5988070964813232, | |
| "learning_rate": 1.3859547138770932e-06, | |
| "loss": 0.5064923167228699, | |
| "step": 3090 | |
| }, | |
| { | |
| "epoch": 2.340651021953066, | |
| "grad_norm": 2.8355846405029297, | |
| "learning_rate": 1.3842975248285284e-06, | |
| "loss": 0.6032267212867737, | |
| "step": 3092 | |
| }, | |
| { | |
| "epoch": 2.3421650264950795, | |
| "grad_norm": 1.981899380683899, | |
| "learning_rate": 1.382640752189712e-06, | |
| "loss": 0.8943812847137451, | |
| "step": 3094 | |
| }, | |
| { | |
| "epoch": 2.343679031037093, | |
| "grad_norm": 4.821799278259277, | |
| "learning_rate": 1.380984398557145e-06, | |
| "loss": 0.99314284324646, | |
| "step": 3096 | |
| }, | |
| { | |
| "epoch": 2.345193035579107, | |
| "grad_norm": 2.2897233963012695, | |
| "learning_rate": 1.3793284665266681e-06, | |
| "loss": 0.3706459105014801, | |
| "step": 3098 | |
| }, | |
| { | |
| "epoch": 2.3467070401211205, | |
| "grad_norm": 2.3508996963500977, | |
| "learning_rate": 1.3776729586934643e-06, | |
| "loss": 0.09634893387556076, | |
| "step": 3100 | |
| }, | |
| { | |
| "epoch": 2.348221044663134, | |
| "grad_norm": 3.219851016998291, | |
| "learning_rate": 1.3760178776520502e-06, | |
| "loss": 0.0941806435585022, | |
| "step": 3102 | |
| }, | |
| { | |
| "epoch": 2.3497350492051474, | |
| "grad_norm": 2.2882401943206787, | |
| "learning_rate": 1.3743632259962745e-06, | |
| "loss": 0.9920378923416138, | |
| "step": 3104 | |
| }, | |
| { | |
| "epoch": 2.351249053747161, | |
| "grad_norm": 2.4805197715759277, | |
| "learning_rate": 1.3727090063193114e-06, | |
| "loss": 0.2563851475715637, | |
| "step": 3106 | |
| }, | |
| { | |
| "epoch": 2.3527630582891748, | |
| "grad_norm": 1.203678846359253, | |
| "learning_rate": 1.3710552212136604e-06, | |
| "loss": 0.09712015837430954, | |
| "step": 3108 | |
| }, | |
| { | |
| "epoch": 2.3542770628311884, | |
| "grad_norm": 1.679890751838684, | |
| "learning_rate": 1.3694018732711379e-06, | |
| "loss": 0.5343849658966064, | |
| "step": 3110 | |
| }, | |
| { | |
| "epoch": 2.355791067373202, | |
| "grad_norm": 2.469313859939575, | |
| "learning_rate": 1.367748965082876e-06, | |
| "loss": 0.45109063386917114, | |
| "step": 3112 | |
| }, | |
| { | |
| "epoch": 2.3573050719152158, | |
| "grad_norm": 2.0011284351348877, | |
| "learning_rate": 1.3660964992393176e-06, | |
| "loss": 0.9690456390380859, | |
| "step": 3114 | |
| }, | |
| { | |
| "epoch": 2.3588190764572294, | |
| "grad_norm": 2.9229679107666016, | |
| "learning_rate": 1.3644444783302122e-06, | |
| "loss": 0.4914405941963196, | |
| "step": 3116 | |
| }, | |
| { | |
| "epoch": 2.360333080999243, | |
| "grad_norm": 4.822331428527832, | |
| "learning_rate": 1.3627929049446132e-06, | |
| "loss": 0.9364929795265198, | |
| "step": 3118 | |
| }, | |
| { | |
| "epoch": 2.3618470855412568, | |
| "grad_norm": 2.3485610485076904, | |
| "learning_rate": 1.3611417816708704e-06, | |
| "loss": 0.08451011031866074, | |
| "step": 3120 | |
| }, | |
| { | |
| "epoch": 2.3633610900832704, | |
| "grad_norm": 3.3834290504455566, | |
| "learning_rate": 1.3594911110966294e-06, | |
| "loss": 0.7591638565063477, | |
| "step": 3122 | |
| }, | |
| { | |
| "epoch": 2.364875094625284, | |
| "grad_norm": 3.9524335861206055, | |
| "learning_rate": 1.357840895808827e-06, | |
| "loss": 0.5419999957084656, | |
| "step": 3124 | |
| }, | |
| { | |
| "epoch": 2.3663890991672973, | |
| "grad_norm": 2.177633047103882, | |
| "learning_rate": 1.3561911383936855e-06, | |
| "loss": 0.5028657913208008, | |
| "step": 3126 | |
| }, | |
| { | |
| "epoch": 2.367903103709311, | |
| "grad_norm": 1.8020979166030884, | |
| "learning_rate": 1.3545418414367094e-06, | |
| "loss": 0.5325472354888916, | |
| "step": 3128 | |
| }, | |
| { | |
| "epoch": 2.3694171082513247, | |
| "grad_norm": 2.580286741256714, | |
| "learning_rate": 1.3528930075226817e-06, | |
| "loss": 0.7464925646781921, | |
| "step": 3130 | |
| }, | |
| { | |
| "epoch": 2.3709311127933383, | |
| "grad_norm": 1.5663801431655884, | |
| "learning_rate": 1.3512446392356616e-06, | |
| "loss": 0.10833124816417694, | |
| "step": 3132 | |
| }, | |
| { | |
| "epoch": 2.372445117335352, | |
| "grad_norm": 2.8936641216278076, | |
| "learning_rate": 1.3495967391589757e-06, | |
| "loss": 0.6852737665176392, | |
| "step": 3134 | |
| }, | |
| { | |
| "epoch": 2.3739591218773657, | |
| "grad_norm": 16.39162254333496, | |
| "learning_rate": 1.347949309875219e-06, | |
| "loss": 0.14308148622512817, | |
| "step": 3136 | |
| }, | |
| { | |
| "epoch": 2.3754731264193794, | |
| "grad_norm": 2.1235458850860596, | |
| "learning_rate": 1.3463023539662466e-06, | |
| "loss": 0.5667533278465271, | |
| "step": 3138 | |
| }, | |
| { | |
| "epoch": 2.376987130961393, | |
| "grad_norm": 4.373684406280518, | |
| "learning_rate": 1.3446558740131748e-06, | |
| "loss": 0.15360027551651, | |
| "step": 3140 | |
| }, | |
| { | |
| "epoch": 2.3785011355034067, | |
| "grad_norm": 2.0062785148620605, | |
| "learning_rate": 1.3430098725963704e-06, | |
| "loss": 0.7532010078430176, | |
| "step": 3142 | |
| }, | |
| { | |
| "epoch": 2.38001514004542, | |
| "grad_norm": 4.252340316772461, | |
| "learning_rate": 1.3413643522954523e-06, | |
| "loss": 0.11521820724010468, | |
| "step": 3144 | |
| }, | |
| { | |
| "epoch": 2.3815291445874336, | |
| "grad_norm": 2.2112393379211426, | |
| "learning_rate": 1.3397193156892861e-06, | |
| "loss": 0.234744593501091, | |
| "step": 3146 | |
| }, | |
| { | |
| "epoch": 2.3830431491294473, | |
| "grad_norm": 2.064846992492676, | |
| "learning_rate": 1.3380747653559774e-06, | |
| "loss": 0.3616696298122406, | |
| "step": 3148 | |
| }, | |
| { | |
| "epoch": 2.384557153671461, | |
| "grad_norm": 3.4467649459838867, | |
| "learning_rate": 1.3364307038728712e-06, | |
| "loss": 0.5284278392791748, | |
| "step": 3150 | |
| }, | |
| { | |
| "epoch": 2.3860711582134746, | |
| "grad_norm": 1.8391021490097046, | |
| "learning_rate": 1.3347871338165446e-06, | |
| "loss": 0.21906384825706482, | |
| "step": 3152 | |
| }, | |
| { | |
| "epoch": 2.3875851627554883, | |
| "grad_norm": 2.8953263759613037, | |
| "learning_rate": 1.3331440577628072e-06, | |
| "loss": 0.5176718831062317, | |
| "step": 3154 | |
| }, | |
| { | |
| "epoch": 2.389099167297502, | |
| "grad_norm": 1.4748371839523315, | |
| "learning_rate": 1.3315014782866924e-06, | |
| "loss": 0.9320688843727112, | |
| "step": 3156 | |
| }, | |
| { | |
| "epoch": 2.3906131718395156, | |
| "grad_norm": 4.977100849151611, | |
| "learning_rate": 1.3298593979624551e-06, | |
| "loss": 0.07454890012741089, | |
| "step": 3158 | |
| }, | |
| { | |
| "epoch": 2.3921271763815293, | |
| "grad_norm": 2.885956048965454, | |
| "learning_rate": 1.3282178193635696e-06, | |
| "loss": 0.20941953361034393, | |
| "step": 3160 | |
| }, | |
| { | |
| "epoch": 2.393641180923543, | |
| "grad_norm": 1.426200032234192, | |
| "learning_rate": 1.3265767450627227e-06, | |
| "loss": 0.20474065840244293, | |
| "step": 3162 | |
| }, | |
| { | |
| "epoch": 2.3951551854655566, | |
| "grad_norm": 17.105777740478516, | |
| "learning_rate": 1.3249361776318117e-06, | |
| "loss": 0.33967655897140503, | |
| "step": 3164 | |
| }, | |
| { | |
| "epoch": 2.39666919000757, | |
| "grad_norm": 7.441493988037109, | |
| "learning_rate": 1.3232961196419376e-06, | |
| "loss": 0.5956662893295288, | |
| "step": 3166 | |
| }, | |
| { | |
| "epoch": 2.3981831945495835, | |
| "grad_norm": 0.9323331713676453, | |
| "learning_rate": 1.321656573663406e-06, | |
| "loss": 0.12122826278209686, | |
| "step": 3168 | |
| }, | |
| { | |
| "epoch": 2.399697199091597, | |
| "grad_norm": 1.7128007411956787, | |
| "learning_rate": 1.3200175422657182e-06, | |
| "loss": 0.5543103814125061, | |
| "step": 3170 | |
| }, | |
| { | |
| "epoch": 2.401211203633611, | |
| "grad_norm": 1.926504135131836, | |
| "learning_rate": 1.318379028017568e-06, | |
| "loss": 0.09472674131393433, | |
| "step": 3172 | |
| }, | |
| { | |
| "epoch": 2.4027252081756245, | |
| "grad_norm": 1.6713393926620483, | |
| "learning_rate": 1.3167410334868418e-06, | |
| "loss": 0.5352200269699097, | |
| "step": 3174 | |
| }, | |
| { | |
| "epoch": 2.404239212717638, | |
| "grad_norm": 1.0860508680343628, | |
| "learning_rate": 1.3151035612406088e-06, | |
| "loss": 0.6046788096427917, | |
| "step": 3176 | |
| }, | |
| { | |
| "epoch": 2.405753217259652, | |
| "grad_norm": 1.8982021808624268, | |
| "learning_rate": 1.3134666138451209e-06, | |
| "loss": 0.1095985621213913, | |
| "step": 3178 | |
| }, | |
| { | |
| "epoch": 2.4072672218016655, | |
| "grad_norm": 4.064739227294922, | |
| "learning_rate": 1.3118301938658064e-06, | |
| "loss": 0.37438011169433594, | |
| "step": 3180 | |
| }, | |
| { | |
| "epoch": 2.408781226343679, | |
| "grad_norm": 24.300867080688477, | |
| "learning_rate": 1.3101943038672687e-06, | |
| "loss": 0.14832612872123718, | |
| "step": 3182 | |
| }, | |
| { | |
| "epoch": 2.4102952308856924, | |
| "grad_norm": 3.4894673824310303, | |
| "learning_rate": 1.30855894641328e-06, | |
| "loss": 0.051485493779182434, | |
| "step": 3184 | |
| }, | |
| { | |
| "epoch": 2.4118092354277065, | |
| "grad_norm": 1.913360834121704, | |
| "learning_rate": 1.3069241240667765e-06, | |
| "loss": 0.8969357013702393, | |
| "step": 3186 | |
| }, | |
| { | |
| "epoch": 2.4133232399697198, | |
| "grad_norm": 2.673508644104004, | |
| "learning_rate": 1.3052898393898576e-06, | |
| "loss": 1.0752180814743042, | |
| "step": 3188 | |
| }, | |
| { | |
| "epoch": 2.4148372445117334, | |
| "grad_norm": 1.7779754400253296, | |
| "learning_rate": 1.303656094943779e-06, | |
| "loss": 0.637548565864563, | |
| "step": 3190 | |
| }, | |
| { | |
| "epoch": 2.416351249053747, | |
| "grad_norm": 2.6721699237823486, | |
| "learning_rate": 1.3020228932889508e-06, | |
| "loss": 0.1810387670993805, | |
| "step": 3192 | |
| }, | |
| { | |
| "epoch": 2.4178652535957608, | |
| "grad_norm": 3.1952381134033203, | |
| "learning_rate": 1.3003902369849306e-06, | |
| "loss": 0.5444831848144531, | |
| "step": 3194 | |
| }, | |
| { | |
| "epoch": 2.4193792581377744, | |
| "grad_norm": 2.336055040359497, | |
| "learning_rate": 1.2987581285904236e-06, | |
| "loss": 0.5591921210289001, | |
| "step": 3196 | |
| }, | |
| { | |
| "epoch": 2.420893262679788, | |
| "grad_norm": 5.412187576293945, | |
| "learning_rate": 1.2971265706632747e-06, | |
| "loss": 0.17424336075782776, | |
| "step": 3198 | |
| }, | |
| { | |
| "epoch": 2.4224072672218018, | |
| "grad_norm": 1.522705078125, | |
| "learning_rate": 1.2954955657604666e-06, | |
| "loss": 0.08939556032419205, | |
| "step": 3200 | |
| }, | |
| { | |
| "epoch": 2.4239212717638154, | |
| "grad_norm": 1.936441421508789, | |
| "learning_rate": 1.293865116438115e-06, | |
| "loss": 0.17253662645816803, | |
| "step": 3202 | |
| }, | |
| { | |
| "epoch": 2.425435276305829, | |
| "grad_norm": 6.317035675048828, | |
| "learning_rate": 1.2922352252514653e-06, | |
| "loss": 0.17524859309196472, | |
| "step": 3204 | |
| }, | |
| { | |
| "epoch": 2.4269492808478423, | |
| "grad_norm": 10.295120239257812, | |
| "learning_rate": 1.2906058947548886e-06, | |
| "loss": 0.510208249092102, | |
| "step": 3206 | |
| }, | |
| { | |
| "epoch": 2.428463285389856, | |
| "grad_norm": 1.5548295974731445, | |
| "learning_rate": 1.2889771275018757e-06, | |
| "loss": 0.9413988590240479, | |
| "step": 3208 | |
| }, | |
| { | |
| "epoch": 2.4299772899318697, | |
| "grad_norm": 2.107252597808838, | |
| "learning_rate": 1.287348926045037e-06, | |
| "loss": 0.44147780537605286, | |
| "step": 3210 | |
| }, | |
| { | |
| "epoch": 2.4314912944738833, | |
| "grad_norm": 2.5720386505126953, | |
| "learning_rate": 1.285721292936094e-06, | |
| "loss": 0.5519513487815857, | |
| "step": 3212 | |
| }, | |
| { | |
| "epoch": 2.433005299015897, | |
| "grad_norm": 1.7843011617660522, | |
| "learning_rate": 1.2840942307258784e-06, | |
| "loss": 0.5143168568611145, | |
| "step": 3214 | |
| }, | |
| { | |
| "epoch": 2.4345193035579107, | |
| "grad_norm": 2.339644432067871, | |
| "learning_rate": 1.2824677419643277e-06, | |
| "loss": 0.5728105902671814, | |
| "step": 3216 | |
| }, | |
| { | |
| "epoch": 2.4360333080999244, | |
| "grad_norm": 4.653833866119385, | |
| "learning_rate": 1.2808418292004795e-06, | |
| "loss": 0.48528730869293213, | |
| "step": 3218 | |
| }, | |
| { | |
| "epoch": 2.437547312641938, | |
| "grad_norm": 9.157849311828613, | |
| "learning_rate": 1.2792164949824702e-06, | |
| "loss": 0.5953377485275269, | |
| "step": 3220 | |
| }, | |
| { | |
| "epoch": 2.4390613171839517, | |
| "grad_norm": 1.5174750089645386, | |
| "learning_rate": 1.2775917418575284e-06, | |
| "loss": 0.5056532621383667, | |
| "step": 3222 | |
| }, | |
| { | |
| "epoch": 2.4405753217259654, | |
| "grad_norm": 2.567638874053955, | |
| "learning_rate": 1.275967572371971e-06, | |
| "loss": 0.5474219918251038, | |
| "step": 3224 | |
| }, | |
| { | |
| "epoch": 2.442089326267979, | |
| "grad_norm": 4.96596097946167, | |
| "learning_rate": 1.2743439890712035e-06, | |
| "loss": 0.12137976288795471, | |
| "step": 3226 | |
| }, | |
| { | |
| "epoch": 2.4436033308099923, | |
| "grad_norm": 1.7557021379470825, | |
| "learning_rate": 1.2727209944997099e-06, | |
| "loss": 0.867196261882782, | |
| "step": 3228 | |
| }, | |
| { | |
| "epoch": 2.445117335352006, | |
| "grad_norm": 3.688567876815796, | |
| "learning_rate": 1.2710985912010514e-06, | |
| "loss": 0.6646262407302856, | |
| "step": 3230 | |
| }, | |
| { | |
| "epoch": 2.4466313398940196, | |
| "grad_norm": 1.933400273323059, | |
| "learning_rate": 1.2694767817178651e-06, | |
| "loss": 0.4808739423751831, | |
| "step": 3232 | |
| }, | |
| { | |
| "epoch": 2.4481453444360333, | |
| "grad_norm": 1.9126027822494507, | |
| "learning_rate": 1.2678555685918549e-06, | |
| "loss": 0.7815792560577393, | |
| "step": 3234 | |
| }, | |
| { | |
| "epoch": 2.449659348978047, | |
| "grad_norm": 5.408775329589844, | |
| "learning_rate": 1.2662349543637915e-06, | |
| "loss": 0.5138024091720581, | |
| "step": 3236 | |
| }, | |
| { | |
| "epoch": 2.4511733535200606, | |
| "grad_norm": 6.258272647857666, | |
| "learning_rate": 1.2646149415735061e-06, | |
| "loss": 0.15966981649398804, | |
| "step": 3238 | |
| }, | |
| { | |
| "epoch": 2.4526873580620743, | |
| "grad_norm": 6.899496555328369, | |
| "learning_rate": 1.2629955327598884e-06, | |
| "loss": 0.4678770899772644, | |
| "step": 3240 | |
| }, | |
| { | |
| "epoch": 2.454201362604088, | |
| "grad_norm": 1.813686490058899, | |
| "learning_rate": 1.2613767304608808e-06, | |
| "loss": 0.9744421243667603, | |
| "step": 3242 | |
| }, | |
| { | |
| "epoch": 2.4557153671461016, | |
| "grad_norm": 7.620000839233398, | |
| "learning_rate": 1.2597585372134754e-06, | |
| "loss": 0.43850672245025635, | |
| "step": 3244 | |
| }, | |
| { | |
| "epoch": 2.457229371688115, | |
| "grad_norm": 1.3412652015686035, | |
| "learning_rate": 1.2581409555537087e-06, | |
| "loss": 0.06274896115064621, | |
| "step": 3246 | |
| }, | |
| { | |
| "epoch": 2.4587433762301285, | |
| "grad_norm": 1.733108639717102, | |
| "learning_rate": 1.2565239880166613e-06, | |
| "loss": 0.020732510834932327, | |
| "step": 3248 | |
| }, | |
| { | |
| "epoch": 2.460257380772142, | |
| "grad_norm": 1.8441457748413086, | |
| "learning_rate": 1.2549076371364487e-06, | |
| "loss": 0.09537363052368164, | |
| "step": 3250 | |
| }, | |
| { | |
| "epoch": 2.461771385314156, | |
| "grad_norm": 3.559648036956787, | |
| "learning_rate": 1.2532919054462209e-06, | |
| "loss": 0.21012158691883087, | |
| "step": 3252 | |
| }, | |
| { | |
| "epoch": 2.4632853898561695, | |
| "grad_norm": 1.165614128112793, | |
| "learning_rate": 1.2516767954781588e-06, | |
| "loss": 0.09470941871404648, | |
| "step": 3254 | |
| }, | |
| { | |
| "epoch": 2.464799394398183, | |
| "grad_norm": 2.507286310195923, | |
| "learning_rate": 1.250062309763467e-06, | |
| "loss": 0.46259403228759766, | |
| "step": 3256 | |
| }, | |
| { | |
| "epoch": 2.466313398940197, | |
| "grad_norm": 2.2950599193573, | |
| "learning_rate": 1.248448450832373e-06, | |
| "loss": 0.0759514644742012, | |
| "step": 3258 | |
| }, | |
| { | |
| "epoch": 2.4678274034822105, | |
| "grad_norm": 0.58045494556427, | |
| "learning_rate": 1.2468352212141202e-06, | |
| "loss": 0.11328712105751038, | |
| "step": 3260 | |
| }, | |
| { | |
| "epoch": 2.469341408024224, | |
| "grad_norm": 3.1575472354888916, | |
| "learning_rate": 1.245222623436969e-06, | |
| "loss": 0.7599681615829468, | |
| "step": 3262 | |
| }, | |
| { | |
| "epoch": 2.470855412566238, | |
| "grad_norm": 1.8420294523239136, | |
| "learning_rate": 1.243610660028186e-06, | |
| "loss": 0.2730555236339569, | |
| "step": 3264 | |
| }, | |
| { | |
| "epoch": 2.4723694171082515, | |
| "grad_norm": 4.679914951324463, | |
| "learning_rate": 1.2419993335140467e-06, | |
| "loss": 0.5341447591781616, | |
| "step": 3266 | |
| }, | |
| { | |
| "epoch": 2.4738834216502648, | |
| "grad_norm": 1.712031364440918, | |
| "learning_rate": 1.2403886464198259e-06, | |
| "loss": 0.44335949420928955, | |
| "step": 3268 | |
| }, | |
| { | |
| "epoch": 2.4753974261922784, | |
| "grad_norm": 2.764129638671875, | |
| "learning_rate": 1.2387786012697987e-06, | |
| "loss": 0.5020673274993896, | |
| "step": 3270 | |
| }, | |
| { | |
| "epoch": 2.476911430734292, | |
| "grad_norm": 6.6505842208862305, | |
| "learning_rate": 1.237169200587232e-06, | |
| "loss": 0.021848905831575394, | |
| "step": 3272 | |
| }, | |
| { | |
| "epoch": 2.4784254352763058, | |
| "grad_norm": 1.904347538948059, | |
| "learning_rate": 1.235560446894383e-06, | |
| "loss": 0.23235182464122772, | |
| "step": 3274 | |
| }, | |
| { | |
| "epoch": 2.4799394398183194, | |
| "grad_norm": 3.316446542739868, | |
| "learning_rate": 1.233952342712497e-06, | |
| "loss": 0.16898798942565918, | |
| "step": 3276 | |
| }, | |
| { | |
| "epoch": 2.481453444360333, | |
| "grad_norm": 3.6128406524658203, | |
| "learning_rate": 1.232344890561799e-06, | |
| "loss": 0.5249270796775818, | |
| "step": 3278 | |
| }, | |
| { | |
| "epoch": 2.4829674489023468, | |
| "grad_norm": 2.04036021232605, | |
| "learning_rate": 1.2307380929614932e-06, | |
| "loss": 0.5710758566856384, | |
| "step": 3280 | |
| }, | |
| { | |
| "epoch": 2.4844814534443604, | |
| "grad_norm": 1.612221121788025, | |
| "learning_rate": 1.2291319524297573e-06, | |
| "loss": 0.4911920726299286, | |
| "step": 3282 | |
| }, | |
| { | |
| "epoch": 2.485995457986374, | |
| "grad_norm": 2.189225196838379, | |
| "learning_rate": 1.2275264714837408e-06, | |
| "loss": 0.12952546775341034, | |
| "step": 3284 | |
| }, | |
| { | |
| "epoch": 2.4875094625283873, | |
| "grad_norm": 2.0172598361968994, | |
| "learning_rate": 1.225921652639558e-06, | |
| "loss": 0.3338145315647125, | |
| "step": 3286 | |
| }, | |
| { | |
| "epoch": 2.4890234670704015, | |
| "grad_norm": 1.3855159282684326, | |
| "learning_rate": 1.2243174984122853e-06, | |
| "loss": 0.4880499541759491, | |
| "step": 3288 | |
| }, | |
| { | |
| "epoch": 2.4905374716124147, | |
| "grad_norm": 1.6459860801696777, | |
| "learning_rate": 1.2227140113159594e-06, | |
| "loss": 1.032573938369751, | |
| "step": 3290 | |
| }, | |
| { | |
| "epoch": 2.4920514761544283, | |
| "grad_norm": 3.479882001876831, | |
| "learning_rate": 1.2211111938635695e-06, | |
| "loss": 0.8720075488090515, | |
| "step": 3292 | |
| }, | |
| { | |
| "epoch": 2.493565480696442, | |
| "grad_norm": 2.3142521381378174, | |
| "learning_rate": 1.2195090485670563e-06, | |
| "loss": 0.9480175971984863, | |
| "step": 3294 | |
| }, | |
| { | |
| "epoch": 2.4950794852384557, | |
| "grad_norm": 2.713301181793213, | |
| "learning_rate": 1.2179075779373064e-06, | |
| "loss": 0.597554624080658, | |
| "step": 3296 | |
| }, | |
| { | |
| "epoch": 2.4965934897804694, | |
| "grad_norm": 1.2404519319534302, | |
| "learning_rate": 1.216306784484151e-06, | |
| "loss": 0.5238319039344788, | |
| "step": 3298 | |
| }, | |
| { | |
| "epoch": 2.498107494322483, | |
| "grad_norm": 2.1829569339752197, | |
| "learning_rate": 1.2147066707163578e-06, | |
| "loss": 0.2818773686885834, | |
| "step": 3300 | |
| }, | |
| { | |
| "epoch": 2.4996214988644967, | |
| "grad_norm": 0.9500485062599182, | |
| "learning_rate": 1.2131072391416298e-06, | |
| "loss": 0.5698157548904419, | |
| "step": 3302 | |
| }, | |
| { | |
| "epoch": 2.5011355034065104, | |
| "grad_norm": 42.315731048583984, | |
| "learning_rate": 1.2115084922666007e-06, | |
| "loss": 0.926612138748169, | |
| "step": 3304 | |
| }, | |
| { | |
| "epoch": 2.502649507948524, | |
| "grad_norm": 2.067148447036743, | |
| "learning_rate": 1.2099104325968327e-06, | |
| "loss": 0.5611676573753357, | |
| "step": 3306 | |
| }, | |
| { | |
| "epoch": 2.5041635124905373, | |
| "grad_norm": 1.7812166213989258, | |
| "learning_rate": 1.20831306263681e-06, | |
| "loss": 0.3297508656978607, | |
| "step": 3308 | |
| }, | |
| { | |
| "epoch": 2.5056775170325514, | |
| "grad_norm": 2.64509916305542, | |
| "learning_rate": 1.2067163848899345e-06, | |
| "loss": 0.36095792055130005, | |
| "step": 3310 | |
| }, | |
| { | |
| "epoch": 2.5071915215745646, | |
| "grad_norm": 1.3159887790679932, | |
| "learning_rate": 1.2051204018585258e-06, | |
| "loss": 0.45377278327941895, | |
| "step": 3312 | |
| }, | |
| { | |
| "epoch": 2.5087055261165783, | |
| "grad_norm": 1.2811301946640015, | |
| "learning_rate": 1.203525116043813e-06, | |
| "loss": 0.5460622906684875, | |
| "step": 3314 | |
| }, | |
| { | |
| "epoch": 2.510219530658592, | |
| "grad_norm": 1.6105045080184937, | |
| "learning_rate": 1.201930529945933e-06, | |
| "loss": 0.5222399830818176, | |
| "step": 3316 | |
| }, | |
| { | |
| "epoch": 2.5117335352006056, | |
| "grad_norm": 1.8894621133804321, | |
| "learning_rate": 1.2003366460639257e-06, | |
| "loss": 0.47699031233787537, | |
| "step": 3318 | |
| }, | |
| { | |
| "epoch": 2.5132475397426193, | |
| "grad_norm": 7.224452972412109, | |
| "learning_rate": 1.1987434668957316e-06, | |
| "loss": 0.18395495414733887, | |
| "step": 3320 | |
| }, | |
| { | |
| "epoch": 2.514761544284633, | |
| "grad_norm": 2.792215585708618, | |
| "learning_rate": 1.1971509949381862e-06, | |
| "loss": 0.962587833404541, | |
| "step": 3322 | |
| }, | |
| { | |
| "epoch": 2.5162755488266466, | |
| "grad_norm": 2.333487033843994, | |
| "learning_rate": 1.1955592326870153e-06, | |
| "loss": 0.9294099807739258, | |
| "step": 3324 | |
| }, | |
| { | |
| "epoch": 2.51778955336866, | |
| "grad_norm": 2.340708017349243, | |
| "learning_rate": 1.1939681826368353e-06, | |
| "loss": 0.551225483417511, | |
| "step": 3326 | |
| }, | |
| { | |
| "epoch": 2.519303557910674, | |
| "grad_norm": 4.234295845031738, | |
| "learning_rate": 1.192377847281144e-06, | |
| "loss": 0.5359384417533875, | |
| "step": 3328 | |
| }, | |
| { | |
| "epoch": 2.520817562452687, | |
| "grad_norm": 9.935158729553223, | |
| "learning_rate": 1.1907882291123196e-06, | |
| "loss": 0.18878018856048584, | |
| "step": 3330 | |
| }, | |
| { | |
| "epoch": 2.522331566994701, | |
| "grad_norm": 1.8245105743408203, | |
| "learning_rate": 1.1891993306216168e-06, | |
| "loss": 0.5585557222366333, | |
| "step": 3332 | |
| }, | |
| { | |
| "epoch": 2.5238455715367145, | |
| "grad_norm": 3.751849889755249, | |
| "learning_rate": 1.187611154299163e-06, | |
| "loss": 0.695787250995636, | |
| "step": 3334 | |
| }, | |
| { | |
| "epoch": 2.525359576078728, | |
| "grad_norm": 1.6436039209365845, | |
| "learning_rate": 1.1860237026339524e-06, | |
| "loss": 0.5512560606002808, | |
| "step": 3336 | |
| }, | |
| { | |
| "epoch": 2.526873580620742, | |
| "grad_norm": 1.9282137155532837, | |
| "learning_rate": 1.1844369781138445e-06, | |
| "loss": 0.5277320146560669, | |
| "step": 3338 | |
| }, | |
| { | |
| "epoch": 2.5283875851627555, | |
| "grad_norm": 1.7889491319656372, | |
| "learning_rate": 1.1828509832255586e-06, | |
| "loss": 0.790787935256958, | |
| "step": 3340 | |
| }, | |
| { | |
| "epoch": 2.529901589704769, | |
| "grad_norm": 1.8602066040039062, | |
| "learning_rate": 1.181265720454671e-06, | |
| "loss": 0.5047529935836792, | |
| "step": 3342 | |
| }, | |
| { | |
| "epoch": 2.531415594246783, | |
| "grad_norm": 1.6128684282302856, | |
| "learning_rate": 1.1796811922856107e-06, | |
| "loss": 1.01747727394104, | |
| "step": 3344 | |
| }, | |
| { | |
| "epoch": 2.5329295987887965, | |
| "grad_norm": 2.1299707889556885, | |
| "learning_rate": 1.1780974012016552e-06, | |
| "loss": 0.1209339052438736, | |
| "step": 3346 | |
| }, | |
| { | |
| "epoch": 2.5344436033308098, | |
| "grad_norm": 1.494289755821228, | |
| "learning_rate": 1.1765143496849262e-06, | |
| "loss": 0.11673936992883682, | |
| "step": 3348 | |
| }, | |
| { | |
| "epoch": 2.535957607872824, | |
| "grad_norm": 10.133544921875, | |
| "learning_rate": 1.1749320402163878e-06, | |
| "loss": 0.04495559260249138, | |
| "step": 3350 | |
| }, | |
| { | |
| "epoch": 2.537471612414837, | |
| "grad_norm": 1.7074990272521973, | |
| "learning_rate": 1.1733504752758404e-06, | |
| "loss": 0.6414316892623901, | |
| "step": 3352 | |
| }, | |
| { | |
| "epoch": 2.5389856169568508, | |
| "grad_norm": 2.5590038299560547, | |
| "learning_rate": 1.1717696573419162e-06, | |
| "loss": 0.27318763732910156, | |
| "step": 3354 | |
| }, | |
| { | |
| "epoch": 2.5404996214988644, | |
| "grad_norm": 2.422823667526245, | |
| "learning_rate": 1.1701895888920792e-06, | |
| "loss": 0.3937920331954956, | |
| "step": 3356 | |
| }, | |
| { | |
| "epoch": 2.542013626040878, | |
| "grad_norm": 5.183249473571777, | |
| "learning_rate": 1.1686102724026177e-06, | |
| "loss": 0.6359275579452515, | |
| "step": 3358 | |
| }, | |
| { | |
| "epoch": 2.5435276305828918, | |
| "grad_norm": 2.020559549331665, | |
| "learning_rate": 1.1670317103486403e-06, | |
| "loss": 0.5142892599105835, | |
| "step": 3360 | |
| }, | |
| { | |
| "epoch": 2.5450416351249054, | |
| "grad_norm": 5.478321552276611, | |
| "learning_rate": 1.165453905204076e-06, | |
| "loss": 0.32605746388435364, | |
| "step": 3362 | |
| }, | |
| { | |
| "epoch": 2.546555639666919, | |
| "grad_norm": 1.0260086059570312, | |
| "learning_rate": 1.1638768594416648e-06, | |
| "loss": 0.6114858388900757, | |
| "step": 3364 | |
| }, | |
| { | |
| "epoch": 2.548069644208933, | |
| "grad_norm": 15.645800590515137, | |
| "learning_rate": 1.162300575532958e-06, | |
| "loss": 0.07219941914081573, | |
| "step": 3366 | |
| }, | |
| { | |
| "epoch": 2.5495836487509465, | |
| "grad_norm": 2.069432497024536, | |
| "learning_rate": 1.1607250559483121e-06, | |
| "loss": 0.9569191336631775, | |
| "step": 3368 | |
| }, | |
| { | |
| "epoch": 2.5510976532929597, | |
| "grad_norm": 1.3018686771392822, | |
| "learning_rate": 1.1591503031568875e-06, | |
| "loss": 0.5765461921691895, | |
| "step": 3370 | |
| }, | |
| { | |
| "epoch": 2.5526116578349733, | |
| "grad_norm": 3.0679144859313965, | |
| "learning_rate": 1.1575763196266412e-06, | |
| "loss": 0.6863498687744141, | |
| "step": 3372 | |
| }, | |
| { | |
| "epoch": 2.554125662376987, | |
| "grad_norm": 3.4148383140563965, | |
| "learning_rate": 1.1560031078243248e-06, | |
| "loss": 0.5839001536369324, | |
| "step": 3374 | |
| }, | |
| { | |
| "epoch": 2.5556396669190007, | |
| "grad_norm": 7.998443603515625, | |
| "learning_rate": 1.1544306702154807e-06, | |
| "loss": 0.8170933723449707, | |
| "step": 3376 | |
| }, | |
| { | |
| "epoch": 2.5571536714610144, | |
| "grad_norm": 2.7478621006011963, | |
| "learning_rate": 1.1528590092644387e-06, | |
| "loss": 0.3825238049030304, | |
| "step": 3378 | |
| }, | |
| { | |
| "epoch": 2.558667676003028, | |
| "grad_norm": 2.4262313842773438, | |
| "learning_rate": 1.1512881274343105e-06, | |
| "loss": 0.9992386102676392, | |
| "step": 3380 | |
| }, | |
| { | |
| "epoch": 2.5601816805450417, | |
| "grad_norm": 3.3657608032226562, | |
| "learning_rate": 1.1497180271869862e-06, | |
| "loss": 0.5366327166557312, | |
| "step": 3382 | |
| }, | |
| { | |
| "epoch": 2.5616956850870554, | |
| "grad_norm": 3.6988351345062256, | |
| "learning_rate": 1.1481487109831329e-06, | |
| "loss": 0.9917261004447937, | |
| "step": 3384 | |
| }, | |
| { | |
| "epoch": 2.563209689629069, | |
| "grad_norm": 2.595430374145508, | |
| "learning_rate": 1.1465801812821875e-06, | |
| "loss": 0.06481704860925674, | |
| "step": 3386 | |
| }, | |
| { | |
| "epoch": 2.5647236941710823, | |
| "grad_norm": 2.457298994064331, | |
| "learning_rate": 1.1450124405423544e-06, | |
| "loss": 0.8893528580665588, | |
| "step": 3388 | |
| }, | |
| { | |
| "epoch": 2.5662376987130964, | |
| "grad_norm": 2.416517972946167, | |
| "learning_rate": 1.1434454912206018e-06, | |
| "loss": 0.14643681049346924, | |
| "step": 3390 | |
| }, | |
| { | |
| "epoch": 2.5677517032551096, | |
| "grad_norm": 2.499912977218628, | |
| "learning_rate": 1.1418793357726579e-06, | |
| "loss": 0.07367072999477386, | |
| "step": 3392 | |
| }, | |
| { | |
| "epoch": 2.5692657077971233, | |
| "grad_norm": 1.3387746810913086, | |
| "learning_rate": 1.1403139766530063e-06, | |
| "loss": 0.48521560430526733, | |
| "step": 3394 | |
| }, | |
| { | |
| "epoch": 2.570779712339137, | |
| "grad_norm": 1.4146757125854492, | |
| "learning_rate": 1.1387494163148827e-06, | |
| "loss": 0.07745841890573502, | |
| "step": 3396 | |
| }, | |
| { | |
| "epoch": 2.5722937168811506, | |
| "grad_norm": 3.8005497455596924, | |
| "learning_rate": 1.1371856572102705e-06, | |
| "loss": 0.27372169494628906, | |
| "step": 3398 | |
| }, | |
| { | |
| "epoch": 2.5738077214231643, | |
| "grad_norm": 5.660574913024902, | |
| "learning_rate": 1.1356227017898985e-06, | |
| "loss": 0.5278230905532837, | |
| "step": 3400 | |
| }, | |
| { | |
| "epoch": 2.575321725965178, | |
| "grad_norm": 1.703931212425232, | |
| "learning_rate": 1.1340605525032353e-06, | |
| "loss": 0.5331737995147705, | |
| "step": 3402 | |
| }, | |
| { | |
| "epoch": 2.5768357305071916, | |
| "grad_norm": 3.0242526531219482, | |
| "learning_rate": 1.1324992117984852e-06, | |
| "loss": 0.9489548206329346, | |
| "step": 3404 | |
| }, | |
| { | |
| "epoch": 2.5783497350492053, | |
| "grad_norm": 18.639877319335938, | |
| "learning_rate": 1.1309386821225879e-06, | |
| "loss": 0.5219172239303589, | |
| "step": 3406 | |
| }, | |
| { | |
| "epoch": 2.579863739591219, | |
| "grad_norm": 2.207932233810425, | |
| "learning_rate": 1.1293789659212089e-06, | |
| "loss": 0.5801731944084167, | |
| "step": 3408 | |
| }, | |
| { | |
| "epoch": 2.581377744133232, | |
| "grad_norm": 1.7648814916610718, | |
| "learning_rate": 1.127820065638741e-06, | |
| "loss": 0.8760764598846436, | |
| "step": 3410 | |
| }, | |
| { | |
| "epoch": 2.5828917486752463, | |
| "grad_norm": 5.7800517082214355, | |
| "learning_rate": 1.1262619837182968e-06, | |
| "loss": 0.18599385023117065, | |
| "step": 3412 | |
| }, | |
| { | |
| "epoch": 2.5844057532172595, | |
| "grad_norm": 2.5123376846313477, | |
| "learning_rate": 1.1247047226017085e-06, | |
| "loss": 0.5670759677886963, | |
| "step": 3414 | |
| }, | |
| { | |
| "epoch": 2.585919757759273, | |
| "grad_norm": 2.1895976066589355, | |
| "learning_rate": 1.1231482847295195e-06, | |
| "loss": 0.8945675492286682, | |
| "step": 3416 | |
| }, | |
| { | |
| "epoch": 2.587433762301287, | |
| "grad_norm": 2.59700345993042, | |
| "learning_rate": 1.1215926725409841e-06, | |
| "loss": 0.17124387621879578, | |
| "step": 3418 | |
| }, | |
| { | |
| "epoch": 2.5889477668433005, | |
| "grad_norm": 1.7561355829238892, | |
| "learning_rate": 1.1200378884740637e-06, | |
| "loss": 0.5565921068191528, | |
| "step": 3420 | |
| }, | |
| { | |
| "epoch": 2.590461771385314, | |
| "grad_norm": 4.272517204284668, | |
| "learning_rate": 1.1184839349654195e-06, | |
| "loss": 0.17669063806533813, | |
| "step": 3422 | |
| }, | |
| { | |
| "epoch": 2.591975775927328, | |
| "grad_norm": 2.457369327545166, | |
| "learning_rate": 1.116930814450413e-06, | |
| "loss": 0.9974216222763062, | |
| "step": 3424 | |
| }, | |
| { | |
| "epoch": 2.5934897804693415, | |
| "grad_norm": 3.1485443115234375, | |
| "learning_rate": 1.1153785293630988e-06, | |
| "loss": 0.49294838309288025, | |
| "step": 3426 | |
| }, | |
| { | |
| "epoch": 2.5950037850113548, | |
| "grad_norm": 1.056787133216858, | |
| "learning_rate": 1.1138270821362239e-06, | |
| "loss": 0.9152722358703613, | |
| "step": 3428 | |
| }, | |
| { | |
| "epoch": 2.596517789553369, | |
| "grad_norm": 3.3435306549072266, | |
| "learning_rate": 1.1122764752012208e-06, | |
| "loss": 0.10038082301616669, | |
| "step": 3430 | |
| }, | |
| { | |
| "epoch": 2.598031794095382, | |
| "grad_norm": 1.7850005626678467, | |
| "learning_rate": 1.1107267109882053e-06, | |
| "loss": 0.6021050810813904, | |
| "step": 3432 | |
| }, | |
| { | |
| "epoch": 2.5995457986373958, | |
| "grad_norm": 4.395745277404785, | |
| "learning_rate": 1.1091777919259723e-06, | |
| "loss": 0.5003665089607239, | |
| "step": 3434 | |
| }, | |
| { | |
| "epoch": 2.6010598031794094, | |
| "grad_norm": 1.6627795696258545, | |
| "learning_rate": 1.107629720441994e-06, | |
| "loss": 0.3916073143482208, | |
| "step": 3436 | |
| }, | |
| { | |
| "epoch": 2.602573807721423, | |
| "grad_norm": 1.2196578979492188, | |
| "learning_rate": 1.1060824989624123e-06, | |
| "loss": 0.6092214584350586, | |
| "step": 3438 | |
| }, | |
| { | |
| "epoch": 2.6040878122634368, | |
| "grad_norm": 1.57082200050354, | |
| "learning_rate": 1.1045361299120364e-06, | |
| "loss": 0.06277672201395035, | |
| "step": 3440 | |
| }, | |
| { | |
| "epoch": 2.6056018168054504, | |
| "grad_norm": 3.2692267894744873, | |
| "learning_rate": 1.1029906157143425e-06, | |
| "loss": 0.7701391577720642, | |
| "step": 3442 | |
| }, | |
| { | |
| "epoch": 2.607115821347464, | |
| "grad_norm": 4.937279224395752, | |
| "learning_rate": 1.1014459587914638e-06, | |
| "loss": 0.5263334512710571, | |
| "step": 3444 | |
| }, | |
| { | |
| "epoch": 2.608629825889478, | |
| "grad_norm": 1.818088412284851, | |
| "learning_rate": 1.0999021615641927e-06, | |
| "loss": 0.3035053312778473, | |
| "step": 3446 | |
| }, | |
| { | |
| "epoch": 2.6101438304314915, | |
| "grad_norm": 2.087446451187134, | |
| "learning_rate": 1.0983592264519717e-06, | |
| "loss": 0.537712812423706, | |
| "step": 3448 | |
| }, | |
| { | |
| "epoch": 2.6116578349735047, | |
| "grad_norm": 1.2241779565811157, | |
| "learning_rate": 1.0968171558728945e-06, | |
| "loss": 0.5087027549743652, | |
| "step": 3450 | |
| }, | |
| { | |
| "epoch": 2.613171839515519, | |
| "grad_norm": 18.9871826171875, | |
| "learning_rate": 1.0952759522436987e-06, | |
| "loss": 0.8322153687477112, | |
| "step": 3452 | |
| }, | |
| { | |
| "epoch": 2.614685844057532, | |
| "grad_norm": 1.5005947351455688, | |
| "learning_rate": 1.0937356179797627e-06, | |
| "loss": 0.970147967338562, | |
| "step": 3454 | |
| }, | |
| { | |
| "epoch": 2.6161998485995457, | |
| "grad_norm": 4.126323699951172, | |
| "learning_rate": 1.092196155495105e-06, | |
| "loss": 0.18110370635986328, | |
| "step": 3456 | |
| }, | |
| { | |
| "epoch": 2.6177138531415594, | |
| "grad_norm": 1.706655502319336, | |
| "learning_rate": 1.0906575672023743e-06, | |
| "loss": 0.723210334777832, | |
| "step": 3458 | |
| }, | |
| { | |
| "epoch": 2.619227857683573, | |
| "grad_norm": 1.5822142362594604, | |
| "learning_rate": 1.0891198555128516e-06, | |
| "loss": 0.9544146656990051, | |
| "step": 3460 | |
| }, | |
| { | |
| "epoch": 2.6207418622255867, | |
| "grad_norm": 2.0501010417938232, | |
| "learning_rate": 1.0875830228364431e-06, | |
| "loss": 0.5467653870582581, | |
| "step": 3462 | |
| }, | |
| { | |
| "epoch": 2.6222558667676004, | |
| "grad_norm": 1.315675139427185, | |
| "learning_rate": 1.0860470715816785e-06, | |
| "loss": 0.13321208953857422, | |
| "step": 3464 | |
| }, | |
| { | |
| "epoch": 2.623769871309614, | |
| "grad_norm": 3.8847084045410156, | |
| "learning_rate": 1.0845120041557049e-06, | |
| "loss": 0.7330424785614014, | |
| "step": 3466 | |
| }, | |
| { | |
| "epoch": 2.6252838758516277, | |
| "grad_norm": 8.901777267456055, | |
| "learning_rate": 1.0829778229642848e-06, | |
| "loss": 0.04311962053179741, | |
| "step": 3468 | |
| }, | |
| { | |
| "epoch": 2.6267978803936414, | |
| "grad_norm": 2.727778434753418, | |
| "learning_rate": 1.0814445304117917e-06, | |
| "loss": 0.07144928723573685, | |
| "step": 3470 | |
| }, | |
| { | |
| "epoch": 2.6283118849356546, | |
| "grad_norm": 2.459710121154785, | |
| "learning_rate": 1.0799121289012068e-06, | |
| "loss": 0.5683809518814087, | |
| "step": 3472 | |
| }, | |
| { | |
| "epoch": 2.6298258894776687, | |
| "grad_norm": 4.345935344696045, | |
| "learning_rate": 1.0783806208341141e-06, | |
| "loss": 0.2112594097852707, | |
| "step": 3474 | |
| }, | |
| { | |
| "epoch": 2.631339894019682, | |
| "grad_norm": 2.6355857849121094, | |
| "learning_rate": 1.0768500086106978e-06, | |
| "loss": 0.14096690714359283, | |
| "step": 3476 | |
| }, | |
| { | |
| "epoch": 2.6328538985616956, | |
| "grad_norm": 0.5131763219833374, | |
| "learning_rate": 1.075320294629739e-06, | |
| "loss": 0.46342194080352783, | |
| "step": 3478 | |
| }, | |
| { | |
| "epoch": 2.6343679031037093, | |
| "grad_norm": 1.176900029182434, | |
| "learning_rate": 1.0737914812886094e-06, | |
| "loss": 0.5241625308990479, | |
| "step": 3480 | |
| }, | |
| { | |
| "epoch": 2.635881907645723, | |
| "grad_norm": 3.1469740867614746, | |
| "learning_rate": 1.07226357098327e-06, | |
| "loss": 0.14466527104377747, | |
| "step": 3482 | |
| }, | |
| { | |
| "epoch": 2.6373959121877366, | |
| "grad_norm": 1.3433618545532227, | |
| "learning_rate": 1.0707365661082674e-06, | |
| "loss": 0.1578199863433838, | |
| "step": 3484 | |
| }, | |
| { | |
| "epoch": 2.6389099167297503, | |
| "grad_norm": 2.3391337394714355, | |
| "learning_rate": 1.069210469056727e-06, | |
| "loss": 0.03919367492198944, | |
| "step": 3486 | |
| }, | |
| { | |
| "epoch": 2.640423921271764, | |
| "grad_norm": 3.255871057510376, | |
| "learning_rate": 1.0676852822203547e-06, | |
| "loss": 0.3218518793582916, | |
| "step": 3488 | |
| }, | |
| { | |
| "epoch": 2.641937925813777, | |
| "grad_norm": 3.4141852855682373, | |
| "learning_rate": 1.0661610079894268e-06, | |
| "loss": 1.0428060293197632, | |
| "step": 3490 | |
| }, | |
| { | |
| "epoch": 2.6434519303557913, | |
| "grad_norm": 2.1213839054107666, | |
| "learning_rate": 1.0646376487527907e-06, | |
| "loss": 0.5885476469993591, | |
| "step": 3492 | |
| }, | |
| { | |
| "epoch": 2.6449659348978045, | |
| "grad_norm": 4.027782440185547, | |
| "learning_rate": 1.0631152068978604e-06, | |
| "loss": 0.17618165910243988, | |
| "step": 3494 | |
| }, | |
| { | |
| "epoch": 2.646479939439818, | |
| "grad_norm": 1.679085373878479, | |
| "learning_rate": 1.0615936848106113e-06, | |
| "loss": 0.9989380836486816, | |
| "step": 3496 | |
| }, | |
| { | |
| "epoch": 2.647993943981832, | |
| "grad_norm": 1.7569453716278076, | |
| "learning_rate": 1.0600730848755767e-06, | |
| "loss": 0.47763514518737793, | |
| "step": 3498 | |
| }, | |
| { | |
| "epoch": 2.6495079485238455, | |
| "grad_norm": 3.1439387798309326, | |
| "learning_rate": 1.058553409475847e-06, | |
| "loss": 0.4873977601528168, | |
| "step": 3500 | |
| }, | |
| { | |
| "epoch": 2.651021953065859, | |
| "grad_norm": 1.0988174676895142, | |
| "learning_rate": 1.0570346609930612e-06, | |
| "loss": 0.39969107508659363, | |
| "step": 3502 | |
| }, | |
| { | |
| "epoch": 2.652535957607873, | |
| "grad_norm": 1.5461552143096924, | |
| "learning_rate": 1.0555168418074074e-06, | |
| "loss": 0.21987293660640717, | |
| "step": 3504 | |
| }, | |
| { | |
| "epoch": 2.6540499621498865, | |
| "grad_norm": 1.8398942947387695, | |
| "learning_rate": 1.0539999542976152e-06, | |
| "loss": 0.9838016629219055, | |
| "step": 3506 | |
| }, | |
| { | |
| "epoch": 2.6555639666919, | |
| "grad_norm": 2.6948370933532715, | |
| "learning_rate": 1.0524840008409575e-06, | |
| "loss": 0.08101513981819153, | |
| "step": 3508 | |
| }, | |
| { | |
| "epoch": 2.657077971233914, | |
| "grad_norm": 6.337255477905273, | |
| "learning_rate": 1.0509689838132395e-06, | |
| "loss": 0.010323734022676945, | |
| "step": 3510 | |
| }, | |
| { | |
| "epoch": 2.658591975775927, | |
| "grad_norm": 2.0004448890686035, | |
| "learning_rate": 1.0494549055888013e-06, | |
| "loss": 0.5439300537109375, | |
| "step": 3512 | |
| }, | |
| { | |
| "epoch": 2.660105980317941, | |
| "grad_norm": 1.2642723321914673, | |
| "learning_rate": 1.0479417685405115e-06, | |
| "loss": 0.8853827714920044, | |
| "step": 3514 | |
| }, | |
| { | |
| "epoch": 2.6616199848599544, | |
| "grad_norm": 5.2497687339782715, | |
| "learning_rate": 1.0464295750397626e-06, | |
| "loss": 0.08229418843984604, | |
| "step": 3516 | |
| }, | |
| { | |
| "epoch": 2.663133989401968, | |
| "grad_norm": 2.096442937850952, | |
| "learning_rate": 1.044918327456469e-06, | |
| "loss": 0.7499485015869141, | |
| "step": 3518 | |
| }, | |
| { | |
| "epoch": 2.6646479939439818, | |
| "grad_norm": 1.2751201391220093, | |
| "learning_rate": 1.0434080281590626e-06, | |
| "loss": 0.6057901382446289, | |
| "step": 3520 | |
| }, | |
| { | |
| "epoch": 2.6661619984859954, | |
| "grad_norm": 2.4840683937072754, | |
| "learning_rate": 1.0418986795144896e-06, | |
| "loss": 0.2760433852672577, | |
| "step": 3522 | |
| }, | |
| { | |
| "epoch": 2.667676003028009, | |
| "grad_norm": 2.1772100925445557, | |
| "learning_rate": 1.0403902838882056e-06, | |
| "loss": 0.6145902276039124, | |
| "step": 3524 | |
| }, | |
| { | |
| "epoch": 2.669190007570023, | |
| "grad_norm": 1.2478702068328857, | |
| "learning_rate": 1.0388828436441733e-06, | |
| "loss": 0.9742065072059631, | |
| "step": 3526 | |
| }, | |
| { | |
| "epoch": 2.6707040121120365, | |
| "grad_norm": 0.7841724753379822, | |
| "learning_rate": 1.0373763611448567e-06, | |
| "loss": 0.47128647565841675, | |
| "step": 3528 | |
| }, | |
| { | |
| "epoch": 2.67221801665405, | |
| "grad_norm": 1.6649569272994995, | |
| "learning_rate": 1.035870838751221e-06, | |
| "loss": 0.5873335599899292, | |
| "step": 3530 | |
| }, | |
| { | |
| "epoch": 2.673732021196064, | |
| "grad_norm": 30.892988204956055, | |
| "learning_rate": 1.0343662788227249e-06, | |
| "loss": 0.08930249512195587, | |
| "step": 3532 | |
| }, | |
| { | |
| "epoch": 2.675246025738077, | |
| "grad_norm": 2.34309458732605, | |
| "learning_rate": 1.0328626837173202e-06, | |
| "loss": 0.5236271619796753, | |
| "step": 3534 | |
| }, | |
| { | |
| "epoch": 2.6767600302800907, | |
| "grad_norm": 1.6401013135910034, | |
| "learning_rate": 1.0313600557914452e-06, | |
| "loss": 0.28992849588394165, | |
| "step": 3536 | |
| }, | |
| { | |
| "epoch": 2.6782740348221044, | |
| "grad_norm": 1.5496026277542114, | |
| "learning_rate": 1.029858397400023e-06, | |
| "loss": 0.11498652398586273, | |
| "step": 3538 | |
| }, | |
| { | |
| "epoch": 2.679788039364118, | |
| "grad_norm": 2.9359993934631348, | |
| "learning_rate": 1.028357710896458e-06, | |
| "loss": 0.45273083448410034, | |
| "step": 3540 | |
| }, | |
| { | |
| "epoch": 2.6813020439061317, | |
| "grad_norm": 3.2220873832702637, | |
| "learning_rate": 1.0268579986326298e-06, | |
| "loss": 0.5863102674484253, | |
| "step": 3542 | |
| }, | |
| { | |
| "epoch": 2.6828160484481454, | |
| "grad_norm": 4.268006324768066, | |
| "learning_rate": 1.0253592629588934e-06, | |
| "loss": 0.9020994305610657, | |
| "step": 3544 | |
| }, | |
| { | |
| "epoch": 2.684330052990159, | |
| "grad_norm": 2.0981273651123047, | |
| "learning_rate": 1.0238615062240713e-06, | |
| "loss": 0.4718743860721588, | |
| "step": 3546 | |
| }, | |
| { | |
| "epoch": 2.6858440575321727, | |
| "grad_norm": 2.6294424533843994, | |
| "learning_rate": 1.0223647307754524e-06, | |
| "loss": 0.38430675864219666, | |
| "step": 3548 | |
| }, | |
| { | |
| "epoch": 2.6873580620741864, | |
| "grad_norm": 0.6526815295219421, | |
| "learning_rate": 1.0208689389587875e-06, | |
| "loss": 0.10240335762500763, | |
| "step": 3550 | |
| }, | |
| { | |
| "epoch": 2.6888720666161996, | |
| "grad_norm": 1.712877869606018, | |
| "learning_rate": 1.0193741331182873e-06, | |
| "loss": 0.18102604150772095, | |
| "step": 3552 | |
| }, | |
| { | |
| "epoch": 2.6903860711582137, | |
| "grad_norm": 3.130396842956543, | |
| "learning_rate": 1.0178803155966158e-06, | |
| "loss": 0.34598445892333984, | |
| "step": 3554 | |
| }, | |
| { | |
| "epoch": 2.691900075700227, | |
| "grad_norm": 0.3503313660621643, | |
| "learning_rate": 1.0163874887348873e-06, | |
| "loss": 0.09671512991189957, | |
| "step": 3556 | |
| }, | |
| { | |
| "epoch": 2.6934140802422406, | |
| "grad_norm": 6.0429792404174805, | |
| "learning_rate": 1.0148956548726668e-06, | |
| "loss": 0.5806812644004822, | |
| "step": 3558 | |
| }, | |
| { | |
| "epoch": 2.6949280847842543, | |
| "grad_norm": 1.3720959424972534, | |
| "learning_rate": 1.0134048163479599e-06, | |
| "loss": 1.0482804775238037, | |
| "step": 3560 | |
| }, | |
| { | |
| "epoch": 2.696442089326268, | |
| "grad_norm": 1.4913266897201538, | |
| "learning_rate": 1.0119149754972132e-06, | |
| "loss": 0.5744074583053589, | |
| "step": 3562 | |
| }, | |
| { | |
| "epoch": 2.6979560938682816, | |
| "grad_norm": 2.419517755508423, | |
| "learning_rate": 1.0104261346553096e-06, | |
| "loss": 0.5351262092590332, | |
| "step": 3564 | |
| }, | |
| { | |
| "epoch": 2.6994700984102953, | |
| "grad_norm": 2.7999117374420166, | |
| "learning_rate": 1.0089382961555663e-06, | |
| "loss": 0.6164992451667786, | |
| "step": 3566 | |
| }, | |
| { | |
| "epoch": 2.700984102952309, | |
| "grad_norm": 1.8278381824493408, | |
| "learning_rate": 1.0074514623297277e-06, | |
| "loss": 0.5365289449691772, | |
| "step": 3568 | |
| }, | |
| { | |
| "epoch": 2.7024981074943226, | |
| "grad_norm": 1.0970771312713623, | |
| "learning_rate": 1.005965635507964e-06, | |
| "loss": 0.36719387769699097, | |
| "step": 3570 | |
| }, | |
| { | |
| "epoch": 2.7040121120363363, | |
| "grad_norm": 2.4264323711395264, | |
| "learning_rate": 1.0044808180188685e-06, | |
| "loss": 0.24815315008163452, | |
| "step": 3572 | |
| }, | |
| { | |
| "epoch": 2.7055261165783495, | |
| "grad_norm": 0.4873034656047821, | |
| "learning_rate": 1.0029970121894516e-06, | |
| "loss": 0.14031077921390533, | |
| "step": 3574 | |
| }, | |
| { | |
| "epoch": 2.7070401211203636, | |
| "grad_norm": 0.6915985345840454, | |
| "learning_rate": 1.0015142203451384e-06, | |
| "loss": 0.2932851314544678, | |
| "step": 3576 | |
| }, | |
| { | |
| "epoch": 2.708554125662377, | |
| "grad_norm": 2.271310806274414, | |
| "learning_rate": 1.000032444809764e-06, | |
| "loss": 0.4614425599575043, | |
| "step": 3578 | |
| }, | |
| { | |
| "epoch": 2.7100681302043905, | |
| "grad_norm": 1.5097922086715698, | |
| "learning_rate": 9.985516879055733e-07, | |
| "loss": 0.9115333557128906, | |
| "step": 3580 | |
| }, | |
| { | |
| "epoch": 2.711582134746404, | |
| "grad_norm": 1.59455406665802, | |
| "learning_rate": 9.970719519532123e-07, | |
| "loss": 0.5052527785301208, | |
| "step": 3582 | |
| }, | |
| { | |
| "epoch": 2.713096139288418, | |
| "grad_norm": 2.1244618892669678, | |
| "learning_rate": 9.955932392717273e-07, | |
| "loss": 0.8183184266090393, | |
| "step": 3584 | |
| }, | |
| { | |
| "epoch": 2.7146101438304315, | |
| "grad_norm": 5.1214399337768555, | |
| "learning_rate": 9.941155521785622e-07, | |
| "loss": 0.538994550704956, | |
| "step": 3586 | |
| }, | |
| { | |
| "epoch": 2.716124148372445, | |
| "grad_norm": 4.378061294555664, | |
| "learning_rate": 9.926388929895523e-07, | |
| "loss": 0.2428453415632248, | |
| "step": 3588 | |
| }, | |
| { | |
| "epoch": 2.717638152914459, | |
| "grad_norm": 7.363132476806641, | |
| "learning_rate": 9.91163264018923e-07, | |
| "loss": 0.06354464590549469, | |
| "step": 3590 | |
| }, | |
| { | |
| "epoch": 2.719152157456472, | |
| "grad_norm": 3.2854292392730713, | |
| "learning_rate": 9.89688667579284e-07, | |
| "loss": 0.617462694644928, | |
| "step": 3592 | |
| }, | |
| { | |
| "epoch": 2.720666161998486, | |
| "grad_norm": 1.5972332954406738, | |
| "learning_rate": 9.882151059816286e-07, | |
| "loss": 0.4859785735607147, | |
| "step": 3594 | |
| }, | |
| { | |
| "epoch": 2.7221801665404994, | |
| "grad_norm": 2.845470905303955, | |
| "learning_rate": 9.867425815353263e-07, | |
| "loss": 0.5740007162094116, | |
| "step": 3596 | |
| }, | |
| { | |
| "epoch": 2.723694171082513, | |
| "grad_norm": 2.0157742500305176, | |
| "learning_rate": 9.852710965481219e-07, | |
| "loss": 0.06600300222635269, | |
| "step": 3598 | |
| }, | |
| { | |
| "epoch": 2.7252081756245268, | |
| "grad_norm": 1.7511472702026367, | |
| "learning_rate": 9.83800653326131e-07, | |
| "loss": 0.847884476184845, | |
| "step": 3600 | |
| }, | |
| { | |
| "epoch": 2.7267221801665404, | |
| "grad_norm": 1.3995939493179321, | |
| "learning_rate": 9.823312541738378e-07, | |
| "loss": 0.8712521195411682, | |
| "step": 3602 | |
| }, | |
| { | |
| "epoch": 2.728236184708554, | |
| "grad_norm": 1.488347053527832, | |
| "learning_rate": 9.808629013940889e-07, | |
| "loss": 0.5205467343330383, | |
| "step": 3604 | |
| }, | |
| { | |
| "epoch": 2.729750189250568, | |
| "grad_norm": 1.5527247190475464, | |
| "learning_rate": 9.793955972880904e-07, | |
| "loss": 0.5302475094795227, | |
| "step": 3606 | |
| }, | |
| { | |
| "epoch": 2.7312641937925815, | |
| "grad_norm": 1.9730198383331299, | |
| "learning_rate": 9.779293441554072e-07, | |
| "loss": 0.05066138133406639, | |
| "step": 3608 | |
| }, | |
| { | |
| "epoch": 2.732778198334595, | |
| "grad_norm": 1.394031286239624, | |
| "learning_rate": 9.764641442939552e-07, | |
| "loss": 0.5910233855247498, | |
| "step": 3610 | |
| }, | |
| { | |
| "epoch": 2.734292202876609, | |
| "grad_norm": 7.18306303024292, | |
| "learning_rate": 9.750000000000004e-07, | |
| "loss": 0.7828827500343323, | |
| "step": 3612 | |
| }, | |
| { | |
| "epoch": 2.735806207418622, | |
| "grad_norm": 1.5465166568756104, | |
| "learning_rate": 9.735369135681535e-07, | |
| "loss": 0.4381512999534607, | |
| "step": 3614 | |
| }, | |
| { | |
| "epoch": 2.737320211960636, | |
| "grad_norm": 1.4410380125045776, | |
| "learning_rate": 9.720748872913692e-07, | |
| "loss": 0.030536165460944176, | |
| "step": 3616 | |
| }, | |
| { | |
| "epoch": 2.7388342165026494, | |
| "grad_norm": 5.242462158203125, | |
| "learning_rate": 9.706139234609395e-07, | |
| "loss": 0.5820929408073425, | |
| "step": 3618 | |
| }, | |
| { | |
| "epoch": 2.740348221044663, | |
| "grad_norm": 2.1642913818359375, | |
| "learning_rate": 9.691540243664913e-07, | |
| "loss": 0.23760852217674255, | |
| "step": 3620 | |
| }, | |
| { | |
| "epoch": 2.7418622255866767, | |
| "grad_norm": 2.6150195598602295, | |
| "learning_rate": 9.67695192295982e-07, | |
| "loss": 0.8972626328468323, | |
| "step": 3622 | |
| }, | |
| { | |
| "epoch": 2.7433762301286904, | |
| "grad_norm": 0.4961766302585602, | |
| "learning_rate": 9.662374295356995e-07, | |
| "loss": 0.48937559127807617, | |
| "step": 3624 | |
| }, | |
| { | |
| "epoch": 2.744890234670704, | |
| "grad_norm": 1.2891615629196167, | |
| "learning_rate": 9.647807383702534e-07, | |
| "loss": 0.08011382818222046, | |
| "step": 3626 | |
| }, | |
| { | |
| "epoch": 2.7464042392127177, | |
| "grad_norm": 1.4327223300933838, | |
| "learning_rate": 9.63325121082574e-07, | |
| "loss": 0.9761390686035156, | |
| "step": 3628 | |
| }, | |
| { | |
| "epoch": 2.7479182437547314, | |
| "grad_norm": 0.7498922348022461, | |
| "learning_rate": 9.618705799539105e-07, | |
| "loss": 0.11114507913589478, | |
| "step": 3630 | |
| }, | |
| { | |
| "epoch": 2.749432248296745, | |
| "grad_norm": 2.269970417022705, | |
| "learning_rate": 9.604171172638233e-07, | |
| "loss": 0.546760082244873, | |
| "step": 3632 | |
| }, | |
| { | |
| "epoch": 2.7509462528387587, | |
| "grad_norm": 2.625152111053467, | |
| "learning_rate": 9.589647352901837e-07, | |
| "loss": 0.15670128166675568, | |
| "step": 3634 | |
| }, | |
| { | |
| "epoch": 2.752460257380772, | |
| "grad_norm": 2.0255126953125, | |
| "learning_rate": 9.575134363091702e-07, | |
| "loss": 0.6736434698104858, | |
| "step": 3636 | |
| }, | |
| { | |
| "epoch": 2.7539742619227856, | |
| "grad_norm": 4.662033557891846, | |
| "learning_rate": 9.560632225952626e-07, | |
| "loss": 0.15046308934688568, | |
| "step": 3638 | |
| }, | |
| { | |
| "epoch": 2.7554882664647993, | |
| "grad_norm": 1.6699665784835815, | |
| "learning_rate": 9.546140964212397e-07, | |
| "loss": 1.004642128944397, | |
| "step": 3640 | |
| }, | |
| { | |
| "epoch": 2.757002271006813, | |
| "grad_norm": 5.824310779571533, | |
| "learning_rate": 9.531660600581774e-07, | |
| "loss": 0.22400666773319244, | |
| "step": 3642 | |
| }, | |
| { | |
| "epoch": 2.7585162755488266, | |
| "grad_norm": 1.661800742149353, | |
| "learning_rate": 9.517191157754421e-07, | |
| "loss": 0.04683089628815651, | |
| "step": 3644 | |
| }, | |
| { | |
| "epoch": 2.7600302800908403, | |
| "grad_norm": 2.4442532062530518, | |
| "learning_rate": 9.502732658406903e-07, | |
| "loss": 0.670370876789093, | |
| "step": 3646 | |
| }, | |
| { | |
| "epoch": 2.761544284632854, | |
| "grad_norm": 3.1336352825164795, | |
| "learning_rate": 9.488285125198622e-07, | |
| "loss": 0.1903330534696579, | |
| "step": 3648 | |
| }, | |
| { | |
| "epoch": 2.7630582891748676, | |
| "grad_norm": 2.6961746215820312, | |
| "learning_rate": 9.473848580771794e-07, | |
| "loss": 0.9148120880126953, | |
| "step": 3650 | |
| }, | |
| { | |
| "epoch": 2.7645722937168813, | |
| "grad_norm": 1.6039396524429321, | |
| "learning_rate": 9.459423047751417e-07, | |
| "loss": 0.9865699410438538, | |
| "step": 3652 | |
| }, | |
| { | |
| "epoch": 2.7660862982588945, | |
| "grad_norm": 2.1121411323547363, | |
| "learning_rate": 9.445008548745238e-07, | |
| "loss": 0.245061993598938, | |
| "step": 3654 | |
| }, | |
| { | |
| "epoch": 2.7676003028009086, | |
| "grad_norm": 1.7591969966888428, | |
| "learning_rate": 9.430605106343696e-07, | |
| "loss": 0.0845777839422226, | |
| "step": 3656 | |
| }, | |
| { | |
| "epoch": 2.769114307342922, | |
| "grad_norm": 4.009654521942139, | |
| "learning_rate": 9.416212743119911e-07, | |
| "loss": 0.519393265247345, | |
| "step": 3658 | |
| }, | |
| { | |
| "epoch": 2.7706283118849355, | |
| "grad_norm": 1.430734395980835, | |
| "learning_rate": 9.401831481629649e-07, | |
| "loss": 0.48087310791015625, | |
| "step": 3660 | |
| }, | |
| { | |
| "epoch": 2.772142316426949, | |
| "grad_norm": 1.65854811668396, | |
| "learning_rate": 9.387461344411263e-07, | |
| "loss": 0.9938499331474304, | |
| "step": 3662 | |
| }, | |
| { | |
| "epoch": 2.773656320968963, | |
| "grad_norm": 0.5344567894935608, | |
| "learning_rate": 9.373102353985668e-07, | |
| "loss": 0.48683303594589233, | |
| "step": 3664 | |
| }, | |
| { | |
| "epoch": 2.7751703255109765, | |
| "grad_norm": 1.240643858909607, | |
| "learning_rate": 9.358754532856334e-07, | |
| "loss": 0.4459698796272278, | |
| "step": 3666 | |
| }, | |
| { | |
| "epoch": 2.77668433005299, | |
| "grad_norm": 1.141143798828125, | |
| "learning_rate": 9.344417903509201e-07, | |
| "loss": 0.5696487426757812, | |
| "step": 3668 | |
| }, | |
| { | |
| "epoch": 2.778198334595004, | |
| "grad_norm": 2.9075422286987305, | |
| "learning_rate": 9.330092488412687e-07, | |
| "loss": 0.4729193150997162, | |
| "step": 3670 | |
| }, | |
| { | |
| "epoch": 2.7797123391370175, | |
| "grad_norm": 0.8197333216667175, | |
| "learning_rate": 9.315778310017616e-07, | |
| "loss": 0.5160009860992432, | |
| "step": 3672 | |
| }, | |
| { | |
| "epoch": 2.781226343679031, | |
| "grad_norm": 1.4015450477600098, | |
| "learning_rate": 9.301475390757222e-07, | |
| "loss": 0.5251963138580322, | |
| "step": 3674 | |
| }, | |
| { | |
| "epoch": 2.7827403482210444, | |
| "grad_norm": 2.8256664276123047, | |
| "learning_rate": 9.287183753047082e-07, | |
| "loss": 0.1814594566822052, | |
| "step": 3676 | |
| }, | |
| { | |
| "epoch": 2.7842543527630585, | |
| "grad_norm": 4.373061180114746, | |
| "learning_rate": 9.272903419285096e-07, | |
| "loss": 0.11841978132724762, | |
| "step": 3678 | |
| }, | |
| { | |
| "epoch": 2.7857683573050718, | |
| "grad_norm": 2.711310386657715, | |
| "learning_rate": 9.258634411851445e-07, | |
| "loss": 0.9179194569587708, | |
| "step": 3680 | |
| }, | |
| { | |
| "epoch": 2.7872823618470854, | |
| "grad_norm": 3.6902360916137695, | |
| "learning_rate": 9.244376753108567e-07, | |
| "loss": 0.14111346006393433, | |
| "step": 3682 | |
| }, | |
| { | |
| "epoch": 2.788796366389099, | |
| "grad_norm": 5.682900905609131, | |
| "learning_rate": 9.230130465401107e-07, | |
| "loss": 0.0874456986784935, | |
| "step": 3684 | |
| }, | |
| { | |
| "epoch": 2.790310370931113, | |
| "grad_norm": 3.762624740600586, | |
| "learning_rate": 9.215895571055886e-07, | |
| "loss": 0.22180086374282837, | |
| "step": 3686 | |
| }, | |
| { | |
| "epoch": 2.7918243754731265, | |
| "grad_norm": 1.6312108039855957, | |
| "learning_rate": 9.201672092381885e-07, | |
| "loss": 0.6041345000267029, | |
| "step": 3688 | |
| }, | |
| { | |
| "epoch": 2.79333838001514, | |
| "grad_norm": 3.2012195587158203, | |
| "learning_rate": 9.187460051670173e-07, | |
| "loss": 0.5570380091667175, | |
| "step": 3690 | |
| }, | |
| { | |
| "epoch": 2.794852384557154, | |
| "grad_norm": 1.1236485242843628, | |
| "learning_rate": 9.173259471193918e-07, | |
| "loss": 0.5052518844604492, | |
| "step": 3692 | |
| }, | |
| { | |
| "epoch": 2.7963663890991675, | |
| "grad_norm": 2.8305928707122803, | |
| "learning_rate": 9.159070373208301e-07, | |
| "loss": 0.19987015426158905, | |
| "step": 3694 | |
| }, | |
| { | |
| "epoch": 2.797880393641181, | |
| "grad_norm": 7.895492076873779, | |
| "learning_rate": 9.144892779950532e-07, | |
| "loss": 0.737027645111084, | |
| "step": 3696 | |
| }, | |
| { | |
| "epoch": 2.7993943981831944, | |
| "grad_norm": 3.232949733734131, | |
| "learning_rate": 9.130726713639774e-07, | |
| "loss": 0.09915422648191452, | |
| "step": 3698 | |
| }, | |
| { | |
| "epoch": 2.800908402725208, | |
| "grad_norm": 1.2412161827087402, | |
| "learning_rate": 9.116572196477129e-07, | |
| "loss": 0.5296503305435181, | |
| "step": 3700 | |
| }, | |
| { | |
| "epoch": 2.8024224072672217, | |
| "grad_norm": 2.9720041751861572, | |
| "learning_rate": 9.102429250645598e-07, | |
| "loss": 0.9459198713302612, | |
| "step": 3702 | |
| }, | |
| { | |
| "epoch": 2.8039364118092354, | |
| "grad_norm": 1.1881636381149292, | |
| "learning_rate": 9.088297898310059e-07, | |
| "loss": 0.7172422409057617, | |
| "step": 3704 | |
| }, | |
| { | |
| "epoch": 2.805450416351249, | |
| "grad_norm": 4.256730079650879, | |
| "learning_rate": 9.074178161617206e-07, | |
| "loss": 0.339231014251709, | |
| "step": 3706 | |
| }, | |
| { | |
| "epoch": 2.8069644208932627, | |
| "grad_norm": 7.478480339050293, | |
| "learning_rate": 9.06007006269553e-07, | |
| "loss": 0.5643231272697449, | |
| "step": 3708 | |
| }, | |
| { | |
| "epoch": 2.8084784254352764, | |
| "grad_norm": 3.0805113315582275, | |
| "learning_rate": 9.045973623655298e-07, | |
| "loss": 0.45885780453681946, | |
| "step": 3710 | |
| }, | |
| { | |
| "epoch": 2.80999242997729, | |
| "grad_norm": 4.949220657348633, | |
| "learning_rate": 9.031888866588486e-07, | |
| "loss": 0.5311449766159058, | |
| "step": 3712 | |
| }, | |
| { | |
| "epoch": 2.8115064345193037, | |
| "grad_norm": 1.7975176572799683, | |
| "learning_rate": 9.017815813568773e-07, | |
| "loss": 0.5856797099113464, | |
| "step": 3714 | |
| }, | |
| { | |
| "epoch": 2.813020439061317, | |
| "grad_norm": 2.2618489265441895, | |
| "learning_rate": 9.003754486651483e-07, | |
| "loss": 0.15607936680316925, | |
| "step": 3716 | |
| }, | |
| { | |
| "epoch": 2.814534443603331, | |
| "grad_norm": 2.57824969291687, | |
| "learning_rate": 8.989704907873585e-07, | |
| "loss": 0.5110048055648804, | |
| "step": 3718 | |
| }, | |
| { | |
| "epoch": 2.8160484481453443, | |
| "grad_norm": 1.3291974067687988, | |
| "learning_rate": 8.975667099253615e-07, | |
| "loss": 0.0944562554359436, | |
| "step": 3720 | |
| }, | |
| { | |
| "epoch": 2.817562452687358, | |
| "grad_norm": 2.122201442718506, | |
| "learning_rate": 8.961641082791665e-07, | |
| "loss": 0.029286984354257584, | |
| "step": 3722 | |
| }, | |
| { | |
| "epoch": 2.8190764572293716, | |
| "grad_norm": 1.4434117078781128, | |
| "learning_rate": 8.947626880469365e-07, | |
| "loss": 0.5472773909568787, | |
| "step": 3724 | |
| }, | |
| { | |
| "epoch": 2.8205904617713853, | |
| "grad_norm": 1.7789900302886963, | |
| "learning_rate": 8.933624514249809e-07, | |
| "loss": 0.6741805076599121, | |
| "step": 3726 | |
| }, | |
| { | |
| "epoch": 2.822104466313399, | |
| "grad_norm": 1.073285698890686, | |
| "learning_rate": 8.919634006077551e-07, | |
| "loss": 0.43643903732299805, | |
| "step": 3728 | |
| }, | |
| { | |
| "epoch": 2.8236184708554126, | |
| "grad_norm": 7.309019565582275, | |
| "learning_rate": 8.905655377878552e-07, | |
| "loss": 0.7717113494873047, | |
| "step": 3730 | |
| }, | |
| { | |
| "epoch": 2.8251324753974263, | |
| "grad_norm": 1.233903169631958, | |
| "learning_rate": 8.891688651560177e-07, | |
| "loss": 0.032879047095775604, | |
| "step": 3732 | |
| }, | |
| { | |
| "epoch": 2.82664647993944, | |
| "grad_norm": 1.6785112619400024, | |
| "learning_rate": 8.87773384901111e-07, | |
| "loss": 0.9319685101509094, | |
| "step": 3734 | |
| }, | |
| { | |
| "epoch": 2.8281604844814536, | |
| "grad_norm": 30.342561721801758, | |
| "learning_rate": 8.863790992101359e-07, | |
| "loss": 0.5449196100234985, | |
| "step": 3736 | |
| }, | |
| { | |
| "epoch": 2.829674489023467, | |
| "grad_norm": 3.7048192024230957, | |
| "learning_rate": 8.849860102682226e-07, | |
| "loss": 0.7348836064338684, | |
| "step": 3738 | |
| }, | |
| { | |
| "epoch": 2.831188493565481, | |
| "grad_norm": 2.610330104827881, | |
| "learning_rate": 8.835941202586237e-07, | |
| "loss": 0.06106247752904892, | |
| "step": 3740 | |
| }, | |
| { | |
| "epoch": 2.832702498107494, | |
| "grad_norm": 3.5520286560058594, | |
| "learning_rate": 8.822034313627131e-07, | |
| "loss": 1.0147613286972046, | |
| "step": 3742 | |
| }, | |
| { | |
| "epoch": 2.834216502649508, | |
| "grad_norm": 5.207727909088135, | |
| "learning_rate": 8.808139457599839e-07, | |
| "loss": 0.5845840573310852, | |
| "step": 3744 | |
| }, | |
| { | |
| "epoch": 2.8357305071915215, | |
| "grad_norm": 2.8798627853393555, | |
| "learning_rate": 8.794256656280411e-07, | |
| "loss": 0.520442008972168, | |
| "step": 3746 | |
| }, | |
| { | |
| "epoch": 2.837244511733535, | |
| "grad_norm": 2.0684001445770264, | |
| "learning_rate": 8.780385931426028e-07, | |
| "loss": 0.1020556092262268, | |
| "step": 3748 | |
| }, | |
| { | |
| "epoch": 2.838758516275549, | |
| "grad_norm": 0.7989282608032227, | |
| "learning_rate": 8.766527304774929e-07, | |
| "loss": 0.18852849304676056, | |
| "step": 3750 | |
| }, | |
| { | |
| "epoch": 2.8402725208175625, | |
| "grad_norm": 2.2457497119903564, | |
| "learning_rate": 8.752680798046388e-07, | |
| "loss": 0.13739685714244843, | |
| "step": 3752 | |
| }, | |
| { | |
| "epoch": 2.841786525359576, | |
| "grad_norm": 1.6464593410491943, | |
| "learning_rate": 8.738846432940708e-07, | |
| "loss": 0.4965594410896301, | |
| "step": 3754 | |
| }, | |
| { | |
| "epoch": 2.8433005299015894, | |
| "grad_norm": 3.5233702659606934, | |
| "learning_rate": 8.725024231139141e-07, | |
| "loss": 0.5871769785881042, | |
| "step": 3756 | |
| }, | |
| { | |
| "epoch": 2.8448145344436035, | |
| "grad_norm": 0.7635524272918701, | |
| "learning_rate": 8.711214214303882e-07, | |
| "loss": 0.5196180939674377, | |
| "step": 3758 | |
| }, | |
| { | |
| "epoch": 2.8463285389856168, | |
| "grad_norm": 1.6084498167037964, | |
| "learning_rate": 8.69741640407804e-07, | |
| "loss": 0.18574142456054688, | |
| "step": 3760 | |
| }, | |
| { | |
| "epoch": 2.8478425435276304, | |
| "grad_norm": 0.49115055799484253, | |
| "learning_rate": 8.683630822085586e-07, | |
| "loss": 0.19083711504936218, | |
| "step": 3762 | |
| }, | |
| { | |
| "epoch": 2.849356548069644, | |
| "grad_norm": 1.3124921321868896, | |
| "learning_rate": 8.669857489931323e-07, | |
| "loss": 0.43203872442245483, | |
| "step": 3764 | |
| }, | |
| { | |
| "epoch": 2.850870552611658, | |
| "grad_norm": 1.9938055276870728, | |
| "learning_rate": 8.656096429200857e-07, | |
| "loss": 0.05717456340789795, | |
| "step": 3766 | |
| }, | |
| { | |
| "epoch": 2.8523845571536715, | |
| "grad_norm": 15.529302597045898, | |
| "learning_rate": 8.642347661460574e-07, | |
| "loss": 0.0767684355378151, | |
| "step": 3768 | |
| }, | |
| { | |
| "epoch": 2.853898561695685, | |
| "grad_norm": 0.2853293716907501, | |
| "learning_rate": 8.628611208257582e-07, | |
| "loss": 0.4549844264984131, | |
| "step": 3770 | |
| }, | |
| { | |
| "epoch": 2.855412566237699, | |
| "grad_norm": 1.8048611879348755, | |
| "learning_rate": 8.614887091119692e-07, | |
| "loss": 0.49801960587501526, | |
| "step": 3772 | |
| }, | |
| { | |
| "epoch": 2.8569265707797125, | |
| "grad_norm": 3.1209964752197266, | |
| "learning_rate": 8.60117533155538e-07, | |
| "loss": 0.07470957934856415, | |
| "step": 3774 | |
| }, | |
| { | |
| "epoch": 2.858440575321726, | |
| "grad_norm": 3.946650266647339, | |
| "learning_rate": 8.587475951053769e-07, | |
| "loss": 0.4709521234035492, | |
| "step": 3776 | |
| }, | |
| { | |
| "epoch": 2.8599545798637394, | |
| "grad_norm": 1.5060186386108398, | |
| "learning_rate": 8.573788971084563e-07, | |
| "loss": 0.2602544128894806, | |
| "step": 3778 | |
| }, | |
| { | |
| "epoch": 2.8614685844057535, | |
| "grad_norm": 2.0307724475860596, | |
| "learning_rate": 8.560114413098036e-07, | |
| "loss": 0.7113937139511108, | |
| "step": 3780 | |
| }, | |
| { | |
| "epoch": 2.8629825889477667, | |
| "grad_norm": 2.940063714981079, | |
| "learning_rate": 8.54645229852501e-07, | |
| "loss": 0.10309739410877228, | |
| "step": 3782 | |
| }, | |
| { | |
| "epoch": 2.8644965934897804, | |
| "grad_norm": 1.7410873174667358, | |
| "learning_rate": 8.532802648776786e-07, | |
| "loss": 0.5519030690193176, | |
| "step": 3784 | |
| }, | |
| { | |
| "epoch": 2.866010598031794, | |
| "grad_norm": 2.5878376960754395, | |
| "learning_rate": 8.519165485245139e-07, | |
| "loss": 0.1054067388176918, | |
| "step": 3786 | |
| }, | |
| { | |
| "epoch": 2.8675246025738077, | |
| "grad_norm": 1.256569266319275, | |
| "learning_rate": 8.505540829302267e-07, | |
| "loss": 0.5424407124519348, | |
| "step": 3788 | |
| }, | |
| { | |
| "epoch": 2.8690386071158214, | |
| "grad_norm": 0.8407960534095764, | |
| "learning_rate": 8.491928702300788e-07, | |
| "loss": 0.4280465245246887, | |
| "step": 3790 | |
| }, | |
| { | |
| "epoch": 2.870552611657835, | |
| "grad_norm": 0.8045430183410645, | |
| "learning_rate": 8.478329125573654e-07, | |
| "loss": 0.03474471718072891, | |
| "step": 3792 | |
| }, | |
| { | |
| "epoch": 2.8720666161998487, | |
| "grad_norm": 2.285611629486084, | |
| "learning_rate": 8.464742120434181e-07, | |
| "loss": 0.07297685742378235, | |
| "step": 3794 | |
| }, | |
| { | |
| "epoch": 2.8735806207418624, | |
| "grad_norm": 1.3411763906478882, | |
| "learning_rate": 8.451167708175949e-07, | |
| "loss": 0.18974605202674866, | |
| "step": 3796 | |
| }, | |
| { | |
| "epoch": 2.875094625283876, | |
| "grad_norm": 1.9717655181884766, | |
| "learning_rate": 8.437605910072835e-07, | |
| "loss": 0.49169692397117615, | |
| "step": 3798 | |
| }, | |
| { | |
| "epoch": 2.8766086298258893, | |
| "grad_norm": 1.0608391761779785, | |
| "learning_rate": 8.424056747378924e-07, | |
| "loss": 0.46762970089912415, | |
| "step": 3800 | |
| }, | |
| { | |
| "epoch": 2.878122634367903, | |
| "grad_norm": 2.073852777481079, | |
| "learning_rate": 8.410520241328499e-07, | |
| "loss": 0.5141778588294983, | |
| "step": 3802 | |
| }, | |
| { | |
| "epoch": 2.8796366389099166, | |
| "grad_norm": 2.4379963874816895, | |
| "learning_rate": 8.396996413136029e-07, | |
| "loss": 0.06461524963378906, | |
| "step": 3804 | |
| }, | |
| { | |
| "epoch": 2.8811506434519303, | |
| "grad_norm": 1.331201195716858, | |
| "learning_rate": 8.383485283996091e-07, | |
| "loss": 0.5797151327133179, | |
| "step": 3806 | |
| }, | |
| { | |
| "epoch": 2.882664647993944, | |
| "grad_norm": 2.5884780883789062, | |
| "learning_rate": 8.369986875083369e-07, | |
| "loss": 0.48049142956733704, | |
| "step": 3808 | |
| }, | |
| { | |
| "epoch": 2.8841786525359576, | |
| "grad_norm": 1.6268047094345093, | |
| "learning_rate": 8.356501207552611e-07, | |
| "loss": 0.5873973965644836, | |
| "step": 3810 | |
| }, | |
| { | |
| "epoch": 2.8856926570779713, | |
| "grad_norm": 2.7758703231811523, | |
| "learning_rate": 8.3430283025386e-07, | |
| "loss": 0.5925127863883972, | |
| "step": 3812 | |
| }, | |
| { | |
| "epoch": 2.887206661619985, | |
| "grad_norm": 21.12485694885254, | |
| "learning_rate": 8.329568181156116e-07, | |
| "loss": 0.981511652469635, | |
| "step": 3814 | |
| }, | |
| { | |
| "epoch": 2.8887206661619986, | |
| "grad_norm": 1.62020742893219, | |
| "learning_rate": 8.316120864499896e-07, | |
| "loss": 0.2684553861618042, | |
| "step": 3816 | |
| }, | |
| { | |
| "epoch": 2.890234670704012, | |
| "grad_norm": 0.5304837226867676, | |
| "learning_rate": 8.302686373644626e-07, | |
| "loss": 0.4767705798149109, | |
| "step": 3818 | |
| }, | |
| { | |
| "epoch": 2.891748675246026, | |
| "grad_norm": 1.8308970928192139, | |
| "learning_rate": 8.289264729644878e-07, | |
| "loss": 0.5119712352752686, | |
| "step": 3820 | |
| }, | |
| { | |
| "epoch": 2.893262679788039, | |
| "grad_norm": 4.053905487060547, | |
| "learning_rate": 8.275855953535097e-07, | |
| "loss": 0.5398560762405396, | |
| "step": 3822 | |
| }, | |
| { | |
| "epoch": 2.894776684330053, | |
| "grad_norm": 1.6479356288909912, | |
| "learning_rate": 8.262460066329549e-07, | |
| "loss": 0.46708399057388306, | |
| "step": 3824 | |
| }, | |
| { | |
| "epoch": 2.8962906888720665, | |
| "grad_norm": 0.3225676417350769, | |
| "learning_rate": 8.249077089022323e-07, | |
| "loss": 0.5151532888412476, | |
| "step": 3826 | |
| }, | |
| { | |
| "epoch": 2.89780469341408, | |
| "grad_norm": 1.7271003723144531, | |
| "learning_rate": 8.235707042587258e-07, | |
| "loss": 1.0721513032913208, | |
| "step": 3828 | |
| }, | |
| { | |
| "epoch": 2.899318697956094, | |
| "grad_norm": 3.939084768295288, | |
| "learning_rate": 8.222349947977929e-07, | |
| "loss": 0.13510605692863464, | |
| "step": 3830 | |
| }, | |
| { | |
| "epoch": 2.9008327024981075, | |
| "grad_norm": 1.4582927227020264, | |
| "learning_rate": 8.209005826127616e-07, | |
| "loss": 0.5551086664199829, | |
| "step": 3832 | |
| }, | |
| { | |
| "epoch": 2.902346707040121, | |
| "grad_norm": 1.3123462200164795, | |
| "learning_rate": 8.195674697949277e-07, | |
| "loss": 0.7564160227775574, | |
| "step": 3834 | |
| }, | |
| { | |
| "epoch": 2.903860711582135, | |
| "grad_norm": 3.6382358074188232, | |
| "learning_rate": 8.182356584335491e-07, | |
| "loss": 0.11954665184020996, | |
| "step": 3836 | |
| }, | |
| { | |
| "epoch": 2.9053747161241485, | |
| "grad_norm": 1.0889722108840942, | |
| "learning_rate": 8.169051506158443e-07, | |
| "loss": 1.0122618675231934, | |
| "step": 3838 | |
| }, | |
| { | |
| "epoch": 2.9068887206661618, | |
| "grad_norm": 1.8109678030014038, | |
| "learning_rate": 8.155759484269905e-07, | |
| "loss": 0.770567774772644, | |
| "step": 3840 | |
| }, | |
| { | |
| "epoch": 2.908402725208176, | |
| "grad_norm": 1.7070538997650146, | |
| "learning_rate": 8.142480539501167e-07, | |
| "loss": 0.5919016599655151, | |
| "step": 3842 | |
| }, | |
| { | |
| "epoch": 2.909916729750189, | |
| "grad_norm": 2.2287955284118652, | |
| "learning_rate": 8.129214692663032e-07, | |
| "loss": 0.6235976219177246, | |
| "step": 3844 | |
| }, | |
| { | |
| "epoch": 2.911430734292203, | |
| "grad_norm": 1.6173175573349, | |
| "learning_rate": 8.115961964545783e-07, | |
| "loss": 0.08541975915431976, | |
| "step": 3846 | |
| }, | |
| { | |
| "epoch": 2.9129447388342165, | |
| "grad_norm": 1.2753394842147827, | |
| "learning_rate": 8.10272237591913e-07, | |
| "loss": 0.02214045450091362, | |
| "step": 3848 | |
| }, | |
| { | |
| "epoch": 2.91445874337623, | |
| "grad_norm": 1.2947241067886353, | |
| "learning_rate": 8.089495947532204e-07, | |
| "loss": 0.6888188719749451, | |
| "step": 3850 | |
| }, | |
| { | |
| "epoch": 2.915972747918244, | |
| "grad_norm": 0.6611459255218506, | |
| "learning_rate": 8.0762827001135e-07, | |
| "loss": 0.14420601725578308, | |
| "step": 3852 | |
| }, | |
| { | |
| "epoch": 2.9174867524602575, | |
| "grad_norm": 1.7915887832641602, | |
| "learning_rate": 8.063082654370859e-07, | |
| "loss": 0.04857180640101433, | |
| "step": 3854 | |
| }, | |
| { | |
| "epoch": 2.919000757002271, | |
| "grad_norm": 1.6762808561325073, | |
| "learning_rate": 8.049895830991442e-07, | |
| "loss": 0.03882667049765587, | |
| "step": 3856 | |
| }, | |
| { | |
| "epoch": 2.920514761544285, | |
| "grad_norm": 1.8337093591690063, | |
| "learning_rate": 8.036722250641675e-07, | |
| "loss": 0.5603622794151306, | |
| "step": 3858 | |
| }, | |
| { | |
| "epoch": 2.9220287660862985, | |
| "grad_norm": 2.0373966693878174, | |
| "learning_rate": 8.023561933967231e-07, | |
| "loss": 0.021611543372273445, | |
| "step": 3860 | |
| }, | |
| { | |
| "epoch": 2.9235427706283117, | |
| "grad_norm": 1.4119657278060913, | |
| "learning_rate": 8.010414901593006e-07, | |
| "loss": 0.028445757925510406, | |
| "step": 3862 | |
| }, | |
| { | |
| "epoch": 2.9250567751703254, | |
| "grad_norm": 1.7821069955825806, | |
| "learning_rate": 7.997281174123065e-07, | |
| "loss": 0.5556588172912598, | |
| "step": 3864 | |
| }, | |
| { | |
| "epoch": 2.926570779712339, | |
| "grad_norm": 3.5669658184051514, | |
| "learning_rate": 7.98416077214063e-07, | |
| "loss": 0.05701752379536629, | |
| "step": 3866 | |
| }, | |
| { | |
| "epoch": 2.9280847842543527, | |
| "grad_norm": 4.570811748504639, | |
| "learning_rate": 7.971053716208031e-07, | |
| "loss": 0.033300720155239105, | |
| "step": 3868 | |
| }, | |
| { | |
| "epoch": 2.9295987887963664, | |
| "grad_norm": 2.6463215351104736, | |
| "learning_rate": 7.957960026866695e-07, | |
| "loss": 0.5267956256866455, | |
| "step": 3870 | |
| }, | |
| { | |
| "epoch": 2.93111279333838, | |
| "grad_norm": 1.602906584739685, | |
| "learning_rate": 7.944879724637089e-07, | |
| "loss": 1.021190881729126, | |
| "step": 3872 | |
| }, | |
| { | |
| "epoch": 2.9326267978803937, | |
| "grad_norm": 1.8349531888961792, | |
| "learning_rate": 7.931812830018696e-07, | |
| "loss": 0.5174588561058044, | |
| "step": 3874 | |
| }, | |
| { | |
| "epoch": 2.9341408024224074, | |
| "grad_norm": 1.067012906074524, | |
| "learning_rate": 7.918759363490007e-07, | |
| "loss": 0.4694302976131439, | |
| "step": 3876 | |
| }, | |
| { | |
| "epoch": 2.935654806964421, | |
| "grad_norm": 7.647733688354492, | |
| "learning_rate": 7.905719345508448e-07, | |
| "loss": 0.5283836126327515, | |
| "step": 3878 | |
| }, | |
| { | |
| "epoch": 2.9371688115064343, | |
| "grad_norm": 2.6443963050842285, | |
| "learning_rate": 7.89269279651038e-07, | |
| "loss": 0.6997630000114441, | |
| "step": 3880 | |
| }, | |
| { | |
| "epoch": 2.9386828160484484, | |
| "grad_norm": 2.3616750240325928, | |
| "learning_rate": 7.879679736911043e-07, | |
| "loss": 0.4031399190425873, | |
| "step": 3882 | |
| }, | |
| { | |
| "epoch": 2.9401968205904616, | |
| "grad_norm": 1.977614164352417, | |
| "learning_rate": 7.866680187104554e-07, | |
| "loss": 0.6264676451683044, | |
| "step": 3884 | |
| }, | |
| { | |
| "epoch": 2.9417108251324753, | |
| "grad_norm": 1.331671953201294, | |
| "learning_rate": 7.853694167463845e-07, | |
| "loss": 0.5368511080741882, | |
| "step": 3886 | |
| }, | |
| { | |
| "epoch": 2.943224829674489, | |
| "grad_norm": 1.7582005262374878, | |
| "learning_rate": 7.840721698340645e-07, | |
| "loss": 0.4475153088569641, | |
| "step": 3888 | |
| }, | |
| { | |
| "epoch": 2.9447388342165026, | |
| "grad_norm": 2.403273344039917, | |
| "learning_rate": 7.827762800065447e-07, | |
| "loss": 0.05570833012461662, | |
| "step": 3890 | |
| }, | |
| { | |
| "epoch": 2.9462528387585163, | |
| "grad_norm": 0.9218311905860901, | |
| "learning_rate": 7.814817492947481e-07, | |
| "loss": 0.10007733106613159, | |
| "step": 3892 | |
| }, | |
| { | |
| "epoch": 2.94776684330053, | |
| "grad_norm": 2.469407081604004, | |
| "learning_rate": 7.801885797274668e-07, | |
| "loss": 0.5435525178909302, | |
| "step": 3894 | |
| }, | |
| { | |
| "epoch": 2.9492808478425436, | |
| "grad_norm": 2.3963382244110107, | |
| "learning_rate": 7.788967733313607e-07, | |
| "loss": 0.5850597620010376, | |
| "step": 3896 | |
| }, | |
| { | |
| "epoch": 2.9507948523845573, | |
| "grad_norm": 1.2060469388961792, | |
| "learning_rate": 7.776063321309522e-07, | |
| "loss": 0.004651893395930529, | |
| "step": 3898 | |
| }, | |
| { | |
| "epoch": 2.952308856926571, | |
| "grad_norm": 3.9787027835845947, | |
| "learning_rate": 7.763172581486256e-07, | |
| "loss": 0.04053030163049698, | |
| "step": 3900 | |
| }, | |
| { | |
| "epoch": 2.953822861468584, | |
| "grad_norm": 1.6185312271118164, | |
| "learning_rate": 7.750295534046214e-07, | |
| "loss": 1.1225244998931885, | |
| "step": 3902 | |
| }, | |
| { | |
| "epoch": 2.9553368660105983, | |
| "grad_norm": 1.8362376689910889, | |
| "learning_rate": 7.737432199170336e-07, | |
| "loss": 0.07839284092187881, | |
| "step": 3904 | |
| }, | |
| { | |
| "epoch": 2.9568508705526115, | |
| "grad_norm": 2.360868215560913, | |
| "learning_rate": 7.724582597018097e-07, | |
| "loss": 0.5056535005569458, | |
| "step": 3906 | |
| }, | |
| { | |
| "epoch": 2.958364875094625, | |
| "grad_norm": 1.8739367723464966, | |
| "learning_rate": 7.711746747727421e-07, | |
| "loss": 0.5829885601997375, | |
| "step": 3908 | |
| }, | |
| { | |
| "epoch": 2.959878879636639, | |
| "grad_norm": 1.7606217861175537, | |
| "learning_rate": 7.698924671414689e-07, | |
| "loss": 0.8447286486625671, | |
| "step": 3910 | |
| }, | |
| { | |
| "epoch": 2.9613928841786525, | |
| "grad_norm": 18.142555236816406, | |
| "learning_rate": 7.686116388174711e-07, | |
| "loss": 0.3059283196926117, | |
| "step": 3912 | |
| }, | |
| { | |
| "epoch": 2.962906888720666, | |
| "grad_norm": 0.9034169316291809, | |
| "learning_rate": 7.67332191808066e-07, | |
| "loss": 0.41916462779045105, | |
| "step": 3914 | |
| }, | |
| { | |
| "epoch": 2.96442089326268, | |
| "grad_norm": 1.391530156135559, | |
| "learning_rate": 7.660541281184074e-07, | |
| "loss": 0.8247898817062378, | |
| "step": 3916 | |
| }, | |
| { | |
| "epoch": 2.9659348978046935, | |
| "grad_norm": 2.9683992862701416, | |
| "learning_rate": 7.6477744975148e-07, | |
| "loss": 0.470153272151947, | |
| "step": 3918 | |
| }, | |
| { | |
| "epoch": 2.9674489023467068, | |
| "grad_norm": 1.7733650207519531, | |
| "learning_rate": 7.63502158708099e-07, | |
| "loss": 0.5974211096763611, | |
| "step": 3920 | |
| }, | |
| { | |
| "epoch": 2.968962906888721, | |
| "grad_norm": 1.377882480621338, | |
| "learning_rate": 7.62228256986904e-07, | |
| "loss": 0.9456145167350769, | |
| "step": 3922 | |
| }, | |
| { | |
| "epoch": 2.970476911430734, | |
| "grad_norm": 0.9141403436660767, | |
| "learning_rate": 7.609557465843581e-07, | |
| "loss": 0.440866619348526, | |
| "step": 3924 | |
| }, | |
| { | |
| "epoch": 2.971990915972748, | |
| "grad_norm": 1.7118134498596191, | |
| "learning_rate": 7.596846294947427e-07, | |
| "loss": 0.21067911386489868, | |
| "step": 3926 | |
| }, | |
| { | |
| "epoch": 2.9735049205147615, | |
| "grad_norm": 1.9212812185287476, | |
| "learning_rate": 7.584149077101576e-07, | |
| "loss": 0.47009432315826416, | |
| "step": 3928 | |
| }, | |
| { | |
| "epoch": 2.975018925056775, | |
| "grad_norm": 0.7594667673110962, | |
| "learning_rate": 7.571465832205142e-07, | |
| "loss": 0.1051906868815422, | |
| "step": 3930 | |
| }, | |
| { | |
| "epoch": 2.976532929598789, | |
| "grad_norm": 3.166438341140747, | |
| "learning_rate": 7.558796580135345e-07, | |
| "loss": 1.0215100049972534, | |
| "step": 3932 | |
| }, | |
| { | |
| "epoch": 2.9780469341408025, | |
| "grad_norm": 4.067107677459717, | |
| "learning_rate": 7.546141340747478e-07, | |
| "loss": 0.4328348934650421, | |
| "step": 3934 | |
| }, | |
| { | |
| "epoch": 2.979560938682816, | |
| "grad_norm": 7.947187900543213, | |
| "learning_rate": 7.533500133874874e-07, | |
| "loss": 0.37044578790664673, | |
| "step": 3936 | |
| }, | |
| { | |
| "epoch": 2.98107494322483, | |
| "grad_norm": 1.890057921409607, | |
| "learning_rate": 7.52087297932887e-07, | |
| "loss": 0.6495625376701355, | |
| "step": 3938 | |
| }, | |
| { | |
| "epoch": 2.9825889477668435, | |
| "grad_norm": 1.2888870239257812, | |
| "learning_rate": 7.508259896898774e-07, | |
| "loss": 0.9499898552894592, | |
| "step": 3940 | |
| }, | |
| { | |
| "epoch": 2.9841029523088567, | |
| "grad_norm": 1.0993458032608032, | |
| "learning_rate": 7.495660906351864e-07, | |
| "loss": 0.5022351741790771, | |
| "step": 3942 | |
| }, | |
| { | |
| "epoch": 2.985616956850871, | |
| "grad_norm": 1.2433116436004639, | |
| "learning_rate": 7.483076027433309e-07, | |
| "loss": 0.5429167151451111, | |
| "step": 3944 | |
| }, | |
| { | |
| "epoch": 2.987130961392884, | |
| "grad_norm": 1.759813666343689, | |
| "learning_rate": 7.470505279866165e-07, | |
| "loss": 0.5135501623153687, | |
| "step": 3946 | |
| }, | |
| { | |
| "epoch": 2.9886449659348977, | |
| "grad_norm": 1.7812200784683228, | |
| "learning_rate": 7.457948683351357e-07, | |
| "loss": 0.5178404450416565, | |
| "step": 3948 | |
| }, | |
| { | |
| "epoch": 2.9901589704769114, | |
| "grad_norm": 1.8572566509246826, | |
| "learning_rate": 7.445406257567613e-07, | |
| "loss": 0.5528455376625061, | |
| "step": 3950 | |
| }, | |
| { | |
| "epoch": 2.991672975018925, | |
| "grad_norm": 1.6367700099945068, | |
| "learning_rate": 7.432878022171473e-07, | |
| "loss": 0.5564888715744019, | |
| "step": 3952 | |
| }, | |
| { | |
| "epoch": 2.9931869795609387, | |
| "grad_norm": 1.6826878786087036, | |
| "learning_rate": 7.420363996797214e-07, | |
| "loss": 0.08277818560600281, | |
| "step": 3954 | |
| }, | |
| { | |
| "epoch": 2.9947009841029524, | |
| "grad_norm": 2.2437868118286133, | |
| "learning_rate": 7.407864201056869e-07, | |
| "loss": 0.4363061785697937, | |
| "step": 3956 | |
| }, | |
| { | |
| "epoch": 2.996214988644966, | |
| "grad_norm": 8.740562438964844, | |
| "learning_rate": 7.395378654540147e-07, | |
| "loss": 0.7006982564926147, | |
| "step": 3958 | |
| }, | |
| { | |
| "epoch": 2.9977289931869797, | |
| "grad_norm": 1.6342284679412842, | |
| "learning_rate": 7.382907376814441e-07, | |
| "loss": 0.5745993256568909, | |
| "step": 3960 | |
| }, | |
| { | |
| "epoch": 2.9992429977289934, | |
| "grad_norm": 1.8306623697280884, | |
| "learning_rate": 7.370450387424767e-07, | |
| "loss": 0.9066737294197083, | |
| "step": 3962 | |
| }, | |
| { | |
| "epoch": 3.0007570022710066, | |
| "grad_norm": 1.6099592447280884, | |
| "learning_rate": 7.358007705893771e-07, | |
| "loss": 0.56649249792099, | |
| "step": 3964 | |
| }, | |
| { | |
| "epoch": 3.0022710068130203, | |
| "grad_norm": 0.6058228015899658, | |
| "learning_rate": 7.345579351721656e-07, | |
| "loss": 0.4400520920753479, | |
| "step": 3966 | |
| }, | |
| { | |
| "epoch": 3.003785011355034, | |
| "grad_norm": 2.340282440185547, | |
| "learning_rate": 7.333165344386171e-07, | |
| "loss": 0.2406800538301468, | |
| "step": 3968 | |
| }, | |
| { | |
| "epoch": 3.0052990158970476, | |
| "grad_norm": 7.830657005310059, | |
| "learning_rate": 7.3207657033426e-07, | |
| "loss": 0.19384807348251343, | |
| "step": 3970 | |
| }, | |
| { | |
| "epoch": 3.0068130204390613, | |
| "grad_norm": 0.14254476130008698, | |
| "learning_rate": 7.308380448023691e-07, | |
| "loss": 0.47479867935180664, | |
| "step": 3972 | |
| }, | |
| { | |
| "epoch": 3.008327024981075, | |
| "grad_norm": 1.5983010530471802, | |
| "learning_rate": 7.296009597839658e-07, | |
| "loss": 0.41168585419654846, | |
| "step": 3974 | |
| }, | |
| { | |
| "epoch": 3.0098410295230886, | |
| "grad_norm": 1.721829891204834, | |
| "learning_rate": 7.283653172178129e-07, | |
| "loss": 0.4500812888145447, | |
| "step": 3976 | |
| }, | |
| { | |
| "epoch": 3.0113550340651023, | |
| "grad_norm": 1.6810276508331299, | |
| "learning_rate": 7.271311190404144e-07, | |
| "loss": 0.4039463996887207, | |
| "step": 3978 | |
| }, | |
| { | |
| "epoch": 3.012869038607116, | |
| "grad_norm": 1.7916243076324463, | |
| "learning_rate": 7.25898367186009e-07, | |
| "loss": 0.9354234337806702, | |
| "step": 3980 | |
| }, | |
| { | |
| "epoch": 3.0143830431491296, | |
| "grad_norm": 1.3198893070220947, | |
| "learning_rate": 7.246670635865692e-07, | |
| "loss": 0.13221746683120728, | |
| "step": 3982 | |
| }, | |
| { | |
| "epoch": 3.015897047691143, | |
| "grad_norm": 1.7246809005737305, | |
| "learning_rate": 7.234372101717974e-07, | |
| "loss": 0.8873751759529114, | |
| "step": 3984 | |
| }, | |
| { | |
| "epoch": 3.0174110522331565, | |
| "grad_norm": 1.7376207113265991, | |
| "learning_rate": 7.222088088691246e-07, | |
| "loss": 0.451565146446228, | |
| "step": 3986 | |
| }, | |
| { | |
| "epoch": 3.01892505677517, | |
| "grad_norm": 2.4644076824188232, | |
| "learning_rate": 7.209818616037046e-07, | |
| "loss": 0.014454965479671955, | |
| "step": 3988 | |
| }, | |
| { | |
| "epoch": 3.020439061317184, | |
| "grad_norm": 2.7672579288482666, | |
| "learning_rate": 7.197563702984131e-07, | |
| "loss": 0.4274209439754486, | |
| "step": 3990 | |
| }, | |
| { | |
| "epoch": 3.0219530658591975, | |
| "grad_norm": 1.235212802886963, | |
| "learning_rate": 7.185323368738442e-07, | |
| "loss": 0.4580550193786621, | |
| "step": 3992 | |
| }, | |
| { | |
| "epoch": 3.023467070401211, | |
| "grad_norm": 1.6338485479354858, | |
| "learning_rate": 7.173097632483067e-07, | |
| "loss": 0.47135257720947266, | |
| "step": 3994 | |
| }, | |
| { | |
| "epoch": 3.024981074943225, | |
| "grad_norm": 0.5066173672676086, | |
| "learning_rate": 7.160886513378211e-07, | |
| "loss": 0.47466373443603516, | |
| "step": 3996 | |
| }, | |
| { | |
| "epoch": 3.0264950794852385, | |
| "grad_norm": 1.0611977577209473, | |
| "learning_rate": 7.148690030561192e-07, | |
| "loss": 0.42254048585891724, | |
| "step": 3998 | |
| }, | |
| { | |
| "epoch": 3.028009084027252, | |
| "grad_norm": 1.4310609102249146, | |
| "learning_rate": 7.136508203146364e-07, | |
| "loss": 0.42798805236816406, | |
| "step": 4000 | |
| }, | |
| { | |
| "epoch": 3.029523088569266, | |
| "grad_norm": 0.9165547490119934, | |
| "learning_rate": 7.124341050225133e-07, | |
| "loss": 0.0023502728436142206, | |
| "step": 4002 | |
| }, | |
| { | |
| "epoch": 3.031037093111279, | |
| "grad_norm": 2.101602077484131, | |
| "learning_rate": 7.112188590865894e-07, | |
| "loss": 0.5950231552124023, | |
| "step": 4004 | |
| }, | |
| { | |
| "epoch": 3.032551097653293, | |
| "grad_norm": 1.025341272354126, | |
| "learning_rate": 7.10005084411402e-07, | |
| "loss": 0.03122119978070259, | |
| "step": 4006 | |
| }, | |
| { | |
| "epoch": 3.0340651021953065, | |
| "grad_norm": 1.6090428829193115, | |
| "learning_rate": 7.087927828991828e-07, | |
| "loss": 0.32853564620018005, | |
| "step": 4008 | |
| }, | |
| { | |
| "epoch": 3.03557910673732, | |
| "grad_norm": 1.611303687095642, | |
| "learning_rate": 7.075819564498545e-07, | |
| "loss": 0.3890955448150635, | |
| "step": 4010 | |
| }, | |
| { | |
| "epoch": 3.037093111279334, | |
| "grad_norm": 3.2842607498168945, | |
| "learning_rate": 7.063726069610276e-07, | |
| "loss": 0.42601776123046875, | |
| "step": 4012 | |
| }, | |
| { | |
| "epoch": 3.0386071158213475, | |
| "grad_norm": 4.464943885803223, | |
| "learning_rate": 7.05164736327999e-07, | |
| "loss": 0.1855679452419281, | |
| "step": 4014 | |
| }, | |
| { | |
| "epoch": 3.040121120363361, | |
| "grad_norm": 2.2660470008850098, | |
| "learning_rate": 7.039583464437473e-07, | |
| "loss": 0.535140335559845, | |
| "step": 4016 | |
| }, | |
| { | |
| "epoch": 3.041635124905375, | |
| "grad_norm": 1.3778276443481445, | |
| "learning_rate": 7.027534391989301e-07, | |
| "loss": 0.3070753216743469, | |
| "step": 4018 | |
| }, | |
| { | |
| "epoch": 3.0431491294473885, | |
| "grad_norm": 1.3708959817886353, | |
| "learning_rate": 7.015500164818816e-07, | |
| "loss": 0.011551040224730968, | |
| "step": 4020 | |
| }, | |
| { | |
| "epoch": 3.044663133989402, | |
| "grad_norm": 1.6248399019241333, | |
| "learning_rate": 7.003480801786104e-07, | |
| "loss": 0.7897078394889832, | |
| "step": 4022 | |
| }, | |
| { | |
| "epoch": 3.046177138531416, | |
| "grad_norm": 1.192365050315857, | |
| "learning_rate": 6.991476321727945e-07, | |
| "loss": 0.015628105029463768, | |
| "step": 4024 | |
| }, | |
| { | |
| "epoch": 3.047691143073429, | |
| "grad_norm": 1.2526236772537231, | |
| "learning_rate": 6.979486743457794e-07, | |
| "loss": 0.09048985689878464, | |
| "step": 4026 | |
| }, | |
| { | |
| "epoch": 3.0492051476154427, | |
| "grad_norm": 3.9266533851623535, | |
| "learning_rate": 6.967512085765763e-07, | |
| "loss": 0.42560815811157227, | |
| "step": 4028 | |
| }, | |
| { | |
| "epoch": 3.0507191521574564, | |
| "grad_norm": 2.915327548980713, | |
| "learning_rate": 6.955552367418566e-07, | |
| "loss": 0.21876971423625946, | |
| "step": 4030 | |
| }, | |
| { | |
| "epoch": 3.05223315669947, | |
| "grad_norm": 0.17085859179496765, | |
| "learning_rate": 6.943607607159516e-07, | |
| "loss": 0.4021713435649872, | |
| "step": 4032 | |
| }, | |
| { | |
| "epoch": 3.0537471612414837, | |
| "grad_norm": 1.7475470304489136, | |
| "learning_rate": 6.93167782370847e-07, | |
| "loss": 0.5051688551902771, | |
| "step": 4034 | |
| }, | |
| { | |
| "epoch": 3.0552611657834974, | |
| "grad_norm": 2.716975688934326, | |
| "learning_rate": 6.919763035761835e-07, | |
| "loss": 0.4326034188270569, | |
| "step": 4036 | |
| }, | |
| { | |
| "epoch": 3.056775170325511, | |
| "grad_norm": 1.3036128282546997, | |
| "learning_rate": 6.907863261992494e-07, | |
| "loss": 0.5222240686416626, | |
| "step": 4038 | |
| }, | |
| { | |
| "epoch": 3.0582891748675247, | |
| "grad_norm": 2.3033268451690674, | |
| "learning_rate": 6.895978521049816e-07, | |
| "loss": 0.0866556465625763, | |
| "step": 4040 | |
| }, | |
| { | |
| "epoch": 3.0598031794095384, | |
| "grad_norm": 1.632857084274292, | |
| "learning_rate": 6.884108831559594e-07, | |
| "loss": 0.44751986861228943, | |
| "step": 4042 | |
| }, | |
| { | |
| "epoch": 3.061317183951552, | |
| "grad_norm": 0.7052401900291443, | |
| "learning_rate": 6.872254212124053e-07, | |
| "loss": 0.43911439180374146, | |
| "step": 4044 | |
| }, | |
| { | |
| "epoch": 3.0628311884935653, | |
| "grad_norm": 2.376359701156616, | |
| "learning_rate": 6.860414681321787e-07, | |
| "loss": 0.20868328213691711, | |
| "step": 4046 | |
| }, | |
| { | |
| "epoch": 3.064345193035579, | |
| "grad_norm": 1.0090512037277222, | |
| "learning_rate": 6.848590257707741e-07, | |
| "loss": 0.45377230644226074, | |
| "step": 4048 | |
| }, | |
| { | |
| "epoch": 3.0658591975775926, | |
| "grad_norm": 1.676525354385376, | |
| "learning_rate": 6.836780959813194e-07, | |
| "loss": 0.37644314765930176, | |
| "step": 4050 | |
| }, | |
| { | |
| "epoch": 3.0673732021196063, | |
| "grad_norm": 42.375518798828125, | |
| "learning_rate": 6.82498680614571e-07, | |
| "loss": 0.474353164434433, | |
| "step": 4052 | |
| }, | |
| { | |
| "epoch": 3.06888720666162, | |
| "grad_norm": 1.7034921646118164, | |
| "learning_rate": 6.81320781518913e-07, | |
| "loss": 0.5057543516159058, | |
| "step": 4054 | |
| }, | |
| { | |
| "epoch": 3.0704012112036336, | |
| "grad_norm": 1.43311607837677, | |
| "learning_rate": 6.801444005403517e-07, | |
| "loss": 0.005710270255804062, | |
| "step": 4056 | |
| }, | |
| { | |
| "epoch": 3.0719152157456473, | |
| "grad_norm": 1.3534364700317383, | |
| "learning_rate": 6.789695395225158e-07, | |
| "loss": 0.3858188986778259, | |
| "step": 4058 | |
| }, | |
| { | |
| "epoch": 3.073429220287661, | |
| "grad_norm": 2.685340404510498, | |
| "learning_rate": 6.777962003066511e-07, | |
| "loss": 0.45474767684936523, | |
| "step": 4060 | |
| }, | |
| { | |
| "epoch": 3.0749432248296746, | |
| "grad_norm": 1.9002461433410645, | |
| "learning_rate": 6.766243847316176e-07, | |
| "loss": 0.45055925846099854, | |
| "step": 4062 | |
| }, | |
| { | |
| "epoch": 3.0764572293716883, | |
| "grad_norm": 1.7495943307876587, | |
| "learning_rate": 6.754540946338894e-07, | |
| "loss": 0.28715768456459045, | |
| "step": 4064 | |
| }, | |
| { | |
| "epoch": 3.0779712339137015, | |
| "grad_norm": 3.2639667987823486, | |
| "learning_rate": 6.742853318475486e-07, | |
| "loss": 0.6930996775627136, | |
| "step": 4066 | |
| }, | |
| { | |
| "epoch": 3.079485238455715, | |
| "grad_norm": 1.3847533464431763, | |
| "learning_rate": 6.731180982042835e-07, | |
| "loss": 0.4592818319797516, | |
| "step": 4068 | |
| }, | |
| { | |
| "epoch": 3.080999242997729, | |
| "grad_norm": 6.530165672302246, | |
| "learning_rate": 6.719523955333861e-07, | |
| "loss": 0.031308241188526154, | |
| "step": 4070 | |
| }, | |
| { | |
| "epoch": 3.0825132475397425, | |
| "grad_norm": 2.292935609817505, | |
| "learning_rate": 6.707882256617498e-07, | |
| "loss": 0.3984447419643402, | |
| "step": 4072 | |
| }, | |
| { | |
| "epoch": 3.084027252081756, | |
| "grad_norm": 1.8727905750274658, | |
| "learning_rate": 6.696255904138654e-07, | |
| "loss": 0.5818535089492798, | |
| "step": 4074 | |
| }, | |
| { | |
| "epoch": 3.08554125662377, | |
| "grad_norm": 1.011590838432312, | |
| "learning_rate": 6.684644916118179e-07, | |
| "loss": 0.3981076776981354, | |
| "step": 4076 | |
| }, | |
| { | |
| "epoch": 3.0870552611657835, | |
| "grad_norm": 2.1105289459228516, | |
| "learning_rate": 6.673049310752851e-07, | |
| "loss": 0.46739429235458374, | |
| "step": 4078 | |
| }, | |
| { | |
| "epoch": 3.088569265707797, | |
| "grad_norm": 7.887820243835449, | |
| "learning_rate": 6.661469106215343e-07, | |
| "loss": 0.11569129675626755, | |
| "step": 4080 | |
| }, | |
| { | |
| "epoch": 3.090083270249811, | |
| "grad_norm": 1.2003979682922363, | |
| "learning_rate": 6.649904320654185e-07, | |
| "loss": 0.3800753057003021, | |
| "step": 4082 | |
| }, | |
| { | |
| "epoch": 3.0915972747918246, | |
| "grad_norm": 0.3290994465351105, | |
| "learning_rate": 6.638354972193742e-07, | |
| "loss": 0.011019574478268623, | |
| "step": 4084 | |
| }, | |
| { | |
| "epoch": 3.0931112793338382, | |
| "grad_norm": 1.8022518157958984, | |
| "learning_rate": 6.626821078934197e-07, | |
| "loss": 0.5054448246955872, | |
| "step": 4086 | |
| }, | |
| { | |
| "epoch": 3.0946252838758515, | |
| "grad_norm": 1.4422074556350708, | |
| "learning_rate": 6.615302658951497e-07, | |
| "loss": 0.8614604473114014, | |
| "step": 4088 | |
| }, | |
| { | |
| "epoch": 3.096139288417865, | |
| "grad_norm": 0.8293083906173706, | |
| "learning_rate": 6.603799730297347e-07, | |
| "loss": 0.0018426375463604927, | |
| "step": 4090 | |
| }, | |
| { | |
| "epoch": 3.097653292959879, | |
| "grad_norm": 1.566907525062561, | |
| "learning_rate": 6.592312310999173e-07, | |
| "loss": 0.4373411536216736, | |
| "step": 4092 | |
| }, | |
| { | |
| "epoch": 3.0991672975018925, | |
| "grad_norm": 2.7237191200256348, | |
| "learning_rate": 6.580840419060095e-07, | |
| "loss": 0.02366696111857891, | |
| "step": 4094 | |
| }, | |
| { | |
| "epoch": 3.100681302043906, | |
| "grad_norm": 1.0543371438980103, | |
| "learning_rate": 6.5693840724589e-07, | |
| "loss": 0.6218542456626892, | |
| "step": 4096 | |
| }, | |
| { | |
| "epoch": 3.10219530658592, | |
| "grad_norm": 0.6855198740959167, | |
| "learning_rate": 6.557943289150002e-07, | |
| "loss": 0.0873081237077713, | |
| "step": 4098 | |
| }, | |
| { | |
| "epoch": 3.1037093111279335, | |
| "grad_norm": 0.9665345549583435, | |
| "learning_rate": 6.546518087063444e-07, | |
| "loss": 0.0009544052300043404, | |
| "step": 4100 | |
| }, | |
| { | |
| "epoch": 3.105223315669947, | |
| "grad_norm": 0.9792335629463196, | |
| "learning_rate": 6.535108484104827e-07, | |
| "loss": 0.08898274600505829, | |
| "step": 4102 | |
| }, | |
| { | |
| "epoch": 3.106737320211961, | |
| "grad_norm": 2.054558753967285, | |
| "learning_rate": 6.523714498155326e-07, | |
| "loss": 0.04483972489833832, | |
| "step": 4104 | |
| }, | |
| { | |
| "epoch": 3.1082513247539745, | |
| "grad_norm": 2.7286934852600098, | |
| "learning_rate": 6.512336147071624e-07, | |
| "loss": 0.613974392414093, | |
| "step": 4106 | |
| }, | |
| { | |
| "epoch": 3.1097653292959877, | |
| "grad_norm": 1.400834560394287, | |
| "learning_rate": 6.500973448685914e-07, | |
| "loss": 0.24737434089183807, | |
| "step": 4108 | |
| }, | |
| { | |
| "epoch": 3.1112793338380014, | |
| "grad_norm": 0.4912169277667999, | |
| "learning_rate": 6.489626420805851e-07, | |
| "loss": 0.37733766436576843, | |
| "step": 4110 | |
| }, | |
| { | |
| "epoch": 3.112793338380015, | |
| "grad_norm": 6.200310707092285, | |
| "learning_rate": 6.47829508121453e-07, | |
| "loss": 0.05362119525671005, | |
| "step": 4112 | |
| }, | |
| { | |
| "epoch": 3.1143073429220287, | |
| "grad_norm": 3.9851672649383545, | |
| "learning_rate": 6.466979447670463e-07, | |
| "loss": 0.1483810842037201, | |
| "step": 4114 | |
| }, | |
| { | |
| "epoch": 3.1158213474640424, | |
| "grad_norm": 0.546320915222168, | |
| "learning_rate": 6.455679537907548e-07, | |
| "loss": 0.007687057368457317, | |
| "step": 4116 | |
| }, | |
| { | |
| "epoch": 3.117335352006056, | |
| "grad_norm": 1.32984459400177, | |
| "learning_rate": 6.44439536963504e-07, | |
| "loss": 0.7310623526573181, | |
| "step": 4118 | |
| }, | |
| { | |
| "epoch": 3.1188493565480697, | |
| "grad_norm": 3.537933826446533, | |
| "learning_rate": 6.433126960537513e-07, | |
| "loss": 0.6782516837120056, | |
| "step": 4120 | |
| }, | |
| { | |
| "epoch": 3.1203633610900834, | |
| "grad_norm": 1.9288111925125122, | |
| "learning_rate": 6.421874328274865e-07, | |
| "loss": 0.8860838413238525, | |
| "step": 4122 | |
| }, | |
| { | |
| "epoch": 3.121877365632097, | |
| "grad_norm": 2.1687636375427246, | |
| "learning_rate": 6.410637490482252e-07, | |
| "loss": 0.8524699211120605, | |
| "step": 4124 | |
| }, | |
| { | |
| "epoch": 3.1233913701741107, | |
| "grad_norm": 2.177737236022949, | |
| "learning_rate": 6.399416464770082e-07, | |
| "loss": 0.19032512605190277, | |
| "step": 4126 | |
| }, | |
| { | |
| "epoch": 3.124905374716124, | |
| "grad_norm": 2.1528878211975098, | |
| "learning_rate": 6.388211268723975e-07, | |
| "loss": 0.5917698740959167, | |
| "step": 4128 | |
| }, | |
| { | |
| "epoch": 3.1264193792581376, | |
| "grad_norm": 3.231980085372925, | |
| "learning_rate": 6.377021919904758e-07, | |
| "loss": 0.05690597742795944, | |
| "step": 4130 | |
| }, | |
| { | |
| "epoch": 3.1279333838001513, | |
| "grad_norm": 1.488774061203003, | |
| "learning_rate": 6.365848435848412e-07, | |
| "loss": 0.4813673794269562, | |
| "step": 4132 | |
| }, | |
| { | |
| "epoch": 3.129447388342165, | |
| "grad_norm": 2.938225269317627, | |
| "learning_rate": 6.354690834066054e-07, | |
| "loss": 0.9174196124076843, | |
| "step": 4134 | |
| }, | |
| { | |
| "epoch": 3.1309613928841786, | |
| "grad_norm": 7.670570373535156, | |
| "learning_rate": 6.343549132043909e-07, | |
| "loss": 0.20087343454360962, | |
| "step": 4136 | |
| }, | |
| { | |
| "epoch": 3.1324753974261923, | |
| "grad_norm": 1.8709648847579956, | |
| "learning_rate": 6.332423347243294e-07, | |
| "loss": 0.9196281433105469, | |
| "step": 4138 | |
| }, | |
| { | |
| "epoch": 3.133989401968206, | |
| "grad_norm": 1.3427828550338745, | |
| "learning_rate": 6.321313497100571e-07, | |
| "loss": 0.3599630296230316, | |
| "step": 4140 | |
| }, | |
| { | |
| "epoch": 3.1355034065102196, | |
| "grad_norm": 3.089582920074463, | |
| "learning_rate": 6.310219599027128e-07, | |
| "loss": 0.6179099678993225, | |
| "step": 4142 | |
| }, | |
| { | |
| "epoch": 3.1370174110522333, | |
| "grad_norm": 2.12060809135437, | |
| "learning_rate": 6.299141670409361e-07, | |
| "loss": 0.5258981585502625, | |
| "step": 4144 | |
| }, | |
| { | |
| "epoch": 3.138531415594247, | |
| "grad_norm": 1.4477745294570923, | |
| "learning_rate": 6.288079728608635e-07, | |
| "loss": 0.9324302673339844, | |
| "step": 4146 | |
| }, | |
| { | |
| "epoch": 3.14004542013626, | |
| "grad_norm": 1.8090686798095703, | |
| "learning_rate": 6.277033790961259e-07, | |
| "loss": 0.80943763256073, | |
| "step": 4148 | |
| }, | |
| { | |
| "epoch": 3.141559424678274, | |
| "grad_norm": 2.9717986583709717, | |
| "learning_rate": 6.266003874778456e-07, | |
| "loss": 0.49069246649742126, | |
| "step": 4150 | |
| }, | |
| { | |
| "epoch": 3.1430734292202875, | |
| "grad_norm": 4.860805034637451, | |
| "learning_rate": 6.254989997346353e-07, | |
| "loss": 0.03697580099105835, | |
| "step": 4152 | |
| }, | |
| { | |
| "epoch": 3.144587433762301, | |
| "grad_norm": 1.3705027103424072, | |
| "learning_rate": 6.243992175925925e-07, | |
| "loss": 0.28662988543510437, | |
| "step": 4154 | |
| }, | |
| { | |
| "epoch": 3.146101438304315, | |
| "grad_norm": 0.6647135019302368, | |
| "learning_rate": 6.233010427753001e-07, | |
| "loss": 0.46875080466270447, | |
| "step": 4156 | |
| }, | |
| { | |
| "epoch": 3.1476154428463285, | |
| "grad_norm": 0.3081839382648468, | |
| "learning_rate": 6.222044770038203e-07, | |
| "loss": 0.3812503516674042, | |
| "step": 4158 | |
| }, | |
| { | |
| "epoch": 3.149129447388342, | |
| "grad_norm": 5.093878746032715, | |
| "learning_rate": 6.21109521996695e-07, | |
| "loss": 0.872576892375946, | |
| "step": 4160 | |
| }, | |
| { | |
| "epoch": 3.150643451930356, | |
| "grad_norm": 1.9643348455429077, | |
| "learning_rate": 6.20016179469941e-07, | |
| "loss": 0.7427912950515747, | |
| "step": 4162 | |
| }, | |
| { | |
| "epoch": 3.1521574564723696, | |
| "grad_norm": 1.43099045753479, | |
| "learning_rate": 6.189244511370476e-07, | |
| "loss": 0.5047937035560608, | |
| "step": 4164 | |
| }, | |
| { | |
| "epoch": 3.1536714610143832, | |
| "grad_norm": 2.2820796966552734, | |
| "learning_rate": 6.178343387089756e-07, | |
| "loss": 0.7190396189689636, | |
| "step": 4166 | |
| }, | |
| { | |
| "epoch": 3.1551854655563965, | |
| "grad_norm": 1.949791669845581, | |
| "learning_rate": 6.16745843894152e-07, | |
| "loss": 0.4036288559436798, | |
| "step": 4168 | |
| }, | |
| { | |
| "epoch": 3.15669947009841, | |
| "grad_norm": 3.984266757965088, | |
| "learning_rate": 6.156589683984697e-07, | |
| "loss": 0.04631911590695381, | |
| "step": 4170 | |
| }, | |
| { | |
| "epoch": 3.158213474640424, | |
| "grad_norm": 4.274811744689941, | |
| "learning_rate": 6.145737139252829e-07, | |
| "loss": 0.0733456015586853, | |
| "step": 4172 | |
| }, | |
| { | |
| "epoch": 3.1597274791824375, | |
| "grad_norm": 5.016620635986328, | |
| "learning_rate": 6.134900821754063e-07, | |
| "loss": 0.06877418607473373, | |
| "step": 4174 | |
| }, | |
| { | |
| "epoch": 3.161241483724451, | |
| "grad_norm": 2.872525453567505, | |
| "learning_rate": 6.124080748471109e-07, | |
| "loss": 0.08794441819190979, | |
| "step": 4176 | |
| }, | |
| { | |
| "epoch": 3.162755488266465, | |
| "grad_norm": 1.5351786613464355, | |
| "learning_rate": 6.113276936361215e-07, | |
| "loss": 0.38341233134269714, | |
| "step": 4178 | |
| }, | |
| { | |
| "epoch": 3.1642694928084785, | |
| "grad_norm": 2.5572965145111084, | |
| "learning_rate": 6.102489402356153e-07, | |
| "loss": 0.9050129652023315, | |
| "step": 4180 | |
| }, | |
| { | |
| "epoch": 3.165783497350492, | |
| "grad_norm": 0.9266190528869629, | |
| "learning_rate": 6.091718163362182e-07, | |
| "loss": 0.500056803226471, | |
| "step": 4182 | |
| }, | |
| { | |
| "epoch": 3.167297501892506, | |
| "grad_norm": 2.0204591751098633, | |
| "learning_rate": 6.080963236260016e-07, | |
| "loss": 0.8148636221885681, | |
| "step": 4184 | |
| }, | |
| { | |
| "epoch": 3.1688115064345195, | |
| "grad_norm": 0.7167491316795349, | |
| "learning_rate": 6.070224637904811e-07, | |
| "loss": 0.4477299451828003, | |
| "step": 4186 | |
| }, | |
| { | |
| "epoch": 3.170325510976533, | |
| "grad_norm": 2.1239423751831055, | |
| "learning_rate": 6.059502385126138e-07, | |
| "loss": 0.8294087052345276, | |
| "step": 4188 | |
| }, | |
| { | |
| "epoch": 3.1718395155185464, | |
| "grad_norm": 1.3196355104446411, | |
| "learning_rate": 6.04879649472794e-07, | |
| "loss": 0.5490575432777405, | |
| "step": 4190 | |
| }, | |
| { | |
| "epoch": 3.17335352006056, | |
| "grad_norm": 1.4261651039123535, | |
| "learning_rate": 6.038106983488523e-07, | |
| "loss": 0.06415217369794846, | |
| "step": 4192 | |
| }, | |
| { | |
| "epoch": 3.1748675246025737, | |
| "grad_norm": 1.8780325651168823, | |
| "learning_rate": 6.027433868160518e-07, | |
| "loss": 0.05316773056983948, | |
| "step": 4194 | |
| }, | |
| { | |
| "epoch": 3.1763815291445874, | |
| "grad_norm": 0.09676197916269302, | |
| "learning_rate": 6.016777165470872e-07, | |
| "loss": 0.0006479769363068044, | |
| "step": 4196 | |
| }, | |
| { | |
| "epoch": 3.177895533686601, | |
| "grad_norm": 4.501723289489746, | |
| "learning_rate": 6.0061368921208e-07, | |
| "loss": 0.030987899750471115, | |
| "step": 4198 | |
| }, | |
| { | |
| "epoch": 3.1794095382286147, | |
| "grad_norm": 3.0383286476135254, | |
| "learning_rate": 5.99551306478577e-07, | |
| "loss": 0.10911338776350021, | |
| "step": 4200 | |
| }, | |
| { | |
| "epoch": 3.1809235427706284, | |
| "grad_norm": 1.3916982412338257, | |
| "learning_rate": 5.98490570011548e-07, | |
| "loss": 0.442693829536438, | |
| "step": 4202 | |
| }, | |
| { | |
| "epoch": 3.182437547312642, | |
| "grad_norm": 1.404009461402893, | |
| "learning_rate": 5.97431481473382e-07, | |
| "loss": 0.3937862515449524, | |
| "step": 4204 | |
| }, | |
| { | |
| "epoch": 3.1839515518546557, | |
| "grad_norm": 2.481732130050659, | |
| "learning_rate": 5.963740425238867e-07, | |
| "loss": 0.8848980665206909, | |
| "step": 4206 | |
| }, | |
| { | |
| "epoch": 3.1854655563966694, | |
| "grad_norm": 2.028106927871704, | |
| "learning_rate": 5.953182548202828e-07, | |
| "loss": 0.1351696103811264, | |
| "step": 4208 | |
| }, | |
| { | |
| "epoch": 3.1869795609386826, | |
| "grad_norm": 2.2008683681488037, | |
| "learning_rate": 5.94264120017205e-07, | |
| "loss": 0.03230151906609535, | |
| "step": 4210 | |
| }, | |
| { | |
| "epoch": 3.1884935654806963, | |
| "grad_norm": 1.489862322807312, | |
| "learning_rate": 5.932116397666961e-07, | |
| "loss": 0.23553022742271423, | |
| "step": 4212 | |
| }, | |
| { | |
| "epoch": 3.19000757002271, | |
| "grad_norm": 1.8328267335891724, | |
| "learning_rate": 5.921608157182062e-07, | |
| "loss": 0.8542577028274536, | |
| "step": 4214 | |
| }, | |
| { | |
| "epoch": 3.1915215745647236, | |
| "grad_norm": 1.3404337167739868, | |
| "learning_rate": 5.911116495185907e-07, | |
| "loss": 0.5208070278167725, | |
| "step": 4216 | |
| }, | |
| { | |
| "epoch": 3.1930355791067373, | |
| "grad_norm": 3.571223020553589, | |
| "learning_rate": 5.900641428121059e-07, | |
| "loss": 0.48536592721939087, | |
| "step": 4218 | |
| }, | |
| { | |
| "epoch": 3.194549583648751, | |
| "grad_norm": 0.9041838049888611, | |
| "learning_rate": 5.890182972404074e-07, | |
| "loss": 0.035800751298666, | |
| "step": 4220 | |
| }, | |
| { | |
| "epoch": 3.1960635881907646, | |
| "grad_norm": 1.311282992362976, | |
| "learning_rate": 5.879741144425475e-07, | |
| "loss": 0.479990690946579, | |
| "step": 4222 | |
| }, | |
| { | |
| "epoch": 3.1975775927327783, | |
| "grad_norm": 9.55543327331543, | |
| "learning_rate": 5.869315960549734e-07, | |
| "loss": 0.46675267815589905, | |
| "step": 4224 | |
| }, | |
| { | |
| "epoch": 3.199091597274792, | |
| "grad_norm": 0.6630218029022217, | |
| "learning_rate": 5.858907437115225e-07, | |
| "loss": 0.003185458015650511, | |
| "step": 4226 | |
| }, | |
| { | |
| "epoch": 3.2006056018168056, | |
| "grad_norm": 3.2958920001983643, | |
| "learning_rate": 5.848515590434222e-07, | |
| "loss": 0.9570237994194031, | |
| "step": 4228 | |
| }, | |
| { | |
| "epoch": 3.202119606358819, | |
| "grad_norm": 1.514442801475525, | |
| "learning_rate": 5.838140436792856e-07, | |
| "loss": 0.8392640948295593, | |
| "step": 4230 | |
| }, | |
| { | |
| "epoch": 3.2036336109008325, | |
| "grad_norm": 0.65553218126297, | |
| "learning_rate": 5.827781992451105e-07, | |
| "loss": 0.15988874435424805, | |
| "step": 4232 | |
| }, | |
| { | |
| "epoch": 3.205147615442846, | |
| "grad_norm": 0.5351942777633667, | |
| "learning_rate": 5.817440273642755e-07, | |
| "loss": 0.016632379963994026, | |
| "step": 4234 | |
| }, | |
| { | |
| "epoch": 3.20666161998486, | |
| "grad_norm": 5.285929203033447, | |
| "learning_rate": 5.807115296575374e-07, | |
| "loss": 0.8525801301002502, | |
| "step": 4236 | |
| }, | |
| { | |
| "epoch": 3.2081756245268735, | |
| "grad_norm": 2.787135601043701, | |
| "learning_rate": 5.796807077430305e-07, | |
| "loss": 0.4894711375236511, | |
| "step": 4238 | |
| }, | |
| { | |
| "epoch": 3.209689629068887, | |
| "grad_norm": 1.6540796756744385, | |
| "learning_rate": 5.78651563236262e-07, | |
| "loss": 0.03871167451143265, | |
| "step": 4240 | |
| }, | |
| { | |
| "epoch": 3.211203633610901, | |
| "grad_norm": 1.149295687675476, | |
| "learning_rate": 5.776240977501102e-07, | |
| "loss": 0.5460255742073059, | |
| "step": 4242 | |
| }, | |
| { | |
| "epoch": 3.2127176381529146, | |
| "grad_norm": 1.5972412824630737, | |
| "learning_rate": 5.765983128948217e-07, | |
| "loss": 0.024195147678256035, | |
| "step": 4244 | |
| }, | |
| { | |
| "epoch": 3.2142316426949282, | |
| "grad_norm": 3.290018320083618, | |
| "learning_rate": 5.75574210278011e-07, | |
| "loss": 0.030811673030257225, | |
| "step": 4246 | |
| }, | |
| { | |
| "epoch": 3.215745647236942, | |
| "grad_norm": 0.38647592067718506, | |
| "learning_rate": 5.745517915046542e-07, | |
| "loss": 0.005239240825176239, | |
| "step": 4248 | |
| }, | |
| { | |
| "epoch": 3.2172596517789556, | |
| "grad_norm": 1.172089695930481, | |
| "learning_rate": 5.735310581770891e-07, | |
| "loss": 0.19742485880851746, | |
| "step": 4250 | |
| }, | |
| { | |
| "epoch": 3.218773656320969, | |
| "grad_norm": 2.1019949913024902, | |
| "learning_rate": 5.725120118950119e-07, | |
| "loss": 0.7881650924682617, | |
| "step": 4252 | |
| }, | |
| { | |
| "epoch": 3.2202876608629825, | |
| "grad_norm": 3.427130937576294, | |
| "learning_rate": 5.71494654255476e-07, | |
| "loss": 0.8992434740066528, | |
| "step": 4254 | |
| }, | |
| { | |
| "epoch": 3.221801665404996, | |
| "grad_norm": 1.4113669395446777, | |
| "learning_rate": 5.704789868528865e-07, | |
| "loss": 0.4951695203781128, | |
| "step": 4256 | |
| }, | |
| { | |
| "epoch": 3.22331566994701, | |
| "grad_norm": 8.905265808105469, | |
| "learning_rate": 5.694650112790013e-07, | |
| "loss": 0.011985964141786098, | |
| "step": 4258 | |
| }, | |
| { | |
| "epoch": 3.2248296744890235, | |
| "grad_norm": 0.7892521619796753, | |
| "learning_rate": 5.684527291229256e-07, | |
| "loss": 0.01021597534418106, | |
| "step": 4260 | |
| }, | |
| { | |
| "epoch": 3.226343679031037, | |
| "grad_norm": 0.35012581944465637, | |
| "learning_rate": 5.674421419711116e-07, | |
| "loss": 0.042361292988061905, | |
| "step": 4262 | |
| }, | |
| { | |
| "epoch": 3.227857683573051, | |
| "grad_norm": 2.7546911239624023, | |
| "learning_rate": 5.664332514073544e-07, | |
| "loss": 0.3553725779056549, | |
| "step": 4264 | |
| }, | |
| { | |
| "epoch": 3.2293716881150645, | |
| "grad_norm": 4.400173664093018, | |
| "learning_rate": 5.6542605901279e-07, | |
| "loss": 0.503367006778717, | |
| "step": 4266 | |
| }, | |
| { | |
| "epoch": 3.230885692657078, | |
| "grad_norm": 1.0059622526168823, | |
| "learning_rate": 5.644205663658943e-07, | |
| "loss": 0.03507407009601593, | |
| "step": 4268 | |
| }, | |
| { | |
| "epoch": 3.2323996971990914, | |
| "grad_norm": 2.090927839279175, | |
| "learning_rate": 5.634167750424781e-07, | |
| "loss": 0.4575502276420593, | |
| "step": 4270 | |
| }, | |
| { | |
| "epoch": 3.233913701741105, | |
| "grad_norm": 4.920496463775635, | |
| "learning_rate": 5.624146866156859e-07, | |
| "loss": 0.10926653444766998, | |
| "step": 4272 | |
| }, | |
| { | |
| "epoch": 3.2354277062831187, | |
| "grad_norm": 5.562958717346191, | |
| "learning_rate": 5.614143026559947e-07, | |
| "loss": 0.33701470494270325, | |
| "step": 4274 | |
| }, | |
| { | |
| "epoch": 3.2369417108251324, | |
| "grad_norm": 1.8385343551635742, | |
| "learning_rate": 5.604156247312088e-07, | |
| "loss": 0.405283659696579, | |
| "step": 4276 | |
| }, | |
| { | |
| "epoch": 3.238455715367146, | |
| "grad_norm": 1.3062556982040405, | |
| "learning_rate": 5.594186544064595e-07, | |
| "loss": 0.1007096916437149, | |
| "step": 4278 | |
| }, | |
| { | |
| "epoch": 3.2399697199091597, | |
| "grad_norm": 3.5049281120300293, | |
| "learning_rate": 5.584233932442013e-07, | |
| "loss": 0.14424999058246613, | |
| "step": 4280 | |
| }, | |
| { | |
| "epoch": 3.2414837244511734, | |
| "grad_norm": 0.5313048958778381, | |
| "learning_rate": 5.574298428042114e-07, | |
| "loss": 0.15579357743263245, | |
| "step": 4282 | |
| }, | |
| { | |
| "epoch": 3.242997728993187, | |
| "grad_norm": 0.5158863067626953, | |
| "learning_rate": 5.564380046435849e-07, | |
| "loss": 0.48773688077926636, | |
| "step": 4284 | |
| }, | |
| { | |
| "epoch": 3.2445117335352007, | |
| "grad_norm": 2.8779616355895996, | |
| "learning_rate": 5.554478803167336e-07, | |
| "loss": 0.05835890769958496, | |
| "step": 4286 | |
| }, | |
| { | |
| "epoch": 3.2460257380772144, | |
| "grad_norm": 1.8299925327301025, | |
| "learning_rate": 5.544594713753831e-07, | |
| "loss": 0.09974260628223419, | |
| "step": 4288 | |
| }, | |
| { | |
| "epoch": 3.247539742619228, | |
| "grad_norm": 4.498845100402832, | |
| "learning_rate": 5.534727793685715e-07, | |
| "loss": 0.1410599797964096, | |
| "step": 4290 | |
| }, | |
| { | |
| "epoch": 3.2490537471612413, | |
| "grad_norm": 0.45668092370033264, | |
| "learning_rate": 5.524878058426454e-07, | |
| "loss": 0.0029913324397057295, | |
| "step": 4292 | |
| }, | |
| { | |
| "epoch": 3.250567751703255, | |
| "grad_norm": 0.4835710823535919, | |
| "learning_rate": 5.515045523412582e-07, | |
| "loss": 0.4473477602005005, | |
| "step": 4294 | |
| }, | |
| { | |
| "epoch": 3.2520817562452686, | |
| "grad_norm": 1.6173064708709717, | |
| "learning_rate": 5.505230204053681e-07, | |
| "loss": 0.8970786929130554, | |
| "step": 4296 | |
| }, | |
| { | |
| "epoch": 3.2535957607872823, | |
| "grad_norm": 1.275166630744934, | |
| "learning_rate": 5.495432115732351e-07, | |
| "loss": 0.5767251253128052, | |
| "step": 4298 | |
| }, | |
| { | |
| "epoch": 3.255109765329296, | |
| "grad_norm": 2.301729202270508, | |
| "learning_rate": 5.485651273804185e-07, | |
| "loss": 0.5156683325767517, | |
| "step": 4300 | |
| }, | |
| { | |
| "epoch": 3.2566237698713096, | |
| "grad_norm": 0.3116729259490967, | |
| "learning_rate": 5.475887693597747e-07, | |
| "loss": 0.4290734827518463, | |
| "step": 4302 | |
| }, | |
| { | |
| "epoch": 3.2581377744133233, | |
| "grad_norm": 2.6406760215759277, | |
| "learning_rate": 5.466141390414554e-07, | |
| "loss": 0.8496693968772888, | |
| "step": 4304 | |
| }, | |
| { | |
| "epoch": 3.259651778955337, | |
| "grad_norm": 3.133697032928467, | |
| "learning_rate": 5.456412379529038e-07, | |
| "loss": 0.45309460163116455, | |
| "step": 4306 | |
| }, | |
| { | |
| "epoch": 3.2611657834973506, | |
| "grad_norm": 1.8580689430236816, | |
| "learning_rate": 5.446700676188544e-07, | |
| "loss": 0.4132268726825714, | |
| "step": 4308 | |
| }, | |
| { | |
| "epoch": 3.262679788039364, | |
| "grad_norm": 1.7730848789215088, | |
| "learning_rate": 5.437006295613273e-07, | |
| "loss": 0.4618237018585205, | |
| "step": 4310 | |
| }, | |
| { | |
| "epoch": 3.264193792581378, | |
| "grad_norm": 1.9740703105926514, | |
| "learning_rate": 5.427329252996299e-07, | |
| "loss": 0.4546656310558319, | |
| "step": 4312 | |
| }, | |
| { | |
| "epoch": 3.265707797123391, | |
| "grad_norm": 1.3898946046829224, | |
| "learning_rate": 5.417669563503508e-07, | |
| "loss": 0.5297324657440186, | |
| "step": 4314 | |
| }, | |
| { | |
| "epoch": 3.267221801665405, | |
| "grad_norm": 3.3110592365264893, | |
| "learning_rate": 5.408027242273592e-07, | |
| "loss": 0.5353357195854187, | |
| "step": 4316 | |
| }, | |
| { | |
| "epoch": 3.2687358062074185, | |
| "grad_norm": 1.8744398355484009, | |
| "learning_rate": 5.398402304418033e-07, | |
| "loss": 0.48005539178848267, | |
| "step": 4318 | |
| }, | |
| { | |
| "epoch": 3.270249810749432, | |
| "grad_norm": 1.2090200185775757, | |
| "learning_rate": 5.388794765021063e-07, | |
| "loss": 0.411783367395401, | |
| "step": 4320 | |
| }, | |
| { | |
| "epoch": 3.271763815291446, | |
| "grad_norm": 1.5509945154190063, | |
| "learning_rate": 5.379204639139646e-07, | |
| "loss": 0.7165201902389526, | |
| "step": 4322 | |
| }, | |
| { | |
| "epoch": 3.2732778198334596, | |
| "grad_norm": 1.963220477104187, | |
| "learning_rate": 5.369631941803455e-07, | |
| "loss": 0.8382374048233032, | |
| "step": 4324 | |
| }, | |
| { | |
| "epoch": 3.2747918243754732, | |
| "grad_norm": 0.9289786219596863, | |
| "learning_rate": 5.360076688014856e-07, | |
| "loss": 0.05874892696738243, | |
| "step": 4326 | |
| }, | |
| { | |
| "epoch": 3.276305828917487, | |
| "grad_norm": 1.2153873443603516, | |
| "learning_rate": 5.350538892748871e-07, | |
| "loss": 0.2230982780456543, | |
| "step": 4328 | |
| }, | |
| { | |
| "epoch": 3.2778198334595006, | |
| "grad_norm": 1.8008383512496948, | |
| "learning_rate": 5.341018570953158e-07, | |
| "loss": 0.5720658898353577, | |
| "step": 4330 | |
| }, | |
| { | |
| "epoch": 3.279333838001514, | |
| "grad_norm": 0.8865185976028442, | |
| "learning_rate": 5.331515737548002e-07, | |
| "loss": 0.42918962240219116, | |
| "step": 4332 | |
| }, | |
| { | |
| "epoch": 3.2808478425435275, | |
| "grad_norm": 1.399993658065796, | |
| "learning_rate": 5.32203040742627e-07, | |
| "loss": 0.34146907925605774, | |
| "step": 4334 | |
| }, | |
| { | |
| "epoch": 3.282361847085541, | |
| "grad_norm": 0.1398351937532425, | |
| "learning_rate": 5.3125625954534e-07, | |
| "loss": 0.02566056326031685, | |
| "step": 4336 | |
| }, | |
| { | |
| "epoch": 3.283875851627555, | |
| "grad_norm": 1.386157512664795, | |
| "learning_rate": 5.303112316467375e-07, | |
| "loss": 0.5041114091873169, | |
| "step": 4338 | |
| }, | |
| { | |
| "epoch": 3.2853898561695685, | |
| "grad_norm": 2.1241602897644043, | |
| "learning_rate": 5.293679585278709e-07, | |
| "loss": 0.5188767313957214, | |
| "step": 4340 | |
| }, | |
| { | |
| "epoch": 3.286903860711582, | |
| "grad_norm": 2.159628391265869, | |
| "learning_rate": 5.284264416670402e-07, | |
| "loss": 0.3858943283557892, | |
| "step": 4342 | |
| }, | |
| { | |
| "epoch": 3.288417865253596, | |
| "grad_norm": 0.35991570353507996, | |
| "learning_rate": 5.274866825397937e-07, | |
| "loss": 0.033612895756959915, | |
| "step": 4344 | |
| }, | |
| { | |
| "epoch": 3.2899318697956095, | |
| "grad_norm": 8.98794174194336, | |
| "learning_rate": 5.265486826189246e-07, | |
| "loss": 0.065807044506073, | |
| "step": 4346 | |
| }, | |
| { | |
| "epoch": 3.291445874337623, | |
| "grad_norm": 0.7042241096496582, | |
| "learning_rate": 5.256124433744697e-07, | |
| "loss": 0.4922759532928467, | |
| "step": 4348 | |
| }, | |
| { | |
| "epoch": 3.292959878879637, | |
| "grad_norm": 1.7255072593688965, | |
| "learning_rate": 5.246779662737058e-07, | |
| "loss": 0.35371652245521545, | |
| "step": 4350 | |
| }, | |
| { | |
| "epoch": 3.2944738834216505, | |
| "grad_norm": 1.7350187301635742, | |
| "learning_rate": 5.237452527811482e-07, | |
| "loss": 0.5240334272384644, | |
| "step": 4352 | |
| }, | |
| { | |
| "epoch": 3.2959878879636637, | |
| "grad_norm": 1.2877637147903442, | |
| "learning_rate": 5.228143043585488e-07, | |
| "loss": 0.0797341838479042, | |
| "step": 4354 | |
| }, | |
| { | |
| "epoch": 3.2975018925056774, | |
| "grad_norm": 0.647106945514679, | |
| "learning_rate": 5.21885122464893e-07, | |
| "loss": 0.001529219443909824, | |
| "step": 4356 | |
| }, | |
| { | |
| "epoch": 3.299015897047691, | |
| "grad_norm": 0.8982298374176025, | |
| "learning_rate": 5.209577085563968e-07, | |
| "loss": 0.4877578914165497, | |
| "step": 4358 | |
| }, | |
| { | |
| "epoch": 3.3005299015897047, | |
| "grad_norm": 2.524348020553589, | |
| "learning_rate": 5.200320640865069e-07, | |
| "loss": 0.45231226086616516, | |
| "step": 4360 | |
| }, | |
| { | |
| "epoch": 3.3020439061317184, | |
| "grad_norm": 0.022618478164076805, | |
| "learning_rate": 5.191081905058956e-07, | |
| "loss": 0.3647257089614868, | |
| "step": 4362 | |
| }, | |
| { | |
| "epoch": 3.303557910673732, | |
| "grad_norm": 1.8879731893539429, | |
| "learning_rate": 5.181860892624612e-07, | |
| "loss": 0.4972490072250366, | |
| "step": 4364 | |
| }, | |
| { | |
| "epoch": 3.3050719152157457, | |
| "grad_norm": 2.4274215698242188, | |
| "learning_rate": 5.17265761801323e-07, | |
| "loss": 0.28176480531692505, | |
| "step": 4366 | |
| }, | |
| { | |
| "epoch": 3.3065859197577594, | |
| "grad_norm": 6.416411876678467, | |
| "learning_rate": 5.163472095648216e-07, | |
| "loss": 0.40716928243637085, | |
| "step": 4368 | |
| }, | |
| { | |
| "epoch": 3.308099924299773, | |
| "grad_norm": 1.85149085521698, | |
| "learning_rate": 5.154304339925146e-07, | |
| "loss": 0.9484966993331909, | |
| "step": 4370 | |
| }, | |
| { | |
| "epoch": 3.3096139288417863, | |
| "grad_norm": 4.972357273101807, | |
| "learning_rate": 5.145154365211757e-07, | |
| "loss": 0.09232676029205322, | |
| "step": 4372 | |
| }, | |
| { | |
| "epoch": 3.3111279333838, | |
| "grad_norm": 1.1895396709442139, | |
| "learning_rate": 5.136022185847912e-07, | |
| "loss": 0.03847669064998627, | |
| "step": 4374 | |
| }, | |
| { | |
| "epoch": 3.3126419379258136, | |
| "grad_norm": 1.5939233303070068, | |
| "learning_rate": 5.126907816145599e-07, | |
| "loss": 0.500072181224823, | |
| "step": 4376 | |
| }, | |
| { | |
| "epoch": 3.3141559424678273, | |
| "grad_norm": 0.835131049156189, | |
| "learning_rate": 5.117811270388885e-07, | |
| "loss": 0.008409892208874226, | |
| "step": 4378 | |
| }, | |
| { | |
| "epoch": 3.315669947009841, | |
| "grad_norm": 29.27231216430664, | |
| "learning_rate": 5.108732562833903e-07, | |
| "loss": 0.5254101157188416, | |
| "step": 4380 | |
| }, | |
| { | |
| "epoch": 3.3171839515518546, | |
| "grad_norm": 1.65263032913208, | |
| "learning_rate": 5.09967170770883e-07, | |
| "loss": 0.09114937484264374, | |
| "step": 4382 | |
| }, | |
| { | |
| "epoch": 3.3186979560938683, | |
| "grad_norm": 2.0233285427093506, | |
| "learning_rate": 5.090628719213871e-07, | |
| "loss": 0.6537255644798279, | |
| "step": 4384 | |
| }, | |
| { | |
| "epoch": 3.320211960635882, | |
| "grad_norm": 2.8925328254699707, | |
| "learning_rate": 5.081603611521223e-07, | |
| "loss": 0.027660584077239037, | |
| "step": 4386 | |
| }, | |
| { | |
| "epoch": 3.3217259651778956, | |
| "grad_norm": 2.887153387069702, | |
| "learning_rate": 5.072596398775062e-07, | |
| "loss": 0.001339872251264751, | |
| "step": 4388 | |
| }, | |
| { | |
| "epoch": 3.3232399697199093, | |
| "grad_norm": 1.8406734466552734, | |
| "learning_rate": 5.063607095091524e-07, | |
| "loss": 0.0365619957447052, | |
| "step": 4390 | |
| }, | |
| { | |
| "epoch": 3.324753974261923, | |
| "grad_norm": 1.0864671468734741, | |
| "learning_rate": 5.054635714558673e-07, | |
| "loss": 0.8768489360809326, | |
| "step": 4392 | |
| }, | |
| { | |
| "epoch": 3.326267978803936, | |
| "grad_norm": 17.93305206298828, | |
| "learning_rate": 5.045682271236481e-07, | |
| "loss": 0.40835052728652954, | |
| "step": 4394 | |
| }, | |
| { | |
| "epoch": 3.32778198334595, | |
| "grad_norm": 4.551692962646484, | |
| "learning_rate": 5.036746779156812e-07, | |
| "loss": 0.06771939992904663, | |
| "step": 4396 | |
| }, | |
| { | |
| "epoch": 3.3292959878879635, | |
| "grad_norm": 1.5928632020950317, | |
| "learning_rate": 5.027829252323402e-07, | |
| "loss": 0.40175971388816833, | |
| "step": 4398 | |
| }, | |
| { | |
| "epoch": 3.330809992429977, | |
| "grad_norm": 1.8709660768508911, | |
| "learning_rate": 5.018929704711824e-07, | |
| "loss": 0.5133115649223328, | |
| "step": 4400 | |
| }, | |
| { | |
| "epoch": 3.332323996971991, | |
| "grad_norm": 1.6498949527740479, | |
| "learning_rate": 5.010048150269478e-07, | |
| "loss": 0.03898582234978676, | |
| "step": 4402 | |
| }, | |
| { | |
| "epoch": 3.3338380015140046, | |
| "grad_norm": 0.4957684278488159, | |
| "learning_rate": 5.001184602915561e-07, | |
| "loss": 0.007185522932559252, | |
| "step": 4404 | |
| }, | |
| { | |
| "epoch": 3.3353520060560182, | |
| "grad_norm": 1.3830325603485107, | |
| "learning_rate": 4.992339076541056e-07, | |
| "loss": 0.4123181998729706, | |
| "step": 4406 | |
| }, | |
| { | |
| "epoch": 3.336866010598032, | |
| "grad_norm": 1.9279829263687134, | |
| "learning_rate": 4.983511585008695e-07, | |
| "loss": 0.8564941883087158, | |
| "step": 4408 | |
| }, | |
| { | |
| "epoch": 3.3383800151400456, | |
| "grad_norm": 1.7395809888839722, | |
| "learning_rate": 4.974702142152955e-07, | |
| "loss": 0.3417756259441376, | |
| "step": 4410 | |
| }, | |
| { | |
| "epoch": 3.3398940196820592, | |
| "grad_norm": 1.7301632165908813, | |
| "learning_rate": 4.965910761780018e-07, | |
| "loss": 0.4873124063014984, | |
| "step": 4412 | |
| }, | |
| { | |
| "epoch": 3.341408024224073, | |
| "grad_norm": 0.8227750658988953, | |
| "learning_rate": 4.957137457667769e-07, | |
| "loss": 0.009282869286835194, | |
| "step": 4414 | |
| }, | |
| { | |
| "epoch": 3.342922028766086, | |
| "grad_norm": 0.2841896116733551, | |
| "learning_rate": 4.948382243565753e-07, | |
| "loss": 0.01866152510046959, | |
| "step": 4416 | |
| }, | |
| { | |
| "epoch": 3.3444360333081, | |
| "grad_norm": 1.9856455326080322, | |
| "learning_rate": 4.939645133195168e-07, | |
| "loss": 0.40884366631507874, | |
| "step": 4418 | |
| }, | |
| { | |
| "epoch": 3.3459500378501135, | |
| "grad_norm": 2.2969679832458496, | |
| "learning_rate": 4.930926140248843e-07, | |
| "loss": 0.7881647944450378, | |
| "step": 4420 | |
| }, | |
| { | |
| "epoch": 3.347464042392127, | |
| "grad_norm": 0.34639841318130493, | |
| "learning_rate": 4.922225278391212e-07, | |
| "loss": 0.0036944563034921885, | |
| "step": 4422 | |
| }, | |
| { | |
| "epoch": 3.348978046934141, | |
| "grad_norm": 2.087341070175171, | |
| "learning_rate": 4.913542561258286e-07, | |
| "loss": 0.6937224864959717, | |
| "step": 4424 | |
| }, | |
| { | |
| "epoch": 3.3504920514761545, | |
| "grad_norm": 0.256864070892334, | |
| "learning_rate": 4.904878002457658e-07, | |
| "loss": 0.41196000576019287, | |
| "step": 4426 | |
| }, | |
| { | |
| "epoch": 3.352006056018168, | |
| "grad_norm": 0.23681002855300903, | |
| "learning_rate": 4.896231615568442e-07, | |
| "loss": 0.06520024687051773, | |
| "step": 4428 | |
| }, | |
| { | |
| "epoch": 3.353520060560182, | |
| "grad_norm": 2.564225673675537, | |
| "learning_rate": 4.887603414141288e-07, | |
| "loss": 0.8333681225776672, | |
| "step": 4430 | |
| }, | |
| { | |
| "epoch": 3.3550340651021955, | |
| "grad_norm": 4.068796157836914, | |
| "learning_rate": 4.878993411698335e-07, | |
| "loss": 0.3338027596473694, | |
| "step": 4432 | |
| }, | |
| { | |
| "epoch": 3.3565480696442087, | |
| "grad_norm": 0.886523425579071, | |
| "learning_rate": 4.87040162173321e-07, | |
| "loss": 0.03563188016414642, | |
| "step": 4434 | |
| }, | |
| { | |
| "epoch": 3.3580620741862224, | |
| "grad_norm": 1.7797421216964722, | |
| "learning_rate": 4.861828057710993e-07, | |
| "loss": 0.7589153051376343, | |
| "step": 4436 | |
| }, | |
| { | |
| "epoch": 3.359576078728236, | |
| "grad_norm": 1.5448027849197388, | |
| "learning_rate": 4.853272733068198e-07, | |
| "loss": 0.8362768888473511, | |
| "step": 4438 | |
| }, | |
| { | |
| "epoch": 3.3610900832702497, | |
| "grad_norm": 2.307023763656616, | |
| "learning_rate": 4.844735661212755e-07, | |
| "loss": 0.48151662945747375, | |
| "step": 4440 | |
| }, | |
| { | |
| "epoch": 3.3626040878122634, | |
| "grad_norm": 2.37272572517395, | |
| "learning_rate": 4.836216855523995e-07, | |
| "loss": 0.8961389660835266, | |
| "step": 4442 | |
| }, | |
| { | |
| "epoch": 3.364118092354277, | |
| "grad_norm": 2.659275531768799, | |
| "learning_rate": 4.827716329352615e-07, | |
| "loss": 0.7384055256843567, | |
| "step": 4444 | |
| }, | |
| { | |
| "epoch": 3.3656320968962907, | |
| "grad_norm": 1.6435630321502686, | |
| "learning_rate": 4.819234096020662e-07, | |
| "loss": 0.13354086875915527, | |
| "step": 4446 | |
| }, | |
| { | |
| "epoch": 3.3671461014383044, | |
| "grad_norm": 8.236377716064453, | |
| "learning_rate": 4.810770168821524e-07, | |
| "loss": 0.10777951031923294, | |
| "step": 4448 | |
| }, | |
| { | |
| "epoch": 3.368660105980318, | |
| "grad_norm": 3.1149511337280273, | |
| "learning_rate": 4.802324561019895e-07, | |
| "loss": 0.11652589589357376, | |
| "step": 4450 | |
| }, | |
| { | |
| "epoch": 3.3701741105223317, | |
| "grad_norm": 2.0627260208129883, | |
| "learning_rate": 4.793897285851753e-07, | |
| "loss": 0.3498725891113281, | |
| "step": 4452 | |
| }, | |
| { | |
| "epoch": 3.3716881150643454, | |
| "grad_norm": 2.8674232959747314, | |
| "learning_rate": 4.785488356524347e-07, | |
| "loss": 0.6957799196243286, | |
| "step": 4454 | |
| }, | |
| { | |
| "epoch": 3.3732021196063586, | |
| "grad_norm": 22.25187873840332, | |
| "learning_rate": 4.777097786216188e-07, | |
| "loss": 0.8306207656860352, | |
| "step": 4456 | |
| }, | |
| { | |
| "epoch": 3.3747161241483723, | |
| "grad_norm": 1.1734298467636108, | |
| "learning_rate": 4.768725588076997e-07, | |
| "loss": 0.48902827501296997, | |
| "step": 4458 | |
| }, | |
| { | |
| "epoch": 3.376230128690386, | |
| "grad_norm": 0.1804582178592682, | |
| "learning_rate": 4.7603717752277094e-07, | |
| "loss": 0.2803400754928589, | |
| "step": 4460 | |
| }, | |
| { | |
| "epoch": 3.3777441332323996, | |
| "grad_norm": 2.76470947265625, | |
| "learning_rate": 4.752036360760449e-07, | |
| "loss": 0.5458463430404663, | |
| "step": 4462 | |
| }, | |
| { | |
| "epoch": 3.3792581377744133, | |
| "grad_norm": 2.0508320331573486, | |
| "learning_rate": 4.743719357738499e-07, | |
| "loss": 0.5317753553390503, | |
| "step": 4464 | |
| }, | |
| { | |
| "epoch": 3.380772142316427, | |
| "grad_norm": 2.33542537689209, | |
| "learning_rate": 4.735420779196299e-07, | |
| "loss": 0.6131026744842529, | |
| "step": 4466 | |
| }, | |
| { | |
| "epoch": 3.3822861468584406, | |
| "grad_norm": 1.4460207223892212, | |
| "learning_rate": 4.727140638139402e-07, | |
| "loss": 0.098182313144207, | |
| "step": 4468 | |
| }, | |
| { | |
| "epoch": 3.3838001514004543, | |
| "grad_norm": 1.3774964809417725, | |
| "learning_rate": 4.718878947544473e-07, | |
| "loss": 0.4701768457889557, | |
| "step": 4470 | |
| }, | |
| { | |
| "epoch": 3.385314155942468, | |
| "grad_norm": 0.5767019987106323, | |
| "learning_rate": 4.7106357203592613e-07, | |
| "loss": 0.09576580673456192, | |
| "step": 4472 | |
| }, | |
| { | |
| "epoch": 3.386828160484481, | |
| "grad_norm": 0.33554038405418396, | |
| "learning_rate": 4.702410969502575e-07, | |
| "loss": 0.3836270272731781, | |
| "step": 4474 | |
| }, | |
| { | |
| "epoch": 3.3883421650264953, | |
| "grad_norm": 2.1231112480163574, | |
| "learning_rate": 4.694204707864267e-07, | |
| "loss": 0.4471987783908844, | |
| "step": 4476 | |
| }, | |
| { | |
| "epoch": 3.3898561695685085, | |
| "grad_norm": 0.8759449124336243, | |
| "learning_rate": 4.6860169483052253e-07, | |
| "loss": 0.4150061309337616, | |
| "step": 4478 | |
| }, | |
| { | |
| "epoch": 3.391370174110522, | |
| "grad_norm": 2.5322000980377197, | |
| "learning_rate": 4.677847703657323e-07, | |
| "loss": 0.9512776732444763, | |
| "step": 4480 | |
| }, | |
| { | |
| "epoch": 3.392884178652536, | |
| "grad_norm": 1.4753117561340332, | |
| "learning_rate": 4.669696986723429e-07, | |
| "loss": 0.5149673819541931, | |
| "step": 4482 | |
| }, | |
| { | |
| "epoch": 3.3943981831945496, | |
| "grad_norm": 3.7654807567596436, | |
| "learning_rate": 4.6615648102773745e-07, | |
| "loss": 0.48217836022377014, | |
| "step": 4484 | |
| }, | |
| { | |
| "epoch": 3.3959121877365632, | |
| "grad_norm": 3.2758965492248535, | |
| "learning_rate": 4.653451187063932e-07, | |
| "loss": 0.4106427729129791, | |
| "step": 4486 | |
| }, | |
| { | |
| "epoch": 3.397426192278577, | |
| "grad_norm": 1.7070201635360718, | |
| "learning_rate": 4.6453561297987923e-07, | |
| "loss": 0.4504314661026001, | |
| "step": 4488 | |
| }, | |
| { | |
| "epoch": 3.3989401968205906, | |
| "grad_norm": 1.3219845294952393, | |
| "learning_rate": 4.637279651168556e-07, | |
| "loss": 0.0958370640873909, | |
| "step": 4490 | |
| }, | |
| { | |
| "epoch": 3.4004542013626042, | |
| "grad_norm": 1.774338960647583, | |
| "learning_rate": 4.6292217638307104e-07, | |
| "loss": 0.8819460272789001, | |
| "step": 4492 | |
| }, | |
| { | |
| "epoch": 3.401968205904618, | |
| "grad_norm": 1.8408117294311523, | |
| "learning_rate": 4.6211824804135973e-07, | |
| "loss": 0.6723110675811768, | |
| "step": 4494 | |
| }, | |
| { | |
| "epoch": 3.403482210446631, | |
| "grad_norm": 0.3831016719341278, | |
| "learning_rate": 4.6131618135164077e-07, | |
| "loss": 0.0017858326900750399, | |
| "step": 4496 | |
| }, | |
| { | |
| "epoch": 3.404996214988645, | |
| "grad_norm": 3.7257351875305176, | |
| "learning_rate": 4.605159775709153e-07, | |
| "loss": 0.845191478729248, | |
| "step": 4498 | |
| }, | |
| { | |
| "epoch": 3.4065102195306585, | |
| "grad_norm": 1.4804571866989136, | |
| "learning_rate": 4.5971763795326586e-07, | |
| "loss": 0.5123839974403381, | |
| "step": 4500 | |
| }, | |
| { | |
| "epoch": 3.408024224072672, | |
| "grad_norm": 1.1878763437271118, | |
| "learning_rate": 4.589211637498522e-07, | |
| "loss": 0.047980278730392456, | |
| "step": 4502 | |
| }, | |
| { | |
| "epoch": 3.409538228614686, | |
| "grad_norm": 2.475844621658325, | |
| "learning_rate": 4.5812655620891124e-07, | |
| "loss": 0.5012723803520203, | |
| "step": 4504 | |
| }, | |
| { | |
| "epoch": 3.4110522331566995, | |
| "grad_norm": 1.7084704637527466, | |
| "learning_rate": 4.5733381657575493e-07, | |
| "loss": 0.5272079706192017, | |
| "step": 4506 | |
| }, | |
| { | |
| "epoch": 3.412566237698713, | |
| "grad_norm": 2.307476282119751, | |
| "learning_rate": 4.5654294609276676e-07, | |
| "loss": 0.9087414145469666, | |
| "step": 4508 | |
| }, | |
| { | |
| "epoch": 3.414080242240727, | |
| "grad_norm": 1.5252008438110352, | |
| "learning_rate": 4.5575394599940126e-07, | |
| "loss": 0.4586748480796814, | |
| "step": 4510 | |
| }, | |
| { | |
| "epoch": 3.4155942467827405, | |
| "grad_norm": 0.6368719339370728, | |
| "learning_rate": 4.5496681753218254e-07, | |
| "loss": 0.030393214896321297, | |
| "step": 4512 | |
| }, | |
| { | |
| "epoch": 3.417108251324754, | |
| "grad_norm": 2.486400604248047, | |
| "learning_rate": 4.541815619247002e-07, | |
| "loss": 0.4503438174724579, | |
| "step": 4514 | |
| }, | |
| { | |
| "epoch": 3.418622255866768, | |
| "grad_norm": 1.8390527963638306, | |
| "learning_rate": 4.533981804076097e-07, | |
| "loss": 0.41847169399261475, | |
| "step": 4516 | |
| }, | |
| { | |
| "epoch": 3.420136260408781, | |
| "grad_norm": 3.992145299911499, | |
| "learning_rate": 4.526166742086288e-07, | |
| "loss": 0.6022506952285767, | |
| "step": 4518 | |
| }, | |
| { | |
| "epoch": 3.4216502649507947, | |
| "grad_norm": 3.6967203617095947, | |
| "learning_rate": 4.5183704455253603e-07, | |
| "loss": 0.03936862200498581, | |
| "step": 4520 | |
| }, | |
| { | |
| "epoch": 3.4231642694928084, | |
| "grad_norm": 1.9748190641403198, | |
| "learning_rate": 4.5105929266117035e-07, | |
| "loss": 0.8123414516448975, | |
| "step": 4522 | |
| }, | |
| { | |
| "epoch": 3.424678274034822, | |
| "grad_norm": 1.5579864978790283, | |
| "learning_rate": 4.5028341975342617e-07, | |
| "loss": 0.46440210938453674, | |
| "step": 4524 | |
| }, | |
| { | |
| "epoch": 3.4261922785768357, | |
| "grad_norm": 1.6632148027420044, | |
| "learning_rate": 4.495094270452542e-07, | |
| "loss": 0.22790756821632385, | |
| "step": 4526 | |
| }, | |
| { | |
| "epoch": 3.4277062831188494, | |
| "grad_norm": 2.2529702186584473, | |
| "learning_rate": 4.487373157496584e-07, | |
| "loss": 0.07386959344148636, | |
| "step": 4528 | |
| }, | |
| { | |
| "epoch": 3.429220287660863, | |
| "grad_norm": 3.140373945236206, | |
| "learning_rate": 4.479670870766938e-07, | |
| "loss": 0.02634722925722599, | |
| "step": 4530 | |
| }, | |
| { | |
| "epoch": 3.4307342922028767, | |
| "grad_norm": 1.9195737838745117, | |
| "learning_rate": 4.4719874223346523e-07, | |
| "loss": 0.8603999614715576, | |
| "step": 4532 | |
| }, | |
| { | |
| "epoch": 3.4322482967448904, | |
| "grad_norm": 1.6826436519622803, | |
| "learning_rate": 4.4643228242412494e-07, | |
| "loss": 0.3530040979385376, | |
| "step": 4534 | |
| }, | |
| { | |
| "epoch": 3.4337623012869036, | |
| "grad_norm": 2.034973382949829, | |
| "learning_rate": 4.456677088498715e-07, | |
| "loss": 0.47168779373168945, | |
| "step": 4536 | |
| }, | |
| { | |
| "epoch": 3.4352763058289173, | |
| "grad_norm": 1.598543643951416, | |
| "learning_rate": 4.449050227089469e-07, | |
| "loss": 0.7359284162521362, | |
| "step": 4538 | |
| }, | |
| { | |
| "epoch": 3.436790310370931, | |
| "grad_norm": 1.941410779953003, | |
| "learning_rate": 4.4414422519663487e-07, | |
| "loss": 0.475564181804657, | |
| "step": 4540 | |
| }, | |
| { | |
| "epoch": 3.4383043149129446, | |
| "grad_norm": 1.9490824937820435, | |
| "learning_rate": 4.4338531750526014e-07, | |
| "loss": 0.7518396973609924, | |
| "step": 4542 | |
| }, | |
| { | |
| "epoch": 3.4398183194549583, | |
| "grad_norm": 3.9313268661499023, | |
| "learning_rate": 4.426283008241852e-07, | |
| "loss": 0.5432718992233276, | |
| "step": 4544 | |
| }, | |
| { | |
| "epoch": 3.441332323996972, | |
| "grad_norm": 0.7866805195808411, | |
| "learning_rate": 4.418731763398087e-07, | |
| "loss": 0.4719991087913513, | |
| "step": 4546 | |
| }, | |
| { | |
| "epoch": 3.4428463285389856, | |
| "grad_norm": 1.8675153255462646, | |
| "learning_rate": 4.4111994523556387e-07, | |
| "loss": 0.42668935656547546, | |
| "step": 4548 | |
| }, | |
| { | |
| "epoch": 3.4443603330809993, | |
| "grad_norm": 2.0621190071105957, | |
| "learning_rate": 4.4036860869191734e-07, | |
| "loss": 0.4930598735809326, | |
| "step": 4550 | |
| }, | |
| { | |
| "epoch": 3.445874337623013, | |
| "grad_norm": 1.0746771097183228, | |
| "learning_rate": 4.3961916788636594e-07, | |
| "loss": 0.10933978855609894, | |
| "step": 4552 | |
| }, | |
| { | |
| "epoch": 3.4473883421650267, | |
| "grad_norm": 1.4768182039260864, | |
| "learning_rate": 4.388716239934357e-07, | |
| "loss": 0.43619269132614136, | |
| "step": 4554 | |
| }, | |
| { | |
| "epoch": 3.4489023467070403, | |
| "grad_norm": 2.2217190265655518, | |
| "learning_rate": 4.381259781846793e-07, | |
| "loss": 0.0945279598236084, | |
| "step": 4556 | |
| }, | |
| { | |
| "epoch": 3.4504163512490535, | |
| "grad_norm": 0.46245077252388, | |
| "learning_rate": 4.3738223162867586e-07, | |
| "loss": 0.013268718495965004, | |
| "step": 4558 | |
| }, | |
| { | |
| "epoch": 3.451930355791067, | |
| "grad_norm": 10.982232093811035, | |
| "learning_rate": 4.3664038549102674e-07, | |
| "loss": 0.7407252192497253, | |
| "step": 4560 | |
| }, | |
| { | |
| "epoch": 3.453444360333081, | |
| "grad_norm": 1.541535496711731, | |
| "learning_rate": 4.3590044093435584e-07, | |
| "loss": 0.40889009833335876, | |
| "step": 4562 | |
| }, | |
| { | |
| "epoch": 3.4549583648750946, | |
| "grad_norm": 1.5232385396957397, | |
| "learning_rate": 4.351623991183065e-07, | |
| "loss": 0.038662832230329514, | |
| "step": 4564 | |
| }, | |
| { | |
| "epoch": 3.4564723694171082, | |
| "grad_norm": 0.6275953650474548, | |
| "learning_rate": 4.3442626119953994e-07, | |
| "loss": 0.4955153167247772, | |
| "step": 4566 | |
| }, | |
| { | |
| "epoch": 3.457986373959122, | |
| "grad_norm": 1.9511977434158325, | |
| "learning_rate": 4.336920283317344e-07, | |
| "loss": 0.3769458532333374, | |
| "step": 4568 | |
| }, | |
| { | |
| "epoch": 3.4595003785011356, | |
| "grad_norm": 0.9865078330039978, | |
| "learning_rate": 4.329597016655811e-07, | |
| "loss": 0.06349517405033112, | |
| "step": 4570 | |
| }, | |
| { | |
| "epoch": 3.4610143830431492, | |
| "grad_norm": 1.6756504774093628, | |
| "learning_rate": 4.3222928234878545e-07, | |
| "loss": 0.4160110652446747, | |
| "step": 4572 | |
| }, | |
| { | |
| "epoch": 3.462528387585163, | |
| "grad_norm": 1.8068053722381592, | |
| "learning_rate": 4.315007715260625e-07, | |
| "loss": 0.9335572719573975, | |
| "step": 4574 | |
| }, | |
| { | |
| "epoch": 3.4640423921271766, | |
| "grad_norm": 1.5421242713928223, | |
| "learning_rate": 4.307741703391366e-07, | |
| "loss": 0.0467277355492115, | |
| "step": 4576 | |
| }, | |
| { | |
| "epoch": 3.4655563966691902, | |
| "grad_norm": 1.0545746088027954, | |
| "learning_rate": 4.3004947992673973e-07, | |
| "loss": 0.4467891752719879, | |
| "step": 4578 | |
| }, | |
| { | |
| "epoch": 3.4670704012112035, | |
| "grad_norm": 0.03795475885272026, | |
| "learning_rate": 4.2932670142460887e-07, | |
| "loss": 0.005698340944945812, | |
| "step": 4580 | |
| }, | |
| { | |
| "epoch": 3.468584405753217, | |
| "grad_norm": 1.9916728734970093, | |
| "learning_rate": 4.286058359654846e-07, | |
| "loss": 0.5108667612075806, | |
| "step": 4582 | |
| }, | |
| { | |
| "epoch": 3.470098410295231, | |
| "grad_norm": 6.060962677001953, | |
| "learning_rate": 4.278868846791094e-07, | |
| "loss": 0.33193695545196533, | |
| "step": 4584 | |
| }, | |
| { | |
| "epoch": 3.4716124148372445, | |
| "grad_norm": 4.610118389129639, | |
| "learning_rate": 4.2716984869222625e-07, | |
| "loss": 0.49625271558761597, | |
| "step": 4586 | |
| }, | |
| { | |
| "epoch": 3.473126419379258, | |
| "grad_norm": 3.719632148742676, | |
| "learning_rate": 4.2645472912857617e-07, | |
| "loss": 0.41371193528175354, | |
| "step": 4588 | |
| }, | |
| { | |
| "epoch": 3.474640423921272, | |
| "grad_norm": 0.6784191727638245, | |
| "learning_rate": 4.257415271088966e-07, | |
| "loss": 0.05130447819828987, | |
| "step": 4590 | |
| }, | |
| { | |
| "epoch": 3.4761544284632855, | |
| "grad_norm": 1.290065884590149, | |
| "learning_rate": 4.2503024375092e-07, | |
| "loss": 0.4512989819049835, | |
| "step": 4592 | |
| }, | |
| { | |
| "epoch": 3.477668433005299, | |
| "grad_norm": 1.6803733110427856, | |
| "learning_rate": 4.24320880169372e-07, | |
| "loss": 0.4859740436077118, | |
| "step": 4594 | |
| }, | |
| { | |
| "epoch": 3.479182437547313, | |
| "grad_norm": 1.7524383068084717, | |
| "learning_rate": 4.236134374759694e-07, | |
| "loss": 0.7698912620544434, | |
| "step": 4596 | |
| }, | |
| { | |
| "epoch": 3.480696442089326, | |
| "grad_norm": 10.697996139526367, | |
| "learning_rate": 4.229079167794184e-07, | |
| "loss": 0.0986369401216507, | |
| "step": 4598 | |
| }, | |
| { | |
| "epoch": 3.4822104466313397, | |
| "grad_norm": 1.8657169342041016, | |
| "learning_rate": 4.2220431918541345e-07, | |
| "loss": 0.8880587816238403, | |
| "step": 4600 | |
| }, | |
| { | |
| "epoch": 3.4837244511733534, | |
| "grad_norm": 0.8085569739341736, | |
| "learning_rate": 4.2150264579663514e-07, | |
| "loss": 0.4447839856147766, | |
| "step": 4602 | |
| }, | |
| { | |
| "epoch": 3.485238455715367, | |
| "grad_norm": 1.6194043159484863, | |
| "learning_rate": 4.208028977127476e-07, | |
| "loss": 0.025584876537322998, | |
| "step": 4604 | |
| }, | |
| { | |
| "epoch": 3.4867524602573807, | |
| "grad_norm": 1.456982970237732, | |
| "learning_rate": 4.2010507603039853e-07, | |
| "loss": 0.7217623591423035, | |
| "step": 4606 | |
| }, | |
| { | |
| "epoch": 3.4882664647993944, | |
| "grad_norm": 2.8561789989471436, | |
| "learning_rate": 4.194091818432163e-07, | |
| "loss": 0.43590599298477173, | |
| "step": 4608 | |
| }, | |
| { | |
| "epoch": 3.489780469341408, | |
| "grad_norm": 1.112181544303894, | |
| "learning_rate": 4.187152162418084e-07, | |
| "loss": 0.47518157958984375, | |
| "step": 4610 | |
| }, | |
| { | |
| "epoch": 3.4912944738834217, | |
| "grad_norm": 2.6314733028411865, | |
| "learning_rate": 4.1802318031375967e-07, | |
| "loss": 0.8224350810050964, | |
| "step": 4612 | |
| }, | |
| { | |
| "epoch": 3.4928084784254354, | |
| "grad_norm": 1.5108767747879028, | |
| "learning_rate": 4.1733307514363146e-07, | |
| "loss": 0.3456285893917084, | |
| "step": 4614 | |
| }, | |
| { | |
| "epoch": 3.494322482967449, | |
| "grad_norm": 6.438747406005859, | |
| "learning_rate": 4.1664490181295813e-07, | |
| "loss": 0.03416212648153305, | |
| "step": 4616 | |
| }, | |
| { | |
| "epoch": 3.4958364875094627, | |
| "grad_norm": 2.583399534225464, | |
| "learning_rate": 4.1595866140024767e-07, | |
| "loss": 0.48854929208755493, | |
| "step": 4618 | |
| }, | |
| { | |
| "epoch": 3.497350492051476, | |
| "grad_norm": 2.204908847808838, | |
| "learning_rate": 4.1527435498097803e-07, | |
| "loss": 0.4382208287715912, | |
| "step": 4620 | |
| }, | |
| { | |
| "epoch": 3.4988644965934896, | |
| "grad_norm": 0.6002346873283386, | |
| "learning_rate": 4.145919836275961e-07, | |
| "loss": 0.5514084696769714, | |
| "step": 4622 | |
| }, | |
| { | |
| "epoch": 3.5003785011355033, | |
| "grad_norm": 0.23682530224323273, | |
| "learning_rate": 4.1391154840951664e-07, | |
| "loss": 0.13242147862911224, | |
| "step": 4624 | |
| }, | |
| { | |
| "epoch": 3.501892505677517, | |
| "grad_norm": 2.97751784324646, | |
| "learning_rate": 4.1323305039311985e-07, | |
| "loss": 0.5178206562995911, | |
| "step": 4626 | |
| }, | |
| { | |
| "epoch": 3.5034065102195306, | |
| "grad_norm": 0.030940603464841843, | |
| "learning_rate": 4.125564906417497e-07, | |
| "loss": 0.46128955483436584, | |
| "step": 4628 | |
| }, | |
| { | |
| "epoch": 3.5049205147615443, | |
| "grad_norm": 2.5979979038238525, | |
| "learning_rate": 4.118818702157131e-07, | |
| "loss": 0.08022566884756088, | |
| "step": 4630 | |
| }, | |
| { | |
| "epoch": 3.506434519303558, | |
| "grad_norm": 1.7580132484436035, | |
| "learning_rate": 4.1120919017227715e-07, | |
| "loss": 0.43764054775238037, | |
| "step": 4632 | |
| }, | |
| { | |
| "epoch": 3.5079485238455717, | |
| "grad_norm": 5.585171699523926, | |
| "learning_rate": 4.105384515656678e-07, | |
| "loss": 0.06207115575671196, | |
| "step": 4634 | |
| }, | |
| { | |
| "epoch": 3.5094625283875853, | |
| "grad_norm": 5.287844657897949, | |
| "learning_rate": 4.098696554470691e-07, | |
| "loss": 0.0812431275844574, | |
| "step": 4636 | |
| }, | |
| { | |
| "epoch": 3.5109765329295985, | |
| "grad_norm": 2.570967674255371, | |
| "learning_rate": 4.0920280286462046e-07, | |
| "loss": 0.9307460188865662, | |
| "step": 4638 | |
| }, | |
| { | |
| "epoch": 3.5124905374716127, | |
| "grad_norm": 0.485571950674057, | |
| "learning_rate": 4.0853789486341506e-07, | |
| "loss": 0.006304420065134764, | |
| "step": 4640 | |
| }, | |
| { | |
| "epoch": 3.514004542013626, | |
| "grad_norm": 2.8949780464172363, | |
| "learning_rate": 4.078749324854988e-07, | |
| "loss": 0.023514462634921074, | |
| "step": 4642 | |
| }, | |
| { | |
| "epoch": 3.5155185465556396, | |
| "grad_norm": 1.7327907085418701, | |
| "learning_rate": 4.0721391676986864e-07, | |
| "loss": 0.8459428548812866, | |
| "step": 4644 | |
| }, | |
| { | |
| "epoch": 3.5170325510976532, | |
| "grad_norm": 3.3375747203826904, | |
| "learning_rate": 4.0655484875247025e-07, | |
| "loss": 0.8779842853546143, | |
| "step": 4646 | |
| }, | |
| { | |
| "epoch": 3.518546555639667, | |
| "grad_norm": 0.9328950643539429, | |
| "learning_rate": 4.058977294661972e-07, | |
| "loss": 0.4344834089279175, | |
| "step": 4648 | |
| }, | |
| { | |
| "epoch": 3.5200605601816806, | |
| "grad_norm": 7.156875133514404, | |
| "learning_rate": 4.0524255994088855e-07, | |
| "loss": 0.14196467399597168, | |
| "step": 4650 | |
| }, | |
| { | |
| "epoch": 3.5215745647236942, | |
| "grad_norm": 2.4609146118164062, | |
| "learning_rate": 4.0458934120332856e-07, | |
| "loss": 0.3472437560558319, | |
| "step": 4652 | |
| }, | |
| { | |
| "epoch": 3.523088569265708, | |
| "grad_norm": 1.547291874885559, | |
| "learning_rate": 4.039380742772435e-07, | |
| "loss": 0.5102356672286987, | |
| "step": 4654 | |
| }, | |
| { | |
| "epoch": 3.5246025738077216, | |
| "grad_norm": 0.6036778688430786, | |
| "learning_rate": 4.032887601833006e-07, | |
| "loss": 0.5507434010505676, | |
| "step": 4656 | |
| }, | |
| { | |
| "epoch": 3.5261165783497352, | |
| "grad_norm": 0.9084627032279968, | |
| "learning_rate": 4.026413999391074e-07, | |
| "loss": 0.001643902505747974, | |
| "step": 4658 | |
| }, | |
| { | |
| "epoch": 3.5276305828917485, | |
| "grad_norm": 2.3534629344940186, | |
| "learning_rate": 4.0199599455920866e-07, | |
| "loss": 0.19594229757785797, | |
| "step": 4660 | |
| }, | |
| { | |
| "epoch": 3.529144587433762, | |
| "grad_norm": 3.39477276802063, | |
| "learning_rate": 4.013525450550858e-07, | |
| "loss": 0.447501003742218, | |
| "step": 4662 | |
| }, | |
| { | |
| "epoch": 3.530658591975776, | |
| "grad_norm": 3.5711495876312256, | |
| "learning_rate": 4.007110524351546e-07, | |
| "loss": 0.490939736366272, | |
| "step": 4664 | |
| }, | |
| { | |
| "epoch": 3.5321725965177895, | |
| "grad_norm": 0.9418548941612244, | |
| "learning_rate": 4.000715177047648e-07, | |
| "loss": 0.45484209060668945, | |
| "step": 4666 | |
| }, | |
| { | |
| "epoch": 3.533686601059803, | |
| "grad_norm": 0.17408324778079987, | |
| "learning_rate": 3.994339418661967e-07, | |
| "loss": 0.43646398186683655, | |
| "step": 4668 | |
| }, | |
| { | |
| "epoch": 3.535200605601817, | |
| "grad_norm": 0.4622572064399719, | |
| "learning_rate": 3.9879832591866166e-07, | |
| "loss": 0.008858655579388142, | |
| "step": 4670 | |
| }, | |
| { | |
| "epoch": 3.5367146101438305, | |
| "grad_norm": 0.3369908034801483, | |
| "learning_rate": 3.981646708582985e-07, | |
| "loss": 0.39352118968963623, | |
| "step": 4672 | |
| }, | |
| { | |
| "epoch": 3.538228614685844, | |
| "grad_norm": 1.5281133651733398, | |
| "learning_rate": 3.9753297767817374e-07, | |
| "loss": 0.012237350456416607, | |
| "step": 4674 | |
| }, | |
| { | |
| "epoch": 3.539742619227858, | |
| "grad_norm": 2.077275037765503, | |
| "learning_rate": 3.969032473682789e-07, | |
| "loss": 0.024612778797745705, | |
| "step": 4676 | |
| }, | |
| { | |
| "epoch": 3.541256623769871, | |
| "grad_norm": 1.828776240348816, | |
| "learning_rate": 3.962754809155289e-07, | |
| "loss": 0.9028818011283875, | |
| "step": 4678 | |
| }, | |
| { | |
| "epoch": 3.542770628311885, | |
| "grad_norm": 1.3136188983917236, | |
| "learning_rate": 3.956496793037618e-07, | |
| "loss": 0.44292280077934265, | |
| "step": 4680 | |
| }, | |
| { | |
| "epoch": 3.5442846328538984, | |
| "grad_norm": 2.2508389949798584, | |
| "learning_rate": 3.950258435137358e-07, | |
| "loss": 0.5394406318664551, | |
| "step": 4682 | |
| }, | |
| { | |
| "epoch": 3.545798637395912, | |
| "grad_norm": 1.111860752105713, | |
| "learning_rate": 3.944039745231279e-07, | |
| "loss": 0.41304436326026917, | |
| "step": 4684 | |
| }, | |
| { | |
| "epoch": 3.5473126419379257, | |
| "grad_norm": 0.2608751356601715, | |
| "learning_rate": 3.937840733065333e-07, | |
| "loss": 0.09430210292339325, | |
| "step": 4686 | |
| }, | |
| { | |
| "epoch": 3.5488266464799394, | |
| "grad_norm": 0.8345021605491638, | |
| "learning_rate": 3.9316614083546326e-07, | |
| "loss": 0.4794743061065674, | |
| "step": 4688 | |
| }, | |
| { | |
| "epoch": 3.550340651021953, | |
| "grad_norm": 1.2579665184020996, | |
| "learning_rate": 3.925501780783433e-07, | |
| "loss": 0.025660770013928413, | |
| "step": 4690 | |
| }, | |
| { | |
| "epoch": 3.5518546555639667, | |
| "grad_norm": 2.4215896129608154, | |
| "learning_rate": 3.9193618600051217e-07, | |
| "loss": 0.45629942417144775, | |
| "step": 4692 | |
| }, | |
| { | |
| "epoch": 3.5533686601059804, | |
| "grad_norm": 1.93221116065979, | |
| "learning_rate": 3.913241655642205e-07, | |
| "loss": 0.4120534658432007, | |
| "step": 4694 | |
| }, | |
| { | |
| "epoch": 3.554882664647994, | |
| "grad_norm": 1.3998444080352783, | |
| "learning_rate": 3.907141177286283e-07, | |
| "loss": 0.5035297870635986, | |
| "step": 4696 | |
| }, | |
| { | |
| "epoch": 3.5563966691900077, | |
| "grad_norm": 2.830394744873047, | |
| "learning_rate": 3.901060434498047e-07, | |
| "loss": 0.5286551713943481, | |
| "step": 4698 | |
| }, | |
| { | |
| "epoch": 3.557910673732021, | |
| "grad_norm": 1.3005797863006592, | |
| "learning_rate": 3.89499943680725e-07, | |
| "loss": 0.48577815294265747, | |
| "step": 4700 | |
| }, | |
| { | |
| "epoch": 3.559424678274035, | |
| "grad_norm": 8.160534858703613, | |
| "learning_rate": 3.8889581937127145e-07, | |
| "loss": 0.7468402981758118, | |
| "step": 4702 | |
| }, | |
| { | |
| "epoch": 3.5609386828160483, | |
| "grad_norm": 1.0731797218322754, | |
| "learning_rate": 3.882936714682292e-07, | |
| "loss": 0.035015299916267395, | |
| "step": 4704 | |
| }, | |
| { | |
| "epoch": 3.562452687358062, | |
| "grad_norm": 0.23713716864585876, | |
| "learning_rate": 3.876935009152862e-07, | |
| "loss": 0.0008841986418701708, | |
| "step": 4706 | |
| }, | |
| { | |
| "epoch": 3.5639666919000756, | |
| "grad_norm": 2.5999464988708496, | |
| "learning_rate": 3.870953086530317e-07, | |
| "loss": 0.19958657026290894, | |
| "step": 4708 | |
| }, | |
| { | |
| "epoch": 3.5654806964420893, | |
| "grad_norm": 1.8141475915908813, | |
| "learning_rate": 3.8649909561895453e-07, | |
| "loss": 0.8769348859786987, | |
| "step": 4710 | |
| }, | |
| { | |
| "epoch": 3.566994700984103, | |
| "grad_norm": 3.1186091899871826, | |
| "learning_rate": 3.8590486274744177e-07, | |
| "loss": 0.142172709107399, | |
| "step": 4712 | |
| }, | |
| { | |
| "epoch": 3.5685087055261167, | |
| "grad_norm": 4.5143046379089355, | |
| "learning_rate": 3.853126109697766e-07, | |
| "loss": 0.07558415085077286, | |
| "step": 4714 | |
| }, | |
| { | |
| "epoch": 3.5700227100681303, | |
| "grad_norm": 0.4608166217803955, | |
| "learning_rate": 3.8472234121413844e-07, | |
| "loss": 0.01279566902667284, | |
| "step": 4716 | |
| }, | |
| { | |
| "epoch": 3.571536714610144, | |
| "grad_norm": 1.0723479986190796, | |
| "learning_rate": 3.841340544055992e-07, | |
| "loss": 0.02807643637061119, | |
| "step": 4718 | |
| }, | |
| { | |
| "epoch": 3.5730507191521577, | |
| "grad_norm": 2.0866928100585938, | |
| "learning_rate": 3.8354775146612457e-07, | |
| "loss": 0.48381567001342773, | |
| "step": 4720 | |
| }, | |
| { | |
| "epoch": 3.574564723694171, | |
| "grad_norm": 1.0565446615219116, | |
| "learning_rate": 3.8296343331456966e-07, | |
| "loss": 0.4933178424835205, | |
| "step": 4722 | |
| }, | |
| { | |
| "epoch": 3.5760787282361846, | |
| "grad_norm": 1.5202834606170654, | |
| "learning_rate": 3.823811008666803e-07, | |
| "loss": 0.7812383770942688, | |
| "step": 4724 | |
| }, | |
| { | |
| "epoch": 3.5775927327781982, | |
| "grad_norm": 1.1805784702301025, | |
| "learning_rate": 3.818007550350892e-07, | |
| "loss": 0.009394466876983643, | |
| "step": 4726 | |
| }, | |
| { | |
| "epoch": 3.579106737320212, | |
| "grad_norm": 0.8975813984870911, | |
| "learning_rate": 3.8122239672931647e-07, | |
| "loss": 0.10344241559505463, | |
| "step": 4728 | |
| }, | |
| { | |
| "epoch": 3.5806207418622256, | |
| "grad_norm": 1.8160611391067505, | |
| "learning_rate": 3.8064602685576696e-07, | |
| "loss": 0.8045218586921692, | |
| "step": 4730 | |
| }, | |
| { | |
| "epoch": 3.5821347464042392, | |
| "grad_norm": 0.764239490032196, | |
| "learning_rate": 3.800716463177295e-07, | |
| "loss": 0.02110106125473976, | |
| "step": 4732 | |
| }, | |
| { | |
| "epoch": 3.583648750946253, | |
| "grad_norm": 1.9680922031402588, | |
| "learning_rate": 3.7949925601537495e-07, | |
| "loss": 0.22424939274787903, | |
| "step": 4734 | |
| }, | |
| { | |
| "epoch": 3.5851627554882666, | |
| "grad_norm": 2.4243404865264893, | |
| "learning_rate": 3.789288568457548e-07, | |
| "loss": 0.47614777088165283, | |
| "step": 4736 | |
| }, | |
| { | |
| "epoch": 3.5866767600302802, | |
| "grad_norm": 1.1433113813400269, | |
| "learning_rate": 3.78360449702801e-07, | |
| "loss": 0.45122095942497253, | |
| "step": 4738 | |
| }, | |
| { | |
| "epoch": 3.5881907645722935, | |
| "grad_norm": 0.37726327776908875, | |
| "learning_rate": 3.777940354773227e-07, | |
| "loss": 0.008424730040133, | |
| "step": 4740 | |
| }, | |
| { | |
| "epoch": 3.5897047691143076, | |
| "grad_norm": 2.8628084659576416, | |
| "learning_rate": 3.7722961505700625e-07, | |
| "loss": 0.4138200879096985, | |
| "step": 4742 | |
| }, | |
| { | |
| "epoch": 3.591218773656321, | |
| "grad_norm": 1.5430349111557007, | |
| "learning_rate": 3.766671893264126e-07, | |
| "loss": 0.4892534911632538, | |
| "step": 4744 | |
| }, | |
| { | |
| "epoch": 3.5927327781983345, | |
| "grad_norm": 0.7445770502090454, | |
| "learning_rate": 3.7610675916697766e-07, | |
| "loss": 0.038960278034210205, | |
| "step": 4746 | |
| }, | |
| { | |
| "epoch": 3.594246782740348, | |
| "grad_norm": 4.10042142868042, | |
| "learning_rate": 3.7554832545700917e-07, | |
| "loss": 0.3631005883216858, | |
| "step": 4748 | |
| }, | |
| { | |
| "epoch": 3.595760787282362, | |
| "grad_norm": 2.208441972732544, | |
| "learning_rate": 3.7499188907168595e-07, | |
| "loss": 0.4665554463863373, | |
| "step": 4750 | |
| }, | |
| { | |
| "epoch": 3.5972747918243755, | |
| "grad_norm": 1.664963722229004, | |
| "learning_rate": 3.7443745088305723e-07, | |
| "loss": 0.05097402259707451, | |
| "step": 4752 | |
| }, | |
| { | |
| "epoch": 3.598788796366389, | |
| "grad_norm": 2.0958125591278076, | |
| "learning_rate": 3.7388501176004005e-07, | |
| "loss": 0.0315166674554348, | |
| "step": 4754 | |
| }, | |
| { | |
| "epoch": 3.600302800908403, | |
| "grad_norm": 0.5980046391487122, | |
| "learning_rate": 3.733345725684187e-07, | |
| "loss": 0.10055415332317352, | |
| "step": 4756 | |
| }, | |
| { | |
| "epoch": 3.6018168054504165, | |
| "grad_norm": 1.2863233089447021, | |
| "learning_rate": 3.727861341708432e-07, | |
| "loss": 0.024909744039177895, | |
| "step": 4758 | |
| }, | |
| { | |
| "epoch": 3.60333080999243, | |
| "grad_norm": 4.173029899597168, | |
| "learning_rate": 3.7223969742682807e-07, | |
| "loss": 0.541300356388092, | |
| "step": 4760 | |
| }, | |
| { | |
| "epoch": 3.6048448145344434, | |
| "grad_norm": 4.239688396453857, | |
| "learning_rate": 3.716952631927505e-07, | |
| "loss": 0.09993429481983185, | |
| "step": 4762 | |
| }, | |
| { | |
| "epoch": 3.6063588190764575, | |
| "grad_norm": 2.102480411529541, | |
| "learning_rate": 3.711528323218495e-07, | |
| "loss": 0.03141137957572937, | |
| "step": 4764 | |
| }, | |
| { | |
| "epoch": 3.6078728236184707, | |
| "grad_norm": 0.5398826599121094, | |
| "learning_rate": 3.7061240566422476e-07, | |
| "loss": 0.5443601608276367, | |
| "step": 4766 | |
| }, | |
| { | |
| "epoch": 3.6093868281604844, | |
| "grad_norm": 2.651846408843994, | |
| "learning_rate": 3.700739840668343e-07, | |
| "loss": 0.48372316360473633, | |
| "step": 4768 | |
| }, | |
| { | |
| "epoch": 3.610900832702498, | |
| "grad_norm": 3.62178635597229, | |
| "learning_rate": 3.695375683734941e-07, | |
| "loss": 0.5068149566650391, | |
| "step": 4770 | |
| }, | |
| { | |
| "epoch": 3.6124148372445117, | |
| "grad_norm": 0.22578562796115875, | |
| "learning_rate": 3.6900315942487687e-07, | |
| "loss": 0.4274672567844391, | |
| "step": 4772 | |
| }, | |
| { | |
| "epoch": 3.6139288417865254, | |
| "grad_norm": 0.422225683927536, | |
| "learning_rate": 3.6847075805850934e-07, | |
| "loss": 0.003254303941503167, | |
| "step": 4774 | |
| }, | |
| { | |
| "epoch": 3.615442846328539, | |
| "grad_norm": 2.6041109561920166, | |
| "learning_rate": 3.6794036510877343e-07, | |
| "loss": 0.4800873100757599, | |
| "step": 4776 | |
| }, | |
| { | |
| "epoch": 3.6169568508705527, | |
| "grad_norm": 1.3889739513397217, | |
| "learning_rate": 3.6741198140690205e-07, | |
| "loss": 0.019312866032123566, | |
| "step": 4778 | |
| }, | |
| { | |
| "epoch": 3.618470855412566, | |
| "grad_norm": 0.17547756433486938, | |
| "learning_rate": 3.668856077809798e-07, | |
| "loss": 0.48028045892715454, | |
| "step": 4780 | |
| }, | |
| { | |
| "epoch": 3.61998485995458, | |
| "grad_norm": 1.1334937810897827, | |
| "learning_rate": 3.663612450559414e-07, | |
| "loss": 0.04994925111532211, | |
| "step": 4782 | |
| }, | |
| { | |
| "epoch": 3.6214988644965933, | |
| "grad_norm": 0.4655733108520508, | |
| "learning_rate": 3.658388940535696e-07, | |
| "loss": 0.5019496083259583, | |
| "step": 4784 | |
| }, | |
| { | |
| "epoch": 3.623012869038607, | |
| "grad_norm": 0.9031540751457214, | |
| "learning_rate": 3.653185555924943e-07, | |
| "loss": 0.45209193229675293, | |
| "step": 4786 | |
| }, | |
| { | |
| "epoch": 3.6245268735806206, | |
| "grad_norm": 1.9063076972961426, | |
| "learning_rate": 3.6480023048819196e-07, | |
| "loss": 0.9164039492607117, | |
| "step": 4788 | |
| }, | |
| { | |
| "epoch": 3.6260408781226343, | |
| "grad_norm": 2.208582639694214, | |
| "learning_rate": 3.64283919552983e-07, | |
| "loss": 0.05035603046417236, | |
| "step": 4790 | |
| }, | |
| { | |
| "epoch": 3.627554882664648, | |
| "grad_norm": 1.7097535133361816, | |
| "learning_rate": 3.637696235960316e-07, | |
| "loss": 0.23326392471790314, | |
| "step": 4792 | |
| }, | |
| { | |
| "epoch": 3.6290688872066617, | |
| "grad_norm": 1.653237223625183, | |
| "learning_rate": 3.632573434233438e-07, | |
| "loss": 0.4799347221851349, | |
| "step": 4794 | |
| }, | |
| { | |
| "epoch": 3.6305828917486753, | |
| "grad_norm": 2.5724575519561768, | |
| "learning_rate": 3.6274707983776723e-07, | |
| "loss": 0.8532195091247559, | |
| "step": 4796 | |
| }, | |
| { | |
| "epoch": 3.632096896290689, | |
| "grad_norm": 0.6447435021400452, | |
| "learning_rate": 3.622388336389881e-07, | |
| "loss": 0.4974871277809143, | |
| "step": 4798 | |
| }, | |
| { | |
| "epoch": 3.6336109008327027, | |
| "grad_norm": 2.2832088470458984, | |
| "learning_rate": 3.6173260562353163e-07, | |
| "loss": 0.19416573643684387, | |
| "step": 4800 | |
| }, | |
| { | |
| "epoch": 3.635124905374716, | |
| "grad_norm": 3.592654228210449, | |
| "learning_rate": 3.6122839658475964e-07, | |
| "loss": 0.6662434935569763, | |
| "step": 4802 | |
| }, | |
| { | |
| "epoch": 3.63663890991673, | |
| "grad_norm": 1.6334006786346436, | |
| "learning_rate": 3.6072620731287066e-07, | |
| "loss": 0.4261821210384369, | |
| "step": 4804 | |
| }, | |
| { | |
| "epoch": 3.6381529144587432, | |
| "grad_norm": 0.5838547945022583, | |
| "learning_rate": 3.6022603859489704e-07, | |
| "loss": 0.09783003479242325, | |
| "step": 4806 | |
| }, | |
| { | |
| "epoch": 3.639666919000757, | |
| "grad_norm": 1.4045666456222534, | |
| "learning_rate": 3.597278912147044e-07, | |
| "loss": 0.07156740128993988, | |
| "step": 4808 | |
| }, | |
| { | |
| "epoch": 3.6411809235427706, | |
| "grad_norm": 0.7741947174072266, | |
| "learning_rate": 3.592317659529913e-07, | |
| "loss": 0.5537245869636536, | |
| "step": 4810 | |
| }, | |
| { | |
| "epoch": 3.6426949280847842, | |
| "grad_norm": 0.0055837552063167095, | |
| "learning_rate": 3.5873766358728684e-07, | |
| "loss": 0.024619167670607567, | |
| "step": 4812 | |
| }, | |
| { | |
| "epoch": 3.644208932626798, | |
| "grad_norm": 1.4337111711502075, | |
| "learning_rate": 3.582455848919496e-07, | |
| "loss": 0.4184442460536957, | |
| "step": 4814 | |
| }, | |
| { | |
| "epoch": 3.6457229371688116, | |
| "grad_norm": 1.548632264137268, | |
| "learning_rate": 3.577555306381666e-07, | |
| "loss": 0.7215121388435364, | |
| "step": 4816 | |
| }, | |
| { | |
| "epoch": 3.6472369417108252, | |
| "grad_norm": 22.545286178588867, | |
| "learning_rate": 3.5726750159395296e-07, | |
| "loss": 0.8514040112495422, | |
| "step": 4818 | |
| }, | |
| { | |
| "epoch": 3.648750946252839, | |
| "grad_norm": 1.5197862386703491, | |
| "learning_rate": 3.5678149852414884e-07, | |
| "loss": 0.39174315333366394, | |
| "step": 4820 | |
| }, | |
| { | |
| "epoch": 3.6502649507948526, | |
| "grad_norm": 2.523045778274536, | |
| "learning_rate": 3.5629752219042015e-07, | |
| "loss": 0.37386053800582886, | |
| "step": 4822 | |
| }, | |
| { | |
| "epoch": 3.651778955336866, | |
| "grad_norm": 1.6517491340637207, | |
| "learning_rate": 3.558155733512557e-07, | |
| "loss": 0.46329256892204285, | |
| "step": 4824 | |
| }, | |
| { | |
| "epoch": 3.6532929598788795, | |
| "grad_norm": 1.114324927330017, | |
| "learning_rate": 3.5533565276196766e-07, | |
| "loss": 0.12746909260749817, | |
| "step": 4826 | |
| }, | |
| { | |
| "epoch": 3.654806964420893, | |
| "grad_norm": 1.4792526960372925, | |
| "learning_rate": 3.5485776117468887e-07, | |
| "loss": 0.053770918399095535, | |
| "step": 4828 | |
| }, | |
| { | |
| "epoch": 3.656320968962907, | |
| "grad_norm": 1.9576071500778198, | |
| "learning_rate": 3.543818993383724e-07, | |
| "loss": 0.857465922832489, | |
| "step": 4830 | |
| }, | |
| { | |
| "epoch": 3.6578349735049205, | |
| "grad_norm": 0.609967827796936, | |
| "learning_rate": 3.5390806799879065e-07, | |
| "loss": 0.4741651117801666, | |
| "step": 4832 | |
| }, | |
| { | |
| "epoch": 3.659348978046934, | |
| "grad_norm": 1.04575514793396, | |
| "learning_rate": 3.534362678985334e-07, | |
| "loss": 0.38848766684532166, | |
| "step": 4834 | |
| }, | |
| { | |
| "epoch": 3.660862982588948, | |
| "grad_norm": 0.827594518661499, | |
| "learning_rate": 3.529664997770074e-07, | |
| "loss": 0.16450990736484528, | |
| "step": 4836 | |
| }, | |
| { | |
| "epoch": 3.6623769871309615, | |
| "grad_norm": 4.762159824371338, | |
| "learning_rate": 3.524987643704343e-07, | |
| "loss": 0.8133305311203003, | |
| "step": 4838 | |
| }, | |
| { | |
| "epoch": 3.663890991672975, | |
| "grad_norm": 4.865293979644775, | |
| "learning_rate": 3.5203306241185103e-07, | |
| "loss": 0.36955681443214417, | |
| "step": 4840 | |
| }, | |
| { | |
| "epoch": 3.6654049962149884, | |
| "grad_norm": 0.7137389183044434, | |
| "learning_rate": 3.515693946311069e-07, | |
| "loss": 0.0033145768102258444, | |
| "step": 4842 | |
| }, | |
| { | |
| "epoch": 3.6669190007570025, | |
| "grad_norm": 0.4564444124698639, | |
| "learning_rate": 3.511077617548632e-07, | |
| "loss": 0.0010724674211815, | |
| "step": 4844 | |
| }, | |
| { | |
| "epoch": 3.6684330052990157, | |
| "grad_norm": 0.32473912835121155, | |
| "learning_rate": 3.506481645065932e-07, | |
| "loss": 0.4716408848762512, | |
| "step": 4846 | |
| }, | |
| { | |
| "epoch": 3.6699470098410294, | |
| "grad_norm": 3.3602616786956787, | |
| "learning_rate": 3.501906036065784e-07, | |
| "loss": 0.5193576216697693, | |
| "step": 4848 | |
| }, | |
| { | |
| "epoch": 3.671461014383043, | |
| "grad_norm": 2.077227830886841, | |
| "learning_rate": 3.497350797719101e-07, | |
| "loss": 0.18189232051372528, | |
| "step": 4850 | |
| }, | |
| { | |
| "epoch": 3.6729750189250567, | |
| "grad_norm": 2.433666229248047, | |
| "learning_rate": 3.4928159371648635e-07, | |
| "loss": 0.46935853362083435, | |
| "step": 4852 | |
| }, | |
| { | |
| "epoch": 3.6744890234670704, | |
| "grad_norm": 1.6422847509384155, | |
| "learning_rate": 3.488301461510123e-07, | |
| "loss": 0.4132682979106903, | |
| "step": 4854 | |
| }, | |
| { | |
| "epoch": 3.676003028009084, | |
| "grad_norm": 1.8942492008209229, | |
| "learning_rate": 3.4838073778299776e-07, | |
| "loss": 0.7164862751960754, | |
| "step": 4856 | |
| }, | |
| { | |
| "epoch": 3.6775170325510977, | |
| "grad_norm": 0.6586605906486511, | |
| "learning_rate": 3.4793336931675693e-07, | |
| "loss": 0.28098931908607483, | |
| "step": 4858 | |
| }, | |
| { | |
| "epoch": 3.6790310370931114, | |
| "grad_norm": 1.4436407089233398, | |
| "learning_rate": 3.4748804145340693e-07, | |
| "loss": 0.6255571842193604, | |
| "step": 4860 | |
| }, | |
| { | |
| "epoch": 3.680545041635125, | |
| "grad_norm": 6.418086051940918, | |
| "learning_rate": 3.470447548908672e-07, | |
| "loss": 0.5840961933135986, | |
| "step": 4862 | |
| }, | |
| { | |
| "epoch": 3.6820590461771383, | |
| "grad_norm": 0.7285082340240479, | |
| "learning_rate": 3.466035103238579e-07, | |
| "loss": 0.12661457061767578, | |
| "step": 4864 | |
| }, | |
| { | |
| "epoch": 3.6835730507191524, | |
| "grad_norm": 3.845855712890625, | |
| "learning_rate": 3.461643084438984e-07, | |
| "loss": 0.38669610023498535, | |
| "step": 4866 | |
| }, | |
| { | |
| "epoch": 3.6850870552611656, | |
| "grad_norm": 2.364147663116455, | |
| "learning_rate": 3.4572714993930797e-07, | |
| "loss": 0.9998100996017456, | |
| "step": 4868 | |
| }, | |
| { | |
| "epoch": 3.6866010598031793, | |
| "grad_norm": 3.6311380863189697, | |
| "learning_rate": 3.4529203549520226e-07, | |
| "loss": 0.14734582602977753, | |
| "step": 4870 | |
| }, | |
| { | |
| "epoch": 3.688115064345193, | |
| "grad_norm": 0.20212996006011963, | |
| "learning_rate": 3.4485896579349415e-07, | |
| "loss": 0.004680413752794266, | |
| "step": 4872 | |
| }, | |
| { | |
| "epoch": 3.6896290688872067, | |
| "grad_norm": 2.0408899784088135, | |
| "learning_rate": 3.444279415128919e-07, | |
| "loss": 0.06556116044521332, | |
| "step": 4874 | |
| }, | |
| { | |
| "epoch": 3.6911430734292203, | |
| "grad_norm": 2.7118277549743652, | |
| "learning_rate": 3.43998963328898e-07, | |
| "loss": 0.8545077443122864, | |
| "step": 4876 | |
| }, | |
| { | |
| "epoch": 3.692657077971234, | |
| "grad_norm": 1.9300854206085205, | |
| "learning_rate": 3.435720319138084e-07, | |
| "loss": 0.9223077297210693, | |
| "step": 4878 | |
| }, | |
| { | |
| "epoch": 3.6941710825132477, | |
| "grad_norm": 0.09388744086027145, | |
| "learning_rate": 3.431471479367113e-07, | |
| "loss": 0.41516542434692383, | |
| "step": 4880 | |
| }, | |
| { | |
| "epoch": 3.6956850870552613, | |
| "grad_norm": 0.01692943461239338, | |
| "learning_rate": 3.4272431206348635e-07, | |
| "loss": 0.37090763449668884, | |
| "step": 4882 | |
| }, | |
| { | |
| "epoch": 3.697199091597275, | |
| "grad_norm": 2.315216302871704, | |
| "learning_rate": 3.4230352495680335e-07, | |
| "loss": 0.9441631436347961, | |
| "step": 4884 | |
| }, | |
| { | |
| "epoch": 3.6987130961392882, | |
| "grad_norm": 2.9745566844940186, | |
| "learning_rate": 3.4188478727612087e-07, | |
| "loss": 0.9142878651618958, | |
| "step": 4886 | |
| }, | |
| { | |
| "epoch": 3.700227100681302, | |
| "grad_norm": 1.6987249851226807, | |
| "learning_rate": 3.4146809967768595e-07, | |
| "loss": 0.8680942058563232, | |
| "step": 4888 | |
| }, | |
| { | |
| "epoch": 3.7017411052233156, | |
| "grad_norm": 1.8989615440368652, | |
| "learning_rate": 3.41053462814533e-07, | |
| "loss": 0.022511256858706474, | |
| "step": 4890 | |
| }, | |
| { | |
| "epoch": 3.7032551097653292, | |
| "grad_norm": 1.9308021068572998, | |
| "learning_rate": 3.4064087733648184e-07, | |
| "loss": 0.8625115752220154, | |
| "step": 4892 | |
| }, | |
| { | |
| "epoch": 3.704769114307343, | |
| "grad_norm": 1.6914867162704468, | |
| "learning_rate": 3.40230343890138e-07, | |
| "loss": 0.8048926591873169, | |
| "step": 4894 | |
| }, | |
| { | |
| "epoch": 3.7062831188493566, | |
| "grad_norm": 0.6650161147117615, | |
| "learning_rate": 3.3982186311889046e-07, | |
| "loss": 0.46356675028800964, | |
| "step": 4896 | |
| }, | |
| { | |
| "epoch": 3.7077971233913702, | |
| "grad_norm": 2.505150556564331, | |
| "learning_rate": 3.3941543566291193e-07, | |
| "loss": 0.043180715292692184, | |
| "step": 4898 | |
| }, | |
| { | |
| "epoch": 3.709311127933384, | |
| "grad_norm": 2.578604221343994, | |
| "learning_rate": 3.390110621591566e-07, | |
| "loss": 0.39062124490737915, | |
| "step": 4900 | |
| }, | |
| { | |
| "epoch": 3.7108251324753976, | |
| "grad_norm": 1.5174658298492432, | |
| "learning_rate": 3.3860874324135974e-07, | |
| "loss": 0.46687743067741394, | |
| "step": 4902 | |
| }, | |
| { | |
| "epoch": 3.712339137017411, | |
| "grad_norm": 1.998696208000183, | |
| "learning_rate": 3.3820847954003713e-07, | |
| "loss": 0.010921820998191833, | |
| "step": 4904 | |
| }, | |
| { | |
| "epoch": 3.713853141559425, | |
| "grad_norm": 1.7215547561645508, | |
| "learning_rate": 3.378102716824832e-07, | |
| "loss": 0.0716339498758316, | |
| "step": 4906 | |
| }, | |
| { | |
| "epoch": 3.715367146101438, | |
| "grad_norm": 0.12513110041618347, | |
| "learning_rate": 3.3741412029277034e-07, | |
| "loss": 0.4399803876876831, | |
| "step": 4908 | |
| }, | |
| { | |
| "epoch": 3.716881150643452, | |
| "grad_norm": 0.37457987666130066, | |
| "learning_rate": 3.370200259917483e-07, | |
| "loss": 0.3699595034122467, | |
| "step": 4910 | |
| }, | |
| { | |
| "epoch": 3.7183951551854655, | |
| "grad_norm": 1.7787532806396484, | |
| "learning_rate": 3.3662798939704323e-07, | |
| "loss": 0.021962502971291542, | |
| "step": 4912 | |
| }, | |
| { | |
| "epoch": 3.719909159727479, | |
| "grad_norm": 1.0377203226089478, | |
| "learning_rate": 3.3623801112305577e-07, | |
| "loss": 0.09830473363399506, | |
| "step": 4914 | |
| }, | |
| { | |
| "epoch": 3.721423164269493, | |
| "grad_norm": 0.755774736404419, | |
| "learning_rate": 3.3585009178096114e-07, | |
| "loss": 0.01071044523268938, | |
| "step": 4916 | |
| }, | |
| { | |
| "epoch": 3.7229371688115065, | |
| "grad_norm": 0.1359897404909134, | |
| "learning_rate": 3.3546423197870793e-07, | |
| "loss": 0.46218374371528625, | |
| "step": 4918 | |
| }, | |
| { | |
| "epoch": 3.72445117335352, | |
| "grad_norm": 4.3549065589904785, | |
| "learning_rate": 3.350804323210165e-07, | |
| "loss": 0.32898473739624023, | |
| "step": 4920 | |
| }, | |
| { | |
| "epoch": 3.725965177895534, | |
| "grad_norm": 0.4687940180301666, | |
| "learning_rate": 3.3469869340937915e-07, | |
| "loss": 0.05365509167313576, | |
| "step": 4922 | |
| }, | |
| { | |
| "epoch": 3.7274791824375475, | |
| "grad_norm": 0.1344975084066391, | |
| "learning_rate": 3.3431901584205834e-07, | |
| "loss": 0.4858251214027405, | |
| "step": 4924 | |
| }, | |
| { | |
| "epoch": 3.7289931869795607, | |
| "grad_norm": 2.259783983230591, | |
| "learning_rate": 3.3394140021408555e-07, | |
| "loss": 0.47054964303970337, | |
| "step": 4926 | |
| }, | |
| { | |
| "epoch": 3.730507191521575, | |
| "grad_norm": 5.918397903442383, | |
| "learning_rate": 3.335658471172615e-07, | |
| "loss": 0.3773191571235657, | |
| "step": 4928 | |
| }, | |
| { | |
| "epoch": 3.732021196063588, | |
| "grad_norm": 1.230940580368042, | |
| "learning_rate": 3.3319235714015426e-07, | |
| "loss": 0.11731115728616714, | |
| "step": 4930 | |
| }, | |
| { | |
| "epoch": 3.7335352006056017, | |
| "grad_norm": 0.3544609546661377, | |
| "learning_rate": 3.328209308680981e-07, | |
| "loss": 0.5121480822563171, | |
| "step": 4932 | |
| }, | |
| { | |
| "epoch": 3.7350492051476154, | |
| "grad_norm": 0.2798723876476288, | |
| "learning_rate": 3.324515688831939e-07, | |
| "loss": 0.015740007162094116, | |
| "step": 4934 | |
| }, | |
| { | |
| "epoch": 3.736563209689629, | |
| "grad_norm": 2.402623414993286, | |
| "learning_rate": 3.320842717643066e-07, | |
| "loss": 0.4829557538032532, | |
| "step": 4936 | |
| }, | |
| { | |
| "epoch": 3.7380772142316427, | |
| "grad_norm": 3.5150060653686523, | |
| "learning_rate": 3.317190400870653e-07, | |
| "loss": 1.0066595077514648, | |
| "step": 4938 | |
| }, | |
| { | |
| "epoch": 3.7395912187736564, | |
| "grad_norm": 2.071301221847534, | |
| "learning_rate": 3.3135587442386263e-07, | |
| "loss": 0.5190615653991699, | |
| "step": 4940 | |
| }, | |
| { | |
| "epoch": 3.74110522331567, | |
| "grad_norm": 2.0592801570892334, | |
| "learning_rate": 3.309947753438528e-07, | |
| "loss": 0.4132930636405945, | |
| "step": 4942 | |
| }, | |
| { | |
| "epoch": 3.7426192278576833, | |
| "grad_norm": 0.05614176392555237, | |
| "learning_rate": 3.3063574341295124e-07, | |
| "loss": 0.18080037832260132, | |
| "step": 4944 | |
| }, | |
| { | |
| "epoch": 3.7441332323996974, | |
| "grad_norm": 1.8372803926467896, | |
| "learning_rate": 3.302787791938339e-07, | |
| "loss": 0.4844646155834198, | |
| "step": 4946 | |
| }, | |
| { | |
| "epoch": 3.7456472369417106, | |
| "grad_norm": 0.5025854706764221, | |
| "learning_rate": 3.299238832459364e-07, | |
| "loss": 0.0169985331594944, | |
| "step": 4948 | |
| }, | |
| { | |
| "epoch": 3.7471612414837243, | |
| "grad_norm": 2.4502007961273193, | |
| "learning_rate": 3.2957105612545275e-07, | |
| "loss": 0.3472217321395874, | |
| "step": 4950 | |
| }, | |
| { | |
| "epoch": 3.748675246025738, | |
| "grad_norm": 0.560302197933197, | |
| "learning_rate": 3.292202983853347e-07, | |
| "loss": 0.46183523535728455, | |
| "step": 4952 | |
| }, | |
| { | |
| "epoch": 3.7501892505677517, | |
| "grad_norm": 1.8078094720840454, | |
| "learning_rate": 3.2887161057529067e-07, | |
| "loss": 0.40099021792411804, | |
| "step": 4954 | |
| }, | |
| { | |
| "epoch": 3.7517032551097653, | |
| "grad_norm": 1.7329490184783936, | |
| "learning_rate": 3.2852499324178554e-07, | |
| "loss": 0.761401355266571, | |
| "step": 4956 | |
| }, | |
| { | |
| "epoch": 3.753217259651779, | |
| "grad_norm": 1.790453314781189, | |
| "learning_rate": 3.281804469280392e-07, | |
| "loss": 0.8640339970588684, | |
| "step": 4958 | |
| }, | |
| { | |
| "epoch": 3.7547312641937927, | |
| "grad_norm": 0.2882123291492462, | |
| "learning_rate": 3.2783797217402525e-07, | |
| "loss": 0.4108726680278778, | |
| "step": 4960 | |
| }, | |
| { | |
| "epoch": 3.7562452687358063, | |
| "grad_norm": 1.0697760581970215, | |
| "learning_rate": 3.274975695164716e-07, | |
| "loss": 0.27952757477760315, | |
| "step": 4962 | |
| }, | |
| { | |
| "epoch": 3.75775927327782, | |
| "grad_norm": 0.7273476123809814, | |
| "learning_rate": 3.271592394888583e-07, | |
| "loss": 0.041864413768053055, | |
| "step": 4964 | |
| }, | |
| { | |
| "epoch": 3.7592732778198332, | |
| "grad_norm": 1.7666912078857422, | |
| "learning_rate": 3.2682298262141696e-07, | |
| "loss": 0.9729198217391968, | |
| "step": 4966 | |
| }, | |
| { | |
| "epoch": 3.7607872823618473, | |
| "grad_norm": 1.9599757194519043, | |
| "learning_rate": 3.264887994411306e-07, | |
| "loss": 0.10504017025232315, | |
| "step": 4968 | |
| }, | |
| { | |
| "epoch": 3.7623012869038606, | |
| "grad_norm": 0.019330648705363274, | |
| "learning_rate": 3.2615669047173225e-07, | |
| "loss": 0.41043803095817566, | |
| "step": 4970 | |
| }, | |
| { | |
| "epoch": 3.7638152914458742, | |
| "grad_norm": 0.7178271412849426, | |
| "learning_rate": 3.2582665623370385e-07, | |
| "loss": 0.47693270444869995, | |
| "step": 4972 | |
| }, | |
| { | |
| "epoch": 3.765329295987888, | |
| "grad_norm": 0.08044314384460449, | |
| "learning_rate": 3.2549869724427634e-07, | |
| "loss": 0.04021545127034187, | |
| "step": 4974 | |
| }, | |
| { | |
| "epoch": 3.7668433005299016, | |
| "grad_norm": 3.817652702331543, | |
| "learning_rate": 3.251728140174279e-07, | |
| "loss": 0.9784973859786987, | |
| "step": 4976 | |
| }, | |
| { | |
| "epoch": 3.7683573050719152, | |
| "grad_norm": 3.3584280014038086, | |
| "learning_rate": 3.248490070638837e-07, | |
| "loss": 0.5802483558654785, | |
| "step": 4978 | |
| }, | |
| { | |
| "epoch": 3.769871309613929, | |
| "grad_norm": 1.8819648027420044, | |
| "learning_rate": 3.2452727689111543e-07, | |
| "loss": 0.4363057017326355, | |
| "step": 4980 | |
| }, | |
| { | |
| "epoch": 3.7713853141559426, | |
| "grad_norm": 0.22932952642440796, | |
| "learning_rate": 3.2420762400333903e-07, | |
| "loss": 0.43567347526550293, | |
| "step": 4982 | |
| }, | |
| { | |
| "epoch": 3.7728993186979563, | |
| "grad_norm": 2.5808701515197754, | |
| "learning_rate": 3.238900489015158e-07, | |
| "loss": 0.48371621966362, | |
| "step": 4984 | |
| }, | |
| { | |
| "epoch": 3.77441332323997, | |
| "grad_norm": 2.555543899536133, | |
| "learning_rate": 3.235745520833506e-07, | |
| "loss": 0.5207771062850952, | |
| "step": 4986 | |
| }, | |
| { | |
| "epoch": 3.775927327781983, | |
| "grad_norm": 0.1696174144744873, | |
| "learning_rate": 3.232611340432908e-07, | |
| "loss": 0.4082969129085541, | |
| "step": 4988 | |
| }, | |
| { | |
| "epoch": 3.777441332323997, | |
| "grad_norm": 3.984182357788086, | |
| "learning_rate": 3.22949795272526e-07, | |
| "loss": 0.1221313551068306, | |
| "step": 4990 | |
| }, | |
| { | |
| "epoch": 3.7789553368660105, | |
| "grad_norm": 2.366755723953247, | |
| "learning_rate": 3.226405362589877e-07, | |
| "loss": 0.36068195104599, | |
| "step": 4992 | |
| }, | |
| { | |
| "epoch": 3.780469341408024, | |
| "grad_norm": 0.05323546752333641, | |
| "learning_rate": 3.2233335748734724e-07, | |
| "loss": 0.5102896690368652, | |
| "step": 4994 | |
| }, | |
| { | |
| "epoch": 3.781983345950038, | |
| "grad_norm": 0.11353172361850739, | |
| "learning_rate": 3.220282594390161e-07, | |
| "loss": 0.0019964941311627626, | |
| "step": 4996 | |
| }, | |
| { | |
| "epoch": 3.7834973504920515, | |
| "grad_norm": 5.21306037902832, | |
| "learning_rate": 3.2172524259214534e-07, | |
| "loss": 0.42315593361854553, | |
| "step": 4998 | |
| }, | |
| { | |
| "epoch": 3.785011355034065, | |
| "grad_norm": 0.41837674379348755, | |
| "learning_rate": 3.2142430742162355e-07, | |
| "loss": 0.00036322709638625383, | |
| "step": 5000 | |
| }, | |
| { | |
| "epoch": 3.786525359576079, | |
| "grad_norm": 2.8493845462799072, | |
| "learning_rate": 3.211254543990774e-07, | |
| "loss": 0.009760625660419464, | |
| "step": 5002 | |
| }, | |
| { | |
| "epoch": 3.7880393641180925, | |
| "grad_norm": 2.051926374435425, | |
| "learning_rate": 3.208286839928701e-07, | |
| "loss": 0.8073641657829285, | |
| "step": 5004 | |
| }, | |
| { | |
| "epoch": 3.7895533686601057, | |
| "grad_norm": 8.931031227111816, | |
| "learning_rate": 3.2053399666810133e-07, | |
| "loss": 0.47890886664390564, | |
| "step": 5006 | |
| }, | |
| { | |
| "epoch": 3.79106737320212, | |
| "grad_norm": 0.6862629652023315, | |
| "learning_rate": 3.2024139288660594e-07, | |
| "loss": 0.10860828310251236, | |
| "step": 5008 | |
| }, | |
| { | |
| "epoch": 3.792581377744133, | |
| "grad_norm": 0.049004968255758286, | |
| "learning_rate": 3.1995087310695357e-07, | |
| "loss": 0.008655873127281666, | |
| "step": 5010 | |
| }, | |
| { | |
| "epoch": 3.7940953822861467, | |
| "grad_norm": 0.8857827186584473, | |
| "learning_rate": 3.196624377844476e-07, | |
| "loss": 0.008771966211497784, | |
| "step": 5012 | |
| }, | |
| { | |
| "epoch": 3.7956093868281604, | |
| "grad_norm": 1.8794935941696167, | |
| "learning_rate": 3.1937608737112493e-07, | |
| "loss": 0.4379273056983948, | |
| "step": 5014 | |
| }, | |
| { | |
| "epoch": 3.797123391370174, | |
| "grad_norm": 0.3324967324733734, | |
| "learning_rate": 3.190918223157546e-07, | |
| "loss": 0.41587817668914795, | |
| "step": 5016 | |
| }, | |
| { | |
| "epoch": 3.7986373959121877, | |
| "grad_norm": 0.26607972383499146, | |
| "learning_rate": 3.188096430638377e-07, | |
| "loss": 0.022580014541745186, | |
| "step": 5018 | |
| }, | |
| { | |
| "epoch": 3.8001514004542014, | |
| "grad_norm": 2.0681216716766357, | |
| "learning_rate": 3.1852955005760665e-07, | |
| "loss": 0.3928464949131012, | |
| "step": 5020 | |
| }, | |
| { | |
| "epoch": 3.801665404996215, | |
| "grad_norm": 0.1730499416589737, | |
| "learning_rate": 3.1825154373602416e-07, | |
| "loss": 0.48923325538635254, | |
| "step": 5022 | |
| }, | |
| { | |
| "epoch": 3.8031794095382288, | |
| "grad_norm": 4.178548812866211, | |
| "learning_rate": 3.179756245347822e-07, | |
| "loss": 0.4484570324420929, | |
| "step": 5024 | |
| }, | |
| { | |
| "epoch": 3.8046934140802424, | |
| "grad_norm": 0.5775771141052246, | |
| "learning_rate": 3.1770179288630244e-07, | |
| "loss": 0.20080740749835968, | |
| "step": 5026 | |
| }, | |
| { | |
| "epoch": 3.8062074186222556, | |
| "grad_norm": 0.3481805920600891, | |
| "learning_rate": 3.174300492197346e-07, | |
| "loss": 0.01089209783822298, | |
| "step": 5028 | |
| }, | |
| { | |
| "epoch": 3.8077214231642698, | |
| "grad_norm": 1.8364008665084839, | |
| "learning_rate": 3.1716039396095646e-07, | |
| "loss": 0.45432248711586, | |
| "step": 5030 | |
| }, | |
| { | |
| "epoch": 3.809235427706283, | |
| "grad_norm": 0.7630714178085327, | |
| "learning_rate": 3.168928275325724e-07, | |
| "loss": 0.45632779598236084, | |
| "step": 5032 | |
| }, | |
| { | |
| "epoch": 3.8107494322482967, | |
| "grad_norm": 5.66952657699585, | |
| "learning_rate": 3.1662735035391325e-07, | |
| "loss": 0.5279141664505005, | |
| "step": 5034 | |
| }, | |
| { | |
| "epoch": 3.8122634367903103, | |
| "grad_norm": 1.6522698402404785, | |
| "learning_rate": 3.1636396284103594e-07, | |
| "loss": 0.8378298878669739, | |
| "step": 5036 | |
| }, | |
| { | |
| "epoch": 3.813777441332324, | |
| "grad_norm": 2.261073589324951, | |
| "learning_rate": 3.16102665406722e-07, | |
| "loss": 0.8691202998161316, | |
| "step": 5038 | |
| }, | |
| { | |
| "epoch": 3.8152914458743377, | |
| "grad_norm": 2.561520576477051, | |
| "learning_rate": 3.1584345846047765e-07, | |
| "loss": 0.47621315717697144, | |
| "step": 5040 | |
| }, | |
| { | |
| "epoch": 3.8168054504163513, | |
| "grad_norm": 3.8017935752868652, | |
| "learning_rate": 3.155863424085327e-07, | |
| "loss": 0.40808743238449097, | |
| "step": 5042 | |
| }, | |
| { | |
| "epoch": 3.818319454958365, | |
| "grad_norm": 2.399139165878296, | |
| "learning_rate": 3.1533131765384063e-07, | |
| "loss": 0.45948272943496704, | |
| "step": 5044 | |
| }, | |
| { | |
| "epoch": 3.8198334595003782, | |
| "grad_norm": 1.641716480255127, | |
| "learning_rate": 3.150783845960765e-07, | |
| "loss": 0.5885670781135559, | |
| "step": 5046 | |
| }, | |
| { | |
| "epoch": 3.8213474640423923, | |
| "grad_norm": 1.085816502571106, | |
| "learning_rate": 3.148275436316381e-07, | |
| "loss": 0.281469464302063, | |
| "step": 5048 | |
| }, | |
| { | |
| "epoch": 3.8228614685844056, | |
| "grad_norm": 1.2711929082870483, | |
| "learning_rate": 3.145787951536441e-07, | |
| "loss": 0.32660719752311707, | |
| "step": 5050 | |
| }, | |
| { | |
| "epoch": 3.8243754731264192, | |
| "grad_norm": 1.4348828792572021, | |
| "learning_rate": 3.1433213955193404e-07, | |
| "loss": 0.4572903513908386, | |
| "step": 5052 | |
| }, | |
| { | |
| "epoch": 3.825889477668433, | |
| "grad_norm": 1.9743393659591675, | |
| "learning_rate": 3.1408757721306693e-07, | |
| "loss": 0.8791977763175964, | |
| "step": 5054 | |
| }, | |
| { | |
| "epoch": 3.8274034822104466, | |
| "grad_norm": 2.0506911277770996, | |
| "learning_rate": 3.1384510852032213e-07, | |
| "loss": 0.005691791884601116, | |
| "step": 5056 | |
| }, | |
| { | |
| "epoch": 3.8289174867524602, | |
| "grad_norm": 0.03359084948897362, | |
| "learning_rate": 3.1360473385369705e-07, | |
| "loss": 0.039005476981401443, | |
| "step": 5058 | |
| }, | |
| { | |
| "epoch": 3.830431491294474, | |
| "grad_norm": 3.871607780456543, | |
| "learning_rate": 3.133664535899078e-07, | |
| "loss": 0.748072624206543, | |
| "step": 5060 | |
| }, | |
| { | |
| "epoch": 3.8319454958364876, | |
| "grad_norm": 2.2511465549468994, | |
| "learning_rate": 3.131302681023877e-07, | |
| "loss": 0.862351655960083, | |
| "step": 5062 | |
| }, | |
| { | |
| "epoch": 3.8334595003785013, | |
| "grad_norm": 1.9889755249023438, | |
| "learning_rate": 3.128961777612876e-07, | |
| "loss": 0.08380108326673508, | |
| "step": 5064 | |
| }, | |
| { | |
| "epoch": 3.834973504920515, | |
| "grad_norm": 2.928863048553467, | |
| "learning_rate": 3.1266418293347464e-07, | |
| "loss": 0.4806826412677765, | |
| "step": 5066 | |
| }, | |
| { | |
| "epoch": 3.836487509462528, | |
| "grad_norm": 5.594569683074951, | |
| "learning_rate": 3.124342839825314e-07, | |
| "loss": 0.020531795918941498, | |
| "step": 5068 | |
| }, | |
| { | |
| "epoch": 3.8380015140045423, | |
| "grad_norm": 3.1761984825134277, | |
| "learning_rate": 3.122064812687568e-07, | |
| "loss": 0.6929623484611511, | |
| "step": 5070 | |
| }, | |
| { | |
| "epoch": 3.8395155185465555, | |
| "grad_norm": 0.08937308937311172, | |
| "learning_rate": 3.1198077514916374e-07, | |
| "loss": 0.2494657337665558, | |
| "step": 5072 | |
| }, | |
| { | |
| "epoch": 3.841029523088569, | |
| "grad_norm": 2.0265660285949707, | |
| "learning_rate": 3.1175716597747953e-07, | |
| "loss": 0.4214555621147156, | |
| "step": 5074 | |
| }, | |
| { | |
| "epoch": 3.842543527630583, | |
| "grad_norm": 1.8746016025543213, | |
| "learning_rate": 3.1153565410414514e-07, | |
| "loss": 0.43491077423095703, | |
| "step": 5076 | |
| }, | |
| { | |
| "epoch": 3.8440575321725965, | |
| "grad_norm": 0.8957091569900513, | |
| "learning_rate": 3.113162398763148e-07, | |
| "loss": 0.40135228633880615, | |
| "step": 5078 | |
| }, | |
| { | |
| "epoch": 3.84557153671461, | |
| "grad_norm": 2.91975998878479, | |
| "learning_rate": 3.110989236378549e-07, | |
| "loss": 0.7896611094474792, | |
| "step": 5080 | |
| }, | |
| { | |
| "epoch": 3.847085541256624, | |
| "grad_norm": 0.6040961742401123, | |
| "learning_rate": 3.108837057293445e-07, | |
| "loss": 0.012859215028584003, | |
| "step": 5082 | |
| }, | |
| { | |
| "epoch": 3.8485995457986375, | |
| "grad_norm": 0.19318491220474243, | |
| "learning_rate": 3.106705864880735e-07, | |
| "loss": 0.47871896624565125, | |
| "step": 5084 | |
| }, | |
| { | |
| "epoch": 3.850113550340651, | |
| "grad_norm": 2.807324171066284, | |
| "learning_rate": 3.1045956624804317e-07, | |
| "loss": 0.4998835325241089, | |
| "step": 5086 | |
| }, | |
| { | |
| "epoch": 3.851627554882665, | |
| "grad_norm": 4.192824363708496, | |
| "learning_rate": 3.1025064533996517e-07, | |
| "loss": 0.39344674348831177, | |
| "step": 5088 | |
| }, | |
| { | |
| "epoch": 3.853141559424678, | |
| "grad_norm": 6.574662208557129, | |
| "learning_rate": 3.1004382409126064e-07, | |
| "loss": 0.4768523871898651, | |
| "step": 5090 | |
| }, | |
| { | |
| "epoch": 3.8546555639666917, | |
| "grad_norm": 1.621928334236145, | |
| "learning_rate": 3.0983910282606083e-07, | |
| "loss": 0.42897361516952515, | |
| "step": 5092 | |
| }, | |
| { | |
| "epoch": 3.8561695685087054, | |
| "grad_norm": 2.074524402618408, | |
| "learning_rate": 3.096364818652055e-07, | |
| "loss": 0.445551335811615, | |
| "step": 5094 | |
| }, | |
| { | |
| "epoch": 3.857683573050719, | |
| "grad_norm": 1.5912147760391235, | |
| "learning_rate": 3.0943596152624284e-07, | |
| "loss": 0.003714049933478236, | |
| "step": 5096 | |
| }, | |
| { | |
| "epoch": 3.8591975775927327, | |
| "grad_norm": 1.4102367162704468, | |
| "learning_rate": 3.0923754212342886e-07, | |
| "loss": 0.698522686958313, | |
| "step": 5098 | |
| }, | |
| { | |
| "epoch": 3.8607115821347464, | |
| "grad_norm": 1.337382435798645, | |
| "learning_rate": 3.0904122396772705e-07, | |
| "loss": 0.007130913436412811, | |
| "step": 5100 | |
| }, | |
| { | |
| "epoch": 3.86222558667676, | |
| "grad_norm": 3.0540449619293213, | |
| "learning_rate": 3.08847007366808e-07, | |
| "loss": 0.4882618188858032, | |
| "step": 5102 | |
| }, | |
| { | |
| "epoch": 3.8637395912187738, | |
| "grad_norm": 1.6839513778686523, | |
| "learning_rate": 3.0865489262504844e-07, | |
| "loss": 0.41174182295799255, | |
| "step": 5104 | |
| }, | |
| { | |
| "epoch": 3.8652535957607874, | |
| "grad_norm": 0.445525199174881, | |
| "learning_rate": 3.0846488004353116e-07, | |
| "loss": 0.018767863512039185, | |
| "step": 5106 | |
| }, | |
| { | |
| "epoch": 3.8667676003028006, | |
| "grad_norm": 3.5258700847625732, | |
| "learning_rate": 3.0827696992004456e-07, | |
| "loss": 0.8859530091285706, | |
| "step": 5108 | |
| }, | |
| { | |
| "epoch": 3.8682816048448148, | |
| "grad_norm": 1.3775527477264404, | |
| "learning_rate": 3.0809116254908205e-07, | |
| "loss": 0.9377045631408691, | |
| "step": 5110 | |
| }, | |
| { | |
| "epoch": 3.869795609386828, | |
| "grad_norm": 3.718674659729004, | |
| "learning_rate": 3.079074582218412e-07, | |
| "loss": 0.8108839392662048, | |
| "step": 5112 | |
| }, | |
| { | |
| "epoch": 3.8713096139288417, | |
| "grad_norm": 1.806331992149353, | |
| "learning_rate": 3.077258572262245e-07, | |
| "loss": 0.5829481482505798, | |
| "step": 5114 | |
| }, | |
| { | |
| "epoch": 3.8728236184708553, | |
| "grad_norm": 1.367382287979126, | |
| "learning_rate": 3.0754635984683733e-07, | |
| "loss": 0.3863900303840637, | |
| "step": 5116 | |
| }, | |
| { | |
| "epoch": 3.874337623012869, | |
| "grad_norm": 0.5311092138290405, | |
| "learning_rate": 3.0736896636498844e-07, | |
| "loss": 0.24167108535766602, | |
| "step": 5118 | |
| }, | |
| { | |
| "epoch": 3.8758516275548827, | |
| "grad_norm": 0.8665949106216431, | |
| "learning_rate": 3.0719367705868947e-07, | |
| "loss": 0.11795948445796967, | |
| "step": 5120 | |
| }, | |
| { | |
| "epoch": 3.8773656320968963, | |
| "grad_norm": 1.5483936071395874, | |
| "learning_rate": 3.0702049220265463e-07, | |
| "loss": 0.4619280695915222, | |
| "step": 5122 | |
| }, | |
| { | |
| "epoch": 3.87887963663891, | |
| "grad_norm": 1.4960511922836304, | |
| "learning_rate": 3.0684941206829945e-07, | |
| "loss": 0.1171555370092392, | |
| "step": 5124 | |
| }, | |
| { | |
| "epoch": 3.8803936411809237, | |
| "grad_norm": 0.2610466778278351, | |
| "learning_rate": 3.0668043692374135e-07, | |
| "loss": 0.0014266979414969683, | |
| "step": 5126 | |
| }, | |
| { | |
| "epoch": 3.8819076457229373, | |
| "grad_norm": 0.6856014132499695, | |
| "learning_rate": 3.06513567033799e-07, | |
| "loss": 0.4256713390350342, | |
| "step": 5128 | |
| }, | |
| { | |
| "epoch": 3.8834216502649506, | |
| "grad_norm": 1.56218421459198, | |
| "learning_rate": 3.063488026599908e-07, | |
| "loss": 0.5585575699806213, | |
| "step": 5130 | |
| }, | |
| { | |
| "epoch": 3.8849356548069647, | |
| "grad_norm": 1.8463737964630127, | |
| "learning_rate": 3.061861440605366e-07, | |
| "loss": 0.461412250995636, | |
| "step": 5132 | |
| }, | |
| { | |
| "epoch": 3.886449659348978, | |
| "grad_norm": 0.11369356513023376, | |
| "learning_rate": 3.060255914903552e-07, | |
| "loss": 0.4456888735294342, | |
| "step": 5134 | |
| }, | |
| { | |
| "epoch": 3.8879636638909916, | |
| "grad_norm": 0.9624853134155273, | |
| "learning_rate": 3.05867145201065e-07, | |
| "loss": 0.1871456354856491, | |
| "step": 5136 | |
| }, | |
| { | |
| "epoch": 3.8894776684330052, | |
| "grad_norm": 1.1454511880874634, | |
| "learning_rate": 3.057108054409837e-07, | |
| "loss": 0.45751628279685974, | |
| "step": 5138 | |
| }, | |
| { | |
| "epoch": 3.890991672975019, | |
| "grad_norm": 1.262891411781311, | |
| "learning_rate": 3.0555657245512737e-07, | |
| "loss": 0.04787885770201683, | |
| "step": 5140 | |
| }, | |
| { | |
| "epoch": 3.8925056775170326, | |
| "grad_norm": 2.1630172729492188, | |
| "learning_rate": 3.0540444648521044e-07, | |
| "loss": 0.47380274534225464, | |
| "step": 5142 | |
| }, | |
| { | |
| "epoch": 3.8940196820590463, | |
| "grad_norm": 0.3003329634666443, | |
| "learning_rate": 3.052544277696452e-07, | |
| "loss": 0.024536890909075737, | |
| "step": 5144 | |
| }, | |
| { | |
| "epoch": 3.89553368660106, | |
| "grad_norm": 1.0984807014465332, | |
| "learning_rate": 3.0510651654354144e-07, | |
| "loss": 0.08106108009815216, | |
| "step": 5146 | |
| }, | |
| { | |
| "epoch": 3.8970476911430736, | |
| "grad_norm": 1.8982774019241333, | |
| "learning_rate": 3.0496071303870617e-07, | |
| "loss": 0.011285691522061825, | |
| "step": 5148 | |
| }, | |
| { | |
| "epoch": 3.8985616956850873, | |
| "grad_norm": 1.2132909297943115, | |
| "learning_rate": 3.0481701748364293e-07, | |
| "loss": 0.7924096584320068, | |
| "step": 5150 | |
| }, | |
| { | |
| "epoch": 3.9000757002271005, | |
| "grad_norm": 1.6980708837509155, | |
| "learning_rate": 3.04675430103552e-07, | |
| "loss": 0.8110805749893188, | |
| "step": 5152 | |
| }, | |
| { | |
| "epoch": 3.901589704769114, | |
| "grad_norm": 0.3759213984012604, | |
| "learning_rate": 3.045359511203297e-07, | |
| "loss": 0.4529341161251068, | |
| "step": 5154 | |
| }, | |
| { | |
| "epoch": 3.903103709311128, | |
| "grad_norm": 1.5288448333740234, | |
| "learning_rate": 3.043985807525675e-07, | |
| "loss": 0.555069625377655, | |
| "step": 5156 | |
| }, | |
| { | |
| "epoch": 3.9046177138531415, | |
| "grad_norm": 2.1005005836486816, | |
| "learning_rate": 3.04263319215553e-07, | |
| "loss": 0.1001816838979721, | |
| "step": 5158 | |
| }, | |
| { | |
| "epoch": 3.906131718395155, | |
| "grad_norm": 2.5000596046447754, | |
| "learning_rate": 3.041301667212684e-07, | |
| "loss": 0.5128546357154846, | |
| "step": 5160 | |
| }, | |
| { | |
| "epoch": 3.907645722937169, | |
| "grad_norm": 2.431713581085205, | |
| "learning_rate": 3.039991234783906e-07, | |
| "loss": 0.02077617682516575, | |
| "step": 5162 | |
| }, | |
| { | |
| "epoch": 3.9091597274791825, | |
| "grad_norm": 1.96620512008667, | |
| "learning_rate": 3.0387018969229133e-07, | |
| "loss": 0.02237214706838131, | |
| "step": 5164 | |
| }, | |
| { | |
| "epoch": 3.910673732021196, | |
| "grad_norm": 2.4163711071014404, | |
| "learning_rate": 3.0374336556503574e-07, | |
| "loss": 0.8278625011444092, | |
| "step": 5166 | |
| }, | |
| { | |
| "epoch": 3.91218773656321, | |
| "grad_norm": 2.0597403049468994, | |
| "learning_rate": 3.0361865129538293e-07, | |
| "loss": 0.655526876449585, | |
| "step": 5168 | |
| }, | |
| { | |
| "epoch": 3.913701741105223, | |
| "grad_norm": 3.301499843597412, | |
| "learning_rate": 3.0349604707878565e-07, | |
| "loss": 0.23730042576789856, | |
| "step": 5170 | |
| }, | |
| { | |
| "epoch": 3.915215745647237, | |
| "grad_norm": 2.0083491802215576, | |
| "learning_rate": 3.033755531073897e-07, | |
| "loss": 0.27088233828544617, | |
| "step": 5172 | |
| }, | |
| { | |
| "epoch": 3.9167297501892504, | |
| "grad_norm": 3.793509006500244, | |
| "learning_rate": 3.0325716957003346e-07, | |
| "loss": 0.4128110408782959, | |
| "step": 5174 | |
| }, | |
| { | |
| "epoch": 3.918243754731264, | |
| "grad_norm": 2.2577743530273438, | |
| "learning_rate": 3.03140896652248e-07, | |
| "loss": 0.9377378821372986, | |
| "step": 5176 | |
| }, | |
| { | |
| "epoch": 3.9197577592732777, | |
| "grad_norm": 2.377145528793335, | |
| "learning_rate": 3.0302673453625645e-07, | |
| "loss": 0.4497866928577423, | |
| "step": 5178 | |
| }, | |
| { | |
| "epoch": 3.9212717638152914, | |
| "grad_norm": 0.19746972620487213, | |
| "learning_rate": 3.0291468340097433e-07, | |
| "loss": 0.0009020717116072774, | |
| "step": 5180 | |
| }, | |
| { | |
| "epoch": 3.922785768357305, | |
| "grad_norm": 0.45454391837120056, | |
| "learning_rate": 3.0280474342200826e-07, | |
| "loss": 0.1682163029909134, | |
| "step": 5182 | |
| }, | |
| { | |
| "epoch": 3.9242997728993188, | |
| "grad_norm": 1.051390528678894, | |
| "learning_rate": 3.0269691477165676e-07, | |
| "loss": 0.07067491859197617, | |
| "step": 5184 | |
| }, | |
| { | |
| "epoch": 3.9258137774413324, | |
| "grad_norm": 1.027337908744812, | |
| "learning_rate": 3.025911976189091e-07, | |
| "loss": 0.08587085455656052, | |
| "step": 5186 | |
| }, | |
| { | |
| "epoch": 3.927327781983346, | |
| "grad_norm": 1.2078237533569336, | |
| "learning_rate": 3.024875921294456e-07, | |
| "loss": 0.30312326550483704, | |
| "step": 5188 | |
| }, | |
| { | |
| "epoch": 3.9288417865253598, | |
| "grad_norm": 1.9159677028656006, | |
| "learning_rate": 3.0238609846563696e-07, | |
| "loss": 0.4570953845977783, | |
| "step": 5190 | |
| }, | |
| { | |
| "epoch": 3.930355791067373, | |
| "grad_norm": 2.077888250350952, | |
| "learning_rate": 3.0228671678654446e-07, | |
| "loss": 0.34210488200187683, | |
| "step": 5192 | |
| }, | |
| { | |
| "epoch": 3.931869795609387, | |
| "grad_norm": 1.5590956211090088, | |
| "learning_rate": 3.0218944724791933e-07, | |
| "loss": 0.641905665397644, | |
| "step": 5194 | |
| }, | |
| { | |
| "epoch": 3.9333838001514003, | |
| "grad_norm": 0.030448101460933685, | |
| "learning_rate": 3.020942900022029e-07, | |
| "loss": 0.0016837774310261011, | |
| "step": 5196 | |
| }, | |
| { | |
| "epoch": 3.934897804693414, | |
| "grad_norm": 0.7177050113677979, | |
| "learning_rate": 3.020012451985256e-07, | |
| "loss": 0.03333354741334915, | |
| "step": 5198 | |
| }, | |
| { | |
| "epoch": 3.9364118092354277, | |
| "grad_norm": 4.787274360656738, | |
| "learning_rate": 3.019103129827078e-07, | |
| "loss": 0.09767407178878784, | |
| "step": 5200 | |
| }, | |
| { | |
| "epoch": 3.9379258137774413, | |
| "grad_norm": 1.6870243549346924, | |
| "learning_rate": 3.018214934972586e-07, | |
| "loss": 0.4257047474384308, | |
| "step": 5202 | |
| }, | |
| { | |
| "epoch": 3.939439818319455, | |
| "grad_norm": 1.786167025566101, | |
| "learning_rate": 3.0173478688137626e-07, | |
| "loss": 0.07438768446445465, | |
| "step": 5204 | |
| }, | |
| { | |
| "epoch": 3.9409538228614687, | |
| "grad_norm": 0.9023681879043579, | |
| "learning_rate": 3.0165019327094754e-07, | |
| "loss": 0.43461889028549194, | |
| "step": 5206 | |
| }, | |
| { | |
| "epoch": 3.9424678274034823, | |
| "grad_norm": 0.7739644646644592, | |
| "learning_rate": 3.0156771279854786e-07, | |
| "loss": 0.44720742106437683, | |
| "step": 5208 | |
| }, | |
| { | |
| "epoch": 3.9439818319454956, | |
| "grad_norm": 0.15927131474018097, | |
| "learning_rate": 3.0148734559344096e-07, | |
| "loss": 0.0011613935930654407, | |
| "step": 5210 | |
| }, | |
| { | |
| "epoch": 3.9454958364875097, | |
| "grad_norm": 2.753117799758911, | |
| "learning_rate": 3.0140909178157846e-07, | |
| "loss": 0.49144524335861206, | |
| "step": 5212 | |
| }, | |
| { | |
| "epoch": 3.947009841029523, | |
| "grad_norm": 0.03960929438471794, | |
| "learning_rate": 3.0133295148559994e-07, | |
| "loss": 0.26274147629737854, | |
| "step": 5214 | |
| }, | |
| { | |
| "epoch": 3.9485238455715366, | |
| "grad_norm": 4.2824177742004395, | |
| "learning_rate": 3.0125892482483296e-07, | |
| "loss": 0.5472410321235657, | |
| "step": 5216 | |
| }, | |
| { | |
| "epoch": 3.9500378501135502, | |
| "grad_norm": 0.7858144044876099, | |
| "learning_rate": 3.0118701191529207e-07, | |
| "loss": 0.03546662628650665, | |
| "step": 5218 | |
| }, | |
| { | |
| "epoch": 3.951551854655564, | |
| "grad_norm": 4.754327297210693, | |
| "learning_rate": 3.011172128696795e-07, | |
| "loss": 0.7001851797103882, | |
| "step": 5220 | |
| }, | |
| { | |
| "epoch": 3.9530658591975776, | |
| "grad_norm": 5.084624767303467, | |
| "learning_rate": 3.0104952779738463e-07, | |
| "loss": 0.19021165370941162, | |
| "step": 5222 | |
| }, | |
| { | |
| "epoch": 3.9545798637395913, | |
| "grad_norm": 2.424482583999634, | |
| "learning_rate": 3.0098395680448356e-07, | |
| "loss": 0.8408768773078918, | |
| "step": 5224 | |
| }, | |
| { | |
| "epoch": 3.956093868281605, | |
| "grad_norm": 2.059044361114502, | |
| "learning_rate": 3.0092049999373956e-07, | |
| "loss": 0.4773581922054291, | |
| "step": 5226 | |
| }, | |
| { | |
| "epoch": 3.9576078728236186, | |
| "grad_norm": 0.9264152646064758, | |
| "learning_rate": 3.00859157464602e-07, | |
| "loss": 0.039199769496917725, | |
| "step": 5228 | |
| }, | |
| { | |
| "epoch": 3.9591218773656323, | |
| "grad_norm": 2.8337342739105225, | |
| "learning_rate": 3.0079992931320743e-07, | |
| "loss": 0.35346996784210205, | |
| "step": 5230 | |
| }, | |
| { | |
| "epoch": 3.9606358819076455, | |
| "grad_norm": 0.22176103293895721, | |
| "learning_rate": 3.0074281563237815e-07, | |
| "loss": 0.042797479778528214, | |
| "step": 5232 | |
| }, | |
| { | |
| "epoch": 3.9621498864496596, | |
| "grad_norm": 1.4219660758972168, | |
| "learning_rate": 3.0068781651162303e-07, | |
| "loss": 0.44434934854507446, | |
| "step": 5234 | |
| }, | |
| { | |
| "epoch": 3.963663890991673, | |
| "grad_norm": 2.8318819999694824, | |
| "learning_rate": 3.006349320371369e-07, | |
| "loss": 0.5036948323249817, | |
| "step": 5236 | |
| }, | |
| { | |
| "epoch": 3.9651778955336865, | |
| "grad_norm": 2.2482142448425293, | |
| "learning_rate": 3.005841622918003e-07, | |
| "loss": 0.7446083426475525, | |
| "step": 5238 | |
| }, | |
| { | |
| "epoch": 3.9666919000757, | |
| "grad_norm": 2.282947063446045, | |
| "learning_rate": 3.0053550735517994e-07, | |
| "loss": 0.9360232949256897, | |
| "step": 5240 | |
| }, | |
| { | |
| "epoch": 3.968205904617714, | |
| "grad_norm": 38.85729217529297, | |
| "learning_rate": 3.004889673035278e-07, | |
| "loss": 0.4850436747074127, | |
| "step": 5242 | |
| }, | |
| { | |
| "epoch": 3.9697199091597275, | |
| "grad_norm": 5.286346912384033, | |
| "learning_rate": 3.0044454220978176e-07, | |
| "loss": 0.13758641481399536, | |
| "step": 5244 | |
| }, | |
| { | |
| "epoch": 3.971233913701741, | |
| "grad_norm": 3.7393798828125, | |
| "learning_rate": 3.004022321435649e-07, | |
| "loss": 0.3747367262840271, | |
| "step": 5246 | |
| }, | |
| { | |
| "epoch": 3.972747918243755, | |
| "grad_norm": 1.6003137826919556, | |
| "learning_rate": 3.003620371711856e-07, | |
| "loss": 0.4701666831970215, | |
| "step": 5248 | |
| }, | |
| { | |
| "epoch": 3.9742619227857685, | |
| "grad_norm": 1.439442753791809, | |
| "learning_rate": 3.0032395735563773e-07, | |
| "loss": 0.3731074333190918, | |
| "step": 5250 | |
| }, | |
| { | |
| "epoch": 3.975775927327782, | |
| "grad_norm": 1.7399569749832153, | |
| "learning_rate": 3.0028799275659976e-07, | |
| "loss": 0.663485050201416, | |
| "step": 5252 | |
| }, | |
| { | |
| "epoch": 3.9772899318697954, | |
| "grad_norm": 1.0444694757461548, | |
| "learning_rate": 3.002541434304361e-07, | |
| "loss": 0.46440961956977844, | |
| "step": 5254 | |
| }, | |
| { | |
| "epoch": 3.978803936411809, | |
| "grad_norm": 1.0770765542984009, | |
| "learning_rate": 3.0022240943019483e-07, | |
| "loss": 0.04875878617167473, | |
| "step": 5256 | |
| }, | |
| { | |
| "epoch": 3.9803179409538227, | |
| "grad_norm": 1.9652454853057861, | |
| "learning_rate": 3.001927908056102e-07, | |
| "loss": 0.44029855728149414, | |
| "step": 5258 | |
| }, | |
| { | |
| "epoch": 3.9818319454958364, | |
| "grad_norm": 6.682809352874756, | |
| "learning_rate": 3.0016528760310013e-07, | |
| "loss": 0.09948641061782837, | |
| "step": 5260 | |
| }, | |
| { | |
| "epoch": 3.98334595003785, | |
| "grad_norm": 2.8983285427093506, | |
| "learning_rate": 3.00139899865768e-07, | |
| "loss": 0.6057555079460144, | |
| "step": 5262 | |
| }, | |
| { | |
| "epoch": 3.9848599545798638, | |
| "grad_norm": 2.302161931991577, | |
| "learning_rate": 3.001166276334015e-07, | |
| "loss": 0.07627967745065689, | |
| "step": 5264 | |
| }, | |
| { | |
| "epoch": 3.9863739591218774, | |
| "grad_norm": 1.456366777420044, | |
| "learning_rate": 3.0009547094247273e-07, | |
| "loss": 0.007791428826749325, | |
| "step": 5266 | |
| }, | |
| { | |
| "epoch": 3.987887963663891, | |
| "grad_norm": 1.7385027408599854, | |
| "learning_rate": 3.000764298261389e-07, | |
| "loss": 0.5243682861328125, | |
| "step": 5268 | |
| }, | |
| { | |
| "epoch": 3.9894019682059048, | |
| "grad_norm": 0.8515236377716064, | |
| "learning_rate": 3.000595043142412e-07, | |
| "loss": 0.04642985761165619, | |
| "step": 5270 | |
| }, | |
| { | |
| "epoch": 3.990915972747918, | |
| "grad_norm": 0.29582443833351135, | |
| "learning_rate": 3.0004469443330506e-07, | |
| "loss": 0.05615620315074921, | |
| "step": 5272 | |
| }, | |
| { | |
| "epoch": 3.992429977289932, | |
| "grad_norm": 3.1598730087280273, | |
| "learning_rate": 3.0003200020654085e-07, | |
| "loss": 0.7824563980102539, | |
| "step": 5274 | |
| }, | |
| { | |
| "epoch": 3.9939439818319453, | |
| "grad_norm": 1.9997477531433105, | |
| "learning_rate": 3.0002142165384287e-07, | |
| "loss": 0.4484213590621948, | |
| "step": 5276 | |
| }, | |
| { | |
| "epoch": 3.995457986373959, | |
| "grad_norm": 0.953460156917572, | |
| "learning_rate": 3.0001295879179e-07, | |
| "loss": 0.013839129358530045, | |
| "step": 5278 | |
| }, | |
| { | |
| "epoch": 3.9969719909159727, | |
| "grad_norm": 2.7543108463287354, | |
| "learning_rate": 3.0000661163364527e-07, | |
| "loss": 0.4201052784919739, | |
| "step": 5280 | |
| }, | |
| { | |
| "epoch": 3.9984859954579863, | |
| "grad_norm": 5.094695568084717, | |
| "learning_rate": 3.000023801893557e-07, | |
| "loss": 0.08943390846252441, | |
| "step": 5282 | |
| }, | |
| { | |
| "epoch": 4.0, | |
| "grad_norm": 2.777651071548462, | |
| "learning_rate": 3.0000026446555307e-07, | |
| "loss": 0.7438609004020691, | |
| "step": 5284 | |
| }, | |
| { | |
| "epoch": 4.0, | |
| "step": 5284, | |
| "total_flos": 6.453258552541708e+18, | |
| "train_loss": 0.5996934499091338, | |
| "train_runtime": 17171.6272, | |
| "train_samples_per_second": 4.923, | |
| "train_steps_per_second": 0.308 | |
| } | |
| ], | |
| "logging_steps": 2, | |
| "max_steps": 5284, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 4, | |
| "save_steps": 99999, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": false, | |
| "should_training_stop": false | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 6.453258552541708e+18, | |
| "train_batch_size": 2, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |