Image-Text-to-Text
Transformers
Safetensors
qwen3_5
llama-factory
full
Generated from Trainer
conversational
Instructions to use furproxy/9b-113 with libraries, inference providers, notebooks, and local apps. Follow these links to get started.
- Libraries
- Transformers
How to use furproxy/9b-113 with Transformers:
# Use a pipeline as a high-level helper from transformers import pipeline pipe = pipeline("image-text-to-text", model="furproxy/9b-113") messages = [ { "role": "user", "content": [ {"type": "image", "url": "https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/p-blog/candy.JPG"}, {"type": "text", "text": "What animal is on the candy?"} ] }, ] pipe(text=messages)# Load model directly from transformers import AutoProcessor, AutoModelForImageTextToText processor = AutoProcessor.from_pretrained("furproxy/9b-113") model = AutoModelForImageTextToText.from_pretrained("furproxy/9b-113") messages = [ { "role": "user", "content": [ {"type": "image", "url": "https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/p-blog/candy.JPG"}, {"type": "text", "text": "What animal is on the candy?"} ] }, ] inputs = processor.apply_chat_template( messages, add_generation_prompt=True, tokenize=True, return_dict=True, return_tensors="pt", ).to(model.device) outputs = model.generate(**inputs, max_new_tokens=40) print(processor.decode(outputs[0][inputs["input_ids"].shape[-1]:])) - Notebooks
- Google Colab
- Kaggle
- Local Apps Settings
- vLLM
How to use furproxy/9b-113 with vLLM:
Install from pip and serve model
# Install vLLM from pip: pip install vllm # Start the vLLM server: vllm serve "furproxy/9b-113" # Call the server using curl (OpenAI-compatible API): curl -X POST "http://localhost:8000/v1/chat/completions" \ -H "Content-Type: application/json" \ --data '{ "model": "furproxy/9b-113", "messages": [ { "role": "user", "content": [ { "type": "text", "text": "Describe this image in one sentence." }, { "type": "image_url", "image_url": { "url": "https://cdn.britannica.com/61/93061-050-99147DCE/Statue-of-Liberty-Island-New-York-Bay.jpg" } } ] } ] }'Use Docker
docker model run hf.co/furproxy/9b-113
- SGLang
How to use furproxy/9b-113 with SGLang:
Install from pip and serve model
# Install SGLang from pip: pip install sglang # Start the SGLang server: python3 -m sglang.launch_server \ --model-path "furproxy/9b-113" \ --host 0.0.0.0 \ --port 30000 # Call the server using curl (OpenAI-compatible API): curl -X POST "http://localhost:30000/v1/chat/completions" \ -H "Content-Type: application/json" \ --data '{ "model": "furproxy/9b-113", "messages": [ { "role": "user", "content": [ { "type": "text", "text": "Describe this image in one sentence." }, { "type": "image_url", "image_url": { "url": "https://cdn.britannica.com/61/93061-050-99147DCE/Statue-of-Liberty-Island-New-York-Bay.jpg" } } ] } ] }'Use Docker images
docker run --gpus all \ --shm-size 32g \ -p 30000:30000 \ -v ~/.cache/huggingface:/root/.cache/huggingface \ --env "HF_TOKEN=<secret>" \ --ipc=host \ lmsysorg/sglang:latest \ python3 -m sglang.launch_server \ --model-path "furproxy/9b-113" \ --host 0.0.0.0 \ --port 30000 # Call the server using curl (OpenAI-compatible API): curl -X POST "http://localhost:30000/v1/chat/completions" \ -H "Content-Type: application/json" \ --data '{ "model": "furproxy/9b-113", "messages": [ { "role": "user", "content": [ { "type": "text", "text": "Describe this image in one sentence." }, { "type": "image_url", "image_url": { "url": "https://cdn.britannica.com/61/93061-050-99147DCE/Statue-of-Liberty-Island-New-York-Bay.jpg" } } ] } ] }' - Docker Model Runner
How to use furproxy/9b-113 with Docker Model Runner:
docker model run hf.co/furproxy/9b-113
| { | |
| "best_global_step": null, | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 4.0, | |
| "eval_steps": 500, | |
| "global_step": 1748, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.004576659038901602, | |
| "grad_norm": 18.951799392700195, | |
| "learning_rate": 1.1363636363636364e-07, | |
| "loss": 2.251408100128174, | |
| "step": 2 | |
| }, | |
| { | |
| "epoch": 0.009153318077803204, | |
| "grad_norm": 3.432992696762085, | |
| "learning_rate": 3.409090909090909e-07, | |
| "loss": 2.022919178009033, | |
| "step": 4 | |
| }, | |
| { | |
| "epoch": 0.013729977116704805, | |
| "grad_norm": 4.982711315155029, | |
| "learning_rate": 5.681818181818182e-07, | |
| "loss": 2.143446207046509, | |
| "step": 6 | |
| }, | |
| { | |
| "epoch": 0.018306636155606407, | |
| "grad_norm": 12.981165885925293, | |
| "learning_rate": 7.954545454545455e-07, | |
| "loss": 2.0866191387176514, | |
| "step": 8 | |
| }, | |
| { | |
| "epoch": 0.02288329519450801, | |
| "grad_norm": 5.714666843414307, | |
| "learning_rate": 1.0227272727272729e-06, | |
| "loss": 1.896759033203125, | |
| "step": 10 | |
| }, | |
| { | |
| "epoch": 0.02745995423340961, | |
| "grad_norm": 12.909541130065918, | |
| "learning_rate": 1.25e-06, | |
| "loss": 2.020211696624756, | |
| "step": 12 | |
| }, | |
| { | |
| "epoch": 0.032036613272311214, | |
| "grad_norm": 16.100811004638672, | |
| "learning_rate": 1.4772727272727275e-06, | |
| "loss": 1.792801022529602, | |
| "step": 14 | |
| }, | |
| { | |
| "epoch": 0.036613272311212815, | |
| "grad_norm": 2.436553955078125, | |
| "learning_rate": 1.7045454545454546e-06, | |
| "loss": 1.8900394439697266, | |
| "step": 16 | |
| }, | |
| { | |
| "epoch": 0.041189931350114416, | |
| "grad_norm": 5.285153865814209, | |
| "learning_rate": 1.931818181818182e-06, | |
| "loss": 1.5955464839935303, | |
| "step": 18 | |
| }, | |
| { | |
| "epoch": 0.04576659038901602, | |
| "grad_norm": 12.234434127807617, | |
| "learning_rate": 2.1590909090909092e-06, | |
| "loss": 1.2172309160232544, | |
| "step": 20 | |
| }, | |
| { | |
| "epoch": 0.05034324942791762, | |
| "grad_norm": 12.644134521484375, | |
| "learning_rate": 2.3863636363636367e-06, | |
| "loss": 0.9669137597084045, | |
| "step": 22 | |
| }, | |
| { | |
| "epoch": 0.05491990846681922, | |
| "grad_norm": 3.1600699424743652, | |
| "learning_rate": 2.6136363636363637e-06, | |
| "loss": 1.7239738702774048, | |
| "step": 24 | |
| }, | |
| { | |
| "epoch": 0.059496567505720827, | |
| "grad_norm": 13.365952491760254, | |
| "learning_rate": 2.8409090909090916e-06, | |
| "loss": 1.436122179031372, | |
| "step": 26 | |
| }, | |
| { | |
| "epoch": 0.06407322654462243, | |
| "grad_norm": 2.321202278137207, | |
| "learning_rate": 3.0681818181818186e-06, | |
| "loss": 1.8740382194519043, | |
| "step": 28 | |
| }, | |
| { | |
| "epoch": 0.06864988558352403, | |
| "grad_norm": 8.490443229675293, | |
| "learning_rate": 3.2954545454545456e-06, | |
| "loss": 1.5721473693847656, | |
| "step": 30 | |
| }, | |
| { | |
| "epoch": 0.07322654462242563, | |
| "grad_norm": 3.1264488697052, | |
| "learning_rate": 3.522727272727273e-06, | |
| "loss": 1.5784003734588623, | |
| "step": 32 | |
| }, | |
| { | |
| "epoch": 0.07780320366132723, | |
| "grad_norm": 2.265772581100464, | |
| "learning_rate": 3.7500000000000005e-06, | |
| "loss": 1.4820594787597656, | |
| "step": 34 | |
| }, | |
| { | |
| "epoch": 0.08237986270022883, | |
| "grad_norm": 2.990844964981079, | |
| "learning_rate": 3.9772727272727275e-06, | |
| "loss": 1.2070374488830566, | |
| "step": 36 | |
| }, | |
| { | |
| "epoch": 0.08695652173913043, | |
| "grad_norm": 12.784393310546875, | |
| "learning_rate": 4.204545454545455e-06, | |
| "loss": 1.3093262910842896, | |
| "step": 38 | |
| }, | |
| { | |
| "epoch": 0.09153318077803203, | |
| "grad_norm": 3.836043357849121, | |
| "learning_rate": 4.4318181818181824e-06, | |
| "loss": 1.4149296283721924, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 0.09610983981693363, | |
| "grad_norm": 1.9667078256607056, | |
| "learning_rate": 4.6590909090909095e-06, | |
| "loss": 1.117027997970581, | |
| "step": 42 | |
| }, | |
| { | |
| "epoch": 0.10068649885583524, | |
| "grad_norm": 3.5431325435638428, | |
| "learning_rate": 4.8863636363636365e-06, | |
| "loss": 0.7506925463676453, | |
| "step": 44 | |
| }, | |
| { | |
| "epoch": 0.10526315789473684, | |
| "grad_norm": 2.191105604171753, | |
| "learning_rate": 5.113636363636364e-06, | |
| "loss": 1.2588834762573242, | |
| "step": 46 | |
| }, | |
| { | |
| "epoch": 0.10983981693363844, | |
| "grad_norm": 2.621471643447876, | |
| "learning_rate": 5.340909090909091e-06, | |
| "loss": 0.9852038621902466, | |
| "step": 48 | |
| }, | |
| { | |
| "epoch": 0.11441647597254005, | |
| "grad_norm": 2.538278341293335, | |
| "learning_rate": 5.568181818181818e-06, | |
| "loss": 1.0315567255020142, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 0.11899313501144165, | |
| "grad_norm": 3.634997606277466, | |
| "learning_rate": 5.795454545454546e-06, | |
| "loss": 0.7927528619766235, | |
| "step": 52 | |
| }, | |
| { | |
| "epoch": 0.12356979405034325, | |
| "grad_norm": 5.6927170753479, | |
| "learning_rate": 6.022727272727273e-06, | |
| "loss": 0.6859608888626099, | |
| "step": 54 | |
| }, | |
| { | |
| "epoch": 0.12814645308924486, | |
| "grad_norm": 2.9429779052734375, | |
| "learning_rate": 6.25e-06, | |
| "loss": 1.399317741394043, | |
| "step": 56 | |
| }, | |
| { | |
| "epoch": 0.13272311212814644, | |
| "grad_norm": 3.453831434249878, | |
| "learning_rate": 6.477272727272727e-06, | |
| "loss": 0.9231398105621338, | |
| "step": 58 | |
| }, | |
| { | |
| "epoch": 0.13729977116704806, | |
| "grad_norm": 3.813654899597168, | |
| "learning_rate": 6.704545454545454e-06, | |
| "loss": 1.1301286220550537, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 0.14187643020594964, | |
| "grad_norm": 2.4758615493774414, | |
| "learning_rate": 6.931818181818183e-06, | |
| "loss": 1.120086669921875, | |
| "step": 62 | |
| }, | |
| { | |
| "epoch": 0.14645308924485126, | |
| "grad_norm": 5.501305103302002, | |
| "learning_rate": 7.15909090909091e-06, | |
| "loss": 1.4239763021469116, | |
| "step": 64 | |
| }, | |
| { | |
| "epoch": 0.15102974828375287, | |
| "grad_norm": 2.2261505126953125, | |
| "learning_rate": 7.386363636363637e-06, | |
| "loss": 0.883802056312561, | |
| "step": 66 | |
| }, | |
| { | |
| "epoch": 0.15560640732265446, | |
| "grad_norm": 2.1745944023132324, | |
| "learning_rate": 7.613636363636364e-06, | |
| "loss": 0.9119312763214111, | |
| "step": 68 | |
| }, | |
| { | |
| "epoch": 0.16018306636155608, | |
| "grad_norm": 2.2609024047851562, | |
| "learning_rate": 7.840909090909091e-06, | |
| "loss": 1.3790769577026367, | |
| "step": 70 | |
| }, | |
| { | |
| "epoch": 0.16475972540045766, | |
| "grad_norm": 2.078526496887207, | |
| "learning_rate": 8.068181818181819e-06, | |
| "loss": 1.247062087059021, | |
| "step": 72 | |
| }, | |
| { | |
| "epoch": 0.16933638443935928, | |
| "grad_norm": 1.86798095703125, | |
| "learning_rate": 8.295454545454547e-06, | |
| "loss": 1.320522665977478, | |
| "step": 74 | |
| }, | |
| { | |
| "epoch": 0.17391304347826086, | |
| "grad_norm": 3.6566405296325684, | |
| "learning_rate": 8.522727272727273e-06, | |
| "loss": 1.1991426944732666, | |
| "step": 76 | |
| }, | |
| { | |
| "epoch": 0.17848970251716248, | |
| "grad_norm": 2.2346911430358887, | |
| "learning_rate": 8.750000000000001e-06, | |
| "loss": 1.1312178373336792, | |
| "step": 78 | |
| }, | |
| { | |
| "epoch": 0.18306636155606407, | |
| "grad_norm": 4.466203212738037, | |
| "learning_rate": 8.977272727272727e-06, | |
| "loss": 1.2709550857543945, | |
| "step": 80 | |
| }, | |
| { | |
| "epoch": 0.18764302059496568, | |
| "grad_norm": 4.847050666809082, | |
| "learning_rate": 9.204545454545455e-06, | |
| "loss": 1.1973122358322144, | |
| "step": 82 | |
| }, | |
| { | |
| "epoch": 0.19221967963386727, | |
| "grad_norm": 15.698393821716309, | |
| "learning_rate": 9.431818181818183e-06, | |
| "loss": 0.9569052457809448, | |
| "step": 84 | |
| }, | |
| { | |
| "epoch": 0.19679633867276888, | |
| "grad_norm": 1.8609542846679688, | |
| "learning_rate": 9.65909090909091e-06, | |
| "loss": 1.2557601928710938, | |
| "step": 86 | |
| }, | |
| { | |
| "epoch": 0.20137299771167047, | |
| "grad_norm": 2.1308515071868896, | |
| "learning_rate": 9.886363636363637e-06, | |
| "loss": 1.3304195404052734, | |
| "step": 88 | |
| }, | |
| { | |
| "epoch": 0.20594965675057209, | |
| "grad_norm": 3.0522403717041016, | |
| "learning_rate": 9.999991941282018e-06, | |
| "loss": 1.2789955139160156, | |
| "step": 90 | |
| }, | |
| { | |
| "epoch": 0.21052631578947367, | |
| "grad_norm": 2.302807569503784, | |
| "learning_rate": 9.999927471711333e-06, | |
| "loss": 0.9662280082702637, | |
| "step": 92 | |
| }, | |
| { | |
| "epoch": 0.2151029748283753, | |
| "grad_norm": 1.821358323097229, | |
| "learning_rate": 9.999798533493595e-06, | |
| "loss": 1.3702021837234497, | |
| "step": 94 | |
| }, | |
| { | |
| "epoch": 0.21967963386727687, | |
| "grad_norm": 2.9049158096313477, | |
| "learning_rate": 9.999605128476047e-06, | |
| "loss": 1.2553328275680542, | |
| "step": 96 | |
| }, | |
| { | |
| "epoch": 0.2242562929061785, | |
| "grad_norm": 4.430267810821533, | |
| "learning_rate": 9.999347259429527e-06, | |
| "loss": 1.2501583099365234, | |
| "step": 98 | |
| }, | |
| { | |
| "epoch": 0.2288329519450801, | |
| "grad_norm": 1.757613182067871, | |
| "learning_rate": 9.999024930048416e-06, | |
| "loss": 0.9878523945808411, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.2334096109839817, | |
| "grad_norm": 1.882934808731079, | |
| "learning_rate": 9.998638144950604e-06, | |
| "loss": 1.3420817852020264, | |
| "step": 102 | |
| }, | |
| { | |
| "epoch": 0.2379862700228833, | |
| "grad_norm": 2.3838956356048584, | |
| "learning_rate": 9.998186909677402e-06, | |
| "loss": 0.6844009160995483, | |
| "step": 104 | |
| }, | |
| { | |
| "epoch": 0.2425629290617849, | |
| "grad_norm": 1.2917922735214233, | |
| "learning_rate": 9.997671230693475e-06, | |
| "loss": 1.2744860649108887, | |
| "step": 106 | |
| }, | |
| { | |
| "epoch": 0.2471395881006865, | |
| "grad_norm": 6.209701061248779, | |
| "learning_rate": 9.997091115386751e-06, | |
| "loss": 0.9371986985206604, | |
| "step": 108 | |
| }, | |
| { | |
| "epoch": 0.2517162471395881, | |
| "grad_norm": 3.551896333694458, | |
| "learning_rate": 9.996446572068303e-06, | |
| "loss": 0.993743896484375, | |
| "step": 110 | |
| }, | |
| { | |
| "epoch": 0.2562929061784897, | |
| "grad_norm": 2.348245859146118, | |
| "learning_rate": 9.995737609972248e-06, | |
| "loss": 1.222794771194458, | |
| "step": 112 | |
| }, | |
| { | |
| "epoch": 0.2608695652173913, | |
| "grad_norm": 2.0239150524139404, | |
| "learning_rate": 9.9949642392556e-06, | |
| "loss": 1.301463007926941, | |
| "step": 114 | |
| }, | |
| { | |
| "epoch": 0.2654462242562929, | |
| "grad_norm": 15.598209381103516, | |
| "learning_rate": 9.994126470998126e-06, | |
| "loss": 0.9942538738250732, | |
| "step": 116 | |
| }, | |
| { | |
| "epoch": 0.2700228832951945, | |
| "grad_norm": 3.621354103088379, | |
| "learning_rate": 9.993224317202196e-06, | |
| "loss": 0.9668116569519043, | |
| "step": 118 | |
| }, | |
| { | |
| "epoch": 0.2745995423340961, | |
| "grad_norm": 1.4748951196670532, | |
| "learning_rate": 9.992257790792606e-06, | |
| "loss": 1.1951305866241455, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 0.2791762013729977, | |
| "grad_norm": 3.5889358520507812, | |
| "learning_rate": 9.991226905616387e-06, | |
| "loss": 1.4021278619766235, | |
| "step": 122 | |
| }, | |
| { | |
| "epoch": 0.2837528604118993, | |
| "grad_norm": 2.6599173545837402, | |
| "learning_rate": 9.990131676442615e-06, | |
| "loss": 1.2020851373672485, | |
| "step": 124 | |
| }, | |
| { | |
| "epoch": 0.28832951945080093, | |
| "grad_norm": 6.152349948883057, | |
| "learning_rate": 9.9889721189622e-06, | |
| "loss": 1.3222763538360596, | |
| "step": 126 | |
| }, | |
| { | |
| "epoch": 0.2929061784897025, | |
| "grad_norm": 3.790332555770874, | |
| "learning_rate": 9.987748249787654e-06, | |
| "loss": 0.996853232383728, | |
| "step": 128 | |
| }, | |
| { | |
| "epoch": 0.2974828375286041, | |
| "grad_norm": 3.391437292098999, | |
| "learning_rate": 9.986460086452857e-06, | |
| "loss": 1.0844181776046753, | |
| "step": 130 | |
| }, | |
| { | |
| "epoch": 0.30205949656750575, | |
| "grad_norm": 5.271035194396973, | |
| "learning_rate": 9.985107647412804e-06, | |
| "loss": 1.159407138824463, | |
| "step": 132 | |
| }, | |
| { | |
| "epoch": 0.30663615560640733, | |
| "grad_norm": 1.60601007938385, | |
| "learning_rate": 9.983690952043345e-06, | |
| "loss": 1.3746013641357422, | |
| "step": 134 | |
| }, | |
| { | |
| "epoch": 0.3112128146453089, | |
| "grad_norm": 7.673926830291748, | |
| "learning_rate": 9.982210020640905e-06, | |
| "loss": 1.0017273426055908, | |
| "step": 136 | |
| }, | |
| { | |
| "epoch": 0.3157894736842105, | |
| "grad_norm": 2.7053334712982178, | |
| "learning_rate": 9.98066487442219e-06, | |
| "loss": 1.3173471689224243, | |
| "step": 138 | |
| }, | |
| { | |
| "epoch": 0.32036613272311215, | |
| "grad_norm": 2.064483404159546, | |
| "learning_rate": 9.979055535523887e-06, | |
| "loss": 1.0609164237976074, | |
| "step": 140 | |
| }, | |
| { | |
| "epoch": 0.32494279176201374, | |
| "grad_norm": 2.2231831550598145, | |
| "learning_rate": 9.977382027002348e-06, | |
| "loss": 1.304241418838501, | |
| "step": 142 | |
| }, | |
| { | |
| "epoch": 0.3295194508009153, | |
| "grad_norm": 1.224456787109375, | |
| "learning_rate": 9.97564437283325e-06, | |
| "loss": 1.1332193613052368, | |
| "step": 144 | |
| }, | |
| { | |
| "epoch": 0.3340961098398169, | |
| "grad_norm": 1.458966851234436, | |
| "learning_rate": 9.973842597911268e-06, | |
| "loss": 1.2001259326934814, | |
| "step": 146 | |
| }, | |
| { | |
| "epoch": 0.33867276887871856, | |
| "grad_norm": 2.087167501449585, | |
| "learning_rate": 9.971976728049704e-06, | |
| "loss": 1.2756290435791016, | |
| "step": 148 | |
| }, | |
| { | |
| "epoch": 0.34324942791762014, | |
| "grad_norm": 1.9153831005096436, | |
| "learning_rate": 9.970046789980122e-06, | |
| "loss": 1.2948122024536133, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 0.34782608695652173, | |
| "grad_norm": 1.9743618965148926, | |
| "learning_rate": 9.96805281135197e-06, | |
| "loss": 1.3063007593154907, | |
| "step": 152 | |
| }, | |
| { | |
| "epoch": 0.3524027459954233, | |
| "grad_norm": 2.7115976810455322, | |
| "learning_rate": 9.965994820732174e-06, | |
| "loss": 0.9853567481040955, | |
| "step": 154 | |
| }, | |
| { | |
| "epoch": 0.35697940503432496, | |
| "grad_norm": 1.8446264266967773, | |
| "learning_rate": 9.963872847604735e-06, | |
| "loss": 0.8984273076057434, | |
| "step": 156 | |
| }, | |
| { | |
| "epoch": 0.36155606407322655, | |
| "grad_norm": 2.8307297229766846, | |
| "learning_rate": 9.961686922370309e-06, | |
| "loss": 1.1862984895706177, | |
| "step": 158 | |
| }, | |
| { | |
| "epoch": 0.36613272311212813, | |
| "grad_norm": 1.9325522184371948, | |
| "learning_rate": 9.959437076345764e-06, | |
| "loss": 1.1266686916351318, | |
| "step": 160 | |
| }, | |
| { | |
| "epoch": 0.3707093821510298, | |
| "grad_norm": 2.4998505115509033, | |
| "learning_rate": 9.957123341763736e-06, | |
| "loss": 1.0668294429779053, | |
| "step": 162 | |
| }, | |
| { | |
| "epoch": 0.37528604118993136, | |
| "grad_norm": 1.6686221361160278, | |
| "learning_rate": 9.954745751772172e-06, | |
| "loss": 1.3585450649261475, | |
| "step": 164 | |
| }, | |
| { | |
| "epoch": 0.37986270022883295, | |
| "grad_norm": 3.186450958251953, | |
| "learning_rate": 9.952304340433845e-06, | |
| "loss": 1.062609314918518, | |
| "step": 166 | |
| }, | |
| { | |
| "epoch": 0.38443935926773454, | |
| "grad_norm": 4.109751224517822, | |
| "learning_rate": 9.949799142725866e-06, | |
| "loss": 1.1804240942001343, | |
| "step": 168 | |
| }, | |
| { | |
| "epoch": 0.3890160183066362, | |
| "grad_norm": 1.3450946807861328, | |
| "learning_rate": 9.947230194539196e-06, | |
| "loss": 1.2886464595794678, | |
| "step": 170 | |
| }, | |
| { | |
| "epoch": 0.39359267734553777, | |
| "grad_norm": 2.0440635681152344, | |
| "learning_rate": 9.94459753267812e-06, | |
| "loss": 1.0574209690093994, | |
| "step": 172 | |
| }, | |
| { | |
| "epoch": 0.39816933638443935, | |
| "grad_norm": 3.0480129718780518, | |
| "learning_rate": 9.941901194859726e-06, | |
| "loss": 1.2568514347076416, | |
| "step": 174 | |
| }, | |
| { | |
| "epoch": 0.40274599542334094, | |
| "grad_norm": 7.81062650680542, | |
| "learning_rate": 9.939141219713353e-06, | |
| "loss": 0.9926815032958984, | |
| "step": 176 | |
| }, | |
| { | |
| "epoch": 0.4073226544622426, | |
| "grad_norm": 2.2387683391571045, | |
| "learning_rate": 9.936317646780057e-06, | |
| "loss": 1.392266869544983, | |
| "step": 178 | |
| }, | |
| { | |
| "epoch": 0.41189931350114417, | |
| "grad_norm": 1.5428338050842285, | |
| "learning_rate": 9.933430516512029e-06, | |
| "loss": 1.0441172122955322, | |
| "step": 180 | |
| }, | |
| { | |
| "epoch": 0.41647597254004576, | |
| "grad_norm": 1.78026282787323, | |
| "learning_rate": 9.930479870272018e-06, | |
| "loss": 1.2641940116882324, | |
| "step": 182 | |
| }, | |
| { | |
| "epoch": 0.42105263157894735, | |
| "grad_norm": 3.4682302474975586, | |
| "learning_rate": 9.927465750332747e-06, | |
| "loss": 1.2930469512939453, | |
| "step": 184 | |
| }, | |
| { | |
| "epoch": 0.425629290617849, | |
| "grad_norm": 2.080035448074341, | |
| "learning_rate": 9.924388199876294e-06, | |
| "loss": 1.3206355571746826, | |
| "step": 186 | |
| }, | |
| { | |
| "epoch": 0.4302059496567506, | |
| "grad_norm": 2.443369150161743, | |
| "learning_rate": 9.921247262993487e-06, | |
| "loss": 1.131381630897522, | |
| "step": 188 | |
| }, | |
| { | |
| "epoch": 0.43478260869565216, | |
| "grad_norm": 1.6168911457061768, | |
| "learning_rate": 9.918042984683262e-06, | |
| "loss": 0.9173140525817871, | |
| "step": 190 | |
| }, | |
| { | |
| "epoch": 0.43935926773455375, | |
| "grad_norm": 2.228675365447998, | |
| "learning_rate": 9.91477541085202e-06, | |
| "loss": 1.298048734664917, | |
| "step": 192 | |
| }, | |
| { | |
| "epoch": 0.4439359267734554, | |
| "grad_norm": 5.84655237197876, | |
| "learning_rate": 9.911444588312976e-06, | |
| "loss": 0.8752405643463135, | |
| "step": 194 | |
| }, | |
| { | |
| "epoch": 0.448512585812357, | |
| "grad_norm": 1.5287593603134155, | |
| "learning_rate": 9.908050564785481e-06, | |
| "loss": 1.25732421875, | |
| "step": 196 | |
| }, | |
| { | |
| "epoch": 0.45308924485125857, | |
| "grad_norm": 1.5870686769485474, | |
| "learning_rate": 9.904593388894347e-06, | |
| "loss": 1.1686997413635254, | |
| "step": 198 | |
| }, | |
| { | |
| "epoch": 0.4576659038901602, | |
| "grad_norm": 2.4326746463775635, | |
| "learning_rate": 9.901073110169132e-06, | |
| "loss": 1.0250574350357056, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.4622425629290618, | |
| "grad_norm": 3.7590527534484863, | |
| "learning_rate": 9.897489779043454e-06, | |
| "loss": 1.367837905883789, | |
| "step": 202 | |
| }, | |
| { | |
| "epoch": 0.4668192219679634, | |
| "grad_norm": 5.2029924392700195, | |
| "learning_rate": 9.893843446854255e-06, | |
| "loss": 1.2215626239776611, | |
| "step": 204 | |
| }, | |
| { | |
| "epoch": 0.47139588100686497, | |
| "grad_norm": 2.076643466949463, | |
| "learning_rate": 9.890134165841064e-06, | |
| "loss": 1.1037054061889648, | |
| "step": 206 | |
| }, | |
| { | |
| "epoch": 0.4759725400457666, | |
| "grad_norm": 6.4338531494140625, | |
| "learning_rate": 9.886361989145256e-06, | |
| "loss": 1.1291370391845703, | |
| "step": 208 | |
| }, | |
| { | |
| "epoch": 0.4805491990846682, | |
| "grad_norm": 4.497616291046143, | |
| "learning_rate": 9.882526970809286e-06, | |
| "loss": 1.289217233657837, | |
| "step": 210 | |
| }, | |
| { | |
| "epoch": 0.4851258581235698, | |
| "grad_norm": 4.023069381713867, | |
| "learning_rate": 9.878629165775916e-06, | |
| "loss": 1.3998191356658936, | |
| "step": 212 | |
| }, | |
| { | |
| "epoch": 0.4897025171624714, | |
| "grad_norm": 2.4669904708862305, | |
| "learning_rate": 9.874668629887428e-06, | |
| "loss": 1.2304133176803589, | |
| "step": 214 | |
| }, | |
| { | |
| "epoch": 0.494279176201373, | |
| "grad_norm": 3.409818410873413, | |
| "learning_rate": 9.870645419884821e-06, | |
| "loss": 1.0029253959655762, | |
| "step": 216 | |
| }, | |
| { | |
| "epoch": 0.4988558352402746, | |
| "grad_norm": 3.6900084018707275, | |
| "learning_rate": 9.866559593407006e-06, | |
| "loss": 1.1414201259613037, | |
| "step": 218 | |
| }, | |
| { | |
| "epoch": 0.5034324942791762, | |
| "grad_norm": 2.412198781967163, | |
| "learning_rate": 9.862411208989971e-06, | |
| "loss": 1.0081186294555664, | |
| "step": 220 | |
| }, | |
| { | |
| "epoch": 0.5080091533180778, | |
| "grad_norm": 4.239388465881348, | |
| "learning_rate": 9.858200326065948e-06, | |
| "loss": 1.2810657024383545, | |
| "step": 222 | |
| }, | |
| { | |
| "epoch": 0.5125858123569794, | |
| "grad_norm": 6.374491214752197, | |
| "learning_rate": 9.853927004962557e-06, | |
| "loss": 1.068434715270996, | |
| "step": 224 | |
| }, | |
| { | |
| "epoch": 0.517162471395881, | |
| "grad_norm": 2.322037696838379, | |
| "learning_rate": 9.849591306901948e-06, | |
| "loss": 1.2551425695419312, | |
| "step": 226 | |
| }, | |
| { | |
| "epoch": 0.5217391304347826, | |
| "grad_norm": 1.8771734237670898, | |
| "learning_rate": 9.845193293999921e-06, | |
| "loss": 0.7825489044189453, | |
| "step": 228 | |
| }, | |
| { | |
| "epoch": 0.5263157894736842, | |
| "grad_norm": 1.1865154504776, | |
| "learning_rate": 9.840733029265033e-06, | |
| "loss": 1.1792237758636475, | |
| "step": 230 | |
| }, | |
| { | |
| "epoch": 0.5308924485125858, | |
| "grad_norm": 1.4317781925201416, | |
| "learning_rate": 9.836210576597699e-06, | |
| "loss": 1.24580717086792, | |
| "step": 232 | |
| }, | |
| { | |
| "epoch": 0.5354691075514875, | |
| "grad_norm": 2.155020236968994, | |
| "learning_rate": 9.831626000789274e-06, | |
| "loss": 1.2573895454406738, | |
| "step": 234 | |
| }, | |
| { | |
| "epoch": 0.540045766590389, | |
| "grad_norm": 3.024273157119751, | |
| "learning_rate": 9.826979367521131e-06, | |
| "loss": 0.9660389423370361, | |
| "step": 236 | |
| }, | |
| { | |
| "epoch": 0.5446224256292906, | |
| "grad_norm": 2.3436639308929443, | |
| "learning_rate": 9.82227074336371e-06, | |
| "loss": 0.9024907350540161, | |
| "step": 238 | |
| }, | |
| { | |
| "epoch": 0.5491990846681922, | |
| "grad_norm": 1.5992021560668945, | |
| "learning_rate": 9.81750019577557e-06, | |
| "loss": 1.2666064500808716, | |
| "step": 240 | |
| }, | |
| { | |
| "epoch": 0.5537757437070938, | |
| "grad_norm": 17.894210815429688, | |
| "learning_rate": 9.812667793102425e-06, | |
| "loss": 0.9698783159255981, | |
| "step": 242 | |
| }, | |
| { | |
| "epoch": 0.5583524027459954, | |
| "grad_norm": 1.664934515953064, | |
| "learning_rate": 9.80777360457616e-06, | |
| "loss": 1.1118632555007935, | |
| "step": 244 | |
| }, | |
| { | |
| "epoch": 0.562929061784897, | |
| "grad_norm": 2.275209665298462, | |
| "learning_rate": 9.802817700313842e-06, | |
| "loss": 1.523911714553833, | |
| "step": 246 | |
| }, | |
| { | |
| "epoch": 0.5675057208237986, | |
| "grad_norm": 4.4243998527526855, | |
| "learning_rate": 9.797800151316711e-06, | |
| "loss": 0.8372693061828613, | |
| "step": 248 | |
| }, | |
| { | |
| "epoch": 0.5720823798627003, | |
| "grad_norm": 4.079118728637695, | |
| "learning_rate": 9.792721029469173e-06, | |
| "loss": 0.6180707216262817, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 0.5766590389016019, | |
| "grad_norm": 11.111287117004395, | |
| "learning_rate": 9.787580407537759e-06, | |
| "loss": 0.893789529800415, | |
| "step": 252 | |
| }, | |
| { | |
| "epoch": 0.5812356979405034, | |
| "grad_norm": 15.717961311340332, | |
| "learning_rate": 9.782378359170082e-06, | |
| "loss": 0.946647584438324, | |
| "step": 254 | |
| }, | |
| { | |
| "epoch": 0.585812356979405, | |
| "grad_norm": 1.3548306226730347, | |
| "learning_rate": 9.777114958893799e-06, | |
| "loss": 1.598212480545044, | |
| "step": 256 | |
| }, | |
| { | |
| "epoch": 0.5903890160183066, | |
| "grad_norm": 3.011941909790039, | |
| "learning_rate": 9.77179028211552e-06, | |
| "loss": 1.2696183919906616, | |
| "step": 258 | |
| }, | |
| { | |
| "epoch": 0.5949656750572082, | |
| "grad_norm": 4.57606315612793, | |
| "learning_rate": 9.766404405119742e-06, | |
| "loss": 0.5768990516662598, | |
| "step": 260 | |
| }, | |
| { | |
| "epoch": 0.5995423340961098, | |
| "grad_norm": 2.027827024459839, | |
| "learning_rate": 9.760957405067758e-06, | |
| "loss": 1.0868414640426636, | |
| "step": 262 | |
| }, | |
| { | |
| "epoch": 0.6041189931350115, | |
| "grad_norm": 1.9111216068267822, | |
| "learning_rate": 9.75544935999654e-06, | |
| "loss": 1.313867449760437, | |
| "step": 264 | |
| }, | |
| { | |
| "epoch": 0.6086956521739131, | |
| "grad_norm": 1.7331411838531494, | |
| "learning_rate": 9.749880348817629e-06, | |
| "loss": 1.0849635601043701, | |
| "step": 266 | |
| }, | |
| { | |
| "epoch": 0.6132723112128147, | |
| "grad_norm": 2.179687976837158, | |
| "learning_rate": 9.744250451316003e-06, | |
| "loss": 1.2717711925506592, | |
| "step": 268 | |
| }, | |
| { | |
| "epoch": 0.6178489702517163, | |
| "grad_norm": 1.9285099506378174, | |
| "learning_rate": 9.738559748148937e-06, | |
| "loss": 1.4478249549865723, | |
| "step": 270 | |
| }, | |
| { | |
| "epoch": 0.6224256292906178, | |
| "grad_norm": 2.466549873352051, | |
| "learning_rate": 9.732808320844838e-06, | |
| "loss": 1.247833490371704, | |
| "step": 272 | |
| }, | |
| { | |
| "epoch": 0.6270022883295194, | |
| "grad_norm": 1.4353868961334229, | |
| "learning_rate": 9.726996251802088e-06, | |
| "loss": 0.9141141176223755, | |
| "step": 274 | |
| }, | |
| { | |
| "epoch": 0.631578947368421, | |
| "grad_norm": 6.866336822509766, | |
| "learning_rate": 9.721123624287858e-06, | |
| "loss": 1.126939058303833, | |
| "step": 276 | |
| }, | |
| { | |
| "epoch": 0.6361556064073226, | |
| "grad_norm": 5.548222064971924, | |
| "learning_rate": 9.715190522436916e-06, | |
| "loss": 1.0435082912445068, | |
| "step": 278 | |
| }, | |
| { | |
| "epoch": 0.6407322654462243, | |
| "grad_norm": 1.6613945960998535, | |
| "learning_rate": 9.709197031250419e-06, | |
| "loss": 0.8629664182662964, | |
| "step": 280 | |
| }, | |
| { | |
| "epoch": 0.6453089244851259, | |
| "grad_norm": 3.140124797821045, | |
| "learning_rate": 9.7031432365947e-06, | |
| "loss": 1.2095342874526978, | |
| "step": 282 | |
| }, | |
| { | |
| "epoch": 0.6498855835240275, | |
| "grad_norm": 1.81770920753479, | |
| "learning_rate": 9.697029225200033e-06, | |
| "loss": 1.2224023342132568, | |
| "step": 284 | |
| }, | |
| { | |
| "epoch": 0.6544622425629291, | |
| "grad_norm": 3.419079303741455, | |
| "learning_rate": 9.690855084659399e-06, | |
| "loss": 1.1394864320755005, | |
| "step": 286 | |
| }, | |
| { | |
| "epoch": 0.6590389016018307, | |
| "grad_norm": 4.641355514526367, | |
| "learning_rate": 9.684620903427217e-06, | |
| "loss": 0.603121280670166, | |
| "step": 288 | |
| }, | |
| { | |
| "epoch": 0.6636155606407322, | |
| "grad_norm": 2.0329110622406006, | |
| "learning_rate": 9.678326770818091e-06, | |
| "loss": 1.2675936222076416, | |
| "step": 290 | |
| }, | |
| { | |
| "epoch": 0.6681922196796338, | |
| "grad_norm": 2.861783981323242, | |
| "learning_rate": 9.671972777005522e-06, | |
| "loss": 1.2691437005996704, | |
| "step": 292 | |
| }, | |
| { | |
| "epoch": 0.6727688787185355, | |
| "grad_norm": 3.443868398666382, | |
| "learning_rate": 9.665559013020615e-06, | |
| "loss": 0.9782974720001221, | |
| "step": 294 | |
| }, | |
| { | |
| "epoch": 0.6773455377574371, | |
| "grad_norm": 4.7333807945251465, | |
| "learning_rate": 9.659085570750786e-06, | |
| "loss": 1.1612757444381714, | |
| "step": 296 | |
| }, | |
| { | |
| "epoch": 0.6819221967963387, | |
| "grad_norm": 10.009819030761719, | |
| "learning_rate": 9.652552542938428e-06, | |
| "loss": 1.2675966024398804, | |
| "step": 298 | |
| }, | |
| { | |
| "epoch": 0.6864988558352403, | |
| "grad_norm": 1.4802839756011963, | |
| "learning_rate": 9.645960023179601e-06, | |
| "loss": 1.2351771593093872, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 0.6910755148741419, | |
| "grad_norm": 2.1872692108154297, | |
| "learning_rate": 9.63930810592268e-06, | |
| "loss": 1.2081533670425415, | |
| "step": 302 | |
| }, | |
| { | |
| "epoch": 0.6956521739130435, | |
| "grad_norm": 4.144794940948486, | |
| "learning_rate": 9.632596886466995e-06, | |
| "loss": 1.019889235496521, | |
| "step": 304 | |
| }, | |
| { | |
| "epoch": 0.700228832951945, | |
| "grad_norm": 2.8666250705718994, | |
| "learning_rate": 9.625826460961488e-06, | |
| "loss": 0.9050840139389038, | |
| "step": 306 | |
| }, | |
| { | |
| "epoch": 0.7048054919908466, | |
| "grad_norm": 1.1820842027664185, | |
| "learning_rate": 9.618996926403314e-06, | |
| "loss": 0.5790635347366333, | |
| "step": 308 | |
| }, | |
| { | |
| "epoch": 0.7093821510297483, | |
| "grad_norm": 3.2699780464172363, | |
| "learning_rate": 9.612108380636463e-06, | |
| "loss": 1.002554178237915, | |
| "step": 310 | |
| }, | |
| { | |
| "epoch": 0.7139588100686499, | |
| "grad_norm": 2.4456770420074463, | |
| "learning_rate": 9.605160922350351e-06, | |
| "loss": 1.2322945594787598, | |
| "step": 312 | |
| }, | |
| { | |
| "epoch": 0.7185354691075515, | |
| "grad_norm": 1.6210792064666748, | |
| "learning_rate": 9.598154651078419e-06, | |
| "loss": 1.235492467880249, | |
| "step": 314 | |
| }, | |
| { | |
| "epoch": 0.7231121281464531, | |
| "grad_norm": 7.475627899169922, | |
| "learning_rate": 9.591089667196682e-06, | |
| "loss": 0.9872145652770996, | |
| "step": 316 | |
| }, | |
| { | |
| "epoch": 0.7276887871853547, | |
| "grad_norm": 1.6771211624145508, | |
| "learning_rate": 9.583966071922322e-06, | |
| "loss": 1.2031590938568115, | |
| "step": 318 | |
| }, | |
| { | |
| "epoch": 0.7322654462242563, | |
| "grad_norm": 5.789905071258545, | |
| "learning_rate": 9.576783967312218e-06, | |
| "loss": 1.190985918045044, | |
| "step": 320 | |
| }, | |
| { | |
| "epoch": 0.7368421052631579, | |
| "grad_norm": 0.9887858629226685, | |
| "learning_rate": 9.569543456261485e-06, | |
| "loss": 1.188122272491455, | |
| "step": 322 | |
| }, | |
| { | |
| "epoch": 0.7414187643020596, | |
| "grad_norm": 2.4599740505218506, | |
| "learning_rate": 9.562244642502007e-06, | |
| "loss": 0.8889155387878418, | |
| "step": 324 | |
| }, | |
| { | |
| "epoch": 0.7459954233409611, | |
| "grad_norm": 3.597360372543335, | |
| "learning_rate": 9.554887630600945e-06, | |
| "loss": 1.1945111751556396, | |
| "step": 326 | |
| }, | |
| { | |
| "epoch": 0.7505720823798627, | |
| "grad_norm": 1.4225729703903198, | |
| "learning_rate": 9.547472525959247e-06, | |
| "loss": 1.2383577823638916, | |
| "step": 328 | |
| }, | |
| { | |
| "epoch": 0.7551487414187643, | |
| "grad_norm": 1.7745798826217651, | |
| "learning_rate": 9.539999434810127e-06, | |
| "loss": 1.2871983051300049, | |
| "step": 330 | |
| }, | |
| { | |
| "epoch": 0.7597254004576659, | |
| "grad_norm": 2.066685914993286, | |
| "learning_rate": 9.532468464217548e-06, | |
| "loss": 1.271721601486206, | |
| "step": 332 | |
| }, | |
| { | |
| "epoch": 0.7643020594965675, | |
| "grad_norm": 5.190197467803955, | |
| "learning_rate": 9.524879722074691e-06, | |
| "loss": 1.0039429664611816, | |
| "step": 334 | |
| }, | |
| { | |
| "epoch": 0.7688787185354691, | |
| "grad_norm": 4.809805393218994, | |
| "learning_rate": 9.517233317102406e-06, | |
| "loss": 1.159362554550171, | |
| "step": 336 | |
| }, | |
| { | |
| "epoch": 0.7734553775743707, | |
| "grad_norm": 1.4193916320800781, | |
| "learning_rate": 9.509529358847655e-06, | |
| "loss": 1.2524155378341675, | |
| "step": 338 | |
| }, | |
| { | |
| "epoch": 0.7780320366132724, | |
| "grad_norm": 5.768612861633301, | |
| "learning_rate": 9.501767957681943e-06, | |
| "loss": 1.1373052597045898, | |
| "step": 340 | |
| }, | |
| { | |
| "epoch": 0.782608695652174, | |
| "grad_norm": 2.3782432079315186, | |
| "learning_rate": 9.493949224799735e-06, | |
| "loss": 1.197812795639038, | |
| "step": 342 | |
| }, | |
| { | |
| "epoch": 0.7871853546910755, | |
| "grad_norm": 2.0369019508361816, | |
| "learning_rate": 9.486073272216867e-06, | |
| "loss": 0.8322545886039734, | |
| "step": 344 | |
| }, | |
| { | |
| "epoch": 0.7917620137299771, | |
| "grad_norm": 3.6057472229003906, | |
| "learning_rate": 9.478140212768935e-06, | |
| "loss": 1.1050453186035156, | |
| "step": 346 | |
| }, | |
| { | |
| "epoch": 0.7963386727688787, | |
| "grad_norm": 3.5964956283569336, | |
| "learning_rate": 9.470150160109682e-06, | |
| "loss": 1.0718210935592651, | |
| "step": 348 | |
| }, | |
| { | |
| "epoch": 0.8009153318077803, | |
| "grad_norm": 12.48847770690918, | |
| "learning_rate": 9.462103228709379e-06, | |
| "loss": 1.3313639163970947, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 0.8054919908466819, | |
| "grad_norm": 1.933138370513916, | |
| "learning_rate": 9.453999533853162e-06, | |
| "loss": 1.4489710330963135, | |
| "step": 352 | |
| }, | |
| { | |
| "epoch": 0.8100686498855835, | |
| "grad_norm": 2.0144715309143066, | |
| "learning_rate": 9.445839191639404e-06, | |
| "loss": 1.2176668643951416, | |
| "step": 354 | |
| }, | |
| { | |
| "epoch": 0.8146453089244852, | |
| "grad_norm": 3.0317883491516113, | |
| "learning_rate": 9.437622318978037e-06, | |
| "loss": 0.6330467462539673, | |
| "step": 356 | |
| }, | |
| { | |
| "epoch": 0.8192219679633868, | |
| "grad_norm": 6.440718650817871, | |
| "learning_rate": 9.429349033588884e-06, | |
| "loss": 0.8626018762588501, | |
| "step": 358 | |
| }, | |
| { | |
| "epoch": 0.8237986270022883, | |
| "grad_norm": 2.311493396759033, | |
| "learning_rate": 9.421019453999972e-06, | |
| "loss": 1.0874342918395996, | |
| "step": 360 | |
| }, | |
| { | |
| "epoch": 0.8283752860411899, | |
| "grad_norm": 2.266531467437744, | |
| "learning_rate": 9.412633699545828e-06, | |
| "loss": 1.2565999031066895, | |
| "step": 362 | |
| }, | |
| { | |
| "epoch": 0.8329519450800915, | |
| "grad_norm": 1.8435766696929932, | |
| "learning_rate": 9.404191890365775e-06, | |
| "loss": 0.9089647531509399, | |
| "step": 364 | |
| }, | |
| { | |
| "epoch": 0.8375286041189931, | |
| "grad_norm": 3.46240496635437, | |
| "learning_rate": 9.395694147402214e-06, | |
| "loss": 1.1782324314117432, | |
| "step": 366 | |
| }, | |
| { | |
| "epoch": 0.8421052631578947, | |
| "grad_norm": 4.988228797912598, | |
| "learning_rate": 9.387140592398878e-06, | |
| "loss": 1.0270354747772217, | |
| "step": 368 | |
| }, | |
| { | |
| "epoch": 0.8466819221967964, | |
| "grad_norm": 4.704240798950195, | |
| "learning_rate": 9.378531347899108e-06, | |
| "loss": 0.7700833082199097, | |
| "step": 370 | |
| }, | |
| { | |
| "epoch": 0.851258581235698, | |
| "grad_norm": 7.564844131469727, | |
| "learning_rate": 9.369866537244076e-06, | |
| "loss": 0.8138679265975952, | |
| "step": 372 | |
| }, | |
| { | |
| "epoch": 0.8558352402745996, | |
| "grad_norm": 4.306687355041504, | |
| "learning_rate": 9.36114628457103e-06, | |
| "loss": 1.4564876556396484, | |
| "step": 374 | |
| }, | |
| { | |
| "epoch": 0.8604118993135011, | |
| "grad_norm": 3.528275966644287, | |
| "learning_rate": 9.352370714811518e-06, | |
| "loss": 0.7638095021247864, | |
| "step": 376 | |
| }, | |
| { | |
| "epoch": 0.8649885583524027, | |
| "grad_norm": 2.6173832416534424, | |
| "learning_rate": 9.343539953689592e-06, | |
| "loss": 0.62486332654953, | |
| "step": 378 | |
| }, | |
| { | |
| "epoch": 0.8695652173913043, | |
| "grad_norm": 3.4161272048950195, | |
| "learning_rate": 9.334654127720005e-06, | |
| "loss": 0.9487941265106201, | |
| "step": 380 | |
| }, | |
| { | |
| "epoch": 0.8741418764302059, | |
| "grad_norm": 2.358881711959839, | |
| "learning_rate": 9.325713364206402e-06, | |
| "loss": 1.1284657716751099, | |
| "step": 382 | |
| }, | |
| { | |
| "epoch": 0.8787185354691075, | |
| "grad_norm": 2.407302141189575, | |
| "learning_rate": 9.3167177912395e-06, | |
| "loss": 1.1534430980682373, | |
| "step": 384 | |
| }, | |
| { | |
| "epoch": 0.8832951945080092, | |
| "grad_norm": 2.1035103797912598, | |
| "learning_rate": 9.307667537695248e-06, | |
| "loss": 1.2289859056472778, | |
| "step": 386 | |
| }, | |
| { | |
| "epoch": 0.8878718535469108, | |
| "grad_norm": 2.478761911392212, | |
| "learning_rate": 9.298562733232979e-06, | |
| "loss": 1.2465143203735352, | |
| "step": 388 | |
| }, | |
| { | |
| "epoch": 0.8924485125858124, | |
| "grad_norm": 2.6959574222564697, | |
| "learning_rate": 9.289403508293558e-06, | |
| "loss": 1.225327730178833, | |
| "step": 390 | |
| }, | |
| { | |
| "epoch": 0.897025171624714, | |
| "grad_norm": 4.03607702255249, | |
| "learning_rate": 9.280189994097507e-06, | |
| "loss": 1.2250657081604004, | |
| "step": 392 | |
| }, | |
| { | |
| "epoch": 0.9016018306636155, | |
| "grad_norm": 2.1316354274749756, | |
| "learning_rate": 9.27092232264313e-06, | |
| "loss": 1.065427303314209, | |
| "step": 394 | |
| }, | |
| { | |
| "epoch": 0.9061784897025171, | |
| "grad_norm": 2.279680013656616, | |
| "learning_rate": 9.261600626704622e-06, | |
| "loss": 1.302757978439331, | |
| "step": 396 | |
| }, | |
| { | |
| "epoch": 0.9107551487414187, | |
| "grad_norm": 2.0204954147338867, | |
| "learning_rate": 9.252225039830163e-06, | |
| "loss": 1.316508412361145, | |
| "step": 398 | |
| }, | |
| { | |
| "epoch": 0.9153318077803204, | |
| "grad_norm": 2.101081609725952, | |
| "learning_rate": 9.242795696340008e-06, | |
| "loss": 1.1978795528411865, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 0.919908466819222, | |
| "grad_norm": 1.5630186796188354, | |
| "learning_rate": 9.233312731324557e-06, | |
| "loss": 0.9110370874404907, | |
| "step": 402 | |
| }, | |
| { | |
| "epoch": 0.9244851258581236, | |
| "grad_norm": 3.5780534744262695, | |
| "learning_rate": 9.22377628064243e-06, | |
| "loss": 1.2253358364105225, | |
| "step": 404 | |
| }, | |
| { | |
| "epoch": 0.9290617848970252, | |
| "grad_norm": 0.4819481372833252, | |
| "learning_rate": 9.214186480918511e-06, | |
| "loss": 1.119720697402954, | |
| "step": 406 | |
| }, | |
| { | |
| "epoch": 0.9336384439359268, | |
| "grad_norm": 1.6182817220687866, | |
| "learning_rate": 9.204543469541997e-06, | |
| "loss": 1.2026877403259277, | |
| "step": 408 | |
| }, | |
| { | |
| "epoch": 0.9382151029748284, | |
| "grad_norm": 5.046341896057129, | |
| "learning_rate": 9.194847384664422e-06, | |
| "loss": 1.0480332374572754, | |
| "step": 410 | |
| }, | |
| { | |
| "epoch": 0.9427917620137299, | |
| "grad_norm": 1.5035834312438965, | |
| "learning_rate": 9.185098365197688e-06, | |
| "loss": 1.1682276725769043, | |
| "step": 412 | |
| }, | |
| { | |
| "epoch": 0.9473684210526315, | |
| "grad_norm": 1.4273966550827026, | |
| "learning_rate": 9.175296550812067e-06, | |
| "loss": 1.2405450344085693, | |
| "step": 414 | |
| }, | |
| { | |
| "epoch": 0.9519450800915332, | |
| "grad_norm": 7.252469539642334, | |
| "learning_rate": 9.165442081934202e-06, | |
| "loss": 1.122786045074463, | |
| "step": 416 | |
| }, | |
| { | |
| "epoch": 0.9565217391304348, | |
| "grad_norm": 2.6789910793304443, | |
| "learning_rate": 9.155535099745097e-06, | |
| "loss": 1.2588951587677002, | |
| "step": 418 | |
| }, | |
| { | |
| "epoch": 0.9610983981693364, | |
| "grad_norm": 3.5507678985595703, | |
| "learning_rate": 9.145575746178092e-06, | |
| "loss": 1.0224729776382446, | |
| "step": 420 | |
| }, | |
| { | |
| "epoch": 0.965675057208238, | |
| "grad_norm": 7.071379661560059, | |
| "learning_rate": 9.135564163916833e-06, | |
| "loss": 1.204231858253479, | |
| "step": 422 | |
| }, | |
| { | |
| "epoch": 0.9702517162471396, | |
| "grad_norm": 1.381493330001831, | |
| "learning_rate": 9.125500496393221e-06, | |
| "loss": 0.7406469583511353, | |
| "step": 424 | |
| }, | |
| { | |
| "epoch": 0.9748283752860412, | |
| "grad_norm": 2.6033928394317627, | |
| "learning_rate": 9.115384887785366e-06, | |
| "loss": 1.4214835166931152, | |
| "step": 426 | |
| }, | |
| { | |
| "epoch": 0.9794050343249427, | |
| "grad_norm": 50.466766357421875, | |
| "learning_rate": 9.105217483015514e-06, | |
| "loss": 0.8424577713012695, | |
| "step": 428 | |
| }, | |
| { | |
| "epoch": 0.9839816933638444, | |
| "grad_norm": 2.872386932373047, | |
| "learning_rate": 9.094998427747974e-06, | |
| "loss": 1.2860726118087769, | |
| "step": 430 | |
| }, | |
| { | |
| "epoch": 0.988558352402746, | |
| "grad_norm": 3.8774938583374023, | |
| "learning_rate": 9.084727868387036e-06, | |
| "loss": 1.2441885471343994, | |
| "step": 432 | |
| }, | |
| { | |
| "epoch": 0.9931350114416476, | |
| "grad_norm": 1.6605303287506104, | |
| "learning_rate": 9.074405952074858e-06, | |
| "loss": 1.3228825330734253, | |
| "step": 434 | |
| }, | |
| { | |
| "epoch": 0.9977116704805492, | |
| "grad_norm": 2.563741683959961, | |
| "learning_rate": 9.064032826689378e-06, | |
| "loss": 1.1689465045928955, | |
| "step": 436 | |
| }, | |
| { | |
| "epoch": 1.002288329519451, | |
| "grad_norm": 1.281950831413269, | |
| "learning_rate": 9.053608640842183e-06, | |
| "loss": 1.0966238975524902, | |
| "step": 438 | |
| }, | |
| { | |
| "epoch": 1.0068649885583525, | |
| "grad_norm": 3.721905469894409, | |
| "learning_rate": 9.04313354387638e-06, | |
| "loss": 1.159909963607788, | |
| "step": 440 | |
| }, | |
| { | |
| "epoch": 1.011441647597254, | |
| "grad_norm": 2.1142349243164062, | |
| "learning_rate": 9.032607685864463e-06, | |
| "loss": 0.7140793800354004, | |
| "step": 442 | |
| }, | |
| { | |
| "epoch": 1.0160183066361557, | |
| "grad_norm": 1.9991384744644165, | |
| "learning_rate": 9.022031217606153e-06, | |
| "loss": 0.6476885080337524, | |
| "step": 444 | |
| }, | |
| { | |
| "epoch": 1.0205949656750573, | |
| "grad_norm": 8.886889457702637, | |
| "learning_rate": 9.011404290626251e-06, | |
| "loss": 0.5267953872680664, | |
| "step": 446 | |
| }, | |
| { | |
| "epoch": 1.0251716247139588, | |
| "grad_norm": 6.78672456741333, | |
| "learning_rate": 9.000727057172456e-06, | |
| "loss": 0.8036065101623535, | |
| "step": 448 | |
| }, | |
| { | |
| "epoch": 1.0297482837528604, | |
| "grad_norm": 2.5695552825927734, | |
| "learning_rate": 8.989999670213186e-06, | |
| "loss": 0.8730241060256958, | |
| "step": 450 | |
| }, | |
| { | |
| "epoch": 1.034324942791762, | |
| "grad_norm": 1.9509334564208984, | |
| "learning_rate": 8.979222283435392e-06, | |
| "loss": 1.1160993576049805, | |
| "step": 452 | |
| }, | |
| { | |
| "epoch": 1.0389016018306636, | |
| "grad_norm": 3.5826354026794434, | |
| "learning_rate": 8.96839505124235e-06, | |
| "loss": 1.0545172691345215, | |
| "step": 454 | |
| }, | |
| { | |
| "epoch": 1.0434782608695652, | |
| "grad_norm": 2.619070053100586, | |
| "learning_rate": 8.95751812875145e-06, | |
| "loss": 0.4537786543369293, | |
| "step": 456 | |
| }, | |
| { | |
| "epoch": 1.0480549199084668, | |
| "grad_norm": 1.5989960432052612, | |
| "learning_rate": 8.946591671791977e-06, | |
| "loss": 1.024822473526001, | |
| "step": 458 | |
| }, | |
| { | |
| "epoch": 1.0526315789473684, | |
| "grad_norm": 2.2013661861419678, | |
| "learning_rate": 8.935615836902876e-06, | |
| "loss": 0.6335904598236084, | |
| "step": 460 | |
| }, | |
| { | |
| "epoch": 1.05720823798627, | |
| "grad_norm": 2.2854695320129395, | |
| "learning_rate": 8.92459078133051e-06, | |
| "loss": 1.0793886184692383, | |
| "step": 462 | |
| }, | |
| { | |
| "epoch": 1.0617848970251715, | |
| "grad_norm": 3.123532772064209, | |
| "learning_rate": 8.913516663026404e-06, | |
| "loss": 0.9790216088294983, | |
| "step": 464 | |
| }, | |
| { | |
| "epoch": 1.0663615560640731, | |
| "grad_norm": 2.568563222885132, | |
| "learning_rate": 8.902393640644988e-06, | |
| "loss": 0.5305502414703369, | |
| "step": 466 | |
| }, | |
| { | |
| "epoch": 1.070938215102975, | |
| "grad_norm": 1.6813656091690063, | |
| "learning_rate": 8.89122187354132e-06, | |
| "loss": 0.9657827019691467, | |
| "step": 468 | |
| }, | |
| { | |
| "epoch": 1.0755148741418765, | |
| "grad_norm": 3.9278571605682373, | |
| "learning_rate": 8.880001521768808e-06, | |
| "loss": 0.8472052216529846, | |
| "step": 470 | |
| }, | |
| { | |
| "epoch": 1.080091533180778, | |
| "grad_norm": 0.806633710861206, | |
| "learning_rate": 8.868732746076904e-06, | |
| "loss": 0.6929956674575806, | |
| "step": 472 | |
| }, | |
| { | |
| "epoch": 1.0846681922196797, | |
| "grad_norm": 7.49754524230957, | |
| "learning_rate": 8.857415707908818e-06, | |
| "loss": 0.7502920627593994, | |
| "step": 474 | |
| }, | |
| { | |
| "epoch": 1.0892448512585813, | |
| "grad_norm": 1.6636337041854858, | |
| "learning_rate": 8.846050569399191e-06, | |
| "loss": 1.094468593597412, | |
| "step": 476 | |
| }, | |
| { | |
| "epoch": 1.0938215102974829, | |
| "grad_norm": 1.5605403184890747, | |
| "learning_rate": 8.834637493371785e-06, | |
| "loss": 1.0230355262756348, | |
| "step": 478 | |
| }, | |
| { | |
| "epoch": 1.0983981693363845, | |
| "grad_norm": 2.4448082447052, | |
| "learning_rate": 8.823176643337137e-06, | |
| "loss": 1.1374318599700928, | |
| "step": 480 | |
| }, | |
| { | |
| "epoch": 1.102974828375286, | |
| "grad_norm": 1.8032822608947754, | |
| "learning_rate": 8.811668183490228e-06, | |
| "loss": 0.8566664457321167, | |
| "step": 482 | |
| }, | |
| { | |
| "epoch": 1.1075514874141876, | |
| "grad_norm": 4.632296562194824, | |
| "learning_rate": 8.800112278708124e-06, | |
| "loss": 0.8171731233596802, | |
| "step": 484 | |
| }, | |
| { | |
| "epoch": 1.1121281464530892, | |
| "grad_norm": 0.4392177164554596, | |
| "learning_rate": 8.788509094547612e-06, | |
| "loss": 0.5787323713302612, | |
| "step": 486 | |
| }, | |
| { | |
| "epoch": 1.1167048054919908, | |
| "grad_norm": 1.527347207069397, | |
| "learning_rate": 8.776858797242837e-06, | |
| "loss": 0.9281891584396362, | |
| "step": 488 | |
| }, | |
| { | |
| "epoch": 1.1212814645308924, | |
| "grad_norm": 2.3181469440460205, | |
| "learning_rate": 8.76516155370291e-06, | |
| "loss": 0.5847245454788208, | |
| "step": 490 | |
| }, | |
| { | |
| "epoch": 1.125858123569794, | |
| "grad_norm": 1.7468464374542236, | |
| "learning_rate": 8.753417531509527e-06, | |
| "loss": 1.0287659168243408, | |
| "step": 492 | |
| }, | |
| { | |
| "epoch": 1.1304347826086956, | |
| "grad_norm": 8.322272300720215, | |
| "learning_rate": 8.741626898914558e-06, | |
| "loss": 0.62440025806427, | |
| "step": 494 | |
| }, | |
| { | |
| "epoch": 1.1350114416475972, | |
| "grad_norm": 1.4653323888778687, | |
| "learning_rate": 8.729789824837644e-06, | |
| "loss": 0.5702868700027466, | |
| "step": 496 | |
| }, | |
| { | |
| "epoch": 1.139588100686499, | |
| "grad_norm": 2.8388781547546387, | |
| "learning_rate": 8.717906478863776e-06, | |
| "loss": 0.7256041765213013, | |
| "step": 498 | |
| }, | |
| { | |
| "epoch": 1.1441647597254005, | |
| "grad_norm": 3.141469717025757, | |
| "learning_rate": 8.70597703124086e-06, | |
| "loss": 0.9217201471328735, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 1.1487414187643021, | |
| "grad_norm": 16.2866153717041, | |
| "learning_rate": 8.694001652877283e-06, | |
| "loss": 1.0957762002944946, | |
| "step": 502 | |
| }, | |
| { | |
| "epoch": 1.1533180778032037, | |
| "grad_norm": 21.44241714477539, | |
| "learning_rate": 8.681980515339464e-06, | |
| "loss": 1.2868103981018066, | |
| "step": 504 | |
| }, | |
| { | |
| "epoch": 1.1578947368421053, | |
| "grad_norm": 26.603134155273438, | |
| "learning_rate": 8.669913790849396e-06, | |
| "loss": 0.899326741695404, | |
| "step": 506 | |
| }, | |
| { | |
| "epoch": 1.162471395881007, | |
| "grad_norm": 2.4431211948394775, | |
| "learning_rate": 8.657801652282178e-06, | |
| "loss": 0.8970417976379395, | |
| "step": 508 | |
| }, | |
| { | |
| "epoch": 1.1670480549199085, | |
| "grad_norm": 2.117124080657959, | |
| "learning_rate": 8.645644273163536e-06, | |
| "loss": 0.9268218278884888, | |
| "step": 510 | |
| }, | |
| { | |
| "epoch": 1.17162471395881, | |
| "grad_norm": 3.660663604736328, | |
| "learning_rate": 8.633441827667338e-06, | |
| "loss": 1.3189082145690918, | |
| "step": 512 | |
| }, | |
| { | |
| "epoch": 1.1762013729977117, | |
| "grad_norm": 2.381092071533203, | |
| "learning_rate": 8.621194490613104e-06, | |
| "loss": 1.006082534790039, | |
| "step": 514 | |
| }, | |
| { | |
| "epoch": 1.1807780320366132, | |
| "grad_norm": 1.8072335720062256, | |
| "learning_rate": 8.608902437463495e-06, | |
| "loss": 1.0185256004333496, | |
| "step": 516 | |
| }, | |
| { | |
| "epoch": 1.1853546910755148, | |
| "grad_norm": 1.9344371557235718, | |
| "learning_rate": 8.596565844321804e-06, | |
| "loss": 0.7876001596450806, | |
| "step": 518 | |
| }, | |
| { | |
| "epoch": 1.1899313501144164, | |
| "grad_norm": 21.985254287719727, | |
| "learning_rate": 8.584184887929424e-06, | |
| "loss": 0.8519538640975952, | |
| "step": 520 | |
| }, | |
| { | |
| "epoch": 1.194508009153318, | |
| "grad_norm": 7.320517063140869, | |
| "learning_rate": 8.57175974566333e-06, | |
| "loss": 0.9020718336105347, | |
| "step": 522 | |
| }, | |
| { | |
| "epoch": 1.1990846681922196, | |
| "grad_norm": 1.7533776760101318, | |
| "learning_rate": 8.559290595533528e-06, | |
| "loss": 0.7076669931411743, | |
| "step": 524 | |
| }, | |
| { | |
| "epoch": 1.2036613272311212, | |
| "grad_norm": 1.601244568824768, | |
| "learning_rate": 8.5467776161805e-06, | |
| "loss": 1.0420892238616943, | |
| "step": 526 | |
| }, | |
| { | |
| "epoch": 1.208237986270023, | |
| "grad_norm": 4.683840751647949, | |
| "learning_rate": 8.534220986872664e-06, | |
| "loss": 0.9078390002250671, | |
| "step": 528 | |
| }, | |
| { | |
| "epoch": 1.2128146453089246, | |
| "grad_norm": 2.8337855339050293, | |
| "learning_rate": 8.521620887503783e-06, | |
| "loss": 0.9289965629577637, | |
| "step": 530 | |
| }, | |
| { | |
| "epoch": 1.2173913043478262, | |
| "grad_norm": 2.4027771949768066, | |
| "learning_rate": 8.508977498590404e-06, | |
| "loss": 0.7684561610221863, | |
| "step": 532 | |
| }, | |
| { | |
| "epoch": 1.2219679633867278, | |
| "grad_norm": 5.679591178894043, | |
| "learning_rate": 8.496291001269261e-06, | |
| "loss": 1.1440486907958984, | |
| "step": 534 | |
| }, | |
| { | |
| "epoch": 1.2265446224256293, | |
| "grad_norm": 2.527660608291626, | |
| "learning_rate": 8.483561577294688e-06, | |
| "loss": 0.6309778690338135, | |
| "step": 536 | |
| }, | |
| { | |
| "epoch": 1.231121281464531, | |
| "grad_norm": 1.9118728637695312, | |
| "learning_rate": 8.470789409036014e-06, | |
| "loss": 1.0466161966323853, | |
| "step": 538 | |
| }, | |
| { | |
| "epoch": 1.2356979405034325, | |
| "grad_norm": 4.447290897369385, | |
| "learning_rate": 8.457974679474944e-06, | |
| "loss": 1.0474622249603271, | |
| "step": 540 | |
| }, | |
| { | |
| "epoch": 1.240274599542334, | |
| "grad_norm": 2.7177319526672363, | |
| "learning_rate": 8.445117572202943e-06, | |
| "loss": 1.147586464881897, | |
| "step": 542 | |
| }, | |
| { | |
| "epoch": 1.2448512585812357, | |
| "grad_norm": 2.1759936809539795, | |
| "learning_rate": 8.432218271418602e-06, | |
| "loss": 1.1140575408935547, | |
| "step": 544 | |
| }, | |
| { | |
| "epoch": 1.2494279176201373, | |
| "grad_norm": 2.892226219177246, | |
| "learning_rate": 8.419276961925006e-06, | |
| "loss": 1.0437395572662354, | |
| "step": 546 | |
| }, | |
| { | |
| "epoch": 1.2540045766590389, | |
| "grad_norm": 7.295011520385742, | |
| "learning_rate": 8.406293829127083e-06, | |
| "loss": 0.7300729751586914, | |
| "step": 548 | |
| }, | |
| { | |
| "epoch": 1.2585812356979404, | |
| "grad_norm": 0.512110710144043, | |
| "learning_rate": 8.393269059028937e-06, | |
| "loss": 0.7643875479698181, | |
| "step": 550 | |
| }, | |
| { | |
| "epoch": 1.263157894736842, | |
| "grad_norm": 6.191455364227295, | |
| "learning_rate": 8.380202838231205e-06, | |
| "loss": 0.8880730867385864, | |
| "step": 552 | |
| }, | |
| { | |
| "epoch": 1.2677345537757438, | |
| "grad_norm": 1.766752004623413, | |
| "learning_rate": 8.367095353928361e-06, | |
| "loss": 0.906735360622406, | |
| "step": 554 | |
| }, | |
| { | |
| "epoch": 1.2723112128146452, | |
| "grad_norm": 4.45707368850708, | |
| "learning_rate": 8.35394679390605e-06, | |
| "loss": 1.1020989418029785, | |
| "step": 556 | |
| }, | |
| { | |
| "epoch": 1.276887871853547, | |
| "grad_norm": 1.6555581092834473, | |
| "learning_rate": 8.340757346538394e-06, | |
| "loss": 1.121458888053894, | |
| "step": 558 | |
| }, | |
| { | |
| "epoch": 1.2814645308924484, | |
| "grad_norm": 2.084331750869751, | |
| "learning_rate": 8.32752720078529e-06, | |
| "loss": 0.7600382566452026, | |
| "step": 560 | |
| }, | |
| { | |
| "epoch": 1.2860411899313502, | |
| "grad_norm": 2.9418492317199707, | |
| "learning_rate": 8.314256546189696e-06, | |
| "loss": 0.8527880907058716, | |
| "step": 562 | |
| }, | |
| { | |
| "epoch": 1.2906178489702518, | |
| "grad_norm": 6.980574607849121, | |
| "learning_rate": 8.30094557287494e-06, | |
| "loss": 0.7204505205154419, | |
| "step": 564 | |
| }, | |
| { | |
| "epoch": 1.2951945080091534, | |
| "grad_norm": 78.67940521240234, | |
| "learning_rate": 8.287594471541966e-06, | |
| "loss": 1.0420033931732178, | |
| "step": 566 | |
| }, | |
| { | |
| "epoch": 1.299771167048055, | |
| "grad_norm": 7.022028923034668, | |
| "learning_rate": 8.274203433466625e-06, | |
| "loss": 0.9533605575561523, | |
| "step": 568 | |
| }, | |
| { | |
| "epoch": 1.3043478260869565, | |
| "grad_norm": 3.6711020469665527, | |
| "learning_rate": 8.260772650496918e-06, | |
| "loss": 1.0180366039276123, | |
| "step": 570 | |
| }, | |
| { | |
| "epoch": 1.3089244851258581, | |
| "grad_norm": 3.5704965591430664, | |
| "learning_rate": 8.247302315050261e-06, | |
| "loss": 0.6935830116271973, | |
| "step": 572 | |
| }, | |
| { | |
| "epoch": 1.3135011441647597, | |
| "grad_norm": 6.118966102600098, | |
| "learning_rate": 8.23379262011072e-06, | |
| "loss": 0.9882407784461975, | |
| "step": 574 | |
| }, | |
| { | |
| "epoch": 1.3180778032036613, | |
| "grad_norm": 2.1732988357543945, | |
| "learning_rate": 8.220243759226248e-06, | |
| "loss": 1.063117504119873, | |
| "step": 576 | |
| }, | |
| { | |
| "epoch": 1.322654462242563, | |
| "grad_norm": 6.698834419250488, | |
| "learning_rate": 8.206655926505916e-06, | |
| "loss": 0.732232391834259, | |
| "step": 578 | |
| }, | |
| { | |
| "epoch": 1.3272311212814645, | |
| "grad_norm": 2.403120756149292, | |
| "learning_rate": 8.193029316617123e-06, | |
| "loss": 1.0798766613006592, | |
| "step": 580 | |
| }, | |
| { | |
| "epoch": 1.331807780320366, | |
| "grad_norm": 1.4961130619049072, | |
| "learning_rate": 8.17936412478282e-06, | |
| "loss": 1.0436818599700928, | |
| "step": 582 | |
| }, | |
| { | |
| "epoch": 1.3363844393592679, | |
| "grad_norm": 1.0409057140350342, | |
| "learning_rate": 8.1656605467787e-06, | |
| "loss": 0.5859847068786621, | |
| "step": 584 | |
| }, | |
| { | |
| "epoch": 1.3409610983981692, | |
| "grad_norm": 8.445878982543945, | |
| "learning_rate": 8.1519187789304e-06, | |
| "loss": 0.9882102012634277, | |
| "step": 586 | |
| }, | |
| { | |
| "epoch": 1.345537757437071, | |
| "grad_norm": 5.030638694763184, | |
| "learning_rate": 8.138139018110694e-06, | |
| "loss": 0.825863778591156, | |
| "step": 588 | |
| }, | |
| { | |
| "epoch": 1.3501144164759724, | |
| "grad_norm": 2.1235110759735107, | |
| "learning_rate": 8.124321461736655e-06, | |
| "loss": 0.8253368139266968, | |
| "step": 590 | |
| }, | |
| { | |
| "epoch": 1.3546910755148742, | |
| "grad_norm": 2.607447624206543, | |
| "learning_rate": 8.110466307766845e-06, | |
| "loss": 1.1002779006958008, | |
| "step": 592 | |
| }, | |
| { | |
| "epoch": 1.3592677345537758, | |
| "grad_norm": 2.3491551876068115, | |
| "learning_rate": 8.096573754698473e-06, | |
| "loss": 1.1305601596832275, | |
| "step": 594 | |
| }, | |
| { | |
| "epoch": 1.3638443935926774, | |
| "grad_norm": 1.7016195058822632, | |
| "learning_rate": 8.082644001564548e-06, | |
| "loss": 1.1057755947113037, | |
| "step": 596 | |
| }, | |
| { | |
| "epoch": 1.368421052631579, | |
| "grad_norm": 3.3334386348724365, | |
| "learning_rate": 8.068677247931021e-06, | |
| "loss": 1.200844645500183, | |
| "step": 598 | |
| }, | |
| { | |
| "epoch": 1.3729977116704806, | |
| "grad_norm": 2.949726104736328, | |
| "learning_rate": 8.054673693893948e-06, | |
| "loss": 1.1354503631591797, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 1.3775743707093822, | |
| "grad_norm": 1.6623036861419678, | |
| "learning_rate": 8.040633540076604e-06, | |
| "loss": 1.0025185346603394, | |
| "step": 602 | |
| }, | |
| { | |
| "epoch": 1.3821510297482837, | |
| "grad_norm": 1.9049066305160522, | |
| "learning_rate": 8.026556987626606e-06, | |
| "loss": 0.38326674699783325, | |
| "step": 604 | |
| }, | |
| { | |
| "epoch": 1.3867276887871853, | |
| "grad_norm": 1.9863322973251343, | |
| "learning_rate": 8.012444238213056e-06, | |
| "loss": 1.0257573127746582, | |
| "step": 606 | |
| }, | |
| { | |
| "epoch": 1.391304347826087, | |
| "grad_norm": 8.692577362060547, | |
| "learning_rate": 7.99829549402362e-06, | |
| "loss": 0.9867333173751831, | |
| "step": 608 | |
| }, | |
| { | |
| "epoch": 1.3958810068649885, | |
| "grad_norm": 1.6721243858337402, | |
| "learning_rate": 7.984110957761657e-06, | |
| "loss": 0.8433778285980225, | |
| "step": 610 | |
| }, | |
| { | |
| "epoch": 1.40045766590389, | |
| "grad_norm": 8.466538429260254, | |
| "learning_rate": 7.969890832643296e-06, | |
| "loss": 0.9302389621734619, | |
| "step": 612 | |
| }, | |
| { | |
| "epoch": 1.4050343249427917, | |
| "grad_norm": 1.992497444152832, | |
| "learning_rate": 7.955635322394543e-06, | |
| "loss": 0.7917460203170776, | |
| "step": 614 | |
| }, | |
| { | |
| "epoch": 1.4096109839816933, | |
| "grad_norm": 38.11425018310547, | |
| "learning_rate": 7.941344631248343e-06, | |
| "loss": 0.743791937828064, | |
| "step": 616 | |
| }, | |
| { | |
| "epoch": 1.414187643020595, | |
| "grad_norm": 1.7407969236373901, | |
| "learning_rate": 7.927018963941668e-06, | |
| "loss": 1.0373704433441162, | |
| "step": 618 | |
| }, | |
| { | |
| "epoch": 1.4187643020594964, | |
| "grad_norm": 1.8311736583709717, | |
| "learning_rate": 7.912658525712582e-06, | |
| "loss": 1.042643427848816, | |
| "step": 620 | |
| }, | |
| { | |
| "epoch": 1.4233409610983982, | |
| "grad_norm": 4.667344570159912, | |
| "learning_rate": 7.898263522297294e-06, | |
| "loss": 0.8382468223571777, | |
| "step": 622 | |
| }, | |
| { | |
| "epoch": 1.4279176201372998, | |
| "grad_norm": 1.3862837553024292, | |
| "learning_rate": 7.883834159927212e-06, | |
| "loss": 0.8047330379486084, | |
| "step": 624 | |
| }, | |
| { | |
| "epoch": 1.4324942791762014, | |
| "grad_norm": 2.3761935234069824, | |
| "learning_rate": 7.869370645326e-06, | |
| "loss": 1.0753339529037476, | |
| "step": 626 | |
| }, | |
| { | |
| "epoch": 1.437070938215103, | |
| "grad_norm": 10.085698127746582, | |
| "learning_rate": 7.854873185706598e-06, | |
| "loss": 0.7506072521209717, | |
| "step": 628 | |
| }, | |
| { | |
| "epoch": 1.4416475972540046, | |
| "grad_norm": 8.790916442871094, | |
| "learning_rate": 7.840341988768269e-06, | |
| "loss": 1.1122334003448486, | |
| "step": 630 | |
| }, | |
| { | |
| "epoch": 1.4462242562929062, | |
| "grad_norm": 3.126335382461548, | |
| "learning_rate": 7.825777262693612e-06, | |
| "loss": 0.5835685133934021, | |
| "step": 632 | |
| }, | |
| { | |
| "epoch": 1.4508009153318078, | |
| "grad_norm": 2.1324803829193115, | |
| "learning_rate": 7.811179216145588e-06, | |
| "loss": 1.0125725269317627, | |
| "step": 634 | |
| }, | |
| { | |
| "epoch": 1.4553775743707094, | |
| "grad_norm": 7.664412498474121, | |
| "learning_rate": 7.796548058264525e-06, | |
| "loss": 0.8314673900604248, | |
| "step": 636 | |
| }, | |
| { | |
| "epoch": 1.459954233409611, | |
| "grad_norm": 1.4939762353897095, | |
| "learning_rate": 7.781883998665126e-06, | |
| "loss": 1.0299837589263916, | |
| "step": 638 | |
| }, | |
| { | |
| "epoch": 1.4645308924485125, | |
| "grad_norm": 125.39936828613281, | |
| "learning_rate": 7.767187247433459e-06, | |
| "loss": 0.8142813444137573, | |
| "step": 640 | |
| }, | |
| { | |
| "epoch": 1.4691075514874141, | |
| "grad_norm": 3.3667984008789062, | |
| "learning_rate": 7.752458015123955e-06, | |
| "loss": 0.7184183597564697, | |
| "step": 642 | |
| }, | |
| { | |
| "epoch": 1.4736842105263157, | |
| "grad_norm": 8.629426002502441, | |
| "learning_rate": 7.737696512756393e-06, | |
| "loss": 0.7312871217727661, | |
| "step": 644 | |
| }, | |
| { | |
| "epoch": 1.4782608695652173, | |
| "grad_norm": 20.76643943786621, | |
| "learning_rate": 7.722902951812863e-06, | |
| "loss": 0.8988088965415955, | |
| "step": 646 | |
| }, | |
| { | |
| "epoch": 1.482837528604119, | |
| "grad_norm": 1.4531633853912354, | |
| "learning_rate": 7.70807754423475e-06, | |
| "loss": 0.8973408937454224, | |
| "step": 648 | |
| }, | |
| { | |
| "epoch": 1.4874141876430205, | |
| "grad_norm": 2.0992820262908936, | |
| "learning_rate": 7.693220502419696e-06, | |
| "loss": 0.918885350227356, | |
| "step": 650 | |
| }, | |
| { | |
| "epoch": 1.4919908466819223, | |
| "grad_norm": 1.0393502712249756, | |
| "learning_rate": 7.678332039218549e-06, | |
| "loss": 1.0323870182037354, | |
| "step": 652 | |
| }, | |
| { | |
| "epoch": 1.4965675057208239, | |
| "grad_norm": 2.6336605548858643, | |
| "learning_rate": 7.663412367932315e-06, | |
| "loss": 1.0499887466430664, | |
| "step": 654 | |
| }, | |
| { | |
| "epoch": 1.5011441647597255, | |
| "grad_norm": 7.964874267578125, | |
| "learning_rate": 7.648461702309116e-06, | |
| "loss": 0.7312684059143066, | |
| "step": 656 | |
| }, | |
| { | |
| "epoch": 1.505720823798627, | |
| "grad_norm": 3.785597801208496, | |
| "learning_rate": 7.633480256541112e-06, | |
| "loss": 1.2282322645187378, | |
| "step": 658 | |
| }, | |
| { | |
| "epoch": 1.5102974828375286, | |
| "grad_norm": 2.3747780323028564, | |
| "learning_rate": 7.618468245261436e-06, | |
| "loss": 0.7873207330703735, | |
| "step": 660 | |
| }, | |
| { | |
| "epoch": 1.5148741418764302, | |
| "grad_norm": 2.2096705436706543, | |
| "learning_rate": 7.603425883541123e-06, | |
| "loss": 0.7644495964050293, | |
| "step": 662 | |
| }, | |
| { | |
| "epoch": 1.5194508009153318, | |
| "grad_norm": 1.7680861949920654, | |
| "learning_rate": 7.588353386886026e-06, | |
| "loss": 1.152151346206665, | |
| "step": 664 | |
| }, | |
| { | |
| "epoch": 1.5240274599542334, | |
| "grad_norm": 14.067296028137207, | |
| "learning_rate": 7.573250971233729e-06, | |
| "loss": 0.8791661262512207, | |
| "step": 666 | |
| }, | |
| { | |
| "epoch": 1.528604118993135, | |
| "grad_norm": 1.3611705303192139, | |
| "learning_rate": 7.5581188529504556e-06, | |
| "loss": 0.40379875898361206, | |
| "step": 668 | |
| }, | |
| { | |
| "epoch": 1.5331807780320366, | |
| "grad_norm": 1.303026795387268, | |
| "learning_rate": 7.5429572488279615e-06, | |
| "loss": 1.0647927522659302, | |
| "step": 670 | |
| }, | |
| { | |
| "epoch": 1.5377574370709381, | |
| "grad_norm": 1.101729393005371, | |
| "learning_rate": 7.5277663760804395e-06, | |
| "loss": 1.0676069259643555, | |
| "step": 672 | |
| }, | |
| { | |
| "epoch": 1.54233409610984, | |
| "grad_norm": 4.634459972381592, | |
| "learning_rate": 7.512546452341402e-06, | |
| "loss": 0.5080143809318542, | |
| "step": 674 | |
| }, | |
| { | |
| "epoch": 1.5469107551487413, | |
| "grad_norm": 2.555481433868408, | |
| "learning_rate": 7.497297695660558e-06, | |
| "loss": 0.9243414402008057, | |
| "step": 676 | |
| }, | |
| { | |
| "epoch": 1.5514874141876431, | |
| "grad_norm": 2.7285444736480713, | |
| "learning_rate": 7.482020324500699e-06, | |
| "loss": 0.9102246165275574, | |
| "step": 678 | |
| }, | |
| { | |
| "epoch": 1.5560640732265445, | |
| "grad_norm": 28.053659439086914, | |
| "learning_rate": 7.466714557734567e-06, | |
| "loss": 0.8449078798294067, | |
| "step": 680 | |
| }, | |
| { | |
| "epoch": 1.5606407322654463, | |
| "grad_norm": 1.8448493480682373, | |
| "learning_rate": 7.451380614641709e-06, | |
| "loss": 1.1165541410446167, | |
| "step": 682 | |
| }, | |
| { | |
| "epoch": 1.5652173913043477, | |
| "grad_norm": 1.3494691848754883, | |
| "learning_rate": 7.436018714905347e-06, | |
| "loss": 1.132646083831787, | |
| "step": 684 | |
| }, | |
| { | |
| "epoch": 1.5697940503432495, | |
| "grad_norm": 1.6197850704193115, | |
| "learning_rate": 7.4206290786092305e-06, | |
| "loss": 1.129209280014038, | |
| "step": 686 | |
| }, | |
| { | |
| "epoch": 1.574370709382151, | |
| "grad_norm": 7.3508620262146, | |
| "learning_rate": 7.405211926234472e-06, | |
| "loss": 0.6533366441726685, | |
| "step": 688 | |
| }, | |
| { | |
| "epoch": 1.5789473684210527, | |
| "grad_norm": 1.5411512851715088, | |
| "learning_rate": 7.389767478656399e-06, | |
| "loss": 0.9583989381790161, | |
| "step": 690 | |
| }, | |
| { | |
| "epoch": 1.5835240274599542, | |
| "grad_norm": 2.164099931716919, | |
| "learning_rate": 7.374295957141387e-06, | |
| "loss": 0.6867862939834595, | |
| "step": 692 | |
| }, | |
| { | |
| "epoch": 1.5881006864988558, | |
| "grad_norm": 3.5565309524536133, | |
| "learning_rate": 7.358797583343691e-06, | |
| "loss": 1.251814603805542, | |
| "step": 694 | |
| }, | |
| { | |
| "epoch": 1.5926773455377574, | |
| "grad_norm": 2.2329952716827393, | |
| "learning_rate": 7.34327257930226e-06, | |
| "loss": 0.7910688519477844, | |
| "step": 696 | |
| }, | |
| { | |
| "epoch": 1.597254004576659, | |
| "grad_norm": 1.991982102394104, | |
| "learning_rate": 7.327721167437575e-06, | |
| "loss": 1.0751738548278809, | |
| "step": 698 | |
| }, | |
| { | |
| "epoch": 1.6018306636155606, | |
| "grad_norm": 2.1958391666412354, | |
| "learning_rate": 7.312143570548441e-06, | |
| "loss": 1.0445199012756348, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 1.6064073226544622, | |
| "grad_norm": 1.833368182182312, | |
| "learning_rate": 7.296540011808814e-06, | |
| "loss": 1.0390658378601074, | |
| "step": 702 | |
| }, | |
| { | |
| "epoch": 1.610983981693364, | |
| "grad_norm": 5.710247039794922, | |
| "learning_rate": 7.280910714764584e-06, | |
| "loss": 0.8262543678283691, | |
| "step": 704 | |
| }, | |
| { | |
| "epoch": 1.6155606407322654, | |
| "grad_norm": 1.8299520015716553, | |
| "learning_rate": 7.2652559033303974e-06, | |
| "loss": 1.061065435409546, | |
| "step": 706 | |
| }, | |
| { | |
| "epoch": 1.6201372997711672, | |
| "grad_norm": 2.613022565841675, | |
| "learning_rate": 7.249575801786421e-06, | |
| "loss": 1.0792577266693115, | |
| "step": 708 | |
| }, | |
| { | |
| "epoch": 1.6247139588100685, | |
| "grad_norm": 1.213928461074829, | |
| "learning_rate": 7.233870634775153e-06, | |
| "loss": 0.6793702840805054, | |
| "step": 710 | |
| }, | |
| { | |
| "epoch": 1.6292906178489703, | |
| "grad_norm": 2.241819143295288, | |
| "learning_rate": 7.218140627298192e-06, | |
| "loss": 0.7657841444015503, | |
| "step": 712 | |
| }, | |
| { | |
| "epoch": 1.6338672768878717, | |
| "grad_norm": 2.9433786869049072, | |
| "learning_rate": 7.202386004713008e-06, | |
| "loss": 0.9588929414749146, | |
| "step": 714 | |
| }, | |
| { | |
| "epoch": 1.6384439359267735, | |
| "grad_norm": 4.815126419067383, | |
| "learning_rate": 7.1866069927297366e-06, | |
| "loss": 0.5673887729644775, | |
| "step": 716 | |
| }, | |
| { | |
| "epoch": 1.643020594965675, | |
| "grad_norm": 5.819674968719482, | |
| "learning_rate": 7.170803817407917e-06, | |
| "loss": 0.895261287689209, | |
| "step": 718 | |
| }, | |
| { | |
| "epoch": 1.6475972540045767, | |
| "grad_norm": 1.3659911155700684, | |
| "learning_rate": 7.154976705153274e-06, | |
| "loss": 0.9793621301651001, | |
| "step": 720 | |
| }, | |
| { | |
| "epoch": 1.6521739130434783, | |
| "grad_norm": 14.497611999511719, | |
| "learning_rate": 7.139125882714465e-06, | |
| "loss": 0.8763951063156128, | |
| "step": 722 | |
| }, | |
| { | |
| "epoch": 1.6567505720823799, | |
| "grad_norm": 2.1780354976654053, | |
| "learning_rate": 7.123251577179834e-06, | |
| "loss": 0.7888288497924805, | |
| "step": 724 | |
| }, | |
| { | |
| "epoch": 1.6613272311212814, | |
| "grad_norm": 1.8186107873916626, | |
| "learning_rate": 7.107354015974156e-06, | |
| "loss": 0.7892118096351624, | |
| "step": 726 | |
| }, | |
| { | |
| "epoch": 1.665903890160183, | |
| "grad_norm": 5.142183303833008, | |
| "learning_rate": 7.091433426855387e-06, | |
| "loss": 0.8022271394729614, | |
| "step": 728 | |
| }, | |
| { | |
| "epoch": 1.6704805491990846, | |
| "grad_norm": 2.999373435974121, | |
| "learning_rate": 7.075490037911384e-06, | |
| "loss": 0.48615947365760803, | |
| "step": 730 | |
| }, | |
| { | |
| "epoch": 1.6750572082379862, | |
| "grad_norm": 13.79496955871582, | |
| "learning_rate": 7.059524077556659e-06, | |
| "loss": 0.46404361724853516, | |
| "step": 732 | |
| }, | |
| { | |
| "epoch": 1.679633867276888, | |
| "grad_norm": 1.3626662492752075, | |
| "learning_rate": 7.043535774529088e-06, | |
| "loss": 0.9120252132415771, | |
| "step": 734 | |
| }, | |
| { | |
| "epoch": 1.6842105263157894, | |
| "grad_norm": 2.496614933013916, | |
| "learning_rate": 7.027525357886644e-06, | |
| "loss": 0.6731216311454773, | |
| "step": 736 | |
| }, | |
| { | |
| "epoch": 1.6887871853546912, | |
| "grad_norm": 3.0987555980682373, | |
| "learning_rate": 7.011493057004113e-06, | |
| "loss": 0.6688947677612305, | |
| "step": 738 | |
| }, | |
| { | |
| "epoch": 1.6933638443935926, | |
| "grad_norm": 8.601667404174805, | |
| "learning_rate": 6.995439101569808e-06, | |
| "loss": 1.0317034721374512, | |
| "step": 740 | |
| }, | |
| { | |
| "epoch": 1.6979405034324944, | |
| "grad_norm": 10.779264450073242, | |
| "learning_rate": 6.9793637215822755e-06, | |
| "loss": 0.8653741478919983, | |
| "step": 742 | |
| }, | |
| { | |
| "epoch": 1.7025171624713957, | |
| "grad_norm": 2.992541790008545, | |
| "learning_rate": 6.963267147347007e-06, | |
| "loss": 0.9310321807861328, | |
| "step": 744 | |
| }, | |
| { | |
| "epoch": 1.7070938215102975, | |
| "grad_norm": 12.640564918518066, | |
| "learning_rate": 6.947149609473134e-06, | |
| "loss": 0.6817935109138489, | |
| "step": 746 | |
| }, | |
| { | |
| "epoch": 1.7116704805491991, | |
| "grad_norm": 1.5293439626693726, | |
| "learning_rate": 6.931011338870123e-06, | |
| "loss": 1.009579062461853, | |
| "step": 748 | |
| }, | |
| { | |
| "epoch": 1.7162471395881007, | |
| "grad_norm": 1.235052227973938, | |
| "learning_rate": 6.914852566744472e-06, | |
| "loss": 0.5917520523071289, | |
| "step": 750 | |
| }, | |
| { | |
| "epoch": 1.7208237986270023, | |
| "grad_norm": 3.1631014347076416, | |
| "learning_rate": 6.8986735245963965e-06, | |
| "loss": 1.1671645641326904, | |
| "step": 752 | |
| }, | |
| { | |
| "epoch": 1.7254004576659039, | |
| "grad_norm": 11.111958503723145, | |
| "learning_rate": 6.8824744442165124e-06, | |
| "loss": 1.0235098600387573, | |
| "step": 754 | |
| }, | |
| { | |
| "epoch": 1.7299771167048055, | |
| "grad_norm": 4.900933742523193, | |
| "learning_rate": 6.866255557682513e-06, | |
| "loss": 1.1278393268585205, | |
| "step": 756 | |
| }, | |
| { | |
| "epoch": 1.734553775743707, | |
| "grad_norm": 1.4825588464736938, | |
| "learning_rate": 6.850017097355852e-06, | |
| "loss": 0.6875651478767395, | |
| "step": 758 | |
| }, | |
| { | |
| "epoch": 1.7391304347826086, | |
| "grad_norm": 6.850196361541748, | |
| "learning_rate": 6.833759295878403e-06, | |
| "loss": 0.9467449188232422, | |
| "step": 760 | |
| }, | |
| { | |
| "epoch": 1.7437070938215102, | |
| "grad_norm": 5.623671054840088, | |
| "learning_rate": 6.817482386169131e-06, | |
| "loss": 0.9277236461639404, | |
| "step": 762 | |
| }, | |
| { | |
| "epoch": 1.748283752860412, | |
| "grad_norm": 1.6635899543762207, | |
| "learning_rate": 6.801186601420766e-06, | |
| "loss": 1.0011539459228516, | |
| "step": 764 | |
| }, | |
| { | |
| "epoch": 1.7528604118993134, | |
| "grad_norm": 2.057003974914551, | |
| "learning_rate": 6.7848721750964444e-06, | |
| "loss": 1.0401999950408936, | |
| "step": 766 | |
| }, | |
| { | |
| "epoch": 1.7574370709382152, | |
| "grad_norm": 1.9083833694458008, | |
| "learning_rate": 6.768539340926376e-06, | |
| "loss": 0.8578721284866333, | |
| "step": 768 | |
| }, | |
| { | |
| "epoch": 1.7620137299771166, | |
| "grad_norm": 6.834735870361328, | |
| "learning_rate": 6.752188332904495e-06, | |
| "loss": 0.4599095582962036, | |
| "step": 770 | |
| }, | |
| { | |
| "epoch": 1.7665903890160184, | |
| "grad_norm": 2.0236639976501465, | |
| "learning_rate": 6.7358193852851006e-06, | |
| "loss": 1.0432019233703613, | |
| "step": 772 | |
| }, | |
| { | |
| "epoch": 1.7711670480549198, | |
| "grad_norm": 2.964568614959717, | |
| "learning_rate": 6.719432732579509e-06, | |
| "loss": 1.0962594747543335, | |
| "step": 774 | |
| }, | |
| { | |
| "epoch": 1.7757437070938216, | |
| "grad_norm": 2.739694833755493, | |
| "learning_rate": 6.7030286095526855e-06, | |
| "loss": 1.00520658493042, | |
| "step": 776 | |
| }, | |
| { | |
| "epoch": 1.7803203661327232, | |
| "grad_norm": 2.45100474357605, | |
| "learning_rate": 6.6866072512198895e-06, | |
| "loss": 0.8793066143989563, | |
| "step": 778 | |
| }, | |
| { | |
| "epoch": 1.7848970251716247, | |
| "grad_norm": 2.1978166103363037, | |
| "learning_rate": 6.670168892843304e-06, | |
| "loss": 1.0643588304519653, | |
| "step": 780 | |
| }, | |
| { | |
| "epoch": 1.7894736842105263, | |
| "grad_norm": 2.2212750911712646, | |
| "learning_rate": 6.653713769928664e-06, | |
| "loss": 0.9725496172904968, | |
| "step": 782 | |
| }, | |
| { | |
| "epoch": 1.794050343249428, | |
| "grad_norm": 12.925155639648438, | |
| "learning_rate": 6.6372421182218806e-06, | |
| "loss": 0.5852289199829102, | |
| "step": 784 | |
| }, | |
| { | |
| "epoch": 1.7986270022883295, | |
| "grad_norm": 2.02931547164917, | |
| "learning_rate": 6.620754173705669e-06, | |
| "loss": 0.4103405475616455, | |
| "step": 786 | |
| }, | |
| { | |
| "epoch": 1.803203661327231, | |
| "grad_norm": 6.498274803161621, | |
| "learning_rate": 6.604250172596166e-06, | |
| "loss": 0.7299265265464783, | |
| "step": 788 | |
| }, | |
| { | |
| "epoch": 1.8077803203661327, | |
| "grad_norm": 0.8046561479568481, | |
| "learning_rate": 6.587730351339542e-06, | |
| "loss": 0.5971012115478516, | |
| "step": 790 | |
| }, | |
| { | |
| "epoch": 1.8123569794050343, | |
| "grad_norm": 2.3184573650360107, | |
| "learning_rate": 6.571194946608615e-06, | |
| "loss": 0.5819271802902222, | |
| "step": 792 | |
| }, | |
| { | |
| "epoch": 1.816933638443936, | |
| "grad_norm": 2.6694231033325195, | |
| "learning_rate": 6.554644195299467e-06, | |
| "loss": 1.0282055139541626, | |
| "step": 794 | |
| }, | |
| { | |
| "epoch": 1.8215102974828374, | |
| "grad_norm": 2.3228371143341064, | |
| "learning_rate": 6.53807833452804e-06, | |
| "loss": 0.8639980554580688, | |
| "step": 796 | |
| }, | |
| { | |
| "epoch": 1.8260869565217392, | |
| "grad_norm": 3.077423095703125, | |
| "learning_rate": 6.521497601626742e-06, | |
| "loss": 1.0374181270599365, | |
| "step": 798 | |
| }, | |
| { | |
| "epoch": 1.8306636155606406, | |
| "grad_norm": 3.502397060394287, | |
| "learning_rate": 6.504902234141052e-06, | |
| "loss": 0.8395485877990723, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 1.8352402745995424, | |
| "grad_norm": 1.694636583328247, | |
| "learning_rate": 6.4882924698261086e-06, | |
| "loss": 0.8845337629318237, | |
| "step": 802 | |
| }, | |
| { | |
| "epoch": 1.8398169336384438, | |
| "grad_norm": 1.3276764154434204, | |
| "learning_rate": 6.4716685466433125e-06, | |
| "loss": 0.7552372217178345, | |
| "step": 804 | |
| }, | |
| { | |
| "epoch": 1.8443935926773456, | |
| "grad_norm": 3.640122652053833, | |
| "learning_rate": 6.455030702756909e-06, | |
| "loss": 0.7707520723342896, | |
| "step": 806 | |
| }, | |
| { | |
| "epoch": 1.8489702517162472, | |
| "grad_norm": 1.5462133884429932, | |
| "learning_rate": 6.438379176530581e-06, | |
| "loss": 0.9930239915847778, | |
| "step": 808 | |
| }, | |
| { | |
| "epoch": 1.8535469107551488, | |
| "grad_norm": 1.9529536962509155, | |
| "learning_rate": 6.421714206524032e-06, | |
| "loss": 1.040754795074463, | |
| "step": 810 | |
| }, | |
| { | |
| "epoch": 1.8581235697940504, | |
| "grad_norm": 3.2038698196411133, | |
| "learning_rate": 6.405036031489573e-06, | |
| "loss": 0.9112997055053711, | |
| "step": 812 | |
| }, | |
| { | |
| "epoch": 1.862700228832952, | |
| "grad_norm": 2.7182154655456543, | |
| "learning_rate": 6.3883448903686926e-06, | |
| "loss": 1.0292009115219116, | |
| "step": 814 | |
| }, | |
| { | |
| "epoch": 1.8672768878718535, | |
| "grad_norm": 1.7210997343063354, | |
| "learning_rate": 6.371641022288642e-06, | |
| "loss": 0.7663242816925049, | |
| "step": 816 | |
| }, | |
| { | |
| "epoch": 1.8718535469107551, | |
| "grad_norm": 1.3438034057617188, | |
| "learning_rate": 6.354924666559007e-06, | |
| "loss": 0.9235577583312988, | |
| "step": 818 | |
| }, | |
| { | |
| "epoch": 1.8764302059496567, | |
| "grad_norm": 6.343740940093994, | |
| "learning_rate": 6.338196062668276e-06, | |
| "loss": 0.9253222942352295, | |
| "step": 820 | |
| }, | |
| { | |
| "epoch": 1.8810068649885583, | |
| "grad_norm": 10.784764289855957, | |
| "learning_rate": 6.321455450280417e-06, | |
| "loss": 1.0267045497894287, | |
| "step": 822 | |
| }, | |
| { | |
| "epoch": 1.88558352402746, | |
| "grad_norm": 4.409283638000488, | |
| "learning_rate": 6.304703069231434e-06, | |
| "loss": 1.1460933685302734, | |
| "step": 824 | |
| }, | |
| { | |
| "epoch": 1.8901601830663615, | |
| "grad_norm": 6.675430774688721, | |
| "learning_rate": 6.287939159525939e-06, | |
| "loss": 0.9188438653945923, | |
| "step": 826 | |
| }, | |
| { | |
| "epoch": 1.8947368421052633, | |
| "grad_norm": 4.170138359069824, | |
| "learning_rate": 6.271163961333706e-06, | |
| "loss": 0.858944833278656, | |
| "step": 828 | |
| }, | |
| { | |
| "epoch": 1.8993135011441646, | |
| "grad_norm": 1.5649385452270508, | |
| "learning_rate": 6.25437771498624e-06, | |
| "loss": 0.9133040904998779, | |
| "step": 830 | |
| }, | |
| { | |
| "epoch": 1.9038901601830664, | |
| "grad_norm": 2.2200839519500732, | |
| "learning_rate": 6.237580660973328e-06, | |
| "loss": 1.1158314943313599, | |
| "step": 832 | |
| }, | |
| { | |
| "epoch": 1.9084668192219678, | |
| "grad_norm": 8.802407264709473, | |
| "learning_rate": 6.220773039939592e-06, | |
| "loss": 0.8092963695526123, | |
| "step": 834 | |
| }, | |
| { | |
| "epoch": 1.9130434782608696, | |
| "grad_norm": 3.916245222091675, | |
| "learning_rate": 6.20395509268104e-06, | |
| "loss": 0.9357765913009644, | |
| "step": 836 | |
| }, | |
| { | |
| "epoch": 1.9176201372997712, | |
| "grad_norm": 2.1684670448303223, | |
| "learning_rate": 6.1871270601416255e-06, | |
| "loss": 1.0700328350067139, | |
| "step": 838 | |
| }, | |
| { | |
| "epoch": 1.9221967963386728, | |
| "grad_norm": 1.537369728088379, | |
| "learning_rate": 6.170289183409789e-06, | |
| "loss": 1.0650542974472046, | |
| "step": 840 | |
| }, | |
| { | |
| "epoch": 1.9267734553775744, | |
| "grad_norm": 1.5754413604736328, | |
| "learning_rate": 6.153441703715e-06, | |
| "loss": 1.18636155128479, | |
| "step": 842 | |
| }, | |
| { | |
| "epoch": 1.931350114416476, | |
| "grad_norm": 2.923276901245117, | |
| "learning_rate": 6.136584862424313e-06, | |
| "loss": 1.0751769542694092, | |
| "step": 844 | |
| }, | |
| { | |
| "epoch": 1.9359267734553776, | |
| "grad_norm": 2.0239417552948, | |
| "learning_rate": 6.119718901038898e-06, | |
| "loss": 1.0461246967315674, | |
| "step": 846 | |
| }, | |
| { | |
| "epoch": 1.9405034324942791, | |
| "grad_norm": 5.281694412231445, | |
| "learning_rate": 6.102844061190582e-06, | |
| "loss": 0.9867255091667175, | |
| "step": 848 | |
| }, | |
| { | |
| "epoch": 1.9450800915331807, | |
| "grad_norm": 2.6705641746520996, | |
| "learning_rate": 6.0859605846383986e-06, | |
| "loss": 1.0285825729370117, | |
| "step": 850 | |
| }, | |
| { | |
| "epoch": 1.9496567505720823, | |
| "grad_norm": 2.842245578765869, | |
| "learning_rate": 6.069068713265107e-06, | |
| "loss": 0.8631390929222107, | |
| "step": 852 | |
| }, | |
| { | |
| "epoch": 1.9542334096109841, | |
| "grad_norm": 12.086994171142578, | |
| "learning_rate": 6.05216868907374e-06, | |
| "loss": 0.9532708525657654, | |
| "step": 854 | |
| }, | |
| { | |
| "epoch": 1.9588100686498855, | |
| "grad_norm": 6.184656620025635, | |
| "learning_rate": 6.035260754184133e-06, | |
| "loss": 0.7836180925369263, | |
| "step": 856 | |
| }, | |
| { | |
| "epoch": 1.9633867276887873, | |
| "grad_norm": 0.9443724751472473, | |
| "learning_rate": 6.0183451508294555e-06, | |
| "loss": 0.505516767501831, | |
| "step": 858 | |
| }, | |
| { | |
| "epoch": 1.9679633867276887, | |
| "grad_norm": 4.477334499359131, | |
| "learning_rate": 6.001422121352736e-06, | |
| "loss": 0.7708160877227783, | |
| "step": 860 | |
| }, | |
| { | |
| "epoch": 1.9725400457665905, | |
| "grad_norm": 3.583055257797241, | |
| "learning_rate": 5.984491908203398e-06, | |
| "loss": 0.8886803388595581, | |
| "step": 862 | |
| }, | |
| { | |
| "epoch": 1.9771167048054918, | |
| "grad_norm": 3.272289991378784, | |
| "learning_rate": 5.96755475393378e-06, | |
| "loss": 1.0874934196472168, | |
| "step": 864 | |
| }, | |
| { | |
| "epoch": 1.9816933638443937, | |
| "grad_norm": 5.9545979499816895, | |
| "learning_rate": 5.950610901195664e-06, | |
| "loss": 0.8436876535415649, | |
| "step": 866 | |
| }, | |
| { | |
| "epoch": 1.9862700228832952, | |
| "grad_norm": 3.9929182529449463, | |
| "learning_rate": 5.933660592736798e-06, | |
| "loss": 1.1498433351516724, | |
| "step": 868 | |
| }, | |
| { | |
| "epoch": 1.9908466819221968, | |
| "grad_norm": 1.720807433128357, | |
| "learning_rate": 5.9167040713974224e-06, | |
| "loss": 1.0520302057266235, | |
| "step": 870 | |
| }, | |
| { | |
| "epoch": 1.9954233409610984, | |
| "grad_norm": 1.747268557548523, | |
| "learning_rate": 5.89974158010678e-06, | |
| "loss": 0.8821731805801392, | |
| "step": 872 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "grad_norm": 3.795553207397461, | |
| "learning_rate": 5.8827733618796455e-06, | |
| "loss": 0.7908928990364075, | |
| "step": 874 | |
| }, | |
| { | |
| "epoch": 2.004576659038902, | |
| "grad_norm": 1.620730996131897, | |
| "learning_rate": 5.865799659812846e-06, | |
| "loss": 0.945500910282135, | |
| "step": 876 | |
| }, | |
| { | |
| "epoch": 2.009153318077803, | |
| "grad_norm": 6.447601795196533, | |
| "learning_rate": 5.848820717081767e-06, | |
| "loss": 0.8182247877120972, | |
| "step": 878 | |
| }, | |
| { | |
| "epoch": 2.013729977116705, | |
| "grad_norm": 1.7102856636047363, | |
| "learning_rate": 5.831836776936876e-06, | |
| "loss": 0.7212214469909668, | |
| "step": 880 | |
| }, | |
| { | |
| "epoch": 2.0183066361556063, | |
| "grad_norm": 2.019994020462036, | |
| "learning_rate": 5.81484808270024e-06, | |
| "loss": 0.6066313982009888, | |
| "step": 882 | |
| }, | |
| { | |
| "epoch": 2.022883295194508, | |
| "grad_norm": 11.098702430725098, | |
| "learning_rate": 5.79785487776203e-06, | |
| "loss": 0.5976270437240601, | |
| "step": 884 | |
| }, | |
| { | |
| "epoch": 2.0274599542334095, | |
| "grad_norm": 2.936749219894409, | |
| "learning_rate": 5.780857405577048e-06, | |
| "loss": 0.7158992290496826, | |
| "step": 886 | |
| }, | |
| { | |
| "epoch": 2.0320366132723113, | |
| "grad_norm": 0.6403222680091858, | |
| "learning_rate": 5.7638559096612244e-06, | |
| "loss": 0.5098772644996643, | |
| "step": 888 | |
| }, | |
| { | |
| "epoch": 2.0366132723112127, | |
| "grad_norm": 6.3164143562316895, | |
| "learning_rate": 5.746850633588138e-06, | |
| "loss": 0.5138643980026245, | |
| "step": 890 | |
| }, | |
| { | |
| "epoch": 2.0411899313501145, | |
| "grad_norm": 5.037423610687256, | |
| "learning_rate": 5.729841820985525e-06, | |
| "loss": 0.7996326684951782, | |
| "step": 892 | |
| }, | |
| { | |
| "epoch": 2.045766590389016, | |
| "grad_norm": 4.635165691375732, | |
| "learning_rate": 5.712829715531787e-06, | |
| "loss": 0.5906950235366821, | |
| "step": 894 | |
| }, | |
| { | |
| "epoch": 2.0503432494279177, | |
| "grad_norm": 4.191209316253662, | |
| "learning_rate": 5.6958145609525005e-06, | |
| "loss": 0.9277024269104004, | |
| "step": 896 | |
| }, | |
| { | |
| "epoch": 2.054919908466819, | |
| "grad_norm": 2.6020865440368652, | |
| "learning_rate": 5.67879660101693e-06, | |
| "loss": 0.4953917860984802, | |
| "step": 898 | |
| }, | |
| { | |
| "epoch": 2.059496567505721, | |
| "grad_norm": 8.963276863098145, | |
| "learning_rate": 5.661776079534526e-06, | |
| "loss": 0.7233725786209106, | |
| "step": 900 | |
| }, | |
| { | |
| "epoch": 2.064073226544622, | |
| "grad_norm": 0.6874380111694336, | |
| "learning_rate": 5.644753240351439e-06, | |
| "loss": 0.34285467863082886, | |
| "step": 902 | |
| }, | |
| { | |
| "epoch": 2.068649885583524, | |
| "grad_norm": 3.8761210441589355, | |
| "learning_rate": 5.6277283273470255e-06, | |
| "loss": 0.7474602460861206, | |
| "step": 904 | |
| }, | |
| { | |
| "epoch": 2.073226544622426, | |
| "grad_norm": 4.701927185058594, | |
| "learning_rate": 5.6107015844303505e-06, | |
| "loss": 0.5753633975982666, | |
| "step": 906 | |
| }, | |
| { | |
| "epoch": 2.077803203661327, | |
| "grad_norm": 3.259704113006592, | |
| "learning_rate": 5.593673255536696e-06, | |
| "loss": 0.6351042985916138, | |
| "step": 908 | |
| }, | |
| { | |
| "epoch": 2.082379862700229, | |
| "grad_norm": 5.490569114685059, | |
| "learning_rate": 5.5766435846240674e-06, | |
| "loss": 0.8573955297470093, | |
| "step": 910 | |
| }, | |
| { | |
| "epoch": 2.0869565217391304, | |
| "grad_norm": 2.4090561866760254, | |
| "learning_rate": 5.559612815669697e-06, | |
| "loss": 0.36256104707717896, | |
| "step": 912 | |
| }, | |
| { | |
| "epoch": 2.091533180778032, | |
| "grad_norm": 2.0318257808685303, | |
| "learning_rate": 5.5425811926665426e-06, | |
| "loss": 0.6678656339645386, | |
| "step": 914 | |
| }, | |
| { | |
| "epoch": 2.0961098398169336, | |
| "grad_norm": 2.644015073776245, | |
| "learning_rate": 5.525548959619807e-06, | |
| "loss": 0.8250109553337097, | |
| "step": 916 | |
| }, | |
| { | |
| "epoch": 2.1006864988558354, | |
| "grad_norm": 2.946031332015991, | |
| "learning_rate": 5.508516360543424e-06, | |
| "loss": 0.6581653356552124, | |
| "step": 918 | |
| }, | |
| { | |
| "epoch": 2.1052631578947367, | |
| "grad_norm": 3.0805141925811768, | |
| "learning_rate": 5.491483639456577e-06, | |
| "loss": 0.6303462982177734, | |
| "step": 920 | |
| }, | |
| { | |
| "epoch": 2.1098398169336385, | |
| "grad_norm": 3.5895307064056396, | |
| "learning_rate": 5.474451040380194e-06, | |
| "loss": 0.6395201683044434, | |
| "step": 922 | |
| }, | |
| { | |
| "epoch": 2.11441647597254, | |
| "grad_norm": 6.964833736419678, | |
| "learning_rate": 5.457418807333458e-06, | |
| "loss": 0.5622954368591309, | |
| "step": 924 | |
| }, | |
| { | |
| "epoch": 2.1189931350114417, | |
| "grad_norm": 1.2632827758789062, | |
| "learning_rate": 5.440387184330306e-06, | |
| "loss": 0.6777645349502563, | |
| "step": 926 | |
| }, | |
| { | |
| "epoch": 2.123569794050343, | |
| "grad_norm": 1.7766778469085693, | |
| "learning_rate": 5.423356415375933e-06, | |
| "loss": 0.5913289785385132, | |
| "step": 928 | |
| }, | |
| { | |
| "epoch": 2.128146453089245, | |
| "grad_norm": 4.999666213989258, | |
| "learning_rate": 5.406326744463305e-06, | |
| "loss": 0.4775615930557251, | |
| "step": 930 | |
| }, | |
| { | |
| "epoch": 2.1327231121281462, | |
| "grad_norm": 4.9804229736328125, | |
| "learning_rate": 5.389298415569653e-06, | |
| "loss": 0.5832971334457397, | |
| "step": 932 | |
| }, | |
| { | |
| "epoch": 2.137299771167048, | |
| "grad_norm": 2.392467975616455, | |
| "learning_rate": 5.372271672652978e-06, | |
| "loss": 0.8872381448745728, | |
| "step": 934 | |
| }, | |
| { | |
| "epoch": 2.14187643020595, | |
| "grad_norm": 1.6833393573760986, | |
| "learning_rate": 5.355246759648563e-06, | |
| "loss": 0.770750105381012, | |
| "step": 936 | |
| }, | |
| { | |
| "epoch": 2.1464530892448512, | |
| "grad_norm": 4.981933116912842, | |
| "learning_rate": 5.338223920465476e-06, | |
| "loss": 0.46837982535362244, | |
| "step": 938 | |
| }, | |
| { | |
| "epoch": 2.151029748283753, | |
| "grad_norm": 2.7053420543670654, | |
| "learning_rate": 5.321203398983071e-06, | |
| "loss": 0.7598111629486084, | |
| "step": 940 | |
| }, | |
| { | |
| "epoch": 2.1556064073226544, | |
| "grad_norm": 19.10597801208496, | |
| "learning_rate": 5.3041854390475e-06, | |
| "loss": 0.653712272644043, | |
| "step": 942 | |
| }, | |
| { | |
| "epoch": 2.160183066361556, | |
| "grad_norm": 6.7961201667785645, | |
| "learning_rate": 5.287170284468216e-06, | |
| "loss": 0.6563678979873657, | |
| "step": 944 | |
| }, | |
| { | |
| "epoch": 2.1647597254004576, | |
| "grad_norm": 1.762424111366272, | |
| "learning_rate": 5.2701581790144775e-06, | |
| "loss": 0.6890889406204224, | |
| "step": 946 | |
| }, | |
| { | |
| "epoch": 2.1693363844393594, | |
| "grad_norm": 3.5865209102630615, | |
| "learning_rate": 5.253149366411864e-06, | |
| "loss": 0.620913028717041, | |
| "step": 948 | |
| }, | |
| { | |
| "epoch": 2.1739130434782608, | |
| "grad_norm": 2.4212989807128906, | |
| "learning_rate": 5.236144090338777e-06, | |
| "loss": 0.9455079436302185, | |
| "step": 950 | |
| }, | |
| { | |
| "epoch": 2.1784897025171626, | |
| "grad_norm": 2.00994610786438, | |
| "learning_rate": 5.219142594422953e-06, | |
| "loss": 0.9673428535461426, | |
| "step": 952 | |
| }, | |
| { | |
| "epoch": 2.183066361556064, | |
| "grad_norm": 1.9694055318832397, | |
| "learning_rate": 5.20214512223797e-06, | |
| "loss": 0.8950019478797913, | |
| "step": 954 | |
| }, | |
| { | |
| "epoch": 2.1876430205949657, | |
| "grad_norm": 2.0830254554748535, | |
| "learning_rate": 5.185151917299762e-06, | |
| "loss": 0.832808256149292, | |
| "step": 956 | |
| }, | |
| { | |
| "epoch": 2.192219679633867, | |
| "grad_norm": 2.595118761062622, | |
| "learning_rate": 5.168163223063125e-06, | |
| "loss": 0.6324760913848877, | |
| "step": 958 | |
| }, | |
| { | |
| "epoch": 2.196796338672769, | |
| "grad_norm": 1.7887650728225708, | |
| "learning_rate": 5.151179282918234e-06, | |
| "loss": 0.8795846700668335, | |
| "step": 960 | |
| }, | |
| { | |
| "epoch": 2.2013729977116703, | |
| "grad_norm": 2.8889565467834473, | |
| "learning_rate": 5.134200340187155e-06, | |
| "loss": 0.6858696341514587, | |
| "step": 962 | |
| }, | |
| { | |
| "epoch": 2.205949656750572, | |
| "grad_norm": 1.9928938150405884, | |
| "learning_rate": 5.117226638120356e-06, | |
| "loss": 0.8538745045661926, | |
| "step": 964 | |
| }, | |
| { | |
| "epoch": 2.2105263157894735, | |
| "grad_norm": 4.457608699798584, | |
| "learning_rate": 5.100258419893223e-06, | |
| "loss": 0.8814704418182373, | |
| "step": 966 | |
| }, | |
| { | |
| "epoch": 2.2151029748283753, | |
| "grad_norm": 8.807114601135254, | |
| "learning_rate": 5.083295928602581e-06, | |
| "loss": 0.6599367260932922, | |
| "step": 968 | |
| }, | |
| { | |
| "epoch": 2.219679633867277, | |
| "grad_norm": 4.574615955352783, | |
| "learning_rate": 5.066339407263203e-06, | |
| "loss": 0.8049777746200562, | |
| "step": 970 | |
| }, | |
| { | |
| "epoch": 2.2242562929061784, | |
| "grad_norm": 1.0171655416488647, | |
| "learning_rate": 5.049389098804337e-06, | |
| "loss": 0.41160112619400024, | |
| "step": 972 | |
| }, | |
| { | |
| "epoch": 2.2288329519450802, | |
| "grad_norm": 2.5793819427490234, | |
| "learning_rate": 5.032445246066223e-06, | |
| "loss": 0.9556428790092468, | |
| "step": 974 | |
| }, | |
| { | |
| "epoch": 2.2334096109839816, | |
| "grad_norm": 14.489500999450684, | |
| "learning_rate": 5.0155080917966035e-06, | |
| "loss": 0.846240758895874, | |
| "step": 976 | |
| }, | |
| { | |
| "epoch": 2.2379862700228834, | |
| "grad_norm": 1.3771382570266724, | |
| "learning_rate": 4.998577878647265e-06, | |
| "loss": 1.0038063526153564, | |
| "step": 978 | |
| }, | |
| { | |
| "epoch": 2.242562929061785, | |
| "grad_norm": 2.4952244758605957, | |
| "learning_rate": 4.981654849170546e-06, | |
| "loss": 0.8787197470664978, | |
| "step": 980 | |
| }, | |
| { | |
| "epoch": 2.2471395881006866, | |
| "grad_norm": 1.98478102684021, | |
| "learning_rate": 4.964739245815867e-06, | |
| "loss": 0.6971051692962646, | |
| "step": 982 | |
| }, | |
| { | |
| "epoch": 2.251716247139588, | |
| "grad_norm": 10.939866065979004, | |
| "learning_rate": 4.947831310926261e-06, | |
| "loss": 0.8242970705032349, | |
| "step": 984 | |
| }, | |
| { | |
| "epoch": 2.2562929061784898, | |
| "grad_norm": 3.1165544986724854, | |
| "learning_rate": 4.930931286734896e-06, | |
| "loss": 0.7920933961868286, | |
| "step": 986 | |
| }, | |
| { | |
| "epoch": 2.260869565217391, | |
| "grad_norm": 4.3943095207214355, | |
| "learning_rate": 4.914039415361604e-06, | |
| "loss": 0.6728335618972778, | |
| "step": 988 | |
| }, | |
| { | |
| "epoch": 2.265446224256293, | |
| "grad_norm": 4.197206497192383, | |
| "learning_rate": 4.897155938809418e-06, | |
| "loss": 0.5807865858078003, | |
| "step": 990 | |
| }, | |
| { | |
| "epoch": 2.2700228832951943, | |
| "grad_norm": 4.6128363609313965, | |
| "learning_rate": 4.880281098961104e-06, | |
| "loss": 0.6835315227508545, | |
| "step": 992 | |
| }, | |
| { | |
| "epoch": 2.274599542334096, | |
| "grad_norm": 3.85286808013916, | |
| "learning_rate": 4.863415137575688e-06, | |
| "loss": 0.8232854008674622, | |
| "step": 994 | |
| }, | |
| { | |
| "epoch": 2.279176201372998, | |
| "grad_norm": 2.8220767974853516, | |
| "learning_rate": 4.846558296285e-06, | |
| "loss": 0.7026492357254028, | |
| "step": 996 | |
| }, | |
| { | |
| "epoch": 2.2837528604118993, | |
| "grad_norm": 1.7302947044372559, | |
| "learning_rate": 4.829710816590214e-06, | |
| "loss": 0.8062256574630737, | |
| "step": 998 | |
| }, | |
| { | |
| "epoch": 2.288329519450801, | |
| "grad_norm": 3.6414108276367188, | |
| "learning_rate": 4.812872939858375e-06, | |
| "loss": 0.45558106899261475, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 2.2929061784897025, | |
| "grad_norm": 6.842305660247803, | |
| "learning_rate": 4.796044907318961e-06, | |
| "loss": 0.6691060066223145, | |
| "step": 1002 | |
| }, | |
| { | |
| "epoch": 2.2974828375286043, | |
| "grad_norm": 2.9899022579193115, | |
| "learning_rate": 4.7792269600604115e-06, | |
| "loss": 0.8572442531585693, | |
| "step": 1004 | |
| }, | |
| { | |
| "epoch": 2.3020594965675056, | |
| "grad_norm": 1.9516880512237549, | |
| "learning_rate": 4.7624193390266725e-06, | |
| "loss": 0.883170485496521, | |
| "step": 1006 | |
| }, | |
| { | |
| "epoch": 2.3066361556064074, | |
| "grad_norm": 10.57199478149414, | |
| "learning_rate": 4.74562228501376e-06, | |
| "loss": 0.5891278982162476, | |
| "step": 1008 | |
| }, | |
| { | |
| "epoch": 2.311212814645309, | |
| "grad_norm": 7.4861602783203125, | |
| "learning_rate": 4.7288360386662965e-06, | |
| "loss": 0.5752084255218506, | |
| "step": 1010 | |
| }, | |
| { | |
| "epoch": 2.3157894736842106, | |
| "grad_norm": 4.013728141784668, | |
| "learning_rate": 4.7120608404740644e-06, | |
| "loss": 0.6811657547950745, | |
| "step": 1012 | |
| }, | |
| { | |
| "epoch": 2.320366132723112, | |
| "grad_norm": 2.6324877738952637, | |
| "learning_rate": 4.695296930768567e-06, | |
| "loss": 0.8065996170043945, | |
| "step": 1014 | |
| }, | |
| { | |
| "epoch": 2.324942791762014, | |
| "grad_norm": 8.653948783874512, | |
| "learning_rate": 4.678544549719585e-06, | |
| "loss": 1.0626581907272339, | |
| "step": 1016 | |
| }, | |
| { | |
| "epoch": 2.329519450800915, | |
| "grad_norm": 2.111722707748413, | |
| "learning_rate": 4.6618039373317245e-06, | |
| "loss": 0.6138767004013062, | |
| "step": 1018 | |
| }, | |
| { | |
| "epoch": 2.334096109839817, | |
| "grad_norm": 2.4597508907318115, | |
| "learning_rate": 4.645075333440995e-06, | |
| "loss": 0.7229528427124023, | |
| "step": 1020 | |
| }, | |
| { | |
| "epoch": 2.3386727688787188, | |
| "grad_norm": 2.4709105491638184, | |
| "learning_rate": 4.6283589777113605e-06, | |
| "loss": 0.8089841604232788, | |
| "step": 1022 | |
| }, | |
| { | |
| "epoch": 2.34324942791762, | |
| "grad_norm": 3.309113025665283, | |
| "learning_rate": 4.611655109631309e-06, | |
| "loss": 0.8193925023078918, | |
| "step": 1024 | |
| }, | |
| { | |
| "epoch": 2.3478260869565215, | |
| "grad_norm": 1.6609846353530884, | |
| "learning_rate": 4.594963968510428e-06, | |
| "loss": 0.9571186900138855, | |
| "step": 1026 | |
| }, | |
| { | |
| "epoch": 2.3524027459954233, | |
| "grad_norm": 3.329925537109375, | |
| "learning_rate": 4.578285793475969e-06, | |
| "loss": 0.6692005395889282, | |
| "step": 1028 | |
| }, | |
| { | |
| "epoch": 2.356979405034325, | |
| "grad_norm": 0.6110982894897461, | |
| "learning_rate": 4.56162082346942e-06, | |
| "loss": 0.5079280138015747, | |
| "step": 1030 | |
| }, | |
| { | |
| "epoch": 2.3615560640732265, | |
| "grad_norm": 5.090636253356934, | |
| "learning_rate": 4.544969297243091e-06, | |
| "loss": 0.32914191484451294, | |
| "step": 1032 | |
| }, | |
| { | |
| "epoch": 2.3661327231121283, | |
| "grad_norm": 1.61818528175354, | |
| "learning_rate": 4.528331453356689e-06, | |
| "loss": 0.7983392477035522, | |
| "step": 1034 | |
| }, | |
| { | |
| "epoch": 2.3707093821510297, | |
| "grad_norm": 1.8456085920333862, | |
| "learning_rate": 4.511707530173892e-06, | |
| "loss": 0.6458148956298828, | |
| "step": 1036 | |
| }, | |
| { | |
| "epoch": 2.3752860411899315, | |
| "grad_norm": 3.5649495124816895, | |
| "learning_rate": 4.495097765858949e-06, | |
| "loss": 1.0234034061431885, | |
| "step": 1038 | |
| }, | |
| { | |
| "epoch": 2.379862700228833, | |
| "grad_norm": 2.833116292953491, | |
| "learning_rate": 4.47850239837326e-06, | |
| "loss": 0.875385046005249, | |
| "step": 1040 | |
| }, | |
| { | |
| "epoch": 2.3844393592677346, | |
| "grad_norm": 2.9960877895355225, | |
| "learning_rate": 4.461921665471962e-06, | |
| "loss": 0.65616375207901, | |
| "step": 1042 | |
| }, | |
| { | |
| "epoch": 2.389016018306636, | |
| "grad_norm": 5.081766128540039, | |
| "learning_rate": 4.445355804700533e-06, | |
| "loss": 0.838226318359375, | |
| "step": 1044 | |
| }, | |
| { | |
| "epoch": 2.393592677345538, | |
| "grad_norm": 5.546652793884277, | |
| "learning_rate": 4.428805053391386e-06, | |
| "loss": 0.39645886421203613, | |
| "step": 1046 | |
| }, | |
| { | |
| "epoch": 2.398169336384439, | |
| "grad_norm": 3.0292372703552246, | |
| "learning_rate": 4.41226964866046e-06, | |
| "loss": 0.7210261821746826, | |
| "step": 1048 | |
| }, | |
| { | |
| "epoch": 2.402745995423341, | |
| "grad_norm": 4.103295803070068, | |
| "learning_rate": 4.395749827403835e-06, | |
| "loss": 0.8829662203788757, | |
| "step": 1050 | |
| }, | |
| { | |
| "epoch": 2.4073226544622424, | |
| "grad_norm": 6.127950668334961, | |
| "learning_rate": 4.3792458262943324e-06, | |
| "loss": 0.26367780566215515, | |
| "step": 1052 | |
| }, | |
| { | |
| "epoch": 2.411899313501144, | |
| "grad_norm": 1.9098131656646729, | |
| "learning_rate": 4.362757881778122e-06, | |
| "loss": 0.7983935475349426, | |
| "step": 1054 | |
| }, | |
| { | |
| "epoch": 2.416475972540046, | |
| "grad_norm": 1.961759328842163, | |
| "learning_rate": 4.346286230071337e-06, | |
| "loss": 0.8044694066047668, | |
| "step": 1056 | |
| }, | |
| { | |
| "epoch": 2.4210526315789473, | |
| "grad_norm": 0.8351384401321411, | |
| "learning_rate": 4.329831107156698e-06, | |
| "loss": 0.4787551164627075, | |
| "step": 1058 | |
| }, | |
| { | |
| "epoch": 2.425629290617849, | |
| "grad_norm": 2.753488779067993, | |
| "learning_rate": 4.313392748780112e-06, | |
| "loss": 0.7191129922866821, | |
| "step": 1060 | |
| }, | |
| { | |
| "epoch": 2.4302059496567505, | |
| "grad_norm": 2.585822582244873, | |
| "learning_rate": 4.296971390447317e-06, | |
| "loss": 0.8327975273132324, | |
| "step": 1062 | |
| }, | |
| { | |
| "epoch": 2.4347826086956523, | |
| "grad_norm": 2.122526168823242, | |
| "learning_rate": 4.2805672674204935e-06, | |
| "loss": 0.5977469086647034, | |
| "step": 1064 | |
| }, | |
| { | |
| "epoch": 2.4393592677345537, | |
| "grad_norm": 2.236255645751953, | |
| "learning_rate": 4.264180614714901e-06, | |
| "loss": 0.7535611987113953, | |
| "step": 1066 | |
| }, | |
| { | |
| "epoch": 2.4439359267734555, | |
| "grad_norm": 2.458059787750244, | |
| "learning_rate": 4.247811667095506e-06, | |
| "loss": 0.752835750579834, | |
| "step": 1068 | |
| }, | |
| { | |
| "epoch": 2.448512585812357, | |
| "grad_norm": 1.7041510343551636, | |
| "learning_rate": 4.2314606590736256e-06, | |
| "loss": 0.463468998670578, | |
| "step": 1070 | |
| }, | |
| { | |
| "epoch": 2.4530892448512587, | |
| "grad_norm": 1.9309899806976318, | |
| "learning_rate": 4.215127824903558e-06, | |
| "loss": 0.7822204232215881, | |
| "step": 1072 | |
| }, | |
| { | |
| "epoch": 2.45766590389016, | |
| "grad_norm": 2.6464591026306152, | |
| "learning_rate": 4.198813398579236e-06, | |
| "loss": 0.6719791293144226, | |
| "step": 1074 | |
| }, | |
| { | |
| "epoch": 2.462242562929062, | |
| "grad_norm": 2.6165504455566406, | |
| "learning_rate": 4.1825176138308695e-06, | |
| "loss": 0.7982878684997559, | |
| "step": 1076 | |
| }, | |
| { | |
| "epoch": 2.466819221967963, | |
| "grad_norm": 3.580535411834717, | |
| "learning_rate": 4.1662407041215995e-06, | |
| "loss": 0.41490495204925537, | |
| "step": 1078 | |
| }, | |
| { | |
| "epoch": 2.471395881006865, | |
| "grad_norm": 2.3776938915252686, | |
| "learning_rate": 4.14998290264415e-06, | |
| "loss": 0.5821819305419922, | |
| "step": 1080 | |
| }, | |
| { | |
| "epoch": 2.475972540045767, | |
| "grad_norm": 2.160568952560425, | |
| "learning_rate": 4.133744442317487e-06, | |
| "loss": 0.8048266768455505, | |
| "step": 1082 | |
| }, | |
| { | |
| "epoch": 2.480549199084668, | |
| "grad_norm": 16.509410858154297, | |
| "learning_rate": 4.117525555783489e-06, | |
| "loss": 0.9339464902877808, | |
| "step": 1084 | |
| }, | |
| { | |
| "epoch": 2.4851258581235696, | |
| "grad_norm": 5.036529064178467, | |
| "learning_rate": 4.101326475403604e-06, | |
| "loss": 0.5219019651412964, | |
| "step": 1086 | |
| }, | |
| { | |
| "epoch": 2.4897025171624714, | |
| "grad_norm": 8.633739471435547, | |
| "learning_rate": 4.08514743325553e-06, | |
| "loss": 0.8041296601295471, | |
| "step": 1088 | |
| }, | |
| { | |
| "epoch": 2.494279176201373, | |
| "grad_norm": 7.70281457901001, | |
| "learning_rate": 4.068988661129879e-06, | |
| "loss": 0.8011189699172974, | |
| "step": 1090 | |
| }, | |
| { | |
| "epoch": 2.4988558352402745, | |
| "grad_norm": 3.9831337928771973, | |
| "learning_rate": 4.052850390526868e-06, | |
| "loss": 0.6868438720703125, | |
| "step": 1092 | |
| }, | |
| { | |
| "epoch": 2.5034324942791764, | |
| "grad_norm": 2.012017250061035, | |
| "learning_rate": 4.036732852652995e-06, | |
| "loss": 1.0341452360153198, | |
| "step": 1094 | |
| }, | |
| { | |
| "epoch": 2.5080091533180777, | |
| "grad_norm": 2.237452268600464, | |
| "learning_rate": 4.020636278417727e-06, | |
| "loss": 0.8562324047088623, | |
| "step": 1096 | |
| }, | |
| { | |
| "epoch": 2.5125858123569795, | |
| "grad_norm": 2.1376569271087646, | |
| "learning_rate": 4.0045608984301945e-06, | |
| "loss": 0.7588418126106262, | |
| "step": 1098 | |
| }, | |
| { | |
| "epoch": 2.517162471395881, | |
| "grad_norm": 3.0613656044006348, | |
| "learning_rate": 3.98850694299589e-06, | |
| "loss": 0.7711232900619507, | |
| "step": 1100 | |
| }, | |
| { | |
| "epoch": 2.5217391304347827, | |
| "grad_norm": 2.243285655975342, | |
| "learning_rate": 3.972474642113357e-06, | |
| "loss": 0.7679527401924133, | |
| "step": 1102 | |
| }, | |
| { | |
| "epoch": 2.526315789473684, | |
| "grad_norm": 2.8805336952209473, | |
| "learning_rate": 3.956464225470914e-06, | |
| "loss": 0.5456798672676086, | |
| "step": 1104 | |
| }, | |
| { | |
| "epoch": 2.530892448512586, | |
| "grad_norm": 3.419220209121704, | |
| "learning_rate": 3.940475922443343e-06, | |
| "loss": 0.8449265956878662, | |
| "step": 1106 | |
| }, | |
| { | |
| "epoch": 2.5354691075514877, | |
| "grad_norm": 2.3951187133789062, | |
| "learning_rate": 3.924509962088617e-06, | |
| "loss": 0.5211961269378662, | |
| "step": 1108 | |
| }, | |
| { | |
| "epoch": 2.540045766590389, | |
| "grad_norm": 0.6526300311088562, | |
| "learning_rate": 3.9085665731446155e-06, | |
| "loss": 0.5454580783843994, | |
| "step": 1110 | |
| }, | |
| { | |
| "epoch": 2.5446224256292904, | |
| "grad_norm": 1.6130284070968628, | |
| "learning_rate": 3.892645984025846e-06, | |
| "loss": 0.8388891220092773, | |
| "step": 1112 | |
| }, | |
| { | |
| "epoch": 2.5491990846681922, | |
| "grad_norm": 1.6545199155807495, | |
| "learning_rate": 3.876748422820168e-06, | |
| "loss": 0.8067798018455505, | |
| "step": 1114 | |
| }, | |
| { | |
| "epoch": 2.553775743707094, | |
| "grad_norm": 1.631625771522522, | |
| "learning_rate": 3.860874117285535e-06, | |
| "loss": 0.7862741947174072, | |
| "step": 1116 | |
| }, | |
| { | |
| "epoch": 2.5583524027459954, | |
| "grad_norm": 1.7587617635726929, | |
| "learning_rate": 3.8450232948467285e-06, | |
| "loss": 0.7546199560165405, | |
| "step": 1118 | |
| }, | |
| { | |
| "epoch": 2.5629290617848968, | |
| "grad_norm": 3.396798610687256, | |
| "learning_rate": 3.829196182592084e-06, | |
| "loss": 0.8121140003204346, | |
| "step": 1120 | |
| }, | |
| { | |
| "epoch": 2.5675057208237986, | |
| "grad_norm": 3.6833112239837646, | |
| "learning_rate": 3.8133930072702653e-06, | |
| "loss": 0.8952106237411499, | |
| "step": 1122 | |
| }, | |
| { | |
| "epoch": 2.5720823798627004, | |
| "grad_norm": 3.539543390274048, | |
| "learning_rate": 3.797613995286993e-06, | |
| "loss": 0.9799838066101074, | |
| "step": 1124 | |
| }, | |
| { | |
| "epoch": 2.5766590389016018, | |
| "grad_norm": 6.784366607666016, | |
| "learning_rate": 3.7818593727018114e-06, | |
| "loss": 0.7816819548606873, | |
| "step": 1126 | |
| }, | |
| { | |
| "epoch": 2.5812356979405036, | |
| "grad_norm": 7.371065616607666, | |
| "learning_rate": 3.7661293652248486e-06, | |
| "loss": 0.6339356899261475, | |
| "step": 1128 | |
| }, | |
| { | |
| "epoch": 2.585812356979405, | |
| "grad_norm": 4.603682994842529, | |
| "learning_rate": 3.7504241982135802e-06, | |
| "loss": 0.5646559000015259, | |
| "step": 1130 | |
| }, | |
| { | |
| "epoch": 2.5903890160183067, | |
| "grad_norm": 10.729260444641113, | |
| "learning_rate": 3.734744096669605e-06, | |
| "loss": 0.3738934397697449, | |
| "step": 1132 | |
| }, | |
| { | |
| "epoch": 2.594965675057208, | |
| "grad_norm": 2.632481813430786, | |
| "learning_rate": 3.7190892852354177e-06, | |
| "loss": 0.5772243738174438, | |
| "step": 1134 | |
| }, | |
| { | |
| "epoch": 2.59954233409611, | |
| "grad_norm": 2.2666807174682617, | |
| "learning_rate": 3.703459988191188e-06, | |
| "loss": 0.8983527421951294, | |
| "step": 1136 | |
| }, | |
| { | |
| "epoch": 2.6041189931350113, | |
| "grad_norm": 2.42862868309021, | |
| "learning_rate": 3.6878564294515597e-06, | |
| "loss": 0.41785839200019836, | |
| "step": 1138 | |
| }, | |
| { | |
| "epoch": 2.608695652173913, | |
| "grad_norm": 2.7941179275512695, | |
| "learning_rate": 3.672278832562427e-06, | |
| "loss": 0.9465121030807495, | |
| "step": 1140 | |
| }, | |
| { | |
| "epoch": 2.613272311212815, | |
| "grad_norm": 2.7512271404266357, | |
| "learning_rate": 3.656727420697741e-06, | |
| "loss": 0.9477014541625977, | |
| "step": 1142 | |
| }, | |
| { | |
| "epoch": 2.6178489702517163, | |
| "grad_norm": 3.315143346786499, | |
| "learning_rate": 3.641202416656311e-06, | |
| "loss": 0.48098376393318176, | |
| "step": 1144 | |
| }, | |
| { | |
| "epoch": 2.6224256292906176, | |
| "grad_norm": 0.8696630001068115, | |
| "learning_rate": 3.6257040428586143e-06, | |
| "loss": 0.4189261794090271, | |
| "step": 1146 | |
| }, | |
| { | |
| "epoch": 2.6270022883295194, | |
| "grad_norm": 4.89910364151001, | |
| "learning_rate": 3.610232521343603e-06, | |
| "loss": 0.25716477632522583, | |
| "step": 1148 | |
| }, | |
| { | |
| "epoch": 2.6315789473684212, | |
| "grad_norm": 1.6397091150283813, | |
| "learning_rate": 3.5947880737655307e-06, | |
| "loss": 0.5442467927932739, | |
| "step": 1150 | |
| }, | |
| { | |
| "epoch": 2.6361556064073226, | |
| "grad_norm": 2.3286402225494385, | |
| "learning_rate": 3.5793709213907713e-06, | |
| "loss": 0.8711032867431641, | |
| "step": 1152 | |
| }, | |
| { | |
| "epoch": 2.6407322654462244, | |
| "grad_norm": 6.281716346740723, | |
| "learning_rate": 3.563981285094654e-06, | |
| "loss": 0.6418501138687134, | |
| "step": 1154 | |
| }, | |
| { | |
| "epoch": 2.645308924485126, | |
| "grad_norm": 1.7850065231323242, | |
| "learning_rate": 3.5486193853582917e-06, | |
| "loss": 0.5711671710014343, | |
| "step": 1156 | |
| }, | |
| { | |
| "epoch": 2.6498855835240276, | |
| "grad_norm": 4.116028308868408, | |
| "learning_rate": 3.533285442265435e-06, | |
| "loss": 0.7774121165275574, | |
| "step": 1158 | |
| }, | |
| { | |
| "epoch": 2.654462242562929, | |
| "grad_norm": 2.7807161808013916, | |
| "learning_rate": 3.5179796754993e-06, | |
| "loss": 0.6718448400497437, | |
| "step": 1160 | |
| }, | |
| { | |
| "epoch": 2.6590389016018308, | |
| "grad_norm": 2.0300822257995605, | |
| "learning_rate": 3.5027023043394436e-06, | |
| "loss": 0.824101448059082, | |
| "step": 1162 | |
| }, | |
| { | |
| "epoch": 2.663615560640732, | |
| "grad_norm": 3.844541072845459, | |
| "learning_rate": 3.4874535476586014e-06, | |
| "loss": 0.62496018409729, | |
| "step": 1164 | |
| }, | |
| { | |
| "epoch": 2.668192219679634, | |
| "grad_norm": 2.7920563220977783, | |
| "learning_rate": 3.4722336239195615e-06, | |
| "loss": 0.4988476634025574, | |
| "step": 1166 | |
| }, | |
| { | |
| "epoch": 2.6727688787185357, | |
| "grad_norm": 8.478647232055664, | |
| "learning_rate": 3.45704275117204e-06, | |
| "loss": 0.6625174283981323, | |
| "step": 1168 | |
| }, | |
| { | |
| "epoch": 2.677345537757437, | |
| "grad_norm": 8.968935012817383, | |
| "learning_rate": 3.4418811470495467e-06, | |
| "loss": 0.6531594395637512, | |
| "step": 1170 | |
| }, | |
| { | |
| "epoch": 2.6819221967963385, | |
| "grad_norm": 2.9643266201019287, | |
| "learning_rate": 3.426749028766273e-06, | |
| "loss": 0.33440902829170227, | |
| "step": 1172 | |
| }, | |
| { | |
| "epoch": 2.6864988558352403, | |
| "grad_norm": 3.355323553085327, | |
| "learning_rate": 3.411646613113976e-06, | |
| "loss": 0.6681325435638428, | |
| "step": 1174 | |
| }, | |
| { | |
| "epoch": 2.691075514874142, | |
| "grad_norm": 6.2293782234191895, | |
| "learning_rate": 3.3965741164588796e-06, | |
| "loss": 0.4832615852355957, | |
| "step": 1176 | |
| }, | |
| { | |
| "epoch": 2.6956521739130435, | |
| "grad_norm": 4.777223587036133, | |
| "learning_rate": 3.381531754738567e-06, | |
| "loss": 0.6058177947998047, | |
| "step": 1178 | |
| }, | |
| { | |
| "epoch": 2.700228832951945, | |
| "grad_norm": 6.559420585632324, | |
| "learning_rate": 3.366519743458889e-06, | |
| "loss": 0.6945388317108154, | |
| "step": 1180 | |
| }, | |
| { | |
| "epoch": 2.7048054919908466, | |
| "grad_norm": 4.55678653717041, | |
| "learning_rate": 3.351538297690886e-06, | |
| "loss": 0.8977880477905273, | |
| "step": 1182 | |
| }, | |
| { | |
| "epoch": 2.7093821510297484, | |
| "grad_norm": 1.9731976985931396, | |
| "learning_rate": 3.336587632067686e-06, | |
| "loss": 0.8714834451675415, | |
| "step": 1184 | |
| }, | |
| { | |
| "epoch": 2.71395881006865, | |
| "grad_norm": 2.371523380279541, | |
| "learning_rate": 3.321667960781454e-06, | |
| "loss": 0.8558663129806519, | |
| "step": 1186 | |
| }, | |
| { | |
| "epoch": 2.7185354691075516, | |
| "grad_norm": 23.87647819519043, | |
| "learning_rate": 3.3067794975803047e-06, | |
| "loss": 0.6330064535140991, | |
| "step": 1188 | |
| }, | |
| { | |
| "epoch": 2.723112128146453, | |
| "grad_norm": 0.8517280220985413, | |
| "learning_rate": 3.2919224557652494e-06, | |
| "loss": 0.4448906481266022, | |
| "step": 1190 | |
| }, | |
| { | |
| "epoch": 2.727688787185355, | |
| "grad_norm": 8.36446475982666, | |
| "learning_rate": 3.2770970481871378e-06, | |
| "loss": 0.6628245115280151, | |
| "step": 1192 | |
| }, | |
| { | |
| "epoch": 2.732265446224256, | |
| "grad_norm": 7.5198235511779785, | |
| "learning_rate": 3.262303487243609e-06, | |
| "loss": 0.7516108751296997, | |
| "step": 1194 | |
| }, | |
| { | |
| "epoch": 2.736842105263158, | |
| "grad_norm": 28.500797271728516, | |
| "learning_rate": 3.247541984876046e-06, | |
| "loss": 0.6165962815284729, | |
| "step": 1196 | |
| }, | |
| { | |
| "epoch": 2.7414187643020593, | |
| "grad_norm": 2.045964479446411, | |
| "learning_rate": 3.232812752566542e-06, | |
| "loss": 0.5546071529388428, | |
| "step": 1198 | |
| }, | |
| { | |
| "epoch": 2.745995423340961, | |
| "grad_norm": 1.7794232368469238, | |
| "learning_rate": 3.218116001334878e-06, | |
| "loss": 0.8487035036087036, | |
| "step": 1200 | |
| }, | |
| { | |
| "epoch": 2.750572082379863, | |
| "grad_norm": 2.4322621822357178, | |
| "learning_rate": 3.203451941735476e-06, | |
| "loss": 0.7778904438018799, | |
| "step": 1202 | |
| }, | |
| { | |
| "epoch": 2.7551487414187643, | |
| "grad_norm": 7.543785095214844, | |
| "learning_rate": 3.1888207838544127e-06, | |
| "loss": 0.5872822403907776, | |
| "step": 1204 | |
| }, | |
| { | |
| "epoch": 2.7597254004576657, | |
| "grad_norm": 2.053680419921875, | |
| "learning_rate": 3.1742227373063907e-06, | |
| "loss": 0.7311505675315857, | |
| "step": 1206 | |
| }, | |
| { | |
| "epoch": 2.7643020594965675, | |
| "grad_norm": 2.327242851257324, | |
| "learning_rate": 3.159658011231732e-06, | |
| "loss": 0.7317074537277222, | |
| "step": 1208 | |
| }, | |
| { | |
| "epoch": 2.7688787185354693, | |
| "grad_norm": 1.6999424695968628, | |
| "learning_rate": 3.1451268142934023e-06, | |
| "loss": 0.6040608882904053, | |
| "step": 1210 | |
| }, | |
| { | |
| "epoch": 2.7734553775743707, | |
| "grad_norm": 6.141793251037598, | |
| "learning_rate": 3.1306293546740007e-06, | |
| "loss": 0.5794140100479126, | |
| "step": 1212 | |
| }, | |
| { | |
| "epoch": 2.7780320366132725, | |
| "grad_norm": 10.223762512207031, | |
| "learning_rate": 3.116165840072789e-06, | |
| "loss": 0.963721513748169, | |
| "step": 1214 | |
| }, | |
| { | |
| "epoch": 2.782608695652174, | |
| "grad_norm": 3.224015474319458, | |
| "learning_rate": 3.101736477702707e-06, | |
| "loss": 0.8357968330383301, | |
| "step": 1216 | |
| }, | |
| { | |
| "epoch": 2.7871853546910756, | |
| "grad_norm": 2.6462185382843018, | |
| "learning_rate": 3.08734147428742e-06, | |
| "loss": 0.5886989831924438, | |
| "step": 1218 | |
| }, | |
| { | |
| "epoch": 2.791762013729977, | |
| "grad_norm": 2.715287208557129, | |
| "learning_rate": 3.0729810360583333e-06, | |
| "loss": 0.852825403213501, | |
| "step": 1220 | |
| }, | |
| { | |
| "epoch": 2.796338672768879, | |
| "grad_norm": 2.657933473587036, | |
| "learning_rate": 3.058655368751658e-06, | |
| "loss": 0.8328732848167419, | |
| "step": 1222 | |
| }, | |
| { | |
| "epoch": 2.80091533180778, | |
| "grad_norm": 3.437394142150879, | |
| "learning_rate": 3.04436467760546e-06, | |
| "loss": 0.8728150129318237, | |
| "step": 1224 | |
| }, | |
| { | |
| "epoch": 2.805491990846682, | |
| "grad_norm": 5.744348049163818, | |
| "learning_rate": 3.030109167356704e-06, | |
| "loss": 0.7142363786697388, | |
| "step": 1226 | |
| }, | |
| { | |
| "epoch": 2.8100686498855834, | |
| "grad_norm": 3.96525239944458, | |
| "learning_rate": 3.0158890422383445e-06, | |
| "loss": 0.6308979988098145, | |
| "step": 1228 | |
| }, | |
| { | |
| "epoch": 2.814645308924485, | |
| "grad_norm": 1.1390433311462402, | |
| "learning_rate": 3.00170450597638e-06, | |
| "loss": 0.5205511450767517, | |
| "step": 1230 | |
| }, | |
| { | |
| "epoch": 2.8192219679633865, | |
| "grad_norm": 2.2246572971343994, | |
| "learning_rate": 2.9875557617869456e-06, | |
| "loss": 0.794964075088501, | |
| "step": 1232 | |
| }, | |
| { | |
| "epoch": 2.8237986270022883, | |
| "grad_norm": 3.394249677658081, | |
| "learning_rate": 2.9734430123733937e-06, | |
| "loss": 0.3426223397254944, | |
| "step": 1234 | |
| }, | |
| { | |
| "epoch": 2.82837528604119, | |
| "grad_norm": 2.538475751876831, | |
| "learning_rate": 2.9593664599233984e-06, | |
| "loss": 0.8177708387374878, | |
| "step": 1236 | |
| }, | |
| { | |
| "epoch": 2.8329519450800915, | |
| "grad_norm": 2.663276195526123, | |
| "learning_rate": 2.9453263061060522e-06, | |
| "loss": 0.607596755027771, | |
| "step": 1238 | |
| }, | |
| { | |
| "epoch": 2.837528604118993, | |
| "grad_norm": 5.610261917114258, | |
| "learning_rate": 2.9313227520689787e-06, | |
| "loss": 0.610859751701355, | |
| "step": 1240 | |
| }, | |
| { | |
| "epoch": 2.8421052631578947, | |
| "grad_norm": 3.410015821456909, | |
| "learning_rate": 2.917355998435456e-06, | |
| "loss": 0.5197435617446899, | |
| "step": 1242 | |
| }, | |
| { | |
| "epoch": 2.8466819221967965, | |
| "grad_norm": 3.7659153938293457, | |
| "learning_rate": 2.903426245301526e-06, | |
| "loss": 0.8168070316314697, | |
| "step": 1244 | |
| }, | |
| { | |
| "epoch": 2.851258581235698, | |
| "grad_norm": 7.588848114013672, | |
| "learning_rate": 2.8895336922331546e-06, | |
| "loss": 0.6579625010490417, | |
| "step": 1246 | |
| }, | |
| { | |
| "epoch": 2.8558352402745997, | |
| "grad_norm": 2.474982738494873, | |
| "learning_rate": 2.875678538263347e-06, | |
| "loss": 0.3433322310447693, | |
| "step": 1248 | |
| }, | |
| { | |
| "epoch": 2.860411899313501, | |
| "grad_norm": 7.560976982116699, | |
| "learning_rate": 2.8618609818893082e-06, | |
| "loss": 0.5438008308410645, | |
| "step": 1250 | |
| }, | |
| { | |
| "epoch": 2.864988558352403, | |
| "grad_norm": 2.223369836807251, | |
| "learning_rate": 2.8480812210696005e-06, | |
| "loss": 0.8767250776290894, | |
| "step": 1252 | |
| }, | |
| { | |
| "epoch": 2.869565217391304, | |
| "grad_norm": 3.2690696716308594, | |
| "learning_rate": 2.834339453221302e-06, | |
| "loss": 0.9083548784255981, | |
| "step": 1254 | |
| }, | |
| { | |
| "epoch": 2.874141876430206, | |
| "grad_norm": 4.433350563049316, | |
| "learning_rate": 2.8206358752171813e-06, | |
| "loss": 0.597790002822876, | |
| "step": 1256 | |
| }, | |
| { | |
| "epoch": 2.8787185354691074, | |
| "grad_norm": 2.7721469402313232, | |
| "learning_rate": 2.8069706833828763e-06, | |
| "loss": 0.772271990776062, | |
| "step": 1258 | |
| }, | |
| { | |
| "epoch": 2.883295194508009, | |
| "grad_norm": 2.6030502319335938, | |
| "learning_rate": 2.7933440734940863e-06, | |
| "loss": 0.8379377126693726, | |
| "step": 1260 | |
| }, | |
| { | |
| "epoch": 2.887871853546911, | |
| "grad_norm": 6.367020606994629, | |
| "learning_rate": 2.7797562407737533e-06, | |
| "loss": 1.0624537467956543, | |
| "step": 1262 | |
| }, | |
| { | |
| "epoch": 2.8924485125858124, | |
| "grad_norm": 2.0598034858703613, | |
| "learning_rate": 2.766207379889281e-06, | |
| "loss": 0.8280332684516907, | |
| "step": 1264 | |
| }, | |
| { | |
| "epoch": 2.8970251716247137, | |
| "grad_norm": 10.405440330505371, | |
| "learning_rate": 2.752697684949741e-06, | |
| "loss": 0.5287131071090698, | |
| "step": 1266 | |
| }, | |
| { | |
| "epoch": 2.9016018306636155, | |
| "grad_norm": 2.383814811706543, | |
| "learning_rate": 2.739227349503083e-06, | |
| "loss": 0.85811847448349, | |
| "step": 1268 | |
| }, | |
| { | |
| "epoch": 2.9061784897025174, | |
| "grad_norm": 4.059746265411377, | |
| "learning_rate": 2.7257965665333765e-06, | |
| "loss": 0.8082501888275146, | |
| "step": 1270 | |
| }, | |
| { | |
| "epoch": 2.9107551487414187, | |
| "grad_norm": 0.8926740288734436, | |
| "learning_rate": 2.712405528458034e-06, | |
| "loss": 0.537039041519165, | |
| "step": 1272 | |
| }, | |
| { | |
| "epoch": 2.9153318077803205, | |
| "grad_norm": 1.766548752784729, | |
| "learning_rate": 2.6990544271250607e-06, | |
| "loss": 0.8076680302619934, | |
| "step": 1274 | |
| }, | |
| { | |
| "epoch": 2.919908466819222, | |
| "grad_norm": 2.0170223712921143, | |
| "learning_rate": 2.6857434538103043e-06, | |
| "loss": 0.5731369256973267, | |
| "step": 1276 | |
| }, | |
| { | |
| "epoch": 2.9244851258581237, | |
| "grad_norm": 16.56928253173828, | |
| "learning_rate": 2.672472799214714e-06, | |
| "loss": 0.5599454641342163, | |
| "step": 1278 | |
| }, | |
| { | |
| "epoch": 2.929061784897025, | |
| "grad_norm": 1.690929651260376, | |
| "learning_rate": 2.659242653461608e-06, | |
| "loss": 0.671285092830658, | |
| "step": 1280 | |
| }, | |
| { | |
| "epoch": 2.933638443935927, | |
| "grad_norm": 0.7769700288772583, | |
| "learning_rate": 2.64605320609395e-06, | |
| "loss": 0.452056348323822, | |
| "step": 1282 | |
| }, | |
| { | |
| "epoch": 2.9382151029748282, | |
| "grad_norm": 1.5757992267608643, | |
| "learning_rate": 2.6329046460716424e-06, | |
| "loss": 0.43658745288848877, | |
| "step": 1284 | |
| }, | |
| { | |
| "epoch": 2.94279176201373, | |
| "grad_norm": 4.979938507080078, | |
| "learning_rate": 2.6197971617687972e-06, | |
| "loss": 0.504380464553833, | |
| "step": 1286 | |
| }, | |
| { | |
| "epoch": 2.9473684210526314, | |
| "grad_norm": 8.039284706115723, | |
| "learning_rate": 2.606730940971064e-06, | |
| "loss": 0.754509687423706, | |
| "step": 1288 | |
| }, | |
| { | |
| "epoch": 2.9519450800915332, | |
| "grad_norm": 12.317577362060547, | |
| "learning_rate": 2.5937061708729187e-06, | |
| "loss": 0.9081135988235474, | |
| "step": 1290 | |
| }, | |
| { | |
| "epoch": 2.9565217391304346, | |
| "grad_norm": 7.108748912811279, | |
| "learning_rate": 2.5807230380749942e-06, | |
| "loss": 0.5429270267486572, | |
| "step": 1292 | |
| }, | |
| { | |
| "epoch": 2.9610983981693364, | |
| "grad_norm": 2.071383476257324, | |
| "learning_rate": 2.5677817285813996e-06, | |
| "loss": 0.55026775598526, | |
| "step": 1294 | |
| }, | |
| { | |
| "epoch": 2.965675057208238, | |
| "grad_norm": 2.826601266860962, | |
| "learning_rate": 2.5548824277970595e-06, | |
| "loss": 0.8305662274360657, | |
| "step": 1296 | |
| }, | |
| { | |
| "epoch": 2.9702517162471396, | |
| "grad_norm": 6.209619522094727, | |
| "learning_rate": 2.542025320525058e-06, | |
| "loss": 0.9413694143295288, | |
| "step": 1298 | |
| }, | |
| { | |
| "epoch": 2.974828375286041, | |
| "grad_norm": 41.0315055847168, | |
| "learning_rate": 2.5292105909639857e-06, | |
| "loss": 0.5638728737831116, | |
| "step": 1300 | |
| }, | |
| { | |
| "epoch": 2.9794050343249427, | |
| "grad_norm": 0.5875497460365295, | |
| "learning_rate": 2.5164384227053133e-06, | |
| "loss": 0.5167251229286194, | |
| "step": 1302 | |
| }, | |
| { | |
| "epoch": 2.9839816933638446, | |
| "grad_norm": 3.607098340988159, | |
| "learning_rate": 2.5037089987307405e-06, | |
| "loss": 0.8430502414703369, | |
| "step": 1304 | |
| }, | |
| { | |
| "epoch": 2.988558352402746, | |
| "grad_norm": 6.2934064865112305, | |
| "learning_rate": 2.491022501409598e-06, | |
| "loss": 0.522983193397522, | |
| "step": 1306 | |
| }, | |
| { | |
| "epoch": 2.9931350114416477, | |
| "grad_norm": 1.3800368309020996, | |
| "learning_rate": 2.4783791124962197e-06, | |
| "loss": 0.7301946878433228, | |
| "step": 1308 | |
| }, | |
| { | |
| "epoch": 2.997711670480549, | |
| "grad_norm": 2.08162522315979, | |
| "learning_rate": 2.4657790131273376e-06, | |
| "loss": 0.7967828512191772, | |
| "step": 1310 | |
| }, | |
| { | |
| "epoch": 3.002288329519451, | |
| "grad_norm": 1.4447431564331055, | |
| "learning_rate": 2.4532223838195006e-06, | |
| "loss": 0.5404419898986816, | |
| "step": 1312 | |
| }, | |
| { | |
| "epoch": 3.0068649885583523, | |
| "grad_norm": 3.0550334453582764, | |
| "learning_rate": 2.4407094044664746e-06, | |
| "loss": 0.6030987501144409, | |
| "step": 1314 | |
| }, | |
| { | |
| "epoch": 3.011441647597254, | |
| "grad_norm": 6.2352166175842285, | |
| "learning_rate": 2.4282402543366706e-06, | |
| "loss": 0.561785101890564, | |
| "step": 1316 | |
| }, | |
| { | |
| "epoch": 3.0160183066361554, | |
| "grad_norm": 1.6362286806106567, | |
| "learning_rate": 2.4158151120705773e-06, | |
| "loss": 0.2637900412082672, | |
| "step": 1318 | |
| }, | |
| { | |
| "epoch": 3.0205949656750573, | |
| "grad_norm": 2.0714526176452637, | |
| "learning_rate": 2.4034341556781986e-06, | |
| "loss": 0.6601877212524414, | |
| "step": 1320 | |
| }, | |
| { | |
| "epoch": 3.0251716247139586, | |
| "grad_norm": 2.9864554405212402, | |
| "learning_rate": 2.3910975625365066e-06, | |
| "loss": 0.5502775311470032, | |
| "step": 1322 | |
| }, | |
| { | |
| "epoch": 3.0297482837528604, | |
| "grad_norm": 7.837927341461182, | |
| "learning_rate": 2.3788055093868962e-06, | |
| "loss": 0.33134838938713074, | |
| "step": 1324 | |
| }, | |
| { | |
| "epoch": 3.034324942791762, | |
| "grad_norm": 4.929914474487305, | |
| "learning_rate": 2.366558172332665e-06, | |
| "loss": 0.7175261974334717, | |
| "step": 1326 | |
| }, | |
| { | |
| "epoch": 3.0389016018306636, | |
| "grad_norm": 4.021068572998047, | |
| "learning_rate": 2.354355726836466e-06, | |
| "loss": 0.5193214416503906, | |
| "step": 1328 | |
| }, | |
| { | |
| "epoch": 3.0434782608695654, | |
| "grad_norm": 3.3567593097686768, | |
| "learning_rate": 2.342198347717823e-06, | |
| "loss": 0.7330721020698547, | |
| "step": 1330 | |
| }, | |
| { | |
| "epoch": 3.0480549199084668, | |
| "grad_norm": 4.255941390991211, | |
| "learning_rate": 2.330086209150604e-06, | |
| "loss": 0.3949548602104187, | |
| "step": 1332 | |
| }, | |
| { | |
| "epoch": 3.0526315789473686, | |
| "grad_norm": 3.187955379486084, | |
| "learning_rate": 2.3180194846605367e-06, | |
| "loss": 0.7485113739967346, | |
| "step": 1334 | |
| }, | |
| { | |
| "epoch": 3.05720823798627, | |
| "grad_norm": 2.202007532119751, | |
| "learning_rate": 2.3059983471227186e-06, | |
| "loss": 0.3207942247390747, | |
| "step": 1336 | |
| }, | |
| { | |
| "epoch": 3.0617848970251718, | |
| "grad_norm": 2.3094310760498047, | |
| "learning_rate": 2.294022968759142e-06, | |
| "loss": 0.7273589372634888, | |
| "step": 1338 | |
| }, | |
| { | |
| "epoch": 3.066361556064073, | |
| "grad_norm": 2.190314292907715, | |
| "learning_rate": 2.2820935211362256e-06, | |
| "loss": 0.5962270498275757, | |
| "step": 1340 | |
| }, | |
| { | |
| "epoch": 3.070938215102975, | |
| "grad_norm": 6.551016330718994, | |
| "learning_rate": 2.2702101751623555e-06, | |
| "loss": 0.6893465518951416, | |
| "step": 1342 | |
| }, | |
| { | |
| "epoch": 3.0755148741418763, | |
| "grad_norm": 2.070547342300415, | |
| "learning_rate": 2.2583731010854436e-06, | |
| "loss": 0.730362057685852, | |
| "step": 1344 | |
| }, | |
| { | |
| "epoch": 3.080091533180778, | |
| "grad_norm": 3.0296781063079834, | |
| "learning_rate": 2.2465824684904737e-06, | |
| "loss": 0.44114387035369873, | |
| "step": 1346 | |
| }, | |
| { | |
| "epoch": 3.0846681922196795, | |
| "grad_norm": 2.1151537895202637, | |
| "learning_rate": 2.23483844629709e-06, | |
| "loss": 0.6440198421478271, | |
| "step": 1348 | |
| }, | |
| { | |
| "epoch": 3.0892448512585813, | |
| "grad_norm": 2.34732723236084, | |
| "learning_rate": 2.223141202757164e-06, | |
| "loss": 0.6223734617233276, | |
| "step": 1350 | |
| }, | |
| { | |
| "epoch": 3.0938215102974826, | |
| "grad_norm": 3.51031231880188, | |
| "learning_rate": 2.2114909054523883e-06, | |
| "loss": 0.65424644947052, | |
| "step": 1352 | |
| }, | |
| { | |
| "epoch": 3.0983981693363845, | |
| "grad_norm": 4.628645896911621, | |
| "learning_rate": 2.199887721291877e-06, | |
| "loss": 0.6126776337623596, | |
| "step": 1354 | |
| }, | |
| { | |
| "epoch": 3.1029748283752863, | |
| "grad_norm": 2.8772079944610596, | |
| "learning_rate": 2.188331816509772e-06, | |
| "loss": 0.7314285039901733, | |
| "step": 1356 | |
| }, | |
| { | |
| "epoch": 3.1075514874141876, | |
| "grad_norm": 0.4025164842605591, | |
| "learning_rate": 2.176823356662864e-06, | |
| "loss": 0.3512212634086609, | |
| "step": 1358 | |
| }, | |
| { | |
| "epoch": 3.1121281464530894, | |
| "grad_norm": 2.19573974609375, | |
| "learning_rate": 2.1653625066282153e-06, | |
| "loss": 0.6310482025146484, | |
| "step": 1360 | |
| }, | |
| { | |
| "epoch": 3.116704805491991, | |
| "grad_norm": 7.4715495109558105, | |
| "learning_rate": 2.153949430600811e-06, | |
| "loss": 0.41319721937179565, | |
| "step": 1362 | |
| }, | |
| { | |
| "epoch": 3.1212814645308926, | |
| "grad_norm": 3.5463063716888428, | |
| "learning_rate": 2.142584292091185e-06, | |
| "loss": 0.49941742420196533, | |
| "step": 1364 | |
| }, | |
| { | |
| "epoch": 3.125858123569794, | |
| "grad_norm": 3.7344858646392822, | |
| "learning_rate": 2.1312672539230973e-06, | |
| "loss": 0.5115246772766113, | |
| "step": 1366 | |
| }, | |
| { | |
| "epoch": 3.130434782608696, | |
| "grad_norm": 7.448498249053955, | |
| "learning_rate": 2.119998478231194e-06, | |
| "loss": 0.43049943447113037, | |
| "step": 1368 | |
| }, | |
| { | |
| "epoch": 3.135011441647597, | |
| "grad_norm": 4.2311882972717285, | |
| "learning_rate": 2.1087781264586795e-06, | |
| "loss": 0.7117477655410767, | |
| "step": 1370 | |
| }, | |
| { | |
| "epoch": 3.139588100686499, | |
| "grad_norm": 1.4386028051376343, | |
| "learning_rate": 2.0976063593550126e-06, | |
| "loss": 0.5673470497131348, | |
| "step": 1372 | |
| }, | |
| { | |
| "epoch": 3.1441647597254003, | |
| "grad_norm": 2.9334120750427246, | |
| "learning_rate": 2.0864833369735974e-06, | |
| "loss": 0.4686206579208374, | |
| "step": 1374 | |
| }, | |
| { | |
| "epoch": 3.148741418764302, | |
| "grad_norm": 2.7471981048583984, | |
| "learning_rate": 2.0754092186694917e-06, | |
| "loss": 0.4157622456550598, | |
| "step": 1376 | |
| }, | |
| { | |
| "epoch": 3.1533180778032035, | |
| "grad_norm": 3.158262252807617, | |
| "learning_rate": 2.064384163097125e-06, | |
| "loss": 0.7791534066200256, | |
| "step": 1378 | |
| }, | |
| { | |
| "epoch": 3.1578947368421053, | |
| "grad_norm": 13.052440643310547, | |
| "learning_rate": 2.0534083282080243e-06, | |
| "loss": 0.3141913414001465, | |
| "step": 1380 | |
| }, | |
| { | |
| "epoch": 3.1624713958810067, | |
| "grad_norm": 11.294751167297363, | |
| "learning_rate": 2.0424818712485516e-06, | |
| "loss": 0.6616432070732117, | |
| "step": 1382 | |
| }, | |
| { | |
| "epoch": 3.1670480549199085, | |
| "grad_norm": 25.06646728515625, | |
| "learning_rate": 2.0316049487576505e-06, | |
| "loss": 0.5768702030181885, | |
| "step": 1384 | |
| }, | |
| { | |
| "epoch": 3.17162471395881, | |
| "grad_norm": 2.034757375717163, | |
| "learning_rate": 2.0207777165646096e-06, | |
| "loss": 0.40132611989974976, | |
| "step": 1386 | |
| }, | |
| { | |
| "epoch": 3.1762013729977117, | |
| "grad_norm": 7.5245137214660645, | |
| "learning_rate": 2.010000329786815e-06, | |
| "loss": 0.4842037260532379, | |
| "step": 1388 | |
| }, | |
| { | |
| "epoch": 3.1807780320366135, | |
| "grad_norm": 2.598806381225586, | |
| "learning_rate": 1.9992729428275452e-06, | |
| "loss": 0.4087521433830261, | |
| "step": 1390 | |
| }, | |
| { | |
| "epoch": 3.185354691075515, | |
| "grad_norm": 3.6281611919403076, | |
| "learning_rate": 1.9885957093737494e-06, | |
| "loss": 0.6152184009552002, | |
| "step": 1392 | |
| }, | |
| { | |
| "epoch": 3.1899313501144166, | |
| "grad_norm": 1.8896663188934326, | |
| "learning_rate": 1.977968782393848e-06, | |
| "loss": 0.2853028178215027, | |
| "step": 1394 | |
| }, | |
| { | |
| "epoch": 3.194508009153318, | |
| "grad_norm": 14.76069164276123, | |
| "learning_rate": 1.9673923141355387e-06, | |
| "loss": 0.45738574862480164, | |
| "step": 1396 | |
| }, | |
| { | |
| "epoch": 3.19908466819222, | |
| "grad_norm": 2.0312094688415527, | |
| "learning_rate": 1.9568664561236208e-06, | |
| "loss": 0.5310682654380798, | |
| "step": 1398 | |
| }, | |
| { | |
| "epoch": 3.203661327231121, | |
| "grad_norm": 12.73653793334961, | |
| "learning_rate": 1.946391359157818e-06, | |
| "loss": 0.7988094091415405, | |
| "step": 1400 | |
| }, | |
| { | |
| "epoch": 3.208237986270023, | |
| "grad_norm": 3.7427358627319336, | |
| "learning_rate": 1.935967173310621e-06, | |
| "loss": 0.48929834365844727, | |
| "step": 1402 | |
| }, | |
| { | |
| "epoch": 3.2128146453089244, | |
| "grad_norm": 2.088498592376709, | |
| "learning_rate": 1.9255940479251433e-06, | |
| "loss": 0.5670595765113831, | |
| "step": 1404 | |
| }, | |
| { | |
| "epoch": 3.217391304347826, | |
| "grad_norm": 3.3635458946228027, | |
| "learning_rate": 1.915272131612966e-06, | |
| "loss": 0.4398784637451172, | |
| "step": 1406 | |
| }, | |
| { | |
| "epoch": 3.2219679633867275, | |
| "grad_norm": 2.194676637649536, | |
| "learning_rate": 1.905001572252026e-06, | |
| "loss": 0.7907838821411133, | |
| "step": 1408 | |
| }, | |
| { | |
| "epoch": 3.2265446224256293, | |
| "grad_norm": 3.3996782302856445, | |
| "learning_rate": 1.8947825169844886e-06, | |
| "loss": 0.6364421844482422, | |
| "step": 1410 | |
| }, | |
| { | |
| "epoch": 3.2311212814645307, | |
| "grad_norm": 5.239405632019043, | |
| "learning_rate": 1.8846151122146353e-06, | |
| "loss": 0.42004287242889404, | |
| "step": 1412 | |
| }, | |
| { | |
| "epoch": 3.2356979405034325, | |
| "grad_norm": 0.6929391026496887, | |
| "learning_rate": 1.8744995036067799e-06, | |
| "loss": 0.26028335094451904, | |
| "step": 1414 | |
| }, | |
| { | |
| "epoch": 3.2402745995423343, | |
| "grad_norm": 0.17362770438194275, | |
| "learning_rate": 1.8644358360831683e-06, | |
| "loss": 0.3416166305541992, | |
| "step": 1416 | |
| }, | |
| { | |
| "epoch": 3.2448512585812357, | |
| "grad_norm": 1.93403959274292, | |
| "learning_rate": 1.8544242538219084e-06, | |
| "loss": 0.34089672565460205, | |
| "step": 1418 | |
| }, | |
| { | |
| "epoch": 3.2494279176201375, | |
| "grad_norm": 3.117030620574951, | |
| "learning_rate": 1.8444649002549042e-06, | |
| "loss": 0.7176865339279175, | |
| "step": 1420 | |
| }, | |
| { | |
| "epoch": 3.254004576659039, | |
| "grad_norm": 14.445137977600098, | |
| "learning_rate": 1.8345579180657996e-06, | |
| "loss": 0.5071847438812256, | |
| "step": 1422 | |
| }, | |
| { | |
| "epoch": 3.2585812356979407, | |
| "grad_norm": 2.8883860111236572, | |
| "learning_rate": 1.8247034491879346e-06, | |
| "loss": 0.528769850730896, | |
| "step": 1424 | |
| }, | |
| { | |
| "epoch": 3.263157894736842, | |
| "grad_norm": 6.8145036697387695, | |
| "learning_rate": 1.8149016348023121e-06, | |
| "loss": 0.6137822270393372, | |
| "step": 1426 | |
| }, | |
| { | |
| "epoch": 3.267734553775744, | |
| "grad_norm": 3.661930561065674, | |
| "learning_rate": 1.8051526153355797e-06, | |
| "loss": 0.726434588432312, | |
| "step": 1428 | |
| }, | |
| { | |
| "epoch": 3.272311212814645, | |
| "grad_norm": 2.532559871673584, | |
| "learning_rate": 1.7954565304580046e-06, | |
| "loss": 0.707175076007843, | |
| "step": 1430 | |
| }, | |
| { | |
| "epoch": 3.276887871853547, | |
| "grad_norm": 13.103681564331055, | |
| "learning_rate": 1.7858135190814896e-06, | |
| "loss": 0.4193027913570404, | |
| "step": 1432 | |
| }, | |
| { | |
| "epoch": 3.2814645308924484, | |
| "grad_norm": 21.0889949798584, | |
| "learning_rate": 1.776223719357571e-06, | |
| "loss": 0.4109644293785095, | |
| "step": 1434 | |
| }, | |
| { | |
| "epoch": 3.28604118993135, | |
| "grad_norm": 1.8079733848571777, | |
| "learning_rate": 1.7666872686754443e-06, | |
| "loss": 0.34100282192230225, | |
| "step": 1436 | |
| }, | |
| { | |
| "epoch": 3.2906178489702516, | |
| "grad_norm": 63.32174301147461, | |
| "learning_rate": 1.757204303659994e-06, | |
| "loss": 0.5300724506378174, | |
| "step": 1438 | |
| }, | |
| { | |
| "epoch": 3.2951945080091534, | |
| "grad_norm": 3.9798078536987305, | |
| "learning_rate": 1.747774960169838e-06, | |
| "loss": 0.4038028120994568, | |
| "step": 1440 | |
| }, | |
| { | |
| "epoch": 3.2997711670480547, | |
| "grad_norm": 2.6402554512023926, | |
| "learning_rate": 1.738399373295379e-06, | |
| "loss": 0.6060991287231445, | |
| "step": 1442 | |
| }, | |
| { | |
| "epoch": 3.3043478260869565, | |
| "grad_norm": 5.125815391540527, | |
| "learning_rate": 1.7290776773568701e-06, | |
| "loss": 0.487943172454834, | |
| "step": 1444 | |
| }, | |
| { | |
| "epoch": 3.308924485125858, | |
| "grad_norm": 2.9676544666290283, | |
| "learning_rate": 1.7198100059024958e-06, | |
| "loss": 0.6720585823059082, | |
| "step": 1446 | |
| }, | |
| { | |
| "epoch": 3.3135011441647597, | |
| "grad_norm": 4.840664386749268, | |
| "learning_rate": 1.7105964917064435e-06, | |
| "loss": 0.5866726636886597, | |
| "step": 1448 | |
| }, | |
| { | |
| "epoch": 3.3180778032036615, | |
| "grad_norm": 12.175851821899414, | |
| "learning_rate": 1.7014372667670218e-06, | |
| "loss": 0.62703537940979, | |
| "step": 1450 | |
| }, | |
| { | |
| "epoch": 3.322654462242563, | |
| "grad_norm": 2.532794952392578, | |
| "learning_rate": 1.692332462304754e-06, | |
| "loss": 0.8183742761611938, | |
| "step": 1452 | |
| }, | |
| { | |
| "epoch": 3.3272311212814647, | |
| "grad_norm": 5.84249210357666, | |
| "learning_rate": 1.683282208760501e-06, | |
| "loss": 0.4687821567058563, | |
| "step": 1454 | |
| }, | |
| { | |
| "epoch": 3.331807780320366, | |
| "grad_norm": 5.44354248046875, | |
| "learning_rate": 1.6742866357935997e-06, | |
| "loss": 0.3934783935546875, | |
| "step": 1456 | |
| }, | |
| { | |
| "epoch": 3.336384439359268, | |
| "grad_norm": 5.401495933532715, | |
| "learning_rate": 1.6653458722799973e-06, | |
| "loss": 0.6204153299331665, | |
| "step": 1458 | |
| }, | |
| { | |
| "epoch": 3.3409610983981692, | |
| "grad_norm": 2.019944190979004, | |
| "learning_rate": 1.656460046310409e-06, | |
| "loss": 0.6797080039978027, | |
| "step": 1460 | |
| }, | |
| { | |
| "epoch": 3.345537757437071, | |
| "grad_norm": 4.623880386352539, | |
| "learning_rate": 1.6476292851884809e-06, | |
| "loss": 0.640425443649292, | |
| "step": 1462 | |
| }, | |
| { | |
| "epoch": 3.3501144164759724, | |
| "grad_norm": 2.2270355224609375, | |
| "learning_rate": 1.6388537154289707e-06, | |
| "loss": 0.6423420906066895, | |
| "step": 1464 | |
| }, | |
| { | |
| "epoch": 3.354691075514874, | |
| "grad_norm": 11.633206367492676, | |
| "learning_rate": 1.6301334627559262e-06, | |
| "loss": 0.694199800491333, | |
| "step": 1466 | |
| }, | |
| { | |
| "epoch": 3.3592677345537756, | |
| "grad_norm": 1.825758695602417, | |
| "learning_rate": 1.6214686521008927e-06, | |
| "loss": 0.6198365688323975, | |
| "step": 1468 | |
| }, | |
| { | |
| "epoch": 3.3638443935926774, | |
| "grad_norm": 4.699729919433594, | |
| "learning_rate": 1.6128594076011226e-06, | |
| "loss": 0.5542856454849243, | |
| "step": 1470 | |
| }, | |
| { | |
| "epoch": 3.3684210526315788, | |
| "grad_norm": 1.0216766595840454, | |
| "learning_rate": 1.6043058525977879e-06, | |
| "loss": 0.2599141001701355, | |
| "step": 1472 | |
| }, | |
| { | |
| "epoch": 3.3729977116704806, | |
| "grad_norm": 2.5108420848846436, | |
| "learning_rate": 1.5958081096342256e-06, | |
| "loss": 0.734022319316864, | |
| "step": 1474 | |
| }, | |
| { | |
| "epoch": 3.3775743707093824, | |
| "grad_norm": 2.6871957778930664, | |
| "learning_rate": 1.5873663004541738e-06, | |
| "loss": 0.7463738918304443, | |
| "step": 1476 | |
| }, | |
| { | |
| "epoch": 3.3821510297482837, | |
| "grad_norm": 4.7424397468566895, | |
| "learning_rate": 1.5789805460000296e-06, | |
| "loss": 0.7995430827140808, | |
| "step": 1478 | |
| }, | |
| { | |
| "epoch": 3.386727688787185, | |
| "grad_norm": 10.438132286071777, | |
| "learning_rate": 1.5706509664111164e-06, | |
| "loss": 0.3752162456512451, | |
| "step": 1480 | |
| }, | |
| { | |
| "epoch": 3.391304347826087, | |
| "grad_norm": 2.6828954219818115, | |
| "learning_rate": 1.5623776810219643e-06, | |
| "loss": 0.3232945203781128, | |
| "step": 1482 | |
| }, | |
| { | |
| "epoch": 3.3958810068649887, | |
| "grad_norm": 2.298041820526123, | |
| "learning_rate": 1.554160808360598e-06, | |
| "loss": 0.77318274974823, | |
| "step": 1484 | |
| }, | |
| { | |
| "epoch": 3.40045766590389, | |
| "grad_norm": 4.018006801605225, | |
| "learning_rate": 1.5460004661468386e-06, | |
| "loss": 0.5600519180297852, | |
| "step": 1486 | |
| }, | |
| { | |
| "epoch": 3.405034324942792, | |
| "grad_norm": 2.762150287628174, | |
| "learning_rate": 1.537896771290623e-06, | |
| "loss": 0.7082411050796509, | |
| "step": 1488 | |
| }, | |
| { | |
| "epoch": 3.4096109839816933, | |
| "grad_norm": 9.372475624084473, | |
| "learning_rate": 1.5298498398903178e-06, | |
| "loss": 0.5185490846633911, | |
| "step": 1490 | |
| }, | |
| { | |
| "epoch": 3.414187643020595, | |
| "grad_norm": 3.0372886657714844, | |
| "learning_rate": 1.5218597872310673e-06, | |
| "loss": 0.7110550403594971, | |
| "step": 1492 | |
| }, | |
| { | |
| "epoch": 3.4187643020594964, | |
| "grad_norm": 2.260855197906494, | |
| "learning_rate": 1.5139267277831348e-06, | |
| "loss": 0.363142192363739, | |
| "step": 1494 | |
| }, | |
| { | |
| "epoch": 3.4233409610983982, | |
| "grad_norm": 4.717143535614014, | |
| "learning_rate": 1.5060507752002656e-06, | |
| "loss": 0.42447251081466675, | |
| "step": 1496 | |
| }, | |
| { | |
| "epoch": 3.4279176201372996, | |
| "grad_norm": 2.622420072555542, | |
| "learning_rate": 1.4982320423180574e-06, | |
| "loss": 0.4342220425605774, | |
| "step": 1498 | |
| }, | |
| { | |
| "epoch": 3.4324942791762014, | |
| "grad_norm": 2.491084575653076, | |
| "learning_rate": 1.490470641152345e-06, | |
| "loss": 0.4623292088508606, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 3.437070938215103, | |
| "grad_norm": 7.559942245483398, | |
| "learning_rate": 1.4827666828975943e-06, | |
| "loss": 0.40327316522598267, | |
| "step": 1502 | |
| }, | |
| { | |
| "epoch": 3.4416475972540046, | |
| "grad_norm": 5.229639530181885, | |
| "learning_rate": 1.4751202779253086e-06, | |
| "loss": 0.3544307351112366, | |
| "step": 1504 | |
| }, | |
| { | |
| "epoch": 3.446224256292906, | |
| "grad_norm": 3.293280601501465, | |
| "learning_rate": 1.4675315357824527e-06, | |
| "loss": 0.7979464530944824, | |
| "step": 1506 | |
| }, | |
| { | |
| "epoch": 3.4508009153318078, | |
| "grad_norm": 1.9233644008636475, | |
| "learning_rate": 1.4600005651898741e-06, | |
| "loss": 0.6851720213890076, | |
| "step": 1508 | |
| }, | |
| { | |
| "epoch": 3.4553775743707096, | |
| "grad_norm": 3.2330470085144043, | |
| "learning_rate": 1.4525274740407524e-06, | |
| "loss": 0.6885404586791992, | |
| "step": 1510 | |
| }, | |
| { | |
| "epoch": 3.459954233409611, | |
| "grad_norm": 4.268771648406982, | |
| "learning_rate": 1.4451123693990555e-06, | |
| "loss": 0.4016433656215668, | |
| "step": 1512 | |
| }, | |
| { | |
| "epoch": 3.4645308924485128, | |
| "grad_norm": 5.801273345947266, | |
| "learning_rate": 1.4377553574979946e-06, | |
| "loss": 0.2582213878631592, | |
| "step": 1514 | |
| }, | |
| { | |
| "epoch": 3.469107551487414, | |
| "grad_norm": 1.9621074199676514, | |
| "learning_rate": 1.4304565437385165e-06, | |
| "loss": 0.7295342683792114, | |
| "step": 1516 | |
| }, | |
| { | |
| "epoch": 3.473684210526316, | |
| "grad_norm": 4.003790378570557, | |
| "learning_rate": 1.4232160326877832e-06, | |
| "loss": 0.3042123317718506, | |
| "step": 1518 | |
| }, | |
| { | |
| "epoch": 3.4782608695652173, | |
| "grad_norm": 2.3673927783966064, | |
| "learning_rate": 1.4160339280776785e-06, | |
| "loss": 0.5627752542495728, | |
| "step": 1520 | |
| }, | |
| { | |
| "epoch": 3.482837528604119, | |
| "grad_norm": 3.853731632232666, | |
| "learning_rate": 1.408910332803319e-06, | |
| "loss": 0.3514673113822937, | |
| "step": 1522 | |
| }, | |
| { | |
| "epoch": 3.4874141876430205, | |
| "grad_norm": 88.11868286132812, | |
| "learning_rate": 1.4018453489215835e-06, | |
| "loss": 0.5591588020324707, | |
| "step": 1524 | |
| }, | |
| { | |
| "epoch": 3.4919908466819223, | |
| "grad_norm": 2.757547616958618, | |
| "learning_rate": 1.3948390776496484e-06, | |
| "loss": 0.47397273778915405, | |
| "step": 1526 | |
| }, | |
| { | |
| "epoch": 3.4965675057208236, | |
| "grad_norm": 6.918557643890381, | |
| "learning_rate": 1.3878916193635373e-06, | |
| "loss": 0.33857864141464233, | |
| "step": 1528 | |
| }, | |
| { | |
| "epoch": 3.5011441647597255, | |
| "grad_norm": 20.17970848083496, | |
| "learning_rate": 1.3810030735966867e-06, | |
| "loss": 0.6692100763320923, | |
| "step": 1530 | |
| }, | |
| { | |
| "epoch": 3.505720823798627, | |
| "grad_norm": 1.748712420463562, | |
| "learning_rate": 1.3741735390385128e-06, | |
| "loss": 0.6161905527114868, | |
| "step": 1532 | |
| }, | |
| { | |
| "epoch": 3.5102974828375286, | |
| "grad_norm": 12.148778915405273, | |
| "learning_rate": 1.3674031135330054e-06, | |
| "loss": 0.45425230264663696, | |
| "step": 1534 | |
| }, | |
| { | |
| "epoch": 3.5148741418764304, | |
| "grad_norm": 2.459949254989624, | |
| "learning_rate": 1.360691894077322e-06, | |
| "loss": 0.7768383026123047, | |
| "step": 1536 | |
| }, | |
| { | |
| "epoch": 3.519450800915332, | |
| "grad_norm": 1.3486472368240356, | |
| "learning_rate": 1.3540399768203989e-06, | |
| "loss": 0.7696608304977417, | |
| "step": 1538 | |
| }, | |
| { | |
| "epoch": 3.524027459954233, | |
| "grad_norm": 7.643558979034424, | |
| "learning_rate": 1.347447457061572e-06, | |
| "loss": 0.7031220197677612, | |
| "step": 1540 | |
| }, | |
| { | |
| "epoch": 3.528604118993135, | |
| "grad_norm": 2.340738534927368, | |
| "learning_rate": 1.3409144292492152e-06, | |
| "loss": 0.5920687913894653, | |
| "step": 1542 | |
| }, | |
| { | |
| "epoch": 3.533180778032037, | |
| "grad_norm": 2.42668080329895, | |
| "learning_rate": 1.3344409869793851e-06, | |
| "loss": 0.6916133761405945, | |
| "step": 1544 | |
| }, | |
| { | |
| "epoch": 3.537757437070938, | |
| "grad_norm": 3.409830093383789, | |
| "learning_rate": 1.3280272229944799e-06, | |
| "loss": 0.764272153377533, | |
| "step": 1546 | |
| }, | |
| { | |
| "epoch": 3.54233409610984, | |
| "grad_norm": 1.986831545829773, | |
| "learning_rate": 1.3216732291819096e-06, | |
| "loss": 0.5565627813339233, | |
| "step": 1548 | |
| }, | |
| { | |
| "epoch": 3.5469107551487413, | |
| "grad_norm": 6.892856597900391, | |
| "learning_rate": 1.315379096572783e-06, | |
| "loss": 0.1884869635105133, | |
| "step": 1550 | |
| }, | |
| { | |
| "epoch": 3.551487414187643, | |
| "grad_norm": 4.558145999908447, | |
| "learning_rate": 1.3091449153406024e-06, | |
| "loss": 0.4693112373352051, | |
| "step": 1552 | |
| }, | |
| { | |
| "epoch": 3.5560640732265445, | |
| "grad_norm": 6.389334201812744, | |
| "learning_rate": 1.3029707747999681e-06, | |
| "loss": 0.7146168947219849, | |
| "step": 1554 | |
| }, | |
| { | |
| "epoch": 3.5606407322654463, | |
| "grad_norm": 12.014660835266113, | |
| "learning_rate": 1.2968567634053023e-06, | |
| "loss": 0.3005984127521515, | |
| "step": 1556 | |
| }, | |
| { | |
| "epoch": 3.5652173913043477, | |
| "grad_norm": 2.894502639770508, | |
| "learning_rate": 1.290802968749584e-06, | |
| "loss": 0.6210153698921204, | |
| "step": 1558 | |
| }, | |
| { | |
| "epoch": 3.5697940503432495, | |
| "grad_norm": 13.574015617370605, | |
| "learning_rate": 1.2848094775630856e-06, | |
| "loss": 0.7224968671798706, | |
| "step": 1560 | |
| }, | |
| { | |
| "epoch": 3.5743707093821513, | |
| "grad_norm": 2.8741824626922607, | |
| "learning_rate": 1.2788763757121433e-06, | |
| "loss": 0.5549713373184204, | |
| "step": 1562 | |
| }, | |
| { | |
| "epoch": 3.5789473684210527, | |
| "grad_norm": 3.075819730758667, | |
| "learning_rate": 1.2730037481979132e-06, | |
| "loss": 0.7967220544815063, | |
| "step": 1564 | |
| }, | |
| { | |
| "epoch": 3.583524027459954, | |
| "grad_norm": 1.3015577793121338, | |
| "learning_rate": 1.2671916791551638e-06, | |
| "loss": 0.4095366597175598, | |
| "step": 1566 | |
| }, | |
| { | |
| "epoch": 3.588100686498856, | |
| "grad_norm": 7.499904155731201, | |
| "learning_rate": 1.2614402518510652e-06, | |
| "loss": 0.7936575412750244, | |
| "step": 1568 | |
| }, | |
| { | |
| "epoch": 3.5926773455377576, | |
| "grad_norm": 7.007381439208984, | |
| "learning_rate": 1.255749548683998e-06, | |
| "loss": 0.5722820162773132, | |
| "step": 1570 | |
| }, | |
| { | |
| "epoch": 3.597254004576659, | |
| "grad_norm": 1.998162031173706, | |
| "learning_rate": 1.2501196511823727e-06, | |
| "loss": 0.40031328797340393, | |
| "step": 1572 | |
| }, | |
| { | |
| "epoch": 3.6018306636155604, | |
| "grad_norm": 2.335953712463379, | |
| "learning_rate": 1.2445506400034608e-06, | |
| "loss": 0.5704180002212524, | |
| "step": 1574 | |
| }, | |
| { | |
| "epoch": 3.606407322654462, | |
| "grad_norm": 3.6565961837768555, | |
| "learning_rate": 1.239042594932243e-06, | |
| "loss": 0.5407567620277405, | |
| "step": 1576 | |
| }, | |
| { | |
| "epoch": 3.610983981693364, | |
| "grad_norm": 4.150671482086182, | |
| "learning_rate": 1.2335955948802579e-06, | |
| "loss": 0.7380489110946655, | |
| "step": 1578 | |
| }, | |
| { | |
| "epoch": 3.6155606407322654, | |
| "grad_norm": 7.434592247009277, | |
| "learning_rate": 1.2282097178844815e-06, | |
| "loss": 0.36467939615249634, | |
| "step": 1580 | |
| }, | |
| { | |
| "epoch": 3.620137299771167, | |
| "grad_norm": 2.6412289142608643, | |
| "learning_rate": 1.2228850411062023e-06, | |
| "loss": 0.4587993025779724, | |
| "step": 1582 | |
| }, | |
| { | |
| "epoch": 3.6247139588100685, | |
| "grad_norm": 18.970722198486328, | |
| "learning_rate": 1.217621640829918e-06, | |
| "loss": 0.39366841316223145, | |
| "step": 1584 | |
| }, | |
| { | |
| "epoch": 3.6292906178489703, | |
| "grad_norm": 3.4149601459503174, | |
| "learning_rate": 1.2124195924622428e-06, | |
| "loss": 0.3141392469406128, | |
| "step": 1586 | |
| }, | |
| { | |
| "epoch": 3.6338672768878717, | |
| "grad_norm": 2.6209511756896973, | |
| "learning_rate": 1.2072789705308267e-06, | |
| "loss": 0.4500223398208618, | |
| "step": 1588 | |
| }, | |
| { | |
| "epoch": 3.6384439359267735, | |
| "grad_norm": 8.329633712768555, | |
| "learning_rate": 1.2021998486832888e-06, | |
| "loss": 0.47874724864959717, | |
| "step": 1590 | |
| }, | |
| { | |
| "epoch": 3.643020594965675, | |
| "grad_norm": 8.009090423583984, | |
| "learning_rate": 1.1971822996861585e-06, | |
| "loss": 0.6222150325775146, | |
| "step": 1592 | |
| }, | |
| { | |
| "epoch": 3.6475972540045767, | |
| "grad_norm": 1.0920637845993042, | |
| "learning_rate": 1.192226395423841e-06, | |
| "loss": 0.39640748500823975, | |
| "step": 1594 | |
| }, | |
| { | |
| "epoch": 3.6521739130434785, | |
| "grad_norm": 3.376007556915283, | |
| "learning_rate": 1.1873322068975756e-06, | |
| "loss": 0.3654574751853943, | |
| "step": 1596 | |
| }, | |
| { | |
| "epoch": 3.65675057208238, | |
| "grad_norm": 4.961648941040039, | |
| "learning_rate": 1.1824998042244316e-06, | |
| "loss": 0.08938822150230408, | |
| "step": 1598 | |
| }, | |
| { | |
| "epoch": 3.6613272311212812, | |
| "grad_norm": 9.972960472106934, | |
| "learning_rate": 1.1777292566362922e-06, | |
| "loss": 0.48825398087501526, | |
| "step": 1600 | |
| }, | |
| { | |
| "epoch": 3.665903890160183, | |
| "grad_norm": 1.0946178436279297, | |
| "learning_rate": 1.1730206324788704e-06, | |
| "loss": 0.1946982443332672, | |
| "step": 1602 | |
| }, | |
| { | |
| "epoch": 3.670480549199085, | |
| "grad_norm": 3.8373024463653564, | |
| "learning_rate": 1.1683739992107267e-06, | |
| "loss": 0.38114720582962036, | |
| "step": 1604 | |
| }, | |
| { | |
| "epoch": 3.675057208237986, | |
| "grad_norm": 5.0955047607421875, | |
| "learning_rate": 1.163789423402303e-06, | |
| "loss": 0.628394365310669, | |
| "step": 1606 | |
| }, | |
| { | |
| "epoch": 3.679633867276888, | |
| "grad_norm": 3.3337788581848145, | |
| "learning_rate": 1.1592669707349685e-06, | |
| "loss": 0.5962799787521362, | |
| "step": 1608 | |
| }, | |
| { | |
| "epoch": 3.6842105263157894, | |
| "grad_norm": 4.6492509841918945, | |
| "learning_rate": 1.1548067060000804e-06, | |
| "loss": 0.6295109987258911, | |
| "step": 1610 | |
| }, | |
| { | |
| "epoch": 3.688787185354691, | |
| "grad_norm": 18.938716888427734, | |
| "learning_rate": 1.1504086930980533e-06, | |
| "loss": 0.27833229303359985, | |
| "step": 1612 | |
| }, | |
| { | |
| "epoch": 3.6933638443935926, | |
| "grad_norm": 5.88167667388916, | |
| "learning_rate": 1.1460729950374445e-06, | |
| "loss": 0.2558402419090271, | |
| "step": 1614 | |
| }, | |
| { | |
| "epoch": 3.6979405034324944, | |
| "grad_norm": 3.370272636413574, | |
| "learning_rate": 1.1417996739340537e-06, | |
| "loss": 0.8182615637779236, | |
| "step": 1616 | |
| }, | |
| { | |
| "epoch": 3.7025171624713957, | |
| "grad_norm": 2.657801389694214, | |
| "learning_rate": 1.1375887910100295e-06, | |
| "loss": 0.7889156341552734, | |
| "step": 1618 | |
| }, | |
| { | |
| "epoch": 3.7070938215102975, | |
| "grad_norm": 1.4287097454071045, | |
| "learning_rate": 1.1334404065929939e-06, | |
| "loss": 0.625603437423706, | |
| "step": 1620 | |
| }, | |
| { | |
| "epoch": 3.7116704805491993, | |
| "grad_norm": 5.524312973022461, | |
| "learning_rate": 1.1293545801151788e-06, | |
| "loss": 0.6062618494033813, | |
| "step": 1622 | |
| }, | |
| { | |
| "epoch": 3.7162471395881007, | |
| "grad_norm": 3.7626378536224365, | |
| "learning_rate": 1.1253313701125727e-06, | |
| "loss": 0.5309630632400513, | |
| "step": 1624 | |
| }, | |
| { | |
| "epoch": 3.720823798627002, | |
| "grad_norm": 3.515129566192627, | |
| "learning_rate": 1.1213708342240843e-06, | |
| "loss": 0.644262433052063, | |
| "step": 1626 | |
| }, | |
| { | |
| "epoch": 3.725400457665904, | |
| "grad_norm": 2.292351007461548, | |
| "learning_rate": 1.1174730291907145e-06, | |
| "loss": 0.42081764340400696, | |
| "step": 1628 | |
| }, | |
| { | |
| "epoch": 3.7299771167048057, | |
| "grad_norm": 0.5895558595657349, | |
| "learning_rate": 1.1136380108547446e-06, | |
| "loss": 0.1861320436000824, | |
| "step": 1630 | |
| }, | |
| { | |
| "epoch": 3.734553775743707, | |
| "grad_norm": 1.528093934059143, | |
| "learning_rate": 1.109865834158937e-06, | |
| "loss": 0.11224275827407837, | |
| "step": 1632 | |
| }, | |
| { | |
| "epoch": 3.7391304347826084, | |
| "grad_norm": 4.198037147521973, | |
| "learning_rate": 1.1061565531457457e-06, | |
| "loss": 0.41927570104599, | |
| "step": 1634 | |
| }, | |
| { | |
| "epoch": 3.7437070938215102, | |
| "grad_norm": 2.7169034481048584, | |
| "learning_rate": 1.1025102209565463e-06, | |
| "loss": 0.31835800409317017, | |
| "step": 1636 | |
| }, | |
| { | |
| "epoch": 3.748283752860412, | |
| "grad_norm": 2.142369270324707, | |
| "learning_rate": 1.098926889830869e-06, | |
| "loss": 0.8377334475517273, | |
| "step": 1638 | |
| }, | |
| { | |
| "epoch": 3.7528604118993134, | |
| "grad_norm": 1.0328131914138794, | |
| "learning_rate": 1.0954066111056552e-06, | |
| "loss": 0.043129995465278625, | |
| "step": 1640 | |
| }, | |
| { | |
| "epoch": 3.757437070938215, | |
| "grad_norm": 3.350853681564331, | |
| "learning_rate": 1.091949435214518e-06, | |
| "loss": 0.7317330837249756, | |
| "step": 1642 | |
| }, | |
| { | |
| "epoch": 3.7620137299771166, | |
| "grad_norm": 2.704472541809082, | |
| "learning_rate": 1.0885554116870248e-06, | |
| "loss": 0.7968192100524902, | |
| "step": 1644 | |
| }, | |
| { | |
| "epoch": 3.7665903890160184, | |
| "grad_norm": 0.1938525140285492, | |
| "learning_rate": 1.0852245891479815e-06, | |
| "loss": 0.03669770807027817, | |
| "step": 1646 | |
| }, | |
| { | |
| "epoch": 3.7711670480549198, | |
| "grad_norm": 2.804135799407959, | |
| "learning_rate": 1.08195701531674e-06, | |
| "loss": 0.6800875067710876, | |
| "step": 1648 | |
| }, | |
| { | |
| "epoch": 3.7757437070938216, | |
| "grad_norm": 0.6211162805557251, | |
| "learning_rate": 1.0787527370065134e-06, | |
| "loss": 0.49580419063568115, | |
| "step": 1650 | |
| }, | |
| { | |
| "epoch": 3.780320366132723, | |
| "grad_norm": 9.704923629760742, | |
| "learning_rate": 1.0756118001237055e-06, | |
| "loss": 0.302669882774353, | |
| "step": 1652 | |
| }, | |
| { | |
| "epoch": 3.7848970251716247, | |
| "grad_norm": 1.8114262819290161, | |
| "learning_rate": 1.0725342496672537e-06, | |
| "loss": 0.6123445630073547, | |
| "step": 1654 | |
| }, | |
| { | |
| "epoch": 3.7894736842105265, | |
| "grad_norm": 1.53504478931427, | |
| "learning_rate": 1.0695201297279822e-06, | |
| "loss": 0.5894599556922913, | |
| "step": 1656 | |
| }, | |
| { | |
| "epoch": 3.794050343249428, | |
| "grad_norm": 1.9072415828704834, | |
| "learning_rate": 1.066569483487972e-06, | |
| "loss": 0.45558661222457886, | |
| "step": 1658 | |
| }, | |
| { | |
| "epoch": 3.7986270022883293, | |
| "grad_norm": 4.523164749145508, | |
| "learning_rate": 1.063682353219944e-06, | |
| "loss": 0.510258674621582, | |
| "step": 1660 | |
| }, | |
| { | |
| "epoch": 3.803203661327231, | |
| "grad_norm": 3.0822136402130127, | |
| "learning_rate": 1.0608587802866479e-06, | |
| "loss": 0.8670095205307007, | |
| "step": 1662 | |
| }, | |
| { | |
| "epoch": 3.807780320366133, | |
| "grad_norm": 1.9081244468688965, | |
| "learning_rate": 1.0580988051402764e-06, | |
| "loss": 0.7442867755889893, | |
| "step": 1664 | |
| }, | |
| { | |
| "epoch": 3.8123569794050343, | |
| "grad_norm": 3.498737335205078, | |
| "learning_rate": 1.0554024673218808e-06, | |
| "loss": 0.48411673307418823, | |
| "step": 1666 | |
| }, | |
| { | |
| "epoch": 3.816933638443936, | |
| "grad_norm": 5.711022853851318, | |
| "learning_rate": 1.052769805460805e-06, | |
| "loss": 0.41106265783309937, | |
| "step": 1668 | |
| }, | |
| { | |
| "epoch": 3.8215102974828374, | |
| "grad_norm": 14.483606338500977, | |
| "learning_rate": 1.0502008572741354e-06, | |
| "loss": 0.6216870546340942, | |
| "step": 1670 | |
| }, | |
| { | |
| "epoch": 3.8260869565217392, | |
| "grad_norm": 3.853814125061035, | |
| "learning_rate": 1.0476956595661574e-06, | |
| "loss": 0.5673307180404663, | |
| "step": 1672 | |
| }, | |
| { | |
| "epoch": 3.8306636155606406, | |
| "grad_norm": 0.7420060038566589, | |
| "learning_rate": 1.045254248227828e-06, | |
| "loss": 0.2633235454559326, | |
| "step": 1674 | |
| }, | |
| { | |
| "epoch": 3.8352402745995424, | |
| "grad_norm": 1.8191514015197754, | |
| "learning_rate": 1.042876658236263e-06, | |
| "loss": 0.41805195808410645, | |
| "step": 1676 | |
| }, | |
| { | |
| "epoch": 3.839816933638444, | |
| "grad_norm": 3.2566258907318115, | |
| "learning_rate": 1.0405629236542371e-06, | |
| "loss": 0.4752364754676819, | |
| "step": 1678 | |
| }, | |
| { | |
| "epoch": 3.8443935926773456, | |
| "grad_norm": 0.7727957963943481, | |
| "learning_rate": 1.0383130776296923e-06, | |
| "loss": 0.4213360846042633, | |
| "step": 1680 | |
| }, | |
| { | |
| "epoch": 3.8489702517162474, | |
| "grad_norm": 20.79123878479004, | |
| "learning_rate": 1.036127152395266e-06, | |
| "loss": 0.6792758703231812, | |
| "step": 1682 | |
| }, | |
| { | |
| "epoch": 3.8535469107551488, | |
| "grad_norm": 2.3199901580810547, | |
| "learning_rate": 1.0340051792678276e-06, | |
| "loss": 0.7548638582229614, | |
| "step": 1684 | |
| }, | |
| { | |
| "epoch": 3.85812356979405, | |
| "grad_norm": 2.1454737186431885, | |
| "learning_rate": 1.0319471886480315e-06, | |
| "loss": 0.5545675754547119, | |
| "step": 1686 | |
| }, | |
| { | |
| "epoch": 3.862700228832952, | |
| "grad_norm": 2.8614749908447266, | |
| "learning_rate": 1.0299532100198784e-06, | |
| "loss": 0.5754020810127258, | |
| "step": 1688 | |
| }, | |
| { | |
| "epoch": 3.8672768878718538, | |
| "grad_norm": 5.313638687133789, | |
| "learning_rate": 1.0280232719502975e-06, | |
| "loss": 0.32507747411727905, | |
| "step": 1690 | |
| }, | |
| { | |
| "epoch": 3.871853546910755, | |
| "grad_norm": 6.3688859939575195, | |
| "learning_rate": 1.0261574020887336e-06, | |
| "loss": 0.5391194820404053, | |
| "step": 1692 | |
| }, | |
| { | |
| "epoch": 3.8764302059496565, | |
| "grad_norm": 2.4996461868286133, | |
| "learning_rate": 1.0243556271667513e-06, | |
| "loss": 0.6387747526168823, | |
| "step": 1694 | |
| }, | |
| { | |
| "epoch": 3.8810068649885583, | |
| "grad_norm": 4.952239513397217, | |
| "learning_rate": 1.0226179729976544e-06, | |
| "loss": 0.23900987207889557, | |
| "step": 1696 | |
| }, | |
| { | |
| "epoch": 3.88558352402746, | |
| "grad_norm": 2.5153167247772217, | |
| "learning_rate": 1.0209444644761138e-06, | |
| "loss": 0.7215524911880493, | |
| "step": 1698 | |
| }, | |
| { | |
| "epoch": 3.8901601830663615, | |
| "grad_norm": 2.760258197784424, | |
| "learning_rate": 1.0193351255778111e-06, | |
| "loss": 0.6749666929244995, | |
| "step": 1700 | |
| }, | |
| { | |
| "epoch": 3.8947368421052633, | |
| "grad_norm": 3.0027530193328857, | |
| "learning_rate": 1.0177899793590958e-06, | |
| "loss": 0.7268638610839844, | |
| "step": 1702 | |
| }, | |
| { | |
| "epoch": 3.8993135011441646, | |
| "grad_norm": 236.1914520263672, | |
| "learning_rate": 1.0163090479566553e-06, | |
| "loss": 0.5174607634544373, | |
| "step": 1704 | |
| }, | |
| { | |
| "epoch": 3.9038901601830664, | |
| "grad_norm": 3.04256534576416, | |
| "learning_rate": 1.0148923525871973e-06, | |
| "loss": 0.501115620136261, | |
| "step": 1706 | |
| }, | |
| { | |
| "epoch": 3.908466819221968, | |
| "grad_norm": 5.1830973625183105, | |
| "learning_rate": 1.0135399135471451e-06, | |
| "loss": 0.37597358226776123, | |
| "step": 1708 | |
| }, | |
| { | |
| "epoch": 3.9130434782608696, | |
| "grad_norm": 8.081040382385254, | |
| "learning_rate": 1.012251750212347e-06, | |
| "loss": 0.47812142968177795, | |
| "step": 1710 | |
| }, | |
| { | |
| "epoch": 3.917620137299771, | |
| "grad_norm": 6.02420711517334, | |
| "learning_rate": 1.0110278810378003e-06, | |
| "loss": 0.3577365577220917, | |
| "step": 1712 | |
| }, | |
| { | |
| "epoch": 3.922196796338673, | |
| "grad_norm": 9.104696273803711, | |
| "learning_rate": 1.0098683235573856e-06, | |
| "loss": 0.700224757194519, | |
| "step": 1714 | |
| }, | |
| { | |
| "epoch": 3.9267734553775746, | |
| "grad_norm": 6.046388626098633, | |
| "learning_rate": 1.0087730943836149e-06, | |
| "loss": 0.8385029435157776, | |
| "step": 1716 | |
| }, | |
| { | |
| "epoch": 3.931350114416476, | |
| "grad_norm": 7.305265426635742, | |
| "learning_rate": 1.0077422092073958e-06, | |
| "loss": 0.43619590997695923, | |
| "step": 1718 | |
| }, | |
| { | |
| "epoch": 3.9359267734553773, | |
| "grad_norm": 7.582434177398682, | |
| "learning_rate": 1.0067756827978048e-06, | |
| "loss": 0.4124600887298584, | |
| "step": 1720 | |
| }, | |
| { | |
| "epoch": 3.940503432494279, | |
| "grad_norm": 5.771373748779297, | |
| "learning_rate": 1.0058735290018753e-06, | |
| "loss": 0.5592625141143799, | |
| "step": 1722 | |
| }, | |
| { | |
| "epoch": 3.945080091533181, | |
| "grad_norm": 2.8298215866088867, | |
| "learning_rate": 1.0050357607444016e-06, | |
| "loss": 0.6869655847549438, | |
| "step": 1724 | |
| }, | |
| { | |
| "epoch": 3.9496567505720823, | |
| "grad_norm": 2.2002291679382324, | |
| "learning_rate": 1.0042623900277524e-06, | |
| "loss": 0.3992425799369812, | |
| "step": 1726 | |
| }, | |
| { | |
| "epoch": 3.954233409610984, | |
| "grad_norm": 1.8750855922698975, | |
| "learning_rate": 1.003553427931697e-06, | |
| "loss": 0.6239903569221497, | |
| "step": 1728 | |
| }, | |
| { | |
| "epoch": 3.9588100686498855, | |
| "grad_norm": 5.004032135009766, | |
| "learning_rate": 1.0029088846132508e-06, | |
| "loss": 0.3995712995529175, | |
| "step": 1730 | |
| }, | |
| { | |
| "epoch": 3.9633867276887873, | |
| "grad_norm": 0.870651125907898, | |
| "learning_rate": 1.0023287693065253e-06, | |
| "loss": 0.429832398891449, | |
| "step": 1732 | |
| }, | |
| { | |
| "epoch": 3.9679633867276887, | |
| "grad_norm": 6.058652400970459, | |
| "learning_rate": 1.0018130903225988e-06, | |
| "loss": 0.6913228034973145, | |
| "step": 1734 | |
| }, | |
| { | |
| "epoch": 3.9725400457665905, | |
| "grad_norm": 3.251649856567383, | |
| "learning_rate": 1.001361855049396e-06, | |
| "loss": 0.3300914764404297, | |
| "step": 1736 | |
| }, | |
| { | |
| "epoch": 3.977116704805492, | |
| "grad_norm": 7.703200340270996, | |
| "learning_rate": 1.000975069951584e-06, | |
| "loss": 0.24439480900764465, | |
| "step": 1738 | |
| }, | |
| { | |
| "epoch": 3.9816933638443937, | |
| "grad_norm": 6.612419605255127, | |
| "learning_rate": 1.0006527405704755e-06, | |
| "loss": 0.501896858215332, | |
| "step": 1740 | |
| }, | |
| { | |
| "epoch": 3.9862700228832955, | |
| "grad_norm": 2.503108024597168, | |
| "learning_rate": 1.0003948715239546e-06, | |
| "loss": 0.34206831455230713, | |
| "step": 1742 | |
| }, | |
| { | |
| "epoch": 3.990846681922197, | |
| "grad_norm": 4.86019229888916, | |
| "learning_rate": 1.0002014665064067e-06, | |
| "loss": 0.30612480640411377, | |
| "step": 1744 | |
| }, | |
| { | |
| "epoch": 3.995423340961098, | |
| "grad_norm": 2.529582977294922, | |
| "learning_rate": 1.0000725282886676e-06, | |
| "loss": 0.6712839603424072, | |
| "step": 1746 | |
| }, | |
| { | |
| "epoch": 4.0, | |
| "grad_norm": 2.009540557861328, | |
| "learning_rate": 1.000008058717983e-06, | |
| "loss": 0.042355649173259735, | |
| "step": 1748 | |
| }, | |
| { | |
| "epoch": 4.0, | |
| "step": 1748, | |
| "total_flos": 3.4427459894320824e+18, | |
| "train_loss": 0.8253325358783353, | |
| "train_runtime": 4390.1543, | |
| "train_samples_per_second": 11.945, | |
| "train_steps_per_second": 0.398 | |
| } | |
| ], | |
| "logging_steps": 2, | |
| "max_steps": 1748, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 4, | |
| "save_steps": 99999, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": false, | |
| "should_training_stop": false | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 3.4427459894320824e+18, | |
| "train_batch_size": 3, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |