Image-Text-to-Text
Transformers
Safetensors
qwen3_5
llama-factory
full
Generated from Trainer
conversational
Instructions to use furproxy/9b-82 with libraries, inference providers, notebooks, and local apps. Follow these links to get started.
- Libraries
- Transformers
How to use furproxy/9b-82 with Transformers:
```python
# Use a pipeline as a high-level helper
from transformers import pipeline

pipe = pipeline("image-text-to-text", model="furproxy/9b-82")
messages = [
    {
        "role": "user",
        "content": [
            {"type": "image", "url": "https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/p-blog/candy.JPG"},
            {"type": "text", "text": "What animal is on the candy?"}
        ]
    },
]
pipe(text=messages)
```
```python
# Load model directly
from transformers import AutoProcessor, AutoModelForImageTextToText

processor = AutoProcessor.from_pretrained("furproxy/9b-82")
model = AutoModelForImageTextToText.from_pretrained("furproxy/9b-82")
messages = [
    {
        "role": "user",
        "content": [
            {"type": "image", "url": "https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/p-blog/candy.JPG"},
            {"type": "text", "text": "What animal is on the candy?"}
        ]
    },
]
inputs = processor.apply_chat_template(
    messages,
    add_generation_prompt=True,
    tokenize=True,
    return_dict=True,
    return_tensors="pt",
).to(model.device)

outputs = model.generate(**inputs, max_new_tokens=40)
print(processor.decode(outputs[0][inputs["input_ids"].shape[-1]:]))
```
- Notebooks
- Google Colab
- Kaggle
- Local Apps Settings
- vLLM
How to use furproxy/9b-82 with vLLM:
Install from pip and serve model
```bash
# Install vLLM from pip:
pip install vllm

# Start the vLLM server:
vllm serve "furproxy/9b-82"

# Call the server using curl (OpenAI-compatible API):
curl -X POST "http://localhost:8000/v1/chat/completions" \
    -H "Content-Type: application/json" \
    --data '{
        "model": "furproxy/9b-82",
        "messages": [
            {
                "role": "user",
                "content": [
                    {
                        "type": "text",
                        "text": "Describe this image in one sentence."
                    },
                    {
                        "type": "image_url",
                        "image_url": {
                            "url": "https://cdn.britannica.com/61/93061-050-99147DCE/Statue-of-Liberty-Island-New-York-Bay.jpg"
                        }
                    }
                ]
            }
        ]
    }'
```
Use Docker
docker model run hf.co/furproxy/9b-82
- SGLang
How to use furproxy/9b-82 with SGLang:
Install from pip and serve model
```bash
# Install SGLang from pip:
pip install sglang

# Start the SGLang server:
python3 -m sglang.launch_server \
    --model-path "furproxy/9b-82" \
    --host 0.0.0.0 \
    --port 30000

# Call the server using curl (OpenAI-compatible API):
curl -X POST "http://localhost:30000/v1/chat/completions" \
    -H "Content-Type: application/json" \
    --data '{
        "model": "furproxy/9b-82",
        "messages": [
            {
                "role": "user",
                "content": [
                    {
                        "type": "text",
                        "text": "Describe this image in one sentence."
                    },
                    {
                        "type": "image_url",
                        "image_url": {
                            "url": "https://cdn.britannica.com/61/93061-050-99147DCE/Statue-of-Liberty-Island-New-York-Bay.jpg"
                        }
                    }
                ]
            }
        ]
    }'
```
Use Docker images
```bash
docker run --gpus all \
    --shm-size 32g \
    -p 30000:30000 \
    -v ~/.cache/huggingface:/root/.cache/huggingface \
    --env "HF_TOKEN=<secret>" \
    --ipc=host \
    lmsysorg/sglang:latest \
    python3 -m sglang.launch_server \
        --model-path "furproxy/9b-82" \
        --host 0.0.0.0 \
        --port 30000

# Call the server using curl (OpenAI-compatible API):
curl -X POST "http://localhost:30000/v1/chat/completions" \
    -H "Content-Type: application/json" \
    --data '{
        "model": "furproxy/9b-82",
        "messages": [
            {
                "role": "user",
                "content": [
                    {
                        "type": "text",
                        "text": "Describe this image in one sentence."
                    },
                    {
                        "type": "image_url",
                        "image_url": {
                            "url": "https://cdn.britannica.com/61/93061-050-99147DCE/Statue-of-Liberty-Island-New-York-Bay.jpg"
                        }
                    }
                ]
            }
        ]
    }'
```
- Docker Model Runner
How to use furproxy/9b-82 with Docker Model Runner:
docker model run hf.co/furproxy/9b-82
| { | |
| "best_global_step": null, | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 4.0, | |
| "eval_steps": 500, | |
| "global_step": 1804, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.004434589800443459, | |
| "grad_norm": 3.3554506301879883, | |
| "learning_rate": 1.098901098901099e-07, | |
| "loss": 1.8640056848526, | |
| "step": 2 | |
| }, | |
| { | |
| "epoch": 0.008869179600886918, | |
| "grad_norm": 6.643158435821533, | |
| "learning_rate": 3.296703296703297e-07, | |
| "loss": 2.1273629665374756, | |
| "step": 4 | |
| }, | |
| { | |
| "epoch": 0.013303769401330377, | |
| "grad_norm": 3.7049906253814697, | |
| "learning_rate": 5.494505494505495e-07, | |
| "loss": 1.900314450263977, | |
| "step": 6 | |
| }, | |
| { | |
| "epoch": 0.017738359201773836, | |
| "grad_norm": 1.3328758478164673, | |
| "learning_rate": 7.692307692307694e-07, | |
| "loss": 1.8142547607421875, | |
| "step": 8 | |
| }, | |
| { | |
| "epoch": 0.022172949002217297, | |
| "grad_norm": 3.059246778488159, | |
| "learning_rate": 9.890109890109891e-07, | |
| "loss": 1.6169909238815308, | |
| "step": 10 | |
| }, | |
| { | |
| "epoch": 0.026607538802660754, | |
| "grad_norm": 2.269116163253784, | |
| "learning_rate": 1.2087912087912089e-06, | |
| "loss": 2.002399444580078, | |
| "step": 12 | |
| }, | |
| { | |
| "epoch": 0.031042128603104215, | |
| "grad_norm": 41.05389404296875, | |
| "learning_rate": 1.4285714285714286e-06, | |
| "loss": 1.5568478107452393, | |
| "step": 14 | |
| }, | |
| { | |
| "epoch": 0.03547671840354767, | |
| "grad_norm": 5.92053747177124, | |
| "learning_rate": 1.6483516483516484e-06, | |
| "loss": 1.1748180389404297, | |
| "step": 16 | |
| }, | |
| { | |
| "epoch": 0.03991130820399113, | |
| "grad_norm": 2.922490119934082, | |
| "learning_rate": 1.8681318681318684e-06, | |
| "loss": 1.1770696640014648, | |
| "step": 18 | |
| }, | |
| { | |
| "epoch": 0.04434589800443459, | |
| "grad_norm": 3.8268938064575195, | |
| "learning_rate": 2.0879120879120883e-06, | |
| "loss": 1.273401141166687, | |
| "step": 20 | |
| }, | |
| { | |
| "epoch": 0.04878048780487805, | |
| "grad_norm": 2.3410561084747314, | |
| "learning_rate": 2.307692307692308e-06, | |
| "loss": 1.3016290664672852, | |
| "step": 22 | |
| }, | |
| { | |
| "epoch": 0.05321507760532151, | |
| "grad_norm": 1.2151767015457153, | |
| "learning_rate": 2.5274725274725274e-06, | |
| "loss": 1.5213745832443237, | |
| "step": 24 | |
| }, | |
| { | |
| "epoch": 0.057649667405764965, | |
| "grad_norm": 1.567148208618164, | |
| "learning_rate": 2.7472527472527476e-06, | |
| "loss": 1.2907153367996216, | |
| "step": 26 | |
| }, | |
| { | |
| "epoch": 0.06208425720620843, | |
| "grad_norm": 1.8262158632278442, | |
| "learning_rate": 2.9670329670329673e-06, | |
| "loss": 1.0924012660980225, | |
| "step": 28 | |
| }, | |
| { | |
| "epoch": 0.06651884700665188, | |
| "grad_norm": 2.100053071975708, | |
| "learning_rate": 3.1868131868131867e-06, | |
| "loss": 1.216428518295288, | |
| "step": 30 | |
| }, | |
| { | |
| "epoch": 0.07095343680709534, | |
| "grad_norm": 2.2148892879486084, | |
| "learning_rate": 3.406593406593407e-06, | |
| "loss": 1.120039463043213, | |
| "step": 32 | |
| }, | |
| { | |
| "epoch": 0.07538802660753881, | |
| "grad_norm": 1.2118117809295654, | |
| "learning_rate": 3.6263736263736266e-06, | |
| "loss": 1.4959304332733154, | |
| "step": 34 | |
| }, | |
| { | |
| "epoch": 0.07982261640798226, | |
| "grad_norm": 23.823530197143555, | |
| "learning_rate": 3.846153846153847e-06, | |
| "loss": 1.2191643714904785, | |
| "step": 36 | |
| }, | |
| { | |
| "epoch": 0.08425720620842572, | |
| "grad_norm": 2.678205728530884, | |
| "learning_rate": 4.065934065934066e-06, | |
| "loss": 1.5424106121063232, | |
| "step": 38 | |
| }, | |
| { | |
| "epoch": 0.08869179600886919, | |
| "grad_norm": 1.8756370544433594, | |
| "learning_rate": 4.2857142857142855e-06, | |
| "loss": 0.7748823165893555, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 0.09312638580931264, | |
| "grad_norm": 1.0674728155136108, | |
| "learning_rate": 4.505494505494506e-06, | |
| "loss": 1.4076530933380127, | |
| "step": 42 | |
| }, | |
| { | |
| "epoch": 0.0975609756097561, | |
| "grad_norm": 0.9798895716667175, | |
| "learning_rate": 4.725274725274726e-06, | |
| "loss": 1.3454558849334717, | |
| "step": 44 | |
| }, | |
| { | |
| "epoch": 0.10199556541019955, | |
| "grad_norm": 3.0464468002319336, | |
| "learning_rate": 4.945054945054946e-06, | |
| "loss": 1.3695911169052124, | |
| "step": 46 | |
| }, | |
| { | |
| "epoch": 0.10643015521064302, | |
| "grad_norm": 5.054316520690918, | |
| "learning_rate": 5.164835164835166e-06, | |
| "loss": 1.5866491794586182, | |
| "step": 48 | |
| }, | |
| { | |
| "epoch": 0.11086474501108648, | |
| "grad_norm": 2.4682066440582275, | |
| "learning_rate": 5.384615384615385e-06, | |
| "loss": 1.4569354057312012, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 0.11529933481152993, | |
| "grad_norm": 1.1039724349975586, | |
| "learning_rate": 5.604395604395605e-06, | |
| "loss": 1.375510334968567, | |
| "step": 52 | |
| }, | |
| { | |
| "epoch": 0.1197339246119734, | |
| "grad_norm": 0.911780595779419, | |
| "learning_rate": 5.824175824175825e-06, | |
| "loss": 1.3685683012008667, | |
| "step": 54 | |
| }, | |
| { | |
| "epoch": 0.12416851441241686, | |
| "grad_norm": 6.081076145172119, | |
| "learning_rate": 6.043956043956044e-06, | |
| "loss": 1.1247367858886719, | |
| "step": 56 | |
| }, | |
| { | |
| "epoch": 0.1286031042128603, | |
| "grad_norm": 1.635683536529541, | |
| "learning_rate": 6.2637362637362645e-06, | |
| "loss": 1.3490444421768188, | |
| "step": 58 | |
| }, | |
| { | |
| "epoch": 0.13303769401330376, | |
| "grad_norm": 1.733303427696228, | |
| "learning_rate": 6.483516483516485e-06, | |
| "loss": 1.4053246974945068, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 0.13747228381374724, | |
| "grad_norm": 1.308205485343933, | |
| "learning_rate": 6.703296703296703e-06, | |
| "loss": 1.3603203296661377, | |
| "step": 62 | |
| }, | |
| { | |
| "epoch": 0.1419068736141907, | |
| "grad_norm": 2.4009580612182617, | |
| "learning_rate": 6.923076923076923e-06, | |
| "loss": 1.403708577156067, | |
| "step": 64 | |
| }, | |
| { | |
| "epoch": 0.14634146341463414, | |
| "grad_norm": 1.5370208024978638, | |
| "learning_rate": 7.1428571428571436e-06, | |
| "loss": 1.4732989072799683, | |
| "step": 66 | |
| }, | |
| { | |
| "epoch": 0.15077605321507762, | |
| "grad_norm": 0.9826664328575134, | |
| "learning_rate": 7.362637362637364e-06, | |
| "loss": 1.347627878189087, | |
| "step": 68 | |
| }, | |
| { | |
| "epoch": 0.15521064301552107, | |
| "grad_norm": 0.939538300037384, | |
| "learning_rate": 7.582417582417583e-06, | |
| "loss": 1.3008030652999878, | |
| "step": 70 | |
| }, | |
| { | |
| "epoch": 0.15964523281596452, | |
| "grad_norm": 1.8689913749694824, | |
| "learning_rate": 7.802197802197802e-06, | |
| "loss": 1.3441245555877686, | |
| "step": 72 | |
| }, | |
| { | |
| "epoch": 0.164079822616408, | |
| "grad_norm": 1.0277022123336792, | |
| "learning_rate": 8.021978021978023e-06, | |
| "loss": 1.249913215637207, | |
| "step": 74 | |
| }, | |
| { | |
| "epoch": 0.16851441241685144, | |
| "grad_norm": 2.182894706726074, | |
| "learning_rate": 8.241758241758243e-06, | |
| "loss": 1.3225178718566895, | |
| "step": 76 | |
| }, | |
| { | |
| "epoch": 0.1729490022172949, | |
| "grad_norm": 1.0984985828399658, | |
| "learning_rate": 8.461538461538462e-06, | |
| "loss": 1.3755261898040771, | |
| "step": 78 | |
| }, | |
| { | |
| "epoch": 0.17738359201773837, | |
| "grad_norm": 1.6256319284439087, | |
| "learning_rate": 8.681318681318681e-06, | |
| "loss": 1.3338983058929443, | |
| "step": 80 | |
| }, | |
| { | |
| "epoch": 0.18181818181818182, | |
| "grad_norm": 1.7835297584533691, | |
| "learning_rate": 8.9010989010989e-06, | |
| "loss": 0.840368926525116, | |
| "step": 82 | |
| }, | |
| { | |
| "epoch": 0.18625277161862527, | |
| "grad_norm": 1.2216118574142456, | |
| "learning_rate": 9.120879120879122e-06, | |
| "loss": 1.0529584884643555, | |
| "step": 84 | |
| }, | |
| { | |
| "epoch": 0.19068736141906872, | |
| "grad_norm": 8.783498764038086, | |
| "learning_rate": 9.340659340659341e-06, | |
| "loss": 0.9759446978569031, | |
| "step": 86 | |
| }, | |
| { | |
| "epoch": 0.1951219512195122, | |
| "grad_norm": 1.1936975717544556, | |
| "learning_rate": 9.560439560439562e-06, | |
| "loss": 1.2668921947479248, | |
| "step": 88 | |
| }, | |
| { | |
| "epoch": 0.19955654101995565, | |
| "grad_norm": 2.084754705429077, | |
| "learning_rate": 9.780219780219781e-06, | |
| "loss": 1.4002450704574585, | |
| "step": 90 | |
| }, | |
| { | |
| "epoch": 0.2039911308203991, | |
| "grad_norm": 1.1517976522445679, | |
| "learning_rate": 1e-05, | |
| "loss": 1.2500206232070923, | |
| "step": 92 | |
| }, | |
| { | |
| "epoch": 0.20842572062084258, | |
| "grad_norm": 0.9133433699607849, | |
| "learning_rate": 9.99996972898091e-06, | |
| "loss": 1.260838270187378, | |
| "step": 94 | |
| }, | |
| { | |
| "epoch": 0.21286031042128603, | |
| "grad_norm": 1.9567794799804688, | |
| "learning_rate": 9.999878916330893e-06, | |
| "loss": 1.7782005071640015, | |
| "step": 96 | |
| }, | |
| { | |
| "epoch": 0.21729490022172948, | |
| "grad_norm": 1.7563012838363647, | |
| "learning_rate": 9.999727563271727e-06, | |
| "loss": 0.9444801807403564, | |
| "step": 98 | |
| }, | |
| { | |
| "epoch": 0.22172949002217296, | |
| "grad_norm": 0.9970119595527649, | |
| "learning_rate": 9.999515671839682e-06, | |
| "loss": 1.1939334869384766, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.2261640798226164, | |
| "grad_norm": 0.8175113201141357, | |
| "learning_rate": 9.999243244885499e-06, | |
| "loss": 1.3335109949111938, | |
| "step": 102 | |
| }, | |
| { | |
| "epoch": 0.23059866962305986, | |
| "grad_norm": 0.8858981728553772, | |
| "learning_rate": 9.998910286074355e-06, | |
| "loss": 1.0767525434494019, | |
| "step": 104 | |
| }, | |
| { | |
| "epoch": 0.23503325942350334, | |
| "grad_norm": 1.0978095531463623, | |
| "learning_rate": 9.998516799885806e-06, | |
| "loss": 1.301979899406433, | |
| "step": 106 | |
| }, | |
| { | |
| "epoch": 0.2394678492239468, | |
| "grad_norm": 1.8789381980895996, | |
| "learning_rate": 9.998062791613729e-06, | |
| "loss": 1.5071189403533936, | |
| "step": 108 | |
| }, | |
| { | |
| "epoch": 0.24390243902439024, | |
| "grad_norm": 0.8671095371246338, | |
| "learning_rate": 9.997548267366255e-06, | |
| "loss": 1.0696842670440674, | |
| "step": 110 | |
| }, | |
| { | |
| "epoch": 0.24833702882483372, | |
| "grad_norm": 2.358786106109619, | |
| "learning_rate": 9.996973234065685e-06, | |
| "loss": 1.3439608812332153, | |
| "step": 112 | |
| }, | |
| { | |
| "epoch": 0.25277161862527714, | |
| "grad_norm": 2.1430983543395996, | |
| "learning_rate": 9.996337699448392e-06, | |
| "loss": 0.8265036940574646, | |
| "step": 114 | |
| }, | |
| { | |
| "epoch": 0.2572062084257206, | |
| "grad_norm": 0.876884400844574, | |
| "learning_rate": 9.995641672064726e-06, | |
| "loss": 1.3629298210144043, | |
| "step": 116 | |
| }, | |
| { | |
| "epoch": 0.2616407982261641, | |
| "grad_norm": 0.48178285360336304, | |
| "learning_rate": 9.994885161278885e-06, | |
| "loss": 1.1036193370819092, | |
| "step": 118 | |
| }, | |
| { | |
| "epoch": 0.2660753880266075, | |
| "grad_norm": 0.9723570942878723, | |
| "learning_rate": 9.994068177268807e-06, | |
| "loss": 1.2973068952560425, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 0.270509977827051, | |
| "grad_norm": 2.7924370765686035, | |
| "learning_rate": 9.993190731026024e-06, | |
| "loss": 1.2149885892868042, | |
| "step": 122 | |
| }, | |
| { | |
| "epoch": 0.2749445676274945, | |
| "grad_norm": 2.260781764984131, | |
| "learning_rate": 9.992252834355503e-06, | |
| "loss": 1.5927814245224, | |
| "step": 124 | |
| }, | |
| { | |
| "epoch": 0.2793791574279379, | |
| "grad_norm": 1.4566712379455566, | |
| "learning_rate": 9.99125449987551e-06, | |
| "loss": 0.9197668433189392, | |
| "step": 126 | |
| }, | |
| { | |
| "epoch": 0.2838137472283814, | |
| "grad_norm": 1.0135411024093628, | |
| "learning_rate": 9.990195741017422e-06, | |
| "loss": 1.2308801412582397, | |
| "step": 128 | |
| }, | |
| { | |
| "epoch": 0.28824833702882485, | |
| "grad_norm": 1.2406107187271118, | |
| "learning_rate": 9.989076572025554e-06, | |
| "loss": 0.9607541561126709, | |
| "step": 130 | |
| }, | |
| { | |
| "epoch": 0.2926829268292683, | |
| "grad_norm": 9.23150634765625, | |
| "learning_rate": 9.987897007956968e-06, | |
| "loss": 1.0329269170761108, | |
| "step": 132 | |
| }, | |
| { | |
| "epoch": 0.29711751662971175, | |
| "grad_norm": 7.180088520050049, | |
| "learning_rate": 9.986657064681267e-06, | |
| "loss": 1.0436151027679443, | |
| "step": 134 | |
| }, | |
| { | |
| "epoch": 0.30155210643015523, | |
| "grad_norm": 5.097459316253662, | |
| "learning_rate": 9.98535675888038e-06, | |
| "loss": 1.2045552730560303, | |
| "step": 136 | |
| }, | |
| { | |
| "epoch": 0.30598669623059865, | |
| "grad_norm": 4.074262619018555, | |
| "learning_rate": 9.983996108048345e-06, | |
| "loss": 0.8822503685951233, | |
| "step": 138 | |
| }, | |
| { | |
| "epoch": 0.31042128603104213, | |
| "grad_norm": 2.92777156829834, | |
| "learning_rate": 9.982575130491068e-06, | |
| "loss": 1.1243302822113037, | |
| "step": 140 | |
| }, | |
| { | |
| "epoch": 0.3148558758314856, | |
| "grad_norm": 1.5820231437683105, | |
| "learning_rate": 9.981093845326079e-06, | |
| "loss": 1.0163542032241821, | |
| "step": 142 | |
| }, | |
| { | |
| "epoch": 0.31929046563192903, | |
| "grad_norm": 7.281858921051025, | |
| "learning_rate": 9.979552272482268e-06, | |
| "loss": 0.9830036163330078, | |
| "step": 144 | |
| }, | |
| { | |
| "epoch": 0.3237250554323725, | |
| "grad_norm": 2.2801249027252197, | |
| "learning_rate": 9.977950432699629e-06, | |
| "loss": 0.952194094657898, | |
| "step": 146 | |
| }, | |
| { | |
| "epoch": 0.328159645232816, | |
| "grad_norm": 1.4058592319488525, | |
| "learning_rate": 9.976288347528972e-06, | |
| "loss": 1.4162338972091675, | |
| "step": 148 | |
| }, | |
| { | |
| "epoch": 0.3325942350332594, | |
| "grad_norm": 6.441311359405518, | |
| "learning_rate": 9.974566039331634e-06, | |
| "loss": 1.170148253440857, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 0.3370288248337029, | |
| "grad_norm": 2.101698160171509, | |
| "learning_rate": 9.972783531279184e-06, | |
| "loss": 1.0924369096755981, | |
| "step": 152 | |
| }, | |
| { | |
| "epoch": 0.34146341463414637, | |
| "grad_norm": 1.7578390836715698, | |
| "learning_rate": 9.970940847353103e-06, | |
| "loss": 1.272911548614502, | |
| "step": 154 | |
| }, | |
| { | |
| "epoch": 0.3458980044345898, | |
| "grad_norm": 1.6119791269302368, | |
| "learning_rate": 9.969038012344465e-06, | |
| "loss": 1.215100884437561, | |
| "step": 156 | |
| }, | |
| { | |
| "epoch": 0.35033259423503327, | |
| "grad_norm": 3.253216028213501, | |
| "learning_rate": 9.967075051853609e-06, | |
| "loss": 1.2995957136154175, | |
| "step": 158 | |
| }, | |
| { | |
| "epoch": 0.35476718403547675, | |
| "grad_norm": 0.5216699242591858, | |
| "learning_rate": 9.965051992289782e-06, | |
| "loss": 1.125442624092102, | |
| "step": 160 | |
| }, | |
| { | |
| "epoch": 0.35920177383592017, | |
| "grad_norm": 1.3521455526351929, | |
| "learning_rate": 9.962968860870798e-06, | |
| "loss": 0.8663116097450256, | |
| "step": 162 | |
| }, | |
| { | |
| "epoch": 0.36363636363636365, | |
| "grad_norm": 1.3028908967971802, | |
| "learning_rate": 9.96082568562266e-06, | |
| "loss": 0.9397580027580261, | |
| "step": 164 | |
| }, | |
| { | |
| "epoch": 0.36807095343680707, | |
| "grad_norm": 1.2469340562820435, | |
| "learning_rate": 9.958622495379193e-06, | |
| "loss": 1.2785670757293701, | |
| "step": 166 | |
| }, | |
| { | |
| "epoch": 0.37250554323725055, | |
| "grad_norm": 2.306950330734253, | |
| "learning_rate": 9.956359319781642e-06, | |
| "loss": 1.2098650932312012, | |
| "step": 168 | |
| }, | |
| { | |
| "epoch": 0.376940133037694, | |
| "grad_norm": 2.1364083290100098, | |
| "learning_rate": 9.954036189278292e-06, | |
| "loss": 1.1903345584869385, | |
| "step": 170 | |
| }, | |
| { | |
| "epoch": 0.38137472283813745, | |
| "grad_norm": 3.892174482345581, | |
| "learning_rate": 9.951653135124045e-06, | |
| "loss": 0.795183539390564, | |
| "step": 172 | |
| }, | |
| { | |
| "epoch": 0.3858093126385809, | |
| "grad_norm": 2.057490825653076, | |
| "learning_rate": 9.94921018938e-06, | |
| "loss": 1.6345411539077759, | |
| "step": 174 | |
| }, | |
| { | |
| "epoch": 0.3902439024390244, | |
| "grad_norm": 2.5715115070343018, | |
| "learning_rate": 9.946707384913027e-06, | |
| "loss": 1.2782708406448364, | |
| "step": 176 | |
| }, | |
| { | |
| "epoch": 0.3946784922394678, | |
| "grad_norm": 1.30599045753479, | |
| "learning_rate": 9.944144755395321e-06, | |
| "loss": 1.3271397352218628, | |
| "step": 178 | |
| }, | |
| { | |
| "epoch": 0.3991130820399113, | |
| "grad_norm": 3.162742853164673, | |
| "learning_rate": 9.941522335303955e-06, | |
| "loss": 1.079615592956543, | |
| "step": 180 | |
| }, | |
| { | |
| "epoch": 0.4035476718403548, | |
| "grad_norm": 1.9985105991363525, | |
| "learning_rate": 9.938840159920406e-06, | |
| "loss": 1.2768008708953857, | |
| "step": 182 | |
| }, | |
| { | |
| "epoch": 0.4079822616407982, | |
| "grad_norm": 1.0763951539993286, | |
| "learning_rate": 9.93609826533009e-06, | |
| "loss": 0.8742649555206299, | |
| "step": 184 | |
| }, | |
| { | |
| "epoch": 0.4124168514412417, | |
| "grad_norm": 1.6700925827026367, | |
| "learning_rate": 9.933296688421872e-06, | |
| "loss": 0.9816460013389587, | |
| "step": 186 | |
| }, | |
| { | |
| "epoch": 0.41685144124168516, | |
| "grad_norm": 1.3022819757461548, | |
| "learning_rate": 9.930435466887564e-06, | |
| "loss": 0.9694766998291016, | |
| "step": 188 | |
| }, | |
| { | |
| "epoch": 0.4212860310421286, | |
| "grad_norm": 4.169641017913818, | |
| "learning_rate": 9.927514639221433e-06, | |
| "loss": 1.0260610580444336, | |
| "step": 190 | |
| }, | |
| { | |
| "epoch": 0.42572062084257206, | |
| "grad_norm": 2.331894636154175, | |
| "learning_rate": 9.92453424471967e-06, | |
| "loss": 0.8971779346466064, | |
| "step": 192 | |
| }, | |
| { | |
| "epoch": 0.43015521064301554, | |
| "grad_norm": 1.6991792917251587, | |
| "learning_rate": 9.921494323479862e-06, | |
| "loss": 1.3024404048919678, | |
| "step": 194 | |
| }, | |
| { | |
| "epoch": 0.43458980044345896, | |
| "grad_norm": 1.732059121131897, | |
| "learning_rate": 9.918394916400465e-06, | |
| "loss": 1.5722870826721191, | |
| "step": 196 | |
| }, | |
| { | |
| "epoch": 0.43902439024390244, | |
| "grad_norm": 1.7187920808792114, | |
| "learning_rate": 9.915236065180235e-06, | |
| "loss": 1.2906628847122192, | |
| "step": 198 | |
| }, | |
| { | |
| "epoch": 0.4434589800443459, | |
| "grad_norm": 1.5556683540344238, | |
| "learning_rate": 9.912017812317684e-06, | |
| "loss": 1.1513667106628418, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.44789356984478934, | |
| "grad_norm": 1.5006672143936157, | |
| "learning_rate": 9.908740201110497e-06, | |
| "loss": 1.2683374881744385, | |
| "step": 202 | |
| }, | |
| { | |
| "epoch": 0.4523281596452328, | |
| "grad_norm": 0.682464063167572, | |
| "learning_rate": 9.905403275654951e-06, | |
| "loss": 1.2315711975097656, | |
| "step": 204 | |
| }, | |
| { | |
| "epoch": 0.4567627494456763, | |
| "grad_norm": 1.3839211463928223, | |
| "learning_rate": 9.902007080845336e-06, | |
| "loss": 1.0794446468353271, | |
| "step": 206 | |
| }, | |
| { | |
| "epoch": 0.4611973392461197, | |
| "grad_norm": 2.0738892555236816, | |
| "learning_rate": 9.898551662373325e-06, | |
| "loss": 1.0966854095458984, | |
| "step": 208 | |
| }, | |
| { | |
| "epoch": 0.4656319290465632, | |
| "grad_norm": 3.4559130668640137, | |
| "learning_rate": 9.895037066727382e-06, | |
| "loss": 0.6951467394828796, | |
| "step": 210 | |
| }, | |
| { | |
| "epoch": 0.4700665188470067, | |
| "grad_norm": 4.855388641357422, | |
| "learning_rate": 9.891463341192124e-06, | |
| "loss": 0.8868162035942078, | |
| "step": 212 | |
| }, | |
| { | |
| "epoch": 0.4745011086474501, | |
| "grad_norm": 0.7046754360198975, | |
| "learning_rate": 9.88783053384769e-06, | |
| "loss": 1.0986382961273193, | |
| "step": 214 | |
| }, | |
| { | |
| "epoch": 0.4789356984478936, | |
| "grad_norm": 1.4185415506362915, | |
| "learning_rate": 9.884138693569095e-06, | |
| "loss": 1.1152902841567993, | |
| "step": 216 | |
| }, | |
| { | |
| "epoch": 0.48337028824833705, | |
| "grad_norm": 0.9065267443656921, | |
| "learning_rate": 9.88038787002557e-06, | |
| "loss": 1.130263090133667, | |
| "step": 218 | |
| }, | |
| { | |
| "epoch": 0.4878048780487805, | |
| "grad_norm": 1.9596226215362549, | |
| "learning_rate": 9.876578113679891e-06, | |
| "loss": 1.2346633672714233, | |
| "step": 220 | |
| }, | |
| { | |
| "epoch": 0.49223946784922396, | |
| "grad_norm": 1.4536895751953125, | |
| "learning_rate": 9.872709475787708e-06, | |
| "loss": 1.2405025959014893, | |
| "step": 222 | |
| }, | |
| { | |
| "epoch": 0.49667405764966743, | |
| "grad_norm": 0.745212972164154, | |
| "learning_rate": 9.868782008396848e-06, | |
| "loss": 1.2145315408706665, | |
| "step": 224 | |
| }, | |
| { | |
| "epoch": 0.5011086474501109, | |
| "grad_norm": 0.7731147408485413, | |
| "learning_rate": 9.864795764346615e-06, | |
| "loss": 1.2132314443588257, | |
| "step": 226 | |
| }, | |
| { | |
| "epoch": 0.5055432372505543, | |
| "grad_norm": 0.931675374507904, | |
| "learning_rate": 9.860750797267085e-06, | |
| "loss": 1.2666246891021729, | |
| "step": 228 | |
| }, | |
| { | |
| "epoch": 0.5099778270509978, | |
| "grad_norm": 1.4641870260238647, | |
| "learning_rate": 9.856647161578384e-06, | |
| "loss": 1.855782151222229, | |
| "step": 230 | |
| }, | |
| { | |
| "epoch": 0.5144124168514412, | |
| "grad_norm": 1.5169061422348022, | |
| "learning_rate": 9.852484912489946e-06, | |
| "loss": 0.8463820219039917, | |
| "step": 232 | |
| }, | |
| { | |
| "epoch": 0.5188470066518847, | |
| "grad_norm": 0.7717849016189575, | |
| "learning_rate": 9.848264105999783e-06, | |
| "loss": 1.2967286109924316, | |
| "step": 234 | |
| }, | |
| { | |
| "epoch": 0.5232815964523282, | |
| "grad_norm": 0.6193521022796631, | |
| "learning_rate": 9.843984798893722e-06, | |
| "loss": 0.9528148174285889, | |
| "step": 236 | |
| }, | |
| { | |
| "epoch": 0.5277161862527716, | |
| "grad_norm": 0.6214136481285095, | |
| "learning_rate": 9.839647048744645e-06, | |
| "loss": 1.0307953357696533, | |
| "step": 238 | |
| }, | |
| { | |
| "epoch": 0.532150776053215, | |
| "grad_norm": 0.6043981909751892, | |
| "learning_rate": 9.83525091391172e-06, | |
| "loss": 1.2823783159255981, | |
| "step": 240 | |
| }, | |
| { | |
| "epoch": 0.5365853658536586, | |
| "grad_norm": 0.8168203234672546, | |
| "learning_rate": 9.8307964535396e-06, | |
| "loss": 1.3375117778778076, | |
| "step": 242 | |
| }, | |
| { | |
| "epoch": 0.541019955654102, | |
| "grad_norm": 10.725733757019043, | |
| "learning_rate": 9.826283727557644e-06, | |
| "loss": 1.0199899673461914, | |
| "step": 244 | |
| }, | |
| { | |
| "epoch": 0.5454545454545454, | |
| "grad_norm": 1.418264389038086, | |
| "learning_rate": 9.821712796679106e-06, | |
| "loss": 1.2732073068618774, | |
| "step": 246 | |
| }, | |
| { | |
| "epoch": 0.549889135254989, | |
| "grad_norm": 0.9490866661071777, | |
| "learning_rate": 9.817083722400309e-06, | |
| "loss": 1.4992234706878662, | |
| "step": 248 | |
| }, | |
| { | |
| "epoch": 0.5543237250554324, | |
| "grad_norm": 1.1178134679794312, | |
| "learning_rate": 9.812396566999832e-06, | |
| "loss": 1.2512468099594116, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 0.5587583148558758, | |
| "grad_norm": 5.028557777404785, | |
| "learning_rate": 9.807651393537659e-06, | |
| "loss": 0.7450082302093506, | |
| "step": 252 | |
| }, | |
| { | |
| "epoch": 0.5631929046563193, | |
| "grad_norm": 1.3556923866271973, | |
| "learning_rate": 9.802848265854343e-06, | |
| "loss": 1.4887409210205078, | |
| "step": 254 | |
| }, | |
| { | |
| "epoch": 0.5676274944567627, | |
| "grad_norm": 1.1928884983062744, | |
| "learning_rate": 9.797987248570137e-06, | |
| "loss": 1.3401446342468262, | |
| "step": 256 | |
| }, | |
| { | |
| "epoch": 0.5720620842572062, | |
| "grad_norm": 0.7480881214141846, | |
| "learning_rate": 9.793068407084125e-06, | |
| "loss": 1.306605577468872, | |
| "step": 258 | |
| }, | |
| { | |
| "epoch": 0.5764966740576497, | |
| "grad_norm": 0.641482412815094, | |
| "learning_rate": 9.78809180757335e-06, | |
| "loss": 1.3040629625320435, | |
| "step": 260 | |
| }, | |
| { | |
| "epoch": 0.5809312638580931, | |
| "grad_norm": 3.0096657276153564, | |
| "learning_rate": 9.783057516991921e-06, | |
| "loss": 0.7431791424751282, | |
| "step": 262 | |
| }, | |
| { | |
| "epoch": 0.5853658536585366, | |
| "grad_norm": 0.928672194480896, | |
| "learning_rate": 9.777965603070106e-06, | |
| "loss": 1.3897572755813599, | |
| "step": 264 | |
| }, | |
| { | |
| "epoch": 0.5898004434589801, | |
| "grad_norm": 1.2365378141403198, | |
| "learning_rate": 9.772816134313424e-06, | |
| "loss": 1.0928809642791748, | |
| "step": 266 | |
| }, | |
| { | |
| "epoch": 0.5942350332594235, | |
| "grad_norm": 0.8595125079154968, | |
| "learning_rate": 9.76760918000173e-06, | |
| "loss": 1.5679038763046265, | |
| "step": 268 | |
| }, | |
| { | |
| "epoch": 0.5986696230598669, | |
| "grad_norm": 3.3064000606536865, | |
| "learning_rate": 9.762344810188276e-06, | |
| "loss": 1.21007239818573, | |
| "step": 270 | |
| }, | |
| { | |
| "epoch": 0.6031042128603105, | |
| "grad_norm": 0.6491636037826538, | |
| "learning_rate": 9.757023095698766e-06, | |
| "loss": 1.2509498596191406, | |
| "step": 272 | |
| }, | |
| { | |
| "epoch": 0.6075388026607539, | |
| "grad_norm": 2.3098983764648438, | |
| "learning_rate": 9.751644108130405e-06, | |
| "loss": 1.2461967468261719, | |
| "step": 274 | |
| }, | |
| { | |
| "epoch": 0.6119733924611973, | |
| "grad_norm": 0.7247368097305298, | |
| "learning_rate": 9.746207919850951e-06, | |
| "loss": 1.2410979270935059, | |
| "step": 276 | |
| }, | |
| { | |
| "epoch": 0.6164079822616408, | |
| "grad_norm": 3.6421871185302734, | |
| "learning_rate": 9.740714603997712e-06, | |
| "loss": 1.2530133724212646, | |
| "step": 278 | |
| }, | |
| { | |
| "epoch": 0.6208425720620843, | |
| "grad_norm": 0.7265106439590454, | |
| "learning_rate": 9.735164234476588e-06, | |
| "loss": 1.3131635189056396, | |
| "step": 280 | |
| }, | |
| { | |
| "epoch": 0.6252771618625277, | |
| "grad_norm": 1.2309120893478394, | |
| "learning_rate": 9.729556885961064e-06, | |
| "loss": 0.9114726781845093, | |
| "step": 282 | |
| }, | |
| { | |
| "epoch": 0.6297117516629712, | |
| "grad_norm": 0.7240830063819885, | |
| "learning_rate": 9.72389263389121e-06, | |
| "loss": 1.2823668718338013, | |
| "step": 284 | |
| }, | |
| { | |
| "epoch": 0.6341463414634146, | |
| "grad_norm": 1.1647893190383911, | |
| "learning_rate": 9.718171554472662e-06, | |
| "loss": 1.3458237648010254, | |
| "step": 286 | |
| }, | |
| { | |
| "epoch": 0.6385809312638581, | |
| "grad_norm": 0.8723113536834717, | |
| "learning_rate": 9.712393724675597e-06, | |
| "loss": 1.2615196704864502, | |
| "step": 288 | |
| }, | |
| { | |
| "epoch": 0.6430155210643016, | |
| "grad_norm": 0.7617486119270325, | |
| "learning_rate": 9.706559222233704e-06, | |
| "loss": 1.2500044107437134, | |
| "step": 290 | |
| }, | |
| { | |
| "epoch": 0.647450110864745, | |
| "grad_norm": 1.2043062448501587, | |
| "learning_rate": 9.700668125643132e-06, | |
| "loss": 1.3805018663406372, | |
| "step": 292 | |
| }, | |
| { | |
| "epoch": 0.6518847006651884, | |
| "grad_norm": 0.8987176418304443, | |
| "learning_rate": 9.694720514161437e-06, | |
| "loss": 0.9510623216629028, | |
| "step": 294 | |
| }, | |
| { | |
| "epoch": 0.656319290465632, | |
| "grad_norm": 1.2220165729522705, | |
| "learning_rate": 9.688716467806508e-06, | |
| "loss": 1.0256685018539429, | |
| "step": 296 | |
| }, | |
| { | |
| "epoch": 0.6607538802660754, | |
| "grad_norm": 1.0878409147262573, | |
| "learning_rate": 9.682656067355505e-06, | |
| "loss": 1.230331301689148, | |
| "step": 298 | |
| }, | |
| { | |
| "epoch": 0.6651884700665188, | |
| "grad_norm": 0.7459701299667358, | |
| "learning_rate": 9.67653939434376e-06, | |
| "loss": 1.272396206855774, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 0.6696230598669624, | |
| "grad_norm": 0.6311793327331543, | |
| "learning_rate": 9.670366531063686e-06, | |
| "loss": 1.216810703277588, | |
| "step": 302 | |
| }, | |
| { | |
| "epoch": 0.6740576496674058, | |
| "grad_norm": 0.9430868625640869, | |
| "learning_rate": 9.664137560563663e-06, | |
| "loss": 1.2837876081466675, | |
| "step": 304 | |
| }, | |
| { | |
| "epoch": 0.6784922394678492, | |
| "grad_norm": 0.6982176303863525, | |
| "learning_rate": 9.657852566646929e-06, | |
| "loss": 1.2521125078201294, | |
| "step": 306 | |
| }, | |
| { | |
| "epoch": 0.6829268292682927, | |
| "grad_norm": 0.9578081369400024, | |
| "learning_rate": 9.651511633870451e-06, | |
| "loss": 0.9001311659812927, | |
| "step": 308 | |
| }, | |
| { | |
| "epoch": 0.6873614190687362, | |
| "grad_norm": 0.7129071354866028, | |
| "learning_rate": 9.645114847543781e-06, | |
| "loss": 1.2543275356292725, | |
| "step": 310 | |
| }, | |
| { | |
| "epoch": 0.6917960088691796, | |
| "grad_norm": 0.7872418761253357, | |
| "learning_rate": 9.638662293727916e-06, | |
| "loss": 1.225979208946228, | |
| "step": 312 | |
| }, | |
| { | |
| "epoch": 0.6962305986696231, | |
| "grad_norm": 0.8527798652648926, | |
| "learning_rate": 9.632154059234137e-06, | |
| "loss": 1.2440277338027954, | |
| "step": 314 | |
| }, | |
| { | |
| "epoch": 0.7006651884700665, | |
| "grad_norm": 0.8361867666244507, | |
| "learning_rate": 9.625590231622837e-06, | |
| "loss": 1.3528887033462524, | |
| "step": 316 | |
| }, | |
| { | |
| "epoch": 0.70509977827051, | |
| "grad_norm": 1.285349726676941, | |
| "learning_rate": 9.618970899202354e-06, | |
| "loss": 1.0397502183914185, | |
| "step": 318 | |
| }, | |
| { | |
| "epoch": 0.7095343680709535, | |
| "grad_norm": 1.2274253368377686, | |
| "learning_rate": 9.612296151027765e-06, | |
| "loss": 1.0072234869003296, | |
| "step": 320 | |
| }, | |
| { | |
| "epoch": 0.7139689578713969, | |
| "grad_norm": 1.74605393409729, | |
| "learning_rate": 9.605566076899714e-06, | |
| "loss": 1.0066006183624268, | |
| "step": 322 | |
| }, | |
| { | |
| "epoch": 0.7184035476718403, | |
| "grad_norm": 1.2762601375579834, | |
| "learning_rate": 9.598780767363174e-06, | |
| "loss": 1.1060127019882202, | |
| "step": 324 | |
| }, | |
| { | |
| "epoch": 0.7228381374722838, | |
| "grad_norm": 2.128355026245117, | |
| "learning_rate": 9.591940313706248e-06, | |
| "loss": 1.1243398189544678, | |
| "step": 326 | |
| }, | |
| { | |
| "epoch": 0.7272727272727273, | |
| "grad_norm": 0.4223320186138153, | |
| "learning_rate": 9.585044807958942e-06, | |
| "loss": 0.8515225648880005, | |
| "step": 328 | |
| }, | |
| { | |
| "epoch": 0.7317073170731707, | |
| "grad_norm": 1.9096373319625854, | |
| "learning_rate": 9.578094342891915e-06, | |
| "loss": 0.8710092902183533, | |
| "step": 330 | |
| }, | |
| { | |
| "epoch": 0.7361419068736141, | |
| "grad_norm": 1.7320899963378906, | |
| "learning_rate": 9.571089012015237e-06, | |
| "loss": 1.403598666191101, | |
| "step": 332 | |
| }, | |
| { | |
| "epoch": 0.7405764966740577, | |
| "grad_norm": 2.438868999481201, | |
| "learning_rate": 9.564028909577132e-06, | |
| "loss": 1.208489179611206, | |
| "step": 334 | |
| }, | |
| { | |
| "epoch": 0.7450110864745011, | |
| "grad_norm": 1.0341501235961914, | |
| "learning_rate": 9.55691413056271e-06, | |
| "loss": 1.2458648681640625, | |
| "step": 336 | |
| }, | |
| { | |
| "epoch": 0.7494456762749445, | |
| "grad_norm": 2.2480390071868896, | |
| "learning_rate": 9.54974477069269e-06, | |
| "loss": 0.7539517283439636, | |
| "step": 338 | |
| }, | |
| { | |
| "epoch": 0.753880266075388, | |
| "grad_norm": 1.8287423849105835, | |
| "learning_rate": 9.542520926422105e-06, | |
| "loss": 0.3899018466472626, | |
| "step": 340 | |
| }, | |
| { | |
| "epoch": 0.7583148558758315, | |
| "grad_norm": 1.3207381963729858, | |
| "learning_rate": 9.535242694939011e-06, | |
| "loss": 0.9129378199577332, | |
| "step": 342 | |
| }, | |
| { | |
| "epoch": 0.7627494456762749, | |
| "grad_norm": 0.7297150492668152, | |
| "learning_rate": 9.527910174163179e-06, | |
| "loss": 1.3148428201675415, | |
| "step": 344 | |
| }, | |
| { | |
| "epoch": 0.7671840354767184, | |
| "grad_norm": 0.6148672103881836, | |
| "learning_rate": 9.520523462744776e-06, | |
| "loss": 1.2478430271148682, | |
| "step": 346 | |
| }, | |
| { | |
| "epoch": 0.7716186252771619, | |
| "grad_norm": 2.0105557441711426, | |
| "learning_rate": 9.51308266006304e-06, | |
| "loss": 1.108123540878296, | |
| "step": 348 | |
| }, | |
| { | |
| "epoch": 0.7760532150776053, | |
| "grad_norm": 0.5692620873451233, | |
| "learning_rate": 9.505587866224939e-06, | |
| "loss": 1.2364296913146973, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 0.7804878048780488, | |
| "grad_norm": 1.4871420860290527, | |
| "learning_rate": 9.498039182063828e-06, | |
| "loss": 1.178447961807251, | |
| "step": 352 | |
| }, | |
| { | |
| "epoch": 0.7849223946784922, | |
| "grad_norm": 2.9917361736297607, | |
| "learning_rate": 9.49043670913809e-06, | |
| "loss": 1.0383565425872803, | |
| "step": 354 | |
| }, | |
| { | |
| "epoch": 0.7893569844789357, | |
| "grad_norm": 1.1324853897094727, | |
| "learning_rate": 9.48278054972977e-06, | |
| "loss": 1.2439326047897339, | |
| "step": 356 | |
| }, | |
| { | |
| "epoch": 0.7937915742793792, | |
| "grad_norm": 1.664652943611145, | |
| "learning_rate": 9.475070806843202e-06, | |
| "loss": 1.293602466583252, | |
| "step": 358 | |
| }, | |
| { | |
| "epoch": 0.7982261640798226, | |
| "grad_norm": 0.6531599164009094, | |
| "learning_rate": 9.467307584203619e-06, | |
| "loss": 1.2421692609786987, | |
| "step": 360 | |
| }, | |
| { | |
| "epoch": 0.802660753880266, | |
| "grad_norm": 0.9146925806999207, | |
| "learning_rate": 9.459490986255756e-06, | |
| "loss": 0.6786575317382812, | |
| "step": 362 | |
| }, | |
| { | |
| "epoch": 0.8070953436807096, | |
| "grad_norm": 1.0817216634750366, | |
| "learning_rate": 9.451621118162453e-06, | |
| "loss": 1.3574942350387573, | |
| "step": 364 | |
| }, | |
| { | |
| "epoch": 0.811529933481153, | |
| "grad_norm": 0.8012416958808899, | |
| "learning_rate": 9.443698085803235e-06, | |
| "loss": 1.2184292078018188, | |
| "step": 366 | |
| }, | |
| { | |
| "epoch": 0.8159645232815964, | |
| "grad_norm": 0.6288676261901855, | |
| "learning_rate": 9.435721995772884e-06, | |
| "loss": 1.0145061016082764, | |
| "step": 368 | |
| }, | |
| { | |
| "epoch": 0.8203991130820399, | |
| "grad_norm": 2.079406499862671, | |
| "learning_rate": 9.42769295538001e-06, | |
| "loss": 0.8563152551651001, | |
| "step": 370 | |
| }, | |
| { | |
| "epoch": 0.8248337028824834, | |
| "grad_norm": 0.6172999143600464, | |
| "learning_rate": 9.419611072645608e-06, | |
| "loss": 1.246902585029602, | |
| "step": 372 | |
| }, | |
| { | |
| "epoch": 0.8292682926829268, | |
| "grad_norm": 0.6710400581359863, | |
| "learning_rate": 9.4114764563016e-06, | |
| "loss": 1.2490512132644653, | |
| "step": 374 | |
| }, | |
| { | |
| "epoch": 0.8337028824833703, | |
| "grad_norm": 1.4091862440109253, | |
| "learning_rate": 9.403289215789373e-06, | |
| "loss": 1.2173607349395752, | |
| "step": 376 | |
| }, | |
| { | |
| "epoch": 0.8381374722838137, | |
| "grad_norm": 1.2435530424118042, | |
| "learning_rate": 9.395049461258318e-06, | |
| "loss": 1.2395451068878174, | |
| "step": 378 | |
| }, | |
| { | |
| "epoch": 0.8425720620842572, | |
| "grad_norm": 1.2862529754638672, | |
| "learning_rate": 9.386757303564323e-06, | |
| "loss": 0.807468593120575, | |
| "step": 380 | |
| }, | |
| { | |
| "epoch": 0.8470066518847007, | |
| "grad_norm": 0.6037282943725586, | |
| "learning_rate": 9.37841285426831e-06, | |
| "loss": 1.3310133218765259, | |
| "step": 382 | |
| }, | |
| { | |
| "epoch": 0.8514412416851441, | |
| "grad_norm": 0.6190339922904968, | |
| "learning_rate": 9.370016225634719e-06, | |
| "loss": 1.168778896331787, | |
| "step": 384 | |
| }, | |
| { | |
| "epoch": 0.8558758314855875, | |
| "grad_norm": 0.557336688041687, | |
| "learning_rate": 9.361567530629988e-06, | |
| "loss": 1.2637746334075928, | |
| "step": 386 | |
| }, | |
| { | |
| "epoch": 0.8603104212860311, | |
| "grad_norm": 0.4134604334831238, | |
| "learning_rate": 9.353066882921063e-06, | |
| "loss": 1.192622423171997, | |
| "step": 388 | |
| }, | |
| { | |
| "epoch": 0.8647450110864745, | |
| "grad_norm": 0.5234290957450867, | |
| "learning_rate": 9.344514396873837e-06, | |
| "loss": 1.2653976678848267, | |
| "step": 390 | |
| }, | |
| { | |
| "epoch": 0.8691796008869179, | |
| "grad_norm": 1.246576189994812, | |
| "learning_rate": 9.335910187551628e-06, | |
| "loss": 0.9260438084602356, | |
| "step": 392 | |
| }, | |
| { | |
| "epoch": 0.8736141906873615, | |
| "grad_norm": 0.5154649615287781, | |
| "learning_rate": 9.327254370713636e-06, | |
| "loss": 1.2277519702911377, | |
| "step": 394 | |
| }, | |
| { | |
| "epoch": 0.8780487804878049, | |
| "grad_norm": 1.1650840044021606, | |
| "learning_rate": 9.31854706281336e-06, | |
| "loss": 1.2231098413467407, | |
| "step": 396 | |
| }, | |
| { | |
| "epoch": 0.8824833702882483, | |
| "grad_norm": 0.8881715536117554, | |
| "learning_rate": 9.309788380997069e-06, | |
| "loss": 1.219472050666809, | |
| "step": 398 | |
| }, | |
| { | |
| "epoch": 0.8869179600886918, | |
| "grad_norm": 1.3225759267807007, | |
| "learning_rate": 9.30097844310219e-06, | |
| "loss": 0.5170519351959229, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 0.8913525498891353, | |
| "grad_norm": 0.6402235627174377, | |
| "learning_rate": 9.292117367655749e-06, | |
| "loss": 1.1000133752822876, | |
| "step": 402 | |
| }, | |
| { | |
| "epoch": 0.8957871396895787, | |
| "grad_norm": 0.25668174028396606, | |
| "learning_rate": 9.283205273872757e-06, | |
| "loss": 0.9859495162963867, | |
| "step": 404 | |
| }, | |
| { | |
| "epoch": 0.9002217294900222, | |
| "grad_norm": 0.659263014793396, | |
| "learning_rate": 9.274242281654621e-06, | |
| "loss": 1.2722219228744507, | |
| "step": 406 | |
| }, | |
| { | |
| "epoch": 0.9046563192904656, | |
| "grad_norm": 1.187609314918518, | |
| "learning_rate": 9.265228511587525e-06, | |
| "loss": 1.1655584573745728, | |
| "step": 408 | |
| }, | |
| { | |
| "epoch": 0.9090909090909091, | |
| "grad_norm": 0.9958807229995728, | |
| "learning_rate": 9.2561640849408e-06, | |
| "loss": 1.1174354553222656, | |
| "step": 410 | |
| }, | |
| { | |
| "epoch": 0.9135254988913526, | |
| "grad_norm": 0.497241348028183, | |
| "learning_rate": 9.247049123665306e-06, | |
| "loss": 1.240470051765442, | |
| "step": 412 | |
| }, | |
| { | |
| "epoch": 0.917960088691796, | |
| "grad_norm": 0.5129937529563904, | |
| "learning_rate": 9.237883750391786e-06, | |
| "loss": 1.2640633583068848, | |
| "step": 414 | |
| }, | |
| { | |
| "epoch": 0.9223946784922394, | |
| "grad_norm": 1.200805902481079, | |
| "learning_rate": 9.228668088429212e-06, | |
| "loss": 1.023282766342163, | |
| "step": 416 | |
| }, | |
| { | |
| "epoch": 0.926829268292683, | |
| "grad_norm": 1.1263169050216675, | |
| "learning_rate": 9.219402261763129e-06, | |
| "loss": 1.2812139987945557, | |
| "step": 418 | |
| }, | |
| { | |
| "epoch": 0.9312638580931264, | |
| "grad_norm": 8.3773832321167, | |
| "learning_rate": 9.210086395053992e-06, | |
| "loss": 0.8014230728149414, | |
| "step": 420 | |
| }, | |
| { | |
| "epoch": 0.9356984478935698, | |
| "grad_norm": 1.9554097652435303, | |
| "learning_rate": 9.200720613635476e-06, | |
| "loss": 1.4770504236221313, | |
| "step": 422 | |
| }, | |
| { | |
| "epoch": 0.9401330376940134, | |
| "grad_norm": 9.581804275512695, | |
| "learning_rate": 9.191305043512806e-06, | |
| "loss": 1.068493366241455, | |
| "step": 424 | |
| }, | |
| { | |
| "epoch": 0.9445676274944568, | |
| "grad_norm": 2.6033480167388916, | |
| "learning_rate": 9.181839811361048e-06, | |
| "loss": 1.225175380706787, | |
| "step": 426 | |
| }, | |
| { | |
| "epoch": 0.9490022172949002, | |
| "grad_norm": 0.506930947303772, | |
| "learning_rate": 9.172325044523413e-06, | |
| "loss": 1.1149027347564697, | |
| "step": 428 | |
| }, | |
| { | |
| "epoch": 0.9534368070953437, | |
| "grad_norm": 2.6233949661254883, | |
| "learning_rate": 9.16276087100954e-06, | |
| "loss": 0.7080057859420776, | |
| "step": 430 | |
| }, | |
| { | |
| "epoch": 0.9578713968957872, | |
| "grad_norm": 0.4864356815814972, | |
| "learning_rate": 9.153147419493774e-06, | |
| "loss": 1.2395509481430054, | |
| "step": 432 | |
| }, | |
| { | |
| "epoch": 0.9623059866962306, | |
| "grad_norm": 10.547813415527344, | |
| "learning_rate": 9.143484819313441e-06, | |
| "loss": 1.1166844367980957, | |
| "step": 434 | |
| }, | |
| { | |
| "epoch": 0.9667405764966741, | |
| "grad_norm": 0.5617235898971558, | |
| "learning_rate": 9.133773200467095e-06, | |
| "loss": 1.2907624244689941, | |
| "step": 436 | |
| }, | |
| { | |
| "epoch": 0.9711751662971175, | |
| "grad_norm": 0.6111188530921936, | |
| "learning_rate": 9.12401269361278e-06, | |
| "loss": 1.2545419931411743, | |
| "step": 438 | |
| }, | |
| { | |
| "epoch": 0.975609756097561, | |
| "grad_norm": 2.3575284481048584, | |
| "learning_rate": 9.114203430066273e-06, | |
| "loss": 0.825833797454834, | |
| "step": 440 | |
| }, | |
| { | |
| "epoch": 0.9800443458980045, | |
| "grad_norm": 0.5967570543289185, | |
| "learning_rate": 9.104345541799304e-06, | |
| "loss": 1.0687720775604248, | |
| "step": 442 | |
| }, | |
| { | |
| "epoch": 0.9844789356984479, | |
| "grad_norm": 0.7675039172172546, | |
| "learning_rate": 9.094439161437797e-06, | |
| "loss": 1.3078503608703613, | |
| "step": 444 | |
| }, | |
| { | |
| "epoch": 0.9889135254988913, | |
| "grad_norm": 0.5262892842292786, | |
| "learning_rate": 9.084484422260079e-06, | |
| "loss": 1.2215782403945923, | |
| "step": 446 | |
| }, | |
| { | |
| "epoch": 0.9933481152993349, | |
| "grad_norm": 0.5987316370010376, | |
| "learning_rate": 9.074481458195077e-06, | |
| "loss": 1.2558292150497437, | |
| "step": 448 | |
| }, | |
| { | |
| "epoch": 0.9977827050997783, | |
| "grad_norm": 0.6052929162979126, | |
| "learning_rate": 9.064430403820538e-06, | |
| "loss": 1.0702413320541382, | |
| "step": 450 | |
| }, | |
| { | |
| "epoch": 1.0022172949002217, | |
| "grad_norm": 2.1425399780273438, | |
| "learning_rate": 9.054331394361195e-06, | |
| "loss": 1.0677464008331299, | |
| "step": 452 | |
| }, | |
| { | |
| "epoch": 1.0066518847006651, | |
| "grad_norm": 1.4416669607162476, | |
| "learning_rate": 9.044184565686963e-06, | |
| "loss": 1.2889671325683594, | |
| "step": 454 | |
| }, | |
| { | |
| "epoch": 1.0110864745011086, | |
| "grad_norm": 0.6405385136604309, | |
| "learning_rate": 9.033990054311108e-06, | |
| "loss": 1.169727087020874, | |
| "step": 456 | |
| }, | |
| { | |
| "epoch": 1.0155210643015522, | |
| "grad_norm": 0.6425080895423889, | |
| "learning_rate": 9.023747997388409e-06, | |
| "loss": 0.8333257436752319, | |
| "step": 458 | |
| }, | |
| { | |
| "epoch": 1.0199556541019956, | |
| "grad_norm": 0.2827882170677185, | |
| "learning_rate": 9.013458532713303e-06, | |
| "loss": 0.76810622215271, | |
| "step": 460 | |
| }, | |
| { | |
| "epoch": 1.024390243902439, | |
| "grad_norm": 0.7950817942619324, | |
| "learning_rate": 9.003121798718055e-06, | |
| "loss": 0.5744680166244507, | |
| "step": 462 | |
| }, | |
| { | |
| "epoch": 1.0288248337028825, | |
| "grad_norm": 2.603362560272217, | |
| "learning_rate": 8.992737934470875e-06, | |
| "loss": 0.8523436188697815, | |
| "step": 464 | |
| }, | |
| { | |
| "epoch": 1.033259423503326, | |
| "grad_norm": 0.9304072856903076, | |
| "learning_rate": 8.982307079674051e-06, | |
| "loss": 0.723950207233429, | |
| "step": 466 | |
| }, | |
| { | |
| "epoch": 1.0376940133037693, | |
| "grad_norm": 1.9990200996398926, | |
| "learning_rate": 8.971829374662075e-06, | |
| "loss": 1.0109994411468506, | |
| "step": 468 | |
| }, | |
| { | |
| "epoch": 1.042128603104213, | |
| "grad_norm": 0.8482561111450195, | |
| "learning_rate": 8.961304960399746e-06, | |
| "loss": 0.6301277875900269, | |
| "step": 470 | |
| }, | |
| { | |
| "epoch": 1.0465631929046564, | |
| "grad_norm": 1.0088205337524414, | |
| "learning_rate": 8.950733978480295e-06, | |
| "loss": 0.9854256510734558, | |
| "step": 472 | |
| }, | |
| { | |
| "epoch": 1.0509977827050998, | |
| "grad_norm": 1.1430635452270508, | |
| "learning_rate": 8.940116571123442e-06, | |
| "loss": 1.022680401802063, | |
| "step": 474 | |
| }, | |
| { | |
| "epoch": 1.0554323725055432, | |
| "grad_norm": 0.5003657341003418, | |
| "learning_rate": 8.929452881173522e-06, | |
| "loss": 1.0202995538711548, | |
| "step": 476 | |
| }, | |
| { | |
| "epoch": 1.0598669623059866, | |
| "grad_norm": 0.5999464988708496, | |
| "learning_rate": 8.91874305209754e-06, | |
| "loss": 1.0750170946121216, | |
| "step": 478 | |
| }, | |
| { | |
| "epoch": 1.06430155210643, | |
| "grad_norm": 1.1878728866577148, | |
| "learning_rate": 8.907987227983244e-06, | |
| "loss": 0.6575189828872681, | |
| "step": 480 | |
| }, | |
| { | |
| "epoch": 1.0687361419068737, | |
| "grad_norm": 0.7090225219726562, | |
| "learning_rate": 8.897185553537199e-06, | |
| "loss": 1.1403692960739136, | |
| "step": 482 | |
| }, | |
| { | |
| "epoch": 1.0731707317073171, | |
| "grad_norm": 0.3266684412956238, | |
| "learning_rate": 8.886338174082818e-06, | |
| "loss": 0.8844245076179504, | |
| "step": 484 | |
| }, | |
| { | |
| "epoch": 1.0776053215077606, | |
| "grad_norm": 0.8314698934555054, | |
| "learning_rate": 8.875445235558429e-06, | |
| "loss": 1.0011812448501587, | |
| "step": 486 | |
| }, | |
| { | |
| "epoch": 1.082039911308204, | |
| "grad_norm": 0.7279475927352905, | |
| "learning_rate": 8.864506884515298e-06, | |
| "loss": 0.7617220282554626, | |
| "step": 488 | |
| }, | |
| { | |
| "epoch": 1.0864745011086474, | |
| "grad_norm": 0.42657408118247986, | |
| "learning_rate": 8.853523268115662e-06, | |
| "loss": 0.922903835773468, | |
| "step": 490 | |
| }, | |
| { | |
| "epoch": 1.0909090909090908, | |
| "grad_norm": 0.6559341549873352, | |
| "learning_rate": 8.84249453413075e-06, | |
| "loss": 1.0911064147949219, | |
| "step": 492 | |
| }, | |
| { | |
| "epoch": 1.0953436807095343, | |
| "grad_norm": 1.2124531269073486, | |
| "learning_rate": 8.831420830938787e-06, | |
| "loss": 1.0689539909362793, | |
| "step": 494 | |
| }, | |
| { | |
| "epoch": 1.099778270509978, | |
| "grad_norm": 1.9062159061431885, | |
| "learning_rate": 8.820302307523012e-06, | |
| "loss": 0.9026176929473877, | |
| "step": 496 | |
| }, | |
| { | |
| "epoch": 1.1042128603104213, | |
| "grad_norm": 0.4806932806968689, | |
| "learning_rate": 8.809139113469664e-06, | |
| "loss": 0.9866113662719727, | |
| "step": 498 | |
| }, | |
| { | |
| "epoch": 1.1086474501108647, | |
| "grad_norm": 0.7793009281158447, | |
| "learning_rate": 8.797931398965968e-06, | |
| "loss": 0.6823930144309998, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 1.1130820399113082, | |
| "grad_norm": 0.4156184494495392, | |
| "learning_rate": 8.78667931479812e-06, | |
| "loss": 0.9841465353965759, | |
| "step": 502 | |
| }, | |
| { | |
| "epoch": 1.1175166297117516, | |
| "grad_norm": 0.5767391920089722, | |
| "learning_rate": 8.775383012349255e-06, | |
| "loss": 0.8618515729904175, | |
| "step": 504 | |
| }, | |
| { | |
| "epoch": 1.1219512195121952, | |
| "grad_norm": 1.2682608366012573, | |
| "learning_rate": 8.764042643597413e-06, | |
| "loss": 0.9228280782699585, | |
| "step": 506 | |
| }, | |
| { | |
| "epoch": 1.1263858093126387, | |
| "grad_norm": 1.26870596408844, | |
| "learning_rate": 8.75265836111349e-06, | |
| "loss": 0.6945858001708984, | |
| "step": 508 | |
| }, | |
| { | |
| "epoch": 1.130820399113082, | |
| "grad_norm": 0.9481813311576843, | |
| "learning_rate": 8.741230318059188e-06, | |
| "loss": 1.1894787549972534, | |
| "step": 510 | |
| }, | |
| { | |
| "epoch": 1.1352549889135255, | |
| "grad_norm": 1.9494104385375977, | |
| "learning_rate": 8.72975866818496e-06, | |
| "loss": 0.5956374406814575, | |
| "step": 512 | |
| }, | |
| { | |
| "epoch": 1.139689578713969, | |
| "grad_norm": 0.6274094581604004, | |
| "learning_rate": 8.718243565827927e-06, | |
| "loss": 0.4184134900569916, | |
| "step": 514 | |
| }, | |
| { | |
| "epoch": 1.1441241685144123, | |
| "grad_norm": 0.6972094178199768, | |
| "learning_rate": 8.706685165909817e-06, | |
| "loss": 0.8942562937736511, | |
| "step": 516 | |
| }, | |
| { | |
| "epoch": 1.1485587583148558, | |
| "grad_norm": 0.9098567366600037, | |
| "learning_rate": 8.695083623934872e-06, | |
| "loss": 1.0386896133422852, | |
| "step": 518 | |
| }, | |
| { | |
| "epoch": 1.1529933481152994, | |
| "grad_norm": 0.43444758653640747, | |
| "learning_rate": 8.683439095987758e-06, | |
| "loss": 1.1053950786590576, | |
| "step": 520 | |
| }, | |
| { | |
| "epoch": 1.1574279379157428, | |
| "grad_norm": 0.6920401453971863, | |
| "learning_rate": 8.671751738731464e-06, | |
| "loss": 0.5241594314575195, | |
| "step": 522 | |
| }, | |
| { | |
| "epoch": 1.1618625277161863, | |
| "grad_norm": 0.7615489363670349, | |
| "learning_rate": 8.660021709405197e-06, | |
| "loss": 0.9574247002601624, | |
| "step": 524 | |
| }, | |
| { | |
| "epoch": 1.1662971175166297, | |
| "grad_norm": 0.9507637023925781, | |
| "learning_rate": 8.648249165822265e-06, | |
| "loss": 1.1572948694229126, | |
| "step": 526 | |
| }, | |
| { | |
| "epoch": 1.170731707317073, | |
| "grad_norm": 1.4296756982803345, | |
| "learning_rate": 8.636434266367956e-06, | |
| "loss": 0.6602965593338013, | |
| "step": 528 | |
| }, | |
| { | |
| "epoch": 1.1751662971175167, | |
| "grad_norm": 1.0683335065841675, | |
| "learning_rate": 8.624577169997394e-06, | |
| "loss": 0.8447040319442749, | |
| "step": 530 | |
| }, | |
| { | |
| "epoch": 1.1796008869179602, | |
| "grad_norm": 0.4889877438545227, | |
| "learning_rate": 8.612678036233428e-06, | |
| "loss": 0.8834771513938904, | |
| "step": 532 | |
| }, | |
| { | |
| "epoch": 1.1840354767184036, | |
| "grad_norm": 0.42208215594291687, | |
| "learning_rate": 8.600737025164454e-06, | |
| "loss": 0.9915164709091187, | |
| "step": 534 | |
| }, | |
| { | |
| "epoch": 1.188470066518847, | |
| "grad_norm": 0.32866641879081726, | |
| "learning_rate": 8.588754297442288e-06, | |
| "loss": 0.7454093098640442, | |
| "step": 536 | |
| }, | |
| { | |
| "epoch": 1.1929046563192904, | |
| "grad_norm": 1.7769194841384888, | |
| "learning_rate": 8.576730014279982e-06, | |
| "loss": 1.0999675989151, | |
| "step": 538 | |
| }, | |
| { | |
| "epoch": 1.1973392461197339, | |
| "grad_norm": 0.7567623257637024, | |
| "learning_rate": 8.564664337449677e-06, | |
| "loss": 0.7065927982330322, | |
| "step": 540 | |
| }, | |
| { | |
| "epoch": 1.2017738359201773, | |
| "grad_norm": 1.5624749660491943, | |
| "learning_rate": 8.552557429280407e-06, | |
| "loss": 0.6832781434059143, | |
| "step": 542 | |
| }, | |
| { | |
| "epoch": 1.206208425720621, | |
| "grad_norm": 1.4154813289642334, | |
| "learning_rate": 8.540409452655927e-06, | |
| "loss": 1.0527125597000122, | |
| "step": 544 | |
| }, | |
| { | |
| "epoch": 1.2106430155210643, | |
| "grad_norm": 0.3779228627681732, | |
| "learning_rate": 8.528220571012518e-06, | |
| "loss": 0.8481003642082214, | |
| "step": 546 | |
| }, | |
| { | |
| "epoch": 1.2150776053215078, | |
| "grad_norm": 0.7003396153450012, | |
| "learning_rate": 8.51599094833679e-06, | |
| "loss": 1.2240773439407349, | |
| "step": 548 | |
| }, | |
| { | |
| "epoch": 1.2195121951219512, | |
| "grad_norm": 0.9287251234054565, | |
| "learning_rate": 8.503720749163472e-06, | |
| "loss": 0.629622220993042, | |
| "step": 550 | |
| }, | |
| { | |
| "epoch": 1.2239467849223946, | |
| "grad_norm": 1.5095727443695068, | |
| "learning_rate": 8.491410138573201e-06, | |
| "loss": 1.0369082689285278, | |
| "step": 552 | |
| }, | |
| { | |
| "epoch": 1.2283813747228383, | |
| "grad_norm": 1.6644355058670044, | |
| "learning_rate": 8.479059282190298e-06, | |
| "loss": 0.7387758493423462, | |
| "step": 554 | |
| }, | |
| { | |
| "epoch": 1.2328159645232817, | |
| "grad_norm": 0.320863276720047, | |
| "learning_rate": 8.466668346180548e-06, | |
| "loss": 0.810981035232544, | |
| "step": 556 | |
| }, | |
| { | |
| "epoch": 1.237250554323725, | |
| "grad_norm": 1.620963215827942, | |
| "learning_rate": 8.454237497248956e-06, | |
| "loss": 0.6092830300331116, | |
| "step": 558 | |
| }, | |
| { | |
| "epoch": 1.2416851441241685, | |
| "grad_norm": 1.0662033557891846, | |
| "learning_rate": 8.441766902637506e-06, | |
| "loss": 1.2609363794326782, | |
| "step": 560 | |
| }, | |
| { | |
| "epoch": 1.246119733924612, | |
| "grad_norm": 1.3801884651184082, | |
| "learning_rate": 8.429256730122909e-06, | |
| "loss": 0.859289824962616, | |
| "step": 562 | |
| }, | |
| { | |
| "epoch": 1.2505543237250554, | |
| "grad_norm": 0.6064059138298035, | |
| "learning_rate": 8.416707148014358e-06, | |
| "loss": 0.6251385807991028, | |
| "step": 564 | |
| }, | |
| { | |
| "epoch": 1.2549889135254988, | |
| "grad_norm": 0.5259643197059631, | |
| "learning_rate": 8.404118325151245e-06, | |
| "loss": 0.9294739961624146, | |
| "step": 566 | |
| }, | |
| { | |
| "epoch": 1.2594235033259422, | |
| "grad_norm": 0.9450163841247559, | |
| "learning_rate": 8.391490430900902e-06, | |
| "loss": 0.9038935303688049, | |
| "step": 568 | |
| }, | |
| { | |
| "epoch": 1.2638580931263859, | |
| "grad_norm": 0.5424582362174988, | |
| "learning_rate": 8.378823635156319e-06, | |
| "loss": 1.1193132400512695, | |
| "step": 570 | |
| }, | |
| { | |
| "epoch": 1.2682926829268293, | |
| "grad_norm": 0.3963155448436737, | |
| "learning_rate": 8.366118108333861e-06, | |
| "loss": 0.7388672232627869, | |
| "step": 572 | |
| }, | |
| { | |
| "epoch": 1.2727272727272727, | |
| "grad_norm": 0.823639452457428, | |
| "learning_rate": 8.353374021370967e-06, | |
| "loss": 1.1983304023742676, | |
| "step": 574 | |
| }, | |
| { | |
| "epoch": 1.2771618625277161, | |
| "grad_norm": 0.5981648564338684, | |
| "learning_rate": 8.340591545723861e-06, | |
| "loss": 1.0332672595977783, | |
| "step": 576 | |
| }, | |
| { | |
| "epoch": 1.2815964523281598, | |
| "grad_norm": 0.6439179182052612, | |
| "learning_rate": 8.327770853365238e-06, | |
| "loss": 1.1301418542861938, | |
| "step": 578 | |
| }, | |
| { | |
| "epoch": 1.2860310421286032, | |
| "grad_norm": 0.5708024501800537, | |
| "learning_rate": 8.314912116781954e-06, | |
| "loss": 1.0946240425109863, | |
| "step": 580 | |
| }, | |
| { | |
| "epoch": 1.2904656319290466, | |
| "grad_norm": 1.2981244325637817, | |
| "learning_rate": 8.302015508972702e-06, | |
| "loss": 1.3642780780792236, | |
| "step": 582 | |
| }, | |
| { | |
| "epoch": 1.29490022172949, | |
| "grad_norm": 0.48043203353881836, | |
| "learning_rate": 8.289081203445686e-06, | |
| "loss": 0.7540053129196167, | |
| "step": 584 | |
| }, | |
| { | |
| "epoch": 1.2993348115299335, | |
| "grad_norm": 1.8940509557724, | |
| "learning_rate": 8.276109374216286e-06, | |
| "loss": 0.764034628868103, | |
| "step": 586 | |
| }, | |
| { | |
| "epoch": 1.3037694013303769, | |
| "grad_norm": 1.8195059299468994, | |
| "learning_rate": 8.263100195804722e-06, | |
| "loss": 0.5129228234291077, | |
| "step": 588 | |
| }, | |
| { | |
| "epoch": 1.3082039911308203, | |
| "grad_norm": 1.5529930591583252, | |
| "learning_rate": 8.250053843233704e-06, | |
| "loss": 1.0290241241455078, | |
| "step": 590 | |
| }, | |
| { | |
| "epoch": 1.3126385809312637, | |
| "grad_norm": 0.5189142823219299, | |
| "learning_rate": 8.236970492026063e-06, | |
| "loss": 1.0039256811141968, | |
| "step": 592 | |
| }, | |
| { | |
| "epoch": 1.3170731707317074, | |
| "grad_norm": 1.0931061506271362, | |
| "learning_rate": 8.223850318202415e-06, | |
| "loss": 1.0637818574905396, | |
| "step": 594 | |
| }, | |
| { | |
| "epoch": 1.3215077605321508, | |
| "grad_norm": 0.4218553602695465, | |
| "learning_rate": 8.210693498278773e-06, | |
| "loss": 1.108015537261963, | |
| "step": 596 | |
| }, | |
| { | |
| "epoch": 1.3259423503325942, | |
| "grad_norm": 0.7798098921775818, | |
| "learning_rate": 8.197500209264181e-06, | |
| "loss": 1.029466152191162, | |
| "step": 598 | |
| }, | |
| { | |
| "epoch": 1.3303769401330376, | |
| "grad_norm": 0.5583693981170654, | |
| "learning_rate": 8.18427062865833e-06, | |
| "loss": 0.49875786900520325, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 1.3348115299334813, | |
| "grad_norm": 1.674991488456726, | |
| "learning_rate": 8.171004934449166e-06, | |
| "loss": 0.9001069664955139, | |
| "step": 602 | |
| }, | |
| { | |
| "epoch": 1.3392461197339247, | |
| "grad_norm": 1.0978165864944458, | |
| "learning_rate": 8.157703305110508e-06, | |
| "loss": 0.860775887966156, | |
| "step": 604 | |
| }, | |
| { | |
| "epoch": 1.3436807095343681, | |
| "grad_norm": 0.3894875645637512, | |
| "learning_rate": 8.144365919599632e-06, | |
| "loss": 0.7187979221343994, | |
| "step": 606 | |
| }, | |
| { | |
| "epoch": 1.3481152993348116, | |
| "grad_norm": 0.8448476791381836, | |
| "learning_rate": 8.130992957354872e-06, | |
| "loss": 1.1956108808517456, | |
| "step": 608 | |
| }, | |
| { | |
| "epoch": 1.352549889135255, | |
| "grad_norm": 2.5347466468811035, | |
| "learning_rate": 8.117584598293204e-06, | |
| "loss": 1.1394492387771606, | |
| "step": 610 | |
| }, | |
| { | |
| "epoch": 1.3569844789356984, | |
| "grad_norm": 0.7789635062217712, | |
| "learning_rate": 8.104141022807824e-06, | |
| "loss": 0.7438350319862366, | |
| "step": 612 | |
| }, | |
| { | |
| "epoch": 1.3614190687361418, | |
| "grad_norm": 0.6115071773529053, | |
| "learning_rate": 8.090662411765726e-06, | |
| "loss": 1.1242707967758179, | |
| "step": 614 | |
| }, | |
| { | |
| "epoch": 1.3658536585365852, | |
| "grad_norm": 0.8269447088241577, | |
| "learning_rate": 8.077148946505258e-06, | |
| "loss": 0.8168088793754578, | |
| "step": 616 | |
| }, | |
| { | |
| "epoch": 1.370288248337029, | |
| "grad_norm": 0.5584709048271179, | |
| "learning_rate": 8.063600808833698e-06, | |
| "loss": 0.9382954239845276, | |
| "step": 618 | |
| }, | |
| { | |
| "epoch": 1.3747228381374723, | |
| "grad_norm": 0.3414313495159149, | |
| "learning_rate": 8.050018181024788e-06, | |
| "loss": 0.1388121247291565, | |
| "step": 620 | |
| }, | |
| { | |
| "epoch": 1.3791574279379157, | |
| "grad_norm": 0.46918442845344543, | |
| "learning_rate": 8.036401245816306e-06, | |
| "loss": 0.9869102835655212, | |
| "step": 622 | |
| }, | |
| { | |
| "epoch": 1.3835920177383592, | |
| "grad_norm": 0.9420056939125061, | |
| "learning_rate": 8.022750186407586e-06, | |
| "loss": 0.9787265062332153, | |
| "step": 624 | |
| }, | |
| { | |
| "epoch": 1.3880266075388026, | |
| "grad_norm": 1.6520426273345947, | |
| "learning_rate": 8.009065186457061e-06, | |
| "loss": 0.9564573764801025, | |
| "step": 626 | |
| }, | |
| { | |
| "epoch": 1.3924611973392462, | |
| "grad_norm": 0.7199759483337402, | |
| "learning_rate": 7.995346430079799e-06, | |
| "loss": 0.949640691280365, | |
| "step": 628 | |
| }, | |
| { | |
| "epoch": 1.3968957871396896, | |
| "grad_norm": 0.4535790979862213, | |
| "learning_rate": 7.981594101845012e-06, | |
| "loss": 1.0623891353607178, | |
| "step": 630 | |
| }, | |
| { | |
| "epoch": 1.401330376940133, | |
| "grad_norm": 0.4923252463340759, | |
| "learning_rate": 7.967808386773591e-06, | |
| "loss": 0.736829400062561, | |
| "step": 632 | |
| }, | |
| { | |
| "epoch": 1.4057649667405765, | |
| "grad_norm": 1.7496610879898071, | |
| "learning_rate": 7.953989470335592e-06, | |
| "loss": 1.0442922115325928, | |
| "step": 634 | |
| }, | |
| { | |
| "epoch": 1.41019955654102, | |
| "grad_norm": 2.604696750640869, | |
| "learning_rate": 7.940137538447769e-06, | |
| "loss": 0.9943015575408936, | |
| "step": 636 | |
| }, | |
| { | |
| "epoch": 1.4146341463414633, | |
| "grad_norm": 0.8832758069038391, | |
| "learning_rate": 7.92625277747105e-06, | |
| "loss": 0.6925525069236755, | |
| "step": 638 | |
| }, | |
| { | |
| "epoch": 1.4190687361419068, | |
| "grad_norm": 0.976290225982666, | |
| "learning_rate": 7.912335374208043e-06, | |
| "loss": 0.8791317939758301, | |
| "step": 640 | |
| }, | |
| { | |
| "epoch": 1.4235033259423504, | |
| "grad_norm": 1.112613558769226, | |
| "learning_rate": 7.898385515900517e-06, | |
| "loss": 0.8423402309417725, | |
| "step": 642 | |
| }, | |
| { | |
| "epoch": 1.4279379157427938, | |
| "grad_norm": 4.072666645050049, | |
| "learning_rate": 7.884403390226883e-06, | |
| "loss": 0.5500181317329407, | |
| "step": 644 | |
| }, | |
| { | |
| "epoch": 1.4323725055432373, | |
| "grad_norm": 1.1002585887908936, | |
| "learning_rate": 7.870389185299672e-06, | |
| "loss": 1.159914255142212, | |
| "step": 646 | |
| }, | |
| { | |
| "epoch": 1.4368070953436807, | |
| "grad_norm": 1.006394386291504, | |
| "learning_rate": 7.856343089663002e-06, | |
| "loss": 1.0645030736923218, | |
| "step": 648 | |
| }, | |
| { | |
| "epoch": 1.441241685144124, | |
| "grad_norm": 0.7567451000213623, | |
| "learning_rate": 7.842265292290039e-06, | |
| "loss": 1.143546223640442, | |
| "step": 650 | |
| }, | |
| { | |
| "epoch": 1.4456762749445677, | |
| "grad_norm": 1.4760500192642212, | |
| "learning_rate": 7.828155982580465e-06, | |
| "loss": 0.9194218516349792, | |
| "step": 652 | |
| }, | |
| { | |
| "epoch": 1.4501108647450112, | |
| "grad_norm": 1.3499608039855957, | |
| "learning_rate": 7.814015350357912e-06, | |
| "loss": 1.0461726188659668, | |
| "step": 654 | |
| }, | |
| { | |
| "epoch": 1.4545454545454546, | |
| "grad_norm": 0.5506664514541626, | |
| "learning_rate": 7.799843585867426e-06, | |
| "loss": 0.7790355086326599, | |
| "step": 656 | |
| }, | |
| { | |
| "epoch": 1.458980044345898, | |
| "grad_norm": 0.4337323307991028, | |
| "learning_rate": 7.785640879772897e-06, | |
| "loss": 0.7051459550857544, | |
| "step": 658 | |
| }, | |
| { | |
| "epoch": 1.4634146341463414, | |
| "grad_norm": 0.4734741449356079, | |
| "learning_rate": 7.771407423154498e-06, | |
| "loss": 1.015939712524414, | |
| "step": 660 | |
| }, | |
| { | |
| "epoch": 1.4678492239467849, | |
| "grad_norm": 0.440094918012619, | |
| "learning_rate": 7.757143407506111e-06, | |
| "loss": 0.9792048931121826, | |
| "step": 662 | |
| }, | |
| { | |
| "epoch": 1.4722838137472283, | |
| "grad_norm": 0.7068251967430115, | |
| "learning_rate": 7.742849024732754e-06, | |
| "loss": 0.7712748050689697, | |
| "step": 664 | |
| }, | |
| { | |
| "epoch": 1.476718403547672, | |
| "grad_norm": 0.85067218542099, | |
| "learning_rate": 7.728524467148e-06, | |
| "loss": 0.6657487750053406, | |
| "step": 666 | |
| }, | |
| { | |
| "epoch": 1.4811529933481153, | |
| "grad_norm": 1.339653730392456, | |
| "learning_rate": 7.714169927471379e-06, | |
| "loss": 0.5245797634124756, | |
| "step": 668 | |
| }, | |
| { | |
| "epoch": 1.4855875831485588, | |
| "grad_norm": 1.0584568977355957, | |
| "learning_rate": 7.699785598825805e-06, | |
| "loss": 0.9373866319656372, | |
| "step": 670 | |
| }, | |
| { | |
| "epoch": 1.4900221729490022, | |
| "grad_norm": 0.9347496032714844, | |
| "learning_rate": 7.68537167473496e-06, | |
| "loss": 1.0927127599716187, | |
| "step": 672 | |
| }, | |
| { | |
| "epoch": 1.4944567627494456, | |
| "grad_norm": 0.5240381956100464, | |
| "learning_rate": 7.670928349120699e-06, | |
| "loss": 0.6736969351768494, | |
| "step": 674 | |
| }, | |
| { | |
| "epoch": 1.4988913525498893, | |
| "grad_norm": 0.4406070411205292, | |
| "learning_rate": 7.656455816300434e-06, | |
| "loss": 0.7100923657417297, | |
| "step": 676 | |
| }, | |
| { | |
| "epoch": 1.5033259423503327, | |
| "grad_norm": 2.0313918590545654, | |
| "learning_rate": 7.641954270984532e-06, | |
| "loss": 1.05730402469635, | |
| "step": 678 | |
| }, | |
| { | |
| "epoch": 1.507760532150776, | |
| "grad_norm": 0.420768678188324, | |
| "learning_rate": 7.627423908273683e-06, | |
| "loss": 0.570051908493042, | |
| "step": 680 | |
| }, | |
| { | |
| "epoch": 1.5121951219512195, | |
| "grad_norm": 0.6675831079483032, | |
| "learning_rate": 7.61286492365628e-06, | |
| "loss": 0.5550310015678406, | |
| "step": 682 | |
| }, | |
| { | |
| "epoch": 1.516629711751663, | |
| "grad_norm": 0.7205184102058411, | |
| "learning_rate": 7.598277513005793e-06, | |
| "loss": 1.0391545295715332, | |
| "step": 684 | |
| }, | |
| { | |
| "epoch": 1.5210643015521064, | |
| "grad_norm": 0.4342437982559204, | |
| "learning_rate": 7.583661872578124e-06, | |
| "loss": 0.9450809955596924, | |
| "step": 686 | |
| }, | |
| { | |
| "epoch": 1.5254988913525498, | |
| "grad_norm": 1.2098145484924316, | |
| "learning_rate": 7.569018199008976e-06, | |
| "loss": 0.5710777640342712, | |
| "step": 688 | |
| }, | |
| { | |
| "epoch": 1.5299334811529932, | |
| "grad_norm": 0.5424330234527588, | |
| "learning_rate": 7.554346689311205e-06, | |
| "loss": 0.29212188720703125, | |
| "step": 690 | |
| }, | |
| { | |
| "epoch": 1.5343680709534369, | |
| "grad_norm": 0.7510618567466736, | |
| "learning_rate": 7.539647540872165e-06, | |
| "loss": 0.7841662764549255, | |
| "step": 692 | |
| }, | |
| { | |
| "epoch": 1.5388026607538803, | |
| "grad_norm": 1.6880568265914917, | |
| "learning_rate": 7.5249209514510595e-06, | |
| "loss": 0.9736351370811462, | |
| "step": 694 | |
| }, | |
| { | |
| "epoch": 1.5432372505543237, | |
| "grad_norm": 1.3462926149368286, | |
| "learning_rate": 7.510167119176273e-06, | |
| "loss": 0.6195929646492004, | |
| "step": 696 | |
| }, | |
| { | |
| "epoch": 1.5476718403547673, | |
| "grad_norm": 0.47400352358818054, | |
| "learning_rate": 7.49538624254271e-06, | |
| "loss": 1.0942884683609009, | |
| "step": 698 | |
| }, | |
| { | |
| "epoch": 1.5521064301552108, | |
| "grad_norm": 0.4019659161567688, | |
| "learning_rate": 7.48057852040913e-06, | |
| "loss": 1.01500403881073, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 1.5565410199556542, | |
| "grad_norm": 0.7360739707946777, | |
| "learning_rate": 7.465744151995458e-06, | |
| "loss": 1.0028947591781616, | |
| "step": 702 | |
| }, | |
| { | |
| "epoch": 1.5609756097560976, | |
| "grad_norm": 1.1567124128341675, | |
| "learning_rate": 7.450883336880116e-06, | |
| "loss": 0.9144392609596252, | |
| "step": 704 | |
| }, | |
| { | |
| "epoch": 1.565410199556541, | |
| "grad_norm": 0.9256898760795593, | |
| "learning_rate": 7.435996274997337e-06, | |
| "loss": 0.45123380422592163, | |
| "step": 706 | |
| }, | |
| { | |
| "epoch": 1.5698447893569845, | |
| "grad_norm": 0.4010877311229706, | |
| "learning_rate": 7.421083166634466e-06, | |
| "loss": 1.0565845966339111, | |
| "step": 708 | |
| }, | |
| { | |
| "epoch": 1.5742793791574279, | |
| "grad_norm": 0.986020565032959, | |
| "learning_rate": 7.40614421242928e-06, | |
| "loss": 0.8794451951980591, | |
| "step": 710 | |
| }, | |
| { | |
| "epoch": 1.5787139689578713, | |
| "grad_norm": 1.3509045839309692, | |
| "learning_rate": 7.391179613367272e-06, | |
| "loss": 0.6458362936973572, | |
| "step": 712 | |
| }, | |
| { | |
| "epoch": 1.5831485587583147, | |
| "grad_norm": 0.43885716795921326, | |
| "learning_rate": 7.37618957077896e-06, | |
| "loss": 1.005838394165039, | |
| "step": 714 | |
| }, | |
| { | |
| "epoch": 1.5875831485587582, | |
| "grad_norm": 0.5049751400947571, | |
| "learning_rate": 7.361174286337175e-06, | |
| "loss": 1.0728709697723389, | |
| "step": 716 | |
| }, | |
| { | |
| "epoch": 1.5920177383592018, | |
| "grad_norm": 1.5042154788970947, | |
| "learning_rate": 7.346133962054341e-06, | |
| "loss": 1.202171802520752, | |
| "step": 718 | |
| }, | |
| { | |
| "epoch": 1.5964523281596452, | |
| "grad_norm": 2.809225559234619, | |
| "learning_rate": 7.33106880027977e-06, | |
| "loss": 0.5905061364173889, | |
| "step": 720 | |
| }, | |
| { | |
| "epoch": 1.6008869179600886, | |
| "grad_norm": 0.27043482661247253, | |
| "learning_rate": 7.315979003696927e-06, | |
| "loss": 0.521863579750061, | |
| "step": 722 | |
| }, | |
| { | |
| "epoch": 1.6053215077605323, | |
| "grad_norm": 0.41005781292915344, | |
| "learning_rate": 7.300864775320708e-06, | |
| "loss": 1.0308693647384644, | |
| "step": 724 | |
| }, | |
| { | |
| "epoch": 1.6097560975609757, | |
| "grad_norm": 1.3939813375473022, | |
| "learning_rate": 7.285726318494717e-06, | |
| "loss": 1.0042451620101929, | |
| "step": 726 | |
| }, | |
| { | |
| "epoch": 1.6141906873614191, | |
| "grad_norm": 0.4165089428424835, | |
| "learning_rate": 7.2705638368885105e-06, | |
| "loss": 0.7745837569236755, | |
| "step": 728 | |
| }, | |
| { | |
| "epoch": 1.6186252771618626, | |
| "grad_norm": 0.5574784874916077, | |
| "learning_rate": 7.255377534494875e-06, | |
| "loss": 1.0095455646514893, | |
| "step": 730 | |
| }, | |
| { | |
| "epoch": 1.623059866962306, | |
| "grad_norm": 0.5945760607719421, | |
| "learning_rate": 7.240167615627082e-06, | |
| "loss": 1.0015901327133179, | |
| "step": 732 | |
| }, | |
| { | |
| "epoch": 1.6274944567627494, | |
| "grad_norm": 0.5041150450706482, | |
| "learning_rate": 7.224934284916127e-06, | |
| "loss": 0.9732311964035034, | |
| "step": 734 | |
| }, | |
| { | |
| "epoch": 1.6319290465631928, | |
| "grad_norm": 0.3137003481388092, | |
| "learning_rate": 7.209677747307982e-06, | |
| "loss": 0.5148752331733704, | |
| "step": 736 | |
| }, | |
| { | |
| "epoch": 1.6363636363636362, | |
| "grad_norm": 0.47104862332344055, | |
| "learning_rate": 7.194398208060848e-06, | |
| "loss": 1.2126970291137695, | |
| "step": 738 | |
| }, | |
| { | |
| "epoch": 1.6407982261640797, | |
| "grad_norm": 0.3913891613483429, | |
| "learning_rate": 7.179095872742378e-06, | |
| "loss": 0.985205888748169, | |
| "step": 740 | |
| }, | |
| { | |
| "epoch": 1.6452328159645233, | |
| "grad_norm": 1.6343456506729126, | |
| "learning_rate": 7.16377094722692e-06, | |
| "loss": 1.1372039318084717, | |
| "step": 742 | |
| }, | |
| { | |
| "epoch": 1.6496674057649667, | |
| "grad_norm": 5.484721660614014, | |
| "learning_rate": 7.148423637692748e-06, | |
| "loss": 0.8813603520393372, | |
| "step": 744 | |
| }, | |
| { | |
| "epoch": 1.6541019955654102, | |
| "grad_norm": 1.7607204914093018, | |
| "learning_rate": 7.133054150619282e-06, | |
| "loss": 0.8659079074859619, | |
| "step": 746 | |
| }, | |
| { | |
| "epoch": 1.6585365853658538, | |
| "grad_norm": 0.5767048597335815, | |
| "learning_rate": 7.117662692784318e-06, | |
| "loss": 0.6968775987625122, | |
| "step": 748 | |
| }, | |
| { | |
| "epoch": 1.6629711751662972, | |
| "grad_norm": 0.4937343895435333, | |
| "learning_rate": 7.102249471261241e-06, | |
| "loss": 0.9998278617858887, | |
| "step": 750 | |
| }, | |
| { | |
| "epoch": 1.6674057649667406, | |
| "grad_norm": 0.5310612320899963, | |
| "learning_rate": 7.0868146934162365e-06, | |
| "loss": 0.7577317357063293, | |
| "step": 752 | |
| }, | |
| { | |
| "epoch": 1.671840354767184, | |
| "grad_norm": 0.5454942584037781, | |
| "learning_rate": 7.071358566905507e-06, | |
| "loss": 0.6115183234214783, | |
| "step": 754 | |
| }, | |
| { | |
| "epoch": 1.6762749445676275, | |
| "grad_norm": 0.6721274256706238, | |
| "learning_rate": 7.055881299672476e-06, | |
| "loss": 0.9478708505630493, | |
| "step": 756 | |
| }, | |
| { | |
| "epoch": 1.680709534368071, | |
| "grad_norm": 0.5245542526245117, | |
| "learning_rate": 7.040383099944988e-06, | |
| "loss": 0.9483290314674377, | |
| "step": 758 | |
| }, | |
| { | |
| "epoch": 1.6851441241685143, | |
| "grad_norm": 1.0505019426345825, | |
| "learning_rate": 7.02486417623251e-06, | |
| "loss": 0.9622657895088196, | |
| "step": 760 | |
| }, | |
| { | |
| "epoch": 1.6895787139689578, | |
| "grad_norm": 1.2052677869796753, | |
| "learning_rate": 7.009324737323325e-06, | |
| "loss": 0.9271701574325562, | |
| "step": 762 | |
| }, | |
| { | |
| "epoch": 1.6940133037694012, | |
| "grad_norm": 0.8925173282623291, | |
| "learning_rate": 6.993764992281722e-06, | |
| "loss": 0.962370753288269, | |
| "step": 764 | |
| }, | |
| { | |
| "epoch": 1.6984478935698448, | |
| "grad_norm": 1.8376715183258057, | |
| "learning_rate": 6.978185150445187e-06, | |
| "loss": 1.0133888721466064, | |
| "step": 766 | |
| }, | |
| { | |
| "epoch": 1.7028824833702882, | |
| "grad_norm": 0.8429685235023499, | |
| "learning_rate": 6.96258542142158e-06, | |
| "loss": 0.7847933769226074, | |
| "step": 768 | |
| }, | |
| { | |
| "epoch": 1.7073170731707317, | |
| "grad_norm": 0.451034277677536, | |
| "learning_rate": 6.946966015086321e-06, | |
| "loss": 0.9764997363090515, | |
| "step": 770 | |
| }, | |
| { | |
| "epoch": 1.7117516629711753, | |
| "grad_norm": 2.5368645191192627, | |
| "learning_rate": 6.931327141579565e-06, | |
| "loss": 1.0000364780426025, | |
| "step": 772 | |
| }, | |
| { | |
| "epoch": 1.7161862527716187, | |
| "grad_norm": 0.48420414328575134, | |
| "learning_rate": 6.915669011303374e-06, | |
| "loss": 0.8809909820556641, | |
| "step": 774 | |
| }, | |
| { | |
| "epoch": 1.7206208425720622, | |
| "grad_norm": 0.5247390866279602, | |
| "learning_rate": 6.899991834918884e-06, | |
| "loss": 1.0189868211746216, | |
| "step": 776 | |
| }, | |
| { | |
| "epoch": 1.7250554323725056, | |
| "grad_norm": 0.690968930721283, | |
| "learning_rate": 6.884295823343479e-06, | |
| "loss": 1.035364031791687, | |
| "step": 778 | |
| }, | |
| { | |
| "epoch": 1.729490022172949, | |
| "grad_norm": 0.9502907395362854, | |
| "learning_rate": 6.868581187747941e-06, | |
| "loss": 0.6463431715965271, | |
| "step": 780 | |
| }, | |
| { | |
| "epoch": 1.7339246119733924, | |
| "grad_norm": 0.5312066674232483, | |
| "learning_rate": 6.852848139553619e-06, | |
| "loss": 0.9688764810562134, | |
| "step": 782 | |
| }, | |
| { | |
| "epoch": 1.7383592017738358, | |
| "grad_norm": 0.5402536392211914, | |
| "learning_rate": 6.837096890429582e-06, | |
| "loss": 0.6454051733016968, | |
| "step": 784 | |
| }, | |
| { | |
| "epoch": 1.7427937915742793, | |
| "grad_norm": 0.48874494433403015, | |
| "learning_rate": 6.821327652289768e-06, | |
| "loss": 0.7339925169944763, | |
| "step": 786 | |
| }, | |
| { | |
| "epoch": 1.7472283813747227, | |
| "grad_norm": 0.6470894813537598, | |
| "learning_rate": 6.8055406372901344e-06, | |
| "loss": 0.6527358293533325, | |
| "step": 788 | |
| }, | |
| { | |
| "epoch": 1.7516629711751663, | |
| "grad_norm": 0.9488041400909424, | |
| "learning_rate": 6.789736057825812e-06, | |
| "loss": 0.650081992149353, | |
| "step": 790 | |
| }, | |
| { | |
| "epoch": 1.7560975609756098, | |
| "grad_norm": 0.4228132665157318, | |
| "learning_rate": 6.77391412652823e-06, | |
| "loss": 0.650942862033844, | |
| "step": 792 | |
| }, | |
| { | |
| "epoch": 1.7605321507760532, | |
| "grad_norm": 0.5584551692008972, | |
| "learning_rate": 6.758075056262271e-06, | |
| "loss": 0.8236819505691528, | |
| "step": 794 | |
| }, | |
| { | |
| "epoch": 1.7649667405764968, | |
| "grad_norm": 1.0599656105041504, | |
| "learning_rate": 6.742219060123403e-06, | |
| "loss": 0.7987365126609802, | |
| "step": 796 | |
| }, | |
| { | |
| "epoch": 1.7694013303769403, | |
| "grad_norm": 0.6103171110153198, | |
| "learning_rate": 6.7263463514348095e-06, | |
| "loss": 0.9921965003013611, | |
| "step": 798 | |
| }, | |
| { | |
| "epoch": 1.7738359201773837, | |
| "grad_norm": 0.6360469460487366, | |
| "learning_rate": 6.710457143744519e-06, | |
| "loss": 1.0037264823913574, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 1.778270509977827, | |
| "grad_norm": 0.5509381890296936, | |
| "learning_rate": 6.6945516508225325e-06, | |
| "loss": 0.992444634437561, | |
| "step": 802 | |
| }, | |
| { | |
| "epoch": 1.7827050997782705, | |
| "grad_norm": 0.6882392168045044, | |
| "learning_rate": 6.678630086657959e-06, | |
| "loss": 1.0927345752716064, | |
| "step": 804 | |
| }, | |
| { | |
| "epoch": 1.787139689578714, | |
| "grad_norm": 0.6332179307937622, | |
| "learning_rate": 6.662692665456115e-06, | |
| "loss": 0.7333803772926331, | |
| "step": 806 | |
| }, | |
| { | |
| "epoch": 1.7915742793791574, | |
| "grad_norm": 0.5961802005767822, | |
| "learning_rate": 6.646739601635661e-06, | |
| "loss": 1.0272966623306274, | |
| "step": 808 | |
| }, | |
| { | |
| "epoch": 1.7960088691796008, | |
| "grad_norm": 1.2394970655441284, | |
| "learning_rate": 6.6307711098257074e-06, | |
| "loss": 0.6059154272079468, | |
| "step": 810 | |
| }, | |
| { | |
| "epoch": 1.8004434589800442, | |
| "grad_norm": 1.1179450750350952, | |
| "learning_rate": 6.6147874048629294e-06, | |
| "loss": 0.6130510568618774, | |
| "step": 812 | |
| }, | |
| { | |
| "epoch": 1.8048780487804879, | |
| "grad_norm": 1.7669285535812378, | |
| "learning_rate": 6.598788701788677e-06, | |
| "loss": 1.039893388748169, | |
| "step": 814 | |
| }, | |
| { | |
| "epoch": 1.8093126385809313, | |
| "grad_norm": 4.2482709884643555, | |
| "learning_rate": 6.582775215846082e-06, | |
| "loss": 0.9353387355804443, | |
| "step": 816 | |
| }, | |
| { | |
| "epoch": 1.8137472283813747, | |
| "grad_norm": 0.8275613188743591, | |
| "learning_rate": 6.566747162477164e-06, | |
| "loss": 0.4844290316104889, | |
| "step": 818 | |
| }, | |
| { | |
| "epoch": 1.8181818181818183, | |
| "grad_norm": 0.8788929581642151, | |
| "learning_rate": 6.5507047573199235e-06, | |
| "loss": 1.1249672174453735, | |
| "step": 820 | |
| }, | |
| { | |
| "epoch": 1.8226164079822618, | |
| "grad_norm": 0.8800422549247742, | |
| "learning_rate": 6.5346482162054526e-06, | |
| "loss": 1.1043124198913574, | |
| "step": 822 | |
| }, | |
| { | |
| "epoch": 1.8270509977827052, | |
| "grad_norm": 0.4852801263332367, | |
| "learning_rate": 6.518577755155024e-06, | |
| "loss": 1.1537041664123535, | |
| "step": 824 | |
| }, | |
| { | |
| "epoch": 1.8314855875831486, | |
| "grad_norm": 1.2431832551956177, | |
| "learning_rate": 6.502493590377184e-06, | |
| "loss": 0.8821326494216919, | |
| "step": 826 | |
| }, | |
| { | |
| "epoch": 1.835920177383592, | |
| "grad_norm": 0.9608327746391296, | |
| "learning_rate": 6.48639593826485e-06, | |
| "loss": 0.8124348521232605, | |
| "step": 828 | |
| }, | |
| { | |
| "epoch": 1.8403547671840355, | |
| "grad_norm": 0.7294570803642273, | |
| "learning_rate": 6.4702850153923915e-06, | |
| "loss": 0.9093976616859436, | |
| "step": 830 | |
| }, | |
| { | |
| "epoch": 1.8447893569844789, | |
| "grad_norm": 0.575531542301178, | |
| "learning_rate": 6.45416103851272e-06, | |
| "loss": 1.0434691905975342, | |
| "step": 832 | |
| }, | |
| { | |
| "epoch": 1.8492239467849223, | |
| "grad_norm": 0.4895908236503601, | |
| "learning_rate": 6.438024224554378e-06, | |
| "loss": 0.9609898328781128, | |
| "step": 834 | |
| }, | |
| { | |
| "epoch": 1.8536585365853657, | |
| "grad_norm": 0.33139824867248535, | |
| "learning_rate": 6.421874790618608e-06, | |
| "loss": 1.0044034719467163, | |
| "step": 836 | |
| }, | |
| { | |
| "epoch": 1.8580931263858091, | |
| "grad_norm": 0.6011033654212952, | |
| "learning_rate": 6.405712953976444e-06, | |
| "loss": 0.9612147808074951, | |
| "step": 838 | |
| }, | |
| { | |
| "epoch": 1.8625277161862528, | |
| "grad_norm": 0.7546729445457458, | |
| "learning_rate": 6.389538932065783e-06, | |
| "loss": 1.141190767288208, | |
| "step": 840 | |
| }, | |
| { | |
| "epoch": 1.8669623059866962, | |
| "grad_norm": 0.6122239828109741, | |
| "learning_rate": 6.373352942488455e-06, | |
| "loss": 0.6330398321151733, | |
| "step": 842 | |
| }, | |
| { | |
| "epoch": 1.8713968957871396, | |
| "grad_norm": 0.5186591148376465, | |
| "learning_rate": 6.357155203007307e-06, | |
| "loss": 0.7644712924957275, | |
| "step": 844 | |
| }, | |
| { | |
| "epoch": 1.8758314855875833, | |
| "grad_norm": 0.8722115159034729, | |
| "learning_rate": 6.340945931543263e-06, | |
| "loss": 0.7418773770332336, | |
| "step": 846 | |
| }, | |
| { | |
| "epoch": 1.8802660753880267, | |
| "grad_norm": 1.3620861768722534, | |
| "learning_rate": 6.324725346172399e-06, | |
| "loss": 0.9680192470550537, | |
| "step": 848 | |
| }, | |
| { | |
| "epoch": 1.8847006651884701, | |
| "grad_norm": 0.49945542216300964, | |
| "learning_rate": 6.308493665123e-06, | |
| "loss": 1.0896743535995483, | |
| "step": 850 | |
| }, | |
| { | |
| "epoch": 1.8891352549889135, | |
| "grad_norm": 0.8075128793716431, | |
| "learning_rate": 6.2922511067726365e-06, | |
| "loss": 0.667526364326477, | |
| "step": 852 | |
| }, | |
| { | |
| "epoch": 1.893569844789357, | |
| "grad_norm": 0.7008575201034546, | |
| "learning_rate": 6.2759978896452155e-06, | |
| "loss": 1.0155971050262451, | |
| "step": 854 | |
| }, | |
| { | |
| "epoch": 1.8980044345898004, | |
| "grad_norm": 0.6262828707695007, | |
| "learning_rate": 6.259734232408047e-06, | |
| "loss": 0.9141192436218262, | |
| "step": 856 | |
| }, | |
| { | |
| "epoch": 1.9024390243902438, | |
| "grad_norm": 0.5417189002037048, | |
| "learning_rate": 6.2434603538688975e-06, | |
| "loss": 0.7273485660552979, | |
| "step": 858 | |
| }, | |
| { | |
| "epoch": 1.9068736141906872, | |
| "grad_norm": 1.6091915369033813, | |
| "learning_rate": 6.2271764729730525e-06, | |
| "loss": 0.7897534370422363, | |
| "step": 860 | |
| }, | |
| { | |
| "epoch": 1.9113082039911307, | |
| "grad_norm": 1.3260005712509155, | |
| "learning_rate": 6.210882808800366e-06, | |
| "loss": 1.0450389385223389, | |
| "step": 862 | |
| }, | |
| { | |
| "epoch": 1.9157427937915743, | |
| "grad_norm": 0.47777101397514343, | |
| "learning_rate": 6.19457958056231e-06, | |
| "loss": 0.7668794393539429, | |
| "step": 864 | |
| }, | |
| { | |
| "epoch": 1.9201773835920177, | |
| "grad_norm": 0.9053187966346741, | |
| "learning_rate": 6.178267007599034e-06, | |
| "loss": 0.6413880586624146, | |
| "step": 866 | |
| }, | |
| { | |
| "epoch": 1.9246119733924612, | |
| "grad_norm": 0.6551345586776733, | |
| "learning_rate": 6.161945309376409e-06, | |
| "loss": 1.095296859741211, | |
| "step": 868 | |
| }, | |
| { | |
| "epoch": 1.9290465631929048, | |
| "grad_norm": 0.878730297088623, | |
| "learning_rate": 6.145614705483075e-06, | |
| "loss": 0.9538235068321228, | |
| "step": 870 | |
| }, | |
| { | |
| "epoch": 1.9334811529933482, | |
| "grad_norm": 0.5680310726165771, | |
| "learning_rate": 6.129275415627485e-06, | |
| "loss": 0.6358171701431274, | |
| "step": 872 | |
| }, | |
| { | |
| "epoch": 1.9379157427937916, | |
| "grad_norm": 0.6420407295227051, | |
| "learning_rate": 6.11292765963495e-06, | |
| "loss": 0.6467251181602478, | |
| "step": 874 | |
| }, | |
| { | |
| "epoch": 1.942350332594235, | |
| "grad_norm": 0.5206342935562134, | |
| "learning_rate": 6.09657165744469e-06, | |
| "loss": 0.6561703681945801, | |
| "step": 876 | |
| }, | |
| { | |
| "epoch": 1.9467849223946785, | |
| "grad_norm": 0.6010205745697021, | |
| "learning_rate": 6.080207629106859e-06, | |
| "loss": 0.7634451389312744, | |
| "step": 878 | |
| }, | |
| { | |
| "epoch": 1.951219512195122, | |
| "grad_norm": 0.5291740894317627, | |
| "learning_rate": 6.063835794779598e-06, | |
| "loss": 0.85489821434021, | |
| "step": 880 | |
| }, | |
| { | |
| "epoch": 1.9556541019955653, | |
| "grad_norm": 4.159445762634277, | |
| "learning_rate": 6.047456374726067e-06, | |
| "loss": 0.6732465028762817, | |
| "step": 882 | |
| }, | |
| { | |
| "epoch": 1.9600886917960088, | |
| "grad_norm": 0.47579193115234375, | |
| "learning_rate": 6.031069589311481e-06, | |
| "loss": 0.7860453128814697, | |
| "step": 884 | |
| }, | |
| { | |
| "epoch": 1.9645232815964522, | |
| "grad_norm": 0.4395957887172699, | |
| "learning_rate": 6.01467565900015e-06, | |
| "loss": 0.9734126925468445, | |
| "step": 886 | |
| }, | |
| { | |
| "epoch": 1.9689578713968958, | |
| "grad_norm": 2.2899482250213623, | |
| "learning_rate": 5.99827480435251e-06, | |
| "loss": 0.9537763595581055, | |
| "step": 888 | |
| }, | |
| { | |
| "epoch": 1.9733924611973392, | |
| "grad_norm": 0.5516371130943298, | |
| "learning_rate": 5.981867246022149e-06, | |
| "loss": 0.99347984790802, | |
| "step": 890 | |
| }, | |
| { | |
| "epoch": 1.9778270509977827, | |
| "grad_norm": 0.5941886901855469, | |
| "learning_rate": 5.965453204752855e-06, | |
| "loss": 1.0517255067825317, | |
| "step": 892 | |
| }, | |
| { | |
| "epoch": 1.9822616407982263, | |
| "grad_norm": 0.631015419960022, | |
| "learning_rate": 5.949032901375627e-06, | |
| "loss": 1.0343537330627441, | |
| "step": 894 | |
| }, | |
| { | |
| "epoch": 1.9866962305986697, | |
| "grad_norm": 1.527363657951355, | |
| "learning_rate": 5.932606556805719e-06, | |
| "loss": 0.8417173624038696, | |
| "step": 896 | |
| }, | |
| { | |
| "epoch": 1.9911308203991132, | |
| "grad_norm": 0.7326000332832336, | |
| "learning_rate": 5.916174392039659e-06, | |
| "loss": 0.9509387016296387, | |
| "step": 898 | |
| }, | |
| { | |
| "epoch": 1.9955654101995566, | |
| "grad_norm": 0.5246658325195312, | |
| "learning_rate": 5.899736628152284e-06, | |
| "loss": 0.798784613609314, | |
| "step": 900 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "grad_norm": 0.41008785367012024, | |
| "learning_rate": 5.88329348629375e-06, | |
| "loss": 0.985916018486023, | |
| "step": 902 | |
| }, | |
| { | |
| "epoch": 2.0044345898004434, | |
| "grad_norm": 0.4405089318752289, | |
| "learning_rate": 5.8668451876865736e-06, | |
| "loss": 0.7073619365692139, | |
| "step": 904 | |
| }, | |
| { | |
| "epoch": 2.008869179600887, | |
| "grad_norm": 1.7787858247756958, | |
| "learning_rate": 5.850391953622652e-06, | |
| "loss": 0.8173685669898987, | |
| "step": 906 | |
| }, | |
| { | |
| "epoch": 2.0133037694013303, | |
| "grad_norm": 0.5108776092529297, | |
| "learning_rate": 5.8339340054602775e-06, | |
| "loss": 0.6087190508842468, | |
| "step": 908 | |
| }, | |
| { | |
| "epoch": 2.0177383592017737, | |
| "grad_norm": 0.5111180543899536, | |
| "learning_rate": 5.817471564621169e-06, | |
| "loss": 0.7211825847625732, | |
| "step": 910 | |
| }, | |
| { | |
| "epoch": 2.022172949002217, | |
| "grad_norm": 0.7296714186668396, | |
| "learning_rate": 5.801004852587485e-06, | |
| "loss": 0.507999837398529, | |
| "step": 912 | |
| }, | |
| { | |
| "epoch": 2.0266075388026605, | |
| "grad_norm": 1.6111247539520264, | |
| "learning_rate": 5.784534090898849e-06, | |
| "loss": 0.5558914542198181, | |
| "step": 914 | |
| }, | |
| { | |
| "epoch": 2.0310421286031044, | |
| "grad_norm": 0.9995886087417603, | |
| "learning_rate": 5.768059501149369e-06, | |
| "loss": 0.5170501470565796, | |
| "step": 916 | |
| }, | |
| { | |
| "epoch": 2.035476718403548, | |
| "grad_norm": 0.5618699193000793, | |
| "learning_rate": 5.751581304984657e-06, | |
| "loss": 0.7065697908401489, | |
| "step": 918 | |
| }, | |
| { | |
| "epoch": 2.0399113082039912, | |
| "grad_norm": 0.554235577583313, | |
| "learning_rate": 5.735099724098838e-06, | |
| "loss": 0.5513534545898438, | |
| "step": 920 | |
| }, | |
| { | |
| "epoch": 2.0443458980044347, | |
| "grad_norm": 0.6221592426300049, | |
| "learning_rate": 5.718614980231582e-06, | |
| "loss": 0.580018162727356, | |
| "step": 922 | |
| }, | |
| { | |
| "epoch": 2.048780487804878, | |
| "grad_norm": 0.7114652395248413, | |
| "learning_rate": 5.702127295165107e-06, | |
| "loss": 0.26266422867774963, | |
| "step": 924 | |
| }, | |
| { | |
| "epoch": 2.0532150776053215, | |
| "grad_norm": 0.7338118553161621, | |
| "learning_rate": 5.685636890721205e-06, | |
| "loss": 0.7764136791229248, | |
| "step": 926 | |
| }, | |
| { | |
| "epoch": 2.057649667405765, | |
| "grad_norm": 1.6588897705078125, | |
| "learning_rate": 5.669143988758253e-06, | |
| "loss": 0.3647955656051636, | |
| "step": 928 | |
| }, | |
| { | |
| "epoch": 2.0620842572062084, | |
| "grad_norm": 0.25482437014579773, | |
| "learning_rate": 5.652648811168228e-06, | |
| "loss": 0.5260996222496033, | |
| "step": 930 | |
| }, | |
| { | |
| "epoch": 2.066518847006652, | |
| "grad_norm": 1.088817834854126, | |
| "learning_rate": 5.636151579873726e-06, | |
| "loss": 0.5555666089057922, | |
| "step": 932 | |
| }, | |
| { | |
| "epoch": 2.070953436807095, | |
| "grad_norm": 0.6878843307495117, | |
| "learning_rate": 5.619652516824967e-06, | |
| "loss": 0.6538913249969482, | |
| "step": 934 | |
| }, | |
| { | |
| "epoch": 2.0753880266075386, | |
| "grad_norm": 0.5582793354988098, | |
| "learning_rate": 5.603151843996822e-06, | |
| "loss": 0.7564462423324585, | |
| "step": 936 | |
| }, | |
| { | |
| "epoch": 2.079822616407982, | |
| "grad_norm": 0.5043266415596008, | |
| "learning_rate": 5.586649783385813e-06, | |
| "loss": 0.43487629294395447, | |
| "step": 938 | |
| }, | |
| { | |
| "epoch": 2.084257206208426, | |
| "grad_norm": 0.964514434337616, | |
| "learning_rate": 5.570146557007141e-06, | |
| "loss": 0.4970490336418152, | |
| "step": 940 | |
| }, | |
| { | |
| "epoch": 2.0886917960088693, | |
| "grad_norm": 0.8735406398773193, | |
| "learning_rate": 5.553642386891683e-06, | |
| "loss": 0.6485542058944702, | |
| "step": 942 | |
| }, | |
| { | |
| "epoch": 2.0931263858093128, | |
| "grad_norm": 0.5761396288871765, | |
| "learning_rate": 5.537137495083018e-06, | |
| "loss": 0.49649330973625183, | |
| "step": 944 | |
| }, | |
| { | |
| "epoch": 2.097560975609756, | |
| "grad_norm": 0.5437949895858765, | |
| "learning_rate": 5.5206321036344304e-06, | |
| "loss": 0.7156221270561218, | |
| "step": 946 | |
| }, | |
| { | |
| "epoch": 2.1019955654101996, | |
| "grad_norm": 0.9228906035423279, | |
| "learning_rate": 5.504126434605932e-06, | |
| "loss": 0.7050195932388306, | |
| "step": 948 | |
| }, | |
| { | |
| "epoch": 2.106430155210643, | |
| "grad_norm": 3.377256393432617, | |
| "learning_rate": 5.487620710061262e-06, | |
| "loss": 0.5274829268455505, | |
| "step": 950 | |
| }, | |
| { | |
| "epoch": 2.1108647450110865, | |
| "grad_norm": 0.6513805985450745, | |
| "learning_rate": 5.471115152064916e-06, | |
| "loss": 0.5962385535240173, | |
| "step": 952 | |
| }, | |
| { | |
| "epoch": 2.11529933481153, | |
| "grad_norm": 2.8682925701141357, | |
| "learning_rate": 5.454609982679138e-06, | |
| "loss": 0.6596555709838867, | |
| "step": 954 | |
| }, | |
| { | |
| "epoch": 2.1197339246119733, | |
| "grad_norm": 0.6173861026763916, | |
| "learning_rate": 5.4381054239609525e-06, | |
| "loss": 0.7276114821434021, | |
| "step": 956 | |
| }, | |
| { | |
| "epoch": 2.1241685144124167, | |
| "grad_norm": 0.5581717491149902, | |
| "learning_rate": 5.421601697959164e-06, | |
| "loss": 0.5600399374961853, | |
| "step": 958 | |
| }, | |
| { | |
| "epoch": 2.12860310421286, | |
| "grad_norm": 1.5211955308914185, | |
| "learning_rate": 5.405099026711374e-06, | |
| "loss": 0.5050497055053711, | |
| "step": 960 | |
| }, | |
| { | |
| "epoch": 2.1330376940133036, | |
| "grad_norm": 1.1427018642425537, | |
| "learning_rate": 5.388597632240994e-06, | |
| "loss": 0.36328691244125366, | |
| "step": 962 | |
| }, | |
| { | |
| "epoch": 2.1374722838137474, | |
| "grad_norm": 1.5880637168884277, | |
| "learning_rate": 5.372097736554261e-06, | |
| "loss": 0.6904159784317017, | |
| "step": 964 | |
| }, | |
| { | |
| "epoch": 2.141906873614191, | |
| "grad_norm": 0.6093219518661499, | |
| "learning_rate": 5.35559956163724e-06, | |
| "loss": 0.5827292799949646, | |
| "step": 966 | |
| }, | |
| { | |
| "epoch": 2.1463414634146343, | |
| "grad_norm": 1.1137672662734985, | |
| "learning_rate": 5.339103329452856e-06, | |
| "loss": 0.47683635354042053, | |
| "step": 968 | |
| }, | |
| { | |
| "epoch": 2.1507760532150777, | |
| "grad_norm": 0.8449851870536804, | |
| "learning_rate": 5.322609261937887e-06, | |
| "loss": 0.5655550360679626, | |
| "step": 970 | |
| }, | |
| { | |
| "epoch": 2.155210643015521, | |
| "grad_norm": 0.5638793110847473, | |
| "learning_rate": 5.306117580999993e-06, | |
| "loss": 0.5006866455078125, | |
| "step": 972 | |
| }, | |
| { | |
| "epoch": 2.1596452328159645, | |
| "grad_norm": 0.6014860272407532, | |
| "learning_rate": 5.289628508514725e-06, | |
| "loss": 0.7802212834358215, | |
| "step": 974 | |
| }, | |
| { | |
| "epoch": 2.164079822616408, | |
| "grad_norm": 0.6840709447860718, | |
| "learning_rate": 5.2731422663225385e-06, | |
| "loss": 0.6838681101799011, | |
| "step": 976 | |
| }, | |
| { | |
| "epoch": 2.1685144124168514, | |
| "grad_norm": 1.3036504983901978, | |
| "learning_rate": 5.256659076225813e-06, | |
| "loss": 0.41791778802871704, | |
| "step": 978 | |
| }, | |
| { | |
| "epoch": 2.172949002217295, | |
| "grad_norm": 2.5424842834472656, | |
| "learning_rate": 5.240179159985866e-06, | |
| "loss": 0.7272178530693054, | |
| "step": 980 | |
| }, | |
| { | |
| "epoch": 2.1773835920177382, | |
| "grad_norm": 0.5120069980621338, | |
| "learning_rate": 5.2237027393199645e-06, | |
| "loss": 0.3696378171443939, | |
| "step": 982 | |
| }, | |
| { | |
| "epoch": 2.1818181818181817, | |
| "grad_norm": 0.654542088508606, | |
| "learning_rate": 5.207230035898356e-06, | |
| "loss": 0.21627305448055267, | |
| "step": 984 | |
| }, | |
| { | |
| "epoch": 2.186252771618625, | |
| "grad_norm": 3.1496362686157227, | |
| "learning_rate": 5.190761271341268e-06, | |
| "loss": 0.5596635937690735, | |
| "step": 986 | |
| }, | |
| { | |
| "epoch": 2.1906873614190685, | |
| "grad_norm": 1.2095357179641724, | |
| "learning_rate": 5.174296667215939e-06, | |
| "loss": 0.3137320280075073, | |
| "step": 988 | |
| }, | |
| { | |
| "epoch": 2.1951219512195124, | |
| "grad_norm": 0.5719432234764099, | |
| "learning_rate": 5.157836445033636e-06, | |
| "loss": 0.7822216749191284, | |
| "step": 990 | |
| }, | |
| { | |
| "epoch": 2.199556541019956, | |
| "grad_norm": 2.0039100646972656, | |
| "learning_rate": 5.141380826246667e-06, | |
| "loss": 0.7932789921760559, | |
| "step": 992 | |
| }, | |
| { | |
| "epoch": 2.203991130820399, | |
| "grad_norm": 0.8511418700218201, | |
| "learning_rate": 5.124930032245415e-06, | |
| "loss": 0.5880918502807617, | |
| "step": 994 | |
| }, | |
| { | |
| "epoch": 2.2084257206208426, | |
| "grad_norm": 0.6295179724693298, | |
| "learning_rate": 5.108484284355339e-06, | |
| "loss": 0.795894980430603, | |
| "step": 996 | |
| }, | |
| { | |
| "epoch": 2.212860310421286, | |
| "grad_norm": 0.783069372177124, | |
| "learning_rate": 5.0920438038340194e-06, | |
| "loss": 0.6926091313362122, | |
| "step": 998 | |
| }, | |
| { | |
| "epoch": 2.2172949002217295, | |
| "grad_norm": 0.6107291579246521, | |
| "learning_rate": 5.075608811868169e-06, | |
| "loss": 0.5917972326278687, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 2.221729490022173, | |
| "grad_norm": 0.68993079662323, | |
| "learning_rate": 5.059179529570657e-06, | |
| "loss": 0.3001978397369385, | |
| "step": 1002 | |
| }, | |
| { | |
| "epoch": 2.2261640798226163, | |
| "grad_norm": 0.775002121925354, | |
| "learning_rate": 5.042756177977534e-06, | |
| "loss": 0.7339842319488525, | |
| "step": 1004 | |
| }, | |
| { | |
| "epoch": 2.2305986696230597, | |
| "grad_norm": 0.5174007415771484, | |
| "learning_rate": 5.026338978045062e-06, | |
| "loss": 0.600027322769165, | |
| "step": 1006 | |
| }, | |
| { | |
| "epoch": 2.235033259423503, | |
| "grad_norm": 0.4449722468852997, | |
| "learning_rate": 5.009928150646741e-06, | |
| "loss": 0.7295263409614563, | |
| "step": 1008 | |
| }, | |
| { | |
| "epoch": 2.2394678492239466, | |
| "grad_norm": 0.5435464382171631, | |
| "learning_rate": 4.993523916570334e-06, | |
| "loss": 0.45167410373687744, | |
| "step": 1010 | |
| }, | |
| { | |
| "epoch": 2.2439024390243905, | |
| "grad_norm": 0.6709751486778259, | |
| "learning_rate": 4.977126496514902e-06, | |
| "loss": 0.7127041816711426, | |
| "step": 1012 | |
| }, | |
| { | |
| "epoch": 2.248337028824834, | |
| "grad_norm": 1.1186648607254028, | |
| "learning_rate": 4.960736111087827e-06, | |
| "loss": 0.6675184369087219, | |
| "step": 1014 | |
| }, | |
| { | |
| "epoch": 2.2527716186252773, | |
| "grad_norm": 0.6485116481781006, | |
| "learning_rate": 4.9443529808018545e-06, | |
| "loss": 0.7672496438026428, | |
| "step": 1016 | |
| }, | |
| { | |
| "epoch": 2.2572062084257207, | |
| "grad_norm": 0.9844960570335388, | |
| "learning_rate": 4.927977326072115e-06, | |
| "loss": 0.35495874285697937, | |
| "step": 1018 | |
| }, | |
| { | |
| "epoch": 2.261640798226164, | |
| "grad_norm": 0.7903894186019897, | |
| "learning_rate": 4.911609367213168e-06, | |
| "loss": 0.7469250559806824, | |
| "step": 1020 | |
| }, | |
| { | |
| "epoch": 2.2660753880266076, | |
| "grad_norm": 0.3099033832550049, | |
| "learning_rate": 4.895249324436035e-06, | |
| "loss": 0.40709808468818665, | |
| "step": 1022 | |
| }, | |
| { | |
| "epoch": 2.270509977827051, | |
| "grad_norm": 1.1226407289505005, | |
| "learning_rate": 4.8788974178452316e-06, | |
| "loss": 0.9115332365036011, | |
| "step": 1024 | |
| }, | |
| { | |
| "epoch": 2.2749445676274944, | |
| "grad_norm": 0.6671955585479736, | |
| "learning_rate": 4.86255386743582e-06, | |
| "loss": 0.5178396105766296, | |
| "step": 1026 | |
| }, | |
| { | |
| "epoch": 2.279379157427938, | |
| "grad_norm": 0.5912867188453674, | |
| "learning_rate": 4.846218893090426e-06, | |
| "loss": 0.7622277736663818, | |
| "step": 1028 | |
| }, | |
| { | |
| "epoch": 2.2838137472283813, | |
| "grad_norm": 0.6438608765602112, | |
| "learning_rate": 4.829892714576307e-06, | |
| "loss": 0.7653957009315491, | |
| "step": 1030 | |
| }, | |
| { | |
| "epoch": 2.2882483370288247, | |
| "grad_norm": 0.4936668574810028, | |
| "learning_rate": 4.813575551542381e-06, | |
| "loss": 0.5053679347038269, | |
| "step": 1032 | |
| }, | |
| { | |
| "epoch": 2.292682926829268, | |
| "grad_norm": 0.8145703673362732, | |
| "learning_rate": 4.7972676235162714e-06, | |
| "loss": 0.4616087079048157, | |
| "step": 1034 | |
| }, | |
| { | |
| "epoch": 2.2971175166297115, | |
| "grad_norm": 0.9270339608192444, | |
| "learning_rate": 4.780969149901354e-06, | |
| "loss": 0.6849997043609619, | |
| "step": 1036 | |
| }, | |
| { | |
| "epoch": 2.3015521064301554, | |
| "grad_norm": 0.9029300808906555, | |
| "learning_rate": 4.764680349973812e-06, | |
| "loss": 0.6518898010253906, | |
| "step": 1038 | |
| }, | |
| { | |
| "epoch": 2.305986696230599, | |
| "grad_norm": 1.1026612520217896, | |
| "learning_rate": 4.748401442879674e-06, | |
| "loss": 0.575007438659668, | |
| "step": 1040 | |
| }, | |
| { | |
| "epoch": 2.3104212860310422, | |
| "grad_norm": 1.147935152053833, | |
| "learning_rate": 4.732132647631881e-06, | |
| "loss": 0.7933302521705627, | |
| "step": 1042 | |
| }, | |
| { | |
| "epoch": 2.3148558758314857, | |
| "grad_norm": 0.4572734236717224, | |
| "learning_rate": 4.715874183107324e-06, | |
| "loss": 0.5096555352210999, | |
| "step": 1044 | |
| }, | |
| { | |
| "epoch": 2.319290465631929, | |
| "grad_norm": 0.623441219329834, | |
| "learning_rate": 4.699626268043911e-06, | |
| "loss": 0.7747717499732971, | |
| "step": 1046 | |
| }, | |
| { | |
| "epoch": 2.3237250554323725, | |
| "grad_norm": 0.5140348076820374, | |
| "learning_rate": 4.683389121037618e-06, | |
| "loss": 0.661880612373352, | |
| "step": 1048 | |
| }, | |
| { | |
| "epoch": 2.328159645232816, | |
| "grad_norm": 0.5512512922286987, | |
| "learning_rate": 4.667162960539552e-06, | |
| "loss": 0.7839210033416748, | |
| "step": 1050 | |
| }, | |
| { | |
| "epoch": 2.3325942350332594, | |
| "grad_norm": 1.5569580793380737, | |
| "learning_rate": 4.650948004853006e-06, | |
| "loss": 0.5787954330444336, | |
| "step": 1052 | |
| }, | |
| { | |
| "epoch": 2.337028824833703, | |
| "grad_norm": 0.5759721398353577, | |
| "learning_rate": 4.634744472130529e-06, | |
| "loss": 0.5592602491378784, | |
| "step": 1054 | |
| }, | |
| { | |
| "epoch": 2.341463414634146, | |
| "grad_norm": 0.5359609127044678, | |
| "learning_rate": 4.618552580370988e-06, | |
| "loss": 0.5897871851921082, | |
| "step": 1056 | |
| }, | |
| { | |
| "epoch": 2.3458980044345896, | |
| "grad_norm": 0.3575122356414795, | |
| "learning_rate": 4.6023725474166324e-06, | |
| "loss": 0.47162723541259766, | |
| "step": 1058 | |
| }, | |
| { | |
| "epoch": 2.3503325942350335, | |
| "grad_norm": 0.6168625950813293, | |
| "learning_rate": 4.586204590950169e-06, | |
| "loss": 0.8093945384025574, | |
| "step": 1060 | |
| }, | |
| { | |
| "epoch": 2.354767184035477, | |
| "grad_norm": 0.6786574721336365, | |
| "learning_rate": 4.570048928491824e-06, | |
| "loss": 0.3322570323944092, | |
| "step": 1062 | |
| }, | |
| { | |
| "epoch": 2.3592017738359203, | |
| "grad_norm": 0.3918606638908386, | |
| "learning_rate": 4.5539057773964316e-06, | |
| "loss": 0.4114290177822113, | |
| "step": 1064 | |
| }, | |
| { | |
| "epoch": 2.3636363636363638, | |
| "grad_norm": 1.4159204959869385, | |
| "learning_rate": 4.537775354850496e-06, | |
| "loss": 0.6567169427871704, | |
| "step": 1066 | |
| }, | |
| { | |
| "epoch": 2.368070953436807, | |
| "grad_norm": 0.5967050194740295, | |
| "learning_rate": 4.5216578778692725e-06, | |
| "loss": 0.734316349029541, | |
| "step": 1068 | |
| }, | |
| { | |
| "epoch": 2.3725055432372506, | |
| "grad_norm": 0.5874074697494507, | |
| "learning_rate": 4.5055535632938526e-06, | |
| "loss": 0.7882179021835327, | |
| "step": 1070 | |
| }, | |
| { | |
| "epoch": 2.376940133037694, | |
| "grad_norm": 1.1057568788528442, | |
| "learning_rate": 4.489462627788242e-06, | |
| "loss": 0.6857914924621582, | |
| "step": 1072 | |
| }, | |
| { | |
| "epoch": 2.3813747228381374, | |
| "grad_norm": 0.6758074164390564, | |
| "learning_rate": 4.473385287836448e-06, | |
| "loss": 0.04006706923246384, | |
| "step": 1074 | |
| }, | |
| { | |
| "epoch": 2.385809312638581, | |
| "grad_norm": 0.6099052429199219, | |
| "learning_rate": 4.457321759739567e-06, | |
| "loss": 0.43440449237823486, | |
| "step": 1076 | |
| }, | |
| { | |
| "epoch": 2.3902439024390243, | |
| "grad_norm": 0.838287353515625, | |
| "learning_rate": 4.4412722596128686e-06, | |
| "loss": 0.19644105434417725, | |
| "step": 1078 | |
| }, | |
| { | |
| "epoch": 2.3946784922394677, | |
| "grad_norm": 1.0746098756790161, | |
| "learning_rate": 4.425237003382903e-06, | |
| "loss": 0.6213241815567017, | |
| "step": 1080 | |
| }, | |
| { | |
| "epoch": 2.399113082039911, | |
| "grad_norm": 0.2223556786775589, | |
| "learning_rate": 4.409216206784577e-06, | |
| "loss": 0.43604913353919983, | |
| "step": 1082 | |
| }, | |
| { | |
| "epoch": 2.4035476718403546, | |
| "grad_norm": 1.25760018825531, | |
| "learning_rate": 4.393210085358265e-06, | |
| "loss": 0.5678226947784424, | |
| "step": 1084 | |
| }, | |
| { | |
| "epoch": 2.4079822616407984, | |
| "grad_norm": 0.4775606393814087, | |
| "learning_rate": 4.3772188544469016e-06, | |
| "loss": 0.6794226765632629, | |
| "step": 1086 | |
| }, | |
| { | |
| "epoch": 2.412416851441242, | |
| "grad_norm": 0.33771422505378723, | |
| "learning_rate": 4.3612427291930915e-06, | |
| "loss": 0.1676941215991974, | |
| "step": 1088 | |
| }, | |
| { | |
| "epoch": 2.4168514412416853, | |
| "grad_norm": 0.485148549079895, | |
| "learning_rate": 4.345281924536208e-06, | |
| "loss": 0.7475972175598145, | |
| "step": 1090 | |
| }, | |
| { | |
| "epoch": 2.4212860310421287, | |
| "grad_norm": 1.9281665086746216, | |
| "learning_rate": 4.329336655209505e-06, | |
| "loss": 0.5811200141906738, | |
| "step": 1092 | |
| }, | |
| { | |
| "epoch": 2.425720620842572, | |
| "grad_norm": 0.96091628074646, | |
| "learning_rate": 4.31340713573723e-06, | |
| "loss": 0.3428981304168701, | |
| "step": 1094 | |
| }, | |
| { | |
| "epoch": 2.4301552106430155, | |
| "grad_norm": 1.0888780355453491, | |
| "learning_rate": 4.297493580431732e-06, | |
| "loss": 0.4109230041503906, | |
| "step": 1096 | |
| }, | |
| { | |
| "epoch": 2.434589800443459, | |
| "grad_norm": 0.2943512201309204, | |
| "learning_rate": 4.281596203390582e-06, | |
| "loss": 0.3823660910129547, | |
| "step": 1098 | |
| }, | |
| { | |
| "epoch": 2.4390243902439024, | |
| "grad_norm": 1.013668179512024, | |
| "learning_rate": 4.265715218493695e-06, | |
| "loss": 0.6369096040725708, | |
| "step": 1100 | |
| }, | |
| { | |
| "epoch": 2.443458980044346, | |
| "grad_norm": 0.41791486740112305, | |
| "learning_rate": 4.249850839400446e-06, | |
| "loss": 0.8213289380073547, | |
| "step": 1102 | |
| }, | |
| { | |
| "epoch": 2.4478935698447892, | |
| "grad_norm": 1.5500551462173462, | |
| "learning_rate": 4.2340032795468e-06, | |
| "loss": 0.38706445693969727, | |
| "step": 1104 | |
| }, | |
| { | |
| "epoch": 2.4523281596452327, | |
| "grad_norm": 1.2918660640716553, | |
| "learning_rate": 4.218172752142442e-06, | |
| "loss": 0.8401514291763306, | |
| "step": 1106 | |
| }, | |
| { | |
| "epoch": 2.4567627494456765, | |
| "grad_norm": 1.1685500144958496, | |
| "learning_rate": 4.202359470167903e-06, | |
| "loss": 0.6909348964691162, | |
| "step": 1108 | |
| }, | |
| { | |
| "epoch": 2.4611973392461195, | |
| "grad_norm": 0.5699374079704285, | |
| "learning_rate": 4.186563646371696e-06, | |
| "loss": 0.7599969506263733, | |
| "step": 1110 | |
| }, | |
| { | |
| "epoch": 2.4656319290465634, | |
| "grad_norm": 0.4491530954837799, | |
| "learning_rate": 4.170785493267463e-06, | |
| "loss": 0.5214658975601196, | |
| "step": 1112 | |
| }, | |
| { | |
| "epoch": 2.470066518847007, | |
| "grad_norm": 0.5719757676124573, | |
| "learning_rate": 4.155025223131102e-06, | |
| "loss": 0.7296863794326782, | |
| "step": 1114 | |
| }, | |
| { | |
| "epoch": 2.47450110864745, | |
| "grad_norm": 1.554071307182312, | |
| "learning_rate": 4.139283047997919e-06, | |
| "loss": 0.2472933977842331, | |
| "step": 1116 | |
| }, | |
| { | |
| "epoch": 2.4789356984478936, | |
| "grad_norm": 0.4289678931236267, | |
| "learning_rate": 4.123559179659771e-06, | |
| "loss": 0.7022151350975037, | |
| "step": 1118 | |
| }, | |
| { | |
| "epoch": 2.483370288248337, | |
| "grad_norm": 0.6308154463768005, | |
| "learning_rate": 4.107853829662224e-06, | |
| "loss": 0.6172606945037842, | |
| "step": 1120 | |
| }, | |
| { | |
| "epoch": 2.4878048780487805, | |
| "grad_norm": 0.5270412564277649, | |
| "learning_rate": 4.0921672093017e-06, | |
| "loss": 0.7171418070793152, | |
| "step": 1122 | |
| }, | |
| { | |
| "epoch": 2.492239467849224, | |
| "grad_norm": 1.6710797548294067, | |
| "learning_rate": 4.076499529622636e-06, | |
| "loss": 0.7357991933822632, | |
| "step": 1124 | |
| }, | |
| { | |
| "epoch": 2.4966740576496673, | |
| "grad_norm": 0.7722313404083252, | |
| "learning_rate": 4.0608510014146455e-06, | |
| "loss": 0.7875959277153015, | |
| "step": 1126 | |
| }, | |
| { | |
| "epoch": 2.5011086474501107, | |
| "grad_norm": 0.580020010471344, | |
| "learning_rate": 4.045221835209684e-06, | |
| "loss": 0.4804987907409668, | |
| "step": 1128 | |
| }, | |
| { | |
| "epoch": 2.505543237250554, | |
| "grad_norm": 1.0426214933395386, | |
| "learning_rate": 4.02961224127921e-06, | |
| "loss": 0.517763078212738, | |
| "step": 1130 | |
| }, | |
| { | |
| "epoch": 2.5099778270509976, | |
| "grad_norm": 10.173712730407715, | |
| "learning_rate": 4.014022429631368e-06, | |
| "loss": 0.6159908771514893, | |
| "step": 1132 | |
| }, | |
| { | |
| "epoch": 2.5144124168514415, | |
| "grad_norm": 0.5241939425468445, | |
| "learning_rate": 3.998452610008147e-06, | |
| "loss": 0.4526905417442322, | |
| "step": 1134 | |
| }, | |
| { | |
| "epoch": 2.5188470066518844, | |
| "grad_norm": 0.6242166757583618, | |
| "learning_rate": 3.982902991882578e-06, | |
| "loss": 0.8381586670875549, | |
| "step": 1136 | |
| }, | |
| { | |
| "epoch": 2.5232815964523283, | |
| "grad_norm": 0.5091142058372498, | |
| "learning_rate": 3.967373784455896e-06, | |
| "loss": 0.5896800756454468, | |
| "step": 1138 | |
| }, | |
| { | |
| "epoch": 2.5277161862527717, | |
| "grad_norm": 0.6498262286186218, | |
| "learning_rate": 3.951865196654738e-06, | |
| "loss": 0.7107540965080261, | |
| "step": 1140 | |
| }, | |
| { | |
| "epoch": 2.532150776053215, | |
| "grad_norm": 1.1750524044036865, | |
| "learning_rate": 3.936377437128329e-06, | |
| "loss": 0.35893017053604126, | |
| "step": 1142 | |
| }, | |
| { | |
| "epoch": 2.5365853658536586, | |
| "grad_norm": 0.3123358190059662, | |
| "learning_rate": 3.920910714245679e-06, | |
| "loss": 0.4256970286369324, | |
| "step": 1144 | |
| }, | |
| { | |
| "epoch": 2.541019955654102, | |
| "grad_norm": 0.533246636390686, | |
| "learning_rate": 3.905465236092771e-06, | |
| "loss": 0.7800043225288391, | |
| "step": 1146 | |
| }, | |
| { | |
| "epoch": 2.5454545454545454, | |
| "grad_norm": 0.6559436321258545, | |
| "learning_rate": 3.890041210469765e-06, | |
| "loss": 0.7252997756004333, | |
| "step": 1148 | |
| }, | |
| { | |
| "epoch": 2.549889135254989, | |
| "grad_norm": 0.8817735314369202, | |
| "learning_rate": 3.8746388448882055e-06, | |
| "loss": 0.6513781547546387, | |
| "step": 1150 | |
| }, | |
| { | |
| "epoch": 2.5543237250554323, | |
| "grad_norm": 0.6646784543991089, | |
| "learning_rate": 3.859258346568228e-06, | |
| "loss": 0.3836185336112976, | |
| "step": 1152 | |
| }, | |
| { | |
| "epoch": 2.5587583148558757, | |
| "grad_norm": 0.2349381297826767, | |
| "learning_rate": 3.843899922435767e-06, | |
| "loss": 0.24700769782066345, | |
| "step": 1154 | |
| }, | |
| { | |
| "epoch": 2.5631929046563195, | |
| "grad_norm": 0.49869391322135925, | |
| "learning_rate": 3.8285637791197815e-06, | |
| "loss": 0.38181808590888977, | |
| "step": 1156 | |
| }, | |
| { | |
| "epoch": 2.5676274944567625, | |
| "grad_norm": 4.80774450302124, | |
| "learning_rate": 3.8132501229494635e-06, | |
| "loss": 0.33294159173965454, | |
| "step": 1158 | |
| }, | |
| { | |
| "epoch": 2.5720620842572064, | |
| "grad_norm": 1.0527015924453735, | |
| "learning_rate": 3.7979591599514696e-06, | |
| "loss": 0.44462141394615173, | |
| "step": 1160 | |
| }, | |
| { | |
| "epoch": 2.57649667405765, | |
| "grad_norm": 0.4929417669773102, | |
| "learning_rate": 3.782691095847151e-06, | |
| "loss": 0.7220413088798523, | |
| "step": 1162 | |
| }, | |
| { | |
| "epoch": 2.5809312638580932, | |
| "grad_norm": 0.9527971744537354, | |
| "learning_rate": 3.767446136049775e-06, | |
| "loss": 0.8302287459373474, | |
| "step": 1164 | |
| }, | |
| { | |
| "epoch": 2.5853658536585367, | |
| "grad_norm": 0.634364664554596, | |
| "learning_rate": 3.752224485661775e-06, | |
| "loss": 0.5409159660339355, | |
| "step": 1166 | |
| }, | |
| { | |
| "epoch": 2.58980044345898, | |
| "grad_norm": 0.5789323449134827, | |
| "learning_rate": 3.7370263494719805e-06, | |
| "loss": 0.6534980535507202, | |
| "step": 1168 | |
| }, | |
| { | |
| "epoch": 2.5942350332594235, | |
| "grad_norm": 0.8696730732917786, | |
| "learning_rate": 3.721851931952869e-06, | |
| "loss": 0.7098603248596191, | |
| "step": 1170 | |
| }, | |
| { | |
| "epoch": 2.598669623059867, | |
| "grad_norm": 0.7621782422065735, | |
| "learning_rate": 3.706701437257808e-06, | |
| "loss": 0.25166431069374084, | |
| "step": 1172 | |
| }, | |
| { | |
| "epoch": 2.6031042128603104, | |
| "grad_norm": 0.5634275078773499, | |
| "learning_rate": 3.691575069218314e-06, | |
| "loss": 0.5319882035255432, | |
| "step": 1174 | |
| }, | |
| { | |
| "epoch": 2.6075388026607538, | |
| "grad_norm": 0.6240749955177307, | |
| "learning_rate": 3.676473031341313e-06, | |
| "loss": 0.5341241359710693, | |
| "step": 1176 | |
| }, | |
| { | |
| "epoch": 2.611973392461197, | |
| "grad_norm": 0.5434238314628601, | |
| "learning_rate": 3.661395526806395e-06, | |
| "loss": 0.43045657873153687, | |
| "step": 1178 | |
| }, | |
| { | |
| "epoch": 2.6164079822616406, | |
| "grad_norm": 1.4436023235321045, | |
| "learning_rate": 3.6463427584630806e-06, | |
| "loss": 0.5974833965301514, | |
| "step": 1180 | |
| }, | |
| { | |
| "epoch": 2.6208425720620845, | |
| "grad_norm": 0.4753042757511139, | |
| "learning_rate": 3.631314928828099e-06, | |
| "loss": 0.6559595465660095, | |
| "step": 1182 | |
| }, | |
| { | |
| "epoch": 2.6252771618625275, | |
| "grad_norm": 0.5069239139556885, | |
| "learning_rate": 3.616312240082659e-06, | |
| "loss": 0.7961637377738953, | |
| "step": 1184 | |
| }, | |
| { | |
| "epoch": 2.6297117516629713, | |
| "grad_norm": 0.5121076703071594, | |
| "learning_rate": 3.601334894069728e-06, | |
| "loss": 0.7357903718948364, | |
| "step": 1186 | |
| }, | |
| { | |
| "epoch": 2.6341463414634148, | |
| "grad_norm": 1.606050968170166, | |
| "learning_rate": 3.5863830922913147e-06, | |
| "loss": 0.7505167722702026, | |
| "step": 1188 | |
| }, | |
| { | |
| "epoch": 2.638580931263858, | |
| "grad_norm": 0.6388944387435913, | |
| "learning_rate": 3.5714570359057676e-06, | |
| "loss": 0.43692290782928467, | |
| "step": 1190 | |
| }, | |
| { | |
| "epoch": 2.6430155210643016, | |
| "grad_norm": 1.7327947616577148, | |
| "learning_rate": 3.556556925725061e-06, | |
| "loss": 0.688886821269989, | |
| "step": 1192 | |
| }, | |
| { | |
| "epoch": 2.647450110864745, | |
| "grad_norm": 1.5744296312332153, | |
| "learning_rate": 3.5416829622120875e-06, | |
| "loss": 0.4643045663833618, | |
| "step": 1194 | |
| }, | |
| { | |
| "epoch": 2.6518847006651884, | |
| "grad_norm": 0.5231549739837646, | |
| "learning_rate": 3.526835345477978e-06, | |
| "loss": 0.6326463222503662, | |
| "step": 1196 | |
| }, | |
| { | |
| "epoch": 2.656319290465632, | |
| "grad_norm": 0.3871590495109558, | |
| "learning_rate": 3.5120142752793907e-06, | |
| "loss": 0.2630988359451294, | |
| "step": 1198 | |
| }, | |
| { | |
| "epoch": 2.6607538802660753, | |
| "grad_norm": 0.6133447885513306, | |
| "learning_rate": 3.4972199510158393e-06, | |
| "loss": 0.8605660796165466, | |
| "step": 1200 | |
| }, | |
| { | |
| "epoch": 2.6651884700665187, | |
| "grad_norm": 0.49589312076568604, | |
| "learning_rate": 3.4824525717269975e-06, | |
| "loss": 0.7755805850028992, | |
| "step": 1202 | |
| }, | |
| { | |
| "epoch": 2.6696230598669626, | |
| "grad_norm": 2.0768580436706543, | |
| "learning_rate": 3.4677123360900342e-06, | |
| "loss": 0.3658536672592163, | |
| "step": 1204 | |
| }, | |
| { | |
| "epoch": 2.6740576496674056, | |
| "grad_norm": 0.6321128606796265, | |
| "learning_rate": 3.4529994424169233e-06, | |
| "loss": 0.5949617624282837, | |
| "step": 1206 | |
| }, | |
| { | |
| "epoch": 2.6784922394678494, | |
| "grad_norm": 0.720273494720459, | |
| "learning_rate": 3.4383140886517953e-06, | |
| "loss": 0.6172499060630798, | |
| "step": 1208 | |
| }, | |
| { | |
| "epoch": 2.682926829268293, | |
| "grad_norm": 3.1463682651519775, | |
| "learning_rate": 3.423656472368262e-06, | |
| "loss": 0.38858482241630554, | |
| "step": 1210 | |
| }, | |
| { | |
| "epoch": 2.6873614190687363, | |
| "grad_norm": 1.7371691465377808, | |
| "learning_rate": 3.409026790766756e-06, | |
| "loss": 0.25850269198417664, | |
| "step": 1212 | |
| }, | |
| { | |
| "epoch": 2.6917960088691797, | |
| "grad_norm": 0.6183602213859558, | |
| "learning_rate": 3.394425240671891e-06, | |
| "loss": 0.4422842860221863, | |
| "step": 1214 | |
| }, | |
| { | |
| "epoch": 2.696230598669623, | |
| "grad_norm": 1.6302183866500854, | |
| "learning_rate": 3.379852018529799e-06, | |
| "loss": 0.5197972059249878, | |
| "step": 1216 | |
| }, | |
| { | |
| "epoch": 2.7006651884700665, | |
| "grad_norm": 1.3484753370285034, | |
| "learning_rate": 3.3653073204054942e-06, | |
| "loss": 0.4698919355869293, | |
| "step": 1218 | |
| }, | |
| { | |
| "epoch": 2.70509977827051, | |
| "grad_norm": 0.6926991939544678, | |
| "learning_rate": 3.3507913419802403e-06, | |
| "loss": 0.7520615458488464, | |
| "step": 1220 | |
| }, | |
| { | |
| "epoch": 2.7095343680709534, | |
| "grad_norm": 0.7605311870574951, | |
| "learning_rate": 3.336304278548903e-06, | |
| "loss": 0.5865758061408997, | |
| "step": 1222 | |
| }, | |
| { | |
| "epoch": 2.713968957871397, | |
| "grad_norm": 1.110818862915039, | |
| "learning_rate": 3.321846325017342e-06, | |
| "loss": 0.759763240814209, | |
| "step": 1224 | |
| }, | |
| { | |
| "epoch": 2.7184035476718402, | |
| "grad_norm": 1.391050934791565, | |
| "learning_rate": 3.3074176758997744e-06, | |
| "loss": 0.34599849581718445, | |
| "step": 1226 | |
| }, | |
| { | |
| "epoch": 2.7228381374722836, | |
| "grad_norm": 0.7273187041282654, | |
| "learning_rate": 3.2930185253161574e-06, | |
| "loss": 0.8604745864868164, | |
| "step": 1228 | |
| }, | |
| { | |
| "epoch": 2.7272727272727275, | |
| "grad_norm": 0.6875346899032593, | |
| "learning_rate": 3.2786490669895883e-06, | |
| "loss": 0.575916588306427, | |
| "step": 1230 | |
| }, | |
| { | |
| "epoch": 2.7317073170731705, | |
| "grad_norm": 0.5254970788955688, | |
| "learning_rate": 3.2643094942436865e-06, | |
| "loss": 0.654707670211792, | |
| "step": 1232 | |
| }, | |
| { | |
| "epoch": 2.7361419068736144, | |
| "grad_norm": 1.1695361137390137, | |
| "learning_rate": 3.2500000000000015e-06, | |
| "loss": 0.33159804344177246, | |
| "step": 1234 | |
| }, | |
| { | |
| "epoch": 2.740576496674058, | |
| "grad_norm": 0.5698898434638977, | |
| "learning_rate": 3.2357207767754063e-06, | |
| "loss": 0.7334117293357849, | |
| "step": 1236 | |
| }, | |
| { | |
| "epoch": 2.745011086474501, | |
| "grad_norm": 0.5489004254341125, | |
| "learning_rate": 3.221472016679521e-06, | |
| "loss": 0.4428709149360657, | |
| "step": 1238 | |
| }, | |
| { | |
| "epoch": 2.7494456762749446, | |
| "grad_norm": 0.5650718212127686, | |
| "learning_rate": 3.2072539114121188e-06, | |
| "loss": 0.44653040170669556, | |
| "step": 1240 | |
| }, | |
| { | |
| "epoch": 2.753880266075388, | |
| "grad_norm": 0.5928937792778015, | |
| "learning_rate": 3.193066652260547e-06, | |
| "loss": 0.782917320728302, | |
| "step": 1242 | |
| }, | |
| { | |
| "epoch": 2.7583148558758315, | |
| "grad_norm": 0.5975604057312012, | |
| "learning_rate": 3.1789104300971603e-06, | |
| "loss": 0.7677651047706604, | |
| "step": 1244 | |
| }, | |
| { | |
| "epoch": 2.762749445676275, | |
| "grad_norm": 1.0852833986282349, | |
| "learning_rate": 3.164785435376745e-06, | |
| "loss": 0.29107940196990967, | |
| "step": 1246 | |
| }, | |
| { | |
| "epoch": 2.7671840354767183, | |
| "grad_norm": 1.17136549949646, | |
| "learning_rate": 3.1506918581339583e-06, | |
| "loss": 0.4287865459918976, | |
| "step": 1248 | |
| }, | |
| { | |
| "epoch": 2.7716186252771617, | |
| "grad_norm": 0.9015536308288574, | |
| "learning_rate": 3.136629887980781e-06, | |
| "loss": 0.49741506576538086, | |
| "step": 1250 | |
| }, | |
| { | |
| "epoch": 2.776053215077605, | |
| "grad_norm": 0.573412299156189, | |
| "learning_rate": 3.122599714103949e-06, | |
| "loss": 0.8777692317962646, | |
| "step": 1252 | |
| }, | |
| { | |
| "epoch": 2.7804878048780486, | |
| "grad_norm": 0.5751115679740906, | |
| "learning_rate": 3.1086015252624257e-06, | |
| "loss": 0.8302069306373596, | |
| "step": 1254 | |
| }, | |
| { | |
| "epoch": 2.7849223946784925, | |
| "grad_norm": 0.4423518180847168, | |
| "learning_rate": 3.0946355097848535e-06, | |
| "loss": 0.7646183371543884, | |
| "step": 1256 | |
| }, | |
| { | |
| "epoch": 2.7893569844789354, | |
| "grad_norm": 0.403327614068985, | |
| "learning_rate": 3.0807018555670153e-06, | |
| "loss": 0.15144073963165283, | |
| "step": 1258 | |
| }, | |
| { | |
| "epoch": 2.7937915742793793, | |
| "grad_norm": 0.5304398536682129, | |
| "learning_rate": 3.0668007500693216e-06, | |
| "loss": 0.711767852306366, | |
| "step": 1260 | |
| }, | |
| { | |
| "epoch": 2.7982261640798227, | |
| "grad_norm": 0.8088931441307068, | |
| "learning_rate": 3.0529323803142697e-06, | |
| "loss": 0.41084200143814087, | |
| "step": 1262 | |
| }, | |
| { | |
| "epoch": 2.802660753880266, | |
| "grad_norm": 0.5133860111236572, | |
| "learning_rate": 3.0390969328839464e-06, | |
| "loss": 0.42946991324424744, | |
| "step": 1264 | |
| }, | |
| { | |
| "epoch": 2.8070953436807096, | |
| "grad_norm": 0.5490220189094543, | |
| "learning_rate": 3.0252945939175004e-06, | |
| "loss": 0.6761541366577148, | |
| "step": 1266 | |
| }, | |
| { | |
| "epoch": 2.811529933481153, | |
| "grad_norm": 1.2554481029510498, | |
| "learning_rate": 3.0115255491086537e-06, | |
| "loss": 0.6815743446350098, | |
| "step": 1268 | |
| }, | |
| { | |
| "epoch": 2.8159645232815964, | |
| "grad_norm": 0.7049453258514404, | |
| "learning_rate": 2.9977899837031895e-06, | |
| "loss": 0.7387552261352539, | |
| "step": 1270 | |
| }, | |
| { | |
| "epoch": 2.82039911308204, | |
| "grad_norm": 1.1351791620254517, | |
| "learning_rate": 2.984088082496469e-06, | |
| "loss": 0.21328306198120117, | |
| "step": 1272 | |
| }, | |
| { | |
| "epoch": 2.8248337028824833, | |
| "grad_norm": 0.5017495155334473, | |
| "learning_rate": 2.970420029830946e-06, | |
| "loss": 0.4749630391597748, | |
| "step": 1274 | |
| }, | |
| { | |
| "epoch": 2.8292682926829267, | |
| "grad_norm": 0.6224768161773682, | |
| "learning_rate": 2.9567860095936775e-06, | |
| "loss": 0.7620760798454285, | |
| "step": 1276 | |
| }, | |
| { | |
| "epoch": 2.8337028824833705, | |
| "grad_norm": 0.6925188899040222, | |
| "learning_rate": 2.9431862052138545e-06, | |
| "loss": 0.7254369258880615, | |
| "step": 1278 | |
| }, | |
| { | |
| "epoch": 2.8381374722838135, | |
| "grad_norm": 0.7251638174057007, | |
| "learning_rate": 2.929620799660343e-06, | |
| "loss": 0.3205258846282959, | |
| "step": 1280 | |
| }, | |
| { | |
| "epoch": 2.8425720620842574, | |
| "grad_norm": 6.005808353424072, | |
| "learning_rate": 2.916089975439207e-06, | |
| "loss": 0.44966956973075867, | |
| "step": 1282 | |
| }, | |
| { | |
| "epoch": 2.847006651884701, | |
| "grad_norm": 0.6776400208473206, | |
| "learning_rate": 2.9025939145912655e-06, | |
| "loss": 0.5670607089996338, | |
| "step": 1284 | |
| }, | |
| { | |
| "epoch": 2.8514412416851442, | |
| "grad_norm": 0.5101284980773926, | |
| "learning_rate": 2.8891327986896345e-06, | |
| "loss": 0.7107180953025818, | |
| "step": 1286 | |
| }, | |
| { | |
| "epoch": 2.8558758314855877, | |
| "grad_norm": 3.805804967880249, | |
| "learning_rate": 2.875706808837292e-06, | |
| "loss": 0.1649579256772995, | |
| "step": 1288 | |
| }, | |
| { | |
| "epoch": 2.860310421286031, | |
| "grad_norm": 0.5193606615066528, | |
| "learning_rate": 2.862316125664636e-06, | |
| "loss": 0.7318248152732849, | |
| "step": 1290 | |
| }, | |
| { | |
| "epoch": 2.8647450110864745, | |
| "grad_norm": 1.6448044776916504, | |
| "learning_rate": 2.848960929327053e-06, | |
| "loss": 0.5531004071235657, | |
| "step": 1292 | |
| }, | |
| { | |
| "epoch": 2.869179600886918, | |
| "grad_norm": 0.5975578427314758, | |
| "learning_rate": 2.8356413995025044e-06, | |
| "loss": 0.7080908417701721, | |
| "step": 1294 | |
| }, | |
| { | |
| "epoch": 2.8736141906873613, | |
| "grad_norm": 0.5933077335357666, | |
| "learning_rate": 2.8223577153890934e-06, | |
| "loss": 0.7111974358558655, | |
| "step": 1296 | |
| }, | |
| { | |
| "epoch": 2.8780487804878048, | |
| "grad_norm": 1.380179762840271, | |
| "learning_rate": 2.8091100557026702e-06, | |
| "loss": 0.568051815032959, | |
| "step": 1298 | |
| }, | |
| { | |
| "epoch": 2.882483370288248, | |
| "grad_norm": 0.5458784699440002, | |
| "learning_rate": 2.795898598674415e-06, | |
| "loss": 0.7207503318786621, | |
| "step": 1300 | |
| }, | |
| { | |
| "epoch": 2.8869179600886916, | |
| "grad_norm": 1.1317119598388672, | |
| "learning_rate": 2.782723522048444e-06, | |
| "loss": 0.2552308738231659, | |
| "step": 1302 | |
| }, | |
| { | |
| "epoch": 2.8913525498891355, | |
| "grad_norm": 0.7093331217765808, | |
| "learning_rate": 2.7695850030794293e-06, | |
| "loss": 0.6807090044021606, | |
| "step": 1304 | |
| }, | |
| { | |
| "epoch": 2.8957871396895785, | |
| "grad_norm": 0.9031757116317749, | |
| "learning_rate": 2.7564832185301915e-06, | |
| "loss": 0.6350713968276978, | |
| "step": 1306 | |
| }, | |
| { | |
| "epoch": 2.9002217294900223, | |
| "grad_norm": 0.3347035348415375, | |
| "learning_rate": 2.7434183446693397e-06, | |
| "loss": 0.2564702033996582, | |
| "step": 1308 | |
| }, | |
| { | |
| "epoch": 2.9046563192904657, | |
| "grad_norm": 0.7028330564498901, | |
| "learning_rate": 2.730390557268897e-06, | |
| "loss": 0.37558212876319885, | |
| "step": 1310 | |
| }, | |
| { | |
| "epoch": 2.909090909090909, | |
| "grad_norm": 1.2789974212646484, | |
| "learning_rate": 2.7174000316019277e-06, | |
| "loss": 0.7542843222618103, | |
| "step": 1312 | |
| }, | |
| { | |
| "epoch": 2.9135254988913526, | |
| "grad_norm": 0.5418480634689331, | |
| "learning_rate": 2.704446942440191e-06, | |
| "loss": 0.7207885980606079, | |
| "step": 1314 | |
| }, | |
| { | |
| "epoch": 2.917960088691796, | |
| "grad_norm": 1.9766089916229248, | |
| "learning_rate": 2.6915314640517755e-06, | |
| "loss": 0.5357068181037903, | |
| "step": 1316 | |
| }, | |
| { | |
| "epoch": 2.9223946784922394, | |
| "grad_norm": 0.6477085947990417, | |
| "learning_rate": 2.6786537701987703e-06, | |
| "loss": 0.813840925693512, | |
| "step": 1318 | |
| }, | |
| { | |
| "epoch": 2.926829268292683, | |
| "grad_norm": 0.5277987122535706, | |
| "learning_rate": 2.665814034134916e-06, | |
| "loss": 0.4807235598564148, | |
| "step": 1320 | |
| }, | |
| { | |
| "epoch": 2.9312638580931263, | |
| "grad_norm": 0.4396364390850067, | |
| "learning_rate": 2.6530124286032755e-06, | |
| "loss": 0.7007963061332703, | |
| "step": 1322 | |
| }, | |
| { | |
| "epoch": 2.9356984478935697, | |
| "grad_norm": 0.8923091292381287, | |
| "learning_rate": 2.640249125833915e-06, | |
| "loss": 0.7382723689079285, | |
| "step": 1324 | |
| }, | |
| { | |
| "epoch": 2.9401330376940136, | |
| "grad_norm": 0.4707968533039093, | |
| "learning_rate": 2.6275242975415804e-06, | |
| "loss": 0.638217568397522, | |
| "step": 1326 | |
| }, | |
| { | |
| "epoch": 2.9445676274944566, | |
| "grad_norm": 0.5562184453010559, | |
| "learning_rate": 2.614838114923394e-06, | |
| "loss": 0.8006188869476318, | |
| "step": 1328 | |
| }, | |
| { | |
| "epoch": 2.9490022172949004, | |
| "grad_norm": 0.6460016369819641, | |
| "learning_rate": 2.6021907486565447e-06, | |
| "loss": 0.7337307333946228, | |
| "step": 1330 | |
| }, | |
| { | |
| "epoch": 2.953436807095344, | |
| "grad_norm": 0.600333034992218, | |
| "learning_rate": 2.589582368895992e-06, | |
| "loss": 0.7522150278091431, | |
| "step": 1332 | |
| }, | |
| { | |
| "epoch": 2.9578713968957873, | |
| "grad_norm": 0.47927215695381165, | |
| "learning_rate": 2.577013145272185e-06, | |
| "loss": 0.38160401582717896, | |
| "step": 1334 | |
| }, | |
| { | |
| "epoch": 2.9623059866962307, | |
| "grad_norm": 0.6014701128005981, | |
| "learning_rate": 2.564483246888772e-06, | |
| "loss": 0.7021808624267578, | |
| "step": 1336 | |
| }, | |
| { | |
| "epoch": 2.966740576496674, | |
| "grad_norm": 1.0804996490478516, | |
| "learning_rate": 2.5519928423203266e-06, | |
| "loss": 0.7971820831298828, | |
| "step": 1338 | |
| }, | |
| { | |
| "epoch": 2.9711751662971175, | |
| "grad_norm": 0.5336750745773315, | |
| "learning_rate": 2.539542099610084e-06, | |
| "loss": 0.5081973075866699, | |
| "step": 1340 | |
| }, | |
| { | |
| "epoch": 2.975609756097561, | |
| "grad_norm": 2.955822467803955, | |
| "learning_rate": 2.5271311862676727e-06, | |
| "loss": 0.4675310254096985, | |
| "step": 1342 | |
| }, | |
| { | |
| "epoch": 2.9800443458980044, | |
| "grad_norm": 0.7282045483589172, | |
| "learning_rate": 2.514760269266871e-06, | |
| "loss": 0.16825823485851288, | |
| "step": 1344 | |
| }, | |
| { | |
| "epoch": 2.984478935698448, | |
| "grad_norm": 0.5478266477584839, | |
| "learning_rate": 2.50242951504335e-06, | |
| "loss": 0.4867478311061859, | |
| "step": 1346 | |
| }, | |
| { | |
| "epoch": 2.988913525498891, | |
| "grad_norm": 0.22204731404781342, | |
| "learning_rate": 2.490139089492443e-06, | |
| "loss": 0.4467255175113678, | |
| "step": 1348 | |
| }, | |
| { | |
| "epoch": 2.9933481152993346, | |
| "grad_norm": 4.795533657073975, | |
| "learning_rate": 2.4778891579669067e-06, | |
| "loss": 0.3938838839530945, | |
| "step": 1350 | |
| }, | |
| { | |
| "epoch": 2.9977827050997785, | |
| "grad_norm": 0.496768981218338, | |
| "learning_rate": 2.4656798852747023e-06, | |
| "loss": 0.5702564120292664, | |
| "step": 1352 | |
| }, | |
| { | |
| "epoch": 3.002217294900222, | |
| "grad_norm": 0.7324950098991394, | |
| "learning_rate": 2.453511435676777e-06, | |
| "loss": 0.5976329445838928, | |
| "step": 1354 | |
| }, | |
| { | |
| "epoch": 3.0066518847006654, | |
| "grad_norm": 0.5204404592514038, | |
| "learning_rate": 2.441383972884848e-06, | |
| "loss": 0.2519686818122864, | |
| "step": 1356 | |
| }, | |
| { | |
| "epoch": 3.011086474501109, | |
| "grad_norm": 0.6069619655609131, | |
| "learning_rate": 2.4292976600592095e-06, | |
| "loss": 0.3836074471473694, | |
| "step": 1358 | |
| }, | |
| { | |
| "epoch": 3.015521064301552, | |
| "grad_norm": 0.6137452721595764, | |
| "learning_rate": 2.4172526598065304e-06, | |
| "loss": 0.5434846878051758, | |
| "step": 1360 | |
| }, | |
| { | |
| "epoch": 3.0199556541019956, | |
| "grad_norm": 1.0837262868881226, | |
| "learning_rate": 2.4052491341776686e-06, | |
| "loss": 0.4407395124435425, | |
| "step": 1362 | |
| }, | |
| { | |
| "epoch": 3.024390243902439, | |
| "grad_norm": 0.8856160640716553, | |
| "learning_rate": 2.393287244665494e-06, | |
| "loss": 0.5295175313949585, | |
| "step": 1364 | |
| }, | |
| { | |
| "epoch": 3.0288248337028825, | |
| "grad_norm": 0.12850333750247955, | |
| "learning_rate": 2.3813671522027094e-06, | |
| "loss": 0.13662414252758026, | |
| "step": 1366 | |
| }, | |
| { | |
| "epoch": 3.033259423503326, | |
| "grad_norm": 0.762299120426178, | |
| "learning_rate": 2.369489017159692e-06, | |
| "loss": 0.2511744797229767, | |
| "step": 1368 | |
| }, | |
| { | |
| "epoch": 3.0376940133037693, | |
| "grad_norm": 0.7555387616157532, | |
| "learning_rate": 2.357652999342334e-06, | |
| "loss": 0.28912025690078735, | |
| "step": 1370 | |
| }, | |
| { | |
| "epoch": 3.0421286031042127, | |
| "grad_norm": 0.7977571487426758, | |
| "learning_rate": 2.345859257989886e-06, | |
| "loss": 0.4399268925189972, | |
| "step": 1372 | |
| }, | |
| { | |
| "epoch": 3.046563192904656, | |
| "grad_norm": 0.47056496143341064, | |
| "learning_rate": 2.334107951772826e-06, | |
| "loss": 0.13275624811649323, | |
| "step": 1374 | |
| }, | |
| { | |
| "epoch": 3.0509977827050996, | |
| "grad_norm": 1.6538853645324707, | |
| "learning_rate": 2.3223992387907137e-06, | |
| "loss": 0.2295996993780136, | |
| "step": 1376 | |
| }, | |
| { | |
| "epoch": 3.0554323725055434, | |
| "grad_norm": 0.35899239778518677, | |
| "learning_rate": 2.3107332765700733e-06, | |
| "loss": 0.1384759098291397, | |
| "step": 1378 | |
| }, | |
| { | |
| "epoch": 3.059866962305987, | |
| "grad_norm": 3.042902708053589, | |
| "learning_rate": 2.2991102220622647e-06, | |
| "loss": 0.21765412390232086, | |
| "step": 1380 | |
| }, | |
| { | |
| "epoch": 3.0643015521064303, | |
| "grad_norm": 1.761853575706482, | |
| "learning_rate": 2.2875302316413807e-06, | |
| "loss": 0.10996652394533157, | |
| "step": 1382 | |
| }, | |
| { | |
| "epoch": 3.0687361419068737, | |
| "grad_norm": 1.1698344945907593, | |
| "learning_rate": 2.275993461102138e-06, | |
| "loss": 0.19330096244812012, | |
| "step": 1384 | |
| }, | |
| { | |
| "epoch": 3.073170731707317, | |
| "grad_norm": 0.32080015540122986, | |
| "learning_rate": 2.2645000656577793e-06, | |
| "loss": 0.14769285917282104, | |
| "step": 1386 | |
| }, | |
| { | |
| "epoch": 3.0776053215077606, | |
| "grad_norm": 0.19042012095451355, | |
| "learning_rate": 2.2530501999379932e-06, | |
| "loss": 0.1551397293806076, | |
| "step": 1388 | |
| }, | |
| { | |
| "epoch": 3.082039911308204, | |
| "grad_norm": 0.5313494801521301, | |
| "learning_rate": 2.2416440179868236e-06, | |
| "loss": 0.3368770480155945, | |
| "step": 1390 | |
| }, | |
| { | |
| "epoch": 3.0864745011086474, | |
| "grad_norm": 1.5543434619903564, | |
| "learning_rate": 2.230281673260605e-06, | |
| "loss": 0.09978589415550232, | |
| "step": 1392 | |
| }, | |
| { | |
| "epoch": 3.090909090909091, | |
| "grad_norm": 2.5066184997558594, | |
| "learning_rate": 2.218963318625895e-06, | |
| "loss": 0.2049475759267807, | |
| "step": 1394 | |
| }, | |
| { | |
| "epoch": 3.0953436807095343, | |
| "grad_norm": 0.8068859577178955, | |
| "learning_rate": 2.2076891063574167e-06, | |
| "loss": 0.4105300307273865, | |
| "step": 1396 | |
| }, | |
| { | |
| "epoch": 3.0997782705099777, | |
| "grad_norm": 0.9558060765266418, | |
| "learning_rate": 2.196459188136014e-06, | |
| "loss": 0.40823429822921753, | |
| "step": 1398 | |
| }, | |
| { | |
| "epoch": 3.104212860310421, | |
| "grad_norm": 0.2851542830467224, | |
| "learning_rate": 2.1852737150466064e-06, | |
| "loss": 0.31296348571777344, | |
| "step": 1400 | |
| }, | |
| { | |
| "epoch": 3.1086474501108645, | |
| "grad_norm": 0.3126837909221649, | |
| "learning_rate": 2.174132837576156e-06, | |
| "loss": 0.0858599990606308, | |
| "step": 1402 | |
| }, | |
| { | |
| "epoch": 3.1130820399113084, | |
| "grad_norm": 1.1437623500823975, | |
| "learning_rate": 2.1630367056116496e-06, | |
| "loss": 0.16902470588684082, | |
| "step": 1404 | |
| }, | |
| { | |
| "epoch": 3.117516629711752, | |
| "grad_norm": 1.4211009740829468, | |
| "learning_rate": 2.1519854684380724e-06, | |
| "loss": 0.44321006536483765, | |
| "step": 1406 | |
| }, | |
| { | |
| "epoch": 3.1219512195121952, | |
| "grad_norm": 0.5866262912750244, | |
| "learning_rate": 2.1409792747364103e-06, | |
| "loss": 0.4670795202255249, | |
| "step": 1408 | |
| }, | |
| { | |
| "epoch": 3.1263858093126387, | |
| "grad_norm": 0.5906457901000977, | |
| "learning_rate": 2.1300182725816378e-06, | |
| "loss": 0.3769230246543884, | |
| "step": 1410 | |
| }, | |
| { | |
| "epoch": 3.130820399113082, | |
| "grad_norm": 0.6624063849449158, | |
| "learning_rate": 2.1191026094407386e-06, | |
| "loss": 0.3948790431022644, | |
| "step": 1412 | |
| }, | |
| { | |
| "epoch": 3.1352549889135255, | |
| "grad_norm": 0.45142582058906555, | |
| "learning_rate": 2.1082324321707075e-06, | |
| "loss": 0.26148101687431335, | |
| "step": 1414 | |
| }, | |
| { | |
| "epoch": 3.139689578713969, | |
| "grad_norm": 0.08900812268257141, | |
| "learning_rate": 2.0974078870165882e-06, | |
| "loss": 0.0190610121935606, | |
| "step": 1416 | |
| }, | |
| { | |
| "epoch": 3.1441241685144123, | |
| "grad_norm": 0.7073218822479248, | |
| "learning_rate": 2.086629119609499e-06, | |
| "loss": 0.3549107611179352, | |
| "step": 1418 | |
| }, | |
| { | |
| "epoch": 3.1485587583148558, | |
| "grad_norm": 0.5838420391082764, | |
| "learning_rate": 2.0758962749646716e-06, | |
| "loss": 0.39513227343559265, | |
| "step": 1420 | |
| }, | |
| { | |
| "epoch": 3.152993348115299, | |
| "grad_norm": 0.4758341312408447, | |
| "learning_rate": 2.065209497479502e-06, | |
| "loss": 0.3184177279472351, | |
| "step": 1422 | |
| }, | |
| { | |
| "epoch": 3.1574279379157426, | |
| "grad_norm": 0.33181366324424744, | |
| "learning_rate": 2.0545689309316138e-06, | |
| "loss": 0.05200305953621864, | |
| "step": 1424 | |
| }, | |
| { | |
| "epoch": 3.1618625277161865, | |
| "grad_norm": 1.9294317960739136, | |
| "learning_rate": 2.043974718476911e-06, | |
| "loss": 0.34890684485435486, | |
| "step": 1426 | |
| }, | |
| { | |
| "epoch": 3.16629711751663, | |
| "grad_norm": 1.2482831478118896, | |
| "learning_rate": 2.033427002647668e-06, | |
| "loss": 0.2931206524372101, | |
| "step": 1428 | |
| }, | |
| { | |
| "epoch": 3.1707317073170733, | |
| "grad_norm": 0.8384535312652588, | |
| "learning_rate": 2.0229259253505946e-06, | |
| "loss": 0.15022891759872437, | |
| "step": 1430 | |
| }, | |
| { | |
| "epoch": 3.1751662971175167, | |
| "grad_norm": 0.5024890303611755, | |
| "learning_rate": 2.012471627864943e-06, | |
| "loss": 0.24945132434368134, | |
| "step": 1432 | |
| }, | |
| { | |
| "epoch": 3.17960088691796, | |
| "grad_norm": 0.7453292608261108, | |
| "learning_rate": 2.0020642508405984e-06, | |
| "loss": 0.27870461344718933, | |
| "step": 1434 | |
| }, | |
| { | |
| "epoch": 3.1840354767184036, | |
| "grad_norm": 0.5649028420448303, | |
| "learning_rate": 1.9917039342961837e-06, | |
| "loss": 0.04178987070918083, | |
| "step": 1436 | |
| }, | |
| { | |
| "epoch": 3.188470066518847, | |
| "grad_norm": 0.5637784600257874, | |
| "learning_rate": 1.9813908176171857e-06, | |
| "loss": 0.2853415310382843, | |
| "step": 1438 | |
| }, | |
| { | |
| "epoch": 3.1929046563192904, | |
| "grad_norm": 1.2113947868347168, | |
| "learning_rate": 1.97112503955407e-06, | |
| "loss": 0.3286460340023041, | |
| "step": 1440 | |
| }, | |
| { | |
| "epoch": 3.197339246119734, | |
| "grad_norm": 0.7789623737335205, | |
| "learning_rate": 1.9609067382204224e-06, | |
| "loss": 0.41224563121795654, | |
| "step": 1442 | |
| }, | |
| { | |
| "epoch": 3.2017738359201773, | |
| "grad_norm": 1.0445942878723145, | |
| "learning_rate": 1.950736051091084e-06, | |
| "loss": 0.5211740136146545, | |
| "step": 1444 | |
| }, | |
| { | |
| "epoch": 3.2062084257206207, | |
| "grad_norm": 0.4848485589027405, | |
| "learning_rate": 1.9406131150003036e-06, | |
| "loss": 0.37894371151924133, | |
| "step": 1446 | |
| }, | |
| { | |
| "epoch": 3.210643015521064, | |
| "grad_norm": 0.6881194710731506, | |
| "learning_rate": 1.930538066139904e-06, | |
| "loss": 0.1102185845375061, | |
| "step": 1448 | |
| }, | |
| { | |
| "epoch": 3.2150776053215075, | |
| "grad_norm": 0.7072343230247498, | |
| "learning_rate": 1.9205110400574368e-06, | |
| "loss": 0.46290311217308044, | |
| "step": 1450 | |
| }, | |
| { | |
| "epoch": 3.2195121951219514, | |
| "grad_norm": 0.7401267290115356, | |
| "learning_rate": 1.910532171654367e-06, | |
| "loss": 0.1061285138130188, | |
| "step": 1452 | |
| }, | |
| { | |
| "epoch": 3.223946784922395, | |
| "grad_norm": 0.6795309782028198, | |
| "learning_rate": 1.9006015951842587e-06, | |
| "loss": 0.30657243728637695, | |
| "step": 1454 | |
| }, | |
| { | |
| "epoch": 3.2283813747228383, | |
| "grad_norm": 1.8200849294662476, | |
| "learning_rate": 1.8907194442509642e-06, | |
| "loss": 0.29492244124412537, | |
| "step": 1456 | |
| }, | |
| { | |
| "epoch": 3.2328159645232817, | |
| "grad_norm": 0.4630158841609955, | |
| "learning_rate": 1.8808858518068312e-06, | |
| "loss": 0.2674718499183655, | |
| "step": 1458 | |
| }, | |
| { | |
| "epoch": 3.237250554323725, | |
| "grad_norm": 1.3433810472488403, | |
| "learning_rate": 1.8711009501509087e-06, | |
| "loss": 0.287025511264801, | |
| "step": 1460 | |
| }, | |
| { | |
| "epoch": 3.2416851441241685, | |
| "grad_norm": 0.596284031867981, | |
| "learning_rate": 1.8613648709271732e-06, | |
| "loss": 0.30234360694885254, | |
| "step": 1462 | |
| }, | |
| { | |
| "epoch": 3.246119733924612, | |
| "grad_norm": 1.2054084539413452, | |
| "learning_rate": 1.8516777451227552e-06, | |
| "loss": 0.26377224922180176, | |
| "step": 1464 | |
| }, | |
| { | |
| "epoch": 3.2505543237250554, | |
| "grad_norm": 1.0872267484664917, | |
| "learning_rate": 1.842039703066172e-06, | |
| "loss": 0.44247758388519287, | |
| "step": 1466 | |
| }, | |
| { | |
| "epoch": 3.254988913525499, | |
| "grad_norm": 0.12414921820163727, | |
| "learning_rate": 1.8324508744255842e-06, | |
| "loss": 0.028142806142568588, | |
| "step": 1468 | |
| }, | |
| { | |
| "epoch": 3.259423503325942, | |
| "grad_norm": 1.1836111545562744, | |
| "learning_rate": 1.8229113882070398e-06, | |
| "loss": 0.258672297000885, | |
| "step": 1470 | |
| }, | |
| { | |
| "epoch": 3.2638580931263856, | |
| "grad_norm": 0.5712287425994873, | |
| "learning_rate": 1.8134213727527504e-06, | |
| "loss": 0.3421834409236908, | |
| "step": 1472 | |
| }, | |
| { | |
| "epoch": 3.2682926829268295, | |
| "grad_norm": 0.5858505964279175, | |
| "learning_rate": 1.803980955739354e-06, | |
| "loss": 0.08477696031332016, | |
| "step": 1474 | |
| }, | |
| { | |
| "epoch": 3.2727272727272725, | |
| "grad_norm": 0.5068755149841309, | |
| "learning_rate": 1.7945902641762027e-06, | |
| "loss": 0.2908667027950287, | |
| "step": 1476 | |
| }, | |
| { | |
| "epoch": 3.2771618625277164, | |
| "grad_norm": 0.4555286169052124, | |
| "learning_rate": 1.785249424403654e-06, | |
| "loss": 0.23708750307559967, | |
| "step": 1478 | |
| }, | |
| { | |
| "epoch": 3.2815964523281598, | |
| "grad_norm": 0.4396187365055084, | |
| "learning_rate": 1.7759585620913723e-06, | |
| "loss": 0.2890844941139221, | |
| "step": 1480 | |
| }, | |
| { | |
| "epoch": 3.286031042128603, | |
| "grad_norm": 0.7039048671722412, | |
| "learning_rate": 1.7667178022366294e-06, | |
| "loss": 0.42803099751472473, | |
| "step": 1482 | |
| }, | |
| { | |
| "epoch": 3.2904656319290466, | |
| "grad_norm": 0.6853517293930054, | |
| "learning_rate": 1.757527269162636e-06, | |
| "loss": 0.31256699562072754, | |
| "step": 1484 | |
| }, | |
| { | |
| "epoch": 3.29490022172949, | |
| "grad_norm": 0.5980814695358276, | |
| "learning_rate": 1.7483870865168585e-06, | |
| "loss": 0.23612843453884125, | |
| "step": 1486 | |
| }, | |
| { | |
| "epoch": 3.2993348115299335, | |
| "grad_norm": 0.6018959879875183, | |
| "learning_rate": 1.739297377269361e-06, | |
| "loss": 0.1575717329978943, | |
| "step": 1488 | |
| }, | |
| { | |
| "epoch": 3.303769401330377, | |
| "grad_norm": 0.9202043414115906, | |
| "learning_rate": 1.730258263711149e-06, | |
| "loss": 0.0800296813249588, | |
| "step": 1490 | |
| }, | |
| { | |
| "epoch": 3.3082039911308203, | |
| "grad_norm": 0.804567277431488, | |
| "learning_rate": 1.7212698674525246e-06, | |
| "loss": 0.384365975856781, | |
| "step": 1492 | |
| }, | |
| { | |
| "epoch": 3.3126385809312637, | |
| "grad_norm": 0.6774597764015198, | |
| "learning_rate": 1.7123323094214485e-06, | |
| "loss": 0.35183945298194885, | |
| "step": 1494 | |
| }, | |
| { | |
| "epoch": 3.317073170731707, | |
| "grad_norm": 0.5413676500320435, | |
| "learning_rate": 1.7034457098619176e-06, | |
| "loss": 0.3031509518623352, | |
| "step": 1496 | |
| }, | |
| { | |
| "epoch": 3.3215077605321506, | |
| "grad_norm": 0.6860851645469666, | |
| "learning_rate": 1.6946101883323435e-06, | |
| "loss": 0.4678665101528168, | |
| "step": 1498 | |
| }, | |
| { | |
| "epoch": 3.3259423503325944, | |
| "grad_norm": 0.8729199767112732, | |
| "learning_rate": 1.6858258637039421e-06, | |
| "loss": 0.2541211247444153, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 3.330376940133038, | |
| "grad_norm": 0.5793163776397705, | |
| "learning_rate": 1.677092854159142e-06, | |
| "loss": 0.409402996301651, | |
| "step": 1502 | |
| }, | |
| { | |
| "epoch": 3.3348115299334813, | |
| "grad_norm": 0.6284571886062622, | |
| "learning_rate": 1.6684112771899858e-06, | |
| "loss": 0.44005826115608215, | |
| "step": 1504 | |
| }, | |
| { | |
| "epoch": 3.3392461197339247, | |
| "grad_norm": 0.23389166593551636, | |
| "learning_rate": 1.6597812495965537e-06, | |
| "loss": 0.18956291675567627, | |
| "step": 1506 | |
| }, | |
| { | |
| "epoch": 3.343680709534368, | |
| "grad_norm": 0.6742655038833618, | |
| "learning_rate": 1.651202887485394e-06, | |
| "loss": 0.19848978519439697, | |
| "step": 1508 | |
| }, | |
| { | |
| "epoch": 3.3481152993348116, | |
| "grad_norm": 1.077938199043274, | |
| "learning_rate": 1.6426763062679553e-06, | |
| "loss": 0.2809881567955017, | |
| "step": 1510 | |
| }, | |
| { | |
| "epoch": 3.352549889135255, | |
| "grad_norm": 0.15749704837799072, | |
| "learning_rate": 1.63420162065904e-06, | |
| "loss": 0.2086062878370285, | |
| "step": 1512 | |
| }, | |
| { | |
| "epoch": 3.3569844789356984, | |
| "grad_norm": 0.796462893486023, | |
| "learning_rate": 1.625778944675257e-06, | |
| "loss": 0.4808502495288849, | |
| "step": 1514 | |
| }, | |
| { | |
| "epoch": 3.361419068736142, | |
| "grad_norm": 0.3194371163845062, | |
| "learning_rate": 1.6174083916334877e-06, | |
| "loss": 0.2608613967895508, | |
| "step": 1516 | |
| }, | |
| { | |
| "epoch": 3.3658536585365852, | |
| "grad_norm": 0.6312937140464783, | |
| "learning_rate": 1.609090074149366e-06, | |
| "loss": 0.30664098262786865, | |
| "step": 1518 | |
| }, | |
| { | |
| "epoch": 3.3702882483370287, | |
| "grad_norm": 0.6045692563056946, | |
| "learning_rate": 1.6008241041357535e-06, | |
| "loss": 0.4134957492351532, | |
| "step": 1520 | |
| }, | |
| { | |
| "epoch": 3.374722838137472, | |
| "grad_norm": 0.35350367426872253, | |
| "learning_rate": 1.5926105928012486e-06, | |
| "loss": 0.25765693187713623, | |
| "step": 1522 | |
| }, | |
| { | |
| "epoch": 3.3791574279379155, | |
| "grad_norm": 0.6642202138900757, | |
| "learning_rate": 1.5844496506486734e-06, | |
| "loss": 0.45118772983551025, | |
| "step": 1524 | |
| }, | |
| { | |
| "epoch": 3.3835920177383594, | |
| "grad_norm": 1.6221054792404175, | |
| "learning_rate": 1.576341387473601e-06, | |
| "loss": 0.26244989037513733, | |
| "step": 1526 | |
| }, | |
| { | |
| "epoch": 3.388026607538803, | |
| "grad_norm": 0.6710484623908997, | |
| "learning_rate": 1.568285912362872e-06, | |
| "loss": 0.18830682337284088, | |
| "step": 1528 | |
| }, | |
| { | |
| "epoch": 3.3924611973392462, | |
| "grad_norm": 0.8072285652160645, | |
| "learning_rate": 1.5602833336931242e-06, | |
| "loss": 0.20579344034194946, | |
| "step": 1530 | |
| }, | |
| { | |
| "epoch": 3.3968957871396896, | |
| "grad_norm": 1.560577392578125, | |
| "learning_rate": 1.552333759129344e-06, | |
| "loss": 0.07648878544569016, | |
| "step": 1532 | |
| }, | |
| { | |
| "epoch": 3.401330376940133, | |
| "grad_norm": 0.8869048357009888, | |
| "learning_rate": 1.5444372956234062e-06, | |
| "loss": 0.3228890001773834, | |
| "step": 1534 | |
| }, | |
| { | |
| "epoch": 3.4057649667405765, | |
| "grad_norm": 0.6825631260871887, | |
| "learning_rate": 1.5365940494126424e-06, | |
| "loss": 0.412392795085907, | |
| "step": 1536 | |
| }, | |
| { | |
| "epoch": 3.41019955654102, | |
| "grad_norm": 0.7702367901802063, | |
| "learning_rate": 1.5288041260184132e-06, | |
| "loss": 0.33175259828567505, | |
| "step": 1538 | |
| }, | |
| { | |
| "epoch": 3.4146341463414633, | |
| "grad_norm": 0.9447566270828247, | |
| "learning_rate": 1.5210676302446801e-06, | |
| "loss": 0.437308669090271, | |
| "step": 1540 | |
| }, | |
| { | |
| "epoch": 3.4190687361419068, | |
| "grad_norm": 1.0833908319473267, | |
| "learning_rate": 1.5133846661766058e-06, | |
| "loss": 0.29791808128356934, | |
| "step": 1542 | |
| }, | |
| { | |
| "epoch": 3.42350332594235, | |
| "grad_norm": 0.7799423933029175, | |
| "learning_rate": 1.5057553371791461e-06, | |
| "loss": 0.40058910846710205, | |
| "step": 1544 | |
| }, | |
| { | |
| "epoch": 3.4279379157427936, | |
| "grad_norm": 1.4001895189285278, | |
| "learning_rate": 1.4981797458956624e-06, | |
| "loss": 0.031288325786590576, | |
| "step": 1546 | |
| }, | |
| { | |
| "epoch": 3.4323725055432375, | |
| "grad_norm": 0.24610136449337006, | |
| "learning_rate": 1.490657994246542e-06, | |
| "loss": 0.09260788559913635, | |
| "step": 1548 | |
| }, | |
| { | |
| "epoch": 3.436807095343681, | |
| "grad_norm": 0.6165451407432556, | |
| "learning_rate": 1.4831901834278212e-06, | |
| "loss": 0.5242594480514526, | |
| "step": 1550 | |
| }, | |
| { | |
| "epoch": 3.4412416851441243, | |
| "grad_norm": 0.738479495048523, | |
| "learning_rate": 1.4757764139098332e-06, | |
| "loss": 0.37381070852279663, | |
| "step": 1552 | |
| }, | |
| { | |
| "epoch": 3.4456762749445677, | |
| "grad_norm": 0.11110386252403259, | |
| "learning_rate": 1.468416785435847e-06, | |
| "loss": 0.22732020914554596, | |
| "step": 1554 | |
| }, | |
| { | |
| "epoch": 3.450110864745011, | |
| "grad_norm": 0.8456872701644897, | |
| "learning_rate": 1.461111397020732e-06, | |
| "loss": 0.47603723406791687, | |
| "step": 1556 | |
| }, | |
| { | |
| "epoch": 3.4545454545454546, | |
| "grad_norm": 1.9469002485275269, | |
| "learning_rate": 1.4538603469496215e-06, | |
| "loss": 0.6115579009056091, | |
| "step": 1558 | |
| }, | |
| { | |
| "epoch": 3.458980044345898, | |
| "grad_norm": 0.6602691411972046, | |
| "learning_rate": 1.4466637327765937e-06, | |
| "loss": 0.4858705699443817, | |
| "step": 1560 | |
| }, | |
| { | |
| "epoch": 3.4634146341463414, | |
| "grad_norm": 0.5199502110481262, | |
| "learning_rate": 1.4395216513233584e-06, | |
| "loss": 0.06964029371738434, | |
| "step": 1562 | |
| }, | |
| { | |
| "epoch": 3.467849223946785, | |
| "grad_norm": 0.6388575434684753, | |
| "learning_rate": 1.4324341986779527e-06, | |
| "loss": 0.4515414237976074, | |
| "step": 1564 | |
| }, | |
| { | |
| "epoch": 3.4722838137472283, | |
| "grad_norm": 0.5326871871948242, | |
| "learning_rate": 1.4254014701934481e-06, | |
| "loss": 0.20355406403541565, | |
| "step": 1566 | |
| }, | |
| { | |
| "epoch": 3.4767184035476717, | |
| "grad_norm": 0.6119282841682434, | |
| "learning_rate": 1.4184235604866725e-06, | |
| "loss": 0.3266424834728241, | |
| "step": 1568 | |
| }, | |
| { | |
| "epoch": 3.481152993348115, | |
| "grad_norm": 0.621752917766571, | |
| "learning_rate": 1.4115005634369296e-06, | |
| "loss": 0.19875553250312805, | |
| "step": 1570 | |
| }, | |
| { | |
| "epoch": 3.4855875831485585, | |
| "grad_norm": 0.22762054204940796, | |
| "learning_rate": 1.4046325721847443e-06, | |
| "loss": 0.3008459210395813, | |
| "step": 1572 | |
| }, | |
| { | |
| "epoch": 3.4900221729490024, | |
| "grad_norm": 0.6187798380851746, | |
| "learning_rate": 1.397819679130601e-06, | |
| "loss": 0.6530707478523254, | |
| "step": 1574 | |
| }, | |
| { | |
| "epoch": 3.494456762749446, | |
| "grad_norm": 0.4245932996273041, | |
| "learning_rate": 1.3910619759337074e-06, | |
| "loss": 0.21562115848064423, | |
| "step": 1576 | |
| }, | |
| { | |
| "epoch": 3.4988913525498893, | |
| "grad_norm": 0.7150433659553528, | |
| "learning_rate": 1.3843595535107587e-06, | |
| "loss": 0.21964870393276215, | |
| "step": 1578 | |
| }, | |
| { | |
| "epoch": 3.5033259423503327, | |
| "grad_norm": 1.0825732946395874, | |
| "learning_rate": 1.377712502034712e-06, | |
| "loss": 0.4543288052082062, | |
| "step": 1580 | |
| }, | |
| { | |
| "epoch": 3.507760532150776, | |
| "grad_norm": 0.8185349702835083, | |
| "learning_rate": 1.3711209109335793e-06, | |
| "loss": 0.5709398984909058, | |
| "step": 1582 | |
| }, | |
| { | |
| "epoch": 3.5121951219512195, | |
| "grad_norm": 0.7394730448722839, | |
| "learning_rate": 1.3645848688892162e-06, | |
| "loss": 0.31390172243118286, | |
| "step": 1584 | |
| }, | |
| { | |
| "epoch": 3.516629711751663, | |
| "grad_norm": 1.0575904846191406, | |
| "learning_rate": 1.3581044638361373e-06, | |
| "loss": 0.2191072404384613, | |
| "step": 1586 | |
| }, | |
| { | |
| "epoch": 3.5210643015521064, | |
| "grad_norm": 1.3772166967391968, | |
| "learning_rate": 1.3516797829603256e-06, | |
| "loss": 0.40813693404197693, | |
| "step": 1588 | |
| }, | |
| { | |
| "epoch": 3.52549889135255, | |
| "grad_norm": 1.6636643409729004, | |
| "learning_rate": 1.3453109126980643e-06, | |
| "loss": 0.13340415060520172, | |
| "step": 1590 | |
| }, | |
| { | |
| "epoch": 3.529933481152993, | |
| "grad_norm": 1.4304341077804565, | |
| "learning_rate": 1.3389979387347743e-06, | |
| "loss": 0.24174071848392487, | |
| "step": 1592 | |
| }, | |
| { | |
| "epoch": 3.5343680709534366, | |
| "grad_norm": 0.7794627547264099, | |
| "learning_rate": 1.332740946003857e-06, | |
| "loss": 0.485975980758667, | |
| "step": 1594 | |
| }, | |
| { | |
| "epoch": 3.5388026607538805, | |
| "grad_norm": 1.178125023841858, | |
| "learning_rate": 1.3265400186855548e-06, | |
| "loss": 0.07965230196714401, | |
| "step": 1596 | |
| }, | |
| { | |
| "epoch": 3.5432372505543235, | |
| "grad_norm": 1.4382803440093994, | |
| "learning_rate": 1.320395240205819e-06, | |
| "loss": 0.16565276682376862, | |
| "step": 1598 | |
| }, | |
| { | |
| "epoch": 3.5476718403547673, | |
| "grad_norm": 0.6275985836982727, | |
| "learning_rate": 1.3143066932351856e-06, | |
| "loss": 0.25433921813964844, | |
| "step": 1600 | |
| }, | |
| { | |
| "epoch": 3.5521064301552108, | |
| "grad_norm": 0.6336963772773743, | |
| "learning_rate": 1.308274459687665e-06, | |
| "loss": 0.3951624035835266, | |
| "step": 1602 | |
| }, | |
| { | |
| "epoch": 3.556541019955654, | |
| "grad_norm": 0.611705482006073, | |
| "learning_rate": 1.3022986207196367e-06, | |
| "loss": 0.623420000076294, | |
| "step": 1604 | |
| }, | |
| { | |
| "epoch": 3.5609756097560976, | |
| "grad_norm": 0.7854931354522705, | |
| "learning_rate": 1.2963792567287617e-06, | |
| "loss": 0.5146090984344482, | |
| "step": 1606 | |
| }, | |
| { | |
| "epoch": 3.565410199556541, | |
| "grad_norm": 0.1012968197464943, | |
| "learning_rate": 1.290516447352899e-06, | |
| "loss": 0.023020246997475624, | |
| "step": 1608 | |
| }, | |
| { | |
| "epoch": 3.5698447893569845, | |
| "grad_norm": 1.4004569053649902, | |
| "learning_rate": 1.2847102714690308e-06, | |
| "loss": 0.2902672588825226, | |
| "step": 1610 | |
| }, | |
| { | |
| "epoch": 3.574279379157428, | |
| "grad_norm": 1.025610089302063, | |
| "learning_rate": 1.2789608071922076e-06, | |
| "loss": 0.03987615182995796, | |
| "step": 1612 | |
| }, | |
| { | |
| "epoch": 3.5787139689578713, | |
| "grad_norm": 0.9752769470214844, | |
| "learning_rate": 1.2732681318744923e-06, | |
| "loss": 0.30136820673942566, | |
| "step": 1614 | |
| }, | |
| { | |
| "epoch": 3.5831485587583147, | |
| "grad_norm": 0.9108896851539612, | |
| "learning_rate": 1.2676323221039236e-06, | |
| "loss": 0.5326212644577026, | |
| "step": 1616 | |
| }, | |
| { | |
| "epoch": 3.587583148558758, | |
| "grad_norm": 0.5897319316864014, | |
| "learning_rate": 1.2620534537034795e-06, | |
| "loss": 0.2587262690067291, | |
| "step": 1618 | |
| }, | |
| { | |
| "epoch": 3.5920177383592016, | |
| "grad_norm": 2.681466817855835, | |
| "learning_rate": 1.2565316017300635e-06, | |
| "loss": 0.4006616473197937, | |
| "step": 1620 | |
| }, | |
| { | |
| "epoch": 3.5964523281596454, | |
| "grad_norm": 1.1970038414001465, | |
| "learning_rate": 1.2510668404734924e-06, | |
| "loss": 0.4482450783252716, | |
| "step": 1622 | |
| }, | |
| { | |
| "epoch": 3.6008869179600884, | |
| "grad_norm": 0.7043033838272095, | |
| "learning_rate": 1.2456592434554963e-06, | |
| "loss": 0.3349531888961792, | |
| "step": 1624 | |
| }, | |
| { | |
| "epoch": 3.6053215077605323, | |
| "grad_norm": 0.3154018223285675, | |
| "learning_rate": 1.2403088834287282e-06, | |
| "loss": 0.08102969080209732, | |
| "step": 1626 | |
| }, | |
| { | |
| "epoch": 3.6097560975609757, | |
| "grad_norm": 0.5838890671730042, | |
| "learning_rate": 1.2350158323757903e-06, | |
| "loss": 0.41221365332603455, | |
| "step": 1628 | |
| }, | |
| { | |
| "epoch": 3.614190687361419, | |
| "grad_norm": 1.219826102256775, | |
| "learning_rate": 1.229780161508259e-06, | |
| "loss": 0.25824180245399475, | |
| "step": 1630 | |
| }, | |
| { | |
| "epoch": 3.6186252771618626, | |
| "grad_norm": 0.6512527465820312, | |
| "learning_rate": 1.2246019412657319e-06, | |
| "loss": 0.298809289932251, | |
| "step": 1632 | |
| }, | |
| { | |
| "epoch": 3.623059866962306, | |
| "grad_norm": 0.8826231360435486, | |
| "learning_rate": 1.2194812413148756e-06, | |
| "loss": 0.4713267683982849, | |
| "step": 1634 | |
| }, | |
| { | |
| "epoch": 3.6274944567627494, | |
| "grad_norm": 1.1182727813720703, | |
| "learning_rate": 1.214418130548495e-06, | |
| "loss": 0.23571792244911194, | |
| "step": 1636 | |
| }, | |
| { | |
| "epoch": 3.631929046563193, | |
| "grad_norm": 3.134111166000366, | |
| "learning_rate": 1.2094126770845986e-06, | |
| "loss": 0.2846839427947998, | |
| "step": 1638 | |
| }, | |
| { | |
| "epoch": 3.6363636363636362, | |
| "grad_norm": 0.7134626507759094, | |
| "learning_rate": 1.2044649482654876e-06, | |
| "loss": 0.41180163621902466, | |
| "step": 1640 | |
| }, | |
| { | |
| "epoch": 3.6407982261640797, | |
| "grad_norm": 0.2941051423549652, | |
| "learning_rate": 1.1995750106568496e-06, | |
| "loss": 0.18607045710086823, | |
| "step": 1642 | |
| }, | |
| { | |
| "epoch": 3.6452328159645235, | |
| "grad_norm": 0.8682871460914612, | |
| "learning_rate": 1.1947429300468575e-06, | |
| "loss": 0.2725341320037842, | |
| "step": 1644 | |
| }, | |
| { | |
| "epoch": 3.6496674057649665, | |
| "grad_norm": 0.22906602919101715, | |
| "learning_rate": 1.1899687714452932e-06, | |
| "loss": 0.1510142982006073, | |
| "step": 1646 | |
| }, | |
| { | |
| "epoch": 3.6541019955654104, | |
| "grad_norm": 0.5846609473228455, | |
| "learning_rate": 1.1852525990826658e-06, | |
| "loss": 0.17548693716526031, | |
| "step": 1648 | |
| }, | |
| { | |
| "epoch": 3.658536585365854, | |
| "grad_norm": 0.8649150729179382, | |
| "learning_rate": 1.1805944764093484e-06, | |
| "loss": 0.3178820312023163, | |
| "step": 1650 | |
| }, | |
| { | |
| "epoch": 3.662971175166297, | |
| "grad_norm": 0.7545923590660095, | |
| "learning_rate": 1.1759944660947301e-06, | |
| "loss": 0.4674905240535736, | |
| "step": 1652 | |
| }, | |
| { | |
| "epoch": 3.6674057649667406, | |
| "grad_norm": 0.5282281041145325, | |
| "learning_rate": 1.171452630026365e-06, | |
| "loss": 0.22073353826999664, | |
| "step": 1654 | |
| }, | |
| { | |
| "epoch": 3.671840354767184, | |
| "grad_norm": 0.6379572749137878, | |
| "learning_rate": 1.1669690293091452e-06, | |
| "loss": 0.44962742924690247, | |
| "step": 1656 | |
| }, | |
| { | |
| "epoch": 3.6762749445676275, | |
| "grad_norm": 0.8793405890464783, | |
| "learning_rate": 1.1625437242644772e-06, | |
| "loss": 0.21119025349617004, | |
| "step": 1658 | |
| }, | |
| { | |
| "epoch": 3.680709534368071, | |
| "grad_norm": 0.5134512186050415, | |
| "learning_rate": 1.1581767744294682e-06, | |
| "loss": 0.27141135931015015, | |
| "step": 1660 | |
| }, | |
| { | |
| "epoch": 3.6851441241685143, | |
| "grad_norm": 0.17565104365348816, | |
| "learning_rate": 1.1538682385561286e-06, | |
| "loss": 0.23845592141151428, | |
| "step": 1662 | |
| }, | |
| { | |
| "epoch": 3.6895787139689578, | |
| "grad_norm": 0.32826969027519226, | |
| "learning_rate": 1.1496181746105784e-06, | |
| "loss": 0.14338834583759308, | |
| "step": 1664 | |
| }, | |
| { | |
| "epoch": 3.694013303769401, | |
| "grad_norm": 1.2581610679626465, | |
| "learning_rate": 1.1454266397722707e-06, | |
| "loss": 0.4462907016277313, | |
| "step": 1666 | |
| }, | |
| { | |
| "epoch": 3.6984478935698446, | |
| "grad_norm": 0.7050212025642395, | |
| "learning_rate": 1.1412936904332181e-06, | |
| "loss": 0.2973634600639343, | |
| "step": 1668 | |
| }, | |
| { | |
| "epoch": 3.7028824833702885, | |
| "grad_norm": 0.8381198644638062, | |
| "learning_rate": 1.1372193821972379e-06, | |
| "loss": 0.3616812229156494, | |
| "step": 1670 | |
| }, | |
| { | |
| "epoch": 3.7073170731707314, | |
| "grad_norm": 0.5880244374275208, | |
| "learning_rate": 1.1332037698792033e-06, | |
| "loss": 0.317374587059021, | |
| "step": 1672 | |
| }, | |
| { | |
| "epoch": 3.7117516629711753, | |
| "grad_norm": 0.5628880262374878, | |
| "learning_rate": 1.1292469075043026e-06, | |
| "loss": 0.5282438397407532, | |
| "step": 1674 | |
| }, | |
| { | |
| "epoch": 3.7161862527716187, | |
| "grad_norm": 1.1323150396347046, | |
| "learning_rate": 1.1253488483073177e-06, | |
| "loss": 0.5078736543655396, | |
| "step": 1676 | |
| }, | |
| { | |
| "epoch": 3.720620842572062, | |
| "grad_norm": 0.5901404023170471, | |
| "learning_rate": 1.1215096447319038e-06, | |
| "loss": 0.3895023763179779, | |
| "step": 1678 | |
| }, | |
| { | |
| "epoch": 3.7250554323725056, | |
| "grad_norm": 1.2219599485397339, | |
| "learning_rate": 1.117729348429884e-06, | |
| "loss": 0.08083083480596542, | |
| "step": 1680 | |
| }, | |
| { | |
| "epoch": 3.729490022172949, | |
| "grad_norm": 0.5432906150817871, | |
| "learning_rate": 1.114008010260558e-06, | |
| "loss": 0.27891483902931213, | |
| "step": 1682 | |
| }, | |
| { | |
| "epoch": 3.7339246119733924, | |
| "grad_norm": 0.590613603591919, | |
| "learning_rate": 1.1103456802900134e-06, | |
| "loss": 0.25149670243263245, | |
| "step": 1684 | |
| }, | |
| { | |
| "epoch": 3.738359201773836, | |
| "grad_norm": 1.1293264627456665, | |
| "learning_rate": 1.1067424077904555e-06, | |
| "loss": 0.2085466980934143, | |
| "step": 1686 | |
| }, | |
| { | |
| "epoch": 3.7427937915742793, | |
| "grad_norm": 0.21245625615119934, | |
| "learning_rate": 1.103198241239542e-06, | |
| "loss": 0.08489034324884415, | |
| "step": 1688 | |
| }, | |
| { | |
| "epoch": 3.7472283813747227, | |
| "grad_norm": 1.0891178846359253, | |
| "learning_rate": 1.0997132283197324e-06, | |
| "loss": 0.4237329363822937, | |
| "step": 1690 | |
| }, | |
| { | |
| "epoch": 3.7516629711751666, | |
| "grad_norm": 0.7265070080757141, | |
| "learning_rate": 1.0962874159176454e-06, | |
| "loss": 0.3137209713459015, | |
| "step": 1692 | |
| }, | |
| { | |
| "epoch": 3.7560975609756095, | |
| "grad_norm": 0.6292901635169983, | |
| "learning_rate": 1.0929208501234286e-06, | |
| "loss": 0.5190775394439697, | |
| "step": 1694 | |
| }, | |
| { | |
| "epoch": 3.7605321507760534, | |
| "grad_norm": 0.8824191093444824, | |
| "learning_rate": 1.0896135762301393e-06, | |
| "loss": 0.4801103472709656, | |
| "step": 1696 | |
| }, | |
| { | |
| "epoch": 3.764966740576497, | |
| "grad_norm": 1.3474018573760986, | |
| "learning_rate": 1.0863656387331328e-06, | |
| "loss": 0.2235155552625656, | |
| "step": 1698 | |
| }, | |
| { | |
| "epoch": 3.7694013303769403, | |
| "grad_norm": 0.7029469013214111, | |
| "learning_rate": 1.0831770813294668e-06, | |
| "loss": 0.226326584815979, | |
| "step": 1700 | |
| }, | |
| { | |
| "epoch": 3.7738359201773837, | |
| "grad_norm": 0.7810203433036804, | |
| "learning_rate": 1.0800479469173101e-06, | |
| "loss": 0.7419723272323608, | |
| "step": 1702 | |
| }, | |
| { | |
| "epoch": 3.778270509977827, | |
| "grad_norm": 0.42261672019958496, | |
| "learning_rate": 1.076978277595369e-06, | |
| "loss": 0.07026417553424835, | |
| "step": 1704 | |
| }, | |
| { | |
| "epoch": 3.7827050997782705, | |
| "grad_norm": 0.1440313309431076, | |
| "learning_rate": 1.0739681146623185e-06, | |
| "loss": 0.24512067437171936, | |
| "step": 1706 | |
| }, | |
| { | |
| "epoch": 3.787139689578714, | |
| "grad_norm": 0.6102575659751892, | |
| "learning_rate": 1.0710174986162471e-06, | |
| "loss": 0.22913426160812378, | |
| "step": 1708 | |
| }, | |
| { | |
| "epoch": 3.7915742793791574, | |
| "grad_norm": 0.6097363233566284, | |
| "learning_rate": 1.0681264691541127e-06, | |
| "loss": 0.5384188294410706, | |
| "step": 1710 | |
| }, | |
| { | |
| "epoch": 3.796008869179601, | |
| "grad_norm": 0.7359468936920166, | |
| "learning_rate": 1.0652950651712072e-06, | |
| "loss": 0.24644701182842255, | |
| "step": 1712 | |
| }, | |
| { | |
| "epoch": 3.800443458980044, | |
| "grad_norm": 1.484494924545288, | |
| "learning_rate": 1.0625233247606348e-06, | |
| "loss": 0.2666923403739929, | |
| "step": 1714 | |
| }, | |
| { | |
| "epoch": 3.8048780487804876, | |
| "grad_norm": 0.5770846605300903, | |
| "learning_rate": 1.059811285212799e-06, | |
| "loss": 0.39251670241355896, | |
| "step": 1716 | |
| }, | |
| { | |
| "epoch": 3.8093126385809315, | |
| "grad_norm": 0.5093443393707275, | |
| "learning_rate": 1.0571589830149e-06, | |
| "loss": 0.2737593352794647, | |
| "step": 1718 | |
| }, | |
| { | |
| "epoch": 3.8137472283813745, | |
| "grad_norm": 0.7546675205230713, | |
| "learning_rate": 1.054566453850444e-06, | |
| "loss": 0.30364954471588135, | |
| "step": 1720 | |
| }, | |
| { | |
| "epoch": 3.8181818181818183, | |
| "grad_norm": 0.9405109286308289, | |
| "learning_rate": 1.0520337325987649e-06, | |
| "loss": 0.464808851480484, | |
| "step": 1722 | |
| }, | |
| { | |
| "epoch": 3.8226164079822618, | |
| "grad_norm": 0.9105086922645569, | |
| "learning_rate": 1.049560853334553e-06, | |
| "loss": 0.4284362494945526, | |
| "step": 1724 | |
| }, | |
| { | |
| "epoch": 3.827050997782705, | |
| "grad_norm": 0.5973249673843384, | |
| "learning_rate": 1.0471478493273976e-06, | |
| "loss": 0.45257946848869324, | |
| "step": 1726 | |
| }, | |
| { | |
| "epoch": 3.8314855875831486, | |
| "grad_norm": 0.5348697900772095, | |
| "learning_rate": 1.0447947530413389e-06, | |
| "loss": 0.19484858214855194, | |
| "step": 1728 | |
| }, | |
| { | |
| "epoch": 3.835920177383592, | |
| "grad_norm": 1.245671033859253, | |
| "learning_rate": 1.042501596134431e-06, | |
| "loss": 0.24068011343479156, | |
| "step": 1730 | |
| }, | |
| { | |
| "epoch": 3.8403547671840355, | |
| "grad_norm": 0.6368365287780762, | |
| "learning_rate": 1.0402684094583173e-06, | |
| "loss": 0.5048990249633789, | |
| "step": 1732 | |
| }, | |
| { | |
| "epoch": 3.844789356984479, | |
| "grad_norm": 2.171820640563965, | |
| "learning_rate": 1.0380952230578125e-06, | |
| "loss": 0.29761821031570435, | |
| "step": 1734 | |
| }, | |
| { | |
| "epoch": 3.8492239467849223, | |
| "grad_norm": 2.3513731956481934, | |
| "learning_rate": 1.0359820661705042e-06, | |
| "loss": 0.3343811333179474, | |
| "step": 1736 | |
| }, | |
| { | |
| "epoch": 3.8536585365853657, | |
| "grad_norm": 0.7197679877281189, | |
| "learning_rate": 1.0339289672263519e-06, | |
| "loss": 0.4054519534111023, | |
| "step": 1738 | |
| }, | |
| { | |
| "epoch": 3.858093126385809, | |
| "grad_norm": 0.6081451773643494, | |
| "learning_rate": 1.0319359538473107e-06, | |
| "loss": 0.219305619597435, | |
| "step": 1740 | |
| }, | |
| { | |
| "epoch": 3.8625277161862526, | |
| "grad_norm": 0.8351064324378967, | |
| "learning_rate": 1.0300030528469564e-06, | |
| "loss": 0.20432864129543304, | |
| "step": 1742 | |
| }, | |
| { | |
| "epoch": 3.8669623059866964, | |
| "grad_norm": 0.7205188274383545, | |
| "learning_rate": 1.0281302902301254e-06, | |
| "loss": 0.31986573338508606, | |
| "step": 1744 | |
| }, | |
| { | |
| "epoch": 3.8713968957871394, | |
| "grad_norm": 0.6454185843467712, | |
| "learning_rate": 1.026317691192567e-06, | |
| "loss": 0.35778456926345825, | |
| "step": 1746 | |
| }, | |
| { | |
| "epoch": 3.8758314855875833, | |
| "grad_norm": 0.6062719225883484, | |
| "learning_rate": 1.0245652801205999e-06, | |
| "loss": 0.21619272232055664, | |
| "step": 1748 | |
| }, | |
| { | |
| "epoch": 3.8802660753880267, | |
| "grad_norm": 0.6591951251029968, | |
| "learning_rate": 1.0228730805907891e-06, | |
| "loss": 0.29133281111717224, | |
| "step": 1750 | |
| }, | |
| { | |
| "epoch": 3.88470066518847, | |
| "grad_norm": 2.8523550033569336, | |
| "learning_rate": 1.0212411153696247e-06, | |
| "loss": 0.36294376850128174, | |
| "step": 1752 | |
| }, | |
| { | |
| "epoch": 3.8891352549889135, | |
| "grad_norm": 0.8326625227928162, | |
| "learning_rate": 1.019669406413218e-06, | |
| "loss": 0.3876746892929077, | |
| "step": 1754 | |
| }, | |
| { | |
| "epoch": 3.893569844789357, | |
| "grad_norm": 2.4112775325775146, | |
| "learning_rate": 1.0181579748670054e-06, | |
| "loss": 0.3648989200592041, | |
| "step": 1756 | |
| }, | |
| { | |
| "epoch": 3.8980044345898004, | |
| "grad_norm": 0.5865671038627625, | |
| "learning_rate": 1.0167068410654643e-06, | |
| "loss": 0.4565258026123047, | |
| "step": 1758 | |
| }, | |
| { | |
| "epoch": 3.902439024390244, | |
| "grad_norm": 0.07741643488407135, | |
| "learning_rate": 1.0153160245318384e-06, | |
| "loss": 0.0015238127671182156, | |
| "step": 1760 | |
| }, | |
| { | |
| "epoch": 3.9068736141906872, | |
| "grad_norm": 1.0577608346939087, | |
| "learning_rate": 1.0139855439778766e-06, | |
| "loss": 0.08189034461975098, | |
| "step": 1762 | |
| }, | |
| { | |
| "epoch": 3.9113082039911307, | |
| "grad_norm": 0.07507964968681335, | |
| "learning_rate": 1.0127154173035787e-06, | |
| "loss": 0.17938612401485443, | |
| "step": 1764 | |
| }, | |
| { | |
| "epoch": 3.9157427937915745, | |
| "grad_norm": 0.647083044052124, | |
| "learning_rate": 1.0115056615969584e-06, | |
| "loss": 0.3286856710910797, | |
| "step": 1766 | |
| }, | |
| { | |
| "epoch": 3.9201773835920175, | |
| "grad_norm": 0.8342782855033875, | |
| "learning_rate": 1.0103562931338105e-06, | |
| "loss": 0.47932499647140503, | |
| "step": 1768 | |
| }, | |
| { | |
| "epoch": 3.9246119733924614, | |
| "grad_norm": 0.5373421907424927, | |
| "learning_rate": 1.009267327377492e-06, | |
| "loss": 0.465029776096344, | |
| "step": 1770 | |
| }, | |
| { | |
| "epoch": 3.929046563192905, | |
| "grad_norm": 0.5635690689086914, | |
| "learning_rate": 1.008238778978716e-06, | |
| "loss": 0.05487694591283798, | |
| "step": 1772 | |
| }, | |
| { | |
| "epoch": 3.933481152993348, | |
| "grad_norm": 0.8110321164131165, | |
| "learning_rate": 1.0072706617753528e-06, | |
| "loss": 0.3653331696987152, | |
| "step": 1774 | |
| }, | |
| { | |
| "epoch": 3.9379157427937916, | |
| "grad_norm": 1.7579855918884277, | |
| "learning_rate": 1.0063629887922441e-06, | |
| "loss": 0.44433099031448364, | |
| "step": 1776 | |
| }, | |
| { | |
| "epoch": 3.942350332594235, | |
| "grad_norm": 0.13488821685314178, | |
| "learning_rate": 1.0055157722410279e-06, | |
| "loss": 0.02499130368232727, | |
| "step": 1778 | |
| }, | |
| { | |
| "epoch": 3.9467849223946785, | |
| "grad_norm": 1.413166880607605, | |
| "learning_rate": 1.0047290235199753e-06, | |
| "loss": 0.21658755838871002, | |
| "step": 1780 | |
| }, | |
| { | |
| "epoch": 3.951219512195122, | |
| "grad_norm": 0.14947852492332458, | |
| "learning_rate": 1.0040027532138351e-06, | |
| "loss": 0.2654849886894226, | |
| "step": 1782 | |
| }, | |
| { | |
| "epoch": 3.9556541019955653, | |
| "grad_norm": 0.6576520204544067, | |
| "learning_rate": 1.0033369710936928e-06, | |
| "loss": 0.27802592515945435, | |
| "step": 1784 | |
| }, | |
| { | |
| "epoch": 3.9600886917960088, | |
| "grad_norm": 0.6993730068206787, | |
| "learning_rate": 1.0027316861168388e-06, | |
| "loss": 0.35919517278671265, | |
| "step": 1786 | |
| }, | |
| { | |
| "epoch": 3.964523281596452, | |
| "grad_norm": 0.5883303284645081, | |
| "learning_rate": 1.0021869064266472e-06, | |
| "loss": 0.2748097777366638, | |
| "step": 1788 | |
| }, | |
| { | |
| "epoch": 3.9689578713968956, | |
| "grad_norm": 0.6255540251731873, | |
| "learning_rate": 1.0017026393524684e-06, | |
| "loss": 0.281380295753479, | |
| "step": 1790 | |
| }, | |
| { | |
| "epoch": 3.9733924611973395, | |
| "grad_norm": 0.6950498223304749, | |
| "learning_rate": 1.0012788914095275e-06, | |
| "loss": 0.3902501165866852, | |
| "step": 1792 | |
| }, | |
| { | |
| "epoch": 3.9778270509977824, | |
| "grad_norm": 0.7842665314674377, | |
| "learning_rate": 1.0009156682988395e-06, | |
| "loss": 0.19079414010047913, | |
| "step": 1794 | |
| }, | |
| { | |
| "epoch": 3.9822616407982263, | |
| "grad_norm": 0.9547889232635498, | |
| "learning_rate": 1.0006129749071298e-06, | |
| "loss": 0.2992710769176483, | |
| "step": 1796 | |
| }, | |
| { | |
| "epoch": 3.9866962305986697, | |
| "grad_norm": 0.7517938017845154, | |
| "learning_rate": 1.00037081530677e-06, | |
| "loss": 0.3780803680419922, | |
| "step": 1798 | |
| }, | |
| { | |
| "epoch": 3.991130820399113, | |
| "grad_norm": 1.7430669069290161, | |
| "learning_rate": 1.0001891927557255e-06, | |
| "loss": 0.32227784395217896, | |
| "step": 1800 | |
| }, | |
| { | |
| "epoch": 3.9955654101995566, | |
| "grad_norm": 0.7806469202041626, | |
| "learning_rate": 1.0000681096975056e-06, | |
| "loss": 0.3458644151687622, | |
| "step": 1802 | |
| }, | |
| { | |
| "epoch": 4.0, | |
| "grad_norm": 0.5159145593643188, | |
| "learning_rate": 1.0000075677611364e-06, | |
| "loss": 0.12477380037307739, | |
| "step": 1804 | |
| }, | |
| { | |
| "epoch": 4.0, | |
| "step": 1804, | |
| "total_flos": 3.4175049861232067e+18, | |
| "train_loss": 0.7412061936664963, | |
| "train_runtime": 8152.0644, | |
| "train_samples_per_second": 6.639, | |
| "train_steps_per_second": 0.221 | |
| } | |
| ], | |
| "logging_steps": 2, | |
| "max_steps": 1804, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 4, | |
| "save_steps": 99999, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": false, | |
| "should_training_stop": false | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 3.4175049861232067e+18, | |
| "train_batch_size": 3, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |