Image-Text-to-Text
Transformers
Safetensors
qwen3_5
llama-factory
full
Generated from Trainer
conversational
Instructions to use furproxy/9b-108 with libraries, inference providers, notebooks, and local apps. Follow these links to get started.
- Libraries
- Transformers
How to use furproxy/9b-108 with Transformers:
```python
# Use a pipeline as a high-level helper
from transformers import pipeline

pipe = pipeline("image-text-to-text", model="furproxy/9b-108")
messages = [
    {
        "role": "user",
        "content": [
            {"type": "image", "url": "https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/p-blog/candy.JPG"},
            {"type": "text", "text": "What animal is on the candy?"}
        ]
    },
]
pipe(text=messages)
```

```python
# Load model directly
from transformers import AutoProcessor, AutoModelForImageTextToText

processor = AutoProcessor.from_pretrained("furproxy/9b-108")
model = AutoModelForImageTextToText.from_pretrained("furproxy/9b-108")
messages = [
    {
        "role": "user",
        "content": [
            {"type": "image", "url": "https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/p-blog/candy.JPG"},
            {"type": "text", "text": "What animal is on the candy?"}
        ]
    },
]
inputs = processor.apply_chat_template(
    messages,
    add_generation_prompt=True,
    tokenize=True,
    return_dict=True,
    return_tensors="pt",
).to(model.device)

outputs = model.generate(**inputs, max_new_tokens=40)
print(processor.decode(outputs[0][inputs["input_ids"].shape[-1]:]))
```
- Notebooks
- Google Colab
- Kaggle
- Local Apps Settings
- vLLM
How to use furproxy/9b-108 with vLLM:
Install from pip and serve model
```bash
# Install vLLM from pip:
pip install vllm

# Start the vLLM server:
vllm serve "furproxy/9b-108"

# Call the server using curl (OpenAI-compatible API):
curl -X POST "http://localhost:8000/v1/chat/completions" \
  -H "Content-Type: application/json" \
  --data '{
    "model": "furproxy/9b-108",
    "messages": [
      {
        "role": "user",
        "content": [
          {
            "type": "text",
            "text": "Describe this image in one sentence."
          },
          {
            "type": "image_url",
            "image_url": {
              "url": "https://cdn.britannica.com/61/93061-050-99147DCE/Statue-of-Liberty-Island-New-York-Bay.jpg"
            }
          }
        ]
      }
    ]
  }'
```
Use Docker
docker model run hf.co/furproxy/9b-108
- SGLang
How to use furproxy/9b-108 with SGLang:
Install from pip and serve model
```bash
# Install SGLang from pip:
pip install sglang

# Start the SGLang server:
python3 -m sglang.launch_server \
  --model-path "furproxy/9b-108" \
  --host 0.0.0.0 \
  --port 30000

# Call the server using curl (OpenAI-compatible API):
curl -X POST "http://localhost:30000/v1/chat/completions" \
  -H "Content-Type: application/json" \
  --data '{
    "model": "furproxy/9b-108",
    "messages": [
      {
        "role": "user",
        "content": [
          {
            "type": "text",
            "text": "Describe this image in one sentence."
          },
          {
            "type": "image_url",
            "image_url": {
              "url": "https://cdn.britannica.com/61/93061-050-99147DCE/Statue-of-Liberty-Island-New-York-Bay.jpg"
            }
          }
        ]
      }
    ]
  }'
```
Use Docker images
```bash
docker run --gpus all \
  --shm-size 32g \
  -p 30000:30000 \
  -v ~/.cache/huggingface:/root/.cache/huggingface \
  --env "HF_TOKEN=<secret>" \
  --ipc=host \
  lmsysorg/sglang:latest \
  python3 -m sglang.launch_server \
    --model-path "furproxy/9b-108" \
    --host 0.0.0.0 \
    --port 30000

# Call the server using curl (OpenAI-compatible API):
curl -X POST "http://localhost:30000/v1/chat/completions" \
  -H "Content-Type: application/json" \
  --data '{
    "model": "furproxy/9b-108",
    "messages": [
      {
        "role": "user",
        "content": [
          {
            "type": "text",
            "text": "Describe this image in one sentence."
          },
          {
            "type": "image_url",
            "image_url": {
              "url": "https://cdn.britannica.com/61/93061-050-99147DCE/Statue-of-Liberty-Island-New-York-Bay.jpg"
            }
          }
        ]
      }
    ]
  }'
```
- Docker Model Runner
How to use furproxy/9b-108 with Docker Model Runner:
docker model run hf.co/furproxy/9b-108
| { | |
| "best_global_step": null, | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 4.0, | |
| "eval_steps": 500, | |
| "global_step": 1896, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.004219409282700422, | |
| "grad_norm": 7.742424964904785, | |
| "learning_rate": 1.263157894736842e-08, | |
| "loss": 2.195801019668579, | |
| "step": 2 | |
| }, | |
| { | |
| "epoch": 0.008438818565400843, | |
| "grad_norm": 9.064373016357422, | |
| "learning_rate": 3.7894736842105265e-08, | |
| "loss": 1.77604079246521, | |
| "step": 4 | |
| }, | |
| { | |
| "epoch": 0.012658227848101266, | |
| "grad_norm": 1.5363776683807373, | |
| "learning_rate": 6.31578947368421e-08, | |
| "loss": 1.9369428157806396, | |
| "step": 6 | |
| }, | |
| { | |
| "epoch": 0.016877637130801686, | |
| "grad_norm": 3.058716297149658, | |
| "learning_rate": 8.842105263157893e-08, | |
| "loss": 1.949758768081665, | |
| "step": 8 | |
| }, | |
| { | |
| "epoch": 0.02109704641350211, | |
| "grad_norm": 2.559150457382202, | |
| "learning_rate": 1.1368421052631579e-07, | |
| "loss": 1.855597972869873, | |
| "step": 10 | |
| }, | |
| { | |
| "epoch": 0.02531645569620253, | |
| "grad_norm": 1.0957772731781006, | |
| "learning_rate": 1.3894736842105263e-07, | |
| "loss": 1.3166148662567139, | |
| "step": 12 | |
| }, | |
| { | |
| "epoch": 0.029535864978902954, | |
| "grad_norm": 2.902003049850464, | |
| "learning_rate": 1.642105263157895e-07, | |
| "loss": 1.662704348564148, | |
| "step": 14 | |
| }, | |
| { | |
| "epoch": 0.03375527426160337, | |
| "grad_norm": 7.220351219177246, | |
| "learning_rate": 1.894736842105263e-07, | |
| "loss": 2.225470542907715, | |
| "step": 16 | |
| }, | |
| { | |
| "epoch": 0.0379746835443038, | |
| "grad_norm": 1.3415447473526, | |
| "learning_rate": 2.1473684210526315e-07, | |
| "loss": 1.8447153568267822, | |
| "step": 18 | |
| }, | |
| { | |
| "epoch": 0.04219409282700422, | |
| "grad_norm": 1.5747126340866089, | |
| "learning_rate": 2.4e-07, | |
| "loss": 1.9135501384735107, | |
| "step": 20 | |
| }, | |
| { | |
| "epoch": 0.046413502109704644, | |
| "grad_norm": 1.7888033390045166, | |
| "learning_rate": 2.6526315789473684e-07, | |
| "loss": 1.6221210956573486, | |
| "step": 22 | |
| }, | |
| { | |
| "epoch": 0.05063291139240506, | |
| "grad_norm": 2.052851915359497, | |
| "learning_rate": 2.905263157894737e-07, | |
| "loss": 1.8789974451065063, | |
| "step": 24 | |
| }, | |
| { | |
| "epoch": 0.05485232067510549, | |
| "grad_norm": 2.472539186477661, | |
| "learning_rate": 3.157894736842105e-07, | |
| "loss": 1.7755436897277832, | |
| "step": 26 | |
| }, | |
| { | |
| "epoch": 0.05907172995780591, | |
| "grad_norm": 2.0235300064086914, | |
| "learning_rate": 3.4105263157894735e-07, | |
| "loss": 1.9977495670318604, | |
| "step": 28 | |
| }, | |
| { | |
| "epoch": 0.06329113924050633, | |
| "grad_norm": 3.731635808944702, | |
| "learning_rate": 3.663157894736842e-07, | |
| "loss": 2.019644021987915, | |
| "step": 30 | |
| }, | |
| { | |
| "epoch": 0.06751054852320675, | |
| "grad_norm": 1.7156628370285034, | |
| "learning_rate": 3.9157894736842107e-07, | |
| "loss": 1.8407037258148193, | |
| "step": 32 | |
| }, | |
| { | |
| "epoch": 0.07172995780590717, | |
| "grad_norm": 7.599488735198975, | |
| "learning_rate": 4.168421052631579e-07, | |
| "loss": 2.1601576805114746, | |
| "step": 34 | |
| }, | |
| { | |
| "epoch": 0.0759493670886076, | |
| "grad_norm": 1.4482383728027344, | |
| "learning_rate": 4.4210526315789467e-07, | |
| "loss": 1.8958334922790527, | |
| "step": 36 | |
| }, | |
| { | |
| "epoch": 0.08016877637130802, | |
| "grad_norm": 3.731816530227661, | |
| "learning_rate": 4.6736842105263153e-07, | |
| "loss": 2.1196088790893555, | |
| "step": 38 | |
| }, | |
| { | |
| "epoch": 0.08438818565400844, | |
| "grad_norm": 1.4769682884216309, | |
| "learning_rate": 4.926315789473684e-07, | |
| "loss": 1.9134398698806763, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 0.08860759493670886, | |
| "grad_norm": 13.363183975219727, | |
| "learning_rate": 5.178947368421052e-07, | |
| "loss": 1.8113877773284912, | |
| "step": 42 | |
| }, | |
| { | |
| "epoch": 0.09282700421940929, | |
| "grad_norm": 3.724055051803589, | |
| "learning_rate": 5.431578947368421e-07, | |
| "loss": 2.103419542312622, | |
| "step": 44 | |
| }, | |
| { | |
| "epoch": 0.0970464135021097, | |
| "grad_norm": 2.3887927532196045, | |
| "learning_rate": 5.684210526315788e-07, | |
| "loss": 1.8661903142929077, | |
| "step": 46 | |
| }, | |
| { | |
| "epoch": 0.10126582278481013, | |
| "grad_norm": 1.5525636672973633, | |
| "learning_rate": 5.936842105263157e-07, | |
| "loss": 1.8071659803390503, | |
| "step": 48 | |
| }, | |
| { | |
| "epoch": 0.10548523206751055, | |
| "grad_norm": 5.839144229888916, | |
| "learning_rate": 6.189473684210527e-07, | |
| "loss": 1.561785340309143, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 0.10970464135021098, | |
| "grad_norm": 5.124746799468994, | |
| "learning_rate": 6.442105263157894e-07, | |
| "loss": 1.3494197130203247, | |
| "step": 52 | |
| }, | |
| { | |
| "epoch": 0.11392405063291139, | |
| "grad_norm": 5.008734703063965, | |
| "learning_rate": 6.694736842105263e-07, | |
| "loss": 1.3085637092590332, | |
| "step": 54 | |
| }, | |
| { | |
| "epoch": 0.11814345991561181, | |
| "grad_norm": 1.1680630445480347, | |
| "learning_rate": 6.947368421052631e-07, | |
| "loss": 1.710934042930603, | |
| "step": 56 | |
| }, | |
| { | |
| "epoch": 0.12236286919831224, | |
| "grad_norm": 11.505062103271484, | |
| "learning_rate": 7.2e-07, | |
| "loss": 1.460722804069519, | |
| "step": 58 | |
| }, | |
| { | |
| "epoch": 0.12658227848101267, | |
| "grad_norm": 7.362393856048584, | |
| "learning_rate": 7.452631578947368e-07, | |
| "loss": 1.740147590637207, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 0.1308016877637131, | |
| "grad_norm": 1.551930546760559, | |
| "learning_rate": 7.705263157894736e-07, | |
| "loss": 1.7590422630310059, | |
| "step": 62 | |
| }, | |
| { | |
| "epoch": 0.1350210970464135, | |
| "grad_norm": 1.2569609880447388, | |
| "learning_rate": 7.957894736842105e-07, | |
| "loss": 1.2291865348815918, | |
| "step": 64 | |
| }, | |
| { | |
| "epoch": 0.13924050632911392, | |
| "grad_norm": 2.3231699466705322, | |
| "learning_rate": 8.210526315789473e-07, | |
| "loss": 1.040055513381958, | |
| "step": 66 | |
| }, | |
| { | |
| "epoch": 0.14345991561181434, | |
| "grad_norm": 1.0935379266738892, | |
| "learning_rate": 8.463157894736842e-07, | |
| "loss": 1.300035834312439, | |
| "step": 68 | |
| }, | |
| { | |
| "epoch": 0.14767932489451477, | |
| "grad_norm": 4.188493728637695, | |
| "learning_rate": 8.71578947368421e-07, | |
| "loss": 1.1873421669006348, | |
| "step": 70 | |
| }, | |
| { | |
| "epoch": 0.1518987341772152, | |
| "grad_norm": 1.0681216716766357, | |
| "learning_rate": 8.968421052631579e-07, | |
| "loss": 1.4782516956329346, | |
| "step": 72 | |
| }, | |
| { | |
| "epoch": 0.15611814345991562, | |
| "grad_norm": 2.1197876930236816, | |
| "learning_rate": 9.221052631578946e-07, | |
| "loss": 1.2450737953186035, | |
| "step": 74 | |
| }, | |
| { | |
| "epoch": 0.16033755274261605, | |
| "grad_norm": 4.197497844696045, | |
| "learning_rate": 9.473684210526316e-07, | |
| "loss": 1.0491926670074463, | |
| "step": 76 | |
| }, | |
| { | |
| "epoch": 0.16455696202531644, | |
| "grad_norm": 1.161306619644165, | |
| "learning_rate": 9.726315789473682e-07, | |
| "loss": 1.60398268699646, | |
| "step": 78 | |
| }, | |
| { | |
| "epoch": 0.16877637130801687, | |
| "grad_norm": 1.0192948579788208, | |
| "learning_rate": 9.978947368421053e-07, | |
| "loss": 1.5951966047286987, | |
| "step": 80 | |
| }, | |
| { | |
| "epoch": 0.1729957805907173, | |
| "grad_norm": 2.497844934463501, | |
| "learning_rate": 1.023157894736842e-06, | |
| "loss": 1.564704179763794, | |
| "step": 82 | |
| }, | |
| { | |
| "epoch": 0.17721518987341772, | |
| "grad_norm": 9.568504333496094, | |
| "learning_rate": 1.048421052631579e-06, | |
| "loss": 1.1448438167572021, | |
| "step": 84 | |
| }, | |
| { | |
| "epoch": 0.18143459915611815, | |
| "grad_norm": 1.6581389904022217, | |
| "learning_rate": 1.0736842105263157e-06, | |
| "loss": 1.2066229581832886, | |
| "step": 86 | |
| }, | |
| { | |
| "epoch": 0.18565400843881857, | |
| "grad_norm": 2.0455548763275146, | |
| "learning_rate": 1.0989473684210525e-06, | |
| "loss": 1.475752353668213, | |
| "step": 88 | |
| }, | |
| { | |
| "epoch": 0.189873417721519, | |
| "grad_norm": 1.7110133171081543, | |
| "learning_rate": 1.1242105263157894e-06, | |
| "loss": 0.7750993967056274, | |
| "step": 90 | |
| }, | |
| { | |
| "epoch": 0.1940928270042194, | |
| "grad_norm": 1.7819750308990479, | |
| "learning_rate": 1.1494736842105262e-06, | |
| "loss": 1.8029006719589233, | |
| "step": 92 | |
| }, | |
| { | |
| "epoch": 0.19831223628691982, | |
| "grad_norm": 2.48787784576416, | |
| "learning_rate": 1.174736842105263e-06, | |
| "loss": 1.0352967977523804, | |
| "step": 94 | |
| }, | |
| { | |
| "epoch": 0.20253164556962025, | |
| "grad_norm": 1.236476182937622, | |
| "learning_rate": 1.2e-06, | |
| "loss": 1.5603318214416504, | |
| "step": 96 | |
| }, | |
| { | |
| "epoch": 0.20675105485232068, | |
| "grad_norm": 1.040940761566162, | |
| "learning_rate": 1.1999967137875644e-06, | |
| "loss": 1.6248691082000732, | |
| "step": 98 | |
| }, | |
| { | |
| "epoch": 0.2109704641350211, | |
| "grad_norm": 2.421082019805908, | |
| "learning_rate": 1.199986855190255e-06, | |
| "loss": 1.3791676759719849, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.21518987341772153, | |
| "grad_norm": 1.4071706533432007, | |
| "learning_rate": 1.1999704243280622e-06, | |
| "loss": 1.1831879615783691, | |
| "step": 102 | |
| }, | |
| { | |
| "epoch": 0.21940928270042195, | |
| "grad_norm": 0.912856936454773, | |
| "learning_rate": 1.1999474214009684e-06, | |
| "loss": 1.097001552581787, | |
| "step": 104 | |
| }, | |
| { | |
| "epoch": 0.22362869198312235, | |
| "grad_norm": 2.3082234859466553, | |
| "learning_rate": 1.1999178466889462e-06, | |
| "loss": 1.089848518371582, | |
| "step": 106 | |
| }, | |
| { | |
| "epoch": 0.22784810126582278, | |
| "grad_norm": 1.7334387302398682, | |
| "learning_rate": 1.1998817005519536e-06, | |
| "loss": 1.0864239931106567, | |
| "step": 108 | |
| }, | |
| { | |
| "epoch": 0.2320675105485232, | |
| "grad_norm": 1.158636212348938, | |
| "learning_rate": 1.1998389834299315e-06, | |
| "loss": 1.135922908782959, | |
| "step": 110 | |
| }, | |
| { | |
| "epoch": 0.23628691983122363, | |
| "grad_norm": 1.3626004457473755, | |
| "learning_rate": 1.1997896958427962e-06, | |
| "loss": 1.511846661567688, | |
| "step": 112 | |
| }, | |
| { | |
| "epoch": 0.24050632911392406, | |
| "grad_norm": 2.417889356613159, | |
| "learning_rate": 1.199733838390435e-06, | |
| "loss": 1.387232780456543, | |
| "step": 114 | |
| }, | |
| { | |
| "epoch": 0.24472573839662448, | |
| "grad_norm": 2.854128837585449, | |
| "learning_rate": 1.1996714117526975e-06, | |
| "loss": 1.7170121669769287, | |
| "step": 116 | |
| }, | |
| { | |
| "epoch": 0.2489451476793249, | |
| "grad_norm": 1.1655317544937134, | |
| "learning_rate": 1.1996024166893883e-06, | |
| "loss": 1.4113752841949463, | |
| "step": 118 | |
| }, | |
| { | |
| "epoch": 0.25316455696202533, | |
| "grad_norm": 0.6216784715652466, | |
| "learning_rate": 1.199526854040257e-06, | |
| "loss": 1.0603108406066895, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 0.25738396624472576, | |
| "grad_norm": 8.596741676330566, | |
| "learning_rate": 1.1994447247249886e-06, | |
| "loss": 1.3766067028045654, | |
| "step": 122 | |
| }, | |
| { | |
| "epoch": 0.2616033755274262, | |
| "grad_norm": 6.650040149688721, | |
| "learning_rate": 1.199356029743192e-06, | |
| "loss": 1.3911750316619873, | |
| "step": 124 | |
| }, | |
| { | |
| "epoch": 0.26582278481012656, | |
| "grad_norm": 1.3944730758666992, | |
| "learning_rate": 1.1992607701743877e-06, | |
| "loss": 1.479828953742981, | |
| "step": 126 | |
| }, | |
| { | |
| "epoch": 0.270042194092827, | |
| "grad_norm": 1.8790662288665771, | |
| "learning_rate": 1.1991589471779944e-06, | |
| "loss": 1.0149238109588623, | |
| "step": 128 | |
| }, | |
| { | |
| "epoch": 0.2742616033755274, | |
| "grad_norm": 2.3544161319732666, | |
| "learning_rate": 1.1990505619933166e-06, | |
| "loss": 1.2252846956253052, | |
| "step": 130 | |
| }, | |
| { | |
| "epoch": 0.27848101265822783, | |
| "grad_norm": 1.664776086807251, | |
| "learning_rate": 1.1989356159395268e-06, | |
| "loss": 1.3019721508026123, | |
| "step": 132 | |
| }, | |
| { | |
| "epoch": 0.28270042194092826, | |
| "grad_norm": 9.16057014465332, | |
| "learning_rate": 1.1988141104156518e-06, | |
| "loss": 0.8186183571815491, | |
| "step": 134 | |
| }, | |
| { | |
| "epoch": 0.2869198312236287, | |
| "grad_norm": 1.194026231765747, | |
| "learning_rate": 1.1986860469005543e-06, | |
| "loss": 1.0649269819259644, | |
| "step": 136 | |
| }, | |
| { | |
| "epoch": 0.2911392405063291, | |
| "grad_norm": 1.9782294034957886, | |
| "learning_rate": 1.1985514269529155e-06, | |
| "loss": 1.479400873184204, | |
| "step": 138 | |
| }, | |
| { | |
| "epoch": 0.29535864978902954, | |
| "grad_norm": 5.742581367492676, | |
| "learning_rate": 1.1984102522112159e-06, | |
| "loss": 1.0161385536193848, | |
| "step": 140 | |
| }, | |
| { | |
| "epoch": 0.29957805907172996, | |
| "grad_norm": 1.9590661525726318, | |
| "learning_rate": 1.1982625243937158e-06, | |
| "loss": 1.3290033340454102, | |
| "step": 142 | |
| }, | |
| { | |
| "epoch": 0.3037974683544304, | |
| "grad_norm": 7.880887031555176, | |
| "learning_rate": 1.198108245298433e-06, | |
| "loss": 1.1459200382232666, | |
| "step": 144 | |
| }, | |
| { | |
| "epoch": 0.3080168776371308, | |
| "grad_norm": 3.6530940532684326, | |
| "learning_rate": 1.1979474168031232e-06, | |
| "loss": 1.4865257740020752, | |
| "step": 146 | |
| }, | |
| { | |
| "epoch": 0.31223628691983124, | |
| "grad_norm": 1.9205989837646484, | |
| "learning_rate": 1.1977800408652552e-06, | |
| "loss": 1.4399386644363403, | |
| "step": 148 | |
| }, | |
| { | |
| "epoch": 0.31645569620253167, | |
| "grad_norm": 1.6300798654556274, | |
| "learning_rate": 1.1976061195219877e-06, | |
| "loss": 1.3092478513717651, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 0.3206751054852321, | |
| "grad_norm": 1.216039776802063, | |
| "learning_rate": 1.1974256548901447e-06, | |
| "loss": 1.3857874870300293, | |
| "step": 152 | |
| }, | |
| { | |
| "epoch": 0.32489451476793246, | |
| "grad_norm": 1.1664661169052124, | |
| "learning_rate": 1.1972386491661896e-06, | |
| "loss": 1.5414711236953735, | |
| "step": 154 | |
| }, | |
| { | |
| "epoch": 0.3291139240506329, | |
| "grad_norm": 6.513598918914795, | |
| "learning_rate": 1.1970451046261986e-06, | |
| "loss": 1.3435574769973755, | |
| "step": 156 | |
| }, | |
| { | |
| "epoch": 0.3333333333333333, | |
| "grad_norm": 1.0399960279464722, | |
| "learning_rate": 1.196845023625833e-06, | |
| "loss": 1.4113006591796875, | |
| "step": 158 | |
| }, | |
| { | |
| "epoch": 0.33755274261603374, | |
| "grad_norm": 1.2614532709121704, | |
| "learning_rate": 1.196638408600309e-06, | |
| "loss": 1.4127811193466187, | |
| "step": 160 | |
| }, | |
| { | |
| "epoch": 0.34177215189873417, | |
| "grad_norm": 1.940744161605835, | |
| "learning_rate": 1.1964252620643718e-06, | |
| "loss": 0.9027857184410095, | |
| "step": 162 | |
| }, | |
| { | |
| "epoch": 0.3459915611814346, | |
| "grad_norm": 2.2784762382507324, | |
| "learning_rate": 1.1962055866122608e-06, | |
| "loss": 1.305877447128296, | |
| "step": 164 | |
| }, | |
| { | |
| "epoch": 0.350210970464135, | |
| "grad_norm": 2.4151477813720703, | |
| "learning_rate": 1.1959793849176804e-06, | |
| "loss": 0.8810802698135376, | |
| "step": 166 | |
| }, | |
| { | |
| "epoch": 0.35443037974683544, | |
| "grad_norm": 1.4606540203094482, | |
| "learning_rate": 1.195746659733767e-06, | |
| "loss": 1.3153032064437866, | |
| "step": 168 | |
| }, | |
| { | |
| "epoch": 0.35864978902953587, | |
| "grad_norm": 2.1664512157440186, | |
| "learning_rate": 1.1955074138930558e-06, | |
| "loss": 1.409055233001709, | |
| "step": 170 | |
| }, | |
| { | |
| "epoch": 0.3628691983122363, | |
| "grad_norm": 1.8031892776489258, | |
| "learning_rate": 1.1952616503074452e-06, | |
| "loss": 1.288240909576416, | |
| "step": 172 | |
| }, | |
| { | |
| "epoch": 0.3670886075949367, | |
| "grad_norm": 1.2246201038360596, | |
| "learning_rate": 1.1950093719681623e-06, | |
| "loss": 1.0962798595428467, | |
| "step": 174 | |
| }, | |
| { | |
| "epoch": 0.37130801687763715, | |
| "grad_norm": 2.090580701828003, | |
| "learning_rate": 1.1947505819457264e-06, | |
| "loss": 1.4130232334136963, | |
| "step": 176 | |
| }, | |
| { | |
| "epoch": 0.3755274261603376, | |
| "grad_norm": 0.9846044778823853, | |
| "learning_rate": 1.1944852833899122e-06, | |
| "loss": 1.4005430936813354, | |
| "step": 178 | |
| }, | |
| { | |
| "epoch": 0.379746835443038, | |
| "grad_norm": 1.043843388557434, | |
| "learning_rate": 1.1942134795297092e-06, | |
| "loss": 1.0696699619293213, | |
| "step": 180 | |
| }, | |
| { | |
| "epoch": 0.38396624472573837, | |
| "grad_norm": 1.0868744850158691, | |
| "learning_rate": 1.1939351736732854e-06, | |
| "loss": 1.3760430812835693, | |
| "step": 182 | |
| }, | |
| { | |
| "epoch": 0.3881856540084388, | |
| "grad_norm": 1.203220009803772, | |
| "learning_rate": 1.193650369207945e-06, | |
| "loss": 1.3777748346328735, | |
| "step": 184 | |
| }, | |
| { | |
| "epoch": 0.3924050632911392, | |
| "grad_norm": 2.734304666519165, | |
| "learning_rate": 1.1933590696000883e-06, | |
| "loss": 0.5890464186668396, | |
| "step": 186 | |
| }, | |
| { | |
| "epoch": 0.39662447257383965, | |
| "grad_norm": 1.4362255334854126, | |
| "learning_rate": 1.193061278395168e-06, | |
| "loss": 1.0182992219924927, | |
| "step": 188 | |
| }, | |
| { | |
| "epoch": 0.4008438818565401, | |
| "grad_norm": 1.4891494512557983, | |
| "learning_rate": 1.1927569992176479e-06, | |
| "loss": 1.1124638319015503, | |
| "step": 190 | |
| }, | |
| { | |
| "epoch": 0.4050632911392405, | |
| "grad_norm": 1.0771912336349487, | |
| "learning_rate": 1.1924462357709577e-06, | |
| "loss": 1.3731889724731445, | |
| "step": 192 | |
| }, | |
| { | |
| "epoch": 0.4092827004219409, | |
| "grad_norm": 1.2685896158218384, | |
| "learning_rate": 1.1921289918374481e-06, | |
| "loss": 1.1032942533493042, | |
| "step": 194 | |
| }, | |
| { | |
| "epoch": 0.41350210970464135, | |
| "grad_norm": 1.8579025268554688, | |
| "learning_rate": 1.1918052712783451e-06, | |
| "loss": 1.364923357963562, | |
| "step": 196 | |
| }, | |
| { | |
| "epoch": 0.4177215189873418, | |
| "grad_norm": 1.3035788536071777, | |
| "learning_rate": 1.1914750780337023e-06, | |
| "loss": 1.0887572765350342, | |
| "step": 198 | |
| }, | |
| { | |
| "epoch": 0.4219409282700422, | |
| "grad_norm": 1.1359857320785522, | |
| "learning_rate": 1.1911384161223538e-06, | |
| "loss": 1.1425602436065674, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.42616033755274263, | |
| "grad_norm": 1.8307956457138062, | |
| "learning_rate": 1.1907952896418643e-06, | |
| "loss": 1.177668809890747, | |
| "step": 202 | |
| }, | |
| { | |
| "epoch": 0.43037974683544306, | |
| "grad_norm": 1.2392957210540771, | |
| "learning_rate": 1.1904457027684802e-06, | |
| "loss": 0.9585235714912415, | |
| "step": 204 | |
| }, | |
| { | |
| "epoch": 0.4345991561181435, | |
| "grad_norm": 1.794783115386963, | |
| "learning_rate": 1.1900896597570784e-06, | |
| "loss": 1.6650797128677368, | |
| "step": 206 | |
| }, | |
| { | |
| "epoch": 0.4388185654008439, | |
| "grad_norm": 1.6542989015579224, | |
| "learning_rate": 1.1897271649411145e-06, | |
| "loss": 0.7469709515571594, | |
| "step": 208 | |
| }, | |
| { | |
| "epoch": 0.4430379746835443, | |
| "grad_norm": 1.3410842418670654, | |
| "learning_rate": 1.1893582227325694e-06, | |
| "loss": 1.3532118797302246, | |
| "step": 210 | |
| }, | |
| { | |
| "epoch": 0.4472573839662447, | |
| "grad_norm": 3.8952300548553467, | |
| "learning_rate": 1.1889828376218972e-06, | |
| "loss": 0.8239259719848633, | |
| "step": 212 | |
| }, | |
| { | |
| "epoch": 0.45147679324894513, | |
| "grad_norm": 1.3895829916000366, | |
| "learning_rate": 1.1886010141779688e-06, | |
| "loss": 1.2587556838989258, | |
| "step": 214 | |
| }, | |
| { | |
| "epoch": 0.45569620253164556, | |
| "grad_norm": 0.944612443447113, | |
| "learning_rate": 1.1882127570480174e-06, | |
| "loss": 1.3315932750701904, | |
| "step": 216 | |
| }, | |
| { | |
| "epoch": 0.459915611814346, | |
| "grad_norm": 1.4940253496170044, | |
| "learning_rate": 1.1878180709575815e-06, | |
| "loss": 1.3548877239227295, | |
| "step": 218 | |
| }, | |
| { | |
| "epoch": 0.4641350210970464, | |
| "grad_norm": 1.9045593738555908, | |
| "learning_rate": 1.1874169607104478e-06, | |
| "loss": 1.3191989660263062, | |
| "step": 220 | |
| }, | |
| { | |
| "epoch": 0.46835443037974683, | |
| "grad_norm": 2.8294312953948975, | |
| "learning_rate": 1.187009431188592e-06, | |
| "loss": 1.270053505897522, | |
| "step": 222 | |
| }, | |
| { | |
| "epoch": 0.47257383966244726, | |
| "grad_norm": 1.5677937269210815, | |
| "learning_rate": 1.1865954873521197e-06, | |
| "loss": 1.4200479984283447, | |
| "step": 224 | |
| }, | |
| { | |
| "epoch": 0.4767932489451477, | |
| "grad_norm": 1.1733640432357788, | |
| "learning_rate": 1.1861751342392067e-06, | |
| "loss": 1.3603910207748413, | |
| "step": 226 | |
| }, | |
| { | |
| "epoch": 0.4810126582278481, | |
| "grad_norm": 2.0203163623809814, | |
| "learning_rate": 1.185748376966037e-06, | |
| "loss": 0.8049441576004028, | |
| "step": 228 | |
| }, | |
| { | |
| "epoch": 0.48523206751054854, | |
| "grad_norm": 1.1351509094238281, | |
| "learning_rate": 1.18531522072674e-06, | |
| "loss": 0.9551719427108765, | |
| "step": 230 | |
| }, | |
| { | |
| "epoch": 0.48945147679324896, | |
| "grad_norm": 2.4217798709869385, | |
| "learning_rate": 1.1848756707933284e-06, | |
| "loss": 0.9277099967002869, | |
| "step": 232 | |
| }, | |
| { | |
| "epoch": 0.4936708860759494, | |
| "grad_norm": 1.298305869102478, | |
| "learning_rate": 1.1844297325156337e-06, | |
| "loss": 1.334661602973938, | |
| "step": 234 | |
| }, | |
| { | |
| "epoch": 0.4978902953586498, | |
| "grad_norm": 1.7692943811416626, | |
| "learning_rate": 1.183977411321241e-06, | |
| "loss": 1.372158169746399, | |
| "step": 236 | |
| }, | |
| { | |
| "epoch": 0.5021097046413502, | |
| "grad_norm": 2.271902322769165, | |
| "learning_rate": 1.1835187127154221e-06, | |
| "loss": 1.036437749862671, | |
| "step": 238 | |
| }, | |
| { | |
| "epoch": 0.5063291139240507, | |
| "grad_norm": 2.205810070037842, | |
| "learning_rate": 1.18305364228107e-06, | |
| "loss": 0.470305472612381, | |
| "step": 240 | |
| }, | |
| { | |
| "epoch": 0.510548523206751, | |
| "grad_norm": 1.814501404762268, | |
| "learning_rate": 1.1825822056786304e-06, | |
| "loss": 1.4641677141189575, | |
| "step": 242 | |
| }, | |
| { | |
| "epoch": 0.5147679324894515, | |
| "grad_norm": 1.974550724029541, | |
| "learning_rate": 1.182104408646032e-06, | |
| "loss": 0.9201871156692505, | |
| "step": 244 | |
| }, | |
| { | |
| "epoch": 0.5189873417721519, | |
| "grad_norm": 1.1704046726226807, | |
| "learning_rate": 1.1816202569986176e-06, | |
| "loss": 1.5398619174957275, | |
| "step": 246 | |
| }, | |
| { | |
| "epoch": 0.5232067510548524, | |
| "grad_norm": 2.400852918624878, | |
| "learning_rate": 1.181129756629073e-06, | |
| "loss": 1.265621542930603, | |
| "step": 248 | |
| }, | |
| { | |
| "epoch": 0.5274261603375527, | |
| "grad_norm": 1.2383782863616943, | |
| "learning_rate": 1.1806329135073552e-06, | |
| "loss": 1.3679600954055786, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 0.5316455696202531, | |
| "grad_norm": 1.7232836484909058, | |
| "learning_rate": 1.18012973368062e-06, | |
| "loss": 1.171999216079712, | |
| "step": 252 | |
| }, | |
| { | |
| "epoch": 0.5358649789029536, | |
| "grad_norm": 1.5587044954299927, | |
| "learning_rate": 1.1796202232731485e-06, | |
| "loss": 1.2946254014968872, | |
| "step": 254 | |
| }, | |
| { | |
| "epoch": 0.540084388185654, | |
| "grad_norm": 2.5242385864257812, | |
| "learning_rate": 1.1791043884862711e-06, | |
| "loss": 1.254220724105835, | |
| "step": 256 | |
| }, | |
| { | |
| "epoch": 0.5443037974683544, | |
| "grad_norm": 2.730691432952881, | |
| "learning_rate": 1.178582235598295e-06, | |
| "loss": 1.078680396080017, | |
| "step": 258 | |
| }, | |
| { | |
| "epoch": 0.5485232067510548, | |
| "grad_norm": 4.930171489715576, | |
| "learning_rate": 1.1780537709644245e-06, | |
| "loss": 1.0161340236663818, | |
| "step": 260 | |
| }, | |
| { | |
| "epoch": 0.5527426160337553, | |
| "grad_norm": 1.1866450309753418, | |
| "learning_rate": 1.177519001016686e-06, | |
| "loss": 1.352670431137085, | |
| "step": 262 | |
| }, | |
| { | |
| "epoch": 0.5569620253164557, | |
| "grad_norm": 1.242305040359497, | |
| "learning_rate": 1.1769779322638483e-06, | |
| "loss": 1.3570655584335327, | |
| "step": 264 | |
| }, | |
| { | |
| "epoch": 0.5611814345991561, | |
| "grad_norm": 3.957782745361328, | |
| "learning_rate": 1.1764305712913445e-06, | |
| "loss": 1.311238169670105, | |
| "step": 266 | |
| }, | |
| { | |
| "epoch": 0.5654008438818565, | |
| "grad_norm": 2.596217393875122, | |
| "learning_rate": 1.1758769247611908e-06, | |
| "loss": 1.5630828142166138, | |
| "step": 268 | |
| }, | |
| { | |
| "epoch": 0.569620253164557, | |
| "grad_norm": 2.2337851524353027, | |
| "learning_rate": 1.1753169994119063e-06, | |
| "loss": 1.0898045301437378, | |
| "step": 270 | |
| }, | |
| { | |
| "epoch": 0.5738396624472574, | |
| "grad_norm": 1.5337291955947876, | |
| "learning_rate": 1.1747508020584302e-06, | |
| "loss": 1.3198161125183105, | |
| "step": 272 | |
| }, | |
| { | |
| "epoch": 0.5780590717299579, | |
| "grad_norm": 4.55377721786499, | |
| "learning_rate": 1.17417833959204e-06, | |
| "loss": 1.1360241174697876, | |
| "step": 274 | |
| }, | |
| { | |
| "epoch": 0.5822784810126582, | |
| "grad_norm": 10.100658416748047, | |
| "learning_rate": 1.173599618980266e-06, | |
| "loss": 1.3351401090621948, | |
| "step": 276 | |
| }, | |
| { | |
| "epoch": 0.5864978902953587, | |
| "grad_norm": 1.0856088399887085, | |
| "learning_rate": 1.1730146472668075e-06, | |
| "loss": 1.4669663906097412, | |
| "step": 278 | |
| }, | |
| { | |
| "epoch": 0.5907172995780591, | |
| "grad_norm": 1.9721051454544067, | |
| "learning_rate": 1.1724234315714474e-06, | |
| "loss": 1.003104329109192, | |
| "step": 280 | |
| }, | |
| { | |
| "epoch": 0.5949367088607594, | |
| "grad_norm": 1.5582189559936523, | |
| "learning_rate": 1.1718259790899647e-06, | |
| "loss": 1.405082106590271, | |
| "step": 282 | |
| }, | |
| { | |
| "epoch": 0.5991561181434599, | |
| "grad_norm": 1.6864080429077148, | |
| "learning_rate": 1.1712222970940478e-06, | |
| "loss": 1.595037579536438, | |
| "step": 284 | |
| }, | |
| { | |
| "epoch": 0.6033755274261603, | |
| "grad_norm": 0.3938112258911133, | |
| "learning_rate": 1.1706123929312049e-06, | |
| "loss": 1.1622782945632935, | |
| "step": 286 | |
| }, | |
| { | |
| "epoch": 0.6075949367088608, | |
| "grad_norm": 1.0948325395584106, | |
| "learning_rate": 1.1699962740246754e-06, | |
| "loss": 1.325197458267212, | |
| "step": 288 | |
| }, | |
| { | |
| "epoch": 0.6118143459915611, | |
| "grad_norm": 1.514491081237793, | |
| "learning_rate": 1.1693739478733393e-06, | |
| "loss": 0.8543146848678589, | |
| "step": 290 | |
| }, | |
| { | |
| "epoch": 0.6160337552742616, | |
| "grad_norm": 1.247450590133667, | |
| "learning_rate": 1.1687454220516262e-06, | |
| "loss": 0.6629498600959778, | |
| "step": 292 | |
| }, | |
| { | |
| "epoch": 0.620253164556962, | |
| "grad_norm": 0.9832797050476074, | |
| "learning_rate": 1.1681107042094227e-06, | |
| "loss": 1.3061555624008179, | |
| "step": 294 | |
| }, | |
| { | |
| "epoch": 0.6244725738396625, | |
| "grad_norm": 1.6159130334854126, | |
| "learning_rate": 1.1674698020719791e-06, | |
| "loss": 0.7377375364303589, | |
| "step": 296 | |
| }, | |
| { | |
| "epoch": 0.6286919831223629, | |
| "grad_norm": 2.47055983543396, | |
| "learning_rate": 1.1668227234398165e-06, | |
| "loss": 0.6730928421020508, | |
| "step": 298 | |
| }, | |
| { | |
| "epoch": 0.6329113924050633, | |
| "grad_norm": 3.4460909366607666, | |
| "learning_rate": 1.16616947618863e-06, | |
| "loss": 1.8364771604537964, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 0.6371308016877637, | |
| "grad_norm": 1.025884985923767, | |
| "learning_rate": 1.1655100682691951e-06, | |
| "loss": 1.3243968486785889, | |
| "step": 302 | |
| }, | |
| { | |
| "epoch": 0.6413502109704642, | |
| "grad_norm": 4.711573600769043, | |
| "learning_rate": 1.1648445077072692e-06, | |
| "loss": 0.9149092435836792, | |
| "step": 304 | |
| }, | |
| { | |
| "epoch": 0.6455696202531646, | |
| "grad_norm": 1.3360319137573242, | |
| "learning_rate": 1.164172802603494e-06, | |
| "loss": 1.0405997037887573, | |
| "step": 306 | |
| }, | |
| { | |
| "epoch": 0.6497890295358649, | |
| "grad_norm": 0.9437978267669678, | |
| "learning_rate": 1.1634949611332986e-06, | |
| "loss": 1.3173035383224487, | |
| "step": 308 | |
| }, | |
| { | |
| "epoch": 0.6540084388185654, | |
| "grad_norm": 1.0983142852783203, | |
| "learning_rate": 1.1628109915467975e-06, | |
| "loss": 1.251430869102478, | |
| "step": 310 | |
| }, | |
| { | |
| "epoch": 0.6582278481012658, | |
| "grad_norm": 2.2314326763153076, | |
| "learning_rate": 1.1621209021686924e-06, | |
| "loss": 1.0687130689620972, | |
| "step": 312 | |
| }, | |
| { | |
| "epoch": 0.6624472573839663, | |
| "grad_norm": 1.2847200632095337, | |
| "learning_rate": 1.1614247013981692e-06, | |
| "loss": 1.2770864963531494, | |
| "step": 314 | |
| }, | |
| { | |
| "epoch": 0.6666666666666666, | |
| "grad_norm": 2.491546630859375, | |
| "learning_rate": 1.1607223977087972e-06, | |
| "loss": 1.1677052974700928, | |
| "step": 316 | |
| }, | |
| { | |
| "epoch": 0.6708860759493671, | |
| "grad_norm": 1.112823486328125, | |
| "learning_rate": 1.160013999648425e-06, | |
| "loss": 1.1452233791351318, | |
| "step": 318 | |
| }, | |
| { | |
| "epoch": 0.6751054852320675, | |
| "grad_norm": 2.3609695434570312, | |
| "learning_rate": 1.1592995158390764e-06, | |
| "loss": 1.1290454864501953, | |
| "step": 320 | |
| }, | |
| { | |
| "epoch": 0.679324894514768, | |
| "grad_norm": 1.2427384853363037, | |
| "learning_rate": 1.1585789549768468e-06, | |
| "loss": 0.9067545533180237, | |
| "step": 322 | |
| }, | |
| { | |
| "epoch": 0.6835443037974683, | |
| "grad_norm": 1.1474452018737793, | |
| "learning_rate": 1.157852325831795e-06, | |
| "loss": 1.0441116094589233, | |
| "step": 324 | |
| }, | |
| { | |
| "epoch": 0.6877637130801688, | |
| "grad_norm": 2.173767328262329, | |
| "learning_rate": 1.157119637247839e-06, | |
| "loss": 0.8966209292411804, | |
| "step": 326 | |
| }, | |
| { | |
| "epoch": 0.6919831223628692, | |
| "grad_norm": 1.1117126941680908, | |
| "learning_rate": 1.1563808981426463e-06, | |
| "loss": 0.9047636985778809, | |
| "step": 328 | |
| }, | |
| { | |
| "epoch": 0.6962025316455697, | |
| "grad_norm": 5.761388778686523, | |
| "learning_rate": 1.155636117507527e-06, | |
| "loss": 1.6347975730895996, | |
| "step": 330 | |
| }, | |
| { | |
| "epoch": 0.70042194092827, | |
| "grad_norm": 2.036907434463501, | |
| "learning_rate": 1.1548853044073231e-06, | |
| "loss": 1.1312888860702515, | |
| "step": 332 | |
| }, | |
| { | |
| "epoch": 0.7046413502109705, | |
| "grad_norm": 1.1030317544937134, | |
| "learning_rate": 1.1541284679802987e-06, | |
| "loss": 1.441202163696289, | |
| "step": 334 | |
| }, | |
| { | |
| "epoch": 0.7088607594936709, | |
| "grad_norm": 1.0779141187667847, | |
| "learning_rate": 1.1533656174380295e-06, | |
| "loss": 1.3240406513214111, | |
| "step": 336 | |
| }, | |
| { | |
| "epoch": 0.7130801687763713, | |
| "grad_norm": 1.0553290843963623, | |
| "learning_rate": 1.1525967620652888e-06, | |
| "loss": 1.355104684829712, | |
| "step": 338 | |
| }, | |
| { | |
| "epoch": 0.7172995780590717, | |
| "grad_norm": 1.1798067092895508, | |
| "learning_rate": 1.151821911219936e-06, | |
| "loss": 1.3105881214141846, | |
| "step": 340 | |
| }, | |
| { | |
| "epoch": 0.7215189873417721, | |
| "grad_norm": 1.144148349761963, | |
| "learning_rate": 1.151041074332803e-06, | |
| "loss": 1.3141815662384033, | |
| "step": 342 | |
| }, | |
| { | |
| "epoch": 0.7257383966244726, | |
| "grad_norm": 7.016842365264893, | |
| "learning_rate": 1.1502542609075783e-06, | |
| "loss": 1.1324222087860107, | |
| "step": 344 | |
| }, | |
| { | |
| "epoch": 0.729957805907173, | |
| "grad_norm": 2.61010479927063, | |
| "learning_rate": 1.1494614805206915e-06, | |
| "loss": 0.908640444278717, | |
| "step": 346 | |
| }, | |
| { | |
| "epoch": 0.7341772151898734, | |
| "grad_norm": 1.0395554304122925, | |
| "learning_rate": 1.1486627428211974e-06, | |
| "loss": 1.308266282081604, | |
| "step": 348 | |
| }, | |
| { | |
| "epoch": 0.7383966244725738, | |
| "grad_norm": 2.721623659133911, | |
| "learning_rate": 1.147858057530658e-06, | |
| "loss": 1.228571891784668, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 0.7426160337552743, | |
| "grad_norm": 22.471782684326172, | |
| "learning_rate": 1.1470474344430244e-06, | |
| "loss": 1.149246335029602, | |
| "step": 352 | |
| }, | |
| { | |
| "epoch": 0.7468354430379747, | |
| "grad_norm": 2.0179569721221924, | |
| "learning_rate": 1.1462308834245177e-06, | |
| "loss": 0.6629557013511658, | |
| "step": 354 | |
| }, | |
| { | |
| "epoch": 0.7510548523206751, | |
| "grad_norm": 2.0603108406066895, | |
| "learning_rate": 1.1454084144135089e-06, | |
| "loss": 1.0916632413864136, | |
| "step": 356 | |
| }, | |
| { | |
| "epoch": 0.7552742616033755, | |
| "grad_norm": 12.69516372680664, | |
| "learning_rate": 1.1445800374203972e-06, | |
| "loss": 1.026712417602539, | |
| "step": 358 | |
| }, | |
| { | |
| "epoch": 0.759493670886076, | |
| "grad_norm": 1.3232800960540771, | |
| "learning_rate": 1.1437457625274893e-06, | |
| "loss": 1.2708055973052979, | |
| "step": 360 | |
| }, | |
| { | |
| "epoch": 0.7637130801687764, | |
| "grad_norm": 2.96313738822937, | |
| "learning_rate": 1.1429055998888764e-06, | |
| "loss": 1.0283684730529785, | |
| "step": 362 | |
| }, | |
| { | |
| "epoch": 0.7679324894514767, | |
| "grad_norm": 2.2174739837646484, | |
| "learning_rate": 1.1420595597303093e-06, | |
| "loss": 1.6322853565216064, | |
| "step": 364 | |
| }, | |
| { | |
| "epoch": 0.7721518987341772, | |
| "grad_norm": 3.5580825805664062, | |
| "learning_rate": 1.1412076523490762e-06, | |
| "loss": 0.7543882727622986, | |
| "step": 366 | |
| }, | |
| { | |
| "epoch": 0.7763713080168776, | |
| "grad_norm": 1.3570228815078735, | |
| "learning_rate": 1.140349888113876e-06, | |
| "loss": 1.1952807903289795, | |
| "step": 368 | |
| }, | |
| { | |
| "epoch": 0.7805907172995781, | |
| "grad_norm": 3.309746503829956, | |
| "learning_rate": 1.1394862774646915e-06, | |
| "loss": 1.5346460342407227, | |
| "step": 370 | |
| }, | |
| { | |
| "epoch": 0.7848101265822784, | |
| "grad_norm": 2.107482433319092, | |
| "learning_rate": 1.1386168309126637e-06, | |
| "loss": 1.0200968980789185, | |
| "step": 372 | |
| }, | |
| { | |
| "epoch": 0.7890295358649789, | |
| "grad_norm": 1.8146216869354248, | |
| "learning_rate": 1.1377415590399635e-06, | |
| "loss": 1.0982069969177246, | |
| "step": 374 | |
| }, | |
| { | |
| "epoch": 0.7932489451476793, | |
| "grad_norm": 1.029420256614685, | |
| "learning_rate": 1.1368604724996625e-06, | |
| "loss": 1.197360873222351, | |
| "step": 376 | |
| }, | |
| { | |
| "epoch": 0.7974683544303798, | |
| "grad_norm": 1.1954386234283447, | |
| "learning_rate": 1.1359735820156029e-06, | |
| "loss": 1.1520774364471436, | |
| "step": 378 | |
| }, | |
| { | |
| "epoch": 0.8016877637130801, | |
| "grad_norm": 1.2728540897369385, | |
| "learning_rate": 1.1350808983822688e-06, | |
| "loss": 0.7453869581222534, | |
| "step": 380 | |
| }, | |
| { | |
| "epoch": 0.8059071729957806, | |
| "grad_norm": 2.447286605834961, | |
| "learning_rate": 1.134182432464653e-06, | |
| "loss": 1.3108738660812378, | |
| "step": 382 | |
| }, | |
| { | |
| "epoch": 0.810126582278481, | |
| "grad_norm": 5.362612247467041, | |
| "learning_rate": 1.1332781951981248e-06, | |
| "loss": 1.0827962160110474, | |
| "step": 384 | |
| }, | |
| { | |
| "epoch": 0.8143459915611815, | |
| "grad_norm": 1.7063276767730713, | |
| "learning_rate": 1.1323681975882984e-06, | |
| "loss": 1.3062907457351685, | |
| "step": 386 | |
| }, | |
| { | |
| "epoch": 0.8185654008438819, | |
| "grad_norm": 2.8370184898376465, | |
| "learning_rate": 1.131452450710898e-06, | |
| "loss": 0.9684048295021057, | |
| "step": 388 | |
| }, | |
| { | |
| "epoch": 0.8227848101265823, | |
| "grad_norm": 1.1811680793762207, | |
| "learning_rate": 1.1305309657116222e-06, | |
| "loss": 1.2863088846206665, | |
| "step": 390 | |
| }, | |
| { | |
| "epoch": 0.8270042194092827, | |
| "grad_norm": 3.667228937149048, | |
| "learning_rate": 1.1296037538060104e-06, | |
| "loss": 1.0412209033966064, | |
| "step": 392 | |
| }, | |
| { | |
| "epoch": 0.8312236286919831, | |
| "grad_norm": 4.117892265319824, | |
| "learning_rate": 1.128670826279304e-06, | |
| "loss": 0.9639609456062317, | |
| "step": 394 | |
| }, | |
| { | |
| "epoch": 0.8354430379746836, | |
| "grad_norm": 1.29248046875, | |
| "learning_rate": 1.1277321944863108e-06, | |
| "loss": 1.2934151887893677, | |
| "step": 396 | |
| }, | |
| { | |
| "epoch": 0.8396624472573839, | |
| "grad_norm": 0.26427099108695984, | |
| "learning_rate": 1.1267878698512655e-06, | |
| "loss": 1.1188089847564697, | |
| "step": 398 | |
| }, | |
| { | |
| "epoch": 0.8438818565400844, | |
| "grad_norm": 0.8574454188346863, | |
| "learning_rate": 1.125837863867692e-06, | |
| "loss": 0.9975463151931763, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 0.8481012658227848, | |
| "grad_norm": 1.629779577255249, | |
| "learning_rate": 1.1248821880982622e-06, | |
| "loss": 0.7363186478614807, | |
| "step": 402 | |
| }, | |
| { | |
| "epoch": 0.8523206751054853, | |
| "grad_norm": 1.8325449228286743, | |
| "learning_rate": 1.1239208541746565e-06, | |
| "loss": 1.2270734310150146, | |
| "step": 404 | |
| }, | |
| { | |
| "epoch": 0.8565400843881856, | |
| "grad_norm": 0.7708742618560791, | |
| "learning_rate": 1.1229538737974207e-06, | |
| "loss": 0.9653185606002808, | |
| "step": 406 | |
| }, | |
| { | |
| "epoch": 0.8607594936708861, | |
| "grad_norm": 2.376756429672241, | |
| "learning_rate": 1.1219812587358254e-06, | |
| "loss": 0.997606098651886, | |
| "step": 408 | |
| }, | |
| { | |
| "epoch": 0.8649789029535865, | |
| "grad_norm": 1.2060413360595703, | |
| "learning_rate": 1.121003020827721e-06, | |
| "loss": 1.2897322177886963, | |
| "step": 410 | |
| }, | |
| { | |
| "epoch": 0.869198312236287, | |
| "grad_norm": 1.555523157119751, | |
| "learning_rate": 1.1200191719793948e-06, | |
| "loss": 0.876572847366333, | |
| "step": 412 | |
| }, | |
| { | |
| "epoch": 0.8734177215189873, | |
| "grad_norm": 3.1254689693450928, | |
| "learning_rate": 1.1190297241654262e-06, | |
| "loss": 1.2611523866653442, | |
| "step": 414 | |
| }, | |
| { | |
| "epoch": 0.8776371308016878, | |
| "grad_norm": 1.103677749633789, | |
| "learning_rate": 1.1180346894285397e-06, | |
| "loss": 1.0928722620010376, | |
| "step": 416 | |
| }, | |
| { | |
| "epoch": 0.8818565400843882, | |
| "grad_norm": 2.221696615219116, | |
| "learning_rate": 1.1170340798794594e-06, | |
| "loss": 1.2073904275894165, | |
| "step": 418 | |
| }, | |
| { | |
| "epoch": 0.8860759493670886, | |
| "grad_norm": 1.7576788663864136, | |
| "learning_rate": 1.1160279076967616e-06, | |
| "loss": 0.9891563057899475, | |
| "step": 420 | |
| }, | |
| { | |
| "epoch": 0.890295358649789, | |
| "grad_norm": 2.0383450984954834, | |
| "learning_rate": 1.1150161851267262e-06, | |
| "loss": 1.399549126625061, | |
| "step": 422 | |
| }, | |
| { | |
| "epoch": 0.8945147679324894, | |
| "grad_norm": 3.365711212158203, | |
| "learning_rate": 1.1139989244831874e-06, | |
| "loss": 1.029995083808899, | |
| "step": 424 | |
| }, | |
| { | |
| "epoch": 0.8987341772151899, | |
| "grad_norm": 2.773817539215088, | |
| "learning_rate": 1.1129761381473842e-06, | |
| "loss": 1.2264801263809204, | |
| "step": 426 | |
| }, | |
| { | |
| "epoch": 0.9029535864978903, | |
| "grad_norm": 2.2570652961730957, | |
| "learning_rate": 1.11194783856781e-06, | |
| "loss": 1.0824590921401978, | |
| "step": 428 | |
| }, | |
| { | |
| "epoch": 0.9071729957805907, | |
| "grad_norm": 5.947412967681885, | |
| "learning_rate": 1.1109140382600606e-06, | |
| "loss": 1.057291865348816, | |
| "step": 430 | |
| }, | |
| { | |
| "epoch": 0.9113924050632911, | |
| "grad_norm": 5.3977580070495605, | |
| "learning_rate": 1.1098747498066824e-06, | |
| "loss": 1.1226750612258911, | |
| "step": 432 | |
| }, | |
| { | |
| "epoch": 0.9156118143459916, | |
| "grad_norm": 3.355656385421753, | |
| "learning_rate": 1.108829985857018e-06, | |
| "loss": 1.3119703531265259, | |
| "step": 434 | |
| }, | |
| { | |
| "epoch": 0.919831223628692, | |
| "grad_norm": 3.1750526428222656, | |
| "learning_rate": 1.1077797591270538e-06, | |
| "loss": 0.9117200970649719, | |
| "step": 436 | |
| }, | |
| { | |
| "epoch": 0.9240506329113924, | |
| "grad_norm": 1.7613136768341064, | |
| "learning_rate": 1.1067240823992643e-06, | |
| "loss": 1.2639193534851074, | |
| "step": 438 | |
| }, | |
| { | |
| "epoch": 0.9282700421940928, | |
| "grad_norm": 1.1626863479614258, | |
| "learning_rate": 1.105662968522457e-06, | |
| "loss": 1.0154443979263306, | |
| "step": 440 | |
| }, | |
| { | |
| "epoch": 0.9324894514767933, | |
| "grad_norm": 4.403058052062988, | |
| "learning_rate": 1.1045964304116158e-06, | |
| "loss": 0.9742609262466431, | |
| "step": 442 | |
| }, | |
| { | |
| "epoch": 0.9367088607594937, | |
| "grad_norm": 4.4023237228393555, | |
| "learning_rate": 1.1035244810477435e-06, | |
| "loss": 1.161311388015747, | |
| "step": 444 | |
| }, | |
| { | |
| "epoch": 0.9409282700421941, | |
| "grad_norm": 1.6864513158798218, | |
| "learning_rate": 1.1024471334777044e-06, | |
| "loss": 1.3747820854187012, | |
| "step": 446 | |
| }, | |
| { | |
| "epoch": 0.9451476793248945, | |
| "grad_norm": 1.197826623916626, | |
| "learning_rate": 1.1013644008140647e-06, | |
| "loss": 1.0570836067199707, | |
| "step": 448 | |
| }, | |
| { | |
| "epoch": 0.9493670886075949, | |
| "grad_norm": 4.425671577453613, | |
| "learning_rate": 1.1002762962349342e-06, | |
| "loss": 1.066590666770935, | |
| "step": 450 | |
| }, | |
| { | |
| "epoch": 0.9535864978902954, | |
| "grad_norm": 1.4791566133499146, | |
| "learning_rate": 1.0991828329838048e-06, | |
| "loss": 1.3325567245483398, | |
| "step": 452 | |
| }, | |
| { | |
| "epoch": 0.9578059071729957, | |
| "grad_norm": 1.0424067974090576, | |
| "learning_rate": 1.0980840243693891e-06, | |
| "loss": 1.0253040790557861, | |
| "step": 454 | |
| }, | |
| { | |
| "epoch": 0.9620253164556962, | |
| "grad_norm": 1.6803632974624634, | |
| "learning_rate": 1.0969798837654603e-06, | |
| "loss": 1.2115472555160522, | |
| "step": 456 | |
| }, | |
| { | |
| "epoch": 0.9662447257383966, | |
| "grad_norm": 1.7260364294052124, | |
| "learning_rate": 1.0958704246106864e-06, | |
| "loss": 0.9136871695518494, | |
| "step": 458 | |
| }, | |
| { | |
| "epoch": 0.9704641350210971, | |
| "grad_norm": 4.201066493988037, | |
| "learning_rate": 1.0947556604084698e-06, | |
| "loss": 0.7265217304229736, | |
| "step": 460 | |
| }, | |
| { | |
| "epoch": 0.9746835443037974, | |
| "grad_norm": 1.5730266571044922, | |
| "learning_rate": 1.09363560472678e-06, | |
| "loss": 0.9232859015464783, | |
| "step": 462 | |
| }, | |
| { | |
| "epoch": 0.9789029535864979, | |
| "grad_norm": 1.9785159826278687, | |
| "learning_rate": 1.0925102711979916e-06, | |
| "loss": 1.2320111989974976, | |
| "step": 464 | |
| }, | |
| { | |
| "epoch": 0.9831223628691983, | |
| "grad_norm": 2.112661123275757, | |
| "learning_rate": 1.0913796735187152e-06, | |
| "loss": 0.7564235925674438, | |
| "step": 466 | |
| }, | |
| { | |
| "epoch": 0.9873417721518988, | |
| "grad_norm": 3.1255481243133545, | |
| "learning_rate": 1.0902438254496335e-06, | |
| "loss": 1.3790355920791626, | |
| "step": 468 | |
| }, | |
| { | |
| "epoch": 0.9915611814345991, | |
| "grad_norm": 2.643756866455078, | |
| "learning_rate": 1.0891027408153311e-06, | |
| "loss": 0.8968592286109924, | |
| "step": 470 | |
| }, | |
| { | |
| "epoch": 0.9957805907172996, | |
| "grad_norm": 1.2613961696624756, | |
| "learning_rate": 1.087956433504129e-06, | |
| "loss": 1.2724238634109497, | |
| "step": 472 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "grad_norm": 1.897484302520752, | |
| "learning_rate": 1.0868049174679133e-06, | |
| "loss": 1.3249882459640503, | |
| "step": 474 | |
| }, | |
| { | |
| "epoch": 1.0042194092827004, | |
| "grad_norm": 15.572712898254395, | |
| "learning_rate": 1.0856482067219672e-06, | |
| "loss": 1.1418063640594482, | |
| "step": 476 | |
| }, | |
| { | |
| "epoch": 1.0084388185654007, | |
| "grad_norm": 0.9338006377220154, | |
| "learning_rate": 1.0844863153447983e-06, | |
| "loss": 1.2509591579437256, | |
| "step": 478 | |
| }, | |
| { | |
| "epoch": 1.0126582278481013, | |
| "grad_norm": 1.7618024349212646, | |
| "learning_rate": 1.0833192574779696e-06, | |
| "loss": 1.2292466163635254, | |
| "step": 480 | |
| }, | |
| { | |
| "epoch": 1.0168776371308017, | |
| "grad_norm": 2.1609816551208496, | |
| "learning_rate": 1.0821470473259254e-06, | |
| "loss": 0.9470843076705933, | |
| "step": 482 | |
| }, | |
| { | |
| "epoch": 1.021097046413502, | |
| "grad_norm": 1.7773792743682861, | |
| "learning_rate": 1.0809696991558202e-06, | |
| "loss": 1.2175320386886597, | |
| "step": 484 | |
| }, | |
| { | |
| "epoch": 1.0253164556962024, | |
| "grad_norm": 3.248727560043335, | |
| "learning_rate": 1.0797872272973435e-06, | |
| "loss": 0.5157210230827332, | |
| "step": 486 | |
| }, | |
| { | |
| "epoch": 1.029535864978903, | |
| "grad_norm": 4.235684394836426, | |
| "learning_rate": 1.078599646142546e-06, | |
| "loss": 1.0747886896133423, | |
| "step": 488 | |
| }, | |
| { | |
| "epoch": 1.0337552742616034, | |
| "grad_norm": 1.0086420774459839, | |
| "learning_rate": 1.0774069701456646e-06, | |
| "loss": 0.91233229637146, | |
| "step": 490 | |
| }, | |
| { | |
| "epoch": 1.0379746835443038, | |
| "grad_norm": 1.760449767112732, | |
| "learning_rate": 1.0762092138229461e-06, | |
| "loss": 1.2355482578277588, | |
| "step": 492 | |
| }, | |
| { | |
| "epoch": 1.0421940928270041, | |
| "grad_norm": 2.7897939682006836, | |
| "learning_rate": 1.0750063917524715e-06, | |
| "loss": 0.876376748085022, | |
| "step": 494 | |
| }, | |
| { | |
| "epoch": 1.0464135021097047, | |
| "grad_norm": 1.583694577217102, | |
| "learning_rate": 1.073798518573977e-06, | |
| "loss": 0.9621012806892395, | |
| "step": 496 | |
| }, | |
| { | |
| "epoch": 1.0506329113924051, | |
| "grad_norm": 1.2283833026885986, | |
| "learning_rate": 1.0725856089886768e-06, | |
| "loss": 1.3705410957336426, | |
| "step": 498 | |
| }, | |
| { | |
| "epoch": 1.0548523206751055, | |
| "grad_norm": 1.892619013786316, | |
| "learning_rate": 1.071367677759084e-06, | |
| "loss": 1.2057194709777832, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 1.0590717299578059, | |
| "grad_norm": 1.5852138996124268, | |
| "learning_rate": 1.0701447397088314e-06, | |
| "loss": 1.225092887878418, | |
| "step": 502 | |
| }, | |
| { | |
| "epoch": 1.0632911392405062, | |
| "grad_norm": 1.6359580755233765, | |
| "learning_rate": 1.0689168097224896e-06, | |
| "loss": 1.3359899520874023, | |
| "step": 504 | |
| }, | |
| { | |
| "epoch": 1.0675105485232068, | |
| "grad_norm": 2.100905418395996, | |
| "learning_rate": 1.0676839027453882e-06, | |
| "loss": 0.8091757297515869, | |
| "step": 506 | |
| }, | |
| { | |
| "epoch": 1.0717299578059072, | |
| "grad_norm": 0.9373227953910828, | |
| "learning_rate": 1.0664460337834312e-06, | |
| "loss": 1.20570969581604, | |
| "step": 508 | |
| }, | |
| { | |
| "epoch": 1.0759493670886076, | |
| "grad_norm": 1.1788032054901123, | |
| "learning_rate": 1.0652032179029165e-06, | |
| "loss": 1.2286429405212402, | |
| "step": 510 | |
| }, | |
| { | |
| "epoch": 1.080168776371308, | |
| "grad_norm": 2.070732355117798, | |
| "learning_rate": 1.0639554702303516e-06, | |
| "loss": 1.1464022397994995, | |
| "step": 512 | |
| }, | |
| { | |
| "epoch": 1.0843881856540085, | |
| "grad_norm": 1.4905924797058105, | |
| "learning_rate": 1.0627028059522697e-06, | |
| "loss": 1.2270240783691406, | |
| "step": 514 | |
| }, | |
| { | |
| "epoch": 1.0886075949367089, | |
| "grad_norm": 1.2873064279556274, | |
| "learning_rate": 1.061445240315044e-06, | |
| "loss": 1.2191872596740723, | |
| "step": 516 | |
| }, | |
| { | |
| "epoch": 1.0928270042194093, | |
| "grad_norm": 1.5833098888397217, | |
| "learning_rate": 1.060182788624704e-06, | |
| "loss": 1.0899208784103394, | |
| "step": 518 | |
| }, | |
| { | |
| "epoch": 1.0970464135021096, | |
| "grad_norm": 1.2056680917739868, | |
| "learning_rate": 1.0589154662467476e-06, | |
| "loss": 1.002990484237671, | |
| "step": 520 | |
| }, | |
| { | |
| "epoch": 1.1012658227848102, | |
| "grad_norm": 2.3490617275238037, | |
| "learning_rate": 1.0576432886059546e-06, | |
| "loss": 0.9123169779777527, | |
| "step": 522 | |
| }, | |
| { | |
| "epoch": 1.1054852320675106, | |
| "grad_norm": 1.398703694343567, | |
| "learning_rate": 1.056366271186199e-06, | |
| "loss": 1.1336543560028076, | |
| "step": 524 | |
| }, | |
| { | |
| "epoch": 1.109704641350211, | |
| "grad_norm": 5.56015682220459, | |
| "learning_rate": 1.0550844295302604e-06, | |
| "loss": 0.8910406231880188, | |
| "step": 526 | |
| }, | |
| { | |
| "epoch": 1.1139240506329113, | |
| "grad_norm": 1.501484751701355, | |
| "learning_rate": 1.0537977792396352e-06, | |
| "loss": 1.4902470111846924, | |
| "step": 528 | |
| }, | |
| { | |
| "epoch": 1.1181434599156117, | |
| "grad_norm": 3.0271458625793457, | |
| "learning_rate": 1.0525063359743461e-06, | |
| "loss": 1.2566696405410767, | |
| "step": 530 | |
| }, | |
| { | |
| "epoch": 1.1223628691983123, | |
| "grad_norm": 3.5508389472961426, | |
| "learning_rate": 1.0512101154527524e-06, | |
| "loss": 0.6722557544708252, | |
| "step": 532 | |
| }, | |
| { | |
| "epoch": 1.1265822784810127, | |
| "grad_norm": 5.183070182800293, | |
| "learning_rate": 1.049909133451358e-06, | |
| "loss": 1.16892409324646, | |
| "step": 534 | |
| }, | |
| { | |
| "epoch": 1.130801687763713, | |
| "grad_norm": 1.5916978120803833, | |
| "learning_rate": 1.0486034058046184e-06, | |
| "loss": 1.2602534294128418, | |
| "step": 536 | |
| }, | |
| { | |
| "epoch": 1.1350210970464134, | |
| "grad_norm": 2.219564914703369, | |
| "learning_rate": 1.0472929484047508e-06, | |
| "loss": 0.9274411797523499, | |
| "step": 538 | |
| }, | |
| { | |
| "epoch": 1.139240506329114, | |
| "grad_norm": 2.6550791263580322, | |
| "learning_rate": 1.0459777772015377e-06, | |
| "loss": 0.7955924868583679, | |
| "step": 540 | |
| }, | |
| { | |
| "epoch": 1.1434599156118144, | |
| "grad_norm": 1.0860302448272705, | |
| "learning_rate": 1.044657908202135e-06, | |
| "loss": 0.8460701704025269, | |
| "step": 542 | |
| }, | |
| { | |
| "epoch": 1.1476793248945147, | |
| "grad_norm": 2.0455169677734375, | |
| "learning_rate": 1.0433333574708754e-06, | |
| "loss": 0.9194719791412354, | |
| "step": 544 | |
| }, | |
| { | |
| "epoch": 1.1518987341772151, | |
| "grad_norm": 0.8281774520874023, | |
| "learning_rate": 1.042004141129074e-06, | |
| "loss": 1.209435224533081, | |
| "step": 546 | |
| }, | |
| { | |
| "epoch": 1.1561181434599157, | |
| "grad_norm": 3.4284002780914307, | |
| "learning_rate": 1.040670275354832e-06, | |
| "loss": 1.1639091968536377, | |
| "step": 548 | |
| }, | |
| { | |
| "epoch": 1.160337552742616, | |
| "grad_norm": 3.964017152786255, | |
| "learning_rate": 1.0393317763828394e-06, | |
| "loss": 1.0248503684997559, | |
| "step": 550 | |
| }, | |
| { | |
| "epoch": 1.1645569620253164, | |
| "grad_norm": 1.5311849117279053, | |
| "learning_rate": 1.0379886605041773e-06, | |
| "loss": 1.5549976825714111, | |
| "step": 552 | |
| }, | |
| { | |
| "epoch": 1.1687763713080168, | |
| "grad_norm": 1.1133424043655396, | |
| "learning_rate": 1.0366409440661203e-06, | |
| "loss": 1.2537164688110352, | |
| "step": 554 | |
| }, | |
| { | |
| "epoch": 1.1729957805907172, | |
| "grad_norm": 2.7838144302368164, | |
| "learning_rate": 1.035288643471937e-06, | |
| "loss": 0.6379430890083313, | |
| "step": 556 | |
| }, | |
| { | |
| "epoch": 1.1772151898734178, | |
| "grad_norm": 1.4451053142547607, | |
| "learning_rate": 1.0339317751806905e-06, | |
| "loss": 1.1707175970077515, | |
| "step": 558 | |
| }, | |
| { | |
| "epoch": 1.1814345991561181, | |
| "grad_norm": 1.5942399501800537, | |
| "learning_rate": 1.0325703557070377e-06, | |
| "loss": 0.7751450538635254, | |
| "step": 560 | |
| }, | |
| { | |
| "epoch": 1.1856540084388185, | |
| "grad_norm": 0.90716153383255, | |
| "learning_rate": 1.0312044016210299e-06, | |
| "loss": 0.9596038460731506, | |
| "step": 562 | |
| }, | |
| { | |
| "epoch": 1.189873417721519, | |
| "grad_norm": 3.479564905166626, | |
| "learning_rate": 1.029833929547908e-06, | |
| "loss": 1.4486083984375, | |
| "step": 564 | |
| }, | |
| { | |
| "epoch": 1.1940928270042195, | |
| "grad_norm": 1.8199161291122437, | |
| "learning_rate": 1.028458956167903e-06, | |
| "loss": 1.2960246801376343, | |
| "step": 566 | |
| }, | |
| { | |
| "epoch": 1.1983122362869199, | |
| "grad_norm": 2.1159839630126953, | |
| "learning_rate": 1.0270794982160328e-06, | |
| "loss": 0.9260680079460144, | |
| "step": 568 | |
| }, | |
| { | |
| "epoch": 1.2025316455696202, | |
| "grad_norm": 1.1287225484848022, | |
| "learning_rate": 1.0256955724818963e-06, | |
| "loss": 1.1793110370635986, | |
| "step": 570 | |
| }, | |
| { | |
| "epoch": 1.2067510548523206, | |
| "grad_norm": 3.6729793548583984, | |
| "learning_rate": 1.0243071958094713e-06, | |
| "loss": 1.1447832584381104, | |
| "step": 572 | |
| }, | |
| { | |
| "epoch": 1.2109704641350212, | |
| "grad_norm": 1.18032705783844, | |
| "learning_rate": 1.0229143850969086e-06, | |
| "loss": 1.230734944343567, | |
| "step": 574 | |
| }, | |
| { | |
| "epoch": 1.2151898734177216, | |
| "grad_norm": 1.3310970067977905, | |
| "learning_rate": 1.0215171572963262e-06, | |
| "loss": 1.0188127756118774, | |
| "step": 576 | |
| }, | |
| { | |
| "epoch": 1.219409282700422, | |
| "grad_norm": 1.4861373901367188, | |
| "learning_rate": 1.020115529413603e-06, | |
| "loss": 0.6179706454277039, | |
| "step": 578 | |
| }, | |
| { | |
| "epoch": 1.2236286919831223, | |
| "grad_norm": 1.791669249534607, | |
| "learning_rate": 1.0187095185081726e-06, | |
| "loss": 1.0826208591461182, | |
| "step": 580 | |
| }, | |
| { | |
| "epoch": 1.2278481012658227, | |
| "grad_norm": 2.172065258026123, | |
| "learning_rate": 1.0172991416928149e-06, | |
| "loss": 0.9076665639877319, | |
| "step": 582 | |
| }, | |
| { | |
| "epoch": 1.2320675105485233, | |
| "grad_norm": 1.2151978015899658, | |
| "learning_rate": 1.0158844161334472e-06, | |
| "loss": 0.9629290103912354, | |
| "step": 584 | |
| }, | |
| { | |
| "epoch": 1.2362869198312236, | |
| "grad_norm": 3.7680106163024902, | |
| "learning_rate": 1.014465359048917e-06, | |
| "loss": 1.0498627424240112, | |
| "step": 586 | |
| }, | |
| { | |
| "epoch": 1.240506329113924, | |
| "grad_norm": 1.3523688316345215, | |
| "learning_rate": 1.0130419877107911e-06, | |
| "loss": 0.8714591860771179, | |
| "step": 588 | |
| }, | |
| { | |
| "epoch": 1.2447257383966246, | |
| "grad_norm": 1.3897624015808105, | |
| "learning_rate": 1.0116143194431453e-06, | |
| "loss": 1.247403860092163, | |
| "step": 590 | |
| }, | |
| { | |
| "epoch": 1.248945147679325, | |
| "grad_norm": 0.8131434917449951, | |
| "learning_rate": 1.0101823716223555e-06, | |
| "loss": 0.6486424207687378, | |
| "step": 592 | |
| }, | |
| { | |
| "epoch": 1.2531645569620253, | |
| "grad_norm": 0.7691041827201843, | |
| "learning_rate": 1.0087461616768827e-06, | |
| "loss": 0.8923141956329346, | |
| "step": 594 | |
| }, | |
| { | |
| "epoch": 1.2573839662447257, | |
| "grad_norm": 3.8645286560058594, | |
| "learning_rate": 1.0073057070870643e-06, | |
| "loss": 0.8870598673820496, | |
| "step": 596 | |
| }, | |
| { | |
| "epoch": 1.261603375527426, | |
| "grad_norm": 1.9958916902542114, | |
| "learning_rate": 1.0058610253848993e-06, | |
| "loss": 0.6330664753913879, | |
| "step": 598 | |
| }, | |
| { | |
| "epoch": 1.2658227848101267, | |
| "grad_norm": 0.7626611590385437, | |
| "learning_rate": 1.0044121341538363e-06, | |
| "loss": 1.0315228700637817, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 1.270042194092827, | |
| "grad_norm": 3.175126791000366, | |
| "learning_rate": 1.0029590510285573e-06, | |
| "loss": 1.4815832376480103, | |
| "step": 602 | |
| }, | |
| { | |
| "epoch": 1.2742616033755274, | |
| "grad_norm": 1.2133333683013916, | |
| "learning_rate": 1.001501793694766e-06, | |
| "loss": 1.1330845355987549, | |
| "step": 604 | |
| }, | |
| { | |
| "epoch": 1.2784810126582278, | |
| "grad_norm": 6.052526473999023, | |
| "learning_rate": 1.0000403798889702e-06, | |
| "loss": 1.0692338943481445, | |
| "step": 606 | |
| }, | |
| { | |
| "epoch": 1.2827004219409281, | |
| "grad_norm": 1.9215449094772339, | |
| "learning_rate": 9.985748273982674e-07, | |
| "loss": 0.8957496285438538, | |
| "step": 608 | |
| }, | |
| { | |
| "epoch": 1.2869198312236287, | |
| "grad_norm": 2.6900336742401123, | |
| "learning_rate": 9.97105154060127e-07, | |
| "loss": 1.0588608980178833, | |
| "step": 610 | |
| }, | |
| { | |
| "epoch": 1.2911392405063291, | |
| "grad_norm": 4.2595014572143555, | |
| "learning_rate": 9.956313777621743e-07, | |
| "loss": 0.6556817293167114, | |
| "step": 612 | |
| }, | |
| { | |
| "epoch": 1.2953586497890295, | |
| "grad_norm": 8.070136070251465, | |
| "learning_rate": 9.941535164419721e-07, | |
| "loss": 0.718927800655365, | |
| "step": 614 | |
| }, | |
| { | |
| "epoch": 1.29957805907173, | |
| "grad_norm": 1.7044512033462524, | |
| "learning_rate": 9.926715880868028e-07, | |
| "loss": 1.1856049299240112, | |
| "step": 616 | |
| }, | |
| { | |
| "epoch": 1.3037974683544304, | |
| "grad_norm": 1.7254352569580078, | |
| "learning_rate": 9.911856107334497e-07, | |
| "loss": 1.2073801755905151, | |
| "step": 618 | |
| }, | |
| { | |
| "epoch": 1.3080168776371308, | |
| "grad_norm": 1.0367379188537598, | |
| "learning_rate": 9.896956024679761e-07, | |
| "loss": 0.7765376567840576, | |
| "step": 620 | |
| }, | |
| { | |
| "epoch": 1.3122362869198312, | |
| "grad_norm": 1.8108422756195068, | |
| "learning_rate": 9.882015814255073e-07, | |
| "loss": 1.221542477607727, | |
| "step": 622 | |
| }, | |
| { | |
| "epoch": 1.3164556962025316, | |
| "grad_norm": 1.8170989751815796, | |
| "learning_rate": 9.867035657900079e-07, | |
| "loss": 0.9256758689880371, | |
| "step": 624 | |
| }, | |
| { | |
| "epoch": 1.3206751054852321, | |
| "grad_norm": 1.397377610206604, | |
| "learning_rate": 9.852015737940618e-07, | |
| "loss": 1.1996105909347534, | |
| "step": 626 | |
| }, | |
| { | |
| "epoch": 1.3248945147679325, | |
| "grad_norm": 2.3642630577087402, | |
| "learning_rate": 9.836956237186495e-07, | |
| "loss": 1.7291648387908936, | |
| "step": 628 | |
| }, | |
| { | |
| "epoch": 1.3291139240506329, | |
| "grad_norm": 1.3181530237197876, | |
| "learning_rate": 9.821857338929266e-07, | |
| "loss": 0.9664700627326965, | |
| "step": 630 | |
| }, | |
| { | |
| "epoch": 1.3333333333333333, | |
| "grad_norm": 1.441937804222107, | |
| "learning_rate": 9.806719226939986e-07, | |
| "loss": 1.2257553339004517, | |
| "step": 632 | |
| }, | |
| { | |
| "epoch": 1.3375527426160336, | |
| "grad_norm": 0.5612751841545105, | |
| "learning_rate": 9.791542085467003e-07, | |
| "loss": 0.9133172035217285, | |
| "step": 634 | |
| }, | |
| { | |
| "epoch": 1.3417721518987342, | |
| "grad_norm": 1.5648330450057983, | |
| "learning_rate": 9.776326099233684e-07, | |
| "loss": 0.7176555395126343, | |
| "step": 636 | |
| }, | |
| { | |
| "epoch": 1.3459915611814346, | |
| "grad_norm": 21.417667388916016, | |
| "learning_rate": 9.761071453436195e-07, | |
| "loss": 0.9039233326911926, | |
| "step": 638 | |
| }, | |
| { | |
| "epoch": 1.350210970464135, | |
| "grad_norm": 1.9340534210205078, | |
| "learning_rate": 9.745778333741227e-07, | |
| "loss": 1.2601927518844604, | |
| "step": 640 | |
| }, | |
| { | |
| "epoch": 1.3544303797468356, | |
| "grad_norm": 2.36677885055542, | |
| "learning_rate": 9.73044692628374e-07, | |
| "loss": 0.9230378866195679, | |
| "step": 642 | |
| }, | |
| { | |
| "epoch": 1.358649789029536, | |
| "grad_norm": 0.7526681423187256, | |
| "learning_rate": 9.715077417664705e-07, | |
| "loss": 1.3141403198242188, | |
| "step": 644 | |
| }, | |
| { | |
| "epoch": 1.3628691983122363, | |
| "grad_norm": 1.5295689105987549, | |
| "learning_rate": 9.699669994948829e-07, | |
| "loss": 1.20694899559021, | |
| "step": 646 | |
| }, | |
| { | |
| "epoch": 1.3670886075949367, | |
| "grad_norm": 8.918047904968262, | |
| "learning_rate": 9.684224845662273e-07, | |
| "loss": 0.9112899899482727, | |
| "step": 648 | |
| }, | |
| { | |
| "epoch": 1.371308016877637, | |
| "grad_norm": 2.3975322246551514, | |
| "learning_rate": 9.668742157790378e-07, | |
| "loss": 1.4381672143936157, | |
| "step": 650 | |
| }, | |
| { | |
| "epoch": 1.3755274261603376, | |
| "grad_norm": 1.3441905975341797, | |
| "learning_rate": 9.653222119775373e-07, | |
| "loss": 1.224355936050415, | |
| "step": 652 | |
| }, | |
| { | |
| "epoch": 1.379746835443038, | |
| "grad_norm": 8.170360565185547, | |
| "learning_rate": 9.637664920514075e-07, | |
| "loss": 0.9496920108795166, | |
| "step": 654 | |
| }, | |
| { | |
| "epoch": 1.3839662447257384, | |
| "grad_norm": 1.037866234779358, | |
| "learning_rate": 9.622070749355605e-07, | |
| "loss": 1.2685517072677612, | |
| "step": 656 | |
| }, | |
| { | |
| "epoch": 1.3881856540084387, | |
| "grad_norm": 5.456931114196777, | |
| "learning_rate": 9.60643979609907e-07, | |
| "loss": 0.676283597946167, | |
| "step": 658 | |
| }, | |
| { | |
| "epoch": 1.3924050632911391, | |
| "grad_norm": 1.2280727624893188, | |
| "learning_rate": 9.59077225099126e-07, | |
| "loss": 1.1961660385131836, | |
| "step": 660 | |
| }, | |
| { | |
| "epoch": 1.3966244725738397, | |
| "grad_norm": 2.4184653759002686, | |
| "learning_rate": 9.57506830472433e-07, | |
| "loss": 0.7105515599250793, | |
| "step": 662 | |
| }, | |
| { | |
| "epoch": 1.40084388185654, | |
| "grad_norm": 2.039471387863159, | |
| "learning_rate": 9.559328148433473e-07, | |
| "loss": 1.2236860990524292, | |
| "step": 664 | |
| }, | |
| { | |
| "epoch": 1.4050632911392404, | |
| "grad_norm": 3.3632094860076904, | |
| "learning_rate": 9.54355197369461e-07, | |
| "loss": 0.8225454092025757, | |
| "step": 666 | |
| }, | |
| { | |
| "epoch": 1.409282700421941, | |
| "grad_norm": 2.8079605102539062, | |
| "learning_rate": 9.527739972522041e-07, | |
| "loss": 1.224509835243225, | |
| "step": 668 | |
| }, | |
| { | |
| "epoch": 1.4135021097046414, | |
| "grad_norm": 1.0237503051757812, | |
| "learning_rate": 9.511892337366117e-07, | |
| "loss": 1.2146466970443726, | |
| "step": 670 | |
| }, | |
| { | |
| "epoch": 1.4177215189873418, | |
| "grad_norm": 2.5967676639556885, | |
| "learning_rate": 9.496009261110901e-07, | |
| "loss": 1.5150516033172607, | |
| "step": 672 | |
| }, | |
| { | |
| "epoch": 1.4219409282700421, | |
| "grad_norm": 1.5330960750579834, | |
| "learning_rate": 9.480090937071802e-07, | |
| "loss": 0.8809629082679749, | |
| "step": 674 | |
| }, | |
| { | |
| "epoch": 1.4261603375527425, | |
| "grad_norm": 1.5120795965194702, | |
| "learning_rate": 9.464137558993251e-07, | |
| "loss": 0.7257891893386841, | |
| "step": 676 | |
| }, | |
| { | |
| "epoch": 1.4303797468354431, | |
| "grad_norm": 1.8336877822875977, | |
| "learning_rate": 9.448149321046316e-07, | |
| "loss": 1.0394529104232788, | |
| "step": 678 | |
| }, | |
| { | |
| "epoch": 1.4345991561181435, | |
| "grad_norm": 3.8357579708099365, | |
| "learning_rate": 9.432126417826358e-07, | |
| "loss": 1.1556706428527832, | |
| "step": 680 | |
| }, | |
| { | |
| "epoch": 1.4388185654008439, | |
| "grad_norm": 1.9536528587341309, | |
| "learning_rate": 9.416069044350646e-07, | |
| "loss": 0.9677222967147827, | |
| "step": 682 | |
| }, | |
| { | |
| "epoch": 1.4430379746835442, | |
| "grad_norm": 3.1365737915039062, | |
| "learning_rate": 9.399977396055995e-07, | |
| "loss": 1.2571027278900146, | |
| "step": 684 | |
| }, | |
| { | |
| "epoch": 1.4472573839662446, | |
| "grad_norm": 3.375725746154785, | |
| "learning_rate": 9.383851668796392e-07, | |
| "loss": 0.7981452345848083, | |
| "step": 686 | |
| }, | |
| { | |
| "epoch": 1.4514767932489452, | |
| "grad_norm": 2.0981504917144775, | |
| "learning_rate": 9.367692058840594e-07, | |
| "loss": 0.9887269735336304, | |
| "step": 688 | |
| }, | |
| { | |
| "epoch": 1.4556962025316456, | |
| "grad_norm": 5.0241265296936035, | |
| "learning_rate": 9.351498762869752e-07, | |
| "loss": 1.1597225666046143, | |
| "step": 690 | |
| }, | |
| { | |
| "epoch": 1.459915611814346, | |
| "grad_norm": 3.389521598815918, | |
| "learning_rate": 9.33527197797502e-07, | |
| "loss": 0.7292091846466064, | |
| "step": 692 | |
| }, | |
| { | |
| "epoch": 1.4641350210970465, | |
| "grad_norm": 2.6481471061706543, | |
| "learning_rate": 9.319011901655145e-07, | |
| "loss": 1.3359123468399048, | |
| "step": 694 | |
| }, | |
| { | |
| "epoch": 1.4683544303797469, | |
| "grad_norm": 3.0631887912750244, | |
| "learning_rate": 9.302718731814072e-07, | |
| "loss": 0.7314563393592834, | |
| "step": 696 | |
| }, | |
| { | |
| "epoch": 1.4725738396624473, | |
| "grad_norm": 1.1294174194335938, | |
| "learning_rate": 9.286392666758532e-07, | |
| "loss": 1.202915072441101, | |
| "step": 698 | |
| }, | |
| { | |
| "epoch": 1.4767932489451476, | |
| "grad_norm": 0.9764413237571716, | |
| "learning_rate": 9.270033905195628e-07, | |
| "loss": 1.2414040565490723, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 1.481012658227848, | |
| "grad_norm": 2.000211000442505, | |
| "learning_rate": 9.25364264623042e-07, | |
| "loss": 1.1095331907272339, | |
| "step": 702 | |
| }, | |
| { | |
| "epoch": 1.4852320675105486, | |
| "grad_norm": 2.9179534912109375, | |
| "learning_rate": 9.237219089363494e-07, | |
| "loss": 0.8434455990791321, | |
| "step": 704 | |
| }, | |
| { | |
| "epoch": 1.489451476793249, | |
| "grad_norm": 1.4670263528823853, | |
| "learning_rate": 9.220763434488545e-07, | |
| "loss": 1.1951138973236084, | |
| "step": 706 | |
| }, | |
| { | |
| "epoch": 1.4936708860759493, | |
| "grad_norm": 1.2732899188995361, | |
| "learning_rate": 9.204275881889934e-07, | |
| "loss": 1.2532763481140137, | |
| "step": 708 | |
| }, | |
| { | |
| "epoch": 1.49789029535865, | |
| "grad_norm": 1.302393913269043, | |
| "learning_rate": 9.187756632240253e-07, | |
| "loss": 1.1061906814575195, | |
| "step": 710 | |
| }, | |
| { | |
| "epoch": 1.50210970464135, | |
| "grad_norm": 3.0175118446350098, | |
| "learning_rate": 9.171205886597887e-07, | |
| "loss": 0.5435208082199097, | |
| "step": 712 | |
| }, | |
| { | |
| "epoch": 1.5063291139240507, | |
| "grad_norm": 2.06999135017395, | |
| "learning_rate": 9.154623846404564e-07, | |
| "loss": 1.2072559595108032, | |
| "step": 714 | |
| }, | |
| { | |
| "epoch": 1.510548523206751, | |
| "grad_norm": 0.7862725853919983, | |
| "learning_rate": 9.138010713482899e-07, | |
| "loss": 1.1671605110168457, | |
| "step": 716 | |
| }, | |
| { | |
| "epoch": 1.5147679324894514, | |
| "grad_norm": 1.871321439743042, | |
| "learning_rate": 9.121366690033944e-07, | |
| "loss": 1.1794459819793701, | |
| "step": 718 | |
| }, | |
| { | |
| "epoch": 1.518987341772152, | |
| "grad_norm": 2.5938880443573, | |
| "learning_rate": 9.104691978634728e-07, | |
| "loss": 1.0995539426803589, | |
| "step": 720 | |
| }, | |
| { | |
| "epoch": 1.5232067510548524, | |
| "grad_norm": 3.1606552600860596, | |
| "learning_rate": 9.08798678223578e-07, | |
| "loss": 1.231619954109192, | |
| "step": 722 | |
| }, | |
| { | |
| "epoch": 1.5274261603375527, | |
| "grad_norm": 0.9736570715904236, | |
| "learning_rate": 9.071251304158672e-07, | |
| "loss": 1.250243067741394, | |
| "step": 724 | |
| }, | |
| { | |
| "epoch": 1.5316455696202531, | |
| "grad_norm": 3.1312761306762695, | |
| "learning_rate": 9.054485748093538e-07, | |
| "loss": 0.6082893013954163, | |
| "step": 726 | |
| }, | |
| { | |
| "epoch": 1.5358649789029535, | |
| "grad_norm": 2.6216931343078613, | |
| "learning_rate": 9.037690318096597e-07, | |
| "loss": 0.4211277663707733, | |
| "step": 728 | |
| }, | |
| { | |
| "epoch": 1.540084388185654, | |
| "grad_norm": 1.668655276298523, | |
| "learning_rate": 9.020865218587668e-07, | |
| "loss": 1.0038397312164307, | |
| "step": 730 | |
| }, | |
| { | |
| "epoch": 1.5443037974683544, | |
| "grad_norm": 2.301889419555664, | |
| "learning_rate": 9.004010654347677e-07, | |
| "loss": 0.9896605610847473, | |
| "step": 732 | |
| }, | |
| { | |
| "epoch": 1.5485232067510548, | |
| "grad_norm": 1.4354939460754395, | |
| "learning_rate": 8.98712683051618e-07, | |
| "loss": 1.235560417175293, | |
| "step": 734 | |
| }, | |
| { | |
| "epoch": 1.5527426160337554, | |
| "grad_norm": 1.394313097000122, | |
| "learning_rate": 8.970213952588844e-07, | |
| "loss": 0.986316442489624, | |
| "step": 736 | |
| }, | |
| { | |
| "epoch": 1.5569620253164556, | |
| "grad_norm": 2.5624778270721436, | |
| "learning_rate": 8.953272226414971e-07, | |
| "loss": 0.9202096462249756, | |
| "step": 738 | |
| }, | |
| { | |
| "epoch": 1.5611814345991561, | |
| "grad_norm": 2.074122667312622, | |
| "learning_rate": 8.936301858194968e-07, | |
| "loss": 1.1290022134780884, | |
| "step": 740 | |
| }, | |
| { | |
| "epoch": 1.5654008438818565, | |
| "grad_norm": 1.5154873132705688, | |
| "learning_rate": 8.919303054477857e-07, | |
| "loss": 0.8514289855957031, | |
| "step": 742 | |
| }, | |
| { | |
| "epoch": 1.5696202531645569, | |
| "grad_norm": 6.750024318695068, | |
| "learning_rate": 8.90227602215875e-07, | |
| "loss": 0.5599585175514221, | |
| "step": 744 | |
| }, | |
| { | |
| "epoch": 1.5738396624472575, | |
| "grad_norm": 1.8320426940917969, | |
| "learning_rate": 8.885220968476331e-07, | |
| "loss": 0.780780017375946, | |
| "step": 746 | |
| }, | |
| { | |
| "epoch": 1.5780590717299579, | |
| "grad_norm": 3.8813395500183105, | |
| "learning_rate": 8.868138101010339e-07, | |
| "loss": 0.656001091003418, | |
| "step": 748 | |
| }, | |
| { | |
| "epoch": 1.5822784810126582, | |
| "grad_norm": 1.3229504823684692, | |
| "learning_rate": 8.85102762767904e-07, | |
| "loss": 1.215933084487915, | |
| "step": 750 | |
| }, | |
| { | |
| "epoch": 1.5864978902953588, | |
| "grad_norm": 1.5343960523605347, | |
| "learning_rate": 8.833889756736696e-07, | |
| "loss": 0.7347640991210938, | |
| "step": 752 | |
| }, | |
| { | |
| "epoch": 1.590717299578059, | |
| "grad_norm": 1.3849875926971436, | |
| "learning_rate": 8.816724696771023e-07, | |
| "loss": 0.8356782793998718, | |
| "step": 754 | |
| }, | |
| { | |
| "epoch": 1.5949367088607596, | |
| "grad_norm": 1.4311785697937012, | |
| "learning_rate": 8.799532656700668e-07, | |
| "loss": 0.6571628451347351, | |
| "step": 756 | |
| }, | |
| { | |
| "epoch": 1.59915611814346, | |
| "grad_norm": 1.954759955406189, | |
| "learning_rate": 8.78231384577265e-07, | |
| "loss": 0.8940713405609131, | |
| "step": 758 | |
| }, | |
| { | |
| "epoch": 1.6033755274261603, | |
| "grad_norm": 1.7239028215408325, | |
| "learning_rate": 8.765068473559826e-07, | |
| "loss": 1.1826146841049194, | |
| "step": 760 | |
| }, | |
| { | |
| "epoch": 1.6075949367088609, | |
| "grad_norm": 0.5811662673950195, | |
| "learning_rate": 8.747796749958329e-07, | |
| "loss": 0.8342135548591614, | |
| "step": 762 | |
| }, | |
| { | |
| "epoch": 1.611814345991561, | |
| "grad_norm": 2.9614205360412598, | |
| "learning_rate": 8.730498885185022e-07, | |
| "loss": 1.2261645793914795, | |
| "step": 764 | |
| }, | |
| { | |
| "epoch": 1.6160337552742616, | |
| "grad_norm": 1.8469215631484985, | |
| "learning_rate": 8.713175089774935e-07, | |
| "loss": 1.0828239917755127, | |
| "step": 766 | |
| }, | |
| { | |
| "epoch": 1.620253164556962, | |
| "grad_norm": 0.7462615966796875, | |
| "learning_rate": 8.695825574578708e-07, | |
| "loss": 1.08014976978302, | |
| "step": 768 | |
| }, | |
| { | |
| "epoch": 1.6244725738396624, | |
| "grad_norm": 1.5869735479354858, | |
| "learning_rate": 8.678450550760013e-07, | |
| "loss": 1.2228014469146729, | |
| "step": 770 | |
| }, | |
| { | |
| "epoch": 1.628691983122363, | |
| "grad_norm": 1.267874836921692, | |
| "learning_rate": 8.661050229793e-07, | |
| "loss": 1.2381342649459839, | |
| "step": 772 | |
| }, | |
| { | |
| "epoch": 1.6329113924050633, | |
| "grad_norm": 1.8816311359405518, | |
| "learning_rate": 8.643624823459705e-07, | |
| "loss": 1.2392218112945557, | |
| "step": 774 | |
| }, | |
| { | |
| "epoch": 1.6371308016877637, | |
| "grad_norm": 2.270045280456543, | |
| "learning_rate": 8.626174543847494e-07, | |
| "loss": 1.2957593202590942, | |
| "step": 776 | |
| }, | |
| { | |
| "epoch": 1.6413502109704643, | |
| "grad_norm": 0.9629765152931213, | |
| "learning_rate": 8.608699603346457e-07, | |
| "loss": 0.8434277772903442, | |
| "step": 778 | |
| }, | |
| { | |
| "epoch": 1.6455696202531644, | |
| "grad_norm": 6.275035381317139, | |
| "learning_rate": 8.591200214646842e-07, | |
| "loss": 0.3582332730293274, | |
| "step": 780 | |
| }, | |
| { | |
| "epoch": 1.649789029535865, | |
| "grad_norm": 1.902158260345459, | |
| "learning_rate": 8.573676590736464e-07, | |
| "loss": 1.1803405284881592, | |
| "step": 782 | |
| }, | |
| { | |
| "epoch": 1.6540084388185654, | |
| "grad_norm": 1.5325170755386353, | |
| "learning_rate": 8.556128944898098e-07, | |
| "loss": 0.9606213569641113, | |
| "step": 784 | |
| }, | |
| { | |
| "epoch": 1.6582278481012658, | |
| "grad_norm": 1.5803859233856201, | |
| "learning_rate": 8.538557490706904e-07, | |
| "loss": 1.1115106344223022, | |
| "step": 786 | |
| }, | |
| { | |
| "epoch": 1.6624472573839664, | |
| "grad_norm": 1.5015286207199097, | |
| "learning_rate": 8.520962442027808e-07, | |
| "loss": 0.5854233503341675, | |
| "step": 788 | |
| }, | |
| { | |
| "epoch": 1.6666666666666665, | |
| "grad_norm": 1.0308812856674194, | |
| "learning_rate": 8.503344013012916e-07, | |
| "loss": 1.2016632556915283, | |
| "step": 790 | |
| }, | |
| { | |
| "epoch": 1.6708860759493671, | |
| "grad_norm": 3.8510706424713135, | |
| "learning_rate": 8.485702418098897e-07, | |
| "loss": 0.648362398147583, | |
| "step": 792 | |
| }, | |
| { | |
| "epoch": 1.6751054852320675, | |
| "grad_norm": 0.47625789046287537, | |
| "learning_rate": 8.468037872004374e-07, | |
| "loss": 1.0536069869995117, | |
| "step": 794 | |
| }, | |
| { | |
| "epoch": 1.6793248945147679, | |
| "grad_norm": 1.4682717323303223, | |
| "learning_rate": 8.450350589727312e-07, | |
| "loss": 1.2215386629104614, | |
| "step": 796 | |
| }, | |
| { | |
| "epoch": 1.6835443037974684, | |
| "grad_norm": 4.169456481933594, | |
| "learning_rate": 8.432640786542407e-07, | |
| "loss": 0.9762102961540222, | |
| "step": 798 | |
| }, | |
| { | |
| "epoch": 1.6877637130801688, | |
| "grad_norm": 4.598972797393799, | |
| "learning_rate": 8.414908677998456e-07, | |
| "loss": 1.2525511980056763, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 1.6919831223628692, | |
| "grad_norm": 4.160123825073242, | |
| "learning_rate": 8.39715447991574e-07, | |
| "loss": 0.6331847906112671, | |
| "step": 802 | |
| }, | |
| { | |
| "epoch": 1.6962025316455698, | |
| "grad_norm": 1.725115180015564, | |
| "learning_rate": 8.379378408383392e-07, | |
| "loss": 1.2866941690444946, | |
| "step": 804 | |
| }, | |
| { | |
| "epoch": 1.70042194092827, | |
| "grad_norm": 1.8015742301940918, | |
| "learning_rate": 8.361580679756771e-07, | |
| "loss": 1.1813989877700806, | |
| "step": 806 | |
| }, | |
| { | |
| "epoch": 1.7046413502109705, | |
| "grad_norm": 2.260415554046631, | |
| "learning_rate": 8.343761510654834e-07, | |
| "loss": 0.8856143355369568, | |
| "step": 808 | |
| }, | |
| { | |
| "epoch": 1.7088607594936709, | |
| "grad_norm": 3.748908042907715, | |
| "learning_rate": 8.325921117957487e-07, | |
| "loss": 0.9216241240501404, | |
| "step": 810 | |
| }, | |
| { | |
| "epoch": 1.7130801687763713, | |
| "grad_norm": 1.0273261070251465, | |
| "learning_rate": 8.308059718802953e-07, | |
| "loss": 1.1896474361419678, | |
| "step": 812 | |
| }, | |
| { | |
| "epoch": 1.7172995780590719, | |
| "grad_norm": 5.24169921875, | |
| "learning_rate": 8.290177530585126e-07, | |
| "loss": 1.525089144706726, | |
| "step": 814 | |
| }, | |
| { | |
| "epoch": 1.721518987341772, | |
| "grad_norm": 2.1483330726623535, | |
| "learning_rate": 8.272274770950934e-07, | |
| "loss": 1.2185280323028564, | |
| "step": 816 | |
| }, | |
| { | |
| "epoch": 1.7257383966244726, | |
| "grad_norm": 3.7641489505767822, | |
| "learning_rate": 8.254351657797674e-07, | |
| "loss": 0.8334339261054993, | |
| "step": 818 | |
| }, | |
| { | |
| "epoch": 1.729957805907173, | |
| "grad_norm": 1.9054092168807983, | |
| "learning_rate": 8.236408409270376e-07, | |
| "loss": 0.4915008842945099, | |
| "step": 820 | |
| }, | |
| { | |
| "epoch": 1.7341772151898733, | |
| "grad_norm": 18.341960906982422, | |
| "learning_rate": 8.218445243759137e-07, | |
| "loss": 0.7150586843490601, | |
| "step": 822 | |
| }, | |
| { | |
| "epoch": 1.738396624472574, | |
| "grad_norm": 1.671027421951294, | |
| "learning_rate": 8.200462379896468e-07, | |
| "loss": 0.7935347557067871, | |
| "step": 824 | |
| }, | |
| { | |
| "epoch": 1.7426160337552743, | |
| "grad_norm": 1.1407239437103271, | |
| "learning_rate": 8.182460036554631e-07, | |
| "loss": 1.0441514253616333, | |
| "step": 826 | |
| }, | |
| { | |
| "epoch": 1.7468354430379747, | |
| "grad_norm": 1.8266801834106445, | |
| "learning_rate": 8.164438432842973e-07, | |
| "loss": 1.0361227989196777, | |
| "step": 828 | |
| }, | |
| { | |
| "epoch": 1.7510548523206753, | |
| "grad_norm": 1.3403441905975342, | |
| "learning_rate": 8.146397788105272e-07, | |
| "loss": 1.1865990161895752, | |
| "step": 830 | |
| }, | |
| { | |
| "epoch": 1.7552742616033754, | |
| "grad_norm": 0.9253147840499878, | |
| "learning_rate": 8.128338321917045e-07, | |
| "loss": 1.1751179695129395, | |
| "step": 832 | |
| }, | |
| { | |
| "epoch": 1.759493670886076, | |
| "grad_norm": 3.3475301265716553, | |
| "learning_rate": 8.110260254082898e-07, | |
| "loss": 0.9232848286628723, | |
| "step": 834 | |
| }, | |
| { | |
| "epoch": 1.7637130801687764, | |
| "grad_norm": 1.080689549446106, | |
| "learning_rate": 8.092163804633832e-07, | |
| "loss": 1.2128963470458984, | |
| "step": 836 | |
| }, | |
| { | |
| "epoch": 1.7679324894514767, | |
| "grad_norm": 1.2559332847595215, | |
| "learning_rate": 8.074049193824579e-07, | |
| "loss": 1.0571973323822021, | |
| "step": 838 | |
| }, | |
| { | |
| "epoch": 1.7721518987341773, | |
| "grad_norm": 1.474366307258606, | |
| "learning_rate": 8.055916642130914e-07, | |
| "loss": 1.1260405778884888, | |
| "step": 840 | |
| }, | |
| { | |
| "epoch": 1.7763713080168775, | |
| "grad_norm": 2.3742828369140625, | |
| "learning_rate": 8.037766370246972e-07, | |
| "loss": 1.0088326930999756, | |
| "step": 842 | |
| }, | |
| { | |
| "epoch": 1.780590717299578, | |
| "grad_norm": 0.6461037397384644, | |
| "learning_rate": 8.019598599082567e-07, | |
| "loss": 0.5369378328323364, | |
| "step": 844 | |
| }, | |
| { | |
| "epoch": 1.7848101265822784, | |
| "grad_norm": 0.5748505592346191, | |
| "learning_rate": 8.001413549760496e-07, | |
| "loss": 0.8393441438674927, | |
| "step": 846 | |
| }, | |
| { | |
| "epoch": 1.7890295358649788, | |
| "grad_norm": 3.184478282928467, | |
| "learning_rate": 7.983211443613853e-07, | |
| "loss": 0.7285841107368469, | |
| "step": 848 | |
| }, | |
| { | |
| "epoch": 1.7932489451476794, | |
| "grad_norm": 6.552399635314941, | |
| "learning_rate": 7.964992502183333e-07, | |
| "loss": 0.8242054581642151, | |
| "step": 850 | |
| }, | |
| { | |
| "epoch": 1.7974683544303798, | |
| "grad_norm": 1.3520995378494263, | |
| "learning_rate": 7.946756947214536e-07, | |
| "loss": 1.210748314857483, | |
| "step": 852 | |
| }, | |
| { | |
| "epoch": 1.8016877637130801, | |
| "grad_norm": 2.167078733444214, | |
| "learning_rate": 7.928505000655264e-07, | |
| "loss": 1.4572898149490356, | |
| "step": 854 | |
| }, | |
| { | |
| "epoch": 1.8059071729957807, | |
| "grad_norm": 0.506519615650177, | |
| "learning_rate": 7.910236884652833e-07, | |
| "loss": 1.0607579946517944, | |
| "step": 856 | |
| }, | |
| { | |
| "epoch": 1.810126582278481, | |
| "grad_norm": 2.641108512878418, | |
| "learning_rate": 7.891952821551348e-07, | |
| "loss": 1.0674760341644287, | |
| "step": 858 | |
| }, | |
| { | |
| "epoch": 1.8143459915611815, | |
| "grad_norm": 2.2153725624084473, | |
| "learning_rate": 7.87365303388902e-07, | |
| "loss": 0.8174270987510681, | |
| "step": 860 | |
| }, | |
| { | |
| "epoch": 1.8185654008438819, | |
| "grad_norm": 2.081165075302124, | |
| "learning_rate": 7.855337744395437e-07, | |
| "loss": 1.2201720476150513, | |
| "step": 862 | |
| }, | |
| { | |
| "epoch": 1.8227848101265822, | |
| "grad_norm": 1.6072843074798584, | |
| "learning_rate": 7.837007175988869e-07, | |
| "loss": 1.0889828205108643, | |
| "step": 864 | |
| }, | |
| { | |
| "epoch": 1.8270042194092828, | |
| "grad_norm": 5.830190181732178, | |
| "learning_rate": 7.818661551773542e-07, | |
| "loss": 1.2174073457717896, | |
| "step": 866 | |
| }, | |
| { | |
| "epoch": 1.831223628691983, | |
| "grad_norm": 3.3919594287872314, | |
| "learning_rate": 7.800301095036933e-07, | |
| "loss": 0.9814926385879517, | |
| "step": 868 | |
| }, | |
| { | |
| "epoch": 1.8354430379746836, | |
| "grad_norm": 3.2243967056274414, | |
| "learning_rate": 7.781926029247048e-07, | |
| "loss": 1.1042759418487549, | |
| "step": 870 | |
| }, | |
| { | |
| "epoch": 1.839662447257384, | |
| "grad_norm": 3.5643749237060547, | |
| "learning_rate": 7.763536578049699e-07, | |
| "loss": 0.8058743476867676, | |
| "step": 872 | |
| }, | |
| { | |
| "epoch": 1.8438818565400843, | |
| "grad_norm": 1.4269522428512573, | |
| "learning_rate": 7.745132965265788e-07, | |
| "loss": 0.987337052822113, | |
| "step": 874 | |
| }, | |
| { | |
| "epoch": 1.8481012658227849, | |
| "grad_norm": 0.985817015171051, | |
| "learning_rate": 7.726715414888577e-07, | |
| "loss": 1.2107572555541992, | |
| "step": 876 | |
| }, | |
| { | |
| "epoch": 1.8523206751054853, | |
| "grad_norm": 1.9935749769210815, | |
| "learning_rate": 7.708284151080968e-07, | |
| "loss": 0.9476048946380615, | |
| "step": 878 | |
| }, | |
| { | |
| "epoch": 1.8565400843881856, | |
| "grad_norm": 3.2491185665130615, | |
| "learning_rate": 7.689839398172767e-07, | |
| "loss": 0.9019596576690674, | |
| "step": 880 | |
| }, | |
| { | |
| "epoch": 1.8607594936708862, | |
| "grad_norm": 2.6126883029937744, | |
| "learning_rate": 7.671381380657965e-07, | |
| "loss": 1.1691335439682007, | |
| "step": 882 | |
| }, | |
| { | |
| "epoch": 1.8649789029535864, | |
| "grad_norm": 1.3800222873687744, | |
| "learning_rate": 7.65291032319199e-07, | |
| "loss": 0.8417655229568481, | |
| "step": 884 | |
| }, | |
| { | |
| "epoch": 1.869198312236287, | |
| "grad_norm": 2.0879945755004883, | |
| "learning_rate": 7.634426450588988e-07, | |
| "loss": 0.8084736466407776, | |
| "step": 886 | |
| }, | |
| { | |
| "epoch": 1.8734177215189873, | |
| "grad_norm": 1.3853508234024048, | |
| "learning_rate": 7.615929987819075e-07, | |
| "loss": 1.136643648147583, | |
| "step": 888 | |
| }, | |
| { | |
| "epoch": 1.8776371308016877, | |
| "grad_norm": 7.130331993103027, | |
| "learning_rate": 7.597421160005612e-07, | |
| "loss": 0.4776380956172943, | |
| "step": 890 | |
| }, | |
| { | |
| "epoch": 1.8818565400843883, | |
| "grad_norm": 3.002958059310913, | |
| "learning_rate": 7.578900192422443e-07, | |
| "loss": 0.7818654179573059, | |
| "step": 892 | |
| }, | |
| { | |
| "epoch": 1.8860759493670884, | |
| "grad_norm": 1.7899680137634277, | |
| "learning_rate": 7.560367310491182e-07, | |
| "loss": 1.1894859075546265, | |
| "step": 894 | |
| }, | |
| { | |
| "epoch": 1.890295358649789, | |
| "grad_norm": 1.8424100875854492, | |
| "learning_rate": 7.541822739778445e-07, | |
| "loss": 1.3867307901382446, | |
| "step": 896 | |
| }, | |
| { | |
| "epoch": 1.8945147679324894, | |
| "grad_norm": 11.084500312805176, | |
| "learning_rate": 7.523266705993115e-07, | |
| "loss": 0.8175121545791626, | |
| "step": 898 | |
| }, | |
| { | |
| "epoch": 1.8987341772151898, | |
| "grad_norm": 2.078657388687134, | |
| "learning_rate": 7.504699434983602e-07, | |
| "loss": 1.1003247499465942, | |
| "step": 900 | |
| }, | |
| { | |
| "epoch": 1.9029535864978904, | |
| "grad_norm": 1.9573427438735962, | |
| "learning_rate": 7.486121152735074e-07, | |
| "loss": 1.3067007064819336, | |
| "step": 902 | |
| }, | |
| { | |
| "epoch": 1.9071729957805907, | |
| "grad_norm": 6.904273509979248, | |
| "learning_rate": 7.467532085366726e-07, | |
| "loss": 1.073278784751892, | |
| "step": 904 | |
| }, | |
| { | |
| "epoch": 1.9113924050632911, | |
| "grad_norm": 0.9965894818305969, | |
| "learning_rate": 7.448932459129016e-07, | |
| "loss": 1.3775935173034668, | |
| "step": 906 | |
| }, | |
| { | |
| "epoch": 1.9156118143459917, | |
| "grad_norm": 4.340345859527588, | |
| "learning_rate": 7.430322500400924e-07, | |
| "loss": 0.5346195697784424, | |
| "step": 908 | |
| }, | |
| { | |
| "epoch": 1.9198312236286919, | |
| "grad_norm": 1.2633851766586304, | |
| "learning_rate": 7.411702435687177e-07, | |
| "loss": 1.1176321506500244, | |
| "step": 910 | |
| }, | |
| { | |
| "epoch": 1.9240506329113924, | |
| "grad_norm": 2.212251901626587, | |
| "learning_rate": 7.393072491615511e-07, | |
| "loss": 0.8476999402046204, | |
| "step": 912 | |
| }, | |
| { | |
| "epoch": 1.9282700421940928, | |
| "grad_norm": 1.6803202629089355, | |
| "learning_rate": 7.374432894933905e-07, | |
| "loss": 1.2019180059432983, | |
| "step": 914 | |
| }, | |
| { | |
| "epoch": 1.9324894514767932, | |
| "grad_norm": 2.0837478637695312, | |
| "learning_rate": 7.355783872507818e-07, | |
| "loss": 0.9530687928199768, | |
| "step": 916 | |
| }, | |
| { | |
| "epoch": 1.9367088607594938, | |
| "grad_norm": 2.765504837036133, | |
| "learning_rate": 7.337125651317433e-07, | |
| "loss": 1.0955183506011963, | |
| "step": 918 | |
| }, | |
| { | |
| "epoch": 1.9409282700421941, | |
| "grad_norm": 2.821669101715088, | |
| "learning_rate": 7.318458458454892e-07, | |
| "loss": 0.5842803120613098, | |
| "step": 920 | |
| }, | |
| { | |
| "epoch": 1.9451476793248945, | |
| "grad_norm": 1.241335153579712, | |
| "learning_rate": 7.299782521121536e-07, | |
| "loss": 1.1832818984985352, | |
| "step": 922 | |
| }, | |
| { | |
| "epoch": 1.9493670886075949, | |
| "grad_norm": 1.0857776403427124, | |
| "learning_rate": 7.281098066625129e-07, | |
| "loss": 1.262142539024353, | |
| "step": 924 | |
| }, | |
| { | |
| "epoch": 1.9535864978902953, | |
| "grad_norm": 1.392290472984314, | |
| "learning_rate": 7.262405322377109e-07, | |
| "loss": 0.9511996507644653, | |
| "step": 926 | |
| }, | |
| { | |
| "epoch": 1.9578059071729959, | |
| "grad_norm": 1.1899210214614868, | |
| "learning_rate": 7.243704515889799e-07, | |
| "loss": 0.797012448310852, | |
| "step": 928 | |
| }, | |
| { | |
| "epoch": 1.9620253164556962, | |
| "grad_norm": 3.176696300506592, | |
| "learning_rate": 7.224995874773657e-07, | |
| "loss": 1.2408126592636108, | |
| "step": 930 | |
| }, | |
| { | |
| "epoch": 1.9662447257383966, | |
| "grad_norm": 3.178877592086792, | |
| "learning_rate": 7.206279626734492e-07, | |
| "loss": 0.9860198497772217, | |
| "step": 932 | |
| }, | |
| { | |
| "epoch": 1.9704641350210972, | |
| "grad_norm": 1.3276498317718506, | |
| "learning_rate": 7.187555999570705e-07, | |
| "loss": 1.2460663318634033, | |
| "step": 934 | |
| }, | |
| { | |
| "epoch": 1.9746835443037973, | |
| "grad_norm": 1.3889448642730713, | |
| "learning_rate": 7.1688252211705e-07, | |
| "loss": 1.2101694345474243, | |
| "step": 936 | |
| }, | |
| { | |
| "epoch": 1.978902953586498, | |
| "grad_norm": 0.8887938261032104, | |
| "learning_rate": 7.150087519509128e-07, | |
| "loss": 0.8580332398414612, | |
| "step": 938 | |
| }, | |
| { | |
| "epoch": 1.9831223628691983, | |
| "grad_norm": 1.4792304039001465, | |
| "learning_rate": 7.131343122646098e-07, | |
| "loss": 1.231054663658142, | |
| "step": 940 | |
| }, | |
| { | |
| "epoch": 1.9873417721518987, | |
| "grad_norm": 2.457331418991089, | |
| "learning_rate": 7.11259225872241e-07, | |
| "loss": 1.006805658340454, | |
| "step": 942 | |
| }, | |
| { | |
| "epoch": 1.9915611814345993, | |
| "grad_norm": 1.956710696220398, | |
| "learning_rate": 7.093835155957782e-07, | |
| "loss": 0.7936272025108337, | |
| "step": 944 | |
| }, | |
| { | |
| "epoch": 1.9957805907172996, | |
| "grad_norm": 1.3758666515350342, | |
| "learning_rate": 7.075072042647852e-07, | |
| "loss": 1.1611456871032715, | |
| "step": 946 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "grad_norm": 3.4326858520507812, | |
| "learning_rate": 7.056303147161428e-07, | |
| "loss": 0.5819499492645264, | |
| "step": 948 | |
| }, | |
| { | |
| "epoch": 2.0042194092827006, | |
| "grad_norm": 6.350503921508789, | |
| "learning_rate": 7.03752869793768e-07, | |
| "loss": 0.9798819422721863, | |
| "step": 950 | |
| }, | |
| { | |
| "epoch": 2.0084388185654007, | |
| "grad_norm": 3.770968437194824, | |
| "learning_rate": 7.018748923483386e-07, | |
| "loss": 0.6936891078948975, | |
| "step": 952 | |
| }, | |
| { | |
| "epoch": 2.0126582278481013, | |
| "grad_norm": 3.1057989597320557, | |
| "learning_rate": 6.99996405237013e-07, | |
| "loss": 0.857315182685852, | |
| "step": 954 | |
| }, | |
| { | |
| "epoch": 2.0168776371308015, | |
| "grad_norm": 1.2099494934082031, | |
| "learning_rate": 6.98117431323153e-07, | |
| "loss": 1.0093313455581665, | |
| "step": 956 | |
| }, | |
| { | |
| "epoch": 2.021097046413502, | |
| "grad_norm": 2.805772542953491, | |
| "learning_rate": 6.962379934760456e-07, | |
| "loss": 0.7519159913063049, | |
| "step": 958 | |
| }, | |
| { | |
| "epoch": 2.0253164556962027, | |
| "grad_norm": 2.69637131690979, | |
| "learning_rate": 6.94358114570624e-07, | |
| "loss": 0.8004332780838013, | |
| "step": 960 | |
| }, | |
| { | |
| "epoch": 2.029535864978903, | |
| "grad_norm": 4.524166584014893, | |
| "learning_rate": 6.924778174871901e-07, | |
| "loss": 1.2693367004394531, | |
| "step": 962 | |
| }, | |
| { | |
| "epoch": 2.0337552742616034, | |
| "grad_norm": 1.710188388824463, | |
| "learning_rate": 6.905971251111349e-07, | |
| "loss": 0.8327010869979858, | |
| "step": 964 | |
| }, | |
| { | |
| "epoch": 2.037974683544304, | |
| "grad_norm": 1.4968762397766113, | |
| "learning_rate": 6.887160603326612e-07, | |
| "loss": 0.8057103753089905, | |
| "step": 966 | |
| }, | |
| { | |
| "epoch": 2.042194092827004, | |
| "grad_norm": 2.4308996200561523, | |
| "learning_rate": 6.868346460465038e-07, | |
| "loss": 0.7996687889099121, | |
| "step": 968 | |
| }, | |
| { | |
| "epoch": 2.0464135021097047, | |
| "grad_norm": 1.531032681465149, | |
| "learning_rate": 6.849529051516521e-07, | |
| "loss": 1.125715732574463, | |
| "step": 970 | |
| }, | |
| { | |
| "epoch": 2.050632911392405, | |
| "grad_norm": 3.428903579711914, | |
| "learning_rate": 6.830708605510697e-07, | |
| "loss": 1.0384615659713745, | |
| "step": 972 | |
| }, | |
| { | |
| "epoch": 2.0548523206751055, | |
| "grad_norm": 1.0824832916259766, | |
| "learning_rate": 6.811885351514176e-07, | |
| "loss": 0.9185305237770081, | |
| "step": 974 | |
| }, | |
| { | |
| "epoch": 2.059071729957806, | |
| "grad_norm": 1.7839653491973877, | |
| "learning_rate": 6.793059518627739e-07, | |
| "loss": 0.8305885195732117, | |
| "step": 976 | |
| }, | |
| { | |
| "epoch": 2.0632911392405062, | |
| "grad_norm": 0.7381780743598938, | |
| "learning_rate": 6.77423133598356e-07, | |
| "loss": 0.8384730815887451, | |
| "step": 978 | |
| }, | |
| { | |
| "epoch": 2.067510548523207, | |
| "grad_norm": 1.6481800079345703, | |
| "learning_rate": 6.755401032742407e-07, | |
| "loss": 0.8727558255195618, | |
| "step": 980 | |
| }, | |
| { | |
| "epoch": 2.071729957805907, | |
| "grad_norm": 5.477509021759033, | |
| "learning_rate": 6.736568838090859e-07, | |
| "loss": 1.1277180910110474, | |
| "step": 982 | |
| }, | |
| { | |
| "epoch": 2.0759493670886076, | |
| "grad_norm": 2.758972644805908, | |
| "learning_rate": 6.71773498123852e-07, | |
| "loss": 1.0967183113098145, | |
| "step": 984 | |
| }, | |
| { | |
| "epoch": 2.080168776371308, | |
| "grad_norm": 1.1603978872299194, | |
| "learning_rate": 6.698899691415218e-07, | |
| "loss": 1.1284269094467163, | |
| "step": 986 | |
| }, | |
| { | |
| "epoch": 2.0843881856540083, | |
| "grad_norm": 1.3078337907791138, | |
| "learning_rate": 6.680063197868228e-07, | |
| "loss": 1.166777491569519, | |
| "step": 988 | |
| }, | |
| { | |
| "epoch": 2.088607594936709, | |
| "grad_norm": 3.5238006114959717, | |
| "learning_rate": 6.661225729859475e-07, | |
| "loss": 0.5711318850517273, | |
| "step": 990 | |
| }, | |
| { | |
| "epoch": 2.0928270042194095, | |
| "grad_norm": 2.0197713375091553, | |
| "learning_rate": 6.64238751666274e-07, | |
| "loss": 0.608964204788208, | |
| "step": 992 | |
| }, | |
| { | |
| "epoch": 2.0970464135021096, | |
| "grad_norm": 1.3378883600234985, | |
| "learning_rate": 6.623548787560878e-07, | |
| "loss": 1.175323247909546, | |
| "step": 994 | |
| }, | |
| { | |
| "epoch": 2.1012658227848102, | |
| "grad_norm": 1.223233938217163, | |
| "learning_rate": 6.604709771843022e-07, | |
| "loss": 1.1399847269058228, | |
| "step": 996 | |
| }, | |
| { | |
| "epoch": 2.1054852320675104, | |
| "grad_norm": 0.5097165703773499, | |
| "learning_rate": 6.585870698801791e-07, | |
| "loss": 0.8538580536842346, | |
| "step": 998 | |
| }, | |
| { | |
| "epoch": 2.109704641350211, | |
| "grad_norm": 1.8075917959213257, | |
| "learning_rate": 6.567031797730507e-07, | |
| "loss": 1.2541990280151367, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 2.1139240506329116, | |
| "grad_norm": 1.6272530555725098, | |
| "learning_rate": 6.548193297920393e-07, | |
| "loss": 1.182500958442688, | |
| "step": 1002 | |
| }, | |
| { | |
| "epoch": 2.1181434599156117, | |
| "grad_norm": 1.8821264505386353, | |
| "learning_rate": 6.529355428657795e-07, | |
| "loss": 1.1924080848693848, | |
| "step": 1004 | |
| }, | |
| { | |
| "epoch": 2.1223628691983123, | |
| "grad_norm": 1.0999635457992554, | |
| "learning_rate": 6.510518419221377e-07, | |
| "loss": 0.6417333483695984, | |
| "step": 1006 | |
| }, | |
| { | |
| "epoch": 2.1265822784810124, | |
| "grad_norm": 1.3833292722702026, | |
| "learning_rate": 6.49168249887934e-07, | |
| "loss": 0.7661027908325195, | |
| "step": 1008 | |
| }, | |
| { | |
| "epoch": 2.130801687763713, | |
| "grad_norm": 1.4525195360183716, | |
| "learning_rate": 6.472847896886636e-07, | |
| "loss": 0.7349141240119934, | |
| "step": 1010 | |
| }, | |
| { | |
| "epoch": 2.1350210970464136, | |
| "grad_norm": 3.5440096855163574, | |
| "learning_rate": 6.454014842482162e-07, | |
| "loss": 0.9432771801948547, | |
| "step": 1012 | |
| }, | |
| { | |
| "epoch": 2.1392405063291138, | |
| "grad_norm": 4.978313446044922, | |
| "learning_rate": 6.435183564885985e-07, | |
| "loss": 1.375197172164917, | |
| "step": 1014 | |
| }, | |
| { | |
| "epoch": 2.1434599156118144, | |
| "grad_norm": 1.7762482166290283, | |
| "learning_rate": 6.416354293296542e-07, | |
| "loss": 0.8380042910575867, | |
| "step": 1016 | |
| }, | |
| { | |
| "epoch": 2.147679324894515, | |
| "grad_norm": 1.8821486234664917, | |
| "learning_rate": 6.39752725688786e-07, | |
| "loss": 0.9462857842445374, | |
| "step": 1018 | |
| }, | |
| { | |
| "epoch": 2.151898734177215, | |
| "grad_norm": 1.470024585723877, | |
| "learning_rate": 6.378702684806757e-07, | |
| "loss": 0.8377196192741394, | |
| "step": 1020 | |
| }, | |
| { | |
| "epoch": 2.1561181434599157, | |
| "grad_norm": 2.115182638168335, | |
| "learning_rate": 6.359880806170058e-07, | |
| "loss": 0.9362459182739258, | |
| "step": 1022 | |
| }, | |
| { | |
| "epoch": 2.160337552742616, | |
| "grad_norm": 2.337805986404419, | |
| "learning_rate": 6.341061850061807e-07, | |
| "loss": 0.8514955639839172, | |
| "step": 1024 | |
| }, | |
| { | |
| "epoch": 2.1645569620253164, | |
| "grad_norm": 9.63266372680664, | |
| "learning_rate": 6.322246045530474e-07, | |
| "loss": 1.1533026695251465, | |
| "step": 1026 | |
| }, | |
| { | |
| "epoch": 2.168776371308017, | |
| "grad_norm": 1.6961092948913574, | |
| "learning_rate": 6.303433621586177e-07, | |
| "loss": 1.1458700895309448, | |
| "step": 1028 | |
| }, | |
| { | |
| "epoch": 2.172995780590717, | |
| "grad_norm": 1.3575078248977661, | |
| "learning_rate": 6.28462480719788e-07, | |
| "loss": 1.1239484548568726, | |
| "step": 1030 | |
| }, | |
| { | |
| "epoch": 2.1772151898734178, | |
| "grad_norm": 1.2787476778030396, | |
| "learning_rate": 6.265819831290624e-07, | |
| "loss": 1.1294289827346802, | |
| "step": 1032 | |
| }, | |
| { | |
| "epoch": 2.181434599156118, | |
| "grad_norm": 4.088858604431152, | |
| "learning_rate": 6.247018922742722e-07, | |
| "loss": 1.1388219594955444, | |
| "step": 1034 | |
| }, | |
| { | |
| "epoch": 2.1856540084388185, | |
| "grad_norm": 6.764144420623779, | |
| "learning_rate": 6.228222310382992e-07, | |
| "loss": 1.0533146858215332, | |
| "step": 1036 | |
| }, | |
| { | |
| "epoch": 2.189873417721519, | |
| "grad_norm": 2.094905138015747, | |
| "learning_rate": 6.209430222987952e-07, | |
| "loss": 1.132552146911621, | |
| "step": 1038 | |
| }, | |
| { | |
| "epoch": 2.1940928270042193, | |
| "grad_norm": 1.7523225545883179, | |
| "learning_rate": 6.190642889279052e-07, | |
| "loss": 1.2820512056350708, | |
| "step": 1040 | |
| }, | |
| { | |
| "epoch": 2.19831223628692, | |
| "grad_norm": 4.281554222106934, | |
| "learning_rate": 6.171860537919886e-07, | |
| "loss": 0.39310938119888306, | |
| "step": 1042 | |
| }, | |
| { | |
| "epoch": 2.2025316455696204, | |
| "grad_norm": 2.323817491531372, | |
| "learning_rate": 6.153083397513404e-07, | |
| "loss": 1.1017502546310425, | |
| "step": 1044 | |
| }, | |
| { | |
| "epoch": 2.2067510548523206, | |
| "grad_norm": 4.524064064025879, | |
| "learning_rate": 6.134311696599129e-07, | |
| "loss": 0.6054593324661255, | |
| "step": 1046 | |
| }, | |
| { | |
| "epoch": 2.210970464135021, | |
| "grad_norm": 2.6248085498809814, | |
| "learning_rate": 6.115545663650389e-07, | |
| "loss": 0.9862580299377441, | |
| "step": 1048 | |
| }, | |
| { | |
| "epoch": 2.2151898734177213, | |
| "grad_norm": 1.9876245260238647, | |
| "learning_rate": 6.096785527071516e-07, | |
| "loss": 1.1376148462295532, | |
| "step": 1050 | |
| }, | |
| { | |
| "epoch": 2.219409282700422, | |
| "grad_norm": 2.210066080093384, | |
| "learning_rate": 6.078031515195085e-07, | |
| "loss": 0.9529132843017578, | |
| "step": 1052 | |
| }, | |
| { | |
| "epoch": 2.2236286919831225, | |
| "grad_norm": 3.2140283584594727, | |
| "learning_rate": 6.059283856279118e-07, | |
| "loss": 1.0213066339492798, | |
| "step": 1054 | |
| }, | |
| { | |
| "epoch": 2.2278481012658227, | |
| "grad_norm": 6.621954917907715, | |
| "learning_rate": 6.040542778504319e-07, | |
| "loss": 0.9980672001838684, | |
| "step": 1056 | |
| }, | |
| { | |
| "epoch": 2.2320675105485233, | |
| "grad_norm": 10.540366172790527, | |
| "learning_rate": 6.021808509971293e-07, | |
| "loss": 0.5453277826309204, | |
| "step": 1058 | |
| }, | |
| { | |
| "epoch": 2.2362869198312234, | |
| "grad_norm": 1.3416770696640015, | |
| "learning_rate": 6.003081278697764e-07, | |
| "loss": 1.1391900777816772, | |
| "step": 1060 | |
| }, | |
| { | |
| "epoch": 2.240506329113924, | |
| "grad_norm": 0.30088382959365845, | |
| "learning_rate": 5.984361312615811e-07, | |
| "loss": 0.9888620972633362, | |
| "step": 1062 | |
| }, | |
| { | |
| "epoch": 2.2447257383966246, | |
| "grad_norm": 1.483581781387329, | |
| "learning_rate": 5.96564883956908e-07, | |
| "loss": 0.6946426033973694, | |
| "step": 1064 | |
| }, | |
| { | |
| "epoch": 2.2489451476793247, | |
| "grad_norm": 2.5259406566619873, | |
| "learning_rate": 5.946944087310022e-07, | |
| "loss": 1.0866342782974243, | |
| "step": 1066 | |
| }, | |
| { | |
| "epoch": 2.2531645569620253, | |
| "grad_norm": 2.395719528198242, | |
| "learning_rate": 5.928247283497117e-07, | |
| "loss": 1.3847568035125732, | |
| "step": 1068 | |
| }, | |
| { | |
| "epoch": 2.257383966244726, | |
| "grad_norm": 2.874040126800537, | |
| "learning_rate": 5.909558655692104e-07, | |
| "loss": 1.1452842950820923, | |
| "step": 1070 | |
| }, | |
| { | |
| "epoch": 2.261603375527426, | |
| "grad_norm": 2.1399810314178467, | |
| "learning_rate": 5.890878431357208e-07, | |
| "loss": 1.1274282932281494, | |
| "step": 1072 | |
| }, | |
| { | |
| "epoch": 2.2658227848101267, | |
| "grad_norm": 3.358569383621216, | |
| "learning_rate": 5.872206837852376e-07, | |
| "loss": 1.3512498140335083, | |
| "step": 1074 | |
| }, | |
| { | |
| "epoch": 2.270042194092827, | |
| "grad_norm": 1.4806420803070068, | |
| "learning_rate": 5.853544102432505e-07, | |
| "loss": 1.14762282371521, | |
| "step": 1076 | |
| }, | |
| { | |
| "epoch": 2.2742616033755274, | |
| "grad_norm": 1.1972980499267578, | |
| "learning_rate": 5.834890452244685e-07, | |
| "loss": 0.9154924750328064, | |
| "step": 1078 | |
| }, | |
| { | |
| "epoch": 2.278481012658228, | |
| "grad_norm": 10.489628791809082, | |
| "learning_rate": 5.816246114325421e-07, | |
| "loss": 0.9368666410446167, | |
| "step": 1080 | |
| }, | |
| { | |
| "epoch": 2.282700421940928, | |
| "grad_norm": 5.601263046264648, | |
| "learning_rate": 5.79761131559788e-07, | |
| "loss": 0.6107386350631714, | |
| "step": 1082 | |
| }, | |
| { | |
| "epoch": 2.2869198312236287, | |
| "grad_norm": 2.7437796592712402, | |
| "learning_rate": 5.778986282869127e-07, | |
| "loss": 0.7205576300621033, | |
| "step": 1084 | |
| }, | |
| { | |
| "epoch": 2.291139240506329, | |
| "grad_norm": 0.8865097761154175, | |
| "learning_rate": 5.760371242827363e-07, | |
| "loss": 0.6305662393569946, | |
| "step": 1086 | |
| }, | |
| { | |
| "epoch": 2.2953586497890295, | |
| "grad_norm": 2.2365691661834717, | |
| "learning_rate": 5.741766422039167e-07, | |
| "loss": 0.9999610781669617, | |
| "step": 1088 | |
| }, | |
| { | |
| "epoch": 2.29957805907173, | |
| "grad_norm": 1.5019956827163696, | |
| "learning_rate": 5.723172046946733e-07, | |
| "loss": 0.589636504650116, | |
| "step": 1090 | |
| }, | |
| { | |
| "epoch": 2.3037974683544302, | |
| "grad_norm": 2.1107327938079834, | |
| "learning_rate": 5.704588343865127e-07, | |
| "loss": 0.8981572389602661, | |
| "step": 1092 | |
| }, | |
| { | |
| "epoch": 2.308016877637131, | |
| "grad_norm": 4.003733158111572, | |
| "learning_rate": 5.686015538979518e-07, | |
| "loss": 0.732837438583374, | |
| "step": 1094 | |
| }, | |
| { | |
| "epoch": 2.3122362869198314, | |
| "grad_norm": 2.012057065963745, | |
| "learning_rate": 5.667453858342434e-07, | |
| "loss": 0.4853237271308899, | |
| "step": 1096 | |
| }, | |
| { | |
| "epoch": 2.3164556962025316, | |
| "grad_norm": 2.796154260635376, | |
| "learning_rate": 5.648903527871006e-07, | |
| "loss": 1.1909679174423218, | |
| "step": 1098 | |
| }, | |
| { | |
| "epoch": 2.320675105485232, | |
| "grad_norm": 1.6839478015899658, | |
| "learning_rate": 5.630364773344224e-07, | |
| "loss": 1.0224688053131104, | |
| "step": 1100 | |
| }, | |
| { | |
| "epoch": 2.3248945147679323, | |
| "grad_norm": 1.592947006225586, | |
| "learning_rate": 5.611837820400182e-07, | |
| "loss": 1.1030757427215576, | |
| "step": 1102 | |
| }, | |
| { | |
| "epoch": 2.329113924050633, | |
| "grad_norm": 1.691872239112854, | |
| "learning_rate": 5.593322894533334e-07, | |
| "loss": 1.2941904067993164, | |
| "step": 1104 | |
| }, | |
| { | |
| "epoch": 2.3333333333333335, | |
| "grad_norm": 1.7891680002212524, | |
| "learning_rate": 5.574820221091757e-07, | |
| "loss": 0.8782735466957092, | |
| "step": 1106 | |
| }, | |
| { | |
| "epoch": 2.3375527426160336, | |
| "grad_norm": 3.5078885555267334, | |
| "learning_rate": 5.556330025274393e-07, | |
| "loss": 0.5180922150611877, | |
| "step": 1108 | |
| }, | |
| { | |
| "epoch": 2.3417721518987342, | |
| "grad_norm": 1.8680453300476074, | |
| "learning_rate": 5.537852532128322e-07, | |
| "loss": 1.1475764513015747, | |
| "step": 1110 | |
| }, | |
| { | |
| "epoch": 2.3459915611814344, | |
| "grad_norm": 1.262511968612671, | |
| "learning_rate": 5.519387966546021e-07, | |
| "loss": 1.1460936069488525, | |
| "step": 1112 | |
| }, | |
| { | |
| "epoch": 2.350210970464135, | |
| "grad_norm": 12.242781639099121, | |
| "learning_rate": 5.500936553262616e-07, | |
| "loss": 1.1747325658798218, | |
| "step": 1114 | |
| }, | |
| { | |
| "epoch": 2.3544303797468356, | |
| "grad_norm": 0.7147314548492432, | |
| "learning_rate": 5.48249851685316e-07, | |
| "loss": 0.7451015114784241, | |
| "step": 1116 | |
| }, | |
| { | |
| "epoch": 2.3586497890295357, | |
| "grad_norm": 4.066142559051514, | |
| "learning_rate": 5.464074081729892e-07, | |
| "loss": 1.0633448362350464, | |
| "step": 1118 | |
| }, | |
| { | |
| "epoch": 2.3628691983122363, | |
| "grad_norm": 1.6116374731063843, | |
| "learning_rate": 5.445663472139506e-07, | |
| "loss": 0.8038894534111023, | |
| "step": 1120 | |
| }, | |
| { | |
| "epoch": 2.367088607594937, | |
| "grad_norm": 2.5959835052490234, | |
| "learning_rate": 5.427266912160427e-07, | |
| "loss": 1.0548654794692993, | |
| "step": 1122 | |
| }, | |
| { | |
| "epoch": 2.371308016877637, | |
| "grad_norm": 1.4511165618896484, | |
| "learning_rate": 5.408884625700076e-07, | |
| "loss": 0.744436502456665, | |
| "step": 1124 | |
| }, | |
| { | |
| "epoch": 2.3755274261603376, | |
| "grad_norm": 2.0259265899658203, | |
| "learning_rate": 5.390516836492152e-07, | |
| "loss": 1.0626447200775146, | |
| "step": 1126 | |
| }, | |
| { | |
| "epoch": 2.379746835443038, | |
| "grad_norm": 1.5352128744125366, | |
| "learning_rate": 5.372163768093903e-07, | |
| "loss": 1.1404402256011963, | |
| "step": 1128 | |
| }, | |
| { | |
| "epoch": 2.3839662447257384, | |
| "grad_norm": 3.401780366897583, | |
| "learning_rate": 5.35382564388341e-07, | |
| "loss": 0.5039758086204529, | |
| "step": 1130 | |
| }, | |
| { | |
| "epoch": 2.388185654008439, | |
| "grad_norm": 1.8972293138504028, | |
| "learning_rate": 5.335502687056865e-07, | |
| "loss": 0.345048725605011, | |
| "step": 1132 | |
| }, | |
| { | |
| "epoch": 2.392405063291139, | |
| "grad_norm": 4.107486248016357, | |
| "learning_rate": 5.317195120625855e-07, | |
| "loss": 0.4859941303730011, | |
| "step": 1134 | |
| }, | |
| { | |
| "epoch": 2.3966244725738397, | |
| "grad_norm": 2.5772571563720703, | |
| "learning_rate": 5.298903167414648e-07, | |
| "loss": 0.5732159614562988, | |
| "step": 1136 | |
| }, | |
| { | |
| "epoch": 2.40084388185654, | |
| "grad_norm": 1.3114792108535767, | |
| "learning_rate": 5.280627050057483e-07, | |
| "loss": 1.1417685747146606, | |
| "step": 1138 | |
| }, | |
| { | |
| "epoch": 2.4050632911392404, | |
| "grad_norm": 7.5032267570495605, | |
| "learning_rate": 5.262366990995852e-07, | |
| "loss": 0.8103894591331482, | |
| "step": 1140 | |
| }, | |
| { | |
| "epoch": 2.409282700421941, | |
| "grad_norm": 3.7041962146759033, | |
| "learning_rate": 5.244123212475811e-07, | |
| "loss": 0.3755455017089844, | |
| "step": 1142 | |
| }, | |
| { | |
| "epoch": 2.413502109704641, | |
| "grad_norm": 1.3423445224761963, | |
| "learning_rate": 5.22589593654525e-07, | |
| "loss": 0.8771740198135376, | |
| "step": 1144 | |
| }, | |
| { | |
| "epoch": 2.4177215189873418, | |
| "grad_norm": 1.499751329421997, | |
| "learning_rate": 5.207685385051213e-07, | |
| "loss": 1.168401837348938, | |
| "step": 1146 | |
| }, | |
| { | |
| "epoch": 2.4219409282700424, | |
| "grad_norm": 4.436310291290283, | |
| "learning_rate": 5.189491779637181e-07, | |
| "loss": 0.8418995141983032, | |
| "step": 1148 | |
| }, | |
| { | |
| "epoch": 2.4261603375527425, | |
| "grad_norm": 1.6216802597045898, | |
| "learning_rate": 5.171315341740387e-07, | |
| "loss": 1.147579550743103, | |
| "step": 1150 | |
| }, | |
| { | |
| "epoch": 2.430379746835443, | |
| "grad_norm": 43.39120864868164, | |
| "learning_rate": 5.153156292589112e-07, | |
| "loss": 0.8518908619880676, | |
| "step": 1152 | |
| }, | |
| { | |
| "epoch": 2.4345991561181437, | |
| "grad_norm": 1.7255734205245972, | |
| "learning_rate": 5.1350148532e-07, | |
| "loss": 1.205424427986145, | |
| "step": 1154 | |
| }, | |
| { | |
| "epoch": 2.438818565400844, | |
| "grad_norm": 6.3630475997924805, | |
| "learning_rate": 5.116891244375358e-07, | |
| "loss": 0.43493425846099854, | |
| "step": 1156 | |
| }, | |
| { | |
| "epoch": 2.4430379746835444, | |
| "grad_norm": 2.129798412322998, | |
| "learning_rate": 5.098785686700478e-07, | |
| "loss": 0.9413697719573975, | |
| "step": 1158 | |
| }, | |
| { | |
| "epoch": 2.4472573839662446, | |
| "grad_norm": 1.4703646898269653, | |
| "learning_rate": 5.080698400540949e-07, | |
| "loss": 1.1531509160995483, | |
| "step": 1160 | |
| }, | |
| { | |
| "epoch": 2.451476793248945, | |
| "grad_norm": 1.771309494972229, | |
| "learning_rate": 5.062629606039975e-07, | |
| "loss": 0.7602155208587646, | |
| "step": 1162 | |
| }, | |
| { | |
| "epoch": 2.4556962025316453, | |
| "grad_norm": 1.3786754608154297, | |
| "learning_rate": 5.04457952311569e-07, | |
| "loss": 1.1161296367645264, | |
| "step": 1164 | |
| }, | |
| { | |
| "epoch": 2.459915611814346, | |
| "grad_norm": 2.190340280532837, | |
| "learning_rate": 5.026548371458493e-07, | |
| "loss": 1.1393266916275024, | |
| "step": 1166 | |
| }, | |
| { | |
| "epoch": 2.4641350210970465, | |
| "grad_norm": 0.6447933316230774, | |
| "learning_rate": 5.008536370528365e-07, | |
| "loss": 0.728462815284729, | |
| "step": 1168 | |
| }, | |
| { | |
| "epoch": 2.4683544303797467, | |
| "grad_norm": 1.4482827186584473, | |
| "learning_rate": 4.990543739552197e-07, | |
| "loss": 1.0875799655914307, | |
| "step": 1170 | |
| }, | |
| { | |
| "epoch": 2.4725738396624473, | |
| "grad_norm": 1.4591885805130005, | |
| "learning_rate": 4.972570697521133e-07, | |
| "loss": 1.124202013015747, | |
| "step": 1172 | |
| }, | |
| { | |
| "epoch": 2.476793248945148, | |
| "grad_norm": 1.3290364742279053, | |
| "learning_rate": 4.954617463187888e-07, | |
| "loss": 1.1189545392990112, | |
| "step": 1174 | |
| }, | |
| { | |
| "epoch": 2.481012658227848, | |
| "grad_norm": 2.232417106628418, | |
| "learning_rate": 4.936684255064102e-07, | |
| "loss": 0.8213171362876892, | |
| "step": 1176 | |
| }, | |
| { | |
| "epoch": 2.4852320675105486, | |
| "grad_norm": 1.8304226398468018, | |
| "learning_rate": 4.918771291417669e-07, | |
| "loss": 0.40340158343315125, | |
| "step": 1178 | |
| }, | |
| { | |
| "epoch": 2.489451476793249, | |
| "grad_norm": 8.472685813903809, | |
| "learning_rate": 4.900878790270084e-07, | |
| "loss": 0.9105018973350525, | |
| "step": 1180 | |
| }, | |
| { | |
| "epoch": 2.4936708860759493, | |
| "grad_norm": 4.292173385620117, | |
| "learning_rate": 4.883006969393791e-07, | |
| "loss": 1.0442423820495605, | |
| "step": 1182 | |
| }, | |
| { | |
| "epoch": 2.49789029535865, | |
| "grad_norm": 4.11490535736084, | |
| "learning_rate": 4.865156046309528e-07, | |
| "loss": 0.5216444730758667, | |
| "step": 1184 | |
| }, | |
| { | |
| "epoch": 2.50210970464135, | |
| "grad_norm": 0.9223915338516235, | |
| "learning_rate": 4.847326238283692e-07, | |
| "loss": 0.7885441780090332, | |
| "step": 1186 | |
| }, | |
| { | |
| "epoch": 2.5063291139240507, | |
| "grad_norm": 1.5045500993728638, | |
| "learning_rate": 4.829517762325671e-07, | |
| "loss": 0.8654785752296448, | |
| "step": 1188 | |
| }, | |
| { | |
| "epoch": 2.510548523206751, | |
| "grad_norm": 1.1637619733810425, | |
| "learning_rate": 4.811730835185232e-07, | |
| "loss": 1.1407520771026611, | |
| "step": 1190 | |
| }, | |
| { | |
| "epoch": 2.5147679324894514, | |
| "grad_norm": 6.294982433319092, | |
| "learning_rate": 4.793965673349857e-07, | |
| "loss": 0.5034950971603394, | |
| "step": 1192 | |
| }, | |
| { | |
| "epoch": 2.518987341772152, | |
| "grad_norm": 3.0815131664276123, | |
| "learning_rate": 4.776222493042122e-07, | |
| "loss": 1.443105697631836, | |
| "step": 1194 | |
| }, | |
| { | |
| "epoch": 2.523206751054852, | |
| "grad_norm": 1.1364555358886719, | |
| "learning_rate": 4.758501510217066e-07, | |
| "loss": 1.1503655910491943, | |
| "step": 1196 | |
| }, | |
| { | |
| "epoch": 2.5274261603375527, | |
| "grad_norm": 2.4966022968292236, | |
| "learning_rate": 4.740802940559553e-07, | |
| "loss": 1.0758484601974487, | |
| "step": 1198 | |
| }, | |
| { | |
| "epoch": 2.5316455696202533, | |
| "grad_norm": 2.249464511871338, | |
| "learning_rate": 4.7231269994816584e-07, | |
| "loss": 0.6718664765357971, | |
| "step": 1200 | |
| }, | |
| { | |
| "epoch": 2.5358649789029535, | |
| "grad_norm": 1.933441400527954, | |
| "learning_rate": 4.705473902120039e-07, | |
| "loss": 0.8221999406814575, | |
| "step": 1202 | |
| }, | |
| { | |
| "epoch": 2.540084388185654, | |
| "grad_norm": 2.7778429985046387, | |
| "learning_rate": 4.687843863333317e-07, | |
| "loss": 0.7672927975654602, | |
| "step": 1204 | |
| }, | |
| { | |
| "epoch": 2.5443037974683547, | |
| "grad_norm": 1.3384257555007935, | |
| "learning_rate": 4.670237097699464e-07, | |
| "loss": 1.0449153184890747, | |
| "step": 1206 | |
| }, | |
| { | |
| "epoch": 2.548523206751055, | |
| "grad_norm": 4.268535137176514, | |
| "learning_rate": 4.6526538195131944e-07, | |
| "loss": 0.7585489749908447, | |
| "step": 1208 | |
| }, | |
| { | |
| "epoch": 2.5527426160337554, | |
| "grad_norm": 1.7707507610321045, | |
| "learning_rate": 4.6350942427833463e-07, | |
| "loss": 1.191308617591858, | |
| "step": 1210 | |
| }, | |
| { | |
| "epoch": 2.5569620253164556, | |
| "grad_norm": 1.2368897199630737, | |
| "learning_rate": 4.6175585812302914e-07, | |
| "loss": 1.115039348602295, | |
| "step": 1212 | |
| }, | |
| { | |
| "epoch": 2.561181434599156, | |
| "grad_norm": 6.681468486785889, | |
| "learning_rate": 4.600047048283323e-07, | |
| "loss": 0.35992902517318726, | |
| "step": 1214 | |
| }, | |
| { | |
| "epoch": 2.5654008438818563, | |
| "grad_norm": 2.588292360305786, | |
| "learning_rate": 4.582559857078059e-07, | |
| "loss": 0.831079363822937, | |
| "step": 1216 | |
| }, | |
| { | |
| "epoch": 2.569620253164557, | |
| "grad_norm": 1.0166317224502563, | |
| "learning_rate": 4.565097220453852e-07, | |
| "loss": 1.160988211631775, | |
| "step": 1218 | |
| }, | |
| { | |
| "epoch": 2.5738396624472575, | |
| "grad_norm": 4.55487060546875, | |
| "learning_rate": 4.5476593509511975e-07, | |
| "loss": 0.8059465289115906, | |
| "step": 1220 | |
| }, | |
| { | |
| "epoch": 2.5780590717299576, | |
| "grad_norm": 1.7319614887237549, | |
| "learning_rate": 4.5302464608091444e-07, | |
| "loss": 0.9973964095115662, | |
| "step": 1222 | |
| }, | |
| { | |
| "epoch": 2.5822784810126582, | |
| "grad_norm": 2.5319430828094482, | |
| "learning_rate": 4.512858761962719e-07, | |
| "loss": 0.8335304260253906, | |
| "step": 1224 | |
| }, | |
| { | |
| "epoch": 2.586497890295359, | |
| "grad_norm": 2.232879161834717, | |
| "learning_rate": 4.495496466040333e-07, | |
| "loss": 0.7188448309898376, | |
| "step": 1226 | |
| }, | |
| { | |
| "epoch": 2.590717299578059, | |
| "grad_norm": 1.1091829538345337, | |
| "learning_rate": 4.478159784361222e-07, | |
| "loss": 1.0995886325836182, | |
| "step": 1228 | |
| }, | |
| { | |
| "epoch": 2.5949367088607596, | |
| "grad_norm": 1.6400138139724731, | |
| "learning_rate": 4.4608489279328616e-07, | |
| "loss": 1.197192907333374, | |
| "step": 1230 | |
| }, | |
| { | |
| "epoch": 2.59915611814346, | |
| "grad_norm": 2.313340187072754, | |
| "learning_rate": 4.443564107448406e-07, | |
| "loss": 1.024308204650879, | |
| "step": 1232 | |
| }, | |
| { | |
| "epoch": 2.6033755274261603, | |
| "grad_norm": 3.4521191120147705, | |
| "learning_rate": 4.4263055332841223e-07, | |
| "loss": 0.30793383717536926, | |
| "step": 1234 | |
| }, | |
| { | |
| "epoch": 2.607594936708861, | |
| "grad_norm": 3.510732889175415, | |
| "learning_rate": 4.409073415496829e-07, | |
| "loss": 1.2074471712112427, | |
| "step": 1236 | |
| }, | |
| { | |
| "epoch": 2.611814345991561, | |
| "grad_norm": 5.145284652709961, | |
| "learning_rate": 4.391867963821341e-07, | |
| "loss": 1.3546441793441772, | |
| "step": 1238 | |
| }, | |
| { | |
| "epoch": 2.6160337552742616, | |
| "grad_norm": 1.3848285675048828, | |
| "learning_rate": 4.374689387667913e-07, | |
| "loss": 0.7564114332199097, | |
| "step": 1240 | |
| }, | |
| { | |
| "epoch": 2.620253164556962, | |
| "grad_norm": 1.5747932195663452, | |
| "learning_rate": 4.3575378961196987e-07, | |
| "loss": 1.1020171642303467, | |
| "step": 1242 | |
| }, | |
| { | |
| "epoch": 2.6244725738396624, | |
| "grad_norm": 7.985814094543457, | |
| "learning_rate": 4.340413697930193e-07, | |
| "loss": 0.6297235488891602, | |
| "step": 1244 | |
| }, | |
| { | |
| "epoch": 2.628691983122363, | |
| "grad_norm": 1.5127233266830444, | |
| "learning_rate": 4.3233170015207045e-07, | |
| "loss": 0.7452877163887024, | |
| "step": 1246 | |
| }, | |
| { | |
| "epoch": 2.632911392405063, | |
| "grad_norm": 2.9100558757781982, | |
| "learning_rate": 4.306248014977816e-07, | |
| "loss": 1.5952140092849731, | |
| "step": 1248 | |
| }, | |
| { | |
| "epoch": 2.6371308016877637, | |
| "grad_norm": 2.4428465366363525, | |
| "learning_rate": 4.2892069460508416e-07, | |
| "loss": 1.142899990081787, | |
| "step": 1250 | |
| }, | |
| { | |
| "epoch": 2.6413502109704643, | |
| "grad_norm": 1.664016604423523, | |
| "learning_rate": 4.27219400214931e-07, | |
| "loss": 1.067954182624817, | |
| "step": 1252 | |
| }, | |
| { | |
| "epoch": 2.6455696202531644, | |
| "grad_norm": 14.406804084777832, | |
| "learning_rate": 4.255209390340436e-07, | |
| "loss": 0.608812689781189, | |
| "step": 1254 | |
| }, | |
| { | |
| "epoch": 2.649789029535865, | |
| "grad_norm": 1.7003490924835205, | |
| "learning_rate": 4.238253317346602e-07, | |
| "loss": 0.5725827813148499, | |
| "step": 1256 | |
| }, | |
| { | |
| "epoch": 2.6540084388185656, | |
| "grad_norm": 2.5306010246276855, | |
| "learning_rate": 4.221325989542832e-07, | |
| "loss": 0.9772995710372925, | |
| "step": 1258 | |
| }, | |
| { | |
| "epoch": 2.6582278481012658, | |
| "grad_norm": 1.6626094579696655, | |
| "learning_rate": 4.2044276129542956e-07, | |
| "loss": 1.0970871448516846, | |
| "step": 1260 | |
| }, | |
| { | |
| "epoch": 2.6624472573839664, | |
| "grad_norm": 1.178289771080017, | |
| "learning_rate": 4.1875583932537926e-07, | |
| "loss": 1.2285281419754028, | |
| "step": 1262 | |
| }, | |
| { | |
| "epoch": 2.6666666666666665, | |
| "grad_norm": 2.0838637351989746, | |
| "learning_rate": 4.1707185357592434e-07, | |
| "loss": 0.6816955208778381, | |
| "step": 1264 | |
| }, | |
| { | |
| "epoch": 2.670886075949367, | |
| "grad_norm": 2.826573133468628, | |
| "learning_rate": 4.1539082454312016e-07, | |
| "loss": 0.9266291856765747, | |
| "step": 1266 | |
| }, | |
| { | |
| "epoch": 2.6751054852320673, | |
| "grad_norm": 2.5557596683502197, | |
| "learning_rate": 4.1371277268703537e-07, | |
| "loss": 0.7625723481178284, | |
| "step": 1268 | |
| }, | |
| { | |
| "epoch": 2.679324894514768, | |
| "grad_norm": 1.970330834388733, | |
| "learning_rate": 4.120377184315029e-07, | |
| "loss": 0.6248302459716797, | |
| "step": 1270 | |
| }, | |
| { | |
| "epoch": 2.6835443037974684, | |
| "grad_norm": 1.4884620904922485, | |
| "learning_rate": 4.103656821638711e-07, | |
| "loss": 1.0319654941558838, | |
| "step": 1272 | |
| }, | |
| { | |
| "epoch": 2.6877637130801686, | |
| "grad_norm": 7.556288242340088, | |
| "learning_rate": 4.086966842347563e-07, | |
| "loss": 0.745881199836731, | |
| "step": 1274 | |
| }, | |
| { | |
| "epoch": 2.691983122362869, | |
| "grad_norm": 9.8837890625, | |
| "learning_rate": 4.0703074495779464e-07, | |
| "loss": 0.8159171342849731, | |
| "step": 1276 | |
| }, | |
| { | |
| "epoch": 2.6962025316455698, | |
| "grad_norm": 0.8211575746536255, | |
| "learning_rate": 4.053678846093952e-07, | |
| "loss": 0.4533369243144989, | |
| "step": 1278 | |
| }, | |
| { | |
| "epoch": 2.70042194092827, | |
| "grad_norm": 2.684162139892578, | |
| "learning_rate": 4.03708123428492e-07, | |
| "loss": 0.9859198331832886, | |
| "step": 1280 | |
| }, | |
| { | |
| "epoch": 2.7046413502109705, | |
| "grad_norm": 2.0273070335388184, | |
| "learning_rate": 4.0205148161629964e-07, | |
| "loss": 1.131312608718872, | |
| "step": 1282 | |
| }, | |
| { | |
| "epoch": 2.708860759493671, | |
| "grad_norm": 1.63193678855896, | |
| "learning_rate": 4.003979793360661e-07, | |
| "loss": 0.5977147221565247, | |
| "step": 1284 | |
| }, | |
| { | |
| "epoch": 2.7130801687763713, | |
| "grad_norm": 4.451190948486328, | |
| "learning_rate": 3.987476367128271e-07, | |
| "loss": 0.6326662302017212, | |
| "step": 1286 | |
| }, | |
| { | |
| "epoch": 2.717299578059072, | |
| "grad_norm": 0.9102901816368103, | |
| "learning_rate": 3.9710047383316225e-07, | |
| "loss": 0.8215235471725464, | |
| "step": 1288 | |
| }, | |
| { | |
| "epoch": 2.721518987341772, | |
| "grad_norm": 0.5546138286590576, | |
| "learning_rate": 3.954565107449499e-07, | |
| "loss": 1.0023081302642822, | |
| "step": 1290 | |
| }, | |
| { | |
| "epoch": 2.7257383966244726, | |
| "grad_norm": 1.5768028497695923, | |
| "learning_rate": 3.9381576745712347e-07, | |
| "loss": 1.1236157417297363, | |
| "step": 1292 | |
| }, | |
| { | |
| "epoch": 2.7299578059071727, | |
| "grad_norm": 1.9878129959106445, | |
| "learning_rate": 3.921782639394268e-07, | |
| "loss": 0.7208356857299805, | |
| "step": 1294 | |
| }, | |
| { | |
| "epoch": 2.7341772151898733, | |
| "grad_norm": 1.5068297386169434, | |
| "learning_rate": 3.905440201221729e-07, | |
| "loss": 1.1069350242614746, | |
| "step": 1296 | |
| }, | |
| { | |
| "epoch": 2.738396624472574, | |
| "grad_norm": 1.8150042295455933, | |
| "learning_rate": 3.8891305589600005e-07, | |
| "loss": 1.1665513515472412, | |
| "step": 1298 | |
| }, | |
| { | |
| "epoch": 2.742616033755274, | |
| "grad_norm": 1.199442744255066, | |
| "learning_rate": 3.872853911116304e-07, | |
| "loss": 0.8418156504631042, | |
| "step": 1300 | |
| }, | |
| { | |
| "epoch": 2.7468354430379747, | |
| "grad_norm": 1.2970781326293945, | |
| "learning_rate": 3.856610455796275e-07, | |
| "loss": 1.1775513887405396, | |
| "step": 1302 | |
| }, | |
| { | |
| "epoch": 2.7510548523206753, | |
| "grad_norm": 2.245298147201538, | |
| "learning_rate": 3.840400390701562e-07, | |
| "loss": 0.740407407283783, | |
| "step": 1304 | |
| }, | |
| { | |
| "epoch": 2.7552742616033754, | |
| "grad_norm": 1.889854073524475, | |
| "learning_rate": 3.824223913127419e-07, | |
| "loss": 1.4258309602737427, | |
| "step": 1306 | |
| }, | |
| { | |
| "epoch": 2.759493670886076, | |
| "grad_norm": 1.5117489099502563, | |
| "learning_rate": 3.808081219960292e-07, | |
| "loss": 1.0796724557876587, | |
| "step": 1308 | |
| }, | |
| { | |
| "epoch": 2.7637130801687766, | |
| "grad_norm": 5.329049110412598, | |
| "learning_rate": 3.791972507675438e-07, | |
| "loss": 0.9499403834342957, | |
| "step": 1310 | |
| }, | |
| { | |
| "epoch": 2.7679324894514767, | |
| "grad_norm": 1.1606426239013672, | |
| "learning_rate": 3.775897972334526e-07, | |
| "loss": 1.1145509481430054, | |
| "step": 1312 | |
| }, | |
| { | |
| "epoch": 2.7721518987341773, | |
| "grad_norm": 5.624031066894531, | |
| "learning_rate": 3.759857809583255e-07, | |
| "loss": 1.1557338237762451, | |
| "step": 1314 | |
| }, | |
| { | |
| "epoch": 2.7763713080168775, | |
| "grad_norm": 2.0686893463134766, | |
| "learning_rate": 3.7438522146489624e-07, | |
| "loss": 0.9982014894485474, | |
| "step": 1316 | |
| }, | |
| { | |
| "epoch": 2.780590717299578, | |
| "grad_norm": 7.816877841949463, | |
| "learning_rate": 3.727881382338262e-07, | |
| "loss": 0.642890453338623, | |
| "step": 1318 | |
| }, | |
| { | |
| "epoch": 2.7848101265822782, | |
| "grad_norm": 8.81386661529541, | |
| "learning_rate": 3.711945507034663e-07, | |
| "loss": 1.1752903461456299, | |
| "step": 1320 | |
| }, | |
| { | |
| "epoch": 2.789029535864979, | |
| "grad_norm": 0.7249853014945984, | |
| "learning_rate": 3.696044782696211e-07, | |
| "loss": 0.7932354807853699, | |
| "step": 1322 | |
| }, | |
| { | |
| "epoch": 2.7932489451476794, | |
| "grad_norm": 4.055787563323975, | |
| "learning_rate": 3.680179402853118e-07, | |
| "loss": 1.365350604057312, | |
| "step": 1324 | |
| }, | |
| { | |
| "epoch": 2.7974683544303796, | |
| "grad_norm": 3.504054546356201, | |
| "learning_rate": 3.6643495606054153e-07, | |
| "loss": 0.9429040551185608, | |
| "step": 1326 | |
| }, | |
| { | |
| "epoch": 2.80168776371308, | |
| "grad_norm": 1.6331150531768799, | |
| "learning_rate": 3.6485554486206035e-07, | |
| "loss": 0.8298648595809937, | |
| "step": 1328 | |
| }, | |
| { | |
| "epoch": 2.8059071729957807, | |
| "grad_norm": 1.505171298980713, | |
| "learning_rate": 3.632797259131301e-07, | |
| "loss": 1.119720458984375, | |
| "step": 1330 | |
| }, | |
| { | |
| "epoch": 2.810126582278481, | |
| "grad_norm": 1.824774980545044, | |
| "learning_rate": 3.6170751839329087e-07, | |
| "loss": 1.1578552722930908, | |
| "step": 1332 | |
| }, | |
| { | |
| "epoch": 2.8143459915611815, | |
| "grad_norm": 1.4384500980377197, | |
| "learning_rate": 3.601389414381272e-07, | |
| "loss": 0.7492596507072449, | |
| "step": 1334 | |
| }, | |
| { | |
| "epoch": 2.818565400843882, | |
| "grad_norm": 0.6595861911773682, | |
| "learning_rate": 3.585740141390362e-07, | |
| "loss": 1.0319997072219849, | |
| "step": 1336 | |
| }, | |
| { | |
| "epoch": 2.8227848101265822, | |
| "grad_norm": 1.1182562112808228, | |
| "learning_rate": 3.570127555429937e-07, | |
| "loss": 0.8679478168487549, | |
| "step": 1338 | |
| }, | |
| { | |
| "epoch": 2.827004219409283, | |
| "grad_norm": 2.751737117767334, | |
| "learning_rate": 3.554551846523234e-07, | |
| "loss": 0.992285430431366, | |
| "step": 1340 | |
| }, | |
| { | |
| "epoch": 2.831223628691983, | |
| "grad_norm": 1.0553044080734253, | |
| "learning_rate": 3.5390132042446593e-07, | |
| "loss": 1.0697180032730103, | |
| "step": 1342 | |
| }, | |
| { | |
| "epoch": 2.8354430379746836, | |
| "grad_norm": 6.8535590171813965, | |
| "learning_rate": 3.5235118177174633e-07, | |
| "loss": 1.3121901750564575, | |
| "step": 1344 | |
| }, | |
| { | |
| "epoch": 2.8396624472573837, | |
| "grad_norm": 3.6509854793548584, | |
| "learning_rate": 3.5080478756114603e-07, | |
| "loss": 0.7838273048400879, | |
| "step": 1346 | |
| }, | |
| { | |
| "epoch": 2.8438818565400843, | |
| "grad_norm": 1.319709062576294, | |
| "learning_rate": 3.4926215661407224e-07, | |
| "loss": 0.6845376491546631, | |
| "step": 1348 | |
| }, | |
| { | |
| "epoch": 2.848101265822785, | |
| "grad_norm": 1.3884485960006714, | |
| "learning_rate": 3.4772330770612856e-07, | |
| "loss": 1.1699258089065552, | |
| "step": 1350 | |
| }, | |
| { | |
| "epoch": 2.852320675105485, | |
| "grad_norm": 1.5885751247406006, | |
| "learning_rate": 3.4618825956688674e-07, | |
| "loss": 1.0469439029693604, | |
| "step": 1352 | |
| }, | |
| { | |
| "epoch": 2.8565400843881856, | |
| "grad_norm": 3.556104898452759, | |
| "learning_rate": 3.4465703087965895e-07, | |
| "loss": 0.8466750383377075, | |
| "step": 1354 | |
| }, | |
| { | |
| "epoch": 2.8607594936708862, | |
| "grad_norm": 4.589015960693359, | |
| "learning_rate": 3.4312964028127036e-07, | |
| "loss": 0.5300393104553223, | |
| "step": 1356 | |
| }, | |
| { | |
| "epoch": 2.8649789029535864, | |
| "grad_norm": 1.3245023488998413, | |
| "learning_rate": 3.416061063618321e-07, | |
| "loss": 1.1446274518966675, | |
| "step": 1358 | |
| }, | |
| { | |
| "epoch": 2.869198312236287, | |
| "grad_norm": 2.1873321533203125, | |
| "learning_rate": 3.400864476645146e-07, | |
| "loss": 0.9219729900360107, | |
| "step": 1360 | |
| }, | |
| { | |
| "epoch": 2.8734177215189876, | |
| "grad_norm": 8.96743392944336, | |
| "learning_rate": 3.3857068268532285e-07, | |
| "loss": 0.7180023789405823, | |
| "step": 1362 | |
| }, | |
| { | |
| "epoch": 2.8776371308016877, | |
| "grad_norm": 2.621079444885254, | |
| "learning_rate": 3.3705882987287096e-07, | |
| "loss": 1.0849711894989014, | |
| "step": 1364 | |
| }, | |
| { | |
| "epoch": 2.8818565400843883, | |
| "grad_norm": 1.501388669013977, | |
| "learning_rate": 3.355509076281567e-07, | |
| "loss": 1.1244922876358032, | |
| "step": 1366 | |
| }, | |
| { | |
| "epoch": 2.8860759493670884, | |
| "grad_norm": 2.502393960952759, | |
| "learning_rate": 3.3404693430433883e-07, | |
| "loss": 1.1720871925354004, | |
| "step": 1368 | |
| }, | |
| { | |
| "epoch": 2.890295358649789, | |
| "grad_norm": 1.8280988931655884, | |
| "learning_rate": 3.32546928206513e-07, | |
| "loss": 1.0966358184814453, | |
| "step": 1370 | |
| }, | |
| { | |
| "epoch": 2.894514767932489, | |
| "grad_norm": 19.55905532836914, | |
| "learning_rate": 3.3105090759148967e-07, | |
| "loss": 0.48501160740852356, | |
| "step": 1372 | |
| }, | |
| { | |
| "epoch": 2.8987341772151898, | |
| "grad_norm": 1.4679443836212158, | |
| "learning_rate": 3.2955889066757016e-07, | |
| "loss": 0.8926799297332764, | |
| "step": 1374 | |
| }, | |
| { | |
| "epoch": 2.9029535864978904, | |
| "grad_norm": 1.4338287115097046, | |
| "learning_rate": 3.280708955943272e-07, | |
| "loss": 1.1578876972198486, | |
| "step": 1376 | |
| }, | |
| { | |
| "epoch": 2.9071729957805905, | |
| "grad_norm": 3.8778481483459473, | |
| "learning_rate": 3.265869404823828e-07, | |
| "loss": 0.9735660552978516, | |
| "step": 1378 | |
| }, | |
| { | |
| "epoch": 2.911392405063291, | |
| "grad_norm": 2.506485939025879, | |
| "learning_rate": 3.2510704339318803e-07, | |
| "loss": 1.3276560306549072, | |
| "step": 1380 | |
| }, | |
| { | |
| "epoch": 2.9156118143459917, | |
| "grad_norm": 2.2147409915924072, | |
| "learning_rate": 3.2363122233880246e-07, | |
| "loss": 0.7593087553977966, | |
| "step": 1382 | |
| }, | |
| { | |
| "epoch": 2.919831223628692, | |
| "grad_norm": 5.710489749908447, | |
| "learning_rate": 3.221594952816764e-07, | |
| "loss": 0.7504158616065979, | |
| "step": 1384 | |
| }, | |
| { | |
| "epoch": 2.9240506329113924, | |
| "grad_norm": 1.8435603380203247, | |
| "learning_rate": 3.2069188013443137e-07, | |
| "loss": 0.8508476614952087, | |
| "step": 1386 | |
| }, | |
| { | |
| "epoch": 2.928270042194093, | |
| "grad_norm": 1.3359285593032837, | |
| "learning_rate": 3.192283947596416e-07, | |
| "loss": 1.1549383401870728, | |
| "step": 1388 | |
| }, | |
| { | |
| "epoch": 2.932489451476793, | |
| "grad_norm": 1.435840129852295, | |
| "learning_rate": 3.1776905696961776e-07, | |
| "loss": 1.0318659543991089, | |
| "step": 1390 | |
| }, | |
| { | |
| "epoch": 2.9367088607594938, | |
| "grad_norm": 1.5136826038360596, | |
| "learning_rate": 3.163138845261895e-07, | |
| "loss": 0.7768437266349792, | |
| "step": 1392 | |
| }, | |
| { | |
| "epoch": 2.9409282700421944, | |
| "grad_norm": 2.1672825813293457, | |
| "learning_rate": 3.148628951404894e-07, | |
| "loss": 0.7318160533905029, | |
| "step": 1394 | |
| }, | |
| { | |
| "epoch": 2.9451476793248945, | |
| "grad_norm": 2.231234550476074, | |
| "learning_rate": 3.134161064727371e-07, | |
| "loss": 1.1114449501037598, | |
| "step": 1396 | |
| }, | |
| { | |
| "epoch": 2.9493670886075947, | |
| "grad_norm": 1.1347553730010986, | |
| "learning_rate": 3.1197353613202493e-07, | |
| "loss": 0.98956298828125, | |
| "step": 1398 | |
| }, | |
| { | |
| "epoch": 2.9535864978902953, | |
| "grad_norm": 2.6513452529907227, | |
| "learning_rate": 3.1053520167610327e-07, | |
| "loss": 0.8672858476638794, | |
| "step": 1400 | |
| }, | |
| { | |
| "epoch": 2.957805907172996, | |
| "grad_norm": 5.388697147369385, | |
| "learning_rate": 3.0910112061116706e-07, | |
| "loss": 0.8895928263664246, | |
| "step": 1402 | |
| }, | |
| { | |
| "epoch": 2.962025316455696, | |
| "grad_norm": 1.2345776557922363, | |
| "learning_rate": 3.07671310391642e-07, | |
| "loss": 1.0770245790481567, | |
| "step": 1404 | |
| }, | |
| { | |
| "epoch": 2.9662447257383966, | |
| "grad_norm": 2.581882953643799, | |
| "learning_rate": 3.06245788419973e-07, | |
| "loss": 0.9955227971076965, | |
| "step": 1406 | |
| }, | |
| { | |
| "epoch": 2.970464135021097, | |
| "grad_norm": 1.2137199640274048, | |
| "learning_rate": 3.0482457204641244e-07, | |
| "loss": 0.6025493741035461, | |
| "step": 1408 | |
| }, | |
| { | |
| "epoch": 2.9746835443037973, | |
| "grad_norm": 7.410897731781006, | |
| "learning_rate": 3.0340767856880765e-07, | |
| "loss": 0.9356251358985901, | |
| "step": 1410 | |
| }, | |
| { | |
| "epoch": 2.978902953586498, | |
| "grad_norm": 1.3034472465515137, | |
| "learning_rate": 3.019951252323922e-07, | |
| "loss": 1.1050803661346436, | |
| "step": 1412 | |
| }, | |
| { | |
| "epoch": 2.9831223628691985, | |
| "grad_norm": 4.278261184692383, | |
| "learning_rate": 3.005869292295745e-07, | |
| "loss": 0.9199661016464233, | |
| "step": 1414 | |
| }, | |
| { | |
| "epoch": 2.9873417721518987, | |
| "grad_norm": 1.678770661354065, | |
| "learning_rate": 2.9918310769972974e-07, | |
| "loss": 1.006180763244629, | |
| "step": 1416 | |
| }, | |
| { | |
| "epoch": 2.9915611814345993, | |
| "grad_norm": 1.6135848760604858, | |
| "learning_rate": 2.9778367772899007e-07, | |
| "loss": 1.0220967531204224, | |
| "step": 1418 | |
| }, | |
| { | |
| "epoch": 2.9957805907173, | |
| "grad_norm": 4.610077857971191, | |
| "learning_rate": 2.963886563500377e-07, | |
| "loss": 1.10872220993042, | |
| "step": 1420 | |
| }, | |
| { | |
| "epoch": 3.0, | |
| "grad_norm": 1.7171008586883545, | |
| "learning_rate": 2.949980605418972e-07, | |
| "loss": 0.4870656132698059, | |
| "step": 1422 | |
| }, | |
| { | |
| "epoch": 3.0042194092827006, | |
| "grad_norm": 1.3645132780075073, | |
| "learning_rate": 2.936119072297288e-07, | |
| "loss": 0.8511791825294495, | |
| "step": 1424 | |
| }, | |
| { | |
| "epoch": 3.0084388185654007, | |
| "grad_norm": 1.9679698944091797, | |
| "learning_rate": 2.9223021328462197e-07, | |
| "loss": 0.7651324272155762, | |
| "step": 1426 | |
| }, | |
| { | |
| "epoch": 3.0126582278481013, | |
| "grad_norm": 3.9621288776397705, | |
| "learning_rate": 2.908529955233911e-07, | |
| "loss": 0.699533224105835, | |
| "step": 1428 | |
| }, | |
| { | |
| "epoch": 3.0168776371308015, | |
| "grad_norm": 3.126701831817627, | |
| "learning_rate": 2.8948027070836994e-07, | |
| "loss": 0.4490070939064026, | |
| "step": 1430 | |
| }, | |
| { | |
| "epoch": 3.021097046413502, | |
| "grad_norm": 2.446420431137085, | |
| "learning_rate": 2.881120555472082e-07, | |
| "loss": 1.0999044179916382, | |
| "step": 1432 | |
| }, | |
| { | |
| "epoch": 3.0253164556962027, | |
| "grad_norm": 1.639694333076477, | |
| "learning_rate": 2.867483666926673e-07, | |
| "loss": 1.0761295557022095, | |
| "step": 1434 | |
| }, | |
| { | |
| "epoch": 3.029535864978903, | |
| "grad_norm": 2.0383009910583496, | |
| "learning_rate": 2.853892207424188e-07, | |
| "loss": 1.2911527156829834, | |
| "step": 1436 | |
| }, | |
| { | |
| "epoch": 3.0337552742616034, | |
| "grad_norm": 2.1497604846954346, | |
| "learning_rate": 2.840346342388418e-07, | |
| "loss": 0.7010747790336609, | |
| "step": 1438 | |
| }, | |
| { | |
| "epoch": 3.037974683544304, | |
| "grad_norm": 1.3137015104293823, | |
| "learning_rate": 2.8268462366882116e-07, | |
| "loss": 1.0549767017364502, | |
| "step": 1440 | |
| }, | |
| { | |
| "epoch": 3.042194092827004, | |
| "grad_norm": 2.2534055709838867, | |
| "learning_rate": 2.81339205463548e-07, | |
| "loss": 0.7904849052429199, | |
| "step": 1442 | |
| }, | |
| { | |
| "epoch": 3.0464135021097047, | |
| "grad_norm": 1.8378784656524658, | |
| "learning_rate": 2.7999839599831866e-07, | |
| "loss": 0.9793230891227722, | |
| "step": 1444 | |
| }, | |
| { | |
| "epoch": 3.050632911392405, | |
| "grad_norm": 1.6699494123458862, | |
| "learning_rate": 2.786622115923361e-07, | |
| "loss": 1.100398302078247, | |
| "step": 1446 | |
| }, | |
| { | |
| "epoch": 3.0548523206751055, | |
| "grad_norm": 4.9398722648620605, | |
| "learning_rate": 2.773306685085103e-07, | |
| "loss": 0.7494297027587891, | |
| "step": 1448 | |
| }, | |
| { | |
| "epoch": 3.059071729957806, | |
| "grad_norm": 2.751260757446289, | |
| "learning_rate": 2.760037829532616e-07, | |
| "loss": 0.9139360189437866, | |
| "step": 1450 | |
| }, | |
| { | |
| "epoch": 3.0632911392405062, | |
| "grad_norm": 1.659805178642273, | |
| "learning_rate": 2.746815710763228e-07, | |
| "loss": 1.121703028678894, | |
| "step": 1452 | |
| }, | |
| { | |
| "epoch": 3.067510548523207, | |
| "grad_norm": 17.309215545654297, | |
| "learning_rate": 2.733640489705424e-07, | |
| "loss": 0.8850579261779785, | |
| "step": 1454 | |
| }, | |
| { | |
| "epoch": 3.071729957805907, | |
| "grad_norm": 1.963599443435669, | |
| "learning_rate": 2.7205123267168884e-07, | |
| "loss": 0.7342712879180908, | |
| "step": 1456 | |
| }, | |
| { | |
| "epoch": 3.0759493670886076, | |
| "grad_norm": 1.344913125038147, | |
| "learning_rate": 2.7074313815825577e-07, | |
| "loss": 0.8235659003257751, | |
| "step": 1458 | |
| }, | |
| { | |
| "epoch": 3.080168776371308, | |
| "grad_norm": 2.194878101348877, | |
| "learning_rate": 2.694397813512672e-07, | |
| "loss": 0.8748940229415894, | |
| "step": 1460 | |
| }, | |
| { | |
| "epoch": 3.0843881856540083, | |
| "grad_norm": 1.611878514289856, | |
| "learning_rate": 2.6814117811408343e-07, | |
| "loss": 1.0315779447555542, | |
| "step": 1462 | |
| }, | |
| { | |
| "epoch": 3.088607594936709, | |
| "grad_norm": 1.17129647731781, | |
| "learning_rate": 2.668473442522087e-07, | |
| "loss": 1.089264154434204, | |
| "step": 1464 | |
| }, | |
| { | |
| "epoch": 3.0928270042194095, | |
| "grad_norm": 1.8487638235092163, | |
| "learning_rate": 2.655582955130983e-07, | |
| "loss": 0.9789541959762573, | |
| "step": 1466 | |
| }, | |
| { | |
| "epoch": 3.0970464135021096, | |
| "grad_norm": 2.393946409225464, | |
| "learning_rate": 2.6427404758596716e-07, | |
| "loss": 0.7049380540847778, | |
| "step": 1468 | |
| }, | |
| { | |
| "epoch": 3.1012658227848102, | |
| "grad_norm": 6.393697261810303, | |
| "learning_rate": 2.6299461610159823e-07, | |
| "loss": 0.2891662120819092, | |
| "step": 1470 | |
| }, | |
| { | |
| "epoch": 3.1054852320675104, | |
| "grad_norm": 3.184678316116333, | |
| "learning_rate": 2.617200166321536e-07, | |
| "loss": 1.5170872211456299, | |
| "step": 1472 | |
| }, | |
| { | |
| "epoch": 3.109704641350211, | |
| "grad_norm": 0.9037976264953613, | |
| "learning_rate": 2.604502646909835e-07, | |
| "loss": 0.6711030602455139, | |
| "step": 1474 | |
| }, | |
| { | |
| "epoch": 3.1139240506329116, | |
| "grad_norm": 1.8876357078552246, | |
| "learning_rate": 2.591853757324387e-07, | |
| "loss": 1.0795202255249023, | |
| "step": 1476 | |
| }, | |
| { | |
| "epoch": 3.1181434599156117, | |
| "grad_norm": 2.756838083267212, | |
| "learning_rate": 2.579253651516811e-07, | |
| "loss": 1.132811427116394, | |
| "step": 1478 | |
| }, | |
| { | |
| "epoch": 3.1223628691983123, | |
| "grad_norm": 1.3386019468307495, | |
| "learning_rate": 2.566702482844977e-07, | |
| "loss": 1.08835768699646, | |
| "step": 1480 | |
| }, | |
| { | |
| "epoch": 3.1265822784810124, | |
| "grad_norm": 2.480353593826294, | |
| "learning_rate": 2.554200404071133e-07, | |
| "loss": 1.070718765258789, | |
| "step": 1482 | |
| }, | |
| { | |
| "epoch": 3.130801687763713, | |
| "grad_norm": 1.40932297706604, | |
| "learning_rate": 2.541747567360042e-07, | |
| "loss": 1.0528981685638428, | |
| "step": 1484 | |
| }, | |
| { | |
| "epoch": 3.1350210970464136, | |
| "grad_norm": 1.5161710977554321, | |
| "learning_rate": 2.529344124277137e-07, | |
| "loss": 0.701133131980896, | |
| "step": 1486 | |
| }, | |
| { | |
| "epoch": 3.1392405063291138, | |
| "grad_norm": 1.7773646116256714, | |
| "learning_rate": 2.516990225786675e-07, | |
| "loss": 0.714127242565155, | |
| "step": 1488 | |
| }, | |
| { | |
| "epoch": 3.1434599156118144, | |
| "grad_norm": 1.856155276298523, | |
| "learning_rate": 2.5046860222498974e-07, | |
| "loss": 1.374661922454834, | |
| "step": 1490 | |
| }, | |
| { | |
| "epoch": 3.147679324894515, | |
| "grad_norm": 1.7023481130599976, | |
| "learning_rate": 2.492431663423195e-07, | |
| "loss": 0.7714812159538269, | |
| "step": 1492 | |
| }, | |
| { | |
| "epoch": 3.151898734177215, | |
| "grad_norm": 2.849262237548828, | |
| "learning_rate": 2.480227298456298e-07, | |
| "loss": 0.9089514017105103, | |
| "step": 1494 | |
| }, | |
| { | |
| "epoch": 3.1561181434599157, | |
| "grad_norm": 1.426505208015442, | |
| "learning_rate": 2.468073075890449e-07, | |
| "loss": 0.885564386844635, | |
| "step": 1496 | |
| }, | |
| { | |
| "epoch": 3.160337552742616, | |
| "grad_norm": 1.386016845703125, | |
| "learning_rate": 2.455969143656604e-07, | |
| "loss": 0.6194628477096558, | |
| "step": 1498 | |
| }, | |
| { | |
| "epoch": 3.1645569620253164, | |
| "grad_norm": 3.545844316482544, | |
| "learning_rate": 2.4439156490736206e-07, | |
| "loss": 0.6920610070228577, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 3.168776371308017, | |
| "grad_norm": 2.4662020206451416, | |
| "learning_rate": 2.431912738846479e-07, | |
| "loss": 1.0780019760131836, | |
| "step": 1502 | |
| }, | |
| { | |
| "epoch": 3.172995780590717, | |
| "grad_norm": 1.5884943008422852, | |
| "learning_rate": 2.4199605590644834e-07, | |
| "loss": 0.987308144569397, | |
| "step": 1504 | |
| }, | |
| { | |
| "epoch": 3.1772151898734178, | |
| "grad_norm": 1.7786238193511963, | |
| "learning_rate": 2.4080592551994957e-07, | |
| "loss": 1.1196187734603882, | |
| "step": 1506 | |
| }, | |
| { | |
| "epoch": 3.181434599156118, | |
| "grad_norm": 1.3663359880447388, | |
| "learning_rate": 2.396208972104153e-07, | |
| "loss": 1.2225620746612549, | |
| "step": 1508 | |
| }, | |
| { | |
| "epoch": 3.1856540084388185, | |
| "grad_norm": 2.5622196197509766, | |
| "learning_rate": 2.384409854010114e-07, | |
| "loss": 1.0651240348815918, | |
| "step": 1510 | |
| }, | |
| { | |
| "epoch": 3.189873417721519, | |
| "grad_norm": 0.9567521214485168, | |
| "learning_rate": 2.372662044526301e-07, | |
| "loss": 0.3738023042678833, | |
| "step": 1512 | |
| }, | |
| { | |
| "epoch": 3.1940928270042193, | |
| "grad_norm": 1.9998040199279785, | |
| "learning_rate": 2.3609656866371468e-07, | |
| "loss": 1.1397721767425537, | |
| "step": 1514 | |
| }, | |
| { | |
| "epoch": 3.19831223628692, | |
| "grad_norm": 0.7790340781211853, | |
| "learning_rate": 2.3493209227008635e-07, | |
| "loss": 0.7803550958633423, | |
| "step": 1516 | |
| }, | |
| { | |
| "epoch": 3.2025316455696204, | |
| "grad_norm": 1.4339203834533691, | |
| "learning_rate": 2.3377278944477026e-07, | |
| "loss": 1.136408805847168, | |
| "step": 1518 | |
| }, | |
| { | |
| "epoch": 3.2067510548523206, | |
| "grad_norm": 2.4172418117523193, | |
| "learning_rate": 2.3261867429782352e-07, | |
| "loss": 1.0867120027542114, | |
| "step": 1520 | |
| }, | |
| { | |
| "epoch": 3.210970464135021, | |
| "grad_norm": 5.30928373336792, | |
| "learning_rate": 2.3146976087616251e-07, | |
| "loss": 0.40863823890686035, | |
| "step": 1522 | |
| }, | |
| { | |
| "epoch": 3.2151898734177213, | |
| "grad_norm": 1.3400903940200806, | |
| "learning_rate": 2.3032606316339343e-07, | |
| "loss": 0.9426780343055725, | |
| "step": 1524 | |
| }, | |
| { | |
| "epoch": 3.219409282700422, | |
| "grad_norm": 2.4984984397888184, | |
| "learning_rate": 2.2918759507964067e-07, | |
| "loss": 1.065047025680542, | |
| "step": 1526 | |
| }, | |
| { | |
| "epoch": 3.2236286919831225, | |
| "grad_norm": 1.2303318977355957, | |
| "learning_rate": 2.280543704813786e-07, | |
| "loss": 0.7552684545516968, | |
| "step": 1528 | |
| }, | |
| { | |
| "epoch": 3.2278481012658227, | |
| "grad_norm": 8.25938606262207, | |
| "learning_rate": 2.2692640316126142e-07, | |
| "loss": 0.8803672790527344, | |
| "step": 1530 | |
| }, | |
| { | |
| "epoch": 3.2320675105485233, | |
| "grad_norm": 0.49941709637641907, | |
| "learning_rate": 2.258037068479569e-07, | |
| "loss": 0.4145871102809906, | |
| "step": 1532 | |
| }, | |
| { | |
| "epoch": 3.2362869198312234, | |
| "grad_norm": 4.213127613067627, | |
| "learning_rate": 2.246862952059784e-07, | |
| "loss": 0.8059659600257874, | |
| "step": 1534 | |
| }, | |
| { | |
| "epoch": 3.240506329113924, | |
| "grad_norm": 1.6297084093093872, | |
| "learning_rate": 2.2357418183551847e-07, | |
| "loss": 1.0444282293319702, | |
| "step": 1536 | |
| }, | |
| { | |
| "epoch": 3.2447257383966246, | |
| "grad_norm": 1.131995677947998, | |
| "learning_rate": 2.2246738027228375e-07, | |
| "loss": 1.0914216041564941, | |
| "step": 1538 | |
| }, | |
| { | |
| "epoch": 3.2489451476793247, | |
| "grad_norm": 4.478993892669678, | |
| "learning_rate": 2.2136590398733008e-07, | |
| "loss": 0.9430460929870605, | |
| "step": 1540 | |
| }, | |
| { | |
| "epoch": 3.2531645569620253, | |
| "grad_norm": 3.0573625564575195, | |
| "learning_rate": 2.2026976638689858e-07, | |
| "loss": 0.911579966545105, | |
| "step": 1542 | |
| }, | |
| { | |
| "epoch": 3.257383966244726, | |
| "grad_norm": 3.4742343425750732, | |
| "learning_rate": 2.1917898081225196e-07, | |
| "loss": 0.7584477066993713, | |
| "step": 1544 | |
| }, | |
| { | |
| "epoch": 3.261603375527426, | |
| "grad_norm": 8.812678337097168, | |
| "learning_rate": 2.1809356053951312e-07, | |
| "loss": 0.8638182878494263, | |
| "step": 1546 | |
| }, | |
| { | |
| "epoch": 3.2658227848101267, | |
| "grad_norm": 2.5531651973724365, | |
| "learning_rate": 2.1701351877950265e-07, | |
| "loss": 0.9924852848052979, | |
| "step": 1548 | |
| }, | |
| { | |
| "epoch": 3.270042194092827, | |
| "grad_norm": 2.971946954727173, | |
| "learning_rate": 2.1593886867757877e-07, | |
| "loss": 0.4322529435157776, | |
| "step": 1550 | |
| }, | |
| { | |
| "epoch": 3.2742616033755274, | |
| "grad_norm": 1.717172384262085, | |
| "learning_rate": 2.148696233134765e-07, | |
| "loss": 0.550542414188385, | |
| "step": 1552 | |
| }, | |
| { | |
| "epoch": 3.278481012658228, | |
| "grad_norm": 5.607646942138672, | |
| "learning_rate": 2.1380579570114936e-07, | |
| "loss": 0.5011199116706848, | |
| "step": 1554 | |
| }, | |
| { | |
| "epoch": 3.282700421940928, | |
| "grad_norm": 1.612561821937561, | |
| "learning_rate": 2.1274739878861052e-07, | |
| "loss": 1.0595111846923828, | |
| "step": 1556 | |
| }, | |
| { | |
| "epoch": 3.2869198312236287, | |
| "grad_norm": 0.5656753182411194, | |
| "learning_rate": 2.1169444545777492e-07, | |
| "loss": 0.9489805102348328, | |
| "step": 1558 | |
| }, | |
| { | |
| "epoch": 3.291139240506329, | |
| "grad_norm": 1.542765736579895, | |
| "learning_rate": 2.1064694852430298e-07, | |
| "loss": 0.7409214377403259, | |
| "step": 1560 | |
| }, | |
| { | |
| "epoch": 3.2953586497890295, | |
| "grad_norm": 4.1754326820373535, | |
| "learning_rate": 2.0960492073744497e-07, | |
| "loss": 0.6657558679580688, | |
| "step": 1562 | |
| }, | |
| { | |
| "epoch": 3.29957805907173, | |
| "grad_norm": 2.3946285247802734, | |
| "learning_rate": 2.0856837477988444e-07, | |
| "loss": 1.0093276500701904, | |
| "step": 1564 | |
| }, | |
| { | |
| "epoch": 3.3037974683544302, | |
| "grad_norm": 54.370628356933594, | |
| "learning_rate": 2.075373232675853e-07, | |
| "loss": 0.911258339881897, | |
| "step": 1566 | |
| }, | |
| { | |
| "epoch": 3.308016877637131, | |
| "grad_norm": 0.5367670655250549, | |
| "learning_rate": 2.0651177874963756e-07, | |
| "loss": 0.5720005035400391, | |
| "step": 1568 | |
| }, | |
| { | |
| "epoch": 3.3122362869198314, | |
| "grad_norm": 2.9743804931640625, | |
| "learning_rate": 2.054917537081048e-07, | |
| "loss": 0.7077758312225342, | |
| "step": 1570 | |
| }, | |
| { | |
| "epoch": 3.3164556962025316, | |
| "grad_norm": 1.33404541015625, | |
| "learning_rate": 2.0447726055787184e-07, | |
| "loss": 0.7469961047172546, | |
| "step": 1572 | |
| }, | |
| { | |
| "epoch": 3.320675105485232, | |
| "grad_norm": 5.848537445068359, | |
| "learning_rate": 2.0346831164649456e-07, | |
| "loss": 1.2882143259048462, | |
| "step": 1574 | |
| }, | |
| { | |
| "epoch": 3.3248945147679323, | |
| "grad_norm": 2.0500552654266357, | |
| "learning_rate": 2.024649192540486e-07, | |
| "loss": 1.0107818841934204, | |
| "step": 1576 | |
| }, | |
| { | |
| "epoch": 3.329113924050633, | |
| "grad_norm": 1.4133131504058838, | |
| "learning_rate": 2.0146709559298057e-07, | |
| "loss": 1.098578929901123, | |
| "step": 1578 | |
| }, | |
| { | |
| "epoch": 3.3333333333333335, | |
| "grad_norm": 2.475172281265259, | |
| "learning_rate": 2.004748528079589e-07, | |
| "loss": 0.907584547996521, | |
| "step": 1580 | |
| }, | |
| { | |
| "epoch": 3.3375527426160336, | |
| "grad_norm": 1.8427865505218506, | |
| "learning_rate": 1.9948820297572654e-07, | |
| "loss": 0.5680180191993713, | |
| "step": 1582 | |
| }, | |
| { | |
| "epoch": 3.3417721518987342, | |
| "grad_norm": 2.7834925651550293, | |
| "learning_rate": 1.9850715810495388e-07, | |
| "loss": 0.8737412095069885, | |
| "step": 1584 | |
| }, | |
| { | |
| "epoch": 3.3459915611814344, | |
| "grad_norm": 3.1142473220825195, | |
| "learning_rate": 1.9753173013609188e-07, | |
| "loss": 0.9088540077209473, | |
| "step": 1586 | |
| }, | |
| { | |
| "epoch": 3.350210970464135, | |
| "grad_norm": 1.0896648168563843, | |
| "learning_rate": 1.9656193094122788e-07, | |
| "loss": 0.6729345917701721, | |
| "step": 1588 | |
| }, | |
| { | |
| "epoch": 3.3544303797468356, | |
| "grad_norm": 0.7042174339294434, | |
| "learning_rate": 1.955977723239402e-07, | |
| "loss": 1.0873976945877075, | |
| "step": 1590 | |
| }, | |
| { | |
| "epoch": 3.3586497890295357, | |
| "grad_norm": 2.3321895599365234, | |
| "learning_rate": 1.946392660191551e-07, | |
| "loss": 1.0663033723831177, | |
| "step": 1592 | |
| }, | |
| { | |
| "epoch": 3.3628691983122363, | |
| "grad_norm": 0.490595281124115, | |
| "learning_rate": 1.9368642369300324e-07, | |
| "loss": 0.9354673624038696, | |
| "step": 1594 | |
| }, | |
| { | |
| "epoch": 3.367088607594937, | |
| "grad_norm": 10.656190872192383, | |
| "learning_rate": 1.927392569426783e-07, | |
| "loss": 0.4992368817329407, | |
| "step": 1596 | |
| }, | |
| { | |
| "epoch": 3.371308016877637, | |
| "grad_norm": 0.5064166784286499, | |
| "learning_rate": 1.917977772962959e-07, | |
| "loss": 0.528096616268158, | |
| "step": 1598 | |
| }, | |
| { | |
| "epoch": 3.3755274261603376, | |
| "grad_norm": 5.858240604400635, | |
| "learning_rate": 1.9086199621275264e-07, | |
| "loss": 0.8440109491348267, | |
| "step": 1600 | |
| }, | |
| { | |
| "epoch": 3.379746835443038, | |
| "grad_norm": 8.531730651855469, | |
| "learning_rate": 1.899319250815872e-07, | |
| "loss": 0.6302809119224548, | |
| "step": 1602 | |
| }, | |
| { | |
| "epoch": 3.3839662447257384, | |
| "grad_norm": 0.5061826705932617, | |
| "learning_rate": 1.8900757522284133e-07, | |
| "loss": 0.8138654828071594, | |
| "step": 1604 | |
| }, | |
| { | |
| "epoch": 3.388185654008439, | |
| "grad_norm": 2.710231065750122, | |
| "learning_rate": 1.880889578869227e-07, | |
| "loss": 1.1358734369277954, | |
| "step": 1606 | |
| }, | |
| { | |
| "epoch": 3.392405063291139, | |
| "grad_norm": 2.9734416007995605, | |
| "learning_rate": 1.8717608425446727e-07, | |
| "loss": 0.7783518433570862, | |
| "step": 1608 | |
| }, | |
| { | |
| "epoch": 3.3966244725738397, | |
| "grad_norm": 1.6831233501434326, | |
| "learning_rate": 1.8626896543620322e-07, | |
| "loss": 0.7331032156944275, | |
| "step": 1610 | |
| }, | |
| { | |
| "epoch": 3.40084388185654, | |
| "grad_norm": 1.832513451576233, | |
| "learning_rate": 1.853676124728165e-07, | |
| "loss": 1.0596171617507935, | |
| "step": 1612 | |
| }, | |
| { | |
| "epoch": 3.4050632911392404, | |
| "grad_norm": 1.777066946029663, | |
| "learning_rate": 1.8447203633481567e-07, | |
| "loss": 0.5832729935646057, | |
| "step": 1614 | |
| }, | |
| { | |
| "epoch": 3.409282700421941, | |
| "grad_norm": 3.6729393005371094, | |
| "learning_rate": 1.8358224792239858e-07, | |
| "loss": 0.9451841115951538, | |
| "step": 1616 | |
| }, | |
| { | |
| "epoch": 3.413502109704641, | |
| "grad_norm": 1.5150253772735596, | |
| "learning_rate": 1.8269825806531981e-07, | |
| "loss": 1.205118179321289, | |
| "step": 1618 | |
| }, | |
| { | |
| "epoch": 3.4177215189873418, | |
| "grad_norm": 1.506641149520874, | |
| "learning_rate": 1.8182007752275897e-07, | |
| "loss": 1.1017844676971436, | |
| "step": 1620 | |
| }, | |
| { | |
| "epoch": 3.4219409282700424, | |
| "grad_norm": 1.7625582218170166, | |
| "learning_rate": 1.8094771698318949e-07, | |
| "loss": 0.7701492309570312, | |
| "step": 1622 | |
| }, | |
| { | |
| "epoch": 3.4261603375527425, | |
| "grad_norm": 3.7757952213287354, | |
| "learning_rate": 1.8008118706424835e-07, | |
| "loss": 0.47009673714637756, | |
| "step": 1624 | |
| }, | |
| { | |
| "epoch": 3.430379746835443, | |
| "grad_norm": 86.28419494628906, | |
| "learning_rate": 1.792204983126077e-07, | |
| "loss": 0.3835935592651367, | |
| "step": 1626 | |
| }, | |
| { | |
| "epoch": 3.4345991561181437, | |
| "grad_norm": 1.6593104600906372, | |
| "learning_rate": 1.7836566120384535e-07, | |
| "loss": 1.0729460716247559, | |
| "step": 1628 | |
| }, | |
| { | |
| "epoch": 3.438818565400844, | |
| "grad_norm": 1.3321086168289185, | |
| "learning_rate": 1.7751668614231838e-07, | |
| "loss": 0.5311670303344727, | |
| "step": 1630 | |
| }, | |
| { | |
| "epoch": 3.4430379746835444, | |
| "grad_norm": 1.7757083177566528, | |
| "learning_rate": 1.7667358346103543e-07, | |
| "loss": 1.0757611989974976, | |
| "step": 1632 | |
| }, | |
| { | |
| "epoch": 3.4472573839662446, | |
| "grad_norm": 0.7050431370735168, | |
| "learning_rate": 1.7583636342153186e-07, | |
| "loss": 0.8372207283973694, | |
| "step": 1634 | |
| }, | |
| { | |
| "epoch": 3.451476793248945, | |
| "grad_norm": 3.041806221008301, | |
| "learning_rate": 1.7500503621374447e-07, | |
| "loss": 1.3023487329483032, | |
| "step": 1636 | |
| }, | |
| { | |
| "epoch": 3.4556962025316453, | |
| "grad_norm": 2.8929758071899414, | |
| "learning_rate": 1.7417961195588712e-07, | |
| "loss": 1.2805616855621338, | |
| "step": 1638 | |
| }, | |
| { | |
| "epoch": 3.459915611814346, | |
| "grad_norm": 1.4591811895370483, | |
| "learning_rate": 1.733601006943283e-07, | |
| "loss": 1.0746394395828247, | |
| "step": 1640 | |
| }, | |
| { | |
| "epoch": 3.4641350210970465, | |
| "grad_norm": 21.10038185119629, | |
| "learning_rate": 1.7254651240346834e-07, | |
| "loss": 1.2883800268173218, | |
| "step": 1642 | |
| }, | |
| { | |
| "epoch": 3.4683544303797467, | |
| "grad_norm": 0.8419481515884399, | |
| "learning_rate": 1.717388569856184e-07, | |
| "loss": 0.4558939039707184, | |
| "step": 1644 | |
| }, | |
| { | |
| "epoch": 3.4725738396624473, | |
| "grad_norm": 1.598176121711731, | |
| "learning_rate": 1.7093714427087921e-07, | |
| "loss": 1.1013548374176025, | |
| "step": 1646 | |
| }, | |
| { | |
| "epoch": 3.476793248945148, | |
| "grad_norm": 1.9482252597808838, | |
| "learning_rate": 1.7014138401702235e-07, | |
| "loss": 1.064300537109375, | |
| "step": 1648 | |
| }, | |
| { | |
| "epoch": 3.481012658227848, | |
| "grad_norm": 2.4247756004333496, | |
| "learning_rate": 1.6935158590937102e-07, | |
| "loss": 0.5595088005065918, | |
| "step": 1650 | |
| }, | |
| { | |
| "epoch": 3.4852320675105486, | |
| "grad_norm": 1.5676363706588745, | |
| "learning_rate": 1.685677595606821e-07, | |
| "loss": 0.9377724528312683, | |
| "step": 1652 | |
| }, | |
| { | |
| "epoch": 3.489451476793249, | |
| "grad_norm": 1.7761136293411255, | |
| "learning_rate": 1.6778991451102917e-07, | |
| "loss": 0.6129472255706787, | |
| "step": 1654 | |
| }, | |
| { | |
| "epoch": 3.4936708860759493, | |
| "grad_norm": 1.6247411966323853, | |
| "learning_rate": 1.6701806022768664e-07, | |
| "loss": 0.9987605214118958, | |
| "step": 1656 | |
| }, | |
| { | |
| "epoch": 3.49789029535865, | |
| "grad_norm": 7.621754169464111, | |
| "learning_rate": 1.662522061050143e-07, | |
| "loss": 0.7994301319122314, | |
| "step": 1658 | |
| }, | |
| { | |
| "epoch": 3.50210970464135, | |
| "grad_norm": 1.6483778953552246, | |
| "learning_rate": 1.6549236146434306e-07, | |
| "loss": 1.0804067850112915, | |
| "step": 1660 | |
| }, | |
| { | |
| "epoch": 3.5063291139240507, | |
| "grad_norm": 2.4437475204467773, | |
| "learning_rate": 1.6473853555386138e-07, | |
| "loss": 1.301591396331787, | |
| "step": 1662 | |
| }, | |
| { | |
| "epoch": 3.510548523206751, | |
| "grad_norm": 6.270905017852783, | |
| "learning_rate": 1.63990737548503e-07, | |
| "loss": 0.5238262414932251, | |
| "step": 1664 | |
| }, | |
| { | |
| "epoch": 3.5147679324894514, | |
| "grad_norm": 1.6719293594360352, | |
| "learning_rate": 1.6324897654983497e-07, | |
| "loss": 1.1141690015792847, | |
| "step": 1666 | |
| }, | |
| { | |
| "epoch": 3.518987341772152, | |
| "grad_norm": 3.9029476642608643, | |
| "learning_rate": 1.6251326158594697e-07, | |
| "loss": 0.9623671770095825, | |
| "step": 1668 | |
| }, | |
| { | |
| "epoch": 3.523206751054852, | |
| "grad_norm": 1.746028184890747, | |
| "learning_rate": 1.617836016113414e-07, | |
| "loss": 1.0135071277618408, | |
| "step": 1670 | |
| }, | |
| { | |
| "epoch": 3.5274261603375527, | |
| "grad_norm": 3.1107168197631836, | |
| "learning_rate": 1.610600055068245e-07, | |
| "loss": 0.4389096200466156, | |
| "step": 1672 | |
| }, | |
| { | |
| "epoch": 3.5316455696202533, | |
| "grad_norm": 1.7027398347854614, | |
| "learning_rate": 1.603424820793983e-07, | |
| "loss": 0.6981071829795837, | |
| "step": 1674 | |
| }, | |
| { | |
| "epoch": 3.5358649789029535, | |
| "grad_norm": 2.8486416339874268, | |
| "learning_rate": 1.5963104006215308e-07, | |
| "loss": 0.7279437780380249, | |
| "step": 1676 | |
| }, | |
| { | |
| "epoch": 3.540084388185654, | |
| "grad_norm": 3.396284341812134, | |
| "learning_rate": 1.589256881141614e-07, | |
| "loss": 0.9122246503829956, | |
| "step": 1678 | |
| }, | |
| { | |
| "epoch": 3.5443037974683547, | |
| "grad_norm": 3.791874647140503, | |
| "learning_rate": 1.5822643482037287e-07, | |
| "loss": 1.1270490884780884, | |
| "step": 1680 | |
| }, | |
| { | |
| "epoch": 3.548523206751055, | |
| "grad_norm": 1.448197364807129, | |
| "learning_rate": 1.5753328869150915e-07, | |
| "loss": 0.958101749420166, | |
| "step": 1682 | |
| }, | |
| { | |
| "epoch": 3.5527426160337554, | |
| "grad_norm": 2.4740562438964844, | |
| "learning_rate": 1.5684625816396065e-07, | |
| "loss": 0.9169100522994995, | |
| "step": 1684 | |
| }, | |
| { | |
| "epoch": 3.5569620253164556, | |
| "grad_norm": 4.803852081298828, | |
| "learning_rate": 1.5616535159968395e-07, | |
| "loss": 0.4023887515068054, | |
| "step": 1686 | |
| }, | |
| { | |
| "epoch": 3.561181434599156, | |
| "grad_norm": 1.6774110794067383, | |
| "learning_rate": 1.5549057728609994e-07, | |
| "loss": 0.7174091935157776, | |
| "step": 1688 | |
| }, | |
| { | |
| "epoch": 3.5654008438818563, | |
| "grad_norm": 2.055140972137451, | |
| "learning_rate": 1.5482194343599262e-07, | |
| "loss": 1.1519484519958496, | |
| "step": 1690 | |
| }, | |
| { | |
| "epoch": 3.569620253164557, | |
| "grad_norm": 2.408010482788086, | |
| "learning_rate": 1.5415945818740984e-07, | |
| "loss": 0.2424314320087433, | |
| "step": 1692 | |
| }, | |
| { | |
| "epoch": 3.5738396624472575, | |
| "grad_norm": 0.4710818827152252, | |
| "learning_rate": 1.5350312960356366e-07, | |
| "loss": 0.975223183631897, | |
| "step": 1694 | |
| }, | |
| { | |
| "epoch": 3.5780590717299576, | |
| "grad_norm": 7.146688461303711, | |
| "learning_rate": 1.5285296567273247e-07, | |
| "loss": 0.2773347795009613, | |
| "step": 1696 | |
| }, | |
| { | |
| "epoch": 3.5822784810126582, | |
| "grad_norm": 2.3770270347595215, | |
| "learning_rate": 1.5220897430816355e-07, | |
| "loss": 0.8169768452644348, | |
| "step": 1698 | |
| }, | |
| { | |
| "epoch": 3.586497890295359, | |
| "grad_norm": 1.4037396907806396, | |
| "learning_rate": 1.5157116334797708e-07, | |
| "loss": 0.900860071182251, | |
| "step": 1700 | |
| }, | |
| { | |
| "epoch": 3.590717299578059, | |
| "grad_norm": 1.6098082065582275, | |
| "learning_rate": 1.5093954055507043e-07, | |
| "loss": 0.6856269240379333, | |
| "step": 1702 | |
| }, | |
| { | |
| "epoch": 3.5949367088607596, | |
| "grad_norm": 1.4536845684051514, | |
| "learning_rate": 1.5031411361702408e-07, | |
| "loss": 1.1157587766647339, | |
| "step": 1704 | |
| }, | |
| { | |
| "epoch": 3.59915611814346, | |
| "grad_norm": 3.0524935722351074, | |
| "learning_rate": 1.4969489014600732e-07, | |
| "loss": 0.812619149684906, | |
| "step": 1706 | |
| }, | |
| { | |
| "epoch": 3.6033755274261603, | |
| "grad_norm": 4.811793804168701, | |
| "learning_rate": 1.4908187767868651e-07, | |
| "loss": 0.7652060389518738, | |
| "step": 1708 | |
| }, | |
| { | |
| "epoch": 3.607594936708861, | |
| "grad_norm": 0.5443377494812012, | |
| "learning_rate": 1.484750836761328e-07, | |
| "loss": 0.677264392375946, | |
| "step": 1710 | |
| }, | |
| { | |
| "epoch": 3.611814345991561, | |
| "grad_norm": 6.806301593780518, | |
| "learning_rate": 1.4787451552373115e-07, | |
| "loss": 1.052730679512024, | |
| "step": 1712 | |
| }, | |
| { | |
| "epoch": 3.6160337552742616, | |
| "grad_norm": 4.058206081390381, | |
| "learning_rate": 1.4728018053109103e-07, | |
| "loss": 1.285649299621582, | |
| "step": 1714 | |
| }, | |
| { | |
| "epoch": 3.620253164556962, | |
| "grad_norm": 3.216102361679077, | |
| "learning_rate": 1.4669208593195704e-07, | |
| "loss": 0.6992135047912598, | |
| "step": 1716 | |
| }, | |
| { | |
| "epoch": 3.6244725738396624, | |
| "grad_norm": 2.728694438934326, | |
| "learning_rate": 1.4611023888412115e-07, | |
| "loss": 0.8372994065284729, | |
| "step": 1718 | |
| }, | |
| { | |
| "epoch": 3.628691983122363, | |
| "grad_norm": 8.481232643127441, | |
| "learning_rate": 1.4553464646933492e-07, | |
| "loss": 0.5174750685691833, | |
| "step": 1720 | |
| }, | |
| { | |
| "epoch": 3.632911392405063, | |
| "grad_norm": 3.1336352825164795, | |
| "learning_rate": 1.4496531569322426e-07, | |
| "loss": 1.101250410079956, | |
| "step": 1722 | |
| }, | |
| { | |
| "epoch": 3.6371308016877637, | |
| "grad_norm": 3.442155122756958, | |
| "learning_rate": 1.4440225348520354e-07, | |
| "loss": 0.6749483346939087, | |
| "step": 1724 | |
| }, | |
| { | |
| "epoch": 3.6413502109704643, | |
| "grad_norm": 3.023040771484375, | |
| "learning_rate": 1.4384546669839147e-07, | |
| "loss": 0.48659658432006836, | |
| "step": 1726 | |
| }, | |
| { | |
| "epoch": 3.6455696202531644, | |
| "grad_norm": 6.006860733032227, | |
| "learning_rate": 1.432949621095273e-07, | |
| "loss": 1.0057132244110107, | |
| "step": 1728 | |
| }, | |
| { | |
| "epoch": 3.649789029535865, | |
| "grad_norm": 5.072360992431641, | |
| "learning_rate": 1.4275074641888904e-07, | |
| "loss": 0.29357773065567017, | |
| "step": 1730 | |
| }, | |
| { | |
| "epoch": 3.6540084388185656, | |
| "grad_norm": 18.242097854614258, | |
| "learning_rate": 1.4221282625021142e-07, | |
| "loss": 1.019067406654358, | |
| "step": 1732 | |
| }, | |
| { | |
| "epoch": 3.6582278481012658, | |
| "grad_norm": 2.2106029987335205, | |
| "learning_rate": 1.4168120815060542e-07, | |
| "loss": 0.5755662322044373, | |
| "step": 1734 | |
| }, | |
| { | |
| "epoch": 3.6624472573839664, | |
| "grad_norm": 2.0836057662963867, | |
| "learning_rate": 1.4115589859047829e-07, | |
| "loss": 0.5893323421478271, | |
| "step": 1736 | |
| }, | |
| { | |
| "epoch": 3.6666666666666665, | |
| "grad_norm": 1.689981460571289, | |
| "learning_rate": 1.4063690396345539e-07, | |
| "loss": 0.8215257525444031, | |
| "step": 1738 | |
| }, | |
| { | |
| "epoch": 3.670886075949367, | |
| "grad_norm": 2.466362714767456, | |
| "learning_rate": 1.401242305863019e-07, | |
| "loss": 0.5873066782951355, | |
| "step": 1740 | |
| }, | |
| { | |
| "epoch": 3.6751054852320673, | |
| "grad_norm": 2.1418519020080566, | |
| "learning_rate": 1.3961788469884597e-07, | |
| "loss": 1.2188622951507568, | |
| "step": 1742 | |
| }, | |
| { | |
| "epoch": 3.679324894514768, | |
| "grad_norm": 2.1476902961730957, | |
| "learning_rate": 1.39117872463903e-07, | |
| "loss": 0.6782402396202087, | |
| "step": 1744 | |
| }, | |
| { | |
| "epoch": 3.6835443037974684, | |
| "grad_norm": 2.313478946685791, | |
| "learning_rate": 1.3862419996720055e-07, | |
| "loss": 0.6638330817222595, | |
| "step": 1746 | |
| }, | |
| { | |
| "epoch": 3.6877637130801686, | |
| "grad_norm": 1.2573710680007935, | |
| "learning_rate": 1.381368732173042e-07, | |
| "loss": 1.1310936212539673, | |
| "step": 1748 | |
| }, | |
| { | |
| "epoch": 3.691983122362869, | |
| "grad_norm": 4.773893356323242, | |
| "learning_rate": 1.376558981455443e-07, | |
| "loss": 0.9830767512321472, | |
| "step": 1750 | |
| }, | |
| { | |
| "epoch": 3.6962025316455698, | |
| "grad_norm": 1.9760856628417969, | |
| "learning_rate": 1.371812806059441e-07, | |
| "loss": 1.0266754627227783, | |
| "step": 1752 | |
| }, | |
| { | |
| "epoch": 3.70042194092827, | |
| "grad_norm": 1.8001806735992432, | |
| "learning_rate": 1.3671302637514825e-07, | |
| "loss": 1.1445378065109253, | |
| "step": 1754 | |
| }, | |
| { | |
| "epoch": 3.7046413502109705, | |
| "grad_norm": 2.3651130199432373, | |
| "learning_rate": 1.3625114115235267e-07, | |
| "loss": 0.8746024370193481, | |
| "step": 1756 | |
| }, | |
| { | |
| "epoch": 3.708860759493671, | |
| "grad_norm": 2.966754913330078, | |
| "learning_rate": 1.357956305592349e-07, | |
| "loss": 0.8632293343544006, | |
| "step": 1758 | |
| }, | |
| { | |
| "epoch": 3.7130801687763713, | |
| "grad_norm": 2.7932474613189697, | |
| "learning_rate": 1.35346500139886e-07, | |
| "loss": 0.8797197937965393, | |
| "step": 1760 | |
| }, | |
| { | |
| "epoch": 3.717299578059072, | |
| "grad_norm": 3.4520580768585205, | |
| "learning_rate": 1.3490375536074293e-07, | |
| "loss": 0.4202856123447418, | |
| "step": 1762 | |
| }, | |
| { | |
| "epoch": 3.721518987341772, | |
| "grad_norm": 5.053709506988525, | |
| "learning_rate": 1.3446740161052182e-07, | |
| "loss": 0.7906475067138672, | |
| "step": 1764 | |
| }, | |
| { | |
| "epoch": 3.7257383966244726, | |
| "grad_norm": 1.492531418800354, | |
| "learning_rate": 1.3403744420015293e-07, | |
| "loss": 1.0731313228607178, | |
| "step": 1766 | |
| }, | |
| { | |
| "epoch": 3.7299578059071727, | |
| "grad_norm": 4.506521701812744, | |
| "learning_rate": 1.3361388836271545e-07, | |
| "loss": 0.6830440163612366, | |
| "step": 1768 | |
| }, | |
| { | |
| "epoch": 3.7341772151898733, | |
| "grad_norm": 2.127143383026123, | |
| "learning_rate": 1.33196739253374e-07, | |
| "loss": 0.7407412528991699, | |
| "step": 1770 | |
| }, | |
| { | |
| "epoch": 3.738396624472574, | |
| "grad_norm": 2.162644147872925, | |
| "learning_rate": 1.3278600194931595e-07, | |
| "loss": 1.099405288696289, | |
| "step": 1772 | |
| }, | |
| { | |
| "epoch": 3.742616033755274, | |
| "grad_norm": 0.9268086552619934, | |
| "learning_rate": 1.323816814496896e-07, | |
| "loss": 0.7270370721817017, | |
| "step": 1774 | |
| }, | |
| { | |
| "epoch": 3.7468354430379747, | |
| "grad_norm": 0.7520632743835449, | |
| "learning_rate": 1.3198378267554327e-07, | |
| "loss": 0.7462360262870789, | |
| "step": 1776 | |
| }, | |
| { | |
| "epoch": 3.7510548523206753, | |
| "grad_norm": 1.411445140838623, | |
| "learning_rate": 1.3159231046976552e-07, | |
| "loss": 1.026281476020813, | |
| "step": 1778 | |
| }, | |
| { | |
| "epoch": 3.7552742616033754, | |
| "grad_norm": 2.437485456466675, | |
| "learning_rate": 1.3120726959702608e-07, | |
| "loss": 1.0296030044555664, | |
| "step": 1780 | |
| }, | |
| { | |
| "epoch": 3.759493670886076, | |
| "grad_norm": 2.7060513496398926, | |
| "learning_rate": 1.308286647437179e-07, | |
| "loss": 0.9808471202850342, | |
| "step": 1782 | |
| }, | |
| { | |
| "epoch": 3.7637130801687766, | |
| "grad_norm": 2.146833658218384, | |
| "learning_rate": 1.3045650051790027e-07, | |
| "loss": 0.9502108097076416, | |
| "step": 1784 | |
| }, | |
| { | |
| "epoch": 3.7679324894514767, | |
| "grad_norm": 1.3278952836990356, | |
| "learning_rate": 1.300907814492422e-07, | |
| "loss": 1.123317003250122, | |
| "step": 1786 | |
| }, | |
| { | |
| "epoch": 3.7721518987341773, | |
| "grad_norm": 14.552665710449219, | |
| "learning_rate": 1.2973151198896823e-07, | |
| "loss": 0.525389552116394, | |
| "step": 1788 | |
| }, | |
| { | |
| "epoch": 3.7763713080168775, | |
| "grad_norm": 1.6734447479248047, | |
| "learning_rate": 1.2937869650980342e-07, | |
| "loss": 0.7029292583465576, | |
| "step": 1790 | |
| }, | |
| { | |
| "epoch": 3.780590717299578, | |
| "grad_norm": 1.3970534801483154, | |
| "learning_rate": 1.2903233930592022e-07, | |
| "loss": 1.0671159029006958, | |
| "step": 1792 | |
| }, | |
| { | |
| "epoch": 3.7848101265822782, | |
| "grad_norm": 2.8452141284942627, | |
| "learning_rate": 1.2869244459288677e-07, | |
| "loss": 0.7484707832336426, | |
| "step": 1794 | |
| }, | |
| { | |
| "epoch": 3.789029535864979, | |
| "grad_norm": 2.7676146030426025, | |
| "learning_rate": 1.2835901650761496e-07, | |
| "loss": 1.1054531335830688, | |
| "step": 1796 | |
| }, | |
| { | |
| "epoch": 3.7932489451476794, | |
| "grad_norm": 2.6690499782562256, | |
| "learning_rate": 1.2803205910831044e-07, | |
| "loss": 1.1910511255264282, | |
| "step": 1798 | |
| }, | |
| { | |
| "epoch": 3.7974683544303796, | |
| "grad_norm": 2.3067097663879395, | |
| "learning_rate": 1.2771157637442308e-07, | |
| "loss": 1.0350401401519775, | |
| "step": 1800 | |
| }, | |
| { | |
| "epoch": 3.80168776371308, | |
| "grad_norm": 2.0456929206848145, | |
| "learning_rate": 1.273975722065986e-07, | |
| "loss": 1.1489591598510742, | |
| "step": 1802 | |
| }, | |
| { | |
| "epoch": 3.8059071729957807, | |
| "grad_norm": 1.7378591299057007, | |
| "learning_rate": 1.2709005042663118e-07, | |
| "loss": 0.6581465005874634, | |
| "step": 1804 | |
| }, | |
| { | |
| "epoch": 3.810126582278481, | |
| "grad_norm": 6.99116849899292, | |
| "learning_rate": 1.267890147774167e-07, | |
| "loss": 0.29897159337997437, | |
| "step": 1806 | |
| }, | |
| { | |
| "epoch": 3.8143459915611815, | |
| "grad_norm": 4.381340026855469, | |
| "learning_rate": 1.264944689229072e-07, | |
| "loss": 1.0360081195831299, | |
| "step": 1808 | |
| }, | |
| { | |
| "epoch": 3.818565400843882, | |
| "grad_norm": 2.1093826293945312, | |
| "learning_rate": 1.2620641644806678e-07, | |
| "loss": 1.0628427267074585, | |
| "step": 1810 | |
| }, | |
| { | |
| "epoch": 3.8227848101265822, | |
| "grad_norm": 9.374409675598145, | |
| "learning_rate": 1.2592486085882725e-07, | |
| "loss": 0.7481462955474854, | |
| "step": 1812 | |
| }, | |
| { | |
| "epoch": 3.827004219409283, | |
| "grad_norm": 1.5822006464004517, | |
| "learning_rate": 1.25649805582046e-07, | |
| "loss": 1.0469331741333008, | |
| "step": 1814 | |
| }, | |
| { | |
| "epoch": 3.831223628691983, | |
| "grad_norm": 2.6007158756256104, | |
| "learning_rate": 1.25381253965464e-07, | |
| "loss": 0.9370917081832886, | |
| "step": 1816 | |
| }, | |
| { | |
| "epoch": 3.8354430379746836, | |
| "grad_norm": 3.8402206897735596, | |
| "learning_rate": 1.2511920927766525e-07, | |
| "loss": 0.9214923977851868, | |
| "step": 1818 | |
| }, | |
| { | |
| "epoch": 3.8396624472573837, | |
| "grad_norm": 11.853067398071289, | |
| "learning_rate": 1.2486367470803673e-07, | |
| "loss": 0.8060356378555298, | |
| "step": 1820 | |
| }, | |
| { | |
| "epoch": 3.8438818565400843, | |
| "grad_norm": 1.407483696937561, | |
| "learning_rate": 1.246146533667299e-07, | |
| "loss": 1.076265573501587, | |
| "step": 1822 | |
| }, | |
| { | |
| "epoch": 3.848101265822785, | |
| "grad_norm": 3.0918633937835693, | |
| "learning_rate": 1.243721482846227e-07, | |
| "loss": 0.9416312575340271, | |
| "step": 1824 | |
| }, | |
| { | |
| "epoch": 3.852320675105485, | |
| "grad_norm": 2.7018940448760986, | |
| "learning_rate": 1.2413616241328252e-07, | |
| "loss": 1.026483416557312, | |
| "step": 1826 | |
| }, | |
| { | |
| "epoch": 3.8565400843881856, | |
| "grad_norm": 2.9005277156829834, | |
| "learning_rate": 1.2390669862493044e-07, | |
| "loss": 1.033530354499817, | |
| "step": 1828 | |
| }, | |
| { | |
| "epoch": 3.8607594936708862, | |
| "grad_norm": 1.5697400569915771, | |
| "learning_rate": 1.2368375971240647e-07, | |
| "loss": 1.0893433094024658, | |
| "step": 1830 | |
| }, | |
| { | |
| "epoch": 3.8649789029535864, | |
| "grad_norm": 7.101255893707275, | |
| "learning_rate": 1.2346734838913498e-07, | |
| "loss": 0.4264039397239685, | |
| "step": 1832 | |
| }, | |
| { | |
| "epoch": 3.869198312236287, | |
| "grad_norm": 1.942752718925476, | |
| "learning_rate": 1.2325746728909227e-07, | |
| "loss": 0.6822599172592163, | |
| "step": 1834 | |
| }, | |
| { | |
| "epoch": 3.8734177215189876, | |
| "grad_norm": 2.711249351501465, | |
| "learning_rate": 1.2305411896677423e-07, | |
| "loss": 0.8705965280532837, | |
| "step": 1836 | |
| }, | |
| { | |
| "epoch": 3.8776371308016877, | |
| "grad_norm": 3.3902530670166016, | |
| "learning_rate": 1.228573058971652e-07, | |
| "loss": 0.7575594186782837, | |
| "step": 1838 | |
| }, | |
| { | |
| "epoch": 3.8818565400843883, | |
| "grad_norm": 5.287688732147217, | |
| "learning_rate": 1.2266703047570794e-07, | |
| "loss": 0.8974352478981018, | |
| "step": 1840 | |
| }, | |
| { | |
| "epoch": 3.8860759493670884, | |
| "grad_norm": 2.1966428756713867, | |
| "learning_rate": 1.2248329501827461e-07, | |
| "loss": 0.7821562886238098, | |
| "step": 1842 | |
| }, | |
| { | |
| "epoch": 3.890295358649789, | |
| "grad_norm": 2.125584125518799, | |
| "learning_rate": 1.2230610176113828e-07, | |
| "loss": 0.7629109621047974, | |
| "step": 1844 | |
| }, | |
| { | |
| "epoch": 3.894514767932489, | |
| "grad_norm": 1.5011521577835083, | |
| "learning_rate": 1.2213545286094602e-07, | |
| "loss": 1.0465257167816162, | |
| "step": 1846 | |
| }, | |
| { | |
| "epoch": 3.8987341772151898, | |
| "grad_norm": 3.0355629920959473, | |
| "learning_rate": 1.219713503946922e-07, | |
| "loss": 0.5780481100082397, | |
| "step": 1848 | |
| }, | |
| { | |
| "epoch": 3.9029535864978904, | |
| "grad_norm": 2.1277599334716797, | |
| "learning_rate": 1.21813796359694e-07, | |
| "loss": 1.0891631841659546, | |
| "step": 1850 | |
| }, | |
| { | |
| "epoch": 3.9071729957805905, | |
| "grad_norm": 1.4144175052642822, | |
| "learning_rate": 1.2166279267356617e-07, | |
| "loss": 1.0926233530044556, | |
| "step": 1852 | |
| }, | |
| { | |
| "epoch": 3.911392405063291, | |
| "grad_norm": 1.8264589309692383, | |
| "learning_rate": 1.2151834117419832e-07, | |
| "loss": 1.0842887163162231, | |
| "step": 1854 | |
| }, | |
| { | |
| "epoch": 3.9156118143459917, | |
| "grad_norm": 1.3971328735351562, | |
| "learning_rate": 1.2138044361973238e-07, | |
| "loss": 1.1029634475708008, | |
| "step": 1856 | |
| }, | |
| { | |
| "epoch": 3.919831223628692, | |
| "grad_norm": 1.3931989669799805, | |
| "learning_rate": 1.2124910168854125e-07, | |
| "loss": 1.092046856880188, | |
| "step": 1858 | |
| }, | |
| { | |
| "epoch": 3.9240506329113924, | |
| "grad_norm": 1.2768291234970093, | |
| "learning_rate": 1.21124316979208e-07, | |
| "loss": 1.0661836862564087, | |
| "step": 1860 | |
| }, | |
| { | |
| "epoch": 3.928270042194093, | |
| "grad_norm": 5.809596538543701, | |
| "learning_rate": 1.210060910105071e-07, | |
| "loss": 0.9497167468070984, | |
| "step": 1862 | |
| }, | |
| { | |
| "epoch": 3.932489451476793, | |
| "grad_norm": 1.809336543083191, | |
| "learning_rate": 1.208944252213854e-07, | |
| "loss": 0.7419611811637878, | |
| "step": 1864 | |
| }, | |
| { | |
| "epoch": 3.9367088607594938, | |
| "grad_norm": 3.3719143867492676, | |
| "learning_rate": 1.2078932097094474e-07, | |
| "loss": 1.3616517782211304, | |
| "step": 1866 | |
| }, | |
| { | |
| "epoch": 3.9409282700421944, | |
| "grad_norm": 1.4581533670425415, | |
| "learning_rate": 1.2069077953842544e-07, | |
| "loss": 1.0452879667282104, | |
| "step": 1868 | |
| }, | |
| { | |
| "epoch": 3.9451476793248945, | |
| "grad_norm": 3.9840292930603027, | |
| "learning_rate": 1.2059880212319078e-07, | |
| "loss": 0.7806097269058228, | |
| "step": 1870 | |
| }, | |
| { | |
| "epoch": 3.9493670886075947, | |
| "grad_norm": 0.3003561794757843, | |
| "learning_rate": 1.2051338984471242e-07, | |
| "loss": 0.568496584892273, | |
| "step": 1872 | |
| }, | |
| { | |
| "epoch": 3.9535864978902953, | |
| "grad_norm": 3.0003912448883057, | |
| "learning_rate": 1.2043454374255645e-07, | |
| "loss": 0.5840458273887634, | |
| "step": 1874 | |
| }, | |
| { | |
| "epoch": 3.957805907172996, | |
| "grad_norm": 17.255149841308594, | |
| "learning_rate": 1.203622647763713e-07, | |
| "loss": 0.9891324639320374, | |
| "step": 1876 | |
| }, | |
| { | |
| "epoch": 3.962025316455696, | |
| "grad_norm": 4.442596435546875, | |
| "learning_rate": 1.2029655382587557e-07, | |
| "loss": 0.937990665435791, | |
| "step": 1878 | |
| }, | |
| { | |
| "epoch": 3.9662447257383966, | |
| "grad_norm": 1.3784996271133423, | |
| "learning_rate": 1.2023741169084767e-07, | |
| "loss": 0.6944407224655151, | |
| "step": 1880 | |
| }, | |
| { | |
| "epoch": 3.970464135021097, | |
| "grad_norm": 1.8049193620681763, | |
| "learning_rate": 1.2018483909111572e-07, | |
| "loss": 1.0277503728866577, | |
| "step": 1882 | |
| }, | |
| { | |
| "epoch": 3.9746835443037973, | |
| "grad_norm": 6.727908611297607, | |
| "learning_rate": 1.2013883666654907e-07, | |
| "loss": 0.509749174118042, | |
| "step": 1884 | |
| }, | |
| { | |
| "epoch": 3.978902953586498, | |
| "grad_norm": 2.5991525650024414, | |
| "learning_rate": 1.2009940497705058e-07, | |
| "loss": 1.0679656267166138, | |
| "step": 1886 | |
| }, | |
| { | |
| "epoch": 3.9831223628691985, | |
| "grad_norm": 2.2473011016845703, | |
| "learning_rate": 1.2006654450254938e-07, | |
| "loss": 0.7142981290817261, | |
| "step": 1888 | |
| }, | |
| { | |
| "epoch": 3.9873417721518987, | |
| "grad_norm": 3.0477726459503174, | |
| "learning_rate": 1.2004025564299563e-07, | |
| "loss": 1.0713993310928345, | |
| "step": 1890 | |
| }, | |
| { | |
| "epoch": 3.9915611814345993, | |
| "grad_norm": 8.713078498840332, | |
| "learning_rate": 1.2002053871835507e-07, | |
| "loss": 0.6879635453224182, | |
| "step": 1892 | |
| }, | |
| { | |
| "epoch": 3.9957805907173, | |
| "grad_norm": 1.979125738143921, | |
| "learning_rate": 1.2000739396860554e-07, | |
| "loss": 1.0905542373657227, | |
| "step": 1894 | |
| }, | |
| { | |
| "epoch": 4.0, | |
| "grad_norm": 1.923147439956665, | |
| "learning_rate": 1.2000082155373382e-07, | |
| "loss": 1.1579601764678955, | |
| "step": 1896 | |
| }, | |
| { | |
| "epoch": 4.0, | |
| "step": 1896, | |
| "total_flos": 3.5948540672197263e+18, | |
| "train_loss": 1.0285371271618309, | |
| "train_runtime": 8697.1879, | |
| "train_samples_per_second": 6.54, | |
| "train_steps_per_second": 0.218 | |
| } | |
| ], | |
| "logging_steps": 2, | |
| "max_steps": 1896, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 4, | |
| "save_steps": 99999, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": false, | |
| "should_training_stop": false | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 3.5948540672197263e+18, | |
| "train_batch_size": 3, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |