Image-Text-to-Text
Transformers
Safetensors
qwen3_5
llama-factory
full
Generated from Trainer
conversational
Instructions to use furproxy/9b-109 with libraries, inference providers, notebooks, and local apps. Follow these links to get started.
- Libraries
- Transformers
How to use furproxy/9b-109 with Transformers:
```python
# Use a pipeline as a high-level helper
from transformers import pipeline

pipe = pipeline("image-text-to-text", model="furproxy/9b-109")
messages = [
    {
        "role": "user",
        "content": [
            {"type": "image", "url": "https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/p-blog/candy.JPG"},
            {"type": "text", "text": "What animal is on the candy?"}
        ]
    },
]
pipe(text=messages)
```

```python
# Load model directly
from transformers import AutoProcessor, AutoModelForImageTextToText

processor = AutoProcessor.from_pretrained("furproxy/9b-109")
model = AutoModelForImageTextToText.from_pretrained("furproxy/9b-109")
messages = [
    {
        "role": "user",
        "content": [
            {"type": "image", "url": "https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/p-blog/candy.JPG"},
            {"type": "text", "text": "What animal is on the candy?"}
        ]
    },
]
inputs = processor.apply_chat_template(
    messages,
    add_generation_prompt=True,
    tokenize=True,
    return_dict=True,
    return_tensors="pt",
).to(model.device)

outputs = model.generate(**inputs, max_new_tokens=40)
print(processor.decode(outputs[0][inputs["input_ids"].shape[-1]:]))
```

- Notebooks
- Google Colab
- Kaggle
- Local Apps Settings
- vLLM
How to use furproxy/9b-109 with vLLM:
Install from pip and serve model
```bash
# Install vLLM from pip:
pip install vllm

# Start the vLLM server:
vllm serve "furproxy/9b-109"

# Call the server using curl (OpenAI-compatible API):
curl -X POST "http://localhost:8000/v1/chat/completions" \
    -H "Content-Type: application/json" \
    --data '{
        "model": "furproxy/9b-109",
        "messages": [
            {
                "role": "user",
                "content": [
                    {
                        "type": "text",
                        "text": "Describe this image in one sentence."
                    },
                    {
                        "type": "image_url",
                        "image_url": {
                            "url": "https://cdn.britannica.com/61/93061-050-99147DCE/Statue-of-Liberty-Island-New-York-Bay.jpg"
                        }
                    }
                ]
            }
        ]
    }'
```

Use Docker
docker model run hf.co/furproxy/9b-109
- SGLang
How to use furproxy/9b-109 with SGLang:
Install from pip and serve model
```bash
# Install SGLang from pip:
pip install sglang

# Start the SGLang server:
python3 -m sglang.launch_server \
    --model-path "furproxy/9b-109" \
    --host 0.0.0.0 \
    --port 30000

# Call the server using curl (OpenAI-compatible API):
curl -X POST "http://localhost:30000/v1/chat/completions" \
    -H "Content-Type: application/json" \
    --data '{
        "model": "furproxy/9b-109",
        "messages": [
            {
                "role": "user",
                "content": [
                    {
                        "type": "text",
                        "text": "Describe this image in one sentence."
                    },
                    {
                        "type": "image_url",
                        "image_url": {
                            "url": "https://cdn.britannica.com/61/93061-050-99147DCE/Statue-of-Liberty-Island-New-York-Bay.jpg"
                        }
                    }
                ]
            }
        ]
    }'
```

Use Docker images
```bash
docker run --gpus all \
    --shm-size 32g \
    -p 30000:30000 \
    -v ~/.cache/huggingface:/root/.cache/huggingface \
    --env "HF_TOKEN=<secret>" \
    --ipc=host \
    lmsysorg/sglang:latest \
    python3 -m sglang.launch_server \
        --model-path "furproxy/9b-109" \
        --host 0.0.0.0 \
        --port 30000

# Call the server using curl (OpenAI-compatible API):
curl -X POST "http://localhost:30000/v1/chat/completions" \
    -H "Content-Type: application/json" \
    --data '{
        "model": "furproxy/9b-109",
        "messages": [
            {
                "role": "user",
                "content": [
                    {
                        "type": "text",
                        "text": "Describe this image in one sentence."
                    },
                    {
                        "type": "image_url",
                        "image_url": {
                            "url": "https://cdn.britannica.com/61/93061-050-99147DCE/Statue-of-Liberty-Island-New-York-Bay.jpg"
                        }
                    }
                ]
            }
        ]
    }'
```

- Docker Model Runner
How to use furproxy/9b-109 with Docker Model Runner:
docker model run hf.co/furproxy/9b-109
| { | |
| "best_global_step": null, | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 6.0, | |
| "eval_steps": 500, | |
| "global_step": 2844, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.004219409282700422, | |
| "grad_norm": 14.014748573303223, | |
| "learning_rate": 2.797202797202797e-08, | |
| "loss": 2.1982650756835938, | |
| "step": 2 | |
| }, | |
| { | |
| "epoch": 0.008438818565400843, | |
| "grad_norm": 12.766752243041992, | |
| "learning_rate": 8.391608391608391e-08, | |
| "loss": 1.7798584699630737, | |
| "step": 4 | |
| }, | |
| { | |
| "epoch": 0.012658227848101266, | |
| "grad_norm": 2.7050275802612305, | |
| "learning_rate": 1.3986013986013987e-07, | |
| "loss": 1.9378855228424072, | |
| "step": 6 | |
| }, | |
| { | |
| "epoch": 0.016877637130801686, | |
| "grad_norm": 6.62382173538208, | |
| "learning_rate": 1.958041958041958e-07, | |
| "loss": 1.9494550228118896, | |
| "step": 8 | |
| }, | |
| { | |
| "epoch": 0.02109704641350211, | |
| "grad_norm": 6.674213886260986, | |
| "learning_rate": 2.517482517482518e-07, | |
| "loss": 1.8559846878051758, | |
| "step": 10 | |
| }, | |
| { | |
| "epoch": 0.02531645569620253, | |
| "grad_norm": 2.3814892768859863, | |
| "learning_rate": 3.076923076923077e-07, | |
| "loss": 1.3210176229476929, | |
| "step": 12 | |
| }, | |
| { | |
| "epoch": 0.029535864978902954, | |
| "grad_norm": 10.13178825378418, | |
| "learning_rate": 3.636363636363636e-07, | |
| "loss": 1.6618098020553589, | |
| "step": 14 | |
| }, | |
| { | |
| "epoch": 0.03375527426160337, | |
| "grad_norm": 10.594144821166992, | |
| "learning_rate": 4.1958041958041957e-07, | |
| "loss": 2.217015266418457, | |
| "step": 16 | |
| }, | |
| { | |
| "epoch": 0.0379746835443038, | |
| "grad_norm": 2.019131898880005, | |
| "learning_rate": 4.755244755244755e-07, | |
| "loss": 1.8345531225204468, | |
| "step": 18 | |
| }, | |
| { | |
| "epoch": 0.04219409282700422, | |
| "grad_norm": 2.9380502700805664, | |
| "learning_rate": 5.314685314685314e-07, | |
| "loss": 1.9022338390350342, | |
| "step": 20 | |
| }, | |
| { | |
| "epoch": 0.046413502109704644, | |
| "grad_norm": 2.7503366470336914, | |
| "learning_rate": 5.874125874125873e-07, | |
| "loss": 1.5993010997772217, | |
| "step": 22 | |
| }, | |
| { | |
| "epoch": 0.05063291139240506, | |
| "grad_norm": 9.334077835083008, | |
| "learning_rate": 6.433566433566433e-07, | |
| "loss": 1.841583013534546, | |
| "step": 24 | |
| }, | |
| { | |
| "epoch": 0.05485232067510549, | |
| "grad_norm": 4.110686302185059, | |
| "learning_rate": 6.993006993006993e-07, | |
| "loss": 1.7132441997528076, | |
| "step": 26 | |
| }, | |
| { | |
| "epoch": 0.05907172995780591, | |
| "grad_norm": 5.482414245605469, | |
| "learning_rate": 7.552447552447552e-07, | |
| "loss": 1.9471144676208496, | |
| "step": 28 | |
| }, | |
| { | |
| "epoch": 0.06329113924050633, | |
| "grad_norm": 7.981461524963379, | |
| "learning_rate": 8.111888111888111e-07, | |
| "loss": 1.8706644773483276, | |
| "step": 30 | |
| }, | |
| { | |
| "epoch": 0.06751054852320675, | |
| "grad_norm": 2.825488567352295, | |
| "learning_rate": 8.67132867132867e-07, | |
| "loss": 1.7588139772415161, | |
| "step": 32 | |
| }, | |
| { | |
| "epoch": 0.07172995780590717, | |
| "grad_norm": 27.41690444946289, | |
| "learning_rate": 9.230769230769231e-07, | |
| "loss": 1.775272011756897, | |
| "step": 34 | |
| }, | |
| { | |
| "epoch": 0.0759493670886076, | |
| "grad_norm": 3.1006486415863037, | |
| "learning_rate": 9.79020979020979e-07, | |
| "loss": 1.784650444984436, | |
| "step": 36 | |
| }, | |
| { | |
| "epoch": 0.08016877637130802, | |
| "grad_norm": 7.034008026123047, | |
| "learning_rate": 1.034965034965035e-06, | |
| "loss": 1.5920319557189941, | |
| "step": 38 | |
| }, | |
| { | |
| "epoch": 0.08438818565400844, | |
| "grad_norm": 2.675238847732544, | |
| "learning_rate": 1.0909090909090908e-06, | |
| "loss": 1.7519234418869019, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 0.08860759493670886, | |
| "grad_norm": 10.898767471313477, | |
| "learning_rate": 1.1468531468531469e-06, | |
| "loss": 1.3292922973632812, | |
| "step": 42 | |
| }, | |
| { | |
| "epoch": 0.09282700421940929, | |
| "grad_norm": 5.946654796600342, | |
| "learning_rate": 1.2027972027972026e-06, | |
| "loss": 1.9151337146759033, | |
| "step": 44 | |
| }, | |
| { | |
| "epoch": 0.0970464135021097, | |
| "grad_norm": 4.006372451782227, | |
| "learning_rate": 1.2587412587412587e-06, | |
| "loss": 1.734480619430542, | |
| "step": 46 | |
| }, | |
| { | |
| "epoch": 0.10126582278481013, | |
| "grad_norm": 1.6106147766113281, | |
| "learning_rate": 1.3146853146853144e-06, | |
| "loss": 1.6714043617248535, | |
| "step": 48 | |
| }, | |
| { | |
| "epoch": 0.10548523206751055, | |
| "grad_norm": 3.555082321166992, | |
| "learning_rate": 1.3706293706293705e-06, | |
| "loss": 0.9601479172706604, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 0.10970464135021098, | |
| "grad_norm": 18.376201629638672, | |
| "learning_rate": 1.4265734265734267e-06, | |
| "loss": 0.9682204723358154, | |
| "step": 52 | |
| }, | |
| { | |
| "epoch": 0.11392405063291139, | |
| "grad_norm": 3.829577684402466, | |
| "learning_rate": 1.4825174825174824e-06, | |
| "loss": 0.9149891138076782, | |
| "step": 54 | |
| }, | |
| { | |
| "epoch": 0.11814345991561181, | |
| "grad_norm": 8.751733779907227, | |
| "learning_rate": 1.5384615384615385e-06, | |
| "loss": 1.5466492176055908, | |
| "step": 56 | |
| }, | |
| { | |
| "epoch": 0.12236286919831224, | |
| "grad_norm": 43.25166702270508, | |
| "learning_rate": 1.5944055944055942e-06, | |
| "loss": 0.8738414645195007, | |
| "step": 58 | |
| }, | |
| { | |
| "epoch": 0.12658227848101267, | |
| "grad_norm": 2.858604669570923, | |
| "learning_rate": 1.6503496503496503e-06, | |
| "loss": 1.5882339477539062, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 0.1308016877637131, | |
| "grad_norm": 2.080610990524292, | |
| "learning_rate": 1.7062937062937063e-06, | |
| "loss": 1.6133513450622559, | |
| "step": 62 | |
| }, | |
| { | |
| "epoch": 0.1350210970464135, | |
| "grad_norm": 1.6210132837295532, | |
| "learning_rate": 1.7622377622377622e-06, | |
| "loss": 1.1352812051773071, | |
| "step": 64 | |
| }, | |
| { | |
| "epoch": 0.13924050632911392, | |
| "grad_norm": 4.165830135345459, | |
| "learning_rate": 1.818181818181818e-06, | |
| "loss": 0.8928266763687134, | |
| "step": 66 | |
| }, | |
| { | |
| "epoch": 0.14345991561181434, | |
| "grad_norm": 2.4804110527038574, | |
| "learning_rate": 1.874125874125874e-06, | |
| "loss": 1.182489275932312, | |
| "step": 68 | |
| }, | |
| { | |
| "epoch": 0.14767932489451477, | |
| "grad_norm": 11.683263778686523, | |
| "learning_rate": 1.9300699300699297e-06, | |
| "loss": 1.0528309345245361, | |
| "step": 70 | |
| }, | |
| { | |
| "epoch": 0.1518987341772152, | |
| "grad_norm": 5.113679885864258, | |
| "learning_rate": 1.986013986013986e-06, | |
| "loss": 1.3555092811584473, | |
| "step": 72 | |
| }, | |
| { | |
| "epoch": 0.15611814345991562, | |
| "grad_norm": 3.419110059738159, | |
| "learning_rate": 2.041958041958042e-06, | |
| "loss": 1.1131813526153564, | |
| "step": 74 | |
| }, | |
| { | |
| "epoch": 0.16033755274261605, | |
| "grad_norm": 5.5904622077941895, | |
| "learning_rate": 2.097902097902098e-06, | |
| "loss": 0.9376708269119263, | |
| "step": 76 | |
| }, | |
| { | |
| "epoch": 0.16455696202531644, | |
| "grad_norm": 4.4593892097473145, | |
| "learning_rate": 2.1538461538461538e-06, | |
| "loss": 1.4518260955810547, | |
| "step": 78 | |
| }, | |
| { | |
| "epoch": 0.16877637130801687, | |
| "grad_norm": 1.9147013425827026, | |
| "learning_rate": 2.2097902097902093e-06, | |
| "loss": 1.4421272277832031, | |
| "step": 80 | |
| }, | |
| { | |
| "epoch": 0.1729957805907173, | |
| "grad_norm": 4.915895462036133, | |
| "learning_rate": 2.2657342657342656e-06, | |
| "loss": 1.4590272903442383, | |
| "step": 82 | |
| }, | |
| { | |
| "epoch": 0.17721518987341772, | |
| "grad_norm": 6.905501842498779, | |
| "learning_rate": 2.3216783216783215e-06, | |
| "loss": 0.9708279371261597, | |
| "step": 84 | |
| }, | |
| { | |
| "epoch": 0.18143459915611815, | |
| "grad_norm": 7.524752140045166, | |
| "learning_rate": 2.3776223776223774e-06, | |
| "loss": 1.141646385192871, | |
| "step": 86 | |
| }, | |
| { | |
| "epoch": 0.18565400843881857, | |
| "grad_norm": 1.9856427907943726, | |
| "learning_rate": 2.4335664335664338e-06, | |
| "loss": 1.3669147491455078, | |
| "step": 88 | |
| }, | |
| { | |
| "epoch": 0.189873417721519, | |
| "grad_norm": 5.223474025726318, | |
| "learning_rate": 2.4895104895104893e-06, | |
| "loss": 0.6930243968963623, | |
| "step": 90 | |
| }, | |
| { | |
| "epoch": 0.1940928270042194, | |
| "grad_norm": 3.9480249881744385, | |
| "learning_rate": 2.545454545454545e-06, | |
| "loss": 1.7789967060089111, | |
| "step": 92 | |
| }, | |
| { | |
| "epoch": 0.19831223628691982, | |
| "grad_norm": 6.213054180145264, | |
| "learning_rate": 2.601398601398601e-06, | |
| "loss": 0.9946894645690918, | |
| "step": 94 | |
| }, | |
| { | |
| "epoch": 0.20253164556962025, | |
| "grad_norm": 2.132254123687744, | |
| "learning_rate": 2.6573426573426574e-06, | |
| "loss": 1.4530797004699707, | |
| "step": 96 | |
| }, | |
| { | |
| "epoch": 0.20675105485232068, | |
| "grad_norm": 1.8356496095657349, | |
| "learning_rate": 2.7132867132867134e-06, | |
| "loss": 1.5200846195220947, | |
| "step": 98 | |
| }, | |
| { | |
| "epoch": 0.2109704641350211, | |
| "grad_norm": 14.19537353515625, | |
| "learning_rate": 2.769230769230769e-06, | |
| "loss": 1.292062759399414, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.21518987341772153, | |
| "grad_norm": 2.111111640930176, | |
| "learning_rate": 2.8251748251748248e-06, | |
| "loss": 1.1042956113815308, | |
| "step": 102 | |
| }, | |
| { | |
| "epoch": 0.21940928270042195, | |
| "grad_norm": 1.8971158266067505, | |
| "learning_rate": 2.881118881118881e-06, | |
| "loss": 1.0220731496810913, | |
| "step": 104 | |
| }, | |
| { | |
| "epoch": 0.22362869198312235, | |
| "grad_norm": 5.727835178375244, | |
| "learning_rate": 2.937062937062937e-06, | |
| "loss": 1.0205355882644653, | |
| "step": 106 | |
| }, | |
| { | |
| "epoch": 0.22784810126582278, | |
| "grad_norm": 3.1581368446350098, | |
| "learning_rate": 2.993006993006993e-06, | |
| "loss": 1.0161347389221191, | |
| "step": 108 | |
| }, | |
| { | |
| "epoch": 0.2320675105485232, | |
| "grad_norm": 2.3190581798553467, | |
| "learning_rate": 3.0489510489510484e-06, | |
| "loss": 1.0544636249542236, | |
| "step": 110 | |
| }, | |
| { | |
| "epoch": 0.23628691983122363, | |
| "grad_norm": 5.929664611816406, | |
| "learning_rate": 3.1048951048951048e-06, | |
| "loss": 1.4253602027893066, | |
| "step": 112 | |
| }, | |
| { | |
| "epoch": 0.24050632911392406, | |
| "grad_norm": 2.6725683212280273, | |
| "learning_rate": 3.1608391608391607e-06, | |
| "loss": 1.318920612335205, | |
| "step": 114 | |
| }, | |
| { | |
| "epoch": 0.24472573839662448, | |
| "grad_norm": 7.776963710784912, | |
| "learning_rate": 3.2167832167832166e-06, | |
| "loss": 1.6443480253219604, | |
| "step": 116 | |
| }, | |
| { | |
| "epoch": 0.2489451476793249, | |
| "grad_norm": 2.3923261165618896, | |
| "learning_rate": 3.272727272727273e-06, | |
| "loss": 1.3153703212738037, | |
| "step": 118 | |
| }, | |
| { | |
| "epoch": 0.25316455696202533, | |
| "grad_norm": 3.2848472595214844, | |
| "learning_rate": 3.3286713286713284e-06, | |
| "loss": 1.0184035301208496, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 0.25738396624472576, | |
| "grad_norm": 4.440483093261719, | |
| "learning_rate": 3.3846153846153843e-06, | |
| "loss": 1.312201976776123, | |
| "step": 122 | |
| }, | |
| { | |
| "epoch": 0.2616033755274262, | |
| "grad_norm": 4.970678806304932, | |
| "learning_rate": 3.4405594405594402e-06, | |
| "loss": 1.3157330751419067, | |
| "step": 124 | |
| }, | |
| { | |
| "epoch": 0.26582278481012656, | |
| "grad_norm": 3.659862995147705, | |
| "learning_rate": 3.4965034965034966e-06, | |
| "loss": 1.4062931537628174, | |
| "step": 126 | |
| }, | |
| { | |
| "epoch": 0.270042194092827, | |
| "grad_norm": 4.357997894287109, | |
| "learning_rate": 3.5524475524475525e-06, | |
| "loss": 0.9154614210128784, | |
| "step": 128 | |
| }, | |
| { | |
| "epoch": 0.2742616033755274, | |
| "grad_norm": 4.5792341232299805, | |
| "learning_rate": 3.608391608391608e-06, | |
| "loss": 1.1704046726226807, | |
| "step": 130 | |
| }, | |
| { | |
| "epoch": 0.27848101265822783, | |
| "grad_norm": 5.039772033691406, | |
| "learning_rate": 3.664335664335664e-06, | |
| "loss": 1.2377243041992188, | |
| "step": 132 | |
| }, | |
| { | |
| "epoch": 0.28270042194092826, | |
| "grad_norm": 6.672406196594238, | |
| "learning_rate": 3.7202797202797202e-06, | |
| "loss": 0.7351927757263184, | |
| "step": 134 | |
| }, | |
| { | |
| "epoch": 0.2869198312236287, | |
| "grad_norm": 2.329267740249634, | |
| "learning_rate": 3.776223776223776e-06, | |
| "loss": 0.9117053151130676, | |
| "step": 136 | |
| }, | |
| { | |
| "epoch": 0.2911392405063291, | |
| "grad_norm": 4.902188777923584, | |
| "learning_rate": 3.832167832167832e-06, | |
| "loss": 1.4102413654327393, | |
| "step": 138 | |
| }, | |
| { | |
| "epoch": 0.29535864978902954, | |
| "grad_norm": 7.462285041809082, | |
| "learning_rate": 3.888111888111888e-06, | |
| "loss": 0.9595804214477539, | |
| "step": 140 | |
| }, | |
| { | |
| "epoch": 0.29957805907172996, | |
| "grad_norm": 4.3409953117370605, | |
| "learning_rate": 3.944055944055944e-06, | |
| "loss": 1.2982699871063232, | |
| "step": 142 | |
| }, | |
| { | |
| "epoch": 0.3037974683544304, | |
| "grad_norm": 5.797815799713135, | |
| "learning_rate": 4e-06, | |
| "loss": 1.0992412567138672, | |
| "step": 144 | |
| }, | |
| { | |
| "epoch": 0.3080168776371308, | |
| "grad_norm": 3.4705042839050293, | |
| "learning_rate": 3.999995129731755e-06, | |
| "loss": 1.4175902605056763, | |
| "step": 146 | |
| }, | |
| { | |
| "epoch": 0.31223628691983124, | |
| "grad_norm": 3.2805113792419434, | |
| "learning_rate": 3.999980518953377e-06, | |
| "loss": 1.3948296308517456, | |
| "step": 148 | |
| }, | |
| { | |
| "epoch": 0.31645569620253167, | |
| "grad_norm": 2.5500190258026123, | |
| "learning_rate": 3.9999561677439284e-06, | |
| "loss": 1.2504572868347168, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 0.3206751054852321, | |
| "grad_norm": 2.943164825439453, | |
| "learning_rate": 3.999922076235186e-06, | |
| "loss": 1.3152413368225098, | |
| "step": 152 | |
| }, | |
| { | |
| "epoch": 0.32489451476793246, | |
| "grad_norm": 1.8291728496551514, | |
| "learning_rate": 3.999878244611632e-06, | |
| "loss": 1.4914839267730713, | |
| "step": 154 | |
| }, | |
| { | |
| "epoch": 0.3291139240506329, | |
| "grad_norm": 3.691744327545166, | |
| "learning_rate": 3.999824673110458e-06, | |
| "loss": 1.2806551456451416, | |
| "step": 156 | |
| }, | |
| { | |
| "epoch": 0.3333333333333333, | |
| "grad_norm": 3.6490440368652344, | |
| "learning_rate": 3.999761362021559e-06, | |
| "loss": 1.3481640815734863, | |
| "step": 158 | |
| }, | |
| { | |
| "epoch": 0.33755274261603374, | |
| "grad_norm": 2.0211308002471924, | |
| "learning_rate": 3.999688311687539e-06, | |
| "loss": 1.3426798582077026, | |
| "step": 160 | |
| }, | |
| { | |
| "epoch": 0.34177215189873417, | |
| "grad_norm": 3.4758718013763428, | |
| "learning_rate": 3.9996055225037035e-06, | |
| "loss": 0.8756759762763977, | |
| "step": 162 | |
| }, | |
| { | |
| "epoch": 0.3459915611814346, | |
| "grad_norm": 3.027031660079956, | |
| "learning_rate": 3.999512994918057e-06, | |
| "loss": 1.2513983249664307, | |
| "step": 164 | |
| }, | |
| { | |
| "epoch": 0.350210970464135, | |
| "grad_norm": 4.0340094566345215, | |
| "learning_rate": 3.999410729431306e-06, | |
| "loss": 0.83528733253479, | |
| "step": 166 | |
| }, | |
| { | |
| "epoch": 0.35443037974683544, | |
| "grad_norm": 4.2334747314453125, | |
| "learning_rate": 3.9992987265968506e-06, | |
| "loss": 1.2495150566101074, | |
| "step": 168 | |
| }, | |
| { | |
| "epoch": 0.35864978902953587, | |
| "grad_norm": 2.4250214099884033, | |
| "learning_rate": 3.999176987020782e-06, | |
| "loss": 1.3424336910247803, | |
| "step": 170 | |
| }, | |
| { | |
| "epoch": 0.3628691983122363, | |
| "grad_norm": 2.0446016788482666, | |
| "learning_rate": 3.999045511361886e-06, | |
| "loss": 1.2304866313934326, | |
| "step": 172 | |
| }, | |
| { | |
| "epoch": 0.3670886075949367, | |
| "grad_norm": 2.2647955417633057, | |
| "learning_rate": 3.998904300331629e-06, | |
| "loss": 1.0302658081054688, | |
| "step": 174 | |
| }, | |
| { | |
| "epoch": 0.37130801687763715, | |
| "grad_norm": 4.148885250091553, | |
| "learning_rate": 3.998753354694162e-06, | |
| "loss": 1.3435766696929932, | |
| "step": 176 | |
| }, | |
| { | |
| "epoch": 0.3755274261603376, | |
| "grad_norm": 2.1456167697906494, | |
| "learning_rate": 3.998592675266313e-06, | |
| "loss": 1.3384077548980713, | |
| "step": 178 | |
| }, | |
| { | |
| "epoch": 0.379746835443038, | |
| "grad_norm": 1.8021888732910156, | |
| "learning_rate": 3.998422262917586e-06, | |
| "loss": 1.0130809545516968, | |
| "step": 180 | |
| }, | |
| { | |
| "epoch": 0.38396624472573837, | |
| "grad_norm": 1.8628857135772705, | |
| "learning_rate": 3.99824211857015e-06, | |
| "loss": 1.3068010807037354, | |
| "step": 182 | |
| }, | |
| { | |
| "epoch": 0.3881856540084388, | |
| "grad_norm": 2.337610960006714, | |
| "learning_rate": 3.998052243198841e-06, | |
| "loss": 1.3072583675384521, | |
| "step": 184 | |
| }, | |
| { | |
| "epoch": 0.3924050632911392, | |
| "grad_norm": 4.762563228607178, | |
| "learning_rate": 3.997852637831152e-06, | |
| "loss": 0.5184736847877502, | |
| "step": 186 | |
| }, | |
| { | |
| "epoch": 0.39662447257383965, | |
| "grad_norm": 5.280208110809326, | |
| "learning_rate": 3.9976433035472296e-06, | |
| "loss": 0.9710695743560791, | |
| "step": 188 | |
| }, | |
| { | |
| "epoch": 0.4008438818565401, | |
| "grad_norm": 2.887589693069458, | |
| "learning_rate": 3.997424241479867e-06, | |
| "loss": 1.0692715644836426, | |
| "step": 190 | |
| }, | |
| { | |
| "epoch": 0.4050632911392405, | |
| "grad_norm": 1.577860951423645, | |
| "learning_rate": 3.997195452814498e-06, | |
| "loss": 1.315537452697754, | |
| "step": 192 | |
| }, | |
| { | |
| "epoch": 0.4092827004219409, | |
| "grad_norm": 3.5055530071258545, | |
| "learning_rate": 3.996956938789193e-06, | |
| "loss": 1.0743625164031982, | |
| "step": 194 | |
| }, | |
| { | |
| "epoch": 0.41350210970464135, | |
| "grad_norm": 2.70391583442688, | |
| "learning_rate": 3.996708700694647e-06, | |
| "loss": 1.2994472980499268, | |
| "step": 196 | |
| }, | |
| { | |
| "epoch": 0.4177215189873418, | |
| "grad_norm": 2.665532112121582, | |
| "learning_rate": 3.99645073987418e-06, | |
| "loss": 1.0376091003417969, | |
| "step": 198 | |
| }, | |
| { | |
| "epoch": 0.4219409282700422, | |
| "grad_norm": 3.4091718196868896, | |
| "learning_rate": 3.9961830577237225e-06, | |
| "loss": 1.1265370845794678, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.42616033755274263, | |
| "grad_norm": 3.360374689102173, | |
| "learning_rate": 3.9959056556918125e-06, | |
| "loss": 1.1382226943969727, | |
| "step": 202 | |
| }, | |
| { | |
| "epoch": 0.43037974683544306, | |
| "grad_norm": 3.247422218322754, | |
| "learning_rate": 3.9956185352795864e-06, | |
| "loss": 0.9122767448425293, | |
| "step": 204 | |
| }, | |
| { | |
| "epoch": 0.4345991561181435, | |
| "grad_norm": 3.775322198867798, | |
| "learning_rate": 3.995321698040768e-06, | |
| "loss": 1.5471869707107544, | |
| "step": 206 | |
| }, | |
| { | |
| "epoch": 0.4388185654008439, | |
| "grad_norm": 11.316990852355957, | |
| "learning_rate": 3.995015145581668e-06, | |
| "loss": 0.7269084453582764, | |
| "step": 208 | |
| }, | |
| { | |
| "epoch": 0.4430379746835443, | |
| "grad_norm": 1.767858862876892, | |
| "learning_rate": 3.994698879561165e-06, | |
| "loss": 1.2886333465576172, | |
| "step": 210 | |
| }, | |
| { | |
| "epoch": 0.4472573839662447, | |
| "grad_norm": 3.727637767791748, | |
| "learning_rate": 3.994372901690705e-06, | |
| "loss": 0.8034701943397522, | |
| "step": 212 | |
| }, | |
| { | |
| "epoch": 0.45147679324894513, | |
| "grad_norm": 2.0933773517608643, | |
| "learning_rate": 3.994037213734287e-06, | |
| "loss": 1.209691047668457, | |
| "step": 214 | |
| }, | |
| { | |
| "epoch": 0.45569620253164556, | |
| "grad_norm": 2.345202684402466, | |
| "learning_rate": 3.993691817508457e-06, | |
| "loss": 1.2683181762695312, | |
| "step": 216 | |
| }, | |
| { | |
| "epoch": 0.459915611814346, | |
| "grad_norm": 6.4172868728637695, | |
| "learning_rate": 3.993336714882294e-06, | |
| "loss": 1.3031342029571533, | |
| "step": 218 | |
| }, | |
| { | |
| "epoch": 0.4641350210970464, | |
| "grad_norm": 4.881870269775391, | |
| "learning_rate": 3.992971907777404e-06, | |
| "loss": 1.259873390197754, | |
| "step": 220 | |
| }, | |
| { | |
| "epoch": 0.46835443037974683, | |
| "grad_norm": 4.619325637817383, | |
| "learning_rate": 3.992597398167907e-06, | |
| "loss": 1.2213921546936035, | |
| "step": 222 | |
| }, | |
| { | |
| "epoch": 0.47257383966244726, | |
| "grad_norm": 2.6401724815368652, | |
| "learning_rate": 3.99221318808043e-06, | |
| "loss": 1.2425501346588135, | |
| "step": 224 | |
| }, | |
| { | |
| "epoch": 0.4767932489451477, | |
| "grad_norm": 2.318206548690796, | |
| "learning_rate": 3.9918192795940875e-06, | |
| "loss": 1.2931036949157715, | |
| "step": 226 | |
| }, | |
| { | |
| "epoch": 0.4810126582278481, | |
| "grad_norm": 3.360222339630127, | |
| "learning_rate": 3.991415674840482e-06, | |
| "loss": 0.7865722179412842, | |
| "step": 228 | |
| }, | |
| { | |
| "epoch": 0.48523206751054854, | |
| "grad_norm": 7.906117916107178, | |
| "learning_rate": 3.9910023760036835e-06, | |
| "loss": 0.920839250087738, | |
| "step": 230 | |
| }, | |
| { | |
| "epoch": 0.48945147679324896, | |
| "grad_norm": 4.246833324432373, | |
| "learning_rate": 3.99057938532022e-06, | |
| "loss": 0.8984707593917847, | |
| "step": 232 | |
| }, | |
| { | |
| "epoch": 0.4936708860759494, | |
| "grad_norm": 1.9855449199676514, | |
| "learning_rate": 3.990146705079069e-06, | |
| "loss": 1.2834184169769287, | |
| "step": 234 | |
| }, | |
| { | |
| "epoch": 0.4978902953586498, | |
| "grad_norm": 2.732619285583496, | |
| "learning_rate": 3.989704337621639e-06, | |
| "loss": 1.3313374519348145, | |
| "step": 236 | |
| }, | |
| { | |
| "epoch": 0.5021097046413502, | |
| "grad_norm": 2.2487165927886963, | |
| "learning_rate": 3.989252285341761e-06, | |
| "loss": 0.9914782047271729, | |
| "step": 238 | |
| }, | |
| { | |
| "epoch": 0.5063291139240507, | |
| "grad_norm": 2.918333053588867, | |
| "learning_rate": 3.988790550685677e-06, | |
| "loss": 0.4503798186779022, | |
| "step": 240 | |
| }, | |
| { | |
| "epoch": 0.510548523206751, | |
| "grad_norm": 5.367257118225098, | |
| "learning_rate": 3.98831913615202e-06, | |
| "loss": 1.4287300109863281, | |
| "step": 242 | |
| }, | |
| { | |
| "epoch": 0.5147679324894515, | |
| "grad_norm": 4.372511863708496, | |
| "learning_rate": 3.987838044291807e-06, | |
| "loss": 0.8704193830490112, | |
| "step": 244 | |
| }, | |
| { | |
| "epoch": 0.5189873417721519, | |
| "grad_norm": 2.685379981994629, | |
| "learning_rate": 3.987347277708424e-06, | |
| "loss": 1.4937043190002441, | |
| "step": 246 | |
| }, | |
| { | |
| "epoch": 0.5232067510548524, | |
| "grad_norm": 2.241354465484619, | |
| "learning_rate": 3.986846839057609e-06, | |
| "loss": 1.2054930925369263, | |
| "step": 248 | |
| }, | |
| { | |
| "epoch": 0.5274261603375527, | |
| "grad_norm": 2.666008472442627, | |
| "learning_rate": 3.98633673104744e-06, | |
| "loss": 1.322192907333374, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 0.5316455696202531, | |
| "grad_norm": 3.0313169956207275, | |
| "learning_rate": 3.985816956438322e-06, | |
| "loss": 1.1353508234024048, | |
| "step": 252 | |
| }, | |
| { | |
| "epoch": 0.5358649789029536, | |
| "grad_norm": 1.7615196704864502, | |
| "learning_rate": 3.985287518042965e-06, | |
| "loss": 1.2446702718734741, | |
| "step": 254 | |
| }, | |
| { | |
| "epoch": 0.540084388185654, | |
| "grad_norm": 2.7614693641662598, | |
| "learning_rate": 3.984748418726381e-06, | |
| "loss": 1.2152833938598633, | |
| "step": 256 | |
| }, | |
| { | |
| "epoch": 0.5443037974683544, | |
| "grad_norm": 1.3947678804397583, | |
| "learning_rate": 3.9841996614058536e-06, | |
| "loss": 1.0362350940704346, | |
| "step": 258 | |
| }, | |
| { | |
| "epoch": 0.5485232067510548, | |
| "grad_norm": 3.6117563247680664, | |
| "learning_rate": 3.983641249050933e-06, | |
| "loss": 0.9856378436088562, | |
| "step": 260 | |
| }, | |
| { | |
| "epoch": 0.5527426160337553, | |
| "grad_norm": 2.348914861679077, | |
| "learning_rate": 3.983073184683419e-06, | |
| "loss": 1.2900649309158325, | |
| "step": 262 | |
| }, | |
| { | |
| "epoch": 0.5569620253164557, | |
| "grad_norm": 2.4478940963745117, | |
| "learning_rate": 3.98249547137734e-06, | |
| "loss": 1.30060613155365, | |
| "step": 264 | |
| }, | |
| { | |
| "epoch": 0.5611814345991561, | |
| "grad_norm": 1.8957366943359375, | |
| "learning_rate": 3.981908112258938e-06, | |
| "loss": 1.2571529150009155, | |
| "step": 266 | |
| }, | |
| { | |
| "epoch": 0.5654008438818565, | |
| "grad_norm": 2.468729257583618, | |
| "learning_rate": 3.981311110506654e-06, | |
| "loss": 1.522542119026184, | |
| "step": 268 | |
| }, | |
| { | |
| "epoch": 0.569620253164557, | |
| "grad_norm": 5.101961612701416, | |
| "learning_rate": 3.9807044693511086e-06, | |
| "loss": 1.0608189105987549, | |
| "step": 270 | |
| }, | |
| { | |
| "epoch": 0.5738396624472574, | |
| "grad_norm": 3.0331854820251465, | |
| "learning_rate": 3.980088192075085e-06, | |
| "loss": 1.3017442226409912, | |
| "step": 272 | |
| }, | |
| { | |
| "epoch": 0.5780590717299579, | |
| "grad_norm": 2.463477373123169, | |
| "learning_rate": 3.979462282013513e-06, | |
| "loss": 1.099843144416809, | |
| "step": 274 | |
| }, | |
| { | |
| "epoch": 0.5822784810126582, | |
| "grad_norm": 1.7117162942886353, | |
| "learning_rate": 3.978826742553447e-06, | |
| "loss": 1.2798070907592773, | |
| "step": 276 | |
| }, | |
| { | |
| "epoch": 0.5864978902953587, | |
| "grad_norm": 3.3944342136383057, | |
| "learning_rate": 3.978181577134051e-06, | |
| "loss": 1.4166996479034424, | |
| "step": 278 | |
| }, | |
| { | |
| "epoch": 0.5907172995780591, | |
| "grad_norm": 2.0399510860443115, | |
| "learning_rate": 3.97752678924658e-06, | |
| "loss": 0.9708434343338013, | |
| "step": 280 | |
| }, | |
| { | |
| "epoch": 0.5949367088607594, | |
| "grad_norm": 5.146090984344482, | |
| "learning_rate": 3.976862382434358e-06, | |
| "loss": 1.3494899272918701, | |
| "step": 282 | |
| }, | |
| { | |
| "epoch": 0.5991561181434599, | |
| "grad_norm": 2.0854623317718506, | |
| "learning_rate": 3.976188360292762e-06, | |
| "loss": 1.551278829574585, | |
| "step": 284 | |
| }, | |
| { | |
| "epoch": 0.6033755274261603, | |
| "grad_norm": 0.7903197407722473, | |
| "learning_rate": 3.975504726469204e-06, | |
| "loss": 1.1335902214050293, | |
| "step": 286 | |
| }, | |
| { | |
| "epoch": 0.6075949367088608, | |
| "grad_norm": 1.5145395994186401, | |
| "learning_rate": 3.9748114846631025e-06, | |
| "loss": 1.2714455127716064, | |
| "step": 288 | |
| }, | |
| { | |
| "epoch": 0.6118143459915611, | |
| "grad_norm": 2.4970903396606445, | |
| "learning_rate": 3.974108638625875e-06, | |
| "loss": 0.8297945857048035, | |
| "step": 290 | |
| }, | |
| { | |
| "epoch": 0.6160337552742616, | |
| "grad_norm": 1.9116922616958618, | |
| "learning_rate": 3.973396192160909e-06, | |
| "loss": 0.6557431221008301, | |
| "step": 292 | |
| }, | |
| { | |
| "epoch": 0.620253164556962, | |
| "grad_norm": 1.597800374031067, | |
| "learning_rate": 3.972674149123543e-06, | |
| "loss": 1.251997709274292, | |
| "step": 294 | |
| }, | |
| { | |
| "epoch": 0.6244725738396625, | |
| "grad_norm": 5.221956253051758, | |
| "learning_rate": 3.971942513421049e-06, | |
| "loss": 0.7073361873626709, | |
| "step": 296 | |
| }, | |
| { | |
| "epoch": 0.6286919831223629, | |
| "grad_norm": 8.381784439086914, | |
| "learning_rate": 3.971201289012605e-06, | |
| "loss": 0.6594762802124023, | |
| "step": 298 | |
| }, | |
| { | |
| "epoch": 0.6329113924050633, | |
| "grad_norm": 4.704819202423096, | |
| "learning_rate": 3.97045047990928e-06, | |
| "loss": 1.7869096994400024, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 0.6371308016877637, | |
| "grad_norm": 1.746824026107788, | |
| "learning_rate": 3.969690090174009e-06, | |
| "loss": 1.2827584743499756, | |
| "step": 302 | |
| }, | |
| { | |
| "epoch": 0.6413502109704642, | |
| "grad_norm": 2.3811588287353516, | |
| "learning_rate": 3.968920123921574e-06, | |
| "loss": 0.8861095905303955, | |
| "step": 304 | |
| }, | |
| { | |
| "epoch": 0.6455696202531646, | |
| "grad_norm": 2.874070644378662, | |
| "learning_rate": 3.968140585318575e-06, | |
| "loss": 1.0074717998504639, | |
| "step": 306 | |
| }, | |
| { | |
| "epoch": 0.6497890295358649, | |
| "grad_norm": 1.4178441762924194, | |
| "learning_rate": 3.967351478583417e-06, | |
| "loss": 1.271646499633789, | |
| "step": 308 | |
| }, | |
| { | |
| "epoch": 0.6540084388185654, | |
| "grad_norm": 2.7072203159332275, | |
| "learning_rate": 3.9665528079862766e-06, | |
| "loss": 1.2094981670379639, | |
| "step": 310 | |
| }, | |
| { | |
| "epoch": 0.6582278481012658, | |
| "grad_norm": 2.434222936630249, | |
| "learning_rate": 3.965744577849089e-06, | |
| "loss": 1.016772747039795, | |
| "step": 312 | |
| }, | |
| { | |
| "epoch": 0.6624472573839663, | |
| "grad_norm": 1.4761089086532593, | |
| "learning_rate": 3.964926792545517e-06, | |
| "loss": 1.2257163524627686, | |
| "step": 314 | |
| }, | |
| { | |
| "epoch": 0.6666666666666666, | |
| "grad_norm": 1.9905054569244385, | |
| "learning_rate": 3.964099456500932e-06, | |
| "loss": 1.1116795539855957, | |
| "step": 316 | |
| }, | |
| { | |
| "epoch": 0.6708860759493671, | |
| "grad_norm": 2.5824759006500244, | |
| "learning_rate": 3.963262574192388e-06, | |
| "loss": 1.0979809761047363, | |
| "step": 318 | |
| }, | |
| { | |
| "epoch": 0.6751054852320675, | |
| "grad_norm": 2.126721143722534, | |
| "learning_rate": 3.962416150148598e-06, | |
| "loss": 1.0931775569915771, | |
| "step": 320 | |
| }, | |
| { | |
| "epoch": 0.679324894514768, | |
| "grad_norm": 2.355828046798706, | |
| "learning_rate": 3.961560188949909e-06, | |
| "loss": 0.8760429620742798, | |
| "step": 322 | |
| }, | |
| { | |
| "epoch": 0.6835443037974683, | |
| "grad_norm": 2.159811496734619, | |
| "learning_rate": 3.9606946952282745e-06, | |
| "loss": 0.8803830146789551, | |
| "step": 324 | |
| }, | |
| { | |
| "epoch": 0.6877637130801688, | |
| "grad_norm": 2.930659294128418, | |
| "learning_rate": 3.959819673667239e-06, | |
| "loss": 0.8701751232147217, | |
| "step": 326 | |
| }, | |
| { | |
| "epoch": 0.6919831223628692, | |
| "grad_norm": 1.615522027015686, | |
| "learning_rate": 3.958935129001899e-06, | |
| "loss": 0.8708148002624512, | |
| "step": 328 | |
| }, | |
| { | |
| "epoch": 0.6962025316455697, | |
| "grad_norm": 5.590799331665039, | |
| "learning_rate": 3.958041066018891e-06, | |
| "loss": 1.591496229171753, | |
| "step": 330 | |
| }, | |
| { | |
| "epoch": 0.70042194092827, | |
| "grad_norm": 3.333008050918579, | |
| "learning_rate": 3.957137489556352e-06, | |
| "loss": 1.1004414558410645, | |
| "step": 332 | |
| }, | |
| { | |
| "epoch": 0.7046413502109705, | |
| "grad_norm": 2.3116865158081055, | |
| "learning_rate": 3.956224404503906e-06, | |
| "loss": 1.4001518487930298, | |
| "step": 334 | |
| }, | |
| { | |
| "epoch": 0.7088607594936709, | |
| "grad_norm": 4.017354965209961, | |
| "learning_rate": 3.955301815802629e-06, | |
| "loss": 1.2720857858657837, | |
| "step": 336 | |
| }, | |
| { | |
| "epoch": 0.7130801687763713, | |
| "grad_norm": 2.1855573654174805, | |
| "learning_rate": 3.954369728445028e-06, | |
| "loss": 1.2939956188201904, | |
| "step": 338 | |
| }, | |
| { | |
| "epoch": 0.7172995780590717, | |
| "grad_norm": 2.4703359603881836, | |
| "learning_rate": 3.953428147475006e-06, | |
| "loss": 1.2735445499420166, | |
| "step": 340 | |
| }, | |
| { | |
| "epoch": 0.7215189873417721, | |
| "grad_norm": 2.1738462448120117, | |
| "learning_rate": 3.952477077987845e-06, | |
| "loss": 1.2617197036743164, | |
| "step": 342 | |
| }, | |
| { | |
| "epoch": 0.7257383966244726, | |
| "grad_norm": 2.704313278198242, | |
| "learning_rate": 3.95151652513017e-06, | |
| "loss": 1.0853008031845093, | |
| "step": 344 | |
| }, | |
| { | |
| "epoch": 0.729957805907173, | |
| "grad_norm": 10.06601333618164, | |
| "learning_rate": 3.950546494099926e-06, | |
| "loss": 0.8921165466308594, | |
| "step": 346 | |
| }, | |
| { | |
| "epoch": 0.7341772151898734, | |
| "grad_norm": 1.9999581575393677, | |
| "learning_rate": 3.949566990146349e-06, | |
| "loss": 1.256639003753662, | |
| "step": 348 | |
| }, | |
| { | |
| "epoch": 0.7383966244725738, | |
| "grad_norm": 5.633319854736328, | |
| "learning_rate": 3.948578018569932e-06, | |
| "loss": 1.1841363906860352, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 0.7426160337552743, | |
| "grad_norm": 7.676711559295654, | |
| "learning_rate": 3.94757958472241e-06, | |
| "loss": 1.0944801568984985, | |
| "step": 352 | |
| }, | |
| { | |
| "epoch": 0.7468354430379747, | |
| "grad_norm": 4.892640590667725, | |
| "learning_rate": 3.946571694006712e-06, | |
| "loss": 0.6508228182792664, | |
| "step": 354 | |
| }, | |
| { | |
| "epoch": 0.7510548523206751, | |
| "grad_norm": 2.564443349838257, | |
| "learning_rate": 3.945554351876951e-06, | |
| "loss": 1.0562660694122314, | |
| "step": 356 | |
| }, | |
| { | |
| "epoch": 0.7552742616033755, | |
| "grad_norm": 4.787500858306885, | |
| "learning_rate": 3.94452756383838e-06, | |
| "loss": 0.998831033706665, | |
| "step": 358 | |
| }, | |
| { | |
| "epoch": 0.759493670886076, | |
| "grad_norm": 1.8746553659439087, | |
| "learning_rate": 3.943491335447368e-06, | |
| "loss": 1.2303812503814697, | |
| "step": 360 | |
| }, | |
| { | |
| "epoch": 0.7637130801687764, | |
| "grad_norm": 2.7792534828186035, | |
| "learning_rate": 3.942445672311373e-06, | |
| "loss": 0.9920629858970642, | |
| "step": 362 | |
| }, | |
| { | |
| "epoch": 0.7679324894514767, | |
| "grad_norm": 5.023082733154297, | |
| "learning_rate": 3.941390580088905e-06, | |
| "loss": 1.5890564918518066, | |
| "step": 364 | |
| }, | |
| { | |
| "epoch": 0.7721518987341772, | |
| "grad_norm": 3.2253143787384033, | |
| "learning_rate": 3.940326064489499e-06, | |
| "loss": 0.7020189166069031, | |
| "step": 366 | |
| }, | |
| { | |
| "epoch": 0.7763713080168776, | |
| "grad_norm": 4.751897811889648, | |
| "learning_rate": 3.939252131273686e-06, | |
| "loss": 1.1662057638168335, | |
| "step": 368 | |
| }, | |
| { | |
| "epoch": 0.7805907172995781, | |
| "grad_norm": 4.707884788513184, | |
| "learning_rate": 3.938168786252957e-06, | |
| "loss": 1.490715742111206, | |
| "step": 370 | |
| }, | |
| { | |
| "epoch": 0.7848101265822784, | |
| "grad_norm": 4.896017074584961, | |
| "learning_rate": 3.937076035289735e-06, | |
| "loss": 0.9990431070327759, | |
| "step": 372 | |
| }, | |
| { | |
| "epoch": 0.7890295358649789, | |
| "grad_norm": 5.3917059898376465, | |
| "learning_rate": 3.935973884297344e-06, | |
| "loss": 1.06167471408844, | |
| "step": 374 | |
| }, | |
| { | |
| "epoch": 0.7932489451476793, | |
| "grad_norm": 1.6713993549346924, | |
| "learning_rate": 3.934862339239972e-06, | |
| "loss": 1.1578385829925537, | |
| "step": 376 | |
| }, | |
| { | |
| "epoch": 0.7974683544303798, | |
| "grad_norm": 1.267899990081787, | |
| "learning_rate": 3.933741406132645e-06, | |
| "loss": 1.1280488967895508, | |
| "step": 378 | |
| }, | |
| { | |
| "epoch": 0.8016877637130801, | |
| "grad_norm": 2.5772478580474854, | |
| "learning_rate": 3.932611091041192e-06, | |
| "loss": 0.7228022217750549, | |
| "step": 380 | |
| }, | |
| { | |
| "epoch": 0.8059071729957806, | |
| "grad_norm": 2.877981185913086, | |
| "learning_rate": 3.931471400082208e-06, | |
| "loss": 1.275989294052124, | |
| "step": 382 | |
| }, | |
| { | |
| "epoch": 0.810126582278481, | |
| "grad_norm": 4.086211204528809, | |
| "learning_rate": 3.930322339423029e-06, | |
| "loss": 1.0468356609344482, | |
| "step": 384 | |
| }, | |
| { | |
| "epoch": 0.8143459915611815, | |
| "grad_norm": 3.2680745124816895, | |
| "learning_rate": 3.929163915281692e-06, | |
| "loss": 1.2617956399917603, | |
| "step": 386 | |
| }, | |
| { | |
| "epoch": 0.8185654008438819, | |
| "grad_norm": 2.128434181213379, | |
| "learning_rate": 3.927996133926903e-06, | |
| "loss": 0.9376715421676636, | |
| "step": 388 | |
| }, | |
| { | |
| "epoch": 0.8227848101265823, | |
| "grad_norm": 1.895815372467041, | |
| "learning_rate": 3.926819001678005e-06, | |
| "loss": 1.2338812351226807, | |
| "step": 390 | |
| }, | |
| { | |
| "epoch": 0.8270042194092827, | |
| "grad_norm": 2.774864435195923, | |
| "learning_rate": 3.925632524904943e-06, | |
| "loss": 0.9890301823616028, | |
| "step": 392 | |
| }, | |
| { | |
| "epoch": 0.8312236286919831, | |
| "grad_norm": 3.1884961128234863, | |
| "learning_rate": 3.924436710028228e-06, | |
| "loss": 0.9189957976341248, | |
| "step": 394 | |
| }, | |
| { | |
| "epoch": 0.8354430379746836, | |
| "grad_norm": 2.6990184783935547, | |
| "learning_rate": 3.923231563518904e-06, | |
| "loss": 1.2466810941696167, | |
| "step": 396 | |
| }, | |
| { | |
| "epoch": 0.8396624472573839, | |
| "grad_norm": 0.5147901773452759, | |
| "learning_rate": 3.922017091898511e-06, | |
| "loss": 1.0888053178787231, | |
| "step": 398 | |
| }, | |
| { | |
| "epoch": 0.8438818565400844, | |
| "grad_norm": 2.1018054485321045, | |
| "learning_rate": 3.920793301739052e-06, | |
| "loss": 0.9585396647453308, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 0.8481012658227848, | |
| "grad_norm": 4.2678046226501465, | |
| "learning_rate": 3.9195601996629564e-06, | |
| "loss": 0.702578067779541, | |
| "step": 402 | |
| }, | |
| { | |
| "epoch": 0.8523206751054853, | |
| "grad_norm": 3.8625717163085938, | |
| "learning_rate": 3.9183177923430445e-06, | |
| "loss": 1.2020361423492432, | |
| "step": 404 | |
| }, | |
| { | |
| "epoch": 0.8565400843881856, | |
| "grad_norm": 2.158465623855591, | |
| "learning_rate": 3.917066086502491e-06, | |
| "loss": 0.9442514181137085, | |
| "step": 406 | |
| }, | |
| { | |
| "epoch": 0.8607594936708861, | |
| "grad_norm": 2.1818642616271973, | |
| "learning_rate": 3.915805088914787e-06, | |
| "loss": 0.9750051498413086, | |
| "step": 408 | |
| }, | |
| { | |
| "epoch": 0.8649789029535865, | |
| "grad_norm": 1.9446742534637451, | |
| "learning_rate": 3.914534806403707e-06, | |
| "loss": 1.247662901878357, | |
| "step": 410 | |
| }, | |
| { | |
| "epoch": 0.869198312236287, | |
| "grad_norm": 2.9858086109161377, | |
| "learning_rate": 3.913255245843269e-06, | |
| "loss": 0.8505547642707825, | |
| "step": 412 | |
| }, | |
| { | |
| "epoch": 0.8734177215189873, | |
| "grad_norm": 3.3264975547790527, | |
| "learning_rate": 3.911966414157699e-06, | |
| "loss": 1.222496509552002, | |
| "step": 414 | |
| }, | |
| { | |
| "epoch": 0.8776371308016878, | |
| "grad_norm": 1.9070676565170288, | |
| "learning_rate": 3.910668318321395e-06, | |
| "loss": 1.0650990009307861, | |
| "step": 416 | |
| }, | |
| { | |
| "epoch": 0.8818565400843882, | |
| "grad_norm": 49.54351806640625, | |
| "learning_rate": 3.90936096535888e-06, | |
| "loss": 1.170435905456543, | |
| "step": 418 | |
| }, | |
| { | |
| "epoch": 0.8860759493670886, | |
| "grad_norm": 3.324521064758301, | |
| "learning_rate": 3.90804436234478e-06, | |
| "loss": 0.9483715295791626, | |
| "step": 420 | |
| }, | |
| { | |
| "epoch": 0.890295358649789, | |
| "grad_norm": 2.001574754714966, | |
| "learning_rate": 3.9067185164037705e-06, | |
| "loss": 1.3522322177886963, | |
| "step": 422 | |
| }, | |
| { | |
| "epoch": 0.8945147679324894, | |
| "grad_norm": 13.360381126403809, | |
| "learning_rate": 3.905383434710546e-06, | |
| "loss": 0.980687141418457, | |
| "step": 424 | |
| }, | |
| { | |
| "epoch": 0.8987341772151899, | |
| "grad_norm": 2.832037925720215, | |
| "learning_rate": 3.904039124489782e-06, | |
| "loss": 1.1890883445739746, | |
| "step": 426 | |
| }, | |
| { | |
| "epoch": 0.9029535864978903, | |
| "grad_norm": 3.036261796951294, | |
| "learning_rate": 3.902685593016088e-06, | |
| "loss": 1.0536837577819824, | |
| "step": 428 | |
| }, | |
| { | |
| "epoch": 0.9071729957805907, | |
| "grad_norm": 3.503538131713867, | |
| "learning_rate": 3.90132284761398e-06, | |
| "loss": 1.016420602798462, | |
| "step": 430 | |
| }, | |
| { | |
| "epoch": 0.9113924050632911, | |
| "grad_norm": 2.102992534637451, | |
| "learning_rate": 3.899950895657829e-06, | |
| "loss": 1.0863244533538818, | |
| "step": 432 | |
| }, | |
| { | |
| "epoch": 0.9156118143459916, | |
| "grad_norm": 2.5443339347839355, | |
| "learning_rate": 3.8985697445718275e-06, | |
| "loss": 1.2617383003234863, | |
| "step": 434 | |
| }, | |
| { | |
| "epoch": 0.919831223628692, | |
| "grad_norm": 13.239272117614746, | |
| "learning_rate": 3.8971794018299515e-06, | |
| "loss": 0.8763201832771301, | |
| "step": 436 | |
| }, | |
| { | |
| "epoch": 0.9240506329113924, | |
| "grad_norm": 1.944677710533142, | |
| "learning_rate": 3.895779874955913e-06, | |
| "loss": 1.2141039371490479, | |
| "step": 438 | |
| }, | |
| { | |
| "epoch": 0.9282700421940928, | |
| "grad_norm": 1.6930376291275024, | |
| "learning_rate": 3.894371171523124e-06, | |
| "loss": 0.9925521016120911, | |
| "step": 440 | |
| }, | |
| { | |
| "epoch": 0.9324894514767933, | |
| "grad_norm": 2.417435646057129, | |
| "learning_rate": 3.892953299154657e-06, | |
| "loss": 0.9523521661758423, | |
| "step": 442 | |
| }, | |
| { | |
| "epoch": 0.9367088607594937, | |
| "grad_norm": 4.125819683074951, | |
| "learning_rate": 3.8915262655231985e-06, | |
| "loss": 1.1057894229888916, | |
| "step": 444 | |
| }, | |
| { | |
| "epoch": 0.9409282700421941, | |
| "grad_norm": 5.843780517578125, | |
| "learning_rate": 3.890090078351011e-06, | |
| "loss": 1.3123371601104736, | |
| "step": 446 | |
| }, | |
| { | |
| "epoch": 0.9451476793248945, | |
| "grad_norm": 1.6658388376235962, | |
| "learning_rate": 3.8886447454098914e-06, | |
| "loss": 1.013564109802246, | |
| "step": 448 | |
| }, | |
| { | |
| "epoch": 0.9493670886075949, | |
| "grad_norm": 12.320473670959473, | |
| "learning_rate": 3.887190274521128e-06, | |
| "loss": 0.9290477633476257, | |
| "step": 450 | |
| }, | |
| { | |
| "epoch": 0.9535864978902954, | |
| "grad_norm": 1.9203139543533325, | |
| "learning_rate": 3.885726673555457e-06, | |
| "loss": 1.2885007858276367, | |
| "step": 452 | |
| }, | |
| { | |
| "epoch": 0.9578059071729957, | |
| "grad_norm": 1.4699382781982422, | |
| "learning_rate": 3.884253950433022e-06, | |
| "loss": 1.0010005235671997, | |
| "step": 454 | |
| }, | |
| { | |
| "epoch": 0.9620253164556962, | |
| "grad_norm": 2.8639562129974365, | |
| "learning_rate": 3.882772113123332e-06, | |
| "loss": 1.1654586791992188, | |
| "step": 456 | |
| }, | |
| { | |
| "epoch": 0.9662447257383966, | |
| "grad_norm": 3.0785443782806396, | |
| "learning_rate": 3.881281169645212e-06, | |
| "loss": 0.8937104940414429, | |
| "step": 458 | |
| }, | |
| { | |
| "epoch": 0.9704641350210971, | |
| "grad_norm": 4.43109655380249, | |
| "learning_rate": 3.879781128066771e-06, | |
| "loss": 0.7110123634338379, | |
| "step": 460 | |
| }, | |
| { | |
| "epoch": 0.9746835443037974, | |
| "grad_norm": 2.914869785308838, | |
| "learning_rate": 3.878271996505345e-06, | |
| "loss": 0.8978859186172485, | |
| "step": 462 | |
| }, | |
| { | |
| "epoch": 0.9789029535864979, | |
| "grad_norm": 2.207864999771118, | |
| "learning_rate": 3.876753783127464e-06, | |
| "loss": 1.1789137125015259, | |
| "step": 464 | |
| }, | |
| { | |
| "epoch": 0.9831223628691983, | |
| "grad_norm": 3.6072731018066406, | |
| "learning_rate": 3.875226496148799e-06, | |
| "loss": 0.7317770719528198, | |
| "step": 466 | |
| }, | |
| { | |
| "epoch": 0.9873417721518988, | |
| "grad_norm": 6.211583614349365, | |
| "learning_rate": 3.873690143834129e-06, | |
| "loss": 1.3279008865356445, | |
| "step": 468 | |
| }, | |
| { | |
| "epoch": 0.9915611814345991, | |
| "grad_norm": 8.5204496383667, | |
| "learning_rate": 3.872144734497281e-06, | |
| "loss": 0.8850146532058716, | |
| "step": 470 | |
| }, | |
| { | |
| "epoch": 0.9957805907172996, | |
| "grad_norm": 2.666997194290161, | |
| "learning_rate": 3.870590276501099e-06, | |
| "loss": 1.220442295074463, | |
| "step": 472 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "grad_norm": 3.6749136447906494, | |
| "learning_rate": 3.869026778257392e-06, | |
| "loss": 1.2717642784118652, | |
| "step": 474 | |
| }, | |
| { | |
| "epoch": 1.0042194092827004, | |
| "grad_norm": 5.584980010986328, | |
| "learning_rate": 3.867454248226887e-06, | |
| "loss": 1.0543200969696045, | |
| "step": 476 | |
| }, | |
| { | |
| "epoch": 1.0084388185654007, | |
| "grad_norm": 4.494807243347168, | |
| "learning_rate": 3.86587269491919e-06, | |
| "loss": 1.1978576183319092, | |
| "step": 478 | |
| }, | |
| { | |
| "epoch": 1.0126582278481013, | |
| "grad_norm": 2.680006504058838, | |
| "learning_rate": 3.86428212689273e-06, | |
| "loss": 1.1809672117233276, | |
| "step": 480 | |
| }, | |
| { | |
| "epoch": 1.0168776371308017, | |
| "grad_norm": 3.9369754791259766, | |
| "learning_rate": 3.862682552754722e-06, | |
| "loss": 0.9172142744064331, | |
| "step": 482 | |
| }, | |
| { | |
| "epoch": 1.021097046413502, | |
| "grad_norm": 7.680518627166748, | |
| "learning_rate": 3.861073981161118e-06, | |
| "loss": 1.1449049711227417, | |
| "step": 484 | |
| }, | |
| { | |
| "epoch": 1.0253164556962024, | |
| "grad_norm": 2.746133804321289, | |
| "learning_rate": 3.859456420816556e-06, | |
| "loss": 0.5115264654159546, | |
| "step": 486 | |
| }, | |
| { | |
| "epoch": 1.029535864978903, | |
| "grad_norm": 2.72514271736145, | |
| "learning_rate": 3.857829880474316e-06, | |
| "loss": 0.9918684363365173, | |
| "step": 488 | |
| }, | |
| { | |
| "epoch": 1.0337552742616034, | |
| "grad_norm": 2.223912000656128, | |
| "learning_rate": 3.856194368936275e-06, | |
| "loss": 0.8463398814201355, | |
| "step": 490 | |
| }, | |
| { | |
| "epoch": 1.0379746835443038, | |
| "grad_norm": 2.9955148696899414, | |
| "learning_rate": 3.8545498950528535e-06, | |
| "loss": 1.173925757408142, | |
| "step": 492 | |
| }, | |
| { | |
| "epoch": 1.0421940928270041, | |
| "grad_norm": 4.594770431518555, | |
| "learning_rate": 3.852896467722974e-06, | |
| "loss": 0.8562051057815552, | |
| "step": 494 | |
| }, | |
| { | |
| "epoch": 1.0464135021097047, | |
| "grad_norm": 9.129888534545898, | |
| "learning_rate": 3.851234095894007e-06, | |
| "loss": 0.9281083345413208, | |
| "step": 496 | |
| }, | |
| { | |
| "epoch": 1.0506329113924051, | |
| "grad_norm": 2.604607105255127, | |
| "learning_rate": 3.849562788561727e-06, | |
| "loss": 1.2945480346679688, | |
| "step": 498 | |
| }, | |
| { | |
| "epoch": 1.0548523206751055, | |
| "grad_norm": 2.3840718269348145, | |
| "learning_rate": 3.847882554770263e-06, | |
| "loss": 1.1486706733703613, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 1.0590717299578059, | |
| "grad_norm": 1.9679715633392334, | |
| "learning_rate": 3.846193403612046e-06, | |
| "loss": 1.1716930866241455, | |
| "step": 502 | |
| }, | |
| { | |
| "epoch": 1.0632911392405062, | |
| "grad_norm": 1.7950235605239868, | |
| "learning_rate": 3.844495344227765e-06, | |
| "loss": 1.2809019088745117, | |
| "step": 504 | |
| }, | |
| { | |
| "epoch": 1.0675105485232068, | |
| "grad_norm": 2.0246713161468506, | |
| "learning_rate": 3.842788385806312e-06, | |
| "loss": 0.7856377363204956, | |
| "step": 506 | |
| }, | |
| { | |
| "epoch": 1.0717299578059072, | |
| "grad_norm": 2.0895354747772217, | |
| "learning_rate": 3.841072537584741e-06, | |
| "loss": 1.1074151992797852, | |
| "step": 508 | |
| }, | |
| { | |
| "epoch": 1.0759493670886076, | |
| "grad_norm": 2.316358804702759, | |
| "learning_rate": 3.8393478088482065e-06, | |
| "loss": 1.1439809799194336, | |
| "step": 510 | |
| }, | |
| { | |
| "epoch": 1.080168776371308, | |
| "grad_norm": 4.703127384185791, | |
| "learning_rate": 3.837614208929921e-06, | |
| "loss": 1.035994291305542, | |
| "step": 512 | |
| }, | |
| { | |
| "epoch": 1.0843881856540085, | |
| "grad_norm": 7.031744003295898, | |
| "learning_rate": 3.835871747211105e-06, | |
| "loss": 1.151397705078125, | |
| "step": 514 | |
| }, | |
| { | |
| "epoch": 1.0886075949367089, | |
| "grad_norm": 2.653866767883301, | |
| "learning_rate": 3.83412043312093e-06, | |
| "loss": 1.16837739944458, | |
| "step": 516 | |
| }, | |
| { | |
| "epoch": 1.0928270042194093, | |
| "grad_norm": 2.976186752319336, | |
| "learning_rate": 3.832360276136474e-06, | |
| "loss": 0.9901262521743774, | |
| "step": 518 | |
| }, | |
| { | |
| "epoch": 1.0970464135021096, | |
| "grad_norm": 4.738975524902344, | |
| "learning_rate": 3.830591285782666e-06, | |
| "loss": 0.9500905871391296, | |
| "step": 520 | |
| }, | |
| { | |
| "epoch": 1.1012658227848102, | |
| "grad_norm": 8.483416557312012, | |
| "learning_rate": 3.828813471632237e-06, | |
| "loss": 0.8555248975753784, | |
| "step": 522 | |
| }, | |
| { | |
| "epoch": 1.1054852320675106, | |
| "grad_norm": 4.0885467529296875, | |
| "learning_rate": 3.827026843305667e-06, | |
| "loss": 1.0695732831954956, | |
| "step": 524 | |
| }, | |
| { | |
| "epoch": 1.109704641350211, | |
| "grad_norm": 2.929239273071289, | |
| "learning_rate": 3.825231410471132e-06, | |
| "loss": 0.868694543838501, | |
| "step": 526 | |
| }, | |
| { | |
| "epoch": 1.1139240506329113, | |
| "grad_norm": 2.6514179706573486, | |
| "learning_rate": 3.823427182844455e-06, | |
| "loss": 1.3674180507659912, | |
| "step": 528 | |
| }, | |
| { | |
| "epoch": 1.1181434599156117, | |
| "grad_norm": 3.984480142593384, | |
| "learning_rate": 3.821614170189049e-06, | |
| "loss": 1.2144532203674316, | |
| "step": 530 | |
| }, | |
| { | |
| "epoch": 1.1223628691983123, | |
| "grad_norm": 7.298747539520264, | |
| "learning_rate": 3.819792382315868e-06, | |
| "loss": 0.6592221260070801, | |
| "step": 532 | |
| }, | |
| { | |
| "epoch": 1.1265822784810127, | |
| "grad_norm": 5.481675624847412, | |
| "learning_rate": 3.81796182908335e-06, | |
| "loss": 1.1008317470550537, | |
| "step": 534 | |
| }, | |
| { | |
| "epoch": 1.130801687763713, | |
| "grad_norm": 2.6566853523254395, | |
| "learning_rate": 3.816122520397369e-06, | |
| "loss": 1.1687147617340088, | |
| "step": 536 | |
| }, | |
| { | |
| "epoch": 1.1350210970464134, | |
| "grad_norm": 2.098435163497925, | |
| "learning_rate": 3.8142744662111767e-06, | |
| "loss": 0.8460148572921753, | |
| "step": 538 | |
| }, | |
| { | |
| "epoch": 1.139240506329114, | |
| "grad_norm": 2.0900216102600098, | |
| "learning_rate": 3.81241767652535e-06, | |
| "loss": 0.7578733563423157, | |
| "step": 540 | |
| }, | |
| { | |
| "epoch": 1.1434599156118144, | |
| "grad_norm": 2.375847578048706, | |
| "learning_rate": 3.8105521613877386e-06, | |
| "loss": 0.8102576732635498, | |
| "step": 542 | |
| }, | |
| { | |
| "epoch": 1.1476793248945147, | |
| "grad_norm": 3.2528064250946045, | |
| "learning_rate": 3.8086779308934066e-06, | |
| "loss": 0.8352131247520447, | |
| "step": 544 | |
| }, | |
| { | |
| "epoch": 1.1518987341772151, | |
| "grad_norm": 2.7880918979644775, | |
| "learning_rate": 3.8067949951845836e-06, | |
| "loss": 1.108149766921997, | |
| "step": 546 | |
| }, | |
| { | |
| "epoch": 1.1561181434599157, | |
| "grad_norm": 2.515939712524414, | |
| "learning_rate": 3.8049033644506043e-06, | |
| "loss": 1.1225923299789429, | |
| "step": 548 | |
| }, | |
| { | |
| "epoch": 1.160337552742616, | |
| "grad_norm": 7.062304973602295, | |
| "learning_rate": 3.8030030489278563e-06, | |
| "loss": 0.9247970581054688, | |
| "step": 550 | |
| }, | |
| { | |
| "epoch": 1.1645569620253164, | |
| "grad_norm": 4.359371662139893, | |
| "learning_rate": 3.8010940588997253e-06, | |
| "loss": 1.4258188009262085, | |
| "step": 552 | |
| }, | |
| { | |
| "epoch": 1.1687763713080168, | |
| "grad_norm": 2.2747061252593994, | |
| "learning_rate": 3.799176404696537e-06, | |
| "loss": 1.1855448484420776, | |
| "step": 554 | |
| }, | |
| { | |
| "epoch": 1.1729957805907172, | |
| "grad_norm": 4.772888660430908, | |
| "learning_rate": 3.797250096695503e-06, | |
| "loss": 0.6528091430664062, | |
| "step": 556 | |
| }, | |
| { | |
| "epoch": 1.1772151898734178, | |
| "grad_norm": 6.059512138366699, | |
| "learning_rate": 3.7953151453206635e-06, | |
| "loss": 1.0413281917572021, | |
| "step": 558 | |
| }, | |
| { | |
| "epoch": 1.1814345991561181, | |
| "grad_norm": 3.8079075813293457, | |
| "learning_rate": 3.793371561042833e-06, | |
| "loss": 0.6656049489974976, | |
| "step": 560 | |
| }, | |
| { | |
| "epoch": 1.1856540084388185, | |
| "grad_norm": 3.2168707847595215, | |
| "learning_rate": 3.791419354379541e-06, | |
| "loss": 0.8556336164474487, | |
| "step": 562 | |
| }, | |
| { | |
| "epoch": 1.189873417721519, | |
| "grad_norm": 6.392472267150879, | |
| "learning_rate": 3.7894585358949758e-06, | |
| "loss": 1.3849632740020752, | |
| "step": 564 | |
| }, | |
| { | |
| "epoch": 1.1940928270042195, | |
| "grad_norm": 6.333314418792725, | |
| "learning_rate": 3.78748911619993e-06, | |
| "loss": 1.1986020803451538, | |
| "step": 566 | |
| }, | |
| { | |
| "epoch": 1.1983122362869199, | |
| "grad_norm": 3.8843421936035156, | |
| "learning_rate": 3.7855111059517376e-06, | |
| "loss": 0.834921658039093, | |
| "step": 568 | |
| }, | |
| { | |
| "epoch": 1.2025316455696202, | |
| "grad_norm": 2.22169828414917, | |
| "learning_rate": 3.7835245158542225e-06, | |
| "loss": 1.1095911264419556, | |
| "step": 570 | |
| }, | |
| { | |
| "epoch": 1.2067510548523206, | |
| "grad_norm": 2.5398857593536377, | |
| "learning_rate": 3.7815293566576367e-06, | |
| "loss": 1.06223464012146, | |
| "step": 572 | |
| }, | |
| { | |
| "epoch": 1.2109704641350212, | |
| "grad_norm": 1.9426056146621704, | |
| "learning_rate": 3.779525639158602e-06, | |
| "loss": 1.1437506675720215, | |
| "step": 574 | |
| }, | |
| { | |
| "epoch": 1.2151898734177216, | |
| "grad_norm": 3.523289203643799, | |
| "learning_rate": 3.7775133742000542e-06, | |
| "loss": 0.9638210535049438, | |
| "step": 576 | |
| }, | |
| { | |
| "epoch": 1.219409282700422, | |
| "grad_norm": 2.9455223083496094, | |
| "learning_rate": 3.7754925726711832e-06, | |
| "loss": 0.6213325262069702, | |
| "step": 578 | |
| }, | |
| { | |
| "epoch": 1.2236286919831223, | |
| "grad_norm": 1.926129698753357, | |
| "learning_rate": 3.773463245507371e-06, | |
| "loss": 0.9760810732841492, | |
| "step": 580 | |
| }, | |
| { | |
| "epoch": 1.2278481012658227, | |
| "grad_norm": 5.75839900970459, | |
| "learning_rate": 3.7714254036901382e-06, | |
| "loss": 0.8893729448318481, | |
| "step": 582 | |
| }, | |
| { | |
| "epoch": 1.2320675105485233, | |
| "grad_norm": 2.0400707721710205, | |
| "learning_rate": 3.7693790582470815e-06, | |
| "loss": 0.7321144342422485, | |
| "step": 584 | |
| }, | |
| { | |
| "epoch": 1.2362869198312236, | |
| "grad_norm": 9.54411792755127, | |
| "learning_rate": 3.767324220251812e-06, | |
| "loss": 0.938395082950592, | |
| "step": 586 | |
| }, | |
| { | |
| "epoch": 1.240506329113924, | |
| "grad_norm": 3.1993234157562256, | |
| "learning_rate": 3.7652609008238994e-06, | |
| "loss": 0.8318843841552734, | |
| "step": 588 | |
| }, | |
| { | |
| "epoch": 1.2447257383966246, | |
| "grad_norm": 2.4239490032196045, | |
| "learning_rate": 3.76318911112881e-06, | |
| "loss": 1.1875081062316895, | |
| "step": 590 | |
| }, | |
| { | |
| "epoch": 1.248945147679325, | |
| "grad_norm": 7.202500820159912, | |
| "learning_rate": 3.761108862377844e-06, | |
| "loss": 0.6182510852813721, | |
| "step": 592 | |
| }, | |
| { | |
| "epoch": 1.2531645569620253, | |
| "grad_norm": 1.383612871170044, | |
| "learning_rate": 3.75902016582808e-06, | |
| "loss": 0.8994504809379578, | |
| "step": 594 | |
| }, | |
| { | |
| "epoch": 1.2573839662447257, | |
| "grad_norm": 4.613704204559326, | |
| "learning_rate": 3.756923032782309e-06, | |
| "loss": 0.7695854902267456, | |
| "step": 596 | |
| }, | |
| { | |
| "epoch": 1.261603375527426, | |
| "grad_norm": 3.9212303161621094, | |
| "learning_rate": 3.754817474588976e-06, | |
| "loss": 0.6324819922447205, | |
| "step": 598 | |
| }, | |
| { | |
| "epoch": 1.2658227848101267, | |
| "grad_norm": 2.7459237575531006, | |
| "learning_rate": 3.752703502642118e-06, | |
| "loss": 1.0705938339233398, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 1.270042194092827, | |
| "grad_norm": 6.447327613830566, | |
| "learning_rate": 3.7505811283813028e-06, | |
| "loss": 1.4245244264602661, | |
| "step": 602 | |
| }, | |
| { | |
| "epoch": 1.2742616033755274, | |
| "grad_norm": 1.7515556812286377, | |
| "learning_rate": 3.7484503632915642e-06, | |
| "loss": 1.0706822872161865, | |
| "step": 604 | |
| }, | |
| { | |
| "epoch": 1.2784810126582278, | |
| "grad_norm": 4.614502429962158, | |
| "learning_rate": 3.7463112189033452e-06, | |
| "loss": 0.9431329965591431, | |
| "step": 606 | |
| }, | |
| { | |
| "epoch": 1.2827004219409281, | |
| "grad_norm": 8.263338088989258, | |
| "learning_rate": 3.7441637067924314e-06, | |
| "loss": 0.8352319598197937, | |
| "step": 608 | |
| }, | |
| { | |
| "epoch": 1.2869198312236287, | |
| "grad_norm": 3.6502585411071777, | |
| "learning_rate": 3.7420078385798895e-06, | |
| "loss": 0.9339005351066589, | |
| "step": 610 | |
| }, | |
| { | |
| "epoch": 1.2911392405063291, | |
| "grad_norm": 8.820695877075195, | |
| "learning_rate": 3.739843625932004e-06, | |
| "loss": 0.6273094415664673, | |
| "step": 612 | |
| }, | |
| { | |
| "epoch": 1.2953586497890295, | |
| "grad_norm": 2.1156527996063232, | |
| "learning_rate": 3.737671080560215e-06, | |
| "loss": 0.6872820854187012, | |
| "step": 614 | |
| }, | |
| { | |
| "epoch": 1.29957805907173, | |
| "grad_norm": 2.442565679550171, | |
| "learning_rate": 3.7354902142210548e-06, | |
| "loss": 1.1194093227386475, | |
| "step": 616 | |
| }, | |
| { | |
| "epoch": 1.3037974683544304, | |
| "grad_norm": 1.8104244470596313, | |
| "learning_rate": 3.7333010387160834e-06, | |
| "loss": 1.1286826133728027, | |
| "step": 618 | |
| }, | |
| { | |
| "epoch": 1.3080168776371308, | |
| "grad_norm": 2.462080955505371, | |
| "learning_rate": 3.7311035658918248e-06, | |
| "loss": 0.7162832617759705, | |
| "step": 620 | |
| }, | |
| { | |
| "epoch": 1.3122362869198312, | |
| "grad_norm": 3.075747013092041, | |
| "learning_rate": 3.728897807639705e-06, | |
| "loss": 0.9384140968322754, | |
| "step": 622 | |
| }, | |
| { | |
| "epoch": 1.3164556962025316, | |
| "grad_norm": 30.50847053527832, | |
| "learning_rate": 3.7266837758959825e-06, | |
| "loss": 0.8814220428466797, | |
| "step": 624 | |
| }, | |
| { | |
| "epoch": 1.3206751054852321, | |
| "grad_norm": 2.7363264560699463, | |
| "learning_rate": 3.7244614826416896e-06, | |
| "loss": 1.1194790601730347, | |
| "step": 626 | |
| }, | |
| { | |
| "epoch": 1.3248945147679325, | |
| "grad_norm": 11.446985244750977, | |
| "learning_rate": 3.722230939902565e-06, | |
| "loss": 1.6146903038024902, | |
| "step": 628 | |
| }, | |
| { | |
| "epoch": 1.3291139240506329, | |
| "grad_norm": 1.5937474966049194, | |
| "learning_rate": 3.7199921597489876e-06, | |
| "loss": 0.8981386423110962, | |
| "step": 630 | |
| }, | |
| { | |
| "epoch": 1.3333333333333333, | |
| "grad_norm": 1.8236477375030518, | |
| "learning_rate": 3.717745154295913e-06, | |
| "loss": 1.0962973833084106, | |
| "step": 632 | |
| }, | |
| { | |
| "epoch": 1.3375527426160336, | |
| "grad_norm": 1.031929850578308, | |
| "learning_rate": 3.7154899357028072e-06, | |
| "loss": 0.8632595539093018, | |
| "step": 634 | |
| }, | |
| { | |
| "epoch": 1.3417721518987342, | |
| "grad_norm": 6.748950958251953, | |
| "learning_rate": 3.7132265161735803e-06, | |
| "loss": 0.6589536666870117, | |
| "step": 636 | |
| }, | |
| { | |
| "epoch": 1.3459915611814346, | |
| "grad_norm": 9.24288558959961, | |
| "learning_rate": 3.710954907956522e-06, | |
| "loss": 0.8823557496070862, | |
| "step": 638 | |
| }, | |
| { | |
| "epoch": 1.350210970464135, | |
| "grad_norm": 5.132577419281006, | |
| "learning_rate": 3.7086751233442327e-06, | |
| "loss": 1.2359545230865479, | |
| "step": 640 | |
| }, | |
| { | |
| "epoch": 1.3544303797468356, | |
| "grad_norm": 2.1931583881378174, | |
| "learning_rate": 3.7063871746735615e-06, | |
| "loss": 0.839038610458374, | |
| "step": 642 | |
| }, | |
| { | |
| "epoch": 1.358649789029536, | |
| "grad_norm": 1.920567512512207, | |
| "learning_rate": 3.704091074325534e-06, | |
| "loss": 1.2603816986083984, | |
| "step": 644 | |
| }, | |
| { | |
| "epoch": 1.3628691983122363, | |
| "grad_norm": 1.3721178770065308, | |
| "learning_rate": 3.7017868347252882e-06, | |
| "loss": 1.1347554922103882, | |
| "step": 646 | |
| }, | |
| { | |
| "epoch": 1.3670886075949367, | |
| "grad_norm": 6.712429523468018, | |
| "learning_rate": 3.699474468342008e-06, | |
| "loss": 0.8782555460929871, | |
| "step": 648 | |
| }, | |
| { | |
| "epoch": 1.371308016877637, | |
| "grad_norm": 3.626140594482422, | |
| "learning_rate": 3.6971539876888525e-06, | |
| "loss": 1.3546593189239502, | |
| "step": 650 | |
| }, | |
| { | |
| "epoch": 1.3755274261603376, | |
| "grad_norm": 2.531872034072876, | |
| "learning_rate": 3.694825405322894e-06, | |
| "loss": 1.1074378490447998, | |
| "step": 652 | |
| }, | |
| { | |
| "epoch": 1.379746835443038, | |
| "grad_norm": 1.418874740600586, | |
| "learning_rate": 3.692488733845044e-06, | |
| "loss": 0.8609563112258911, | |
| "step": 654 | |
| }, | |
| { | |
| "epoch": 1.3839662447257384, | |
| "grad_norm": 1.9295591115951538, | |
| "learning_rate": 3.690143985899987e-06, | |
| "loss": 1.2149752378463745, | |
| "step": 656 | |
| }, | |
| { | |
| "epoch": 1.3881856540084387, | |
| "grad_norm": 9.573609352111816, | |
| "learning_rate": 3.687791174176115e-06, | |
| "loss": 0.6435118317604065, | |
| "step": 658 | |
| }, | |
| { | |
| "epoch": 1.3924050632911391, | |
| "grad_norm": 2.0520520210266113, | |
| "learning_rate": 3.685430311405453e-06, | |
| "loss": 1.1482752561569214, | |
| "step": 660 | |
| }, | |
| { | |
| "epoch": 1.3966244725738397, | |
| "grad_norm": 5.835472583770752, | |
| "learning_rate": 3.6830614103635977e-06, | |
| "loss": 0.6969774961471558, | |
| "step": 662 | |
| }, | |
| { | |
| "epoch": 1.40084388185654, | |
| "grad_norm": 1.448106288909912, | |
| "learning_rate": 3.6806844838696397e-06, | |
| "loss": 1.1494622230529785, | |
| "step": 664 | |
| }, | |
| { | |
| "epoch": 1.4050632911392404, | |
| "grad_norm": 2.3839871883392334, | |
| "learning_rate": 3.6782995447861017e-06, | |
| "loss": 0.7210063934326172, | |
| "step": 666 | |
| }, | |
| { | |
| "epoch": 1.409282700421941, | |
| "grad_norm": 3.103909492492676, | |
| "learning_rate": 3.675906606018865e-06, | |
| "loss": 1.1002976894378662, | |
| "step": 668 | |
| }, | |
| { | |
| "epoch": 1.4135021097046414, | |
| "grad_norm": 1.7114917039871216, | |
| "learning_rate": 3.6735056805171012e-06, | |
| "loss": 1.154873013496399, | |
| "step": 670 | |
| }, | |
| { | |
| "epoch": 1.4177215189873418, | |
| "grad_norm": 3.427095651626587, | |
| "learning_rate": 3.6710967812731994e-06, | |
| "loss": 1.3804283142089844, | |
| "step": 672 | |
| }, | |
| { | |
| "epoch": 1.4219409282700421, | |
| "grad_norm": 2.9029994010925293, | |
| "learning_rate": 3.6686799213226984e-06, | |
| "loss": 0.7311358451843262, | |
| "step": 674 | |
| }, | |
| { | |
| "epoch": 1.4261603375527425, | |
| "grad_norm": 2.845263719558716, | |
| "learning_rate": 3.666255113744218e-06, | |
| "loss": 0.6623574495315552, | |
| "step": 676 | |
| }, | |
| { | |
| "epoch": 1.4303797468354431, | |
| "grad_norm": 5.403914451599121, | |
| "learning_rate": 3.663822371659383e-06, | |
| "loss": 0.9805995225906372, | |
| "step": 678 | |
| }, | |
| { | |
| "epoch": 1.4345991561181435, | |
| "grad_norm": 3.444819927215576, | |
| "learning_rate": 3.6613817082327565e-06, | |
| "loss": 1.088465690612793, | |
| "step": 680 | |
| }, | |
| { | |
| "epoch": 1.4388185654008439, | |
| "grad_norm": 4.646100997924805, | |
| "learning_rate": 3.658933136671767e-06, | |
| "loss": 0.8819342851638794, | |
| "step": 682 | |
| }, | |
| { | |
| "epoch": 1.4430379746835442, | |
| "grad_norm": 3.1290183067321777, | |
| "learning_rate": 3.656476670226637e-06, | |
| "loss": 1.2142698764801025, | |
| "step": 684 | |
| }, | |
| { | |
| "epoch": 1.4472573839662446, | |
| "grad_norm": 4.68398904800415, | |
| "learning_rate": 3.6540123221903123e-06, | |
| "loss": 0.7775373458862305, | |
| "step": 686 | |
| }, | |
| { | |
| "epoch": 1.4514767932489452, | |
| "grad_norm": 3.9637718200683594, | |
| "learning_rate": 3.651540105898387e-06, | |
| "loss": 0.9440705180168152, | |
| "step": 688 | |
| }, | |
| { | |
| "epoch": 1.4556962025316456, | |
| "grad_norm": 6.741257190704346, | |
| "learning_rate": 3.6490600347290353e-06, | |
| "loss": 1.0546112060546875, | |
| "step": 690 | |
| }, | |
| { | |
| "epoch": 1.459915611814346, | |
| "grad_norm": 4.779881000518799, | |
| "learning_rate": 3.6465721221029376e-06, | |
| "loss": 0.7046493887901306, | |
| "step": 692 | |
| }, | |
| { | |
| "epoch": 1.4641350210970465, | |
| "grad_norm": 5.674314498901367, | |
| "learning_rate": 3.6440763814832075e-06, | |
| "loss": 1.2944858074188232, | |
| "step": 694 | |
| }, | |
| { | |
| "epoch": 1.4683544303797469, | |
| "grad_norm": 2.4671552181243896, | |
| "learning_rate": 3.6415728263753176e-06, | |
| "loss": 0.6650893688201904, | |
| "step": 696 | |
| }, | |
| { | |
| "epoch": 1.4725738396624473, | |
| "grad_norm": 3.0560495853424072, | |
| "learning_rate": 3.63906147032703e-06, | |
| "loss": 1.177491545677185, | |
| "step": 698 | |
| }, | |
| { | |
| "epoch": 1.4767932489451476, | |
| "grad_norm": 2.7282063961029053, | |
| "learning_rate": 3.6365423269283187e-06, | |
| "loss": 1.2095248699188232, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 1.481012658227848, | |
| "grad_norm": 5.56691837310791, | |
| "learning_rate": 3.6340154098113e-06, | |
| "loss": 1.0211296081542969, | |
| "step": 702 | |
| }, | |
| { | |
| "epoch": 1.4852320675105486, | |
| "grad_norm": 11.867128372192383, | |
| "learning_rate": 3.631480732650156e-06, | |
| "loss": 0.8005210161209106, | |
| "step": 704 | |
| }, | |
| { | |
| "epoch": 1.489451476793249, | |
| "grad_norm": 1.5090935230255127, | |
| "learning_rate": 3.6289383091610625e-06, | |
| "loss": 1.1544265747070312, | |
| "step": 706 | |
| }, | |
| { | |
| "epoch": 1.4936708860759493, | |
| "grad_norm": 1.969177484512329, | |
| "learning_rate": 3.626388153102113e-06, | |
| "loss": 1.180321455001831, | |
| "step": 708 | |
| }, | |
| { | |
| "epoch": 1.49789029535865, | |
| "grad_norm": 1.4724305868148804, | |
| "learning_rate": 3.6238302782732446e-06, | |
| "loss": 1.0343523025512695, | |
| "step": 710 | |
| }, | |
| { | |
| "epoch": 1.50210970464135, | |
| "grad_norm": 4.455009937286377, | |
| "learning_rate": 3.621264698516166e-06, | |
| "loss": 0.48465144634246826, | |
| "step": 712 | |
| }, | |
| { | |
| "epoch": 1.5063291139240507, | |
| "grad_norm": 2.1380884647369385, | |
| "learning_rate": 3.6186914277142776e-06, | |
| "loss": 1.1161589622497559, | |
| "step": 714 | |
| }, | |
| { | |
| "epoch": 1.510548523206751, | |
| "grad_norm": 3.7489266395568848, | |
| "learning_rate": 3.6161104797926013e-06, | |
| "loss": 1.091984510421753, | |
| "step": 716 | |
| }, | |
| { | |
| "epoch": 1.5147679324894514, | |
| "grad_norm": 2.2989237308502197, | |
| "learning_rate": 3.613521868717703e-06, | |
| "loss": 1.1017979383468628, | |
| "step": 718 | |
| }, | |
| { | |
| "epoch": 1.518987341772152, | |
| "grad_norm": 4.086328506469727, | |
| "learning_rate": 3.6109256084976147e-06, | |
| "loss": 1.0278382301330566, | |
| "step": 720 | |
| }, | |
| { | |
| "epoch": 1.5232067510548524, | |
| "grad_norm": 4.82416296005249, | |
| "learning_rate": 3.608321713181764e-06, | |
| "loss": 1.198899745941162, | |
| "step": 722 | |
| }, | |
| { | |
| "epoch": 1.5274261603375527, | |
| "grad_norm": 2.247619867324829, | |
| "learning_rate": 3.6057101968608936e-06, | |
| "loss": 1.2113308906555176, | |
| "step": 724 | |
| }, | |
| { | |
| "epoch": 1.5316455696202531, | |
| "grad_norm": 5.557096004486084, | |
| "learning_rate": 3.603091073666987e-06, | |
| "loss": 0.5562316179275513, | |
| "step": 726 | |
| }, | |
| { | |
| "epoch": 1.5358649789029535, | |
| "grad_norm": 8.159991264343262, | |
| "learning_rate": 3.600464357773191e-06, | |
| "loss": 0.414279580116272, | |
| "step": 728 | |
| }, | |
| { | |
| "epoch": 1.540084388185654, | |
| "grad_norm": 2.0832576751708984, | |
| "learning_rate": 3.5978300633937403e-06, | |
| "loss": 0.9449454545974731, | |
| "step": 730 | |
| }, | |
| { | |
| "epoch": 1.5443037974683544, | |
| "grad_norm": 2.1067464351654053, | |
| "learning_rate": 3.5951882047838798e-06, | |
| "loss": 0.9659292101860046, | |
| "step": 732 | |
| }, | |
| { | |
| "epoch": 1.5485232067510548, | |
| "grad_norm": 1.711477518081665, | |
| "learning_rate": 3.5925387962397866e-06, | |
| "loss": 1.1613965034484863, | |
| "step": 734 | |
| }, | |
| { | |
| "epoch": 1.5527426160337554, | |
| "grad_norm": 3.1845133304595947, | |
| "learning_rate": 3.589881852098495e-06, | |
| "loss": 0.864007830619812, | |
| "step": 736 | |
| }, | |
| { | |
| "epoch": 1.5569620253164556, | |
| "grad_norm": 3.9110360145568848, | |
| "learning_rate": 3.5872173867378177e-06, | |
| "loss": 0.902462363243103, | |
| "step": 738 | |
| }, | |
| { | |
| "epoch": 1.5611814345991561, | |
| "grad_norm": 3.437896490097046, | |
| "learning_rate": 3.5845454145762657e-06, | |
| "loss": 1.0834063291549683, | |
| "step": 740 | |
| }, | |
| { | |
| "epoch": 1.5654008438818565, | |
| "grad_norm": 1.5851118564605713, | |
| "learning_rate": 3.5818659500729735e-06, | |
| "loss": 0.7697902917861938, | |
| "step": 742 | |
| }, | |
| { | |
| "epoch": 1.5696202531645569, | |
| "grad_norm": 7.4633588790893555, | |
| "learning_rate": 3.5791790077276214e-06, | |
| "loss": 0.5523649454116821, | |
| "step": 744 | |
| }, | |
| { | |
| "epoch": 1.5738396624472575, | |
| "grad_norm": 1.9582291841506958, | |
| "learning_rate": 3.576484602080352e-06, | |
| "loss": 0.6860834360122681, | |
| "step": 746 | |
| }, | |
| { | |
| "epoch": 1.5780590717299579, | |
| "grad_norm": 3.9132864475250244, | |
| "learning_rate": 3.573782747711697e-06, | |
| "loss": 0.6468961834907532, | |
| "step": 748 | |
| }, | |
| { | |
| "epoch": 1.5822784810126582, | |
| "grad_norm": 2.304565906524658, | |
| "learning_rate": 3.571073459242498e-06, | |
| "loss": 1.1524250507354736, | |
| "step": 750 | |
| }, | |
| { | |
| "epoch": 1.5864978902953588, | |
| "grad_norm": 2.1101715564727783, | |
| "learning_rate": 3.56835675133382e-06, | |
| "loss": 0.7160176038742065, | |
| "step": 752 | |
| }, | |
| { | |
| "epoch": 1.590717299578059, | |
| "grad_norm": 2.8462789058685303, | |
| "learning_rate": 3.565632638686884e-06, | |
| "loss": 0.7810688018798828, | |
| "step": 754 | |
| }, | |
| { | |
| "epoch": 1.5949367088607596, | |
| "grad_norm": 2.3834588527679443, | |
| "learning_rate": 3.562901136042977e-06, | |
| "loss": 0.6207853555679321, | |
| "step": 756 | |
| }, | |
| { | |
| "epoch": 1.59915611814346, | |
| "grad_norm": 3.6158013343811035, | |
| "learning_rate": 3.560162258183377e-06, | |
| "loss": 0.8702360987663269, | |
| "step": 758 | |
| }, | |
| { | |
| "epoch": 1.6033755274261603, | |
| "grad_norm": 2.5689971446990967, | |
| "learning_rate": 3.5574160199292737e-06, | |
| "loss": 1.1135127544403076, | |
| "step": 760 | |
| }, | |
| { | |
| "epoch": 1.6075949367088609, | |
| "grad_norm": 1.0458358526229858, | |
| "learning_rate": 3.5546624361416855e-06, | |
| "loss": 0.7249690294265747, | |
| "step": 762 | |
| }, | |
| { | |
| "epoch": 1.611814345991561, | |
| "grad_norm": 1.9451916217803955, | |
| "learning_rate": 3.55190152172138e-06, | |
| "loss": 1.1511328220367432, | |
| "step": 764 | |
| }, | |
| { | |
| "epoch": 1.6160337552742616, | |
| "grad_norm": 3.351893901824951, | |
| "learning_rate": 3.549133291608796e-06, | |
| "loss": 1.0460021495819092, | |
| "step": 766 | |
| }, | |
| { | |
| "epoch": 1.620253164556962, | |
| "grad_norm": 4.358265399932861, | |
| "learning_rate": 3.5463577607839588e-06, | |
| "loss": 0.9370321035385132, | |
| "step": 768 | |
| }, | |
| { | |
| "epoch": 1.6244725738396624, | |
| "grad_norm": 3.3822832107543945, | |
| "learning_rate": 3.5435749442664016e-06, | |
| "loss": 1.1469030380249023, | |
| "step": 770 | |
| }, | |
| { | |
| "epoch": 1.628691983122363, | |
| "grad_norm": 2.77669358253479, | |
| "learning_rate": 3.540784857115084e-06, | |
| "loss": 1.1965186595916748, | |
| "step": 772 | |
| }, | |
| { | |
| "epoch": 1.6329113924050633, | |
| "grad_norm": 2.8289971351623535, | |
| "learning_rate": 3.537987514428307e-06, | |
| "loss": 1.1629645824432373, | |
| "step": 774 | |
| }, | |
| { | |
| "epoch": 1.6371308016877637, | |
| "grad_norm": 2.216648817062378, | |
| "learning_rate": 3.535182931343638e-06, | |
| "loss": 1.1647021770477295, | |
| "step": 776 | |
| }, | |
| { | |
| "epoch": 1.6413502109704643, | |
| "grad_norm": 8.33935546875, | |
| "learning_rate": 3.5323711230378236e-06, | |
| "loss": 0.8370733261108398, | |
| "step": 778 | |
| }, | |
| { | |
| "epoch": 1.6455696202531644, | |
| "grad_norm": 11.60359001159668, | |
| "learning_rate": 3.5295521047267085e-06, | |
| "loss": 0.3443516492843628, | |
| "step": 780 | |
| }, | |
| { | |
| "epoch": 1.649789029535865, | |
| "grad_norm": 2.730212688446045, | |
| "learning_rate": 3.5267258916651543e-06, | |
| "loss": 1.091811180114746, | |
| "step": 782 | |
| }, | |
| { | |
| "epoch": 1.6540084388185654, | |
| "grad_norm": 4.5352888107299805, | |
| "learning_rate": 3.5238924991469567e-06, | |
| "loss": 0.916614830493927, | |
| "step": 784 | |
| }, | |
| { | |
| "epoch": 1.6582278481012658, | |
| "grad_norm": 11.15390682220459, | |
| "learning_rate": 3.5210519425047618e-06, | |
| "loss": 1.0898263454437256, | |
| "step": 786 | |
| }, | |
| { | |
| "epoch": 1.6624472573839664, | |
| "grad_norm": 4.555877208709717, | |
| "learning_rate": 3.518204237109983e-06, | |
| "loss": 0.5768306255340576, | |
| "step": 788 | |
| }, | |
| { | |
| "epoch": 1.6666666666666665, | |
| "grad_norm": 1.4780551195144653, | |
| "learning_rate": 3.51534939837272e-06, | |
| "loss": 1.1375391483306885, | |
| "step": 790 | |
| }, | |
| { | |
| "epoch": 1.6708860759493671, | |
| "grad_norm": 6.170176982879639, | |
| "learning_rate": 3.5124874417416734e-06, | |
| "loss": 0.6376422643661499, | |
| "step": 792 | |
| }, | |
| { | |
| "epoch": 1.6751054852320675, | |
| "grad_norm": 0.6833944916725159, | |
| "learning_rate": 3.509618382704061e-06, | |
| "loss": 0.900169849395752, | |
| "step": 794 | |
| }, | |
| { | |
| "epoch": 1.6793248945147679, | |
| "grad_norm": 1.5629899501800537, | |
| "learning_rate": 3.5067422367855364e-06, | |
| "loss": 1.1173095703125, | |
| "step": 796 | |
| }, | |
| { | |
| "epoch": 1.6835443037974684, | |
| "grad_norm": 3.2922439575195312, | |
| "learning_rate": 3.5038590195501006e-06, | |
| "loss": 0.8964512348175049, | |
| "step": 798 | |
| }, | |
| { | |
| "epoch": 1.6877637130801688, | |
| "grad_norm": 4.654068470001221, | |
| "learning_rate": 3.5009687466000224e-06, | |
| "loss": 1.2155747413635254, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 1.6919831223628692, | |
| "grad_norm": 7.5080437660217285, | |
| "learning_rate": 3.498071433575751e-06, | |
| "loss": 0.5988451242446899, | |
| "step": 802 | |
| }, | |
| { | |
| "epoch": 1.6962025316455698, | |
| "grad_norm": 1.9102202653884888, | |
| "learning_rate": 3.495167096155834e-06, | |
| "loss": 1.2323973178863525, | |
| "step": 804 | |
| }, | |
| { | |
| "epoch": 1.70042194092827, | |
| "grad_norm": 3.7390973567962646, | |
| "learning_rate": 3.4922557500568272e-06, | |
| "loss": 1.1244511604309082, | |
| "step": 806 | |
| }, | |
| { | |
| "epoch": 1.7046413502109705, | |
| "grad_norm": 32.09159851074219, | |
| "learning_rate": 3.489337411033217e-06, | |
| "loss": 0.8318772912025452, | |
| "step": 808 | |
| }, | |
| { | |
| "epoch": 1.7088607594936709, | |
| "grad_norm": 3.8693466186523438, | |
| "learning_rate": 3.48641209487733e-06, | |
| "loss": 0.8628280758857727, | |
| "step": 810 | |
| }, | |
| { | |
| "epoch": 1.7130801687763713, | |
| "grad_norm": 2.1568024158477783, | |
| "learning_rate": 3.4834798174192476e-06, | |
| "loss": 1.1509721279144287, | |
| "step": 812 | |
| }, | |
| { | |
| "epoch": 1.7172995780590719, | |
| "grad_norm": 6.118010997772217, | |
| "learning_rate": 3.4805405945267245e-06, | |
| "loss": 1.4755480289459229, | |
| "step": 814 | |
| }, | |
| { | |
| "epoch": 1.721518987341772, | |
| "grad_norm": 1.7534123659133911, | |
| "learning_rate": 3.4775944421050976e-06, | |
| "loss": 1.1487780809402466, | |
| "step": 816 | |
| }, | |
| { | |
| "epoch": 1.7257383966244726, | |
| "grad_norm": 12.342169761657715, | |
| "learning_rate": 3.4746413760972033e-06, | |
| "loss": 0.8102009296417236, | |
| "step": 818 | |
| }, | |
| { | |
| "epoch": 1.729957805907173, | |
| "grad_norm": 7.229720115661621, | |
| "learning_rate": 3.4716814124832895e-06, | |
| "loss": 0.38379400968551636, | |
| "step": 820 | |
| }, | |
| { | |
| "epoch": 1.7341772151898733, | |
| "grad_norm": 5.0790886878967285, | |
| "learning_rate": 3.468714567280931e-06, | |
| "loss": 0.6532369256019592, | |
| "step": 822 | |
| }, | |
| { | |
| "epoch": 1.738396624472574, | |
| "grad_norm": 9.148484230041504, | |
| "learning_rate": 3.4657408565449413e-06, | |
| "loss": 0.7519415616989136, | |
| "step": 824 | |
| }, | |
| { | |
| "epoch": 1.7426160337552743, | |
| "grad_norm": 3.0881879329681396, | |
| "learning_rate": 3.4627602963672854e-06, | |
| "loss": 0.9758714437484741, | |
| "step": 826 | |
| }, | |
| { | |
| "epoch": 1.7468354430379747, | |
| "grad_norm": 4.036842346191406, | |
| "learning_rate": 3.459772902876994e-06, | |
| "loss": 0.9723775386810303, | |
| "step": 828 | |
| }, | |
| { | |
| "epoch": 1.7510548523206753, | |
| "grad_norm": 2.8862991333007812, | |
| "learning_rate": 3.4567786922400757e-06, | |
| "loss": 1.1287617683410645, | |
| "step": 830 | |
| }, | |
| { | |
| "epoch": 1.7552742616033754, | |
| "grad_norm": 2.3025224208831787, | |
| "learning_rate": 3.4537776806594293e-06, | |
| "loss": 1.1016814708709717, | |
| "step": 832 | |
| }, | |
| { | |
| "epoch": 1.759493670886076, | |
| "grad_norm": 2.3911185264587402, | |
| "learning_rate": 3.4507698843747567e-06, | |
| "loss": 0.8698973655700684, | |
| "step": 834 | |
| }, | |
| { | |
| "epoch": 1.7637130801687764, | |
| "grad_norm": 2.9084486961364746, | |
| "learning_rate": 3.4477553196624734e-06, | |
| "loss": 1.1183581352233887, | |
| "step": 836 | |
| }, | |
| { | |
| "epoch": 1.7679324894514767, | |
| "grad_norm": 2.349198579788208, | |
| "learning_rate": 3.444734002835624e-06, | |
| "loss": 1.0136666297912598, | |
| "step": 838 | |
| }, | |
| { | |
| "epoch": 1.7721518987341773, | |
| "grad_norm": 3.0843915939331055, | |
| "learning_rate": 3.441705950243789e-06, | |
| "loss": 0.8606936931610107, | |
| "step": 840 | |
| }, | |
| { | |
| "epoch": 1.7763713080168775, | |
| "grad_norm": 3.7463226318359375, | |
| "learning_rate": 3.4386711782729996e-06, | |
| "loss": 0.9574577808380127, | |
| "step": 842 | |
| }, | |
| { | |
| "epoch": 1.780590717299578, | |
| "grad_norm": 1.5658468008041382, | |
| "learning_rate": 3.4356297033456496e-06, | |
| "loss": 0.46850845217704773, | |
| "step": 844 | |
| }, | |
| { | |
| "epoch": 1.7848101265822784, | |
| "grad_norm": 1.2881762981414795, | |
| "learning_rate": 3.432581541920404e-06, | |
| "loss": 0.7656896114349365, | |
| "step": 846 | |
| }, | |
| { | |
| "epoch": 1.7890295358649788, | |
| "grad_norm": 4.494737148284912, | |
| "learning_rate": 3.429526710492111e-06, | |
| "loss": 0.6177375912666321, | |
| "step": 848 | |
| }, | |
| { | |
| "epoch": 1.7932489451476794, | |
| "grad_norm": 2.9015707969665527, | |
| "learning_rate": 3.426465225591713e-06, | |
| "loss": 0.8043622374534607, | |
| "step": 850 | |
| }, | |
| { | |
| "epoch": 1.7974683544303798, | |
| "grad_norm": 2.33308482170105, | |
| "learning_rate": 3.4233971037861587e-06, | |
| "loss": 1.1262691020965576, | |
| "step": 852 | |
| }, | |
| { | |
| "epoch": 1.8016877637130801, | |
| "grad_norm": 3.47825026512146, | |
| "learning_rate": 3.4203223616783097e-06, | |
| "loss": 1.4144643545150757, | |
| "step": 854 | |
| }, | |
| { | |
| "epoch": 1.8059071729957807, | |
| "grad_norm": 0.613185703754425, | |
| "learning_rate": 3.4172410159068545e-06, | |
| "loss": 0.9285470247268677, | |
| "step": 856 | |
| }, | |
| { | |
| "epoch": 1.810126582278481, | |
| "grad_norm": 4.0782623291015625, | |
| "learning_rate": 3.414153083146215e-06, | |
| "loss": 1.0604450702667236, | |
| "step": 858 | |
| }, | |
| { | |
| "epoch": 1.8143459915611815, | |
| "grad_norm": 4.575808525085449, | |
| "learning_rate": 3.411058580106458e-06, | |
| "loss": 0.7167332172393799, | |
| "step": 860 | |
| }, | |
| { | |
| "epoch": 1.8185654008438819, | |
| "grad_norm": 2.6179590225219727, | |
| "learning_rate": 3.4079575235332077e-06, | |
| "loss": 1.1503570079803467, | |
| "step": 862 | |
| }, | |
| { | |
| "epoch": 1.8227848101265822, | |
| "grad_norm": 1.5741914510726929, | |
| "learning_rate": 3.4048499302075485e-06, | |
| "loss": 1.0776422023773193, | |
| "step": 864 | |
| }, | |
| { | |
| "epoch": 1.8270042194092828, | |
| "grad_norm": 2.200496196746826, | |
| "learning_rate": 3.40173581694594e-06, | |
| "loss": 1.0765013694763184, | |
| "step": 866 | |
| }, | |
| { | |
| "epoch": 1.831223628691983, | |
| "grad_norm": 7.11644172668457, | |
| "learning_rate": 3.3986152006001233e-06, | |
| "loss": 0.9683362245559692, | |
| "step": 868 | |
| }, | |
| { | |
| "epoch": 1.8354430379746836, | |
| "grad_norm": 2.3128275871276855, | |
| "learning_rate": 3.3954880980570296e-06, | |
| "loss": 1.044558048248291, | |
| "step": 870 | |
| }, | |
| { | |
| "epoch": 1.839662447257384, | |
| "grad_norm": 10.811915397644043, | |
| "learning_rate": 3.392354526238691e-06, | |
| "loss": 0.8069396615028381, | |
| "step": 872 | |
| }, | |
| { | |
| "epoch": 1.8438818565400843, | |
| "grad_norm": 2.664677858352661, | |
| "learning_rate": 3.3892145021021462e-06, | |
| "loss": 0.9714232683181763, | |
| "step": 874 | |
| }, | |
| { | |
| "epoch": 1.8481012658227849, | |
| "grad_norm": 2.777123212814331, | |
| "learning_rate": 3.3860680426393515e-06, | |
| "loss": 1.1506626605987549, | |
| "step": 876 | |
| }, | |
| { | |
| "epoch": 1.8523206751054853, | |
| "grad_norm": 4.2269368171691895, | |
| "learning_rate": 3.3829151648770855e-06, | |
| "loss": 0.8257066011428833, | |
| "step": 878 | |
| }, | |
| { | |
| "epoch": 1.8565400843881856, | |
| "grad_norm": 3.8701000213623047, | |
| "learning_rate": 3.3797558858768593e-06, | |
| "loss": 0.7449560761451721, | |
| "step": 880 | |
| }, | |
| { | |
| "epoch": 1.8607594936708862, | |
| "grad_norm": 3.4201698303222656, | |
| "learning_rate": 3.3765902227348255e-06, | |
| "loss": 1.0331380367279053, | |
| "step": 882 | |
| }, | |
| { | |
| "epoch": 1.8649789029535864, | |
| "grad_norm": 3.0394904613494873, | |
| "learning_rate": 3.3734181925816826e-06, | |
| "loss": 0.7403502464294434, | |
| "step": 884 | |
| }, | |
| { | |
| "epoch": 1.869198312236287, | |
| "grad_norm": 2.232851266860962, | |
| "learning_rate": 3.370239812582583e-06, | |
| "loss": 0.7928322553634644, | |
| "step": 886 | |
| }, | |
| { | |
| "epoch": 1.8734177215189873, | |
| "grad_norm": 1.918642282485962, | |
| "learning_rate": 3.367055099937041e-06, | |
| "loss": 1.0973682403564453, | |
| "step": 888 | |
| }, | |
| { | |
| "epoch": 1.8776371308016877, | |
| "grad_norm": 4.839916229248047, | |
| "learning_rate": 3.3638640718788406e-06, | |
| "loss": 0.5104875564575195, | |
| "step": 890 | |
| }, | |
| { | |
| "epoch": 1.8818565400843883, | |
| "grad_norm": 7.4713239669799805, | |
| "learning_rate": 3.3606667456759397e-06, | |
| "loss": 0.7245833873748779, | |
| "step": 892 | |
| }, | |
| { | |
| "epoch": 1.8860759493670884, | |
| "grad_norm": 2.0137648582458496, | |
| "learning_rate": 3.3574631386303797e-06, | |
| "loss": 1.1190528869628906, | |
| "step": 894 | |
| }, | |
| { | |
| "epoch": 1.890295358649789, | |
| "grad_norm": 1.844823956489563, | |
| "learning_rate": 3.3542532680781876e-06, | |
| "loss": 1.3033103942871094, | |
| "step": 896 | |
| }, | |
| { | |
| "epoch": 1.8945147679324894, | |
| "grad_norm": 9.570866584777832, | |
| "learning_rate": 3.351037151389287e-06, | |
| "loss": 0.8090759515762329, | |
| "step": 898 | |
| }, | |
| { | |
| "epoch": 1.8987341772151898, | |
| "grad_norm": 5.827152252197266, | |
| "learning_rate": 3.3478148059674016e-06, | |
| "loss": 1.06083083152771, | |
| "step": 900 | |
| }, | |
| { | |
| "epoch": 1.9029535864978904, | |
| "grad_norm": 4.6404595375061035, | |
| "learning_rate": 3.3445862492499595e-06, | |
| "loss": 1.226179838180542, | |
| "step": 902 | |
| }, | |
| { | |
| "epoch": 1.9071729957805907, | |
| "grad_norm": 4.473128318786621, | |
| "learning_rate": 3.3413514987080043e-06, | |
| "loss": 1.0048933029174805, | |
| "step": 904 | |
| }, | |
| { | |
| "epoch": 1.9113924050632911, | |
| "grad_norm": 2.088918924331665, | |
| "learning_rate": 3.338110571846093e-06, | |
| "loss": 1.325439214706421, | |
| "step": 906 | |
| }, | |
| { | |
| "epoch": 1.9156118143459917, | |
| "grad_norm": 7.492137432098389, | |
| "learning_rate": 3.3348634862022074e-06, | |
| "loss": 0.5317611694335938, | |
| "step": 908 | |
| }, | |
| { | |
| "epoch": 1.9198312236286919, | |
| "grad_norm": 5.070749759674072, | |
| "learning_rate": 3.331610259347657e-06, | |
| "loss": 1.0684950351715088, | |
| "step": 910 | |
| }, | |
| { | |
| "epoch": 1.9240506329113924, | |
| "grad_norm": 4.511446952819824, | |
| "learning_rate": 3.328350908886983e-06, | |
| "loss": 0.8111604452133179, | |
| "step": 912 | |
| }, | |
| { | |
| "epoch": 1.9282700421940928, | |
| "grad_norm": 9.428959846496582, | |
| "learning_rate": 3.3250854524578636e-06, | |
| "loss": 1.1320171356201172, | |
| "step": 914 | |
| }, | |
| { | |
| "epoch": 1.9324894514767932, | |
| "grad_norm": 13.564945220947266, | |
| "learning_rate": 3.3218139077310206e-06, | |
| "loss": 0.8104444742202759, | |
| "step": 916 | |
| }, | |
| { | |
| "epoch": 1.9367088607594938, | |
| "grad_norm": 2.054192543029785, | |
| "learning_rate": 3.3185362924101207e-06, | |
| "loss": 1.0631756782531738, | |
| "step": 918 | |
| }, | |
| { | |
| "epoch": 1.9409282700421941, | |
| "grad_norm": 3.2311954498291016, | |
| "learning_rate": 3.315252624231682e-06, | |
| "loss": 0.5999157428741455, | |
| "step": 920 | |
| }, | |
| { | |
| "epoch": 1.9451476793248945, | |
| "grad_norm": 1.8943932056427002, | |
| "learning_rate": 3.3119629209649763e-06, | |
| "loss": 1.0982520580291748, | |
| "step": 922 | |
| }, | |
| { | |
| "epoch": 1.9493670886075949, | |
| "grad_norm": 1.940902590751648, | |
| "learning_rate": 3.3086672004119335e-06, | |
| "loss": 1.226811408996582, | |
| "step": 924 | |
| }, | |
| { | |
| "epoch": 1.9535864978902953, | |
| "grad_norm": 3.3977231979370117, | |
| "learning_rate": 3.305365480407046e-06, | |
| "loss": 0.9012327194213867, | |
| "step": 926 | |
| }, | |
| { | |
| "epoch": 1.9578059071729959, | |
| "grad_norm": 3.1414709091186523, | |
| "learning_rate": 3.3020577788172725e-06, | |
| "loss": 0.7510135173797607, | |
| "step": 928 | |
| }, | |
| { | |
| "epoch": 1.9620253164556962, | |
| "grad_norm": 2.9762823581695557, | |
| "learning_rate": 3.2987441135419394e-06, | |
| "loss": 1.1897534132003784, | |
| "step": 930 | |
| }, | |
| { | |
| "epoch": 1.9662447257383966, | |
| "grad_norm": 3.8375062942504883, | |
| "learning_rate": 3.2954245025126446e-06, | |
| "loss": 0.9271247982978821, | |
| "step": 932 | |
| }, | |
| { | |
| "epoch": 1.9704641350210972, | |
| "grad_norm": 1.9467542171478271, | |
| "learning_rate": 3.292098963693163e-06, | |
| "loss": 1.2084356546401978, | |
| "step": 934 | |
| }, | |
| { | |
| "epoch": 1.9746835443037973, | |
| "grad_norm": 2.952320098876953, | |
| "learning_rate": 3.2887675150793443e-06, | |
| "loss": 1.1498595476150513, | |
| "step": 936 | |
| }, | |
| { | |
| "epoch": 1.978902953586498, | |
| "grad_norm": 1.555445909500122, | |
| "learning_rate": 3.2854301746990206e-06, | |
| "loss": 0.8107820749282837, | |
| "step": 938 | |
| }, | |
| { | |
| "epoch": 1.9831223628691983, | |
| "grad_norm": 1.9152470827102661, | |
| "learning_rate": 3.2820869606119068e-06, | |
| "loss": 1.1318726539611816, | |
| "step": 940 | |
| }, | |
| { | |
| "epoch": 1.9873417721518987, | |
| "grad_norm": 3.219928026199341, | |
| "learning_rate": 3.278737890909502e-06, | |
| "loss": 0.9334742426872253, | |
| "step": 942 | |
| }, | |
| { | |
| "epoch": 1.9915611814345993, | |
| "grad_norm": 1.992208480834961, | |
| "learning_rate": 3.275382983714992e-06, | |
| "loss": 0.7602829933166504, | |
| "step": 944 | |
| }, | |
| { | |
| "epoch": 1.9957805907172996, | |
| "grad_norm": 2.6617956161499023, | |
| "learning_rate": 3.272022257183153e-06, | |
| "loss": 1.0931661128997803, | |
| "step": 946 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "grad_norm": 12.275853157043457, | |
| "learning_rate": 3.268655729500251e-06, | |
| "loss": 0.5812578797340393, | |
| "step": 948 | |
| }, | |
| { | |
| "epoch": 2.0042194092827006, | |
| "grad_norm": 3.4581050872802734, | |
| "learning_rate": 3.265283418883945e-06, | |
| "loss": 0.8604273200035095, | |
| "step": 950 | |
| }, | |
| { | |
| "epoch": 2.0084388185654007, | |
| "grad_norm": 5.053099155426025, | |
| "learning_rate": 3.2619053435831878e-06, | |
| "loss": 0.6394712924957275, | |
| "step": 952 | |
| }, | |
| { | |
| "epoch": 2.0126582278481013, | |
| "grad_norm": 2.949049711227417, | |
| "learning_rate": 3.258521521878126e-06, | |
| "loss": 0.8134095072746277, | |
| "step": 954 | |
| }, | |
| { | |
| "epoch": 2.0168776371308015, | |
| "grad_norm": 3.0072250366210938, | |
| "learning_rate": 3.2551319720800043e-06, | |
| "loss": 0.9163396954536438, | |
| "step": 956 | |
| }, | |
| { | |
| "epoch": 2.021097046413502, | |
| "grad_norm": 4.474330902099609, | |
| "learning_rate": 3.251736712531063e-06, | |
| "loss": 0.7234617471694946, | |
| "step": 958 | |
| }, | |
| { | |
| "epoch": 2.0253164556962027, | |
| "grad_norm": 3.9642207622528076, | |
| "learning_rate": 3.2483357616044418e-06, | |
| "loss": 0.7650543451309204, | |
| "step": 960 | |
| }, | |
| { | |
| "epoch": 2.029535864978903, | |
| "grad_norm": 4.6968793869018555, | |
| "learning_rate": 3.244929137704076e-06, | |
| "loss": 1.1930127143859863, | |
| "step": 962 | |
| }, | |
| { | |
| "epoch": 2.0337552742616034, | |
| "grad_norm": 1.5408298969268799, | |
| "learning_rate": 3.241516859264602e-06, | |
| "loss": 0.7401737570762634, | |
| "step": 964 | |
| }, | |
| { | |
| "epoch": 2.037974683544304, | |
| "grad_norm": 4.210058689117432, | |
| "learning_rate": 3.238098944751256e-06, | |
| "loss": 0.756514310836792, | |
| "step": 966 | |
| }, | |
| { | |
| "epoch": 2.042194092827004, | |
| "grad_norm": 3.6998515129089355, | |
| "learning_rate": 3.23467541265977e-06, | |
| "loss": 0.750130295753479, | |
| "step": 968 | |
| }, | |
| { | |
| "epoch": 2.0464135021097047, | |
| "grad_norm": 2.7548975944519043, | |
| "learning_rate": 3.2312462815162777e-06, | |
| "loss": 1.0819189548492432, | |
| "step": 970 | |
| }, | |
| { | |
| "epoch": 2.050632911392405, | |
| "grad_norm": 4.967726707458496, | |
| "learning_rate": 3.2278115698772116e-06, | |
| "loss": 0.923316240310669, | |
| "step": 972 | |
| }, | |
| { | |
| "epoch": 2.0548523206751055, | |
| "grad_norm": 2.2812294960021973, | |
| "learning_rate": 3.2243712963292003e-06, | |
| "loss": 0.8755730390548706, | |
| "step": 974 | |
| }, | |
| { | |
| "epoch": 2.059071729957806, | |
| "grad_norm": 3.7565250396728516, | |
| "learning_rate": 3.2209254794889724e-06, | |
| "loss": 0.6916130781173706, | |
| "step": 976 | |
| }, | |
| { | |
| "epoch": 2.0632911392405062, | |
| "grad_norm": 2.0674679279327393, | |
| "learning_rate": 3.2174741380032523e-06, | |
| "loss": 0.6281135082244873, | |
| "step": 978 | |
| }, | |
| { | |
| "epoch": 2.067510548523207, | |
| "grad_norm": 3.7574315071105957, | |
| "learning_rate": 3.2140172905486612e-06, | |
| "loss": 0.7170443534851074, | |
| "step": 980 | |
| }, | |
| { | |
| "epoch": 2.071729957805907, | |
| "grad_norm": 3.4279699325561523, | |
| "learning_rate": 3.210554955831615e-06, | |
| "loss": 1.0432848930358887, | |
| "step": 982 | |
| }, | |
| { | |
| "epoch": 2.0759493670886076, | |
| "grad_norm": 2.687915802001953, | |
| "learning_rate": 3.207087152588224e-06, | |
| "loss": 0.9696755409240723, | |
| "step": 984 | |
| }, | |
| { | |
| "epoch": 2.080168776371308, | |
| "grad_norm": 2.2797346115112305, | |
| "learning_rate": 3.203613899584189e-06, | |
| "loss": 1.0136628150939941, | |
| "step": 986 | |
| }, | |
| { | |
| "epoch": 2.0843881856540083, | |
| "grad_norm": 2.3300132751464844, | |
| "learning_rate": 3.2001352156147045e-06, | |
| "loss": 1.0422950983047485, | |
| "step": 988 | |
| }, | |
| { | |
| "epoch": 2.088607594936709, | |
| "grad_norm": 6.217328071594238, | |
| "learning_rate": 3.1966511195043527e-06, | |
| "loss": 0.5632253289222717, | |
| "step": 990 | |
| }, | |
| { | |
| "epoch": 2.0928270042194095, | |
| "grad_norm": 2.278618335723877, | |
| "learning_rate": 3.193161630107003e-06, | |
| "loss": 0.5706143379211426, | |
| "step": 992 | |
| }, | |
| { | |
| "epoch": 2.0970464135021096, | |
| "grad_norm": 2.097888946533203, | |
| "learning_rate": 3.18966676630571e-06, | |
| "loss": 1.1316472291946411, | |
| "step": 994 | |
| }, | |
| { | |
| "epoch": 2.1012658227848102, | |
| "grad_norm": 4.8473286628723145, | |
| "learning_rate": 3.186166547012612e-06, | |
| "loss": 1.068217158317566, | |
| "step": 996 | |
| }, | |
| { | |
| "epoch": 2.1054852320675104, | |
| "grad_norm": 1.3159743547439575, | |
| "learning_rate": 3.1826609911688273e-06, | |
| "loss": 0.643653154373169, | |
| "step": 998 | |
| }, | |
| { | |
| "epoch": 2.109704641350211, | |
| "grad_norm": 2.744520425796509, | |
| "learning_rate": 3.1791501177443533e-06, | |
| "loss": 1.1834640502929688, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 2.1139240506329116, | |
| "grad_norm": 5.579896926879883, | |
| "learning_rate": 3.1756339457379626e-06, | |
| "loss": 1.023376703262329, | |
| "step": 1002 | |
| }, | |
| { | |
| "epoch": 2.1181434599156117, | |
| "grad_norm": 2.515099048614502, | |
| "learning_rate": 3.1721124941771005e-06, | |
| "loss": 1.092795491218567, | |
| "step": 1004 | |
| }, | |
| { | |
| "epoch": 2.1223628691983123, | |
| "grad_norm": 1.9233348369598389, | |
| "learning_rate": 3.1685857821177832e-06, | |
| "loss": 0.6104440689086914, | |
| "step": 1006 | |
| }, | |
| { | |
| "epoch": 2.1265822784810124, | |
| "grad_norm": 1.7998379468917847, | |
| "learning_rate": 3.1650538286444902e-06, | |
| "loss": 0.7144567966461182, | |
| "step": 1008 | |
| }, | |
| { | |
| "epoch": 2.130801687763713, | |
| "grad_norm": 1.6687654256820679, | |
| "learning_rate": 3.16151665287007e-06, | |
| "loss": 0.6989231109619141, | |
| "step": 1010 | |
| }, | |
| { | |
| "epoch": 2.1350210970464136, | |
| "grad_norm": 3.730558156967163, | |
| "learning_rate": 3.1579742739356252e-06, | |
| "loss": 0.8780606985092163, | |
| "step": 1012 | |
| }, | |
| { | |
| "epoch": 2.1392405063291138, | |
| "grad_norm": 3.9646623134613037, | |
| "learning_rate": 3.154426711010419e-06, | |
| "loss": 1.304856300354004, | |
| "step": 1014 | |
| }, | |
| { | |
| "epoch": 2.1434599156118144, | |
| "grad_norm": 4.966225624084473, | |
| "learning_rate": 3.1508739832917664e-06, | |
| "loss": 0.5962163209915161, | |
| "step": 1016 | |
| }, | |
| { | |
| "epoch": 2.147679324894515, | |
| "grad_norm": 3.8472814559936523, | |
| "learning_rate": 3.147316110004929e-06, | |
| "loss": 0.8961644768714905, | |
| "step": 1018 | |
| }, | |
| { | |
| "epoch": 2.151898734177215, | |
| "grad_norm": 16.210412979125977, | |
| "learning_rate": 3.1437531104030172e-06, | |
| "loss": 0.7574584484100342, | |
| "step": 1020 | |
| }, | |
| { | |
| "epoch": 2.1561181434599157, | |
| "grad_norm": 6.170048713684082, | |
| "learning_rate": 3.1401850037668773e-06, | |
| "loss": 0.8245753049850464, | |
| "step": 1022 | |
| }, | |
| { | |
| "epoch": 2.160337552742616, | |
| "grad_norm": 7.539897918701172, | |
| "learning_rate": 3.1366118094049962e-06, | |
| "loss": 0.8227906227111816, | |
| "step": 1024 | |
| }, | |
| { | |
| "epoch": 2.1645569620253164, | |
| "grad_norm": 2.6890225410461426, | |
| "learning_rate": 3.133033546653389e-06, | |
| "loss": 1.0590184926986694, | |
| "step": 1026 | |
| }, | |
| { | |
| "epoch": 2.168776371308017, | |
| "grad_norm": 2.2687880992889404, | |
| "learning_rate": 3.129450234875501e-06, | |
| "loss": 1.1196215152740479, | |
| "step": 1028 | |
| }, | |
| { | |
| "epoch": 2.172995780590717, | |
| "grad_norm": 1.5322057008743286, | |
| "learning_rate": 3.1258618934620977e-06, | |
| "loss": 1.0350878238677979, | |
| "step": 1030 | |
| }, | |
| { | |
| "epoch": 2.1772151898734178, | |
| "grad_norm": 1.7896466255187988, | |
| "learning_rate": 3.1222685418311624e-06, | |
| "loss": 1.0621168613433838, | |
| "step": 1032 | |
| }, | |
| { | |
| "epoch": 2.181434599156118, | |
| "grad_norm": 1.9826382398605347, | |
| "learning_rate": 3.1186701994277913e-06, | |
| "loss": 1.0807254314422607, | |
| "step": 1034 | |
| }, | |
| { | |
| "epoch": 2.1856540084388185, | |
| "grad_norm": 2.4833922386169434, | |
| "learning_rate": 3.115066885724087e-06, | |
| "loss": 1.0103787183761597, | |
| "step": 1036 | |
| }, | |
| { | |
| "epoch": 2.189873417721519, | |
| "grad_norm": 4.183322429656982, | |
| "learning_rate": 3.111458620219056e-06, | |
| "loss": 1.0446069240570068, | |
| "step": 1038 | |
| }, | |
| { | |
| "epoch": 2.1940928270042193, | |
| "grad_norm": 5.676382064819336, | |
| "learning_rate": 3.107845422438497e-06, | |
| "loss": 1.1852116584777832, | |
| "step": 1040 | |
| }, | |
| { | |
| "epoch": 2.19831223628692, | |
| "grad_norm": 12.257451057434082, | |
| "learning_rate": 3.1042273119349024e-06, | |
| "loss": 0.3302527964115143, | |
| "step": 1042 | |
| }, | |
| { | |
| "epoch": 2.2025316455696204, | |
| "grad_norm": 1.8637685775756836, | |
| "learning_rate": 3.10060430828735e-06, | |
| "loss": 1.0095632076263428, | |
| "step": 1044 | |
| }, | |
| { | |
| "epoch": 2.2067510548523206, | |
| "grad_norm": 6.286106109619141, | |
| "learning_rate": 3.0969764311013927e-06, | |
| "loss": 0.6037812232971191, | |
| "step": 1046 | |
| }, | |
| { | |
| "epoch": 2.210970464135021, | |
| "grad_norm": 2.026481866836548, | |
| "learning_rate": 3.09334370000896e-06, | |
| "loss": 0.8940553665161133, | |
| "step": 1048 | |
| }, | |
| { | |
| "epoch": 2.2151898734177213, | |
| "grad_norm": 2.958310604095459, | |
| "learning_rate": 3.089706134668245e-06, | |
| "loss": 1.070237636566162, | |
| "step": 1050 | |
| }, | |
| { | |
| "epoch": 2.219409282700422, | |
| "grad_norm": 5.202909469604492, | |
| "learning_rate": 3.0860637547636023e-06, | |
| "loss": 0.9080023765563965, | |
| "step": 1052 | |
| }, | |
| { | |
| "epoch": 2.2236286919831225, | |
| "grad_norm": 4.214676856994629, | |
| "learning_rate": 3.082416580005441e-06, | |
| "loss": 0.9310380220413208, | |
| "step": 1054 | |
| }, | |
| { | |
| "epoch": 2.2278481012658227, | |
| "grad_norm": 4.913782119750977, | |
| "learning_rate": 3.0787646301301143e-06, | |
| "loss": 0.8610812425613403, | |
| "step": 1056 | |
| }, | |
| { | |
| "epoch": 2.2320675105485233, | |
| "grad_norm": 11.496319770812988, | |
| "learning_rate": 3.0751079248998183e-06, | |
| "loss": 0.5102381706237793, | |
| "step": 1058 | |
| }, | |
| { | |
| "epoch": 2.2362869198312234, | |
| "grad_norm": 2.501431703567505, | |
| "learning_rate": 3.0714464841024817e-06, | |
| "loss": 1.026395559310913, | |
| "step": 1060 | |
| }, | |
| { | |
| "epoch": 2.240506329113924, | |
| "grad_norm": 1.0209457874298096, | |
| "learning_rate": 3.067780327551658e-06, | |
| "loss": 0.7514087557792664, | |
| "step": 1062 | |
| }, | |
| { | |
| "epoch": 2.2447257383966246, | |
| "grad_norm": 10.08558464050293, | |
| "learning_rate": 3.06410947508642e-06, | |
| "loss": 0.4998623728752136, | |
| "step": 1064 | |
| }, | |
| { | |
| "epoch": 2.2489451476793247, | |
| "grad_norm": 2.017042875289917, | |
| "learning_rate": 3.060433946571253e-06, | |
| "loss": 0.9955783486366272, | |
| "step": 1066 | |
| }, | |
| { | |
| "epoch": 2.2531645569620253, | |
| "grad_norm": 3.0692787170410156, | |
| "learning_rate": 3.0567537618959453e-06, | |
| "loss": 1.24436616897583, | |
| "step": 1068 | |
| }, | |
| { | |
| "epoch": 2.257383966244726, | |
| "grad_norm": 2.2183597087860107, | |
| "learning_rate": 3.0530689409754826e-06, | |
| "loss": 1.1389007568359375, | |
| "step": 1070 | |
| }, | |
| { | |
| "epoch": 2.261603375527426, | |
| "grad_norm": 3.1245839595794678, | |
| "learning_rate": 3.0493795037499374e-06, | |
| "loss": 1.1064579486846924, | |
| "step": 1072 | |
| }, | |
| { | |
| "epoch": 2.2658227848101267, | |
| "grad_norm": 5.401794910430908, | |
| "learning_rate": 3.0456854701843647e-06, | |
| "loss": 1.280016303062439, | |
| "step": 1074 | |
| }, | |
| { | |
| "epoch": 2.270042194092827, | |
| "grad_norm": 2.5527584552764893, | |
| "learning_rate": 3.041986860268693e-06, | |
| "loss": 1.0337902307510376, | |
| "step": 1076 | |
| }, | |
| { | |
| "epoch": 2.2742616033755274, | |
| "grad_norm": 1.6811496019363403, | |
| "learning_rate": 3.0382836940176112e-06, | |
| "loss": 0.7087812423706055, | |
| "step": 1078 | |
| }, | |
| { | |
| "epoch": 2.278481012658228, | |
| "grad_norm": 4.886277675628662, | |
| "learning_rate": 3.034575991470468e-06, | |
| "loss": 0.8468987941741943, | |
| "step": 1080 | |
| }, | |
| { | |
| "epoch": 2.282700421940928, | |
| "grad_norm": 10.467023849487305, | |
| "learning_rate": 3.03086377269116e-06, | |
| "loss": 0.46134668588638306, | |
| "step": 1082 | |
| }, | |
| { | |
| "epoch": 2.2869198312236287, | |
| "grad_norm": 4.281970500946045, | |
| "learning_rate": 3.027147057768022e-06, | |
| "loss": 0.6730149984359741, | |
| "step": 1084 | |
| }, | |
| { | |
| "epoch": 2.291139240506329, | |
| "grad_norm": 1.6377662420272827, | |
| "learning_rate": 3.023425866813718e-06, | |
| "loss": 0.5801299810409546, | |
| "step": 1086 | |
| }, | |
| { | |
| "epoch": 2.2953586497890295, | |
| "grad_norm": 4.013052940368652, | |
| "learning_rate": 3.0197002199651353e-06, | |
| "loss": 0.900696873664856, | |
| "step": 1088 | |
| }, | |
| { | |
| "epoch": 2.29957805907173, | |
| "grad_norm": 1.2075470685958862, | |
| "learning_rate": 3.015970137383273e-06, | |
| "loss": 0.557762861251831, | |
| "step": 1090 | |
| }, | |
| { | |
| "epoch": 2.3037974683544302, | |
| "grad_norm": 6.79136848449707, | |
| "learning_rate": 3.0122356392531345e-06, | |
| "loss": 0.8252531290054321, | |
| "step": 1092 | |
| }, | |
| { | |
| "epoch": 2.308016877637131, | |
| "grad_norm": 1.973429560661316, | |
| "learning_rate": 3.008496745783617e-06, | |
| "loss": 0.6639243364334106, | |
| "step": 1094 | |
| }, | |
| { | |
| "epoch": 2.3122362869198314, | |
| "grad_norm": 5.644299507141113, | |
| "learning_rate": 3.0047534772074038e-06, | |
| "loss": 0.41757094860076904, | |
| "step": 1096 | |
| }, | |
| { | |
| "epoch": 2.3164556962025316, | |
| "grad_norm": 4.321779727935791, | |
| "learning_rate": 3.001005853780852e-06, | |
| "loss": 1.101494550704956, | |
| "step": 1098 | |
| }, | |
| { | |
| "epoch": 2.320675105485232, | |
| "grad_norm": 2.1912591457366943, | |
| "learning_rate": 2.9972538957838848e-06, | |
| "loss": 0.9152376055717468, | |
| "step": 1100 | |
| }, | |
| { | |
| "epoch": 2.3248945147679323, | |
| "grad_norm": 2.042452335357666, | |
| "learning_rate": 2.9934976235198827e-06, | |
| "loss": 1.0394017696380615, | |
| "step": 1102 | |
| }, | |
| { | |
| "epoch": 2.329113924050633, | |
| "grad_norm": 1.53744637966156, | |
| "learning_rate": 2.989737057315572e-06, | |
| "loss": 1.2090572118759155, | |
| "step": 1104 | |
| }, | |
| { | |
| "epoch": 2.3333333333333335, | |
| "grad_norm": 2.0143048763275146, | |
| "learning_rate": 2.9859722175209153e-06, | |
| "loss": 0.7863491773605347, | |
| "step": 1106 | |
| }, | |
| { | |
| "epoch": 2.3375527426160336, | |
| "grad_norm": 10.555294036865234, | |
| "learning_rate": 2.9822031245090002e-06, | |
| "loss": 0.5064557790756226, | |
| "step": 1108 | |
| }, | |
| { | |
| "epoch": 2.3417721518987342, | |
| "grad_norm": 3.0460026264190674, | |
| "learning_rate": 2.978429798675931e-06, | |
| "loss": 1.0185744762420654, | |
| "step": 1110 | |
| }, | |
| { | |
| "epoch": 2.3459915611814344, | |
| "grad_norm": 1.6025739908218384, | |
| "learning_rate": 2.97465226044072e-06, | |
| "loss": 1.0687915086746216, | |
| "step": 1112 | |
| }, | |
| { | |
| "epoch": 2.350210970464135, | |
| "grad_norm": 2.336373805999756, | |
| "learning_rate": 2.9708705302451697e-06, | |
| "loss": 1.1018157005310059, | |
| "step": 1114 | |
| }, | |
| { | |
| "epoch": 2.3544303797468356, | |
| "grad_norm": 1.2120983600616455, | |
| "learning_rate": 2.96708462855377e-06, | |
| "loss": 0.6393563747406006, | |
| "step": 1116 | |
| }, | |
| { | |
| "epoch": 2.3586497890295357, | |
| "grad_norm": 5.554210186004639, | |
| "learning_rate": 2.9632945758535847e-06, | |
| "loss": 0.9500521421432495, | |
| "step": 1118 | |
| }, | |
| { | |
| "epoch": 2.3628691983122363, | |
| "grad_norm": 13.489524841308594, | |
| "learning_rate": 2.9595003926541398e-06, | |
| "loss": 0.6889848709106445, | |
| "step": 1120 | |
| }, | |
| { | |
| "epoch": 2.367088607594937, | |
| "grad_norm": 6.1560187339782715, | |
| "learning_rate": 2.9557020994873125e-06, | |
| "loss": 0.9626091718673706, | |
| "step": 1122 | |
| }, | |
| { | |
| "epoch": 2.371308016877637, | |
| "grad_norm": 1.836715579032898, | |
| "learning_rate": 2.951899716907221e-06, | |
| "loss": 0.5855181217193604, | |
| "step": 1124 | |
| }, | |
| { | |
| "epoch": 2.3755274261603376, | |
| "grad_norm": 1.9696272611618042, | |
| "learning_rate": 2.9480932654901142e-06, | |
| "loss": 0.8846515417098999, | |
| "step": 1126 | |
| }, | |
| { | |
| "epoch": 2.379746835443038, | |
| "grad_norm": 2.0595052242279053, | |
| "learning_rate": 2.944282765834257e-06, | |
| "loss": 1.0026812553405762, | |
| "step": 1128 | |
| }, | |
| { | |
| "epoch": 2.3839662447257384, | |
| "grad_norm": 8.984773635864258, | |
| "learning_rate": 2.9404682385598225e-06, | |
| "loss": 0.4564356803894043, | |
| "step": 1130 | |
| }, | |
| { | |
| "epoch": 2.388185654008439, | |
| "grad_norm": 9.524094581604004, | |
| "learning_rate": 2.9366497043087794e-06, | |
| "loss": 0.3366748094558716, | |
| "step": 1132 | |
| }, | |
| { | |
| "epoch": 2.392405063291139, | |
| "grad_norm": 2.6163482666015625, | |
| "learning_rate": 2.932827183744778e-06, | |
| "loss": 0.46002885699272156, | |
| "step": 1134 | |
| }, | |
| { | |
| "epoch": 2.3966244725738397, | |
| "grad_norm": 7.858697414398193, | |
| "learning_rate": 2.929000697553041e-06, | |
| "loss": 0.5188404321670532, | |
| "step": 1136 | |
| }, | |
| { | |
| "epoch": 2.40084388185654, | |
| "grad_norm": 2.04315447807312, | |
| "learning_rate": 2.925170266440252e-06, | |
| "loss": 1.063408613204956, | |
| "step": 1138 | |
| }, | |
| { | |
| "epoch": 2.4050632911392404, | |
| "grad_norm": 3.0201163291931152, | |
| "learning_rate": 2.921335911134439e-06, | |
| "loss": 0.7606229186058044, | |
| "step": 1140 | |
| }, | |
| { | |
| "epoch": 2.409282700421941, | |
| "grad_norm": 5.318437576293945, | |
| "learning_rate": 2.91749765238487e-06, | |
| "loss": 0.2792668044567108, | |
| "step": 1142 | |
| }, | |
| { | |
| "epoch": 2.413502109704641, | |
| "grad_norm": 1.64540433883667, | |
| "learning_rate": 2.9136555109619316e-06, | |
| "loss": 0.7836066484451294, | |
| "step": 1144 | |
| }, | |
| { | |
| "epoch": 2.4177215189873418, | |
| "grad_norm": 7.265844821929932, | |
| "learning_rate": 2.9098095076570235e-06, | |
| "loss": 1.0778812170028687, | |
| "step": 1146 | |
| }, | |
| { | |
| "epoch": 2.4219409282700424, | |
| "grad_norm": 4.908560752868652, | |
| "learning_rate": 2.9059596632824432e-06, | |
| "loss": 0.8231828212738037, | |
| "step": 1148 | |
| }, | |
| { | |
| "epoch": 2.4261603375527425, | |
| "grad_norm": 3.473619222640991, | |
| "learning_rate": 2.902105998671275e-06, | |
| "loss": 1.0785859823226929, | |
| "step": 1150 | |
| }, | |
| { | |
| "epoch": 2.430379746835443, | |
| "grad_norm": 5.009274959564209, | |
| "learning_rate": 2.8982485346772733e-06, | |
| "loss": 0.6990054845809937, | |
| "step": 1152 | |
| }, | |
| { | |
| "epoch": 2.4345991561181437, | |
| "grad_norm": 1.6592916250228882, | |
| "learning_rate": 2.894387292174754e-06, | |
| "loss": 1.1584959030151367, | |
| "step": 1154 | |
| }, | |
| { | |
| "epoch": 2.438818565400844, | |
| "grad_norm": 1.9908864498138428, | |
| "learning_rate": 2.8905222920584814e-06, | |
| "loss": 0.3479560613632202, | |
| "step": 1156 | |
| }, | |
| { | |
| "epoch": 2.4430379746835444, | |
| "grad_norm": 2.59413743019104, | |
| "learning_rate": 2.886653555243553e-06, | |
| "loss": 0.7740304470062256, | |
| "step": 1158 | |
| }, | |
| { | |
| "epoch": 2.4472573839662446, | |
| "grad_norm": 3.607126235961914, | |
| "learning_rate": 2.882781102665284e-06, | |
| "loss": 1.0350849628448486, | |
| "step": 1160 | |
| }, | |
| { | |
| "epoch": 2.451476793248945, | |
| "grad_norm": 2.7151076793670654, | |
| "learning_rate": 2.8789049552791024e-06, | |
| "loss": 0.6460145711898804, | |
| "step": 1162 | |
| }, | |
| { | |
| "epoch": 2.4556962025316453, | |
| "grad_norm": 1.7807066440582275, | |
| "learning_rate": 2.8750251340604255e-06, | |
| "loss": 1.0453755855560303, | |
| "step": 1164 | |
| }, | |
| { | |
| "epoch": 2.459915611814346, | |
| "grad_norm": 2.944485664367676, | |
| "learning_rate": 2.8711416600045556e-06, | |
| "loss": 1.079903483390808, | |
| "step": 1166 | |
| }, | |
| { | |
| "epoch": 2.4641350210970465, | |
| "grad_norm": 0.9675163626670837, | |
| "learning_rate": 2.8672545541265583e-06, | |
| "loss": 0.5578194856643677, | |
| "step": 1168 | |
| }, | |
| { | |
| "epoch": 2.4683544303797467, | |
| "grad_norm": 1.795234203338623, | |
| "learning_rate": 2.8633638374611544e-06, | |
| "loss": 1.0072107315063477, | |
| "step": 1170 | |
| }, | |
| { | |
| "epoch": 2.4725738396624473, | |
| "grad_norm": 3.3494741916656494, | |
| "learning_rate": 2.8594695310626034e-06, | |
| "loss": 1.0281925201416016, | |
| "step": 1172 | |
| }, | |
| { | |
| "epoch": 2.476793248945148, | |
| "grad_norm": 2.088599920272827, | |
| "learning_rate": 2.8555716560045917e-06, | |
| "loss": 1.0314571857452393, | |
| "step": 1174 | |
| }, | |
| { | |
| "epoch": 2.481012658227848, | |
| "grad_norm": 2.605670213699341, | |
| "learning_rate": 2.851670233380114e-06, | |
| "loss": 0.7644580602645874, | |
| "step": 1176 | |
| }, | |
| { | |
| "epoch": 2.4852320675105486, | |
| "grad_norm": 13.257305145263672, | |
| "learning_rate": 2.8477652843013666e-06, | |
| "loss": 0.42062222957611084, | |
| "step": 1178 | |
| }, | |
| { | |
| "epoch": 2.489451476793249, | |
| "grad_norm": 7.103763103485107, | |
| "learning_rate": 2.8438568298996265e-06, | |
| "loss": 0.7796779274940491, | |
| "step": 1180 | |
| }, | |
| { | |
| "epoch": 2.4936708860759493, | |
| "grad_norm": 3.013402223587036, | |
| "learning_rate": 2.8399448913251374e-06, | |
| "loss": 0.9339659214019775, | |
| "step": 1182 | |
| }, | |
| { | |
| "epoch": 2.49789029535865, | |
| "grad_norm": 7.224562644958496, | |
| "learning_rate": 2.836029489747002e-06, | |
| "loss": 0.49434345960617065, | |
| "step": 1184 | |
| }, | |
| { | |
| "epoch": 2.50210970464135, | |
| "grad_norm": 17.112947463989258, | |
| "learning_rate": 2.8321106463530592e-06, | |
| "loss": 0.6316568851470947, | |
| "step": 1186 | |
| }, | |
| { | |
| "epoch": 2.5063291139240507, | |
| "grad_norm": 5.573176383972168, | |
| "learning_rate": 2.8281883823497745e-06, | |
| "loss": 0.7511799335479736, | |
| "step": 1188 | |
| }, | |
| { | |
| "epoch": 2.510548523206751, | |
| "grad_norm": 2.383787155151367, | |
| "learning_rate": 2.824262718962122e-06, | |
| "loss": 1.03713858127594, | |
| "step": 1190 | |
| }, | |
| { | |
| "epoch": 2.5147679324894514, | |
| "grad_norm": 5.0437116622924805, | |
| "learning_rate": 2.820333677433474e-06, | |
| "loss": 0.510556697845459, | |
| "step": 1192 | |
| }, | |
| { | |
| "epoch": 2.518987341772152, | |
| "grad_norm": 6.297809600830078, | |
| "learning_rate": 2.816401279025482e-06, | |
| "loss": 1.3623912334442139, | |
| "step": 1194 | |
| }, | |
| { | |
| "epoch": 2.523206751054852, | |
| "grad_norm": 2.4292147159576416, | |
| "learning_rate": 2.8124655450179618e-06, | |
| "loss": 1.1327567100524902, | |
| "step": 1196 | |
| }, | |
| { | |
| "epoch": 2.5274261603375527, | |
| "grad_norm": 2.8005106449127197, | |
| "learning_rate": 2.808526496708781e-06, | |
| "loss": 0.980167031288147, | |
| "step": 1198 | |
| }, | |
| { | |
| "epoch": 2.5316455696202533, | |
| "grad_norm": 6.94888162612915, | |
| "learning_rate": 2.804584155413741e-06, | |
| "loss": 0.6094427704811096, | |
| "step": 1200 | |
| }, | |
| { | |
| "epoch": 2.5358649789029535, | |
| "grad_norm": 2.302324056625366, | |
| "learning_rate": 2.8006385424664638e-06, | |
| "loss": 0.7884533405303955, | |
| "step": 1202 | |
| }, | |
| { | |
| "epoch": 2.540084388185654, | |
| "grad_norm": 7.919814586639404, | |
| "learning_rate": 2.7966896792182755e-06, | |
| "loss": 0.6705489754676819, | |
| "step": 1204 | |
| }, | |
| { | |
| "epoch": 2.5443037974683547, | |
| "grad_norm": 2.791510581970215, | |
| "learning_rate": 2.792737587038092e-06, | |
| "loss": 0.9616777300834656, | |
| "step": 1206 | |
| }, | |
| { | |
| "epoch": 2.548523206751055, | |
| "grad_norm": 5.007606029510498, | |
| "learning_rate": 2.7887822873122995e-06, | |
| "loss": 0.7277128100395203, | |
| "step": 1208 | |
| }, | |
| { | |
| "epoch": 2.5527426160337554, | |
| "grad_norm": 2.232788562774658, | |
| "learning_rate": 2.7848238014446447e-06, | |
| "loss": 1.1262240409851074, | |
| "step": 1210 | |
| }, | |
| { | |
| "epoch": 2.5569620253164556, | |
| "grad_norm": 3.4404702186584473, | |
| "learning_rate": 2.7808621508561123e-06, | |
| "loss": 1.0465441942214966, | |
| "step": 1212 | |
| }, | |
| { | |
| "epoch": 2.561181434599156, | |
| "grad_norm": 8.573604583740234, | |
| "learning_rate": 2.776897356984816e-06, | |
| "loss": 0.30951395630836487, | |
| "step": 1214 | |
| }, | |
| { | |
| "epoch": 2.5654008438818563, | |
| "grad_norm": 3.45868182182312, | |
| "learning_rate": 2.7729294412858776e-06, | |
| "loss": 0.7883036136627197, | |
| "step": 1216 | |
| }, | |
| { | |
| "epoch": 2.569620253164557, | |
| "grad_norm": 1.7647202014923096, | |
| "learning_rate": 2.7689584252313128e-06, | |
| "loss": 1.0650732517242432, | |
| "step": 1218 | |
| }, | |
| { | |
| "epoch": 2.5738396624472575, | |
| "grad_norm": 8.709357261657715, | |
| "learning_rate": 2.7649843303099127e-06, | |
| "loss": 0.6637066602706909, | |
| "step": 1220 | |
| }, | |
| { | |
| "epoch": 2.5780590717299576, | |
| "grad_norm": 4.496120929718018, | |
| "learning_rate": 2.761007178027132e-06, | |
| "loss": 0.9158288240432739, | |
| "step": 1222 | |
| }, | |
| { | |
| "epoch": 2.5822784810126582, | |
| "grad_norm": 11.006595611572266, | |
| "learning_rate": 2.75702698990497e-06, | |
| "loss": 0.7496324777603149, | |
| "step": 1224 | |
| }, | |
| { | |
| "epoch": 2.586497890295359, | |
| "grad_norm": 4.899750232696533, | |
| "learning_rate": 2.7530437874818515e-06, | |
| "loss": 0.6235587000846863, | |
| "step": 1226 | |
| }, | |
| { | |
| "epoch": 2.590717299578059, | |
| "grad_norm": 1.8441094160079956, | |
| "learning_rate": 2.749057592312515e-06, | |
| "loss": 1.0314083099365234, | |
| "step": 1228 | |
| }, | |
| { | |
| "epoch": 2.5949367088607596, | |
| "grad_norm": 2.7288100719451904, | |
| "learning_rate": 2.7450684259678943e-06, | |
| "loss": 1.0736459493637085, | |
| "step": 1230 | |
| }, | |
| { | |
| "epoch": 2.59915611814346, | |
| "grad_norm": 3.8577749729156494, | |
| "learning_rate": 2.7410763100350004e-06, | |
| "loss": 0.9584764838218689, | |
| "step": 1232 | |
| }, | |
| { | |
| "epoch": 2.6033755274261603, | |
| "grad_norm": 9.928874969482422, | |
| "learning_rate": 2.7370812661168046e-06, | |
| "loss": 0.2811320722103119, | |
| "step": 1234 | |
| }, | |
| { | |
| "epoch": 2.607594936708861, | |
| "grad_norm": 3.457975387573242, | |
| "learning_rate": 2.7330833158321267e-06, | |
| "loss": 1.1292645931243896, | |
| "step": 1236 | |
| }, | |
| { | |
| "epoch": 2.611814345991561, | |
| "grad_norm": 6.1282172203063965, | |
| "learning_rate": 2.7290824808155096e-06, | |
| "loss": 1.2942759990692139, | |
| "step": 1238 | |
| }, | |
| { | |
| "epoch": 2.6160337552742616, | |
| "grad_norm": 6.050518035888672, | |
| "learning_rate": 2.7250787827171085e-06, | |
| "loss": 0.7845382690429688, | |
| "step": 1240 | |
| }, | |
| { | |
| "epoch": 2.620253164556962, | |
| "grad_norm": 2.2712647914886475, | |
| "learning_rate": 2.721072243202573e-06, | |
| "loss": 0.9927393794059753, | |
| "step": 1242 | |
| }, | |
| { | |
| "epoch": 2.6244725738396624, | |
| "grad_norm": 12.99117660522461, | |
| "learning_rate": 2.7170628839529277e-06, | |
| "loss": 0.4361240863800049, | |
| "step": 1244 | |
| }, | |
| { | |
| "epoch": 2.628691983122363, | |
| "grad_norm": 2.062415599822998, | |
| "learning_rate": 2.7130507266644555e-06, | |
| "loss": 0.7296593189239502, | |
| "step": 1246 | |
| }, | |
| { | |
| "epoch": 2.632911392405063, | |
| "grad_norm": 6.197027206420898, | |
| "learning_rate": 2.709035793048581e-06, | |
| "loss": 1.5014359951019287, | |
| "step": 1248 | |
| }, | |
| { | |
| "epoch": 2.6371308016877637, | |
| "grad_norm": 1.7749969959259033, | |
| "learning_rate": 2.705018104831753e-06, | |
| "loss": 1.0191712379455566, | |
| "step": 1250 | |
| }, | |
| { | |
| "epoch": 2.6413502109704643, | |
| "grad_norm": 3.7179009914398193, | |
| "learning_rate": 2.700997683755326e-06, | |
| "loss": 0.9707983732223511, | |
| "step": 1252 | |
| }, | |
| { | |
| "epoch": 2.6455696202531644, | |
| "grad_norm": 7.614749431610107, | |
| "learning_rate": 2.6969745515754444e-06, | |
| "loss": 0.47567054629325867, | |
| "step": 1254 | |
| }, | |
| { | |
| "epoch": 2.649789029535865, | |
| "grad_norm": 3.8538355827331543, | |
| "learning_rate": 2.6929487300629206e-06, | |
| "loss": 0.5580261945724487, | |
| "step": 1256 | |
| }, | |
| { | |
| "epoch": 2.6540084388185656, | |
| "grad_norm": 3.0637574195861816, | |
| "learning_rate": 2.6889202410031237e-06, | |
| "loss": 0.9232720136642456, | |
| "step": 1258 | |
| }, | |
| { | |
| "epoch": 2.6582278481012658, | |
| "grad_norm": 1.9953484535217285, | |
| "learning_rate": 2.6848891061958565e-06, | |
| "loss": 1.007423996925354, | |
| "step": 1260 | |
| }, | |
| { | |
| "epoch": 2.6624472573839664, | |
| "grad_norm": 10.962545394897461, | |
| "learning_rate": 2.680855347455238e-06, | |
| "loss": 1.0483016967773438, | |
| "step": 1262 | |
| }, | |
| { | |
| "epoch": 2.6666666666666665, | |
| "grad_norm": 2.6327028274536133, | |
| "learning_rate": 2.6768189866095867e-06, | |
| "loss": 0.5767178535461426, | |
| "step": 1264 | |
| }, | |
| { | |
| "epoch": 2.670886075949367, | |
| "grad_norm": 5.506629943847656, | |
| "learning_rate": 2.6727800455013037e-06, | |
| "loss": 0.8919286727905273, | |
| "step": 1266 | |
| }, | |
| { | |
| "epoch": 2.6751054852320673, | |
| "grad_norm": 1.8910753726959229, | |
| "learning_rate": 2.6687385459867514e-06, | |
| "loss": 0.7154239416122437, | |
| "step": 1268 | |
| }, | |
| { | |
| "epoch": 2.679324894514768, | |
| "grad_norm": 4.416780948638916, | |
| "learning_rate": 2.6646945099361382e-06, | |
| "loss": 0.4701068103313446, | |
| "step": 1270 | |
| }, | |
| { | |
| "epoch": 2.6835443037974684, | |
| "grad_norm": 1.5386635065078735, | |
| "learning_rate": 2.6606479592333965e-06, | |
| "loss": 0.9448637962341309, | |
| "step": 1272 | |
| }, | |
| { | |
| "epoch": 2.6877637130801686, | |
| "grad_norm": 6.68757963180542, | |
| "learning_rate": 2.6565989157760678e-06, | |
| "loss": 0.735755443572998, | |
| "step": 1274 | |
| }, | |
| { | |
| "epoch": 2.691983122362869, | |
| "grad_norm": 23.566585540771484, | |
| "learning_rate": 2.652547401475184e-06, | |
| "loss": 0.8000218868255615, | |
| "step": 1276 | |
| }, | |
| { | |
| "epoch": 2.6962025316455698, | |
| "grad_norm": 1.7401084899902344, | |
| "learning_rate": 2.6484934382551465e-06, | |
| "loss": 0.35548001527786255, | |
| "step": 1278 | |
| }, | |
| { | |
| "epoch": 2.70042194092827, | |
| "grad_norm": 10.348366737365723, | |
| "learning_rate": 2.644437048053609e-06, | |
| "loss": 0.8879528641700745, | |
| "step": 1280 | |
| }, | |
| { | |
| "epoch": 2.7046413502109705, | |
| "grad_norm": 2.0043532848358154, | |
| "learning_rate": 2.6403782528213577e-06, | |
| "loss": 1.076289415359497, | |
| "step": 1282 | |
| }, | |
| { | |
| "epoch": 2.708860759493671, | |
| "grad_norm": 14.326828956604004, | |
| "learning_rate": 2.6363170745221958e-06, | |
| "loss": 0.5147005915641785, | |
| "step": 1284 | |
| }, | |
| { | |
| "epoch": 2.7130801687763713, | |
| "grad_norm": 2.707928419113159, | |
| "learning_rate": 2.6322535351328193e-06, | |
| "loss": 0.502042293548584, | |
| "step": 1286 | |
| }, | |
| { | |
| "epoch": 2.717299578059072, | |
| "grad_norm": 1.4950000047683716, | |
| "learning_rate": 2.6281876566427034e-06, | |
| "loss": 0.6342880129814148, | |
| "step": 1288 | |
| }, | |
| { | |
| "epoch": 2.721518987341772, | |
| "grad_norm": 0.5780206918716431, | |
| "learning_rate": 2.624119461053979e-06, | |
| "loss": 0.7421303391456604, | |
| "step": 1290 | |
| }, | |
| { | |
| "epoch": 2.7257383966244726, | |
| "grad_norm": 1.3298128843307495, | |
| "learning_rate": 2.620048970381319e-06, | |
| "loss": 0.9955764412879944, | |
| "step": 1292 | |
| }, | |
| { | |
| "epoch": 2.7299578059071727, | |
| "grad_norm": 2.542677879333496, | |
| "learning_rate": 2.6159762066518117e-06, | |
| "loss": 0.5678607821464539, | |
| "step": 1294 | |
| }, | |
| { | |
| "epoch": 2.7341772151898733, | |
| "grad_norm": 2.9699714183807373, | |
| "learning_rate": 2.61190119190485e-06, | |
| "loss": 1.0441884994506836, | |
| "step": 1296 | |
| }, | |
| { | |
| "epoch": 2.738396624472574, | |
| "grad_norm": 1.9846669435501099, | |
| "learning_rate": 2.607823948192005e-06, | |
| "loss": 1.0227396488189697, | |
| "step": 1298 | |
| }, | |
| { | |
| "epoch": 2.742616033755274, | |
| "grad_norm": 3.1612093448638916, | |
| "learning_rate": 2.6037444975769104e-06, | |
| "loss": 0.7024236917495728, | |
| "step": 1300 | |
| }, | |
| { | |
| "epoch": 2.7468354430379747, | |
| "grad_norm": 1.8448959589004517, | |
| "learning_rate": 2.5996628621351437e-06, | |
| "loss": 1.156023621559143, | |
| "step": 1302 | |
| }, | |
| { | |
| "epoch": 2.7510548523206753, | |
| "grad_norm": 4.011197566986084, | |
| "learning_rate": 2.5955790639541036e-06, | |
| "loss": 0.6238597631454468, | |
| "step": 1304 | |
| }, | |
| { | |
| "epoch": 2.7552742616033754, | |
| "grad_norm": 3.856045961380005, | |
| "learning_rate": 2.591493125132893e-06, | |
| "loss": 1.281459093093872, | |
| "step": 1306 | |
| }, | |
| { | |
| "epoch": 2.759493670886076, | |
| "grad_norm": 2.341705083847046, | |
| "learning_rate": 2.5874050677821984e-06, | |
| "loss": 0.9869955778121948, | |
| "step": 1308 | |
| }, | |
| { | |
| "epoch": 2.7637130801687766, | |
| "grad_norm": 10.147032737731934, | |
| "learning_rate": 2.5833149140241718e-06, | |
| "loss": 0.8909780979156494, | |
| "step": 1310 | |
| }, | |
| { | |
| "epoch": 2.7679324894514767, | |
| "grad_norm": 1.7961941957473755, | |
| "learning_rate": 2.579222685992307e-06, | |
| "loss": 1.0535545349121094, | |
| "step": 1312 | |
| }, | |
| { | |
| "epoch": 2.7721518987341773, | |
| "grad_norm": 3.12715482711792, | |
| "learning_rate": 2.5751284058313266e-06, | |
| "loss": 1.1261003017425537, | |
| "step": 1314 | |
| }, | |
| { | |
| "epoch": 2.7763713080168775, | |
| "grad_norm": 3.8387131690979004, | |
| "learning_rate": 2.5710320956970536e-06, | |
| "loss": 0.7698974609375, | |
| "step": 1316 | |
| }, | |
| { | |
| "epoch": 2.780590717299578, | |
| "grad_norm": 1.3000264167785645, | |
| "learning_rate": 2.5669337777562996e-06, | |
| "loss": 0.5697190761566162, | |
| "step": 1318 | |
| }, | |
| { | |
| "epoch": 2.7848101265822782, | |
| "grad_norm": 1.9856594800949097, | |
| "learning_rate": 2.5628334741867385e-06, | |
| "loss": 1.1043368577957153, | |
| "step": 1320 | |
| }, | |
| { | |
| "epoch": 2.789029535864979, | |
| "grad_norm": 3.5784945487976074, | |
| "learning_rate": 2.5587312071767923e-06, | |
| "loss": 0.6595450639724731, | |
| "step": 1322 | |
| }, | |
| { | |
| "epoch": 2.7932489451476794, | |
| "grad_norm": 5.370586395263672, | |
| "learning_rate": 2.554626998925505e-06, | |
| "loss": 1.2037230730056763, | |
| "step": 1324 | |
| }, | |
| { | |
| "epoch": 2.7974683544303796, | |
| "grad_norm": 6.791380882263184, | |
| "learning_rate": 2.5505208716424275e-06, | |
| "loss": 0.899883508682251, | |
| "step": 1326 | |
| }, | |
| { | |
| "epoch": 2.80168776371308, | |
| "grad_norm": 1.783818006515503, | |
| "learning_rate": 2.5464128475474937e-06, | |
| "loss": 0.7012801170349121, | |
| "step": 1328 | |
| }, | |
| { | |
| "epoch": 2.8059071729957807, | |
| "grad_norm": 1.9667185544967651, | |
| "learning_rate": 2.542302948870904e-06, | |
| "loss": 1.041996955871582, | |
| "step": 1330 | |
| }, | |
| { | |
| "epoch": 2.810126582278481, | |
| "grad_norm": 13.571832656860352, | |
| "learning_rate": 2.5381911978530006e-06, | |
| "loss": 0.9141802787780762, | |
| "step": 1332 | |
| }, | |
| { | |
| "epoch": 2.8143459915611815, | |
| "grad_norm": 2.473447799682617, | |
| "learning_rate": 2.5340776167441508e-06, | |
| "loss": 0.5973923206329346, | |
| "step": 1334 | |
| }, | |
| { | |
| "epoch": 2.818565400843882, | |
| "grad_norm": 1.2413594722747803, | |
| "learning_rate": 2.529962227804626e-06, | |
| "loss": 0.8588274717330933, | |
| "step": 1336 | |
| }, | |
| { | |
| "epoch": 2.8227848101265822, | |
| "grad_norm": 5.830739498138428, | |
| "learning_rate": 2.525845053304479e-06, | |
| "loss": 0.7775506973266602, | |
| "step": 1338 | |
| }, | |
| { | |
| "epoch": 2.827004219409283, | |
| "grad_norm": 5.612140655517578, | |
| "learning_rate": 2.521726115523425e-06, | |
| "loss": 0.9469473361968994, | |
| "step": 1340 | |
| }, | |
| { | |
| "epoch": 2.831223628691983, | |
| "grad_norm": 2.9371390342712402, | |
| "learning_rate": 2.517605436750723e-06, | |
| "loss": 1.0295050144195557, | |
| "step": 1342 | |
| }, | |
| { | |
| "epoch": 2.8354430379746836, | |
| "grad_norm": 2.6451170444488525, | |
| "learning_rate": 2.513483039285051e-06, | |
| "loss": 1.1780718564987183, | |
| "step": 1344 | |
| }, | |
| { | |
| "epoch": 2.8396624472573837, | |
| "grad_norm": 12.214982032775879, | |
| "learning_rate": 2.5093589454343883e-06, | |
| "loss": 0.7536942362785339, | |
| "step": 1346 | |
| }, | |
| { | |
| "epoch": 2.8438818565400843, | |
| "grad_norm": 2.7933950424194336, | |
| "learning_rate": 2.505233177515894e-06, | |
| "loss": 0.607318639755249, | |
| "step": 1348 | |
| }, | |
| { | |
| "epoch": 2.848101265822785, | |
| "grad_norm": 2.1484858989715576, | |
| "learning_rate": 2.501105757855787e-06, | |
| "loss": 1.0892062187194824, | |
| "step": 1350 | |
| }, | |
| { | |
| "epoch": 2.852320675105485, | |
| "grad_norm": 3.0315003395080566, | |
| "learning_rate": 2.4969767087892236e-06, | |
| "loss": 0.7782174348831177, | |
| "step": 1352 | |
| }, | |
| { | |
| "epoch": 2.8565400843881856, | |
| "grad_norm": 2.569249153137207, | |
| "learning_rate": 2.492846052660178e-06, | |
| "loss": 0.8103134632110596, | |
| "step": 1354 | |
| }, | |
| { | |
| "epoch": 2.8607594936708862, | |
| "grad_norm": 8.901324272155762, | |
| "learning_rate": 2.4887138118213206e-06, | |
| "loss": 0.5044631361961365, | |
| "step": 1356 | |
| }, | |
| { | |
| "epoch": 2.8649789029535864, | |
| "grad_norm": 2.725210428237915, | |
| "learning_rate": 2.4845800086338972e-06, | |
| "loss": 1.0778303146362305, | |
| "step": 1358 | |
| }, | |
| { | |
| "epoch": 2.869198312236287, | |
| "grad_norm": 3.4764597415924072, | |
| "learning_rate": 2.4804446654676076e-06, | |
| "loss": 0.8491913080215454, | |
| "step": 1360 | |
| }, | |
| { | |
| "epoch": 2.8734177215189876, | |
| "grad_norm": 1.586370587348938, | |
| "learning_rate": 2.4763078047004863e-06, | |
| "loss": 0.6659104824066162, | |
| "step": 1362 | |
| }, | |
| { | |
| "epoch": 2.8776371308016877, | |
| "grad_norm": 6.09430456161499, | |
| "learning_rate": 2.47216944871878e-06, | |
| "loss": 0.950684130191803, | |
| "step": 1364 | |
| }, | |
| { | |
| "epoch": 2.8818565400843883, | |
| "grad_norm": 2.1875460147857666, | |
| "learning_rate": 2.468029619916825e-06, | |
| "loss": 0.9997307062149048, | |
| "step": 1366 | |
| }, | |
| { | |
| "epoch": 2.8860759493670884, | |
| "grad_norm": 3.9469892978668213, | |
| "learning_rate": 2.46388834069693e-06, | |
| "loss": 1.1051433086395264, | |
| "step": 1368 | |
| }, | |
| { | |
| "epoch": 2.890295358649789, | |
| "grad_norm": 1.705639123916626, | |
| "learning_rate": 2.4597456334692505e-06, | |
| "loss": 1.03743577003479, | |
| "step": 1370 | |
| }, | |
| { | |
| "epoch": 2.894514767932489, | |
| "grad_norm": 22.948728561401367, | |
| "learning_rate": 2.455601520651671e-06, | |
| "loss": 0.4580141305923462, | |
| "step": 1372 | |
| }, | |
| { | |
| "epoch": 2.8987341772151898, | |
| "grad_norm": 1.9022364616394043, | |
| "learning_rate": 2.451456024669681e-06, | |
| "loss": 0.92431640625, | |
| "step": 1374 | |
| }, | |
| { | |
| "epoch": 2.9029535864978904, | |
| "grad_norm": 1.598383903503418, | |
| "learning_rate": 2.4473091679562555e-06, | |
| "loss": 1.1237053871154785, | |
| "step": 1376 | |
| }, | |
| { | |
| "epoch": 2.9071729957805905, | |
| "grad_norm": 4.576679706573486, | |
| "learning_rate": 2.443160972951733e-06, | |
| "loss": 0.8321917653083801, | |
| "step": 1378 | |
| }, | |
| { | |
| "epoch": 2.911392405063291, | |
| "grad_norm": 3.267960786819458, | |
| "learning_rate": 2.4390114621036948e-06, | |
| "loss": 1.2134051322937012, | |
| "step": 1380 | |
| }, | |
| { | |
| "epoch": 2.9156118143459917, | |
| "grad_norm": 9.497183799743652, | |
| "learning_rate": 2.43486065786684e-06, | |
| "loss": 0.6116930842399597, | |
| "step": 1382 | |
| }, | |
| { | |
| "epoch": 2.919831223628692, | |
| "grad_norm": 9.528655052185059, | |
| "learning_rate": 2.43070858270287e-06, | |
| "loss": 0.7370846271514893, | |
| "step": 1384 | |
| }, | |
| { | |
| "epoch": 2.9240506329113924, | |
| "grad_norm": 2.335017204284668, | |
| "learning_rate": 2.4265552590803616e-06, | |
| "loss": 0.6520988941192627, | |
| "step": 1386 | |
| }, | |
| { | |
| "epoch": 2.928270042194093, | |
| "grad_norm": 3.7409374713897705, | |
| "learning_rate": 2.4224007094746495e-06, | |
| "loss": 1.0449352264404297, | |
| "step": 1388 | |
| }, | |
| { | |
| "epoch": 2.932489451476793, | |
| "grad_norm": 2.975673198699951, | |
| "learning_rate": 2.418244956367701e-06, | |
| "loss": 0.9698547124862671, | |
| "step": 1390 | |
| }, | |
| { | |
| "epoch": 2.9367088607594938, | |
| "grad_norm": 2.086550712585449, | |
| "learning_rate": 2.4140880222479963e-06, | |
| "loss": 0.6123561859130859, | |
| "step": 1392 | |
| }, | |
| { | |
| "epoch": 2.9409282700421944, | |
| "grad_norm": 2.2701752185821533, | |
| "learning_rate": 2.4099299296104063e-06, | |
| "loss": 0.6262718439102173, | |
| "step": 1394 | |
| }, | |
| { | |
| "epoch": 2.9451476793248945, | |
| "grad_norm": 4.327895164489746, | |
| "learning_rate": 2.405770700956073e-06, | |
| "loss": 1.0023303031921387, | |
| "step": 1396 | |
| }, | |
| { | |
| "epoch": 2.9493670886075947, | |
| "grad_norm": 1.0873901844024658, | |
| "learning_rate": 2.401610358792283e-06, | |
| "loss": 0.8893314599990845, | |
| "step": 1398 | |
| }, | |
| { | |
| "epoch": 2.9535864978902953, | |
| "grad_norm": 3.0334839820861816, | |
| "learning_rate": 2.3974489256323508e-06, | |
| "loss": 0.8417981266975403, | |
| "step": 1400 | |
| }, | |
| { | |
| "epoch": 2.957805907172996, | |
| "grad_norm": 5.334658622741699, | |
| "learning_rate": 2.3932864239954937e-06, | |
| "loss": 0.7297941446304321, | |
| "step": 1402 | |
| }, | |
| { | |
| "epoch": 2.962025316455696, | |
| "grad_norm": 2.946950674057007, | |
| "learning_rate": 2.3891228764067106e-06, | |
| "loss": 1.0070791244506836, | |
| "step": 1404 | |
| }, | |
| { | |
| "epoch": 2.9662447257383966, | |
| "grad_norm": 3.0521016120910645, | |
| "learning_rate": 2.384958305396662e-06, | |
| "loss": 0.8960994482040405, | |
| "step": 1406 | |
| }, | |
| { | |
| "epoch": 2.970464135021097, | |
| "grad_norm": 4.832094192504883, | |
| "learning_rate": 2.380792733501545e-06, | |
| "loss": 0.577763557434082, | |
| "step": 1408 | |
| }, | |
| { | |
| "epoch": 2.9746835443037973, | |
| "grad_norm": 3.717233419418335, | |
| "learning_rate": 2.376626183262975e-06, | |
| "loss": 0.8571799993515015, | |
| "step": 1410 | |
| }, | |
| { | |
| "epoch": 2.978902953586498, | |
| "grad_norm": 5.3040547370910645, | |
| "learning_rate": 2.3724586772278574e-06, | |
| "loss": 1.0527344942092896, | |
| "step": 1412 | |
| }, | |
| { | |
| "epoch": 2.9831223628691985, | |
| "grad_norm": 5.977110385894775, | |
| "learning_rate": 2.368290237948275e-06, | |
| "loss": 0.8416517972946167, | |
| "step": 1414 | |
| }, | |
| { | |
| "epoch": 2.9873417721518987, | |
| "grad_norm": 10.9218111038208, | |
| "learning_rate": 2.3641208879813567e-06, | |
| "loss": 0.8895251750946045, | |
| "step": 1416 | |
| }, | |
| { | |
| "epoch": 2.9915611814345993, | |
| "grad_norm": 2.910202741622925, | |
| "learning_rate": 2.3599506498891625e-06, | |
| "loss": 0.9375064373016357, | |
| "step": 1418 | |
| }, | |
| { | |
| "epoch": 2.9957805907173, | |
| "grad_norm": 4.632609844207764, | |
| "learning_rate": 2.355779546238555e-06, | |
| "loss": 1.054133415222168, | |
| "step": 1420 | |
| }, | |
| { | |
| "epoch": 3.0, | |
| "grad_norm": 2.0572307109832764, | |
| "learning_rate": 2.3516075996010844e-06, | |
| "loss": 0.47653502225875854, | |
| "step": 1422 | |
| }, | |
| { | |
| "epoch": 3.0042194092827006, | |
| "grad_norm": 6.811531066894531, | |
| "learning_rate": 2.3474348325528613e-06, | |
| "loss": 0.7990585565567017, | |
| "step": 1424 | |
| }, | |
| { | |
| "epoch": 3.0084388185654007, | |
| "grad_norm": 4.26148796081543, | |
| "learning_rate": 2.3432612676744338e-06, | |
| "loss": 0.6641910672187805, | |
| "step": 1426 | |
| }, | |
| { | |
| "epoch": 3.0126582278481013, | |
| "grad_norm": 14.421019554138184, | |
| "learning_rate": 2.3390869275506704e-06, | |
| "loss": 0.6507161855697632, | |
| "step": 1428 | |
| }, | |
| { | |
| "epoch": 3.0168776371308015, | |
| "grad_norm": 3.2018351554870605, | |
| "learning_rate": 2.334911834770633e-06, | |
| "loss": 0.3902518153190613, | |
| "step": 1430 | |
| }, | |
| { | |
| "epoch": 3.021097046413502, | |
| "grad_norm": 1.717463731765747, | |
| "learning_rate": 2.330736011927458e-06, | |
| "loss": 1.0567653179168701, | |
| "step": 1432 | |
| }, | |
| { | |
| "epoch": 3.0253164556962027, | |
| "grad_norm": 1.948553442955017, | |
| "learning_rate": 2.326559481618229e-06, | |
| "loss": 0.9750782251358032, | |
| "step": 1434 | |
| }, | |
| { | |
| "epoch": 3.029535864978903, | |
| "grad_norm": 3.6881439685821533, | |
| "learning_rate": 2.322382266443863e-06, | |
| "loss": 1.128783106803894, | |
| "step": 1436 | |
| }, | |
| { | |
| "epoch": 3.0337552742616034, | |
| "grad_norm": 3.5191917419433594, | |
| "learning_rate": 2.3182043890089784e-06, | |
| "loss": 0.5267306566238403, | |
| "step": 1438 | |
| }, | |
| { | |
| "epoch": 3.037974683544304, | |
| "grad_norm": 2.0311169624328613, | |
| "learning_rate": 2.3140258719217808e-06, | |
| "loss": 0.9317551851272583, | |
| "step": 1440 | |
| }, | |
| { | |
| "epoch": 3.042194092827004, | |
| "grad_norm": 5.587665557861328, | |
| "learning_rate": 2.309846737793935e-06, | |
| "loss": 0.6537089943885803, | |
| "step": 1442 | |
| }, | |
| { | |
| "epoch": 3.0464135021097047, | |
| "grad_norm": 2.518566131591797, | |
| "learning_rate": 2.3056670092404463e-06, | |
| "loss": 0.8329222202301025, | |
| "step": 1444 | |
| }, | |
| { | |
| "epoch": 3.050632911392405, | |
| "grad_norm": 2.06829833984375, | |
| "learning_rate": 2.3014867088795357e-06, | |
| "loss": 1.0246927738189697, | |
| "step": 1446 | |
| }, | |
| { | |
| "epoch": 3.0548523206751055, | |
| "grad_norm": 9.406811714172363, | |
| "learning_rate": 2.297305859332519e-06, | |
| "loss": 0.6608364582061768, | |
| "step": 1448 | |
| }, | |
| { | |
| "epoch": 3.059071729957806, | |
| "grad_norm": 18.555009841918945, | |
| "learning_rate": 2.2931244832236837e-06, | |
| "loss": 0.8099187612533569, | |
| "step": 1450 | |
| }, | |
| { | |
| "epoch": 3.0632911392405062, | |
| "grad_norm": 2.87371563911438, | |
| "learning_rate": 2.288942603180167e-06, | |
| "loss": 1.048098087310791, | |
| "step": 1452 | |
| }, | |
| { | |
| "epoch": 3.067510548523207, | |
| "grad_norm": 5.4316205978393555, | |
| "learning_rate": 2.2847602418318327e-06, | |
| "loss": 0.7442044019699097, | |
| "step": 1454 | |
| }, | |
| { | |
| "epoch": 3.071729957805907, | |
| "grad_norm": 6.081297397613525, | |
| "learning_rate": 2.2805774218111496e-06, | |
| "loss": 0.6251615285873413, | |
| "step": 1456 | |
| }, | |
| { | |
| "epoch": 3.0759493670886076, | |
| "grad_norm": 10.227375984191895, | |
| "learning_rate": 2.276394165753067e-06, | |
| "loss": 0.6871986389160156, | |
| "step": 1458 | |
| }, | |
| { | |
| "epoch": 3.080168776371308, | |
| "grad_norm": 7.270413398742676, | |
| "learning_rate": 2.272210496294896e-06, | |
| "loss": 0.7179367542266846, | |
| "step": 1460 | |
| }, | |
| { | |
| "epoch": 3.0843881856540083, | |
| "grad_norm": 2.082552194595337, | |
| "learning_rate": 2.268026436076185e-06, | |
| "loss": 0.9696202278137207, | |
| "step": 1462 | |
| }, | |
| { | |
| "epoch": 3.088607594936709, | |
| "grad_norm": 2.518341302871704, | |
| "learning_rate": 2.263842007738594e-06, | |
| "loss": 0.9051344394683838, | |
| "step": 1464 | |
| }, | |
| { | |
| "epoch": 3.0928270042194095, | |
| "grad_norm": 2.340363025665283, | |
| "learning_rate": 2.2596572339257777e-06, | |
| "loss": 0.8250648975372314, | |
| "step": 1466 | |
| }, | |
| { | |
| "epoch": 3.0970464135021096, | |
| "grad_norm": 11.077507019042969, | |
| "learning_rate": 2.255472137283259e-06, | |
| "loss": 0.6344802975654602, | |
| "step": 1468 | |
| }, | |
| { | |
| "epoch": 3.1012658227848102, | |
| "grad_norm": 7.140725612640381, | |
| "learning_rate": 2.2512867404583085e-06, | |
| "loss": 0.1541098654270172, | |
| "step": 1470 | |
| }, | |
| { | |
| "epoch": 3.1054852320675104, | |
| "grad_norm": 4.636476993560791, | |
| "learning_rate": 2.2471010660998215e-06, | |
| "loss": 1.4155219793319702, | |
| "step": 1472 | |
| }, | |
| { | |
| "epoch": 3.109704641350211, | |
| "grad_norm": 3.027451276779175, | |
| "learning_rate": 2.242915136858193e-06, | |
| "loss": 0.49524158239364624, | |
| "step": 1474 | |
| }, | |
| { | |
| "epoch": 3.1139240506329116, | |
| "grad_norm": 3.410243034362793, | |
| "learning_rate": 2.2387289753852e-06, | |
| "loss": 1.0359880924224854, | |
| "step": 1476 | |
| }, | |
| { | |
| "epoch": 3.1181434599156117, | |
| "grad_norm": 2.0419440269470215, | |
| "learning_rate": 2.234542604333875e-06, | |
| "loss": 1.03524911403656, | |
| "step": 1478 | |
| }, | |
| { | |
| "epoch": 3.1223628691983123, | |
| "grad_norm": 2.8948912620544434, | |
| "learning_rate": 2.230356046358384e-06, | |
| "loss": 0.9543738961219788, | |
| "step": 1480 | |
| }, | |
| { | |
| "epoch": 3.1265822784810124, | |
| "grad_norm": 2.4057018756866455, | |
| "learning_rate": 2.2261693241139065e-06, | |
| "loss": 0.9722020030021667, | |
| "step": 1482 | |
| }, | |
| { | |
| "epoch": 3.130801687763713, | |
| "grad_norm": 1.9602731466293335, | |
| "learning_rate": 2.2219824602565087e-06, | |
| "loss": 0.9750865697860718, | |
| "step": 1484 | |
| }, | |
| { | |
| "epoch": 3.1350210970464136, | |
| "grad_norm": 2.10933780670166, | |
| "learning_rate": 2.2177954774430234e-06, | |
| "loss": 0.6285134553909302, | |
| "step": 1486 | |
| }, | |
| { | |
| "epoch": 3.1392405063291138, | |
| "grad_norm": 1.8953523635864258, | |
| "learning_rate": 2.2136083983309286e-06, | |
| "loss": 0.6080442667007446, | |
| "step": 1488 | |
| }, | |
| { | |
| "epoch": 3.1434599156118144, | |
| "grad_norm": 10.058272361755371, | |
| "learning_rate": 2.2094212455782227e-06, | |
| "loss": 1.1448235511779785, | |
| "step": 1490 | |
| }, | |
| { | |
| "epoch": 3.147679324894515, | |
| "grad_norm": 3.1135060787200928, | |
| "learning_rate": 2.2052340418433024e-06, | |
| "loss": 0.6743577718734741, | |
| "step": 1492 | |
| }, | |
| { | |
| "epoch": 3.151898734177215, | |
| "grad_norm": 6.616735458374023, | |
| "learning_rate": 2.2010468097848396e-06, | |
| "loss": 0.737909197807312, | |
| "step": 1494 | |
| }, | |
| { | |
| "epoch": 3.1561181434599157, | |
| "grad_norm": 3.229160785675049, | |
| "learning_rate": 2.1968595720616606e-06, | |
| "loss": 0.8287728428840637, | |
| "step": 1496 | |
| }, | |
| { | |
| "epoch": 3.160337552742616, | |
| "grad_norm": 0.8820177316665649, | |
| "learning_rate": 2.192672351332623e-06, | |
| "loss": 0.4992554783821106, | |
| "step": 1498 | |
| }, | |
| { | |
| "epoch": 3.1645569620253164, | |
| "grad_norm": 2.330535411834717, | |
| "learning_rate": 2.1884851702564897e-06, | |
| "loss": 0.5810240507125854, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 3.168776371308017, | |
| "grad_norm": 1.8228909969329834, | |
| "learning_rate": 2.1842980514918117e-06, | |
| "loss": 0.9471129179000854, | |
| "step": 1502 | |
| }, | |
| { | |
| "epoch": 3.172995780590717, | |
| "grad_norm": 2.3642683029174805, | |
| "learning_rate": 2.1801110176968016e-06, | |
| "loss": 0.8418397903442383, | |
| "step": 1504 | |
| }, | |
| { | |
| "epoch": 3.1772151898734178, | |
| "grad_norm": 1.8827167749404907, | |
| "learning_rate": 2.1759240915292135e-06, | |
| "loss": 0.9700140357017517, | |
| "step": 1506 | |
| }, | |
| { | |
| "epoch": 3.181434599156118, | |
| "grad_norm": 3.775982618331909, | |
| "learning_rate": 2.171737295646216e-06, | |
| "loss": 1.1170215606689453, | |
| "step": 1508 | |
| }, | |
| { | |
| "epoch": 3.1856540084388185, | |
| "grad_norm": 4.465606212615967, | |
| "learning_rate": 2.167550652704276e-06, | |
| "loss": 0.9244706630706787, | |
| "step": 1510 | |
| }, | |
| { | |
| "epoch": 3.189873417721519, | |
| "grad_norm": 2.6871254444122314, | |
| "learning_rate": 2.1633641853590318e-06, | |
| "loss": 0.25759080052375793, | |
| "step": 1512 | |
| }, | |
| { | |
| "epoch": 3.1940928270042193, | |
| "grad_norm": 3.345410108566284, | |
| "learning_rate": 2.15917791626517e-06, | |
| "loss": 0.9588069319725037, | |
| "step": 1514 | |
| }, | |
| { | |
| "epoch": 3.19831223628692, | |
| "grad_norm": 10.61077880859375, | |
| "learning_rate": 2.154991868076306e-06, | |
| "loss": 0.5874932408332825, | |
| "step": 1516 | |
| }, | |
| { | |
| "epoch": 3.2025316455696204, | |
| "grad_norm": 2.544962167739868, | |
| "learning_rate": 2.1508060634448595e-06, | |
| "loss": 0.991689920425415, | |
| "step": 1518 | |
| }, | |
| { | |
| "epoch": 3.2067510548523206, | |
| "grad_norm": 3.6874847412109375, | |
| "learning_rate": 2.1466205250219315e-06, | |
| "loss": 0.9816372990608215, | |
| "step": 1520 | |
| }, | |
| { | |
| "epoch": 3.210970464135021, | |
| "grad_norm": 5.628320217132568, | |
| "learning_rate": 2.142435275457184e-06, | |
| "loss": 0.30518054962158203, | |
| "step": 1522 | |
| }, | |
| { | |
| "epoch": 3.2151898734177213, | |
| "grad_norm": 3.656771659851074, | |
| "learning_rate": 2.1382503373987133e-06, | |
| "loss": 0.7900766134262085, | |
| "step": 1524 | |
| }, | |
| { | |
| "epoch": 3.219409282700422, | |
| "grad_norm": 2.2453036308288574, | |
| "learning_rate": 2.1340657334929335e-06, | |
| "loss": 0.8744317293167114, | |
| "step": 1526 | |
| }, | |
| { | |
| "epoch": 3.2236286919831225, | |
| "grad_norm": 0.9895398616790771, | |
| "learning_rate": 2.1298814863844476e-06, | |
| "loss": 0.47598880529403687, | |
| "step": 1528 | |
| }, | |
| { | |
| "epoch": 3.2278481012658227, | |
| "grad_norm": 5.763035297393799, | |
| "learning_rate": 2.1256976187159278e-06, | |
| "loss": 0.7799667119979858, | |
| "step": 1530 | |
| }, | |
| { | |
| "epoch": 3.2320675105485233, | |
| "grad_norm": 0.7087782621383667, | |
| "learning_rate": 2.121514153127995e-06, | |
| "loss": 0.2722686529159546, | |
| "step": 1532 | |
| }, | |
| { | |
| "epoch": 3.2362869198312234, | |
| "grad_norm": 3.2583420276641846, | |
| "learning_rate": 2.1173311122590932e-06, | |
| "loss": 0.7357510328292847, | |
| "step": 1534 | |
| }, | |
| { | |
| "epoch": 3.240506329113924, | |
| "grad_norm": 3.8085386753082275, | |
| "learning_rate": 2.1131485187453676e-06, | |
| "loss": 0.9901435375213623, | |
| "step": 1536 | |
| }, | |
| { | |
| "epoch": 3.2447257383966246, | |
| "grad_norm": 2.8548874855041504, | |
| "learning_rate": 2.1089663952205435e-06, | |
| "loss": 0.9335240721702576, | |
| "step": 1538 | |
| }, | |
| { | |
| "epoch": 3.2489451476793247, | |
| "grad_norm": 9.909287452697754, | |
| "learning_rate": 2.104784764315802e-06, | |
| "loss": 0.752236008644104, | |
| "step": 1540 | |
| }, | |
| { | |
| "epoch": 3.2531645569620253, | |
| "grad_norm": 9.005875587463379, | |
| "learning_rate": 2.100603648659659e-06, | |
| "loss": 0.741628885269165, | |
| "step": 1542 | |
| }, | |
| { | |
| "epoch": 3.257383966244726, | |
| "grad_norm": 8.307135581970215, | |
| "learning_rate": 2.096423070877843e-06, | |
| "loss": 0.6267164945602417, | |
| "step": 1544 | |
| }, | |
| { | |
| "epoch": 3.261603375527426, | |
| "grad_norm": 6.679696559906006, | |
| "learning_rate": 2.092243053593169e-06, | |
| "loss": 0.5680997371673584, | |
| "step": 1546 | |
| }, | |
| { | |
| "epoch": 3.2658227848101267, | |
| "grad_norm": 3.8873493671417236, | |
| "learning_rate": 2.0880636194254225e-06, | |
| "loss": 0.874029278755188, | |
| "step": 1548 | |
| }, | |
| { | |
| "epoch": 3.270042194092827, | |
| "grad_norm": 6.6328301429748535, | |
| "learning_rate": 2.0838847909912307e-06, | |
| "loss": 0.4085759222507477, | |
| "step": 1550 | |
| }, | |
| { | |
| "epoch": 3.2742616033755274, | |
| "grad_norm": 2.145261526107788, | |
| "learning_rate": 2.0797065909039457e-06, | |
| "loss": 0.36501544713974, | |
| "step": 1552 | |
| }, | |
| { | |
| "epoch": 3.278481012658228, | |
| "grad_norm": 7.986878395080566, | |
| "learning_rate": 2.0755290417735156e-06, | |
| "loss": 0.4557437002658844, | |
| "step": 1554 | |
| }, | |
| { | |
| "epoch": 3.282700421940928, | |
| "grad_norm": 2.874678373336792, | |
| "learning_rate": 2.071352166206369e-06, | |
| "loss": 0.962173581123352, | |
| "step": 1556 | |
| }, | |
| { | |
| "epoch": 3.2869198312236287, | |
| "grad_norm": 1.1194981336593628, | |
| "learning_rate": 2.0671759868052893e-06, | |
| "loss": 0.7566915154457092, | |
| "step": 1558 | |
| }, | |
| { | |
| "epoch": 3.291139240506329, | |
| "grad_norm": 2.609232187271118, | |
| "learning_rate": 2.0630005261692905e-06, | |
| "loss": 0.6619813442230225, | |
| "step": 1560 | |
| }, | |
| { | |
| "epoch": 3.2953586497890295, | |
| "grad_norm": 9.03283977508545, | |
| "learning_rate": 2.0588258068935002e-06, | |
| "loss": 0.5809231400489807, | |
| "step": 1562 | |
| }, | |
| { | |
| "epoch": 3.29957805907173, | |
| "grad_norm": 3.785902500152588, | |
| "learning_rate": 2.0546518515690316e-06, | |
| "loss": 0.8656713366508484, | |
| "step": 1564 | |
| }, | |
| { | |
| "epoch": 3.3037974683544302, | |
| "grad_norm": 11.584537506103516, | |
| "learning_rate": 2.0504786827828648e-06, | |
| "loss": 0.7611091136932373, | |
| "step": 1566 | |
| }, | |
| { | |
| "epoch": 3.308016877637131, | |
| "grad_norm": 8.480819702148438, | |
| "learning_rate": 2.0463063231177236e-06, | |
| "loss": 0.5610800981521606, | |
| "step": 1568 | |
| }, | |
| { | |
| "epoch": 3.3122362869198314, | |
| "grad_norm": 5.454217910766602, | |
| "learning_rate": 2.0421347951519535e-06, | |
| "loss": 0.5264372229576111, | |
| "step": 1570 | |
| }, | |
| { | |
| "epoch": 3.3164556962025316, | |
| "grad_norm": 3.4205212593078613, | |
| "learning_rate": 2.037964121459399e-06, | |
| "loss": 0.5730254650115967, | |
| "step": 1572 | |
| }, | |
| { | |
| "epoch": 3.320675105485232, | |
| "grad_norm": 3.719339609146118, | |
| "learning_rate": 2.033794324609282e-06, | |
| "loss": 1.091575026512146, | |
| "step": 1574 | |
| }, | |
| { | |
| "epoch": 3.3248945147679323, | |
| "grad_norm": 2.0314159393310547, | |
| "learning_rate": 2.0296254271660795e-06, | |
| "loss": 0.8482744693756104, | |
| "step": 1576 | |
| }, | |
| { | |
| "epoch": 3.329113924050633, | |
| "grad_norm": 2.4221394062042236, | |
| "learning_rate": 2.025457451689401e-06, | |
| "loss": 0.9338847398757935, | |
| "step": 1578 | |
| }, | |
| { | |
| "epoch": 3.3333333333333335, | |
| "grad_norm": 3.0065743923187256, | |
| "learning_rate": 2.0212904207338672e-06, | |
| "loss": 0.7377324104309082, | |
| "step": 1580 | |
| }, | |
| { | |
| "epoch": 3.3375527426160336, | |
| "grad_norm": 1.4527244567871094, | |
| "learning_rate": 2.0171243568489883e-06, | |
| "loss": 0.48970168828964233, | |
| "step": 1582 | |
| }, | |
| { | |
| "epoch": 3.3417721518987342, | |
| "grad_norm": 5.6556010246276855, | |
| "learning_rate": 2.0129592825790397e-06, | |
| "loss": 0.7742688655853271, | |
| "step": 1584 | |
| }, | |
| { | |
| "epoch": 3.3459915611814344, | |
| "grad_norm": 5.596024990081787, | |
| "learning_rate": 2.0087952204629422e-06, | |
| "loss": 0.641385555267334, | |
| "step": 1586 | |
| }, | |
| { | |
| "epoch": 3.350210970464135, | |
| "grad_norm": 2.5374205112457275, | |
| "learning_rate": 2.0046321930341405e-06, | |
| "loss": 0.5972579717636108, | |
| "step": 1588 | |
| }, | |
| { | |
| "epoch": 3.3544303797468356, | |
| "grad_norm": 1.8447959423065186, | |
| "learning_rate": 2.0004702228204797e-06, | |
| "loss": 0.8615912199020386, | |
| "step": 1590 | |
| }, | |
| { | |
| "epoch": 3.3586497890295357, | |
| "grad_norm": 6.396413326263428, | |
| "learning_rate": 1.9963093323440824e-06, | |
| "loss": 0.9015900492668152, | |
| "step": 1592 | |
| }, | |
| { | |
| "epoch": 3.3628691983122363, | |
| "grad_norm": 0.825650155544281, | |
| "learning_rate": 1.99214954412123e-06, | |
| "loss": 0.6481198072433472, | |
| "step": 1594 | |
| }, | |
| { | |
| "epoch": 3.367088607594937, | |
| "grad_norm": 6.811497211456299, | |
| "learning_rate": 1.9879908806622385e-06, | |
| "loss": 0.4374066889286041, | |
| "step": 1596 | |
| }, | |
| { | |
| "epoch": 3.371308016877637, | |
| "grad_norm": 0.7065004706382751, | |
| "learning_rate": 1.9838333644713377e-06, | |
| "loss": 0.4804467558860779, | |
| "step": 1598 | |
| }, | |
| { | |
| "epoch": 3.3755274261603376, | |
| "grad_norm": 10.63037109375, | |
| "learning_rate": 1.9796770180465484e-06, | |
| "loss": 0.6881888508796692, | |
| "step": 1600 | |
| }, | |
| { | |
| "epoch": 3.379746835443038, | |
| "grad_norm": 5.440734386444092, | |
| "learning_rate": 1.9755218638795626e-06, | |
| "loss": 0.547875165939331, | |
| "step": 1602 | |
| }, | |
| { | |
| "epoch": 3.3839662447257384, | |
| "grad_norm": 1.4273031949996948, | |
| "learning_rate": 1.971367924455618e-06, | |
| "loss": 0.5285290479660034, | |
| "step": 1604 | |
| }, | |
| { | |
| "epoch": 3.388185654008439, | |
| "grad_norm": 3.3389201164245605, | |
| "learning_rate": 1.9672152222533822e-06, | |
| "loss": 1.0279819965362549, | |
| "step": 1606 | |
| }, | |
| { | |
| "epoch": 3.392405063291139, | |
| "grad_norm": 2.3950865268707275, | |
| "learning_rate": 1.9630637797448248e-06, | |
| "loss": 0.6111994981765747, | |
| "step": 1608 | |
| }, | |
| { | |
| "epoch": 3.3966244725738397, | |
| "grad_norm": 1.5823328495025635, | |
| "learning_rate": 1.9589136193951e-06, | |
| "loss": 0.5231560468673706, | |
| "step": 1610 | |
| }, | |
| { | |
| "epoch": 3.40084388185654, | |
| "grad_norm": 2.5763416290283203, | |
| "learning_rate": 1.9547647636624243e-06, | |
| "loss": 0.916947603225708, | |
| "step": 1612 | |
| }, | |
| { | |
| "epoch": 3.4050632911392404, | |
| "grad_norm": 2.1279733180999756, | |
| "learning_rate": 1.9506172349979523e-06, | |
| "loss": 0.39490947127342224, | |
| "step": 1614 | |
| }, | |
| { | |
| "epoch": 3.409282700421941, | |
| "grad_norm": 13.062804222106934, | |
| "learning_rate": 1.9464710558456595e-06, | |
| "loss": 0.8276299238204956, | |
| "step": 1616 | |
| }, | |
| { | |
| "epoch": 3.413502109704641, | |
| "grad_norm": 2.3977434635162354, | |
| "learning_rate": 1.942326248642218e-06, | |
| "loss": 1.0900508165359497, | |
| "step": 1618 | |
| }, | |
| { | |
| "epoch": 3.4177215189873418, | |
| "grad_norm": 2.819269895553589, | |
| "learning_rate": 1.9381828358168748e-06, | |
| "loss": 0.9528172016143799, | |
| "step": 1620 | |
| }, | |
| { | |
| "epoch": 3.4219409282700424, | |
| "grad_norm": 23.19445037841797, | |
| "learning_rate": 1.934040839791332e-06, | |
| "loss": 0.5396543145179749, | |
| "step": 1622 | |
| }, | |
| { | |
| "epoch": 3.4261603375527425, | |
| "grad_norm": 7.595386028289795, | |
| "learning_rate": 1.9299002829796253e-06, | |
| "loss": 0.3888126611709595, | |
| "step": 1624 | |
| }, | |
| { | |
| "epoch": 3.430379746835443, | |
| "grad_norm": 5.8151960372924805, | |
| "learning_rate": 1.925761187788002e-06, | |
| "loss": 0.3526824712753296, | |
| "step": 1626 | |
| }, | |
| { | |
| "epoch": 3.4345991561181437, | |
| "grad_norm": 5.015478134155273, | |
| "learning_rate": 1.921623576614799e-06, | |
| "loss": 1.0127757787704468, | |
| "step": 1628 | |
| }, | |
| { | |
| "epoch": 3.438818565400844, | |
| "grad_norm": 3.884026050567627, | |
| "learning_rate": 1.917487471850323e-06, | |
| "loss": 0.3786028325557709, | |
| "step": 1630 | |
| }, | |
| { | |
| "epoch": 3.4430379746835444, | |
| "grad_norm": 4.3548784255981445, | |
| "learning_rate": 1.91335289587673e-06, | |
| "loss": 1.0020424127578735, | |
| "step": 1632 | |
| }, | |
| { | |
| "epoch": 3.4472573839662446, | |
| "grad_norm": 1.1631532907485962, | |
| "learning_rate": 1.909219871067902e-06, | |
| "loss": 0.5979082584381104, | |
| "step": 1634 | |
| }, | |
| { | |
| "epoch": 3.451476793248945, | |
| "grad_norm": 5.268650531768799, | |
| "learning_rate": 1.9050884197893278e-06, | |
| "loss": 1.1838793754577637, | |
| "step": 1636 | |
| }, | |
| { | |
| "epoch": 3.4556962025316453, | |
| "grad_norm": 4.640054702758789, | |
| "learning_rate": 1.90095856439798e-06, | |
| "loss": 1.1896966695785522, | |
| "step": 1638 | |
| }, | |
| { | |
| "epoch": 3.459915611814346, | |
| "grad_norm": 3.0583033561706543, | |
| "learning_rate": 1.8968303272421968e-06, | |
| "loss": 0.9648596048355103, | |
| "step": 1640 | |
| }, | |
| { | |
| "epoch": 3.4641350210970465, | |
| "grad_norm": 30.096092224121094, | |
| "learning_rate": 1.8927037306615578e-06, | |
| "loss": 1.0935192108154297, | |
| "step": 1642 | |
| }, | |
| { | |
| "epoch": 3.4683544303797467, | |
| "grad_norm": 2.4771618843078613, | |
| "learning_rate": 1.8885787969867656e-06, | |
| "loss": 0.35215988755226135, | |
| "step": 1644 | |
| }, | |
| { | |
| "epoch": 3.4725738396624473, | |
| "grad_norm": 10.702546119689941, | |
| "learning_rate": 1.884455548539524e-06, | |
| "loss": 0.839633584022522, | |
| "step": 1646 | |
| }, | |
| { | |
| "epoch": 3.476793248945148, | |
| "grad_norm": 1.648725152015686, | |
| "learning_rate": 1.8803340076324181e-06, | |
| "loss": 0.9294931888580322, | |
| "step": 1648 | |
| }, | |
| { | |
| "epoch": 3.481012658227848, | |
| "grad_norm": 2.6175386905670166, | |
| "learning_rate": 1.876214196568791e-06, | |
| "loss": 0.5126534104347229, | |
| "step": 1650 | |
| }, | |
| { | |
| "epoch": 3.4852320675105486, | |
| "grad_norm": 2.2899160385131836, | |
| "learning_rate": 1.872096137642627e-06, | |
| "loss": 0.8264724612236023, | |
| "step": 1652 | |
| }, | |
| { | |
| "epoch": 3.489451476793249, | |
| "grad_norm": 1.0500420331954956, | |
| "learning_rate": 1.8679798531384274e-06, | |
| "loss": 0.4854082465171814, | |
| "step": 1654 | |
| }, | |
| { | |
| "epoch": 3.4936708860759493, | |
| "grad_norm": 1.5645257234573364, | |
| "learning_rate": 1.8638653653310926e-06, | |
| "loss": 0.7242560386657715, | |
| "step": 1656 | |
| }, | |
| { | |
| "epoch": 3.49789029535865, | |
| "grad_norm": 3.557481050491333, | |
| "learning_rate": 1.8597526964857985e-06, | |
| "loss": 0.7009620666503906, | |
| "step": 1658 | |
| }, | |
| { | |
| "epoch": 3.50210970464135, | |
| "grad_norm": 2.4170994758605957, | |
| "learning_rate": 1.8556418688578797e-06, | |
| "loss": 1.0089216232299805, | |
| "step": 1660 | |
| }, | |
| { | |
| "epoch": 3.5063291139240507, | |
| "grad_norm": 5.906785488128662, | |
| "learning_rate": 1.8515329046927058e-06, | |
| "loss": 1.111635446548462, | |
| "step": 1662 | |
| }, | |
| { | |
| "epoch": 3.510548523206751, | |
| "grad_norm": 21.11191749572754, | |
| "learning_rate": 1.8474258262255642e-06, | |
| "loss": 0.4738878309726715, | |
| "step": 1664 | |
| }, | |
| { | |
| "epoch": 3.5147679324894514, | |
| "grad_norm": 6.232138633728027, | |
| "learning_rate": 1.843320655681536e-06, | |
| "loss": 1.019901990890503, | |
| "step": 1666 | |
| }, | |
| { | |
| "epoch": 3.518987341772152, | |
| "grad_norm": 7.000395774841309, | |
| "learning_rate": 1.839217415275379e-06, | |
| "loss": 0.6458152532577515, | |
| "step": 1668 | |
| }, | |
| { | |
| "epoch": 3.523206751054852, | |
| "grad_norm": 2.109321355819702, | |
| "learning_rate": 1.835116127211406e-06, | |
| "loss": 0.9234386086463928, | |
| "step": 1670 | |
| }, | |
| { | |
| "epoch": 3.5274261603375527, | |
| "grad_norm": 8.41999340057373, | |
| "learning_rate": 1.8310168136833646e-06, | |
| "loss": 0.382904052734375, | |
| "step": 1672 | |
| }, | |
| { | |
| "epoch": 3.5316455696202533, | |
| "grad_norm": 2.0964558124542236, | |
| "learning_rate": 1.8269194968743178e-06, | |
| "loss": 0.585561990737915, | |
| "step": 1674 | |
| }, | |
| { | |
| "epoch": 3.5358649789029535, | |
| "grad_norm": 10.49689769744873, | |
| "learning_rate": 1.8228241989565239e-06, | |
| "loss": 0.6187952160835266, | |
| "step": 1676 | |
| }, | |
| { | |
| "epoch": 3.540084388185654, | |
| "grad_norm": 7.462824821472168, | |
| "learning_rate": 1.8187309420913142e-06, | |
| "loss": 0.7788501977920532, | |
| "step": 1678 | |
| }, | |
| { | |
| "epoch": 3.5443037974683547, | |
| "grad_norm": 3.3341939449310303, | |
| "learning_rate": 1.8146397484289774e-06, | |
| "loss": 0.9248118996620178, | |
| "step": 1680 | |
| }, | |
| { | |
| "epoch": 3.548523206751055, | |
| "grad_norm": 2.9744744300842285, | |
| "learning_rate": 1.810550640108636e-06, | |
| "loss": 0.7860240936279297, | |
| "step": 1682 | |
| }, | |
| { | |
| "epoch": 3.5527426160337554, | |
| "grad_norm": 16.682893753051758, | |
| "learning_rate": 1.8064636392581285e-06, | |
| "loss": 0.7947289347648621, | |
| "step": 1684 | |
| }, | |
| { | |
| "epoch": 3.5569620253164556, | |
| "grad_norm": 11.304174423217773, | |
| "learning_rate": 1.8023787679938884e-06, | |
| "loss": 0.32021385431289673, | |
| "step": 1686 | |
| }, | |
| { | |
| "epoch": 3.561181434599156, | |
| "grad_norm": 3.4476826190948486, | |
| "learning_rate": 1.7982960484208255e-06, | |
| "loss": 0.5928635597229004, | |
| "step": 1688 | |
| }, | |
| { | |
| "epoch": 3.5654008438818563, | |
| "grad_norm": 4.565676689147949, | |
| "learning_rate": 1.7942155026322064e-06, | |
| "loss": 1.007154941558838, | |
| "step": 1690 | |
| }, | |
| { | |
| "epoch": 3.569620253164557, | |
| "grad_norm": 54.17780685424805, | |
| "learning_rate": 1.7901371527095336e-06, | |
| "loss": 0.20298929512500763, | |
| "step": 1692 | |
| }, | |
| { | |
| "epoch": 3.5738396624472575, | |
| "grad_norm": 0.7691044807434082, | |
| "learning_rate": 1.7860610207224266e-06, | |
| "loss": 0.610919713973999, | |
| "step": 1694 | |
| }, | |
| { | |
| "epoch": 3.5780590717299576, | |
| "grad_norm": 7.206573486328125, | |
| "learning_rate": 1.7819871287285042e-06, | |
| "loss": 0.2613908350467682, | |
| "step": 1696 | |
| }, | |
| { | |
| "epoch": 3.5822784810126582, | |
| "grad_norm": 2.2238030433654785, | |
| "learning_rate": 1.7779154987732628e-06, | |
| "loss": 0.7429696321487427, | |
| "step": 1698 | |
| }, | |
| { | |
| "epoch": 3.586497890295359, | |
| "grad_norm": 2.2671563625335693, | |
| "learning_rate": 1.7738461528899582e-06, | |
| "loss": 0.6627340912818909, | |
| "step": 1700 | |
| }, | |
| { | |
| "epoch": 3.590717299578059, | |
| "grad_norm": 1.9748802185058594, | |
| "learning_rate": 1.769779113099485e-06, | |
| "loss": 0.5637974739074707, | |
| "step": 1702 | |
| }, | |
| { | |
| "epoch": 3.5949367088607596, | |
| "grad_norm": 2.075197696685791, | |
| "learning_rate": 1.7657144014102605e-06, | |
| "loss": 1.022030234336853, | |
| "step": 1704 | |
| }, | |
| { | |
| "epoch": 3.59915611814346, | |
| "grad_norm": 2.7764699459075928, | |
| "learning_rate": 1.7616520398181019e-06, | |
| "loss": 0.6542642116546631, | |
| "step": 1706 | |
| }, | |
| { | |
| "epoch": 3.6033755274261603, | |
| "grad_norm": 5.018822193145752, | |
| "learning_rate": 1.757592050306111e-06, | |
| "loss": 0.7118390202522278, | |
| "step": 1708 | |
| }, | |
| { | |
| "epoch": 3.607594936708861, | |
| "grad_norm": 1.829730749130249, | |
| "learning_rate": 1.7535344548445523e-06, | |
| "loss": 0.5238461494445801, | |
| "step": 1710 | |
| }, | |
| { | |
| "epoch": 3.611814345991561, | |
| "grad_norm": 2.2571935653686523, | |
| "learning_rate": 1.7494792753907342e-06, | |
| "loss": 0.9762560129165649, | |
| "step": 1712 | |
| }, | |
| { | |
| "epoch": 3.6160337552742616, | |
| "grad_norm": 11.215494155883789, | |
| "learning_rate": 1.7454265338888923e-06, | |
| "loss": 1.1840991973876953, | |
| "step": 1714 | |
| }, | |
| { | |
| "epoch": 3.620253164556962, | |
| "grad_norm": 5.1113972663879395, | |
| "learning_rate": 1.741376252270069e-06, | |
| "loss": 0.5932983160018921, | |
| "step": 1716 | |
| }, | |
| { | |
| "epoch": 3.6244725738396624, | |
| "grad_norm": 3.276780843734741, | |
| "learning_rate": 1.7373284524519956e-06, | |
| "loss": 0.654528021812439, | |
| "step": 1718 | |
| }, | |
| { | |
| "epoch": 3.628691983122363, | |
| "grad_norm": 4.502676486968994, | |
| "learning_rate": 1.733283156338973e-06, | |
| "loss": 0.329173743724823, | |
| "step": 1720 | |
| }, | |
| { | |
| "epoch": 3.632911392405063, | |
| "grad_norm": 4.122840404510498, | |
| "learning_rate": 1.7292403858217534e-06, | |
| "loss": 1.0182509422302246, | |
| "step": 1722 | |
| }, | |
| { | |
| "epoch": 3.6371308016877637, | |
| "grad_norm": 8.013359069824219, | |
| "learning_rate": 1.7252001627774227e-06, | |
| "loss": 0.5020068287849426, | |
| "step": 1724 | |
| }, | |
| { | |
| "epoch": 3.6413502109704643, | |
| "grad_norm": 7.430994987487793, | |
| "learning_rate": 1.72116250906928e-06, | |
| "loss": 0.45291832089424133, | |
| "step": 1726 | |
| }, | |
| { | |
| "epoch": 3.6455696202531644, | |
| "grad_norm": 5.890309810638428, | |
| "learning_rate": 1.7171274465467224e-06, | |
| "loss": 0.8754688501358032, | |
| "step": 1728 | |
| }, | |
| { | |
| "epoch": 3.649789029535865, | |
| "grad_norm": 9.963774681091309, | |
| "learning_rate": 1.7130949970451245e-06, | |
| "loss": 0.2187124788761139, | |
| "step": 1730 | |
| }, | |
| { | |
| "epoch": 3.6540084388185656, | |
| "grad_norm": 6.262022972106934, | |
| "learning_rate": 1.709065182385719e-06, | |
| "loss": 0.886106014251709, | |
| "step": 1732 | |
| }, | |
| { | |
| "epoch": 3.6582278481012658, | |
| "grad_norm": 9.15018367767334, | |
| "learning_rate": 1.7050380243754838e-06, | |
| "loss": 0.3278903365135193, | |
| "step": 1734 | |
| }, | |
| { | |
| "epoch": 3.6624472573839664, | |
| "grad_norm": 30.086578369140625, | |
| "learning_rate": 1.7010135448070169e-06, | |
| "loss": 0.3603389263153076, | |
| "step": 1736 | |
| }, | |
| { | |
| "epoch": 3.6666666666666665, | |
| "grad_norm": 24.306060791015625, | |
| "learning_rate": 1.6969917654584247e-06, | |
| "loss": 0.6651766300201416, | |
| "step": 1738 | |
| }, | |
| { | |
| "epoch": 3.670886075949367, | |
| "grad_norm": 4.77196741104126, | |
| "learning_rate": 1.692972708093201e-06, | |
| "loss": 0.33792465925216675, | |
| "step": 1740 | |
| }, | |
| { | |
| "epoch": 3.6751054852320673, | |
| "grad_norm": 1.7918250560760498, | |
| "learning_rate": 1.688956394460109e-06, | |
| "loss": 1.0997920036315918, | |
| "step": 1742 | |
| }, | |
| { | |
| "epoch": 3.679324894514768, | |
| "grad_norm": 19.624130249023438, | |
| "learning_rate": 1.6849428462930653e-06, | |
| "loss": 0.5909217596054077, | |
| "step": 1744 | |
| }, | |
| { | |
| "epoch": 3.6835443037974684, | |
| "grad_norm": 7.293959140777588, | |
| "learning_rate": 1.6809320853110215e-06, | |
| "loss": 0.563459038734436, | |
| "step": 1746 | |
| }, | |
| { | |
| "epoch": 3.6877637130801686, | |
| "grad_norm": 2.4896528720855713, | |
| "learning_rate": 1.6769241332178469e-06, | |
| "loss": 1.0555415153503418, | |
| "step": 1748 | |
| }, | |
| { | |
| "epoch": 3.691983122362869, | |
| "grad_norm": 2.973538398742676, | |
| "learning_rate": 1.6729190117022095e-06, | |
| "loss": 0.8185904026031494, | |
| "step": 1750 | |
| }, | |
| { | |
| "epoch": 3.6962025316455698, | |
| "grad_norm": 3.3849141597747803, | |
| "learning_rate": 1.6689167424374597e-06, | |
| "loss": 0.8749343752861023, | |
| "step": 1752 | |
| }, | |
| { | |
| "epoch": 3.70042194092827, | |
| "grad_norm": 2.0385217666625977, | |
| "learning_rate": 1.664917347081516e-06, | |
| "loss": 1.026354432106018, | |
| "step": 1754 | |
| }, | |
| { | |
| "epoch": 3.7046413502109705, | |
| "grad_norm": 5.828520774841309, | |
| "learning_rate": 1.660920847276741e-06, | |
| "loss": 0.8060284852981567, | |
| "step": 1756 | |
| }, | |
| { | |
| "epoch": 3.708860759493671, | |
| "grad_norm": 5.976668357849121, | |
| "learning_rate": 1.6569272646498318e-06, | |
| "loss": 0.7234772443771362, | |
| "step": 1758 | |
| }, | |
| { | |
| "epoch": 3.7130801687763713, | |
| "grad_norm": 9.543655395507812, | |
| "learning_rate": 1.6529366208116974e-06, | |
| "loss": 0.7528952360153198, | |
| "step": 1760 | |
| }, | |
| { | |
| "epoch": 3.717299578059072, | |
| "grad_norm": 4.140414237976074, | |
| "learning_rate": 1.6489489373573443e-06, | |
| "loss": 0.26903659105300903, | |
| "step": 1762 | |
| }, | |
| { | |
| "epoch": 3.721518987341772, | |
| "grad_norm": 12.051411628723145, | |
| "learning_rate": 1.64496423586576e-06, | |
| "loss": 0.5374072790145874, | |
| "step": 1764 | |
| }, | |
| { | |
| "epoch": 3.7257383966244726, | |
| "grad_norm": 2.326197624206543, | |
| "learning_rate": 1.6409825378997941e-06, | |
| "loss": 0.9479004740715027, | |
| "step": 1766 | |
| }, | |
| { | |
| "epoch": 3.7299578059071727, | |
| "grad_norm": 4.621135234832764, | |
| "learning_rate": 1.6370038650060437e-06, | |
| "loss": 0.5748968124389648, | |
| "step": 1768 | |
| }, | |
| { | |
| "epoch": 3.7341772151898733, | |
| "grad_norm": 2.885585069656372, | |
| "learning_rate": 1.6330282387147349e-06, | |
| "loss": 0.5932916402816772, | |
| "step": 1770 | |
| }, | |
| { | |
| "epoch": 3.738396624472574, | |
| "grad_norm": 1.9321597814559937, | |
| "learning_rate": 1.6290556805396093e-06, | |
| "loss": 0.9674075245857239, | |
| "step": 1772 | |
| }, | |
| { | |
| "epoch": 3.742616033755274, | |
| "grad_norm": 3.254708766937256, | |
| "learning_rate": 1.6250862119778046e-06, | |
| "loss": 0.4991704523563385, | |
| "step": 1774 | |
| }, | |
| { | |
| "epoch": 3.7468354430379747, | |
| "grad_norm": 1.1030203104019165, | |
| "learning_rate": 1.6211198545097381e-06, | |
| "loss": 0.5824090242385864, | |
| "step": 1776 | |
| }, | |
| { | |
| "epoch": 3.7510548523206753, | |
| "grad_norm": 2.4272022247314453, | |
| "learning_rate": 1.6171566295989947e-06, | |
| "loss": 0.8916751146316528, | |
| "step": 1778 | |
| }, | |
| { | |
| "epoch": 3.7552742616033754, | |
| "grad_norm": 2.7834560871124268, | |
| "learning_rate": 1.6131965586922039e-06, | |
| "loss": 0.9039870500564575, | |
| "step": 1780 | |
| }, | |
| { | |
| "epoch": 3.759493670886076, | |
| "grad_norm": 3.2108805179595947, | |
| "learning_rate": 1.6092396632189317e-06, | |
| "loss": 0.8393138647079468, | |
| "step": 1782 | |
| }, | |
| { | |
| "epoch": 3.7637130801687766, | |
| "grad_norm": 8.731537818908691, | |
| "learning_rate": 1.6052859645915575e-06, | |
| "loss": 0.8530555963516235, | |
| "step": 1784 | |
| }, | |
| { | |
| "epoch": 3.7679324894514767, | |
| "grad_norm": 2.2591445446014404, | |
| "learning_rate": 1.6013354842051624e-06, | |
| "loss": 1.0453441143035889, | |
| "step": 1786 | |
| }, | |
| { | |
| "epoch": 3.7721518987341773, | |
| "grad_norm": 18.5029296875, | |
| "learning_rate": 1.5973882434374124e-06, | |
| "loss": 0.2866585850715637, | |
| "step": 1788 | |
| }, | |
| { | |
| "epoch": 3.7763713080168775, | |
| "grad_norm": 2.598447561264038, | |
| "learning_rate": 1.5934442636484425e-06, | |
| "loss": 0.5377147197723389, | |
| "step": 1790 | |
| }, | |
| { | |
| "epoch": 3.780590717299578, | |
| "grad_norm": 2.245370864868164, | |
| "learning_rate": 1.5895035661807397e-06, | |
| "loss": 0.9374682903289795, | |
| "step": 1792 | |
| }, | |
| { | |
| "epoch": 3.7848101265822782, | |
| "grad_norm": 10.506272315979004, | |
| "learning_rate": 1.5855661723590319e-06, | |
| "loss": 0.7131825685501099, | |
| "step": 1794 | |
| }, | |
| { | |
| "epoch": 3.789029535864979, | |
| "grad_norm": 5.187559127807617, | |
| "learning_rate": 1.581632103490168e-06, | |
| "loss": 0.9631250500679016, | |
| "step": 1796 | |
| }, | |
| { | |
| "epoch": 3.7932489451476794, | |
| "grad_norm": 5.299999713897705, | |
| "learning_rate": 1.577701380863003e-06, | |
| "loss": 1.1112829446792603, | |
| "step": 1798 | |
| }, | |
| { | |
| "epoch": 3.7974683544303796, | |
| "grad_norm": 2.1457207202911377, | |
| "learning_rate": 1.5737740257482867e-06, | |
| "loss": 0.8928860425949097, | |
| "step": 1800 | |
| }, | |
| { | |
| "epoch": 3.80168776371308, | |
| "grad_norm": 2.5547454357147217, | |
| "learning_rate": 1.569850059398544e-06, | |
| "loss": 1.004746675491333, | |
| "step": 1802 | |
| }, | |
| { | |
| "epoch": 3.8059071729957807, | |
| "grad_norm": 3.674745798110962, | |
| "learning_rate": 1.565929503047963e-06, | |
| "loss": 0.49736908078193665, | |
| "step": 1804 | |
| }, | |
| { | |
| "epoch": 3.810126582278481, | |
| "grad_norm": 7.80587100982666, | |
| "learning_rate": 1.562012377912277e-06, | |
| "loss": 0.23617342114448547, | |
| "step": 1806 | |
| }, | |
| { | |
| "epoch": 3.8143459915611815, | |
| "grad_norm": 5.4438958168029785, | |
| "learning_rate": 1.5580987051886533e-06, | |
| "loss": 0.8461598753929138, | |
| "step": 1808 | |
| }, | |
| { | |
| "epoch": 3.818565400843882, | |
| "grad_norm": 2.466731071472168, | |
| "learning_rate": 1.554188506055577e-06, | |
| "loss": 0.9447206258773804, | |
| "step": 1810 | |
| }, | |
| { | |
| "epoch": 3.8227848101265822, | |
| "grad_norm": 5.592019081115723, | |
| "learning_rate": 1.550281801672735e-06, | |
| "loss": 0.47888684272766113, | |
| "step": 1812 | |
| }, | |
| { | |
| "epoch": 3.827004219409283, | |
| "grad_norm": 2.1095151901245117, | |
| "learning_rate": 1.5463786131809031e-06, | |
| "loss": 0.9347876310348511, | |
| "step": 1814 | |
| }, | |
| { | |
| "epoch": 3.831223628691983, | |
| "grad_norm": 4.567122936248779, | |
| "learning_rate": 1.542478961701831e-06, | |
| "loss": 0.8219131231307983, | |
| "step": 1816 | |
| }, | |
| { | |
| "epoch": 3.8354430379746836, | |
| "grad_norm": 3.2872185707092285, | |
| "learning_rate": 1.5385828683381293e-06, | |
| "loss": 0.7965229749679565, | |
| "step": 1818 | |
| }, | |
| { | |
| "epoch": 3.8396624472573837, | |
| "grad_norm": 4.746089935302734, | |
| "learning_rate": 1.5346903541731524e-06, | |
| "loss": 0.6401727199554443, | |
| "step": 1820 | |
| }, | |
| { | |
| "epoch": 3.8438818565400843, | |
| "grad_norm": 3.5851891040802, | |
| "learning_rate": 1.530801440270888e-06, | |
| "loss": 0.9646581411361694, | |
| "step": 1822 | |
| }, | |
| { | |
| "epoch": 3.848101265822785, | |
| "grad_norm": 7.018674373626709, | |
| "learning_rate": 1.5269161476758404e-06, | |
| "loss": 0.7993499636650085, | |
| "step": 1824 | |
| }, | |
| { | |
| "epoch": 3.852320675105485, | |
| "grad_norm": 3.83168888092041, | |
| "learning_rate": 1.523034497412916e-06, | |
| "loss": 0.9415961503982544, | |
| "step": 1826 | |
| }, | |
| { | |
| "epoch": 3.8565400843881856, | |
| "grad_norm": 3.7820115089416504, | |
| "learning_rate": 1.5191565104873144e-06, | |
| "loss": 0.9054951667785645, | |
| "step": 1828 | |
| }, | |
| { | |
| "epoch": 3.8607594936708862, | |
| "grad_norm": 5.366248607635498, | |
| "learning_rate": 1.5152822078844088e-06, | |
| "loss": 0.9999287128448486, | |
| "step": 1830 | |
| }, | |
| { | |
| "epoch": 3.8649789029535864, | |
| "grad_norm": 5.807839393615723, | |
| "learning_rate": 1.511411610569636e-06, | |
| "loss": 0.3293692171573639, | |
| "step": 1832 | |
| }, | |
| { | |
| "epoch": 3.869198312236287, | |
| "grad_norm": 3.83225679397583, | |
| "learning_rate": 1.5075447394883814e-06, | |
| "loss": 0.6949493885040283, | |
| "step": 1834 | |
| }, | |
| { | |
| "epoch": 3.8734177215189876, | |
| "grad_norm": 10.349047660827637, | |
| "learning_rate": 1.5036816155658665e-06, | |
| "loss": 0.7142183184623718, | |
| "step": 1836 | |
| }, | |
| { | |
| "epoch": 3.8776371308016877, | |
| "grad_norm": 4.179904460906982, | |
| "learning_rate": 1.4998222597070362e-06, | |
| "loss": 0.6529619097709656, | |
| "step": 1838 | |
| }, | |
| { | |
| "epoch": 3.8818565400843883, | |
| "grad_norm": 11.569310188293457, | |
| "learning_rate": 1.4959666927964437e-06, | |
| "loss": 0.8389513492584229, | |
| "step": 1840 | |
| }, | |
| { | |
| "epoch": 3.8860759493670884, | |
| "grad_norm": 4.005336761474609, | |
| "learning_rate": 1.4921149356981397e-06, | |
| "loss": 0.5777831077575684, | |
| "step": 1842 | |
| }, | |
| { | |
| "epoch": 3.890295358649789, | |
| "grad_norm": 5.133764266967773, | |
| "learning_rate": 1.4882670092555567e-06, | |
| "loss": 0.5414679050445557, | |
| "step": 1844 | |
| }, | |
| { | |
| "epoch": 3.894514767932489, | |
| "grad_norm": 2.863504409790039, | |
| "learning_rate": 1.4844229342913996e-06, | |
| "loss": 0.9309226870536804, | |
| "step": 1846 | |
| }, | |
| { | |
| "epoch": 3.8987341772151898, | |
| "grad_norm": 22.195985794067383, | |
| "learning_rate": 1.480582731607531e-06, | |
| "loss": 0.4635329842567444, | |
| "step": 1848 | |
| }, | |
| { | |
| "epoch": 3.9029535864978904, | |
| "grad_norm": 2.475642204284668, | |
| "learning_rate": 1.4767464219848593e-06, | |
| "loss": 0.9393260478973389, | |
| "step": 1850 | |
| }, | |
| { | |
| "epoch": 3.9071729957805905, | |
| "grad_norm": 3.141064405441284, | |
| "learning_rate": 1.4729140261832246e-06, | |
| "loss": 0.9542742967605591, | |
| "step": 1852 | |
| }, | |
| { | |
| "epoch": 3.911392405063291, | |
| "grad_norm": 2.667790174484253, | |
| "learning_rate": 1.4690855649412895e-06, | |
| "loss": 0.9756711721420288, | |
| "step": 1854 | |
| }, | |
| { | |
| "epoch": 3.9156118143459917, | |
| "grad_norm": 2.641533374786377, | |
| "learning_rate": 1.4652610589764235e-06, | |
| "loss": 0.9634566903114319, | |
| "step": 1856 | |
| }, | |
| { | |
| "epoch": 3.919831223628692, | |
| "grad_norm": 2.9647128582000732, | |
| "learning_rate": 1.461440528984594e-06, | |
| "loss": 0.9994820356369019, | |
| "step": 1858 | |
| }, | |
| { | |
| "epoch": 3.9240506329113924, | |
| "grad_norm": 5.323459625244141, | |
| "learning_rate": 1.4576239956402514e-06, | |
| "loss": 0.9943286180496216, | |
| "step": 1860 | |
| }, | |
| { | |
| "epoch": 3.928270042194093, | |
| "grad_norm": 2.4466195106506348, | |
| "learning_rate": 1.4538114795962195e-06, | |
| "loss": 0.6168838143348694, | |
| "step": 1862 | |
| }, | |
| { | |
| "epoch": 3.932489451476793, | |
| "grad_norm": 3.8990132808685303, | |
| "learning_rate": 1.4500030014835822e-06, | |
| "loss": 0.6228926777839661, | |
| "step": 1864 | |
| }, | |
| { | |
| "epoch": 3.9367088607594938, | |
| "grad_norm": 6.640925407409668, | |
| "learning_rate": 1.4461985819115733e-06, | |
| "loss": 1.230762243270874, | |
| "step": 1866 | |
| }, | |
| { | |
| "epoch": 3.9409282700421944, | |
| "grad_norm": 1.7788114547729492, | |
| "learning_rate": 1.4423982414674635e-06, | |
| "loss": 0.9199753999710083, | |
| "step": 1868 | |
| }, | |
| { | |
| "epoch": 3.9451476793248945, | |
| "grad_norm": 11.634161949157715, | |
| "learning_rate": 1.4386020007164494e-06, | |
| "loss": 0.702942967414856, | |
| "step": 1870 | |
| }, | |
| { | |
| "epoch": 3.9493670886075947, | |
| "grad_norm": 0.652026355266571, | |
| "learning_rate": 1.4348098802015446e-06, | |
| "loss": 0.5037093162536621, | |
| "step": 1872 | |
| }, | |
| { | |
| "epoch": 3.9535864978902953, | |
| "grad_norm": 10.706385612487793, | |
| "learning_rate": 1.4310219004434632e-06, | |
| "loss": 0.45475533604621887, | |
| "step": 1874 | |
| }, | |
| { | |
| "epoch": 3.957805907172996, | |
| "grad_norm": 7.073146820068359, | |
| "learning_rate": 1.4272380819405139e-06, | |
| "loss": 0.8023735284805298, | |
| "step": 1876 | |
| }, | |
| { | |
| "epoch": 3.962025316455696, | |
| "grad_norm": 3.564532518386841, | |
| "learning_rate": 1.4234584451684866e-06, | |
| "loss": 0.716842770576477, | |
| "step": 1878 | |
| }, | |
| { | |
| "epoch": 3.9662447257383966, | |
| "grad_norm": 1.7148876190185547, | |
| "learning_rate": 1.4196830105805432e-06, | |
| "loss": 0.5358736515045166, | |
| "step": 1880 | |
| }, | |
| { | |
| "epoch": 3.970464135021097, | |
| "grad_norm": 3.4616918563842773, | |
| "learning_rate": 1.4159117986071038e-06, | |
| "loss": 0.9063611030578613, | |
| "step": 1882 | |
| }, | |
| { | |
| "epoch": 3.9746835443037973, | |
| "grad_norm": 5.480584144592285, | |
| "learning_rate": 1.4121448296557406e-06, | |
| "loss": 0.40525734424591064, | |
| "step": 1884 | |
| }, | |
| { | |
| "epoch": 3.978902953586498, | |
| "grad_norm": 4.338303565979004, | |
| "learning_rate": 1.4083821241110637e-06, | |
| "loss": 0.9141275882720947, | |
| "step": 1886 | |
| }, | |
| { | |
| "epoch": 3.9831223628691985, | |
| "grad_norm": 7.042728900909424, | |
| "learning_rate": 1.4046237023346113e-06, | |
| "loss": 0.6083638668060303, | |
| "step": 1888 | |
| }, | |
| { | |
| "epoch": 3.9873417721518987, | |
| "grad_norm": 7.335713863372803, | |
| "learning_rate": 1.400869584664743e-06, | |
| "loss": 0.9237312078475952, | |
| "step": 1890 | |
| }, | |
| { | |
| "epoch": 3.9915611814345993, | |
| "grad_norm": 7.168555736541748, | |
| "learning_rate": 1.3971197914165238e-06, | |
| "loss": 0.6043530702590942, | |
| "step": 1892 | |
| }, | |
| { | |
| "epoch": 3.9957805907173, | |
| "grad_norm": 2.8935647010803223, | |
| "learning_rate": 1.3933743428816209e-06, | |
| "loss": 0.9517507553100586, | |
| "step": 1894 | |
| }, | |
| { | |
| "epoch": 4.0, | |
| "grad_norm": 2.358701705932617, | |
| "learning_rate": 1.3896332593281876e-06, | |
| "loss": 0.9641570448875427, | |
| "step": 1896 | |
| }, | |
| { | |
| "epoch": 4.0042194092827, | |
| "grad_norm": 4.007087230682373, | |
| "learning_rate": 1.385896561000759e-06, | |
| "loss": 0.8658764362335205, | |
| "step": 1898 | |
| }, | |
| { | |
| "epoch": 4.008438818565401, | |
| "grad_norm": 6.783811092376709, | |
| "learning_rate": 1.382164268120137e-06, | |
| "loss": 0.7082722187042236, | |
| "step": 1900 | |
| }, | |
| { | |
| "epoch": 4.012658227848101, | |
| "grad_norm": 2.4722962379455566, | |
| "learning_rate": 1.3784364008832867e-06, | |
| "loss": 0.7488058805465698, | |
| "step": 1902 | |
| }, | |
| { | |
| "epoch": 4.0168776371308015, | |
| "grad_norm": 7.7128705978393555, | |
| "learning_rate": 1.3747129794632236e-06, | |
| "loss": 0.5546174049377441, | |
| "step": 1904 | |
| }, | |
| { | |
| "epoch": 4.0210970464135025, | |
| "grad_norm": 1.1015756130218506, | |
| "learning_rate": 1.3709940240089027e-06, | |
| "loss": 0.5142375826835632, | |
| "step": 1906 | |
| }, | |
| { | |
| "epoch": 4.025316455696203, | |
| "grad_norm": 5.702658653259277, | |
| "learning_rate": 1.3672795546451144e-06, | |
| "loss": 0.9443526268005371, | |
| "step": 1908 | |
| }, | |
| { | |
| "epoch": 4.029535864978903, | |
| "grad_norm": 6.516256809234619, | |
| "learning_rate": 1.3635695914723724e-06, | |
| "loss": 0.11540517210960388, | |
| "step": 1910 | |
| }, | |
| { | |
| "epoch": 4.033755274261603, | |
| "grad_norm": 3.0924103260040283, | |
| "learning_rate": 1.359864154566805e-06, | |
| "loss": 0.7493268251419067, | |
| "step": 1912 | |
| }, | |
| { | |
| "epoch": 4.037974683544304, | |
| "grad_norm": 5.080263614654541, | |
| "learning_rate": 1.356163263980048e-06, | |
| "loss": 0.793247401714325, | |
| "step": 1914 | |
| }, | |
| { | |
| "epoch": 4.042194092827004, | |
| "grad_norm": 0.8498378396034241, | |
| "learning_rate": 1.352466939739134e-06, | |
| "loss": 0.4381150007247925, | |
| "step": 1916 | |
| }, | |
| { | |
| "epoch": 4.046413502109704, | |
| "grad_norm": 5.049806594848633, | |
| "learning_rate": 1.3487752018463865e-06, | |
| "loss": 0.23625794053077698, | |
| "step": 1918 | |
| }, | |
| { | |
| "epoch": 4.050632911392405, | |
| "grad_norm": 6.505473613739014, | |
| "learning_rate": 1.34508807027931e-06, | |
| "loss": 0.8553643226623535, | |
| "step": 1920 | |
| }, | |
| { | |
| "epoch": 4.0548523206751055, | |
| "grad_norm": 2.442864418029785, | |
| "learning_rate": 1.341405564990481e-06, | |
| "loss": 0.9089441895484924, | |
| "step": 1922 | |
| }, | |
| { | |
| "epoch": 4.059071729957806, | |
| "grad_norm": 3.782691717147827, | |
| "learning_rate": 1.3377277059074428e-06, | |
| "loss": 0.6086368560791016, | |
| "step": 1924 | |
| }, | |
| { | |
| "epoch": 4.063291139240507, | |
| "grad_norm": 7.667325019836426, | |
| "learning_rate": 1.3340545129325956e-06, | |
| "loss": 0.5529667139053345, | |
| "step": 1926 | |
| }, | |
| { | |
| "epoch": 4.067510548523207, | |
| "grad_norm": 4.649930953979492, | |
| "learning_rate": 1.330386005943089e-06, | |
| "loss": 0.7499093413352966, | |
| "step": 1928 | |
| }, | |
| { | |
| "epoch": 4.071729957805907, | |
| "grad_norm": 6.8586602210998535, | |
| "learning_rate": 1.3267222047907167e-06, | |
| "loss": 0.2909429967403412, | |
| "step": 1930 | |
| }, | |
| { | |
| "epoch": 4.075949367088608, | |
| "grad_norm": 5.850220680236816, | |
| "learning_rate": 1.323063129301806e-06, | |
| "loss": 0.5432990789413452, | |
| "step": 1932 | |
| }, | |
| { | |
| "epoch": 4.080168776371308, | |
| "grad_norm": 5.898839473724365, | |
| "learning_rate": 1.3194087992771097e-06, | |
| "loss": 0.6550246477127075, | |
| "step": 1934 | |
| }, | |
| { | |
| "epoch": 4.084388185654008, | |
| "grad_norm": 3.0061066150665283, | |
| "learning_rate": 1.3157592344917036e-06, | |
| "loss": 0.7705998420715332, | |
| "step": 1936 | |
| }, | |
| { | |
| "epoch": 4.0886075949367084, | |
| "grad_norm": 2.5635762214660645, | |
| "learning_rate": 1.3121144546948766e-06, | |
| "loss": 0.44453972578048706, | |
| "step": 1938 | |
| }, | |
| { | |
| "epoch": 4.0928270042194095, | |
| "grad_norm": 4.0387773513793945, | |
| "learning_rate": 1.3084744796100229e-06, | |
| "loss": 0.5306001901626587, | |
| "step": 1940 | |
| }, | |
| { | |
| "epoch": 4.09704641350211, | |
| "grad_norm": 4.215574264526367, | |
| "learning_rate": 1.3048393289345369e-06, | |
| "loss": 0.5609068870544434, | |
| "step": 1942 | |
| }, | |
| { | |
| "epoch": 4.10126582278481, | |
| "grad_norm": 2.5985476970672607, | |
| "learning_rate": 1.3012090223397066e-06, | |
| "loss": 0.503987193107605, | |
| "step": 1944 | |
| }, | |
| { | |
| "epoch": 4.105485232067511, | |
| "grad_norm": 2.6729464530944824, | |
| "learning_rate": 1.2975835794706063e-06, | |
| "loss": 0.8981311321258545, | |
| "step": 1946 | |
| }, | |
| { | |
| "epoch": 4.109704641350211, | |
| "grad_norm": 8.088824272155762, | |
| "learning_rate": 1.2939630199459914e-06, | |
| "loss": 0.502710223197937, | |
| "step": 1948 | |
| }, | |
| { | |
| "epoch": 4.113924050632911, | |
| "grad_norm": 0.9990053772926331, | |
| "learning_rate": 1.2903473633581894e-06, | |
| "loss": 0.5058774948120117, | |
| "step": 1950 | |
| }, | |
| { | |
| "epoch": 4.118143459915612, | |
| "grad_norm": 2.116455554962158, | |
| "learning_rate": 1.2867366292729984e-06, | |
| "loss": 0.8362418413162231, | |
| "step": 1952 | |
| }, | |
| { | |
| "epoch": 4.122362869198312, | |
| "grad_norm": 4.284731388092041, | |
| "learning_rate": 1.283130837229578e-06, | |
| "loss": 0.9526023864746094, | |
| "step": 1954 | |
| }, | |
| { | |
| "epoch": 4.1265822784810124, | |
| "grad_norm": 27.23639678955078, | |
| "learning_rate": 1.2795300067403436e-06, | |
| "loss": 0.16982686519622803, | |
| "step": 1956 | |
| }, | |
| { | |
| "epoch": 4.1308016877637135, | |
| "grad_norm": 9.439923286437988, | |
| "learning_rate": 1.275934157290863e-06, | |
| "loss": 0.844666600227356, | |
| "step": 1958 | |
| }, | |
| { | |
| "epoch": 4.135021097046414, | |
| "grad_norm": 52.91316604614258, | |
| "learning_rate": 1.2723433083397486e-06, | |
| "loss": 0.8215901255607605, | |
| "step": 1960 | |
| }, | |
| { | |
| "epoch": 4.139240506329114, | |
| "grad_norm": 12.898977279663086, | |
| "learning_rate": 1.2687574793185535e-06, | |
| "loss": 0.5214605331420898, | |
| "step": 1962 | |
| }, | |
| { | |
| "epoch": 4.143459915611814, | |
| "grad_norm": 1.9493759870529175, | |
| "learning_rate": 1.2651766896316653e-06, | |
| "loss": 0.8226008415222168, | |
| "step": 1964 | |
| }, | |
| { | |
| "epoch": 4.147679324894515, | |
| "grad_norm": 0.4556528925895691, | |
| "learning_rate": 1.2616009586562021e-06, | |
| "loss": 0.43690699338912964, | |
| "step": 1966 | |
| }, | |
| { | |
| "epoch": 4.151898734177215, | |
| "grad_norm": 3.7246546745300293, | |
| "learning_rate": 1.2580303057419079e-06, | |
| "loss": 0.871078610420227, | |
| "step": 1968 | |
| }, | |
| { | |
| "epoch": 4.156118143459915, | |
| "grad_norm": 3.5394413471221924, | |
| "learning_rate": 1.2544647502110464e-06, | |
| "loss": 0.9380326271057129, | |
| "step": 1970 | |
| }, | |
| { | |
| "epoch": 4.160337552742616, | |
| "grad_norm": 4.74537467956543, | |
| "learning_rate": 1.2509043113582969e-06, | |
| "loss": 1.0427074432373047, | |
| "step": 1972 | |
| }, | |
| { | |
| "epoch": 4.1645569620253164, | |
| "grad_norm": 5.703405380249023, | |
| "learning_rate": 1.247349008450651e-06, | |
| "loss": 0.17169350385665894, | |
| "step": 1974 | |
| }, | |
| { | |
| "epoch": 4.168776371308017, | |
| "grad_norm": 8.463484764099121, | |
| "learning_rate": 1.243798860727308e-06, | |
| "loss": 0.5819951891899109, | |
| "step": 1976 | |
| }, | |
| { | |
| "epoch": 4.172995780590718, | |
| "grad_norm": 5.530209541320801, | |
| "learning_rate": 1.2402538873995701e-06, | |
| "loss": 0.40900328755378723, | |
| "step": 1978 | |
| }, | |
| { | |
| "epoch": 4.177215189873418, | |
| "grad_norm": 6.495384216308594, | |
| "learning_rate": 1.236714107650737e-06, | |
| "loss": 0.42087459564208984, | |
| "step": 1980 | |
| }, | |
| { | |
| "epoch": 4.181434599156118, | |
| "grad_norm": 3.931180953979492, | |
| "learning_rate": 1.233179540636006e-06, | |
| "loss": 0.7898563742637634, | |
| "step": 1982 | |
| }, | |
| { | |
| "epoch": 4.185654008438819, | |
| "grad_norm": 5.3524322509765625, | |
| "learning_rate": 1.2296502054823655e-06, | |
| "loss": 0.335269570350647, | |
| "step": 1984 | |
| }, | |
| { | |
| "epoch": 4.189873417721519, | |
| "grad_norm": 6.85384464263916, | |
| "learning_rate": 1.226126121288492e-06, | |
| "loss": 0.220280259847641, | |
| "step": 1986 | |
| }, | |
| { | |
| "epoch": 4.194092827004219, | |
| "grad_norm": 17.49827003479004, | |
| "learning_rate": 1.222607307124647e-06, | |
| "loss": 0.5092884302139282, | |
| "step": 1988 | |
| }, | |
| { | |
| "epoch": 4.198312236286919, | |
| "grad_norm": 5.663785934448242, | |
| "learning_rate": 1.2190937820325733e-06, | |
| "loss": 0.4246003031730652, | |
| "step": 1990 | |
| }, | |
| { | |
| "epoch": 4.2025316455696204, | |
| "grad_norm": 4.241413116455078, | |
| "learning_rate": 1.215585565025394e-06, | |
| "loss": 0.8379718065261841, | |
| "step": 1992 | |
| }, | |
| { | |
| "epoch": 4.206751054852321, | |
| "grad_norm": 6.015312194824219, | |
| "learning_rate": 1.2120826750875059e-06, | |
| "loss": 0.5074017643928528, | |
| "step": 1994 | |
| }, | |
| { | |
| "epoch": 4.210970464135021, | |
| "grad_norm": 3.0558958053588867, | |
| "learning_rate": 1.2085851311744794e-06, | |
| "loss": 0.8118472099304199, | |
| "step": 1996 | |
| }, | |
| { | |
| "epoch": 4.215189873417722, | |
| "grad_norm": 6.353532314300537, | |
| "learning_rate": 1.205092952212956e-06, | |
| "loss": 1.135847568511963, | |
| "step": 1998 | |
| }, | |
| { | |
| "epoch": 4.219409282700422, | |
| "grad_norm": 2.435732126235962, | |
| "learning_rate": 1.201606157100544e-06, | |
| "loss": 0.9003854990005493, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 4.223628691983122, | |
| "grad_norm": 8.28079891204834, | |
| "learning_rate": 1.1981247647057202e-06, | |
| "loss": 0.6943663358688354, | |
| "step": 2002 | |
| }, | |
| { | |
| "epoch": 4.227848101265823, | |
| "grad_norm": 7.8127264976501465, | |
| "learning_rate": 1.1946487938677226e-06, | |
| "loss": 0.16587281227111816, | |
| "step": 2004 | |
| }, | |
| { | |
| "epoch": 4.232067510548523, | |
| "grad_norm": 1.957531213760376, | |
| "learning_rate": 1.1911782633964518e-06, | |
| "loss": 0.9451367855072021, | |
| "step": 2006 | |
| }, | |
| { | |
| "epoch": 4.236286919831223, | |
| "grad_norm": 2.2095224857330322, | |
| "learning_rate": 1.1877131920723674e-06, | |
| "loss": 0.4541362524032593, | |
| "step": 2008 | |
| }, | |
| { | |
| "epoch": 4.2405063291139244, | |
| "grad_norm": 2.0317702293395996, | |
| "learning_rate": 1.1842535986463885e-06, | |
| "loss": 0.9444383382797241, | |
| "step": 2010 | |
| }, | |
| { | |
| "epoch": 4.244725738396625, | |
| "grad_norm": 8.539976119995117, | |
| "learning_rate": 1.180799501839791e-06, | |
| "loss": 0.19654181599617004, | |
| "step": 2012 | |
| }, | |
| { | |
| "epoch": 4.248945147679325, | |
| "grad_norm": 3.8399620056152344, | |
| "learning_rate": 1.1773509203441052e-06, | |
| "loss": 0.5152616500854492, | |
| "step": 2014 | |
| }, | |
| { | |
| "epoch": 4.253164556962025, | |
| "grad_norm": 2.4427969455718994, | |
| "learning_rate": 1.1739078728210175e-06, | |
| "loss": 0.89030921459198, | |
| "step": 2016 | |
| }, | |
| { | |
| "epoch": 4.257383966244726, | |
| "grad_norm": 3.564229726791382, | |
| "learning_rate": 1.170470377902266e-06, | |
| "loss": 0.9515880346298218, | |
| "step": 2018 | |
| }, | |
| { | |
| "epoch": 4.261603375527426, | |
| "grad_norm": 17.13824462890625, | |
| "learning_rate": 1.167038454189543e-06, | |
| "loss": 0.0852670818567276, | |
| "step": 2020 | |
| }, | |
| { | |
| "epoch": 4.265822784810126, | |
| "grad_norm": 1.1132172346115112, | |
| "learning_rate": 1.163612120254392e-06, | |
| "loss": 0.3325420618057251, | |
| "step": 2022 | |
| }, | |
| { | |
| "epoch": 4.270042194092827, | |
| "grad_norm": 2.2386295795440674, | |
| "learning_rate": 1.1601913946381068e-06, | |
| "loss": 0.8490246534347534, | |
| "step": 2024 | |
| }, | |
| { | |
| "epoch": 4.274261603375527, | |
| "grad_norm": 4.5493927001953125, | |
| "learning_rate": 1.1567762958516336e-06, | |
| "loss": 0.30698156356811523, | |
| "step": 2026 | |
| }, | |
| { | |
| "epoch": 4.2784810126582276, | |
| "grad_norm": 2.7599310874938965, | |
| "learning_rate": 1.1533668423754703e-06, | |
| "loss": 0.3949320912361145, | |
| "step": 2028 | |
| }, | |
| { | |
| "epoch": 4.282700421940929, | |
| "grad_norm": 0.7302427291870117, | |
| "learning_rate": 1.1499630526595632e-06, | |
| "loss": 0.4672113060951233, | |
| "step": 2030 | |
| }, | |
| { | |
| "epoch": 4.286919831223629, | |
| "grad_norm": 6.222799777984619, | |
| "learning_rate": 1.1465649451232121e-06, | |
| "loss": 1.0849535465240479, | |
| "step": 2032 | |
| }, | |
| { | |
| "epoch": 4.291139240506329, | |
| "grad_norm": 2.6900506019592285, | |
| "learning_rate": 1.1431725381549675e-06, | |
| "loss": 0.12843787670135498, | |
| "step": 2034 | |
| }, | |
| { | |
| "epoch": 4.29535864978903, | |
| "grad_norm": 7.403899669647217, | |
| "learning_rate": 1.1397858501125304e-06, | |
| "loss": 0.3389854431152344, | |
| "step": 2036 | |
| }, | |
| { | |
| "epoch": 4.29957805907173, | |
| "grad_norm": 5.636825084686279, | |
| "learning_rate": 1.1364048993226566e-06, | |
| "loss": 0.6659049391746521, | |
| "step": 2038 | |
| }, | |
| { | |
| "epoch": 4.30379746835443, | |
| "grad_norm": 10.65471363067627, | |
| "learning_rate": 1.1330297040810534e-06, | |
| "loss": 1.0959115028381348, | |
| "step": 2040 | |
| }, | |
| { | |
| "epoch": 4.308016877637131, | |
| "grad_norm": 6.164623737335205, | |
| "learning_rate": 1.129660282652284e-06, | |
| "loss": 0.8495713472366333, | |
| "step": 2042 | |
| }, | |
| { | |
| "epoch": 4.312236286919831, | |
| "grad_norm": 21.337953567504883, | |
| "learning_rate": 1.1262966532696658e-06, | |
| "loss": 0.4679602384567261, | |
| "step": 2044 | |
| }, | |
| { | |
| "epoch": 4.3164556962025316, | |
| "grad_norm": 3.735825300216675, | |
| "learning_rate": 1.1229388341351739e-06, | |
| "loss": 1.0504865646362305, | |
| "step": 2046 | |
| }, | |
| { | |
| "epoch": 4.320675105485232, | |
| "grad_norm": 2.874302387237549, | |
| "learning_rate": 1.1195868434193413e-06, | |
| "loss": 0.9641183614730835, | |
| "step": 2048 | |
| }, | |
| { | |
| "epoch": 4.324894514767933, | |
| "grad_norm": 2.697021722793579, | |
| "learning_rate": 1.1162406992611618e-06, | |
| "loss": 0.24490822851657867, | |
| "step": 2050 | |
| }, | |
| { | |
| "epoch": 4.329113924050633, | |
| "grad_norm": 8.153789520263672, | |
| "learning_rate": 1.1129004197679907e-06, | |
| "loss": 0.43832969665527344, | |
| "step": 2052 | |
| }, | |
| { | |
| "epoch": 4.333333333333333, | |
| "grad_norm": 2.9143199920654297, | |
| "learning_rate": 1.1095660230154457e-06, | |
| "loss": 0.7494316101074219, | |
| "step": 2054 | |
| }, | |
| { | |
| "epoch": 4.337552742616034, | |
| "grad_norm": 22.089580535888672, | |
| "learning_rate": 1.1062375270473129e-06, | |
| "loss": 0.4954107403755188, | |
| "step": 2056 | |
| }, | |
| { | |
| "epoch": 4.341772151898734, | |
| "grad_norm": 5.983814716339111, | |
| "learning_rate": 1.1029149498754458e-06, | |
| "loss": 0.39451485872268677, | |
| "step": 2058 | |
| }, | |
| { | |
| "epoch": 4.345991561181434, | |
| "grad_norm": 3.4319894313812256, | |
| "learning_rate": 1.0995983094796688e-06, | |
| "loss": 0.816379189491272, | |
| "step": 2060 | |
| }, | |
| { | |
| "epoch": 4.350210970464135, | |
| "grad_norm": 2.19193172454834, | |
| "learning_rate": 1.0962876238076799e-06, | |
| "loss": 0.9197038412094116, | |
| "step": 2062 | |
| }, | |
| { | |
| "epoch": 4.3544303797468356, | |
| "grad_norm": 8.006820678710938, | |
| "learning_rate": 1.0929829107749547e-06, | |
| "loss": 0.8574424982070923, | |
| "step": 2064 | |
| }, | |
| { | |
| "epoch": 4.358649789029536, | |
| "grad_norm": 3.324010133743286, | |
| "learning_rate": 1.0896841882646471e-06, | |
| "loss": 0.9916654825210571, | |
| "step": 2066 | |
| }, | |
| { | |
| "epoch": 4.362869198312236, | |
| "grad_norm": 3.6000797748565674, | |
| "learning_rate": 1.0863914741274944e-06, | |
| "loss": 0.4570949077606201, | |
| "step": 2068 | |
| }, | |
| { | |
| "epoch": 4.367088607594937, | |
| "grad_norm": 1.9650532007217407, | |
| "learning_rate": 1.0831047861817193e-06, | |
| "loss": 0.9559861421585083, | |
| "step": 2070 | |
| }, | |
| { | |
| "epoch": 4.371308016877637, | |
| "grad_norm": 2.6903204917907715, | |
| "learning_rate": 1.079824142212936e-06, | |
| "loss": 0.9988477230072021, | |
| "step": 2072 | |
| }, | |
| { | |
| "epoch": 4.375527426160337, | |
| "grad_norm": 3.71533203125, | |
| "learning_rate": 1.07654955997405e-06, | |
| "loss": 0.8142194747924805, | |
| "step": 2074 | |
| }, | |
| { | |
| "epoch": 4.379746835443038, | |
| "grad_norm": 6.335799694061279, | |
| "learning_rate": 1.0732810571851677e-06, | |
| "loss": 0.6120598316192627, | |
| "step": 2076 | |
| }, | |
| { | |
| "epoch": 4.383966244725738, | |
| "grad_norm": 2.123081684112549, | |
| "learning_rate": 1.0700186515334939e-06, | |
| "loss": 0.4905482232570648, | |
| "step": 2078 | |
| }, | |
| { | |
| "epoch": 4.3881856540084385, | |
| "grad_norm": 14.160784721374512, | |
| "learning_rate": 1.0667623606732408e-06, | |
| "loss": 0.9914622902870178, | |
| "step": 2080 | |
| }, | |
| { | |
| "epoch": 4.3924050632911396, | |
| "grad_norm": 1.714659571647644, | |
| "learning_rate": 1.0635122022255298e-06, | |
| "loss": 0.6109655499458313, | |
| "step": 2082 | |
| }, | |
| { | |
| "epoch": 4.39662447257384, | |
| "grad_norm": 4.592569351196289, | |
| "learning_rate": 1.0602681937782985e-06, | |
| "loss": 0.7499299049377441, | |
| "step": 2084 | |
| }, | |
| { | |
| "epoch": 4.40084388185654, | |
| "grad_norm": 3.3476827144622803, | |
| "learning_rate": 1.0570303528862044e-06, | |
| "loss": 0.9557301998138428, | |
| "step": 2086 | |
| }, | |
| { | |
| "epoch": 4.405063291139241, | |
| "grad_norm": 3.5681612491607666, | |
| "learning_rate": 1.0537986970705284e-06, | |
| "loss": 0.9052315354347229, | |
| "step": 2088 | |
| }, | |
| { | |
| "epoch": 4.409282700421941, | |
| "grad_norm": 4.310785293579102, | |
| "learning_rate": 1.0505732438190832e-06, | |
| "loss": 0.6285467147827148, | |
| "step": 2090 | |
| }, | |
| { | |
| "epoch": 4.413502109704641, | |
| "grad_norm": 8.120601654052734, | |
| "learning_rate": 1.0473540105861158e-06, | |
| "loss": 0.8778185844421387, | |
| "step": 2092 | |
| }, | |
| { | |
| "epoch": 4.417721518987342, | |
| "grad_norm": 2.5801761150360107, | |
| "learning_rate": 1.0441410147922142e-06, | |
| "loss": 0.8876914381980896, | |
| "step": 2094 | |
| }, | |
| { | |
| "epoch": 4.421940928270042, | |
| "grad_norm": 22.094350814819336, | |
| "learning_rate": 1.0409342738242145e-06, | |
| "loss": 0.5706854462623596, | |
| "step": 2096 | |
| }, | |
| { | |
| "epoch": 4.4261603375527425, | |
| "grad_norm": 2.56339955329895, | |
| "learning_rate": 1.0377338050351023e-06, | |
| "loss": 0.8818637132644653, | |
| "step": 2098 | |
| }, | |
| { | |
| "epoch": 4.430379746835443, | |
| "grad_norm": 15.67695140838623, | |
| "learning_rate": 1.0345396257439248e-06, | |
| "loss": 0.6227443814277649, | |
| "step": 2100 | |
| }, | |
| { | |
| "epoch": 4.434599156118144, | |
| "grad_norm": 2.2217981815338135, | |
| "learning_rate": 1.0313517532356928e-06, | |
| "loss": 0.7605068683624268, | |
| "step": 2102 | |
| }, | |
| { | |
| "epoch": 4.438818565400844, | |
| "grad_norm": 2.441141128540039, | |
| "learning_rate": 1.0281702047612885e-06, | |
| "loss": 0.7203768491744995, | |
| "step": 2104 | |
| }, | |
| { | |
| "epoch": 4.443037974683544, | |
| "grad_norm": 4.200733184814453, | |
| "learning_rate": 1.024994997537373e-06, | |
| "loss": 0.8852105736732483, | |
| "step": 2106 | |
| }, | |
| { | |
| "epoch": 4.447257383966245, | |
| "grad_norm": 9.651650428771973, | |
| "learning_rate": 1.0218261487462916e-06, | |
| "loss": 0.5270538330078125, | |
| "step": 2108 | |
| }, | |
| { | |
| "epoch": 4.451476793248945, | |
| "grad_norm": 2.550156593322754, | |
| "learning_rate": 1.0186636755359814e-06, | |
| "loss": 0.8197285532951355, | |
| "step": 2110 | |
| }, | |
| { | |
| "epoch": 4.455696202531645, | |
| "grad_norm": 2.093350887298584, | |
| "learning_rate": 1.0155075950198794e-06, | |
| "loss": 0.8607369065284729, | |
| "step": 2112 | |
| }, | |
| { | |
| "epoch": 4.459915611814346, | |
| "grad_norm": 1.2756742238998413, | |
| "learning_rate": 1.0123579242768282e-06, | |
| "loss": 0.6345518827438354, | |
| "step": 2114 | |
| }, | |
| { | |
| "epoch": 4.4641350210970465, | |
| "grad_norm": 3.17000675201416, | |
| "learning_rate": 1.0092146803509854e-06, | |
| "loss": 0.48864442110061646, | |
| "step": 2116 | |
| }, | |
| { | |
| "epoch": 4.468354430379747, | |
| "grad_norm": 2.0671489238739014, | |
| "learning_rate": 1.006077880251729e-06, | |
| "loss": 0.862575888633728, | |
| "step": 2118 | |
| }, | |
| { | |
| "epoch": 4.472573839662447, | |
| "grad_norm": 2.851736307144165, | |
| "learning_rate": 1.0029475409535692e-06, | |
| "loss": 0.4032348692417145, | |
| "step": 2120 | |
| }, | |
| { | |
| "epoch": 4.476793248945148, | |
| "grad_norm": 4.477703094482422, | |
| "learning_rate": 9.998236793960514e-07, | |
| "loss": 0.36202433705329895, | |
| "step": 2122 | |
| }, | |
| { | |
| "epoch": 4.481012658227848, | |
| "grad_norm": 8.475764274597168, | |
| "learning_rate": 9.967063124836695e-07, | |
| "loss": 0.21301576495170593, | |
| "step": 2124 | |
| }, | |
| { | |
| "epoch": 4.485232067510548, | |
| "grad_norm": 3.3703811168670654, | |
| "learning_rate": 9.935954570857717e-07, | |
| "loss": 0.39527398347854614, | |
| "step": 2126 | |
| }, | |
| { | |
| "epoch": 4.489451476793249, | |
| "grad_norm": 2.7759153842926025, | |
| "learning_rate": 9.90491130036468e-07, | |
| "loss": 0.6493411064147949, | |
| "step": 2128 | |
| }, | |
| { | |
| "epoch": 4.493670886075949, | |
| "grad_norm": 9.41816520690918, | |
| "learning_rate": 9.873933481345432e-07, | |
| "loss": 0.484800785779953, | |
| "step": 2130 | |
| }, | |
| { | |
| "epoch": 4.4978902953586495, | |
| "grad_norm": 2.5206875801086426, | |
| "learning_rate": 9.843021281433624e-07, | |
| "loss": 1.0602920055389404, | |
| "step": 2132 | |
| }, | |
| { | |
| "epoch": 4.5021097046413505, | |
| "grad_norm": 4.042180061340332, | |
| "learning_rate": 9.81217486790782e-07, | |
| "loss": 0.7310470342636108, | |
| "step": 2134 | |
| }, | |
| { | |
| "epoch": 4.506329113924051, | |
| "grad_norm": 4.009156703948975, | |
| "learning_rate": 9.781394407690582e-07, | |
| "loss": 0.12923167645931244, | |
| "step": 2136 | |
| }, | |
| { | |
| "epoch": 4.510548523206751, | |
| "grad_norm": 3.74722957611084, | |
| "learning_rate": 9.750680067347574e-07, | |
| "loss": 0.3252981901168823, | |
| "step": 2138 | |
| }, | |
| { | |
| "epoch": 4.514767932489452, | |
| "grad_norm": 39.80788040161133, | |
| "learning_rate": 9.720032013086665e-07, | |
| "loss": 0.25149163603782654, | |
| "step": 2140 | |
| }, | |
| { | |
| "epoch": 4.518987341772152, | |
| "grad_norm": 1.6690428256988525, | |
| "learning_rate": 9.689450410757014e-07, | |
| "loss": 0.6628930568695068, | |
| "step": 2142 | |
| }, | |
| { | |
| "epoch": 4.523206751054852, | |
| "grad_norm": 12.622380256652832, | |
| "learning_rate": 9.658935425848178e-07, | |
| "loss": 0.17167873680591583, | |
| "step": 2144 | |
| }, | |
| { | |
| "epoch": 4.527426160337553, | |
| "grad_norm": 3.543349504470825, | |
| "learning_rate": 9.628487223489232e-07, | |
| "loss": 0.5717638731002808, | |
| "step": 2146 | |
| }, | |
| { | |
| "epoch": 4.531645569620253, | |
| "grad_norm": 3.6029629707336426, | |
| "learning_rate": 9.598105968447845e-07, | |
| "loss": 0.5759022831916809, | |
| "step": 2148 | |
| }, | |
| { | |
| "epoch": 4.5358649789029535, | |
| "grad_norm": 0.09577035158872604, | |
| "learning_rate": 9.567791825129436e-07, | |
| "loss": 0.45371395349502563, | |
| "step": 2150 | |
| }, | |
| { | |
| "epoch": 4.540084388185654, | |
| "grad_norm": 2.7558352947235107, | |
| "learning_rate": 9.537544957576232e-07, | |
| "loss": 0.5172098875045776, | |
| "step": 2152 | |
| }, | |
| { | |
| "epoch": 4.544303797468355, | |
| "grad_norm": 6.613936424255371, | |
| "learning_rate": 9.507365529466414e-07, | |
| "loss": 1.0241069793701172, | |
| "step": 2154 | |
| }, | |
| { | |
| "epoch": 4.548523206751055, | |
| "grad_norm": 3.1837728023529053, | |
| "learning_rate": 9.477253704113204e-07, | |
| "loss": 0.9064798355102539, | |
| "step": 2156 | |
| }, | |
| { | |
| "epoch": 4.552742616033755, | |
| "grad_norm": 2.8910419940948486, | |
| "learning_rate": 9.447209644464014e-07, | |
| "loss": 0.8971297740936279, | |
| "step": 2158 | |
| }, | |
| { | |
| "epoch": 4.556962025316456, | |
| "grad_norm": 3.6541380882263184, | |
| "learning_rate": 9.417233513099545e-07, | |
| "loss": 0.7274525165557861, | |
| "step": 2160 | |
| }, | |
| { | |
| "epoch": 4.561181434599156, | |
| "grad_norm": 2.7287378311157227, | |
| "learning_rate": 9.387325472232908e-07, | |
| "loss": 0.7473336458206177, | |
| "step": 2162 | |
| }, | |
| { | |
| "epoch": 4.565400843881856, | |
| "grad_norm": 5.9793500900268555, | |
| "learning_rate": 9.357485683708752e-07, | |
| "loss": 0.6158387660980225, | |
| "step": 2164 | |
| }, | |
| { | |
| "epoch": 4.569620253164557, | |
| "grad_norm": 2.9492175579071045, | |
| "learning_rate": 9.327714309002378e-07, | |
| "loss": 0.8946245312690735, | |
| "step": 2166 | |
| }, | |
| { | |
| "epoch": 4.5738396624472575, | |
| "grad_norm": 2.516920566558838, | |
| "learning_rate": 9.298011509218878e-07, | |
| "loss": 0.7441626787185669, | |
| "step": 2168 | |
| }, | |
| { | |
| "epoch": 4.578059071729958, | |
| "grad_norm": 9.32639217376709, | |
| "learning_rate": 9.268377445092257e-07, | |
| "loss": 0.18001851439476013, | |
| "step": 2170 | |
| }, | |
| { | |
| "epoch": 4.582278481012658, | |
| "grad_norm": 2.5811736583709717, | |
| "learning_rate": 9.238812276984563e-07, | |
| "loss": 0.6168837547302246, | |
| "step": 2172 | |
| }, | |
| { | |
| "epoch": 4.586497890295359, | |
| "grad_norm": 79.09625244140625, | |
| "learning_rate": 9.209316164885007e-07, | |
| "loss": 0.7156883478164673, | |
| "step": 2174 | |
| }, | |
| { | |
| "epoch": 4.590717299578059, | |
| "grad_norm": 3.27329683303833, | |
| "learning_rate": 9.179889268409126e-07, | |
| "loss": 0.9324935078620911, | |
| "step": 2176 | |
| }, | |
| { | |
| "epoch": 4.594936708860759, | |
| "grad_norm": 10.160416603088379, | |
| "learning_rate": 9.150531746797897e-07, | |
| "loss": 0.6166714429855347, | |
| "step": 2178 | |
| }, | |
| { | |
| "epoch": 4.59915611814346, | |
| "grad_norm": 3.8143131732940674, | |
| "learning_rate": 9.121243758916885e-07, | |
| "loss": 0.5362197756767273, | |
| "step": 2180 | |
| }, | |
| { | |
| "epoch": 4.60337552742616, | |
| "grad_norm": 2.629331111907959, | |
| "learning_rate": 9.092025463255371e-07, | |
| "loss": 0.9286479949951172, | |
| "step": 2182 | |
| }, | |
| { | |
| "epoch": 4.6075949367088604, | |
| "grad_norm": 3.1048662662506104, | |
| "learning_rate": 9.062877017925509e-07, | |
| "loss": 0.23398178815841675, | |
| "step": 2184 | |
| }, | |
| { | |
| "epoch": 4.6118143459915615, | |
| "grad_norm": 2.799243211746216, | |
| "learning_rate": 9.033798580661465e-07, | |
| "loss": 0.9572643041610718, | |
| "step": 2186 | |
| }, | |
| { | |
| "epoch": 4.616033755274262, | |
| "grad_norm": 1.9954626560211182, | |
| "learning_rate": 9.00479030881856e-07, | |
| "loss": 0.5257174372673035, | |
| "step": 2188 | |
| }, | |
| { | |
| "epoch": 4.620253164556962, | |
| "grad_norm": 2.1326327323913574, | |
| "learning_rate": 8.975852359372421e-07, | |
| "loss": 0.907311737537384, | |
| "step": 2190 | |
| }, | |
| { | |
| "epoch": 4.624472573839663, | |
| "grad_norm": 2.5456783771514893, | |
| "learning_rate": 8.946984888918133e-07, | |
| "loss": 0.48332294821739197, | |
| "step": 2192 | |
| }, | |
| { | |
| "epoch": 4.628691983122363, | |
| "grad_norm": 4.807265758514404, | |
| "learning_rate": 8.918188053669391e-07, | |
| "loss": 0.8633521199226379, | |
| "step": 2194 | |
| }, | |
| { | |
| "epoch": 4.632911392405063, | |
| "grad_norm": 3.1572585105895996, | |
| "learning_rate": 8.889462009457651e-07, | |
| "loss": 0.4701206088066101, | |
| "step": 2196 | |
| }, | |
| { | |
| "epoch": 4.637130801687764, | |
| "grad_norm": 2.305100440979004, | |
| "learning_rate": 8.860806911731295e-07, | |
| "loss": 0.4662626385688782, | |
| "step": 2198 | |
| }, | |
| { | |
| "epoch": 4.641350210970464, | |
| "grad_norm": 2.7214598655700684, | |
| "learning_rate": 8.832222915554783e-07, | |
| "loss": 0.8649228811264038, | |
| "step": 2200 | |
| }, | |
| { | |
| "epoch": 4.6455696202531644, | |
| "grad_norm": 4.30544900894165, | |
| "learning_rate": 8.803710175607808e-07, | |
| "loss": 0.8740881085395813, | |
| "step": 2202 | |
| }, | |
| { | |
| "epoch": 4.649789029535865, | |
| "grad_norm": 1.4659613370895386, | |
| "learning_rate": 8.775268846184471e-07, | |
| "loss": 0.5230797529220581, | |
| "step": 2204 | |
| }, | |
| { | |
| "epoch": 4.654008438818566, | |
| "grad_norm": 3.350233316421509, | |
| "learning_rate": 8.74689908119245e-07, | |
| "loss": 0.5945952534675598, | |
| "step": 2206 | |
| }, | |
| { | |
| "epoch": 4.658227848101266, | |
| "grad_norm": 1.1876442432403564, | |
| "learning_rate": 8.718601034152144e-07, | |
| "loss": 0.5520751476287842, | |
| "step": 2208 | |
| }, | |
| { | |
| "epoch": 4.662447257383966, | |
| "grad_norm": 2.593919277191162, | |
| "learning_rate": 8.690374858195868e-07, | |
| "loss": 0.8659783601760864, | |
| "step": 2210 | |
| }, | |
| { | |
| "epoch": 4.666666666666667, | |
| "grad_norm": 2.051456928253174, | |
| "learning_rate": 8.662220706067007e-07, | |
| "loss": 0.7441516518592834, | |
| "step": 2212 | |
| }, | |
| { | |
| "epoch": 4.670886075949367, | |
| "grad_norm": 9.695352554321289, | |
| "learning_rate": 8.634138730119199e-07, | |
| "loss": 0.6046957969665527, | |
| "step": 2214 | |
| }, | |
| { | |
| "epoch": 4.675105485232067, | |
| "grad_norm": 3.217013359069824, | |
| "learning_rate": 8.606129082315514e-07, | |
| "loss": 0.8700679540634155, | |
| "step": 2216 | |
| }, | |
| { | |
| "epoch": 4.679324894514768, | |
| "grad_norm": 2.5168628692626953, | |
| "learning_rate": 8.578191914227602e-07, | |
| "loss": 0.5581780076026917, | |
| "step": 2218 | |
| }, | |
| { | |
| "epoch": 4.6835443037974684, | |
| "grad_norm": 3.7480080127716064, | |
| "learning_rate": 8.550327377034915e-07, | |
| "loss": 0.7154510617256165, | |
| "step": 2220 | |
| }, | |
| { | |
| "epoch": 4.687763713080169, | |
| "grad_norm": 2.187389373779297, | |
| "learning_rate": 8.522535621523864e-07, | |
| "loss": 0.17346470057964325, | |
| "step": 2222 | |
| }, | |
| { | |
| "epoch": 4.691983122362869, | |
| "grad_norm": 2.2572085857391357, | |
| "learning_rate": 8.494816798087014e-07, | |
| "loss": 0.8721593618392944, | |
| "step": 2224 | |
| }, | |
| { | |
| "epoch": 4.69620253164557, | |
| "grad_norm": 8.44543170928955, | |
| "learning_rate": 8.467171056722262e-07, | |
| "loss": 0.5838876962661743, | |
| "step": 2226 | |
| }, | |
| { | |
| "epoch": 4.70042194092827, | |
| "grad_norm": 10.777728080749512, | |
| "learning_rate": 8.439598547032021e-07, | |
| "loss": 0.15432819724082947, | |
| "step": 2228 | |
| }, | |
| { | |
| "epoch": 4.70464135021097, | |
| "grad_norm": 2.5708587169647217, | |
| "learning_rate": 8.412099418222429e-07, | |
| "loss": 0.8907821178436279, | |
| "step": 2230 | |
| }, | |
| { | |
| "epoch": 4.708860759493671, | |
| "grad_norm": 1.8050702810287476, | |
| "learning_rate": 8.384673819102515e-07, | |
| "loss": 0.8190984725952148, | |
| "step": 2232 | |
| }, | |
| { | |
| "epoch": 4.713080168776371, | |
| "grad_norm": 13.505372047424316, | |
| "learning_rate": 8.357321898083417e-07, | |
| "loss": 0.5908716917037964, | |
| "step": 2234 | |
| }, | |
| { | |
| "epoch": 4.717299578059071, | |
| "grad_norm": 4.608894348144531, | |
| "learning_rate": 8.330043803177576e-07, | |
| "loss": 0.43208563327789307, | |
| "step": 2236 | |
| }, | |
| { | |
| "epoch": 4.7215189873417724, | |
| "grad_norm": 6.133680820465088, | |
| "learning_rate": 8.302839681997924e-07, | |
| "loss": 0.7111215591430664, | |
| "step": 2238 | |
| }, | |
| { | |
| "epoch": 4.725738396624473, | |
| "grad_norm": 1.9396830797195435, | |
| "learning_rate": 8.275709681757091e-07, | |
| "loss": 0.8701183795928955, | |
| "step": 2240 | |
| }, | |
| { | |
| "epoch": 4.729957805907173, | |
| "grad_norm": 2.7942826747894287, | |
| "learning_rate": 8.248653949266609e-07, | |
| "loss": 0.9508087635040283, | |
| "step": 2242 | |
| }, | |
| { | |
| "epoch": 4.734177215189874, | |
| "grad_norm": 1.77509343624115, | |
| "learning_rate": 8.221672630936114e-07, | |
| "loss": 0.14094747602939606, | |
| "step": 2244 | |
| }, | |
| { | |
| "epoch": 4.738396624472574, | |
| "grad_norm": 9.949209213256836, | |
| "learning_rate": 8.194765872772569e-07, | |
| "loss": 0.7157829999923706, | |
| "step": 2246 | |
| }, | |
| { | |
| "epoch": 4.742616033755274, | |
| "grad_norm": 3.089747667312622, | |
| "learning_rate": 8.167933820379438e-07, | |
| "loss": 0.9330974817276001, | |
| "step": 2248 | |
| }, | |
| { | |
| "epoch": 4.746835443037975, | |
| "grad_norm": 8.302231788635254, | |
| "learning_rate": 8.141176618955941e-07, | |
| "loss": 0.18974465131759644, | |
| "step": 2250 | |
| }, | |
| { | |
| "epoch": 4.751054852320675, | |
| "grad_norm": 17.27684211730957, | |
| "learning_rate": 8.114494413296242e-07, | |
| "loss": 0.6534916162490845, | |
| "step": 2252 | |
| }, | |
| { | |
| "epoch": 4.755274261603375, | |
| "grad_norm": 13.299623489379883, | |
| "learning_rate": 8.087887347788675e-07, | |
| "loss": 0.5243600606918335, | |
| "step": 2254 | |
| }, | |
| { | |
| "epoch": 4.759493670886076, | |
| "grad_norm": 1.3798922300338745, | |
| "learning_rate": 8.061355566414959e-07, | |
| "loss": 0.46594005823135376, | |
| "step": 2256 | |
| }, | |
| { | |
| "epoch": 4.763713080168777, | |
| "grad_norm": 6.182672023773193, | |
| "learning_rate": 8.034899212749415e-07, | |
| "loss": 0.22735753655433655, | |
| "step": 2258 | |
| }, | |
| { | |
| "epoch": 4.767932489451477, | |
| "grad_norm": 4.455085277557373, | |
| "learning_rate": 8.0085184299582e-07, | |
| "loss": 0.22588486969470978, | |
| "step": 2260 | |
| }, | |
| { | |
| "epoch": 4.772151898734177, | |
| "grad_norm": 2.398963212966919, | |
| "learning_rate": 7.982213360798524e-07, | |
| "loss": 0.5842011570930481, | |
| "step": 2262 | |
| }, | |
| { | |
| "epoch": 4.776371308016878, | |
| "grad_norm": 3.986417055130005, | |
| "learning_rate": 7.955984147617878e-07, | |
| "loss": 0.8581550121307373, | |
| "step": 2264 | |
| }, | |
| { | |
| "epoch": 4.780590717299578, | |
| "grad_norm": 2.5186336040496826, | |
| "learning_rate": 7.929830932353267e-07, | |
| "loss": 0.9678604602813721, | |
| "step": 2266 | |
| }, | |
| { | |
| "epoch": 4.784810126582278, | |
| "grad_norm": 17.917510986328125, | |
| "learning_rate": 7.903753856530439e-07, | |
| "loss": 0.776985764503479, | |
| "step": 2268 | |
| }, | |
| { | |
| "epoch": 4.789029535864979, | |
| "grad_norm": 4.219602108001709, | |
| "learning_rate": 7.877753061263124e-07, | |
| "loss": 0.49661773443222046, | |
| "step": 2270 | |
| }, | |
| { | |
| "epoch": 4.793248945147679, | |
| "grad_norm": 2.524501323699951, | |
| "learning_rate": 7.851828687252258e-07, | |
| "loss": 0.9214498996734619, | |
| "step": 2272 | |
| }, | |
| { | |
| "epoch": 4.7974683544303796, | |
| "grad_norm": 24.021936416625977, | |
| "learning_rate": 7.825980874785245e-07, | |
| "loss": 0.2861242890357971, | |
| "step": 2274 | |
| }, | |
| { | |
| "epoch": 4.80168776371308, | |
| "grad_norm": 1.766944169998169, | |
| "learning_rate": 7.800209763735166e-07, | |
| "loss": 0.2682395279407501, | |
| "step": 2276 | |
| }, | |
| { | |
| "epoch": 4.805907172995781, | |
| "grad_norm": 3.6635119915008545, | |
| "learning_rate": 7.774515493560047e-07, | |
| "loss": 0.5065731406211853, | |
| "step": 2278 | |
| }, | |
| { | |
| "epoch": 4.810126582278481, | |
| "grad_norm": 0.9169036746025085, | |
| "learning_rate": 7.748898203302101e-07, | |
| "loss": 0.4213840365409851, | |
| "step": 2280 | |
| }, | |
| { | |
| "epoch": 4.814345991561181, | |
| "grad_norm": 2.111497402191162, | |
| "learning_rate": 7.723358031586968e-07, | |
| "loss": 0.8279630541801453, | |
| "step": 2282 | |
| }, | |
| { | |
| "epoch": 4.818565400843882, | |
| "grad_norm": 3.6885154247283936, | |
| "learning_rate": 7.697895116622962e-07, | |
| "loss": 0.721439003944397, | |
| "step": 2284 | |
| }, | |
| { | |
| "epoch": 4.822784810126582, | |
| "grad_norm": 4.22064733505249, | |
| "learning_rate": 7.672509596200339e-07, | |
| "loss": 0.8761791586875916, | |
| "step": 2286 | |
| }, | |
| { | |
| "epoch": 4.827004219409282, | |
| "grad_norm": 2.2504615783691406, | |
| "learning_rate": 7.647201607690535e-07, | |
| "loss": 0.43095457553863525, | |
| "step": 2288 | |
| }, | |
| { | |
| "epoch": 4.831223628691983, | |
| "grad_norm": 2.19746470451355, | |
| "learning_rate": 7.621971288045436e-07, | |
| "loss": 0.7216506004333496, | |
| "step": 2290 | |
| }, | |
| { | |
| "epoch": 4.8354430379746836, | |
| "grad_norm": 2.588840961456299, | |
| "learning_rate": 7.596818773796616e-07, | |
| "loss": 0.8444218039512634, | |
| "step": 2292 | |
| }, | |
| { | |
| "epoch": 4.839662447257384, | |
| "grad_norm": 2.1437089443206787, | |
| "learning_rate": 7.571744201054619e-07, | |
| "loss": 0.9132941961288452, | |
| "step": 2294 | |
| }, | |
| { | |
| "epoch": 4.843881856540085, | |
| "grad_norm": 2.2970213890075684, | |
| "learning_rate": 7.54674770550823e-07, | |
| "loss": 0.8675155639648438, | |
| "step": 2296 | |
| }, | |
| { | |
| "epoch": 4.848101265822785, | |
| "grad_norm": 2.469003438949585, | |
| "learning_rate": 7.521829422423707e-07, | |
| "loss": 0.8924763202667236, | |
| "step": 2298 | |
| }, | |
| { | |
| "epoch": 4.852320675105485, | |
| "grad_norm": 5.6491169929504395, | |
| "learning_rate": 7.496989486644074e-07, | |
| "loss": 1.2289131879806519, | |
| "step": 2300 | |
| }, | |
| { | |
| "epoch": 4.856540084388186, | |
| "grad_norm": 0.6651078462600708, | |
| "learning_rate": 7.472228032588392e-07, | |
| "loss": 0.5435088872909546, | |
| "step": 2302 | |
| }, | |
| { | |
| "epoch": 4.860759493670886, | |
| "grad_norm": 1.8895771503448486, | |
| "learning_rate": 7.447545194251021e-07, | |
| "loss": 0.4832010865211487, | |
| "step": 2304 | |
| }, | |
| { | |
| "epoch": 4.864978902953586, | |
| "grad_norm": 4.667498588562012, | |
| "learning_rate": 7.422941105200888e-07, | |
| "loss": 0.7593515515327454, | |
| "step": 2306 | |
| }, | |
| { | |
| "epoch": 4.869198312236287, | |
| "grad_norm": 2.6413588523864746, | |
| "learning_rate": 7.398415898580795e-07, | |
| "loss": 0.5025730729103088, | |
| "step": 2308 | |
| }, | |
| { | |
| "epoch": 4.8734177215189876, | |
| "grad_norm": 2.2257080078125, | |
| "learning_rate": 7.373969707106667e-07, | |
| "loss": 0.5178145170211792, | |
| "step": 2310 | |
| }, | |
| { | |
| "epoch": 4.877637130801688, | |
| "grad_norm": 4.63566255569458, | |
| "learning_rate": 7.349602663066848e-07, | |
| "loss": 0.8785790801048279, | |
| "step": 2312 | |
| }, | |
| { | |
| "epoch": 4.881856540084388, | |
| "grad_norm": 11.207052230834961, | |
| "learning_rate": 7.325314898321387e-07, | |
| "loss": 0.6604704260826111, | |
| "step": 2314 | |
| }, | |
| { | |
| "epoch": 4.886075949367089, | |
| "grad_norm": 2.7186286449432373, | |
| "learning_rate": 7.30110654430131e-07, | |
| "loss": 0.8655844330787659, | |
| "step": 2316 | |
| }, | |
| { | |
| "epoch": 4.890295358649789, | |
| "grad_norm": 9.436038970947266, | |
| "learning_rate": 7.276977732007934e-07, | |
| "loss": 0.6372033357620239, | |
| "step": 2318 | |
| }, | |
| { | |
| "epoch": 4.894514767932489, | |
| "grad_norm": 9.619095802307129, | |
| "learning_rate": 7.252928592012131e-07, | |
| "loss": 0.5399308204650879, | |
| "step": 2320 | |
| }, | |
| { | |
| "epoch": 4.89873417721519, | |
| "grad_norm": 3.560415267944336, | |
| "learning_rate": 7.228959254453634e-07, | |
| "loss": 0.5512664318084717, | |
| "step": 2322 | |
| }, | |
| { | |
| "epoch": 4.90295358649789, | |
| "grad_norm": 2.261822462081909, | |
| "learning_rate": 7.20506984904034e-07, | |
| "loss": 0.965155839920044, | |
| "step": 2324 | |
| }, | |
| { | |
| "epoch": 4.9071729957805905, | |
| "grad_norm": 5.737890243530273, | |
| "learning_rate": 7.181260505047593e-07, | |
| "loss": 0.5091350078582764, | |
| "step": 2326 | |
| }, | |
| { | |
| "epoch": 4.911392405063291, | |
| "grad_norm": 2.460875988006592, | |
| "learning_rate": 7.157531351317499e-07, | |
| "loss": 0.6960829496383667, | |
| "step": 2328 | |
| }, | |
| { | |
| "epoch": 4.915611814345992, | |
| "grad_norm": 2.570103883743286, | |
| "learning_rate": 7.133882516258215e-07, | |
| "loss": 1.0476431846618652, | |
| "step": 2330 | |
| }, | |
| { | |
| "epoch": 4.919831223628692, | |
| "grad_norm": 0.8946544528007507, | |
| "learning_rate": 7.110314127843266e-07, | |
| "loss": 0.5339324474334717, | |
| "step": 2332 | |
| }, | |
| { | |
| "epoch": 4.924050632911392, | |
| "grad_norm": 6.283257007598877, | |
| "learning_rate": 7.086826313610843e-07, | |
| "loss": 0.6191664934158325, | |
| "step": 2334 | |
| }, | |
| { | |
| "epoch": 4.928270042194093, | |
| "grad_norm": 2.9751780033111572, | |
| "learning_rate": 7.063419200663121e-07, | |
| "loss": 0.9971131086349487, | |
| "step": 2336 | |
| }, | |
| { | |
| "epoch": 4.932489451476793, | |
| "grad_norm": 30.684070587158203, | |
| "learning_rate": 7.040092915665563e-07, | |
| "loss": 0.5671279430389404, | |
| "step": 2338 | |
| }, | |
| { | |
| "epoch": 4.936708860759493, | |
| "grad_norm": 3.855710506439209, | |
| "learning_rate": 7.016847584846243e-07, | |
| "loss": 0.5699124336242676, | |
| "step": 2340 | |
| }, | |
| { | |
| "epoch": 4.940928270042194, | |
| "grad_norm": 5.847226142883301, | |
| "learning_rate": 6.993683333995155e-07, | |
| "loss": 0.8012879490852356, | |
| "step": 2342 | |
| }, | |
| { | |
| "epoch": 4.9451476793248945, | |
| "grad_norm": 6.018973350524902, | |
| "learning_rate": 6.970600288463544e-07, | |
| "loss": 0.5165205597877502, | |
| "step": 2344 | |
| }, | |
| { | |
| "epoch": 4.949367088607595, | |
| "grad_norm": 2.1352529525756836, | |
| "learning_rate": 6.947598573163207e-07, | |
| "loss": 0.9921296834945679, | |
| "step": 2346 | |
| }, | |
| { | |
| "epoch": 4.953586497890296, | |
| "grad_norm": 2.2737410068511963, | |
| "learning_rate": 6.924678312565846e-07, | |
| "loss": 0.5466551780700684, | |
| "step": 2348 | |
| }, | |
| { | |
| "epoch": 4.957805907172996, | |
| "grad_norm": 2.052476167678833, | |
| "learning_rate": 6.901839630702358e-07, | |
| "loss": 0.7028835415840149, | |
| "step": 2350 | |
| }, | |
| { | |
| "epoch": 4.962025316455696, | |
| "grad_norm": 5.379444599151611, | |
| "learning_rate": 6.879082651162198e-07, | |
| "loss": 0.4037717580795288, | |
| "step": 2352 | |
| }, | |
| { | |
| "epoch": 4.966244725738397, | |
| "grad_norm": 2.764251470565796, | |
| "learning_rate": 6.856407497092698e-07, | |
| "loss": 0.8569744825363159, | |
| "step": 2354 | |
| }, | |
| { | |
| "epoch": 4.970464135021097, | |
| "grad_norm": 11.355871200561523, | |
| "learning_rate": 6.833814291198395e-07, | |
| "loss": 0.5073586106300354, | |
| "step": 2356 | |
| }, | |
| { | |
| "epoch": 4.974683544303797, | |
| "grad_norm": 3.204270124435425, | |
| "learning_rate": 6.811303155740364e-07, | |
| "loss": 0.8562701344490051, | |
| "step": 2358 | |
| }, | |
| { | |
| "epoch": 4.978902953586498, | |
| "grad_norm": 0.8829920887947083, | |
| "learning_rate": 6.788874212535576e-07, | |
| "loss": 0.5263558626174927, | |
| "step": 2360 | |
| }, | |
| { | |
| "epoch": 4.9831223628691985, | |
| "grad_norm": 4.092447757720947, | |
| "learning_rate": 6.766527582956217e-07, | |
| "loss": 0.8253353238105774, | |
| "step": 2362 | |
| }, | |
| { | |
| "epoch": 4.987341772151899, | |
| "grad_norm": 5.725378513336182, | |
| "learning_rate": 6.744263387929044e-07, | |
| "loss": 1.0841920375823975, | |
| "step": 2364 | |
| }, | |
| { | |
| "epoch": 4.991561181434599, | |
| "grad_norm": 2.338805913925171, | |
| "learning_rate": 6.722081747934722e-07, | |
| "loss": 0.9890093803405762, | |
| "step": 2366 | |
| }, | |
| { | |
| "epoch": 4.9957805907173, | |
| "grad_norm": 6.7110137939453125, | |
| "learning_rate": 6.699982783007181e-07, | |
| "loss": 0.6056183576583862, | |
| "step": 2368 | |
| }, | |
| { | |
| "epoch": 5.0, | |
| "grad_norm": 8.881539344787598, | |
| "learning_rate": 6.677966612732969e-07, | |
| "loss": 0.19234615564346313, | |
| "step": 2370 | |
| }, | |
| { | |
| "epoch": 5.0042194092827, | |
| "grad_norm": 2.8525843620300293, | |
| "learning_rate": 6.656033356250588e-07, | |
| "loss": 0.5725005865097046, | |
| "step": 2372 | |
| }, | |
| { | |
| "epoch": 5.008438818565401, | |
| "grad_norm": 2.8133490085601807, | |
| "learning_rate": 6.634183132249862e-07, | |
| "loss": 0.930966317653656, | |
| "step": 2374 | |
| }, | |
| { | |
| "epoch": 5.012658227848101, | |
| "grad_norm": 6.63195276260376, | |
| "learning_rate": 6.612416058971295e-07, | |
| "loss": 0.6420108079910278, | |
| "step": 2376 | |
| }, | |
| { | |
| "epoch": 5.0168776371308015, | |
| "grad_norm": 3.1725778579711914, | |
| "learning_rate": 6.590732254205429e-07, | |
| "loss": 0.8284573554992676, | |
| "step": 2378 | |
| }, | |
| { | |
| "epoch": 5.0210970464135025, | |
| "grad_norm": 4.44754695892334, | |
| "learning_rate": 6.569131835292196e-07, | |
| "loss": 0.48461687564849854, | |
| "step": 2380 | |
| }, | |
| { | |
| "epoch": 5.025316455696203, | |
| "grad_norm": 3.432302474975586, | |
| "learning_rate": 6.547614919120305e-07, | |
| "loss": 0.7024222016334534, | |
| "step": 2382 | |
| }, | |
| { | |
| "epoch": 5.029535864978903, | |
| "grad_norm": 2.8031928539276123, | |
| "learning_rate": 6.526181622126594e-07, | |
| "loss": 0.6068092584609985, | |
| "step": 2384 | |
| }, | |
| { | |
| "epoch": 5.033755274261603, | |
| "grad_norm": 6.872043132781982, | |
| "learning_rate": 6.504832060295403e-07, | |
| "loss": 0.5841951370239258, | |
| "step": 2386 | |
| }, | |
| { | |
| "epoch": 5.037974683544304, | |
| "grad_norm": 2.907249927520752, | |
| "learning_rate": 6.483566349157945e-07, | |
| "loss": 0.6709692478179932, | |
| "step": 2388 | |
| }, | |
| { | |
| "epoch": 5.042194092827004, | |
| "grad_norm": 0.09023797512054443, | |
| "learning_rate": 6.462384603791684e-07, | |
| "loss": 0.5061817765235901, | |
| "step": 2390 | |
| }, | |
| { | |
| "epoch": 5.046413502109704, | |
| "grad_norm": 11.977813720703125, | |
| "learning_rate": 6.441286938819714e-07, | |
| "loss": 0.31504881381988525, | |
| "step": 2392 | |
| }, | |
| { | |
| "epoch": 5.050632911392405, | |
| "grad_norm": 5.734729766845703, | |
| "learning_rate": 6.420273468410131e-07, | |
| "loss": 0.48150938749313354, | |
| "step": 2394 | |
| }, | |
| { | |
| "epoch": 5.0548523206751055, | |
| "grad_norm": 1.924585223197937, | |
| "learning_rate": 6.399344306275419e-07, | |
| "loss": 0.3540734052658081, | |
| "step": 2396 | |
| }, | |
| { | |
| "epoch": 5.059071729957806, | |
| "grad_norm": 3.9746241569519043, | |
| "learning_rate": 6.378499565671839e-07, | |
| "loss": 0.8421119451522827, | |
| "step": 2398 | |
| }, | |
| { | |
| "epoch": 5.063291139240507, | |
| "grad_norm": 7.219764232635498, | |
| "learning_rate": 6.35773935939881e-07, | |
| "loss": 1.0435487031936646, | |
| "step": 2400 | |
| }, | |
| { | |
| "epoch": 5.067510548523207, | |
| "grad_norm": 5.04371976852417, | |
| "learning_rate": 6.337063799798305e-07, | |
| "loss": 0.9782629013061523, | |
| "step": 2402 | |
| }, | |
| { | |
| "epoch": 5.071729957805907, | |
| "grad_norm": 16.103092193603516, | |
| "learning_rate": 6.316472998754234e-07, | |
| "loss": 0.033330727368593216, | |
| "step": 2404 | |
| }, | |
| { | |
| "epoch": 5.075949367088608, | |
| "grad_norm": 4.6494526863098145, | |
| "learning_rate": 6.29596706769185e-07, | |
| "loss": 0.7846492528915405, | |
| "step": 2406 | |
| }, | |
| { | |
| "epoch": 5.080168776371308, | |
| "grad_norm": 2.9837498664855957, | |
| "learning_rate": 6.275546117577132e-07, | |
| "loss": 0.48354560136795044, | |
| "step": 2408 | |
| }, | |
| { | |
| "epoch": 5.084388185654008, | |
| "grad_norm": 5.440659999847412, | |
| "learning_rate": 6.255210258916199e-07, | |
| "loss": 0.5124998688697815, | |
| "step": 2410 | |
| }, | |
| { | |
| "epoch": 5.0886075949367084, | |
| "grad_norm": 2.850675106048584, | |
| "learning_rate": 6.234959601754703e-07, | |
| "loss": 0.7655423879623413, | |
| "step": 2412 | |
| }, | |
| { | |
| "epoch": 5.0928270042194095, | |
| "grad_norm": 2.1231069564819336, | |
| "learning_rate": 6.214794255677234e-07, | |
| "loss": 0.7977665662765503, | |
| "step": 2414 | |
| }, | |
| { | |
| "epoch": 5.09704641350211, | |
| "grad_norm": 17.62323760986328, | |
| "learning_rate": 6.194714329806732e-07, | |
| "loss": 0.34903600811958313, | |
| "step": 2416 | |
| }, | |
| { | |
| "epoch": 5.10126582278481, | |
| "grad_norm": 2.3470253944396973, | |
| "learning_rate": 6.174719932803891e-07, | |
| "loss": 0.5935072898864746, | |
| "step": 2418 | |
| }, | |
| { | |
| "epoch": 5.105485232067511, | |
| "grad_norm": 2.2750778198242188, | |
| "learning_rate": 6.154811172866576e-07, | |
| "loss": 1.007997751235962, | |
| "step": 2420 | |
| }, | |
| { | |
| "epoch": 5.109704641350211, | |
| "grad_norm": 2.6111321449279785, | |
| "learning_rate": 6.13498815772923e-07, | |
| "loss": 0.7840423583984375, | |
| "step": 2422 | |
| }, | |
| { | |
| "epoch": 5.113924050632911, | |
| "grad_norm": 1.3846306800842285, | |
| "learning_rate": 6.115250994662303e-07, | |
| "loss": 0.5133131742477417, | |
| "step": 2424 | |
| }, | |
| { | |
| "epoch": 5.118143459915612, | |
| "grad_norm": 2.471632480621338, | |
| "learning_rate": 6.095599790471655e-07, | |
| "loss": 0.5239850282669067, | |
| "step": 2426 | |
| }, | |
| { | |
| "epoch": 5.122362869198312, | |
| "grad_norm": 6.427463054656982, | |
| "learning_rate": 6.076034651497995e-07, | |
| "loss": 0.46869874000549316, | |
| "step": 2428 | |
| }, | |
| { | |
| "epoch": 5.1265822784810124, | |
| "grad_norm": 2.414717674255371, | |
| "learning_rate": 6.056555683616291e-07, | |
| "loss": 0.5103088617324829, | |
| "step": 2430 | |
| }, | |
| { | |
| "epoch": 5.1308016877637135, | |
| "grad_norm": 2.512148380279541, | |
| "learning_rate": 6.037162992235214e-07, | |
| "loss": 0.8223515152931213, | |
| "step": 2432 | |
| }, | |
| { | |
| "epoch": 5.135021097046414, | |
| "grad_norm": 2.9548940658569336, | |
| "learning_rate": 6.017856682296551e-07, | |
| "loss": 0.917111873626709, | |
| "step": 2434 | |
| }, | |
| { | |
| "epoch": 5.139240506329114, | |
| "grad_norm": 3.2818551063537598, | |
| "learning_rate": 5.998636858274642e-07, | |
| "loss": 0.4495956301689148, | |
| "step": 2436 | |
| }, | |
| { | |
| "epoch": 5.143459915611814, | |
| "grad_norm": 7.414487838745117, | |
| "learning_rate": 5.97950362417582e-07, | |
| "loss": 0.10738074779510498, | |
| "step": 2438 | |
| }, | |
| { | |
| "epoch": 5.147679324894515, | |
| "grad_norm": 3.5307252407073975, | |
| "learning_rate": 5.960457083537848e-07, | |
| "loss": 0.6862280368804932, | |
| "step": 2440 | |
| }, | |
| { | |
| "epoch": 5.151898734177215, | |
| "grad_norm": 5.234355449676514, | |
| "learning_rate": 5.941497339429337e-07, | |
| "loss": 0.790778636932373, | |
| "step": 2442 | |
| }, | |
| { | |
| "epoch": 5.156118143459915, | |
| "grad_norm": 9.668578147888184, | |
| "learning_rate": 5.922624494449232e-07, | |
| "loss": 0.44245994091033936, | |
| "step": 2444 | |
| }, | |
| { | |
| "epoch": 5.160337552742616, | |
| "grad_norm": 2.4678266048431396, | |
| "learning_rate": 5.903838650726219e-07, | |
| "loss": 0.9481706023216248, | |
| "step": 2446 | |
| }, | |
| { | |
| "epoch": 5.1645569620253164, | |
| "grad_norm": 6.675557613372803, | |
| "learning_rate": 5.885139909918178e-07, | |
| "loss": 0.5106003284454346, | |
| "step": 2448 | |
| }, | |
| { | |
| "epoch": 5.168776371308017, | |
| "grad_norm": 2.8948278427124023, | |
| "learning_rate": 5.866528373211652e-07, | |
| "loss": 0.818520188331604, | |
| "step": 2450 | |
| }, | |
| { | |
| "epoch": 5.172995780590718, | |
| "grad_norm": 0.031267765909433365, | |
| "learning_rate": 5.848004141321279e-07, | |
| "loss": 0.4252956509590149, | |
| "step": 2452 | |
| }, | |
| { | |
| "epoch": 5.177215189873418, | |
| "grad_norm": 5.288304805755615, | |
| "learning_rate": 5.82956731448926e-07, | |
| "loss": 0.17302009463310242, | |
| "step": 2454 | |
| }, | |
| { | |
| "epoch": 5.181434599156118, | |
| "grad_norm": 2.205019950866699, | |
| "learning_rate": 5.811217992484801e-07, | |
| "loss": 0.44998836517333984, | |
| "step": 2456 | |
| }, | |
| { | |
| "epoch": 5.185654008438819, | |
| "grad_norm": 2.3904027938842773, | |
| "learning_rate": 5.792956274603598e-07, | |
| "loss": 0.5072075128555298, | |
| "step": 2458 | |
| }, | |
| { | |
| "epoch": 5.189873417721519, | |
| "grad_norm": 10.959815979003906, | |
| "learning_rate": 5.774782259667278e-07, | |
| "loss": 0.5302789807319641, | |
| "step": 2460 | |
| }, | |
| { | |
| "epoch": 5.194092827004219, | |
| "grad_norm": 3.0402653217315674, | |
| "learning_rate": 5.756696046022868e-07, | |
| "loss": 0.8277729749679565, | |
| "step": 2462 | |
| }, | |
| { | |
| "epoch": 5.198312236286919, | |
| "grad_norm": 1.877632737159729, | |
| "learning_rate": 5.738697731542275e-07, | |
| "loss": 0.8515483736991882, | |
| "step": 2464 | |
| }, | |
| { | |
| "epoch": 5.2025316455696204, | |
| "grad_norm": 3.8875975608825684, | |
| "learning_rate": 5.720787413621739e-07, | |
| "loss": 0.3267098069190979, | |
| "step": 2466 | |
| }, | |
| { | |
| "epoch": 5.206751054852321, | |
| "grad_norm": 2.2627320289611816, | |
| "learning_rate": 5.702965189181324e-07, | |
| "loss": 0.786805272102356, | |
| "step": 2468 | |
| }, | |
| { | |
| "epoch": 5.210970464135021, | |
| "grad_norm": 6.892368793487549, | |
| "learning_rate": 5.685231154664372e-07, | |
| "loss": 0.6648309826850891, | |
| "step": 2470 | |
| }, | |
| { | |
| "epoch": 5.215189873417722, | |
| "grad_norm": 3.592425584793091, | |
| "learning_rate": 5.667585406036999e-07, | |
| "loss": 0.6738979816436768, | |
| "step": 2472 | |
| }, | |
| { | |
| "epoch": 5.219409282700422, | |
| "grad_norm": 4.459148406982422, | |
| "learning_rate": 5.650028038787577e-07, | |
| "loss": 0.7590001821517944, | |
| "step": 2474 | |
| }, | |
| { | |
| "epoch": 5.223628691983122, | |
| "grad_norm": 2.538756847381592, | |
| "learning_rate": 5.632559147926202e-07, | |
| "loss": 0.42987027764320374, | |
| "step": 2476 | |
| }, | |
| { | |
| "epoch": 5.227848101265823, | |
| "grad_norm": 2.191638946533203, | |
| "learning_rate": 5.615178827984186e-07, | |
| "loss": 0.0880412608385086, | |
| "step": 2478 | |
| }, | |
| { | |
| "epoch": 5.232067510548523, | |
| "grad_norm": 0.6364640593528748, | |
| "learning_rate": 5.597887173013555e-07, | |
| "loss": 0.48929768800735474, | |
| "step": 2480 | |
| }, | |
| { | |
| "epoch": 5.236286919831223, | |
| "grad_norm": 3.1823930740356445, | |
| "learning_rate": 5.580684276586535e-07, | |
| "loss": 0.7606073617935181, | |
| "step": 2482 | |
| }, | |
| { | |
| "epoch": 5.2405063291139244, | |
| "grad_norm": 3.0193521976470947, | |
| "learning_rate": 5.563570231795027e-07, | |
| "loss": 0.4337414503097534, | |
| "step": 2484 | |
| }, | |
| { | |
| "epoch": 5.244725738396625, | |
| "grad_norm": 5.9851298332214355, | |
| "learning_rate": 5.546545131250133e-07, | |
| "loss": 1.1480921506881714, | |
| "step": 2486 | |
| }, | |
| { | |
| "epoch": 5.248945147679325, | |
| "grad_norm": 2.274847984313965, | |
| "learning_rate": 5.52960906708164e-07, | |
| "loss": 0.8605833053588867, | |
| "step": 2488 | |
| }, | |
| { | |
| "epoch": 5.253164556962025, | |
| "grad_norm": 4.390803813934326, | |
| "learning_rate": 5.512762130937521e-07, | |
| "loss": 0.891315221786499, | |
| "step": 2490 | |
| }, | |
| { | |
| "epoch": 5.257383966244726, | |
| "grad_norm": 3.777196168899536, | |
| "learning_rate": 5.496004413983437e-07, | |
| "loss": 0.9285299777984619, | |
| "step": 2492 | |
| }, | |
| { | |
| "epoch": 5.261603375527426, | |
| "grad_norm": 2.7875325679779053, | |
| "learning_rate": 5.479336006902255e-07, | |
| "loss": 0.6960370540618896, | |
| "step": 2494 | |
| }, | |
| { | |
| "epoch": 5.265822784810126, | |
| "grad_norm": 5.017436981201172, | |
| "learning_rate": 5.462756999893543e-07, | |
| "loss": 0.42756134271621704, | |
| "step": 2496 | |
| }, | |
| { | |
| "epoch": 5.270042194092827, | |
| "grad_norm": 4.370994567871094, | |
| "learning_rate": 5.446267482673096e-07, | |
| "loss": 0.9004020690917969, | |
| "step": 2498 | |
| }, | |
| { | |
| "epoch": 5.274261603375527, | |
| "grad_norm": 8.342371940612793, | |
| "learning_rate": 5.429867544472434e-07, | |
| "loss": 0.49218082427978516, | |
| "step": 2500 | |
| }, | |
| { | |
| "epoch": 5.2784810126582276, | |
| "grad_norm": 4.241844654083252, | |
| "learning_rate": 5.413557274038332e-07, | |
| "loss": 0.6770671606063843, | |
| "step": 2502 | |
| }, | |
| { | |
| "epoch": 5.282700421940929, | |
| "grad_norm": 2.6205804347991943, | |
| "learning_rate": 5.397336759632338e-07, | |
| "loss": 0.660459041595459, | |
| "step": 2504 | |
| }, | |
| { | |
| "epoch": 5.286919831223629, | |
| "grad_norm": 8.33484935760498, | |
| "learning_rate": 5.381206089030293e-07, | |
| "loss": 0.9731260538101196, | |
| "step": 2506 | |
| }, | |
| { | |
| "epoch": 5.291139240506329, | |
| "grad_norm": 2.1884765625, | |
| "learning_rate": 5.365165349521859e-07, | |
| "loss": 0.9394969940185547, | |
| "step": 2508 | |
| }, | |
| { | |
| "epoch": 5.29535864978903, | |
| "grad_norm": 13.115405082702637, | |
| "learning_rate": 5.349214627910034e-07, | |
| "loss": 0.3471090793609619, | |
| "step": 2510 | |
| }, | |
| { | |
| "epoch": 5.29957805907173, | |
| "grad_norm": 4.7298970222473145, | |
| "learning_rate": 5.333354010510703e-07, | |
| "loss": 0.49661415815353394, | |
| "step": 2512 | |
| }, | |
| { | |
| "epoch": 5.30379746835443, | |
| "grad_norm": 8.904556274414062, | |
| "learning_rate": 5.31758358315216e-07, | |
| "loss": 0.9580909609794617, | |
| "step": 2514 | |
| }, | |
| { | |
| "epoch": 5.308016877637131, | |
| "grad_norm": 4.521732807159424, | |
| "learning_rate": 5.301903431174628e-07, | |
| "loss": 0.6797637939453125, | |
| "step": 2516 | |
| }, | |
| { | |
| "epoch": 5.312236286919831, | |
| "grad_norm": 3.3015851974487305, | |
| "learning_rate": 5.286313639429837e-07, | |
| "loss": 0.8633707761764526, | |
| "step": 2518 | |
| }, | |
| { | |
| "epoch": 5.3164556962025316, | |
| "grad_norm": 6.265556812286377, | |
| "learning_rate": 5.270814292280526e-07, | |
| "loss": 0.9207254648208618, | |
| "step": 2520 | |
| }, | |
| { | |
| "epoch": 5.320675105485232, | |
| "grad_norm": 2.132657051086426, | |
| "learning_rate": 5.255405473600001e-07, | |
| "loss": 0.8656923174858093, | |
| "step": 2522 | |
| }, | |
| { | |
| "epoch": 5.324894514767933, | |
| "grad_norm": 3.075263738632202, | |
| "learning_rate": 5.240087266771686e-07, | |
| "loss": 0.8665053844451904, | |
| "step": 2524 | |
| }, | |
| { | |
| "epoch": 5.329113924050633, | |
| "grad_norm": 25.491024017333984, | |
| "learning_rate": 5.22485975468867e-07, | |
| "loss": 0.9272741675376892, | |
| "step": 2526 | |
| }, | |
| { | |
| "epoch": 5.333333333333333, | |
| "grad_norm": 5.792654514312744, | |
| "learning_rate": 5.209723019753245e-07, | |
| "loss": 0.6649227142333984, | |
| "step": 2528 | |
| }, | |
| { | |
| "epoch": 5.337552742616034, | |
| "grad_norm": 3.785661220550537, | |
| "learning_rate": 5.19467714387648e-07, | |
| "loss": 0.7637553215026855, | |
| "step": 2530 | |
| }, | |
| { | |
| "epoch": 5.341772151898734, | |
| "grad_norm": 3.9980387687683105, | |
| "learning_rate": 5.179722208477764e-07, | |
| "loss": 0.8297359347343445, | |
| "step": 2532 | |
| }, | |
| { | |
| "epoch": 5.345991561181434, | |
| "grad_norm": 3.7611520290374756, | |
| "learning_rate": 5.164858294484372e-07, | |
| "loss": 0.5959780216217041, | |
| "step": 2534 | |
| }, | |
| { | |
| "epoch": 5.350210970464135, | |
| "grad_norm": 3.5796737670898438, | |
| "learning_rate": 5.150085482331025e-07, | |
| "loss": 0.8286501169204712, | |
| "step": 2536 | |
| }, | |
| { | |
| "epoch": 5.3544303797468356, | |
| "grad_norm": 3.5265612602233887, | |
| "learning_rate": 5.135403851959455e-07, | |
| "loss": 0.7233340740203857, | |
| "step": 2538 | |
| }, | |
| { | |
| "epoch": 5.358649789029536, | |
| "grad_norm": 3.3139536380767822, | |
| "learning_rate": 5.120813482817971e-07, | |
| "loss": 0.5095676183700562, | |
| "step": 2540 | |
| }, | |
| { | |
| "epoch": 5.362869198312236, | |
| "grad_norm": 4.816203594207764, | |
| "learning_rate": 5.106314453861031e-07, | |
| "loss": 0.10940656065940857, | |
| "step": 2542 | |
| }, | |
| { | |
| "epoch": 5.367088607594937, | |
| "grad_norm": 0.789928138256073, | |
| "learning_rate": 5.091906843548809e-07, | |
| "loss": 0.4012370705604553, | |
| "step": 2544 | |
| }, | |
| { | |
| "epoch": 5.371308016877637, | |
| "grad_norm": 6.561746120452881, | |
| "learning_rate": 5.077590729846782e-07, | |
| "loss": 0.6537183523178101, | |
| "step": 2546 | |
| }, | |
| { | |
| "epoch": 5.375527426160337, | |
| "grad_norm": 2.8327221870422363, | |
| "learning_rate": 5.063366190225298e-07, | |
| "loss": 0.8231172561645508, | |
| "step": 2548 | |
| }, | |
| { | |
| "epoch": 5.379746835443038, | |
| "grad_norm": 8.452791213989258, | |
| "learning_rate": 5.049233301659161e-07, | |
| "loss": 0.5680804252624512, | |
| "step": 2550 | |
| }, | |
| { | |
| "epoch": 5.383966244725738, | |
| "grad_norm": 3.7673988342285156, | |
| "learning_rate": 5.035192140627213e-07, | |
| "loss": 0.1833023726940155, | |
| "step": 2552 | |
| }, | |
| { | |
| "epoch": 5.3881856540084385, | |
| "grad_norm": 2.091782569885254, | |
| "learning_rate": 5.021242783111924e-07, | |
| "loss": 0.7948375344276428, | |
| "step": 2554 | |
| }, | |
| { | |
| "epoch": 5.3924050632911396, | |
| "grad_norm": 14.723713874816895, | |
| "learning_rate": 5.007385304598978e-07, | |
| "loss": 0.6941039562225342, | |
| "step": 2556 | |
| }, | |
| { | |
| "epoch": 5.39662447257384, | |
| "grad_norm": 14.07388973236084, | |
| "learning_rate": 4.993619780076855e-07, | |
| "loss": 0.43440479040145874, | |
| "step": 2558 | |
| }, | |
| { | |
| "epoch": 5.40084388185654, | |
| "grad_norm": 7.218398094177246, | |
| "learning_rate": 4.979946284036441e-07, | |
| "loss": 0.21915487945079803, | |
| "step": 2560 | |
| }, | |
| { | |
| "epoch": 5.405063291139241, | |
| "grad_norm": 4.034780979156494, | |
| "learning_rate": 4.966364890470618e-07, | |
| "loss": 0.547726571559906, | |
| "step": 2562 | |
| }, | |
| { | |
| "epoch": 5.409282700421941, | |
| "grad_norm": 2.1914002895355225, | |
| "learning_rate": 4.952875672873867e-07, | |
| "loss": 0.9137965440750122, | |
| "step": 2564 | |
| }, | |
| { | |
| "epoch": 5.413502109704641, | |
| "grad_norm": 2.9248530864715576, | |
| "learning_rate": 4.939478704241859e-07, | |
| "loss": 0.4639781713485718, | |
| "step": 2566 | |
| }, | |
| { | |
| "epoch": 5.417721518987342, | |
| "grad_norm": 3.954902172088623, | |
| "learning_rate": 4.926174057071077e-07, | |
| "loss": 0.7315584421157837, | |
| "step": 2568 | |
| }, | |
| { | |
| "epoch": 5.421940928270042, | |
| "grad_norm": 3.5057809352874756, | |
| "learning_rate": 4.912961803358409e-07, | |
| "loss": 0.17236268520355225, | |
| "step": 2570 | |
| }, | |
| { | |
| "epoch": 5.4261603375527425, | |
| "grad_norm": 7.570180416107178, | |
| "learning_rate": 4.899842014600768e-07, | |
| "loss": 0.542130708694458, | |
| "step": 2572 | |
| }, | |
| { | |
| "epoch": 5.430379746835443, | |
| "grad_norm": 0.8830806612968445, | |
| "learning_rate": 4.886814761794694e-07, | |
| "loss": 0.08617094159126282, | |
| "step": 2574 | |
| }, | |
| { | |
| "epoch": 5.434599156118144, | |
| "grad_norm": 2.9515278339385986, | |
| "learning_rate": 4.873880115435982e-07, | |
| "loss": 0.6731958389282227, | |
| "step": 2576 | |
| }, | |
| { | |
| "epoch": 5.438818565400844, | |
| "grad_norm": 14.474751472473145, | |
| "learning_rate": 4.861038145519302e-07, | |
| "loss": 0.8146198987960815, | |
| "step": 2578 | |
| }, | |
| { | |
| "epoch": 5.443037974683544, | |
| "grad_norm": 4.252223968505859, | |
| "learning_rate": 4.848288921537804e-07, | |
| "loss": 0.7910962104797363, | |
| "step": 2580 | |
| }, | |
| { | |
| "epoch": 5.447257383966245, | |
| "grad_norm": 3.409487009048462, | |
| "learning_rate": 4.835632512482754e-07, | |
| "loss": 0.4601414203643799, | |
| "step": 2582 | |
| }, | |
| { | |
| "epoch": 5.451476793248945, | |
| "grad_norm": 2.4231197834014893, | |
| "learning_rate": 4.823068986843162e-07, | |
| "loss": 0.5326846837997437, | |
| "step": 2584 | |
| }, | |
| { | |
| "epoch": 5.455696202531645, | |
| "grad_norm": 11.150148391723633, | |
| "learning_rate": 4.810598412605407e-07, | |
| "loss": 0.6682008504867554, | |
| "step": 2586 | |
| }, | |
| { | |
| "epoch": 5.459915611814346, | |
| "grad_norm": 1.4135065078735352, | |
| "learning_rate": 4.798220857252866e-07, | |
| "loss": 0.30620691180229187, | |
| "step": 2588 | |
| }, | |
| { | |
| "epoch": 5.4641350210970465, | |
| "grad_norm": 6.7066755294799805, | |
| "learning_rate": 4.785936387765555e-07, | |
| "loss": 0.7434167861938477, | |
| "step": 2590 | |
| }, | |
| { | |
| "epoch": 5.468354430379747, | |
| "grad_norm": 1.7110621929168701, | |
| "learning_rate": 4.773745070619767e-07, | |
| "loss": 0.5532716512680054, | |
| "step": 2592 | |
| }, | |
| { | |
| "epoch": 5.472573839662447, | |
| "grad_norm": 3.8936519622802734, | |
| "learning_rate": 4.761646971787707e-07, | |
| "loss": 0.47537532448768616, | |
| "step": 2594 | |
| }, | |
| { | |
| "epoch": 5.476793248945148, | |
| "grad_norm": 4.289298057556152, | |
| "learning_rate": 4.749642156737138e-07, | |
| "loss": 0.34944185614585876, | |
| "step": 2596 | |
| }, | |
| { | |
| "epoch": 5.481012658227848, | |
| "grad_norm": 2.747558116912842, | |
| "learning_rate": 4.7377306904310233e-07, | |
| "loss": 0.16377092897891998, | |
| "step": 2598 | |
| }, | |
| { | |
| "epoch": 5.485232067510548, | |
| "grad_norm": 0.3106602132320404, | |
| "learning_rate": 4.7259126373271865e-07, | |
| "loss": 0.42584800720214844, | |
| "step": 2600 | |
| }, | |
| { | |
| "epoch": 5.489451476793249, | |
| "grad_norm": 5.100452899932861, | |
| "learning_rate": 4.714188061377942e-07, | |
| "loss": 0.8994771242141724, | |
| "step": 2602 | |
| }, | |
| { | |
| "epoch": 5.493670886075949, | |
| "grad_norm": 7.287261962890625, | |
| "learning_rate": 4.7025570260297703e-07, | |
| "loss": 0.8067635297775269, | |
| "step": 2604 | |
| }, | |
| { | |
| "epoch": 5.4978902953586495, | |
| "grad_norm": 15.138601303100586, | |
| "learning_rate": 4.6910195942229627e-07, | |
| "loss": 0.13593333959579468, | |
| "step": 2606 | |
| }, | |
| { | |
| "epoch": 5.5021097046413505, | |
| "grad_norm": 2.714247226715088, | |
| "learning_rate": 4.6795758283912836e-07, | |
| "loss": 0.3896440267562866, | |
| "step": 2608 | |
| }, | |
| { | |
| "epoch": 5.506329113924051, | |
| "grad_norm": 3.3304672241210938, | |
| "learning_rate": 4.668225790461631e-07, | |
| "loss": 0.0639631599187851, | |
| "step": 2610 | |
| }, | |
| { | |
| "epoch": 5.510548523206751, | |
| "grad_norm": 19.265941619873047, | |
| "learning_rate": 4.6569695418537063e-07, | |
| "loss": 0.2734604477882385, | |
| "step": 2612 | |
| }, | |
| { | |
| "epoch": 5.514767932489452, | |
| "grad_norm": 2.5918619632720947, | |
| "learning_rate": 4.645807143479674e-07, | |
| "loss": 0.8366518616676331, | |
| "step": 2614 | |
| }, | |
| { | |
| "epoch": 5.518987341772152, | |
| "grad_norm": 1.1537539958953857, | |
| "learning_rate": 4.634738655743843e-07, | |
| "loss": 0.4462703466415405, | |
| "step": 2616 | |
| }, | |
| { | |
| "epoch": 5.523206751054852, | |
| "grad_norm": 2.6978986263275146, | |
| "learning_rate": 4.6237641385423225e-07, | |
| "loss": 0.4549875259399414, | |
| "step": 2618 | |
| }, | |
| { | |
| "epoch": 5.527426160337553, | |
| "grad_norm": 2.130697727203369, | |
| "learning_rate": 4.6128836512627204e-07, | |
| "loss": 0.8581835627555847, | |
| "step": 2620 | |
| }, | |
| { | |
| "epoch": 5.531645569620253, | |
| "grad_norm": 4.347284317016602, | |
| "learning_rate": 4.602097252783805e-07, | |
| "loss": 0.5586264133453369, | |
| "step": 2622 | |
| }, | |
| { | |
| "epoch": 5.5358649789029535, | |
| "grad_norm": 14.522599220275879, | |
| "learning_rate": 4.591405001475189e-07, | |
| "loss": 0.8266869783401489, | |
| "step": 2624 | |
| }, | |
| { | |
| "epoch": 5.540084388185654, | |
| "grad_norm": 7.911047458648682, | |
| "learning_rate": 4.58080695519702e-07, | |
| "loss": 0.44375789165496826, | |
| "step": 2626 | |
| }, | |
| { | |
| "epoch": 5.544303797468355, | |
| "grad_norm": 4.837867736816406, | |
| "learning_rate": 4.570303171299666e-07, | |
| "loss": 0.6062820553779602, | |
| "step": 2628 | |
| }, | |
| { | |
| "epoch": 5.548523206751055, | |
| "grad_norm": 5.2242021560668945, | |
| "learning_rate": 4.5598937066233973e-07, | |
| "loss": 0.7080090641975403, | |
| "step": 2630 | |
| }, | |
| { | |
| "epoch": 5.552742616033755, | |
| "grad_norm": 4.157374858856201, | |
| "learning_rate": 4.5495786174980867e-07, | |
| "loss": 0.45279741287231445, | |
| "step": 2632 | |
| }, | |
| { | |
| "epoch": 5.556962025316456, | |
| "grad_norm": 3.1067519187927246, | |
| "learning_rate": 4.539357959742899e-07, | |
| "loss": 0.4694240689277649, | |
| "step": 2634 | |
| }, | |
| { | |
| "epoch": 5.561181434599156, | |
| "grad_norm": 2.8363306522369385, | |
| "learning_rate": 4.5292317886659993e-07, | |
| "loss": 0.37042319774627686, | |
| "step": 2636 | |
| }, | |
| { | |
| "epoch": 5.565400843881856, | |
| "grad_norm": 5.392505168914795, | |
| "learning_rate": 4.51920015906424e-07, | |
| "loss": 0.4348013401031494, | |
| "step": 2638 | |
| }, | |
| { | |
| "epoch": 5.569620253164557, | |
| "grad_norm": 1.3840664625167847, | |
| "learning_rate": 4.5092631252228734e-07, | |
| "loss": 0.2230294644832611, | |
| "step": 2640 | |
| }, | |
| { | |
| "epoch": 5.5738396624472575, | |
| "grad_norm": 2.5080552101135254, | |
| "learning_rate": 4.4994207409152575e-07, | |
| "loss": 0.8967776298522949, | |
| "step": 2642 | |
| }, | |
| { | |
| "epoch": 5.578059071729958, | |
| "grad_norm": 3.2199008464813232, | |
| "learning_rate": 4.48967305940256e-07, | |
| "loss": 0.9706035852432251, | |
| "step": 2644 | |
| }, | |
| { | |
| "epoch": 5.582278481012658, | |
| "grad_norm": 11.19129753112793, | |
| "learning_rate": 4.480020133433474e-07, | |
| "loss": 0.626300573348999, | |
| "step": 2646 | |
| }, | |
| { | |
| "epoch": 5.586497890295359, | |
| "grad_norm": 14.880667686462402, | |
| "learning_rate": 4.47046201524393e-07, | |
| "loss": 0.06408479064702988, | |
| "step": 2648 | |
| }, | |
| { | |
| "epoch": 5.590717299578059, | |
| "grad_norm": 2.3462014198303223, | |
| "learning_rate": 4.460998756556818e-07, | |
| "loss": 0.44877690076828003, | |
| "step": 2650 | |
| }, | |
| { | |
| "epoch": 5.594936708860759, | |
| "grad_norm": 3.08370041847229, | |
| "learning_rate": 4.451630408581701e-07, | |
| "loss": 0.3830834925174713, | |
| "step": 2652 | |
| }, | |
| { | |
| "epoch": 5.59915611814346, | |
| "grad_norm": 7.73508358001709, | |
| "learning_rate": 4.442357022014546e-07, | |
| "loss": 0.15033870935440063, | |
| "step": 2654 | |
| }, | |
| { | |
| "epoch": 5.60337552742616, | |
| "grad_norm": 4.020411014556885, | |
| "learning_rate": 4.43317864703744e-07, | |
| "loss": 0.5552294850349426, | |
| "step": 2656 | |
| }, | |
| { | |
| "epoch": 5.6075949367088604, | |
| "grad_norm": 12.524031639099121, | |
| "learning_rate": 4.4240953333183257e-07, | |
| "loss": 0.1009381040930748, | |
| "step": 2658 | |
| }, | |
| { | |
| "epoch": 5.6118143459915615, | |
| "grad_norm": 3.7477056980133057, | |
| "learning_rate": 4.4151071300107296e-07, | |
| "loss": 0.4878613352775574, | |
| "step": 2660 | |
| }, | |
| { | |
| "epoch": 5.616033755274262, | |
| "grad_norm": 25.352882385253906, | |
| "learning_rate": 4.406214085753499e-07, | |
| "loss": 0.0786014273762703, | |
| "step": 2662 | |
| }, | |
| { | |
| "epoch": 5.620253164556962, | |
| "grad_norm": 5.754502773284912, | |
| "learning_rate": 4.3974162486705327e-07, | |
| "loss": 0.424061119556427, | |
| "step": 2664 | |
| }, | |
| { | |
| "epoch": 5.624472573839663, | |
| "grad_norm": 4.437866687774658, | |
| "learning_rate": 4.38871366637053e-07, | |
| "loss": 0.07941263914108276, | |
| "step": 2666 | |
| }, | |
| { | |
| "epoch": 5.628691983122363, | |
| "grad_norm": 3.537459373474121, | |
| "learning_rate": 4.380106385946721e-07, | |
| "loss": 0.30082571506500244, | |
| "step": 2668 | |
| }, | |
| { | |
| "epoch": 5.632911392405063, | |
| "grad_norm": 2.312814474105835, | |
| "learning_rate": 4.3715944539766257e-07, | |
| "loss": 0.71795254945755, | |
| "step": 2670 | |
| }, | |
| { | |
| "epoch": 5.637130801687764, | |
| "grad_norm": 5.115408897399902, | |
| "learning_rate": 4.3631779165217875e-07, | |
| "loss": 0.811305820941925, | |
| "step": 2672 | |
| }, | |
| { | |
| "epoch": 5.641350210970464, | |
| "grad_norm": 8.744047164916992, | |
| "learning_rate": 4.354856819127537e-07, | |
| "loss": 0.6766564249992371, | |
| "step": 2674 | |
| }, | |
| { | |
| "epoch": 5.6455696202531644, | |
| "grad_norm": 2.2004096508026123, | |
| "learning_rate": 4.346631206822732e-07, | |
| "loss": 0.8192415237426758, | |
| "step": 2676 | |
| }, | |
| { | |
| "epoch": 5.649789029535865, | |
| "grad_norm": 1.8391209840774536, | |
| "learning_rate": 4.338501124119533e-07, | |
| "loss": 0.5205031037330627, | |
| "step": 2678 | |
| }, | |
| { | |
| "epoch": 5.654008438818566, | |
| "grad_norm": 3.9403841495513916, | |
| "learning_rate": 4.330466615013138e-07, | |
| "loss": 0.2361564040184021, | |
| "step": 2680 | |
| }, | |
| { | |
| "epoch": 5.658227848101266, | |
| "grad_norm": 4.0212554931640625, | |
| "learning_rate": 4.3225277229815673e-07, | |
| "loss": 0.45385825634002686, | |
| "step": 2682 | |
| }, | |
| { | |
| "epoch": 5.662447257383966, | |
| "grad_norm": 3.5017166137695312, | |
| "learning_rate": 4.314684490985411e-07, | |
| "loss": 0.2712249159812927, | |
| "step": 2684 | |
| }, | |
| { | |
| "epoch": 5.666666666666667, | |
| "grad_norm": 2.6000726222991943, | |
| "learning_rate": 4.3069369614676086e-07, | |
| "loss": 0.9603966474533081, | |
| "step": 2686 | |
| }, | |
| { | |
| "epoch": 5.670886075949367, | |
| "grad_norm": 2.9337501525878906, | |
| "learning_rate": 4.2992851763532125e-07, | |
| "loss": 0.5593338012695312, | |
| "step": 2688 | |
| }, | |
| { | |
| "epoch": 5.675105485232067, | |
| "grad_norm": 3.656930923461914, | |
| "learning_rate": 4.291729177049159e-07, | |
| "loss": 1.0005125999450684, | |
| "step": 2690 | |
| }, | |
| { | |
| "epoch": 5.679324894514768, | |
| "grad_norm": 12.878107070922852, | |
| "learning_rate": 4.28426900444406e-07, | |
| "loss": 0.04988168552517891, | |
| "step": 2692 | |
| }, | |
| { | |
| "epoch": 5.6835443037974684, | |
| "grad_norm": 2.371689558029175, | |
| "learning_rate": 4.2769046989079543e-07, | |
| "loss": 0.8081762790679932, | |
| "step": 2694 | |
| }, | |
| { | |
| "epoch": 5.687763713080169, | |
| "grad_norm": 5.237072944641113, | |
| "learning_rate": 4.2696363002921135e-07, | |
| "loss": 0.4558332860469818, | |
| "step": 2696 | |
| }, | |
| { | |
| "epoch": 5.691983122362869, | |
| "grad_norm": 2.5988988876342773, | |
| "learning_rate": 4.262463847928818e-07, | |
| "loss": 0.8788666129112244, | |
| "step": 2698 | |
| }, | |
| { | |
| "epoch": 5.69620253164557, | |
| "grad_norm": 3.3628621101379395, | |
| "learning_rate": 4.2553873806311424e-07, | |
| "loss": 0.8370002508163452, | |
| "step": 2700 | |
| }, | |
| { | |
| "epoch": 5.70042194092827, | |
| "grad_norm": 3.688671588897705, | |
| "learning_rate": 4.248406936692747e-07, | |
| "loss": 0.6099220514297485, | |
| "step": 2702 | |
| }, | |
| { | |
| "epoch": 5.70464135021097, | |
| "grad_norm": 1.2157199382781982, | |
| "learning_rate": 4.2415225538876686e-07, | |
| "loss": 0.49759507179260254, | |
| "step": 2704 | |
| }, | |
| { | |
| "epoch": 5.708860759493671, | |
| "grad_norm": 0.465036541223526, | |
| "learning_rate": 4.2347342694701206e-07, | |
| "loss": 0.40582969784736633, | |
| "step": 2706 | |
| }, | |
| { | |
| "epoch": 5.713080168776371, | |
| "grad_norm": 13.82797622680664, | |
| "learning_rate": 4.2280421201742874e-07, | |
| "loss": 0.11880761384963989, | |
| "step": 2708 | |
| }, | |
| { | |
| "epoch": 5.717299578059071, | |
| "grad_norm": 1.5762630701065063, | |
| "learning_rate": 4.221446142214125e-07, | |
| "loss": 0.620478630065918, | |
| "step": 2710 | |
| }, | |
| { | |
| "epoch": 5.7215189873417724, | |
| "grad_norm": 4.519263744354248, | |
| "learning_rate": 4.214946371283172e-07, | |
| "loss": 0.8996577262878418, | |
| "step": 2712 | |
| }, | |
| { | |
| "epoch": 5.725738396624473, | |
| "grad_norm": 8.791622161865234, | |
| "learning_rate": 4.2085428425543474e-07, | |
| "loss": 0.6637638807296753, | |
| "step": 2714 | |
| }, | |
| { | |
| "epoch": 5.729957805907173, | |
| "grad_norm": 3.510023832321167, | |
| "learning_rate": 4.202235590679763e-07, | |
| "loss": 0.77869713306427, | |
| "step": 2716 | |
| }, | |
| { | |
| "epoch": 5.734177215189874, | |
| "grad_norm": 5.473074913024902, | |
| "learning_rate": 4.1960246497905417e-07, | |
| "loss": 0.8682685494422913, | |
| "step": 2718 | |
| }, | |
| { | |
| "epoch": 5.738396624472574, | |
| "grad_norm": 2.2831952571868896, | |
| "learning_rate": 4.1899100534966263e-07, | |
| "loss": 0.8572003841400146, | |
| "step": 2720 | |
| }, | |
| { | |
| "epoch": 5.742616033755274, | |
| "grad_norm": 4.826292037963867, | |
| "learning_rate": 4.183891834886598e-07, | |
| "loss": 0.834069013595581, | |
| "step": 2722 | |
| }, | |
| { | |
| "epoch": 5.746835443037975, | |
| "grad_norm": 32.02092742919922, | |
| "learning_rate": 4.177970026527499e-07, | |
| "loss": 0.22675754129886627, | |
| "step": 2724 | |
| }, | |
| { | |
| "epoch": 5.751054852320675, | |
| "grad_norm": 2.374525308609009, | |
| "learning_rate": 4.1721446604646607e-07, | |
| "loss": 0.6690686345100403, | |
| "step": 2726 | |
| }, | |
| { | |
| "epoch": 5.755274261603375, | |
| "grad_norm": 2.256140947341919, | |
| "learning_rate": 4.1664157682215173e-07, | |
| "loss": 0.7398881316184998, | |
| "step": 2728 | |
| }, | |
| { | |
| "epoch": 5.759493670886076, | |
| "grad_norm": 4.3521504402160645, | |
| "learning_rate": 4.1607833807994547e-07, | |
| "loss": 0.8732868432998657, | |
| "step": 2730 | |
| }, | |
| { | |
| "epoch": 5.763713080168777, | |
| "grad_norm": 6.75162410736084, | |
| "learning_rate": 4.155247528677621e-07, | |
| "loss": 0.7909585237503052, | |
| "step": 2732 | |
| }, | |
| { | |
| "epoch": 5.767932489451477, | |
| "grad_norm": 31.269031524658203, | |
| "learning_rate": 4.1498082418127807e-07, | |
| "loss": 0.2190740704536438, | |
| "step": 2734 | |
| }, | |
| { | |
| "epoch": 5.772151898734177, | |
| "grad_norm": 7.679251194000244, | |
| "learning_rate": 4.1444655496391376e-07, | |
| "loss": 0.46999984979629517, | |
| "step": 2736 | |
| }, | |
| { | |
| "epoch": 5.776371308016878, | |
| "grad_norm": 2.543074607849121, | |
| "learning_rate": 4.139219481068185e-07, | |
| "loss": 0.884986162185669, | |
| "step": 2738 | |
| }, | |
| { | |
| "epoch": 5.780590717299578, | |
| "grad_norm": 2.4317591190338135, | |
| "learning_rate": 4.13407006448855e-07, | |
| "loss": 0.5444875955581665, | |
| "step": 2740 | |
| }, | |
| { | |
| "epoch": 5.784810126582278, | |
| "grad_norm": 2.9350624084472656, | |
| "learning_rate": 4.1290173277658303e-07, | |
| "loss": 0.8912389278411865, | |
| "step": 2742 | |
| }, | |
| { | |
| "epoch": 5.789029535864979, | |
| "grad_norm": 7.446691513061523, | |
| "learning_rate": 4.124061298242451e-07, | |
| "loss": 0.5339520573616028, | |
| "step": 2744 | |
| }, | |
| { | |
| "epoch": 5.793248945147679, | |
| "grad_norm": 5.2088704109191895, | |
| "learning_rate": 4.119202002737515e-07, | |
| "loss": 0.45539939403533936, | |
| "step": 2746 | |
| }, | |
| { | |
| "epoch": 5.7974683544303796, | |
| "grad_norm": 3.678557872772217, | |
| "learning_rate": 4.1144394675466634e-07, | |
| "loss": 0.8749001026153564, | |
| "step": 2748 | |
| }, | |
| { | |
| "epoch": 5.80168776371308, | |
| "grad_norm": 10.216012954711914, | |
| "learning_rate": 4.109773718441916e-07, | |
| "loss": 0.7841247320175171, | |
| "step": 2750 | |
| }, | |
| { | |
| "epoch": 5.805907172995781, | |
| "grad_norm": 2.440023422241211, | |
| "learning_rate": 4.105204780671556e-07, | |
| "loss": 0.8511307239532471, | |
| "step": 2752 | |
| }, | |
| { | |
| "epoch": 5.810126582278481, | |
| "grad_norm": 7.605076789855957, | |
| "learning_rate": 4.100732678959971e-07, | |
| "loss": 1.0421419143676758, | |
| "step": 2754 | |
| }, | |
| { | |
| "epoch": 5.814345991561181, | |
| "grad_norm": 4.731003284454346, | |
| "learning_rate": 4.0963574375075354e-07, | |
| "loss": 0.4821122884750366, | |
| "step": 2756 | |
| }, | |
| { | |
| "epoch": 5.818565400843882, | |
| "grad_norm": 3.862736463546753, | |
| "learning_rate": 4.092079079990471e-07, | |
| "loss": 0.05994529277086258, | |
| "step": 2758 | |
| }, | |
| { | |
| "epoch": 5.822784810126582, | |
| "grad_norm": 2.8706905841827393, | |
| "learning_rate": 4.087897629560719e-07, | |
| "loss": 0.6597020626068115, | |
| "step": 2760 | |
| }, | |
| { | |
| "epoch": 5.827004219409282, | |
| "grad_norm": 10.528990745544434, | |
| "learning_rate": 4.0838131088458207e-07, | |
| "loss": 0.5567920804023743, | |
| "step": 2762 | |
| }, | |
| { | |
| "epoch": 5.831223628691983, | |
| "grad_norm": 12.403848648071289, | |
| "learning_rate": 4.079825539948785e-07, | |
| "loss": 0.22084438800811768, | |
| "step": 2764 | |
| }, | |
| { | |
| "epoch": 5.8354430379746836, | |
| "grad_norm": 3.479530096054077, | |
| "learning_rate": 4.0759349444479853e-07, | |
| "loss": 0.8606102466583252, | |
| "step": 2766 | |
| }, | |
| { | |
| "epoch": 5.839662447257384, | |
| "grad_norm": 2.724365711212158, | |
| "learning_rate": 4.072141343397021e-07, | |
| "loss": 0.45490285754203796, | |
| "step": 2768 | |
| }, | |
| { | |
| "epoch": 5.843881856540085, | |
| "grad_norm": 6.362490653991699, | |
| "learning_rate": 4.068444757324621e-07, | |
| "loss": 0.8239868879318237, | |
| "step": 2770 | |
| }, | |
| { | |
| "epoch": 5.848101265822785, | |
| "grad_norm": 10.339574813842773, | |
| "learning_rate": 4.064845206234523e-07, | |
| "loss": 0.5215486884117126, | |
| "step": 2772 | |
| }, | |
| { | |
| "epoch": 5.852320675105485, | |
| "grad_norm": 186.8642578125, | |
| "learning_rate": 4.061342709605374e-07, | |
| "loss": 0.5665589570999146, | |
| "step": 2774 | |
| }, | |
| { | |
| "epoch": 5.856540084388186, | |
| "grad_norm": 2.5368990898132324, | |
| "learning_rate": 4.057937286390615e-07, | |
| "loss": 0.7514277100563049, | |
| "step": 2776 | |
| }, | |
| { | |
| "epoch": 5.860759493670886, | |
| "grad_norm": 7.951842784881592, | |
| "learning_rate": 4.0546289550183833e-07, | |
| "loss": 0.8747674822807312, | |
| "step": 2778 | |
| }, | |
| { | |
| "epoch": 5.864978902953586, | |
| "grad_norm": 4.173673152923584, | |
| "learning_rate": 4.0514177333914147e-07, | |
| "loss": 0.8620109558105469, | |
| "step": 2780 | |
| }, | |
| { | |
| "epoch": 5.869198312236287, | |
| "grad_norm": 2.6262011528015137, | |
| "learning_rate": 4.0483036388869426e-07, | |
| "loss": 0.8278003931045532, | |
| "step": 2782 | |
| }, | |
| { | |
| "epoch": 5.8734177215189876, | |
| "grad_norm": 3.4531075954437256, | |
| "learning_rate": 4.045286688356607e-07, | |
| "loss": 0.8439078330993652, | |
| "step": 2784 | |
| }, | |
| { | |
| "epoch": 5.877637130801688, | |
| "grad_norm": 17.26287269592285, | |
| "learning_rate": 4.0423668981263635e-07, | |
| "loss": 0.2546153664588928, | |
| "step": 2786 | |
| }, | |
| { | |
| "epoch": 5.881856540084388, | |
| "grad_norm": 2.9670450687408447, | |
| "learning_rate": 4.039544283996389e-07, | |
| "loss": 0.803874135017395, | |
| "step": 2788 | |
| }, | |
| { | |
| "epoch": 5.886075949367089, | |
| "grad_norm": 1.728909969329834, | |
| "learning_rate": 4.036818861241004e-07, | |
| "loss": 0.11378484964370728, | |
| "step": 2790 | |
| }, | |
| { | |
| "epoch": 5.890295358649789, | |
| "grad_norm": 15.379825592041016, | |
| "learning_rate": 4.0341906446085865e-07, | |
| "loss": 0.40370649099349976, | |
| "step": 2792 | |
| }, | |
| { | |
| "epoch": 5.894514767932489, | |
| "grad_norm": 14.338972091674805, | |
| "learning_rate": 4.0316596483214915e-07, | |
| "loss": 0.7983355522155762, | |
| "step": 2794 | |
| }, | |
| { | |
| "epoch": 5.89873417721519, | |
| "grad_norm": 3.563936710357666, | |
| "learning_rate": 4.0292258860759767e-07, | |
| "loss": 0.9050275087356567, | |
| "step": 2796 | |
| }, | |
| { | |
| "epoch": 5.90295358649789, | |
| "grad_norm": 2.441664934158325, | |
| "learning_rate": 4.026889371042125e-07, | |
| "loss": 0.4420316219329834, | |
| "step": 2798 | |
| }, | |
| { | |
| "epoch": 5.9071729957805905, | |
| "grad_norm": 2.3287241458892822, | |
| "learning_rate": 4.024650115863774e-07, | |
| "loss": 0.7599180936813354, | |
| "step": 2800 | |
| }, | |
| { | |
| "epoch": 5.911392405063291, | |
| "grad_norm": 3.5945613384246826, | |
| "learning_rate": 4.022508132658452e-07, | |
| "loss": 0.6878820657730103, | |
| "step": 2802 | |
| }, | |
| { | |
| "epoch": 5.915611814345992, | |
| "grad_norm": 12.153562545776367, | |
| "learning_rate": 4.020463433017305e-07, | |
| "loss": 0.40130820870399475, | |
| "step": 2804 | |
| }, | |
| { | |
| "epoch": 5.919831223628692, | |
| "grad_norm": 3.069974899291992, | |
| "learning_rate": 4.0185160280050384e-07, | |
| "loss": 0.095822274684906, | |
| "step": 2806 | |
| }, | |
| { | |
| "epoch": 5.924050632911392, | |
| "grad_norm": 8.717458724975586, | |
| "learning_rate": 4.01666592815986e-07, | |
| "loss": 0.9885622262954712, | |
| "step": 2808 | |
| }, | |
| { | |
| "epoch": 5.928270042194093, | |
| "grad_norm": 7.206968307495117, | |
| "learning_rate": 4.014913143493415e-07, | |
| "loss": 0.04864209145307541, | |
| "step": 2810 | |
| }, | |
| { | |
| "epoch": 5.932489451476793, | |
| "grad_norm": 3.1413886547088623, | |
| "learning_rate": 4.0132576834907404e-07, | |
| "loss": 0.43854427337646484, | |
| "step": 2812 | |
| }, | |
| { | |
| "epoch": 5.936708860759493, | |
| "grad_norm": 0.46113014221191406, | |
| "learning_rate": 4.0116995571102056e-07, | |
| "loss": 0.4027542471885681, | |
| "step": 2814 | |
| }, | |
| { | |
| "epoch": 5.940928270042194, | |
| "grad_norm": 3.120668888092041, | |
| "learning_rate": 4.0102387727834705e-07, | |
| "loss": 0.6854231357574463, | |
| "step": 2816 | |
| }, | |
| { | |
| "epoch": 5.9451476793248945, | |
| "grad_norm": 2.3229949474334717, | |
| "learning_rate": 4.008875338415438e-07, | |
| "loss": 0.5028409361839294, | |
| "step": 2818 | |
| }, | |
| { | |
| "epoch": 5.949367088607595, | |
| "grad_norm": 0.521416187286377, | |
| "learning_rate": 4.007609261384207e-07, | |
| "loss": 0.43289196491241455, | |
| "step": 2820 | |
| }, | |
| { | |
| "epoch": 5.953586497890296, | |
| "grad_norm": 2.9964866638183594, | |
| "learning_rate": 4.006440548541041e-07, | |
| "loss": 0.9015544652938843, | |
| "step": 2822 | |
| }, | |
| { | |
| "epoch": 5.957805907172996, | |
| "grad_norm": 1.731990933418274, | |
| "learning_rate": 4.005369206210321e-07, | |
| "loss": 0.43057486414909363, | |
| "step": 2824 | |
| }, | |
| { | |
| "epoch": 5.962025316455696, | |
| "grad_norm": 2.3747055530548096, | |
| "learning_rate": 4.0043952401895207e-07, | |
| "loss": 0.8347324132919312, | |
| "step": 2826 | |
| }, | |
| { | |
| "epoch": 5.966244725738397, | |
| "grad_norm": 1.1531779766082764, | |
| "learning_rate": 4.0035186557491683e-07, | |
| "loss": 0.44332531094551086, | |
| "step": 2828 | |
| }, | |
| { | |
| "epoch": 5.970464135021097, | |
| "grad_norm": 2.191092014312744, | |
| "learning_rate": 4.0027394576328213e-07, | |
| "loss": 0.39579838514328003, | |
| "step": 2830 | |
| }, | |
| { | |
| "epoch": 5.974683544303797, | |
| "grad_norm": 7.761366367340088, | |
| "learning_rate": 4.0020576500570355e-07, | |
| "loss": 1.0412178039550781, | |
| "step": 2832 | |
| }, | |
| { | |
| "epoch": 5.978902953586498, | |
| "grad_norm": 0.7852330207824707, | |
| "learning_rate": 4.0014732367113567e-07, | |
| "loss": 0.36100465059280396, | |
| "step": 2834 | |
| }, | |
| { | |
| "epoch": 5.9831223628691985, | |
| "grad_norm": 21.701784133911133, | |
| "learning_rate": 4.000986220758279e-07, | |
| "loss": 0.07913509011268616, | |
| "step": 2836 | |
| }, | |
| { | |
| "epoch": 5.987341772151899, | |
| "grad_norm": 5.154250621795654, | |
| "learning_rate": 4.0005966048332503e-07, | |
| "loss": 0.5702348351478577, | |
| "step": 2838 | |
| }, | |
| { | |
| "epoch": 5.991561181434599, | |
| "grad_norm": 1.1740047931671143, | |
| "learning_rate": 4.0003043910446375e-07, | |
| "loss": 0.47653162479400635, | |
| "step": 2840 | |
| }, | |
| { | |
| "epoch": 5.9957805907173, | |
| "grad_norm": 6.092247009277344, | |
| "learning_rate": 4.000109580973733e-07, | |
| "loss": 0.811444878578186, | |
| "step": 2842 | |
| }, | |
| { | |
| "epoch": 6.0, | |
| "grad_norm": 3.5838730335235596, | |
| "learning_rate": 4.0000121756747285e-07, | |
| "loss": 0.7996691465377808, | |
| "step": 2844 | |
| }, | |
| { | |
| "epoch": 6.0, | |
| "step": 2844, | |
| "total_flos": 5.392281114922451e+18, | |
| "train_loss": 0.8344338661696338, | |
| "train_runtime": 6866.9503, | |
| "train_samples_per_second": 12.425, | |
| "train_steps_per_second": 0.414 | |
| } | |
| ], | |
| "logging_steps": 2, | |
| "max_steps": 2844, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 6, | |
| "save_steps": 99999, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": false, | |
| "should_training_stop": false | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 5.392281114922451e+18, | |
| "train_batch_size": 3, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |