Image-Text-to-Text
Transformers
Safetensors
qwen3_5
llama-factory
full
Generated from Trainer
conversational
Instructions to use furproxy/9b-107 with libraries, inference providers, notebooks, and local apps. Follow these links to get started.
- Libraries
- Transformers
How to use furproxy/9b-107 with Transformers:
```python
# Use a pipeline as a high-level helper
from transformers import pipeline

pipe = pipeline("image-text-to-text", model="furproxy/9b-107")
messages = [
    {
        "role": "user",
        "content": [
            {"type": "image", "url": "https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/p-blog/candy.JPG"},
            {"type": "text", "text": "What animal is on the candy?"}
        ]
    },
]
pipe(text=messages)
```
```python
# Load model directly
from transformers import AutoProcessor, AutoModelForImageTextToText

processor = AutoProcessor.from_pretrained("furproxy/9b-107")
model = AutoModelForImageTextToText.from_pretrained("furproxy/9b-107")
messages = [
    {
        "role": "user",
        "content": [
            {"type": "image", "url": "https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/p-blog/candy.JPG"},
            {"type": "text", "text": "What animal is on the candy?"}
        ]
    },
]
inputs = processor.apply_chat_template(
    messages,
    add_generation_prompt=True,
    tokenize=True,
    return_dict=True,
    return_tensors="pt",
).to(model.device)
outputs = model.generate(**inputs, max_new_tokens=40)
print(processor.decode(outputs[0][inputs["input_ids"].shape[-1]:]))
```
- Notebooks
- Google Colab
- Kaggle
- Local Apps Settings
- vLLM
How to use furproxy/9b-107 with vLLM:
Install from pip and serve model
```bash
# Install vLLM from pip:
pip install vllm

# Start the vLLM server:
vllm serve "furproxy/9b-107"

# Call the server using curl (OpenAI-compatible API):
curl -X POST "http://localhost:8000/v1/chat/completions" \
    -H "Content-Type: application/json" \
    --data '{
        "model": "furproxy/9b-107",
        "messages": [
            {
                "role": "user",
                "content": [
                    {
                        "type": "text",
                        "text": "Describe this image in one sentence."
                    },
                    {
                        "type": "image_url",
                        "image_url": {
                            "url": "https://cdn.britannica.com/61/93061-050-99147DCE/Statue-of-Liberty-Island-New-York-Bay.jpg"
                        }
                    }
                ]
            }
        ]
    }'
```
Use Docker
docker model run hf.co/furproxy/9b-107
- SGLang
How to use furproxy/9b-107 with SGLang:
Install from pip and serve model
```bash
# Install SGLang from pip:
pip install sglang

# Start the SGLang server:
python3 -m sglang.launch_server \
    --model-path "furproxy/9b-107" \
    --host 0.0.0.0 \
    --port 30000

# Call the server using curl (OpenAI-compatible API):
curl -X POST "http://localhost:30000/v1/chat/completions" \
    -H "Content-Type: application/json" \
    --data '{
        "model": "furproxy/9b-107",
        "messages": [
            {
                "role": "user",
                "content": [
                    {
                        "type": "text",
                        "text": "Describe this image in one sentence."
                    },
                    {
                        "type": "image_url",
                        "image_url": {
                            "url": "https://cdn.britannica.com/61/93061-050-99147DCE/Statue-of-Liberty-Island-New-York-Bay.jpg"
                        }
                    }
                ]
            }
        ]
    }'
```
Use Docker images
```bash
docker run --gpus all \
    --shm-size 32g \
    -p 30000:30000 \
    -v ~/.cache/huggingface:/root/.cache/huggingface \
    --env "HF_TOKEN=<secret>" \
    --ipc=host \
    lmsysorg/sglang:latest \
    python3 -m sglang.launch_server \
        --model-path "furproxy/9b-107" \
        --host 0.0.0.0 \
        --port 30000

# Call the server using curl (OpenAI-compatible API):
curl -X POST "http://localhost:30000/v1/chat/completions" \
    -H "Content-Type: application/json" \
    --data '{
        "model": "furproxy/9b-107",
        "messages": [
            {
                "role": "user",
                "content": [
                    {
                        "type": "text",
                        "text": "Describe this image in one sentence."
                    },
                    {
                        "type": "image_url",
                        "image_url": {
                            "url": "https://cdn.britannica.com/61/93061-050-99147DCE/Statue-of-Liberty-Island-New-York-Bay.jpg"
                        }
                    }
                ]
            }
        ]
    }'
```
- Docker Model Runner
How to use furproxy/9b-107 with Docker Model Runner:
docker model run hf.co/furproxy/9b-107
| { | |
| "best_global_step": null, | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 4.0, | |
| "eval_steps": 500, | |
| "global_step": 1896, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.004219409282700422, | |
| "grad_norm": 8.808255195617676, | |
| "learning_rate": 8.421052631578947e-08, | |
| "loss": 2.1962406635284424, | |
| "step": 2 | |
| }, | |
| { | |
| "epoch": 0.008438818565400843, | |
| "grad_norm": 11.09923267364502, | |
| "learning_rate": 2.526315789473684e-07, | |
| "loss": 1.780366063117981, | |
| "step": 4 | |
| }, | |
| { | |
| "epoch": 0.012658227848101266, | |
| "grad_norm": 2.419753313064575, | |
| "learning_rate": 4.2105263157894733e-07, | |
| "loss": 1.93634033203125, | |
| "step": 6 | |
| }, | |
| { | |
| "epoch": 0.016877637130801686, | |
| "grad_norm": 3.9049134254455566, | |
| "learning_rate": 5.894736842105262e-07, | |
| "loss": 1.943023443222046, | |
| "step": 8 | |
| }, | |
| { | |
| "epoch": 0.02109704641350211, | |
| "grad_norm": 6.477982044219971, | |
| "learning_rate": 7.578947368421053e-07, | |
| "loss": 1.8409148454666138, | |
| "step": 10 | |
| }, | |
| { | |
| "epoch": 0.02531645569620253, | |
| "grad_norm": 1.0134243965148926, | |
| "learning_rate": 9.263157894736841e-07, | |
| "loss": 1.3077127933502197, | |
| "step": 12 | |
| }, | |
| { | |
| "epoch": 0.029535864978902954, | |
| "grad_norm": 3.247878074645996, | |
| "learning_rate": 1.0947368421052632e-06, | |
| "loss": 1.6219741106033325, | |
| "step": 14 | |
| }, | |
| { | |
| "epoch": 0.03375527426160337, | |
| "grad_norm": 7.898465156555176, | |
| "learning_rate": 1.263157894736842e-06, | |
| "loss": 2.037022113800049, | |
| "step": 16 | |
| }, | |
| { | |
| "epoch": 0.0379746835443038, | |
| "grad_norm": 1.3195950984954834, | |
| "learning_rate": 1.431578947368421e-06, | |
| "loss": 1.7350990772247314, | |
| "step": 18 | |
| }, | |
| { | |
| "epoch": 0.04219409282700422, | |
| "grad_norm": 1.8259081840515137, | |
| "learning_rate": 1.6e-06, | |
| "loss": 1.8126976490020752, | |
| "step": 20 | |
| }, | |
| { | |
| "epoch": 0.046413502109704644, | |
| "grad_norm": 1.4393107891082764, | |
| "learning_rate": 1.768421052631579e-06, | |
| "loss": 1.4626768827438354, | |
| "step": 22 | |
| }, | |
| { | |
| "epoch": 0.05063291139240506, | |
| "grad_norm": 2.4846441745758057, | |
| "learning_rate": 1.936842105263158e-06, | |
| "loss": 1.600361704826355, | |
| "step": 24 | |
| }, | |
| { | |
| "epoch": 0.05485232067510549, | |
| "grad_norm": 2.291980743408203, | |
| "learning_rate": 2.1052631578947366e-06, | |
| "loss": 1.3303472995758057, | |
| "step": 26 | |
| }, | |
| { | |
| "epoch": 0.05907172995780591, | |
| "grad_norm": 1.7172958850860596, | |
| "learning_rate": 2.273684210526316e-06, | |
| "loss": 1.7306660413742065, | |
| "step": 28 | |
| }, | |
| { | |
| "epoch": 0.06329113924050633, | |
| "grad_norm": 2.336642026901245, | |
| "learning_rate": 2.4421052631578946e-06, | |
| "loss": 1.3191598653793335, | |
| "step": 30 | |
| }, | |
| { | |
| "epoch": 0.06751054852320675, | |
| "grad_norm": 1.4607104063034058, | |
| "learning_rate": 2.6105263157894738e-06, | |
| "loss": 1.550937533378601, | |
| "step": 32 | |
| }, | |
| { | |
| "epoch": 0.07172995780590717, | |
| "grad_norm": 3.0410056114196777, | |
| "learning_rate": 2.7789473684210525e-06, | |
| "loss": 1.0037612915039062, | |
| "step": 34 | |
| }, | |
| { | |
| "epoch": 0.0759493670886076, | |
| "grad_norm": 1.8816224336624146, | |
| "learning_rate": 2.9473684210526313e-06, | |
| "loss": 1.5629675388336182, | |
| "step": 36 | |
| }, | |
| { | |
| "epoch": 0.08016877637130802, | |
| "grad_norm": 2.050464391708374, | |
| "learning_rate": 3.1157894736842105e-06, | |
| "loss": 1.1124558448791504, | |
| "step": 38 | |
| }, | |
| { | |
| "epoch": 0.08438818565400844, | |
| "grad_norm": 1.7500700950622559, | |
| "learning_rate": 3.2842105263157892e-06, | |
| "loss": 1.5181232690811157, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 0.08860759493670886, | |
| "grad_norm": 3.6279757022857666, | |
| "learning_rate": 3.4526315789473684e-06, | |
| "loss": 1.064979076385498, | |
| "step": 42 | |
| }, | |
| { | |
| "epoch": 0.09282700421940929, | |
| "grad_norm": 3.692965507507324, | |
| "learning_rate": 3.621052631578947e-06, | |
| "loss": 1.7742705345153809, | |
| "step": 44 | |
| }, | |
| { | |
| "epoch": 0.0970464135021097, | |
| "grad_norm": 2.689681053161621, | |
| "learning_rate": 3.789473684210526e-06, | |
| "loss": 1.652271032333374, | |
| "step": 46 | |
| }, | |
| { | |
| "epoch": 0.10126582278481013, | |
| "grad_norm": 1.1244308948516846, | |
| "learning_rate": 3.957894736842105e-06, | |
| "loss": 1.5283629894256592, | |
| "step": 48 | |
| }, | |
| { | |
| "epoch": 0.10548523206751055, | |
| "grad_norm": 1.6453142166137695, | |
| "learning_rate": 4.126315789473685e-06, | |
| "loss": 0.7807677388191223, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 0.10970464135021098, | |
| "grad_norm": 1.3963041305541992, | |
| "learning_rate": 4.294736842105263e-06, | |
| "loss": 0.8492421507835388, | |
| "step": 52 | |
| }, | |
| { | |
| "epoch": 0.11392405063291139, | |
| "grad_norm": 1.8241719007492065, | |
| "learning_rate": 4.463157894736841e-06, | |
| "loss": 0.7646088600158691, | |
| "step": 54 | |
| }, | |
| { | |
| "epoch": 0.11814345991561181, | |
| "grad_norm": 5.430877208709717, | |
| "learning_rate": 4.631578947368421e-06, | |
| "loss": 1.385468602180481, | |
| "step": 56 | |
| }, | |
| { | |
| "epoch": 0.12236286919831224, | |
| "grad_norm": 4.216091632843018, | |
| "learning_rate": 4.8e-06, | |
| "loss": 0.6626491546630859, | |
| "step": 58 | |
| }, | |
| { | |
| "epoch": 0.12658227848101267, | |
| "grad_norm": 2.5527150630950928, | |
| "learning_rate": 4.968421052631579e-06, | |
| "loss": 1.4430313110351562, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 0.1308016877637131, | |
| "grad_norm": 2.92517352104187, | |
| "learning_rate": 5.136842105263157e-06, | |
| "loss": 1.4682120084762573, | |
| "step": 62 | |
| }, | |
| { | |
| "epoch": 0.1350210970464135, | |
| "grad_norm": 0.9485960602760315, | |
| "learning_rate": 5.305263157894736e-06, | |
| "loss": 1.0755484104156494, | |
| "step": 64 | |
| }, | |
| { | |
| "epoch": 0.13924050632911392, | |
| "grad_norm": 3.126896619796753, | |
| "learning_rate": 5.473684210526316e-06, | |
| "loss": 0.8076987266540527, | |
| "step": 66 | |
| }, | |
| { | |
| "epoch": 0.14345991561181434, | |
| "grad_norm": 1.318830966949463, | |
| "learning_rate": 5.6421052631578944e-06, | |
| "loss": 1.0927525758743286, | |
| "step": 68 | |
| }, | |
| { | |
| "epoch": 0.14767932489451477, | |
| "grad_norm": 3.404849052429199, | |
| "learning_rate": 5.810526315789474e-06, | |
| "loss": 0.9728879332542419, | |
| "step": 70 | |
| }, | |
| { | |
| "epoch": 0.1518987341772152, | |
| "grad_norm": 3.532927989959717, | |
| "learning_rate": 5.978947368421052e-06, | |
| "loss": 1.265703797340393, | |
| "step": 72 | |
| }, | |
| { | |
| "epoch": 0.15611814345991562, | |
| "grad_norm": 2.622828245162964, | |
| "learning_rate": 6.147368421052631e-06, | |
| "loss": 1.0082859992980957, | |
| "step": 74 | |
| }, | |
| { | |
| "epoch": 0.16033755274261605, | |
| "grad_norm": 3.0084891319274902, | |
| "learning_rate": 6.31578947368421e-06, | |
| "loss": 0.8589051365852356, | |
| "step": 76 | |
| }, | |
| { | |
| "epoch": 0.16455696202531644, | |
| "grad_norm": 1.5682191848754883, | |
| "learning_rate": 6.484210526315789e-06, | |
| "loss": 1.3428035974502563, | |
| "step": 78 | |
| }, | |
| { | |
| "epoch": 0.16877637130801687, | |
| "grad_norm": 1.149340033531189, | |
| "learning_rate": 6.652631578947369e-06, | |
| "loss": 1.3348019123077393, | |
| "step": 80 | |
| }, | |
| { | |
| "epoch": 0.1729957805907173, | |
| "grad_norm": 1.4813284873962402, | |
| "learning_rate": 6.821052631578947e-06, | |
| "loss": 1.3795466423034668, | |
| "step": 82 | |
| }, | |
| { | |
| "epoch": 0.17721518987341772, | |
| "grad_norm": 2.05072283744812, | |
| "learning_rate": 6.989473684210526e-06, | |
| "loss": 0.6971267461776733, | |
| "step": 84 | |
| }, | |
| { | |
| "epoch": 0.18143459915611815, | |
| "grad_norm": 1.8534572124481201, | |
| "learning_rate": 7.157894736842105e-06, | |
| "loss": 1.113297700881958, | |
| "step": 86 | |
| }, | |
| { | |
| "epoch": 0.18565400843881857, | |
| "grad_norm": 1.253941535949707, | |
| "learning_rate": 7.326315789473684e-06, | |
| "loss": 1.2875044345855713, | |
| "step": 88 | |
| }, | |
| { | |
| "epoch": 0.189873417721519, | |
| "grad_norm": 1.6247971057891846, | |
| "learning_rate": 7.494736842105263e-06, | |
| "loss": 0.6056119203567505, | |
| "step": 90 | |
| }, | |
| { | |
| "epoch": 0.1940928270042194, | |
| "grad_norm": 1.780320167541504, | |
| "learning_rate": 7.663157894736842e-06, | |
| "loss": 1.676588773727417, | |
| "step": 92 | |
| }, | |
| { | |
| "epoch": 0.19831223628691982, | |
| "grad_norm": 2.9397106170654297, | |
| "learning_rate": 7.831578947368421e-06, | |
| "loss": 0.941127359867096, | |
| "step": 94 | |
| }, | |
| { | |
| "epoch": 0.20253164556962025, | |
| "grad_norm": 1.2082825899124146, | |
| "learning_rate": 8e-06, | |
| "loss": 1.4084690809249878, | |
| "step": 96 | |
| }, | |
| { | |
| "epoch": 0.20675105485232068, | |
| "grad_norm": 1.0113117694854736, | |
| "learning_rate": 7.999978091917096e-06, | |
| "loss": 1.4652810096740723, | |
| "step": 98 | |
| }, | |
| { | |
| "epoch": 0.2109704641350211, | |
| "grad_norm": 3.00728178024292, | |
| "learning_rate": 7.999912367935033e-06, | |
| "loss": 1.2376593351364136, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.21518987341772153, | |
| "grad_norm": 2.393007278442383, | |
| "learning_rate": 7.999802828853748e-06, | |
| "loss": 1.051348090171814, | |
| "step": 102 | |
| }, | |
| { | |
| "epoch": 0.21940928270042195, | |
| "grad_norm": 1.1730682849884033, | |
| "learning_rate": 7.999649476006456e-06, | |
| "loss": 0.9889463782310486, | |
| "step": 104 | |
| }, | |
| { | |
| "epoch": 0.22362869198312235, | |
| "grad_norm": 3.6534433364868164, | |
| "learning_rate": 7.99945231125964e-06, | |
| "loss": 0.9653201699256897, | |
| "step": 106 | |
| }, | |
| { | |
| "epoch": 0.22784810126582278, | |
| "grad_norm": 1.5248578786849976, | |
| "learning_rate": 7.999211337013023e-06, | |
| "loss": 0.970741331577301, | |
| "step": 108 | |
| }, | |
| { | |
| "epoch": 0.2320675105485232, | |
| "grad_norm": 1.0831537246704102, | |
| "learning_rate": 7.998926556199543e-06, | |
| "loss": 1.0245221853256226, | |
| "step": 110 | |
| }, | |
| { | |
| "epoch": 0.23628691983122363, | |
| "grad_norm": 1.1785749197006226, | |
| "learning_rate": 7.998597972285308e-06, | |
| "loss": 1.3712621927261353, | |
| "step": 112 | |
| }, | |
| { | |
| "epoch": 0.24050632911392406, | |
| "grad_norm": 1.3017958402633667, | |
| "learning_rate": 7.998225589269567e-06, | |
| "loss": 1.2707055807113647, | |
| "step": 114 | |
| }, | |
| { | |
| "epoch": 0.24472573839662448, | |
| "grad_norm": 2.281855344772339, | |
| "learning_rate": 7.997809411684651e-06, | |
| "loss": 1.5997581481933594, | |
| "step": 116 | |
| }, | |
| { | |
| "epoch": 0.2489451476793249, | |
| "grad_norm": 0.8561908602714539, | |
| "learning_rate": 7.997349444595921e-06, | |
| "loss": 1.2587316036224365, | |
| "step": 118 | |
| }, | |
| { | |
| "epoch": 0.25316455696202533, | |
| "grad_norm": 0.5100749135017395, | |
| "learning_rate": 7.996845693601713e-06, | |
| "loss": 0.9957402348518372, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 0.25738396624472576, | |
| "grad_norm": 3.094721794128418, | |
| "learning_rate": 7.996298164833256e-06, | |
| "loss": 1.258643627166748, | |
| "step": 122 | |
| }, | |
| { | |
| "epoch": 0.2616033755274262, | |
| "grad_norm": 1.02621328830719, | |
| "learning_rate": 7.995706864954613e-06, | |
| "loss": 1.2669998407363892, | |
| "step": 124 | |
| }, | |
| { | |
| "epoch": 0.26582278481012656, | |
| "grad_norm": 0.9123193621635437, | |
| "learning_rate": 7.995071801162584e-06, | |
| "loss": 1.3570575714111328, | |
| "step": 126 | |
| }, | |
| { | |
| "epoch": 0.270042194092827, | |
| "grad_norm": 1.731139898300171, | |
| "learning_rate": 7.99439298118663e-06, | |
| "loss": 0.8451032042503357, | |
| "step": 128 | |
| }, | |
| { | |
| "epoch": 0.2742616033755274, | |
| "grad_norm": 1.6111301183700562, | |
| "learning_rate": 7.993670413288777e-06, | |
| "loss": 1.1453604698181152, | |
| "step": 130 | |
| }, | |
| { | |
| "epoch": 0.27848101265822783, | |
| "grad_norm": 2.9632482528686523, | |
| "learning_rate": 7.992904106263512e-06, | |
| "loss": 1.2021801471710205, | |
| "step": 132 | |
| }, | |
| { | |
| "epoch": 0.28270042194092826, | |
| "grad_norm": 3.683976411819458, | |
| "learning_rate": 7.992094069437679e-06, | |
| "loss": 0.8209038972854614, | |
| "step": 134 | |
| }, | |
| { | |
| "epoch": 0.2869198312236287, | |
| "grad_norm": 0.8348782658576965, | |
| "learning_rate": 7.991240312670361e-06, | |
| "loss": 0.8820058703422546, | |
| "step": 136 | |
| }, | |
| { | |
| "epoch": 0.2911392405063291, | |
| "grad_norm": 2.7286171913146973, | |
| "learning_rate": 7.99034284635277e-06, | |
| "loss": 1.361273169517517, | |
| "step": 138 | |
| }, | |
| { | |
| "epoch": 0.29535864978902954, | |
| "grad_norm": 4.104984283447266, | |
| "learning_rate": 7.989401681408107e-06, | |
| "loss": 0.9259814023971558, | |
| "step": 140 | |
| }, | |
| { | |
| "epoch": 0.29957805907172996, | |
| "grad_norm": 1.7529696226119995, | |
| "learning_rate": 7.988416829291437e-06, | |
| "loss": 1.2620042562484741, | |
| "step": 142 | |
| }, | |
| { | |
| "epoch": 0.3037974683544304, | |
| "grad_norm": 3.8300654888153076, | |
| "learning_rate": 7.987388301989553e-06, | |
| "loss": 1.0700979232788086, | |
| "step": 144 | |
| }, | |
| { | |
| "epoch": 0.3080168776371308, | |
| "grad_norm": 1.7690354585647583, | |
| "learning_rate": 7.986316112020821e-06, | |
| "loss": 1.3733104467391968, | |
| "step": 146 | |
| }, | |
| { | |
| "epoch": 0.31223628691983124, | |
| "grad_norm": 1.0495412349700928, | |
| "learning_rate": 7.985200272435035e-06, | |
| "loss": 1.3526469469070435, | |
| "step": 148 | |
| }, | |
| { | |
| "epoch": 0.31645569620253167, | |
| "grad_norm": 1.3418861627578735, | |
| "learning_rate": 7.984040796813251e-06, | |
| "loss": 1.2077337503433228, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 0.3206751054852321, | |
| "grad_norm": 1.4774577617645264, | |
| "learning_rate": 7.982837699267632e-06, | |
| "loss": 1.2690041065216064, | |
| "step": 152 | |
| }, | |
| { | |
| "epoch": 0.32489451476793246, | |
| "grad_norm": 1.621232032775879, | |
| "learning_rate": 7.981590994441264e-06, | |
| "loss": 1.4557234048843384, | |
| "step": 154 | |
| }, | |
| { | |
| "epoch": 0.3291139240506329, | |
| "grad_norm": 9.58849811553955, | |
| "learning_rate": 7.98030069750799e-06, | |
| "loss": 1.2517377138137817, | |
| "step": 156 | |
| }, | |
| { | |
| "epoch": 0.3333333333333333, | |
| "grad_norm": 0.9245334267616272, | |
| "learning_rate": 7.978966824172219e-06, | |
| "loss": 1.312515377998352, | |
| "step": 158 | |
| }, | |
| { | |
| "epoch": 0.33755274261603374, | |
| "grad_norm": 0.9059025049209595, | |
| "learning_rate": 7.977589390668727e-06, | |
| "loss": 1.2920206785202026, | |
| "step": 160 | |
| }, | |
| { | |
| "epoch": 0.34177215189873417, | |
| "grad_norm": 4.1672210693359375, | |
| "learning_rate": 7.976168413762478e-06, | |
| "loss": 0.8602538108825684, | |
| "step": 162 | |
| }, | |
| { | |
| "epoch": 0.3459915611814346, | |
| "grad_norm": 3.4024016857147217, | |
| "learning_rate": 7.974703910748405e-06, | |
| "loss": 1.214678168296814, | |
| "step": 164 | |
| }, | |
| { | |
| "epoch": 0.350210970464135, | |
| "grad_norm": 2.5402605533599854, | |
| "learning_rate": 7.973195899451203e-06, | |
| "loss": 0.809662401676178, | |
| "step": 166 | |
| }, | |
| { | |
| "epoch": 0.35443037974683544, | |
| "grad_norm": 2.9653215408325195, | |
| "learning_rate": 7.971644398225114e-06, | |
| "loss": 1.2221626043319702, | |
| "step": 168 | |
| }, | |
| { | |
| "epoch": 0.35864978902953587, | |
| "grad_norm": 1.4855773448944092, | |
| "learning_rate": 7.970049425953705e-06, | |
| "loss": 1.3168489933013916, | |
| "step": 170 | |
| }, | |
| { | |
| "epoch": 0.3628691983122363, | |
| "grad_norm": 2.3102357387542725, | |
| "learning_rate": 7.968411002049635e-06, | |
| "loss": 1.1956959962844849, | |
| "step": 172 | |
| }, | |
| { | |
| "epoch": 0.3670886075949367, | |
| "grad_norm": 0.9286043643951416, | |
| "learning_rate": 7.966729146454414e-06, | |
| "loss": 1.0124504566192627, | |
| "step": 174 | |
| }, | |
| { | |
| "epoch": 0.37130801687763715, | |
| "grad_norm": 2.3038041591644287, | |
| "learning_rate": 7.965003879638177e-06, | |
| "loss": 1.30778169631958, | |
| "step": 176 | |
| }, | |
| { | |
| "epoch": 0.3755274261603376, | |
| "grad_norm": 1.806934118270874, | |
| "learning_rate": 7.963235222599414e-06, | |
| "loss": 1.307655930519104, | |
| "step": 178 | |
| }, | |
| { | |
| "epoch": 0.379746835443038, | |
| "grad_norm": 1.0257319211959839, | |
| "learning_rate": 7.961423196864727e-06, | |
| "loss": 0.990490198135376, | |
| "step": 180 | |
| }, | |
| { | |
| "epoch": 0.38396624472573837, | |
| "grad_norm": 0.9438347220420837, | |
| "learning_rate": 7.95956782448857e-06, | |
| "loss": 1.2663373947143555, | |
| "step": 182 | |
| }, | |
| { | |
| "epoch": 0.3881856540084388, | |
| "grad_norm": 0.7051662802696228, | |
| "learning_rate": 7.957669128052967e-06, | |
| "loss": 1.264948844909668, | |
| "step": 184 | |
| }, | |
| { | |
| "epoch": 0.3924050632911392, | |
| "grad_norm": 1.4901031255722046, | |
| "learning_rate": 7.955727130667254e-06, | |
| "loss": 0.5198807120323181, | |
| "step": 186 | |
| }, | |
| { | |
| "epoch": 0.39662447257383965, | |
| "grad_norm": 1.6583565473556519, | |
| "learning_rate": 7.953741855967786e-06, | |
| "loss": 0.9574030041694641, | |
| "step": 188 | |
| }, | |
| { | |
| "epoch": 0.4008438818565401, | |
| "grad_norm": 0.9092651009559631, | |
| "learning_rate": 7.951713328117653e-06, | |
| "loss": 1.0500378608703613, | |
| "step": 190 | |
| }, | |
| { | |
| "epoch": 0.4050632911392405, | |
| "grad_norm": 1.398674726486206, | |
| "learning_rate": 7.949641571806384e-06, | |
| "loss": 1.2852396965026855, | |
| "step": 192 | |
| }, | |
| { | |
| "epoch": 0.4092827004219409, | |
| "grad_norm": 1.3172980546951294, | |
| "learning_rate": 7.947526612249655e-06, | |
| "loss": 1.063001036643982, | |
| "step": 194 | |
| }, | |
| { | |
| "epoch": 0.41350210970464135, | |
| "grad_norm": 1.0889501571655273, | |
| "learning_rate": 7.945368475188967e-06, | |
| "loss": 1.2641280889511108, | |
| "step": 196 | |
| }, | |
| { | |
| "epoch": 0.4177215189873418, | |
| "grad_norm": 1.0665010213851929, | |
| "learning_rate": 7.943167186891349e-06, | |
| "loss": 1.0218112468719482, | |
| "step": 198 | |
| }, | |
| { | |
| "epoch": 0.4219409282700422, | |
| "grad_norm": 3.58223032951355, | |
| "learning_rate": 7.940922774149026e-06, | |
| "loss": 1.0860857963562012, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.42616033755274263, | |
| "grad_norm": 1.089058756828308, | |
| "learning_rate": 7.938635264279095e-06, | |
| "loss": 1.11153244972229, | |
| "step": 202 | |
| }, | |
| { | |
| "epoch": 0.43037974683544306, | |
| "grad_norm": 0.7927507162094116, | |
| "learning_rate": 7.9363046851232e-06, | |
| "loss": 0.9039996862411499, | |
| "step": 204 | |
| }, | |
| { | |
| "epoch": 0.4345991561181435, | |
| "grad_norm": 1.210415005683899, | |
| "learning_rate": 7.933931065047189e-06, | |
| "loss": 1.3628909587860107, | |
| "step": 206 | |
| }, | |
| { | |
| "epoch": 0.4388185654008439, | |
| "grad_norm": 2.830822706222534, | |
| "learning_rate": 7.931514432940762e-06, | |
| "loss": 0.7663958668708801, | |
| "step": 208 | |
| }, | |
| { | |
| "epoch": 0.4430379746835443, | |
| "grad_norm": 0.9303013682365417, | |
| "learning_rate": 7.92905481821713e-06, | |
| "loss": 1.2602308988571167, | |
| "step": 210 | |
| }, | |
| { | |
| "epoch": 0.4472573839662447, | |
| "grad_norm": 2.2079782485961914, | |
| "learning_rate": 7.926552250812647e-06, | |
| "loss": 0.7922911047935486, | |
| "step": 212 | |
| }, | |
| { | |
| "epoch": 0.45147679324894513, | |
| "grad_norm": 1.6323434114456177, | |
| "learning_rate": 7.92400676118646e-06, | |
| "loss": 1.1895182132720947, | |
| "step": 214 | |
| }, | |
| { | |
| "epoch": 0.45569620253164556, | |
| "grad_norm": 1.2990820407867432, | |
| "learning_rate": 7.921418380320117e-06, | |
| "loss": 1.236521601676941, | |
| "step": 216 | |
| }, | |
| { | |
| "epoch": 0.459915611814346, | |
| "grad_norm": 1.2548811435699463, | |
| "learning_rate": 7.918787139717211e-06, | |
| "loss": 1.2785851955413818, | |
| "step": 218 | |
| }, | |
| { | |
| "epoch": 0.4641350210970464, | |
| "grad_norm": 1.9813899993896484, | |
| "learning_rate": 7.916113071402986e-06, | |
| "loss": 1.230564832687378, | |
| "step": 220 | |
| }, | |
| { | |
| "epoch": 0.46835443037974683, | |
| "grad_norm": 3.8140616416931152, | |
| "learning_rate": 7.913396207923946e-06, | |
| "loss": 1.2052173614501953, | |
| "step": 222 | |
| }, | |
| { | |
| "epoch": 0.47257383966244726, | |
| "grad_norm": 1.2737183570861816, | |
| "learning_rate": 7.910636582347466e-06, | |
| "loss": 1.0253933668136597, | |
| "step": 224 | |
| }, | |
| { | |
| "epoch": 0.4767932489451477, | |
| "grad_norm": 2.2479248046875, | |
| "learning_rate": 7.907834228261378e-06, | |
| "loss": 1.259740948677063, | |
| "step": 226 | |
| }, | |
| { | |
| "epoch": 0.4810126582278481, | |
| "grad_norm": 2.846742630004883, | |
| "learning_rate": 7.90498917977358e-06, | |
| "loss": 0.782292366027832, | |
| "step": 228 | |
| }, | |
| { | |
| "epoch": 0.48523206751054854, | |
| "grad_norm": 1.540499210357666, | |
| "learning_rate": 7.9021014715116e-06, | |
| "loss": 0.9024060368537903, | |
| "step": 230 | |
| }, | |
| { | |
| "epoch": 0.48945147679324896, | |
| "grad_norm": 4.1563334465026855, | |
| "learning_rate": 7.89917113862219e-06, | |
| "loss": 0.8967229723930359, | |
| "step": 232 | |
| }, | |
| { | |
| "epoch": 0.4936708860759494, | |
| "grad_norm": 1.8965283632278442, | |
| "learning_rate": 7.896198216770892e-06, | |
| "loss": 1.2712947130203247, | |
| "step": 234 | |
| }, | |
| { | |
| "epoch": 0.4978902953586498, | |
| "grad_norm": 1.1842418909072876, | |
| "learning_rate": 7.893182742141606e-06, | |
| "loss": 1.312996506690979, | |
| "step": 236 | |
| }, | |
| { | |
| "epoch": 0.5021097046413502, | |
| "grad_norm": 5.636916637420654, | |
| "learning_rate": 7.890124751436146e-06, | |
| "loss": 0.9852314591407776, | |
| "step": 238 | |
| }, | |
| { | |
| "epoch": 0.5063291139240507, | |
| "grad_norm": 2.3782975673675537, | |
| "learning_rate": 7.887024281873801e-06, | |
| "loss": 0.5027163028717041, | |
| "step": 240 | |
| }, | |
| { | |
| "epoch": 0.510548523206751, | |
| "grad_norm": 2.305345296859741, | |
| "learning_rate": 7.88388137119087e-06, | |
| "loss": 1.4169656038284302, | |
| "step": 242 | |
| }, | |
| { | |
| "epoch": 0.5147679324894515, | |
| "grad_norm": 3.1710622310638428, | |
| "learning_rate": 7.880696057640214e-06, | |
| "loss": 0.8661314845085144, | |
| "step": 244 | |
| }, | |
| { | |
| "epoch": 0.5189873417721519, | |
| "grad_norm": 1.3543529510498047, | |
| "learning_rate": 7.877468379990784e-06, | |
| "loss": 1.4801579713821411, | |
| "step": 246 | |
| }, | |
| { | |
| "epoch": 0.5232067510548524, | |
| "grad_norm": 1.236095666885376, | |
| "learning_rate": 7.874198377527153e-06, | |
| "loss": 1.1857268810272217, | |
| "step": 248 | |
| }, | |
| { | |
| "epoch": 0.5274261603375527, | |
| "grad_norm": 1.6078490018844604, | |
| "learning_rate": 7.870886090049034e-06, | |
| "loss": 1.301129698753357, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 0.5316455696202531, | |
| "grad_norm": 2.838106632232666, | |
| "learning_rate": 7.867531557870802e-06, | |
| "loss": 1.1084915399551392, | |
| "step": 252 | |
| }, | |
| { | |
| "epoch": 0.5358649789029536, | |
| "grad_norm": 0.7895591855049133, | |
| "learning_rate": 7.864134821820989e-06, | |
| "loss": 1.2187299728393555, | |
| "step": 254 | |
| }, | |
| { | |
| "epoch": 0.540084388185654, | |
| "grad_norm": 0.9526500701904297, | |
| "learning_rate": 7.860695923241808e-06, | |
| "loss": 1.1880545616149902, | |
| "step": 256 | |
| }, | |
| { | |
| "epoch": 0.5443037974683544, | |
| "grad_norm": 5.490752220153809, | |
| "learning_rate": 7.857214903988633e-06, | |
| "loss": 1.0243443250656128, | |
| "step": 258 | |
| }, | |
| { | |
| "epoch": 0.5485232067510548, | |
| "grad_norm": 5.083505153656006, | |
| "learning_rate": 7.853691806429497e-06, | |
| "loss": 0.9623079299926758, | |
| "step": 260 | |
| }, | |
| { | |
| "epoch": 0.5527426160337553, | |
| "grad_norm": 0.914940357208252, | |
| "learning_rate": 7.850126673444574e-06, | |
| "loss": 1.2602107524871826, | |
| "step": 262 | |
| }, | |
| { | |
| "epoch": 0.5569620253164557, | |
| "grad_norm": 1.6456938982009888, | |
| "learning_rate": 7.846519548425655e-06, | |
| "loss": 1.2723337411880493, | |
| "step": 264 | |
| }, | |
| { | |
| "epoch": 0.5611814345991561, | |
| "grad_norm": 1.0772303342819214, | |
| "learning_rate": 7.84287047527563e-06, | |
| "loss": 1.2261412143707275, | |
| "step": 266 | |
| }, | |
| { | |
| "epoch": 0.5654008438818565, | |
| "grad_norm": 1.5974887609481812, | |
| "learning_rate": 7.839179498407939e-06, | |
| "loss": 1.5006755590438843, | |
| "step": 268 | |
| }, | |
| { | |
| "epoch": 0.569620253164557, | |
| "grad_norm": 3.4865124225616455, | |
| "learning_rate": 7.835446662746043e-06, | |
| "loss": 1.0508859157562256, | |
| "step": 270 | |
| }, | |
| { | |
| "epoch": 0.5738396624472574, | |
| "grad_norm": 1.8510380983352661, | |
| "learning_rate": 7.831672013722869e-06, | |
| "loss": 1.303536057472229, | |
| "step": 272 | |
| }, | |
| { | |
| "epoch": 0.5780590717299579, | |
| "grad_norm": 1.9769766330718994, | |
| "learning_rate": 7.827855597280267e-06, | |
| "loss": 1.1014729738235474, | |
| "step": 274 | |
| }, | |
| { | |
| "epoch": 0.5822784810126582, | |
| "grad_norm": 1.2615407705307007, | |
| "learning_rate": 7.82399745986844e-06, | |
| "loss": 1.247708797454834, | |
| "step": 276 | |
| }, | |
| { | |
| "epoch": 0.5864978902953587, | |
| "grad_norm": 1.0950466394424438, | |
| "learning_rate": 7.820097648445383e-06, | |
| "loss": 1.3929113149642944, | |
| "step": 278 | |
| }, | |
| { | |
| "epoch": 0.5907172995780591, | |
| "grad_norm": 0.7477395534515381, | |
| "learning_rate": 7.816156210476316e-06, | |
| "loss": 0.9548027515411377, | |
| "step": 280 | |
| }, | |
| { | |
| "epoch": 0.5949367088607594, | |
| "grad_norm": 0.8824933171272278, | |
| "learning_rate": 7.812173193933098e-06, | |
| "loss": 1.321789264678955, | |
| "step": 282 | |
| }, | |
| { | |
| "epoch": 0.5991561181434599, | |
| "grad_norm": 2.1495587825775146, | |
| "learning_rate": 7.808148647293651e-06, | |
| "loss": 1.5318031311035156, | |
| "step": 284 | |
| }, | |
| { | |
| "epoch": 0.6033755274261603, | |
| "grad_norm": 0.366915225982666, | |
| "learning_rate": 7.804082619541366e-06, | |
| "loss": 1.1145987510681152, | |
| "step": 286 | |
| }, | |
| { | |
| "epoch": 0.6075949367088608, | |
| "grad_norm": 0.9638784527778625, | |
| "learning_rate": 7.799975160164503e-06, | |
| "loss": 1.244531512260437, | |
| "step": 288 | |
| }, | |
| { | |
| "epoch": 0.6118143459915611, | |
| "grad_norm": 1.1089756488800049, | |
| "learning_rate": 7.795826319155596e-06, | |
| "loss": 0.8029107451438904, | |
| "step": 290 | |
| }, | |
| { | |
| "epoch": 0.6160337552742616, | |
| "grad_norm": 1.2907359600067139, | |
| "learning_rate": 7.791636147010842e-06, | |
| "loss": 0.660262942314148, | |
| "step": 292 | |
| }, | |
| { | |
| "epoch": 0.620253164556962, | |
| "grad_norm": 1.7545690536499023, | |
| "learning_rate": 7.787404694729485e-06, | |
| "loss": 1.2182437181472778, | |
| "step": 294 | |
| }, | |
| { | |
| "epoch": 0.6244725738396625, | |
| "grad_norm": 2.1209237575531006, | |
| "learning_rate": 7.783132013813194e-06, | |
| "loss": 0.698481559753418, | |
| "step": 296 | |
| }, | |
| { | |
| "epoch": 0.6286919831223629, | |
| "grad_norm": 15.612320899963379, | |
| "learning_rate": 7.778818156265443e-06, | |
| "loss": 0.6525253057479858, | |
| "step": 298 | |
| }, | |
| { | |
| "epoch": 0.6329113924050633, | |
| "grad_norm": 2.1628663539886475, | |
| "learning_rate": 7.774463174590867e-06, | |
| "loss": 1.7705399990081787, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 0.6371308016877637, | |
| "grad_norm": 0.9601294994354248, | |
| "learning_rate": 7.770067121794634e-06, | |
| "loss": 1.2569221258163452, | |
| "step": 302 | |
| }, | |
| { | |
| "epoch": 0.6413502109704642, | |
| "grad_norm": 6.321805953979492, | |
| "learning_rate": 7.765630051381794e-06, | |
| "loss": 0.8693046569824219, | |
| "step": 304 | |
| }, | |
| { | |
| "epoch": 0.6455696202531646, | |
| "grad_norm": 1.1084986925125122, | |
| "learning_rate": 7.761152017356627e-06, | |
| "loss": 0.9949886798858643, | |
| "step": 306 | |
| }, | |
| { | |
| "epoch": 0.6497890295358649, | |
| "grad_norm": 0.5674229860305786, | |
| "learning_rate": 7.75663307422199e-06, | |
| "loss": 1.2497336864471436, | |
| "step": 308 | |
| }, | |
| { | |
| "epoch": 0.6540084388185654, | |
| "grad_norm": 0.785914421081543, | |
| "learning_rate": 7.75207327697865e-06, | |
| "loss": 1.197920560836792, | |
| "step": 310 | |
| }, | |
| { | |
| "epoch": 0.6582278481012658, | |
| "grad_norm": 1.9101108312606812, | |
| "learning_rate": 7.747472681124616e-06, | |
| "loss": 1.0015456676483154, | |
| "step": 312 | |
| }, | |
| { | |
| "epoch": 0.6624472573839663, | |
| "grad_norm": 1.1794395446777344, | |
| "learning_rate": 7.742831342654461e-06, | |
| "loss": 1.199405312538147, | |
| "step": 314 | |
| }, | |
| { | |
| "epoch": 0.6666666666666666, | |
| "grad_norm": 0.8022367358207703, | |
| "learning_rate": 7.738149318058648e-06, | |
| "loss": 1.1004928350448608, | |
| "step": 316 | |
| }, | |
| { | |
| "epoch": 0.6708860759493671, | |
| "grad_norm": 0.7416080236434937, | |
| "learning_rate": 7.733426664322834e-06, | |
| "loss": 1.0781973600387573, | |
| "step": 318 | |
| }, | |
| { | |
| "epoch": 0.6751054852320675, | |
| "grad_norm": 1.5531445741653442, | |
| "learning_rate": 7.728663438927177e-06, | |
| "loss": 1.0812546014785767, | |
| "step": 320 | |
| }, | |
| { | |
| "epoch": 0.679324894514768, | |
| "grad_norm": 1.3288339376449585, | |
| "learning_rate": 7.723859699845645e-06, | |
| "loss": 0.8775804042816162, | |
| "step": 322 | |
| }, | |
| { | |
| "epoch": 0.6835443037974683, | |
| "grad_norm": 1.0390150547027588, | |
| "learning_rate": 7.7190155055453e-06, | |
| "loss": 0.8802211284637451, | |
| "step": 324 | |
| }, | |
| { | |
| "epoch": 0.6877637130801688, | |
| "grad_norm": 0.8296968936920166, | |
| "learning_rate": 7.714130914985593e-06, | |
| "loss": 0.8521700501441956, | |
| "step": 326 | |
| }, | |
| { | |
| "epoch": 0.6919831223628692, | |
| "grad_norm": 0.742783784866333, | |
| "learning_rate": 7.709205987617642e-06, | |
| "loss": 0.8648751974105835, | |
| "step": 328 | |
| }, | |
| { | |
| "epoch": 0.6962025316455697, | |
| "grad_norm": 2.3347666263580322, | |
| "learning_rate": 7.704240783383513e-06, | |
| "loss": 1.5764340162277222, | |
| "step": 330 | |
| }, | |
| { | |
| "epoch": 0.70042194092827, | |
| "grad_norm": 1.7489248514175415, | |
| "learning_rate": 7.699235362715488e-06, | |
| "loss": 1.0720549821853638, | |
| "step": 332 | |
| }, | |
| { | |
| "epoch": 0.7046413502109705, | |
| "grad_norm": 1.7539390325546265, | |
| "learning_rate": 7.694189786535325e-06, | |
| "loss": 1.3112399578094482, | |
| "step": 334 | |
| }, | |
| { | |
| "epoch": 0.7088607594936709, | |
| "grad_norm": 2.141007900238037, | |
| "learning_rate": 7.689104116253529e-06, | |
| "loss": 1.2512861490249634, | |
| "step": 336 | |
| }, | |
| { | |
| "epoch": 0.7130801687763713, | |
| "grad_norm": 1.4252289533615112, | |
| "learning_rate": 7.683978413768591e-06, | |
| "loss": 1.2772711515426636, | |
| "step": 338 | |
| }, | |
| { | |
| "epoch": 0.7172995780590717, | |
| "grad_norm": 0.931917667388916, | |
| "learning_rate": 7.678812741466241e-06, | |
| "loss": 1.2473686933517456, | |
| "step": 340 | |
| }, | |
| { | |
| "epoch": 0.7215189873417721, | |
| "grad_norm": 0.674299418926239, | |
| "learning_rate": 7.673607162218688e-06, | |
| "loss": 1.2429455518722534, | |
| "step": 342 | |
| }, | |
| { | |
| "epoch": 0.7257383966244726, | |
| "grad_norm": 3.6710023880004883, | |
| "learning_rate": 7.668361739383856e-06, | |
| "loss": 1.0689202547073364, | |
| "step": 344 | |
| }, | |
| { | |
| "epoch": 0.729957805907173, | |
| "grad_norm": 2.2613823413848877, | |
| "learning_rate": 7.66307653680461e-06, | |
| "loss": 0.8741579651832581, | |
| "step": 346 | |
| }, | |
| { | |
| "epoch": 0.7341772151898734, | |
| "grad_norm": 0.8934361338615417, | |
| "learning_rate": 7.657751618807982e-06, | |
| "loss": 1.234643578529358, | |
| "step": 348 | |
| }, | |
| { | |
| "epoch": 0.7383966244725738, | |
| "grad_norm": 2.2448158264160156, | |
| "learning_rate": 7.652387050204386e-06, | |
| "loss": 1.1629151105880737, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 0.7426160337552743, | |
| "grad_norm": 2.3110733032226562, | |
| "learning_rate": 7.64698289628683e-06, | |
| "loss": 1.074580192565918, | |
| "step": 352 | |
| }, | |
| { | |
| "epoch": 0.7468354430379747, | |
| "grad_norm": 2.6933298110961914, | |
| "learning_rate": 7.641539222830117e-06, | |
| "loss": 0.6495164036750793, | |
| "step": 354 | |
| }, | |
| { | |
| "epoch": 0.7510548523206751, | |
| "grad_norm": 8.03363037109375, | |
| "learning_rate": 7.63605609609006e-06, | |
| "loss": 1.0515384674072266, | |
| "step": 356 | |
| }, | |
| { | |
| "epoch": 0.7552742616033755, | |
| "grad_norm": 1.2727802991867065, | |
| "learning_rate": 7.630533582802647e-06, | |
| "loss": 1.0052093267440796, | |
| "step": 358 | |
| }, | |
| { | |
| "epoch": 0.759493670886076, | |
| "grad_norm": 0.8008536100387573, | |
| "learning_rate": 7.6249717501832616e-06, | |
| "loss": 1.208338975906372, | |
| "step": 360 | |
| }, | |
| { | |
| "epoch": 0.7637130801687764, | |
| "grad_norm": 1.467780351638794, | |
| "learning_rate": 7.619370665925842e-06, | |
| "loss": 0.9765693545341492, | |
| "step": 362 | |
| }, | |
| { | |
| "epoch": 0.7679324894514767, | |
| "grad_norm": 1.806458830833435, | |
| "learning_rate": 7.613730398202061e-06, | |
| "loss": 1.5730071067810059, | |
| "step": 364 | |
| }, | |
| { | |
| "epoch": 0.7721518987341772, | |
| "grad_norm": 2.95654034614563, | |
| "learning_rate": 7.608051015660508e-06, | |
| "loss": 0.6979476809501648, | |
| "step": 366 | |
| }, | |
| { | |
| "epoch": 0.7763713080168776, | |
| "grad_norm": 1.1918550729751587, | |
| "learning_rate": 7.60233258742584e-06, | |
| "loss": 1.1616028547286987, | |
| "step": 368 | |
| }, | |
| { | |
| "epoch": 0.7805907172995781, | |
| "grad_norm": 2.9640629291534424, | |
| "learning_rate": 7.596575183097943e-06, | |
| "loss": 1.4773938655853271, | |
| "step": 370 | |
| }, | |
| { | |
| "epoch": 0.7848101265822784, | |
| "grad_norm": 3.126946210861206, | |
| "learning_rate": 7.590778872751091e-06, | |
| "loss": 0.9821402430534363, | |
| "step": 372 | |
| }, | |
| { | |
| "epoch": 0.7890295358649789, | |
| "grad_norm": 1.5098716020584106, | |
| "learning_rate": 7.58494372693309e-06, | |
| "loss": 1.0515096187591553, | |
| "step": 374 | |
| }, | |
| { | |
| "epoch": 0.7932489451476793, | |
| "grad_norm": 1.3018288612365723, | |
| "learning_rate": 7.579069816664417e-06, | |
| "loss": 1.1510859727859497, | |
| "step": 376 | |
| }, | |
| { | |
| "epoch": 0.7974683544303798, | |
| "grad_norm": 0.4790183901786804, | |
| "learning_rate": 7.573157213437353e-06, | |
| "loss": 1.1152373552322388, | |
| "step": 378 | |
| }, | |
| { | |
| "epoch": 0.8016877637130801, | |
| "grad_norm": 1.7124649286270142, | |
| "learning_rate": 7.567205989215126e-06, | |
| "loss": 0.729989230632782, | |
| "step": 380 | |
| }, | |
| { | |
| "epoch": 0.8059071729957806, | |
| "grad_norm": 1.813360333442688, | |
| "learning_rate": 7.5612162164310196e-06, | |
| "loss": 1.2611396312713623, | |
| "step": 382 | |
| }, | |
| { | |
| "epoch": 0.810126582278481, | |
| "grad_norm": 3.6355385780334473, | |
| "learning_rate": 7.555187967987499e-06, | |
| "loss": 0.9938818216323853, | |
| "step": 384 | |
| }, | |
| { | |
| "epoch": 0.8143459915611815, | |
| "grad_norm": 1.1984269618988037, | |
| "learning_rate": 7.549121317255322e-06, | |
| "loss": 1.2364702224731445, | |
| "step": 386 | |
| }, | |
| { | |
| "epoch": 0.8185654008438819, | |
| "grad_norm": 1.193095326423645, | |
| "learning_rate": 7.543016338072653e-06, | |
| "loss": 0.9437189102172852, | |
| "step": 388 | |
| }, | |
| { | |
| "epoch": 0.8227848101265823, | |
| "grad_norm": 0.9609726071357727, | |
| "learning_rate": 7.5368731047441486e-06, | |
| "loss": 1.2113581895828247, | |
| "step": 390 | |
| }, | |
| { | |
| "epoch": 0.8270042194092827, | |
| "grad_norm": 2.6752779483795166, | |
| "learning_rate": 7.530691692040069e-06, | |
| "loss": 0.9650623798370361, | |
| "step": 392 | |
| }, | |
| { | |
| "epoch": 0.8312236286919831, | |
| "grad_norm": 1.805607795715332, | |
| "learning_rate": 7.52447217519536e-06, | |
| "loss": 0.9127689003944397, | |
| "step": 394 | |
| }, | |
| { | |
| "epoch": 0.8354430379746836, | |
| "grad_norm": 1.9785407781600952, | |
| "learning_rate": 7.5182146299087375e-06, | |
| "loss": 1.2258358001708984, | |
| "step": 396 | |
| }, | |
| { | |
| "epoch": 0.8396624472573839, | |
| "grad_norm": 0.2685816287994385, | |
| "learning_rate": 7.51191913234177e-06, | |
| "loss": 1.0780510902404785, | |
| "step": 398 | |
| }, | |
| { | |
| "epoch": 0.8438818565400844, | |
| "grad_norm": 0.8069247007369995, | |
| "learning_rate": 7.505585759117947e-06, | |
| "loss": 0.9565885663032532, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 0.8481012658227848, | |
| "grad_norm": 2.5125067234039307, | |
| "learning_rate": 7.499214587321749e-06, | |
| "loss": 0.7042322754859924, | |
| "step": 402 | |
| }, | |
| { | |
| "epoch": 0.8523206751054853, | |
| "grad_norm": 1.6951823234558105, | |
| "learning_rate": 7.49280569449771e-06, | |
| "loss": 1.1971807479858398, | |
| "step": 404 | |
| }, | |
| { | |
| "epoch": 0.8565400843881856, | |
| "grad_norm": 0.7480567097663879, | |
| "learning_rate": 7.486359158649471e-06, | |
| "loss": 0.9361912608146667, | |
| "step": 406 | |
| }, | |
| { | |
| "epoch": 0.8607594936708861, | |
| "grad_norm": 1.4761838912963867, | |
| "learning_rate": 7.4798750582388354e-06, | |
| "loss": 0.9626801609992981, | |
| "step": 408 | |
| }, | |
| { | |
| "epoch": 0.8649789029535865, | |
| "grad_norm": 0.9039321541786194, | |
| "learning_rate": 7.473353472184806e-06, | |
| "loss": 1.2230124473571777, | |
| "step": 410 | |
| }, | |
| { | |
| "epoch": 0.869198312236287, | |
| "grad_norm": 1.8411026000976562, | |
| "learning_rate": 7.466794479862632e-06, | |
| "loss": 0.838551938533783, | |
| "step": 412 | |
| }, | |
| { | |
| "epoch": 0.8734177215189873, | |
| "grad_norm": 1.6176677942276, | |
| "learning_rate": 7.460198161102841e-06, | |
| "loss": 1.2056636810302734, | |
| "step": 414 | |
| }, | |
| { | |
| "epoch": 0.8776371308016878, | |
| "grad_norm": 0.796684205532074, | |
| "learning_rate": 7.453564596190265e-06, | |
| "loss": 1.0609307289123535, | |
| "step": 416 | |
| }, | |
| { | |
| "epoch": 0.8818565400843882, | |
| "grad_norm": 3.51039457321167, | |
| "learning_rate": 7.446893865863063e-06, | |
| "loss": 1.1577751636505127, | |
| "step": 418 | |
| }, | |
| { | |
| "epoch": 0.8860759493670886, | |
| "grad_norm": 1.3933305740356445, | |
| "learning_rate": 7.440186051311744e-06, | |
| "loss": 0.9417897462844849, | |
| "step": 420 | |
| }, | |
| { | |
| "epoch": 0.890295358649789, | |
| "grad_norm": 1.077215313911438, | |
| "learning_rate": 7.433441234178174e-06, | |
| "loss": 1.333181619644165, | |
| "step": 422 | |
| }, | |
| { | |
| "epoch": 0.8945147679324894, | |
| "grad_norm": 3.3169748783111572, | |
| "learning_rate": 7.426659496554582e-06, | |
| "loss": 0.9721631407737732, | |
| "step": 424 | |
| }, | |
| { | |
| "epoch": 0.8987341772151899, | |
| "grad_norm": 1.1560137271881104, | |
| "learning_rate": 7.4198409209825615e-06, | |
| "loss": 1.1756271123886108, | |
| "step": 426 | |
| }, | |
| { | |
| "epoch": 0.9029535864978903, | |
| "grad_norm": 0.8857593536376953, | |
| "learning_rate": 7.412985590452066e-06, | |
| "loss": 1.037049651145935, | |
| "step": 428 | |
| }, | |
| { | |
| "epoch": 0.9071729957805907, | |
| "grad_norm": 1.4016187191009521, | |
| "learning_rate": 7.4060935884004045e-06, | |
| "loss": 1.0027376413345337, | |
| "step": 430 | |
| }, | |
| { | |
| "epoch": 0.9113924050632911, | |
| "grad_norm": 1.6030839681625366, | |
| "learning_rate": 7.399164998711215e-06, | |
| "loss": 1.0767489671707153, | |
| "step": 432 | |
| }, | |
| { | |
| "epoch": 0.9156118143459916, | |
| "grad_norm": 1.2388867139816284, | |
| "learning_rate": 7.392199905713454e-06, | |
| "loss": 1.241571307182312, | |
| "step": 434 | |
| }, | |
| { | |
| "epoch": 0.919831223628692, | |
| "grad_norm": 1.3980982303619385, | |
| "learning_rate": 7.385198394180359e-06, | |
| "loss": 0.8756187558174133, | |
| "step": 436 | |
| }, | |
| { | |
| "epoch": 0.9240506329113924, | |
| "grad_norm": 1.7992874383926392, | |
| "learning_rate": 7.378160549328429e-06, | |
| "loss": 1.196347713470459, | |
| "step": 438 | |
| }, | |
| { | |
| "epoch": 0.9282700421940928, | |
| "grad_norm": 0.9916715025901794, | |
| "learning_rate": 7.371086456816381e-06, | |
| "loss": 0.9922671318054199, | |
| "step": 440 | |
| }, | |
| { | |
| "epoch": 0.9324894514767933, | |
| "grad_norm": 2.890634536743164, | |
| "learning_rate": 7.363976202744106e-06, | |
| "loss": 0.9319839477539062, | |
| "step": 442 | |
| }, | |
| { | |
| "epoch": 0.9367088607594937, | |
| "grad_norm": 5.665074348449707, | |
| "learning_rate": 7.356829873651623e-06, | |
| "loss": 1.0942474603652954, | |
| "step": 444 | |
| }, | |
| { | |
| "epoch": 0.9409282700421941, | |
| "grad_norm": 1.669215440750122, | |
| "learning_rate": 7.3496475565180284e-06, | |
| "loss": 1.2984267473220825, | |
| "step": 446 | |
| }, | |
| { | |
| "epoch": 0.9451476793248945, | |
| "grad_norm": 0.8537882566452026, | |
| "learning_rate": 7.342429338760431e-06, | |
| "loss": 0.9971826076507568, | |
| "step": 448 | |
| }, | |
| { | |
| "epoch": 0.9493670886075949, | |
| "grad_norm": 3.3685364723205566, | |
| "learning_rate": 7.3351753082328946e-06, | |
| "loss": 0.9323700666427612, | |
| "step": 450 | |
| }, | |
| { | |
| "epoch": 0.9535864978902954, | |
| "grad_norm": 1.2475708723068237, | |
| "learning_rate": 7.327885553225365e-06, | |
| "loss": 1.2786669731140137, | |
| "step": 452 | |
| }, | |
| { | |
| "epoch": 0.9578059071729957, | |
| "grad_norm": 1.7981699705123901, | |
| "learning_rate": 7.320560162462594e-06, | |
| "loss": 0.9830716848373413, | |
| "step": 454 | |
| }, | |
| { | |
| "epoch": 0.9620253164556962, | |
| "grad_norm": 1.964571237564087, | |
| "learning_rate": 7.313199225103068e-06, | |
| "loss": 1.1577880382537842, | |
| "step": 456 | |
| }, | |
| { | |
| "epoch": 0.9662447257383966, | |
| "grad_norm": 0.8031247854232788, | |
| "learning_rate": 7.3058028307379104e-06, | |
| "loss": 0.8746158480644226, | |
| "step": 458 | |
| }, | |
| { | |
| "epoch": 0.9704641350210971, | |
| "grad_norm": 6.945025444030762, | |
| "learning_rate": 7.298371069389798e-06, | |
| "loss": 0.6917670369148254, | |
| "step": 460 | |
| }, | |
| { | |
| "epoch": 0.9746835443037974, | |
| "grad_norm": 1.1903830766677856, | |
| "learning_rate": 7.290904031511867e-06, | |
| "loss": 0.8951276540756226, | |
| "step": 462 | |
| }, | |
| { | |
| "epoch": 0.9789029535864979, | |
| "grad_norm": 1.7528347969055176, | |
| "learning_rate": 7.28340180798661e-06, | |
| "loss": 1.1649961471557617, | |
| "step": 464 | |
| }, | |
| { | |
| "epoch": 0.9831223628691983, | |
| "grad_norm": 2.7463033199310303, | |
| "learning_rate": 7.275864490124769e-06, | |
| "loss": 0.7191216349601746, | |
| "step": 466 | |
| }, | |
| { | |
| "epoch": 0.9873417721518988, | |
| "grad_norm": 1.2754981517791748, | |
| "learning_rate": 7.268292169664222e-06, | |
| "loss": 1.3055366277694702, | |
| "step": 468 | |
| }, | |
| { | |
| "epoch": 0.9915611814345991, | |
| "grad_norm": 1.402946949005127, | |
| "learning_rate": 7.260684938768874e-06, | |
| "loss": 0.8869744539260864, | |
| "step": 470 | |
| }, | |
| { | |
| "epoch": 0.9957805907172996, | |
| "grad_norm": 1.225201964378357, | |
| "learning_rate": 7.253042890027527e-06, | |
| "loss": 1.202407956123352, | |
| "step": 472 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "grad_norm": 1.0377144813537598, | |
| "learning_rate": 7.2453661164527565e-06, | |
| "loss": 1.249975562095642, | |
| "step": 474 | |
| }, | |
| { | |
| "epoch": 1.0042194092827004, | |
| "grad_norm": 2.005743980407715, | |
| "learning_rate": 7.237654711479781e-06, | |
| "loss": 0.9949838519096375, | |
| "step": 476 | |
| }, | |
| { | |
| "epoch": 1.0084388185654007, | |
| "grad_norm": 1.0849618911743164, | |
| "learning_rate": 7.2299087689653224e-06, | |
| "loss": 1.1602823734283447, | |
| "step": 478 | |
| }, | |
| { | |
| "epoch": 1.0126582278481013, | |
| "grad_norm": 1.2653388977050781, | |
| "learning_rate": 7.222128383186464e-06, | |
| "loss": 1.13376784324646, | |
| "step": 480 | |
| }, | |
| { | |
| "epoch": 1.0168776371308017, | |
| "grad_norm": 1.4933967590332031, | |
| "learning_rate": 7.214313648839504e-06, | |
| "loss": 0.8098440766334534, | |
| "step": 482 | |
| }, | |
| { | |
| "epoch": 1.021097046413502, | |
| "grad_norm": 1.3467376232147217, | |
| "learning_rate": 7.206464661038802e-06, | |
| "loss": 1.058078408241272, | |
| "step": 484 | |
| }, | |
| { | |
| "epoch": 1.0253164556962024, | |
| "grad_norm": 1.572613000869751, | |
| "learning_rate": 7.198581515315622e-06, | |
| "loss": 0.46203434467315674, | |
| "step": 486 | |
| }, | |
| { | |
| "epoch": 1.029535864978903, | |
| "grad_norm": 1.1707247495651245, | |
| "learning_rate": 7.1906643076169736e-06, | |
| "loss": 0.952732264995575, | |
| "step": 488 | |
| }, | |
| { | |
| "epoch": 1.0337552742616034, | |
| "grad_norm": 0.9213857650756836, | |
| "learning_rate": 7.182713134304431e-06, | |
| "loss": 0.8125715851783752, | |
| "step": 490 | |
| }, | |
| { | |
| "epoch": 1.0379746835443038, | |
| "grad_norm": 1.2644870281219482, | |
| "learning_rate": 7.174728092152975e-06, | |
| "loss": 1.1190340518951416, | |
| "step": 492 | |
| }, | |
| { | |
| "epoch": 1.0421940928270041, | |
| "grad_norm": 2.924903392791748, | |
| "learning_rate": 7.1667092783498105e-06, | |
| "loss": 0.8107770085334778, | |
| "step": 494 | |
| }, | |
| { | |
| "epoch": 1.0464135021097047, | |
| "grad_norm": 1.9948618412017822, | |
| "learning_rate": 7.15865679049318e-06, | |
| "loss": 0.9311866164207458, | |
| "step": 496 | |
| }, | |
| { | |
| "epoch": 1.0506329113924051, | |
| "grad_norm": 0.8843758702278137, | |
| "learning_rate": 7.150570726591178e-06, | |
| "loss": 1.216412901878357, | |
| "step": 498 | |
| }, | |
| { | |
| "epoch": 1.0548523206751055, | |
| "grad_norm": 1.182377815246582, | |
| "learning_rate": 7.14245118506056e-06, | |
| "loss": 1.0732381343841553, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 1.0590717299578059, | |
| "grad_norm": 1.3736239671707153, | |
| "learning_rate": 7.134298264725542e-06, | |
| "loss": 1.0816361904144287, | |
| "step": 502 | |
| }, | |
| { | |
| "epoch": 1.0632911392405062, | |
| "grad_norm": 0.9931637644767761, | |
| "learning_rate": 7.126112064816598e-06, | |
| "loss": 1.20469331741333, | |
| "step": 504 | |
| }, | |
| { | |
| "epoch": 1.0675105485232068, | |
| "grad_norm": 0.99040687084198, | |
| "learning_rate": 7.117892684969255e-06, | |
| "loss": 0.7193590402603149, | |
| "step": 506 | |
| }, | |
| { | |
| "epoch": 1.0717299578059072, | |
| "grad_norm": 1.0574781894683838, | |
| "learning_rate": 7.109640225222874e-06, | |
| "loss": 1.030031442642212, | |
| "step": 508 | |
| }, | |
| { | |
| "epoch": 1.0759493670886076, | |
| "grad_norm": 0.9034721255302429, | |
| "learning_rate": 7.101354786019443e-06, | |
| "loss": 1.0760937929153442, | |
| "step": 510 | |
| }, | |
| { | |
| "epoch": 1.080168776371308, | |
| "grad_norm": 2.8123185634613037, | |
| "learning_rate": 7.0930364682023446e-06, | |
| "loss": 1.0546125173568726, | |
| "step": 512 | |
| }, | |
| { | |
| "epoch": 1.0843881856540085, | |
| "grad_norm": 1.0355448722839355, | |
| "learning_rate": 7.084685373015131e-06, | |
| "loss": 1.060817003250122, | |
| "step": 514 | |
| }, | |
| { | |
| "epoch": 1.0886075949367089, | |
| "grad_norm": 0.6953267455101013, | |
| "learning_rate": 7.076301602100294e-06, | |
| "loss": 1.0389786958694458, | |
| "step": 516 | |
| }, | |
| { | |
| "epoch": 1.0928270042194093, | |
| "grad_norm": 1.1498650312423706, | |
| "learning_rate": 7.067885257498027e-06, | |
| "loss": 0.9518197774887085, | |
| "step": 518 | |
| }, | |
| { | |
| "epoch": 1.0970464135021096, | |
| "grad_norm": 1.411102533340454, | |
| "learning_rate": 7.059436441644984e-06, | |
| "loss": 0.8960402011871338, | |
| "step": 520 | |
| }, | |
| { | |
| "epoch": 1.1012658227848102, | |
| "grad_norm": 2.03861927986145, | |
| "learning_rate": 7.0509552573730305e-06, | |
| "loss": 0.8347494602203369, | |
| "step": 522 | |
| }, | |
| { | |
| "epoch": 1.1054852320675106, | |
| "grad_norm": 1.5810351371765137, | |
| "learning_rate": 7.0424418079079925e-06, | |
| "loss": 0.9857693314552307, | |
| "step": 524 | |
| }, | |
| { | |
| "epoch": 1.109704641350211, | |
| "grad_norm": 2.000030040740967, | |
| "learning_rate": 7.033896196868403e-06, | |
| "loss": 0.8366687893867493, | |
| "step": 526 | |
| }, | |
| { | |
| "epoch": 1.1139240506329113, | |
| "grad_norm": 1.2057420015335083, | |
| "learning_rate": 7.025318528264234e-06, | |
| "loss": 1.3332631587982178, | |
| "step": 528 | |
| }, | |
| { | |
| "epoch": 1.1181434599156117, | |
| "grad_norm": 0.9624310731887817, | |
| "learning_rate": 7.016708906495641e-06, | |
| "loss": 1.2037197351455688, | |
| "step": 530 | |
| }, | |
| { | |
| "epoch": 1.1223628691983123, | |
| "grad_norm": 1.497033715248108, | |
| "learning_rate": 7.008067436351683e-06, | |
| "loss": 0.6526771783828735, | |
| "step": 532 | |
| }, | |
| { | |
| "epoch": 1.1265822784810127, | |
| "grad_norm": 2.6542580127716064, | |
| "learning_rate": 6.999394223009052e-06, | |
| "loss": 0.8994975090026855, | |
| "step": 534 | |
| }, | |
| { | |
| "epoch": 1.130801687763713, | |
| "grad_norm": 0.8712950944900513, | |
| "learning_rate": 6.9906893720307895e-06, | |
| "loss": 1.0523709058761597, | |
| "step": 536 | |
| }, | |
| { | |
| "epoch": 1.1350210970464134, | |
| "grad_norm": 1.1105852127075195, | |
| "learning_rate": 6.981952989365005e-06, | |
| "loss": 0.8754544854164124, | |
| "step": 538 | |
| }, | |
| { | |
| "epoch": 1.139240506329114, | |
| "grad_norm": 0.9426999688148499, | |
| "learning_rate": 6.973185181343585e-06, | |
| "loss": 0.7062304019927979, | |
| "step": 540 | |
| }, | |
| { | |
| "epoch": 1.1434599156118144, | |
| "grad_norm": 0.9875909090042114, | |
| "learning_rate": 6.9643860546809e-06, | |
| "loss": 0.7558496594429016, | |
| "step": 542 | |
| }, | |
| { | |
| "epoch": 1.1476793248945147, | |
| "grad_norm": 1.1727705001831055, | |
| "learning_rate": 6.955555716472503e-06, | |
| "loss": 0.7818480730056763, | |
| "step": 544 | |
| }, | |
| { | |
| "epoch": 1.1518987341772151, | |
| "grad_norm": 1.8272697925567627, | |
| "learning_rate": 6.9466942741938275e-06, | |
| "loss": 1.0798598527908325, | |
| "step": 546 | |
| }, | |
| { | |
| "epoch": 1.1561181434599157, | |
| "grad_norm": 1.3200129270553589, | |
| "learning_rate": 6.93780183569888e-06, | |
| "loss": 1.1049277782440186, | |
| "step": 548 | |
| }, | |
| { | |
| "epoch": 1.160337552742616, | |
| "grad_norm": 12.516786575317383, | |
| "learning_rate": 6.928878509218929e-06, | |
| "loss": 0.8925328850746155, | |
| "step": 550 | |
| }, | |
| { | |
| "epoch": 1.1645569620253164, | |
| "grad_norm": 1.5449568033218384, | |
| "learning_rate": 6.919924403361182e-06, | |
| "loss": 1.3479973077774048, | |
| "step": 552 | |
| }, | |
| { | |
| "epoch": 1.1687763713080168, | |
| "grad_norm": 0.9759451746940613, | |
| "learning_rate": 6.910939627107469e-06, | |
| "loss": 1.0944254398345947, | |
| "step": 554 | |
| }, | |
| { | |
| "epoch": 1.1729957805907172, | |
| "grad_norm": 3.6769256591796875, | |
| "learning_rate": 6.901924289812913e-06, | |
| "loss": 0.6496379375457764, | |
| "step": 556 | |
| }, | |
| { | |
| "epoch": 1.1772151898734178, | |
| "grad_norm": 1.0708627700805664, | |
| "learning_rate": 6.892878501204603e-06, | |
| "loss": 0.9399113059043884, | |
| "step": 558 | |
| }, | |
| { | |
| "epoch": 1.1814345991561181, | |
| "grad_norm": 3.548515558242798, | |
| "learning_rate": 6.883802371380252e-06, | |
| "loss": 0.6334307789802551, | |
| "step": 560 | |
| }, | |
| { | |
| "epoch": 1.1856540084388185, | |
| "grad_norm": 0.7087482810020447, | |
| "learning_rate": 6.874696010806865e-06, | |
| "loss": 0.6812013983726501, | |
| "step": 562 | |
| }, | |
| { | |
| "epoch": 1.189873417721519, | |
| "grad_norm": 1.6790183782577515, | |
| "learning_rate": 6.865559530319386e-06, | |
| "loss": 1.2819935083389282, | |
| "step": 564 | |
| }, | |
| { | |
| "epoch": 1.1940928270042195, | |
| "grad_norm": 2.5965490341186523, | |
| "learning_rate": 6.8563930411193535e-06, | |
| "loss": 1.02937912940979, | |
| "step": 566 | |
| }, | |
| { | |
| "epoch": 1.1983122362869199, | |
| "grad_norm": 3.7219197750091553, | |
| "learning_rate": 6.847196654773552e-06, | |
| "loss": 0.7903206944465637, | |
| "step": 568 | |
| }, | |
| { | |
| "epoch": 1.2025316455696202, | |
| "grad_norm": 0.9391790628433228, | |
| "learning_rate": 6.837970483212643e-06, | |
| "loss": 1.0360606908798218, | |
| "step": 570 | |
| }, | |
| { | |
| "epoch": 1.2067510548523206, | |
| "grad_norm": 20.603315353393555, | |
| "learning_rate": 6.828714638729809e-06, | |
| "loss": 1.0591099262237549, | |
| "step": 572 | |
| }, | |
| { | |
| "epoch": 1.2109704641350212, | |
| "grad_norm": 2.5088610649108887, | |
| "learning_rate": 6.81942923397939e-06, | |
| "loss": 1.0366530418395996, | |
| "step": 574 | |
| }, | |
| { | |
| "epoch": 1.2151898734177216, | |
| "grad_norm": 0.9826826453208923, | |
| "learning_rate": 6.810114381975507e-06, | |
| "loss": 0.9062384963035583, | |
| "step": 576 | |
| }, | |
| { | |
| "epoch": 1.219409282700422, | |
| "grad_norm": 1.3147906064987183, | |
| "learning_rate": 6.800770196090688e-06, | |
| "loss": 0.6110230684280396, | |
| "step": 578 | |
| }, | |
| { | |
| "epoch": 1.2236286919831223, | |
| "grad_norm": 0.8988205194473267, | |
| "learning_rate": 6.791396790054484e-06, | |
| "loss": 0.910240888595581, | |
| "step": 580 | |
| }, | |
| { | |
| "epoch": 1.2278481012658227, | |
| "grad_norm": 2.201284170150757, | |
| "learning_rate": 6.781994277952099e-06, | |
| "loss": 0.8457823395729065, | |
| "step": 582 | |
| }, | |
| { | |
| "epoch": 1.2320675105485233, | |
| "grad_norm": 3.1297316551208496, | |
| "learning_rate": 6.7725627742229815e-06, | |
| "loss": 0.8808956146240234, | |
| "step": 584 | |
| }, | |
| { | |
| "epoch": 1.2362869198312236, | |
| "grad_norm": 5.279428482055664, | |
| "learning_rate": 6.763102393659446e-06, | |
| "loss": 0.9118282198905945, | |
| "step": 586 | |
| }, | |
| { | |
| "epoch": 1.240506329113924, | |
| "grad_norm": 1.449725866317749, | |
| "learning_rate": 6.753613251405274e-06, | |
| "loss": 0.8038244247436523, | |
| "step": 588 | |
| }, | |
| { | |
| "epoch": 1.2447257383966246, | |
| "grad_norm": 1.0893408060073853, | |
| "learning_rate": 6.744095462954303e-06, | |
| "loss": 1.065926194190979, | |
| "step": 590 | |
| }, | |
| { | |
| "epoch": 1.248945147679325, | |
| "grad_norm": 11.18133544921875, | |
| "learning_rate": 6.734549144149036e-06, | |
| "loss": 0.6128525733947754, | |
| "step": 592 | |
| }, | |
| { | |
| "epoch": 1.2531645569620253, | |
| "grad_norm": 0.5239539742469788, | |
| "learning_rate": 6.724974411179218e-06, | |
| "loss": 0.8248177766799927, | |
| "step": 594 | |
| }, | |
| { | |
| "epoch": 1.2573839662447257, | |
| "grad_norm": 3.62746000289917, | |
| "learning_rate": 6.7153713805804285e-06, | |
| "loss": 0.6825551986694336, | |
| "step": 596 | |
| }, | |
| { | |
| "epoch": 1.261603375527426, | |
| "grad_norm": 3.413501501083374, | |
| "learning_rate": 6.7057401692326625e-06, | |
| "loss": 0.567305862903595, | |
| "step": 598 | |
| }, | |
| { | |
| "epoch": 1.2658227848101267, | |
| "grad_norm": 0.6996157169342041, | |
| "learning_rate": 6.696080894358908e-06, | |
| "loss": 0.8849403262138367, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 1.270042194092827, | |
| "grad_norm": 6.248124599456787, | |
| "learning_rate": 6.686393673523715e-06, | |
| "loss": 1.3093706369400024, | |
| "step": 602 | |
| }, | |
| { | |
| "epoch": 1.2742616033755274, | |
| "grad_norm": 0.9306197166442871, | |
| "learning_rate": 6.6766786246317726e-06, | |
| "loss": 1.0244123935699463, | |
| "step": 604 | |
| }, | |
| { | |
| "epoch": 1.2784810126582278, | |
| "grad_norm": 2.1768555641174316, | |
| "learning_rate": 6.666935865926468e-06, | |
| "loss": 0.8419608473777771, | |
| "step": 606 | |
| }, | |
| { | |
| "epoch": 1.2827004219409281, | |
| "grad_norm": 1.509337306022644, | |
| "learning_rate": 6.65716551598845e-06, | |
| "loss": 0.8019965291023254, | |
| "step": 608 | |
| }, | |
| { | |
| "epoch": 1.2869198312236287, | |
| "grad_norm": 5.7914323806762695, | |
| "learning_rate": 6.647367693734181e-06, | |
| "loss": 0.8274118900299072, | |
| "step": 610 | |
| }, | |
| { | |
| "epoch": 1.2911392405063291, | |
| "grad_norm": 3.3554115295410156, | |
| "learning_rate": 6.637542518414495e-06, | |
| "loss": 0.5377339124679565, | |
| "step": 612 | |
| }, | |
| { | |
| "epoch": 1.2953586497890295, | |
| "grad_norm": 0.9977070093154907, | |
| "learning_rate": 6.627690109613147e-06, | |
| "loss": 0.6412088871002197, | |
| "step": 614 | |
| }, | |
| { | |
| "epoch": 1.29957805907173, | |
| "grad_norm": 5.793771743774414, | |
| "learning_rate": 6.617810587245352e-06, | |
| "loss": 1.0477070808410645, | |
| "step": 616 | |
| }, | |
| { | |
| "epoch": 1.3037974683544304, | |
| "grad_norm": 1.5624624490737915, | |
| "learning_rate": 6.607904071556331e-06, | |
| "loss": 1.0696133375167847, | |
| "step": 618 | |
| }, | |
| { | |
| "epoch": 1.3080168776371308, | |
| "grad_norm": 0.9112898111343384, | |
| "learning_rate": 6.597970683119841e-06, | |
| "loss": 0.6664775609970093, | |
| "step": 620 | |
| }, | |
| { | |
| "epoch": 1.3122362869198312, | |
| "grad_norm": 1.806433081626892, | |
| "learning_rate": 6.588010542836715e-06, | |
| "loss": 0.7590267062187195, | |
| "step": 622 | |
| }, | |
| { | |
| "epoch": 1.3164556962025316, | |
| "grad_norm": 1.8049041032791138, | |
| "learning_rate": 6.578023771933387e-06, | |
| "loss": 0.8482476472854614, | |
| "step": 624 | |
| }, | |
| { | |
| "epoch": 1.3206751054852321, | |
| "grad_norm": 0.6221198439598083, | |
| "learning_rate": 6.568010491960412e-06, | |
| "loss": 1.0443530082702637, | |
| "step": 626 | |
| }, | |
| { | |
| "epoch": 1.3248945147679325, | |
| "grad_norm": 1.562410593032837, | |
| "learning_rate": 6.557970824790997e-06, | |
| "loss": 1.539845585823059, | |
| "step": 628 | |
| }, | |
| { | |
| "epoch": 1.3291139240506329, | |
| "grad_norm": 0.906804084777832, | |
| "learning_rate": 6.5479048926195106e-06, | |
| "loss": 0.9005885124206543, | |
| "step": 630 | |
| }, | |
| { | |
| "epoch": 1.3333333333333333, | |
| "grad_norm": 0.909907341003418, | |
| "learning_rate": 6.53781281795999e-06, | |
| "loss": 0.8963803648948669, | |
| "step": 632 | |
| }, | |
| { | |
| "epoch": 1.3375527426160336, | |
| "grad_norm": 0.4755572974681854, | |
| "learning_rate": 6.527694723644668e-06, | |
| "loss": 0.766118049621582, | |
| "step": 634 | |
| }, | |
| { | |
| "epoch": 1.3417721518987342, | |
| "grad_norm": 1.1491714715957642, | |
| "learning_rate": 6.517550732822457e-06, | |
| "loss": 0.611838161945343, | |
| "step": 636 | |
| }, | |
| { | |
| "epoch": 1.3459915611814346, | |
| "grad_norm": 2.129890203475952, | |
| "learning_rate": 6.507380968957463e-06, | |
| "loss": 0.7923972606658936, | |
| "step": 638 | |
| }, | |
| { | |
| "epoch": 1.350210970464135, | |
| "grad_norm": 0.800757646560669, | |
| "learning_rate": 6.497185555827484e-06, | |
| "loss": 1.1963096857070923, | |
| "step": 640 | |
| }, | |
| { | |
| "epoch": 1.3544303797468356, | |
| "grad_norm": 1.0547887086868286, | |
| "learning_rate": 6.486964617522494e-06, | |
| "loss": 0.7548023462295532, | |
| "step": 642 | |
| }, | |
| { | |
| "epoch": 1.358649789029536, | |
| "grad_norm": 1.0978026390075684, | |
| "learning_rate": 6.476718278443137e-06, | |
| "loss": 1.230237603187561, | |
| "step": 644 | |
| }, | |
| { | |
| "epoch": 1.3628691983122363, | |
| "grad_norm": 0.8420884609222412, | |
| "learning_rate": 6.4664466632992195e-06, | |
| "loss": 1.0555733442306519, | |
| "step": 646 | |
| }, | |
| { | |
| "epoch": 1.3670886075949367, | |
| "grad_norm": 13.5333890914917, | |
| "learning_rate": 6.456149897108182e-06, | |
| "loss": 0.8676448464393616, | |
| "step": 648 | |
| }, | |
| { | |
| "epoch": 1.371308016877637, | |
| "grad_norm": 0.9475505352020264, | |
| "learning_rate": 6.445828105193586e-06, | |
| "loss": 1.2682842016220093, | |
| "step": 650 | |
| }, | |
| { | |
| "epoch": 1.3755274261603376, | |
| "grad_norm": 0.9018502831459045, | |
| "learning_rate": 6.4354814131835815e-06, | |
| "loss": 1.0565340518951416, | |
| "step": 652 | |
| }, | |
| { | |
| "epoch": 1.379746835443038, | |
| "grad_norm": 0.54316645860672, | |
| "learning_rate": 6.425109947009384e-06, | |
| "loss": 0.7839528322219849, | |
| "step": 654 | |
| }, | |
| { | |
| "epoch": 1.3839662447257384, | |
| "grad_norm": 0.874775767326355, | |
| "learning_rate": 6.414713832903737e-06, | |
| "loss": 1.1698050498962402, | |
| "step": 656 | |
| }, | |
| { | |
| "epoch": 1.3881856540084387, | |
| "grad_norm": 4.575730323791504, | |
| "learning_rate": 6.404293197399381e-06, | |
| "loss": 0.5863835215568542, | |
| "step": 658 | |
| }, | |
| { | |
| "epoch": 1.3924050632911391, | |
| "grad_norm": 0.7375707030296326, | |
| "learning_rate": 6.393848167327507e-06, | |
| "loss": 1.086789608001709, | |
| "step": 660 | |
| }, | |
| { | |
| "epoch": 1.3966244725738397, | |
| "grad_norm": 2.6595211029052734, | |
| "learning_rate": 6.3833788698162205e-06, | |
| "loss": 0.7023826241493225, | |
| "step": 662 | |
| }, | |
| { | |
| "epoch": 1.40084388185654, | |
| "grad_norm": 1.631126046180725, | |
| "learning_rate": 6.372885432288982e-06, | |
| "loss": 1.0789552927017212, | |
| "step": 664 | |
| }, | |
| { | |
| "epoch": 1.4050632911392404, | |
| "grad_norm": 0.8146964907646179, | |
| "learning_rate": 6.362367982463073e-06, | |
| "loss": 0.6926907300949097, | |
| "step": 666 | |
| }, | |
| { | |
| "epoch": 1.409282700421941, | |
| "grad_norm": 0.9691137075424194, | |
| "learning_rate": 6.351826648348027e-06, | |
| "loss": 1.0910325050354004, | |
| "step": 668 | |
| }, | |
| { | |
| "epoch": 1.4135021097046414, | |
| "grad_norm": 0.9395283460617065, | |
| "learning_rate": 6.341261558244079e-06, | |
| "loss": 1.0995792150497437, | |
| "step": 670 | |
| }, | |
| { | |
| "epoch": 1.4177215189873418, | |
| "grad_norm": 1.9160293340682983, | |
| "learning_rate": 6.3306728407406015e-06, | |
| "loss": 1.3626757860183716, | |
| "step": 672 | |
| }, | |
| { | |
| "epoch": 1.4219409282700421, | |
| "grad_norm": 1.089788556098938, | |
| "learning_rate": 6.320060624714535e-06, | |
| "loss": 0.6344588994979858, | |
| "step": 674 | |
| }, | |
| { | |
| "epoch": 1.4261603375527425, | |
| "grad_norm": 0.6872738003730774, | |
| "learning_rate": 6.309425039328834e-06, | |
| "loss": 0.5873957872390747, | |
| "step": 676 | |
| }, | |
| { | |
| "epoch": 1.4303797468354431, | |
| "grad_norm": 2.0739026069641113, | |
| "learning_rate": 6.298766214030878e-06, | |
| "loss": 0.9192869067192078, | |
| "step": 678 | |
| }, | |
| { | |
| "epoch": 1.4345991561181435, | |
| "grad_norm": 1.781925082206726, | |
| "learning_rate": 6.288084278550905e-06, | |
| "loss": 1.0239968299865723, | |
| "step": 680 | |
| }, | |
| { | |
| "epoch": 1.4388185654008439, | |
| "grad_norm": 1.0395771265029907, | |
| "learning_rate": 6.2773793629004305e-06, | |
| "loss": 0.7735893726348877, | |
| "step": 682 | |
| }, | |
| { | |
| "epoch": 1.4430379746835442, | |
| "grad_norm": 2.4246017932891846, | |
| "learning_rate": 6.2666515973706635e-06, | |
| "loss": 1.135629415512085, | |
| "step": 684 | |
| }, | |
| { | |
| "epoch": 1.4472573839662446, | |
| "grad_norm": 0.7276327013969421, | |
| "learning_rate": 6.255901112530928e-06, | |
| "loss": 0.7381588816642761, | |
| "step": 686 | |
| }, | |
| { | |
| "epoch": 1.4514767932489452, | |
| "grad_norm": 1.8231748342514038, | |
| "learning_rate": 6.245128039227063e-06, | |
| "loss": 0.8623338341712952, | |
| "step": 688 | |
| }, | |
| { | |
| "epoch": 1.4556962025316456, | |
| "grad_norm": 2.5722429752349854, | |
| "learning_rate": 6.234332508579835e-06, | |
| "loss": 1.0199339389801025, | |
| "step": 690 | |
| }, | |
| { | |
| "epoch": 1.459915611814346, | |
| "grad_norm": 1.6927199363708496, | |
| "learning_rate": 6.2235146519833465e-06, | |
| "loss": 0.5960026383399963, | |
| "step": 692 | |
| }, | |
| { | |
| "epoch": 1.4641350210970465, | |
| "grad_norm": 1.3126322031021118, | |
| "learning_rate": 6.21267460110343e-06, | |
| "loss": 1.2402201890945435, | |
| "step": 694 | |
| }, | |
| { | |
| "epoch": 1.4683544303797469, | |
| "grad_norm": 2.016200304031372, | |
| "learning_rate": 6.201812487876048e-06, | |
| "loss": 0.5972878932952881, | |
| "step": 696 | |
| }, | |
| { | |
| "epoch": 1.4725738396624473, | |
| "grad_norm": 0.913316011428833, | |
| "learning_rate": 6.1909284445056886e-06, | |
| "loss": 1.0932003259658813, | |
| "step": 698 | |
| }, | |
| { | |
| "epoch": 1.4767932489451476, | |
| "grad_norm": 0.6828026175498962, | |
| "learning_rate": 6.1800226034637514e-06, | |
| "loss": 1.1358331441879272, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 1.481012658227848, | |
| "grad_norm": 2.209768295288086, | |
| "learning_rate": 6.169095097486947e-06, | |
| "loss": 0.9575251340866089, | |
| "step": 702 | |
| }, | |
| { | |
| "epoch": 1.4852320675105486, | |
| "grad_norm": 1.6049399375915527, | |
| "learning_rate": 6.158146059575663e-06, | |
| "loss": 0.7674723863601685, | |
| "step": 704 | |
| }, | |
| { | |
| "epoch": 1.489451476793249, | |
| "grad_norm": 3.575786828994751, | |
| "learning_rate": 6.147175622992363e-06, | |
| "loss": 1.086501121520996, | |
| "step": 706 | |
| }, | |
| { | |
| "epoch": 1.4936708860759493, | |
| "grad_norm": 0.9102724194526672, | |
| "learning_rate": 6.136183921259956e-06, | |
| "loss": 1.1395413875579834, | |
| "step": 708 | |
| }, | |
| { | |
| "epoch": 1.49789029535865, | |
| "grad_norm": 0.5558487176895142, | |
| "learning_rate": 6.125171088160168e-06, | |
| "loss": 0.9195235371589661, | |
| "step": 710 | |
| }, | |
| { | |
| "epoch": 1.50210970464135, | |
| "grad_norm": 0.9477531313896179, | |
| "learning_rate": 6.114137257731925e-06, | |
| "loss": 0.4785539209842682, | |
| "step": 712 | |
| }, | |
| { | |
| "epoch": 1.5063291139240507, | |
| "grad_norm": 0.8040223717689514, | |
| "learning_rate": 6.10308256426971e-06, | |
| "loss": 1.0396082401275635, | |
| "step": 714 | |
| }, | |
| { | |
| "epoch": 1.510548523206751, | |
| "grad_norm": 0.9315183162689209, | |
| "learning_rate": 6.092007142321932e-06, | |
| "loss": 1.043006181716919, | |
| "step": 716 | |
| }, | |
| { | |
| "epoch": 1.5147679324894514, | |
| "grad_norm": 0.7686951756477356, | |
| "learning_rate": 6.080911126689296e-06, | |
| "loss": 1.0344305038452148, | |
| "step": 718 | |
| }, | |
| { | |
| "epoch": 1.518987341772152, | |
| "grad_norm": 2.7792484760284424, | |
| "learning_rate": 6.069794652423152e-06, | |
| "loss": 1.009570598602295, | |
| "step": 720 | |
| }, | |
| { | |
| "epoch": 1.5232067510548524, | |
| "grad_norm": 2.2277615070343018, | |
| "learning_rate": 6.058657854823854e-06, | |
| "loss": 1.0374475717544556, | |
| "step": 722 | |
| }, | |
| { | |
| "epoch": 1.5274261603375527, | |
| "grad_norm": 1.6987587213516235, | |
| "learning_rate": 6.047500869439114e-06, | |
| "loss": 1.1916974782943726, | |
| "step": 724 | |
| }, | |
| { | |
| "epoch": 1.5316455696202531, | |
| "grad_norm": 4.649923801422119, | |
| "learning_rate": 6.036323832062359e-06, | |
| "loss": 0.5684564113616943, | |
| "step": 726 | |
| }, | |
| { | |
| "epoch": 1.5358649789029535, | |
| "grad_norm": 2.5940101146698, | |
| "learning_rate": 6.025126878731064e-06, | |
| "loss": 0.3716410994529724, | |
| "step": 728 | |
| }, | |
| { | |
| "epoch": 1.540084388185654, | |
| "grad_norm": 0.8527175784111023, | |
| "learning_rate": 6.013910145725112e-06, | |
| "loss": 0.8164302706718445, | |
| "step": 730 | |
| }, | |
| { | |
| "epoch": 1.5443037974683544, | |
| "grad_norm": 0.9312422275543213, | |
| "learning_rate": 6.002673769565118e-06, | |
| "loss": 0.9368805885314941, | |
| "step": 732 | |
| }, | |
| { | |
| "epoch": 1.5485232067510548, | |
| "grad_norm": 0.9247412085533142, | |
| "learning_rate": 5.991417887010786e-06, | |
| "loss": 1.1238614320755005, | |
| "step": 734 | |
| }, | |
| { | |
| "epoch": 1.5527426160337554, | |
| "grad_norm": 0.723078727722168, | |
| "learning_rate": 5.98014263505923e-06, | |
| "loss": 0.8048302531242371, | |
| "step": 736 | |
| }, | |
| { | |
| "epoch": 1.5569620253164556, | |
| "grad_norm": 1.92336106300354, | |
| "learning_rate": 5.968848150943314e-06, | |
| "loss": 0.8754326105117798, | |
| "step": 738 | |
| }, | |
| { | |
| "epoch": 1.5611814345991561, | |
| "grad_norm": 1.0468974113464355, | |
| "learning_rate": 5.957534572129979e-06, | |
| "loss": 0.9829418659210205, | |
| "step": 740 | |
| }, | |
| { | |
| "epoch": 1.5654008438818565, | |
| "grad_norm": 0.782278835773468, | |
| "learning_rate": 5.946202036318572e-06, | |
| "loss": 0.6887242197990417, | |
| "step": 742 | |
| }, | |
| { | |
| "epoch": 1.5696202531645569, | |
| "grad_norm": 1.8223977088928223, | |
| "learning_rate": 5.934850681439166e-06, | |
| "loss": 0.5122029185295105, | |
| "step": 744 | |
| }, | |
| { | |
| "epoch": 1.5738396624472575, | |
| "grad_norm": 1.1448414325714111, | |
| "learning_rate": 5.923480645650887e-06, | |
| "loss": 0.6803614497184753, | |
| "step": 746 | |
| }, | |
| { | |
| "epoch": 1.5780590717299579, | |
| "grad_norm": 4.745306491851807, | |
| "learning_rate": 5.912092067340226e-06, | |
| "loss": 0.6753883361816406, | |
| "step": 748 | |
| }, | |
| { | |
| "epoch": 1.5822784810126582, | |
| "grad_norm": 0.8308981657028198, | |
| "learning_rate": 5.900685085119361e-06, | |
| "loss": 1.0774937868118286, | |
| "step": 750 | |
| }, | |
| { | |
| "epoch": 1.5864978902953588, | |
| "grad_norm": 1.0071589946746826, | |
| "learning_rate": 5.889259837824464e-06, | |
| "loss": 0.5942963361740112, | |
| "step": 752 | |
| }, | |
| { | |
| "epoch": 1.590717299578059, | |
| "grad_norm": 0.7977795600891113, | |
| "learning_rate": 5.8778164645140155e-06, | |
| "loss": 0.644191563129425, | |
| "step": 754 | |
| }, | |
| { | |
| "epoch": 1.5949367088607596, | |
| "grad_norm": 0.7821984887123108, | |
| "learning_rate": 5.8663551044671125e-06, | |
| "loss": 0.601950466632843, | |
| "step": 756 | |
| }, | |
| { | |
| "epoch": 1.59915611814346, | |
| "grad_norm": 1.1435626745224, | |
| "learning_rate": 5.854875897181766e-06, | |
| "loss": 0.8324768543243408, | |
| "step": 758 | |
| }, | |
| { | |
| "epoch": 1.6033755274261603, | |
| "grad_norm": 0.794941246509552, | |
| "learning_rate": 5.843378982373218e-06, | |
| "loss": 1.0321424007415771, | |
| "step": 760 | |
| }, | |
| { | |
| "epoch": 1.6075949367088609, | |
| "grad_norm": 0.4165087938308716, | |
| "learning_rate": 5.8318644999722194e-06, | |
| "loss": 0.6179360747337341, | |
| "step": 762 | |
| }, | |
| { | |
| "epoch": 1.611814345991561, | |
| "grad_norm": 1.0744069814682007, | |
| "learning_rate": 5.820332590123348e-06, | |
| "loss": 1.0869427919387817, | |
| "step": 764 | |
| }, | |
| { | |
| "epoch": 1.6160337552742616, | |
| "grad_norm": 2.4255731105804443, | |
| "learning_rate": 5.80878339318329e-06, | |
| "loss": 0.9976139664649963, | |
| "step": 766 | |
| }, | |
| { | |
| "epoch": 1.620253164556962, | |
| "grad_norm": 0.3937893807888031, | |
| "learning_rate": 5.797217049719138e-06, | |
| "loss": 0.8773806095123291, | |
| "step": 768 | |
| }, | |
| { | |
| "epoch": 1.6244725738396624, | |
| "grad_norm": 1.528141975402832, | |
| "learning_rate": 5.785633700506676e-06, | |
| "loss": 1.0529608726501465, | |
| "step": 770 | |
| }, | |
| { | |
| "epoch": 1.628691983122363, | |
| "grad_norm": 1.1761523485183716, | |
| "learning_rate": 5.774033486528666e-06, | |
| "loss": 1.1696523427963257, | |
| "step": 772 | |
| }, | |
| { | |
| "epoch": 1.6329113924050633, | |
| "grad_norm": 0.6605724096298218, | |
| "learning_rate": 5.762416548973137e-06, | |
| "loss": 1.06764554977417, | |
| "step": 774 | |
| }, | |
| { | |
| "epoch": 1.6371308016877637, | |
| "grad_norm": 1.0275272130966187, | |
| "learning_rate": 5.750783029231662e-06, | |
| "loss": 1.0699821710586548, | |
| "step": 776 | |
| }, | |
| { | |
| "epoch": 1.6413502109704643, | |
| "grad_norm": 0.9171205759048462, | |
| "learning_rate": 5.739133068897638e-06, | |
| "loss": 0.7903687953948975, | |
| "step": 778 | |
| }, | |
| { | |
| "epoch": 1.6455696202531644, | |
| "grad_norm": 2.0880374908447266, | |
| "learning_rate": 5.727466809764562e-06, | |
| "loss": 0.372045636177063, | |
| "step": 780 | |
| }, | |
| { | |
| "epoch": 1.649789029535865, | |
| "grad_norm": 3.6843972206115723, | |
| "learning_rate": 5.715784393824309e-06, | |
| "loss": 1.0749914646148682, | |
| "step": 782 | |
| }, | |
| { | |
| "epoch": 1.6540084388185654, | |
| "grad_norm": 1.0832284688949585, | |
| "learning_rate": 5.7040859632653985e-06, | |
| "loss": 0.9234107136726379, | |
| "step": 784 | |
| }, | |
| { | |
| "epoch": 1.6582278481012658, | |
| "grad_norm": 1.366377353668213, | |
| "learning_rate": 5.692371660471269e-06, | |
| "loss": 1.0691020488739014, | |
| "step": 786 | |
| }, | |
| { | |
| "epoch": 1.6624472573839664, | |
| "grad_norm": 1.1804683208465576, | |
| "learning_rate": 5.680641628018539e-06, | |
| "loss": 0.5163772702217102, | |
| "step": 788 | |
| }, | |
| { | |
| "epoch": 1.6666666666666665, | |
| "grad_norm": 1.0049868822097778, | |
| "learning_rate": 5.6688960086752775e-06, | |
| "loss": 1.0653210878372192, | |
| "step": 790 | |
| }, | |
| { | |
| "epoch": 1.6708860759493671, | |
| "grad_norm": 2.6457467079162598, | |
| "learning_rate": 5.657134945399265e-06, | |
| "loss": 0.6419547200202942, | |
| "step": 792 | |
| }, | |
| { | |
| "epoch": 1.6751054852320675, | |
| "grad_norm": 0.2932789921760559, | |
| "learning_rate": 5.645358581336249e-06, | |
| "loss": 0.8718560338020325, | |
| "step": 794 | |
| }, | |
| { | |
| "epoch": 1.6793248945147679, | |
| "grad_norm": 0.8630058169364929, | |
| "learning_rate": 5.633567059818208e-06, | |
| "loss": 1.0517830848693848, | |
| "step": 796 | |
| }, | |
| { | |
| "epoch": 1.6835443037974684, | |
| "grad_norm": 4.652339935302734, | |
| "learning_rate": 5.621760524361605e-06, | |
| "loss": 0.8228880167007446, | |
| "step": 798 | |
| }, | |
| { | |
| "epoch": 1.6877637130801688, | |
| "grad_norm": 1.278536081314087, | |
| "learning_rate": 5.6099391186656375e-06, | |
| "loss": 1.1810134649276733, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 1.6919831223628692, | |
| "grad_norm": 5.43027925491333, | |
| "learning_rate": 5.598102986610493e-06, | |
| "loss": 0.5597525238990784, | |
| "step": 802 | |
| }, | |
| { | |
| "epoch": 1.6962025316455698, | |
| "grad_norm": 4.56681489944458, | |
| "learning_rate": 5.586252272255595e-06, | |
| "loss": 1.1962707042694092, | |
| "step": 804 | |
| }, | |
| { | |
| "epoch": 1.70042194092827, | |
| "grad_norm": 0.887153148651123, | |
| "learning_rate": 5.574387119837848e-06, | |
| "loss": 1.0536723136901855, | |
| "step": 806 | |
| }, | |
| { | |
| "epoch": 1.7046413502109705, | |
| "grad_norm": 8.7942533493042, | |
| "learning_rate": 5.562507673769889e-06, | |
| "loss": 0.7714130282402039, | |
| "step": 808 | |
| }, | |
| { | |
| "epoch": 1.7088607594936709, | |
| "grad_norm": 2.3065688610076904, | |
| "learning_rate": 5.550614078638324e-06, | |
| "loss": 0.8722562193870544, | |
| "step": 810 | |
| }, | |
| { | |
| "epoch": 1.7130801687763713, | |
| "grad_norm": 1.0666223764419556, | |
| "learning_rate": 5.5387064792019686e-06, | |
| "loss": 1.1128357648849487, | |
| "step": 812 | |
| }, | |
| { | |
| "epoch": 1.7172995780590719, | |
| "grad_norm": 3.0997695922851562, | |
| "learning_rate": 5.526785020390084e-06, | |
| "loss": 1.4282304048538208, | |
| "step": 814 | |
| }, | |
| { | |
| "epoch": 1.721518987341772, | |
| "grad_norm": 1.0092432498931885, | |
| "learning_rate": 5.514849847300622e-06, | |
| "loss": 1.1036298274993896, | |
| "step": 816 | |
| }, | |
| { | |
| "epoch": 1.7257383966244726, | |
| "grad_norm": 3.5967748165130615, | |
| "learning_rate": 5.502901105198449e-06, | |
| "loss": 0.7901860475540161, | |
| "step": 818 | |
| }, | |
| { | |
| "epoch": 1.729957805907173, | |
| "grad_norm": 2.40335750579834, | |
| "learning_rate": 5.490938939513584e-06, | |
| "loss": 0.3646574020385742, | |
| "step": 820 | |
| }, | |
| { | |
| "epoch": 1.7341772151898733, | |
| "grad_norm": 3.5133466720581055, | |
| "learning_rate": 5.478963495839425e-06, | |
| "loss": 0.5445467233657837, | |
| "step": 822 | |
| }, | |
| { | |
| "epoch": 1.738396624472574, | |
| "grad_norm": 0.9658949375152588, | |
| "learning_rate": 5.466974919930979e-06, | |
| "loss": 0.7141355276107788, | |
| "step": 824 | |
| }, | |
| { | |
| "epoch": 1.7426160337552743, | |
| "grad_norm": 1.7418462038040161, | |
| "learning_rate": 5.454973357703087e-06, | |
| "loss": 0.8929092884063721, | |
| "step": 826 | |
| }, | |
| { | |
| "epoch": 1.7468354430379747, | |
| "grad_norm": 8.529016494750977, | |
| "learning_rate": 5.442958955228649e-06, | |
| "loss": 0.9267692565917969, | |
| "step": 828 | |
| }, | |
| { | |
| "epoch": 1.7510548523206753, | |
| "grad_norm": 0.9777578115463257, | |
| "learning_rate": 5.430931858736848e-06, | |
| "loss": 1.0351005792617798, | |
| "step": 830 | |
| }, | |
| { | |
| "epoch": 1.7552742616033754, | |
| "grad_norm": 1.306839108467102, | |
| "learning_rate": 5.418892214611364e-06, | |
| "loss": 1.0336472988128662, | |
| "step": 832 | |
| }, | |
| { | |
| "epoch": 1.759493670886076, | |
| "grad_norm": 0.9158060550689697, | |
| "learning_rate": 5.406840169388598e-06, | |
| "loss": 0.8349417448043823, | |
| "step": 834 | |
| }, | |
| { | |
| "epoch": 1.7637130801687764, | |
| "grad_norm": 4.128747940063477, | |
| "learning_rate": 5.394775869755888e-06, | |
| "loss": 1.078331470489502, | |
| "step": 836 | |
| }, | |
| { | |
| "epoch": 1.7679324894514767, | |
| "grad_norm": 2.023729085922241, | |
| "learning_rate": 5.3826994625497186e-06, | |
| "loss": 0.8993400931358337, | |
| "step": 838 | |
| }, | |
| { | |
| "epoch": 1.7721518987341773, | |
| "grad_norm": 1.9145705699920654, | |
| "learning_rate": 5.370611094753943e-06, | |
| "loss": 0.756892740726471, | |
| "step": 840 | |
| }, | |
| { | |
| "epoch": 1.7763713080168775, | |
| "grad_norm": 4.3195695877075195, | |
| "learning_rate": 5.358510913497981e-06, | |
| "loss": 0.8908122777938843, | |
| "step": 842 | |
| }, | |
| { | |
| "epoch": 1.780590717299578, | |
| "grad_norm": 0.7751283645629883, | |
| "learning_rate": 5.346399066055044e-06, | |
| "loss": 0.4248788058757782, | |
| "step": 844 | |
| }, | |
| { | |
| "epoch": 1.7848101265822784, | |
| "grad_norm": 0.7409003973007202, | |
| "learning_rate": 5.33427569984033e-06, | |
| "loss": 0.650154173374176, | |
| "step": 846 | |
| }, | |
| { | |
| "epoch": 1.7890295358649788, | |
| "grad_norm": 1.8226172924041748, | |
| "learning_rate": 5.322140962409236e-06, | |
| "loss": 0.59881591796875, | |
| "step": 848 | |
| }, | |
| { | |
| "epoch": 1.7932489451476794, | |
| "grad_norm": 1.4619311094284058, | |
| "learning_rate": 5.3099950014555554e-06, | |
| "loss": 0.7507359981536865, | |
| "step": 850 | |
| }, | |
| { | |
| "epoch": 1.7974683544303798, | |
| "grad_norm": 1.0151058435440063, | |
| "learning_rate": 5.29783796480969e-06, | |
| "loss": 1.127907633781433, | |
| "step": 852 | |
| }, | |
| { | |
| "epoch": 1.8016877637130801, | |
| "grad_norm": 2.056638240814209, | |
| "learning_rate": 5.2856700004368425e-06, | |
| "loss": 1.3744020462036133, | |
| "step": 854 | |
| }, | |
| { | |
| "epoch": 1.8059071729957807, | |
| "grad_norm": 0.30007457733154297, | |
| "learning_rate": 5.273491256435222e-06, | |
| "loss": 0.8465395569801331, | |
| "step": 856 | |
| }, | |
| { | |
| "epoch": 1.810126582278481, | |
| "grad_norm": 2.211362361907959, | |
| "learning_rate": 5.2613018810342314e-06, | |
| "loss": 0.9668091535568237, | |
| "step": 858 | |
| }, | |
| { | |
| "epoch": 1.8143459915611815, | |
| "grad_norm": 0.9358858466148376, | |
| "learning_rate": 5.24910202259268e-06, | |
| "loss": 0.664305567741394, | |
| "step": 860 | |
| }, | |
| { | |
| "epoch": 1.8185654008438819, | |
| "grad_norm": 1.0162758827209473, | |
| "learning_rate": 5.236891829596958e-06, | |
| "loss": 1.0983484983444214, | |
| "step": 862 | |
| }, | |
| { | |
| "epoch": 1.8227848101265822, | |
| "grad_norm": 0.8416312336921692, | |
| "learning_rate": 5.2246714506592454e-06, | |
| "loss": 0.9112118482589722, | |
| "step": 864 | |
| }, | |
| { | |
| "epoch": 1.8270042194092828, | |
| "grad_norm": 0.833525538444519, | |
| "learning_rate": 5.212441034515695e-06, | |
| "loss": 0.9819576740264893, | |
| "step": 866 | |
| }, | |
| { | |
| "epoch": 1.831223628691983, | |
| "grad_norm": 21.00221824645996, | |
| "learning_rate": 5.200200730024622e-06, | |
| "loss": 0.9238821268081665, | |
| "step": 868 | |
| }, | |
| { | |
| "epoch": 1.8354430379746836, | |
| "grad_norm": 0.7736468315124512, | |
| "learning_rate": 5.187950686164699e-06, | |
| "loss": 0.9560548663139343, | |
| "step": 870 | |
| }, | |
| { | |
| "epoch": 1.839662447257384, | |
| "grad_norm": 2.4590296745300293, | |
| "learning_rate": 5.175691052033133e-06, | |
| "loss": 0.7176443338394165, | |
| "step": 872 | |
| }, | |
| { | |
| "epoch": 1.8438818565400843, | |
| "grad_norm": 14.438817977905273, | |
| "learning_rate": 5.163421976843859e-06, | |
| "loss": 0.9139724373817444, | |
| "step": 874 | |
| }, | |
| { | |
| "epoch": 1.8481012658227849, | |
| "grad_norm": 0.7402142286300659, | |
| "learning_rate": 5.151143609925718e-06, | |
| "loss": 1.0629327297210693, | |
| "step": 876 | |
| }, | |
| { | |
| "epoch": 1.8523206751054853, | |
| "grad_norm": 2.3395838737487793, | |
| "learning_rate": 5.138856100720645e-06, | |
| "loss": 0.7460686564445496, | |
| "step": 878 | |
| }, | |
| { | |
| "epoch": 1.8565400843881856, | |
| "grad_norm": 1.0752966403961182, | |
| "learning_rate": 5.126559598781845e-06, | |
| "loss": 0.6765896677970886, | |
| "step": 880 | |
| }, | |
| { | |
| "epoch": 1.8607594936708862, | |
| "grad_norm": 1.11525559425354, | |
| "learning_rate": 5.114254253771977e-06, | |
| "loss": 0.8317286968231201, | |
| "step": 882 | |
| }, | |
| { | |
| "epoch": 1.8649789029535864, | |
| "grad_norm": 1.3351662158966064, | |
| "learning_rate": 5.1019402154613264e-06, | |
| "loss": 0.6764845252037048, | |
| "step": 884 | |
| }, | |
| { | |
| "epoch": 1.869198312236287, | |
| "grad_norm": 1.5033775568008423, | |
| "learning_rate": 5.089617633725992e-06, | |
| "loss": 0.7203776240348816, | |
| "step": 886 | |
| }, | |
| { | |
| "epoch": 1.8734177215189873, | |
| "grad_norm": 0.8415127992630005, | |
| "learning_rate": 5.07728665854605e-06, | |
| "loss": 1.027212142944336, | |
| "step": 888 | |
| }, | |
| { | |
| "epoch": 1.8776371308016877, | |
| "grad_norm": 6.813464641571045, | |
| "learning_rate": 5.064947440003741e-06, | |
| "loss": 0.3982529640197754, | |
| "step": 890 | |
| }, | |
| { | |
| "epoch": 1.8818565400843883, | |
| "grad_norm": 1.178280234336853, | |
| "learning_rate": 5.0526001282816285e-06, | |
| "loss": 0.6589434146881104, | |
| "step": 892 | |
| }, | |
| { | |
| "epoch": 1.8860759493670884, | |
| "grad_norm": 1.1725980043411255, | |
| "learning_rate": 5.0402448736607874e-06, | |
| "loss": 1.087322473526001, | |
| "step": 894 | |
| }, | |
| { | |
| "epoch": 1.890295358649789, | |
| "grad_norm": 0.9317290782928467, | |
| "learning_rate": 5.027881826518963e-06, | |
| "loss": 1.2050056457519531, | |
| "step": 896 | |
| }, | |
| { | |
| "epoch": 1.8945147679324894, | |
| "grad_norm": 3.785717010498047, | |
| "learning_rate": 5.015511137328743e-06, | |
| "loss": 0.7949274182319641, | |
| "step": 898 | |
| }, | |
| { | |
| "epoch": 1.8987341772151898, | |
| "grad_norm": 1.9153603315353394, | |
| "learning_rate": 5.003132956655735e-06, | |
| "loss": 0.9485737085342407, | |
| "step": 900 | |
| }, | |
| { | |
| "epoch": 1.9029535864978904, | |
| "grad_norm": 1.1202062368392944, | |
| "learning_rate": 4.990747435156715e-06, | |
| "loss": 1.1542925834655762, | |
| "step": 902 | |
| }, | |
| { | |
| "epoch": 1.9071729957805907, | |
| "grad_norm": 5.2798967361450195, | |
| "learning_rate": 4.978354723577818e-06, | |
| "loss": 0.9438016414642334, | |
| "step": 904 | |
| }, | |
| { | |
| "epoch": 1.9113924050632911, | |
| "grad_norm": 1.9839028120040894, | |
| "learning_rate": 4.965954972752677e-06, | |
| "loss": 1.31730055809021, | |
| "step": 906 | |
| }, | |
| { | |
| "epoch": 1.9156118143459917, | |
| "grad_norm": 3.2454068660736084, | |
| "learning_rate": 4.953548333600616e-06, | |
| "loss": 0.43834638595581055, | |
| "step": 908 | |
| }, | |
| { | |
| "epoch": 1.9198312236286919, | |
| "grad_norm": 0.7044057250022888, | |
| "learning_rate": 4.9411349571247845e-06, | |
| "loss": 1.0278995037078857, | |
| "step": 910 | |
| }, | |
| { | |
| "epoch": 1.9240506329113924, | |
| "grad_norm": 4.6270246505737305, | |
| "learning_rate": 4.928714994410341e-06, | |
| "loss": 0.7902883887290955, | |
| "step": 912 | |
| }, | |
| { | |
| "epoch": 1.9282700421940928, | |
| "grad_norm": 1.1006572246551514, | |
| "learning_rate": 4.9162885966226035e-06, | |
| "loss": 1.0976777076721191, | |
| "step": 914 | |
| }, | |
| { | |
| "epoch": 1.9324894514767932, | |
| "grad_norm": 1.0053693056106567, | |
| "learning_rate": 4.903855915005212e-06, | |
| "loss": 0.7121254205703735, | |
| "step": 916 | |
| }, | |
| { | |
| "epoch": 1.9367088607594938, | |
| "grad_norm": 0.7003014087677002, | |
| "learning_rate": 4.8914171008782885e-06, | |
| "loss": 1.054925560951233, | |
| "step": 918 | |
| }, | |
| { | |
| "epoch": 1.9409282700421941, | |
| "grad_norm": 1.9592584371566772, | |
| "learning_rate": 4.878972305636595e-06, | |
| "loss": 0.46024253964424133, | |
| "step": 920 | |
| }, | |
| { | |
| "epoch": 1.9451476793248945, | |
| "grad_norm": 0.7745406627655029, | |
| "learning_rate": 4.86652168074769e-06, | |
| "loss": 1.0552338361740112, | |
| "step": 922 | |
| }, | |
| { | |
| "epoch": 1.9493670886075949, | |
| "grad_norm": 1.3688344955444336, | |
| "learning_rate": 4.8540653777500865e-06, | |
| "loss": 1.0862473249435425, | |
| "step": 924 | |
| }, | |
| { | |
| "epoch": 1.9535864978902953, | |
| "grad_norm": 2.7827236652374268, | |
| "learning_rate": 4.841603548251406e-06, | |
| "loss": 0.8950420022010803, | |
| "step": 926 | |
| }, | |
| { | |
| "epoch": 1.9578059071729959, | |
| "grad_norm": 0.7784646153450012, | |
| "learning_rate": 4.829136343926532e-06, | |
| "loss": 0.720669686794281, | |
| "step": 928 | |
| }, | |
| { | |
| "epoch": 1.9620253164556962, | |
| "grad_norm": 1.3589555025100708, | |
| "learning_rate": 4.816663916515772e-06, | |
| "loss": 1.1043243408203125, | |
| "step": 930 | |
| }, | |
| { | |
| "epoch": 1.9662447257383966, | |
| "grad_norm": 3.5423665046691895, | |
| "learning_rate": 4.804186417822995e-06, | |
| "loss": 0.870411217212677, | |
| "step": 932 | |
| }, | |
| { | |
| "epoch": 1.9704641350210972, | |
| "grad_norm": 0.7861785292625427, | |
| "learning_rate": 4.791703999713803e-06, | |
| "loss": 1.1790004968643188, | |
| "step": 934 | |
| }, | |
| { | |
| "epoch": 1.9746835443037973, | |
| "grad_norm": 0.7902828454971313, | |
| "learning_rate": 4.779216814113667e-06, | |
| "loss": 1.103920340538025, | |
| "step": 936 | |
| }, | |
| { | |
| "epoch": 1.978902953586498, | |
| "grad_norm": 0.6530998945236206, | |
| "learning_rate": 4.766725013006085e-06, | |
| "loss": 0.7225710153579712, | |
| "step": 938 | |
| }, | |
| { | |
| "epoch": 1.9831223628691983, | |
| "grad_norm": 1.588179588317871, | |
| "learning_rate": 4.754228748430731e-06, | |
| "loss": 1.0604408979415894, | |
| "step": 940 | |
| }, | |
| { | |
| "epoch": 1.9873417721518987, | |
| "grad_norm": 1.7913926839828491, | |
| "learning_rate": 4.741728172481607e-06, | |
| "loss": 0.8651899099349976, | |
| "step": 942 | |
| }, | |
| { | |
| "epoch": 1.9915611814345993, | |
| "grad_norm": 0.891537070274353, | |
| "learning_rate": 4.729223437305187e-06, | |
| "loss": 0.6996287107467651, | |
| "step": 944 | |
| }, | |
| { | |
| "epoch": 1.9957805907172996, | |
| "grad_norm": 1.4173448085784912, | |
| "learning_rate": 4.716714695098568e-06, | |
| "loss": 1.0344507694244385, | |
| "step": 946 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "grad_norm": 3.7296454906463623, | |
| "learning_rate": 4.7042020981076185e-06, | |
| "loss": 0.5512294173240662, | |
| "step": 948 | |
| }, | |
| { | |
| "epoch": 2.0042194092827006, | |
| "grad_norm": 2.249424457550049, | |
| "learning_rate": 4.69168579862512e-06, | |
| "loss": 0.8092342615127563, | |
| "step": 950 | |
| }, | |
| { | |
| "epoch": 2.0084388185654007, | |
| "grad_norm": 2.6464383602142334, | |
| "learning_rate": 4.679165948988924e-06, | |
| "loss": 0.47413283586502075, | |
| "step": 952 | |
| }, | |
| { | |
| "epoch": 2.0126582278481013, | |
| "grad_norm": 1.5369104146957397, | |
| "learning_rate": 4.666642701580086e-06, | |
| "loss": 0.7702062129974365, | |
| "step": 954 | |
| }, | |
| { | |
| "epoch": 2.0168776371308015, | |
| "grad_norm": 1.0920283794403076, | |
| "learning_rate": 4.65411620882102e-06, | |
| "loss": 0.8473414182662964, | |
| "step": 956 | |
| }, | |
| { | |
| "epoch": 2.021097046413502, | |
| "grad_norm": 20.295406341552734, | |
| "learning_rate": 4.6415866231736375e-06, | |
| "loss": 0.6457698345184326, | |
| "step": 958 | |
| }, | |
| { | |
| "epoch": 2.0253164556962027, | |
| "grad_norm": 3.8915340900421143, | |
| "learning_rate": 4.629054097137493e-06, | |
| "loss": 0.7031627893447876, | |
| "step": 960 | |
| }, | |
| { | |
| "epoch": 2.029535864978903, | |
| "grad_norm": 1.0874841213226318, | |
| "learning_rate": 4.616518783247934e-06, | |
| "loss": 1.0022499561309814, | |
| "step": 962 | |
| }, | |
| { | |
| "epoch": 2.0337552742616034, | |
| "grad_norm": 5.714715480804443, | |
| "learning_rate": 4.603980834074232e-06, | |
| "loss": 0.7056564688682556, | |
| "step": 964 | |
| }, | |
| { | |
| "epoch": 2.037974683544304, | |
| "grad_norm": 0.8951921463012695, | |
| "learning_rate": 4.591440402217741e-06, | |
| "loss": 0.5630991458892822, | |
| "step": 966 | |
| }, | |
| { | |
| "epoch": 2.042194092827004, | |
| "grad_norm": 4.608378887176514, | |
| "learning_rate": 4.578897640310025e-06, | |
| "loss": 0.6585802435874939, | |
| "step": 968 | |
| }, | |
| { | |
| "epoch": 2.0464135021097047, | |
| "grad_norm": 1.6705124378204346, | |
| "learning_rate": 4.566352701011013e-06, | |
| "loss": 0.9024470448493958, | |
| "step": 970 | |
| }, | |
| { | |
| "epoch": 2.050632911392405, | |
| "grad_norm": 2.591546058654785, | |
| "learning_rate": 4.5538057370071315e-06, | |
| "loss": 0.7236870527267456, | |
| "step": 972 | |
| }, | |
| { | |
| "epoch": 2.0548523206751055, | |
| "grad_norm": 1.0205042362213135, | |
| "learning_rate": 4.541256901009451e-06, | |
| "loss": 0.7728800177574158, | |
| "step": 974 | |
| }, | |
| { | |
| "epoch": 2.059071729957806, | |
| "grad_norm": 2.32804799079895, | |
| "learning_rate": 4.528706345751826e-06, | |
| "loss": 0.6220592856407166, | |
| "step": 976 | |
| }, | |
| { | |
| "epoch": 2.0632911392405062, | |
| "grad_norm": 0.9847302436828613, | |
| "learning_rate": 4.516154223989039e-06, | |
| "loss": 0.6414508819580078, | |
| "step": 978 | |
| }, | |
| { | |
| "epoch": 2.067510548523207, | |
| "grad_norm": 1.0494519472122192, | |
| "learning_rate": 4.503600688494938e-06, | |
| "loss": 0.5687150359153748, | |
| "step": 980 | |
| }, | |
| { | |
| "epoch": 2.071729957805907, | |
| "grad_norm": 1.0996086597442627, | |
| "learning_rate": 4.491045892060573e-06, | |
| "loss": 0.9595503211021423, | |
| "step": 982 | |
| }, | |
| { | |
| "epoch": 2.0759493670886076, | |
| "grad_norm": 1.6307997703552246, | |
| "learning_rate": 4.478489987492346e-06, | |
| "loss": 0.8499625325202942, | |
| "step": 984 | |
| }, | |
| { | |
| "epoch": 2.080168776371308, | |
| "grad_norm": 1.1343793869018555, | |
| "learning_rate": 4.465933127610145e-06, | |
| "loss": 0.8802004456520081, | |
| "step": 986 | |
| }, | |
| { | |
| "epoch": 2.0843881856540083, | |
| "grad_norm": 0.8233914375305176, | |
| "learning_rate": 4.453375465245486e-06, | |
| "loss": 0.8876461982727051, | |
| "step": 988 | |
| }, | |
| { | |
| "epoch": 2.088607594936709, | |
| "grad_norm": 3.605290651321411, | |
| "learning_rate": 4.44081715323965e-06, | |
| "loss": 0.47245436906814575, | |
| "step": 990 | |
| }, | |
| { | |
| "epoch": 2.0928270042194095, | |
| "grad_norm": 1.4245373010635376, | |
| "learning_rate": 4.428258344441826e-06, | |
| "loss": 0.4930482804775238, | |
| "step": 992 | |
| }, | |
| { | |
| "epoch": 2.0970464135021096, | |
| "grad_norm": 1.0939189195632935, | |
| "learning_rate": 4.415699191707251e-06, | |
| "loss": 0.9832253456115723, | |
| "step": 994 | |
| }, | |
| { | |
| "epoch": 2.1012658227848102, | |
| "grad_norm": 1.3786028623580933, | |
| "learning_rate": 4.403139847895348e-06, | |
| "loss": 0.8831475377082825, | |
| "step": 996 | |
| }, | |
| { | |
| "epoch": 2.1054852320675104, | |
| "grad_norm": 0.33124950528144836, | |
| "learning_rate": 4.39058046586786e-06, | |
| "loss": 0.5398452877998352, | |
| "step": 998 | |
| }, | |
| { | |
| "epoch": 2.109704641350211, | |
| "grad_norm": 2.1223366260528564, | |
| "learning_rate": 4.3780211984870044e-06, | |
| "loss": 1.0190367698669434, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 2.1139240506329116, | |
| "grad_norm": 2.0882437229156494, | |
| "learning_rate": 4.365462198613595e-06, | |
| "loss": 0.8691745400428772, | |
| "step": 1002 | |
| }, | |
| { | |
| "epoch": 2.1181434599156117, | |
| "grad_norm": 0.9551434516906738, | |
| "learning_rate": 4.352903619105196e-06, | |
| "loss": 0.8893840909004211, | |
| "step": 1004 | |
| }, | |
| { | |
| "epoch": 2.1223628691983123, | |
| "grad_norm": 0.49108386039733887, | |
| "learning_rate": 4.340345612814251e-06, | |
| "loss": 0.5169594287872314, | |
| "step": 1006 | |
| }, | |
| { | |
| "epoch": 2.1265822784810124, | |
| "grad_norm": 0.9406089186668396, | |
| "learning_rate": 4.327788332586227e-06, | |
| "loss": 0.5989170074462891, | |
| "step": 1008 | |
| }, | |
| { | |
| "epoch": 2.130801687763713, | |
| "grad_norm": 1.099560022354126, | |
| "learning_rate": 4.315231931257758e-06, | |
| "loss": 0.5996731519699097, | |
| "step": 1010 | |
| }, | |
| { | |
| "epoch": 2.1350210970464136, | |
| "grad_norm": 12.219691276550293, | |
| "learning_rate": 4.302676561654775e-06, | |
| "loss": 0.8513282537460327, | |
| "step": 1012 | |
| }, | |
| { | |
| "epoch": 2.1392405063291138, | |
| "grad_norm": 2.0376791954040527, | |
| "learning_rate": 4.290122376590656e-06, | |
| "loss": 0.9961199164390564, | |
| "step": 1014 | |
| }, | |
| { | |
| "epoch": 2.1434599156118144, | |
| "grad_norm": 1.4444695711135864, | |
| "learning_rate": 4.2775695288643615e-06, | |
| "loss": 0.4728237986564636, | |
| "step": 1016 | |
| }, | |
| { | |
| "epoch": 2.147679324894515, | |
| "grad_norm": 1.0163081884384155, | |
| "learning_rate": 4.2650181712585735e-06, | |
| "loss": 0.7495555281639099, | |
| "step": 1018 | |
| }, | |
| { | |
| "epoch": 2.151898734177215, | |
| "grad_norm": 1.1818724870681763, | |
| "learning_rate": 4.252468456537838e-06, | |
| "loss": 0.6457207202911377, | |
| "step": 1020 | |
| }, | |
| { | |
| "epoch": 2.1561181434599157, | |
| "grad_norm": 2.961237907409668, | |
| "learning_rate": 4.239920537446705e-06, | |
| "loss": 0.7249948978424072, | |
| "step": 1022 | |
| }, | |
| { | |
| "epoch": 2.160337552742616, | |
| "grad_norm": 2.8546791076660156, | |
| "learning_rate": 4.227374566707871e-06, | |
| "loss": 0.6750069856643677, | |
| "step": 1024 | |
| }, | |
| { | |
| "epoch": 2.1645569620253164, | |
| "grad_norm": 1.0282621383666992, | |
| "learning_rate": 4.214830697020316e-06, | |
| "loss": 0.9150334000587463, | |
| "step": 1026 | |
| }, | |
| { | |
| "epoch": 2.168776371308017, | |
| "grad_norm": 0.8248642086982727, | |
| "learning_rate": 4.202289081057452e-06, | |
| "loss": 0.9421663284301758, | |
| "step": 1028 | |
| }, | |
| { | |
| "epoch": 2.172995780590717, | |
| "grad_norm": 0.9548051953315735, | |
| "learning_rate": 4.189749871465253e-06, | |
| "loss": 0.8729570508003235, | |
| "step": 1030 | |
| }, | |
| { | |
| "epoch": 2.1772151898734178, | |
| "grad_norm": 0.8367507457733154, | |
| "learning_rate": 4.177213220860416e-06, | |
| "loss": 0.8981440663337708, | |
| "step": 1032 | |
| }, | |
| { | |
| "epoch": 2.181434599156118, | |
| "grad_norm": 1.4248055219650269, | |
| "learning_rate": 4.164679281828482e-06, | |
| "loss": 0.8822668194770813, | |
| "step": 1034 | |
| }, | |
| { | |
| "epoch": 2.1856540084388185, | |
| "grad_norm": 0.9020785689353943, | |
| "learning_rate": 4.152148206921995e-06, | |
| "loss": 0.8814399838447571, | |
| "step": 1036 | |
| }, | |
| { | |
| "epoch": 2.189873417721519, | |
| "grad_norm": 1.4970018863677979, | |
| "learning_rate": 4.139620148658634e-06, | |
| "loss": 0.8485023379325867, | |
| "step": 1038 | |
| }, | |
| { | |
| "epoch": 2.1940928270042193, | |
| "grad_norm": 1.1914066076278687, | |
| "learning_rate": 4.127095259519368e-06, | |
| "loss": 1.0057520866394043, | |
| "step": 1040 | |
| }, | |
| { | |
| "epoch": 2.19831223628692, | |
| "grad_norm": 5.138652324676514, | |
| "learning_rate": 4.114573691946591e-06, | |
| "loss": 0.26296478509902954, | |
| "step": 1042 | |
| }, | |
| { | |
| "epoch": 2.2025316455696204, | |
| "grad_norm": 1.1444544792175293, | |
| "learning_rate": 4.102055598342269e-06, | |
| "loss": 0.8880115747451782, | |
| "step": 1044 | |
| }, | |
| { | |
| "epoch": 2.2067510548523206, | |
| "grad_norm": 1.740729808807373, | |
| "learning_rate": 4.089541131066086e-06, | |
| "loss": 0.5347674489021301, | |
| "step": 1046 | |
| }, | |
| { | |
| "epoch": 2.210970464135021, | |
| "grad_norm": 1.3183239698410034, | |
| "learning_rate": 4.077030442433593e-06, | |
| "loss": 0.790450930595398, | |
| "step": 1048 | |
| }, | |
| { | |
| "epoch": 2.2151898734177213, | |
| "grad_norm": 1.1291550397872925, | |
| "learning_rate": 4.064523684714344e-06, | |
| "loss": 0.8988840579986572, | |
| "step": 1050 | |
| }, | |
| { | |
| "epoch": 2.219409282700422, | |
| "grad_norm": 2.9497318267822266, | |
| "learning_rate": 4.052021010130056e-06, | |
| "loss": 0.7755071520805359, | |
| "step": 1052 | |
| }, | |
| { | |
| "epoch": 2.2236286919831225, | |
| "grad_norm": 2.4455068111419678, | |
| "learning_rate": 4.039522570852745e-06, | |
| "loss": 0.7849942445755005, | |
| "step": 1054 | |
| }, | |
| { | |
| "epoch": 2.2278481012658227, | |
| "grad_norm": 0.9835525751113892, | |
| "learning_rate": 4.0270285190028794e-06, | |
| "loss": 0.7088072896003723, | |
| "step": 1056 | |
| }, | |
| { | |
| "epoch": 2.2320675105485233, | |
| "grad_norm": 20.216365814208984, | |
| "learning_rate": 4.014539006647528e-06, | |
| "loss": 0.42411160469055176, | |
| "step": 1058 | |
| }, | |
| { | |
| "epoch": 2.2362869198312234, | |
| "grad_norm": 0.8427597284317017, | |
| "learning_rate": 4.002054185798509e-06, | |
| "loss": 0.8620681762695312, | |
| "step": 1060 | |
| }, | |
| { | |
| "epoch": 2.240506329113924, | |
| "grad_norm": 0.3895626366138458, | |
| "learning_rate": 3.98957420841054e-06, | |
| "loss": 0.6363852024078369, | |
| "step": 1062 | |
| }, | |
| { | |
| "epoch": 2.2447257383966246, | |
| "grad_norm": 1.1307460069656372, | |
| "learning_rate": 3.977099226379386e-06, | |
| "loss": 0.4475446343421936, | |
| "step": 1064 | |
| }, | |
| { | |
| "epoch": 2.2489451476793247, | |
| "grad_norm": 1.3451250791549683, | |
| "learning_rate": 3.9646293915400145e-06, | |
| "loss": 0.8441832661628723, | |
| "step": 1066 | |
| }, | |
| { | |
| "epoch": 2.2531645569620253, | |
| "grad_norm": 1.8237205743789673, | |
| "learning_rate": 3.952164855664745e-06, | |
| "loss": 1.0592007637023926, | |
| "step": 1068 | |
| }, | |
| { | |
| "epoch": 2.257383966244726, | |
| "grad_norm": 1.1085244417190552, | |
| "learning_rate": 3.939705770461403e-06, | |
| "loss": 1.0274057388305664, | |
| "step": 1070 | |
| }, | |
| { | |
| "epoch": 2.261603375527426, | |
| "grad_norm": 1.4007558822631836, | |
| "learning_rate": 3.927252287571472e-06, | |
| "loss": 0.8607990145683289, | |
| "step": 1072 | |
| }, | |
| { | |
| "epoch": 2.2658227848101267, | |
| "grad_norm": 3.7572860717773438, | |
| "learning_rate": 3.914804558568251e-06, | |
| "loss": 1.1480568647384644, | |
| "step": 1074 | |
| }, | |
| { | |
| "epoch": 2.270042194092827, | |
| "grad_norm": 0.819203794002533, | |
| "learning_rate": 3.902362734955003e-06, | |
| "loss": 0.8235105872154236, | |
| "step": 1076 | |
| }, | |
| { | |
| "epoch": 2.2742616033755274, | |
| "grad_norm": 0.528959333896637, | |
| "learning_rate": 3.889926968163123e-06, | |
| "loss": 0.5926033854484558, | |
| "step": 1078 | |
| }, | |
| { | |
| "epoch": 2.278481012658228, | |
| "grad_norm": 1.5626213550567627, | |
| "learning_rate": 3.877497409550281e-06, | |
| "loss": 0.7218382358551025, | |
| "step": 1080 | |
| }, | |
| { | |
| "epoch": 2.282700421940928, | |
| "grad_norm": 1.657475233078003, | |
| "learning_rate": 3.8650742103985865e-06, | |
| "loss": 0.33192554116249084, | |
| "step": 1082 | |
| }, | |
| { | |
| "epoch": 2.2869198312236287, | |
| "grad_norm": 1.3998394012451172, | |
| "learning_rate": 3.852657521912752e-06, | |
| "loss": 0.5696985721588135, | |
| "step": 1084 | |
| }, | |
| { | |
| "epoch": 2.291139240506329, | |
| "grad_norm": 0.8090922832489014, | |
| "learning_rate": 3.840247495218242e-06, | |
| "loss": 0.4131937325000763, | |
| "step": 1086 | |
| }, | |
| { | |
| "epoch": 2.2953586497890295, | |
| "grad_norm": 1.96702241897583, | |
| "learning_rate": 3.827844281359444e-06, | |
| "loss": 0.5371357202529907, | |
| "step": 1088 | |
| }, | |
| { | |
| "epoch": 2.29957805907173, | |
| "grad_norm": 0.4463783800601959, | |
| "learning_rate": 3.815448031297822e-06, | |
| "loss": 0.48086562752723694, | |
| "step": 1090 | |
| }, | |
| { | |
| "epoch": 2.3037974683544302, | |
| "grad_norm": 2.2645716667175293, | |
| "learning_rate": 3.8030588959100845e-06, | |
| "loss": 0.759406328201294, | |
| "step": 1092 | |
| }, | |
| { | |
| "epoch": 2.308016877637131, | |
| "grad_norm": 0.9995399117469788, | |
| "learning_rate": 3.790677025986345e-06, | |
| "loss": 0.5466501116752625, | |
| "step": 1094 | |
| }, | |
| { | |
| "epoch": 2.3122362869198314, | |
| "grad_norm": 2.6267566680908203, | |
| "learning_rate": 3.7783025722282897e-06, | |
| "loss": 0.35581734776496887, | |
| "step": 1096 | |
| }, | |
| { | |
| "epoch": 2.3164556962025316, | |
| "grad_norm": 2.8866639137268066, | |
| "learning_rate": 3.765935685247338e-06, | |
| "loss": 0.8641759157180786, | |
| "step": 1098 | |
| }, | |
| { | |
| "epoch": 2.320675105485232, | |
| "grad_norm": 1.3129066228866577, | |
| "learning_rate": 3.753576515562816e-06, | |
| "loss": 0.7505000233650208, | |
| "step": 1100 | |
| }, | |
| { | |
| "epoch": 2.3248945147679323, | |
| "grad_norm": 1.0732929706573486, | |
| "learning_rate": 3.7412252136001213e-06, | |
| "loss": 0.8979564905166626, | |
| "step": 1102 | |
| }, | |
| { | |
| "epoch": 2.329113924050633, | |
| "grad_norm": 0.7349892854690552, | |
| "learning_rate": 3.7288819296888898e-06, | |
| "loss": 1.1566518545150757, | |
| "step": 1104 | |
| }, | |
| { | |
| "epoch": 2.3333333333333335, | |
| "grad_norm": 1.2569828033447266, | |
| "learning_rate": 3.716546814061171e-06, | |
| "loss": 0.6977556347846985, | |
| "step": 1106 | |
| }, | |
| { | |
| "epoch": 2.3375527426160336, | |
| "grad_norm": 2.377737522125244, | |
| "learning_rate": 3.7042200168495946e-06, | |
| "loss": 0.44933831691741943, | |
| "step": 1108 | |
| }, | |
| { | |
| "epoch": 2.3417721518987342, | |
| "grad_norm": 1.586523413658142, | |
| "learning_rate": 3.691901688085548e-06, | |
| "loss": 0.8763599395751953, | |
| "step": 1110 | |
| }, | |
| { | |
| "epoch": 2.3459915611814344, | |
| "grad_norm": 1.1108835935592651, | |
| "learning_rate": 3.6795919776973473e-06, | |
| "loss": 0.9540433287620544, | |
| "step": 1112 | |
| }, | |
| { | |
| "epoch": 2.350210970464135, | |
| "grad_norm": 0.9403799176216125, | |
| "learning_rate": 3.667291035508411e-06, | |
| "loss": 1.034621000289917, | |
| "step": 1114 | |
| }, | |
| { | |
| "epoch": 2.3544303797468356, | |
| "grad_norm": 0.43867227435112, | |
| "learning_rate": 3.65499901123544e-06, | |
| "loss": 0.4901280999183655, | |
| "step": 1116 | |
| }, | |
| { | |
| "epoch": 2.3586497890295357, | |
| "grad_norm": 2.578577995300293, | |
| "learning_rate": 3.642716054486595e-06, | |
| "loss": 0.7974634170532227, | |
| "step": 1118 | |
| }, | |
| { | |
| "epoch": 2.3628691983122363, | |
| "grad_norm": 1.1387748718261719, | |
| "learning_rate": 3.630442314759671e-06, | |
| "loss": 0.5818929672241211, | |
| "step": 1120 | |
| }, | |
| { | |
| "epoch": 2.367088607594937, | |
| "grad_norm": 2.1316514015197754, | |
| "learning_rate": 3.618177941440285e-06, | |
| "loss": 0.7703042030334473, | |
| "step": 1122 | |
| }, | |
| { | |
| "epoch": 2.371308016877637, | |
| "grad_norm": 0.8024750351905823, | |
| "learning_rate": 3.605923083800051e-06, | |
| "loss": 0.5044012069702148, | |
| "step": 1124 | |
| }, | |
| { | |
| "epoch": 2.3755274261603376, | |
| "grad_norm": 1.283586025238037, | |
| "learning_rate": 3.593677890994768e-06, | |
| "loss": 0.663129448890686, | |
| "step": 1126 | |
| }, | |
| { | |
| "epoch": 2.379746835443038, | |
| "grad_norm": 1.0305255651474, | |
| "learning_rate": 3.581442512062602e-06, | |
| "loss": 0.8820338249206543, | |
| "step": 1128 | |
| }, | |
| { | |
| "epoch": 2.3839662447257384, | |
| "grad_norm": 2.735337972640991, | |
| "learning_rate": 3.5692170959222735e-06, | |
| "loss": 0.42376741766929626, | |
| "step": 1130 | |
| }, | |
| { | |
| "epoch": 2.388185654008439, | |
| "grad_norm": 4.083228588104248, | |
| "learning_rate": 3.5570017913712438e-06, | |
| "loss": 0.3104958236217499, | |
| "step": 1132 | |
| }, | |
| { | |
| "epoch": 2.392405063291139, | |
| "grad_norm": 1.5016670227050781, | |
| "learning_rate": 3.5447967470839038e-06, | |
| "loss": 0.34900638461112976, | |
| "step": 1134 | |
| }, | |
| { | |
| "epoch": 2.3966244725738397, | |
| "grad_norm": 1.8445940017700195, | |
| "learning_rate": 3.5326021116097655e-06, | |
| "loss": 0.5472123026847839, | |
| "step": 1136 | |
| }, | |
| { | |
| "epoch": 2.40084388185654, | |
| "grad_norm": 1.0861736536026, | |
| "learning_rate": 3.520418033371655e-06, | |
| "loss": 0.9556151628494263, | |
| "step": 1138 | |
| }, | |
| { | |
| "epoch": 2.4050632911392404, | |
| "grad_norm": 3.4282290935516357, | |
| "learning_rate": 3.5082446606639014e-06, | |
| "loss": 0.7003535032272339, | |
| "step": 1140 | |
| }, | |
| { | |
| "epoch": 2.409282700421941, | |
| "grad_norm": 3.9306039810180664, | |
| "learning_rate": 3.4960821416505406e-06, | |
| "loss": 0.24855707585811615, | |
| "step": 1142 | |
| }, | |
| { | |
| "epoch": 2.413502109704641, | |
| "grad_norm": 0.9033668041229248, | |
| "learning_rate": 3.4839306243635003e-06, | |
| "loss": 0.7160732746124268, | |
| "step": 1144 | |
| }, | |
| { | |
| "epoch": 2.4177215189873418, | |
| "grad_norm": 1.116635799407959, | |
| "learning_rate": 3.4717902567008086e-06, | |
| "loss": 0.9801563620567322, | |
| "step": 1146 | |
| }, | |
| { | |
| "epoch": 2.4219409282700424, | |
| "grad_norm": 4.026218891143799, | |
| "learning_rate": 3.459661186424787e-06, | |
| "loss": 0.7096956968307495, | |
| "step": 1148 | |
| }, | |
| { | |
| "epoch": 2.4261603375527425, | |
| "grad_norm": 1.311513066291809, | |
| "learning_rate": 3.447543561160258e-06, | |
| "loss": 0.9519820809364319, | |
| "step": 1150 | |
| }, | |
| { | |
| "epoch": 2.430379746835443, | |
| "grad_norm": 3.234283208847046, | |
| "learning_rate": 3.435437528392741e-06, | |
| "loss": 0.6188116073608398, | |
| "step": 1152 | |
| }, | |
| { | |
| "epoch": 2.4345991561181437, | |
| "grad_norm": 0.9476786851882935, | |
| "learning_rate": 3.4233432354666666e-06, | |
| "loss": 1.0032005310058594, | |
| "step": 1154 | |
| }, | |
| { | |
| "epoch": 2.438818565400844, | |
| "grad_norm": 1.0260239839553833, | |
| "learning_rate": 3.4112608295835718e-06, | |
| "loss": 0.3281160891056061, | |
| "step": 1156 | |
| }, | |
| { | |
| "epoch": 2.4430379746835444, | |
| "grad_norm": 0.7956026196479797, | |
| "learning_rate": 3.3991904578003182e-06, | |
| "loss": 0.627183735370636, | |
| "step": 1158 | |
| }, | |
| { | |
| "epoch": 2.4472573839662446, | |
| "grad_norm": 0.9774817824363708, | |
| "learning_rate": 3.3871322670273e-06, | |
| "loss": 0.9342701435089111, | |
| "step": 1160 | |
| }, | |
| { | |
| "epoch": 2.451476793248945, | |
| "grad_norm": 1.73080313205719, | |
| "learning_rate": 3.3750864040266497e-06, | |
| "loss": 0.5555570721626282, | |
| "step": 1162 | |
| }, | |
| { | |
| "epoch": 2.4556962025316453, | |
| "grad_norm": 1.2167036533355713, | |
| "learning_rate": 3.3630530154104603e-06, | |
| "loss": 0.8571757674217224, | |
| "step": 1164 | |
| }, | |
| { | |
| "epoch": 2.459915611814346, | |
| "grad_norm": 0.8792468905448914, | |
| "learning_rate": 3.3510322476389953e-06, | |
| "loss": 0.8499954342842102, | |
| "step": 1166 | |
| }, | |
| { | |
| "epoch": 2.4641350210970465, | |
| "grad_norm": 0.3647661805152893, | |
| "learning_rate": 3.33902424701891e-06, | |
| "loss": 0.4817237854003906, | |
| "step": 1168 | |
| }, | |
| { | |
| "epoch": 2.4683544303797467, | |
| "grad_norm": 1.5427345037460327, | |
| "learning_rate": 3.327029159701465e-06, | |
| "loss": 0.8259966373443604, | |
| "step": 1170 | |
| }, | |
| { | |
| "epoch": 2.4725738396624473, | |
| "grad_norm": 0.9573671221733093, | |
| "learning_rate": 3.315047131680755e-06, | |
| "loss": 0.9262470006942749, | |
| "step": 1172 | |
| }, | |
| { | |
| "epoch": 2.476793248945148, | |
| "grad_norm": 0.8954631686210632, | |
| "learning_rate": 3.3030783087919253e-06, | |
| "loss": 0.8667972087860107, | |
| "step": 1174 | |
| }, | |
| { | |
| "epoch": 2.481012658227848, | |
| "grad_norm": 0.998231828212738, | |
| "learning_rate": 3.291122836709402e-06, | |
| "loss": 0.6898888349533081, | |
| "step": 1176 | |
| }, | |
| { | |
| "epoch": 2.4852320675105486, | |
| "grad_norm": 3.1478688716888428, | |
| "learning_rate": 3.2791808609451125e-06, | |
| "loss": 0.3274869918823242, | |
| "step": 1178 | |
| }, | |
| { | |
| "epoch": 2.489451476793249, | |
| "grad_norm": 11.714877128601074, | |
| "learning_rate": 3.2672525268467225e-06, | |
| "loss": 0.6489510536193848, | |
| "step": 1180 | |
| }, | |
| { | |
| "epoch": 2.4936708860759493, | |
| "grad_norm": 1.9469349384307861, | |
| "learning_rate": 3.2553379795958604e-06, | |
| "loss": 0.6815069913864136, | |
| "step": 1182 | |
| }, | |
| { | |
| "epoch": 2.49789029535865, | |
| "grad_norm": 2.3261117935180664, | |
| "learning_rate": 3.2434373642063522e-06, | |
| "loss": 0.3795571029186249, | |
| "step": 1184 | |
| }, | |
| { | |
| "epoch": 2.50210970464135, | |
| "grad_norm": 2.7311949729919434, | |
| "learning_rate": 3.2315508255224613e-06, | |
| "loss": 0.3261902630329132, | |
| "step": 1186 | |
| }, | |
| { | |
| "epoch": 2.5063291139240507, | |
| "grad_norm": 2.2631030082702637, | |
| "learning_rate": 3.2196785082171147e-06, | |
| "loss": 0.5865919589996338, | |
| "step": 1188 | |
| }, | |
| { | |
| "epoch": 2.510548523206751, | |
| "grad_norm": 0.8359600305557251, | |
| "learning_rate": 3.207820556790155e-06, | |
| "loss": 0.8902769088745117, | |
| "step": 1190 | |
| }, | |
| { | |
| "epoch": 2.5147679324894514, | |
| "grad_norm": 2.3550963401794434, | |
| "learning_rate": 3.1959771155665715e-06, | |
| "loss": 0.4082001745700836, | |
| "step": 1192 | |
| }, | |
| { | |
| "epoch": 2.518987341772152, | |
| "grad_norm": 4.461960315704346, | |
| "learning_rate": 3.184148328694748e-06, | |
| "loss": 1.1846554279327393, | |
| "step": 1194 | |
| }, | |
| { | |
| "epoch": 2.523206751054852, | |
| "grad_norm": 1.4942057132720947, | |
| "learning_rate": 3.1723343401447107e-06, | |
| "loss": 0.9881184697151184, | |
| "step": 1196 | |
| }, | |
| { | |
| "epoch": 2.5274261603375527, | |
| "grad_norm": 2.0736021995544434, | |
| "learning_rate": 3.160535293706369e-06, | |
| "loss": 0.9017194509506226, | |
| "step": 1198 | |
| }, | |
| { | |
| "epoch": 2.5316455696202533, | |
| "grad_norm": 3.7537925243377686, | |
| "learning_rate": 3.148751332987772e-06, | |
| "loss": 0.5090019106864929, | |
| "step": 1200 | |
| }, | |
| { | |
| "epoch": 2.5358649789029535, | |
| "grad_norm": 1.3264377117156982, | |
| "learning_rate": 3.1369826014133594e-06, | |
| "loss": 0.67947918176651, | |
| "step": 1202 | |
| }, | |
| { | |
| "epoch": 2.540084388185654, | |
| "grad_norm": 3.953713893890381, | |
| "learning_rate": 3.125229242222211e-06, | |
| "loss": 0.5951077342033386, | |
| "step": 1204 | |
| }, | |
| { | |
| "epoch": 2.5443037974683547, | |
| "grad_norm": 0.990692675113678, | |
| "learning_rate": 3.1134913984663093e-06, | |
| "loss": 0.8030409812927246, | |
| "step": 1206 | |
| }, | |
| { | |
| "epoch": 2.548523206751055, | |
| "grad_norm": 3.0001838207244873, | |
| "learning_rate": 3.101769213008796e-06, | |
| "loss": 0.6891695261001587, | |
| "step": 1208 | |
| }, | |
| { | |
| "epoch": 2.5527426160337554, | |
| "grad_norm": 1.335438847541809, | |
| "learning_rate": 3.0900628285222307e-06, | |
| "loss": 0.9814665913581848, | |
| "step": 1210 | |
| }, | |
| { | |
| "epoch": 2.5569620253164556, | |
| "grad_norm": 1.2493577003479004, | |
| "learning_rate": 3.078372387486861e-06, | |
| "loss": 0.9131478667259216, | |
| "step": 1212 | |
| }, | |
| { | |
| "epoch": 2.561181434599156, | |
| "grad_norm": 2.756460428237915, | |
| "learning_rate": 3.0666980321888823e-06, | |
| "loss": 0.27317380905151367, | |
| "step": 1214 | |
| }, | |
| { | |
| "epoch": 2.5654008438818563, | |
| "grad_norm": 3.6866559982299805, | |
| "learning_rate": 3.055039904718706e-06, | |
| "loss": 0.6986894011497498, | |
| "step": 1216 | |
| }, | |
| { | |
| "epoch": 2.569620253164557, | |
| "grad_norm": 0.7736930847167969, | |
| "learning_rate": 3.0433981469692346e-06, | |
| "loss": 0.8533654808998108, | |
| "step": 1218 | |
| }, | |
| { | |
| "epoch": 2.5738396624472575, | |
| "grad_norm": 6.2710161209106445, | |
| "learning_rate": 3.0317729006341315e-06, | |
| "loss": 0.5412061214447021, | |
| "step": 1220 | |
| }, | |
| { | |
| "epoch": 2.5780590717299576, | |
| "grad_norm": 2.4914796352386475, | |
| "learning_rate": 3.0201643072060964e-06, | |
| "loss": 0.7507292628288269, | |
| "step": 1222 | |
| }, | |
| { | |
| "epoch": 2.5822784810126582, | |
| "grad_norm": 4.1669840812683105, | |
| "learning_rate": 3.0085725079751465e-06, | |
| "loss": 0.599193274974823, | |
| "step": 1224 | |
| }, | |
| { | |
| "epoch": 2.586497890295359, | |
| "grad_norm": 1.4165141582489014, | |
| "learning_rate": 2.996997644026889e-06, | |
| "loss": 0.542171835899353, | |
| "step": 1226 | |
| }, | |
| { | |
| "epoch": 2.590717299578059, | |
| "grad_norm": 1.2593107223510742, | |
| "learning_rate": 2.9854398562408144e-06, | |
| "loss": 0.8244262933731079, | |
| "step": 1228 | |
| }, | |
| { | |
| "epoch": 2.5949367088607596, | |
| "grad_norm": 1.6781362295150757, | |
| "learning_rate": 2.9738992852885742e-06, | |
| "loss": 1.0771939754486084, | |
| "step": 1230 | |
| }, | |
| { | |
| "epoch": 2.59915611814346, | |
| "grad_norm": 1.0754374265670776, | |
| "learning_rate": 2.9623760716322706e-06, | |
| "loss": 0.7803739309310913, | |
| "step": 1232 | |
| }, | |
| { | |
| "epoch": 2.6033755274261603, | |
| "grad_norm": 4.246564865112305, | |
| "learning_rate": 2.950870355522748e-06, | |
| "loss": 0.2662976384162903, | |
| "step": 1234 | |
| }, | |
| { | |
| "epoch": 2.607594936708861, | |
| "grad_norm": 1.650658369064331, | |
| "learning_rate": 2.939382276997886e-06, | |
| "loss": 0.9140543937683105, | |
| "step": 1236 | |
| }, | |
| { | |
| "epoch": 2.611814345991561, | |
| "grad_norm": 5.929245471954346, | |
| "learning_rate": 2.9279119758808942e-06, | |
| "loss": 1.1032469272613525, | |
| "step": 1238 | |
| }, | |
| { | |
| "epoch": 2.6160337552742616, | |
| "grad_norm": 1.0307083129882812, | |
| "learning_rate": 2.9164595917786088e-06, | |
| "loss": 0.6352362632751465, | |
| "step": 1240 | |
| }, | |
| { | |
| "epoch": 2.620253164556962, | |
| "grad_norm": 1.3630961179733276, | |
| "learning_rate": 2.905025264079799e-06, | |
| "loss": 0.8276194334030151, | |
| "step": 1242 | |
| }, | |
| { | |
| "epoch": 2.6244725738396624, | |
| "grad_norm": 2.032569408416748, | |
| "learning_rate": 2.8936091319534617e-06, | |
| "loss": 0.4083612859249115, | |
| "step": 1244 | |
| }, | |
| { | |
| "epoch": 2.628691983122363, | |
| "grad_norm": 0.8530462384223938, | |
| "learning_rate": 2.8822113343471365e-06, | |
| "loss": 0.6202731132507324, | |
| "step": 1246 | |
| }, | |
| { | |
| "epoch": 2.632911392405063, | |
| "grad_norm": 1.9822677373886108, | |
| "learning_rate": 2.8708320099852108e-06, | |
| "loss": 1.1646617650985718, | |
| "step": 1248 | |
| }, | |
| { | |
| "epoch": 2.6371308016877637, | |
| "grad_norm": 0.7690547108650208, | |
| "learning_rate": 2.8594712973672276e-06, | |
| "loss": 0.8482010364532471, | |
| "step": 1250 | |
| }, | |
| { | |
| "epoch": 2.6413502109704643, | |
| "grad_norm": 8.547155380249023, | |
| "learning_rate": 2.8481293347662067e-06, | |
| "loss": 0.904060959815979, | |
| "step": 1252 | |
| }, | |
| { | |
| "epoch": 2.6455696202531644, | |
| "grad_norm": 2.017336368560791, | |
| "learning_rate": 2.8368062602269573e-06, | |
| "loss": 0.3393191993236542, | |
| "step": 1254 | |
| }, | |
| { | |
| "epoch": 2.649789029535865, | |
| "grad_norm": 1.945145845413208, | |
| "learning_rate": 2.8255022115644017e-06, | |
| "loss": 0.39150819182395935, | |
| "step": 1256 | |
| }, | |
| { | |
| "epoch": 2.6540084388185656, | |
| "grad_norm": 1.3301414251327515, | |
| "learning_rate": 2.8142173263618877e-06, | |
| "loss": 0.7564312815666199, | |
| "step": 1258 | |
| }, | |
| { | |
| "epoch": 2.6582278481012658, | |
| "grad_norm": 0.9791122078895569, | |
| "learning_rate": 2.8029517419695303e-06, | |
| "loss": 0.8787249326705933, | |
| "step": 1260 | |
| }, | |
| { | |
| "epoch": 2.6624472573839664, | |
| "grad_norm": 1.0031580924987793, | |
| "learning_rate": 2.7917055955025285e-06, | |
| "loss": 0.8559532165527344, | |
| "step": 1262 | |
| }, | |
| { | |
| "epoch": 2.6666666666666665, | |
| "grad_norm": 0.7568211555480957, | |
| "learning_rate": 2.7804790238394958e-06, | |
| "loss": 0.5114046931266785, | |
| "step": 1264 | |
| }, | |
| { | |
| "epoch": 2.670886075949367, | |
| "grad_norm": 1.7229481935501099, | |
| "learning_rate": 2.7692721636208013e-06, | |
| "loss": 0.8251296281814575, | |
| "step": 1266 | |
| }, | |
| { | |
| "epoch": 2.6751054852320673, | |
| "grad_norm": 0.9991238713264465, | |
| "learning_rate": 2.7580851512469024e-06, | |
| "loss": 0.6419144868850708, | |
| "step": 1268 | |
| }, | |
| { | |
| "epoch": 2.679324894514768, | |
| "grad_norm": 1.1213876008987427, | |
| "learning_rate": 2.746918122876686e-06, | |
| "loss": 0.36948972940444946, | |
| "step": 1270 | |
| }, | |
| { | |
| "epoch": 2.6835443037974684, | |
| "grad_norm": 1.1551014184951782, | |
| "learning_rate": 2.7357712144258074e-06, | |
| "loss": 0.8657974004745483, | |
| "step": 1272 | |
| }, | |
| { | |
| "epoch": 2.6877637130801686, | |
| "grad_norm": 7.327043533325195, | |
| "learning_rate": 2.724644561565042e-06, | |
| "loss": 0.6017997860908508, | |
| "step": 1274 | |
| }, | |
| { | |
| "epoch": 2.691983122362869, | |
| "grad_norm": 3.296600818634033, | |
| "learning_rate": 2.713538299718631e-06, | |
| "loss": 0.6844916343688965, | |
| "step": 1276 | |
| }, | |
| { | |
| "epoch": 2.6962025316455698, | |
| "grad_norm": 0.31361812353134155, | |
| "learning_rate": 2.702452564062635e-06, | |
| "loss": 0.2726902365684509, | |
| "step": 1278 | |
| }, | |
| { | |
| "epoch": 2.70042194092827, | |
| "grad_norm": 1.6500128507614136, | |
| "learning_rate": 2.69138748952328e-06, | |
| "loss": 0.8048746585845947, | |
| "step": 1280 | |
| }, | |
| { | |
| "epoch": 2.7046413502109705, | |
| "grad_norm": 1.1757248640060425, | |
| "learning_rate": 2.680343210775331e-06, | |
| "loss": 0.9176240563392639, | |
| "step": 1282 | |
| }, | |
| { | |
| "epoch": 2.708860759493671, | |
| "grad_norm": 2.345834493637085, | |
| "learning_rate": 2.6693198622404403e-06, | |
| "loss": 0.4069772958755493, | |
| "step": 1284 | |
| }, | |
| { | |
| "epoch": 2.7130801687763713, | |
| "grad_norm": 5.173031330108643, | |
| "learning_rate": 2.658317578085514e-06, | |
| "loss": 0.4281209409236908, | |
| "step": 1286 | |
| }, | |
| { | |
| "epoch": 2.717299578059072, | |
| "grad_norm": 0.6406076550483704, | |
| "learning_rate": 2.647336492221082e-06, | |
| "loss": 0.4584686756134033, | |
| "step": 1288 | |
| }, | |
| { | |
| "epoch": 2.721518987341772, | |
| "grad_norm": 0.30545204877853394, | |
| "learning_rate": 2.636376738299666e-06, | |
| "loss": 0.7299985289573669, | |
| "step": 1290 | |
| }, | |
| { | |
| "epoch": 2.7257383966244726, | |
| "grad_norm": 2.3275787830352783, | |
| "learning_rate": 2.6254384497141563e-06, | |
| "loss": 0.8682552576065063, | |
| "step": 1292 | |
| }, | |
| { | |
| "epoch": 2.7299578059071727, | |
| "grad_norm": 1.1502134799957275, | |
| "learning_rate": 2.6145217595961786e-06, | |
| "loss": 0.36897793412208557, | |
| "step": 1294 | |
| }, | |
| { | |
| "epoch": 2.7341772151898733, | |
| "grad_norm": 0.9601994752883911, | |
| "learning_rate": 2.603626800814486e-06, | |
| "loss": 0.8473520278930664, | |
| "step": 1296 | |
| }, | |
| { | |
| "epoch": 2.738396624472574, | |
| "grad_norm": 0.9873552322387695, | |
| "learning_rate": 2.5927537059733337e-06, | |
| "loss": 0.9228261113166809, | |
| "step": 1298 | |
| }, | |
| { | |
| "epoch": 2.742616033755274, | |
| "grad_norm": 0.5264573097229004, | |
| "learning_rate": 2.5819026074108695e-06, | |
| "loss": 0.6119830012321472, | |
| "step": 1300 | |
| }, | |
| { | |
| "epoch": 2.7468354430379747, | |
| "grad_norm": 0.9602957963943481, | |
| "learning_rate": 2.5710736371975165e-06, | |
| "loss": 0.9762548208236694, | |
| "step": 1302 | |
| }, | |
| { | |
| "epoch": 2.7510548523206753, | |
| "grad_norm": 0.9380753040313721, | |
| "learning_rate": 2.560266927134375e-06, | |
| "loss": 0.5131715536117554, | |
| "step": 1304 | |
| }, | |
| { | |
| "epoch": 2.7552742616033754, | |
| "grad_norm": 1.438719630241394, | |
| "learning_rate": 2.549482608751613e-06, | |
| "loss": 1.091052532196045, | |
| "step": 1306 | |
| }, | |
| { | |
| "epoch": 2.759493670886076, | |
| "grad_norm": 1.7355360984802246, | |
| "learning_rate": 2.5387208133068613e-06, | |
| "loss": 0.9066473245620728, | |
| "step": 1308 | |
| }, | |
| { | |
| "epoch": 2.7637130801687766, | |
| "grad_norm": 2.98097825050354, | |
| "learning_rate": 2.5279816717836256e-06, | |
| "loss": 0.7622301578521729, | |
| "step": 1310 | |
| }, | |
| { | |
| "epoch": 2.7679324894514767, | |
| "grad_norm": 0.885686993598938, | |
| "learning_rate": 2.5172653148896842e-06, | |
| "loss": 0.9722012877464294, | |
| "step": 1312 | |
| }, | |
| { | |
| "epoch": 2.7721518987341773, | |
| "grad_norm": 1.3240593671798706, | |
| "learning_rate": 2.5065718730555033e-06, | |
| "loss": 0.9415172338485718, | |
| "step": 1314 | |
| }, | |
| { | |
| "epoch": 2.7763713080168775, | |
| "grad_norm": 1.9628123044967651, | |
| "learning_rate": 2.4959014764326415e-06, | |
| "loss": 0.6243242025375366, | |
| "step": 1316 | |
| }, | |
| { | |
| "epoch": 2.780590717299578, | |
| "grad_norm": 3.583494186401367, | |
| "learning_rate": 2.4852542548921747e-06, | |
| "loss": 0.4649869501590729, | |
| "step": 1318 | |
| }, | |
| { | |
| "epoch": 2.7848101265822782, | |
| "grad_norm": 0.94072425365448, | |
| "learning_rate": 2.4746303380231085e-06, | |
| "loss": 0.9694103002548218, | |
| "step": 1320 | |
| }, | |
| { | |
| "epoch": 2.789029535864979, | |
| "grad_norm": 0.6174410581588745, | |
| "learning_rate": 2.4640298551308073e-06, | |
| "loss": 0.5571610331535339, | |
| "step": 1322 | |
| }, | |
| { | |
| "epoch": 2.7932489451476794, | |
| "grad_norm": 2.0068700313568115, | |
| "learning_rate": 2.453452935235412e-06, | |
| "loss": 1.0208598375320435, | |
| "step": 1324 | |
| }, | |
| { | |
| "epoch": 2.7974683544303796, | |
| "grad_norm": 1.8920451402664185, | |
| "learning_rate": 2.442899707070277e-06, | |
| "loss": 0.7713922262191772, | |
| "step": 1326 | |
| }, | |
| { | |
| "epoch": 2.80168776371308, | |
| "grad_norm": 0.9682056903839111, | |
| "learning_rate": 2.432370299080402e-06, | |
| "loss": 0.5502282977104187, | |
| "step": 1328 | |
| }, | |
| { | |
| "epoch": 2.8059071729957807, | |
| "grad_norm": 0.9725003838539124, | |
| "learning_rate": 2.4218648394208675e-06, | |
| "loss": 0.8966948986053467, | |
| "step": 1330 | |
| }, | |
| { | |
| "epoch": 2.810126582278481, | |
| "grad_norm": 1.1623132228851318, | |
| "learning_rate": 2.4113834559552725e-06, | |
| "loss": 0.7290566563606262, | |
| "step": 1332 | |
| }, | |
| { | |
| "epoch": 2.8143459915611815, | |
| "grad_norm": 1.2533057928085327, | |
| "learning_rate": 2.4009262762541812e-06, | |
| "loss": 0.4873872697353363, | |
| "step": 1334 | |
| }, | |
| { | |
| "epoch": 2.818565400843882, | |
| "grad_norm": 0.42495617270469666, | |
| "learning_rate": 2.3904934275935742e-06, | |
| "loss": 0.6868776082992554, | |
| "step": 1336 | |
| }, | |
| { | |
| "epoch": 2.8227848101265822, | |
| "grad_norm": 1.3464299440383911, | |
| "learning_rate": 2.3800850369532913e-06, | |
| "loss": 0.792182207107544, | |
| "step": 1338 | |
| }, | |
| { | |
| "epoch": 2.827004219409283, | |
| "grad_norm": 1.2492246627807617, | |
| "learning_rate": 2.3697012310154895e-06, | |
| "loss": 0.8120459318161011, | |
| "step": 1340 | |
| }, | |
| { | |
| "epoch": 2.831223628691983, | |
| "grad_norm": 1.79072105884552, | |
| "learning_rate": 2.3593421361631063e-06, | |
| "loss": 0.8677684664726257, | |
| "step": 1342 | |
| }, | |
| { | |
| "epoch": 2.8354430379746836, | |
| "grad_norm": 1.2441151142120361, | |
| "learning_rate": 2.3490078784783088e-06, | |
| "loss": 1.0221854448318481, | |
| "step": 1344 | |
| }, | |
| { | |
| "epoch": 2.8396624472573837, | |
| "grad_norm": 2.060967206954956, | |
| "learning_rate": 2.3386985837409736e-06, | |
| "loss": 0.6457461714744568, | |
| "step": 1346 | |
| }, | |
| { | |
| "epoch": 2.8438818565400843, | |
| "grad_norm": 0.8780367970466614, | |
| "learning_rate": 2.328414377427148e-06, | |
| "loss": 0.514173686504364, | |
| "step": 1348 | |
| }, | |
| { | |
| "epoch": 2.848101265822785, | |
| "grad_norm": 0.9615793228149414, | |
| "learning_rate": 2.318155384707524e-06, | |
| "loss": 0.9813417792320251, | |
| "step": 1350 | |
| }, | |
| { | |
| "epoch": 2.852320675105485, | |
| "grad_norm": 0.7979256510734558, | |
| "learning_rate": 2.3079217304459114e-06, | |
| "loss": 0.6034799218177795, | |
| "step": 1352 | |
| }, | |
| { | |
| "epoch": 2.8565400843881856, | |
| "grad_norm": 2.0170516967773438, | |
| "learning_rate": 2.2977135391977264e-06, | |
| "loss": 0.6767147779464722, | |
| "step": 1354 | |
| }, | |
| { | |
| "epoch": 2.8607594936708862, | |
| "grad_norm": 2.4936254024505615, | |
| "learning_rate": 2.287530935208469e-06, | |
| "loss": 0.5042116045951843, | |
| "step": 1356 | |
| }, | |
| { | |
| "epoch": 2.8649789029535864, | |
| "grad_norm": 1.2325421571731567, | |
| "learning_rate": 2.277374042412214e-06, | |
| "loss": 0.9337244033813477, | |
| "step": 1358 | |
| }, | |
| { | |
| "epoch": 2.869198312236287, | |
| "grad_norm": 2.9698169231414795, | |
| "learning_rate": 2.2672429844300972e-06, | |
| "loss": 0.7304012179374695, | |
| "step": 1360 | |
| }, | |
| { | |
| "epoch": 2.8734177215189876, | |
| "grad_norm": 1.5197981595993042, | |
| "learning_rate": 2.257137884568819e-06, | |
| "loss": 0.5767084956169128, | |
| "step": 1362 | |
| }, | |
| { | |
| "epoch": 2.8776371308016877, | |
| "grad_norm": 2.374297857284546, | |
| "learning_rate": 2.24705886581914e-06, | |
| "loss": 0.9020572304725647, | |
| "step": 1364 | |
| }, | |
| { | |
| "epoch": 2.8818565400843883, | |
| "grad_norm": 1.3976613283157349, | |
| "learning_rate": 2.237006050854378e-06, | |
| "loss": 0.8876560926437378, | |
| "step": 1366 | |
| }, | |
| { | |
| "epoch": 2.8860759493670884, | |
| "grad_norm": 1.1186343431472778, | |
| "learning_rate": 2.2269795620289255e-06, | |
| "loss": 0.9599936008453369, | |
| "step": 1368 | |
| }, | |
| { | |
| "epoch": 2.890295358649789, | |
| "grad_norm": 2.704097270965576, | |
| "learning_rate": 2.2169795213767533e-06, | |
| "loss": 0.8696321249008179, | |
| "step": 1370 | |
| }, | |
| { | |
| "epoch": 2.894514767932489, | |
| "grad_norm": 7.440235614776611, | |
| "learning_rate": 2.207006050609931e-06, | |
| "loss": 0.3180171847343445, | |
| "step": 1372 | |
| }, | |
| { | |
| "epoch": 2.8987341772151898, | |
| "grad_norm": 0.950478196144104, | |
| "learning_rate": 2.1970592711171343e-06, | |
| "loss": 0.6180795431137085, | |
| "step": 1374 | |
| }, | |
| { | |
| "epoch": 2.9029535864978904, | |
| "grad_norm": 1.206428050994873, | |
| "learning_rate": 2.1871393039621813e-06, | |
| "loss": 0.8911280035972595, | |
| "step": 1376 | |
| }, | |
| { | |
| "epoch": 2.9071729957805905, | |
| "grad_norm": 3.0545897483825684, | |
| "learning_rate": 2.177246269882552e-06, | |
| "loss": 0.752612292766571, | |
| "step": 1378 | |
| }, | |
| { | |
| "epoch": 2.911392405063291, | |
| "grad_norm": 1.6597026586532593, | |
| "learning_rate": 2.1673802892879202e-06, | |
| "loss": 1.0073306560516357, | |
| "step": 1380 | |
| }, | |
| { | |
| "epoch": 2.9156118143459917, | |
| "grad_norm": 2.8480212688446045, | |
| "learning_rate": 2.1575414822586834e-06, | |
| "loss": 0.49533841013908386, | |
| "step": 1382 | |
| }, | |
| { | |
| "epoch": 2.919831223628692, | |
| "grad_norm": 2.9914588928222656, | |
| "learning_rate": 2.1477299685445093e-06, | |
| "loss": 0.6439518332481384, | |
| "step": 1384 | |
| }, | |
| { | |
| "epoch": 2.9240506329113924, | |
| "grad_norm": 1.6400901079177856, | |
| "learning_rate": 2.1379458675628758e-06, | |
| "loss": 0.5329881906509399, | |
| "step": 1386 | |
| }, | |
| { | |
| "epoch": 2.928270042194093, | |
| "grad_norm": 0.9584951400756836, | |
| "learning_rate": 2.128189298397611e-06, | |
| "loss": 0.9460800290107727, | |
| "step": 1388 | |
| }, | |
| { | |
| "epoch": 2.932489451476793, | |
| "grad_norm": 1.2493575811386108, | |
| "learning_rate": 2.118460379797452e-06, | |
| "loss": 0.7834473848342896, | |
| "step": 1390 | |
| }, | |
| { | |
| "epoch": 2.9367088607594938, | |
| "grad_norm": 1.484129548072815, | |
| "learning_rate": 2.1087592301745965e-06, | |
| "loss": 0.4930620491504669, | |
| "step": 1392 | |
| }, | |
| { | |
| "epoch": 2.9409282700421944, | |
| "grad_norm": 1.0145891904830933, | |
| "learning_rate": 2.0990859676032623e-06, | |
| "loss": 0.4643522799015045, | |
| "step": 1394 | |
| }, | |
| { | |
| "epoch": 2.9451476793248945, | |
| "grad_norm": 0.9809361696243286, | |
| "learning_rate": 2.0894407098182474e-06, | |
| "loss": 0.8622637987136841, | |
| "step": 1396 | |
| }, | |
| { | |
| "epoch": 2.9493670886075947, | |
| "grad_norm": 3.8030622005462646, | |
| "learning_rate": 2.0798235742134995e-06, | |
| "loss": 0.6468316316604614, | |
| "step": 1398 | |
| }, | |
| { | |
| "epoch": 2.9535864978902953, | |
| "grad_norm": 3.291412830352783, | |
| "learning_rate": 2.0702346778406887e-06, | |
| "loss": 0.871576726436615, | |
| "step": 1400 | |
| }, | |
| { | |
| "epoch": 2.957805907172996, | |
| "grad_norm": 2.847675085067749, | |
| "learning_rate": 2.0606741374077804e-06, | |
| "loss": 0.6290037631988525, | |
| "step": 1402 | |
| }, | |
| { | |
| "epoch": 2.962025316455696, | |
| "grad_norm": 0.8518403172492981, | |
| "learning_rate": 2.0511420692776135e-06, | |
| "loss": 0.8591277003288269, | |
| "step": 1404 | |
| }, | |
| { | |
| "epoch": 2.9662447257383966, | |
| "grad_norm": 2.023810386657715, | |
| "learning_rate": 2.041638589466487e-06, | |
| "loss": 0.8211725354194641, | |
| "step": 1406 | |
| }, | |
| { | |
| "epoch": 2.970464135021097, | |
| "grad_norm": 2.9551258087158203, | |
| "learning_rate": 2.0321638136427495e-06, | |
| "loss": 0.46553725004196167, | |
| "step": 1408 | |
| }, | |
| { | |
| "epoch": 2.9746835443037973, | |
| "grad_norm": 3.8522558212280273, | |
| "learning_rate": 2.0227178571253846e-06, | |
| "loss": 0.7728868126869202, | |
| "step": 1410 | |
| }, | |
| { | |
| "epoch": 2.978902953586498, | |
| "grad_norm": 0.8442367911338806, | |
| "learning_rate": 2.013300834882615e-06, | |
| "loss": 0.9526476860046387, | |
| "step": 1412 | |
| }, | |
| { | |
| "epoch": 2.9831223628691985, | |
| "grad_norm": 2.8707711696624756, | |
| "learning_rate": 2.0039128615304967e-06, | |
| "loss": 0.6912641525268555, | |
| "step": 1414 | |
| }, | |
| { | |
| "epoch": 2.9873417721518987, | |
| "grad_norm": 0.9124540686607361, | |
| "learning_rate": 1.994554051331532e-06, | |
| "loss": 0.7677329778671265, | |
| "step": 1416 | |
| }, | |
| { | |
| "epoch": 2.9915611814345993, | |
| "grad_norm": 0.7803240418434143, | |
| "learning_rate": 1.9852245181932674e-06, | |
| "loss": 0.8512239456176758, | |
| "step": 1418 | |
| }, | |
| { | |
| "epoch": 2.9957805907173, | |
| "grad_norm": 3.4592530727386475, | |
| "learning_rate": 1.975924375666918e-06, | |
| "loss": 0.8197758197784424, | |
| "step": 1420 | |
| }, | |
| { | |
| "epoch": 3.0, | |
| "grad_norm": 1.0075371265411377, | |
| "learning_rate": 1.9666537369459813e-06, | |
| "loss": 0.26588016748428345, | |
| "step": 1422 | |
| }, | |
| { | |
| "epoch": 3.0042194092827006, | |
| "grad_norm": 0.8261951208114624, | |
| "learning_rate": 1.9574127148648586e-06, | |
| "loss": 0.4992481768131256, | |
| "step": 1424 | |
| }, | |
| { | |
| "epoch": 3.0084388185654007, | |
| "grad_norm": 1.8350886106491089, | |
| "learning_rate": 1.94820142189748e-06, | |
| "loss": 0.4615590572357178, | |
| "step": 1426 | |
| }, | |
| { | |
| "epoch": 3.0126582278481013, | |
| "grad_norm": 7.030728816986084, | |
| "learning_rate": 1.9390199701559407e-06, | |
| "loss": 0.5607567429542542, | |
| "step": 1428 | |
| }, | |
| { | |
| "epoch": 3.0168776371308015, | |
| "grad_norm": 1.843036413192749, | |
| "learning_rate": 1.929868471389133e-06, | |
| "loss": 0.1959325075149536, | |
| "step": 1430 | |
| }, | |
| { | |
| "epoch": 3.021097046413502, | |
| "grad_norm": 1.2027599811553955, | |
| "learning_rate": 1.920747036981388e-06, | |
| "loss": 0.8035475611686707, | |
| "step": 1432 | |
| }, | |
| { | |
| "epoch": 3.0253164556962027, | |
| "grad_norm": 1.0378309488296509, | |
| "learning_rate": 1.9116557779511153e-06, | |
| "loss": 0.7113970518112183, | |
| "step": 1434 | |
| }, | |
| { | |
| "epoch": 3.029535864978903, | |
| "grad_norm": 1.079108715057373, | |
| "learning_rate": 1.9025948049494587e-06, | |
| "loss": 0.8759698271751404, | |
| "step": 1436 | |
| }, | |
| { | |
| "epoch": 3.0337552742616034, | |
| "grad_norm": 1.387281060218811, | |
| "learning_rate": 1.8935642282589452e-06, | |
| "loss": 0.4212711453437805, | |
| "step": 1438 | |
| }, | |
| { | |
| "epoch": 3.037974683544304, | |
| "grad_norm": 1.6048085689544678, | |
| "learning_rate": 1.884564157792141e-06, | |
| "loss": 0.7371959090232849, | |
| "step": 1440 | |
| }, | |
| { | |
| "epoch": 3.042194092827004, | |
| "grad_norm": 0.33521798253059387, | |
| "learning_rate": 1.87559470309032e-06, | |
| "loss": 0.5267896056175232, | |
| "step": 1442 | |
| }, | |
| { | |
| "epoch": 3.0464135021097047, | |
| "grad_norm": 1.3722892999649048, | |
| "learning_rate": 1.8666559733221244e-06, | |
| "loss": 0.657349169254303, | |
| "step": 1444 | |
| }, | |
| { | |
| "epoch": 3.050632911392405, | |
| "grad_norm": 1.0858877897262573, | |
| "learning_rate": 1.8577480772822405e-06, | |
| "loss": 0.8311367034912109, | |
| "step": 1446 | |
| }, | |
| { | |
| "epoch": 3.0548523206751055, | |
| "grad_norm": 5.020367622375488, | |
| "learning_rate": 1.8488711233900686e-06, | |
| "loss": 0.5246130228042603, | |
| "step": 1448 | |
| }, | |
| { | |
| "epoch": 3.059071729957806, | |
| "grad_norm": 3.7570173740386963, | |
| "learning_rate": 1.8400252196884106e-06, | |
| "loss": 0.6080931425094604, | |
| "step": 1450 | |
| }, | |
| { | |
| "epoch": 3.0632911392405062, | |
| "grad_norm": 1.1105659008026123, | |
| "learning_rate": 1.8312104738421518e-06, | |
| "loss": 0.8224632740020752, | |
| "step": 1452 | |
| }, | |
| { | |
| "epoch": 3.067510548523207, | |
| "grad_norm": 3.6815249919891357, | |
| "learning_rate": 1.8224269931369494e-06, | |
| "loss": 0.6160001158714294, | |
| "step": 1454 | |
| }, | |
| { | |
| "epoch": 3.071729957805907, | |
| "grad_norm": 9.295499801635742, | |
| "learning_rate": 1.8136748844779257e-06, | |
| "loss": 0.49316591024398804, | |
| "step": 1456 | |
| }, | |
| { | |
| "epoch": 3.0759493670886076, | |
| "grad_norm": 4.4355974197387695, | |
| "learning_rate": 1.8049542543883718e-06, | |
| "loss": 0.6495121121406555, | |
| "step": 1458 | |
| }, | |
| { | |
| "epoch": 3.080168776371308, | |
| "grad_norm": 2.505272626876831, | |
| "learning_rate": 1.7962652090084483e-06, | |
| "loss": 0.4862138032913208, | |
| "step": 1460 | |
| }, | |
| { | |
| "epoch": 3.0843881856540083, | |
| "grad_norm": 0.9544802904129028, | |
| "learning_rate": 1.7876078540938897e-06, | |
| "loss": 0.7817291021347046, | |
| "step": 1462 | |
| }, | |
| { | |
| "epoch": 3.088607594936709, | |
| "grad_norm": 0.9137688875198364, | |
| "learning_rate": 1.778982295014725e-06, | |
| "loss": 0.7803807258605957, | |
| "step": 1464 | |
| }, | |
| { | |
| "epoch": 3.0928270042194095, | |
| "grad_norm": 0.9232447743415833, | |
| "learning_rate": 1.7703886367539886e-06, | |
| "loss": 0.7208024859428406, | |
| "step": 1466 | |
| }, | |
| { | |
| "epoch": 3.0970464135021096, | |
| "grad_norm": 2.5386898517608643, | |
| "learning_rate": 1.7618269839064476e-06, | |
| "loss": 0.535610556602478, | |
| "step": 1468 | |
| }, | |
| { | |
| "epoch": 3.1012658227848102, | |
| "grad_norm": 2.476505756378174, | |
| "learning_rate": 1.7532974406773215e-06, | |
| "loss": 0.11650805175304413, | |
| "step": 1470 | |
| }, | |
| { | |
| "epoch": 3.1054852320675104, | |
| "grad_norm": 3.4205284118652344, | |
| "learning_rate": 1.744800110881024e-06, | |
| "loss": 0.9236214756965637, | |
| "step": 1472 | |
| }, | |
| { | |
| "epoch": 3.109704641350211, | |
| "grad_norm": 0.38351741433143616, | |
| "learning_rate": 1.7363350979398904e-06, | |
| "loss": 0.3822326362133026, | |
| "step": 1474 | |
| }, | |
| { | |
| "epoch": 3.1139240506329116, | |
| "grad_norm": 1.7231391668319702, | |
| "learning_rate": 1.7279025048829247e-06, | |
| "loss": 0.8056196570396423, | |
| "step": 1476 | |
| }, | |
| { | |
| "epoch": 3.1181434599156117, | |
| "grad_norm": 1.3952598571777344, | |
| "learning_rate": 1.7195024343445406e-06, | |
| "loss": 0.8253889679908752, | |
| "step": 1478 | |
| }, | |
| { | |
| "epoch": 3.1223628691983123, | |
| "grad_norm": 1.235793113708496, | |
| "learning_rate": 1.711134988563318e-06, | |
| "loss": 0.7869700193405151, | |
| "step": 1480 | |
| }, | |
| { | |
| "epoch": 3.1265822784810124, | |
| "grad_norm": 1.5086437463760376, | |
| "learning_rate": 1.7028002693807553e-06, | |
| "loss": 0.74970543384552, | |
| "step": 1482 | |
| }, | |
| { | |
| "epoch": 3.130801687763713, | |
| "grad_norm": 1.1958047151565552, | |
| "learning_rate": 1.694498378240028e-06, | |
| "loss": 0.7713515758514404, | |
| "step": 1484 | |
| }, | |
| { | |
| "epoch": 3.1350210970464136, | |
| "grad_norm": 0.9930305480957031, | |
| "learning_rate": 1.6862294161847582e-06, | |
| "loss": 0.4803518056869507, | |
| "step": 1486 | |
| }, | |
| { | |
| "epoch": 3.1392405063291138, | |
| "grad_norm": 1.338038444519043, | |
| "learning_rate": 1.6779934838577833e-06, | |
| "loss": 0.4478246569633484, | |
| "step": 1488 | |
| }, | |
| { | |
| "epoch": 3.1434599156118144, | |
| "grad_norm": 1.8812412023544312, | |
| "learning_rate": 1.6697906814999316e-06, | |
| "loss": 0.8487708568572998, | |
| "step": 1490 | |
| }, | |
| { | |
| "epoch": 3.147679324894515, | |
| "grad_norm": 1.079730749130249, | |
| "learning_rate": 1.6616211089487968e-06, | |
| "loss": 0.4909372329711914, | |
| "step": 1492 | |
| }, | |
| { | |
| "epoch": 3.151898734177215, | |
| "grad_norm": 3.950795888900757, | |
| "learning_rate": 1.653484865637532e-06, | |
| "loss": 0.6456606388092041, | |
| "step": 1494 | |
| }, | |
| { | |
| "epoch": 3.1561181434599157, | |
| "grad_norm": 0.8888868093490601, | |
| "learning_rate": 1.645382050593633e-06, | |
| "loss": 0.5738848447799683, | |
| "step": 1496 | |
| }, | |
| { | |
| "epoch": 3.160337552742616, | |
| "grad_norm": 0.8062717318534851, | |
| "learning_rate": 1.6373127624377361e-06, | |
| "loss": 0.3924991488456726, | |
| "step": 1498 | |
| }, | |
| { | |
| "epoch": 3.1645569620253164, | |
| "grad_norm": 1.1965993642807007, | |
| "learning_rate": 1.6292770993824138e-06, | |
| "loss": 0.4241105318069458, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 3.168776371308017, | |
| "grad_norm": 1.7078224420547485, | |
| "learning_rate": 1.621275159230986e-06, | |
| "loss": 0.7920833230018616, | |
| "step": 1502 | |
| }, | |
| { | |
| "epoch": 3.172995780590717, | |
| "grad_norm": 3.2493438720703125, | |
| "learning_rate": 1.6133070393763222e-06, | |
| "loss": 0.7387109994888306, | |
| "step": 1504 | |
| }, | |
| { | |
| "epoch": 3.1772151898734178, | |
| "grad_norm": 1.1433643102645874, | |
| "learning_rate": 1.605372836799664e-06, | |
| "loss": 0.8177753686904907, | |
| "step": 1506 | |
| }, | |
| { | |
| "epoch": 3.181434599156118, | |
| "grad_norm": 1.1686694622039795, | |
| "learning_rate": 1.5974726480694356e-06, | |
| "loss": 0.810562014579773, | |
| "step": 1508 | |
| }, | |
| { | |
| "epoch": 3.1856540084388185, | |
| "grad_norm": 1.6440011262893677, | |
| "learning_rate": 1.589606569340076e-06, | |
| "loss": 0.8004451394081116, | |
| "step": 1510 | |
| }, | |
| { | |
| "epoch": 3.189873417721519, | |
| "grad_norm": 3.572957754135132, | |
| "learning_rate": 1.5817746963508675e-06, | |
| "loss": 0.19780634343624115, | |
| "step": 1512 | |
| }, | |
| { | |
| "epoch": 3.1940928270042193, | |
| "grad_norm": 1.8729281425476074, | |
| "learning_rate": 1.5739771244247647e-06, | |
| "loss": 0.8508098721504211, | |
| "step": 1514 | |
| }, | |
| { | |
| "epoch": 3.19831223628692, | |
| "grad_norm": 0.22832605242729187, | |
| "learning_rate": 1.5662139484672423e-06, | |
| "loss": 0.5102086663246155, | |
| "step": 1516 | |
| }, | |
| { | |
| "epoch": 3.2025316455696204, | |
| "grad_norm": 1.493944764137268, | |
| "learning_rate": 1.558485262965135e-06, | |
| "loss": 0.8561201691627502, | |
| "step": 1518 | |
| }, | |
| { | |
| "epoch": 3.2067510548523206, | |
| "grad_norm": 2.02929949760437, | |
| "learning_rate": 1.55079116198549e-06, | |
| "loss": 0.7038779258728027, | |
| "step": 1520 | |
| }, | |
| { | |
| "epoch": 3.210970464135021, | |
| "grad_norm": 2.459091901779175, | |
| "learning_rate": 1.5431317391744167e-06, | |
| "loss": 0.2252277433872223, | |
| "step": 1522 | |
| }, | |
| { | |
| "epoch": 3.2151898734177213, | |
| "grad_norm": 2.103160858154297, | |
| "learning_rate": 1.535507087755956e-06, | |
| "loss": 0.548999011516571, | |
| "step": 1524 | |
| }, | |
| { | |
| "epoch": 3.219409282700422, | |
| "grad_norm": 1.064772129058838, | |
| "learning_rate": 1.527917300530938e-06, | |
| "loss": 0.7090752124786377, | |
| "step": 1526 | |
| }, | |
| { | |
| "epoch": 3.2236286919831225, | |
| "grad_norm": 0.5920833945274353, | |
| "learning_rate": 1.5203624698758573e-06, | |
| "loss": 0.28943130373954773, | |
| "step": 1528 | |
| }, | |
| { | |
| "epoch": 3.2278481012658227, | |
| "grad_norm": 2.5098395347595215, | |
| "learning_rate": 1.5128426877417428e-06, | |
| "loss": 0.5822982788085938, | |
| "step": 1530 | |
| }, | |
| { | |
| "epoch": 3.2320675105485233, | |
| "grad_norm": 0.6460347175598145, | |
| "learning_rate": 1.5053580456530459e-06, | |
| "loss": 0.15637226402759552, | |
| "step": 1532 | |
| }, | |
| { | |
| "epoch": 3.2362869198312234, | |
| "grad_norm": 1.804608702659607, | |
| "learning_rate": 1.4979086347065225e-06, | |
| "loss": 0.7296754121780396, | |
| "step": 1534 | |
| }, | |
| { | |
| "epoch": 3.240506329113924, | |
| "grad_norm": 1.5082496404647827, | |
| "learning_rate": 1.4904945455701232e-06, | |
| "loss": 0.7508465647697449, | |
| "step": 1536 | |
| }, | |
| { | |
| "epoch": 3.2447257383966246, | |
| "grad_norm": 1.1056941747665405, | |
| "learning_rate": 1.4831158684818917e-06, | |
| "loss": 0.6265556812286377, | |
| "step": 1538 | |
| }, | |
| { | |
| "epoch": 3.2489451476793247, | |
| "grad_norm": 2.1995933055877686, | |
| "learning_rate": 1.4757726932488672e-06, | |
| "loss": 0.5779432058334351, | |
| "step": 1540 | |
| }, | |
| { | |
| "epoch": 3.2531645569620253, | |
| "grad_norm": 2.594663619995117, | |
| "learning_rate": 1.4684651092459906e-06, | |
| "loss": 0.4649961590766907, | |
| "step": 1542 | |
| }, | |
| { | |
| "epoch": 3.257383966244726, | |
| "grad_norm": 2.5885109901428223, | |
| "learning_rate": 1.4611932054150132e-06, | |
| "loss": 0.5126054883003235, | |
| "step": 1544 | |
| }, | |
| { | |
| "epoch": 3.261603375527426, | |
| "grad_norm": 2.8481526374816895, | |
| "learning_rate": 1.4539570702634208e-06, | |
| "loss": 0.49317800998687744, | |
| "step": 1546 | |
| }, | |
| { | |
| "epoch": 3.2658227848101267, | |
| "grad_norm": 1.6855295896530151, | |
| "learning_rate": 1.446756791863351e-06, | |
| "loss": 0.6522631049156189, | |
| "step": 1548 | |
| }, | |
| { | |
| "epoch": 3.270042194092827, | |
| "grad_norm": 2.981158971786499, | |
| "learning_rate": 1.4395924578505253e-06, | |
| "loss": 0.20762769877910614, | |
| "step": 1550 | |
| }, | |
| { | |
| "epoch": 3.2742616033755274, | |
| "grad_norm": 0.8789273500442505, | |
| "learning_rate": 1.4324641554231767e-06, | |
| "loss": 0.2234586775302887, | |
| "step": 1552 | |
| }, | |
| { | |
| "epoch": 3.278481012658228, | |
| "grad_norm": 5.3056182861328125, | |
| "learning_rate": 1.4253719713409958e-06, | |
| "loss": 0.40713340044021606, | |
| "step": 1554 | |
| }, | |
| { | |
| "epoch": 3.282700421940928, | |
| "grad_norm": 0.8367089033126831, | |
| "learning_rate": 1.41831599192407e-06, | |
| "loss": 0.7326263189315796, | |
| "step": 1556 | |
| }, | |
| { | |
| "epoch": 3.2869198312236287, | |
| "grad_norm": 1.1955314874649048, | |
| "learning_rate": 1.4112963030518329e-06, | |
| "loss": 0.5510862469673157, | |
| "step": 1558 | |
| }, | |
| { | |
| "epoch": 3.291139240506329, | |
| "grad_norm": 1.1264405250549316, | |
| "learning_rate": 1.4043129901620198e-06, | |
| "loss": 0.44987189769744873, | |
| "step": 1560 | |
| }, | |
| { | |
| "epoch": 3.2953586497890295, | |
| "grad_norm": 2.407663345336914, | |
| "learning_rate": 1.397366138249633e-06, | |
| "loss": 0.42221248149871826, | |
| "step": 1562 | |
| }, | |
| { | |
| "epoch": 3.29957805907173, | |
| "grad_norm": 2.001704692840576, | |
| "learning_rate": 1.3904558318658964e-06, | |
| "loss": 0.7191241383552551, | |
| "step": 1564 | |
| }, | |
| { | |
| "epoch": 3.3037974683544302, | |
| "grad_norm": 2.9357941150665283, | |
| "learning_rate": 1.3835821551172352e-06, | |
| "loss": 0.5609620809555054, | |
| "step": 1566 | |
| }, | |
| { | |
| "epoch": 3.308016877637131, | |
| "grad_norm": 0.1518426090478897, | |
| "learning_rate": 1.3767451916642502e-06, | |
| "loss": 0.3671785891056061, | |
| "step": 1568 | |
| }, | |
| { | |
| "epoch": 3.3122362869198314, | |
| "grad_norm": 2.9103848934173584, | |
| "learning_rate": 1.3699450247206987e-06, | |
| "loss": 0.3877882659435272, | |
| "step": 1570 | |
| }, | |
| { | |
| "epoch": 3.3164556962025316, | |
| "grad_norm": 1.832383394241333, | |
| "learning_rate": 1.363181737052479e-06, | |
| "loss": 0.38887959718704224, | |
| "step": 1572 | |
| }, | |
| { | |
| "epoch": 3.320675105485232, | |
| "grad_norm": 1.458479404449463, | |
| "learning_rate": 1.3564554109766303e-06, | |
| "loss": 0.87562096118927, | |
| "step": 1574 | |
| }, | |
| { | |
| "epoch": 3.3248945147679323, | |
| "grad_norm": 1.4098705053329468, | |
| "learning_rate": 1.3497661283603241e-06, | |
| "loss": 0.618715763092041, | |
| "step": 1576 | |
| }, | |
| { | |
| "epoch": 3.329113924050633, | |
| "grad_norm": 0.9463833570480347, | |
| "learning_rate": 1.3431139706198703e-06, | |
| "loss": 0.7363364100456238, | |
| "step": 1578 | |
| }, | |
| { | |
| "epoch": 3.3333333333333335, | |
| "grad_norm": 1.1084620952606201, | |
| "learning_rate": 1.336499018719726e-06, | |
| "loss": 0.46182820200920105, | |
| "step": 1580 | |
| }, | |
| { | |
| "epoch": 3.3375527426160336, | |
| "grad_norm": 2.6807730197906494, | |
| "learning_rate": 1.3299213531715104e-06, | |
| "loss": 0.4027124345302582, | |
| "step": 1582 | |
| }, | |
| { | |
| "epoch": 3.3417721518987342, | |
| "grad_norm": 3.307328939437866, | |
| "learning_rate": 1.3233810540330258e-06, | |
| "loss": 0.7045289278030396, | |
| "step": 1584 | |
| }, | |
| { | |
| "epoch": 3.3459915611814344, | |
| "grad_norm": 3.4561619758605957, | |
| "learning_rate": 1.3168782009072792e-06, | |
| "loss": 0.5450237989425659, | |
| "step": 1586 | |
| }, | |
| { | |
| "epoch": 3.350210970464135, | |
| "grad_norm": 0.9626051783561707, | |
| "learning_rate": 1.3104128729415191e-06, | |
| "loss": 0.29501575231552124, | |
| "step": 1588 | |
| }, | |
| { | |
| "epoch": 3.3544303797468356, | |
| "grad_norm": 0.9624335169792175, | |
| "learning_rate": 1.3039851488262682e-06, | |
| "loss": 0.7472168207168579, | |
| "step": 1590 | |
| }, | |
| { | |
| "epoch": 3.3586497890295357, | |
| "grad_norm": 1.6804134845733643, | |
| "learning_rate": 1.2975951067943673e-06, | |
| "loss": 0.7001281976699829, | |
| "step": 1592 | |
| }, | |
| { | |
| "epoch": 3.3628691983122363, | |
| "grad_norm": 0.3944559097290039, | |
| "learning_rate": 1.2912428246200215e-06, | |
| "loss": 0.45443102717399597, | |
| "step": 1594 | |
| }, | |
| { | |
| "epoch": 3.367088607594937, | |
| "grad_norm": 3.5907063484191895, | |
| "learning_rate": 1.2849283796178554e-06, | |
| "loss": 0.32309669256210327, | |
| "step": 1596 | |
| }, | |
| { | |
| "epoch": 3.371308016877637, | |
| "grad_norm": 1.1190893650054932, | |
| "learning_rate": 1.2786518486419726e-06, | |
| "loss": 0.369854599237442, | |
| "step": 1598 | |
| }, | |
| { | |
| "epoch": 3.3755274261603376, | |
| "grad_norm": 5.47310733795166, | |
| "learning_rate": 1.2724133080850176e-06, | |
| "loss": 0.5572913289070129, | |
| "step": 1600 | |
| }, | |
| { | |
| "epoch": 3.379746835443038, | |
| "grad_norm": 1.1562130451202393, | |
| "learning_rate": 1.266212833877248e-06, | |
| "loss": 0.4165474772453308, | |
| "step": 1602 | |
| }, | |
| { | |
| "epoch": 3.3839662447257384, | |
| "grad_norm": 0.32527777552604675, | |
| "learning_rate": 1.2600505014856088e-06, | |
| "loss": 0.3750830888748169, | |
| "step": 1604 | |
| }, | |
| { | |
| "epoch": 3.388185654008439, | |
| "grad_norm": 1.6657167673110962, | |
| "learning_rate": 1.253926385912818e-06, | |
| "loss": 0.8115463852882385, | |
| "step": 1606 | |
| }, | |
| { | |
| "epoch": 3.392405063291139, | |
| "grad_norm": 1.3920835256576538, | |
| "learning_rate": 1.2478405616964485e-06, | |
| "loss": 0.4179677963256836, | |
| "step": 1608 | |
| }, | |
| { | |
| "epoch": 3.3966244725738397, | |
| "grad_norm": 1.1664825677871704, | |
| "learning_rate": 1.2417931029080215e-06, | |
| "loss": 0.41709059476852417, | |
| "step": 1610 | |
| }, | |
| { | |
| "epoch": 3.40084388185654, | |
| "grad_norm": 1.5139544010162354, | |
| "learning_rate": 1.23578408315211e-06, | |
| "loss": 0.7101098299026489, | |
| "step": 1612 | |
| }, | |
| { | |
| "epoch": 3.4050632911392404, | |
| "grad_norm": 0.8697150945663452, | |
| "learning_rate": 1.2298135755654378e-06, | |
| "loss": 0.20523357391357422, | |
| "step": 1614 | |
| }, | |
| { | |
| "epoch": 3.409282700421941, | |
| "grad_norm": 2.3192219734191895, | |
| "learning_rate": 1.2238816528159904e-06, | |
| "loss": 0.6002774238586426, | |
| "step": 1616 | |
| }, | |
| { | |
| "epoch": 3.413502109704641, | |
| "grad_norm": 1.1426221132278442, | |
| "learning_rate": 1.2179883871021322e-06, | |
| "loss": 0.8457775712013245, | |
| "step": 1618 | |
| }, | |
| { | |
| "epoch": 3.4177215189873418, | |
| "grad_norm": 0.9432998895645142, | |
| "learning_rate": 1.2121338501517264e-06, | |
| "loss": 0.7718835473060608, | |
| "step": 1620 | |
| }, | |
| { | |
| "epoch": 3.4219409282700424, | |
| "grad_norm": 0.8319332599639893, | |
| "learning_rate": 1.2063181132212632e-06, | |
| "loss": 0.43066444993019104, | |
| "step": 1622 | |
| }, | |
| { | |
| "epoch": 3.4261603375527425, | |
| "grad_norm": 6.067880630493164, | |
| "learning_rate": 1.200541247094989e-06, | |
| "loss": 0.23788659274578094, | |
| "step": 1624 | |
| }, | |
| { | |
| "epoch": 3.430379746835443, | |
| "grad_norm": 5.315701007843018, | |
| "learning_rate": 1.1948033220840512e-06, | |
| "loss": 0.17813172936439514, | |
| "step": 1626 | |
| }, | |
| { | |
| "epoch": 3.4345991561181437, | |
| "grad_norm": 1.0699946880340576, | |
| "learning_rate": 1.1891044080256355e-06, | |
| "loss": 0.67367023229599, | |
| "step": 1628 | |
| }, | |
| { | |
| "epoch": 3.438818565400844, | |
| "grad_norm": 1.1057825088500977, | |
| "learning_rate": 1.1834445742821226e-06, | |
| "loss": 0.27095526456832886, | |
| "step": 1630 | |
| }, | |
| { | |
| "epoch": 3.4430379746835444, | |
| "grad_norm": 1.340883493423462, | |
| "learning_rate": 1.1778238897402362e-06, | |
| "loss": 0.8471240401268005, | |
| "step": 1632 | |
| }, | |
| { | |
| "epoch": 3.4472573839662446, | |
| "grad_norm": 0.34447231888771057, | |
| "learning_rate": 1.1722424228102123e-06, | |
| "loss": 0.4438764452934265, | |
| "step": 1634 | |
| }, | |
| { | |
| "epoch": 3.451476793248945, | |
| "grad_norm": 3.183422327041626, | |
| "learning_rate": 1.1667002414249631e-06, | |
| "loss": 0.7752975225448608, | |
| "step": 1636 | |
| }, | |
| { | |
| "epoch": 3.4556962025316453, | |
| "grad_norm": 2.9104697704315186, | |
| "learning_rate": 1.1611974130392475e-06, | |
| "loss": 0.9540504813194275, | |
| "step": 1638 | |
| }, | |
| { | |
| "epoch": 3.459915611814346, | |
| "grad_norm": 1.6280553340911865, | |
| "learning_rate": 1.1557340046288554e-06, | |
| "loss": 0.8632485270500183, | |
| "step": 1640 | |
| }, | |
| { | |
| "epoch": 3.4641350210970465, | |
| "grad_norm": 2.387031078338623, | |
| "learning_rate": 1.1503100826897889e-06, | |
| "loss": 0.8144734501838684, | |
| "step": 1642 | |
| }, | |
| { | |
| "epoch": 3.4683544303797467, | |
| "grad_norm": 0.9215964674949646, | |
| "learning_rate": 1.144925713237456e-06, | |
| "loss": 0.20231464505195618, | |
| "step": 1644 | |
| }, | |
| { | |
| "epoch": 3.4725738396624473, | |
| "grad_norm": 1.58251953125, | |
| "learning_rate": 1.1395809618058614e-06, | |
| "loss": 0.5774148106575012, | |
| "step": 1646 | |
| }, | |
| { | |
| "epoch": 3.476793248945148, | |
| "grad_norm": 2.2536582946777344, | |
| "learning_rate": 1.1342758934468158e-06, | |
| "loss": 0.6982643604278564, | |
| "step": 1648 | |
| }, | |
| { | |
| "epoch": 3.481012658227848, | |
| "grad_norm": 1.4097844362258911, | |
| "learning_rate": 1.12901057272914e-06, | |
| "loss": 0.38915500044822693, | |
| "step": 1650 | |
| }, | |
| { | |
| "epoch": 3.4852320675105486, | |
| "grad_norm": 1.30046546459198, | |
| "learning_rate": 1.1237850637378808e-06, | |
| "loss": 0.6481969356536865, | |
| "step": 1652 | |
| }, | |
| { | |
| "epoch": 3.489451476793249, | |
| "grad_norm": 0.18971386551856995, | |
| "learning_rate": 1.1185994300735278e-06, | |
| "loss": 0.3767941892147064, | |
| "step": 1654 | |
| }, | |
| { | |
| "epoch": 3.4936708860759493, | |
| "grad_norm": 0.3824913203716278, | |
| "learning_rate": 1.1134537348512443e-06, | |
| "loss": 0.5739644169807434, | |
| "step": 1656 | |
| }, | |
| { | |
| "epoch": 3.49789029535865, | |
| "grad_norm": 2.9707915782928467, | |
| "learning_rate": 1.1083480407000954e-06, | |
| "loss": 0.609894335269928, | |
| "step": 1658 | |
| }, | |
| { | |
| "epoch": 3.50210970464135, | |
| "grad_norm": 1.3457541465759277, | |
| "learning_rate": 1.103282409762287e-06, | |
| "loss": 0.6929283142089844, | |
| "step": 1660 | |
| }, | |
| { | |
| "epoch": 3.5063291139240507, | |
| "grad_norm": 2.39221453666687, | |
| "learning_rate": 1.0982569036924092e-06, | |
| "loss": 0.8087446093559265, | |
| "step": 1662 | |
| }, | |
| { | |
| "epoch": 3.510548523206751, | |
| "grad_norm": 5.895007610321045, | |
| "learning_rate": 1.0932715836566866e-06, | |
| "loss": 0.3411268889904022, | |
| "step": 1664 | |
| }, | |
| { | |
| "epoch": 3.5147679324894514, | |
| "grad_norm": 1.3041728734970093, | |
| "learning_rate": 1.0883265103322333e-06, | |
| "loss": 0.8067029714584351, | |
| "step": 1666 | |
| }, | |
| { | |
| "epoch": 3.518987341772152, | |
| "grad_norm": 1.6455022096633911, | |
| "learning_rate": 1.083421743906313e-06, | |
| "loss": 0.4951574504375458, | |
| "step": 1668 | |
| }, | |
| { | |
| "epoch": 3.523206751054852, | |
| "grad_norm": 1.431204915046692, | |
| "learning_rate": 1.0785573440756093e-06, | |
| "loss": 0.7452267408370972, | |
| "step": 1670 | |
| }, | |
| { | |
| "epoch": 3.5274261603375527, | |
| "grad_norm": 7.941998481750488, | |
| "learning_rate": 1.0737333700454966e-06, | |
| "loss": 0.2036304473876953, | |
| "step": 1672 | |
| }, | |
| { | |
| "epoch": 3.5316455696202533, | |
| "grad_norm": 1.081209659576416, | |
| "learning_rate": 1.068949880529322e-06, | |
| "loss": 0.4741116166114807, | |
| "step": 1674 | |
| }, | |
| { | |
| "epoch": 3.5358649789029535, | |
| "grad_norm": 3.1109554767608643, | |
| "learning_rate": 1.0642069337476872e-06, | |
| "loss": 0.5494669675827026, | |
| "step": 1676 | |
| }, | |
| { | |
| "epoch": 3.540084388185654, | |
| "grad_norm": 3.2354819774627686, | |
| "learning_rate": 1.0595045874277425e-06, | |
| "loss": 0.4578985571861267, | |
| "step": 1678 | |
| }, | |
| { | |
| "epoch": 3.5443037974683547, | |
| "grad_norm": 1.4328290224075317, | |
| "learning_rate": 1.0548428988024858e-06, | |
| "loss": 0.7518556714057922, | |
| "step": 1680 | |
| }, | |
| { | |
| "epoch": 3.548523206751055, | |
| "grad_norm": 1.069136619567871, | |
| "learning_rate": 1.050221924610061e-06, | |
| "loss": 0.567197859287262, | |
| "step": 1682 | |
| }, | |
| { | |
| "epoch": 3.5527426160337554, | |
| "grad_norm": 21.512428283691406, | |
| "learning_rate": 1.045641721093071e-06, | |
| "loss": 0.6879177093505859, | |
| "step": 1684 | |
| }, | |
| { | |
| "epoch": 3.5569620253164556, | |
| "grad_norm": 3.211840867996216, | |
| "learning_rate": 1.041102343997893e-06, | |
| "loss": 0.23187503218650818, | |
| "step": 1686 | |
| }, | |
| { | |
| "epoch": 3.561181434599156, | |
| "grad_norm": 0.7154665589332581, | |
| "learning_rate": 1.0366038485739996e-06, | |
| "loss": 0.4495694935321808, | |
| "step": 1688 | |
| }, | |
| { | |
| "epoch": 3.5654008438818563, | |
| "grad_norm": 1.3137481212615967, | |
| "learning_rate": 1.032146289573284e-06, | |
| "loss": 0.7427676320075989, | |
| "step": 1690 | |
| }, | |
| { | |
| "epoch": 3.569620253164557, | |
| "grad_norm": 4.688238620758057, | |
| "learning_rate": 1.027729721249399e-06, | |
| "loss": 0.16239574551582336, | |
| "step": 1692 | |
| }, | |
| { | |
| "epoch": 3.5738396624472575, | |
| "grad_norm": 0.26294824481010437, | |
| "learning_rate": 1.023354197357091e-06, | |
| "loss": 0.6016992926597595, | |
| "step": 1694 | |
| }, | |
| { | |
| "epoch": 3.5780590717299576, | |
| "grad_norm": 2.513110637664795, | |
| "learning_rate": 1.0190197711515498e-06, | |
| "loss": 0.20142441987991333, | |
| "step": 1696 | |
| }, | |
| { | |
| "epoch": 3.5822784810126582, | |
| "grad_norm": 1.3879189491271973, | |
| "learning_rate": 1.014726495387757e-06, | |
| "loss": 0.5553002953529358, | |
| "step": 1698 | |
| }, | |
| { | |
| "epoch": 3.586497890295359, | |
| "grad_norm": 1.3632709980010986, | |
| "learning_rate": 1.0104744223198471e-06, | |
| "loss": 0.4727664589881897, | |
| "step": 1700 | |
| }, | |
| { | |
| "epoch": 3.590717299578059, | |
| "grad_norm": 1.0121846199035645, | |
| "learning_rate": 1.0062636037004696e-06, | |
| "loss": 0.3748111128807068, | |
| "step": 1702 | |
| }, | |
| { | |
| "epoch": 3.5949367088607596, | |
| "grad_norm": 1.831874132156372, | |
| "learning_rate": 1.0020940907801604e-06, | |
| "loss": 0.869547963142395, | |
| "step": 1704 | |
| }, | |
| { | |
| "epoch": 3.59915611814346, | |
| "grad_norm": 7.198643684387207, | |
| "learning_rate": 9.979659343067154e-07, | |
| "loss": 0.5534847974777222, | |
| "step": 1706 | |
| }, | |
| { | |
| "epoch": 3.6033755274261603, | |
| "grad_norm": 2.4725635051727295, | |
| "learning_rate": 9.938791845245768e-07, | |
| "loss": 0.5149208307266235, | |
| "step": 1708 | |
| }, | |
| { | |
| "epoch": 3.607594936708861, | |
| "grad_norm": 0.5918768048286438, | |
| "learning_rate": 9.898338911742186e-07, | |
| "loss": 0.364676296710968, | |
| "step": 1710 | |
| }, | |
| { | |
| "epoch": 3.611814345991561, | |
| "grad_norm": 1.779348611831665, | |
| "learning_rate": 9.85830103491541e-07, | |
| "loss": 0.7533677816390991, | |
| "step": 1712 | |
| }, | |
| { | |
| "epoch": 3.6160337552742616, | |
| "grad_norm": 3.70202374458313, | |
| "learning_rate": 9.818678702072734e-07, | |
| "loss": 0.9169490933418274, | |
| "step": 1714 | |
| }, | |
| { | |
| "epoch": 3.620253164556962, | |
| "grad_norm": 1.195534110069275, | |
| "learning_rate": 9.779472395463802e-07, | |
| "loss": 0.39904284477233887, | |
| "step": 1716 | |
| }, | |
| { | |
| "epoch": 3.6244725738396624, | |
| "grad_norm": 1.971677303314209, | |
| "learning_rate": 9.740682592274744e-07, | |
| "loss": 0.3311789035797119, | |
| "step": 1718 | |
| }, | |
| { | |
| "epoch": 3.628691983122363, | |
| "grad_norm": 1.8470239639282227, | |
| "learning_rate": 9.702309764622328e-07, | |
| "loss": 0.1799009144306183, | |
| "step": 1720 | |
| }, | |
| { | |
| "epoch": 3.632911392405063, | |
| "grad_norm": 2.5582504272460938, | |
| "learning_rate": 9.664354379548284e-07, | |
| "loss": 0.8046585321426392, | |
| "step": 1722 | |
| }, | |
| { | |
| "epoch": 3.6371308016877637, | |
| "grad_norm": 3.1312508583068848, | |
| "learning_rate": 9.62681689901357e-07, | |
| "loss": 0.3371848165988922, | |
| "step": 1724 | |
| }, | |
| { | |
| "epoch": 3.6413502109704643, | |
| "grad_norm": 2.6263599395751953, | |
| "learning_rate": 9.589697779892765e-07, | |
| "loss": 0.2725059986114502, | |
| "step": 1726 | |
| }, | |
| { | |
| "epoch": 3.6455696202531644, | |
| "grad_norm": 1.8412586450576782, | |
| "learning_rate": 9.552997473968485e-07, | |
| "loss": 0.8444567918777466, | |
| "step": 1728 | |
| }, | |
| { | |
| "epoch": 3.649789029535865, | |
| "grad_norm": 2.1324095726013184, | |
| "learning_rate": 9.516716427925936e-07, | |
| "loss": 0.15560747683048248, | |
| "step": 1730 | |
| }, | |
| { | |
| "epoch": 3.6540084388185656, | |
| "grad_norm": 3.671393394470215, | |
| "learning_rate": 9.480855083347428e-07, | |
| "loss": 0.7069560289382935, | |
| "step": 1732 | |
| }, | |
| { | |
| "epoch": 3.6582278481012658, | |
| "grad_norm": 2.5802621841430664, | |
| "learning_rate": 9.445413876707028e-07, | |
| "loss": 0.2358541190624237, | |
| "step": 1734 | |
| }, | |
| { | |
| "epoch": 3.6624472573839664, | |
| "grad_norm": 1.5859323740005493, | |
| "learning_rate": 9.41039323936522e-07, | |
| "loss": 0.20277546346187592, | |
| "step": 1736 | |
| }, | |
| { | |
| "epoch": 3.6666666666666665, | |
| "grad_norm": 0.9495890736579895, | |
| "learning_rate": 9.375793597563692e-07, | |
| "loss": 0.5327252745628357, | |
| "step": 1738 | |
| }, | |
| { | |
| "epoch": 3.670886075949367, | |
| "grad_norm": 5.723246097564697, | |
| "learning_rate": 9.341615372420126e-07, | |
| "loss": 0.2760300636291504, | |
| "step": 1740 | |
| }, | |
| { | |
| "epoch": 3.6751054852320673, | |
| "grad_norm": 1.0034908056259155, | |
| "learning_rate": 9.307858979923064e-07, | |
| "loss": 0.905087411403656, | |
| "step": 1742 | |
| }, | |
| { | |
| "epoch": 3.679324894514768, | |
| "grad_norm": 0.9102334976196289, | |
| "learning_rate": 9.274524830926866e-07, | |
| "loss": 0.40605294704437256, | |
| "step": 1744 | |
| }, | |
| { | |
| "epoch": 3.6835443037974684, | |
| "grad_norm": 1.6663873195648193, | |
| "learning_rate": 9.241613331146703e-07, | |
| "loss": 0.4531800448894501, | |
| "step": 1746 | |
| }, | |
| { | |
| "epoch": 3.6877637130801686, | |
| "grad_norm": 0.9356587529182434, | |
| "learning_rate": 9.209124881153613e-07, | |
| "loss": 0.8058107495307922, | |
| "step": 1748 | |
| }, | |
| { | |
| "epoch": 3.691983122362869, | |
| "grad_norm": 1.130460500717163, | |
| "learning_rate": 9.177059876369619e-07, | |
| "loss": 0.5929072499275208, | |
| "step": 1750 | |
| }, | |
| { | |
| "epoch": 3.6962025316455698, | |
| "grad_norm": 1.5055688619613647, | |
| "learning_rate": 9.145418707062941e-07, | |
| "loss": 0.7090030908584595, | |
| "step": 1752 | |
| }, | |
| { | |
| "epoch": 3.70042194092827, | |
| "grad_norm": 1.6110928058624268, | |
| "learning_rate": 9.114201758343216e-07, | |
| "loss": 0.8376182913780212, | |
| "step": 1754 | |
| }, | |
| { | |
| "epoch": 3.7046413502109705, | |
| "grad_norm": 2.095933198928833, | |
| "learning_rate": 9.083409410156845e-07, | |
| "loss": 0.6055005788803101, | |
| "step": 1756 | |
| }, | |
| { | |
| "epoch": 3.708860759493671, | |
| "grad_norm": 4.733712673187256, | |
| "learning_rate": 9.053042037282327e-07, | |
| "loss": 0.6132983565330505, | |
| "step": 1758 | |
| }, | |
| { | |
| "epoch": 3.7130801687763713, | |
| "grad_norm": 5.780431270599365, | |
| "learning_rate": 9.023100009325733e-07, | |
| "loss": 0.5792241096496582, | |
| "step": 1760 | |
| }, | |
| { | |
| "epoch": 3.717299578059072, | |
| "grad_norm": 2.4617018699645996, | |
| "learning_rate": 8.993583690716196e-07, | |
| "loss": 0.16029909253120422, | |
| "step": 1762 | |
| }, | |
| { | |
| "epoch": 3.721518987341772, | |
| "grad_norm": 22.150638580322266, | |
| "learning_rate": 8.964493440701455e-07, | |
| "loss": 0.41341426968574524, | |
| "step": 1764 | |
| }, | |
| { | |
| "epoch": 3.7257383966244726, | |
| "grad_norm": 1.3757541179656982, | |
| "learning_rate": 8.935829613343528e-07, | |
| "loss": 0.6639930605888367, | |
| "step": 1766 | |
| }, | |
| { | |
| "epoch": 3.7299578059071727, | |
| "grad_norm": 2.44677472114563, | |
| "learning_rate": 8.907592557514363e-07, | |
| "loss": 0.404757022857666, | |
| "step": 1768 | |
| }, | |
| { | |
| "epoch": 3.7341772151898733, | |
| "grad_norm": 1.3542742729187012, | |
| "learning_rate": 8.8797826168916e-07, | |
| "loss": 0.539573073387146, | |
| "step": 1770 | |
| }, | |
| { | |
| "epoch": 3.738396624472574, | |
| "grad_norm": 1.3471025228500366, | |
| "learning_rate": 8.852400129954396e-07, | |
| "loss": 0.7064318656921387, | |
| "step": 1772 | |
| }, | |
| { | |
| "epoch": 3.742616033755274, | |
| "grad_norm": 0.8148087859153748, | |
| "learning_rate": 8.825445429979306e-07, | |
| "loss": 0.22752483189105988, | |
| "step": 1774 | |
| }, | |
| { | |
| "epoch": 3.7468354430379747, | |
| "grad_norm": 0.2493860125541687, | |
| "learning_rate": 8.798918845036217e-07, | |
| "loss": 0.4672152101993561, | |
| "step": 1776 | |
| }, | |
| { | |
| "epoch": 3.7510548523206753, | |
| "grad_norm": 1.1036282777786255, | |
| "learning_rate": 8.772820697984369e-07, | |
| "loss": 0.6906728148460388, | |
| "step": 1778 | |
| }, | |
| { | |
| "epoch": 3.7552742616033754, | |
| "grad_norm": 1.202394962310791, | |
| "learning_rate": 8.747151306468404e-07, | |
| "loss": 0.689781904220581, | |
| "step": 1780 | |
| }, | |
| { | |
| "epoch": 3.759493670886076, | |
| "grad_norm": 1.5368152856826782, | |
| "learning_rate": 8.721910982914527e-07, | |
| "loss": 0.6156559586524963, | |
| "step": 1782 | |
| }, | |
| { | |
| "epoch": 3.7637130801687766, | |
| "grad_norm": 2.000227928161621, | |
| "learning_rate": 8.697100034526685e-07, | |
| "loss": 0.6539533734321594, | |
| "step": 1784 | |
| }, | |
| { | |
| "epoch": 3.7679324894514767, | |
| "grad_norm": 1.3653666973114014, | |
| "learning_rate": 8.672718763282814e-07, | |
| "loss": 0.7773669362068176, | |
| "step": 1786 | |
| }, | |
| { | |
| "epoch": 3.7721518987341773, | |
| "grad_norm": 4.830440521240234, | |
| "learning_rate": 8.648767465931215e-07, | |
| "loss": 0.11648596078157425, | |
| "step": 1788 | |
| }, | |
| { | |
| "epoch": 3.7763713080168775, | |
| "grad_norm": 1.1531578302383423, | |
| "learning_rate": 8.625246433986894e-07, | |
| "loss": 0.3612111806869507, | |
| "step": 1790 | |
| }, | |
| { | |
| "epoch": 3.780590717299578, | |
| "grad_norm": 1.085537075996399, | |
| "learning_rate": 8.602155953728014e-07, | |
| "loss": 0.7319397330284119, | |
| "step": 1792 | |
| }, | |
| { | |
| "epoch": 3.7848101265822782, | |
| "grad_norm": 2.1902003288269043, | |
| "learning_rate": 8.579496306192452e-07, | |
| "loss": 0.42418360710144043, | |
| "step": 1794 | |
| }, | |
| { | |
| "epoch": 3.789029535864979, | |
| "grad_norm": 2.2132256031036377, | |
| "learning_rate": 8.557267767174329e-07, | |
| "loss": 0.6966800093650818, | |
| "step": 1796 | |
| }, | |
| { | |
| "epoch": 3.7932489451476794, | |
| "grad_norm": 15.147263526916504, | |
| "learning_rate": 8.535470607220696e-07, | |
| "loss": 0.7651135325431824, | |
| "step": 1798 | |
| }, | |
| { | |
| "epoch": 3.7974683544303796, | |
| "grad_norm": 1.363494634628296, | |
| "learning_rate": 8.514105091628205e-07, | |
| "loss": 0.6677999496459961, | |
| "step": 1800 | |
| }, | |
| { | |
| "epoch": 3.80168776371308, | |
| "grad_norm": 2.6623036861419678, | |
| "learning_rate": 8.493171480439908e-07, | |
| "loss": 0.8932458758354187, | |
| "step": 1802 | |
| }, | |
| { | |
| "epoch": 3.8059071729957807, | |
| "grad_norm": 1.0123828649520874, | |
| "learning_rate": 8.47267002844208e-07, | |
| "loss": 0.3895692527294159, | |
| "step": 1804 | |
| }, | |
| { | |
| "epoch": 3.810126582278481, | |
| "grad_norm": 7.874610900878906, | |
| "learning_rate": 8.452600985161112e-07, | |
| "loss": 0.0816773921251297, | |
| "step": 1806 | |
| }, | |
| { | |
| "epoch": 3.8143459915611815, | |
| "grad_norm": 2.1802990436553955, | |
| "learning_rate": 8.432964594860478e-07, | |
| "loss": 0.7556171417236328, | |
| "step": 1808 | |
| }, | |
| { | |
| "epoch": 3.818565400843882, | |
| "grad_norm": 1.1918423175811768, | |
| "learning_rate": 8.413761096537786e-07, | |
| "loss": 0.6875542402267456, | |
| "step": 1810 | |
| }, | |
| { | |
| "epoch": 3.8227848101265822, | |
| "grad_norm": 1.7377078533172607, | |
| "learning_rate": 8.394990723921816e-07, | |
| "loss": 0.29866987466812134, | |
| "step": 1812 | |
| }, | |
| { | |
| "epoch": 3.827004219409283, | |
| "grad_norm": 1.0950947999954224, | |
| "learning_rate": 8.376653705469733e-07, | |
| "loss": 0.7598391771316528, | |
| "step": 1814 | |
| }, | |
| { | |
| "epoch": 3.831223628691983, | |
| "grad_norm": 2.3216395378112793, | |
| "learning_rate": 8.358750264364267e-07, | |
| "loss": 0.7117894291877747, | |
| "step": 1816 | |
| }, | |
| { | |
| "epoch": 3.8354430379746836, | |
| "grad_norm": 4.284765720367432, | |
| "learning_rate": 8.341280618511016e-07, | |
| "loss": 0.6586706042289734, | |
| "step": 1818 | |
| }, | |
| { | |
| "epoch": 3.8396624472573837, | |
| "grad_norm": 2.1526107788085938, | |
| "learning_rate": 8.324244980535782e-07, | |
| "loss": 0.5206190347671509, | |
| "step": 1820 | |
| }, | |
| { | |
| "epoch": 3.8438818565400843, | |
| "grad_norm": 1.1617799997329712, | |
| "learning_rate": 8.307643557781994e-07, | |
| "loss": 0.7454214692115784, | |
| "step": 1822 | |
| }, | |
| { | |
| "epoch": 3.848101265822785, | |
| "grad_norm": 1.9797450304031372, | |
| "learning_rate": 8.291476552308179e-07, | |
| "loss": 0.6207857728004456, | |
| "step": 1824 | |
| }, | |
| { | |
| "epoch": 3.852320675105485, | |
| "grad_norm": 1.9322015047073364, | |
| "learning_rate": 8.275744160885501e-07, | |
| "loss": 0.685775876045227, | |
| "step": 1826 | |
| }, | |
| { | |
| "epoch": 3.8565400843881856, | |
| "grad_norm": 2.0633387565612793, | |
| "learning_rate": 8.260446574995363e-07, | |
| "loss": 0.7667111754417419, | |
| "step": 1828 | |
| }, | |
| { | |
| "epoch": 3.8607594936708862, | |
| "grad_norm": 1.2368988990783691, | |
| "learning_rate": 8.245583980827098e-07, | |
| "loss": 0.6670578718185425, | |
| "step": 1830 | |
| }, | |
| { | |
| "epoch": 3.8649789029535864, | |
| "grad_norm": 2.3721048831939697, | |
| "learning_rate": 8.231156559275666e-07, | |
| "loss": 0.15816515684127808, | |
| "step": 1832 | |
| }, | |
| { | |
| "epoch": 3.869198312236287, | |
| "grad_norm": 3.6182823181152344, | |
| "learning_rate": 8.217164485939484e-07, | |
| "loss": 0.4539300501346588, | |
| "step": 1834 | |
| }, | |
| { | |
| "epoch": 3.8734177215189876, | |
| "grad_norm": 4.013774394989014, | |
| "learning_rate": 8.203607931118281e-07, | |
| "loss": 0.5095362663269043, | |
| "step": 1836 | |
| }, | |
| { | |
| "epoch": 3.8776371308016877, | |
| "grad_norm": 2.4649147987365723, | |
| "learning_rate": 8.190487059811013e-07, | |
| "loss": 0.4961618483066559, | |
| "step": 1838 | |
| }, | |
| { | |
| "epoch": 3.8818565400843883, | |
| "grad_norm": 4.491702079772949, | |
| "learning_rate": 8.177802031713863e-07, | |
| "loss": 0.7962309122085571, | |
| "step": 1840 | |
| }, | |
| { | |
| "epoch": 3.8860759493670884, | |
| "grad_norm": 0.9100720286369324, | |
| "learning_rate": 8.165553001218308e-07, | |
| "loss": 0.4460848867893219, | |
| "step": 1842 | |
| }, | |
| { | |
| "epoch": 3.890295358649789, | |
| "grad_norm": 0.9674596786499023, | |
| "learning_rate": 8.153740117409218e-07, | |
| "loss": 0.44232675433158875, | |
| "step": 1844 | |
| }, | |
| { | |
| "epoch": 3.894514767932489, | |
| "grad_norm": 1.0640747547149658, | |
| "learning_rate": 8.142363524063067e-07, | |
| "loss": 0.7083509564399719, | |
| "step": 1846 | |
| }, | |
| { | |
| "epoch": 3.8987341772151898, | |
| "grad_norm": 7.521092414855957, | |
| "learning_rate": 8.131423359646147e-07, | |
| "loss": 0.309792697429657, | |
| "step": 1848 | |
| }, | |
| { | |
| "epoch": 3.9029535864978904, | |
| "grad_norm": 1.9627479314804077, | |
| "learning_rate": 8.120919757312934e-07, | |
| "loss": 0.7434027194976807, | |
| "step": 1850 | |
| }, | |
| { | |
| "epoch": 3.9071729957805905, | |
| "grad_norm": 1.8858873844146729, | |
| "learning_rate": 8.110852844904411e-07, | |
| "loss": 0.7783426642417908, | |
| "step": 1852 | |
| }, | |
| { | |
| "epoch": 3.911392405063291, | |
| "grad_norm": 1.578383445739746, | |
| "learning_rate": 8.101222744946554e-07, | |
| "loss": 0.7528443336486816, | |
| "step": 1854 | |
| }, | |
| { | |
| "epoch": 3.9156118143459917, | |
| "grad_norm": 1.0352789163589478, | |
| "learning_rate": 8.092029574648825e-07, | |
| "loss": 0.6360561847686768, | |
| "step": 1856 | |
| }, | |
| { | |
| "epoch": 3.919831223628692, | |
| "grad_norm": 1.9537928104400635, | |
| "learning_rate": 8.08327344590275e-07, | |
| "loss": 0.7542226314544678, | |
| "step": 1858 | |
| }, | |
| { | |
| "epoch": 3.9240506329113924, | |
| "grad_norm": 1.5767334699630737, | |
| "learning_rate": 8.074954465280533e-07, | |
| "loss": 0.7059440016746521, | |
| "step": 1860 | |
| }, | |
| { | |
| "epoch": 3.928270042194093, | |
| "grad_norm": 1.4566371440887451, | |
| "learning_rate": 8.067072734033808e-07, | |
| "loss": 0.44404223561286926, | |
| "step": 1862 | |
| }, | |
| { | |
| "epoch": 3.932489451476793, | |
| "grad_norm": 1.6387444734573364, | |
| "learning_rate": 8.05962834809236e-07, | |
| "loss": 0.4295271039009094, | |
| "step": 1864 | |
| }, | |
| { | |
| "epoch": 3.9367088607594938, | |
| "grad_norm": 4.41506290435791, | |
| "learning_rate": 8.052621398062982e-07, | |
| "loss": 0.9274621605873108, | |
| "step": 1866 | |
| }, | |
| { | |
| "epoch": 3.9409282700421944, | |
| "grad_norm": 2.0459539890289307, | |
| "learning_rate": 8.046051969228362e-07, | |
| "loss": 0.6663318872451782, | |
| "step": 1868 | |
| }, | |
| { | |
| "epoch": 3.9451476793248945, | |
| "grad_norm": 1.6559040546417236, | |
| "learning_rate": 8.039920141546053e-07, | |
| "loss": 0.5702696442604065, | |
| "step": 1870 | |
| }, | |
| { | |
| "epoch": 3.9493670886075947, | |
| "grad_norm": 0.035610347986221313, | |
| "learning_rate": 8.034225989647494e-07, | |
| "loss": 0.2956307530403137, | |
| "step": 1872 | |
| }, | |
| { | |
| "epoch": 3.9535864978902953, | |
| "grad_norm": 5.27208948135376, | |
| "learning_rate": 8.028969582837097e-07, | |
| "loss": 0.22891630232334137, | |
| "step": 1874 | |
| }, | |
| { | |
| "epoch": 3.957805907172996, | |
| "grad_norm": 1.6149741411209106, | |
| "learning_rate": 8.024150985091419e-07, | |
| "loss": 0.5240350961685181, | |
| "step": 1876 | |
| }, | |
| { | |
| "epoch": 3.962025316455696, | |
| "grad_norm": 3.051912307739258, | |
| "learning_rate": 8.019770255058373e-07, | |
| "loss": 0.6355645060539246, | |
| "step": 1878 | |
| }, | |
| { | |
| "epoch": 3.9662447257383966, | |
| "grad_norm": 1.413213849067688, | |
| "learning_rate": 8.015827446056511e-07, | |
| "loss": 0.4071570634841919, | |
| "step": 1880 | |
| }, | |
| { | |
| "epoch": 3.970464135021097, | |
| "grad_norm": 1.0149176120758057, | |
| "learning_rate": 8.012322606074381e-07, | |
| "loss": 0.6791200637817383, | |
| "step": 1882 | |
| }, | |
| { | |
| "epoch": 3.9746835443037973, | |
| "grad_norm": 6.349963188171387, | |
| "learning_rate": 8.009255777769939e-07, | |
| "loss": 0.1739484965801239, | |
| "step": 1884 | |
| }, | |
| { | |
| "epoch": 3.978902953586498, | |
| "grad_norm": 1.85853111743927, | |
| "learning_rate": 8.006626998470039e-07, | |
| "loss": 0.6670107245445251, | |
| "step": 1886 | |
| }, | |
| { | |
| "epoch": 3.9831223628691985, | |
| "grad_norm": 1.893870234489441, | |
| "learning_rate": 8.004436300169959e-07, | |
| "loss": 0.5138272047042847, | |
| "step": 1888 | |
| }, | |
| { | |
| "epoch": 3.9873417721518987, | |
| "grad_norm": 3.016247272491455, | |
| "learning_rate": 8.002683709533043e-07, | |
| "loss": 0.7126239538192749, | |
| "step": 1890 | |
| }, | |
| { | |
| "epoch": 3.9915611814345993, | |
| "grad_norm": 2.8701586723327637, | |
| "learning_rate": 8.001369247890338e-07, | |
| "loss": 0.4470701813697815, | |
| "step": 1892 | |
| }, | |
| { | |
| "epoch": 3.9957805907173, | |
| "grad_norm": 1.1190752983093262, | |
| "learning_rate": 8.00049293124037e-07, | |
| "loss": 0.6980884075164795, | |
| "step": 1894 | |
| }, | |
| { | |
| "epoch": 4.0, | |
| "grad_norm": 1.7359868288040161, | |
| "learning_rate": 8.000054770248921e-07, | |
| "loss": 0.6384545564651489, | |
| "step": 1896 | |
| }, | |
| { | |
| "epoch": 4.0, | |
| "step": 1896, | |
| "total_flos": 3.5948540672197263e+18, | |
| "train_loss": 0.8366947202287017, | |
| "train_runtime": 8313.2571, | |
| "train_samples_per_second": 6.842, | |
| "train_steps_per_second": 0.228 | |
| } | |
| ], | |
| "logging_steps": 2, | |
| "max_steps": 1896, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 4, | |
| "save_steps": 99999, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": false, | |
| "should_training_stop": false | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 3.5948540672197263e+18, | |
| "train_batch_size": 3, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |