Image-Text-to-Text
Transformers
Safetensors
qwen3_5
llama-factory
full
Generated from Trainer
conversational
Instructions to use furproxy/9b-89 with libraries, inference providers, notebooks, and local apps. Follow these links to get started.
- Libraries
- Transformers
How to use furproxy/9b-89 with Transformers:
# Use a pipeline as a high-level helper from transformers import pipeline pipe = pipeline("image-text-to-text", model="furproxy/9b-89") messages = [ { "role": "user", "content": [ {"type": "image", "url": "https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/p-blog/candy.JPG"}, {"type": "text", "text": "What animal is on the candy?"} ] }, ] pipe(text=messages)# Load model directly from transformers import AutoProcessor, AutoModelForImageTextToText processor = AutoProcessor.from_pretrained("furproxy/9b-89") model = AutoModelForImageTextToText.from_pretrained("furproxy/9b-89") messages = [ { "role": "user", "content": [ {"type": "image", "url": "https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/p-blog/candy.JPG"}, {"type": "text", "text": "What animal is on the candy?"} ] }, ] inputs = processor.apply_chat_template( messages, add_generation_prompt=True, tokenize=True, return_dict=True, return_tensors="pt", ).to(model.device) outputs = model.generate(**inputs, max_new_tokens=40) print(processor.decode(outputs[0][inputs["input_ids"].shape[-1]:])) - Notebooks
- Google Colab
- Kaggle
- Local Apps Settings
- vLLM
How to use furproxy/9b-89 with vLLM:
Install from pip and serve model
# Install vLLM from pip: pip install vllm # Start the vLLM server: vllm serve "furproxy/9b-89" # Call the server using curl (OpenAI-compatible API): curl -X POST "http://localhost:8000/v1/chat/completions" \ -H "Content-Type: application/json" \ --data '{ "model": "furproxy/9b-89", "messages": [ { "role": "user", "content": [ { "type": "text", "text": "Describe this image in one sentence." }, { "type": "image_url", "image_url": { "url": "https://cdn.britannica.com/61/93061-050-99147DCE/Statue-of-Liberty-Island-New-York-Bay.jpg" } } ] } ] }'Use Docker
docker model run hf.co/furproxy/9b-89
- SGLang
How to use furproxy/9b-89 with SGLang:
Install from pip and serve model
# Install SGLang from pip: pip install sglang # Start the SGLang server: python3 -m sglang.launch_server \ --model-path "furproxy/9b-89" \ --host 0.0.0.0 \ --port 30000 # Call the server using curl (OpenAI-compatible API): curl -X POST "http://localhost:30000/v1/chat/completions" \ -H "Content-Type: application/json" \ --data '{ "model": "furproxy/9b-89", "messages": [ { "role": "user", "content": [ { "type": "text", "text": "Describe this image in one sentence." }, { "type": "image_url", "image_url": { "url": "https://cdn.britannica.com/61/93061-050-99147DCE/Statue-of-Liberty-Island-New-York-Bay.jpg" } } ] } ] }'Use Docker images
docker run --gpus all \ --shm-size 32g \ -p 30000:30000 \ -v ~/.cache/huggingface:/root/.cache/huggingface \ --env "HF_TOKEN=<secret>" \ --ipc=host \ lmsysorg/sglang:latest \ python3 -m sglang.launch_server \ --model-path "furproxy/9b-89" \ --host 0.0.0.0 \ --port 30000 # Call the server using curl (OpenAI-compatible API): curl -X POST "http://localhost:30000/v1/chat/completions" \ -H "Content-Type: application/json" \ --data '{ "model": "furproxy/9b-89", "messages": [ { "role": "user", "content": [ { "type": "text", "text": "Describe this image in one sentence." }, { "type": "image_url", "image_url": { "url": "https://cdn.britannica.com/61/93061-050-99147DCE/Statue-of-Liberty-Island-New-York-Bay.jpg" } } ] } ] }' - Docker Model Runner
How to use furproxy/9b-89 with Docker Model Runner:
docker model run hf.co/furproxy/9b-89
| { | |
| "best_global_step": null, | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 4.0, | |
| "eval_steps": 500, | |
| "global_step": 2008, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.00398406374501992, | |
| "grad_norm": 4.348448276519775, | |
| "learning_rate": 5.940594059405941e-08, | |
| "loss": 2.1171607971191406, | |
| "step": 2 | |
| }, | |
| { | |
| "epoch": 0.00796812749003984, | |
| "grad_norm": 2.55696177482605, | |
| "learning_rate": 1.782178217821782e-07, | |
| "loss": 2.068465232849121, | |
| "step": 4 | |
| }, | |
| { | |
| "epoch": 0.01195219123505976, | |
| "grad_norm": 3.159899950027466, | |
| "learning_rate": 2.9702970297029703e-07, | |
| "loss": 2.136167287826538, | |
| "step": 6 | |
| }, | |
| { | |
| "epoch": 0.01593625498007968, | |
| "grad_norm": 2.0796260833740234, | |
| "learning_rate": 4.158415841584159e-07, | |
| "loss": 1.8786698579788208, | |
| "step": 8 | |
| }, | |
| { | |
| "epoch": 0.0199203187250996, | |
| "grad_norm": 5.41955041885376, | |
| "learning_rate": 5.346534653465346e-07, | |
| "loss": 1.9257912635803223, | |
| "step": 10 | |
| }, | |
| { | |
| "epoch": 0.02390438247011952, | |
| "grad_norm": 11.406185150146484, | |
| "learning_rate": 6.534653465346535e-07, | |
| "loss": 2.368868827819824, | |
| "step": 12 | |
| }, | |
| { | |
| "epoch": 0.027888446215139442, | |
| "grad_norm": 1.901093602180481, | |
| "learning_rate": 7.722772277227723e-07, | |
| "loss": 1.9428346157073975, | |
| "step": 14 | |
| }, | |
| { | |
| "epoch": 0.03187250996015936, | |
| "grad_norm": 1.393601894378662, | |
| "learning_rate": 8.910891089108911e-07, | |
| "loss": 1.7873543500900269, | |
| "step": 16 | |
| }, | |
| { | |
| "epoch": 0.035856573705179286, | |
| "grad_norm": 1.4436230659484863, | |
| "learning_rate": 1.00990099009901e-06, | |
| "loss": 1.2166668176651, | |
| "step": 18 | |
| }, | |
| { | |
| "epoch": 0.0398406374501992, | |
| "grad_norm": 1.8145285844802856, | |
| "learning_rate": 1.1287128712871288e-06, | |
| "loss": 1.6057647466659546, | |
| "step": 20 | |
| }, | |
| { | |
| "epoch": 0.043824701195219126, | |
| "grad_norm": 1.2188401222229004, | |
| "learning_rate": 1.2475247524752474e-06, | |
| "loss": 1.7550266981124878, | |
| "step": 22 | |
| }, | |
| { | |
| "epoch": 0.04780876494023904, | |
| "grad_norm": 1.664843201637268, | |
| "learning_rate": 1.3663366336633665e-06, | |
| "loss": 1.5540839433670044, | |
| "step": 24 | |
| }, | |
| { | |
| "epoch": 0.05179282868525897, | |
| "grad_norm": 9.42098617553711, | |
| "learning_rate": 1.4851485148514852e-06, | |
| "loss": 2.344756841659546, | |
| "step": 26 | |
| }, | |
| { | |
| "epoch": 0.055776892430278883, | |
| "grad_norm": 2.532942771911621, | |
| "learning_rate": 1.603960396039604e-06, | |
| "loss": 1.6220295429229736, | |
| "step": 28 | |
| }, | |
| { | |
| "epoch": 0.05976095617529881, | |
| "grad_norm": 14.82198429107666, | |
| "learning_rate": 1.7227722772277227e-06, | |
| "loss": 1.83803129196167, | |
| "step": 30 | |
| }, | |
| { | |
| "epoch": 0.06374501992031872, | |
| "grad_norm": 4.736570358276367, | |
| "learning_rate": 1.8415841584158415e-06, | |
| "loss": 0.9668034315109253, | |
| "step": 32 | |
| }, | |
| { | |
| "epoch": 0.06772908366533864, | |
| "grad_norm": 1.077216386795044, | |
| "learning_rate": 1.9603960396039604e-06, | |
| "loss": 1.5424432754516602, | |
| "step": 34 | |
| }, | |
| { | |
| "epoch": 0.07171314741035857, | |
| "grad_norm": 2.908050060272217, | |
| "learning_rate": 2.079207920792079e-06, | |
| "loss": 1.578654408454895, | |
| "step": 36 | |
| }, | |
| { | |
| "epoch": 0.07569721115537849, | |
| "grad_norm": 1.1227400302886963, | |
| "learning_rate": 2.198019801980198e-06, | |
| "loss": 1.4881534576416016, | |
| "step": 38 | |
| }, | |
| { | |
| "epoch": 0.0796812749003984, | |
| "grad_norm": 1.1037142276763916, | |
| "learning_rate": 2.316831683168317e-06, | |
| "loss": 1.4990899562835693, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 0.08366533864541832, | |
| "grad_norm": 11.898150444030762, | |
| "learning_rate": 2.4356435643564358e-06, | |
| "loss": 0.6503542065620422, | |
| "step": 42 | |
| }, | |
| { | |
| "epoch": 0.08764940239043825, | |
| "grad_norm": 1.8804869651794434, | |
| "learning_rate": 2.5544554455445544e-06, | |
| "loss": 1.5055851936340332, | |
| "step": 44 | |
| }, | |
| { | |
| "epoch": 0.09163346613545817, | |
| "grad_norm": 1.0558547973632812, | |
| "learning_rate": 2.6732673267326735e-06, | |
| "loss": 1.4046030044555664, | |
| "step": 46 | |
| }, | |
| { | |
| "epoch": 0.09561752988047809, | |
| "grad_norm": 1.6279054880142212, | |
| "learning_rate": 2.792079207920792e-06, | |
| "loss": 1.228537678718567, | |
| "step": 48 | |
| }, | |
| { | |
| "epoch": 0.099601593625498, | |
| "grad_norm": 9.176322937011719, | |
| "learning_rate": 2.9108910891089108e-06, | |
| "loss": 1.4280906915664673, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 0.10358565737051793, | |
| "grad_norm": 0.7322860956192017, | |
| "learning_rate": 3.02970297029703e-06, | |
| "loss": 1.3861970901489258, | |
| "step": 52 | |
| }, | |
| { | |
| "epoch": 0.10756972111553785, | |
| "grad_norm": 1.9037761688232422, | |
| "learning_rate": 3.148514851485149e-06, | |
| "loss": 1.8983885049819946, | |
| "step": 54 | |
| }, | |
| { | |
| "epoch": 0.11155378486055777, | |
| "grad_norm": 2.3619227409362793, | |
| "learning_rate": 3.2673267326732676e-06, | |
| "loss": 1.265608549118042, | |
| "step": 56 | |
| }, | |
| { | |
| "epoch": 0.11553784860557768, | |
| "grad_norm": 1.079222559928894, | |
| "learning_rate": 3.3861386138613858e-06, | |
| "loss": 1.3720718622207642, | |
| "step": 58 | |
| }, | |
| { | |
| "epoch": 0.11952191235059761, | |
| "grad_norm": 2.0091183185577393, | |
| "learning_rate": 3.504950495049505e-06, | |
| "loss": 1.221197485923767, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 0.12350597609561753, | |
| "grad_norm": 1.6702687740325928, | |
| "learning_rate": 3.623762376237624e-06, | |
| "loss": 1.3659617900848389, | |
| "step": 62 | |
| }, | |
| { | |
| "epoch": 0.12749003984063745, | |
| "grad_norm": 1.4772052764892578, | |
| "learning_rate": 3.7425742574257425e-06, | |
| "loss": 0.9504954218864441, | |
| "step": 64 | |
| }, | |
| { | |
| "epoch": 0.13147410358565736, | |
| "grad_norm": 1.1839888095855713, | |
| "learning_rate": 3.861386138613861e-06, | |
| "loss": 1.3058485984802246, | |
| "step": 66 | |
| }, | |
| { | |
| "epoch": 0.13545816733067728, | |
| "grad_norm": 2.274195671081543, | |
| "learning_rate": 3.98019801980198e-06, | |
| "loss": 1.142093300819397, | |
| "step": 68 | |
| }, | |
| { | |
| "epoch": 0.1394422310756972, | |
| "grad_norm": 1.23581063747406, | |
| "learning_rate": 4.099009900990099e-06, | |
| "loss": 1.3501553535461426, | |
| "step": 70 | |
| }, | |
| { | |
| "epoch": 0.14342629482071714, | |
| "grad_norm": 0.9609231352806091, | |
| "learning_rate": 4.2178217821782175e-06, | |
| "loss": 1.394975185394287, | |
| "step": 72 | |
| }, | |
| { | |
| "epoch": 0.14741035856573706, | |
| "grad_norm": 1.0941250324249268, | |
| "learning_rate": 4.336633663366337e-06, | |
| "loss": 1.3037439584732056, | |
| "step": 74 | |
| }, | |
| { | |
| "epoch": 0.15139442231075698, | |
| "grad_norm": 2.8824353218078613, | |
| "learning_rate": 4.455445544554456e-06, | |
| "loss": 1.0416653156280518, | |
| "step": 76 | |
| }, | |
| { | |
| "epoch": 0.1553784860557769, | |
| "grad_norm": 0.9365191459655762, | |
| "learning_rate": 4.574257425742575e-06, | |
| "loss": 1.2687112092971802, | |
| "step": 78 | |
| }, | |
| { | |
| "epoch": 0.1593625498007968, | |
| "grad_norm": 1.5261850357055664, | |
| "learning_rate": 4.693069306930693e-06, | |
| "loss": 1.317929744720459, | |
| "step": 80 | |
| }, | |
| { | |
| "epoch": 0.16334661354581673, | |
| "grad_norm": 1.2774893045425415, | |
| "learning_rate": 4.811881188118812e-06, | |
| "loss": 1.0010876655578613, | |
| "step": 82 | |
| }, | |
| { | |
| "epoch": 0.16733067729083664, | |
| "grad_norm": 2.2119550704956055, | |
| "learning_rate": 4.93069306930693e-06, | |
| "loss": 0.4978000223636627, | |
| "step": 84 | |
| }, | |
| { | |
| "epoch": 0.17131474103585656, | |
| "grad_norm": 0.8584203124046326, | |
| "learning_rate": 5.049504950495049e-06, | |
| "loss": 0.5417638421058655, | |
| "step": 86 | |
| }, | |
| { | |
| "epoch": 0.1752988047808765, | |
| "grad_norm": 1.1234948635101318, | |
| "learning_rate": 5.168316831683168e-06, | |
| "loss": 1.3661960363388062, | |
| "step": 88 | |
| }, | |
| { | |
| "epoch": 0.17928286852589642, | |
| "grad_norm": 1.9819002151489258, | |
| "learning_rate": 5.2871287128712874e-06, | |
| "loss": 0.8402650952339172, | |
| "step": 90 | |
| }, | |
| { | |
| "epoch": 0.18326693227091634, | |
| "grad_norm": 9.981027603149414, | |
| "learning_rate": 5.4059405940594065e-06, | |
| "loss": 1.0407862663269043, | |
| "step": 92 | |
| }, | |
| { | |
| "epoch": 0.18725099601593626, | |
| "grad_norm": 5.5226335525512695, | |
| "learning_rate": 5.524752475247525e-06, | |
| "loss": 1.3026604652404785, | |
| "step": 94 | |
| }, | |
| { | |
| "epoch": 0.19123505976095617, | |
| "grad_norm": 2.536931037902832, | |
| "learning_rate": 5.643564356435644e-06, | |
| "loss": 1.050534963607788, | |
| "step": 96 | |
| }, | |
| { | |
| "epoch": 0.1952191235059761, | |
| "grad_norm": 2.8480377197265625, | |
| "learning_rate": 5.762376237623762e-06, | |
| "loss": 1.2697981595993042, | |
| "step": 98 | |
| }, | |
| { | |
| "epoch": 0.199203187250996, | |
| "grad_norm": 1.2788549661636353, | |
| "learning_rate": 5.881188118811881e-06, | |
| "loss": 1.3857513666152954, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.20318725099601595, | |
| "grad_norm": 4.938348293304443, | |
| "learning_rate": 6e-06, | |
| "loss": 1.2503310441970825, | |
| "step": 102 | |
| }, | |
| { | |
| "epoch": 0.20717131474103587, | |
| "grad_norm": 1.3763278722763062, | |
| "learning_rate": 5.99998534480079e-06, | |
| "loss": 1.316627025604248, | |
| "step": 104 | |
| }, | |
| { | |
| "epoch": 0.21115537848605578, | |
| "grad_norm": 1.2016820907592773, | |
| "learning_rate": 5.9999413793622525e-06, | |
| "loss": 1.3336181640625, | |
| "step": 106 | |
| }, | |
| { | |
| "epoch": 0.2151394422310757, | |
| "grad_norm": 2.7037742137908936, | |
| "learning_rate": 5.9998681041616624e-06, | |
| "loss": 0.848972737789154, | |
| "step": 108 | |
| }, | |
| { | |
| "epoch": 0.21912350597609562, | |
| "grad_norm": 2.082820177078247, | |
| "learning_rate": 5.999765519994475e-06, | |
| "loss": 1.1773113012313843, | |
| "step": 110 | |
| }, | |
| { | |
| "epoch": 0.22310756972111553, | |
| "grad_norm": 1.349158525466919, | |
| "learning_rate": 5.999633627974312e-06, | |
| "loss": 1.838499903678894, | |
| "step": 112 | |
| }, | |
| { | |
| "epoch": 0.22709163346613545, | |
| "grad_norm": 1.0457987785339355, | |
| "learning_rate": 5.9994724295329546e-06, | |
| "loss": 1.2931954860687256, | |
| "step": 114 | |
| }, | |
| { | |
| "epoch": 0.23107569721115537, | |
| "grad_norm": 1.0233925580978394, | |
| "learning_rate": 5.999281926420326e-06, | |
| "loss": 1.3657619953155518, | |
| "step": 116 | |
| }, | |
| { | |
| "epoch": 0.2350597609561753, | |
| "grad_norm": 1.456226110458374, | |
| "learning_rate": 5.999062120704471e-06, | |
| "loss": 0.39271149039268494, | |
| "step": 118 | |
| }, | |
| { | |
| "epoch": 0.23904382470119523, | |
| "grad_norm": 1.1591161489486694, | |
| "learning_rate": 5.998813014771534e-06, | |
| "loss": 1.283569097518921, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 0.24302788844621515, | |
| "grad_norm": 1.4893031120300293, | |
| "learning_rate": 5.998534611325737e-06, | |
| "loss": 1.3696374893188477, | |
| "step": 122 | |
| }, | |
| { | |
| "epoch": 0.24701195219123506, | |
| "grad_norm": 1.0916317701339722, | |
| "learning_rate": 5.998226913389344e-06, | |
| "loss": 1.2977485656738281, | |
| "step": 124 | |
| }, | |
| { | |
| "epoch": 0.250996015936255, | |
| "grad_norm": 1.5058797597885132, | |
| "learning_rate": 5.997889924302632e-06, | |
| "loss": 1.2800962924957275, | |
| "step": 126 | |
| }, | |
| { | |
| "epoch": 0.2549800796812749, | |
| "grad_norm": 2.89294695854187, | |
| "learning_rate": 5.997523647723856e-06, | |
| "loss": 0.9177144169807434, | |
| "step": 128 | |
| }, | |
| { | |
| "epoch": 0.2589641434262948, | |
| "grad_norm": 2.416161060333252, | |
| "learning_rate": 5.997128087629205e-06, | |
| "loss": 1.280983567237854, | |
| "step": 130 | |
| }, | |
| { | |
| "epoch": 0.26294820717131473, | |
| "grad_norm": 1.2975496053695679, | |
| "learning_rate": 5.996703248312762e-06, | |
| "loss": 1.2503688335418701, | |
| "step": 132 | |
| }, | |
| { | |
| "epoch": 0.26693227091633465, | |
| "grad_norm": 0.9795719385147095, | |
| "learning_rate": 5.996249134386455e-06, | |
| "loss": 1.2679003477096558, | |
| "step": 134 | |
| }, | |
| { | |
| "epoch": 0.27091633466135456, | |
| "grad_norm": 1.4742954969406128, | |
| "learning_rate": 5.995765750780013e-06, | |
| "loss": 0.5531994700431824, | |
| "step": 136 | |
| }, | |
| { | |
| "epoch": 0.2749003984063745, | |
| "grad_norm": 2.563380241394043, | |
| "learning_rate": 5.995253102740903e-06, | |
| "loss": 1.901612401008606, | |
| "step": 138 | |
| }, | |
| { | |
| "epoch": 0.2788844621513944, | |
| "grad_norm": 1.4704535007476807, | |
| "learning_rate": 5.994711195834279e-06, | |
| "loss": 1.1717365980148315, | |
| "step": 140 | |
| }, | |
| { | |
| "epoch": 0.28286852589641437, | |
| "grad_norm": 1.1811615228652954, | |
| "learning_rate": 5.994140035942923e-06, | |
| "loss": 0.7471544742584229, | |
| "step": 142 | |
| }, | |
| { | |
| "epoch": 0.2868525896414343, | |
| "grad_norm": 1.6094988584518433, | |
| "learning_rate": 5.993539629267178e-06, | |
| "loss": 0.9018757939338684, | |
| "step": 144 | |
| }, | |
| { | |
| "epoch": 0.2908366533864542, | |
| "grad_norm": 2.305218458175659, | |
| "learning_rate": 5.992909982324879e-06, | |
| "loss": 1.277273178100586, | |
| "step": 146 | |
| }, | |
| { | |
| "epoch": 0.2948207171314741, | |
| "grad_norm": 3.697319746017456, | |
| "learning_rate": 5.992251101951287e-06, | |
| "loss": 1.0025593042373657, | |
| "step": 148 | |
| }, | |
| { | |
| "epoch": 0.29880478087649404, | |
| "grad_norm": 1.539844036102295, | |
| "learning_rate": 5.991562995299011e-06, | |
| "loss": 1.3024755716323853, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 0.30278884462151395, | |
| "grad_norm": 1.0249600410461426, | |
| "learning_rate": 5.990845669837933e-06, | |
| "loss": 1.5959429740905762, | |
| "step": 152 | |
| }, | |
| { | |
| "epoch": 0.30677290836653387, | |
| "grad_norm": 0.8561967015266418, | |
| "learning_rate": 5.990099133355126e-06, | |
| "loss": 1.2801433801651, | |
| "step": 154 | |
| }, | |
| { | |
| "epoch": 0.3107569721115538, | |
| "grad_norm": 4.086156845092773, | |
| "learning_rate": 5.989323393954767e-06, | |
| "loss": 0.4956245422363281, | |
| "step": 156 | |
| }, | |
| { | |
| "epoch": 0.3147410358565737, | |
| "grad_norm": 3.771010398864746, | |
| "learning_rate": 5.988518460058054e-06, | |
| "loss": 0.4668130576610565, | |
| "step": 158 | |
| }, | |
| { | |
| "epoch": 0.3187250996015936, | |
| "grad_norm": 1.3703054189682007, | |
| "learning_rate": 5.9876843404031096e-06, | |
| "loss": 1.2212884426116943, | |
| "step": 160 | |
| }, | |
| { | |
| "epoch": 0.32270916334661354, | |
| "grad_norm": 1.210668921470642, | |
| "learning_rate": 5.986821044044889e-06, | |
| "loss": 1.7916109561920166, | |
| "step": 162 | |
| }, | |
| { | |
| "epoch": 0.32669322709163345, | |
| "grad_norm": 1.0227242708206177, | |
| "learning_rate": 5.985928580355082e-06, | |
| "loss": 0.8739029765129089, | |
| "step": 164 | |
| }, | |
| { | |
| "epoch": 0.33067729083665337, | |
| "grad_norm": 2.860746383666992, | |
| "learning_rate": 5.985006959022008e-06, | |
| "loss": 0.4693869352340698, | |
| "step": 166 | |
| }, | |
| { | |
| "epoch": 0.3346613545816733, | |
| "grad_norm": 1.755257487297058, | |
| "learning_rate": 5.984056190050517e-06, | |
| "loss": 1.324602723121643, | |
| "step": 168 | |
| }, | |
| { | |
| "epoch": 0.3386454183266932, | |
| "grad_norm": 7.148312568664551, | |
| "learning_rate": 5.983076283761872e-06, | |
| "loss": 1.3821817636489868, | |
| "step": 170 | |
| }, | |
| { | |
| "epoch": 0.3426294820717131, | |
| "grad_norm": 1.2952216863632202, | |
| "learning_rate": 5.982067250793646e-06, | |
| "loss": 1.2612062692642212, | |
| "step": 172 | |
| }, | |
| { | |
| "epoch": 0.3466135458167331, | |
| "grad_norm": 1.727574348449707, | |
| "learning_rate": 5.981029102099601e-06, | |
| "loss": 1.341408133506775, | |
| "step": 174 | |
| }, | |
| { | |
| "epoch": 0.350597609561753, | |
| "grad_norm": 2.543426513671875, | |
| "learning_rate": 5.979961848949572e-06, | |
| "loss": 0.5157387852668762, | |
| "step": 176 | |
| }, | |
| { | |
| "epoch": 0.3545816733067729, | |
| "grad_norm": 1.489472508430481, | |
| "learning_rate": 5.978865502929343e-06, | |
| "loss": 1.3691034317016602, | |
| "step": 178 | |
| }, | |
| { | |
| "epoch": 0.35856573705179284, | |
| "grad_norm": 3.3407742977142334, | |
| "learning_rate": 5.977740075940517e-06, | |
| "loss": 1.2798420190811157, | |
| "step": 180 | |
| }, | |
| { | |
| "epoch": 0.36254980079681276, | |
| "grad_norm": 0.7936763763427734, | |
| "learning_rate": 5.976585580200399e-06, | |
| "loss": 1.2865771055221558, | |
| "step": 182 | |
| }, | |
| { | |
| "epoch": 0.3665338645418327, | |
| "grad_norm": 1.722764492034912, | |
| "learning_rate": 5.9754020282418505e-06, | |
| "loss": 0.9274950623512268, | |
| "step": 184 | |
| }, | |
| { | |
| "epoch": 0.3705179282868526, | |
| "grad_norm": 1.4277971982955933, | |
| "learning_rate": 5.974189432913161e-06, | |
| "loss": 1.2118057012557983, | |
| "step": 186 | |
| }, | |
| { | |
| "epoch": 0.3745019920318725, | |
| "grad_norm": 0.7755621671676636, | |
| "learning_rate": 5.972947807377905e-06, | |
| "loss": 1.262542724609375, | |
| "step": 188 | |
| }, | |
| { | |
| "epoch": 0.3784860557768924, | |
| "grad_norm": 2.0006139278411865, | |
| "learning_rate": 5.971677165114801e-06, | |
| "loss": 1.1163339614868164, | |
| "step": 190 | |
| }, | |
| { | |
| "epoch": 0.38247011952191234, | |
| "grad_norm": 1.9247850179672241, | |
| "learning_rate": 5.970377519917563e-06, | |
| "loss": 1.0671018362045288, | |
| "step": 192 | |
| }, | |
| { | |
| "epoch": 0.38645418326693226, | |
| "grad_norm": 1.1371593475341797, | |
| "learning_rate": 5.969048885894754e-06, | |
| "loss": 1.2458205223083496, | |
| "step": 194 | |
| }, | |
| { | |
| "epoch": 0.3904382470119522, | |
| "grad_norm": 1.5814062356948853, | |
| "learning_rate": 5.967691277469631e-06, | |
| "loss": 1.2479208707809448, | |
| "step": 196 | |
| }, | |
| { | |
| "epoch": 0.3944223107569721, | |
| "grad_norm": 1.3527947664260864, | |
| "learning_rate": 5.9663047093799874e-06, | |
| "loss": 0.46853581070899963, | |
| "step": 198 | |
| }, | |
| { | |
| "epoch": 0.398406374501992, | |
| "grad_norm": 0.9908071160316467, | |
| "learning_rate": 5.964889196677996e-06, | |
| "loss": 1.2344821691513062, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.40239043824701193, | |
| "grad_norm": 0.9923727512359619, | |
| "learning_rate": 5.9634447547300415e-06, | |
| "loss": 1.2732172012329102, | |
| "step": 202 | |
| }, | |
| { | |
| "epoch": 0.4063745019920319, | |
| "grad_norm": 2.537524700164795, | |
| "learning_rate": 5.961971399216556e-06, | |
| "loss": 1.234106183052063, | |
| "step": 204 | |
| }, | |
| { | |
| "epoch": 0.4103585657370518, | |
| "grad_norm": 3.067852735519409, | |
| "learning_rate": 5.960469146131851e-06, | |
| "loss": 0.38716864585876465, | |
| "step": 206 | |
| }, | |
| { | |
| "epoch": 0.41434262948207173, | |
| "grad_norm": 0.8039565086364746, | |
| "learning_rate": 5.95893801178394e-06, | |
| "loss": 1.223067045211792, | |
| "step": 208 | |
| }, | |
| { | |
| "epoch": 0.41832669322709165, | |
| "grad_norm": 1.5125787258148193, | |
| "learning_rate": 5.957378012794361e-06, | |
| "loss": 0.698806881904602, | |
| "step": 210 | |
| }, | |
| { | |
| "epoch": 0.42231075697211157, | |
| "grad_norm": 1.2418526411056519, | |
| "learning_rate": 5.955789166098002e-06, | |
| "loss": 0.7970227599143982, | |
| "step": 212 | |
| }, | |
| { | |
| "epoch": 0.4262948207171315, | |
| "grad_norm": 2.7106666564941406, | |
| "learning_rate": 5.954171488942911e-06, | |
| "loss": 0.8325067758560181, | |
| "step": 214 | |
| }, | |
| { | |
| "epoch": 0.4302788844621514, | |
| "grad_norm": 3.5096561908721924, | |
| "learning_rate": 5.952524998890109e-06, | |
| "loss": 1.1556031703948975, | |
| "step": 216 | |
| }, | |
| { | |
| "epoch": 0.4342629482071713, | |
| "grad_norm": 1.513983130455017, | |
| "learning_rate": 5.950849713813405e-06, | |
| "loss": 1.263627529144287, | |
| "step": 218 | |
| }, | |
| { | |
| "epoch": 0.43824701195219123, | |
| "grad_norm": 0.7860940098762512, | |
| "learning_rate": 5.949145651899196e-06, | |
| "loss": 1.2762495279312134, | |
| "step": 220 | |
| }, | |
| { | |
| "epoch": 0.44223107569721115, | |
| "grad_norm": 1.6819899082183838, | |
| "learning_rate": 5.947412831646271e-06, | |
| "loss": 0.5981872081756592, | |
| "step": 222 | |
| }, | |
| { | |
| "epoch": 0.44621513944223107, | |
| "grad_norm": 1.2630786895751953, | |
| "learning_rate": 5.945651271865616e-06, | |
| "loss": 1.120012879371643, | |
| "step": 224 | |
| }, | |
| { | |
| "epoch": 0.450199203187251, | |
| "grad_norm": 0.9950310587882996, | |
| "learning_rate": 5.943860991680195e-06, | |
| "loss": 1.2754716873168945, | |
| "step": 226 | |
| }, | |
| { | |
| "epoch": 0.4541832669322709, | |
| "grad_norm": 1.6684496402740479, | |
| "learning_rate": 5.942042010524764e-06, | |
| "loss": 0.9846575856208801, | |
| "step": 228 | |
| }, | |
| { | |
| "epoch": 0.4581673306772908, | |
| "grad_norm": 1.4847872257232666, | |
| "learning_rate": 5.9401943481456386e-06, | |
| "loss": 1.2583152055740356, | |
| "step": 230 | |
| }, | |
| { | |
| "epoch": 0.46215139442231074, | |
| "grad_norm": 0.9578908681869507, | |
| "learning_rate": 5.9383180246004935e-06, | |
| "loss": 1.2739794254302979, | |
| "step": 232 | |
| }, | |
| { | |
| "epoch": 0.46613545816733065, | |
| "grad_norm": 1.1821162700653076, | |
| "learning_rate": 5.936413060258143e-06, | |
| "loss": 1.4074854850769043, | |
| "step": 234 | |
| }, | |
| { | |
| "epoch": 0.4701195219123506, | |
| "grad_norm": 0.8178677558898926, | |
| "learning_rate": 5.9344794757983115e-06, | |
| "loss": 1.2413185834884644, | |
| "step": 236 | |
| }, | |
| { | |
| "epoch": 0.47410358565737054, | |
| "grad_norm": 2.4166979789733887, | |
| "learning_rate": 5.932517292211418e-06, | |
| "loss": 1.1744059324264526, | |
| "step": 238 | |
| }, | |
| { | |
| "epoch": 0.47808764940239046, | |
| "grad_norm": 1.1220707893371582, | |
| "learning_rate": 5.930526530798347e-06, | |
| "loss": 1.2574900388717651, | |
| "step": 240 | |
| }, | |
| { | |
| "epoch": 0.4820717131474104, | |
| "grad_norm": 0.7189679741859436, | |
| "learning_rate": 5.928507213170211e-06, | |
| "loss": 1.2059662342071533, | |
| "step": 242 | |
| }, | |
| { | |
| "epoch": 0.4860557768924303, | |
| "grad_norm": 1.4799033403396606, | |
| "learning_rate": 5.926459361248125e-06, | |
| "loss": 0.7257046103477478, | |
| "step": 244 | |
| }, | |
| { | |
| "epoch": 0.4900398406374502, | |
| "grad_norm": 8.812633514404297, | |
| "learning_rate": 5.9243829972629584e-06, | |
| "loss": 1.0781515836715698, | |
| "step": 246 | |
| }, | |
| { | |
| "epoch": 0.4940239043824701, | |
| "grad_norm": 2.5435431003570557, | |
| "learning_rate": 5.922278143755105e-06, | |
| "loss": 0.9890032410621643, | |
| "step": 248 | |
| }, | |
| { | |
| "epoch": 0.49800796812749004, | |
| "grad_norm": 1.1066993474960327, | |
| "learning_rate": 5.920144823574229e-06, | |
| "loss": 1.275596261024475, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 0.50199203187251, | |
| "grad_norm": 3.8385164737701416, | |
| "learning_rate": 5.917983059879021e-06, | |
| "loss": 0.5777413249015808, | |
| "step": 252 | |
| }, | |
| { | |
| "epoch": 0.5059760956175299, | |
| "grad_norm": 2.5549728870391846, | |
| "learning_rate": 5.915792876136944e-06, | |
| "loss": 1.2903834581375122, | |
| "step": 254 | |
| }, | |
| { | |
| "epoch": 0.5099601593625498, | |
| "grad_norm": 1.1752848625183105, | |
| "learning_rate": 5.913574296123985e-06, | |
| "loss": 1.2607370615005493, | |
| "step": 256 | |
| }, | |
| { | |
| "epoch": 0.5139442231075697, | |
| "grad_norm": 3.4985756874084473, | |
| "learning_rate": 5.9113273439243885e-06, | |
| "loss": 0.6077223420143127, | |
| "step": 258 | |
| }, | |
| { | |
| "epoch": 0.5179282868525896, | |
| "grad_norm": 0.8346880674362183, | |
| "learning_rate": 5.909052043930402e-06, | |
| "loss": 1.2486491203308105, | |
| "step": 260 | |
| }, | |
| { | |
| "epoch": 0.5219123505976095, | |
| "grad_norm": 1.6400198936462402, | |
| "learning_rate": 5.9067484208420046e-06, | |
| "loss": 0.3859616219997406, | |
| "step": 262 | |
| }, | |
| { | |
| "epoch": 0.5258964143426295, | |
| "grad_norm": 2.0709147453308105, | |
| "learning_rate": 5.904416499666646e-06, | |
| "loss": 1.250545620918274, | |
| "step": 264 | |
| }, | |
| { | |
| "epoch": 0.5298804780876494, | |
| "grad_norm": 3.2738661766052246, | |
| "learning_rate": 5.902056305718969e-06, | |
| "loss": 0.5132614970207214, | |
| "step": 266 | |
| }, | |
| { | |
| "epoch": 0.5338645418326693, | |
| "grad_norm": 1.4471163749694824, | |
| "learning_rate": 5.89966786462054e-06, | |
| "loss": 1.2536060810089111, | |
| "step": 268 | |
| }, | |
| { | |
| "epoch": 0.5378486055776892, | |
| "grad_norm": 2.023653030395508, | |
| "learning_rate": 5.897251202299566e-06, | |
| "loss": 1.7837636470794678, | |
| "step": 270 | |
| }, | |
| { | |
| "epoch": 0.5418326693227091, | |
| "grad_norm": 0.7867792248725891, | |
| "learning_rate": 5.894806344990614e-06, | |
| "loss": 0.7907792329788208, | |
| "step": 272 | |
| }, | |
| { | |
| "epoch": 0.545816733067729, | |
| "grad_norm": 0.9616872072219849, | |
| "learning_rate": 5.892333319234332e-06, | |
| "loss": 1.240364670753479, | |
| "step": 274 | |
| }, | |
| { | |
| "epoch": 0.549800796812749, | |
| "grad_norm": 1.5364048480987549, | |
| "learning_rate": 5.889832151877152e-06, | |
| "loss": 0.6271519064903259, | |
| "step": 276 | |
| }, | |
| { | |
| "epoch": 0.5537848605577689, | |
| "grad_norm": 1.9956889152526855, | |
| "learning_rate": 5.887302870071004e-06, | |
| "loss": 1.354748010635376, | |
| "step": 278 | |
| }, | |
| { | |
| "epoch": 0.5577689243027888, | |
| "grad_norm": 3.179105043411255, | |
| "learning_rate": 5.88474550127302e-06, | |
| "loss": 0.7769224047660828, | |
| "step": 280 | |
| }, | |
| { | |
| "epoch": 0.5617529880478087, | |
| "grad_norm": 2.1050288677215576, | |
| "learning_rate": 5.882160073245238e-06, | |
| "loss": 0.7815161347389221, | |
| "step": 282 | |
| }, | |
| { | |
| "epoch": 0.5657370517928287, | |
| "grad_norm": 1.0835380554199219, | |
| "learning_rate": 5.879546614054295e-06, | |
| "loss": 1.2420227527618408, | |
| "step": 284 | |
| }, | |
| { | |
| "epoch": 0.5697211155378487, | |
| "grad_norm": 0.9784935712814331, | |
| "learning_rate": 5.876905152071131e-06, | |
| "loss": 1.2437528371810913, | |
| "step": 286 | |
| }, | |
| { | |
| "epoch": 0.5737051792828686, | |
| "grad_norm": 1.059682011604309, | |
| "learning_rate": 5.874235715970671e-06, | |
| "loss": 1.1747212409973145, | |
| "step": 288 | |
| }, | |
| { | |
| "epoch": 0.5776892430278885, | |
| "grad_norm": 1.0844000577926636, | |
| "learning_rate": 5.87153833473152e-06, | |
| "loss": 1.2218478918075562, | |
| "step": 290 | |
| }, | |
| { | |
| "epoch": 0.5816733067729084, | |
| "grad_norm": 1.2831990718841553, | |
| "learning_rate": 5.868813037635649e-06, | |
| "loss": 1.1690454483032227, | |
| "step": 292 | |
| }, | |
| { | |
| "epoch": 0.5856573705179283, | |
| "grad_norm": 2.694718360900879, | |
| "learning_rate": 5.866059854268076e-06, | |
| "loss": 0.49895596504211426, | |
| "step": 294 | |
| }, | |
| { | |
| "epoch": 0.5896414342629482, | |
| "grad_norm": 1.1014599800109863, | |
| "learning_rate": 5.863278814516539e-06, | |
| "loss": 1.4519755840301514, | |
| "step": 296 | |
| }, | |
| { | |
| "epoch": 0.5936254980079682, | |
| "grad_norm": 6.0046305656433105, | |
| "learning_rate": 5.860469948571181e-06, | |
| "loss": 0.6872335076332092, | |
| "step": 298 | |
| }, | |
| { | |
| "epoch": 0.5976095617529881, | |
| "grad_norm": 1.493370771408081, | |
| "learning_rate": 5.857633286924219e-06, | |
| "loss": 1.241629958152771, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 0.601593625498008, | |
| "grad_norm": 1.3740859031677246, | |
| "learning_rate": 5.854768860369607e-06, | |
| "loss": 1.0279847383499146, | |
| "step": 302 | |
| }, | |
| { | |
| "epoch": 0.6055776892430279, | |
| "grad_norm": 4.5894083976745605, | |
| "learning_rate": 5.85187670000271e-06, | |
| "loss": 0.8594214916229248, | |
| "step": 304 | |
| }, | |
| { | |
| "epoch": 0.6095617529880478, | |
| "grad_norm": 1.9348714351654053, | |
| "learning_rate": 5.848956837219964e-06, | |
| "loss": 1.1640937328338623, | |
| "step": 306 | |
| }, | |
| { | |
| "epoch": 0.6135458167330677, | |
| "grad_norm": 3.6650631427764893, | |
| "learning_rate": 5.846009303718529e-06, | |
| "loss": 1.083706259727478, | |
| "step": 308 | |
| }, | |
| { | |
| "epoch": 0.6175298804780877, | |
| "grad_norm": 0.8985078930854797, | |
| "learning_rate": 5.8430341314959565e-06, | |
| "loss": 1.2840549945831299, | |
| "step": 310 | |
| }, | |
| { | |
| "epoch": 0.6215139442231076, | |
| "grad_norm": 3.3366034030914307, | |
| "learning_rate": 5.840031352849833e-06, | |
| "loss": 0.6729341149330139, | |
| "step": 312 | |
| }, | |
| { | |
| "epoch": 0.6254980079681275, | |
| "grad_norm": 0.5400150418281555, | |
| "learning_rate": 5.83700100037743e-06, | |
| "loss": 0.9031069874763489, | |
| "step": 314 | |
| }, | |
| { | |
| "epoch": 0.6294820717131474, | |
| "grad_norm": 0.8818338513374329, | |
| "learning_rate": 5.833943106975355e-06, | |
| "loss": 1.403872013092041, | |
| "step": 316 | |
| }, | |
| { | |
| "epoch": 0.6334661354581673, | |
| "grad_norm": 0.9534677267074585, | |
| "learning_rate": 5.830857705839191e-06, | |
| "loss": 0.7257641553878784, | |
| "step": 318 | |
| }, | |
| { | |
| "epoch": 0.6374501992031872, | |
| "grad_norm": 1.2703937292099, | |
| "learning_rate": 5.8277448304631385e-06, | |
| "loss": 1.2789297103881836, | |
| "step": 320 | |
| }, | |
| { | |
| "epoch": 0.6414342629482072, | |
| "grad_norm": 2.5597033500671387, | |
| "learning_rate": 5.824604514639647e-06, | |
| "loss": 0.5666279792785645, | |
| "step": 322 | |
| }, | |
| { | |
| "epoch": 0.6454183266932271, | |
| "grad_norm": 1.932152509689331, | |
| "learning_rate": 5.8214367924590515e-06, | |
| "loss": 0.9416989088058472, | |
| "step": 324 | |
| }, | |
| { | |
| "epoch": 0.649402390438247, | |
| "grad_norm": 2.5085222721099854, | |
| "learning_rate": 5.818241698309205e-06, | |
| "loss": 0.9871986508369446, | |
| "step": 326 | |
| }, | |
| { | |
| "epoch": 0.6533864541832669, | |
| "grad_norm": 0.8283513784408569, | |
| "learning_rate": 5.8150192668751015e-06, | |
| "loss": 1.2529672384262085, | |
| "step": 328 | |
| }, | |
| { | |
| "epoch": 0.6573705179282868, | |
| "grad_norm": 7.669778347015381, | |
| "learning_rate": 5.811769533138499e-06, | |
| "loss": 0.46496719121932983, | |
| "step": 330 | |
| }, | |
| { | |
| "epoch": 0.6613545816733067, | |
| "grad_norm": 3.1111960411071777, | |
| "learning_rate": 5.808492532377542e-06, | |
| "loss": 1.1308894157409668, | |
| "step": 332 | |
| }, | |
| { | |
| "epoch": 0.6653386454183267, | |
| "grad_norm": 1.0599477291107178, | |
| "learning_rate": 5.805188300166379e-06, | |
| "loss": 1.1927093267440796, | |
| "step": 334 | |
| }, | |
| { | |
| "epoch": 0.6693227091633466, | |
| "grad_norm": 0.7919442653656006, | |
| "learning_rate": 5.801856872374772e-06, | |
| "loss": 1.2229902744293213, | |
| "step": 336 | |
| }, | |
| { | |
| "epoch": 0.6733067729083665, | |
| "grad_norm": 0.874751627445221, | |
| "learning_rate": 5.798498285167714e-06, | |
| "loss": 1.239054560661316, | |
| "step": 338 | |
| }, | |
| { | |
| "epoch": 0.6772908366533864, | |
| "grad_norm": 3.267413854598999, | |
| "learning_rate": 5.795112575005031e-06, | |
| "loss": 0.5422060489654541, | |
| "step": 340 | |
| }, | |
| { | |
| "epoch": 0.6812749003984063, | |
| "grad_norm": 0.603284478187561, | |
| "learning_rate": 5.791699778640985e-06, | |
| "loss": 0.5057201385498047, | |
| "step": 342 | |
| }, | |
| { | |
| "epoch": 0.6852589641434262, | |
| "grad_norm": 1.073237419128418, | |
| "learning_rate": 5.788259933123882e-06, | |
| "loss": 1.212401270866394, | |
| "step": 344 | |
| }, | |
| { | |
| "epoch": 0.6892430278884463, | |
| "grad_norm": 0.9039257168769836, | |
| "learning_rate": 5.7847930757956626e-06, | |
| "loss": 1.2373487949371338, | |
| "step": 346 | |
| }, | |
| { | |
| "epoch": 0.6932270916334662, | |
| "grad_norm": 0.6864405870437622, | |
| "learning_rate": 5.7812992442915016e-06, | |
| "loss": 1.1827311515808105, | |
| "step": 348 | |
| }, | |
| { | |
| "epoch": 0.6972111553784861, | |
| "grad_norm": 1.7330577373504639, | |
| "learning_rate": 5.777778476539397e-06, | |
| "loss": 0.7856748104095459, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 0.701195219123506, | |
| "grad_norm": 4.816940784454346, | |
| "learning_rate": 5.774230810759756e-06, | |
| "loss": 0.7216228246688843, | |
| "step": 352 | |
| }, | |
| { | |
| "epoch": 0.7051792828685259, | |
| "grad_norm": 2.1332626342773438, | |
| "learning_rate": 5.7706562854649866e-06, | |
| "loss": 0.49049532413482666, | |
| "step": 354 | |
| }, | |
| { | |
| "epoch": 0.7091633466135459, | |
| "grad_norm": 2.8059940338134766, | |
| "learning_rate": 5.767054939459075e-06, | |
| "loss": 1.3019351959228516, | |
| "step": 356 | |
| }, | |
| { | |
| "epoch": 0.7131474103585658, | |
| "grad_norm": 4.427498817443848, | |
| "learning_rate": 5.763426811837164e-06, | |
| "loss": 0.48208871483802795, | |
| "step": 358 | |
| }, | |
| { | |
| "epoch": 0.7171314741035857, | |
| "grad_norm": 4.743298530578613, | |
| "learning_rate": 5.759771941985128e-06, | |
| "loss": 1.6483818292617798, | |
| "step": 360 | |
| }, | |
| { | |
| "epoch": 0.7211155378486056, | |
| "grad_norm": 0.8030229210853577, | |
| "learning_rate": 5.75609036957915e-06, | |
| "loss": 0.7936917543411255, | |
| "step": 362 | |
| }, | |
| { | |
| "epoch": 0.7250996015936255, | |
| "grad_norm": 4.138736248016357, | |
| "learning_rate": 5.752382134585289e-06, | |
| "loss": 0.19702184200286865, | |
| "step": 364 | |
| }, | |
| { | |
| "epoch": 0.7290836653386454, | |
| "grad_norm": 0.7204448580741882, | |
| "learning_rate": 5.748647277259041e-06, | |
| "loss": 1.3097480535507202, | |
| "step": 366 | |
| }, | |
| { | |
| "epoch": 0.7330677290836654, | |
| "grad_norm": 0.6811744570732117, | |
| "learning_rate": 5.744885838144908e-06, | |
| "loss": 1.282241702079773, | |
| "step": 368 | |
| }, | |
| { | |
| "epoch": 0.7370517928286853, | |
| "grad_norm": 1.3216296434402466, | |
| "learning_rate": 5.741097858075958e-06, | |
| "loss": 1.1899917125701904, | |
| "step": 370 | |
| }, | |
| { | |
| "epoch": 0.7410358565737052, | |
| "grad_norm": 0.7291891574859619, | |
| "learning_rate": 5.737283378173377e-06, | |
| "loss": 1.289171576499939, | |
| "step": 372 | |
| }, | |
| { | |
| "epoch": 0.7450199203187251, | |
| "grad_norm": 1.4926878213882446, | |
| "learning_rate": 5.733442439846028e-06, | |
| "loss": 0.9133517742156982, | |
| "step": 374 | |
| }, | |
| { | |
| "epoch": 0.749003984063745, | |
| "grad_norm": 1.1999213695526123, | |
| "learning_rate": 5.729575084789995e-06, | |
| "loss": 1.2485815286636353, | |
| "step": 376 | |
| }, | |
| { | |
| "epoch": 0.7529880478087649, | |
| "grad_norm": 0.4571026563644409, | |
| "learning_rate": 5.725681354988137e-06, | |
| "loss": 0.41173255443573, | |
| "step": 378 | |
| }, | |
| { | |
| "epoch": 0.7569721115537849, | |
| "grad_norm": 0.9662789106369019, | |
| "learning_rate": 5.72176129270963e-06, | |
| "loss": 1.3222002983093262, | |
| "step": 380 | |
| }, | |
| { | |
| "epoch": 0.7609561752988048, | |
| "grad_norm": 0.8864423036575317, | |
| "learning_rate": 5.717814940509503e-06, | |
| "loss": 1.2533366680145264, | |
| "step": 382 | |
| }, | |
| { | |
| "epoch": 0.7649402390438247, | |
| "grad_norm": 1.8013001680374146, | |
| "learning_rate": 5.713842341228187e-06, | |
| "loss": 1.132637858390808, | |
| "step": 384 | |
| }, | |
| { | |
| "epoch": 0.7689243027888446, | |
| "grad_norm": 1.4815607070922852, | |
| "learning_rate": 5.70984353799104e-06, | |
| "loss": 0.28086692094802856, | |
| "step": 386 | |
| }, | |
| { | |
| "epoch": 0.7729083665338645, | |
| "grad_norm": 0.8467429280281067, | |
| "learning_rate": 5.705818574207883e-06, | |
| "loss": 1.4608538150787354, | |
| "step": 388 | |
| }, | |
| { | |
| "epoch": 0.7768924302788844, | |
| "grad_norm": 2.4864161014556885, | |
| "learning_rate": 5.701767493572526e-06, | |
| "loss": 0.7464155554771423, | |
| "step": 390 | |
| }, | |
| { | |
| "epoch": 0.7808764940239044, | |
| "grad_norm": 2.4926576614379883, | |
| "learning_rate": 5.6976903400623e-06, | |
| "loss": 0.5242215991020203, | |
| "step": 392 | |
| }, | |
| { | |
| "epoch": 0.7848605577689243, | |
| "grad_norm": 3.3884170055389404, | |
| "learning_rate": 5.693587157937572e-06, | |
| "loss": 0.7744420766830444, | |
| "step": 394 | |
| }, | |
| { | |
| "epoch": 0.7888446215139442, | |
| "grad_norm": 1.3466330766677856, | |
| "learning_rate": 5.689457991741267e-06, | |
| "loss": 0.8062616586685181, | |
| "step": 396 | |
| }, | |
| { | |
| "epoch": 0.7928286852589641, | |
| "grad_norm": 0.8415664434432983, | |
| "learning_rate": 5.685302886298392e-06, | |
| "loss": 0.9788842797279358, | |
| "step": 398 | |
| }, | |
| { | |
| "epoch": 0.796812749003984, | |
| "grad_norm": 1.0375547409057617, | |
| "learning_rate": 5.681121886715534e-06, | |
| "loss": 1.068263053894043, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 0.8007968127490039, | |
| "grad_norm": 1.184495210647583, | |
| "learning_rate": 5.676915038380384e-06, | |
| "loss": 0.7641897797584534, | |
| "step": 402 | |
| }, | |
| { | |
| "epoch": 0.8047808764940239, | |
| "grad_norm": 0.5623915195465088, | |
| "learning_rate": 5.67268238696124e-06, | |
| "loss": 1.194584846496582, | |
| "step": 404 | |
| }, | |
| { | |
| "epoch": 0.8087649402390438, | |
| "grad_norm": 1.6544809341430664, | |
| "learning_rate": 5.668423978406509e-06, | |
| "loss": 1.8557928800582886, | |
| "step": 406 | |
| }, | |
| { | |
| "epoch": 0.8127490039840638, | |
| "grad_norm": 0.9776933193206787, | |
| "learning_rate": 5.664139858944209e-06, | |
| "loss": 1.157083511352539, | |
| "step": 408 | |
| }, | |
| { | |
| "epoch": 0.8167330677290837, | |
| "grad_norm": 0.9368433356285095, | |
| "learning_rate": 5.65983007508147e-06, | |
| "loss": 1.1894208192825317, | |
| "step": 410 | |
| }, | |
| { | |
| "epoch": 0.8207171314741036, | |
| "grad_norm": 1.024929165840149, | |
| "learning_rate": 5.655494673604024e-06, | |
| "loss": 1.2211333513259888, | |
| "step": 412 | |
| }, | |
| { | |
| "epoch": 0.8247011952191236, | |
| "grad_norm": 0.9331441521644592, | |
| "learning_rate": 5.651133701575706e-06, | |
| "loss": 0.9813644289970398, | |
| "step": 414 | |
| }, | |
| { | |
| "epoch": 0.8286852589641435, | |
| "grad_norm": 0.43455296754837036, | |
| "learning_rate": 5.64674720633793e-06, | |
| "loss": 0.2262841910123825, | |
| "step": 416 | |
| }, | |
| { | |
| "epoch": 0.8326693227091634, | |
| "grad_norm": 0.9842036366462708, | |
| "learning_rate": 5.642335235509189e-06, | |
| "loss": 1.2737834453582764, | |
| "step": 418 | |
| }, | |
| { | |
| "epoch": 0.8366533864541833, | |
| "grad_norm": 1.0286755561828613, | |
| "learning_rate": 5.637897836984526e-06, | |
| "loss": 1.2228126525878906, | |
| "step": 420 | |
| }, | |
| { | |
| "epoch": 0.8406374501992032, | |
| "grad_norm": 0.8756253123283386, | |
| "learning_rate": 5.633435058935023e-06, | |
| "loss": 1.1928170919418335, | |
| "step": 422 | |
| }, | |
| { | |
| "epoch": 0.8446215139442231, | |
| "grad_norm": 0.758901834487915, | |
| "learning_rate": 5.628946949807274e-06, | |
| "loss": 1.1966356039047241, | |
| "step": 424 | |
| }, | |
| { | |
| "epoch": 0.848605577689243, | |
| "grad_norm": 2.6789400577545166, | |
| "learning_rate": 5.624433558322859e-06, | |
| "loss": 0.7115716338157654, | |
| "step": 426 | |
| }, | |
| { | |
| "epoch": 0.852589641434263, | |
| "grad_norm": 1.1329255104064941, | |
| "learning_rate": 5.619894933477816e-06, | |
| "loss": 1.2351547479629517, | |
| "step": 428 | |
| }, | |
| { | |
| "epoch": 0.8565737051792829, | |
| "grad_norm": 0.8669703602790833, | |
| "learning_rate": 5.615331124542109e-06, | |
| "loss": 1.0460853576660156, | |
| "step": 430 | |
| }, | |
| { | |
| "epoch": 0.8605577689243028, | |
| "grad_norm": 1.4718725681304932, | |
| "learning_rate": 5.610742181059092e-06, | |
| "loss": 1.8136500120162964, | |
| "step": 432 | |
| }, | |
| { | |
| "epoch": 0.8645418326693227, | |
| "grad_norm": 1.955024003982544, | |
| "learning_rate": 5.606128152844975e-06, | |
| "loss": 1.2090433835983276, | |
| "step": 434 | |
| }, | |
| { | |
| "epoch": 0.8685258964143426, | |
| "grad_norm": 2.959174156188965, | |
| "learning_rate": 5.601489089988277e-06, | |
| "loss": 0.4959055483341217, | |
| "step": 436 | |
| }, | |
| { | |
| "epoch": 0.8725099601593626, | |
| "grad_norm": 0.8022291660308838, | |
| "learning_rate": 5.596825042849287e-06, | |
| "loss": 1.2489244937896729, | |
| "step": 438 | |
| }, | |
| { | |
| "epoch": 0.8764940239043825, | |
| "grad_norm": 0.867755651473999, | |
| "learning_rate": 5.592136062059517e-06, | |
| "loss": 1.187935709953308, | |
| "step": 440 | |
| }, | |
| { | |
| "epoch": 0.8804780876494024, | |
| "grad_norm": 2.0213284492492676, | |
| "learning_rate": 5.587422198521149e-06, | |
| "loss": 1.6624571084976196, | |
| "step": 442 | |
| }, | |
| { | |
| "epoch": 0.8844621513944223, | |
| "grad_norm": 1.8472967147827148, | |
| "learning_rate": 5.582683503406488e-06, | |
| "loss": 1.3048073053359985, | |
| "step": 444 | |
| }, | |
| { | |
| "epoch": 0.8884462151394422, | |
| "grad_norm": 0.8281286954879761, | |
| "learning_rate": 5.5779200281574e-06, | |
| "loss": 1.043340802192688, | |
| "step": 446 | |
| }, | |
| { | |
| "epoch": 0.8924302788844621, | |
| "grad_norm": 1.8063609600067139, | |
| "learning_rate": 5.573131824484758e-06, | |
| "loss": 0.371786892414093, | |
| "step": 448 | |
| }, | |
| { | |
| "epoch": 0.896414342629482, | |
| "grad_norm": 0.8337019681930542, | |
| "learning_rate": 5.56831894436788e-06, | |
| "loss": 1.1593928337097168, | |
| "step": 450 | |
| }, | |
| { | |
| "epoch": 0.900398406374502, | |
| "grad_norm": 0.808246374130249, | |
| "learning_rate": 5.563481440053964e-06, | |
| "loss": 0.8130660057067871, | |
| "step": 452 | |
| }, | |
| { | |
| "epoch": 0.9043824701195219, | |
| "grad_norm": 0.7648867964744568, | |
| "learning_rate": 5.55861936405752e-06, | |
| "loss": 1.2445188760757446, | |
| "step": 454 | |
| }, | |
| { | |
| "epoch": 0.9083665338645418, | |
| "grad_norm": 4.679040431976318, | |
| "learning_rate": 5.5537327691598026e-06, | |
| "loss": 0.9090757966041565, | |
| "step": 456 | |
| }, | |
| { | |
| "epoch": 0.9123505976095617, | |
| "grad_norm": 0.8703306317329407, | |
| "learning_rate": 5.548821708408234e-06, | |
| "loss": 1.2912606000900269, | |
| "step": 458 | |
| }, | |
| { | |
| "epoch": 0.9163346613545816, | |
| "grad_norm": 3.33894681930542, | |
| "learning_rate": 5.543886235115832e-06, | |
| "loss": 1.0427659749984741, | |
| "step": 460 | |
| }, | |
| { | |
| "epoch": 0.9203187250996016, | |
| "grad_norm": 1.598880410194397, | |
| "learning_rate": 5.538926402860631e-06, | |
| "loss": 1.2816940546035767, | |
| "step": 462 | |
| }, | |
| { | |
| "epoch": 0.9243027888446215, | |
| "grad_norm": 1.35460364818573, | |
| "learning_rate": 5.533942265485095e-06, | |
| "loss": 1.3399840593338013, | |
| "step": 464 | |
| }, | |
| { | |
| "epoch": 0.9282868525896414, | |
| "grad_norm": 7.064363956451416, | |
| "learning_rate": 5.528933877095541e-06, | |
| "loss": 0.40876510739326477, | |
| "step": 466 | |
| }, | |
| { | |
| "epoch": 0.9322709163346613, | |
| "grad_norm": 0.7858706712722778, | |
| "learning_rate": 5.523901292061547e-06, | |
| "loss": 1.1805975437164307, | |
| "step": 468 | |
| }, | |
| { | |
| "epoch": 0.9362549800796812, | |
| "grad_norm": 8.24327278137207, | |
| "learning_rate": 5.518844565015361e-06, | |
| "loss": 0.38794469833374023, | |
| "step": 470 | |
| }, | |
| { | |
| "epoch": 0.9402390438247012, | |
| "grad_norm": 0.7928199768066406, | |
| "learning_rate": 5.51376375085131e-06, | |
| "loss": 1.2316607236862183, | |
| "step": 472 | |
| }, | |
| { | |
| "epoch": 0.9442231075697212, | |
| "grad_norm": 4.031145095825195, | |
| "learning_rate": 5.508658904725206e-06, | |
| "loss": 0.5695405602455139, | |
| "step": 474 | |
| }, | |
| { | |
| "epoch": 0.9482071713147411, | |
| "grad_norm": 2.9237377643585205, | |
| "learning_rate": 5.503530082053741e-06, | |
| "loss": 0.338968962430954, | |
| "step": 476 | |
| }, | |
| { | |
| "epoch": 0.952191235059761, | |
| "grad_norm": 0.8833221793174744, | |
| "learning_rate": 5.498377338513894e-06, | |
| "loss": 1.2102028131484985, | |
| "step": 478 | |
| }, | |
| { | |
| "epoch": 0.9561752988047809, | |
| "grad_norm": 25.611223220825195, | |
| "learning_rate": 5.493200730042317e-06, | |
| "loss": 0.4739567041397095, | |
| "step": 480 | |
| }, | |
| { | |
| "epoch": 0.9601593625498008, | |
| "grad_norm": 5.376172065734863, | |
| "learning_rate": 5.488000312834735e-06, | |
| "loss": 0.9883483648300171, | |
| "step": 482 | |
| }, | |
| { | |
| "epoch": 0.9641434262948207, | |
| "grad_norm": 1.7662686109542847, | |
| "learning_rate": 5.482776143345333e-06, | |
| "loss": 1.2430894374847412, | |
| "step": 484 | |
| }, | |
| { | |
| "epoch": 0.9681274900398407, | |
| "grad_norm": 2.5627293586730957, | |
| "learning_rate": 5.477528278286145e-06, | |
| "loss": 1.2240179777145386, | |
| "step": 486 | |
| }, | |
| { | |
| "epoch": 0.9721115537848606, | |
| "grad_norm": 0.8417234420776367, | |
| "learning_rate": 5.472256774626435e-06, | |
| "loss": 1.1680150032043457, | |
| "step": 488 | |
| }, | |
| { | |
| "epoch": 0.9760956175298805, | |
| "grad_norm": 0.8709147572517395, | |
| "learning_rate": 5.4669616895920826e-06, | |
| "loss": 1.2006162405014038, | |
| "step": 490 | |
| }, | |
| { | |
| "epoch": 0.9800796812749004, | |
| "grad_norm": 5.11852502822876, | |
| "learning_rate": 5.46164308066496e-06, | |
| "loss": 0.7005679607391357, | |
| "step": 492 | |
| }, | |
| { | |
| "epoch": 0.9840637450199203, | |
| "grad_norm": 2.7665576934814453, | |
| "learning_rate": 5.456301005582304e-06, | |
| "loss": 0.7001307606697083, | |
| "step": 494 | |
| }, | |
| { | |
| "epoch": 0.9880478087649402, | |
| "grad_norm": 0.8219811320304871, | |
| "learning_rate": 5.4509355223360956e-06, | |
| "loss": 1.254296898841858, | |
| "step": 496 | |
| }, | |
| { | |
| "epoch": 0.9920318725099602, | |
| "grad_norm": 1.0245788097381592, | |
| "learning_rate": 5.445546689172432e-06, | |
| "loss": 1.267047643661499, | |
| "step": 498 | |
| }, | |
| { | |
| "epoch": 0.9960159362549801, | |
| "grad_norm": 1.1505917310714722, | |
| "learning_rate": 5.440134564590883e-06, | |
| "loss": 0.7141546010971069, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "grad_norm": 6.24027681350708, | |
| "learning_rate": 5.434699207343867e-06, | |
| "loss": 1.0391122102737427, | |
| "step": 502 | |
| }, | |
| { | |
| "epoch": 1.00398406374502, | |
| "grad_norm": 1.2134792804718018, | |
| "learning_rate": 5.429240676436008e-06, | |
| "loss": 0.7802969217300415, | |
| "step": 504 | |
| }, | |
| { | |
| "epoch": 1.0079681274900398, | |
| "grad_norm": 1.5164703130722046, | |
| "learning_rate": 5.423759031123498e-06, | |
| "loss": 0.31817543506622314, | |
| "step": 506 | |
| }, | |
| { | |
| "epoch": 1.0119521912350598, | |
| "grad_norm": 0.6141365170478821, | |
| "learning_rate": 5.41825433091345e-06, | |
| "loss": 1.0097558498382568, | |
| "step": 508 | |
| }, | |
| { | |
| "epoch": 1.0159362549800797, | |
| "grad_norm": 0.8733232021331787, | |
| "learning_rate": 5.4127266355632575e-06, | |
| "loss": 1.0352897644042969, | |
| "step": 510 | |
| }, | |
| { | |
| "epoch": 1.0199203187250996, | |
| "grad_norm": 2.5583245754241943, | |
| "learning_rate": 5.407176005079938e-06, | |
| "loss": 1.0885701179504395, | |
| "step": 512 | |
| }, | |
| { | |
| "epoch": 1.0239043824701195, | |
| "grad_norm": 1.0007575750350952, | |
| "learning_rate": 5.401602499719488e-06, | |
| "loss": 1.0486167669296265, | |
| "step": 514 | |
| }, | |
| { | |
| "epoch": 1.0278884462151394, | |
| "grad_norm": 1.1661553382873535, | |
| "learning_rate": 5.396006179986228e-06, | |
| "loss": 1.0347387790679932, | |
| "step": 516 | |
| }, | |
| { | |
| "epoch": 1.0318725099601593, | |
| "grad_norm": 0.8863986134529114, | |
| "learning_rate": 5.390387106632143e-06, | |
| "loss": 1.0672526359558105, | |
| "step": 518 | |
| }, | |
| { | |
| "epoch": 1.0358565737051793, | |
| "grad_norm": 2.13053035736084, | |
| "learning_rate": 5.384745340656227e-06, | |
| "loss": 0.8640899062156677, | |
| "step": 520 | |
| }, | |
| { | |
| "epoch": 1.0398406374501992, | |
| "grad_norm": 2.6343281269073486, | |
| "learning_rate": 5.379080943303814e-06, | |
| "loss": 0.943762481212616, | |
| "step": 522 | |
| }, | |
| { | |
| "epoch": 1.043824701195219, | |
| "grad_norm": 1.45510733127594, | |
| "learning_rate": 5.373393976065921e-06, | |
| "loss": 0.9649692177772522, | |
| "step": 524 | |
| }, | |
| { | |
| "epoch": 1.047808764940239, | |
| "grad_norm": 1.4119848012924194, | |
| "learning_rate": 5.367684500678576e-06, | |
| "loss": 1.1445621252059937, | |
| "step": 526 | |
| }, | |
| { | |
| "epoch": 1.051792828685259, | |
| "grad_norm": 1.0543644428253174, | |
| "learning_rate": 5.361952579122149e-06, | |
| "loss": 0.9114750027656555, | |
| "step": 528 | |
| }, | |
| { | |
| "epoch": 1.0557768924302788, | |
| "grad_norm": 1.5039920806884766, | |
| "learning_rate": 5.356198273620678e-06, | |
| "loss": 0.8998257517814636, | |
| "step": 530 | |
| }, | |
| { | |
| "epoch": 1.0597609561752988, | |
| "grad_norm": 2.6351239681243896, | |
| "learning_rate": 5.350421646641195e-06, | |
| "loss": 0.3897404074668884, | |
| "step": 532 | |
| }, | |
| { | |
| "epoch": 1.0637450199203187, | |
| "grad_norm": 1.1779015064239502, | |
| "learning_rate": 5.344622760893049e-06, | |
| "loss": 1.2084486484527588, | |
| "step": 534 | |
| }, | |
| { | |
| "epoch": 1.0677290836653386, | |
| "grad_norm": 0.50465989112854, | |
| "learning_rate": 5.338801679327221e-06, | |
| "loss": 0.48134946823120117, | |
| "step": 536 | |
| }, | |
| { | |
| "epoch": 1.0717131474103585, | |
| "grad_norm": 6.834875106811523, | |
| "learning_rate": 5.332958465135645e-06, | |
| "loss": 0.8534721732139587, | |
| "step": 538 | |
| }, | |
| { | |
| "epoch": 1.0756972111553784, | |
| "grad_norm": 0.8775362372398376, | |
| "learning_rate": 5.327093181750519e-06, | |
| "loss": 0.1745588630437851, | |
| "step": 540 | |
| }, | |
| { | |
| "epoch": 1.0796812749003983, | |
| "grad_norm": 0.8401792049407959, | |
| "learning_rate": 5.3212058928436175e-06, | |
| "loss": 1.0862375497817993, | |
| "step": 542 | |
| }, | |
| { | |
| "epoch": 1.0836653386454183, | |
| "grad_norm": 1.2075270414352417, | |
| "learning_rate": 5.3152966623256026e-06, | |
| "loss": 1.2837507724761963, | |
| "step": 544 | |
| }, | |
| { | |
| "epoch": 1.0876494023904382, | |
| "grad_norm": 3.44868803024292, | |
| "learning_rate": 5.309365554345325e-06, | |
| "loss": 0.4348865747451782, | |
| "step": 546 | |
| }, | |
| { | |
| "epoch": 1.091633466135458, | |
| "grad_norm": 1.060323715209961, | |
| "learning_rate": 5.303412633289133e-06, | |
| "loss": 0.7609821557998657, | |
| "step": 548 | |
| }, | |
| { | |
| "epoch": 1.095617529880478, | |
| "grad_norm": 0.48030683398246765, | |
| "learning_rate": 5.297437963780171e-06, | |
| "loss": 0.5199949741363525, | |
| "step": 550 | |
| }, | |
| { | |
| "epoch": 1.099601593625498, | |
| "grad_norm": 0.8254769444465637, | |
| "learning_rate": 5.2914416106776745e-06, | |
| "loss": 1.0883558988571167, | |
| "step": 552 | |
| }, | |
| { | |
| "epoch": 1.1035856573705178, | |
| "grad_norm": 2.637892246246338, | |
| "learning_rate": 5.2854236390762755e-06, | |
| "loss": 0.48916831612586975, | |
| "step": 554 | |
| }, | |
| { | |
| "epoch": 1.1075697211155378, | |
| "grad_norm": 1.684272050857544, | |
| "learning_rate": 5.2793841143052855e-06, | |
| "loss": 1.0254663228988647, | |
| "step": 556 | |
| }, | |
| { | |
| "epoch": 1.1115537848605577, | |
| "grad_norm": 2.17739200592041, | |
| "learning_rate": 5.273323101927994e-06, | |
| "loss": 0.9679847359657288, | |
| "step": 558 | |
| }, | |
| { | |
| "epoch": 1.1155378486055776, | |
| "grad_norm": 5.525514125823975, | |
| "learning_rate": 5.26724066774095e-06, | |
| "loss": 0.9007784128189087, | |
| "step": 560 | |
| }, | |
| { | |
| "epoch": 1.1195219123505975, | |
| "grad_norm": 1.1246291399002075, | |
| "learning_rate": 5.261136877773254e-06, | |
| "loss": 1.0599032640457153, | |
| "step": 562 | |
| }, | |
| { | |
| "epoch": 1.1235059760956174, | |
| "grad_norm": 1.811063289642334, | |
| "learning_rate": 5.255011798285838e-06, | |
| "loss": 1.053318738937378, | |
| "step": 564 | |
| }, | |
| { | |
| "epoch": 1.1274900398406373, | |
| "grad_norm": 1.0067085027694702, | |
| "learning_rate": 5.248865495770747e-06, | |
| "loss": 1.0161441564559937, | |
| "step": 566 | |
| }, | |
| { | |
| "epoch": 1.1314741035856573, | |
| "grad_norm": 1.653944730758667, | |
| "learning_rate": 5.242698036950416e-06, | |
| "loss": 1.211927890777588, | |
| "step": 568 | |
| }, | |
| { | |
| "epoch": 1.1354581673306772, | |
| "grad_norm": 5.520211219787598, | |
| "learning_rate": 5.236509488776946e-06, | |
| "loss": 0.2512112259864807, | |
| "step": 570 | |
| }, | |
| { | |
| "epoch": 1.139442231075697, | |
| "grad_norm": 0.6854221224784851, | |
| "learning_rate": 5.230299918431381e-06, | |
| "loss": 0.20837584137916565, | |
| "step": 572 | |
| }, | |
| { | |
| "epoch": 1.1434262948207172, | |
| "grad_norm": 1.0965662002563477, | |
| "learning_rate": 5.224069393322971e-06, | |
| "loss": 0.8550689220428467, | |
| "step": 574 | |
| }, | |
| { | |
| "epoch": 1.1474103585657371, | |
| "grad_norm": 0.5142279863357544, | |
| "learning_rate": 5.2178179810884465e-06, | |
| "loss": 0.5071516633033752, | |
| "step": 576 | |
| }, | |
| { | |
| "epoch": 1.151394422310757, | |
| "grad_norm": 1.3928073644638062, | |
| "learning_rate": 5.211545749591285e-06, | |
| "loss": 1.1629210710525513, | |
| "step": 578 | |
| }, | |
| { | |
| "epoch": 1.155378486055777, | |
| "grad_norm": 4.516799449920654, | |
| "learning_rate": 5.205252766920967e-06, | |
| "loss": 0.615897536277771, | |
| "step": 580 | |
| }, | |
| { | |
| "epoch": 1.159362549800797, | |
| "grad_norm": 1.9076368808746338, | |
| "learning_rate": 5.198939101392247e-06, | |
| "loss": 0.6484902501106262, | |
| "step": 582 | |
| }, | |
| { | |
| "epoch": 1.1633466135458168, | |
| "grad_norm": 2.9412710666656494, | |
| "learning_rate": 5.192604821544402e-06, | |
| "loss": 0.22438056766986847, | |
| "step": 584 | |
| }, | |
| { | |
| "epoch": 1.1673306772908367, | |
| "grad_norm": 0.8736124038696289, | |
| "learning_rate": 5.186249996140492e-06, | |
| "loss": 1.1574631929397583, | |
| "step": 586 | |
| }, | |
| { | |
| "epoch": 1.1713147410358566, | |
| "grad_norm": 1.56623375415802, | |
| "learning_rate": 5.179874694166617e-06, | |
| "loss": 1.0566999912261963, | |
| "step": 588 | |
| }, | |
| { | |
| "epoch": 1.1752988047808766, | |
| "grad_norm": 3.406691551208496, | |
| "learning_rate": 5.1734789848311635e-06, | |
| "loss": 1.28257417678833, | |
| "step": 590 | |
| }, | |
| { | |
| "epoch": 1.1792828685258965, | |
| "grad_norm": 1.163465976715088, | |
| "learning_rate": 5.16706293756405e-06, | |
| "loss": 1.0826280117034912, | |
| "step": 592 | |
| }, | |
| { | |
| "epoch": 1.1832669322709164, | |
| "grad_norm": 3.0535504817962646, | |
| "learning_rate": 5.160626622015983e-06, | |
| "loss": 1.4529417753219604, | |
| "step": 594 | |
| }, | |
| { | |
| "epoch": 1.1872509960159363, | |
| "grad_norm": 0.8099126815795898, | |
| "learning_rate": 5.154170108057693e-06, | |
| "loss": 1.1337939500808716, | |
| "step": 596 | |
| }, | |
| { | |
| "epoch": 1.1912350597609562, | |
| "grad_norm": 3.8160228729248047, | |
| "learning_rate": 5.147693465779179e-06, | |
| "loss": 0.3046616017818451, | |
| "step": 598 | |
| }, | |
| { | |
| "epoch": 1.1952191235059761, | |
| "grad_norm": 1.2103179693222046, | |
| "learning_rate": 5.141196765488946e-06, | |
| "loss": 0.8739789724349976, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 1.199203187250996, | |
| "grad_norm": 3.3165013790130615, | |
| "learning_rate": 5.134680077713244e-06, | |
| "loss": 0.5771604776382446, | |
| "step": 602 | |
| }, | |
| { | |
| "epoch": 1.203187250996016, | |
| "grad_norm": 1.3412213325500488, | |
| "learning_rate": 5.1281434731953e-06, | |
| "loss": 1.1980223655700684, | |
| "step": 604 | |
| }, | |
| { | |
| "epoch": 1.207171314741036, | |
| "grad_norm": 14.288922309875488, | |
| "learning_rate": 5.121587022894554e-06, | |
| "loss": 0.4752068817615509, | |
| "step": 606 | |
| }, | |
| { | |
| "epoch": 1.2111553784860558, | |
| "grad_norm": 0.9397494196891785, | |
| "learning_rate": 5.115010797985882e-06, | |
| "loss": 0.5870952010154724, | |
| "step": 608 | |
| }, | |
| { | |
| "epoch": 1.2151394422310757, | |
| "grad_norm": 0.735195517539978, | |
| "learning_rate": 5.108414869858831e-06, | |
| "loss": 1.0899227857589722, | |
| "step": 610 | |
| }, | |
| { | |
| "epoch": 1.2191235059760956, | |
| "grad_norm": 0.9480123519897461, | |
| "learning_rate": 5.1017993101168374e-06, | |
| "loss": 1.1740434169769287, | |
| "step": 612 | |
| }, | |
| { | |
| "epoch": 1.2231075697211156, | |
| "grad_norm": 1.5338431596755981, | |
| "learning_rate": 5.095164190576452e-06, | |
| "loss": 1.4396584033966064, | |
| "step": 614 | |
| }, | |
| { | |
| "epoch": 1.2270916334661355, | |
| "grad_norm": 11.36307144165039, | |
| "learning_rate": 5.0885095832665666e-06, | |
| "loss": 0.3999689817428589, | |
| "step": 616 | |
| }, | |
| { | |
| "epoch": 1.2310756972111554, | |
| "grad_norm": 1.546046495437622, | |
| "learning_rate": 5.081835560427619e-06, | |
| "loss": 0.9995384812355042, | |
| "step": 618 | |
| }, | |
| { | |
| "epoch": 1.2350597609561753, | |
| "grad_norm": 1.254744291305542, | |
| "learning_rate": 5.075142194510823e-06, | |
| "loss": 1.0542714595794678, | |
| "step": 620 | |
| }, | |
| { | |
| "epoch": 1.2390438247011952, | |
| "grad_norm": 2.047104597091675, | |
| "learning_rate": 5.068429558177369e-06, | |
| "loss": 0.9798321723937988, | |
| "step": 622 | |
| }, | |
| { | |
| "epoch": 1.2430278884462151, | |
| "grad_norm": 1.0986047983169556, | |
| "learning_rate": 5.061697724297646e-06, | |
| "loss": 1.068199872970581, | |
| "step": 624 | |
| }, | |
| { | |
| "epoch": 1.247011952191235, | |
| "grad_norm": 1.8080114126205444, | |
| "learning_rate": 5.054946765950443e-06, | |
| "loss": 0.9513214230537415, | |
| "step": 626 | |
| }, | |
| { | |
| "epoch": 1.250996015936255, | |
| "grad_norm": 1.3059947490692139, | |
| "learning_rate": 5.048176756422159e-06, | |
| "loss": 0.7849744558334351, | |
| "step": 628 | |
| }, | |
| { | |
| "epoch": 1.254980079681275, | |
| "grad_norm": 0.7330244779586792, | |
| "learning_rate": 5.041387769206009e-06, | |
| "loss": 1.0498535633087158, | |
| "step": 630 | |
| }, | |
| { | |
| "epoch": 1.2589641434262948, | |
| "grad_norm": 5.962719440460205, | |
| "learning_rate": 5.034579878001222e-06, | |
| "loss": 0.2894093096256256, | |
| "step": 632 | |
| }, | |
| { | |
| "epoch": 1.2629482071713147, | |
| "grad_norm": 4.925858974456787, | |
| "learning_rate": 5.027753156712246e-06, | |
| "loss": 0.36715632677078247, | |
| "step": 634 | |
| }, | |
| { | |
| "epoch": 1.2669322709163346, | |
| "grad_norm": 3.4104573726654053, | |
| "learning_rate": 5.020907679447936e-06, | |
| "loss": 0.844882071018219, | |
| "step": 636 | |
| }, | |
| { | |
| "epoch": 1.2709163346613546, | |
| "grad_norm": 1.9961673021316528, | |
| "learning_rate": 5.0140435205207636e-06, | |
| "loss": 0.8165204524993896, | |
| "step": 638 | |
| }, | |
| { | |
| "epoch": 1.2749003984063745, | |
| "grad_norm": 2.4332053661346436, | |
| "learning_rate": 5.007160754446002e-06, | |
| "loss": 0.3054620623588562, | |
| "step": 640 | |
| }, | |
| { | |
| "epoch": 1.2788844621513944, | |
| "grad_norm": 0.6446577906608582, | |
| "learning_rate": 5.000259455940913e-06, | |
| "loss": 0.9809127449989319, | |
| "step": 642 | |
| }, | |
| { | |
| "epoch": 1.2828685258964143, | |
| "grad_norm": 1.2125827074050903, | |
| "learning_rate": 4.9933396999239455e-06, | |
| "loss": 0.7705118060112, | |
| "step": 644 | |
| }, | |
| { | |
| "epoch": 1.2868525896414342, | |
| "grad_norm": 0.7487397193908691, | |
| "learning_rate": 4.986401561513917e-06, | |
| "loss": 1.0824811458587646, | |
| "step": 646 | |
| }, | |
| { | |
| "epoch": 1.2908366533864541, | |
| "grad_norm": 1.9600952863693237, | |
| "learning_rate": 4.979445116029199e-06, | |
| "loss": 0.6253088116645813, | |
| "step": 648 | |
| }, | |
| { | |
| "epoch": 1.294820717131474, | |
| "grad_norm": 1.7079068422317505, | |
| "learning_rate": 4.972470438986896e-06, | |
| "loss": 1.5013655424118042, | |
| "step": 650 | |
| }, | |
| { | |
| "epoch": 1.298804780876494, | |
| "grad_norm": 1.1496132612228394, | |
| "learning_rate": 4.965477606102033e-06, | |
| "loss": 0.8948485255241394, | |
| "step": 652 | |
| }, | |
| { | |
| "epoch": 1.302788844621514, | |
| "grad_norm": 1.8034613132476807, | |
| "learning_rate": 4.9584666932867285e-06, | |
| "loss": 0.24509888887405396, | |
| "step": 654 | |
| }, | |
| { | |
| "epoch": 1.3067729083665338, | |
| "grad_norm": 0.6996963620185852, | |
| "learning_rate": 4.951437776649368e-06, | |
| "loss": 1.0769448280334473, | |
| "step": 656 | |
| }, | |
| { | |
| "epoch": 1.3107569721115537, | |
| "grad_norm": 0.571880578994751, | |
| "learning_rate": 4.944390932493787e-06, | |
| "loss": 0.8138774633407593, | |
| "step": 658 | |
| }, | |
| { | |
| "epoch": 1.3147410358565736, | |
| "grad_norm": 0.9483959674835205, | |
| "learning_rate": 4.937326237318431e-06, | |
| "loss": 0.6459387540817261, | |
| "step": 660 | |
| }, | |
| { | |
| "epoch": 1.3187250996015936, | |
| "grad_norm": 0.9495901465415955, | |
| "learning_rate": 4.930243767815534e-06, | |
| "loss": 1.1829910278320312, | |
| "step": 662 | |
| }, | |
| { | |
| "epoch": 1.3227091633466135, | |
| "grad_norm": 1.2907254695892334, | |
| "learning_rate": 4.923143600870284e-06, | |
| "loss": 0.5661064386367798, | |
| "step": 664 | |
| }, | |
| { | |
| "epoch": 1.3266932270916334, | |
| "grad_norm": 1.5633907318115234, | |
| "learning_rate": 4.916025813559983e-06, | |
| "loss": 0.8189319372177124, | |
| "step": 666 | |
| }, | |
| { | |
| "epoch": 1.3306772908366533, | |
| "grad_norm": 1.9113082885742188, | |
| "learning_rate": 4.908890483153218e-06, | |
| "loss": 0.38532766699790955, | |
| "step": 668 | |
| }, | |
| { | |
| "epoch": 1.3346613545816732, | |
| "grad_norm": 0.9342731237411499, | |
| "learning_rate": 4.901737687109019e-06, | |
| "loss": 1.0321613550186157, | |
| "step": 670 | |
| }, | |
| { | |
| "epoch": 1.3386454183266931, | |
| "grad_norm": 3.1048390865325928, | |
| "learning_rate": 4.894567503076014e-06, | |
| "loss": 0.5770927667617798, | |
| "step": 672 | |
| }, | |
| { | |
| "epoch": 1.342629482071713, | |
| "grad_norm": 0.820324182510376, | |
| "learning_rate": 4.887380008891593e-06, | |
| "loss": 1.0886192321777344, | |
| "step": 674 | |
| }, | |
| { | |
| "epoch": 1.3466135458167332, | |
| "grad_norm": 1.3751561641693115, | |
| "learning_rate": 4.880175282581059e-06, | |
| "loss": 0.97751384973526, | |
| "step": 676 | |
| }, | |
| { | |
| "epoch": 1.3505976095617531, | |
| "grad_norm": 0.7426400184631348, | |
| "learning_rate": 4.872953402356782e-06, | |
| "loss": 1.076625943183899, | |
| "step": 678 | |
| }, | |
| { | |
| "epoch": 1.354581673306773, | |
| "grad_norm": 1.1565395593643188, | |
| "learning_rate": 4.86571444661735e-06, | |
| "loss": 1.0121248960494995, | |
| "step": 680 | |
| }, | |
| { | |
| "epoch": 1.358565737051793, | |
| "grad_norm": 0.7444704174995422, | |
| "learning_rate": 4.858458493946716e-06, | |
| "loss": 1.0811046361923218, | |
| "step": 682 | |
| }, | |
| { | |
| "epoch": 1.3625498007968129, | |
| "grad_norm": 1.0144495964050293, | |
| "learning_rate": 4.851185623113349e-06, | |
| "loss": 1.1279915571212769, | |
| "step": 684 | |
| }, | |
| { | |
| "epoch": 1.3665338645418328, | |
| "grad_norm": 0.7559702396392822, | |
| "learning_rate": 4.843895913069377e-06, | |
| "loss": 1.0942429304122925, | |
| "step": 686 | |
| }, | |
| { | |
| "epoch": 1.3705179282868527, | |
| "grad_norm": 0.8456003069877625, | |
| "learning_rate": 4.836589442949727e-06, | |
| "loss": 1.0091909170150757, | |
| "step": 688 | |
| }, | |
| { | |
| "epoch": 1.3745019920318726, | |
| "grad_norm": 0.7402591705322266, | |
| "learning_rate": 4.829266292071268e-06, | |
| "loss": 0.9695682525634766, | |
| "step": 690 | |
| }, | |
| { | |
| "epoch": 1.3784860557768925, | |
| "grad_norm": 1.815006136894226, | |
| "learning_rate": 4.821926539931952e-06, | |
| "loss": 0.3355652689933777, | |
| "step": 692 | |
| }, | |
| { | |
| "epoch": 1.3824701195219125, | |
| "grad_norm": 1.0571285486221313, | |
| "learning_rate": 4.814570266209952e-06, | |
| "loss": 1.1081352233886719, | |
| "step": 694 | |
| }, | |
| { | |
| "epoch": 1.3864541832669324, | |
| "grad_norm": 1.3027758598327637, | |
| "learning_rate": 4.80719755076279e-06, | |
| "loss": 1.0507612228393555, | |
| "step": 696 | |
| }, | |
| { | |
| "epoch": 1.3904382470119523, | |
| "grad_norm": 0.9322640299797058, | |
| "learning_rate": 4.799808473626476e-06, | |
| "loss": 1.1305720806121826, | |
| "step": 698 | |
| }, | |
| { | |
| "epoch": 1.3944223107569722, | |
| "grad_norm": 1.1364309787750244, | |
| "learning_rate": 4.792403115014637e-06, | |
| "loss": 0.1400398164987564, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 1.3984063745019921, | |
| "grad_norm": 1.2325326204299927, | |
| "learning_rate": 4.7849815553176476e-06, | |
| "loss": 1.1220163106918335, | |
| "step": 702 | |
| }, | |
| { | |
| "epoch": 1.402390438247012, | |
| "grad_norm": 1.0282156467437744, | |
| "learning_rate": 4.777543875101757e-06, | |
| "loss": 1.0591614246368408, | |
| "step": 704 | |
| }, | |
| { | |
| "epoch": 1.406374501992032, | |
| "grad_norm": 0.7515193223953247, | |
| "learning_rate": 4.770090155108215e-06, | |
| "loss": 1.1357749700546265, | |
| "step": 706 | |
| }, | |
| { | |
| "epoch": 1.4103585657370519, | |
| "grad_norm": 1.05164635181427, | |
| "learning_rate": 4.7626204762523905e-06, | |
| "loss": 0.9992522597312927, | |
| "step": 708 | |
| }, | |
| { | |
| "epoch": 1.4143426294820718, | |
| "grad_norm": 0.7848185896873474, | |
| "learning_rate": 4.755134919622901e-06, | |
| "loss": 1.0771911144256592, | |
| "step": 710 | |
| }, | |
| { | |
| "epoch": 1.4183266932270917, | |
| "grad_norm": 2.0036990642547607, | |
| "learning_rate": 4.747633566480726e-06, | |
| "loss": 0.6499975323677063, | |
| "step": 712 | |
| }, | |
| { | |
| "epoch": 1.4223107569721116, | |
| "grad_norm": 1.088212251663208, | |
| "learning_rate": 4.740116498258328e-06, | |
| "loss": 1.0736567974090576, | |
| "step": 714 | |
| }, | |
| { | |
| "epoch": 1.4262948207171315, | |
| "grad_norm": 1.0202051401138306, | |
| "learning_rate": 4.73258379655877e-06, | |
| "loss": 1.1317867040634155, | |
| "step": 716 | |
| }, | |
| { | |
| "epoch": 1.4302788844621515, | |
| "grad_norm": 0.6986392140388489, | |
| "learning_rate": 4.7250355431548244e-06, | |
| "loss": 0.1079653948545456, | |
| "step": 718 | |
| }, | |
| { | |
| "epoch": 1.4342629482071714, | |
| "grad_norm": 1.2315129041671753, | |
| "learning_rate": 4.717471819988088e-06, | |
| "loss": 1.070616364479065, | |
| "step": 720 | |
| }, | |
| { | |
| "epoch": 1.4382470119521913, | |
| "grad_norm": 2.786571502685547, | |
| "learning_rate": 4.709892709168096e-06, | |
| "loss": 0.2563188672065735, | |
| "step": 722 | |
| }, | |
| { | |
| "epoch": 1.4422310756972112, | |
| "grad_norm": 0.634524941444397, | |
| "learning_rate": 4.702298292971422e-06, | |
| "loss": 1.0500552654266357, | |
| "step": 724 | |
| }, | |
| { | |
| "epoch": 1.4462151394422311, | |
| "grad_norm": 0.7324956059455872, | |
| "learning_rate": 4.6946886538407975e-06, | |
| "loss": 1.092575192451477, | |
| "step": 726 | |
| }, | |
| { | |
| "epoch": 1.450199203187251, | |
| "grad_norm": 1.8564890623092651, | |
| "learning_rate": 4.687063874384204e-06, | |
| "loss": 0.8989277482032776, | |
| "step": 728 | |
| }, | |
| { | |
| "epoch": 1.454183266932271, | |
| "grad_norm": 0.6646371483802795, | |
| "learning_rate": 4.679424037373984e-06, | |
| "loss": 1.0014073848724365, | |
| "step": 730 | |
| }, | |
| { | |
| "epoch": 1.4581673306772909, | |
| "grad_norm": 2.136218786239624, | |
| "learning_rate": 4.671769225745939e-06, | |
| "loss": 1.0647640228271484, | |
| "step": 732 | |
| }, | |
| { | |
| "epoch": 1.4621513944223108, | |
| "grad_norm": 0.5179296135902405, | |
| "learning_rate": 4.664099522598432e-06, | |
| "loss": 0.12710000574588776, | |
| "step": 734 | |
| }, | |
| { | |
| "epoch": 1.4661354581673307, | |
| "grad_norm": 0.8502590656280518, | |
| "learning_rate": 4.656415011191484e-06, | |
| "loss": 1.085228681564331, | |
| "step": 736 | |
| }, | |
| { | |
| "epoch": 1.4701195219123506, | |
| "grad_norm": 1.1160621643066406, | |
| "learning_rate": 4.648715774945869e-06, | |
| "loss": 1.1700797080993652, | |
| "step": 738 | |
| }, | |
| { | |
| "epoch": 1.4741035856573705, | |
| "grad_norm": 4.530128002166748, | |
| "learning_rate": 4.641001897442209e-06, | |
| "loss": 0.19807864725589752, | |
| "step": 740 | |
| }, | |
| { | |
| "epoch": 1.4780876494023905, | |
| "grad_norm": 1.182551383972168, | |
| "learning_rate": 4.633273462420069e-06, | |
| "loss": 1.2210465669631958, | |
| "step": 742 | |
| }, | |
| { | |
| "epoch": 1.4820717131474104, | |
| "grad_norm": 7.367408752441406, | |
| "learning_rate": 4.625530553777045e-06, | |
| "loss": 1.2010120153427124, | |
| "step": 744 | |
| }, | |
| { | |
| "epoch": 1.4860557768924303, | |
| "grad_norm": 0.8875226378440857, | |
| "learning_rate": 4.617773255567855e-06, | |
| "loss": 1.0283279418945312, | |
| "step": 746 | |
| }, | |
| { | |
| "epoch": 1.4900398406374502, | |
| "grad_norm": 1.780938744544983, | |
| "learning_rate": 4.610001652003426e-06, | |
| "loss": 1.0667709112167358, | |
| "step": 748 | |
| }, | |
| { | |
| "epoch": 1.4940239043824701, | |
| "grad_norm": 1.2433035373687744, | |
| "learning_rate": 4.602215827449976e-06, | |
| "loss": 1.0492123365402222, | |
| "step": 750 | |
| }, | |
| { | |
| "epoch": 1.49800796812749, | |
| "grad_norm": 0.8798750638961792, | |
| "learning_rate": 4.594415866428108e-06, | |
| "loss": 1.0049997568130493, | |
| "step": 752 | |
| }, | |
| { | |
| "epoch": 1.50199203187251, | |
| "grad_norm": 1.146921992301941, | |
| "learning_rate": 4.586601853611882e-06, | |
| "loss": 0.994334876537323, | |
| "step": 754 | |
| }, | |
| { | |
| "epoch": 1.5059760956175299, | |
| "grad_norm": 3.869616746902466, | |
| "learning_rate": 4.578773873827901e-06, | |
| "loss": 0.7532044053077698, | |
| "step": 756 | |
| }, | |
| { | |
| "epoch": 1.5099601593625498, | |
| "grad_norm": 1.7733598947525024, | |
| "learning_rate": 4.57093201205439e-06, | |
| "loss": 1.0711463689804077, | |
| "step": 758 | |
| }, | |
| { | |
| "epoch": 1.5139442231075697, | |
| "grad_norm": 4.040090560913086, | |
| "learning_rate": 4.563076353420272e-06, | |
| "loss": 1.1239742040634155, | |
| "step": 760 | |
| }, | |
| { | |
| "epoch": 1.5179282868525896, | |
| "grad_norm": 1.1118268966674805, | |
| "learning_rate": 4.5552069832042455e-06, | |
| "loss": 0.22398273646831512, | |
| "step": 762 | |
| }, | |
| { | |
| "epoch": 1.5219123505976095, | |
| "grad_norm": 0.8436402678489685, | |
| "learning_rate": 4.547323986833857e-06, | |
| "loss": 1.0367255210876465, | |
| "step": 764 | |
| }, | |
| { | |
| "epoch": 1.5258964143426295, | |
| "grad_norm": 1.7664424180984497, | |
| "learning_rate": 4.539427449884576e-06, | |
| "loss": 0.7687526941299438, | |
| "step": 766 | |
| }, | |
| { | |
| "epoch": 1.5298804780876494, | |
| "grad_norm": 1.0416488647460938, | |
| "learning_rate": 4.53151745807886e-06, | |
| "loss": 0.5652468204498291, | |
| "step": 768 | |
| }, | |
| { | |
| "epoch": 1.5338645418326693, | |
| "grad_norm": 1.3710383176803589, | |
| "learning_rate": 4.523594097285234e-06, | |
| "loss": 1.0875599384307861, | |
| "step": 770 | |
| }, | |
| { | |
| "epoch": 1.5378486055776892, | |
| "grad_norm": 1.310120701789856, | |
| "learning_rate": 4.51565745351735e-06, | |
| "loss": 0.8149851560592651, | |
| "step": 772 | |
| }, | |
| { | |
| "epoch": 1.5418326693227091, | |
| "grad_norm": 1.0462884902954102, | |
| "learning_rate": 4.507707612933059e-06, | |
| "loss": 1.044182300567627, | |
| "step": 774 | |
| }, | |
| { | |
| "epoch": 1.545816733067729, | |
| "grad_norm": 2.2944624423980713, | |
| "learning_rate": 4.4997446618334664e-06, | |
| "loss": 1.1731159687042236, | |
| "step": 776 | |
| }, | |
| { | |
| "epoch": 1.549800796812749, | |
| "grad_norm": 6.394598960876465, | |
| "learning_rate": 4.491768686662005e-06, | |
| "loss": 0.5516869425773621, | |
| "step": 778 | |
| }, | |
| { | |
| "epoch": 1.5537848605577689, | |
| "grad_norm": 2.329699754714966, | |
| "learning_rate": 4.483779774003498e-06, | |
| "loss": 0.5405542850494385, | |
| "step": 780 | |
| }, | |
| { | |
| "epoch": 1.5577689243027888, | |
| "grad_norm": 0.42006587982177734, | |
| "learning_rate": 4.475778010583205e-06, | |
| "loss": 0.20549674332141876, | |
| "step": 782 | |
| }, | |
| { | |
| "epoch": 1.5617529880478087, | |
| "grad_norm": 2.271444082260132, | |
| "learning_rate": 4.467763483265897e-06, | |
| "loss": 0.9095351696014404, | |
| "step": 784 | |
| }, | |
| { | |
| "epoch": 1.5657370517928286, | |
| "grad_norm": 1.6157774925231934, | |
| "learning_rate": 4.459736279054901e-06, | |
| "loss": 1.3291853666305542, | |
| "step": 786 | |
| }, | |
| { | |
| "epoch": 1.5697211155378485, | |
| "grad_norm": 4.978515625, | |
| "learning_rate": 4.451696485091164e-06, | |
| "loss": 0.7586594223976135, | |
| "step": 788 | |
| }, | |
| { | |
| "epoch": 1.5737051792828685, | |
| "grad_norm": 1.2765519618988037, | |
| "learning_rate": 4.4436441886523025e-06, | |
| "loss": 1.1358023881912231, | |
| "step": 790 | |
| }, | |
| { | |
| "epoch": 1.5776892430278884, | |
| "grad_norm": 8.105411529541016, | |
| "learning_rate": 4.435579477151655e-06, | |
| "loss": 0.8000907897949219, | |
| "step": 792 | |
| }, | |
| { | |
| "epoch": 1.5816733067729083, | |
| "grad_norm": 0.7435089349746704, | |
| "learning_rate": 4.427502438137337e-06, | |
| "loss": 1.073531150817871, | |
| "step": 794 | |
| }, | |
| { | |
| "epoch": 1.5856573705179282, | |
| "grad_norm": 0.9908289313316345, | |
| "learning_rate": 4.419413159291284e-06, | |
| "loss": 1.011960744857788, | |
| "step": 796 | |
| }, | |
| { | |
| "epoch": 1.5896414342629481, | |
| "grad_norm": 1.1573151350021362, | |
| "learning_rate": 4.411311728428307e-06, | |
| "loss": 0.8743354082107544, | |
| "step": 798 | |
| }, | |
| { | |
| "epoch": 1.593625498007968, | |
| "grad_norm": 6.756656646728516, | |
| "learning_rate": 4.403198233495133e-06, | |
| "loss": 0.32545700669288635, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 1.597609561752988, | |
| "grad_norm": 1.2311936616897583, | |
| "learning_rate": 4.395072762569457e-06, | |
| "loss": 0.9778568744659424, | |
| "step": 802 | |
| }, | |
| { | |
| "epoch": 1.6015936254980079, | |
| "grad_norm": 3.5830166339874268, | |
| "learning_rate": 4.386935403858977e-06, | |
| "loss": 1.0981725454330444, | |
| "step": 804 | |
| }, | |
| { | |
| "epoch": 1.6055776892430278, | |
| "grad_norm": 0.9334324598312378, | |
| "learning_rate": 4.378786245700443e-06, | |
| "loss": 1.3115934133529663, | |
| "step": 806 | |
| }, | |
| { | |
| "epoch": 1.6095617529880477, | |
| "grad_norm": 0.8329153656959534, | |
| "learning_rate": 4.370625376558698e-06, | |
| "loss": 1.028051733970642, | |
| "step": 808 | |
| }, | |
| { | |
| "epoch": 1.6135458167330676, | |
| "grad_norm": 1.030179500579834, | |
| "learning_rate": 4.362452885025713e-06, | |
| "loss": 0.9735574722290039, | |
| "step": 810 | |
| }, | |
| { | |
| "epoch": 1.6175298804780875, | |
| "grad_norm": 6.181675434112549, | |
| "learning_rate": 4.35426885981963e-06, | |
| "loss": 0.42590758204460144, | |
| "step": 812 | |
| }, | |
| { | |
| "epoch": 1.6215139442231075, | |
| "grad_norm": 3.902128219604492, | |
| "learning_rate": 4.346073389783799e-06, | |
| "loss": 0.7486605048179626, | |
| "step": 814 | |
| }, | |
| { | |
| "epoch": 1.6254980079681274, | |
| "grad_norm": 0.6811983585357666, | |
| "learning_rate": 4.337866563885808e-06, | |
| "loss": 0.2310914248228073, | |
| "step": 816 | |
| }, | |
| { | |
| "epoch": 1.6294820717131473, | |
| "grad_norm": 0.7712540030479431, | |
| "learning_rate": 4.329648471216523e-06, | |
| "loss": 1.112511157989502, | |
| "step": 818 | |
| }, | |
| { | |
| "epoch": 1.6334661354581672, | |
| "grad_norm": 1.0290017127990723, | |
| "learning_rate": 4.321419200989117e-06, | |
| "loss": 0.287282794713974, | |
| "step": 820 | |
| }, | |
| { | |
| "epoch": 1.6374501992031871, | |
| "grad_norm": 2.3703389167785645, | |
| "learning_rate": 4.313178842538107e-06, | |
| "loss": 0.7247891426086426, | |
| "step": 822 | |
| }, | |
| { | |
| "epoch": 1.641434262948207, | |
| "grad_norm": 1.919006586074829, | |
| "learning_rate": 4.304927485318375e-06, | |
| "loss": 0.21648265421390533, | |
| "step": 824 | |
| }, | |
| { | |
| "epoch": 1.645418326693227, | |
| "grad_norm": 1.1350631713867188, | |
| "learning_rate": 4.296665218904207e-06, | |
| "loss": 1.0472216606140137, | |
| "step": 826 | |
| }, | |
| { | |
| "epoch": 1.6494023904382469, | |
| "grad_norm": 0.42043375968933105, | |
| "learning_rate": 4.288392132988313e-06, | |
| "loss": 0.40000608563423157, | |
| "step": 828 | |
| }, | |
| { | |
| "epoch": 1.6533864541832668, | |
| "grad_norm": 1.6645681858062744, | |
| "learning_rate": 4.280108317380859e-06, | |
| "loss": 0.4568580985069275, | |
| "step": 830 | |
| }, | |
| { | |
| "epoch": 1.6573705179282867, | |
| "grad_norm": 1.5291117429733276, | |
| "learning_rate": 4.27181386200849e-06, | |
| "loss": 0.9923895597457886, | |
| "step": 832 | |
| }, | |
| { | |
| "epoch": 1.6613545816733066, | |
| "grad_norm": 1.294873833656311, | |
| "learning_rate": 4.263508856913346e-06, | |
| "loss": 0.994326651096344, | |
| "step": 834 | |
| }, | |
| { | |
| "epoch": 1.6653386454183265, | |
| "grad_norm": 2.7709615230560303, | |
| "learning_rate": 4.2551933922521e-06, | |
| "loss": 0.8918184041976929, | |
| "step": 836 | |
| }, | |
| { | |
| "epoch": 1.6693227091633465, | |
| "grad_norm": 1.2106887102127075, | |
| "learning_rate": 4.246867558294967e-06, | |
| "loss": 1.1439393758773804, | |
| "step": 838 | |
| }, | |
| { | |
| "epoch": 1.6733067729083664, | |
| "grad_norm": 1.091464877128601, | |
| "learning_rate": 4.2385314454247275e-06, | |
| "loss": 1.0264958143234253, | |
| "step": 840 | |
| }, | |
| { | |
| "epoch": 1.6772908366533863, | |
| "grad_norm": 1.5609543323516846, | |
| "learning_rate": 4.230185144135749e-06, | |
| "loss": 0.8460158109664917, | |
| "step": 842 | |
| }, | |
| { | |
| "epoch": 1.6812749003984062, | |
| "grad_norm": 0.8120943903923035, | |
| "learning_rate": 4.221828745033002e-06, | |
| "loss": 1.0981191396713257, | |
| "step": 844 | |
| }, | |
| { | |
| "epoch": 1.6852589641434261, | |
| "grad_norm": 1.0494468212127686, | |
| "learning_rate": 4.2134623388310706e-06, | |
| "loss": 0.3851274847984314, | |
| "step": 846 | |
| }, | |
| { | |
| "epoch": 1.6892430278884463, | |
| "grad_norm": 1.039975643157959, | |
| "learning_rate": 4.20508601635318e-06, | |
| "loss": 0.7145401239395142, | |
| "step": 848 | |
| }, | |
| { | |
| "epoch": 1.6932270916334662, | |
| "grad_norm": 1.385925054550171, | |
| "learning_rate": 4.1966998685302e-06, | |
| "loss": 1.1264657974243164, | |
| "step": 850 | |
| }, | |
| { | |
| "epoch": 1.697211155378486, | |
| "grad_norm": 0.7857804894447327, | |
| "learning_rate": 4.18830398639966e-06, | |
| "loss": 1.1105672121047974, | |
| "step": 852 | |
| }, | |
| { | |
| "epoch": 1.701195219123506, | |
| "grad_norm": 1.1625089645385742, | |
| "learning_rate": 4.179898461104764e-06, | |
| "loss": 1.078861117362976, | |
| "step": 854 | |
| }, | |
| { | |
| "epoch": 1.705179282868526, | |
| "grad_norm": 0.9041614532470703, | |
| "learning_rate": 4.1714833838934006e-06, | |
| "loss": 1.0313189029693604, | |
| "step": 856 | |
| }, | |
| { | |
| "epoch": 1.7091633466135459, | |
| "grad_norm": 0.8065091967582703, | |
| "learning_rate": 4.163058846117148e-06, | |
| "loss": 0.34671998023986816, | |
| "step": 858 | |
| }, | |
| { | |
| "epoch": 1.7131474103585658, | |
| "grad_norm": 1.2888925075531006, | |
| "learning_rate": 4.154624939230289e-06, | |
| "loss": 1.031374454498291, | |
| "step": 860 | |
| }, | |
| { | |
| "epoch": 1.7171314741035857, | |
| "grad_norm": 0.8425755500793457, | |
| "learning_rate": 4.146181754788813e-06, | |
| "loss": 1.0426599979400635, | |
| "step": 862 | |
| }, | |
| { | |
| "epoch": 1.7211155378486056, | |
| "grad_norm": 1.4209198951721191, | |
| "learning_rate": 4.13772938444942e-06, | |
| "loss": 0.6024843454360962, | |
| "step": 864 | |
| }, | |
| { | |
| "epoch": 1.7250996015936255, | |
| "grad_norm": 1.0409010648727417, | |
| "learning_rate": 4.129267919968536e-06, | |
| "loss": 0.4379670023918152, | |
| "step": 866 | |
| }, | |
| { | |
| "epoch": 1.7290836653386454, | |
| "grad_norm": 1.4887381792068481, | |
| "learning_rate": 4.120797453201309e-06, | |
| "loss": 0.8161473274230957, | |
| "step": 868 | |
| }, | |
| { | |
| "epoch": 1.7330677290836654, | |
| "grad_norm": 12.129778861999512, | |
| "learning_rate": 4.112318076100608e-06, | |
| "loss": 0.22986909747123718, | |
| "step": 870 | |
| }, | |
| { | |
| "epoch": 1.7370517928286853, | |
| "grad_norm": 2.050231456756592, | |
| "learning_rate": 4.103829880716036e-06, | |
| "loss": 0.5155397057533264, | |
| "step": 872 | |
| }, | |
| { | |
| "epoch": 1.7410358565737052, | |
| "grad_norm": 3.127119541168213, | |
| "learning_rate": 4.0953329591929204e-06, | |
| "loss": 0.42298442125320435, | |
| "step": 874 | |
| }, | |
| { | |
| "epoch": 1.745019920318725, | |
| "grad_norm": 1.210281491279602, | |
| "learning_rate": 4.08682740377132e-06, | |
| "loss": 1.0322401523590088, | |
| "step": 876 | |
| }, | |
| { | |
| "epoch": 1.749003984063745, | |
| "grad_norm": 0.7078624367713928, | |
| "learning_rate": 4.0783133067850185e-06, | |
| "loss": 1.0741485357284546, | |
| "step": 878 | |
| }, | |
| { | |
| "epoch": 1.752988047808765, | |
| "grad_norm": 0.9627106189727783, | |
| "learning_rate": 4.069790760660525e-06, | |
| "loss": 0.08892940729856491, | |
| "step": 880 | |
| }, | |
| { | |
| "epoch": 1.7569721115537849, | |
| "grad_norm": 2.872758388519287, | |
| "learning_rate": 4.06125985791607e-06, | |
| "loss": 1.2808747291564941, | |
| "step": 882 | |
| }, | |
| { | |
| "epoch": 1.7609561752988048, | |
| "grad_norm": 1.4781732559204102, | |
| "learning_rate": 4.0527206911606025e-06, | |
| "loss": 1.6314507722854614, | |
| "step": 884 | |
| }, | |
| { | |
| "epoch": 1.7649402390438247, | |
| "grad_norm": 0.4292491674423218, | |
| "learning_rate": 4.044173353092779e-06, | |
| "loss": 0.2118670642375946, | |
| "step": 886 | |
| }, | |
| { | |
| "epoch": 1.7689243027888446, | |
| "grad_norm": 1.0890276432037354, | |
| "learning_rate": 4.035617936499967e-06, | |
| "loss": 1.1356523036956787, | |
| "step": 888 | |
| }, | |
| { | |
| "epoch": 1.7729083665338645, | |
| "grad_norm": 1.0168540477752686, | |
| "learning_rate": 4.0270545342572265e-06, | |
| "loss": 0.9910404086112976, | |
| "step": 890 | |
| }, | |
| { | |
| "epoch": 1.7768924302788844, | |
| "grad_norm": 0.8853142261505127, | |
| "learning_rate": 4.018483239326312e-06, | |
| "loss": 0.9891409277915955, | |
| "step": 892 | |
| }, | |
| { | |
| "epoch": 1.7808764940239044, | |
| "grad_norm": 0.7593168020248413, | |
| "learning_rate": 4.009904144754655e-06, | |
| "loss": 1.1023067235946655, | |
| "step": 894 | |
| }, | |
| { | |
| "epoch": 1.7848605577689243, | |
| "grad_norm": 3.0125675201416016, | |
| "learning_rate": 4.00131734367436e-06, | |
| "loss": 0.9771660566329956, | |
| "step": 896 | |
| }, | |
| { | |
| "epoch": 1.7888446215139442, | |
| "grad_norm": 1.7285772562026978, | |
| "learning_rate": 3.99272292930119e-06, | |
| "loss": 0.5689830780029297, | |
| "step": 898 | |
| }, | |
| { | |
| "epoch": 1.792828685258964, | |
| "grad_norm": 0.7325118184089661, | |
| "learning_rate": 3.984120994933558e-06, | |
| "loss": 1.026572823524475, | |
| "step": 900 | |
| }, | |
| { | |
| "epoch": 1.796812749003984, | |
| "grad_norm": 1.3268436193466187, | |
| "learning_rate": 3.975511633951506e-06, | |
| "loss": 0.5517056584358215, | |
| "step": 902 | |
| }, | |
| { | |
| "epoch": 1.800796812749004, | |
| "grad_norm": 0.8117510676383972, | |
| "learning_rate": 3.966894939815702e-06, | |
| "loss": 0.3609198033809662, | |
| "step": 904 | |
| }, | |
| { | |
| "epoch": 1.8047808764940239, | |
| "grad_norm": 1.122198224067688, | |
| "learning_rate": 3.958271006066421e-06, | |
| "loss": 0.9236494898796082, | |
| "step": 906 | |
| }, | |
| { | |
| "epoch": 1.8087649402390438, | |
| "grad_norm": 2.9102554321289062, | |
| "learning_rate": 3.949639926322527e-06, | |
| "loss": 0.8726416230201721, | |
| "step": 908 | |
| }, | |
| { | |
| "epoch": 1.812749003984064, | |
| "grad_norm": 13.756661415100098, | |
| "learning_rate": 3.941001794280458e-06, | |
| "loss": 1.0099586248397827, | |
| "step": 910 | |
| }, | |
| { | |
| "epoch": 1.8167330677290838, | |
| "grad_norm": 3.1848342418670654, | |
| "learning_rate": 3.932356703713212e-06, | |
| "loss": 0.25727564096450806, | |
| "step": 912 | |
| }, | |
| { | |
| "epoch": 1.8207171314741037, | |
| "grad_norm": 1.389024019241333, | |
| "learning_rate": 3.923704748469326e-06, | |
| "loss": 1.0060839653015137, | |
| "step": 914 | |
| }, | |
| { | |
| "epoch": 1.8247011952191237, | |
| "grad_norm": 0.8609137535095215, | |
| "learning_rate": 3.915046022471857e-06, | |
| "loss": 1.0158603191375732, | |
| "step": 916 | |
| }, | |
| { | |
| "epoch": 1.8286852589641436, | |
| "grad_norm": 0.8087533116340637, | |
| "learning_rate": 3.906380619717363e-06, | |
| "loss": 1.0479439496994019, | |
| "step": 918 | |
| }, | |
| { | |
| "epoch": 1.8326693227091635, | |
| "grad_norm": 3.3105380535125732, | |
| "learning_rate": 3.897708634274886e-06, | |
| "loss": 0.36958053708076477, | |
| "step": 920 | |
| }, | |
| { | |
| "epoch": 1.8366533864541834, | |
| "grad_norm": 1.9331108331680298, | |
| "learning_rate": 3.889030160284922e-06, | |
| "loss": 0.35556235909461975, | |
| "step": 922 | |
| }, | |
| { | |
| "epoch": 1.8406374501992033, | |
| "grad_norm": 0.7566105723381042, | |
| "learning_rate": 3.88034529195841e-06, | |
| "loss": 1.1607534885406494, | |
| "step": 924 | |
| }, | |
| { | |
| "epoch": 1.8446215139442232, | |
| "grad_norm": 0.2870655953884125, | |
| "learning_rate": 3.871654123575704e-06, | |
| "loss": 0.14478978514671326, | |
| "step": 926 | |
| }, | |
| { | |
| "epoch": 1.8486055776892432, | |
| "grad_norm": 0.3280292749404907, | |
| "learning_rate": 3.8629567494855445e-06, | |
| "loss": 0.0896715372800827, | |
| "step": 928 | |
| }, | |
| { | |
| "epoch": 1.852589641434263, | |
| "grad_norm": 1.354030728340149, | |
| "learning_rate": 3.854253264104045e-06, | |
| "loss": 1.078214168548584, | |
| "step": 930 | |
| }, | |
| { | |
| "epoch": 1.856573705179283, | |
| "grad_norm": 1.015066146850586, | |
| "learning_rate": 3.845543761913657e-06, | |
| "loss": 1.114577293395996, | |
| "step": 932 | |
| }, | |
| { | |
| "epoch": 1.860557768924303, | |
| "grad_norm": 0.39395958185195923, | |
| "learning_rate": 3.836828337462152e-06, | |
| "loss": 0.5930612087249756, | |
| "step": 934 | |
| }, | |
| { | |
| "epoch": 1.8645418326693228, | |
| "grad_norm": 3.372042417526245, | |
| "learning_rate": 3.82810708536159e-06, | |
| "loss": 0.34988486766815186, | |
| "step": 936 | |
| }, | |
| { | |
| "epoch": 1.8685258964143427, | |
| "grad_norm": 1.3925652503967285, | |
| "learning_rate": 3.819380100287294e-06, | |
| "loss": 1.0657780170440674, | |
| "step": 938 | |
| }, | |
| { | |
| "epoch": 1.8725099601593627, | |
| "grad_norm": 1.6448031663894653, | |
| "learning_rate": 3.810647476976824e-06, | |
| "loss": 1.0907565355300903, | |
| "step": 940 | |
| }, | |
| { | |
| "epoch": 1.8764940239043826, | |
| "grad_norm": 0.7891445159912109, | |
| "learning_rate": 3.801909310228945e-06, | |
| "loss": 0.35766711831092834, | |
| "step": 942 | |
| }, | |
| { | |
| "epoch": 1.8804780876494025, | |
| "grad_norm": 1.724031686782837, | |
| "learning_rate": 3.7931656949026028e-06, | |
| "loss": 1.7528119087219238, | |
| "step": 944 | |
| }, | |
| { | |
| "epoch": 1.8844621513944224, | |
| "grad_norm": 1.0190646648406982, | |
| "learning_rate": 3.784416725915887e-06, | |
| "loss": 0.706551194190979, | |
| "step": 946 | |
| }, | |
| { | |
| "epoch": 1.8884462151394423, | |
| "grad_norm": 3.7524330615997314, | |
| "learning_rate": 3.7756624982450105e-06, | |
| "loss": 1.3365905284881592, | |
| "step": 948 | |
| }, | |
| { | |
| "epoch": 1.8924302788844622, | |
| "grad_norm": 1.1480021476745605, | |
| "learning_rate": 3.7669031069232684e-06, | |
| "loss": 0.7811166048049927, | |
| "step": 950 | |
| }, | |
| { | |
| "epoch": 1.8964143426294822, | |
| "grad_norm": 0.7147510647773743, | |
| "learning_rate": 3.7581386470400106e-06, | |
| "loss": 1.0117745399475098, | |
| "step": 952 | |
| }, | |
| { | |
| "epoch": 1.900398406374502, | |
| "grad_norm": 2.004282236099243, | |
| "learning_rate": 3.7493692137396153e-06, | |
| "loss": 0.5164535045623779, | |
| "step": 954 | |
| }, | |
| { | |
| "epoch": 1.904382470119522, | |
| "grad_norm": 0.7438123822212219, | |
| "learning_rate": 3.7405949022204435e-06, | |
| "loss": 1.0378838777542114, | |
| "step": 956 | |
| }, | |
| { | |
| "epoch": 1.908366533864542, | |
| "grad_norm": 3.5988733768463135, | |
| "learning_rate": 3.731815807733818e-06, | |
| "loss": 0.6023346781730652, | |
| "step": 958 | |
| }, | |
| { | |
| "epoch": 1.9123505976095618, | |
| "grad_norm": 2.4353888034820557, | |
| "learning_rate": 3.723032025582982e-06, | |
| "loss": 0.5875221490859985, | |
| "step": 960 | |
| }, | |
| { | |
| "epoch": 1.9163346613545817, | |
| "grad_norm": 1.3933720588684082, | |
| "learning_rate": 3.7142436511220676e-06, | |
| "loss": 0.1774052381515503, | |
| "step": 962 | |
| }, | |
| { | |
| "epoch": 1.9203187250996017, | |
| "grad_norm": 2.9852864742279053, | |
| "learning_rate": 3.7054507797550564e-06, | |
| "loss": 1.3314721584320068, | |
| "step": 964 | |
| }, | |
| { | |
| "epoch": 1.9243027888446216, | |
| "grad_norm": 0.7507312893867493, | |
| "learning_rate": 3.6966535069347523e-06, | |
| "loss": 1.0096935033798218, | |
| "step": 966 | |
| }, | |
| { | |
| "epoch": 1.9282868525896415, | |
| "grad_norm": 1.7996251583099365, | |
| "learning_rate": 3.6878519281617354e-06, | |
| "loss": 1.0307931900024414, | |
| "step": 968 | |
| }, | |
| { | |
| "epoch": 1.9322709163346614, | |
| "grad_norm": 1.16811203956604, | |
| "learning_rate": 3.6790461389833317e-06, | |
| "loss": 0.9180192351341248, | |
| "step": 970 | |
| }, | |
| { | |
| "epoch": 1.9362549800796813, | |
| "grad_norm": 0.7789274454116821, | |
| "learning_rate": 3.670236234992576e-06, | |
| "loss": 1.1056816577911377, | |
| "step": 972 | |
| }, | |
| { | |
| "epoch": 1.9402390438247012, | |
| "grad_norm": 0.8071714639663696, | |
| "learning_rate": 3.661422311827169e-06, | |
| "loss": 1.061263084411621, | |
| "step": 974 | |
| }, | |
| { | |
| "epoch": 1.9442231075697212, | |
| "grad_norm": 2.5436365604400635, | |
| "learning_rate": 3.652604465168444e-06, | |
| "loss": 0.9830687642097473, | |
| "step": 976 | |
| }, | |
| { | |
| "epoch": 1.948207171314741, | |
| "grad_norm": 0.7201181054115295, | |
| "learning_rate": 3.6437827907403273e-06, | |
| "loss": 1.0000416040420532, | |
| "step": 978 | |
| }, | |
| { | |
| "epoch": 1.952191235059761, | |
| "grad_norm": 0.7345990538597107, | |
| "learning_rate": 3.6349573843082966e-06, | |
| "loss": 1.0285298824310303, | |
| "step": 980 | |
| }, | |
| { | |
| "epoch": 1.956175298804781, | |
| "grad_norm": 0.6029013395309448, | |
| "learning_rate": 3.6261283416783447e-06, | |
| "loss": 0.3689904808998108, | |
| "step": 982 | |
| }, | |
| { | |
| "epoch": 1.9601593625498008, | |
| "grad_norm": 5.31935977935791, | |
| "learning_rate": 3.6172957586959372e-06, | |
| "loss": 1.075624704360962, | |
| "step": 984 | |
| }, | |
| { | |
| "epoch": 1.9641434262948207, | |
| "grad_norm": 2.391829252243042, | |
| "learning_rate": 3.6084597312449725e-06, | |
| "loss": 0.8474624156951904, | |
| "step": 986 | |
| }, | |
| { | |
| "epoch": 1.9681274900398407, | |
| "grad_norm": 5.1822967529296875, | |
| "learning_rate": 3.599620355246742e-06, | |
| "loss": 0.31603577733039856, | |
| "step": 988 | |
| }, | |
| { | |
| "epoch": 1.9721115537848606, | |
| "grad_norm": 1.8022582530975342, | |
| "learning_rate": 3.5907777266588856e-06, | |
| "loss": 0.911726713180542, | |
| "step": 990 | |
| }, | |
| { | |
| "epoch": 1.9760956175298805, | |
| "grad_norm": 0.7391871213912964, | |
| "learning_rate": 3.5819319414743555e-06, | |
| "loss": 1.0421473979949951, | |
| "step": 992 | |
| }, | |
| { | |
| "epoch": 1.9800796812749004, | |
| "grad_norm": 1.211188554763794, | |
| "learning_rate": 3.573083095720369e-06, | |
| "loss": 1.0375580787658691, | |
| "step": 994 | |
| }, | |
| { | |
| "epoch": 1.9840637450199203, | |
| "grad_norm": 6.231225967407227, | |
| "learning_rate": 3.5642312854573686e-06, | |
| "loss": 0.5392568707466125, | |
| "step": 996 | |
| }, | |
| { | |
| "epoch": 1.9880478087649402, | |
| "grad_norm": 1.1782855987548828, | |
| "learning_rate": 3.5553766067779785e-06, | |
| "loss": 1.188450813293457, | |
| "step": 998 | |
| }, | |
| { | |
| "epoch": 1.9920318725099602, | |
| "grad_norm": 0.6256092190742493, | |
| "learning_rate": 3.546519155805962e-06, | |
| "loss": 1.0698131322860718, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 1.99601593625498, | |
| "grad_norm": 0.89486163854599, | |
| "learning_rate": 3.5376590286951774e-06, | |
| "loss": 1.02101469039917, | |
| "step": 1002 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "grad_norm": 0.5744116902351379, | |
| "learning_rate": 3.5287963216285337e-06, | |
| "loss": 0.08481757342815399, | |
| "step": 1004 | |
| }, | |
| { | |
| "epoch": 2.00398406374502, | |
| "grad_norm": 0.4444674849510193, | |
| "learning_rate": 3.519931130816947e-06, | |
| "loss": 0.14744052290916443, | |
| "step": 1006 | |
| }, | |
| { | |
| "epoch": 2.00796812749004, | |
| "grad_norm": 1.0349431037902832, | |
| "learning_rate": 3.511063552498299e-06, | |
| "loss": 0.894745945930481, | |
| "step": 1008 | |
| }, | |
| { | |
| "epoch": 2.0119521912350598, | |
| "grad_norm": 0.5005489587783813, | |
| "learning_rate": 3.502193682936385e-06, | |
| "loss": 0.29803839325904846, | |
| "step": 1010 | |
| }, | |
| { | |
| "epoch": 2.0159362549800797, | |
| "grad_norm": 1.0027674436569214, | |
| "learning_rate": 3.493321618419877e-06, | |
| "loss": 0.6132505536079407, | |
| "step": 1012 | |
| }, | |
| { | |
| "epoch": 2.0199203187250996, | |
| "grad_norm": 0.722247302532196, | |
| "learning_rate": 3.484447455261272e-06, | |
| "loss": 0.8650059700012207, | |
| "step": 1014 | |
| }, | |
| { | |
| "epoch": 2.0239043824701195, | |
| "grad_norm": 0.1125183254480362, | |
| "learning_rate": 3.4755712897958524e-06, | |
| "loss": 0.06626415252685547, | |
| "step": 1016 | |
| }, | |
| { | |
| "epoch": 2.0278884462151394, | |
| "grad_norm": 2.244713306427002, | |
| "learning_rate": 3.4666932183806345e-06, | |
| "loss": 0.6729474663734436, | |
| "step": 1018 | |
| }, | |
| { | |
| "epoch": 2.0318725099601593, | |
| "grad_norm": 0.8710299730300903, | |
| "learning_rate": 3.4578133373933263e-06, | |
| "loss": 0.8701741099357605, | |
| "step": 1020 | |
| }, | |
| { | |
| "epoch": 2.0358565737051793, | |
| "grad_norm": 0.8872413635253906, | |
| "learning_rate": 3.4489317432312796e-06, | |
| "loss": 0.8716042041778564, | |
| "step": 1022 | |
| }, | |
| { | |
| "epoch": 2.039840637450199, | |
| "grad_norm": 1.219373106956482, | |
| "learning_rate": 3.4400485323104426e-06, | |
| "loss": 0.34580960869789124, | |
| "step": 1024 | |
| }, | |
| { | |
| "epoch": 2.043824701195219, | |
| "grad_norm": 1.7070385217666626, | |
| "learning_rate": 3.431163801064317e-06, | |
| "loss": 0.3066391348838806, | |
| "step": 1026 | |
| }, | |
| { | |
| "epoch": 2.047808764940239, | |
| "grad_norm": 3.4397644996643066, | |
| "learning_rate": 3.422277645942907e-06, | |
| "loss": 0.3099243938922882, | |
| "step": 1028 | |
| }, | |
| { | |
| "epoch": 2.051792828685259, | |
| "grad_norm": 20.93805694580078, | |
| "learning_rate": 3.413390163411675e-06, | |
| "loss": 0.6691966652870178, | |
| "step": 1030 | |
| }, | |
| { | |
| "epoch": 2.055776892430279, | |
| "grad_norm": 1.0854685306549072, | |
| "learning_rate": 3.4045014499504923e-06, | |
| "loss": 0.8780809640884399, | |
| "step": 1032 | |
| }, | |
| { | |
| "epoch": 2.0597609561752988, | |
| "grad_norm": 11.395671844482422, | |
| "learning_rate": 3.3956116020525924e-06, | |
| "loss": 0.2683337926864624, | |
| "step": 1034 | |
| }, | |
| { | |
| "epoch": 2.0637450199203187, | |
| "grad_norm": 2.4742014408111572, | |
| "learning_rate": 3.3867207162235272e-06, | |
| "loss": 0.7748890519142151, | |
| "step": 1036 | |
| }, | |
| { | |
| "epoch": 2.0677290836653386, | |
| "grad_norm": 2.432234525680542, | |
| "learning_rate": 3.377828888980112e-06, | |
| "loss": 0.8894884586334229, | |
| "step": 1038 | |
| }, | |
| { | |
| "epoch": 2.0717131474103585, | |
| "grad_norm": 2.468468427658081, | |
| "learning_rate": 3.3689362168493844e-06, | |
| "loss": 0.6649755239486694, | |
| "step": 1040 | |
| }, | |
| { | |
| "epoch": 2.0756972111553784, | |
| "grad_norm": 0.6127830147743225, | |
| "learning_rate": 3.3600427963675516e-06, | |
| "loss": 0.8452335596084595, | |
| "step": 1042 | |
| }, | |
| { | |
| "epoch": 2.0796812749003983, | |
| "grad_norm": 1.180112361907959, | |
| "learning_rate": 3.3511487240789483e-06, | |
| "loss": 0.929725170135498, | |
| "step": 1044 | |
| }, | |
| { | |
| "epoch": 2.0836653386454183, | |
| "grad_norm": 0.738735020160675, | |
| "learning_rate": 3.3422540965349806e-06, | |
| "loss": 0.8923982381820679, | |
| "step": 1046 | |
| }, | |
| { | |
| "epoch": 2.087649402390438, | |
| "grad_norm": 3.025284767150879, | |
| "learning_rate": 3.333359010293085e-06, | |
| "loss": 0.9607875347137451, | |
| "step": 1048 | |
| }, | |
| { | |
| "epoch": 2.091633466135458, | |
| "grad_norm": 0.7996847033500671, | |
| "learning_rate": 3.3244635619156786e-06, | |
| "loss": 0.4797319769859314, | |
| "step": 1050 | |
| }, | |
| { | |
| "epoch": 2.095617529880478, | |
| "grad_norm": 10.094463348388672, | |
| "learning_rate": 3.315567847969106e-06, | |
| "loss": 0.2578115165233612, | |
| "step": 1052 | |
| }, | |
| { | |
| "epoch": 2.099601593625498, | |
| "grad_norm": 0.6219993233680725, | |
| "learning_rate": 3.306671965022598e-06, | |
| "loss": 0.315256267786026, | |
| "step": 1054 | |
| }, | |
| { | |
| "epoch": 2.103585657370518, | |
| "grad_norm": 1.1088297367095947, | |
| "learning_rate": 3.2977760096472184e-06, | |
| "loss": 0.9286193251609802, | |
| "step": 1056 | |
| }, | |
| { | |
| "epoch": 2.1075697211155378, | |
| "grad_norm": 1.1025009155273438, | |
| "learning_rate": 3.2888800784148174e-06, | |
| "loss": 0.7976268529891968, | |
| "step": 1058 | |
| }, | |
| { | |
| "epoch": 2.1115537848605577, | |
| "grad_norm": 0.7398043274879456, | |
| "learning_rate": 3.2799842678969835e-06, | |
| "loss": 0.3379042148590088, | |
| "step": 1060 | |
| }, | |
| { | |
| "epoch": 2.1155378486055776, | |
| "grad_norm": 1.8223795890808105, | |
| "learning_rate": 3.2710886746639964e-06, | |
| "loss": 0.29785844683647156, | |
| "step": 1062 | |
| }, | |
| { | |
| "epoch": 2.1195219123505975, | |
| "grad_norm": 0.9167846441268921, | |
| "learning_rate": 3.262193395283773e-06, | |
| "loss": 0.10107379406690598, | |
| "step": 1064 | |
| }, | |
| { | |
| "epoch": 2.1235059760956174, | |
| "grad_norm": 6.6176300048828125, | |
| "learning_rate": 3.2532985263208266e-06, | |
| "loss": 0.4440305829048157, | |
| "step": 1066 | |
| }, | |
| { | |
| "epoch": 2.1274900398406373, | |
| "grad_norm": 0.8213241696357727, | |
| "learning_rate": 3.244404164335213e-06, | |
| "loss": 0.8258364796638489, | |
| "step": 1068 | |
| }, | |
| { | |
| "epoch": 2.1314741035856573, | |
| "grad_norm": 2.339560031890869, | |
| "learning_rate": 3.2355104058814874e-06, | |
| "loss": 0.9001627564430237, | |
| "step": 1070 | |
| }, | |
| { | |
| "epoch": 2.135458167330677, | |
| "grad_norm": 1.07158625125885, | |
| "learning_rate": 3.226617347507649e-06, | |
| "loss": 0.3943869471549988, | |
| "step": 1072 | |
| }, | |
| { | |
| "epoch": 2.139442231075697, | |
| "grad_norm": 0.9587336182594299, | |
| "learning_rate": 3.2177250857541007e-06, | |
| "loss": 1.0341042280197144, | |
| "step": 1074 | |
| }, | |
| { | |
| "epoch": 2.143426294820717, | |
| "grad_norm": 0.8883066773414612, | |
| "learning_rate": 3.208833717152594e-06, | |
| "loss": 0.19238322973251343, | |
| "step": 1076 | |
| }, | |
| { | |
| "epoch": 2.147410358565737, | |
| "grad_norm": 1.4621644020080566, | |
| "learning_rate": 3.199943338225189e-06, | |
| "loss": 0.7075263261795044, | |
| "step": 1078 | |
| }, | |
| { | |
| "epoch": 2.151394422310757, | |
| "grad_norm": 0.9659390449523926, | |
| "learning_rate": 3.1910540454832e-06, | |
| "loss": 0.9844989776611328, | |
| "step": 1080 | |
| }, | |
| { | |
| "epoch": 2.1553784860557768, | |
| "grad_norm": 0.9126376509666443, | |
| "learning_rate": 3.1821659354261478e-06, | |
| "loss": 0.8773077130317688, | |
| "step": 1082 | |
| }, | |
| { | |
| "epoch": 2.1593625498007967, | |
| "grad_norm": 1.5047764778137207, | |
| "learning_rate": 3.173279104540719e-06, | |
| "loss": 0.7283194065093994, | |
| "step": 1084 | |
| }, | |
| { | |
| "epoch": 2.1633466135458166, | |
| "grad_norm": 2.4488370418548584, | |
| "learning_rate": 3.164393649299711e-06, | |
| "loss": 1.0191715955734253, | |
| "step": 1086 | |
| }, | |
| { | |
| "epoch": 2.1673306772908365, | |
| "grad_norm": 0.6298505663871765, | |
| "learning_rate": 3.155509666160986e-06, | |
| "loss": 0.19404178857803345, | |
| "step": 1088 | |
| }, | |
| { | |
| "epoch": 2.1713147410358564, | |
| "grad_norm": 3.298346519470215, | |
| "learning_rate": 3.1466272515664287e-06, | |
| "loss": 0.4330817759037018, | |
| "step": 1090 | |
| }, | |
| { | |
| "epoch": 2.1752988047808763, | |
| "grad_norm": 1.4736095666885376, | |
| "learning_rate": 3.137746501940894e-06, | |
| "loss": 0.8412344455718994, | |
| "step": 1092 | |
| }, | |
| { | |
| "epoch": 2.1792828685258963, | |
| "grad_norm": 1.3612383604049683, | |
| "learning_rate": 3.1288675136911653e-06, | |
| "loss": 0.7719582915306091, | |
| "step": 1094 | |
| }, | |
| { | |
| "epoch": 2.183266932270916, | |
| "grad_norm": 1.6760456562042236, | |
| "learning_rate": 3.1199903832049025e-06, | |
| "loss": 0.8681936264038086, | |
| "step": 1096 | |
| }, | |
| { | |
| "epoch": 2.187250996015936, | |
| "grad_norm": 0.9944242238998413, | |
| "learning_rate": 3.1111152068495982e-06, | |
| "loss": 0.8590313196182251, | |
| "step": 1098 | |
| }, | |
| { | |
| "epoch": 2.191235059760956, | |
| "grad_norm": 1.1411633491516113, | |
| "learning_rate": 3.102242080971531e-06, | |
| "loss": 0.8502429723739624, | |
| "step": 1100 | |
| }, | |
| { | |
| "epoch": 2.195219123505976, | |
| "grad_norm": 1.0093145370483398, | |
| "learning_rate": 3.0933711018947217e-06, | |
| "loss": 0.8326080441474915, | |
| "step": 1102 | |
| }, | |
| { | |
| "epoch": 2.199203187250996, | |
| "grad_norm": 1.3518801927566528, | |
| "learning_rate": 3.084502365919887e-06, | |
| "loss": 0.31851112842559814, | |
| "step": 1104 | |
| }, | |
| { | |
| "epoch": 2.2031872509960158, | |
| "grad_norm": 0.8486732840538025, | |
| "learning_rate": 3.0756359693233897e-06, | |
| "loss": 0.12462817877531052, | |
| "step": 1106 | |
| }, | |
| { | |
| "epoch": 2.2071713147410357, | |
| "grad_norm": 3.158237934112549, | |
| "learning_rate": 3.066772008356201e-06, | |
| "loss": 0.7065569162368774, | |
| "step": 1108 | |
| }, | |
| { | |
| "epoch": 2.2111553784860556, | |
| "grad_norm": 1.6595673561096191, | |
| "learning_rate": 3.057910579242848e-06, | |
| "loss": 0.32911333441734314, | |
| "step": 1110 | |
| }, | |
| { | |
| "epoch": 2.2151394422310755, | |
| "grad_norm": 0.9766960740089417, | |
| "learning_rate": 3.0490517781803748e-06, | |
| "loss": 0.8282409906387329, | |
| "step": 1112 | |
| }, | |
| { | |
| "epoch": 2.2191235059760954, | |
| "grad_norm": 2.551868438720703, | |
| "learning_rate": 3.040195701337296e-06, | |
| "loss": 0.8591130971908569, | |
| "step": 1114 | |
| }, | |
| { | |
| "epoch": 2.2231075697211153, | |
| "grad_norm": 2.4142255783081055, | |
| "learning_rate": 3.0313424448525513e-06, | |
| "loss": 0.6863746643066406, | |
| "step": 1116 | |
| }, | |
| { | |
| "epoch": 2.2270916334661353, | |
| "grad_norm": 1.8660197257995605, | |
| "learning_rate": 3.022492104834467e-06, | |
| "loss": 0.867939829826355, | |
| "step": 1118 | |
| }, | |
| { | |
| "epoch": 2.231075697211155, | |
| "grad_norm": 1.012052297592163, | |
| "learning_rate": 3.013644777359706e-06, | |
| "loss": 0.862476110458374, | |
| "step": 1120 | |
| }, | |
| { | |
| "epoch": 2.235059760956175, | |
| "grad_norm": 1.3242058753967285, | |
| "learning_rate": 3.004800558472228e-06, | |
| "loss": 0.8478327393531799, | |
| "step": 1122 | |
| }, | |
| { | |
| "epoch": 2.239043824701195, | |
| "grad_norm": 1.5202715396881104, | |
| "learning_rate": 2.995959544182248e-06, | |
| "loss": 0.8780950307846069, | |
| "step": 1124 | |
| }, | |
| { | |
| "epoch": 2.243027888446215, | |
| "grad_norm": 1.5164873600006104, | |
| "learning_rate": 2.9871218304651926e-06, | |
| "loss": 0.8773269653320312, | |
| "step": 1126 | |
| }, | |
| { | |
| "epoch": 2.247011952191235, | |
| "grad_norm": 12.062283515930176, | |
| "learning_rate": 2.9782875132606573e-06, | |
| "loss": 0.5782788991928101, | |
| "step": 1128 | |
| }, | |
| { | |
| "epoch": 2.2509960159362548, | |
| "grad_norm": 0.4626627266407013, | |
| "learning_rate": 2.969456688471368e-06, | |
| "loss": 0.17795492708683014, | |
| "step": 1130 | |
| }, | |
| { | |
| "epoch": 2.2549800796812747, | |
| "grad_norm": 8.622909545898438, | |
| "learning_rate": 2.960629451962137e-06, | |
| "loss": 0.876864492893219, | |
| "step": 1132 | |
| }, | |
| { | |
| "epoch": 2.2589641434262946, | |
| "grad_norm": 2.5603370666503906, | |
| "learning_rate": 2.9518058995588217e-06, | |
| "loss": 0.5039679408073425, | |
| "step": 1134 | |
| }, | |
| { | |
| "epoch": 2.2629482071713145, | |
| "grad_norm": 1.9047883749008179, | |
| "learning_rate": 2.9429861270472884e-06, | |
| "loss": 0.8298702836036682, | |
| "step": 1136 | |
| }, | |
| { | |
| "epoch": 2.2669322709163344, | |
| "grad_norm": 1.333377480506897, | |
| "learning_rate": 2.9341702301723704e-06, | |
| "loss": 0.8177191019058228, | |
| "step": 1138 | |
| }, | |
| { | |
| "epoch": 2.2709163346613543, | |
| "grad_norm": 0.8072558641433716, | |
| "learning_rate": 2.9253583046368243e-06, | |
| "loss": 0.8483671545982361, | |
| "step": 1140 | |
| }, | |
| { | |
| "epoch": 2.2749003984063743, | |
| "grad_norm": 1.162376046180725, | |
| "learning_rate": 2.916550446100299e-06, | |
| "loss": 0.8442429900169373, | |
| "step": 1142 | |
| }, | |
| { | |
| "epoch": 2.278884462151394, | |
| "grad_norm": 2.1500282287597656, | |
| "learning_rate": 2.907746750178293e-06, | |
| "loss": 0.40876924991607666, | |
| "step": 1144 | |
| }, | |
| { | |
| "epoch": 2.2828685258964145, | |
| "grad_norm": 1.5930662155151367, | |
| "learning_rate": 2.8989473124411136e-06, | |
| "loss": 0.3929884433746338, | |
| "step": 1146 | |
| }, | |
| { | |
| "epoch": 2.2868525896414345, | |
| "grad_norm": 0.9812231659889221, | |
| "learning_rate": 2.8901522284128454e-06, | |
| "loss": 0.8924030661582947, | |
| "step": 1148 | |
| }, | |
| { | |
| "epoch": 2.2908366533864544, | |
| "grad_norm": 4.809815883636475, | |
| "learning_rate": 2.881361593570308e-06, | |
| "loss": 0.412593275308609, | |
| "step": 1150 | |
| }, | |
| { | |
| "epoch": 2.2948207171314743, | |
| "grad_norm": 0.34295371174812317, | |
| "learning_rate": 2.872575503342027e-06, | |
| "loss": 0.07170237600803375, | |
| "step": 1152 | |
| }, | |
| { | |
| "epoch": 2.298804780876494, | |
| "grad_norm": 2.6662888526916504, | |
| "learning_rate": 2.8637940531071856e-06, | |
| "loss": 0.9125880599021912, | |
| "step": 1154 | |
| }, | |
| { | |
| "epoch": 2.302788844621514, | |
| "grad_norm": 1.016099214553833, | |
| "learning_rate": 2.8550173381946035e-06, | |
| "loss": 0.20460867881774902, | |
| "step": 1156 | |
| }, | |
| { | |
| "epoch": 2.306772908366534, | |
| "grad_norm": 1.2535561323165894, | |
| "learning_rate": 2.84624545388169e-06, | |
| "loss": 0.18213213980197906, | |
| "step": 1158 | |
| }, | |
| { | |
| "epoch": 2.310756972111554, | |
| "grad_norm": 5.914939880371094, | |
| "learning_rate": 2.837478495393418e-06, | |
| "loss": 1.015434980392456, | |
| "step": 1160 | |
| }, | |
| { | |
| "epoch": 2.314741035856574, | |
| "grad_norm": 3.516514539718628, | |
| "learning_rate": 2.828716557901286e-06, | |
| "loss": 0.4791782796382904, | |
| "step": 1162 | |
| }, | |
| { | |
| "epoch": 2.318725099601594, | |
| "grad_norm": 1.2415333986282349, | |
| "learning_rate": 2.819959736522286e-06, | |
| "loss": 0.6430278420448303, | |
| "step": 1164 | |
| }, | |
| { | |
| "epoch": 2.3227091633466137, | |
| "grad_norm": 6.374106407165527, | |
| "learning_rate": 2.8112081263178727e-06, | |
| "loss": 0.7340620756149292, | |
| "step": 1166 | |
| }, | |
| { | |
| "epoch": 2.3266932270916336, | |
| "grad_norm": 0.7349236011505127, | |
| "learning_rate": 2.8024618222929257e-06, | |
| "loss": 0.8904776573181152, | |
| "step": 1168 | |
| }, | |
| { | |
| "epoch": 2.3306772908366535, | |
| "grad_norm": 3.1692311763763428, | |
| "learning_rate": 2.793720919394726e-06, | |
| "loss": 0.3335300385951996, | |
| "step": 1170 | |
| }, | |
| { | |
| "epoch": 2.3346613545816735, | |
| "grad_norm": 1.9627305269241333, | |
| "learning_rate": 2.7849855125119204e-06, | |
| "loss": 0.9338223338127136, | |
| "step": 1172 | |
| }, | |
| { | |
| "epoch": 2.3386454183266934, | |
| "grad_norm": 1.715811014175415, | |
| "learning_rate": 2.7762556964734925e-06, | |
| "loss": 0.8548279404640198, | |
| "step": 1174 | |
| }, | |
| { | |
| "epoch": 2.3426294820717133, | |
| "grad_norm": 1.2761598825454712, | |
| "learning_rate": 2.7675315660477342e-06, | |
| "loss": 0.6551219820976257, | |
| "step": 1176 | |
| }, | |
| { | |
| "epoch": 2.346613545816733, | |
| "grad_norm": 0.5829970836639404, | |
| "learning_rate": 2.7588132159412153e-06, | |
| "loss": 0.8633916974067688, | |
| "step": 1178 | |
| }, | |
| { | |
| "epoch": 2.350597609561753, | |
| "grad_norm": 0.8791594505310059, | |
| "learning_rate": 2.7501007407977554e-06, | |
| "loss": 0.8312200903892517, | |
| "step": 1180 | |
| }, | |
| { | |
| "epoch": 2.354581673306773, | |
| "grad_norm": 0.8145209550857544, | |
| "learning_rate": 2.7413942351973994e-06, | |
| "loss": 0.8451777696609497, | |
| "step": 1182 | |
| }, | |
| { | |
| "epoch": 2.358565737051793, | |
| "grad_norm": 0.8338920474052429, | |
| "learning_rate": 2.7326937936553845e-06, | |
| "loss": 0.9415311813354492, | |
| "step": 1184 | |
| }, | |
| { | |
| "epoch": 2.362549800796813, | |
| "grad_norm": 0.9346828460693359, | |
| "learning_rate": 2.7239995106211244e-06, | |
| "loss": 0.8471455574035645, | |
| "step": 1186 | |
| }, | |
| { | |
| "epoch": 2.366533864541833, | |
| "grad_norm": 1.4322340488433838, | |
| "learning_rate": 2.715311480477173e-06, | |
| "loss": 0.30060604214668274, | |
| "step": 1188 | |
| }, | |
| { | |
| "epoch": 2.3705179282868527, | |
| "grad_norm": 1.1024688482284546, | |
| "learning_rate": 2.7066297975382065e-06, | |
| "loss": 0.7530568838119507, | |
| "step": 1190 | |
| }, | |
| { | |
| "epoch": 2.3745019920318726, | |
| "grad_norm": 0.5967240333557129, | |
| "learning_rate": 2.697954556049997e-06, | |
| "loss": 0.867277204990387, | |
| "step": 1192 | |
| }, | |
| { | |
| "epoch": 2.3784860557768925, | |
| "grad_norm": 0.9026405811309814, | |
| "learning_rate": 2.689285850188391e-06, | |
| "loss": 0.9335858225822449, | |
| "step": 1194 | |
| }, | |
| { | |
| "epoch": 2.3824701195219125, | |
| "grad_norm": 0.48514679074287415, | |
| "learning_rate": 2.6806237740582855e-06, | |
| "loss": 0.2793917655944824, | |
| "step": 1196 | |
| }, | |
| { | |
| "epoch": 2.3864541832669324, | |
| "grad_norm": 2.9039154052734375, | |
| "learning_rate": 2.671968421692607e-06, | |
| "loss": 1.4733071327209473, | |
| "step": 1198 | |
| }, | |
| { | |
| "epoch": 2.3904382470119523, | |
| "grad_norm": 3.6072850227355957, | |
| "learning_rate": 2.6633198870512927e-06, | |
| "loss": 0.3655731976032257, | |
| "step": 1200 | |
| }, | |
| { | |
| "epoch": 2.394422310756972, | |
| "grad_norm": 0.6584874391555786, | |
| "learning_rate": 2.6546782640202666e-06, | |
| "loss": 0.8660189509391785, | |
| "step": 1202 | |
| }, | |
| { | |
| "epoch": 2.398406374501992, | |
| "grad_norm": 0.5407839417457581, | |
| "learning_rate": 2.6460436464104216e-06, | |
| "loss": 0.848800003528595, | |
| "step": 1204 | |
| }, | |
| { | |
| "epoch": 2.402390438247012, | |
| "grad_norm": 1.0635416507720947, | |
| "learning_rate": 2.6374161279566035e-06, | |
| "loss": 0.9516815543174744, | |
| "step": 1206 | |
| }, | |
| { | |
| "epoch": 2.406374501992032, | |
| "grad_norm": 0.41980046033859253, | |
| "learning_rate": 2.628795802316591e-06, | |
| "loss": 0.120535708963871, | |
| "step": 1208 | |
| }, | |
| { | |
| "epoch": 2.410358565737052, | |
| "grad_norm": 0.3191829323768616, | |
| "learning_rate": 2.620182763070081e-06, | |
| "loss": 0.023226367309689522, | |
| "step": 1210 | |
| }, | |
| { | |
| "epoch": 2.414342629482072, | |
| "grad_norm": 1.4996663331985474, | |
| "learning_rate": 2.61157710371767e-06, | |
| "loss": 0.45069432258605957, | |
| "step": 1212 | |
| }, | |
| { | |
| "epoch": 2.4183266932270917, | |
| "grad_norm": 1.0962636470794678, | |
| "learning_rate": 2.6029789176798417e-06, | |
| "loss": 0.6983217000961304, | |
| "step": 1214 | |
| }, | |
| { | |
| "epoch": 2.4223107569721116, | |
| "grad_norm": 0.8529632091522217, | |
| "learning_rate": 2.594388298295949e-06, | |
| "loss": 0.17169800400733948, | |
| "step": 1216 | |
| }, | |
| { | |
| "epoch": 2.4262948207171315, | |
| "grad_norm": 0.9947030544281006, | |
| "learning_rate": 2.585805338823208e-06, | |
| "loss": 0.8718166947364807, | |
| "step": 1218 | |
| }, | |
| { | |
| "epoch": 2.4302788844621515, | |
| "grad_norm": 0.39905738830566406, | |
| "learning_rate": 2.577230132435678e-06, | |
| "loss": 0.5236790776252747, | |
| "step": 1220 | |
| }, | |
| { | |
| "epoch": 2.4342629482071714, | |
| "grad_norm": 1.6986416578292847, | |
| "learning_rate": 2.5686627722232518e-06, | |
| "loss": 0.4206949770450592, | |
| "step": 1222 | |
| }, | |
| { | |
| "epoch": 2.4382470119521913, | |
| "grad_norm": 0.8914661407470703, | |
| "learning_rate": 2.560103351190651e-06, | |
| "loss": 0.8530100584030151, | |
| "step": 1224 | |
| }, | |
| { | |
| "epoch": 2.442231075697211, | |
| "grad_norm": 1.940697193145752, | |
| "learning_rate": 2.5515519622564086e-06, | |
| "loss": 0.03098766878247261, | |
| "step": 1226 | |
| }, | |
| { | |
| "epoch": 2.446215139442231, | |
| "grad_norm": 0.740294873714447, | |
| "learning_rate": 2.543008698251863e-06, | |
| "loss": 0.8904476165771484, | |
| "step": 1228 | |
| }, | |
| { | |
| "epoch": 2.450199203187251, | |
| "grad_norm": 1.2256784439086914, | |
| "learning_rate": 2.534473651920153e-06, | |
| "loss": 0.6660670042037964, | |
| "step": 1230 | |
| }, | |
| { | |
| "epoch": 2.454183266932271, | |
| "grad_norm": 1.3577665090560913, | |
| "learning_rate": 2.5259469159152063e-06, | |
| "loss": 0.8957257270812988, | |
| "step": 1232 | |
| }, | |
| { | |
| "epoch": 2.458167330677291, | |
| "grad_norm": 5.5895209312438965, | |
| "learning_rate": 2.5174285828007387e-06, | |
| "loss": 0.4879809319972992, | |
| "step": 1234 | |
| }, | |
| { | |
| "epoch": 2.462151394422311, | |
| "grad_norm": 1.602962851524353, | |
| "learning_rate": 2.5089187450492464e-06, | |
| "loss": 0.8527651429176331, | |
| "step": 1236 | |
| }, | |
| { | |
| "epoch": 2.4661354581673307, | |
| "grad_norm": 1.6139048337936401, | |
| "learning_rate": 2.5004174950409996e-06, | |
| "loss": 0.814254641532898, | |
| "step": 1238 | |
| }, | |
| { | |
| "epoch": 2.4701195219123506, | |
| "grad_norm": 2.1591413021087646, | |
| "learning_rate": 2.4919249250630463e-06, | |
| "loss": 0.620861828327179, | |
| "step": 1240 | |
| }, | |
| { | |
| "epoch": 2.4741035856573705, | |
| "grad_norm": 2.2499430179595947, | |
| "learning_rate": 2.483441127308202e-06, | |
| "loss": 0.622882068157196, | |
| "step": 1242 | |
| }, | |
| { | |
| "epoch": 2.4780876494023905, | |
| "grad_norm": 0.8735558390617371, | |
| "learning_rate": 2.47496619387406e-06, | |
| "loss": 0.8819273114204407, | |
| "step": 1244 | |
| }, | |
| { | |
| "epoch": 2.4820717131474104, | |
| "grad_norm": 1.0973459482192993, | |
| "learning_rate": 2.4665002167619798e-06, | |
| "loss": 0.85080885887146, | |
| "step": 1246 | |
| }, | |
| { | |
| "epoch": 2.4860557768924303, | |
| "grad_norm": 1.19606351852417, | |
| "learning_rate": 2.4580432878760968e-06, | |
| "loss": 0.5080418586730957, | |
| "step": 1248 | |
| }, | |
| { | |
| "epoch": 2.49003984063745, | |
| "grad_norm": 0.36084145307540894, | |
| "learning_rate": 2.449595499022318e-06, | |
| "loss": 0.3111553192138672, | |
| "step": 1250 | |
| }, | |
| { | |
| "epoch": 2.49402390438247, | |
| "grad_norm": 0.7546538710594177, | |
| "learning_rate": 2.441156941907333e-06, | |
| "loss": 0.6624001264572144, | |
| "step": 1252 | |
| }, | |
| { | |
| "epoch": 2.49800796812749, | |
| "grad_norm": 0.7720620632171631, | |
| "learning_rate": 2.432727708137612e-06, | |
| "loss": 0.7852078676223755, | |
| "step": 1254 | |
| }, | |
| { | |
| "epoch": 2.50199203187251, | |
| "grad_norm": 2.640068292617798, | |
| "learning_rate": 2.424307889218414e-06, | |
| "loss": 0.9888243079185486, | |
| "step": 1256 | |
| }, | |
| { | |
| "epoch": 2.50597609561753, | |
| "grad_norm": 0.47891512513160706, | |
| "learning_rate": 2.415897576552795e-06, | |
| "loss": 0.11806351691484451, | |
| "step": 1258 | |
| }, | |
| { | |
| "epoch": 2.50996015936255, | |
| "grad_norm": 1.773125171661377, | |
| "learning_rate": 2.407496861440611e-06, | |
| "loss": 0.712026834487915, | |
| "step": 1260 | |
| }, | |
| { | |
| "epoch": 2.5139442231075697, | |
| "grad_norm": 0.8916162848472595, | |
| "learning_rate": 2.3991058350775316e-06, | |
| "loss": 0.27510854601860046, | |
| "step": 1262 | |
| }, | |
| { | |
| "epoch": 2.5179282868525896, | |
| "grad_norm": 2.915144205093384, | |
| "learning_rate": 2.3907245885540473e-06, | |
| "loss": 0.5907682180404663, | |
| "step": 1264 | |
| }, | |
| { | |
| "epoch": 2.5219123505976095, | |
| "grad_norm": 0.7523391842842102, | |
| "learning_rate": 2.382353212854483e-06, | |
| "loss": 0.875799298286438, | |
| "step": 1266 | |
| }, | |
| { | |
| "epoch": 2.5258964143426295, | |
| "grad_norm": 0.7640947699546814, | |
| "learning_rate": 2.373991798856008e-06, | |
| "loss": 0.8100597858428955, | |
| "step": 1268 | |
| }, | |
| { | |
| "epoch": 2.5298804780876494, | |
| "grad_norm": 0.9602063894271851, | |
| "learning_rate": 2.3656404373276496e-06, | |
| "loss": 0.8617823719978333, | |
| "step": 1270 | |
| }, | |
| { | |
| "epoch": 2.5338645418326693, | |
| "grad_norm": 1.0857386589050293, | |
| "learning_rate": 2.35729921892931e-06, | |
| "loss": 0.7695320248603821, | |
| "step": 1272 | |
| }, | |
| { | |
| "epoch": 2.537848605577689, | |
| "grad_norm": 2.655921220779419, | |
| "learning_rate": 2.3489682342107787e-06, | |
| "loss": 1.0393037796020508, | |
| "step": 1274 | |
| }, | |
| { | |
| "epoch": 2.541832669322709, | |
| "grad_norm": 1.602705478668213, | |
| "learning_rate": 2.3406475736107537e-06, | |
| "loss": 0.8128276467323303, | |
| "step": 1276 | |
| }, | |
| { | |
| "epoch": 2.545816733067729, | |
| "grad_norm": 1.7629623413085938, | |
| "learning_rate": 2.332337327455856e-06, | |
| "loss": 0.8416529893875122, | |
| "step": 1278 | |
| }, | |
| { | |
| "epoch": 2.549800796812749, | |
| "grad_norm": 0.3072420656681061, | |
| "learning_rate": 2.3240375859596493e-06, | |
| "loss": 0.21107147634029388, | |
| "step": 1280 | |
| }, | |
| { | |
| "epoch": 2.553784860557769, | |
| "grad_norm": 0.7584460973739624, | |
| "learning_rate": 2.3157484392216645e-06, | |
| "loss": 0.7613718509674072, | |
| "step": 1282 | |
| }, | |
| { | |
| "epoch": 2.557768924302789, | |
| "grad_norm": 0.7467636466026306, | |
| "learning_rate": 2.3074699772264184e-06, | |
| "loss": 0.9068883657455444, | |
| "step": 1284 | |
| }, | |
| { | |
| "epoch": 2.5617529880478087, | |
| "grad_norm": 2.827934503555298, | |
| "learning_rate": 2.2992022898424358e-06, | |
| "loss": 0.9814170002937317, | |
| "step": 1286 | |
| }, | |
| { | |
| "epoch": 2.5657370517928286, | |
| "grad_norm": 0.6314749717712402, | |
| "learning_rate": 2.2909454668212763e-06, | |
| "loss": 0.9777659177780151, | |
| "step": 1288 | |
| }, | |
| { | |
| "epoch": 2.5697211155378485, | |
| "grad_norm": 1.5785683393478394, | |
| "learning_rate": 2.2826995977965586e-06, | |
| "loss": 0.14857736229896545, | |
| "step": 1290 | |
| }, | |
| { | |
| "epoch": 2.5737051792828685, | |
| "grad_norm": 0.8036978244781494, | |
| "learning_rate": 2.27446477228299e-06, | |
| "loss": 0.9405508041381836, | |
| "step": 1292 | |
| }, | |
| { | |
| "epoch": 2.5776892430278884, | |
| "grad_norm": 0.7155508399009705, | |
| "learning_rate": 2.2662410796753924e-06, | |
| "loss": 0.8522077202796936, | |
| "step": 1294 | |
| }, | |
| { | |
| "epoch": 2.5816733067729083, | |
| "grad_norm": 1.1586476564407349, | |
| "learning_rate": 2.2580286092477285e-06, | |
| "loss": 0.8515244722366333, | |
| "step": 1296 | |
| }, | |
| { | |
| "epoch": 2.585657370517928, | |
| "grad_norm": 1.105276346206665, | |
| "learning_rate": 2.2498274501521414e-06, | |
| "loss": 0.8348259925842285, | |
| "step": 1298 | |
| }, | |
| { | |
| "epoch": 2.589641434262948, | |
| "grad_norm": 0.5298115611076355, | |
| "learning_rate": 2.2416376914179776e-06, | |
| "loss": 0.37851282954216003, | |
| "step": 1300 | |
| }, | |
| { | |
| "epoch": 2.593625498007968, | |
| "grad_norm": 0.8865681290626526, | |
| "learning_rate": 2.2334594219508283e-06, | |
| "loss": 0.493791401386261, | |
| "step": 1302 | |
| }, | |
| { | |
| "epoch": 2.597609561752988, | |
| "grad_norm": 0.8937894105911255, | |
| "learning_rate": 2.2252927305315587e-06, | |
| "loss": 0.768490731716156, | |
| "step": 1304 | |
| }, | |
| { | |
| "epoch": 2.601593625498008, | |
| "grad_norm": 2.249807119369507, | |
| "learning_rate": 2.2171377058153465e-06, | |
| "loss": 0.28239089250564575, | |
| "step": 1306 | |
| }, | |
| { | |
| "epoch": 2.605577689243028, | |
| "grad_norm": 0.7723252773284912, | |
| "learning_rate": 2.2089944363307165e-06, | |
| "loss": 0.8856875896453857, | |
| "step": 1308 | |
| }, | |
| { | |
| "epoch": 2.6095617529880477, | |
| "grad_norm": 0.43645548820495605, | |
| "learning_rate": 2.2008630104785874e-06, | |
| "loss": 0.352665513753891, | |
| "step": 1310 | |
| }, | |
| { | |
| "epoch": 2.6135458167330676, | |
| "grad_norm": 2.615204095840454, | |
| "learning_rate": 2.1927435165313036e-06, | |
| "loss": 0.1691545695066452, | |
| "step": 1312 | |
| }, | |
| { | |
| "epoch": 2.6175298804780875, | |
| "grad_norm": 0.7458433508872986, | |
| "learning_rate": 2.184636042631679e-06, | |
| "loss": 0.06585448980331421, | |
| "step": 1314 | |
| }, | |
| { | |
| "epoch": 2.6215139442231075, | |
| "grad_norm": 1.3437604904174805, | |
| "learning_rate": 2.176540676792046e-06, | |
| "loss": 0.956698477268219, | |
| "step": 1316 | |
| }, | |
| { | |
| "epoch": 2.6254980079681274, | |
| "grad_norm": 2.3479928970336914, | |
| "learning_rate": 2.168457506893292e-06, | |
| "loss": 0.669885516166687, | |
| "step": 1318 | |
| }, | |
| { | |
| "epoch": 2.6294820717131473, | |
| "grad_norm": 0.6726356744766235, | |
| "learning_rate": 2.1603866206839074e-06, | |
| "loss": 0.9108378887176514, | |
| "step": 1320 | |
| }, | |
| { | |
| "epoch": 2.633466135458167, | |
| "grad_norm": 0.6728199124336243, | |
| "learning_rate": 2.152328105779041e-06, | |
| "loss": 0.46163687109947205, | |
| "step": 1322 | |
| }, | |
| { | |
| "epoch": 2.637450199203187, | |
| "grad_norm": 3.6970763206481934, | |
| "learning_rate": 2.1442820496595337e-06, | |
| "loss": 1.0799225568771362, | |
| "step": 1324 | |
| }, | |
| { | |
| "epoch": 2.641434262948207, | |
| "grad_norm": 2.347198009490967, | |
| "learning_rate": 2.1362485396709847e-06, | |
| "loss": 0.2297479808330536, | |
| "step": 1326 | |
| }, | |
| { | |
| "epoch": 2.645418326693227, | |
| "grad_norm": 1.014694094657898, | |
| "learning_rate": 2.128227663022794e-06, | |
| "loss": 0.7543836832046509, | |
| "step": 1328 | |
| }, | |
| { | |
| "epoch": 2.649402390438247, | |
| "grad_norm": 1.9803884029388428, | |
| "learning_rate": 2.1202195067872153e-06, | |
| "loss": 0.8650748133659363, | |
| "step": 1330 | |
| }, | |
| { | |
| "epoch": 2.653386454183267, | |
| "grad_norm": 1.038819432258606, | |
| "learning_rate": 2.112224157898416e-06, | |
| "loss": 0.7467201352119446, | |
| "step": 1332 | |
| }, | |
| { | |
| "epoch": 2.6573705179282867, | |
| "grad_norm": 4.248292922973633, | |
| "learning_rate": 2.1042417031515303e-06, | |
| "loss": 1.0267494916915894, | |
| "step": 1334 | |
| }, | |
| { | |
| "epoch": 2.6613545816733066, | |
| "grad_norm": 0.40952640771865845, | |
| "learning_rate": 2.096272229201716e-06, | |
| "loss": 0.06949189305305481, | |
| "step": 1336 | |
| }, | |
| { | |
| "epoch": 2.6653386454183265, | |
| "grad_norm": 1.2858881950378418, | |
| "learning_rate": 2.0883158225632168e-06, | |
| "loss": 0.9944968223571777, | |
| "step": 1338 | |
| }, | |
| { | |
| "epoch": 2.6693227091633465, | |
| "grad_norm": 1.2663077116012573, | |
| "learning_rate": 2.0803725696084224e-06, | |
| "loss": 0.32381299138069153, | |
| "step": 1340 | |
| }, | |
| { | |
| "epoch": 2.6733067729083664, | |
| "grad_norm": 2.5092110633850098, | |
| "learning_rate": 2.072442556566928e-06, | |
| "loss": 0.5067175626754761, | |
| "step": 1342 | |
| }, | |
| { | |
| "epoch": 2.6772908366533863, | |
| "grad_norm": 0.4816880226135254, | |
| "learning_rate": 2.0645258695245993e-06, | |
| "loss": 0.06836852431297302, | |
| "step": 1344 | |
| }, | |
| { | |
| "epoch": 2.681274900398406, | |
| "grad_norm": 0.8811363577842712, | |
| "learning_rate": 2.0566225944226414e-06, | |
| "loss": 0.8118082284927368, | |
| "step": 1346 | |
| }, | |
| { | |
| "epoch": 2.685258964143426, | |
| "grad_norm": 0.7595816850662231, | |
| "learning_rate": 2.0487328170566643e-06, | |
| "loss": 0.833029568195343, | |
| "step": 1348 | |
| }, | |
| { | |
| "epoch": 2.6892430278884465, | |
| "grad_norm": 0.9555457830429077, | |
| "learning_rate": 2.0408566230757465e-06, | |
| "loss": 0.8837859034538269, | |
| "step": 1350 | |
| }, | |
| { | |
| "epoch": 2.6932270916334664, | |
| "grad_norm": 2.7736618518829346, | |
| "learning_rate": 2.0329940979815116e-06, | |
| "loss": 0.3744777739048004, | |
| "step": 1352 | |
| }, | |
| { | |
| "epoch": 2.6972111553784863, | |
| "grad_norm": 1.4651148319244385, | |
| "learning_rate": 2.0251453271272e-06, | |
| "loss": 0.3069399297237396, | |
| "step": 1354 | |
| }, | |
| { | |
| "epoch": 2.7011952191235062, | |
| "grad_norm": 1.0298899412155151, | |
| "learning_rate": 2.0173103957167367e-06, | |
| "loss": 0.8419727087020874, | |
| "step": 1356 | |
| }, | |
| { | |
| "epoch": 2.705179282868526, | |
| "grad_norm": 1.365960955619812, | |
| "learning_rate": 2.009489388803809e-06, | |
| "loss": 0.8394007682800293, | |
| "step": 1358 | |
| }, | |
| { | |
| "epoch": 2.709163346613546, | |
| "grad_norm": 0.9906344413757324, | |
| "learning_rate": 2.0016823912909486e-06, | |
| "loss": 0.8413975238800049, | |
| "step": 1360 | |
| }, | |
| { | |
| "epoch": 2.713147410358566, | |
| "grad_norm": 0.6724693775177002, | |
| "learning_rate": 1.9938894879286024e-06, | |
| "loss": 0.8469905853271484, | |
| "step": 1362 | |
| }, | |
| { | |
| "epoch": 2.717131474103586, | |
| "grad_norm": 1.9248793125152588, | |
| "learning_rate": 1.9861107633142155e-06, | |
| "loss": 0.8509299755096436, | |
| "step": 1364 | |
| }, | |
| { | |
| "epoch": 2.721115537848606, | |
| "grad_norm": 1.4797543287277222, | |
| "learning_rate": 1.978346301891312e-06, | |
| "loss": 0.35483643412590027, | |
| "step": 1366 | |
| }, | |
| { | |
| "epoch": 2.7250996015936257, | |
| "grad_norm": 0.8299886584281921, | |
| "learning_rate": 1.9705961879485813e-06, | |
| "loss": 0.8987928628921509, | |
| "step": 1368 | |
| }, | |
| { | |
| "epoch": 2.7290836653386457, | |
| "grad_norm": 1.4776321649551392, | |
| "learning_rate": 1.962860505618958e-06, | |
| "loss": 0.6491652131080627, | |
| "step": 1370 | |
| }, | |
| { | |
| "epoch": 2.7330677290836656, | |
| "grad_norm": 6.724909782409668, | |
| "learning_rate": 1.955139338878714e-06, | |
| "loss": 0.19401389360427856, | |
| "step": 1372 | |
| }, | |
| { | |
| "epoch": 2.7370517928286855, | |
| "grad_norm": 0.943676233291626, | |
| "learning_rate": 1.9474327715465444e-06, | |
| "loss": 0.8299869894981384, | |
| "step": 1374 | |
| }, | |
| { | |
| "epoch": 2.7410358565737054, | |
| "grad_norm": 1.2990317344665527, | |
| "learning_rate": 1.9397408872826545e-06, | |
| "loss": 0.871895968914032, | |
| "step": 1376 | |
| }, | |
| { | |
| "epoch": 2.7450199203187253, | |
| "grad_norm": 1.9206279516220093, | |
| "learning_rate": 1.9320637695878555e-06, | |
| "loss": 0.30201855301856995, | |
| "step": 1378 | |
| }, | |
| { | |
| "epoch": 2.7490039840637452, | |
| "grad_norm": 0.7692667841911316, | |
| "learning_rate": 1.924401501802659e-06, | |
| "loss": 0.6371020078659058, | |
| "step": 1380 | |
| }, | |
| { | |
| "epoch": 2.752988047808765, | |
| "grad_norm": 0.8262352347373962, | |
| "learning_rate": 1.9167541671063703e-06, | |
| "loss": 0.9497525691986084, | |
| "step": 1382 | |
| }, | |
| { | |
| "epoch": 2.756972111553785, | |
| "grad_norm": 1.0128363370895386, | |
| "learning_rate": 1.9091218485161824e-06, | |
| "loss": 0.9976522922515869, | |
| "step": 1384 | |
| }, | |
| { | |
| "epoch": 2.760956175298805, | |
| "grad_norm": 0.8022831082344055, | |
| "learning_rate": 1.9015046288862815e-06, | |
| "loss": 0.8430491089820862, | |
| "step": 1386 | |
| }, | |
| { | |
| "epoch": 2.764940239043825, | |
| "grad_norm": 1.4386292695999146, | |
| "learning_rate": 1.893902590906943e-06, | |
| "loss": 0.6075490117073059, | |
| "step": 1388 | |
| }, | |
| { | |
| "epoch": 2.768924302788845, | |
| "grad_norm": 1.3775461912155151, | |
| "learning_rate": 1.8863158171036336e-06, | |
| "loss": 0.12825116515159607, | |
| "step": 1390 | |
| }, | |
| { | |
| "epoch": 2.7729083665338647, | |
| "grad_norm": 1.3699278831481934, | |
| "learning_rate": 1.8787443898361158e-06, | |
| "loss": 1.1316020488739014, | |
| "step": 1392 | |
| }, | |
| { | |
| "epoch": 2.7768924302788847, | |
| "grad_norm": 0.8569239377975464, | |
| "learning_rate": 1.8711883912975575e-06, | |
| "loss": 0.655997633934021, | |
| "step": 1394 | |
| }, | |
| { | |
| "epoch": 2.7808764940239046, | |
| "grad_norm": 0.7035950422286987, | |
| "learning_rate": 1.8636479035136368e-06, | |
| "loss": 0.8871821165084839, | |
| "step": 1396 | |
| }, | |
| { | |
| "epoch": 2.7848605577689245, | |
| "grad_norm": 0.7683161497116089, | |
| "learning_rate": 1.8561230083416488e-06, | |
| "loss": 0.9570977687835693, | |
| "step": 1398 | |
| }, | |
| { | |
| "epoch": 2.7888446215139444, | |
| "grad_norm": 0.8087801337242126, | |
| "learning_rate": 1.8486137874696223e-06, | |
| "loss": 0.8703477382659912, | |
| "step": 1400 | |
| }, | |
| { | |
| "epoch": 2.7928286852589643, | |
| "grad_norm": 0.9088819622993469, | |
| "learning_rate": 1.8411203224154289e-06, | |
| "loss": 0.8619301915168762, | |
| "step": 1402 | |
| }, | |
| { | |
| "epoch": 2.7968127490039842, | |
| "grad_norm": 0.3485574424266815, | |
| "learning_rate": 1.833642694525902e-06, | |
| "loss": 0.13462619483470917, | |
| "step": 1404 | |
| }, | |
| { | |
| "epoch": 2.800796812749004, | |
| "grad_norm": 0.9604331851005554, | |
| "learning_rate": 1.826180984975948e-06, | |
| "loss": 0.8676316142082214, | |
| "step": 1406 | |
| }, | |
| { | |
| "epoch": 2.804780876494024, | |
| "grad_norm": 1.302273154258728, | |
| "learning_rate": 1.8187352747676718e-06, | |
| "loss": 1.241036295890808, | |
| "step": 1408 | |
| }, | |
| { | |
| "epoch": 2.808764940239044, | |
| "grad_norm": 1.2466564178466797, | |
| "learning_rate": 1.8113056447294936e-06, | |
| "loss": 1.0569744110107422, | |
| "step": 1410 | |
| }, | |
| { | |
| "epoch": 2.812749003984064, | |
| "grad_norm": 0.9512035846710205, | |
| "learning_rate": 1.8038921755152704e-06, | |
| "loss": 0.8206438422203064, | |
| "step": 1412 | |
| }, | |
| { | |
| "epoch": 2.816733067729084, | |
| "grad_norm": 1.0051904916763306, | |
| "learning_rate": 1.7964949476034223e-06, | |
| "loss": 0.9369583129882812, | |
| "step": 1414 | |
| }, | |
| { | |
| "epoch": 2.8207171314741037, | |
| "grad_norm": 3.8374409675598145, | |
| "learning_rate": 1.7891140412960615e-06, | |
| "loss": 1.116792917251587, | |
| "step": 1416 | |
| }, | |
| { | |
| "epoch": 2.8247011952191237, | |
| "grad_norm": 1.1146875619888306, | |
| "learning_rate": 1.7817495367181132e-06, | |
| "loss": 0.8257051110267639, | |
| "step": 1418 | |
| }, | |
| { | |
| "epoch": 2.8286852589641436, | |
| "grad_norm": 0.2130766063928604, | |
| "learning_rate": 1.774401513816454e-06, | |
| "loss": 0.08374066650867462, | |
| "step": 1420 | |
| }, | |
| { | |
| "epoch": 2.8326693227091635, | |
| "grad_norm": 0.8484716415405273, | |
| "learning_rate": 1.76707005235904e-06, | |
| "loss": 0.9364421963691711, | |
| "step": 1422 | |
| }, | |
| { | |
| "epoch": 2.8366533864541834, | |
| "grad_norm": 0.7365440130233765, | |
| "learning_rate": 1.759755231934039e-06, | |
| "loss": 0.9269137978553772, | |
| "step": 1424 | |
| }, | |
| { | |
| "epoch": 2.8406374501992033, | |
| "grad_norm": 0.9674385190010071, | |
| "learning_rate": 1.7524571319489695e-06, | |
| "loss": 0.24093596637248993, | |
| "step": 1426 | |
| }, | |
| { | |
| "epoch": 2.8446215139442232, | |
| "grad_norm": 0.8217137455940247, | |
| "learning_rate": 1.7451758316298386e-06, | |
| "loss": 0.8590070605278015, | |
| "step": 1428 | |
| }, | |
| { | |
| "epoch": 2.848605577689243, | |
| "grad_norm": 0.818912148475647, | |
| "learning_rate": 1.7379114100202824e-06, | |
| "loss": 0.8883748650550842, | |
| "step": 1430 | |
| }, | |
| { | |
| "epoch": 2.852589641434263, | |
| "grad_norm": 2.239244222640991, | |
| "learning_rate": 1.7306639459807026e-06, | |
| "loss": 0.8789231777191162, | |
| "step": 1432 | |
| }, | |
| { | |
| "epoch": 2.856573705179283, | |
| "grad_norm": 1.3130366802215576, | |
| "learning_rate": 1.7234335181874197e-06, | |
| "loss": 0.41715553402900696, | |
| "step": 1434 | |
| }, | |
| { | |
| "epoch": 2.860557768924303, | |
| "grad_norm": 2.1881866455078125, | |
| "learning_rate": 1.7162202051318092e-06, | |
| "loss": 0.8317433595657349, | |
| "step": 1436 | |
| }, | |
| { | |
| "epoch": 2.864541832669323, | |
| "grad_norm": 0.4997340440750122, | |
| "learning_rate": 1.7090240851194576e-06, | |
| "loss": 0.06248881667852402, | |
| "step": 1438 | |
| }, | |
| { | |
| "epoch": 2.8685258964143427, | |
| "grad_norm": 0.7684650421142578, | |
| "learning_rate": 1.7018452362693062e-06, | |
| "loss": 0.9771674871444702, | |
| "step": 1440 | |
| }, | |
| { | |
| "epoch": 2.8725099601593627, | |
| "grad_norm": 2.6358094215393066, | |
| "learning_rate": 1.694683736512807e-06, | |
| "loss": 0.4274534285068512, | |
| "step": 1442 | |
| }, | |
| { | |
| "epoch": 2.8764940239043826, | |
| "grad_norm": 3.7041735649108887, | |
| "learning_rate": 1.6875396635930767e-06, | |
| "loss": 0.8502193689346313, | |
| "step": 1444 | |
| }, | |
| { | |
| "epoch": 2.8804780876494025, | |
| "grad_norm": 1.7656716108322144, | |
| "learning_rate": 1.6804130950640492e-06, | |
| "loss": 0.2269526571035385, | |
| "step": 1446 | |
| }, | |
| { | |
| "epoch": 2.8844621513944224, | |
| "grad_norm": 0.9704077839851379, | |
| "learning_rate": 1.6733041082896355e-06, | |
| "loss": 0.9017117619514465, | |
| "step": 1448 | |
| }, | |
| { | |
| "epoch": 2.8884462151394423, | |
| "grad_norm": 1.1423131227493286, | |
| "learning_rate": 1.666212780442887e-06, | |
| "loss": 0.7310890555381775, | |
| "step": 1450 | |
| }, | |
| { | |
| "epoch": 2.8924302788844622, | |
| "grad_norm": 0.8818380832672119, | |
| "learning_rate": 1.659139188505152e-06, | |
| "loss": 0.9649314880371094, | |
| "step": 1452 | |
| }, | |
| { | |
| "epoch": 2.896414342629482, | |
| "grad_norm": 0.9627234935760498, | |
| "learning_rate": 1.652083409265246e-06, | |
| "loss": 0.1323651671409607, | |
| "step": 1454 | |
| }, | |
| { | |
| "epoch": 2.900398406374502, | |
| "grad_norm": 0.625633955001831, | |
| "learning_rate": 1.6450455193186137e-06, | |
| "loss": 0.8300275206565857, | |
| "step": 1456 | |
| }, | |
| { | |
| "epoch": 2.904382470119522, | |
| "grad_norm": 1.691175103187561, | |
| "learning_rate": 1.638025595066499e-06, | |
| "loss": 0.7612891793251038, | |
| "step": 1458 | |
| }, | |
| { | |
| "epoch": 2.908366533864542, | |
| "grad_norm": 0.9278882145881653, | |
| "learning_rate": 1.6310237127151137e-06, | |
| "loss": 0.9076191782951355, | |
| "step": 1460 | |
| }, | |
| { | |
| "epoch": 2.912350597609562, | |
| "grad_norm": 2.7954494953155518, | |
| "learning_rate": 1.624039948274815e-06, | |
| "loss": 0.37150129675865173, | |
| "step": 1462 | |
| }, | |
| { | |
| "epoch": 2.9163346613545817, | |
| "grad_norm": 0.423910528421402, | |
| "learning_rate": 1.6170743775592773e-06, | |
| "loss": 0.20058873295783997, | |
| "step": 1464 | |
| }, | |
| { | |
| "epoch": 2.9203187250996017, | |
| "grad_norm": 0.9244667887687683, | |
| "learning_rate": 1.610127076184667e-06, | |
| "loss": 0.8625198602676392, | |
| "step": 1466 | |
| }, | |
| { | |
| "epoch": 2.9243027888446216, | |
| "grad_norm": 0.8803090453147888, | |
| "learning_rate": 1.6031981195688252e-06, | |
| "loss": 0.9291595816612244, | |
| "step": 1468 | |
| }, | |
| { | |
| "epoch": 2.9282868525896415, | |
| "grad_norm": 1.0361244678497314, | |
| "learning_rate": 1.59628758293045e-06, | |
| "loss": 0.23180729150772095, | |
| "step": 1470 | |
| }, | |
| { | |
| "epoch": 2.9322709163346614, | |
| "grad_norm": 5.147000789642334, | |
| "learning_rate": 1.5893955412882733e-06, | |
| "loss": 0.5987867712974548, | |
| "step": 1472 | |
| }, | |
| { | |
| "epoch": 2.9362549800796813, | |
| "grad_norm": 0.5982325673103333, | |
| "learning_rate": 1.582522069460253e-06, | |
| "loss": 0.8363850116729736, | |
| "step": 1474 | |
| }, | |
| { | |
| "epoch": 2.9402390438247012, | |
| "grad_norm": 3.7226884365081787, | |
| "learning_rate": 1.5756672420627596e-06, | |
| "loss": 0.8606371283531189, | |
| "step": 1476 | |
| }, | |
| { | |
| "epoch": 2.944223107569721, | |
| "grad_norm": 1.0484495162963867, | |
| "learning_rate": 1.5688311335097646e-06, | |
| "loss": 0.9633500576019287, | |
| "step": 1478 | |
| }, | |
| { | |
| "epoch": 2.948207171314741, | |
| "grad_norm": 0.7016828656196594, | |
| "learning_rate": 1.5620138180120331e-06, | |
| "loss": 0.8571369647979736, | |
| "step": 1480 | |
| }, | |
| { | |
| "epoch": 2.952191235059761, | |
| "grad_norm": 2.1188414096832275, | |
| "learning_rate": 1.5552153695763156e-06, | |
| "loss": 0.44183531403541565, | |
| "step": 1482 | |
| }, | |
| { | |
| "epoch": 2.956175298804781, | |
| "grad_norm": 2.2254960536956787, | |
| "learning_rate": 1.5484358620045534e-06, | |
| "loss": 0.28760015964508057, | |
| "step": 1484 | |
| }, | |
| { | |
| "epoch": 2.960159362549801, | |
| "grad_norm": 2.748490333557129, | |
| "learning_rate": 1.5416753688930654e-06, | |
| "loss": 0.6493697166442871, | |
| "step": 1486 | |
| }, | |
| { | |
| "epoch": 2.9641434262948207, | |
| "grad_norm": 1.3967127799987793, | |
| "learning_rate": 1.5349339636317584e-06, | |
| "loss": 0.8622140288352966, | |
| "step": 1488 | |
| }, | |
| { | |
| "epoch": 2.9681274900398407, | |
| "grad_norm": 1.959518313407898, | |
| "learning_rate": 1.528211719403328e-06, | |
| "loss": 0.722124457359314, | |
| "step": 1490 | |
| }, | |
| { | |
| "epoch": 2.9721115537848606, | |
| "grad_norm": 1.3386509418487549, | |
| "learning_rate": 1.521508709182461e-06, | |
| "loss": 0.9694193601608276, | |
| "step": 1492 | |
| }, | |
| { | |
| "epoch": 2.9760956175298805, | |
| "grad_norm": 0.9864974617958069, | |
| "learning_rate": 1.514825005735045e-06, | |
| "loss": 0.8088407516479492, | |
| "step": 1494 | |
| }, | |
| { | |
| "epoch": 2.9800796812749004, | |
| "grad_norm": 2.115551471710205, | |
| "learning_rate": 1.5081606816173814e-06, | |
| "loss": 0.12242338061332703, | |
| "step": 1496 | |
| }, | |
| { | |
| "epoch": 2.9840637450199203, | |
| "grad_norm": 0.75198894739151, | |
| "learning_rate": 1.5015158091753958e-06, | |
| "loss": 0.1432493031024933, | |
| "step": 1498 | |
| }, | |
| { | |
| "epoch": 2.9880478087649402, | |
| "grad_norm": 1.4102544784545898, | |
| "learning_rate": 1.4948904605438477e-06, | |
| "loss": 0.0790117010474205, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 2.99203187250996, | |
| "grad_norm": 0.6461302638053894, | |
| "learning_rate": 1.488284707645557e-06, | |
| "loss": 0.7927932739257812, | |
| "step": 1502 | |
| }, | |
| { | |
| "epoch": 2.99601593625498, | |
| "grad_norm": 0.9944819211959839, | |
| "learning_rate": 1.4816986221906159e-06, | |
| "loss": 0.8774588704109192, | |
| "step": 1504 | |
| }, | |
| { | |
| "epoch": 3.0, | |
| "grad_norm": 2.3869407176971436, | |
| "learning_rate": 1.4751322756756127e-06, | |
| "loss": 0.23395386338233948, | |
| "step": 1506 | |
| }, | |
| { | |
| "epoch": 3.00398406374502, | |
| "grad_norm": 0.6929567456245422, | |
| "learning_rate": 1.4685857393828543e-06, | |
| "loss": 0.6813750267028809, | |
| "step": 1508 | |
| }, | |
| { | |
| "epoch": 3.00796812749004, | |
| "grad_norm": 1.4428455829620361, | |
| "learning_rate": 1.4620590843795967e-06, | |
| "loss": 0.27471280097961426, | |
| "step": 1510 | |
| }, | |
| { | |
| "epoch": 3.0119521912350598, | |
| "grad_norm": 1.1208453178405762, | |
| "learning_rate": 1.4555523815172693e-06, | |
| "loss": 0.7926130294799805, | |
| "step": 1512 | |
| }, | |
| { | |
| "epoch": 3.0159362549800797, | |
| "grad_norm": 1.4112131595611572, | |
| "learning_rate": 1.449065701430705e-06, | |
| "loss": 0.3855717182159424, | |
| "step": 1514 | |
| }, | |
| { | |
| "epoch": 3.0199203187250996, | |
| "grad_norm": 7.652811527252197, | |
| "learning_rate": 1.4425991145373788e-06, | |
| "loss": 0.1316222846508026, | |
| "step": 1516 | |
| }, | |
| { | |
| "epoch": 3.0239043824701195, | |
| "grad_norm": 1.6621893644332886, | |
| "learning_rate": 1.4361526910366368e-06, | |
| "loss": 0.2520155906677246, | |
| "step": 1518 | |
| }, | |
| { | |
| "epoch": 3.0278884462151394, | |
| "grad_norm": 0.8125709891319275, | |
| "learning_rate": 1.4297265009089397e-06, | |
| "loss": 0.7272902727127075, | |
| "step": 1520 | |
| }, | |
| { | |
| "epoch": 3.0318725099601593, | |
| "grad_norm": 1.4255092144012451, | |
| "learning_rate": 1.423320613915099e-06, | |
| "loss": 0.5655202865600586, | |
| "step": 1522 | |
| }, | |
| { | |
| "epoch": 3.0358565737051793, | |
| "grad_norm": 1.9694007635116577, | |
| "learning_rate": 1.416935099595522e-06, | |
| "loss": 0.21059830486774445, | |
| "step": 1524 | |
| }, | |
| { | |
| "epoch": 3.039840637450199, | |
| "grad_norm": 0.7592612504959106, | |
| "learning_rate": 1.4105700272694578e-06, | |
| "loss": 0.6575446724891663, | |
| "step": 1526 | |
| }, | |
| { | |
| "epoch": 3.043824701195219, | |
| "grad_norm": 1.133392572402954, | |
| "learning_rate": 1.4042254660342408e-06, | |
| "loss": 0.9429333209991455, | |
| "step": 1528 | |
| }, | |
| { | |
| "epoch": 3.047808764940239, | |
| "grad_norm": 1.231631875038147, | |
| "learning_rate": 1.3979014847645435e-06, | |
| "loss": 0.2242284119129181, | |
| "step": 1530 | |
| }, | |
| { | |
| "epoch": 3.051792828685259, | |
| "grad_norm": 1.1999961137771606, | |
| "learning_rate": 1.391598152111631e-06, | |
| "loss": 0.15949700772762299, | |
| "step": 1532 | |
| }, | |
| { | |
| "epoch": 3.055776892430279, | |
| "grad_norm": 1.6939618587493896, | |
| "learning_rate": 1.385315536502609e-06, | |
| "loss": 0.21413640677928925, | |
| "step": 1534 | |
| }, | |
| { | |
| "epoch": 3.0597609561752988, | |
| "grad_norm": 1.3219988346099854, | |
| "learning_rate": 1.3790537061396887e-06, | |
| "loss": 0.6202045679092407, | |
| "step": 1536 | |
| }, | |
| { | |
| "epoch": 3.0637450199203187, | |
| "grad_norm": 0.998444676399231, | |
| "learning_rate": 1.372812728999442e-06, | |
| "loss": 0.7671471238136292, | |
| "step": 1538 | |
| }, | |
| { | |
| "epoch": 3.0677290836653386, | |
| "grad_norm": 1.4698975086212158, | |
| "learning_rate": 1.3665926728320632e-06, | |
| "loss": 0.47750726342201233, | |
| "step": 1540 | |
| }, | |
| { | |
| "epoch": 3.0717131474103585, | |
| "grad_norm": 0.9587137699127197, | |
| "learning_rate": 1.3603936051606346e-06, | |
| "loss": 0.7269394397735596, | |
| "step": 1542 | |
| }, | |
| { | |
| "epoch": 3.0756972111553784, | |
| "grad_norm": 2.3286054134368896, | |
| "learning_rate": 1.3542155932803954e-06, | |
| "loss": 0.7805855870246887, | |
| "step": 1544 | |
| }, | |
| { | |
| "epoch": 3.0796812749003983, | |
| "grad_norm": 0.7439804077148438, | |
| "learning_rate": 1.3480587042580092e-06, | |
| "loss": 0.6787388324737549, | |
| "step": 1546 | |
| }, | |
| { | |
| "epoch": 3.0836653386454183, | |
| "grad_norm": 1.8882228136062622, | |
| "learning_rate": 1.3419230049308333e-06, | |
| "loss": 0.6134771108627319, | |
| "step": 1548 | |
| }, | |
| { | |
| "epoch": 3.087649402390438, | |
| "grad_norm": 1.0494561195373535, | |
| "learning_rate": 1.3358085619062003e-06, | |
| "loss": 0.7737662196159363, | |
| "step": 1550 | |
| }, | |
| { | |
| "epoch": 3.091633466135458, | |
| "grad_norm": 0.31838488578796387, | |
| "learning_rate": 1.3297154415606864e-06, | |
| "loss": 0.034840308129787445, | |
| "step": 1552 | |
| }, | |
| { | |
| "epoch": 3.095617529880478, | |
| "grad_norm": 1.5378990173339844, | |
| "learning_rate": 1.3236437100393992e-06, | |
| "loss": 0.21899044513702393, | |
| "step": 1554 | |
| }, | |
| { | |
| "epoch": 3.099601593625498, | |
| "grad_norm": 0.9580462574958801, | |
| "learning_rate": 1.3175934332552511e-06, | |
| "loss": 0.635277271270752, | |
| "step": 1556 | |
| }, | |
| { | |
| "epoch": 3.103585657370518, | |
| "grad_norm": 1.2689288854599, | |
| "learning_rate": 1.3115646768882522e-06, | |
| "loss": 0.6710810661315918, | |
| "step": 1558 | |
| }, | |
| { | |
| "epoch": 3.1075697211155378, | |
| "grad_norm": 0.9133360385894775, | |
| "learning_rate": 1.3055575063847923e-06, | |
| "loss": 0.7197314500808716, | |
| "step": 1560 | |
| }, | |
| { | |
| "epoch": 3.1115537848605577, | |
| "grad_norm": 3.067455768585205, | |
| "learning_rate": 1.29957198695693e-06, | |
| "loss": 0.21895435452461243, | |
| "step": 1562 | |
| }, | |
| { | |
| "epoch": 3.1155378486055776, | |
| "grad_norm": 0.27349138259887695, | |
| "learning_rate": 1.2936081835816867e-06, | |
| "loss": 0.19600287079811096, | |
| "step": 1564 | |
| }, | |
| { | |
| "epoch": 3.1195219123505975, | |
| "grad_norm": 1.1419686079025269, | |
| "learning_rate": 1.2876661610003428e-06, | |
| "loss": 0.7878577709197998, | |
| "step": 1566 | |
| }, | |
| { | |
| "epoch": 3.1235059760956174, | |
| "grad_norm": 1.1395351886749268, | |
| "learning_rate": 1.2817459837177298e-06, | |
| "loss": 0.7802326679229736, | |
| "step": 1568 | |
| }, | |
| { | |
| "epoch": 3.1274900398406373, | |
| "grad_norm": 1.9237797260284424, | |
| "learning_rate": 1.2758477160015355e-06, | |
| "loss": 0.5069929361343384, | |
| "step": 1570 | |
| }, | |
| { | |
| "epoch": 3.1314741035856573, | |
| "grad_norm": 0.7889575958251953, | |
| "learning_rate": 1.2699714218816036e-06, | |
| "loss": 0.6714158654212952, | |
| "step": 1572 | |
| }, | |
| { | |
| "epoch": 3.135458167330677, | |
| "grad_norm": 0.9449037313461304, | |
| "learning_rate": 1.2641171651492383e-06, | |
| "loss": 0.6565294861793518, | |
| "step": 1574 | |
| }, | |
| { | |
| "epoch": 3.139442231075697, | |
| "grad_norm": 1.7222603559494019, | |
| "learning_rate": 1.2582850093565115e-06, | |
| "loss": 0.2423674762248993, | |
| "step": 1576 | |
| }, | |
| { | |
| "epoch": 3.143426294820717, | |
| "grad_norm": 0.8361628651618958, | |
| "learning_rate": 1.2524750178155762e-06, | |
| "loss": 0.6483781933784485, | |
| "step": 1578 | |
| }, | |
| { | |
| "epoch": 3.147410358565737, | |
| "grad_norm": 0.4106227159500122, | |
| "learning_rate": 1.2466872535979755e-06, | |
| "loss": 0.06941226869821548, | |
| "step": 1580 | |
| }, | |
| { | |
| "epoch": 3.151394422310757, | |
| "grad_norm": 1.131303071975708, | |
| "learning_rate": 1.2409217795339592e-06, | |
| "loss": 0.6722179651260376, | |
| "step": 1582 | |
| }, | |
| { | |
| "epoch": 3.1553784860557768, | |
| "grad_norm": 1.3526575565338135, | |
| "learning_rate": 1.2351786582118018e-06, | |
| "loss": 0.37432199716567993, | |
| "step": 1584 | |
| }, | |
| { | |
| "epoch": 3.1593625498007967, | |
| "grad_norm": 1.5046707391738892, | |
| "learning_rate": 1.2294579519771246e-06, | |
| "loss": 0.36908501386642456, | |
| "step": 1586 | |
| }, | |
| { | |
| "epoch": 3.1633466135458166, | |
| "grad_norm": 0.14365744590759277, | |
| "learning_rate": 1.2237597229322155e-06, | |
| "loss": 0.01732539013028145, | |
| "step": 1588 | |
| }, | |
| { | |
| "epoch": 3.1673306772908365, | |
| "grad_norm": 0.7536062598228455, | |
| "learning_rate": 1.2180840329353564e-06, | |
| "loss": 0.2823001444339752, | |
| "step": 1590 | |
| }, | |
| { | |
| "epoch": 3.1713147410358564, | |
| "grad_norm": 6.318256855010986, | |
| "learning_rate": 1.2124309436001533e-06, | |
| "loss": 0.5411125421524048, | |
| "step": 1592 | |
| }, | |
| { | |
| "epoch": 3.1752988047808763, | |
| "grad_norm": 1.1654754877090454, | |
| "learning_rate": 1.2068005162948668e-06, | |
| "loss": 0.7602944374084473, | |
| "step": 1594 | |
| }, | |
| { | |
| "epoch": 3.1792828685258963, | |
| "grad_norm": 2.5576841831207275, | |
| "learning_rate": 1.2011928121417431e-06, | |
| "loss": 0.1262691169977188, | |
| "step": 1596 | |
| }, | |
| { | |
| "epoch": 3.183266932270916, | |
| "grad_norm": 1.2924350500106812, | |
| "learning_rate": 1.195607892016354e-06, | |
| "loss": 0.6975268721580505, | |
| "step": 1598 | |
| }, | |
| { | |
| "epoch": 3.187250996015936, | |
| "grad_norm": 2.0278656482696533, | |
| "learning_rate": 1.1900458165469345e-06, | |
| "loss": 0.5072341561317444, | |
| "step": 1600 | |
| }, | |
| { | |
| "epoch": 3.191235059760956, | |
| "grad_norm": 2.13330078125, | |
| "learning_rate": 1.184506646113724e-06, | |
| "loss": 0.7287152409553528, | |
| "step": 1602 | |
| }, | |
| { | |
| "epoch": 3.195219123505976, | |
| "grad_norm": 0.19735604524612427, | |
| "learning_rate": 1.1789904408483123e-06, | |
| "loss": 0.20490704476833344, | |
| "step": 1604 | |
| }, | |
| { | |
| "epoch": 3.199203187250996, | |
| "grad_norm": 2.342869997024536, | |
| "learning_rate": 1.1734972606329874e-06, | |
| "loss": 0.6201443076133728, | |
| "step": 1606 | |
| }, | |
| { | |
| "epoch": 3.2031872509960158, | |
| "grad_norm": 1.9951808452606201, | |
| "learning_rate": 1.1680271651000819e-06, | |
| "loss": 0.2740911543369293, | |
| "step": 1608 | |
| }, | |
| { | |
| "epoch": 3.2071713147410357, | |
| "grad_norm": 1.075411319732666, | |
| "learning_rate": 1.162580213631328e-06, | |
| "loss": 0.6568232774734497, | |
| "step": 1610 | |
| }, | |
| { | |
| "epoch": 3.2111553784860556, | |
| "grad_norm": 2.3391730785369873, | |
| "learning_rate": 1.1571564653572148e-06, | |
| "loss": 1.0995919704437256, | |
| "step": 1612 | |
| }, | |
| { | |
| "epoch": 3.2151394422310755, | |
| "grad_norm": 0.11555808782577515, | |
| "learning_rate": 1.1517559791563439e-06, | |
| "loss": 0.003191891126334667, | |
| "step": 1614 | |
| }, | |
| { | |
| "epoch": 3.2191235059760954, | |
| "grad_norm": 2.371424674987793, | |
| "learning_rate": 1.1463788136547887e-06, | |
| "loss": 0.396582692861557, | |
| "step": 1616 | |
| }, | |
| { | |
| "epoch": 3.2231075697211153, | |
| "grad_norm": 1.8076469898223877, | |
| "learning_rate": 1.141025027225463e-06, | |
| "loss": 0.3241533637046814, | |
| "step": 1618 | |
| }, | |
| { | |
| "epoch": 3.2270916334661353, | |
| "grad_norm": 0.9942080974578857, | |
| "learning_rate": 1.1356946779874825e-06, | |
| "loss": 0.6740264296531677, | |
| "step": 1620 | |
| }, | |
| { | |
| "epoch": 3.231075697211155, | |
| "grad_norm": 1.624965786933899, | |
| "learning_rate": 1.1303878238055357e-06, | |
| "loss": 0.44572022557258606, | |
| "step": 1622 | |
| }, | |
| { | |
| "epoch": 3.235059760956175, | |
| "grad_norm": 1.6572600603103638, | |
| "learning_rate": 1.1251045222892553e-06, | |
| "loss": 0.21951913833618164, | |
| "step": 1624 | |
| }, | |
| { | |
| "epoch": 3.239043824701195, | |
| "grad_norm": 1.5844409465789795, | |
| "learning_rate": 1.119844830792595e-06, | |
| "loss": 0.7072573900222778, | |
| "step": 1626 | |
| }, | |
| { | |
| "epoch": 3.243027888446215, | |
| "grad_norm": 1.0160541534423828, | |
| "learning_rate": 1.1146088064132052e-06, | |
| "loss": 0.6218189001083374, | |
| "step": 1628 | |
| }, | |
| { | |
| "epoch": 3.247011952191235, | |
| "grad_norm": 0.6660611033439636, | |
| "learning_rate": 1.10939650599181e-06, | |
| "loss": 0.15160006284713745, | |
| "step": 1630 | |
| }, | |
| { | |
| "epoch": 3.2509960159362548, | |
| "grad_norm": 4.854979038238525, | |
| "learning_rate": 1.1042079861115967e-06, | |
| "loss": 0.4013654887676239, | |
| "step": 1632 | |
| }, | |
| { | |
| "epoch": 3.2549800796812747, | |
| "grad_norm": 1.7456501722335815, | |
| "learning_rate": 1.099043303097596e-06, | |
| "loss": 0.6942977905273438, | |
| "step": 1634 | |
| }, | |
| { | |
| "epoch": 3.2589641434262946, | |
| "grad_norm": 0.6688535809516907, | |
| "learning_rate": 1.0939025130160743e-06, | |
| "loss": 0.7660707831382751, | |
| "step": 1636 | |
| }, | |
| { | |
| "epoch": 3.2629482071713145, | |
| "grad_norm": 1.3489729166030884, | |
| "learning_rate": 1.088785671673921e-06, | |
| "loss": 0.4087866544723511, | |
| "step": 1638 | |
| }, | |
| { | |
| "epoch": 3.2669322709163344, | |
| "grad_norm": 3.7537801265716553, | |
| "learning_rate": 1.0836928346180481e-06, | |
| "loss": 0.26779600977897644, | |
| "step": 1640 | |
| }, | |
| { | |
| "epoch": 3.2709163346613543, | |
| "grad_norm": 1.0913664102554321, | |
| "learning_rate": 1.0786240571347827e-06, | |
| "loss": 0.11661072820425034, | |
| "step": 1642 | |
| }, | |
| { | |
| "epoch": 3.2749003984063743, | |
| "grad_norm": 1.3544014692306519, | |
| "learning_rate": 1.0735793942492676e-06, | |
| "loss": 0.9415394067764282, | |
| "step": 1644 | |
| }, | |
| { | |
| "epoch": 3.278884462151394, | |
| "grad_norm": 1.880513072013855, | |
| "learning_rate": 1.068558900724865e-06, | |
| "loss": 0.6600284576416016, | |
| "step": 1646 | |
| }, | |
| { | |
| "epoch": 3.2828685258964145, | |
| "grad_norm": 2.517366647720337, | |
| "learning_rate": 1.0635626310625637e-06, | |
| "loss": 0.3240680694580078, | |
| "step": 1648 | |
| }, | |
| { | |
| "epoch": 3.2868525896414345, | |
| "grad_norm": 0.825859010219574, | |
| "learning_rate": 1.058590639500382e-06, | |
| "loss": 0.6646403074264526, | |
| "step": 1650 | |
| }, | |
| { | |
| "epoch": 3.2908366533864544, | |
| "grad_norm": 0.9859835505485535, | |
| "learning_rate": 1.0536429800127851e-06, | |
| "loss": 0.642147958278656, | |
| "step": 1652 | |
| }, | |
| { | |
| "epoch": 3.2948207171314743, | |
| "grad_norm": 1.7152155637741089, | |
| "learning_rate": 1.0487197063100961e-06, | |
| "loss": 0.7060829401016235, | |
| "step": 1654 | |
| }, | |
| { | |
| "epoch": 3.298804780876494, | |
| "grad_norm": 1.7756178379058838, | |
| "learning_rate": 1.0438208718379124e-06, | |
| "loss": 0.7361951470375061, | |
| "step": 1656 | |
| }, | |
| { | |
| "epoch": 3.302788844621514, | |
| "grad_norm": 1.7107096910476685, | |
| "learning_rate": 1.0389465297765253e-06, | |
| "loss": 0.6126337647438049, | |
| "step": 1658 | |
| }, | |
| { | |
| "epoch": 3.306772908366534, | |
| "grad_norm": 1.4858530759811401, | |
| "learning_rate": 1.0340967330403468e-06, | |
| "loss": 0.614052414894104, | |
| "step": 1660 | |
| }, | |
| { | |
| "epoch": 3.310756972111554, | |
| "grad_norm": 6.398506164550781, | |
| "learning_rate": 1.02927153427733e-06, | |
| "loss": 0.6388739347457886, | |
| "step": 1662 | |
| }, | |
| { | |
| "epoch": 3.314741035856574, | |
| "grad_norm": 4.903992652893066, | |
| "learning_rate": 1.0244709858683996e-06, | |
| "loss": 0.1377391517162323, | |
| "step": 1664 | |
| }, | |
| { | |
| "epoch": 3.318725099601594, | |
| "grad_norm": 1.644950270652771, | |
| "learning_rate": 1.0196951399268847e-06, | |
| "loss": 0.3214379549026489, | |
| "step": 1666 | |
| }, | |
| { | |
| "epoch": 3.3227091633466137, | |
| "grad_norm": 6.5153608322143555, | |
| "learning_rate": 1.0149440482979503e-06, | |
| "loss": 0.23638975620269775, | |
| "step": 1668 | |
| }, | |
| { | |
| "epoch": 3.3266932270916336, | |
| "grad_norm": 1.4857839345932007, | |
| "learning_rate": 1.0102177625580375e-06, | |
| "loss": 0.22218865156173706, | |
| "step": 1670 | |
| }, | |
| { | |
| "epoch": 3.3306772908366535, | |
| "grad_norm": 8.828252792358398, | |
| "learning_rate": 1.0055163340143e-06, | |
| "loss": 0.6645467877388, | |
| "step": 1672 | |
| }, | |
| { | |
| "epoch": 3.3346613545816735, | |
| "grad_norm": 1.0999014377593994, | |
| "learning_rate": 1.0008398137040507e-06, | |
| "loss": 0.5620592832565308, | |
| "step": 1674 | |
| }, | |
| { | |
| "epoch": 3.3386454183266934, | |
| "grad_norm": 2.528717279434204, | |
| "learning_rate": 9.961882523942068e-07, | |
| "loss": 0.6080818176269531, | |
| "step": 1676 | |
| }, | |
| { | |
| "epoch": 3.3426294820717133, | |
| "grad_norm": 0.1366569846868515, | |
| "learning_rate": 9.915617005807357e-07, | |
| "loss": 0.01138792559504509, | |
| "step": 1678 | |
| }, | |
| { | |
| "epoch": 3.346613545816733, | |
| "grad_norm": 5.231603622436523, | |
| "learning_rate": 9.869602084881103e-07, | |
| "loss": 0.29557374119758606, | |
| "step": 1680 | |
| }, | |
| { | |
| "epoch": 3.350597609561753, | |
| "grad_norm": 0.9051138758659363, | |
| "learning_rate": 9.823838260687635e-07, | |
| "loss": 0.41544756293296814, | |
| "step": 1682 | |
| }, | |
| { | |
| "epoch": 3.354581673306773, | |
| "grad_norm": 1.6163842678070068, | |
| "learning_rate": 9.778326030025432e-07, | |
| "loss": 0.45938849449157715, | |
| "step": 1684 | |
| }, | |
| { | |
| "epoch": 3.358565737051793, | |
| "grad_norm": 7.820988178253174, | |
| "learning_rate": 9.733065886961764e-07, | |
| "loss": 0.4935106337070465, | |
| "step": 1686 | |
| }, | |
| { | |
| "epoch": 3.362549800796813, | |
| "grad_norm": 1.3769513368606567, | |
| "learning_rate": 9.688058322827313e-07, | |
| "loss": 0.5252028107643127, | |
| "step": 1688 | |
| }, | |
| { | |
| "epoch": 3.366533864541833, | |
| "grad_norm": 1.0140272378921509, | |
| "learning_rate": 9.643303826210824e-07, | |
| "loss": 0.7207529544830322, | |
| "step": 1690 | |
| }, | |
| { | |
| "epoch": 3.3705179282868527, | |
| "grad_norm": 1.3448855876922607, | |
| "learning_rate": 9.598802882953828e-07, | |
| "loss": 0.7529066205024719, | |
| "step": 1692 | |
| }, | |
| { | |
| "epoch": 3.3745019920318726, | |
| "grad_norm": 1.0818604230880737, | |
| "learning_rate": 9.554555976145349e-07, | |
| "loss": 0.6526249647140503, | |
| "step": 1694 | |
| }, | |
| { | |
| "epoch": 3.3784860557768925, | |
| "grad_norm": 0.858180046081543, | |
| "learning_rate": 9.510563586116686e-07, | |
| "loss": 0.6609078645706177, | |
| "step": 1696 | |
| }, | |
| { | |
| "epoch": 3.3824701195219125, | |
| "grad_norm": 1.1475756168365479, | |
| "learning_rate": 9.466826190436147e-07, | |
| "loss": 0.7812352180480957, | |
| "step": 1698 | |
| }, | |
| { | |
| "epoch": 3.3864541832669324, | |
| "grad_norm": 2.1600332260131836, | |
| "learning_rate": 9.423344263903926e-07, | |
| "loss": 0.7400810122489929, | |
| "step": 1700 | |
| }, | |
| { | |
| "epoch": 3.3904382470119523, | |
| "grad_norm": 1.9892051219940186, | |
| "learning_rate": 9.380118278546906e-07, | |
| "loss": 0.6348077654838562, | |
| "step": 1702 | |
| }, | |
| { | |
| "epoch": 3.394422310756972, | |
| "grad_norm": 0.9929773211479187, | |
| "learning_rate": 9.337148703613554e-07, | |
| "loss": 0.6541098356246948, | |
| "step": 1704 | |
| }, | |
| { | |
| "epoch": 3.398406374501992, | |
| "grad_norm": 5.213384628295898, | |
| "learning_rate": 9.29443600556881e-07, | |
| "loss": 0.20520062744617462, | |
| "step": 1706 | |
| }, | |
| { | |
| "epoch": 3.402390438247012, | |
| "grad_norm": 1.8277703523635864, | |
| "learning_rate": 9.251980648089045e-07, | |
| "loss": 0.596899688243866, | |
| "step": 1708 | |
| }, | |
| { | |
| "epoch": 3.406374501992032, | |
| "grad_norm": 0.9781650304794312, | |
| "learning_rate": 9.209783092057025e-07, | |
| "loss": 0.7202063202857971, | |
| "step": 1710 | |
| }, | |
| { | |
| "epoch": 3.410358565737052, | |
| "grad_norm": 1.2887661457061768, | |
| "learning_rate": 9.16784379555688e-07, | |
| "loss": 0.668391764163971, | |
| "step": 1712 | |
| }, | |
| { | |
| "epoch": 3.414342629482072, | |
| "grad_norm": 1.2524248361587524, | |
| "learning_rate": 9.126163213869171e-07, | |
| "loss": 0.6738901138305664, | |
| "step": 1714 | |
| }, | |
| { | |
| "epoch": 3.4183266932270917, | |
| "grad_norm": 0.8974006772041321, | |
| "learning_rate": 9.084741799465915e-07, | |
| "loss": 0.6369835734367371, | |
| "step": 1716 | |
| }, | |
| { | |
| "epoch": 3.4223107569721116, | |
| "grad_norm": 0.9271976351737976, | |
| "learning_rate": 9.043580002005681e-07, | |
| "loss": 0.7468122839927673, | |
| "step": 1718 | |
| }, | |
| { | |
| "epoch": 3.4262948207171315, | |
| "grad_norm": 0.9398600459098816, | |
| "learning_rate": 9.002678268328732e-07, | |
| "loss": 0.6316313743591309, | |
| "step": 1720 | |
| }, | |
| { | |
| "epoch": 3.4302788844621515, | |
| "grad_norm": 3.112898111343384, | |
| "learning_rate": 8.962037042452146e-07, | |
| "loss": 0.3467191755771637, | |
| "step": 1722 | |
| }, | |
| { | |
| "epoch": 3.4342629482071714, | |
| "grad_norm": 0.8903955817222595, | |
| "learning_rate": 8.921656765564998e-07, | |
| "loss": 0.5496594309806824, | |
| "step": 1724 | |
| }, | |
| { | |
| "epoch": 3.4382470119521913, | |
| "grad_norm": 2.7363598346710205, | |
| "learning_rate": 8.881537876023597e-07, | |
| "loss": 0.6846615076065063, | |
| "step": 1726 | |
| }, | |
| { | |
| "epoch": 3.442231075697211, | |
| "grad_norm": 1.7913397550582886, | |
| "learning_rate": 8.841680809346684e-07, | |
| "loss": 0.4614332914352417, | |
| "step": 1728 | |
| }, | |
| { | |
| "epoch": 3.446215139442231, | |
| "grad_norm": 2.286719560623169, | |
| "learning_rate": 8.802085998210754e-07, | |
| "loss": 0.6514830589294434, | |
| "step": 1730 | |
| }, | |
| { | |
| "epoch": 3.450199203187251, | |
| "grad_norm": 1.2754535675048828, | |
| "learning_rate": 8.762753872445316e-07, | |
| "loss": 0.6596709489822388, | |
| "step": 1732 | |
| }, | |
| { | |
| "epoch": 3.454183266932271, | |
| "grad_norm": 3.1973865032196045, | |
| "learning_rate": 8.723684859028244e-07, | |
| "loss": 0.3601575791835785, | |
| "step": 1734 | |
| }, | |
| { | |
| "epoch": 3.458167330677291, | |
| "grad_norm": 1.0521482229232788, | |
| "learning_rate": 8.684879382081163e-07, | |
| "loss": 0.6533339023590088, | |
| "step": 1736 | |
| }, | |
| { | |
| "epoch": 3.462151394422311, | |
| "grad_norm": 1.2476742267608643, | |
| "learning_rate": 8.646337862864804e-07, | |
| "loss": 0.7225340604782104, | |
| "step": 1738 | |
| }, | |
| { | |
| "epoch": 3.4661354581673307, | |
| "grad_norm": 8.218664169311523, | |
| "learning_rate": 8.608060719774452e-07, | |
| "loss": 0.14243163168430328, | |
| "step": 1740 | |
| }, | |
| { | |
| "epoch": 3.4701195219123506, | |
| "grad_norm": 0.8877552151679993, | |
| "learning_rate": 8.570048368335411e-07, | |
| "loss": 0.7387225031852722, | |
| "step": 1742 | |
| }, | |
| { | |
| "epoch": 3.4741035856573705, | |
| "grad_norm": 0.26608389616012573, | |
| "learning_rate": 8.532301221198491e-07, | |
| "loss": 0.060973528772592545, | |
| "step": 1744 | |
| }, | |
| { | |
| "epoch": 3.4780876494023905, | |
| "grad_norm": 1.655069351196289, | |
| "learning_rate": 8.494819688135502e-07, | |
| "loss": 0.6722233891487122, | |
| "step": 1746 | |
| }, | |
| { | |
| "epoch": 3.4820717131474104, | |
| "grad_norm": 0.3229190409183502, | |
| "learning_rate": 8.457604176034851e-07, | |
| "loss": 0.16490302979946136, | |
| "step": 1748 | |
| }, | |
| { | |
| "epoch": 3.4860557768924303, | |
| "grad_norm": 0.3072760999202728, | |
| "learning_rate": 8.42065508889708e-07, | |
| "loss": 0.06224316358566284, | |
| "step": 1750 | |
| }, | |
| { | |
| "epoch": 3.49003984063745, | |
| "grad_norm": 1.0425161123275757, | |
| "learning_rate": 8.383972827830517e-07, | |
| "loss": 0.6595985293388367, | |
| "step": 1752 | |
| }, | |
| { | |
| "epoch": 3.49402390438247, | |
| "grad_norm": 1.6916478872299194, | |
| "learning_rate": 8.347557791046892e-07, | |
| "loss": 0.18403995037078857, | |
| "step": 1754 | |
| }, | |
| { | |
| "epoch": 3.49800796812749, | |
| "grad_norm": 0.8162530064582825, | |
| "learning_rate": 8.311410373857033e-07, | |
| "loss": 0.6693860292434692, | |
| "step": 1756 | |
| }, | |
| { | |
| "epoch": 3.50199203187251, | |
| "grad_norm": 3.898818254470825, | |
| "learning_rate": 8.275530968666578e-07, | |
| "loss": 0.5436112880706787, | |
| "step": 1758 | |
| }, | |
| { | |
| "epoch": 3.50597609561753, | |
| "grad_norm": 0.576738178730011, | |
| "learning_rate": 8.239919964971689e-07, | |
| "loss": 0.1252291202545166, | |
| "step": 1760 | |
| }, | |
| { | |
| "epoch": 3.50996015936255, | |
| "grad_norm": 0.9629335403442383, | |
| "learning_rate": 8.20457774935485e-07, | |
| "loss": 0.2324841022491455, | |
| "step": 1762 | |
| }, | |
| { | |
| "epoch": 3.5139442231075697, | |
| "grad_norm": 1.051251769065857, | |
| "learning_rate": 8.16950470548067e-07, | |
| "loss": 0.5175900459289551, | |
| "step": 1764 | |
| }, | |
| { | |
| "epoch": 3.5179282868525896, | |
| "grad_norm": 5.374156951904297, | |
| "learning_rate": 8.134701214091691e-07, | |
| "loss": 0.19936859607696533, | |
| "step": 1766 | |
| }, | |
| { | |
| "epoch": 3.5219123505976095, | |
| "grad_norm": 1.134244680404663, | |
| "learning_rate": 8.100167653004285e-07, | |
| "loss": 0.09222012758255005, | |
| "step": 1768 | |
| }, | |
| { | |
| "epoch": 3.5258964143426295, | |
| "grad_norm": 1.0654293298721313, | |
| "learning_rate": 8.065904397104543e-07, | |
| "loss": 0.6717595458030701, | |
| "step": 1770 | |
| }, | |
| { | |
| "epoch": 3.5298804780876494, | |
| "grad_norm": 2.4975504875183105, | |
| "learning_rate": 8.031911818344201e-07, | |
| "loss": 0.5180625915527344, | |
| "step": 1772 | |
| }, | |
| { | |
| "epoch": 3.5338645418326693, | |
| "grad_norm": 0.9296510219573975, | |
| "learning_rate": 7.998190285736589e-07, | |
| "loss": 0.6407575607299805, | |
| "step": 1774 | |
| }, | |
| { | |
| "epoch": 3.537848605577689, | |
| "grad_norm": 2.6143455505371094, | |
| "learning_rate": 7.964740165352664e-07, | |
| "loss": 0.6667947769165039, | |
| "step": 1776 | |
| }, | |
| { | |
| "epoch": 3.541832669322709, | |
| "grad_norm": 0.19827701151371002, | |
| "learning_rate": 7.931561820317005e-07, | |
| "loss": 0.023438258096575737, | |
| "step": 1778 | |
| }, | |
| { | |
| "epoch": 3.545816733067729, | |
| "grad_norm": 1.148992657661438, | |
| "learning_rate": 7.898655610803869e-07, | |
| "loss": 0.6734960675239563, | |
| "step": 1780 | |
| }, | |
| { | |
| "epoch": 3.549800796812749, | |
| "grad_norm": 1.8085567951202393, | |
| "learning_rate": 7.866021894033296e-07, | |
| "loss": 0.6972249150276184, | |
| "step": 1782 | |
| }, | |
| { | |
| "epoch": 3.553784860557769, | |
| "grad_norm": 2.9096920490264893, | |
| "learning_rate": 7.833661024267235e-07, | |
| "loss": 0.6476399302482605, | |
| "step": 1784 | |
| }, | |
| { | |
| "epoch": 3.557768924302789, | |
| "grad_norm": 0.7224079966545105, | |
| "learning_rate": 7.80157335280568e-07, | |
| "loss": 0.9946411848068237, | |
| "step": 1786 | |
| }, | |
| { | |
| "epoch": 3.5617529880478087, | |
| "grad_norm": 1.2070460319519043, | |
| "learning_rate": 7.769759227982855e-07, | |
| "loss": 0.711801290512085, | |
| "step": 1788 | |
| }, | |
| { | |
| "epoch": 3.5657370517928286, | |
| "grad_norm": 2.714474678039551, | |
| "learning_rate": 7.738218995163462e-07, | |
| "loss": 0.15059031546115875, | |
| "step": 1790 | |
| }, | |
| { | |
| "epoch": 3.5697211155378485, | |
| "grad_norm": 1.3999918699264526, | |
| "learning_rate": 7.70695299673891e-07, | |
| "loss": 0.139665424823761, | |
| "step": 1792 | |
| }, | |
| { | |
| "epoch": 3.5737051792828685, | |
| "grad_norm": 0.37299129366874695, | |
| "learning_rate": 7.67596157212359e-07, | |
| "loss": 0.11374976485967636, | |
| "step": 1794 | |
| }, | |
| { | |
| "epoch": 3.5776892430278884, | |
| "grad_norm": 0.8067252039909363, | |
| "learning_rate": 7.645245057751201e-07, | |
| "loss": 0.6304631233215332, | |
| "step": 1796 | |
| }, | |
| { | |
| "epoch": 3.5816733067729083, | |
| "grad_norm": 1.578432559967041, | |
| "learning_rate": 7.614803787071115e-07, | |
| "loss": 0.22770892083644867, | |
| "step": 1798 | |
| }, | |
| { | |
| "epoch": 3.585657370517928, | |
| "grad_norm": 3.3027656078338623, | |
| "learning_rate": 7.584638090544717e-07, | |
| "loss": 0.20699705183506012, | |
| "step": 1800 | |
| }, | |
| { | |
| "epoch": 3.589641434262948, | |
| "grad_norm": 0.14634272456169128, | |
| "learning_rate": 7.554748295641862e-07, | |
| "loss": 0.055411506444215775, | |
| "step": 1802 | |
| }, | |
| { | |
| "epoch": 3.593625498007968, | |
| "grad_norm": 1.2589038610458374, | |
| "learning_rate": 7.525134726837289e-07, | |
| "loss": 0.15108336508274078, | |
| "step": 1804 | |
| }, | |
| { | |
| "epoch": 3.597609561752988, | |
| "grad_norm": 1.8965911865234375, | |
| "learning_rate": 7.49579770560711e-07, | |
| "loss": 0.4452376961708069, | |
| "step": 1806 | |
| }, | |
| { | |
| "epoch": 3.601593625498008, | |
| "grad_norm": 1.1629970073699951, | |
| "learning_rate": 7.46673755042531e-07, | |
| "loss": 0.6423868536949158, | |
| "step": 1808 | |
| }, | |
| { | |
| "epoch": 3.605577689243028, | |
| "grad_norm": 0.5293740630149841, | |
| "learning_rate": 7.437954576760312e-07, | |
| "loss": 0.21336103975772858, | |
| "step": 1810 | |
| }, | |
| { | |
| "epoch": 3.6095617529880477, | |
| "grad_norm": 1.164920449256897, | |
| "learning_rate": 7.409449097071536e-07, | |
| "loss": 0.5466434359550476, | |
| "step": 1812 | |
| }, | |
| { | |
| "epoch": 3.6135458167330676, | |
| "grad_norm": 1.1033563613891602, | |
| "learning_rate": 7.381221420805999e-07, | |
| "loss": 0.6399943232536316, | |
| "step": 1814 | |
| }, | |
| { | |
| "epoch": 3.6175298804780875, | |
| "grad_norm": 1.056943416595459, | |
| "learning_rate": 7.353271854394979e-07, | |
| "loss": 0.5917325019836426, | |
| "step": 1816 | |
| }, | |
| { | |
| "epoch": 3.6215139442231075, | |
| "grad_norm": 0.9444670677185059, | |
| "learning_rate": 7.325600701250674e-07, | |
| "loss": 0.7685708403587341, | |
| "step": 1818 | |
| }, | |
| { | |
| "epoch": 3.6254980079681274, | |
| "grad_norm": 1.8602865934371948, | |
| "learning_rate": 7.298208261762906e-07, | |
| "loss": 0.45633015036582947, | |
| "step": 1820 | |
| }, | |
| { | |
| "epoch": 3.6294820717131473, | |
| "grad_norm": 0.10787267237901688, | |
| "learning_rate": 7.271094833295859e-07, | |
| "loss": 0.011536069214344025, | |
| "step": 1822 | |
| }, | |
| { | |
| "epoch": 3.633466135458167, | |
| "grad_norm": 0.2886284291744232, | |
| "learning_rate": 7.244260710184868e-07, | |
| "loss": 0.024275042116642, | |
| "step": 1824 | |
| }, | |
| { | |
| "epoch": 3.637450199203187, | |
| "grad_norm": 0.6795600652694702, | |
| "learning_rate": 7.21770618373321e-07, | |
| "loss": 0.45940348505973816, | |
| "step": 1826 | |
| }, | |
| { | |
| "epoch": 3.641434262948207, | |
| "grad_norm": 2.2104618549346924, | |
| "learning_rate": 7.191431542208935e-07, | |
| "loss": 0.6470014452934265, | |
| "step": 1828 | |
| }, | |
| { | |
| "epoch": 3.645418326693227, | |
| "grad_norm": 1.12752103805542, | |
| "learning_rate": 7.165437070841758e-07, | |
| "loss": 0.7721574902534485, | |
| "step": 1830 | |
| }, | |
| { | |
| "epoch": 3.649402390438247, | |
| "grad_norm": 6.11736536026001, | |
| "learning_rate": 7.139723051819938e-07, | |
| "loss": 0.5740348696708679, | |
| "step": 1832 | |
| }, | |
| { | |
| "epoch": 3.653386454183267, | |
| "grad_norm": 0.4044356048107147, | |
| "learning_rate": 7.114289764287227e-07, | |
| "loss": 0.05502355471253395, | |
| "step": 1834 | |
| }, | |
| { | |
| "epoch": 3.6573705179282867, | |
| "grad_norm": 4.303436279296875, | |
| "learning_rate": 7.08913748433985e-07, | |
| "loss": 0.17597807943820953, | |
| "step": 1836 | |
| }, | |
| { | |
| "epoch": 3.6613545816733066, | |
| "grad_norm": 1.0884654521942139, | |
| "learning_rate": 7.064266485023493e-07, | |
| "loss": 0.6930414438247681, | |
| "step": 1838 | |
| }, | |
| { | |
| "epoch": 3.6653386454183265, | |
| "grad_norm": 2.256512403488159, | |
| "learning_rate": 7.039677036330331e-07, | |
| "loss": 0.6587978601455688, | |
| "step": 1840 | |
| }, | |
| { | |
| "epoch": 3.6693227091633465, | |
| "grad_norm": 0.19702738523483276, | |
| "learning_rate": 7.015369405196132e-07, | |
| "loss": 0.016245799139142036, | |
| "step": 1842 | |
| }, | |
| { | |
| "epoch": 3.6733067729083664, | |
| "grad_norm": 0.9400996565818787, | |
| "learning_rate": 6.991343855497312e-07, | |
| "loss": 0.15207843482494354, | |
| "step": 1844 | |
| }, | |
| { | |
| "epoch": 3.6772908366533863, | |
| "grad_norm": 1.0055437088012695, | |
| "learning_rate": 6.967600648048113e-07, | |
| "loss": 0.6164069175720215, | |
| "step": 1846 | |
| }, | |
| { | |
| "epoch": 3.681274900398406, | |
| "grad_norm": 1.8582080602645874, | |
| "learning_rate": 6.944140040597742e-07, | |
| "loss": 0.7226882576942444, | |
| "step": 1848 | |
| }, | |
| { | |
| "epoch": 3.685258964143426, | |
| "grad_norm": 1.656290054321289, | |
| "learning_rate": 6.920962287827587e-07, | |
| "loss": 0.07943466305732727, | |
| "step": 1850 | |
| }, | |
| { | |
| "epoch": 3.6892430278884465, | |
| "grad_norm": 1.666813611984253, | |
| "learning_rate": 6.898067641348459e-07, | |
| "loss": 0.30842339992523193, | |
| "step": 1852 | |
| }, | |
| { | |
| "epoch": 3.6932270916334664, | |
| "grad_norm": 0.8802257776260376, | |
| "learning_rate": 6.875456349697834e-07, | |
| "loss": 0.6316725611686707, | |
| "step": 1854 | |
| }, | |
| { | |
| "epoch": 3.6972111553784863, | |
| "grad_norm": 2.5803232192993164, | |
| "learning_rate": 6.853128658337188e-07, | |
| "loss": 0.09659645706415176, | |
| "step": 1856 | |
| }, | |
| { | |
| "epoch": 3.7011952191235062, | |
| "grad_norm": 1.351311206817627, | |
| "learning_rate": 6.831084809649302e-07, | |
| "loss": 0.6809911131858826, | |
| "step": 1858 | |
| }, | |
| { | |
| "epoch": 3.705179282868526, | |
| "grad_norm": 1.1612941026687622, | |
| "learning_rate": 6.809325042935666e-07, | |
| "loss": 0.3540644943714142, | |
| "step": 1860 | |
| }, | |
| { | |
| "epoch": 3.709163346613546, | |
| "grad_norm": 0.9889734387397766, | |
| "learning_rate": 6.787849594413833e-07, | |
| "loss": 0.6793351173400879, | |
| "step": 1862 | |
| }, | |
| { | |
| "epoch": 3.713147410358566, | |
| "grad_norm": 1.0778642892837524, | |
| "learning_rate": 6.766658697214906e-07, | |
| "loss": 0.6664227247238159, | |
| "step": 1864 | |
| }, | |
| { | |
| "epoch": 3.717131474103586, | |
| "grad_norm": 2.6285629272460938, | |
| "learning_rate": 6.745752581380965e-07, | |
| "loss": 0.33559897541999817, | |
| "step": 1866 | |
| }, | |
| { | |
| "epoch": 3.721115537848606, | |
| "grad_norm": 1.0389450788497925, | |
| "learning_rate": 6.72513147386261e-07, | |
| "loss": 0.5156994462013245, | |
| "step": 1868 | |
| }, | |
| { | |
| "epoch": 3.7250996015936257, | |
| "grad_norm": 0.9331614375114441, | |
| "learning_rate": 6.704795598516451e-07, | |
| "loss": 0.5414950251579285, | |
| "step": 1870 | |
| }, | |
| { | |
| "epoch": 3.7290836653386457, | |
| "grad_norm": 1.0866365432739258, | |
| "learning_rate": 6.684745176102714e-07, | |
| "loss": 0.735094428062439, | |
| "step": 1872 | |
| }, | |
| { | |
| "epoch": 3.7330677290836656, | |
| "grad_norm": 1.4017014503479004, | |
| "learning_rate": 6.664980424282842e-07, | |
| "loss": 0.2802731692790985, | |
| "step": 1874 | |
| }, | |
| { | |
| "epoch": 3.7370517928286855, | |
| "grad_norm": 2.2784199714660645, | |
| "learning_rate": 6.645501557617104e-07, | |
| "loss": 0.5592929124832153, | |
| "step": 1876 | |
| }, | |
| { | |
| "epoch": 3.7410358565737054, | |
| "grad_norm": 4.115759372711182, | |
| "learning_rate": 6.626308787562294e-07, | |
| "loss": 0.41764435172080994, | |
| "step": 1878 | |
| }, | |
| { | |
| "epoch": 3.7450199203187253, | |
| "grad_norm": 0.9289363622665405, | |
| "learning_rate": 6.607402322469429e-07, | |
| "loss": 0.6480333209037781, | |
| "step": 1880 | |
| }, | |
| { | |
| "epoch": 3.7490039840637452, | |
| "grad_norm": 2.0568838119506836, | |
| "learning_rate": 6.588782367581475e-07, | |
| "loss": 0.773093581199646, | |
| "step": 1882 | |
| }, | |
| { | |
| "epoch": 3.752988047808765, | |
| "grad_norm": 3.918016195297241, | |
| "learning_rate": 6.570449125031144e-07, | |
| "loss": 0.5592324137687683, | |
| "step": 1884 | |
| }, | |
| { | |
| "epoch": 3.756972111553785, | |
| "grad_norm": 0.8172755241394043, | |
| "learning_rate": 6.552402793838667e-07, | |
| "loss": 0.6393176913261414, | |
| "step": 1886 | |
| }, | |
| { | |
| "epoch": 3.760956175298805, | |
| "grad_norm": 0.3844411075115204, | |
| "learning_rate": 6.534643569909665e-07, | |
| "loss": 0.08161535859107971, | |
| "step": 1888 | |
| }, | |
| { | |
| "epoch": 3.764940239043825, | |
| "grad_norm": 2.660936117172241, | |
| "learning_rate": 6.517171646032988e-07, | |
| "loss": 0.7531623244285583, | |
| "step": 1890 | |
| }, | |
| { | |
| "epoch": 3.768924302788845, | |
| "grad_norm": 2.1934661865234375, | |
| "learning_rate": 6.499987211878666e-07, | |
| "loss": 0.6893159747123718, | |
| "step": 1892 | |
| }, | |
| { | |
| "epoch": 3.7729083665338647, | |
| "grad_norm": 1.1734172105789185, | |
| "learning_rate": 6.483090453995811e-07, | |
| "loss": 0.09743469953536987, | |
| "step": 1894 | |
| }, | |
| { | |
| "epoch": 3.7768924302788847, | |
| "grad_norm": 1.5317673683166504, | |
| "learning_rate": 6.466481555810608e-07, | |
| "loss": 0.6921253204345703, | |
| "step": 1896 | |
| }, | |
| { | |
| "epoch": 3.7808764940239046, | |
| "grad_norm": 0.8458757996559143, | |
| "learning_rate": 6.450160697624327e-07, | |
| "loss": 0.6649323105812073, | |
| "step": 1898 | |
| }, | |
| { | |
| "epoch": 3.7848605577689245, | |
| "grad_norm": 1.0291515588760376, | |
| "learning_rate": 6.434128056611361e-07, | |
| "loss": 0.6685061454772949, | |
| "step": 1900 | |
| }, | |
| { | |
| "epoch": 3.7888446215139444, | |
| "grad_norm": 0.8199694156646729, | |
| "learning_rate": 6.418383806817298e-07, | |
| "loss": 0.7103414535522461, | |
| "step": 1902 | |
| }, | |
| { | |
| "epoch": 3.7928286852589643, | |
| "grad_norm": 0.8696004748344421, | |
| "learning_rate": 6.40292811915704e-07, | |
| "loss": 0.6235980987548828, | |
| "step": 1904 | |
| }, | |
| { | |
| "epoch": 3.7968127490039842, | |
| "grad_norm": 2.7558107376098633, | |
| "learning_rate": 6.387761161412942e-07, | |
| "loss": 0.14641408622264862, | |
| "step": 1906 | |
| }, | |
| { | |
| "epoch": 3.800796812749004, | |
| "grad_norm": 0.8049102425575256, | |
| "learning_rate": 6.372883098232999e-07, | |
| "loss": 0.6313645839691162, | |
| "step": 1908 | |
| }, | |
| { | |
| "epoch": 3.804780876494024, | |
| "grad_norm": 1.0484040975570679, | |
| "learning_rate": 6.358294091129044e-07, | |
| "loss": 0.689453661441803, | |
| "step": 1910 | |
| }, | |
| { | |
| "epoch": 3.808764940239044, | |
| "grad_norm": 1.3624324798583984, | |
| "learning_rate": 6.34399429847501e-07, | |
| "loss": 0.4293438196182251, | |
| "step": 1912 | |
| }, | |
| { | |
| "epoch": 3.812749003984064, | |
| "grad_norm": 2.118128538131714, | |
| "learning_rate": 6.329983875505202e-07, | |
| "loss": 0.7885560989379883, | |
| "step": 1914 | |
| }, | |
| { | |
| "epoch": 3.816733067729084, | |
| "grad_norm": 1.88889479637146, | |
| "learning_rate": 6.316262974312607e-07, | |
| "loss": 0.12458698451519012, | |
| "step": 1916 | |
| }, | |
| { | |
| "epoch": 3.8207171314741037, | |
| "grad_norm": 2.0474905967712402, | |
| "learning_rate": 6.302831743847255e-07, | |
| "loss": 0.7278786897659302, | |
| "step": 1918 | |
| }, | |
| { | |
| "epoch": 3.8247011952191237, | |
| "grad_norm": 1.8699114322662354, | |
| "learning_rate": 6.289690329914599e-07, | |
| "loss": 0.10339318215847015, | |
| "step": 1920 | |
| }, | |
| { | |
| "epoch": 3.8286852589641436, | |
| "grad_norm": 0.9766838550567627, | |
| "learning_rate": 6.276838875173931e-07, | |
| "loss": 0.7524492144584656, | |
| "step": 1922 | |
| }, | |
| { | |
| "epoch": 3.8326693227091635, | |
| "grad_norm": 0.34323349595069885, | |
| "learning_rate": 6.264277519136821e-07, | |
| "loss": 0.051684651523828506, | |
| "step": 1924 | |
| }, | |
| { | |
| "epoch": 3.8366533864541834, | |
| "grad_norm": 1.1233506202697754, | |
| "learning_rate": 6.252006398165622e-07, | |
| "loss": 0.7036517262458801, | |
| "step": 1926 | |
| }, | |
| { | |
| "epoch": 3.8406374501992033, | |
| "grad_norm": 1.529929757118225, | |
| "learning_rate": 6.240025645471986e-07, | |
| "loss": 0.8575693368911743, | |
| "step": 1928 | |
| }, | |
| { | |
| "epoch": 3.8446215139442232, | |
| "grad_norm": 0.11210882663726807, | |
| "learning_rate": 6.228335391115402e-07, | |
| "loss": 0.02451253868639469, | |
| "step": 1930 | |
| }, | |
| { | |
| "epoch": 3.848605577689243, | |
| "grad_norm": 1.864715576171875, | |
| "learning_rate": 6.216935762001803e-07, | |
| "loss": 0.5305463671684265, | |
| "step": 1932 | |
| }, | |
| { | |
| "epoch": 3.852589641434263, | |
| "grad_norm": 1.8157854080200195, | |
| "learning_rate": 6.205826881882179e-07, | |
| "loss": 0.13252875208854675, | |
| "step": 1934 | |
| }, | |
| { | |
| "epoch": 3.856573705179283, | |
| "grad_norm": 0.9740794897079468, | |
| "learning_rate": 6.195008871351232e-07, | |
| "loss": 0.7859750986099243, | |
| "step": 1936 | |
| }, | |
| { | |
| "epoch": 3.860557768924303, | |
| "grad_norm": 1.070713758468628, | |
| "learning_rate": 6.184481847846074e-07, | |
| "loss": 0.7027934789657593, | |
| "step": 1938 | |
| }, | |
| { | |
| "epoch": 3.864541832669323, | |
| "grad_norm": 1.440918207168579, | |
| "learning_rate": 6.174245925644948e-07, | |
| "loss": 0.30577710270881653, | |
| "step": 1940 | |
| }, | |
| { | |
| "epoch": 3.8685258964143427, | |
| "grad_norm": 2.0320322513580322, | |
| "learning_rate": 6.164301215865982e-07, | |
| "loss": 0.9369683265686035, | |
| "step": 1942 | |
| }, | |
| { | |
| "epoch": 3.8725099601593627, | |
| "grad_norm": 0.6125801801681519, | |
| "learning_rate": 6.154647826465999e-07, | |
| "loss": 0.03845952823758125, | |
| "step": 1944 | |
| }, | |
| { | |
| "epoch": 3.8764940239043826, | |
| "grad_norm": 3.9984986782073975, | |
| "learning_rate": 6.145285862239327e-07, | |
| "loss": 0.6496099233627319, | |
| "step": 1946 | |
| }, | |
| { | |
| "epoch": 3.8804780876494025, | |
| "grad_norm": 0.08795814216136932, | |
| "learning_rate": 6.136215424816668e-07, | |
| "loss": 0.04779617115855217, | |
| "step": 1948 | |
| }, | |
| { | |
| "epoch": 3.8844621513944224, | |
| "grad_norm": 0.9127535820007324, | |
| "learning_rate": 6.127436612664e-07, | |
| "loss": 0.6776239275932312, | |
| "step": 1950 | |
| }, | |
| { | |
| "epoch": 3.8884462151394423, | |
| "grad_norm": 1.5462641716003418, | |
| "learning_rate": 6.118949521081495e-07, | |
| "loss": 0.7221356630325317, | |
| "step": 1952 | |
| }, | |
| { | |
| "epoch": 3.8924302788844622, | |
| "grad_norm": 0.6864924430847168, | |
| "learning_rate": 6.11075424220251e-07, | |
| "loss": 0.6018074154853821, | |
| "step": 1954 | |
| }, | |
| { | |
| "epoch": 3.896414342629482, | |
| "grad_norm": 8.130626678466797, | |
| "learning_rate": 6.102850864992553e-07, | |
| "loss": 0.15544459223747253, | |
| "step": 1956 | |
| }, | |
| { | |
| "epoch": 3.900398406374502, | |
| "grad_norm": 1.5887444019317627, | |
| "learning_rate": 6.095239475248345e-07, | |
| "loss": 0.5947393178939819, | |
| "step": 1958 | |
| }, | |
| { | |
| "epoch": 3.904382470119522, | |
| "grad_norm": 0.9882814288139343, | |
| "learning_rate": 6.087920155596867e-07, | |
| "loss": 0.016275843605399132, | |
| "step": 1960 | |
| }, | |
| { | |
| "epoch": 3.908366533864542, | |
| "grad_norm": 0.3859656751155853, | |
| "learning_rate": 6.080892985494482e-07, | |
| "loss": 0.04228988662362099, | |
| "step": 1962 | |
| }, | |
| { | |
| "epoch": 3.912350597609562, | |
| "grad_norm": 1.2562545537948608, | |
| "learning_rate": 6.074158041226068e-07, | |
| "loss": 0.6111615300178528, | |
| "step": 1964 | |
| }, | |
| { | |
| "epoch": 3.9163346613545817, | |
| "grad_norm": 3.6256649494171143, | |
| "learning_rate": 6.067715395904173e-07, | |
| "loss": 0.6986129283905029, | |
| "step": 1966 | |
| }, | |
| { | |
| "epoch": 3.9203187250996017, | |
| "grad_norm": 1.0995627641677856, | |
| "learning_rate": 6.061565119468247e-07, | |
| "loss": 0.7141016125679016, | |
| "step": 1968 | |
| }, | |
| { | |
| "epoch": 3.9243027888446216, | |
| "grad_norm": 2.30956768989563, | |
| "learning_rate": 6.055707278683863e-07, | |
| "loss": 0.22550952434539795, | |
| "step": 1970 | |
| }, | |
| { | |
| "epoch": 3.9282868525896415, | |
| "grad_norm": 1.4764176607131958, | |
| "learning_rate": 6.050141937142003e-07, | |
| "loss": 0.1283264309167862, | |
| "step": 1972 | |
| }, | |
| { | |
| "epoch": 3.9322709163346614, | |
| "grad_norm": 0.9012427926063538, | |
| "learning_rate": 6.04486915525836e-07, | |
| "loss": 0.8311380743980408, | |
| "step": 1974 | |
| }, | |
| { | |
| "epoch": 3.9362549800796813, | |
| "grad_norm": 1.559435486793518, | |
| "learning_rate": 6.039888990272691e-07, | |
| "loss": 0.1916397362947464, | |
| "step": 1976 | |
| }, | |
| { | |
| "epoch": 3.9402390438247012, | |
| "grad_norm": 0.8929998874664307, | |
| "learning_rate": 6.035201496248188e-07, | |
| "loss": 0.6807030439376831, | |
| "step": 1978 | |
| }, | |
| { | |
| "epoch": 3.944223107569721, | |
| "grad_norm": 0.25589969754219055, | |
| "learning_rate": 6.030806724070893e-07, | |
| "loss": 0.07943480461835861, | |
| "step": 1980 | |
| }, | |
| { | |
| "epoch": 3.948207171314741, | |
| "grad_norm": 1.3471908569335938, | |
| "learning_rate": 6.026704721449152e-07, | |
| "loss": 0.805228590965271, | |
| "step": 1982 | |
| }, | |
| { | |
| "epoch": 3.952191235059761, | |
| "grad_norm": 0.9127321243286133, | |
| "learning_rate": 6.022895532913081e-07, | |
| "loss": 0.6197107434272766, | |
| "step": 1984 | |
| }, | |
| { | |
| "epoch": 3.956175298804781, | |
| "grad_norm": 2.661827802658081, | |
| "learning_rate": 6.019379199814108e-07, | |
| "loss": 0.49690714478492737, | |
| "step": 1986 | |
| }, | |
| { | |
| "epoch": 3.960159362549801, | |
| "grad_norm": 0.08383038640022278, | |
| "learning_rate": 6.016155760324495e-07, | |
| "loss": 0.00437126774340868, | |
| "step": 1988 | |
| }, | |
| { | |
| "epoch": 3.9641434262948207, | |
| "grad_norm": 0.9041069746017456, | |
| "learning_rate": 6.013225249436945e-07, | |
| "loss": 0.7191581726074219, | |
| "step": 1990 | |
| }, | |
| { | |
| "epoch": 3.9681274900398407, | |
| "grad_norm": 1.6254363059997559, | |
| "learning_rate": 6.010587698964216e-07, | |
| "loss": 0.5217870473861694, | |
| "step": 1992 | |
| }, | |
| { | |
| "epoch": 3.9721115537848606, | |
| "grad_norm": 1.7610574960708618, | |
| "learning_rate": 6.008243137538774e-07, | |
| "loss": 0.7896353006362915, | |
| "step": 1994 | |
| }, | |
| { | |
| "epoch": 3.9760956175298805, | |
| "grad_norm": 0.506505012512207, | |
| "learning_rate": 6.006191590612478e-07, | |
| "loss": 0.06072104722261429, | |
| "step": 1996 | |
| }, | |
| { | |
| "epoch": 3.9800796812749004, | |
| "grad_norm": 1.679490566253662, | |
| "learning_rate": 6.004433080456312e-07, | |
| "loss": 0.0873764306306839, | |
| "step": 1998 | |
| }, | |
| { | |
| "epoch": 3.9840637450199203, | |
| "grad_norm": 1.07437002658844, | |
| "learning_rate": 6.002967626160147e-07, | |
| "loss": 0.6510695219039917, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 3.9880478087649402, | |
| "grad_norm": 1.063508152961731, | |
| "learning_rate": 6.001795243632514e-07, | |
| "loss": 0.6352625489234924, | |
| "step": 2002 | |
| }, | |
| { | |
| "epoch": 3.99203187250996, | |
| "grad_norm": 0.9537666440010071, | |
| "learning_rate": 6.00091594560045e-07, | |
| "loss": 0.7177177667617798, | |
| "step": 2004 | |
| }, | |
| { | |
| "epoch": 3.99601593625498, | |
| "grad_norm": 4.541738986968994, | |
| "learning_rate": 6.000329741609355e-07, | |
| "loss": 0.23844213783740997, | |
| "step": 2006 | |
| }, | |
| { | |
| "epoch": 4.0, | |
| "grad_norm": 0.5011924505233765, | |
| "learning_rate": 6.000036638022886e-07, | |
| "loss": 0.15317194163799286, | |
| "step": 2008 | |
| }, | |
| { | |
| "epoch": 4.0, | |
| "step": 2008, | |
| "total_flos": 3.519329208629199e+18, | |
| "train_loss": 0.7788769946752703, | |
| "train_runtime": 8944.5824, | |
| "train_samples_per_second": 6.735, | |
| "train_steps_per_second": 0.224 | |
| } | |
| ], | |
| "logging_steps": 2, | |
| "max_steps": 2008, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 4, | |
| "save_steps": 99999, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": false, | |
| "should_training_stop": false | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 3.519329208629199e+18, | |
| "train_batch_size": 3, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |