Image-Text-to-Text
Transformers
Safetensors
qwen3_5
llama-factory
full
Generated from Trainer
conversational
Instructions to use furproxy/9b-125 with libraries, inference providers, notebooks, and local apps. Follow these links to get started.
- Libraries
- Transformers
How to use furproxy/9b-125 with Transformers:
```python
# Use a pipeline as a high-level helper
from transformers import pipeline

pipe = pipeline("image-text-to-text", model="furproxy/9b-125")
messages = [
    {
        "role": "user",
        "content": [
            {"type": "image", "url": "https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/p-blog/candy.JPG"},
            {"type": "text", "text": "What animal is on the candy?"}
        ]
    },
]
pipe(text=messages)

# Load model directly
from transformers import AutoProcessor, AutoModelForImageTextToText

processor = AutoProcessor.from_pretrained("furproxy/9b-125")
model = AutoModelForImageTextToText.from_pretrained("furproxy/9b-125")
messages = [
    {
        "role": "user",
        "content": [
            {"type": "image", "url": "https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/p-blog/candy.JPG"},
            {"type": "text", "text": "What animal is on the candy?"}
        ]
    },
]
inputs = processor.apply_chat_template(
    messages,
    add_generation_prompt=True,
    tokenize=True,
    return_dict=True,
    return_tensors="pt",
).to(model.device)

outputs = model.generate(**inputs, max_new_tokens=40)
print(processor.decode(outputs[0][inputs["input_ids"].shape[-1]:]))
```
- Notebooks
- Google Colab
- Kaggle
- Local Apps Settings
- vLLM
How to use furproxy/9b-125 with vLLM:
Install from pip and serve model
```bash
# Install vLLM from pip:
pip install vllm

# Start the vLLM server:
vllm serve "furproxy/9b-125"

# Call the server using curl (OpenAI-compatible API):
curl -X POST "http://localhost:8000/v1/chat/completions" \
    -H "Content-Type: application/json" \
    --data '{
        "model": "furproxy/9b-125",
        "messages": [
            {
                "role": "user",
                "content": [
                    {
                        "type": "text",
                        "text": "Describe this image in one sentence."
                    },
                    {
                        "type": "image_url",
                        "image_url": {
                            "url": "https://cdn.britannica.com/61/93061-050-99147DCE/Statue-of-Liberty-Island-New-York-Bay.jpg"
                        }
                    }
                ]
            }
        ]
    }'
```
Use Docker
docker model run hf.co/furproxy/9b-125
- SGLang
How to use furproxy/9b-125 with SGLang:
Install from pip and serve model
```bash
# Install SGLang from pip:
pip install sglang

# Start the SGLang server:
python3 -m sglang.launch_server \
    --model-path "furproxy/9b-125" \
    --host 0.0.0.0 \
    --port 30000

# Call the server using curl (OpenAI-compatible API):
curl -X POST "http://localhost:30000/v1/chat/completions" \
    -H "Content-Type: application/json" \
    --data '{
        "model": "furproxy/9b-125",
        "messages": [
            {
                "role": "user",
                "content": [
                    {
                        "type": "text",
                        "text": "Describe this image in one sentence."
                    },
                    {
                        "type": "image_url",
                        "image_url": {
                            "url": "https://cdn.britannica.com/61/93061-050-99147DCE/Statue-of-Liberty-Island-New-York-Bay.jpg"
                        }
                    }
                ]
            }
        ]
    }'
```
Use Docker images
```bash
docker run --gpus all \
    --shm-size 32g \
    -p 30000:30000 \
    -v ~/.cache/huggingface:/root/.cache/huggingface \
    --env "HF_TOKEN=<secret>" \
    --ipc=host \
    lmsysorg/sglang:latest \
    python3 -m sglang.launch_server \
        --model-path "furproxy/9b-125" \
        --host 0.0.0.0 \
        --port 30000

# Call the server using curl (OpenAI-compatible API):
curl -X POST "http://localhost:30000/v1/chat/completions" \
    -H "Content-Type: application/json" \
    --data '{
        "model": "furproxy/9b-125",
        "messages": [
            {
                "role": "user",
                "content": [
                    {
                        "type": "text",
                        "text": "Describe this image in one sentence."
                    },
                    {
                        "type": "image_url",
                        "image_url": {
                            "url": "https://cdn.britannica.com/61/93061-050-99147DCE/Statue-of-Liberty-Island-New-York-Bay.jpg"
                        }
                    }
                ]
            }
        ]
    }'
```
- Docker Model Runner
How to use furproxy/9b-125 with Docker Model Runner:
docker model run hf.co/furproxy/9b-125
| { | |
| "best_global_step": null, | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 4.0, | |
| "eval_steps": 500, | |
| "global_step": 3276, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.002442002442002442, | |
| "grad_norm": 6.441359996795654, | |
| "learning_rate": 1.829268292682927e-08, | |
| "loss": 1.9273300170898438, | |
| "step": 2 | |
| }, | |
| { | |
| "epoch": 0.004884004884004884, | |
| "grad_norm": 4.505171775817871, | |
| "learning_rate": 5.48780487804878e-08, | |
| "loss": 2.098156452178955, | |
| "step": 4 | |
| }, | |
| { | |
| "epoch": 0.007326007326007326, | |
| "grad_norm": 22.627925872802734, | |
| "learning_rate": 9.146341463414634e-08, | |
| "loss": 2.4185168743133545, | |
| "step": 6 | |
| }, | |
| { | |
| "epoch": 0.009768009768009768, | |
| "grad_norm": 4.893591403961182, | |
| "learning_rate": 1.2804878048780488e-07, | |
| "loss": 1.9591546058654785, | |
| "step": 8 | |
| }, | |
| { | |
| "epoch": 0.01221001221001221, | |
| "grad_norm": 6.887365818023682, | |
| "learning_rate": 1.6463414634146343e-07, | |
| "loss": 2.1646714210510254, | |
| "step": 10 | |
| }, | |
| { | |
| "epoch": 0.014652014652014652, | |
| "grad_norm": 10.891618728637695, | |
| "learning_rate": 2.0121951219512198e-07, | |
| "loss": 1.9397008419036865, | |
| "step": 12 | |
| }, | |
| { | |
| "epoch": 0.017094017094017096, | |
| "grad_norm": 3.5464656352996826, | |
| "learning_rate": 2.378048780487805e-07, | |
| "loss": 1.682539939880371, | |
| "step": 14 | |
| }, | |
| { | |
| "epoch": 0.019536019536019536, | |
| "grad_norm": 36.83236312866211, | |
| "learning_rate": 2.74390243902439e-07, | |
| "loss": 2.0558395385742188, | |
| "step": 16 | |
| }, | |
| { | |
| "epoch": 0.02197802197802198, | |
| "grad_norm": 23.17645835876465, | |
| "learning_rate": 3.1097560975609756e-07, | |
| "loss": 2.718336582183838, | |
| "step": 18 | |
| }, | |
| { | |
| "epoch": 0.02442002442002442, | |
| "grad_norm": 19.684049606323242, | |
| "learning_rate": 3.475609756097561e-07, | |
| "loss": 1.878582239151001, | |
| "step": 20 | |
| }, | |
| { | |
| "epoch": 0.026862026862026864, | |
| "grad_norm": 18.354463577270508, | |
| "learning_rate": 3.8414634146341466e-07, | |
| "loss": 2.064652442932129, | |
| "step": 22 | |
| }, | |
| { | |
| "epoch": 0.029304029304029304, | |
| "grad_norm": 4.063495635986328, | |
| "learning_rate": 4.207317073170732e-07, | |
| "loss": 1.9824515581130981, | |
| "step": 24 | |
| }, | |
| { | |
| "epoch": 0.031746031746031744, | |
| "grad_norm": 6.251904487609863, | |
| "learning_rate": 4.573170731707317e-07, | |
| "loss": 1.91408371925354, | |
| "step": 26 | |
| }, | |
| { | |
| "epoch": 0.03418803418803419, | |
| "grad_norm": 7.478927135467529, | |
| "learning_rate": 4.939024390243903e-07, | |
| "loss": 1.9867796897888184, | |
| "step": 28 | |
| }, | |
| { | |
| "epoch": 0.03663003663003663, | |
| "grad_norm": 16.5777587890625, | |
| "learning_rate": 5.304878048780488e-07, | |
| "loss": 1.7946174144744873, | |
| "step": 30 | |
| }, | |
| { | |
| "epoch": 0.03907203907203907, | |
| "grad_norm": 3.6455445289611816, | |
| "learning_rate": 5.670731707317073e-07, | |
| "loss": 1.353484869003296, | |
| "step": 32 | |
| }, | |
| { | |
| "epoch": 0.04151404151404151, | |
| "grad_norm": 7.290659427642822, | |
| "learning_rate": 6.036585365853659e-07, | |
| "loss": 1.836714506149292, | |
| "step": 34 | |
| }, | |
| { | |
| "epoch": 0.04395604395604396, | |
| "grad_norm": 5.724033832550049, | |
| "learning_rate": 6.402439024390244e-07, | |
| "loss": 1.416590929031372, | |
| "step": 36 | |
| }, | |
| { | |
| "epoch": 0.0463980463980464, | |
| "grad_norm": 15.72848892211914, | |
| "learning_rate": 6.768292682926829e-07, | |
| "loss": 1.8037256002426147, | |
| "step": 38 | |
| }, | |
| { | |
| "epoch": 0.04884004884004884, | |
| "grad_norm": 14.915055274963379, | |
| "learning_rate": 7.134146341463414e-07, | |
| "loss": 1.6154413223266602, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 0.05128205128205128, | |
| "grad_norm": 4.961650371551514, | |
| "learning_rate": 7.5e-07, | |
| "loss": 1.749605417251587, | |
| "step": 42 | |
| }, | |
| { | |
| "epoch": 0.05372405372405373, | |
| "grad_norm": 4.586040019989014, | |
| "learning_rate": 7.865853658536586e-07, | |
| "loss": 1.7227602005004883, | |
| "step": 44 | |
| }, | |
| { | |
| "epoch": 0.05616605616605617, | |
| "grad_norm": 5.149880409240723, | |
| "learning_rate": 8.231707317073171e-07, | |
| "loss": 1.621924877166748, | |
| "step": 46 | |
| }, | |
| { | |
| "epoch": 0.05860805860805861, | |
| "grad_norm": 15.53164005279541, | |
| "learning_rate": 8.597560975609755e-07, | |
| "loss": 1.3624954223632812, | |
| "step": 48 | |
| }, | |
| { | |
| "epoch": 0.06105006105006105, | |
| "grad_norm": 13.610709190368652, | |
| "learning_rate": 8.963414634146341e-07, | |
| "loss": 1.2766221761703491, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 0.06349206349206349, | |
| "grad_norm": 4.839217662811279, | |
| "learning_rate": 9.329268292682927e-07, | |
| "loss": 1.5215702056884766, | |
| "step": 52 | |
| }, | |
| { | |
| "epoch": 0.06593406593406594, | |
| "grad_norm": 12.606855392456055, | |
| "learning_rate": 9.695121951219512e-07, | |
| "loss": 1.234262466430664, | |
| "step": 54 | |
| }, | |
| { | |
| "epoch": 0.06837606837606838, | |
| "grad_norm": 3.4070212841033936, | |
| "learning_rate": 1.0060975609756098e-06, | |
| "loss": 1.5606050491333008, | |
| "step": 56 | |
| }, | |
| { | |
| "epoch": 0.07081807081807082, | |
| "grad_norm": 3.2556285858154297, | |
| "learning_rate": 1.0426829268292682e-06, | |
| "loss": 1.5480737686157227, | |
| "step": 58 | |
| }, | |
| { | |
| "epoch": 0.07326007326007326, | |
| "grad_norm": 3.512446641921997, | |
| "learning_rate": 1.0792682926829268e-06, | |
| "loss": 1.5879229307174683, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 0.0757020757020757, | |
| "grad_norm": 4.8943705558776855, | |
| "learning_rate": 1.1158536585365854e-06, | |
| "loss": 1.1277639865875244, | |
| "step": 62 | |
| }, | |
| { | |
| "epoch": 0.07814407814407814, | |
| "grad_norm": 3.4585883617401123, | |
| "learning_rate": 1.152439024390244e-06, | |
| "loss": 1.7503468990325928, | |
| "step": 64 | |
| }, | |
| { | |
| "epoch": 0.08058608058608059, | |
| "grad_norm": 9.716004371643066, | |
| "learning_rate": 1.1890243902439024e-06, | |
| "loss": 1.3350019454956055, | |
| "step": 66 | |
| }, | |
| { | |
| "epoch": 0.08302808302808302, | |
| "grad_norm": 3.3465495109558105, | |
| "learning_rate": 1.225609756097561e-06, | |
| "loss": 1.1846568584442139, | |
| "step": 68 | |
| }, | |
| { | |
| "epoch": 0.08547008547008547, | |
| "grad_norm": 1.5333378314971924, | |
| "learning_rate": 1.2621951219512194e-06, | |
| "loss": 1.249038815498352, | |
| "step": 70 | |
| }, | |
| { | |
| "epoch": 0.08791208791208792, | |
| "grad_norm": 3.3081634044647217, | |
| "learning_rate": 1.298780487804878e-06, | |
| "loss": 1.520157814025879, | |
| "step": 72 | |
| }, | |
| { | |
| "epoch": 0.09035409035409035, | |
| "grad_norm": 3.575029134750366, | |
| "learning_rate": 1.3353658536585366e-06, | |
| "loss": 1.487170934677124, | |
| "step": 74 | |
| }, | |
| { | |
| "epoch": 0.0927960927960928, | |
| "grad_norm": 6.776123046875, | |
| "learning_rate": 1.3719512195121952e-06, | |
| "loss": 1.4752193689346313, | |
| "step": 76 | |
| }, | |
| { | |
| "epoch": 0.09523809523809523, | |
| "grad_norm": 3.180765151977539, | |
| "learning_rate": 1.4085365853658536e-06, | |
| "loss": 1.32558012008667, | |
| "step": 78 | |
| }, | |
| { | |
| "epoch": 0.09768009768009768, | |
| "grad_norm": 13.021065711975098, | |
| "learning_rate": 1.4451219512195122e-06, | |
| "loss": 1.2420995235443115, | |
| "step": 80 | |
| }, | |
| { | |
| "epoch": 0.10012210012210013, | |
| "grad_norm": 8.390753746032715, | |
| "learning_rate": 1.4817073170731708e-06, | |
| "loss": 1.1292033195495605, | |
| "step": 82 | |
| }, | |
| { | |
| "epoch": 0.10256410256410256, | |
| "grad_norm": 3.938790798187256, | |
| "learning_rate": 1.5182926829268292e-06, | |
| "loss": 1.1599137783050537, | |
| "step": 84 | |
| }, | |
| { | |
| "epoch": 0.10500610500610501, | |
| "grad_norm": 4.348516941070557, | |
| "learning_rate": 1.5548780487804878e-06, | |
| "loss": 1.3389999866485596, | |
| "step": 86 | |
| }, | |
| { | |
| "epoch": 0.10744810744810745, | |
| "grad_norm": 4.486913681030273, | |
| "learning_rate": 1.5914634146341464e-06, | |
| "loss": 1.5000271797180176, | |
| "step": 88 | |
| }, | |
| { | |
| "epoch": 0.10989010989010989, | |
| "grad_norm": 3.7140731811523438, | |
| "learning_rate": 1.628048780487805e-06, | |
| "loss": 1.2032554149627686, | |
| "step": 90 | |
| }, | |
| { | |
| "epoch": 0.11233211233211234, | |
| "grad_norm": 5.849864959716797, | |
| "learning_rate": 1.6646341463414637e-06, | |
| "loss": 1.5772106647491455, | |
| "step": 92 | |
| }, | |
| { | |
| "epoch": 0.11477411477411477, | |
| "grad_norm": 11.207864761352539, | |
| "learning_rate": 1.7012195121951218e-06, | |
| "loss": 1.1542203426361084, | |
| "step": 94 | |
| }, | |
| { | |
| "epoch": 0.11721611721611722, | |
| "grad_norm": 6.1343092918396, | |
| "learning_rate": 1.7378048780487804e-06, | |
| "loss": 1.4634003639221191, | |
| "step": 96 | |
| }, | |
| { | |
| "epoch": 0.11965811965811966, | |
| "grad_norm": 12.016220092773438, | |
| "learning_rate": 1.774390243902439e-06, | |
| "loss": 1.4061574935913086, | |
| "step": 98 | |
| }, | |
| { | |
| "epoch": 0.1221001221001221, | |
| "grad_norm": 3.215977668762207, | |
| "learning_rate": 1.8109756097560976e-06, | |
| "loss": 1.1406853199005127, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.12454212454212454, | |
| "grad_norm": 3.398221969604492, | |
| "learning_rate": 1.847560975609756e-06, | |
| "loss": 1.2488505840301514, | |
| "step": 102 | |
| }, | |
| { | |
| "epoch": 0.12698412698412698, | |
| "grad_norm": 2.5725579261779785, | |
| "learning_rate": 1.8841463414634146e-06, | |
| "loss": 1.0142134428024292, | |
| "step": 104 | |
| }, | |
| { | |
| "epoch": 0.12942612942612944, | |
| "grad_norm": 1.9511150121688843, | |
| "learning_rate": 1.9207317073170733e-06, | |
| "loss": 0.9592709541320801, | |
| "step": 106 | |
| }, | |
| { | |
| "epoch": 0.13186813186813187, | |
| "grad_norm": 3.6233012676239014, | |
| "learning_rate": 1.9573170731707316e-06, | |
| "loss": 1.3949918746948242, | |
| "step": 108 | |
| }, | |
| { | |
| "epoch": 0.1343101343101343, | |
| "grad_norm": 8.483538627624512, | |
| "learning_rate": 1.9939024390243905e-06, | |
| "loss": 1.6442538499832153, | |
| "step": 110 | |
| }, | |
| { | |
| "epoch": 0.13675213675213677, | |
| "grad_norm": 27.255704879760742, | |
| "learning_rate": 2.030487804878049e-06, | |
| "loss": 1.4405344724655151, | |
| "step": 112 | |
| }, | |
| { | |
| "epoch": 0.1391941391941392, | |
| "grad_norm": 2.791217088699341, | |
| "learning_rate": 2.0670731707317072e-06, | |
| "loss": 1.2020237445831299, | |
| "step": 114 | |
| }, | |
| { | |
| "epoch": 0.14163614163614163, | |
| "grad_norm": 3.0012288093566895, | |
| "learning_rate": 2.1036585365853656e-06, | |
| "loss": 1.3609352111816406, | |
| "step": 116 | |
| }, | |
| { | |
| "epoch": 0.14407814407814407, | |
| "grad_norm": 8.366510391235352, | |
| "learning_rate": 2.1402439024390245e-06, | |
| "loss": 1.23958420753479, | |
| "step": 118 | |
| }, | |
| { | |
| "epoch": 0.14652014652014653, | |
| "grad_norm": 4.177932262420654, | |
| "learning_rate": 2.176829268292683e-06, | |
| "loss": 1.341966152191162, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 0.14896214896214896, | |
| "grad_norm": 4.080813407897949, | |
| "learning_rate": 2.2134146341463417e-06, | |
| "loss": 1.3612099885940552, | |
| "step": 122 | |
| }, | |
| { | |
| "epoch": 0.1514041514041514, | |
| "grad_norm": 3.6502838134765625, | |
| "learning_rate": 2.25e-06, | |
| "loss": 1.3284516334533691, | |
| "step": 124 | |
| }, | |
| { | |
| "epoch": 0.15384615384615385, | |
| "grad_norm": 3.3276309967041016, | |
| "learning_rate": 2.2865853658536584e-06, | |
| "loss": 1.1893606185913086, | |
| "step": 126 | |
| }, | |
| { | |
| "epoch": 0.1562881562881563, | |
| "grad_norm": 3.0986099243164062, | |
| "learning_rate": 2.3231707317073173e-06, | |
| "loss": 1.418761134147644, | |
| "step": 128 | |
| }, | |
| { | |
| "epoch": 0.15873015873015872, | |
| "grad_norm": 2.7775375843048096, | |
| "learning_rate": 2.3597560975609757e-06, | |
| "loss": 1.2669707536697388, | |
| "step": 130 | |
| }, | |
| { | |
| "epoch": 0.16117216117216118, | |
| "grad_norm": 3.8466997146606445, | |
| "learning_rate": 2.3963414634146345e-06, | |
| "loss": 1.0622456073760986, | |
| "step": 132 | |
| }, | |
| { | |
| "epoch": 0.16361416361416362, | |
| "grad_norm": 4.408446788787842, | |
| "learning_rate": 2.432926829268293e-06, | |
| "loss": 1.1616021394729614, | |
| "step": 134 | |
| }, | |
| { | |
| "epoch": 0.16605616605616605, | |
| "grad_norm": 12.887857437133789, | |
| "learning_rate": 2.4695121951219513e-06, | |
| "loss": 1.1696486473083496, | |
| "step": 136 | |
| }, | |
| { | |
| "epoch": 0.1684981684981685, | |
| "grad_norm": 13.57415771484375, | |
| "learning_rate": 2.5060975609756097e-06, | |
| "loss": 1.1216490268707275, | |
| "step": 138 | |
| }, | |
| { | |
| "epoch": 0.17094017094017094, | |
| "grad_norm": 3.6646318435668945, | |
| "learning_rate": 2.5426829268292685e-06, | |
| "loss": 1.097174882888794, | |
| "step": 140 | |
| }, | |
| { | |
| "epoch": 0.17338217338217338, | |
| "grad_norm": 3.3640549182891846, | |
| "learning_rate": 2.579268292682927e-06, | |
| "loss": 1.3600409030914307, | |
| "step": 142 | |
| }, | |
| { | |
| "epoch": 0.17582417582417584, | |
| "grad_norm": 2.1831912994384766, | |
| "learning_rate": 2.6158536585365853e-06, | |
| "loss": 1.1026687622070312, | |
| "step": 144 | |
| }, | |
| { | |
| "epoch": 0.17826617826617827, | |
| "grad_norm": 33.82872772216797, | |
| "learning_rate": 2.652439024390244e-06, | |
| "loss": 0.993908166885376, | |
| "step": 146 | |
| }, | |
| { | |
| "epoch": 0.1807081807081807, | |
| "grad_norm": 4.16090726852417, | |
| "learning_rate": 2.6890243902439025e-06, | |
| "loss": 0.9646387100219727, | |
| "step": 148 | |
| }, | |
| { | |
| "epoch": 0.18315018315018314, | |
| "grad_norm": 3.4467294216156006, | |
| "learning_rate": 2.7256097560975613e-06, | |
| "loss": 1.3205649852752686, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 0.1855921855921856, | |
| "grad_norm": 4.7642316818237305, | |
| "learning_rate": 2.7621951219512197e-06, | |
| "loss": 1.4573540687561035, | |
| "step": 152 | |
| }, | |
| { | |
| "epoch": 0.18803418803418803, | |
| "grad_norm": 4.0588908195495605, | |
| "learning_rate": 2.798780487804878e-06, | |
| "loss": 1.3297356367111206, | |
| "step": 154 | |
| }, | |
| { | |
| "epoch": 0.19047619047619047, | |
| "grad_norm": 5.6793928146362305, | |
| "learning_rate": 2.8353658536585365e-06, | |
| "loss": 1.6339439153671265, | |
| "step": 156 | |
| }, | |
| { | |
| "epoch": 0.19291819291819293, | |
| "grad_norm": 4.2020134925842285, | |
| "learning_rate": 2.8719512195121953e-06, | |
| "loss": 1.0838576555252075, | |
| "step": 158 | |
| }, | |
| { | |
| "epoch": 0.19536019536019536, | |
| "grad_norm": 3.1445655822753906, | |
| "learning_rate": 2.9085365853658537e-06, | |
| "loss": 0.8670494556427002, | |
| "step": 160 | |
| }, | |
| { | |
| "epoch": 0.1978021978021978, | |
| "grad_norm": 4.079596042633057, | |
| "learning_rate": 2.945121951219512e-06, | |
| "loss": 1.3294378519058228, | |
| "step": 162 | |
| }, | |
| { | |
| "epoch": 0.20024420024420025, | |
| "grad_norm": 5.8057684898376465, | |
| "learning_rate": 2.981707317073171e-06, | |
| "loss": 1.218698501586914, | |
| "step": 164 | |
| }, | |
| { | |
| "epoch": 0.2026862026862027, | |
| "grad_norm": 3.8487844467163086, | |
| "learning_rate": 2.9999993121015916e-06, | |
| "loss": 1.2791287899017334, | |
| "step": 166 | |
| }, | |
| { | |
| "epoch": 0.20512820512820512, | |
| "grad_norm": 7.372045516967773, | |
| "learning_rate": 2.99999380891853e-06, | |
| "loss": 1.163785457611084, | |
| "step": 168 | |
| }, | |
| { | |
| "epoch": 0.20757020757020758, | |
| "grad_norm": 6.911922931671143, | |
| "learning_rate": 2.9999828025748386e-06, | |
| "loss": 0.9233613014221191, | |
| "step": 170 | |
| }, | |
| { | |
| "epoch": 0.21001221001221002, | |
| "grad_norm": 3.84979248046875, | |
| "learning_rate": 2.9999662931153863e-06, | |
| "loss": 1.0050299167633057, | |
| "step": 172 | |
| }, | |
| { | |
| "epoch": 0.21245421245421245, | |
| "grad_norm": 3.471709728240967, | |
| "learning_rate": 2.9999442806074717e-06, | |
| "loss": 1.296844482421875, | |
| "step": 174 | |
| }, | |
| { | |
| "epoch": 0.2148962148962149, | |
| "grad_norm": 3.5143232345581055, | |
| "learning_rate": 2.999916765140827e-06, | |
| "loss": 1.2434308528900146, | |
| "step": 176 | |
| }, | |
| { | |
| "epoch": 0.21733821733821734, | |
| "grad_norm": 9.043089866638184, | |
| "learning_rate": 2.9998837468276177e-06, | |
| "loss": 1.3477281332015991, | |
| "step": 178 | |
| }, | |
| { | |
| "epoch": 0.21978021978021978, | |
| "grad_norm": 1.834376335144043, | |
| "learning_rate": 2.9998452258024397e-06, | |
| "loss": 0.9811362028121948, | |
| "step": 180 | |
| }, | |
| { | |
| "epoch": 0.2222222222222222, | |
| "grad_norm": 16.055896759033203, | |
| "learning_rate": 2.9998012022223233e-06, | |
| "loss": 0.8636922240257263, | |
| "step": 182 | |
| }, | |
| { | |
| "epoch": 0.22466422466422467, | |
| "grad_norm": 4.910029411315918, | |
| "learning_rate": 2.9997516762667265e-06, | |
| "loss": 1.2827658653259277, | |
| "step": 184 | |
| }, | |
| { | |
| "epoch": 0.2271062271062271, | |
| "grad_norm": 6.604949951171875, | |
| "learning_rate": 2.999696648137539e-06, | |
| "loss": 1.2893941402435303, | |
| "step": 186 | |
| }, | |
| { | |
| "epoch": 0.22954822954822954, | |
| "grad_norm": 6.563926696777344, | |
| "learning_rate": 2.999636118059079e-06, | |
| "loss": 1.3384151458740234, | |
| "step": 188 | |
| }, | |
| { | |
| "epoch": 0.231990231990232, | |
| "grad_norm": 4.46897029876709, | |
| "learning_rate": 2.9995700862780938e-06, | |
| "loss": 1.3492507934570312, | |
| "step": 190 | |
| }, | |
| { | |
| "epoch": 0.23443223443223443, | |
| "grad_norm": 3.4802074432373047, | |
| "learning_rate": 2.999498553063757e-06, | |
| "loss": 1.3010103702545166, | |
| "step": 192 | |
| }, | |
| { | |
| "epoch": 0.23687423687423687, | |
| "grad_norm": 4.670327186584473, | |
| "learning_rate": 2.9994215187076693e-06, | |
| "loss": 1.3023922443389893, | |
| "step": 194 | |
| }, | |
| { | |
| "epoch": 0.23931623931623933, | |
| "grad_norm": 212.8003387451172, | |
| "learning_rate": 2.9993389835238563e-06, | |
| "loss": 0.9995719194412231, | |
| "step": 196 | |
| }, | |
| { | |
| "epoch": 0.24175824175824176, | |
| "grad_norm": 2.234907388687134, | |
| "learning_rate": 2.999250947848767e-06, | |
| "loss": 1.2751619815826416, | |
| "step": 198 | |
| }, | |
| { | |
| "epoch": 0.2442002442002442, | |
| "grad_norm": 6.568592071533203, | |
| "learning_rate": 2.999157412041274e-06, | |
| "loss": 1.3642621040344238, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.24664224664224665, | |
| "grad_norm": 2.7516143321990967, | |
| "learning_rate": 2.999058376482668e-06, | |
| "loss": 1.2963935136795044, | |
| "step": 202 | |
| }, | |
| { | |
| "epoch": 0.2490842490842491, | |
| "grad_norm": 5.1726226806640625, | |
| "learning_rate": 2.9989538415766623e-06, | |
| "loss": 0.9885914921760559, | |
| "step": 204 | |
| }, | |
| { | |
| "epoch": 0.2515262515262515, | |
| "grad_norm": 4.441065311431885, | |
| "learning_rate": 2.9988438077493863e-06, | |
| "loss": 1.25881028175354, | |
| "step": 206 | |
| }, | |
| { | |
| "epoch": 0.25396825396825395, | |
| "grad_norm": 3.2798068523406982, | |
| "learning_rate": 2.9987282754493857e-06, | |
| "loss": 1.1204559803009033, | |
| "step": 208 | |
| }, | |
| { | |
| "epoch": 0.2564102564102564, | |
| "grad_norm": 7.684624195098877, | |
| "learning_rate": 2.99860724514762e-06, | |
| "loss": 0.955866813659668, | |
| "step": 210 | |
| }, | |
| { | |
| "epoch": 0.2588522588522589, | |
| "grad_norm": 34.730289459228516, | |
| "learning_rate": 2.998480717337461e-06, | |
| "loss": 0.6288090348243713, | |
| "step": 212 | |
| }, | |
| { | |
| "epoch": 0.2612942612942613, | |
| "grad_norm": 1.641717791557312, | |
| "learning_rate": 2.998348692534691e-06, | |
| "loss": 1.0912883281707764, | |
| "step": 214 | |
| }, | |
| { | |
| "epoch": 0.26373626373626374, | |
| "grad_norm": 3.164410352706909, | |
| "learning_rate": 2.9982111712775015e-06, | |
| "loss": 1.2621415853500366, | |
| "step": 216 | |
| }, | |
| { | |
| "epoch": 0.2661782661782662, | |
| "grad_norm": 2.8006389141082764, | |
| "learning_rate": 2.9980681541264875e-06, | |
| "loss": 1.18211829662323, | |
| "step": 218 | |
| }, | |
| { | |
| "epoch": 0.2686202686202686, | |
| "grad_norm": 3.1097168922424316, | |
| "learning_rate": 2.9979196416646495e-06, | |
| "loss": 1.293933629989624, | |
| "step": 220 | |
| }, | |
| { | |
| "epoch": 0.27106227106227104, | |
| "grad_norm": 3.9366023540496826, | |
| "learning_rate": 2.9977656344973887e-06, | |
| "loss": 0.9433372616767883, | |
| "step": 222 | |
| }, | |
| { | |
| "epoch": 0.27350427350427353, | |
| "grad_norm": 23.4581298828125, | |
| "learning_rate": 2.997606133252505e-06, | |
| "loss": 0.9114547371864319, | |
| "step": 224 | |
| }, | |
| { | |
| "epoch": 0.27594627594627597, | |
| "grad_norm": 3.7550017833709717, | |
| "learning_rate": 2.997441138580196e-06, | |
| "loss": 1.3184466361999512, | |
| "step": 226 | |
| }, | |
| { | |
| "epoch": 0.2783882783882784, | |
| "grad_norm": 4.339027404785156, | |
| "learning_rate": 2.997270651153049e-06, | |
| "loss": 1.356973648071289, | |
| "step": 228 | |
| }, | |
| { | |
| "epoch": 0.28083028083028083, | |
| "grad_norm": 17.372758865356445, | |
| "learning_rate": 2.9970946716660465e-06, | |
| "loss": 1.335922122001648, | |
| "step": 230 | |
| }, | |
| { | |
| "epoch": 0.28327228327228327, | |
| "grad_norm": 8.560553550720215, | |
| "learning_rate": 2.9969132008365578e-06, | |
| "loss": 0.7592540979385376, | |
| "step": 232 | |
| }, | |
| { | |
| "epoch": 0.2857142857142857, | |
| "grad_norm": 17.910593032836914, | |
| "learning_rate": 2.9967262394043354e-06, | |
| "loss": 1.0156524181365967, | |
| "step": 234 | |
| }, | |
| { | |
| "epoch": 0.28815628815628813, | |
| "grad_norm": 3.8950822353363037, | |
| "learning_rate": 2.9965337881315156e-06, | |
| "loss": 1.264986515045166, | |
| "step": 236 | |
| }, | |
| { | |
| "epoch": 0.2905982905982906, | |
| "grad_norm": 4.062821865081787, | |
| "learning_rate": 2.9963358478026134e-06, | |
| "loss": 1.2388455867767334, | |
| "step": 238 | |
| }, | |
| { | |
| "epoch": 0.29304029304029305, | |
| "grad_norm": 4.20897102355957, | |
| "learning_rate": 2.9961324192245196e-06, | |
| "loss": 1.1307477951049805, | |
| "step": 240 | |
| }, | |
| { | |
| "epoch": 0.2954822954822955, | |
| "grad_norm": 14.355788230895996, | |
| "learning_rate": 2.9959235032264977e-06, | |
| "loss": 0.9655016660690308, | |
| "step": 242 | |
| }, | |
| { | |
| "epoch": 0.2979242979242979, | |
| "grad_norm": 3.031242847442627, | |
| "learning_rate": 2.9957091006601795e-06, | |
| "loss": 1.3131451606750488, | |
| "step": 244 | |
| }, | |
| { | |
| "epoch": 0.30036630036630035, | |
| "grad_norm": 4.857945442199707, | |
| "learning_rate": 2.995489212399562e-06, | |
| "loss": 0.7641839981079102, | |
| "step": 246 | |
| }, | |
| { | |
| "epoch": 0.3028083028083028, | |
| "grad_norm": 6.122247219085693, | |
| "learning_rate": 2.9952638393410076e-06, | |
| "loss": 1.19805908203125, | |
| "step": 248 | |
| }, | |
| { | |
| "epoch": 0.3052503052503053, | |
| "grad_norm": 4.154690742492676, | |
| "learning_rate": 2.9950329824032323e-06, | |
| "loss": 0.9278289079666138, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 0.3076923076923077, | |
| "grad_norm": 15.398279190063477, | |
| "learning_rate": 2.9947966425273106e-06, | |
| "loss": 1.0428712368011475, | |
| "step": 252 | |
| }, | |
| { | |
| "epoch": 0.31013431013431014, | |
| "grad_norm": 3.4890997409820557, | |
| "learning_rate": 2.9945548206766655e-06, | |
| "loss": 1.2764599323272705, | |
| "step": 254 | |
| }, | |
| { | |
| "epoch": 0.3125763125763126, | |
| "grad_norm": 3.448875904083252, | |
| "learning_rate": 2.9943075178370684e-06, | |
| "loss": 1.2555460929870605, | |
| "step": 256 | |
| }, | |
| { | |
| "epoch": 0.315018315018315, | |
| "grad_norm": 3.940031051635742, | |
| "learning_rate": 2.994054735016633e-06, | |
| "loss": 1.2957007884979248, | |
| "step": 258 | |
| }, | |
| { | |
| "epoch": 0.31746031746031744, | |
| "grad_norm": 5.8517537117004395, | |
| "learning_rate": 2.993796473245811e-06, | |
| "loss": 1.2518693208694458, | |
| "step": 260 | |
| }, | |
| { | |
| "epoch": 0.3199023199023199, | |
| "grad_norm": 9.949854850769043, | |
| "learning_rate": 2.993532733577391e-06, | |
| "loss": 1.4786887168884277, | |
| "step": 262 | |
| }, | |
| { | |
| "epoch": 0.32234432234432236, | |
| "grad_norm": 10.004648208618164, | |
| "learning_rate": 2.9932635170864887e-06, | |
| "loss": 0.7135213613510132, | |
| "step": 264 | |
| }, | |
| { | |
| "epoch": 0.3247863247863248, | |
| "grad_norm": 6.521066188812256, | |
| "learning_rate": 2.992988824870548e-06, | |
| "loss": 1.0240241289138794, | |
| "step": 266 | |
| }, | |
| { | |
| "epoch": 0.32722832722832723, | |
| "grad_norm": 3.2186930179595947, | |
| "learning_rate": 2.992708658049334e-06, | |
| "loss": 1.3185689449310303, | |
| "step": 268 | |
| }, | |
| { | |
| "epoch": 0.32967032967032966, | |
| "grad_norm": 3.3630194664001465, | |
| "learning_rate": 2.992423017764928e-06, | |
| "loss": 0.9591858386993408, | |
| "step": 270 | |
| }, | |
| { | |
| "epoch": 0.3321123321123321, | |
| "grad_norm": 7.637097358703613, | |
| "learning_rate": 2.992131905181723e-06, | |
| "loss": 1.2712072134017944, | |
| "step": 272 | |
| }, | |
| { | |
| "epoch": 0.33455433455433453, | |
| "grad_norm": 14.014065742492676, | |
| "learning_rate": 2.9918353214864214e-06, | |
| "loss": 0.9579728841781616, | |
| "step": 274 | |
| }, | |
| { | |
| "epoch": 0.336996336996337, | |
| "grad_norm": 5.264770030975342, | |
| "learning_rate": 2.991533267888026e-06, | |
| "loss": 0.9980106949806213, | |
| "step": 276 | |
| }, | |
| { | |
| "epoch": 0.33943833943833945, | |
| "grad_norm": 3.0331125259399414, | |
| "learning_rate": 2.9912257456178393e-06, | |
| "loss": 1.3985265493392944, | |
| "step": 278 | |
| }, | |
| { | |
| "epoch": 0.3418803418803419, | |
| "grad_norm": 3.4260036945343018, | |
| "learning_rate": 2.990912755929455e-06, | |
| "loss": 1.287198781967163, | |
| "step": 280 | |
| }, | |
| { | |
| "epoch": 0.3443223443223443, | |
| "grad_norm": 2.3566527366638184, | |
| "learning_rate": 2.990594300098755e-06, | |
| "loss": 1.2664443254470825, | |
| "step": 282 | |
| }, | |
| { | |
| "epoch": 0.34676434676434675, | |
| "grad_norm": 20.097270965576172, | |
| "learning_rate": 2.9902703794239038e-06, | |
| "loss": 1.3103134632110596, | |
| "step": 284 | |
| }, | |
| { | |
| "epoch": 0.3492063492063492, | |
| "grad_norm": 13.092287063598633, | |
| "learning_rate": 2.989940995225342e-06, | |
| "loss": 1.115279197692871, | |
| "step": 286 | |
| }, | |
| { | |
| "epoch": 0.3516483516483517, | |
| "grad_norm": 7.133892059326172, | |
| "learning_rate": 2.9896061488457827e-06, | |
| "loss": 1.6538463830947876, | |
| "step": 288 | |
| }, | |
| { | |
| "epoch": 0.3540903540903541, | |
| "grad_norm": 13.034635543823242, | |
| "learning_rate": 2.9892658416502053e-06, | |
| "loss": 0.9154413342475891, | |
| "step": 290 | |
| }, | |
| { | |
| "epoch": 0.35653235653235654, | |
| "grad_norm": 22.553916931152344, | |
| "learning_rate": 2.9889200750258497e-06, | |
| "loss": 1.2926669120788574, | |
| "step": 292 | |
| }, | |
| { | |
| "epoch": 0.358974358974359, | |
| "grad_norm": 3.3931753635406494, | |
| "learning_rate": 2.98856885038221e-06, | |
| "loss": 1.0205590724945068, | |
| "step": 294 | |
| }, | |
| { | |
| "epoch": 0.3614163614163614, | |
| "grad_norm": 4.805473804473877, | |
| "learning_rate": 2.9882121691510315e-06, | |
| "loss": 1.2486145496368408, | |
| "step": 296 | |
| }, | |
| { | |
| "epoch": 0.36385836385836384, | |
| "grad_norm": 12.839898109436035, | |
| "learning_rate": 2.9878500327863005e-06, | |
| "loss": 1.3906102180480957, | |
| "step": 298 | |
| }, | |
| { | |
| "epoch": 0.3663003663003663, | |
| "grad_norm": 5.3344902992248535, | |
| "learning_rate": 2.987482442764243e-06, | |
| "loss": 1.210636019706726, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 0.36874236874236876, | |
| "grad_norm": 3.737217426300049, | |
| "learning_rate": 2.9871094005833147e-06, | |
| "loss": 0.8750394582748413, | |
| "step": 302 | |
| }, | |
| { | |
| "epoch": 0.3711843711843712, | |
| "grad_norm": 5.778133392333984, | |
| "learning_rate": 2.986730907764197e-06, | |
| "loss": 1.187114953994751, | |
| "step": 304 | |
| }, | |
| { | |
| "epoch": 0.37362637362637363, | |
| "grad_norm": 5.849246978759766, | |
| "learning_rate": 2.9863469658497923e-06, | |
| "loss": 1.3161380290985107, | |
| "step": 306 | |
| }, | |
| { | |
| "epoch": 0.37606837606837606, | |
| "grad_norm": 3.1636006832122803, | |
| "learning_rate": 2.985957576405213e-06, | |
| "loss": 1.299119472503662, | |
| "step": 308 | |
| }, | |
| { | |
| "epoch": 0.3785103785103785, | |
| "grad_norm": 4.255359172821045, | |
| "learning_rate": 2.985562741017781e-06, | |
| "loss": 1.3168572187423706, | |
| "step": 310 | |
| }, | |
| { | |
| "epoch": 0.38095238095238093, | |
| "grad_norm": 4.4595489501953125, | |
| "learning_rate": 2.9851624612970154e-06, | |
| "loss": 0.9910668134689331, | |
| "step": 312 | |
| }, | |
| { | |
| "epoch": 0.3833943833943834, | |
| "grad_norm": 5.871083736419678, | |
| "learning_rate": 2.9847567388746303e-06, | |
| "loss": 1.3671818971633911, | |
| "step": 314 | |
| }, | |
| { | |
| "epoch": 0.38583638583638585, | |
| "grad_norm": 5.709342002868652, | |
| "learning_rate": 2.984345575404527e-06, | |
| "loss": 0.6426190137863159, | |
| "step": 316 | |
| }, | |
| { | |
| "epoch": 0.3882783882783883, | |
| "grad_norm": 4.615055084228516, | |
| "learning_rate": 2.9839289725627856e-06, | |
| "loss": 0.9546952247619629, | |
| "step": 318 | |
| }, | |
| { | |
| "epoch": 0.3907203907203907, | |
| "grad_norm": 5.657520294189453, | |
| "learning_rate": 2.9835069320476605e-06, | |
| "loss": 1.3316638469696045, | |
| "step": 320 | |
| }, | |
| { | |
| "epoch": 0.39316239316239315, | |
| "grad_norm": 7.760973930358887, | |
| "learning_rate": 2.9830794555795713e-06, | |
| "loss": 1.3453943729400635, | |
| "step": 322 | |
| }, | |
| { | |
| "epoch": 0.3956043956043956, | |
| "grad_norm": 9.573053359985352, | |
| "learning_rate": 2.9826465449010985e-06, | |
| "loss": 1.3073155879974365, | |
| "step": 324 | |
| }, | |
| { | |
| "epoch": 0.398046398046398, | |
| "grad_norm": 3.39166522026062, | |
| "learning_rate": 2.982208201776973e-06, | |
| "loss": 1.2447479963302612, | |
| "step": 326 | |
| }, | |
| { | |
| "epoch": 0.4004884004884005, | |
| "grad_norm": 3.000147581100464, | |
| "learning_rate": 2.9817644279940734e-06, | |
| "loss": 1.2358218431472778, | |
| "step": 328 | |
| }, | |
| { | |
| "epoch": 0.40293040293040294, | |
| "grad_norm": 7.2135233879089355, | |
| "learning_rate": 2.9813152253614114e-06, | |
| "loss": 0.8832641243934631, | |
| "step": 330 | |
| }, | |
| { | |
| "epoch": 0.4053724053724054, | |
| "grad_norm": 5.4000325202941895, | |
| "learning_rate": 2.980860595710134e-06, | |
| "loss": 0.607711672782898, | |
| "step": 332 | |
| }, | |
| { | |
| "epoch": 0.4078144078144078, | |
| "grad_norm": 3.0173025131225586, | |
| "learning_rate": 2.980400540893508e-06, | |
| "loss": 1.044655203819275, | |
| "step": 334 | |
| }, | |
| { | |
| "epoch": 0.41025641025641024, | |
| "grad_norm": 4.2564616203308105, | |
| "learning_rate": 2.9799350627869173e-06, | |
| "loss": 1.3458592891693115, | |
| "step": 336 | |
| }, | |
| { | |
| "epoch": 0.4126984126984127, | |
| "grad_norm": 5.331943511962891, | |
| "learning_rate": 2.979464163287852e-06, | |
| "loss": 0.9420923590660095, | |
| "step": 338 | |
| }, | |
| { | |
| "epoch": 0.41514041514041516, | |
| "grad_norm": 7.787961006164551, | |
| "learning_rate": 2.9789878443159028e-06, | |
| "loss": 1.0719984769821167, | |
| "step": 340 | |
| }, | |
| { | |
| "epoch": 0.4175824175824176, | |
| "grad_norm": 6.272089958190918, | |
| "learning_rate": 2.978506107812753e-06, | |
| "loss": 1.6223466396331787, | |
| "step": 342 | |
| }, | |
| { | |
| "epoch": 0.42002442002442003, | |
| "grad_norm": 3.4842610359191895, | |
| "learning_rate": 2.978018955742169e-06, | |
| "loss": 0.9255164265632629, | |
| "step": 344 | |
| }, | |
| { | |
| "epoch": 0.42246642246642246, | |
| "grad_norm": 3.1731863021850586, | |
| "learning_rate": 2.977526390089995e-06, | |
| "loss": 1.1120378971099854, | |
| "step": 346 | |
| }, | |
| { | |
| "epoch": 0.4249084249084249, | |
| "grad_norm": 11.810261726379395, | |
| "learning_rate": 2.9770284128641415e-06, | |
| "loss": 1.2913296222686768, | |
| "step": 348 | |
| }, | |
| { | |
| "epoch": 0.42735042735042733, | |
| "grad_norm": 4.617822647094727, | |
| "learning_rate": 2.97652502609458e-06, | |
| "loss": 0.8451753258705139, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 0.4297924297924298, | |
| "grad_norm": 3.888181447982788, | |
| "learning_rate": 2.976016231833334e-06, | |
| "loss": 1.0580174922943115, | |
| "step": 352 | |
| }, | |
| { | |
| "epoch": 0.43223443223443225, | |
| "grad_norm": 3.3150265216827393, | |
| "learning_rate": 2.9755020321544686e-06, | |
| "loss": 1.243308424949646, | |
| "step": 354 | |
| }, | |
| { | |
| "epoch": 0.4346764346764347, | |
| "grad_norm": 7.587791442871094, | |
| "learning_rate": 2.9749824291540855e-06, | |
| "loss": 0.8922522068023682, | |
| "step": 356 | |
| }, | |
| { | |
| "epoch": 0.4371184371184371, | |
| "grad_norm": 3.322112560272217, | |
| "learning_rate": 2.9744574249503127e-06, | |
| "loss": 1.1295361518859863, | |
| "step": 358 | |
| }, | |
| { | |
| "epoch": 0.43956043956043955, | |
| "grad_norm": 3.2228076457977295, | |
| "learning_rate": 2.973927021683294e-06, | |
| "loss": 1.2044621706008911, | |
| "step": 360 | |
| }, | |
| { | |
| "epoch": 0.442002442002442, | |
| "grad_norm": 6.780779838562012, | |
| "learning_rate": 2.9733912215151837e-06, | |
| "loss": 1.0825629234313965, | |
| "step": 362 | |
| }, | |
| { | |
| "epoch": 0.4444444444444444, | |
| "grad_norm": 2.6231162548065186, | |
| "learning_rate": 2.9728500266301363e-06, | |
| "loss": 1.1095397472381592, | |
| "step": 364 | |
| }, | |
| { | |
| "epoch": 0.4468864468864469, | |
| "grad_norm": 6.354375839233398, | |
| "learning_rate": 2.9723034392342963e-06, | |
| "loss": 1.3557205200195312, | |
| "step": 366 | |
| }, | |
| { | |
| "epoch": 0.44932844932844934, | |
| "grad_norm": 9.511418342590332, | |
| "learning_rate": 2.971751461555791e-06, | |
| "loss": 1.026384949684143, | |
| "step": 368 | |
| }, | |
| { | |
| "epoch": 0.4517704517704518, | |
| "grad_norm": 1.5220327377319336, | |
| "learning_rate": 2.9711940958447217e-06, | |
| "loss": 1.0174123048782349, | |
| "step": 370 | |
| }, | |
| { | |
| "epoch": 0.4542124542124542, | |
| "grad_norm": 5.583617210388184, | |
| "learning_rate": 2.9706313443731525e-06, | |
| "loss": 1.2979340553283691, | |
| "step": 372 | |
| }, | |
| { | |
| "epoch": 0.45665445665445664, | |
| "grad_norm": 3.093553304672241, | |
| "learning_rate": 2.970063209435103e-06, | |
| "loss": 0.8846883773803711, | |
| "step": 374 | |
| }, | |
| { | |
| "epoch": 0.4590964590964591, | |
| "grad_norm": 3.7214324474334717, | |
| "learning_rate": 2.9694896933465376e-06, | |
| "loss": 1.216248631477356, | |
| "step": 376 | |
| }, | |
| { | |
| "epoch": 0.46153846153846156, | |
| "grad_norm": 3.53817081451416, | |
| "learning_rate": 2.9689107984453564e-06, | |
| "loss": 0.9267280697822571, | |
| "step": 378 | |
| }, | |
| { | |
| "epoch": 0.463980463980464, | |
| "grad_norm": 5.406538963317871, | |
| "learning_rate": 2.9683265270913866e-06, | |
| "loss": 0.8711439371109009, | |
| "step": 380 | |
| }, | |
| { | |
| "epoch": 0.46642246642246643, | |
| "grad_norm": 2.3514599800109863, | |
| "learning_rate": 2.9677368816663717e-06, | |
| "loss": 1.2632300853729248, | |
| "step": 382 | |
| }, | |
| { | |
| "epoch": 0.46886446886446886, | |
| "grad_norm": 2.9938712120056152, | |
| "learning_rate": 2.967141864573962e-06, | |
| "loss": 1.0389857292175293, | |
| "step": 384 | |
| }, | |
| { | |
| "epoch": 0.4713064713064713, | |
| "grad_norm": 6.273382663726807, | |
| "learning_rate": 2.966541478239706e-06, | |
| "loss": 1.1980342864990234, | |
| "step": 386 | |
| }, | |
| { | |
| "epoch": 0.47374847374847373, | |
| "grad_norm": 6.093961715698242, | |
| "learning_rate": 2.965935725111039e-06, | |
| "loss": 1.5937999486923218, | |
| "step": 388 | |
| }, | |
| { | |
| "epoch": 0.47619047619047616, | |
| "grad_norm": 2.8185184001922607, | |
| "learning_rate": 2.965324607657273e-06, | |
| "loss": 0.8034391403198242, | |
| "step": 390 | |
| }, | |
| { | |
| "epoch": 0.47863247863247865, | |
| "grad_norm": 9.279390335083008, | |
| "learning_rate": 2.9647081283695884e-06, | |
| "loss": 0.8456100225448608, | |
| "step": 392 | |
| }, | |
| { | |
| "epoch": 0.4810744810744811, | |
| "grad_norm": 2.946828842163086, | |
| "learning_rate": 2.9640862897610217e-06, | |
| "loss": 0.9232727289199829, | |
| "step": 394 | |
| }, | |
| { | |
| "epoch": 0.4835164835164835, | |
| "grad_norm": 3.2411246299743652, | |
| "learning_rate": 2.963459094366458e-06, | |
| "loss": 1.2164127826690674, | |
| "step": 396 | |
| }, | |
| { | |
| "epoch": 0.48595848595848595, | |
| "grad_norm": 3.0650200843811035, | |
| "learning_rate": 2.9628265447426166e-06, | |
| "loss": 1.2682867050170898, | |
| "step": 398 | |
| }, | |
| { | |
| "epoch": 0.4884004884004884, | |
| "grad_norm": 5.058322429656982, | |
| "learning_rate": 2.9621886434680456e-06, | |
| "loss": 0.5417430400848389, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 0.4908424908424908, | |
| "grad_norm": 4.885420322418213, | |
| "learning_rate": 2.961545393143107e-06, | |
| "loss": 1.2176759243011475, | |
| "step": 402 | |
| }, | |
| { | |
| "epoch": 0.4932844932844933, | |
| "grad_norm": 2.67753267288208, | |
| "learning_rate": 2.9608967963899673e-06, | |
| "loss": 1.3221197128295898, | |
| "step": 404 | |
| }, | |
| { | |
| "epoch": 0.49572649572649574, | |
| "grad_norm": 18.8695011138916, | |
| "learning_rate": 2.96024285585259e-06, | |
| "loss": 0.6289905309677124, | |
| "step": 406 | |
| }, | |
| { | |
| "epoch": 0.4981684981684982, | |
| "grad_norm": 7.418961048126221, | |
| "learning_rate": 2.9595835741967195e-06, | |
| "loss": 0.8911920785903931, | |
| "step": 408 | |
| }, | |
| { | |
| "epoch": 0.5006105006105006, | |
| "grad_norm": 6.898990631103516, | |
| "learning_rate": 2.958918954109874e-06, | |
| "loss": 1.252363920211792, | |
| "step": 410 | |
| }, | |
| { | |
| "epoch": 0.503052503052503, | |
| "grad_norm": 7.224813461303711, | |
| "learning_rate": 2.9582489983013332e-06, | |
| "loss": 0.5519344806671143, | |
| "step": 412 | |
| }, | |
| { | |
| "epoch": 0.5054945054945055, | |
| "grad_norm": 3.046077251434326, | |
| "learning_rate": 2.957573709502129e-06, | |
| "loss": 1.3178116083145142, | |
| "step": 414 | |
| }, | |
| { | |
| "epoch": 0.5079365079365079, | |
| "grad_norm": 3.8101770877838135, | |
| "learning_rate": 2.9568930904650297e-06, | |
| "loss": 1.2741634845733643, | |
| "step": 416 | |
| }, | |
| { | |
| "epoch": 0.5103785103785103, | |
| "grad_norm": 3.8740415573120117, | |
| "learning_rate": 2.956207143964535e-06, | |
| "loss": 1.2111685276031494, | |
| "step": 418 | |
| }, | |
| { | |
| "epoch": 0.5128205128205128, | |
| "grad_norm": 11.586557388305664, | |
| "learning_rate": 2.95551587279686e-06, | |
| "loss": 1.2424992322921753, | |
| "step": 420 | |
| }, | |
| { | |
| "epoch": 0.5152625152625152, | |
| "grad_norm": 2.9132723808288574, | |
| "learning_rate": 2.9548192797799274e-06, | |
| "loss": 1.2334108352661133, | |
| "step": 422 | |
| }, | |
| { | |
| "epoch": 0.5177045177045178, | |
| "grad_norm": 4.264636039733887, | |
| "learning_rate": 2.9541173677533505e-06, | |
| "loss": 1.2619736194610596, | |
| "step": 424 | |
| }, | |
| { | |
| "epoch": 0.5201465201465202, | |
| "grad_norm": 11.61112117767334, | |
| "learning_rate": 2.9534101395784286e-06, | |
| "loss": 1.0475389957427979, | |
| "step": 426 | |
| }, | |
| { | |
| "epoch": 0.5225885225885226, | |
| "grad_norm": 2.66593599319458, | |
| "learning_rate": 2.952697598138131e-06, | |
| "loss": 1.2480018138885498, | |
| "step": 428 | |
| }, | |
| { | |
| "epoch": 0.525030525030525, | |
| "grad_norm": 16.38984489440918, | |
| "learning_rate": 2.951979746337085e-06, | |
| "loss": 0.8896474838256836, | |
| "step": 430 | |
| }, | |
| { | |
| "epoch": 0.5274725274725275, | |
| "grad_norm": 31.188690185546875, | |
| "learning_rate": 2.951256587101566e-06, | |
| "loss": 0.5834943056106567, | |
| "step": 432 | |
| }, | |
| { | |
| "epoch": 0.5299145299145299, | |
| "grad_norm": 2.201815128326416, | |
| "learning_rate": 2.9505281233794856e-06, | |
| "loss": 1.261406421661377, | |
| "step": 434 | |
| }, | |
| { | |
| "epoch": 0.5323565323565324, | |
| "grad_norm": 3.4427995681762695, | |
| "learning_rate": 2.9497943581403773e-06, | |
| "loss": 1.223460078239441, | |
| "step": 436 | |
| }, | |
| { | |
| "epoch": 0.5347985347985348, | |
| "grad_norm": 2.1992275714874268, | |
| "learning_rate": 2.9490552943753873e-06, | |
| "loss": 1.2381012439727783, | |
| "step": 438 | |
| }, | |
| { | |
| "epoch": 0.5372405372405372, | |
| "grad_norm": 8.412099838256836, | |
| "learning_rate": 2.94831093509726e-06, | |
| "loss": 1.1994678974151611, | |
| "step": 440 | |
| }, | |
| { | |
| "epoch": 0.5396825396825397, | |
| "grad_norm": 6.589860439300537, | |
| "learning_rate": 2.947561283340327e-06, | |
| "loss": 1.0292491912841797, | |
| "step": 442 | |
| }, | |
| { | |
| "epoch": 0.5421245421245421, | |
| "grad_norm": 3.5161733627319336, | |
| "learning_rate": 2.9468063421604935e-06, | |
| "loss": 0.8312960863113403, | |
| "step": 444 | |
| }, | |
| { | |
| "epoch": 0.5445665445665445, | |
| "grad_norm": 6.2591552734375, | |
| "learning_rate": 2.946046114635228e-06, | |
| "loss": 0.9817121028900146, | |
| "step": 446 | |
| }, | |
| { | |
| "epoch": 0.5470085470085471, | |
| "grad_norm": 2.4296693801879883, | |
| "learning_rate": 2.945280603863548e-06, | |
| "loss": 1.090214729309082, | |
| "step": 448 | |
| }, | |
| { | |
| "epoch": 0.5494505494505495, | |
| "grad_norm": 17.264671325683594, | |
| "learning_rate": 2.944509812966007e-06, | |
| "loss": 1.0016728639602661, | |
| "step": 450 | |
| }, | |
| { | |
| "epoch": 0.5518925518925519, | |
| "grad_norm": 11.218456268310547, | |
| "learning_rate": 2.943733745084684e-06, | |
| "loss": 0.7395261526107788, | |
| "step": 452 | |
| }, | |
| { | |
| "epoch": 0.5543345543345544, | |
| "grad_norm": 9.569270133972168, | |
| "learning_rate": 2.942952403383168e-06, | |
| "loss": 1.2577192783355713, | |
| "step": 454 | |
| }, | |
| { | |
| "epoch": 0.5567765567765568, | |
| "grad_norm": 2.8571317195892334, | |
| "learning_rate": 2.9421657910465467e-06, | |
| "loss": 1.2232944965362549, | |
| "step": 456 | |
| }, | |
| { | |
| "epoch": 0.5592185592185592, | |
| "grad_norm": 1.8410813808441162, | |
| "learning_rate": 2.941373911281393e-06, | |
| "loss": 0.6923422813415527, | |
| "step": 458 | |
| }, | |
| { | |
| "epoch": 0.5616605616605617, | |
| "grad_norm": 11.234055519104004, | |
| "learning_rate": 2.9405767673157535e-06, | |
| "loss": 1.1405192613601685, | |
| "step": 460 | |
| }, | |
| { | |
| "epoch": 0.5641025641025641, | |
| "grad_norm": 3.442074775695801, | |
| "learning_rate": 2.9397743623991323e-06, | |
| "loss": 1.1699142456054688, | |
| "step": 462 | |
| }, | |
| { | |
| "epoch": 0.5665445665445665, | |
| "grad_norm": 6.694809436798096, | |
| "learning_rate": 2.938966699802481e-06, | |
| "loss": 0.5613974332809448, | |
| "step": 464 | |
| }, | |
| { | |
| "epoch": 0.568986568986569, | |
| "grad_norm": 3.4113035202026367, | |
| "learning_rate": 2.9381537828181804e-06, | |
| "loss": 1.4812266826629639, | |
| "step": 466 | |
| }, | |
| { | |
| "epoch": 0.5714285714285714, | |
| "grad_norm": 3.343217134475708, | |
| "learning_rate": 2.937335614760036e-06, | |
| "loss": 1.2924840450286865, | |
| "step": 468 | |
| }, | |
| { | |
| "epoch": 0.5738705738705738, | |
| "grad_norm": 3.833444833755493, | |
| "learning_rate": 2.936512198963254e-06, | |
| "loss": 1.0193979740142822, | |
| "step": 470 | |
| }, | |
| { | |
| "epoch": 0.5763125763125763, | |
| "grad_norm": 6.23240852355957, | |
| "learning_rate": 2.935683538784435e-06, | |
| "loss": 1.2634341716766357, | |
| "step": 472 | |
| }, | |
| { | |
| "epoch": 0.5787545787545788, | |
| "grad_norm": 8.669657707214355, | |
| "learning_rate": 2.934849637601557e-06, | |
| "loss": 0.8609904050827026, | |
| "step": 474 | |
| }, | |
| { | |
| "epoch": 0.5811965811965812, | |
| "grad_norm": 6.522804260253906, | |
| "learning_rate": 2.9340104988139643e-06, | |
| "loss": 0.9937872290611267, | |
| "step": 476 | |
| }, | |
| { | |
| "epoch": 0.5836385836385837, | |
| "grad_norm": 3.8986525535583496, | |
| "learning_rate": 2.9331661258423497e-06, | |
| "loss": 1.1503345966339111, | |
| "step": 478 | |
| }, | |
| { | |
| "epoch": 0.5860805860805861, | |
| "grad_norm": 3.6188859939575195, | |
| "learning_rate": 2.9323165221287442e-06, | |
| "loss": 1.283237338066101, | |
| "step": 480 | |
| }, | |
| { | |
| "epoch": 0.5885225885225885, | |
| "grad_norm": 15.03325080871582, | |
| "learning_rate": 2.9314616911365013e-06, | |
| "loss": 0.5041643977165222, | |
| "step": 482 | |
| }, | |
| { | |
| "epoch": 0.590964590964591, | |
| "grad_norm": 1.815745234489441, | |
| "learning_rate": 2.9306016363502832e-06, | |
| "loss": 0.9499940872192383, | |
| "step": 484 | |
| }, | |
| { | |
| "epoch": 0.5934065934065934, | |
| "grad_norm": 1.3562991619110107, | |
| "learning_rate": 2.929736361276046e-06, | |
| "loss": 1.0015653371810913, | |
| "step": 486 | |
| }, | |
| { | |
| "epoch": 0.5958485958485958, | |
| "grad_norm": 7.885785102844238, | |
| "learning_rate": 2.928865869441027e-06, | |
| "loss": 0.9330072402954102, | |
| "step": 488 | |
| }, | |
| { | |
| "epoch": 0.5982905982905983, | |
| "grad_norm": 4.100306987762451, | |
| "learning_rate": 2.927990164393728e-06, | |
| "loss": 1.0022904872894287, | |
| "step": 490 | |
| }, | |
| { | |
| "epoch": 0.6007326007326007, | |
| "grad_norm": 6.103216648101807, | |
| "learning_rate": 2.927109249703904e-06, | |
| "loss": 0.7623094916343689, | |
| "step": 492 | |
| }, | |
| { | |
| "epoch": 0.6031746031746031, | |
| "grad_norm": 4.293742656707764, | |
| "learning_rate": 2.926223128962544e-06, | |
| "loss": 0.9449283480644226, | |
| "step": 494 | |
| }, | |
| { | |
| "epoch": 0.6056166056166056, | |
| "grad_norm": 3.4057295322418213, | |
| "learning_rate": 2.925331805781862e-06, | |
| "loss": 1.2140758037567139, | |
| "step": 496 | |
| }, | |
| { | |
| "epoch": 0.608058608058608, | |
| "grad_norm": 5.223704814910889, | |
| "learning_rate": 2.9244352837952776e-06, | |
| "loss": 0.5771359205245972, | |
| "step": 498 | |
| }, | |
| { | |
| "epoch": 0.6105006105006106, | |
| "grad_norm": 2.622556209564209, | |
| "learning_rate": 2.9235335666574035e-06, | |
| "loss": 0.8636125326156616, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.612942612942613, | |
| "grad_norm": 2.7476792335510254, | |
| "learning_rate": 2.9226266580440293e-06, | |
| "loss": 1.202211618423462, | |
| "step": 502 | |
| }, | |
| { | |
| "epoch": 0.6153846153846154, | |
| "grad_norm": 5.067960262298584, | |
| "learning_rate": 2.92171456165211e-06, | |
| "loss": 1.2281854152679443, | |
| "step": 504 | |
| }, | |
| { | |
| "epoch": 0.6178266178266179, | |
| "grad_norm": 4.597941875457764, | |
| "learning_rate": 2.920797281199744e-06, | |
| "loss": 1.2377547025680542, | |
| "step": 506 | |
| }, | |
| { | |
| "epoch": 0.6202686202686203, | |
| "grad_norm": 2.562920093536377, | |
| "learning_rate": 2.9198748204261668e-06, | |
| "loss": 1.229562759399414, | |
| "step": 508 | |
| }, | |
| { | |
| "epoch": 0.6227106227106227, | |
| "grad_norm": 3.1850571632385254, | |
| "learning_rate": 2.918947183091727e-06, | |
| "loss": 1.2102892398834229, | |
| "step": 510 | |
| }, | |
| { | |
| "epoch": 0.6251526251526252, | |
| "grad_norm": 8.950359344482422, | |
| "learning_rate": 2.918014372977878e-06, | |
| "loss": 1.0556927919387817, | |
| "step": 512 | |
| }, | |
| { | |
| "epoch": 0.6275946275946276, | |
| "grad_norm": 2.960409641265869, | |
| "learning_rate": 2.9170763938871575e-06, | |
| "loss": 1.0265876054763794, | |
| "step": 514 | |
| }, | |
| { | |
| "epoch": 0.63003663003663, | |
| "grad_norm": 2.5331928730010986, | |
| "learning_rate": 2.916133249643177e-06, | |
| "loss": 0.9497431516647339, | |
| "step": 516 | |
| }, | |
| { | |
| "epoch": 0.6324786324786325, | |
| "grad_norm": 4.141901969909668, | |
| "learning_rate": 2.9151849440906004e-06, | |
| "loss": 1.2384538650512695, | |
| "step": 518 | |
| }, | |
| { | |
| "epoch": 0.6349206349206349, | |
| "grad_norm": 2.216688394546509, | |
| "learning_rate": 2.914231481095133e-06, | |
| "loss": 0.7612370252609253, | |
| "step": 520 | |
| }, | |
| { | |
| "epoch": 0.6373626373626373, | |
| "grad_norm": 7.122840404510498, | |
| "learning_rate": 2.913272864543504e-06, | |
| "loss": 0.9650014638900757, | |
| "step": 522 | |
| }, | |
| { | |
| "epoch": 0.6398046398046398, | |
| "grad_norm": 10.005997657775879, | |
| "learning_rate": 2.9123090983434505e-06, | |
| "loss": 1.0028713941574097, | |
| "step": 524 | |
| }, | |
| { | |
| "epoch": 0.6422466422466423, | |
| "grad_norm": 4.779255390167236, | |
| "learning_rate": 2.9113401864237014e-06, | |
| "loss": 1.2550266981124878, | |
| "step": 526 | |
| }, | |
| { | |
| "epoch": 0.6446886446886447, | |
| "grad_norm": 4.8014631271362305, | |
| "learning_rate": 2.9103661327339626e-06, | |
| "loss": 1.313009262084961, | |
| "step": 528 | |
| }, | |
| { | |
| "epoch": 0.6471306471306472, | |
| "grad_norm": 9.017115592956543, | |
| "learning_rate": 2.9093869412448993e-06, | |
| "loss": 1.041703462600708, | |
| "step": 530 | |
| }, | |
| { | |
| "epoch": 0.6495726495726496, | |
| "grad_norm": 7.53822660446167, | |
| "learning_rate": 2.9084026159481215e-06, | |
| "loss": 1.0079383850097656, | |
| "step": 532 | |
| }, | |
| { | |
| "epoch": 0.652014652014652, | |
| "grad_norm": 8.407513618469238, | |
| "learning_rate": 2.907413160856165e-06, | |
| "loss": 1.523638129234314, | |
| "step": 534 | |
| }, | |
| { | |
| "epoch": 0.6544566544566545, | |
| "grad_norm": 5.040467739105225, | |
| "learning_rate": 2.90641858000248e-06, | |
| "loss": 0.8550742864608765, | |
| "step": 536 | |
| }, | |
| { | |
| "epoch": 0.6568986568986569, | |
| "grad_norm": 2.9475791454315186, | |
| "learning_rate": 2.9054188774414087e-06, | |
| "loss": 1.2336068153381348, | |
| "step": 538 | |
| }, | |
| { | |
| "epoch": 0.6593406593406593, | |
| "grad_norm": 3.542649984359741, | |
| "learning_rate": 2.904414057248172e-06, | |
| "loss": 1.2540559768676758, | |
| "step": 540 | |
| }, | |
| { | |
| "epoch": 0.6617826617826618, | |
| "grad_norm": 4.496221542358398, | |
| "learning_rate": 2.903404123518854e-06, | |
| "loss": 0.9419100880622864, | |
| "step": 542 | |
| }, | |
| { | |
| "epoch": 0.6642246642246642, | |
| "grad_norm": 4.80670166015625, | |
| "learning_rate": 2.902389080370383e-06, | |
| "loss": 1.2315528392791748, | |
| "step": 544 | |
| }, | |
| { | |
| "epoch": 0.6666666666666666, | |
| "grad_norm": 2.7183027267456055, | |
| "learning_rate": 2.901368931940514e-06, | |
| "loss": 1.124593734741211, | |
| "step": 546 | |
| }, | |
| { | |
| "epoch": 0.6691086691086691, | |
| "grad_norm": 5.0275444984436035, | |
| "learning_rate": 2.900343682387816e-06, | |
| "loss": 1.2242764234542847, | |
| "step": 548 | |
| }, | |
| { | |
| "epoch": 0.6715506715506715, | |
| "grad_norm": 2.5301308631896973, | |
| "learning_rate": 2.899313335891651e-06, | |
| "loss": 1.1373231410980225, | |
| "step": 550 | |
| }, | |
| { | |
| "epoch": 0.673992673992674, | |
| "grad_norm": 6.004204273223877, | |
| "learning_rate": 2.8982778966521583e-06, | |
| "loss": 0.9901567101478577, | |
| "step": 552 | |
| }, | |
| { | |
| "epoch": 0.6764346764346765, | |
| "grad_norm": 6.363122463226318, | |
| "learning_rate": 2.897237368890237e-06, | |
| "loss": 1.0255892276763916, | |
| "step": 554 | |
| }, | |
| { | |
| "epoch": 0.6788766788766789, | |
| "grad_norm": 6.87668514251709, | |
| "learning_rate": 2.8961917568475306e-06, | |
| "loss": 0.7408100962638855, | |
| "step": 556 | |
| }, | |
| { | |
| "epoch": 0.6813186813186813, | |
| "grad_norm": 4.938057899475098, | |
| "learning_rate": 2.8951410647864077e-06, | |
| "loss": 1.161991834640503, | |
| "step": 558 | |
| }, | |
| { | |
| "epoch": 0.6837606837606838, | |
| "grad_norm": 1.2714885473251343, | |
| "learning_rate": 2.894085296989944e-06, | |
| "loss": 1.1618590354919434, | |
| "step": 560 | |
| }, | |
| { | |
| "epoch": 0.6862026862026862, | |
| "grad_norm": 25.83792495727539, | |
| "learning_rate": 2.8930244577619084e-06, | |
| "loss": 0.9859790205955505, | |
| "step": 562 | |
| }, | |
| { | |
| "epoch": 0.6886446886446886, | |
| "grad_norm": 3.5575103759765625, | |
| "learning_rate": 2.8919585514267412e-06, | |
| "loss": 1.2875609397888184, | |
| "step": 564 | |
| }, | |
| { | |
| "epoch": 0.6910866910866911, | |
| "grad_norm": 2.967928409576416, | |
| "learning_rate": 2.89088758232954e-06, | |
| "loss": 1.091280221939087, | |
| "step": 566 | |
| }, | |
| { | |
| "epoch": 0.6935286935286935, | |
| "grad_norm": 2.462228298187256, | |
| "learning_rate": 2.889811554836039e-06, | |
| "loss": 1.282207727432251, | |
| "step": 568 | |
| }, | |
| { | |
| "epoch": 0.6959706959706959, | |
| "grad_norm": 2.4124059677124023, | |
| "learning_rate": 2.888730473332593e-06, | |
| "loss": 0.9509236216545105, | |
| "step": 570 | |
| }, | |
| { | |
| "epoch": 0.6984126984126984, | |
| "grad_norm": 5.270227432250977, | |
| "learning_rate": 2.8876443422261597e-06, | |
| "loss": 0.984878659248352, | |
| "step": 572 | |
| }, | |
| { | |
| "epoch": 0.7008547008547008, | |
| "grad_norm": 5.570053577423096, | |
| "learning_rate": 2.886553165944281e-06, | |
| "loss": 1.4137537479400635, | |
| "step": 574 | |
| }, | |
| { | |
| "epoch": 0.7032967032967034, | |
| "grad_norm": 3.1119489669799805, | |
| "learning_rate": 2.885456948935065e-06, | |
| "loss": 1.2045481204986572, | |
| "step": 576 | |
| }, | |
| { | |
| "epoch": 0.7057387057387058, | |
| "grad_norm": 4.997451305389404, | |
| "learning_rate": 2.8843556956671674e-06, | |
| "loss": 1.1804019212722778, | |
| "step": 578 | |
| }, | |
| { | |
| "epoch": 0.7081807081807082, | |
| "grad_norm": 2.689262866973877, | |
| "learning_rate": 2.8832494106297754e-06, | |
| "loss": 1.142741084098816, | |
| "step": 580 | |
| }, | |
| { | |
| "epoch": 0.7106227106227107, | |
| "grad_norm": 3.3894829750061035, | |
| "learning_rate": 2.8821380983325856e-06, | |
| "loss": 1.1947393417358398, | |
| "step": 582 | |
| }, | |
| { | |
| "epoch": 0.7130647130647131, | |
| "grad_norm": 2.5320944786071777, | |
| "learning_rate": 2.8810217633057902e-06, | |
| "loss": 1.03977370262146, | |
| "step": 584 | |
| }, | |
| { | |
| "epoch": 0.7155067155067155, | |
| "grad_norm": 4.905697345733643, | |
| "learning_rate": 2.879900410100055e-06, | |
| "loss": 1.2815613746643066, | |
| "step": 586 | |
| }, | |
| { | |
| "epoch": 0.717948717948718, | |
| "grad_norm": 3.281200408935547, | |
| "learning_rate": 2.8787740432865024e-06, | |
| "loss": 1.0557091236114502, | |
| "step": 588 | |
| }, | |
| { | |
| "epoch": 0.7203907203907204, | |
| "grad_norm": 3.2250187397003174, | |
| "learning_rate": 2.8776426674566923e-06, | |
| "loss": 0.9464690089225769, | |
| "step": 590 | |
| }, | |
| { | |
| "epoch": 0.7228327228327228, | |
| "grad_norm": 3.132697343826294, | |
| "learning_rate": 2.876506287222603e-06, | |
| "loss": 0.8595614433288574, | |
| "step": 592 | |
| }, | |
| { | |
| "epoch": 0.7252747252747253, | |
| "grad_norm": 4.664443016052246, | |
| "learning_rate": 2.8753649072166146e-06, | |
| "loss": 1.1968210935592651, | |
| "step": 594 | |
| }, | |
| { | |
| "epoch": 0.7277167277167277, | |
| "grad_norm": 6.2733659744262695, | |
| "learning_rate": 2.874218532091486e-06, | |
| "loss": 0.949120283126831, | |
| "step": 596 | |
| }, | |
| { | |
| "epoch": 0.7301587301587301, | |
| "grad_norm": 2.1079089641571045, | |
| "learning_rate": 2.8730671665203414e-06, | |
| "loss": 0.9083657264709473, | |
| "step": 598 | |
| }, | |
| { | |
| "epoch": 0.7326007326007326, | |
| "grad_norm": 4.500316619873047, | |
| "learning_rate": 2.871910815196645e-06, | |
| "loss": 1.172386646270752, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 0.7350427350427351, | |
| "grad_norm": 3.394716501235962, | |
| "learning_rate": 2.870749482834187e-06, | |
| "loss": 1.3153018951416016, | |
| "step": 602 | |
| }, | |
| { | |
| "epoch": 0.7374847374847375, | |
| "grad_norm": 4.49886417388916, | |
| "learning_rate": 2.8695831741670626e-06, | |
| "loss": 1.1366627216339111, | |
| "step": 604 | |
| }, | |
| { | |
| "epoch": 0.73992673992674, | |
| "grad_norm": 5.643487453460693, | |
| "learning_rate": 2.8684118939496516e-06, | |
| "loss": 1.4186596870422363, | |
| "step": 606 | |
| }, | |
| { | |
| "epoch": 0.7423687423687424, | |
| "grad_norm": 2.5566513538360596, | |
| "learning_rate": 2.8672356469566007e-06, | |
| "loss": 1.257333755493164, | |
| "step": 608 | |
| }, | |
| { | |
| "epoch": 0.7448107448107448, | |
| "grad_norm": 1.967860221862793, | |
| "learning_rate": 2.866054437982803e-06, | |
| "loss": 0.8869744539260864, | |
| "step": 610 | |
| }, | |
| { | |
| "epoch": 0.7472527472527473, | |
| "grad_norm": 3.111006021499634, | |
| "learning_rate": 2.864868271843379e-06, | |
| "loss": 1.231367588043213, | |
| "step": 612 | |
| }, | |
| { | |
| "epoch": 0.7496947496947497, | |
| "grad_norm": 7.100964069366455, | |
| "learning_rate": 2.863677153373656e-06, | |
| "loss": 1.2216570377349854, | |
| "step": 614 | |
| }, | |
| { | |
| "epoch": 0.7521367521367521, | |
| "grad_norm": 4.099492073059082, | |
| "learning_rate": 2.8624810874291516e-06, | |
| "loss": 0.8563480377197266, | |
| "step": 616 | |
| }, | |
| { | |
| "epoch": 0.7545787545787546, | |
| "grad_norm": 2.678740978240967, | |
| "learning_rate": 2.8612800788855475e-06, | |
| "loss": 1.528752088546753, | |
| "step": 618 | |
| }, | |
| { | |
| "epoch": 0.757020757020757, | |
| "grad_norm": 15.396073341369629, | |
| "learning_rate": 2.860074132638677e-06, | |
| "loss": 1.0209803581237793, | |
| "step": 620 | |
| }, | |
| { | |
| "epoch": 0.7594627594627594, | |
| "grad_norm": 3.2725391387939453, | |
| "learning_rate": 2.8588632536044993e-06, | |
| "loss": 0.8620501756668091, | |
| "step": 622 | |
| }, | |
| { | |
| "epoch": 0.7619047619047619, | |
| "grad_norm": 2.9715523719787598, | |
| "learning_rate": 2.8576474467190847e-06, | |
| "loss": 1.2061121463775635, | |
| "step": 624 | |
| }, | |
| { | |
| "epoch": 0.7643467643467643, | |
| "grad_norm": 4.054410457611084, | |
| "learning_rate": 2.8564267169385877e-06, | |
| "loss": 1.2589521408081055, | |
| "step": 626 | |
| }, | |
| { | |
| "epoch": 0.7667887667887668, | |
| "grad_norm": 9.180256843566895, | |
| "learning_rate": 2.855201069239234e-06, | |
| "loss": 1.0279699563980103, | |
| "step": 628 | |
| }, | |
| { | |
| "epoch": 0.7692307692307693, | |
| "grad_norm": 3.257628917694092, | |
| "learning_rate": 2.8539705086172946e-06, | |
| "loss": 1.2448331117630005, | |
| "step": 630 | |
| }, | |
| { | |
| "epoch": 0.7716727716727717, | |
| "grad_norm": 2.673267126083374, | |
| "learning_rate": 2.852735040089069e-06, | |
| "loss": 0.8844334483146667, | |
| "step": 632 | |
| }, | |
| { | |
| "epoch": 0.7741147741147741, | |
| "grad_norm": 1.805360198020935, | |
| "learning_rate": 2.8514946686908636e-06, | |
| "loss": 0.8991525769233704, | |
| "step": 634 | |
| }, | |
| { | |
| "epoch": 0.7765567765567766, | |
| "grad_norm": 4.867867946624756, | |
| "learning_rate": 2.85024939947897e-06, | |
| "loss": 1.2330354452133179, | |
| "step": 636 | |
| }, | |
| { | |
| "epoch": 0.778998778998779, | |
| "grad_norm": 2.9628164768218994, | |
| "learning_rate": 2.8489992375296457e-06, | |
| "loss": 1.2352819442749023, | |
| "step": 638 | |
| }, | |
| { | |
| "epoch": 0.7814407814407814, | |
| "grad_norm": 8.211139678955078, | |
| "learning_rate": 2.8477441879390937e-06, | |
| "loss": 0.9022507667541504, | |
| "step": 640 | |
| }, | |
| { | |
| "epoch": 0.7838827838827839, | |
| "grad_norm": 11.121219635009766, | |
| "learning_rate": 2.8464842558234405e-06, | |
| "loss": 1.0079975128173828, | |
| "step": 642 | |
| }, | |
| { | |
| "epoch": 0.7863247863247863, | |
| "grad_norm": 2.873725652694702, | |
| "learning_rate": 2.845219446318717e-06, | |
| "loss": 1.1311874389648438, | |
| "step": 644 | |
| }, | |
| { | |
| "epoch": 0.7887667887667887, | |
| "grad_norm": 3.470582962036133, | |
| "learning_rate": 2.8439497645808343e-06, | |
| "loss": 0.9233945608139038, | |
| "step": 646 | |
| }, | |
| { | |
| "epoch": 0.7912087912087912, | |
| "grad_norm": 5.232900142669678, | |
| "learning_rate": 2.842675215785567e-06, | |
| "loss": 1.190352201461792, | |
| "step": 648 | |
| }, | |
| { | |
| "epoch": 0.7936507936507936, | |
| "grad_norm": 8.92779541015625, | |
| "learning_rate": 2.841395805128529e-06, | |
| "loss": 0.5505790710449219, | |
| "step": 650 | |
| }, | |
| { | |
| "epoch": 0.796092796092796, | |
| "grad_norm": 2.6852498054504395, | |
| "learning_rate": 2.8401115378251536e-06, | |
| "loss": 0.7804176807403564, | |
| "step": 652 | |
| }, | |
| { | |
| "epoch": 0.7985347985347986, | |
| "grad_norm": 5.359552383422852, | |
| "learning_rate": 2.8388224191106716e-06, | |
| "loss": 1.1561279296875, | |
| "step": 654 | |
| }, | |
| { | |
| "epoch": 0.800976800976801, | |
| "grad_norm": 5.934221267700195, | |
| "learning_rate": 2.8375284542400894e-06, | |
| "loss": 1.2025530338287354, | |
| "step": 656 | |
| }, | |
| { | |
| "epoch": 0.8034188034188035, | |
| "grad_norm": 5.867283821105957, | |
| "learning_rate": 2.836229648488171e-06, | |
| "loss": 0.46977946162223816, | |
| "step": 658 | |
| }, | |
| { | |
| "epoch": 0.8058608058608059, | |
| "grad_norm": 3.4654107093811035, | |
| "learning_rate": 2.834926007149411e-06, | |
| "loss": 1.0585201978683472, | |
| "step": 660 | |
| }, | |
| { | |
| "epoch": 0.8083028083028083, | |
| "grad_norm": 3.0599331855773926, | |
| "learning_rate": 2.8336175355380176e-06, | |
| "loss": 1.1910674571990967, | |
| "step": 662 | |
| }, | |
| { | |
| "epoch": 0.8107448107448108, | |
| "grad_norm": 2.914846181869507, | |
| "learning_rate": 2.8323042389878893e-06, | |
| "loss": 1.2794017791748047, | |
| "step": 664 | |
| }, | |
| { | |
| "epoch": 0.8131868131868132, | |
| "grad_norm": 2.4599173069000244, | |
| "learning_rate": 2.8309861228525924e-06, | |
| "loss": 0.8746715784072876, | |
| "step": 666 | |
| }, | |
| { | |
| "epoch": 0.8156288156288156, | |
| "grad_norm": 6.9391560554504395, | |
| "learning_rate": 2.82966319250534e-06, | |
| "loss": 1.142193078994751, | |
| "step": 668 | |
| }, | |
| { | |
| "epoch": 0.818070818070818, | |
| "grad_norm": 2.252155303955078, | |
| "learning_rate": 2.8283354533389714e-06, | |
| "loss": 1.3751626014709473, | |
| "step": 670 | |
| }, | |
| { | |
| "epoch": 0.8205128205128205, | |
| "grad_norm": 2.4821345806121826, | |
| "learning_rate": 2.8270029107659257e-06, | |
| "loss": 1.2328722476959229, | |
| "step": 672 | |
| }, | |
| { | |
| "epoch": 0.8229548229548229, | |
| "grad_norm": 2.7743475437164307, | |
| "learning_rate": 2.825665570218227e-06, | |
| "loss": 0.9911469221115112, | |
| "step": 674 | |
| }, | |
| { | |
| "epoch": 0.8253968253968254, | |
| "grad_norm": 11.23116683959961, | |
| "learning_rate": 2.824323437147454e-06, | |
| "loss": 0.8837311267852783, | |
| "step": 676 | |
| }, | |
| { | |
| "epoch": 0.8278388278388278, | |
| "grad_norm": 5.99837589263916, | |
| "learning_rate": 2.822976517024725e-06, | |
| "loss": 1.210353136062622, | |
| "step": 678 | |
| }, | |
| { | |
| "epoch": 0.8302808302808303, | |
| "grad_norm": 6.790897369384766, | |
| "learning_rate": 2.82162481534067e-06, | |
| "loss": 1.2284214496612549, | |
| "step": 680 | |
| }, | |
| { | |
| "epoch": 0.8327228327228328, | |
| "grad_norm": 3.236633062362671, | |
| "learning_rate": 2.820268337605412e-06, | |
| "loss": 1.1277165412902832, | |
| "step": 682 | |
| }, | |
| { | |
| "epoch": 0.8351648351648352, | |
| "grad_norm": 5.381369113922119, | |
| "learning_rate": 2.818907089348542e-06, | |
| "loss": 0.6816163659095764, | |
| "step": 684 | |
| }, | |
| { | |
| "epoch": 0.8376068376068376, | |
| "grad_norm": 3.039348602294922, | |
| "learning_rate": 2.8175410761191e-06, | |
| "loss": 1.3247039318084717, | |
| "step": 686 | |
| }, | |
| { | |
| "epoch": 0.8400488400488401, | |
| "grad_norm": 3.0645182132720947, | |
| "learning_rate": 2.8161703034855483e-06, | |
| "loss": 1.285701036453247, | |
| "step": 688 | |
| }, | |
| { | |
| "epoch": 0.8424908424908425, | |
| "grad_norm": 16.049558639526367, | |
| "learning_rate": 2.8147947770357504e-06, | |
| "loss": 0.9545083045959473, | |
| "step": 690 | |
| }, | |
| { | |
| "epoch": 0.8449328449328449, | |
| "grad_norm": 2.831643581390381, | |
| "learning_rate": 2.81341450237695e-06, | |
| "loss": 1.243680477142334, | |
| "step": 692 | |
| }, | |
| { | |
| "epoch": 0.8473748473748474, | |
| "grad_norm": 1.85174560546875, | |
| "learning_rate": 2.8120294851357447e-06, | |
| "loss": 0.9751272201538086, | |
| "step": 694 | |
| }, | |
| { | |
| "epoch": 0.8498168498168498, | |
| "grad_norm": 6.775423049926758, | |
| "learning_rate": 2.810639730958067e-06, | |
| "loss": 1.1404168605804443, | |
| "step": 696 | |
| }, | |
| { | |
| "epoch": 0.8522588522588522, | |
| "grad_norm": 2.1591057777404785, | |
| "learning_rate": 2.809245245509157e-06, | |
| "loss": 0.984785795211792, | |
| "step": 698 | |
| }, | |
| { | |
| "epoch": 0.8547008547008547, | |
| "grad_norm": 2.3239328861236572, | |
| "learning_rate": 2.8078460344735426e-06, | |
| "loss": 1.0640941858291626, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 0.8571428571428571, | |
| "grad_norm": 2.8549911975860596, | |
| "learning_rate": 2.8064421035550157e-06, | |
| "loss": 1.0445160865783691, | |
| "step": 702 | |
| }, | |
| { | |
| "epoch": 0.8595848595848596, | |
| "grad_norm": 15.758867263793945, | |
| "learning_rate": 2.805033458476608e-06, | |
| "loss": 0.9395806789398193, | |
| "step": 704 | |
| }, | |
| { | |
| "epoch": 0.8620268620268621, | |
| "grad_norm": 2.8815691471099854, | |
| "learning_rate": 2.803620104980568e-06, | |
| "loss": 0.42350003123283386, | |
| "step": 706 | |
| }, | |
| { | |
| "epoch": 0.8644688644688645, | |
| "grad_norm": 2.8151259422302246, | |
| "learning_rate": 2.802202048828338e-06, | |
| "loss": 1.1812381744384766, | |
| "step": 708 | |
| }, | |
| { | |
| "epoch": 0.8669108669108669, | |
| "grad_norm": 3.383596420288086, | |
| "learning_rate": 2.8007792958005295e-06, | |
| "loss": 0.8781920671463013, | |
| "step": 710 | |
| }, | |
| { | |
| "epoch": 0.8693528693528694, | |
| "grad_norm": 4.9643731117248535, | |
| "learning_rate": 2.7993518516969025e-06, | |
| "loss": 1.340822696685791, | |
| "step": 712 | |
| }, | |
| { | |
| "epoch": 0.8717948717948718, | |
| "grad_norm": 3.035583019256592, | |
| "learning_rate": 2.797919722336338e-06, | |
| "loss": 1.5426077842712402, | |
| "step": 714 | |
| }, | |
| { | |
| "epoch": 0.8742368742368742, | |
| "grad_norm": 2.791269063949585, | |
| "learning_rate": 2.796482913556817e-06, | |
| "loss": 1.3969719409942627, | |
| "step": 716 | |
| }, | |
| { | |
| "epoch": 0.8766788766788767, | |
| "grad_norm": 3.4995269775390625, | |
| "learning_rate": 2.795041431215395e-06, | |
| "loss": 0.9012230634689331, | |
| "step": 718 | |
| }, | |
| { | |
| "epoch": 0.8791208791208791, | |
| "grad_norm": 2.8634181022644043, | |
| "learning_rate": 2.7935952811881806e-06, | |
| "loss": 1.18161940574646, | |
| "step": 720 | |
| }, | |
| { | |
| "epoch": 0.8815628815628815, | |
| "grad_norm": 12.326301574707031, | |
| "learning_rate": 2.7921444693703077e-06, | |
| "loss": 0.8556126356124878, | |
| "step": 722 | |
| }, | |
| { | |
| "epoch": 0.884004884004884, | |
| "grad_norm": 4.906359672546387, | |
| "learning_rate": 2.790689001675916e-06, | |
| "loss": 1.2112202644348145, | |
| "step": 724 | |
| }, | |
| { | |
| "epoch": 0.8864468864468864, | |
| "grad_norm": 4.475518703460693, | |
| "learning_rate": 2.7892288840381238e-06, | |
| "loss": 1.1582276821136475, | |
| "step": 726 | |
| }, | |
| { | |
| "epoch": 0.8888888888888888, | |
| "grad_norm": 4.005245208740234, | |
| "learning_rate": 2.7877641224090025e-06, | |
| "loss": 1.152219295501709, | |
| "step": 728 | |
| }, | |
| { | |
| "epoch": 0.8913308913308914, | |
| "grad_norm": 3.4264960289001465, | |
| "learning_rate": 2.786294722759557e-06, | |
| "loss": 1.3757500648498535, | |
| "step": 730 | |
| }, | |
| { | |
| "epoch": 0.8937728937728938, | |
| "grad_norm": 6.461520671844482, | |
| "learning_rate": 2.7848206910796984e-06, | |
| "loss": 1.1789906024932861, | |
| "step": 732 | |
| }, | |
| { | |
| "epoch": 0.8962148962148963, | |
| "grad_norm": 2.1114742755889893, | |
| "learning_rate": 2.7833420333782197e-06, | |
| "loss": 1.0014762878417969, | |
| "step": 734 | |
| }, | |
| { | |
| "epoch": 0.8986568986568987, | |
| "grad_norm": 1.8925999402999878, | |
| "learning_rate": 2.7818587556827704e-06, | |
| "loss": 0.7306645512580872, | |
| "step": 736 | |
| }, | |
| { | |
| "epoch": 0.9010989010989011, | |
| "grad_norm": 5.776151657104492, | |
| "learning_rate": 2.7803708640398345e-06, | |
| "loss": 0.9089866280555725, | |
| "step": 738 | |
| }, | |
| { | |
| "epoch": 0.9035409035409036, | |
| "grad_norm": 4.932161331176758, | |
| "learning_rate": 2.7788783645147043e-06, | |
| "loss": 1.0046944618225098, | |
| "step": 740 | |
| }, | |
| { | |
| "epoch": 0.905982905982906, | |
| "grad_norm": 2.201463460922241, | |
| "learning_rate": 2.7773812631914555e-06, | |
| "loss": 0.8681966662406921, | |
| "step": 742 | |
| }, | |
| { | |
| "epoch": 0.9084249084249084, | |
| "grad_norm": 3.0060999393463135, | |
| "learning_rate": 2.775879566172923e-06, | |
| "loss": 0.9016242027282715, | |
| "step": 744 | |
| }, | |
| { | |
| "epoch": 0.9108669108669109, | |
| "grad_norm": 4.842814922332764, | |
| "learning_rate": 2.7743732795806743e-06, | |
| "loss": 1.1348944902420044, | |
| "step": 746 | |
| }, | |
| { | |
| "epoch": 0.9133089133089133, | |
| "grad_norm": 4.199369430541992, | |
| "learning_rate": 2.772862409554989e-06, | |
| "loss": 0.8463287353515625, | |
| "step": 748 | |
| }, | |
| { | |
| "epoch": 0.9157509157509157, | |
| "grad_norm": 2.597773790359497, | |
| "learning_rate": 2.771346962254828e-06, | |
| "loss": 1.3388140201568604, | |
| "step": 750 | |
| }, | |
| { | |
| "epoch": 0.9181929181929182, | |
| "grad_norm": 4.5675272941589355, | |
| "learning_rate": 2.7698269438578124e-06, | |
| "loss": 1.2172050476074219, | |
| "step": 752 | |
| }, | |
| { | |
| "epoch": 0.9206349206349206, | |
| "grad_norm": 61.261924743652344, | |
| "learning_rate": 2.7683023605601966e-06, | |
| "loss": 1.2228550910949707, | |
| "step": 754 | |
| }, | |
| { | |
| "epoch": 0.9230769230769231, | |
| "grad_norm": 2.4283735752105713, | |
| "learning_rate": 2.7667732185768442e-06, | |
| "loss": 1.2123913764953613, | |
| "step": 756 | |
| }, | |
| { | |
| "epoch": 0.9255189255189256, | |
| "grad_norm": 4.314785480499268, | |
| "learning_rate": 2.7652395241412013e-06, | |
| "loss": 1.2451503276824951, | |
| "step": 758 | |
| }, | |
| { | |
| "epoch": 0.927960927960928, | |
| "grad_norm": 4.969833850860596, | |
| "learning_rate": 2.7637012835052717e-06, | |
| "loss": 0.7596385478973389, | |
| "step": 760 | |
| }, | |
| { | |
| "epoch": 0.9304029304029304, | |
| "grad_norm": 3.0779378414154053, | |
| "learning_rate": 2.762158502939593e-06, | |
| "loss": 1.0997289419174194, | |
| "step": 762 | |
| }, | |
| { | |
| "epoch": 0.9328449328449329, | |
| "grad_norm": 2.6760990619659424, | |
| "learning_rate": 2.760611188733206e-06, | |
| "loss": 0.865337610244751, | |
| "step": 764 | |
| }, | |
| { | |
| "epoch": 0.9352869352869353, | |
| "grad_norm": 4.566402912139893, | |
| "learning_rate": 2.7590593471936372e-06, | |
| "loss": 0.5705887079238892, | |
| "step": 766 | |
| }, | |
| { | |
| "epoch": 0.9377289377289377, | |
| "grad_norm": 4.343785285949707, | |
| "learning_rate": 2.7575029846468648e-06, | |
| "loss": 0.8618502616882324, | |
| "step": 768 | |
| }, | |
| { | |
| "epoch": 0.9401709401709402, | |
| "grad_norm": 5.141389846801758, | |
| "learning_rate": 2.755942107437299e-06, | |
| "loss": 0.6358861923217773, | |
| "step": 770 | |
| }, | |
| { | |
| "epoch": 0.9426129426129426, | |
| "grad_norm": 4.704695701599121, | |
| "learning_rate": 2.7543767219277518e-06, | |
| "loss": 1.0651301145553589, | |
| "step": 772 | |
| }, | |
| { | |
| "epoch": 0.945054945054945, | |
| "grad_norm": 2.7546803951263428, | |
| "learning_rate": 2.7528068344994146e-06, | |
| "loss": 1.2204885482788086, | |
| "step": 774 | |
| }, | |
| { | |
| "epoch": 0.9474969474969475, | |
| "grad_norm": 1.9365019798278809, | |
| "learning_rate": 2.7512324515518285e-06, | |
| "loss": 1.1578611135482788, | |
| "step": 776 | |
| }, | |
| { | |
| "epoch": 0.9499389499389499, | |
| "grad_norm": 10.408919334411621, | |
| "learning_rate": 2.749653579502863e-06, | |
| "loss": 0.8569886684417725, | |
| "step": 778 | |
| }, | |
| { | |
| "epoch": 0.9523809523809523, | |
| "grad_norm": 2.6625332832336426, | |
| "learning_rate": 2.7480702247886855e-06, | |
| "loss": 0.964225172996521, | |
| "step": 780 | |
| }, | |
| { | |
| "epoch": 0.9548229548229549, | |
| "grad_norm": 6.262792587280273, | |
| "learning_rate": 2.7464823938637367e-06, | |
| "loss": 1.084264874458313, | |
| "step": 782 | |
| }, | |
| { | |
| "epoch": 0.9572649572649573, | |
| "grad_norm": 3.690931558609009, | |
| "learning_rate": 2.744890093200705e-06, | |
| "loss": 1.2007842063903809, | |
| "step": 784 | |
| }, | |
| { | |
| "epoch": 0.9597069597069597, | |
| "grad_norm": 20.666481018066406, | |
| "learning_rate": 2.7432933292904987e-06, | |
| "loss": 1.2361834049224854, | |
| "step": 786 | |
| }, | |
| { | |
| "epoch": 0.9621489621489622, | |
| "grad_norm": 4.015703201293945, | |
| "learning_rate": 2.7416921086422192e-06, | |
| "loss": 1.3444561958312988, | |
| "step": 788 | |
| }, | |
| { | |
| "epoch": 0.9645909645909646, | |
| "grad_norm": 5.810867786407471, | |
| "learning_rate": 2.7400864377831388e-06, | |
| "loss": 1.2651550769805908, | |
| "step": 790 | |
| }, | |
| { | |
| "epoch": 0.967032967032967, | |
| "grad_norm": 10.315156936645508, | |
| "learning_rate": 2.7384763232586672e-06, | |
| "loss": 1.0138130187988281, | |
| "step": 792 | |
| }, | |
| { | |
| "epoch": 0.9694749694749695, | |
| "grad_norm": 3.1477701663970947, | |
| "learning_rate": 2.7368617716323308e-06, | |
| "loss": 1.261399269104004, | |
| "step": 794 | |
| }, | |
| { | |
| "epoch": 0.9719169719169719, | |
| "grad_norm": 2.0946080684661865, | |
| "learning_rate": 2.735242789485743e-06, | |
| "loss": 1.1625337600708008, | |
| "step": 796 | |
| }, | |
| { | |
| "epoch": 0.9743589743589743, | |
| "grad_norm": 2.2116124629974365, | |
| "learning_rate": 2.7336193834185767e-06, | |
| "loss": 1.1595247983932495, | |
| "step": 798 | |
| }, | |
| { | |
| "epoch": 0.9768009768009768, | |
| "grad_norm": 2.7013678550720215, | |
| "learning_rate": 2.7319915600485394e-06, | |
| "loss": 1.2124404907226562, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 0.9792429792429792, | |
| "grad_norm": 6.299833297729492, | |
| "learning_rate": 2.730359326011347e-06, | |
| "loss": 0.8965975046157837, | |
| "step": 802 | |
| }, | |
| { | |
| "epoch": 0.9816849816849816, | |
| "grad_norm": 2.5298030376434326, | |
| "learning_rate": 2.728722687960692e-06, | |
| "loss": 1.0946530103683472, | |
| "step": 804 | |
| }, | |
| { | |
| "epoch": 0.9841269841269841, | |
| "grad_norm": 25.68947410583496, | |
| "learning_rate": 2.7270816525682228e-06, | |
| "loss": 1.5015652179718018, | |
| "step": 806 | |
| }, | |
| { | |
| "epoch": 0.9865689865689866, | |
| "grad_norm": 6.138635635375977, | |
| "learning_rate": 2.7254362265235114e-06, | |
| "loss": 0.8905894756317139, | |
| "step": 808 | |
| }, | |
| { | |
| "epoch": 0.989010989010989, | |
| "grad_norm": 2.091486692428589, | |
| "learning_rate": 2.723786416534028e-06, | |
| "loss": 1.2607147693634033, | |
| "step": 810 | |
| }, | |
| { | |
| "epoch": 0.9914529914529915, | |
| "grad_norm": 2.1075570583343506, | |
| "learning_rate": 2.7221322293251146e-06, | |
| "loss": 1.2005870342254639, | |
| "step": 812 | |
| }, | |
| { | |
| "epoch": 0.9938949938949939, | |
| "grad_norm": 4.031181335449219, | |
| "learning_rate": 2.7204736716399558e-06, | |
| "loss": 1.2900550365447998, | |
| "step": 814 | |
| }, | |
| { | |
| "epoch": 0.9963369963369964, | |
| "grad_norm": 3.114694118499756, | |
| "learning_rate": 2.7188107502395527e-06, | |
| "loss": 1.1653718948364258, | |
| "step": 816 | |
| }, | |
| { | |
| "epoch": 0.9987789987789988, | |
| "grad_norm": 3.7620744705200195, | |
| "learning_rate": 2.7171434719026945e-06, | |
| "loss": 1.0833420753479004, | |
| "step": 818 | |
| }, | |
| { | |
| "epoch": 1.0012210012210012, | |
| "grad_norm": 2.0748419761657715, | |
| "learning_rate": 2.715471843425931e-06, | |
| "loss": 0.9233535528182983, | |
| "step": 820 | |
| }, | |
| { | |
| "epoch": 1.0036630036630036, | |
| "grad_norm": 2.267605781555176, | |
| "learning_rate": 2.7137958716235464e-06, | |
| "loss": 1.1397910118103027, | |
| "step": 822 | |
| }, | |
| { | |
| "epoch": 1.006105006105006, | |
| "grad_norm": 2.89157772064209, | |
| "learning_rate": 2.7121155633275274e-06, | |
| "loss": 0.5471587181091309, | |
| "step": 824 | |
| }, | |
| { | |
| "epoch": 1.0085470085470085, | |
| "grad_norm": 2.945436477661133, | |
| "learning_rate": 2.710430925387542e-06, | |
| "loss": 1.0451737642288208, | |
| "step": 826 | |
| }, | |
| { | |
| "epoch": 1.010989010989011, | |
| "grad_norm": 2.9246578216552734, | |
| "learning_rate": 2.7087419646709032e-06, | |
| "loss": 0.8177531957626343, | |
| "step": 828 | |
| }, | |
| { | |
| "epoch": 1.0134310134310134, | |
| "grad_norm": 1.650028109550476, | |
| "learning_rate": 2.707048688062549e-06, | |
| "loss": 0.8926088809967041, | |
| "step": 830 | |
| }, | |
| { | |
| "epoch": 1.0158730158730158, | |
| "grad_norm": 5.467007637023926, | |
| "learning_rate": 2.70535110246501e-06, | |
| "loss": 0.8956806659698486, | |
| "step": 832 | |
| }, | |
| { | |
| "epoch": 1.0183150183150182, | |
| "grad_norm": 2.1799449920654297, | |
| "learning_rate": 2.703649214798381e-06, | |
| "loss": 0.8282803297042847, | |
| "step": 834 | |
| }, | |
| { | |
| "epoch": 1.0207570207570207, | |
| "grad_norm": 7.208977222442627, | |
| "learning_rate": 2.7019430320002953e-06, | |
| "loss": 1.1033618450164795, | |
| "step": 836 | |
| }, | |
| { | |
| "epoch": 1.0231990231990231, | |
| "grad_norm": 2.4455599784851074, | |
| "learning_rate": 2.7002325610258945e-06, | |
| "loss": 1.1690531969070435, | |
| "step": 838 | |
| }, | |
| { | |
| "epoch": 1.0256410256410255, | |
| "grad_norm": 3.7783608436584473, | |
| "learning_rate": 2.6985178088478002e-06, | |
| "loss": 0.8802350163459778, | |
| "step": 840 | |
| }, | |
| { | |
| "epoch": 1.028083028083028, | |
| "grad_norm": 16.13953399658203, | |
| "learning_rate": 2.696798782456086e-06, | |
| "loss": 0.9397566914558411, | |
| "step": 842 | |
| }, | |
| { | |
| "epoch": 1.0305250305250304, | |
| "grad_norm": 2.2074880599975586, | |
| "learning_rate": 2.695075488858251e-06, | |
| "loss": 1.1322135925292969, | |
| "step": 844 | |
| }, | |
| { | |
| "epoch": 1.032967032967033, | |
| "grad_norm": 2.5015647411346436, | |
| "learning_rate": 2.693347935079186e-06, | |
| "loss": 0.8415539264678955, | |
| "step": 846 | |
| }, | |
| { | |
| "epoch": 1.0354090354090355, | |
| "grad_norm": 2.349015235900879, | |
| "learning_rate": 2.6916161281611517e-06, | |
| "loss": 1.1573381423950195, | |
| "step": 848 | |
| }, | |
| { | |
| "epoch": 1.037851037851038, | |
| "grad_norm": 3.6411995887756348, | |
| "learning_rate": 2.6898800751637434e-06, | |
| "loss": 0.9261655807495117, | |
| "step": 850 | |
| }, | |
| { | |
| "epoch": 1.0402930402930404, | |
| "grad_norm": 4.917046546936035, | |
| "learning_rate": 2.6881397831638663e-06, | |
| "loss": 0.5568342208862305, | |
| "step": 852 | |
| }, | |
| { | |
| "epoch": 1.0427350427350428, | |
| "grad_norm": 2.052685260772705, | |
| "learning_rate": 2.686395259255707e-06, | |
| "loss": 1.1047786474227905, | |
| "step": 854 | |
| }, | |
| { | |
| "epoch": 1.0451770451770452, | |
| "grad_norm": 3.5746982097625732, | |
| "learning_rate": 2.684646510550701e-06, | |
| "loss": 0.916144609451294, | |
| "step": 856 | |
| }, | |
| { | |
| "epoch": 1.0476190476190477, | |
| "grad_norm": 3.18715238571167, | |
| "learning_rate": 2.6828935441775072e-06, | |
| "loss": 1.150810718536377, | |
| "step": 858 | |
| }, | |
| { | |
| "epoch": 1.05006105006105, | |
| "grad_norm": 2.545165538787842, | |
| "learning_rate": 2.6811363672819776e-06, | |
| "loss": 0.7769479155540466, | |
| "step": 860 | |
| }, | |
| { | |
| "epoch": 1.0525030525030525, | |
| "grad_norm": 9.713841438293457, | |
| "learning_rate": 2.6793749870271276e-06, | |
| "loss": 1.0475445985794067, | |
| "step": 862 | |
| }, | |
| { | |
| "epoch": 1.054945054945055, | |
| "grad_norm": 2.5627520084381104, | |
| "learning_rate": 2.6776094105931078e-06, | |
| "loss": 1.0767505168914795, | |
| "step": 864 | |
| }, | |
| { | |
| "epoch": 1.0573870573870574, | |
| "grad_norm": 5.523263931274414, | |
| "learning_rate": 2.6758396451771735e-06, | |
| "loss": 1.4204485416412354, | |
| "step": 866 | |
| }, | |
| { | |
| "epoch": 1.0598290598290598, | |
| "grad_norm": 5.128425598144531, | |
| "learning_rate": 2.674065697993657e-06, | |
| "loss": 0.433662474155426, | |
| "step": 868 | |
| }, | |
| { | |
| "epoch": 1.0622710622710623, | |
| "grad_norm": 3.0596179962158203, | |
| "learning_rate": 2.6722875762739364e-06, | |
| "loss": 0.9546130299568176, | |
| "step": 870 | |
| }, | |
| { | |
| "epoch": 1.0647130647130647, | |
| "grad_norm": 11.463479042053223, | |
| "learning_rate": 2.670505287266407e-06, | |
| "loss": 0.9810203909873962, | |
| "step": 872 | |
| }, | |
| { | |
| "epoch": 1.0671550671550671, | |
| "grad_norm": 3.037356376647949, | |
| "learning_rate": 2.6687188382364542e-06, | |
| "loss": 0.7727240324020386, | |
| "step": 874 | |
| }, | |
| { | |
| "epoch": 1.0695970695970696, | |
| "grad_norm": 4.121923923492432, | |
| "learning_rate": 2.6669282364664176e-06, | |
| "loss": 1.0611509084701538, | |
| "step": 876 | |
| }, | |
| { | |
| "epoch": 1.072039072039072, | |
| "grad_norm": 2.471468210220337, | |
| "learning_rate": 2.665133489255568e-06, | |
| "loss": 1.0493874549865723, | |
| "step": 878 | |
| }, | |
| { | |
| "epoch": 1.0744810744810744, | |
| "grad_norm": 2.382373094558716, | |
| "learning_rate": 2.6633346039200737e-06, | |
| "loss": 0.7326397895812988, | |
| "step": 880 | |
| }, | |
| { | |
| "epoch": 1.0769230769230769, | |
| "grad_norm": 3.580219030380249, | |
| "learning_rate": 2.661531587792972e-06, | |
| "loss": 1.0943450927734375, | |
| "step": 882 | |
| }, | |
| { | |
| "epoch": 1.0793650793650793, | |
| "grad_norm": 4.25157356262207, | |
| "learning_rate": 2.6597244482241388e-06, | |
| "loss": 0.8091971278190613, | |
| "step": 884 | |
| }, | |
| { | |
| "epoch": 1.0818070818070817, | |
| "grad_norm": 6.874667644500732, | |
| "learning_rate": 2.65791319258026e-06, | |
| "loss": 0.715369462966919, | |
| "step": 886 | |
| }, | |
| { | |
| "epoch": 1.0842490842490842, | |
| "grad_norm": 11.470134735107422, | |
| "learning_rate": 2.656097828244799e-06, | |
| "loss": 1.1214711666107178, | |
| "step": 888 | |
| }, | |
| { | |
| "epoch": 1.0866910866910866, | |
| "grad_norm": 3.6015729904174805, | |
| "learning_rate": 2.6542783626179684e-06, | |
| "loss": 0.8701647520065308, | |
| "step": 890 | |
| }, | |
| { | |
| "epoch": 1.089133089133089, | |
| "grad_norm": 2.125896453857422, | |
| "learning_rate": 2.652454803116701e-06, | |
| "loss": 1.0516958236694336, | |
| "step": 892 | |
| }, | |
| { | |
| "epoch": 1.0915750915750915, | |
| "grad_norm": 2.0405824184417725, | |
| "learning_rate": 2.650627157174615e-06, | |
| "loss": 1.0900695323944092, | |
| "step": 894 | |
| }, | |
| { | |
| "epoch": 1.0940170940170941, | |
| "grad_norm": 3.2506299018859863, | |
| "learning_rate": 2.64879543224199e-06, | |
| "loss": 1.0140894651412964, | |
| "step": 896 | |
| }, | |
| { | |
| "epoch": 1.0964590964590966, | |
| "grad_norm": 2.162565231323242, | |
| "learning_rate": 2.6469596357857302e-06, | |
| "loss": 0.8647994995117188, | |
| "step": 898 | |
| }, | |
| { | |
| "epoch": 1.098901098901099, | |
| "grad_norm": 3.627469301223755, | |
| "learning_rate": 2.6451197752893394e-06, | |
| "loss": 0.8046786785125732, | |
| "step": 900 | |
| }, | |
| { | |
| "epoch": 1.1013431013431014, | |
| "grad_norm": 4.078906059265137, | |
| "learning_rate": 2.6432758582528885e-06, | |
| "loss": 0.6189166307449341, | |
| "step": 902 | |
| }, | |
| { | |
| "epoch": 1.1037851037851039, | |
| "grad_norm": 1.9645013809204102, | |
| "learning_rate": 2.6414278921929814e-06, | |
| "loss": 0.6203906536102295, | |
| "step": 904 | |
| }, | |
| { | |
| "epoch": 1.1062271062271063, | |
| "grad_norm": 5.907010078430176, | |
| "learning_rate": 2.6395758846427314e-06, | |
| "loss": 1.0113993883132935, | |
| "step": 906 | |
| }, | |
| { | |
| "epoch": 1.1086691086691087, | |
| "grad_norm": 2.3445451259613037, | |
| "learning_rate": 2.6377198431517246e-06, | |
| "loss": 1.0835185050964355, | |
| "step": 908 | |
| }, | |
| { | |
| "epoch": 1.1111111111111112, | |
| "grad_norm": 4.328078746795654, | |
| "learning_rate": 2.6358597752859913e-06, | |
| "loss": 0.7534465193748474, | |
| "step": 910 | |
| }, | |
| { | |
| "epoch": 1.1135531135531136, | |
| "grad_norm": 4.2437286376953125, | |
| "learning_rate": 2.6339956886279755e-06, | |
| "loss": 1.1679747104644775, | |
| "step": 912 | |
| }, | |
| { | |
| "epoch": 1.115995115995116, | |
| "grad_norm": 4.095772743225098, | |
| "learning_rate": 2.6321275907765036e-06, | |
| "loss": 0.7348451614379883, | |
| "step": 914 | |
| }, | |
| { | |
| "epoch": 1.1184371184371185, | |
| "grad_norm": 3.989534854888916, | |
| "learning_rate": 2.6302554893467523e-06, | |
| "loss": 0.7824081182479858, | |
| "step": 916 | |
| }, | |
| { | |
| "epoch": 1.120879120879121, | |
| "grad_norm": 2.9929897785186768, | |
| "learning_rate": 2.6283793919702203e-06, | |
| "loss": 1.1799620389938354, | |
| "step": 918 | |
| }, | |
| { | |
| "epoch": 1.1233211233211233, | |
| "grad_norm": 4.536725044250488, | |
| "learning_rate": 2.6264993062946946e-06, | |
| "loss": 1.0080196857452393, | |
| "step": 920 | |
| }, | |
| { | |
| "epoch": 1.1257631257631258, | |
| "grad_norm": 2.9040870666503906, | |
| "learning_rate": 2.6246152399842207e-06, | |
| "loss": 0.949799656867981, | |
| "step": 922 | |
| }, | |
| { | |
| "epoch": 1.1282051282051282, | |
| "grad_norm": 6.9592671394348145, | |
| "learning_rate": 2.62272720071907e-06, | |
| "loss": 0.6982100605964661, | |
| "step": 924 | |
| }, | |
| { | |
| "epoch": 1.1306471306471306, | |
| "grad_norm": 3.2331862449645996, | |
| "learning_rate": 2.6208351961957092e-06, | |
| "loss": 0.65423983335495, | |
| "step": 926 | |
| }, | |
| { | |
| "epoch": 1.133089133089133, | |
| "grad_norm": 6.353219509124756, | |
| "learning_rate": 2.6189392341267706e-06, | |
| "loss": 0.5744038820266724, | |
| "step": 928 | |
| }, | |
| { | |
| "epoch": 1.1355311355311355, | |
| "grad_norm": 3.098659038543701, | |
| "learning_rate": 2.617039322241019e-06, | |
| "loss": 1.0322375297546387, | |
| "step": 930 | |
| }, | |
| { | |
| "epoch": 1.137973137973138, | |
| "grad_norm": 4.610161781311035, | |
| "learning_rate": 2.6151354682833186e-06, | |
| "loss": 1.1047866344451904, | |
| "step": 932 | |
| }, | |
| { | |
| "epoch": 1.1404151404151404, | |
| "grad_norm": 6.845473289489746, | |
| "learning_rate": 2.613227680014605e-06, | |
| "loss": 0.9233366250991821, | |
| "step": 934 | |
| }, | |
| { | |
| "epoch": 1.1428571428571428, | |
| "grad_norm": 2.7027156352996826, | |
| "learning_rate": 2.611315965211851e-06, | |
| "loss": 0.9028387069702148, | |
| "step": 936 | |
| }, | |
| { | |
| "epoch": 1.1452991452991452, | |
| "grad_norm": 3.1289923191070557, | |
| "learning_rate": 2.609400331668036e-06, | |
| "loss": 1.0243358612060547, | |
| "step": 938 | |
| }, | |
| { | |
| "epoch": 1.1477411477411477, | |
| "grad_norm": 3.091111660003662, | |
| "learning_rate": 2.607480787192113e-06, | |
| "loss": 1.074292540550232, | |
| "step": 940 | |
| }, | |
| { | |
| "epoch": 1.15018315018315, | |
| "grad_norm": 2.6475534439086914, | |
| "learning_rate": 2.6055573396089786e-06, | |
| "loss": 1.0188792943954468, | |
| "step": 942 | |
| }, | |
| { | |
| "epoch": 1.1526251526251525, | |
| "grad_norm": 3.1709463596343994, | |
| "learning_rate": 2.6036299967594408e-06, | |
| "loss": 0.819939136505127, | |
| "step": 944 | |
| }, | |
| { | |
| "epoch": 1.155067155067155, | |
| "grad_norm": 3.042470693588257, | |
| "learning_rate": 2.601698766500184e-06, | |
| "loss": 0.8466127514839172, | |
| "step": 946 | |
| }, | |
| { | |
| "epoch": 1.1575091575091574, | |
| "grad_norm": 5.738119602203369, | |
| "learning_rate": 2.5997636567037416e-06, | |
| "loss": 0.6221362352371216, | |
| "step": 948 | |
| }, | |
| { | |
| "epoch": 1.1599511599511598, | |
| "grad_norm": 8.702028274536133, | |
| "learning_rate": 2.5978246752584613e-06, | |
| "loss": 1.0508110523223877, | |
| "step": 950 | |
| }, | |
| { | |
| "epoch": 1.1623931623931625, | |
| "grad_norm": 2.88852858543396, | |
| "learning_rate": 2.5958818300684724e-06, | |
| "loss": 1.0352314710617065, | |
| "step": 952 | |
| }, | |
| { | |
| "epoch": 1.164835164835165, | |
| "grad_norm": 5.778299331665039, | |
| "learning_rate": 2.593935129053655e-06, | |
| "loss": 0.8930572271347046, | |
| "step": 954 | |
| }, | |
| { | |
| "epoch": 1.1672771672771673, | |
| "grad_norm": 2.0894954204559326, | |
| "learning_rate": 2.591984580149607e-06, | |
| "loss": 0.8333898782730103, | |
| "step": 956 | |
| }, | |
| { | |
| "epoch": 1.1697191697191698, | |
| "grad_norm": 2.431748628616333, | |
| "learning_rate": 2.5900301913076133e-06, | |
| "loss": 0.7571872472763062, | |
| "step": 958 | |
| }, | |
| { | |
| "epoch": 1.1721611721611722, | |
| "grad_norm": 3.1503281593322754, | |
| "learning_rate": 2.5880719704946092e-06, | |
| "loss": 1.1830954551696777, | |
| "step": 960 | |
| }, | |
| { | |
| "epoch": 1.1746031746031746, | |
| "grad_norm": 5.665327548980713, | |
| "learning_rate": 2.5861099256931546e-06, | |
| "loss": 0.9428277611732483, | |
| "step": 962 | |
| }, | |
| { | |
| "epoch": 1.177045177045177, | |
| "grad_norm": 2.1606311798095703, | |
| "learning_rate": 2.5841440649013944e-06, | |
| "loss": 0.7309147119522095, | |
| "step": 964 | |
| }, | |
| { | |
| "epoch": 1.1794871794871795, | |
| "grad_norm": 2.840561628341675, | |
| "learning_rate": 2.5821743961330307e-06, | |
| "loss": 1.1203508377075195, | |
| "step": 966 | |
| }, | |
| { | |
| "epoch": 1.181929181929182, | |
| "grad_norm": 3.1255927085876465, | |
| "learning_rate": 2.5802009274172878e-06, | |
| "loss": 1.1612833738327026, | |
| "step": 968 | |
| }, | |
| { | |
| "epoch": 1.1843711843711844, | |
| "grad_norm": 6.816147327423096, | |
| "learning_rate": 2.578223666798881e-06, | |
| "loss": 0.7847422957420349, | |
| "step": 970 | |
| }, | |
| { | |
| "epoch": 1.1868131868131868, | |
| "grad_norm": 3.903107166290283, | |
| "learning_rate": 2.5762426223379816e-06, | |
| "loss": 1.104196310043335, | |
| "step": 972 | |
| }, | |
| { | |
| "epoch": 1.1892551892551892, | |
| "grad_norm": 2.5234203338623047, | |
| "learning_rate": 2.5742578021101885e-06, | |
| "loss": 0.8677800893783569, | |
| "step": 974 | |
| }, | |
| { | |
| "epoch": 1.1916971916971917, | |
| "grad_norm": 5.139813423156738, | |
| "learning_rate": 2.5722692142064895e-06, | |
| "loss": 0.7534090280532837, | |
| "step": 976 | |
| }, | |
| { | |
| "epoch": 1.1941391941391941, | |
| "grad_norm": 2.7109110355377197, | |
| "learning_rate": 2.570276866733231e-06, | |
| "loss": 0.8627274036407471, | |
| "step": 978 | |
| }, | |
| { | |
| "epoch": 1.1965811965811965, | |
| "grad_norm": 1.9220620393753052, | |
| "learning_rate": 2.5682807678120867e-06, | |
| "loss": 1.1159474849700928, | |
| "step": 980 | |
| }, | |
| { | |
| "epoch": 1.199023199023199, | |
| "grad_norm": 2.1107513904571533, | |
| "learning_rate": 2.5662809255800215e-06, | |
| "loss": 1.0889906883239746, | |
| "step": 982 | |
| }, | |
| { | |
| "epoch": 1.2014652014652014, | |
| "grad_norm": 1.764716386795044, | |
| "learning_rate": 2.564277348189262e-06, | |
| "loss": 1.1670572757720947, | |
| "step": 984 | |
| }, | |
| { | |
| "epoch": 1.2039072039072038, | |
| "grad_norm": 5.729503631591797, | |
| "learning_rate": 2.562270043807257e-06, | |
| "loss": 0.74802565574646, | |
| "step": 986 | |
| }, | |
| { | |
| "epoch": 1.2063492063492063, | |
| "grad_norm": 6.3407673835754395, | |
| "learning_rate": 2.560259020616651e-06, | |
| "loss": 0.7788195610046387, | |
| "step": 988 | |
| }, | |
| { | |
| "epoch": 1.2087912087912087, | |
| "grad_norm": 4.765711784362793, | |
| "learning_rate": 2.558244286815247e-06, | |
| "loss": 0.8811196088790894, | |
| "step": 990 | |
| }, | |
| { | |
| "epoch": 1.2112332112332111, | |
| "grad_norm": 11.66674518585205, | |
| "learning_rate": 2.5562258506159756e-06, | |
| "loss": 1.0703587532043457, | |
| "step": 992 | |
| }, | |
| { | |
| "epoch": 1.2136752136752136, | |
| "grad_norm": 2.6325502395629883, | |
| "learning_rate": 2.5542037202468576e-06, | |
| "loss": 1.0166888236999512, | |
| "step": 994 | |
| }, | |
| { | |
| "epoch": 1.2161172161172162, | |
| "grad_norm": 3.6111443042755127, | |
| "learning_rate": 2.552177903950974e-06, | |
| "loss": 1.0367450714111328, | |
| "step": 996 | |
| }, | |
| { | |
| "epoch": 1.2185592185592187, | |
| "grad_norm": 2.560006856918335, | |
| "learning_rate": 2.550148409986431e-06, | |
| "loss": 1.058215618133545, | |
| "step": 998 | |
| }, | |
| { | |
| "epoch": 1.221001221001221, | |
| "grad_norm": 9.144640922546387, | |
| "learning_rate": 2.548115246626327e-06, | |
| "loss": 0.7685431838035583, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 1.2234432234432235, | |
| "grad_norm": 3.365041971206665, | |
| "learning_rate": 2.5460784221587187e-06, | |
| "loss": 0.9338477253913879, | |
| "step": 1002 | |
| }, | |
| { | |
| "epoch": 1.225885225885226, | |
| "grad_norm": 3.4426350593566895, | |
| "learning_rate": 2.5440379448865857e-06, | |
| "loss": 1.1116557121276855, | |
| "step": 1004 | |
| }, | |
| { | |
| "epoch": 1.2283272283272284, | |
| "grad_norm": 4.908542156219482, | |
| "learning_rate": 2.5419938231277996e-06, | |
| "loss": 0.823598325252533, | |
| "step": 1006 | |
| }, | |
| { | |
| "epoch": 1.2307692307692308, | |
| "grad_norm": 3.7626216411590576, | |
| "learning_rate": 2.539946065215087e-06, | |
| "loss": 0.788004994392395, | |
| "step": 1008 | |
| }, | |
| { | |
| "epoch": 1.2332112332112333, | |
| "grad_norm": 3.286329507827759, | |
| "learning_rate": 2.5378946794959985e-06, | |
| "loss": 1.1074113845825195, | |
| "step": 1010 | |
| }, | |
| { | |
| "epoch": 1.2356532356532357, | |
| "grad_norm": 1.5789026021957397, | |
| "learning_rate": 2.5358396743328726e-06, | |
| "loss": 0.6636894941329956, | |
| "step": 1012 | |
| }, | |
| { | |
| "epoch": 1.2380952380952381, | |
| "grad_norm": 2.153325319290161, | |
| "learning_rate": 2.5337810581028017e-06, | |
| "loss": 1.1224884986877441, | |
| "step": 1014 | |
| }, | |
| { | |
| "epoch": 1.2405372405372406, | |
| "grad_norm": 3.952401876449585, | |
| "learning_rate": 2.531718839197599e-06, | |
| "loss": 1.0443065166473389, | |
| "step": 1016 | |
| }, | |
| { | |
| "epoch": 1.242979242979243, | |
| "grad_norm": 2.471863031387329, | |
| "learning_rate": 2.529653026023764e-06, | |
| "loss": 1.0935031175613403, | |
| "step": 1018 | |
| }, | |
| { | |
| "epoch": 1.2454212454212454, | |
| "grad_norm": 2.034644365310669, | |
| "learning_rate": 2.5275836270024472e-06, | |
| "loss": 0.6596218943595886, | |
| "step": 1020 | |
| }, | |
| { | |
| "epoch": 1.2478632478632479, | |
| "grad_norm": 6.848422050476074, | |
| "learning_rate": 2.5255106505694184e-06, | |
| "loss": 0.8848438262939453, | |
| "step": 1022 | |
| }, | |
| { | |
| "epoch": 1.2503052503052503, | |
| "grad_norm": 2.603252649307251, | |
| "learning_rate": 2.5234341051750273e-06, | |
| "loss": 1.025794267654419, | |
| "step": 1024 | |
| }, | |
| { | |
| "epoch": 1.2527472527472527, | |
| "grad_norm": 6.599234580993652, | |
| "learning_rate": 2.521353999284175e-06, | |
| "loss": 1.0644009113311768, | |
| "step": 1026 | |
| }, | |
| { | |
| "epoch": 1.2551892551892552, | |
| "grad_norm": 3.0620803833007812, | |
| "learning_rate": 2.5192703413762762e-06, | |
| "loss": 0.9927738308906555, | |
| "step": 1028 | |
| }, | |
| { | |
| "epoch": 1.2576312576312576, | |
| "grad_norm": 2.506056547164917, | |
| "learning_rate": 2.5171831399452245e-06, | |
| "loss": 0.8921350836753845, | |
| "step": 1030 | |
| }, | |
| { | |
| "epoch": 1.26007326007326, | |
| "grad_norm": 6.412139415740967, | |
| "learning_rate": 2.5150924034993585e-06, | |
| "loss": 1.20728600025177, | |
| "step": 1032 | |
| }, | |
| { | |
| "epoch": 1.2625152625152625, | |
| "grad_norm": 5.982737064361572, | |
| "learning_rate": 2.5129981405614274e-06, | |
| "loss": 1.1233861446380615, | |
| "step": 1034 | |
| }, | |
| { | |
| "epoch": 1.264957264957265, | |
| "grad_norm": 1.4975414276123047, | |
| "learning_rate": 2.510900359668556e-06, | |
| "loss": 0.7186289429664612, | |
| "step": 1036 | |
| }, | |
| { | |
| "epoch": 1.2673992673992673, | |
| "grad_norm": 3.8226399421691895, | |
| "learning_rate": 2.5087990693722086e-06, | |
| "loss": 0.6558833122253418, | |
| "step": 1038 | |
| }, | |
| { | |
| "epoch": 1.2698412698412698, | |
| "grad_norm": 3.203143358230591, | |
| "learning_rate": 2.5066942782381576e-06, | |
| "loss": 0.7843560576438904, | |
| "step": 1040 | |
| }, | |
| { | |
| "epoch": 1.2722832722832722, | |
| "grad_norm": 2.2173891067504883, | |
| "learning_rate": 2.504585994846445e-06, | |
| "loss": 1.09853994846344, | |
| "step": 1042 | |
| }, | |
| { | |
| "epoch": 1.2747252747252746, | |
| "grad_norm": 2.547311782836914, | |
| "learning_rate": 2.5024742277913485e-06, | |
| "loss": 0.9359837174415588, | |
| "step": 1044 | |
| }, | |
| { | |
| "epoch": 1.277167277167277, | |
| "grad_norm": 2.9288382530212402, | |
| "learning_rate": 2.500358985681347e-06, | |
| "loss": 1.030897617340088, | |
| "step": 1046 | |
| }, | |
| { | |
| "epoch": 1.2796092796092795, | |
| "grad_norm": 5.406564712524414, | |
| "learning_rate": 2.4982402771390845e-06, | |
| "loss": 0.7548630237579346, | |
| "step": 1048 | |
| }, | |
| { | |
| "epoch": 1.282051282051282, | |
| "grad_norm": 6.541508197784424, | |
| "learning_rate": 2.496118110801338e-06, | |
| "loss": 0.4061433672904968, | |
| "step": 1050 | |
| }, | |
| { | |
| "epoch": 1.2844932844932844, | |
| "grad_norm": 3.803784132003784, | |
| "learning_rate": 2.4939924953189784e-06, | |
| "loss": 1.0374689102172852, | |
| "step": 1052 | |
| }, | |
| { | |
| "epoch": 1.2869352869352868, | |
| "grad_norm": 3.209254264831543, | |
| "learning_rate": 2.4918634393569357e-06, | |
| "loss": 0.9150550365447998, | |
| "step": 1054 | |
| }, | |
| { | |
| "epoch": 1.2893772893772895, | |
| "grad_norm": 4.5784149169921875, | |
| "learning_rate": 2.4897309515941663e-06, | |
| "loss": 0.8126834630966187, | |
| "step": 1056 | |
| }, | |
| { | |
| "epoch": 1.291819291819292, | |
| "grad_norm": 3.433272123336792, | |
| "learning_rate": 2.4875950407236168e-06, | |
| "loss": 1.2447450160980225, | |
| "step": 1058 | |
| }, | |
| { | |
| "epoch": 1.2942612942612943, | |
| "grad_norm": 2.5175130367279053, | |
| "learning_rate": 2.485455715452185e-06, | |
| "loss": 1.0734295845031738, | |
| "step": 1060 | |
| }, | |
| { | |
| "epoch": 1.2967032967032968, | |
| "grad_norm": 11.471171379089355, | |
| "learning_rate": 2.483312984500691e-06, | |
| "loss": 0.5801107883453369, | |
| "step": 1062 | |
| }, | |
| { | |
| "epoch": 1.2991452991452992, | |
| "grad_norm": 1.9704091548919678, | |
| "learning_rate": 2.481166856603835e-06, | |
| "loss": 1.0964834690093994, | |
| "step": 1064 | |
| }, | |
| { | |
| "epoch": 1.3015873015873016, | |
| "grad_norm": 2.9705400466918945, | |
| "learning_rate": 2.4790173405101655e-06, | |
| "loss": 0.7529336810112, | |
| "step": 1066 | |
| }, | |
| { | |
| "epoch": 1.304029304029304, | |
| "grad_norm": 3.7465691566467285, | |
| "learning_rate": 2.476864444982044e-06, | |
| "loss": 1.0144493579864502, | |
| "step": 1068 | |
| }, | |
| { | |
| "epoch": 1.3064713064713065, | |
| "grad_norm": 9.341630935668945, | |
| "learning_rate": 2.474708178795606e-06, | |
| "loss": 0.824262797832489, | |
| "step": 1070 | |
| }, | |
| { | |
| "epoch": 1.308913308913309, | |
| "grad_norm": 3.3401200771331787, | |
| "learning_rate": 2.4725485507407284e-06, | |
| "loss": 1.0086907148361206, | |
| "step": 1072 | |
| }, | |
| { | |
| "epoch": 1.3113553113553114, | |
| "grad_norm": 22.889719009399414, | |
| "learning_rate": 2.4703855696209926e-06, | |
| "loss": 0.8306890726089478, | |
| "step": 1074 | |
| }, | |
| { | |
| "epoch": 1.3137973137973138, | |
| "grad_norm": 2.3913421630859375, | |
| "learning_rate": 2.468219244253648e-06, | |
| "loss": 0.7409257888793945, | |
| "step": 1076 | |
| }, | |
| { | |
| "epoch": 1.3162393162393162, | |
| "grad_norm": 3.086717128753662, | |
| "learning_rate": 2.466049583469578e-06, | |
| "loss": 1.011396884918213, | |
| "step": 1078 | |
| }, | |
| { | |
| "epoch": 1.3186813186813187, | |
| "grad_norm": 5.698716163635254, | |
| "learning_rate": 2.4638765961132602e-06, | |
| "loss": 0.9011169672012329, | |
| "step": 1080 | |
| }, | |
| { | |
| "epoch": 1.321123321123321, | |
| "grad_norm": 4.568967342376709, | |
| "learning_rate": 2.461700291042734e-06, | |
| "loss": 0.9558655023574829, | |
| "step": 1082 | |
| }, | |
| { | |
| "epoch": 1.3235653235653235, | |
| "grad_norm": 2.135237216949463, | |
| "learning_rate": 2.459520677129565e-06, | |
| "loss": 1.0484675168991089, | |
| "step": 1084 | |
| }, | |
| { | |
| "epoch": 1.326007326007326, | |
| "grad_norm": 4.013932704925537, | |
| "learning_rate": 2.4573377632588033e-06, | |
| "loss": 1.1204602718353271, | |
| "step": 1086 | |
| }, | |
| { | |
| "epoch": 1.3284493284493284, | |
| "grad_norm": 5.49869966506958, | |
| "learning_rate": 2.455151558328955e-06, | |
| "loss": 1.1205787658691406, | |
| "step": 1088 | |
| }, | |
| { | |
| "epoch": 1.3308913308913308, | |
| "grad_norm": 2.8262710571289062, | |
| "learning_rate": 2.452962071251939e-06, | |
| "loss": 0.7297549247741699, | |
| "step": 1090 | |
| }, | |
| { | |
| "epoch": 1.3333333333333333, | |
| "grad_norm": 9.246023178100586, | |
| "learning_rate": 2.450769310953055e-06, | |
| "loss": 0.9638161063194275, | |
| "step": 1092 | |
| }, | |
| { | |
| "epoch": 1.3357753357753357, | |
| "grad_norm": 2.5899431705474854, | |
| "learning_rate": 2.448573286370946e-06, | |
| "loss": 1.0913522243499756, | |
| "step": 1094 | |
| }, | |
| { | |
| "epoch": 1.3382173382173383, | |
| "grad_norm": 3.924144744873047, | |
| "learning_rate": 2.446374006457561e-06, | |
| "loss": 0.549030065536499, | |
| "step": 1096 | |
| }, | |
| { | |
| "epoch": 1.3406593406593408, | |
| "grad_norm": 3.2062184810638428, | |
| "learning_rate": 2.4441714801781193e-06, | |
| "loss": 1.1665656566619873, | |
| "step": 1098 | |
| }, | |
| { | |
| "epoch": 1.3431013431013432, | |
| "grad_norm": 2.434178590774536, | |
| "learning_rate": 2.4419657165110737e-06, | |
| "loss": 1.072550654411316, | |
| "step": 1100 | |
| }, | |
| { | |
| "epoch": 1.3455433455433456, | |
| "grad_norm": 1.818092942237854, | |
| "learning_rate": 2.4397567244480746e-06, | |
| "loss": 1.0474157333374023, | |
| "step": 1102 | |
| }, | |
| { | |
| "epoch": 1.347985347985348, | |
| "grad_norm": 3.564908742904663, | |
| "learning_rate": 2.437544512993932e-06, | |
| "loss": 0.8284481167793274, | |
| "step": 1104 | |
| }, | |
| { | |
| "epoch": 1.3504273504273505, | |
| "grad_norm": 3.48178768157959, | |
| "learning_rate": 2.435329091166579e-06, | |
| "loss": 1.1350401639938354, | |
| "step": 1106 | |
| }, | |
| { | |
| "epoch": 1.352869352869353, | |
| "grad_norm": 3.384739398956299, | |
| "learning_rate": 2.4331104679970383e-06, | |
| "loss": 0.9051408767700195, | |
| "step": 1108 | |
| }, | |
| { | |
| "epoch": 1.3553113553113554, | |
| "grad_norm": 1.9093736410140991, | |
| "learning_rate": 2.4308886525293797e-06, | |
| "loss": 1.1148414611816406, | |
| "step": 1110 | |
| }, | |
| { | |
| "epoch": 1.3577533577533578, | |
| "grad_norm": 2.4108328819274902, | |
| "learning_rate": 2.4286636538206863e-06, | |
| "loss": 1.0187081098556519, | |
| "step": 1112 | |
| }, | |
| { | |
| "epoch": 1.3601953601953602, | |
| "grad_norm": 4.343973636627197, | |
| "learning_rate": 2.4264354809410203e-06, | |
| "loss": 1.0988596677780151, | |
| "step": 1114 | |
| }, | |
| { | |
| "epoch": 1.3626373626373627, | |
| "grad_norm": 6.484367847442627, | |
| "learning_rate": 2.4242041429733806e-06, | |
| "loss": 0.4784133732318878, | |
| "step": 1116 | |
| }, | |
| { | |
| "epoch": 1.3650793650793651, | |
| "grad_norm": 3.936992883682251, | |
| "learning_rate": 2.4219696490136683e-06, | |
| "loss": 1.1025023460388184, | |
| "step": 1118 | |
| }, | |
| { | |
| "epoch": 1.3675213675213675, | |
| "grad_norm": 2.2236127853393555, | |
| "learning_rate": 2.4197320081706513e-06, | |
| "loss": 1.034416675567627, | |
| "step": 1120 | |
| }, | |
| { | |
| "epoch": 1.36996336996337, | |
| "grad_norm": 8.061772346496582, | |
| "learning_rate": 2.4174912295659247e-06, | |
| "loss": 1.0712573528289795, | |
| "step": 1122 | |
| }, | |
| { | |
| "epoch": 1.3724053724053724, | |
| "grad_norm": 3.2876951694488525, | |
| "learning_rate": 2.4152473223338747e-06, | |
| "loss": 0.9827461242675781, | |
| "step": 1124 | |
| }, | |
| { | |
| "epoch": 1.3748473748473748, | |
| "grad_norm": 29.830081939697266, | |
| "learning_rate": 2.4130002956216404e-06, | |
| "loss": 1.181894063949585, | |
| "step": 1126 | |
| }, | |
| { | |
| "epoch": 1.3772893772893773, | |
| "grad_norm": 3.0782265663146973, | |
| "learning_rate": 2.410750158589078e-06, | |
| "loss": 1.0360252857208252, | |
| "step": 1128 | |
| }, | |
| { | |
| "epoch": 1.3797313797313797, | |
| "grad_norm": 2.537832736968994, | |
| "learning_rate": 2.4084969204087228e-06, | |
| "loss": 0.7446849942207336, | |
| "step": 1130 | |
| }, | |
| { | |
| "epoch": 1.3821733821733821, | |
| "grad_norm": 3.4676365852355957, | |
| "learning_rate": 2.4062405902657515e-06, | |
| "loss": 0.7851254343986511, | |
| "step": 1132 | |
| }, | |
| { | |
| "epoch": 1.3846153846153846, | |
| "grad_norm": 7.237531661987305, | |
| "learning_rate": 2.4039811773579447e-06, | |
| "loss": 0.49074786901474, | |
| "step": 1134 | |
| }, | |
| { | |
| "epoch": 1.387057387057387, | |
| "grad_norm": 4.967849254608154, | |
| "learning_rate": 2.4017186908956497e-06, | |
| "loss": 0.9201147556304932, | |
| "step": 1136 | |
| }, | |
| { | |
| "epoch": 1.3894993894993894, | |
| "grad_norm": 2.321727991104126, | |
| "learning_rate": 2.399453140101744e-06, | |
| "loss": 1.0836430788040161, | |
| "step": 1138 | |
| }, | |
| { | |
| "epoch": 1.3919413919413919, | |
| "grad_norm": 4.341231822967529, | |
| "learning_rate": 2.3971845342115953e-06, | |
| "loss": 0.8131306767463684, | |
| "step": 1140 | |
| }, | |
| { | |
| "epoch": 1.3943833943833943, | |
| "grad_norm": 6.055034637451172, | |
| "learning_rate": 2.394912882473026e-06, | |
| "loss": 0.9711018800735474, | |
| "step": 1142 | |
| }, | |
| { | |
| "epoch": 1.3968253968253967, | |
| "grad_norm": 6.066481113433838, | |
| "learning_rate": 2.3926381941462737e-06, | |
| "loss": 0.6168944239616394, | |
| "step": 1144 | |
| }, | |
| { | |
| "epoch": 1.3992673992673992, | |
| "grad_norm": 1.8251116275787354, | |
| "learning_rate": 2.3903604785039562e-06, | |
| "loss": 1.1195178031921387, | |
| "step": 1146 | |
| }, | |
| { | |
| "epoch": 1.4017094017094016, | |
| "grad_norm": 5.453243255615234, | |
| "learning_rate": 2.3880797448310302e-06, | |
| "loss": 0.8666636943817139, | |
| "step": 1148 | |
| }, | |
| { | |
| "epoch": 1.404151404151404, | |
| "grad_norm": 2.22725248336792, | |
| "learning_rate": 2.385796002424756e-06, | |
| "loss": 0.7636045813560486, | |
| "step": 1150 | |
| }, | |
| { | |
| "epoch": 1.4065934065934065, | |
| "grad_norm": 2.205007314682007, | |
| "learning_rate": 2.3835092605946593e-06, | |
| "loss": 0.819721519947052, | |
| "step": 1152 | |
| }, | |
| { | |
| "epoch": 1.409035409035409, | |
| "grad_norm": 1.947036623954773, | |
| "learning_rate": 2.3812195286624914e-06, | |
| "loss": 0.8865010142326355, | |
| "step": 1154 | |
| }, | |
| { | |
| "epoch": 1.4114774114774113, | |
| "grad_norm": 5.608088493347168, | |
| "learning_rate": 2.3789268159621936e-06, | |
| "loss": 0.7291615009307861, | |
| "step": 1156 | |
| }, | |
| { | |
| "epoch": 1.4139194139194138, | |
| "grad_norm": 5.703329563140869, | |
| "learning_rate": 2.376631131839858e-06, | |
| "loss": 0.9819634556770325, | |
| "step": 1158 | |
| }, | |
| { | |
| "epoch": 1.4163614163614164, | |
| "grad_norm": 2.490891695022583, | |
| "learning_rate": 2.3743324856536895e-06, | |
| "loss": 0.912743330001831, | |
| "step": 1160 | |
| }, | |
| { | |
| "epoch": 1.4188034188034189, | |
| "grad_norm": 2.2340879440307617, | |
| "learning_rate": 2.3720308867739674e-06, | |
| "loss": 0.7029795050621033, | |
| "step": 1162 | |
| }, | |
| { | |
| "epoch": 1.4212454212454213, | |
| "grad_norm": 9.891602516174316, | |
| "learning_rate": 2.3697263445830075e-06, | |
| "loss": 0.737637996673584, | |
| "step": 1164 | |
| }, | |
| { | |
| "epoch": 1.4236874236874237, | |
| "grad_norm": 2.812582492828369, | |
| "learning_rate": 2.367418868475124e-06, | |
| "loss": 1.2123751640319824, | |
| "step": 1166 | |
| }, | |
| { | |
| "epoch": 1.4261294261294262, | |
| "grad_norm": 1.0300575494766235, | |
| "learning_rate": 2.365108467856591e-06, | |
| "loss": 0.9363994598388672, | |
| "step": 1168 | |
| }, | |
| { | |
| "epoch": 1.4285714285714286, | |
| "grad_norm": 8.763495445251465, | |
| "learning_rate": 2.3627951521456027e-06, | |
| "loss": 0.8432412147521973, | |
| "step": 1170 | |
| }, | |
| { | |
| "epoch": 1.431013431013431, | |
| "grad_norm": 3.2396812438964844, | |
| "learning_rate": 2.36047893077224e-06, | |
| "loss": 1.0829715728759766, | |
| "step": 1172 | |
| }, | |
| { | |
| "epoch": 1.4334554334554335, | |
| "grad_norm": 1.775749921798706, | |
| "learning_rate": 2.358159813178425e-06, | |
| "loss": 1.0758581161499023, | |
| "step": 1174 | |
| }, | |
| { | |
| "epoch": 1.435897435897436, | |
| "grad_norm": 2.219306230545044, | |
| "learning_rate": 2.355837808817888e-06, | |
| "loss": 1.2304797172546387, | |
| "step": 1176 | |
| }, | |
| { | |
| "epoch": 1.4383394383394383, | |
| "grad_norm": 3.9443769454956055, | |
| "learning_rate": 2.3535129271561264e-06, | |
| "loss": 0.8725553750991821, | |
| "step": 1178 | |
| }, | |
| { | |
| "epoch": 1.4407814407814408, | |
| "grad_norm": 6.678852081298828, | |
| "learning_rate": 2.351185177670367e-06, | |
| "loss": 0.9805403351783752, | |
| "step": 1180 | |
| }, | |
| { | |
| "epoch": 1.4432234432234432, | |
| "grad_norm": 2.23203706741333, | |
| "learning_rate": 2.348854569849526e-06, | |
| "loss": 0.7744059562683105, | |
| "step": 1182 | |
| }, | |
| { | |
| "epoch": 1.4456654456654456, | |
| "grad_norm": 2.6113221645355225, | |
| "learning_rate": 2.346521113194173e-06, | |
| "loss": 1.1211048364639282, | |
| "step": 1184 | |
| }, | |
| { | |
| "epoch": 1.448107448107448, | |
| "grad_norm": 2.196120262145996, | |
| "learning_rate": 2.3441848172164904e-06, | |
| "loss": 1.0878716707229614, | |
| "step": 1186 | |
| }, | |
| { | |
| "epoch": 1.4505494505494505, | |
| "grad_norm": 17.605138778686523, | |
| "learning_rate": 2.3418456914402335e-06, | |
| "loss": 0.7848992943763733, | |
| "step": 1188 | |
| }, | |
| { | |
| "epoch": 1.452991452991453, | |
| "grad_norm": 4.244761943817139, | |
| "learning_rate": 2.339503745400695e-06, | |
| "loss": 1.0312596559524536, | |
| "step": 1190 | |
| }, | |
| { | |
| "epoch": 1.4554334554334554, | |
| "grad_norm": 4.223480701446533, | |
| "learning_rate": 2.3371589886446625e-06, | |
| "loss": 0.8653528690338135, | |
| "step": 1192 | |
| }, | |
| { | |
| "epoch": 1.4578754578754578, | |
| "grad_norm": 3.6434829235076904, | |
| "learning_rate": 2.334811430730383e-06, | |
| "loss": 0.7818981409072876, | |
| "step": 1194 | |
| }, | |
| { | |
| "epoch": 1.4603174603174602, | |
| "grad_norm": 2.2532246112823486, | |
| "learning_rate": 2.332461081227521e-06, | |
| "loss": 0.7512142062187195, | |
| "step": 1196 | |
| }, | |
| { | |
| "epoch": 1.462759462759463, | |
| "grad_norm": 3.4803028106689453, | |
| "learning_rate": 2.3301079497171204e-06, | |
| "loss": 0.9879604578018188, | |
| "step": 1198 | |
| }, | |
| { | |
| "epoch": 1.4652014652014653, | |
| "grad_norm": 3.4010937213897705, | |
| "learning_rate": 2.327752045791567e-06, | |
| "loss": 1.1199816465377808, | |
| "step": 1200 | |
| }, | |
| { | |
| "epoch": 1.4676434676434678, | |
| "grad_norm": 2.7178382873535156, | |
| "learning_rate": 2.3253933790545477e-06, | |
| "loss": 1.0186278820037842, | |
| "step": 1202 | |
| }, | |
| { | |
| "epoch": 1.4700854700854702, | |
| "grad_norm": 1.8763567209243774, | |
| "learning_rate": 2.3230319591210114e-06, | |
| "loss": 1.0206587314605713, | |
| "step": 1204 | |
| }, | |
| { | |
| "epoch": 1.4725274725274726, | |
| "grad_norm": 6.890467166900635, | |
| "learning_rate": 2.3206677956171307e-06, | |
| "loss": 0.9307314157485962, | |
| "step": 1206 | |
| }, | |
| { | |
| "epoch": 1.474969474969475, | |
| "grad_norm": 9.589298248291016, | |
| "learning_rate": 2.318300898180262e-06, | |
| "loss": 0.8029470443725586, | |
| "step": 1208 | |
| }, | |
| { | |
| "epoch": 1.4774114774114775, | |
| "grad_norm": 2.7268612384796143, | |
| "learning_rate": 2.315931276458907e-06, | |
| "loss": 0.8653849363327026, | |
| "step": 1210 | |
| }, | |
| { | |
| "epoch": 1.47985347985348, | |
| "grad_norm": 2.15297269821167, | |
| "learning_rate": 2.313558940112671e-06, | |
| "loss": 1.0210479497909546, | |
| "step": 1212 | |
| }, | |
| { | |
| "epoch": 1.4822954822954824, | |
| "grad_norm": 2.2720346450805664, | |
| "learning_rate": 2.3111838988122278e-06, | |
| "loss": 1.0664467811584473, | |
| "step": 1214 | |
| }, | |
| { | |
| "epoch": 1.4847374847374848, | |
| "grad_norm": 3.1207821369171143, | |
| "learning_rate": 2.3088061622392753e-06, | |
| "loss": 1.0064697265625, | |
| "step": 1216 | |
| }, | |
| { | |
| "epoch": 1.4871794871794872, | |
| "grad_norm": 6.959964275360107, | |
| "learning_rate": 2.3064257400865e-06, | |
| "loss": 0.28272712230682373, | |
| "step": 1218 | |
| }, | |
| { | |
| "epoch": 1.4896214896214897, | |
| "grad_norm": 1.743444800376892, | |
| "learning_rate": 2.3040426420575366e-06, | |
| "loss": 0.7155613303184509, | |
| "step": 1220 | |
| }, | |
| { | |
| "epoch": 1.492063492063492, | |
| "grad_norm": 4.6541008949279785, | |
| "learning_rate": 2.301656877866925e-06, | |
| "loss": 1.0751643180847168, | |
| "step": 1222 | |
| }, | |
| { | |
| "epoch": 1.4945054945054945, | |
| "grad_norm": 10.59604263305664, | |
| "learning_rate": 2.299268457240077e-06, | |
| "loss": 0.8486805558204651, | |
| "step": 1224 | |
| }, | |
| { | |
| "epoch": 1.496947496947497, | |
| "grad_norm": 2.0615234375, | |
| "learning_rate": 2.296877389913231e-06, | |
| "loss": 1.0569241046905518, | |
| "step": 1226 | |
| }, | |
| { | |
| "epoch": 1.4993894993894994, | |
| "grad_norm": 1.6598660945892334, | |
| "learning_rate": 2.2944836856334153e-06, | |
| "loss": 0.38845938444137573, | |
| "step": 1228 | |
| }, | |
| { | |
| "epoch": 1.5018315018315018, | |
| "grad_norm": 4.208990097045898, | |
| "learning_rate": 2.292087354158407e-06, | |
| "loss": 0.8051425218582153, | |
| "step": 1230 | |
| }, | |
| { | |
| "epoch": 1.5042735042735043, | |
| "grad_norm": 2.893552303314209, | |
| "learning_rate": 2.2896884052566934e-06, | |
| "loss": 0.663332462310791, | |
| "step": 1232 | |
| }, | |
| { | |
| "epoch": 1.5067155067155067, | |
| "grad_norm": 3.6931581497192383, | |
| "learning_rate": 2.287286848707431e-06, | |
| "loss": 1.0903408527374268, | |
| "step": 1234 | |
| }, | |
| { | |
| "epoch": 1.5091575091575091, | |
| "grad_norm": 3.0025713443756104, | |
| "learning_rate": 2.284882694300407e-06, | |
| "loss": 0.8944656848907471, | |
| "step": 1236 | |
| }, | |
| { | |
| "epoch": 1.5115995115995116, | |
| "grad_norm": 3.397970676422119, | |
| "learning_rate": 2.282475951835998e-06, | |
| "loss": 0.8679617643356323, | |
| "step": 1238 | |
| }, | |
| { | |
| "epoch": 1.514041514041514, | |
| "grad_norm": 2.675851345062256, | |
| "learning_rate": 2.2800666311251302e-06, | |
| "loss": 1.0300140380859375, | |
| "step": 1240 | |
| }, | |
| { | |
| "epoch": 1.5164835164835164, | |
| "grad_norm": 4.939141273498535, | |
| "learning_rate": 2.277654741989241e-06, | |
| "loss": 1.0655279159545898, | |
| "step": 1242 | |
| }, | |
| { | |
| "epoch": 1.5189255189255189, | |
| "grad_norm": 13.946297645568848, | |
| "learning_rate": 2.275240294260237e-06, | |
| "loss": 0.6649850606918335, | |
| "step": 1244 | |
| }, | |
| { | |
| "epoch": 1.5213675213675213, | |
| "grad_norm": 2.2974154949188232, | |
| "learning_rate": 2.2728232977804555e-06, | |
| "loss": 0.7620567083358765, | |
| "step": 1246 | |
| }, | |
| { | |
| "epoch": 1.5238095238095237, | |
| "grad_norm": 2.940153121948242, | |
| "learning_rate": 2.2704037624026216e-06, | |
| "loss": 0.885391354560852, | |
| "step": 1248 | |
| }, | |
| { | |
| "epoch": 1.5262515262515262, | |
| "grad_norm": 3.4247350692749023, | |
| "learning_rate": 2.267981697989813e-06, | |
| "loss": 1.1166784763336182, | |
| "step": 1250 | |
| }, | |
| { | |
| "epoch": 1.5286935286935286, | |
| "grad_norm": 7.821306228637695, | |
| "learning_rate": 2.2655571144154146e-06, | |
| "loss": 0.8540222644805908, | |
| "step": 1252 | |
| }, | |
| { | |
| "epoch": 1.531135531135531, | |
| "grad_norm": 2.5621931552886963, | |
| "learning_rate": 2.2631300215630816e-06, | |
| "loss": 1.033440113067627, | |
| "step": 1254 | |
| }, | |
| { | |
| "epoch": 1.5335775335775335, | |
| "grad_norm": 2.3692877292633057, | |
| "learning_rate": 2.260700429326698e-06, | |
| "loss": 1.1055670976638794, | |
| "step": 1256 | |
| }, | |
| { | |
| "epoch": 1.536019536019536, | |
| "grad_norm": 1.7013641595840454, | |
| "learning_rate": 2.258268347610335e-06, | |
| "loss": 0.3900951147079468, | |
| "step": 1258 | |
| }, | |
| { | |
| "epoch": 1.5384615384615383, | |
| "grad_norm": 11.654505729675293, | |
| "learning_rate": 2.2558337863282147e-06, | |
| "loss": 0.7620561718940735, | |
| "step": 1260 | |
| }, | |
| { | |
| "epoch": 1.5409035409035408, | |
| "grad_norm": 6.932107448577881, | |
| "learning_rate": 2.253396755404664e-06, | |
| "loss": 0.6907869577407837, | |
| "step": 1262 | |
| }, | |
| { | |
| "epoch": 1.5433455433455432, | |
| "grad_norm": 6.320849418640137, | |
| "learning_rate": 2.2509572647740794e-06, | |
| "loss": 0.8268896341323853, | |
| "step": 1264 | |
| }, | |
| { | |
| "epoch": 1.5457875457875456, | |
| "grad_norm": 2.399484395980835, | |
| "learning_rate": 2.248515324380884e-06, | |
| "loss": 0.8273181319236755, | |
| "step": 1266 | |
| }, | |
| { | |
| "epoch": 1.5482295482295483, | |
| "grad_norm": 2.6950504779815674, | |
| "learning_rate": 2.2460709441794846e-06, | |
| "loss": 1.0873603820800781, | |
| "step": 1268 | |
| }, | |
| { | |
| "epoch": 1.5506715506715507, | |
| "grad_norm": 36.27908706665039, | |
| "learning_rate": 2.243624134134237e-06, | |
| "loss": 0.5294194221496582, | |
| "step": 1270 | |
| }, | |
| { | |
| "epoch": 1.5531135531135531, | |
| "grad_norm": 4.894410133361816, | |
| "learning_rate": 2.2411749042194017e-06, | |
| "loss": 1.1328932046890259, | |
| "step": 1272 | |
| }, | |
| { | |
| "epoch": 1.5555555555555556, | |
| "grad_norm": 3.041984796524048, | |
| "learning_rate": 2.2387232644191004e-06, | |
| "loss": 0.910387396812439, | |
| "step": 1274 | |
| }, | |
| { | |
| "epoch": 1.557997557997558, | |
| "grad_norm": 6.796097278594971, | |
| "learning_rate": 2.2362692247272835e-06, | |
| "loss": 1.3359904289245605, | |
| "step": 1276 | |
| }, | |
| { | |
| "epoch": 1.5604395604395604, | |
| "grad_norm": 1.7979986667633057, | |
| "learning_rate": 2.2338127951476794e-06, | |
| "loss": 0.7651663422584534, | |
| "step": 1278 | |
| }, | |
| { | |
| "epoch": 1.5628815628815629, | |
| "grad_norm": 3.9291012287139893, | |
| "learning_rate": 2.2313539856937625e-06, | |
| "loss": 1.0647131204605103, | |
| "step": 1280 | |
| }, | |
| { | |
| "epoch": 1.5653235653235653, | |
| "grad_norm": 3.5667965412139893, | |
| "learning_rate": 2.228892806388707e-06, | |
| "loss": 0.7010360956192017, | |
| "step": 1282 | |
| }, | |
| { | |
| "epoch": 1.5677655677655677, | |
| "grad_norm": 2.790245294570923, | |
| "learning_rate": 2.2264292672653474e-06, | |
| "loss": 1.0297058820724487, | |
| "step": 1284 | |
| }, | |
| { | |
| "epoch": 1.5702075702075702, | |
| "grad_norm": 2.6824169158935547, | |
| "learning_rate": 2.2239633783661386e-06, | |
| "loss": 1.099691390991211, | |
| "step": 1286 | |
| }, | |
| { | |
| "epoch": 1.5726495726495726, | |
| "grad_norm": 2.646146059036255, | |
| "learning_rate": 2.2214951497431138e-06, | |
| "loss": 0.4930266737937927, | |
| "step": 1288 | |
| }, | |
| { | |
| "epoch": 1.575091575091575, | |
| "grad_norm": 2.2645068168640137, | |
| "learning_rate": 2.2190245914578444e-06, | |
| "loss": 0.8623871803283691, | |
| "step": 1290 | |
| }, | |
| { | |
| "epoch": 1.5775335775335775, | |
| "grad_norm": 7.941269397735596, | |
| "learning_rate": 2.2165517135813976e-06, | |
| "loss": 0.37714698910713196, | |
| "step": 1292 | |
| }, | |
| { | |
| "epoch": 1.5799755799755801, | |
| "grad_norm": 4.971907615661621, | |
| "learning_rate": 2.214076526194297e-06, | |
| "loss": 0.7036126852035522, | |
| "step": 1294 | |
| }, | |
| { | |
| "epoch": 1.5824175824175826, | |
| "grad_norm": 3.2157909870147705, | |
| "learning_rate": 2.2115990393864805e-06, | |
| "loss": 1.0419573783874512, | |
| "step": 1296 | |
| }, | |
| { | |
| "epoch": 1.584859584859585, | |
| "grad_norm": 3.5814459323883057, | |
| "learning_rate": 2.2091192632572594e-06, | |
| "loss": 1.1859967708587646, | |
| "step": 1298 | |
| }, | |
| { | |
| "epoch": 1.5873015873015874, | |
| "grad_norm": 4.483184337615967, | |
| "learning_rate": 2.206637207915278e-06, | |
| "loss": 0.9445092678070068, | |
| "step": 1300 | |
| }, | |
| { | |
| "epoch": 1.5897435897435899, | |
| "grad_norm": 6.791816234588623, | |
| "learning_rate": 2.2041528834784706e-06, | |
| "loss": 1.1775907278060913, | |
| "step": 1302 | |
| }, | |
| { | |
| "epoch": 1.5921855921855923, | |
| "grad_norm": 4.0544538497924805, | |
| "learning_rate": 2.201666300074021e-06, | |
| "loss": 1.0888113975524902, | |
| "step": 1304 | |
| }, | |
| { | |
| "epoch": 1.5946275946275947, | |
| "grad_norm": 2.553534507751465, | |
| "learning_rate": 2.199177467838323e-06, | |
| "loss": 0.7380733489990234, | |
| "step": 1306 | |
| }, | |
| { | |
| "epoch": 1.5970695970695972, | |
| "grad_norm": 1.9135996103286743, | |
| "learning_rate": 2.1966863969169376e-06, | |
| "loss": 1.0500788688659668, | |
| "step": 1308 | |
| }, | |
| { | |
| "epoch": 1.5995115995115996, | |
| "grad_norm": 2.675975799560547, | |
| "learning_rate": 2.1941930974645497e-06, | |
| "loss": 0.9689306020736694, | |
| "step": 1310 | |
| }, | |
| { | |
| "epoch": 1.601953601953602, | |
| "grad_norm": 2.6692886352539062, | |
| "learning_rate": 2.19169757964493e-06, | |
| "loss": 0.9954230189323425, | |
| "step": 1312 | |
| }, | |
| { | |
| "epoch": 1.6043956043956045, | |
| "grad_norm": 2.322364330291748, | |
| "learning_rate": 2.189199853630893e-06, | |
| "loss": 1.087646245956421, | |
| "step": 1314 | |
| }, | |
| { | |
| "epoch": 1.606837606837607, | |
| "grad_norm": 3.0362532138824463, | |
| "learning_rate": 2.1866999296042536e-06, | |
| "loss": 1.1456096172332764, | |
| "step": 1316 | |
| }, | |
| { | |
| "epoch": 1.6092796092796093, | |
| "grad_norm": 2.276644468307495, | |
| "learning_rate": 2.184197817755786e-06, | |
| "loss": 0.9878302812576294, | |
| "step": 1318 | |
| }, | |
| { | |
| "epoch": 1.6117216117216118, | |
| "grad_norm": 4.697429180145264, | |
| "learning_rate": 2.181693528285185e-06, | |
| "loss": 1.231487512588501, | |
| "step": 1320 | |
| }, | |
| { | |
| "epoch": 1.6141636141636142, | |
| "grad_norm": 1.8196386098861694, | |
| "learning_rate": 2.1791870714010214e-06, | |
| "loss": 1.0166926383972168, | |
| "step": 1322 | |
| }, | |
| { | |
| "epoch": 1.6166056166056166, | |
| "grad_norm": 2.218425989151001, | |
| "learning_rate": 2.176678457320701e-06, | |
| "loss": 1.1100245714187622, | |
| "step": 1324 | |
| }, | |
| { | |
| "epoch": 1.619047619047619, | |
| "grad_norm": 23.85593605041504, | |
| "learning_rate": 2.174167696270423e-06, | |
| "loss": 0.7836192846298218, | |
| "step": 1326 | |
| }, | |
| { | |
| "epoch": 1.6214896214896215, | |
| "grad_norm": 3.1244821548461914, | |
| "learning_rate": 2.1716547984851394e-06, | |
| "loss": 1.11873197555542, | |
| "step": 1328 | |
| }, | |
| { | |
| "epoch": 1.623931623931624, | |
| "grad_norm": 3.655869722366333, | |
| "learning_rate": 2.1691397742085118e-06, | |
| "loss": 1.0793089866638184, | |
| "step": 1330 | |
| }, | |
| { | |
| "epoch": 1.6263736263736264, | |
| "grad_norm": 3.339264154434204, | |
| "learning_rate": 2.1666226336928712e-06, | |
| "loss": 0.8472608327865601, | |
| "step": 1332 | |
| }, | |
| { | |
| "epoch": 1.6288156288156288, | |
| "grad_norm": 2.9374048709869385, | |
| "learning_rate": 2.1641033871991746e-06, | |
| "loss": 1.0283312797546387, | |
| "step": 1334 | |
| }, | |
| { | |
| "epoch": 1.6312576312576312, | |
| "grad_norm": 2.7302792072296143, | |
| "learning_rate": 2.161582044996963e-06, | |
| "loss": 0.63735431432724, | |
| "step": 1336 | |
| }, | |
| { | |
| "epoch": 1.6336996336996337, | |
| "grad_norm": 5.904194355010986, | |
| "learning_rate": 2.159058617364323e-06, | |
| "loss": 1.0554404258728027, | |
| "step": 1338 | |
| }, | |
| { | |
| "epoch": 1.636141636141636, | |
| "grad_norm": 6.066267490386963, | |
| "learning_rate": 2.1565331145878392e-06, | |
| "loss": 0.5332289934158325, | |
| "step": 1340 | |
| }, | |
| { | |
| "epoch": 1.6385836385836385, | |
| "grad_norm": 15.874334335327148, | |
| "learning_rate": 2.1540055469625585e-06, | |
| "loss": 0.4242510199546814, | |
| "step": 1342 | |
| }, | |
| { | |
| "epoch": 1.641025641025641, | |
| "grad_norm": 3.521592855453491, | |
| "learning_rate": 2.151475924791942e-06, | |
| "loss": 1.2175500392913818, | |
| "step": 1344 | |
| }, | |
| { | |
| "epoch": 1.6434676434676434, | |
| "grad_norm": 2.169109582901001, | |
| "learning_rate": 2.1489442583878286e-06, | |
| "loss": 1.0904210805892944, | |
| "step": 1346 | |
| }, | |
| { | |
| "epoch": 1.6459096459096458, | |
| "grad_norm": 6.483483791351318, | |
| "learning_rate": 2.1464105580703883e-06, | |
| "loss": 0.694223165512085, | |
| "step": 1348 | |
| }, | |
| { | |
| "epoch": 1.6483516483516483, | |
| "grad_norm": 2.6124441623687744, | |
| "learning_rate": 2.143874834168084e-06, | |
| "loss": 0.743478536605835, | |
| "step": 1350 | |
| }, | |
| { | |
| "epoch": 1.6507936507936507, | |
| "grad_norm": 4.436251163482666, | |
| "learning_rate": 2.141337097017625e-06, | |
| "loss": 1.0697133541107178, | |
| "step": 1352 | |
| }, | |
| { | |
| "epoch": 1.6532356532356531, | |
| "grad_norm": 3.2108986377716064, | |
| "learning_rate": 2.138797356963932e-06, | |
| "loss": 1.125101089477539, | |
| "step": 1354 | |
| }, | |
| { | |
| "epoch": 1.6556776556776556, | |
| "grad_norm": 3.272199869155884, | |
| "learning_rate": 2.1362556243600857e-06, | |
| "loss": 1.0752925872802734, | |
| "step": 1356 | |
| }, | |
| { | |
| "epoch": 1.658119658119658, | |
| "grad_norm": 1.7775236368179321, | |
| "learning_rate": 2.1337119095672917e-06, | |
| "loss": 0.5759439468383789, | |
| "step": 1358 | |
| }, | |
| { | |
| "epoch": 1.6605616605616604, | |
| "grad_norm": 6.5814056396484375, | |
| "learning_rate": 2.1311662229548354e-06, | |
| "loss": 0.7856385111808777, | |
| "step": 1360 | |
| }, | |
| { | |
| "epoch": 1.6630036630036629, | |
| "grad_norm": 4.052211284637451, | |
| "learning_rate": 2.1286185749000403e-06, | |
| "loss": 1.140655279159546, | |
| "step": 1362 | |
| }, | |
| { | |
| "epoch": 1.6654456654456653, | |
| "grad_norm": 2.508542537689209, | |
| "learning_rate": 2.1260689757882256e-06, | |
| "loss": 1.0907747745513916, | |
| "step": 1364 | |
| }, | |
| { | |
| "epoch": 1.6678876678876677, | |
| "grad_norm": 5.493361473083496, | |
| "learning_rate": 2.1235174360126634e-06, | |
| "loss": 1.0748016834259033, | |
| "step": 1366 | |
| }, | |
| { | |
| "epoch": 1.6703296703296702, | |
| "grad_norm": 6.831475734710693, | |
| "learning_rate": 2.1209639659745374e-06, | |
| "loss": 1.06305730342865, | |
| "step": 1368 | |
| }, | |
| { | |
| "epoch": 1.6727716727716728, | |
| "grad_norm": 2.4477882385253906, | |
| "learning_rate": 2.1184085760828996e-06, | |
| "loss": 1.089266300201416, | |
| "step": 1370 | |
| }, | |
| { | |
| "epoch": 1.6752136752136753, | |
| "grad_norm": 2.9759106636047363, | |
| "learning_rate": 2.115851276754628e-06, | |
| "loss": 0.7721976637840271, | |
| "step": 1372 | |
| }, | |
| { | |
| "epoch": 1.6776556776556777, | |
| "grad_norm": 3.7026004791259766, | |
| "learning_rate": 2.1132920784143846e-06, | |
| "loss": 0.3019918203353882, | |
| "step": 1374 | |
| }, | |
| { | |
| "epoch": 1.6800976800976801, | |
| "grad_norm": 5.183734893798828, | |
| "learning_rate": 2.110730991494572e-06, | |
| "loss": 1.1731500625610352, | |
| "step": 1376 | |
| }, | |
| { | |
| "epoch": 1.6825396825396826, | |
| "grad_norm": 1.8324676752090454, | |
| "learning_rate": 2.108168026435293e-06, | |
| "loss": 1.159613847732544, | |
| "step": 1378 | |
| }, | |
| { | |
| "epoch": 1.684981684981685, | |
| "grad_norm": 1.1377415657043457, | |
| "learning_rate": 2.1056031936843046e-06, | |
| "loss": 0.93675696849823, | |
| "step": 1380 | |
| }, | |
| { | |
| "epoch": 1.6874236874236874, | |
| "grad_norm": 9.664449691772461, | |
| "learning_rate": 2.1030365036969785e-06, | |
| "loss": 1.0028681755065918, | |
| "step": 1382 | |
| }, | |
| { | |
| "epoch": 1.6898656898656899, | |
| "grad_norm": 2.9405059814453125, | |
| "learning_rate": 2.100467966936257e-06, | |
| "loss": 1.1557364463806152, | |
| "step": 1384 | |
| }, | |
| { | |
| "epoch": 1.6923076923076923, | |
| "grad_norm": 2.159385919570923, | |
| "learning_rate": 2.09789759387261e-06, | |
| "loss": 1.1020570993423462, | |
| "step": 1386 | |
| }, | |
| { | |
| "epoch": 1.6947496947496947, | |
| "grad_norm": 4.435818672180176, | |
| "learning_rate": 2.0953253949839936e-06, | |
| "loss": 1.1116677522659302, | |
| "step": 1388 | |
| }, | |
| { | |
| "epoch": 1.6971916971916972, | |
| "grad_norm": 4.153209686279297, | |
| "learning_rate": 2.0927513807558083e-06, | |
| "loss": 0.4230949282646179, | |
| "step": 1390 | |
| }, | |
| { | |
| "epoch": 1.6996336996336996, | |
| "grad_norm": 2.028982162475586, | |
| "learning_rate": 2.0901755616808515e-06, | |
| "loss": 1.0068018436431885, | |
| "step": 1392 | |
| }, | |
| { | |
| "epoch": 1.702075702075702, | |
| "grad_norm": 2.8719983100891113, | |
| "learning_rate": 2.08759794825928e-06, | |
| "loss": 0.9289462566375732, | |
| "step": 1394 | |
| }, | |
| { | |
| "epoch": 1.7045177045177047, | |
| "grad_norm": 7.478988170623779, | |
| "learning_rate": 2.085018550998565e-06, | |
| "loss": 1.1604743003845215, | |
| "step": 1396 | |
| }, | |
| { | |
| "epoch": 1.7069597069597071, | |
| "grad_norm": 5.984460830688477, | |
| "learning_rate": 2.0824373804134493e-06, | |
| "loss": 0.9005318880081177, | |
| "step": 1398 | |
| }, | |
| { | |
| "epoch": 1.7094017094017095, | |
| "grad_norm": 4.825011730194092, | |
| "learning_rate": 2.0798544470259038e-06, | |
| "loss": 0.4574193060398102, | |
| "step": 1400 | |
| }, | |
| { | |
| "epoch": 1.711843711843712, | |
| "grad_norm": 5.245497226715088, | |
| "learning_rate": 2.077269761365086e-06, | |
| "loss": 0.9255741834640503, | |
| "step": 1402 | |
| }, | |
| { | |
| "epoch": 1.7142857142857144, | |
| "grad_norm": 2.524322509765625, | |
| "learning_rate": 2.0746833339672966e-06, | |
| "loss": 1.084243893623352, | |
| "step": 1404 | |
| }, | |
| { | |
| "epoch": 1.7167277167277168, | |
| "grad_norm": 6.121528625488281, | |
| "learning_rate": 2.0720951753759365e-06, | |
| "loss": 0.5424783229827881, | |
| "step": 1406 | |
| }, | |
| { | |
| "epoch": 1.7191697191697193, | |
| "grad_norm": 2.459991216659546, | |
| "learning_rate": 2.0695052961414617e-06, | |
| "loss": 1.0562982559204102, | |
| "step": 1408 | |
| }, | |
| { | |
| "epoch": 1.7216117216117217, | |
| "grad_norm": 5.736250877380371, | |
| "learning_rate": 2.066913706821346e-06, | |
| "loss": 0.7612860202789307, | |
| "step": 1410 | |
| }, | |
| { | |
| "epoch": 1.7240537240537241, | |
| "grad_norm": 6.192779541015625, | |
| "learning_rate": 2.064320417980031e-06, | |
| "loss": 0.8354834318161011, | |
| "step": 1412 | |
| }, | |
| { | |
| "epoch": 1.7264957264957266, | |
| "grad_norm": 2.2198197841644287, | |
| "learning_rate": 2.061725440188887e-06, | |
| "loss": 1.0771019458770752, | |
| "step": 1414 | |
| }, | |
| { | |
| "epoch": 1.728937728937729, | |
| "grad_norm": 4.956079959869385, | |
| "learning_rate": 2.059128784026171e-06, | |
| "loss": 0.9273592233657837, | |
| "step": 1416 | |
| }, | |
| { | |
| "epoch": 1.7313797313797314, | |
| "grad_norm": 4.622679233551025, | |
| "learning_rate": 2.0565304600769793e-06, | |
| "loss": 1.088039755821228, | |
| "step": 1418 | |
| }, | |
| { | |
| "epoch": 1.7338217338217339, | |
| "grad_norm": 2.973011016845703, | |
| "learning_rate": 2.0539304789332086e-06, | |
| "loss": 0.8386093974113464, | |
| "step": 1420 | |
| }, | |
| { | |
| "epoch": 1.7362637362637363, | |
| "grad_norm": 4.763584136962891, | |
| "learning_rate": 2.05132885119351e-06, | |
| "loss": 0.8466081619262695, | |
| "step": 1422 | |
| }, | |
| { | |
| "epoch": 1.7387057387057387, | |
| "grad_norm": 5.492987632751465, | |
| "learning_rate": 2.048725587463247e-06, | |
| "loss": 0.8130955696105957, | |
| "step": 1424 | |
| }, | |
| { | |
| "epoch": 1.7411477411477412, | |
| "grad_norm": 3.7499868869781494, | |
| "learning_rate": 2.046120698354453e-06, | |
| "loss": 0.7670303583145142, | |
| "step": 1426 | |
| }, | |
| { | |
| "epoch": 1.7435897435897436, | |
| "grad_norm": 2.0593535900115967, | |
| "learning_rate": 2.0435141944857857e-06, | |
| "loss": 1.1218202114105225, | |
| "step": 1428 | |
| }, | |
| { | |
| "epoch": 1.746031746031746, | |
| "grad_norm": 3.6221861839294434, | |
| "learning_rate": 2.0409060864824866e-06, | |
| "loss": 1.162003517150879, | |
| "step": 1430 | |
| }, | |
| { | |
| "epoch": 1.7484737484737485, | |
| "grad_norm": 67.97221374511719, | |
| "learning_rate": 2.038296384976335e-06, | |
| "loss": 0.7413933277130127, | |
| "step": 1432 | |
| }, | |
| { | |
| "epoch": 1.750915750915751, | |
| "grad_norm": 2.9006736278533936, | |
| "learning_rate": 2.0356851006056082e-06, | |
| "loss": 0.6421722173690796, | |
| "step": 1434 | |
| }, | |
| { | |
| "epoch": 1.7533577533577533, | |
| "grad_norm": 2.640103578567505, | |
| "learning_rate": 2.0330722440150333e-06, | |
| "loss": 0.7539623975753784, | |
| "step": 1436 | |
| }, | |
| { | |
| "epoch": 1.7557997557997558, | |
| "grad_norm": 4.3383097648620605, | |
| "learning_rate": 2.030457825855748e-06, | |
| "loss": 0.5598723888397217, | |
| "step": 1438 | |
| }, | |
| { | |
| "epoch": 1.7582417582417582, | |
| "grad_norm": 2.5532031059265137, | |
| "learning_rate": 2.027841856785255e-06, | |
| "loss": 1.103808879852295, | |
| "step": 1440 | |
| }, | |
| { | |
| "epoch": 1.7606837606837606, | |
| "grad_norm": 2.8078770637512207, | |
| "learning_rate": 2.0252243474673805e-06, | |
| "loss": 0.8731648921966553, | |
| "step": 1442 | |
| }, | |
| { | |
| "epoch": 1.763125763125763, | |
| "grad_norm": 3.871809482574463, | |
| "learning_rate": 2.022605308572227e-06, | |
| "loss": 0.7328888177871704, | |
| "step": 1444 | |
| }, | |
| { | |
| "epoch": 1.7655677655677655, | |
| "grad_norm": 10.222489356994629, | |
| "learning_rate": 2.0199847507761355e-06, | |
| "loss": 1.0768474340438843, | |
| "step": 1446 | |
| }, | |
| { | |
| "epoch": 1.768009768009768, | |
| "grad_norm": 2.114724636077881, | |
| "learning_rate": 2.0173626847616348e-06, | |
| "loss": 1.1010804176330566, | |
| "step": 1448 | |
| }, | |
| { | |
| "epoch": 1.7704517704517704, | |
| "grad_norm": 60.75843811035156, | |
| "learning_rate": 2.014739121217405e-06, | |
| "loss": 1.079850673675537, | |
| "step": 1450 | |
| }, | |
| { | |
| "epoch": 1.7728937728937728, | |
| "grad_norm": 4.489736557006836, | |
| "learning_rate": 2.0121140708382295e-06, | |
| "loss": 0.3613211512565613, | |
| "step": 1452 | |
| }, | |
| { | |
| "epoch": 1.7753357753357752, | |
| "grad_norm": 2.230958938598633, | |
| "learning_rate": 2.009487544324953e-06, | |
| "loss": 0.7978078126907349, | |
| "step": 1454 | |
| }, | |
| { | |
| "epoch": 1.7777777777777777, | |
| "grad_norm": 3.3382315635681152, | |
| "learning_rate": 2.006859552384437e-06, | |
| "loss": 0.49986037611961365, | |
| "step": 1456 | |
| }, | |
| { | |
| "epoch": 1.7802197802197801, | |
| "grad_norm": 2.734253168106079, | |
| "learning_rate": 2.0042301057295178e-06, | |
| "loss": 1.0410568714141846, | |
| "step": 1458 | |
| }, | |
| { | |
| "epoch": 1.7826617826617825, | |
| "grad_norm": 5.776972770690918, | |
| "learning_rate": 2.0015992150789602e-06, | |
| "loss": 1.0696702003479004, | |
| "step": 1460 | |
| }, | |
| { | |
| "epoch": 1.785103785103785, | |
| "grad_norm": 2.856689453125, | |
| "learning_rate": 1.998966891157417e-06, | |
| "loss": 0.9219186902046204, | |
| "step": 1462 | |
| }, | |
| { | |
| "epoch": 1.7875457875457874, | |
| "grad_norm": 3.461794853210449, | |
| "learning_rate": 1.9963331446953825e-06, | |
| "loss": 0.7723693251609802, | |
| "step": 1464 | |
| }, | |
| { | |
| "epoch": 1.7899877899877898, | |
| "grad_norm": 3.8800973892211914, | |
| "learning_rate": 1.99369798642915e-06, | |
| "loss": 0.845341145992279, | |
| "step": 1466 | |
| }, | |
| { | |
| "epoch": 1.7924297924297923, | |
| "grad_norm": 40.98175048828125, | |
| "learning_rate": 1.9910614271007688e-06, | |
| "loss": 0.8349391222000122, | |
| "step": 1468 | |
| }, | |
| { | |
| "epoch": 1.7948717948717947, | |
| "grad_norm": 18.2846736907959, | |
| "learning_rate": 1.9884234774579986e-06, | |
| "loss": 1.0727319717407227, | |
| "step": 1470 | |
| }, | |
| { | |
| "epoch": 1.7973137973137974, | |
| "grad_norm": 17.67431640625, | |
| "learning_rate": 1.985784148254267e-06, | |
| "loss": 1.0455303192138672, | |
| "step": 1472 | |
| }, | |
| { | |
| "epoch": 1.7997557997557998, | |
| "grad_norm": 3.976879119873047, | |
| "learning_rate": 1.9831434502486253e-06, | |
| "loss": 0.7013100385665894, | |
| "step": 1474 | |
| }, | |
| { | |
| "epoch": 1.8021978021978022, | |
| "grad_norm": 4.2198486328125, | |
| "learning_rate": 1.980501394205704e-06, | |
| "loss": 0.7062541246414185, | |
| "step": 1476 | |
| }, | |
| { | |
| "epoch": 1.8046398046398047, | |
| "grad_norm": 2.7160003185272217, | |
| "learning_rate": 1.9778579908956713e-06, | |
| "loss": 0.8056910634040833, | |
| "step": 1478 | |
| }, | |
| { | |
| "epoch": 1.807081807081807, | |
| "grad_norm": 2.3082945346832275, | |
| "learning_rate": 1.9752132510941846e-06, | |
| "loss": 0.41410213708877563, | |
| "step": 1480 | |
| }, | |
| { | |
| "epoch": 1.8095238095238095, | |
| "grad_norm": 26.796232223510742, | |
| "learning_rate": 1.9725671855823528e-06, | |
| "loss": 0.8121680021286011, | |
| "step": 1482 | |
| }, | |
| { | |
| "epoch": 1.811965811965812, | |
| "grad_norm": 3.732379198074341, | |
| "learning_rate": 1.9699198051466863e-06, | |
| "loss": 1.2279154062271118, | |
| "step": 1484 | |
| }, | |
| { | |
| "epoch": 1.8144078144078144, | |
| "grad_norm": 3.2240803241729736, | |
| "learning_rate": 1.9672711205790568e-06, | |
| "loss": 1.0862740278244019, | |
| "step": 1486 | |
| }, | |
| { | |
| "epoch": 1.8168498168498168, | |
| "grad_norm": 2.949432134628296, | |
| "learning_rate": 1.964621142676652e-06, | |
| "loss": 0.6904615759849548, | |
| "step": 1488 | |
| }, | |
| { | |
| "epoch": 1.8192918192918193, | |
| "grad_norm": 2.2964892387390137, | |
| "learning_rate": 1.9619698822419323e-06, | |
| "loss": 0.8438737392425537, | |
| "step": 1490 | |
| }, | |
| { | |
| "epoch": 1.8217338217338217, | |
| "grad_norm": 3.897786855697632, | |
| "learning_rate": 1.9593173500825845e-06, | |
| "loss": 1.0971468687057495, | |
| "step": 1492 | |
| }, | |
| { | |
| "epoch": 1.8241758241758241, | |
| "grad_norm": 11.29526138305664, | |
| "learning_rate": 1.9566635570114833e-06, | |
| "loss": 0.7382890582084656, | |
| "step": 1494 | |
| }, | |
| { | |
| "epoch": 1.8266178266178266, | |
| "grad_norm": 5.478521347045898, | |
| "learning_rate": 1.954008513846638e-06, | |
| "loss": 0.8590801954269409, | |
| "step": 1496 | |
| }, | |
| { | |
| "epoch": 1.8290598290598292, | |
| "grad_norm": 3.4063892364501953, | |
| "learning_rate": 1.9513522314111592e-06, | |
| "loss": 1.4989734888076782, | |
| "step": 1498 | |
| }, | |
| { | |
| "epoch": 1.8315018315018317, | |
| "grad_norm": 2.7821717262268066, | |
| "learning_rate": 1.9486947205332055e-06, | |
| "loss": 1.209479570388794, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 1.833943833943834, | |
| "grad_norm": 4.509057998657227, | |
| "learning_rate": 1.946035992045945e-06, | |
| "loss": 0.9144811630249023, | |
| "step": 1502 | |
| }, | |
| { | |
| "epoch": 1.8363858363858365, | |
| "grad_norm": 1.0088343620300293, | |
| "learning_rate": 1.9433760567875084e-06, | |
| "loss": 0.7974712252616882, | |
| "step": 1504 | |
| }, | |
| { | |
| "epoch": 1.838827838827839, | |
| "grad_norm": 9.733602523803711, | |
| "learning_rate": 1.9407149256009467e-06, | |
| "loss": 0.7621541023254395, | |
| "step": 1506 | |
| }, | |
| { | |
| "epoch": 1.8412698412698414, | |
| "grad_norm": 2.954293727874756, | |
| "learning_rate": 1.9380526093341855e-06, | |
| "loss": 0.8016723394393921, | |
| "step": 1508 | |
| }, | |
| { | |
| "epoch": 1.8437118437118438, | |
| "grad_norm": 2.0507099628448486, | |
| "learning_rate": 1.935389118839981e-06, | |
| "loss": 1.1339998245239258, | |
| "step": 1510 | |
| }, | |
| { | |
| "epoch": 1.8461538461538463, | |
| "grad_norm": 2.552881956100464, | |
| "learning_rate": 1.932724464975876e-06, | |
| "loss": 1.1323925256729126, | |
| "step": 1512 | |
| }, | |
| { | |
| "epoch": 1.8485958485958487, | |
| "grad_norm": 1.9689065217971802, | |
| "learning_rate": 1.930058658604156e-06, | |
| "loss": 1.0781590938568115, | |
| "step": 1514 | |
| }, | |
| { | |
| "epoch": 1.8510378510378511, | |
| "grad_norm": 12.905902862548828, | |
| "learning_rate": 1.927391710591806e-06, | |
| "loss": 0.8360648155212402, | |
| "step": 1516 | |
| }, | |
| { | |
| "epoch": 1.8534798534798536, | |
| "grad_norm": 2.5668632984161377, | |
| "learning_rate": 1.9247236318104625e-06, | |
| "loss": 1.122623324394226, | |
| "step": 1518 | |
| }, | |
| { | |
| "epoch": 1.855921855921856, | |
| "grad_norm": 8.542081832885742, | |
| "learning_rate": 1.9220544331363723e-06, | |
| "loss": 0.8414202332496643, | |
| "step": 1520 | |
| }, | |
| { | |
| "epoch": 1.8583638583638584, | |
| "grad_norm": 13.200721740722656, | |
| "learning_rate": 1.919384125450347e-06, | |
| "loss": 0.9256816506385803, | |
| "step": 1522 | |
| }, | |
| { | |
| "epoch": 1.8608058608058609, | |
| "grad_norm": 4.116710662841797, | |
| "learning_rate": 1.916712719637722e-06, | |
| "loss": 0.9429628849029541, | |
| "step": 1524 | |
| }, | |
| { | |
| "epoch": 1.8632478632478633, | |
| "grad_norm": 3.014883041381836, | |
| "learning_rate": 1.9140402265883044e-06, | |
| "loss": 0.9422796964645386, | |
| "step": 1526 | |
| }, | |
| { | |
| "epoch": 1.8656898656898657, | |
| "grad_norm": 3.4821338653564453, | |
| "learning_rate": 1.9113666571963364e-06, | |
| "loss": 0.7582896947860718, | |
| "step": 1528 | |
| }, | |
| { | |
| "epoch": 1.8681318681318682, | |
| "grad_norm": 5.297958850860596, | |
| "learning_rate": 1.908692022360447e-06, | |
| "loss": 0.9415192604064941, | |
| "step": 1530 | |
| }, | |
| { | |
| "epoch": 1.8705738705738706, | |
| "grad_norm": 14.892047882080078, | |
| "learning_rate": 1.9060163329836083e-06, | |
| "loss": 0.8154198527336121, | |
| "step": 1532 | |
| }, | |
| { | |
| "epoch": 1.873015873015873, | |
| "grad_norm": 2.3350753784179688, | |
| "learning_rate": 1.9033395999730926e-06, | |
| "loss": 0.8682588338851929, | |
| "step": 1534 | |
| }, | |
| { | |
| "epoch": 1.8754578754578755, | |
| "grad_norm": 1.8058019876480103, | |
| "learning_rate": 1.9006618342404239e-06, | |
| "loss": 1.0674057006835938, | |
| "step": 1536 | |
| }, | |
| { | |
| "epoch": 1.877899877899878, | |
| "grad_norm": 2.301260232925415, | |
| "learning_rate": 1.8979830467013373e-06, | |
| "loss": 0.9699643850326538, | |
| "step": 1538 | |
| }, | |
| { | |
| "epoch": 1.8803418803418803, | |
| "grad_norm": 2.3693697452545166, | |
| "learning_rate": 1.8953032482757344e-06, | |
| "loss": 1.0694103240966797, | |
| "step": 1540 | |
| }, | |
| { | |
| "epoch": 1.8827838827838828, | |
| "grad_norm": 2.177583694458008, | |
| "learning_rate": 1.892622449887636e-06, | |
| "loss": 1.097899079322815, | |
| "step": 1542 | |
| }, | |
| { | |
| "epoch": 1.8852258852258852, | |
| "grad_norm": 3.3463196754455566, | |
| "learning_rate": 1.8899406624651397e-06, | |
| "loss": 0.9916119575500488, | |
| "step": 1544 | |
| }, | |
| { | |
| "epoch": 1.8876678876678876, | |
| "grad_norm": 2.100240468978882, | |
| "learning_rate": 1.8872578969403754e-06, | |
| "loss": 1.0629746913909912, | |
| "step": 1546 | |
| }, | |
| { | |
| "epoch": 1.89010989010989, | |
| "grad_norm": 5.089227199554443, | |
| "learning_rate": 1.8845741642494586e-06, | |
| "loss": 1.0753787755966187, | |
| "step": 1548 | |
| }, | |
| { | |
| "epoch": 1.8925518925518925, | |
| "grad_norm": 3.7538998126983643, | |
| "learning_rate": 1.8818894753324494e-06, | |
| "loss": 0.6911134719848633, | |
| "step": 1550 | |
| }, | |
| { | |
| "epoch": 1.894993894993895, | |
| "grad_norm": 3.1507911682128906, | |
| "learning_rate": 1.8792038411333044e-06, | |
| "loss": 1.1435554027557373, | |
| "step": 1552 | |
| }, | |
| { | |
| "epoch": 1.8974358974358974, | |
| "grad_norm": 6.201906204223633, | |
| "learning_rate": 1.8765172725998344e-06, | |
| "loss": 0.8441902995109558, | |
| "step": 1554 | |
| }, | |
| { | |
| "epoch": 1.8998778998778998, | |
| "grad_norm": 5.505573749542236, | |
| "learning_rate": 1.8738297806836583e-06, | |
| "loss": 1.1899962425231934, | |
| "step": 1556 | |
| }, | |
| { | |
| "epoch": 1.9023199023199022, | |
| "grad_norm": 3.466108798980713, | |
| "learning_rate": 1.8711413763401597e-06, | |
| "loss": 0.9702341556549072, | |
| "step": 1558 | |
| }, | |
| { | |
| "epoch": 1.9047619047619047, | |
| "grad_norm": 2.6568405628204346, | |
| "learning_rate": 1.8684520705284412e-06, | |
| "loss": 0.7102415561676025, | |
| "step": 1560 | |
| }, | |
| { | |
| "epoch": 1.907203907203907, | |
| "grad_norm": 4.7769389152526855, | |
| "learning_rate": 1.8657618742112807e-06, | |
| "loss": 1.0289454460144043, | |
| "step": 1562 | |
| }, | |
| { | |
| "epoch": 1.9096459096459095, | |
| "grad_norm": 2.641930103302002, | |
| "learning_rate": 1.8630707983550853e-06, | |
| "loss": 0.7728992104530334, | |
| "step": 1564 | |
| }, | |
| { | |
| "epoch": 1.912087912087912, | |
| "grad_norm": 2.661684513092041, | |
| "learning_rate": 1.8603788539298487e-06, | |
| "loss": 1.0521299839019775, | |
| "step": 1566 | |
| }, | |
| { | |
| "epoch": 1.9145299145299144, | |
| "grad_norm": 5.217966556549072, | |
| "learning_rate": 1.857686051909104e-06, | |
| "loss": 0.9182690978050232, | |
| "step": 1568 | |
| }, | |
| { | |
| "epoch": 1.9169719169719168, | |
| "grad_norm": 2.3860421180725098, | |
| "learning_rate": 1.8549924032698815e-06, | |
| "loss": 0.7401301860809326, | |
| "step": 1570 | |
| }, | |
| { | |
| "epoch": 1.9194139194139193, | |
| "grad_norm": 6.826650619506836, | |
| "learning_rate": 1.8522979189926617e-06, | |
| "loss": 1.1561548709869385, | |
| "step": 1572 | |
| }, | |
| { | |
| "epoch": 1.9218559218559217, | |
| "grad_norm": 4.80458927154541, | |
| "learning_rate": 1.849602610061332e-06, | |
| "loss": 0.7565726041793823, | |
| "step": 1574 | |
| }, | |
| { | |
| "epoch": 1.9242979242979243, | |
| "grad_norm": 21.791353225708008, | |
| "learning_rate": 1.8469064874631409e-06, | |
| "loss": 1.0700514316558838, | |
| "step": 1576 | |
| }, | |
| { | |
| "epoch": 1.9267399267399268, | |
| "grad_norm": 3.5925564765930176, | |
| "learning_rate": 1.8442095621886546e-06, | |
| "loss": 0.7682386040687561, | |
| "step": 1578 | |
| }, | |
| { | |
| "epoch": 1.9291819291819292, | |
| "grad_norm": 14.093546867370605, | |
| "learning_rate": 1.8415118452317103e-06, | |
| "loss": 1.0378044843673706, | |
| "step": 1580 | |
| }, | |
| { | |
| "epoch": 1.9316239316239316, | |
| "grad_norm": 3.5279247760772705, | |
| "learning_rate": 1.838813347589374e-06, | |
| "loss": 0.33923083543777466, | |
| "step": 1582 | |
| }, | |
| { | |
| "epoch": 1.934065934065934, | |
| "grad_norm": 3.891294002532959, | |
| "learning_rate": 1.8361140802618928e-06, | |
| "loss": 0.535834789276123, | |
| "step": 1584 | |
| }, | |
| { | |
| "epoch": 1.9365079365079365, | |
| "grad_norm": 6.704744815826416, | |
| "learning_rate": 1.833414054252651e-06, | |
| "loss": 0.655336856842041, | |
| "step": 1586 | |
| }, | |
| { | |
| "epoch": 1.938949938949939, | |
| "grad_norm": 2.675011396408081, | |
| "learning_rate": 1.8307132805681274e-06, | |
| "loss": 1.1804883480072021, | |
| "step": 1588 | |
| }, | |
| { | |
| "epoch": 1.9413919413919414, | |
| "grad_norm": 2.893186330795288, | |
| "learning_rate": 1.828011770217847e-06, | |
| "loss": 1.0923516750335693, | |
| "step": 1590 | |
| }, | |
| { | |
| "epoch": 1.9438339438339438, | |
| "grad_norm": 20.863788604736328, | |
| "learning_rate": 1.8253095342143385e-06, | |
| "loss": 0.9955095052719116, | |
| "step": 1592 | |
| }, | |
| { | |
| "epoch": 1.9462759462759462, | |
| "grad_norm": 5.177717208862305, | |
| "learning_rate": 1.8226065835730885e-06, | |
| "loss": 0.6742517948150635, | |
| "step": 1594 | |
| }, | |
| { | |
| "epoch": 1.9487179487179487, | |
| "grad_norm": 2.7875900268554688, | |
| "learning_rate": 1.8199029293124974e-06, | |
| "loss": 0.6115295886993408, | |
| "step": 1596 | |
| }, | |
| { | |
| "epoch": 1.9511599511599511, | |
| "grad_norm": 2.1929914951324463, | |
| "learning_rate": 1.8171985824538319e-06, | |
| "loss": 0.7518469095230103, | |
| "step": 1598 | |
| }, | |
| { | |
| "epoch": 1.9536019536019538, | |
| "grad_norm": 4.411022186279297, | |
| "learning_rate": 1.8144935540211854e-06, | |
| "loss": 0.7704393267631531, | |
| "step": 1600 | |
| }, | |
| { | |
| "epoch": 1.9560439560439562, | |
| "grad_norm": 2.1730399131774902, | |
| "learning_rate": 1.8117878550414259e-06, | |
| "loss": 1.0425870418548584, | |
| "step": 1602 | |
| }, | |
| { | |
| "epoch": 1.9584859584859586, | |
| "grad_norm": 3.9870214462280273, | |
| "learning_rate": 1.8090814965441579e-06, | |
| "loss": 1.280583143234253, | |
| "step": 1604 | |
| }, | |
| { | |
| "epoch": 1.960927960927961, | |
| "grad_norm": 6.02722692489624, | |
| "learning_rate": 1.8063744895616719e-06, | |
| "loss": 1.3034554719924927, | |
| "step": 1606 | |
| }, | |
| { | |
| "epoch": 1.9633699633699635, | |
| "grad_norm": 2.3687644004821777, | |
| "learning_rate": 1.8036668451289048e-06, | |
| "loss": 1.1602702140808105, | |
| "step": 1608 | |
| }, | |
| { | |
| "epoch": 1.965811965811966, | |
| "grad_norm": 2.6075732707977295, | |
| "learning_rate": 1.8009585742833885e-06, | |
| "loss": 1.0742807388305664, | |
| "step": 1610 | |
| }, | |
| { | |
| "epoch": 1.9682539682539684, | |
| "grad_norm": 5.860198497772217, | |
| "learning_rate": 1.7982496880652118e-06, | |
| "loss": 0.9445716142654419, | |
| "step": 1612 | |
| }, | |
| { | |
| "epoch": 1.9706959706959708, | |
| "grad_norm": 2.031967878341675, | |
| "learning_rate": 1.7955401975169694e-06, | |
| "loss": 1.1359782218933105, | |
| "step": 1614 | |
| }, | |
| { | |
| "epoch": 1.9731379731379732, | |
| "grad_norm": 2.8739750385284424, | |
| "learning_rate": 1.7928301136837215e-06, | |
| "loss": 0.9452301263809204, | |
| "step": 1616 | |
| }, | |
| { | |
| "epoch": 1.9755799755799757, | |
| "grad_norm": 2.3791871070861816, | |
| "learning_rate": 1.7901194476129452e-06, | |
| "loss": 0.9672026634216309, | |
| "step": 1618 | |
| }, | |
| { | |
| "epoch": 1.978021978021978, | |
| "grad_norm": 1.9117521047592163, | |
| "learning_rate": 1.7874082103544923e-06, | |
| "loss": 1.119147539138794, | |
| "step": 1620 | |
| }, | |
| { | |
| "epoch": 1.9804639804639805, | |
| "grad_norm": 2.7459216117858887, | |
| "learning_rate": 1.7846964129605423e-06, | |
| "loss": 0.817106306552887, | |
| "step": 1622 | |
| }, | |
| { | |
| "epoch": 1.982905982905983, | |
| "grad_norm": 5.714978218078613, | |
| "learning_rate": 1.781984066485558e-06, | |
| "loss": 0.6298950910568237, | |
| "step": 1624 | |
| }, | |
| { | |
| "epoch": 1.9853479853479854, | |
| "grad_norm": 3.180934190750122, | |
| "learning_rate": 1.7792711819862413e-06, | |
| "loss": 0.8730656504631042, | |
| "step": 1626 | |
| }, | |
| { | |
| "epoch": 1.9877899877899878, | |
| "grad_norm": 8.006962776184082, | |
| "learning_rate": 1.7765577705214862e-06, | |
| "loss": 0.9957073926925659, | |
| "step": 1628 | |
| }, | |
| { | |
| "epoch": 1.9902319902319903, | |
| "grad_norm": 3.547837257385254, | |
| "learning_rate": 1.7738438431523355e-06, | |
| "loss": 1.1636309623718262, | |
| "step": 1630 | |
| }, | |
| { | |
| "epoch": 1.9926739926739927, | |
| "grad_norm": 4.942066669464111, | |
| "learning_rate": 1.771129410941935e-06, | |
| "loss": 1.03013277053833, | |
| "step": 1632 | |
| }, | |
| { | |
| "epoch": 1.9951159951159951, | |
| "grad_norm": 2.5757369995117188, | |
| "learning_rate": 1.7684144849554884e-06, | |
| "loss": 0.7975276112556458, | |
| "step": 1634 | |
| }, | |
| { | |
| "epoch": 1.9975579975579976, | |
| "grad_norm": 2.362563133239746, | |
| "learning_rate": 1.765699076260212e-06, | |
| "loss": 1.1697049140930176, | |
| "step": 1636 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "grad_norm": 2.0958151817321777, | |
| "learning_rate": 1.7629831959252908e-06, | |
| "loss": 1.0749346017837524, | |
| "step": 1638 | |
| }, | |
| { | |
| "epoch": 2.0024420024420024, | |
| "grad_norm": 4.216618061065674, | |
| "learning_rate": 1.7602668550218306e-06, | |
| "loss": 0.9469319581985474, | |
| "step": 1640 | |
| }, | |
| { | |
| "epoch": 2.004884004884005, | |
| "grad_norm": 6.101734638214111, | |
| "learning_rate": 1.7575500646228164e-06, | |
| "loss": 0.6676955223083496, | |
| "step": 1642 | |
| }, | |
| { | |
| "epoch": 2.0073260073260073, | |
| "grad_norm": 1.3480907678604126, | |
| "learning_rate": 1.7548328358030653e-06, | |
| "loss": 0.9219367504119873, | |
| "step": 1644 | |
| }, | |
| { | |
| "epoch": 2.0097680097680097, | |
| "grad_norm": 22.67909049987793, | |
| "learning_rate": 1.75211517963918e-06, | |
| "loss": 0.7239001989364624, | |
| "step": 1646 | |
| }, | |
| { | |
| "epoch": 2.012210012210012, | |
| "grad_norm": 2.3606653213500977, | |
| "learning_rate": 1.7493971072095072e-06, | |
| "loss": 0.7048317790031433, | |
| "step": 1648 | |
| }, | |
| { | |
| "epoch": 2.0146520146520146, | |
| "grad_norm": 36.260040283203125, | |
| "learning_rate": 1.7466786295940897e-06, | |
| "loss": 0.749517560005188, | |
| "step": 1650 | |
| }, | |
| { | |
| "epoch": 2.017094017094017, | |
| "grad_norm": 3.51509165763855, | |
| "learning_rate": 1.7439597578746214e-06, | |
| "loss": 0.6970245838165283, | |
| "step": 1652 | |
| }, | |
| { | |
| "epoch": 2.0195360195360195, | |
| "grad_norm": 2.691490411758423, | |
| "learning_rate": 1.7412405031344045e-06, | |
| "loss": 1.078811764717102, | |
| "step": 1654 | |
| }, | |
| { | |
| "epoch": 2.021978021978022, | |
| "grad_norm": 2.797755479812622, | |
| "learning_rate": 1.7385208764583002e-06, | |
| "loss": 0.5249171257019043, | |
| "step": 1656 | |
| }, | |
| { | |
| "epoch": 2.0244200244200243, | |
| "grad_norm": 3.406498908996582, | |
| "learning_rate": 1.7358008889326879e-06, | |
| "loss": 0.882960319519043, | |
| "step": 1658 | |
| }, | |
| { | |
| "epoch": 2.0268620268620268, | |
| "grad_norm": 2.405264377593994, | |
| "learning_rate": 1.7330805516454165e-06, | |
| "loss": 0.9751981496810913, | |
| "step": 1660 | |
| }, | |
| { | |
| "epoch": 2.029304029304029, | |
| "grad_norm": 3.6406667232513428, | |
| "learning_rate": 1.7303598756857621e-06, | |
| "loss": 0.9668103456497192, | |
| "step": 1662 | |
| }, | |
| { | |
| "epoch": 2.0317460317460316, | |
| "grad_norm": 3.7392663955688477, | |
| "learning_rate": 1.72763887214438e-06, | |
| "loss": 0.47517433762550354, | |
| "step": 1664 | |
| }, | |
| { | |
| "epoch": 2.034188034188034, | |
| "grad_norm": 5.553292751312256, | |
| "learning_rate": 1.724917552113262e-06, | |
| "loss": 0.6592341065406799, | |
| "step": 1666 | |
| }, | |
| { | |
| "epoch": 2.0366300366300365, | |
| "grad_norm": 2.054922580718994, | |
| "learning_rate": 1.7221959266856887e-06, | |
| "loss": 0.9549490809440613, | |
| "step": 1668 | |
| }, | |
| { | |
| "epoch": 2.039072039072039, | |
| "grad_norm": 2.8771677017211914, | |
| "learning_rate": 1.719474006956188e-06, | |
| "loss": 0.6464592218399048, | |
| "step": 1670 | |
| }, | |
| { | |
| "epoch": 2.0415140415140414, | |
| "grad_norm": 2.981400966644287, | |
| "learning_rate": 1.716751804020484e-06, | |
| "loss": 0.8299617767333984, | |
| "step": 1672 | |
| }, | |
| { | |
| "epoch": 2.043956043956044, | |
| "grad_norm": 4.985719203948975, | |
| "learning_rate": 1.7140293289754587e-06, | |
| "loss": 0.8319445848464966, | |
| "step": 1674 | |
| }, | |
| { | |
| "epoch": 2.0463980463980462, | |
| "grad_norm": 3.6394999027252197, | |
| "learning_rate": 1.7113065929191015e-06, | |
| "loss": 0.5642856359481812, | |
| "step": 1676 | |
| }, | |
| { | |
| "epoch": 2.0488400488400487, | |
| "grad_norm": 2.7474498748779297, | |
| "learning_rate": 1.7085836069504655e-06, | |
| "loss": 0.9995335936546326, | |
| "step": 1678 | |
| }, | |
| { | |
| "epoch": 2.051282051282051, | |
| "grad_norm": 5.023496150970459, | |
| "learning_rate": 1.7058603821696244e-06, | |
| "loss": 0.9951038956642151, | |
| "step": 1680 | |
| }, | |
| { | |
| "epoch": 2.0537240537240535, | |
| "grad_norm": 2.564154624938965, | |
| "learning_rate": 1.7031369296776228e-06, | |
| "loss": 0.6000786423683167, | |
| "step": 1682 | |
| }, | |
| { | |
| "epoch": 2.056166056166056, | |
| "grad_norm": 2.7970900535583496, | |
| "learning_rate": 1.7004132605764363e-06, | |
| "loss": 0.6068737506866455, | |
| "step": 1684 | |
| }, | |
| { | |
| "epoch": 2.0586080586080584, | |
| "grad_norm": 6.858452796936035, | |
| "learning_rate": 1.6976893859689216e-06, | |
| "loss": 0.765514612197876, | |
| "step": 1686 | |
| }, | |
| { | |
| "epoch": 2.061050061050061, | |
| "grad_norm": 4.134739398956299, | |
| "learning_rate": 1.6949653169587732e-06, | |
| "loss": 0.836578905582428, | |
| "step": 1688 | |
| }, | |
| { | |
| "epoch": 2.0634920634920633, | |
| "grad_norm": 13.155983924865723, | |
| "learning_rate": 1.6922410646504795e-06, | |
| "loss": 0.7360749244689941, | |
| "step": 1690 | |
| }, | |
| { | |
| "epoch": 2.065934065934066, | |
| "grad_norm": 2.470672369003296, | |
| "learning_rate": 1.6895166401492746e-06, | |
| "loss": 0.7588337659835815, | |
| "step": 1692 | |
| }, | |
| { | |
| "epoch": 2.0683760683760686, | |
| "grad_norm": 2.9545395374298096, | |
| "learning_rate": 1.6867920545610952e-06, | |
| "loss": 0.7109832763671875, | |
| "step": 1694 | |
| }, | |
| { | |
| "epoch": 2.070818070818071, | |
| "grad_norm": 3.3367116451263428, | |
| "learning_rate": 1.684067318992535e-06, | |
| "loss": 0.9194005727767944, | |
| "step": 1696 | |
| }, | |
| { | |
| "epoch": 2.0732600732600734, | |
| "grad_norm": 3.1307387351989746, | |
| "learning_rate": 1.6813424445507977e-06, | |
| "loss": 0.9540067911148071, | |
| "step": 1698 | |
| }, | |
| { | |
| "epoch": 2.075702075702076, | |
| "grad_norm": 2.6127028465270996, | |
| "learning_rate": 1.6786174423436547e-06, | |
| "loss": 0.9777125120162964, | |
| "step": 1700 | |
| }, | |
| { | |
| "epoch": 2.0781440781440783, | |
| "grad_norm": 10.716217041015625, | |
| "learning_rate": 1.6758923234793975e-06, | |
| "loss": 0.4608305096626282, | |
| "step": 1702 | |
| }, | |
| { | |
| "epoch": 2.0805860805860807, | |
| "grad_norm": 2.2553529739379883, | |
| "learning_rate": 1.673167099066793e-06, | |
| "loss": 0.43998032808303833, | |
| "step": 1704 | |
| }, | |
| { | |
| "epoch": 2.083028083028083, | |
| "grad_norm": 24.75486183166504, | |
| "learning_rate": 1.6704417802150393e-06, | |
| "loss": 0.3858780264854431, | |
| "step": 1706 | |
| }, | |
| { | |
| "epoch": 2.0854700854700856, | |
| "grad_norm": 6.908089637756348, | |
| "learning_rate": 1.6677163780337179e-06, | |
| "loss": 0.9613991975784302, | |
| "step": 1708 | |
| }, | |
| { | |
| "epoch": 2.087912087912088, | |
| "grad_norm": 2.345442056655884, | |
| "learning_rate": 1.6649909036327508e-06, | |
| "loss": 0.6652936935424805, | |
| "step": 1710 | |
| }, | |
| { | |
| "epoch": 2.0903540903540905, | |
| "grad_norm": 14.50218391418457, | |
| "learning_rate": 1.6622653681223553e-06, | |
| "loss": 0.5869041085243225, | |
| "step": 1712 | |
| }, | |
| { | |
| "epoch": 2.092796092796093, | |
| "grad_norm": 2.2177321910858154, | |
| "learning_rate": 1.6595397826129959e-06, | |
| "loss": 0.660941481590271, | |
| "step": 1714 | |
| }, | |
| { | |
| "epoch": 2.0952380952380953, | |
| "grad_norm": 2.791415214538574, | |
| "learning_rate": 1.6568141582153432e-06, | |
| "loss": 0.7489262223243713, | |
| "step": 1716 | |
| }, | |
| { | |
| "epoch": 2.0976800976800978, | |
| "grad_norm": 2.4642961025238037, | |
| "learning_rate": 1.654088506040224e-06, | |
| "loss": 0.977318286895752, | |
| "step": 1718 | |
| }, | |
| { | |
| "epoch": 2.1001221001221, | |
| "grad_norm": 2.2271535396575928, | |
| "learning_rate": 1.6513628371985803e-06, | |
| "loss": 0.909781813621521, | |
| "step": 1720 | |
| }, | |
| { | |
| "epoch": 2.1025641025641026, | |
| "grad_norm": 3.0111823081970215, | |
| "learning_rate": 1.6486371628014201e-06, | |
| "loss": 0.9366869926452637, | |
| "step": 1722 | |
| }, | |
| { | |
| "epoch": 2.105006105006105, | |
| "grad_norm": 3.323113441467285, | |
| "learning_rate": 1.6459114939597763e-06, | |
| "loss": 0.6197817325592041, | |
| "step": 1724 | |
| }, | |
| { | |
| "epoch": 2.1074481074481075, | |
| "grad_norm": 13.640995025634766, | |
| "learning_rate": 1.6431858417846572e-06, | |
| "loss": 0.5317754745483398, | |
| "step": 1726 | |
| }, | |
| { | |
| "epoch": 2.10989010989011, | |
| "grad_norm": 1.7508105039596558, | |
| "learning_rate": 1.6404602173870043e-06, | |
| "loss": 0.5558261871337891, | |
| "step": 1728 | |
| }, | |
| { | |
| "epoch": 2.1123321123321124, | |
| "grad_norm": 3.5128135681152344, | |
| "learning_rate": 1.6377346318776455e-06, | |
| "loss": 0.8417212963104248, | |
| "step": 1730 | |
| }, | |
| { | |
| "epoch": 2.114774114774115, | |
| "grad_norm": 3.973053455352783, | |
| "learning_rate": 1.6350090963672495e-06, | |
| "loss": 0.5290448665618896, | |
| "step": 1732 | |
| }, | |
| { | |
| "epoch": 2.1172161172161172, | |
| "grad_norm": 1.1887048482894897, | |
| "learning_rate": 1.6322836219662825e-06, | |
| "loss": 0.26386559009552, | |
| "step": 1734 | |
| }, | |
| { | |
| "epoch": 2.1196581196581197, | |
| "grad_norm": 2.8599250316619873, | |
| "learning_rate": 1.629558219784961e-06, | |
| "loss": 0.9280307292938232, | |
| "step": 1736 | |
| }, | |
| { | |
| "epoch": 2.122100122100122, | |
| "grad_norm": 4.396246433258057, | |
| "learning_rate": 1.6268329009332073e-06, | |
| "loss": 0.8014868497848511, | |
| "step": 1738 | |
| }, | |
| { | |
| "epoch": 2.1245421245421245, | |
| "grad_norm": 9.692065238952637, | |
| "learning_rate": 1.624107676520603e-06, | |
| "loss": 0.9294686317443848, | |
| "step": 1740 | |
| }, | |
| { | |
| "epoch": 2.126984126984127, | |
| "grad_norm": 11.22627067565918, | |
| "learning_rate": 1.6213825576563453e-06, | |
| "loss": 0.8538755178451538, | |
| "step": 1742 | |
| }, | |
| { | |
| "epoch": 2.1294261294261294, | |
| "grad_norm": 2.9961278438568115, | |
| "learning_rate": 1.6186575554492025e-06, | |
| "loss": 0.6519080400466919, | |
| "step": 1744 | |
| }, | |
| { | |
| "epoch": 2.131868131868132, | |
| "grad_norm": 3.5045223236083984, | |
| "learning_rate": 1.6159326810074655e-06, | |
| "loss": 1.0177139043807983, | |
| "step": 1746 | |
| }, | |
| { | |
| "epoch": 2.1343101343101343, | |
| "grad_norm": 10.62950325012207, | |
| "learning_rate": 1.6132079454389051e-06, | |
| "loss": 0.38920822739601135, | |
| "step": 1748 | |
| }, | |
| { | |
| "epoch": 2.1367521367521367, | |
| "grad_norm": 2.673360586166382, | |
| "learning_rate": 1.6104833598507258e-06, | |
| "loss": 1.0627334117889404, | |
| "step": 1750 | |
| }, | |
| { | |
| "epoch": 2.139194139194139, | |
| "grad_norm": 2.3104326725006104, | |
| "learning_rate": 1.607758935349521e-06, | |
| "loss": 0.8785233497619629, | |
| "step": 1752 | |
| }, | |
| { | |
| "epoch": 2.1416361416361416, | |
| "grad_norm": 3.0112380981445312, | |
| "learning_rate": 1.6050346830412268e-06, | |
| "loss": 0.7654712200164795, | |
| "step": 1754 | |
| }, | |
| { | |
| "epoch": 2.144078144078144, | |
| "grad_norm": 2.3209385871887207, | |
| "learning_rate": 1.6023106140310788e-06, | |
| "loss": 1.0415282249450684, | |
| "step": 1756 | |
| }, | |
| { | |
| "epoch": 2.1465201465201464, | |
| "grad_norm": 2.134535551071167, | |
| "learning_rate": 1.599586739423564e-06, | |
| "loss": 0.4587876498699188, | |
| "step": 1758 | |
| }, | |
| { | |
| "epoch": 2.148962148962149, | |
| "grad_norm": 2.9099526405334473, | |
| "learning_rate": 1.5968630703223773e-06, | |
| "loss": 0.8560281991958618, | |
| "step": 1760 | |
| }, | |
| { | |
| "epoch": 2.1514041514041513, | |
| "grad_norm": 1.3330203294754028, | |
| "learning_rate": 1.5941396178303766e-06, | |
| "loss": 0.6638654470443726, | |
| "step": 1762 | |
| }, | |
| { | |
| "epoch": 2.1538461538461537, | |
| "grad_norm": 6.182371616363525, | |
| "learning_rate": 1.5914163930495345e-06, | |
| "loss": 0.5872393846511841, | |
| "step": 1764 | |
| }, | |
| { | |
| "epoch": 2.156288156288156, | |
| "grad_norm": 1.8256715536117554, | |
| "learning_rate": 1.5886934070808989e-06, | |
| "loss": 0.4515438675880432, | |
| "step": 1766 | |
| }, | |
| { | |
| "epoch": 2.1587301587301586, | |
| "grad_norm": 4.12510347366333, | |
| "learning_rate": 1.5859706710245417e-06, | |
| "loss": 0.6968947052955627, | |
| "step": 1768 | |
| }, | |
| { | |
| "epoch": 2.161172161172161, | |
| "grad_norm": 13.284811019897461, | |
| "learning_rate": 1.5832481959795163e-06, | |
| "loss": 0.5480116605758667, | |
| "step": 1770 | |
| }, | |
| { | |
| "epoch": 2.1636141636141635, | |
| "grad_norm": 2.6436359882354736, | |
| "learning_rate": 1.5805259930438125e-06, | |
| "loss": 0.6640259027481079, | |
| "step": 1772 | |
| }, | |
| { | |
| "epoch": 2.166056166056166, | |
| "grad_norm": 5.23723840713501, | |
| "learning_rate": 1.577804073314311e-06, | |
| "loss": 0.7670260667800903, | |
| "step": 1774 | |
| }, | |
| { | |
| "epoch": 2.1684981684981683, | |
| "grad_norm": 2.664271831512451, | |
| "learning_rate": 1.5750824478867382e-06, | |
| "loss": 0.9593580365180969, | |
| "step": 1776 | |
| }, | |
| { | |
| "epoch": 2.1709401709401708, | |
| "grad_norm": 8.346899032592773, | |
| "learning_rate": 1.5723611278556203e-06, | |
| "loss": 0.6394690275192261, | |
| "step": 1778 | |
| }, | |
| { | |
| "epoch": 2.173382173382173, | |
| "grad_norm": 3.4478020668029785, | |
| "learning_rate": 1.5696401243142383e-06, | |
| "loss": 0.9311685562133789, | |
| "step": 1780 | |
| }, | |
| { | |
| "epoch": 2.1758241758241756, | |
| "grad_norm": 8.392507553100586, | |
| "learning_rate": 1.5669194483545839e-06, | |
| "loss": 0.7687492966651917, | |
| "step": 1782 | |
| }, | |
| { | |
| "epoch": 2.178266178266178, | |
| "grad_norm": 2.0415618419647217, | |
| "learning_rate": 1.5641991110673125e-06, | |
| "loss": 0.8528757095336914, | |
| "step": 1784 | |
| }, | |
| { | |
| "epoch": 2.1807081807081805, | |
| "grad_norm": 10.873391151428223, | |
| "learning_rate": 1.5614791235417002e-06, | |
| "loss": 0.7388204336166382, | |
| "step": 1786 | |
| }, | |
| { | |
| "epoch": 2.183150183150183, | |
| "grad_norm": 4.45784330368042, | |
| "learning_rate": 1.5587594968655961e-06, | |
| "loss": 0.8448042869567871, | |
| "step": 1788 | |
| }, | |
| { | |
| "epoch": 2.185592185592186, | |
| "grad_norm": 2.4441540241241455, | |
| "learning_rate": 1.556040242125379e-06, | |
| "loss": 0.8550477623939514, | |
| "step": 1790 | |
| }, | |
| { | |
| "epoch": 2.1880341880341883, | |
| "grad_norm": 9.97851848602295, | |
| "learning_rate": 1.5533213704059111e-06, | |
| "loss": 0.8863077163696289, | |
| "step": 1792 | |
| }, | |
| { | |
| "epoch": 2.1904761904761907, | |
| "grad_norm": 2.1011757850646973, | |
| "learning_rate": 1.550602892790493e-06, | |
| "loss": 0.5247014760971069, | |
| "step": 1794 | |
| }, | |
| { | |
| "epoch": 2.192918192918193, | |
| "grad_norm": 3.3591878414154053, | |
| "learning_rate": 1.5478848203608204e-06, | |
| "loss": 0.7306163311004639, | |
| "step": 1796 | |
| }, | |
| { | |
| "epoch": 2.1953601953601956, | |
| "grad_norm": 3.187375068664551, | |
| "learning_rate": 1.5451671641969355e-06, | |
| "loss": 0.7007449865341187, | |
| "step": 1798 | |
| }, | |
| { | |
| "epoch": 2.197802197802198, | |
| "grad_norm": 2.8326480388641357, | |
| "learning_rate": 1.542449935377184e-06, | |
| "loss": 0.9678464531898499, | |
| "step": 1800 | |
| }, | |
| { | |
| "epoch": 2.2002442002442004, | |
| "grad_norm": 16.241653442382812, | |
| "learning_rate": 1.5397331449781702e-06, | |
| "loss": 0.9001434445381165, | |
| "step": 1802 | |
| }, | |
| { | |
| "epoch": 2.202686202686203, | |
| "grad_norm": 3.0486555099487305, | |
| "learning_rate": 1.5370168040747094e-06, | |
| "loss": 0.6704305410385132, | |
| "step": 1804 | |
| }, | |
| { | |
| "epoch": 2.2051282051282053, | |
| "grad_norm": 2.7000041007995605, | |
| "learning_rate": 1.534300923739788e-06, | |
| "loss": 0.921515166759491, | |
| "step": 1806 | |
| }, | |
| { | |
| "epoch": 2.2075702075702077, | |
| "grad_norm": 2.3786838054656982, | |
| "learning_rate": 1.5315855150445117e-06, | |
| "loss": 1.0040926933288574, | |
| "step": 1808 | |
| }, | |
| { | |
| "epoch": 2.21001221001221, | |
| "grad_norm": 2.6251399517059326, | |
| "learning_rate": 1.5288705890580654e-06, | |
| "loss": 0.7396351099014282, | |
| "step": 1810 | |
| }, | |
| { | |
| "epoch": 2.2124542124542126, | |
| "grad_norm": 7.335432052612305, | |
| "learning_rate": 1.526156156847665e-06, | |
| "loss": 0.6708033084869385, | |
| "step": 1812 | |
| }, | |
| { | |
| "epoch": 2.214896214896215, | |
| "grad_norm": 3.2044546604156494, | |
| "learning_rate": 1.5234422294785146e-06, | |
| "loss": 0.3860257863998413, | |
| "step": 1814 | |
| }, | |
| { | |
| "epoch": 2.2173382173382175, | |
| "grad_norm": 1.9752088785171509, | |
| "learning_rate": 1.5207288180137588e-06, | |
| "loss": 0.9366165399551392, | |
| "step": 1816 | |
| }, | |
| { | |
| "epoch": 2.21978021978022, | |
| "grad_norm": 3.3452351093292236, | |
| "learning_rate": 1.5180159335144419e-06, | |
| "loss": 0.8869001865386963, | |
| "step": 1818 | |
| }, | |
| { | |
| "epoch": 2.2222222222222223, | |
| "grad_norm": 1.6766027212142944, | |
| "learning_rate": 1.515303587039458e-06, | |
| "loss": 0.9146523475646973, | |
| "step": 1820 | |
| }, | |
| { | |
| "epoch": 2.2246642246642248, | |
| "grad_norm": 3.886622190475464, | |
| "learning_rate": 1.512591789645508e-06, | |
| "loss": 0.5201717615127563, | |
| "step": 1822 | |
| }, | |
| { | |
| "epoch": 2.227106227106227, | |
| "grad_norm": 5.899975299835205, | |
| "learning_rate": 1.5098805523870552e-06, | |
| "loss": 0.7236602306365967, | |
| "step": 1824 | |
| }, | |
| { | |
| "epoch": 2.2295482295482296, | |
| "grad_norm": 5.8403096199035645, | |
| "learning_rate": 1.5071698863162787e-06, | |
| "loss": 0.6882015466690063, | |
| "step": 1826 | |
| }, | |
| { | |
| "epoch": 2.231990231990232, | |
| "grad_norm": 6.883052825927734, | |
| "learning_rate": 1.5044598024830307e-06, | |
| "loss": 0.574286937713623, | |
| "step": 1828 | |
| }, | |
| { | |
| "epoch": 2.2344322344322345, | |
| "grad_norm": 7.918755054473877, | |
| "learning_rate": 1.5017503119347888e-06, | |
| "loss": 0.7234624028205872, | |
| "step": 1830 | |
| }, | |
| { | |
| "epoch": 2.236874236874237, | |
| "grad_norm": 3.1122801303863525, | |
| "learning_rate": 1.4990414257166119e-06, | |
| "loss": 0.9573710560798645, | |
| "step": 1832 | |
| }, | |
| { | |
| "epoch": 2.2393162393162394, | |
| "grad_norm": 2.384960174560547, | |
| "learning_rate": 1.4963331548710956e-06, | |
| "loss": 0.912803053855896, | |
| "step": 1834 | |
| }, | |
| { | |
| "epoch": 2.241758241758242, | |
| "grad_norm": 3.325394630432129, | |
| "learning_rate": 1.4936255104383279e-06, | |
| "loss": 0.9289860725402832, | |
| "step": 1836 | |
| }, | |
| { | |
| "epoch": 2.244200244200244, | |
| "grad_norm": 6.586422443389893, | |
| "learning_rate": 1.4909185034558425e-06, | |
| "loss": 1.0167450904846191, | |
| "step": 1838 | |
| }, | |
| { | |
| "epoch": 2.2466422466422467, | |
| "grad_norm": 20.79213523864746, | |
| "learning_rate": 1.4882121449585745e-06, | |
| "loss": 0.4085450768470764, | |
| "step": 1840 | |
| }, | |
| { | |
| "epoch": 2.249084249084249, | |
| "grad_norm": 2.2009057998657227, | |
| "learning_rate": 1.485506445978815e-06, | |
| "loss": 0.8706716299057007, | |
| "step": 1842 | |
| }, | |
| { | |
| "epoch": 2.2515262515262515, | |
| "grad_norm": 3.2609333992004395, | |
| "learning_rate": 1.482801417546168e-06, | |
| "loss": 1.1071045398712158, | |
| "step": 1844 | |
| }, | |
| { | |
| "epoch": 2.253968253968254, | |
| "grad_norm": 14.764912605285645, | |
| "learning_rate": 1.480097070687503e-06, | |
| "loss": 0.6743595600128174, | |
| "step": 1846 | |
| }, | |
| { | |
| "epoch": 2.2564102564102564, | |
| "grad_norm": 1.1561812162399292, | |
| "learning_rate": 1.4773934164269115e-06, | |
| "loss": 0.6079235672950745, | |
| "step": 1848 | |
| }, | |
| { | |
| "epoch": 2.258852258852259, | |
| "grad_norm": 5.049133777618408, | |
| "learning_rate": 1.4746904657856619e-06, | |
| "loss": 0.681169867515564, | |
| "step": 1850 | |
| }, | |
| { | |
| "epoch": 2.2612942612942613, | |
| "grad_norm": 4.974262714385986, | |
| "learning_rate": 1.4719882297821534e-06, | |
| "loss": 0.8603782057762146, | |
| "step": 1852 | |
| }, | |
| { | |
| "epoch": 2.2637362637362637, | |
| "grad_norm": 2.6787617206573486, | |
| "learning_rate": 1.4692867194318731e-06, | |
| "loss": 0.9463958740234375, | |
| "step": 1854 | |
| }, | |
| { | |
| "epoch": 2.266178266178266, | |
| "grad_norm": 3.137239456176758, | |
| "learning_rate": 1.4665859457473486e-06, | |
| "loss": 0.9231513738632202, | |
| "step": 1856 | |
| }, | |
| { | |
| "epoch": 2.2686202686202686, | |
| "grad_norm": 2.1381964683532715, | |
| "learning_rate": 1.4638859197381076e-06, | |
| "loss": 0.729681670665741, | |
| "step": 1858 | |
| }, | |
| { | |
| "epoch": 2.271062271062271, | |
| "grad_norm": 2.4693713188171387, | |
| "learning_rate": 1.4611866524106263e-06, | |
| "loss": 0.9645771980285645, | |
| "step": 1860 | |
| }, | |
| { | |
| "epoch": 2.2735042735042734, | |
| "grad_norm": 3.4739949703216553, | |
| "learning_rate": 1.4584881547682897e-06, | |
| "loss": 0.6171971559524536, | |
| "step": 1862 | |
| }, | |
| { | |
| "epoch": 2.275946275946276, | |
| "grad_norm": 11.276899337768555, | |
| "learning_rate": 1.4557904378113456e-06, | |
| "loss": 0.6007958054542542, | |
| "step": 1864 | |
| }, | |
| { | |
| "epoch": 2.2783882783882783, | |
| "grad_norm": 3.313239336013794, | |
| "learning_rate": 1.4530935125368595e-06, | |
| "loss": 0.9140582084655762, | |
| "step": 1866 | |
| }, | |
| { | |
| "epoch": 2.2808302808302807, | |
| "grad_norm": 2.4360618591308594, | |
| "learning_rate": 1.450397389938668e-06, | |
| "loss": 0.8729221224784851, | |
| "step": 1868 | |
| }, | |
| { | |
| "epoch": 2.283272283272283, | |
| "grad_norm": 2.5397229194641113, | |
| "learning_rate": 1.4477020810073382e-06, | |
| "loss": 0.9614423513412476, | |
| "step": 1870 | |
| }, | |
| { | |
| "epoch": 2.2857142857142856, | |
| "grad_norm": 2.3346688747406006, | |
| "learning_rate": 1.4450075967301186e-06, | |
| "loss": 0.8911330699920654, | |
| "step": 1872 | |
| }, | |
| { | |
| "epoch": 2.288156288156288, | |
| "grad_norm": 6.887451648712158, | |
| "learning_rate": 1.4423139480908963e-06, | |
| "loss": 0.22436925768852234, | |
| "step": 1874 | |
| }, | |
| { | |
| "epoch": 2.2905982905982905, | |
| "grad_norm": 2.1941566467285156, | |
| "learning_rate": 1.439621146070152e-06, | |
| "loss": 0.9239218235015869, | |
| "step": 1876 | |
| }, | |
| { | |
| "epoch": 2.293040293040293, | |
| "grad_norm": 20.07970428466797, | |
| "learning_rate": 1.436929201644915e-06, | |
| "loss": 0.6081951260566711, | |
| "step": 1878 | |
| }, | |
| { | |
| "epoch": 2.2954822954822953, | |
| "grad_norm": 33.53670120239258, | |
| "learning_rate": 1.4342381257887197e-06, | |
| "loss": 0.7479280233383179, | |
| "step": 1880 | |
| }, | |
| { | |
| "epoch": 2.2979242979242978, | |
| "grad_norm": 3.6325926780700684, | |
| "learning_rate": 1.4315479294715592e-06, | |
| "loss": 0.9375418424606323, | |
| "step": 1882 | |
| }, | |
| { | |
| "epoch": 2.3003663003663, | |
| "grad_norm": 2.3526835441589355, | |
| "learning_rate": 1.4288586236598407e-06, | |
| "loss": 0.6414413452148438, | |
| "step": 1884 | |
| }, | |
| { | |
| "epoch": 2.3028083028083026, | |
| "grad_norm": 7.12298059463501, | |
| "learning_rate": 1.4261702193163423e-06, | |
| "loss": 1.1579365730285645, | |
| "step": 1886 | |
| }, | |
| { | |
| "epoch": 2.305250305250305, | |
| "grad_norm": 2.2552568912506104, | |
| "learning_rate": 1.4234827274001658e-06, | |
| "loss": 0.9225502014160156, | |
| "step": 1888 | |
| }, | |
| { | |
| "epoch": 2.3076923076923075, | |
| "grad_norm": 2.3250763416290283, | |
| "learning_rate": 1.4207961588666956e-06, | |
| "loss": 0.9797046184539795, | |
| "step": 1890 | |
| }, | |
| { | |
| "epoch": 2.31013431013431, | |
| "grad_norm": 2.733238697052002, | |
| "learning_rate": 1.4181105246675508e-06, | |
| "loss": 0.9228655099868774, | |
| "step": 1892 | |
| }, | |
| { | |
| "epoch": 2.3125763125763124, | |
| "grad_norm": 5.1224751472473145, | |
| "learning_rate": 1.4154258357505415e-06, | |
| "loss": 1.0722181797027588, | |
| "step": 1894 | |
| }, | |
| { | |
| "epoch": 2.315018315018315, | |
| "grad_norm": 2.998682737350464, | |
| "learning_rate": 1.412742103059625e-06, | |
| "loss": 0.9534224271774292, | |
| "step": 1896 | |
| }, | |
| { | |
| "epoch": 2.317460317460317, | |
| "grad_norm": 4.182287216186523, | |
| "learning_rate": 1.4100593375348603e-06, | |
| "loss": 1.0109329223632812, | |
| "step": 1898 | |
| }, | |
| { | |
| "epoch": 2.3199023199023197, | |
| "grad_norm": 3.3046650886535645, | |
| "learning_rate": 1.4073775501123644e-06, | |
| "loss": 0.8815030455589294, | |
| "step": 1900 | |
| }, | |
| { | |
| "epoch": 2.3223443223443225, | |
| "grad_norm": 2.948150157928467, | |
| "learning_rate": 1.4046967517242657e-06, | |
| "loss": 0.9238619804382324, | |
| "step": 1902 | |
| }, | |
| { | |
| "epoch": 2.324786324786325, | |
| "grad_norm": 10.11429214477539, | |
| "learning_rate": 1.402016953298663e-06, | |
| "loss": 0.5186190009117126, | |
| "step": 1904 | |
| }, | |
| { | |
| "epoch": 2.3272283272283274, | |
| "grad_norm": 12.115678787231445, | |
| "learning_rate": 1.399338165759577e-06, | |
| "loss": 0.44795453548431396, | |
| "step": 1906 | |
| }, | |
| { | |
| "epoch": 2.32967032967033, | |
| "grad_norm": 2.503965377807617, | |
| "learning_rate": 1.3966604000269082e-06, | |
| "loss": 0.6183021068572998, | |
| "step": 1908 | |
| }, | |
| { | |
| "epoch": 2.3321123321123323, | |
| "grad_norm": 5.133605003356934, | |
| "learning_rate": 1.3939836670163914e-06, | |
| "loss": 0.33668291568756104, | |
| "step": 1910 | |
| }, | |
| { | |
| "epoch": 2.3345543345543347, | |
| "grad_norm": 5.264642715454102, | |
| "learning_rate": 1.3913079776395532e-06, | |
| "loss": 0.7825181484222412, | |
| "step": 1912 | |
| }, | |
| { | |
| "epoch": 2.336996336996337, | |
| "grad_norm": 2.069408893585205, | |
| "learning_rate": 1.388633342803664e-06, | |
| "loss": 0.8511747121810913, | |
| "step": 1914 | |
| }, | |
| { | |
| "epoch": 2.3394383394383396, | |
| "grad_norm": 5.884605407714844, | |
| "learning_rate": 1.3859597734116962e-06, | |
| "loss": 0.8724058866500854, | |
| "step": 1916 | |
| }, | |
| { | |
| "epoch": 2.341880341880342, | |
| "grad_norm": 2.5526764392852783, | |
| "learning_rate": 1.3832872803622787e-06, | |
| "loss": 0.5849741697311401, | |
| "step": 1918 | |
| }, | |
| { | |
| "epoch": 2.3443223443223444, | |
| "grad_norm": 8.768757820129395, | |
| "learning_rate": 1.3806158745496523e-06, | |
| "loss": 0.244558185338974, | |
| "step": 1920 | |
| }, | |
| { | |
| "epoch": 2.346764346764347, | |
| "grad_norm": 8.230733871459961, | |
| "learning_rate": 1.3779455668636281e-06, | |
| "loss": 0.6931383609771729, | |
| "step": 1922 | |
| }, | |
| { | |
| "epoch": 2.3492063492063493, | |
| "grad_norm": 1.4398163557052612, | |
| "learning_rate": 1.375276368189538e-06, | |
| "loss": 0.5176495313644409, | |
| "step": 1924 | |
| }, | |
| { | |
| "epoch": 2.3516483516483517, | |
| "grad_norm": 3.206899642944336, | |
| "learning_rate": 1.3726082894081942e-06, | |
| "loss": 0.8807193636894226, | |
| "step": 1926 | |
| }, | |
| { | |
| "epoch": 2.354090354090354, | |
| "grad_norm": 4.781541347503662, | |
| "learning_rate": 1.369941341395844e-06, | |
| "loss": 0.4091113805770874, | |
| "step": 1928 | |
| }, | |
| { | |
| "epoch": 2.3565323565323566, | |
| "grad_norm": 1.930128574371338, | |
| "learning_rate": 1.3672755350241242e-06, | |
| "loss": 0.479788601398468, | |
| "step": 1930 | |
| }, | |
| { | |
| "epoch": 2.358974358974359, | |
| "grad_norm": 2.9465410709381104, | |
| "learning_rate": 1.3646108811600196e-06, | |
| "loss": 0.6251949071884155, | |
| "step": 1932 | |
| }, | |
| { | |
| "epoch": 2.3614163614163615, | |
| "grad_norm": 2.628770589828491, | |
| "learning_rate": 1.361947390665815e-06, | |
| "loss": 0.8730329275131226, | |
| "step": 1934 | |
| }, | |
| { | |
| "epoch": 2.363858363858364, | |
| "grad_norm": 1.8498796224594116, | |
| "learning_rate": 1.3592850743990536e-06, | |
| "loss": 0.5206220746040344, | |
| "step": 1936 | |
| }, | |
| { | |
| "epoch": 2.3663003663003663, | |
| "grad_norm": 2.42006778717041, | |
| "learning_rate": 1.3566239432124922e-06, | |
| "loss": 0.8877469301223755, | |
| "step": 1938 | |
| }, | |
| { | |
| "epoch": 2.3687423687423688, | |
| "grad_norm": 5.480327129364014, | |
| "learning_rate": 1.3539640079540552e-06, | |
| "loss": 0.7236716747283936, | |
| "step": 1940 | |
| }, | |
| { | |
| "epoch": 2.371184371184371, | |
| "grad_norm": 0.8233247399330139, | |
| "learning_rate": 1.3513052794667944e-06, | |
| "loss": 0.5979463458061218, | |
| "step": 1942 | |
| }, | |
| { | |
| "epoch": 2.3736263736263736, | |
| "grad_norm": 16.757837295532227, | |
| "learning_rate": 1.3486477685888412e-06, | |
| "loss": 0.8658863306045532, | |
| "step": 1944 | |
| }, | |
| { | |
| "epoch": 2.376068376068376, | |
| "grad_norm": 5.338561058044434, | |
| "learning_rate": 1.345991486153362e-06, | |
| "loss": 0.18130576610565186, | |
| "step": 1946 | |
| }, | |
| { | |
| "epoch": 2.3785103785103785, | |
| "grad_norm": 1.9789485931396484, | |
| "learning_rate": 1.3433364429885175e-06, | |
| "loss": 0.45993301272392273, | |
| "step": 1948 | |
| }, | |
| { | |
| "epoch": 2.380952380952381, | |
| "grad_norm": 5.3818230628967285, | |
| "learning_rate": 1.340682649917415e-06, | |
| "loss": 0.49821627140045166, | |
| "step": 1950 | |
| }, | |
| { | |
| "epoch": 2.3833943833943834, | |
| "grad_norm": 2.89180064201355, | |
| "learning_rate": 1.3380301177580685e-06, | |
| "loss": 0.45867276191711426, | |
| "step": 1952 | |
| }, | |
| { | |
| "epoch": 2.385836385836386, | |
| "grad_norm": 11.45947265625, | |
| "learning_rate": 1.3353788573233485e-06, | |
| "loss": 0.7745890617370605, | |
| "step": 1954 | |
| }, | |
| { | |
| "epoch": 2.3882783882783882, | |
| "grad_norm": 2.6990694999694824, | |
| "learning_rate": 1.3327288794209438e-06, | |
| "loss": 1.0086697340011597, | |
| "step": 1956 | |
| }, | |
| { | |
| "epoch": 2.3907203907203907, | |
| "grad_norm": 2.360067367553711, | |
| "learning_rate": 1.330080194853314e-06, | |
| "loss": 0.5278434753417969, | |
| "step": 1958 | |
| }, | |
| { | |
| "epoch": 2.393162393162393, | |
| "grad_norm": 1.9279552698135376, | |
| "learning_rate": 1.3274328144176476e-06, | |
| "loss": 1.0481023788452148, | |
| "step": 1960 | |
| }, | |
| { | |
| "epoch": 2.3956043956043955, | |
| "grad_norm": 4.896252632141113, | |
| "learning_rate": 1.3247867489058158e-06, | |
| "loss": 0.5714776515960693, | |
| "step": 1962 | |
| }, | |
| { | |
| "epoch": 2.398046398046398, | |
| "grad_norm": 2.0633604526519775, | |
| "learning_rate": 1.322142009104329e-06, | |
| "loss": 0.8940474987030029, | |
| "step": 1964 | |
| }, | |
| { | |
| "epoch": 2.4004884004884004, | |
| "grad_norm": 5.460541725158691, | |
| "learning_rate": 1.3194986057942964e-06, | |
| "loss": 0.4920280873775482, | |
| "step": 1966 | |
| }, | |
| { | |
| "epoch": 2.402930402930403, | |
| "grad_norm": 10.408319473266602, | |
| "learning_rate": 1.3168565497513755e-06, | |
| "loss": 0.6397073268890381, | |
| "step": 1968 | |
| }, | |
| { | |
| "epoch": 2.4053724053724053, | |
| "grad_norm": 8.490042686462402, | |
| "learning_rate": 1.3142158517457336e-06, | |
| "loss": 0.4596382677555084, | |
| "step": 1970 | |
| }, | |
| { | |
| "epoch": 2.4078144078144077, | |
| "grad_norm": 21.235645294189453, | |
| "learning_rate": 1.3115765225420014e-06, | |
| "loss": 0.2081039547920227, | |
| "step": 1972 | |
| }, | |
| { | |
| "epoch": 2.41025641025641, | |
| "grad_norm": 1.7084224224090576, | |
| "learning_rate": 1.3089385728992314e-06, | |
| "loss": 0.2166650891304016, | |
| "step": 1974 | |
| }, | |
| { | |
| "epoch": 2.4126984126984126, | |
| "grad_norm": 172.3874053955078, | |
| "learning_rate": 1.3063020135708505e-06, | |
| "loss": 0.45456746220588684, | |
| "step": 1976 | |
| }, | |
| { | |
| "epoch": 2.415140415140415, | |
| "grad_norm": 5.325479984283447, | |
| "learning_rate": 1.303666855304618e-06, | |
| "loss": 0.973847508430481, | |
| "step": 1978 | |
| }, | |
| { | |
| "epoch": 2.4175824175824174, | |
| "grad_norm": 2.519134283065796, | |
| "learning_rate": 1.3010331088425834e-06, | |
| "loss": 0.4655970335006714, | |
| "step": 1980 | |
| }, | |
| { | |
| "epoch": 2.42002442002442, | |
| "grad_norm": 4.00046443939209, | |
| "learning_rate": 1.2984007849210397e-06, | |
| "loss": 1.049086332321167, | |
| "step": 1982 | |
| }, | |
| { | |
| "epoch": 2.4224664224664223, | |
| "grad_norm": 1.241420865058899, | |
| "learning_rate": 1.2957698942704826e-06, | |
| "loss": 0.5066336989402771, | |
| "step": 1984 | |
| }, | |
| { | |
| "epoch": 2.4249084249084247, | |
| "grad_norm": 1.724372148513794, | |
| "learning_rate": 1.293140447615563e-06, | |
| "loss": 0.9419578313827515, | |
| "step": 1986 | |
| }, | |
| { | |
| "epoch": 2.427350427350427, | |
| "grad_norm": 7.0960798263549805, | |
| "learning_rate": 1.2905124556750475e-06, | |
| "loss": 0.9048100709915161, | |
| "step": 1988 | |
| }, | |
| { | |
| "epoch": 2.42979242979243, | |
| "grad_norm": 4.784210681915283, | |
| "learning_rate": 1.2878859291617709e-06, | |
| "loss": 0.9671621918678284, | |
| "step": 1990 | |
| }, | |
| { | |
| "epoch": 2.4322344322344325, | |
| "grad_norm": 3.5901238918304443, | |
| "learning_rate": 1.2852608787825949e-06, | |
| "loss": 0.951181173324585, | |
| "step": 1992 | |
| }, | |
| { | |
| "epoch": 2.434676434676435, | |
| "grad_norm": 2.41652774810791, | |
| "learning_rate": 1.2826373152383652e-06, | |
| "loss": 0.882880687713623, | |
| "step": 1994 | |
| }, | |
| { | |
| "epoch": 2.4371184371184373, | |
| "grad_norm": 2.2836592197418213, | |
| "learning_rate": 1.2800152492238653e-06, | |
| "loss": 0.9011931419372559, | |
| "step": 1996 | |
| }, | |
| { | |
| "epoch": 2.4395604395604398, | |
| "grad_norm": 4.0648512840271, | |
| "learning_rate": 1.2773946914277732e-06, | |
| "loss": 0.9935725927352905, | |
| "step": 1998 | |
| }, | |
| { | |
| "epoch": 2.442002442002442, | |
| "grad_norm": 5.472900390625, | |
| "learning_rate": 1.2747756525326203e-06, | |
| "loss": 0.6879719495773315, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 2.4444444444444446, | |
| "grad_norm": 2.7880618572235107, | |
| "learning_rate": 1.2721581432147446e-06, | |
| "loss": 1.0028624534606934, | |
| "step": 2002 | |
| }, | |
| { | |
| "epoch": 2.446886446886447, | |
| "grad_norm": 2.5442252159118652, | |
| "learning_rate": 1.2695421741442524e-06, | |
| "loss": 0.5691015720367432, | |
| "step": 2004 | |
| }, | |
| { | |
| "epoch": 2.4493284493284495, | |
| "grad_norm": 3.0710692405700684, | |
| "learning_rate": 1.266927755984967e-06, | |
| "loss": 0.694396436214447, | |
| "step": 2006 | |
| }, | |
| { | |
| "epoch": 2.451770451770452, | |
| "grad_norm": 3.5317189693450928, | |
| "learning_rate": 1.2643148993943924e-06, | |
| "loss": 1.0118412971496582, | |
| "step": 2008 | |
| }, | |
| { | |
| "epoch": 2.4542124542124544, | |
| "grad_norm": 2.652507781982422, | |
| "learning_rate": 1.261703615023665e-06, | |
| "loss": 0.5923147201538086, | |
| "step": 2010 | |
| }, | |
| { | |
| "epoch": 2.456654456654457, | |
| "grad_norm": 2.592494249343872, | |
| "learning_rate": 1.2590939135175136e-06, | |
| "loss": 0.5213377475738525, | |
| "step": 2012 | |
| }, | |
| { | |
| "epoch": 2.4590964590964592, | |
| "grad_norm": 2.6872897148132324, | |
| "learning_rate": 1.2564858055142145e-06, | |
| "loss": 0.9990290403366089, | |
| "step": 2014 | |
| }, | |
| { | |
| "epoch": 2.4615384615384617, | |
| "grad_norm": 2.356238842010498, | |
| "learning_rate": 1.2538793016455475e-06, | |
| "loss": 0.9234784841537476, | |
| "step": 2016 | |
| }, | |
| { | |
| "epoch": 2.463980463980464, | |
| "grad_norm": 1.9037610292434692, | |
| "learning_rate": 1.2512744125367528e-06, | |
| "loss": 0.9043244123458862, | |
| "step": 2018 | |
| }, | |
| { | |
| "epoch": 2.4664224664224665, | |
| "grad_norm": 7.668602466583252, | |
| "learning_rate": 1.2486711488064905e-06, | |
| "loss": 0.7256745100021362, | |
| "step": 2020 | |
| }, | |
| { | |
| "epoch": 2.468864468864469, | |
| "grad_norm": 2.6887943744659424, | |
| "learning_rate": 1.2460695210667914e-06, | |
| "loss": 0.5414122343063354, | |
| "step": 2022 | |
| }, | |
| { | |
| "epoch": 2.4713064713064714, | |
| "grad_norm": 64.09046936035156, | |
| "learning_rate": 1.2434695399230209e-06, | |
| "loss": 0.679863691329956, | |
| "step": 2024 | |
| }, | |
| { | |
| "epoch": 2.473748473748474, | |
| "grad_norm": 3.3055121898651123, | |
| "learning_rate": 1.240871215973829e-06, | |
| "loss": 1.0142806768417358, | |
| "step": 2026 | |
| }, | |
| { | |
| "epoch": 2.4761904761904763, | |
| "grad_norm": 1.0458382368087769, | |
| "learning_rate": 1.238274559811113e-06, | |
| "loss": 0.5501134395599365, | |
| "step": 2028 | |
| }, | |
| { | |
| "epoch": 2.4786324786324787, | |
| "grad_norm": 2.6726701259613037, | |
| "learning_rate": 1.2356795820199698e-06, | |
| "loss": 1.0118372440338135, | |
| "step": 2030 | |
| }, | |
| { | |
| "epoch": 2.481074481074481, | |
| "grad_norm": 2.2977938652038574, | |
| "learning_rate": 1.2330862931786545e-06, | |
| "loss": 1.0428073406219482, | |
| "step": 2032 | |
| }, | |
| { | |
| "epoch": 2.4835164835164836, | |
| "grad_norm": 5.646606922149658, | |
| "learning_rate": 1.230494703858538e-06, | |
| "loss": 0.5894712209701538, | |
| "step": 2034 | |
| }, | |
| { | |
| "epoch": 2.485958485958486, | |
| "grad_norm": 5.495636940002441, | |
| "learning_rate": 1.2279048246240643e-06, | |
| "loss": 1.023470401763916, | |
| "step": 2036 | |
| }, | |
| { | |
| "epoch": 2.4884004884004884, | |
| "grad_norm": 2.8086633682250977, | |
| "learning_rate": 1.2253166660327038e-06, | |
| "loss": 0.6385378837585449, | |
| "step": 2038 | |
| }, | |
| { | |
| "epoch": 2.490842490842491, | |
| "grad_norm": 17.103914260864258, | |
| "learning_rate": 1.2227302386349143e-06, | |
| "loss": 0.71128249168396, | |
| "step": 2040 | |
| }, | |
| { | |
| "epoch": 2.4932844932844933, | |
| "grad_norm": 5.164656639099121, | |
| "learning_rate": 1.2201455529740966e-06, | |
| "loss": 0.9743695259094238, | |
| "step": 2042 | |
| }, | |
| { | |
| "epoch": 2.4957264957264957, | |
| "grad_norm": 5.278618335723877, | |
| "learning_rate": 1.2175626195865509e-06, | |
| "loss": 0.6444355845451355, | |
| "step": 2044 | |
| }, | |
| { | |
| "epoch": 2.498168498168498, | |
| "grad_norm": 7.79168176651001, | |
| "learning_rate": 1.2149814490014348e-06, | |
| "loss": 0.5937858819961548, | |
| "step": 2046 | |
| }, | |
| { | |
| "epoch": 2.5006105006105006, | |
| "grad_norm": 2.2751874923706055, | |
| "learning_rate": 1.21240205174072e-06, | |
| "loss": 0.7090959548950195, | |
| "step": 2048 | |
| }, | |
| { | |
| "epoch": 2.503052503052503, | |
| "grad_norm": 2.375674247741699, | |
| "learning_rate": 1.2098244383191486e-06, | |
| "loss": 0.7352893948554993, | |
| "step": 2050 | |
| }, | |
| { | |
| "epoch": 2.5054945054945055, | |
| "grad_norm": 5.6591796875, | |
| "learning_rate": 1.2072486192441923e-06, | |
| "loss": 1.0522555112838745, | |
| "step": 2052 | |
| }, | |
| { | |
| "epoch": 2.507936507936508, | |
| "grad_norm": 3.329157829284668, | |
| "learning_rate": 1.204674605016006e-06, | |
| "loss": 0.676443338394165, | |
| "step": 2054 | |
| }, | |
| { | |
| "epoch": 2.5103785103785103, | |
| "grad_norm": 3.159114360809326, | |
| "learning_rate": 1.20210240612739e-06, | |
| "loss": 0.9966514706611633, | |
| "step": 2056 | |
| }, | |
| { | |
| "epoch": 2.5128205128205128, | |
| "grad_norm": 8.732215881347656, | |
| "learning_rate": 1.1995320330637436e-06, | |
| "loss": 0.2845388948917389, | |
| "step": 2058 | |
| }, | |
| { | |
| "epoch": 2.515262515262515, | |
| "grad_norm": 3.7096004486083984, | |
| "learning_rate": 1.1969634963030219e-06, | |
| "loss": 0.32089143991470337, | |
| "step": 2060 | |
| }, | |
| { | |
| "epoch": 2.5177045177045176, | |
| "grad_norm": 3.319898843765259, | |
| "learning_rate": 1.1943968063156958e-06, | |
| "loss": 0.6263499855995178, | |
| "step": 2062 | |
| }, | |
| { | |
| "epoch": 2.52014652014652, | |
| "grad_norm": 3.8793458938598633, | |
| "learning_rate": 1.191831973564707e-06, | |
| "loss": 0.822320818901062, | |
| "step": 2064 | |
| }, | |
| { | |
| "epoch": 2.5225885225885225, | |
| "grad_norm": 1.7198671102523804, | |
| "learning_rate": 1.189269008505428e-06, | |
| "loss": 0.6087065935134888, | |
| "step": 2066 | |
| }, | |
| { | |
| "epoch": 2.525030525030525, | |
| "grad_norm": 5.9597272872924805, | |
| "learning_rate": 1.1867079215856158e-06, | |
| "loss": 0.7903388738632202, | |
| "step": 2068 | |
| }, | |
| { | |
| "epoch": 2.5274725274725274, | |
| "grad_norm": 1.8400710821151733, | |
| "learning_rate": 1.1841487232453726e-06, | |
| "loss": 1.028204321861267, | |
| "step": 2070 | |
| }, | |
| { | |
| "epoch": 2.52991452991453, | |
| "grad_norm": 2.3164854049682617, | |
| "learning_rate": 1.181591423917101e-06, | |
| "loss": 0.6416464447975159, | |
| "step": 2072 | |
| }, | |
| { | |
| "epoch": 2.5323565323565322, | |
| "grad_norm": 3.8321754932403564, | |
| "learning_rate": 1.179036034025463e-06, | |
| "loss": 0.9703350067138672, | |
| "step": 2074 | |
| }, | |
| { | |
| "epoch": 2.5347985347985347, | |
| "grad_norm": 7.375532627105713, | |
| "learning_rate": 1.1764825639873366e-06, | |
| "loss": 0.7711504697799683, | |
| "step": 2076 | |
| }, | |
| { | |
| "epoch": 2.537240537240537, | |
| "grad_norm": 6.5299530029296875, | |
| "learning_rate": 1.173931024211775e-06, | |
| "loss": 0.901872992515564, | |
| "step": 2078 | |
| }, | |
| { | |
| "epoch": 2.5396825396825395, | |
| "grad_norm": 33.257057189941406, | |
| "learning_rate": 1.1713814250999598e-06, | |
| "loss": 0.6119508743286133, | |
| "step": 2080 | |
| }, | |
| { | |
| "epoch": 2.542124542124542, | |
| "grad_norm": 2.738152503967285, | |
| "learning_rate": 1.1688337770451646e-06, | |
| "loss": 0.946742832660675, | |
| "step": 2082 | |
| }, | |
| { | |
| "epoch": 2.5445665445665444, | |
| "grad_norm": 2.8225514888763428, | |
| "learning_rate": 1.1662880904327088e-06, | |
| "loss": 0.9159148931503296, | |
| "step": 2084 | |
| }, | |
| { | |
| "epoch": 2.547008547008547, | |
| "grad_norm": 1.81510591506958, | |
| "learning_rate": 1.1637443756399147e-06, | |
| "loss": 0.7692682147026062, | |
| "step": 2086 | |
| }, | |
| { | |
| "epoch": 2.5494505494505493, | |
| "grad_norm": 3.052072286605835, | |
| "learning_rate": 1.1612026430360683e-06, | |
| "loss": 0.8489340543746948, | |
| "step": 2088 | |
| }, | |
| { | |
| "epoch": 2.5518925518925517, | |
| "grad_norm": 3.423278331756592, | |
| "learning_rate": 1.158662902982375e-06, | |
| "loss": 0.6858044862747192, | |
| "step": 2090 | |
| }, | |
| { | |
| "epoch": 2.554334554334554, | |
| "grad_norm": 4.983499526977539, | |
| "learning_rate": 1.156125165831917e-06, | |
| "loss": 0.676191508769989, | |
| "step": 2092 | |
| }, | |
| { | |
| "epoch": 2.5567765567765566, | |
| "grad_norm": 3.31892991065979, | |
| "learning_rate": 1.1535894419296125e-06, | |
| "loss": 0.6658056974411011, | |
| "step": 2094 | |
| }, | |
| { | |
| "epoch": 2.559218559218559, | |
| "grad_norm": 3.722532272338867, | |
| "learning_rate": 1.1510557416121716e-06, | |
| "loss": 0.9009595513343811, | |
| "step": 2096 | |
| }, | |
| { | |
| "epoch": 2.5616605616605614, | |
| "grad_norm": 1.2027240991592407, | |
| "learning_rate": 1.1485240752080578e-06, | |
| "loss": 0.03792768716812134, | |
| "step": 2098 | |
| }, | |
| { | |
| "epoch": 2.564102564102564, | |
| "grad_norm": 2.7659621238708496, | |
| "learning_rate": 1.145994453037442e-06, | |
| "loss": 0.5793513059616089, | |
| "step": 2100 | |
| }, | |
| { | |
| "epoch": 2.5665445665445663, | |
| "grad_norm": 3.262632369995117, | |
| "learning_rate": 1.143466885412161e-06, | |
| "loss": 0.9170372486114502, | |
| "step": 2102 | |
| }, | |
| { | |
| "epoch": 2.5689865689865687, | |
| "grad_norm": 15.442719459533691, | |
| "learning_rate": 1.1409413826356774e-06, | |
| "loss": 0.6066586971282959, | |
| "step": 2104 | |
| }, | |
| { | |
| "epoch": 2.571428571428571, | |
| "grad_norm": 2.0126445293426514, | |
| "learning_rate": 1.1384179550030368e-06, | |
| "loss": 0.8480682373046875, | |
| "step": 2106 | |
| }, | |
| { | |
| "epoch": 2.5738705738705736, | |
| "grad_norm": 13.192070960998535, | |
| "learning_rate": 1.1358966128008256e-06, | |
| "loss": 0.5439214706420898, | |
| "step": 2108 | |
| }, | |
| { | |
| "epoch": 2.576312576312576, | |
| "grad_norm": 4.459967136383057, | |
| "learning_rate": 1.133377366307129e-06, | |
| "loss": 0.5642499923706055, | |
| "step": 2110 | |
| }, | |
| { | |
| "epoch": 2.578754578754579, | |
| "grad_norm": 1.7246872186660767, | |
| "learning_rate": 1.1308602257914882e-06, | |
| "loss": 0.9049108028411865, | |
| "step": 2112 | |
| }, | |
| { | |
| "epoch": 2.5811965811965814, | |
| "grad_norm": 6.134650230407715, | |
| "learning_rate": 1.1283452015148612e-06, | |
| "loss": 0.31016868352890015, | |
| "step": 2114 | |
| }, | |
| { | |
| "epoch": 2.583638583638584, | |
| "grad_norm": 2.271333932876587, | |
| "learning_rate": 1.1258323037295769e-06, | |
| "loss": 0.6509522199630737, | |
| "step": 2116 | |
| }, | |
| { | |
| "epoch": 2.586080586080586, | |
| "grad_norm": 3.3012821674346924, | |
| "learning_rate": 1.1233215426792994e-06, | |
| "loss": 0.9619686007499695, | |
| "step": 2118 | |
| }, | |
| { | |
| "epoch": 2.5885225885225887, | |
| "grad_norm": 1.7674365043640137, | |
| "learning_rate": 1.1208129285989785e-06, | |
| "loss": 0.5941784977912903, | |
| "step": 2120 | |
| }, | |
| { | |
| "epoch": 2.590964590964591, | |
| "grad_norm": 3.0935308933258057, | |
| "learning_rate": 1.1183064717148147e-06, | |
| "loss": 0.8604737520217896, | |
| "step": 2122 | |
| }, | |
| { | |
| "epoch": 2.5934065934065935, | |
| "grad_norm": 2.9636788368225098, | |
| "learning_rate": 1.1158021822442145e-06, | |
| "loss": 0.8363077640533447, | |
| "step": 2124 | |
| }, | |
| { | |
| "epoch": 2.595848595848596, | |
| "grad_norm": 2.787670373916626, | |
| "learning_rate": 1.113300070395747e-06, | |
| "loss": 0.5091552138328552, | |
| "step": 2126 | |
| }, | |
| { | |
| "epoch": 2.5982905982905984, | |
| "grad_norm": 9.528656005859375, | |
| "learning_rate": 1.110800146369107e-06, | |
| "loss": 0.9548131823539734, | |
| "step": 2128 | |
| }, | |
| { | |
| "epoch": 2.600732600732601, | |
| "grad_norm": 3.1251182556152344, | |
| "learning_rate": 1.10830242035507e-06, | |
| "loss": 0.6277987957000732, | |
| "step": 2130 | |
| }, | |
| { | |
| "epoch": 2.6031746031746033, | |
| "grad_norm": 2.250443458557129, | |
| "learning_rate": 1.105806902535451e-06, | |
| "loss": 0.5488678216934204, | |
| "step": 2132 | |
| }, | |
| { | |
| "epoch": 2.6056166056166057, | |
| "grad_norm": 2.513256311416626, | |
| "learning_rate": 1.1033136030830628e-06, | |
| "loss": 0.7977935075759888, | |
| "step": 2134 | |
| }, | |
| { | |
| "epoch": 2.608058608058608, | |
| "grad_norm": 3.494269371032715, | |
| "learning_rate": 1.1008225321616769e-06, | |
| "loss": 0.6196584701538086, | |
| "step": 2136 | |
| }, | |
| { | |
| "epoch": 2.6105006105006106, | |
| "grad_norm": 4.025573253631592, | |
| "learning_rate": 1.098333699925979e-06, | |
| "loss": 0.6206701397895813, | |
| "step": 2138 | |
| }, | |
| { | |
| "epoch": 2.612942612942613, | |
| "grad_norm": 1.8326927423477173, | |
| "learning_rate": 1.0958471165215298e-06, | |
| "loss": 0.5463999509811401, | |
| "step": 2140 | |
| }, | |
| { | |
| "epoch": 2.6153846153846154, | |
| "grad_norm": 3.7380852699279785, | |
| "learning_rate": 1.093362792084722e-06, | |
| "loss": 0.7910411357879639, | |
| "step": 2142 | |
| }, | |
| { | |
| "epoch": 2.617826617826618, | |
| "grad_norm": 2.8892993927001953, | |
| "learning_rate": 1.0908807367427405e-06, | |
| "loss": 0.920970618724823, | |
| "step": 2144 | |
| }, | |
| { | |
| "epoch": 2.6202686202686203, | |
| "grad_norm": 2.955557346343994, | |
| "learning_rate": 1.0884009606135199e-06, | |
| "loss": 1.0370674133300781, | |
| "step": 2146 | |
| }, | |
| { | |
| "epoch": 2.6227106227106227, | |
| "grad_norm": 4.528905868530273, | |
| "learning_rate": 1.0859234738057028e-06, | |
| "loss": 0.6259182691574097, | |
| "step": 2148 | |
| }, | |
| { | |
| "epoch": 2.625152625152625, | |
| "grad_norm": 3.167137861251831, | |
| "learning_rate": 1.0834482864186027e-06, | |
| "loss": 0.9444223642349243, | |
| "step": 2150 | |
| }, | |
| { | |
| "epoch": 2.6275946275946276, | |
| "grad_norm": 2.9011199474334717, | |
| "learning_rate": 1.0809754085421558e-06, | |
| "loss": 0.9033249020576477, | |
| "step": 2152 | |
| }, | |
| { | |
| "epoch": 2.63003663003663, | |
| "grad_norm": 3.1613001823425293, | |
| "learning_rate": 1.0785048502568866e-06, | |
| "loss": 0.9606409072875977, | |
| "step": 2154 | |
| }, | |
| { | |
| "epoch": 2.6324786324786325, | |
| "grad_norm": 2.4797394275665283, | |
| "learning_rate": 1.0760366216338618e-06, | |
| "loss": 0.9261834621429443, | |
| "step": 2156 | |
| }, | |
| { | |
| "epoch": 2.634920634920635, | |
| "grad_norm": 5.077497482299805, | |
| "learning_rate": 1.0735707327346528e-06, | |
| "loss": 0.7732234001159668, | |
| "step": 2158 | |
| }, | |
| { | |
| "epoch": 2.6373626373626373, | |
| "grad_norm": 6.45423698425293, | |
| "learning_rate": 1.0711071936112932e-06, | |
| "loss": 0.5764999389648438, | |
| "step": 2160 | |
| }, | |
| { | |
| "epoch": 2.6398046398046398, | |
| "grad_norm": 80.45039367675781, | |
| "learning_rate": 1.0686460143062383e-06, | |
| "loss": 0.7456884384155273, | |
| "step": 2162 | |
| }, | |
| { | |
| "epoch": 2.642246642246642, | |
| "grad_norm": 3.7490296363830566, | |
| "learning_rate": 1.0661872048523208e-06, | |
| "loss": 0.5622179508209229, | |
| "step": 2164 | |
| }, | |
| { | |
| "epoch": 2.6446886446886446, | |
| "grad_norm": 2.6304526329040527, | |
| "learning_rate": 1.063730775272717e-06, | |
| "loss": 0.8909397721290588, | |
| "step": 2166 | |
| }, | |
| { | |
| "epoch": 2.647130647130647, | |
| "grad_norm": 3.888472557067871, | |
| "learning_rate": 1.0612767355808991e-06, | |
| "loss": 0.6424640417098999, | |
| "step": 2168 | |
| }, | |
| { | |
| "epoch": 2.6495726495726495, | |
| "grad_norm": 2.6570329666137695, | |
| "learning_rate": 1.0588250957805989e-06, | |
| "loss": 0.9468408823013306, | |
| "step": 2170 | |
| }, | |
| { | |
| "epoch": 2.652014652014652, | |
| "grad_norm": 3.979954957962036, | |
| "learning_rate": 1.056375865865763e-06, | |
| "loss": 1.0085461139678955, | |
| "step": 2172 | |
| }, | |
| { | |
| "epoch": 2.6544566544566544, | |
| "grad_norm": 5.121192455291748, | |
| "learning_rate": 1.0539290558205155e-06, | |
| "loss": 1.0236008167266846, | |
| "step": 2174 | |
| }, | |
| { | |
| "epoch": 2.656898656898657, | |
| "grad_norm": 2.919496536254883, | |
| "learning_rate": 1.051484675619117e-06, | |
| "loss": 0.7649430632591248, | |
| "step": 2176 | |
| }, | |
| { | |
| "epoch": 2.659340659340659, | |
| "grad_norm": 2.0370700359344482, | |
| "learning_rate": 1.0490427352259208e-06, | |
| "loss": 0.9104880690574646, | |
| "step": 2178 | |
| }, | |
| { | |
| "epoch": 2.6617826617826617, | |
| "grad_norm": 7.094944953918457, | |
| "learning_rate": 1.0466032445953358e-06, | |
| "loss": 0.9893792867660522, | |
| "step": 2180 | |
| }, | |
| { | |
| "epoch": 2.664224664224664, | |
| "grad_norm": 2.306826114654541, | |
| "learning_rate": 1.0441662136717853e-06, | |
| "loss": 0.926790714263916, | |
| "step": 2182 | |
| }, | |
| { | |
| "epoch": 2.6666666666666665, | |
| "grad_norm": 2.302286148071289, | |
| "learning_rate": 1.0417316523896652e-06, | |
| "loss": 0.602645218372345, | |
| "step": 2184 | |
| }, | |
| { | |
| "epoch": 2.669108669108669, | |
| "grad_norm": 2.7806568145751953, | |
| "learning_rate": 1.0392995706733025e-06, | |
| "loss": 0.889150857925415, | |
| "step": 2186 | |
| }, | |
| { | |
| "epoch": 2.6715506715506714, | |
| "grad_norm": 8.433646202087402, | |
| "learning_rate": 1.0368699784369181e-06, | |
| "loss": 0.8979561924934387, | |
| "step": 2188 | |
| }, | |
| { | |
| "epoch": 2.6739926739926743, | |
| "grad_norm": 3.222686529159546, | |
| "learning_rate": 1.0344428855845851e-06, | |
| "loss": 1.0049721002578735, | |
| "step": 2190 | |
| }, | |
| { | |
| "epoch": 2.6764346764346767, | |
| "grad_norm": 5.096746444702148, | |
| "learning_rate": 1.0320183020101874e-06, | |
| "loss": 0.8711087703704834, | |
| "step": 2192 | |
| }, | |
| { | |
| "epoch": 2.678876678876679, | |
| "grad_norm": 4.993548393249512, | |
| "learning_rate": 1.0295962375973783e-06, | |
| "loss": 0.330793559551239, | |
| "step": 2194 | |
| }, | |
| { | |
| "epoch": 2.6813186813186816, | |
| "grad_norm": 2.0326321125030518, | |
| "learning_rate": 1.0271767022195453e-06, | |
| "loss": 1.098541021347046, | |
| "step": 2196 | |
| }, | |
| { | |
| "epoch": 2.683760683760684, | |
| "grad_norm": 2.071915864944458, | |
| "learning_rate": 1.024759705739763e-06, | |
| "loss": 0.8561097383499146, | |
| "step": 2198 | |
| }, | |
| { | |
| "epoch": 2.6862026862026864, | |
| "grad_norm": 3.06192946434021, | |
| "learning_rate": 1.022345258010759e-06, | |
| "loss": 0.893789529800415, | |
| "step": 2200 | |
| }, | |
| { | |
| "epoch": 2.688644688644689, | |
| "grad_norm": 5.383426189422607, | |
| "learning_rate": 1.0199333688748695e-06, | |
| "loss": 0.5891574621200562, | |
| "step": 2202 | |
| }, | |
| { | |
| "epoch": 2.6910866910866913, | |
| "grad_norm": 4.719232082366943, | |
| "learning_rate": 1.0175240481640023e-06, | |
| "loss": 0.7356748580932617, | |
| "step": 2204 | |
| }, | |
| { | |
| "epoch": 2.6935286935286937, | |
| "grad_norm": 29.910390853881836, | |
| "learning_rate": 1.015117305699593e-06, | |
| "loss": 0.30005913972854614, | |
| "step": 2206 | |
| }, | |
| { | |
| "epoch": 2.695970695970696, | |
| "grad_norm": 4.9184889793396, | |
| "learning_rate": 1.0127131512925694e-06, | |
| "loss": 0.9179880023002625, | |
| "step": 2208 | |
| }, | |
| { | |
| "epoch": 2.6984126984126986, | |
| "grad_norm": 4.041310787200928, | |
| "learning_rate": 1.0103115947433066e-06, | |
| "loss": 0.6154261827468872, | |
| "step": 2210 | |
| }, | |
| { | |
| "epoch": 2.700854700854701, | |
| "grad_norm": 3.462355375289917, | |
| "learning_rate": 1.0079126458415932e-06, | |
| "loss": 0.8864128589630127, | |
| "step": 2212 | |
| }, | |
| { | |
| "epoch": 2.7032967032967035, | |
| "grad_norm": 3.169670581817627, | |
| "learning_rate": 1.0055163143665849e-06, | |
| "loss": 0.5859304666519165, | |
| "step": 2214 | |
| }, | |
| { | |
| "epoch": 2.705738705738706, | |
| "grad_norm": 3.87908935546875, | |
| "learning_rate": 1.0031226100867693e-06, | |
| "loss": 0.493660032749176, | |
| "step": 2216 | |
| }, | |
| { | |
| "epoch": 2.7081807081807083, | |
| "grad_norm": 3.9208171367645264, | |
| "learning_rate": 1.0007315427599233e-06, | |
| "loss": 0.5849690437316895, | |
| "step": 2218 | |
| }, | |
| { | |
| "epoch": 2.7106227106227108, | |
| "grad_norm": 12.258569717407227, | |
| "learning_rate": 9.98343122133075e-07, | |
| "loss": 0.2784596085548401, | |
| "step": 2220 | |
| }, | |
| { | |
| "epoch": 2.713064713064713, | |
| "grad_norm": 14.164685249328613, | |
| "learning_rate": 9.95957357942464e-07, | |
| "loss": 0.5615782737731934, | |
| "step": 2222 | |
| }, | |
| { | |
| "epoch": 2.7155067155067156, | |
| "grad_norm": 7.182824611663818, | |
| "learning_rate": 9.935742599135003e-07, | |
| "loss": 0.5892859697341919, | |
| "step": 2224 | |
| }, | |
| { | |
| "epoch": 2.717948717948718, | |
| "grad_norm": 2.665816307067871, | |
| "learning_rate": 9.911938377607253e-07, | |
| "loss": 0.5999610424041748, | |
| "step": 2226 | |
| }, | |
| { | |
| "epoch": 2.7203907203907205, | |
| "grad_norm": 14.807984352111816, | |
| "learning_rate": 9.888161011877724e-07, | |
| "loss": 0.7669193744659424, | |
| "step": 2228 | |
| }, | |
| { | |
| "epoch": 2.722832722832723, | |
| "grad_norm": 3.079016923904419, | |
| "learning_rate": 9.864410598873288e-07, | |
| "loss": 0.8617438077926636, | |
| "step": 2230 | |
| }, | |
| { | |
| "epoch": 2.7252747252747254, | |
| "grad_norm": 7.283175945281982, | |
| "learning_rate": 9.840687235410935e-07, | |
| "loss": 0.5871368646621704, | |
| "step": 2232 | |
| }, | |
| { | |
| "epoch": 2.727716727716728, | |
| "grad_norm": 2.393118143081665, | |
| "learning_rate": 9.81699101819738e-07, | |
| "loss": 0.9107319116592407, | |
| "step": 2234 | |
| }, | |
| { | |
| "epoch": 2.7301587301587302, | |
| "grad_norm": 3.0181188583374023, | |
| "learning_rate": 9.793322043828693e-07, | |
| "loss": 0.9876934289932251, | |
| "step": 2236 | |
| }, | |
| { | |
| "epoch": 2.7326007326007327, | |
| "grad_norm": 2.218498706817627, | |
| "learning_rate": 9.76968040878989e-07, | |
| "loss": 0.8414710760116577, | |
| "step": 2238 | |
| }, | |
| { | |
| "epoch": 2.735042735042735, | |
| "grad_norm": 3.7080860137939453, | |
| "learning_rate": 9.746066209454527e-07, | |
| "loss": 0.6018757224082947, | |
| "step": 2240 | |
| }, | |
| { | |
| "epoch": 2.7374847374847375, | |
| "grad_norm": 5.018340110778809, | |
| "learning_rate": 9.72247954208433e-07, | |
| "loss": 0.9519623517990112, | |
| "step": 2242 | |
| }, | |
| { | |
| "epoch": 2.73992673992674, | |
| "grad_norm": 7.269012928009033, | |
| "learning_rate": 9.698920502828796e-07, | |
| "loss": 0.514839231967926, | |
| "step": 2244 | |
| }, | |
| { | |
| "epoch": 2.7423687423687424, | |
| "grad_norm": 2.92769718170166, | |
| "learning_rate": 9.675389187724794e-07, | |
| "loss": 0.6671766042709351, | |
| "step": 2246 | |
| }, | |
| { | |
| "epoch": 2.744810744810745, | |
| "grad_norm": 3.515108108520508, | |
| "learning_rate": 9.65188569269617e-07, | |
| "loss": 0.6551962494850159, | |
| "step": 2248 | |
| }, | |
| { | |
| "epoch": 2.7472527472527473, | |
| "grad_norm": 2.476094961166382, | |
| "learning_rate": 9.628410113553377e-07, | |
| "loss": 0.8923386931419373, | |
| "step": 2250 | |
| }, | |
| { | |
| "epoch": 2.7496947496947497, | |
| "grad_norm": 4.466701984405518, | |
| "learning_rate": 9.604962545993047e-07, | |
| "loss": 0.8455872535705566, | |
| "step": 2252 | |
| }, | |
| { | |
| "epoch": 2.752136752136752, | |
| "grad_norm": 3.037766933441162, | |
| "learning_rate": 9.581543085597666e-07, | |
| "loss": 0.6692306995391846, | |
| "step": 2254 | |
| }, | |
| { | |
| "epoch": 2.7545787545787546, | |
| "grad_norm": 6.779411315917969, | |
| "learning_rate": 9.558151827835097e-07, | |
| "loss": 0.7176947593688965, | |
| "step": 2256 | |
| }, | |
| { | |
| "epoch": 2.757020757020757, | |
| "grad_norm": 3.2545528411865234, | |
| "learning_rate": 9.534788868058273e-07, | |
| "loss": 0.907602071762085, | |
| "step": 2258 | |
| }, | |
| { | |
| "epoch": 2.7594627594627594, | |
| "grad_norm": 4.992172718048096, | |
| "learning_rate": 9.511454301504744e-07, | |
| "loss": 1.2043339014053345, | |
| "step": 2260 | |
| }, | |
| { | |
| "epoch": 2.761904761904762, | |
| "grad_norm": 3.5469167232513428, | |
| "learning_rate": 9.488148223296335e-07, | |
| "loss": 0.9311165809631348, | |
| "step": 2262 | |
| }, | |
| { | |
| "epoch": 2.7643467643467643, | |
| "grad_norm": 3.044855833053589, | |
| "learning_rate": 9.464870728438738e-07, | |
| "loss": 0.8616805076599121, | |
| "step": 2264 | |
| }, | |
| { | |
| "epoch": 2.7667887667887667, | |
| "grad_norm": 2.740926742553711, | |
| "learning_rate": 9.441621911821125e-07, | |
| "loss": 0.8925296068191528, | |
| "step": 2266 | |
| }, | |
| { | |
| "epoch": 2.769230769230769, | |
| "grad_norm": 1.372963309288025, | |
| "learning_rate": 9.418401868215754e-07, | |
| "loss": 0.6977874636650085, | |
| "step": 2268 | |
| }, | |
| { | |
| "epoch": 2.7716727716727716, | |
| "grad_norm": 2.319948196411133, | |
| "learning_rate": 9.39521069227761e-07, | |
| "loss": 0.4934701919555664, | |
| "step": 2270 | |
| }, | |
| { | |
| "epoch": 2.774114774114774, | |
| "grad_norm": 1.1700794696807861, | |
| "learning_rate": 9.372048478543973e-07, | |
| "loss": 0.1815406084060669, | |
| "step": 2272 | |
| }, | |
| { | |
| "epoch": 2.7765567765567765, | |
| "grad_norm": 2.3060333728790283, | |
| "learning_rate": 9.3489153214341e-07, | |
| "loss": 0.9932706356048584, | |
| "step": 2274 | |
| }, | |
| { | |
| "epoch": 2.778998778998779, | |
| "grad_norm": 2.0681960582733154, | |
| "learning_rate": 9.325811315248766e-07, | |
| "loss": 0.8084139227867126, | |
| "step": 2276 | |
| }, | |
| { | |
| "epoch": 2.7814407814407813, | |
| "grad_norm": 4.336089611053467, | |
| "learning_rate": 9.302736554169931e-07, | |
| "loss": 0.5911020040512085, | |
| "step": 2278 | |
| }, | |
| { | |
| "epoch": 2.7838827838827838, | |
| "grad_norm": 4.918403625488281, | |
| "learning_rate": 9.27969113226033e-07, | |
| "loss": 0.2715740501880646, | |
| "step": 2280 | |
| }, | |
| { | |
| "epoch": 2.786324786324786, | |
| "grad_norm": 3.0501925945281982, | |
| "learning_rate": 9.256675143463106e-07, | |
| "loss": 0.8959693908691406, | |
| "step": 2282 | |
| }, | |
| { | |
| "epoch": 2.7887667887667886, | |
| "grad_norm": 2.734907388687134, | |
| "learning_rate": 9.23368868160142e-07, | |
| "loss": 0.8861985206604004, | |
| "step": 2284 | |
| }, | |
| { | |
| "epoch": 2.791208791208791, | |
| "grad_norm": 2.5319039821624756, | |
| "learning_rate": 9.210731840378071e-07, | |
| "loss": 0.9189043045043945, | |
| "step": 2286 | |
| }, | |
| { | |
| "epoch": 2.7936507936507935, | |
| "grad_norm": 2.35827898979187, | |
| "learning_rate": 9.187804713375092e-07, | |
| "loss": 0.8315421342849731, | |
| "step": 2288 | |
| }, | |
| { | |
| "epoch": 2.796092796092796, | |
| "grad_norm": 8.367574691772461, | |
| "learning_rate": 9.164907394053413e-07, | |
| "loss": 0.3263718783855438, | |
| "step": 2290 | |
| }, | |
| { | |
| "epoch": 2.7985347985347984, | |
| "grad_norm": 3.762657403945923, | |
| "learning_rate": 9.142039975752441e-07, | |
| "loss": 0.7226351499557495, | |
| "step": 2292 | |
| }, | |
| { | |
| "epoch": 2.800976800976801, | |
| "grad_norm": 2.5866949558258057, | |
| "learning_rate": 9.119202551689703e-07, | |
| "loss": 0.7572792768478394, | |
| "step": 2294 | |
| }, | |
| { | |
| "epoch": 2.8034188034188032, | |
| "grad_norm": 2.7875094413757324, | |
| "learning_rate": 9.096395214960442e-07, | |
| "loss": 0.9100178480148315, | |
| "step": 2296 | |
| }, | |
| { | |
| "epoch": 2.8058608058608057, | |
| "grad_norm": 7.963706016540527, | |
| "learning_rate": 9.073618058537263e-07, | |
| "loss": 0.440452516078949, | |
| "step": 2298 | |
| }, | |
| { | |
| "epoch": 2.808302808302808, | |
| "grad_norm": 2.4370265007019043, | |
| "learning_rate": 9.050871175269745e-07, | |
| "loss": 0.6573533415794373, | |
| "step": 2300 | |
| }, | |
| { | |
| "epoch": 2.8107448107448105, | |
| "grad_norm": 5.317839622497559, | |
| "learning_rate": 9.028154657884048e-07, | |
| "loss": 0.9777510762214661, | |
| "step": 2302 | |
| }, | |
| { | |
| "epoch": 2.813186813186813, | |
| "grad_norm": 0.8748495578765869, | |
| "learning_rate": 9.005468598982559e-07, | |
| "loss": 0.5308395624160767, | |
| "step": 2304 | |
| }, | |
| { | |
| "epoch": 2.8156288156288154, | |
| "grad_norm": 1.7506893873214722, | |
| "learning_rate": 8.982813091043501e-07, | |
| "loss": 0.6835004091262817, | |
| "step": 2306 | |
| }, | |
| { | |
| "epoch": 2.818070818070818, | |
| "grad_norm": 2.0650813579559326, | |
| "learning_rate": 8.960188226420558e-07, | |
| "loss": 0.580802857875824, | |
| "step": 2308 | |
| }, | |
| { | |
| "epoch": 2.8205128205128203, | |
| "grad_norm": 3.050828456878662, | |
| "learning_rate": 8.937594097342487e-07, | |
| "loss": 0.9979471564292908, | |
| "step": 2310 | |
| }, | |
| { | |
| "epoch": 2.8229548229548227, | |
| "grad_norm": 2.60994553565979, | |
| "learning_rate": 8.915030795912777e-07, | |
| "loss": 0.5684899091720581, | |
| "step": 2312 | |
| }, | |
| { | |
| "epoch": 2.825396825396825, | |
| "grad_norm": 10.324296951293945, | |
| "learning_rate": 8.892498414109218e-07, | |
| "loss": 0.7211320400238037, | |
| "step": 2314 | |
| }, | |
| { | |
| "epoch": 2.8278388278388276, | |
| "grad_norm": 7.168615818023682, | |
| "learning_rate": 8.869997043783601e-07, | |
| "loss": 0.6895596981048584, | |
| "step": 2316 | |
| }, | |
| { | |
| "epoch": 2.8302808302808304, | |
| "grad_norm": 7.450392246246338, | |
| "learning_rate": 8.847526776661257e-07, | |
| "loss": 1.002673625946045, | |
| "step": 2318 | |
| }, | |
| { | |
| "epoch": 2.832722832722833, | |
| "grad_norm": 2.7963898181915283, | |
| "learning_rate": 8.825087704340758e-07, | |
| "loss": 0.5786941647529602, | |
| "step": 2320 | |
| }, | |
| { | |
| "epoch": 2.8351648351648353, | |
| "grad_norm": 11.978581428527832, | |
| "learning_rate": 8.80267991829349e-07, | |
| "loss": 0.5861607789993286, | |
| "step": 2322 | |
| }, | |
| { | |
| "epoch": 2.8376068376068377, | |
| "grad_norm": 1.3733934164047241, | |
| "learning_rate": 8.780303509863317e-07, | |
| "loss": 0.1574743241071701, | |
| "step": 2324 | |
| }, | |
| { | |
| "epoch": 2.84004884004884, | |
| "grad_norm": 2.2476611137390137, | |
| "learning_rate": 8.757958570266197e-07, | |
| "loss": 0.7898412942886353, | |
| "step": 2326 | |
| }, | |
| { | |
| "epoch": 2.8424908424908426, | |
| "grad_norm": 2.539482355117798, | |
| "learning_rate": 8.735645190589798e-07, | |
| "loss": 0.7218636274337769, | |
| "step": 2328 | |
| }, | |
| { | |
| "epoch": 2.844932844932845, | |
| "grad_norm": 2.708406448364258, | |
| "learning_rate": 8.713363461793135e-07, | |
| "loss": 0.7509723901748657, | |
| "step": 2330 | |
| }, | |
| { | |
| "epoch": 2.8473748473748475, | |
| "grad_norm": 2.7118008136749268, | |
| "learning_rate": 8.691113474706212e-07, | |
| "loss": 0.9384118914604187, | |
| "step": 2332 | |
| }, | |
| { | |
| "epoch": 2.84981684981685, | |
| "grad_norm": 3.7488577365875244, | |
| "learning_rate": 8.668895320029616e-07, | |
| "loss": 0.18342451751232147, | |
| "step": 2334 | |
| }, | |
| { | |
| "epoch": 2.8522588522588523, | |
| "grad_norm": 2.957876205444336, | |
| "learning_rate": 8.64670908833421e-07, | |
| "loss": 0.950477123260498, | |
| "step": 2336 | |
| }, | |
| { | |
| "epoch": 2.8547008547008548, | |
| "grad_norm": 2.710932493209839, | |
| "learning_rate": 8.624554870060685e-07, | |
| "loss": 0.5325321555137634, | |
| "step": 2338 | |
| }, | |
| { | |
| "epoch": 2.857142857142857, | |
| "grad_norm": 2.418707847595215, | |
| "learning_rate": 8.60243275551926e-07, | |
| "loss": 0.9373496770858765, | |
| "step": 2340 | |
| }, | |
| { | |
| "epoch": 2.8595848595848596, | |
| "grad_norm": 1.9435275793075562, | |
| "learning_rate": 8.580342834889265e-07, | |
| "loss": 1.076695442199707, | |
| "step": 2342 | |
| }, | |
| { | |
| "epoch": 2.862026862026862, | |
| "grad_norm": 10.91031265258789, | |
| "learning_rate": 8.558285198218809e-07, | |
| "loss": 0.2554206848144531, | |
| "step": 2344 | |
| }, | |
| { | |
| "epoch": 2.8644688644688645, | |
| "grad_norm": 7.6466965675354, | |
| "learning_rate": 8.536259935424391e-07, | |
| "loss": 0.8836771249771118, | |
| "step": 2346 | |
| }, | |
| { | |
| "epoch": 2.866910866910867, | |
| "grad_norm": 1.3893219232559204, | |
| "learning_rate": 8.514267136290543e-07, | |
| "loss": 0.4657254219055176, | |
| "step": 2348 | |
| }, | |
| { | |
| "epoch": 2.8693528693528694, | |
| "grad_norm": 1.8530827760696411, | |
| "learning_rate": 8.492306890469451e-07, | |
| "loss": 0.5240323543548584, | |
| "step": 2350 | |
| }, | |
| { | |
| "epoch": 2.871794871794872, | |
| "grad_norm": 3.315155506134033, | |
| "learning_rate": 8.470379287480611e-07, | |
| "loss": 0.6606559753417969, | |
| "step": 2352 | |
| }, | |
| { | |
| "epoch": 2.8742368742368742, | |
| "grad_norm": 4.732027053833008, | |
| "learning_rate": 8.448484416710452e-07, | |
| "loss": 0.5519453883171082, | |
| "step": 2354 | |
| }, | |
| { | |
| "epoch": 2.8766788766788767, | |
| "grad_norm": 5.364790439605713, | |
| "learning_rate": 8.426622367411968e-07, | |
| "loss": 0.8750231862068176, | |
| "step": 2356 | |
| }, | |
| { | |
| "epoch": 2.879120879120879, | |
| "grad_norm": 8.549674987792969, | |
| "learning_rate": 8.404793228704355e-07, | |
| "loss": 0.8055195808410645, | |
| "step": 2358 | |
| }, | |
| { | |
| "epoch": 2.8815628815628815, | |
| "grad_norm": 2.5722405910491943, | |
| "learning_rate": 8.382997089572657e-07, | |
| "loss": 0.52369225025177, | |
| "step": 2360 | |
| }, | |
| { | |
| "epoch": 2.884004884004884, | |
| "grad_norm": 3.7537426948547363, | |
| "learning_rate": 8.361234038867405e-07, | |
| "loss": 0.672394871711731, | |
| "step": 2362 | |
| }, | |
| { | |
| "epoch": 2.8864468864468864, | |
| "grad_norm": 2.590395927429199, | |
| "learning_rate": 8.339504165304226e-07, | |
| "loss": 0.9153240919113159, | |
| "step": 2364 | |
| }, | |
| { | |
| "epoch": 2.888888888888889, | |
| "grad_norm": 3.5174717903137207, | |
| "learning_rate": 8.317807557463518e-07, | |
| "loss": 0.9851236343383789, | |
| "step": 2366 | |
| }, | |
| { | |
| "epoch": 2.8913308913308913, | |
| "grad_norm": 18.8581485748291, | |
| "learning_rate": 8.296144303790073e-07, | |
| "loss": 0.697291910648346, | |
| "step": 2368 | |
| }, | |
| { | |
| "epoch": 2.8937728937728937, | |
| "grad_norm": 4.934338092803955, | |
| "learning_rate": 8.274514492592721e-07, | |
| "loss": 1.1962435245513916, | |
| "step": 2370 | |
| }, | |
| { | |
| "epoch": 2.896214896214896, | |
| "grad_norm": 3.703958749771118, | |
| "learning_rate": 8.252918212043943e-07, | |
| "loss": 0.883178174495697, | |
| "step": 2372 | |
| }, | |
| { | |
| "epoch": 2.8986568986568986, | |
| "grad_norm": 1.871797800064087, | |
| "learning_rate": 8.231355550179566e-07, | |
| "loss": 0.761870801448822, | |
| "step": 2374 | |
| }, | |
| { | |
| "epoch": 2.901098901098901, | |
| "grad_norm": 2.5275323390960693, | |
| "learning_rate": 8.209826594898342e-07, | |
| "loss": 0.93211829662323, | |
| "step": 2376 | |
| }, | |
| { | |
| "epoch": 2.9035409035409034, | |
| "grad_norm": 7.618900775909424, | |
| "learning_rate": 8.188331433961653e-07, | |
| "loss": 0.8561273217201233, | |
| "step": 2378 | |
| }, | |
| { | |
| "epoch": 2.905982905982906, | |
| "grad_norm": 2.035733699798584, | |
| "learning_rate": 8.166870154993092e-07, | |
| "loss": 0.7174577116966248, | |
| "step": 2380 | |
| }, | |
| { | |
| "epoch": 2.9084249084249083, | |
| "grad_norm": 2.2822883129119873, | |
| "learning_rate": 8.145442845478154e-07, | |
| "loss": 0.7858035564422607, | |
| "step": 2382 | |
| }, | |
| { | |
| "epoch": 2.9108669108669107, | |
| "grad_norm": 2.8050036430358887, | |
| "learning_rate": 8.124049592763838e-07, | |
| "loss": 0.8687206506729126, | |
| "step": 2384 | |
| }, | |
| { | |
| "epoch": 2.913308913308913, | |
| "grad_norm": 2.1639275550842285, | |
| "learning_rate": 8.102690484058338e-07, | |
| "loss": 0.922956109046936, | |
| "step": 2386 | |
| }, | |
| { | |
| "epoch": 2.9157509157509156, | |
| "grad_norm": 2.6578361988067627, | |
| "learning_rate": 8.081365606430644e-07, | |
| "loss": 0.7477933764457703, | |
| "step": 2388 | |
| }, | |
| { | |
| "epoch": 2.918192918192918, | |
| "grad_norm": 4.602790355682373, | |
| "learning_rate": 8.060075046810221e-07, | |
| "loss": 0.41797584295272827, | |
| "step": 2390 | |
| }, | |
| { | |
| "epoch": 2.9206349206349205, | |
| "grad_norm": 2.8708016872406006, | |
| "learning_rate": 8.038818891986619e-07, | |
| "loss": 0.47318512201309204, | |
| "step": 2392 | |
| }, | |
| { | |
| "epoch": 2.9230769230769234, | |
| "grad_norm": 2.181591749191284, | |
| "learning_rate": 8.017597228609157e-07, | |
| "loss": 0.6307573318481445, | |
| "step": 2394 | |
| }, | |
| { | |
| "epoch": 2.925518925518926, | |
| "grad_norm": 4.88926362991333, | |
| "learning_rate": 7.996410143186534e-07, | |
| "loss": 0.6228193640708923, | |
| "step": 2396 | |
| }, | |
| { | |
| "epoch": 2.927960927960928, | |
| "grad_norm": 6.2888569831848145, | |
| "learning_rate": 7.975257722086519e-07, | |
| "loss": 0.6822891235351562, | |
| "step": 2398 | |
| }, | |
| { | |
| "epoch": 2.9304029304029307, | |
| "grad_norm": 7.576361179351807, | |
| "learning_rate": 7.95414005153555e-07, | |
| "loss": 0.445895254611969, | |
| "step": 2400 | |
| }, | |
| { | |
| "epoch": 2.932844932844933, | |
| "grad_norm": 3.0486338138580322, | |
| "learning_rate": 7.933057217618426e-07, | |
| "loss": 0.6405600309371948, | |
| "step": 2402 | |
| }, | |
| { | |
| "epoch": 2.9352869352869355, | |
| "grad_norm": 8.322898864746094, | |
| "learning_rate": 7.912009306277916e-07, | |
| "loss": 0.29521942138671875, | |
| "step": 2404 | |
| }, | |
| { | |
| "epoch": 2.937728937728938, | |
| "grad_norm": 13.022372245788574, | |
| "learning_rate": 7.890996403314446e-07, | |
| "loss": 0.9211161136627197, | |
| "step": 2406 | |
| }, | |
| { | |
| "epoch": 2.9401709401709404, | |
| "grad_norm": 4.712466716766357, | |
| "learning_rate": 7.870018594385727e-07, | |
| "loss": 0.6805351972579956, | |
| "step": 2408 | |
| }, | |
| { | |
| "epoch": 2.942612942612943, | |
| "grad_norm": 7.020658493041992, | |
| "learning_rate": 7.849075965006419e-07, | |
| "loss": 0.6539613008499146, | |
| "step": 2410 | |
| }, | |
| { | |
| "epoch": 2.9450549450549453, | |
| "grad_norm": 2.068073034286499, | |
| "learning_rate": 7.828168600547757e-07, | |
| "loss": 0.8703947067260742, | |
| "step": 2412 | |
| }, | |
| { | |
| "epoch": 2.9474969474969477, | |
| "grad_norm": 2.5590927600860596, | |
| "learning_rate": 7.807296586237236e-07, | |
| "loss": 0.9074662923812866, | |
| "step": 2414 | |
| }, | |
| { | |
| "epoch": 2.94993894993895, | |
| "grad_norm": 2.219924211502075, | |
| "learning_rate": 7.78646000715825e-07, | |
| "loss": 0.9266680479049683, | |
| "step": 2416 | |
| }, | |
| { | |
| "epoch": 2.9523809523809526, | |
| "grad_norm": 2.282229423522949, | |
| "learning_rate": 7.765658948249727e-07, | |
| "loss": 0.5627447366714478, | |
| "step": 2418 | |
| }, | |
| { | |
| "epoch": 2.954822954822955, | |
| "grad_norm": 2.376621723175049, | |
| "learning_rate": 7.74489349430582e-07, | |
| "loss": 0.9271303415298462, | |
| "step": 2420 | |
| }, | |
| { | |
| "epoch": 2.9572649572649574, | |
| "grad_norm": 2.2242136001586914, | |
| "learning_rate": 7.724163729975526e-07, | |
| "loss": 0.8751970529556274, | |
| "step": 2422 | |
| }, | |
| { | |
| "epoch": 2.95970695970696, | |
| "grad_norm": 2.3841874599456787, | |
| "learning_rate": 7.703469739762362e-07, | |
| "loss": 0.4712880551815033, | |
| "step": 2424 | |
| }, | |
| { | |
| "epoch": 2.9621489621489623, | |
| "grad_norm": 2.1766679286956787, | |
| "learning_rate": 7.682811608024011e-07, | |
| "loss": 0.9493281841278076, | |
| "step": 2426 | |
| }, | |
| { | |
| "epoch": 2.9645909645909647, | |
| "grad_norm": 4.962987422943115, | |
| "learning_rate": 7.662189418971985e-07, | |
| "loss": 0.6048038005828857, | |
| "step": 2428 | |
| }, | |
| { | |
| "epoch": 2.967032967032967, | |
| "grad_norm": 2.546107530593872, | |
| "learning_rate": 7.641603256671275e-07, | |
| "loss": 0.9180459976196289, | |
| "step": 2430 | |
| }, | |
| { | |
| "epoch": 2.9694749694749696, | |
| "grad_norm": 2.2236132621765137, | |
| "learning_rate": 7.621053205040018e-07, | |
| "loss": 0.7987039089202881, | |
| "step": 2432 | |
| }, | |
| { | |
| "epoch": 2.971916971916972, | |
| "grad_norm": 2.6432392597198486, | |
| "learning_rate": 7.600539347849131e-07, | |
| "loss": 0.9729940891265869, | |
| "step": 2434 | |
| }, | |
| { | |
| "epoch": 2.9743589743589745, | |
| "grad_norm": 3.465829372406006, | |
| "learning_rate": 7.580061768722012e-07, | |
| "loss": 0.5282536745071411, | |
| "step": 2436 | |
| }, | |
| { | |
| "epoch": 2.976800976800977, | |
| "grad_norm": 7.40516471862793, | |
| "learning_rate": 7.559620551134144e-07, | |
| "loss": 0.5220422148704529, | |
| "step": 2438 | |
| }, | |
| { | |
| "epoch": 2.9792429792429793, | |
| "grad_norm": 2.079118013381958, | |
| "learning_rate": 7.539215778412817e-07, | |
| "loss": 0.8890263438224792, | |
| "step": 2440 | |
| }, | |
| { | |
| "epoch": 2.9816849816849818, | |
| "grad_norm": 2.7219574451446533, | |
| "learning_rate": 7.51884753373673e-07, | |
| "loss": 1.0083708763122559, | |
| "step": 2442 | |
| }, | |
| { | |
| "epoch": 2.984126984126984, | |
| "grad_norm": 2.6659610271453857, | |
| "learning_rate": 7.498515900135696e-07, | |
| "loss": 0.8965180516242981, | |
| "step": 2444 | |
| }, | |
| { | |
| "epoch": 2.9865689865689866, | |
| "grad_norm": 2.9896388053894043, | |
| "learning_rate": 7.478220960490265e-07, | |
| "loss": 0.946886420249939, | |
| "step": 2446 | |
| }, | |
| { | |
| "epoch": 2.989010989010989, | |
| "grad_norm": 2.870820999145508, | |
| "learning_rate": 7.457962797531427e-07, | |
| "loss": 0.9332548379898071, | |
| "step": 2448 | |
| }, | |
| { | |
| "epoch": 2.9914529914529915, | |
| "grad_norm": 27.741483688354492, | |
| "learning_rate": 7.437741493840244e-07, | |
| "loss": 0.5826358199119568, | |
| "step": 2450 | |
| }, | |
| { | |
| "epoch": 2.993894993894994, | |
| "grad_norm": 3.5439491271972656, | |
| "learning_rate": 7.417557131847529e-07, | |
| "loss": 0.9089205861091614, | |
| "step": 2452 | |
| }, | |
| { | |
| "epoch": 2.9963369963369964, | |
| "grad_norm": 2.596341609954834, | |
| "learning_rate": 7.397409793833493e-07, | |
| "loss": 0.8732277154922485, | |
| "step": 2454 | |
| }, | |
| { | |
| "epoch": 2.998778998778999, | |
| "grad_norm": 1.7463470697402954, | |
| "learning_rate": 7.377299561927437e-07, | |
| "loss": 0.42197978496551514, | |
| "step": 2456 | |
| }, | |
| { | |
| "epoch": 3.001221001221001, | |
| "grad_norm": 2.6970930099487305, | |
| "learning_rate": 7.357226518107382e-07, | |
| "loss": 0.5636741518974304, | |
| "step": 2458 | |
| }, | |
| { | |
| "epoch": 3.0036630036630036, | |
| "grad_norm": 4.093437671661377, | |
| "learning_rate": 7.337190744199783e-07, | |
| "loss": 0.5100598335266113, | |
| "step": 2460 | |
| }, | |
| { | |
| "epoch": 3.006105006105006, | |
| "grad_norm": 2.996396064758301, | |
| "learning_rate": 7.317192321879135e-07, | |
| "loss": 0.8150706887245178, | |
| "step": 2462 | |
| }, | |
| { | |
| "epoch": 3.0085470085470085, | |
| "grad_norm": 2.1530697345733643, | |
| "learning_rate": 7.297231332667696e-07, | |
| "loss": 0.8549832105636597, | |
| "step": 2464 | |
| }, | |
| { | |
| "epoch": 3.010989010989011, | |
| "grad_norm": 9.229522705078125, | |
| "learning_rate": 7.277307857935111e-07, | |
| "loss": 0.5101163387298584, | |
| "step": 2466 | |
| }, | |
| { | |
| "epoch": 3.0134310134310134, | |
| "grad_norm": 1.99428391456604, | |
| "learning_rate": 7.257421978898116e-07, | |
| "loss": 0.7490478754043579, | |
| "step": 2468 | |
| }, | |
| { | |
| "epoch": 3.015873015873016, | |
| "grad_norm": 0.5552986860275269, | |
| "learning_rate": 7.23757377662018e-07, | |
| "loss": 0.1678122729063034, | |
| "step": 2470 | |
| }, | |
| { | |
| "epoch": 3.0183150183150182, | |
| "grad_norm": 7.193281650543213, | |
| "learning_rate": 7.217763332011193e-07, | |
| "loss": 0.24209608137607574, | |
| "step": 2472 | |
| }, | |
| { | |
| "epoch": 3.0207570207570207, | |
| "grad_norm": 2.3648483753204346, | |
| "learning_rate": 7.197990725827128e-07, | |
| "loss": 0.8045497536659241, | |
| "step": 2474 | |
| }, | |
| { | |
| "epoch": 3.023199023199023, | |
| "grad_norm": 11.67722225189209, | |
| "learning_rate": 7.178256038669696e-07, | |
| "loss": 0.45075228810310364, | |
| "step": 2476 | |
| }, | |
| { | |
| "epoch": 3.0256410256410255, | |
| "grad_norm": 2.1771862506866455, | |
| "learning_rate": 7.158559350986062e-07, | |
| "loss": 0.4686111509799957, | |
| "step": 2478 | |
| }, | |
| { | |
| "epoch": 3.028083028083028, | |
| "grad_norm": 2.6705634593963623, | |
| "learning_rate": 7.138900743068454e-07, | |
| "loss": 0.8005867004394531, | |
| "step": 2480 | |
| }, | |
| { | |
| "epoch": 3.0305250305250304, | |
| "grad_norm": 3.4312071800231934, | |
| "learning_rate": 7.119280295053907e-07, | |
| "loss": 0.7147307395935059, | |
| "step": 2482 | |
| }, | |
| { | |
| "epoch": 3.032967032967033, | |
| "grad_norm": 2.937742233276367, | |
| "learning_rate": 7.099698086923872e-07, | |
| "loss": 0.477982759475708, | |
| "step": 2484 | |
| }, | |
| { | |
| "epoch": 3.0354090354090353, | |
| "grad_norm": 4.713664531707764, | |
| "learning_rate": 7.080154198503934e-07, | |
| "loss": 0.7930957078933716, | |
| "step": 2486 | |
| }, | |
| { | |
| "epoch": 3.0378510378510377, | |
| "grad_norm": 7.838520526885986, | |
| "learning_rate": 7.060648709463456e-07, | |
| "loss": 0.5695945024490356, | |
| "step": 2488 | |
| }, | |
| { | |
| "epoch": 3.04029304029304, | |
| "grad_norm": 3.8250021934509277, | |
| "learning_rate": 7.04118169931528e-07, | |
| "loss": 0.8050855994224548, | |
| "step": 2490 | |
| }, | |
| { | |
| "epoch": 3.0427350427350426, | |
| "grad_norm": 4.019608497619629, | |
| "learning_rate": 7.021753247415388e-07, | |
| "loss": 0.26581862568855286, | |
| "step": 2492 | |
| }, | |
| { | |
| "epoch": 3.045177045177045, | |
| "grad_norm": 0.6795697212219238, | |
| "learning_rate": 7.002363432962583e-07, | |
| "loss": 0.2102213203907013, | |
| "step": 2494 | |
| }, | |
| { | |
| "epoch": 3.0476190476190474, | |
| "grad_norm": 5.3154191970825195, | |
| "learning_rate": 6.983012334998161e-07, | |
| "loss": 0.6199802756309509, | |
| "step": 2496 | |
| }, | |
| { | |
| "epoch": 3.05006105006105, | |
| "grad_norm": 5.59798002243042, | |
| "learning_rate": 6.963700032405598e-07, | |
| "loss": 0.38272398710250854, | |
| "step": 2498 | |
| }, | |
| { | |
| "epoch": 3.0525030525030523, | |
| "grad_norm": 1.7878090143203735, | |
| "learning_rate": 6.944426603910209e-07, | |
| "loss": 0.4661582410335541, | |
| "step": 2500 | |
| }, | |
| { | |
| "epoch": 3.0549450549450547, | |
| "grad_norm": 13.370331764221191, | |
| "learning_rate": 6.925192128078873e-07, | |
| "loss": 0.43664366006851196, | |
| "step": 2502 | |
| }, | |
| { | |
| "epoch": 3.057387057387057, | |
| "grad_norm": 4.486617088317871, | |
| "learning_rate": 6.905996683319644e-07, | |
| "loss": 0.787665843963623, | |
| "step": 2504 | |
| }, | |
| { | |
| "epoch": 3.0598290598290596, | |
| "grad_norm": 4.143476963043213, | |
| "learning_rate": 6.886840347881494e-07, | |
| "loss": 0.46613210439682007, | |
| "step": 2506 | |
| }, | |
| { | |
| "epoch": 3.062271062271062, | |
| "grad_norm": 2.4750924110412598, | |
| "learning_rate": 6.867723199853954e-07, | |
| "loss": 0.6599079370498657, | |
| "step": 2508 | |
| }, | |
| { | |
| "epoch": 3.064713064713065, | |
| "grad_norm": 3.2640233039855957, | |
| "learning_rate": 6.848645317166816e-07, | |
| "loss": 0.9250385761260986, | |
| "step": 2510 | |
| }, | |
| { | |
| "epoch": 3.0671550671550674, | |
| "grad_norm": 0.24759773910045624, | |
| "learning_rate": 6.829606777589812e-07, | |
| "loss": 0.4720076322555542, | |
| "step": 2512 | |
| }, | |
| { | |
| "epoch": 3.06959706959707, | |
| "grad_norm": 2.7576491832733154, | |
| "learning_rate": 6.810607658732296e-07, | |
| "loss": 0.6627781987190247, | |
| "step": 2514 | |
| }, | |
| { | |
| "epoch": 3.0720390720390722, | |
| "grad_norm": 2.697263479232788, | |
| "learning_rate": 6.791648038042912e-07, | |
| "loss": 0.48119017481803894, | |
| "step": 2516 | |
| }, | |
| { | |
| "epoch": 3.0744810744810747, | |
| "grad_norm": 1.3262935876846313, | |
| "learning_rate": 6.77272799280931e-07, | |
| "loss": 0.12304246425628662, | |
| "step": 2518 | |
| }, | |
| { | |
| "epoch": 3.076923076923077, | |
| "grad_norm": 2.7172646522521973, | |
| "learning_rate": 6.753847600157793e-07, | |
| "loss": 0.4097219705581665, | |
| "step": 2520 | |
| }, | |
| { | |
| "epoch": 3.0793650793650795, | |
| "grad_norm": 2.4207475185394287, | |
| "learning_rate": 6.735006937053055e-07, | |
| "loss": 0.6418076753616333, | |
| "step": 2522 | |
| }, | |
| { | |
| "epoch": 3.081807081807082, | |
| "grad_norm": 3.1673455238342285, | |
| "learning_rate": 6.716206080297795e-07, | |
| "loss": 0.747841477394104, | |
| "step": 2524 | |
| }, | |
| { | |
| "epoch": 3.0842490842490844, | |
| "grad_norm": 6.720166206359863, | |
| "learning_rate": 6.697445106532481e-07, | |
| "loss": 0.5003422498703003, | |
| "step": 2526 | |
| }, | |
| { | |
| "epoch": 3.086691086691087, | |
| "grad_norm": 4.194862365722656, | |
| "learning_rate": 6.678724092234971e-07, | |
| "loss": 0.48644983768463135, | |
| "step": 2528 | |
| }, | |
| { | |
| "epoch": 3.0891330891330893, | |
| "grad_norm": 7.190936088562012, | |
| "learning_rate": 6.660043113720247e-07, | |
| "loss": 0.08977795392274857, | |
| "step": 2530 | |
| }, | |
| { | |
| "epoch": 3.0915750915750917, | |
| "grad_norm": 4.334343433380127, | |
| "learning_rate": 6.641402247140088e-07, | |
| "loss": 0.24807599186897278, | |
| "step": 2532 | |
| }, | |
| { | |
| "epoch": 3.094017094017094, | |
| "grad_norm": 6.426454067230225, | |
| "learning_rate": 6.622801568482753e-07, | |
| "loss": 0.8089706897735596, | |
| "step": 2534 | |
| }, | |
| { | |
| "epoch": 3.0964590964590966, | |
| "grad_norm": 4.830419540405273, | |
| "learning_rate": 6.604241153572687e-07, | |
| "loss": 0.43892985582351685, | |
| "step": 2536 | |
| }, | |
| { | |
| "epoch": 3.098901098901099, | |
| "grad_norm": 13.627212524414062, | |
| "learning_rate": 6.585721078070185e-07, | |
| "loss": 0.5923545360565186, | |
| "step": 2538 | |
| }, | |
| { | |
| "epoch": 3.1013431013431014, | |
| "grad_norm": 2.4753658771514893, | |
| "learning_rate": 6.567241417471122e-07, | |
| "loss": 0.726067304611206, | |
| "step": 2540 | |
| }, | |
| { | |
| "epoch": 3.103785103785104, | |
| "grad_norm": 2.7445006370544434, | |
| "learning_rate": 6.548802247106602e-07, | |
| "loss": 0.4831371605396271, | |
| "step": 2542 | |
| }, | |
| { | |
| "epoch": 3.1062271062271063, | |
| "grad_norm": 2.5388689041137695, | |
| "learning_rate": 6.5304036421427e-07, | |
| "loss": 0.42076992988586426, | |
| "step": 2544 | |
| }, | |
| { | |
| "epoch": 3.1086691086691087, | |
| "grad_norm": 6.297148704528809, | |
| "learning_rate": 6.512045677580104e-07, | |
| "loss": 0.0995209664106369, | |
| "step": 2546 | |
| }, | |
| { | |
| "epoch": 3.111111111111111, | |
| "grad_norm": 3.977672576904297, | |
| "learning_rate": 6.493728428253853e-07, | |
| "loss": 0.40470319986343384, | |
| "step": 2548 | |
| }, | |
| { | |
| "epoch": 3.1135531135531136, | |
| "grad_norm": 4.385368824005127, | |
| "learning_rate": 6.475451968832996e-07, | |
| "loss": 0.6437668800354004, | |
| "step": 2550 | |
| }, | |
| { | |
| "epoch": 3.115995115995116, | |
| "grad_norm": 12.631845474243164, | |
| "learning_rate": 6.457216373820316e-07, | |
| "loss": 0.22822971642017365, | |
| "step": 2552 | |
| }, | |
| { | |
| "epoch": 3.1184371184371185, | |
| "grad_norm": 2.8103175163269043, | |
| "learning_rate": 6.439021717552012e-07, | |
| "loss": 0.6819199323654175, | |
| "step": 2554 | |
| }, | |
| { | |
| "epoch": 3.120879120879121, | |
| "grad_norm": 3.5617892742156982, | |
| "learning_rate": 6.420868074197405e-07, | |
| "loss": 0.30838051438331604, | |
| "step": 2556 | |
| }, | |
| { | |
| "epoch": 3.1233211233211233, | |
| "grad_norm": 1.8436968326568604, | |
| "learning_rate": 6.402755517758613e-07, | |
| "loss": 0.4027915596961975, | |
| "step": 2558 | |
| }, | |
| { | |
| "epoch": 3.1257631257631258, | |
| "grad_norm": 2.8249123096466064, | |
| "learning_rate": 6.384684122070287e-07, | |
| "loss": 0.8031132221221924, | |
| "step": 2560 | |
| }, | |
| { | |
| "epoch": 3.128205128205128, | |
| "grad_norm": 5.5436296463012695, | |
| "learning_rate": 6.366653960799263e-07, | |
| "loss": 0.9157365560531616, | |
| "step": 2562 | |
| }, | |
| { | |
| "epoch": 3.1306471306471306, | |
| "grad_norm": 20.762697219848633, | |
| "learning_rate": 6.348665107444323e-07, | |
| "loss": 0.7175855040550232, | |
| "step": 2564 | |
| }, | |
| { | |
| "epoch": 3.133089133089133, | |
| "grad_norm": 4.656121253967285, | |
| "learning_rate": 6.330717635335825e-07, | |
| "loss": 0.43480825424194336, | |
| "step": 2566 | |
| }, | |
| { | |
| "epoch": 3.1355311355311355, | |
| "grad_norm": 6.591810703277588, | |
| "learning_rate": 6.312811617635464e-07, | |
| "loss": 0.8588677644729614, | |
| "step": 2568 | |
| }, | |
| { | |
| "epoch": 3.137973137973138, | |
| "grad_norm": 22.628002166748047, | |
| "learning_rate": 6.29494712733593e-07, | |
| "loss": 0.5387420654296875, | |
| "step": 2570 | |
| }, | |
| { | |
| "epoch": 3.1404151404151404, | |
| "grad_norm": 9.113458633422852, | |
| "learning_rate": 6.27712423726064e-07, | |
| "loss": 0.501701831817627, | |
| "step": 2572 | |
| }, | |
| { | |
| "epoch": 3.142857142857143, | |
| "grad_norm": 3.451632022857666, | |
| "learning_rate": 6.259343020063433e-07, | |
| "loss": 0.8168814182281494, | |
| "step": 2574 | |
| }, | |
| { | |
| "epoch": 3.1452991452991452, | |
| "grad_norm": 5.145376682281494, | |
| "learning_rate": 6.241603548228269e-07, | |
| "loss": 0.8524529933929443, | |
| "step": 2576 | |
| }, | |
| { | |
| "epoch": 3.1477411477411477, | |
| "grad_norm": 5.351768493652344, | |
| "learning_rate": 6.223905894068925e-07, | |
| "loss": 0.7835222482681274, | |
| "step": 2578 | |
| }, | |
| { | |
| "epoch": 3.15018315018315, | |
| "grad_norm": 5.4381022453308105, | |
| "learning_rate": 6.206250129728727e-07, | |
| "loss": 0.8305777311325073, | |
| "step": 2580 | |
| }, | |
| { | |
| "epoch": 3.1526251526251525, | |
| "grad_norm": 5.506639003753662, | |
| "learning_rate": 6.188636327180222e-07, | |
| "loss": 0.8366413116455078, | |
| "step": 2582 | |
| }, | |
| { | |
| "epoch": 3.155067155067155, | |
| "grad_norm": 4.613339424133301, | |
| "learning_rate": 6.171064558224931e-07, | |
| "loss": 0.25465893745422363, | |
| "step": 2584 | |
| }, | |
| { | |
| "epoch": 3.1575091575091574, | |
| "grad_norm": 2.6634879112243652, | |
| "learning_rate": 6.153534894492993e-07, | |
| "loss": 0.6007862091064453, | |
| "step": 2586 | |
| }, | |
| { | |
| "epoch": 3.15995115995116, | |
| "grad_norm": 6.523024559020996, | |
| "learning_rate": 6.136047407442932e-07, | |
| "loss": 0.16968603432178497, | |
| "step": 2588 | |
| }, | |
| { | |
| "epoch": 3.1623931623931623, | |
| "grad_norm": 10.192069053649902, | |
| "learning_rate": 6.11860216836134e-07, | |
| "loss": 0.27697157859802246, | |
| "step": 2590 | |
| }, | |
| { | |
| "epoch": 3.1648351648351647, | |
| "grad_norm": 3.1504926681518555, | |
| "learning_rate": 6.101199248362572e-07, | |
| "loss": 0.7407965660095215, | |
| "step": 2592 | |
| }, | |
| { | |
| "epoch": 3.167277167277167, | |
| "grad_norm": 2.813279628753662, | |
| "learning_rate": 6.083838718388486e-07, | |
| "loss": 0.6240221261978149, | |
| "step": 2594 | |
| }, | |
| { | |
| "epoch": 3.1697191697191696, | |
| "grad_norm": 3.7025303840637207, | |
| "learning_rate": 6.066520649208137e-07, | |
| "loss": 0.47483938932418823, | |
| "step": 2596 | |
| }, | |
| { | |
| "epoch": 3.172161172161172, | |
| "grad_norm": 2.692544937133789, | |
| "learning_rate": 6.049245111417493e-07, | |
| "loss": 0.7736239433288574, | |
| "step": 2598 | |
| }, | |
| { | |
| "epoch": 3.1746031746031744, | |
| "grad_norm": 3.362095355987549, | |
| "learning_rate": 6.032012175439139e-07, | |
| "loss": 0.739479660987854, | |
| "step": 2600 | |
| }, | |
| { | |
| "epoch": 3.177045177045177, | |
| "grad_norm": 4.023605823516846, | |
| "learning_rate": 6.014821911522005e-07, | |
| "loss": 0.8537817001342773, | |
| "step": 2602 | |
| }, | |
| { | |
| "epoch": 3.1794871794871793, | |
| "grad_norm": 3.6344149112701416, | |
| "learning_rate": 5.997674389741056e-07, | |
| "loss": 0.7341170907020569, | |
| "step": 2604 | |
| }, | |
| { | |
| "epoch": 3.1819291819291817, | |
| "grad_norm": 4.450408458709717, | |
| "learning_rate": 5.980569679997048e-07, | |
| "loss": 0.6847058534622192, | |
| "step": 2606 | |
| }, | |
| { | |
| "epoch": 3.1843711843711846, | |
| "grad_norm": 2.562770128250122, | |
| "learning_rate": 5.96350785201619e-07, | |
| "loss": 0.7031633853912354, | |
| "step": 2608 | |
| }, | |
| { | |
| "epoch": 3.186813186813187, | |
| "grad_norm": 2.2277753353118896, | |
| "learning_rate": 5.946488975349905e-07, | |
| "loss": 0.636197566986084, | |
| "step": 2610 | |
| }, | |
| { | |
| "epoch": 3.1892551892551895, | |
| "grad_norm": 2.560616970062256, | |
| "learning_rate": 5.929513119374511e-07, | |
| "loss": 0.6687245965003967, | |
| "step": 2612 | |
| }, | |
| { | |
| "epoch": 3.191697191697192, | |
| "grad_norm": 3.4713096618652344, | |
| "learning_rate": 5.912580353290972e-07, | |
| "loss": 0.6120387315750122, | |
| "step": 2614 | |
| }, | |
| { | |
| "epoch": 3.1941391941391943, | |
| "grad_norm": 6.3813347816467285, | |
| "learning_rate": 5.895690746124586e-07, | |
| "loss": 0.4897659718990326, | |
| "step": 2616 | |
| }, | |
| { | |
| "epoch": 3.1965811965811968, | |
| "grad_norm": 21.71295166015625, | |
| "learning_rate": 5.878844366724725e-07, | |
| "loss": 0.5589523315429688, | |
| "step": 2618 | |
| }, | |
| { | |
| "epoch": 3.199023199023199, | |
| "grad_norm": 10.79178524017334, | |
| "learning_rate": 5.862041283764539e-07, | |
| "loss": 0.2927311956882477, | |
| "step": 2620 | |
| }, | |
| { | |
| "epoch": 3.2014652014652016, | |
| "grad_norm": 2.3552072048187256, | |
| "learning_rate": 5.845281565740691e-07, | |
| "loss": 0.4253409504890442, | |
| "step": 2622 | |
| }, | |
| { | |
| "epoch": 3.203907203907204, | |
| "grad_norm": 10.33370590209961, | |
| "learning_rate": 5.828565280973055e-07, | |
| "loss": 0.2613220512866974, | |
| "step": 2624 | |
| }, | |
| { | |
| "epoch": 3.2063492063492065, | |
| "grad_norm": 3.2602241039276123, | |
| "learning_rate": 5.811892497604476e-07, | |
| "loss": 0.9796355962753296, | |
| "step": 2626 | |
| }, | |
| { | |
| "epoch": 3.208791208791209, | |
| "grad_norm": 5.60067081451416, | |
| "learning_rate": 5.795263283600444e-07, | |
| "loss": 0.18396517634391785, | |
| "step": 2628 | |
| }, | |
| { | |
| "epoch": 3.2112332112332114, | |
| "grad_norm": 4.523139953613281, | |
| "learning_rate": 5.77867770674886e-07, | |
| "loss": 0.846479058265686, | |
| "step": 2630 | |
| }, | |
| { | |
| "epoch": 3.213675213675214, | |
| "grad_norm": 2.926802158355713, | |
| "learning_rate": 5.762135834659723e-07, | |
| "loss": 0.45950865745544434, | |
| "step": 2632 | |
| }, | |
| { | |
| "epoch": 3.2161172161172162, | |
| "grad_norm": 2.7911295890808105, | |
| "learning_rate": 5.745637734764889e-07, | |
| "loss": 0.6301865577697754, | |
| "step": 2634 | |
| }, | |
| { | |
| "epoch": 3.2185592185592187, | |
| "grad_norm": 10.409442901611328, | |
| "learning_rate": 5.729183474317772e-07, | |
| "loss": 0.5855114459991455, | |
| "step": 2636 | |
| }, | |
| { | |
| "epoch": 3.221001221001221, | |
| "grad_norm": 7.145462512969971, | |
| "learning_rate": 5.712773120393082e-07, | |
| "loss": 0.47362077236175537, | |
| "step": 2638 | |
| }, | |
| { | |
| "epoch": 3.2234432234432235, | |
| "grad_norm": 2.722864866256714, | |
| "learning_rate": 5.696406739886535e-07, | |
| "loss": 0.596495509147644, | |
| "step": 2640 | |
| }, | |
| { | |
| "epoch": 3.225885225885226, | |
| "grad_norm": 3.252976655960083, | |
| "learning_rate": 5.680084399514606e-07, | |
| "loss": 0.8046815395355225, | |
| "step": 2642 | |
| }, | |
| { | |
| "epoch": 3.2283272283272284, | |
| "grad_norm": 3.6956825256347656, | |
| "learning_rate": 5.663806165814238e-07, | |
| "loss": 0.6859568953514099, | |
| "step": 2644 | |
| }, | |
| { | |
| "epoch": 3.230769230769231, | |
| "grad_norm": 3.428558111190796, | |
| "learning_rate": 5.647572105142573e-07, | |
| "loss": 0.8417099714279175, | |
| "step": 2646 | |
| }, | |
| { | |
| "epoch": 3.2332112332112333, | |
| "grad_norm": 3.5709574222564697, | |
| "learning_rate": 5.631382283676691e-07, | |
| "loss": 0.7712951898574829, | |
| "step": 2648 | |
| }, | |
| { | |
| "epoch": 3.2356532356532357, | |
| "grad_norm": 4.211808204650879, | |
| "learning_rate": 5.615236767413326e-07, | |
| "loss": 0.23967456817626953, | |
| "step": 2650 | |
| }, | |
| { | |
| "epoch": 3.238095238095238, | |
| "grad_norm": 2.6336851119995117, | |
| "learning_rate": 5.599135622168615e-07, | |
| "loss": 0.8613542318344116, | |
| "step": 2652 | |
| }, | |
| { | |
| "epoch": 3.2405372405372406, | |
| "grad_norm": 2.873466968536377, | |
| "learning_rate": 5.583078913577807e-07, | |
| "loss": 0.8800490498542786, | |
| "step": 2654 | |
| }, | |
| { | |
| "epoch": 3.242979242979243, | |
| "grad_norm": 2.6431236267089844, | |
| "learning_rate": 5.567066707095017e-07, | |
| "loss": 0.37551987171173096, | |
| "step": 2656 | |
| }, | |
| { | |
| "epoch": 3.2454212454212454, | |
| "grad_norm": 19.679590225219727, | |
| "learning_rate": 5.55109906799295e-07, | |
| "loss": 0.2173229455947876, | |
| "step": 2658 | |
| }, | |
| { | |
| "epoch": 3.247863247863248, | |
| "grad_norm": 11.948999404907227, | |
| "learning_rate": 5.535176061362633e-07, | |
| "loss": 0.44817906618118286, | |
| "step": 2660 | |
| }, | |
| { | |
| "epoch": 3.2503052503052503, | |
| "grad_norm": 5.232640266418457, | |
| "learning_rate": 5.519297752113144e-07, | |
| "loss": 0.8609251976013184, | |
| "step": 2662 | |
| }, | |
| { | |
| "epoch": 3.2527472527472527, | |
| "grad_norm": 6.457090854644775, | |
| "learning_rate": 5.503464204971371e-07, | |
| "loss": 0.5830891132354736, | |
| "step": 2664 | |
| }, | |
| { | |
| "epoch": 3.255189255189255, | |
| "grad_norm": 2.9404375553131104, | |
| "learning_rate": 5.487675484481714e-07, | |
| "loss": 0.41301780939102173, | |
| "step": 2666 | |
| }, | |
| { | |
| "epoch": 3.2576312576312576, | |
| "grad_norm": 23.864641189575195, | |
| "learning_rate": 5.471931655005861e-07, | |
| "loss": 0.7523496150970459, | |
| "step": 2668 | |
| }, | |
| { | |
| "epoch": 3.26007326007326, | |
| "grad_norm": 7.809010028839111, | |
| "learning_rate": 5.456232780722485e-07, | |
| "loss": 0.7383593320846558, | |
| "step": 2670 | |
| }, | |
| { | |
| "epoch": 3.2625152625152625, | |
| "grad_norm": 8.772366523742676, | |
| "learning_rate": 5.440578925627015e-07, | |
| "loss": 0.42602211236953735, | |
| "step": 2672 | |
| }, | |
| { | |
| "epoch": 3.264957264957265, | |
| "grad_norm": 8.39612865447998, | |
| "learning_rate": 5.424970153531352e-07, | |
| "loss": 0.09617789089679718, | |
| "step": 2674 | |
| }, | |
| { | |
| "epoch": 3.2673992673992673, | |
| "grad_norm": 6.192194938659668, | |
| "learning_rate": 5.409406528063629e-07, | |
| "loss": 0.8647331595420837, | |
| "step": 2676 | |
| }, | |
| { | |
| "epoch": 3.2698412698412698, | |
| "grad_norm": 12.891087532043457, | |
| "learning_rate": 5.393888112667938e-07, | |
| "loss": 0.7215873003005981, | |
| "step": 2678 | |
| }, | |
| { | |
| "epoch": 3.272283272283272, | |
| "grad_norm": 1.1529864072799683, | |
| "learning_rate": 5.378414970604078e-07, | |
| "loss": 0.4208923578262329, | |
| "step": 2680 | |
| }, | |
| { | |
| "epoch": 3.2747252747252746, | |
| "grad_norm": 4.892144203186035, | |
| "learning_rate": 5.362987164947283e-07, | |
| "loss": 0.4185737371444702, | |
| "step": 2682 | |
| }, | |
| { | |
| "epoch": 3.277167277167277, | |
| "grad_norm": 3.0555760860443115, | |
| "learning_rate": 5.347604758587992e-07, | |
| "loss": 0.4928082525730133, | |
| "step": 2684 | |
| }, | |
| { | |
| "epoch": 3.2796092796092795, | |
| "grad_norm": 3.4435908794403076, | |
| "learning_rate": 5.332267814231557e-07, | |
| "loss": 0.9214242696762085, | |
| "step": 2686 | |
| }, | |
| { | |
| "epoch": 3.282051282051282, | |
| "grad_norm": 0.6462732553482056, | |
| "learning_rate": 5.316976394398035e-07, | |
| "loss": 0.05206817388534546, | |
| "step": 2688 | |
| }, | |
| { | |
| "epoch": 3.2844932844932844, | |
| "grad_norm": 4.35649299621582, | |
| "learning_rate": 5.301730561421878e-07, | |
| "loss": 0.8785997629165649, | |
| "step": 2690 | |
| }, | |
| { | |
| "epoch": 3.286935286935287, | |
| "grad_norm": 4.109615325927734, | |
| "learning_rate": 5.286530377451725e-07, | |
| "loss": 0.8360549211502075, | |
| "step": 2692 | |
| }, | |
| { | |
| "epoch": 3.2893772893772892, | |
| "grad_norm": 0.4986629784107208, | |
| "learning_rate": 5.271375904450113e-07, | |
| "loss": 0.05576964467763901, | |
| "step": 2694 | |
| }, | |
| { | |
| "epoch": 3.2918192918192917, | |
| "grad_norm": 1.1180140972137451, | |
| "learning_rate": 5.256267204193257e-07, | |
| "loss": 0.10897117108106613, | |
| "step": 2696 | |
| }, | |
| { | |
| "epoch": 3.294261294261294, | |
| "grad_norm": 2.6195151805877686, | |
| "learning_rate": 5.241204338270775e-07, | |
| "loss": 0.5570809245109558, | |
| "step": 2698 | |
| }, | |
| { | |
| "epoch": 3.2967032967032965, | |
| "grad_norm": 2.971738338470459, | |
| "learning_rate": 5.226187368085449e-07, | |
| "loss": 0.758766770362854, | |
| "step": 2700 | |
| }, | |
| { | |
| "epoch": 3.299145299145299, | |
| "grad_norm": 2.7780795097351074, | |
| "learning_rate": 5.211216354852959e-07, | |
| "loss": 0.8534475564956665, | |
| "step": 2702 | |
| }, | |
| { | |
| "epoch": 3.3015873015873014, | |
| "grad_norm": 2.715257167816162, | |
| "learning_rate": 5.196291359601656e-07, | |
| "loss": 0.8049676418304443, | |
| "step": 2704 | |
| }, | |
| { | |
| "epoch": 3.304029304029304, | |
| "grad_norm": 2.0465452671051025, | |
| "learning_rate": 5.181412443172302e-07, | |
| "loss": 0.6683448553085327, | |
| "step": 2706 | |
| }, | |
| { | |
| "epoch": 3.3064713064713063, | |
| "grad_norm": 2.6878154277801514, | |
| "learning_rate": 5.166579666217808e-07, | |
| "loss": 0.3922683000564575, | |
| "step": 2708 | |
| }, | |
| { | |
| "epoch": 3.3089133089133087, | |
| "grad_norm": 2.643120765686035, | |
| "learning_rate": 5.151793089203016e-07, | |
| "loss": 0.5145785808563232, | |
| "step": 2710 | |
| }, | |
| { | |
| "epoch": 3.311355311355311, | |
| "grad_norm": 4.139755725860596, | |
| "learning_rate": 5.137052772404429e-07, | |
| "loss": 0.8065720796585083, | |
| "step": 2712 | |
| }, | |
| { | |
| "epoch": 3.3137973137973136, | |
| "grad_norm": 2.7374370098114014, | |
| "learning_rate": 5.12235877590998e-07, | |
| "loss": 0.46399354934692383, | |
| "step": 2714 | |
| }, | |
| { | |
| "epoch": 3.316239316239316, | |
| "grad_norm": 5.588755130767822, | |
| "learning_rate": 5.107711159618767e-07, | |
| "loss": 0.8394476771354675, | |
| "step": 2716 | |
| }, | |
| { | |
| "epoch": 3.3186813186813184, | |
| "grad_norm": 5.747230052947998, | |
| "learning_rate": 5.093109983240838e-07, | |
| "loss": 0.7278931140899658, | |
| "step": 2718 | |
| }, | |
| { | |
| "epoch": 3.3211233211233213, | |
| "grad_norm": 4.068961143493652, | |
| "learning_rate": 5.078555306296921e-07, | |
| "loss": 0.7875164747238159, | |
| "step": 2720 | |
| }, | |
| { | |
| "epoch": 3.3235653235653237, | |
| "grad_norm": 3.255009174346924, | |
| "learning_rate": 5.064047188118198e-07, | |
| "loss": 0.6470295190811157, | |
| "step": 2722 | |
| }, | |
| { | |
| "epoch": 3.326007326007326, | |
| "grad_norm": 1.376708984375, | |
| "learning_rate": 5.04958568784605e-07, | |
| "loss": 0.053887248039245605, | |
| "step": 2724 | |
| }, | |
| { | |
| "epoch": 3.3284493284493286, | |
| "grad_norm": 2.7145447731018066, | |
| "learning_rate": 5.035170864431835e-07, | |
| "loss": 0.4699758291244507, | |
| "step": 2726 | |
| }, | |
| { | |
| "epoch": 3.330891330891331, | |
| "grad_norm": 2.425973415374756, | |
| "learning_rate": 5.020802776636619e-07, | |
| "loss": 0.7286192178726196, | |
| "step": 2728 | |
| }, | |
| { | |
| "epoch": 3.3333333333333335, | |
| "grad_norm": 6.599777698516846, | |
| "learning_rate": 5.006481483030977e-07, | |
| "loss": 0.710435152053833, | |
| "step": 2730 | |
| }, | |
| { | |
| "epoch": 3.335775335775336, | |
| "grad_norm": 1.4933651685714722, | |
| "learning_rate": 4.992207041994704e-07, | |
| "loss": 0.527194619178772, | |
| "step": 2732 | |
| }, | |
| { | |
| "epoch": 3.3382173382173383, | |
| "grad_norm": 6.664951801300049, | |
| "learning_rate": 4.977979511716627e-07, | |
| "loss": 0.5734143257141113, | |
| "step": 2734 | |
| }, | |
| { | |
| "epoch": 3.340659340659341, | |
| "grad_norm": 2.5262253284454346, | |
| "learning_rate": 4.963798950194323e-07, | |
| "loss": 0.6353036165237427, | |
| "step": 2736 | |
| }, | |
| { | |
| "epoch": 3.343101343101343, | |
| "grad_norm": 2.0396175384521484, | |
| "learning_rate": 4.949665415233921e-07, | |
| "loss": 0.43671801686286926, | |
| "step": 2738 | |
| }, | |
| { | |
| "epoch": 3.3455433455433456, | |
| "grad_norm": 2.3020055294036865, | |
| "learning_rate": 4.935578964449842e-07, | |
| "loss": 0.414195716381073, | |
| "step": 2740 | |
| }, | |
| { | |
| "epoch": 3.347985347985348, | |
| "grad_norm": 4.539997577667236, | |
| "learning_rate": 4.921539655264577e-07, | |
| "loss": 0.6845102906227112, | |
| "step": 2742 | |
| }, | |
| { | |
| "epoch": 3.3504273504273505, | |
| "grad_norm": 27.385744094848633, | |
| "learning_rate": 4.907547544908434e-07, | |
| "loss": 0.48483800888061523, | |
| "step": 2744 | |
| }, | |
| { | |
| "epoch": 3.352869352869353, | |
| "grad_norm": 3.306389570236206, | |
| "learning_rate": 4.893602690419335e-07, | |
| "loss": 0.7324085235595703, | |
| "step": 2746 | |
| }, | |
| { | |
| "epoch": 3.3553113553113554, | |
| "grad_norm": 27.748350143432617, | |
| "learning_rate": 4.879705148642551e-07, | |
| "loss": 0.5347940325737, | |
| "step": 2748 | |
| }, | |
| { | |
| "epoch": 3.357753357753358, | |
| "grad_norm": 7.101909160614014, | |
| "learning_rate": 4.865854976230503e-07, | |
| "loss": 0.30482548475265503, | |
| "step": 2750 | |
| }, | |
| { | |
| "epoch": 3.3601953601953602, | |
| "grad_norm": 2.7623767852783203, | |
| "learning_rate": 4.852052229642496e-07, | |
| "loss": 0.3212909996509552, | |
| "step": 2752 | |
| }, | |
| { | |
| "epoch": 3.3626373626373627, | |
| "grad_norm": 3.065473794937134, | |
| "learning_rate": 4.838296965144523e-07, | |
| "loss": 0.46142399311065674, | |
| "step": 2754 | |
| }, | |
| { | |
| "epoch": 3.365079365079365, | |
| "grad_norm": 2.433959484100342, | |
| "learning_rate": 4.824589238809002e-07, | |
| "loss": 0.7341235280036926, | |
| "step": 2756 | |
| }, | |
| { | |
| "epoch": 3.3675213675213675, | |
| "grad_norm": 11.061236381530762, | |
| "learning_rate": 4.810929106514581e-07, | |
| "loss": 0.765345573425293, | |
| "step": 2758 | |
| }, | |
| { | |
| "epoch": 3.36996336996337, | |
| "grad_norm": 6.86146354675293, | |
| "learning_rate": 4.797316623945884e-07, | |
| "loss": 0.9664710760116577, | |
| "step": 2760 | |
| }, | |
| { | |
| "epoch": 3.3724053724053724, | |
| "grad_norm": 7.752105236053467, | |
| "learning_rate": 4.783751846593303e-07, | |
| "loss": 0.4229215681552887, | |
| "step": 2762 | |
| }, | |
| { | |
| "epoch": 3.374847374847375, | |
| "grad_norm": 3.0226082801818848, | |
| "learning_rate": 4.770234829752751e-07, | |
| "loss": 0.8460754156112671, | |
| "step": 2764 | |
| }, | |
| { | |
| "epoch": 3.3772893772893773, | |
| "grad_norm": 3.3056082725524902, | |
| "learning_rate": 4.756765628525456e-07, | |
| "loss": 0.42917996644973755, | |
| "step": 2766 | |
| }, | |
| { | |
| "epoch": 3.3797313797313797, | |
| "grad_norm": 8.907353401184082, | |
| "learning_rate": 4.743344297817733e-07, | |
| "loss": 0.3285076320171356, | |
| "step": 2768 | |
| }, | |
| { | |
| "epoch": 3.382173382173382, | |
| "grad_norm": 8.507306098937988, | |
| "learning_rate": 4.7299708923407426e-07, | |
| "loss": 0.4301242530345917, | |
| "step": 2770 | |
| }, | |
| { | |
| "epoch": 3.3846153846153846, | |
| "grad_norm": 4.4141106605529785, | |
| "learning_rate": 4.716645466610293e-07, | |
| "loss": 0.8090613484382629, | |
| "step": 2772 | |
| }, | |
| { | |
| "epoch": 3.387057387057387, | |
| "grad_norm": 4.848315238952637, | |
| "learning_rate": 4.703368074946601e-07, | |
| "loss": 0.355411171913147, | |
| "step": 2774 | |
| }, | |
| { | |
| "epoch": 3.3894993894993894, | |
| "grad_norm": 2.3719654083251953, | |
| "learning_rate": 4.690138771474081e-07, | |
| "loss": 0.718955397605896, | |
| "step": 2776 | |
| }, | |
| { | |
| "epoch": 3.391941391941392, | |
| "grad_norm": 2.225470781326294, | |
| "learning_rate": 4.6769576101211094e-07, | |
| "loss": 0.6947466135025024, | |
| "step": 2778 | |
| }, | |
| { | |
| "epoch": 3.3943833943833943, | |
| "grad_norm": 1.4156147241592407, | |
| "learning_rate": 4.663824644619825e-07, | |
| "loss": 0.40980637073516846, | |
| "step": 2780 | |
| }, | |
| { | |
| "epoch": 3.3968253968253967, | |
| "grad_norm": 4.567814826965332, | |
| "learning_rate": 4.650739928505891e-07, | |
| "loss": 0.4516459107398987, | |
| "step": 2782 | |
| }, | |
| { | |
| "epoch": 3.399267399267399, | |
| "grad_norm": 31.004159927368164, | |
| "learning_rate": 4.637703515118293e-07, | |
| "loss": 0.17260479927062988, | |
| "step": 2784 | |
| }, | |
| { | |
| "epoch": 3.4017094017094016, | |
| "grad_norm": 7.04186487197876, | |
| "learning_rate": 4.6247154575991056e-07, | |
| "loss": 0.4166035056114197, | |
| "step": 2786 | |
| }, | |
| { | |
| "epoch": 3.404151404151404, | |
| "grad_norm": 7.280468463897705, | |
| "learning_rate": 4.611775808893289e-07, | |
| "loss": 0.8995333909988403, | |
| "step": 2788 | |
| }, | |
| { | |
| "epoch": 3.4065934065934065, | |
| "grad_norm": 6.956652641296387, | |
| "learning_rate": 4.5988846217484634e-07, | |
| "loss": 0.6580412983894348, | |
| "step": 2790 | |
| }, | |
| { | |
| "epoch": 3.409035409035409, | |
| "grad_norm": 2.8585188388824463, | |
| "learning_rate": 4.5860419487147113e-07, | |
| "loss": 0.41331350803375244, | |
| "step": 2792 | |
| }, | |
| { | |
| "epoch": 3.4114774114774113, | |
| "grad_norm": 2.8108794689178467, | |
| "learning_rate": 4.573247842144331e-07, | |
| "loss": 0.6332775354385376, | |
| "step": 2794 | |
| }, | |
| { | |
| "epoch": 3.413919413919414, | |
| "grad_norm": 4.075611114501953, | |
| "learning_rate": 4.5605023541916623e-07, | |
| "loss": 0.7772250175476074, | |
| "step": 2796 | |
| }, | |
| { | |
| "epoch": 3.416361416361416, | |
| "grad_norm": 9.713813781738281, | |
| "learning_rate": 4.547805536812836e-07, | |
| "loss": 0.4333896338939667, | |
| "step": 2798 | |
| }, | |
| { | |
| "epoch": 3.4188034188034186, | |
| "grad_norm": 0.9859614968299866, | |
| "learning_rate": 4.5351574417655964e-07, | |
| "loss": 0.05127192288637161, | |
| "step": 2800 | |
| }, | |
| { | |
| "epoch": 3.421245421245421, | |
| "grad_norm": 4.002601623535156, | |
| "learning_rate": 4.522558120609064e-07, | |
| "loss": 0.7769187688827515, | |
| "step": 2802 | |
| }, | |
| { | |
| "epoch": 3.4236874236874235, | |
| "grad_norm": 0.7182661890983582, | |
| "learning_rate": 4.5100076247035466e-07, | |
| "loss": 0.5086408853530884, | |
| "step": 2804 | |
| }, | |
| { | |
| "epoch": 3.426129426129426, | |
| "grad_norm": 3.442944288253784, | |
| "learning_rate": 4.4975060052103037e-07, | |
| "loss": 0.5425475835800171, | |
| "step": 2806 | |
| }, | |
| { | |
| "epoch": 3.4285714285714284, | |
| "grad_norm": 3.3078877925872803, | |
| "learning_rate": 4.4850533130913673e-07, | |
| "loss": 0.8806586861610413, | |
| "step": 2808 | |
| }, | |
| { | |
| "epoch": 3.4310134310134313, | |
| "grad_norm": 8.249839782714844, | |
| "learning_rate": 4.4726495991093093e-07, | |
| "loss": 0.6783867478370667, | |
| "step": 2810 | |
| }, | |
| { | |
| "epoch": 3.4334554334554337, | |
| "grad_norm": 6.180336952209473, | |
| "learning_rate": 4.460294913827055e-07, | |
| "loss": 0.09303609281778336, | |
| "step": 2812 | |
| }, | |
| { | |
| "epoch": 3.435897435897436, | |
| "grad_norm": 16.910554885864258, | |
| "learning_rate": 4.447989307607662e-07, | |
| "loss": 0.4257678985595703, | |
| "step": 2814 | |
| }, | |
| { | |
| "epoch": 3.4383394383394386, | |
| "grad_norm": 2.7629129886627197, | |
| "learning_rate": 4.435732830614125e-07, | |
| "loss": 0.6648207902908325, | |
| "step": 2816 | |
| }, | |
| { | |
| "epoch": 3.440781440781441, | |
| "grad_norm": 2.5652284622192383, | |
| "learning_rate": 4.4235255328091576e-07, | |
| "loss": 0.8116101026535034, | |
| "step": 2818 | |
| }, | |
| { | |
| "epoch": 3.4432234432234434, | |
| "grad_norm": 3.2046570777893066, | |
| "learning_rate": 4.411367463955005e-07, | |
| "loss": 0.7154589295387268, | |
| "step": 2820 | |
| }, | |
| { | |
| "epoch": 3.445665445665446, | |
| "grad_norm": 3.974566698074341, | |
| "learning_rate": 4.399258673613233e-07, | |
| "loss": 0.8278774619102478, | |
| "step": 2822 | |
| }, | |
| { | |
| "epoch": 3.4481074481074483, | |
| "grad_norm": 2.4374897480010986, | |
| "learning_rate": 4.3871992111445276e-07, | |
| "loss": 0.7418302297592163, | |
| "step": 2824 | |
| }, | |
| { | |
| "epoch": 3.4505494505494507, | |
| "grad_norm": 2.7656939029693604, | |
| "learning_rate": 4.3751891257084884e-07, | |
| "loss": 0.5064851641654968, | |
| "step": 2826 | |
| }, | |
| { | |
| "epoch": 3.452991452991453, | |
| "grad_norm": 4.026212692260742, | |
| "learning_rate": 4.363228466263437e-07, | |
| "loss": 0.7239431142807007, | |
| "step": 2828 | |
| }, | |
| { | |
| "epoch": 3.4554334554334556, | |
| "grad_norm": 10.000690460205078, | |
| "learning_rate": 4.351317281566214e-07, | |
| "loss": 0.4672069549560547, | |
| "step": 2830 | |
| }, | |
| { | |
| "epoch": 3.457875457875458, | |
| "grad_norm": 2.8275201320648193, | |
| "learning_rate": 4.3394556201719724e-07, | |
| "loss": 0.40725383162498474, | |
| "step": 2832 | |
| }, | |
| { | |
| "epoch": 3.4603174603174605, | |
| "grad_norm": 2.8882293701171875, | |
| "learning_rate": 4.3276435304339946e-07, | |
| "loss": 0.7354273796081543, | |
| "step": 2834 | |
| }, | |
| { | |
| "epoch": 3.462759462759463, | |
| "grad_norm": 48.296871185302734, | |
| "learning_rate": 4.315881060503485e-07, | |
| "loss": 0.6351673603057861, | |
| "step": 2836 | |
| }, | |
| { | |
| "epoch": 3.4652014652014653, | |
| "grad_norm": 9.754876136779785, | |
| "learning_rate": 4.3041682583293777e-07, | |
| "loss": 0.44702062010765076, | |
| "step": 2838 | |
| }, | |
| { | |
| "epoch": 3.4676434676434678, | |
| "grad_norm": 4.990954399108887, | |
| "learning_rate": 4.2925051716581314e-07, | |
| "loss": 0.8453319072723389, | |
| "step": 2840 | |
| }, | |
| { | |
| "epoch": 3.47008547008547, | |
| "grad_norm": 5.628749370574951, | |
| "learning_rate": 4.2808918480335524e-07, | |
| "loss": 0.7494460344314575, | |
| "step": 2842 | |
| }, | |
| { | |
| "epoch": 3.4725274725274726, | |
| "grad_norm": 2.881686210632324, | |
| "learning_rate": 4.269328334796588e-07, | |
| "loss": 0.7807775735855103, | |
| "step": 2844 | |
| }, | |
| { | |
| "epoch": 3.474969474969475, | |
| "grad_norm": 3.935601234436035, | |
| "learning_rate": 4.2578146790851385e-07, | |
| "loss": 0.7870265245437622, | |
| "step": 2846 | |
| }, | |
| { | |
| "epoch": 3.4774114774114775, | |
| "grad_norm": 3.1705915927886963, | |
| "learning_rate": 4.246350927833857e-07, | |
| "loss": 0.7632369995117188, | |
| "step": 2848 | |
| }, | |
| { | |
| "epoch": 3.47985347985348, | |
| "grad_norm": 5.830966472625732, | |
| "learning_rate": 4.234937127773971e-07, | |
| "loss": 0.39957284927368164, | |
| "step": 2850 | |
| }, | |
| { | |
| "epoch": 3.4822954822954824, | |
| "grad_norm": 2.9891774654388428, | |
| "learning_rate": 4.2235733254330783e-07, | |
| "loss": 0.7151432037353516, | |
| "step": 2852 | |
| }, | |
| { | |
| "epoch": 3.484737484737485, | |
| "grad_norm": 0.7090671062469482, | |
| "learning_rate": 4.2122595671349774e-07, | |
| "loss": 0.0894431471824646, | |
| "step": 2854 | |
| }, | |
| { | |
| "epoch": 3.4871794871794872, | |
| "grad_norm": 6.399838924407959, | |
| "learning_rate": 4.2009958989994487e-07, | |
| "loss": 0.601172685623169, | |
| "step": 2856 | |
| }, | |
| { | |
| "epoch": 3.4896214896214897, | |
| "grad_norm": 4.3422722816467285, | |
| "learning_rate": 4.1897823669421006e-07, | |
| "loss": 0.19986438751220703, | |
| "step": 2858 | |
| }, | |
| { | |
| "epoch": 3.492063492063492, | |
| "grad_norm": 1.8583272695541382, | |
| "learning_rate": 4.1786190166741455e-07, | |
| "loss": 0.694176971912384, | |
| "step": 2860 | |
| }, | |
| { | |
| "epoch": 3.4945054945054945, | |
| "grad_norm": 2.8959078788757324, | |
| "learning_rate": 4.167505893702254e-07, | |
| "loss": 0.7624994516372681, | |
| "step": 2862 | |
| }, | |
| { | |
| "epoch": 3.496947496947497, | |
| "grad_norm": 2.90195631980896, | |
| "learning_rate": 4.1564430433283263e-07, | |
| "loss": 0.43300166726112366, | |
| "step": 2864 | |
| }, | |
| { | |
| "epoch": 3.4993894993894994, | |
| "grad_norm": 3.2913739681243896, | |
| "learning_rate": 4.1454305106493546e-07, | |
| "loss": 0.3288887143135071, | |
| "step": 2866 | |
| }, | |
| { | |
| "epoch": 3.501831501831502, | |
| "grad_norm": 5.068844795227051, | |
| "learning_rate": 4.134468340557192e-07, | |
| "loss": 0.7319455146789551, | |
| "step": 2868 | |
| }, | |
| { | |
| "epoch": 3.5042735042735043, | |
| "grad_norm": 6.3675537109375, | |
| "learning_rate": 4.1235565777384056e-07, | |
| "loss": 0.5317668318748474, | |
| "step": 2870 | |
| }, | |
| { | |
| "epoch": 3.5067155067155067, | |
| "grad_norm": 15.0240478515625, | |
| "learning_rate": 4.1126952666740725e-07, | |
| "loss": 0.38471299409866333, | |
| "step": 2872 | |
| }, | |
| { | |
| "epoch": 3.509157509157509, | |
| "grad_norm": 3.1798951625823975, | |
| "learning_rate": 4.101884451639614e-07, | |
| "loss": 0.9037513732910156, | |
| "step": 2874 | |
| }, | |
| { | |
| "epoch": 3.5115995115995116, | |
| "grad_norm": 6.532365322113037, | |
| "learning_rate": 4.0911241767046016e-07, | |
| "loss": 0.26760926842689514, | |
| "step": 2876 | |
| }, | |
| { | |
| "epoch": 3.514041514041514, | |
| "grad_norm": 2.566946029663086, | |
| "learning_rate": 4.080414485732587e-07, | |
| "loss": 0.7500442862510681, | |
| "step": 2878 | |
| }, | |
| { | |
| "epoch": 3.5164835164835164, | |
| "grad_norm": 4.2052130699157715, | |
| "learning_rate": 4.069755422380917e-07, | |
| "loss": 0.5388165712356567, | |
| "step": 2880 | |
| }, | |
| { | |
| "epoch": 3.518925518925519, | |
| "grad_norm": 20.752094268798828, | |
| "learning_rate": 4.059147030100559e-07, | |
| "loss": 0.5732119083404541, | |
| "step": 2882 | |
| }, | |
| { | |
| "epoch": 3.5213675213675213, | |
| "grad_norm": 2.6611931324005127, | |
| "learning_rate": 4.048589352135924e-07, | |
| "loss": 0.8847407698631287, | |
| "step": 2884 | |
| }, | |
| { | |
| "epoch": 3.5238095238095237, | |
| "grad_norm": 10.907923698425293, | |
| "learning_rate": 4.0380824315246947e-07, | |
| "loss": 0.9121260046958923, | |
| "step": 2886 | |
| }, | |
| { | |
| "epoch": 3.526251526251526, | |
| "grad_norm": 3.9812328815460205, | |
| "learning_rate": 4.027626311097629e-07, | |
| "loss": 0.6736772060394287, | |
| "step": 2888 | |
| }, | |
| { | |
| "epoch": 3.5286935286935286, | |
| "grad_norm": 2.7269723415374756, | |
| "learning_rate": 4.0172210334784195e-07, | |
| "loss": 0.7638009786605835, | |
| "step": 2890 | |
| }, | |
| { | |
| "epoch": 3.531135531135531, | |
| "grad_norm": 3.4985084533691406, | |
| "learning_rate": 4.006866641083492e-07, | |
| "loss": 0.7275708317756653, | |
| "step": 2892 | |
| }, | |
| { | |
| "epoch": 3.5335775335775335, | |
| "grad_norm": 4.522525310516357, | |
| "learning_rate": 3.99656317612184e-07, | |
| "loss": 0.41521430015563965, | |
| "step": 2894 | |
| }, | |
| { | |
| "epoch": 3.536019536019536, | |
| "grad_norm": 2.8411009311676025, | |
| "learning_rate": 3.9863106805948604e-07, | |
| "loss": 0.6723597049713135, | |
| "step": 2896 | |
| }, | |
| { | |
| "epoch": 3.5384615384615383, | |
| "grad_norm": 3.263356924057007, | |
| "learning_rate": 3.976109196296174e-07, | |
| "loss": 0.8013684749603271, | |
| "step": 2898 | |
| }, | |
| { | |
| "epoch": 3.5409035409035408, | |
| "grad_norm": 4.989956378936768, | |
| "learning_rate": 3.9659587648114617e-07, | |
| "loss": 0.7627596855163574, | |
| "step": 2900 | |
| }, | |
| { | |
| "epoch": 3.543345543345543, | |
| "grad_norm": 7.430449962615967, | |
| "learning_rate": 3.955859427518281e-07, | |
| "loss": 0.1464676856994629, | |
| "step": 2902 | |
| }, | |
| { | |
| "epoch": 3.5457875457875456, | |
| "grad_norm": 2.5465877056121826, | |
| "learning_rate": 3.9458112255859157e-07, | |
| "loss": 0.7924457788467407, | |
| "step": 2904 | |
| }, | |
| { | |
| "epoch": 3.548229548229548, | |
| "grad_norm": 3.9990952014923096, | |
| "learning_rate": 3.9358141999751996e-07, | |
| "loss": 0.6930620670318604, | |
| "step": 2906 | |
| }, | |
| { | |
| "epoch": 3.5506715506715505, | |
| "grad_norm": 3.2028019428253174, | |
| "learning_rate": 3.925868391438347e-07, | |
| "loss": 0.826697587966919, | |
| "step": 2908 | |
| }, | |
| { | |
| "epoch": 3.553113553113553, | |
| "grad_norm": 19.867488861083984, | |
| "learning_rate": 3.9159738405187873e-07, | |
| "loss": 0.6113706827163696, | |
| "step": 2910 | |
| }, | |
| { | |
| "epoch": 3.5555555555555554, | |
| "grad_norm": 27.57074546813965, | |
| "learning_rate": 3.906130587551009e-07, | |
| "loss": 0.41953060030937195, | |
| "step": 2912 | |
| }, | |
| { | |
| "epoch": 3.557997557997558, | |
| "grad_norm": 2.6804850101470947, | |
| "learning_rate": 3.896338672660374e-07, | |
| "loss": 0.4396386444568634, | |
| "step": 2914 | |
| }, | |
| { | |
| "epoch": 3.5604395604395602, | |
| "grad_norm": 2.3489506244659424, | |
| "learning_rate": 3.8865981357629866e-07, | |
| "loss": 0.5773664712905884, | |
| "step": 2916 | |
| }, | |
| { | |
| "epoch": 3.5628815628815627, | |
| "grad_norm": 8.585136413574219, | |
| "learning_rate": 3.8769090165654973e-07, | |
| "loss": 0.6045223474502563, | |
| "step": 2918 | |
| }, | |
| { | |
| "epoch": 3.565323565323565, | |
| "grad_norm": 31.268516540527344, | |
| "learning_rate": 3.867271354564962e-07, | |
| "loss": 0.5454591512680054, | |
| "step": 2920 | |
| }, | |
| { | |
| "epoch": 3.5677655677655675, | |
| "grad_norm": 2.7172422409057617, | |
| "learning_rate": 3.8576851890486726e-07, | |
| "loss": 0.8313965201377869, | |
| "step": 2922 | |
| }, | |
| { | |
| "epoch": 3.57020757020757, | |
| "grad_norm": 5.068464756011963, | |
| "learning_rate": 3.8481505590940023e-07, | |
| "loss": 0.46147140860557556, | |
| "step": 2924 | |
| }, | |
| { | |
| "epoch": 3.5726495726495724, | |
| "grad_norm": 2.5422284603118896, | |
| "learning_rate": 3.838667503568233e-07, | |
| "loss": 0.36731448769569397, | |
| "step": 2926 | |
| }, | |
| { | |
| "epoch": 3.575091575091575, | |
| "grad_norm": 3.056962013244629, | |
| "learning_rate": 3.829236061128427e-07, | |
| "loss": 0.7054284811019897, | |
| "step": 2928 | |
| }, | |
| { | |
| "epoch": 3.5775335775335773, | |
| "grad_norm": 4.427704334259033, | |
| "learning_rate": 3.819856270221226e-07, | |
| "loss": 0.770653486251831, | |
| "step": 2930 | |
| }, | |
| { | |
| "epoch": 3.57997557997558, | |
| "grad_norm": 8.665727615356445, | |
| "learning_rate": 3.810528169082734e-07, | |
| "loss": 0.4913595914840698, | |
| "step": 2932 | |
| }, | |
| { | |
| "epoch": 3.5824175824175826, | |
| "grad_norm": 2.904313325881958, | |
| "learning_rate": 3.8012517957383373e-07, | |
| "loss": 0.6569290161132812, | |
| "step": 2934 | |
| }, | |
| { | |
| "epoch": 3.584859584859585, | |
| "grad_norm": 3.0541298389434814, | |
| "learning_rate": 3.792027188002558e-07, | |
| "loss": 0.46475017070770264, | |
| "step": 2936 | |
| }, | |
| { | |
| "epoch": 3.5873015873015874, | |
| "grad_norm": 3.205838680267334, | |
| "learning_rate": 3.782854383478903e-07, | |
| "loss": 0.4289541244506836, | |
| "step": 2938 | |
| }, | |
| { | |
| "epoch": 3.58974358974359, | |
| "grad_norm": 2.68637752532959, | |
| "learning_rate": 3.7737334195597043e-07, | |
| "loss": 0.5848438739776611, | |
| "step": 2940 | |
| }, | |
| { | |
| "epoch": 3.5921855921855923, | |
| "grad_norm": 5.944007873535156, | |
| "learning_rate": 3.7646643334259684e-07, | |
| "loss": 0.8478963375091553, | |
| "step": 2942 | |
| }, | |
| { | |
| "epoch": 3.5946275946275947, | |
| "grad_norm": 2.2448582649230957, | |
| "learning_rate": 3.7556471620472253e-07, | |
| "loss": 0.775229811668396, | |
| "step": 2944 | |
| }, | |
| { | |
| "epoch": 3.597069597069597, | |
| "grad_norm": 2.8263726234436035, | |
| "learning_rate": 3.746681942181379e-07, | |
| "loss": 0.7618613839149475, | |
| "step": 2946 | |
| }, | |
| { | |
| "epoch": 3.5995115995115996, | |
| "grad_norm": 2.975411891937256, | |
| "learning_rate": 3.737768710374559e-07, | |
| "loss": 0.4575694501399994, | |
| "step": 2948 | |
| }, | |
| { | |
| "epoch": 3.601953601953602, | |
| "grad_norm": 8.245552062988281, | |
| "learning_rate": 3.7289075029609616e-07, | |
| "loss": 0.3998860716819763, | |
| "step": 2950 | |
| }, | |
| { | |
| "epoch": 3.6043956043956045, | |
| "grad_norm": 7.906712055206299, | |
| "learning_rate": 3.720098356062717e-07, | |
| "loss": 0.2165716290473938, | |
| "step": 2952 | |
| }, | |
| { | |
| "epoch": 3.606837606837607, | |
| "grad_norm": 14.53108024597168, | |
| "learning_rate": 3.711341305589731e-07, | |
| "loss": 0.5015303492546082, | |
| "step": 2954 | |
| }, | |
| { | |
| "epoch": 3.6092796092796093, | |
| "grad_norm": 14.5282621383667, | |
| "learning_rate": 3.70263638723954e-07, | |
| "loss": 0.7012228965759277, | |
| "step": 2956 | |
| }, | |
| { | |
| "epoch": 3.6117216117216118, | |
| "grad_norm": 3.689199447631836, | |
| "learning_rate": 3.6939836364971694e-07, | |
| "loss": 0.6010891199111938, | |
| "step": 2958 | |
| }, | |
| { | |
| "epoch": 3.614163614163614, | |
| "grad_norm": 2.563419818878174, | |
| "learning_rate": 3.6853830886349873e-07, | |
| "loss": 0.7269465923309326, | |
| "step": 2960 | |
| }, | |
| { | |
| "epoch": 3.6166056166056166, | |
| "grad_norm": 10.599438667297363, | |
| "learning_rate": 3.6768347787125606e-07, | |
| "loss": 0.4651035666465759, | |
| "step": 2962 | |
| }, | |
| { | |
| "epoch": 3.619047619047619, | |
| "grad_norm": 2.2189157009124756, | |
| "learning_rate": 3.6683387415765044e-07, | |
| "loss": 0.6498839855194092, | |
| "step": 2964 | |
| }, | |
| { | |
| "epoch": 3.6214896214896215, | |
| "grad_norm": 3.3419363498687744, | |
| "learning_rate": 3.6598950118603586e-07, | |
| "loss": 0.7469695806503296, | |
| "step": 2966 | |
| }, | |
| { | |
| "epoch": 3.623931623931624, | |
| "grad_norm": 3.3574743270874023, | |
| "learning_rate": 3.6515036239844287e-07, | |
| "loss": 0.8440409898757935, | |
| "step": 2968 | |
| }, | |
| { | |
| "epoch": 3.6263736263736264, | |
| "grad_norm": 2.3137948513031006, | |
| "learning_rate": 3.6431646121556533e-07, | |
| "loss": 0.4129015803337097, | |
| "step": 2970 | |
| }, | |
| { | |
| "epoch": 3.628815628815629, | |
| "grad_norm": 2.628683090209961, | |
| "learning_rate": 3.6348780103674614e-07, | |
| "loss": 0.40047919750213623, | |
| "step": 2972 | |
| }, | |
| { | |
| "epoch": 3.6312576312576312, | |
| "grad_norm": 2.473060369491577, | |
| "learning_rate": 3.626643852399643e-07, | |
| "loss": 0.42548859119415283, | |
| "step": 2974 | |
| }, | |
| { | |
| "epoch": 3.6336996336996337, | |
| "grad_norm": 6.196104049682617, | |
| "learning_rate": 3.618462171818193e-07, | |
| "loss": 0.7563096284866333, | |
| "step": 2976 | |
| }, | |
| { | |
| "epoch": 3.636141636141636, | |
| "grad_norm": 10.924947738647461, | |
| "learning_rate": 3.610333001975197e-07, | |
| "loss": 0.3043268322944641, | |
| "step": 2978 | |
| }, | |
| { | |
| "epoch": 3.6385836385836385, | |
| "grad_norm": 3.1692097187042236, | |
| "learning_rate": 3.602256376008676e-07, | |
| "loss": 0.7787331938743591, | |
| "step": 2980 | |
| }, | |
| { | |
| "epoch": 3.641025641025641, | |
| "grad_norm": 5.5619049072265625, | |
| "learning_rate": 3.5942323268424655e-07, | |
| "loss": 0.916191577911377, | |
| "step": 2982 | |
| }, | |
| { | |
| "epoch": 3.6434676434676434, | |
| "grad_norm": 2.588724136352539, | |
| "learning_rate": 3.5862608871860695e-07, | |
| "loss": 0.8353859782218933, | |
| "step": 2984 | |
| }, | |
| { | |
| "epoch": 3.645909645909646, | |
| "grad_norm": 3.2748382091522217, | |
| "learning_rate": 3.578342089534539e-07, | |
| "loss": 0.797304630279541, | |
| "step": 2986 | |
| }, | |
| { | |
| "epoch": 3.6483516483516483, | |
| "grad_norm": 2.3065786361694336, | |
| "learning_rate": 3.570475966168323e-07, | |
| "loss": 0.702369213104248, | |
| "step": 2988 | |
| }, | |
| { | |
| "epoch": 3.6507936507936507, | |
| "grad_norm": 2.7678704261779785, | |
| "learning_rate": 3.562662549153161e-07, | |
| "loss": 0.8888381719589233, | |
| "step": 2990 | |
| }, | |
| { | |
| "epoch": 3.653235653235653, | |
| "grad_norm": 3.8173694610595703, | |
| "learning_rate": 3.554901870339929e-07, | |
| "loss": 0.5250690579414368, | |
| "step": 2992 | |
| }, | |
| { | |
| "epoch": 3.6556776556776556, | |
| "grad_norm": 9.152386665344238, | |
| "learning_rate": 3.547193961364523e-07, | |
| "loss": 0.33277344703674316, | |
| "step": 2994 | |
| }, | |
| { | |
| "epoch": 3.658119658119658, | |
| "grad_norm": 2.908566951751709, | |
| "learning_rate": 3.5395388536477207e-07, | |
| "loss": 0.8926382660865784, | |
| "step": 2996 | |
| }, | |
| { | |
| "epoch": 3.6605616605616604, | |
| "grad_norm": 0.9657400846481323, | |
| "learning_rate": 3.531936578395066e-07, | |
| "loss": 0.4112926721572876, | |
| "step": 2998 | |
| }, | |
| { | |
| "epoch": 3.663003663003663, | |
| "grad_norm": 4.3384857177734375, | |
| "learning_rate": 3.524387166596734e-07, | |
| "loss": 0.2578233778476715, | |
| "step": 3000 | |
| }, | |
| { | |
| "epoch": 3.6654456654456653, | |
| "grad_norm": 4.620382308959961, | |
| "learning_rate": 3.516890649027403e-07, | |
| "loss": 0.3403754234313965, | |
| "step": 3002 | |
| }, | |
| { | |
| "epoch": 3.6678876678876677, | |
| "grad_norm": 6.0603437423706055, | |
| "learning_rate": 3.5094470562461296e-07, | |
| "loss": 0.1455678939819336, | |
| "step": 3004 | |
| }, | |
| { | |
| "epoch": 3.67032967032967, | |
| "grad_norm": 5.389955520629883, | |
| "learning_rate": 3.502056418596229e-07, | |
| "loss": 0.7512868642807007, | |
| "step": 3006 | |
| }, | |
| { | |
| "epoch": 3.672771672771673, | |
| "grad_norm": 39.584007263183594, | |
| "learning_rate": 3.494718766205148e-07, | |
| "loss": 0.5591350793838501, | |
| "step": 3008 | |
| }, | |
| { | |
| "epoch": 3.6752136752136755, | |
| "grad_norm": 2.471529722213745, | |
| "learning_rate": 3.4874341289843427e-07, | |
| "loss": 0.4434140920639038, | |
| "step": 3010 | |
| }, | |
| { | |
| "epoch": 3.677655677655678, | |
| "grad_norm": 2.184110164642334, | |
| "learning_rate": 3.480202536629155e-07, | |
| "loss": 0.7699903845787048, | |
| "step": 3012 | |
| }, | |
| { | |
| "epoch": 3.6800976800976803, | |
| "grad_norm": 3.0973334312438965, | |
| "learning_rate": 3.473024018618692e-07, | |
| "loss": 0.8556205630302429, | |
| "step": 3014 | |
| }, | |
| { | |
| "epoch": 3.682539682539683, | |
| "grad_norm": 11.31489372253418, | |
| "learning_rate": 3.465898604215714e-07, | |
| "loss": 0.7214917540550232, | |
| "step": 3016 | |
| }, | |
| { | |
| "epoch": 3.684981684981685, | |
| "grad_norm": 2.5208775997161865, | |
| "learning_rate": 3.458826322466495e-07, | |
| "loss": 0.4928692579269409, | |
| "step": 3018 | |
| }, | |
| { | |
| "epoch": 3.6874236874236876, | |
| "grad_norm": 4.180920124053955, | |
| "learning_rate": 3.4518072022007307e-07, | |
| "loss": 0.5168589949607849, | |
| "step": 3020 | |
| }, | |
| { | |
| "epoch": 3.68986568986569, | |
| "grad_norm": 5.655241966247559, | |
| "learning_rate": 3.4448412720313964e-07, | |
| "loss": 0.5075653791427612, | |
| "step": 3022 | |
| }, | |
| { | |
| "epoch": 3.6923076923076925, | |
| "grad_norm": 2.7862062454223633, | |
| "learning_rate": 3.4379285603546524e-07, | |
| "loss": 0.8357092142105103, | |
| "step": 3024 | |
| }, | |
| { | |
| "epoch": 3.694749694749695, | |
| "grad_norm": 1.5549516677856445, | |
| "learning_rate": 3.431069095349705e-07, | |
| "loss": 0.5025522708892822, | |
| "step": 3026 | |
| }, | |
| { | |
| "epoch": 3.6971916971916974, | |
| "grad_norm": 3.4826738834381104, | |
| "learning_rate": 3.424262904978717e-07, | |
| "loss": 0.6062172651290894, | |
| "step": 3028 | |
| }, | |
| { | |
| "epoch": 3.6996336996337, | |
| "grad_norm": 2.05151104927063, | |
| "learning_rate": 3.417510016986667e-07, | |
| "loss": 0.8095488548278809, | |
| "step": 3030 | |
| }, | |
| { | |
| "epoch": 3.7020757020757022, | |
| "grad_norm": 2.471933364868164, | |
| "learning_rate": 3.410810458901262e-07, | |
| "loss": 0.6981524229049683, | |
| "step": 3032 | |
| }, | |
| { | |
| "epoch": 3.7045177045177047, | |
| "grad_norm": 3.5082616806030273, | |
| "learning_rate": 3.404164258032807e-07, | |
| "loss": 0.4733709692955017, | |
| "step": 3034 | |
| }, | |
| { | |
| "epoch": 3.706959706959707, | |
| "grad_norm": 1.4198700189590454, | |
| "learning_rate": 3.3975714414741013e-07, | |
| "loss": 0.4630986452102661, | |
| "step": 3036 | |
| }, | |
| { | |
| "epoch": 3.7094017094017095, | |
| "grad_norm": 3.0218346118927, | |
| "learning_rate": 3.391032036100325e-07, | |
| "loss": 0.4389650821685791, | |
| "step": 3038 | |
| }, | |
| { | |
| "epoch": 3.711843711843712, | |
| "grad_norm": 2.944387674331665, | |
| "learning_rate": 3.384546068568933e-07, | |
| "loss": 0.8785519599914551, | |
| "step": 3040 | |
| }, | |
| { | |
| "epoch": 3.7142857142857144, | |
| "grad_norm": 4.196402549743652, | |
| "learning_rate": 3.378113565319544e-07, | |
| "loss": 0.7434395551681519, | |
| "step": 3042 | |
| }, | |
| { | |
| "epoch": 3.716727716727717, | |
| "grad_norm": 2.696420669555664, | |
| "learning_rate": 3.3717345525738344e-07, | |
| "loss": 0.48036837577819824, | |
| "step": 3044 | |
| }, | |
| { | |
| "epoch": 3.7191697191697193, | |
| "grad_norm": 3.1391804218292236, | |
| "learning_rate": 3.3654090563354244e-07, | |
| "loss": 0.8472328186035156, | |
| "step": 3046 | |
| }, | |
| { | |
| "epoch": 3.7216117216117217, | |
| "grad_norm": 16.5904598236084, | |
| "learning_rate": 3.3591371023897855e-07, | |
| "loss": 0.4724040627479553, | |
| "step": 3048 | |
| }, | |
| { | |
| "epoch": 3.724053724053724, | |
| "grad_norm": 2.7015161514282227, | |
| "learning_rate": 3.35291871630412e-07, | |
| "loss": 0.6447727084159851, | |
| "step": 3050 | |
| }, | |
| { | |
| "epoch": 3.7264957264957266, | |
| "grad_norm": 3.3567774295806885, | |
| "learning_rate": 3.346753923427274e-07, | |
| "loss": 0.8310827016830444, | |
| "step": 3052 | |
| }, | |
| { | |
| "epoch": 3.728937728937729, | |
| "grad_norm": 6.175748825073242, | |
| "learning_rate": 3.3406427488896135e-07, | |
| "loss": 0.39910775423049927, | |
| "step": 3054 | |
| }, | |
| { | |
| "epoch": 3.7313797313797314, | |
| "grad_norm": 3.706573009490967, | |
| "learning_rate": 3.334585217602942e-07, | |
| "loss": 0.7661048173904419, | |
| "step": 3056 | |
| }, | |
| { | |
| "epoch": 3.733821733821734, | |
| "grad_norm": 2.6590542793273926, | |
| "learning_rate": 3.328581354260381e-07, | |
| "loss": 0.6034551858901978, | |
| "step": 3058 | |
| }, | |
| { | |
| "epoch": 3.7362637362637363, | |
| "grad_norm": 11.154624938964844, | |
| "learning_rate": 3.3226311833362856e-07, | |
| "loss": 0.3673848807811737, | |
| "step": 3060 | |
| }, | |
| { | |
| "epoch": 3.7387057387057387, | |
| "grad_norm": 4.551034927368164, | |
| "learning_rate": 3.3167347290861357e-07, | |
| "loss": 0.47802889347076416, | |
| "step": 3062 | |
| }, | |
| { | |
| "epoch": 3.741147741147741, | |
| "grad_norm": 3.527188777923584, | |
| "learning_rate": 3.310892015546438e-07, | |
| "loss": 0.28257882595062256, | |
| "step": 3064 | |
| }, | |
| { | |
| "epoch": 3.7435897435897436, | |
| "grad_norm": 5.753845691680908, | |
| "learning_rate": 3.305103066534627e-07, | |
| "loss": 0.4748725891113281, | |
| "step": 3066 | |
| }, | |
| { | |
| "epoch": 3.746031746031746, | |
| "grad_norm": 2.916123151779175, | |
| "learning_rate": 3.299367905648969e-07, | |
| "loss": 0.8344959020614624, | |
| "step": 3068 | |
| }, | |
| { | |
| "epoch": 3.7484737484737485, | |
| "grad_norm": 2.6062560081481934, | |
| "learning_rate": 3.2936865562684724e-07, | |
| "loss": 0.7747886180877686, | |
| "step": 3070 | |
| }, | |
| { | |
| "epoch": 3.750915750915751, | |
| "grad_norm": 14.459248542785645, | |
| "learning_rate": 3.2880590415527835e-07, | |
| "loss": 0.45985788106918335, | |
| "step": 3072 | |
| }, | |
| { | |
| "epoch": 3.7533577533577533, | |
| "grad_norm": 1.4691053628921509, | |
| "learning_rate": 3.282485384442091e-07, | |
| "loss": 0.36843568086624146, | |
| "step": 3074 | |
| }, | |
| { | |
| "epoch": 3.755799755799756, | |
| "grad_norm": 7.2976789474487305, | |
| "learning_rate": 3.2769656076570404e-07, | |
| "loss": 0.8071094751358032, | |
| "step": 3076 | |
| }, | |
| { | |
| "epoch": 3.758241758241758, | |
| "grad_norm": 2.8548290729522705, | |
| "learning_rate": 3.271499733698641e-07, | |
| "loss": 0.7401329278945923, | |
| "step": 3078 | |
| }, | |
| { | |
| "epoch": 3.7606837606837606, | |
| "grad_norm": 6.964620113372803, | |
| "learning_rate": 3.266087784848164e-07, | |
| "loss": 0.5241950750350952, | |
| "step": 3080 | |
| }, | |
| { | |
| "epoch": 3.763125763125763, | |
| "grad_norm": 3.751598834991455, | |
| "learning_rate": 3.2607297831670605e-07, | |
| "loss": 0.8286868333816528, | |
| "step": 3082 | |
| }, | |
| { | |
| "epoch": 3.7655677655677655, | |
| "grad_norm": 2.7581403255462646, | |
| "learning_rate": 3.255425750496874e-07, | |
| "loss": 0.7994064092636108, | |
| "step": 3084 | |
| }, | |
| { | |
| "epoch": 3.768009768009768, | |
| "grad_norm": 3.7547802925109863, | |
| "learning_rate": 3.2501757084591424e-07, | |
| "loss": 0.8994746208190918, | |
| "step": 3086 | |
| }, | |
| { | |
| "epoch": 3.7704517704517704, | |
| "grad_norm": 1.5219696760177612, | |
| "learning_rate": 3.244979678455314e-07, | |
| "loss": 0.10294540226459503, | |
| "step": 3088 | |
| }, | |
| { | |
| "epoch": 3.772893772893773, | |
| "grad_norm": 2.7293152809143066, | |
| "learning_rate": 3.2398376816666626e-07, | |
| "loss": 0.6969175338745117, | |
| "step": 3090 | |
| }, | |
| { | |
| "epoch": 3.7753357753357752, | |
| "grad_norm": 13.49944019317627, | |
| "learning_rate": 3.234749739054198e-07, | |
| "loss": 0.7655293345451355, | |
| "step": 3092 | |
| }, | |
| { | |
| "epoch": 3.7777777777777777, | |
| "grad_norm": 2.4176318645477295, | |
| "learning_rate": 3.2297158713585855e-07, | |
| "loss": 0.8352910280227661, | |
| "step": 3094 | |
| }, | |
| { | |
| "epoch": 3.78021978021978, | |
| "grad_norm": 8.134320259094238, | |
| "learning_rate": 3.224736099100051e-07, | |
| "loss": 0.7784008383750916, | |
| "step": 3096 | |
| }, | |
| { | |
| "epoch": 3.7826617826617825, | |
| "grad_norm": 2.9717869758605957, | |
| "learning_rate": 3.2198104425783085e-07, | |
| "loss": 0.5075839161872864, | |
| "step": 3098 | |
| }, | |
| { | |
| "epoch": 3.785103785103785, | |
| "grad_norm": 11.678095817565918, | |
| "learning_rate": 3.2149389218724715e-07, | |
| "loss": 0.6785246133804321, | |
| "step": 3100 | |
| }, | |
| { | |
| "epoch": 3.7875457875457874, | |
| "grad_norm": 2.376706123352051, | |
| "learning_rate": 3.2101215568409724e-07, | |
| "loss": 0.7613149881362915, | |
| "step": 3102 | |
| }, | |
| { | |
| "epoch": 3.78998778998779, | |
| "grad_norm": 3.306133270263672, | |
| "learning_rate": 3.205358367121482e-07, | |
| "loss": 0.7240723371505737, | |
| "step": 3104 | |
| }, | |
| { | |
| "epoch": 3.7924297924297923, | |
| "grad_norm": 3.1075305938720703, | |
| "learning_rate": 3.20064937213083e-07, | |
| "loss": 0.7881240248680115, | |
| "step": 3106 | |
| }, | |
| { | |
| "epoch": 3.7948717948717947, | |
| "grad_norm": 2.911088705062866, | |
| "learning_rate": 3.1959945910649197e-07, | |
| "loss": 0.5425209999084473, | |
| "step": 3108 | |
| }, | |
| { | |
| "epoch": 3.797313797313797, | |
| "grad_norm": 3.396981954574585, | |
| "learning_rate": 3.191394042898663e-07, | |
| "loss": 0.5427860021591187, | |
| "step": 3110 | |
| }, | |
| { | |
| "epoch": 3.7997557997557996, | |
| "grad_norm": 3.09364914894104, | |
| "learning_rate": 3.1868477463858867e-07, | |
| "loss": 0.5342693328857422, | |
| "step": 3112 | |
| }, | |
| { | |
| "epoch": 3.802197802197802, | |
| "grad_norm": 3.77823543548584, | |
| "learning_rate": 3.1823557200592727e-07, | |
| "loss": 0.4840196669101715, | |
| "step": 3114 | |
| }, | |
| { | |
| "epoch": 3.8046398046398044, | |
| "grad_norm": 6.581445693969727, | |
| "learning_rate": 3.177917982230266e-07, | |
| "loss": 0.4354932904243469, | |
| "step": 3116 | |
| }, | |
| { | |
| "epoch": 3.807081807081807, | |
| "grad_norm": 4.048827171325684, | |
| "learning_rate": 3.1735345509890153e-07, | |
| "loss": 0.5204283595085144, | |
| "step": 3118 | |
| }, | |
| { | |
| "epoch": 3.8095238095238093, | |
| "grad_norm": 4.30662202835083, | |
| "learning_rate": 3.169205444204287e-07, | |
| "loss": 0.8616774082183838, | |
| "step": 3120 | |
| }, | |
| { | |
| "epoch": 3.8119658119658117, | |
| "grad_norm": 3.3878118991851807, | |
| "learning_rate": 3.1649306795233974e-07, | |
| "loss": 0.4943420886993408, | |
| "step": 3122 | |
| }, | |
| { | |
| "epoch": 3.814407814407814, | |
| "grad_norm": 3.516070604324341, | |
| "learning_rate": 3.1607102743721444e-07, | |
| "loss": 0.6372618079185486, | |
| "step": 3124 | |
| }, | |
| { | |
| "epoch": 3.8168498168498166, | |
| "grad_norm": 10.609537124633789, | |
| "learning_rate": 3.156544245954731e-07, | |
| "loss": 0.8626052141189575, | |
| "step": 3126 | |
| }, | |
| { | |
| "epoch": 3.819291819291819, | |
| "grad_norm": 4.941616058349609, | |
| "learning_rate": 3.1524326112536954e-07, | |
| "loss": 0.44780248403549194, | |
| "step": 3128 | |
| }, | |
| { | |
| "epoch": 3.8217338217338215, | |
| "grad_norm": 3.134873628616333, | |
| "learning_rate": 3.148375387029847e-07, | |
| "loss": 0.8529410362243652, | |
| "step": 3130 | |
| }, | |
| { | |
| "epoch": 3.824175824175824, | |
| "grad_norm": 0.7779524326324463, | |
| "learning_rate": 3.1443725898221915e-07, | |
| "loss": 0.46019861102104187, | |
| "step": 3132 | |
| }, | |
| { | |
| "epoch": 3.8266178266178263, | |
| "grad_norm": 5.240747451782227, | |
| "learning_rate": 3.140424235947869e-07, | |
| "loss": 1.0692764520645142, | |
| "step": 3134 | |
| }, | |
| { | |
| "epoch": 3.8290598290598292, | |
| "grad_norm": 3.5423636436462402, | |
| "learning_rate": 3.13653034150208e-07, | |
| "loss": 0.9176429510116577, | |
| "step": 3136 | |
| }, | |
| { | |
| "epoch": 3.8315018315018317, | |
| "grad_norm": 5.5746564865112305, | |
| "learning_rate": 3.1326909223580295e-07, | |
| "loss": 0.4994342625141144, | |
| "step": 3138 | |
| }, | |
| { | |
| "epoch": 3.833943833943834, | |
| "grad_norm": 1.4443594217300415, | |
| "learning_rate": 3.128905994166859e-07, | |
| "loss": 0.33987581729888916, | |
| "step": 3140 | |
| }, | |
| { | |
| "epoch": 3.8363858363858365, | |
| "grad_norm": 5.162232398986816, | |
| "learning_rate": 3.125175572357573e-07, | |
| "loss": 0.5604066848754883, | |
| "step": 3142 | |
| }, | |
| { | |
| "epoch": 3.838827838827839, | |
| "grad_norm": 18.79522132873535, | |
| "learning_rate": 3.121499672136995e-07, | |
| "loss": 0.26430392265319824, | |
| "step": 3144 | |
| }, | |
| { | |
| "epoch": 3.8412698412698414, | |
| "grad_norm": 5.502162456512451, | |
| "learning_rate": 3.117878308489687e-07, | |
| "loss": 0.47760242223739624, | |
| "step": 3146 | |
| }, | |
| { | |
| "epoch": 3.843711843711844, | |
| "grad_norm": 0.3487054109573364, | |
| "learning_rate": 3.1143114961778995e-07, | |
| "loss": 0.06170821189880371, | |
| "step": 3148 | |
| }, | |
| { | |
| "epoch": 3.8461538461538463, | |
| "grad_norm": 9.559247016906738, | |
| "learning_rate": 3.1107992497415046e-07, | |
| "loss": 0.4256623387336731, | |
| "step": 3150 | |
| }, | |
| { | |
| "epoch": 3.8485958485958487, | |
| "grad_norm": 6.494686126708984, | |
| "learning_rate": 3.107341583497948e-07, | |
| "loss": 0.7344862222671509, | |
| "step": 3152 | |
| }, | |
| { | |
| "epoch": 3.851037851037851, | |
| "grad_norm": 5.391261100769043, | |
| "learning_rate": 3.103938511542174e-07, | |
| "loss": 0.43656569719314575, | |
| "step": 3154 | |
| }, | |
| { | |
| "epoch": 3.8534798534798536, | |
| "grad_norm": 2.5856785774230957, | |
| "learning_rate": 3.1005900477465816e-07, | |
| "loss": 0.7838259339332581, | |
| "step": 3156 | |
| }, | |
| { | |
| "epoch": 3.855921855921856, | |
| "grad_norm": 2.5243351459503174, | |
| "learning_rate": 3.0972962057609634e-07, | |
| "loss": 0.7311700582504272, | |
| "step": 3158 | |
| }, | |
| { | |
| "epoch": 3.8583638583638584, | |
| "grad_norm": 4.596135139465332, | |
| "learning_rate": 3.09405699901245e-07, | |
| "loss": 0.049799878150224686, | |
| "step": 3160 | |
| }, | |
| { | |
| "epoch": 3.860805860805861, | |
| "grad_norm": 3.657917022705078, | |
| "learning_rate": 3.0908724407054496e-07, | |
| "loss": 0.43010884523391724, | |
| "step": 3162 | |
| }, | |
| { | |
| "epoch": 3.8632478632478633, | |
| "grad_norm": 2.116675615310669, | |
| "learning_rate": 3.087742543821607e-07, | |
| "loss": 0.8404495716094971, | |
| "step": 3164 | |
| }, | |
| { | |
| "epoch": 3.8656898656898657, | |
| "grad_norm": 5.3547749519348145, | |
| "learning_rate": 3.0846673211197385e-07, | |
| "loss": 0.7852703928947449, | |
| "step": 3166 | |
| }, | |
| { | |
| "epoch": 3.868131868131868, | |
| "grad_norm": 2.9708454608917236, | |
| "learning_rate": 3.081646785135788e-07, | |
| "loss": 0.785150408744812, | |
| "step": 3168 | |
| }, | |
| { | |
| "epoch": 3.8705738705738706, | |
| "grad_norm": 4.000237941741943, | |
| "learning_rate": 3.07868094818277e-07, | |
| "loss": 0.5341532230377197, | |
| "step": 3170 | |
| }, | |
| { | |
| "epoch": 3.873015873015873, | |
| "grad_norm": 7.4156646728515625, | |
| "learning_rate": 3.0757698223507246e-07, | |
| "loss": 0.4277054965496063, | |
| "step": 3172 | |
| }, | |
| { | |
| "epoch": 3.8754578754578755, | |
| "grad_norm": 2.476334810256958, | |
| "learning_rate": 3.0729134195066615e-07, | |
| "loss": 0.5318750143051147, | |
| "step": 3174 | |
| }, | |
| { | |
| "epoch": 3.877899877899878, | |
| "grad_norm": 4.160787105560303, | |
| "learning_rate": 3.0701117512945196e-07, | |
| "loss": 0.7418047189712524, | |
| "step": 3176 | |
| }, | |
| { | |
| "epoch": 3.8803418803418803, | |
| "grad_norm": 2.8902194499969482, | |
| "learning_rate": 3.067364829135116e-07, | |
| "loss": 0.9083335399627686, | |
| "step": 3178 | |
| }, | |
| { | |
| "epoch": 3.8827838827838828, | |
| "grad_norm": 2.9332680702209473, | |
| "learning_rate": 3.064672664226094e-07, | |
| "loss": 0.7361115217208862, | |
| "step": 3180 | |
| }, | |
| { | |
| "epoch": 3.885225885225885, | |
| "grad_norm": 2.178924322128296, | |
| "learning_rate": 3.062035267541889e-07, | |
| "loss": 0.7915389537811279, | |
| "step": 3182 | |
| }, | |
| { | |
| "epoch": 3.8876678876678876, | |
| "grad_norm": 4.9449462890625, | |
| "learning_rate": 3.059452649833673e-07, | |
| "loss": 0.2678080201148987, | |
| "step": 3184 | |
| }, | |
| { | |
| "epoch": 3.89010989010989, | |
| "grad_norm": 3.050114870071411, | |
| "learning_rate": 3.056924821629317e-07, | |
| "loss": 0.7626932263374329, | |
| "step": 3186 | |
| }, | |
| { | |
| "epoch": 3.8925518925518925, | |
| "grad_norm": 2.8154830932617188, | |
| "learning_rate": 3.054451793233347e-07, | |
| "loss": 0.4224632680416107, | |
| "step": 3188 | |
| }, | |
| { | |
| "epoch": 3.894993894993895, | |
| "grad_norm": 1.828765869140625, | |
| "learning_rate": 3.0520335747268976e-07, | |
| "loss": 0.380077600479126, | |
| "step": 3190 | |
| }, | |
| { | |
| "epoch": 3.8974358974358974, | |
| "grad_norm": 3.0249011516571045, | |
| "learning_rate": 3.0496701759676784e-07, | |
| "loss": 0.8676139116287231, | |
| "step": 3192 | |
| }, | |
| { | |
| "epoch": 3.8998778998779, | |
| "grad_norm": 13.60191535949707, | |
| "learning_rate": 3.0473616065899275e-07, | |
| "loss": 0.8261326551437378, | |
| "step": 3194 | |
| }, | |
| { | |
| "epoch": 3.9023199023199022, | |
| "grad_norm": 9.212258338928223, | |
| "learning_rate": 3.045107876004377e-07, | |
| "loss": 0.4649001955986023, | |
| "step": 3196 | |
| }, | |
| { | |
| "epoch": 3.9047619047619047, | |
| "grad_norm": 6.984424591064453, | |
| "learning_rate": 3.0429089933982095e-07, | |
| "loss": 0.13965851068496704, | |
| "step": 3198 | |
| }, | |
| { | |
| "epoch": 3.907203907203907, | |
| "grad_norm": 3.2678136825561523, | |
| "learning_rate": 3.0407649677350244e-07, | |
| "loss": 0.7392624616622925, | |
| "step": 3200 | |
| }, | |
| { | |
| "epoch": 3.9096459096459095, | |
| "grad_norm": 3.037532091140747, | |
| "learning_rate": 3.0386758077548036e-07, | |
| "loss": 0.48279869556427, | |
| "step": 3202 | |
| }, | |
| { | |
| "epoch": 3.912087912087912, | |
| "grad_norm": 5.4636054039001465, | |
| "learning_rate": 3.036641521973866e-07, | |
| "loss": 0.8397436141967773, | |
| "step": 3204 | |
| }, | |
| { | |
| "epoch": 3.9145299145299144, | |
| "grad_norm": 2.561164140701294, | |
| "learning_rate": 3.0346621186848466e-07, | |
| "loss": 0.9013160467147827, | |
| "step": 3206 | |
| }, | |
| { | |
| "epoch": 3.916971916971917, | |
| "grad_norm": 2.6872096061706543, | |
| "learning_rate": 3.032737605956649e-07, | |
| "loss": 0.8809531927108765, | |
| "step": 3208 | |
| }, | |
| { | |
| "epoch": 3.9194139194139193, | |
| "grad_norm": 2.269146203994751, | |
| "learning_rate": 3.030867991634425e-07, | |
| "loss": 0.471352219581604, | |
| "step": 3210 | |
| }, | |
| { | |
| "epoch": 3.9218559218559217, | |
| "grad_norm": 5.170947551727295, | |
| "learning_rate": 3.029053283339533e-07, | |
| "loss": 0.23797103762626648, | |
| "step": 3212 | |
| }, | |
| { | |
| "epoch": 3.9242979242979246, | |
| "grad_norm": 2.6191842555999756, | |
| "learning_rate": 3.0272934884695113e-07, | |
| "loss": 0.7647219896316528, | |
| "step": 3214 | |
| }, | |
| { | |
| "epoch": 3.926739926739927, | |
| "grad_norm": 5.886957168579102, | |
| "learning_rate": 3.0255886141980464e-07, | |
| "loss": 0.8198211193084717, | |
| "step": 3216 | |
| }, | |
| { | |
| "epoch": 3.9291819291819294, | |
| "grad_norm": 8.85236644744873, | |
| "learning_rate": 3.023938667474948e-07, | |
| "loss": 0.502949595451355, | |
| "step": 3218 | |
| }, | |
| { | |
| "epoch": 3.931623931623932, | |
| "grad_norm": 3.0946693420410156, | |
| "learning_rate": 3.022343655026115e-07, | |
| "loss": 0.4772048592567444, | |
| "step": 3220 | |
| }, | |
| { | |
| "epoch": 3.9340659340659343, | |
| "grad_norm": 4.789615631103516, | |
| "learning_rate": 3.020803583353507e-07, | |
| "loss": 0.4212894141674042, | |
| "step": 3222 | |
| }, | |
| { | |
| "epoch": 3.9365079365079367, | |
| "grad_norm": 2.7063755989074707, | |
| "learning_rate": 3.019318458735128e-07, | |
| "loss": 0.693466067314148, | |
| "step": 3224 | |
| }, | |
| { | |
| "epoch": 3.938949938949939, | |
| "grad_norm": 7.847006797790527, | |
| "learning_rate": 3.017888287224989e-07, | |
| "loss": 0.4305657148361206, | |
| "step": 3226 | |
| }, | |
| { | |
| "epoch": 3.9413919413919416, | |
| "grad_norm": 4.677613735198975, | |
| "learning_rate": 3.016513074653089e-07, | |
| "loss": 0.8423424363136292, | |
| "step": 3228 | |
| }, | |
| { | |
| "epoch": 3.943833943833944, | |
| "grad_norm": 3.0852110385894775, | |
| "learning_rate": 3.0151928266253933e-07, | |
| "loss": 0.44737470149993896, | |
| "step": 3230 | |
| }, | |
| { | |
| "epoch": 3.9462759462759465, | |
| "grad_norm": 3.329002618789673, | |
| "learning_rate": 3.0139275485238054e-07, | |
| "loss": 0.8724846839904785, | |
| "step": 3232 | |
| }, | |
| { | |
| "epoch": 3.948717948717949, | |
| "grad_norm": 6.412221908569336, | |
| "learning_rate": 3.012717245506146e-07, | |
| "loss": 0.45604297518730164, | |
| "step": 3234 | |
| }, | |
| { | |
| "epoch": 3.9511599511599513, | |
| "grad_norm": 3.633538246154785, | |
| "learning_rate": 3.011561922506136e-07, | |
| "loss": 0.756797194480896, | |
| "step": 3236 | |
| }, | |
| { | |
| "epoch": 3.9536019536019538, | |
| "grad_norm": 24.208881378173828, | |
| "learning_rate": 3.010461584233376e-07, | |
| "loss": 0.73784339427948, | |
| "step": 3238 | |
| }, | |
| { | |
| "epoch": 3.956043956043956, | |
| "grad_norm": 2.3144328594207764, | |
| "learning_rate": 3.0094162351733194e-07, | |
| "loss": 0.3887001872062683, | |
| "step": 3240 | |
| }, | |
| { | |
| "epoch": 3.9584859584859586, | |
| "grad_norm": 7.828591346740723, | |
| "learning_rate": 3.0084258795872626e-07, | |
| "loss": 0.50379478931427, | |
| "step": 3242 | |
| }, | |
| { | |
| "epoch": 3.960927960927961, | |
| "grad_norm": 3.9664628505706787, | |
| "learning_rate": 3.007490521512327e-07, | |
| "loss": 0.8260669112205505, | |
| "step": 3244 | |
| }, | |
| { | |
| "epoch": 3.9633699633699635, | |
| "grad_norm": 3.5767982006073, | |
| "learning_rate": 3.006610164761436e-07, | |
| "loss": 0.4810832738876343, | |
| "step": 3246 | |
| }, | |
| { | |
| "epoch": 3.965811965811966, | |
| "grad_norm": 3.2386586666107178, | |
| "learning_rate": 3.0057848129233066e-07, | |
| "loss": 0.7690592408180237, | |
| "step": 3248 | |
| }, | |
| { | |
| "epoch": 3.9682539682539684, | |
| "grad_norm": 2.7200279235839844, | |
| "learning_rate": 3.0050144693624323e-07, | |
| "loss": 0.4866028428077698, | |
| "step": 3250 | |
| }, | |
| { | |
| "epoch": 3.970695970695971, | |
| "grad_norm": 5.123220443725586, | |
| "learning_rate": 3.004299137219065e-07, | |
| "loss": 0.13242197036743164, | |
| "step": 3252 | |
| }, | |
| { | |
| "epoch": 3.9731379731379732, | |
| "grad_norm": 4.087306499481201, | |
| "learning_rate": 3.00363881940921e-07, | |
| "loss": 0.7160649299621582, | |
| "step": 3254 | |
| }, | |
| { | |
| "epoch": 3.9755799755799757, | |
| "grad_norm": 3.564754009246826, | |
| "learning_rate": 3.003033518624611e-07, | |
| "loss": 0.7744536399841309, | |
| "step": 3256 | |
| }, | |
| { | |
| "epoch": 3.978021978021978, | |
| "grad_norm": 2.57946515083313, | |
| "learning_rate": 3.0024832373327347e-07, | |
| "loss": 0.4117498993873596, | |
| "step": 3258 | |
| }, | |
| { | |
| "epoch": 3.9804639804639805, | |
| "grad_norm": 5.399810314178467, | |
| "learning_rate": 3.001987977776767e-07, | |
| "loss": 0.4066256880760193, | |
| "step": 3260 | |
| }, | |
| { | |
| "epoch": 3.982905982905983, | |
| "grad_norm": 4.3059515953063965, | |
| "learning_rate": 3.001547741975601e-07, | |
| "loss": 0.46041280031204224, | |
| "step": 3262 | |
| }, | |
| { | |
| "epoch": 3.9853479853479854, | |
| "grad_norm": 10.905681610107422, | |
| "learning_rate": 3.0011625317238293e-07, | |
| "loss": 0.5426976680755615, | |
| "step": 3264 | |
| }, | |
| { | |
| "epoch": 3.987789987789988, | |
| "grad_norm": 8.33562183380127, | |
| "learning_rate": 3.0008323485917347e-07, | |
| "loss": 0.7136914730072021, | |
| "step": 3266 | |
| }, | |
| { | |
| "epoch": 3.9902319902319903, | |
| "grad_norm": 2.0271568298339844, | |
| "learning_rate": 3.000557193925287e-07, | |
| "loss": 0.7866110801696777, | |
| "step": 3268 | |
| }, | |
| { | |
| "epoch": 3.9926739926739927, | |
| "grad_norm": 6.280376434326172, | |
| "learning_rate": 3.000337068846139e-07, | |
| "loss": 0.6041853427886963, | |
| "step": 3270 | |
| }, | |
| { | |
| "epoch": 3.995115995115995, | |
| "grad_norm": 2.359619617462158, | |
| "learning_rate": 3.0001719742516126e-07, | |
| "loss": 0.8250214457511902, | |
| "step": 3272 | |
| }, | |
| { | |
| "epoch": 3.9975579975579976, | |
| "grad_norm": 4.224130630493164, | |
| "learning_rate": 3.000061910814706e-07, | |
| "loss": 0.6971200704574585, | |
| "step": 3274 | |
| }, | |
| { | |
| "epoch": 4.0, | |
| "grad_norm": 2.9241721630096436, | |
| "learning_rate": 3.0000068789840856e-07, | |
| "loss": 0.41884008049964905, | |
| "step": 3276 | |
| }, | |
| { | |
| "epoch": 4.0, | |
| "step": 3276, | |
| "total_flos": 3.4380478348997427e+18, | |
| "train_loss": 0.8572880857224975, | |
| "train_runtime": 5848.4538, | |
| "train_samples_per_second": 8.962, | |
| "train_steps_per_second": 0.56 | |
| } | |
| ], | |
| "logging_steps": 2, | |
| "max_steps": 3276, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 4, | |
| "save_steps": 99999, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": false, | |
| "should_training_stop": false | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 3.4380478348997427e+18, | |
| "train_batch_size": 2, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |