Instructions to use IParraMartin/gpt2-medium-bLM100M with libraries, inference providers, notebooks, and local apps. Follow these links to get started.
- Libraries
- Transformers
How to use IParraMartin/gpt2-medium-bLM100M with Transformers:
```python
# Use a pipeline as a high-level helper
from transformers import pipeline

pipe = pipeline("text-generation", model="IParraMartin/gpt2-medium-bLM100M")
```

```python
# Load model directly
from transformers import AutoTokenizer, AutoModelForCausalLM

tokenizer = AutoTokenizer.from_pretrained("IParraMartin/gpt2-medium-bLM100M")
model = AutoModelForCausalLM.from_pretrained("IParraMartin/gpt2-medium-bLM100M")
```
- Notebooks
- Google Colab
- Kaggle
- Local Apps
- vLLM
How to use IParraMartin/gpt2-medium-bLM100M with vLLM:
Install from pip and serve model
```bash
# Install vLLM from pip:
pip install vllm

# Start the vLLM server:
vllm serve "IParraMartin/gpt2-medium-bLM100M"

# Call the server using curl (OpenAI-compatible API):
curl -X POST "http://localhost:8000/v1/completions" \
  -H "Content-Type: application/json" \
  --data '{
    "model": "IParraMartin/gpt2-medium-bLM100M",
    "prompt": "Once upon a time,",
    "max_tokens": 512,
    "temperature": 0.5
  }'
```
Use Docker
docker model run hf.co/IParraMartin/gpt2-medium-bLM100M
- SGLang
How to use IParraMartin/gpt2-medium-bLM100M with SGLang:
Install from pip and serve model
```bash
# Install SGLang from pip:
pip install sglang

# Start the SGLang server:
python3 -m sglang.launch_server \
  --model-path "IParraMartin/gpt2-medium-bLM100M" \
  --host 0.0.0.0 \
  --port 30000

# Call the server using curl (OpenAI-compatible API):
curl -X POST "http://localhost:30000/v1/completions" \
  -H "Content-Type: application/json" \
  --data '{
    "model": "IParraMartin/gpt2-medium-bLM100M",
    "prompt": "Once upon a time,",
    "max_tokens": 512,
    "temperature": 0.5
  }'
```
Use Docker images
```bash
docker run --gpus all \
  --shm-size 32g \
  -p 30000:30000 \
  -v ~/.cache/huggingface:/root/.cache/huggingface \
  --env "HF_TOKEN=<secret>" \
  --ipc=host \
  lmsysorg/sglang:latest \
  python3 -m sglang.launch_server \
    --model-path "IParraMartin/gpt2-medium-bLM100M" \
    --host 0.0.0.0 \
    --port 30000

# Call the server using curl (OpenAI-compatible API):
curl -X POST "http://localhost:30000/v1/completions" \
  -H "Content-Type: application/json" \
  --data '{
    "model": "IParraMartin/gpt2-medium-bLM100M",
    "prompt": "Once upon a time,",
    "max_tokens": 512,
    "temperature": 0.5
  }'
```
- Docker Model Runner
How to use IParraMartin/gpt2-medium-bLM100M with Docker Model Runner:
docker model run hf.co/IParraMartin/gpt2-medium-bLM100M
| { | |
| "best_global_step": null, | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 6.622338113863537, | |
| "eval_steps": 500, | |
| "global_step": 1907, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.003476749239461104, | |
| "grad_norm": 28.668624877929688, | |
| "learning_rate": 0.0, | |
| "loss": 10.9346, | |
| "step": 1 | |
| }, | |
| { | |
| "epoch": 0.006953498478922208, | |
| "grad_norm": 29.20085334777832, | |
| "learning_rate": 5.235602094240838e-07, | |
| "loss": 10.9266, | |
| "step": 2 | |
| }, | |
| { | |
| "epoch": 0.010430247718383311, | |
| "grad_norm": 27.985437393188477, | |
| "learning_rate": 1.0471204188481676e-06, | |
| "loss": 10.8227, | |
| "step": 3 | |
| }, | |
| { | |
| "epoch": 0.013906996957844416, | |
| "grad_norm": 26.395610809326172, | |
| "learning_rate": 1.5706806282722513e-06, | |
| "loss": 10.6153, | |
| "step": 4 | |
| }, | |
| { | |
| "epoch": 0.017383746197305518, | |
| "grad_norm": 21.0169677734375, | |
| "learning_rate": 2.094240837696335e-06, | |
| "loss": 10.3783, | |
| "step": 5 | |
| }, | |
| { | |
| "epoch": 0.020860495436766623, | |
| "grad_norm": 17.197437286376953, | |
| "learning_rate": 2.617801047120419e-06, | |
| "loss": 10.0927, | |
| "step": 6 | |
| }, | |
| { | |
| "epoch": 0.024337244676227728, | |
| "grad_norm": 14.133214950561523, | |
| "learning_rate": 3.1413612565445026e-06, | |
| "loss": 9.8757, | |
| "step": 7 | |
| }, | |
| { | |
| "epoch": 0.027813993915688832, | |
| "grad_norm": 12.546614646911621, | |
| "learning_rate": 3.664921465968586e-06, | |
| "loss": 9.5995, | |
| "step": 8 | |
| }, | |
| { | |
| "epoch": 0.03129074315514994, | |
| "grad_norm": 10.003512382507324, | |
| "learning_rate": 4.18848167539267e-06, | |
| "loss": 9.4723, | |
| "step": 9 | |
| }, | |
| { | |
| "epoch": 0.034767492394611035, | |
| "grad_norm": 9.371498107910156, | |
| "learning_rate": 4.712041884816754e-06, | |
| "loss": 9.2018, | |
| "step": 10 | |
| }, | |
| { | |
| "epoch": 0.03824424163407214, | |
| "grad_norm": 7.8304948806762695, | |
| "learning_rate": 5.235602094240838e-06, | |
| "loss": 9.0917, | |
| "step": 11 | |
| }, | |
| { | |
| "epoch": 0.041720990873533245, | |
| "grad_norm": 7.299487113952637, | |
| "learning_rate": 5.759162303664922e-06, | |
| "loss": 8.9398, | |
| "step": 12 | |
| }, | |
| { | |
| "epoch": 0.04519774011299435, | |
| "grad_norm": 6.750319480895996, | |
| "learning_rate": 6.282722513089005e-06, | |
| "loss": 8.8567, | |
| "step": 13 | |
| }, | |
| { | |
| "epoch": 0.048674489352455455, | |
| "grad_norm": 6.2069993019104, | |
| "learning_rate": 6.8062827225130895e-06, | |
| "loss": 8.6588, | |
| "step": 14 | |
| }, | |
| { | |
| "epoch": 0.05215123859191656, | |
| "grad_norm": 5.454948425292969, | |
| "learning_rate": 7.329842931937172e-06, | |
| "loss": 8.5728, | |
| "step": 15 | |
| }, | |
| { | |
| "epoch": 0.055627987831377665, | |
| "grad_norm": 5.076164722442627, | |
| "learning_rate": 7.853403141361257e-06, | |
| "loss": 8.5335, | |
| "step": 16 | |
| }, | |
| { | |
| "epoch": 0.05910473707083876, | |
| "grad_norm": 5.935672283172607, | |
| "learning_rate": 8.37696335078534e-06, | |
| "loss": 8.3915, | |
| "step": 17 | |
| }, | |
| { | |
| "epoch": 0.06258148631029987, | |
| "grad_norm": 6.291903972625732, | |
| "learning_rate": 8.900523560209424e-06, | |
| "loss": 8.3351, | |
| "step": 18 | |
| }, | |
| { | |
| "epoch": 0.06605823554976098, | |
| "grad_norm": 4.611499786376953, | |
| "learning_rate": 9.424083769633508e-06, | |
| "loss": 8.1489, | |
| "step": 19 | |
| }, | |
| { | |
| "epoch": 0.06953498478922207, | |
| "grad_norm": 4.213597297668457, | |
| "learning_rate": 9.947643979057591e-06, | |
| "loss": 8.1105, | |
| "step": 20 | |
| }, | |
| { | |
| "epoch": 0.07301173402868318, | |
| "grad_norm": 7.032574653625488, | |
| "learning_rate": 1.0471204188481676e-05, | |
| "loss": 7.9326, | |
| "step": 21 | |
| }, | |
| { | |
| "epoch": 0.07648848326814428, | |
| "grad_norm": 3.5775418281555176, | |
| "learning_rate": 1.099476439790576e-05, | |
| "loss": 8.0387, | |
| "step": 22 | |
| }, | |
| { | |
| "epoch": 0.07996523250760539, | |
| "grad_norm": 3.3227384090423584, | |
| "learning_rate": 1.1518324607329843e-05, | |
| "loss": 8.0855, | |
| "step": 23 | |
| }, | |
| { | |
| "epoch": 0.08344198174706649, | |
| "grad_norm": 3.577180862426758, | |
| "learning_rate": 1.2041884816753927e-05, | |
| "loss": 7.8638, | |
| "step": 24 | |
| }, | |
| { | |
| "epoch": 0.0869187309865276, | |
| "grad_norm": 3.077481508255005, | |
| "learning_rate": 1.256544502617801e-05, | |
| "loss": 7.7775, | |
| "step": 25 | |
| }, | |
| { | |
| "epoch": 0.0903954802259887, | |
| "grad_norm": 2.4729714393615723, | |
| "learning_rate": 1.3089005235602096e-05, | |
| "loss": 7.7653, | |
| "step": 26 | |
| }, | |
| { | |
| "epoch": 0.0938722294654498, | |
| "grad_norm": 2.7105133533477783, | |
| "learning_rate": 1.3612565445026179e-05, | |
| "loss": 7.8085, | |
| "step": 27 | |
| }, | |
| { | |
| "epoch": 0.09734897870491091, | |
| "grad_norm": 2.2809700965881348, | |
| "learning_rate": 1.4136125654450264e-05, | |
| "loss": 7.6766, | |
| "step": 28 | |
| }, | |
| { | |
| "epoch": 0.10082572794437202, | |
| "grad_norm": 2.30652117729187, | |
| "learning_rate": 1.4659685863874344e-05, | |
| "loss": 7.703, | |
| "step": 29 | |
| }, | |
| { | |
| "epoch": 0.10430247718383312, | |
| "grad_norm": 2.233243703842163, | |
| "learning_rate": 1.518324607329843e-05, | |
| "loss": 7.7391, | |
| "step": 30 | |
| }, | |
| { | |
| "epoch": 0.10777922642329422, | |
| "grad_norm": 2.0807292461395264, | |
| "learning_rate": 1.5706806282722515e-05, | |
| "loss": 7.5976, | |
| "step": 31 | |
| }, | |
| { | |
| "epoch": 0.11125597566275533, | |
| "grad_norm": 2.4727425575256348, | |
| "learning_rate": 1.6230366492146596e-05, | |
| "loss": 7.5206, | |
| "step": 32 | |
| }, | |
| { | |
| "epoch": 0.11473272490221642, | |
| "grad_norm": 3.1418378353118896, | |
| "learning_rate": 1.675392670157068e-05, | |
| "loss": 7.6335, | |
| "step": 33 | |
| }, | |
| { | |
| "epoch": 0.11820947414167753, | |
| "grad_norm": 3.3428750038146973, | |
| "learning_rate": 1.7277486910994763e-05, | |
| "loss": 7.5816, | |
| "step": 34 | |
| }, | |
| { | |
| "epoch": 0.12168622338113863, | |
| "grad_norm": 2.7240543365478516, | |
| "learning_rate": 1.780104712041885e-05, | |
| "loss": 7.4649, | |
| "step": 35 | |
| }, | |
| { | |
| "epoch": 0.12516297262059975, | |
| "grad_norm": 2.6777167320251465, | |
| "learning_rate": 1.8324607329842934e-05, | |
| "loss": 7.2798, | |
| "step": 36 | |
| }, | |
| { | |
| "epoch": 0.12863972186006084, | |
| "grad_norm": 2.6221325397491455, | |
| "learning_rate": 1.8848167539267016e-05, | |
| "loss": 7.2434, | |
| "step": 37 | |
| }, | |
| { | |
| "epoch": 0.13211647109952196, | |
| "grad_norm": 3.304597854614258, | |
| "learning_rate": 1.93717277486911e-05, | |
| "loss": 7.3748, | |
| "step": 38 | |
| }, | |
| { | |
| "epoch": 0.13559322033898305, | |
| "grad_norm": 2.2426793575286865, | |
| "learning_rate": 1.9895287958115183e-05, | |
| "loss": 7.2165, | |
| "step": 39 | |
| }, | |
| { | |
| "epoch": 0.13906996957844414, | |
| "grad_norm": 2.3125767707824707, | |
| "learning_rate": 2.0418848167539268e-05, | |
| "loss": 7.0828, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 0.14254671881790526, | |
| "grad_norm": 2.921846389770508, | |
| "learning_rate": 2.0942408376963353e-05, | |
| "loss": 7.242, | |
| "step": 41 | |
| }, | |
| { | |
| "epoch": 0.14602346805736635, | |
| "grad_norm": 2.0602433681488037, | |
| "learning_rate": 2.1465968586387435e-05, | |
| "loss": 7.1855, | |
| "step": 42 | |
| }, | |
| { | |
| "epoch": 0.14950021729682747, | |
| "grad_norm": 2.180553436279297, | |
| "learning_rate": 2.198952879581152e-05, | |
| "loss": 7.0872, | |
| "step": 43 | |
| }, | |
| { | |
| "epoch": 0.15297696653628856, | |
| "grad_norm": 1.9306440353393555, | |
| "learning_rate": 2.25130890052356e-05, | |
| "loss": 7.1991, | |
| "step": 44 | |
| }, | |
| { | |
| "epoch": 0.15645371577574968, | |
| "grad_norm": 2.243671417236328, | |
| "learning_rate": 2.3036649214659687e-05, | |
| "loss": 7.087, | |
| "step": 45 | |
| }, | |
| { | |
| "epoch": 0.15993046501521077, | |
| "grad_norm": 1.9487829208374023, | |
| "learning_rate": 2.3560209424083772e-05, | |
| "loss": 6.9366, | |
| "step": 46 | |
| }, | |
| { | |
| "epoch": 0.1634072142546719, | |
| "grad_norm": 2.5406157970428467, | |
| "learning_rate": 2.4083769633507854e-05, | |
| "loss": 7.1055, | |
| "step": 47 | |
| }, | |
| { | |
| "epoch": 0.16688396349413298, | |
| "grad_norm": 3.9518744945526123, | |
| "learning_rate": 2.460732984293194e-05, | |
| "loss": 6.8859, | |
| "step": 48 | |
| }, | |
| { | |
| "epoch": 0.1703607127335941, | |
| "grad_norm": 1.8939940929412842, | |
| "learning_rate": 2.513089005235602e-05, | |
| "loss": 6.9724, | |
| "step": 49 | |
| }, | |
| { | |
| "epoch": 0.1738374619730552, | |
| "grad_norm": 2.785097360610962, | |
| "learning_rate": 2.5654450261780106e-05, | |
| "loss": 6.8924, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 0.1773142112125163, | |
| "grad_norm": 3.876585006713867, | |
| "learning_rate": 2.617801047120419e-05, | |
| "loss": 6.7097, | |
| "step": 51 | |
| }, | |
| { | |
| "epoch": 0.1807909604519774, | |
| "grad_norm": 1.672093391418457, | |
| "learning_rate": 2.6701570680628273e-05, | |
| "loss": 6.8596, | |
| "step": 52 | |
| }, | |
| { | |
| "epoch": 0.1842677096914385, | |
| "grad_norm": 3.9122424125671387, | |
| "learning_rate": 2.7225130890052358e-05, | |
| "loss": 6.745, | |
| "step": 53 | |
| }, | |
| { | |
| "epoch": 0.1877444589308996, | |
| "grad_norm": 2.6010124683380127, | |
| "learning_rate": 2.7748691099476443e-05, | |
| "loss": 6.7213, | |
| "step": 54 | |
| }, | |
| { | |
| "epoch": 0.1912212081703607, | |
| "grad_norm": 3.1334500312805176, | |
| "learning_rate": 2.827225130890053e-05, | |
| "loss": 6.5959, | |
| "step": 55 | |
| }, | |
| { | |
| "epoch": 0.19469795740982182, | |
| "grad_norm": 2.5764386653900146, | |
| "learning_rate": 2.879581151832461e-05, | |
| "loss": 6.7594, | |
| "step": 56 | |
| }, | |
| { | |
| "epoch": 0.1981747066492829, | |
| "grad_norm": 2.5421552658081055, | |
| "learning_rate": 2.931937172774869e-05, | |
| "loss": 6.5791, | |
| "step": 57 | |
| }, | |
| { | |
| "epoch": 0.20165145588874403, | |
| "grad_norm": 2.9680330753326416, | |
| "learning_rate": 2.9842931937172774e-05, | |
| "loss": 6.5585, | |
| "step": 58 | |
| }, | |
| { | |
| "epoch": 0.20512820512820512, | |
| "grad_norm": 2.292030096054077, | |
| "learning_rate": 3.036649214659686e-05, | |
| "loss": 6.4137, | |
| "step": 59 | |
| }, | |
| { | |
| "epoch": 0.20860495436766624, | |
| "grad_norm": 3.2336649894714355, | |
| "learning_rate": 3.0890052356020944e-05, | |
| "loss": 6.542, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 0.21208170360712733, | |
| "grad_norm": 2.3037948608398438, | |
| "learning_rate": 3.141361256544503e-05, | |
| "loss": 6.4735, | |
| "step": 61 | |
| }, | |
| { | |
| "epoch": 0.21555845284658845, | |
| "grad_norm": 2.364863157272339, | |
| "learning_rate": 3.1937172774869115e-05, | |
| "loss": 6.4433, | |
| "step": 62 | |
| }, | |
| { | |
| "epoch": 0.21903520208604954, | |
| "grad_norm": 1.9736865758895874, | |
| "learning_rate": 3.246073298429319e-05, | |
| "loss": 6.2436, | |
| "step": 63 | |
| }, | |
| { | |
| "epoch": 0.22251195132551066, | |
| "grad_norm": 2.201551914215088, | |
| "learning_rate": 3.298429319371728e-05, | |
| "loss": 6.4195, | |
| "step": 64 | |
| }, | |
| { | |
| "epoch": 0.22598870056497175, | |
| "grad_norm": 2.3755462169647217, | |
| "learning_rate": 3.350785340314136e-05, | |
| "loss": 6.3625, | |
| "step": 65 | |
| }, | |
| { | |
| "epoch": 0.22946544980443284, | |
| "grad_norm": 3.011631727218628, | |
| "learning_rate": 3.403141361256545e-05, | |
| "loss": 6.3454, | |
| "step": 66 | |
| }, | |
| { | |
| "epoch": 0.23294219904389396, | |
| "grad_norm": 2.120392084121704, | |
| "learning_rate": 3.455497382198953e-05, | |
| "loss": 6.2755, | |
| "step": 67 | |
| }, | |
| { | |
| "epoch": 0.23641894828335505, | |
| "grad_norm": 2.4527335166931152, | |
| "learning_rate": 3.507853403141361e-05, | |
| "loss": 6.3017, | |
| "step": 68 | |
| }, | |
| { | |
| "epoch": 0.23989569752281617, | |
| "grad_norm": 2.6498172283172607, | |
| "learning_rate": 3.56020942408377e-05, | |
| "loss": 6.2205, | |
| "step": 69 | |
| }, | |
| { | |
| "epoch": 0.24337244676227726, | |
| "grad_norm": 1.6044883728027344, | |
| "learning_rate": 3.612565445026178e-05, | |
| "loss": 6.1502, | |
| "step": 70 | |
| }, | |
| { | |
| "epoch": 0.24684919600173838, | |
| "grad_norm": 4.1391143798828125, | |
| "learning_rate": 3.664921465968587e-05, | |
| "loss": 6.1017, | |
| "step": 71 | |
| }, | |
| { | |
| "epoch": 0.2503259452411995, | |
| "grad_norm": 2.0595903396606445, | |
| "learning_rate": 3.717277486910995e-05, | |
| "loss": 6.1089, | |
| "step": 72 | |
| }, | |
| { | |
| "epoch": 0.25380269448066056, | |
| "grad_norm": 3.4644057750701904, | |
| "learning_rate": 3.769633507853403e-05, | |
| "loss": 6.0465, | |
| "step": 73 | |
| }, | |
| { | |
| "epoch": 0.2572794437201217, | |
| "grad_norm": 2.4427716732025146, | |
| "learning_rate": 3.8219895287958116e-05, | |
| "loss": 6.0676, | |
| "step": 74 | |
| }, | |
| { | |
| "epoch": 0.2607561929595828, | |
| "grad_norm": 3.2828521728515625, | |
| "learning_rate": 3.87434554973822e-05, | |
| "loss": 6.1147, | |
| "step": 75 | |
| }, | |
| { | |
| "epoch": 0.2642329421990439, | |
| "grad_norm": 2.433551788330078, | |
| "learning_rate": 3.926701570680629e-05, | |
| "loss": 6.0704, | |
| "step": 76 | |
| }, | |
| { | |
| "epoch": 0.267709691438505, | |
| "grad_norm": 1.9476454257965088, | |
| "learning_rate": 3.9790575916230365e-05, | |
| "loss": 5.9814, | |
| "step": 77 | |
| }, | |
| { | |
| "epoch": 0.2711864406779661, | |
| "grad_norm": 2.0444705486297607, | |
| "learning_rate": 4.031413612565445e-05, | |
| "loss": 5.8327, | |
| "step": 78 | |
| }, | |
| { | |
| "epoch": 0.2746631899174272, | |
| "grad_norm": 2.3648533821105957, | |
| "learning_rate": 4.0837696335078535e-05, | |
| "loss": 5.9608, | |
| "step": 79 | |
| }, | |
| { | |
| "epoch": 0.2781399391568883, | |
| "grad_norm": 2.4138662815093994, | |
| "learning_rate": 4.136125654450262e-05, | |
| "loss": 5.9469, | |
| "step": 80 | |
| }, | |
| { | |
| "epoch": 0.2816166883963494, | |
| "grad_norm": 2.0390286445617676, | |
| "learning_rate": 4.1884816753926706e-05, | |
| "loss": 5.8486, | |
| "step": 81 | |
| }, | |
| { | |
| "epoch": 0.2850934376358105, | |
| "grad_norm": 2.036783218383789, | |
| "learning_rate": 4.240837696335079e-05, | |
| "loss": 5.8894, | |
| "step": 82 | |
| }, | |
| { | |
| "epoch": 0.28857018687527164, | |
| "grad_norm": 2.660581111907959, | |
| "learning_rate": 4.293193717277487e-05, | |
| "loss": 5.8379, | |
| "step": 83 | |
| }, | |
| { | |
| "epoch": 0.2920469361147327, | |
| "grad_norm": 2.0146775245666504, | |
| "learning_rate": 4.3455497382198955e-05, | |
| "loss": 5.7308, | |
| "step": 84 | |
| }, | |
| { | |
| "epoch": 0.2955236853541938, | |
| "grad_norm": 2.3022539615631104, | |
| "learning_rate": 4.397905759162304e-05, | |
| "loss": 5.816, | |
| "step": 85 | |
| }, | |
| { | |
| "epoch": 0.29900043459365494, | |
| "grad_norm": 2.7125766277313232, | |
| "learning_rate": 4.4502617801047125e-05, | |
| "loss": 5.6184, | |
| "step": 86 | |
| }, | |
| { | |
| "epoch": 0.30247718383311606, | |
| "grad_norm": 2.6640212535858154, | |
| "learning_rate": 4.50261780104712e-05, | |
| "loss": 5.8341, | |
| "step": 87 | |
| }, | |
| { | |
| "epoch": 0.3059539330725771, | |
| "grad_norm": 2.317542314529419, | |
| "learning_rate": 4.554973821989529e-05, | |
| "loss": 5.7043, | |
| "step": 88 | |
| }, | |
| { | |
| "epoch": 0.30943068231203824, | |
| "grad_norm": 1.7317227125167847, | |
| "learning_rate": 4.6073298429319374e-05, | |
| "loss": 5.7464, | |
| "step": 89 | |
| }, | |
| { | |
| "epoch": 0.31290743155149936, | |
| "grad_norm": 2.3583664894104004, | |
| "learning_rate": 4.659685863874346e-05, | |
| "loss": 5.5552, | |
| "step": 90 | |
| }, | |
| { | |
| "epoch": 0.3163841807909605, | |
| "grad_norm": 2.680361270904541, | |
| "learning_rate": 4.7120418848167544e-05, | |
| "loss": 5.6926, | |
| "step": 91 | |
| }, | |
| { | |
| "epoch": 0.31986093003042154, | |
| "grad_norm": 1.4900928735733032, | |
| "learning_rate": 4.764397905759162e-05, | |
| "loss": 5.6321, | |
| "step": 92 | |
| }, | |
| { | |
| "epoch": 0.32333767926988266, | |
| "grad_norm": 2.226301431655884, | |
| "learning_rate": 4.816753926701571e-05, | |
| "loss": 5.4741, | |
| "step": 93 | |
| }, | |
| { | |
| "epoch": 0.3268144285093438, | |
| "grad_norm": 2.516113519668579, | |
| "learning_rate": 4.869109947643979e-05, | |
| "loss": 5.687, | |
| "step": 94 | |
| }, | |
| { | |
| "epoch": 0.33029117774880484, | |
| "grad_norm": 1.9042409658432007, | |
| "learning_rate": 4.921465968586388e-05, | |
| "loss": 5.5558, | |
| "step": 95 | |
| }, | |
| { | |
| "epoch": 0.33376792698826596, | |
| "grad_norm": 1.791879653930664, | |
| "learning_rate": 4.973821989528796e-05, | |
| "loss": 5.4747, | |
| "step": 96 | |
| }, | |
| { | |
| "epoch": 0.3372446762277271, | |
| "grad_norm": 1.4365205764770508, | |
| "learning_rate": 5.026178010471204e-05, | |
| "loss": 5.5098, | |
| "step": 97 | |
| }, | |
| { | |
| "epoch": 0.3407214254671882, | |
| "grad_norm": 2.371295690536499, | |
| "learning_rate": 5.0785340314136134e-05, | |
| "loss": 5.448, | |
| "step": 98 | |
| }, | |
| { | |
| "epoch": 0.34419817470664926, | |
| "grad_norm": 2.3430371284484863, | |
| "learning_rate": 5.130890052356021e-05, | |
| "loss": 5.5506, | |
| "step": 99 | |
| }, | |
| { | |
| "epoch": 0.3476749239461104, | |
| "grad_norm": 1.4759836196899414, | |
| "learning_rate": 5.18324607329843e-05, | |
| "loss": 5.4155, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.3511516731855715, | |
| "grad_norm": 2.003601551055908, | |
| "learning_rate": 5.235602094240838e-05, | |
| "loss": 5.5077, | |
| "step": 101 | |
| }, | |
| { | |
| "epoch": 0.3546284224250326, | |
| "grad_norm": 1.7206441164016724, | |
| "learning_rate": 5.287958115183246e-05, | |
| "loss": 5.4507, | |
| "step": 102 | |
| }, | |
| { | |
| "epoch": 0.3581051716644937, | |
| "grad_norm": 2.2505338191986084, | |
| "learning_rate": 5.3403141361256546e-05, | |
| "loss": 5.3292, | |
| "step": 103 | |
| }, | |
| { | |
| "epoch": 0.3615819209039548, | |
| "grad_norm": 2.510773181915283, | |
| "learning_rate": 5.3926701570680624e-05, | |
| "loss": 5.4533, | |
| "step": 104 | |
| }, | |
| { | |
| "epoch": 0.3650586701434159, | |
| "grad_norm": 1.8072589635849, | |
| "learning_rate": 5.4450261780104716e-05, | |
| "loss": 5.355, | |
| "step": 105 | |
| }, | |
| { | |
| "epoch": 0.368535419382877, | |
| "grad_norm": 2.089144706726074, | |
| "learning_rate": 5.4973821989528795e-05, | |
| "loss": 5.5014, | |
| "step": 106 | |
| }, | |
| { | |
| "epoch": 0.3720121686223381, | |
| "grad_norm": 1.5899293422698975, | |
| "learning_rate": 5.5497382198952887e-05, | |
| "loss": 5.376, | |
| "step": 107 | |
| }, | |
| { | |
| "epoch": 0.3754889178617992, | |
| "grad_norm": 1.6699646711349487, | |
| "learning_rate": 5.6020942408376965e-05, | |
| "loss": 5.3309, | |
| "step": 108 | |
| }, | |
| { | |
| "epoch": 0.37896566710126034, | |
| "grad_norm": 1.2876421213150024, | |
| "learning_rate": 5.654450261780106e-05, | |
| "loss": 5.3524, | |
| "step": 109 | |
| }, | |
| { | |
| "epoch": 0.3824424163407214, | |
| "grad_norm": 2.535942316055298, | |
| "learning_rate": 5.7068062827225135e-05, | |
| "loss": 5.3004, | |
| "step": 110 | |
| }, | |
| { | |
| "epoch": 0.3859191655801825, | |
| "grad_norm": 1.8234626054763794, | |
| "learning_rate": 5.759162303664922e-05, | |
| "loss": 5.2188, | |
| "step": 111 | |
| }, | |
| { | |
| "epoch": 0.38939591481964364, | |
| "grad_norm": 2.6738812923431396, | |
| "learning_rate": 5.81151832460733e-05, | |
| "loss": 5.3345, | |
| "step": 112 | |
| }, | |
| { | |
| "epoch": 0.39287266405910476, | |
| "grad_norm": 2.4427976608276367, | |
| "learning_rate": 5.863874345549738e-05, | |
| "loss": 5.2417, | |
| "step": 113 | |
| }, | |
| { | |
| "epoch": 0.3963494132985658, | |
| "grad_norm": 2.158632516860962, | |
| "learning_rate": 5.916230366492147e-05, | |
| "loss": 5.2943, | |
| "step": 114 | |
| }, | |
| { | |
| "epoch": 0.39982616253802694, | |
| "grad_norm": 2.0583152770996094, | |
| "learning_rate": 5.968586387434555e-05, | |
| "loss": 5.2959, | |
| "step": 115 | |
| }, | |
| { | |
| "epoch": 0.40330291177748806, | |
| "grad_norm": 1.8146238327026367, | |
| "learning_rate": 6.020942408376964e-05, | |
| "loss": 5.3123, | |
| "step": 116 | |
| }, | |
| { | |
| "epoch": 0.4067796610169492, | |
| "grad_norm": 1.9138127565383911, | |
| "learning_rate": 6.073298429319372e-05, | |
| "loss": 5.1541, | |
| "step": 117 | |
| }, | |
| { | |
| "epoch": 0.41025641025641024, | |
| "grad_norm": 2.2583582401275635, | |
| "learning_rate": 6.125654450261781e-05, | |
| "loss": 5.0541, | |
| "step": 118 | |
| }, | |
| { | |
| "epoch": 0.41373315949587136, | |
| "grad_norm": 1.597258448600769, | |
| "learning_rate": 6.178010471204189e-05, | |
| "loss": 5.199, | |
| "step": 119 | |
| }, | |
| { | |
| "epoch": 0.4172099087353325, | |
| "grad_norm": 2.2339518070220947, | |
| "learning_rate": 6.230366492146598e-05, | |
| "loss": 5.1763, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 0.42068665797479354, | |
| "grad_norm": 2.5288898944854736, | |
| "learning_rate": 6.282722513089006e-05, | |
| "loss": 5.2478, | |
| "step": 121 | |
| }, | |
| { | |
| "epoch": 0.42416340721425466, | |
| "grad_norm": 1.9757497310638428, | |
| "learning_rate": 6.335078534031414e-05, | |
| "loss": 5.0547, | |
| "step": 122 | |
| }, | |
| { | |
| "epoch": 0.4276401564537158, | |
| "grad_norm": 2.4785637855529785, | |
| "learning_rate": 6.387434554973823e-05, | |
| "loss": 5.0945, | |
| "step": 123 | |
| }, | |
| { | |
| "epoch": 0.4311169056931769, | |
| "grad_norm": 2.1948068141937256, | |
| "learning_rate": 6.439790575916231e-05, | |
| "loss": 5.119, | |
| "step": 124 | |
| }, | |
| { | |
| "epoch": 0.43459365493263796, | |
| "grad_norm": 1.3128104209899902, | |
| "learning_rate": 6.492146596858639e-05, | |
| "loss": 5.0568, | |
| "step": 125 | |
| }, | |
| { | |
| "epoch": 0.4380704041720991, | |
| "grad_norm": 2.0916574001312256, | |
| "learning_rate": 6.544502617801048e-05, | |
| "loss": 5.197, | |
| "step": 126 | |
| }, | |
| { | |
| "epoch": 0.4415471534115602, | |
| "grad_norm": 1.8343582153320312, | |
| "learning_rate": 6.596858638743456e-05, | |
| "loss": 5.1277, | |
| "step": 127 | |
| }, | |
| { | |
| "epoch": 0.4450239026510213, | |
| "grad_norm": 2.083747386932373, | |
| "learning_rate": 6.649214659685863e-05, | |
| "loss": 5.1871, | |
| "step": 128 | |
| }, | |
| { | |
| "epoch": 0.4485006518904824, | |
| "grad_norm": 2.1757895946502686, | |
| "learning_rate": 6.701570680628273e-05, | |
| "loss": 5.0276, | |
| "step": 129 | |
| }, | |
| { | |
| "epoch": 0.4519774011299435, | |
| "grad_norm": 2.2232532501220703, | |
| "learning_rate": 6.75392670157068e-05, | |
| "loss": 4.9795, | |
| "step": 130 | |
| }, | |
| { | |
| "epoch": 0.4554541503694046, | |
| "grad_norm": 1.853911280632019, | |
| "learning_rate": 6.80628272251309e-05, | |
| "loss": 5.0363, | |
| "step": 131 | |
| }, | |
| { | |
| "epoch": 0.4589308996088657, | |
| "grad_norm": 2.0266318321228027, | |
| "learning_rate": 6.858638743455498e-05, | |
| "loss": 5.2211, | |
| "step": 132 | |
| }, | |
| { | |
| "epoch": 0.4624076488483268, | |
| "grad_norm": 2.087162971496582, | |
| "learning_rate": 6.910994764397905e-05, | |
| "loss": 4.8979, | |
| "step": 133 | |
| }, | |
| { | |
| "epoch": 0.4658843980877879, | |
| "grad_norm": 1.4458317756652832, | |
| "learning_rate": 6.963350785340315e-05, | |
| "loss": 5.0776, | |
| "step": 134 | |
| }, | |
| { | |
| "epoch": 0.46936114732724904, | |
| "grad_norm": 2.3262505531311035, | |
| "learning_rate": 7.015706806282722e-05, | |
| "loss": 5.0269, | |
| "step": 135 | |
| }, | |
| { | |
| "epoch": 0.4728378965667101, | |
| "grad_norm": 2.3064498901367188, | |
| "learning_rate": 7.068062827225132e-05, | |
| "loss": 4.8792, | |
| "step": 136 | |
| }, | |
| { | |
| "epoch": 0.4763146458061712, | |
| "grad_norm": 1.705779790878296, | |
| "learning_rate": 7.12041884816754e-05, | |
| "loss": 4.9817, | |
| "step": 137 | |
| }, | |
| { | |
| "epoch": 0.47979139504563234, | |
| "grad_norm": 1.8515121936798096, | |
| "learning_rate": 7.172774869109949e-05, | |
| "loss": 5.106, | |
| "step": 138 | |
| }, | |
| { | |
| "epoch": 0.48326814428509346, | |
| "grad_norm": 1.824192762374878, | |
| "learning_rate": 7.225130890052356e-05, | |
| "loss": 4.9356, | |
| "step": 139 | |
| }, | |
| { | |
| "epoch": 0.4867448935245545, | |
| "grad_norm": 1.516313076019287, | |
| "learning_rate": 7.277486910994766e-05, | |
| "loss": 5.0519, | |
| "step": 140 | |
| }, | |
| { | |
| "epoch": 0.49022164276401564, | |
| "grad_norm": 1.6829155683517456, | |
| "learning_rate": 7.329842931937174e-05, | |
| "loss": 4.8605, | |
| "step": 141 | |
| }, | |
| { | |
| "epoch": 0.49369839200347676, | |
| "grad_norm": 1.4541552066802979, | |
| "learning_rate": 7.382198952879581e-05, | |
| "loss": 4.94, | |
| "step": 142 | |
| }, | |
| { | |
| "epoch": 0.4971751412429379, | |
| "grad_norm": 2.4809017181396484, | |
| "learning_rate": 7.43455497382199e-05, | |
| "loss": 4.959, | |
| "step": 143 | |
| }, | |
| { | |
| "epoch": 0.500651890482399, | |
| "grad_norm": 2.6929731369018555, | |
| "learning_rate": 7.486910994764398e-05, | |
| "loss": 5.0495, | |
| "step": 144 | |
| }, | |
| { | |
| "epoch": 0.5041286397218601, | |
| "grad_norm": 1.3168176412582397, | |
| "learning_rate": 7.539267015706806e-05, | |
| "loss": 4.8998, | |
| "step": 145 | |
| }, | |
| { | |
| "epoch": 0.5076053889613211, | |
| "grad_norm": 1.5721124410629272, | |
| "learning_rate": 7.591623036649214e-05, | |
| "loss": 4.8676, | |
| "step": 146 | |
| }, | |
| { | |
| "epoch": 0.5110821382007823, | |
| "grad_norm": 2.32256817817688, | |
| "learning_rate": 7.643979057591623e-05, | |
| "loss": 5.0139, | |
| "step": 147 | |
| }, | |
| { | |
| "epoch": 0.5145588874402434, | |
| "grad_norm": 2.1673014163970947, | |
| "learning_rate": 7.696335078534031e-05, | |
| "loss": 4.942, | |
| "step": 148 | |
| }, | |
| { | |
| "epoch": 0.5180356366797044, | |
| "grad_norm": 1.5646917819976807, | |
| "learning_rate": 7.74869109947644e-05, | |
| "loss": 4.7135, | |
| "step": 149 | |
| }, | |
| { | |
| "epoch": 0.5215123859191656, | |
| "grad_norm": 1.453816294670105, | |
| "learning_rate": 7.801047120418848e-05, | |
| "loss": 4.7374, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 0.5249891351586267, | |
| "grad_norm": 1.178645372390747, | |
| "learning_rate": 7.853403141361257e-05, | |
| "loss": 4.7643, | |
| "step": 151 | |
| }, | |
| { | |
| "epoch": 0.5284658843980878, | |
| "grad_norm": 2.270442247390747, | |
| "learning_rate": 7.905759162303665e-05, | |
| "loss": 4.8833, | |
| "step": 152 | |
| }, | |
| { | |
| "epoch": 0.5319426336375489, | |
| "grad_norm": 1.6220506429672241, | |
| "learning_rate": 7.958115183246073e-05, | |
| "loss": 4.8259, | |
| "step": 153 | |
| }, | |
| { | |
| "epoch": 0.53541938287701, | |
| "grad_norm": 1.7205973863601685, | |
| "learning_rate": 8.010471204188482e-05, | |
| "loss": 4.8368, | |
| "step": 154 | |
| }, | |
| { | |
| "epoch": 0.5388961321164711, | |
| "grad_norm": 2.033658504486084, | |
| "learning_rate": 8.06282722513089e-05, | |
| "loss": 4.7614, | |
| "step": 155 | |
| }, | |
| { | |
| "epoch": 0.5423728813559322, | |
| "grad_norm": 2.0730934143066406, | |
| "learning_rate": 8.115183246073299e-05, | |
| "loss": 4.7387, | |
| "step": 156 | |
| }, | |
| { | |
| "epoch": 0.5458496305953933, | |
| "grad_norm": 1.7333288192749023, | |
| "learning_rate": 8.167539267015707e-05, | |
| "loss": 4.8901, | |
| "step": 157 | |
| }, | |
| { | |
| "epoch": 0.5493263798348544, | |
| "grad_norm": 1.4700742959976196, | |
| "learning_rate": 8.219895287958116e-05, | |
| "loss": 4.9365, | |
| "step": 158 | |
| }, | |
| { | |
| "epoch": 0.5528031290743155, | |
| "grad_norm": 1.8137691020965576, | |
| "learning_rate": 8.272251308900524e-05, | |
| "loss": 4.785, | |
| "step": 159 | |
| }, | |
| { | |
| "epoch": 0.5562798783137766, | |
| "grad_norm": 1.1198782920837402, | |
| "learning_rate": 8.324607329842933e-05, | |
| "loss": 4.7751, | |
| "step": 160 | |
| }, | |
| { | |
| "epoch": 0.5597566275532377, | |
| "grad_norm": 1.5133748054504395, | |
| "learning_rate": 8.376963350785341e-05, | |
| "loss": 4.7732, | |
| "step": 161 | |
| }, | |
| { | |
| "epoch": 0.5632333767926988, | |
| "grad_norm": 1.357731819152832, | |
| "learning_rate": 8.429319371727749e-05, | |
| "loss": 4.7859, | |
| "step": 162 | |
| }, | |
| { | |
| "epoch": 0.56671012603216, | |
| "grad_norm": 1.9853086471557617, | |
| "learning_rate": 8.481675392670158e-05, | |
| "loss": 4.8198, | |
| "step": 163 | |
| }, | |
| { | |
| "epoch": 0.570186875271621, | |
| "grad_norm": 2.5406196117401123, | |
| "learning_rate": 8.534031413612566e-05, | |
| "loss": 4.8113, | |
| "step": 164 | |
| }, | |
| { | |
| "epoch": 0.5736636245110821, | |
| "grad_norm": 2.0581886768341064, | |
| "learning_rate": 8.586387434554974e-05, | |
| "loss": 4.8693, | |
| "step": 165 | |
| }, | |
| { | |
| "epoch": 0.5771403737505433, | |
| "grad_norm": 1.1141613721847534, | |
| "learning_rate": 8.638743455497382e-05, | |
| "loss": 4.7263, | |
| "step": 166 | |
| }, | |
| { | |
| "epoch": 0.5806171229900043, | |
| "grad_norm": 1.7544695138931274, | |
| "learning_rate": 8.691099476439791e-05, | |
| "loss": 4.7621, | |
| "step": 167 | |
| }, | |
| { | |
| "epoch": 0.5840938722294654, | |
| "grad_norm": 2.428950548171997, | |
| "learning_rate": 8.743455497382199e-05, | |
| "loss": 4.7001, | |
| "step": 168 | |
| }, | |
| { | |
| "epoch": 0.5875706214689266, | |
| "grad_norm": 2.087353467941284, | |
| "learning_rate": 8.795811518324608e-05, | |
| "loss": 4.7774, | |
| "step": 169 | |
| }, | |
| { | |
| "epoch": 0.5910473707083876, | |
| "grad_norm": 1.6976741552352905, | |
| "learning_rate": 8.848167539267016e-05, | |
| "loss": 4.7362, | |
| "step": 170 | |
| }, | |
| { | |
| "epoch": 0.5945241199478487, | |
| "grad_norm": 1.7949515581130981, | |
| "learning_rate": 8.900523560209425e-05, | |
| "loss": 4.8683, | |
| "step": 171 | |
| }, | |
| { | |
| "epoch": 0.5980008691873099, | |
| "grad_norm": 1.0558027029037476, | |
| "learning_rate": 8.952879581151833e-05, | |
| "loss": 4.6915, | |
| "step": 172 | |
| }, | |
| { | |
| "epoch": 0.6014776184267709, | |
| "grad_norm": 2.1845736503601074, | |
| "learning_rate": 9.00523560209424e-05, | |
| "loss": 4.6825, | |
| "step": 173 | |
| }, | |
| { | |
| "epoch": 0.6049543676662321, | |
| "grad_norm": 1.852575659751892, | |
| "learning_rate": 9.05759162303665e-05, | |
| "loss": 4.5881, | |
| "step": 174 | |
| }, | |
| { | |
| "epoch": 0.6084311169056932, | |
| "grad_norm": 2.295853853225708, | |
| "learning_rate": 9.109947643979058e-05, | |
| "loss": 4.8337, | |
| "step": 175 | |
| }, | |
| { | |
| "epoch": 0.6119078661451542, | |
| "grad_norm": 1.8543741703033447, | |
| "learning_rate": 9.162303664921467e-05, | |
| "loss": 4.7888, | |
| "step": 176 | |
| }, | |
| { | |
| "epoch": 0.6153846153846154, | |
| "grad_norm": 1.4840337038040161, | |
| "learning_rate": 9.214659685863875e-05, | |
| "loss": 4.7088, | |
| "step": 177 | |
| }, | |
| { | |
| "epoch": 0.6188613646240765, | |
| "grad_norm": 1.1520745754241943, | |
| "learning_rate": 9.267015706806284e-05, | |
| "loss": 4.7188, | |
| "step": 178 | |
| }, | |
| { | |
| "epoch": 0.6223381138635375, | |
| "grad_norm": 1.4230411052703857, | |
| "learning_rate": 9.319371727748692e-05, | |
| "loss": 4.7505, | |
| "step": 179 | |
| }, | |
| { | |
| "epoch": 0.6258148631029987, | |
| "grad_norm": 1.6348016262054443, | |
| "learning_rate": 9.371727748691101e-05, | |
| "loss": 4.7266, | |
| "step": 180 | |
| }, | |
| { | |
| "epoch": 0.6292916123424598, | |
| "grad_norm": 1.824994444847107, | |
| "learning_rate": 9.424083769633509e-05, | |
| "loss": 4.7228, | |
| "step": 181 | |
| }, | |
| { | |
| "epoch": 0.632768361581921, | |
| "grad_norm": 1.3238110542297363, | |
| "learning_rate": 9.476439790575917e-05, | |
| "loss": 4.74, | |
| "step": 182 | |
| }, | |
| { | |
| "epoch": 0.636245110821382, | |
| "grad_norm": 1.4533603191375732, | |
| "learning_rate": 9.528795811518324e-05, | |
| "loss": 4.639, | |
| "step": 183 | |
| }, | |
| { | |
| "epoch": 0.6397218600608431, | |
| "grad_norm": 1.7972502708435059, | |
| "learning_rate": 9.581151832460732e-05, | |
| "loss": 4.6973, | |
| "step": 184 | |
| }, | |
| { | |
| "epoch": 0.6431986093003043, | |
| "grad_norm": 1.4870437383651733, | |
| "learning_rate": 9.633507853403142e-05, | |
| "loss": 4.6286, | |
| "step": 185 | |
| }, | |
| { | |
| "epoch": 0.6466753585397653, | |
| "grad_norm": 1.4717066287994385, | |
| "learning_rate": 9.68586387434555e-05, | |
| "loss": 4.5181, | |
| "step": 186 | |
| }, | |
| { | |
| "epoch": 0.6501521077792264, | |
| "grad_norm": 1.4903441667556763, | |
| "learning_rate": 9.738219895287959e-05, | |
| "loss": 4.6885, | |
| "step": 187 | |
| }, | |
| { | |
| "epoch": 0.6536288570186876, | |
| "grad_norm": 2.0264744758605957, | |
| "learning_rate": 9.790575916230366e-05, | |
| "loss": 4.7168, | |
| "step": 188 | |
| }, | |
| { | |
| "epoch": 0.6571056062581486, | |
| "grad_norm": 1.5279369354248047, | |
| "learning_rate": 9.842931937172776e-05, | |
| "loss": 4.6545, | |
| "step": 189 | |
| }, | |
| { | |
| "epoch": 0.6605823554976097, | |
| "grad_norm": 1.2937990427017212, | |
| "learning_rate": 9.895287958115183e-05, | |
| "loss": 4.648, | |
| "step": 190 | |
| }, | |
| { | |
| "epoch": 0.6640591047370709, | |
| "grad_norm": 0.9555952548980713, | |
| "learning_rate": 9.947643979057593e-05, | |
| "loss": 4.5756, | |
| "step": 191 | |
| }, | |
| { | |
| "epoch": 0.6675358539765319, | |
| "grad_norm": 1.3998730182647705, | |
| "learning_rate": 0.0001, | |
| "loss": 4.538, | |
| "step": 192 | |
| }, | |
| { | |
| "epoch": 0.6710126032159931, | |
| "grad_norm": 0.8394646048545837, | |
| "learning_rate": 9.999991620751589e-05, | |
| "loss": 4.5968, | |
| "step": 193 | |
| }, | |
| { | |
| "epoch": 0.6744893524554542, | |
| "grad_norm": 1.3991713523864746, | |
| "learning_rate": 9.999966483034437e-05, | |
| "loss": 4.6261, | |
| "step": 194 | |
| }, | |
| { | |
| "epoch": 0.6779661016949152, | |
| "grad_norm": 1.7438290119171143, | |
| "learning_rate": 9.999924586932803e-05, | |
| "loss": 4.5051, | |
| "step": 195 | |
| }, | |
| { | |
| "epoch": 0.6814428509343764, | |
| "grad_norm": 1.0810681581497192, | |
| "learning_rate": 9.999865932587107e-05, | |
| "loss": 4.5634, | |
| "step": 196 | |
| }, | |
| { | |
| "epoch": 0.6849196001738375, | |
| "grad_norm": 1.470165729522705, | |
| "learning_rate": 9.99979052019394e-05, | |
| "loss": 4.7187, | |
| "step": 197 | |
| }, | |
| { | |
| "epoch": 0.6883963494132985, | |
| "grad_norm": 1.3457802534103394, | |
| "learning_rate": 9.999698350006063e-05, | |
| "loss": 4.5977, | |
| "step": 198 | |
| }, | |
| { | |
| "epoch": 0.6918730986527597, | |
| "grad_norm": 1.7207729816436768, | |
| "learning_rate": 9.999589422332404e-05, | |
| "loss": 4.5589, | |
| "step": 199 | |
| }, | |
| { | |
| "epoch": 0.6953498478922208, | |
| "grad_norm": 1.0946606397628784, | |
| "learning_rate": 9.999463737538053e-05, | |
| "loss": 4.3829, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.6988265971316818, | |
| "grad_norm": 1.4563919305801392, | |
| "learning_rate": 9.99932129604427e-05, | |
| "loss": 4.5988, | |
| "step": 201 | |
| }, | |
| { | |
| "epoch": 0.702303346371143, | |
| "grad_norm": 1.7498416900634766, | |
| "learning_rate": 9.999162098328474e-05, | |
| "loss": 4.6138, | |
| "step": 202 | |
| }, | |
| { | |
| "epoch": 0.7057800956106041, | |
| "grad_norm": 1.4779905080795288, | |
| "learning_rate": 9.998986144924251e-05, | |
| "loss": 4.6133, | |
| "step": 203 | |
| }, | |
| { | |
| "epoch": 0.7092568448500652, | |
| "grad_norm": 0.8581809401512146, | |
| "learning_rate": 9.99879343642134e-05, | |
| "loss": 4.5436, | |
| "step": 204 | |
| }, | |
| { | |
| "epoch": 0.7127335940895263, | |
| "grad_norm": 1.22317373752594, | |
| "learning_rate": 9.998583973465646e-05, | |
| "loss": 4.5361, | |
| "step": 205 | |
| }, | |
| { | |
| "epoch": 0.7162103433289874, | |
| "grad_norm": 1.2488147020339966, | |
| "learning_rate": 9.998357756759222e-05, | |
| "loss": 4.5957, | |
| "step": 206 | |
| }, | |
| { | |
| "epoch": 0.7196870925684485, | |
| "grad_norm": 1.6058237552642822, | |
| "learning_rate": 9.998114787060282e-05, | |
| "loss": 4.562, | |
| "step": 207 | |
| }, | |
| { | |
| "epoch": 0.7231638418079096, | |
| "grad_norm": 1.423363208770752, | |
| "learning_rate": 9.997855065183184e-05, | |
| "loss": 4.6001, | |
| "step": 208 | |
| }, | |
| { | |
| "epoch": 0.7266405910473707, | |
| "grad_norm": 0.9326280951499939, | |
| "learning_rate": 9.99757859199844e-05, | |
| "loss": 4.4833, | |
| "step": 209 | |
| }, | |
| { | |
| "epoch": 0.7301173402868318, | |
| "grad_norm": 1.4092398881912231, | |
| "learning_rate": 9.997285368432703e-05, | |
| "loss": 4.447, | |
| "step": 210 | |
| }, | |
| { | |
| "epoch": 0.7335940895262929, | |
| "grad_norm": 0.8156947493553162, | |
| "learning_rate": 9.996975395468772e-05, | |
| "loss": 4.5549, | |
| "step": 211 | |
| }, | |
| { | |
| "epoch": 0.737070838765754, | |
| "grad_norm": 1.4543663263320923, | |
| "learning_rate": 9.996648674145583e-05, | |
| "loss": 4.4712, | |
| "step": 212 | |
| }, | |
| { | |
| "epoch": 0.7405475880052151, | |
| "grad_norm": 1.1543498039245605, | |
| "learning_rate": 9.996305205558207e-05, | |
| "loss": 4.7086, | |
| "step": 213 | |
| }, | |
| { | |
| "epoch": 0.7440243372446762, | |
| "grad_norm": 1.3337583541870117, | |
| "learning_rate": 9.995944990857849e-05, | |
| "loss": 4.5306, | |
| "step": 214 | |
| }, | |
| { | |
| "epoch": 0.7475010864841374, | |
| "grad_norm": 1.3848955631256104, | |
| "learning_rate": 9.995568031251838e-05, | |
| "loss": 4.4306, | |
| "step": 215 | |
| }, | |
| { | |
| "epoch": 0.7509778357235984, | |
| "grad_norm": 1.0516972541809082, | |
| "learning_rate": 9.995174328003631e-05, | |
| "loss": 4.5513, | |
| "step": 216 | |
| }, | |
| { | |
| "epoch": 0.7544545849630595, | |
| "grad_norm": 1.375748634338379, | |
| "learning_rate": 9.9947638824328e-05, | |
| "loss": 4.5299, | |
| "step": 217 | |
| }, | |
| { | |
| "epoch": 0.7579313342025207, | |
| "grad_norm": 0.9336188435554504, | |
| "learning_rate": 9.99433669591504e-05, | |
| "loss": 4.5093, | |
| "step": 218 | |
| }, | |
| { | |
| "epoch": 0.7614080834419817, | |
| "grad_norm": 1.1167926788330078, | |
| "learning_rate": 9.99389276988215e-05, | |
| "loss": 4.5166, | |
| "step": 219 | |
| }, | |
| { | |
| "epoch": 0.7648848326814428, | |
| "grad_norm": 1.5452927350997925, | |
| "learning_rate": 9.993432105822034e-05, | |
| "loss": 4.5704, | |
| "step": 220 | |
| }, | |
| { | |
| "epoch": 0.768361581920904, | |
| "grad_norm": 0.8405366539955139, | |
| "learning_rate": 9.9929547052787e-05, | |
| "loss": 4.4324, | |
| "step": 221 | |
| }, | |
| { | |
| "epoch": 0.771838331160365, | |
| "grad_norm": 0.9402779936790466, | |
| "learning_rate": 9.992460569852256e-05, | |
| "loss": 4.5248, | |
| "step": 222 | |
| }, | |
| { | |
| "epoch": 0.7753150803998262, | |
| "grad_norm": 1.0658060312271118, | |
| "learning_rate": 9.991949701198889e-05, | |
| "loss": 4.4155, | |
| "step": 223 | |
| }, | |
| { | |
| "epoch": 0.7787918296392873, | |
| "grad_norm": 1.407897710800171, | |
| "learning_rate": 9.99142210103088e-05, | |
| "loss": 4.517, | |
| "step": 224 | |
| }, | |
| { | |
| "epoch": 0.7822685788787483, | |
| "grad_norm": 1.3789775371551514, | |
| "learning_rate": 9.990877771116589e-05, | |
| "loss": 4.5166, | |
| "step": 225 | |
| }, | |
| { | |
| "epoch": 0.7857453281182095, | |
| "grad_norm": 1.715079665184021, | |
| "learning_rate": 9.99031671328044e-05, | |
| "loss": 4.4957, | |
| "step": 226 | |
| }, | |
| { | |
| "epoch": 0.7892220773576706, | |
| "grad_norm": 0.7953473329544067, | |
| "learning_rate": 9.989738929402934e-05, | |
| "loss": 4.4265, | |
| "step": 227 | |
| }, | |
| { | |
| "epoch": 0.7926988265971316, | |
| "grad_norm": 1.3501601219177246, | |
| "learning_rate": 9.98914442142063e-05, | |
| "loss": 4.4762, | |
| "step": 228 | |
| }, | |
| { | |
| "epoch": 0.7961755758365928, | |
| "grad_norm": 1.1037392616271973, | |
| "learning_rate": 9.988533191326137e-05, | |
| "loss": 4.553, | |
| "step": 229 | |
| }, | |
| { | |
| "epoch": 0.7996523250760539, | |
| "grad_norm": 0.9815847873687744, | |
| "learning_rate": 9.987905241168117e-05, | |
| "loss": 4.4133, | |
| "step": 230 | |
| }, | |
| { | |
| "epoch": 0.803129074315515, | |
| "grad_norm": 0.9624298214912415, | |
| "learning_rate": 9.987260573051269e-05, | |
| "loss": 4.3117, | |
| "step": 231 | |
| }, | |
| { | |
| "epoch": 0.8066058235549761, | |
| "grad_norm": 0.8100147843360901, | |
| "learning_rate": 9.986599189136325e-05, | |
| "loss": 4.4257, | |
| "step": 232 | |
| }, | |
| { | |
| "epoch": 0.8100825727944372, | |
| "grad_norm": 0.8231883645057678, | |
| "learning_rate": 9.98592109164005e-05, | |
| "loss": 4.3901, | |
| "step": 233 | |
| }, | |
| { | |
| "epoch": 0.8135593220338984, | |
| "grad_norm": 0.7882739901542664, | |
| "learning_rate": 9.985226282835216e-05, | |
| "loss": 4.4451, | |
| "step": 234 | |
| }, | |
| { | |
| "epoch": 0.8170360712733594, | |
| "grad_norm": 1.0251091718673706, | |
| "learning_rate": 9.984514765050619e-05, | |
| "loss": 4.4834, | |
| "step": 235 | |
| }, | |
| { | |
| "epoch": 0.8205128205128205, | |
| "grad_norm": 1.483324408531189, | |
| "learning_rate": 9.983786540671051e-05, | |
| "loss": 4.3826, | |
| "step": 236 | |
| }, | |
| { | |
| "epoch": 0.8239895697522817, | |
| "grad_norm": 1.0795848369598389, | |
| "learning_rate": 9.983041612137301e-05, | |
| "loss": 4.4195, | |
| "step": 237 | |
| }, | |
| { | |
| "epoch": 0.8274663189917427, | |
| "grad_norm": 0.9076191186904907, | |
| "learning_rate": 9.982279981946143e-05, | |
| "loss": 4.3718, | |
| "step": 238 | |
| }, | |
| { | |
| "epoch": 0.8309430682312038, | |
| "grad_norm": 0.7669966816902161, | |
| "learning_rate": 9.981501652650337e-05, | |
| "loss": 4.3789, | |
| "step": 239 | |
| }, | |
| { | |
| "epoch": 0.834419817470665, | |
| "grad_norm": 0.9206002354621887, | |
| "learning_rate": 9.980706626858607e-05, | |
| "loss": 4.4972, | |
| "step": 240 | |
| }, | |
| { | |
| "epoch": 0.837896566710126, | |
| "grad_norm": 0.9446786046028137, | |
| "learning_rate": 9.979894907235639e-05, | |
| "loss": 4.4284, | |
| "step": 241 | |
| }, | |
| { | |
| "epoch": 0.8413733159495871, | |
| "grad_norm": 0.898154616355896, | |
| "learning_rate": 9.979066496502074e-05, | |
| "loss": 4.3296, | |
| "step": 242 | |
| }, | |
| { | |
| "epoch": 0.8448500651890483, | |
| "grad_norm": 1.006205677986145, | |
| "learning_rate": 9.978221397434496e-05, | |
| "loss": 4.3849, | |
| "step": 243 | |
| }, | |
| { | |
| "epoch": 0.8483268144285093, | |
| "grad_norm": 1.512513279914856, | |
| "learning_rate": 9.977359612865423e-05, | |
| "loss": 4.4939, | |
| "step": 244 | |
| }, | |
| { | |
| "epoch": 0.8518035636679705, | |
| "grad_norm": 0.9920049905776978, | |
| "learning_rate": 9.976481145683299e-05, | |
| "loss": 4.3769, | |
| "step": 245 | |
| }, | |
| { | |
| "epoch": 0.8552803129074316, | |
| "grad_norm": 0.7966930866241455, | |
| "learning_rate": 9.97558599883248e-05, | |
| "loss": 4.4977, | |
| "step": 246 | |
| }, | |
| { | |
| "epoch": 0.8587570621468926, | |
| "grad_norm": 0.715207040309906, | |
| "learning_rate": 9.974674175313228e-05, | |
| "loss": 4.4068, | |
| "step": 247 | |
| }, | |
| { | |
| "epoch": 0.8622338113863538, | |
| "grad_norm": 0.9936574697494507, | |
| "learning_rate": 9.973745678181705e-05, | |
| "loss": 4.477, | |
| "step": 248 | |
| }, | |
| { | |
| "epoch": 0.8657105606258149, | |
| "grad_norm": 1.4594191312789917, | |
| "learning_rate": 9.972800510549951e-05, | |
| "loss": 4.4045, | |
| "step": 249 | |
| }, | |
| { | |
| "epoch": 0.8691873098652759, | |
| "grad_norm": 0.7639303207397461, | |
| "learning_rate": 9.971838675585888e-05, | |
| "loss": 4.3301, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 0.8726640591047371, | |
| "grad_norm": 0.901237964630127, | |
| "learning_rate": 9.970860176513291e-05, | |
| "loss": 4.5048, | |
| "step": 251 | |
| }, | |
| { | |
| "epoch": 0.8761408083441982, | |
| "grad_norm": 1.1843230724334717, | |
| "learning_rate": 9.9698650166118e-05, | |
| "loss": 4.4119, | |
| "step": 252 | |
| }, | |
| { | |
| "epoch": 0.8796175575836592, | |
| "grad_norm": 0.8304505944252014, | |
| "learning_rate": 9.96885319921689e-05, | |
| "loss": 4.3257, | |
| "step": 253 | |
| }, | |
| { | |
| "epoch": 0.8830943068231204, | |
| "grad_norm": 0.7715577483177185, | |
| "learning_rate": 9.96782472771987e-05, | |
| "loss": 4.4614, | |
| "step": 254 | |
| }, | |
| { | |
| "epoch": 0.8865710560625815, | |
| "grad_norm": 0.9905951619148254, | |
| "learning_rate": 9.966779605567866e-05, | |
| "loss": 4.389, | |
| "step": 255 | |
| }, | |
| { | |
| "epoch": 0.8900478053020426, | |
| "grad_norm": 0.8776286244392395, | |
| "learning_rate": 9.965717836263812e-05, | |
| "loss": 4.4466, | |
| "step": 256 | |
| }, | |
| { | |
| "epoch": 0.8935245545415037, | |
| "grad_norm": 0.7492311596870422, | |
| "learning_rate": 9.964639423366442e-05, | |
| "loss": 4.4444, | |
| "step": 257 | |
| }, | |
| { | |
| "epoch": 0.8970013037809648, | |
| "grad_norm": 0.7437993288040161, | |
| "learning_rate": 9.96354437049027e-05, | |
| "loss": 4.4429, | |
| "step": 258 | |
| }, | |
| { | |
| "epoch": 0.9004780530204259, | |
| "grad_norm": 0.9150131344795227, | |
| "learning_rate": 9.962432681305586e-05, | |
| "loss": 4.2684, | |
| "step": 259 | |
| }, | |
| { | |
| "epoch": 0.903954802259887, | |
| "grad_norm": 0.9309634566307068, | |
| "learning_rate": 9.961304359538437e-05, | |
| "loss": 4.4381, | |
| "step": 260 | |
| }, | |
| { | |
| "epoch": 0.9074315514993481, | |
| "grad_norm": 1.0173296928405762, | |
| "learning_rate": 9.960159408970616e-05, | |
| "loss": 4.3821, | |
| "step": 261 | |
| }, | |
| { | |
| "epoch": 0.9109083007388092, | |
| "grad_norm": 1.237082839012146, | |
| "learning_rate": 9.958997833439657e-05, | |
| "loss": 4.3013, | |
| "step": 262 | |
| }, | |
| { | |
| "epoch": 0.9143850499782703, | |
| "grad_norm": 0.8809208869934082, | |
| "learning_rate": 9.95781963683881e-05, | |
| "loss": 4.4633, | |
| "step": 263 | |
| }, | |
| { | |
| "epoch": 0.9178617992177314, | |
| "grad_norm": 0.8925305008888245, | |
| "learning_rate": 9.956624823117036e-05, | |
| "loss": 4.3338, | |
| "step": 264 | |
| }, | |
| { | |
| "epoch": 0.9213385484571925, | |
| "grad_norm": 1.145957589149475, | |
| "learning_rate": 9.955413396278989e-05, | |
| "loss": 4.363, | |
| "step": 265 | |
| }, | |
| { | |
| "epoch": 0.9248152976966536, | |
| "grad_norm": 0.9999008178710938, | |
| "learning_rate": 9.954185360385013e-05, | |
| "loss": 4.3985, | |
| "step": 266 | |
| }, | |
| { | |
| "epoch": 0.9282920469361148, | |
| "grad_norm": 1.0845799446105957, | |
| "learning_rate": 9.952940719551112e-05, | |
| "loss": 4.4413, | |
| "step": 267 | |
| }, | |
| { | |
| "epoch": 0.9317687961755758, | |
| "grad_norm": 1.0020147562026978, | |
| "learning_rate": 9.951679477948947e-05, | |
| "loss": 4.2964, | |
| "step": 268 | |
| }, | |
| { | |
| "epoch": 0.9352455454150369, | |
| "grad_norm": 0.8358871340751648, | |
| "learning_rate": 9.95040163980582e-05, | |
| "loss": 4.4035, | |
| "step": 269 | |
| }, | |
| { | |
| "epoch": 0.9387222946544981, | |
| "grad_norm": 0.9009070992469788, | |
| "learning_rate": 9.949107209404665e-05, | |
| "loss": 4.51, | |
| "step": 270 | |
| }, | |
| { | |
| "epoch": 0.9421990438939591, | |
| "grad_norm": 1.1610163450241089, | |
| "learning_rate": 9.947796191084017e-05, | |
| "loss": 4.4029, | |
| "step": 271 | |
| }, | |
| { | |
| "epoch": 0.9456757931334202, | |
| "grad_norm": 0.9122158288955688, | |
| "learning_rate": 9.946468589238021e-05, | |
| "loss": 4.2494, | |
| "step": 272 | |
| }, | |
| { | |
| "epoch": 0.9491525423728814, | |
| "grad_norm": 0.908501148223877, | |
| "learning_rate": 9.945124408316398e-05, | |
| "loss": 4.3541, | |
| "step": 273 | |
| }, | |
| { | |
| "epoch": 0.9526292916123424, | |
| "grad_norm": 0.9475510120391846, | |
| "learning_rate": 9.943763652824436e-05, | |
| "loss": 4.3241, | |
| "step": 274 | |
| }, | |
| { | |
| "epoch": 0.9561060408518036, | |
| "grad_norm": 0.8620980978012085, | |
| "learning_rate": 9.942386327322978e-05, | |
| "loss": 4.2955, | |
| "step": 275 | |
| }, | |
| { | |
| "epoch": 0.9595827900912647, | |
| "grad_norm": 0.8473606109619141, | |
| "learning_rate": 9.940992436428409e-05, | |
| "loss": 4.2834, | |
| "step": 276 | |
| }, | |
| { | |
| "epoch": 0.9630595393307257, | |
| "grad_norm": 0.7120651006698608, | |
| "learning_rate": 9.93958198481263e-05, | |
| "loss": 4.4614, | |
| "step": 277 | |
| }, | |
| { | |
| "epoch": 0.9665362885701869, | |
| "grad_norm": 0.613216757774353, | |
| "learning_rate": 9.938154977203049e-05, | |
| "loss": 4.3594, | |
| "step": 278 | |
| }, | |
| { | |
| "epoch": 0.970013037809648, | |
| "grad_norm": 0.7402132153511047, | |
| "learning_rate": 9.93671141838257e-05, | |
| "loss": 4.377, | |
| "step": 279 | |
| }, | |
| { | |
| "epoch": 0.973489787049109, | |
| "grad_norm": 0.6736766695976257, | |
| "learning_rate": 9.935251313189564e-05, | |
| "loss": 4.2954, | |
| "step": 280 | |
| }, | |
| { | |
| "epoch": 0.9769665362885702, | |
| "grad_norm": 0.6440555453300476, | |
| "learning_rate": 9.93377466651787e-05, | |
| "loss": 4.342, | |
| "step": 281 | |
| }, | |
| { | |
| "epoch": 0.9804432855280313, | |
| "grad_norm": 0.6008772850036621, | |
| "learning_rate": 9.932281483316758e-05, | |
| "loss": 4.2842, | |
| "step": 282 | |
| }, | |
| { | |
| "epoch": 0.9839200347674923, | |
| "grad_norm": 0.6302841901779175, | |
| "learning_rate": 9.930771768590933e-05, | |
| "loss": 4.347, | |
| "step": 283 | |
| }, | |
| { | |
| "epoch": 0.9873967840069535, | |
| "grad_norm": 0.6728438138961792, | |
| "learning_rate": 9.929245527400503e-05, | |
| "loss": 4.403, | |
| "step": 284 | |
| }, | |
| { | |
| "epoch": 0.9908735332464146, | |
| "grad_norm": 0.8032251596450806, | |
| "learning_rate": 9.927702764860973e-05, | |
| "loss": 4.3079, | |
| "step": 285 | |
| }, | |
| { | |
| "epoch": 0.9943502824858758, | |
| "grad_norm": 0.8664717674255371, | |
| "learning_rate": 9.926143486143214e-05, | |
| "loss": 4.3445, | |
| "step": 286 | |
| }, | |
| { | |
| "epoch": 0.9978270317253368, | |
| "grad_norm": 0.9623958468437195, | |
| "learning_rate": 9.924567696473464e-05, | |
| "loss": 4.2848, | |
| "step": 287 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "grad_norm": 0.8602651357650757, | |
| "learning_rate": 9.922975401133293e-05, | |
| "loss": 4.4036, | |
| "step": 288 | |
| }, | |
| { | |
| "epoch": 1.0034767492394612, | |
| "grad_norm": 1.0667355060577393, | |
| "learning_rate": 9.921366605459597e-05, | |
| "loss": 4.1641, | |
| "step": 289 | |
| }, | |
| { | |
| "epoch": 1.0069534984789221, | |
| "grad_norm": 1.0437101125717163, | |
| "learning_rate": 9.919741314844577e-05, | |
| "loss": 4.2776, | |
| "step": 290 | |
| }, | |
| { | |
| "epoch": 1.0104302477183833, | |
| "grad_norm": 1.0120586156845093, | |
| "learning_rate": 9.918099534735718e-05, | |
| "loss": 4.2515, | |
| "step": 291 | |
| }, | |
| { | |
| "epoch": 1.0139069969578445, | |
| "grad_norm": 0.9842764735221863, | |
| "learning_rate": 9.916441270635772e-05, | |
| "loss": 4.2831, | |
| "step": 292 | |
| }, | |
| { | |
| "epoch": 1.0173837461973054, | |
| "grad_norm": 0.8590813279151917, | |
| "learning_rate": 9.914766528102744e-05, | |
| "loss": 4.2932, | |
| "step": 293 | |
| }, | |
| { | |
| "epoch": 1.0208604954367666, | |
| "grad_norm": 0.8668161034584045, | |
| "learning_rate": 9.913075312749866e-05, | |
| "loss": 4.1978, | |
| "step": 294 | |
| }, | |
| { | |
| "epoch": 1.0243372446762278, | |
| "grad_norm": 0.6615017652511597, | |
| "learning_rate": 9.911367630245582e-05, | |
| "loss": 4.3328, | |
| "step": 295 | |
| }, | |
| { | |
| "epoch": 1.0278139939156887, | |
| "grad_norm": 0.802143931388855, | |
| "learning_rate": 9.909643486313533e-05, | |
| "loss": 4.2053, | |
| "step": 296 | |
| }, | |
| { | |
| "epoch": 1.03129074315515, | |
| "grad_norm": 0.9250998497009277, | |
| "learning_rate": 9.907902886732532e-05, | |
| "loss": 4.2751, | |
| "step": 297 | |
| }, | |
| { | |
| "epoch": 1.034767492394611, | |
| "grad_norm": 1.1024829149246216, | |
| "learning_rate": 9.90614583733654e-05, | |
| "loss": 4.3187, | |
| "step": 298 | |
| }, | |
| { | |
| "epoch": 1.038244241634072, | |
| "grad_norm": 0.8108821511268616, | |
| "learning_rate": 9.904372344014665e-05, | |
| "loss": 4.234, | |
| "step": 299 | |
| }, | |
| { | |
| "epoch": 1.0417209908735332, | |
| "grad_norm": 0.7548679709434509, | |
| "learning_rate": 9.90258241271112e-05, | |
| "loss": 4.2868, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 1.0451977401129944, | |
| "grad_norm": 0.7505620718002319, | |
| "learning_rate": 9.900776049425215e-05, | |
| "loss": 4.3266, | |
| "step": 301 | |
| }, | |
| { | |
| "epoch": 1.0486744893524556, | |
| "grad_norm": 0.7533482909202576, | |
| "learning_rate": 9.898953260211338e-05, | |
| "loss": 4.3681, | |
| "step": 302 | |
| }, | |
| { | |
| "epoch": 1.0521512385919165, | |
| "grad_norm": 0.7146593928337097, | |
| "learning_rate": 9.897114051178934e-05, | |
| "loss": 4.3203, | |
| "step": 303 | |
| }, | |
| { | |
| "epoch": 1.0556279878313777, | |
| "grad_norm": 0.7534311413764954, | |
| "learning_rate": 9.895258428492475e-05, | |
| "loss": 4.3486, | |
| "step": 304 | |
| }, | |
| { | |
| "epoch": 1.0591047370708389, | |
| "grad_norm": 0.7221617102622986, | |
| "learning_rate": 9.89338639837145e-05, | |
| "loss": 4.3302, | |
| "step": 305 | |
| }, | |
| { | |
| "epoch": 1.0625814863102998, | |
| "grad_norm": 0.7446046471595764, | |
| "learning_rate": 9.891497967090344e-05, | |
| "loss": 4.2775, | |
| "step": 306 | |
| }, | |
| { | |
| "epoch": 1.066058235549761, | |
| "grad_norm": 0.9568740725517273, | |
| "learning_rate": 9.889593140978608e-05, | |
| "loss": 4.3143, | |
| "step": 307 | |
| }, | |
| { | |
| "epoch": 1.0695349847892222, | |
| "grad_norm": 1.138326644897461, | |
| "learning_rate": 9.887671926420648e-05, | |
| "loss": 4.2791, | |
| "step": 308 | |
| }, | |
| { | |
| "epoch": 1.073011734028683, | |
| "grad_norm": 1.1122509241104126, | |
| "learning_rate": 9.885734329855798e-05, | |
| "loss": 4.4454, | |
| "step": 309 | |
| }, | |
| { | |
| "epoch": 1.0764884832681443, | |
| "grad_norm": 0.7699280977249146, | |
| "learning_rate": 9.883780357778299e-05, | |
| "loss": 4.2428, | |
| "step": 310 | |
| }, | |
| { | |
| "epoch": 1.0799652325076055, | |
| "grad_norm": 0.7796168923377991, | |
| "learning_rate": 9.881810016737276e-05, | |
| "loss": 4.2749, | |
| "step": 311 | |
| }, | |
| { | |
| "epoch": 1.0834419817470664, | |
| "grad_norm": 0.9707072377204895, | |
| "learning_rate": 9.879823313336722e-05, | |
| "loss": 4.2417, | |
| "step": 312 | |
| }, | |
| { | |
| "epoch": 1.0869187309865276, | |
| "grad_norm": 1.184442162513733, | |
| "learning_rate": 9.877820254235471e-05, | |
| "loss": 4.1928, | |
| "step": 313 | |
| }, | |
| { | |
| "epoch": 1.0903954802259888, | |
| "grad_norm": 0.8373008966445923, | |
| "learning_rate": 9.87580084614717e-05, | |
| "loss": 4.3421, | |
| "step": 314 | |
| }, | |
| { | |
| "epoch": 1.0938722294654497, | |
| "grad_norm": 0.7426689863204956, | |
| "learning_rate": 9.873765095840271e-05, | |
| "loss": 4.1962, | |
| "step": 315 | |
| }, | |
| { | |
| "epoch": 1.0973489787049109, | |
| "grad_norm": 0.7142062187194824, | |
| "learning_rate": 9.871713010137997e-05, | |
| "loss": 4.2231, | |
| "step": 316 | |
| }, | |
| { | |
| "epoch": 1.100825727944372, | |
| "grad_norm": 0.6791448593139648, | |
| "learning_rate": 9.869644595918323e-05, | |
| "loss": 4.305, | |
| "step": 317 | |
| }, | |
| { | |
| "epoch": 1.104302477183833, | |
| "grad_norm": 0.7155593633651733, | |
| "learning_rate": 9.86755986011395e-05, | |
| "loss": 4.1596, | |
| "step": 318 | |
| }, | |
| { | |
| "epoch": 1.1077792264232942, | |
| "grad_norm": 0.6412737965583801, | |
| "learning_rate": 9.865458809712286e-05, | |
| "loss": 4.239, | |
| "step": 319 | |
| }, | |
| { | |
| "epoch": 1.1112559756627554, | |
| "grad_norm": 0.6325717568397522, | |
| "learning_rate": 9.86334145175542e-05, | |
| "loss": 4.1725, | |
| "step": 320 | |
| }, | |
| { | |
| "epoch": 1.1147327249022165, | |
| "grad_norm": 0.5991365909576416, | |
| "learning_rate": 9.861207793340101e-05, | |
| "loss": 4.3368, | |
| "step": 321 | |
| }, | |
| { | |
| "epoch": 1.1182094741416775, | |
| "grad_norm": 0.7135671973228455, | |
| "learning_rate": 9.859057841617709e-05, | |
| "loss": 4.3057, | |
| "step": 322 | |
| }, | |
| { | |
| "epoch": 1.1216862233811387, | |
| "grad_norm": 0.8155140280723572, | |
| "learning_rate": 9.856891603794235e-05, | |
| "loss": 4.3181, | |
| "step": 323 | |
| }, | |
| { | |
| "epoch": 1.1251629726205998, | |
| "grad_norm": 0.8652500510215759, | |
| "learning_rate": 9.85470908713026e-05, | |
| "loss": 4.1882, | |
| "step": 324 | |
| }, | |
| { | |
| "epoch": 1.1286397218600608, | |
| "grad_norm": 0.7795478105545044, | |
| "learning_rate": 9.852510298940922e-05, | |
| "loss": 4.2282, | |
| "step": 325 | |
| }, | |
| { | |
| "epoch": 1.132116471099522, | |
| "grad_norm": 0.8294603228569031, | |
| "learning_rate": 9.850295246595898e-05, | |
| "loss": 4.2696, | |
| "step": 326 | |
| }, | |
| { | |
| "epoch": 1.1355932203389831, | |
| "grad_norm": 1.0049405097961426, | |
| "learning_rate": 9.848063937519376e-05, | |
| "loss": 4.2633, | |
| "step": 327 | |
| }, | |
| { | |
| "epoch": 1.139069969578444, | |
| "grad_norm": 1.0607136487960815, | |
| "learning_rate": 9.845816379190036e-05, | |
| "loss": 4.1424, | |
| "step": 328 | |
| }, | |
| { | |
| "epoch": 1.1425467188179053, | |
| "grad_norm": 0.9381546974182129, | |
| "learning_rate": 9.843552579141016e-05, | |
| "loss": 4.2441, | |
| "step": 329 | |
| }, | |
| { | |
| "epoch": 1.1460234680573664, | |
| "grad_norm": 0.8802816271781921, | |
| "learning_rate": 9.841272544959892e-05, | |
| "loss": 4.3125, | |
| "step": 330 | |
| }, | |
| { | |
| "epoch": 1.1495002172968274, | |
| "grad_norm": 0.8450530767440796, | |
| "learning_rate": 9.838976284288657e-05, | |
| "loss": 4.2173, | |
| "step": 331 | |
| }, | |
| { | |
| "epoch": 1.1529769665362886, | |
| "grad_norm": 0.8956658244132996, | |
| "learning_rate": 9.836663804823683e-05, | |
| "loss": 4.1585, | |
| "step": 332 | |
| }, | |
| { | |
| "epoch": 1.1564537157757497, | |
| "grad_norm": 0.8815373182296753, | |
| "learning_rate": 9.834335114315708e-05, | |
| "loss": 4.1268, | |
| "step": 333 | |
| }, | |
| { | |
| "epoch": 1.1599304650152107, | |
| "grad_norm": 0.785399317741394, | |
| "learning_rate": 9.831990220569801e-05, | |
| "loss": 4.264, | |
| "step": 334 | |
| }, | |
| { | |
| "epoch": 1.1634072142546719, | |
| "grad_norm": 0.7312754988670349, | |
| "learning_rate": 9.829629131445342e-05, | |
| "loss": 4.1669, | |
| "step": 335 | |
| }, | |
| { | |
| "epoch": 1.166883963494133, | |
| "grad_norm": 0.7686638832092285, | |
| "learning_rate": 9.827251854855991e-05, | |
| "loss": 4.1686, | |
| "step": 336 | |
| }, | |
| { | |
| "epoch": 1.170360712733594, | |
| "grad_norm": 0.7417734265327454, | |
| "learning_rate": 9.824858398769665e-05, | |
| "loss": 4.2047, | |
| "step": 337 | |
| }, | |
| { | |
| "epoch": 1.1738374619730552, | |
| "grad_norm": 0.8165032863616943, | |
| "learning_rate": 9.82244877120851e-05, | |
| "loss": 4.2661, | |
| "step": 338 | |
| }, | |
| { | |
| "epoch": 1.1773142112125163, | |
| "grad_norm": 0.9953269958496094, | |
| "learning_rate": 9.820022980248871e-05, | |
| "loss": 4.143, | |
| "step": 339 | |
| }, | |
| { | |
| "epoch": 1.1807909604519775, | |
| "grad_norm": 1.0205968618392944, | |
| "learning_rate": 9.817581034021272e-05, | |
| "loss": 4.146, | |
| "step": 340 | |
| }, | |
| { | |
| "epoch": 1.1842677096914385, | |
| "grad_norm": 0.8952431082725525, | |
| "learning_rate": 9.815122940710382e-05, | |
| "loss": 4.2152, | |
| "step": 341 | |
| }, | |
| { | |
| "epoch": 1.1877444589308996, | |
| "grad_norm": 0.799082338809967, | |
| "learning_rate": 9.81264870855499e-05, | |
| "loss": 4.2976, | |
| "step": 342 | |
| }, | |
| { | |
| "epoch": 1.1912212081703606, | |
| "grad_norm": 0.8718714714050293, | |
| "learning_rate": 9.81015834584798e-05, | |
| "loss": 4.2623, | |
| "step": 343 | |
| }, | |
| { | |
| "epoch": 1.1946979574098218, | |
| "grad_norm": 0.8109663724899292, | |
| "learning_rate": 9.807651860936297e-05, | |
| "loss": 4.0596, | |
| "step": 344 | |
| }, | |
| { | |
| "epoch": 1.198174706649283, | |
| "grad_norm": 0.742445707321167, | |
| "learning_rate": 9.805129262220927e-05, | |
| "loss": 4.165, | |
| "step": 345 | |
| }, | |
| { | |
| "epoch": 1.2016514558887441, | |
| "grad_norm": 0.7259525060653687, | |
| "learning_rate": 9.802590558156862e-05, | |
| "loss": 4.2433, | |
| "step": 346 | |
| }, | |
| { | |
| "epoch": 1.205128205128205, | |
| "grad_norm": 0.5326026678085327, | |
| "learning_rate": 9.800035757253074e-05, | |
| "loss": 4.1949, | |
| "step": 347 | |
| }, | |
| { | |
| "epoch": 1.2086049543676662, | |
| "grad_norm": 0.6962956190109253, | |
| "learning_rate": 9.797464868072488e-05, | |
| "loss": 4.2642, | |
| "step": 348 | |
| }, | |
| { | |
| "epoch": 1.2120817036071274, | |
| "grad_norm": 0.6962010860443115, | |
| "learning_rate": 9.794877899231951e-05, | |
| "loss": 4.2872, | |
| "step": 349 | |
| }, | |
| { | |
| "epoch": 1.2155584528465884, | |
| "grad_norm": 0.596272885799408, | |
| "learning_rate": 9.792274859402205e-05, | |
| "loss": 4.2611, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 1.2190352020860495, | |
| "grad_norm": 0.6690101027488708, | |
| "learning_rate": 9.789655757307858e-05, | |
| "loss": 4.3164, | |
| "step": 351 | |
| }, | |
| { | |
| "epoch": 1.2225119513255107, | |
| "grad_norm": 0.64564049243927, | |
| "learning_rate": 9.787020601727352e-05, | |
| "loss": 4.2659, | |
| "step": 352 | |
| }, | |
| { | |
| "epoch": 1.2259887005649717, | |
| "grad_norm": 0.5660618543624878, | |
| "learning_rate": 9.784369401492937e-05, | |
| "loss": 4.2104, | |
| "step": 353 | |
| }, | |
| { | |
| "epoch": 1.2294654498044328, | |
| "grad_norm": 0.5513273477554321, | |
| "learning_rate": 9.781702165490639e-05, | |
| "loss": 4.1671, | |
| "step": 354 | |
| }, | |
| { | |
| "epoch": 1.232942199043894, | |
| "grad_norm": 0.617933452129364, | |
| "learning_rate": 9.779018902660228e-05, | |
| "loss": 4.2193, | |
| "step": 355 | |
| }, | |
| { | |
| "epoch": 1.236418948283355, | |
| "grad_norm": 0.632296621799469, | |
| "learning_rate": 9.776319621995201e-05, | |
| "loss": 4.1993, | |
| "step": 356 | |
| }, | |
| { | |
| "epoch": 1.2398956975228161, | |
| "grad_norm": 0.6899546384811401, | |
| "learning_rate": 9.773604332542729e-05, | |
| "loss": 4.1942, | |
| "step": 357 | |
| }, | |
| { | |
| "epoch": 1.2433724467622773, | |
| "grad_norm": 0.9000681638717651, | |
| "learning_rate": 9.770873043403648e-05, | |
| "loss": 4.2009, | |
| "step": 358 | |
| }, | |
| { | |
| "epoch": 1.2468491960017385, | |
| "grad_norm": 1.3722553253173828, | |
| "learning_rate": 9.76812576373242e-05, | |
| "loss": 4.1257, | |
| "step": 359 | |
| }, | |
| { | |
| "epoch": 1.2503259452411994, | |
| "grad_norm": 0.8985775709152222, | |
| "learning_rate": 9.765362502737097e-05, | |
| "loss": 4.1981, | |
| "step": 360 | |
| }, | |
| { | |
| "epoch": 1.2538026944806606, | |
| "grad_norm": 0.7751098275184631, | |
| "learning_rate": 9.762583269679303e-05, | |
| "loss": 4.1548, | |
| "step": 361 | |
| }, | |
| { | |
| "epoch": 1.2572794437201216, | |
| "grad_norm": 0.7167885899543762, | |
| "learning_rate": 9.759788073874189e-05, | |
| "loss": 4.1174, | |
| "step": 362 | |
| }, | |
| { | |
| "epoch": 1.2607561929595827, | |
| "grad_norm": 0.9465803503990173, | |
| "learning_rate": 9.756976924690412e-05, | |
| "loss": 4.1763, | |
| "step": 363 | |
| }, | |
| { | |
| "epoch": 1.264232942199044, | |
| "grad_norm": 1.0560104846954346, | |
| "learning_rate": 9.754149831550098e-05, | |
| "loss": 4.1163, | |
| "step": 364 | |
| }, | |
| { | |
| "epoch": 1.267709691438505, | |
| "grad_norm": 0.811275839805603, | |
| "learning_rate": 9.751306803928815e-05, | |
| "loss": 4.1351, | |
| "step": 365 | |
| }, | |
| { | |
| "epoch": 1.271186440677966, | |
| "grad_norm": 0.7459397912025452, | |
| "learning_rate": 9.748447851355535e-05, | |
| "loss": 4.1354, | |
| "step": 366 | |
| }, | |
| { | |
| "epoch": 1.2746631899174272, | |
| "grad_norm": 0.6689170002937317, | |
| "learning_rate": 9.745572983412607e-05, | |
| "loss": 4.2022, | |
| "step": 367 | |
| }, | |
| { | |
| "epoch": 1.2781399391568882, | |
| "grad_norm": 0.6770418882369995, | |
| "learning_rate": 9.742682209735727e-05, | |
| "loss": 4.1978, | |
| "step": 368 | |
| }, | |
| { | |
| "epoch": 1.2816166883963493, | |
| "grad_norm": 0.7698088884353638, | |
| "learning_rate": 9.7397755400139e-05, | |
| "loss": 4.1294, | |
| "step": 369 | |
| }, | |
| { | |
| "epoch": 1.2850934376358105, | |
| "grad_norm": 0.8307857513427734, | |
| "learning_rate": 9.736852983989404e-05, | |
| "loss": 4.0905, | |
| "step": 370 | |
| }, | |
| { | |
| "epoch": 1.2885701868752717, | |
| "grad_norm": 0.8605286478996277, | |
| "learning_rate": 9.733914551457771e-05, | |
| "loss": 4.2347, | |
| "step": 371 | |
| }, | |
| { | |
| "epoch": 1.2920469361147326, | |
| "grad_norm": 0.8637606501579285, | |
| "learning_rate": 9.730960252267743e-05, | |
| "loss": 4.1814, | |
| "step": 372 | |
| }, | |
| { | |
| "epoch": 1.2955236853541938, | |
| "grad_norm": 0.9002863764762878, | |
| "learning_rate": 9.727990096321244e-05, | |
| "loss": 4.1763, | |
| "step": 373 | |
| }, | |
| { | |
| "epoch": 1.299000434593655, | |
| "grad_norm": 0.7790195345878601, | |
| "learning_rate": 9.725004093573342e-05, | |
| "loss": 4.1895, | |
| "step": 374 | |
| }, | |
| { | |
| "epoch": 1.302477183833116, | |
| "grad_norm": 0.8514074087142944, | |
| "learning_rate": 9.722002254032224e-05, | |
| "loss": 4.1396, | |
| "step": 375 | |
| }, | |
| { | |
| "epoch": 1.3059539330725771, | |
| "grad_norm": 0.9697980284690857, | |
| "learning_rate": 9.718984587759148e-05, | |
| "loss": 4.1228, | |
| "step": 376 | |
| }, | |
| { | |
| "epoch": 1.3094306823120383, | |
| "grad_norm": 0.9058026671409607, | |
| "learning_rate": 9.715951104868428e-05, | |
| "loss": 4.1695, | |
| "step": 377 | |
| }, | |
| { | |
| "epoch": 1.3129074315514995, | |
| "grad_norm": 0.8548423051834106, | |
| "learning_rate": 9.712901815527386e-05, | |
| "loss": 4.17, | |
| "step": 378 | |
| }, | |
| { | |
| "epoch": 1.3163841807909604, | |
| "grad_norm": 0.9168500304222107, | |
| "learning_rate": 9.709836729956325e-05, | |
| "loss": 4.1448, | |
| "step": 379 | |
| }, | |
| { | |
| "epoch": 1.3198609300304216, | |
| "grad_norm": 1.040820837020874, | |
| "learning_rate": 9.706755858428486e-05, | |
| "loss": 4.2922, | |
| "step": 380 | |
| }, | |
| { | |
| "epoch": 1.3233376792698825, | |
| "grad_norm": 0.9632564187049866, | |
| "learning_rate": 9.703659211270028e-05, | |
| "loss": 4.0698, | |
| "step": 381 | |
| }, | |
| { | |
| "epoch": 1.3268144285093437, | |
| "grad_norm": 1.0353721380233765, | |
| "learning_rate": 9.70054679885998e-05, | |
| "loss": 4.0974, | |
| "step": 382 | |
| }, | |
| { | |
| "epoch": 1.330291177748805, | |
| "grad_norm": 0.9106934070587158, | |
| "learning_rate": 9.69741863163021e-05, | |
| "loss": 4.1968, | |
| "step": 383 | |
| }, | |
| { | |
| "epoch": 1.333767926988266, | |
| "grad_norm": 0.7971479892730713, | |
| "learning_rate": 9.694274720065399e-05, | |
| "loss": 4.1139, | |
| "step": 384 | |
| }, | |
| { | |
| "epoch": 1.337244676227727, | |
| "grad_norm": 0.8078503608703613, | |
| "learning_rate": 9.691115074702992e-05, | |
| "loss": 4.1033, | |
| "step": 385 | |
| }, | |
| { | |
| "epoch": 1.3407214254671882, | |
| "grad_norm": 0.8231106996536255, | |
| "learning_rate": 9.687939706133168e-05, | |
| "loss": 4.2063, | |
| "step": 386 | |
| }, | |
| { | |
| "epoch": 1.3441981747066492, | |
| "grad_norm": 0.7890482544898987, | |
| "learning_rate": 9.68474862499881e-05, | |
| "loss": 4.0187, | |
| "step": 387 | |
| }, | |
| { | |
| "epoch": 1.3476749239461103, | |
| "grad_norm": 0.658843994140625, | |
| "learning_rate": 9.681541841995461e-05, | |
| "loss": 4.114, | |
| "step": 388 | |
| }, | |
| { | |
| "epoch": 1.3511516731855715, | |
| "grad_norm": 0.7294294238090515, | |
| "learning_rate": 9.678319367871293e-05, | |
| "loss": 4.198, | |
| "step": 389 | |
| }, | |
| { | |
| "epoch": 1.3546284224250327, | |
| "grad_norm": 0.9159700274467468, | |
| "learning_rate": 9.675081213427076e-05, | |
| "loss": 4.1494, | |
| "step": 390 | |
| }, | |
| { | |
| "epoch": 1.3581051716644936, | |
| "grad_norm": 0.964610755443573, | |
| "learning_rate": 9.671827389516122e-05, | |
| "loss": 4.2178, | |
| "step": 391 | |
| }, | |
| { | |
| "epoch": 1.3615819209039548, | |
| "grad_norm": 0.8890171647071838, | |
| "learning_rate": 9.668557907044276e-05, | |
| "loss": 4.2487, | |
| "step": 392 | |
| }, | |
| { | |
| "epoch": 1.365058670143416, | |
| "grad_norm": 0.999011754989624, | |
| "learning_rate": 9.66527277696986e-05, | |
| "loss": 4.222, | |
| "step": 393 | |
| }, | |
| { | |
| "epoch": 1.368535419382877, | |
| "grad_norm": 1.05777108669281, | |
| "learning_rate": 9.661972010303641e-05, | |
| "loss": 4.191, | |
| "step": 394 | |
| }, | |
| { | |
| "epoch": 1.372012168622338, | |
| "grad_norm": 0.6993465423583984, | |
| "learning_rate": 9.658655618108796e-05, | |
| "loss": 4.0272, | |
| "step": 395 | |
| }, | |
| { | |
| "epoch": 1.3754889178617993, | |
| "grad_norm": 0.8366750478744507, | |
| "learning_rate": 9.655323611500875e-05, | |
| "loss": 4.1595, | |
| "step": 396 | |
| }, | |
| { | |
| "epoch": 1.3789656671012605, | |
| "grad_norm": 0.8456549048423767, | |
| "learning_rate": 9.651976001647765e-05, | |
| "loss": 4.237, | |
| "step": 397 | |
| }, | |
| { | |
| "epoch": 1.3824424163407214, | |
| "grad_norm": 0.791394054889679, | |
| "learning_rate": 9.648612799769644e-05, | |
| "loss": 4.2289, | |
| "step": 398 | |
| }, | |
| { | |
| "epoch": 1.3859191655801826, | |
| "grad_norm": 0.9533299803733826, | |
| "learning_rate": 9.645234017138956e-05, | |
| "loss": 4.1586, | |
| "step": 399 | |
| }, | |
| { | |
| "epoch": 1.3893959148196435, | |
| "grad_norm": 1.044310450553894, | |
| "learning_rate": 9.641839665080363e-05, | |
| "loss": 4.2023, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 1.3928726640591047, | |
| "grad_norm": 0.920463502407074, | |
| "learning_rate": 9.638429754970715e-05, | |
| "loss": 4.1882, | |
| "step": 401 | |
| }, | |
| { | |
| "epoch": 1.3963494132985659, | |
| "grad_norm": 0.9414947032928467, | |
| "learning_rate": 9.635004298239004e-05, | |
| "loss": 4.0434, | |
| "step": 402 | |
| }, | |
| { | |
| "epoch": 1.399826162538027, | |
| "grad_norm": 0.8273729085922241, | |
| "learning_rate": 9.63156330636633e-05, | |
| "loss": 4.1685, | |
| "step": 403 | |
| }, | |
| { | |
| "epoch": 1.403302911777488, | |
| "grad_norm": 0.6224656105041504, | |
| "learning_rate": 9.628106790885865e-05, | |
| "loss": 4.1801, | |
| "step": 404 | |
| }, | |
| { | |
| "epoch": 1.4067796610169492, | |
| "grad_norm": 0.7260700464248657, | |
| "learning_rate": 9.62463476338281e-05, | |
| "loss": 4.164, | |
| "step": 405 | |
| }, | |
| { | |
| "epoch": 1.4102564102564101, | |
| "grad_norm": 0.7788506150245667, | |
| "learning_rate": 9.621147235494356e-05, | |
| "loss": 4.1687, | |
| "step": 406 | |
| }, | |
| { | |
| "epoch": 1.4137331594958713, | |
| "grad_norm": 0.9212743043899536, | |
| "learning_rate": 9.617644218909649e-05, | |
| "loss": 4.094, | |
| "step": 407 | |
| }, | |
| { | |
| "epoch": 1.4172099087353325, | |
| "grad_norm": 0.9231887459754944, | |
| "learning_rate": 9.614125725369747e-05, | |
| "loss": 4.221, | |
| "step": 408 | |
| }, | |
| { | |
| "epoch": 1.4206866579747937, | |
| "grad_norm": 0.8516282439231873, | |
| "learning_rate": 9.610591766667583e-05, | |
| "loss": 4.0776, | |
| "step": 409 | |
| }, | |
| { | |
| "epoch": 1.4241634072142546, | |
| "grad_norm": 0.6721898913383484, | |
| "learning_rate": 9.607042354647924e-05, | |
| "loss": 4.0561, | |
| "step": 410 | |
| }, | |
| { | |
| "epoch": 1.4276401564537158, | |
| "grad_norm": 0.9020458459854126, | |
| "learning_rate": 9.60347750120733e-05, | |
| "loss": 4.0563, | |
| "step": 411 | |
| }, | |
| { | |
| "epoch": 1.431116905693177, | |
| "grad_norm": 0.8082847595214844, | |
| "learning_rate": 9.599897218294122e-05, | |
| "loss": 4.0433, | |
| "step": 412 | |
| }, | |
| { | |
| "epoch": 1.434593654932638, | |
| "grad_norm": 0.89698725938797, | |
| "learning_rate": 9.596301517908328e-05, | |
| "loss": 4.1292, | |
| "step": 413 | |
| }, | |
| { | |
| "epoch": 1.438070404172099, | |
| "grad_norm": 1.1492091417312622, | |
| "learning_rate": 9.592690412101658e-05, | |
| "loss": 4.1224, | |
| "step": 414 | |
| }, | |
| { | |
| "epoch": 1.4415471534115603, | |
| "grad_norm": 0.8929200172424316, | |
| "learning_rate": 9.589063912977451e-05, | |
| "loss": 4.0852, | |
| "step": 415 | |
| }, | |
| { | |
| "epoch": 1.4450239026510214, | |
| "grad_norm": 0.8456454873085022, | |
| "learning_rate": 9.585422032690643e-05, | |
| "loss": 4.1604, | |
| "step": 416 | |
| }, | |
| { | |
| "epoch": 1.4485006518904824, | |
| "grad_norm": 0.9664533734321594, | |
| "learning_rate": 9.581764783447719e-05, | |
| "loss": 4.234, | |
| "step": 417 | |
| }, | |
| { | |
| "epoch": 1.4519774011299436, | |
| "grad_norm": 0.8303775787353516, | |
| "learning_rate": 9.578092177506683e-05, | |
| "loss": 4.1508, | |
| "step": 418 | |
| }, | |
| { | |
| "epoch": 1.4554541503694045, | |
| "grad_norm": 0.735723614692688, | |
| "learning_rate": 9.574404227177005e-05, | |
| "loss": 4.0714, | |
| "step": 419 | |
| }, | |
| { | |
| "epoch": 1.4589308996088657, | |
| "grad_norm": 0.7835253477096558, | |
| "learning_rate": 9.570700944819584e-05, | |
| "loss": 4.0993, | |
| "step": 420 | |
| }, | |
| { | |
| "epoch": 1.4624076488483269, | |
| "grad_norm": 0.8943220376968384, | |
| "learning_rate": 9.566982342846709e-05, | |
| "loss": 4.0499, | |
| "step": 421 | |
| }, | |
| { | |
| "epoch": 1.465884398087788, | |
| "grad_norm": 1.0396778583526611, | |
| "learning_rate": 9.563248433722019e-05, | |
| "loss": 4.0751, | |
| "step": 422 | |
| }, | |
| { | |
| "epoch": 1.469361147327249, | |
| "grad_norm": 0.9445440173149109, | |
| "learning_rate": 9.559499229960451e-05, | |
| "loss": 4.0111, | |
| "step": 423 | |
| }, | |
| { | |
| "epoch": 1.4728378965667102, | |
| "grad_norm": 0.8301072716712952, | |
| "learning_rate": 9.55573474412821e-05, | |
| "loss": 4.045, | |
| "step": 424 | |
| }, | |
| { | |
| "epoch": 1.476314645806171, | |
| "grad_norm": 0.9725110530853271, | |
| "learning_rate": 9.551954988842724e-05, | |
| "loss": 4.0069, | |
| "step": 425 | |
| }, | |
| { | |
| "epoch": 1.4797913950456323, | |
| "grad_norm": 1.132828712463379, | |
| "learning_rate": 9.548159976772592e-05, | |
| "loss": 4.125, | |
| "step": 426 | |
| }, | |
| { | |
| "epoch": 1.4832681442850935, | |
| "grad_norm": 0.7702617645263672, | |
| "learning_rate": 9.544349720637556e-05, | |
| "loss": 4.0776, | |
| "step": 427 | |
| }, | |
| { | |
| "epoch": 1.4867448935245546, | |
| "grad_norm": 0.7960605025291443, | |
| "learning_rate": 9.540524233208448e-05, | |
| "loss": 4.0346, | |
| "step": 428 | |
| }, | |
| { | |
| "epoch": 1.4902216427640156, | |
| "grad_norm": 0.9969260096549988, | |
| "learning_rate": 9.536683527307153e-05, | |
| "loss": 4.1162, | |
| "step": 429 | |
| }, | |
| { | |
| "epoch": 1.4936983920034768, | |
| "grad_norm": 1.1111648082733154, | |
| "learning_rate": 9.53282761580656e-05, | |
| "loss": 4.0933, | |
| "step": 430 | |
| }, | |
| { | |
| "epoch": 1.497175141242938, | |
| "grad_norm": 1.198155164718628, | |
| "learning_rate": 9.528956511630529e-05, | |
| "loss": 4.0783, | |
| "step": 431 | |
| }, | |
| { | |
| "epoch": 1.5006518904823989, | |
| "grad_norm": 1.0453709363937378, | |
| "learning_rate": 9.525070227753834e-05, | |
| "loss": 4.0607, | |
| "step": 432 | |
| }, | |
| { | |
| "epoch": 1.50412863972186, | |
| "grad_norm": 0.9988518953323364, | |
| "learning_rate": 9.521168777202132e-05, | |
| "loss": 4.112, | |
| "step": 433 | |
| }, | |
| { | |
| "epoch": 1.5076053889613212, | |
| "grad_norm": 0.918400764465332, | |
| "learning_rate": 9.517252173051911e-05, | |
| "loss": 4.0134, | |
| "step": 434 | |
| }, | |
| { | |
| "epoch": 1.5110821382007824, | |
| "grad_norm": 0.869310200214386, | |
| "learning_rate": 9.513320428430452e-05, | |
| "loss": 4.0576, | |
| "step": 435 | |
| }, | |
| { | |
| "epoch": 1.5145588874402434, | |
| "grad_norm": 0.9518806338310242, | |
| "learning_rate": 9.509373556515782e-05, | |
| "loss": 4.1244, | |
| "step": 436 | |
| }, | |
| { | |
| "epoch": 1.5180356366797043, | |
| "grad_norm": 0.8721877336502075, | |
| "learning_rate": 9.505411570536626e-05, | |
| "loss": 4.0608, | |
| "step": 437 | |
| }, | |
| { | |
| "epoch": 1.5215123859191655, | |
| "grad_norm": 0.7652842998504639, | |
| "learning_rate": 9.50143448377237e-05, | |
| "loss": 4.0603, | |
| "step": 438 | |
| }, | |
| { | |
| "epoch": 1.5249891351586267, | |
| "grad_norm": 0.8885753750801086, | |
| "learning_rate": 9.497442309553016e-05, | |
| "loss": 3.8367, | |
| "step": 439 | |
| }, | |
| { | |
| "epoch": 1.5284658843980878, | |
| "grad_norm": 0.6828482151031494, | |
| "learning_rate": 9.493435061259131e-05, | |
| "loss": 4.1155, | |
| "step": 440 | |
| }, | |
| { | |
| "epoch": 1.531942633637549, | |
| "grad_norm": 0.729836642742157, | |
| "learning_rate": 9.489412752321805e-05, | |
| "loss": 4.0957, | |
| "step": 441 | |
| }, | |
| { | |
| "epoch": 1.53541938287701, | |
| "grad_norm": 0.9394379258155823, | |
| "learning_rate": 9.485375396222609e-05, | |
| "loss": 3.9957, | |
| "step": 442 | |
| }, | |
| { | |
| "epoch": 1.5388961321164711, | |
| "grad_norm": 0.9025709629058838, | |
| "learning_rate": 9.481323006493547e-05, | |
| "loss": 4.0381, | |
| "step": 443 | |
| }, | |
| { | |
| "epoch": 1.542372881355932, | |
| "grad_norm": 0.861587405204773, | |
| "learning_rate": 9.477255596717012e-05, | |
| "loss": 4.2398, | |
| "step": 444 | |
| }, | |
| { | |
| "epoch": 1.5458496305953933, | |
| "grad_norm": 0.8114928007125854, | |
| "learning_rate": 9.473173180525737e-05, | |
| "loss": 4.096, | |
| "step": 445 | |
| }, | |
| { | |
| "epoch": 1.5493263798348544, | |
| "grad_norm": 0.8760333061218262, | |
| "learning_rate": 9.469075771602755e-05, | |
| "loss": 4.1301, | |
| "step": 446 | |
| }, | |
| { | |
| "epoch": 1.5528031290743156, | |
| "grad_norm": 1.0566339492797852, | |
| "learning_rate": 9.464963383681349e-05, | |
| "loss": 3.9805, | |
| "step": 447 | |
| }, | |
| { | |
| "epoch": 1.5562798783137766, | |
| "grad_norm": 0.9962750673294067, | |
| "learning_rate": 9.460836030545007e-05, | |
| "loss": 3.9729, | |
| "step": 448 | |
| }, | |
| { | |
| "epoch": 1.5597566275532377, | |
| "grad_norm": 0.9361708164215088, | |
| "learning_rate": 9.456693726027375e-05, | |
| "loss": 4.0141, | |
| "step": 449 | |
| }, | |
| { | |
| "epoch": 1.5632333767926987, | |
| "grad_norm": 0.9272918105125427, | |
| "learning_rate": 9.452536484012212e-05, | |
| "loss": 4.079, | |
| "step": 450 | |
| }, | |
| { | |
| "epoch": 1.5667101260321599, | |
| "grad_norm": 1.0941708087921143, | |
| "learning_rate": 9.448364318433345e-05, | |
| "loss": 4.0688, | |
| "step": 451 | |
| }, | |
| { | |
| "epoch": 1.570186875271621, | |
| "grad_norm": 0.9107572436332703, | |
| "learning_rate": 9.444177243274618e-05, | |
| "loss": 3.9463, | |
| "step": 452 | |
| }, | |
| { | |
| "epoch": 1.5736636245110822, | |
| "grad_norm": 0.786167562007904, | |
| "learning_rate": 9.439975272569848e-05, | |
| "loss": 4.0317, | |
| "step": 453 | |
| }, | |
| { | |
| "epoch": 1.5771403737505434, | |
| "grad_norm": 0.7826558947563171, | |
| "learning_rate": 9.435758420402778e-05, | |
| "loss": 4.0186, | |
| "step": 454 | |
| }, | |
| { | |
| "epoch": 1.5806171229900043, | |
| "grad_norm": 0.8052117824554443, | |
| "learning_rate": 9.431526700907027e-05, | |
| "loss": 4.0627, | |
| "step": 455 | |
| }, | |
| { | |
| "epoch": 1.5840938722294653, | |
| "grad_norm": 1.0061588287353516, | |
| "learning_rate": 9.42728012826605e-05, | |
| "loss": 3.9494, | |
| "step": 456 | |
| }, | |
| { | |
| "epoch": 1.5875706214689265, | |
| "grad_norm": 0.9913825392723083, | |
| "learning_rate": 9.423018716713079e-05, | |
| "loss": 4.0443, | |
| "step": 457 | |
| }, | |
| { | |
| "epoch": 1.5910473707083876, | |
| "grad_norm": 0.8176896572113037, | |
| "learning_rate": 9.418742480531085e-05, | |
| "loss": 3.9168, | |
| "step": 458 | |
| }, | |
| { | |
| "epoch": 1.5945241199478488, | |
| "grad_norm": 0.7156447768211365, | |
| "learning_rate": 9.414451434052727e-05, | |
| "loss": 4.0151, | |
| "step": 459 | |
| }, | |
| { | |
| "epoch": 1.59800086918731, | |
| "grad_norm": 0.7260638475418091, | |
| "learning_rate": 9.410145591660301e-05, | |
| "loss": 4.0644, | |
| "step": 460 | |
| }, | |
| { | |
| "epoch": 1.601477618426771, | |
| "grad_norm": 0.8086751103401184, | |
| "learning_rate": 9.405824967785698e-05, | |
| "loss": 3.9587, | |
| "step": 461 | |
| }, | |
| { | |
| "epoch": 1.6049543676662321, | |
| "grad_norm": 0.9354509115219116, | |
| "learning_rate": 9.401489576910349e-05, | |
| "loss": 3.9932, | |
| "step": 462 | |
| }, | |
| { | |
| "epoch": 1.608431116905693, | |
| "grad_norm": 1.1333049535751343, | |
| "learning_rate": 9.39713943356518e-05, | |
| "loss": 3.9713, | |
| "step": 463 | |
| }, | |
| { | |
| "epoch": 1.6119078661451542, | |
| "grad_norm": 0.9655582904815674, | |
| "learning_rate": 9.392774552330567e-05, | |
| "loss": 3.9768, | |
| "step": 464 | |
| }, | |
| { | |
| "epoch": 1.6153846153846154, | |
| "grad_norm": 1.089020013809204, | |
| "learning_rate": 9.388394947836279e-05, | |
| "loss": 3.8841, | |
| "step": 465 | |
| }, | |
| { | |
| "epoch": 1.6188613646240766, | |
| "grad_norm": 0.7879194021224976, | |
| "learning_rate": 9.38400063476143e-05, | |
| "loss": 3.9356, | |
| "step": 466 | |
| }, | |
| { | |
| "epoch": 1.6223381138635375, | |
| "grad_norm": 0.6482805013656616, | |
| "learning_rate": 9.37959162783444e-05, | |
| "loss": 3.9739, | |
| "step": 467 | |
| }, | |
| { | |
| "epoch": 1.6258148631029987, | |
| "grad_norm": 0.7414939403533936, | |
| "learning_rate": 9.375167941832973e-05, | |
| "loss": 3.9431, | |
| "step": 468 | |
| }, | |
| { | |
| "epoch": 1.6292916123424597, | |
| "grad_norm": 0.9199277758598328, | |
| "learning_rate": 9.370729591583894e-05, | |
| "loss": 3.949, | |
| "step": 469 | |
| }, | |
| { | |
| "epoch": 1.6327683615819208, | |
| "grad_norm": 1.0250579118728638, | |
| "learning_rate": 9.366276591963221e-05, | |
| "loss": 4.0201, | |
| "step": 470 | |
| }, | |
| { | |
| "epoch": 1.636245110821382, | |
| "grad_norm": 1.1997772455215454, | |
| "learning_rate": 9.361808957896067e-05, | |
| "loss": 4.0029, | |
| "step": 471 | |
| }, | |
| { | |
| "epoch": 1.6397218600608432, | |
| "grad_norm": 1.0056092739105225, | |
| "learning_rate": 9.357326704356602e-05, | |
| "loss": 3.9536, | |
| "step": 472 | |
| }, | |
| { | |
| "epoch": 1.6431986093003044, | |
| "grad_norm": 0.8187026977539062, | |
| "learning_rate": 9.35282984636799e-05, | |
| "loss": 4.0072, | |
| "step": 473 | |
| }, | |
| { | |
| "epoch": 1.6466753585397653, | |
| "grad_norm": 1.099008321762085, | |
| "learning_rate": 9.348318399002347e-05, | |
| "loss": 4.0514, | |
| "step": 474 | |
| }, | |
| { | |
| "epoch": 1.6501521077792263, | |
| "grad_norm": 1.1227092742919922, | |
| "learning_rate": 9.343792377380687e-05, | |
| "loss": 3.9256, | |
| "step": 475 | |
| }, | |
| { | |
| "epoch": 1.6536288570186874, | |
| "grad_norm": 0.8582590222358704, | |
| "learning_rate": 9.339251796672877e-05, | |
| "loss": 3.9037, | |
| "step": 476 | |
| }, | |
| { | |
| "epoch": 1.6571056062581486, | |
| "grad_norm": 1.0513243675231934, | |
| "learning_rate": 9.334696672097576e-05, | |
| "loss": 3.9695, | |
| "step": 477 | |
| }, | |
| { | |
| "epoch": 1.6605823554976098, | |
| "grad_norm": 1.0664559602737427, | |
| "learning_rate": 9.330127018922194e-05, | |
| "loss": 3.9251, | |
| "step": 478 | |
| }, | |
| { | |
| "epoch": 1.664059104737071, | |
| "grad_norm": 1.0497174263000488, | |
| "learning_rate": 9.325542852462833e-05, | |
| "loss": 4.0381, | |
| "step": 479 | |
| }, | |
| { | |
| "epoch": 1.667535853976532, | |
| "grad_norm": 0.9143519401550293, | |
| "learning_rate": 9.320944188084242e-05, | |
| "loss": 3.9366, | |
| "step": 480 | |
| }, | |
| { | |
| "epoch": 1.671012603215993, | |
| "grad_norm": 0.7945806384086609, | |
| "learning_rate": 9.31633104119976e-05, | |
| "loss": 4.0225, | |
| "step": 481 | |
| }, | |
| { | |
| "epoch": 1.674489352455454, | |
| "grad_norm": 0.8599205613136292, | |
| "learning_rate": 9.31170342727127e-05, | |
| "loss": 4.0028, | |
| "step": 482 | |
| }, | |
| { | |
| "epoch": 1.6779661016949152, | |
| "grad_norm": 1.0326347351074219, | |
| "learning_rate": 9.307061361809141e-05, | |
| "loss": 3.9804, | |
| "step": 483 | |
| }, | |
| { | |
| "epoch": 1.6814428509343764, | |
| "grad_norm": 1.0522825717926025, | |
| "learning_rate": 9.302404860372185e-05, | |
| "loss": 3.9047, | |
| "step": 484 | |
| }, | |
| { | |
| "epoch": 1.6849196001738376, | |
| "grad_norm": 0.9849698543548584, | |
| "learning_rate": 9.29773393856759e-05, | |
| "loss": 3.9828, | |
| "step": 485 | |
| }, | |
| { | |
| "epoch": 1.6883963494132985, | |
| "grad_norm": 1.1080557107925415, | |
| "learning_rate": 9.293048612050883e-05, | |
| "loss": 3.9129, | |
| "step": 486 | |
| }, | |
| { | |
| "epoch": 1.6918730986527597, | |
| "grad_norm": 0.9390277862548828, | |
| "learning_rate": 9.28834889652587e-05, | |
| "loss": 4.0346, | |
| "step": 487 | |
| }, | |
| { | |
| "epoch": 1.6953498478922207, | |
| "grad_norm": 0.8586708903312683, | |
| "learning_rate": 9.283634807744586e-05, | |
| "loss": 3.992, | |
| "step": 488 | |
| }, | |
| { | |
| "epoch": 1.6988265971316818, | |
| "grad_norm": 0.9357092976570129, | |
| "learning_rate": 9.278906361507238e-05, | |
| "loss": 3.9007, | |
| "step": 489 | |
| }, | |
| { | |
| "epoch": 1.702303346371143, | |
| "grad_norm": 1.0461989641189575, | |
| "learning_rate": 9.274163573662157e-05, | |
| "loss": 3.9684, | |
| "step": 490 | |
| }, | |
| { | |
| "epoch": 1.7057800956106042, | |
| "grad_norm": 1.0791900157928467, | |
| "learning_rate": 9.26940646010574e-05, | |
| "loss": 3.9989, | |
| "step": 491 | |
| }, | |
| { | |
| "epoch": 1.7092568448500653, | |
| "grad_norm": 0.7584978938102722, | |
| "learning_rate": 9.264635036782405e-05, | |
| "loss": 3.9932, | |
| "step": 492 | |
| }, | |
| { | |
| "epoch": 1.7127335940895263, | |
| "grad_norm": 0.8475772142410278, | |
| "learning_rate": 9.259849319684526e-05, | |
| "loss": 3.879, | |
| "step": 493 | |
| }, | |
| { | |
| "epoch": 1.7162103433289873, | |
| "grad_norm": 0.8825334310531616, | |
| "learning_rate": 9.255049324852388e-05, | |
| "loss": 3.8554, | |
| "step": 494 | |
| }, | |
| { | |
| "epoch": 1.7196870925684484, | |
| "grad_norm": 0.9635478258132935, | |
| "learning_rate": 9.250235068374133e-05, | |
| "loss": 3.882, | |
| "step": 495 | |
| }, | |
| { | |
| "epoch": 1.7231638418079096, | |
| "grad_norm": 1.0515531301498413, | |
| "learning_rate": 9.245406566385697e-05, | |
| "loss": 3.767, | |
| "step": 496 | |
| }, | |
| { | |
| "epoch": 1.7266405910473708, | |
| "grad_norm": 0.951738715171814, | |
| "learning_rate": 9.240563835070771e-05, | |
| "loss": 3.9327, | |
| "step": 497 | |
| }, | |
| { | |
| "epoch": 1.730117340286832, | |
| "grad_norm": 0.9645599126815796, | |
| "learning_rate": 9.235706890660733e-05, | |
| "loss": 3.9418, | |
| "step": 498 | |
| }, | |
| { | |
| "epoch": 1.733594089526293, | |
| "grad_norm": 0.9542970061302185, | |
| "learning_rate": 9.230835749434601e-05, | |
| "loss": 3.9136, | |
| "step": 499 | |
| }, | |
| { | |
| "epoch": 1.7370708387657539, | |
| "grad_norm": 0.9848579168319702, | |
| "learning_rate": 9.225950427718975e-05, | |
| "loss": 3.8674, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 1.740547588005215, | |
| "grad_norm": 0.8875890374183655, | |
| "learning_rate": 9.221050941887984e-05, | |
| "loss": 3.8565, | |
| "step": 501 | |
| }, | |
| { | |
| "epoch": 1.7440243372446762, | |
| "grad_norm": 0.9016240835189819, | |
| "learning_rate": 9.216137308363233e-05, | |
| "loss": 3.9054, | |
| "step": 502 | |
| }, | |
| { | |
| "epoch": 1.7475010864841374, | |
| "grad_norm": 0.8858099579811096, | |
| "learning_rate": 9.211209543613746e-05, | |
| "loss": 3.9223, | |
| "step": 503 | |
| }, | |
| { | |
| "epoch": 1.7509778357235986, | |
| "grad_norm": 0.8951818943023682, | |
| "learning_rate": 9.206267664155907e-05, | |
| "loss": 3.8056, | |
| "step": 504 | |
| }, | |
| { | |
| "epoch": 1.7544545849630595, | |
| "grad_norm": 0.8185245394706726, | |
| "learning_rate": 9.201311686553408e-05, | |
| "loss": 3.9184, | |
| "step": 505 | |
| }, | |
| { | |
| "epoch": 1.7579313342025207, | |
| "grad_norm": 0.8379631042480469, | |
| "learning_rate": 9.1963416274172e-05, | |
| "loss": 3.9537, | |
| "step": 506 | |
| }, | |
| { | |
| "epoch": 1.7614080834419816, | |
| "grad_norm": 0.6970687508583069, | |
| "learning_rate": 9.191357503405425e-05, | |
| "loss": 3.8938, | |
| "step": 507 | |
| }, | |
| { | |
| "epoch": 1.7648848326814428, | |
| "grad_norm": 0.7571694254875183, | |
| "learning_rate": 9.186359331223369e-05, | |
| "loss": 3.8748, | |
| "step": 508 | |
| }, | |
| { | |
| "epoch": 1.768361581920904, | |
| "grad_norm": 0.916691780090332, | |
| "learning_rate": 9.181347127623403e-05, | |
| "loss": 3.8796, | |
| "step": 509 | |
| }, | |
| { | |
| "epoch": 1.7718383311603652, | |
| "grad_norm": 0.8490556478500366, | |
| "learning_rate": 9.176320909404924e-05, | |
| "loss": 3.8544, | |
| "step": 510 | |
| }, | |
| { | |
| "epoch": 1.7753150803998263, | |
| "grad_norm": 0.8541643619537354, | |
| "learning_rate": 9.171280693414307e-05, | |
| "loss": 3.9174, | |
| "step": 511 | |
| }, | |
| { | |
| "epoch": 1.7787918296392873, | |
| "grad_norm": 0.7272506356239319, | |
| "learning_rate": 9.166226496544839e-05, | |
| "loss": 3.7822, | |
| "step": 512 | |
| }, | |
| { | |
| "epoch": 1.7822685788787482, | |
| "grad_norm": 0.7909744381904602, | |
| "learning_rate": 9.161158335736671e-05, | |
| "loss": 3.9238, | |
| "step": 513 | |
| }, | |
| { | |
| "epoch": 1.7857453281182094, | |
| "grad_norm": 1.08568274974823, | |
| "learning_rate": 9.156076227976752e-05, | |
| "loss": 3.9444, | |
| "step": 514 | |
| }, | |
| { | |
| "epoch": 1.7892220773576706, | |
| "grad_norm": 1.0940698385238647, | |
| "learning_rate": 9.15098019029878e-05, | |
| "loss": 3.8727, | |
| "step": 515 | |
| }, | |
| { | |
| "epoch": 1.7926988265971318, | |
| "grad_norm": 0.9210303425788879, | |
| "learning_rate": 9.145870239783142e-05, | |
| "loss": 3.9063, | |
| "step": 516 | |
| }, | |
| { | |
| "epoch": 1.796175575836593, | |
| "grad_norm": 0.9154436588287354, | |
| "learning_rate": 9.140746393556854e-05, | |
| "loss": 3.8853, | |
| "step": 517 | |
| }, | |
| { | |
| "epoch": 1.7996523250760539, | |
| "grad_norm": 1.0353447198867798, | |
| "learning_rate": 9.135608668793511e-05, | |
| "loss": 3.841, | |
| "step": 518 | |
| }, | |
| { | |
| "epoch": 1.8031290743155148, | |
| "grad_norm": 0.8575829863548279, | |
| "learning_rate": 9.130457082713219e-05, | |
| "loss": 3.8811, | |
| "step": 519 | |
| }, | |
| { | |
| "epoch": 1.806605823554976, | |
| "grad_norm": 0.9011865854263306, | |
| "learning_rate": 9.125291652582549e-05, | |
| "loss": 3.7803, | |
| "step": 520 | |
| }, | |
| { | |
| "epoch": 1.8100825727944372, | |
| "grad_norm": 0.9533329606056213, | |
| "learning_rate": 9.120112395714463e-05, | |
| "loss": 3.8986, | |
| "step": 521 | |
| }, | |
| { | |
| "epoch": 1.8135593220338984, | |
| "grad_norm": 0.9975365996360779, | |
| "learning_rate": 9.114919329468282e-05, | |
| "loss": 3.9143, | |
| "step": 522 | |
| }, | |
| { | |
| "epoch": 1.8170360712733595, | |
| "grad_norm": 1.015561819076538, | |
| "learning_rate": 9.109712471249598e-05, | |
| "loss": 3.8863, | |
| "step": 523 | |
| }, | |
| { | |
| "epoch": 1.8205128205128205, | |
| "grad_norm": 1.0247142314910889, | |
| "learning_rate": 9.104491838510235e-05, | |
| "loss": 3.9139, | |
| "step": 524 | |
| }, | |
| { | |
| "epoch": 1.8239895697522817, | |
| "grad_norm": 1.0852620601654053, | |
| "learning_rate": 9.099257448748184e-05, | |
| "loss": 3.8964, | |
| "step": 525 | |
| }, | |
| { | |
| "epoch": 1.8274663189917426, | |
| "grad_norm": 0.9563137292861938, | |
| "learning_rate": 9.094009319507547e-05, | |
| "loss": 3.8767, | |
| "step": 526 | |
| }, | |
| { | |
| "epoch": 1.8309430682312038, | |
| "grad_norm": 1.0613676309585571, | |
| "learning_rate": 9.088747468378474e-05, | |
| "loss": 3.9272, | |
| "step": 527 | |
| }, | |
| { | |
| "epoch": 1.834419817470665, | |
| "grad_norm": 1.0097156763076782, | |
| "learning_rate": 9.083471912997108e-05, | |
| "loss": 3.9006, | |
| "step": 528 | |
| }, | |
| { | |
| "epoch": 1.8378965667101261, | |
| "grad_norm": 0.9422491192817688, | |
| "learning_rate": 9.078182671045527e-05, | |
| "loss": 3.7358, | |
| "step": 529 | |
| }, | |
| { | |
| "epoch": 1.841373315949587, | |
| "grad_norm": 0.9380639791488647, | |
| "learning_rate": 9.072879760251679e-05, | |
| "loss": 3.8661, | |
| "step": 530 | |
| }, | |
| { | |
| "epoch": 1.8448500651890483, | |
| "grad_norm": 0.9634526371955872, | |
| "learning_rate": 9.067563198389326e-05, | |
| "loss": 3.8993, | |
| "step": 531 | |
| }, | |
| { | |
| "epoch": 1.8483268144285092, | |
| "grad_norm": 0.9645963907241821, | |
| "learning_rate": 9.062233003277983e-05, | |
| "loss": 3.8783, | |
| "step": 532 | |
| }, | |
| { | |
| "epoch": 1.8518035636679704, | |
| "grad_norm": 0.9658483862876892, | |
| "learning_rate": 9.056889192782866e-05, | |
| "loss": 3.8708, | |
| "step": 533 | |
| }, | |
| { | |
| "epoch": 1.8552803129074316, | |
| "grad_norm": 0.8875890970230103, | |
| "learning_rate": 9.051531784814817e-05, | |
| "loss": 3.7292, | |
| "step": 534 | |
| }, | |
| { | |
| "epoch": 1.8587570621468927, | |
| "grad_norm": 0.9648452997207642, | |
| "learning_rate": 9.04616079733026e-05, | |
| "loss": 3.8904, | |
| "step": 535 | |
| }, | |
| { | |
| "epoch": 1.862233811386354, | |
| "grad_norm": 1.1329749822616577, | |
| "learning_rate": 9.040776248331129e-05, | |
| "loss": 3.8909, | |
| "step": 536 | |
| }, | |
| { | |
| "epoch": 1.8657105606258149, | |
| "grad_norm": 0.9306442141532898, | |
| "learning_rate": 9.035378155864813e-05, | |
| "loss": 3.8015, | |
| "step": 537 | |
| }, | |
| { | |
| "epoch": 1.8691873098652758, | |
| "grad_norm": 0.8368638753890991, | |
| "learning_rate": 9.029966538024097e-05, | |
| "loss": 3.7593, | |
| "step": 538 | |
| }, | |
| { | |
| "epoch": 1.872664059104737, | |
| "grad_norm": 0.7863436937332153, | |
| "learning_rate": 9.024541412947094e-05, | |
| "loss": 3.7565, | |
| "step": 539 | |
| }, | |
| { | |
| "epoch": 1.8761408083441982, | |
| "grad_norm": 0.8507801294326782, | |
| "learning_rate": 9.019102798817197e-05, | |
| "loss": 3.867, | |
| "step": 540 | |
| }, | |
| { | |
| "epoch": 1.8796175575836593, | |
| "grad_norm": 0.8136400580406189, | |
| "learning_rate": 9.013650713863e-05, | |
| "loss": 3.6624, | |
| "step": 541 | |
| }, | |
| { | |
| "epoch": 1.8830943068231205, | |
| "grad_norm": 0.8798525929450989, | |
| "learning_rate": 9.008185176358256e-05, | |
| "loss": 3.7241, | |
| "step": 542 | |
| }, | |
| { | |
| "epoch": 1.8865710560625815, | |
| "grad_norm": 0.7656006217002869, | |
| "learning_rate": 9.002706204621803e-05, | |
| "loss": 3.9443, | |
| "step": 543 | |
| }, | |
| { | |
| "epoch": 1.8900478053020426, | |
| "grad_norm": 1.18944251537323, | |
| "learning_rate": 8.997213817017507e-05, | |
| "loss": 3.861, | |
| "step": 544 | |
| }, | |
| { | |
| "epoch": 1.8935245545415036, | |
| "grad_norm": 1.3046687841415405, | |
| "learning_rate": 8.991708031954199e-05, | |
| "loss": 3.7927, | |
| "step": 545 | |
| }, | |
| { | |
| "epoch": 1.8970013037809648, | |
| "grad_norm": 0.7693729400634766, | |
| "learning_rate": 8.986188867885617e-05, | |
| "loss": 3.7631, | |
| "step": 546 | |
| }, | |
| { | |
| "epoch": 1.900478053020426, | |
| "grad_norm": 0.9569775462150574, | |
| "learning_rate": 8.980656343310338e-05, | |
| "loss": 3.8547, | |
| "step": 547 | |
| }, | |
| { | |
| "epoch": 1.9039548022598871, | |
| "grad_norm": 1.0345852375030518, | |
| "learning_rate": 8.975110476771724e-05, | |
| "loss": 3.8461, | |
| "step": 548 | |
| }, | |
| { | |
| "epoch": 1.907431551499348, | |
| "grad_norm": 0.9311661124229431, | |
| "learning_rate": 8.969551286857849e-05, | |
| "loss": 3.831, | |
| "step": 549 | |
| }, | |
| { | |
| "epoch": 1.9109083007388092, | |
| "grad_norm": 0.847986102104187, | |
| "learning_rate": 8.963978792201449e-05, | |
| "loss": 3.6819, | |
| "step": 550 | |
| }, | |
| { | |
| "epoch": 1.9143850499782702, | |
| "grad_norm": 0.7580497860908508, | |
| "learning_rate": 8.958393011479848e-05, | |
| "loss": 3.7759, | |
| "step": 551 | |
| }, | |
| { | |
| "epoch": 1.9178617992177314, | |
| "grad_norm": 0.7308617830276489, | |
| "learning_rate": 8.952793963414907e-05, | |
| "loss": 3.8164, | |
| "step": 552 | |
| }, | |
| { | |
| "epoch": 1.9213385484571925, | |
| "grad_norm": 0.7043769955635071, | |
| "learning_rate": 8.947181666772948e-05, | |
| "loss": 3.7235, | |
| "step": 553 | |
| }, | |
| { | |
| "epoch": 1.9248152976966537, | |
| "grad_norm": 0.6796336770057678, | |
| "learning_rate": 8.941556140364706e-05, | |
| "loss": 3.8156, | |
| "step": 554 | |
| }, | |
| { | |
| "epoch": 1.928292046936115, | |
| "grad_norm": 0.6973956227302551, | |
| "learning_rate": 8.935917403045251e-05, | |
| "loss": 3.7504, | |
| "step": 555 | |
| }, | |
| { | |
| "epoch": 1.9317687961755758, | |
| "grad_norm": 0.8313055038452148, | |
| "learning_rate": 8.930265473713938e-05, | |
| "loss": 3.8686, | |
| "step": 556 | |
| }, | |
| { | |
| "epoch": 1.9352455454150368, | |
| "grad_norm": 0.8577587604522705, | |
| "learning_rate": 8.924600371314334e-05, | |
| "loss": 3.7068, | |
| "step": 557 | |
| }, | |
| { | |
| "epoch": 1.938722294654498, | |
| "grad_norm": 0.7737483978271484, | |
| "learning_rate": 8.918922114834156e-05, | |
| "loss": 3.7297, | |
| "step": 558 | |
| }, | |
| { | |
| "epoch": 1.9421990438939591, | |
| "grad_norm": 0.826034665107727, | |
| "learning_rate": 8.913230723305218e-05, | |
| "loss": 3.8176, | |
| "step": 559 | |
| }, | |
| { | |
| "epoch": 1.9456757931334203, | |
| "grad_norm": 0.9927733540534973, | |
| "learning_rate": 8.90752621580335e-05, | |
| "loss": 3.8469, | |
| "step": 560 | |
| }, | |
| { | |
| "epoch": 1.9491525423728815, | |
| "grad_norm": 1.0524464845657349, | |
| "learning_rate": 8.901808611448348e-05, | |
| "loss": 3.8041, | |
| "step": 561 | |
| }, | |
| { | |
| "epoch": 1.9526292916123424, | |
| "grad_norm": 1.098197102546692, | |
| "learning_rate": 8.896077929403901e-05, | |
| "loss": 3.7239, | |
| "step": 562 | |
| }, | |
| { | |
| "epoch": 1.9561060408518036, | |
| "grad_norm": 0.9311512112617493, | |
| "learning_rate": 8.890334188877533e-05, | |
| "loss": 3.8397, | |
| "step": 563 | |
| }, | |
| { | |
| "epoch": 1.9595827900912646, | |
| "grad_norm": 0.8799173831939697, | |
| "learning_rate": 8.884577409120535e-05, | |
| "loss": 3.7943, | |
| "step": 564 | |
| }, | |
| { | |
| "epoch": 1.9630595393307257, | |
| "grad_norm": 0.9762241840362549, | |
| "learning_rate": 8.878807609427905e-05, | |
| "loss": 3.7369, | |
| "step": 565 | |
| }, | |
| { | |
| "epoch": 1.966536288570187, | |
| "grad_norm": 1.0802910327911377, | |
| "learning_rate": 8.873024809138272e-05, | |
| "loss": 3.7294, | |
| "step": 566 | |
| }, | |
| { | |
| "epoch": 1.970013037809648, | |
| "grad_norm": 0.9968630075454712, | |
| "learning_rate": 8.86722902763385e-05, | |
| "loss": 3.8131, | |
| "step": 567 | |
| }, | |
| { | |
| "epoch": 1.973489787049109, | |
| "grad_norm": 1.0812654495239258, | |
| "learning_rate": 8.861420284340352e-05, | |
| "loss": 3.7722, | |
| "step": 568 | |
| }, | |
| { | |
| "epoch": 1.9769665362885702, | |
| "grad_norm": 1.0790822505950928, | |
| "learning_rate": 8.855598598726939e-05, | |
| "loss": 3.7907, | |
| "step": 569 | |
| }, | |
| { | |
| "epoch": 1.9804432855280312, | |
| "grad_norm": 1.0286710262298584, | |
| "learning_rate": 8.849763990306152e-05, | |
| "loss": 3.8615, | |
| "step": 570 | |
| }, | |
| { | |
| "epoch": 1.9839200347674923, | |
| "grad_norm": 0.9916720390319824, | |
| "learning_rate": 8.843916478633845e-05, | |
| "loss": 3.8969, | |
| "step": 571 | |
| }, | |
| { | |
| "epoch": 1.9873967840069535, | |
| "grad_norm": 1.0431153774261475, | |
| "learning_rate": 8.838056083309118e-05, | |
| "loss": 3.8122, | |
| "step": 572 | |
| }, | |
| { | |
| "epoch": 1.9908735332464147, | |
| "grad_norm": 0.9479820728302002, | |
| "learning_rate": 8.832182823974256e-05, | |
| "loss": 3.7987, | |
| "step": 573 | |
| }, | |
| { | |
| "epoch": 1.9943502824858759, | |
| "grad_norm": 0.8419528603553772, | |
| "learning_rate": 8.826296720314657e-05, | |
| "loss": 3.7854, | |
| "step": 574 | |
| }, | |
| { | |
| "epoch": 1.9978270317253368, | |
| "grad_norm": 0.7449129223823547, | |
| "learning_rate": 8.820397792058772e-05, | |
| "loss": 3.7427, | |
| "step": 575 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "grad_norm": 0.7268674373626709, | |
| "learning_rate": 8.814486058978035e-05, | |
| "loss": 3.7999, | |
| "step": 576 | |
| }, | |
| { | |
| "epoch": 2.003476749239461, | |
| "grad_norm": 0.7994548082351685, | |
| "learning_rate": 8.808561540886796e-05, | |
| "loss": 3.737, | |
| "step": 577 | |
| }, | |
| { | |
| "epoch": 2.0069534984789223, | |
| "grad_norm": 0.8729116320610046, | |
| "learning_rate": 8.802624257642261e-05, | |
| "loss": 3.8124, | |
| "step": 578 | |
| }, | |
| { | |
| "epoch": 2.0104302477183835, | |
| "grad_norm": 0.9562597274780273, | |
| "learning_rate": 8.796674229144418e-05, | |
| "loss": 3.8352, | |
| "step": 579 | |
| }, | |
| { | |
| "epoch": 2.0139069969578443, | |
| "grad_norm": 1.0685930252075195, | |
| "learning_rate": 8.790711475335971e-05, | |
| "loss": 3.799, | |
| "step": 580 | |
| }, | |
| { | |
| "epoch": 2.0173837461973054, | |
| "grad_norm": 1.0459791421890259, | |
| "learning_rate": 8.784736016202282e-05, | |
| "loss": 3.8583, | |
| "step": 581 | |
| }, | |
| { | |
| "epoch": 2.0208604954367666, | |
| "grad_norm": 0.8897305130958557, | |
| "learning_rate": 8.778747871771292e-05, | |
| "loss": 3.6429, | |
| "step": 582 | |
| }, | |
| { | |
| "epoch": 2.0243372446762278, | |
| "grad_norm": 0.8162091374397278, | |
| "learning_rate": 8.77274706211346e-05, | |
| "loss": 3.6127, | |
| "step": 583 | |
| }, | |
| { | |
| "epoch": 2.027813993915689, | |
| "grad_norm": 0.7601345181465149, | |
| "learning_rate": 8.766733607341698e-05, | |
| "loss": 3.7352, | |
| "step": 584 | |
| }, | |
| { | |
| "epoch": 2.03129074315515, | |
| "grad_norm": 0.7575955986976624, | |
| "learning_rate": 8.760707527611297e-05, | |
| "loss": 3.7707, | |
| "step": 585 | |
| }, | |
| { | |
| "epoch": 2.034767492394611, | |
| "grad_norm": 0.6935109496116638, | |
| "learning_rate": 8.754668843119864e-05, | |
| "loss": 3.6845, | |
| "step": 586 | |
| }, | |
| { | |
| "epoch": 2.038244241634072, | |
| "grad_norm": 0.6233929395675659, | |
| "learning_rate": 8.748617574107257e-05, | |
| "loss": 3.6204, | |
| "step": 587 | |
| }, | |
| { | |
| "epoch": 2.041720990873533, | |
| "grad_norm": 0.6228452920913696, | |
| "learning_rate": 8.742553740855506e-05, | |
| "loss": 3.7569, | |
| "step": 588 | |
| }, | |
| { | |
| "epoch": 2.0451977401129944, | |
| "grad_norm": 0.6230937838554382, | |
| "learning_rate": 8.736477363688761e-05, | |
| "loss": 3.7207, | |
| "step": 589 | |
| }, | |
| { | |
| "epoch": 2.0486744893524556, | |
| "grad_norm": 0.6162528395652771, | |
| "learning_rate": 8.730388462973208e-05, | |
| "loss": 3.7242, | |
| "step": 590 | |
| }, | |
| { | |
| "epoch": 2.0521512385919167, | |
| "grad_norm": 0.7501924633979797, | |
| "learning_rate": 8.724287059117016e-05, | |
| "loss": 3.7486, | |
| "step": 591 | |
| }, | |
| { | |
| "epoch": 2.0556279878313775, | |
| "grad_norm": 0.8422186970710754, | |
| "learning_rate": 8.718173172570254e-05, | |
| "loss": 3.6304, | |
| "step": 592 | |
| }, | |
| { | |
| "epoch": 2.0591047370708386, | |
| "grad_norm": 0.8915469646453857, | |
| "learning_rate": 8.71204682382483e-05, | |
| "loss": 3.7905, | |
| "step": 593 | |
| }, | |
| { | |
| "epoch": 2.0625814863103, | |
| "grad_norm": 0.9807952642440796, | |
| "learning_rate": 8.705908033414425e-05, | |
| "loss": 3.643, | |
| "step": 594 | |
| }, | |
| { | |
| "epoch": 2.066058235549761, | |
| "grad_norm": 1.0013173818588257, | |
| "learning_rate": 8.69975682191442e-05, | |
| "loss": 3.6854, | |
| "step": 595 | |
| }, | |
| { | |
| "epoch": 2.069534984789222, | |
| "grad_norm": 0.9831700325012207, | |
| "learning_rate": 8.693593209941825e-05, | |
| "loss": 3.673, | |
| "step": 596 | |
| }, | |
| { | |
| "epoch": 2.0730117340286833, | |
| "grad_norm": 1.1712896823883057, | |
| "learning_rate": 8.687417218155213e-05, | |
| "loss": 3.8204, | |
| "step": 597 | |
| }, | |
| { | |
| "epoch": 2.076488483268144, | |
| "grad_norm": 0.9944608807563782, | |
| "learning_rate": 8.681228867254655e-05, | |
| "loss": 3.7916, | |
| "step": 598 | |
| }, | |
| { | |
| "epoch": 2.0799652325076052, | |
| "grad_norm": 1.0010840892791748, | |
| "learning_rate": 8.675028177981643e-05, | |
| "loss": 3.7294, | |
| "step": 599 | |
| }, | |
| { | |
| "epoch": 2.0834419817470664, | |
| "grad_norm": 0.9211255311965942, | |
| "learning_rate": 8.668815171119021e-05, | |
| "loss": 3.6087, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 2.0869187309865276, | |
| "grad_norm": 0.869985818862915, | |
| "learning_rate": 8.66258986749092e-05, | |
| "loss": 3.5953, | |
| "step": 601 | |
| }, | |
| { | |
| "epoch": 2.0903954802259888, | |
| "grad_norm": 0.7657525539398193, | |
| "learning_rate": 8.656352287962686e-05, | |
| "loss": 3.6554, | |
| "step": 602 | |
| }, | |
| { | |
| "epoch": 2.09387222946545, | |
| "grad_norm": 0.7607572078704834, | |
| "learning_rate": 8.650102453440813e-05, | |
| "loss": 3.6477, | |
| "step": 603 | |
| }, | |
| { | |
| "epoch": 2.097348978704911, | |
| "grad_norm": 0.9117911458015442, | |
| "learning_rate": 8.643840384872866e-05, | |
| "loss": 3.6963, | |
| "step": 604 | |
| }, | |
| { | |
| "epoch": 2.100825727944372, | |
| "grad_norm": 0.8420771956443787, | |
| "learning_rate": 8.637566103247415e-05, | |
| "loss": 3.6472, | |
| "step": 605 | |
| }, | |
| { | |
| "epoch": 2.104302477183833, | |
| "grad_norm": 0.7396708726882935, | |
| "learning_rate": 8.631279629593966e-05, | |
| "loss": 3.7796, | |
| "step": 606 | |
| }, | |
| { | |
| "epoch": 2.107779226423294, | |
| "grad_norm": 0.6754468083381653, | |
| "learning_rate": 8.624980984982892e-05, | |
| "loss": 3.581, | |
| "step": 607 | |
| }, | |
| { | |
| "epoch": 2.1112559756627554, | |
| "grad_norm": 0.78558748960495, | |
| "learning_rate": 8.618670190525352e-05, | |
| "loss": 3.7445, | |
| "step": 608 | |
| }, | |
| { | |
| "epoch": 2.1147327249022165, | |
| "grad_norm": 0.7408106923103333, | |
| "learning_rate": 8.612347267373234e-05, | |
| "loss": 3.7551, | |
| "step": 609 | |
| }, | |
| { | |
| "epoch": 2.1182094741416777, | |
| "grad_norm": 0.7574490904808044, | |
| "learning_rate": 8.606012236719073e-05, | |
| "loss": 3.5841, | |
| "step": 610 | |
| }, | |
| { | |
| "epoch": 2.1216862233811384, | |
| "grad_norm": 0.8567992448806763, | |
| "learning_rate": 8.599665119795992e-05, | |
| "loss": 3.5432, | |
| "step": 611 | |
| }, | |
| { | |
| "epoch": 2.1251629726205996, | |
| "grad_norm": 1.0516282320022583, | |
| "learning_rate": 8.593305937877614e-05, | |
| "loss": 3.6579, | |
| "step": 612 | |
| }, | |
| { | |
| "epoch": 2.128639721860061, | |
| "grad_norm": 1.0702804327011108, | |
| "learning_rate": 8.586934712278006e-05, | |
| "loss": 3.7136, | |
| "step": 613 | |
| }, | |
| { | |
| "epoch": 2.132116471099522, | |
| "grad_norm": 0.9042352437973022, | |
| "learning_rate": 8.580551464351603e-05, | |
| "loss": 3.7588, | |
| "step": 614 | |
| }, | |
| { | |
| "epoch": 2.135593220338983, | |
| "grad_norm": 1.0957833528518677, | |
| "learning_rate": 8.574156215493132e-05, | |
| "loss": 3.7078, | |
| "step": 615 | |
| }, | |
| { | |
| "epoch": 2.1390699695784443, | |
| "grad_norm": 1.1452794075012207, | |
| "learning_rate": 8.567748987137544e-05, | |
| "loss": 3.6626, | |
| "step": 616 | |
| }, | |
| { | |
| "epoch": 2.142546718817905, | |
| "grad_norm": 0.8174344897270203, | |
| "learning_rate": 8.561329800759943e-05, | |
| "loss": 3.7657, | |
| "step": 617 | |
| }, | |
| { | |
| "epoch": 2.146023468057366, | |
| "grad_norm": 0.8348018527030945, | |
| "learning_rate": 8.554898677875509e-05, | |
| "loss": 3.7377, | |
| "step": 618 | |
| }, | |
| { | |
| "epoch": 2.1495002172968274, | |
| "grad_norm": 0.8676193952560425, | |
| "learning_rate": 8.548455640039437e-05, | |
| "loss": 3.6855, | |
| "step": 619 | |
| }, | |
| { | |
| "epoch": 2.1529769665362886, | |
| "grad_norm": 0.7565463185310364, | |
| "learning_rate": 8.542000708846852e-05, | |
| "loss": 3.6733, | |
| "step": 620 | |
| }, | |
| { | |
| "epoch": 2.1564537157757497, | |
| "grad_norm": 0.7726176977157593, | |
| "learning_rate": 8.535533905932738e-05, | |
| "loss": 3.6327, | |
| "step": 621 | |
| }, | |
| { | |
| "epoch": 2.159930465015211, | |
| "grad_norm": 0.7790519595146179, | |
| "learning_rate": 8.529055252971879e-05, | |
| "loss": 3.6361, | |
| "step": 622 | |
| }, | |
| { | |
| "epoch": 2.163407214254672, | |
| "grad_norm": 0.7205121517181396, | |
| "learning_rate": 8.522564771678771e-05, | |
| "loss": 3.6716, | |
| "step": 623 | |
| }, | |
| { | |
| "epoch": 2.166883963494133, | |
| "grad_norm": 0.7978485226631165, | |
| "learning_rate": 8.516062483807556e-05, | |
| "loss": 3.7891, | |
| "step": 624 | |
| }, | |
| { | |
| "epoch": 2.170360712733594, | |
| "grad_norm": 0.7888757586479187, | |
| "learning_rate": 8.509548411151948e-05, | |
| "loss": 3.6903, | |
| "step": 625 | |
| }, | |
| { | |
| "epoch": 2.173837461973055, | |
| "grad_norm": 0.7865068912506104, | |
| "learning_rate": 8.503022575545158e-05, | |
| "loss": 3.7538, | |
| "step": 626 | |
| }, | |
| { | |
| "epoch": 2.1773142112125163, | |
| "grad_norm": 0.943021833896637, | |
| "learning_rate": 8.49648499885983e-05, | |
| "loss": 3.6956, | |
| "step": 627 | |
| }, | |
| { | |
| "epoch": 2.1807909604519775, | |
| "grad_norm": 1.2752074003219604, | |
| "learning_rate": 8.489935703007949e-05, | |
| "loss": 3.719, | |
| "step": 628 | |
| }, | |
| { | |
| "epoch": 2.1842677096914387, | |
| "grad_norm": 0.9368734955787659, | |
| "learning_rate": 8.483374709940792e-05, | |
| "loss": 3.7057, | |
| "step": 629 | |
| }, | |
| { | |
| "epoch": 2.1877444589308994, | |
| "grad_norm": 0.8080997467041016, | |
| "learning_rate": 8.476802041648832e-05, | |
| "loss": 3.7596, | |
| "step": 630 | |
| }, | |
| { | |
| "epoch": 2.1912212081703606, | |
| "grad_norm": 0.8630561828613281, | |
| "learning_rate": 8.47021772016168e-05, | |
| "loss": 3.6446, | |
| "step": 631 | |
| }, | |
| { | |
| "epoch": 2.1946979574098218, | |
| "grad_norm": 0.975145161151886, | |
| "learning_rate": 8.463621767547998e-05, | |
| "loss": 3.7115, | |
| "step": 632 | |
| }, | |
| { | |
| "epoch": 2.198174706649283, | |
| "grad_norm": 0.9544527530670166, | |
| "learning_rate": 8.457014205915438e-05, | |
| "loss": 3.6173, | |
| "step": 633 | |
| }, | |
| { | |
| "epoch": 2.201651455888744, | |
| "grad_norm": 0.8249536752700806, | |
| "learning_rate": 8.450395057410561e-05, | |
| "loss": 3.606, | |
| "step": 634 | |
| }, | |
| { | |
| "epoch": 2.2051282051282053, | |
| "grad_norm": 0.852883517742157, | |
| "learning_rate": 8.443764344218761e-05, | |
| "loss": 3.6814, | |
| "step": 635 | |
| }, | |
| { | |
| "epoch": 2.208604954367666, | |
| "grad_norm": 0.8320687413215637, | |
| "learning_rate": 8.437122088564198e-05, | |
| "loss": 3.7175, | |
| "step": 636 | |
| }, | |
| { | |
| "epoch": 2.212081703607127, | |
| "grad_norm": 0.781334638595581, | |
| "learning_rate": 8.430468312709712e-05, | |
| "loss": 3.7437, | |
| "step": 637 | |
| }, | |
| { | |
| "epoch": 2.2155584528465884, | |
| "grad_norm": 0.7602312564849854, | |
| "learning_rate": 8.423803038956764e-05, | |
| "loss": 3.6857, | |
| "step": 638 | |
| }, | |
| { | |
| "epoch": 2.2190352020860495, | |
| "grad_norm": 0.7551150918006897, | |
| "learning_rate": 8.417126289645344e-05, | |
| "loss": 3.7341, | |
| "step": 639 | |
| }, | |
| { | |
| "epoch": 2.2225119513255107, | |
| "grad_norm": 0.8869550228118896, | |
| "learning_rate": 8.410438087153911e-05, | |
| "loss": 3.6262, | |
| "step": 640 | |
| }, | |
| { | |
| "epoch": 2.225988700564972, | |
| "grad_norm": 0.8678908348083496, | |
| "learning_rate": 8.403738453899308e-05, | |
| "loss": 3.5698, | |
| "step": 641 | |
| }, | |
| { | |
| "epoch": 2.229465449804433, | |
| "grad_norm": 0.7575172781944275, | |
| "learning_rate": 8.39702741233669e-05, | |
| "loss": 3.6571, | |
| "step": 642 | |
| }, | |
| { | |
| "epoch": 2.232942199043894, | |
| "grad_norm": 0.7576946020126343, | |
| "learning_rate": 8.390304984959454e-05, | |
| "loss": 3.7523, | |
| "step": 643 | |
| }, | |
| { | |
| "epoch": 2.236418948283355, | |
| "grad_norm": 0.7081326246261597, | |
| "learning_rate": 8.383571194299154e-05, | |
| "loss": 3.6405, | |
| "step": 644 | |
| }, | |
| { | |
| "epoch": 2.239895697522816, | |
| "grad_norm": 0.7227503657341003, | |
| "learning_rate": 8.376826062925432e-05, | |
| "loss": 3.5813, | |
| "step": 645 | |
| }, | |
| { | |
| "epoch": 2.2433724467622773, | |
| "grad_norm": 0.9157429933547974, | |
| "learning_rate": 8.370069613445939e-05, | |
| "loss": 3.7295, | |
| "step": 646 | |
| }, | |
| { | |
| "epoch": 2.2468491960017385, | |
| "grad_norm": 0.9113756418228149, | |
| "learning_rate": 8.363301868506264e-05, | |
| "loss": 3.6834, | |
| "step": 647 | |
| }, | |
| { | |
| "epoch": 2.2503259452411997, | |
| "grad_norm": 0.9403269290924072, | |
| "learning_rate": 8.356522850789852e-05, | |
| "loss": 3.7961, | |
| "step": 648 | |
| }, | |
| { | |
| "epoch": 2.2538026944806604, | |
| "grad_norm": 0.7835237979888916, | |
| "learning_rate": 8.349732583017934e-05, | |
| "loss": 3.5661, | |
| "step": 649 | |
| }, | |
| { | |
| "epoch": 2.2572794437201216, | |
| "grad_norm": 0.7705581188201904, | |
| "learning_rate": 8.342931087949446e-05, | |
| "loss": 3.658, | |
| "step": 650 | |
| }, | |
| { | |
| "epoch": 2.2607561929595827, | |
| "grad_norm": 0.9230582118034363, | |
| "learning_rate": 8.336118388380954e-05, | |
| "loss": 3.5736, | |
| "step": 651 | |
| }, | |
| { | |
| "epoch": 2.264232942199044, | |
| "grad_norm": 0.8842865824699402, | |
| "learning_rate": 8.329294507146579e-05, | |
| "loss": 3.7125, | |
| "step": 652 | |
| }, | |
| { | |
| "epoch": 2.267709691438505, | |
| "grad_norm": 0.8269637823104858, | |
| "learning_rate": 8.32245946711792e-05, | |
| "loss": 3.7351, | |
| "step": 653 | |
| }, | |
| { | |
| "epoch": 2.2711864406779663, | |
| "grad_norm": 0.8161711096763611, | |
| "learning_rate": 8.315613291203976e-05, | |
| "loss": 3.561, | |
| "step": 654 | |
| }, | |
| { | |
| "epoch": 2.274663189917427, | |
| "grad_norm": 0.8007653951644897, | |
| "learning_rate": 8.30875600235107e-05, | |
| "loss": 3.6928, | |
| "step": 655 | |
| }, | |
| { | |
| "epoch": 2.278139939156888, | |
| "grad_norm": 0.685224175453186, | |
| "learning_rate": 8.301887623542773e-05, | |
| "loss": 3.6402, | |
| "step": 656 | |
| }, | |
| { | |
| "epoch": 2.2816166883963493, | |
| "grad_norm": 0.6802821755409241, | |
| "learning_rate": 8.295008177799827e-05, | |
| "loss": 3.6212, | |
| "step": 657 | |
| }, | |
| { | |
| "epoch": 2.2850934376358105, | |
| "grad_norm": 0.6886381506919861, | |
| "learning_rate": 8.288117688180064e-05, | |
| "loss": 3.5536, | |
| "step": 658 | |
| }, | |
| { | |
| "epoch": 2.2885701868752717, | |
| "grad_norm": 0.7427237629890442, | |
| "learning_rate": 8.281216177778334e-05, | |
| "loss": 3.7049, | |
| "step": 659 | |
| }, | |
| { | |
| "epoch": 2.292046936114733, | |
| "grad_norm": 0.6893945932388306, | |
| "learning_rate": 8.274303669726426e-05, | |
| "loss": 3.6027, | |
| "step": 660 | |
| }, | |
| { | |
| "epoch": 2.295523685354194, | |
| "grad_norm": 0.7162789106369019, | |
| "learning_rate": 8.267380187192989e-05, | |
| "loss": 3.7657, | |
| "step": 661 | |
| }, | |
| { | |
| "epoch": 2.2990004345936548, | |
| "grad_norm": 0.7326735854148865, | |
| "learning_rate": 8.260445753383452e-05, | |
| "loss": 3.5549, | |
| "step": 662 | |
| }, | |
| { | |
| "epoch": 2.302477183833116, | |
| "grad_norm": 0.8043762445449829, | |
| "learning_rate": 8.253500391539956e-05, | |
| "loss": 3.5237, | |
| "step": 663 | |
| }, | |
| { | |
| "epoch": 2.305953933072577, | |
| "grad_norm": 0.9436041712760925, | |
| "learning_rate": 8.246544124941266e-05, | |
| "loss": 3.6999, | |
| "step": 664 | |
| }, | |
| { | |
| "epoch": 2.3094306823120383, | |
| "grad_norm": 0.9793234467506409, | |
| "learning_rate": 8.239576976902695e-05, | |
| "loss": 3.6953, | |
| "step": 665 | |
| }, | |
| { | |
| "epoch": 2.3129074315514995, | |
| "grad_norm": 0.8090691566467285, | |
| "learning_rate": 8.232598970776028e-05, | |
| "loss": 3.6129, | |
| "step": 666 | |
| }, | |
| { | |
| "epoch": 2.3163841807909606, | |
| "grad_norm": 0.8231872320175171, | |
| "learning_rate": 8.225610129949443e-05, | |
| "loss": 3.6311, | |
| "step": 667 | |
| }, | |
| { | |
| "epoch": 2.3198609300304214, | |
| "grad_norm": 0.7168385982513428, | |
| "learning_rate": 8.218610477847436e-05, | |
| "loss": 3.5907, | |
| "step": 668 | |
| }, | |
| { | |
| "epoch": 2.3233376792698825, | |
| "grad_norm": 0.7335355877876282, | |
| "learning_rate": 8.211600037930736e-05, | |
| "loss": 3.6044, | |
| "step": 669 | |
| }, | |
| { | |
| "epoch": 2.3268144285093437, | |
| "grad_norm": 0.8048617839813232, | |
| "learning_rate": 8.204578833696228e-05, | |
| "loss": 3.7008, | |
| "step": 670 | |
| }, | |
| { | |
| "epoch": 2.330291177748805, | |
| "grad_norm": 0.8575453758239746, | |
| "learning_rate": 8.197546888676878e-05, | |
| "loss": 3.6408, | |
| "step": 671 | |
| }, | |
| { | |
| "epoch": 2.333767926988266, | |
| "grad_norm": 0.7635408043861389, | |
| "learning_rate": 8.190504226441654e-05, | |
| "loss": 3.605, | |
| "step": 672 | |
| }, | |
| { | |
| "epoch": 2.3372446762277272, | |
| "grad_norm": 0.6884478330612183, | |
| "learning_rate": 8.183450870595441e-05, | |
| "loss": 3.6391, | |
| "step": 673 | |
| }, | |
| { | |
| "epoch": 2.340721425467188, | |
| "grad_norm": 0.9262790083885193, | |
| "learning_rate": 8.176386844778968e-05, | |
| "loss": 3.5877, | |
| "step": 674 | |
| }, | |
| { | |
| "epoch": 2.344198174706649, | |
| "grad_norm": 1.1123980283737183, | |
| "learning_rate": 8.169312172668726e-05, | |
| "loss": 3.5557, | |
| "step": 675 | |
| }, | |
| { | |
| "epoch": 2.3476749239461103, | |
| "grad_norm": 0.8961548805236816, | |
| "learning_rate": 8.162226877976887e-05, | |
| "loss": 3.6788, | |
| "step": 676 | |
| }, | |
| { | |
| "epoch": 2.3511516731855715, | |
| "grad_norm": 1.1101398468017578, | |
| "learning_rate": 8.15513098445123e-05, | |
| "loss": 3.653, | |
| "step": 677 | |
| }, | |
| { | |
| "epoch": 2.3546284224250327, | |
| "grad_norm": 1.0202254056930542, | |
| "learning_rate": 8.148024515875057e-05, | |
| "loss": 3.6128, | |
| "step": 678 | |
| }, | |
| { | |
| "epoch": 2.358105171664494, | |
| "grad_norm": 0.9809442758560181, | |
| "learning_rate": 8.140907496067114e-05, | |
| "loss": 3.6884, | |
| "step": 679 | |
| }, | |
| { | |
| "epoch": 2.361581920903955, | |
| "grad_norm": 0.998312771320343, | |
| "learning_rate": 8.133779948881514e-05, | |
| "loss": 3.6343, | |
| "step": 680 | |
| }, | |
| { | |
| "epoch": 2.3650586701434158, | |
| "grad_norm": 0.8048033118247986, | |
| "learning_rate": 8.126641898207648e-05, | |
| "loss": 3.6517, | |
| "step": 681 | |
| }, | |
| { | |
| "epoch": 2.368535419382877, | |
| "grad_norm": 0.7618369460105896, | |
| "learning_rate": 8.119493367970119e-05, | |
| "loss": 3.5172, | |
| "step": 682 | |
| }, | |
| { | |
| "epoch": 2.372012168622338, | |
| "grad_norm": 0.7011895775794983, | |
| "learning_rate": 8.112334382128651e-05, | |
| "loss": 3.5716, | |
| "step": 683 | |
| }, | |
| { | |
| "epoch": 2.3754889178617993, | |
| "grad_norm": 0.7433372735977173, | |
| "learning_rate": 8.105164964678009e-05, | |
| "loss": 3.7358, | |
| "step": 684 | |
| }, | |
| { | |
| "epoch": 2.3789656671012605, | |
| "grad_norm": 0.8242580890655518, | |
| "learning_rate": 8.09798513964793e-05, | |
| "loss": 3.5706, | |
| "step": 685 | |
| }, | |
| { | |
| "epoch": 2.382442416340721, | |
| "grad_norm": 0.7195044755935669, | |
| "learning_rate": 8.090794931103026e-05, | |
| "loss": 3.6252, | |
| "step": 686 | |
| }, | |
| { | |
| "epoch": 2.3859191655801824, | |
| "grad_norm": 0.7642951011657715, | |
| "learning_rate": 8.083594363142717e-05, | |
| "loss": 3.6058, | |
| "step": 687 | |
| }, | |
| { | |
| "epoch": 2.3893959148196435, | |
| "grad_norm": 0.799333393573761, | |
| "learning_rate": 8.076383459901137e-05, | |
| "loss": 3.5904, | |
| "step": 688 | |
| }, | |
| { | |
| "epoch": 2.3928726640591047, | |
| "grad_norm": 0.7877199053764343, | |
| "learning_rate": 8.06916224554707e-05, | |
| "loss": 3.5985, | |
| "step": 689 | |
| }, | |
| { | |
| "epoch": 2.396349413298566, | |
| "grad_norm": 0.8267778754234314, | |
| "learning_rate": 8.061930744283854e-05, | |
| "loss": 3.5293, | |
| "step": 690 | |
| }, | |
| { | |
| "epoch": 2.399826162538027, | |
| "grad_norm": 0.8435574173927307, | |
| "learning_rate": 8.054688980349309e-05, | |
| "loss": 3.614, | |
| "step": 691 | |
| }, | |
| { | |
| "epoch": 2.4033029117774882, | |
| "grad_norm": 0.7058742046356201, | |
| "learning_rate": 8.047436978015649e-05, | |
| "loss": 3.5749, | |
| "step": 692 | |
| }, | |
| { | |
| "epoch": 2.406779661016949, | |
| "grad_norm": 0.6880009770393372, | |
| "learning_rate": 8.040174761589406e-05, | |
| "loss": 3.6028, | |
| "step": 693 | |
| }, | |
| { | |
| "epoch": 2.41025641025641, | |
| "grad_norm": 0.7245696187019348, | |
| "learning_rate": 8.032902355411345e-05, | |
| "loss": 3.6305, | |
| "step": 694 | |
| }, | |
| { | |
| "epoch": 2.4137331594958713, | |
| "grad_norm": 0.7221509218215942, | |
| "learning_rate": 8.025619783856388e-05, | |
| "loss": 3.552, | |
| "step": 695 | |
| }, | |
| { | |
| "epoch": 2.4172099087353325, | |
| "grad_norm": 0.6806580424308777, | |
| "learning_rate": 8.018327071333521e-05, | |
| "loss": 3.497, | |
| "step": 696 | |
| }, | |
| { | |
| "epoch": 2.4206866579747937, | |
| "grad_norm": 0.6653738617897034, | |
| "learning_rate": 8.011024242285728e-05, | |
| "loss": 3.5242, | |
| "step": 697 | |
| }, | |
| { | |
| "epoch": 2.424163407214255, | |
| "grad_norm": 0.725482702255249, | |
| "learning_rate": 8.003711321189895e-05, | |
| "loss": 3.6399, | |
| "step": 698 | |
| }, | |
| { | |
| "epoch": 2.427640156453716, | |
| "grad_norm": 0.6843823194503784, | |
| "learning_rate": 7.996388332556735e-05, | |
| "loss": 3.5002, | |
| "step": 699 | |
| }, | |
| { | |
| "epoch": 2.4311169056931767, | |
| "grad_norm": 0.6126710772514343, | |
| "learning_rate": 7.989055300930704e-05, | |
| "loss": 3.5279, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 2.434593654932638, | |
| "grad_norm": 0.6402905583381653, | |
| "learning_rate": 7.981712250889921e-05, | |
| "loss": 3.4861, | |
| "step": 701 | |
| }, | |
| { | |
| "epoch": 2.438070404172099, | |
| "grad_norm": 0.6130061745643616, | |
| "learning_rate": 7.97435920704608e-05, | |
| "loss": 3.5446, | |
| "step": 702 | |
| }, | |
| { | |
| "epoch": 2.4415471534115603, | |
| "grad_norm": 0.6037262678146362, | |
| "learning_rate": 7.966996194044376e-05, | |
| "loss": 3.5591, | |
| "step": 703 | |
| }, | |
| { | |
| "epoch": 2.4450239026510214, | |
| "grad_norm": 0.5912169218063354, | |
| "learning_rate": 7.959623236563411e-05, | |
| "loss": 3.5881, | |
| "step": 704 | |
| }, | |
| { | |
| "epoch": 2.448500651890482, | |
| "grad_norm": 0.6557775139808655, | |
| "learning_rate": 7.952240359315126e-05, | |
| "loss": 3.5602, | |
| "step": 705 | |
| }, | |
| { | |
| "epoch": 2.4519774011299433, | |
| "grad_norm": 0.7176311016082764, | |
| "learning_rate": 7.944847587044704e-05, | |
| "loss": 3.6946, | |
| "step": 706 | |
| }, | |
| { | |
| "epoch": 2.4554541503694045, | |
| "grad_norm": 0.9263601303100586, | |
| "learning_rate": 7.937444944530495e-05, | |
| "loss": 3.4952, | |
| "step": 707 | |
| }, | |
| { | |
| "epoch": 2.4589308996088657, | |
| "grad_norm": 0.8851249814033508, | |
| "learning_rate": 7.930032456583931e-05, | |
| "loss": 3.4797, | |
| "step": 708 | |
| }, | |
| { | |
| "epoch": 2.462407648848327, | |
| "grad_norm": 0.7957706451416016, | |
| "learning_rate": 7.922610148049445e-05, | |
| "loss": 3.4552, | |
| "step": 709 | |
| }, | |
| { | |
| "epoch": 2.465884398087788, | |
| "grad_norm": 0.8621044754981995, | |
| "learning_rate": 7.915178043804382e-05, | |
| "loss": 3.6836, | |
| "step": 710 | |
| }, | |
| { | |
| "epoch": 2.469361147327249, | |
| "grad_norm": 0.8820719718933105, | |
| "learning_rate": 7.907736168758921e-05, | |
| "loss": 3.4278, | |
| "step": 711 | |
| }, | |
| { | |
| "epoch": 2.47283789656671, | |
| "grad_norm": 0.9527013301849365, | |
| "learning_rate": 7.900284547855991e-05, | |
| "loss": 3.4505, | |
| "step": 712 | |
| }, | |
| { | |
| "epoch": 2.476314645806171, | |
| "grad_norm": 0.8297730088233948, | |
| "learning_rate": 7.892823206071185e-05, | |
| "loss": 3.5746, | |
| "step": 713 | |
| }, | |
| { | |
| "epoch": 2.4797913950456323, | |
| "grad_norm": 0.8282458186149597, | |
| "learning_rate": 7.885352168412676e-05, | |
| "loss": 3.566, | |
| "step": 714 | |
| }, | |
| { | |
| "epoch": 2.4832681442850935, | |
| "grad_norm": 0.8243257403373718, | |
| "learning_rate": 7.877871459921138e-05, | |
| "loss": 3.4891, | |
| "step": 715 | |
| }, | |
| { | |
| "epoch": 2.4867448935245546, | |
| "grad_norm": 0.8400633335113525, | |
| "learning_rate": 7.870381105669657e-05, | |
| "loss": 3.4364, | |
| "step": 716 | |
| }, | |
| { | |
| "epoch": 2.490221642764016, | |
| "grad_norm": 0.9697174429893494, | |
| "learning_rate": 7.862881130763646e-05, | |
| "loss": 3.58, | |
| "step": 717 | |
| }, | |
| { | |
| "epoch": 2.493698392003477, | |
| "grad_norm": 0.882383406162262, | |
| "learning_rate": 7.85537156034077e-05, | |
| "loss": 3.5832, | |
| "step": 718 | |
| }, | |
| { | |
| "epoch": 2.4971751412429377, | |
| "grad_norm": 0.7008349299430847, | |
| "learning_rate": 7.847852419570846e-05, | |
| "loss": 3.4555, | |
| "step": 719 | |
| }, | |
| { | |
| "epoch": 2.500651890482399, | |
| "grad_norm": 0.8088206648826599, | |
| "learning_rate": 7.840323733655778e-05, | |
| "loss": 3.5888, | |
| "step": 720 | |
| }, | |
| { | |
| "epoch": 2.50412863972186, | |
| "grad_norm": 0.8056133985519409, | |
| "learning_rate": 7.832785527829458e-05, | |
| "loss": 3.6404, | |
| "step": 721 | |
| }, | |
| { | |
| "epoch": 2.5076053889613212, | |
| "grad_norm": 0.9462586045265198, | |
| "learning_rate": 7.825237827357683e-05, | |
| "loss": 3.5969, | |
| "step": 722 | |
| }, | |
| { | |
| "epoch": 2.5110821382007824, | |
| "grad_norm": 0.8435570597648621, | |
| "learning_rate": 7.817680657538078e-05, | |
| "loss": 3.5405, | |
| "step": 723 | |
| }, | |
| { | |
| "epoch": 2.514558887440243, | |
| "grad_norm": 0.7596110105514526, | |
| "learning_rate": 7.8101140437e-05, | |
| "loss": 3.5774, | |
| "step": 724 | |
| }, | |
| { | |
| "epoch": 2.5180356366797043, | |
| "grad_norm": 0.8581807017326355, | |
| "learning_rate": 7.80253801120447e-05, | |
| "loss": 3.4993, | |
| "step": 725 | |
| }, | |
| { | |
| "epoch": 2.5215123859191655, | |
| "grad_norm": 0.7680181860923767, | |
| "learning_rate": 7.794952585444068e-05, | |
| "loss": 3.5667, | |
| "step": 726 | |
| }, | |
| { | |
| "epoch": 2.5249891351586267, | |
| "grad_norm": 0.8518640398979187, | |
| "learning_rate": 7.78735779184286e-05, | |
| "loss": 3.5648, | |
| "step": 727 | |
| }, | |
| { | |
| "epoch": 2.528465884398088, | |
| "grad_norm": 0.8862175345420837, | |
| "learning_rate": 7.779753655856312e-05, | |
| "loss": 3.608, | |
| "step": 728 | |
| }, | |
| { | |
| "epoch": 2.531942633637549, | |
| "grad_norm": 0.8444491028785706, | |
| "learning_rate": 7.772140202971204e-05, | |
| "loss": 3.5332, | |
| "step": 729 | |
| }, | |
| { | |
| "epoch": 2.53541938287701, | |
| "grad_norm": 0.8340655565261841, | |
| "learning_rate": 7.764517458705536e-05, | |
| "loss": 3.4764, | |
| "step": 730 | |
| }, | |
| { | |
| "epoch": 2.538896132116471, | |
| "grad_norm": 0.9282291531562805, | |
| "learning_rate": 7.756885448608459e-05, | |
| "loss": 3.5358, | |
| "step": 731 | |
| }, | |
| { | |
| "epoch": 2.542372881355932, | |
| "grad_norm": 0.7459742426872253, | |
| "learning_rate": 7.749244198260175e-05, | |
| "loss": 3.5173, | |
| "step": 732 | |
| }, | |
| { | |
| "epoch": 2.5458496305953933, | |
| "grad_norm": 0.6265541315078735, | |
| "learning_rate": 7.74159373327186e-05, | |
| "loss": 3.5435, | |
| "step": 733 | |
| }, | |
| { | |
| "epoch": 2.5493263798348544, | |
| "grad_norm": 0.6342918276786804, | |
| "learning_rate": 7.733934079285569e-05, | |
| "loss": 3.5435, | |
| "step": 734 | |
| }, | |
| { | |
| "epoch": 2.5528031290743156, | |
| "grad_norm": 0.6661461591720581, | |
| "learning_rate": 7.726265261974162e-05, | |
| "loss": 3.4791, | |
| "step": 735 | |
| }, | |
| { | |
| "epoch": 2.5562798783137763, | |
| "grad_norm": 0.6519825458526611, | |
| "learning_rate": 7.718587307041208e-05, | |
| "loss": 3.6155, | |
| "step": 736 | |
| }, | |
| { | |
| "epoch": 2.559756627553238, | |
| "grad_norm": 0.6564475297927856, | |
| "learning_rate": 7.710900240220904e-05, | |
| "loss": 3.6105, | |
| "step": 737 | |
| }, | |
| { | |
| "epoch": 2.5632333767926987, | |
| "grad_norm": 0.6510676741600037, | |
| "learning_rate": 7.703204087277988e-05, | |
| "loss": 3.5056, | |
| "step": 738 | |
| }, | |
| { | |
| "epoch": 2.56671012603216, | |
| "grad_norm": 0.7257498502731323, | |
| "learning_rate": 7.695498874007649e-05, | |
| "loss": 3.4504, | |
| "step": 739 | |
| }, | |
| { | |
| "epoch": 2.570186875271621, | |
| "grad_norm": 0.7453542351722717, | |
| "learning_rate": 7.687784626235448e-05, | |
| "loss": 3.5899, | |
| "step": 740 | |
| }, | |
| { | |
| "epoch": 2.573663624511082, | |
| "grad_norm": 0.855290949344635, | |
| "learning_rate": 7.680061369817222e-05, | |
| "loss": 3.5492, | |
| "step": 741 | |
| }, | |
| { | |
| "epoch": 2.5771403737505434, | |
| "grad_norm": 0.9114766120910645, | |
| "learning_rate": 7.672329130639005e-05, | |
| "loss": 3.5393, | |
| "step": 742 | |
| }, | |
| { | |
| "epoch": 2.580617122990004, | |
| "grad_norm": 0.7029814720153809, | |
| "learning_rate": 7.66458793461694e-05, | |
| "loss": 3.5573, | |
| "step": 743 | |
| }, | |
| { | |
| "epoch": 2.5840938722294653, | |
| "grad_norm": 0.6988779306411743, | |
| "learning_rate": 7.656837807697187e-05, | |
| "loss": 3.596, | |
| "step": 744 | |
| }, | |
| { | |
| "epoch": 2.5875706214689265, | |
| "grad_norm": 0.7252441048622131, | |
| "learning_rate": 7.64907877585584e-05, | |
| "loss": 3.479, | |
| "step": 745 | |
| }, | |
| { | |
| "epoch": 2.5910473707083876, | |
| "grad_norm": 0.7226544618606567, | |
| "learning_rate": 7.641310865098845e-05, | |
| "loss": 3.6214, | |
| "step": 746 | |
| }, | |
| { | |
| "epoch": 2.594524119947849, | |
| "grad_norm": 0.6503444314002991, | |
| "learning_rate": 7.633534101461902e-05, | |
| "loss": 3.5305, | |
| "step": 747 | |
| }, | |
| { | |
| "epoch": 2.59800086918731, | |
| "grad_norm": 0.635787844657898, | |
| "learning_rate": 7.625748511010382e-05, | |
| "loss": 3.5219, | |
| "step": 748 | |
| }, | |
| { | |
| "epoch": 2.601477618426771, | |
| "grad_norm": 0.6334975957870483, | |
| "learning_rate": 7.617954119839247e-05, | |
| "loss": 3.468, | |
| "step": 749 | |
| }, | |
| { | |
| "epoch": 2.604954367666232, | |
| "grad_norm": 0.692579448223114, | |
| "learning_rate": 7.610150954072952e-05, | |
| "loss": 3.4644, | |
| "step": 750 | |
| }, | |
| { | |
| "epoch": 2.608431116905693, | |
| "grad_norm": 0.8107365369796753, | |
| "learning_rate": 7.602339039865362e-05, | |
| "loss": 3.488, | |
| "step": 751 | |
| }, | |
| { | |
| "epoch": 2.6119078661451542, | |
| "grad_norm": 0.8618629574775696, | |
| "learning_rate": 7.594518403399667e-05, | |
| "loss": 3.5615, | |
| "step": 752 | |
| }, | |
| { | |
| "epoch": 2.6153846153846154, | |
| "grad_norm": 0.8016732931137085, | |
| "learning_rate": 7.586689070888284e-05, | |
| "loss": 3.4891, | |
| "step": 753 | |
| }, | |
| { | |
| "epoch": 2.6188613646240766, | |
| "grad_norm": 0.9589606523513794, | |
| "learning_rate": 7.578851068572788e-05, | |
| "loss": 3.5743, | |
| "step": 754 | |
| }, | |
| { | |
| "epoch": 2.6223381138635373, | |
| "grad_norm": 1.0258772373199463, | |
| "learning_rate": 7.571004422723805e-05, | |
| "loss": 3.5625, | |
| "step": 755 | |
| }, | |
| { | |
| "epoch": 2.625814863102999, | |
| "grad_norm": 0.7846320271492004, | |
| "learning_rate": 7.563149159640929e-05, | |
| "loss": 3.5014, | |
| "step": 756 | |
| }, | |
| { | |
| "epoch": 2.6292916123424597, | |
| "grad_norm": 0.7241854667663574, | |
| "learning_rate": 7.555285305652644e-05, | |
| "loss": 3.5707, | |
| "step": 757 | |
| }, | |
| { | |
| "epoch": 2.632768361581921, | |
| "grad_norm": 0.6381678581237793, | |
| "learning_rate": 7.547412887116223e-05, | |
| "loss": 3.5553, | |
| "step": 758 | |
| }, | |
| { | |
| "epoch": 2.636245110821382, | |
| "grad_norm": 0.6428322196006775, | |
| "learning_rate": 7.539531930417648e-05, | |
| "loss": 3.4585, | |
| "step": 759 | |
| }, | |
| { | |
| "epoch": 2.639721860060843, | |
| "grad_norm": 0.6515122056007385, | |
| "learning_rate": 7.531642461971516e-05, | |
| "loss": 3.4973, | |
| "step": 760 | |
| }, | |
| { | |
| "epoch": 2.6431986093003044, | |
| "grad_norm": 0.7195065021514893, | |
| "learning_rate": 7.523744508220949e-05, | |
| "loss": 3.5204, | |
| "step": 761 | |
| }, | |
| { | |
| "epoch": 2.646675358539765, | |
| "grad_norm": 0.7389284372329712, | |
| "learning_rate": 7.51583809563752e-05, | |
| "loss": 3.5345, | |
| "step": 762 | |
| }, | |
| { | |
| "epoch": 2.6501521077792263, | |
| "grad_norm": 0.8561949133872986, | |
| "learning_rate": 7.507923250721145e-05, | |
| "loss": 3.5781, | |
| "step": 763 | |
| }, | |
| { | |
| "epoch": 2.6536288570186874, | |
| "grad_norm": 0.9598309993743896, | |
| "learning_rate": 7.500000000000001e-05, | |
| "loss": 3.5838, | |
| "step": 764 | |
| }, | |
| { | |
| "epoch": 2.6571056062581486, | |
| "grad_norm": 1.006478190422058, | |
| "learning_rate": 7.492068370030447e-05, | |
| "loss": 3.6576, | |
| "step": 765 | |
| }, | |
| { | |
| "epoch": 2.66058235549761, | |
| "grad_norm": 0.9836744666099548, | |
| "learning_rate": 7.48412838739692e-05, | |
| "loss": 3.649, | |
| "step": 766 | |
| }, | |
| { | |
| "epoch": 2.664059104737071, | |
| "grad_norm": 0.8797217011451721, | |
| "learning_rate": 7.476180078711854e-05, | |
| "loss": 3.4689, | |
| "step": 767 | |
| }, | |
| { | |
| "epoch": 2.667535853976532, | |
| "grad_norm": 0.8832845091819763, | |
| "learning_rate": 7.468223470615593e-05, | |
| "loss": 3.3975, | |
| "step": 768 | |
| }, | |
| { | |
| "epoch": 2.671012603215993, | |
| "grad_norm": 0.7454182505607605, | |
| "learning_rate": 7.460258589776292e-05, | |
| "loss": 3.5938, | |
| "step": 769 | |
| }, | |
| { | |
| "epoch": 2.674489352455454, | |
| "grad_norm": 0.6505441069602966, | |
| "learning_rate": 7.452285462889841e-05, | |
| "loss": 3.5975, | |
| "step": 770 | |
| }, | |
| { | |
| "epoch": 2.6779661016949152, | |
| "grad_norm": 0.6446083188056946, | |
| "learning_rate": 7.44430411667976e-05, | |
| "loss": 3.4146, | |
| "step": 771 | |
| }, | |
| { | |
| "epoch": 2.6814428509343764, | |
| "grad_norm": 0.6646334528923035, | |
| "learning_rate": 7.436314577897126e-05, | |
| "loss": 3.5033, | |
| "step": 772 | |
| }, | |
| { | |
| "epoch": 2.6849196001738376, | |
| "grad_norm": 0.7433136105537415, | |
| "learning_rate": 7.428316873320466e-05, | |
| "loss": 3.3934, | |
| "step": 773 | |
| }, | |
| { | |
| "epoch": 2.6883963494132983, | |
| "grad_norm": 0.7214708924293518, | |
| "learning_rate": 7.420311029755688e-05, | |
| "loss": 3.5329, | |
| "step": 774 | |
| }, | |
| { | |
| "epoch": 2.69187309865276, | |
| "grad_norm": 0.7187039852142334, | |
| "learning_rate": 7.412297074035967e-05, | |
| "loss": 3.6085, | |
| "step": 775 | |
| }, | |
| { | |
| "epoch": 2.6953498478922207, | |
| "grad_norm": 0.6889802813529968, | |
| "learning_rate": 7.404275033021676e-05, | |
| "loss": 3.5609, | |
| "step": 776 | |
| }, | |
| { | |
| "epoch": 2.698826597131682, | |
| "grad_norm": 0.7113670706748962, | |
| "learning_rate": 7.396244933600285e-05, | |
| "loss": 3.5038, | |
| "step": 777 | |
| }, | |
| { | |
| "epoch": 2.702303346371143, | |
| "grad_norm": 0.7699861526489258, | |
| "learning_rate": 7.388206802686272e-05, | |
| "loss": 3.4915, | |
| "step": 778 | |
| }, | |
| { | |
| "epoch": 2.705780095610604, | |
| "grad_norm": 0.874978244304657, | |
| "learning_rate": 7.380160667221035e-05, | |
| "loss": 3.5332, | |
| "step": 779 | |
| }, | |
| { | |
| "epoch": 2.7092568448500653, | |
| "grad_norm": 0.8308786153793335, | |
| "learning_rate": 7.372106554172802e-05, | |
| "loss": 3.5566, | |
| "step": 780 | |
| }, | |
| { | |
| "epoch": 2.712733594089526, | |
| "grad_norm": 0.8035147786140442, | |
| "learning_rate": 7.364044490536539e-05, | |
| "loss": 3.4245, | |
| "step": 781 | |
| }, | |
| { | |
| "epoch": 2.7162103433289873, | |
| "grad_norm": 0.6532664895057678, | |
| "learning_rate": 7.355974503333859e-05, | |
| "loss": 3.5687, | |
| "step": 782 | |
| }, | |
| { | |
| "epoch": 2.7196870925684484, | |
| "grad_norm": 0.7014971375465393, | |
| "learning_rate": 7.347896619612932e-05, | |
| "loss": 3.3345, | |
| "step": 783 | |
| }, | |
| { | |
| "epoch": 2.7231638418079096, | |
| "grad_norm": 0.8252018690109253, | |
| "learning_rate": 7.339810866448398e-05, | |
| "loss": 3.4766, | |
| "step": 784 | |
| }, | |
| { | |
| "epoch": 2.7266405910473708, | |
| "grad_norm": 0.8229169845581055, | |
| "learning_rate": 7.331717270941268e-05, | |
| "loss": 3.4474, | |
| "step": 785 | |
| }, | |
| { | |
| "epoch": 2.730117340286832, | |
| "grad_norm": 0.7657115459442139, | |
| "learning_rate": 7.323615860218843e-05, | |
| "loss": 3.5018, | |
| "step": 786 | |
| }, | |
| { | |
| "epoch": 2.733594089526293, | |
| "grad_norm": 0.7167194485664368, | |
| "learning_rate": 7.315506661434616e-05, | |
| "loss": 3.3943, | |
| "step": 787 | |
| }, | |
| { | |
| "epoch": 2.737070838765754, | |
| "grad_norm": 0.6008238196372986, | |
| "learning_rate": 7.307389701768182e-05, | |
| "loss": 3.4675, | |
| "step": 788 | |
| }, | |
| { | |
| "epoch": 2.740547588005215, | |
| "grad_norm": 0.63405841588974, | |
| "learning_rate": 7.299265008425151e-05, | |
| "loss": 3.4227, | |
| "step": 789 | |
| }, | |
| { | |
| "epoch": 2.744024337244676, | |
| "grad_norm": 0.6532236337661743, | |
| "learning_rate": 7.291132608637052e-05, | |
| "loss": 3.5426, | |
| "step": 790 | |
| }, | |
| { | |
| "epoch": 2.7475010864841374, | |
| "grad_norm": 0.6104541420936584, | |
| "learning_rate": 7.282992529661246e-05, | |
| "loss": 3.4405, | |
| "step": 791 | |
| }, | |
| { | |
| "epoch": 2.7509778357235986, | |
| "grad_norm": 0.620786726474762, | |
| "learning_rate": 7.274844798780826e-05, | |
| "loss": 3.5128, | |
| "step": 792 | |
| }, | |
| { | |
| "epoch": 2.7544545849630593, | |
| "grad_norm": 0.6040831804275513, | |
| "learning_rate": 7.266689443304541e-05, | |
| "loss": 3.3964, | |
| "step": 793 | |
| }, | |
| { | |
| "epoch": 2.757931334202521, | |
| "grad_norm": 0.6351047158241272, | |
| "learning_rate": 7.258526490566687e-05, | |
| "loss": 3.4846, | |
| "step": 794 | |
| }, | |
| { | |
| "epoch": 2.7614080834419816, | |
| "grad_norm": 0.6179344654083252, | |
| "learning_rate": 7.25035596792703e-05, | |
| "loss": 3.3667, | |
| "step": 795 | |
| }, | |
| { | |
| "epoch": 2.764884832681443, | |
| "grad_norm": 0.5391456484794617, | |
| "learning_rate": 7.242177902770707e-05, | |
| "loss": 3.5516, | |
| "step": 796 | |
| }, | |
| { | |
| "epoch": 2.768361581920904, | |
| "grad_norm": 0.5362287759780884, | |
| "learning_rate": 7.233992322508129e-05, | |
| "loss": 3.5142, | |
| "step": 797 | |
| }, | |
| { | |
| "epoch": 2.771838331160365, | |
| "grad_norm": 0.5863167643547058, | |
| "learning_rate": 7.225799254574904e-05, | |
| "loss": 3.468, | |
| "step": 798 | |
| }, | |
| { | |
| "epoch": 2.7753150803998263, | |
| "grad_norm": 0.6511790156364441, | |
| "learning_rate": 7.217598726431734e-05, | |
| "loss": 3.38, | |
| "step": 799 | |
| }, | |
| { | |
| "epoch": 2.778791829639287, | |
| "grad_norm": 0.7753508687019348, | |
| "learning_rate": 7.209390765564318e-05, | |
| "loss": 3.4388, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 2.7822685788787482, | |
| "grad_norm": 0.9056048393249512, | |
| "learning_rate": 7.201175399483278e-05, | |
| "loss": 3.4029, | |
| "step": 801 | |
| }, | |
| { | |
| "epoch": 2.7857453281182094, | |
| "grad_norm": 0.8542281985282898, | |
| "learning_rate": 7.192952655724049e-05, | |
| "loss": 3.504, | |
| "step": 802 | |
| }, | |
| { | |
| "epoch": 2.7892220773576706, | |
| "grad_norm": 0.7020969986915588, | |
| "learning_rate": 7.184722561846798e-05, | |
| "loss": 3.479, | |
| "step": 803 | |
| }, | |
| { | |
| "epoch": 2.7926988265971318, | |
| "grad_norm": 0.7977145314216614, | |
| "learning_rate": 7.176485145436325e-05, | |
| "loss": 3.41, | |
| "step": 804 | |
| }, | |
| { | |
| "epoch": 2.796175575836593, | |
| "grad_norm": 0.8588235378265381, | |
| "learning_rate": 7.168240434101971e-05, | |
| "loss": 3.5017, | |
| "step": 805 | |
| }, | |
| { | |
| "epoch": 2.799652325076054, | |
| "grad_norm": 0.8466975092887878, | |
| "learning_rate": 7.159988455477534e-05, | |
| "loss": 3.5023, | |
| "step": 806 | |
| }, | |
| { | |
| "epoch": 2.803129074315515, | |
| "grad_norm": 0.8365435600280762, | |
| "learning_rate": 7.151729237221162e-05, | |
| "loss": 3.5185, | |
| "step": 807 | |
| }, | |
| { | |
| "epoch": 2.806605823554976, | |
| "grad_norm": 0.8156039118766785, | |
| "learning_rate": 7.143462807015271e-05, | |
| "loss": 3.4056, | |
| "step": 808 | |
| }, | |
| { | |
| "epoch": 2.810082572794437, | |
| "grad_norm": 0.8369770050048828, | |
| "learning_rate": 7.135189192566453e-05, | |
| "loss": 3.4314, | |
| "step": 809 | |
| }, | |
| { | |
| "epoch": 2.8135593220338984, | |
| "grad_norm": 0.7135697603225708, | |
| "learning_rate": 7.126908421605375e-05, | |
| "loss": 3.4166, | |
| "step": 810 | |
| }, | |
| { | |
| "epoch": 2.8170360712733595, | |
| "grad_norm": 0.6435958743095398, | |
| "learning_rate": 7.11862052188669e-05, | |
| "loss": 3.469, | |
| "step": 811 | |
| }, | |
| { | |
| "epoch": 2.8205128205128203, | |
| "grad_norm": 0.6611959934234619, | |
| "learning_rate": 7.110325521188949e-05, | |
| "loss": 3.3635, | |
| "step": 812 | |
| }, | |
| { | |
| "epoch": 2.823989569752282, | |
| "grad_norm": 0.6469593644142151, | |
| "learning_rate": 7.102023447314501e-05, | |
| "loss": 3.4362, | |
| "step": 813 | |
| }, | |
| { | |
| "epoch": 2.8274663189917426, | |
| "grad_norm": 0.6080538630485535, | |
| "learning_rate": 7.093714328089399e-05, | |
| "loss": 3.4378, | |
| "step": 814 | |
| }, | |
| { | |
| "epoch": 2.830943068231204, | |
| "grad_norm": 0.6681721210479736, | |
| "learning_rate": 7.085398191363313e-05, | |
| "loss": 3.4019, | |
| "step": 815 | |
| }, | |
| { | |
| "epoch": 2.834419817470665, | |
| "grad_norm": 0.6522132158279419, | |
| "learning_rate": 7.077075065009433e-05, | |
| "loss": 3.4162, | |
| "step": 816 | |
| }, | |
| { | |
| "epoch": 2.837896566710126, | |
| "grad_norm": 0.5956054329872131, | |
| "learning_rate": 7.068744976924378e-05, | |
| "loss": 3.3781, | |
| "step": 817 | |
| }, | |
| { | |
| "epoch": 2.8413733159495873, | |
| "grad_norm": 0.5895470976829529, | |
| "learning_rate": 7.060407955028097e-05, | |
| "loss": 3.4813, | |
| "step": 818 | |
| }, | |
| { | |
| "epoch": 2.844850065189048, | |
| "grad_norm": 0.6135871410369873, | |
| "learning_rate": 7.052064027263786e-05, | |
| "loss": 3.4464, | |
| "step": 819 | |
| }, | |
| { | |
| "epoch": 2.848326814428509, | |
| "grad_norm": 0.5782895088195801, | |
| "learning_rate": 7.043713221597774e-05, | |
| "loss": 3.3993, | |
| "step": 820 | |
| }, | |
| { | |
| "epoch": 2.8518035636679704, | |
| "grad_norm": 0.6497248411178589, | |
| "learning_rate": 7.035355566019458e-05, | |
| "loss": 3.4595, | |
| "step": 821 | |
| }, | |
| { | |
| "epoch": 2.8552803129074316, | |
| "grad_norm": 0.7294395565986633, | |
| "learning_rate": 7.026991088541184e-05, | |
| "loss": 3.3974, | |
| "step": 822 | |
| }, | |
| { | |
| "epoch": 2.8587570621468927, | |
| "grad_norm": 0.6991782188415527, | |
| "learning_rate": 7.018619817198165e-05, | |
| "loss": 3.4243, | |
| "step": 823 | |
| }, | |
| { | |
| "epoch": 2.862233811386354, | |
| "grad_norm": 0.688694417476654, | |
| "learning_rate": 7.01024178004839e-05, | |
| "loss": 3.4343, | |
| "step": 824 | |
| }, | |
| { | |
| "epoch": 2.865710560625815, | |
| "grad_norm": 0.8485518097877502, | |
| "learning_rate": 7.001857005172515e-05, | |
| "loss": 3.4753, | |
| "step": 825 | |
| }, | |
| { | |
| "epoch": 2.869187309865276, | |
| "grad_norm": 0.7970876097679138, | |
| "learning_rate": 6.99346552067379e-05, | |
| "loss": 3.3739, | |
| "step": 826 | |
| }, | |
| { | |
| "epoch": 2.872664059104737, | |
| "grad_norm": 0.8355128765106201, | |
| "learning_rate": 6.985067354677946e-05, | |
| "loss": 3.3663, | |
| "step": 827 | |
| }, | |
| { | |
| "epoch": 2.876140808344198, | |
| "grad_norm": 1.0474745035171509, | |
| "learning_rate": 6.976662535333107e-05, | |
| "loss": 3.4602, | |
| "step": 828 | |
| }, | |
| { | |
| "epoch": 2.8796175575836593, | |
| "grad_norm": 0.971641480922699, | |
| "learning_rate": 6.968251090809708e-05, | |
| "loss": 3.5213, | |
| "step": 829 | |
| }, | |
| { | |
| "epoch": 2.8830943068231205, | |
| "grad_norm": 0.85914146900177, | |
| "learning_rate": 6.959833049300377e-05, | |
| "loss": 3.4002, | |
| "step": 830 | |
| }, | |
| { | |
| "epoch": 2.8865710560625812, | |
| "grad_norm": 0.7117258310317993, | |
| "learning_rate": 6.951408439019858e-05, | |
| "loss": 3.3931, | |
| "step": 831 | |
| }, | |
| { | |
| "epoch": 2.890047805302043, | |
| "grad_norm": 0.7882718443870544, | |
| "learning_rate": 6.942977288204915e-05, | |
| "loss": 3.4796, | |
| "step": 832 | |
| }, | |
| { | |
| "epoch": 2.8935245545415036, | |
| "grad_norm": 0.8237490057945251, | |
| "learning_rate": 6.93453962511423e-05, | |
| "loss": 3.511, | |
| "step": 833 | |
| }, | |
| { | |
| "epoch": 2.8970013037809648, | |
| "grad_norm": 0.8855555057525635, | |
| "learning_rate": 6.926095478028311e-05, | |
| "loss": 3.4921, | |
| "step": 834 | |
| }, | |
| { | |
| "epoch": 2.900478053020426, | |
| "grad_norm": 0.8086044788360596, | |
| "learning_rate": 6.917644875249404e-05, | |
| "loss": 3.3488, | |
| "step": 835 | |
| }, | |
| { | |
| "epoch": 2.903954802259887, | |
| "grad_norm": 0.672223687171936, | |
| "learning_rate": 6.909187845101387e-05, | |
| "loss": 3.4005, | |
| "step": 836 | |
| }, | |
| { | |
| "epoch": 2.9074315514993483, | |
| "grad_norm": 0.7050768136978149, | |
| "learning_rate": 6.900724415929681e-05, | |
| "loss": 3.436, | |
| "step": 837 | |
| }, | |
| { | |
| "epoch": 2.910908300738809, | |
| "grad_norm": 0.6785340905189514, | |
| "learning_rate": 6.892254616101159e-05, | |
| "loss": 3.4632, | |
| "step": 838 | |
| }, | |
| { | |
| "epoch": 2.91438504997827, | |
| "grad_norm": 0.6947048902511597, | |
| "learning_rate": 6.883778474004041e-05, | |
| "loss": 3.3555, | |
| "step": 839 | |
| }, | |
| { | |
| "epoch": 2.9178617992177314, | |
| "grad_norm": 0.712297797203064, | |
| "learning_rate": 6.87529601804781e-05, | |
| "loss": 3.4413, | |
| "step": 840 | |
| }, | |
| { | |
| "epoch": 2.9213385484571925, | |
| "grad_norm": 0.6110817193984985, | |
| "learning_rate": 6.866807276663106e-05, | |
| "loss": 3.3946, | |
| "step": 841 | |
| }, | |
| { | |
| "epoch": 2.9248152976966537, | |
| "grad_norm": 0.5554404258728027, | |
| "learning_rate": 6.858312278301637e-05, | |
| "loss": 3.3931, | |
| "step": 842 | |
| }, | |
| { | |
| "epoch": 2.928292046936115, | |
| "grad_norm": 0.5974950194358826, | |
| "learning_rate": 6.849811051436088e-05, | |
| "loss": 3.4516, | |
| "step": 843 | |
| }, | |
| { | |
| "epoch": 2.931768796175576, | |
| "grad_norm": 0.6106230020523071, | |
| "learning_rate": 6.841303624560012e-05, | |
| "loss": 3.4381, | |
| "step": 844 | |
| }, | |
| { | |
| "epoch": 2.935245545415037, | |
| "grad_norm": 0.6150991320610046, | |
| "learning_rate": 6.832790026187748e-05, | |
| "loss": 3.3654, | |
| "step": 845 | |
| }, | |
| { | |
| "epoch": 2.938722294654498, | |
| "grad_norm": 0.6058139204978943, | |
| "learning_rate": 6.824270284854319e-05, | |
| "loss": 3.4406, | |
| "step": 846 | |
| }, | |
| { | |
| "epoch": 2.942199043893959, | |
| "grad_norm": 0.6507914066314697, | |
| "learning_rate": 6.815744429115331e-05, | |
| "loss": 3.4364, | |
| "step": 847 | |
| }, | |
| { | |
| "epoch": 2.9456757931334203, | |
| "grad_norm": 0.7351534962654114, | |
| "learning_rate": 6.807212487546897e-05, | |
| "loss": 3.4768, | |
| "step": 848 | |
| }, | |
| { | |
| "epoch": 2.9491525423728815, | |
| "grad_norm": 0.7599115371704102, | |
| "learning_rate": 6.798674488745515e-05, | |
| "loss": 3.3948, | |
| "step": 849 | |
| }, | |
| { | |
| "epoch": 2.952629291612342, | |
| "grad_norm": 0.6528266668319702, | |
| "learning_rate": 6.790130461327993e-05, | |
| "loss": 3.4272, | |
| "step": 850 | |
| }, | |
| { | |
| "epoch": 2.956106040851804, | |
| "grad_norm": 0.5727442502975464, | |
| "learning_rate": 6.78158043393134e-05, | |
| "loss": 3.324, | |
| "step": 851 | |
| }, | |
| { | |
| "epoch": 2.9595827900912646, | |
| "grad_norm": 0.5675135254859924, | |
| "learning_rate": 6.773024435212678e-05, | |
| "loss": 3.3768, | |
| "step": 852 | |
| }, | |
| { | |
| "epoch": 2.9630595393307257, | |
| "grad_norm": 0.5391719937324524, | |
| "learning_rate": 6.764462493849144e-05, | |
| "loss": 3.3768, | |
| "step": 853 | |
| }, | |
| { | |
| "epoch": 2.966536288570187, | |
| "grad_norm": 0.5126081705093384, | |
| "learning_rate": 6.755894638537791e-05, | |
| "loss": 3.3735, | |
| "step": 854 | |
| }, | |
| { | |
| "epoch": 2.970013037809648, | |
| "grad_norm": 0.5525986552238464, | |
| "learning_rate": 6.747320897995493e-05, | |
| "loss": 3.3825, | |
| "step": 855 | |
| }, | |
| { | |
| "epoch": 2.9734897870491093, | |
| "grad_norm": 0.5459985733032227, | |
| "learning_rate": 6.73874130095885e-05, | |
| "loss": 3.3285, | |
| "step": 856 | |
| }, | |
| { | |
| "epoch": 2.97696653628857, | |
| "grad_norm": 0.5992332696914673, | |
| "learning_rate": 6.730155876184094e-05, | |
| "loss": 3.3521, | |
| "step": 857 | |
| }, | |
| { | |
| "epoch": 2.980443285528031, | |
| "grad_norm": 0.6372593641281128, | |
| "learning_rate": 6.721564652446986e-05, | |
| "loss": 3.3832, | |
| "step": 858 | |
| }, | |
| { | |
| "epoch": 2.9839200347674923, | |
| "grad_norm": 0.740464448928833, | |
| "learning_rate": 6.712967658542729e-05, | |
| "loss": 3.4085, | |
| "step": 859 | |
| }, | |
| { | |
| "epoch": 2.9873967840069535, | |
| "grad_norm": 0.8248367309570312, | |
| "learning_rate": 6.704364923285857e-05, | |
| "loss": 3.4706, | |
| "step": 860 | |
| }, | |
| { | |
| "epoch": 2.9908735332464147, | |
| "grad_norm": 0.9461883306503296, | |
| "learning_rate": 6.695756475510156e-05, | |
| "loss": 3.3043, | |
| "step": 861 | |
| }, | |
| { | |
| "epoch": 2.994350282485876, | |
| "grad_norm": 0.8672154545783997, | |
| "learning_rate": 6.687142344068553e-05, | |
| "loss": 3.4494, | |
| "step": 862 | |
| }, | |
| { | |
| "epoch": 2.997827031725337, | |
| "grad_norm": 0.7892129421234131, | |
| "learning_rate": 6.678522557833024e-05, | |
| "loss": 3.2984, | |
| "step": 863 | |
| }, | |
| { | |
| "epoch": 3.0, | |
| "grad_norm": 0.7941856384277344, | |
| "learning_rate": 6.669897145694507e-05, | |
| "loss": 3.4018, | |
| "step": 864 | |
| }, | |
| { | |
| "epoch": 3.003476749239461, | |
| "grad_norm": 0.7466159462928772, | |
| "learning_rate": 6.661266136562788e-05, | |
| "loss": 3.3642, | |
| "step": 865 | |
| }, | |
| { | |
| "epoch": 3.0069534984789223, | |
| "grad_norm": 0.8197594881057739, | |
| "learning_rate": 6.652629559366414e-05, | |
| "loss": 3.4616, | |
| "step": 866 | |
| }, | |
| { | |
| "epoch": 3.0104302477183835, | |
| "grad_norm": 0.8821737766265869, | |
| "learning_rate": 6.643987443052595e-05, | |
| "loss": 3.4096, | |
| "step": 867 | |
| }, | |
| { | |
| "epoch": 3.0139069969578443, | |
| "grad_norm": 1.0090149641036987, | |
| "learning_rate": 6.635339816587109e-05, | |
| "loss": 3.313, | |
| "step": 868 | |
| }, | |
| { | |
| "epoch": 3.0173837461973054, | |
| "grad_norm": 0.958720862865448, | |
| "learning_rate": 6.626686708954198e-05, | |
| "loss": 3.4145, | |
| "step": 869 | |
| }, | |
| { | |
| "epoch": 3.0208604954367666, | |
| "grad_norm": 0.8551411628723145, | |
| "learning_rate": 6.618028149156479e-05, | |
| "loss": 3.4031, | |
| "step": 870 | |
| }, | |
| { | |
| "epoch": 3.0243372446762278, | |
| "grad_norm": 0.8152096271514893, | |
| "learning_rate": 6.609364166214837e-05, | |
| "loss": 3.4753, | |
| "step": 871 | |
| }, | |
| { | |
| "epoch": 3.027813993915689, | |
| "grad_norm": 0.797737717628479, | |
| "learning_rate": 6.600694789168344e-05, | |
| "loss": 3.4012, | |
| "step": 872 | |
| }, | |
| { | |
| "epoch": 3.03129074315515, | |
| "grad_norm": 0.8055285215377808, | |
| "learning_rate": 6.592020047074144e-05, | |
| "loss": 3.3403, | |
| "step": 873 | |
| }, | |
| { | |
| "epoch": 3.034767492394611, | |
| "grad_norm": 0.6998369693756104, | |
| "learning_rate": 6.583339969007363e-05, | |
| "loss": 3.3336, | |
| "step": 874 | |
| }, | |
| { | |
| "epoch": 3.038244241634072, | |
| "grad_norm": 0.6863573789596558, | |
| "learning_rate": 6.574654584061013e-05, | |
| "loss": 3.369, | |
| "step": 875 | |
| }, | |
| { | |
| "epoch": 3.041720990873533, | |
| "grad_norm": 0.6229199767112732, | |
| "learning_rate": 6.565963921345895e-05, | |
| "loss": 3.2858, | |
| "step": 876 | |
| }, | |
| { | |
| "epoch": 3.0451977401129944, | |
| "grad_norm": 0.7069191336631775, | |
| "learning_rate": 6.557268009990496e-05, | |
| "loss": 3.4242, | |
| "step": 877 | |
| }, | |
| { | |
| "epoch": 3.0486744893524556, | |
| "grad_norm": 0.6492975354194641, | |
| "learning_rate": 6.548566879140897e-05, | |
| "loss": 3.4076, | |
| "step": 878 | |
| }, | |
| { | |
| "epoch": 3.0521512385919167, | |
| "grad_norm": 0.5807247161865234, | |
| "learning_rate": 6.539860557960674e-05, | |
| "loss": 3.3027, | |
| "step": 879 | |
| }, | |
| { | |
| "epoch": 3.0556279878313775, | |
| "grad_norm": 0.6085408926010132, | |
| "learning_rate": 6.531149075630796e-05, | |
| "loss": 3.3768, | |
| "step": 880 | |
| }, | |
| { | |
| "epoch": 3.0591047370708386, | |
| "grad_norm": 0.6492026448249817, | |
| "learning_rate": 6.522432461349536e-05, | |
| "loss": 3.3567, | |
| "step": 881 | |
| }, | |
| { | |
| "epoch": 3.0625814863103, | |
| "grad_norm": 0.6504300832748413, | |
| "learning_rate": 6.51371074433236e-05, | |
| "loss": 3.3631, | |
| "step": 882 | |
| }, | |
| { | |
| "epoch": 3.066058235549761, | |
| "grad_norm": 0.616385281085968, | |
| "learning_rate": 6.504983953811845e-05, | |
| "loss": 3.399, | |
| "step": 883 | |
| }, | |
| { | |
| "epoch": 3.069534984789222, | |
| "grad_norm": 0.6602182984352112, | |
| "learning_rate": 6.49625211903757e-05, | |
| "loss": 3.2783, | |
| "step": 884 | |
| }, | |
| { | |
| "epoch": 3.0730117340286833, | |
| "grad_norm": 0.6816926002502441, | |
| "learning_rate": 6.487515269276016e-05, | |
| "loss": 3.4135, | |
| "step": 885 | |
| }, | |
| { | |
| "epoch": 3.076488483268144, | |
| "grad_norm": 0.6374818682670593, | |
| "learning_rate": 6.478773433810477e-05, | |
| "loss": 3.4138, | |
| "step": 886 | |
| }, | |
| { | |
| "epoch": 3.0799652325076052, | |
| "grad_norm": 0.6355860829353333, | |
| "learning_rate": 6.470026641940963e-05, | |
| "loss": 3.3589, | |
| "step": 887 | |
| }, | |
| { | |
| "epoch": 3.0834419817470664, | |
| "grad_norm": 0.6020347476005554, | |
| "learning_rate": 6.461274922984086e-05, | |
| "loss": 3.3701, | |
| "step": 888 | |
| }, | |
| { | |
| "epoch": 3.0869187309865276, | |
| "grad_norm": 0.5898982882499695, | |
| "learning_rate": 6.45251830627298e-05, | |
| "loss": 3.332, | |
| "step": 889 | |
| }, | |
| { | |
| "epoch": 3.0903954802259888, | |
| "grad_norm": 0.6016554236412048, | |
| "learning_rate": 6.443756821157186e-05, | |
| "loss": 3.3707, | |
| "step": 890 | |
| }, | |
| { | |
| "epoch": 3.09387222946545, | |
| "grad_norm": 0.59771728515625, | |
| "learning_rate": 6.434990497002573e-05, | |
| "loss": 3.462, | |
| "step": 891 | |
| }, | |
| { | |
| "epoch": 3.097348978704911, | |
| "grad_norm": 0.5475634336471558, | |
| "learning_rate": 6.426219363191224e-05, | |
| "loss": 3.3056, | |
| "step": 892 | |
| }, | |
| { | |
| "epoch": 3.100825727944372, | |
| "grad_norm": 0.5900417566299438, | |
| "learning_rate": 6.417443449121339e-05, | |
| "loss": 3.2803, | |
| "step": 893 | |
| }, | |
| { | |
| "epoch": 3.104302477183833, | |
| "grad_norm": 0.5932677388191223, | |
| "learning_rate": 6.408662784207149e-05, | |
| "loss": 3.4113, | |
| "step": 894 | |
| }, | |
| { | |
| "epoch": 3.107779226423294, | |
| "grad_norm": 0.6010504961013794, | |
| "learning_rate": 6.3998773978788e-05, | |
| "loss": 3.4699, | |
| "step": 895 | |
| }, | |
| { | |
| "epoch": 3.1112559756627554, | |
| "grad_norm": 0.7265802025794983, | |
| "learning_rate": 6.391087319582264e-05, | |
| "loss": 3.3484, | |
| "step": 896 | |
| }, | |
| { | |
| "epoch": 3.1147327249022165, | |
| "grad_norm": 0.8029538989067078, | |
| "learning_rate": 6.382292578779243e-05, | |
| "loss": 3.4066, | |
| "step": 897 | |
| }, | |
| { | |
| "epoch": 3.1182094741416777, | |
| "grad_norm": 0.7880899310112, | |
| "learning_rate": 6.373493204947065e-05, | |
| "loss": 3.4581, | |
| "step": 898 | |
| }, | |
| { | |
| "epoch": 3.1216862233811384, | |
| "grad_norm": 0.9686315655708313, | |
| "learning_rate": 6.364689227578583e-05, | |
| "loss": 3.3693, | |
| "step": 899 | |
| }, | |
| { | |
| "epoch": 3.1251629726205996, | |
| "grad_norm": 0.9728556871414185, | |
| "learning_rate": 6.355880676182086e-05, | |
| "loss": 3.3544, | |
| "step": 900 | |
| }, | |
| { | |
| "epoch": 3.128639721860061, | |
| "grad_norm": 0.700410783290863, | |
| "learning_rate": 6.347067580281186e-05, | |
| "loss": 3.384, | |
| "step": 901 | |
| }, | |
| { | |
| "epoch": 3.132116471099522, | |
| "grad_norm": 0.8260563611984253, | |
| "learning_rate": 6.338249969414734e-05, | |
| "loss": 3.375, | |
| "step": 902 | |
| }, | |
| { | |
| "epoch": 3.135593220338983, | |
| "grad_norm": 0.8206276297569275, | |
| "learning_rate": 6.32942787313671e-05, | |
| "loss": 3.2862, | |
| "step": 903 | |
| }, | |
| { | |
| "epoch": 3.1390699695784443, | |
| "grad_norm": 0.6374201774597168, | |
| "learning_rate": 6.320601321016128e-05, | |
| "loss": 3.3537, | |
| "step": 904 | |
| }, | |
| { | |
| "epoch": 3.142546718817905, | |
| "grad_norm": 0.7070665955543518, | |
| "learning_rate": 6.311770342636937e-05, | |
| "loss": 3.3998, | |
| "step": 905 | |
| }, | |
| { | |
| "epoch": 3.146023468057366, | |
| "grad_norm": 0.6780283451080322, | |
| "learning_rate": 6.302934967597922e-05, | |
| "loss": 3.4153, | |
| "step": 906 | |
| }, | |
| { | |
| "epoch": 3.1495002172968274, | |
| "grad_norm": 0.6037342548370361, | |
| "learning_rate": 6.294095225512603e-05, | |
| "loss": 3.3119, | |
| "step": 907 | |
| }, | |
| { | |
| "epoch": 3.1529769665362886, | |
| "grad_norm": 0.617067813873291, | |
| "learning_rate": 6.28525114600914e-05, | |
| "loss": 3.415, | |
| "step": 908 | |
| }, | |
| { | |
| "epoch": 3.1564537157757497, | |
| "grad_norm": 0.5938243269920349, | |
| "learning_rate": 6.276402758730229e-05, | |
| "loss": 3.3607, | |
| "step": 909 | |
| }, | |
| { | |
| "epoch": 3.159930465015211, | |
| "grad_norm": 0.5539820194244385, | |
| "learning_rate": 6.267550093333e-05, | |
| "loss": 3.3559, | |
| "step": 910 | |
| }, | |
| { | |
| "epoch": 3.163407214254672, | |
| "grad_norm": 0.6201304197311401, | |
| "learning_rate": 6.25869317948893e-05, | |
| "loss": 3.3305, | |
| "step": 911 | |
| }, | |
| { | |
| "epoch": 3.166883963494133, | |
| "grad_norm": 0.6455796957015991, | |
| "learning_rate": 6.24983204688373e-05, | |
| "loss": 3.3515, | |
| "step": 912 | |
| }, | |
| { | |
| "epoch": 3.170360712733594, | |
| "grad_norm": 0.6351650953292847, | |
| "learning_rate": 6.240966725217249e-05, | |
| "loss": 3.3312, | |
| "step": 913 | |
| }, | |
| { | |
| "epoch": 3.173837461973055, | |
| "grad_norm": 0.6822057366371155, | |
| "learning_rate": 6.232097244203388e-05, | |
| "loss": 3.3026, | |
| "step": 914 | |
| }, | |
| { | |
| "epoch": 3.1773142112125163, | |
| "grad_norm": 0.700149655342102, | |
| "learning_rate": 6.223223633569973e-05, | |
| "loss": 3.3879, | |
| "step": 915 | |
| }, | |
| { | |
| "epoch": 3.1807909604519775, | |
| "grad_norm": 0.5904785990715027, | |
| "learning_rate": 6.214345923058686e-05, | |
| "loss": 3.3544, | |
| "step": 916 | |
| }, | |
| { | |
| "epoch": 3.1842677096914387, | |
| "grad_norm": 0.6163371205329895, | |
| "learning_rate": 6.205464142424938e-05, | |
| "loss": 3.2991, | |
| "step": 917 | |
| }, | |
| { | |
| "epoch": 3.1877444589308994, | |
| "grad_norm": 0.6407819390296936, | |
| "learning_rate": 6.19657832143779e-05, | |
| "loss": 3.3577, | |
| "step": 918 | |
| }, | |
| { | |
| "epoch": 3.1912212081703606, | |
| "grad_norm": 0.7109596133232117, | |
| "learning_rate": 6.187688489879842e-05, | |
| "loss": 3.359, | |
| "step": 919 | |
| }, | |
| { | |
| "epoch": 3.1946979574098218, | |
| "grad_norm": 0.6707236766815186, | |
| "learning_rate": 6.178794677547137e-05, | |
| "loss": 3.3949, | |
| "step": 920 | |
| }, | |
| { | |
| "epoch": 3.198174706649283, | |
| "grad_norm": 0.6592925190925598, | |
| "learning_rate": 6.169896914249059e-05, | |
| "loss": 3.3861, | |
| "step": 921 | |
| }, | |
| { | |
| "epoch": 3.201651455888744, | |
| "grad_norm": 0.6427909731864929, | |
| "learning_rate": 6.160995229808239e-05, | |
| "loss": 3.4066, | |
| "step": 922 | |
| }, | |
| { | |
| "epoch": 3.2051282051282053, | |
| "grad_norm": 0.6185514330863953, | |
| "learning_rate": 6.152089654060444e-05, | |
| "loss": 3.305, | |
| "step": 923 | |
| }, | |
| { | |
| "epoch": 3.208604954367666, | |
| "grad_norm": 0.6078172326087952, | |
| "learning_rate": 6.143180216854487e-05, | |
| "loss": 3.2733, | |
| "step": 924 | |
| }, | |
| { | |
| "epoch": 3.212081703607127, | |
| "grad_norm": 0.6162821650505066, | |
| "learning_rate": 6.134266948052126e-05, | |
| "loss": 3.4089, | |
| "step": 925 | |
| }, | |
| { | |
| "epoch": 3.2155584528465884, | |
| "grad_norm": 0.6186605095863342, | |
| "learning_rate": 6.125349877527952e-05, | |
| "loss": 3.3432, | |
| "step": 926 | |
| }, | |
| { | |
| "epoch": 3.2190352020860495, | |
| "grad_norm": 0.582805335521698, | |
| "learning_rate": 6.116429035169309e-05, | |
| "loss": 3.2337, | |
| "step": 927 | |
| }, | |
| { | |
| "epoch": 3.2225119513255107, | |
| "grad_norm": 0.6280192136764526, | |
| "learning_rate": 6.107504450876181e-05, | |
| "loss": 3.4249, | |
| "step": 928 | |
| }, | |
| { | |
| "epoch": 3.225988700564972, | |
| "grad_norm": 0.6952533721923828, | |
| "learning_rate": 6.098576154561087e-05, | |
| "loss": 3.397, | |
| "step": 929 | |
| }, | |
| { | |
| "epoch": 3.229465449804433, | |
| "grad_norm": 0.7494910955429077, | |
| "learning_rate": 6.089644176148992e-05, | |
| "loss": 3.3243, | |
| "step": 930 | |
| }, | |
| { | |
| "epoch": 3.232942199043894, | |
| "grad_norm": 0.8510701656341553, | |
| "learning_rate": 6.080708545577206e-05, | |
| "loss": 3.3274, | |
| "step": 931 | |
| }, | |
| { | |
| "epoch": 3.236418948283355, | |
| "grad_norm": 0.8542797565460205, | |
| "learning_rate": 6.0717692927952744e-05, | |
| "loss": 3.3442, | |
| "step": 932 | |
| }, | |
| { | |
| "epoch": 3.239895697522816, | |
| "grad_norm": 0.8013666868209839, | |
| "learning_rate": 6.062826447764883e-05, | |
| "loss": 3.4637, | |
| "step": 933 | |
| }, | |
| { | |
| "epoch": 3.2433724467622773, | |
| "grad_norm": 0.6489382982254028, | |
| "learning_rate": 6.053880040459764e-05, | |
| "loss": 3.2955, | |
| "step": 934 | |
| }, | |
| { | |
| "epoch": 3.2468491960017385, | |
| "grad_norm": 0.7227512001991272, | |
| "learning_rate": 6.044930100865582e-05, | |
| "loss": 3.3125, | |
| "step": 935 | |
| }, | |
| { | |
| "epoch": 3.2503259452411997, | |
| "grad_norm": 0.6559109091758728, | |
| "learning_rate": 6.035976658979846e-05, | |
| "loss": 3.3617, | |
| "step": 936 | |
| }, | |
| { | |
| "epoch": 3.2538026944806604, | |
| "grad_norm": 0.5902848839759827, | |
| "learning_rate": 6.027019744811799e-05, | |
| "loss": 3.3577, | |
| "step": 937 | |
| }, | |
| { | |
| "epoch": 3.2572794437201216, | |
| "grad_norm": 0.6053394079208374, | |
| "learning_rate": 6.0180593883823266e-05, | |
| "loss": 3.3575, | |
| "step": 938 | |
| }, | |
| { | |
| "epoch": 3.2607561929595827, | |
| "grad_norm": 0.6064934730529785, | |
| "learning_rate": 6.009095619723849e-05, | |
| "loss": 3.2599, | |
| "step": 939 | |
| }, | |
| { | |
| "epoch": 3.264232942199044, | |
| "grad_norm": 0.5894815325737, | |
| "learning_rate": 6.0001284688802226e-05, | |
| "loss": 3.3916, | |
| "step": 940 | |
| }, | |
| { | |
| "epoch": 3.267709691438505, | |
| "grad_norm": 0.6426341533660889, | |
| "learning_rate": 5.991157965906643e-05, | |
| "loss": 3.2913, | |
| "step": 941 | |
| }, | |
| { | |
| "epoch": 3.2711864406779663, | |
| "grad_norm": 0.6230353713035583, | |
| "learning_rate": 5.982184140869539e-05, | |
| "loss": 3.2609, | |
| "step": 942 | |
| }, | |
| { | |
| "epoch": 3.274663189917427, | |
| "grad_norm": 0.604451060295105, | |
| "learning_rate": 5.973207023846475e-05, | |
| "loss": 3.3861, | |
| "step": 943 | |
| }, | |
| { | |
| "epoch": 3.278139939156888, | |
| "grad_norm": 0.5737584829330444, | |
| "learning_rate": 5.964226644926045e-05, | |
| "loss": 3.3206, | |
| "step": 944 | |
| }, | |
| { | |
| "epoch": 3.2816166883963493, | |
| "grad_norm": 0.563567042350769, | |
| "learning_rate": 5.9552430342077845e-05, | |
| "loss": 3.3096, | |
| "step": 945 | |
| }, | |
| { | |
| "epoch": 3.2850934376358105, | |
| "grad_norm": 0.6597940325737, | |
| "learning_rate": 5.946256221802051e-05, | |
| "loss": 3.4015, | |
| "step": 946 | |
| }, | |
| { | |
| "epoch": 3.2885701868752717, | |
| "grad_norm": 0.771218478679657, | |
| "learning_rate": 5.937266237829941e-05, | |
| "loss": 3.4045, | |
| "step": 947 | |
| }, | |
| { | |
| "epoch": 3.292046936114733, | |
| "grad_norm": 0.8483478426933289, | |
| "learning_rate": 5.928273112423177e-05, | |
| "loss": 3.2997, | |
| "step": 948 | |
| }, | |
| { | |
| "epoch": 3.295523685354194, | |
| "grad_norm": 0.7908669114112854, | |
| "learning_rate": 5.9192768757240115e-05, | |
| "loss": 3.3535, | |
| "step": 949 | |
| }, | |
| { | |
| "epoch": 3.2990004345936548, | |
| "grad_norm": 0.831207811832428, | |
| "learning_rate": 5.9102775578851275e-05, | |
| "loss": 3.3441, | |
| "step": 950 | |
| }, | |
| { | |
| "epoch": 3.302477183833116, | |
| "grad_norm": 0.8150995373725891, | |
| "learning_rate": 5.90127518906953e-05, | |
| "loss": 3.2245, | |
| "step": 951 | |
| }, | |
| { | |
| "epoch": 3.305953933072577, | |
| "grad_norm": 0.5736242532730103, | |
| "learning_rate": 5.892269799450453e-05, | |
| "loss": 3.3601, | |
| "step": 952 | |
| }, | |
| { | |
| "epoch": 3.3094306823120383, | |
| "grad_norm": 0.724216639995575, | |
| "learning_rate": 5.883261419211257e-05, | |
| "loss": 3.3684, | |
| "step": 953 | |
| }, | |
| { | |
| "epoch": 3.3129074315514995, | |
| "grad_norm": 0.6450987458229065, | |
| "learning_rate": 5.874250078545323e-05, | |
| "loss": 3.3208, | |
| "step": 954 | |
| }, | |
| { | |
| "epoch": 3.3163841807909606, | |
| "grad_norm": 0.630285382270813, | |
| "learning_rate": 5.8652358076559554e-05, | |
| "loss": 3.2841, | |
| "step": 955 | |
| }, | |
| { | |
| "epoch": 3.3198609300304214, | |
| "grad_norm": 0.6654447913169861, | |
| "learning_rate": 5.856218636756281e-05, | |
| "loss": 3.3725, | |
| "step": 956 | |
| }, | |
| { | |
| "epoch": 3.3233376792698825, | |
| "grad_norm": 0.6245713829994202, | |
| "learning_rate": 5.847198596069148e-05, | |
| "loss": 3.2424, | |
| "step": 957 | |
| }, | |
| { | |
| "epoch": 3.3268144285093437, | |
| "grad_norm": 0.6425074338912964, | |
| "learning_rate": 5.838175715827016e-05, | |
| "loss": 3.3608, | |
| "step": 958 | |
| }, | |
| { | |
| "epoch": 3.330291177748805, | |
| "grad_norm": 0.635083019733429, | |
| "learning_rate": 5.829150026271871e-05, | |
| "loss": 3.408, | |
| "step": 959 | |
| }, | |
| { | |
| "epoch": 3.333767926988266, | |
| "grad_norm": 0.6142615675926208, | |
| "learning_rate": 5.820121557655109e-05, | |
| "loss": 3.2893, | |
| "step": 960 | |
| }, | |
| { | |
| "epoch": 3.3372446762277272, | |
| "grad_norm": 0.62328040599823, | |
| "learning_rate": 5.811090340237445e-05, | |
| "loss": 3.2408, | |
| "step": 961 | |
| }, | |
| { | |
| "epoch": 3.340721425467188, | |
| "grad_norm": 0.6543041467666626, | |
| "learning_rate": 5.8020564042888015e-05, | |
| "loss": 3.3329, | |
| "step": 962 | |
| }, | |
| { | |
| "epoch": 3.344198174706649, | |
| "grad_norm": 0.6720776557922363, | |
| "learning_rate": 5.793019780088217e-05, | |
| "loss": 3.2764, | |
| "step": 963 | |
| }, | |
| { | |
| "epoch": 3.3476749239461103, | |
| "grad_norm": 0.6297585368156433, | |
| "learning_rate": 5.783980497923742e-05, | |
| "loss": 3.3776, | |
| "step": 964 | |
| }, | |
| { | |
| "epoch": 3.3511516731855715, | |
| "grad_norm": 0.5663807988166809, | |
| "learning_rate": 5.774938588092327e-05, | |
| "loss": 3.2923, | |
| "step": 965 | |
| }, | |
| { | |
| "epoch": 3.3546284224250327, | |
| "grad_norm": 0.5683589577674866, | |
| "learning_rate": 5.7658940808997394e-05, | |
| "loss": 3.2942, | |
| "step": 966 | |
| }, | |
| { | |
| "epoch": 3.358105171664494, | |
| "grad_norm": 0.6379316449165344, | |
| "learning_rate": 5.7568470066604485e-05, | |
| "loss": 3.3255, | |
| "step": 967 | |
| }, | |
| { | |
| "epoch": 3.361581920903955, | |
| "grad_norm": 0.7071390151977539, | |
| "learning_rate": 5.747797395697525e-05, | |
| "loss": 3.2915, | |
| "step": 968 | |
| }, | |
| { | |
| "epoch": 3.3650586701434158, | |
| "grad_norm": 0.7530797719955444, | |
| "learning_rate": 5.738745278342546e-05, | |
| "loss": 3.2985, | |
| "step": 969 | |
| }, | |
| { | |
| "epoch": 3.368535419382877, | |
| "grad_norm": 0.7314615845680237, | |
| "learning_rate": 5.729690684935487e-05, | |
| "loss": 3.2902, | |
| "step": 970 | |
| }, | |
| { | |
| "epoch": 3.372012168622338, | |
| "grad_norm": 0.6785851120948792, | |
| "learning_rate": 5.7206336458246234e-05, | |
| "loss": 3.1867, | |
| "step": 971 | |
| }, | |
| { | |
| "epoch": 3.3754889178617993, | |
| "grad_norm": 0.6161269545555115, | |
| "learning_rate": 5.7115741913664264e-05, | |
| "loss": 3.3526, | |
| "step": 972 | |
| }, | |
| { | |
| "epoch": 3.3789656671012605, | |
| "grad_norm": 0.6322996020317078, | |
| "learning_rate": 5.702512351925464e-05, | |
| "loss": 3.2621, | |
| "step": 973 | |
| }, | |
| { | |
| "epoch": 3.382442416340721, | |
| "grad_norm": 0.6312293410301208, | |
| "learning_rate": 5.693448157874298e-05, | |
| "loss": 3.3925, | |
| "step": 974 | |
| }, | |
| { | |
| "epoch": 3.3859191655801824, | |
| "grad_norm": 0.6690333485603333, | |
| "learning_rate": 5.6843816395933825e-05, | |
| "loss": 3.3599, | |
| "step": 975 | |
| }, | |
| { | |
| "epoch": 3.3893959148196435, | |
| "grad_norm": 0.6046136021614075, | |
| "learning_rate": 5.675312827470959e-05, | |
| "loss": 3.2502, | |
| "step": 976 | |
| }, | |
| { | |
| "epoch": 3.3928726640591047, | |
| "grad_norm": 0.5736027359962463, | |
| "learning_rate": 5.666241751902962e-05, | |
| "loss": 3.2605, | |
| "step": 977 | |
| }, | |
| { | |
| "epoch": 3.396349413298566, | |
| "grad_norm": 0.6438172459602356, | |
| "learning_rate": 5.6571684432929085e-05, | |
| "loss": 3.4194, | |
| "step": 978 | |
| }, | |
| { | |
| "epoch": 3.399826162538027, | |
| "grad_norm": 0.6276637315750122, | |
| "learning_rate": 5.648092932051801e-05, | |
| "loss": 3.4356, | |
| "step": 979 | |
| }, | |
| { | |
| "epoch": 3.4033029117774882, | |
| "grad_norm": 0.6946492195129395, | |
| "learning_rate": 5.6390152485980244e-05, | |
| "loss": 3.3409, | |
| "step": 980 | |
| }, | |
| { | |
| "epoch": 3.406779661016949, | |
| "grad_norm": 0.7016040682792664, | |
| "learning_rate": 5.6299354233572445e-05, | |
| "loss": 3.2799, | |
| "step": 981 | |
| }, | |
| { | |
| "epoch": 3.41025641025641, | |
| "grad_norm": 0.6583088636398315, | |
| "learning_rate": 5.6208534867623067e-05, | |
| "loss": 3.3199, | |
| "step": 982 | |
| }, | |
| { | |
| "epoch": 3.4137331594958713, | |
| "grad_norm": 0.6809118390083313, | |
| "learning_rate": 5.611769469253132e-05, | |
| "loss": 3.3738, | |
| "step": 983 | |
| }, | |
| { | |
| "epoch": 3.4172099087353325, | |
| "grad_norm": 0.7044417858123779, | |
| "learning_rate": 5.602683401276615e-05, | |
| "loss": 3.2786, | |
| "step": 984 | |
| }, | |
| { | |
| "epoch": 3.4206866579747937, | |
| "grad_norm": 0.7142994999885559, | |
| "learning_rate": 5.593595313286526e-05, | |
| "loss": 3.2612, | |
| "step": 985 | |
| }, | |
| { | |
| "epoch": 3.424163407214255, | |
| "grad_norm": 0.5846092104911804, | |
| "learning_rate": 5.584505235743403e-05, | |
| "loss": 3.2446, | |
| "step": 986 | |
| }, | |
| { | |
| "epoch": 3.427640156453716, | |
| "grad_norm": 0.6685866117477417, | |
| "learning_rate": 5.575413199114452e-05, | |
| "loss": 3.3602, | |
| "step": 987 | |
| }, | |
| { | |
| "epoch": 3.4311169056931767, | |
| "grad_norm": 0.6180519461631775, | |
| "learning_rate": 5.566319233873446e-05, | |
| "loss": 3.1912, | |
| "step": 988 | |
| }, | |
| { | |
| "epoch": 3.434593654932638, | |
| "grad_norm": 0.6132622361183167, | |
| "learning_rate": 5.557223370500626e-05, | |
| "loss": 3.3666, | |
| "step": 989 | |
| }, | |
| { | |
| "epoch": 3.438070404172099, | |
| "grad_norm": 0.6167900562286377, | |
| "learning_rate": 5.548125639482586e-05, | |
| "loss": 3.3479, | |
| "step": 990 | |
| }, | |
| { | |
| "epoch": 3.4415471534115603, | |
| "grad_norm": 0.6413175463676453, | |
| "learning_rate": 5.539026071312191e-05, | |
| "loss": 3.2576, | |
| "step": 991 | |
| }, | |
| { | |
| "epoch": 3.4450239026510214, | |
| "grad_norm": 0.6170135736465454, | |
| "learning_rate": 5.529924696488456e-05, | |
| "loss": 3.3199, | |
| "step": 992 | |
| }, | |
| { | |
| "epoch": 3.448500651890482, | |
| "grad_norm": 0.5534811019897461, | |
| "learning_rate": 5.52082154551645e-05, | |
| "loss": 3.1903, | |
| "step": 993 | |
| }, | |
| { | |
| "epoch": 3.4519774011299433, | |
| "grad_norm": 0.608566403388977, | |
| "learning_rate": 5.5117166489072014e-05, | |
| "loss": 3.319, | |
| "step": 994 | |
| }, | |
| { | |
| "epoch": 3.4554541503694045, | |
| "grad_norm": 0.6388020515441895, | |
| "learning_rate": 5.502610037177586e-05, | |
| "loss": 3.2828, | |
| "step": 995 | |
| }, | |
| { | |
| "epoch": 3.4589308996088657, | |
| "grad_norm": 0.6199042201042175, | |
| "learning_rate": 5.4935017408502274e-05, | |
| "loss": 3.3288, | |
| "step": 996 | |
| }, | |
| { | |
| "epoch": 3.462407648848327, | |
| "grad_norm": 0.6209288835525513, | |
| "learning_rate": 5.4843917904533994e-05, | |
| "loss": 3.277, | |
| "step": 997 | |
| }, | |
| { | |
| "epoch": 3.465884398087788, | |
| "grad_norm": 0.657084584236145, | |
| "learning_rate": 5.475280216520913e-05, | |
| "loss": 3.3167, | |
| "step": 998 | |
| }, | |
| { | |
| "epoch": 3.469361147327249, | |
| "grad_norm": 0.6538966298103333, | |
| "learning_rate": 5.466167049592029e-05, | |
| "loss": 3.1976, | |
| "step": 999 | |
| }, | |
| { | |
| "epoch": 3.47283789656671, | |
| "grad_norm": 0.7147399187088013, | |
| "learning_rate": 5.4570523202113396e-05, | |
| "loss": 3.3552, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 3.476314645806171, | |
| "grad_norm": 0.7168034315109253, | |
| "learning_rate": 5.44793605892868e-05, | |
| "loss": 3.3, | |
| "step": 1001 | |
| }, | |
| { | |
| "epoch": 3.4797913950456323, | |
| "grad_norm": 0.6470374464988708, | |
| "learning_rate": 5.438818296299015e-05, | |
| "loss": 3.3219, | |
| "step": 1002 | |
| }, | |
| { | |
| "epoch": 3.4832681442850935, | |
| "grad_norm": 0.666338324546814, | |
| "learning_rate": 5.4296990628823455e-05, | |
| "loss": 3.2503, | |
| "step": 1003 | |
| }, | |
| { | |
| "epoch": 3.4867448935245546, | |
| "grad_norm": 0.6241434216499329, | |
| "learning_rate": 5.420578389243599e-05, | |
| "loss": 3.1832, | |
| "step": 1004 | |
| }, | |
| { | |
| "epoch": 3.490221642764016, | |
| "grad_norm": 0.6188483238220215, | |
| "learning_rate": 5.4114563059525346e-05, | |
| "loss": 3.256, | |
| "step": 1005 | |
| }, | |
| { | |
| "epoch": 3.493698392003477, | |
| "grad_norm": 0.6511822938919067, | |
| "learning_rate": 5.402332843583631e-05, | |
| "loss": 3.3172, | |
| "step": 1006 | |
| }, | |
| { | |
| "epoch": 3.4971751412429377, | |
| "grad_norm": 0.5653273463249207, | |
| "learning_rate": 5.3932080327159886e-05, | |
| "loss": 3.2706, | |
| "step": 1007 | |
| }, | |
| { | |
| "epoch": 3.500651890482399, | |
| "grad_norm": 0.5717982649803162, | |
| "learning_rate": 5.384081903933235e-05, | |
| "loss": 3.2577, | |
| "step": 1008 | |
| }, | |
| { | |
| "epoch": 3.50412863972186, | |
| "grad_norm": 0.5827626585960388, | |
| "learning_rate": 5.374954487823407e-05, | |
| "loss": 3.3031, | |
| "step": 1009 | |
| }, | |
| { | |
| "epoch": 3.5076053889613212, | |
| "grad_norm": 0.611819326877594, | |
| "learning_rate": 5.365825814978861e-05, | |
| "loss": 3.2244, | |
| "step": 1010 | |
| }, | |
| { | |
| "epoch": 3.5110821382007824, | |
| "grad_norm": 0.6256561875343323, | |
| "learning_rate": 5.3566959159961615e-05, | |
| "loss": 3.2336, | |
| "step": 1011 | |
| }, | |
| { | |
| "epoch": 3.514558887440243, | |
| "grad_norm": 0.6403440237045288, | |
| "learning_rate": 5.3475648214759896e-05, | |
| "loss": 3.3431, | |
| "step": 1012 | |
| }, | |
| { | |
| "epoch": 3.5180356366797043, | |
| "grad_norm": 0.612866997718811, | |
| "learning_rate": 5.3384325620230245e-05, | |
| "loss": 3.2884, | |
| "step": 1013 | |
| }, | |
| { | |
| "epoch": 3.5215123859191655, | |
| "grad_norm": 0.5799668431282043, | |
| "learning_rate": 5.3292991682458574e-05, | |
| "loss": 3.3096, | |
| "step": 1014 | |
| }, | |
| { | |
| "epoch": 3.5249891351586267, | |
| "grad_norm": 0.5905560255050659, | |
| "learning_rate": 5.3201646707568764e-05, | |
| "loss": 3.274, | |
| "step": 1015 | |
| }, | |
| { | |
| "epoch": 3.528465884398088, | |
| "grad_norm": 0.5543707609176636, | |
| "learning_rate": 5.311029100172172e-05, | |
| "loss": 3.1989, | |
| "step": 1016 | |
| }, | |
| { | |
| "epoch": 3.531942633637549, | |
| "grad_norm": 0.5833373069763184, | |
| "learning_rate": 5.3018924871114305e-05, | |
| "loss": 3.2519, | |
| "step": 1017 | |
| }, | |
| { | |
| "epoch": 3.53541938287701, | |
| "grad_norm": 0.5746926069259644, | |
| "learning_rate": 5.292754862197831e-05, | |
| "loss": 3.1616, | |
| "step": 1018 | |
| }, | |
| { | |
| "epoch": 3.538896132116471, | |
| "grad_norm": 0.5569521188735962, | |
| "learning_rate": 5.2836162560579486e-05, | |
| "loss": 3.2796, | |
| "step": 1019 | |
| }, | |
| { | |
| "epoch": 3.542372881355932, | |
| "grad_norm": 0.5777444839477539, | |
| "learning_rate": 5.274476699321638e-05, | |
| "loss": 3.2595, | |
| "step": 1020 | |
| }, | |
| { | |
| "epoch": 3.5458496305953933, | |
| "grad_norm": 0.5533985495567322, | |
| "learning_rate": 5.265336222621949e-05, | |
| "loss": 3.2107, | |
| "step": 1021 | |
| }, | |
| { | |
| "epoch": 3.5493263798348544, | |
| "grad_norm": 0.5138996839523315, | |
| "learning_rate": 5.2561948565950126e-05, | |
| "loss": 3.2121, | |
| "step": 1022 | |
| }, | |
| { | |
| "epoch": 3.5528031290743156, | |
| "grad_norm": 0.5798884630203247, | |
| "learning_rate": 5.2470526318799365e-05, | |
| "loss": 3.2697, | |
| "step": 1023 | |
| }, | |
| { | |
| "epoch": 3.5562798783137763, | |
| "grad_norm": 0.6103897094726562, | |
| "learning_rate": 5.2379095791187124e-05, | |
| "loss": 3.2417, | |
| "step": 1024 | |
| }, | |
| { | |
| "epoch": 3.559756627553238, | |
| "grad_norm": 0.5791857242584229, | |
| "learning_rate": 5.228765728956102e-05, | |
| "loss": 3.2789, | |
| "step": 1025 | |
| }, | |
| { | |
| "epoch": 3.5632333767926987, | |
| "grad_norm": 0.5706868767738342, | |
| "learning_rate": 5.2196211120395444e-05, | |
| "loss": 3.3693, | |
| "step": 1026 | |
| }, | |
| { | |
| "epoch": 3.56671012603216, | |
| "grad_norm": 0.5966957807540894, | |
| "learning_rate": 5.2104757590190445e-05, | |
| "loss": 3.2444, | |
| "step": 1027 | |
| }, | |
| { | |
| "epoch": 3.570186875271621, | |
| "grad_norm": 0.5554029941558838, | |
| "learning_rate": 5.201329700547076e-05, | |
| "loss": 3.2575, | |
| "step": 1028 | |
| }, | |
| { | |
| "epoch": 3.573663624511082, | |
| "grad_norm": 0.5967298150062561, | |
| "learning_rate": 5.1921829672784786e-05, | |
| "loss": 3.2913, | |
| "step": 1029 | |
| }, | |
| { | |
| "epoch": 3.5771403737505434, | |
| "grad_norm": 0.6098992824554443, | |
| "learning_rate": 5.183035589870353e-05, | |
| "loss": 3.2266, | |
| "step": 1030 | |
| }, | |
| { | |
| "epoch": 3.580617122990004, | |
| "grad_norm": 0.5845767259597778, | |
| "learning_rate": 5.173887598981956e-05, | |
| "loss": 3.2771, | |
| "step": 1031 | |
| }, | |
| { | |
| "epoch": 3.5840938722294653, | |
| "grad_norm": 0.6617316603660583, | |
| "learning_rate": 5.164739025274604e-05, | |
| "loss": 3.2556, | |
| "step": 1032 | |
| }, | |
| { | |
| "epoch": 3.5875706214689265, | |
| "grad_norm": 0.7001234292984009, | |
| "learning_rate": 5.155589899411567e-05, | |
| "loss": 3.264, | |
| "step": 1033 | |
| }, | |
| { | |
| "epoch": 3.5910473707083876, | |
| "grad_norm": 0.7605653405189514, | |
| "learning_rate": 5.146440252057961e-05, | |
| "loss": 3.2912, | |
| "step": 1034 | |
| }, | |
| { | |
| "epoch": 3.594524119947849, | |
| "grad_norm": 0.74252849817276, | |
| "learning_rate": 5.137290113880656e-05, | |
| "loss": 3.2395, | |
| "step": 1035 | |
| }, | |
| { | |
| "epoch": 3.59800086918731, | |
| "grad_norm": 0.7059817314147949, | |
| "learning_rate": 5.128139515548164e-05, | |
| "loss": 3.2552, | |
| "step": 1036 | |
| }, | |
| { | |
| "epoch": 3.601477618426771, | |
| "grad_norm": 0.7125615477561951, | |
| "learning_rate": 5.1189884877305375e-05, | |
| "loss": 3.2621, | |
| "step": 1037 | |
| }, | |
| { | |
| "epoch": 3.604954367666232, | |
| "grad_norm": 0.7657261490821838, | |
| "learning_rate": 5.109837061099274e-05, | |
| "loss": 3.1871, | |
| "step": 1038 | |
| }, | |
| { | |
| "epoch": 3.608431116905693, | |
| "grad_norm": 0.6542237401008606, | |
| "learning_rate": 5.100685266327202e-05, | |
| "loss": 3.1583, | |
| "step": 1039 | |
| }, | |
| { | |
| "epoch": 3.6119078661451542, | |
| "grad_norm": 0.6304188370704651, | |
| "learning_rate": 5.091533134088388e-05, | |
| "loss": 3.2659, | |
| "step": 1040 | |
| }, | |
| { | |
| "epoch": 3.6153846153846154, | |
| "grad_norm": 0.6933708190917969, | |
| "learning_rate": 5.0823806950580254e-05, | |
| "loss": 3.2046, | |
| "step": 1041 | |
| }, | |
| { | |
| "epoch": 3.6188613646240766, | |
| "grad_norm": 0.6795551776885986, | |
| "learning_rate": 5.073227979912339e-05, | |
| "loss": 3.3441, | |
| "step": 1042 | |
| }, | |
| { | |
| "epoch": 3.6223381138635373, | |
| "grad_norm": 0.6919432282447815, | |
| "learning_rate": 5.064075019328479e-05, | |
| "loss": 3.2962, | |
| "step": 1043 | |
| }, | |
| { | |
| "epoch": 3.625814863102999, | |
| "grad_norm": 0.7967680096626282, | |
| "learning_rate": 5.054921843984418e-05, | |
| "loss": 3.2839, | |
| "step": 1044 | |
| }, | |
| { | |
| "epoch": 3.6292916123424597, | |
| "grad_norm": 0.7113544940948486, | |
| "learning_rate": 5.045768484558847e-05, | |
| "loss": 3.2422, | |
| "step": 1045 | |
| }, | |
| { | |
| "epoch": 3.632768361581921, | |
| "grad_norm": 0.6717776656150818, | |
| "learning_rate": 5.036614971731076e-05, | |
| "loss": 3.256, | |
| "step": 1046 | |
| }, | |
| { | |
| "epoch": 3.636245110821382, | |
| "grad_norm": 0.5887851119041443, | |
| "learning_rate": 5.027461336180929e-05, | |
| "loss": 3.1419, | |
| "step": 1047 | |
| }, | |
| { | |
| "epoch": 3.639721860060843, | |
| "grad_norm": 0.6715282201766968, | |
| "learning_rate": 5.018307608588636e-05, | |
| "loss": 3.2874, | |
| "step": 1048 | |
| }, | |
| { | |
| "epoch": 3.6431986093003044, | |
| "grad_norm": 0.6706719994544983, | |
| "learning_rate": 5.0091538196347445e-05, | |
| "loss": 3.2945, | |
| "step": 1049 | |
| }, | |
| { | |
| "epoch": 3.646675358539765, | |
| "grad_norm": 0.6065131425857544, | |
| "learning_rate": 5e-05, | |
| "loss": 3.2286, | |
| "step": 1050 | |
| }, | |
| { | |
| "epoch": 3.6501521077792263, | |
| "grad_norm": 0.5673298239707947, | |
| "learning_rate": 4.9908461803652566e-05, | |
| "loss": 3.332, | |
| "step": 1051 | |
| }, | |
| { | |
| "epoch": 3.6536288570186874, | |
| "grad_norm": 0.5908064842224121, | |
| "learning_rate": 4.981692391411366e-05, | |
| "loss": 3.3474, | |
| "step": 1052 | |
| }, | |
| { | |
| "epoch": 3.6571056062581486, | |
| "grad_norm": 0.5967066884040833, | |
| "learning_rate": 4.972538663819073e-05, | |
| "loss": 3.248, | |
| "step": 1053 | |
| }, | |
| { | |
| "epoch": 3.66058235549761, | |
| "grad_norm": 0.6299230456352234, | |
| "learning_rate": 4.9633850282689246e-05, | |
| "loss": 3.1357, | |
| "step": 1054 | |
| }, | |
| { | |
| "epoch": 3.664059104737071, | |
| "grad_norm": 0.5750209093093872, | |
| "learning_rate": 4.954231515441153e-05, | |
| "loss": 3.1899, | |
| "step": 1055 | |
| }, | |
| { | |
| "epoch": 3.667535853976532, | |
| "grad_norm": 0.6004741787910461, | |
| "learning_rate": 4.9450781560155816e-05, | |
| "loss": 3.3146, | |
| "step": 1056 | |
| }, | |
| { | |
| "epoch": 3.671012603215993, | |
| "grad_norm": 0.6603873372077942, | |
| "learning_rate": 4.935924980671522e-05, | |
| "loss": 3.3052, | |
| "step": 1057 | |
| }, | |
| { | |
| "epoch": 3.674489352455454, | |
| "grad_norm": 0.5748945474624634, | |
| "learning_rate": 4.926772020087663e-05, | |
| "loss": 3.294, | |
| "step": 1058 | |
| }, | |
| { | |
| "epoch": 3.6779661016949152, | |
| "grad_norm": 0.56917804479599, | |
| "learning_rate": 4.917619304941977e-05, | |
| "loss": 3.2473, | |
| "step": 1059 | |
| }, | |
| { | |
| "epoch": 3.6814428509343764, | |
| "grad_norm": 0.6211404204368591, | |
| "learning_rate": 4.9084668659116154e-05, | |
| "loss": 3.3157, | |
| "step": 1060 | |
| }, | |
| { | |
| "epoch": 3.6849196001738376, | |
| "grad_norm": 0.6026207804679871, | |
| "learning_rate": 4.899314733672799e-05, | |
| "loss": 3.2696, | |
| "step": 1061 | |
| }, | |
| { | |
| "epoch": 3.6883963494132983, | |
| "grad_norm": 0.5868690609931946, | |
| "learning_rate": 4.890162938900727e-05, | |
| "loss": 3.1322, | |
| "step": 1062 | |
| }, | |
| { | |
| "epoch": 3.69187309865276, | |
| "grad_norm": 0.5615720748901367, | |
| "learning_rate": 4.881011512269463e-05, | |
| "loss": 3.3041, | |
| "step": 1063 | |
| }, | |
| { | |
| "epoch": 3.6953498478922207, | |
| "grad_norm": 0.5574554800987244, | |
| "learning_rate": 4.871860484451838e-05, | |
| "loss": 3.2903, | |
| "step": 1064 | |
| }, | |
| { | |
| "epoch": 3.698826597131682, | |
| "grad_norm": 0.589513897895813, | |
| "learning_rate": 4.862709886119344e-05, | |
| "loss": 3.1515, | |
| "step": 1065 | |
| }, | |
| { | |
| "epoch": 3.702303346371143, | |
| "grad_norm": 0.6018418073654175, | |
| "learning_rate": 4.8535597479420406e-05, | |
| "loss": 3.2614, | |
| "step": 1066 | |
| }, | |
| { | |
| "epoch": 3.705780095610604, | |
| "grad_norm": 0.5487574934959412, | |
| "learning_rate": 4.844410100588435e-05, | |
| "loss": 3.2011, | |
| "step": 1067 | |
| }, | |
| { | |
| "epoch": 3.7092568448500653, | |
| "grad_norm": 0.5497720241546631, | |
| "learning_rate": 4.835260974725397e-05, | |
| "loss": 3.2824, | |
| "step": 1068 | |
| }, | |
| { | |
| "epoch": 3.712733594089526, | |
| "grad_norm": 0.5350008606910706, | |
| "learning_rate": 4.8261124010180445e-05, | |
| "loss": 3.2405, | |
| "step": 1069 | |
| }, | |
| { | |
| "epoch": 3.7162103433289873, | |
| "grad_norm": 0.5541216135025024, | |
| "learning_rate": 4.8169644101296474e-05, | |
| "loss": 3.2287, | |
| "step": 1070 | |
| }, | |
| { | |
| "epoch": 3.7196870925684484, | |
| "grad_norm": 0.6203867197036743, | |
| "learning_rate": 4.807817032721522e-05, | |
| "loss": 3.2801, | |
| "step": 1071 | |
| }, | |
| { | |
| "epoch": 3.7231638418079096, | |
| "grad_norm": 0.6372770667076111, | |
| "learning_rate": 4.798670299452926e-05, | |
| "loss": 3.2477, | |
| "step": 1072 | |
| }, | |
| { | |
| "epoch": 3.7266405910473708, | |
| "grad_norm": 0.6761902570724487, | |
| "learning_rate": 4.789524240980958e-05, | |
| "loss": 3.2624, | |
| "step": 1073 | |
| }, | |
| { | |
| "epoch": 3.730117340286832, | |
| "grad_norm": 0.732210636138916, | |
| "learning_rate": 4.780378887960458e-05, | |
| "loss": 3.2492, | |
| "step": 1074 | |
| }, | |
| { | |
| "epoch": 3.733594089526293, | |
| "grad_norm": 0.6838912963867188, | |
| "learning_rate": 4.7712342710438987e-05, | |
| "loss": 3.1799, | |
| "step": 1075 | |
| }, | |
| { | |
| "epoch": 3.737070838765754, | |
| "grad_norm": 0.6677350401878357, | |
| "learning_rate": 4.762090420881289e-05, | |
| "loss": 3.1473, | |
| "step": 1076 | |
| }, | |
| { | |
| "epoch": 3.740547588005215, | |
| "grad_norm": 0.6039561033248901, | |
| "learning_rate": 4.7529473681200646e-05, | |
| "loss": 3.178, | |
| "step": 1077 | |
| }, | |
| { | |
| "epoch": 3.744024337244676, | |
| "grad_norm": 0.6518564224243164, | |
| "learning_rate": 4.743805143404989e-05, | |
| "loss": 3.2581, | |
| "step": 1078 | |
| }, | |
| { | |
| "epoch": 3.7475010864841374, | |
| "grad_norm": 0.6599782705307007, | |
| "learning_rate": 4.734663777378052e-05, | |
| "loss": 3.2275, | |
| "step": 1079 | |
| }, | |
| { | |
| "epoch": 3.7509778357235986, | |
| "grad_norm": 0.5603999495506287, | |
| "learning_rate": 4.725523300678363e-05, | |
| "loss": 3.2184, | |
| "step": 1080 | |
| }, | |
| { | |
| "epoch": 3.7544545849630593, | |
| "grad_norm": 0.6059252619743347, | |
| "learning_rate": 4.7163837439420525e-05, | |
| "loss": 3.272, | |
| "step": 1081 | |
| }, | |
| { | |
| "epoch": 3.757931334202521, | |
| "grad_norm": 0.5787419080734253, | |
| "learning_rate": 4.707245137802169e-05, | |
| "loss": 3.1984, | |
| "step": 1082 | |
| }, | |
| { | |
| "epoch": 3.7614080834419816, | |
| "grad_norm": 0.5447966456413269, | |
| "learning_rate": 4.6981075128885693e-05, | |
| "loss": 3.1944, | |
| "step": 1083 | |
| }, | |
| { | |
| "epoch": 3.764884832681443, | |
| "grad_norm": 0.5587812066078186, | |
| "learning_rate": 4.6889708998278284e-05, | |
| "loss": 3.2819, | |
| "step": 1084 | |
| }, | |
| { | |
| "epoch": 3.768361581920904, | |
| "grad_norm": 0.5818442106246948, | |
| "learning_rate": 4.6798353292431254e-05, | |
| "loss": 3.2023, | |
| "step": 1085 | |
| }, | |
| { | |
| "epoch": 3.771838331160365, | |
| "grad_norm": 0.6101146340370178, | |
| "learning_rate": 4.6707008317541444e-05, | |
| "loss": 3.1397, | |
| "step": 1086 | |
| }, | |
| { | |
| "epoch": 3.7753150803998263, | |
| "grad_norm": 0.5633519291877747, | |
| "learning_rate": 4.661567437976977e-05, | |
| "loss": 3.196, | |
| "step": 1087 | |
| }, | |
| { | |
| "epoch": 3.778791829639287, | |
| "grad_norm": 0.5950900912284851, | |
| "learning_rate": 4.652435178524013e-05, | |
| "loss": 3.2732, | |
| "step": 1088 | |
| }, | |
| { | |
| "epoch": 3.7822685788787482, | |
| "grad_norm": 0.6070186495780945, | |
| "learning_rate": 4.643304084003839e-05, | |
| "loss": 3.233, | |
| "step": 1089 | |
| }, | |
| { | |
| "epoch": 3.7857453281182094, | |
| "grad_norm": 0.622246503829956, | |
| "learning_rate": 4.6341741850211404e-05, | |
| "loss": 3.2402, | |
| "step": 1090 | |
| }, | |
| { | |
| "epoch": 3.7892220773576706, | |
| "grad_norm": 0.628646731376648, | |
| "learning_rate": 4.6250455121765944e-05, | |
| "loss": 3.2109, | |
| "step": 1091 | |
| }, | |
| { | |
| "epoch": 3.7926988265971318, | |
| "grad_norm": 0.5549827218055725, | |
| "learning_rate": 4.615918096066766e-05, | |
| "loss": 3.254, | |
| "step": 1092 | |
| }, | |
| { | |
| "epoch": 3.796175575836593, | |
| "grad_norm": 0.5434677600860596, | |
| "learning_rate": 4.606791967284012e-05, | |
| "loss": 3.2507, | |
| "step": 1093 | |
| }, | |
| { | |
| "epoch": 3.799652325076054, | |
| "grad_norm": 0.5489455461502075, | |
| "learning_rate": 4.597667156416371e-05, | |
| "loss": 3.26, | |
| "step": 1094 | |
| }, | |
| { | |
| "epoch": 3.803129074315515, | |
| "grad_norm": 0.5459377765655518, | |
| "learning_rate": 4.588543694047466e-05, | |
| "loss": 3.2457, | |
| "step": 1095 | |
| }, | |
| { | |
| "epoch": 3.806605823554976, | |
| "grad_norm": 0.5723990201950073, | |
| "learning_rate": 4.5794216107564e-05, | |
| "loss": 3.2643, | |
| "step": 1096 | |
| }, | |
| { | |
| "epoch": 3.810082572794437, | |
| "grad_norm": 0.5779864192008972, | |
| "learning_rate": 4.570300937117655e-05, | |
| "loss": 3.1926, | |
| "step": 1097 | |
| }, | |
| { | |
| "epoch": 3.8135593220338984, | |
| "grad_norm": 0.5621626973152161, | |
| "learning_rate": 4.561181703700986e-05, | |
| "loss": 3.2111, | |
| "step": 1098 | |
| }, | |
| { | |
| "epoch": 3.8170360712733595, | |
| "grad_norm": 0.5523443818092346, | |
| "learning_rate": 4.552063941071323e-05, | |
| "loss": 3.2244, | |
| "step": 1099 | |
| }, | |
| { | |
| "epoch": 3.8205128205128203, | |
| "grad_norm": 0.5601244568824768, | |
| "learning_rate": 4.542947679788662e-05, | |
| "loss": 3.1726, | |
| "step": 1100 | |
| }, | |
| { | |
| "epoch": 3.823989569752282, | |
| "grad_norm": 0.5333982110023499, | |
| "learning_rate": 4.533832950407973e-05, | |
| "loss": 3.2602, | |
| "step": 1101 | |
| }, | |
| { | |
| "epoch": 3.8274663189917426, | |
| "grad_norm": 0.5482881665229797, | |
| "learning_rate": 4.5247197834790876e-05, | |
| "loss": 3.2696, | |
| "step": 1102 | |
| }, | |
| { | |
| "epoch": 3.830943068231204, | |
| "grad_norm": 0.5877856016159058, | |
| "learning_rate": 4.515608209546602e-05, | |
| "loss": 3.2463, | |
| "step": 1103 | |
| }, | |
| { | |
| "epoch": 3.834419817470665, | |
| "grad_norm": 0.6199784874916077, | |
| "learning_rate": 4.506498259149774e-05, | |
| "loss": 3.1951, | |
| "step": 1104 | |
| }, | |
| { | |
| "epoch": 3.837896566710126, | |
| "grad_norm": 0.5966984629631042, | |
| "learning_rate": 4.4973899628224154e-05, | |
| "loss": 3.2796, | |
| "step": 1105 | |
| }, | |
| { | |
| "epoch": 3.8413733159495873, | |
| "grad_norm": 0.5492768287658691, | |
| "learning_rate": 4.488283351092799e-05, | |
| "loss": 3.2153, | |
| "step": 1106 | |
| }, | |
| { | |
| "epoch": 3.844850065189048, | |
| "grad_norm": 0.5626404881477356, | |
| "learning_rate": 4.4791784544835515e-05, | |
| "loss": 3.0994, | |
| "step": 1107 | |
| }, | |
| { | |
| "epoch": 3.848326814428509, | |
| "grad_norm": 0.5543808937072754, | |
| "learning_rate": 4.4700753035115454e-05, | |
| "loss": 3.2721, | |
| "step": 1108 | |
| }, | |
| { | |
| "epoch": 3.8518035636679704, | |
| "grad_norm": 0.6047621369361877, | |
| "learning_rate": 4.460973928687809e-05, | |
| "loss": 3.2098, | |
| "step": 1109 | |
| }, | |
| { | |
| "epoch": 3.8552803129074316, | |
| "grad_norm": 0.6789091229438782, | |
| "learning_rate": 4.4518743605174136e-05, | |
| "loss": 3.2294, | |
| "step": 1110 | |
| }, | |
| { | |
| "epoch": 3.8587570621468927, | |
| "grad_norm": 0.5913859009742737, | |
| "learning_rate": 4.442776629499375e-05, | |
| "loss": 3.1556, | |
| "step": 1111 | |
| }, | |
| { | |
| "epoch": 3.862233811386354, | |
| "grad_norm": 0.5494495034217834, | |
| "learning_rate": 4.433680766126554e-05, | |
| "loss": 3.2562, | |
| "step": 1112 | |
| }, | |
| { | |
| "epoch": 3.865710560625815, | |
| "grad_norm": 0.6001352667808533, | |
| "learning_rate": 4.424586800885551e-05, | |
| "loss": 3.2289, | |
| "step": 1113 | |
| }, | |
| { | |
| "epoch": 3.869187309865276, | |
| "grad_norm": 0.6379590630531311, | |
| "learning_rate": 4.415494764256599e-05, | |
| "loss": 3.2218, | |
| "step": 1114 | |
| }, | |
| { | |
| "epoch": 3.872664059104737, | |
| "grad_norm": 0.5929582118988037, | |
| "learning_rate": 4.4064046867134756e-05, | |
| "loss": 3.2409, | |
| "step": 1115 | |
| }, | |
| { | |
| "epoch": 3.876140808344198, | |
| "grad_norm": 0.567794680595398, | |
| "learning_rate": 4.397316598723385e-05, | |
| "loss": 3.1523, | |
| "step": 1116 | |
| }, | |
| { | |
| "epoch": 3.8796175575836593, | |
| "grad_norm": 0.6330842971801758, | |
| "learning_rate": 4.388230530746869e-05, | |
| "loss": 3.1354, | |
| "step": 1117 | |
| }, | |
| { | |
| "epoch": 3.8830943068231205, | |
| "grad_norm": 0.5656293034553528, | |
| "learning_rate": 4.379146513237695e-05, | |
| "loss": 3.197, | |
| "step": 1118 | |
| }, | |
| { | |
| "epoch": 3.8865710560625812, | |
| "grad_norm": 0.5936200618743896, | |
| "learning_rate": 4.370064576642757e-05, | |
| "loss": 3.2288, | |
| "step": 1119 | |
| }, | |
| { | |
| "epoch": 3.890047805302043, | |
| "grad_norm": 0.6053287982940674, | |
| "learning_rate": 4.360984751401977e-05, | |
| "loss": 3.134, | |
| "step": 1120 | |
| }, | |
| { | |
| "epoch": 3.8935245545415036, | |
| "grad_norm": 0.5832516551017761, | |
| "learning_rate": 4.351907067948201e-05, | |
| "loss": 3.1563, | |
| "step": 1121 | |
| }, | |
| { | |
| "epoch": 3.8970013037809648, | |
| "grad_norm": 0.5615696907043457, | |
| "learning_rate": 4.3428315567070926e-05, | |
| "loss": 3.2662, | |
| "step": 1122 | |
| }, | |
| { | |
| "epoch": 3.900478053020426, | |
| "grad_norm": 0.557080090045929, | |
| "learning_rate": 4.333758248097039e-05, | |
| "loss": 3.2424, | |
| "step": 1123 | |
| }, | |
| { | |
| "epoch": 3.903954802259887, | |
| "grad_norm": 0.6137921810150146, | |
| "learning_rate": 4.324687172529041e-05, | |
| "loss": 3.2753, | |
| "step": 1124 | |
| }, | |
| { | |
| "epoch": 3.9074315514993483, | |
| "grad_norm": 0.6269393563270569, | |
| "learning_rate": 4.315618360406618e-05, | |
| "loss": 3.1655, | |
| "step": 1125 | |
| }, | |
| { | |
| "epoch": 3.910908300738809, | |
| "grad_norm": 0.6167050004005432, | |
| "learning_rate": 4.306551842125702e-05, | |
| "loss": 3.183, | |
| "step": 1126 | |
| }, | |
| { | |
| "epoch": 3.91438504997827, | |
| "grad_norm": 0.5596528649330139, | |
| "learning_rate": 4.297487648074538e-05, | |
| "loss": 3.212, | |
| "step": 1127 | |
| }, | |
| { | |
| "epoch": 3.9178617992177314, | |
| "grad_norm": 0.5810624957084656, | |
| "learning_rate": 4.288425808633575e-05, | |
| "loss": 3.2202, | |
| "step": 1128 | |
| }, | |
| { | |
| "epoch": 3.9213385484571925, | |
| "grad_norm": 0.5932186245918274, | |
| "learning_rate": 4.2793663541753784e-05, | |
| "loss": 3.1973, | |
| "step": 1129 | |
| }, | |
| { | |
| "epoch": 3.9248152976966537, | |
| "grad_norm": 0.5662857294082642, | |
| "learning_rate": 4.2703093150645144e-05, | |
| "loss": 3.2006, | |
| "step": 1130 | |
| }, | |
| { | |
| "epoch": 3.928292046936115, | |
| "grad_norm": 0.5792871117591858, | |
| "learning_rate": 4.261254721657454e-05, | |
| "loss": 3.2062, | |
| "step": 1131 | |
| }, | |
| { | |
| "epoch": 3.931768796175576, | |
| "grad_norm": 0.6331871151924133, | |
| "learning_rate": 4.252202604302476e-05, | |
| "loss": 3.2503, | |
| "step": 1132 | |
| }, | |
| { | |
| "epoch": 3.935245545415037, | |
| "grad_norm": 0.5286860466003418, | |
| "learning_rate": 4.2431529933395527e-05, | |
| "loss": 3.2181, | |
| "step": 1133 | |
| }, | |
| { | |
| "epoch": 3.938722294654498, | |
| "grad_norm": 0.5583444833755493, | |
| "learning_rate": 4.234105919100261e-05, | |
| "loss": 3.2073, | |
| "step": 1134 | |
| }, | |
| { | |
| "epoch": 3.942199043893959, | |
| "grad_norm": 0.5682042837142944, | |
| "learning_rate": 4.2250614119076735e-05, | |
| "loss": 3.2068, | |
| "step": 1135 | |
| }, | |
| { | |
| "epoch": 3.9456757931334203, | |
| "grad_norm": 0.6427503824234009, | |
| "learning_rate": 4.2160195020762597e-05, | |
| "loss": 3.2051, | |
| "step": 1136 | |
| }, | |
| { | |
| "epoch": 3.9491525423728815, | |
| "grad_norm": 0.5848159193992615, | |
| "learning_rate": 4.206980219911783e-05, | |
| "loss": 3.1387, | |
| "step": 1137 | |
| }, | |
| { | |
| "epoch": 3.952629291612342, | |
| "grad_norm": 0.5341483354568481, | |
| "learning_rate": 4.197943595711198e-05, | |
| "loss": 3.2449, | |
| "step": 1138 | |
| }, | |
| { | |
| "epoch": 3.956106040851804, | |
| "grad_norm": 0.5785036683082581, | |
| "learning_rate": 4.188909659762556e-05, | |
| "loss": 3.2745, | |
| "step": 1139 | |
| }, | |
| { | |
| "epoch": 3.9595827900912646, | |
| "grad_norm": 0.5817328095436096, | |
| "learning_rate": 4.179878442344892e-05, | |
| "loss": 3.1943, | |
| "step": 1140 | |
| }, | |
| { | |
| "epoch": 3.9630595393307257, | |
| "grad_norm": 0.6359755396842957, | |
| "learning_rate": 4.17084997372813e-05, | |
| "loss": 3.251, | |
| "step": 1141 | |
| }, | |
| { | |
| "epoch": 3.966536288570187, | |
| "grad_norm": 0.6350237727165222, | |
| "learning_rate": 4.1618242841729846e-05, | |
| "loss": 3.2164, | |
| "step": 1142 | |
| }, | |
| { | |
| "epoch": 3.970013037809648, | |
| "grad_norm": 0.6320176720619202, | |
| "learning_rate": 4.152801403930855e-05, | |
| "loss": 3.2481, | |
| "step": 1143 | |
| }, | |
| { | |
| "epoch": 3.9734897870491093, | |
| "grad_norm": 0.6210284233093262, | |
| "learning_rate": 4.14378136324372e-05, | |
| "loss": 3.1793, | |
| "step": 1144 | |
| }, | |
| { | |
| "epoch": 3.97696653628857, | |
| "grad_norm": 0.543938934803009, | |
| "learning_rate": 4.134764192344046e-05, | |
| "loss": 3.2093, | |
| "step": 1145 | |
| }, | |
| { | |
| "epoch": 3.980443285528031, | |
| "grad_norm": 0.5592504143714905, | |
| "learning_rate": 4.125749921454679e-05, | |
| "loss": 3.1314, | |
| "step": 1146 | |
| }, | |
| { | |
| "epoch": 3.9839200347674923, | |
| "grad_norm": 0.5563082695007324, | |
| "learning_rate": 4.116738580788744e-05, | |
| "loss": 3.1755, | |
| "step": 1147 | |
| }, | |
| { | |
| "epoch": 3.9873967840069535, | |
| "grad_norm": 0.5996997952461243, | |
| "learning_rate": 4.107730200549549e-05, | |
| "loss": 3.3564, | |
| "step": 1148 | |
| }, | |
| { | |
| "epoch": 3.9908735332464147, | |
| "grad_norm": 0.5965397357940674, | |
| "learning_rate": 4.0987248109304714e-05, | |
| "loss": 3.1906, | |
| "step": 1149 | |
| }, | |
| { | |
| "epoch": 3.994350282485876, | |
| "grad_norm": 0.5691776871681213, | |
| "learning_rate": 4.089722442114873e-05, | |
| "loss": 3.1736, | |
| "step": 1150 | |
| }, | |
| { | |
| "epoch": 3.997827031725337, | |
| "grad_norm": 0.5946555733680725, | |
| "learning_rate": 4.080723124275988e-05, | |
| "loss": 3.1895, | |
| "step": 1151 | |
| }, | |
| { | |
| "epoch": 4.0, | |
| "grad_norm": 0.5992119312286377, | |
| "learning_rate": 4.0717268875768225e-05, | |
| "loss": 3.2406, | |
| "step": 1152 | |
| }, | |
| { | |
| "epoch": 4.003476749239461, | |
| "grad_norm": 0.5725580453872681, | |
| "learning_rate": 4.062733762170059e-05, | |
| "loss": 3.2336, | |
| "step": 1153 | |
| }, | |
| { | |
| "epoch": 4.006953498478922, | |
| "grad_norm": 0.6200286746025085, | |
| "learning_rate": 4.0537437781979506e-05, | |
| "loss": 3.0999, | |
| "step": 1154 | |
| }, | |
| { | |
| "epoch": 4.010430247718383, | |
| "grad_norm": 0.6485391855239868, | |
| "learning_rate": 4.044756965792218e-05, | |
| "loss": 3.259, | |
| "step": 1155 | |
| }, | |
| { | |
| "epoch": 4.013906996957845, | |
| "grad_norm": 0.5790440440177917, | |
| "learning_rate": 4.0357733550739554e-05, | |
| "loss": 3.1542, | |
| "step": 1156 | |
| }, | |
| { | |
| "epoch": 4.017383746197305, | |
| "grad_norm": 0.6082708835601807, | |
| "learning_rate": 4.026792976153527e-05, | |
| "loss": 3.2343, | |
| "step": 1157 | |
| }, | |
| { | |
| "epoch": 4.020860495436767, | |
| "grad_norm": 0.6026447415351868, | |
| "learning_rate": 4.017815859130461e-05, | |
| "loss": 3.181, | |
| "step": 1158 | |
| }, | |
| { | |
| "epoch": 4.024337244676228, | |
| "grad_norm": 0.617872953414917, | |
| "learning_rate": 4.008842034093359e-05, | |
| "loss": 3.1653, | |
| "step": 1159 | |
| }, | |
| { | |
| "epoch": 4.0278139939156885, | |
| "grad_norm": 0.5940406918525696, | |
| "learning_rate": 3.9998715311197785e-05, | |
| "loss": 3.082, | |
| "step": 1160 | |
| }, | |
| { | |
| "epoch": 4.03129074315515, | |
| "grad_norm": 0.5831135511398315, | |
| "learning_rate": 3.990904380276153e-05, | |
| "loss": 3.153, | |
| "step": 1161 | |
| }, | |
| { | |
| "epoch": 4.034767492394611, | |
| "grad_norm": 0.5792697072029114, | |
| "learning_rate": 3.981940611617675e-05, | |
| "loss": 3.2169, | |
| "step": 1162 | |
| }, | |
| { | |
| "epoch": 4.0382442416340725, | |
| "grad_norm": 0.5567387342453003, | |
| "learning_rate": 3.972980255188201e-05, | |
| "loss": 3.1521, | |
| "step": 1163 | |
| }, | |
| { | |
| "epoch": 4.041720990873533, | |
| "grad_norm": 0.6137407422065735, | |
| "learning_rate": 3.9640233410201553e-05, | |
| "loss": 3.2615, | |
| "step": 1164 | |
| }, | |
| { | |
| "epoch": 4.045197740112994, | |
| "grad_norm": 0.6380611062049866, | |
| "learning_rate": 3.955069899134418e-05, | |
| "loss": 3.2734, | |
| "step": 1165 | |
| }, | |
| { | |
| "epoch": 4.0486744893524556, | |
| "grad_norm": 0.6026031374931335, | |
| "learning_rate": 3.9461199595402354e-05, | |
| "loss": 3.1017, | |
| "step": 1166 | |
| }, | |
| { | |
| "epoch": 4.052151238591916, | |
| "grad_norm": 0.5493614673614502, | |
| "learning_rate": 3.937173552235117e-05, | |
| "loss": 3.1587, | |
| "step": 1167 | |
| }, | |
| { | |
| "epoch": 4.055627987831378, | |
| "grad_norm": 0.5663195848464966, | |
| "learning_rate": 3.928230707204729e-05, | |
| "loss": 3.1702, | |
| "step": 1168 | |
| }, | |
| { | |
| "epoch": 4.059104737070839, | |
| "grad_norm": 0.5667363405227661, | |
| "learning_rate": 3.919291454422796e-05, | |
| "loss": 3.1958, | |
| "step": 1169 | |
| }, | |
| { | |
| "epoch": 4.0625814863103, | |
| "grad_norm": 0.5629976987838745, | |
| "learning_rate": 3.9103558238510086e-05, | |
| "loss": 3.2153, | |
| "step": 1170 | |
| }, | |
| { | |
| "epoch": 4.066058235549761, | |
| "grad_norm": 0.6252686381340027, | |
| "learning_rate": 3.901423845438916e-05, | |
| "loss": 3.2646, | |
| "step": 1171 | |
| }, | |
| { | |
| "epoch": 4.069534984789222, | |
| "grad_norm": 0.5598259568214417, | |
| "learning_rate": 3.8924955491238216e-05, | |
| "loss": 3.1532, | |
| "step": 1172 | |
| }, | |
| { | |
| "epoch": 4.073011734028683, | |
| "grad_norm": 0.5491876602172852, | |
| "learning_rate": 3.883570964830692e-05, | |
| "loss": 3.124, | |
| "step": 1173 | |
| }, | |
| { | |
| "epoch": 4.076488483268144, | |
| "grad_norm": 0.5444916486740112, | |
| "learning_rate": 3.874650122472049e-05, | |
| "loss": 3.1616, | |
| "step": 1174 | |
| }, | |
| { | |
| "epoch": 4.079965232507606, | |
| "grad_norm": 0.5169205069541931, | |
| "learning_rate": 3.865733051947876e-05, | |
| "loss": 3.1565, | |
| "step": 1175 | |
| }, | |
| { | |
| "epoch": 4.083441981747066, | |
| "grad_norm": 0.5438332557678223, | |
| "learning_rate": 3.856819783145514e-05, | |
| "loss": 3.2329, | |
| "step": 1176 | |
| }, | |
| { | |
| "epoch": 4.086918730986528, | |
| "grad_norm": 0.551806628704071, | |
| "learning_rate": 3.847910345939557e-05, | |
| "loss": 3.2382, | |
| "step": 1177 | |
| }, | |
| { | |
| "epoch": 4.090395480225989, | |
| "grad_norm": 0.5678349733352661, | |
| "learning_rate": 3.839004770191762e-05, | |
| "loss": 3.2062, | |
| "step": 1178 | |
| }, | |
| { | |
| "epoch": 4.0938722294654495, | |
| "grad_norm": 0.5699625611305237, | |
| "learning_rate": 3.83010308575094e-05, | |
| "loss": 3.1105, | |
| "step": 1179 | |
| }, | |
| { | |
| "epoch": 4.097348978704911, | |
| "grad_norm": 0.5540375709533691, | |
| "learning_rate": 3.821205322452863e-05, | |
| "loss": 3.1929, | |
| "step": 1180 | |
| }, | |
| { | |
| "epoch": 4.100825727944372, | |
| "grad_norm": 0.5406695008277893, | |
| "learning_rate": 3.812311510120159e-05, | |
| "loss": 3.0958, | |
| "step": 1181 | |
| }, | |
| { | |
| "epoch": 4.1043024771838335, | |
| "grad_norm": 0.5478038787841797, | |
| "learning_rate": 3.803421678562213e-05, | |
| "loss": 3.1762, | |
| "step": 1182 | |
| }, | |
| { | |
| "epoch": 4.107779226423294, | |
| "grad_norm": 0.6140181422233582, | |
| "learning_rate": 3.794535857575064e-05, | |
| "loss": 3.1347, | |
| "step": 1183 | |
| }, | |
| { | |
| "epoch": 4.111255975662755, | |
| "grad_norm": 0.578913152217865, | |
| "learning_rate": 3.785654076941317e-05, | |
| "loss": 3.1791, | |
| "step": 1184 | |
| }, | |
| { | |
| "epoch": 4.1147327249022165, | |
| "grad_norm": 0.6035884618759155, | |
| "learning_rate": 3.776776366430027e-05, | |
| "loss": 3.1587, | |
| "step": 1185 | |
| }, | |
| { | |
| "epoch": 4.118209474141677, | |
| "grad_norm": 0.5582138895988464, | |
| "learning_rate": 3.767902755796613e-05, | |
| "loss": 3.1542, | |
| "step": 1186 | |
| }, | |
| { | |
| "epoch": 4.121686223381139, | |
| "grad_norm": 0.526336669921875, | |
| "learning_rate": 3.759033274782751e-05, | |
| "loss": 3.1465, | |
| "step": 1187 | |
| }, | |
| { | |
| "epoch": 4.1251629726206, | |
| "grad_norm": 0.6234402060508728, | |
| "learning_rate": 3.750167953116272e-05, | |
| "loss": 3.1536, | |
| "step": 1188 | |
| }, | |
| { | |
| "epoch": 4.128639721860061, | |
| "grad_norm": 0.7032943964004517, | |
| "learning_rate": 3.741306820511072e-05, | |
| "loss": 3.1409, | |
| "step": 1189 | |
| }, | |
| { | |
| "epoch": 4.132116471099522, | |
| "grad_norm": 0.7070549726486206, | |
| "learning_rate": 3.7324499066670006e-05, | |
| "loss": 3.1695, | |
| "step": 1190 | |
| }, | |
| { | |
| "epoch": 4.135593220338983, | |
| "grad_norm": 0.6663886904716492, | |
| "learning_rate": 3.723597241269772e-05, | |
| "loss": 3.1933, | |
| "step": 1191 | |
| }, | |
| { | |
| "epoch": 4.139069969578444, | |
| "grad_norm": 0.6331878900527954, | |
| "learning_rate": 3.7147488539908596e-05, | |
| "loss": 3.1693, | |
| "step": 1192 | |
| }, | |
| { | |
| "epoch": 4.142546718817905, | |
| "grad_norm": 0.5591497421264648, | |
| "learning_rate": 3.705904774487396e-05, | |
| "loss": 3.267, | |
| "step": 1193 | |
| }, | |
| { | |
| "epoch": 4.146023468057367, | |
| "grad_norm": 0.6263229250907898, | |
| "learning_rate": 3.697065032402078e-05, | |
| "loss": 3.1842, | |
| "step": 1194 | |
| }, | |
| { | |
| "epoch": 4.149500217296827, | |
| "grad_norm": 0.6090732216835022, | |
| "learning_rate": 3.6882296573630634e-05, | |
| "loss": 3.1744, | |
| "step": 1195 | |
| }, | |
| { | |
| "epoch": 4.152976966536288, | |
| "grad_norm": 0.5834642648696899, | |
| "learning_rate": 3.6793986789838744e-05, | |
| "loss": 3.1195, | |
| "step": 1196 | |
| }, | |
| { | |
| "epoch": 4.15645371577575, | |
| "grad_norm": 0.5978863835334778, | |
| "learning_rate": 3.6705721268632915e-05, | |
| "loss": 3.1333, | |
| "step": 1197 | |
| }, | |
| { | |
| "epoch": 4.1599304650152105, | |
| "grad_norm": 0.5608931183815002, | |
| "learning_rate": 3.6617500305852674e-05, | |
| "loss": 3.0997, | |
| "step": 1198 | |
| }, | |
| { | |
| "epoch": 4.163407214254672, | |
| "grad_norm": 0.6076219081878662, | |
| "learning_rate": 3.6529324197188154e-05, | |
| "loss": 3.1598, | |
| "step": 1199 | |
| }, | |
| { | |
| "epoch": 4.166883963494133, | |
| "grad_norm": 0.571110725402832, | |
| "learning_rate": 3.644119323817915e-05, | |
| "loss": 3.1411, | |
| "step": 1200 | |
| }, | |
| { | |
| "epoch": 4.170360712733594, | |
| "grad_norm": 0.524634838104248, | |
| "learning_rate": 3.6353107724214175e-05, | |
| "loss": 3.1337, | |
| "step": 1201 | |
| }, | |
| { | |
| "epoch": 4.173837461973055, | |
| "grad_norm": 0.5356817245483398, | |
| "learning_rate": 3.6265067950529365e-05, | |
| "loss": 3.1192, | |
| "step": 1202 | |
| }, | |
| { | |
| "epoch": 4.177314211212516, | |
| "grad_norm": 0.5559493899345398, | |
| "learning_rate": 3.617707421220758e-05, | |
| "loss": 3.1269, | |
| "step": 1203 | |
| }, | |
| { | |
| "epoch": 4.1807909604519775, | |
| "grad_norm": 0.5397975444793701, | |
| "learning_rate": 3.608912680417737e-05, | |
| "loss": 3.1603, | |
| "step": 1204 | |
| }, | |
| { | |
| "epoch": 4.184267709691438, | |
| "grad_norm": 0.5938603281974792, | |
| "learning_rate": 3.600122602121202e-05, | |
| "loss": 3.13, | |
| "step": 1205 | |
| }, | |
| { | |
| "epoch": 4.1877444589309, | |
| "grad_norm": 0.5785700082778931, | |
| "learning_rate": 3.591337215792852e-05, | |
| "loss": 3.2027, | |
| "step": 1206 | |
| }, | |
| { | |
| "epoch": 4.191221208170361, | |
| "grad_norm": 0.5957548022270203, | |
| "learning_rate": 3.58255655087866e-05, | |
| "loss": 3.1809, | |
| "step": 1207 | |
| }, | |
| { | |
| "epoch": 4.194697957409822, | |
| "grad_norm": 0.5649887323379517, | |
| "learning_rate": 3.5737806368087774e-05, | |
| "loss": 3.2136, | |
| "step": 1208 | |
| }, | |
| { | |
| "epoch": 4.198174706649283, | |
| "grad_norm": 0.5808984637260437, | |
| "learning_rate": 3.5650095029974266e-05, | |
| "loss": 3.1725, | |
| "step": 1209 | |
| }, | |
| { | |
| "epoch": 4.201651455888744, | |
| "grad_norm": 0.5980812907218933, | |
| "learning_rate": 3.5562431788428156e-05, | |
| "loss": 3.1213, | |
| "step": 1210 | |
| }, | |
| { | |
| "epoch": 4.205128205128205, | |
| "grad_norm": 0.548862099647522, | |
| "learning_rate": 3.5474816937270225e-05, | |
| "loss": 3.1082, | |
| "step": 1211 | |
| }, | |
| { | |
| "epoch": 4.208604954367666, | |
| "grad_norm": 0.5771917104721069, | |
| "learning_rate": 3.538725077015915e-05, | |
| "loss": 3.2051, | |
| "step": 1212 | |
| }, | |
| { | |
| "epoch": 4.212081703607128, | |
| "grad_norm": 0.5784444808959961, | |
| "learning_rate": 3.529973358059038e-05, | |
| "loss": 3.0524, | |
| "step": 1213 | |
| }, | |
| { | |
| "epoch": 4.215558452846588, | |
| "grad_norm": 0.5533136129379272, | |
| "learning_rate": 3.521226566189523e-05, | |
| "loss": 3.1359, | |
| "step": 1214 | |
| }, | |
| { | |
| "epoch": 4.219035202086049, | |
| "grad_norm": 0.5978413224220276, | |
| "learning_rate": 3.512484730723986e-05, | |
| "loss": 3.1429, | |
| "step": 1215 | |
| }, | |
| { | |
| "epoch": 4.222511951325511, | |
| "grad_norm": 0.5688326358795166, | |
| "learning_rate": 3.503747880962431e-05, | |
| "loss": 3.1544, | |
| "step": 1216 | |
| }, | |
| { | |
| "epoch": 4.2259887005649714, | |
| "grad_norm": 0.5804445147514343, | |
| "learning_rate": 3.495016046188155e-05, | |
| "loss": 3.1167, | |
| "step": 1217 | |
| }, | |
| { | |
| "epoch": 4.229465449804433, | |
| "grad_norm": 0.5675270557403564, | |
| "learning_rate": 3.4862892556676395e-05, | |
| "loss": 3.1552, | |
| "step": 1218 | |
| }, | |
| { | |
| "epoch": 4.232942199043894, | |
| "grad_norm": 0.5292279720306396, | |
| "learning_rate": 3.4775675386504656e-05, | |
| "loss": 3.1221, | |
| "step": 1219 | |
| }, | |
| { | |
| "epoch": 4.236418948283355, | |
| "grad_norm": 0.553810179233551, | |
| "learning_rate": 3.468850924369203e-05, | |
| "loss": 3.2311, | |
| "step": 1220 | |
| }, | |
| { | |
| "epoch": 4.239895697522816, | |
| "grad_norm": 0.5489530563354492, | |
| "learning_rate": 3.460139442039326e-05, | |
| "loss": 3.1054, | |
| "step": 1221 | |
| }, | |
| { | |
| "epoch": 4.243372446762277, | |
| "grad_norm": 0.5100445747375488, | |
| "learning_rate": 3.4514331208591025e-05, | |
| "loss": 3.2079, | |
| "step": 1222 | |
| }, | |
| { | |
| "epoch": 4.2468491960017385, | |
| "grad_norm": 0.5427148938179016, | |
| "learning_rate": 3.4427319900095055e-05, | |
| "loss": 3.1394, | |
| "step": 1223 | |
| }, | |
| { | |
| "epoch": 4.250325945241199, | |
| "grad_norm": 0.5344714522361755, | |
| "learning_rate": 3.4340360786541064e-05, | |
| "loss": 3.1592, | |
| "step": 1224 | |
| }, | |
| { | |
| "epoch": 4.253802694480661, | |
| "grad_norm": 0.5320305824279785, | |
| "learning_rate": 3.425345415938988e-05, | |
| "loss": 3.193, | |
| "step": 1225 | |
| }, | |
| { | |
| "epoch": 4.257279443720122, | |
| "grad_norm": 0.5287283062934875, | |
| "learning_rate": 3.4166600309926387e-05, | |
| "loss": 3.2131, | |
| "step": 1226 | |
| }, | |
| { | |
| "epoch": 4.260756192959583, | |
| "grad_norm": 0.5557782649993896, | |
| "learning_rate": 3.407979952925857e-05, | |
| "loss": 3.24, | |
| "step": 1227 | |
| }, | |
| { | |
| "epoch": 4.264232942199044, | |
| "grad_norm": 0.5349240303039551, | |
| "learning_rate": 3.399305210831656e-05, | |
| "loss": 3.1917, | |
| "step": 1228 | |
| }, | |
| { | |
| "epoch": 4.267709691438505, | |
| "grad_norm": 0.5544419884681702, | |
| "learning_rate": 3.390635833785163e-05, | |
| "loss": 3.1339, | |
| "step": 1229 | |
| }, | |
| { | |
| "epoch": 4.271186440677966, | |
| "grad_norm": 0.5695310235023499, | |
| "learning_rate": 3.3819718508435226e-05, | |
| "loss": 3.1142, | |
| "step": 1230 | |
| }, | |
| { | |
| "epoch": 4.274663189917427, | |
| "grad_norm": 0.4966232180595398, | |
| "learning_rate": 3.3733132910458034e-05, | |
| "loss": 3.1916, | |
| "step": 1231 | |
| }, | |
| { | |
| "epoch": 4.278139939156889, | |
| "grad_norm": 0.5800032019615173, | |
| "learning_rate": 3.364660183412892e-05, | |
| "loss": 3.1645, | |
| "step": 1232 | |
| }, | |
| { | |
| "epoch": 4.281616688396349, | |
| "grad_norm": 0.5773885250091553, | |
| "learning_rate": 3.356012556947405e-05, | |
| "loss": 3.1542, | |
| "step": 1233 | |
| }, | |
| { | |
| "epoch": 4.28509343763581, | |
| "grad_norm": 0.5234330296516418, | |
| "learning_rate": 3.347370440633587e-05, | |
| "loss": 3.1371, | |
| "step": 1234 | |
| }, | |
| { | |
| "epoch": 4.288570186875272, | |
| "grad_norm": 0.5608299374580383, | |
| "learning_rate": 3.338733863437212e-05, | |
| "loss": 3.1268, | |
| "step": 1235 | |
| }, | |
| { | |
| "epoch": 4.292046936114732, | |
| "grad_norm": 0.5602731108665466, | |
| "learning_rate": 3.3301028543054935e-05, | |
| "loss": 3.1742, | |
| "step": 1236 | |
| }, | |
| { | |
| "epoch": 4.295523685354194, | |
| "grad_norm": 0.5070881247520447, | |
| "learning_rate": 3.3214774421669774e-05, | |
| "loss": 3.1491, | |
| "step": 1237 | |
| }, | |
| { | |
| "epoch": 4.299000434593655, | |
| "grad_norm": 0.6085598468780518, | |
| "learning_rate": 3.3128576559314504e-05, | |
| "loss": 3.1355, | |
| "step": 1238 | |
| }, | |
| { | |
| "epoch": 4.302477183833116, | |
| "grad_norm": 0.562229335308075, | |
| "learning_rate": 3.304243524489847e-05, | |
| "loss": 3.1475, | |
| "step": 1239 | |
| }, | |
| { | |
| "epoch": 4.305953933072577, | |
| "grad_norm": 0.50657719373703, | |
| "learning_rate": 3.295635076714144e-05, | |
| "loss": 3.1014, | |
| "step": 1240 | |
| }, | |
| { | |
| "epoch": 4.309430682312038, | |
| "grad_norm": 0.547308623790741, | |
| "learning_rate": 3.2870323414572726e-05, | |
| "loss": 3.1063, | |
| "step": 1241 | |
| }, | |
| { | |
| "epoch": 4.3129074315514995, | |
| "grad_norm": 0.538101315498352, | |
| "learning_rate": 3.278435347553014e-05, | |
| "loss": 3.2346, | |
| "step": 1242 | |
| }, | |
| { | |
| "epoch": 4.31638418079096, | |
| "grad_norm": 0.5632209777832031, | |
| "learning_rate": 3.2698441238159065e-05, | |
| "loss": 3.1009, | |
| "step": 1243 | |
| }, | |
| { | |
| "epoch": 4.319860930030422, | |
| "grad_norm": 0.538557231426239, | |
| "learning_rate": 3.261258699041152e-05, | |
| "loss": 3.1992, | |
| "step": 1244 | |
| }, | |
| { | |
| "epoch": 4.3233376792698825, | |
| "grad_norm": 0.5102593302726746, | |
| "learning_rate": 3.2526791020045086e-05, | |
| "loss": 3.0824, | |
| "step": 1245 | |
| }, | |
| { | |
| "epoch": 4.326814428509344, | |
| "grad_norm": 0.5571600794792175, | |
| "learning_rate": 3.24410536146221e-05, | |
| "loss": 3.2344, | |
| "step": 1246 | |
| }, | |
| { | |
| "epoch": 4.330291177748805, | |
| "grad_norm": 0.5731806755065918, | |
| "learning_rate": 3.235537506150856e-05, | |
| "loss": 3.1911, | |
| "step": 1247 | |
| }, | |
| { | |
| "epoch": 4.333767926988266, | |
| "grad_norm": 0.568469226360321, | |
| "learning_rate": 3.226975564787322e-05, | |
| "loss": 3.1631, | |
| "step": 1248 | |
| }, | |
| { | |
| "epoch": 4.337244676227727, | |
| "grad_norm": 0.6091863512992859, | |
| "learning_rate": 3.218419566068661e-05, | |
| "loss": 3.1156, | |
| "step": 1249 | |
| }, | |
| { | |
| "epoch": 4.340721425467188, | |
| "grad_norm": 0.5082367658615112, | |
| "learning_rate": 3.209869538672008e-05, | |
| "loss": 3.1481, | |
| "step": 1250 | |
| }, | |
| { | |
| "epoch": 4.34419817470665, | |
| "grad_norm": 0.531330943107605, | |
| "learning_rate": 3.201325511254487e-05, | |
| "loss": 3.2305, | |
| "step": 1251 | |
| }, | |
| { | |
| "epoch": 4.34767492394611, | |
| "grad_norm": 0.542111873626709, | |
| "learning_rate": 3.192787512453105e-05, | |
| "loss": 3.155, | |
| "step": 1252 | |
| }, | |
| { | |
| "epoch": 4.351151673185571, | |
| "grad_norm": 0.5078598260879517, | |
| "learning_rate": 3.18425557088467e-05, | |
| "loss": 3.1793, | |
| "step": 1253 | |
| }, | |
| { | |
| "epoch": 4.354628422425033, | |
| "grad_norm": 0.5410819053649902, | |
| "learning_rate": 3.175729715145684e-05, | |
| "loss": 3.2161, | |
| "step": 1254 | |
| }, | |
| { | |
| "epoch": 4.358105171664493, | |
| "grad_norm": 0.5382195711135864, | |
| "learning_rate": 3.167209973812253e-05, | |
| "loss": 3.1332, | |
| "step": 1255 | |
| }, | |
| { | |
| "epoch": 4.361581920903955, | |
| "grad_norm": 0.574900209903717, | |
| "learning_rate": 3.158696375439989e-05, | |
| "loss": 3.1937, | |
| "step": 1256 | |
| }, | |
| { | |
| "epoch": 4.365058670143416, | |
| "grad_norm": 0.527801513671875, | |
| "learning_rate": 3.1501889485639124e-05, | |
| "loss": 3.1353, | |
| "step": 1257 | |
| }, | |
| { | |
| "epoch": 4.368535419382877, | |
| "grad_norm": 0.5430712699890137, | |
| "learning_rate": 3.141687721698363e-05, | |
| "loss": 3.1929, | |
| "step": 1258 | |
| }, | |
| { | |
| "epoch": 4.372012168622338, | |
| "grad_norm": 0.5637326240539551, | |
| "learning_rate": 3.133192723336895e-05, | |
| "loss": 3.1734, | |
| "step": 1259 | |
| }, | |
| { | |
| "epoch": 4.375488917861799, | |
| "grad_norm": 0.5349778532981873, | |
| "learning_rate": 3.124703981952191e-05, | |
| "loss": 3.1223, | |
| "step": 1260 | |
| }, | |
| { | |
| "epoch": 4.3789656671012605, | |
| "grad_norm": 0.5977018475532532, | |
| "learning_rate": 3.1162215259959594e-05, | |
| "loss": 3.1931, | |
| "step": 1261 | |
| }, | |
| { | |
| "epoch": 4.382442416340721, | |
| "grad_norm": 0.5777693390846252, | |
| "learning_rate": 3.107745383898841e-05, | |
| "loss": 3.2519, | |
| "step": 1262 | |
| }, | |
| { | |
| "epoch": 4.385919165580183, | |
| "grad_norm": 0.5384812951087952, | |
| "learning_rate": 3.0992755840703195e-05, | |
| "loss": 3.1766, | |
| "step": 1263 | |
| }, | |
| { | |
| "epoch": 4.3893959148196435, | |
| "grad_norm": 0.5800558924674988, | |
| "learning_rate": 3.0908121548986136e-05, | |
| "loss": 3.1622, | |
| "step": 1264 | |
| }, | |
| { | |
| "epoch": 4.392872664059105, | |
| "grad_norm": 0.5624348521232605, | |
| "learning_rate": 3.0823551247505975e-05, | |
| "loss": 3.1381, | |
| "step": 1265 | |
| }, | |
| { | |
| "epoch": 4.396349413298566, | |
| "grad_norm": 0.5728154182434082, | |
| "learning_rate": 3.073904521971689e-05, | |
| "loss": 3.0738, | |
| "step": 1266 | |
| }, | |
| { | |
| "epoch": 4.399826162538027, | |
| "grad_norm": 0.6084502935409546, | |
| "learning_rate": 3.065460374885771e-05, | |
| "loss": 3.1587, | |
| "step": 1267 | |
| }, | |
| { | |
| "epoch": 4.403302911777488, | |
| "grad_norm": 0.5481749773025513, | |
| "learning_rate": 3.057022711795086e-05, | |
| "loss": 3.1831, | |
| "step": 1268 | |
| }, | |
| { | |
| "epoch": 4.406779661016949, | |
| "grad_norm": 0.5750850439071655, | |
| "learning_rate": 3.048591560980143e-05, | |
| "loss": 3.1694, | |
| "step": 1269 | |
| }, | |
| { | |
| "epoch": 4.410256410256411, | |
| "grad_norm": 0.5632287859916687, | |
| "learning_rate": 3.0401669506996256e-05, | |
| "loss": 3.1857, | |
| "step": 1270 | |
| }, | |
| { | |
| "epoch": 4.413733159495871, | |
| "grad_norm": 0.5208144783973694, | |
| "learning_rate": 3.0317489091902935e-05, | |
| "loss": 3.161, | |
| "step": 1271 | |
| }, | |
| { | |
| "epoch": 4.417209908735332, | |
| "grad_norm": 0.5646597146987915, | |
| "learning_rate": 3.0233374646668933e-05, | |
| "loss": 3.1561, | |
| "step": 1272 | |
| }, | |
| { | |
| "epoch": 4.420686657974794, | |
| "grad_norm": 0.6238527894020081, | |
| "learning_rate": 3.014932645322056e-05, | |
| "loss": 3.1294, | |
| "step": 1273 | |
| }, | |
| { | |
| "epoch": 4.424163407214254, | |
| "grad_norm": 0.5221900939941406, | |
| "learning_rate": 3.0065344793262112e-05, | |
| "loss": 3.1726, | |
| "step": 1274 | |
| }, | |
| { | |
| "epoch": 4.427640156453716, | |
| "grad_norm": 0.507268488407135, | |
| "learning_rate": 2.9981429948274848e-05, | |
| "loss": 3.1116, | |
| "step": 1275 | |
| }, | |
| { | |
| "epoch": 4.431116905693177, | |
| "grad_norm": 0.5262473225593567, | |
| "learning_rate": 2.9897582199516104e-05, | |
| "loss": 3.1248, | |
| "step": 1276 | |
| }, | |
| { | |
| "epoch": 4.434593654932638, | |
| "grad_norm": 0.5219045281410217, | |
| "learning_rate": 2.9813801828018344e-05, | |
| "loss": 3.1733, | |
| "step": 1277 | |
| }, | |
| { | |
| "epoch": 4.438070404172099, | |
| "grad_norm": 0.5666697025299072, | |
| "learning_rate": 2.973008911458816e-05, | |
| "loss": 3.1438, | |
| "step": 1278 | |
| }, | |
| { | |
| "epoch": 4.44154715341156, | |
| "grad_norm": 0.5166260004043579, | |
| "learning_rate": 2.9646444339805436e-05, | |
| "loss": 3.1529, | |
| "step": 1279 | |
| }, | |
| { | |
| "epoch": 4.445023902651021, | |
| "grad_norm": 0.5471430420875549, | |
| "learning_rate": 2.9562867784022262e-05, | |
| "loss": 3.1087, | |
| "step": 1280 | |
| }, | |
| { | |
| "epoch": 4.448500651890482, | |
| "grad_norm": 0.5563281774520874, | |
| "learning_rate": 2.9479359727362173e-05, | |
| "loss": 3.1607, | |
| "step": 1281 | |
| }, | |
| { | |
| "epoch": 4.451977401129944, | |
| "grad_norm": 0.5162192583084106, | |
| "learning_rate": 2.9395920449719027e-05, | |
| "loss": 3.0287, | |
| "step": 1282 | |
| }, | |
| { | |
| "epoch": 4.4554541503694045, | |
| "grad_norm": 0.5430468320846558, | |
| "learning_rate": 2.931255023075624e-05, | |
| "loss": 3.1928, | |
| "step": 1283 | |
| }, | |
| { | |
| "epoch": 4.458930899608866, | |
| "grad_norm": 0.5762031078338623, | |
| "learning_rate": 2.9229249349905684e-05, | |
| "loss": 3.1146, | |
| "step": 1284 | |
| }, | |
| { | |
| "epoch": 4.462407648848327, | |
| "grad_norm": 0.5716304183006287, | |
| "learning_rate": 2.91460180863669e-05, | |
| "loss": 3.1328, | |
| "step": 1285 | |
| }, | |
| { | |
| "epoch": 4.465884398087788, | |
| "grad_norm": 0.48609912395477295, | |
| "learning_rate": 2.9062856719106034e-05, | |
| "loss": 3.1918, | |
| "step": 1286 | |
| }, | |
| { | |
| "epoch": 4.469361147327249, | |
| "grad_norm": 0.5626135468482971, | |
| "learning_rate": 2.8979765526855002e-05, | |
| "loss": 3.0825, | |
| "step": 1287 | |
| }, | |
| { | |
| "epoch": 4.47283789656671, | |
| "grad_norm": 0.5432965755462646, | |
| "learning_rate": 2.8896744788110497e-05, | |
| "loss": 3.1992, | |
| "step": 1288 | |
| }, | |
| { | |
| "epoch": 4.4763146458061716, | |
| "grad_norm": 0.5227442979812622, | |
| "learning_rate": 2.881379478113311e-05, | |
| "loss": 3.1348, | |
| "step": 1289 | |
| }, | |
| { | |
| "epoch": 4.479791395045632, | |
| "grad_norm": 0.5127291679382324, | |
| "learning_rate": 2.873091578394626e-05, | |
| "loss": 3.1505, | |
| "step": 1290 | |
| }, | |
| { | |
| "epoch": 4.483268144285093, | |
| "grad_norm": 0.5297070145606995, | |
| "learning_rate": 2.8648108074335472e-05, | |
| "loss": 3.1592, | |
| "step": 1291 | |
| }, | |
| { | |
| "epoch": 4.486744893524555, | |
| "grad_norm": 0.5441714525222778, | |
| "learning_rate": 2.8565371929847284e-05, | |
| "loss": 3.1568, | |
| "step": 1292 | |
| }, | |
| { | |
| "epoch": 4.490221642764015, | |
| "grad_norm": 0.527181088924408, | |
| "learning_rate": 2.8482707627788406e-05, | |
| "loss": 3.0347, | |
| "step": 1293 | |
| }, | |
| { | |
| "epoch": 4.493698392003477, | |
| "grad_norm": 0.5165045261383057, | |
| "learning_rate": 2.840011544522467e-05, | |
| "loss": 3.0401, | |
| "step": 1294 | |
| }, | |
| { | |
| "epoch": 4.497175141242938, | |
| "grad_norm": 0.5333744287490845, | |
| "learning_rate": 2.831759565898029e-05, | |
| "loss": 3.1488, | |
| "step": 1295 | |
| }, | |
| { | |
| "epoch": 4.500651890482399, | |
| "grad_norm": 0.5299988985061646, | |
| "learning_rate": 2.8235148545636776e-05, | |
| "loss": 3.1192, | |
| "step": 1296 | |
| }, | |
| { | |
| "epoch": 4.50412863972186, | |
| "grad_norm": 0.5290223956108093, | |
| "learning_rate": 2.8152774381532033e-05, | |
| "loss": 3.1706, | |
| "step": 1297 | |
| }, | |
| { | |
| "epoch": 4.507605388961321, | |
| "grad_norm": 0.5143240690231323, | |
| "learning_rate": 2.8070473442759515e-05, | |
| "loss": 3.0781, | |
| "step": 1298 | |
| }, | |
| { | |
| "epoch": 4.511082138200782, | |
| "grad_norm": 0.5055028796195984, | |
| "learning_rate": 2.798824600516723e-05, | |
| "loss": 3.0796, | |
| "step": 1299 | |
| }, | |
| { | |
| "epoch": 4.514558887440243, | |
| "grad_norm": 0.5169261693954468, | |
| "learning_rate": 2.790609234435683e-05, | |
| "loss": 3.026, | |
| "step": 1300 | |
| }, | |
| { | |
| "epoch": 4.518035636679705, | |
| "grad_norm": 0.48205968737602234, | |
| "learning_rate": 2.7824012735682693e-05, | |
| "loss": 3.0564, | |
| "step": 1301 | |
| }, | |
| { | |
| "epoch": 4.5215123859191655, | |
| "grad_norm": 0.5130692720413208, | |
| "learning_rate": 2.774200745425096e-05, | |
| "loss": 3.0893, | |
| "step": 1302 | |
| }, | |
| { | |
| "epoch": 4.524989135158627, | |
| "grad_norm": 0.502295196056366, | |
| "learning_rate": 2.7660076774918708e-05, | |
| "loss": 3.0858, | |
| "step": 1303 | |
| }, | |
| { | |
| "epoch": 4.528465884398088, | |
| "grad_norm": 0.5414760708808899, | |
| "learning_rate": 2.757822097229294e-05, | |
| "loss": 3.1763, | |
| "step": 1304 | |
| }, | |
| { | |
| "epoch": 4.531942633637549, | |
| "grad_norm": 0.5080839991569519, | |
| "learning_rate": 2.749644032072969e-05, | |
| "loss": 3.1469, | |
| "step": 1305 | |
| }, | |
| { | |
| "epoch": 4.53541938287701, | |
| "grad_norm": 0.510778546333313, | |
| "learning_rate": 2.7414735094333137e-05, | |
| "loss": 3.1893, | |
| "step": 1306 | |
| }, | |
| { | |
| "epoch": 4.538896132116471, | |
| "grad_norm": 0.5154252648353577, | |
| "learning_rate": 2.7333105566954627e-05, | |
| "loss": 3.1355, | |
| "step": 1307 | |
| }, | |
| { | |
| "epoch": 4.5423728813559325, | |
| "grad_norm": 0.5087947249412537, | |
| "learning_rate": 2.7251552012191762e-05, | |
| "loss": 3.1925, | |
| "step": 1308 | |
| }, | |
| { | |
| "epoch": 4.545849630595393, | |
| "grad_norm": 0.5253137350082397, | |
| "learning_rate": 2.7170074703387565e-05, | |
| "loss": 3.2108, | |
| "step": 1309 | |
| }, | |
| { | |
| "epoch": 4.549326379834854, | |
| "grad_norm": 0.5336745381355286, | |
| "learning_rate": 2.708867391362948e-05, | |
| "loss": 3.1476, | |
| "step": 1310 | |
| }, | |
| { | |
| "epoch": 4.552803129074316, | |
| "grad_norm": 0.5403643846511841, | |
| "learning_rate": 2.700734991574849e-05, | |
| "loss": 3.1474, | |
| "step": 1311 | |
| }, | |
| { | |
| "epoch": 4.556279878313776, | |
| "grad_norm": 0.5262840986251831, | |
| "learning_rate": 2.6926102982318192e-05, | |
| "loss": 3.2003, | |
| "step": 1312 | |
| }, | |
| { | |
| "epoch": 4.559756627553238, | |
| "grad_norm": 0.5386162996292114, | |
| "learning_rate": 2.684493338565386e-05, | |
| "loss": 3.1021, | |
| "step": 1313 | |
| }, | |
| { | |
| "epoch": 4.563233376792699, | |
| "grad_norm": 0.4971773624420166, | |
| "learning_rate": 2.6763841397811573e-05, | |
| "loss": 3.1549, | |
| "step": 1314 | |
| }, | |
| { | |
| "epoch": 4.56671012603216, | |
| "grad_norm": 0.5384669303894043, | |
| "learning_rate": 2.668282729058732e-05, | |
| "loss": 3.1015, | |
| "step": 1315 | |
| }, | |
| { | |
| "epoch": 4.570186875271621, | |
| "grad_norm": 0.5245262384414673, | |
| "learning_rate": 2.6601891335516028e-05, | |
| "loss": 3.1012, | |
| "step": 1316 | |
| }, | |
| { | |
| "epoch": 4.573663624511082, | |
| "grad_norm": 0.5222594141960144, | |
| "learning_rate": 2.6521033803870692e-05, | |
| "loss": 3.1934, | |
| "step": 1317 | |
| }, | |
| { | |
| "epoch": 4.577140373750543, | |
| "grad_norm": 0.5642375349998474, | |
| "learning_rate": 2.6440254966661425e-05, | |
| "loss": 3.1681, | |
| "step": 1318 | |
| }, | |
| { | |
| "epoch": 4.580617122990004, | |
| "grad_norm": 0.5803114175796509, | |
| "learning_rate": 2.6359555094634615e-05, | |
| "loss": 3.152, | |
| "step": 1319 | |
| }, | |
| { | |
| "epoch": 4.584093872229466, | |
| "grad_norm": 0.5136197209358215, | |
| "learning_rate": 2.6278934458271997e-05, | |
| "loss": 3.1403, | |
| "step": 1320 | |
| }, | |
| { | |
| "epoch": 4.5875706214689265, | |
| "grad_norm": 0.5403902530670166, | |
| "learning_rate": 2.6198393327789662e-05, | |
| "loss": 3.1996, | |
| "step": 1321 | |
| }, | |
| { | |
| "epoch": 4.591047370708388, | |
| "grad_norm": 0.5205239653587341, | |
| "learning_rate": 2.6117931973137296e-05, | |
| "loss": 3.0724, | |
| "step": 1322 | |
| }, | |
| { | |
| "epoch": 4.594524119947849, | |
| "grad_norm": 0.5435472726821899, | |
| "learning_rate": 2.603755066399718e-05, | |
| "loss": 3.0977, | |
| "step": 1323 | |
| }, | |
| { | |
| "epoch": 4.5980008691873095, | |
| "grad_norm": 0.5279546976089478, | |
| "learning_rate": 2.5957249669783256e-05, | |
| "loss": 3.0092, | |
| "step": 1324 | |
| }, | |
| { | |
| "epoch": 4.601477618426771, | |
| "grad_norm": 0.5379184484481812, | |
| "learning_rate": 2.587702925964034e-05, | |
| "loss": 3.1167, | |
| "step": 1325 | |
| }, | |
| { | |
| "epoch": 4.604954367666232, | |
| "grad_norm": 0.5148362517356873, | |
| "learning_rate": 2.579688970244313e-05, | |
| "loss": 3.098, | |
| "step": 1326 | |
| }, | |
| { | |
| "epoch": 4.6084311169056935, | |
| "grad_norm": 0.5144993662834167, | |
| "learning_rate": 2.5716831266795326e-05, | |
| "loss": 3.0208, | |
| "step": 1327 | |
| }, | |
| { | |
| "epoch": 4.611907866145154, | |
| "grad_norm": 0.5288809537887573, | |
| "learning_rate": 2.563685422102876e-05, | |
| "loss": 3.1685, | |
| "step": 1328 | |
| }, | |
| { | |
| "epoch": 4.615384615384615, | |
| "grad_norm": 0.5119858980178833, | |
| "learning_rate": 2.5556958833202405e-05, | |
| "loss": 3.1729, | |
| "step": 1329 | |
| }, | |
| { | |
| "epoch": 4.618861364624077, | |
| "grad_norm": 0.5306943655014038, | |
| "learning_rate": 2.5477145371101597e-05, | |
| "loss": 3.113, | |
| "step": 1330 | |
| }, | |
| { | |
| "epoch": 4.622338113863537, | |
| "grad_norm": 0.4869849681854248, | |
| "learning_rate": 2.539741410223707e-05, | |
| "loss": 3.0586, | |
| "step": 1331 | |
| }, | |
| { | |
| "epoch": 4.625814863102999, | |
| "grad_norm": 0.5126588940620422, | |
| "learning_rate": 2.531776529384407e-05, | |
| "loss": 3.0891, | |
| "step": 1332 | |
| }, | |
| { | |
| "epoch": 4.62929161234246, | |
| "grad_norm": 0.48841309547424316, | |
| "learning_rate": 2.523819921288147e-05, | |
| "loss": 3.0713, | |
| "step": 1333 | |
| }, | |
| { | |
| "epoch": 4.632768361581921, | |
| "grad_norm": 0.5154165625572205, | |
| "learning_rate": 2.5158716126030836e-05, | |
| "loss": 3.1351, | |
| "step": 1334 | |
| }, | |
| { | |
| "epoch": 4.636245110821382, | |
| "grad_norm": 0.5262759327888489, | |
| "learning_rate": 2.507931629969556e-05, | |
| "loss": 3.0784, | |
| "step": 1335 | |
| }, | |
| { | |
| "epoch": 4.639721860060843, | |
| "grad_norm": 0.5570662021636963, | |
| "learning_rate": 2.500000000000001e-05, | |
| "loss": 3.1477, | |
| "step": 1336 | |
| }, | |
| { | |
| "epoch": 4.643198609300304, | |
| "grad_norm": 0.5133278369903564, | |
| "learning_rate": 2.4920767492788576e-05, | |
| "loss": 3.1487, | |
| "step": 1337 | |
| }, | |
| { | |
| "epoch": 4.646675358539765, | |
| "grad_norm": 0.5384029150009155, | |
| "learning_rate": 2.4841619043624807e-05, | |
| "loss": 3.0638, | |
| "step": 1338 | |
| }, | |
| { | |
| "epoch": 4.650152107779227, | |
| "grad_norm": 0.5107344388961792, | |
| "learning_rate": 2.4762554917790525e-05, | |
| "loss": 3.1279, | |
| "step": 1339 | |
| }, | |
| { | |
| "epoch": 4.6536288570186874, | |
| "grad_norm": 0.5281763672828674, | |
| "learning_rate": 2.4683575380284874e-05, | |
| "loss": 3.162, | |
| "step": 1340 | |
| }, | |
| { | |
| "epoch": 4.657105606258149, | |
| "grad_norm": 0.5439243912696838, | |
| "learning_rate": 2.4604680695823533e-05, | |
| "loss": 3.1004, | |
| "step": 1341 | |
| }, | |
| { | |
| "epoch": 4.66058235549761, | |
| "grad_norm": 0.49902161955833435, | |
| "learning_rate": 2.4525871128837773e-05, | |
| "loss": 3.0821, | |
| "step": 1342 | |
| }, | |
| { | |
| "epoch": 4.6640591047370705, | |
| "grad_norm": 0.5409182906150818, | |
| "learning_rate": 2.4447146943473565e-05, | |
| "loss": 3.1635, | |
| "step": 1343 | |
| }, | |
| { | |
| "epoch": 4.667535853976532, | |
| "grad_norm": 0.5150710940361023, | |
| "learning_rate": 2.436850840359073e-05, | |
| "loss": 3.0735, | |
| "step": 1344 | |
| }, | |
| { | |
| "epoch": 4.671012603215993, | |
| "grad_norm": 0.5056044459342957, | |
| "learning_rate": 2.4289955772761974e-05, | |
| "loss": 3.0758, | |
| "step": 1345 | |
| }, | |
| { | |
| "epoch": 4.6744893524554545, | |
| "grad_norm": 0.5287520289421082, | |
| "learning_rate": 2.4211489314272127e-05, | |
| "loss": 3.1805, | |
| "step": 1346 | |
| }, | |
| { | |
| "epoch": 4.677966101694915, | |
| "grad_norm": 0.5224068760871887, | |
| "learning_rate": 2.4133109291117156e-05, | |
| "loss": 3.0326, | |
| "step": 1347 | |
| }, | |
| { | |
| "epoch": 4.681442850934376, | |
| "grad_norm": 0.4915207624435425, | |
| "learning_rate": 2.4054815966003363e-05, | |
| "loss": 3.0884, | |
| "step": 1348 | |
| }, | |
| { | |
| "epoch": 4.684919600173838, | |
| "grad_norm": 0.542336642742157, | |
| "learning_rate": 2.3976609601346394e-05, | |
| "loss": 3.1397, | |
| "step": 1349 | |
| }, | |
| { | |
| "epoch": 4.688396349413298, | |
| "grad_norm": 0.5344177484512329, | |
| "learning_rate": 2.389849045927049e-05, | |
| "loss": 3.1556, | |
| "step": 1350 | |
| }, | |
| { | |
| "epoch": 4.69187309865276, | |
| "grad_norm": 0.49078139662742615, | |
| "learning_rate": 2.3820458801607548e-05, | |
| "loss": 3.1368, | |
| "step": 1351 | |
| }, | |
| { | |
| "epoch": 4.695349847892221, | |
| "grad_norm": 0.55536949634552, | |
| "learning_rate": 2.3742514889896196e-05, | |
| "loss": 3.1188, | |
| "step": 1352 | |
| }, | |
| { | |
| "epoch": 4.698826597131681, | |
| "grad_norm": 0.5023874640464783, | |
| "learning_rate": 2.3664658985381e-05, | |
| "loss": 3.1357, | |
| "step": 1353 | |
| }, | |
| { | |
| "epoch": 4.702303346371143, | |
| "grad_norm": 0.5190631151199341, | |
| "learning_rate": 2.3586891349011552e-05, | |
| "loss": 3.175, | |
| "step": 1354 | |
| }, | |
| { | |
| "epoch": 4.705780095610604, | |
| "grad_norm": 0.5370966196060181, | |
| "learning_rate": 2.3509212241441593e-05, | |
| "loss": 3.165, | |
| "step": 1355 | |
| }, | |
| { | |
| "epoch": 4.709256844850065, | |
| "grad_norm": 0.5101494193077087, | |
| "learning_rate": 2.3431621923028145e-05, | |
| "loss": 3.1552, | |
| "step": 1356 | |
| }, | |
| { | |
| "epoch": 4.712733594089526, | |
| "grad_norm": 0.5176619291305542, | |
| "learning_rate": 2.335412065383061e-05, | |
| "loss": 3.0863, | |
| "step": 1357 | |
| }, | |
| { | |
| "epoch": 4.716210343328988, | |
| "grad_norm": 0.5212742686271667, | |
| "learning_rate": 2.3276708693609943e-05, | |
| "loss": 3.1169, | |
| "step": 1358 | |
| }, | |
| { | |
| "epoch": 4.719687092568448, | |
| "grad_norm": 0.507392168045044, | |
| "learning_rate": 2.3199386301827775e-05, | |
| "loss": 3.1013, | |
| "step": 1359 | |
| }, | |
| { | |
| "epoch": 4.72316384180791, | |
| "grad_norm": 0.516706109046936, | |
| "learning_rate": 2.312215373764551e-05, | |
| "loss": 3.0495, | |
| "step": 1360 | |
| }, | |
| { | |
| "epoch": 4.726640591047371, | |
| "grad_norm": 0.49758976697921753, | |
| "learning_rate": 2.304501125992351e-05, | |
| "loss": 3.0798, | |
| "step": 1361 | |
| }, | |
| { | |
| "epoch": 4.7301173402868315, | |
| "grad_norm": 0.525264322757721, | |
| "learning_rate": 2.296795912722014e-05, | |
| "loss": 3.0509, | |
| "step": 1362 | |
| }, | |
| { | |
| "epoch": 4.733594089526293, | |
| "grad_norm": 0.5171617865562439, | |
| "learning_rate": 2.2890997597790968e-05, | |
| "loss": 3.1179, | |
| "step": 1363 | |
| }, | |
| { | |
| "epoch": 4.737070838765754, | |
| "grad_norm": 0.536133885383606, | |
| "learning_rate": 2.2814126929587933e-05, | |
| "loss": 3.0698, | |
| "step": 1364 | |
| }, | |
| { | |
| "epoch": 4.7405475880052155, | |
| "grad_norm": 0.5195457935333252, | |
| "learning_rate": 2.2737347380258394e-05, | |
| "loss": 3.0672, | |
| "step": 1365 | |
| }, | |
| { | |
| "epoch": 4.744024337244676, | |
| "grad_norm": 0.5235431790351868, | |
| "learning_rate": 2.266065920714432e-05, | |
| "loss": 3.1695, | |
| "step": 1366 | |
| }, | |
| { | |
| "epoch": 4.747501086484137, | |
| "grad_norm": 0.5295529365539551, | |
| "learning_rate": 2.258406266728143e-05, | |
| "loss": 3.1604, | |
| "step": 1367 | |
| }, | |
| { | |
| "epoch": 4.7509778357235986, | |
| "grad_norm": 0.5380683541297913, | |
| "learning_rate": 2.2507558017398263e-05, | |
| "loss": 3.0974, | |
| "step": 1368 | |
| }, | |
| { | |
| "epoch": 4.754454584963059, | |
| "grad_norm": 0.49247080087661743, | |
| "learning_rate": 2.243114551391542e-05, | |
| "loss": 3.1145, | |
| "step": 1369 | |
| }, | |
| { | |
| "epoch": 4.757931334202521, | |
| "grad_norm": 0.5573008060455322, | |
| "learning_rate": 2.2354825412944642e-05, | |
| "loss": 3.1544, | |
| "step": 1370 | |
| }, | |
| { | |
| "epoch": 4.761408083441982, | |
| "grad_norm": 0.5144769549369812, | |
| "learning_rate": 2.2278597970287966e-05, | |
| "loss": 3.1163, | |
| "step": 1371 | |
| }, | |
| { | |
| "epoch": 4.764884832681442, | |
| "grad_norm": 0.5623053908348083, | |
| "learning_rate": 2.2202463441436884e-05, | |
| "loss": 3.2186, | |
| "step": 1372 | |
| }, | |
| { | |
| "epoch": 4.768361581920904, | |
| "grad_norm": 0.5026108026504517, | |
| "learning_rate": 2.2126422081571403e-05, | |
| "loss": 3.1492, | |
| "step": 1373 | |
| }, | |
| { | |
| "epoch": 4.771838331160365, | |
| "grad_norm": 0.5406736731529236, | |
| "learning_rate": 2.2050474145559326e-05, | |
| "loss": 3.125, | |
| "step": 1374 | |
| }, | |
| { | |
| "epoch": 4.775315080399826, | |
| "grad_norm": 0.526690661907196, | |
| "learning_rate": 2.1974619887955294e-05, | |
| "loss": 3.1364, | |
| "step": 1375 | |
| }, | |
| { | |
| "epoch": 4.778791829639287, | |
| "grad_norm": 0.5063409209251404, | |
| "learning_rate": 2.1898859563e-05, | |
| "loss": 3.1636, | |
| "step": 1376 | |
| }, | |
| { | |
| "epoch": 4.782268578878749, | |
| "grad_norm": 0.5202750563621521, | |
| "learning_rate": 2.1823193424619238e-05, | |
| "loss": 3.1399, | |
| "step": 1377 | |
| }, | |
| { | |
| "epoch": 4.785745328118209, | |
| "grad_norm": 0.5034672617912292, | |
| "learning_rate": 2.174762172642319e-05, | |
| "loss": 3.2438, | |
| "step": 1378 | |
| }, | |
| { | |
| "epoch": 4.789222077357671, | |
| "grad_norm": 0.5040730834007263, | |
| "learning_rate": 2.1672144721705433e-05, | |
| "loss": 3.0598, | |
| "step": 1379 | |
| }, | |
| { | |
| "epoch": 4.792698826597132, | |
| "grad_norm": 0.47813880443573, | |
| "learning_rate": 2.1596762663442218e-05, | |
| "loss": 3.1059, | |
| "step": 1380 | |
| }, | |
| { | |
| "epoch": 4.7961755758365925, | |
| "grad_norm": 0.5351612567901611, | |
| "learning_rate": 2.1521475804291535e-05, | |
| "loss": 3.1185, | |
| "step": 1381 | |
| }, | |
| { | |
| "epoch": 4.799652325076054, | |
| "grad_norm": 0.5197668075561523, | |
| "learning_rate": 2.144628439659231e-05, | |
| "loss": 3.1053, | |
| "step": 1382 | |
| }, | |
| { | |
| "epoch": 4.803129074315515, | |
| "grad_norm": 0.49083802103996277, | |
| "learning_rate": 2.1371188692363552e-05, | |
| "loss": 3.093, | |
| "step": 1383 | |
| }, | |
| { | |
| "epoch": 4.8066058235549765, | |
| "grad_norm": 0.5041195750236511, | |
| "learning_rate": 2.1296188943303448e-05, | |
| "loss": 3.1273, | |
| "step": 1384 | |
| }, | |
| { | |
| "epoch": 4.810082572794437, | |
| "grad_norm": 0.5689459443092346, | |
| "learning_rate": 2.122128540078862e-05, | |
| "loss": 3.1277, | |
| "step": 1385 | |
| }, | |
| { | |
| "epoch": 4.813559322033898, | |
| "grad_norm": 0.48876744508743286, | |
| "learning_rate": 2.1146478315873238e-05, | |
| "loss": 3.0268, | |
| "step": 1386 | |
| }, | |
| { | |
| "epoch": 4.8170360712733595, | |
| "grad_norm": 0.5481560230255127, | |
| "learning_rate": 2.1071767939288144e-05, | |
| "loss": 3.1129, | |
| "step": 1387 | |
| }, | |
| { | |
| "epoch": 4.82051282051282, | |
| "grad_norm": 0.5860038995742798, | |
| "learning_rate": 2.09971545214401e-05, | |
| "loss": 3.1246, | |
| "step": 1388 | |
| }, | |
| { | |
| "epoch": 4.823989569752282, | |
| "grad_norm": 0.4835022985935211, | |
| "learning_rate": 2.0922638312410792e-05, | |
| "loss": 3.0547, | |
| "step": 1389 | |
| }, | |
| { | |
| "epoch": 4.827466318991743, | |
| "grad_norm": 0.5766292214393616, | |
| "learning_rate": 2.0848219561956206e-05, | |
| "loss": 3.2223, | |
| "step": 1390 | |
| }, | |
| { | |
| "epoch": 4.830943068231203, | |
| "grad_norm": 0.4890536963939667, | |
| "learning_rate": 2.077389851950557e-05, | |
| "loss": 3.0601, | |
| "step": 1391 | |
| }, | |
| { | |
| "epoch": 4.834419817470665, | |
| "grad_norm": 0.508464515209198, | |
| "learning_rate": 2.06996754341607e-05, | |
| "loss": 3.032, | |
| "step": 1392 | |
| }, | |
| { | |
| "epoch": 4.837896566710126, | |
| "grad_norm": 0.5292408466339111, | |
| "learning_rate": 2.062555055469506e-05, | |
| "loss": 3.1608, | |
| "step": 1393 | |
| }, | |
| { | |
| "epoch": 4.841373315949587, | |
| "grad_norm": 0.4770180583000183, | |
| "learning_rate": 2.0551524129552985e-05, | |
| "loss": 3.116, | |
| "step": 1394 | |
| }, | |
| { | |
| "epoch": 4.844850065189048, | |
| "grad_norm": 0.512302041053772, | |
| "learning_rate": 2.047759640684876e-05, | |
| "loss": 3.0814, | |
| "step": 1395 | |
| }, | |
| { | |
| "epoch": 4.84832681442851, | |
| "grad_norm": 0.48571115732192993, | |
| "learning_rate": 2.04037676343659e-05, | |
| "loss": 3.1722, | |
| "step": 1396 | |
| }, | |
| { | |
| "epoch": 4.85180356366797, | |
| "grad_norm": 0.48603254556655884, | |
| "learning_rate": 2.0330038059556256e-05, | |
| "loss": 3.12, | |
| "step": 1397 | |
| }, | |
| { | |
| "epoch": 4.855280312907432, | |
| "grad_norm": 0.49838802218437195, | |
| "learning_rate": 2.0256407929539202e-05, | |
| "loss": 3.1449, | |
| "step": 1398 | |
| }, | |
| { | |
| "epoch": 4.858757062146893, | |
| "grad_norm": 0.49490660429000854, | |
| "learning_rate": 2.0182877491100806e-05, | |
| "loss": 3.125, | |
| "step": 1399 | |
| }, | |
| { | |
| "epoch": 4.8622338113863535, | |
| "grad_norm": 0.4796282649040222, | |
| "learning_rate": 2.0109446990692964e-05, | |
| "loss": 3.0519, | |
| "step": 1400 | |
| }, | |
| { | |
| "epoch": 4.865710560625815, | |
| "grad_norm": 0.5214862823486328, | |
| "learning_rate": 2.0036116674432654e-05, | |
| "loss": 3.1385, | |
| "step": 1401 | |
| }, | |
| { | |
| "epoch": 4.869187309865276, | |
| "grad_norm": 0.4797852337360382, | |
| "learning_rate": 1.996288678810105e-05, | |
| "loss": 3.0939, | |
| "step": 1402 | |
| }, | |
| { | |
| "epoch": 4.872664059104737, | |
| "grad_norm": 0.5158775448799133, | |
| "learning_rate": 1.9889757577142737e-05, | |
| "loss": 3.1246, | |
| "step": 1403 | |
| }, | |
| { | |
| "epoch": 4.876140808344198, | |
| "grad_norm": 0.4937228858470917, | |
| "learning_rate": 1.9816729286664798e-05, | |
| "loss": 2.9946, | |
| "step": 1404 | |
| }, | |
| { | |
| "epoch": 4.879617557583659, | |
| "grad_norm": 0.491494357585907, | |
| "learning_rate": 1.9743802161436136e-05, | |
| "loss": 3.0483, | |
| "step": 1405 | |
| }, | |
| { | |
| "epoch": 4.8830943068231205, | |
| "grad_norm": 0.4826614558696747, | |
| "learning_rate": 1.967097644588657e-05, | |
| "loss": 3.0563, | |
| "step": 1406 | |
| }, | |
| { | |
| "epoch": 4.886571056062581, | |
| "grad_norm": 0.5007966160774231, | |
| "learning_rate": 1.959825238410596e-05, | |
| "loss": 3.1255, | |
| "step": 1407 | |
| }, | |
| { | |
| "epoch": 4.890047805302043, | |
| "grad_norm": 0.5254808664321899, | |
| "learning_rate": 1.9525630219843522e-05, | |
| "loss": 3.1027, | |
| "step": 1408 | |
| }, | |
| { | |
| "epoch": 4.893524554541504, | |
| "grad_norm": 0.5202295184135437, | |
| "learning_rate": 1.9453110196506913e-05, | |
| "loss": 3.0686, | |
| "step": 1409 | |
| }, | |
| { | |
| "epoch": 4.897001303780964, | |
| "grad_norm": 0.5098284482955933, | |
| "learning_rate": 1.938069255716145e-05, | |
| "loss": 3.2055, | |
| "step": 1410 | |
| }, | |
| { | |
| "epoch": 4.900478053020426, | |
| "grad_norm": 0.517220139503479, | |
| "learning_rate": 1.930837754452931e-05, | |
| "loss": 3.0823, | |
| "step": 1411 | |
| }, | |
| { | |
| "epoch": 4.903954802259887, | |
| "grad_norm": 0.5010183453559875, | |
| "learning_rate": 1.9236165400988638e-05, | |
| "loss": 3.0627, | |
| "step": 1412 | |
| }, | |
| { | |
| "epoch": 4.907431551499348, | |
| "grad_norm": 0.4953896105289459, | |
| "learning_rate": 1.9164056368572846e-05, | |
| "loss": 3.0968, | |
| "step": 1413 | |
| }, | |
| { | |
| "epoch": 4.910908300738809, | |
| "grad_norm": 0.4907744824886322, | |
| "learning_rate": 1.9092050688969738e-05, | |
| "loss": 2.934, | |
| "step": 1414 | |
| }, | |
| { | |
| "epoch": 4.914385049978271, | |
| "grad_norm": 0.5396456122398376, | |
| "learning_rate": 1.9020148603520694e-05, | |
| "loss": 3.1413, | |
| "step": 1415 | |
| }, | |
| { | |
| "epoch": 4.917861799217731, | |
| "grad_norm": 0.4796368479728699, | |
| "learning_rate": 1.894835035321991e-05, | |
| "loss": 3.0553, | |
| "step": 1416 | |
| }, | |
| { | |
| "epoch": 4.921338548457193, | |
| "grad_norm": 0.509888231754303, | |
| "learning_rate": 1.8876656178713525e-05, | |
| "loss": 3.0997, | |
| "step": 1417 | |
| }, | |
| { | |
| "epoch": 4.924815297696654, | |
| "grad_norm": 0.5054630041122437, | |
| "learning_rate": 1.8805066320298832e-05, | |
| "loss": 3.1347, | |
| "step": 1418 | |
| }, | |
| { | |
| "epoch": 4.9282920469361144, | |
| "grad_norm": 0.506965160369873, | |
| "learning_rate": 1.8733581017923533e-05, | |
| "loss": 3.1165, | |
| "step": 1419 | |
| }, | |
| { | |
| "epoch": 4.931768796175576, | |
| "grad_norm": 0.4983595311641693, | |
| "learning_rate": 1.8662200511184874e-05, | |
| "loss": 3.1969, | |
| "step": 1420 | |
| }, | |
| { | |
| "epoch": 4.935245545415037, | |
| "grad_norm": 0.48033374547958374, | |
| "learning_rate": 1.8590925039328855e-05, | |
| "loss": 3.0475, | |
| "step": 1421 | |
| }, | |
| { | |
| "epoch": 4.938722294654498, | |
| "grad_norm": 0.48913419246673584, | |
| "learning_rate": 1.8519754841249443e-05, | |
| "loss": 3.0912, | |
| "step": 1422 | |
| }, | |
| { | |
| "epoch": 4.942199043893959, | |
| "grad_norm": 0.4766146242618561, | |
| "learning_rate": 1.8448690155487715e-05, | |
| "loss": 3.0585, | |
| "step": 1423 | |
| }, | |
| { | |
| "epoch": 4.94567579313342, | |
| "grad_norm": 0.5143147706985474, | |
| "learning_rate": 1.837773122023114e-05, | |
| "loss": 3.1216, | |
| "step": 1424 | |
| }, | |
| { | |
| "epoch": 4.9491525423728815, | |
| "grad_norm": 0.5010018944740295, | |
| "learning_rate": 1.830687827331275e-05, | |
| "loss": 3.122, | |
| "step": 1425 | |
| }, | |
| { | |
| "epoch": 4.952629291612342, | |
| "grad_norm": 0.519191324710846, | |
| "learning_rate": 1.8236131552210317e-05, | |
| "loss": 3.1109, | |
| "step": 1426 | |
| }, | |
| { | |
| "epoch": 4.956106040851804, | |
| "grad_norm": 0.5037075877189636, | |
| "learning_rate": 1.8165491294045593e-05, | |
| "loss": 3.014, | |
| "step": 1427 | |
| }, | |
| { | |
| "epoch": 4.959582790091265, | |
| "grad_norm": 0.5274494290351868, | |
| "learning_rate": 1.8094957735583463e-05, | |
| "loss": 3.0914, | |
| "step": 1428 | |
| }, | |
| { | |
| "epoch": 4.963059539330725, | |
| "grad_norm": 0.5338578820228577, | |
| "learning_rate": 1.802453111323122e-05, | |
| "loss": 3.0104, | |
| "step": 1429 | |
| }, | |
| { | |
| "epoch": 4.966536288570187, | |
| "grad_norm": 0.5094507336616516, | |
| "learning_rate": 1.7954211663037728e-05, | |
| "loss": 3.1215, | |
| "step": 1430 | |
| }, | |
| { | |
| "epoch": 4.970013037809648, | |
| "grad_norm": 0.5530762076377869, | |
| "learning_rate": 1.788399962069266e-05, | |
| "loss": 3.1778, | |
| "step": 1431 | |
| }, | |
| { | |
| "epoch": 4.973489787049109, | |
| "grad_norm": 0.49168628454208374, | |
| "learning_rate": 1.781389522152565e-05, | |
| "loss": 3.1523, | |
| "step": 1432 | |
| }, | |
| { | |
| "epoch": 4.97696653628857, | |
| "grad_norm": 0.543717086315155, | |
| "learning_rate": 1.774389870050559e-05, | |
| "loss": 3.0974, | |
| "step": 1433 | |
| }, | |
| { | |
| "epoch": 4.980443285528032, | |
| "grad_norm": 0.4832988679409027, | |
| "learning_rate": 1.7674010292239744e-05, | |
| "loss": 3.0996, | |
| "step": 1434 | |
| }, | |
| { | |
| "epoch": 4.983920034767492, | |
| "grad_norm": 0.5092836022377014, | |
| "learning_rate": 1.760423023097307e-05, | |
| "loss": 3.0521, | |
| "step": 1435 | |
| }, | |
| { | |
| "epoch": 4.987396784006954, | |
| "grad_norm": 0.5006300210952759, | |
| "learning_rate": 1.7534558750587345e-05, | |
| "loss": 3.1263, | |
| "step": 1436 | |
| }, | |
| { | |
| "epoch": 4.990873533246415, | |
| "grad_norm": 0.4926410913467407, | |
| "learning_rate": 1.7464996084600433e-05, | |
| "loss": 3.0774, | |
| "step": 1437 | |
| }, | |
| { | |
| "epoch": 4.994350282485875, | |
| "grad_norm": 0.474171906709671, | |
| "learning_rate": 1.739554246616549e-05, | |
| "loss": 3.0128, | |
| "step": 1438 | |
| }, | |
| { | |
| "epoch": 4.997827031725337, | |
| "grad_norm": 0.4734145402908325, | |
| "learning_rate": 1.7326198128070133e-05, | |
| "loss": 2.9946, | |
| "step": 1439 | |
| }, | |
| { | |
| "epoch": 5.0, | |
| "grad_norm": 0.5680263042449951, | |
| "learning_rate": 1.725696330273575e-05, | |
| "loss": 3.0865, | |
| "step": 1440 | |
| }, | |
| { | |
| "epoch": 5.003476749239461, | |
| "grad_norm": 0.4952830970287323, | |
| "learning_rate": 1.7187838222216663e-05, | |
| "loss": 2.9644, | |
| "step": 1441 | |
| }, | |
| { | |
| "epoch": 5.006953498478922, | |
| "grad_norm": 0.4864019453525543, | |
| "learning_rate": 1.711882311819936e-05, | |
| "loss": 3.0979, | |
| "step": 1442 | |
| }, | |
| { | |
| "epoch": 5.010430247718383, | |
| "grad_norm": 0.506616473197937, | |
| "learning_rate": 1.7049918222001742e-05, | |
| "loss": 3.122, | |
| "step": 1443 | |
| }, | |
| { | |
| "epoch": 5.013906996957845, | |
| "grad_norm": 0.4952886700630188, | |
| "learning_rate": 1.6981123764572273e-05, | |
| "loss": 3.0794, | |
| "step": 1444 | |
| }, | |
| { | |
| "epoch": 5.017383746197305, | |
| "grad_norm": 0.5021927952766418, | |
| "learning_rate": 1.6912439976489313e-05, | |
| "loss": 3.1295, | |
| "step": 1445 | |
| }, | |
| { | |
| "epoch": 5.020860495436767, | |
| "grad_norm": 0.4874928891658783, | |
| "learning_rate": 1.684386708796025e-05, | |
| "loss": 3.1032, | |
| "step": 1446 | |
| }, | |
| { | |
| "epoch": 5.024337244676228, | |
| "grad_norm": 0.5078150033950806, | |
| "learning_rate": 1.6775405328820805e-05, | |
| "loss": 3.1201, | |
| "step": 1447 | |
| }, | |
| { | |
| "epoch": 5.0278139939156885, | |
| "grad_norm": 0.47642481327056885, | |
| "learning_rate": 1.670705492853421e-05, | |
| "loss": 3.0506, | |
| "step": 1448 | |
| }, | |
| { | |
| "epoch": 5.03129074315515, | |
| "grad_norm": 0.5237090587615967, | |
| "learning_rate": 1.6638816116190475e-05, | |
| "loss": 3.1616, | |
| "step": 1449 | |
| }, | |
| { | |
| "epoch": 5.034767492394611, | |
| "grad_norm": 0.5067710876464844, | |
| "learning_rate": 1.6570689120505555e-05, | |
| "loss": 3.1494, | |
| "step": 1450 | |
| }, | |
| { | |
| "epoch": 5.0382442416340725, | |
| "grad_norm": 0.5045792460441589, | |
| "learning_rate": 1.650267416982067e-05, | |
| "loss": 3.1457, | |
| "step": 1451 | |
| }, | |
| { | |
| "epoch": 5.041720990873533, | |
| "grad_norm": 0.5121103525161743, | |
| "learning_rate": 1.6434771492101485e-05, | |
| "loss": 3.0338, | |
| "step": 1452 | |
| }, | |
| { | |
| "epoch": 5.045197740112994, | |
| "grad_norm": 0.4789864420890808, | |
| "learning_rate": 1.6366981314937376e-05, | |
| "loss": 3.0468, | |
| "step": 1453 | |
| }, | |
| { | |
| "epoch": 5.0486744893524556, | |
| "grad_norm": 0.5022554993629456, | |
| "learning_rate": 1.6299303865540617e-05, | |
| "loss": 3.1252, | |
| "step": 1454 | |
| }, | |
| { | |
| "epoch": 5.052151238591916, | |
| "grad_norm": 0.49874892830848694, | |
| "learning_rate": 1.6231739370745703e-05, | |
| "loss": 3.0489, | |
| "step": 1455 | |
| }, | |
| { | |
| "epoch": 5.055627987831378, | |
| "grad_norm": 0.4912637770175934, | |
| "learning_rate": 1.6164288057008466e-05, | |
| "loss": 3.0752, | |
| "step": 1456 | |
| }, | |
| { | |
| "epoch": 5.059104737070839, | |
| "grad_norm": 0.4912840723991394, | |
| "learning_rate": 1.6096950150405454e-05, | |
| "loss": 3.1287, | |
| "step": 1457 | |
| }, | |
| { | |
| "epoch": 5.0625814863103, | |
| "grad_norm": 0.5014127492904663, | |
| "learning_rate": 1.6029725876633085e-05, | |
| "loss": 3.1283, | |
| "step": 1458 | |
| }, | |
| { | |
| "epoch": 5.066058235549761, | |
| "grad_norm": 0.4787100851535797, | |
| "learning_rate": 1.5962615461006924e-05, | |
| "loss": 3.1367, | |
| "step": 1459 | |
| }, | |
| { | |
| "epoch": 5.069534984789222, | |
| "grad_norm": 0.49083542823791504, | |
| "learning_rate": 1.589561912846089e-05, | |
| "loss": 3.081, | |
| "step": 1460 | |
| }, | |
| { | |
| "epoch": 5.073011734028683, | |
| "grad_norm": 0.48826974630355835, | |
| "learning_rate": 1.582873710354657e-05, | |
| "loss": 3.0724, | |
| "step": 1461 | |
| }, | |
| { | |
| "epoch": 5.076488483268144, | |
| "grad_norm": 0.4925864040851593, | |
| "learning_rate": 1.576196961043237e-05, | |
| "loss": 2.9944, | |
| "step": 1462 | |
| }, | |
| { | |
| "epoch": 5.079965232507606, | |
| "grad_norm": 0.4802667796611786, | |
| "learning_rate": 1.569531687290288e-05, | |
| "loss": 3.0584, | |
| "step": 1463 | |
| }, | |
| { | |
| "epoch": 5.083441981747066, | |
| "grad_norm": 0.5159469842910767, | |
| "learning_rate": 1.5628779114358034e-05, | |
| "loss": 3.079, | |
| "step": 1464 | |
| }, | |
| { | |
| "epoch": 5.086918730986528, | |
| "grad_norm": 0.4779518246650696, | |
| "learning_rate": 1.556235655781239e-05, | |
| "loss": 3.1105, | |
| "step": 1465 | |
| }, | |
| { | |
| "epoch": 5.090395480225989, | |
| "grad_norm": 0.5127723217010498, | |
| "learning_rate": 1.549604942589441e-05, | |
| "loss": 3.0305, | |
| "step": 1466 | |
| }, | |
| { | |
| "epoch": 5.0938722294654495, | |
| "grad_norm": 0.5168395638465881, | |
| "learning_rate": 1.5429857940845633e-05, | |
| "loss": 3.1509, | |
| "step": 1467 | |
| }, | |
| { | |
| "epoch": 5.097348978704911, | |
| "grad_norm": 0.49684715270996094, | |
| "learning_rate": 1.536378232452003e-05, | |
| "loss": 3.037, | |
| "step": 1468 | |
| }, | |
| { | |
| "epoch": 5.100825727944372, | |
| "grad_norm": 0.5140928626060486, | |
| "learning_rate": 1.529782279838321e-05, | |
| "loss": 3.0983, | |
| "step": 1469 | |
| }, | |
| { | |
| "epoch": 5.1043024771838335, | |
| "grad_norm": 0.5105652213096619, | |
| "learning_rate": 1.5231979583511668e-05, | |
| "loss": 3.0414, | |
| "step": 1470 | |
| }, | |
| { | |
| "epoch": 5.107779226423294, | |
| "grad_norm": 0.5075240135192871, | |
| "learning_rate": 1.5166252900592082e-05, | |
| "loss": 3.0819, | |
| "step": 1471 | |
| }, | |
| { | |
| "epoch": 5.111255975662755, | |
| "grad_norm": 0.5023607015609741, | |
| "learning_rate": 1.5100642969920503e-05, | |
| "loss": 3.0479, | |
| "step": 1472 | |
| }, | |
| { | |
| "epoch": 5.1147327249022165, | |
| "grad_norm": 0.519066333770752, | |
| "learning_rate": 1.5035150011401727e-05, | |
| "loss": 3.1059, | |
| "step": 1473 | |
| }, | |
| { | |
| "epoch": 5.118209474141677, | |
| "grad_norm": 0.5103733539581299, | |
| "learning_rate": 1.496977424454843e-05, | |
| "loss": 3.0913, | |
| "step": 1474 | |
| }, | |
| { | |
| "epoch": 5.121686223381139, | |
| "grad_norm": 0.49367067217826843, | |
| "learning_rate": 1.4904515888480536e-05, | |
| "loss": 3.1771, | |
| "step": 1475 | |
| }, | |
| { | |
| "epoch": 5.1251629726206, | |
| "grad_norm": 0.4967593252658844, | |
| "learning_rate": 1.4839375161924446e-05, | |
| "loss": 3.0717, | |
| "step": 1476 | |
| }, | |
| { | |
| "epoch": 5.128639721860061, | |
| "grad_norm": 0.483659952878952, | |
| "learning_rate": 1.4774352283212306e-05, | |
| "loss": 3.0219, | |
| "step": 1477 | |
| }, | |
| { | |
| "epoch": 5.132116471099522, | |
| "grad_norm": 0.47200506925582886, | |
| "learning_rate": 1.4709447470281217e-05, | |
| "loss": 3.0199, | |
| "step": 1478 | |
| }, | |
| { | |
| "epoch": 5.135593220338983, | |
| "grad_norm": 0.49920839071273804, | |
| "learning_rate": 1.4644660940672627e-05, | |
| "loss": 3.1066, | |
| "step": 1479 | |
| }, | |
| { | |
| "epoch": 5.139069969578444, | |
| "grad_norm": 0.4712933599948883, | |
| "learning_rate": 1.4579992911531498e-05, | |
| "loss": 3.0786, | |
| "step": 1480 | |
| }, | |
| { | |
| "epoch": 5.142546718817905, | |
| "grad_norm": 0.4965963661670685, | |
| "learning_rate": 1.4515443599605627e-05, | |
| "loss": 3.0316, | |
| "step": 1481 | |
| }, | |
| { | |
| "epoch": 5.146023468057367, | |
| "grad_norm": 0.46099987626075745, | |
| "learning_rate": 1.4451013221244913e-05, | |
| "loss": 3.0089, | |
| "step": 1482 | |
| }, | |
| { | |
| "epoch": 5.149500217296827, | |
| "grad_norm": 0.49877581000328064, | |
| "learning_rate": 1.4386701992400586e-05, | |
| "loss": 3.0595, | |
| "step": 1483 | |
| }, | |
| { | |
| "epoch": 5.152976966536288, | |
| "grad_norm": 0.47550636529922485, | |
| "learning_rate": 1.4322510128624567e-05, | |
| "loss": 2.9965, | |
| "step": 1484 | |
| }, | |
| { | |
| "epoch": 5.15645371577575, | |
| "grad_norm": 0.4906829595565796, | |
| "learning_rate": 1.4258437845068685e-05, | |
| "loss": 3.0932, | |
| "step": 1485 | |
| }, | |
| { | |
| "epoch": 5.1599304650152105, | |
| "grad_norm": 0.47613778710365295, | |
| "learning_rate": 1.4194485356483977e-05, | |
| "loss": 3.0283, | |
| "step": 1486 | |
| }, | |
| { | |
| "epoch": 5.163407214254672, | |
| "grad_norm": 0.4815172851085663, | |
| "learning_rate": 1.4130652877219947e-05, | |
| "loss": 3.0631, | |
| "step": 1487 | |
| }, | |
| { | |
| "epoch": 5.166883963494133, | |
| "grad_norm": 0.47476691007614136, | |
| "learning_rate": 1.406694062122389e-05, | |
| "loss": 3.0189, | |
| "step": 1488 | |
| }, | |
| { | |
| "epoch": 5.170360712733594, | |
| "grad_norm": 0.4923785626888275, | |
| "learning_rate": 1.4003348802040112e-05, | |
| "loss": 3.0331, | |
| "step": 1489 | |
| }, | |
| { | |
| "epoch": 5.173837461973055, | |
| "grad_norm": 0.4678713381290436, | |
| "learning_rate": 1.3939877632809278e-05, | |
| "loss": 2.9611, | |
| "step": 1490 | |
| }, | |
| { | |
| "epoch": 5.177314211212516, | |
| "grad_norm": 0.48306557536125183, | |
| "learning_rate": 1.387652732626768e-05, | |
| "loss": 3.0772, | |
| "step": 1491 | |
| }, | |
| { | |
| "epoch": 5.1807909604519775, | |
| "grad_norm": 0.47807446122169495, | |
| "learning_rate": 1.3813298094746491e-05, | |
| "loss": 2.9963, | |
| "step": 1492 | |
| }, | |
| { | |
| "epoch": 5.184267709691438, | |
| "grad_norm": 0.46229419112205505, | |
| "learning_rate": 1.3750190150171105e-05, | |
| "loss": 3.0278, | |
| "step": 1493 | |
| }, | |
| { | |
| "epoch": 5.1877444589309, | |
| "grad_norm": 0.4713948965072632, | |
| "learning_rate": 1.3687203704060342e-05, | |
| "loss": 3.1379, | |
| "step": 1494 | |
| }, | |
| { | |
| "epoch": 5.191221208170361, | |
| "grad_norm": 0.47037366032600403, | |
| "learning_rate": 1.3624338967525857e-05, | |
| "loss": 3.1018, | |
| "step": 1495 | |
| }, | |
| { | |
| "epoch": 5.194697957409822, | |
| "grad_norm": 0.45954567193984985, | |
| "learning_rate": 1.356159615127135e-05, | |
| "loss": 3.0508, | |
| "step": 1496 | |
| }, | |
| { | |
| "epoch": 5.198174706649283, | |
| "grad_norm": 0.47411566972732544, | |
| "learning_rate": 1.3498975465591867e-05, | |
| "loss": 3.0462, | |
| "step": 1497 | |
| }, | |
| { | |
| "epoch": 5.201651455888744, | |
| "grad_norm": 0.4694608747959137, | |
| "learning_rate": 1.3436477120373143e-05, | |
| "loss": 3.0887, | |
| "step": 1498 | |
| }, | |
| { | |
| "epoch": 5.205128205128205, | |
| "grad_norm": 0.4654156565666199, | |
| "learning_rate": 1.337410132509081e-05, | |
| "loss": 3.0725, | |
| "step": 1499 | |
| }, | |
| { | |
| "epoch": 5.208604954367666, | |
| "grad_norm": 0.45913633704185486, | |
| "learning_rate": 1.3311848288809813e-05, | |
| "loss": 3.026, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 5.212081703607128, | |
| "grad_norm": 0.45800894498825073, | |
| "learning_rate": 1.3249718220183583e-05, | |
| "loss": 3.0755, | |
| "step": 1501 | |
| }, | |
| { | |
| "epoch": 5.215558452846588, | |
| "grad_norm": 0.46444812417030334, | |
| "learning_rate": 1.3187711327453445e-05, | |
| "loss": 3.0823, | |
| "step": 1502 | |
| }, | |
| { | |
| "epoch": 5.219035202086049, | |
| "grad_norm": 0.47551393508911133, | |
| "learning_rate": 1.3125827818447862e-05, | |
| "loss": 3.0347, | |
| "step": 1503 | |
| }, | |
| { | |
| "epoch": 5.222511951325511, | |
| "grad_norm": 0.49685007333755493, | |
| "learning_rate": 1.3064067900581773e-05, | |
| "loss": 3.1281, | |
| "step": 1504 | |
| }, | |
| { | |
| "epoch": 5.2259887005649714, | |
| "grad_norm": 0.47061434388160706, | |
| "learning_rate": 1.3002431780855817e-05, | |
| "loss": 3.1229, | |
| "step": 1505 | |
| }, | |
| { | |
| "epoch": 5.229465449804433, | |
| "grad_norm": 0.5090036392211914, | |
| "learning_rate": 1.2940919665855761e-05, | |
| "loss": 2.9923, | |
| "step": 1506 | |
| }, | |
| { | |
| "epoch": 5.232942199043894, | |
| "grad_norm": 0.45235443115234375, | |
| "learning_rate": 1.2879531761751713e-05, | |
| "loss": 3.0002, | |
| "step": 1507 | |
| }, | |
| { | |
| "epoch": 5.236418948283355, | |
| "grad_norm": 0.486585408449173, | |
| "learning_rate": 1.2818268274297474e-05, | |
| "loss": 3.1673, | |
| "step": 1508 | |
| }, | |
| { | |
| "epoch": 5.239895697522816, | |
| "grad_norm": 0.4809410274028778, | |
| "learning_rate": 1.2757129408829843e-05, | |
| "loss": 3.0792, | |
| "step": 1509 | |
| }, | |
| { | |
| "epoch": 5.243372446762277, | |
| "grad_norm": 0.4928673803806305, | |
| "learning_rate": 1.2696115370267925e-05, | |
| "loss": 3.1247, | |
| "step": 1510 | |
| }, | |
| { | |
| "epoch": 5.2468491960017385, | |
| "grad_norm": 0.4768332540988922, | |
| "learning_rate": 1.2635226363112401e-05, | |
| "loss": 3.072, | |
| "step": 1511 | |
| }, | |
| { | |
| "epoch": 5.250325945241199, | |
| "grad_norm": 0.4630432724952698, | |
| "learning_rate": 1.257446259144494e-05, | |
| "loss": 3.0369, | |
| "step": 1512 | |
| }, | |
| { | |
| "epoch": 5.253802694480661, | |
| "grad_norm": 0.47798049449920654, | |
| "learning_rate": 1.2513824258927438e-05, | |
| "loss": 3.131, | |
| "step": 1513 | |
| }, | |
| { | |
| "epoch": 5.257279443720122, | |
| "grad_norm": 0.4752330482006073, | |
| "learning_rate": 1.2453311568801367e-05, | |
| "loss": 3.1487, | |
| "step": 1514 | |
| }, | |
| { | |
| "epoch": 5.260756192959583, | |
| "grad_norm": 0.4717276990413666, | |
| "learning_rate": 1.2392924723887039e-05, | |
| "loss": 3.066, | |
| "step": 1515 | |
| }, | |
| { | |
| "epoch": 5.264232942199044, | |
| "grad_norm": 0.47000083327293396, | |
| "learning_rate": 1.2332663926583038e-05, | |
| "loss": 2.9598, | |
| "step": 1516 | |
| }, | |
| { | |
| "epoch": 5.267709691438505, | |
| "grad_norm": 0.46743789315223694, | |
| "learning_rate": 1.227252937886541e-05, | |
| "loss": 3.0595, | |
| "step": 1517 | |
| }, | |
| { | |
| "epoch": 5.271186440677966, | |
| "grad_norm": 0.4703062176704407, | |
| "learning_rate": 1.2212521282287092e-05, | |
| "loss": 3.0263, | |
| "step": 1518 | |
| }, | |
| { | |
| "epoch": 5.274663189917427, | |
| "grad_norm": 0.496021568775177, | |
| "learning_rate": 1.2152639837977187e-05, | |
| "loss": 3.013, | |
| "step": 1519 | |
| }, | |
| { | |
| "epoch": 5.278139939156889, | |
| "grad_norm": 0.48973992466926575, | |
| "learning_rate": 1.209288524664029e-05, | |
| "loss": 3.1375, | |
| "step": 1520 | |
| }, | |
| { | |
| "epoch": 5.281616688396349, | |
| "grad_norm": 0.49357324838638306, | |
| "learning_rate": 1.2033257708555845e-05, | |
| "loss": 3.0935, | |
| "step": 1521 | |
| }, | |
| { | |
| "epoch": 5.28509343763581, | |
| "grad_norm": 0.4617580771446228, | |
| "learning_rate": 1.1973757423577404e-05, | |
| "loss": 2.9214, | |
| "step": 1522 | |
| }, | |
| { | |
| "epoch": 5.288570186875272, | |
| "grad_norm": 0.4951948821544647, | |
| "learning_rate": 1.1914384591132044e-05, | |
| "loss": 3.0443, | |
| "step": 1523 | |
| }, | |
| { | |
| "epoch": 5.292046936114732, | |
| "grad_norm": 0.4785456657409668, | |
| "learning_rate": 1.1855139410219657e-05, | |
| "loss": 3.0451, | |
| "step": 1524 | |
| }, | |
| { | |
| "epoch": 5.295523685354194, | |
| "grad_norm": 0.47750359773635864, | |
| "learning_rate": 1.1796022079412272e-05, | |
| "loss": 3.1302, | |
| "step": 1525 | |
| }, | |
| { | |
| "epoch": 5.299000434593655, | |
| "grad_norm": 0.4931430518627167, | |
| "learning_rate": 1.1737032796853432e-05, | |
| "loss": 3.1065, | |
| "step": 1526 | |
| }, | |
| { | |
| "epoch": 5.302477183833116, | |
| "grad_norm": 0.48678264021873474, | |
| "learning_rate": 1.1678171760257439e-05, | |
| "loss": 3.0954, | |
| "step": 1527 | |
| }, | |
| { | |
| "epoch": 5.305953933072577, | |
| "grad_norm": 0.47306203842163086, | |
| "learning_rate": 1.161943916690883e-05, | |
| "loss": 3.0279, | |
| "step": 1528 | |
| }, | |
| { | |
| "epoch": 5.309430682312038, | |
| "grad_norm": 0.4727276861667633, | |
| "learning_rate": 1.1560835213661564e-05, | |
| "loss": 3.1249, | |
| "step": 1529 | |
| }, | |
| { | |
| "epoch": 5.3129074315514995, | |
| "grad_norm": 0.45371294021606445, | |
| "learning_rate": 1.1502360096938487e-05, | |
| "loss": 2.9934, | |
| "step": 1530 | |
| }, | |
| { | |
| "epoch": 5.31638418079096, | |
| "grad_norm": 0.48115792870521545, | |
| "learning_rate": 1.144401401273062e-05, | |
| "loss": 3.0486, | |
| "step": 1531 | |
| }, | |
| { | |
| "epoch": 5.319860930030422, | |
| "grad_norm": 0.4729406237602234, | |
| "learning_rate": 1.1385797156596506e-05, | |
| "loss": 3.0835, | |
| "step": 1532 | |
| }, | |
| { | |
| "epoch": 5.3233376792698825, | |
| "grad_norm": 0.46308913826942444, | |
| "learning_rate": 1.1327709723661517e-05, | |
| "loss": 3.0495, | |
| "step": 1533 | |
| }, | |
| { | |
| "epoch": 5.326814428509344, | |
| "grad_norm": 0.4657433032989502, | |
| "learning_rate": 1.1269751908617277e-05, | |
| "loss": 2.9911, | |
| "step": 1534 | |
| }, | |
| { | |
| "epoch": 5.330291177748805, | |
| "grad_norm": 0.45987650752067566, | |
| "learning_rate": 1.1211923905720956e-05, | |
| "loss": 3.0637, | |
| "step": 1535 | |
| }, | |
| { | |
| "epoch": 5.333767926988266, | |
| "grad_norm": 0.46726325154304504, | |
| "learning_rate": 1.1154225908794642e-05, | |
| "loss": 3.0673, | |
| "step": 1536 | |
| }, | |
| { | |
| "epoch": 5.337244676227727, | |
| "grad_norm": 0.4622207581996918, | |
| "learning_rate": 1.1096658111224684e-05, | |
| "loss": 3.0466, | |
| "step": 1537 | |
| }, | |
| { | |
| "epoch": 5.340721425467188, | |
| "grad_norm": 0.48004594445228577, | |
| "learning_rate": 1.1039220705961001e-05, | |
| "loss": 3.1712, | |
| "step": 1538 | |
| }, | |
| { | |
| "epoch": 5.34419817470665, | |
| "grad_norm": 0.4778420925140381, | |
| "learning_rate": 1.0981913885516527e-05, | |
| "loss": 3.1482, | |
| "step": 1539 | |
| }, | |
| { | |
| "epoch": 5.34767492394611, | |
| "grad_norm": 0.446816086769104, | |
| "learning_rate": 1.0924737841966498e-05, | |
| "loss": 3.0631, | |
| "step": 1540 | |
| }, | |
| { | |
| "epoch": 5.351151673185571, | |
| "grad_norm": 0.465069979429245, | |
| "learning_rate": 1.0867692766947812e-05, | |
| "loss": 3.0714, | |
| "step": 1541 | |
| }, | |
| { | |
| "epoch": 5.354628422425033, | |
| "grad_norm": 0.4674012064933777, | |
| "learning_rate": 1.0810778851658438e-05, | |
| "loss": 3.1051, | |
| "step": 1542 | |
| }, | |
| { | |
| "epoch": 5.358105171664493, | |
| "grad_norm": 0.46494433283805847, | |
| "learning_rate": 1.0753996286856688e-05, | |
| "loss": 3.0391, | |
| "step": 1543 | |
| }, | |
| { | |
| "epoch": 5.361581920903955, | |
| "grad_norm": 0.46729180216789246, | |
| "learning_rate": 1.0697345262860636e-05, | |
| "loss": 3.1156, | |
| "step": 1544 | |
| }, | |
| { | |
| "epoch": 5.365058670143416, | |
| "grad_norm": 0.4721660614013672, | |
| "learning_rate": 1.0640825969547496e-05, | |
| "loss": 3.1373, | |
| "step": 1545 | |
| }, | |
| { | |
| "epoch": 5.368535419382877, | |
| "grad_norm": 0.47204047441482544, | |
| "learning_rate": 1.0584438596352952e-05, | |
| "loss": 3.0871, | |
| "step": 1546 | |
| }, | |
| { | |
| "epoch": 5.372012168622338, | |
| "grad_norm": 0.47127583622932434, | |
| "learning_rate": 1.0528183332270519e-05, | |
| "loss": 3.1441, | |
| "step": 1547 | |
| }, | |
| { | |
| "epoch": 5.375488917861799, | |
| "grad_norm": 0.4934067130088806, | |
| "learning_rate": 1.047206036585095e-05, | |
| "loss": 3.0754, | |
| "step": 1548 | |
| }, | |
| { | |
| "epoch": 5.3789656671012605, | |
| "grad_norm": 0.45597216486930847, | |
| "learning_rate": 1.041606988520153e-05, | |
| "loss": 3.1062, | |
| "step": 1549 | |
| }, | |
| { | |
| "epoch": 5.382442416340721, | |
| "grad_norm": 0.46915626525878906, | |
| "learning_rate": 1.0360212077985521e-05, | |
| "loss": 3.0976, | |
| "step": 1550 | |
| }, | |
| { | |
| "epoch": 5.385919165580183, | |
| "grad_norm": 0.4795849919319153, | |
| "learning_rate": 1.030448713142151e-05, | |
| "loss": 3.0746, | |
| "step": 1551 | |
| }, | |
| { | |
| "epoch": 5.3893959148196435, | |
| "grad_norm": 0.4348246157169342, | |
| "learning_rate": 1.0248895232282762e-05, | |
| "loss": 3.0168, | |
| "step": 1552 | |
| }, | |
| { | |
| "epoch": 5.392872664059105, | |
| "grad_norm": 0.5004274845123291, | |
| "learning_rate": 1.0193436566896625e-05, | |
| "loss": 3.0659, | |
| "step": 1553 | |
| }, | |
| { | |
| "epoch": 5.396349413298566, | |
| "grad_norm": 0.4510630667209625, | |
| "learning_rate": 1.013811132114384e-05, | |
| "loss": 3.0664, | |
| "step": 1554 | |
| }, | |
| { | |
| "epoch": 5.399826162538027, | |
| "grad_norm": 0.47762078046798706, | |
| "learning_rate": 1.0082919680458014e-05, | |
| "loss": 3.1583, | |
| "step": 1555 | |
| }, | |
| { | |
| "epoch": 5.403302911777488, | |
| "grad_norm": 0.4656456410884857, | |
| "learning_rate": 1.0027861829824952e-05, | |
| "loss": 3.0065, | |
| "step": 1556 | |
| }, | |
| { | |
| "epoch": 5.406779661016949, | |
| "grad_norm": 0.45920252799987793, | |
| "learning_rate": 9.972937953781986e-06, | |
| "loss": 3.1666, | |
| "step": 1557 | |
| }, | |
| { | |
| "epoch": 5.410256410256411, | |
| "grad_norm": 0.4843366742134094, | |
| "learning_rate": 9.918148236417452e-06, | |
| "loss": 3.0652, | |
| "step": 1558 | |
| }, | |
| { | |
| "epoch": 5.413733159495871, | |
| "grad_norm": 0.46141600608825684, | |
| "learning_rate": 9.863492861370004e-06, | |
| "loss": 3.0977, | |
| "step": 1559 | |
| }, | |
| { | |
| "epoch": 5.417209908735332, | |
| "grad_norm": 0.45874103903770447, | |
| "learning_rate": 9.808972011828055e-06, | |
| "loss": 2.9799, | |
| "step": 1560 | |
| }, | |
| { | |
| "epoch": 5.420686657974794, | |
| "grad_norm": 0.46027660369873047, | |
| "learning_rate": 9.754585870529059e-06, | |
| "loss": 3.1338, | |
| "step": 1561 | |
| }, | |
| { | |
| "epoch": 5.424163407214254, | |
| "grad_norm": 0.4515167772769928, | |
| "learning_rate": 9.700334619759038e-06, | |
| "loss": 3.0767, | |
| "step": 1562 | |
| }, | |
| { | |
| "epoch": 5.427640156453716, | |
| "grad_norm": 0.4566735327243805, | |
| "learning_rate": 9.646218441351867e-06, | |
| "loss": 3.0172, | |
| "step": 1563 | |
| }, | |
| { | |
| "epoch": 5.431116905693177, | |
| "grad_norm": 0.45395126938819885, | |
| "learning_rate": 9.592237516688708e-06, | |
| "loss": 3.0072, | |
| "step": 1564 | |
| }, | |
| { | |
| "epoch": 5.434593654932638, | |
| "grad_norm": 0.4817012548446655, | |
| "learning_rate": 9.538392026697408e-06, | |
| "loss": 3.1232, | |
| "step": 1565 | |
| }, | |
| { | |
| "epoch": 5.438070404172099, | |
| "grad_norm": 0.4628289043903351, | |
| "learning_rate": 9.484682151851832e-06, | |
| "loss": 3.0401, | |
| "step": 1566 | |
| }, | |
| { | |
| "epoch": 5.44154715341156, | |
| "grad_norm": 0.4673340320587158, | |
| "learning_rate": 9.431108072171346e-06, | |
| "loss": 2.9883, | |
| "step": 1567 | |
| }, | |
| { | |
| "epoch": 5.445023902651021, | |
| "grad_norm": 0.45545145869255066, | |
| "learning_rate": 9.377669967220165e-06, | |
| "loss": 3.0543, | |
| "step": 1568 | |
| }, | |
| { | |
| "epoch": 5.448500651890482, | |
| "grad_norm": 0.45707741379737854, | |
| "learning_rate": 9.324368016106761e-06, | |
| "loss": 3.1049, | |
| "step": 1569 | |
| }, | |
| { | |
| "epoch": 5.451977401129944, | |
| "grad_norm": 0.4631718099117279, | |
| "learning_rate": 9.271202397483215e-06, | |
| "loss": 3.1157, | |
| "step": 1570 | |
| }, | |
| { | |
| "epoch": 5.4554541503694045, | |
| "grad_norm": 0.46336543560028076, | |
| "learning_rate": 9.218173289544735e-06, | |
| "loss": 2.9833, | |
| "step": 1571 | |
| }, | |
| { | |
| "epoch": 5.458930899608866, | |
| "grad_norm": 0.4526154398918152, | |
| "learning_rate": 9.16528087002892e-06, | |
| "loss": 3.0316, | |
| "step": 1572 | |
| }, | |
| { | |
| "epoch": 5.462407648848327, | |
| "grad_norm": 0.44709017872810364, | |
| "learning_rate": 9.11252531621527e-06, | |
| "loss": 3.0742, | |
| "step": 1573 | |
| }, | |
| { | |
| "epoch": 5.465884398087788, | |
| "grad_norm": 0.4863232374191284, | |
| "learning_rate": 9.05990680492454e-06, | |
| "loss": 3.1241, | |
| "step": 1574 | |
| }, | |
| { | |
| "epoch": 5.469361147327249, | |
| "grad_norm": 0.4572135806083679, | |
| "learning_rate": 9.007425512518164e-06, | |
| "loss": 3.0628, | |
| "step": 1575 | |
| }, | |
| { | |
| "epoch": 5.47283789656671, | |
| "grad_norm": 0.4494728147983551, | |
| "learning_rate": 8.955081614897664e-06, | |
| "loss": 3.0376, | |
| "step": 1576 | |
| }, | |
| { | |
| "epoch": 5.4763146458061716, | |
| "grad_norm": 0.4699227809906006, | |
| "learning_rate": 8.902875287504025e-06, | |
| "loss": 3.0442, | |
| "step": 1577 | |
| }, | |
| { | |
| "epoch": 5.479791395045632, | |
| "grad_norm": 0.4395771324634552, | |
| "learning_rate": 8.850806705317183e-06, | |
| "loss": 2.9835, | |
| "step": 1578 | |
| }, | |
| { | |
| "epoch": 5.483268144285093, | |
| "grad_norm": 0.4605022370815277, | |
| "learning_rate": 8.798876042855358e-06, | |
| "loss": 3.1216, | |
| "step": 1579 | |
| }, | |
| { | |
| "epoch": 5.486744893524555, | |
| "grad_norm": 0.47916972637176514, | |
| "learning_rate": 8.747083474174527e-06, | |
| "loss": 3.1716, | |
| "step": 1580 | |
| }, | |
| { | |
| "epoch": 5.490221642764015, | |
| "grad_norm": 0.4568222761154175, | |
| "learning_rate": 8.69542917286782e-06, | |
| "loss": 3.1073, | |
| "step": 1581 | |
| }, | |
| { | |
| "epoch": 5.493698392003477, | |
| "grad_norm": 0.4488259255886078, | |
| "learning_rate": 8.6439133120649e-06, | |
| "loss": 3.0121, | |
| "step": 1582 | |
| }, | |
| { | |
| "epoch": 5.497175141242938, | |
| "grad_norm": 0.4373837411403656, | |
| "learning_rate": 8.592536064431467e-06, | |
| "loss": 3.0243, | |
| "step": 1583 | |
| }, | |
| { | |
| "epoch": 5.500651890482399, | |
| "grad_norm": 0.46101701259613037, | |
| "learning_rate": 8.541297602168591e-06, | |
| "loss": 3.099, | |
| "step": 1584 | |
| }, | |
| { | |
| "epoch": 5.50412863972186, | |
| "grad_norm": 0.4696287214756012, | |
| "learning_rate": 8.490198097012203e-06, | |
| "loss": 3.1134, | |
| "step": 1585 | |
| }, | |
| { | |
| "epoch": 5.507605388961321, | |
| "grad_norm": 0.43790748715400696, | |
| "learning_rate": 8.439237720232484e-06, | |
| "loss": 3.0106, | |
| "step": 1586 | |
| }, | |
| { | |
| "epoch": 5.511082138200782, | |
| "grad_norm": 0.4761092960834503, | |
| "learning_rate": 8.3884166426333e-06, | |
| "loss": 3.0973, | |
| "step": 1587 | |
| }, | |
| { | |
| "epoch": 5.514558887440243, | |
| "grad_norm": 0.46510714292526245, | |
| "learning_rate": 8.337735034551613e-06, | |
| "loss": 3.1247, | |
| "step": 1588 | |
| }, | |
| { | |
| "epoch": 5.518035636679705, | |
| "grad_norm": 0.4549061059951782, | |
| "learning_rate": 8.287193065856935e-06, | |
| "loss": 3.0587, | |
| "step": 1589 | |
| }, | |
| { | |
| "epoch": 5.5215123859191655, | |
| "grad_norm": 0.46748998761177063, | |
| "learning_rate": 8.236790905950765e-06, | |
| "loss": 3.1052, | |
| "step": 1590 | |
| }, | |
| { | |
| "epoch": 5.524989135158627, | |
| "grad_norm": 0.4555809497833252, | |
| "learning_rate": 8.186528723765979e-06, | |
| "loss": 3.0366, | |
| "step": 1591 | |
| }, | |
| { | |
| "epoch": 5.528465884398088, | |
| "grad_norm": 0.45778852701187134, | |
| "learning_rate": 8.136406687766318e-06, | |
| "loss": 3.0882, | |
| "step": 1592 | |
| }, | |
| { | |
| "epoch": 5.531942633637549, | |
| "grad_norm": 0.4485376477241516, | |
| "learning_rate": 8.086424965945755e-06, | |
| "loss": 3.0317, | |
| "step": 1593 | |
| }, | |
| { | |
| "epoch": 5.53541938287701, | |
| "grad_norm": 0.44758340716362, | |
| "learning_rate": 8.036583725828001e-06, | |
| "loss": 3.0241, | |
| "step": 1594 | |
| }, | |
| { | |
| "epoch": 5.538896132116471, | |
| "grad_norm": 0.4677172303199768, | |
| "learning_rate": 7.986883134465916e-06, | |
| "loss": 3.1337, | |
| "step": 1595 | |
| }, | |
| { | |
| "epoch": 5.5423728813559325, | |
| "grad_norm": 0.4411722421646118, | |
| "learning_rate": 7.937323358440935e-06, | |
| "loss": 3.1009, | |
| "step": 1596 | |
| }, | |
| { | |
| "epoch": 5.545849630595393, | |
| "grad_norm": 0.45588281750679016, | |
| "learning_rate": 7.887904563862547e-06, | |
| "loss": 3.1043, | |
| "step": 1597 | |
| }, | |
| { | |
| "epoch": 5.549326379834854, | |
| "grad_norm": 0.4676235616207123, | |
| "learning_rate": 7.838626916367675e-06, | |
| "loss": 3.1354, | |
| "step": 1598 | |
| }, | |
| { | |
| "epoch": 5.552803129074316, | |
| "grad_norm": 0.4364144504070282, | |
| "learning_rate": 7.789490581120169e-06, | |
| "loss": 3.051, | |
| "step": 1599 | |
| }, | |
| { | |
| "epoch": 5.556279878313776, | |
| "grad_norm": 0.457533061504364, | |
| "learning_rate": 7.740495722810271e-06, | |
| "loss": 3.1305, | |
| "step": 1600 | |
| }, | |
| { | |
| "epoch": 5.559756627553238, | |
| "grad_norm": 0.4606762230396271, | |
| "learning_rate": 7.691642505654007e-06, | |
| "loss": 3.0427, | |
| "step": 1601 | |
| }, | |
| { | |
| "epoch": 5.563233376792699, | |
| "grad_norm": 0.4465121328830719, | |
| "learning_rate": 7.642931093392674e-06, | |
| "loss": 3.1152, | |
| "step": 1602 | |
| }, | |
| { | |
| "epoch": 5.56671012603216, | |
| "grad_norm": 0.4565368592739105, | |
| "learning_rate": 7.594361649292303e-06, | |
| "loss": 3.0521, | |
| "step": 1603 | |
| }, | |
| { | |
| "epoch": 5.570186875271621, | |
| "grad_norm": 0.44448092579841614, | |
| "learning_rate": 7.545934336143034e-06, | |
| "loss": 3.0368, | |
| "step": 1604 | |
| }, | |
| { | |
| "epoch": 5.573663624511082, | |
| "grad_norm": 0.44336286187171936, | |
| "learning_rate": 7.497649316258687e-06, | |
| "loss": 3.0708, | |
| "step": 1605 | |
| }, | |
| { | |
| "epoch": 5.577140373750543, | |
| "grad_norm": 0.4594724178314209, | |
| "learning_rate": 7.449506751476121e-06, | |
| "loss": 3.0141, | |
| "step": 1606 | |
| }, | |
| { | |
| "epoch": 5.580617122990004, | |
| "grad_norm": 0.45023006200790405, | |
| "learning_rate": 7.40150680315474e-06, | |
| "loss": 3.1725, | |
| "step": 1607 | |
| }, | |
| { | |
| "epoch": 5.584093872229466, | |
| "grad_norm": 0.46296626329421997, | |
| "learning_rate": 7.353649632175957e-06, | |
| "loss": 3.046, | |
| "step": 1608 | |
| }, | |
| { | |
| "epoch": 5.5875706214689265, | |
| "grad_norm": 0.4497106969356537, | |
| "learning_rate": 7.305935398942598e-06, | |
| "loss": 2.9753, | |
| "step": 1609 | |
| }, | |
| { | |
| "epoch": 5.591047370708388, | |
| "grad_norm": 0.46122488379478455, | |
| "learning_rate": 7.258364263378437e-06, | |
| "loss": 3.0618, | |
| "step": 1610 | |
| }, | |
| { | |
| "epoch": 5.594524119947849, | |
| "grad_norm": 0.45882707834243774, | |
| "learning_rate": 7.21093638492763e-06, | |
| "loss": 3.0114, | |
| "step": 1611 | |
| }, | |
| { | |
| "epoch": 5.5980008691873095, | |
| "grad_norm": 0.4593908488750458, | |
| "learning_rate": 7.163651922554149e-06, | |
| "loss": 3.192, | |
| "step": 1612 | |
| }, | |
| { | |
| "epoch": 5.601477618426771, | |
| "grad_norm": 0.4521577060222626, | |
| "learning_rate": 7.1165110347413025e-06, | |
| "loss": 3.0416, | |
| "step": 1613 | |
| }, | |
| { | |
| "epoch": 5.604954367666232, | |
| "grad_norm": 0.46317487955093384, | |
| "learning_rate": 7.06951387949118e-06, | |
| "loss": 3.1004, | |
| "step": 1614 | |
| }, | |
| { | |
| "epoch": 5.6084311169056935, | |
| "grad_norm": 0.4537200927734375, | |
| "learning_rate": 7.022660614324122e-06, | |
| "loss": 3.0639, | |
| "step": 1615 | |
| }, | |
| { | |
| "epoch": 5.611907866145154, | |
| "grad_norm": 0.4527956545352936, | |
| "learning_rate": 6.975951396278168e-06, | |
| "loss": 3.0711, | |
| "step": 1616 | |
| }, | |
| { | |
| "epoch": 5.615384615384615, | |
| "grad_norm": 0.46440109610557556, | |
| "learning_rate": 6.9293863819085865e-06, | |
| "loss": 3.095, | |
| "step": 1617 | |
| }, | |
| { | |
| "epoch": 5.618861364624077, | |
| "grad_norm": 0.4648788571357727, | |
| "learning_rate": 6.882965727287305e-06, | |
| "loss": 3.0182, | |
| "step": 1618 | |
| }, | |
| { | |
| "epoch": 5.622338113863537, | |
| "grad_norm": 0.4295171797275543, | |
| "learning_rate": 6.836689588002399e-06, | |
| "loss": 2.9744, | |
| "step": 1619 | |
| }, | |
| { | |
| "epoch": 5.625814863102999, | |
| "grad_norm": 0.464074045419693, | |
| "learning_rate": 6.790558119157597e-06, | |
| "loss": 3.0175, | |
| "step": 1620 | |
| }, | |
| { | |
| "epoch": 5.62929161234246, | |
| "grad_norm": 0.4391389787197113, | |
| "learning_rate": 6.74457147537168e-06, | |
| "loss": 3.0392, | |
| "step": 1621 | |
| }, | |
| { | |
| "epoch": 5.632768361581921, | |
| "grad_norm": 0.4579889476299286, | |
| "learning_rate": 6.698729810778065e-06, | |
| "loss": 3.0613, | |
| "step": 1622 | |
| }, | |
| { | |
| "epoch": 5.636245110821382, | |
| "grad_norm": 0.46115800738334656, | |
| "learning_rate": 6.65303327902424e-06, | |
| "loss": 3.044, | |
| "step": 1623 | |
| }, | |
| { | |
| "epoch": 5.639721860060843, | |
| "grad_norm": 0.4502374529838562, | |
| "learning_rate": 6.607482033271229e-06, | |
| "loss": 3.0842, | |
| "step": 1624 | |
| }, | |
| { | |
| "epoch": 5.643198609300304, | |
| "grad_norm": 0.44308748841285706, | |
| "learning_rate": 6.562076226193136e-06, | |
| "loss": 3.1463, | |
| "step": 1625 | |
| }, | |
| { | |
| "epoch": 5.646675358539765, | |
| "grad_norm": 0.44475382566452026, | |
| "learning_rate": 6.516816009976556e-06, | |
| "loss": 3.1117, | |
| "step": 1626 | |
| }, | |
| { | |
| "epoch": 5.650152107779227, | |
| "grad_norm": 0.4457295536994934, | |
| "learning_rate": 6.471701536320118e-06, | |
| "loss": 3.0655, | |
| "step": 1627 | |
| }, | |
| { | |
| "epoch": 5.6536288570186874, | |
| "grad_norm": 0.43922004103660583, | |
| "learning_rate": 6.4267329564339895e-06, | |
| "loss": 3.0753, | |
| "step": 1628 | |
| }, | |
| { | |
| "epoch": 5.657105606258149, | |
| "grad_norm": 0.4469055235385895, | |
| "learning_rate": 6.381910421039328e-06, | |
| "loss": 3.2001, | |
| "step": 1629 | |
| }, | |
| { | |
| "epoch": 5.66058235549761, | |
| "grad_norm": 0.4434399902820587, | |
| "learning_rate": 6.337234080367794e-06, | |
| "loss": 3.1005, | |
| "step": 1630 | |
| }, | |
| { | |
| "epoch": 5.6640591047370705, | |
| "grad_norm": 0.4269639253616333, | |
| "learning_rate": 6.292704084161067e-06, | |
| "loss": 3.0321, | |
| "step": 1631 | |
| }, | |
| { | |
| "epoch": 5.667535853976532, | |
| "grad_norm": 0.44678542017936707, | |
| "learning_rate": 6.248320581670281e-06, | |
| "loss": 3.0467, | |
| "step": 1632 | |
| }, | |
| { | |
| "epoch": 5.671012603215993, | |
| "grad_norm": 0.4449600577354431, | |
| "learning_rate": 6.204083721655607e-06, | |
| "loss": 3.0347, | |
| "step": 1633 | |
| }, | |
| { | |
| "epoch": 5.6744893524554545, | |
| "grad_norm": 0.43621253967285156, | |
| "learning_rate": 6.159993652385698e-06, | |
| "loss": 3.0156, | |
| "step": 1634 | |
| }, | |
| { | |
| "epoch": 5.677966101694915, | |
| "grad_norm": 0.4372401833534241, | |
| "learning_rate": 6.116050521637218e-06, | |
| "loss": 2.9871, | |
| "step": 1635 | |
| }, | |
| { | |
| "epoch": 5.681442850934376, | |
| "grad_norm": 0.449531614780426, | |
| "learning_rate": 6.072254476694328e-06, | |
| "loss": 3.1147, | |
| "step": 1636 | |
| }, | |
| { | |
| "epoch": 5.684919600173838, | |
| "grad_norm": 0.4365013539791107, | |
| "learning_rate": 6.028605664348191e-06, | |
| "loss": 3.0371, | |
| "step": 1637 | |
| }, | |
| { | |
| "epoch": 5.688396349413298, | |
| "grad_norm": 0.45457354187965393, | |
| "learning_rate": 5.985104230896516e-06, | |
| "loss": 3.0452, | |
| "step": 1638 | |
| }, | |
| { | |
| "epoch": 5.69187309865276, | |
| "grad_norm": 0.4303637444972992, | |
| "learning_rate": 5.941750322143036e-06, | |
| "loss": 3.0588, | |
| "step": 1639 | |
| }, | |
| { | |
| "epoch": 5.695349847892221, | |
| "grad_norm": 0.42853423953056335, | |
| "learning_rate": 5.898544083397e-06, | |
| "loss": 3.1129, | |
| "step": 1640 | |
| }, | |
| { | |
| "epoch": 5.698826597131681, | |
| "grad_norm": 0.4471043348312378, | |
| "learning_rate": 5.8554856594727425e-06, | |
| "loss": 3.0419, | |
| "step": 1641 | |
| }, | |
| { | |
| "epoch": 5.702303346371143, | |
| "grad_norm": 0.44851046800613403, | |
| "learning_rate": 5.812575194689163e-06, | |
| "loss": 3.0814, | |
| "step": 1642 | |
| }, | |
| { | |
| "epoch": 5.705780095610604, | |
| "grad_norm": 0.42725709080696106, | |
| "learning_rate": 5.76981283286922e-06, | |
| "loss": 3.0032, | |
| "step": 1643 | |
| }, | |
| { | |
| "epoch": 5.709256844850065, | |
| "grad_norm": 0.43283796310424805, | |
| "learning_rate": 5.727198717339511e-06, | |
| "loss": 3.05, | |
| "step": 1644 | |
| }, | |
| { | |
| "epoch": 5.712733594089526, | |
| "grad_norm": 0.44458362460136414, | |
| "learning_rate": 5.684732990929725e-06, | |
| "loss": 3.0892, | |
| "step": 1645 | |
| }, | |
| { | |
| "epoch": 5.716210343328988, | |
| "grad_norm": 0.4499422609806061, | |
| "learning_rate": 5.642415795972228e-06, | |
| "loss": 3.1489, | |
| "step": 1646 | |
| }, | |
| { | |
| "epoch": 5.719687092568448, | |
| "grad_norm": 0.4384379982948303, | |
| "learning_rate": 5.600247274301535e-06, | |
| "loss": 3.0991, | |
| "step": 1647 | |
| }, | |
| { | |
| "epoch": 5.72316384180791, | |
| "grad_norm": 0.4302300214767456, | |
| "learning_rate": 5.558227567253832e-06, | |
| "loss": 3.0915, | |
| "step": 1648 | |
| }, | |
| { | |
| "epoch": 5.726640591047371, | |
| "grad_norm": 0.4346764385700226, | |
| "learning_rate": 5.5163568156665565e-06, | |
| "loss": 3.0154, | |
| "step": 1649 | |
| }, | |
| { | |
| "epoch": 5.7301173402868315, | |
| "grad_norm": 0.429609477519989, | |
| "learning_rate": 5.474635159877883e-06, | |
| "loss": 2.9877, | |
| "step": 1650 | |
| }, | |
| { | |
| "epoch": 5.733594089526293, | |
| "grad_norm": 0.45559728145599365, | |
| "learning_rate": 5.433062739726258e-06, | |
| "loss": 3.1049, | |
| "step": 1651 | |
| }, | |
| { | |
| "epoch": 5.737070838765754, | |
| "grad_norm": 0.4608874022960663, | |
| "learning_rate": 5.391639694549943e-06, | |
| "loss": 3.1147, | |
| "step": 1652 | |
| }, | |
| { | |
| "epoch": 5.7405475880052155, | |
| "grad_norm": 0.4482730031013489, | |
| "learning_rate": 5.350366163186521e-06, | |
| "loss": 3.069, | |
| "step": 1653 | |
| }, | |
| { | |
| "epoch": 5.744024337244676, | |
| "grad_norm": 0.4392632246017456, | |
| "learning_rate": 5.30924228397246e-06, | |
| "loss": 3.1065, | |
| "step": 1654 | |
| }, | |
| { | |
| "epoch": 5.747501086484137, | |
| "grad_norm": 0.43618863821029663, | |
| "learning_rate": 5.2682681947426375e-06, | |
| "loss": 3.0498, | |
| "step": 1655 | |
| }, | |
| { | |
| "epoch": 5.7509778357235986, | |
| "grad_norm": 0.4522383213043213, | |
| "learning_rate": 5.227444032829887e-06, | |
| "loss": 3.1129, | |
| "step": 1656 | |
| }, | |
| { | |
| "epoch": 5.754454584963059, | |
| "grad_norm": 0.4380725920200348, | |
| "learning_rate": 5.186769935064534e-06, | |
| "loss": 3.0673, | |
| "step": 1657 | |
| }, | |
| { | |
| "epoch": 5.757931334202521, | |
| "grad_norm": 0.4552653729915619, | |
| "learning_rate": 5.146246037773922e-06, | |
| "loss": 3.121, | |
| "step": 1658 | |
| }, | |
| { | |
| "epoch": 5.761408083441982, | |
| "grad_norm": 0.44829854369163513, | |
| "learning_rate": 5.105872476781964e-06, | |
| "loss": 3.0677, | |
| "step": 1659 | |
| }, | |
| { | |
| "epoch": 5.764884832681442, | |
| "grad_norm": 0.4196740984916687, | |
| "learning_rate": 5.065649387408705e-06, | |
| "loss": 2.9564, | |
| "step": 1660 | |
| }, | |
| { | |
| "epoch": 5.768361581920904, | |
| "grad_norm": 0.44454118609428406, | |
| "learning_rate": 5.025576904469842e-06, | |
| "loss": 2.9863, | |
| "step": 1661 | |
| }, | |
| { | |
| "epoch": 5.771838331160365, | |
| "grad_norm": 0.4422823190689087, | |
| "learning_rate": 4.985655162276298e-06, | |
| "loss": 3.0987, | |
| "step": 1662 | |
| }, | |
| { | |
| "epoch": 5.775315080399826, | |
| "grad_norm": 0.44028976559638977, | |
| "learning_rate": 4.945884294633757e-06, | |
| "loss": 3.0641, | |
| "step": 1663 | |
| }, | |
| { | |
| "epoch": 5.778791829639287, | |
| "grad_norm": 0.43805432319641113, | |
| "learning_rate": 4.906264434842195e-06, | |
| "loss": 3.1735, | |
| "step": 1664 | |
| }, | |
| { | |
| "epoch": 5.782268578878749, | |
| "grad_norm": 0.4369741380214691, | |
| "learning_rate": 4.866795715695477e-06, | |
| "loss": 2.983, | |
| "step": 1665 | |
| }, | |
| { | |
| "epoch": 5.785745328118209, | |
| "grad_norm": 0.42888718843460083, | |
| "learning_rate": 4.827478269480895e-06, | |
| "loss": 2.9566, | |
| "step": 1666 | |
| }, | |
| { | |
| "epoch": 5.789222077357671, | |
| "grad_norm": 0.44136032462120056, | |
| "learning_rate": 4.788312227978686e-06, | |
| "loss": 3.143, | |
| "step": 1667 | |
| }, | |
| { | |
| "epoch": 5.792698826597132, | |
| "grad_norm": 0.42934727668762207, | |
| "learning_rate": 4.74929772246166e-06, | |
| "loss": 3.1335, | |
| "step": 1668 | |
| }, | |
| { | |
| "epoch": 5.7961755758365925, | |
| "grad_norm": 0.44165438413619995, | |
| "learning_rate": 4.710434883694715e-06, | |
| "loss": 3.0607, | |
| "step": 1669 | |
| }, | |
| { | |
| "epoch": 5.799652325076054, | |
| "grad_norm": 0.4275215268135071, | |
| "learning_rate": 4.6717238419344e-06, | |
| "loss": 3.052, | |
| "step": 1670 | |
| }, | |
| { | |
| "epoch": 5.803129074315515, | |
| "grad_norm": 0.4319266676902771, | |
| "learning_rate": 4.6331647269284795e-06, | |
| "loss": 3.0009, | |
| "step": 1671 | |
| }, | |
| { | |
| "epoch": 5.8066058235549765, | |
| "grad_norm": 0.4423021972179413, | |
| "learning_rate": 4.594757667915523e-06, | |
| "loss": 3.0179, | |
| "step": 1672 | |
| }, | |
| { | |
| "epoch": 5.810082572794437, | |
| "grad_norm": 0.4336705207824707, | |
| "learning_rate": 4.556502793624445e-06, | |
| "loss": 3.0279, | |
| "step": 1673 | |
| }, | |
| { | |
| "epoch": 5.813559322033898, | |
| "grad_norm": 0.41388699412345886, | |
| "learning_rate": 4.5184002322740785e-06, | |
| "loss": 3.015, | |
| "step": 1674 | |
| }, | |
| { | |
| "epoch": 5.8170360712733595, | |
| "grad_norm": 0.43204250931739807, | |
| "learning_rate": 4.48045011157277e-06, | |
| "loss": 3.0567, | |
| "step": 1675 | |
| }, | |
| { | |
| "epoch": 5.82051282051282, | |
| "grad_norm": 0.4401572048664093, | |
| "learning_rate": 4.442652558717897e-06, | |
| "loss": 3.0272, | |
| "step": 1676 | |
| }, | |
| { | |
| "epoch": 5.823989569752282, | |
| "grad_norm": 0.41776955127716064, | |
| "learning_rate": 4.405007700395497e-06, | |
| "loss": 2.9544, | |
| "step": 1677 | |
| }, | |
| { | |
| "epoch": 5.827466318991743, | |
| "grad_norm": 0.4356550872325897, | |
| "learning_rate": 4.3675156627798196e-06, | |
| "loss": 3.0096, | |
| "step": 1678 | |
| }, | |
| { | |
| "epoch": 5.830943068231203, | |
| "grad_norm": 0.43083855509757996, | |
| "learning_rate": 4.330176571532907e-06, | |
| "loss": 3.0231, | |
| "step": 1679 | |
| }, | |
| { | |
| "epoch": 5.834419817470665, | |
| "grad_norm": 0.43351972103118896, | |
| "learning_rate": 4.292990551804171e-06, | |
| "loss": 3.0642, | |
| "step": 1680 | |
| }, | |
| { | |
| "epoch": 5.837896566710126, | |
| "grad_norm": 0.4334842562675476, | |
| "learning_rate": 4.255957728229965e-06, | |
| "loss": 3.0491, | |
| "step": 1681 | |
| }, | |
| { | |
| "epoch": 5.841373315949587, | |
| "grad_norm": 0.4342375695705414, | |
| "learning_rate": 4.219078224933176e-06, | |
| "loss": 3.0382, | |
| "step": 1682 | |
| }, | |
| { | |
| "epoch": 5.844850065189048, | |
| "grad_norm": 0.42653897404670715, | |
| "learning_rate": 4.182352165522807e-06, | |
| "loss": 3.0741, | |
| "step": 1683 | |
| }, | |
| { | |
| "epoch": 5.84832681442851, | |
| "grad_norm": 0.43444690108299255, | |
| "learning_rate": 4.145779673093581e-06, | |
| "loss": 3.0743, | |
| "step": 1684 | |
| }, | |
| { | |
| "epoch": 5.85180356366797, | |
| "grad_norm": 0.42880359292030334, | |
| "learning_rate": 4.109360870225493e-06, | |
| "loss": 3.0114, | |
| "step": 1685 | |
| }, | |
| { | |
| "epoch": 5.855280312907432, | |
| "grad_norm": 0.4228828251361847, | |
| "learning_rate": 4.0730958789834295e-06, | |
| "loss": 3.103, | |
| "step": 1686 | |
| }, | |
| { | |
| "epoch": 5.858757062146893, | |
| "grad_norm": 0.4425991475582123, | |
| "learning_rate": 4.036984820916723e-06, | |
| "loss": 3.0108, | |
| "step": 1687 | |
| }, | |
| { | |
| "epoch": 5.8622338113863535, | |
| "grad_norm": 0.43377602100372314, | |
| "learning_rate": 4.001027817058789e-06, | |
| "loss": 3.1284, | |
| "step": 1688 | |
| }, | |
| { | |
| "epoch": 5.865710560625815, | |
| "grad_norm": 0.4408301115036011, | |
| "learning_rate": 3.965224987926702e-06, | |
| "loss": 3.0967, | |
| "step": 1689 | |
| }, | |
| { | |
| "epoch": 5.869187309865276, | |
| "grad_norm": 0.43531477451324463, | |
| "learning_rate": 3.9295764535207724e-06, | |
| "loss": 3.0079, | |
| "step": 1690 | |
| }, | |
| { | |
| "epoch": 5.872664059104737, | |
| "grad_norm": 0.43204811215400696, | |
| "learning_rate": 3.894082333324184e-06, | |
| "loss": 3.01, | |
| "step": 1691 | |
| }, | |
| { | |
| "epoch": 5.876140808344198, | |
| "grad_norm": 0.43953630328178406, | |
| "learning_rate": 3.858742746302535e-06, | |
| "loss": 3.1421, | |
| "step": 1692 | |
| }, | |
| { | |
| "epoch": 5.879617557583659, | |
| "grad_norm": 0.4389553964138031, | |
| "learning_rate": 3.823557810903517e-06, | |
| "loss": 3.1524, | |
| "step": 1693 | |
| }, | |
| { | |
| "epoch": 5.8830943068231205, | |
| "grad_norm": 0.43646112084388733, | |
| "learning_rate": 3.78852764505645e-06, | |
| "loss": 3.1744, | |
| "step": 1694 | |
| }, | |
| { | |
| "epoch": 5.886571056062581, | |
| "grad_norm": 0.4238009452819824, | |
| "learning_rate": 3.7536523661719112e-06, | |
| "loss": 3.0273, | |
| "step": 1695 | |
| }, | |
| { | |
| "epoch": 5.890047805302043, | |
| "grad_norm": 0.42250022292137146, | |
| "learning_rate": 3.7189320911413526e-06, | |
| "loss": 2.9794, | |
| "step": 1696 | |
| }, | |
| { | |
| "epoch": 5.893524554541504, | |
| "grad_norm": 0.41812658309936523, | |
| "learning_rate": 3.684366936336714e-06, | |
| "loss": 3.0897, | |
| "step": 1697 | |
| }, | |
| { | |
| "epoch": 5.897001303780964, | |
| "grad_norm": 0.4232540428638458, | |
| "learning_rate": 3.6499570176099785e-06, | |
| "loss": 3.0451, | |
| "step": 1698 | |
| }, | |
| { | |
| "epoch": 5.900478053020426, | |
| "grad_norm": 0.4142310917377472, | |
| "learning_rate": 3.615702450292857e-06, | |
| "loss": 2.9745, | |
| "step": 1699 | |
| }, | |
| { | |
| "epoch": 5.903954802259887, | |
| "grad_norm": 0.4126403331756592, | |
| "learning_rate": 3.581603349196372e-06, | |
| "loss": 2.9889, | |
| "step": 1700 | |
| }, | |
| { | |
| "epoch": 5.907431551499348, | |
| "grad_norm": 0.4263981282711029, | |
| "learning_rate": 3.5476598286104447e-06, | |
| "loss": 3.0619, | |
| "step": 1701 | |
| }, | |
| { | |
| "epoch": 5.910908300738809, | |
| "grad_norm": 0.41806501150131226, | |
| "learning_rate": 3.5138720023035696e-06, | |
| "loss": 3.0235, | |
| "step": 1702 | |
| }, | |
| { | |
| "epoch": 5.914385049978271, | |
| "grad_norm": 0.41938862204551697, | |
| "learning_rate": 3.4802399835223653e-06, | |
| "loss": 3.0578, | |
| "step": 1703 | |
| }, | |
| { | |
| "epoch": 5.917861799217731, | |
| "grad_norm": 0.4120302200317383, | |
| "learning_rate": 3.4467638849912497e-06, | |
| "loss": 2.9969, | |
| "step": 1704 | |
| }, | |
| { | |
| "epoch": 5.921338548457193, | |
| "grad_norm": 0.4265589714050293, | |
| "learning_rate": 3.413443818912049e-06, | |
| "loss": 3.0355, | |
| "step": 1705 | |
| }, | |
| { | |
| "epoch": 5.924815297696654, | |
| "grad_norm": 0.4226160943508148, | |
| "learning_rate": 3.3802798969636008e-06, | |
| "loss": 3.0972, | |
| "step": 1706 | |
| }, | |
| { | |
| "epoch": 5.9282920469361144, | |
| "grad_norm": 0.41874009370803833, | |
| "learning_rate": 3.3472722303014124e-06, | |
| "loss": 3.0843, | |
| "step": 1707 | |
| }, | |
| { | |
| "epoch": 5.931768796175576, | |
| "grad_norm": 0.42675143480300903, | |
| "learning_rate": 3.3144209295572494e-06, | |
| "loss": 3.0468, | |
| "step": 1708 | |
| }, | |
| { | |
| "epoch": 5.935245545415037, | |
| "grad_norm": 0.42002323269844055, | |
| "learning_rate": 3.2817261048387893e-06, | |
| "loss": 3.1044, | |
| "step": 1709 | |
| }, | |
| { | |
| "epoch": 5.938722294654498, | |
| "grad_norm": 0.41737180948257446, | |
| "learning_rate": 3.249187865729264e-06, | |
| "loss": 3.0161, | |
| "step": 1710 | |
| }, | |
| { | |
| "epoch": 5.942199043893959, | |
| "grad_norm": 0.42569711804389954, | |
| "learning_rate": 3.216806321287069e-06, | |
| "loss": 3.1494, | |
| "step": 1711 | |
| }, | |
| { | |
| "epoch": 5.94567579313342, | |
| "grad_norm": 0.4135091006755829, | |
| "learning_rate": 3.1845815800454036e-06, | |
| "loss": 3.0645, | |
| "step": 1712 | |
| }, | |
| { | |
| "epoch": 5.9491525423728815, | |
| "grad_norm": 0.4207760691642761, | |
| "learning_rate": 3.1525137500119207e-06, | |
| "loss": 3.0256, | |
| "step": 1713 | |
| }, | |
| { | |
| "epoch": 5.952629291612342, | |
| "grad_norm": 0.42668992280960083, | |
| "learning_rate": 3.1206029386683324e-06, | |
| "loss": 3.0805, | |
| "step": 1714 | |
| }, | |
| { | |
| "epoch": 5.956106040851804, | |
| "grad_norm": 0.42762625217437744, | |
| "learning_rate": 3.0888492529700885e-06, | |
| "loss": 3.0698, | |
| "step": 1715 | |
| }, | |
| { | |
| "epoch": 5.959582790091265, | |
| "grad_norm": 0.4181903600692749, | |
| "learning_rate": 3.0572527993460053e-06, | |
| "loss": 3.0316, | |
| "step": 1716 | |
| }, | |
| { | |
| "epoch": 5.963059539330725, | |
| "grad_norm": 0.4258002042770386, | |
| "learning_rate": 3.0258136836978866e-06, | |
| "loss": 3.0404, | |
| "step": 1717 | |
| }, | |
| { | |
| "epoch": 5.966536288570187, | |
| "grad_norm": 0.41894468665122986, | |
| "learning_rate": 2.994532011400214e-06, | |
| "loss": 3.0116, | |
| "step": 1718 | |
| }, | |
| { | |
| "epoch": 5.970013037809648, | |
| "grad_norm": 0.41693276166915894, | |
| "learning_rate": 2.963407887299724e-06, | |
| "loss": 3.0115, | |
| "step": 1719 | |
| }, | |
| { | |
| "epoch": 5.973489787049109, | |
| "grad_norm": 0.4395310878753662, | |
| "learning_rate": 2.932441415715137e-06, | |
| "loss": 3.1532, | |
| "step": 1720 | |
| }, | |
| { | |
| "epoch": 5.97696653628857, | |
| "grad_norm": 0.4162778854370117, | |
| "learning_rate": 2.901632700436757e-06, | |
| "loss": 3.0968, | |
| "step": 1721 | |
| }, | |
| { | |
| "epoch": 5.980443285528032, | |
| "grad_norm": 0.41690173745155334, | |
| "learning_rate": 2.8709818447261337e-06, | |
| "loss": 3.034, | |
| "step": 1722 | |
| }, | |
| { | |
| "epoch": 5.983920034767492, | |
| "grad_norm": 0.42720499634742737, | |
| "learning_rate": 2.8404889513157176e-06, | |
| "loss": 3.0948, | |
| "step": 1723 | |
| }, | |
| { | |
| "epoch": 5.987396784006954, | |
| "grad_norm": 0.4119645655155182, | |
| "learning_rate": 2.81015412240852e-06, | |
| "loss": 2.9471, | |
| "step": 1724 | |
| }, | |
| { | |
| "epoch": 5.990873533246415, | |
| "grad_norm": 0.42118823528289795, | |
| "learning_rate": 2.7799774596777794e-06, | |
| "loss": 3.0288, | |
| "step": 1725 | |
| }, | |
| { | |
| "epoch": 5.994350282485875, | |
| "grad_norm": 0.40349310636520386, | |
| "learning_rate": 2.7499590642665774e-06, | |
| "loss": 2.9575, | |
| "step": 1726 | |
| }, | |
| { | |
| "epoch": 5.997827031725337, | |
| "grad_norm": 0.4119188189506531, | |
| "learning_rate": 2.7200990367875656e-06, | |
| "loss": 2.9967, | |
| "step": 1727 | |
| }, | |
| { | |
| "epoch": 6.0, | |
| "grad_norm": 0.5107985138893127, | |
| "learning_rate": 2.6903974773225702e-06, | |
| "loss": 2.9496, | |
| "step": 1728 | |
| }, | |
| { | |
| "epoch": 6.003476749239461, | |
| "grad_norm": 0.42430585622787476, | |
| "learning_rate": 2.660854485422298e-06, | |
| "loss": 3.0021, | |
| "step": 1729 | |
| }, | |
| { | |
| "epoch": 6.006953498478922, | |
| "grad_norm": 0.42097654938697815, | |
| "learning_rate": 2.6314701601059756e-06, | |
| "loss": 3.0434, | |
| "step": 1730 | |
| }, | |
| { | |
| "epoch": 6.010430247718383, | |
| "grad_norm": 0.43055519461631775, | |
| "learning_rate": 2.60224459986102e-06, | |
| "loss": 3.1388, | |
| "step": 1731 | |
| }, | |
| { | |
| "epoch": 6.013906996957845, | |
| "grad_norm": 0.42370346188545227, | |
| "learning_rate": 2.573177902642726e-06, | |
| "loss": 3.0623, | |
| "step": 1732 | |
| }, | |
| { | |
| "epoch": 6.017383746197305, | |
| "grad_norm": 0.41282978653907776, | |
| "learning_rate": 2.5442701658739186e-06, | |
| "loss": 3.0301, | |
| "step": 1733 | |
| }, | |
| { | |
| "epoch": 6.020860495436767, | |
| "grad_norm": 0.42440617084503174, | |
| "learning_rate": 2.515521486444655e-06, | |
| "loss": 3.0945, | |
| "step": 1734 | |
| }, | |
| { | |
| "epoch": 6.024337244676228, | |
| "grad_norm": 0.42208147048950195, | |
| "learning_rate": 2.4869319607118636e-06, | |
| "loss": 3.138, | |
| "step": 1735 | |
| }, | |
| { | |
| "epoch": 6.0278139939156885, | |
| "grad_norm": 0.40505877137184143, | |
| "learning_rate": 2.4585016844990316e-06, | |
| "loss": 2.9389, | |
| "step": 1736 | |
| }, | |
| { | |
| "epoch": 6.03129074315515, | |
| "grad_norm": 0.4153197109699249, | |
| "learning_rate": 2.430230753095891e-06, | |
| "loss": 3.027, | |
| "step": 1737 | |
| }, | |
| { | |
| "epoch": 6.034767492394611, | |
| "grad_norm": 0.41399866342544556, | |
| "learning_rate": 2.4021192612581178e-06, | |
| "loss": 2.9704, | |
| "step": 1738 | |
| }, | |
| { | |
| "epoch": 6.0382442416340725, | |
| "grad_norm": 0.42374345660209656, | |
| "learning_rate": 2.3741673032069756e-06, | |
| "loss": 3.0041, | |
| "step": 1739 | |
| }, | |
| { | |
| "epoch": 6.041720990873533, | |
| "grad_norm": 0.41399243474006653, | |
| "learning_rate": 2.3463749726290286e-06, | |
| "loss": 2.9925, | |
| "step": 1740 | |
| }, | |
| { | |
| "epoch": 6.045197740112994, | |
| "grad_norm": 0.4206804633140564, | |
| "learning_rate": 2.318742362675813e-06, | |
| "loss": 3.001, | |
| "step": 1741 | |
| }, | |
| { | |
| "epoch": 6.0486744893524556, | |
| "grad_norm": 0.4350409209728241, | |
| "learning_rate": 2.291269565963522e-06, | |
| "loss": 3.1501, | |
| "step": 1742 | |
| }, | |
| { | |
| "epoch": 6.052151238591916, | |
| "grad_norm": 0.4238588809967041, | |
| "learning_rate": 2.2639566745727205e-06, | |
| "loss": 3.108, | |
| "step": 1743 | |
| }, | |
| { | |
| "epoch": 6.055627987831378, | |
| "grad_norm": 0.42708900570869446, | |
| "learning_rate": 2.2368037800480023e-06, | |
| "loss": 3.071, | |
| "step": 1744 | |
| }, | |
| { | |
| "epoch": 6.059104737070839, | |
| "grad_norm": 0.43548697233200073, | |
| "learning_rate": 2.2098109733977136e-06, | |
| "loss": 3.0807, | |
| "step": 1745 | |
| }, | |
| { | |
| "epoch": 6.0625814863103, | |
| "grad_norm": 0.4319799542427063, | |
| "learning_rate": 2.1829783450936283e-06, | |
| "loss": 3.1164, | |
| "step": 1746 | |
| }, | |
| { | |
| "epoch": 6.066058235549761, | |
| "grad_norm": 0.41852834820747375, | |
| "learning_rate": 2.1563059850706337e-06, | |
| "loss": 3.0957, | |
| "step": 1747 | |
| }, | |
| { | |
| "epoch": 6.069534984789222, | |
| "grad_norm": 0.4240981340408325, | |
| "learning_rate": 2.12979398272648e-06, | |
| "loss": 2.9745, | |
| "step": 1748 | |
| }, | |
| { | |
| "epoch": 6.073011734028683, | |
| "grad_norm": 0.41493934392929077, | |
| "learning_rate": 2.1034424269214257e-06, | |
| "loss": 3.0467, | |
| "step": 1749 | |
| }, | |
| { | |
| "epoch": 6.076488483268144, | |
| "grad_norm": 0.411040335893631, | |
| "learning_rate": 2.0772514059779535e-06, | |
| "loss": 2.9896, | |
| "step": 1750 | |
| }, | |
| { | |
| "epoch": 6.079965232507606, | |
| "grad_norm": 0.41265007853507996, | |
| "learning_rate": 2.0512210076804982e-06, | |
| "loss": 2.9951, | |
| "step": 1751 | |
| }, | |
| { | |
| "epoch": 6.083441981747066, | |
| "grad_norm": 0.4220527410507202, | |
| "learning_rate": 2.0253513192751373e-06, | |
| "loss": 3.0334, | |
| "step": 1752 | |
| }, | |
| { | |
| "epoch": 6.086918730986528, | |
| "grad_norm": 0.4182776212692261, | |
| "learning_rate": 1.9996424274692725e-06, | |
| "loss": 3.0428, | |
| "step": 1753 | |
| }, | |
| { | |
| "epoch": 6.090395480225989, | |
| "grad_norm": 0.4076248109340668, | |
| "learning_rate": 1.974094418431388e-06, | |
| "loss": 2.9442, | |
| "step": 1754 | |
| }, | |
| { | |
| "epoch": 6.0938722294654495, | |
| "grad_norm": 0.40764060616493225, | |
| "learning_rate": 1.9487073777907316e-06, | |
| "loss": 2.9983, | |
| "step": 1755 | |
| }, | |
| { | |
| "epoch": 6.097348978704911, | |
| "grad_norm": 0.41706383228302, | |
| "learning_rate": 1.9234813906370276e-06, | |
| "loss": 3.0464, | |
| "step": 1756 | |
| }, | |
| { | |
| "epoch": 6.100825727944372, | |
| "grad_norm": 0.41518938541412354, | |
| "learning_rate": 1.8984165415202094e-06, | |
| "loss": 3.0598, | |
| "step": 1757 | |
| }, | |
| { | |
| "epoch": 6.1043024771838335, | |
| "grad_norm": 0.41128143668174744, | |
| "learning_rate": 1.873512914450104e-06, | |
| "loss": 2.9316, | |
| "step": 1758 | |
| }, | |
| { | |
| "epoch": 6.107779226423294, | |
| "grad_norm": 0.4145541191101074, | |
| "learning_rate": 1.8487705928961874e-06, | |
| "loss": 3.1041, | |
| "step": 1759 | |
| }, | |
| { | |
| "epoch": 6.111255975662755, | |
| "grad_norm": 0.40822362899780273, | |
| "learning_rate": 1.8241896597872842e-06, | |
| "loss": 3.0146, | |
| "step": 1760 | |
| }, | |
| { | |
| "epoch": 6.1147327249022165, | |
| "grad_norm": 0.4129459857940674, | |
| "learning_rate": 1.7997701975112912e-06, | |
| "loss": 3.0685, | |
| "step": 1761 | |
| }, | |
| { | |
| "epoch": 6.118209474141677, | |
| "grad_norm": 0.4087814390659332, | |
| "learning_rate": 1.77551228791491e-06, | |
| "loss": 2.9484, | |
| "step": 1762 | |
| }, | |
| { | |
| "epoch": 6.121686223381139, | |
| "grad_norm": 0.41240808367729187, | |
| "learning_rate": 1.7514160123033584e-06, | |
| "loss": 3.0681, | |
| "step": 1763 | |
| }, | |
| { | |
| "epoch": 6.1251629726206, | |
| "grad_norm": 0.4155349135398865, | |
| "learning_rate": 1.7274814514400994e-06, | |
| "loss": 3.1166, | |
| "step": 1764 | |
| }, | |
| { | |
| "epoch": 6.128639721860061, | |
| "grad_norm": 0.41594305634498596, | |
| "learning_rate": 1.70370868554659e-06, | |
| "loss": 3.0405, | |
| "step": 1765 | |
| }, | |
| { | |
| "epoch": 6.132116471099522, | |
| "grad_norm": 0.4166364371776581, | |
| "learning_rate": 1.6800977943019936e-06, | |
| "loss": 3.0783, | |
| "step": 1766 | |
| }, | |
| { | |
| "epoch": 6.135593220338983, | |
| "grad_norm": 0.42120352387428284, | |
| "learning_rate": 1.6566488568429294e-06, | |
| "loss": 3.0457, | |
| "step": 1767 | |
| }, | |
| { | |
| "epoch": 6.139069969578444, | |
| "grad_norm": 0.40205660462379456, | |
| "learning_rate": 1.6333619517631793e-06, | |
| "loss": 3.0141, | |
| "step": 1768 | |
| }, | |
| { | |
| "epoch": 6.142546718817905, | |
| "grad_norm": 0.40919235348701477, | |
| "learning_rate": 1.6102371571134423e-06, | |
| "loss": 3.0343, | |
| "step": 1769 | |
| }, | |
| { | |
| "epoch": 6.146023468057367, | |
| "grad_norm": 0.4099332392215729, | |
| "learning_rate": 1.5872745504010799e-06, | |
| "loss": 2.9358, | |
| "step": 1770 | |
| }, | |
| { | |
| "epoch": 6.149500217296827, | |
| "grad_norm": 0.4156320095062256, | |
| "learning_rate": 1.5644742085898556e-06, | |
| "loss": 3.0225, | |
| "step": 1771 | |
| }, | |
| { | |
| "epoch": 6.152976966536288, | |
| "grad_norm": 0.41079193353652954, | |
| "learning_rate": 1.5418362080996507e-06, | |
| "loss": 3.0517, | |
| "step": 1772 | |
| }, | |
| { | |
| "epoch": 6.15645371577575, | |
| "grad_norm": 0.4225104749202728, | |
| "learning_rate": 1.5193606248062486e-06, | |
| "loss": 3.0728, | |
| "step": 1773 | |
| }, | |
| { | |
| "epoch": 6.1599304650152105, | |
| "grad_norm": 0.41226664185523987, | |
| "learning_rate": 1.497047534041035e-06, | |
| "loss": 3.0909, | |
| "step": 1774 | |
| }, | |
| { | |
| "epoch": 6.163407214254672, | |
| "grad_norm": 0.4080653488636017, | |
| "learning_rate": 1.4748970105907866e-06, | |
| "loss": 3.0225, | |
| "step": 1775 | |
| }, | |
| { | |
| "epoch": 6.166883963494133, | |
| "grad_norm": 0.42255541682243347, | |
| "learning_rate": 1.4529091286973995e-06, | |
| "loss": 3.0809, | |
| "step": 1776 | |
| }, | |
| { | |
| "epoch": 6.170360712733594, | |
| "grad_norm": 0.4306301474571228, | |
| "learning_rate": 1.4310839620576444e-06, | |
| "loss": 3.0776, | |
| "step": 1777 | |
| }, | |
| { | |
| "epoch": 6.173837461973055, | |
| "grad_norm": 0.4123653769493103, | |
| "learning_rate": 1.4094215838229176e-06, | |
| "loss": 2.9917, | |
| "step": 1778 | |
| }, | |
| { | |
| "epoch": 6.177314211212516, | |
| "grad_norm": 0.40510258078575134, | |
| "learning_rate": 1.3879220665989955e-06, | |
| "loss": 2.9276, | |
| "step": 1779 | |
| }, | |
| { | |
| "epoch": 6.1807909604519775, | |
| "grad_norm": 0.41338902711868286, | |
| "learning_rate": 1.3665854824458036e-06, | |
| "loss": 2.9885, | |
| "step": 1780 | |
| }, | |
| { | |
| "epoch": 6.184267709691438, | |
| "grad_norm": 0.40919315814971924, | |
| "learning_rate": 1.3454119028771528e-06, | |
| "loss": 2.9884, | |
| "step": 1781 | |
| }, | |
| { | |
| "epoch": 6.1877444589309, | |
| "grad_norm": 0.40917351841926575, | |
| "learning_rate": 1.3244013988605086e-06, | |
| "loss": 3.0144, | |
| "step": 1782 | |
| }, | |
| { | |
| "epoch": 6.191221208170361, | |
| "grad_norm": 0.4239337146282196, | |
| "learning_rate": 1.303554040816779e-06, | |
| "loss": 3.168, | |
| "step": 1783 | |
| }, | |
| { | |
| "epoch": 6.194697957409822, | |
| "grad_norm": 0.42139217257499695, | |
| "learning_rate": 1.282869898620026e-06, | |
| "loss": 3.0753, | |
| "step": 1784 | |
| }, | |
| { | |
| "epoch": 6.198174706649283, | |
| "grad_norm": 0.42025497555732727, | |
| "learning_rate": 1.2623490415972938e-06, | |
| "loss": 3.0688, | |
| "step": 1785 | |
| }, | |
| { | |
| "epoch": 6.201651455888744, | |
| "grad_norm": 0.4139435589313507, | |
| "learning_rate": 1.2419915385283088e-06, | |
| "loss": 3.0563, | |
| "step": 1786 | |
| }, | |
| { | |
| "epoch": 6.205128205128205, | |
| "grad_norm": 0.3968459367752075, | |
| "learning_rate": 1.2217974576453073e-06, | |
| "loss": 2.9678, | |
| "step": 1787 | |
| }, | |
| { | |
| "epoch": 6.208604954367666, | |
| "grad_norm": 0.4100431203842163, | |
| "learning_rate": 1.2017668666327753e-06, | |
| "loss": 3.0309, | |
| "step": 1788 | |
| }, | |
| { | |
| "epoch": 6.212081703607128, | |
| "grad_norm": 0.4222622811794281, | |
| "learning_rate": 1.1818998326272369e-06, | |
| "loss": 3.1016, | |
| "step": 1789 | |
| }, | |
| { | |
| "epoch": 6.215558452846588, | |
| "grad_norm": 0.42679232358932495, | |
| "learning_rate": 1.1621964222170213e-06, | |
| "loss": 3.0438, | |
| "step": 1790 | |
| }, | |
| { | |
| "epoch": 6.219035202086049, | |
| "grad_norm": 0.4114995002746582, | |
| "learning_rate": 1.1426567014420297e-06, | |
| "loss": 3.0503, | |
| "step": 1791 | |
| }, | |
| { | |
| "epoch": 6.222511951325511, | |
| "grad_norm": 0.41597941517829895, | |
| "learning_rate": 1.1232807357935248e-06, | |
| "loss": 2.9836, | |
| "step": 1792 | |
| }, | |
| { | |
| "epoch": 6.2259887005649714, | |
| "grad_norm": 0.4132709205150604, | |
| "learning_rate": 1.1040685902139304e-06, | |
| "loss": 3.0925, | |
| "step": 1793 | |
| }, | |
| { | |
| "epoch": 6.229465449804433, | |
| "grad_norm": 0.4016041159629822, | |
| "learning_rate": 1.08502032909657e-06, | |
| "loss": 3.0071, | |
| "step": 1794 | |
| }, | |
| { | |
| "epoch": 6.232942199043894, | |
| "grad_norm": 0.4292777478694916, | |
| "learning_rate": 1.0661360162855016e-06, | |
| "loss": 3.1368, | |
| "step": 1795 | |
| }, | |
| { | |
| "epoch": 6.236418948283355, | |
| "grad_norm": 0.4259623885154724, | |
| "learning_rate": 1.0474157150752672e-06, | |
| "loss": 3.0294, | |
| "step": 1796 | |
| }, | |
| { | |
| "epoch": 6.239895697522816, | |
| "grad_norm": 0.4248141646385193, | |
| "learning_rate": 1.0288594882106707e-06, | |
| "loss": 3.0409, | |
| "step": 1797 | |
| }, | |
| { | |
| "epoch": 6.243372446762277, | |
| "grad_norm": 0.41500625014305115, | |
| "learning_rate": 1.0104673978866164e-06, | |
| "loss": 3.069, | |
| "step": 1798 | |
| }, | |
| { | |
| "epoch": 6.2468491960017385, | |
| "grad_norm": 0.40690165758132935, | |
| "learning_rate": 9.922395057478607e-07, | |
| "loss": 3.0169, | |
| "step": 1799 | |
| }, | |
| { | |
| "epoch": 6.250325945241199, | |
| "grad_norm": 0.40856775641441345, | |
| "learning_rate": 9.741758728888218e-07, | |
| "loss": 3.0363, | |
| "step": 1800 | |
| }, | |
| { | |
| "epoch": 6.253802694480661, | |
| "grad_norm": 0.4182389974594116, | |
| "learning_rate": 9.562765598533641e-07, | |
| "loss": 3.0165, | |
| "step": 1801 | |
| }, | |
| { | |
| "epoch": 6.257279443720122, | |
| "grad_norm": 0.42129838466644287, | |
| "learning_rate": 9.385416266345982e-07, | |
| "loss": 3.0921, | |
| "step": 1802 | |
| }, | |
| { | |
| "epoch": 6.260756192959583, | |
| "grad_norm": 0.4121788740158081, | |
| "learning_rate": 9.209711326746918e-07, | |
| "loss": 3.0918, | |
| "step": 1803 | |
| }, | |
| { | |
| "epoch": 6.264232942199044, | |
| "grad_norm": 0.40901103615760803, | |
| "learning_rate": 9.035651368646648e-07, | |
| "loss": 3.1085, | |
| "step": 1804 | |
| }, | |
| { | |
| "epoch": 6.267709691438505, | |
| "grad_norm": 0.3984892666339874, | |
| "learning_rate": 8.86323697544178e-07, | |
| "loss": 3.0205, | |
| "step": 1805 | |
| }, | |
| { | |
| "epoch": 6.271186440677966, | |
| "grad_norm": 0.41698625683784485, | |
| "learning_rate": 8.692468725013448e-07, | |
| "loss": 3.0734, | |
| "step": 1806 | |
| }, | |
| { | |
| "epoch": 6.274663189917427, | |
| "grad_norm": 0.4178268611431122, | |
| "learning_rate": 8.523347189725639e-07, | |
| "loss": 3.0949, | |
| "step": 1807 | |
| }, | |
| { | |
| "epoch": 6.278139939156889, | |
| "grad_norm": 0.41233837604522705, | |
| "learning_rate": 8.355872936422759e-07, | |
| "loss": 2.9744, | |
| "step": 1808 | |
| }, | |
| { | |
| "epoch": 6.281616688396349, | |
| "grad_norm": 0.4059881567955017, | |
| "learning_rate": 8.190046526428242e-07, | |
| "loss": 2.9941, | |
| "step": 1809 | |
| }, | |
| { | |
| "epoch": 6.28509343763581, | |
| "grad_norm": 0.4065244495868683, | |
| "learning_rate": 8.025868515542268e-07, | |
| "loss": 3.0267, | |
| "step": 1810 | |
| }, | |
| { | |
| "epoch": 6.288570186875272, | |
| "grad_norm": 0.41115114092826843, | |
| "learning_rate": 7.863339454040275e-07, | |
| "loss": 3.05, | |
| "step": 1811 | |
| }, | |
| { | |
| "epoch": 6.292046936114732, | |
| "grad_norm": 0.4080888628959656, | |
| "learning_rate": 7.702459886670788e-07, | |
| "loss": 3.0628, | |
| "step": 1812 | |
| }, | |
| { | |
| "epoch": 6.295523685354194, | |
| "grad_norm": 0.41215333342552185, | |
| "learning_rate": 7.543230352653751e-07, | |
| "loss": 3.079, | |
| "step": 1813 | |
| }, | |
| { | |
| "epoch": 6.299000434593655, | |
| "grad_norm": 0.4129772484302521, | |
| "learning_rate": 7.385651385678649e-07, | |
| "loss": 3.0567, | |
| "step": 1814 | |
| }, | |
| { | |
| "epoch": 6.302477183833116, | |
| "grad_norm": 0.40636974573135376, | |
| "learning_rate": 7.229723513902831e-07, | |
| "loss": 3.0737, | |
| "step": 1815 | |
| }, | |
| { | |
| "epoch": 6.305953933072577, | |
| "grad_norm": 0.4125308692455292, | |
| "learning_rate": 7.07544725994963e-07, | |
| "loss": 2.9995, | |
| "step": 1816 | |
| }, | |
| { | |
| "epoch": 6.309430682312038, | |
| "grad_norm": 0.4017834961414337, | |
| "learning_rate": 6.922823140906753e-07, | |
| "loss": 3.0389, | |
| "step": 1817 | |
| }, | |
| { | |
| "epoch": 6.3129074315514995, | |
| "grad_norm": 0.39485737681388855, | |
| "learning_rate": 6.771851668324225e-07, | |
| "loss": 2.9166, | |
| "step": 1818 | |
| }, | |
| { | |
| "epoch": 6.31638418079096, | |
| "grad_norm": 0.41091981530189514, | |
| "learning_rate": 6.622533348213167e-07, | |
| "loss": 3.1168, | |
| "step": 1819 | |
| }, | |
| { | |
| "epoch": 6.319860930030422, | |
| "grad_norm": 0.40876245498657227, | |
| "learning_rate": 6.474868681043578e-07, | |
| "loss": 3.0703, | |
| "step": 1820 | |
| }, | |
| { | |
| "epoch": 6.3233376792698825, | |
| "grad_norm": 0.41209086775779724, | |
| "learning_rate": 6.328858161743112e-07, | |
| "loss": 3.079, | |
| "step": 1821 | |
| }, | |
| { | |
| "epoch": 6.326814428509344, | |
| "grad_norm": 0.4071592390537262, | |
| "learning_rate": 6.184502279695137e-07, | |
| "loss": 3.1334, | |
| "step": 1822 | |
| }, | |
| { | |
| "epoch": 6.330291177748805, | |
| "grad_norm": 0.3960627317428589, | |
| "learning_rate": 6.041801518737122e-07, | |
| "loss": 2.9825, | |
| "step": 1823 | |
| }, | |
| { | |
| "epoch": 6.333767926988266, | |
| "grad_norm": 0.4103507995605469, | |
| "learning_rate": 5.900756357159143e-07, | |
| "loss": 3.0592, | |
| "step": 1824 | |
| }, | |
| { | |
| "epoch": 6.337244676227727, | |
| "grad_norm": 0.4149017930030823, | |
| "learning_rate": 5.761367267702155e-07, | |
| "loss": 3.1168, | |
| "step": 1825 | |
| }, | |
| { | |
| "epoch": 6.340721425467188, | |
| "grad_norm": 0.4141378104686737, | |
| "learning_rate": 5.623634717556503e-07, | |
| "loss": 3.099, | |
| "step": 1826 | |
| }, | |
| { | |
| "epoch": 6.34419817470665, | |
| "grad_norm": 0.40302687883377075, | |
| "learning_rate": 5.487559168360301e-07, | |
| "loss": 3.0205, | |
| "step": 1827 | |
| }, | |
| { | |
| "epoch": 6.34767492394611, | |
| "grad_norm": 0.4126074016094208, | |
| "learning_rate": 5.353141076197887e-07, | |
| "loss": 3.0381, | |
| "step": 1828 | |
| }, | |
| { | |
| "epoch": 6.351151673185571, | |
| "grad_norm": 0.41772815585136414, | |
| "learning_rate": 5.220380891598265e-07, | |
| "loss": 3.0821, | |
| "step": 1829 | |
| }, | |
| { | |
| "epoch": 6.354628422425033, | |
| "grad_norm": 0.408920019865036, | |
| "learning_rate": 5.089279059533658e-07, | |
| "loss": 3.0344, | |
| "step": 1830 | |
| }, | |
| { | |
| "epoch": 6.358105171664493, | |
| "grad_norm": 0.39784538745880127, | |
| "learning_rate": 4.959836019417963e-07, | |
| "loss": 3.0118, | |
| "step": 1831 | |
| }, | |
| { | |
| "epoch": 6.361581920903955, | |
| "grad_norm": 0.41000446677207947, | |
| "learning_rate": 4.832052205105464e-07, | |
| "loss": 3.0489, | |
| "step": 1832 | |
| }, | |
| { | |
| "epoch": 6.365058670143416, | |
| "grad_norm": 0.40835651755332947, | |
| "learning_rate": 4.705928044888952e-07, | |
| "loss": 3.0752, | |
| "step": 1833 | |
| }, | |
| { | |
| "epoch": 6.368535419382877, | |
| "grad_norm": 0.40791556239128113, | |
| "learning_rate": 4.581463961498722e-07, | |
| "loss": 3.0884, | |
| "step": 1834 | |
| }, | |
| { | |
| "epoch": 6.372012168622338, | |
| "grad_norm": 0.408925861120224, | |
| "learning_rate": 4.45866037210102e-07, | |
| "loss": 3.0482, | |
| "step": 1835 | |
| }, | |
| { | |
| "epoch": 6.375488917861799, | |
| "grad_norm": 0.4026236832141876, | |
| "learning_rate": 4.337517688296544e-07, | |
| "loss": 2.9968, | |
| "step": 1836 | |
| }, | |
| { | |
| "epoch": 6.3789656671012605, | |
| "grad_norm": 0.4146400988101959, | |
| "learning_rate": 4.218036316119112e-07, | |
| "loss": 3.1706, | |
| "step": 1837 | |
| }, | |
| { | |
| "epoch": 6.382442416340721, | |
| "grad_norm": 0.4027198553085327, | |
| "learning_rate": 4.100216656034328e-07, | |
| "loss": 3.0222, | |
| "step": 1838 | |
| }, | |
| { | |
| "epoch": 6.385919165580183, | |
| "grad_norm": 0.40519019961357117, | |
| "learning_rate": 3.98405910293842e-07, | |
| "loss": 3.0531, | |
| "step": 1839 | |
| }, | |
| { | |
| "epoch": 6.3893959148196435, | |
| "grad_norm": 0.40851891040802, | |
| "learning_rate": 3.86956404615646e-07, | |
| "loss": 3.0943, | |
| "step": 1840 | |
| }, | |
| { | |
| "epoch": 6.392872664059105, | |
| "grad_norm": 0.40011292695999146, | |
| "learning_rate": 3.7567318694414765e-07, | |
| "loss": 3.013, | |
| "step": 1841 | |
| }, | |
| { | |
| "epoch": 6.396349413298566, | |
| "grad_norm": 0.40723660588264465, | |
| "learning_rate": 3.6455629509730136e-07, | |
| "loss": 3.091, | |
| "step": 1842 | |
| }, | |
| { | |
| "epoch": 6.399826162538027, | |
| "grad_norm": 0.41157960891723633, | |
| "learning_rate": 3.536057663355852e-07, | |
| "loss": 3.1858, | |
| "step": 1843 | |
| }, | |
| { | |
| "epoch": 6.403302911777488, | |
| "grad_norm": 0.4096364974975586, | |
| "learning_rate": 3.4282163736188424e-07, | |
| "loss": 3.0255, | |
| "step": 1844 | |
| }, | |
| { | |
| "epoch": 6.406779661016949, | |
| "grad_norm": 0.40242981910705566, | |
| "learning_rate": 3.3220394432135205e-07, | |
| "loss": 3.0467, | |
| "step": 1845 | |
| }, | |
| { | |
| "epoch": 6.410256410256411, | |
| "grad_norm": 0.4040038585662842, | |
| "learning_rate": 3.2175272280131064e-07, | |
| "loss": 3.0921, | |
| "step": 1846 | |
| }, | |
| { | |
| "epoch": 6.413733159495871, | |
| "grad_norm": 0.3992011547088623, | |
| "learning_rate": 3.114680078311005e-07, | |
| "loss": 3.0465, | |
| "step": 1847 | |
| }, | |
| { | |
| "epoch": 6.417209908735332, | |
| "grad_norm": 0.4034344255924225, | |
| "learning_rate": 3.013498338820031e-07, | |
| "loss": 3.02, | |
| "step": 1848 | |
| }, | |
| { | |
| "epoch": 6.420686657974794, | |
| "grad_norm": 0.4033249020576477, | |
| "learning_rate": 2.913982348670907e-07, | |
| "loss": 3.0312, | |
| "step": 1849 | |
| }, | |
| { | |
| "epoch": 6.424163407214254, | |
| "grad_norm": 0.4111652076244354, | |
| "learning_rate": 2.816132441411379e-07, | |
| "loss": 3.1184, | |
| "step": 1850 | |
| }, | |
| { | |
| "epoch": 6.427640156453716, | |
| "grad_norm": 0.41480228304862976, | |
| "learning_rate": 2.71994894500488e-07, | |
| "loss": 3.0247, | |
| "step": 1851 | |
| }, | |
| { | |
| "epoch": 6.431116905693177, | |
| "grad_norm": 0.40189048647880554, | |
| "learning_rate": 2.6254321818295345e-07, | |
| "loss": 3.129, | |
| "step": 1852 | |
| }, | |
| { | |
| "epoch": 6.434593654932638, | |
| "grad_norm": 0.40309739112854004, | |
| "learning_rate": 2.532582468677214e-07, | |
| "loss": 3.0843, | |
| "step": 1853 | |
| }, | |
| { | |
| "epoch": 6.438070404172099, | |
| "grad_norm": 0.4054506719112396, | |
| "learning_rate": 2.441400116752146e-07, | |
| "loss": 2.9962, | |
| "step": 1854 | |
| }, | |
| { | |
| "epoch": 6.44154715341156, | |
| "grad_norm": 0.40184342861175537, | |
| "learning_rate": 2.3518854316701977e-07, | |
| "loss": 3.0369, | |
| "step": 1855 | |
| }, | |
| { | |
| "epoch": 6.445023902651021, | |
| "grad_norm": 0.41236573457717896, | |
| "learning_rate": 2.2640387134577058e-07, | |
| "loss": 3.121, | |
| "step": 1856 | |
| }, | |
| { | |
| "epoch": 6.448500651890482, | |
| "grad_norm": 0.4023594856262207, | |
| "learning_rate": 2.1778602565504237e-07, | |
| "loss": 3.07, | |
| "step": 1857 | |
| }, | |
| { | |
| "epoch": 6.451977401129944, | |
| "grad_norm": 0.4037077724933624, | |
| "learning_rate": 2.0933503497926888e-07, | |
| "loss": 3.0607, | |
| "step": 1858 | |
| }, | |
| { | |
| "epoch": 6.4554541503694045, | |
| "grad_norm": 0.4005909264087677, | |
| "learning_rate": 2.010509276436201e-07, | |
| "loss": 3.0266, | |
| "step": 1859 | |
| }, | |
| { | |
| "epoch": 6.458930899608866, | |
| "grad_norm": 0.40143847465515137, | |
| "learning_rate": 1.9293373141394122e-07, | |
| "loss": 3.0043, | |
| "step": 1860 | |
| }, | |
| { | |
| "epoch": 6.462407648848327, | |
| "grad_norm": 0.38799387216567993, | |
| "learning_rate": 1.8498347349663602e-07, | |
| "loss": 2.8786, | |
| "step": 1861 | |
| }, | |
| { | |
| "epoch": 6.465884398087788, | |
| "grad_norm": 0.40057605504989624, | |
| "learning_rate": 1.7720018053857257e-07, | |
| "loss": 3.0905, | |
| "step": 1862 | |
| }, | |
| { | |
| "epoch": 6.469361147327249, | |
| "grad_norm": 0.406303733587265, | |
| "learning_rate": 1.6958387862701098e-07, | |
| "loss": 3.1033, | |
| "step": 1863 | |
| }, | |
| { | |
| "epoch": 6.47283789656671, | |
| "grad_norm": 0.40010327100753784, | |
| "learning_rate": 1.6213459328950352e-07, | |
| "loss": 3.0289, | |
| "step": 1864 | |
| }, | |
| { | |
| "epoch": 6.4763146458061716, | |
| "grad_norm": 0.3997277021408081, | |
| "learning_rate": 1.54852349493817e-07, | |
| "loss": 3.0767, | |
| "step": 1865 | |
| }, | |
| { | |
| "epoch": 6.479791395045632, | |
| "grad_norm": 0.40357694029808044, | |
| "learning_rate": 1.4773717164784373e-07, | |
| "loss": 3.0858, | |
| "step": 1866 | |
| }, | |
| { | |
| "epoch": 6.483268144285093, | |
| "grad_norm": 0.4033971130847931, | |
| "learning_rate": 1.4078908359952403e-07, | |
| "loss": 3.0353, | |
| "step": 1867 | |
| }, | |
| { | |
| "epoch": 6.486744893524555, | |
| "grad_norm": 0.40543147921562195, | |
| "learning_rate": 1.3400810863675174e-07, | |
| "loss": 3.0127, | |
| "step": 1868 | |
| }, | |
| { | |
| "epoch": 6.490221642764015, | |
| "grad_norm": 0.40934666991233826, | |
| "learning_rate": 1.2739426948732424e-07, | |
| "loss": 3.132, | |
| "step": 1869 | |
| }, | |
| { | |
| "epoch": 6.493698392003477, | |
| "grad_norm": 0.4018721878528595, | |
| "learning_rate": 1.2094758831883712e-07, | |
| "loss": 3.0282, | |
| "step": 1870 | |
| }, | |
| { | |
| "epoch": 6.497175141242938, | |
| "grad_norm": 0.40862470865249634, | |
| "learning_rate": 1.146680867386285e-07, | |
| "loss": 3.1336, | |
| "step": 1871 | |
| }, | |
| { | |
| "epoch": 6.500651890482399, | |
| "grad_norm": 0.4113106429576874, | |
| "learning_rate": 1.0855578579370695e-07, | |
| "loss": 3.0698, | |
| "step": 1872 | |
| }, | |
| { | |
| "epoch": 6.50412863972186, | |
| "grad_norm": 0.39900723099708557, | |
| "learning_rate": 1.0261070597065713e-07, | |
| "loss": 3.025, | |
| "step": 1873 | |
| }, | |
| { | |
| "epoch": 6.507605388961321, | |
| "grad_norm": 0.39849308133125305, | |
| "learning_rate": 9.683286719560647e-08, | |
| "loss": 3.042, | |
| "step": 1874 | |
| }, | |
| { | |
| "epoch": 6.511082138200782, | |
| "grad_norm": 0.39917635917663574, | |
| "learning_rate": 9.12222888341252e-08, | |
| "loss": 3.0235, | |
| "step": 1875 | |
| }, | |
| { | |
| "epoch": 6.514558887440243, | |
| "grad_norm": 0.40099823474884033, | |
| "learning_rate": 8.577898969119869e-08, | |
| "loss": 3.0162, | |
| "step": 1876 | |
| }, | |
| { | |
| "epoch": 6.518035636679705, | |
| "grad_norm": 0.40142250061035156, | |
| "learning_rate": 8.050298801111633e-08, | |
| "loss": 2.98, | |
| "step": 1877 | |
| }, | |
| { | |
| "epoch": 6.5215123859191655, | |
| "grad_norm": 0.40689295530319214, | |
| "learning_rate": 7.539430147745496e-08, | |
| "loss": 3.1645, | |
| "step": 1878 | |
| }, | |
| { | |
| "epoch": 6.524989135158627, | |
| "grad_norm": 0.39953750371932983, | |
| "learning_rate": 7.045294721299556e-08, | |
| "loss": 3.0468, | |
| "step": 1879 | |
| }, | |
| { | |
| "epoch": 6.528465884398088, | |
| "grad_norm": 0.4039493501186371, | |
| "learning_rate": 6.567894177967326e-08, | |
| "loss": 3.109, | |
| "step": 1880 | |
| }, | |
| { | |
| "epoch": 6.531942633637549, | |
| "grad_norm": 0.4100818336009979, | |
| "learning_rate": 6.107230117851636e-08, | |
| "loss": 3.0905, | |
| "step": 1881 | |
| }, | |
| { | |
| "epoch": 6.53541938287701, | |
| "grad_norm": 0.4003710448741913, | |
| "learning_rate": 5.663304084960186e-08, | |
| "loss": 3.0059, | |
| "step": 1882 | |
| }, | |
| { | |
| "epoch": 6.538896132116471, | |
| "grad_norm": 0.4005975127220154, | |
| "learning_rate": 5.236117567199439e-08, | |
| "loss": 2.9985, | |
| "step": 1883 | |
| }, | |
| { | |
| "epoch": 6.5423728813559325, | |
| "grad_norm": 0.39996832609176636, | |
| "learning_rate": 4.825671996370185e-08, | |
| "loss": 2.993, | |
| "step": 1884 | |
| }, | |
| { | |
| "epoch": 6.545849630595393, | |
| "grad_norm": 0.4038967490196228, | |
| "learning_rate": 4.431968748162541e-08, | |
| "loss": 3.0491, | |
| "step": 1885 | |
| }, | |
| { | |
| "epoch": 6.549326379834854, | |
| "grad_norm": 0.39866870641708374, | |
| "learning_rate": 4.055009142152067e-08, | |
| "loss": 2.9085, | |
| "step": 1886 | |
| }, | |
| { | |
| "epoch": 6.552803129074316, | |
| "grad_norm": 0.40169060230255127, | |
| "learning_rate": 3.6947944417925483e-08, | |
| "loss": 2.9973, | |
| "step": 1887 | |
| }, | |
| { | |
| "epoch": 6.556279878313776, | |
| "grad_norm": 0.41364067792892456, | |
| "learning_rate": 3.351325854417109e-08, | |
| "loss": 3.095, | |
| "step": 1888 | |
| }, | |
| { | |
| "epoch": 6.559756627553238, | |
| "grad_norm": 0.40573641657829285, | |
| "learning_rate": 3.0246045312282144e-08, | |
| "loss": 3.0339, | |
| "step": 1889 | |
| }, | |
| { | |
| "epoch": 6.563233376792699, | |
| "grad_norm": 0.39951014518737793, | |
| "learning_rate": 2.7146315672971212e-08, | |
| "loss": 3.0338, | |
| "step": 1890 | |
| }, | |
| { | |
| "epoch": 6.56671012603216, | |
| "grad_norm": 0.4012530744075775, | |
| "learning_rate": 2.4214080015610986e-08, | |
| "loss": 3.0469, | |
| "step": 1891 | |
| }, | |
| { | |
| "epoch": 6.570186875271621, | |
| "grad_norm": 0.4066547453403473, | |
| "learning_rate": 2.1449348168167682e-08, | |
| "loss": 3.0924, | |
| "step": 1892 | |
| }, | |
| { | |
| "epoch": 6.573663624511082, | |
| "grad_norm": 0.4027867913246155, | |
| "learning_rate": 1.8852129397189942e-08, | |
| "loss": 3.008, | |
| "step": 1893 | |
| }, | |
| { | |
| "epoch": 6.577140373750543, | |
| "grad_norm": 0.4034714102745056, | |
| "learning_rate": 1.6422432407781075e-08, | |
| "loss": 3.1072, | |
| "step": 1894 | |
| }, | |
| { | |
| "epoch": 6.580617122990004, | |
| "grad_norm": 0.3969672620296478, | |
| "learning_rate": 1.4160265343549083e-08, | |
| "loss": 3.0111, | |
| "step": 1895 | |
| }, | |
| { | |
| "epoch": 6.584093872229466, | |
| "grad_norm": 0.4007243514060974, | |
| "learning_rate": 1.2065635786595586e-08, | |
| "loss": 3.0336, | |
| "step": 1896 | |
| }, | |
| { | |
| "epoch": 6.5875706214689265, | |
| "grad_norm": 0.40860840678215027, | |
| "learning_rate": 1.0138550757493592e-08, | |
| "loss": 3.1238, | |
| "step": 1897 | |
| }, | |
| { | |
| "epoch": 6.591047370708388, | |
| "grad_norm": 0.40205299854278564, | |
| "learning_rate": 8.379016715254207e-09, | |
| "loss": 3.0705, | |
| "step": 1898 | |
| }, | |
| { | |
| "epoch": 6.594524119947849, | |
| "grad_norm": 0.39915701746940613, | |
| "learning_rate": 6.78703955730442e-09, | |
| "loss": 2.989, | |
| "step": 1899 | |
| }, | |
| { | |
| "epoch": 6.5980008691873095, | |
| "grad_norm": 0.3982495367527008, | |
| "learning_rate": 5.362624619470458e-09, | |
| "loss": 3.0144, | |
| "step": 1900 | |
| }, | |
| { | |
| "epoch": 6.601477618426771, | |
| "grad_norm": 0.40340107679367065, | |
| "learning_rate": 4.105776675966677e-09, | |
| "loss": 3.0345, | |
| "step": 1901 | |
| }, | |
| { | |
| "epoch": 6.604954367666232, | |
| "grad_norm": 0.3958084285259247, | |
| "learning_rate": 3.0164999393678117e-09, | |
| "loss": 3.0071, | |
| "step": 1902 | |
| }, | |
| { | |
| "epoch": 6.6084311169056935, | |
| "grad_norm": 0.3997930586338043, | |
| "learning_rate": 2.0947980606034203e-09, | |
| "loss": 3.0614, | |
| "step": 1903 | |
| }, | |
| { | |
| "epoch": 6.611907866145154, | |
| "grad_norm": 0.4008050560951233, | |
| "learning_rate": 1.3406741289412329e-09, | |
| "loss": 3.0648, | |
| "step": 1904 | |
| }, | |
| { | |
| "epoch": 6.615384615384615, | |
| "grad_norm": 0.3971216082572937, | |
| "learning_rate": 7.541306719704988e-10, | |
| "loss": 3.0075, | |
| "step": 1905 | |
| }, | |
| { | |
| "epoch": 6.618861364624077, | |
| "grad_norm": 0.39623579382896423, | |
| "learning_rate": 3.3516965562419013e-10, | |
| "loss": 2.9764, | |
| "step": 1906 | |
| }, | |
| { | |
| "epoch": 6.622338113863537, | |
| "grad_norm": 0.39841023087501526, | |
| "learning_rate": 8.379248411793939e-11, | |
| "loss": 3.0144, | |
| "step": 1907 | |
| } | |
| ], | |
| "logging_steps": 1, | |
| "max_steps": 1907, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 7, | |
| "save_steps": 200, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": true | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 1.8113861270435267e+18, | |
| "train_batch_size": 32, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |