Instructions to use furproxy/9b-25 with libraries, inference providers, notebooks, and local apps. Follow these links to get started.
- Libraries
- PEFT
How to use furproxy/9b-25 with PEFT:
```python
from peft import PeftModel
from transformers import AutoModelForCausalLM

base_model = AutoModelForCausalLM.from_pretrained("/workspace/models/Qwen3.5-9B")
model = PeftModel.from_pretrained(base_model, "furproxy/9b-25")
```
- Transformers
How to use furproxy/9b-25 with Transformers:
```python
# Use a pipeline as a high-level helper
from transformers import pipeline

pipe = pipeline("text-generation", model="furproxy/9b-25")
messages = [
    {"role": "user", "content": "Who are you?"},
]
pipe(messages)
```
```python
# Load model directly
from transformers import AutoModel

model = AutoModel.from_pretrained("furproxy/9b-25", dtype="auto")
```
- Notebooks
- Google Colab
- Kaggle
- Local Apps Settings
- vLLM
How to use furproxy/9b-25 with vLLM:
Install from pip and serve model
```bash
# Install vLLM from pip:
pip install vllm

# Start the vLLM server:
vllm serve "furproxy/9b-25"

# Call the server using curl (OpenAI-compatible API):
curl -X POST "http://localhost:8000/v1/chat/completions" \
  -H "Content-Type: application/json" \
  --data '{
    "model": "furproxy/9b-25",
    "messages": [
      {
        "role": "user",
        "content": "What is the capital of France?"
      }
    ]
  }'
```
Use Docker
docker model run hf.co/furproxy/9b-25
- SGLang
How to use furproxy/9b-25 with SGLang:
Install from pip and serve model
```bash
# Install SGLang from pip:
pip install sglang

# Start the SGLang server:
python3 -m sglang.launch_server \
  --model-path "furproxy/9b-25" \
  --host 0.0.0.0 \
  --port 30000

# Call the server using curl (OpenAI-compatible API):
curl -X POST "http://localhost:30000/v1/chat/completions" \
  -H "Content-Type: application/json" \
  --data '{
    "model": "furproxy/9b-25",
    "messages": [
      {
        "role": "user",
        "content": "What is the capital of France?"
      }
    ]
  }'
```
Use Docker images
```bash
docker run --gpus all \
  --shm-size 32g \
  -p 30000:30000 \
  -v ~/.cache/huggingface:/root/.cache/huggingface \
  --env "HF_TOKEN=<secret>" \
  --ipc=host \
  lmsysorg/sglang:latest \
  python3 -m sglang.launch_server \
    --model-path "furproxy/9b-25" \
    --host 0.0.0.0 \
    --port 30000

# Call the server using curl (OpenAI-compatible API):
curl -X POST "http://localhost:30000/v1/chat/completions" \
  -H "Content-Type: application/json" \
  --data '{
    "model": "furproxy/9b-25",
    "messages": [
      {
        "role": "user",
        "content": "What is the capital of France?"
      }
    ]
  }'
```
- Docker Model Runner
How to use furproxy/9b-25 with Docker Model Runner:
docker model run hf.co/furproxy/9b-25
| { | |
| "best_global_step": null, | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 3.0, | |
| "eval_steps": 500, | |
| "global_step": 2055, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.00291970802919708, | |
| "grad_norm": 0.7710117101669312, | |
| "learning_rate": 9.70873786407767e-13, | |
| "loss": 1.7770352363586426, | |
| "step": 2 | |
| }, | |
| { | |
| "epoch": 0.00583941605839416, | |
| "grad_norm": 0.50068199634552, | |
| "learning_rate": 2.912621359223301e-12, | |
| "loss": 1.800447702407837, | |
| "step": 4 | |
| }, | |
| { | |
| "epoch": 0.008759124087591242, | |
| "grad_norm": 0.4896266758441925, | |
| "learning_rate": 4.854368932038835e-12, | |
| "loss": 1.8154051303863525, | |
| "step": 6 | |
| }, | |
| { | |
| "epoch": 0.01167883211678832, | |
| "grad_norm": 0.5296488404273987, | |
| "learning_rate": 6.796116504854369e-12, | |
| "loss": 1.8012330532073975, | |
| "step": 8 | |
| }, | |
| { | |
| "epoch": 0.014598540145985401, | |
| "grad_norm": 0.5107647776603699, | |
| "learning_rate": 8.737864077669904e-12, | |
| "loss": 1.7116845846176147, | |
| "step": 10 | |
| }, | |
| { | |
| "epoch": 0.017518248175182483, | |
| "grad_norm": 1.2904026508331299, | |
| "learning_rate": 1.0679611650485436e-11, | |
| "loss": 1.7498056888580322, | |
| "step": 12 | |
| }, | |
| { | |
| "epoch": 0.020437956204379562, | |
| "grad_norm": 0.7178974151611328, | |
| "learning_rate": 1.2621359223300972e-11, | |
| "loss": 1.6429134607315063, | |
| "step": 14 | |
| }, | |
| { | |
| "epoch": 0.02335766423357664, | |
| "grad_norm": 0.7781834602355957, | |
| "learning_rate": 1.4563106796116506e-11, | |
| "loss": 1.7503031492233276, | |
| "step": 16 | |
| }, | |
| { | |
| "epoch": 0.026277372262773723, | |
| "grad_norm": 0.4699067771434784, | |
| "learning_rate": 1.6504854368932042e-11, | |
| "loss": 1.523249626159668, | |
| "step": 18 | |
| }, | |
| { | |
| "epoch": 0.029197080291970802, | |
| "grad_norm": 0.47985774278640747, | |
| "learning_rate": 1.8446601941747574e-11, | |
| "loss": 1.8180807828903198, | |
| "step": 20 | |
| }, | |
| { | |
| "epoch": 0.032116788321167884, | |
| "grad_norm": 0.696946382522583, | |
| "learning_rate": 2.0388349514563107e-11, | |
| "loss": 1.7482869625091553, | |
| "step": 22 | |
| }, | |
| { | |
| "epoch": 0.035036496350364967, | |
| "grad_norm": 0.3507264256477356, | |
| "learning_rate": 2.2330097087378642e-11, | |
| "loss": 1.5782928466796875, | |
| "step": 24 | |
| }, | |
| { | |
| "epoch": 0.03795620437956204, | |
| "grad_norm": 0.5048494338989258, | |
| "learning_rate": 2.4271844660194175e-11, | |
| "loss": 1.7402620315551758, | |
| "step": 26 | |
| }, | |
| { | |
| "epoch": 0.040875912408759124, | |
| "grad_norm": 1.353652834892273, | |
| "learning_rate": 2.621359223300971e-11, | |
| "loss": 1.9062652587890625, | |
| "step": 28 | |
| }, | |
| { | |
| "epoch": 0.043795620437956206, | |
| "grad_norm": 1.4105503559112549, | |
| "learning_rate": 2.8155339805825243e-11, | |
| "loss": 1.7928016185760498, | |
| "step": 30 | |
| }, | |
| { | |
| "epoch": 0.04671532846715328, | |
| "grad_norm": 0.6986204981803894, | |
| "learning_rate": 3.009708737864078e-11, | |
| "loss": 1.7498691082000732, | |
| "step": 32 | |
| }, | |
| { | |
| "epoch": 0.049635036496350364, | |
| "grad_norm": 0.32239457964897156, | |
| "learning_rate": 3.203883495145632e-11, | |
| "loss": 1.4551384449005127, | |
| "step": 34 | |
| }, | |
| { | |
| "epoch": 0.052554744525547446, | |
| "grad_norm": 0.3401832580566406, | |
| "learning_rate": 3.398058252427185e-11, | |
| "loss": 1.3110437393188477, | |
| "step": 36 | |
| }, | |
| { | |
| "epoch": 0.05547445255474453, | |
| "grad_norm": 0.5031893253326416, | |
| "learning_rate": 3.592233009708738e-11, | |
| "loss": 1.6439049243927002, | |
| "step": 38 | |
| }, | |
| { | |
| "epoch": 0.058394160583941604, | |
| "grad_norm": 0.4967627227306366, | |
| "learning_rate": 3.7864077669902915e-11, | |
| "loss": 1.793494462966919, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 0.061313868613138686, | |
| "grad_norm": 0.8739052414894104, | |
| "learning_rate": 3.980582524271845e-11, | |
| "loss": 1.8159303665161133, | |
| "step": 42 | |
| }, | |
| { | |
| "epoch": 0.06423357664233577, | |
| "grad_norm": 1.0607365369796753, | |
| "learning_rate": 4.174757281553398e-11, | |
| "loss": 1.8359348773956299, | |
| "step": 44 | |
| }, | |
| { | |
| "epoch": 0.06715328467153285, | |
| "grad_norm": 1.063367486000061, | |
| "learning_rate": 4.368932038834951e-11, | |
| "loss": 2.0863425731658936, | |
| "step": 46 | |
| }, | |
| { | |
| "epoch": 0.07007299270072993, | |
| "grad_norm": 1.1204711198806763, | |
| "learning_rate": 4.563106796116505e-11, | |
| "loss": 1.9219363927841187, | |
| "step": 48 | |
| }, | |
| { | |
| "epoch": 0.072992700729927, | |
| "grad_norm": 0.5453667044639587, | |
| "learning_rate": 4.757281553398058e-11, | |
| "loss": 1.6786893606185913, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 0.07591240875912408, | |
| "grad_norm": 0.6092082262039185, | |
| "learning_rate": 4.9514563106796115e-11, | |
| "loss": 1.8152481317520142, | |
| "step": 52 | |
| }, | |
| { | |
| "epoch": 0.07883211678832117, | |
| "grad_norm": 0.9141983985900879, | |
| "learning_rate": 5.1456310679611654e-11, | |
| "loss": 1.858360767364502, | |
| "step": 54 | |
| }, | |
| { | |
| "epoch": 0.08175182481751825, | |
| "grad_norm": 0.5577829480171204, | |
| "learning_rate": 5.339805825242719e-11, | |
| "loss": 1.8900744915008545, | |
| "step": 56 | |
| }, | |
| { | |
| "epoch": 0.08467153284671533, | |
| "grad_norm": 0.4391677677631378, | |
| "learning_rate": 5.533980582524272e-11, | |
| "loss": 1.4955909252166748, | |
| "step": 58 | |
| }, | |
| { | |
| "epoch": 0.08759124087591241, | |
| "grad_norm": 0.37971094250679016, | |
| "learning_rate": 5.728155339805825e-11, | |
| "loss": 1.445906639099121, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 0.0905109489051095, | |
| "grad_norm": 1.3188059329986572, | |
| "learning_rate": 5.922330097087378e-11, | |
| "loss": 1.5197231769561768, | |
| "step": 62 | |
| }, | |
| { | |
| "epoch": 0.09343065693430656, | |
| "grad_norm": 0.6179693341255188, | |
| "learning_rate": 6.116504854368932e-11, | |
| "loss": 1.6352086067199707, | |
| "step": 64 | |
| }, | |
| { | |
| "epoch": 0.09635036496350365, | |
| "grad_norm": 0.4383544325828552, | |
| "learning_rate": 6.310679611650486e-11, | |
| "loss": 1.618710994720459, | |
| "step": 66 | |
| }, | |
| { | |
| "epoch": 0.09927007299270073, | |
| "grad_norm": 0.829305112361908, | |
| "learning_rate": 6.50485436893204e-11, | |
| "loss": 1.593348741531372, | |
| "step": 68 | |
| }, | |
| { | |
| "epoch": 0.10218978102189781, | |
| "grad_norm": 0.4982835352420807, | |
| "learning_rate": 6.699029126213593e-11, | |
| "loss": 1.678452730178833, | |
| "step": 70 | |
| }, | |
| { | |
| "epoch": 0.10510948905109489, | |
| "grad_norm": 0.8297693133354187, | |
| "learning_rate": 6.893203883495146e-11, | |
| "loss": 1.84993314743042, | |
| "step": 72 | |
| }, | |
| { | |
| "epoch": 0.10802919708029197, | |
| "grad_norm": 0.45759549736976624, | |
| "learning_rate": 7.087378640776699e-11, | |
| "loss": 1.8269462585449219, | |
| "step": 74 | |
| }, | |
| { | |
| "epoch": 0.11094890510948906, | |
| "grad_norm": 2.368788957595825, | |
| "learning_rate": 7.281553398058252e-11, | |
| "loss": 1.8063817024230957, | |
| "step": 76 | |
| }, | |
| { | |
| "epoch": 0.11386861313868613, | |
| "grad_norm": 0.9492171406745911, | |
| "learning_rate": 7.475728155339806e-11, | |
| "loss": 1.923382043838501, | |
| "step": 78 | |
| }, | |
| { | |
| "epoch": 0.11678832116788321, | |
| "grad_norm": 2.991898536682129, | |
| "learning_rate": 7.669902912621359e-11, | |
| "loss": 1.8141722679138184, | |
| "step": 80 | |
| }, | |
| { | |
| "epoch": 0.11970802919708029, | |
| "grad_norm": 1.3362377882003784, | |
| "learning_rate": 7.864077669902912e-11, | |
| "loss": 1.7752718925476074, | |
| "step": 82 | |
| }, | |
| { | |
| "epoch": 0.12262773722627737, | |
| "grad_norm": 1.5340899229049683, | |
| "learning_rate": 8.058252427184467e-11, | |
| "loss": 1.9735075235366821, | |
| "step": 84 | |
| }, | |
| { | |
| "epoch": 0.12554744525547445, | |
| "grad_norm": 0.6581540703773499, | |
| "learning_rate": 8.25242718446602e-11, | |
| "loss": 1.729853868484497, | |
| "step": 86 | |
| }, | |
| { | |
| "epoch": 0.12846715328467154, | |
| "grad_norm": 1.8029367923736572, | |
| "learning_rate": 8.446601941747573e-11, | |
| "loss": 1.5339643955230713, | |
| "step": 88 | |
| }, | |
| { | |
| "epoch": 0.13138686131386862, | |
| "grad_norm": 0.6883419156074524, | |
| "learning_rate": 8.640776699029126e-11, | |
| "loss": 1.8178942203521729, | |
| "step": 90 | |
| }, | |
| { | |
| "epoch": 0.1343065693430657, | |
| "grad_norm": 0.4987771213054657, | |
| "learning_rate": 8.834951456310681e-11, | |
| "loss": 1.8227510452270508, | |
| "step": 92 | |
| }, | |
| { | |
| "epoch": 0.13722627737226278, | |
| "grad_norm": 0.567257821559906, | |
| "learning_rate": 9.029126213592234e-11, | |
| "loss": 1.894646406173706, | |
| "step": 94 | |
| }, | |
| { | |
| "epoch": 0.14014598540145987, | |
| "grad_norm": 0.8034306764602661, | |
| "learning_rate": 9.223300970873787e-11, | |
| "loss": 1.7770830392837524, | |
| "step": 96 | |
| }, | |
| { | |
| "epoch": 0.14306569343065692, | |
| "grad_norm": 0.6262503266334534, | |
| "learning_rate": 9.41747572815534e-11, | |
| "loss": 1.7002520561218262, | |
| "step": 98 | |
| }, | |
| { | |
| "epoch": 0.145985401459854, | |
| "grad_norm": 2.610974073410034, | |
| "learning_rate": 9.611650485436894e-11, | |
| "loss": 1.9687740802764893, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.14890510948905109, | |
| "grad_norm": 0.995183527469635, | |
| "learning_rate": 9.805825242718447e-11, | |
| "loss": 1.901183843612671, | |
| "step": 102 | |
| }, | |
| { | |
| "epoch": 0.15182481751824817, | |
| "grad_norm": 1.16482675075531, | |
| "learning_rate": 1e-10, | |
| "loss": 1.745378017425537, | |
| "step": 104 | |
| }, | |
| { | |
| "epoch": 0.15474452554744525, | |
| "grad_norm": 0.6300555467605591, | |
| "learning_rate": 9.999976687854062e-11, | |
| "loss": 1.8623970746994019, | |
| "step": 106 | |
| }, | |
| { | |
| "epoch": 0.15766423357664233, | |
| "grad_norm": 0.7828406691551208, | |
| "learning_rate": 9.999906751657785e-11, | |
| "loss": 1.9137486219406128, | |
| "step": 108 | |
| }, | |
| { | |
| "epoch": 0.16058394160583941, | |
| "grad_norm": 0.5942174792289734, | |
| "learning_rate": 9.999790192135772e-11, | |
| "loss": 1.825657606124878, | |
| "step": 110 | |
| }, | |
| { | |
| "epoch": 0.1635036496350365, | |
| "grad_norm": 0.8169430494308472, | |
| "learning_rate": 9.999627010495695e-11, | |
| "loss": 1.8757758140563965, | |
| "step": 112 | |
| }, | |
| { | |
| "epoch": 0.16642335766423358, | |
| "grad_norm": 0.5821642875671387, | |
| "learning_rate": 9.999417208428267e-11, | |
| "loss": 1.6332734823226929, | |
| "step": 114 | |
| }, | |
| { | |
| "epoch": 0.16934306569343066, | |
| "grad_norm": 0.7255250811576843, | |
| "learning_rate": 9.999160788107241e-11, | |
| "loss": 1.946354866027832, | |
| "step": 116 | |
| }, | |
| { | |
| "epoch": 0.17226277372262774, | |
| "grad_norm": 0.4682669937610626, | |
| "learning_rate": 9.998857752189376e-11, | |
| "loss": 1.8493037223815918, | |
| "step": 118 | |
| }, | |
| { | |
| "epoch": 0.17518248175182483, | |
| "grad_norm": 0.9524459838867188, | |
| "learning_rate": 9.99850810381441e-11, | |
| "loss": 1.9434313774108887, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 0.1781021897810219, | |
| "grad_norm": 0.659051775932312, | |
| "learning_rate": 9.998111846605035e-11, | |
| "loss": 1.8111573457717896, | |
| "step": 122 | |
| }, | |
| { | |
| "epoch": 0.181021897810219, | |
| "grad_norm": 0.5975978374481201, | |
| "learning_rate": 9.997668984666856e-11, | |
| "loss": 1.737710952758789, | |
| "step": 124 | |
| }, | |
| { | |
| "epoch": 0.18394160583941604, | |
| "grad_norm": 0.7603049278259277, | |
| "learning_rate": 9.99717952258834e-11, | |
| "loss": 1.9193196296691895, | |
| "step": 126 | |
| }, | |
| { | |
| "epoch": 0.18686131386861313, | |
| "grad_norm": 0.49651116132736206, | |
| "learning_rate": 9.996643465440788e-11, | |
| "loss": 1.7929141521453857, | |
| "step": 128 | |
| }, | |
| { | |
| "epoch": 0.1897810218978102, | |
| "grad_norm": 0.6368911862373352, | |
| "learning_rate": 9.996060818778257e-11, | |
| "loss": 1.8175899982452393, | |
| "step": 130 | |
| }, | |
| { | |
| "epoch": 0.1927007299270073, | |
| "grad_norm": 0.670755922794342, | |
| "learning_rate": 9.995431588637526e-11, | |
| "loss": 1.8839011192321777, | |
| "step": 132 | |
| }, | |
| { | |
| "epoch": 0.19562043795620437, | |
| "grad_norm": 0.38968625664711, | |
| "learning_rate": 9.994755781538018e-11, | |
| "loss": 1.705913782119751, | |
| "step": 134 | |
| }, | |
| { | |
| "epoch": 0.19854014598540146, | |
| "grad_norm": 0.5185222625732422, | |
| "learning_rate": 9.994033404481737e-11, | |
| "loss": 1.5453472137451172, | |
| "step": 136 | |
| }, | |
| { | |
| "epoch": 0.20145985401459854, | |
| "grad_norm": 0.3967597484588623, | |
| "learning_rate": 9.993264464953202e-11, | |
| "loss": 1.3077566623687744, | |
| "step": 138 | |
| }, | |
| { | |
| "epoch": 0.20437956204379562, | |
| "grad_norm": 0.570259690284729, | |
| "learning_rate": 9.992448970919358e-11, | |
| "loss": 1.7901952266693115, | |
| "step": 140 | |
| }, | |
| { | |
| "epoch": 0.2072992700729927, | |
| "grad_norm": 0.5491088628768921, | |
| "learning_rate": 9.991586930829501e-11, | |
| "loss": 1.7135465145111084, | |
| "step": 142 | |
| }, | |
| { | |
| "epoch": 0.21021897810218979, | |
| "grad_norm": 0.6296722888946533, | |
| "learning_rate": 9.990678353615189e-11, | |
| "loss": 1.6692614555358887, | |
| "step": 144 | |
| }, | |
| { | |
| "epoch": 0.21313868613138687, | |
| "grad_norm": 0.8743859529495239, | |
| "learning_rate": 9.989723248690149e-11, | |
| "loss": 1.8054659366607666, | |
| "step": 146 | |
| }, | |
| { | |
| "epoch": 0.21605839416058395, | |
| "grad_norm": 0.6504361629486084, | |
| "learning_rate": 9.988721625950177e-11, | |
| "loss": 1.6982183456420898, | |
| "step": 148 | |
| }, | |
| { | |
| "epoch": 0.21897810218978103, | |
| "grad_norm": 0.5439779758453369, | |
| "learning_rate": 9.987673495773042e-11, | |
| "loss": 1.8458068370819092, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 0.22189781021897811, | |
| "grad_norm": 0.614953339099884, | |
| "learning_rate": 9.986578869018371e-11, | |
| "loss": 1.8946077823638916, | |
| "step": 152 | |
| }, | |
| { | |
| "epoch": 0.22481751824817517, | |
| "grad_norm": 0.745878279209137, | |
| "learning_rate": 9.985437757027541e-11, | |
| "loss": 1.9836013317108154, | |
| "step": 154 | |
| }, | |
| { | |
| "epoch": 0.22773722627737225, | |
| "grad_norm": 1.0096070766448975, | |
| "learning_rate": 9.98425017162356e-11, | |
| "loss": 1.8608181476593018, | |
| "step": 156 | |
| }, | |
| { | |
| "epoch": 0.23065693430656933, | |
| "grad_norm": 0.643815815448761, | |
| "learning_rate": 9.98301612511095e-11, | |
| "loss": 1.9647486209869385, | |
| "step": 158 | |
| }, | |
| { | |
| "epoch": 0.23357664233576642, | |
| "grad_norm": 1.723301887512207, | |
| "learning_rate": 9.981735630275602e-11, | |
| "loss": 2.0344762802124023, | |
| "step": 160 | |
| }, | |
| { | |
| "epoch": 0.2364963503649635, | |
| "grad_norm": 0.9570946097373962, | |
| "learning_rate": 9.980408700384671e-11, | |
| "loss": 2.0314955711364746, | |
| "step": 162 | |
| }, | |
| { | |
| "epoch": 0.23941605839416058, | |
| "grad_norm": 1.2537389993667603, | |
| "learning_rate": 9.979035349186414e-11, | |
| "loss": 2.1425282955169678, | |
| "step": 164 | |
| }, | |
| { | |
| "epoch": 0.24233576642335766, | |
| "grad_norm": 1.5286678075790405, | |
| "learning_rate": 9.977615590910056e-11, | |
| "loss": 2.0133280754089355, | |
| "step": 166 | |
| }, | |
| { | |
| "epoch": 0.24525547445255474, | |
| "grad_norm": 2.3915164470672607, | |
| "learning_rate": 9.976149440265651e-11, | |
| "loss": 2.32820987701416, | |
| "step": 168 | |
| }, | |
| { | |
| "epoch": 0.24817518248175183, | |
| "grad_norm": 1.9102085828781128, | |
| "learning_rate": 9.974636912443916e-11, | |
| "loss": 1.8823760747909546, | |
| "step": 170 | |
| }, | |
| { | |
| "epoch": 0.2510948905109489, | |
| "grad_norm": 1.7624311447143555, | |
| "learning_rate": 9.973078023116083e-11, | |
| "loss": 1.8588638305664062, | |
| "step": 172 | |
| }, | |
| { | |
| "epoch": 0.25401459854014596, | |
| "grad_norm": 2.337764263153076, | |
| "learning_rate": 9.971472788433731e-11, | |
| "loss": 2.026655435562134, | |
| "step": 174 | |
| }, | |
| { | |
| "epoch": 0.2569343065693431, | |
| "grad_norm": 2.6706764698028564, | |
| "learning_rate": 9.969821225028622e-11, | |
| "loss": 2.4073486328125, | |
| "step": 176 | |
| }, | |
| { | |
| "epoch": 0.25985401459854013, | |
| "grad_norm": 0.8066958785057068, | |
| "learning_rate": 9.968123350012531e-11, | |
| "loss": 1.744017243385315, | |
| "step": 178 | |
| }, | |
| { | |
| "epoch": 0.26277372262773724, | |
| "grad_norm": 1.1286042928695679, | |
| "learning_rate": 9.966379180977059e-11, | |
| "loss": 1.6937881708145142, | |
| "step": 180 | |
| }, | |
| { | |
| "epoch": 0.2656934306569343, | |
| "grad_norm": 0.5080934762954712, | |
| "learning_rate": 9.964588735993461e-11, | |
| "loss": 1.5222389698028564, | |
| "step": 182 | |
| }, | |
| { | |
| "epoch": 0.2686131386861314, | |
| "grad_norm": 1.329997181892395, | |
| "learning_rate": 9.962752033612457e-11, | |
| "loss": 1.7388195991516113, | |
| "step": 184 | |
| }, | |
| { | |
| "epoch": 0.27153284671532846, | |
| "grad_norm": 1.7352726459503174, | |
| "learning_rate": 9.960869092864034e-11, | |
| "loss": 1.5500221252441406, | |
| "step": 186 | |
| }, | |
| { | |
| "epoch": 0.27445255474452557, | |
| "grad_norm": 0.6620221734046936, | |
| "learning_rate": 9.958939933257254e-11, | |
| "loss": 1.4342440366744995, | |
| "step": 188 | |
| }, | |
| { | |
| "epoch": 0.2773722627737226, | |
| "grad_norm": 0.46588146686553955, | |
| "learning_rate": 9.95696457478005e-11, | |
| "loss": 1.5565261840820312, | |
| "step": 190 | |
| }, | |
| { | |
| "epoch": 0.28029197080291973, | |
| "grad_norm": 0.6279640793800354, | |
| "learning_rate": 9.954943037899022e-11, | |
| "loss": 1.175281286239624, | |
| "step": 192 | |
| }, | |
| { | |
| "epoch": 0.2832116788321168, | |
| "grad_norm": 1.3046506643295288, | |
| "learning_rate": 9.952875343559217e-11, | |
| "loss": 1.6035385131835938, | |
| "step": 194 | |
| }, | |
| { | |
| "epoch": 0.28613138686131384, | |
| "grad_norm": 1.4405308961868286, | |
| "learning_rate": 9.950761513183924e-11, | |
| "loss": 1.9352498054504395, | |
| "step": 196 | |
| }, | |
| { | |
| "epoch": 0.28905109489051095, | |
| "grad_norm": 3.9479691982269287, | |
| "learning_rate": 9.948601568674439e-11, | |
| "loss": 1.969503402709961, | |
| "step": 198 | |
| }, | |
| { | |
| "epoch": 0.291970802919708, | |
| "grad_norm": 1.7318059206008911, | |
| "learning_rate": 9.946395532409846e-11, | |
| "loss": 1.6835405826568604, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.2948905109489051, | |
| "grad_norm": 2.14038348197937, | |
| "learning_rate": 9.94414342724679e-11, | |
| "loss": 2.306647539138794, | |
| "step": 202 | |
| }, | |
| { | |
| "epoch": 0.29781021897810217, | |
| "grad_norm": 0.5154449343681335, | |
| "learning_rate": 9.941845276519223e-11, | |
| "loss": 1.259742021560669, | |
| "step": 204 | |
| }, | |
| { | |
| "epoch": 0.3007299270072993, | |
| "grad_norm": 0.6503131985664368, | |
| "learning_rate": 9.93950110403818e-11, | |
| "loss": 1.5660749673843384, | |
| "step": 206 | |
| }, | |
| { | |
| "epoch": 0.30364963503649633, | |
| "grad_norm": 1.3218872547149658, | |
| "learning_rate": 9.937110934091525e-11, | |
| "loss": 1.9479970932006836, | |
| "step": 208 | |
| }, | |
| { | |
| "epoch": 0.30656934306569344, | |
| "grad_norm": 0.7453085780143738, | |
| "learning_rate": 9.934674791443698e-11, | |
| "loss": 1.6896703243255615, | |
| "step": 210 | |
| }, | |
| { | |
| "epoch": 0.3094890510948905, | |
| "grad_norm": 0.6497874855995178, | |
| "learning_rate": 9.932192701335459e-11, | |
| "loss": 1.8161481618881226, | |
| "step": 212 | |
| }, | |
| { | |
| "epoch": 0.3124087591240876, | |
| "grad_norm": 0.7945151329040527, | |
| "learning_rate": 9.929664689483631e-11, | |
| "loss": 1.7774834632873535, | |
| "step": 214 | |
| }, | |
| { | |
| "epoch": 0.31532846715328466, | |
| "grad_norm": 1.80641770362854, | |
| "learning_rate": 9.927090782080824e-11, | |
| "loss": 1.6096701622009277, | |
| "step": 216 | |
| }, | |
| { | |
| "epoch": 0.3182481751824818, | |
| "grad_norm": 0.3868231177330017, | |
| "learning_rate": 9.924471005795176e-11, | |
| "loss": 1.6356284618377686, | |
| "step": 218 | |
| }, | |
| { | |
| "epoch": 0.32116788321167883, | |
| "grad_norm": 0.7032894492149353, | |
| "learning_rate": 9.92180538777007e-11, | |
| "loss": 1.5516180992126465, | |
| "step": 220 | |
| }, | |
| { | |
| "epoch": 0.32408759124087594, | |
| "grad_norm": 1.5395406484603882, | |
| "learning_rate": 9.919093955623848e-11, | |
| "loss": 1.3875888586044312, | |
| "step": 222 | |
| }, | |
| { | |
| "epoch": 0.327007299270073, | |
| "grad_norm": 1.8626580238342285, | |
| "learning_rate": 9.916336737449534e-11, | |
| "loss": 2.228710889816284, | |
| "step": 224 | |
| }, | |
| { | |
| "epoch": 0.32992700729927005, | |
| "grad_norm": 0.573098361492157, | |
| "learning_rate": 9.913533761814537e-11, | |
| "loss": 1.7829864025115967, | |
| "step": 226 | |
| }, | |
| { | |
| "epoch": 0.33284671532846716, | |
| "grad_norm": 0.7861718535423279, | |
| "learning_rate": 9.91068505776036e-11, | |
| "loss": 1.7219140529632568, | |
| "step": 228 | |
| }, | |
| { | |
| "epoch": 0.3357664233576642, | |
| "grad_norm": 32.82694625854492, | |
| "learning_rate": 9.907790654802293e-11, | |
| "loss": 1.5117745399475098, | |
| "step": 230 | |
| }, | |
| { | |
| "epoch": 0.3386861313868613, | |
| "grad_norm": 1.578041672706604, | |
| "learning_rate": 9.90485058292911e-11, | |
| "loss": 1.9717309474945068, | |
| "step": 232 | |
| }, | |
| { | |
| "epoch": 0.3416058394160584, | |
| "grad_norm": 1.4934152364730835, | |
| "learning_rate": 9.901864872602762e-11, | |
| "loss": 1.9546431303024292, | |
| "step": 234 | |
| }, | |
| { | |
| "epoch": 0.3445255474452555, | |
| "grad_norm": 1.7539751529693604, | |
| "learning_rate": 9.898833554758054e-11, | |
| "loss": 1.9993805885314941, | |
| "step": 236 | |
| }, | |
| { | |
| "epoch": 0.34744525547445254, | |
| "grad_norm": 1.6374447345733643, | |
| "learning_rate": 9.895756660802331e-11, | |
| "loss": 1.9350717067718506, | |
| "step": 238 | |
| }, | |
| { | |
| "epoch": 0.35036496350364965, | |
| "grad_norm": 0.5807626247406006, | |
| "learning_rate": 9.892634222615147e-11, | |
| "loss": 1.7368359565734863, | |
| "step": 240 | |
| }, | |
| { | |
| "epoch": 0.3532846715328467, | |
| "grad_norm": 1.5483769178390503, | |
| "learning_rate": 9.889466272547941e-11, | |
| "loss": 2.0402066707611084, | |
| "step": 242 | |
| }, | |
| { | |
| "epoch": 0.3562043795620438, | |
| "grad_norm": 1.539088487625122, | |
| "learning_rate": 9.886252843423699e-11, | |
| "loss": 1.8806483745574951, | |
| "step": 244 | |
| }, | |
| { | |
| "epoch": 0.35912408759124087, | |
| "grad_norm": 0.8003555536270142, | |
| "learning_rate": 9.882993968536607e-11, | |
| "loss": 1.7510344982147217, | |
| "step": 246 | |
| }, | |
| { | |
| "epoch": 0.362043795620438, | |
| "grad_norm": 0.9570001363754272, | |
| "learning_rate": 9.879689681651721e-11, | |
| "loss": 2.15889573097229, | |
| "step": 248 | |
| }, | |
| { | |
| "epoch": 0.36496350364963503, | |
| "grad_norm": 6.615765571594238, | |
| "learning_rate": 9.876340017004604e-11, | |
| "loss": 2.0510971546173096, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 0.3678832116788321, | |
| "grad_norm": 1.2032502889633179, | |
| "learning_rate": 9.872945009300975e-11, | |
| "loss": 2.085731029510498, | |
| "step": 252 | |
| }, | |
| { | |
| "epoch": 0.3708029197080292, | |
| "grad_norm": 2.6892430782318115, | |
| "learning_rate": 9.869504693716353e-11, | |
| "loss": 2.20062255859375, | |
| "step": 254 | |
| }, | |
| { | |
| "epoch": 0.37372262773722625, | |
| "grad_norm": 1.8306725025177002, | |
| "learning_rate": 9.866019105895686e-11, | |
| "loss": 1.9631264209747314, | |
| "step": 256 | |
| }, | |
| { | |
| "epoch": 0.37664233576642336, | |
| "grad_norm": 10.656391143798828, | |
| "learning_rate": 9.862488281952992e-11, | |
| "loss": 1.8208003044128418, | |
| "step": 258 | |
| }, | |
| { | |
| "epoch": 0.3795620437956204, | |
| "grad_norm": 0.5579459071159363, | |
| "learning_rate": 9.858912258470973e-11, | |
| "loss": 1.6464289426803589, | |
| "step": 260 | |
| }, | |
| { | |
| "epoch": 0.38248175182481753, | |
| "grad_norm": 1.2717616558074951, | |
| "learning_rate": 9.855291072500643e-11, | |
| "loss": 1.8431742191314697, | |
| "step": 262 | |
| }, | |
| { | |
| "epoch": 0.3854014598540146, | |
| "grad_norm": 1.3744527101516724, | |
| "learning_rate": 9.851624761560942e-11, | |
| "loss": 2.367558479309082, | |
| "step": 264 | |
| }, | |
| { | |
| "epoch": 0.3883211678832117, | |
| "grad_norm": 2.548898696899414, | |
| "learning_rate": 9.847913363638348e-11, | |
| "loss": 1.9910647869110107, | |
| "step": 266 | |
| }, | |
| { | |
| "epoch": 0.39124087591240875, | |
| "grad_norm": 0.8033563494682312, | |
| "learning_rate": 9.844156917186485e-11, | |
| "loss": 1.638918399810791, | |
| "step": 268 | |
| }, | |
| { | |
| "epoch": 0.39416058394160586, | |
| "grad_norm": 6.324929714202881, | |
| "learning_rate": 9.840355461125717e-11, | |
| "loss": 1.8016602993011475, | |
| "step": 270 | |
| }, | |
| { | |
| "epoch": 0.3970802919708029, | |
| "grad_norm": 1.221652626991272, | |
| "learning_rate": 9.836509034842758e-11, | |
| "loss": 1.7385075092315674, | |
| "step": 272 | |
| }, | |
| { | |
| "epoch": 0.4, | |
| "grad_norm": 0.5838031768798828, | |
| "learning_rate": 9.832617678190252e-11, | |
| "loss": 1.7498688697814941, | |
| "step": 274 | |
| }, | |
| { | |
| "epoch": 0.4029197080291971, | |
| "grad_norm": 0.7867501974105835, | |
| "learning_rate": 9.828681431486365e-11, | |
| "loss": 1.8570300340652466, | |
| "step": 276 | |
| }, | |
| { | |
| "epoch": 0.4058394160583942, | |
| "grad_norm": 3.808224678039551, | |
| "learning_rate": 9.824700335514367e-11, | |
| "loss": 1.9073562622070312, | |
| "step": 278 | |
| }, | |
| { | |
| "epoch": 0.40875912408759124, | |
| "grad_norm": 2.6232757568359375, | |
| "learning_rate": 9.820674431522208e-11, | |
| "loss": 1.7951061725616455, | |
| "step": 280 | |
| }, | |
| { | |
| "epoch": 0.4116788321167883, | |
| "grad_norm": 1.330126166343689, | |
| "learning_rate": 9.816603761222096e-11, | |
| "loss": 1.9539861679077148, | |
| "step": 282 | |
| }, | |
| { | |
| "epoch": 0.4145985401459854, | |
| "grad_norm": 0.8891125321388245, | |
| "learning_rate": 9.812488366790056e-11, | |
| "loss": 2.0036733150482178, | |
| "step": 284 | |
| }, | |
| { | |
| "epoch": 0.41751824817518246, | |
| "grad_norm": 2.2520751953125, | |
| "learning_rate": 9.808328290865499e-11, | |
| "loss": 1.762925624847412, | |
| "step": 286 | |
| }, | |
| { | |
| "epoch": 0.42043795620437957, | |
| "grad_norm": 0.7500330805778503, | |
| "learning_rate": 9.80412357655078e-11, | |
| "loss": 1.6032809019088745, | |
| "step": 288 | |
| }, | |
| { | |
| "epoch": 0.4233576642335766, | |
| "grad_norm": 1.2517192363739014, | |
| "learning_rate": 9.799874267410747e-11, | |
| "loss": 1.6832876205444336, | |
| "step": 290 | |
| }, | |
| { | |
| "epoch": 0.42627737226277373, | |
| "grad_norm": 0.9904695153236389, | |
| "learning_rate": 9.7955804074723e-11, | |
| "loss": 1.8667923212051392, | |
| "step": 292 | |
| }, | |
| { | |
| "epoch": 0.4291970802919708, | |
| "grad_norm": 0.9252429604530334, | |
| "learning_rate": 9.791242041223921e-11, | |
| "loss": 1.9119977951049805, | |
| "step": 294 | |
| }, | |
| { | |
| "epoch": 0.4321167883211679, | |
| "grad_norm": 0.8679836392402649, | |
| "learning_rate": 9.786859213615222e-11, | |
| "loss": 1.3596782684326172, | |
| "step": 296 | |
| }, | |
| { | |
| "epoch": 0.43503649635036495, | |
| "grad_norm": 1.5256797075271606, | |
| "learning_rate": 9.782431970056477e-11, | |
| "loss": 1.7140305042266846, | |
| "step": 298 | |
| }, | |
| { | |
| "epoch": 0.43795620437956206, | |
| "grad_norm": 1.0407711267471313, | |
| "learning_rate": 9.777960356418152e-11, | |
| "loss": 1.7743761539459229, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 0.4408759124087591, | |
| "grad_norm": 0.9178169965744019, | |
| "learning_rate": 9.773444419030429e-11, | |
| "loss": 1.6715781688690186, | |
| "step": 302 | |
| }, | |
| { | |
| "epoch": 0.44379562043795623, | |
| "grad_norm": 0.8948725461959839, | |
| "learning_rate": 9.768884204682725e-11, | |
| "loss": 1.8895018100738525, | |
| "step": 304 | |
| }, | |
| { | |
| "epoch": 0.4467153284671533, | |
| "grad_norm": 1.9853583574295044, | |
| "learning_rate": 9.764279760623213e-11, | |
| "loss": 1.995004653930664, | |
| "step": 306 | |
| }, | |
| { | |
| "epoch": 0.44963503649635034, | |
| "grad_norm": 0.8257838487625122, | |
| "learning_rate": 9.759631134558321e-11, | |
| "loss": 1.5556952953338623, | |
| "step": 308 | |
| }, | |
| { | |
| "epoch": 0.45255474452554745, | |
| "grad_norm": 1.0743261575698853, | |
| "learning_rate": 9.754938374652254e-11, | |
| "loss": 1.5427100658416748, | |
| "step": 310 | |
| }, | |
| { | |
| "epoch": 0.4554744525547445, | |
| "grad_norm": 0.7255197167396545, | |
| "learning_rate": 9.750201529526476e-11, | |
| "loss": 1.7268377542495728, | |
| "step": 312 | |
| }, | |
| { | |
| "epoch": 0.4583941605839416, | |
| "grad_norm": 0.795796275138855, | |
| "learning_rate": 9.745420648259221e-11, | |
| "loss": 1.584905743598938, | |
| "step": 314 | |
| }, | |
| { | |
| "epoch": 0.46131386861313867, | |
| "grad_norm": 1.8482463359832764, | |
| "learning_rate": 9.74059578038498e-11, | |
| "loss": 2.065453052520752, | |
| "step": 316 | |
| }, | |
| { | |
| "epoch": 0.4642335766423358, | |
| "grad_norm": 0.6857784390449524, | |
| "learning_rate": 9.735726975893986e-11, | |
| "loss": 1.7699090242385864, | |
| "step": 318 | |
| }, | |
| { | |
| "epoch": 0.46715328467153283, | |
| "grad_norm": 0.4184860289096832, | |
| "learning_rate": 9.730814285231695e-11, | |
| "loss": 1.6940510272979736, | |
| "step": 320 | |
| }, | |
| { | |
| "epoch": 0.47007299270072994, | |
| "grad_norm": 2.465660572052002, | |
| "learning_rate": 9.725857759298269e-11, | |
| "loss": 1.8895952701568604, | |
| "step": 322 | |
| }, | |
| { | |
| "epoch": 0.472992700729927, | |
| "grad_norm": 1.6698033809661865, | |
| "learning_rate": 9.720857449448044e-11, | |
| "loss": 1.7651352882385254, | |
| "step": 324 | |
| }, | |
| { | |
| "epoch": 0.4759124087591241, | |
| "grad_norm": 0.49988099932670593, | |
| "learning_rate": 9.715813407489001e-11, | |
| "loss": 1.603371500968933, | |
| "step": 326 | |
| }, | |
| { | |
| "epoch": 0.47883211678832116, | |
| "grad_norm": 0.6962953209877014, | |
| "learning_rate": 9.710725685682221e-11, | |
| "loss": 1.4724305868148804, | |
| "step": 328 | |
| }, | |
| { | |
| "epoch": 0.48175182481751827, | |
| "grad_norm": 0.8053680658340454, | |
| "learning_rate": 9.70559433674136e-11, | |
| "loss": 1.597367525100708, | |
| "step": 330 | |
| }, | |
| { | |
| "epoch": 0.4846715328467153, | |
| "grad_norm": 0.6339701414108276, | |
| "learning_rate": 9.700419413832081e-11, | |
| "loss": 1.6341876983642578, | |
| "step": 332 | |
| }, | |
| { | |
| "epoch": 0.48759124087591244, | |
| "grad_norm": 1.4476237297058105, | |
| "learning_rate": 9.695200970571525e-11, | |
| "loss": 1.7593011856079102, | |
| "step": 334 | |
| }, | |
| { | |
| "epoch": 0.4905109489051095, | |
| "grad_norm": 1.690050721168518, | |
| "learning_rate": 9.68993906102774e-11, | |
| "loss": 1.890257716178894, | |
| "step": 336 | |
| }, | |
| { | |
| "epoch": 0.49343065693430654, | |
| "grad_norm": 1.4331129789352417, | |
| "learning_rate": 9.684633739719126e-11, | |
| "loss": 1.7976990938186646, | |
| "step": 338 | |
| }, | |
| { | |
| "epoch": 0.49635036496350365, | |
| "grad_norm": 0.5912653803825378, | |
| "learning_rate": 9.679285061613877e-11, | |
| "loss": 2.0194454193115234, | |
| "step": 340 | |
| }, | |
| { | |
| "epoch": 0.4992700729927007, | |
| "grad_norm": 5.232271194458008, | |
| "learning_rate": 9.673893082129394e-11, | |
| "loss": 1.837735652923584, | |
| "step": 342 | |
| }, | |
| { | |
| "epoch": 0.5021897810218978, | |
| "grad_norm": 0.9034498333930969, | |
| "learning_rate": 9.668457857131728e-11, | |
| "loss": 1.6165506839752197, | |
| "step": 344 | |
| }, | |
| { | |
| "epoch": 0.5051094890510949, | |
| "grad_norm": 1.5295802354812622, | |
| "learning_rate": 9.662979442934995e-11, | |
| "loss": 1.3725310564041138, | |
| "step": 346 | |
| }, | |
| { | |
| "epoch": 0.5080291970802919, | |
| "grad_norm": 1.0186959505081177, | |
| "learning_rate": 9.657457896300791e-11, | |
| "loss": 1.5627892017364502, | |
| "step": 348 | |
| }, | |
| { | |
| "epoch": 0.5109489051094891, | |
| "grad_norm": 2.1289613246917725, | |
| "learning_rate": 9.651893274437606e-11, | |
| "loss": 1.834728479385376, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 0.5138686131386861, | |
| "grad_norm": 0.8612548112869263, | |
| "learning_rate": 9.646285635000226e-11, | |
| "loss": 2.042515754699707, | |
| "step": 352 | |
| }, | |
| { | |
| "epoch": 0.5167883211678832, | |
| "grad_norm": 1.258448839187622, | |
| "learning_rate": 9.640635036089149e-11, | |
| "loss": 2.082827091217041, | |
| "step": 354 | |
| }, | |
| { | |
| "epoch": 0.5197080291970803, | |
| "grad_norm": 1.0726618766784668, | |
| "learning_rate": 9.634941536249966e-11, | |
| "loss": 1.9489014148712158, | |
| "step": 356 | |
| }, | |
| { | |
| "epoch": 0.5226277372262774, | |
| "grad_norm": 0.6719974279403687, | |
| "learning_rate": 9.629205194472766e-11, | |
| "loss": 1.8347880840301514, | |
| "step": 358 | |
| }, | |
| { | |
| "epoch": 0.5255474452554745, | |
| "grad_norm": 0.9830466508865356, | |
| "learning_rate": 9.623426070191521e-11, | |
| "loss": 1.6945587396621704, | |
| "step": 360 | |
| }, | |
| { | |
| "epoch": 0.5284671532846715, | |
| "grad_norm": 1.465045690536499, | |
| "learning_rate": 9.617604223283472e-11, | |
| "loss": 2.145533800125122, | |
| "step": 362 | |
| }, | |
| { | |
| "epoch": 0.5313868613138686, | |
| "grad_norm": 2.4139323234558105, | |
| "learning_rate": 9.611739714068503e-11, | |
| "loss": 2.2951149940490723, | |
| "step": 364 | |
| }, | |
| { | |
| "epoch": 0.5343065693430656, | |
| "grad_norm": 1.3702406883239746, | |
| "learning_rate": 9.605832603308523e-11, | |
| "loss": 1.8485170602798462, | |
| "step": 366 | |
| }, | |
| { | |
| "epoch": 0.5372262773722628, | |
| "grad_norm": 0.7219336032867432, | |
| "learning_rate": 9.599882952206834e-11, | |
| "loss": 1.5276951789855957, | |
| "step": 368 | |
| }, | |
| { | |
| "epoch": 0.5401459854014599, | |
| "grad_norm": 1.3941855430603027, | |
| "learning_rate": 9.593890822407497e-11, | |
| "loss": 1.9465556144714355, | |
| "step": 370 | |
| }, | |
| { | |
| "epoch": 0.5430656934306569, | |
| "grad_norm": 1.200696587562561, | |
| "learning_rate": 9.58785627599469e-11, | |
| "loss": 2.00054669380188, | |
| "step": 372 | |
| }, | |
| { | |
| "epoch": 0.545985401459854, | |
| "grad_norm": 0.7770039439201355, | |
| "learning_rate": 9.58177937549207e-11, | |
| "loss": 2.0790209770202637, | |
| "step": 374 | |
| }, | |
| { | |
| "epoch": 0.5489051094890511, | |
| "grad_norm": 0.6146572828292847, | |
| "learning_rate": 9.575660183862122e-11, | |
| "loss": 1.793034553527832, | |
| "step": 376 | |
| }, | |
| { | |
| "epoch": 0.5518248175182482, | |
| "grad_norm": 1.3211643695831299, | |
| "learning_rate": 9.569498764505507e-11, | |
| "loss": 1.8952629566192627, | |
| "step": 378 | |
| }, | |
| { | |
| "epoch": 0.5547445255474452, | |
| "grad_norm": 1.3079071044921875, | |
| "learning_rate": 9.563295181260408e-11, | |
| "loss": 1.5669095516204834, | |
| "step": 380 | |
| }, | |
| { | |
| "epoch": 0.5576642335766423, | |
| "grad_norm": 0.5158920288085938, | |
| "learning_rate": 9.55704949840186e-11, | |
| "loss": 1.5015873908996582, | |
| "step": 382 | |
| }, | |
| { | |
| "epoch": 0.5605839416058395, | |
| "grad_norm": 1.1895203590393066, | |
| "learning_rate": 9.550761780641101e-11, | |
| "loss": 1.6375136375427246, | |
| "step": 384 | |
| }, | |
| { | |
| "epoch": 0.5635036496350365, | |
| "grad_norm": 1.8690167665481567, | |
| "learning_rate": 9.544432093124877e-11, | |
| "loss": 1.7828657627105713, | |
| "step": 386 | |
| }, | |
| { | |
| "epoch": 0.5664233576642336, | |
| "grad_norm": 0.8759300112724304, | |
| "learning_rate": 9.53806050143479e-11, | |
| "loss": 1.681144118309021, | |
| "step": 388 | |
| }, | |
| { | |
| "epoch": 0.5693430656934306, | |
| "grad_norm": 1.4870281219482422, | |
| "learning_rate": 9.531647071586608e-11, | |
| "loss": 1.9759222269058228, | |
| "step": 390 | |
| }, | |
| { | |
| "epoch": 0.5722627737226277, | |
| "grad_norm": 1.1019947528839111, | |
| "learning_rate": 9.52519187002958e-11, | |
| "loss": 1.5209102630615234, | |
| "step": 392 | |
| }, | |
| { | |
| "epoch": 0.5751824817518248, | |
| "grad_norm": 1.502726435661316, | |
| "learning_rate": 9.518694963645751e-11, | |
| "loss": 1.6411371231079102, | |
| "step": 394 | |
| }, | |
| { | |
| "epoch": 0.5781021897810219, | |
| "grad_norm": 0.5966328382492065, | |
| "learning_rate": 9.512156419749268e-11, | |
| "loss": 1.5855532884597778, | |
| "step": 396 | |
| }, | |
| { | |
| "epoch": 0.581021897810219, | |
| "grad_norm": 1.452370047569275, | |
| "learning_rate": 9.505576306085681e-11, | |
| "loss": 1.655895709991455, | |
| "step": 398 | |
| }, | |
| { | |
| "epoch": 0.583941605839416, | |
| "grad_norm": 1.8667651414871216, | |
| "learning_rate": 9.498954690831245e-11, | |
| "loss": 1.922675371170044, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 0.5868613138686132, | |
| "grad_norm": 0.4939572513103485, | |
| "learning_rate": 9.492291642592205e-11, | |
| "loss": 1.3395423889160156, | |
| "step": 402 | |
| }, | |
| { | |
| "epoch": 0.5897810218978102, | |
| "grad_norm": 0.7743672132492065, | |
| "learning_rate": 9.485587230404102e-11, | |
| "loss": 1.43617844581604, | |
| "step": 404 | |
| }, | |
| { | |
| "epoch": 0.5927007299270073, | |
| "grad_norm": 0.4597339928150177, | |
| "learning_rate": 9.478841523731036e-11, | |
| "loss": 1.3731002807617188, | |
| "step": 406 | |
| }, | |
| { | |
| "epoch": 0.5956204379562043, | |
| "grad_norm": 1.0869733095169067, | |
| "learning_rate": 9.472054592464963e-11, | |
| "loss": 1.4698946475982666, | |
| "step": 408 | |
| }, | |
| { | |
| "epoch": 0.5985401459854015, | |
| "grad_norm": 1.613753080368042, | |
| "learning_rate": 9.465226506924965e-11, | |
| "loss": 1.909116268157959, | |
| "step": 410 | |
| }, | |
| { | |
| "epoch": 0.6014598540145986, | |
| "grad_norm": 0.3934522569179535, | |
| "learning_rate": 9.458357337856519e-11, | |
| "loss": 1.545150876045227, | |
| "step": 412 | |
| }, | |
| { | |
| "epoch": 0.6043795620437956, | |
| "grad_norm": 0.6359394788742065, | |
| "learning_rate": 9.451447156430769e-11, | |
| "loss": 1.3645131587982178, | |
| "step": 414 | |
| }, | |
| { | |
| "epoch": 0.6072992700729927, | |
| "grad_norm": 4.1194257736206055, | |
| "learning_rate": 9.444496034243786e-11, | |
| "loss": 1.5675917863845825, | |
| "step": 416 | |
| }, | |
| { | |
| "epoch": 0.6102189781021898, | |
| "grad_norm": 1.3098376989364624, | |
| "learning_rate": 9.437504043315824e-11, | |
| "loss": 1.6718039512634277, | |
| "step": 418 | |
| }, | |
| { | |
| "epoch": 0.6131386861313869, | |
| "grad_norm": 0.7749998569488525, | |
| "learning_rate": 9.430471256090578e-11, | |
| "loss": 1.8446910381317139, | |
| "step": 420 | |
| }, | |
| { | |
| "epoch": 0.6160583941605839, | |
| "grad_norm": 0.9727952480316162, | |
| "learning_rate": 9.42339774543443e-11, | |
| "loss": 1.4627130031585693, | |
| "step": 422 | |
| }, | |
| { | |
| "epoch": 0.618978102189781, | |
| "grad_norm": 0.797150194644928, | |
| "learning_rate": 9.4162835846357e-11, | |
| "loss": 1.8627034425735474, | |
| "step": 424 | |
| }, | |
| { | |
| "epoch": 0.621897810218978, | |
| "grad_norm": 0.7061981558799744, | |
| "learning_rate": 9.409128847403874e-11, | |
| "loss": 1.5798931121826172, | |
| "step": 426 | |
| }, | |
| { | |
| "epoch": 0.6248175182481752, | |
| "grad_norm": 1.0969212055206299, | |
| "learning_rate": 9.401933607868859e-11, | |
| "loss": 1.4541444778442383, | |
| "step": 428 | |
| }, | |
| { | |
| "epoch": 0.6277372262773723, | |
| "grad_norm": 0.4994068145751953, | |
| "learning_rate": 9.394697940580195e-11, | |
| "loss": 1.2827084064483643, | |
| "step": 430 | |
| }, | |
| { | |
| "epoch": 0.6306569343065693, | |
| "grad_norm": 1.6509168148040771, | |
| "learning_rate": 9.387421920506298e-11, | |
| "loss": 1.2852421998977661, | |
| "step": 432 | |
| }, | |
| { | |
| "epoch": 0.6335766423357664, | |
| "grad_norm": 0.5991594195365906, | |
| "learning_rate": 9.380105623033677e-11, | |
| "loss": 1.8149901628494263, | |
| "step": 434 | |
| }, | |
| { | |
| "epoch": 0.6364963503649635, | |
| "grad_norm": 0.5894845724105835, | |
| "learning_rate": 9.372749123966148e-11, | |
| "loss": 1.716212511062622, | |
| "step": 436 | |
| }, | |
| { | |
| "epoch": 0.6394160583941606, | |
| "grad_norm": 2.9688093662261963, | |
| "learning_rate": 9.365352499524059e-11, | |
| "loss": 1.8449535369873047, | |
| "step": 438 | |
| }, | |
| { | |
| "epoch": 0.6423357664233577, | |
| "grad_norm": 0.9899563789367676, | |
| "learning_rate": 9.357915826343496e-11, | |
| "loss": 1.7632842063903809, | |
| "step": 440 | |
| }, | |
| { | |
| "epoch": 0.6452554744525547, | |
| "grad_norm": 1.1627094745635986, | |
| "learning_rate": 9.350439181475483e-11, | |
| "loss": 1.769453525543213, | |
| "step": 442 | |
| }, | |
| { | |
| "epoch": 0.6481751824817519, | |
| "grad_norm": 0.9476377367973328, | |
| "learning_rate": 9.342922642385193e-11, | |
| "loss": 1.6457428932189941, | |
| "step": 444 | |
| }, | |
| { | |
| "epoch": 0.6510948905109489, | |
| "grad_norm": 1.3285548686981201, | |
| "learning_rate": 9.33536628695114e-11, | |
| "loss": 1.7582794427871704, | |
| "step": 446 | |
| }, | |
| { | |
| "epoch": 0.654014598540146, | |
| "grad_norm": 1.8873428106307983, | |
| "learning_rate": 9.327770193464374e-11, | |
| "loss": 1.518355369567871, | |
| "step": 448 | |
| }, | |
| { | |
| "epoch": 0.656934306569343, | |
| "grad_norm": 0.6091031432151794, | |
| "learning_rate": 9.320134440627665e-11, | |
| "loss": 1.5437514781951904, | |
| "step": 450 | |
| }, | |
| { | |
| "epoch": 0.6598540145985401, | |
| "grad_norm": 1.6648386716842651, | |
| "learning_rate": 9.312459107554698e-11, | |
| "loss": 1.7409050464630127, | |
| "step": 452 | |
| }, | |
| { | |
| "epoch": 0.6627737226277373, | |
| "grad_norm": 1.7773568630218506, | |
| "learning_rate": 9.304744273769243e-11, | |
| "loss": 1.8443725109100342, | |
| "step": 454 | |
| }, | |
| { | |
| "epoch": 0.6656934306569343, | |
| "grad_norm": 1.524847388267517, | |
| "learning_rate": 9.296990019204335e-11, | |
| "loss": 1.6544660329818726, | |
| "step": 456 | |
| }, | |
| { | |
| "epoch": 0.6686131386861314, | |
| "grad_norm": 0.8343350291252136, | |
| "learning_rate": 9.289196424201451e-11, | |
| "loss": 1.6101242303848267, | |
| "step": 458 | |
| }, | |
| { | |
| "epoch": 0.6715328467153284, | |
| "grad_norm": 0.964552104473114, | |
| "learning_rate": 9.281363569509662e-11, | |
| "loss": 1.7781121730804443, | |
| "step": 460 | |
| }, | |
| { | |
| "epoch": 0.6744525547445256, | |
| "grad_norm": 2.552283525466919, | |
| "learning_rate": 9.273491536284819e-11, | |
| "loss": 1.9473206996917725, | |
| "step": 462 | |
| }, | |
| { | |
| "epoch": 0.6773722627737226, | |
| "grad_norm": 1.9999078512191772, | |
| "learning_rate": 9.265580406088692e-11, | |
| "loss": 1.8164054155349731, | |
| "step": 464 | |
| }, | |
| { | |
| "epoch": 0.6802919708029197, | |
| "grad_norm": 0.46030619740486145, | |
| "learning_rate": 9.257630260888133e-11, | |
| "loss": 1.6395976543426514, | |
| "step": 466 | |
| }, | |
| { | |
| "epoch": 0.6832116788321168, | |
| "grad_norm": 0.6875807642936707, | |
| "learning_rate": 9.249641183054232e-11, | |
| "loss": 1.57194185256958, | |
| "step": 468 | |
| }, | |
| { | |
| "epoch": 0.6861313868613139, | |
| "grad_norm": 1.0560736656188965, | |
| "learning_rate": 9.241613255361454e-11, | |
| "loss": 1.878631830215454, | |
| "step": 470 | |
| }, | |
| { | |
| "epoch": 0.689051094890511, | |
| "grad_norm": 0.9423820972442627, | |
| "learning_rate": 9.233546560986786e-11, | |
| "loss": 1.7931151390075684, | |
| "step": 472 | |
| }, | |
| { | |
| "epoch": 0.691970802919708, | |
| "grad_norm": 1.7364797592163086, | |
| "learning_rate": 9.225441183508875e-11, | |
| "loss": 1.8996636867523193, | |
| "step": 474 | |
| }, | |
| { | |
| "epoch": 0.6948905109489051, | |
| "grad_norm": 0.688714861869812, | |
| "learning_rate": 9.217297206907161e-11, | |
| "loss": 1.6164474487304688, | |
| "step": 476 | |
| }, | |
| { | |
| "epoch": 0.6978102189781021, | |
| "grad_norm": 0.9566084146499634, | |
| "learning_rate": 9.209114715561012e-11, | |
| "loss": 1.7578508853912354, | |
| "step": 478 | |
| }, | |
| { | |
| "epoch": 0.7007299270072993, | |
| "grad_norm": 0.8340669870376587, | |
| "learning_rate": 9.200893794248843e-11, | |
| "loss": 1.6821520328521729, | |
| "step": 480 | |
| }, | |
| { | |
| "epoch": 0.7036496350364964, | |
| "grad_norm": 1.7323689460754395, | |
| "learning_rate": 9.192634528147234e-11, | |
| "loss": 1.8423354625701904, | |
| "step": 482 | |
| }, | |
| { | |
| "epoch": 0.7065693430656934, | |
| "grad_norm": 0.7809618711471558, | |
| "learning_rate": 9.184337002830065e-11, | |
| "loss": 1.5498263835906982, | |
| "step": 484 | |
| }, | |
| { | |
| "epoch": 0.7094890510948905, | |
| "grad_norm": 1.3158289194107056, | |
| "learning_rate": 9.176001304267607e-11, | |
| "loss": 1.84452223777771, | |
| "step": 486 | |
| }, | |
| { | |
| "epoch": 0.7124087591240876, | |
| "grad_norm": 0.7074493765830994, | |
| "learning_rate": 9.167627518825651e-11, | |
| "loss": 1.7949258089065552, | |
| "step": 488 | |
| }, | |
| { | |
| "epoch": 0.7153284671532847, | |
| "grad_norm": 0.7183763384819031, | |
| "learning_rate": 9.1592157332646e-11, | |
| "loss": 1.741299033164978, | |
| "step": 490 | |
| }, | |
| { | |
| "epoch": 0.7182481751824817, | |
| "grad_norm": 1.1310676336288452, | |
| "learning_rate": 9.150766034738575e-11, | |
| "loss": 1.8497264385223389, | |
| "step": 492 | |
| }, | |
| { | |
| "epoch": 0.7211678832116788, | |
| "grad_norm": 0.7075262665748596, | |
| "learning_rate": 9.14227851079451e-11, | |
| "loss": 1.7287085056304932, | |
| "step": 494 | |
| }, | |
| { | |
| "epoch": 0.724087591240876, | |
| "grad_norm": 1.3456463813781738, | |
| "learning_rate": 9.133753249371251e-11, | |
| "loss": 1.7489293813705444, | |
| "step": 496 | |
| }, | |
| { | |
| "epoch": 0.727007299270073, | |
| "grad_norm": 0.8895608186721802, | |
| "learning_rate": 9.125190338798634e-11, | |
| "loss": 1.6968376636505127, | |
| "step": 498 | |
| }, | |
| { | |
| "epoch": 0.7299270072992701, | |
| "grad_norm": 1.5361218452453613, | |
| "learning_rate": 9.116589867796582e-11, | |
| "loss": 1.842568039894104, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.7328467153284671, | |
| "grad_norm": 1.825984001159668, | |
| "learning_rate": 9.107951925474176e-11, | |
| "loss": 1.744608759880066, | |
| "step": 502 | |
| }, | |
| { | |
| "epoch": 0.7357664233576642, | |
| "grad_norm": 1.4578057527542114, | |
| "learning_rate": 9.099276601328738e-11, | |
| "loss": 1.597829818725586, | |
| "step": 504 | |
| }, | |
| { | |
| "epoch": 0.7386861313868613, | |
| "grad_norm": 0.7269303202629089, | |
| "learning_rate": 9.090563985244899e-11, | |
| "loss": 1.6843130588531494, | |
| "step": 506 | |
| }, | |
| { | |
| "epoch": 0.7416058394160584, | |
| "grad_norm": 0.7994980812072754, | |
| "learning_rate": 9.081814167493671e-11, | |
| "loss": 1.7621855735778809, | |
| "step": 508 | |
| }, | |
| { | |
| "epoch": 0.7445255474452555, | |
| "grad_norm": 0.7296390533447266, | |
| "learning_rate": 9.073027238731513e-11, | |
| "loss": 1.7365657091140747, | |
| "step": 510 | |
| }, | |
| { | |
| "epoch": 0.7474452554744525, | |
| "grad_norm": 0.7442068457603455, | |
| "learning_rate": 9.064203289999385e-11, | |
| "loss": 1.786052942276001, | |
| "step": 512 | |
| }, | |
| { | |
| "epoch": 0.7503649635036497, | |
| "grad_norm": 0.5901494026184082, | |
| "learning_rate": 9.055342412721815e-11, | |
| "loss": 1.6916297674179077, | |
| "step": 514 | |
| }, | |
| { | |
| "epoch": 0.7532846715328467, | |
| "grad_norm": 0.5933018326759338, | |
| "learning_rate": 9.04644469870594e-11, | |
| "loss": 1.765047311782837, | |
| "step": 516 | |
| }, | |
| { | |
| "epoch": 0.7562043795620438, | |
| "grad_norm": 1.6390026807785034, | |
| "learning_rate": 9.037510240140563e-11, | |
| "loss": 1.649626612663269, | |
| "step": 518 | |
| }, | |
| { | |
| "epoch": 0.7591240875912408, | |
| "grad_norm": 0.8749039173126221, | |
| "learning_rate": 9.028539129595198e-11, | |
| "loss": 1.7884538173675537, | |
| "step": 520 | |
| }, | |
| { | |
| "epoch": 0.762043795620438, | |
| "grad_norm": 0.6646139025688171, | |
| "learning_rate": 9.019531460019105e-11, | |
| "loss": 1.803902506828308, | |
| "step": 522 | |
| }, | |
| { | |
| "epoch": 0.7649635036496351, | |
| "grad_norm": 0.7847602367401123, | |
| "learning_rate": 9.010487324740333e-11, | |
| "loss": 1.7177143096923828, | |
| "step": 524 | |
| }, | |
| { | |
| "epoch": 0.7678832116788321, | |
| "grad_norm": 1.0937227010726929, | |
| "learning_rate": 9.001406817464748e-11, | |
| "loss": 1.9987658262252808, | |
| "step": 526 | |
| }, | |
| { | |
| "epoch": 0.7708029197080292, | |
| "grad_norm": 2.1735150814056396, | |
| "learning_rate": 8.992290032275067e-11, | |
| "loss": 1.709079384803772, | |
| "step": 528 | |
| }, | |
| { | |
| "epoch": 0.7737226277372263, | |
| "grad_norm": 1.1910653114318848, | |
| "learning_rate": 8.983137063629879e-11, | |
| "loss": 1.6516835689544678, | |
| "step": 530 | |
| }, | |
| { | |
| "epoch": 0.7766423357664234, | |
| "grad_norm": 1.0264052152633667, | |
| "learning_rate": 8.97394800636267e-11, | |
| "loss": 1.8717613220214844, | |
| "step": 532 | |
| }, | |
| { | |
| "epoch": 0.7795620437956204, | |
| "grad_norm": 0.8426408171653748, | |
| "learning_rate": 8.964722955680835e-11, | |
| "loss": 1.67078697681427, | |
| "step": 534 | |
| }, | |
| { | |
| "epoch": 0.7824817518248175, | |
| "grad_norm": 1.7408273220062256, | |
| "learning_rate": 8.955462007164699e-11, | |
| "loss": 1.6820244789123535, | |
| "step": 536 | |
| }, | |
| { | |
| "epoch": 0.7854014598540145, | |
| "grad_norm": 1.2982593774795532, | |
| "learning_rate": 8.946165256766524e-11, | |
| "loss": 1.4112169742584229, | |
| "step": 538 | |
| }, | |
| { | |
| "epoch": 0.7883211678832117, | |
| "grad_norm": 7.625301837921143, | |
| "learning_rate": 8.936832800809506e-11, | |
| "loss": 1.4367958307266235, | |
| "step": 540 | |
| }, | |
| { | |
| "epoch": 0.7912408759124088, | |
| "grad_norm": 0.8367983102798462, | |
| "learning_rate": 8.927464735986792e-11, | |
| "loss": 1.5659011602401733, | |
| "step": 542 | |
| }, | |
| { | |
| "epoch": 0.7941605839416058, | |
| "grad_norm": 0.5111868977546692, | |
| "learning_rate": 8.918061159360468e-11, | |
| "loss": 1.662480354309082, | |
| "step": 544 | |
| }, | |
| { | |
| "epoch": 0.7970802919708029, | |
| "grad_norm": 0.5908204913139343, | |
| "learning_rate": 8.908622168360558e-11, | |
| "loss": 1.4826583862304688, | |
| "step": 546 | |
| }, | |
| { | |
| "epoch": 0.8, | |
| "grad_norm": 0.35878902673721313, | |
| "learning_rate": 8.899147860784006e-11, | |
| "loss": 1.2484965324401855, | |
| "step": 548 | |
| }, | |
| { | |
| "epoch": 0.8029197080291971, | |
| "grad_norm": 0.3313426375389099, | |
| "learning_rate": 8.889638334793682e-11, | |
| "loss": 1.2876758575439453, | |
| "step": 550 | |
| }, | |
| { | |
| "epoch": 0.8058394160583942, | |
| "grad_norm": 0.39089521765708923, | |
| "learning_rate": 8.88009368891734e-11, | |
| "loss": 1.1946688890457153, | |
| "step": 552 | |
| }, | |
| { | |
| "epoch": 0.8087591240875912, | |
| "grad_norm": 0.4971824288368225, | |
| "learning_rate": 8.870514022046617e-11, | |
| "loss": 1.1782593727111816, | |
| "step": 554 | |
| }, | |
| { | |
| "epoch": 0.8116788321167884, | |
| "grad_norm": 0.2659202218055725, | |
| "learning_rate": 8.860899433435994e-11, | |
| "loss": 1.1099154949188232, | |
| "step": 556 | |
| }, | |
| { | |
| "epoch": 0.8145985401459854, | |
| "grad_norm": 0.5767684578895569, | |
| "learning_rate": 8.851250022701784e-11, | |
| "loss": 1.0834901332855225, | |
| "step": 558 | |
| }, | |
| { | |
| "epoch": 0.8175182481751825, | |
| "grad_norm": 0.27106234431266785, | |
| "learning_rate": 8.841565889821084e-11, | |
| "loss": 1.101444959640503, | |
| "step": 560 | |
| }, | |
| { | |
| "epoch": 0.8204379562043795, | |
| "grad_norm": 0.42344605922698975, | |
| "learning_rate": 8.831847135130746e-11, | |
| "loss": 1.0758614540100098, | |
| "step": 562 | |
| }, | |
| { | |
| "epoch": 0.8233576642335766, | |
| "grad_norm": 0.387885719537735, | |
| "learning_rate": 8.822093859326339e-11, | |
| "loss": 1.1527973413467407, | |
| "step": 564 | |
| }, | |
| { | |
| "epoch": 0.8262773722627738, | |
| "grad_norm": 0.5286118984222412, | |
| "learning_rate": 8.8123061634611e-11, | |
| "loss": 1.5337316989898682, | |
| "step": 566 | |
| }, | |
| { | |
| "epoch": 0.8291970802919708, | |
| "grad_norm": 0.8238726258277893, | |
| "learning_rate": 8.802484148944897e-11, | |
| "loss": 1.5842351913452148, | |
| "step": 568 | |
| }, | |
| { | |
| "epoch": 0.8321167883211679, | |
| "grad_norm": 0.5292539000511169, | |
| "learning_rate": 8.792627917543165e-11, | |
| "loss": 1.6405889987945557, | |
| "step": 570 | |
| }, | |
| { | |
| "epoch": 0.8350364963503649, | |
| "grad_norm": 0.9521947503089905, | |
| "learning_rate": 8.782737571375861e-11, | |
| "loss": 1.5619168281555176, | |
| "step": 572 | |
| }, | |
| { | |
| "epoch": 0.8379562043795621, | |
| "grad_norm": 0.5571414232254028, | |
| "learning_rate": 8.772813212916408e-11, | |
| "loss": 1.590590476989746, | |
| "step": 574 | |
| }, | |
| { | |
| "epoch": 0.8408759124087591, | |
| "grad_norm": 0.7506124973297119, | |
| "learning_rate": 8.762854944990623e-11, | |
| "loss": 1.5383520126342773, | |
| "step": 576 | |
| }, | |
| { | |
| "epoch": 0.8437956204379562, | |
| "grad_norm": 0.7125639915466309, | |
| "learning_rate": 8.752862870775658e-11, | |
| "loss": 1.5687944889068604, | |
| "step": 578 | |
| }, | |
| { | |
| "epoch": 0.8467153284671532, | |
| "grad_norm": 1.341855764389038, | |
| "learning_rate": 8.742837093798934e-11, | |
| "loss": 1.6205568313598633, | |
| "step": 580 | |
| }, | |
| { | |
| "epoch": 0.8496350364963504, | |
| "grad_norm": 0.6118226647377014, | |
| "learning_rate": 8.732777717937063e-11, | |
| "loss": 1.574122667312622, | |
| "step": 582 | |
| }, | |
| { | |
| "epoch": 0.8525547445255475, | |
| "grad_norm": 0.6435967087745667, | |
| "learning_rate": 8.72268484741477e-11, | |
| "loss": 1.578256368637085, | |
| "step": 584 | |
| }, | |
| { | |
| "epoch": 0.8554744525547445, | |
| "grad_norm": 0.5821177363395691, | |
| "learning_rate": 8.712558586803822e-11, | |
| "loss": 1.5332674980163574, | |
| "step": 586 | |
| }, | |
| { | |
| "epoch": 0.8583941605839416, | |
| "grad_norm": 0.5686182379722595, | |
| "learning_rate": 8.702399041021937e-11, | |
| "loss": 1.5462627410888672, | |
| "step": 588 | |
| }, | |
| { | |
| "epoch": 0.8613138686131386, | |
| "grad_norm": 1.5727794170379639, | |
| "learning_rate": 8.692206315331697e-11, | |
| "loss": 1.5420336723327637, | |
| "step": 590 | |
| }, | |
| { | |
| "epoch": 0.8642335766423358, | |
| "grad_norm": 2.8418736457824707, | |
| "learning_rate": 8.681980515339464e-11, | |
| "loss": 1.5063787698745728, | |
| "step": 592 | |
| }, | |
| { | |
| "epoch": 0.8671532846715329, | |
| "grad_norm": 0.7189317345619202, | |
| "learning_rate": 8.671721746994278e-11, | |
| "loss": 1.5795114040374756, | |
| "step": 594 | |
| }, | |
| { | |
| "epoch": 0.8700729927007299, | |
| "grad_norm": 0.7251370549201965, | |
| "learning_rate": 8.661430116586763e-11, | |
| "loss": 1.5896693468093872, | |
| "step": 596 | |
| }, | |
| { | |
| "epoch": 0.872992700729927, | |
| "grad_norm": 1.3324731588363647, | |
| "learning_rate": 8.651105730748023e-11, | |
| "loss": 1.6040496826171875, | |
| "step": 598 | |
| }, | |
| { | |
| "epoch": 0.8759124087591241, | |
| "grad_norm": 0.8588221073150635, | |
| "learning_rate": 8.640748696448546e-11, | |
| "loss": 1.5715079307556152, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 0.8788321167883212, | |
| "grad_norm": 0.6456263065338135, | |
| "learning_rate": 8.630359120997084e-11, | |
| "loss": 1.5764148235321045, | |
| "step": 602 | |
| }, | |
| { | |
| "epoch": 0.8817518248175182, | |
| "grad_norm": 0.7720602750778198, | |
| "learning_rate": 8.61993711203955e-11, | |
| "loss": 1.5507595539093018, | |
| "step": 604 | |
| }, | |
| { | |
| "epoch": 0.8846715328467153, | |
| "grad_norm": 1.7567601203918457, | |
| "learning_rate": 8.609482777557891e-11, | |
| "loss": 1.5700254440307617, | |
| "step": 606 | |
| }, | |
| { | |
| "epoch": 0.8875912408759125, | |
| "grad_norm": 0.6601906418800354, | |
| "learning_rate": 8.59899622586899e-11, | |
| "loss": 1.4982445240020752, | |
| "step": 608 | |
| }, | |
| { | |
| "epoch": 0.8905109489051095, | |
| "grad_norm": 0.9867559671401978, | |
| "learning_rate": 8.58847756562352e-11, | |
| "loss": 1.5299499034881592, | |
| "step": 610 | |
| }, | |
| { | |
| "epoch": 0.8934306569343066, | |
| "grad_norm": 0.5381725430488586, | |
| "learning_rate": 8.577926905804833e-11, | |
| "loss": 1.5449988842010498, | |
| "step": 612 | |
| }, | |
| { | |
| "epoch": 0.8963503649635036, | |
| "grad_norm": 1.6642448902130127, | |
| "learning_rate": 8.567344355727829e-11, | |
| "loss": 1.5295981168746948, | |
| "step": 614 | |
| }, | |
| { | |
| "epoch": 0.8992700729927007, | |
| "grad_norm": 0.8142134547233582, | |
| "learning_rate": 8.556730025037819e-11, | |
| "loss": 1.5639193058013916, | |
| "step": 616 | |
| }, | |
| { | |
| "epoch": 0.9021897810218978, | |
| "grad_norm": 13.424260139465332, | |
| "learning_rate": 8.546084023709393e-11, | |
| "loss": 1.5428690910339355, | |
| "step": 618 | |
| }, | |
| { | |
| "epoch": 0.9051094890510949, | |
| "grad_norm": 0.6810373067855835, | |
| "learning_rate": 8.535406462045279e-11, | |
| "loss": 1.5449011325836182, | |
| "step": 620 | |
| }, | |
| { | |
| "epoch": 0.908029197080292, | |
| "grad_norm": 0.5600751042366028, | |
| "learning_rate": 8.524697450675199e-11, | |
| "loss": 1.5136233568191528, | |
| "step": 622 | |
| }, | |
| { | |
| "epoch": 0.910948905109489, | |
| "grad_norm": 0.9065161943435669, | |
| "learning_rate": 8.513957100554721e-11, | |
| "loss": 1.501448154449463, | |
| "step": 624 | |
| }, | |
| { | |
| "epoch": 0.9138686131386862, | |
| "grad_norm": 0.9321081042289734, | |
| "learning_rate": 8.503185522964122e-11, | |
| "loss": 1.5432753562927246, | |
| "step": 626 | |
| }, | |
| { | |
| "epoch": 0.9167883211678832, | |
| "grad_norm": 0.6998010277748108, | |
| "learning_rate": 8.492382829507215e-11, | |
| "loss": 1.5888001918792725, | |
| "step": 628 | |
| }, | |
| { | |
| "epoch": 0.9197080291970803, | |
| "grad_norm": 0.6488540172576904, | |
| "learning_rate": 8.48154913211021e-11, | |
| "loss": 1.533357858657837, | |
| "step": 630 | |
| }, | |
| { | |
| "epoch": 0.9226277372262773, | |
| "grad_norm": 0.5950493812561035, | |
| "learning_rate": 8.470684543020542e-11, | |
| "loss": 1.526620626449585, | |
| "step": 632 | |
| }, | |
| { | |
| "epoch": 0.9255474452554745, | |
| "grad_norm": 0.6320714354515076, | |
| "learning_rate": 8.45978917480572e-11, | |
| "loss": 1.4900453090667725, | |
| "step": 634 | |
| }, | |
| { | |
| "epoch": 0.9284671532846716, | |
| "grad_norm": 0.8587051630020142, | |
| "learning_rate": 8.448863140352144e-11, | |
| "loss": 1.5480128526687622, | |
| "step": 636 | |
| }, | |
| { | |
| "epoch": 0.9313868613138686, | |
| "grad_norm": 0.6327022314071655, | |
| "learning_rate": 8.437906552863958e-11, | |
| "loss": 1.5434049367904663, | |
| "step": 638 | |
| }, | |
| { | |
| "epoch": 0.9343065693430657, | |
| "grad_norm": 0.663105309009552, | |
| "learning_rate": 8.426919525861854e-11, | |
| "loss": 1.5092029571533203, | |
| "step": 640 | |
| }, | |
| { | |
| "epoch": 0.9372262773722628, | |
| "grad_norm": 1.4529094696044922, | |
| "learning_rate": 8.41590217318191e-11, | |
| "loss": 1.5402865409851074, | |
| "step": 642 | |
| }, | |
| { | |
| "epoch": 0.9401459854014599, | |
| "grad_norm": 0.5439270734786987, | |
| "learning_rate": 8.404854608974414e-11, | |
| "loss": 1.5238285064697266, | |
| "step": 644 | |
| }, | |
| { | |
| "epoch": 0.9430656934306569, | |
| "grad_norm": 1.19278883934021, | |
| "learning_rate": 8.39377694770266e-11, | |
| "loss": 1.5863299369812012, | |
| "step": 646 | |
| }, | |
| { | |
| "epoch": 0.945985401459854, | |
| "grad_norm": 0.7118021845817566, | |
| "learning_rate": 8.38266930414179e-11, | |
| "loss": 1.5646288394927979, | |
| "step": 648 | |
| }, | |
| { | |
| "epoch": 0.948905109489051, | |
| "grad_norm": 0.8593351244926453, | |
| "learning_rate": 8.37153179337758e-11, | |
| "loss": 1.553905963897705, | |
| "step": 650 | |
| }, | |
| { | |
| "epoch": 0.9518248175182482, | |
| "grad_norm": 0.78006511926651, | |
| "learning_rate": 8.360364530805266e-11, | |
| "loss": 1.5432817935943604, | |
| "step": 652 | |
| }, | |
| { | |
| "epoch": 0.9547445255474453, | |
| "grad_norm": 2.4391605854034424, | |
| "learning_rate": 8.349167632128342e-11, | |
| "loss": 1.5126714706420898, | |
| "step": 654 | |
| }, | |
| { | |
| "epoch": 0.9576642335766423, | |
| "grad_norm": 0.5853560566902161, | |
| "learning_rate": 8.337941213357355e-11, | |
| "loss": 1.4902369976043701, | |
| "step": 656 | |
| }, | |
| { | |
| "epoch": 0.9605839416058394, | |
| "grad_norm": 0.7599897980690002, | |
| "learning_rate": 8.326685390808711e-11, | |
| "loss": 1.4758626222610474, | |
| "step": 658 | |
| }, | |
| { | |
| "epoch": 0.9635036496350365, | |
| "grad_norm": 0.6267222762107849, | |
| "learning_rate": 8.315400281103466e-11, | |
| "loss": 1.4989067316055298, | |
| "step": 660 | |
| }, | |
| { | |
| "epoch": 0.9664233576642336, | |
| "grad_norm": 0.7297730445861816, | |
| "learning_rate": 8.304086001166119e-11, | |
| "loss": 1.4975358247756958, | |
| "step": 662 | |
| }, | |
| { | |
| "epoch": 0.9693430656934306, | |
| "grad_norm": 0.7411254644393921, | |
| "learning_rate": 8.292742668223403e-11, | |
| "loss": 1.545630931854248, | |
| "step": 664 | |
| }, | |
| { | |
| "epoch": 0.9722627737226277, | |
| "grad_norm": 2.3968822956085205, | |
| "learning_rate": 8.281370399803065e-11, | |
| "loss": 1.5507152080535889, | |
| "step": 666 | |
| }, | |
| { | |
| "epoch": 0.9751824817518249, | |
| "grad_norm": 0.8514155745506287, | |
| "learning_rate": 8.269969313732654e-11, | |
| "loss": 1.5324714183807373, | |
| "step": 668 | |
| }, | |
| { | |
| "epoch": 0.9781021897810219, | |
| "grad_norm": 2.2287166118621826, | |
| "learning_rate": 8.258539528138293e-11, | |
| "loss": 1.5232876539230347, | |
| "step": 670 | |
| }, | |
| { | |
| "epoch": 0.981021897810219, | |
| "grad_norm": 0.7443653345108032, | |
| "learning_rate": 8.247081161443463e-11, | |
| "loss": 1.5413646697998047, | |
| "step": 672 | |
| }, | |
| { | |
| "epoch": 0.983941605839416, | |
| "grad_norm": 1.0233676433563232, | |
| "learning_rate": 8.23559433236777e-11, | |
| "loss": 1.4631516933441162, | |
| "step": 674 | |
| }, | |
| { | |
| "epoch": 0.9868613138686131, | |
| "grad_norm": 0.7487879395484924, | |
| "learning_rate": 8.224079159925721e-11, | |
| "loss": 1.5500619411468506, | |
| "step": 676 | |
| }, | |
| { | |
| "epoch": 0.9897810218978103, | |
| "grad_norm": 1.9052271842956543, | |
| "learning_rate": 8.212535763425484e-11, | |
| "loss": 1.6026296615600586, | |
| "step": 678 | |
| }, | |
| { | |
| "epoch": 0.9927007299270073, | |
| "grad_norm": 1.4029343128204346, | |
| "learning_rate": 8.200964262467657e-11, | |
| "loss": 1.849067211151123, | |
| "step": 680 | |
| }, | |
| { | |
| "epoch": 0.9956204379562044, | |
| "grad_norm": 1.3085871934890747, | |
| "learning_rate": 8.189364776944026e-11, | |
| "loss": 2.2609763145446777, | |
| "step": 682 | |
| }, | |
| { | |
| "epoch": 0.9985401459854014, | |
| "grad_norm": 1.1536389589309692, | |
| "learning_rate": 8.177737427036323e-11, | |
| "loss": 2.2199437618255615, | |
| "step": 684 | |
| }, | |
| { | |
| "epoch": 1.0014598540145985, | |
| "grad_norm": 0.5622366666793823, | |
| "learning_rate": 8.166082333214983e-11, | |
| "loss": 1.8828843832015991, | |
| "step": 686 | |
| }, | |
| { | |
| "epoch": 1.0043795620437956, | |
| "grad_norm": 2.2178897857666016, | |
| "learning_rate": 8.154399616237894e-11, | |
| "loss": 1.5576670169830322, | |
| "step": 688 | |
| }, | |
| { | |
| "epoch": 1.0072992700729928, | |
| "grad_norm": 0.6949933171272278, | |
| "learning_rate": 8.142689397149148e-11, | |
| "loss": 1.5247983932495117, | |
| "step": 690 | |
| }, | |
| { | |
| "epoch": 1.0102189781021897, | |
| "grad_norm": 1.0407248735427856, | |
| "learning_rate": 8.130951797277783e-11, | |
| "loss": 1.5038955211639404, | |
| "step": 692 | |
| }, | |
| { | |
| "epoch": 1.013138686131387, | |
| "grad_norm": 0.9534472227096558, | |
| "learning_rate": 8.119186938236524e-11, | |
| "loss": 1.6115878820419312, | |
| "step": 694 | |
| }, | |
| { | |
| "epoch": 1.0160583941605839, | |
| "grad_norm": 1.0461090803146362, | |
| "learning_rate": 8.107394941920536e-11, | |
| "loss": 1.4514870643615723, | |
| "step": 696 | |
| }, | |
| { | |
| "epoch": 1.018978102189781, | |
| "grad_norm": 0.4908963143825531, | |
| "learning_rate": 8.095575930506148e-11, | |
| "loss": 1.5594661235809326, | |
| "step": 698 | |
| }, | |
| { | |
| "epoch": 1.0218978102189782, | |
| "grad_norm": 0.5373883843421936, | |
| "learning_rate": 8.083730026449588e-11, | |
| "loss": 1.3820297718048096, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 1.0248175182481751, | |
| "grad_norm": 0.48417338728904724, | |
| "learning_rate": 8.07185735248572e-11, | |
| "loss": 1.41359543800354, | |
| "step": 702 | |
| }, | |
| { | |
| "epoch": 1.0277372262773723, | |
| "grad_norm": 0.4498881697654724, | |
| "learning_rate": 8.059958031626771e-11, | |
| "loss": 1.3846168518066406, | |
| "step": 704 | |
| }, | |
| { | |
| "epoch": 1.0306569343065692, | |
| "grad_norm": 0.7400011420249939, | |
| "learning_rate": 8.048032187161055e-11, | |
| "loss": 1.5426232814788818, | |
| "step": 706 | |
| }, | |
| { | |
| "epoch": 1.0335766423357664, | |
| "grad_norm": 1.0404973030090332, | |
| "learning_rate": 8.036079942651694e-11, | |
| "loss": 1.4933040142059326, | |
| "step": 708 | |
| }, | |
| { | |
| "epoch": 1.0364963503649636, | |
| "grad_norm": 0.5060697197914124, | |
| "learning_rate": 8.02410142193534e-11, | |
| "loss": 1.2689898014068604, | |
| "step": 710 | |
| }, | |
| { | |
| "epoch": 1.0394160583941605, | |
| "grad_norm": 0.5267243981361389, | |
| "learning_rate": 8.01209674912089e-11, | |
| "loss": 1.493792176246643, | |
| "step": 712 | |
| }, | |
| { | |
| "epoch": 1.0423357664233577, | |
| "grad_norm": 0.6831766366958618, | |
| "learning_rate": 8.00006604858821e-11, | |
| "loss": 1.5755467414855957, | |
| "step": 714 | |
| }, | |
| { | |
| "epoch": 1.0452554744525548, | |
| "grad_norm": 0.4897480905056, | |
| "learning_rate": 7.988009444986827e-11, | |
| "loss": 1.4748653173446655, | |
| "step": 716 | |
| }, | |
| { | |
| "epoch": 1.0481751824817518, | |
| "grad_norm": 0.6031560301780701, | |
| "learning_rate": 7.975927063234655e-11, | |
| "loss": 1.4614906311035156, | |
| "step": 718 | |
| }, | |
| { | |
| "epoch": 1.051094890510949, | |
| "grad_norm": 0.3444135785102844, | |
| "learning_rate": 7.963819028516691e-11, | |
| "loss": 1.1188762187957764, | |
| "step": 720 | |
| }, | |
| { | |
| "epoch": 1.054014598540146, | |
| "grad_norm": 0.5612711906433105, | |
| "learning_rate": 7.951685466283724e-11, | |
| "loss": 1.2849868535995483, | |
| "step": 722 | |
| }, | |
| { | |
| "epoch": 1.056934306569343, | |
| "grad_norm": 0.4925251603126526, | |
| "learning_rate": 7.93952650225103e-11, | |
| "loss": 1.5048502683639526, | |
| "step": 724 | |
| }, | |
| { | |
| "epoch": 1.0598540145985402, | |
| "grad_norm": 0.6800446510314941, | |
| "learning_rate": 7.927342262397074e-11, | |
| "loss": 1.4658869504928589, | |
| "step": 726 | |
| }, | |
| { | |
| "epoch": 1.0627737226277372, | |
| "grad_norm": 0.8294490575790405, | |
| "learning_rate": 7.915132872962204e-11, | |
| "loss": 1.5184880495071411, | |
| "step": 728 | |
| }, | |
| { | |
| "epoch": 1.0656934306569343, | |
| "grad_norm": 0.7866031527519226, | |
| "learning_rate": 7.902898460447333e-11, | |
| "loss": 1.61749267578125, | |
| "step": 730 | |
| }, | |
| { | |
| "epoch": 1.0686131386861315, | |
| "grad_norm": 3.885850429534912, | |
| "learning_rate": 7.890639151612647e-11, | |
| "loss": 1.5665937662124634, | |
| "step": 732 | |
| }, | |
| { | |
| "epoch": 1.0715328467153284, | |
| "grad_norm": 0.77565598487854, | |
| "learning_rate": 7.878355073476282e-11, | |
| "loss": 1.456195592880249, | |
| "step": 734 | |
| }, | |
| { | |
| "epoch": 1.0744525547445256, | |
| "grad_norm": 0.542365550994873, | |
| "learning_rate": 7.866046353312994e-11, | |
| "loss": 1.489433765411377, | |
| "step": 736 | |
| }, | |
| { | |
| "epoch": 1.0773722627737226, | |
| "grad_norm": 0.6086686849594116, | |
| "learning_rate": 7.853713118652872e-11, | |
| "loss": 1.4931024312973022, | |
| "step": 738 | |
| }, | |
| { | |
| "epoch": 1.0802919708029197, | |
| "grad_norm": 1.3472954034805298, | |
| "learning_rate": 7.841355497279987e-11, | |
| "loss": 1.5780309438705444, | |
| "step": 740 | |
| }, | |
| { | |
| "epoch": 1.0832116788321169, | |
| "grad_norm": 1.233951449394226, | |
| "learning_rate": 7.828973617231079e-11, | |
| "loss": 1.4519734382629395, | |
| "step": 742 | |
| }, | |
| { | |
| "epoch": 1.0861313868613138, | |
| "grad_norm": 0.4615781903266907, | |
| "learning_rate": 7.816567606794239e-11, | |
| "loss": 1.2352495193481445, | |
| "step": 744 | |
| }, | |
| { | |
| "epoch": 1.089051094890511, | |
| "grad_norm": 0.43236157298088074, | |
| "learning_rate": 7.804137594507565e-11, | |
| "loss": 1.278646469116211, | |
| "step": 746 | |
| }, | |
| { | |
| "epoch": 1.091970802919708, | |
| "grad_norm": 0.42941945791244507, | |
| "learning_rate": 7.791683709157841e-11, | |
| "loss": 1.3942368030548096, | |
| "step": 748 | |
| }, | |
| { | |
| "epoch": 1.094890510948905, | |
| "grad_norm": 0.4888475835323334, | |
| "learning_rate": 7.779206079779196e-11, | |
| "loss": 1.4085397720336914, | |
| "step": 750 | |
| }, | |
| { | |
| "epoch": 1.0978102189781023, | |
| "grad_norm": 0.48239484429359436, | |
| "learning_rate": 7.766704835651772e-11, | |
| "loss": 1.3130261898040771, | |
| "step": 752 | |
| }, | |
| { | |
| "epoch": 1.1007299270072992, | |
| "grad_norm": 0.6260477304458618, | |
| "learning_rate": 7.75418010630038e-11, | |
| "loss": 1.4074561595916748, | |
| "step": 754 | |
| }, | |
| { | |
| "epoch": 1.1036496350364964, | |
| "grad_norm": 1.1679105758666992, | |
| "learning_rate": 7.741632021493161e-11, | |
| "loss": 1.419480800628662, | |
| "step": 756 | |
| }, | |
| { | |
| "epoch": 1.1065693430656935, | |
| "grad_norm": 0.6991820931434631, | |
| "learning_rate": 7.729060711240244e-11, | |
| "loss": 1.6651334762573242, | |
| "step": 758 | |
| }, | |
| { | |
| "epoch": 1.1094890510948905, | |
| "grad_norm": 3.488729953765869, | |
| "learning_rate": 7.71646630579239e-11, | |
| "loss": 1.4023584127426147, | |
| "step": 760 | |
| }, | |
| { | |
| "epoch": 1.1124087591240877, | |
| "grad_norm": 0.8115634918212891, | |
| "learning_rate": 7.703848935639653e-11, | |
| "loss": 1.6808946132659912, | |
| "step": 762 | |
| }, | |
| { | |
| "epoch": 1.1153284671532846, | |
| "grad_norm": 0.8896425366401672, | |
| "learning_rate": 7.691208731510022e-11, | |
| "loss": 1.5221037864685059, | |
| "step": 764 | |
| }, | |
| { | |
| "epoch": 1.1182481751824818, | |
| "grad_norm": 1.0833176374435425, | |
| "learning_rate": 7.678545824368068e-11, | |
| "loss": 1.3470661640167236, | |
| "step": 766 | |
| }, | |
| { | |
| "epoch": 1.121167883211679, | |
| "grad_norm": 1.1070582866668701, | |
| "learning_rate": 7.665860345413583e-11, | |
| "loss": 1.4074666500091553, | |
| "step": 768 | |
| }, | |
| { | |
| "epoch": 1.1240875912408759, | |
| "grad_norm": 0.8553116917610168, | |
| "learning_rate": 7.653152426080231e-11, | |
| "loss": 1.2583807706832886, | |
| "step": 770 | |
| }, | |
| { | |
| "epoch": 1.127007299270073, | |
| "grad_norm": 0.46726834774017334, | |
| "learning_rate": 7.640422198034175e-11, | |
| "loss": 1.3039724826812744, | |
| "step": 772 | |
| }, | |
| { | |
| "epoch": 1.12992700729927, | |
| "grad_norm": 0.6263546347618103, | |
| "learning_rate": 7.627669793172719e-11, | |
| "loss": 1.4425758123397827, | |
| "step": 774 | |
| }, | |
| { | |
| "epoch": 1.1328467153284671, | |
| "grad_norm": 1.100542426109314, | |
| "learning_rate": 7.614895343622941e-11, | |
| "loss": 1.501619815826416, | |
| "step": 776 | |
| }, | |
| { | |
| "epoch": 1.1357664233576643, | |
| "grad_norm": 1.0879031419754028, | |
| "learning_rate": 7.60209898174032e-11, | |
| "loss": 1.5851643085479736, | |
| "step": 778 | |
| }, | |
| { | |
| "epoch": 1.1386861313868613, | |
| "grad_norm": 0.9223687052726746, | |
| "learning_rate": 7.589280840107372e-11, | |
| "loss": 1.5015313625335693, | |
| "step": 780 | |
| }, | |
| { | |
| "epoch": 1.1416058394160584, | |
| "grad_norm": 0.5587929487228394, | |
| "learning_rate": 7.576441051532268e-11, | |
| "loss": 1.3720371723175049, | |
| "step": 782 | |
| }, | |
| { | |
| "epoch": 1.1445255474452556, | |
| "grad_norm": 0.6128681302070618, | |
| "learning_rate": 7.563579749047463e-11, | |
| "loss": 1.536409616470337, | |
| "step": 784 | |
| }, | |
| { | |
| "epoch": 1.1474452554744525, | |
| "grad_norm": 1.5065646171569824, | |
| "learning_rate": 7.55069706590832e-11, | |
| "loss": 1.6214349269866943, | |
| "step": 786 | |
| }, | |
| { | |
| "epoch": 1.1503649635036497, | |
| "grad_norm": 0.6464596390724182, | |
| "learning_rate": 7.537793135591721e-11, | |
| "loss": 1.458618402481079, | |
| "step": 788 | |
| }, | |
| { | |
| "epoch": 1.1532846715328466, | |
| "grad_norm": 0.5868789553642273, | |
| "learning_rate": 7.524868091794691e-11, | |
| "loss": 1.5169906616210938, | |
| "step": 790 | |
| }, | |
| { | |
| "epoch": 1.1562043795620438, | |
| "grad_norm": 1.0854041576385498, | |
| "learning_rate": 7.511922068433012e-11, | |
| "loss": 1.574601411819458, | |
| "step": 792 | |
| }, | |
| { | |
| "epoch": 1.159124087591241, | |
| "grad_norm": 1.2204523086547852, | |
| "learning_rate": 7.498955199639834e-11, | |
| "loss": 1.5548131465911865, | |
| "step": 794 | |
| }, | |
| { | |
| "epoch": 1.162043795620438, | |
| "grad_norm": 1.032820701599121, | |
| "learning_rate": 7.485967619764282e-11, | |
| "loss": 1.5450553894042969, | |
| "step": 796 | |
| }, | |
| { | |
| "epoch": 1.164963503649635, | |
| "grad_norm": 0.38759198784828186, | |
| "learning_rate": 7.472959463370074e-11, | |
| "loss": 1.356863021850586, | |
| "step": 798 | |
| }, | |
| { | |
| "epoch": 1.167883211678832, | |
| "grad_norm": 0.9296456575393677, | |
| "learning_rate": 7.459930865234115e-11, | |
| "loss": 1.538057804107666, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 1.1708029197080292, | |
| "grad_norm": 0.8856072425842285, | |
| "learning_rate": 7.446881960345107e-11, | |
| "loss": 1.5826928615570068, | |
| "step": 802 | |
| }, | |
| { | |
| "epoch": 1.1737226277372264, | |
| "grad_norm": 1.180622935295105, | |
| "learning_rate": 7.433812883902151e-11, | |
| "loss": 1.524979591369629, | |
| "step": 804 | |
| }, | |
| { | |
| "epoch": 1.1766423357664233, | |
| "grad_norm": 0.8320207595825195, | |
| "learning_rate": 7.420723771313342e-11, | |
| "loss": 1.5835464000701904, | |
| "step": 806 | |
| }, | |
| { | |
| "epoch": 1.1795620437956205, | |
| "grad_norm": 1.6011016368865967, | |
| "learning_rate": 7.407614758194375e-11, | |
| "loss": 1.5390067100524902, | |
| "step": 808 | |
| }, | |
| { | |
| "epoch": 1.1824817518248176, | |
| "grad_norm": 0.857045590877533, | |
| "learning_rate": 7.394485980367121e-11, | |
| "loss": 1.4244121313095093, | |
| "step": 810 | |
| }, | |
| { | |
| "epoch": 1.1854014598540146, | |
| "grad_norm": 0.5209587812423706, | |
| "learning_rate": 7.381337573858245e-11, | |
| "loss": 1.4662278890609741, | |
| "step": 812 | |
| }, | |
| { | |
| "epoch": 1.1883211678832117, | |
| "grad_norm": 0.7713713049888611, | |
| "learning_rate": 7.36816967489778e-11, | |
| "loss": 1.5011208057403564, | |
| "step": 814 | |
| }, | |
| { | |
| "epoch": 1.1912408759124087, | |
| "grad_norm": 0.9531408548355103, | |
| "learning_rate": 7.354982419917714e-11, | |
| "loss": 1.4519563913345337, | |
| "step": 816 | |
| }, | |
| { | |
| "epoch": 1.1941605839416058, | |
| "grad_norm": 0.6255420446395874, | |
| "learning_rate": 7.34177594555059e-11, | |
| "loss": 1.550148367881775, | |
| "step": 818 | |
| }, | |
| { | |
| "epoch": 1.197080291970803, | |
| "grad_norm": 1.2728800773620605, | |
| "learning_rate": 7.328550388628072e-11, | |
| "loss": 1.4228880405426025, | |
| "step": 820 | |
| }, | |
| { | |
| "epoch": 1.2, | |
| "grad_norm": 0.37029626965522766, | |
| "learning_rate": 7.31530588617955e-11, | |
| "loss": 1.2400577068328857, | |
| "step": 822 | |
| }, | |
| { | |
| "epoch": 1.2029197080291971, | |
| "grad_norm": 0.6366111636161804, | |
| "learning_rate": 7.3020425754307e-11, | |
| "loss": 1.343297004699707, | |
| "step": 824 | |
| }, | |
| { | |
| "epoch": 1.205839416058394, | |
| "grad_norm": 0.7170737385749817, | |
| "learning_rate": 7.288760593802067e-11, | |
| "loss": 1.4028267860412598, | |
| "step": 826 | |
| }, | |
| { | |
| "epoch": 1.2087591240875912, | |
| "grad_norm": 0.6670755743980408, | |
| "learning_rate": 7.275460078907654e-11, | |
| "loss": 1.390630841255188, | |
| "step": 828 | |
| }, | |
| { | |
| "epoch": 1.2116788321167884, | |
| "grad_norm": 0.7601497769355774, | |
| "learning_rate": 7.262141168553478e-11, | |
| "loss": 1.5051729679107666, | |
| "step": 830 | |
| }, | |
| { | |
| "epoch": 1.2145985401459853, | |
| "grad_norm": 0.7791512608528137, | |
| "learning_rate": 7.248804000736153e-11, | |
| "loss": 1.398391842842102, | |
| "step": 832 | |
| }, | |
| { | |
| "epoch": 1.2175182481751825, | |
| "grad_norm": 1.093146562576294, | |
| "learning_rate": 7.235448713641457e-11, | |
| "loss": 1.466840147972107, | |
| "step": 834 | |
| }, | |
| { | |
| "epoch": 1.2204379562043797, | |
| "grad_norm": 0.9340672492980957, | |
| "learning_rate": 7.222075445642904e-11, | |
| "loss": 1.5514698028564453, | |
| "step": 836 | |
| }, | |
| { | |
| "epoch": 1.2233576642335766, | |
| "grad_norm": 1.160134196281433, | |
| "learning_rate": 7.208684335300305e-11, | |
| "loss": 1.6925793886184692, | |
| "step": 838 | |
| }, | |
| { | |
| "epoch": 1.2262773722627738, | |
| "grad_norm": 0.818298876285553, | |
| "learning_rate": 7.195275521358333e-11, | |
| "loss": 1.615720510482788, | |
| "step": 840 | |
| }, | |
| { | |
| "epoch": 1.2291970802919707, | |
| "grad_norm": 1.870531439781189, | |
| "learning_rate": 7.181849142745091e-11, | |
| "loss": 1.5814074277877808, | |
| "step": 842 | |
| }, | |
| { | |
| "epoch": 1.2321167883211679, | |
| "grad_norm": 0.7843610048294067, | |
| "learning_rate": 7.168405338570667e-11, | |
| "loss": 1.5802574157714844, | |
| "step": 844 | |
| }, | |
| { | |
| "epoch": 1.235036496350365, | |
| "grad_norm": 2.210127353668213, | |
| "learning_rate": 7.154944248125693e-11, | |
| "loss": 1.7284915447235107, | |
| "step": 846 | |
| }, | |
| { | |
| "epoch": 1.237956204379562, | |
| "grad_norm": 4.429756164550781, | |
| "learning_rate": 7.141466010879904e-11, | |
| "loss": 1.5444270372390747, | |
| "step": 848 | |
| }, | |
| { | |
| "epoch": 1.2408759124087592, | |
| "grad_norm": 1.3784843683242798, | |
| "learning_rate": 7.127970766480695e-11, | |
| "loss": 1.6951996088027954, | |
| "step": 850 | |
| }, | |
| { | |
| "epoch": 1.243795620437956, | |
| "grad_norm": 2.2456016540527344, | |
| "learning_rate": 7.114458654751666e-11, | |
| "loss": 1.6329224109649658, | |
| "step": 852 | |
| }, | |
| { | |
| "epoch": 1.2467153284671533, | |
| "grad_norm": 1.2260881662368774, | |
| "learning_rate": 7.100929815691185e-11, | |
| "loss": 1.582199215888977, | |
| "step": 854 | |
| }, | |
| { | |
| "epoch": 1.2496350364963504, | |
| "grad_norm": 0.7291862368583679, | |
| "learning_rate": 7.087384389470928e-11, | |
| "loss": 1.1192151308059692, | |
| "step": 856 | |
| }, | |
| { | |
| "epoch": 1.2525547445255474, | |
| "grad_norm": 1.1797438859939575, | |
| "learning_rate": 7.073822516434425e-11, | |
| "loss": 1.3417707681655884, | |
| "step": 858 | |
| }, | |
| { | |
| "epoch": 1.2554744525547445, | |
| "grad_norm": 60.873146057128906, | |
| "learning_rate": 7.060244337095619e-11, | |
| "loss": 1.4547762870788574, | |
| "step": 860 | |
| }, | |
| { | |
| "epoch": 1.2583941605839417, | |
| "grad_norm": 1.3333382606506348, | |
| "learning_rate": 7.046649992137399e-11, | |
| "loss": 1.3389256000518799, | |
| "step": 862 | |
| }, | |
| { | |
| "epoch": 1.2613138686131387, | |
| "grad_norm": 0.7808487415313721, | |
| "learning_rate": 7.03303962241014e-11, | |
| "loss": 1.3111419677734375, | |
| "step": 864 | |
| }, | |
| { | |
| "epoch": 1.2642335766423358, | |
| "grad_norm": 3.617492437362671, | |
| "learning_rate": 7.019413368930259e-11, | |
| "loss": 1.4535256624221802, | |
| "step": 866 | |
| }, | |
| { | |
| "epoch": 1.2671532846715328, | |
| "grad_norm": 0.669325590133667, | |
| "learning_rate": 7.00577137287873e-11, | |
| "loss": 1.2923446893692017, | |
| "step": 868 | |
| }, | |
| { | |
| "epoch": 1.27007299270073, | |
| "grad_norm": 1.293830156326294, | |
| "learning_rate": 6.992113775599654e-11, | |
| "loss": 1.0269770622253418, | |
| "step": 870 | |
| }, | |
| { | |
| "epoch": 1.2729927007299269, | |
| "grad_norm": 0.541140615940094, | |
| "learning_rate": 6.978440718598756e-11, | |
| "loss": 1.1857497692108154, | |
| "step": 872 | |
| }, | |
| { | |
| "epoch": 1.275912408759124, | |
| "grad_norm": 1.0085563659667969, | |
| "learning_rate": 6.964752343541952e-11, | |
| "loss": 1.356915831565857, | |
| "step": 874 | |
| }, | |
| { | |
| "epoch": 1.2788321167883212, | |
| "grad_norm": 0.5604317784309387, | |
| "learning_rate": 6.95104879225386e-11, | |
| "loss": 1.1334080696105957, | |
| "step": 876 | |
| }, | |
| { | |
| "epoch": 1.2817518248175181, | |
| "grad_norm": 1.7824565172195435, | |
| "learning_rate": 6.937330206716343e-11, | |
| "loss": 1.169769525527954, | |
| "step": 878 | |
| }, | |
| { | |
| "epoch": 1.2846715328467153, | |
| "grad_norm": 0.7596222758293152, | |
| "learning_rate": 6.923596729067029e-11, | |
| "loss": 1.2873601913452148, | |
| "step": 880 | |
| }, | |
| { | |
| "epoch": 1.2875912408759125, | |
| "grad_norm": 1.7033840417861938, | |
| "learning_rate": 6.909848501597848e-11, | |
| "loss": 1.2963634729385376, | |
| "step": 882 | |
| }, | |
| { | |
| "epoch": 1.2905109489051094, | |
| "grad_norm": 0.625435471534729, | |
| "learning_rate": 6.896085666753544e-11, | |
| "loss": 1.453802227973938, | |
| "step": 884 | |
| }, | |
| { | |
| "epoch": 1.2934306569343066, | |
| "grad_norm": 1.6176484823226929, | |
| "learning_rate": 6.882308367130217e-11, | |
| "loss": 1.5025838613510132, | |
| "step": 886 | |
| }, | |
| { | |
| "epoch": 1.2963503649635038, | |
| "grad_norm": 0.3474730849266052, | |
| "learning_rate": 6.868516745473831e-11, | |
| "loss": 1.1926612854003906, | |
| "step": 888 | |
| }, | |
| { | |
| "epoch": 1.2992700729927007, | |
| "grad_norm": 0.7696311473846436, | |
| "learning_rate": 6.854710944678737e-11, | |
| "loss": 1.1586365699768066, | |
| "step": 890 | |
| }, | |
| { | |
| "epoch": 1.3021897810218979, | |
| "grad_norm": 0.9640692472457886, | |
| "learning_rate": 6.840891107786203e-11, | |
| "loss": 1.5028798580169678, | |
| "step": 892 | |
| }, | |
| { | |
| "epoch": 1.305109489051095, | |
| "grad_norm": 4.5033955574035645, | |
| "learning_rate": 6.827057377982915e-11, | |
| "loss": 1.5194616317749023, | |
| "step": 894 | |
| }, | |
| { | |
| "epoch": 1.308029197080292, | |
| "grad_norm": 1.9850316047668457, | |
| "learning_rate": 6.81320989859951e-11, | |
| "loss": 1.5591602325439453, | |
| "step": 896 | |
| }, | |
| { | |
| "epoch": 1.310948905109489, | |
| "grad_norm": 1.100955605506897, | |
| "learning_rate": 6.799348813109082e-11, | |
| "loss": 1.706603765487671, | |
| "step": 898 | |
| }, | |
| { | |
| "epoch": 1.313868613138686, | |
| "grad_norm": 0.5737488269805908, | |
| "learning_rate": 6.785474265125695e-11, | |
| "loss": 1.3330950736999512, | |
| "step": 900 | |
| }, | |
| { | |
| "epoch": 1.3167883211678832, | |
| "grad_norm": 1.1470412015914917, | |
| "learning_rate": 6.771586398402901e-11, | |
| "loss": 1.404266595840454, | |
| "step": 902 | |
| }, | |
| { | |
| "epoch": 1.3197080291970802, | |
| "grad_norm": 0.9973093867301941, | |
| "learning_rate": 6.757685356832243e-11, | |
| "loss": 1.281385898590088, | |
| "step": 904 | |
| }, | |
| { | |
| "epoch": 1.3226277372262774, | |
| "grad_norm": 0.6097076535224915, | |
| "learning_rate": 6.743771284441771e-11, | |
| "loss": 1.2605456113815308, | |
| "step": 906 | |
| }, | |
| { | |
| "epoch": 1.3255474452554745, | |
| "grad_norm": 0.8650342226028442, | |
| "learning_rate": 6.729844325394546e-11, | |
| "loss": 1.3625212907791138, | |
| "step": 908 | |
| }, | |
| { | |
| "epoch": 1.3284671532846715, | |
| "grad_norm": 0.8610764145851135, | |
| "learning_rate": 6.715904623987145e-11, | |
| "loss": 1.6717928647994995, | |
| "step": 910 | |
| }, | |
| { | |
| "epoch": 1.3313868613138686, | |
| "grad_norm": 0.8736777305603027, | |
| "learning_rate": 6.701952324648167e-11, | |
| "loss": 1.5597894191741943, | |
| "step": 912 | |
| }, | |
| { | |
| "epoch": 1.3343065693430658, | |
| "grad_norm": 0.7504048943519592, | |
| "learning_rate": 6.687987571936747e-11, | |
| "loss": 1.161590814590454, | |
| "step": 914 | |
| }, | |
| { | |
| "epoch": 1.3372262773722627, | |
| "grad_norm": 0.9293012022972107, | |
| "learning_rate": 6.674010510541039e-11, | |
| "loss": 1.3798034191131592, | |
| "step": 916 | |
| }, | |
| { | |
| "epoch": 1.34014598540146, | |
| "grad_norm": 0.7910136580467224, | |
| "learning_rate": 6.660021285276727e-11, | |
| "loss": 1.7263545989990234, | |
| "step": 918 | |
| }, | |
| { | |
| "epoch": 1.343065693430657, | |
| "grad_norm": 10.625754356384277, | |
| "learning_rate": 6.646020041085529e-11, | |
| "loss": 1.5023877620697021, | |
| "step": 920 | |
| }, | |
| { | |
| "epoch": 1.345985401459854, | |
| "grad_norm": 0.8937836289405823, | |
| "learning_rate": 6.632006923033689e-11, | |
| "loss": 1.6307129859924316, | |
| "step": 922 | |
| }, | |
| { | |
| "epoch": 1.348905109489051, | |
| "grad_norm": 0.8827693462371826, | |
| "learning_rate": 6.617982076310476e-11, | |
| "loss": 1.6425561904907227, | |
| "step": 924 | |
| }, | |
| { | |
| "epoch": 1.3518248175182481, | |
| "grad_norm": 1.9358364343643188, | |
| "learning_rate": 6.603945646226675e-11, | |
| "loss": 1.4509600400924683, | |
| "step": 926 | |
| }, | |
| { | |
| "epoch": 1.3547445255474453, | |
| "grad_norm": 0.7707522511482239, | |
| "learning_rate": 6.58989777821309e-11, | |
| "loss": 1.297328233718872, | |
| "step": 928 | |
| }, | |
| { | |
| "epoch": 1.3576642335766422, | |
| "grad_norm": 1.6969257593154907, | |
| "learning_rate": 6.575838617819036e-11, | |
| "loss": 1.4513022899627686, | |
| "step": 930 | |
| }, | |
| { | |
| "epoch": 1.3605839416058394, | |
| "grad_norm": 0.878479540348053, | |
| "learning_rate": 6.561768310710817e-11, | |
| "loss": 1.5764425992965698, | |
| "step": 932 | |
| }, | |
| { | |
| "epoch": 1.3635036496350366, | |
| "grad_norm": 1.5260920524597168, | |
| "learning_rate": 6.547687002670238e-11, | |
| "loss": 1.7976481914520264, | |
| "step": 934 | |
| }, | |
| { | |
| "epoch": 1.3664233576642335, | |
| "grad_norm": 0.9654127359390259, | |
| "learning_rate": 6.53359483959308e-11, | |
| "loss": 1.636899709701538, | |
| "step": 936 | |
| }, | |
| { | |
| "epoch": 1.3693430656934307, | |
| "grad_norm": 1.406870722770691, | |
| "learning_rate": 6.519491967487592e-11, | |
| "loss": 1.8173702955245972, | |
| "step": 938 | |
| }, | |
| { | |
| "epoch": 1.3722627737226278, | |
| "grad_norm": 1.2826157808303833, | |
| "learning_rate": 6.505378532472983e-11, | |
| "loss": 1.4768022298812866, | |
| "step": 940 | |
| }, | |
| { | |
| "epoch": 1.3751824817518248, | |
| "grad_norm": 1.2048250436782837, | |
| "learning_rate": 6.491254680777894e-11, | |
| "loss": 1.4769136905670166, | |
| "step": 942 | |
| }, | |
| { | |
| "epoch": 1.378102189781022, | |
| "grad_norm": 0.7053294777870178, | |
| "learning_rate": 6.477120558738903e-11, | |
| "loss": 1.1204323768615723, | |
| "step": 944 | |
| }, | |
| { | |
| "epoch": 1.3810218978102191, | |
| "grad_norm": 0.7908552289009094, | |
| "learning_rate": 6.462976312798997e-11, | |
| "loss": 1.5190601348876953, | |
| "step": 946 | |
| }, | |
| { | |
| "epoch": 1.383941605839416, | |
| "grad_norm": 2.0937423706054688, | |
| "learning_rate": 6.44882208950605e-11, | |
| "loss": 1.662810206413269, | |
| "step": 948 | |
| }, | |
| { | |
| "epoch": 1.3868613138686132, | |
| "grad_norm": 1.2219175100326538, | |
| "learning_rate": 6.434658035511315e-11, | |
| "loss": 1.493818998336792, | |
| "step": 950 | |
| }, | |
| { | |
| "epoch": 1.3897810218978102, | |
| "grad_norm": 0.5845208168029785, | |
| "learning_rate": 6.420484297567905e-11, | |
| "loss": 1.4118988513946533, | |
| "step": 952 | |
| }, | |
| { | |
| "epoch": 1.3927007299270073, | |
| "grad_norm": 0.7819458246231079, | |
| "learning_rate": 6.406301022529257e-11, | |
| "loss": 1.343995451927185, | |
| "step": 954 | |
| }, | |
| { | |
| "epoch": 1.3956204379562043, | |
| "grad_norm": 0.9314578771591187, | |
| "learning_rate": 6.392108357347634e-11, | |
| "loss": 1.273950457572937, | |
| "step": 956 | |
| }, | |
| { | |
| "epoch": 1.3985401459854014, | |
| "grad_norm": 0.67668217420578, | |
| "learning_rate": 6.377906449072578e-11, | |
| "loss": 1.1341450214385986, | |
| "step": 958 | |
| }, | |
| { | |
| "epoch": 1.4014598540145986, | |
| "grad_norm": 1.1271109580993652, | |
| "learning_rate": 6.363695444849407e-11, | |
| "loss": 1.469457983970642, | |
| "step": 960 | |
| }, | |
| { | |
| "epoch": 1.4043795620437955, | |
| "grad_norm": 1.7484785318374634, | |
| "learning_rate": 6.349475491917677e-11, | |
| "loss": 1.5630698204040527, | |
| "step": 962 | |
| }, | |
| { | |
| "epoch": 1.4072992700729927, | |
| "grad_norm": 1.3456151485443115, | |
| "learning_rate": 6.335246737609664e-11, | |
| "loss": 1.591662883758545, | |
| "step": 964 | |
| }, | |
| { | |
| "epoch": 1.4102189781021899, | |
| "grad_norm": 0.5922049283981323, | |
| "learning_rate": 6.321009329348832e-11, | |
| "loss": 1.4085381031036377, | |
| "step": 966 | |
| }, | |
| { | |
| "epoch": 1.4131386861313868, | |
| "grad_norm": 1.5729414224624634, | |
| "learning_rate": 6.306763414648311e-11, | |
| "loss": 1.717803716659546, | |
| "step": 968 | |
| }, | |
| { | |
| "epoch": 1.416058394160584, | |
| "grad_norm": 1.7132468223571777, | |
| "learning_rate": 6.292509141109364e-11, | |
| "loss": 1.4388347864151, | |
| "step": 970 | |
| }, | |
| { | |
| "epoch": 1.4189781021897812, | |
| "grad_norm": 1.1842399835586548, | |
| "learning_rate": 6.278246656419859e-11, | |
| "loss": 1.3138422966003418, | |
| "step": 972 | |
| }, | |
| { | |
| "epoch": 1.421897810218978, | |
| "grad_norm": 0.8163485527038574, | |
| "learning_rate": 6.263976108352739e-11, | |
| "loss": 1.4941157102584839, | |
| "step": 974 | |
| }, | |
| { | |
| "epoch": 1.4248175182481753, | |
| "grad_norm": 1.6739691495895386, | |
| "learning_rate": 6.249697644764493e-11, | |
| "loss": 1.4427509307861328, | |
| "step": 976 | |
| }, | |
| { | |
| "epoch": 1.4277372262773722, | |
| "grad_norm": 1.1508666276931763, | |
| "learning_rate": 6.235411413593627e-11, | |
| "loss": 1.3769874572753906, | |
| "step": 978 | |
| }, | |
| { | |
| "epoch": 1.4306569343065694, | |
| "grad_norm": 1.171834945678711, | |
| "learning_rate": 6.221117562859115e-11, | |
| "loss": 1.2018156051635742, | |
| "step": 980 | |
| }, | |
| { | |
| "epoch": 1.4335766423357663, | |
| "grad_norm": 0.44805899262428284, | |
| "learning_rate": 6.206816240658887e-11, | |
| "loss": 1.2645214796066284, | |
| "step": 982 | |
| }, | |
| { | |
| "epoch": 1.4364963503649635, | |
| "grad_norm": 0.8003254532814026, | |
| "learning_rate": 6.192507595168279e-11, | |
| "loss": 1.5221058130264282, | |
| "step": 984 | |
| }, | |
| { | |
| "epoch": 1.4394160583941606, | |
| "grad_norm": 0.7239990830421448, | |
| "learning_rate": 6.178191774638506e-11, | |
| "loss": 1.3511683940887451, | |
| "step": 986 | |
| }, | |
| { | |
| "epoch": 1.4423357664233576, | |
| "grad_norm": 0.9869601130485535, | |
| "learning_rate": 6.163868927395123e-11, | |
| "loss": 1.5366151332855225, | |
| "step": 988 | |
| }, | |
| { | |
| "epoch": 1.4452554744525548, | |
| "grad_norm": 5.984511852264404, | |
| "learning_rate": 6.149539201836484e-11, | |
| "loss": 1.663490891456604, | |
| "step": 990 | |
| }, | |
| { | |
| "epoch": 1.448175182481752, | |
| "grad_norm": 0.794996976852417, | |
| "learning_rate": 6.135202746432217e-11, | |
| "loss": 1.5609914064407349, | |
| "step": 992 | |
| }, | |
| { | |
| "epoch": 1.4510948905109489, | |
| "grad_norm": 0.5128317475318909, | |
| "learning_rate": 6.120859709721674e-11, | |
| "loss": 1.3358368873596191, | |
| "step": 994 | |
| }, | |
| { | |
| "epoch": 1.454014598540146, | |
| "grad_norm": 1.1187900304794312, | |
| "learning_rate": 6.106510240312393e-11, | |
| "loss": 1.4128565788269043, | |
| "step": 996 | |
| }, | |
| { | |
| "epoch": 1.4569343065693432, | |
| "grad_norm": 0.7358497381210327, | |
| "learning_rate": 6.092154486878562e-11, | |
| "loss": 1.3347644805908203, | |
| "step": 998 | |
| }, | |
| { | |
| "epoch": 1.4598540145985401, | |
| "grad_norm": 1.18520188331604, | |
| "learning_rate": 6.077792598159479e-11, | |
| "loss": 1.4752131700515747, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 1.4627737226277373, | |
| "grad_norm": 3.6807000637054443, | |
| "learning_rate": 6.06342472295801e-11, | |
| "loss": 1.6720980405807495, | |
| "step": 1002 | |
| }, | |
| { | |
| "epoch": 1.4656934306569342, | |
| "grad_norm": 0.9608777165412903, | |
| "learning_rate": 6.049051010139046e-11, | |
| "loss": 1.5300023555755615, | |
| "step": 1004 | |
| }, | |
| { | |
| "epoch": 1.4686131386861314, | |
| "grad_norm": 1.0501829385757446, | |
| "learning_rate": 6.034671608627957e-11, | |
| "loss": 1.3792424201965332, | |
| "step": 1006 | |
| }, | |
| { | |
| "epoch": 1.4715328467153284, | |
| "grad_norm": 0.7508878111839294, | |
| "learning_rate": 6.020286667409061e-11, | |
| "loss": 1.4617664813995361, | |
| "step": 1008 | |
| }, | |
| { | |
| "epoch": 1.4744525547445255, | |
| "grad_norm": 1.0573078393936157, | |
| "learning_rate": 6.005896335524069e-11, | |
| "loss": 1.3801748752593994, | |
| "step": 1010 | |
| }, | |
| { | |
| "epoch": 1.4773722627737227, | |
| "grad_norm": 1.8899734020233154, | |
| "learning_rate": 5.99150076207054e-11, | |
| "loss": 1.2965296506881714, | |
| "step": 1012 | |
| }, | |
| { | |
| "epoch": 1.4802919708029196, | |
| "grad_norm": 1.5424823760986328, | |
| "learning_rate": 5.977100096200347e-11, | |
| "loss": 1.3949477672576904, | |
| "step": 1014 | |
| }, | |
| { | |
| "epoch": 1.4832116788321168, | |
| "grad_norm": 0.6652219891548157, | |
| "learning_rate": 5.962694487118122e-11, | |
| "loss": 1.3593168258666992, | |
| "step": 1016 | |
| }, | |
| { | |
| "epoch": 1.486131386861314, | |
| "grad_norm": 1.5784106254577637, | |
| "learning_rate": 5.948284084079716e-11, | |
| "loss": 1.3809421062469482, | |
| "step": 1018 | |
| }, | |
| { | |
| "epoch": 1.489051094890511, | |
| "grad_norm": 0.8603954315185547, | |
| "learning_rate": 5.933869036390651e-11, | |
| "loss": 1.380603551864624, | |
| "step": 1020 | |
| }, | |
| { | |
| "epoch": 1.491970802919708, | |
| "grad_norm": 1.1792484521865845, | |
| "learning_rate": 5.919449493404563e-11, | |
| "loss": 1.6048811674118042, | |
| "step": 1022 | |
| }, | |
| { | |
| "epoch": 1.4948905109489052, | |
| "grad_norm": 0.9351183772087097, | |
| "learning_rate": 5.905025604521674e-11, | |
| "loss": 1.7812795639038086, | |
| "step": 1024 | |
| }, | |
| { | |
| "epoch": 1.4978102189781022, | |
| "grad_norm": 0.8663079142570496, | |
| "learning_rate": 5.890597519187229e-11, | |
| "loss": 1.6461979150772095, | |
| "step": 1026 | |
| }, | |
| { | |
| "epoch": 1.5007299270072991, | |
| "grad_norm": 1.4873929023742676, | |
| "learning_rate": 5.876165386889952e-11, | |
| "loss": 1.3349511623382568, | |
| "step": 1028 | |
| }, | |
| { | |
| "epoch": 1.5036496350364965, | |
| "grad_norm": 0.5745068192481995, | |
| "learning_rate": 5.861729357160501e-11, | |
| "loss": 1.1580829620361328, | |
| "step": 1030 | |
| }, | |
| { | |
| "epoch": 1.5065693430656935, | |
| "grad_norm": 0.7135260701179504, | |
| "learning_rate": 5.84728957956991e-11, | |
| "loss": 1.2719569206237793, | |
| "step": 1032 | |
| }, | |
| { | |
| "epoch": 1.5094890510948904, | |
| "grad_norm": 0.8659630417823792, | |
| "learning_rate": 5.832846203728045e-11, | |
| "loss": 1.3541443347930908, | |
| "step": 1034 | |
| }, | |
| { | |
| "epoch": 1.5124087591240876, | |
| "grad_norm": 1.3131263256072998, | |
| "learning_rate": 5.81839937928206e-11, | |
| "loss": 1.7874629497528076, | |
| "step": 1036 | |
| }, | |
| { | |
| "epoch": 1.5153284671532847, | |
| "grad_norm": 1.2424635887145996, | |
| "learning_rate": 5.8039492559148314e-11, | |
| "loss": 1.8875402212142944, | |
| "step": 1038 | |
| }, | |
| { | |
| "epoch": 1.5182481751824817, | |
| "grad_norm": 11.118843078613281, | |
| "learning_rate": 5.789495983343418e-11, | |
| "loss": 1.626911997795105, | |
| "step": 1040 | |
| }, | |
| { | |
| "epoch": 1.5211678832116788, | |
| "grad_norm": 1.3168439865112305, | |
| "learning_rate": 5.7750397113175114e-11, | |
| "loss": 1.4138636589050293, | |
| "step": 1042 | |
| }, | |
| { | |
| "epoch": 1.524087591240876, | |
| "grad_norm": 1.0707910060882568, | |
| "learning_rate": 5.760580589617876e-11, | |
| "loss": 1.4132678508758545, | |
| "step": 1044 | |
| }, | |
| { | |
| "epoch": 1.527007299270073, | |
| "grad_norm": 1.8395414352416992, | |
| "learning_rate": 5.746118768054806e-11, | |
| "loss": 1.568450689315796, | |
| "step": 1046 | |
| }, | |
| { | |
| "epoch": 1.5299270072992701, | |
| "grad_norm": 0.9076103568077087, | |
| "learning_rate": 5.73165439646656e-11, | |
| "loss": 1.715510368347168, | |
| "step": 1048 | |
| }, | |
| { | |
| "epoch": 1.5328467153284673, | |
| "grad_norm": 1.3996844291687012, | |
| "learning_rate": 5.717187624717827e-11, | |
| "loss": 1.687088966369629, | |
| "step": 1050 | |
| }, | |
| { | |
| "epoch": 1.5357664233576642, | |
| "grad_norm": 0.6579364538192749, | |
| "learning_rate": 5.7027186026981604e-11, | |
| "loss": 1.2553963661193848, | |
| "step": 1052 | |
| }, | |
| { | |
| "epoch": 1.5386861313868612, | |
| "grad_norm": 3.5541012287139893, | |
| "learning_rate": 5.6882474803204254e-11, | |
| "loss": 1.466941237449646, | |
| "step": 1054 | |
| }, | |
| { | |
| "epoch": 1.5416058394160586, | |
| "grad_norm": 3.3361599445343018, | |
| "learning_rate": 5.673774407519253e-11, | |
| "loss": 1.5611426830291748, | |
| "step": 1056 | |
| }, | |
| { | |
| "epoch": 1.5445255474452555, | |
| "grad_norm": 1.395855188369751, | |
| "learning_rate": 5.6592995342494836e-11, | |
| "loss": 1.778045892715454, | |
| "step": 1058 | |
| }, | |
| { | |
| "epoch": 1.5474452554744524, | |
| "grad_norm": 1.4240235090255737, | |
| "learning_rate": 5.644823010484604e-11, | |
| "loss": 1.7712279558181763, | |
| "step": 1060 | |
| }, | |
| { | |
| "epoch": 1.5503649635036496, | |
| "grad_norm": 1.2174320220947266, | |
| "learning_rate": 5.6303449862152144e-11, | |
| "loss": 1.3837765455245972, | |
| "step": 1062 | |
| }, | |
| { | |
| "epoch": 1.5532846715328468, | |
| "grad_norm": 0.866814136505127, | |
| "learning_rate": 5.61586561144745e-11, | |
| "loss": 1.6202555894851685, | |
| "step": 1064 | |
| }, | |
| { | |
| "epoch": 1.5562043795620437, | |
| "grad_norm": 2.8624088764190674, | |
| "learning_rate": 5.601385036201443e-11, | |
| "loss": 1.2670037746429443, | |
| "step": 1066 | |
| }, | |
| { | |
| "epoch": 1.5591240875912409, | |
| "grad_norm": 1.4413124322891235, | |
| "learning_rate": 5.586903410509765e-11, | |
| "loss": 1.2830889225006104, | |
| "step": 1068 | |
| }, | |
| { | |
| "epoch": 1.562043795620438, | |
| "grad_norm": 1.2201826572418213, | |
| "learning_rate": 5.572420884415871e-11, | |
| "loss": 1.0796787738800049, | |
| "step": 1070 | |
| }, | |
| { | |
| "epoch": 1.564963503649635, | |
| "grad_norm": 1.3723790645599365, | |
| "learning_rate": 5.557937607972542e-11, | |
| "loss": 1.0531703233718872, | |
| "step": 1072 | |
| }, | |
| { | |
| "epoch": 1.5678832116788322, | |
| "grad_norm": 0.7803177237510681, | |
| "learning_rate": 5.543453731240338e-11, | |
| "loss": 1.2681570053100586, | |
| "step": 1074 | |
| }, | |
| { | |
| "epoch": 1.5708029197080293, | |
| "grad_norm": 0.9405286908149719, | |
| "learning_rate": 5.528969404286032e-11, | |
| "loss": 1.2899736166000366, | |
| "step": 1076 | |
| }, | |
| { | |
| "epoch": 1.5737226277372263, | |
| "grad_norm": 1.5248796939849854, | |
| "learning_rate": 5.51448477718107e-11, | |
| "loss": 1.1627418994903564, | |
| "step": 1078 | |
| }, | |
| { | |
| "epoch": 1.5766423357664232, | |
| "grad_norm": 0.5063557028770447, | |
| "learning_rate": 5.5000000000000004e-11, | |
| "loss": 1.2972100973129272, | |
| "step": 1080 | |
| }, | |
| { | |
| "epoch": 1.5795620437956206, | |
| "grad_norm": 0.9377431869506836, | |
| "learning_rate": 5.485515222818931e-11, | |
| "loss": 1.3344355821609497, | |
| "step": 1082 | |
| }, | |
| { | |
| "epoch": 1.5824817518248175, | |
| "grad_norm": 2.720984935760498, | |
| "learning_rate": 5.4710305957139695e-11, | |
| "loss": 1.3908426761627197, | |
| "step": 1084 | |
| }, | |
| { | |
| "epoch": 1.5854014598540145, | |
| "grad_norm": 1.205546259880066, | |
| "learning_rate": 5.4565462687596646e-11, | |
| "loss": 1.3961727619171143, | |
| "step": 1086 | |
| }, | |
| { | |
| "epoch": 1.5883211678832116, | |
| "grad_norm": 0.8635709881782532, | |
| "learning_rate": 5.442062392027459e-11, | |
| "loss": 1.1889724731445312, | |
| "step": 1088 | |
| }, | |
| { | |
| "epoch": 1.5912408759124088, | |
| "grad_norm": 0.9093577265739441, | |
| "learning_rate": 5.4275791155841305e-11, | |
| "loss": 1.1577911376953125, | |
| "step": 1090 | |
| }, | |
| { | |
| "epoch": 1.5941605839416058, | |
| "grad_norm": 1.561112880706787, | |
| "learning_rate": 5.4130965894902354e-11, | |
| "loss": 1.2478744983673096, | |
| "step": 1092 | |
| }, | |
| { | |
| "epoch": 1.597080291970803, | |
| "grad_norm": 0.9377318024635315, | |
| "learning_rate": 5.398614963798558e-11, | |
| "loss": 1.3727445602416992, | |
| "step": 1094 | |
| }, | |
| { | |
| "epoch": 1.6, | |
| "grad_norm": 2.3208870887756348, | |
| "learning_rate": 5.384134388552552e-11, | |
| "loss": 1.5765446424484253, | |
| "step": 1096 | |
| }, | |
| { | |
| "epoch": 1.602919708029197, | |
| "grad_norm": 0.5327683687210083, | |
| "learning_rate": 5.3696550137847864e-11, | |
| "loss": 1.294034481048584, | |
| "step": 1098 | |
| }, | |
| { | |
| "epoch": 1.6058394160583942, | |
| "grad_norm": 0.9915587902069092, | |
| "learning_rate": 5.355176989515396e-11, | |
| "loss": 1.2214086055755615, | |
| "step": 1100 | |
| }, | |
| { | |
| "epoch": 1.6087591240875914, | |
| "grad_norm": 0.9066019058227539, | |
| "learning_rate": 5.3407004657505185e-11, | |
| "loss": 1.3712859153747559, | |
| "step": 1102 | |
| }, | |
| { | |
| "epoch": 1.6116788321167883, | |
| "grad_norm": 9.204240798950195, | |
| "learning_rate": 5.326225592480748e-11, | |
| "loss": 1.4917619228363037, | |
| "step": 1104 | |
| }, | |
| { | |
| "epoch": 1.6145985401459853, | |
| "grad_norm": 1.6456899642944336, | |
| "learning_rate": 5.3117525196795767e-11, | |
| "loss": 1.2672224044799805, | |
| "step": 1106 | |
| }, | |
| { | |
| "epoch": 1.6175182481751826, | |
| "grad_norm": 10.377935409545898, | |
| "learning_rate": 5.297281397301841e-11, | |
| "loss": 1.2122763395309448, | |
| "step": 1108 | |
| }, | |
| { | |
| "epoch": 1.6204379562043796, | |
| "grad_norm": 2.182238817214966, | |
| "learning_rate": 5.282812375282173e-11, | |
| "loss": 1.640610694885254, | |
| "step": 1110 | |
| }, | |
| { | |
| "epoch": 1.6233576642335765, | |
| "grad_norm": 0.5079107284545898, | |
| "learning_rate": 5.268345603533441e-11, | |
| "loss": 1.237924337387085, | |
| "step": 1112 | |
| }, | |
| { | |
| "epoch": 1.6262773722627737, | |
| "grad_norm": 0.3855704367160797, | |
| "learning_rate": 5.2538812319451956e-11, | |
| "loss": 1.183269739151001, | |
| "step": 1114 | |
| }, | |
| { | |
| "epoch": 1.6291970802919709, | |
| "grad_norm": 0.47776803374290466, | |
| "learning_rate": 5.2394194103821245e-11, | |
| "loss": 1.2128846645355225, | |
| "step": 1116 | |
| }, | |
| { | |
| "epoch": 1.6321167883211678, | |
| "grad_norm": 0.812568724155426, | |
| "learning_rate": 5.22496028868249e-11, | |
| "loss": 1.2890725135803223, | |
| "step": 1118 | |
| }, | |
| { | |
| "epoch": 1.635036496350365, | |
| "grad_norm": 1.0895529985427856, | |
| "learning_rate": 5.210504016656583e-11, | |
| "loss": 1.5768861770629883, | |
| "step": 1120 | |
| }, | |
| { | |
| "epoch": 1.6379562043795621, | |
| "grad_norm": 1.085981011390686, | |
| "learning_rate": 5.1960507440851726e-11, | |
| "loss": 1.5731027126312256, | |
| "step": 1122 | |
| }, | |
| { | |
| "epoch": 1.640875912408759, | |
| "grad_norm": 2.4678897857666016, | |
| "learning_rate": 5.1816006207179414e-11, | |
| "loss": 1.5561609268188477, | |
| "step": 1124 | |
| }, | |
| { | |
| "epoch": 1.6437956204379562, | |
| "grad_norm": 0.7261845469474792, | |
| "learning_rate": 5.167153796271955e-11, | |
| "loss": 1.2497360706329346, | |
| "step": 1126 | |
| }, | |
| { | |
| "epoch": 1.6467153284671534, | |
| "grad_norm": 0.9011226296424866, | |
| "learning_rate": 5.152710420430091e-11, | |
| "loss": 1.536658763885498, | |
| "step": 1128 | |
| }, | |
| { | |
| "epoch": 1.6496350364963503, | |
| "grad_norm": 0.7906641960144043, | |
| "learning_rate": 5.1382706428395e-11, | |
| "loss": 1.3465948104858398, | |
| "step": 1130 | |
| }, | |
| { | |
| "epoch": 1.6525547445255473, | |
| "grad_norm": 0.6133513450622559, | |
| "learning_rate": 5.1238346131100486e-11, | |
| "loss": 1.4438421726226807, | |
| "step": 1132 | |
| }, | |
| { | |
| "epoch": 1.6554744525547447, | |
| "grad_norm": 0.9642121195793152, | |
| "learning_rate": 5.1094024808127713e-11, | |
| "loss": 1.3970121145248413, | |
| "step": 1134 | |
| }, | |
| { | |
| "epoch": 1.6583941605839416, | |
| "grad_norm": 1.0689072608947754, | |
| "learning_rate": 5.094974395478327e-11, | |
| "loss": 1.409815788269043, | |
| "step": 1136 | |
| }, | |
| { | |
| "epoch": 1.6613138686131386, | |
| "grad_norm": 1.137559413909912, | |
| "learning_rate": 5.080550506595439e-11, | |
| "loss": 1.4787304401397705, | |
| "step": 1138 | |
| }, | |
| { | |
| "epoch": 1.6642335766423357, | |
| "grad_norm": 1.2347713708877563, | |
| "learning_rate": 5.0661309636093525e-11, | |
| "loss": 1.4061477184295654, | |
| "step": 1140 | |
| }, | |
| { | |
| "epoch": 1.667153284671533, | |
| "grad_norm": 0.803547739982605, | |
| "learning_rate": 5.0517159159202843e-11, | |
| "loss": 1.2163686752319336, | |
| "step": 1142 | |
| }, | |
| { | |
| "epoch": 1.6700729927007298, | |
| "grad_norm": 2.352090358734131, | |
| "learning_rate": 5.037305512881878e-11, | |
| "loss": 1.4551012516021729, | |
| "step": 1144 | |
| }, | |
| { | |
| "epoch": 1.672992700729927, | |
| "grad_norm": 0.8435637354850769, | |
| "learning_rate": 5.022899903799655e-11, | |
| "loss": 1.5262391567230225, | |
| "step": 1146 | |
| }, | |
| { | |
| "epoch": 1.6759124087591242, | |
| "grad_norm": 1.5151363611221313, | |
| "learning_rate": 5.0084992379294625e-11, | |
| "loss": 1.7455451488494873, | |
| "step": 1148 | |
| }, | |
| { | |
| "epoch": 1.6788321167883211, | |
| "grad_norm": 1.6078482866287231, | |
| "learning_rate": 4.994103664475933e-11, | |
| "loss": 1.2475740909576416, | |
| "step": 1150 | |
| }, | |
| { | |
| "epoch": 1.6817518248175183, | |
| "grad_norm": 0.9017986059188843, | |
| "learning_rate": 4.979713332590939e-11, | |
| "loss": 1.277698278427124, | |
| "step": 1152 | |
| }, | |
| { | |
| "epoch": 1.6846715328467154, | |
| "grad_norm": 0.9896549582481384, | |
| "learning_rate": 4.9653283913720435e-11, | |
| "loss": 1.5071510076522827, | |
| "step": 1154 | |
| }, | |
| { | |
| "epoch": 1.6875912408759124, | |
| "grad_norm": 1.081743597984314, | |
| "learning_rate": 4.950948989860955e-11, | |
| "loss": 1.6487393379211426, | |
| "step": 1156 | |
| }, | |
| { | |
| "epoch": 1.6905109489051093, | |
| "grad_norm": 2.4992644786834717, | |
| "learning_rate": 4.9365752770419915e-11, | |
| "loss": 1.5936250686645508, | |
| "step": 1158 | |
| }, | |
| { | |
| "epoch": 1.6934306569343067, | |
| "grad_norm": 1.5350581407546997, | |
| "learning_rate": 4.922207401840521e-11, | |
| "loss": 1.4541165828704834, | |
| "step": 1160 | |
| }, | |
| { | |
| "epoch": 1.6963503649635037, | |
| "grad_norm": 0.7558183670043945, | |
| "learning_rate": 4.9078455131214394e-11, | |
| "loss": 1.449777603149414, | |
| "step": 1162 | |
| }, | |
| { | |
| "epoch": 1.6992700729927006, | |
| "grad_norm": 0.8487734794616699, | |
| "learning_rate": 4.8934897596876084e-11, | |
| "loss": 1.6207102537155151, | |
| "step": 1164 | |
| }, | |
| { | |
| "epoch": 1.7021897810218978, | |
| "grad_norm": 0.6523111462593079, | |
| "learning_rate": 4.879140290278327e-11, | |
| "loss": 1.4100027084350586, | |
| "step": 1166 | |
| }, | |
| { | |
| "epoch": 1.705109489051095, | |
| "grad_norm": 0.82191401720047, | |
| "learning_rate": 4.8647972535677835e-11, | |
| "loss": 1.538939118385315, | |
| "step": 1168 | |
| }, | |
| { | |
| "epoch": 1.7080291970802919, | |
| "grad_norm": 1.0211535692214966, | |
| "learning_rate": 4.8504607981635173e-11, | |
| "loss": 1.5269452333450317, | |
| "step": 1170 | |
| }, | |
| { | |
| "epoch": 1.710948905109489, | |
| "grad_norm": 1.3899484872817993, | |
| "learning_rate": 4.83613107260488e-11, | |
| "loss": 1.6164069175720215, | |
| "step": 1172 | |
| }, | |
| { | |
| "epoch": 1.7138686131386862, | |
| "grad_norm": 1.4372365474700928, | |
| "learning_rate": 4.821808225361497e-11, | |
| "loss": 1.4939758777618408, | |
| "step": 1174 | |
| }, | |
| { | |
| "epoch": 1.7167883211678832, | |
| "grad_norm": 1.6705962419509888, | |
| "learning_rate": 4.8074924048317217e-11, | |
| "loss": 1.5666300058364868, | |
| "step": 1176 | |
| }, | |
| { | |
| "epoch": 1.7197080291970803, | |
| "grad_norm": 0.9834710359573364, | |
| "learning_rate": 4.793183759341114e-11, | |
| "loss": 1.5477665662765503, | |
| "step": 1178 | |
| }, | |
| { | |
| "epoch": 1.7226277372262775, | |
| "grad_norm": 0.9190999269485474, | |
| "learning_rate": 4.7788824371408855e-11, | |
| "loss": 1.5345749855041504, | |
| "step": 1180 | |
| }, | |
| { | |
| "epoch": 1.7255474452554744, | |
| "grad_norm": 1.211067795753479, | |
| "learning_rate": 4.764588586406373e-11, | |
| "loss": 1.4287199974060059, | |
| "step": 1182 | |
| }, | |
| { | |
| "epoch": 1.7284671532846714, | |
| "grad_norm": 1.0257560014724731, | |
| "learning_rate": 4.750302355235506e-11, | |
| "loss": 1.7664557695388794, | |
| "step": 1184 | |
| }, | |
| { | |
| "epoch": 1.7313868613138688, | |
| "grad_norm": 2.3863766193389893, | |
| "learning_rate": 4.7360238916472634e-11, | |
| "loss": 1.5075891017913818, | |
| "step": 1186 | |
| }, | |
| { | |
| "epoch": 1.7343065693430657, | |
| "grad_norm": 1.257262110710144, | |
| "learning_rate": 4.721753343580143e-11, | |
| "loss": 1.5417242050170898, | |
| "step": 1188 | |
| }, | |
| { | |
| "epoch": 1.7372262773722627, | |
| "grad_norm": 0.9946934580802917, | |
| "learning_rate": 4.7074908588906385e-11, | |
| "loss": 1.3855595588684082, | |
| "step": 1190 | |
| }, | |
| { | |
| "epoch": 1.7401459854014598, | |
| "grad_norm": 0.8203712701797485, | |
| "learning_rate": 4.6932365853516914e-11, | |
| "loss": 1.5216609239578247, | |
| "step": 1192 | |
| }, | |
| { | |
| "epoch": 1.743065693430657, | |
| "grad_norm": 0.6123158931732178, | |
| "learning_rate": 4.678990670651169e-11, | |
| "loss": 1.5857679843902588, | |
| "step": 1194 | |
| }, | |
| { | |
| "epoch": 1.745985401459854, | |
| "grad_norm": 1.0939323902130127, | |
| "learning_rate": 4.664753262390337e-11, | |
| "loss": 1.5613963603973389, | |
| "step": 1196 | |
| }, | |
| { | |
| "epoch": 1.748905109489051, | |
| "grad_norm": 0.616036593914032, | |
| "learning_rate": 4.6505245080823234e-11, | |
| "loss": 1.4957456588745117, | |
| "step": 1198 | |
| }, | |
| { | |
| "epoch": 1.7518248175182483, | |
| "grad_norm": 0.6918394565582275, | |
| "learning_rate": 4.6363045551505944e-11, | |
| "loss": 1.509902000427246, | |
| "step": 1200 | |
| }, | |
| { | |
| "epoch": 1.7547445255474452, | |
| "grad_norm": 1.065696358680725, | |
| "learning_rate": 4.622093550927423e-11, | |
| "loss": 1.5907013416290283, | |
| "step": 1202 | |
| }, | |
| { | |
| "epoch": 1.7576642335766424, | |
| "grad_norm": 0.8832411170005798, | |
| "learning_rate": 4.6078916426523674e-11, | |
| "loss": 1.4450404644012451, | |
| "step": 1204 | |
| }, | |
| { | |
| "epoch": 1.7605839416058395, | |
| "grad_norm": 1.1494882106781006, | |
| "learning_rate": 4.593698977470744e-11, | |
| "loss": 1.5977954864501953, | |
| "step": 1206 | |
| }, | |
| { | |
| "epoch": 1.7635036496350365, | |
| "grad_norm": 1.0702247619628906, | |
| "learning_rate": 4.579515702432098e-11, | |
| "loss": 1.6019237041473389, | |
| "step": 1208 | |
| }, | |
| { | |
| "epoch": 1.7664233576642334, | |
| "grad_norm": 1.1543371677398682, | |
| "learning_rate": 4.565341964488686e-11, | |
| "loss": 1.6050516366958618, | |
| "step": 1210 | |
| }, | |
| { | |
| "epoch": 1.7693430656934308, | |
| "grad_norm": 3.437559127807617, | |
| "learning_rate": 4.551177910493951e-11, | |
| "loss": 1.6240508556365967, | |
| "step": 1212 | |
| }, | |
| { | |
| "epoch": 1.7722627737226277, | |
| "grad_norm": 0.8247520327568054, | |
| "learning_rate": 4.537023687201004e-11, | |
| "loss": 1.478027582168579, | |
| "step": 1214 | |
| }, | |
| { | |
| "epoch": 1.7751824817518247, | |
| "grad_norm": 1.8147697448730469, | |
| "learning_rate": 4.522879441261097e-11, | |
| "loss": 1.6583856344223022, | |
| "step": 1216 | |
| }, | |
| { | |
| "epoch": 1.7781021897810219, | |
| "grad_norm": 1.4217400550842285, | |
| "learning_rate": 4.5087453192221077e-11, | |
| "loss": 1.5172584056854248, | |
| "step": 1218 | |
| }, | |
| { | |
| "epoch": 1.781021897810219, | |
| "grad_norm": 1.15066397190094, | |
| "learning_rate": 4.4946214675270194e-11, | |
| "loss": 1.4624955654144287, | |
| "step": 1220 | |
| }, | |
| { | |
| "epoch": 1.783941605839416, | |
| "grad_norm": 1.8578609228134155, | |
| "learning_rate": 4.4805080325124085e-11, | |
| "loss": 1.3036024570465088, | |
| "step": 1222 | |
| }, | |
| { | |
| "epoch": 1.7868613138686131, | |
| "grad_norm": 0.9672820568084717, | |
| "learning_rate": 4.4664051604069215e-11, | |
| "loss": 1.2247357368469238, | |
| "step": 1224 | |
| }, | |
| { | |
| "epoch": 1.7897810218978103, | |
| "grad_norm": 2.0057568550109863, | |
| "learning_rate": 4.452312997329763e-11, | |
| "loss": 1.2701904773712158, | |
| "step": 1226 | |
| }, | |
| { | |
| "epoch": 1.7927007299270072, | |
| "grad_norm": 0.47993046045303345, | |
| "learning_rate": 4.438231689289185e-11, | |
| "loss": 1.4895787239074707, | |
| "step": 1228 | |
| }, | |
| { | |
| "epoch": 1.7956204379562044, | |
| "grad_norm": 0.7674112915992737, | |
| "learning_rate": 4.4241613821809646e-11, | |
| "loss": 1.4899382591247559, | |
| "step": 1230 | |
| }, | |
| { | |
| "epoch": 1.7985401459854016, | |
| "grad_norm": 0.2502080500125885, | |
| "learning_rate": 4.410102221786909e-11, | |
| "loss": 1.23140549659729, | |
| "step": 1232 | |
| }, | |
| { | |
| "epoch": 1.8014598540145985, | |
| "grad_norm": 0.31645339727401733, | |
| "learning_rate": 4.3960543537733255e-11, | |
| "loss": 1.2940046787261963, | |
| "step": 1234 | |
| }, | |
| { | |
| "epoch": 1.8043795620437955, | |
| "grad_norm": 0.6047260761260986, | |
| "learning_rate": 4.382017923689525e-11, | |
| "loss": 1.1552473306655884, | |
| "step": 1236 | |
| }, | |
| { | |
| "epoch": 1.8072992700729928, | |
| "grad_norm": 0.36432042717933655, | |
| "learning_rate": 4.3679930769663116e-11, | |
| "loss": 1.1009633541107178, | |
| "step": 1238 | |
| }, | |
| { | |
| "epoch": 1.8102189781021898, | |
| "grad_norm": 0.3929561376571655, | |
| "learning_rate": 4.3539799589144715e-11, | |
| "loss": 1.137239694595337, | |
| "step": 1240 | |
| }, | |
| { | |
| "epoch": 1.8131386861313867, | |
| "grad_norm": 1.3844205141067505, | |
| "learning_rate": 4.339978714723274e-11, | |
| "loss": 1.038290023803711, | |
| "step": 1242 | |
| }, | |
| { | |
| "epoch": 1.816058394160584, | |
| "grad_norm": 0.4448591470718384, | |
| "learning_rate": 4.325989489458963e-11, | |
| "loss": 1.0505162477493286, | |
| "step": 1244 | |
| }, | |
| { | |
| "epoch": 1.818978102189781, | |
| "grad_norm": 0.36241307854652405, | |
| "learning_rate": 4.312012428063252e-11, | |
| "loss": 1.0383713245391846, | |
| "step": 1246 | |
| }, | |
| { | |
| "epoch": 1.821897810218978, | |
| "grad_norm": 0.3799454867839813, | |
| "learning_rate": 4.298047675351832e-11, | |
| "loss": 1.0668659210205078, | |
| "step": 1248 | |
| }, | |
| { | |
| "epoch": 1.8248175182481752, | |
| "grad_norm": 0.9156618118286133, | |
| "learning_rate": 4.2840953760128575e-11, | |
| "loss": 1.2242635488510132, | |
| "step": 1250 | |
| }, | |
| { | |
| "epoch": 1.8277372262773723, | |
| "grad_norm": 0.8311979174613953, | |
| "learning_rate": 4.270155674605455e-11, | |
| "loss": 1.4662895202636719, | |
| "step": 1252 | |
| }, | |
| { | |
| "epoch": 1.8306569343065693, | |
| "grad_norm": 0.5216842293739319, | |
| "learning_rate": 4.25622871555823e-11, | |
| "loss": 1.4537993669509888, | |
| "step": 1254 | |
| }, | |
| { | |
| "epoch": 1.8335766423357664, | |
| "grad_norm": 0.6534904837608337, | |
| "learning_rate": 4.242314643167759e-11, | |
| "loss": 1.4437849521636963, | |
| "step": 1256 | |
| }, | |
| { | |
| "epoch": 1.8364963503649636, | |
| "grad_norm": 0.7201212048530579, | |
| "learning_rate": 4.2284136015971e-11, | |
| "loss": 1.4175381660461426, | |
| "step": 1258 | |
| }, | |
| { | |
| "epoch": 1.8394160583941606, | |
| "grad_norm": 1.0450283288955688, | |
| "learning_rate": 4.214525734874306e-11, | |
| "loss": 1.4487650394439697, | |
| "step": 1260 | |
| }, | |
| { | |
| "epoch": 1.8423357664233575, | |
| "grad_norm": 0.5216411352157593, | |
| "learning_rate": 4.2006511868909207e-11, | |
| "loss": 1.3851439952850342, | |
| "step": 1262 | |
| }, | |
| { | |
| "epoch": 1.845255474452555, | |
| "grad_norm": 0.5573458671569824, | |
| "learning_rate": 4.186790101400491e-11, | |
| "loss": 1.444380283355713, | |
| "step": 1264 | |
| }, | |
| { | |
| "epoch": 1.8481751824817518, | |
| "grad_norm": 0.8099340200424194, | |
| "learning_rate": 4.172942622017086e-11, | |
| "loss": 1.4534127712249756, | |
| "step": 1266 | |
| }, | |
| { | |
| "epoch": 1.8510948905109488, | |
| "grad_norm": 0.6079779267311096, | |
| "learning_rate": 4.159108892213799e-11, | |
| "loss": 1.4374399185180664, | |
| "step": 1268 | |
| }, | |
| { | |
| "epoch": 1.854014598540146, | |
| "grad_norm": 3.8505401611328125, | |
| "learning_rate": 4.145289055321263e-11, | |
| "loss": 1.35469651222229, | |
| "step": 1270 | |
| }, | |
| { | |
| "epoch": 1.856934306569343, | |
| "grad_norm": 0.5389693379402161, | |
| "learning_rate": 4.1314832545261706e-11, | |
| "loss": 1.4246481657028198, | |
| "step": 1272 | |
| }, | |
| { | |
| "epoch": 1.85985401459854, | |
| "grad_norm": 0.749738872051239, | |
| "learning_rate": 4.1176916328697836e-11, | |
| "loss": 1.3933446407318115, | |
| "step": 1274 | |
| }, | |
| { | |
| "epoch": 1.8627737226277372, | |
| "grad_norm": 0.7984603643417358, | |
| "learning_rate": 4.103914333246458e-11, | |
| "loss": 1.371537685394287, | |
| "step": 1276 | |
| }, | |
| { | |
| "epoch": 1.8656934306569344, | |
| "grad_norm": 0.6698455810546875, | |
| "learning_rate": 4.090151498402155e-11, | |
| "loss": 1.4040921926498413, | |
| "step": 1278 | |
| }, | |
| { | |
| "epoch": 1.8686131386861313, | |
| "grad_norm": 0.6215634346008301, | |
| "learning_rate": 4.076403270932973e-11, | |
| "loss": 1.4515206813812256, | |
| "step": 1280 | |
| }, | |
| { | |
| "epoch": 1.8715328467153285, | |
| "grad_norm": 0.692348301410675, | |
| "learning_rate": 4.0626697932836585e-11, | |
| "loss": 1.4216551780700684, | |
| "step": 1282 | |
| }, | |
| { | |
| "epoch": 1.8744525547445257, | |
| "grad_norm": 0.6244620680809021, | |
| "learning_rate": 4.048951207746142e-11, | |
| "loss": 1.4502363204956055, | |
| "step": 1284 | |
| }, | |
| { | |
| "epoch": 1.8773722627737226, | |
| "grad_norm": 0.7252545356750488, | |
| "learning_rate": 4.0352476564580485e-11, | |
| "loss": 1.4447228908538818, | |
| "step": 1286 | |
| }, | |
| { | |
| "epoch": 1.8802919708029195, | |
| "grad_norm": 1.0156503915786743, | |
| "learning_rate": 4.021559281401244e-11, | |
| "loss": 1.4445124864578247, | |
| "step": 1288 | |
| }, | |
| { | |
| "epoch": 1.883211678832117, | |
| "grad_norm": 0.6140323877334595, | |
| "learning_rate": 4.0078862244003477e-11, | |
| "loss": 1.467374563217163, | |
| "step": 1290 | |
| }, | |
| { | |
| "epoch": 1.8861313868613139, | |
| "grad_norm": 0.9039534330368042, | |
| "learning_rate": 3.994228627121269e-11, | |
| "loss": 1.383927345275879, | |
| "step": 1292 | |
| }, | |
| { | |
| "epoch": 1.8890510948905108, | |
| "grad_norm": 0.7470601797103882, | |
| "learning_rate": 3.980586631069744e-11, | |
| "loss": 1.4455444812774658, | |
| "step": 1294 | |
| }, | |
| { | |
| "epoch": 1.891970802919708, | |
| "grad_norm": 0.5493040680885315, | |
| "learning_rate": 3.9669603775898614e-11, | |
| "loss": 1.3891698122024536, | |
| "step": 1296 | |
| }, | |
| { | |
| "epoch": 1.8948905109489051, | |
| "grad_norm": 0.7406651377677917, | |
| "learning_rate": 3.953350007862603e-11, | |
| "loss": 1.4206746816635132, | |
| "step": 1298 | |
| }, | |
| { | |
| "epoch": 1.897810218978102, | |
| "grad_norm": 0.5344046354293823, | |
| "learning_rate": 3.9397556629043816e-11, | |
| "loss": 1.4315729141235352, | |
| "step": 1300 | |
| }, | |
| { | |
| "epoch": 1.9007299270072993, | |
| "grad_norm": 0.7977921962738037, | |
| "learning_rate": 3.926177483565576e-11, | |
| "loss": 1.452127456665039, | |
| "step": 1302 | |
| }, | |
| { | |
| "epoch": 1.9036496350364964, | |
| "grad_norm": 0.5497680306434631, | |
| "learning_rate": 3.912615610529074e-11, | |
| "loss": 1.443485140800476, | |
| "step": 1304 | |
| }, | |
| { | |
| "epoch": 1.9065693430656934, | |
| "grad_norm": 0.5397874712944031, | |
| "learning_rate": 3.899070184308815e-11, | |
| "loss": 1.3739013671875, | |
| "step": 1306 | |
| }, | |
| { | |
| "epoch": 1.9094890510948905, | |
| "grad_norm": 0.762992799282074, | |
| "learning_rate": 3.885541345248335e-11, | |
| "loss": 1.4031250476837158, | |
| "step": 1308 | |
| }, | |
| { | |
| "epoch": 1.9124087591240877, | |
| "grad_norm": 0.6079079508781433, | |
| "learning_rate": 3.8720292335193065e-11, | |
| "loss": 1.4409074783325195, | |
| "step": 1310 | |
| }, | |
| { | |
| "epoch": 1.9153284671532846, | |
| "grad_norm": 0.8720683455467224, | |
| "learning_rate": 3.858533989120098e-11, | |
| "loss": 1.4547772407531738, | |
| "step": 1312 | |
| }, | |
| { | |
| "epoch": 1.9182481751824818, | |
| "grad_norm": 0.7784188389778137, | |
| "learning_rate": 3.845055751874309e-11, | |
| "loss": 1.4471113681793213, | |
| "step": 1314 | |
| }, | |
| { | |
| "epoch": 1.921167883211679, | |
| "grad_norm": 0.5901778936386108, | |
| "learning_rate": 3.831594661429334e-11, | |
| "loss": 1.4252212047576904, | |
| "step": 1316 | |
| }, | |
| { | |
| "epoch": 1.924087591240876, | |
| "grad_norm": 0.6086148619651794, | |
| "learning_rate": 3.8181508572549096e-11, | |
| "loss": 1.3742456436157227, | |
| "step": 1318 | |
| }, | |
| { | |
| "epoch": 1.9270072992700729, | |
| "grad_norm": 1.7310707569122314, | |
| "learning_rate": 3.8047244786416667e-11, | |
| "loss": 1.417896032333374, | |
| "step": 1320 | |
| }, | |
| { | |
| "epoch": 1.92992700729927, | |
| "grad_norm": 0.5888099670410156, | |
| "learning_rate": 3.791315664699697e-11, | |
| "loss": 1.450620174407959, | |
| "step": 1322 | |
| }, | |
| { | |
| "epoch": 1.9328467153284672, | |
| "grad_norm": 0.6644768714904785, | |
| "learning_rate": 3.777924554357096e-11, | |
| "loss": 1.4075061082839966, | |
| "step": 1324 | |
| }, | |
| { | |
| "epoch": 1.9357664233576641, | |
| "grad_norm": 1.2621780633926392, | |
| "learning_rate": 3.764551286358544e-11, | |
| "loss": 1.4255995750427246, | |
| "step": 1326 | |
| }, | |
| { | |
| "epoch": 1.9386861313868613, | |
| "grad_norm": 0.6554630994796753, | |
| "learning_rate": 3.751195999263849e-11, | |
| "loss": 1.4196115732192993, | |
| "step": 1328 | |
| }, | |
| { | |
| "epoch": 1.9416058394160585, | |
| "grad_norm": 1.330627679824829, | |
| "learning_rate": 3.737858831446524e-11, | |
| "loss": 1.4479843378067017, | |
| "step": 1330 | |
| }, | |
| { | |
| "epoch": 1.9445255474452554, | |
| "grad_norm": 0.7637717127799988, | |
| "learning_rate": 3.724539921092347e-11, | |
| "loss": 1.482810378074646, | |
| "step": 1332 | |
| }, | |
| { | |
| "epoch": 1.9474452554744526, | |
| "grad_norm": 0.6666594743728638, | |
| "learning_rate": 3.711239406197932e-11, | |
| "loss": 1.4590442180633545, | |
| "step": 1334 | |
| }, | |
| { | |
| "epoch": 1.9503649635036497, | |
| "grad_norm": 0.5666018128395081, | |
| "learning_rate": 3.6979574245693e-11, | |
| "loss": 1.4436371326446533, | |
| "step": 1336 | |
| }, | |
| { | |
| "epoch": 1.9532846715328467, | |
| "grad_norm": 1.4761041402816772, | |
| "learning_rate": 3.68469411382045e-11, | |
| "loss": 1.4259295463562012, | |
| "step": 1338 | |
| }, | |
| { | |
| "epoch": 1.9562043795620438, | |
| "grad_norm": 0.814704954624176, | |
| "learning_rate": 3.671449611371928e-11, | |
| "loss": 1.3765316009521484, | |
| "step": 1340 | |
| }, | |
| { | |
| "epoch": 1.959124087591241, | |
| "grad_norm": 0.6419834494590759, | |
| "learning_rate": 3.658224054449412e-11, | |
| "loss": 1.4001023769378662, | |
| "step": 1342 | |
| }, | |
| { | |
| "epoch": 1.962043795620438, | |
| "grad_norm": 0.6163597106933594, | |
| "learning_rate": 3.645017580082287e-11, | |
| "loss": 1.4055715799331665, | |
| "step": 1344 | |
| }, | |
| { | |
| "epoch": 1.964963503649635, | |
| "grad_norm": 0.6974842548370361, | |
| "learning_rate": 3.631830325102221e-11, | |
| "loss": 1.3906728029251099, | |
| "step": 1346 | |
| }, | |
| { | |
| "epoch": 1.967883211678832, | |
| "grad_norm": 0.8115648031234741, | |
| "learning_rate": 3.618662426141754e-11, | |
| "loss": 1.412947416305542, | |
| "step": 1348 | |
| }, | |
| { | |
| "epoch": 1.9708029197080292, | |
| "grad_norm": 1.0154221057891846, | |
| "learning_rate": 3.6055140196328793e-11, | |
| "loss": 1.4584650993347168, | |
| "step": 1350 | |
| }, | |
| { | |
| "epoch": 1.9737226277372262, | |
| "grad_norm": 0.763891339302063, | |
| "learning_rate": 3.592385241805628e-11, | |
| "loss": 1.4505597352981567, | |
| "step": 1352 | |
| }, | |
| { | |
| "epoch": 1.9766423357664233, | |
| "grad_norm": 0.6464084982872009, | |
| "learning_rate": 3.579276228686658e-11, | |
| "loss": 1.470134973526001, | |
| "step": 1354 | |
| }, | |
| { | |
| "epoch": 1.9795620437956205, | |
| "grad_norm": 0.7250464558601379, | |
| "learning_rate": 3.56618711609785e-11, | |
| "loss": 1.4165124893188477, | |
| "step": 1356 | |
| }, | |
| { | |
| "epoch": 1.9824817518248175, | |
| "grad_norm": 0.6437573432922363, | |
| "learning_rate": 3.5531180396548933e-11, | |
| "loss": 1.3973997831344604, | |
| "step": 1358 | |
| }, | |
| { | |
| "epoch": 1.9854014598540146, | |
| "grad_norm": 0.8988074064254761, | |
| "learning_rate": 3.5400691347658855e-11, | |
| "loss": 1.431837797164917, | |
| "step": 1360 | |
| }, | |
| { | |
| "epoch": 1.9883211678832118, | |
| "grad_norm": 0.754982590675354, | |
| "learning_rate": 3.527040536629927e-11, | |
| "loss": 1.4458699226379395, | |
| "step": 1362 | |
| }, | |
| { | |
| "epoch": 1.9912408759124087, | |
| "grad_norm": 1.197950005531311, | |
| "learning_rate": 3.514032380235719e-11, | |
| "loss": 1.5078868865966797, | |
| "step": 1364 | |
| }, | |
| { | |
| "epoch": 1.994160583941606, | |
| "grad_norm": 1.2275372743606567, | |
| "learning_rate": 3.5010448003601684e-11, | |
| "loss": 2.048449993133545, | |
| "step": 1366 | |
| }, | |
| { | |
| "epoch": 1.997080291970803, | |
| "grad_norm": 1.4293484687805176, | |
| "learning_rate": 3.4880779315669865e-11, | |
| "loss": 2.0413155555725098, | |
| "step": 1368 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "grad_norm": 2.44569730758667, | |
| "learning_rate": 3.475131908205308e-11, | |
| "loss": 1.975757360458374, | |
| "step": 1370 | |
| }, | |
| { | |
| "epoch": 2.002919708029197, | |
| "grad_norm": 0.62864089012146, | |
| "learning_rate": 3.4622068644082805e-11, | |
| "loss": 1.4804534912109375, | |
| "step": 1372 | |
| }, | |
| { | |
| "epoch": 2.0058394160583943, | |
| "grad_norm": 0.6451647281646729, | |
| "learning_rate": 3.44930293409168e-11, | |
| "loss": 1.4802284240722656, | |
| "step": 1374 | |
| }, | |
| { | |
| "epoch": 2.0087591240875913, | |
| "grad_norm": 2.3734664916992188, | |
| "learning_rate": 3.436420250952537e-11, | |
| "loss": 1.4805121421813965, | |
| "step": 1376 | |
| }, | |
| { | |
| "epoch": 2.011678832116788, | |
| "grad_norm": 0.7836667895317078, | |
| "learning_rate": 3.423558948467733e-11, | |
| "loss": 1.484985589981079, | |
| "step": 1378 | |
| }, | |
| { | |
| "epoch": 2.0145985401459856, | |
| "grad_norm": 0.5654711723327637, | |
| "learning_rate": 3.4107191598926294e-11, | |
| "loss": 1.4124772548675537, | |
| "step": 1380 | |
| }, | |
| { | |
| "epoch": 2.0175182481751825, | |
| "grad_norm": 0.5948511362075806, | |
| "learning_rate": 3.3979010182596804e-11, | |
| "loss": 1.4515975713729858, | |
| "step": 1382 | |
| }, | |
| { | |
| "epoch": 2.0204379562043795, | |
| "grad_norm": 0.721831202507019, | |
| "learning_rate": 3.3851046563770615e-11, | |
| "loss": 1.3632853031158447, | |
| "step": 1384 | |
| }, | |
| { | |
| "epoch": 2.0233576642335764, | |
| "grad_norm": 0.6011833548545837, | |
| "learning_rate": 3.372330206827281e-11, | |
| "loss": 1.361553430557251, | |
| "step": 1386 | |
| }, | |
| { | |
| "epoch": 2.026277372262774, | |
| "grad_norm": 0.6041366457939148, | |
| "learning_rate": 3.3595778019658256e-11, | |
| "loss": 1.2695659399032593, | |
| "step": 1388 | |
| }, | |
| { | |
| "epoch": 2.0291970802919708, | |
| "grad_norm": 0.7570754885673523, | |
| "learning_rate": 3.346847573919769e-11, | |
| "loss": 1.4630597829818726, | |
| "step": 1390 | |
| }, | |
| { | |
| "epoch": 2.0321167883211677, | |
| "grad_norm": 0.6280392408370972, | |
| "learning_rate": 3.3341396545864166e-11, | |
| "loss": 1.3815408945083618, | |
| "step": 1392 | |
| }, | |
| { | |
| "epoch": 2.035036496350365, | |
| "grad_norm": 1.1998370885849, | |
| "learning_rate": 3.321454175631934e-11, | |
| "loss": 1.3043756484985352, | |
| "step": 1394 | |
| }, | |
| { | |
| "epoch": 2.037956204379562, | |
| "grad_norm": 0.7230603098869324, | |
| "learning_rate": 3.308791268489979e-11, | |
| "loss": 1.3712929487228394, | |
| "step": 1396 | |
| }, | |
| { | |
| "epoch": 2.040875912408759, | |
| "grad_norm": 0.993170440196991, | |
| "learning_rate": 3.2961510643603477e-11, | |
| "loss": 1.4463608264923096, | |
| "step": 1398 | |
| }, | |
| { | |
| "epoch": 2.0437956204379564, | |
| "grad_norm": 0.731469988822937, | |
| "learning_rate": 3.283533694207611e-11, | |
| "loss": 1.4276561737060547, | |
| "step": 1400 | |
| }, | |
| { | |
| "epoch": 2.0467153284671533, | |
| "grad_norm": 0.6702476739883423, | |
| "learning_rate": 3.270939288759758e-11, | |
| "loss": 1.4296890497207642, | |
| "step": 1402 | |
| }, | |
| { | |
| "epoch": 2.0496350364963503, | |
| "grad_norm": 0.3689621686935425, | |
| "learning_rate": 3.25836797850684e-11, | |
| "loss": 1.2140007019042969, | |
| "step": 1404 | |
| }, | |
| { | |
| "epoch": 2.0525547445255476, | |
| "grad_norm": 0.5467419028282166, | |
| "learning_rate": 3.2458198936996216e-11, | |
| "loss": 1.1607273817062378, | |
| "step": 1406 | |
| }, | |
| { | |
| "epoch": 2.0554744525547446, | |
| "grad_norm": 0.6665674448013306, | |
| "learning_rate": 3.233295164348229e-11, | |
| "loss": 1.3310924768447876, | |
| "step": 1408 | |
| }, | |
| { | |
| "epoch": 2.0583941605839415, | |
| "grad_norm": 0.5236396193504333, | |
| "learning_rate": 3.2207939202208046e-11, | |
| "loss": 1.4082982540130615, | |
| "step": 1410 | |
| }, | |
| { | |
| "epoch": 2.0613138686131385, | |
| "grad_norm": 0.43380749225616455, | |
| "learning_rate": 3.208316290842159e-11, | |
| "loss": 1.4460103511810303, | |
| "step": 1412 | |
| }, | |
| { | |
| "epoch": 2.064233576642336, | |
| "grad_norm": 1.7187466621398926, | |
| "learning_rate": 3.1958624054924346e-11, | |
| "loss": 1.452533483505249, | |
| "step": 1414 | |
| }, | |
| { | |
| "epoch": 2.067153284671533, | |
| "grad_norm": 1.875139832496643, | |
| "learning_rate": 3.183432393205763e-11, | |
| "loss": 1.676717758178711, | |
| "step": 1416 | |
| }, | |
| { | |
| "epoch": 2.0700729927007298, | |
| "grad_norm": 0.6816144585609436, | |
| "learning_rate": 3.171026382768923e-11, | |
| "loss": 1.3316125869750977, | |
| "step": 1418 | |
| }, | |
| { | |
| "epoch": 2.072992700729927, | |
| "grad_norm": 0.7521905899047852, | |
| "learning_rate": 3.158644502720015e-11, | |
| "loss": 1.3629651069641113, | |
| "step": 1420 | |
| }, | |
| { | |
| "epoch": 2.075912408759124, | |
| "grad_norm": 0.8290117383003235, | |
| "learning_rate": 3.146286881347128e-11, | |
| "loss": 1.4297902584075928, | |
| "step": 1422 | |
| }, | |
| { | |
| "epoch": 2.078832116788321, | |
| "grad_norm": 1.896441102027893, | |
| "learning_rate": 3.1339536466870046e-11, | |
| "loss": 1.4510955810546875, | |
| "step": 1424 | |
| }, | |
| { | |
| "epoch": 2.0817518248175184, | |
| "grad_norm": 0.9783208966255188, | |
| "learning_rate": 3.121644926523721e-11, | |
| "loss": 1.4805799722671509, | |
| "step": 1426 | |
| }, | |
| { | |
| "epoch": 2.0846715328467154, | |
| "grad_norm": 0.44715872406959534, | |
| "learning_rate": 3.1093608483873524e-11, | |
| "loss": 1.259225845336914, | |
| "step": 1428 | |
| }, | |
| { | |
| "epoch": 2.0875912408759123, | |
| "grad_norm": 0.7327166795730591, | |
| "learning_rate": 3.097101539552668e-11, | |
| "loss": 1.267507791519165, | |
| "step": 1430 | |
| }, | |
| { | |
| "epoch": 2.0905109489051097, | |
| "grad_norm": 0.5539389848709106, | |
| "learning_rate": 3.0848671270377985e-11, | |
| "loss": 1.2651211023330688, | |
| "step": 1432 | |
| }, | |
| { | |
| "epoch": 2.0934306569343066, | |
| "grad_norm": 0.510282576084137, | |
| "learning_rate": 3.0726577376029264e-11, | |
| "loss": 1.3089463710784912, | |
| "step": 1434 | |
| }, | |
| { | |
| "epoch": 2.0963503649635036, | |
| "grad_norm": 0.7474421858787537, | |
| "learning_rate": 3.0604734977489704e-11, | |
| "loss": 1.3216513395309448, | |
| "step": 1436 | |
| }, | |
| { | |
| "epoch": 2.0992700729927005, | |
| "grad_norm": 0.8534965515136719, | |
| "learning_rate": 3.048314533716277e-11, | |
| "loss": 1.3030940294265747, | |
| "step": 1438 | |
| }, | |
| { | |
| "epoch": 2.102189781021898, | |
| "grad_norm": 0.5548062324523926, | |
| "learning_rate": 3.03618097148331e-11, | |
| "loss": 1.3647727966308594, | |
| "step": 1440 | |
| }, | |
| { | |
| "epoch": 2.105109489051095, | |
| "grad_norm": 1.8093698024749756, | |
| "learning_rate": 3.0240729367653456e-11, | |
| "loss": 1.5192502737045288, | |
| "step": 1442 | |
| }, | |
| { | |
| "epoch": 2.108029197080292, | |
| "grad_norm": 0.6639271378517151, | |
| "learning_rate": 3.0119905550131735e-11, | |
| "loss": 1.477950096130371, | |
| "step": 1444 | |
| }, | |
| { | |
| "epoch": 2.110948905109489, | |
| "grad_norm": 0.9859902858734131, | |
| "learning_rate": 2.9999339514117915e-11, | |
| "loss": 1.4478323459625244, | |
| "step": 1446 | |
| }, | |
| { | |
| "epoch": 2.113868613138686, | |
| "grad_norm": 0.8713816404342651, | |
| "learning_rate": 2.987903250879109e-11, | |
| "loss": 1.5153857469558716, | |
| "step": 1448 | |
| }, | |
| { | |
| "epoch": 2.116788321167883, | |
| "grad_norm": 0.935540497303009, | |
| "learning_rate": 2.975898578064662e-11, | |
| "loss": 1.365601897239685, | |
| "step": 1450 | |
| }, | |
| { | |
| "epoch": 2.1197080291970805, | |
| "grad_norm": 0.8956299424171448, | |
| "learning_rate": 2.9639200573483075e-11, | |
| "loss": 1.289376974105835, | |
| "step": 1452 | |
| }, | |
| { | |
| "epoch": 2.1226277372262774, | |
| "grad_norm": 1.5029017925262451, | |
| "learning_rate": 2.951967812838946e-11, | |
| "loss": 1.2087488174438477, | |
| "step": 1454 | |
| }, | |
| { | |
| "epoch": 2.1255474452554743, | |
| "grad_norm": 0.7925666570663452, | |
| "learning_rate": 2.940041968373229e-11, | |
| "loss": 1.3250759840011597, | |
| "step": 1456 | |
| }, | |
| { | |
| "epoch": 2.1284671532846717, | |
| "grad_norm": 1.1250343322753906, | |
| "learning_rate": 2.928142647514281e-11, | |
| "loss": 1.197310209274292, | |
| "step": 1458 | |
| }, | |
| { | |
| "epoch": 2.1313868613138687, | |
| "grad_norm": 0.5464907884597778, | |
| "learning_rate": 2.916269973550413e-11, | |
| "loss": 1.4173991680145264, | |
| "step": 1460 | |
| }, | |
| { | |
| "epoch": 2.1343065693430656, | |
| "grad_norm": 0.7621420621871948, | |
| "learning_rate": 2.904424069493853e-11, | |
| "loss": 1.4400224685668945, | |
| "step": 1462 | |
| }, | |
| { | |
| "epoch": 2.137226277372263, | |
| "grad_norm": 0.5502415299415588, | |
| "learning_rate": 2.892605058079464e-11, | |
| "loss": 1.4867937564849854, | |
| "step": 1464 | |
| }, | |
| { | |
| "epoch": 2.14014598540146, | |
| "grad_norm": 1.0415494441986084, | |
| "learning_rate": 2.8808130617634766e-11, | |
| "loss": 1.4039362668991089, | |
| "step": 1466 | |
| }, | |
| { | |
| "epoch": 2.143065693430657, | |
| "grad_norm": 0.5690549612045288, | |
| "learning_rate": 2.8690482027222204e-11, | |
| "loss": 1.3866254091262817, | |
| "step": 1468 | |
| }, | |
| { | |
| "epoch": 2.145985401459854, | |
| "grad_norm": 1.3573678731918335, | |
| "learning_rate": 2.857310602850854e-11, | |
| "loss": 1.5354242324829102, | |
| "step": 1470 | |
| }, | |
| { | |
| "epoch": 2.1489051094890512, | |
| "grad_norm": 0.617643415927887, | |
| "learning_rate": 2.845600383762107e-11, | |
| "loss": 1.4223835468292236, | |
| "step": 1472 | |
| }, | |
| { | |
| "epoch": 2.151824817518248, | |
| "grad_norm": 0.7567899227142334, | |
| "learning_rate": 2.833917666785017e-11, | |
| "loss": 1.3844956159591675, | |
| "step": 1474 | |
| }, | |
| { | |
| "epoch": 2.154744525547445, | |
| "grad_norm": 0.7231752872467041, | |
| "learning_rate": 2.8222625729636774e-11, | |
| "loss": 1.4574776887893677, | |
| "step": 1476 | |
| }, | |
| { | |
| "epoch": 2.1576642335766425, | |
| "grad_norm": 0.680822491645813, | |
| "learning_rate": 2.8106352230559756e-11, | |
| "loss": 1.5150808095932007, | |
| "step": 1478 | |
| }, | |
| { | |
| "epoch": 2.1605839416058394, | |
| "grad_norm": 1.0630342960357666, | |
| "learning_rate": 2.799035737532344e-11, | |
| "loss": 1.4478161334991455, | |
| "step": 1480 | |
| }, | |
| { | |
| "epoch": 2.1635036496350364, | |
| "grad_norm": 0.9563947916030884, | |
| "learning_rate": 2.7874642365745163e-11, | |
| "loss": 1.493828535079956, | |
| "step": 1482 | |
| }, | |
| { | |
| "epoch": 2.1664233576642338, | |
| "grad_norm": 0.7002918720245361, | |
| "learning_rate": 2.7759208400742797e-11, | |
| "loss": 1.3383344411849976, | |
| "step": 1484 | |
| }, | |
| { | |
| "epoch": 2.1693430656934307, | |
| "grad_norm": 1.3854063749313354, | |
| "learning_rate": 2.764405667632231e-11, | |
| "loss": 1.5312495231628418, | |
| "step": 1486 | |
| }, | |
| { | |
| "epoch": 2.1722627737226277, | |
| "grad_norm": 0.7880827784538269, | |
| "learning_rate": 2.7529188385565386e-11, | |
| "loss": 1.383646011352539, | |
| "step": 1488 | |
| }, | |
| { | |
| "epoch": 2.1751824817518246, | |
| "grad_norm": 0.8886491656303406, | |
| "learning_rate": 2.741460471861708e-11, | |
| "loss": 1.536646842956543, | |
| "step": 1490 | |
| }, | |
| { | |
| "epoch": 2.178102189781022, | |
| "grad_norm": 1.4431637525558472, | |
| "learning_rate": 2.730030686267347e-11, | |
| "loss": 1.4069819450378418, | |
| "step": 1492 | |
| }, | |
| { | |
| "epoch": 2.181021897810219, | |
| "grad_norm": 0.6276810765266418, | |
| "learning_rate": 2.7186296001969352e-11, | |
| "loss": 1.4030295610427856, | |
| "step": 1494 | |
| }, | |
| { | |
| "epoch": 2.183941605839416, | |
| "grad_norm": 1.0170162916183472, | |
| "learning_rate": 2.7072573317765974e-11, | |
| "loss": 1.433866024017334, | |
| "step": 1496 | |
| }, | |
| { | |
| "epoch": 2.1868613138686133, | |
| "grad_norm": 5.051820755004883, | |
| "learning_rate": 2.695913998833881e-11, | |
| "loss": 1.429917573928833, | |
| "step": 1498 | |
| }, | |
| { | |
| "epoch": 2.18978102189781, | |
| "grad_norm": 0.8205865025520325, | |
| "learning_rate": 2.6845997188965365e-11, | |
| "loss": 1.3765311241149902, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 2.192700729927007, | |
| "grad_norm": 0.6207389831542969, | |
| "learning_rate": 2.673314609191291e-11, | |
| "loss": 1.4253859519958496, | |
| "step": 1502 | |
| }, | |
| { | |
| "epoch": 2.1956204379562045, | |
| "grad_norm": 0.5341710448265076, | |
| "learning_rate": 2.662058786642646e-11, | |
| "loss": 1.4388456344604492, | |
| "step": 1504 | |
| }, | |
| { | |
| "epoch": 2.1985401459854015, | |
| "grad_norm": 1.7783007621765137, | |
| "learning_rate": 2.6508323678716584e-11, | |
| "loss": 1.308274745941162, | |
| "step": 1506 | |
| }, | |
| { | |
| "epoch": 2.2014598540145984, | |
| "grad_norm": 0.5501033067703247, | |
| "learning_rate": 2.6396354691947322e-11, | |
| "loss": 1.1534695625305176, | |
| "step": 1508 | |
| }, | |
| { | |
| "epoch": 2.204379562043796, | |
| "grad_norm": 1.8598493337631226, | |
| "learning_rate": 2.628468206622421e-11, | |
| "loss": 1.4224417209625244, | |
| "step": 1510 | |
| }, | |
| { | |
| "epoch": 2.2072992700729928, | |
| "grad_norm": 3.1066925525665283, | |
| "learning_rate": 2.617330695858212e-11, | |
| "loss": 1.3432610034942627, | |
| "step": 1512 | |
| }, | |
| { | |
| "epoch": 2.2102189781021897, | |
| "grad_norm": 1.4407155513763428, | |
| "learning_rate": 2.6062230522973407e-11, | |
| "loss": 1.3356854915618896, | |
| "step": 1514 | |
| }, | |
| { | |
| "epoch": 2.213138686131387, | |
| "grad_norm": 0.7633354663848877, | |
| "learning_rate": 2.5951453910255874e-11, | |
| "loss": 1.421032190322876, | |
| "step": 1516 | |
| }, | |
| { | |
| "epoch": 2.216058394160584, | |
| "grad_norm": 0.6978413462638855, | |
| "learning_rate": 2.5840978268180892e-11, | |
| "loss": 1.3334453105926514, | |
| "step": 1518 | |
| }, | |
| { | |
| "epoch": 2.218978102189781, | |
| "grad_norm": 0.5624653697013855, | |
| "learning_rate": 2.5730804741381475e-11, | |
| "loss": 1.4526844024658203, | |
| "step": 1520 | |
| }, | |
| { | |
| "epoch": 2.221897810218978, | |
| "grad_norm": 0.8185047507286072, | |
| "learning_rate": 2.5620934471360446e-11, | |
| "loss": 1.5922720432281494, | |
| "step": 1522 | |
| }, | |
| { | |
| "epoch": 2.2248175182481753, | |
| "grad_norm": 0.696137011051178, | |
| "learning_rate": 2.5511368596478575e-11, | |
| "loss": 1.658097743988037, | |
| "step": 1524 | |
| }, | |
| { | |
| "epoch": 2.2277372262773723, | |
| "grad_norm": 0.9072125554084778, | |
| "learning_rate": 2.5402108251942813e-11, | |
| "loss": 1.4522528648376465, | |
| "step": 1526 | |
| }, | |
| { | |
| "epoch": 2.230656934306569, | |
| "grad_norm": 0.8078010082244873, | |
| "learning_rate": 2.529315456979457e-11, | |
| "loss": 1.538939356803894, | |
| "step": 1528 | |
| }, | |
| { | |
| "epoch": 2.2335766423357666, | |
| "grad_norm": 0.8872140645980835, | |
| "learning_rate": 2.5184508678897894e-11, | |
| "loss": 1.6126492023468018, | |
| "step": 1530 | |
| }, | |
| { | |
| "epoch": 2.2364963503649635, | |
| "grad_norm": 1.1159803867340088, | |
| "learning_rate": 2.5076171704927847e-11, | |
| "loss": 1.6062068939208984, | |
| "step": 1532 | |
| }, | |
| { | |
| "epoch": 2.2394160583941605, | |
| "grad_norm": 1.937201976776123, | |
| "learning_rate": 2.4968144770358785e-11, | |
| "loss": 1.593677282333374, | |
| "step": 1534 | |
| }, | |
| { | |
| "epoch": 2.242335766423358, | |
| "grad_norm": 2.113393783569336, | |
| "learning_rate": 2.4860428994452785e-11, | |
| "loss": 1.4414892196655273, | |
| "step": 1536 | |
| }, | |
| { | |
| "epoch": 2.245255474452555, | |
| "grad_norm": 1.1923096179962158, | |
| "learning_rate": 2.4753025493248032e-11, | |
| "loss": 1.658026933670044, | |
| "step": 1538 | |
| }, | |
| { | |
| "epoch": 2.2481751824817517, | |
| "grad_norm": 1.3112952709197998, | |
| "learning_rate": 2.464593537954722e-11, | |
| "loss": 1.1770000457763672, | |
| "step": 1540 | |
| }, | |
| { | |
| "epoch": 2.2510948905109487, | |
| "grad_norm": 1.8166533708572388, | |
| "learning_rate": 2.453915976290607e-11, | |
| "loss": 1.2049548625946045, | |
| "step": 1542 | |
| }, | |
| { | |
| "epoch": 2.254014598540146, | |
| "grad_norm": 2.864039659500122, | |
| "learning_rate": 2.4432699749621813e-11, | |
| "loss": 1.2220643758773804, | |
| "step": 1544 | |
| }, | |
| { | |
| "epoch": 2.256934306569343, | |
| "grad_norm": 1.7549316883087158, | |
| "learning_rate": 2.4326556442721715e-11, | |
| "loss": 1.2086524963378906, | |
| "step": 1546 | |
| }, | |
| { | |
| "epoch": 2.25985401459854, | |
| "grad_norm": 1.350193738937378, | |
| "learning_rate": 2.4220730941951673e-11, | |
| "loss": 1.4182915687561035, | |
| "step": 1548 | |
| }, | |
| { | |
| "epoch": 2.2627737226277373, | |
| "grad_norm": 1.4707825183868408, | |
| "learning_rate": 2.411522434376481e-11, | |
| "loss": 1.3656668663024902, | |
| "step": 1550 | |
| }, | |
| { | |
| "epoch": 2.2656934306569343, | |
| "grad_norm": 0.7274520397186279, | |
| "learning_rate": 2.4010037741310103e-11, | |
| "loss": 1.2772574424743652, | |
| "step": 1552 | |
| }, | |
| { | |
| "epoch": 2.2686131386861312, | |
| "grad_norm": 1.6660062074661255, | |
| "learning_rate": 2.3905172224421092e-11, | |
| "loss": 1.1854485273361206, | |
| "step": 1554 | |
| }, | |
| { | |
| "epoch": 2.2715328467153286, | |
| "grad_norm": 0.628384530544281, | |
| "learning_rate": 2.3800628879604523e-11, | |
| "loss": 1.0012882947921753, | |
| "step": 1556 | |
| }, | |
| { | |
| "epoch": 2.2744525547445256, | |
| "grad_norm": 0.9691115617752075, | |
| "learning_rate": 2.3696408790029166e-11, | |
| "loss": 1.2167019844055176, | |
| "step": 1558 | |
| }, | |
| { | |
| "epoch": 2.2773722627737225, | |
| "grad_norm": 0.8349161148071289, | |
| "learning_rate": 2.3592513035514534e-11, | |
| "loss": 1.2905924320220947, | |
| "step": 1560 | |
| }, | |
| { | |
| "epoch": 2.28029197080292, | |
| "grad_norm": 0.6068980693817139, | |
| "learning_rate": 2.348894269251978e-11, | |
| "loss": 1.0265419483184814, | |
| "step": 1562 | |
| }, | |
| { | |
| "epoch": 2.283211678832117, | |
| "grad_norm": 1.2399365901947021, | |
| "learning_rate": 2.3385698834132398e-11, | |
| "loss": 1.1850502490997314, | |
| "step": 1564 | |
| }, | |
| { | |
| "epoch": 2.286131386861314, | |
| "grad_norm": 1.1862503290176392, | |
| "learning_rate": 2.3282782530057236e-11, | |
| "loss": 1.3720860481262207, | |
| "step": 1566 | |
| }, | |
| { | |
| "epoch": 2.289051094890511, | |
| "grad_norm": 1.2461482286453247, | |
| "learning_rate": 2.3180194846605366e-11, | |
| "loss": 1.2948455810546875, | |
| "step": 1568 | |
| }, | |
| { | |
| "epoch": 2.291970802919708, | |
| "grad_norm": 1.039872646331787, | |
| "learning_rate": 2.307793684668303e-11, | |
| "loss": 1.3344666957855225, | |
| "step": 1570 | |
| }, | |
| { | |
| "epoch": 2.294890510948905, | |
| "grad_norm": 1.5449193716049194, | |
| "learning_rate": 2.297600958978064e-11, | |
| "loss": 1.4214136600494385, | |
| "step": 1572 | |
| }, | |
| { | |
| "epoch": 2.297810218978102, | |
| "grad_norm": 0.5902289748191833, | |
| "learning_rate": 2.2874414131961783e-11, | |
| "loss": 1.0963501930236816, | |
| "step": 1574 | |
| }, | |
| { | |
| "epoch": 2.3007299270072994, | |
| "grad_norm": 0.7048768401145935, | |
| "learning_rate": 2.277315152585231e-11, | |
| "loss": 1.224388837814331, | |
| "step": 1576 | |
| }, | |
| { | |
| "epoch": 2.3036496350364963, | |
| "grad_norm": 0.9916707873344421, | |
| "learning_rate": 2.2672222820629375e-11, | |
| "loss": 1.5652008056640625, | |
| "step": 1578 | |
| }, | |
| { | |
| "epoch": 2.3065693430656933, | |
| "grad_norm": 1.8845689296722412, | |
| "learning_rate": 2.2571629062010654e-11, | |
| "loss": 1.4575833082199097, | |
| "step": 1580 | |
| }, | |
| { | |
| "epoch": 2.3094890510948907, | |
| "grad_norm": 0.6540777683258057, | |
| "learning_rate": 2.2471371292243415e-11, | |
| "loss": 1.5350267887115479, | |
| "step": 1582 | |
| }, | |
| { | |
| "epoch": 2.3124087591240876, | |
| "grad_norm": 4.6816253662109375, | |
| "learning_rate": 2.2371450550093786e-11, | |
| "loss": 1.5066488981246948, | |
| "step": 1584 | |
| }, | |
| { | |
| "epoch": 2.3153284671532846, | |
| "grad_norm": 0.621639609336853, | |
| "learning_rate": 2.227186787083593e-11, | |
| "loss": 1.2260034084320068, | |
| "step": 1586 | |
| }, | |
| { | |
| "epoch": 2.318248175182482, | |
| "grad_norm": 0.6506893038749695, | |
| "learning_rate": 2.2172624286241394e-11, | |
| "loss": 1.3516817092895508, | |
| "step": 1588 | |
| }, | |
| { | |
| "epoch": 2.321167883211679, | |
| "grad_norm": 0.6439290642738342, | |
| "learning_rate": 2.2073720824568366e-11, | |
| "loss": 1.2580097913742065, | |
| "step": 1590 | |
| }, | |
| { | |
| "epoch": 2.324087591240876, | |
| "grad_norm": 0.5539758801460266, | |
| "learning_rate": 2.1975158510551046e-11, | |
| "loss": 1.1156539916992188, | |
| "step": 1592 | |
| }, | |
| { | |
| "epoch": 2.3270072992700728, | |
| "grad_norm": 1.03206467628479, | |
| "learning_rate": 2.1876938365389005e-11, | |
| "loss": 1.605167269706726, | |
| "step": 1594 | |
| }, | |
| { | |
| "epoch": 2.32992700729927, | |
| "grad_norm": 0.6580781936645508, | |
| "learning_rate": 2.1779061406736623e-11, | |
| "loss": 1.4872037172317505, | |
| "step": 1596 | |
| }, | |
| { | |
| "epoch": 2.332846715328467, | |
| "grad_norm": 0.9182255268096924, | |
| "learning_rate": 2.1681528648692546e-11, | |
| "loss": 1.421220302581787, | |
| "step": 1598 | |
| }, | |
| { | |
| "epoch": 2.335766423357664, | |
| "grad_norm": 0.9587092399597168, | |
| "learning_rate": 2.1584341101789163e-11, | |
| "loss": 1.1090900897979736, | |
| "step": 1600 | |
| }, | |
| { | |
| "epoch": 2.3386861313868614, | |
| "grad_norm": 0.9461172223091125, | |
| "learning_rate": 2.1487499772982154e-11, | |
| "loss": 1.5479141473770142, | |
| "step": 1602 | |
| }, | |
| { | |
| "epoch": 2.3416058394160584, | |
| "grad_norm": 1.0093610286712646, | |
| "learning_rate": 2.1391005665640057e-11, | |
| "loss": 1.4849309921264648, | |
| "step": 1604 | |
| }, | |
| { | |
| "epoch": 2.3445255474452553, | |
| "grad_norm": 0.8153662085533142, | |
| "learning_rate": 2.129485977953386e-11, | |
| "loss": 1.6183394193649292, | |
| "step": 1606 | |
| }, | |
| { | |
| "epoch": 2.3474452554744527, | |
| "grad_norm": 1.0623451471328735, | |
| "learning_rate": 2.1199063110826615e-11, | |
| "loss": 1.5566279888153076, | |
| "step": 1608 | |
| }, | |
| { | |
| "epoch": 2.3503649635036497, | |
| "grad_norm": 0.7380754947662354, | |
| "learning_rate": 2.1103616652063197e-11, | |
| "loss": 1.3920304775238037, | |
| "step": 1610 | |
| }, | |
| { | |
| "epoch": 2.3532846715328466, | |
| "grad_norm": 0.7378389239311218, | |
| "learning_rate": 2.1008521392159927e-11, | |
| "loss": 1.3314943313598633, | |
| "step": 1612 | |
| }, | |
| { | |
| "epoch": 2.356204379562044, | |
| "grad_norm": 1.424649953842163, | |
| "learning_rate": 2.0913778316394434e-11, | |
| "loss": 1.385246992111206, | |
| "step": 1614 | |
| }, | |
| { | |
| "epoch": 2.359124087591241, | |
| "grad_norm": 0.8609190583229065, | |
| "learning_rate": 2.081938840639533e-11, | |
| "loss": 1.4388893842697144, | |
| "step": 1616 | |
| }, | |
| { | |
| "epoch": 2.362043795620438, | |
| "grad_norm": 0.9891670346260071, | |
| "learning_rate": 2.072535264013209e-11, | |
| "loss": 1.6035603284835815, | |
| "step": 1618 | |
| }, | |
| { | |
| "epoch": 2.3649635036496353, | |
| "grad_norm": 1.3809630870819092, | |
| "learning_rate": 2.063167199190495e-11, | |
| "loss": 1.5855412483215332, | |
| "step": 1620 | |
| }, | |
| { | |
| "epoch": 2.367883211678832, | |
| "grad_norm": 0.5980952382087708, | |
| "learning_rate": 2.053834743233477e-11, | |
| "loss": 1.6966145038604736, | |
| "step": 1622 | |
| }, | |
| { | |
| "epoch": 2.370802919708029, | |
| "grad_norm": 1.0012812614440918, | |
| "learning_rate": 2.0445379928353005e-11, | |
| "loss": 1.551912784576416, | |
| "step": 1624 | |
| }, | |
| { | |
| "epoch": 2.373722627737226, | |
| "grad_norm": 2.968132495880127, | |
| "learning_rate": 2.035277044319165e-11, | |
| "loss": 1.4885845184326172, | |
| "step": 1626 | |
| }, | |
| { | |
| "epoch": 2.3766423357664235, | |
| "grad_norm": 1.230621337890625, | |
| "learning_rate": 2.026051993637332e-11, | |
| "loss": 1.2746766805648804, | |
| "step": 1628 | |
| }, | |
| { | |
| "epoch": 2.3795620437956204, | |
| "grad_norm": 0.7411018013954163, | |
| "learning_rate": 2.0168629363701215e-11, | |
| "loss": 1.237394094467163, | |
| "step": 1630 | |
| }, | |
| { | |
| "epoch": 2.3824817518248174, | |
| "grad_norm": 1.0179007053375244, | |
| "learning_rate": 2.0077099677249334e-11, | |
| "loss": 1.5453828573226929, | |
| "step": 1632 | |
| }, | |
| { | |
| "epoch": 2.3854014598540147, | |
| "grad_norm": 0.8561967015266418, | |
| "learning_rate": 1.9985931825352528e-11, | |
| "loss": 1.4817601442337036, | |
| "step": 1634 | |
| }, | |
| { | |
| "epoch": 2.3883211678832117, | |
| "grad_norm": 0.747097373008728, | |
| "learning_rate": 1.989512675259668e-11, | |
| "loss": 1.525360345840454, | |
| "step": 1636 | |
| }, | |
| { | |
| "epoch": 2.3912408759124086, | |
| "grad_norm": 0.8082491755485535, | |
| "learning_rate": 1.980468539980897e-11, | |
| "loss": 1.1978973150253296, | |
| "step": 1638 | |
| }, | |
| { | |
| "epoch": 2.394160583941606, | |
| "grad_norm": 0.9642162322998047, | |
| "learning_rate": 1.9714608704048038e-11, | |
| "loss": 1.199863314628601, | |
| "step": 1640 | |
| }, | |
| { | |
| "epoch": 2.397080291970803, | |
| "grad_norm": 0.6381575465202332, | |
| "learning_rate": 1.9624897598594387e-11, | |
| "loss": 1.185179591178894, | |
| "step": 1642 | |
| }, | |
| { | |
| "epoch": 2.4, | |
| "grad_norm": 0.7561579346656799, | |
| "learning_rate": 1.953555301294062e-11, | |
| "loss": 1.2254691123962402, | |
| "step": 1644 | |
| }, | |
| { | |
| "epoch": 2.402919708029197, | |
| "grad_norm": 1.4877296686172485, | |
| "learning_rate": 1.9446575872781863e-11, | |
| "loss": 1.5172537565231323, | |
| "step": 1646 | |
| }, | |
| { | |
| "epoch": 2.4058394160583942, | |
| "grad_norm": 1.3091741800308228, | |
| "learning_rate": 1.9357967100006153e-11, | |
| "loss": 1.513096809387207, | |
| "step": 1648 | |
| }, | |
| { | |
| "epoch": 2.408759124087591, | |
| "grad_norm": 1.3534202575683594, | |
| "learning_rate": 1.9269727612684882e-11, | |
| "loss": 1.4667720794677734, | |
| "step": 1650 | |
| }, | |
| { | |
| "epoch": 2.411678832116788, | |
| "grad_norm": 1.1409510374069214, | |
| "learning_rate": 1.9181858325063297e-11, | |
| "loss": 1.507994532585144, | |
| "step": 1652 | |
| }, | |
| { | |
| "epoch": 2.4145985401459855, | |
| "grad_norm": 1.0714995861053467, | |
| "learning_rate": 1.9094360147551022e-11, | |
| "loss": 1.581897497177124, | |
| "step": 1654 | |
| }, | |
| { | |
| "epoch": 2.4175182481751825, | |
| "grad_norm": 0.7724789381027222, | |
| "learning_rate": 1.900723398671263e-11, | |
| "loss": 1.2759335041046143, | |
| "step": 1656 | |
| }, | |
| { | |
| "epoch": 2.4204379562043794, | |
| "grad_norm": 1.407017469406128, | |
| "learning_rate": 1.892048074525824e-11, | |
| "loss": 1.2699828147888184, | |
| "step": 1658 | |
| }, | |
| { | |
| "epoch": 2.423357664233577, | |
| "grad_norm": 0.6306201219558716, | |
| "learning_rate": 1.8834101322034192e-11, | |
| "loss": 1.3749358654022217, | |
| "step": 1660 | |
| }, | |
| { | |
| "epoch": 2.4262773722627737, | |
| "grad_norm": 0.8618882894515991, | |
| "learning_rate": 1.874809661201367e-11, | |
| "loss": 1.4793400764465332, | |
| "step": 1662 | |
| }, | |
| { | |
| "epoch": 2.4291970802919707, | |
| "grad_norm": 0.8602250218391418, | |
| "learning_rate": 1.8662467506287496e-11, | |
| "loss": 1.2748618125915527, | |
| "step": 1664 | |
| }, | |
| { | |
| "epoch": 2.432116788321168, | |
| "grad_norm": 1.4891537427902222, | |
| "learning_rate": 1.8577214892054895e-11, | |
| "loss": 1.1587989330291748, | |
| "step": 1666 | |
| }, | |
| { | |
| "epoch": 2.435036496350365, | |
| "grad_norm": 0.9357245564460754, | |
| "learning_rate": 1.8492339652614253e-11, | |
| "loss": 1.404365062713623, | |
| "step": 1668 | |
| }, | |
| { | |
| "epoch": 2.437956204379562, | |
| "grad_norm": 1.0059746503829956, | |
| "learning_rate": 1.8407842667354008e-11, | |
| "loss": 1.3615386486053467, | |
| "step": 1670 | |
| }, | |
| { | |
| "epoch": 2.4408759124087593, | |
| "grad_norm": 0.7331128716468811, | |
| "learning_rate": 1.8323724811743495e-11, | |
| "loss": 1.3693501949310303, | |
| "step": 1672 | |
| }, | |
| { | |
| "epoch": 2.4437956204379563, | |
| "grad_norm": 0.7307764291763306, | |
| "learning_rate": 1.8239986957323938e-11, | |
| "loss": 1.526829481124878, | |
| "step": 1674 | |
| }, | |
| { | |
| "epoch": 2.4467153284671532, | |
| "grad_norm": 0.9488301277160645, | |
| "learning_rate": 1.8156629971699374e-11, | |
| "loss": 1.650126338005066, | |
| "step": 1676 | |
| }, | |
| { | |
| "epoch": 2.44963503649635, | |
| "grad_norm": 0.6294970512390137, | |
| "learning_rate": 1.807365471852767e-11, | |
| "loss": 1.3320525884628296, | |
| "step": 1678 | |
| }, | |
| { | |
| "epoch": 2.4525547445255476, | |
| "grad_norm": 1.4968878030776978, | |
| "learning_rate": 1.7991062057511587e-11, | |
| "loss": 1.283010482788086, | |
| "step": 1680 | |
| }, | |
| { | |
| "epoch": 2.4554744525547445, | |
| "grad_norm": 0.9758629202842712, | |
| "learning_rate": 1.7908852844389878e-11, | |
| "loss": 1.3871170282363892, | |
| "step": 1682 | |
| }, | |
| { | |
| "epoch": 2.4583941605839414, | |
| "grad_norm": 0.6724770069122314, | |
| "learning_rate": 1.7827027930928388e-11, | |
| "loss": 1.2842590808868408, | |
| "step": 1684 | |
| }, | |
| { | |
| "epoch": 2.461313868613139, | |
| "grad_norm": 1.0515720844268799, | |
| "learning_rate": 1.7745588164911263e-11, | |
| "loss": 1.6692872047424316, | |
| "step": 1686 | |
| }, | |
| { | |
| "epoch": 2.4642335766423358, | |
| "grad_norm": 0.9676235318183899, | |
| "learning_rate": 1.766453439013215e-11, | |
| "loss": 1.4625557661056519, | |
| "step": 1688 | |
| }, | |
| { | |
| "epoch": 2.4671532846715327, | |
| "grad_norm": 0.7648779153823853, | |
| "learning_rate": 1.758386744638546e-11, | |
| "loss": 1.3853659629821777, | |
| "step": 1690 | |
| }, | |
| { | |
| "epoch": 2.47007299270073, | |
| "grad_norm": 1.141390085220337, | |
| "learning_rate": 1.7503588169457688e-11, | |
| "loss": 1.388798475265503, | |
| "step": 1692 | |
| }, | |
| { | |
| "epoch": 2.472992700729927, | |
| "grad_norm": 0.7227885127067566, | |
| "learning_rate": 1.7423697391118673e-11, | |
| "loss": 1.3357038497924805, | |
| "step": 1694 | |
| }, | |
| { | |
| "epoch": 2.475912408759124, | |
| "grad_norm": 0.40319228172302246, | |
| "learning_rate": 1.7344195939113094e-11, | |
| "loss": 1.3553037643432617, | |
| "step": 1696 | |
| }, | |
| { | |
| "epoch": 2.478832116788321, | |
| "grad_norm": 0.9001244902610779, | |
| "learning_rate": 1.7265084637151817e-11, | |
| "loss": 1.290768027305603, | |
| "step": 1698 | |
| }, | |
| { | |
| "epoch": 2.4817518248175183, | |
| "grad_norm": 0.55340176820755, | |
| "learning_rate": 1.718636430490338e-11, | |
| "loss": 1.3492891788482666, | |
| "step": 1700 | |
| }, | |
| { | |
| "epoch": 2.4846715328467153, | |
| "grad_norm": 0.5425994992256165, | |
| "learning_rate": 1.7108035757985506e-11, | |
| "loss": 1.3719546794891357, | |
| "step": 1702 | |
| }, | |
| { | |
| "epoch": 2.487591240875912, | |
| "grad_norm": 0.7454577088356018, | |
| "learning_rate": 1.7030099807956648e-11, | |
| "loss": 1.3073012828826904, | |
| "step": 1704 | |
| }, | |
| { | |
| "epoch": 2.4905109489051096, | |
| "grad_norm": 0.8407237529754639, | |
| "learning_rate": 1.695255726230758e-11, | |
| "loss": 1.6391425132751465, | |
| "step": 1706 | |
| }, | |
| { | |
| "epoch": 2.4934306569343065, | |
| "grad_norm": 1.5644853115081787, | |
| "learning_rate": 1.6875408924453033e-11, | |
| "loss": 1.5422215461730957, | |
| "step": 1708 | |
| }, | |
| { | |
| "epoch": 2.4963503649635035, | |
| "grad_norm": 1.1186398267745972, | |
| "learning_rate": 1.6798655593723362e-11, | |
| "loss": 1.7016127109527588, | |
| "step": 1710 | |
| }, | |
| { | |
| "epoch": 2.499270072992701, | |
| "grad_norm": 1.0774526596069336, | |
| "learning_rate": 1.6722298065356283e-11, | |
| "loss": 1.3653264045715332, | |
| "step": 1712 | |
| }, | |
| { | |
| "epoch": 2.502189781021898, | |
| "grad_norm": 0.9093915224075317, | |
| "learning_rate": 1.6646337130488608e-11, | |
| "loss": 1.2852323055267334, | |
| "step": 1714 | |
| }, | |
| { | |
| "epoch": 2.5051094890510948, | |
| "grad_norm": 0.9438765645027161, | |
| "learning_rate": 1.657077357614808e-11, | |
| "loss": 1.1891322135925293, | |
| "step": 1716 | |
| }, | |
| { | |
| "epoch": 2.508029197080292, | |
| "grad_norm": 1.076302409172058, | |
| "learning_rate": 1.649560818524517e-11, | |
| "loss": 1.2569494247436523, | |
| "step": 1718 | |
| }, | |
| { | |
| "epoch": 2.510948905109489, | |
| "grad_norm": 0.9599831700325012, | |
| "learning_rate": 1.6420841736565042e-11, | |
| "loss": 1.514939308166504, | |
| "step": 1720 | |
| }, | |
| { | |
| "epoch": 2.513868613138686, | |
| "grad_norm": 1.4091204404830933, | |
| "learning_rate": 1.6346475004759414e-11, | |
| "loss": 1.6944941282272339, | |
| "step": 1722 | |
| }, | |
| { | |
| "epoch": 2.5167883211678834, | |
| "grad_norm": 1.0423575639724731, | |
| "learning_rate": 1.627250876033853e-11, | |
| "loss": 1.7986551523208618, | |
| "step": 1724 | |
| }, | |
| { | |
| "epoch": 2.5197080291970804, | |
| "grad_norm": 1.6733680963516235, | |
| "learning_rate": 1.619894376966325e-11, | |
| "loss": 1.479095458984375, | |
| "step": 1726 | |
| }, | |
| { | |
| "epoch": 2.5226277372262773, | |
| "grad_norm": 1.1303389072418213, | |
| "learning_rate": 1.612578079493702e-11, | |
| "loss": 1.4962432384490967, | |
| "step": 1728 | |
| }, | |
| { | |
| "epoch": 2.5255474452554747, | |
| "grad_norm": 0.9553906917572021, | |
| "learning_rate": 1.6053020594198053e-11, | |
| "loss": 1.3031461238861084, | |
| "step": 1730 | |
| }, | |
| { | |
| "epoch": 2.5284671532846716, | |
| "grad_norm": 1.038560390472412, | |
| "learning_rate": 1.598066392131142e-11, | |
| "loss": 1.671377182006836, | |
| "step": 1732 | |
| }, | |
| { | |
| "epoch": 2.5313868613138686, | |
| "grad_norm": 1.979683756828308, | |
| "learning_rate": 1.5908711525961263e-11, | |
| "loss": 1.848396897315979, | |
| "step": 1734 | |
| }, | |
| { | |
| "epoch": 2.5343065693430655, | |
| "grad_norm": 0.60489422082901, | |
| "learning_rate": 1.5837164153643013e-11, | |
| "loss": 1.1993465423583984, | |
| "step": 1736 | |
| }, | |
| { | |
| "epoch": 2.537226277372263, | |
| "grad_norm": 0.8573713302612305, | |
| "learning_rate": 1.5766022545655703e-11, | |
| "loss": 1.300091028213501, | |
| "step": 1738 | |
| }, | |
| { | |
| "epoch": 2.54014598540146, | |
| "grad_norm": 0.5826265811920166, | |
| "learning_rate": 1.569528743909423e-11, | |
| "loss": 1.4329663515090942, | |
| "step": 1740 | |
| }, | |
| { | |
| "epoch": 2.543065693430657, | |
| "grad_norm": 1.2077945470809937, | |
| "learning_rate": 1.5624959566841764e-11, | |
| "loss": 1.666152000427246, | |
| "step": 1742 | |
| }, | |
| { | |
| "epoch": 2.5459854014598537, | |
| "grad_norm": 0.8533981442451477, | |
| "learning_rate": 1.5555039657562147e-11, | |
| "loss": 1.7575132846832275, | |
| "step": 1744 | |
| }, | |
| { | |
| "epoch": 2.548905109489051, | |
| "grad_norm": 0.4306533932685852, | |
| "learning_rate": 1.548552843569231e-11, | |
| "loss": 1.4937412738800049, | |
| "step": 1746 | |
| }, | |
| { | |
| "epoch": 2.551824817518248, | |
| "grad_norm": 1.0778521299362183, | |
| "learning_rate": 1.541642662143481e-11, | |
| "loss": 1.6058778762817383, | |
| "step": 1748 | |
| }, | |
| { | |
| "epoch": 2.554744525547445, | |
| "grad_norm": 0.7876433730125427, | |
| "learning_rate": 1.5347734930750357e-11, | |
| "loss": 1.3722295761108398, | |
| "step": 1750 | |
| }, | |
| { | |
| "epoch": 2.5576642335766424, | |
| "grad_norm": 0.5924785733222961, | |
| "learning_rate": 1.5279454075350363e-11, | |
| "loss": 1.2301844358444214, | |
| "step": 1752 | |
| }, | |
| { | |
| "epoch": 2.5605839416058394, | |
| "grad_norm": 0.5814633965492249, | |
| "learning_rate": 1.521158476268965e-11, | |
| "loss": 1.0383514165878296, | |
| "step": 1754 | |
| }, | |
| { | |
| "epoch": 2.5635036496350363, | |
| "grad_norm": 1.3851001262664795, | |
| "learning_rate": 1.514412769595899e-11, | |
| "loss": 1.2032626867294312, | |
| "step": 1756 | |
| }, | |
| { | |
| "epoch": 2.5664233576642337, | |
| "grad_norm": 0.801948606967926, | |
| "learning_rate": 1.5077083574077948e-11, | |
| "loss": 1.0018473863601685, | |
| "step": 1758 | |
| }, | |
| { | |
| "epoch": 2.5693430656934306, | |
| "grad_norm": 4.405994415283203, | |
| "learning_rate": 1.5010453091687567e-11, | |
| "loss": 1.373892068862915, | |
| "step": 1760 | |
| }, | |
| { | |
| "epoch": 2.5722627737226276, | |
| "grad_norm": 1.1656264066696167, | |
| "learning_rate": 1.494423693914319e-11, | |
| "loss": 1.1970577239990234, | |
| "step": 1762 | |
| }, | |
| { | |
| "epoch": 2.575182481751825, | |
| "grad_norm": 2.2657151222229004, | |
| "learning_rate": 1.4878435802507326e-11, | |
| "loss": 1.1862726211547852, | |
| "step": 1764 | |
| }, | |
| { | |
| "epoch": 2.578102189781022, | |
| "grad_norm": 0.7318071722984314, | |
| "learning_rate": 1.48130503635425e-11, | |
| "loss": 1.2532281875610352, | |
| "step": 1766 | |
| }, | |
| { | |
| "epoch": 2.581021897810219, | |
| "grad_norm": 0.38518860936164856, | |
| "learning_rate": 1.474808129970421e-11, | |
| "loss": 1.1742783784866333, | |
| "step": 1768 | |
| }, | |
| { | |
| "epoch": 2.5839416058394162, | |
| "grad_norm": 0.6994870901107788, | |
| "learning_rate": 1.468352928413392e-11, | |
| "loss": 1.556276559829712, | |
| "step": 1770 | |
| }, | |
| { | |
| "epoch": 2.586861313868613, | |
| "grad_norm": 0.5077093243598938, | |
| "learning_rate": 1.4619394985652097e-11, | |
| "loss": 1.1290826797485352, | |
| "step": 1772 | |
| }, | |
| { | |
| "epoch": 2.58978102189781, | |
| "grad_norm": 0.7753961682319641, | |
| "learning_rate": 1.4555679068751232e-11, | |
| "loss": 1.1286797523498535, | |
| "step": 1774 | |
| }, | |
| { | |
| "epoch": 2.5927007299270075, | |
| "grad_norm": 0.41287869215011597, | |
| "learning_rate": 1.4492382193589005e-11, | |
| "loss": 1.1933693885803223, | |
| "step": 1776 | |
| }, | |
| { | |
| "epoch": 2.5956204379562045, | |
| "grad_norm": 1.3430049419403076, | |
| "learning_rate": 1.4429505015981392e-11, | |
| "loss": 1.2475504875183105, | |
| "step": 1778 | |
| }, | |
| { | |
| "epoch": 2.5985401459854014, | |
| "grad_norm": 1.0992190837860107, | |
| "learning_rate": 1.4367048187395926e-11, | |
| "loss": 1.5478603839874268, | |
| "step": 1780 | |
| }, | |
| { | |
| "epoch": 2.601459854014599, | |
| "grad_norm": 0.6663128733634949, | |
| "learning_rate": 1.430501235494493e-11, | |
| "loss": 1.384826898574829, | |
| "step": 1782 | |
| }, | |
| { | |
| "epoch": 2.6043795620437957, | |
| "grad_norm": 0.37811803817749023, | |
| "learning_rate": 1.4243398161378788e-11, | |
| "loss": 1.1319198608398438, | |
| "step": 1784 | |
| }, | |
| { | |
| "epoch": 2.6072992700729927, | |
| "grad_norm": 1.3160942792892456, | |
| "learning_rate": 1.418220624507931e-11, | |
| "loss": 1.2590198516845703, | |
| "step": 1786 | |
| }, | |
| { | |
| "epoch": 2.61021897810219, | |
| "grad_norm": 0.8661515116691589, | |
| "learning_rate": 1.412143724005311e-11, | |
| "loss": 1.4723635911941528, | |
| "step": 1788 | |
| }, | |
| { | |
| "epoch": 2.613138686131387, | |
| "grad_norm": 1.0532100200653076, | |
| "learning_rate": 1.4061091775925042e-11, | |
| "loss": 1.6236767768859863, | |
| "step": 1790 | |
| }, | |
| { | |
| "epoch": 2.616058394160584, | |
| "grad_norm": 0.6676415801048279, | |
| "learning_rate": 1.4001170477931665e-11, | |
| "loss": 1.0301096439361572, | |
| "step": 1792 | |
| }, | |
| { | |
| "epoch": 2.618978102189781, | |
| "grad_norm": 0.8614192605018616, | |
| "learning_rate": 1.3941673966914778e-11, | |
| "loss": 1.3631868362426758, | |
| "step": 1794 | |
| }, | |
| { | |
| "epoch": 2.621897810218978, | |
| "grad_norm": 0.8611546754837036, | |
| "learning_rate": 1.3882602859314983e-11, | |
| "loss": 1.411142110824585, | |
| "step": 1796 | |
| }, | |
| { | |
| "epoch": 2.624817518248175, | |
| "grad_norm": 1.126027226448059, | |
| "learning_rate": 1.3823957767165299e-11, | |
| "loss": 1.18174147605896, | |
| "step": 1798 | |
| }, | |
| { | |
| "epoch": 2.627737226277372, | |
| "grad_norm": 0.9726853370666504, | |
| "learning_rate": 1.3765739298084793e-11, | |
| "loss": 1.1945315599441528, | |
| "step": 1800 | |
| }, | |
| { | |
| "epoch": 2.630656934306569, | |
| "grad_norm": 1.0772740840911865, | |
| "learning_rate": 1.3707948055272349e-11, | |
| "loss": 1.1097228527069092, | |
| "step": 1802 | |
| }, | |
| { | |
| "epoch": 2.6335766423357665, | |
| "grad_norm": 0.6411526799201965, | |
| "learning_rate": 1.3650584637500337e-11, | |
| "loss": 1.5164823532104492, | |
| "step": 1804 | |
| }, | |
| { | |
| "epoch": 2.6364963503649634, | |
| "grad_norm": 0.6073819398880005, | |
| "learning_rate": 1.3593649639108521e-11, | |
| "loss": 1.4226418733596802, | |
| "step": 1806 | |
| }, | |
| { | |
| "epoch": 2.6394160583941604, | |
| "grad_norm": 1.4418057203292847, | |
| "learning_rate": 1.3537143649997743e-11, | |
| "loss": 1.5453827381134033, | |
| "step": 1808 | |
| }, | |
| { | |
| "epoch": 2.6423357664233578, | |
| "grad_norm": 0.5259611010551453, | |
| "learning_rate": 1.3481067255623958e-11, | |
| "loss": 1.3054715394973755, | |
| "step": 1810 | |
| }, | |
| { | |
| "epoch": 2.6452554744525547, | |
| "grad_norm": 0.9263610243797302, | |
| "learning_rate": 1.3425421036992098e-11, | |
| "loss": 1.4691216945648193, | |
| "step": 1812 | |
| }, | |
| { | |
| "epoch": 2.6481751824817517, | |
| "grad_norm": 1.2356170415878296, | |
| "learning_rate": 1.337020557065006e-11, | |
| "loss": 1.3554291725158691, | |
| "step": 1814 | |
| }, | |
| { | |
| "epoch": 2.651094890510949, | |
| "grad_norm": 0.991592526435852, | |
| "learning_rate": 1.3315421428682727e-11, | |
| "loss": 1.4532699584960938, | |
| "step": 1816 | |
| }, | |
| { | |
| "epoch": 2.654014598540146, | |
| "grad_norm": 1.029658555984497, | |
| "learning_rate": 1.326106917870607e-11, | |
| "loss": 1.2581933736801147, | |
| "step": 1818 | |
| }, | |
| { | |
| "epoch": 2.656934306569343, | |
| "grad_norm": 0.7576796412467957, | |
| "learning_rate": 1.320714938386125e-11, | |
| "loss": 1.3442846536636353, | |
| "step": 1820 | |
| }, | |
| { | |
| "epoch": 2.6598540145985403, | |
| "grad_norm": 0.8517515063285828, | |
| "learning_rate": 1.3153662602808731e-11, | |
| "loss": 1.361882209777832, | |
| "step": 1822 | |
| }, | |
| { | |
| "epoch": 2.6627737226277373, | |
| "grad_norm": 0.7901403307914734, | |
| "learning_rate": 1.3100609389722604e-11, | |
| "loss": 1.4933452606201172, | |
| "step": 1824 | |
| }, | |
| { | |
| "epoch": 2.665693430656934, | |
| "grad_norm": 1.100870132446289, | |
| "learning_rate": 1.3047990294284753e-11, | |
| "loss": 1.2277604341506958, | |
| "step": 1826 | |
| }, | |
| { | |
| "epoch": 2.6686131386861316, | |
| "grad_norm": 0.6526262760162354, | |
| "learning_rate": 1.29958058616792e-11, | |
| "loss": 1.2418105602264404, | |
| "step": 1828 | |
| }, | |
| { | |
| "epoch": 2.6715328467153285, | |
| "grad_norm": 0.9037129878997803, | |
| "learning_rate": 1.2944056632586418e-11, | |
| "loss": 1.4959716796875, | |
| "step": 1830 | |
| }, | |
| { | |
| "epoch": 2.6744525547445255, | |
| "grad_norm": 1.6702628135681152, | |
| "learning_rate": 1.2892743143177793e-11, | |
| "loss": 1.7112908363342285, | |
| "step": 1832 | |
| }, | |
| { | |
| "epoch": 2.677372262773723, | |
| "grad_norm": 1.4932409524917603, | |
| "learning_rate": 1.284186592511e-11, | |
| "loss": 1.3613243103027344, | |
| "step": 1834 | |
| }, | |
| { | |
| "epoch": 2.68029197080292, | |
| "grad_norm": 0.3351198136806488, | |
| "learning_rate": 1.2791425505519557e-11, | |
| "loss": 1.2194634675979614, | |
| "step": 1836 | |
| }, | |
| { | |
| "epoch": 2.6832116788321168, | |
| "grad_norm": 0.8374185562133789, | |
| "learning_rate": 1.2741422407017312e-11, | |
| "loss": 1.3384425640106201, | |
| "step": 1838 | |
| }, | |
| { | |
| "epoch": 2.686131386861314, | |
| "grad_norm": 0.8853601217269897, | |
| "learning_rate": 1.2691857147683055e-11, | |
| "loss": 1.531416654586792, | |
| "step": 1840 | |
| }, | |
| { | |
| "epoch": 2.689051094890511, | |
| "grad_norm": 0.7787609696388245, | |
| "learning_rate": 1.2642730241060149e-11, | |
| "loss": 1.5118640661239624, | |
| "step": 1842 | |
| }, | |
| { | |
| "epoch": 2.691970802919708, | |
| "grad_norm": 2.0880799293518066, | |
| "learning_rate": 1.2594042196150196e-11, | |
| "loss": 1.561602234840393, | |
| "step": 1844 | |
| }, | |
| { | |
| "epoch": 2.694890510948905, | |
| "grad_norm": 0.9511889815330505, | |
| "learning_rate": 1.254579351740779e-11, | |
| "loss": 1.3543150424957275, | |
| "step": 1846 | |
| }, | |
| { | |
| "epoch": 2.697810218978102, | |
| "grad_norm": 1.0282362699508667, | |
| "learning_rate": 1.2497984704735244e-11, | |
| "loss": 1.5251884460449219, | |
| "step": 1848 | |
| }, | |
| { | |
| "epoch": 2.7007299270072993, | |
| "grad_norm": 0.7513379454612732, | |
| "learning_rate": 1.2450616253477472e-11, | |
| "loss": 1.4650368690490723, | |
| "step": 1850 | |
| }, | |
| { | |
| "epoch": 2.7036496350364962, | |
| "grad_norm": 1.5720882415771484, | |
| "learning_rate": 1.2403688654416788e-11, | |
| "loss": 1.5332424640655518, | |
| "step": 1852 | |
| }, | |
| { | |
| "epoch": 2.706569343065693, | |
| "grad_norm": 0.6373589634895325, | |
| "learning_rate": 1.2357202393767884e-11, | |
| "loss": 1.271345615386963, | |
| "step": 1854 | |
| }, | |
| { | |
| "epoch": 2.7094890510948906, | |
| "grad_norm": 0.9719908833503723, | |
| "learning_rate": 1.2311157953172753e-11, | |
| "loss": 1.6266101598739624, | |
| "step": 1856 | |
| }, | |
| { | |
| "epoch": 2.7124087591240875, | |
| "grad_norm": 1.5200854539871216, | |
| "learning_rate": 1.2265555809695725e-11, | |
| "loss": 1.4726779460906982, | |
| "step": 1858 | |
| }, | |
| { | |
| "epoch": 2.7153284671532845, | |
| "grad_norm": 1.090428113937378, | |
| "learning_rate": 1.2220396435818494e-11, | |
| "loss": 1.5031441450119019, | |
| "step": 1860 | |
| }, | |
| { | |
| "epoch": 2.718248175182482, | |
| "grad_norm": 0.9306588172912598, | |
| "learning_rate": 1.217568029943524e-11, | |
| "loss": 1.5269947052001953, | |
| "step": 1862 | |
| }, | |
| { | |
| "epoch": 2.721167883211679, | |
| "grad_norm": 0.8506163358688354, | |
| "learning_rate": 1.2131407863847788e-11, | |
| "loss": 1.480023980140686, | |
| "step": 1864 | |
| }, | |
| { | |
| "epoch": 2.7240875912408757, | |
| "grad_norm": 1.3606634140014648, | |
| "learning_rate": 1.2087579587760794e-11, | |
| "loss": 1.5377426147460938, | |
| "step": 1866 | |
| }, | |
| { | |
| "epoch": 2.727007299270073, | |
| "grad_norm": 1.9341416358947754, | |
| "learning_rate": 1.2044195925277e-11, | |
| "loss": 1.4445481300354004, | |
| "step": 1868 | |
| }, | |
| { | |
| "epoch": 2.72992700729927, | |
| "grad_norm": 1.101349115371704, | |
| "learning_rate": 1.2001257325892525e-11, | |
| "loss": 1.6140016317367554, | |
| "step": 1870 | |
| }, | |
| { | |
| "epoch": 2.732846715328467, | |
| "grad_norm": 1.2086797952651978, | |
| "learning_rate": 1.1958764234492219e-11, | |
| "loss": 1.4995508193969727, | |
| "step": 1872 | |
| }, | |
| { | |
| "epoch": 2.7357664233576644, | |
| "grad_norm": 1.0278968811035156, | |
| "learning_rate": 1.1916717091345023e-11, | |
| "loss": 1.3886187076568604, | |
| "step": 1874 | |
| }, | |
| { | |
| "epoch": 2.7386861313868613, | |
| "grad_norm": 1.0268532037734985, | |
| "learning_rate": 1.1875116332099453e-11, | |
| "loss": 1.457838535308838, | |
| "step": 1876 | |
| }, | |
| { | |
| "epoch": 2.7416058394160583, | |
| "grad_norm": 0.5896971225738525, | |
| "learning_rate": 1.1833962387779047e-11, | |
| "loss": 1.4909448623657227, | |
| "step": 1878 | |
| }, | |
| { | |
| "epoch": 2.7445255474452557, | |
| "grad_norm": 0.6387025117874146, | |
| "learning_rate": 1.1793255684777923e-11, | |
| "loss": 1.4974184036254883, | |
| "step": 1880 | |
| }, | |
| { | |
| "epoch": 2.7474452554744526, | |
| "grad_norm": 0.61506587266922, | |
| "learning_rate": 1.1752996644856346e-11, | |
| "loss": 1.518266201019287, | |
| "step": 1882 | |
| }, | |
| { | |
| "epoch": 2.7503649635036496, | |
| "grad_norm": 1.253604769706726, | |
| "learning_rate": 1.1713185685136364e-11, | |
| "loss": 1.428184986114502, | |
| "step": 1884 | |
| }, | |
| { | |
| "epoch": 2.753284671532847, | |
| "grad_norm": 0.7252373099327087, | |
| "learning_rate": 1.167382321809749e-11, | |
| "loss": 1.5073142051696777, | |
| "step": 1886 | |
| }, | |
| { | |
| "epoch": 2.756204379562044, | |
| "grad_norm": 0.8362865447998047, | |
| "learning_rate": 1.1634909651572425e-11, | |
| "loss": 1.397181510925293, | |
| "step": 1888 | |
| }, | |
| { | |
| "epoch": 2.759124087591241, | |
| "grad_norm": 1.2112503051757812, | |
| "learning_rate": 1.1596445388742837e-11, | |
| "loss": 1.5129762887954712, | |
| "step": 1890 | |
| }, | |
| { | |
| "epoch": 2.7620437956204382, | |
| "grad_norm": 0.7931188941001892, | |
| "learning_rate": 1.1558430828135168e-11, | |
| "loss": 1.523097276687622, | |
| "step": 1892 | |
| }, | |
| { | |
| "epoch": 2.764963503649635, | |
| "grad_norm": 0.6849160194396973, | |
| "learning_rate": 1.1520866363616524e-11, | |
| "loss": 1.4566798210144043, | |
| "step": 1894 | |
| }, | |
| { | |
| "epoch": 2.767883211678832, | |
| "grad_norm": 1.564093828201294, | |
| "learning_rate": 1.1483752384390583e-11, | |
| "loss": 1.6970009803771973, | |
| "step": 1896 | |
| }, | |
| { | |
| "epoch": 2.770802919708029, | |
| "grad_norm": 1.1729105710983276, | |
| "learning_rate": 1.1447089274993575e-11, | |
| "loss": 1.4502595663070679, | |
| "step": 1898 | |
| }, | |
| { | |
| "epoch": 2.7737226277372264, | |
| "grad_norm": 1.0183480978012085, | |
| "learning_rate": 1.1410877415290269e-11, | |
| "loss": 1.3800312280654907, | |
| "step": 1900 | |
| }, | |
| { | |
| "epoch": 2.7766423357664234, | |
| "grad_norm": 0.9024052023887634, | |
| "learning_rate": 1.1375117180470078e-11, | |
| "loss": 1.5891414880752563, | |
| "step": 1902 | |
| }, | |
| { | |
| "epoch": 2.7795620437956203, | |
| "grad_norm": 0.735111653804779, | |
| "learning_rate": 1.133980894104314e-11, | |
| "loss": 1.4016191959381104, | |
| "step": 1904 | |
| }, | |
| { | |
| "epoch": 2.7824817518248173, | |
| "grad_norm": 1.0259346961975098, | |
| "learning_rate": 1.1304953062836486e-11, | |
| "loss": 1.3877499103546143, | |
| "step": 1906 | |
| }, | |
| { | |
| "epoch": 2.7854014598540147, | |
| "grad_norm": 1.3157730102539062, | |
| "learning_rate": 1.1270549906990256e-11, | |
| "loss": 1.1698932647705078, | |
| "step": 1908 | |
| }, | |
| { | |
| "epoch": 2.7883211678832116, | |
| "grad_norm": 2.4770195484161377, | |
| "learning_rate": 1.1236599829953968e-11, | |
| "loss": 1.179342269897461, | |
| "step": 1910 | |
| }, | |
| { | |
| "epoch": 2.7912408759124085, | |
| "grad_norm": 0.8251614570617676, | |
| "learning_rate": 1.1203103183482787e-11, | |
| "loss": 1.3370054960250854, | |
| "step": 1912 | |
| }, | |
| { | |
| "epoch": 2.794160583941606, | |
| "grad_norm": 0.8468762636184692, | |
| "learning_rate": 1.1170060314633928e-11, | |
| "loss": 1.4804577827453613, | |
| "step": 1914 | |
| }, | |
| { | |
| "epoch": 2.797080291970803, | |
| "grad_norm": 0.46036869287490845, | |
| "learning_rate": 1.1137471565763024e-11, | |
| "loss": 1.3382248878479004, | |
| "step": 1916 | |
| }, | |
| { | |
| "epoch": 2.8, | |
| "grad_norm": 0.4767540395259857, | |
| "learning_rate": 1.1105337274520589e-11, | |
| "loss": 1.1864900588989258, | |
| "step": 1918 | |
| }, | |
| { | |
| "epoch": 2.802919708029197, | |
| "grad_norm": 1.0670506954193115, | |
| "learning_rate": 1.1073657773848535e-11, | |
| "loss": 1.2210025787353516, | |
| "step": 1920 | |
| }, | |
| { | |
| "epoch": 2.805839416058394, | |
| "grad_norm": 0.7148623466491699, | |
| "learning_rate": 1.10424333919767e-11, | |
| "loss": 1.130507230758667, | |
| "step": 1922 | |
| }, | |
| { | |
| "epoch": 2.808759124087591, | |
| "grad_norm": 0.5990166068077087, | |
| "learning_rate": 1.1011664452419465e-11, | |
| "loss": 1.1209691762924194, | |
| "step": 1924 | |
| }, | |
| { | |
| "epoch": 2.8116788321167885, | |
| "grad_norm": 0.4928390085697174, | |
| "learning_rate": 1.0981351273972383e-11, | |
| "loss": 1.05220365524292, | |
| "step": 1926 | |
| }, | |
| { | |
| "epoch": 2.8145985401459854, | |
| "grad_norm": 0.42803847789764404, | |
| "learning_rate": 1.09514941707089e-11, | |
| "loss": 1.0301886796951294, | |
| "step": 1928 | |
| }, | |
| { | |
| "epoch": 2.8175182481751824, | |
| "grad_norm": 0.26767516136169434, | |
| "learning_rate": 1.0922093451977073e-11, | |
| "loss": 1.0528907775878906, | |
| "step": 1930 | |
| }, | |
| { | |
| "epoch": 2.8204379562043798, | |
| "grad_norm": 2.2468039989471436, | |
| "learning_rate": 1.0893149422396403e-11, | |
| "loss": 1.0261061191558838, | |
| "step": 1932 | |
| }, | |
| { | |
| "epoch": 2.8233576642335767, | |
| "grad_norm": 0.3851284384727478, | |
| "learning_rate": 1.0864662381854632e-11, | |
| "loss": 1.106552243232727, | |
| "step": 1934 | |
| }, | |
| { | |
| "epoch": 2.8262773722627736, | |
| "grad_norm": 0.5606375336647034, | |
| "learning_rate": 1.0836632625504674e-11, | |
| "loss": 1.3412342071533203, | |
| "step": 1936 | |
| }, | |
| { | |
| "epoch": 2.829197080291971, | |
| "grad_norm": 0.8580256700515747, | |
| "learning_rate": 1.0809060443761531e-11, | |
| "loss": 1.3885517120361328, | |
| "step": 1938 | |
| }, | |
| { | |
| "epoch": 2.832116788321168, | |
| "grad_norm": 0.7379202842712402, | |
| "learning_rate": 1.0781946122299307e-11, | |
| "loss": 1.4401962757110596, | |
| "step": 1940 | |
| }, | |
| { | |
| "epoch": 2.835036496350365, | |
| "grad_norm": 0.726071834564209, | |
| "learning_rate": 1.0755289942048237e-11, | |
| "loss": 1.3771188259124756, | |
| "step": 1942 | |
| }, | |
| { | |
| "epoch": 2.8379562043795623, | |
| "grad_norm": 1.260642170906067, | |
| "learning_rate": 1.0729092179191765e-11, | |
| "loss": 1.4098293781280518, | |
| "step": 1944 | |
| }, | |
| { | |
| "epoch": 2.8408759124087593, | |
| "grad_norm": 2.4710659980773926, | |
| "learning_rate": 1.0703353105163708e-11, | |
| "loss": 1.351040244102478, | |
| "step": 1946 | |
| }, | |
| { | |
| "epoch": 2.843795620437956, | |
| "grad_norm": 0.7024121284484863, | |
| "learning_rate": 1.0678072986645414e-11, | |
| "loss": 1.3694772720336914, | |
| "step": 1948 | |
| }, | |
| { | |
| "epoch": 2.846715328467153, | |
| "grad_norm": 0.6834819316864014, | |
| "learning_rate": 1.0653252085563021e-11, | |
| "loss": 1.4273629188537598, | |
| "step": 1950 | |
| }, | |
| { | |
| "epoch": 2.8496350364963505, | |
| "grad_norm": 0.9664222598075867, | |
| "learning_rate": 1.0628890659084748e-11, | |
| "loss": 1.3720366954803467, | |
| "step": 1952 | |
| }, | |
| { | |
| "epoch": 2.8525547445255475, | |
| "grad_norm": 0.6046033501625061, | |
| "learning_rate": 1.0604988959618193e-11, | |
| "loss": 1.3852019309997559, | |
| "step": 1954 | |
| }, | |
| { | |
| "epoch": 2.8554744525547444, | |
| "grad_norm": 0.8115307092666626, | |
| "learning_rate": 1.0581547234807778e-11, | |
| "loss": 1.3424034118652344, | |
| "step": 1956 | |
| }, | |
| { | |
| "epoch": 2.8583941605839414, | |
| "grad_norm": 0.6844267845153809, | |
| "learning_rate": 1.055856572753211e-11, | |
| "loss": 1.3605966567993164, | |
| "step": 1958 | |
| }, | |
| { | |
| "epoch": 2.8613138686131387, | |
| "grad_norm": 0.751759946346283, | |
| "learning_rate": 1.0536044675901533e-11, | |
| "loss": 1.3599393367767334, | |
| "step": 1960 | |
| }, | |
| { | |
| "epoch": 2.8642335766423357, | |
| "grad_norm": 0.753299355506897, | |
| "learning_rate": 1.0513984313255612e-11, | |
| "loss": 1.3264997005462646, | |
| "step": 1962 | |
| }, | |
| { | |
| "epoch": 2.8671532846715326, | |
| "grad_norm": 0.6588592529296875, | |
| "learning_rate": 1.0492384868160763e-11, | |
| "loss": 1.384945273399353, | |
| "step": 1964 | |
| }, | |
| { | |
| "epoch": 2.87007299270073, | |
| "grad_norm": 1.344991683959961, | |
| "learning_rate": 1.0471246564407825e-11, | |
| "loss": 1.4046998023986816, | |
| "step": 1966 | |
| }, | |
| { | |
| "epoch": 2.872992700729927, | |
| "grad_norm": 0.968307375907898, | |
| "learning_rate": 1.0450569621009781e-11, | |
| "loss": 1.423596739768982, | |
| "step": 1968 | |
| }, | |
| { | |
| "epoch": 2.875912408759124, | |
| "grad_norm": 0.8447715640068054, | |
| "learning_rate": 1.0430354252199495e-11, | |
| "loss": 1.4057683944702148, | |
| "step": 1970 | |
| }, | |
| { | |
| "epoch": 2.8788321167883213, | |
| "grad_norm": 0.7159552574157715, | |
| "learning_rate": 1.0410600667427462e-11, | |
| "loss": 1.4235366582870483, | |
| "step": 1972 | |
| }, | |
| { | |
| "epoch": 2.8817518248175182, | |
| "grad_norm": 0.8689557313919067, | |
| "learning_rate": 1.0391309071359665e-11, | |
| "loss": 1.396228313446045, | |
| "step": 1974 | |
| }, | |
| { | |
| "epoch": 2.884671532846715, | |
| "grad_norm": 0.5925703048706055, | |
| "learning_rate": 1.0372479663875433e-11, | |
| "loss": 1.4210439920425415, | |
| "step": 1976 | |
| }, | |
| { | |
| "epoch": 2.8875912408759126, | |
| "grad_norm": 0.6446409225463867, | |
| "learning_rate": 1.0354112640065392e-11, | |
| "loss": 1.36083984375, | |
| "step": 1978 | |
| }, | |
| { | |
| "epoch": 2.8905109489051095, | |
| "grad_norm": 0.7436321377754211, | |
| "learning_rate": 1.0336208190229425e-11, | |
| "loss": 1.3726532459259033, | |
| "step": 1980 | |
| }, | |
| { | |
| "epoch": 2.8934306569343065, | |
| "grad_norm": 0.8913896679878235, | |
| "learning_rate": 1.0318766499874702e-11, | |
| "loss": 1.4016762971878052, | |
| "step": 1982 | |
| }, | |
| { | |
| "epoch": 2.896350364963504, | |
| "grad_norm": 1.1491930484771729, | |
| "learning_rate": 1.0301787749713778e-11, | |
| "loss": 1.3855860233306885, | |
| "step": 1984 | |
| }, | |
| { | |
| "epoch": 2.899270072992701, | |
| "grad_norm": 0.5891770720481873, | |
| "learning_rate": 1.0285272115662697e-11, | |
| "loss": 1.4141876697540283, | |
| "step": 1986 | |
| }, | |
| { | |
| "epoch": 2.9021897810218977, | |
| "grad_norm": 0.6485686898231506, | |
| "learning_rate": 1.0269219768839177e-11, | |
| "loss": 1.4087603092193604, | |
| "step": 1988 | |
| }, | |
| { | |
| "epoch": 2.905109489051095, | |
| "grad_norm": 1.2576870918273926, | |
| "learning_rate": 1.0253630875560841e-11, | |
| "loss": 1.398749828338623, | |
| "step": 1990 | |
| }, | |
| { | |
| "epoch": 2.908029197080292, | |
| "grad_norm": 0.8351176381111145, | |
| "learning_rate": 1.0238505597343493e-11, | |
| "loss": 1.3710181713104248, | |
| "step": 1992 | |
| }, | |
| { | |
| "epoch": 2.910948905109489, | |
| "grad_norm": 0.6197527647018433, | |
| "learning_rate": 1.0223844090899445e-11, | |
| "loss": 1.368019700050354, | |
| "step": 1994 | |
| }, | |
| { | |
| "epoch": 2.9138686131386864, | |
| "grad_norm": 0.6959254145622253, | |
| "learning_rate": 1.0209646508135873e-11, | |
| "loss": 1.3915154933929443, | |
| "step": 1996 | |
| }, | |
| { | |
| "epoch": 2.9167883211678833, | |
| "grad_norm": 0.8842105269432068, | |
| "learning_rate": 1.0195912996153294e-11, | |
| "loss": 1.4496486186981201, | |
| "step": 1998 | |
| }, | |
| { | |
| "epoch": 2.9197080291970803, | |
| "grad_norm": 0.96880042552948, | |
| "learning_rate": 1.0182643697243976e-11, | |
| "loss": 1.3976027965545654, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 2.9226277372262772, | |
| "grad_norm": 0.6320104598999023, | |
| "learning_rate": 1.0169838748890516e-11, | |
| "loss": 1.387899398803711, | |
| "step": 2002 | |
| }, | |
| { | |
| "epoch": 2.9255474452554746, | |
| "grad_norm": 1.409497857093811, | |
| "learning_rate": 1.0157498283764395e-11, | |
| "loss": 1.3462584018707275, | |
| "step": 2004 | |
| }, | |
| { | |
| "epoch": 2.9284671532846716, | |
| "grad_norm": 0.564811646938324, | |
| "learning_rate": 1.0145622429724598e-11, | |
| "loss": 1.4039101600646973, | |
| "step": 2006 | |
| }, | |
| { | |
| "epoch": 2.9313868613138685, | |
| "grad_norm": 2.1775057315826416, | |
| "learning_rate": 1.0134211309816299e-11, | |
| "loss": 1.4030756950378418, | |
| "step": 2008 | |
| }, | |
| { | |
| "epoch": 2.9343065693430654, | |
| "grad_norm": 0.8580056428909302, | |
| "learning_rate": 1.0123265042269589e-11, | |
| "loss": 1.3715065717697144, | |
| "step": 2010 | |
| }, | |
| { | |
| "epoch": 2.937226277372263, | |
| "grad_norm": 0.6893569827079773, | |
| "learning_rate": 1.0112783740498235e-11, | |
| "loss": 1.4134101867675781, | |
| "step": 2012 | |
| }, | |
| { | |
| "epoch": 2.9401459854014598, | |
| "grad_norm": 0.5474994778633118, | |
| "learning_rate": 1.0102767513098516e-11, | |
| "loss": 1.4000933170318604, | |
| "step": 2014 | |
| }, | |
| { | |
| "epoch": 2.9430656934306567, | |
| "grad_norm": 0.8648778796195984, | |
| "learning_rate": 1.0093216463848107e-11, | |
| "loss": 1.4565629959106445, | |
| "step": 2016 | |
| }, | |
| { | |
| "epoch": 2.945985401459854, | |
| "grad_norm": 0.7470832467079163, | |
| "learning_rate": 1.0084130691704985e-11, | |
| "loss": 1.43211030960083, | |
| "step": 2018 | |
| }, | |
| { | |
| "epoch": 2.948905109489051, | |
| "grad_norm": 1.5307104587554932, | |
| "learning_rate": 1.0075510290806418e-11, | |
| "loss": 1.4241385459899902, | |
| "step": 2020 | |
| }, | |
| { | |
| "epoch": 2.951824817518248, | |
| "grad_norm": 0.6980034112930298, | |
| "learning_rate": 1.0067355350467981e-11, | |
| "loss": 1.409893274307251, | |
| "step": 2022 | |
| }, | |
| { | |
| "epoch": 2.9547445255474454, | |
| "grad_norm": 0.6787309050559998, | |
| "learning_rate": 1.0059665955182627e-11, | |
| "loss": 1.4007437229156494, | |
| "step": 2024 | |
| }, | |
| { | |
| "epoch": 2.9576642335766423, | |
| "grad_norm": 0.6224604845046997, | |
| "learning_rate": 1.0052442184619831e-11, | |
| "loss": 1.3615572452545166, | |
| "step": 2026 | |
| }, | |
| { | |
| "epoch": 2.9605839416058393, | |
| "grad_norm": 0.6994672417640686, | |
| "learning_rate": 1.0045684113624746e-11, | |
| "loss": 1.3544400930404663, | |
| "step": 2028 | |
| }, | |
| { | |
| "epoch": 2.9635036496350367, | |
| "grad_norm": 0.8780302405357361, | |
| "learning_rate": 1.0039391812217433e-11, | |
| "loss": 1.3746960163116455, | |
| "step": 2030 | |
| }, | |
| { | |
| "epoch": 2.9664233576642336, | |
| "grad_norm": 0.5529699921607971, | |
| "learning_rate": 1.0033565345592127e-11, | |
| "loss": 1.378829002380371, | |
| "step": 2032 | |
| }, | |
| { | |
| "epoch": 2.9693430656934305, | |
| "grad_norm": 1.0961394309997559, | |
| "learning_rate": 1.0028204774116592e-11, | |
| "loss": 1.4332547187805176, | |
| "step": 2034 | |
| }, | |
| { | |
| "epoch": 2.972262773722628, | |
| "grad_norm": 0.7980661392211914, | |
| "learning_rate": 1.0023310153331455e-11, | |
| "loss": 1.421024203300476, | |
| "step": 2036 | |
| }, | |
| { | |
| "epoch": 2.975182481751825, | |
| "grad_norm": 0.8884260058403015, | |
| "learning_rate": 1.0018881533949651e-11, | |
| "loss": 1.4154038429260254, | |
| "step": 2038 | |
| }, | |
| { | |
| "epoch": 2.978102189781022, | |
| "grad_norm": 0.6180927753448486, | |
| "learning_rate": 1.0014918961855914e-11, | |
| "loss": 1.4124186038970947, | |
| "step": 2040 | |
| }, | |
| { | |
| "epoch": 2.981021897810219, | |
| "grad_norm": 1.0760934352874756, | |
| "learning_rate": 1.0011422478106256e-11, | |
| "loss": 1.4193979501724243, | |
| "step": 2042 | |
| }, | |
| { | |
| "epoch": 2.983941605839416, | |
| "grad_norm": 1.3947854042053223, | |
| "learning_rate": 1.000839211892759e-11, | |
| "loss": 1.351562738418579, | |
| "step": 2044 | |
| }, | |
| { | |
| "epoch": 2.986861313868613, | |
| "grad_norm": 0.8076825141906738, | |
| "learning_rate": 1.0005827915717327e-11, | |
| "loss": 1.4435218572616577, | |
| "step": 2046 | |
| }, | |
| { | |
| "epoch": 2.9897810218978105, | |
| "grad_norm": 0.8117115497589111, | |
| "learning_rate": 1.0003729895043056e-11, | |
| "loss": 1.4881727695465088, | |
| "step": 2048 | |
| }, | |
| { | |
| "epoch": 2.9927007299270074, | |
| "grad_norm": 1.277565598487854, | |
| "learning_rate": 1.0002098078642278e-11, | |
| "loss": 1.6461460590362549, | |
| "step": 2050 | |
| }, | |
| { | |
| "epoch": 2.9956204379562044, | |
| "grad_norm": 1.383927822113037, | |
| "learning_rate": 1.000093248342216e-11, | |
| "loss": 1.9207826852798462, | |
| "step": 2052 | |
| }, | |
| { | |
| "epoch": 2.9985401459854013, | |
| "grad_norm": 1.1475387811660767, | |
| "learning_rate": 1.0000233121459382e-11, | |
| "loss": 1.9258217811584473, | |
| "step": 2054 | |
| }, | |
| { | |
| "epoch": 3.0, | |
| "step": 2055, | |
| "total_flos": 3.6341125395219743e+18, | |
| "train_loss": 1.5168476336773875, | |
| "train_runtime": 11672.6098, | |
| "train_samples_per_second": 2.817, | |
| "train_steps_per_second": 0.176 | |
| } | |
| ], | |
| "logging_steps": 2, | |
| "max_steps": 2055, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 3, | |
| "save_steps": 9999999, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": true | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 3.6341125395219743e+18, | |
| "train_batch_size": 1, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |