Instructions to use furproxy/9b-21 with libraries, inference providers, notebooks, and local apps. Follow these links to get started.
- Libraries
- PEFT
How to use furproxy/9b-21 with PEFT:
```python
from peft import PeftModel
from transformers import AutoModelForCausalLM

base_model = AutoModelForCausalLM.from_pretrained("/workspace/models/Qwen3.5-9B")
model = PeftModel.from_pretrained(base_model, "furproxy/9b-21")
```
- Transformers
How to use furproxy/9b-21 with Transformers:
```python
# Use a pipeline as a high-level helper
from transformers import pipeline

pipe = pipeline("text-generation", model="furproxy/9b-21")
messages = [
    {"role": "user", "content": "Who are you?"},
]
pipe(messages)
```
```python
# Load model directly
from transformers import AutoModel

model = AutoModel.from_pretrained("furproxy/9b-21", dtype="auto")
```
- Notebooks
- Google Colab
- Kaggle
- Local Apps Settings
- vLLM
How to use furproxy/9b-21 with vLLM:
Install from pip and serve model
```bash
# Install vLLM from pip:
pip install vllm

# Start the vLLM server:
vllm serve "furproxy/9b-21"

# Call the server using curl (OpenAI-compatible API):
curl -X POST "http://localhost:8000/v1/chat/completions" \
  -H "Content-Type: application/json" \
  --data '{
    "model": "furproxy/9b-21",
    "messages": [
      {
        "role": "user",
        "content": "What is the capital of France?"
      }
    ]
  }'
```
Use Docker
docker model run hf.co/furproxy/9b-21
- SGLang
How to use furproxy/9b-21 with SGLang:
Install from pip and serve model
```bash
# Install SGLang from pip:
pip install sglang

# Start the SGLang server:
python3 -m sglang.launch_server \
  --model-path "furproxy/9b-21" \
  --host 0.0.0.0 \
  --port 30000

# Call the server using curl (OpenAI-compatible API):
curl -X POST "http://localhost:30000/v1/chat/completions" \
  -H "Content-Type: application/json" \
  --data '{
    "model": "furproxy/9b-21",
    "messages": [
      {
        "role": "user",
        "content": "What is the capital of France?"
      }
    ]
  }'
```
Use Docker images
```bash
docker run --gpus all \
  --shm-size 32g \
  -p 30000:30000 \
  -v ~/.cache/huggingface:/root/.cache/huggingface \
  --env "HF_TOKEN=<secret>" \
  --ipc=host \
  lmsysorg/sglang:latest \
  python3 -m sglang.launch_server \
    --model-path "furproxy/9b-21" \
    --host 0.0.0.0 \
    --port 30000

# Call the server using curl (OpenAI-compatible API):
curl -X POST "http://localhost:30000/v1/chat/completions" \
  -H "Content-Type: application/json" \
  --data '{
    "model": "furproxy/9b-21",
    "messages": [
      {
        "role": "user",
        "content": "What is the capital of France?"
      }
    ]
  }'
```
- Docker Model Runner
How to use furproxy/9b-21 with Docker Model Runner:
docker model run hf.co/furproxy/9b-21
| { | |
| "best_global_step": null, | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 3.0, | |
| "eval_steps": 500, | |
| "global_step": 2619, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.002290950744558992, | |
| "grad_norm": 0.2425529807806015, | |
| "learning_rate": 7.633587786259542e-08, | |
| "loss": 1.4360580444335938, | |
| "step": 2 | |
| }, | |
| { | |
| "epoch": 0.004581901489117984, | |
| "grad_norm": 0.3157375752925873, | |
| "learning_rate": 2.2900763358778629e-07, | |
| "loss": 1.9720795154571533, | |
| "step": 4 | |
| }, | |
| { | |
| "epoch": 0.006872852233676976, | |
| "grad_norm": 0.3054039776325226, | |
| "learning_rate": 3.8167938931297716e-07, | |
| "loss": 1.8266024589538574, | |
| "step": 6 | |
| }, | |
| { | |
| "epoch": 0.009163802978235968, | |
| "grad_norm": 0.7713625431060791, | |
| "learning_rate": 5.34351145038168e-07, | |
| "loss": 2.4644742012023926, | |
| "step": 8 | |
| }, | |
| { | |
| "epoch": 0.011454753722794959, | |
| "grad_norm": 0.17603641748428345, | |
| "learning_rate": 6.870229007633589e-07, | |
| "loss": 1.273919939994812, | |
| "step": 10 | |
| }, | |
| { | |
| "epoch": 0.013745704467353952, | |
| "grad_norm": 0.12217849493026733, | |
| "learning_rate": 8.396946564885497e-07, | |
| "loss": 1.6275849342346191, | |
| "step": 12 | |
| }, | |
| { | |
| "epoch": 0.016036655211912942, | |
| "grad_norm": 0.4887905418872833, | |
| "learning_rate": 9.923664122137404e-07, | |
| "loss": 1.9397943019866943, | |
| "step": 14 | |
| }, | |
| { | |
| "epoch": 0.018327605956471937, | |
| "grad_norm": 0.14639756083488464, | |
| "learning_rate": 1.1450381679389313e-06, | |
| "loss": 1.5786923170089722, | |
| "step": 16 | |
| }, | |
| { | |
| "epoch": 0.020618556701030927, | |
| "grad_norm": 1.0165174007415771, | |
| "learning_rate": 1.297709923664122e-06, | |
| "loss": 2.2075228691101074, | |
| "step": 18 | |
| }, | |
| { | |
| "epoch": 0.022909507445589918, | |
| "grad_norm": 0.6907394528388977, | |
| "learning_rate": 1.450381679389313e-06, | |
| "loss": 2.0885233879089355, | |
| "step": 20 | |
| }, | |
| { | |
| "epoch": 0.025200458190148912, | |
| "grad_norm": 0.5898762941360474, | |
| "learning_rate": 1.603053435114504e-06, | |
| "loss": 1.5722987651824951, | |
| "step": 22 | |
| }, | |
| { | |
| "epoch": 0.027491408934707903, | |
| "grad_norm": 0.28665709495544434, | |
| "learning_rate": 1.7557251908396948e-06, | |
| "loss": 1.920871376991272, | |
| "step": 24 | |
| }, | |
| { | |
| "epoch": 0.029782359679266894, | |
| "grad_norm": 0.2708616256713867, | |
| "learning_rate": 1.908396946564886e-06, | |
| "loss": 1.9028164148330688, | |
| "step": 26 | |
| }, | |
| { | |
| "epoch": 0.032073310423825885, | |
| "grad_norm": 0.12096082419157028, | |
| "learning_rate": 2.0610687022900764e-06, | |
| "loss": 1.724308967590332, | |
| "step": 28 | |
| }, | |
| { | |
| "epoch": 0.03436426116838488, | |
| "grad_norm": 0.3193468749523163, | |
| "learning_rate": 2.2137404580152674e-06, | |
| "loss": 2.3334269523620605, | |
| "step": 30 | |
| }, | |
| { | |
| "epoch": 0.03665521191294387, | |
| "grad_norm": 0.25921180844306946, | |
| "learning_rate": 2.3664122137404585e-06, | |
| "loss": 1.7049498558044434, | |
| "step": 32 | |
| }, | |
| { | |
| "epoch": 0.038946162657502864, | |
| "grad_norm": 0.2873923182487488, | |
| "learning_rate": 2.5190839694656487e-06, | |
| "loss": 1.8035858869552612, | |
| "step": 34 | |
| }, | |
| { | |
| "epoch": 0.041237113402061855, | |
| "grad_norm": 0.2893233895301819, | |
| "learning_rate": 2.67175572519084e-06, | |
| "loss": 1.8687759637832642, | |
| "step": 36 | |
| }, | |
| { | |
| "epoch": 0.043528064146620846, | |
| "grad_norm": 0.3628486394882202, | |
| "learning_rate": 2.824427480916031e-06, | |
| "loss": 1.5172038078308105, | |
| "step": 38 | |
| }, | |
| { | |
| "epoch": 0.045819014891179836, | |
| "grad_norm": 0.14311300218105316, | |
| "learning_rate": 2.9770992366412218e-06, | |
| "loss": 1.3586844205856323, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 0.048109965635738834, | |
| "grad_norm": 0.2167440503835678, | |
| "learning_rate": 3.129770992366413e-06, | |
| "loss": 1.8630679845809937, | |
| "step": 42 | |
| }, | |
| { | |
| "epoch": 0.050400916380297825, | |
| "grad_norm": 0.28841111063957214, | |
| "learning_rate": 3.2824427480916034e-06, | |
| "loss": 1.8234974145889282, | |
| "step": 44 | |
| }, | |
| { | |
| "epoch": 0.052691867124856816, | |
| "grad_norm": 1.6244624853134155, | |
| "learning_rate": 3.4351145038167944e-06, | |
| "loss": 2.474839210510254, | |
| "step": 46 | |
| }, | |
| { | |
| "epoch": 0.054982817869415807, | |
| "grad_norm": 0.7337155938148499, | |
| "learning_rate": 3.587786259541985e-06, | |
| "loss": 1.9647748470306396, | |
| "step": 48 | |
| }, | |
| { | |
| "epoch": 0.0572737686139748, | |
| "grad_norm": 0.21595202386379242, | |
| "learning_rate": 3.740458015267176e-06, | |
| "loss": 1.7915287017822266, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 0.05956471935853379, | |
| "grad_norm": 0.12472744286060333, | |
| "learning_rate": 3.893129770992366e-06, | |
| "loss": 1.57389497756958, | |
| "step": 52 | |
| }, | |
| { | |
| "epoch": 0.061855670103092786, | |
| "grad_norm": 0.2959010601043701, | |
| "learning_rate": 4.045801526717557e-06, | |
| "loss": 1.9410815238952637, | |
| "step": 54 | |
| }, | |
| { | |
| "epoch": 0.06414662084765177, | |
| "grad_norm": 0.5962556600570679, | |
| "learning_rate": 4.198473282442748e-06, | |
| "loss": 1.5538616180419922, | |
| "step": 56 | |
| }, | |
| { | |
| "epoch": 0.06643757159221077, | |
| "grad_norm": 0.223605677485466, | |
| "learning_rate": 4.351145038167939e-06, | |
| "loss": 2.3718690872192383, | |
| "step": 58 | |
| }, | |
| { | |
| "epoch": 0.06872852233676977, | |
| "grad_norm": 2.0641846656799316, | |
| "learning_rate": 4.5038167938931296e-06, | |
| "loss": 2.0545084476470947, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 0.07101947308132875, | |
| "grad_norm": 0.11926179379224777, | |
| "learning_rate": 4.656488549618321e-06, | |
| "loss": 1.518899917602539, | |
| "step": 62 | |
| }, | |
| { | |
| "epoch": 0.07331042382588775, | |
| "grad_norm": 0.5377867221832275, | |
| "learning_rate": 4.8091603053435125e-06, | |
| "loss": 2.0711913108825684, | |
| "step": 64 | |
| }, | |
| { | |
| "epoch": 0.07560137457044673, | |
| "grad_norm": 0.20864170789718628, | |
| "learning_rate": 4.961832061068703e-06, | |
| "loss": 1.5393109321594238, | |
| "step": 66 | |
| }, | |
| { | |
| "epoch": 0.07789232531500573, | |
| "grad_norm": 0.17572522163391113, | |
| "learning_rate": 5.114503816793893e-06, | |
| "loss": 1.7614434957504272, | |
| "step": 68 | |
| }, | |
| { | |
| "epoch": 0.08018327605956473, | |
| "grad_norm": 0.4484013020992279, | |
| "learning_rate": 5.267175572519084e-06, | |
| "loss": 1.6294645071029663, | |
| "step": 70 | |
| }, | |
| { | |
| "epoch": 0.08247422680412371, | |
| "grad_norm": 0.1890186220407486, | |
| "learning_rate": 5.419847328244276e-06, | |
| "loss": 1.6021493673324585, | |
| "step": 72 | |
| }, | |
| { | |
| "epoch": 0.08476517754868271, | |
| "grad_norm": 0.19444267451763153, | |
| "learning_rate": 5.572519083969467e-06, | |
| "loss": 1.861649990081787, | |
| "step": 74 | |
| }, | |
| { | |
| "epoch": 0.08705612829324169, | |
| "grad_norm": 0.0960668995976448, | |
| "learning_rate": 5.725190839694656e-06, | |
| "loss": 1.5107250213623047, | |
| "step": 76 | |
| }, | |
| { | |
| "epoch": 0.08934707903780069, | |
| "grad_norm": 0.5166394114494324, | |
| "learning_rate": 5.877862595419848e-06, | |
| "loss": 1.977331280708313, | |
| "step": 78 | |
| }, | |
| { | |
| "epoch": 0.09163802978235967, | |
| "grad_norm": 0.19248326122760773, | |
| "learning_rate": 6.030534351145039e-06, | |
| "loss": 1.7162344455718994, | |
| "step": 80 | |
| }, | |
| { | |
| "epoch": 0.09392898052691867, | |
| "grad_norm": 0.14979946613311768, | |
| "learning_rate": 6.18320610687023e-06, | |
| "loss": 1.4223518371582031, | |
| "step": 82 | |
| }, | |
| { | |
| "epoch": 0.09621993127147767, | |
| "grad_norm": 0.19589261710643768, | |
| "learning_rate": 6.335877862595419e-06, | |
| "loss": 1.7889583110809326, | |
| "step": 84 | |
| }, | |
| { | |
| "epoch": 0.09851088201603665, | |
| "grad_norm": 0.10083930939435959, | |
| "learning_rate": 6.488549618320611e-06, | |
| "loss": 1.309556484222412, | |
| "step": 86 | |
| }, | |
| { | |
| "epoch": 0.10080183276059565, | |
| "grad_norm": 0.1550842672586441, | |
| "learning_rate": 6.641221374045802e-06, | |
| "loss": 1.6037399768829346, | |
| "step": 88 | |
| }, | |
| { | |
| "epoch": 0.10309278350515463, | |
| "grad_norm": 0.1855807900428772, | |
| "learning_rate": 6.793893129770993e-06, | |
| "loss": 1.73233962059021, | |
| "step": 90 | |
| }, | |
| { | |
| "epoch": 0.10538373424971363, | |
| "grad_norm": 0.07532945275306702, | |
| "learning_rate": 6.946564885496184e-06, | |
| "loss": 1.5398590564727783, | |
| "step": 92 | |
| }, | |
| { | |
| "epoch": 0.10767468499427263, | |
| "grad_norm": 0.15782028436660767, | |
| "learning_rate": 7.0992366412213746e-06, | |
| "loss": 1.7258206605911255, | |
| "step": 94 | |
| }, | |
| { | |
| "epoch": 0.10996563573883161, | |
| "grad_norm": 0.09372120350599289, | |
| "learning_rate": 7.251908396946566e-06, | |
| "loss": 1.3687584400177002, | |
| "step": 96 | |
| }, | |
| { | |
| "epoch": 0.11225658648339061, | |
| "grad_norm": 0.21025855839252472, | |
| "learning_rate": 7.404580152671757e-06, | |
| "loss": 1.646334171295166, | |
| "step": 98 | |
| }, | |
| { | |
| "epoch": 0.1145475372279496, | |
| "grad_norm": 1.09811532497406, | |
| "learning_rate": 7.557251908396948e-06, | |
| "loss": 1.6427111625671387, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.11683848797250859, | |
| "grad_norm": 0.2197725772857666, | |
| "learning_rate": 7.709923664122137e-06, | |
| "loss": 1.7769626379013062, | |
| "step": 102 | |
| }, | |
| { | |
| "epoch": 0.11912943871706758, | |
| "grad_norm": 0.06639471650123596, | |
| "learning_rate": 7.862595419847328e-06, | |
| "loss": 1.4597866535186768, | |
| "step": 104 | |
| }, | |
| { | |
| "epoch": 0.12142038946162657, | |
| "grad_norm": 0.08875848352909088, | |
| "learning_rate": 8.015267175572519e-06, | |
| "loss": 1.3246572017669678, | |
| "step": 106 | |
| }, | |
| { | |
| "epoch": 0.12371134020618557, | |
| "grad_norm": 0.07279494404792786, | |
| "learning_rate": 8.16793893129771e-06, | |
| "loss": 1.551082730293274, | |
| "step": 108 | |
| }, | |
| { | |
| "epoch": 0.12600229095074456, | |
| "grad_norm": 0.06380033493041992, | |
| "learning_rate": 8.320610687022901e-06, | |
| "loss": 1.481245517730713, | |
| "step": 110 | |
| }, | |
| { | |
| "epoch": 0.12829324169530354, | |
| "grad_norm": 0.16885097324848175, | |
| "learning_rate": 8.473282442748092e-06, | |
| "loss": 1.7407457828521729, | |
| "step": 112 | |
| }, | |
| { | |
| "epoch": 0.13058419243986255, | |
| "grad_norm": 0.20184065401554108, | |
| "learning_rate": 8.625954198473283e-06, | |
| "loss": 1.5867414474487305, | |
| "step": 114 | |
| }, | |
| { | |
| "epoch": 0.13287514318442153, | |
| "grad_norm": 0.20837143063545227, | |
| "learning_rate": 8.778625954198474e-06, | |
| "loss": 1.6812255382537842, | |
| "step": 116 | |
| }, | |
| { | |
| "epoch": 0.13516609392898052, | |
| "grad_norm": 0.20289385318756104, | |
| "learning_rate": 8.931297709923665e-06, | |
| "loss": 1.5645579099655151, | |
| "step": 118 | |
| }, | |
| { | |
| "epoch": 0.13745704467353953, | |
| "grad_norm": 0.32700014114379883, | |
| "learning_rate": 9.083969465648855e-06, | |
| "loss": 1.3761099576950073, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 0.13974799541809851, | |
| "grad_norm": 0.04891065135598183, | |
| "learning_rate": 9.236641221374046e-06, | |
| "loss": 1.300285816192627, | |
| "step": 122 | |
| }, | |
| { | |
| "epoch": 0.1420389461626575, | |
| "grad_norm": 0.0589461550116539, | |
| "learning_rate": 9.389312977099237e-06, | |
| "loss": 1.210198163986206, | |
| "step": 124 | |
| }, | |
| { | |
| "epoch": 0.14432989690721648, | |
| "grad_norm": 0.1429443210363388, | |
| "learning_rate": 9.54198473282443e-06, | |
| "loss": 1.5391454696655273, | |
| "step": 126 | |
| }, | |
| { | |
| "epoch": 0.1466208476517755, | |
| "grad_norm": 0.16855759918689728, | |
| "learning_rate": 9.694656488549619e-06, | |
| "loss": 1.4557760953903198, | |
| "step": 128 | |
| }, | |
| { | |
| "epoch": 0.14891179839633448, | |
| "grad_norm": 0.16834264993667603, | |
| "learning_rate": 9.84732824427481e-06, | |
| "loss": 1.6899962425231934, | |
| "step": 130 | |
| }, | |
| { | |
| "epoch": 0.15120274914089346, | |
| "grad_norm": 0.1502954512834549, | |
| "learning_rate": 1e-05, | |
| "loss": 1.5119028091430664, | |
| "step": 132 | |
| }, | |
| { | |
| "epoch": 0.15349369988545247, | |
| "grad_norm": 0.048700153827667236, | |
| "learning_rate": 9.999985650351204e-06, | |
| "loss": 1.1397861242294312, | |
| "step": 134 | |
| }, | |
| { | |
| "epoch": 0.15578465063001146, | |
| "grad_norm": 0.049325715750455856, | |
| "learning_rate": 9.999942601496331e-06, | |
| "loss": 1.2158293724060059, | |
| "step": 136 | |
| }, | |
| { | |
| "epoch": 0.15807560137457044, | |
| "grad_norm": 0.05145062878727913, | |
| "learning_rate": 9.999870853709929e-06, | |
| "loss": 1.4632039070129395, | |
| "step": 138 | |
| }, | |
| { | |
| "epoch": 0.16036655211912945, | |
| "grad_norm": 0.040577393025159836, | |
| "learning_rate": 9.999770407449582e-06, | |
| "loss": 1.1962344646453857, | |
| "step": 140 | |
| }, | |
| { | |
| "epoch": 0.16265750286368844, | |
| "grad_norm": 0.1325235515832901, | |
| "learning_rate": 9.999641263355893e-06, | |
| "loss": 1.342354416847229, | |
| "step": 142 | |
| }, | |
| { | |
| "epoch": 0.16494845360824742, | |
| "grad_norm": 0.11594824492931366, | |
| "learning_rate": 9.999483422252499e-06, | |
| "loss": 1.4513139724731445, | |
| "step": 144 | |
| }, | |
| { | |
| "epoch": 0.1672394043528064, | |
| "grad_norm": 0.05856647342443466, | |
| "learning_rate": 9.999296885146047e-06, | |
| "loss": 1.2211672067642212, | |
| "step": 146 | |
| }, | |
| { | |
| "epoch": 0.16953035509736541, | |
| "grad_norm": 0.046867020428180695, | |
| "learning_rate": 9.999081653226205e-06, | |
| "loss": 1.316023588180542, | |
| "step": 148 | |
| }, | |
| { | |
| "epoch": 0.1718213058419244, | |
| "grad_norm": 0.24781149625778198, | |
| "learning_rate": 9.998837727865636e-06, | |
| "loss": 1.1912282705307007, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 0.17411225658648338, | |
| "grad_norm": 0.04234715923666954, | |
| "learning_rate": 9.998565110620006e-06, | |
| "loss": 1.0865659713745117, | |
| "step": 152 | |
| }, | |
| { | |
| "epoch": 0.1764032073310424, | |
| "grad_norm": 0.11648119986057281, | |
| "learning_rate": 9.998263803227965e-06, | |
| "loss": 1.4158616065979004, | |
| "step": 154 | |
| }, | |
| { | |
| "epoch": 0.17869415807560138, | |
| "grad_norm": 0.21850669384002686, | |
| "learning_rate": 9.997933807611133e-06, | |
| "loss": 1.7990024089813232, | |
| "step": 156 | |
| }, | |
| { | |
| "epoch": 0.18098510882016036, | |
| "grad_norm": 0.12010746449232101, | |
| "learning_rate": 9.997575125874104e-06, | |
| "loss": 1.472090244293213, | |
| "step": 158 | |
| }, | |
| { | |
| "epoch": 0.18327605956471935, | |
| "grad_norm": 0.1091763824224472, | |
| "learning_rate": 9.997187760304411e-06, | |
| "loss": 1.0730650424957275, | |
| "step": 160 | |
| }, | |
| { | |
| "epoch": 0.18556701030927836, | |
| "grad_norm": 0.14433470368385315, | |
| "learning_rate": 9.996771713372525e-06, | |
| "loss": 1.29958176612854, | |
| "step": 162 | |
| }, | |
| { | |
| "epoch": 0.18785796105383734, | |
| "grad_norm": 0.12404698878526688, | |
| "learning_rate": 9.996326987731836e-06, | |
| "loss": 1.0624301433563232, | |
| "step": 164 | |
| }, | |
| { | |
| "epoch": 0.19014891179839633, | |
| "grad_norm": 0.12703843414783478, | |
| "learning_rate": 9.995853586218636e-06, | |
| "loss": 1.5410776138305664, | |
| "step": 166 | |
| }, | |
| { | |
| "epoch": 0.19243986254295534, | |
| "grad_norm": 2.6877777576446533, | |
| "learning_rate": 9.995351511852102e-06, | |
| "loss": 0.6895647048950195, | |
| "step": 168 | |
| }, | |
| { | |
| "epoch": 0.19473081328751432, | |
| "grad_norm": 0.12183137238025665, | |
| "learning_rate": 9.994820767834273e-06, | |
| "loss": 1.4578661918640137, | |
| "step": 170 | |
| }, | |
| { | |
| "epoch": 0.1970217640320733, | |
| "grad_norm": 0.17509934306144714, | |
| "learning_rate": 9.994261357550034e-06, | |
| "loss": 1.5483499765396118, | |
| "step": 172 | |
| }, | |
| { | |
| "epoch": 0.19931271477663232, | |
| "grad_norm": 0.1465608775615692, | |
| "learning_rate": 9.993673284567092e-06, | |
| "loss": 1.6950434446334839, | |
| "step": 174 | |
| }, | |
| { | |
| "epoch": 0.2016036655211913, | |
| "grad_norm": 0.11729174107313156, | |
| "learning_rate": 9.993056552635954e-06, | |
| "loss": 1.4780206680297852, | |
| "step": 176 | |
| }, | |
| { | |
| "epoch": 0.20389461626575028, | |
| "grad_norm": 0.11391811072826385, | |
| "learning_rate": 9.992411165689902e-06, | |
| "loss": 1.4607470035552979, | |
| "step": 178 | |
| }, | |
| { | |
| "epoch": 0.20618556701030927, | |
| "grad_norm": 0.24819950759410858, | |
| "learning_rate": 9.99173712784497e-06, | |
| "loss": 1.5854477882385254, | |
| "step": 180 | |
| }, | |
| { | |
| "epoch": 0.20847651775486828, | |
| "grad_norm": 0.12101314961910248, | |
| "learning_rate": 9.99103444339992e-06, | |
| "loss": 1.3363440036773682, | |
| "step": 182 | |
| }, | |
| { | |
| "epoch": 0.21076746849942726, | |
| "grad_norm": 0.1651735156774521, | |
| "learning_rate": 9.990303116836204e-06, | |
| "loss": 1.3371784687042236, | |
| "step": 184 | |
| }, | |
| { | |
| "epoch": 0.21305841924398625, | |
| "grad_norm": 0.10060884058475494, | |
| "learning_rate": 9.989543152817945e-06, | |
| "loss": 1.1477423906326294, | |
| "step": 186 | |
| }, | |
| { | |
| "epoch": 0.21534936998854526, | |
| "grad_norm": 0.12473434954881668, | |
| "learning_rate": 9.98875455619191e-06, | |
| "loss": 1.3203130960464478, | |
| "step": 188 | |
| }, | |
| { | |
| "epoch": 0.21764032073310424, | |
| "grad_norm": 0.03830314055085182, | |
| "learning_rate": 9.987937331987466e-06, | |
| "loss": 1.29048752784729, | |
| "step": 190 | |
| }, | |
| { | |
| "epoch": 0.21993127147766323, | |
| "grad_norm": 0.0400603786110878, | |
| "learning_rate": 9.987091485416564e-06, | |
| "loss": 1.1765823364257812, | |
| "step": 192 | |
| }, | |
| { | |
| "epoch": 0.2222222222222222, | |
| "grad_norm": 0.1159832775592804, | |
| "learning_rate": 9.986217021873688e-06, | |
| "loss": 1.2849760055541992, | |
| "step": 194 | |
| }, | |
| { | |
| "epoch": 0.22451317296678122, | |
| "grad_norm": 0.036686670035123825, | |
| "learning_rate": 9.985313946935841e-06, | |
| "loss": 1.1865625381469727, | |
| "step": 196 | |
| }, | |
| { | |
| "epoch": 0.2268041237113402, | |
| "grad_norm": 0.10680826008319855, | |
| "learning_rate": 9.98438226636249e-06, | |
| "loss": 1.4698808193206787, | |
| "step": 198 | |
| }, | |
| { | |
| "epoch": 0.2290950744558992, | |
| "grad_norm": 0.19631391763687134, | |
| "learning_rate": 9.983421986095543e-06, | |
| "loss": 1.5152738094329834, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.2313860252004582, | |
| "grad_norm": 0.2143646776676178, | |
| "learning_rate": 9.982433112259304e-06, | |
| "loss": 1.1555585861206055, | |
| "step": 202 | |
| }, | |
| { | |
| "epoch": 0.23367697594501718, | |
| "grad_norm": 0.11695901304483414, | |
| "learning_rate": 9.981415651160434e-06, | |
| "loss": 1.3724945783615112, | |
| "step": 204 | |
| }, | |
| { | |
| "epoch": 0.23596792668957617, | |
| "grad_norm": 0.0444357693195343, | |
| "learning_rate": 9.980369609287918e-06, | |
| "loss": 1.28928804397583, | |
| "step": 206 | |
| }, | |
| { | |
| "epoch": 0.23825887743413515, | |
| "grad_norm": 0.2176477164030075, | |
| "learning_rate": 9.979294993313013e-06, | |
| "loss": 1.3569140434265137, | |
| "step": 208 | |
| }, | |
| { | |
| "epoch": 0.24054982817869416, | |
| "grad_norm": 0.09549252688884735, | |
| "learning_rate": 9.978191810089213e-06, | |
| "loss": 1.2554432153701782, | |
| "step": 210 | |
| }, | |
| { | |
| "epoch": 0.24284077892325315, | |
| "grad_norm": 0.035298705101013184, | |
| "learning_rate": 9.977060066652208e-06, | |
| "loss": 1.19698166847229, | |
| "step": 212 | |
| }, | |
| { | |
| "epoch": 0.24513172966781213, | |
| "grad_norm": 0.03924322500824928, | |
| "learning_rate": 9.975899770219823e-06, | |
| "loss": 1.253554105758667, | |
| "step": 214 | |
| }, | |
| { | |
| "epoch": 0.24742268041237114, | |
| "grad_norm": 0.14062990248203278, | |
| "learning_rate": 9.974710928191994e-06, | |
| "loss": 1.505861520767212, | |
| "step": 216 | |
| }, | |
| { | |
| "epoch": 0.24971363115693013, | |
| "grad_norm": 0.10934654623270035, | |
| "learning_rate": 9.973493548150705e-06, | |
| "loss": 1.2586157321929932, | |
| "step": 218 | |
| }, | |
| { | |
| "epoch": 0.2520045819014891, | |
| "grad_norm": 0.16258153319358826, | |
| "learning_rate": 9.972247637859942e-06, | |
| "loss": 1.5800942182540894, | |
| "step": 220 | |
| }, | |
| { | |
| "epoch": 0.2542955326460481, | |
| "grad_norm": 0.04441208764910698, | |
| "learning_rate": 9.970973205265654e-06, | |
| "loss": 1.2052358388900757, | |
| "step": 222 | |
| }, | |
| { | |
| "epoch": 0.2565864833906071, | |
| "grad_norm": 0.04366284981369972, | |
| "learning_rate": 9.969670258495689e-06, | |
| "loss": 1.4153897762298584, | |
| "step": 224 | |
| }, | |
| { | |
| "epoch": 0.2588774341351661, | |
| "grad_norm": 0.10389053076505661, | |
| "learning_rate": 9.968338805859746e-06, | |
| "loss": 1.1192145347595215, | |
| "step": 226 | |
| }, | |
| { | |
| "epoch": 0.2611683848797251, | |
| "grad_norm": 0.03827785328030586, | |
| "learning_rate": 9.966978855849328e-06, | |
| "loss": 1.233184814453125, | |
| "step": 228 | |
| }, | |
| { | |
| "epoch": 0.2634593356242841, | |
| "grad_norm": 0.03582112118601799, | |
| "learning_rate": 9.965590417137683e-06, | |
| "loss": 1.444356918334961, | |
| "step": 230 | |
| }, | |
| { | |
| "epoch": 0.26575028636884307, | |
| "grad_norm": 0.16782893240451813, | |
| "learning_rate": 9.964173498579744e-06, | |
| "loss": 1.3132915496826172, | |
| "step": 232 | |
| }, | |
| { | |
| "epoch": 0.26804123711340205, | |
| "grad_norm": 0.10886494070291519, | |
| "learning_rate": 9.962728109212087e-06, | |
| "loss": 1.603651762008667, | |
| "step": 234 | |
| }, | |
| { | |
| "epoch": 0.27033218785796104, | |
| "grad_norm": 0.32741063833236694, | |
| "learning_rate": 9.961254258252853e-06, | |
| "loss": 1.722959041595459, | |
| "step": 236 | |
| }, | |
| { | |
| "epoch": 0.27262313860252, | |
| "grad_norm": 0.038513097912073135, | |
| "learning_rate": 9.95975195510171e-06, | |
| "loss": 1.270897626876831, | |
| "step": 238 | |
| }, | |
| { | |
| "epoch": 0.27491408934707906, | |
| "grad_norm": 0.11721174418926239, | |
| "learning_rate": 9.958221209339776e-06, | |
| "loss": 1.3808534145355225, | |
| "step": 240 | |
| }, | |
| { | |
| "epoch": 0.27720504009163804, | |
| "grad_norm": 0.11607187986373901, | |
| "learning_rate": 9.956662030729571e-06, | |
| "loss": 1.4212572574615479, | |
| "step": 242 | |
| }, | |
| { | |
| "epoch": 0.27949599083619703, | |
| "grad_norm": 0.05183471366763115, | |
| "learning_rate": 9.955074429214945e-06, | |
| "loss": 1.2837097644805908, | |
| "step": 244 | |
| }, | |
| { | |
| "epoch": 0.281786941580756, | |
| "grad_norm": 0.048340827226638794, | |
| "learning_rate": 9.95345841492102e-06, | |
| "loss": 1.3732128143310547, | |
| "step": 246 | |
| }, | |
| { | |
| "epoch": 0.284077892325315, | |
| "grad_norm": 0.03368377685546875, | |
| "learning_rate": 9.951813998154122e-06, | |
| "loss": 1.150505542755127, | |
| "step": 248 | |
| }, | |
| { | |
| "epoch": 0.286368843069874, | |
| "grad_norm": 0.16787323355674744, | |
| "learning_rate": 9.950141189401722e-06, | |
| "loss": 1.4632198810577393, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 0.28865979381443296, | |
| "grad_norm": 0.14481912553310394, | |
| "learning_rate": 9.948439999332362e-06, | |
| "loss": 1.4926843643188477, | |
| "step": 252 | |
| }, | |
| { | |
| "epoch": 0.290950744558992, | |
| "grad_norm": 0.15263405442237854, | |
| "learning_rate": 9.946710438795586e-06, | |
| "loss": 1.4954164028167725, | |
| "step": 254 | |
| }, | |
| { | |
| "epoch": 0.293241695303551, | |
| "grad_norm": 0.11114485561847687, | |
| "learning_rate": 9.944952518821877e-06, | |
| "loss": 0.9889236688613892, | |
| "step": 256 | |
| }, | |
| { | |
| "epoch": 0.29553264604810997, | |
| "grad_norm": 0.2147989124059677, | |
| "learning_rate": 9.943166250622585e-06, | |
| "loss": 1.2724560499191284, | |
| "step": 258 | |
| }, | |
| { | |
| "epoch": 0.29782359679266895, | |
| "grad_norm": 0.21461904048919678, | |
| "learning_rate": 9.941351645589853e-06, | |
| "loss": 0.9281865358352661, | |
| "step": 260 | |
| }, | |
| { | |
| "epoch": 0.30011454753722794, | |
| "grad_norm": 0.03682840242981911, | |
| "learning_rate": 9.939508715296543e-06, | |
| "loss": 1.3381445407867432, | |
| "step": 262 | |
| }, | |
| { | |
| "epoch": 0.3024054982817869, | |
| "grad_norm": 0.2321019023656845, | |
| "learning_rate": 9.93763747149617e-06, | |
| "loss": 1.2490191459655762, | |
| "step": 264 | |
| }, | |
| { | |
| "epoch": 0.30469644902634596, | |
| "grad_norm": 0.11454650014638901, | |
| "learning_rate": 9.935737926122816e-06, | |
| "loss": 1.2711057662963867, | |
| "step": 266 | |
| }, | |
| { | |
| "epoch": 0.30698739977090495, | |
| "grad_norm": 0.10006029158830643, | |
| "learning_rate": 9.933810091291065e-06, | |
| "loss": 1.2933646440505981, | |
| "step": 268 | |
| }, | |
| { | |
| "epoch": 0.30927835051546393, | |
| "grad_norm": 0.1007399782538414, | |
| "learning_rate": 9.93185397929592e-06, | |
| "loss": 1.2037980556488037, | |
| "step": 270 | |
| }, | |
| { | |
| "epoch": 0.3115693012600229, | |
| "grad_norm": 0.03689350187778473, | |
| "learning_rate": 9.929869602612718e-06, | |
| "loss": 1.1460704803466797, | |
| "step": 272 | |
| }, | |
| { | |
| "epoch": 0.3138602520045819, | |
| "grad_norm": 0.14365845918655396, | |
| "learning_rate": 9.927856973897068e-06, | |
| "loss": 0.5115751624107361, | |
| "step": 274 | |
| }, | |
| { | |
| "epoch": 0.3161512027491409, | |
| "grad_norm": 0.15885332226753235, | |
| "learning_rate": 9.925816105984751e-06, | |
| "loss": 1.3992141485214233, | |
| "step": 276 | |
| }, | |
| { | |
| "epoch": 0.31844215349369986, | |
| "grad_norm": 0.2972363829612732, | |
| "learning_rate": 9.923747011891653e-06, | |
| "loss": 0.6298142671585083, | |
| "step": 278 | |
| }, | |
| { | |
| "epoch": 0.3207331042382589, | |
| "grad_norm": 0.10289888083934784, | |
| "learning_rate": 9.92164970481367e-06, | |
| "loss": 1.3492999076843262, | |
| "step": 280 | |
| }, | |
| { | |
| "epoch": 0.3230240549828179, | |
| "grad_norm": 0.30722421407699585, | |
| "learning_rate": 9.919524198126637e-06, | |
| "loss": 1.1495556831359863, | |
| "step": 282 | |
| }, | |
| { | |
| "epoch": 0.32531500572737687, | |
| "grad_norm": 0.1234731450676918, | |
| "learning_rate": 9.91737050538623e-06, | |
| "loss": 1.625375747680664, | |
| "step": 284 | |
| }, | |
| { | |
| "epoch": 0.32760595647193586, | |
| "grad_norm": 0.11733370274305344, | |
| "learning_rate": 9.915188640327887e-06, | |
| "loss": 0.7891396284103394, | |
| "step": 286 | |
| }, | |
| { | |
| "epoch": 0.32989690721649484, | |
| "grad_norm": 0.11923382431268692, | |
| "learning_rate": 9.912978616866716e-06, | |
| "loss": 1.469787836074829, | |
| "step": 288 | |
| }, | |
| { | |
| "epoch": 0.3321878579610538, | |
| "grad_norm": 0.17214825749397278, | |
| "learning_rate": 9.910740449097412e-06, | |
| "loss": 1.4425544738769531, | |
| "step": 290 | |
| }, | |
| { | |
| "epoch": 0.3344788087056128, | |
| "grad_norm": 0.04059191793203354, | |
| "learning_rate": 9.908474151294161e-06, | |
| "loss": 1.1002707481384277, | |
| "step": 292 | |
| }, | |
| { | |
| "epoch": 0.33676975945017185, | |
| "grad_norm": 0.03392655774950981, | |
| "learning_rate": 9.906179737910554e-06, | |
| "loss": 1.1838493347167969, | |
| "step": 294 | |
| }, | |
| { | |
| "epoch": 0.33906071019473083, | |
| "grad_norm": 0.032716911286115646, | |
| "learning_rate": 9.903857223579496e-06, | |
| "loss": 1.0073325634002686, | |
| "step": 296 | |
| }, | |
| { | |
| "epoch": 0.3413516609392898, | |
| "grad_norm": 0.030965758487582207, | |
| "learning_rate": 9.901506623113098e-06, | |
| "loss": 0.969083845615387, | |
| "step": 298 | |
| }, | |
| { | |
| "epoch": 0.3436426116838488, | |
| "grad_norm": 0.12395308911800385, | |
| "learning_rate": 9.899127951502601e-06, | |
| "loss": 1.2470171451568604, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 0.3459335624284078, | |
| "grad_norm": 0.3706231713294983, | |
| "learning_rate": 9.896721223918276e-06, | |
| "loss": 1.5252048969268799, | |
| "step": 302 | |
| }, | |
| { | |
| "epoch": 0.34822451317296677, | |
| "grad_norm": 0.044303592294454575, | |
| "learning_rate": 9.89428645570932e-06, | |
| "loss": 1.1785590648651123, | |
| "step": 304 | |
| }, | |
| { | |
| "epoch": 0.35051546391752575, | |
| "grad_norm": 0.22373101115226746, | |
| "learning_rate": 9.891823662403763e-06, | |
| "loss": 1.0534144639968872, | |
| "step": 306 | |
| }, | |
| { | |
| "epoch": 0.3528064146620848, | |
| "grad_norm": 0.0998837873339653, | |
| "learning_rate": 9.88933285970837e-06, | |
| "loss": 1.358349084854126, | |
| "step": 308 | |
| }, | |
| { | |
| "epoch": 0.3550973654066438, | |
| "grad_norm": 0.10202368348836899, | |
| "learning_rate": 9.886814063508536e-06, | |
| "loss": 1.2032561302185059, | |
| "step": 310 | |
| }, | |
| { | |
| "epoch": 0.35738831615120276, | |
| "grad_norm": 0.12603235244750977, | |
| "learning_rate": 9.884267289868194e-06, | |
| "loss": 1.270133376121521, | |
| "step": 312 | |
| }, | |
| { | |
| "epoch": 0.35967926689576174, | |
| "grad_norm": 0.13588766753673553, | |
| "learning_rate": 9.8816925550297e-06, | |
| "loss": 1.3731555938720703, | |
| "step": 314 | |
| }, | |
| { | |
| "epoch": 0.3619702176403207, | |
| "grad_norm": 0.2927129566669464, | |
| "learning_rate": 9.879089875413736e-06, | |
| "loss": 1.2470248937606812, | |
| "step": 316 | |
| }, | |
| { | |
| "epoch": 0.3642611683848797, | |
| "grad_norm": 0.03428524360060692, | |
| "learning_rate": 9.876459267619215e-06, | |
| "loss": 1.2745685577392578, | |
| "step": 318 | |
| }, | |
| { | |
| "epoch": 0.3665521191294387, | |
| "grad_norm": 0.3623892068862915, | |
| "learning_rate": 9.873800748423152e-06, | |
| "loss": 1.3339478969573975, | |
| "step": 320 | |
| }, | |
| { | |
| "epoch": 0.36884306987399773, | |
| "grad_norm": 0.11408170312643051, | |
| "learning_rate": 9.871114334780583e-06, | |
| "loss": 1.3286080360412598, | |
| "step": 322 | |
| }, | |
| { | |
| "epoch": 0.3711340206185567, | |
| "grad_norm": 0.10345927625894547, | |
| "learning_rate": 9.868400043824431e-06, | |
| "loss": 1.332814335823059, | |
| "step": 324 | |
| }, | |
| { | |
| "epoch": 0.3734249713631157, | |
| "grad_norm": 0.451455682516098, | |
| "learning_rate": 9.86565789286542e-06, | |
| "loss": 1.435546875, | |
| "step": 326 | |
| }, | |
| { | |
| "epoch": 0.3757159221076747, | |
| "grad_norm": 0.03447026014328003, | |
| "learning_rate": 9.862887899391953e-06, | |
| "loss": 1.2484092712402344, | |
| "step": 328 | |
| }, | |
| { | |
| "epoch": 0.37800687285223367, | |
| "grad_norm": 0.10319351404905319, | |
| "learning_rate": 9.860090081069998e-06, | |
| "loss": 1.1266441345214844, | |
| "step": 330 | |
| }, | |
| { | |
| "epoch": 0.38029782359679265, | |
| "grad_norm": 0.10190501809120178, | |
| "learning_rate": 9.857264455742983e-06, | |
| "loss": 1.2466825246810913, | |
| "step": 332 | |
| }, | |
| { | |
| "epoch": 0.38258877434135163, | |
| "grad_norm": 0.04389253631234169, | |
| "learning_rate": 9.854411041431678e-06, | |
| "loss": 1.1545830965042114, | |
| "step": 334 | |
| }, | |
| { | |
| "epoch": 0.3848797250859107, | |
| "grad_norm": 0.1142214685678482, | |
| "learning_rate": 9.851529856334079e-06, | |
| "loss": 1.2927944660186768, | |
| "step": 336 | |
| }, | |
| { | |
| "epoch": 0.38717067583046966, | |
| "grad_norm": 0.03781864792108536, | |
| "learning_rate": 9.848620918825294e-06, | |
| "loss": 1.2539844512939453, | |
| "step": 338 | |
| }, | |
| { | |
| "epoch": 0.38946162657502864, | |
| "grad_norm": 0.12804462015628815, | |
| "learning_rate": 9.845684247457425e-06, | |
| "loss": 1.3808146715164185, | |
| "step": 340 | |
| }, | |
| { | |
| "epoch": 0.3917525773195876, | |
| "grad_norm": 0.037370480597019196, | |
| "learning_rate": 9.842719860959455e-06, | |
| "loss": 1.2456262111663818, | |
| "step": 342 | |
| }, | |
| { | |
| "epoch": 0.3940435280641466, | |
| "grad_norm": 0.042404066771268845, | |
| "learning_rate": 9.839727778237116e-06, | |
| "loss": 1.3200464248657227, | |
| "step": 344 | |
| }, | |
| { | |
| "epoch": 0.3963344788087056, | |
| "grad_norm": 0.04442799836397171, | |
| "learning_rate": 9.836708018372782e-06, | |
| "loss": 1.3215160369873047, | |
| "step": 346 | |
| }, | |
| { | |
| "epoch": 0.39862542955326463, | |
| "grad_norm": 0.08433306217193604, | |
| "learning_rate": 9.833660600625338e-06, | |
| "loss": 1.2241692543029785, | |
| "step": 348 | |
| }, | |
| { | |
| "epoch": 0.4009163802978236, | |
| "grad_norm": 0.14272065460681915, | |
| "learning_rate": 9.83058554443006e-06, | |
| "loss": 1.2168128490447998, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 0.4032073310423826, | |
| "grad_norm": 0.10155618190765381, | |
| "learning_rate": 9.827482869398496e-06, | |
| "loss": 1.3269870281219482, | |
| "step": 352 | |
| }, | |
| { | |
| "epoch": 0.4054982817869416, | |
| "grad_norm": 0.345684289932251, | |
| "learning_rate": 9.82435259531833e-06, | |
| "loss": 1.048119068145752, | |
| "step": 354 | |
| }, | |
| { | |
| "epoch": 0.40778923253150057, | |
| "grad_norm": 0.03348676860332489, | |
| "learning_rate": 9.82119474215327e-06, | |
| "loss": 0.8612366914749146, | |
| "step": 356 | |
| }, | |
| { | |
| "epoch": 0.41008018327605955, | |
| "grad_norm": 0.10538852959871292, | |
| "learning_rate": 9.818009330042906e-06, | |
| "loss": 1.4399499893188477, | |
| "step": 358 | |
| }, | |
| { | |
| "epoch": 0.41237113402061853, | |
| "grad_norm": 0.12840358912944794, | |
| "learning_rate": 9.814796379302592e-06, | |
| "loss": 1.3075119256973267, | |
| "step": 360 | |
| }, | |
| { | |
| "epoch": 0.4146620847651776, | |
| "grad_norm": 0.03234279155731201, | |
| "learning_rate": 9.811555910423312e-06, | |
| "loss": 1.195176601409912, | |
| "step": 362 | |
| }, | |
| { | |
| "epoch": 0.41695303550973656, | |
| "grad_norm": 0.2873457968235016, | |
| "learning_rate": 9.808287944071552e-06, | |
| "loss": 1.3421657085418701, | |
| "step": 364 | |
| }, | |
| { | |
| "epoch": 0.41924398625429554, | |
| "grad_norm": 0.03667648881673813, | |
| "learning_rate": 9.804992501089164e-06, | |
| "loss": 1.4639015197753906, | |
| "step": 366 | |
| }, | |
| { | |
| "epoch": 0.4215349369988545, | |
| "grad_norm": 0.1039833277463913, | |
| "learning_rate": 9.801669602493236e-06, | |
| "loss": 1.2831900119781494, | |
| "step": 368 | |
| }, | |
| { | |
| "epoch": 0.4238258877434135, | |
| "grad_norm": 0.1346283257007599, | |
| "learning_rate": 9.798319269475959e-06, | |
| "loss": 1.1615617275238037, | |
| "step": 370 | |
| }, | |
| { | |
| "epoch": 0.4261168384879725, | |
| "grad_norm": 0.11276187002658844, | |
| "learning_rate": 9.794941523404491e-06, | |
| "loss": 1.3308217525482178, | |
| "step": 372 | |
| }, | |
| { | |
| "epoch": 0.4284077892325315, | |
| "grad_norm": 0.10832475125789642, | |
| "learning_rate": 9.791536385820815e-06, | |
| "loss": 1.3403054475784302, | |
| "step": 374 | |
| }, | |
| { | |
| "epoch": 0.4306987399770905, | |
| "grad_norm": 0.07494664937257767, | |
| "learning_rate": 9.788103878441614e-06, | |
| "loss": 0.9897347688674927, | |
| "step": 376 | |
| }, | |
| { | |
| "epoch": 0.4329896907216495, | |
| "grad_norm": 0.13242274522781372, | |
| "learning_rate": 9.784644023158118e-06, | |
| "loss": 1.2988442182540894, | |
| "step": 378 | |
| }, | |
| { | |
| "epoch": 0.4352806414662085, | |
| "grad_norm": 0.04049122706055641, | |
| "learning_rate": 9.781156842035978e-06, | |
| "loss": 1.2798575162887573, | |
| "step": 380 | |
| }, | |
| { | |
| "epoch": 0.43757159221076747, | |
| "grad_norm": 0.10166372358798981, | |
| "learning_rate": 9.777642357315115e-06, | |
| "loss": 1.4548561573028564, | |
| "step": 382 | |
| }, | |
| { | |
| "epoch": 0.43986254295532645, | |
| "grad_norm": 0.09827661514282227, | |
| "learning_rate": 9.774100591409583e-06, | |
| "loss": 0.8798496127128601, | |
| "step": 384 | |
| }, | |
| { | |
| "epoch": 0.44215349369988544, | |
| "grad_norm": 0.039044998586177826, | |
| "learning_rate": 9.770531566907424e-06, | |
| "loss": 1.1994266510009766, | |
| "step": 386 | |
| }, | |
| { | |
| "epoch": 0.4444444444444444, | |
| "grad_norm": 0.03467918932437897, | |
| "learning_rate": 9.766935306570528e-06, | |
| "loss": 1.0867178440093994, | |
| "step": 388 | |
| }, | |
| { | |
| "epoch": 0.44673539518900346, | |
| "grad_norm": 0.09605854749679565, | |
| "learning_rate": 9.763311833334482e-06, | |
| "loss": 1.2179234027862549, | |
| "step": 390 | |
| }, | |
| { | |
| "epoch": 0.44902634593356244, | |
| "grad_norm": 0.04672601819038391, | |
| "learning_rate": 9.759661170308426e-06, | |
| "loss": 1.0760804414749146, | |
| "step": 392 | |
| }, | |
| { | |
| "epoch": 0.4513172966781214, | |
| "grad_norm": 0.14849010109901428, | |
| "learning_rate": 9.75598334077491e-06, | |
| "loss": 1.4439059495925903, | |
| "step": 394 | |
| }, | |
| { | |
| "epoch": 0.4536082474226804, | |
| "grad_norm": 0.10495574027299881, | |
| "learning_rate": 9.752278368189738e-06, | |
| "loss": 1.2911549806594849, | |
| "step": 396 | |
| }, | |
| { | |
| "epoch": 0.4558991981672394, | |
| "grad_norm": 0.10133286565542221, | |
| "learning_rate": 9.748546276181824e-06, | |
| "loss": 1.3363006114959717, | |
| "step": 398 | |
| }, | |
| { | |
| "epoch": 0.4581901489117984, | |
| "grad_norm": 0.10403479635715485, | |
| "learning_rate": 9.74478708855304e-06, | |
| "loss": 1.1435672044754028, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 0.46048109965635736, | |
| "grad_norm": 0.23274336755275726, | |
| "learning_rate": 9.74100082927806e-06, | |
| "loss": 1.1191637516021729, | |
| "step": 402 | |
| }, | |
| { | |
| "epoch": 0.4627720504009164, | |
| "grad_norm": 0.035993631929159164, | |
| "learning_rate": 9.737187522504215e-06, | |
| "loss": 1.2965900897979736, | |
| "step": 404 | |
| }, | |
| { | |
| "epoch": 0.4650630011454754, | |
| "grad_norm": 0.11053793877363205, | |
| "learning_rate": 9.733347192551333e-06, | |
| "loss": 1.3513212203979492, | |
| "step": 406 | |
| }, | |
| { | |
| "epoch": 0.46735395189003437, | |
| "grad_norm": 0.38711977005004883, | |
| "learning_rate": 9.729479863911585e-06, | |
| "loss": 0.9576661586761475, | |
| "step": 408 | |
| }, | |
| { | |
| "epoch": 0.46964490263459335, | |
| "grad_norm": 0.10174008458852768, | |
| "learning_rate": 9.725585561249331e-06, | |
| "loss": 1.3105225563049316, | |
| "step": 410 | |
| }, | |
| { | |
| "epoch": 0.47193585337915234, | |
| "grad_norm": 0.5640531182289124, | |
| "learning_rate": 9.72166430940096e-06, | |
| "loss": 1.5542221069335938, | |
| "step": 412 | |
| }, | |
| { | |
| "epoch": 0.4742268041237113, | |
| "grad_norm": 0.043333012610673904, | |
| "learning_rate": 9.71771613337473e-06, | |
| "loss": 1.212013602256775, | |
| "step": 414 | |
| }, | |
| { | |
| "epoch": 0.4765177548682703, | |
| "grad_norm": 0.12072140723466873, | |
| "learning_rate": 9.713741058350618e-06, | |
| "loss": 1.3732361793518066, | |
| "step": 416 | |
| }, | |
| { | |
| "epoch": 0.47880870561282934, | |
| "grad_norm": 0.039414238184690475, | |
| "learning_rate": 9.709739109680146e-06, | |
| "loss": 1.0561060905456543, | |
| "step": 418 | |
| }, | |
| { | |
| "epoch": 0.48109965635738833, | |
| "grad_norm": 0.11718718707561493, | |
| "learning_rate": 9.70571031288623e-06, | |
| "loss": 1.4380638599395752, | |
| "step": 420 | |
| }, | |
| { | |
| "epoch": 0.4833906071019473, | |
| "grad_norm": 0.03386823460459709, | |
| "learning_rate": 9.701654693663012e-06, | |
| "loss": 1.223938226699829, | |
| "step": 422 | |
| }, | |
| { | |
| "epoch": 0.4856815578465063, | |
| "grad_norm": 0.08439936488866806, | |
| "learning_rate": 9.697572277875696e-06, | |
| "loss": 1.2271449565887451, | |
| "step": 424 | |
| }, | |
| { | |
| "epoch": 0.4879725085910653, | |
| "grad_norm": 0.03942372649908066, | |
| "learning_rate": 9.693463091560387e-06, | |
| "loss": 1.0300081968307495, | |
| "step": 426 | |
| }, | |
| { | |
| "epoch": 0.49026345933562426, | |
| "grad_norm": 0.2659618556499481, | |
| "learning_rate": 9.689327160923918e-06, | |
| "loss": 1.275801420211792, | |
| "step": 428 | |
| }, | |
| { | |
| "epoch": 0.4925544100801833, | |
| "grad_norm": 0.11298934370279312, | |
| "learning_rate": 9.685164512343694e-06, | |
| "loss": 1.2419630289077759, | |
| "step": 430 | |
| }, | |
| { | |
| "epoch": 0.4948453608247423, | |
| "grad_norm": 0.04946233332157135, | |
| "learning_rate": 9.680975172367508e-06, | |
| "loss": 1.4032783508300781, | |
| "step": 432 | |
| }, | |
| { | |
| "epoch": 0.49713631156930127, | |
| "grad_norm": 0.0772048756480217, | |
| "learning_rate": 9.67675916771339e-06, | |
| "loss": 1.1696724891662598, | |
| "step": 434 | |
| }, | |
| { | |
| "epoch": 0.49942726231386025, | |
| "grad_norm": 0.13884860277175903, | |
| "learning_rate": 9.67251652526942e-06, | |
| "loss": 1.1072807312011719, | |
| "step": 436 | |
| }, | |
| { | |
| "epoch": 0.5017182130584192, | |
| "grad_norm": 0.1219138652086258, | |
| "learning_rate": 9.668247272093568e-06, | |
| "loss": 1.2660953998565674, | |
| "step": 438 | |
| }, | |
| { | |
| "epoch": 0.5040091638029782, | |
| "grad_norm": 0.11671170592308044, | |
| "learning_rate": 9.663951435413512e-06, | |
| "loss": 1.4654450416564941, | |
| "step": 440 | |
| }, | |
| { | |
| "epoch": 0.5063001145475372, | |
| "grad_norm": 0.10897082090377808, | |
| "learning_rate": 9.659629042626478e-06, | |
| "loss": 1.2332985401153564, | |
| "step": 442 | |
| }, | |
| { | |
| "epoch": 0.5085910652920962, | |
| "grad_norm": 0.2291656881570816, | |
| "learning_rate": 9.655280121299049e-06, | |
| "loss": 1.2597649097442627, | |
| "step": 444 | |
| }, | |
| { | |
| "epoch": 0.5108820160366552, | |
| "grad_norm": 0.18447436392307281, | |
| "learning_rate": 9.650904699167002e-06, | |
| "loss": 1.2398802042007446, | |
| "step": 446 | |
| }, | |
| { | |
| "epoch": 0.5131729667812142, | |
| "grad_norm": 0.09861113131046295, | |
| "learning_rate": 9.646502804135125e-06, | |
| "loss": 1.157179832458496, | |
| "step": 448 | |
| }, | |
| { | |
| "epoch": 0.5154639175257731, | |
| "grad_norm": 0.04085477441549301, | |
| "learning_rate": 9.642074464277035e-06, | |
| "loss": 1.0565853118896484, | |
| "step": 450 | |
| }, | |
| { | |
| "epoch": 0.5177548682703322, | |
| "grad_norm": 0.03253213316202164, | |
| "learning_rate": 9.637619707835011e-06, | |
| "loss": 1.115795373916626, | |
| "step": 452 | |
| }, | |
| { | |
| "epoch": 0.5200458190148912, | |
| "grad_norm": 0.10820797830820084, | |
| "learning_rate": 9.633138563219805e-06, | |
| "loss": 1.2870464324951172, | |
| "step": 454 | |
| }, | |
| { | |
| "epoch": 0.5223367697594502, | |
| "grad_norm": 0.08356233686208725, | |
| "learning_rate": 9.628631059010459e-06, | |
| "loss": 1.161600112915039, | |
| "step": 456 | |
| }, | |
| { | |
| "epoch": 0.5246277205040092, | |
| "grad_norm": 0.10461345314979553, | |
| "learning_rate": 9.624097223954132e-06, | |
| "loss": 1.2577989101409912, | |
| "step": 458 | |
| }, | |
| { | |
| "epoch": 0.5269186712485682, | |
| "grad_norm": 0.2117365151643753, | |
| "learning_rate": 9.619537086965909e-06, | |
| "loss": 0.9334989190101624, | |
| "step": 460 | |
| }, | |
| { | |
| "epoch": 0.5292096219931272, | |
| "grad_norm": 0.03358057513833046, | |
| "learning_rate": 9.614950677128618e-06, | |
| "loss": 1.0164105892181396, | |
| "step": 462 | |
| }, | |
| { | |
| "epoch": 0.5315005727376861, | |
| "grad_norm": 0.04315713420510292, | |
| "learning_rate": 9.610338023692644e-06, | |
| "loss": 1.2515099048614502, | |
| "step": 464 | |
| }, | |
| { | |
| "epoch": 0.5337915234822451, | |
| "grad_norm": 0.11090853065252304, | |
| "learning_rate": 9.60569915607575e-06, | |
| "loss": 0.9969978928565979, | |
| "step": 466 | |
| }, | |
| { | |
| "epoch": 0.5360824742268041, | |
| "grad_norm": 0.06928592920303345, | |
| "learning_rate": 9.601034103862875e-06, | |
| "loss": 1.1942644119262695, | |
| "step": 468 | |
| }, | |
| { | |
| "epoch": 0.5383734249713631, | |
| "grad_norm": 0.11059997975826263, | |
| "learning_rate": 9.596342896805958e-06, | |
| "loss": 1.0854039192199707, | |
| "step": 470 | |
| }, | |
| { | |
| "epoch": 0.5406643757159221, | |
| "grad_norm": 0.2203303724527359, | |
| "learning_rate": 9.591625564823743e-06, | |
| "loss": 0.6206300258636475, | |
| "step": 472 | |
| }, | |
| { | |
| "epoch": 0.5429553264604811, | |
| "grad_norm": 0.03575604781508446, | |
| "learning_rate": 9.58688213800159e-06, | |
| "loss": 1.1505436897277832, | |
| "step": 474 | |
| }, | |
| { | |
| "epoch": 0.54524627720504, | |
| "grad_norm": 0.11237749457359314, | |
| "learning_rate": 9.58211264659128e-06, | |
| "loss": 1.2954638004302979, | |
| "step": 476 | |
| }, | |
| { | |
| "epoch": 0.5475372279495991, | |
| "grad_norm": 0.03931301459670067, | |
| "learning_rate": 9.577317121010822e-06, | |
| "loss": 1.1090443134307861, | |
| "step": 478 | |
| }, | |
| { | |
| "epoch": 0.5498281786941581, | |
| "grad_norm": 0.10506019741296768, | |
| "learning_rate": 9.572495591844268e-06, | |
| "loss": 1.2766085863113403, | |
| "step": 480 | |
| }, | |
| { | |
| "epoch": 0.5521191294387171, | |
| "grad_norm": 0.03373529762029648, | |
| "learning_rate": 9.567648089841504e-06, | |
| "loss": 1.1036125421524048, | |
| "step": 482 | |
| }, | |
| { | |
| "epoch": 0.5544100801832761, | |
| "grad_norm": 0.03757813945412636, | |
| "learning_rate": 9.562774645918067e-06, | |
| "loss": 1.1739498376846313, | |
| "step": 484 | |
| }, | |
| { | |
| "epoch": 0.5567010309278351, | |
| "grad_norm": 0.033927951008081436, | |
| "learning_rate": 9.557875291154937e-06, | |
| "loss": 1.0839054584503174, | |
| "step": 486 | |
| }, | |
| { | |
| "epoch": 0.5589919816723941, | |
| "grad_norm": 0.11530556529760361, | |
| "learning_rate": 9.552950056798345e-06, | |
| "loss": 1.401555061340332, | |
| "step": 488 | |
| }, | |
| { | |
| "epoch": 0.561282932416953, | |
| "grad_norm": 0.1037631705403328, | |
| "learning_rate": 9.547998974259573e-06, | |
| "loss": 1.266376256942749, | |
| "step": 490 | |
| }, | |
| { | |
| "epoch": 0.563573883161512, | |
| "grad_norm": 0.03408586606383324, | |
| "learning_rate": 9.543022075114751e-06, | |
| "loss": 1.1710147857666016, | |
| "step": 492 | |
| }, | |
| { | |
| "epoch": 0.565864833906071, | |
| "grad_norm": 0.09721101075410843, | |
| "learning_rate": 9.538019391104659e-06, | |
| "loss": 1.1715192794799805, | |
| "step": 494 | |
| }, | |
| { | |
| "epoch": 0.56815578465063, | |
| "grad_norm": 0.13440069556236267, | |
| "learning_rate": 9.532990954134527e-06, | |
| "loss": 1.2787623405456543, | |
| "step": 496 | |
| }, | |
| { | |
| "epoch": 0.570446735395189, | |
| "grad_norm": 0.14032506942749023, | |
| "learning_rate": 9.527936796273818e-06, | |
| "loss": 1.352327823638916, | |
| "step": 498 | |
| }, | |
| { | |
| "epoch": 0.572737686139748, | |
| "grad_norm": 0.03532100096344948, | |
| "learning_rate": 9.522856949756042e-06, | |
| "loss": 1.396005630493164, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.5750286368843069, | |
| "grad_norm": 0.03295751288533211, | |
| "learning_rate": 9.517751446978537e-06, | |
| "loss": 1.2074257135391235, | |
| "step": 502 | |
| }, | |
| { | |
| "epoch": 0.5773195876288659, | |
| "grad_norm": 0.03744492679834366, | |
| "learning_rate": 9.51262032050227e-06, | |
| "loss": 1.1247986555099487, | |
| "step": 504 | |
| }, | |
| { | |
| "epoch": 0.579610538373425, | |
| "grad_norm": 0.10635876655578613, | |
| "learning_rate": 9.507463603051624e-06, | |
| "loss": 1.279697060585022, | |
| "step": 506 | |
| }, | |
| { | |
| "epoch": 0.581901489117984, | |
| "grad_norm": 0.1893984079360962, | |
| "learning_rate": 9.502281327514192e-06, | |
| "loss": 1.3793418407440186, | |
| "step": 508 | |
| }, | |
| { | |
| "epoch": 0.584192439862543, | |
| "grad_norm": 0.03022020496428013, | |
| "learning_rate": 9.497073526940564e-06, | |
| "loss": 1.1290874481201172, | |
| "step": 510 | |
| }, | |
| { | |
| "epoch": 0.586483390607102, | |
| "grad_norm": 0.08362241834402084, | |
| "learning_rate": 9.491840234544127e-06, | |
| "loss": 1.1272192001342773, | |
| "step": 512 | |
| }, | |
| { | |
| "epoch": 0.588774341351661, | |
| "grad_norm": 0.04655982553958893, | |
| "learning_rate": 9.486581483700836e-06, | |
| "loss": 1.0976135730743408, | |
| "step": 514 | |
| }, | |
| { | |
| "epoch": 0.5910652920962199, | |
| "grad_norm": 0.09682846069335938, | |
| "learning_rate": 9.481297307949016e-06, | |
| "loss": 1.361159324645996, | |
| "step": 516 | |
| }, | |
| { | |
| "epoch": 0.5933562428407789, | |
| "grad_norm": 0.10151924192905426, | |
| "learning_rate": 9.47598774098914e-06, | |
| "loss": 1.1374366283416748, | |
| "step": 518 | |
| }, | |
| { | |
| "epoch": 0.5956471935853379, | |
| "grad_norm": 0.3335345685482025, | |
| "learning_rate": 9.470652816683619e-06, | |
| "loss": 1.183037519454956, | |
| "step": 520 | |
| }, | |
| { | |
| "epoch": 0.5979381443298969, | |
| "grad_norm": 0.11204201728105545, | |
| "learning_rate": 9.46529256905658e-06, | |
| "loss": 1.3378653526306152, | |
| "step": 522 | |
| }, | |
| { | |
| "epoch": 0.6002290950744559, | |
| "grad_norm": 0.03878810256719589, | |
| "learning_rate": 9.459907032293654e-06, | |
| "loss": 1.2830768823623657, | |
| "step": 524 | |
| }, | |
| { | |
| "epoch": 0.6025200458190149, | |
| "grad_norm": 0.03078841231763363, | |
| "learning_rate": 9.454496240741761e-06, | |
| "loss": 0.988889753818512, | |
| "step": 526 | |
| }, | |
| { | |
| "epoch": 0.6048109965635738, | |
| "grad_norm": 0.04741325229406357, | |
| "learning_rate": 9.44906022890888e-06, | |
| "loss": 1.2009971141815186, | |
| "step": 528 | |
| }, | |
| { | |
| "epoch": 0.6071019473081328, | |
| "grad_norm": 0.09710617363452911, | |
| "learning_rate": 9.443599031463838e-06, | |
| "loss": 1.4264302253723145, | |
| "step": 530 | |
| }, | |
| { | |
| "epoch": 0.6093928980526919, | |
| "grad_norm": 0.12086698412895203, | |
| "learning_rate": 9.438112683236086e-06, | |
| "loss": 0.7716737985610962, | |
| "step": 532 | |
| }, | |
| { | |
| "epoch": 0.6116838487972509, | |
| "grad_norm": 0.028488747775554657, | |
| "learning_rate": 9.432601219215479e-06, | |
| "loss": 1.1653621196746826, | |
| "step": 534 | |
| }, | |
| { | |
| "epoch": 0.6139747995418099, | |
| "grad_norm": 0.030125625431537628, | |
| "learning_rate": 9.427064674552046e-06, | |
| "loss": 0.9844313859939575, | |
| "step": 536 | |
| }, | |
| { | |
| "epoch": 0.6162657502863689, | |
| "grad_norm": 1.246670126914978, | |
| "learning_rate": 9.421503084555778e-06, | |
| "loss": 1.1310303211212158, | |
| "step": 538 | |
| }, | |
| { | |
| "epoch": 0.6185567010309279, | |
| "grad_norm": 0.047299824655056, | |
| "learning_rate": 9.41591648469639e-06, | |
| "loss": 1.1721080541610718, | |
| "step": 540 | |
| }, | |
| { | |
| "epoch": 0.6208476517754868, | |
| "grad_norm": 0.07911659777164459, | |
| "learning_rate": 9.410304910603105e-06, | |
| "loss": 0.9692875146865845, | |
| "step": 542 | |
| }, | |
| { | |
| "epoch": 0.6231386025200458, | |
| "grad_norm": 0.041665468364953995, | |
| "learning_rate": 9.404668398064415e-06, | |
| "loss": 1.225880742073059, | |
| "step": 544 | |
| }, | |
| { | |
| "epoch": 0.6254295532646048, | |
| "grad_norm": 0.09727603197097778, | |
| "learning_rate": 9.399006983027869e-06, | |
| "loss": 1.2111965417861938, | |
| "step": 546 | |
| }, | |
| { | |
| "epoch": 0.6277205040091638, | |
| "grad_norm": 0.2791145443916321, | |
| "learning_rate": 9.393320701599826e-06, | |
| "loss": 1.3773910999298096, | |
| "step": 548 | |
| }, | |
| { | |
| "epoch": 0.6300114547537228, | |
| "grad_norm": 0.2922375202178955, | |
| "learning_rate": 9.387609590045243e-06, | |
| "loss": 1.5269827842712402, | |
| "step": 550 | |
| }, | |
| { | |
| "epoch": 0.6323024054982818, | |
| "grad_norm": 0.034879785031080246, | |
| "learning_rate": 9.381873684787424e-06, | |
| "loss": 1.096055269241333, | |
| "step": 552 | |
| }, | |
| { | |
| "epoch": 0.6345933562428407, | |
| "grad_norm": 0.21075953543186188, | |
| "learning_rate": 9.376113022407806e-06, | |
| "loss": 1.3973267078399658, | |
| "step": 554 | |
| }, | |
| { | |
| "epoch": 0.6368843069873997, | |
| "grad_norm": 0.04559031128883362, | |
| "learning_rate": 9.370327639645715e-06, | |
| "loss": 1.3103339672088623, | |
| "step": 556 | |
| }, | |
| { | |
| "epoch": 0.6391752577319587, | |
| "grad_norm": 0.09411913901567459, | |
| "learning_rate": 9.364517573398128e-06, | |
| "loss": 1.1426537036895752, | |
| "step": 558 | |
| }, | |
| { | |
| "epoch": 0.6414662084765178, | |
| "grad_norm": 0.11268170177936554, | |
| "learning_rate": 9.358682860719456e-06, | |
| "loss": 1.2462685108184814, | |
| "step": 560 | |
| }, | |
| { | |
| "epoch": 0.6437571592210768, | |
| "grad_norm": 0.15371939539909363, | |
| "learning_rate": 9.352823538821286e-06, | |
| "loss": 1.350957989692688, | |
| "step": 562 | |
| }, | |
| { | |
| "epoch": 0.6460481099656358, | |
| "grad_norm": 0.10083743184804916, | |
| "learning_rate": 9.346939645072158e-06, | |
| "loss": 1.2344059944152832, | |
| "step": 564 | |
| }, | |
| { | |
| "epoch": 0.6483390607101948, | |
| "grad_norm": 0.12374784797430038, | |
| "learning_rate": 9.341031216997318e-06, | |
| "loss": 1.332352876663208, | |
| "step": 566 | |
| }, | |
| { | |
| "epoch": 0.6506300114547537, | |
| "grad_norm": 0.09682541340589523, | |
| "learning_rate": 9.335098292278487e-06, | |
| "loss": 1.0717089176177979, | |
| "step": 568 | |
| }, | |
| { | |
| "epoch": 0.6529209621993127, | |
| "grad_norm": 0.04859980568289757, | |
| "learning_rate": 9.329140908753612e-06, | |
| "loss": 1.2429944276809692, | |
| "step": 570 | |
| }, | |
| { | |
| "epoch": 0.6552119129438717, | |
| "grad_norm": 0.03579086810350418, | |
| "learning_rate": 9.323159104416637e-06, | |
| "loss": 1.4028278589248657, | |
| "step": 572 | |
| }, | |
| { | |
| "epoch": 0.6575028636884307, | |
| "grad_norm": 0.11958760768175125, | |
| "learning_rate": 9.31715291741724e-06, | |
| "loss": 1.2857556343078613, | |
| "step": 574 | |
| }, | |
| { | |
| "epoch": 0.6597938144329897, | |
| "grad_norm": 0.03217066451907158, | |
| "learning_rate": 9.311122386060612e-06, | |
| "loss": 1.1168699264526367, | |
| "step": 576 | |
| }, | |
| { | |
| "epoch": 0.6620847651775487, | |
| "grad_norm": 0.12310899794101715, | |
| "learning_rate": 9.305067548807202e-06, | |
| "loss": 1.1735236644744873, | |
| "step": 578 | |
| }, | |
| { | |
| "epoch": 0.6643757159221076, | |
| "grad_norm": 0.11337030678987503, | |
| "learning_rate": 9.29898844427247e-06, | |
| "loss": 1.301731824874878, | |
| "step": 580 | |
| }, | |
| { | |
| "epoch": 0.6666666666666666, | |
| "grad_norm": 0.03304918482899666, | |
| "learning_rate": 9.292885111226647e-06, | |
| "loss": 1.1898317337036133, | |
| "step": 582 | |
| }, | |
| { | |
| "epoch": 0.6689576174112256, | |
| "grad_norm": 0.035968322306871414, | |
| "learning_rate": 9.286757588594479e-06, | |
| "loss": 1.3450303077697754, | |
| "step": 584 | |
| }, | |
| { | |
| "epoch": 0.6712485681557846, | |
| "grad_norm": 0.232716366648674, | |
| "learning_rate": 9.28060591545499e-06, | |
| "loss": 0.5568069219589233, | |
| "step": 586 | |
| }, | |
| { | |
| "epoch": 0.6735395189003437, | |
| "grad_norm": 0.18080194294452667, | |
| "learning_rate": 9.274430131041224e-06, | |
| "loss": 1.2939491271972656, | |
| "step": 588 | |
| }, | |
| { | |
| "epoch": 0.6758304696449027, | |
| "grad_norm": 0.1996665745973587, | |
| "learning_rate": 9.268230274739993e-06, | |
| "loss": 1.680454134941101, | |
| "step": 590 | |
| }, | |
| { | |
| "epoch": 0.6781214203894617, | |
| "grad_norm": 0.10235057026147842, | |
| "learning_rate": 9.262006386091643e-06, | |
| "loss": 1.300153136253357, | |
| "step": 592 | |
| }, | |
| { | |
| "epoch": 0.6804123711340206, | |
| "grad_norm": 0.1189015805721283, | |
| "learning_rate": 9.255758504789773e-06, | |
| "loss": 1.4123055934906006, | |
| "step": 594 | |
| }, | |
| { | |
| "epoch": 0.6827033218785796, | |
| "grad_norm": 0.11398884654045105, | |
| "learning_rate": 9.249486670681011e-06, | |
| "loss": 1.3474668264389038, | |
| "step": 596 | |
| }, | |
| { | |
| "epoch": 0.6849942726231386, | |
| "grad_norm": 0.33024370670318604, | |
| "learning_rate": 9.243190923764743e-06, | |
| "loss": 0.9317454695701599, | |
| "step": 598 | |
| }, | |
| { | |
| "epoch": 0.6872852233676976, | |
| "grad_norm": 0.18342778086662292, | |
| "learning_rate": 9.236871304192857e-06, | |
| "loss": 1.3693324327468872, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 0.6895761741122566, | |
| "grad_norm": 0.09077564626932144, | |
| "learning_rate": 9.2305278522695e-06, | |
| "loss": 1.2000863552093506, | |
| "step": 602 | |
| }, | |
| { | |
| "epoch": 0.6918671248568156, | |
| "grad_norm": 0.1056990921497345, | |
| "learning_rate": 9.224160608450806e-06, | |
| "loss": 1.2265493869781494, | |
| "step": 604 | |
| }, | |
| { | |
| "epoch": 0.6941580756013745, | |
| "grad_norm": 0.039485201239585876, | |
| "learning_rate": 9.217769613344647e-06, | |
| "loss": 1.1047804355621338, | |
| "step": 606 | |
| }, | |
| { | |
| "epoch": 0.6964490263459335, | |
| "grad_norm": 0.08278033137321472, | |
| "learning_rate": 9.211354907710373e-06, | |
| "loss": 1.2274484634399414, | |
| "step": 608 | |
| }, | |
| { | |
| "epoch": 0.6987399770904925, | |
| "grad_norm": 0.04109884425997734, | |
| "learning_rate": 9.204916532458552e-06, | |
| "loss": 1.237788438796997, | |
| "step": 610 | |
| }, | |
| { | |
| "epoch": 0.7010309278350515, | |
| "grad_norm": 0.11578814685344696, | |
| "learning_rate": 9.198454528650702e-06, | |
| "loss": 1.2939280271530151, | |
| "step": 612 | |
| }, | |
| { | |
| "epoch": 0.7033218785796106, | |
| "grad_norm": 0.10241305083036423, | |
| "learning_rate": 9.191968937499041e-06, | |
| "loss": 1.1080031394958496, | |
| "step": 614 | |
| }, | |
| { | |
| "epoch": 0.7056128293241696, | |
| "grad_norm": 0.08389988541603088, | |
| "learning_rate": 9.185459800366212e-06, | |
| "loss": 1.1695685386657715, | |
| "step": 616 | |
| }, | |
| { | |
| "epoch": 0.7079037800687286, | |
| "grad_norm": 0.18472033739089966, | |
| "learning_rate": 9.178927158765037e-06, | |
| "loss": 1.1244120597839355, | |
| "step": 618 | |
| }, | |
| { | |
| "epoch": 0.7101947308132875, | |
| "grad_norm": 0.13071289658546448, | |
| "learning_rate": 9.172371054358224e-06, | |
| "loss": 1.2239925861358643, | |
| "step": 620 | |
| }, | |
| { | |
| "epoch": 0.7124856815578465, | |
| "grad_norm": 0.039994560182094574, | |
| "learning_rate": 9.16579152895813e-06, | |
| "loss": 1.1075098514556885, | |
| "step": 622 | |
| }, | |
| { | |
| "epoch": 0.7147766323024055, | |
| "grad_norm": 0.12017904967069626, | |
| "learning_rate": 9.15918862452648e-06, | |
| "loss": 1.0645837783813477, | |
| "step": 624 | |
| }, | |
| { | |
| "epoch": 0.7170675830469645, | |
| "grad_norm": 0.19609086215496063, | |
| "learning_rate": 9.152562383174102e-06, | |
| "loss": 0.6706198453903198, | |
| "step": 626 | |
| }, | |
| { | |
| "epoch": 0.7193585337915235, | |
| "grad_norm": 0.13229237496852875, | |
| "learning_rate": 9.145912847160652e-06, | |
| "loss": 1.3926395177841187, | |
| "step": 628 | |
| }, | |
| { | |
| "epoch": 0.7216494845360825, | |
| "grad_norm": 0.09191222488880157, | |
| "learning_rate": 9.139240058894358e-06, | |
| "loss": 1.2411160469055176, | |
| "step": 630 | |
| }, | |
| { | |
| "epoch": 0.7239404352806414, | |
| "grad_norm": 0.13508380949497223, | |
| "learning_rate": 9.132544060931738e-06, | |
| "loss": 1.4096720218658447, | |
| "step": 632 | |
| }, | |
| { | |
| "epoch": 0.7262313860252004, | |
| "grad_norm": 0.11203691363334656, | |
| "learning_rate": 9.125824895977334e-06, | |
| "loss": 0.7960629463195801, | |
| "step": 634 | |
| }, | |
| { | |
| "epoch": 0.7285223367697594, | |
| "grad_norm": 0.17772053182125092, | |
| "learning_rate": 9.11908260688344e-06, | |
| "loss": 1.3206441402435303, | |
| "step": 636 | |
| }, | |
| { | |
| "epoch": 0.7308132875143184, | |
| "grad_norm": 0.09526784718036652, | |
| "learning_rate": 9.112317236649822e-06, | |
| "loss": 1.159492015838623, | |
| "step": 638 | |
| }, | |
| { | |
| "epoch": 0.7331042382588774, | |
| "grad_norm": 0.11944857239723206, | |
| "learning_rate": 9.105528828423455e-06, | |
| "loss": 1.3975093364715576, | |
| "step": 640 | |
| }, | |
| { | |
| "epoch": 0.7353951890034365, | |
| "grad_norm": 0.034794408828020096, | |
| "learning_rate": 9.098717425498237e-06, | |
| "loss": 1.132660150527954, | |
| "step": 642 | |
| }, | |
| { | |
| "epoch": 0.7376861397479955, | |
| "grad_norm": 0.13092458248138428, | |
| "learning_rate": 9.09188307131472e-06, | |
| "loss": 1.3938100337982178, | |
| "step": 644 | |
| }, | |
| { | |
| "epoch": 0.7399770904925544, | |
| "grad_norm": 0.033246614038944244, | |
| "learning_rate": 9.085025809459826e-06, | |
| "loss": 1.1749005317687988, | |
| "step": 646 | |
| }, | |
| { | |
| "epoch": 0.7422680412371134, | |
| "grad_norm": 0.036673709750175476, | |
| "learning_rate": 9.078145683666582e-06, | |
| "loss": 1.0377411842346191, | |
| "step": 648 | |
| }, | |
| { | |
| "epoch": 0.7445589919816724, | |
| "grad_norm": 0.1842445731163025, | |
| "learning_rate": 9.071242737813824e-06, | |
| "loss": 1.0705593824386597, | |
| "step": 650 | |
| }, | |
| { | |
| "epoch": 0.7468499427262314, | |
| "grad_norm": 0.04267792031168938, | |
| "learning_rate": 9.06431701592593e-06, | |
| "loss": 1.1197433471679688, | |
| "step": 652 | |
| }, | |
| { | |
| "epoch": 0.7491408934707904, | |
| "grad_norm": 0.11130751669406891, | |
| "learning_rate": 9.057368562172535e-06, | |
| "loss": 1.0569872856140137, | |
| "step": 654 | |
| }, | |
| { | |
| "epoch": 0.7514318442153494, | |
| "grad_norm": 0.11254757642745972, | |
| "learning_rate": 9.050397420868246e-06, | |
| "loss": 1.1947691440582275, | |
| "step": 656 | |
| }, | |
| { | |
| "epoch": 0.7537227949599083, | |
| "grad_norm": 0.11397068947553635, | |
| "learning_rate": 9.043403636472368e-06, | |
| "loss": 0.9924128651618958, | |
| "step": 658 | |
| }, | |
| { | |
| "epoch": 0.7560137457044673, | |
| "grad_norm": 0.16056719422340393, | |
| "learning_rate": 9.036387253588611e-06, | |
| "loss": 1.4780884981155396, | |
| "step": 660 | |
| }, | |
| { | |
| "epoch": 0.7583046964490263, | |
| "grad_norm": 0.1511063575744629, | |
| "learning_rate": 9.02934831696481e-06, | |
| "loss": 0.638008713722229, | |
| "step": 662 | |
| }, | |
| { | |
| "epoch": 0.7605956471935853, | |
| "grad_norm": 0.04247143492102623, | |
| "learning_rate": 9.022286871492641e-06, | |
| "loss": 1.166100025177002, | |
| "step": 664 | |
| }, | |
| { | |
| "epoch": 0.7628865979381443, | |
| "grad_norm": 0.10111839324235916, | |
| "learning_rate": 9.015202962207329e-06, | |
| "loss": 1.248875617980957, | |
| "step": 666 | |
| }, | |
| { | |
| "epoch": 0.7651775486827033, | |
| "grad_norm": 0.10688889771699905, | |
| "learning_rate": 9.008096634287372e-06, | |
| "loss": 1.218861699104309, | |
| "step": 668 | |
| }, | |
| { | |
| "epoch": 0.7674684994272624, | |
| "grad_norm": 0.14122603833675385, | |
| "learning_rate": 9.000967933054236e-06, | |
| "loss": 1.2160475254058838, | |
| "step": 670 | |
| }, | |
| { | |
| "epoch": 0.7697594501718213, | |
| "grad_norm": 0.09510156512260437, | |
| "learning_rate": 8.993816903972083e-06, | |
| "loss": 1.3408292531967163, | |
| "step": 672 | |
| }, | |
| { | |
| "epoch": 0.7720504009163803, | |
| "grad_norm": 0.032357197254896164, | |
| "learning_rate": 8.986643592647473e-06, | |
| "loss": 0.9297527074813843, | |
| "step": 674 | |
| }, | |
| { | |
| "epoch": 0.7743413516609393, | |
| "grad_norm": 0.10768794268369675, | |
| "learning_rate": 8.979448044829068e-06, | |
| "loss": 1.2648060321807861, | |
| "step": 676 | |
| }, | |
| { | |
| "epoch": 0.7766323024054983, | |
| "grad_norm": 0.1046016663312912, | |
| "learning_rate": 8.972230306407354e-06, | |
| "loss": 0.9922665357589722, | |
| "step": 678 | |
| }, | |
| { | |
| "epoch": 0.7789232531500573, | |
| "grad_norm": 0.10708943009376526, | |
| "learning_rate": 8.964990423414334e-06, | |
| "loss": 1.2871710062026978, | |
| "step": 680 | |
| }, | |
| { | |
| "epoch": 0.7812142038946163, | |
| "grad_norm": 0.11082033067941666, | |
| "learning_rate": 8.957728442023243e-06, | |
| "loss": 1.2588818073272705, | |
| "step": 682 | |
| }, | |
| { | |
| "epoch": 0.7835051546391752, | |
| "grad_norm": 0.15941235423088074, | |
| "learning_rate": 8.95044440854825e-06, | |
| "loss": 1.4140057563781738, | |
| "step": 684 | |
| }, | |
| { | |
| "epoch": 0.7857961053837342, | |
| "grad_norm": 0.10488958656787872, | |
| "learning_rate": 8.943138369444165e-06, | |
| "loss": 1.216564655303955, | |
| "step": 686 | |
| }, | |
| { | |
| "epoch": 0.7880870561282932, | |
| "grad_norm": 0.09411117434501648, | |
| "learning_rate": 8.935810371306143e-06, | |
| "loss": 1.0623620748519897, | |
| "step": 688 | |
| }, | |
| { | |
| "epoch": 0.7903780068728522, | |
| "grad_norm": 0.1014682799577713, | |
| "learning_rate": 8.928460460869383e-06, | |
| "loss": 1.3300588130950928, | |
| "step": 690 | |
| }, | |
| { | |
| "epoch": 0.7926689576174112, | |
| "grad_norm": 0.1440027803182602, | |
| "learning_rate": 8.921088685008833e-06, | |
| "loss": 1.4568748474121094, | |
| "step": 692 | |
| }, | |
| { | |
| "epoch": 0.7949599083619702, | |
| "grad_norm": 0.05546777695417404, | |
| "learning_rate": 8.913695090738891e-06, | |
| "loss": 1.1724140644073486, | |
| "step": 694 | |
| }, | |
| { | |
| "epoch": 0.7972508591065293, | |
| "grad_norm": 0.0671909973025322, | |
| "learning_rate": 8.906279725213105e-06, | |
| "loss": 1.0626245737075806, | |
| "step": 696 | |
| }, | |
| { | |
| "epoch": 0.7995418098510882, | |
| "grad_norm": 0.10349666327238083, | |
| "learning_rate": 8.898842635723868e-06, | |
| "loss": 1.2186851501464844, | |
| "step": 698 | |
| }, | |
| { | |
| "epoch": 0.8018327605956472, | |
| "grad_norm": 0.09945754706859589, | |
| "learning_rate": 8.891383869702127e-06, | |
| "loss": 1.2207574844360352, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 0.8041237113402062, | |
| "grad_norm": 0.11375889927148819, | |
| "learning_rate": 8.883903474717067e-06, | |
| "loss": 1.1474676132202148, | |
| "step": 702 | |
| }, | |
| { | |
| "epoch": 0.8064146620847652, | |
| "grad_norm": 0.033355362713336945, | |
| "learning_rate": 8.876401498475818e-06, | |
| "loss": 1.218152403831482, | |
| "step": 704 | |
| }, | |
| { | |
| "epoch": 0.8087056128293242, | |
| "grad_norm": 0.0988926887512207, | |
| "learning_rate": 8.868877988823148e-06, | |
| "loss": 1.217521071434021, | |
| "step": 706 | |
| }, | |
| { | |
| "epoch": 0.8109965635738832, | |
| "grad_norm": 0.10163167864084244, | |
| "learning_rate": 8.861332993741155e-06, | |
| "loss": 1.3256537914276123, | |
| "step": 708 | |
| }, | |
| { | |
| "epoch": 0.8132875143184422, | |
| "grad_norm": 0.15732599794864655, | |
| "learning_rate": 8.85376656134896e-06, | |
| "loss": 1.1980226039886475, | |
| "step": 710 | |
| }, | |
| { | |
| "epoch": 0.8155784650630011, | |
| "grad_norm": 0.12234638631343842, | |
| "learning_rate": 8.846178739902409e-06, | |
| "loss": 1.5141435861587524, | |
| "step": 712 | |
| }, | |
| { | |
| "epoch": 0.8178694158075601, | |
| "grad_norm": 0.03240432217717171, | |
| "learning_rate": 8.838569577793756e-06, | |
| "loss": 1.032310962677002, | |
| "step": 714 | |
| }, | |
| { | |
| "epoch": 0.8201603665521191, | |
| "grad_norm": 0.09315624833106995, | |
| "learning_rate": 8.83093912355136e-06, | |
| "loss": 1.1437958478927612, | |
| "step": 716 | |
| }, | |
| { | |
| "epoch": 0.8224513172966781, | |
| "grad_norm": 0.11046700924634933, | |
| "learning_rate": 8.82328742583937e-06, | |
| "loss": 1.1637240648269653, | |
| "step": 718 | |
| }, | |
| { | |
| "epoch": 0.8247422680412371, | |
| "grad_norm": 0.0841628909111023, | |
| "learning_rate": 8.815614533457419e-06, | |
| "loss": 1.0901081562042236, | |
| "step": 720 | |
| }, | |
| { | |
| "epoch": 0.827033218785796, | |
| "grad_norm": 0.10208731889724731, | |
| "learning_rate": 8.807920495340313e-06, | |
| "loss": 1.2723631858825684, | |
| "step": 722 | |
| }, | |
| { | |
| "epoch": 0.8293241695303551, | |
| "grad_norm": 0.10736406594514847, | |
| "learning_rate": 8.800205360557714e-06, | |
| "loss": 1.3155536651611328, | |
| "step": 724 | |
| }, | |
| { | |
| "epoch": 0.8316151202749141, | |
| "grad_norm": 0.24670641124248505, | |
| "learning_rate": 8.792469178313835e-06, | |
| "loss": 1.1443843841552734, | |
| "step": 726 | |
| }, | |
| { | |
| "epoch": 0.8339060710194731, | |
| "grad_norm": 0.11407999694347382, | |
| "learning_rate": 8.784711997947121e-06, | |
| "loss": 1.2705860137939453, | |
| "step": 728 | |
| }, | |
| { | |
| "epoch": 0.8361970217640321, | |
| "grad_norm": 0.1088922917842865, | |
| "learning_rate": 8.776933868929929e-06, | |
| "loss": 1.2330048084259033, | |
| "step": 730 | |
| }, | |
| { | |
| "epoch": 0.8384879725085911, | |
| "grad_norm": 0.04192059114575386, | |
| "learning_rate": 8.769134840868228e-06, | |
| "loss": 1.17930006980896, | |
| "step": 732 | |
| }, | |
| { | |
| "epoch": 0.8407789232531501, | |
| "grad_norm": 0.10472884774208069, | |
| "learning_rate": 8.761314963501265e-06, | |
| "loss": 1.2617244720458984, | |
| "step": 734 | |
| }, | |
| { | |
| "epoch": 0.843069873997709, | |
| "grad_norm": 0.036843329668045044, | |
| "learning_rate": 8.753474286701263e-06, | |
| "loss": 1.0243253707885742, | |
| "step": 736 | |
| }, | |
| { | |
| "epoch": 0.845360824742268, | |
| "grad_norm": 0.14724741876125336, | |
| "learning_rate": 8.74561286047309e-06, | |
| "loss": 1.342656135559082, | |
| "step": 738 | |
| }, | |
| { | |
| "epoch": 0.847651775486827, | |
| "grad_norm": 0.10858327895402908, | |
| "learning_rate": 8.737730734953949e-06, | |
| "loss": 1.3815677165985107, | |
| "step": 740 | |
| }, | |
| { | |
| "epoch": 0.849942726231386, | |
| "grad_norm": 0.04007134586572647, | |
| "learning_rate": 8.729827960413054e-06, | |
| "loss": 1.385345458984375, | |
| "step": 742 | |
| }, | |
| { | |
| "epoch": 0.852233676975945, | |
| "grad_norm": 0.1044420599937439, | |
| "learning_rate": 8.721904587251315e-06, | |
| "loss": 1.3040602207183838, | |
| "step": 744 | |
| }, | |
| { | |
| "epoch": 0.854524627720504, | |
| "grad_norm": 0.034813616424798965, | |
| "learning_rate": 8.713960666001e-06, | |
| "loss": 1.0024397373199463, | |
| "step": 746 | |
| }, | |
| { | |
| "epoch": 0.856815578465063, | |
| "grad_norm": 0.03408174216747284, | |
| "learning_rate": 8.705996247325443e-06, | |
| "loss": 1.1536363363265991, | |
| "step": 748 | |
| }, | |
| { | |
| "epoch": 0.8591065292096219, | |
| "grad_norm": 0.10098287463188171, | |
| "learning_rate": 8.698011382018687e-06, | |
| "loss": 1.4317657947540283, | |
| "step": 750 | |
| }, | |
| { | |
| "epoch": 0.861397479954181, | |
| "grad_norm": 0.12679803371429443, | |
| "learning_rate": 8.690006121005187e-06, | |
| "loss": 1.4823131561279297, | |
| "step": 752 | |
| }, | |
| { | |
| "epoch": 0.86368843069874, | |
| "grad_norm": 0.11496943235397339, | |
| "learning_rate": 8.681980515339464e-06, | |
| "loss": 1.172654151916504, | |
| "step": 754 | |
| }, | |
| { | |
| "epoch": 0.865979381443299, | |
| "grad_norm": 0.03390732407569885, | |
| "learning_rate": 8.6739346162058e-06, | |
| "loss": 1.1092259883880615, | |
| "step": 756 | |
| }, | |
| { | |
| "epoch": 0.868270332187858, | |
| "grad_norm": 0.09868176281452179, | |
| "learning_rate": 8.66586847491789e-06, | |
| "loss": 1.1055936813354492, | |
| "step": 758 | |
| }, | |
| { | |
| "epoch": 0.870561282932417, | |
| "grad_norm": 0.1280670017004013, | |
| "learning_rate": 8.657782142918537e-06, | |
| "loss": 1.2542710304260254, | |
| "step": 760 | |
| }, | |
| { | |
| "epoch": 0.872852233676976, | |
| "grad_norm": 0.101657435297966, | |
| "learning_rate": 8.649675671779304e-06, | |
| "loss": 1.2047970294952393, | |
| "step": 762 | |
| }, | |
| { | |
| "epoch": 0.8751431844215349, | |
| "grad_norm": 0.16795100271701813, | |
| "learning_rate": 8.641549113200198e-06, | |
| "loss": 1.1056723594665527, | |
| "step": 764 | |
| }, | |
| { | |
| "epoch": 0.8774341351660939, | |
| "grad_norm": 0.045386865735054016, | |
| "learning_rate": 8.633402519009337e-06, | |
| "loss": 1.307566523551941, | |
| "step": 766 | |
| }, | |
| { | |
| "epoch": 0.8797250859106529, | |
| "grad_norm": 0.09542455524206161, | |
| "learning_rate": 8.625235941162615e-06, | |
| "loss": 1.1384682655334473, | |
| "step": 768 | |
| }, | |
| { | |
| "epoch": 0.8820160366552119, | |
| "grad_norm": 0.3451470136642456, | |
| "learning_rate": 8.617049431743376e-06, | |
| "loss": 1.0609101057052612, | |
| "step": 770 | |
| }, | |
| { | |
| "epoch": 0.8843069873997709, | |
| "grad_norm": 0.043013185262680054, | |
| "learning_rate": 8.60884304296208e-06, | |
| "loss": 0.8049713373184204, | |
| "step": 772 | |
| }, | |
| { | |
| "epoch": 0.8865979381443299, | |
| "grad_norm": 0.0997074693441391, | |
| "learning_rate": 8.600616827155968e-06, | |
| "loss": 1.2299315929412842, | |
| "step": 774 | |
| }, | |
| { | |
| "epoch": 0.8888888888888888, | |
| "grad_norm": 0.11632440239191055, | |
| "learning_rate": 8.592370836788738e-06, | |
| "loss": 1.325179100036621, | |
| "step": 776 | |
| }, | |
| { | |
| "epoch": 0.8911798396334479, | |
| "grad_norm": 0.11443230509757996, | |
| "learning_rate": 8.584105124450192e-06, | |
| "loss": 1.4285557270050049, | |
| "step": 778 | |
| }, | |
| { | |
| "epoch": 0.8934707903780069, | |
| "grad_norm": 0.21292933821678162, | |
| "learning_rate": 8.575819742855918e-06, | |
| "loss": 1.6177531480789185, | |
| "step": 780 | |
| }, | |
| { | |
| "epoch": 0.8957617411225659, | |
| "grad_norm": 0.03320442512631416, | |
| "learning_rate": 8.567514744846947e-06, | |
| "loss": 1.1647987365722656, | |
| "step": 782 | |
| }, | |
| { | |
| "epoch": 0.8980526918671249, | |
| "grad_norm": 0.21047484874725342, | |
| "learning_rate": 8.559190183389411e-06, | |
| "loss": 0.6821029186248779, | |
| "step": 784 | |
| }, | |
| { | |
| "epoch": 0.9003436426116839, | |
| "grad_norm": 0.08516217768192291, | |
| "learning_rate": 8.550846111574216e-06, | |
| "loss": 0.9978764057159424, | |
| "step": 786 | |
| }, | |
| { | |
| "epoch": 0.9026345933562429, | |
| "grad_norm": 0.20089198648929596, | |
| "learning_rate": 8.542482582616694e-06, | |
| "loss": 1.1663529872894287, | |
| "step": 788 | |
| }, | |
| { | |
| "epoch": 0.9049255441008018, | |
| "grad_norm": 0.08953817188739777, | |
| "learning_rate": 8.53409964985627e-06, | |
| "loss": 1.1072628498077393, | |
| "step": 790 | |
| }, | |
| { | |
| "epoch": 0.9072164948453608, | |
| "grad_norm": 0.20335598289966583, | |
| "learning_rate": 8.525697366756117e-06, | |
| "loss": 1.1753069162368774, | |
| "step": 792 | |
| }, | |
| { | |
| "epoch": 0.9095074455899198, | |
| "grad_norm": 0.10008153319358826, | |
| "learning_rate": 8.51727578690282e-06, | |
| "loss": 1.2463250160217285, | |
| "step": 794 | |
| }, | |
| { | |
| "epoch": 0.9117983963344788, | |
| "grad_norm": 0.03641178086400032, | |
| "learning_rate": 8.508834964006026e-06, | |
| "loss": 1.1079061031341553, | |
| "step": 796 | |
| }, | |
| { | |
| "epoch": 0.9140893470790378, | |
| "grad_norm": 0.11625296622514725, | |
| "learning_rate": 8.500374951898111e-06, | |
| "loss": 1.2706644535064697, | |
| "step": 798 | |
| }, | |
| { | |
| "epoch": 0.9163802978235968, | |
| "grad_norm": 0.11276042461395264, | |
| "learning_rate": 8.491895804533834e-06, | |
| "loss": 1.3342492580413818, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 0.9186712485681557, | |
| "grad_norm": 0.10194030404090881, | |
| "learning_rate": 8.483397575989984e-06, | |
| "loss": 1.307550311088562, | |
| "step": 802 | |
| }, | |
| { | |
| "epoch": 0.9209621993127147, | |
| "grad_norm": 0.13294701278209686, | |
| "learning_rate": 8.474880320465054e-06, | |
| "loss": 1.3148338794708252, | |
| "step": 804 | |
| }, | |
| { | |
| "epoch": 0.9232531500572738, | |
| "grad_norm": 0.13636107742786407, | |
| "learning_rate": 8.466344092278874e-06, | |
| "loss": 1.4288030862808228, | |
| "step": 806 | |
| }, | |
| { | |
| "epoch": 0.9255441008018328, | |
| "grad_norm": 0.17754265666007996, | |
| "learning_rate": 8.457788945872278e-06, | |
| "loss": 1.3068530559539795, | |
| "step": 808 | |
| }, | |
| { | |
| "epoch": 0.9278350515463918, | |
| "grad_norm": 0.16769464313983917, | |
| "learning_rate": 8.449214935806754e-06, | |
| "loss": 1.2810285091400146, | |
| "step": 810 | |
| }, | |
| { | |
| "epoch": 0.9301260022909508, | |
| "grad_norm": 0.17232482135295868, | |
| "learning_rate": 8.440622116764095e-06, | |
| "loss": 1.3921419382095337, | |
| "step": 812 | |
| }, | |
| { | |
| "epoch": 0.9324169530355098, | |
| "grad_norm": 0.03388385474681854, | |
| "learning_rate": 8.43201054354605e-06, | |
| "loss": 1.0712862014770508, | |
| "step": 814 | |
| }, | |
| { | |
| "epoch": 0.9347079037800687, | |
| "grad_norm": 0.24018289148807526, | |
| "learning_rate": 8.423380271073975e-06, | |
| "loss": 1.0584461688995361, | |
| "step": 816 | |
| }, | |
| { | |
| "epoch": 0.9369988545246277, | |
| "grad_norm": 0.17711585760116577, | |
| "learning_rate": 8.41473135438848e-06, | |
| "loss": 1.2261204719543457, | |
| "step": 818 | |
| }, | |
| { | |
| "epoch": 0.9392898052691867, | |
| "grad_norm": 0.036880411207675934, | |
| "learning_rate": 8.406063848649089e-06, | |
| "loss": 1.1226576566696167, | |
| "step": 820 | |
| }, | |
| { | |
| "epoch": 0.9415807560137457, | |
| "grad_norm": 0.03658941015601158, | |
| "learning_rate": 8.397377809133872e-06, | |
| "loss": 1.0902087688446045, | |
| "step": 822 | |
| }, | |
| { | |
| "epoch": 0.9438717067583047, | |
| "grad_norm": 0.10014797747135162, | |
| "learning_rate": 8.388673291239098e-06, | |
| "loss": 1.2248773574829102, | |
| "step": 824 | |
| }, | |
| { | |
| "epoch": 0.9461626575028637, | |
| "grad_norm": 0.10653302818536758, | |
| "learning_rate": 8.379950350478899e-06, | |
| "loss": 1.1442151069641113, | |
| "step": 826 | |
| }, | |
| { | |
| "epoch": 0.9484536082474226, | |
| "grad_norm": 0.044977616518735886, | |
| "learning_rate": 8.371209042484884e-06, | |
| "loss": 1.190359354019165, | |
| "step": 828 | |
| }, | |
| { | |
| "epoch": 0.9507445589919816, | |
| "grad_norm": 0.11633345484733582, | |
| "learning_rate": 8.362449423005811e-06, | |
| "loss": 0.9401731491088867, | |
| "step": 830 | |
| }, | |
| { | |
| "epoch": 0.9530355097365406, | |
| "grad_norm": 0.10338146239519119, | |
| "learning_rate": 8.353671547907218e-06, | |
| "loss": 1.2098982334136963, | |
| "step": 832 | |
| }, | |
| { | |
| "epoch": 0.9553264604810997, | |
| "grad_norm": 0.037584755569696426, | |
| "learning_rate": 8.344875473171072e-06, | |
| "loss": 1.0929856300354004, | |
| "step": 834 | |
| }, | |
| { | |
| "epoch": 0.9576174112256587, | |
| "grad_norm": 0.037858281284570694, | |
| "learning_rate": 8.33606125489541e-06, | |
| "loss": 1.1181831359863281, | |
| "step": 836 | |
| }, | |
| { | |
| "epoch": 0.9599083619702177, | |
| "grad_norm": 0.033685605973005295, | |
| "learning_rate": 8.327228949293983e-06, | |
| "loss": 1.1918667554855347, | |
| "step": 838 | |
| }, | |
| { | |
| "epoch": 0.9621993127147767, | |
| "grad_norm": 0.10927028208971024, | |
| "learning_rate": 8.318378612695893e-06, | |
| "loss": 1.1812386512756348, | |
| "step": 840 | |
| }, | |
| { | |
| "epoch": 0.9644902634593356, | |
| "grad_norm": 0.09622111916542053, | |
| "learning_rate": 8.30951030154524e-06, | |
| "loss": 1.2083207368850708, | |
| "step": 842 | |
| }, | |
| { | |
| "epoch": 0.9667812142038946, | |
| "grad_norm": 0.09567157924175262, | |
| "learning_rate": 8.300624072400757e-06, | |
| "loss": 1.1253591775894165, | |
| "step": 844 | |
| }, | |
| { | |
| "epoch": 0.9690721649484536, | |
| "grad_norm": 0.1568741798400879, | |
| "learning_rate": 8.29171998193545e-06, | |
| "loss": 1.2886474132537842, | |
| "step": 846 | |
| }, | |
| { | |
| "epoch": 0.9713631156930126, | |
| "grad_norm": 0.21366208791732788, | |
| "learning_rate": 8.28279808693624e-06, | |
| "loss": 1.2641284465789795, | |
| "step": 848 | |
| }, | |
| { | |
| "epoch": 0.9736540664375716, | |
| "grad_norm": 0.11669494956731796, | |
| "learning_rate": 8.273858444303601e-06, | |
| "loss": 1.2815788984298706, | |
| "step": 850 | |
| }, | |
| { | |
| "epoch": 0.9759450171821306, | |
| "grad_norm": 0.12636975944042206, | |
| "learning_rate": 8.26490111105119e-06, | |
| "loss": 1.280737042427063, | |
| "step": 852 | |
| }, | |
| { | |
| "epoch": 0.9782359679266895, | |
| "grad_norm": 0.09644594043493271, | |
| "learning_rate": 8.25592614430549e-06, | |
| "loss": 1.4965810775756836, | |
| "step": 854 | |
| }, | |
| { | |
| "epoch": 0.9805269186712485, | |
| "grad_norm": 0.10086604207754135, | |
| "learning_rate": 8.246933601305441e-06, | |
| "loss": 1.2899303436279297, | |
| "step": 856 | |
| }, | |
| { | |
| "epoch": 0.9828178694158075, | |
| "grad_norm": 0.03850960731506348, | |
| "learning_rate": 8.237923539402083e-06, | |
| "loss": 1.1815292835235596, | |
| "step": 858 | |
| }, | |
| { | |
| "epoch": 0.9851088201603666, | |
| "grad_norm": 0.03153403848409653, | |
| "learning_rate": 8.228896016058182e-06, | |
| "loss": 1.0716750621795654, | |
| "step": 860 | |
| }, | |
| { | |
| "epoch": 0.9873997709049256, | |
| "grad_norm": 0.5610406398773193, | |
| "learning_rate": 8.219851088847866e-06, | |
| "loss": 0.8392553329467773, | |
| "step": 862 | |
| }, | |
| { | |
| "epoch": 0.9896907216494846, | |
| "grad_norm": 0.029171844944357872, | |
| "learning_rate": 8.210788815456259e-06, | |
| "loss": 0.5268185138702393, | |
| "step": 864 | |
| }, | |
| { | |
| "epoch": 0.9919816723940436, | |
| "grad_norm": 0.048108626157045364, | |
| "learning_rate": 8.201709253679113e-06, | |
| "loss": 1.1043140888214111, | |
| "step": 866 | |
| }, | |
| { | |
| "epoch": 0.9942726231386025, | |
| "grad_norm": 0.03910600021481514, | |
| "learning_rate": 8.192612461422436e-06, | |
| "loss": 1.1640534400939941, | |
| "step": 868 | |
| }, | |
| { | |
| "epoch": 0.9965635738831615, | |
| "grad_norm": 0.14607083797454834, | |
| "learning_rate": 8.18349849670213e-06, | |
| "loss": 1.191579818725586, | |
| "step": 870 | |
| }, | |
| { | |
| "epoch": 0.9988545246277205, | |
| "grad_norm": 0.1132192313671112, | |
| "learning_rate": 8.174367417643614e-06, | |
| "loss": 1.300398349761963, | |
| "step": 872 | |
| }, | |
| { | |
| "epoch": 1.0011454753722795, | |
| "grad_norm": 0.03182588517665863, | |
| "learning_rate": 8.165219282481454e-06, | |
| "loss": 1.0970737934112549, | |
| "step": 874 | |
| }, | |
| { | |
| "epoch": 1.0034364261168385, | |
| "grad_norm": 0.09465012699365616, | |
| "learning_rate": 8.156054149558997e-06, | |
| "loss": 1.2283321619033813, | |
| "step": 876 | |
| }, | |
| { | |
| "epoch": 1.0057273768613975, | |
| "grad_norm": 0.09852684289216995, | |
| "learning_rate": 8.146872077327992e-06, | |
| "loss": 1.2641890048980713, | |
| "step": 878 | |
| }, | |
| { | |
| "epoch": 1.0080183276059564, | |
| "grad_norm": 0.09122022986412048, | |
| "learning_rate": 8.137673124348224e-06, | |
| "loss": 1.2633693218231201, | |
| "step": 880 | |
| }, | |
| { | |
| "epoch": 1.0103092783505154, | |
| "grad_norm": 0.0346677266061306, | |
| "learning_rate": 8.128457349287134e-06, | |
| "loss": 1.2950749397277832, | |
| "step": 882 | |
| }, | |
| { | |
| "epoch": 1.0126002290950744, | |
| "grad_norm": 0.10290050506591797, | |
| "learning_rate": 8.119224810919446e-06, | |
| "loss": 1.155712604522705, | |
| "step": 884 | |
| }, | |
| { | |
| "epoch": 1.0148911798396334, | |
| "grad_norm": 0.11368633806705475, | |
| "learning_rate": 8.1099755681268e-06, | |
| "loss": 1.2856930494308472, | |
| "step": 886 | |
| }, | |
| { | |
| "epoch": 1.0171821305841924, | |
| "grad_norm": 0.09924730658531189, | |
| "learning_rate": 8.10070967989737e-06, | |
| "loss": 1.3079783916473389, | |
| "step": 888 | |
| }, | |
| { | |
| "epoch": 1.0194730813287514, | |
| "grad_norm": 0.03917548060417175, | |
| "learning_rate": 8.091427205325481e-06, | |
| "loss": 1.1257250308990479, | |
| "step": 890 | |
| }, | |
| { | |
| "epoch": 1.0217640320733103, | |
| "grad_norm": 0.10367637127637863, | |
| "learning_rate": 8.082128203611245e-06, | |
| "loss": 1.1606723070144653, | |
| "step": 892 | |
| }, | |
| { | |
| "epoch": 1.0240549828178693, | |
| "grad_norm": 0.04987473413348198, | |
| "learning_rate": 8.07281273406018e-06, | |
| "loss": 1.2995059490203857, | |
| "step": 894 | |
| }, | |
| { | |
| "epoch": 1.0263459335624283, | |
| "grad_norm": 0.1082967147231102, | |
| "learning_rate": 8.063480856082822e-06, | |
| "loss": 1.3261157274246216, | |
| "step": 896 | |
| }, | |
| { | |
| "epoch": 1.0286368843069873, | |
| "grad_norm": 0.10131801664829254, | |
| "learning_rate": 8.054132629194363e-06, | |
| "loss": 1.2500728368759155, | |
| "step": 898 | |
| }, | |
| { | |
| "epoch": 1.0309278350515463, | |
| "grad_norm": 0.20225836336612701, | |
| "learning_rate": 8.044768113014253e-06, | |
| "loss": 1.2365930080413818, | |
| "step": 900 | |
| }, | |
| { | |
| "epoch": 1.0332187857961055, | |
| "grad_norm": 0.3037818372249603, | |
| "learning_rate": 8.03538736726584e-06, | |
| "loss": 0.9093201756477356, | |
| "step": 902 | |
| }, | |
| { | |
| "epoch": 1.0355097365406645, | |
| "grad_norm": 0.09588132798671722, | |
| "learning_rate": 8.025990451775963e-06, | |
| "loss": 1.203194499015808, | |
| "step": 904 | |
| }, | |
| { | |
| "epoch": 1.0378006872852235, | |
| "grad_norm": 0.09174283593893051, | |
| "learning_rate": 8.016577426474602e-06, | |
| "loss": 1.1838289499282837, | |
| "step": 906 | |
| }, | |
| { | |
| "epoch": 1.0400916380297824, | |
| "grad_norm": 0.09558971971273422, | |
| "learning_rate": 8.007148351394465e-06, | |
| "loss": 1.2214634418487549, | |
| "step": 908 | |
| }, | |
| { | |
| "epoch": 1.0423825887743414, | |
| "grad_norm": 0.06839102506637573, | |
| "learning_rate": 7.99770328667063e-06, | |
| "loss": 1.0787734985351562, | |
| "step": 910 | |
| }, | |
| { | |
| "epoch": 1.0446735395189004, | |
| "grad_norm": 0.040618084371089935, | |
| "learning_rate": 7.988242292540144e-06, | |
| "loss": 1.246540904045105, | |
| "step": 912 | |
| }, | |
| { | |
| "epoch": 1.0469644902634594, | |
| "grad_norm": 0.12011495232582092, | |
| "learning_rate": 7.978765429341651e-06, | |
| "loss": 1.1854981184005737, | |
| "step": 914 | |
| }, | |
| { | |
| "epoch": 1.0492554410080184, | |
| "grad_norm": 0.12019508332014084, | |
| "learning_rate": 7.969272757514997e-06, | |
| "loss": 1.1858718395233154, | |
| "step": 916 | |
| }, | |
| { | |
| "epoch": 1.0515463917525774, | |
| "grad_norm": 0.09292206168174744, | |
| "learning_rate": 7.959764337600852e-06, | |
| "loss": 1.2531483173370361, | |
| "step": 918 | |
| }, | |
| { | |
| "epoch": 1.0538373424971363, | |
| "grad_norm": 0.030973954126238823, | |
| "learning_rate": 7.950240230240323e-06, | |
| "loss": 0.6899155974388123, | |
| "step": 920 | |
| }, | |
| { | |
| "epoch": 1.0561282932416953, | |
| "grad_norm": 0.17956514656543732, | |
| "learning_rate": 7.94070049617456e-06, | |
| "loss": 1.1831200122833252, | |
| "step": 922 | |
| }, | |
| { | |
| "epoch": 1.0584192439862543, | |
| "grad_norm": 0.10381026566028595, | |
| "learning_rate": 7.93114519624438e-06, | |
| "loss": 1.2370057106018066, | |
| "step": 924 | |
| }, | |
| { | |
| "epoch": 1.0607101947308133, | |
| "grad_norm": 0.1092928797006607, | |
| "learning_rate": 7.921574391389874e-06, | |
| "loss": 1.1756248474121094, | |
| "step": 926 | |
| }, | |
| { | |
| "epoch": 1.0630011454753723, | |
| "grad_norm": 0.0376867949962616, | |
| "learning_rate": 7.911988142650008e-06, | |
| "loss": 1.072977066040039, | |
| "step": 928 | |
| }, | |
| { | |
| "epoch": 1.0652920962199313, | |
| "grad_norm": 0.32605865597724915, | |
| "learning_rate": 7.902386511162257e-06, | |
| "loss": 1.0307834148406982, | |
| "step": 930 | |
| }, | |
| { | |
| "epoch": 1.0675830469644902, | |
| "grad_norm": 0.10231613367795944, | |
| "learning_rate": 7.892769558162188e-06, | |
| "loss": 1.206171989440918, | |
| "step": 932 | |
| }, | |
| { | |
| "epoch": 1.0698739977090492, | |
| "grad_norm": 0.10850352048873901, | |
| "learning_rate": 7.883137344983094e-06, | |
| "loss": 0.7696674466133118, | |
| "step": 934 | |
| }, | |
| { | |
| "epoch": 1.0721649484536082, | |
| "grad_norm": 0.10014284402132034, | |
| "learning_rate": 7.873489933055586e-06, | |
| "loss": 1.127623200416565, | |
| "step": 936 | |
| }, | |
| { | |
| "epoch": 1.0744558991981672, | |
| "grad_norm": 0.06162659451365471, | |
| "learning_rate": 7.863827383907202e-06, | |
| "loss": 1.3174957036972046, | |
| "step": 938 | |
| }, | |
| { | |
| "epoch": 1.0767468499427262, | |
| "grad_norm": 0.12214836478233337, | |
| "learning_rate": 7.85414975916203e-06, | |
| "loss": 1.235177993774414, | |
| "step": 940 | |
| }, | |
| { | |
| "epoch": 1.0790378006872852, | |
| "grad_norm": 0.03565968945622444, | |
| "learning_rate": 7.8444571205403e-06, | |
| "loss": 1.1172802448272705, | |
| "step": 942 | |
| }, | |
| { | |
| "epoch": 1.0813287514318441, | |
| "grad_norm": 0.04510068520903587, | |
| "learning_rate": 7.834749529857991e-06, | |
| "loss": 1.1934261322021484, | |
| "step": 944 | |
| }, | |
| { | |
| "epoch": 1.0836197021764031, | |
| "grad_norm": 0.0956355631351471, | |
| "learning_rate": 7.825027049026448e-06, | |
| "loss": 1.2597328424453735, | |
| "step": 946 | |
| }, | |
| { | |
| "epoch": 1.0859106529209621, | |
| "grad_norm": 0.09545722603797913, | |
| "learning_rate": 7.81528974005197e-06, | |
| "loss": 1.2638235092163086, | |
| "step": 948 | |
| }, | |
| { | |
| "epoch": 1.088201603665521, | |
| "grad_norm": 0.1045144647359848, | |
| "learning_rate": 7.805537665035435e-06, | |
| "loss": 1.1498044729232788, | |
| "step": 950 | |
| }, | |
| { | |
| "epoch": 1.09049255441008, | |
| "grad_norm": 0.09164479374885559, | |
| "learning_rate": 7.795770886171885e-06, | |
| "loss": 1.224025011062622, | |
| "step": 952 | |
| }, | |
| { | |
| "epoch": 1.0927835051546393, | |
| "grad_norm": 0.03645243123173714, | |
| "learning_rate": 7.785989465750144e-06, | |
| "loss": 1.1202518939971924, | |
| "step": 954 | |
| }, | |
| { | |
| "epoch": 1.0950744558991983, | |
| "grad_norm": 0.09394631534814835, | |
| "learning_rate": 7.776193466152408e-06, | |
| "loss": 1.231307864189148, | |
| "step": 956 | |
| }, | |
| { | |
| "epoch": 1.0973654066437573, | |
| "grad_norm": 0.08598010987043381, | |
| "learning_rate": 7.766382949853856e-06, | |
| "loss": 1.1217191219329834, | |
| "step": 958 | |
| }, | |
| { | |
| "epoch": 1.0996563573883162, | |
| "grad_norm": 0.04581398144364357, | |
| "learning_rate": 7.756557979422254e-06, | |
| "loss": 1.2090941667556763, | |
| "step": 960 | |
| }, | |
| { | |
| "epoch": 1.1019473081328752, | |
| "grad_norm": 0.09387822449207306, | |
| "learning_rate": 7.746718617517541e-06, | |
| "loss": 1.245224952697754, | |
| "step": 962 | |
| }, | |
| { | |
| "epoch": 1.1042382588774342, | |
| "grad_norm": 0.14695440232753754, | |
| "learning_rate": 7.73686492689145e-06, | |
| "loss": 1.2612382173538208, | |
| "step": 964 | |
| }, | |
| { | |
| "epoch": 1.1065292096219932, | |
| "grad_norm": 0.09694456309080124, | |
| "learning_rate": 7.726996970387087e-06, | |
| "loss": 1.185286283493042, | |
| "step": 966 | |
| }, | |
| { | |
| "epoch": 1.1088201603665522, | |
| "grad_norm": 0.1064264178276062, | |
| "learning_rate": 7.717114810938548e-06, | |
| "loss": 1.2336416244506836, | |
| "step": 968 | |
| }, | |
| { | |
| "epoch": 1.1111111111111112, | |
| "grad_norm": 0.09071429073810577, | |
| "learning_rate": 7.707218511570506e-06, | |
| "loss": 0.9867799282073975, | |
| "step": 970 | |
| }, | |
| { | |
| "epoch": 1.1134020618556701, | |
| "grad_norm": 0.0929940938949585, | |
| "learning_rate": 7.697308135397819e-06, | |
| "loss": 1.303302526473999, | |
| "step": 972 | |
| }, | |
| { | |
| "epoch": 1.1156930126002291, | |
| "grad_norm": 0.09926512837409973, | |
| "learning_rate": 7.687383745625113e-06, | |
| "loss": 0.7288157939910889, | |
| "step": 974 | |
| }, | |
| { | |
| "epoch": 1.1179839633447881, | |
| "grad_norm": 0.12962137162685394, | |
| "learning_rate": 7.67744540554639e-06, | |
| "loss": 1.3191977739334106, | |
| "step": 976 | |
| }, | |
| { | |
| "epoch": 1.120274914089347, | |
| "grad_norm": 0.14595907926559448, | |
| "learning_rate": 7.667493178544626e-06, | |
| "loss": 1.0432918071746826, | |
| "step": 978 | |
| }, | |
| { | |
| "epoch": 1.122565864833906, | |
| "grad_norm": 0.11306449770927429, | |
| "learning_rate": 7.65752712809136e-06, | |
| "loss": 1.110289454460144, | |
| "step": 980 | |
| }, | |
| { | |
| "epoch": 1.124856815578465, | |
| "grad_norm": 0.13710643351078033, | |
| "learning_rate": 7.64754731774629e-06, | |
| "loss": 1.3338663578033447, | |
| "step": 982 | |
| }, | |
| { | |
| "epoch": 1.127147766323024, | |
| "grad_norm": 0.14222948253154755, | |
| "learning_rate": 7.637553811156871e-06, | |
| "loss": 1.2077035903930664, | |
| "step": 984 | |
| }, | |
| { | |
| "epoch": 1.129438717067583, | |
| "grad_norm": 0.1056067943572998, | |
| "learning_rate": 7.627546672057908e-06, | |
| "loss": 1.1796958446502686, | |
| "step": 986 | |
| }, | |
| { | |
| "epoch": 1.131729667812142, | |
| "grad_norm": 0.12675486505031586, | |
| "learning_rate": 7.617525964271149e-06, | |
| "loss": 1.2140767574310303, | |
| "step": 988 | |
| }, | |
| { | |
| "epoch": 1.134020618556701, | |
| "grad_norm": 0.09778056293725967, | |
| "learning_rate": 7.607491751704876e-06, | |
| "loss": 1.1973233222961426, | |
| "step": 990 | |
| }, | |
| { | |
| "epoch": 1.13631156930126, | |
| "grad_norm": 0.043336447328329086, | |
| "learning_rate": 7.5974440983535015e-06, | |
| "loss": 1.0020596981048584, | |
| "step": 992 | |
| }, | |
| { | |
| "epoch": 1.138602520045819, | |
| "grad_norm": 0.10499600321054459, | |
| "learning_rate": 7.587383068297157e-06, | |
| "loss": 1.1112275123596191, | |
| "step": 994 | |
| }, | |
| { | |
| "epoch": 1.140893470790378, | |
| "grad_norm": 0.0355721153318882, | |
| "learning_rate": 7.577308725701285e-06, | |
| "loss": 0.9966940879821777, | |
| "step": 996 | |
| }, | |
| { | |
| "epoch": 1.143184421534937, | |
| "grad_norm": 0.0992012470960617, | |
| "learning_rate": 7.567221134816235e-06, | |
| "loss": 1.1800178289413452, | |
| "step": 998 | |
| }, | |
| { | |
| "epoch": 1.145475372279496, | |
| "grad_norm": 0.10058074444532394, | |
| "learning_rate": 7.557120359976843e-06, | |
| "loss": 1.1242153644561768, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 1.147766323024055, | |
| "grad_norm": 0.19073814153671265, | |
| "learning_rate": 7.547006465602026e-06, | |
| "loss": 1.0217759609222412, | |
| "step": 1002 | |
| }, | |
| { | |
| "epoch": 1.1500572737686139, | |
| "grad_norm": 0.09970526397228241, | |
| "learning_rate": 7.5368795161943835e-06, | |
| "loss": 1.3643629550933838, | |
| "step": 1004 | |
| }, | |
| { | |
| "epoch": 1.1523482245131729, | |
| "grad_norm": 0.06471025198698044, | |
| "learning_rate": 7.526739576339761e-06, | |
| "loss": 1.0752243995666504, | |
| "step": 1006 | |
| }, | |
| { | |
| "epoch": 1.1546391752577319, | |
| "grad_norm": 0.042841047048568726, | |
| "learning_rate": 7.516586710706862e-06, | |
| "loss": 1.0762394666671753, | |
| "step": 1008 | |
| }, | |
| { | |
| "epoch": 1.1569301260022908, | |
| "grad_norm": 0.09638847410678864, | |
| "learning_rate": 7.506420984046823e-06, | |
| "loss": 1.1684939861297607, | |
| "step": 1010 | |
| }, | |
| { | |
| "epoch": 1.1592210767468498, | |
| "grad_norm": 0.1367417573928833, | |
| "learning_rate": 7.496242461192801e-06, | |
| "loss": 1.1517741680145264, | |
| "step": 1012 | |
| }, | |
| { | |
| "epoch": 1.161512027491409, | |
| "grad_norm": 0.03387519717216492, | |
| "learning_rate": 7.486051207059567e-06, | |
| "loss": 0.9555083513259888, | |
| "step": 1014 | |
| }, | |
| { | |
| "epoch": 1.163802978235968, | |
| "grad_norm": 0.1254238784313202, | |
| "learning_rate": 7.475847286643081e-06, | |
| "loss": 1.1272577047348022, | |
| "step": 1016 | |
| }, | |
| { | |
| "epoch": 1.166093928980527, | |
| "grad_norm": 0.0902896374464035, | |
| "learning_rate": 7.46563076502009e-06, | |
| "loss": 1.1893162727355957, | |
| "step": 1018 | |
| }, | |
| { | |
| "epoch": 1.168384879725086, | |
| "grad_norm": 0.09928741306066513, | |
| "learning_rate": 7.4554017073477e-06, | |
| "loss": 1.0811066627502441, | |
| "step": 1020 | |
| }, | |
| { | |
| "epoch": 1.170675830469645, | |
| "grad_norm": 0.03147672861814499, | |
| "learning_rate": 7.445160178862977e-06, | |
| "loss": 0.9947164058685303, | |
| "step": 1022 | |
| }, | |
| { | |
| "epoch": 1.172966781214204, | |
| "grad_norm": 0.04409791901707649, | |
| "learning_rate": 7.434906244882508e-06, | |
| "loss": 0.9831681847572327, | |
| "step": 1024 | |
| }, | |
| { | |
| "epoch": 1.175257731958763, | |
| "grad_norm": 0.20074661076068878, | |
| "learning_rate": 7.42463997080201e-06, | |
| "loss": 0.9452756643295288, | |
| "step": 1026 | |
| }, | |
| { | |
| "epoch": 1.177548682703322, | |
| "grad_norm": 0.09279465675354004, | |
| "learning_rate": 7.414361422095894e-06, | |
| "loss": 1.1467375755310059, | |
| "step": 1028 | |
| }, | |
| { | |
| "epoch": 1.179839633447881, | |
| "grad_norm": 0.09862932562828064, | |
| "learning_rate": 7.404070664316855e-06, | |
| "loss": 1.4994152784347534, | |
| "step": 1030 | |
| }, | |
| { | |
| "epoch": 1.1821305841924399, | |
| "grad_norm": 0.15487772226333618, | |
| "learning_rate": 7.393767763095452e-06, | |
| "loss": 0.6926403045654297, | |
| "step": 1032 | |
| }, | |
| { | |
| "epoch": 1.1844215349369989, | |
| "grad_norm": 0.03889283537864685, | |
| "learning_rate": 7.383452784139694e-06, | |
| "loss": 1.1457732915878296, | |
| "step": 1034 | |
| }, | |
| { | |
| "epoch": 1.1867124856815578, | |
| "grad_norm": 0.20745863020420074, | |
| "learning_rate": 7.37312579323461e-06, | |
| "loss": 0.6812342405319214, | |
| "step": 1036 | |
| }, | |
| { | |
| "epoch": 1.1890034364261168, | |
| "grad_norm": 0.15700620412826538, | |
| "learning_rate": 7.362786856241845e-06, | |
| "loss": 1.1620197296142578, | |
| "step": 1038 | |
| }, | |
| { | |
| "epoch": 1.1912943871706758, | |
| "grad_norm": 0.031710829585790634, | |
| "learning_rate": 7.3524360390992275e-06, | |
| "loss": 1.0484931468963623, | |
| "step": 1040 | |
| }, | |
| { | |
| "epoch": 1.1935853379152348, | |
| "grad_norm": 0.10261533409357071, | |
| "learning_rate": 7.342073407820351e-06, | |
| "loss": 0.7066391706466675, | |
| "step": 1042 | |
| }, | |
| { | |
| "epoch": 1.1958762886597938, | |
| "grad_norm": 0.18190829455852509, | |
| "learning_rate": 7.331699028494161e-06, | |
| "loss": 1.1231335401535034, | |
| "step": 1044 | |
| }, | |
| { | |
| "epoch": 1.1981672394043528, | |
| "grad_norm": 0.2540644109249115, | |
| "learning_rate": 7.321312967284518e-06, | |
| "loss": 1.2889418601989746, | |
| "step": 1046 | |
| }, | |
| { | |
| "epoch": 1.2004581901489118, | |
| "grad_norm": 0.22046895325183868, | |
| "learning_rate": 7.310915290429799e-06, | |
| "loss": 1.2246109247207642, | |
| "step": 1048 | |
| }, | |
| { | |
| "epoch": 1.2027491408934707, | |
| "grad_norm": 0.10056769847869873, | |
| "learning_rate": 7.300506064242448e-06, | |
| "loss": 1.1946299076080322, | |
| "step": 1050 | |
| }, | |
| { | |
| "epoch": 1.2050400916380297, | |
| "grad_norm": 0.09971389919519424, | |
| "learning_rate": 7.290085355108573e-06, | |
| "loss": 1.191572666168213, | |
| "step": 1052 | |
| }, | |
| { | |
| "epoch": 1.2073310423825887, | |
| "grad_norm": 0.047876957803964615, | |
| "learning_rate": 7.279653229487517e-06, | |
| "loss": 1.1152572631835938, | |
| "step": 1054 | |
| }, | |
| { | |
| "epoch": 1.2096219931271477, | |
| "grad_norm": 0.038673873990774155, | |
| "learning_rate": 7.269209753911426e-06, | |
| "loss": 1.1568899154663086, | |
| "step": 1056 | |
| }, | |
| { | |
| "epoch": 1.2119129438717067, | |
| "grad_norm": 0.23321399092674255, | |
| "learning_rate": 7.258754994984839e-06, | |
| "loss": 0.8744759559631348, | |
| "step": 1058 | |
| }, | |
| { | |
| "epoch": 1.2142038946162657, | |
| "grad_norm": 0.036044131964445114, | |
| "learning_rate": 7.248289019384255e-06, | |
| "loss": 1.0827986001968384, | |
| "step": 1060 | |
| }, | |
| { | |
| "epoch": 1.2164948453608249, | |
| "grad_norm": 0.10701718181371689, | |
| "learning_rate": 7.237811893857703e-06, | |
| "loss": 1.2203986644744873, | |
| "step": 1062 | |
| }, | |
| { | |
| "epoch": 1.2187857961053838, | |
| "grad_norm": 0.04341509938240051, | |
| "learning_rate": 7.227323685224329e-06, | |
| "loss": 1.0838563442230225, | |
| "step": 1064 | |
| }, | |
| { | |
| "epoch": 1.2210767468499428, | |
| "grad_norm": 0.12016027420759201, | |
| "learning_rate": 7.216824460373959e-06, | |
| "loss": 1.111936092376709, | |
| "step": 1066 | |
| }, | |
| { | |
| "epoch": 1.2233676975945018, | |
| "grad_norm": 0.22775952517986298, | |
| "learning_rate": 7.206314286266676e-06, | |
| "loss": 0.9151073694229126, | |
| "step": 1068 | |
| }, | |
| { | |
| "epoch": 1.2256586483390608, | |
| "grad_norm": 0.10555245727300644, | |
| "learning_rate": 7.195793229932397e-06, | |
| "loss": 1.1633076667785645, | |
| "step": 1070 | |
| }, | |
| { | |
| "epoch": 1.2279495990836198, | |
| "grad_norm": 0.1947338879108429, | |
| "learning_rate": 7.185261358470436e-06, | |
| "loss": 1.0789947509765625, | |
| "step": 1072 | |
| }, | |
| { | |
| "epoch": 1.2302405498281788, | |
| "grad_norm": 0.032104719430208206, | |
| "learning_rate": 7.174718739049087e-06, | |
| "loss": 1.0364956855773926, | |
| "step": 1074 | |
| }, | |
| { | |
| "epoch": 1.2325315005727377, | |
| "grad_norm": 0.09648443758487701, | |
| "learning_rate": 7.164165438905186e-06, | |
| "loss": 1.002371072769165, | |
| "step": 1076 | |
| }, | |
| { | |
| "epoch": 1.2348224513172967, | |
| "grad_norm": 0.0989924743771553, | |
| "learning_rate": 7.153601525343692e-06, | |
| "loss": 1.1737957000732422, | |
| "step": 1078 | |
| }, | |
| { | |
| "epoch": 1.2371134020618557, | |
| "grad_norm": 0.23395802080631256, | |
| "learning_rate": 7.143027065737247e-06, | |
| "loss": 1.070110559463501, | |
| "step": 1080 | |
| }, | |
| { | |
| "epoch": 1.2394043528064147, | |
| "grad_norm": 0.05930604040622711, | |
| "learning_rate": 7.132442127525754e-06, | |
| "loss": 0.9888740181922913, | |
| "step": 1082 | |
| }, | |
| { | |
| "epoch": 1.2416953035509737, | |
| "grad_norm": 0.10642720013856888, | |
| "learning_rate": 7.121846778215946e-06, | |
| "loss": 1.1234185695648193, | |
| "step": 1084 | |
| }, | |
| { | |
| "epoch": 1.2439862542955327, | |
| "grad_norm": 0.09997859597206116, | |
| "learning_rate": 7.111241085380951e-06, | |
| "loss": 1.0492407083511353, | |
| "step": 1086 | |
| }, | |
| { | |
| "epoch": 1.2462772050400917, | |
| "grad_norm": 0.09804773330688477, | |
| "learning_rate": 7.100625116659867e-06, | |
| "loss": 1.124328851699829, | |
| "step": 1088 | |
| }, | |
| { | |
| "epoch": 1.2485681557846506, | |
| "grad_norm": 0.034081630408763885, | |
| "learning_rate": 7.089998939757323e-06, | |
| "loss": 1.098294734954834, | |
| "step": 1090 | |
| }, | |
| { | |
| "epoch": 1.2508591065292096, | |
| "grad_norm": 0.13619168102741241, | |
| "learning_rate": 7.07936262244306e-06, | |
| "loss": 1.3204905986785889, | |
| "step": 1092 | |
| }, | |
| { | |
| "epoch": 1.2531500572737686, | |
| "grad_norm": 0.04348979890346527, | |
| "learning_rate": 7.068716232551484e-06, | |
| "loss": 1.1737202405929565, | |
| "step": 1094 | |
| }, | |
| { | |
| "epoch": 1.2554410080183276, | |
| "grad_norm": 0.1019207090139389, | |
| "learning_rate": 7.05805983798124e-06, | |
| "loss": 1.240083932876587, | |
| "step": 1096 | |
| }, | |
| { | |
| "epoch": 1.2577319587628866, | |
| "grad_norm": 0.033528897911310196, | |
| "learning_rate": 7.047393506694784e-06, | |
| "loss": 1.1311392784118652, | |
| "step": 1098 | |
| }, | |
| { | |
| "epoch": 1.2600229095074456, | |
| "grad_norm": 0.11534976214170456, | |
| "learning_rate": 7.036717306717941e-06, | |
| "loss": 1.096567153930664, | |
| "step": 1100 | |
| }, | |
| { | |
| "epoch": 1.2623138602520045, | |
| "grad_norm": 0.17929257452487946, | |
| "learning_rate": 7.026031306139476e-06, | |
| "loss": 1.2111990451812744, | |
| "step": 1102 | |
| }, | |
| { | |
| "epoch": 1.2646048109965635, | |
| "grad_norm": 0.06933283805847168, | |
| "learning_rate": 7.015335573110655e-06, | |
| "loss": 1.0563737154006958, | |
| "step": 1104 | |
| }, | |
| { | |
| "epoch": 1.2668957617411225, | |
| "grad_norm": 0.2785737216472626, | |
| "learning_rate": 7.004630175844821e-06, | |
| "loss": 1.2497079372406006, | |
| "step": 1106 | |
| }, | |
| { | |
| "epoch": 1.2691867124856815, | |
| "grad_norm": 0.10546611249446869, | |
| "learning_rate": 6.9939151826169435e-06, | |
| "loss": 1.2129267454147339, | |
| "step": 1108 | |
| }, | |
| { | |
| "epoch": 1.2714776632302405, | |
| "grad_norm": 0.11540545523166656, | |
| "learning_rate": 6.9831906617632015e-06, | |
| "loss": 1.0614213943481445, | |
| "step": 1110 | |
| }, | |
| { | |
| "epoch": 1.2737686139747995, | |
| "grad_norm": 0.10855554044246674, | |
| "learning_rate": 6.972456681680526e-06, | |
| "loss": 1.1559650897979736, | |
| "step": 1112 | |
| }, | |
| { | |
| "epoch": 1.2760595647193584, | |
| "grad_norm": 0.10004860907793045, | |
| "learning_rate": 6.961713310826186e-06, | |
| "loss": 1.2221918106079102, | |
| "step": 1114 | |
| }, | |
| { | |
| "epoch": 1.2783505154639174, | |
| "grad_norm": 0.11250676959753036, | |
| "learning_rate": 6.950960617717331e-06, | |
| "loss": 1.2137792110443115, | |
| "step": 1116 | |
| }, | |
| { | |
| "epoch": 1.2806414662084764, | |
| "grad_norm": 0.10565535724163055, | |
| "learning_rate": 6.940198670930575e-06, | |
| "loss": 1.1705491542816162, | |
| "step": 1118 | |
| }, | |
| { | |
| "epoch": 1.2829324169530354, | |
| "grad_norm": 0.1519957184791565, | |
| "learning_rate": 6.929427539101542e-06, | |
| "loss": 1.167567491531372, | |
| "step": 1120 | |
| }, | |
| { | |
| "epoch": 1.2852233676975944, | |
| "grad_norm": 0.3252302408218384, | |
| "learning_rate": 6.918647290924431e-06, | |
| "loss": 0.6762139201164246, | |
| "step": 1122 | |
| }, | |
| { | |
| "epoch": 1.2875143184421534, | |
| "grad_norm": 0.10116905719041824, | |
| "learning_rate": 6.907857995151593e-06, | |
| "loss": 1.3349790573120117, | |
| "step": 1124 | |
| }, | |
| { | |
| "epoch": 1.2898052691867126, | |
| "grad_norm": 0.26615846157073975, | |
| "learning_rate": 6.897059720593072e-06, | |
| "loss": 1.2615208625793457, | |
| "step": 1126 | |
| }, | |
| { | |
| "epoch": 1.2920962199312716, | |
| "grad_norm": 0.28528809547424316, | |
| "learning_rate": 6.886252536116178e-06, | |
| "loss": 0.752361536026001, | |
| "step": 1128 | |
| }, | |
| { | |
| "epoch": 1.2943871706758305, | |
| "grad_norm": 0.10344325751066208, | |
| "learning_rate": 6.875436510645046e-06, | |
| "loss": 1.1990864276885986, | |
| "step": 1130 | |
| }, | |
| { | |
| "epoch": 1.2966781214203895, | |
| "grad_norm": 0.18242916464805603, | |
| "learning_rate": 6.864611713160195e-06, | |
| "loss": 0.5096596479415894, | |
| "step": 1132 | |
| }, | |
| { | |
| "epoch": 1.2989690721649485, | |
| "grad_norm": 0.10747134685516357, | |
| "learning_rate": 6.853778212698085e-06, | |
| "loss": 1.1793510913848877, | |
| "step": 1134 | |
| }, | |
| { | |
| "epoch": 1.3012600229095075, | |
| "grad_norm": 0.10210155695676804, | |
| "learning_rate": 6.842936078350687e-06, | |
| "loss": 1.1578278541564941, | |
| "step": 1136 | |
| }, | |
| { | |
| "epoch": 1.3035509736540665, | |
| "grad_norm": 0.09255377948284149, | |
| "learning_rate": 6.832085379265032e-06, | |
| "loss": 1.0329861640930176, | |
| "step": 1138 | |
| }, | |
| { | |
| "epoch": 1.3058419243986255, | |
| "grad_norm": 0.23646067082881927, | |
| "learning_rate": 6.821226184642769e-06, | |
| "loss": 1.1236613988876343, | |
| "step": 1140 | |
| }, | |
| { | |
| "epoch": 1.3081328751431844, | |
| "grad_norm": 0.17102530598640442, | |
| "learning_rate": 6.810358563739741e-06, | |
| "loss": 0.9725713729858398, | |
| "step": 1142 | |
| }, | |
| { | |
| "epoch": 1.3104238258877434, | |
| "grad_norm": 0.07726361602544785, | |
| "learning_rate": 6.799482585865517e-06, | |
| "loss": 1.1270265579223633, | |
| "step": 1144 | |
| }, | |
| { | |
| "epoch": 1.3127147766323024, | |
| "grad_norm": 0.2417464554309845, | |
| "learning_rate": 6.788598320382969e-06, | |
| "loss": 0.6766105890274048, | |
| "step": 1146 | |
| }, | |
| { | |
| "epoch": 1.3150057273768614, | |
| "grad_norm": 0.12223535776138306, | |
| "learning_rate": 6.777705836707827e-06, | |
| "loss": 0.6427032947540283, | |
| "step": 1148 | |
| }, | |
| { | |
| "epoch": 1.3172966781214204, | |
| "grad_norm": 0.035287536680698395, | |
| "learning_rate": 6.766805204308232e-06, | |
| "loss": 1.086439847946167, | |
| "step": 1150 | |
| }, | |
| { | |
| "epoch": 1.3195876288659794, | |
| "grad_norm": 0.15588583052158356, | |
| "learning_rate": 6.755896492704292e-06, | |
| "loss": 0.690952479839325, | |
| "step": 1152 | |
| }, | |
| { | |
| "epoch": 1.3218785796105383, | |
| "grad_norm": 0.036040663719177246, | |
| "learning_rate": 6.7449797714676446e-06, | |
| "loss": 1.0549356937408447, | |
| "step": 1154 | |
| }, | |
| { | |
| "epoch": 1.3241695303550973, | |
| "grad_norm": 0.12717777490615845, | |
| "learning_rate": 6.734055110221004e-06, | |
| "loss": 1.0231207609176636, | |
| "step": 1156 | |
| }, | |
| { | |
| "epoch": 1.3264604810996563, | |
| "grad_norm": 0.1270737648010254, | |
| "learning_rate": 6.72312257863773e-06, | |
| "loss": 0.8079697489738464, | |
| "step": 1158 | |
| }, | |
| { | |
| "epoch": 1.3287514318442153, | |
| "grad_norm": 0.279462605714798, | |
| "learning_rate": 6.712182246441372e-06, | |
| "loss": 1.142216444015503, | |
| "step": 1160 | |
| }, | |
| { | |
| "epoch": 1.3310423825887743, | |
| "grad_norm": 0.11068404465913773, | |
| "learning_rate": 6.701234183405228e-06, | |
| "loss": 1.2053807973861694, | |
| "step": 1162 | |
| }, | |
| { | |
| "epoch": 1.3333333333333333, | |
| "grad_norm": 0.18221408128738403, | |
| "learning_rate": 6.690278459351907e-06, | |
| "loss": 1.0797538757324219, | |
| "step": 1164 | |
| }, | |
| { | |
| "epoch": 1.3356242840778922, | |
| "grad_norm": 0.10141758620738983, | |
| "learning_rate": 6.679315144152867e-06, | |
| "loss": 1.0947381258010864, | |
| "step": 1166 | |
| }, | |
| { | |
| "epoch": 1.3379152348224514, | |
| "grad_norm": 0.03468356281518936, | |
| "learning_rate": 6.6683443077279885e-06, | |
| "loss": 0.9148234724998474, | |
| "step": 1168 | |
| }, | |
| { | |
| "epoch": 1.3402061855670104, | |
| "grad_norm": 0.2539704144001007, | |
| "learning_rate": 6.6573660200451155e-06, | |
| "loss": 0.7626911401748657, | |
| "step": 1170 | |
| }, | |
| { | |
| "epoch": 1.3424971363115694, | |
| "grad_norm": 0.08057498931884766, | |
| "learning_rate": 6.646380351119612e-06, | |
| "loss": 1.0045770406723022, | |
| "step": 1172 | |
| }, | |
| { | |
| "epoch": 1.3447880870561284, | |
| "grad_norm": 0.10992975533008575, | |
| "learning_rate": 6.6353873710139185e-06, | |
| "loss": 1.1851505041122437, | |
| "step": 1174 | |
| }, | |
| { | |
| "epoch": 1.3470790378006874, | |
| "grad_norm": 0.05306171998381615, | |
| "learning_rate": 6.624387149837105e-06, | |
| "loss": 1.1102643013000488, | |
| "step": 1176 | |
| }, | |
| { | |
| "epoch": 1.3493699885452464, | |
| "grad_norm": 0.03687020018696785, | |
| "learning_rate": 6.613379757744419e-06, | |
| "loss": 1.0992820262908936, | |
| "step": 1178 | |
| }, | |
| { | |
| "epoch": 1.3516609392898054, | |
| "grad_norm": 0.04827934503555298, | |
| "learning_rate": 6.602365264936843e-06, | |
| "loss": 0.9393289089202881, | |
| "step": 1180 | |
| }, | |
| { | |
| "epoch": 1.3539518900343643, | |
| "grad_norm": 0.03528865426778793, | |
| "learning_rate": 6.5913437416606485e-06, | |
| "loss": 1.1535475254058838, | |
| "step": 1182 | |
| }, | |
| { | |
| "epoch": 1.3562428407789233, | |
| "grad_norm": 0.16577307879924774, | |
| "learning_rate": 6.5803152582069365e-06, | |
| "loss": 1.034282922744751, | |
| "step": 1184 | |
| }, | |
| { | |
| "epoch": 1.3585337915234823, | |
| "grad_norm": 0.10467011481523514, | |
| "learning_rate": 6.569279884911205e-06, | |
| "loss": 1.1791577339172363, | |
| "step": 1186 | |
| }, | |
| { | |
| "epoch": 1.3608247422680413, | |
| "grad_norm": 0.04116935655474663, | |
| "learning_rate": 6.558237692152889e-06, | |
| "loss": 1.1147451400756836, | |
| "step": 1188 | |
| }, | |
| { | |
| "epoch": 1.3631156930126003, | |
| "grad_norm": 0.09797222167253494, | |
| "learning_rate": 6.547188750354917e-06, | |
| "loss": 1.1144938468933105, | |
| "step": 1190 | |
| }, | |
| { | |
| "epoch": 1.3654066437571593, | |
| "grad_norm": 0.07124507427215576, | |
| "learning_rate": 6.536133129983261e-06, | |
| "loss": 1.0669329166412354, | |
| "step": 1192 | |
| }, | |
| { | |
| "epoch": 1.3676975945017182, | |
| "grad_norm": 0.10105873644351959, | |
| "learning_rate": 6.5250709015464834e-06, | |
| "loss": 1.138416051864624, | |
| "step": 1194 | |
| }, | |
| { | |
| "epoch": 1.3699885452462772, | |
| "grad_norm": 0.10858606547117233, | |
| "learning_rate": 6.5140021355952935e-06, | |
| "loss": 1.193711757659912, | |
| "step": 1196 | |
| }, | |
| { | |
| "epoch": 1.3722794959908362, | |
| "grad_norm": 0.20762988924980164, | |
| "learning_rate": 6.502926902722092e-06, | |
| "loss": 1.0726079940795898, | |
| "step": 1198 | |
| }, | |
| { | |
| "epoch": 1.3745704467353952, | |
| "grad_norm": 0.10816437751054764, | |
| "learning_rate": 6.4918452735605245e-06, | |
| "loss": 1.0476200580596924, | |
| "step": 1200 | |
| }, | |
| { | |
| "epoch": 1.3768613974799542, | |
| "grad_norm": 0.08065369725227356, | |
| "learning_rate": 6.4807573187850295e-06, | |
| "loss": 0.9677037000656128, | |
| "step": 1202 | |
| }, | |
| { | |
| "epoch": 1.3791523482245132, | |
| "grad_norm": 0.22005292773246765, | |
| "learning_rate": 6.469663109110389e-06, | |
| "loss": 0.9527295827865601, | |
| "step": 1204 | |
| }, | |
| { | |
| "epoch": 1.3814432989690721, | |
| "grad_norm": 0.10050750523805618, | |
| "learning_rate": 6.458562715291273e-06, | |
| "loss": 1.1403913497924805, | |
| "step": 1206 | |
| }, | |
| { | |
| "epoch": 1.3837342497136311, | |
| "grad_norm": 0.034232836216688156, | |
| "learning_rate": 6.4474562081217975e-06, | |
| "loss": 1.007227897644043, | |
| "step": 1208 | |
| }, | |
| { | |
| "epoch": 1.38602520045819, | |
| "grad_norm": 0.11036860942840576, | |
| "learning_rate": 6.436343658435059e-06, | |
| "loss": 1.2514678239822388, | |
| "step": 1210 | |
| }, | |
| { | |
| "epoch": 1.388316151202749, | |
| "grad_norm": 0.12463992834091187, | |
| "learning_rate": 6.4252251371026984e-06, | |
| "loss": 1.169065237045288, | |
| "step": 1212 | |
| }, | |
| { | |
| "epoch": 1.390607101947308, | |
| "grad_norm": 0.10533028095960617, | |
| "learning_rate": 6.414100715034437e-06, | |
| "loss": 1.2318856716156006, | |
| "step": 1214 | |
| }, | |
| { | |
| "epoch": 1.392898052691867, | |
| "grad_norm": 0.044587064534425735, | |
| "learning_rate": 6.4029704631776334e-06, | |
| "loss": 1.1643192768096924, | |
| "step": 1216 | |
| }, | |
| { | |
| "epoch": 1.395189003436426, | |
| "grad_norm": 0.10597223788499832, | |
| "learning_rate": 6.3918344525168176e-06, | |
| "loss": 1.227967619895935, | |
| "step": 1218 | |
| }, | |
| { | |
| "epoch": 1.397479954180985, | |
| "grad_norm": 0.1139107197523117, | |
| "learning_rate": 6.380692754073257e-06, | |
| "loss": 1.2038414478302002, | |
| "step": 1220 | |
| }, | |
| { | |
| "epoch": 1.399770904925544, | |
| "grad_norm": 0.037755899131298065, | |
| "learning_rate": 6.3695454389044885e-06, | |
| "loss": 1.0631117820739746, | |
| "step": 1222 | |
| }, | |
| { | |
| "epoch": 1.402061855670103, | |
| "grad_norm": 0.13147905468940735, | |
| "learning_rate": 6.35839257810387e-06, | |
| "loss": 1.1393280029296875, | |
| "step": 1224 | |
| }, | |
| { | |
| "epoch": 1.404352806414662, | |
| "grad_norm": 0.10851233452558517, | |
| "learning_rate": 6.347234242800131e-06, | |
| "loss": 1.094638705253601, | |
| "step": 1226 | |
| }, | |
| { | |
| "epoch": 1.406643757159221, | |
| "grad_norm": 0.20785588026046753, | |
| "learning_rate": 6.336070504156907e-06, | |
| "loss": 0.39179080724716187, | |
| "step": 1228 | |
| }, | |
| { | |
| "epoch": 1.40893470790378, | |
| "grad_norm": 0.1313144713640213, | |
| "learning_rate": 6.324901433372307e-06, | |
| "loss": 1.1032609939575195, | |
| "step": 1230 | |
| }, | |
| { | |
| "epoch": 1.411225658648339, | |
| "grad_norm": 0.10240298509597778, | |
| "learning_rate": 6.313727101678433e-06, | |
| "loss": 1.151193380355835, | |
| "step": 1232 | |
| }, | |
| { | |
| "epoch": 1.413516609392898, | |
| "grad_norm": 0.16149231791496277, | |
| "learning_rate": 6.302547580340949e-06, | |
| "loss": 1.1676666736602783, | |
| "step": 1234 | |
| }, | |
| { | |
| "epoch": 1.4158075601374571, | |
| "grad_norm": 0.033813584595918655, | |
| "learning_rate": 6.291362940658612e-06, | |
| "loss": 0.9767388701438904, | |
| "step": 1236 | |
| }, | |
| { | |
| "epoch": 1.418098510882016, | |
| "grad_norm": 0.15657632052898407, | |
| "learning_rate": 6.2801732539628205e-06, | |
| "loss": 1.1303942203521729, | |
| "step": 1238 | |
| }, | |
| { | |
| "epoch": 1.420389461626575, | |
| "grad_norm": 0.05029843747615814, | |
| "learning_rate": 6.2689785916171656e-06, | |
| "loss": 1.1143161058425903, | |
| "step": 1240 | |
| }, | |
| { | |
| "epoch": 1.422680412371134, | |
| "grad_norm": 0.07945811748504639, | |
| "learning_rate": 6.257779025016967e-06, | |
| "loss": 1.1215400695800781, | |
| "step": 1242 | |
| }, | |
| { | |
| "epoch": 1.424971363115693, | |
| "grad_norm": 0.12305624037981033, | |
| "learning_rate": 6.246574625588824e-06, | |
| "loss": 1.096776008605957, | |
| "step": 1244 | |
| }, | |
| { | |
| "epoch": 1.427262313860252, | |
| "grad_norm": 0.10627985745668411, | |
| "learning_rate": 6.235365464790158e-06, | |
| "loss": 1.1911216974258423, | |
| "step": 1246 | |
| }, | |
| { | |
| "epoch": 1.429553264604811, | |
| "grad_norm": 0.06793335825204849, | |
| "learning_rate": 6.224151614108755e-06, | |
| "loss": 1.1242647171020508, | |
| "step": 1248 | |
| }, | |
| { | |
| "epoch": 1.43184421534937, | |
| "grad_norm": 0.10330016911029816, | |
| "learning_rate": 6.212933145062313e-06, | |
| "loss": 0.9869295954704285, | |
| "step": 1250 | |
| }, | |
| { | |
| "epoch": 1.434135166093929, | |
| "grad_norm": 0.09804986417293549, | |
| "learning_rate": 6.201710129197984e-06, | |
| "loss": 1.0983688831329346, | |
| "step": 1252 | |
| }, | |
| { | |
| "epoch": 1.436426116838488, | |
| "grad_norm": 0.1734234243631363, | |
| "learning_rate": 6.190482638091917e-06, | |
| "loss": 1.254033088684082, | |
| "step": 1254 | |
| }, | |
| { | |
| "epoch": 1.438717067583047, | |
| "grad_norm": 0.21741870045661926, | |
| "learning_rate": 6.179250743348801e-06, | |
| "loss": 0.6976019144058228, | |
| "step": 1256 | |
| }, | |
| { | |
| "epoch": 1.441008018327606, | |
| "grad_norm": 0.10098083317279816, | |
| "learning_rate": 6.1680145166014135e-06, | |
| "loss": 1.1904094219207764, | |
| "step": 1258 | |
| }, | |
| { | |
| "epoch": 1.443298969072165, | |
| "grad_norm": 0.03571165353059769, | |
| "learning_rate": 6.156774029510158e-06, | |
| "loss": 1.0290796756744385, | |
| "step": 1260 | |
| }, | |
| { | |
| "epoch": 1.445589919816724, | |
| "grad_norm": 0.04223793372511864, | |
| "learning_rate": 6.145529353762608e-06, | |
| "loss": 1.0820773839950562, | |
| "step": 1262 | |
| }, | |
| { | |
| "epoch": 1.447880870561283, | |
| "grad_norm": 0.08061600476503372, | |
| "learning_rate": 6.1342805610730515e-06, | |
| "loss": 1.0397876501083374, | |
| "step": 1264 | |
| }, | |
| { | |
| "epoch": 1.4501718213058419, | |
| "grad_norm": 0.1024092435836792, | |
| "learning_rate": 6.123027723182034e-06, | |
| "loss": 1.0905309915542603, | |
| "step": 1266 | |
| }, | |
| { | |
| "epoch": 1.4524627720504009, | |
| "grad_norm": 0.1202508732676506, | |
| "learning_rate": 6.111770911855895e-06, | |
| "loss": 1.3037893772125244, | |
| "step": 1268 | |
| }, | |
| { | |
| "epoch": 1.4547537227949598, | |
| "grad_norm": 0.11221358925104141, | |
| "learning_rate": 6.100510198886324e-06, | |
| "loss": 1.166261076927185, | |
| "step": 1270 | |
| }, | |
| { | |
| "epoch": 1.4570446735395188, | |
| "grad_norm": 0.033267792314291, | |
| "learning_rate": 6.089245656089882e-06, | |
| "loss": 1.0799281597137451, | |
| "step": 1272 | |
| }, | |
| { | |
| "epoch": 1.4593356242840778, | |
| "grad_norm": 0.20279622077941895, | |
| "learning_rate": 6.077977355307564e-06, | |
| "loss": 0.9322797060012817, | |
| "step": 1274 | |
| }, | |
| { | |
| "epoch": 1.461626575028637, | |
| "grad_norm": 0.15380460023880005, | |
| "learning_rate": 6.066705368404332e-06, | |
| "loss": 1.0121163129806519, | |
| "step": 1276 | |
| }, | |
| { | |
| "epoch": 1.463917525773196, | |
| "grad_norm": 0.11669916659593582, | |
| "learning_rate": 6.0554297672686515e-06, | |
| "loss": 1.159261703491211, | |
| "step": 1278 | |
| }, | |
| { | |
| "epoch": 1.466208476517755, | |
| "grad_norm": 0.13704603910446167, | |
| "learning_rate": 6.044150623812041e-06, | |
| "loss": 1.2306301593780518, | |
| "step": 1280 | |
| }, | |
| { | |
| "epoch": 1.468499427262314, | |
| "grad_norm": 0.10381774604320526, | |
| "learning_rate": 6.032868009968611e-06, | |
| "loss": 0.6591931581497192, | |
| "step": 1282 | |
| }, | |
| { | |
| "epoch": 1.470790378006873, | |
| "grad_norm": 0.10854183882474899, | |
| "learning_rate": 6.021581997694604e-06, | |
| "loss": 1.2100105285644531, | |
| "step": 1284 | |
| }, | |
| { | |
| "epoch": 1.473081328751432, | |
| "grad_norm": 0.05851943418383598, | |
| "learning_rate": 6.010292658967937e-06, | |
| "loss": 0.8533772230148315, | |
| "step": 1286 | |
| }, | |
| { | |
| "epoch": 1.475372279495991, | |
| "grad_norm": 0.2286427617073059, | |
| "learning_rate": 5.999000065787741e-06, | |
| "loss": 1.1328685283660889, | |
| "step": 1288 | |
| }, | |
| { | |
| "epoch": 1.47766323024055, | |
| "grad_norm": 0.0334353968501091, | |
| "learning_rate": 5.987704290173904e-06, | |
| "loss": 1.125452995300293, | |
| "step": 1290 | |
| }, | |
| { | |
| "epoch": 1.479954180985109, | |
| "grad_norm": 0.11280791461467743, | |
| "learning_rate": 5.976405404166609e-06, | |
| "loss": 1.0590157508850098, | |
| "step": 1292 | |
| }, | |
| { | |
| "epoch": 1.4822451317296679, | |
| "grad_norm": 0.11283966153860092, | |
| "learning_rate": 5.965103479825874e-06, | |
| "loss": 1.32285475730896, | |
| "step": 1294 | |
| }, | |
| { | |
| "epoch": 1.4845360824742269, | |
| "grad_norm": 0.10280606895685196, | |
| "learning_rate": 5.953798589231102e-06, | |
| "loss": 1.1084046363830566, | |
| "step": 1296 | |
| }, | |
| { | |
| "epoch": 1.4868270332187858, | |
| "grad_norm": 0.03574017807841301, | |
| "learning_rate": 5.942490804480605e-06, | |
| "loss": 1.0094585418701172, | |
| "step": 1298 | |
| }, | |
| { | |
| "epoch": 1.4891179839633448, | |
| "grad_norm": 0.18968385457992554, | |
| "learning_rate": 5.931180197691155e-06, | |
| "loss": 1.0455267429351807, | |
| "step": 1300 | |
| }, | |
| { | |
| "epoch": 1.4914089347079038, | |
| "grad_norm": 0.12325051426887512, | |
| "learning_rate": 5.919866840997528e-06, | |
| "loss": 0.8611164689064026, | |
| "step": 1302 | |
| }, | |
| { | |
| "epoch": 1.4936998854524628, | |
| "grad_norm": 0.16876213252544403, | |
| "learning_rate": 5.908550806552027e-06, | |
| "loss": 1.2782384157180786, | |
| "step": 1304 | |
| }, | |
| { | |
| "epoch": 1.4959908361970218, | |
| "grad_norm": 0.034040793776512146, | |
| "learning_rate": 5.897232166524044e-06, | |
| "loss": 1.1155657768249512, | |
| "step": 1306 | |
| }, | |
| { | |
| "epoch": 1.4982817869415808, | |
| "grad_norm": 0.20665396749973297, | |
| "learning_rate": 5.885910993099581e-06, | |
| "loss": 0.8285434246063232, | |
| "step": 1308 | |
| }, | |
| { | |
| "epoch": 1.5005727376861397, | |
| "grad_norm": 0.04009568691253662, | |
| "learning_rate": 5.874587358480798e-06, | |
| "loss": 1.116700530052185, | |
| "step": 1310 | |
| }, | |
| { | |
| "epoch": 1.5028636884306987, | |
| "grad_norm": 0.14039941132068634, | |
| "learning_rate": 5.863261334885553e-06, | |
| "loss": 1.3074442148208618, | |
| "step": 1312 | |
| }, | |
| { | |
| "epoch": 1.5051546391752577, | |
| "grad_norm": 0.037182215601205826, | |
| "learning_rate": 5.851932994546941e-06, | |
| "loss": 1.1499791145324707, | |
| "step": 1314 | |
| }, | |
| { | |
| "epoch": 1.5074455899198167, | |
| "grad_norm": 0.2790544033050537, | |
| "learning_rate": 5.840602409712831e-06, | |
| "loss": 1.098710060119629, | |
| "step": 1316 | |
| }, | |
| { | |
| "epoch": 1.5097365406643757, | |
| "grad_norm": 0.14369939267635345, | |
| "learning_rate": 5.829269652645404e-06, | |
| "loss": 0.9552765488624573, | |
| "step": 1318 | |
| }, | |
| { | |
| "epoch": 1.5120274914089347, | |
| "grad_norm": 0.07011190801858902, | |
| "learning_rate": 5.817934795620702e-06, | |
| "loss": 0.8580765128135681, | |
| "step": 1320 | |
| }, | |
| { | |
| "epoch": 1.5143184421534936, | |
| "grad_norm": 0.06823685765266418, | |
| "learning_rate": 5.8065979109281515e-06, | |
| "loss": 1.1291577816009521, | |
| "step": 1322 | |
| }, | |
| { | |
| "epoch": 1.5166093928980526, | |
| "grad_norm": 0.10182562470436096, | |
| "learning_rate": 5.795259070870114e-06, | |
| "loss": 1.0331019163131714, | |
| "step": 1324 | |
| }, | |
| { | |
| "epoch": 1.5189003436426116, | |
| "grad_norm": 0.046754829585552216, | |
| "learning_rate": 5.783918347761424e-06, | |
| "loss": 1.0500078201293945, | |
| "step": 1326 | |
| }, | |
| { | |
| "epoch": 1.5211912943871706, | |
| "grad_norm": 0.27025946974754333, | |
| "learning_rate": 5.77257581392892e-06, | |
| "loss": 0.9796900749206543, | |
| "step": 1328 | |
| }, | |
| { | |
| "epoch": 1.5234822451317296, | |
| "grad_norm": 0.10360334813594818, | |
| "learning_rate": 5.761231541710994e-06, | |
| "loss": 1.0910930633544922, | |
| "step": 1330 | |
| }, | |
| { | |
| "epoch": 1.5257731958762886, | |
| "grad_norm": 0.18193475902080536, | |
| "learning_rate": 5.7498856034571235e-06, | |
| "loss": 0.933897852897644, | |
| "step": 1332 | |
| }, | |
| { | |
| "epoch": 1.5280641466208476, | |
| "grad_norm": 0.04006630554795265, | |
| "learning_rate": 5.7385380715274075e-06, | |
| "loss": 0.7624289989471436, | |
| "step": 1334 | |
| }, | |
| { | |
| "epoch": 1.5303550973654065, | |
| "grad_norm": 0.10916395485401154, | |
| "learning_rate": 5.727189018292115e-06, | |
| "loss": 1.0581674575805664, | |
| "step": 1336 | |
| }, | |
| { | |
| "epoch": 1.5326460481099655, | |
| "grad_norm": 0.17847099900245667, | |
| "learning_rate": 5.715838516131212e-06, | |
| "loss": 0.7457289695739746, | |
| "step": 1338 | |
| }, | |
| { | |
| "epoch": 1.5349369988545245, | |
| "grad_norm": 0.03762081265449524, | |
| "learning_rate": 5.704486637433907e-06, | |
| "loss": 1.13935387134552, | |
| "step": 1340 | |
| }, | |
| { | |
| "epoch": 1.5372279495990835, | |
| "grad_norm": 0.038652852177619934, | |
| "learning_rate": 5.6931334545981876e-06, | |
| "loss": 1.084958791732788, | |
| "step": 1342 | |
| }, | |
| { | |
| "epoch": 1.5395189003436425, | |
| "grad_norm": 0.3131856918334961, | |
| "learning_rate": 5.68177904003036e-06, | |
| "loss": 0.5619981288909912, | |
| "step": 1344 | |
| }, | |
| { | |
| "epoch": 1.5418098510882015, | |
| "grad_norm": 0.23260389268398285, | |
| "learning_rate": 5.670423466144585e-06, | |
| "loss": 0.7182325124740601, | |
| "step": 1346 | |
| }, | |
| { | |
| "epoch": 1.5441008018327604, | |
| "grad_norm": 0.04350382462143898, | |
| "learning_rate": 5.659066805362416e-06, | |
| "loss": 1.1135191917419434, | |
| "step": 1348 | |
| }, | |
| { | |
| "epoch": 1.5463917525773194, | |
| "grad_norm": 0.032647889107465744, | |
| "learning_rate": 5.647709130112339e-06, | |
| "loss": 1.0672420263290405, | |
| "step": 1350 | |
| }, | |
| { | |
| "epoch": 1.5486827033218786, | |
| "grad_norm": 0.20975816249847412, | |
| "learning_rate": 5.6363505128293105e-06, | |
| "loss": 1.0823326110839844, | |
| "step": 1352 | |
| }, | |
| { | |
| "epoch": 1.5509736540664376, | |
| "grad_norm": 0.10048626363277435, | |
| "learning_rate": 5.624991025954296e-06, | |
| "loss": 1.1277685165405273, | |
| "step": 1354 | |
| }, | |
| { | |
| "epoch": 1.5532646048109966, | |
| "grad_norm": 0.1214866116642952, | |
| "learning_rate": 5.613630741933801e-06, | |
| "loss": 1.0712693929672241, | |
| "step": 1356 | |
| }, | |
| { | |
| "epoch": 1.5555555555555556, | |
| "grad_norm": 0.24091294407844543, | |
| "learning_rate": 5.602269733219422e-06, | |
| "loss": 0.8346927165985107, | |
| "step": 1358 | |
| }, | |
| { | |
| "epoch": 1.5578465063001146, | |
| "grad_norm": 0.14887717366218567, | |
| "learning_rate": 5.590908072267376e-06, | |
| "loss": 1.1978172063827515, | |
| "step": 1360 | |
| }, | |
| { | |
| "epoch": 1.5601374570446735, | |
| "grad_norm": 0.28521859645843506, | |
| "learning_rate": 5.579545831538036e-06, | |
| "loss": 0.9784339666366577, | |
| "step": 1362 | |
| }, | |
| { | |
| "epoch": 1.5624284077892325, | |
| "grad_norm": 0.10800915211439133, | |
| "learning_rate": 5.568183083495476e-06, | |
| "loss": 1.2052793502807617, | |
| "step": 1364 | |
| }, | |
| { | |
| "epoch": 1.5647193585337915, | |
| "grad_norm": 0.11474448442459106, | |
| "learning_rate": 5.556819900607004e-06, | |
| "loss": 1.0182474851608276, | |
| "step": 1366 | |
| }, | |
| { | |
| "epoch": 1.5670103092783505, | |
| "grad_norm": 0.11097566783428192, | |
| "learning_rate": 5.545456355342702e-06, | |
| "loss": 1.1119379997253418, | |
| "step": 1368 | |
| }, | |
| { | |
| "epoch": 1.5693012600229095, | |
| "grad_norm": 0.11000394821166992, | |
| "learning_rate": 5.53409252017496e-06, | |
| "loss": 1.1207211017608643, | |
| "step": 1370 | |
| }, | |
| { | |
| "epoch": 1.5715922107674685, | |
| "grad_norm": 0.13542112708091736, | |
| "learning_rate": 5.522728467578024e-06, | |
| "loss": 1.3491647243499756, | |
| "step": 1372 | |
| }, | |
| { | |
| "epoch": 1.5738831615120275, | |
| "grad_norm": 0.10330595821142197, | |
| "learning_rate": 5.51136427002752e-06, | |
| "loss": 1.1328983306884766, | |
| "step": 1374 | |
| }, | |
| { | |
| "epoch": 1.5761741122565864, | |
| "grad_norm": 0.12681104242801666, | |
| "learning_rate": 5.500000000000001e-06, | |
| "loss": 1.0954585075378418, | |
| "step": 1376 | |
| }, | |
| { | |
| "epoch": 1.5784650630011456, | |
| "grad_norm": 0.13023322820663452, | |
| "learning_rate": 5.488635729972482e-06, | |
| "loss": 1.0727986097335815, | |
| "step": 1378 | |
| }, | |
| { | |
| "epoch": 1.5807560137457046, | |
| "grad_norm": 0.10406231880187988, | |
| "learning_rate": 5.477271532421978e-06, | |
| "loss": 1.1195957660675049, | |
| "step": 1380 | |
| }, | |
| { | |
| "epoch": 1.5830469644902636, | |
| "grad_norm": 0.09854082018136978, | |
| "learning_rate": 5.465907479825041e-06, | |
| "loss": 1.2089966535568237, | |
| "step": 1382 | |
| }, | |
| { | |
| "epoch": 1.5853379152348226, | |
| "grad_norm": 0.05718742311000824, | |
| "learning_rate": 5.454543644657302e-06, | |
| "loss": 1.0215727090835571, | |
| "step": 1384 | |
| }, | |
| { | |
| "epoch": 1.5876288659793816, | |
| "grad_norm": 0.03251669183373451, | |
| "learning_rate": 5.4431800993929985e-06, | |
| "loss": 0.9556213617324829, | |
| "step": 1386 | |
| }, | |
| { | |
| "epoch": 1.5899198167239406, | |
| "grad_norm": 0.18236595392227173, | |
| "learning_rate": 5.431816916504526e-06, | |
| "loss": 1.1348545551300049, | |
| "step": 1388 | |
| }, | |
| { | |
| "epoch": 1.5922107674684995, | |
| "grad_norm": 0.03708245977759361, | |
| "learning_rate": 5.420454168461966e-06, | |
| "loss": 1.1390302181243896, | |
| "step": 1390 | |
| }, | |
| { | |
| "epoch": 1.5945017182130585, | |
| "grad_norm": 0.18347609043121338, | |
| "learning_rate": 5.4090919277326255e-06, | |
| "loss": 0.8546306490898132, | |
| "step": 1392 | |
| }, | |
| { | |
| "epoch": 1.5967926689576175, | |
| "grad_norm": 0.05539577454328537, | |
| "learning_rate": 5.39773026678058e-06, | |
| "loss": 0.792382001876831, | |
| "step": 1394 | |
| }, | |
| { | |
| "epoch": 1.5990836197021765, | |
| "grad_norm": 0.1365845501422882, | |
| "learning_rate": 5.386369258066201e-06, | |
| "loss": 1.3111037015914917, | |
| "step": 1396 | |
| }, | |
| { | |
| "epoch": 1.6013745704467355, | |
| "grad_norm": 0.03231525421142578, | |
| "learning_rate": 5.3750089740457075e-06, | |
| "loss": 1.0748083591461182, | |
| "step": 1398 | |
| }, | |
| { | |
| "epoch": 1.6036655211912945, | |
| "grad_norm": 0.10971445590257645, | |
| "learning_rate": 5.363649487170691e-06, | |
| "loss": 1.0106375217437744, | |
| "step": 1400 | |
| }, | |
| { | |
| "epoch": 1.6059564719358534, | |
| "grad_norm": 0.13350991904735565, | |
| "learning_rate": 5.352290869887663e-06, | |
| "loss": 1.202370524406433, | |
| "step": 1402 | |
| }, | |
| { | |
| "epoch": 1.6082474226804124, | |
| "grad_norm": 0.09823276847600937, | |
| "learning_rate": 5.340933194637586e-06, | |
| "loss": 0.6368390321731567, | |
| "step": 1404 | |
| }, | |
| { | |
| "epoch": 1.6105383734249714, | |
| "grad_norm": 0.11566170305013657, | |
| "learning_rate": 5.329576533855415e-06, | |
| "loss": 1.2471826076507568, | |
| "step": 1406 | |
| }, | |
| { | |
| "epoch": 1.6128293241695304, | |
| "grad_norm": 0.03772154077887535, | |
| "learning_rate": 5.3182209599696415e-06, | |
| "loss": 0.9229879975318909, | |
| "step": 1408 | |
| }, | |
| { | |
| "epoch": 1.6151202749140894, | |
| "grad_norm": 0.1596955955028534, | |
| "learning_rate": 5.306866545401813e-06, | |
| "loss": 1.1240687370300293, | |
| "step": 1410 | |
| }, | |
| { | |
| "epoch": 1.6174112256586484, | |
| "grad_norm": 0.03676540032029152, | |
| "learning_rate": 5.295513362566096e-06, | |
| "loss": 0.7489064931869507, | |
| "step": 1412 | |
| }, | |
| { | |
| "epoch": 1.6197021764032073, | |
| "grad_norm": 0.03176848217844963, | |
| "learning_rate": 5.28416148386879e-06, | |
| "loss": 1.0567030906677246, | |
| "step": 1414 | |
| }, | |
| { | |
| "epoch": 1.6219931271477663, | |
| "grad_norm": 0.10595987737178802, | |
| "learning_rate": 5.272810981707886e-06, | |
| "loss": 1.0767991542816162, | |
| "step": 1416 | |
| }, | |
| { | |
| "epoch": 1.6242840778923253, | |
| "grad_norm": 0.25980374217033386, | |
| "learning_rate": 5.261461928472593e-06, | |
| "loss": 0.9103140830993652, | |
| "step": 1418 | |
| }, | |
| { | |
| "epoch": 1.6265750286368843, | |
| "grad_norm": 0.09718900173902512, | |
| "learning_rate": 5.250114396542877e-06, | |
| "loss": 1.0742920637130737, | |
| "step": 1420 | |
| }, | |
| { | |
| "epoch": 1.6288659793814433, | |
| "grad_norm": 0.20691142976284027, | |
| "learning_rate": 5.238768458289007e-06, | |
| "loss": 1.2718803882598877, | |
| "step": 1422 | |
| }, | |
| { | |
| "epoch": 1.6311569301260023, | |
| "grad_norm": 0.07437516003847122, | |
| "learning_rate": 5.227424186071081e-06, | |
| "loss": 0.9761091470718384, | |
| "step": 1424 | |
| }, | |
| { | |
| "epoch": 1.6334478808705613, | |
| "grad_norm": 0.11155454814434052, | |
| "learning_rate": 5.216081652238579e-06, | |
| "loss": 1.1434038877487183, | |
| "step": 1426 | |
| }, | |
| { | |
| "epoch": 1.6357388316151202, | |
| "grad_norm": 0.21897216141223907, | |
| "learning_rate": 5.204740929129887e-06, | |
| "loss": 1.1481974124908447, | |
| "step": 1428 | |
| }, | |
| { | |
| "epoch": 1.6380297823596792, | |
| "grad_norm": 0.04085572436451912, | |
| "learning_rate": 5.193402089071852e-06, | |
| "loss": 1.125546932220459, | |
| "step": 1430 | |
| }, | |
| { | |
| "epoch": 1.6403207331042382, | |
| "grad_norm": 0.03452634811401367, | |
| "learning_rate": 5.1820652043793e-06, | |
| "loss": 1.0230213403701782, | |
| "step": 1432 | |
| }, | |
| { | |
| "epoch": 1.6426116838487972, | |
| "grad_norm": 0.10921933501958847, | |
| "learning_rate": 5.1707303473545955e-06, | |
| "loss": 1.2148507833480835, | |
| "step": 1434 | |
| }, | |
| { | |
| "epoch": 1.6449026345933562, | |
| "grad_norm": 0.10001760721206665, | |
| "learning_rate": 5.1593975902871705e-06, | |
| "loss": 1.1477183103561401, | |
| "step": 1436 | |
| }, | |
| { | |
| "epoch": 1.6471935853379152, | |
| "grad_norm": 0.0428081750869751, | |
| "learning_rate": 5.1480670054530605e-06, | |
| "loss": 1.2147631645202637, | |
| "step": 1438 | |
| }, | |
| { | |
| "epoch": 1.6494845360824741, | |
| "grad_norm": 0.042964909225702286, | |
| "learning_rate": 5.136738665114449e-06, | |
| "loss": 1.0643807649612427, | |
| "step": 1440 | |
| }, | |
| { | |
| "epoch": 1.6517754868270331, | |
| "grad_norm": 0.11255067586898804, | |
| "learning_rate": 5.125412641519204e-06, | |
| "loss": 1.1312243938446045, | |
| "step": 1442 | |
| }, | |
| { | |
| "epoch": 1.654066437571592, | |
| "grad_norm": 0.13041959702968597, | |
| "learning_rate": 5.114089006900422e-06, | |
| "loss": 1.3391623497009277, | |
| "step": 1444 | |
| }, | |
| { | |
| "epoch": 1.656357388316151, | |
| "grad_norm": 0.10491371154785156, | |
| "learning_rate": 5.102767833475958e-06, | |
| "loss": 1.1496613025665283, | |
| "step": 1446 | |
| }, | |
| { | |
| "epoch": 1.65864833906071, | |
| "grad_norm": 0.18518638610839844, | |
| "learning_rate": 5.091449193447974e-06, | |
| "loss": 1.0849189758300781, | |
| "step": 1448 | |
| }, | |
| { | |
| "epoch": 1.660939289805269, | |
| "grad_norm": 0.11973278224468231, | |
| "learning_rate": 5.080133159002474e-06, | |
| "loss": 1.0063197612762451, | |
| "step": 1450 | |
| }, | |
| { | |
| "epoch": 1.663230240549828, | |
| "grad_norm": 0.1098853349685669, | |
| "learning_rate": 5.068819802308845e-06, | |
| "loss": 1.132586121559143, | |
| "step": 1452 | |
| }, | |
| { | |
| "epoch": 1.665521191294387, | |
| "grad_norm": 0.1009354293346405, | |
| "learning_rate": 5.057509195519398e-06, | |
| "loss": 1.1691906452178955, | |
| "step": 1454 | |
| }, | |
| { | |
| "epoch": 1.667812142038946, | |
| "grad_norm": 0.19018951058387756, | |
| "learning_rate": 5.046201410768899e-06, | |
| "loss": 1.1768360137939453, | |
| "step": 1456 | |
| }, | |
| { | |
| "epoch": 1.670103092783505, | |
| "grad_norm": 0.2605174779891968, | |
| "learning_rate": 5.034896520174126e-06, | |
| "loss": 0.8101754188537598, | |
| "step": 1458 | |
| }, | |
| { | |
| "epoch": 1.6723940435280642, | |
| "grad_norm": 0.07710061222314835, | |
| "learning_rate": 5.023594595833393e-06, | |
| "loss": 0.47093313932418823, | |
| "step": 1460 | |
| }, | |
| { | |
| "epoch": 1.6746849942726232, | |
| "grad_norm": 0.15821929275989532, | |
| "learning_rate": 5.0122957098260975e-06, | |
| "loss": 1.3882811069488525, | |
| "step": 1462 | |
| }, | |
| { | |
| "epoch": 1.6769759450171822, | |
| "grad_norm": 0.10836261510848999, | |
| "learning_rate": 5.0009999342122606e-06, | |
| "loss": 1.3038394451141357, | |
| "step": 1464 | |
| }, | |
| { | |
| "epoch": 1.6792668957617412, | |
| "grad_norm": 0.1247217059135437, | |
| "learning_rate": 4.989707341032064e-06, | |
| "loss": 1.1847283840179443, | |
| "step": 1466 | |
| }, | |
| { | |
| "epoch": 1.6815578465063001, | |
| "grad_norm": 0.1041129007935524, | |
| "learning_rate": 4.978418002305399e-06, | |
| "loss": 1.2749258279800415, | |
| "step": 1468 | |
| }, | |
| { | |
| "epoch": 1.6838487972508591, | |
| "grad_norm": 0.09283646196126938, | |
| "learning_rate": 4.967131990031391e-06, | |
| "loss": 1.103422999382019, | |
| "step": 1470 | |
| }, | |
| { | |
| "epoch": 1.686139747995418, | |
| "grad_norm": 0.10496700555086136, | |
| "learning_rate": 4.955849376187961e-06, | |
| "loss": 0.8029290437698364, | |
| "step": 1472 | |
| }, | |
| { | |
| "epoch": 1.688430698739977, | |
| "grad_norm": 0.110666923224926, | |
| "learning_rate": 4.944570232731351e-06, | |
| "loss": 1.1437358856201172, | |
| "step": 1474 | |
| }, | |
| { | |
| "epoch": 1.690721649484536, | |
| "grad_norm": 0.18121127784252167, | |
| "learning_rate": 4.9332946315956695e-06, | |
| "loss": 0.9108361005783081, | |
| "step": 1476 | |
| }, | |
| { | |
| "epoch": 1.693012600229095, | |
| "grad_norm": 0.0387575663626194, | |
| "learning_rate": 4.922022644692438e-06, | |
| "loss": 1.1488621234893799, | |
| "step": 1478 | |
| }, | |
| { | |
| "epoch": 1.695303550973654, | |
| "grad_norm": 0.1944582760334015, | |
| "learning_rate": 4.910754343910121e-06, | |
| "loss": 1.0412110090255737, | |
| "step": 1480 | |
| }, | |
| { | |
| "epoch": 1.697594501718213, | |
| "grad_norm": 0.10680762678384781, | |
| "learning_rate": 4.89948980111368e-06, | |
| "loss": 1.1225993633270264, | |
| "step": 1482 | |
| }, | |
| { | |
| "epoch": 1.699885452462772, | |
| "grad_norm": 0.0894724577665329, | |
| "learning_rate": 4.888229088144106e-06, | |
| "loss": 1.1438815593719482, | |
| "step": 1484 | |
| }, | |
| { | |
| "epoch": 1.702176403207331, | |
| "grad_norm": 0.07776302099227905, | |
| "learning_rate": 4.8769722768179686e-06, | |
| "loss": 1.077296257019043, | |
| "step": 1486 | |
| }, | |
| { | |
| "epoch": 1.7044673539518902, | |
| "grad_norm": 0.10468591749668121, | |
| "learning_rate": 4.86571943892695e-06, | |
| "loss": 1.1218129396438599, | |
| "step": 1488 | |
| }, | |
| { | |
| "epoch": 1.7067583046964492, | |
| "grad_norm": 0.10482863336801529, | |
| "learning_rate": 4.854470646237392e-06, | |
| "loss": 1.1150727272033691, | |
| "step": 1490 | |
| }, | |
| { | |
| "epoch": 1.7090492554410082, | |
| "grad_norm": 0.03965259715914726, | |
| "learning_rate": 4.843225970489843e-06, | |
| "loss": 0.772418737411499, | |
| "step": 1492 | |
| }, | |
| { | |
| "epoch": 1.7113402061855671, | |
| "grad_norm": 0.0391482338309288, | |
| "learning_rate": 4.831985483398587e-06, | |
| "loss": 1.2282614707946777, | |
| "step": 1494 | |
| }, | |
| { | |
| "epoch": 1.7136311569301261, | |
| "grad_norm": 0.03488003835082054, | |
| "learning_rate": 4.820749256651202e-06, | |
| "loss": 0.9615294933319092, | |
| "step": 1496 | |
| }, | |
| { | |
| "epoch": 1.7159221076746851, | |
| "grad_norm": 0.04074535146355629, | |
| "learning_rate": 4.809517361908086e-06, | |
| "loss": 1.0762970447540283, | |
| "step": 1498 | |
| }, | |
| { | |
| "epoch": 1.718213058419244, | |
| "grad_norm": 0.043285515159368515, | |
| "learning_rate": 4.798289870802018e-06, | |
| "loss": 0.6143720149993896, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 1.720504009163803, | |
| "grad_norm": 0.1776183396577835, | |
| "learning_rate": 4.787066854937689e-06, | |
| "loss": 1.129189372062683, | |
| "step": 1502 | |
| }, | |
| { | |
| "epoch": 1.722794959908362, | |
| "grad_norm": 0.143866166472435, | |
| "learning_rate": 4.775848385891245e-06, | |
| "loss": 1.3030314445495605, | |
| "step": 1504 | |
| }, | |
| { | |
| "epoch": 1.725085910652921, | |
| "grad_norm": 0.1914648562669754, | |
| "learning_rate": 4.7646345352098445e-06, | |
| "loss": 0.6304170489311218, | |
| "step": 1506 | |
| }, | |
| { | |
| "epoch": 1.72737686139748, | |
| "grad_norm": 0.04062773287296295, | |
| "learning_rate": 4.753425374411177e-06, | |
| "loss": 1.104649305343628, | |
| "step": 1508 | |
| }, | |
| { | |
| "epoch": 1.729667812142039, | |
| "grad_norm": 0.037888024002313614, | |
| "learning_rate": 4.742220974983036e-06, | |
| "loss": 1.0669211149215698, | |
| "step": 1510 | |
| }, | |
| { | |
| "epoch": 1.731958762886598, | |
| "grad_norm": 0.04267639294266701, | |
| "learning_rate": 4.731021408382837e-06, | |
| "loss": 1.1336133480072021, | |
| "step": 1512 | |
| }, | |
| { | |
| "epoch": 1.734249713631157, | |
| "grad_norm": 0.10452942550182343, | |
| "learning_rate": 4.7198267460371826e-06, | |
| "loss": 1.3840365409851074, | |
| "step": 1514 | |
| }, | |
| { | |
| "epoch": 1.736540664375716, | |
| "grad_norm": 0.10939494520425797, | |
| "learning_rate": 4.70863705934139e-06, | |
| "loss": 1.0531353950500488, | |
| "step": 1516 | |
| }, | |
| { | |
| "epoch": 1.738831615120275, | |
| "grad_norm": 0.10143084079027176, | |
| "learning_rate": 4.697452419659052e-06, | |
| "loss": 1.216707468032837, | |
| "step": 1518 | |
| }, | |
| { | |
| "epoch": 1.741122565864834, | |
| "grad_norm": 0.0358530730009079, | |
| "learning_rate": 4.686272898321568e-06, | |
| "loss": 1.0575618743896484, | |
| "step": 1520 | |
| }, | |
| { | |
| "epoch": 1.743413516609393, | |
| "grad_norm": 0.032807547599077225, | |
| "learning_rate": 4.675098566627695e-06, | |
| "loss": 0.9459750652313232, | |
| "step": 1522 | |
| }, | |
| { | |
| "epoch": 1.745704467353952, | |
| "grad_norm": 0.04335108399391174, | |
| "learning_rate": 4.6639294958430946e-06, | |
| "loss": 1.0062423944473267, | |
| "step": 1524 | |
| }, | |
| { | |
| "epoch": 1.747995418098511, | |
| "grad_norm": 0.03665841370820999, | |
| "learning_rate": 4.652765757199872e-06, | |
| "loss": 1.0454283952713013, | |
| "step": 1526 | |
| }, | |
| { | |
| "epoch": 1.7502863688430699, | |
| "grad_norm": 0.039896849542856216, | |
| "learning_rate": 4.641607421896132e-06, | |
| "loss": 0.9870510101318359, | |
| "step": 1528 | |
| }, | |
| { | |
| "epoch": 1.7525773195876289, | |
| "grad_norm": 0.1942906230688095, | |
| "learning_rate": 4.630454561095513e-06, | |
| "loss": 0.8729791641235352, | |
| "step": 1530 | |
| }, | |
| { | |
| "epoch": 1.7548682703321878, | |
| "grad_norm": 0.1113077700138092, | |
| "learning_rate": 4.619307245926743e-06, | |
| "loss": 1.1512171030044556, | |
| "step": 1532 | |
| }, | |
| { | |
| "epoch": 1.7571592210767468, | |
| "grad_norm": 0.03571424260735512, | |
| "learning_rate": 4.608165547483183e-06, | |
| "loss": 1.2152729034423828, | |
| "step": 1534 | |
| }, | |
| { | |
| "epoch": 1.7594501718213058, | |
| "grad_norm": 0.04555622860789299, | |
| "learning_rate": 4.597029536822368e-06, | |
| "loss": 0.6297133564949036, | |
| "step": 1536 | |
| }, | |
| { | |
| "epoch": 1.7617411225658648, | |
| "grad_norm": 0.10868772864341736, | |
| "learning_rate": 4.585899284965563e-06, | |
| "loss": 1.1049292087554932, | |
| "step": 1538 | |
| }, | |
| { | |
| "epoch": 1.7640320733104238, | |
| "grad_norm": 0.11979663372039795, | |
| "learning_rate": 4.574774862897302e-06, | |
| "loss": 1.1207685470581055, | |
| "step": 1540 | |
| }, | |
| { | |
| "epoch": 1.7663230240549828, | |
| "grad_norm": 0.10041869431734085, | |
| "learning_rate": 4.563656341564941e-06, | |
| "loss": 1.046482801437378, | |
| "step": 1542 | |
| }, | |
| { | |
| "epoch": 1.7686139747995417, | |
| "grad_norm": 0.13303270936012268, | |
| "learning_rate": 4.552543791878205e-06, | |
| "loss": 1.305598497390747, | |
| "step": 1544 | |
| }, | |
| { | |
| "epoch": 1.7709049255441007, | |
| "grad_norm": 0.033517755568027496, | |
| "learning_rate": 4.541437284708727e-06, | |
| "loss": 0.9493808746337891, | |
| "step": 1546 | |
| }, | |
| { | |
| "epoch": 1.7731958762886597, | |
| "grad_norm": 0.05814732611179352, | |
| "learning_rate": 4.530336890889613e-06, | |
| "loss": 1.0616586208343506, | |
| "step": 1548 | |
| }, | |
| { | |
| "epoch": 1.7754868270332187, | |
| "grad_norm": 0.20835857093334198, | |
| "learning_rate": 4.519242681214971e-06, | |
| "loss": 0.7091290950775146, | |
| "step": 1550 | |
| }, | |
| { | |
| "epoch": 1.7777777777777777, | |
| "grad_norm": 0.30232834815979004, | |
| "learning_rate": 4.508154726439478e-06, | |
| "loss": 1.036962866783142, | |
| "step": 1552 | |
| }, | |
| { | |
| "epoch": 1.7800687285223367, | |
| "grad_norm": 0.11091706156730652, | |
| "learning_rate": 4.49707309727791e-06, | |
| "loss": 1.1720504760742188, | |
| "step": 1554 | |
| }, | |
| { | |
| "epoch": 1.7823596792668956, | |
| "grad_norm": 0.06305071711540222, | |
| "learning_rate": 4.485997864404709e-06, | |
| "loss": 1.1253163814544678, | |
| "step": 1556 | |
| }, | |
| { | |
| "epoch": 1.7846506300114546, | |
| "grad_norm": 0.07569506764411926, | |
| "learning_rate": 4.474929098453519e-06, | |
| "loss": 1.2266206741333008, | |
| "step": 1558 | |
| }, | |
| { | |
| "epoch": 1.7869415807560136, | |
| "grad_norm": 0.035198770463466644, | |
| "learning_rate": 4.463866870016741e-06, | |
| "loss": 1.0873854160308838, | |
| "step": 1560 | |
| }, | |
| { | |
| "epoch": 1.7892325315005726, | |
| "grad_norm": 0.19046899676322937, | |
| "learning_rate": 4.452811249645085e-06, | |
| "loss": 1.1058852672576904, | |
| "step": 1562 | |
| }, | |
| { | |
| "epoch": 1.7915234822451316, | |
| "grad_norm": 0.1516924798488617, | |
| "learning_rate": 4.441762307847113e-06, | |
| "loss": 1.233043909072876, | |
| "step": 1564 | |
| }, | |
| { | |
| "epoch": 1.7938144329896906, | |
| "grad_norm": 0.12142589688301086, | |
| "learning_rate": 4.430720115088797e-06, | |
| "loss": 1.1887891292572021, | |
| "step": 1566 | |
| }, | |
| { | |
| "epoch": 1.7961053837342495, | |
| "grad_norm": 0.2711428701877594, | |
| "learning_rate": 4.419684741793066e-06, | |
| "loss": 0.8487293720245361, | |
| "step": 1568 | |
| }, | |
| { | |
| "epoch": 1.7983963344788088, | |
| "grad_norm": 0.09922181069850922, | |
| "learning_rate": 4.408656258339355e-06, | |
| "loss": 1.0131475925445557, | |
| "step": 1570 | |
| }, | |
| { | |
| "epoch": 1.8006872852233677, | |
| "grad_norm": 0.11347451061010361, | |
| "learning_rate": 4.397634735063158e-06, | |
| "loss": 1.1046950817108154, | |
| "step": 1572 | |
| }, | |
| { | |
| "epoch": 1.8029782359679267, | |
| "grad_norm": 0.03463425114750862, | |
| "learning_rate": 4.386620242255583e-06, | |
| "loss": 1.096078872680664, | |
| "step": 1574 | |
| }, | |
| { | |
| "epoch": 1.8052691867124857, | |
| "grad_norm": 0.1700165718793869, | |
| "learning_rate": 4.375612850162897e-06, | |
| "loss": 1.2153785228729248, | |
| "step": 1576 | |
| }, | |
| { | |
| "epoch": 1.8075601374570447, | |
| "grad_norm": 0.10484757274389267, | |
| "learning_rate": 4.364612628986082e-06, | |
| "loss": 1.0014398097991943, | |
| "step": 1578 | |
| }, | |
| { | |
| "epoch": 1.8098510882016037, | |
| "grad_norm": 0.23074306547641754, | |
| "learning_rate": 4.35361964888039e-06, | |
| "loss": 0.98294597864151, | |
| "step": 1580 | |
| }, | |
| { | |
| "epoch": 1.8121420389461627, | |
| "grad_norm": 0.1004788875579834, | |
| "learning_rate": 4.342633979954887e-06, | |
| "loss": 1.168994665145874, | |
| "step": 1582 | |
| }, | |
| { | |
| "epoch": 1.8144329896907216, | |
| "grad_norm": 0.2612786293029785, | |
| "learning_rate": 4.331655692272011e-06, | |
| "loss": 0.9925752282142639, | |
| "step": 1584 | |
| }, | |
| { | |
| "epoch": 1.8167239404352806, | |
| "grad_norm": 0.037493567913770676, | |
| "learning_rate": 4.320684855847135e-06, | |
| "loss": 1.2383928298950195, | |
| "step": 1586 | |
| }, | |
| { | |
| "epoch": 1.8190148911798396, | |
| "grad_norm": 0.08211533725261688, | |
| "learning_rate": 4.309721540648094e-06, | |
| "loss": 0.9583084583282471, | |
| "step": 1588 | |
| }, | |
| { | |
| "epoch": 1.8213058419243986, | |
| "grad_norm": 0.034089405089616776, | |
| "learning_rate": 4.298765816594773e-06, | |
| "loss": 1.1073875427246094, | |
| "step": 1590 | |
| }, | |
| { | |
| "epoch": 1.8235967926689576, | |
| "grad_norm": 0.11309397965669632, | |
| "learning_rate": 4.2878177535586294e-06, | |
| "loss": 1.0401699542999268, | |
| "step": 1592 | |
| }, | |
| { | |
| "epoch": 1.8258877434135166, | |
| "grad_norm": 0.10397134721279144, | |
| "learning_rate": 4.276877421362273e-06, | |
| "loss": 1.1131930351257324, | |
| "step": 1594 | |
| }, | |
| { | |
| "epoch": 1.8281786941580758, | |
| "grad_norm": 0.10276107490062714, | |
| "learning_rate": 4.265944889778998e-06, | |
| "loss": 1.2132148742675781, | |
| "step": 1596 | |
| }, | |
| { | |
| "epoch": 1.8304696449026348, | |
| "grad_norm": 0.10183647274971008, | |
| "learning_rate": 4.255020228532357e-06, | |
| "loss": 1.179892659187317, | |
| "step": 1598 | |
| }, | |
| { | |
| "epoch": 1.8327605956471937, | |
| "grad_norm": 0.11640535295009613, | |
| "learning_rate": 4.244103507295709e-06, | |
| "loss": 0.8675186634063721, | |
| "step": 1600 | |
| }, | |
| { | |
| "epoch": 1.8350515463917527, | |
| "grad_norm": 0.1039697676897049, | |
| "learning_rate": 4.233194795691767e-06, | |
| "loss": 1.1238439083099365, | |
| "step": 1602 | |
| }, | |
| { | |
| "epoch": 1.8373424971363117, | |
| "grad_norm": 0.11134682595729828, | |
| "learning_rate": 4.222294163292173e-06, | |
| "loss": 1.1501944065093994, | |
| "step": 1604 | |
| }, | |
| { | |
| "epoch": 1.8396334478808707, | |
| "grad_norm": 0.09264901280403137, | |
| "learning_rate": 4.211401679617031e-06, | |
| "loss": 1.0822536945343018, | |
| "step": 1606 | |
| }, | |
| { | |
| "epoch": 1.8419243986254297, | |
| "grad_norm": 0.09437301009893417, | |
| "learning_rate": 4.200517414134487e-06, | |
| "loss": 1.1331408023834229, | |
| "step": 1608 | |
| }, | |
| { | |
| "epoch": 1.8442153493699887, | |
| "grad_norm": 0.042578209191560745, | |
| "learning_rate": 4.1896414362602615e-06, | |
| "loss": 0.9823397994041443, | |
| "step": 1610 | |
| }, | |
| { | |
| "epoch": 1.8465063001145476, | |
| "grad_norm": 0.1340235471725464, | |
| "learning_rate": 4.178773815357231e-06, | |
| "loss": 1.3153901100158691, | |
| "step": 1612 | |
| }, | |
| { | |
| "epoch": 1.8487972508591066, | |
| "grad_norm": 0.15407730638980865, | |
| "learning_rate": 4.167914620734971e-06, | |
| "loss": 1.372065782546997, | |
| "step": 1614 | |
| }, | |
| { | |
| "epoch": 1.8510882016036656, | |
| "grad_norm": 0.09786330908536911, | |
| "learning_rate": 4.157063921649314e-06, | |
| "loss": 1.1111478805541992, | |
| "step": 1616 | |
| }, | |
| { | |
| "epoch": 1.8533791523482246, | |
| "grad_norm": 0.03377949446439743, | |
| "learning_rate": 4.146221787301916e-06, | |
| "loss": 1.1279263496398926, | |
| "step": 1618 | |
| }, | |
| { | |
| "epoch": 1.8556701030927836, | |
| "grad_norm": 0.10334134101867676, | |
| "learning_rate": 4.135388286839806e-06, | |
| "loss": 1.0680410861968994, | |
| "step": 1620 | |
| }, | |
| { | |
| "epoch": 1.8579610538373426, | |
| "grad_norm": 0.13414083421230316, | |
| "learning_rate": 4.1245634893549545e-06, | |
| "loss": 1.1650376319885254, | |
| "step": 1622 | |
| }, | |
| { | |
| "epoch": 1.8602520045819015, | |
| "grad_norm": 0.14877556264400482, | |
| "learning_rate": 4.113747463883823e-06, | |
| "loss": 1.367631196975708, | |
| "step": 1624 | |
| }, | |
| { | |
| "epoch": 1.8625429553264605, | |
| "grad_norm": 0.031263113021850586, | |
| "learning_rate": 4.102940279406928e-06, | |
| "loss": 1.0940788984298706, | |
| "step": 1626 | |
| }, | |
| { | |
| "epoch": 1.8648339060710195, | |
| "grad_norm": 0.04384366795420647, | |
| "learning_rate": 4.092142004848408e-06, | |
| "loss": 1.1474003791809082, | |
| "step": 1628 | |
| }, | |
| { | |
| "epoch": 1.8671248568155785, | |
| "grad_norm": 0.03476424515247345, | |
| "learning_rate": 4.08135270907557e-06, | |
| "loss": 1.0565738677978516, | |
| "step": 1630 | |
| }, | |
| { | |
| "epoch": 1.8694158075601375, | |
| "grad_norm": 0.14033450186252594, | |
| "learning_rate": 4.070572460898462e-06, | |
| "loss": 1.0951378345489502, | |
| "step": 1632 | |
| }, | |
| { | |
| "epoch": 1.8717067583046965, | |
| "grad_norm": 0.03384094685316086, | |
| "learning_rate": 4.059801329069426e-06, | |
| "loss": 1.029362678527832, | |
| "step": 1634 | |
| }, | |
| { | |
| "epoch": 1.8739977090492554, | |
| "grad_norm": 0.03602081537246704, | |
| "learning_rate": 4.04903938228267e-06, | |
| "loss": 1.1431338787078857, | |
| "step": 1636 | |
| }, | |
| { | |
| "epoch": 1.8762886597938144, | |
| "grad_norm": 0.1386619508266449, | |
| "learning_rate": 4.038286689173817e-06, | |
| "loss": 1.1247670650482178, | |
| "step": 1638 | |
| }, | |
| { | |
| "epoch": 1.8785796105383734, | |
| "grad_norm": 0.047482799738645554, | |
| "learning_rate": 4.027543318319474e-06, | |
| "loss": 1.0399794578552246, | |
| "step": 1640 | |
| }, | |
| { | |
| "epoch": 1.8808705612829324, | |
| "grad_norm": 0.10432790964841843, | |
| "learning_rate": 4.0168093382368e-06, | |
| "loss": 1.1941301822662354, | |
| "step": 1642 | |
| }, | |
| { | |
| "epoch": 1.8831615120274914, | |
| "grad_norm": 0.21906164288520813, | |
| "learning_rate": 4.006084817383056e-06, | |
| "loss": 0.24486073851585388, | |
| "step": 1644 | |
| }, | |
| { | |
| "epoch": 1.8854524627720504, | |
| "grad_norm": 0.11396703869104385, | |
| "learning_rate": 3.995369824155181e-06, | |
| "loss": 1.13923978805542, | |
| "step": 1646 | |
| }, | |
| { | |
| "epoch": 1.8877434135166093, | |
| "grad_norm": 0.10515320301055908, | |
| "learning_rate": 3.984664426889346e-06, | |
| "loss": 1.1944549083709717, | |
| "step": 1648 | |
| }, | |
| { | |
| "epoch": 1.8900343642611683, | |
| "grad_norm": 0.12443188577890396, | |
| "learning_rate": 3.973968693860527e-06, | |
| "loss": 1.2203004360198975, | |
| "step": 1650 | |
| }, | |
| { | |
| "epoch": 1.8923253150057273, | |
| "grad_norm": 0.1255001723766327, | |
| "learning_rate": 3.96328269328206e-06, | |
| "loss": 1.3052341938018799, | |
| "step": 1652 | |
| }, | |
| { | |
| "epoch": 1.8946162657502863, | |
| "grad_norm": 0.12038350105285645, | |
| "learning_rate": 3.952606493305216e-06, | |
| "loss": 1.4456214904785156, | |
| "step": 1654 | |
| }, | |
| { | |
| "epoch": 1.8969072164948453, | |
| "grad_norm": 0.04385162517428398, | |
| "learning_rate": 3.941940162018761e-06, | |
| "loss": 1.0220682621002197, | |
| "step": 1656 | |
| }, | |
| { | |
| "epoch": 1.8991981672394043, | |
| "grad_norm": 0.0347161740064621, | |
| "learning_rate": 3.931283767448518e-06, | |
| "loss": 0.5256936550140381, | |
| "step": 1658 | |
| }, | |
| { | |
| "epoch": 1.9014891179839633, | |
| "grad_norm": 0.3636147975921631, | |
| "learning_rate": 3.920637377556942e-06, | |
| "loss": 0.8584147691726685, | |
| "step": 1660 | |
| }, | |
| { | |
| "epoch": 1.9037800687285222, | |
| "grad_norm": 0.11466902494430542, | |
| "learning_rate": 3.910001060242677e-06, | |
| "loss": 0.9281732439994812, | |
| "step": 1662 | |
| }, | |
| { | |
| "epoch": 1.9060710194730812, | |
| "grad_norm": 0.1715666800737381, | |
| "learning_rate": 3.899374883340137e-06, | |
| "loss": 1.1350767612457275, | |
| "step": 1664 | |
| }, | |
| { | |
| "epoch": 1.9083619702176402, | |
| "grad_norm": 0.10768167674541473, | |
| "learning_rate": 3.88875891461905e-06, | |
| "loss": 0.8779376745223999, | |
| "step": 1666 | |
| }, | |
| { | |
| "epoch": 1.9106529209621992, | |
| "grad_norm": 0.05779425799846649, | |
| "learning_rate": 3.878153221784054e-06, | |
| "loss": 1.1153831481933594, | |
| "step": 1668 | |
| }, | |
| { | |
| "epoch": 1.9129438717067582, | |
| "grad_norm": 0.11072687059640884, | |
| "learning_rate": 3.867557872474248e-06, | |
| "loss": 1.098388433456421, | |
| "step": 1670 | |
| }, | |
| { | |
| "epoch": 1.9152348224513172, | |
| "grad_norm": 0.11078870296478271, | |
| "learning_rate": 3.856972934262755e-06, | |
| "loss": 1.206322193145752, | |
| "step": 1672 | |
| }, | |
| { | |
| "epoch": 1.9175257731958761, | |
| "grad_norm": 0.11533273011445999, | |
| "learning_rate": 3.84639847465631e-06, | |
| "loss": 1.2478669881820679, | |
| "step": 1674 | |
| }, | |
| { | |
| "epoch": 1.9198167239404351, | |
| "grad_norm": 0.1029491275548935, | |
| "learning_rate": 3.835834561094815e-06, | |
| "loss": 1.144696831703186, | |
| "step": 1676 | |
| }, | |
| { | |
| "epoch": 1.9221076746849943, | |
| "grad_norm": 0.2026187777519226, | |
| "learning_rate": 3.825281260950916e-06, | |
| "loss": 1.330293893814087, | |
| "step": 1678 | |
| }, | |
| { | |
| "epoch": 1.9243986254295533, | |
| "grad_norm": 0.16172434389591217, | |
| "learning_rate": 3.814738641529566e-06, | |
| "loss": 1.2509064674377441, | |
| "step": 1680 | |
| }, | |
| { | |
| "epoch": 1.9266895761741123, | |
| "grad_norm": 0.03767630457878113, | |
| "learning_rate": 3.8042067700676044e-06, | |
| "loss": 0.9259130954742432, | |
| "step": 1682 | |
| }, | |
| { | |
| "epoch": 1.9289805269186713, | |
| "grad_norm": 0.10923905670642853, | |
| "learning_rate": 3.793685713733326e-06, | |
| "loss": 1.2091310024261475, | |
| "step": 1684 | |
| }, | |
| { | |
| "epoch": 1.9312714776632303, | |
| "grad_norm": 0.17867109179496765, | |
| "learning_rate": 3.7831755396260416e-06, | |
| "loss": 1.122981071472168, | |
| "step": 1686 | |
| }, | |
| { | |
| "epoch": 1.9335624284077892, | |
| "grad_norm": 0.04535387083888054, | |
| "learning_rate": 3.772676314775674e-06, | |
| "loss": 0.9912348985671997, | |
| "step": 1688 | |
| }, | |
| { | |
| "epoch": 1.9358533791523482, | |
| "grad_norm": 0.09147147089242935, | |
| "learning_rate": 3.762188106142298e-06, | |
| "loss": 0.8392770290374756, | |
| "step": 1690 | |
| }, | |
| { | |
| "epoch": 1.9381443298969072, | |
| "grad_norm": 0.10301242023706436, | |
| "learning_rate": 3.751710980615748e-06, | |
| "loss": 1.138921856880188, | |
| "step": 1692 | |
| }, | |
| { | |
| "epoch": 1.9404352806414662, | |
| "grad_norm": 0.04164798557758331, | |
| "learning_rate": 3.741245005015162e-06, | |
| "loss": 0.9997767210006714, | |
| "step": 1694 | |
| }, | |
| { | |
| "epoch": 1.9427262313860252, | |
| "grad_norm": 0.10936956107616425, | |
| "learning_rate": 3.7307902460885746e-06, | |
| "loss": 1.1422123908996582, | |
| "step": 1696 | |
| }, | |
| { | |
| "epoch": 1.9450171821305842, | |
| "grad_norm": 0.10265643894672394, | |
| "learning_rate": 3.720346770512486e-06, | |
| "loss": 1.039827585220337, | |
| "step": 1698 | |
| }, | |
| { | |
| "epoch": 1.9473081328751431, | |
| "grad_norm": 0.1073760837316513, | |
| "learning_rate": 3.709914644891427e-06, | |
| "loss": 1.201385259628296, | |
| "step": 1700 | |
| }, | |
| { | |
| "epoch": 1.9495990836197021, | |
| "grad_norm": 0.37369316816329956, | |
| "learning_rate": 3.6994939357575533e-06, | |
| "loss": 0.6540091633796692, | |
| "step": 1702 | |
| }, | |
| { | |
| "epoch": 1.9518900343642611, | |
| "grad_norm": 0.0465695895254612, | |
| "learning_rate": 3.689084709570202e-06, | |
| "loss": 1.1277987957000732, | |
| "step": 1704 | |
| }, | |
| { | |
| "epoch": 1.9541809851088203, | |
| "grad_norm": 0.09042780101299286, | |
| "learning_rate": 3.6786870327154832e-06, | |
| "loss": 1.1528184413909912, | |
| "step": 1706 | |
| }, | |
| { | |
| "epoch": 1.9564719358533793, | |
| "grad_norm": 0.12911838293075562, | |
| "learning_rate": 3.6683009715058416e-06, | |
| "loss": 1.0916279554367065, | |
| "step": 1708 | |
| }, | |
| { | |
| "epoch": 1.9587628865979383, | |
| "grad_norm": 0.12108128517866135, | |
| "learning_rate": 3.657926592179649e-06, | |
| "loss": 1.0954606533050537, | |
| "step": 1710 | |
| }, | |
| { | |
| "epoch": 1.9610538373424973, | |
| "grad_norm": 0.053682778030633926, | |
| "learning_rate": 3.647563960900774e-06, | |
| "loss": 1.096421480178833, | |
| "step": 1712 | |
| }, | |
| { | |
| "epoch": 1.9633447880870563, | |
| "grad_norm": 0.08973834663629532, | |
| "learning_rate": 3.637213143758156e-06, | |
| "loss": 1.0274837017059326, | |
| "step": 1714 | |
| }, | |
| { | |
| "epoch": 1.9656357388316152, | |
| "grad_norm": 0.038498666137456894, | |
| "learning_rate": 3.626874206765392e-06, | |
| "loss": 1.1263710260391235, | |
| "step": 1716 | |
| }, | |
| { | |
| "epoch": 1.9679266895761742, | |
| "grad_norm": 0.10485488921403885, | |
| "learning_rate": 3.616547215860309e-06, | |
| "loss": 1.0969138145446777, | |
| "step": 1718 | |
| }, | |
| { | |
| "epoch": 1.9702176403207332, | |
| "grad_norm": 0.1671239733695984, | |
| "learning_rate": 3.606232236904549e-06, | |
| "loss": 1.2424817085266113, | |
| "step": 1720 | |
| }, | |
| { | |
| "epoch": 1.9725085910652922, | |
| "grad_norm": 0.10209770500659943, | |
| "learning_rate": 3.595929335683146e-06, | |
| "loss": 0.9946843385696411, | |
| "step": 1722 | |
| }, | |
| { | |
| "epoch": 1.9747995418098512, | |
| "grad_norm": 0.10356201231479645, | |
| "learning_rate": 3.585638577904107e-06, | |
| "loss": 1.1440200805664062, | |
| "step": 1724 | |
| }, | |
| { | |
| "epoch": 1.9770904925544102, | |
| "grad_norm": 0.16762997210025787, | |
| "learning_rate": 3.5753600291979917e-06, | |
| "loss": 1.3794069290161133, | |
| "step": 1726 | |
| }, | |
| { | |
| "epoch": 1.9793814432989691, | |
| "grad_norm": 0.16176775097846985, | |
| "learning_rate": 3.5650937551174914e-06, | |
| "loss": 1.1582558155059814, | |
| "step": 1728 | |
| }, | |
| { | |
| "epoch": 1.9816723940435281, | |
| "grad_norm": 0.24708229303359985, | |
| "learning_rate": 3.554839821137026e-06, | |
| "loss": 0.991188108921051, | |
| "step": 1730 | |
| }, | |
| { | |
| "epoch": 1.9839633447880871, | |
| "grad_norm": 0.03801441565155983, | |
| "learning_rate": 3.5445982926523006e-06, | |
| "loss": 1.1123301982879639, | |
| "step": 1732 | |
| }, | |
| { | |
| "epoch": 1.986254295532646, | |
| "grad_norm": 0.11292216181755066, | |
| "learning_rate": 3.534369234979914e-06, | |
| "loss": 1.094356894493103, | |
| "step": 1734 | |
| }, | |
| { | |
| "epoch": 1.988545246277205, | |
| "grad_norm": 0.24388711154460907, | |
| "learning_rate": 3.5241527133569197e-06, | |
| "loss": 0.09452319145202637, | |
| "step": 1736 | |
| }, | |
| { | |
| "epoch": 1.990836197021764, | |
| "grad_norm": 0.09518136829137802, | |
| "learning_rate": 3.5139487929404335e-06, | |
| "loss": 0.9023909568786621, | |
| "step": 1738 | |
| }, | |
| { | |
| "epoch": 1.993127147766323, | |
| "grad_norm": 0.10411088913679123, | |
| "learning_rate": 3.5037575388071997e-06, | |
| "loss": 1.18477463722229, | |
| "step": 1740 | |
| }, | |
| { | |
| "epoch": 1.995418098510882, | |
| "grad_norm": 0.04860776290297508, | |
| "learning_rate": 3.4935790159531786e-06, | |
| "loss": 1.0331647396087646, | |
| "step": 1742 | |
| }, | |
| { | |
| "epoch": 1.997709049255441, | |
| "grad_norm": 0.11631099134683609, | |
| "learning_rate": 3.4834132892931405e-06, | |
| "loss": 1.1645119190216064, | |
| "step": 1744 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "grad_norm": 0.10443787276744843, | |
| "learning_rate": 3.47326042366024e-06, | |
| "loss": 1.174635887145996, | |
| "step": 1746 | |
| }, | |
| { | |
| "epoch": 2.002290950744559, | |
| "grad_norm": 0.10160228610038757, | |
| "learning_rate": 3.46312048380562e-06, | |
| "loss": 1.040701985359192, | |
| "step": 1748 | |
| }, | |
| { | |
| "epoch": 2.004581901489118, | |
| "grad_norm": 0.11828029900789261, | |
| "learning_rate": 3.452993534397975e-06, | |
| "loss": 1.2142775058746338, | |
| "step": 1750 | |
| }, | |
| { | |
| "epoch": 2.006872852233677, | |
| "grad_norm": 0.10975845903158188, | |
| "learning_rate": 3.4428796400231595e-06, | |
| "loss": 1.1384990215301514, | |
| "step": 1752 | |
| }, | |
| { | |
| "epoch": 2.009163802978236, | |
| "grad_norm": 0.3760664463043213, | |
| "learning_rate": 3.4327788651837658e-06, | |
| "loss": 1.1351079940795898, | |
| "step": 1754 | |
| }, | |
| { | |
| "epoch": 2.011454753722795, | |
| "grad_norm": 0.046844061464071274, | |
| "learning_rate": 3.422691274298714e-06, | |
| "loss": 1.0275928974151611, | |
| "step": 1756 | |
| }, | |
| { | |
| "epoch": 2.013745704467354, | |
| "grad_norm": 0.038912829011678696, | |
| "learning_rate": 3.4126169317028445e-06, | |
| "loss": 1.1176273822784424, | |
| "step": 1758 | |
| }, | |
| { | |
| "epoch": 2.016036655211913, | |
| "grad_norm": 0.13894204795360565, | |
| "learning_rate": 3.4025559016465003e-06, | |
| "loss": 1.2932333946228027, | |
| "step": 1760 | |
| }, | |
| { | |
| "epoch": 2.018327605956472, | |
| "grad_norm": 0.044606562703847885, | |
| "learning_rate": 3.3925082482951253e-06, | |
| "loss": 1.10593843460083, | |
| "step": 1762 | |
| }, | |
| { | |
| "epoch": 2.020618556701031, | |
| "grad_norm": 0.4010094702243805, | |
| "learning_rate": 3.382474035728852e-06, | |
| "loss": 0.9274369478225708, | |
| "step": 1764 | |
| }, | |
| { | |
| "epoch": 2.02290950744559, | |
| "grad_norm": 0.148308664560318, | |
| "learning_rate": 3.372453327942092e-06, | |
| "loss": 1.2721387147903442, | |
| "step": 1766 | |
| }, | |
| { | |
| "epoch": 2.025200458190149, | |
| "grad_norm": 0.13406376540660858, | |
| "learning_rate": 3.3624461888431315e-06, | |
| "loss": 1.1050572395324707, | |
| "step": 1768 | |
| }, | |
| { | |
| "epoch": 2.027491408934708, | |
| "grad_norm": 0.11722642928361893, | |
| "learning_rate": 3.352452682253713e-06, | |
| "loss": 1.1906096935272217, | |
| "step": 1770 | |
| }, | |
| { | |
| "epoch": 2.029782359679267, | |
| "grad_norm": 0.10479209572076797, | |
| "learning_rate": 3.3424728719086424e-06, | |
| "loss": 1.179795265197754, | |
| "step": 1772 | |
| }, | |
| { | |
| "epoch": 2.0320733104238258, | |
| "grad_norm": 0.042092155665159225, | |
| "learning_rate": 3.332506821455376e-06, | |
| "loss": 0.937066912651062, | |
| "step": 1774 | |
| }, | |
| { | |
| "epoch": 2.0343642611683848, | |
| "grad_norm": 0.12035442888736725, | |
| "learning_rate": 3.3225545944536132e-06, | |
| "loss": 0.8189719319343567, | |
| "step": 1776 | |
| }, | |
| { | |
| "epoch": 2.0366552119129437, | |
| "grad_norm": 0.10334596037864685, | |
| "learning_rate": 3.3126162543748906e-06, | |
| "loss": 1.0670874118804932, | |
| "step": 1778 | |
| }, | |
| { | |
| "epoch": 2.0389461626575027, | |
| "grad_norm": 0.1044737845659256, | |
| "learning_rate": 3.302691864602183e-06, | |
| "loss": 1.1054072380065918, | |
| "step": 1780 | |
| }, | |
| { | |
| "epoch": 2.0412371134020617, | |
| "grad_norm": 0.11545980721712112, | |
| "learning_rate": 3.292781488429494e-06, | |
| "loss": 1.1284576654434204, | |
| "step": 1782 | |
| }, | |
| { | |
| "epoch": 2.0435280641466207, | |
| "grad_norm": 0.1243211105465889, | |
| "learning_rate": 3.2828851890614534e-06, | |
| "loss": 1.0694844722747803, | |
| "step": 1784 | |
| }, | |
| { | |
| "epoch": 2.0458190148911797, | |
| "grad_norm": 0.0351092666387558, | |
| "learning_rate": 3.2730030296129157e-06, | |
| "loss": 1.1130070686340332, | |
| "step": 1786 | |
| }, | |
| { | |
| "epoch": 2.0481099656357387, | |
| "grad_norm": 0.20803546905517578, | |
| "learning_rate": 3.2631350731085526e-06, | |
| "loss": 1.176312804222107, | |
| "step": 1788 | |
| }, | |
| { | |
| "epoch": 2.0504009163802976, | |
| "grad_norm": 0.10609522461891174, | |
| "learning_rate": 3.25328138248246e-06, | |
| "loss": 1.115060567855835, | |
| "step": 1790 | |
| }, | |
| { | |
| "epoch": 2.0526918671248566, | |
| "grad_norm": 0.2342589944601059, | |
| "learning_rate": 3.2434420205777473e-06, | |
| "loss": 0.6569336652755737, | |
| "step": 1792 | |
| }, | |
| { | |
| "epoch": 2.0549828178694156, | |
| "grad_norm": 0.30435091257095337, | |
| "learning_rate": 3.2336170501461433e-06, | |
| "loss": 0.9199868440628052, | |
| "step": 1794 | |
| }, | |
| { | |
| "epoch": 2.0572737686139746, | |
| "grad_norm": 0.14184576272964478, | |
| "learning_rate": 3.2238065338475944e-06, | |
| "loss": 0.9895941019058228, | |
| "step": 1796 | |
| }, | |
| { | |
| "epoch": 2.0595647193585336, | |
| "grad_norm": 0.05588042736053467, | |
| "learning_rate": 3.2140105342498577e-06, | |
| "loss": 1.0880318880081177, | |
| "step": 1798 | |
| }, | |
| { | |
| "epoch": 2.0618556701030926, | |
| "grad_norm": 0.1207311823964119, | |
| "learning_rate": 3.2042291138281155e-06, | |
| "loss": 1.1787630319595337, | |
| "step": 1800 | |
| }, | |
| { | |
| "epoch": 2.0641466208476515, | |
| "grad_norm": 0.32253092527389526, | |
| "learning_rate": 3.194462334964566e-06, | |
| "loss": 0.6825082302093506, | |
| "step": 1802 | |
| }, | |
| { | |
| "epoch": 2.066437571592211, | |
| "grad_norm": 0.11069416999816895, | |
| "learning_rate": 3.184710259948032e-06, | |
| "loss": 0.9402496814727783, | |
| "step": 1804 | |
| }, | |
| { | |
| "epoch": 2.06872852233677, | |
| "grad_norm": 0.13846199214458466, | |
| "learning_rate": 3.1749729509735555e-06, | |
| "loss": 0.5835152864456177, | |
| "step": 1806 | |
| }, | |
| { | |
| "epoch": 2.071019473081329, | |
| "grad_norm": 0.03694439306855202, | |
| "learning_rate": 3.165250470142009e-06, | |
| "loss": 1.0787923336029053, | |
| "step": 1808 | |
| }, | |
| { | |
| "epoch": 2.073310423825888, | |
| "grad_norm": 0.2090117484331131, | |
| "learning_rate": 3.155542879459702e-06, | |
| "loss": 1.236689567565918, | |
| "step": 1810 | |
| }, | |
| { | |
| "epoch": 2.075601374570447, | |
| "grad_norm": 0.105824314057827, | |
| "learning_rate": 3.145850240837971e-06, | |
| "loss": 1.1017699241638184, | |
| "step": 1812 | |
| }, | |
| { | |
| "epoch": 2.077892325315006, | |
| "grad_norm": 0.10985516756772995, | |
| "learning_rate": 3.136172616092801e-06, | |
| "loss": 1.1386725902557373, | |
| "step": 1814 | |
| }, | |
| { | |
| "epoch": 2.080183276059565, | |
| "grad_norm": 0.22561407089233398, | |
| "learning_rate": 3.1265100669444184e-06, | |
| "loss": 0.9274455308914185, | |
| "step": 1816 | |
| }, | |
| { | |
| "epoch": 2.082474226804124, | |
| "grad_norm": 0.10488112270832062, | |
| "learning_rate": 3.116862655016907e-06, | |
| "loss": 1.1748979091644287, | |
| "step": 1818 | |
| }, | |
| { | |
| "epoch": 2.084765177548683, | |
| "grad_norm": 0.10127851366996765, | |
| "learning_rate": 3.107230441837812e-06, | |
| "loss": 1.2072792053222656, | |
| "step": 1820 | |
| }, | |
| { | |
| "epoch": 2.087056128293242, | |
| "grad_norm": 0.03725925460457802, | |
| "learning_rate": 3.097613488837744e-06, | |
| "loss": 1.1067794561386108, | |
| "step": 1822 | |
| }, | |
| { | |
| "epoch": 2.089347079037801, | |
| "grad_norm": 0.24176861345767975, | |
| "learning_rate": 3.0880118573499928e-06, | |
| "loss": 1.04270601272583, | |
| "step": 1824 | |
| }, | |
| { | |
| "epoch": 2.09163802978236, | |
| "grad_norm": 0.11087094247341156, | |
| "learning_rate": 3.0784256086101265e-06, | |
| "loss": 1.1116524934768677, | |
| "step": 1826 | |
| }, | |
| { | |
| "epoch": 2.093928980526919, | |
| "grad_norm": 0.10241114348173141, | |
| "learning_rate": 3.0688548037556203e-06, | |
| "loss": 1.0798685550689697, | |
| "step": 1828 | |
| }, | |
| { | |
| "epoch": 2.0962199312714778, | |
| "grad_norm": 0.12044376134872437, | |
| "learning_rate": 3.0592995038254413e-06, | |
| "loss": 1.1466073989868164, | |
| "step": 1830 | |
| }, | |
| { | |
| "epoch": 2.0985108820160367, | |
| "grad_norm": 0.07996680587530136, | |
| "learning_rate": 3.04975976975968e-06, | |
| "loss": 1.0409008264541626, | |
| "step": 1832 | |
| }, | |
| { | |
| "epoch": 2.1008018327605957, | |
| "grad_norm": 0.10078135132789612, | |
| "learning_rate": 3.0402356623991487e-06, | |
| "loss": 1.2206724882125854, | |
| "step": 1834 | |
| }, | |
| { | |
| "epoch": 2.1030927835051547, | |
| "grad_norm": 0.12359591573476791, | |
| "learning_rate": 3.0307272424850043e-06, | |
| "loss": 1.1318553686141968, | |
| "step": 1836 | |
| }, | |
| { | |
| "epoch": 2.1053837342497137, | |
| "grad_norm": 0.040033888071775436, | |
| "learning_rate": 3.021234570658351e-06, | |
| "loss": 1.0623432397842407, | |
| "step": 1838 | |
| }, | |
| { | |
| "epoch": 2.1076746849942727, | |
| "grad_norm": 0.10040728002786636, | |
| "learning_rate": 3.011757707459857e-06, | |
| "loss": 1.1741615533828735, | |
| "step": 1840 | |
| }, | |
| { | |
| "epoch": 2.1099656357388317, | |
| "grad_norm": 0.06718029826879501, | |
| "learning_rate": 3.0022967133293716e-06, | |
| "loss": 0.9849785566329956, | |
| "step": 1842 | |
| }, | |
| { | |
| "epoch": 2.1122565864833907, | |
| "grad_norm": 0.14430569112300873, | |
| "learning_rate": 2.992851648605536e-06, | |
| "loss": 1.1471836566925049, | |
| "step": 1844 | |
| }, | |
| { | |
| "epoch": 2.1145475372279496, | |
| "grad_norm": 0.20142653584480286, | |
| "learning_rate": 2.9834225735254e-06, | |
| "loss": 0.6019679307937622, | |
| "step": 1846 | |
| }, | |
| { | |
| "epoch": 2.1168384879725086, | |
| "grad_norm": 0.1241523027420044, | |
| "learning_rate": 2.9740095482240382e-06, | |
| "loss": 1.193034052848816, | |
| "step": 1848 | |
| }, | |
| { | |
| "epoch": 2.1191294387170676, | |
| "grad_norm": 0.035164572298526764, | |
| "learning_rate": 2.964612632734163e-06, | |
| "loss": 1.113813877105713, | |
| "step": 1850 | |
| }, | |
| { | |
| "epoch": 2.1214203894616266, | |
| "grad_norm": 0.06513768434524536, | |
| "learning_rate": 2.9552318869857464e-06, | |
| "loss": 0.8912978172302246, | |
| "step": 1852 | |
| }, | |
| { | |
| "epoch": 2.1237113402061856, | |
| "grad_norm": 0.04359002783894539, | |
| "learning_rate": 2.9458673708056385e-06, | |
| "loss": 1.1514525413513184, | |
| "step": 1854 | |
| }, | |
| { | |
| "epoch": 2.1260022909507446, | |
| "grad_norm": 0.03506020829081535, | |
| "learning_rate": 2.936519143917179e-06, | |
| "loss": 1.0918513536453247, | |
| "step": 1856 | |
| }, | |
| { | |
| "epoch": 2.1282932416953035, | |
| "grad_norm": 0.11882031708955765, | |
| "learning_rate": 2.9271872659398224e-06, | |
| "loss": 1.1890110969543457, | |
| "step": 1858 | |
| }, | |
| { | |
| "epoch": 2.1305841924398625, | |
| "grad_norm": 0.12055717408657074, | |
| "learning_rate": 2.9178717963887557e-06, | |
| "loss": 1.0867390632629395, | |
| "step": 1860 | |
| }, | |
| { | |
| "epoch": 2.1328751431844215, | |
| "grad_norm": 0.1105470135807991, | |
| "learning_rate": 2.90857279467452e-06, | |
| "loss": 1.144205093383789, | |
| "step": 1862 | |
| }, | |
| { | |
| "epoch": 2.1351660939289805, | |
| "grad_norm": 0.15523314476013184, | |
| "learning_rate": 2.899290320102631e-06, | |
| "loss": 0.925602376461029, | |
| "step": 1864 | |
| }, | |
| { | |
| "epoch": 2.1374570446735395, | |
| "grad_norm": 0.21147528290748596, | |
| "learning_rate": 2.890024431873201e-06, | |
| "loss": 0.8735249638557434, | |
| "step": 1866 | |
| }, | |
| { | |
| "epoch": 2.1397479954180985, | |
| "grad_norm": 0.033189158886671066, | |
| "learning_rate": 2.880775189080555e-06, | |
| "loss": 1.0115811824798584, | |
| "step": 1868 | |
| }, | |
| { | |
| "epoch": 2.1420389461626574, | |
| "grad_norm": 0.061869971454143524, | |
| "learning_rate": 2.8715426507128687e-06, | |
| "loss": 1.066664695739746, | |
| "step": 1870 | |
| }, | |
| { | |
| "epoch": 2.1443298969072164, | |
| "grad_norm": 0.10503748059272766, | |
| "learning_rate": 2.862326875651777e-06, | |
| "loss": 1.129685401916504, | |
| "step": 1872 | |
| }, | |
| { | |
| "epoch": 2.1466208476517754, | |
| "grad_norm": 0.10907737910747528, | |
| "learning_rate": 2.853127922672008e-06, | |
| "loss": 1.0200390815734863, | |
| "step": 1874 | |
| }, | |
| { | |
| "epoch": 2.1489117983963344, | |
| "grad_norm": 0.1520504504442215, | |
| "learning_rate": 2.8439458504410044e-06, | |
| "loss": 1.0280253887176514, | |
| "step": 1876 | |
| }, | |
| { | |
| "epoch": 2.1512027491408934, | |
| "grad_norm": 0.12227918952703476, | |
| "learning_rate": 2.834780717518547e-06, | |
| "loss": 1.050950527191162, | |
| "step": 1878 | |
| }, | |
| { | |
| "epoch": 2.1534936998854524, | |
| "grad_norm": 0.03874862939119339, | |
| "learning_rate": 2.825632582356387e-06, | |
| "loss": 1.0017433166503906, | |
| "step": 1880 | |
| }, | |
| { | |
| "epoch": 2.1557846506300113, | |
| "grad_norm": 0.04391537606716156, | |
| "learning_rate": 2.8165015032978703e-06, | |
| "loss": 1.0685606002807617, | |
| "step": 1882 | |
| }, | |
| { | |
| "epoch": 2.1580756013745703, | |
| "grad_norm": 0.04769907891750336, | |
| "learning_rate": 2.8073875385775663e-06, | |
| "loss": 1.186462640762329, | |
| "step": 1884 | |
| }, | |
| { | |
| "epoch": 2.1603665521191293, | |
| "grad_norm": 0.032127585262060165, | |
| "learning_rate": 2.7982907463208896e-06, | |
| "loss": 0.895263135433197, | |
| "step": 1886 | |
| }, | |
| { | |
| "epoch": 2.1626575028636883, | |
| "grad_norm": 0.14522826671600342, | |
| "learning_rate": 2.7892111845437416e-06, | |
| "loss": 1.0644443035125732, | |
| "step": 1888 | |
| }, | |
| { | |
| "epoch": 2.1649484536082473, | |
| "grad_norm": 0.11006541550159454, | |
| "learning_rate": 2.780148911152135e-06, | |
| "loss": 1.0343241691589355, | |
| "step": 1890 | |
| }, | |
| { | |
| "epoch": 2.1672394043528063, | |
| "grad_norm": 0.03823330998420715, | |
| "learning_rate": 2.7711039839418187e-06, | |
| "loss": 1.0082577466964722, | |
| "step": 1892 | |
| }, | |
| { | |
| "epoch": 2.1695303550973652, | |
| "grad_norm": 0.03993887081742287, | |
| "learning_rate": 2.762076460597919e-06, | |
| "loss": 1.0605108737945557, | |
| "step": 1894 | |
| }, | |
| { | |
| "epoch": 2.1718213058419242, | |
| "grad_norm": 0.17623330652713776, | |
| "learning_rate": 2.7530663986945605e-06, | |
| "loss": 0.8171453475952148, | |
| "step": 1896 | |
| }, | |
| { | |
| "epoch": 2.174112256586483, | |
| "grad_norm": 0.03557036072015762, | |
| "learning_rate": 2.7440738556945122e-06, | |
| "loss": 0.9885848760604858, | |
| "step": 1898 | |
| }, | |
| { | |
| "epoch": 2.176403207331042, | |
| "grad_norm": 0.11463136225938797, | |
| "learning_rate": 2.735098888948811e-06, | |
| "loss": 0.8385453820228577, | |
| "step": 1900 | |
| }, | |
| { | |
| "epoch": 2.178694158075601, | |
| "grad_norm": 0.20864970982074738, | |
| "learning_rate": 2.726141555696399e-06, | |
| "loss": 1.330157995223999, | |
| "step": 1902 | |
| }, | |
| { | |
| "epoch": 2.18098510882016, | |
| "grad_norm": 0.10703415423631668, | |
| "learning_rate": 2.7172019130637605e-06, | |
| "loss": 1.1223435401916504, | |
| "step": 1904 | |
| }, | |
| { | |
| "epoch": 2.183276059564719, | |
| "grad_norm": 0.10402292013168335, | |
| "learning_rate": 2.708280018064551e-06, | |
| "loss": 0.5987660884857178, | |
| "step": 1906 | |
| }, | |
| { | |
| "epoch": 2.1855670103092786, | |
| "grad_norm": 0.1397983282804489, | |
| "learning_rate": 2.699375927599245e-06, | |
| "loss": 1.0533286333084106, | |
| "step": 1908 | |
| }, | |
| { | |
| "epoch": 2.1878579610538376, | |
| "grad_norm": 0.130964994430542, | |
| "learning_rate": 2.6904896984547614e-06, | |
| "loss": 0.5356656908988953, | |
| "step": 1910 | |
| }, | |
| { | |
| "epoch": 2.1901489117983965, | |
| "grad_norm": 0.11952181160449982, | |
| "learning_rate": 2.6816213873041086e-06, | |
| "loss": 1.1443605422973633, | |
| "step": 1912 | |
| }, | |
| { | |
| "epoch": 2.1924398625429555, | |
| "grad_norm": 0.1906312108039856, | |
| "learning_rate": 2.6727710507060166e-06, | |
| "loss": 0.44051751494407654, | |
| "step": 1914 | |
| }, | |
| { | |
| "epoch": 2.1947308132875145, | |
| "grad_norm": 0.10350323468446732, | |
| "learning_rate": 2.6639387451045894e-06, | |
| "loss": 1.1139777898788452, | |
| "step": 1916 | |
| }, | |
| { | |
| "epoch": 2.1970217640320735, | |
| "grad_norm": 0.15858761966228485, | |
| "learning_rate": 2.6551245268289293e-06, | |
| "loss": 1.123609185218811, | |
| "step": 1918 | |
| }, | |
| { | |
| "epoch": 2.1993127147766325, | |
| "grad_norm": 0.1445668339729309, | |
| "learning_rate": 2.6463284520927834e-06, | |
| "loss": 0.9767943620681763, | |
| "step": 1920 | |
| }, | |
| { | |
| "epoch": 2.2016036655211915, | |
| "grad_norm": 0.10581092536449432, | |
| "learning_rate": 2.63755057699419e-06, | |
| "loss": 0.9770960807800293, | |
| "step": 1922 | |
| }, | |
| { | |
| "epoch": 2.2038946162657505, | |
| "grad_norm": 0.11240138858556747, | |
| "learning_rate": 2.6287909575151166e-06, | |
| "loss": 1.1265208721160889, | |
| "step": 1924 | |
| }, | |
| { | |
| "epoch": 2.2061855670103094, | |
| "grad_norm": 0.2250319868326187, | |
| "learning_rate": 2.620049649521103e-06, | |
| "loss": 0.9786791801452637, | |
| "step": 1926 | |
| }, | |
| { | |
| "epoch": 2.2084765177548684, | |
| "grad_norm": 0.10950709134340286, | |
| "learning_rate": 2.6113267087609018e-06, | |
| "loss": 1.1055612564086914, | |
| "step": 1928 | |
| }, | |
| { | |
| "epoch": 2.2107674684994274, | |
| "grad_norm": 0.1569567322731018, | |
| "learning_rate": 2.6026221908661307e-06, | |
| "loss": 1.0753288269042969, | |
| "step": 1930 | |
| }, | |
| { | |
| "epoch": 2.2130584192439864, | |
| "grad_norm": 0.10235545039176941, | |
| "learning_rate": 2.5939361513509124e-06, | |
| "loss": 0.6077594757080078, | |
| "step": 1932 | |
| }, | |
| { | |
| "epoch": 2.2153493699885454, | |
| "grad_norm": 0.12555944919586182, | |
| "learning_rate": 2.5852686456115194e-06, | |
| "loss": 1.0947010517120361, | |
| "step": 1934 | |
| }, | |
| { | |
| "epoch": 2.2176403207331044, | |
| "grad_norm": 0.0357440747320652, | |
| "learning_rate": 2.5766197289260277e-06, | |
| "loss": 1.0835474729537964, | |
| "step": 1936 | |
| }, | |
| { | |
| "epoch": 2.2199312714776633, | |
| "grad_norm": 0.035975709557533264, | |
| "learning_rate": 2.5679894564539513e-06, | |
| "loss": 1.0810333490371704, | |
| "step": 1938 | |
| }, | |
| { | |
| "epoch": 2.2222222222222223, | |
| "grad_norm": 0.12334857881069183, | |
| "learning_rate": 2.5593778832359062e-06, | |
| "loss": 0.9912731051445007, | |
| "step": 1940 | |
| }, | |
| { | |
| "epoch": 2.2245131729667813, | |
| "grad_norm": 0.03624502941966057, | |
| "learning_rate": 2.550785064193246e-06, | |
| "loss": 0.7342801094055176, | |
| "step": 1942 | |
| }, | |
| { | |
| "epoch": 2.2268041237113403, | |
| "grad_norm": 0.10606859624385834, | |
| "learning_rate": 2.5422110541277225e-06, | |
| "loss": 1.1674457788467407, | |
| "step": 1944 | |
| }, | |
| { | |
| "epoch": 2.2290950744558993, | |
| "grad_norm": 0.16007988154888153, | |
| "learning_rate": 2.5336559077211285e-06, | |
| "loss": 0.7999182939529419, | |
| "step": 1946 | |
| }, | |
| { | |
| "epoch": 2.2313860252004583, | |
| "grad_norm": 0.21072755753993988, | |
| "learning_rate": 2.5251196795349476e-06, | |
| "loss": 0.7957800030708313, | |
| "step": 1948 | |
| }, | |
| { | |
| "epoch": 2.2336769759450172, | |
| "grad_norm": 0.11326409876346588, | |
| "learning_rate": 2.5166024240100175e-06, | |
| "loss": 1.092682123184204, | |
| "step": 1950 | |
| }, | |
| { | |
| "epoch": 2.2359679266895762, | |
| "grad_norm": 0.03987419232726097, | |
| "learning_rate": 2.508104195466169e-06, | |
| "loss": 1.0951638221740723, | |
| "step": 1952 | |
| }, | |
| { | |
| "epoch": 2.238258877434135, | |
| "grad_norm": 0.2164270430803299, | |
| "learning_rate": 2.4996250481018917e-06, | |
| "loss": 0.6124863624572754, | |
| "step": 1954 | |
| }, | |
| { | |
| "epoch": 2.240549828178694, | |
| "grad_norm": 0.1036994680762291, | |
| "learning_rate": 2.491165035993977e-06, | |
| "loss": 1.0700827836990356, | |
| "step": 1956 | |
| }, | |
| { | |
| "epoch": 2.242840778923253, | |
| "grad_norm": 0.03502170741558075, | |
| "learning_rate": 2.4827242130971822e-06, | |
| "loss": 1.0163440704345703, | |
| "step": 1958 | |
| }, | |
| { | |
| "epoch": 2.245131729667812, | |
| "grad_norm": 0.04055381193757057, | |
| "learning_rate": 2.4743026332438835e-06, | |
| "loss": 1.0671887397766113, | |
| "step": 1960 | |
| }, | |
| { | |
| "epoch": 2.247422680412371, | |
| "grad_norm": 0.15526892244815826, | |
| "learning_rate": 2.4659003501437302e-06, | |
| "loss": 1.065315842628479, | |
| "step": 1962 | |
| }, | |
| { | |
| "epoch": 2.24971363115693, | |
| "grad_norm": 0.10842100530862808, | |
| "learning_rate": 2.4575174173833076e-06, | |
| "loss": 1.0709346532821655, | |
| "step": 1964 | |
| }, | |
| { | |
| "epoch": 2.252004581901489, | |
| "grad_norm": 0.1467980593442917, | |
| "learning_rate": 2.449153888425786e-06, | |
| "loss": 1.220167875289917, | |
| "step": 1966 | |
| }, | |
| { | |
| "epoch": 2.254295532646048, | |
| "grad_norm": 0.0432235449552536, | |
| "learning_rate": 2.4408098166105905e-06, | |
| "loss": 1.1115278005599976, | |
| "step": 1968 | |
| }, | |
| { | |
| "epoch": 2.256586483390607, | |
| "grad_norm": 0.07890545576810837, | |
| "learning_rate": 2.4324852551530546e-06, | |
| "loss": 1.2021079063415527, | |
| "step": 1970 | |
| }, | |
| { | |
| "epoch": 2.258877434135166, | |
| "grad_norm": 0.10672356933355331, | |
| "learning_rate": 2.424180257144082e-06, | |
| "loss": 0.9553957581520081, | |
| "step": 1972 | |
| }, | |
| { | |
| "epoch": 2.261168384879725, | |
| "grad_norm": 0.04205801710486412, | |
| "learning_rate": 2.4158948755498097e-06, | |
| "loss": 1.057405710220337, | |
| "step": 1974 | |
| }, | |
| { | |
| "epoch": 2.263459335624284, | |
| "grad_norm": 0.03718230873346329, | |
| "learning_rate": 2.407629163211264e-06, | |
| "loss": 1.1039612293243408, | |
| "step": 1976 | |
| }, | |
| { | |
| "epoch": 2.265750286368843, | |
| "grad_norm": 0.15723231434822083, | |
| "learning_rate": 2.399383172844033e-06, | |
| "loss": 1.0969688892364502, | |
| "step": 1978 | |
| }, | |
| { | |
| "epoch": 2.268041237113402, | |
| "grad_norm": 0.1052173301577568, | |
| "learning_rate": 2.3911569570379226e-06, | |
| "loss": 0.9735721349716187, | |
| "step": 1980 | |
| }, | |
| { | |
| "epoch": 2.270332187857961, | |
| "grad_norm": 0.28806793689727783, | |
| "learning_rate": 2.3829505682566274e-06, | |
| "loss": 0.8661947250366211, | |
| "step": 1982 | |
| }, | |
| { | |
| "epoch": 2.27262313860252, | |
| "grad_norm": 0.043717917054891586, | |
| "learning_rate": 2.374764058837388e-06, | |
| "loss": 1.085542917251587, | |
| "step": 1984 | |
| }, | |
| { | |
| "epoch": 2.274914089347079, | |
| "grad_norm": 0.11502456665039062, | |
| "learning_rate": 2.366597480990663e-06, | |
| "loss": 1.1323692798614502, | |
| "step": 1986 | |
| }, | |
| { | |
| "epoch": 2.277205040091638, | |
| "grad_norm": 0.1218227744102478, | |
| "learning_rate": 2.3584508867998023e-06, | |
| "loss": 1.165204405784607, | |
| "step": 1988 | |
| }, | |
| { | |
| "epoch": 2.279495990836197, | |
| "grad_norm": 0.04732652008533478, | |
| "learning_rate": 2.350324328220697e-06, | |
| "loss": 1.1347787380218506, | |
| "step": 1990 | |
| }, | |
| { | |
| "epoch": 2.281786941580756, | |
| "grad_norm": 0.051126670092344284, | |
| "learning_rate": 2.3422178570814656e-06, | |
| "loss": 1.190324068069458, | |
| "step": 1992 | |
| }, | |
| { | |
| "epoch": 2.284077892325315, | |
| "grad_norm": 0.03463529422879219, | |
| "learning_rate": 2.3341315250821102e-06, | |
| "loss": 0.9339711666107178, | |
| "step": 1994 | |
| }, | |
| { | |
| "epoch": 2.286368843069874, | |
| "grad_norm": 0.18917769193649292, | |
| "learning_rate": 2.326065383794202e-06, | |
| "loss": 0.7177498936653137, | |
| "step": 1996 | |
| }, | |
| { | |
| "epoch": 2.288659793814433, | |
| "grad_norm": 0.16513919830322266, | |
| "learning_rate": 2.3180194846605367e-06, | |
| "loss": 1.1773316860198975, | |
| "step": 1998 | |
| }, | |
| { | |
| "epoch": 2.290950744558992, | |
| "grad_norm": 0.15770074725151062, | |
| "learning_rate": 2.3099938789948147e-06, | |
| "loss": 0.9601297378540039, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 2.293241695303551, | |
| "grad_norm": 0.11050211638212204, | |
| "learning_rate": 2.301988617981314e-06, | |
| "loss": 0.625648021697998, | |
| "step": 2002 | |
| }, | |
| { | |
| "epoch": 2.29553264604811, | |
| "grad_norm": 0.24079091846942902, | |
| "learning_rate": 2.294003752674557e-06, | |
| "loss": 0.862073540687561, | |
| "step": 2004 | |
| }, | |
| { | |
| "epoch": 2.297823596792669, | |
| "grad_norm": 0.22309234738349915, | |
| "learning_rate": 2.2860393339990005e-06, | |
| "loss": 0.4852597117424011, | |
| "step": 2006 | |
| }, | |
| { | |
| "epoch": 2.3001145475372278, | |
| "grad_norm": 0.05693449079990387, | |
| "learning_rate": 2.2780954127486883e-06, | |
| "loss": 1.1776564121246338, | |
| "step": 2008 | |
| }, | |
| { | |
| "epoch": 2.3024054982817868, | |
| "grad_norm": 0.23189516365528107, | |
| "learning_rate": 2.270172039586948e-06, | |
| "loss": 0.8554977774620056, | |
| "step": 2010 | |
| }, | |
| { | |
| "epoch": 2.3046964490263457, | |
| "grad_norm": 0.11551795154809952, | |
| "learning_rate": 2.2622692650460516e-06, | |
| "loss": 1.0636159181594849, | |
| "step": 2012 | |
| }, | |
| { | |
| "epoch": 2.3069873997709047, | |
| "grad_norm": 0.03734152764081955, | |
| "learning_rate": 2.25438713952691e-06, | |
| "loss": 0.9173470735549927, | |
| "step": 2014 | |
| }, | |
| { | |
| "epoch": 2.3092783505154637, | |
| "grad_norm": 0.10906286537647247, | |
| "learning_rate": 2.2465257132987388e-06, | |
| "loss": 0.8785110712051392, | |
| "step": 2016 | |
| }, | |
| { | |
| "epoch": 2.3115693012600227, | |
| "grad_norm": 0.03761937841773033, | |
| "learning_rate": 2.2386850364987357e-06, | |
| "loss": 1.0721259117126465, | |
| "step": 2018 | |
| }, | |
| { | |
| "epoch": 2.3138602520045817, | |
| "grad_norm": 0.15644510090351105, | |
| "learning_rate": 2.230865159131774e-06, | |
| "loss": 0.10478156059980392, | |
| "step": 2020 | |
| }, | |
| { | |
| "epoch": 2.3161512027491407, | |
| "grad_norm": 0.18335962295532227, | |
| "learning_rate": 2.2230661310700717e-06, | |
| "loss": 1.056382179260254, | |
| "step": 2022 | |
| }, | |
| { | |
| "epoch": 2.3184421534936996, | |
| "grad_norm": 0.3297891318798065, | |
| "learning_rate": 2.215288002052882e-06, | |
| "loss": 0.5145710706710815, | |
| "step": 2024 | |
| }, | |
| { | |
| "epoch": 2.320733104238259, | |
| "grad_norm": 0.1070839911699295, | |
| "learning_rate": 2.207530821686166e-06, | |
| "loss": 1.017712950706482, | |
| "step": 2026 | |
| }, | |
| { | |
| "epoch": 2.323024054982818, | |
| "grad_norm": 0.24218393862247467, | |
| "learning_rate": 2.199794639442287e-06, | |
| "loss": 0.7674975395202637, | |
| "step": 2028 | |
| }, | |
| { | |
| "epoch": 2.325315005727377, | |
| "grad_norm": 0.15954658389091492, | |
| "learning_rate": 2.1920795046596887e-06, | |
| "loss": 1.3201217651367188, | |
| "step": 2030 | |
| }, | |
| { | |
| "epoch": 2.327605956471936, | |
| "grad_norm": 0.12265370041131973, | |
| "learning_rate": 2.1843854665425816e-06, | |
| "loss": 0.5814427137374878, | |
| "step": 2032 | |
| }, | |
| { | |
| "epoch": 2.329896907216495, | |
| "grad_norm": 0.10698456317186356, | |
| "learning_rate": 2.176712574160632e-06, | |
| "loss": 0.9436638355255127, | |
| "step": 2034 | |
| }, | |
| { | |
| "epoch": 2.332187857961054, | |
| "grad_norm": 0.19457845389842987, | |
| "learning_rate": 2.1690608764486417e-06, | |
| "loss": 1.0933640003204346, | |
| "step": 2036 | |
| }, | |
| { | |
| "epoch": 2.334478808705613, | |
| "grad_norm": 0.0642176941037178, | |
| "learning_rate": 2.1614304222062447e-06, | |
| "loss": 0.8278051614761353, | |
| "step": 2038 | |
| }, | |
| { | |
| "epoch": 2.336769759450172, | |
| "grad_norm": 0.039123423397541046, | |
| "learning_rate": 2.1538212600975927e-06, | |
| "loss": 1.037153959274292, | |
| "step": 2040 | |
| }, | |
| { | |
| "epoch": 2.339060710194731, | |
| "grad_norm": 0.03765871003270149, | |
| "learning_rate": 2.146233438651042e-06, | |
| "loss": 0.9489548206329346, | |
| "step": 2042 | |
| }, | |
| { | |
| "epoch": 2.34135166093929, | |
| "grad_norm": 0.03357570245862007, | |
| "learning_rate": 2.138667006258849e-06, | |
| "loss": 0.5923043489456177, | |
| "step": 2044 | |
| }, | |
| { | |
| "epoch": 2.343642611683849, | |
| "grad_norm": 0.1226658970117569, | |
| "learning_rate": 2.131122011176854e-06, | |
| "loss": 1.0477964878082275, | |
| "step": 2046 | |
| }, | |
| { | |
| "epoch": 2.345933562428408, | |
| "grad_norm": 0.278041273355484, | |
| "learning_rate": 2.1235985015241832e-06, | |
| "loss": 0.8828730583190918, | |
| "step": 2048 | |
| }, | |
| { | |
| "epoch": 2.348224513172967, | |
| "grad_norm": 0.039722733199596405, | |
| "learning_rate": 2.1160965252829344e-06, | |
| "loss": 1.1027134656906128, | |
| "step": 2050 | |
| }, | |
| { | |
| "epoch": 2.350515463917526, | |
| "grad_norm": 0.2350229173898697, | |
| "learning_rate": 2.108616130297876e-06, | |
| "loss": 0.710883617401123, | |
| "step": 2052 | |
| }, | |
| { | |
| "epoch": 2.352806414662085, | |
| "grad_norm": 0.10858850926160812, | |
| "learning_rate": 2.101157364276134e-06, | |
| "loss": 1.2099969387054443, | |
| "step": 2054 | |
| }, | |
| { | |
| "epoch": 2.355097365406644, | |
| "grad_norm": 0.10385426133871078, | |
| "learning_rate": 2.0937202747868974e-06, | |
| "loss": 1.068502426147461, | |
| "step": 2056 | |
| }, | |
| { | |
| "epoch": 2.357388316151203, | |
| "grad_norm": 0.12905098497867584, | |
| "learning_rate": 2.08630490926111e-06, | |
| "loss": 0.9399811029434204, | |
| "step": 2058 | |
| }, | |
| { | |
| "epoch": 2.359679266895762, | |
| "grad_norm": 0.1345222443342209, | |
| "learning_rate": 2.0789113149911678e-06, | |
| "loss": 1.1330405473709106, | |
| "step": 2060 | |
| }, | |
| { | |
| "epoch": 2.361970217640321, | |
| "grad_norm": 0.3326592445373535, | |
| "learning_rate": 2.071539539130619e-06, | |
| "loss": 0.8196538686752319, | |
| "step": 2062 | |
| }, | |
| { | |
| "epoch": 2.3642611683848798, | |
| "grad_norm": 0.03900545835494995, | |
| "learning_rate": 2.0641896286938586e-06, | |
| "loss": 1.1343563795089722, | |
| "step": 2064 | |
| }, | |
| { | |
| "epoch": 2.3665521191294387, | |
| "grad_norm": 0.30328232049942017, | |
| "learning_rate": 2.0568616305558362e-06, | |
| "loss": 0.8793677687644958, | |
| "step": 2066 | |
| }, | |
| { | |
| "epoch": 2.3688430698739977, | |
| "grad_norm": 0.11676283180713654, | |
| "learning_rate": 2.0495555914517517e-06, | |
| "loss": 1.1062278747558594, | |
| "step": 2068 | |
| }, | |
| { | |
| "epoch": 2.3711340206185567, | |
| "grad_norm": 0.11668711155653, | |
| "learning_rate": 2.042271557976758e-06, | |
| "loss": 1.1123981475830078, | |
| "step": 2070 | |
| }, | |
| { | |
| "epoch": 2.3734249713631157, | |
| "grad_norm": 0.41015803813934326, | |
| "learning_rate": 2.0350095765856674e-06, | |
| "loss": 0.6100105047225952, | |
| "step": 2072 | |
| }, | |
| { | |
| "epoch": 2.3757159221076747, | |
| "grad_norm": 0.04040496423840523, | |
| "learning_rate": 2.0277696935926465e-06, | |
| "loss": 1.1077494621276855, | |
| "step": 2074 | |
| }, | |
| { | |
| "epoch": 2.3780068728522337, | |
| "grad_norm": 0.11139100790023804, | |
| "learning_rate": 2.020551955170932e-06, | |
| "loss": 0.9434921145439148, | |
| "step": 2076 | |
| }, | |
| { | |
| "epoch": 2.3802978235967927, | |
| "grad_norm": 0.10817384719848633, | |
| "learning_rate": 2.0133564073525283e-06, | |
| "loss": 0.7552821040153503, | |
| "step": 2078 | |
| }, | |
| { | |
| "epoch": 2.3825887743413516, | |
| "grad_norm": 0.038600143045186996, | |
| "learning_rate": 2.0061830960279182e-06, | |
| "loss": 1.0198276042938232, | |
| "step": 2080 | |
| }, | |
| { | |
| "epoch": 2.3848797250859106, | |
| "grad_norm": 0.13652552664279938, | |
| "learning_rate": 1.9990320669457664e-06, | |
| "loss": 1.120629072189331, | |
| "step": 2082 | |
| }, | |
| { | |
| "epoch": 2.3871706758304696, | |
| "grad_norm": 0.05311482027173042, | |
| "learning_rate": 1.9919033657126297e-06, | |
| "loss": 1.1022953987121582, | |
| "step": 2084 | |
| }, | |
| { | |
| "epoch": 2.3894616265750286, | |
| "grad_norm": 0.1396331489086151, | |
| "learning_rate": 1.9847970377926723e-06, | |
| "loss": 1.1291303634643555, | |
| "step": 2086 | |
| }, | |
| { | |
| "epoch": 2.3917525773195876, | |
| "grad_norm": 0.03835023194551468, | |
| "learning_rate": 1.9777131285073608e-06, | |
| "loss": 1.0857717990875244, | |
| "step": 2088 | |
| }, | |
| { | |
| "epoch": 2.3940435280641466, | |
| "grad_norm": 0.06169874966144562, | |
| "learning_rate": 1.9706516830351915e-06, | |
| "loss": 1.230233907699585, | |
| "step": 2090 | |
| }, | |
| { | |
| "epoch": 2.3963344788087055, | |
| "grad_norm": 0.03643700107932091, | |
| "learning_rate": 1.963612746411389e-06, | |
| "loss": 1.1737711429595947, | |
| "step": 2092 | |
| }, | |
| { | |
| "epoch": 2.3986254295532645, | |
| "grad_norm": 0.039749957621097565, | |
| "learning_rate": 1.9565963635276326e-06, | |
| "loss": 1.073061466217041, | |
| "step": 2094 | |
| }, | |
| { | |
| "epoch": 2.4009163802978235, | |
| "grad_norm": 0.13701081275939941, | |
| "learning_rate": 1.949602579131754e-06, | |
| "loss": 1.0454206466674805, | |
| "step": 2096 | |
| }, | |
| { | |
| "epoch": 2.4032073310423825, | |
| "grad_norm": 0.1076841652393341, | |
| "learning_rate": 1.9426314378274663e-06, | |
| "loss": 1.114311695098877, | |
| "step": 2098 | |
| }, | |
| { | |
| "epoch": 2.4054982817869415, | |
| "grad_norm": 0.3381866216659546, | |
| "learning_rate": 1.9356829840740705e-06, | |
| "loss": 0.5539225339889526, | |
| "step": 2100 | |
| }, | |
| { | |
| "epoch": 2.4077892325315005, | |
| "grad_norm": 0.03752079978585243, | |
| "learning_rate": 1.928757262186177e-06, | |
| "loss": 0.5570148229598999, | |
| "step": 2102 | |
| }, | |
| { | |
| "epoch": 2.4100801832760594, | |
| "grad_norm": 0.11114965379238129, | |
| "learning_rate": 1.9218543163334198e-06, | |
| "loss": 1.1547082662582397, | |
| "step": 2104 | |
| }, | |
| { | |
| "epoch": 2.4123711340206184, | |
| "grad_norm": 0.13616393506526947, | |
| "learning_rate": 1.914974190540174e-06, | |
| "loss": 1.0737618207931519, | |
| "step": 2106 | |
| }, | |
| { | |
| "epoch": 2.4146620847651774, | |
| "grad_norm": 0.034298207610845566, | |
| "learning_rate": 1.9081169286852827e-06, | |
| "loss": 0.9443414807319641, | |
| "step": 2108 | |
| }, | |
| { | |
| "epoch": 2.4169530355097364, | |
| "grad_norm": 0.2994533181190491, | |
| "learning_rate": 1.9012825745017633e-06, | |
| "loss": 0.6153253316879272, | |
| "step": 2110 | |
| }, | |
| { | |
| "epoch": 2.4192439862542954, | |
| "grad_norm": 0.041227761656045914, | |
| "learning_rate": 1.8944711715765446e-06, | |
| "loss": 1.2450883388519287, | |
| "step": 2112 | |
| }, | |
| { | |
| "epoch": 2.4215349369988544, | |
| "grad_norm": 0.11088487505912781, | |
| "learning_rate": 1.8876827633501784e-06, | |
| "loss": 1.1472076177597046, | |
| "step": 2114 | |
| }, | |
| { | |
| "epoch": 2.4238258877434133, | |
| "grad_norm": 0.14382129907608032, | |
| "learning_rate": 1.8809173931165614e-06, | |
| "loss": 0.9431512355804443, | |
| "step": 2116 | |
| }, | |
| { | |
| "epoch": 2.4261168384879723, | |
| "grad_norm": 0.12498413771390915, | |
| "learning_rate": 1.8741751040226663e-06, | |
| "loss": 1.1251745223999023, | |
| "step": 2118 | |
| }, | |
| { | |
| "epoch": 2.4284077892325313, | |
| "grad_norm": 0.13061866164207458, | |
| "learning_rate": 1.8674559390682629e-06, | |
| "loss": 1.1227977275848389, | |
| "step": 2120 | |
| }, | |
| { | |
| "epoch": 2.4306987399770907, | |
| "grad_norm": 0.08055675029754639, | |
| "learning_rate": 1.8607599411056444e-06, | |
| "loss": 0.8968017101287842, | |
| "step": 2122 | |
| }, | |
| { | |
| "epoch": 2.4329896907216497, | |
| "grad_norm": 0.1616377979516983, | |
| "learning_rate": 1.8540871528393497e-06, | |
| "loss": 1.0171732902526855, | |
| "step": 2124 | |
| }, | |
| { | |
| "epoch": 2.4352806414662087, | |
| "grad_norm": 0.048232708126306534, | |
| "learning_rate": 1.8474376168259003e-06, | |
| "loss": 1.138751745223999, | |
| "step": 2126 | |
| }, | |
| { | |
| "epoch": 2.4375715922107677, | |
| "grad_norm": 0.11059226095676422, | |
| "learning_rate": 1.84081137547352e-06, | |
| "loss": 1.104867696762085, | |
| "step": 2128 | |
| }, | |
| { | |
| "epoch": 2.4398625429553267, | |
| "grad_norm": 0.11222375184297562, | |
| "learning_rate": 1.83420847104187e-06, | |
| "loss": 0.5774714946746826, | |
| "step": 2130 | |
| }, | |
| { | |
| "epoch": 2.4421534936998857, | |
| "grad_norm": 0.03512409329414368, | |
| "learning_rate": 1.8276289456417784e-06, | |
| "loss": 1.068396806716919, | |
| "step": 2132 | |
| }, | |
| { | |
| "epoch": 2.4444444444444446, | |
| "grad_norm": 0.042172499001026154, | |
| "learning_rate": 1.821072841234966e-06, | |
| "loss": 1.0330781936645508, | |
| "step": 2134 | |
| }, | |
| { | |
| "epoch": 2.4467353951890036, | |
| "grad_norm": 0.11471384018659592, | |
| "learning_rate": 1.8145401996337877e-06, | |
| "loss": 1.0819417238235474, | |
| "step": 2136 | |
| }, | |
| { | |
| "epoch": 2.4490263459335626, | |
| "grad_norm": 0.04197245091199875, | |
| "learning_rate": 1.80803106250096e-06, | |
| "loss": 0.9713804125785828, | |
| "step": 2138 | |
| }, | |
| { | |
| "epoch": 2.4513172966781216, | |
| "grad_norm": 0.14753484725952148, | |
| "learning_rate": 1.8015454713492985e-06, | |
| "loss": 1.2071187496185303, | |
| "step": 2140 | |
| }, | |
| { | |
| "epoch": 2.4536082474226806, | |
| "grad_norm": 0.12013762444257736, | |
| "learning_rate": 1.7950834675414498e-06, | |
| "loss": 1.0779211521148682, | |
| "step": 2142 | |
| }, | |
| { | |
| "epoch": 2.4558991981672396, | |
| "grad_norm": 0.11798571050167084, | |
| "learning_rate": 1.788645092289627e-06, | |
| "loss": 1.126075029373169, | |
| "step": 2144 | |
| }, | |
| { | |
| "epoch": 2.4581901489117985, | |
| "grad_norm": 0.13205040991306305, | |
| "learning_rate": 1.7822303866553536e-06, | |
| "loss": 1.0150024890899658, | |
| "step": 2146 | |
| }, | |
| { | |
| "epoch": 2.4604810996563575, | |
| "grad_norm": 0.2663130760192871, | |
| "learning_rate": 1.7758393915491957e-06, | |
| "loss": 0.542580783367157, | |
| "step": 2148 | |
| }, | |
| { | |
| "epoch": 2.4627720504009165, | |
| "grad_norm": 0.0403556264936924, | |
| "learning_rate": 1.7694721477305026e-06, | |
| "loss": 1.0786938667297363, | |
| "step": 2150 | |
| }, | |
| { | |
| "epoch": 2.4650630011454755, | |
| "grad_norm": 0.11474873125553131, | |
| "learning_rate": 1.7631286958071444e-06, | |
| "loss": 1.1576069593429565, | |
| "step": 2152 | |
| }, | |
| { | |
| "epoch": 2.4673539518900345, | |
| "grad_norm": 0.4626518189907074, | |
| "learning_rate": 1.7568090762352591e-06, | |
| "loss": 0.6024706363677979, | |
| "step": 2154 | |
| }, | |
| { | |
| "epoch": 2.4696449026345935, | |
| "grad_norm": 0.10904034972190857, | |
| "learning_rate": 1.7505133293189898e-06, | |
| "loss": 1.116260290145874, | |
| "step": 2156 | |
| }, | |
| { | |
| "epoch": 2.4719358533791524, | |
| "grad_norm": 0.5043454766273499, | |
| "learning_rate": 1.7442414952102279e-06, | |
| "loss": 0.6629682779312134, | |
| "step": 2158 | |
| }, | |
| { | |
| "epoch": 2.4742268041237114, | |
| "grad_norm": 0.04634617269039154, | |
| "learning_rate": 1.7379936139083604e-06, | |
| "loss": 1.1367168426513672, | |
| "step": 2160 | |
| }, | |
| { | |
| "epoch": 2.4765177548682704, | |
| "grad_norm": 0.12527769804000854, | |
| "learning_rate": 1.731769725260008e-06, | |
| "loss": 0.9263055324554443, | |
| "step": 2162 | |
| }, | |
| { | |
| "epoch": 2.4788087056128294, | |
| "grad_norm": 0.0392335020005703, | |
| "learning_rate": 1.7255698689587774e-06, | |
| "loss": 1.012625813484192, | |
| "step": 2164 | |
| }, | |
| { | |
| "epoch": 2.4810996563573884, | |
| "grad_norm": 0.1595272570848465, | |
| "learning_rate": 1.719394084545011e-06, | |
| "loss": 1.2080172300338745, | |
| "step": 2166 | |
| }, | |
| { | |
| "epoch": 2.4833906071019474, | |
| "grad_norm": 0.04092966765165329, | |
| "learning_rate": 1.7132424114055212e-06, | |
| "loss": 1.0967365503311157, | |
| "step": 2168 | |
| }, | |
| { | |
| "epoch": 2.4856815578465064, | |
| "grad_norm": 0.09347018599510193, | |
| "learning_rate": 1.7071148887733552e-06, | |
| "loss": 1.0474753379821777, | |
| "step": 2170 | |
| }, | |
| { | |
| "epoch": 2.4879725085910653, | |
| "grad_norm": 0.042294181883335114, | |
| "learning_rate": 1.7010115557275297e-06, | |
| "loss": 0.9783978462219238, | |
| "step": 2172 | |
| }, | |
| { | |
| "epoch": 2.4902634593356243, | |
| "grad_norm": 0.300382524728775, | |
| "learning_rate": 1.6949324511927993e-06, | |
| "loss": 0.6601334810256958, | |
| "step": 2174 | |
| }, | |
| { | |
| "epoch": 2.4925544100801833, | |
| "grad_norm": 0.10989909619092941, | |
| "learning_rate": 1.6888776139393892e-06, | |
| "loss": 1.036874532699585, | |
| "step": 2176 | |
| }, | |
| { | |
| "epoch": 2.4948453608247423, | |
| "grad_norm": 0.05132288485765457, | |
| "learning_rate": 1.6828470825827626e-06, | |
| "loss": 1.1650950908660889, | |
| "step": 2178 | |
| }, | |
| { | |
| "epoch": 2.4971363115693013, | |
| "grad_norm": 0.09411272406578064, | |
| "learning_rate": 1.6768408955833653e-06, | |
| "loss": 1.0749032497406006, | |
| "step": 2180 | |
| }, | |
| { | |
| "epoch": 2.4994272623138603, | |
| "grad_norm": 0.17432832717895508, | |
| "learning_rate": 1.6708590912463878e-06, | |
| "loss": 0.6857452392578125, | |
| "step": 2182 | |
| }, | |
| { | |
| "epoch": 2.5017182130584192, | |
| "grad_norm": 0.1278747320175171, | |
| "learning_rate": 1.6649017077215146e-06, | |
| "loss": 1.136509656906128, | |
| "step": 2184 | |
| }, | |
| { | |
| "epoch": 2.504009163802978, | |
| "grad_norm": 0.12037646770477295, | |
| "learning_rate": 1.6589687830026835e-06, | |
| "loss": 1.1849336624145508, | |
| "step": 2186 | |
| }, | |
| { | |
| "epoch": 2.506300114547537, | |
| "grad_norm": 0.12011449784040451, | |
| "learning_rate": 1.653060354927844e-06, | |
| "loss": 1.1057056188583374, | |
| "step": 2188 | |
| }, | |
| { | |
| "epoch": 2.508591065292096, | |
| "grad_norm": 0.2916851341724396, | |
| "learning_rate": 1.647176461178714e-06, | |
| "loss": 0.5272727012634277, | |
| "step": 2190 | |
| }, | |
| { | |
| "epoch": 2.510882016036655, | |
| "grad_norm": 0.20276512205600739, | |
| "learning_rate": 1.6413171392805444e-06, | |
| "loss": 0.8722376823425293, | |
| "step": 2192 | |
| }, | |
| { | |
| "epoch": 2.513172966781214, | |
| "grad_norm": 0.11439058929681778, | |
| "learning_rate": 1.6354824266018726e-06, | |
| "loss": 1.018531322479248, | |
| "step": 2194 | |
| }, | |
| { | |
| "epoch": 2.515463917525773, | |
| "grad_norm": 0.03520492836833, | |
| "learning_rate": 1.6296723603542874e-06, | |
| "loss": 0.988182783126831, | |
| "step": 2196 | |
| }, | |
| { | |
| "epoch": 2.517754868270332, | |
| "grad_norm": 0.03757385164499283, | |
| "learning_rate": 1.6238869775921947e-06, | |
| "loss": 1.0032260417938232, | |
| "step": 2198 | |
| }, | |
| { | |
| "epoch": 2.520045819014891, | |
| "grad_norm": 0.11876486241817474, | |
| "learning_rate": 1.6181263152125761e-06, | |
| "loss": 1.1592516899108887, | |
| "step": 2200 | |
| }, | |
| { | |
| "epoch": 2.52233676975945, | |
| "grad_norm": 0.10025777667760849, | |
| "learning_rate": 1.6123904099547593e-06, | |
| "loss": 0.6935728788375854, | |
| "step": 2202 | |
| }, | |
| { | |
| "epoch": 2.524627720504009, | |
| "grad_norm": 0.11772006005048752, | |
| "learning_rate": 1.606679298400175e-06, | |
| "loss": 1.0764268636703491, | |
| "step": 2204 | |
| }, | |
| { | |
| "epoch": 2.526918671248568, | |
| "grad_norm": 0.22523917257785797, | |
| "learning_rate": 1.6009930169721332e-06, | |
| "loss": 0.3999437093734741, | |
| "step": 2206 | |
| }, | |
| { | |
| "epoch": 2.529209621993127, | |
| "grad_norm": 0.03774823993444443, | |
| "learning_rate": 1.595331601935586e-06, | |
| "loss": 0.9670888185501099, | |
| "step": 2208 | |
| }, | |
| { | |
| "epoch": 2.531500572737686, | |
| "grad_norm": 0.041391998529434204, | |
| "learning_rate": 1.5896950893968968e-06, | |
| "loss": 1.1194099187850952, | |
| "step": 2210 | |
| }, | |
| { | |
| "epoch": 2.533791523482245, | |
| "grad_norm": 0.12182236462831497, | |
| "learning_rate": 1.584083515303611e-06, | |
| "loss": 0.6618677377700806, | |
| "step": 2212 | |
| }, | |
| { | |
| "epoch": 2.536082474226804, | |
| "grad_norm": 0.047492437064647675, | |
| "learning_rate": 1.5784969154442228e-06, | |
| "loss": 1.135564923286438, | |
| "step": 2214 | |
| }, | |
| { | |
| "epoch": 2.538373424971363, | |
| "grad_norm": 0.12321395426988602, | |
| "learning_rate": 1.5729353254479544e-06, | |
| "loss": 0.9604895710945129, | |
| "step": 2216 | |
| }, | |
| { | |
| "epoch": 2.540664375715922, | |
| "grad_norm": 0.26825836300849915, | |
| "learning_rate": 1.5673987807845224e-06, | |
| "loss": 0.20692116022109985, | |
| "step": 2218 | |
| }, | |
| { | |
| "epoch": 2.542955326460481, | |
| "grad_norm": 0.03937020152807236, | |
| "learning_rate": 1.561887316763914e-06, | |
| "loss": 0.7779680490493774, | |
| "step": 2220 | |
| }, | |
| { | |
| "epoch": 2.54524627720504, | |
| "grad_norm": 0.12153687328100204, | |
| "learning_rate": 1.5564009685361631e-06, | |
| "loss": 1.1622121334075928, | |
| "step": 2222 | |
| }, | |
| { | |
| "epoch": 2.547537227949599, | |
| "grad_norm": 0.048881255090236664, | |
| "learning_rate": 1.5509397710911206e-06, | |
| "loss": 1.0526154041290283, | |
| "step": 2224 | |
| }, | |
| { | |
| "epoch": 2.549828178694158, | |
| "grad_norm": 0.11527767777442932, | |
| "learning_rate": 1.545503759258239e-06, | |
| "loss": 0.9147733449935913, | |
| "step": 2226 | |
| }, | |
| { | |
| "epoch": 2.552119129438717, | |
| "grad_norm": 0.06676455587148666, | |
| "learning_rate": 1.5400929677063453e-06, | |
| "loss": 0.990266740322113, | |
| "step": 2228 | |
| }, | |
| { | |
| "epoch": 2.554410080183276, | |
| "grad_norm": 0.0386880524456501, | |
| "learning_rate": 1.534707430943422e-06, | |
| "loss": 1.051877498626709, | |
| "step": 2230 | |
| }, | |
| { | |
| "epoch": 2.556701030927835, | |
| "grad_norm": 0.037052009254693985, | |
| "learning_rate": 1.5293471833163836e-06, | |
| "loss": 0.7502873539924622, | |
| "step": 2232 | |
| }, | |
| { | |
| "epoch": 2.558991981672394, | |
| "grad_norm": 0.12356662005186081, | |
| "learning_rate": 1.5240122590108615e-06, | |
| "loss": 1.1431405544281006, | |
| "step": 2234 | |
| }, | |
| { | |
| "epoch": 2.561282932416953, | |
| "grad_norm": 0.10430527478456497, | |
| "learning_rate": 1.5187026920509857e-06, | |
| "loss": 0.8166168928146362, | |
| "step": 2236 | |
| }, | |
| { | |
| "epoch": 2.563573883161512, | |
| "grad_norm": 0.04083002358675003, | |
| "learning_rate": 1.5134185162991653e-06, | |
| "loss": 1.0520750284194946, | |
| "step": 2238 | |
| }, | |
| { | |
| "epoch": 2.565864833906071, | |
| "grad_norm": 0.10489920526742935, | |
| "learning_rate": 1.5081597654558753e-06, | |
| "loss": 1.0503392219543457, | |
| "step": 2240 | |
| }, | |
| { | |
| "epoch": 2.5681557846506298, | |
| "grad_norm": 0.15466554462909698, | |
| "learning_rate": 1.5029264730594366e-06, | |
| "loss": 1.0287251472473145, | |
| "step": 2242 | |
| }, | |
| { | |
| "epoch": 2.5704467353951888, | |
| "grad_norm": 0.15002210438251495, | |
| "learning_rate": 1.49771867248581e-06, | |
| "loss": 1.1313282251358032, | |
| "step": 2244 | |
| }, | |
| { | |
| "epoch": 2.5727376861397477, | |
| "grad_norm": 0.03917040675878525, | |
| "learning_rate": 1.4925363969483764e-06, | |
| "loss": 1.2097923755645752, | |
| "step": 2246 | |
| }, | |
| { | |
| "epoch": 2.5750286368843067, | |
| "grad_norm": 0.036840129643678665, | |
| "learning_rate": 1.48737967949773e-06, | |
| "loss": 1.0906928777694702, | |
| "step": 2248 | |
| }, | |
| { | |
| "epoch": 2.5773195876288657, | |
| "grad_norm": 0.03971611335873604, | |
| "learning_rate": 1.4822485530214636e-06, | |
| "loss": 0.9993399977684021, | |
| "step": 2250 | |
| }, | |
| { | |
| "epoch": 2.579610538373425, | |
| "grad_norm": 0.12046768516302109, | |
| "learning_rate": 1.4771430502439585e-06, | |
| "loss": 1.058572769165039, | |
| "step": 2252 | |
| }, | |
| { | |
| "epoch": 2.581901489117984, | |
| "grad_norm": 0.21793264150619507, | |
| "learning_rate": 1.4720632037261837e-06, | |
| "loss": 1.0307724475860596, | |
| "step": 2254 | |
| }, | |
| { | |
| "epoch": 2.584192439862543, | |
| "grad_norm": 0.040577035397291183, | |
| "learning_rate": 1.4670090458654752e-06, | |
| "loss": 1.0201236009597778, | |
| "step": 2256 | |
| }, | |
| { | |
| "epoch": 2.586483390607102, | |
| "grad_norm": 0.08926853537559509, | |
| "learning_rate": 1.4619806088953423e-06, | |
| "loss": 1.028358817100525, | |
| "step": 2258 | |
| }, | |
| { | |
| "epoch": 2.588774341351661, | |
| "grad_norm": 0.05179933086037636, | |
| "learning_rate": 1.4569779248852514e-06, | |
| "loss": 1.0471670627593994, | |
| "step": 2260 | |
| }, | |
| { | |
| "epoch": 2.59106529209622, | |
| "grad_norm": 0.12112816423177719, | |
| "learning_rate": 1.4520010257404282e-06, | |
| "loss": 1.0303044319152832, | |
| "step": 2262 | |
| }, | |
| { | |
| "epoch": 2.593356242840779, | |
| "grad_norm": 0.13048020005226135, | |
| "learning_rate": 1.4470499432016564e-06, | |
| "loss": 1.023011565208435, | |
| "step": 2264 | |
| }, | |
| { | |
| "epoch": 2.595647193585338, | |
| "grad_norm": 0.20969820022583008, | |
| "learning_rate": 1.4421247088450638e-06, | |
| "loss": 0.3655018210411072, | |
| "step": 2266 | |
| }, | |
| { | |
| "epoch": 2.597938144329897, | |
| "grad_norm": 0.15613162517547607, | |
| "learning_rate": 1.4372253540819342e-06, | |
| "loss": 1.1912338733673096, | |
| "step": 2268 | |
| }, | |
| { | |
| "epoch": 2.600229095074456, | |
| "grad_norm": 0.036865267902612686, | |
| "learning_rate": 1.4323519101584956e-06, | |
| "loss": 1.14031982421875, | |
| "step": 2270 | |
| }, | |
| { | |
| "epoch": 2.602520045819015, | |
| "grad_norm": 0.0848236158490181, | |
| "learning_rate": 1.4275044081557336e-06, | |
| "loss": 0.9510655999183655, | |
| "step": 2272 | |
| }, | |
| { | |
| "epoch": 2.604810996563574, | |
| "grad_norm": 0.05090273916721344, | |
| "learning_rate": 1.422682878989179e-06, | |
| "loss": 1.06699538230896, | |
| "step": 2274 | |
| }, | |
| { | |
| "epoch": 2.607101947308133, | |
| "grad_norm": 0.11556492745876312, | |
| "learning_rate": 1.4178873534087224e-06, | |
| "loss": 1.1918110847473145, | |
| "step": 2276 | |
| }, | |
| { | |
| "epoch": 2.609392898052692, | |
| "grad_norm": 0.14172205328941345, | |
| "learning_rate": 1.413117861998411e-06, | |
| "loss": 0.5816162824630737, | |
| "step": 2278 | |
| }, | |
| { | |
| "epoch": 2.611683848797251, | |
| "grad_norm": 0.0312827005982399, | |
| "learning_rate": 1.4083744351762568e-06, | |
| "loss": 1.0428073406219482, | |
| "step": 2280 | |
| }, | |
| { | |
| "epoch": 2.61397479954181, | |
| "grad_norm": 0.035471122711896896, | |
| "learning_rate": 1.4036571031940433e-06, | |
| "loss": 0.9467089176177979, | |
| "step": 2282 | |
| }, | |
| { | |
| "epoch": 2.616265750286369, | |
| "grad_norm": 0.21262601017951965, | |
| "learning_rate": 1.398965896137126e-06, | |
| "loss": 0.6726322174072266, | |
| "step": 2284 | |
| }, | |
| { | |
| "epoch": 2.618556701030928, | |
| "grad_norm": 0.060128457844257355, | |
| "learning_rate": 1.394300843924251e-06, | |
| "loss": 1.120849847793579, | |
| "step": 2286 | |
| }, | |
| { | |
| "epoch": 2.620847651775487, | |
| "grad_norm": 0.0908019170165062, | |
| "learning_rate": 1.3896619763073558e-06, | |
| "loss": 0.8834356069564819, | |
| "step": 2288 | |
| }, | |
| { | |
| "epoch": 2.623138602520046, | |
| "grad_norm": 0.042698655277490616, | |
| "learning_rate": 1.3850493228713829e-06, | |
| "loss": 1.11699640750885, | |
| "step": 2290 | |
| }, | |
| { | |
| "epoch": 2.625429553264605, | |
| "grad_norm": 0.11093872040510178, | |
| "learning_rate": 1.3804629130340922e-06, | |
| "loss": 0.8252977132797241, | |
| "step": 2292 | |
| }, | |
| { | |
| "epoch": 2.627720504009164, | |
| "grad_norm": 0.4029269516468048, | |
| "learning_rate": 1.375902776045869e-06, | |
| "loss": 0.8700896501541138, | |
| "step": 2294 | |
| }, | |
| { | |
| "epoch": 2.6300114547537228, | |
| "grad_norm": 0.4993189871311188, | |
| "learning_rate": 1.3713689409895423e-06, | |
| "loss": 0.9152309894561768, | |
| "step": 2296 | |
| }, | |
| { | |
| "epoch": 2.6323024054982818, | |
| "grad_norm": 0.05592384934425354, | |
| "learning_rate": 1.3668614367801968e-06, | |
| "loss": 1.0523474216461182, | |
| "step": 2298 | |
| }, | |
| { | |
| "epoch": 2.6345933562428407, | |
| "grad_norm": 0.26710832118988037, | |
| "learning_rate": 1.3623802921649903e-06, | |
| "loss": 1.022230625152588, | |
| "step": 2300 | |
| }, | |
| { | |
| "epoch": 2.6368843069873997, | |
| "grad_norm": 0.05241448059678078, | |
| "learning_rate": 1.3579255357229665e-06, | |
| "loss": 1.0288628339767456, | |
| "step": 2302 | |
| }, | |
| { | |
| "epoch": 2.6391752577319587, | |
| "grad_norm": 0.11045310646295547, | |
| "learning_rate": 1.3534971958648763e-06, | |
| "loss": 1.0415080785751343, | |
| "step": 2304 | |
| }, | |
| { | |
| "epoch": 2.6414662084765177, | |
| "grad_norm": 0.1546574980020523, | |
| "learning_rate": 1.3490953008329976e-06, | |
| "loss": 1.096618890762329, | |
| "step": 2306 | |
| }, | |
| { | |
| "epoch": 2.6437571592210767, | |
| "grad_norm": 0.1845928579568863, | |
| "learning_rate": 1.34471987870095e-06, | |
| "loss": 1.0915133953094482, | |
| "step": 2308 | |
| }, | |
| { | |
| "epoch": 2.6460481099656357, | |
| "grad_norm": 0.11465723067522049, | |
| "learning_rate": 1.3403709573735226e-06, | |
| "loss": 1.0728447437286377, | |
| "step": 2310 | |
| }, | |
| { | |
| "epoch": 2.6483390607101946, | |
| "grad_norm": 0.13408894836902618, | |
| "learning_rate": 1.336048564586489e-06, | |
| "loss": 1.1959388256072998, | |
| "step": 2312 | |
| }, | |
| { | |
| "epoch": 2.6506300114547536, | |
| "grad_norm": 0.11020931601524353, | |
| "learning_rate": 1.3317527279064346e-06, | |
| "loss": 0.9788947105407715, | |
| "step": 2314 | |
| }, | |
| { | |
| "epoch": 2.6529209621993126, | |
| "grad_norm": 0.056901715695858, | |
| "learning_rate": 1.3274834747305816e-06, | |
| "loss": 1.119939923286438, | |
| "step": 2316 | |
| }, | |
| { | |
| "epoch": 2.6552119129438716, | |
| "grad_norm": 0.043660301715135574, | |
| "learning_rate": 1.3232408322866112e-06, | |
| "loss": 1.2462071180343628, | |
| "step": 2318 | |
| }, | |
| { | |
| "epoch": 2.6575028636884306, | |
| "grad_norm": 0.12008506804704666, | |
| "learning_rate": 1.319024827632493e-06, | |
| "loss": 1.1180698871612549, | |
| "step": 2320 | |
| }, | |
| { | |
| "epoch": 2.6597938144329896, | |
| "grad_norm": 0.03617572784423828, | |
| "learning_rate": 1.3148354876563078e-06, | |
| "loss": 0.8943448066711426, | |
| "step": 2322 | |
| }, | |
| { | |
| "epoch": 2.6620847651775486, | |
| "grad_norm": 0.1361423134803772, | |
| "learning_rate": 1.3106728390760822e-06, | |
| "loss": 0.9939253330230713, | |
| "step": 2324 | |
| }, | |
| { | |
| "epoch": 2.6643757159221075, | |
| "grad_norm": 0.12970031797885895, | |
| "learning_rate": 1.3065369084396143e-06, | |
| "loss": 1.1329681873321533, | |
| "step": 2326 | |
| }, | |
| { | |
| "epoch": 2.6666666666666665, | |
| "grad_norm": 0.04504101350903511, | |
| "learning_rate": 1.302427722124305e-06, | |
| "loss": 1.0876107215881348, | |
| "step": 2328 | |
| }, | |
| { | |
| "epoch": 2.6689576174112255, | |
| "grad_norm": 0.043641116470098495, | |
| "learning_rate": 1.2983453063369886e-06, | |
| "loss": 1.1243267059326172, | |
| "step": 2330 | |
| }, | |
| { | |
| "epoch": 2.6712485681557845, | |
| "grad_norm": 0.12474491447210312, | |
| "learning_rate": 1.2942896871137703e-06, | |
| "loss": 0.150488942861557, | |
| "step": 2332 | |
| }, | |
| { | |
| "epoch": 2.673539518900344, | |
| "grad_norm": 0.21792425215244293, | |
| "learning_rate": 1.290260890319854e-06, | |
| "loss": 1.049038290977478, | |
| "step": 2334 | |
| }, | |
| { | |
| "epoch": 2.675830469644903, | |
| "grad_norm": 0.24270223081111908, | |
| "learning_rate": 1.2862589416493828e-06, | |
| "loss": 1.2486166954040527, | |
| "step": 2336 | |
| }, | |
| { | |
| "epoch": 2.678121420389462, | |
| "grad_norm": 0.121235691010952, | |
| "learning_rate": 1.282283866625271e-06, | |
| "loss": 1.1332775354385376, | |
| "step": 2338 | |
| }, | |
| { | |
| "epoch": 2.680412371134021, | |
| "grad_norm": 0.16410396993160248, | |
| "learning_rate": 1.278335690599042e-06, | |
| "loss": 1.1936967372894287, | |
| "step": 2340 | |
| }, | |
| { | |
| "epoch": 2.68270332187858, | |
| "grad_norm": 0.12893368303775787, | |
| "learning_rate": 1.27441443875067e-06, | |
| "loss": 1.167176365852356, | |
| "step": 2342 | |
| }, | |
| { | |
| "epoch": 2.684994272623139, | |
| "grad_norm": 0.7378900051116943, | |
| "learning_rate": 1.2705201360884154e-06, | |
| "loss": 0.5444666147232056, | |
| "step": 2344 | |
| }, | |
| { | |
| "epoch": 2.687285223367698, | |
| "grad_norm": 0.14510443806648254, | |
| "learning_rate": 1.2666528074486675e-06, | |
| "loss": 1.0824750661849976, | |
| "step": 2346 | |
| }, | |
| { | |
| "epoch": 2.689576174112257, | |
| "grad_norm": 0.10740603506565094, | |
| "learning_rate": 1.2628124774957862e-06, | |
| "loss": 1.043527364730835, | |
| "step": 2348 | |
| }, | |
| { | |
| "epoch": 2.691867124856816, | |
| "grad_norm": 0.1193559393286705, | |
| "learning_rate": 1.2589991707219407e-06, | |
| "loss": 0.9171677827835083, | |
| "step": 2350 | |
| }, | |
| { | |
| "epoch": 2.6941580756013748, | |
| "grad_norm": 0.04034031182527542, | |
| "learning_rate": 1.255212911446962e-06, | |
| "loss": 1.0557785034179688, | |
| "step": 2352 | |
| }, | |
| { | |
| "epoch": 2.6964490263459338, | |
| "grad_norm": 0.09817221015691757, | |
| "learning_rate": 1.2514537238181764e-06, | |
| "loss": 0.9150309562683105, | |
| "step": 2354 | |
| }, | |
| { | |
| "epoch": 2.6987399770904927, | |
| "grad_norm": 0.04312164708971977, | |
| "learning_rate": 1.2477216318102638e-06, | |
| "loss": 1.1289045810699463, | |
| "step": 2356 | |
| }, | |
| { | |
| "epoch": 2.7010309278350517, | |
| "grad_norm": 0.13590708374977112, | |
| "learning_rate": 1.2440166592250908e-06, | |
| "loss": 1.1207735538482666, | |
| "step": 2358 | |
| }, | |
| { | |
| "epoch": 2.7033218785796107, | |
| "grad_norm": 0.11693116277456284, | |
| "learning_rate": 1.2403388296915748e-06, | |
| "loss": 0.9732823371887207, | |
| "step": 2360 | |
| }, | |
| { | |
| "epoch": 2.7056128293241697, | |
| "grad_norm": 0.09531773626804352, | |
| "learning_rate": 1.2366881666655197e-06, | |
| "loss": 1.0172736644744873, | |
| "step": 2362 | |
| }, | |
| { | |
| "epoch": 2.7079037800687287, | |
| "grad_norm": 0.23191611468791962, | |
| "learning_rate": 1.2330646934294733e-06, | |
| "loss": 0.7734839916229248, | |
| "step": 2364 | |
| }, | |
| { | |
| "epoch": 2.7101947308132877, | |
| "grad_norm": 0.13292023539543152, | |
| "learning_rate": 1.2294684330925773e-06, | |
| "loss": 1.0921151638031006, | |
| "step": 2366 | |
| }, | |
| { | |
| "epoch": 2.7124856815578466, | |
| "grad_norm": 0.03602350875735283, | |
| "learning_rate": 1.225899408590418e-06, | |
| "loss": 1.0671606063842773, | |
| "step": 2368 | |
| }, | |
| { | |
| "epoch": 2.7147766323024056, | |
| "grad_norm": 0.1325361579656601, | |
| "learning_rate": 1.222357642684886e-06, | |
| "loss": 0.9317104816436768, | |
| "step": 2370 | |
| }, | |
| { | |
| "epoch": 2.7170675830469646, | |
| "grad_norm": 0.02299438789486885, | |
| "learning_rate": 1.2188431579640229e-06, | |
| "loss": 0.5671766996383667, | |
| "step": 2372 | |
| }, | |
| { | |
| "epoch": 2.7193585337915236, | |
| "grad_norm": 0.1803787499666214, | |
| "learning_rate": 1.2153559768418826e-06, | |
| "loss": 1.208252191543579, | |
| "step": 2374 | |
| }, | |
| { | |
| "epoch": 2.7216494845360826, | |
| "grad_norm": 0.10567279905080795, | |
| "learning_rate": 1.2118961215583876e-06, | |
| "loss": 0.9299669861793518, | |
| "step": 2376 | |
| }, | |
| { | |
| "epoch": 2.7239404352806416, | |
| "grad_norm": 0.1304902285337448, | |
| "learning_rate": 1.2084636141791853e-06, | |
| "loss": 1.1629877090454102, | |
| "step": 2378 | |
| }, | |
| { | |
| "epoch": 2.7262313860252005, | |
| "grad_norm": 0.11300843209028244, | |
| "learning_rate": 1.2050584765955106e-06, | |
| "loss": 0.5608739852905273, | |
| "step": 2380 | |
| }, | |
| { | |
| "epoch": 2.7285223367697595, | |
| "grad_norm": 0.2323353886604309, | |
| "learning_rate": 1.2016807305240418e-06, | |
| "loss": 1.0366477966308594, | |
| "step": 2382 | |
| }, | |
| { | |
| "epoch": 2.7308132875143185, | |
| "grad_norm": 0.1072358712553978, | |
| "learning_rate": 1.198330397506765e-06, | |
| "loss": 1.0595978498458862, | |
| "step": 2384 | |
| }, | |
| { | |
| "epoch": 2.7331042382588775, | |
| "grad_norm": 0.16197116672992706, | |
| "learning_rate": 1.1950074989108369e-06, | |
| "loss": 1.25881028175354, | |
| "step": 2386 | |
| }, | |
| { | |
| "epoch": 2.7353951890034365, | |
| "grad_norm": 0.042508866637945175, | |
| "learning_rate": 1.1917120559284484e-06, | |
| "loss": 1.033616304397583, | |
| "step": 2388 | |
| }, | |
| { | |
| "epoch": 2.7376861397479955, | |
| "grad_norm": 0.16776804625988007, | |
| "learning_rate": 1.188444089576689e-06, | |
| "loss": 1.1445398330688477, | |
| "step": 2390 | |
| }, | |
| { | |
| "epoch": 2.7399770904925544, | |
| "grad_norm": 0.040681999176740646, | |
| "learning_rate": 1.1852036206974093e-06, | |
| "loss": 1.0716893672943115, | |
| "step": 2392 | |
| }, | |
| { | |
| "epoch": 2.7422680412371134, | |
| "grad_norm": 0.03599237650632858, | |
| "learning_rate": 1.1819906699570951e-06, | |
| "loss": 1.0026639699935913, | |
| "step": 2394 | |
| }, | |
| { | |
| "epoch": 2.7445589919816724, | |
| "grad_norm": 0.20803894102573395, | |
| "learning_rate": 1.178805257846731e-06, | |
| "loss": 0.8390488624572754, | |
| "step": 2396 | |
| }, | |
| { | |
| "epoch": 2.7468499427262314, | |
| "grad_norm": 0.045633960515260696, | |
| "learning_rate": 1.1756474046816705e-06, | |
| "loss": 1.0751760005950928, | |
| "step": 2398 | |
| }, | |
| { | |
| "epoch": 2.7491408934707904, | |
| "grad_norm": 0.1319274753332138, | |
| "learning_rate": 1.172517130601506e-06, | |
| "loss": 0.9296597838401794, | |
| "step": 2400 | |
| }, | |
| { | |
| "epoch": 2.7514318442153494, | |
| "grad_norm": 0.13140706717967987, | |
| "learning_rate": 1.169414455569941e-06, | |
| "loss": 1.083257794380188, | |
| "step": 2402 | |
| }, | |
| { | |
| "epoch": 2.7537227949599083, | |
| "grad_norm": 0.10441871732473373, | |
| "learning_rate": 1.1663393993746639e-06, | |
| "loss": 0.70625901222229, | |
| "step": 2404 | |
| }, | |
| { | |
| "epoch": 2.7560137457044673, | |
| "grad_norm": 0.21324773132801056, | |
| "learning_rate": 1.1632919816272185e-06, | |
| "loss": 1.1939747333526611, | |
| "step": 2406 | |
| }, | |
| { | |
| "epoch": 2.7583046964490263, | |
| "grad_norm": 0.15752872824668884, | |
| "learning_rate": 1.1602722217628847e-06, | |
| "loss": 0.5383874177932739, | |
| "step": 2408 | |
| }, | |
| { | |
| "epoch": 2.7605956471935853, | |
| "grad_norm": 0.04424149543046951, | |
| "learning_rate": 1.157280139040546e-06, | |
| "loss": 1.1126903295516968, | |
| "step": 2410 | |
| }, | |
| { | |
| "epoch": 2.7628865979381443, | |
| "grad_norm": 0.11697188764810562, | |
| "learning_rate": 1.1543157525425753e-06, | |
| "loss": 1.0950360298156738, | |
| "step": 2412 | |
| }, | |
| { | |
| "epoch": 2.7651775486827033, | |
| "grad_norm": 0.12432119995355606, | |
| "learning_rate": 1.1513790811747077e-06, | |
| "loss": 1.031628966331482, | |
| "step": 2414 | |
| }, | |
| { | |
| "epoch": 2.7674684994272623, | |
| "grad_norm": 0.1950952112674713, | |
| "learning_rate": 1.1484701436659228e-06, | |
| "loss": 0.9780592322349548, | |
| "step": 2416 | |
| }, | |
| { | |
| "epoch": 2.7697594501718212, | |
| "grad_norm": 0.10742440074682236, | |
| "learning_rate": 1.1455889585683234e-06, | |
| "loss": 1.1695256233215332, | |
| "step": 2418 | |
| }, | |
| { | |
| "epoch": 2.77205040091638, | |
| "grad_norm": 0.03295076638460159, | |
| "learning_rate": 1.1427355442570174e-06, | |
| "loss": 0.9007450342178345, | |
| "step": 2420 | |
| }, | |
| { | |
| "epoch": 2.774341351660939, | |
| "grad_norm": 0.12205695360898972, | |
| "learning_rate": 1.1399099189300022e-06, | |
| "loss": 1.1622676849365234, | |
| "step": 2422 | |
| }, | |
| { | |
| "epoch": 2.776632302405498, | |
| "grad_norm": 0.12269830703735352, | |
| "learning_rate": 1.1371121006080476e-06, | |
| "loss": 0.6331859827041626, | |
| "step": 2424 | |
| }, | |
| { | |
| "epoch": 2.778923253150057, | |
| "grad_norm": 0.12496553361415863, | |
| "learning_rate": 1.1343421071345807e-06, | |
| "loss": 0.8568001985549927, | |
| "step": 2426 | |
| }, | |
| { | |
| "epoch": 2.781214203894616, | |
| "grad_norm": 0.13641129434108734, | |
| "learning_rate": 1.1315999561755704e-06, | |
| "loss": 1.0987757444381714, | |
| "step": 2428 | |
| }, | |
| { | |
| "epoch": 2.783505154639175, | |
| "grad_norm": 0.2431728094816208, | |
| "learning_rate": 1.1288856652194187e-06, | |
| "loss": 1.1993837356567383, | |
| "step": 2430 | |
| }, | |
| { | |
| "epoch": 2.785796105383734, | |
| "grad_norm": 0.1257048398256302, | |
| "learning_rate": 1.1261992515768475e-06, | |
| "loss": 1.093268871307373, | |
| "step": 2432 | |
| }, | |
| { | |
| "epoch": 2.788087056128293, | |
| "grad_norm": 0.11494007706642151, | |
| "learning_rate": 1.1235407323807863e-06, | |
| "loss": 0.9766872525215149, | |
| "step": 2434 | |
| }, | |
| { | |
| "epoch": 2.790378006872852, | |
| "grad_norm": 0.12698274850845337, | |
| "learning_rate": 1.1209101245862641e-06, | |
| "loss": 1.0670726299285889, | |
| "step": 2436 | |
| }, | |
| { | |
| "epoch": 2.792668957617411, | |
| "grad_norm": 0.1961643397808075, | |
| "learning_rate": 1.1183074449703026e-06, | |
| "loss": 1.1611640453338623, | |
| "step": 2438 | |
| }, | |
| { | |
| "epoch": 2.79495990836197, | |
| "grad_norm": 0.06117239221930504, | |
| "learning_rate": 1.1157327101318079e-06, | |
| "loss": 1.0661375522613525, | |
| "step": 2440 | |
| }, | |
| { | |
| "epoch": 2.797250859106529, | |
| "grad_norm": 0.07389727979898453, | |
| "learning_rate": 1.113185936491464e-06, | |
| "loss": 0.653728723526001, | |
| "step": 2442 | |
| }, | |
| { | |
| "epoch": 2.799541809851088, | |
| "grad_norm": 0.1278214305639267, | |
| "learning_rate": 1.1106671402916311e-06, | |
| "loss": 1.077947974205017, | |
| "step": 2444 | |
| }, | |
| { | |
| "epoch": 2.801832760595647, | |
| "grad_norm": 0.11121320724487305, | |
| "learning_rate": 1.1081763375962376e-06, | |
| "loss": 1.074617862701416, | |
| "step": 2446 | |
| }, | |
| { | |
| "epoch": 2.804123711340206, | |
| "grad_norm": 0.13351018726825714, | |
| "learning_rate": 1.1057135442906802e-06, | |
| "loss": 1.0540236234664917, | |
| "step": 2448 | |
| }, | |
| { | |
| "epoch": 2.806414662084765, | |
| "grad_norm": 0.04395301640033722, | |
| "learning_rate": 1.1032787760817245e-06, | |
| "loss": 1.0289393663406372, | |
| "step": 2450 | |
| }, | |
| { | |
| "epoch": 2.808705612829324, | |
| "grad_norm": 0.11833327263593674, | |
| "learning_rate": 1.1008720484973997e-06, | |
| "loss": 1.0666475296020508, | |
| "step": 2452 | |
| }, | |
| { | |
| "epoch": 2.810996563573883, | |
| "grad_norm": 0.12046226114034653, | |
| "learning_rate": 1.0984933768869045e-06, | |
| "loss": 0.9002621173858643, | |
| "step": 2454 | |
| }, | |
| { | |
| "epoch": 2.813287514318442, | |
| "grad_norm": 0.2076469361782074, | |
| "learning_rate": 1.0961427764205054e-06, | |
| "loss": 0.9490566253662109, | |
| "step": 2456 | |
| }, | |
| { | |
| "epoch": 2.815578465063001, | |
| "grad_norm": 0.16561594605445862, | |
| "learning_rate": 1.0938202620894444e-06, | |
| "loss": 1.0374698638916016, | |
| "step": 2458 | |
| }, | |
| { | |
| "epoch": 2.81786941580756, | |
| "grad_norm": 0.039882734417915344, | |
| "learning_rate": 1.0915258487058393e-06, | |
| "loss": 0.9988796710968018, | |
| "step": 2460 | |
| }, | |
| { | |
| "epoch": 2.820160366552119, | |
| "grad_norm": 0.11536919325590134, | |
| "learning_rate": 1.0892595509025897e-06, | |
| "loss": 1.0317668914794922, | |
| "step": 2462 | |
| }, | |
| { | |
| "epoch": 2.822451317296678, | |
| "grad_norm": 0.12474019825458527, | |
| "learning_rate": 1.0870213831332856e-06, | |
| "loss": 1.0597889423370361, | |
| "step": 2464 | |
| }, | |
| { | |
| "epoch": 2.824742268041237, | |
| "grad_norm": 0.10003583133220673, | |
| "learning_rate": 1.0848113596721147e-06, | |
| "loss": 0.9430271983146667, | |
| "step": 2466 | |
| }, | |
| { | |
| "epoch": 2.827033218785796, | |
| "grad_norm": 0.11832843720912933, | |
| "learning_rate": 1.0826294946137705e-06, | |
| "loss": 1.1333792209625244, | |
| "step": 2468 | |
| }, | |
| { | |
| "epoch": 2.8293241695303553, | |
| "grad_norm": 0.12214350700378418, | |
| "learning_rate": 1.0804758018733635e-06, | |
| "loss": 1.1652965545654297, | |
| "step": 2470 | |
| }, | |
| { | |
| "epoch": 2.8316151202749142, | |
| "grad_norm": 0.3238823711872101, | |
| "learning_rate": 1.0783502951863302e-06, | |
| "loss": 0.739303469657898, | |
| "step": 2472 | |
| }, | |
| { | |
| "epoch": 2.8339060710194732, | |
| "grad_norm": 0.12782305479049683, | |
| "learning_rate": 1.076252988108349e-06, | |
| "loss": 1.1109859943389893, | |
| "step": 2474 | |
| }, | |
| { | |
| "epoch": 2.836197021764032, | |
| "grad_norm": 0.12479905039072037, | |
| "learning_rate": 1.0741838940152498e-06, | |
| "loss": 1.060528039932251, | |
| "step": 2476 | |
| }, | |
| { | |
| "epoch": 2.838487972508591, | |
| "grad_norm": 0.03563203662633896, | |
| "learning_rate": 1.0721430261029334e-06, | |
| "loss": 1.0834589004516602, | |
| "step": 2478 | |
| }, | |
| { | |
| "epoch": 2.84077892325315, | |
| "grad_norm": 0.11884791404008865, | |
| "learning_rate": 1.0701303973872825e-06, | |
| "loss": 1.1230555772781372, | |
| "step": 2480 | |
| }, | |
| { | |
| "epoch": 2.843069873997709, | |
| "grad_norm": 0.03811972215771675, | |
| "learning_rate": 1.0681460207040816e-06, | |
| "loss": 0.9433954358100891, | |
| "step": 2482 | |
| }, | |
| { | |
| "epoch": 2.845360824742268, | |
| "grad_norm": 0.1846611499786377, | |
| "learning_rate": 1.066189908708935e-06, | |
| "loss": 1.1506900787353516, | |
| "step": 2484 | |
| }, | |
| { | |
| "epoch": 2.847651775486827, | |
| "grad_norm": 0.1252901256084442, | |
| "learning_rate": 1.0642620738771844e-06, | |
| "loss": 1.1932969093322754, | |
| "step": 2486 | |
| }, | |
| { | |
| "epoch": 2.849942726231386, | |
| "grad_norm": 0.03945187106728554, | |
| "learning_rate": 1.0623625285038317e-06, | |
| "loss": 1.2127320766448975, | |
| "step": 2488 | |
| }, | |
| { | |
| "epoch": 2.852233676975945, | |
| "grad_norm": 0.12211194634437561, | |
| "learning_rate": 1.0604912847034579e-06, | |
| "loss": 1.1592764854431152, | |
| "step": 2490 | |
| }, | |
| { | |
| "epoch": 2.854524627720504, | |
| "grad_norm": 0.03981486335396767, | |
| "learning_rate": 1.0586483544101484e-06, | |
| "loss": 0.9723308682441711, | |
| "step": 2492 | |
| }, | |
| { | |
| "epoch": 2.856815578465063, | |
| "grad_norm": 0.03858442232012749, | |
| "learning_rate": 1.0568337493774153e-06, | |
| "loss": 1.0726711750030518, | |
| "step": 2494 | |
| }, | |
| { | |
| "epoch": 2.859106529209622, | |
| "grad_norm": 0.11059273779392242, | |
| "learning_rate": 1.055047481178123e-06, | |
| "loss": 1.244250774383545, | |
| "step": 2496 | |
| }, | |
| { | |
| "epoch": 2.861397479954181, | |
| "grad_norm": 0.149624302983284, | |
| "learning_rate": 1.0532895612044148e-06, | |
| "loss": 1.2564786672592163, | |
| "step": 2498 | |
| }, | |
| { | |
| "epoch": 2.86368843069874, | |
| "grad_norm": 0.15256492793560028, | |
| "learning_rate": 1.0515600006676388e-06, | |
| "loss": 1.055551290512085, | |
| "step": 2500 | |
| }, | |
| { | |
| "epoch": 2.865979381443299, | |
| "grad_norm": 0.0374947264790535, | |
| "learning_rate": 1.0498588105982773e-06, | |
| "loss": 1.071763515472412, | |
| "step": 2502 | |
| }, | |
| { | |
| "epoch": 2.868270332187858, | |
| "grad_norm": 0.11409316956996918, | |
| "learning_rate": 1.0481860018458777e-06, | |
| "loss": 1.0046906471252441, | |
| "step": 2504 | |
| }, | |
| { | |
| "epoch": 2.870561282932417, | |
| "grad_norm": 0.1608385592699051, | |
| "learning_rate": 1.0465415850789818e-06, | |
| "loss": 1.0260601043701172, | |
| "step": 2506 | |
| }, | |
| { | |
| "epoch": 2.872852233676976, | |
| "grad_norm": 0.1228945255279541, | |
| "learning_rate": 1.044925570785057e-06, | |
| "loss": 1.1091501712799072, | |
| "step": 2508 | |
| }, | |
| { | |
| "epoch": 2.875143184421535, | |
| "grad_norm": 0.23849347233772278, | |
| "learning_rate": 1.0433379692704303e-06, | |
| "loss": 0.8928807377815247, | |
| "step": 2510 | |
| }, | |
| { | |
| "epoch": 2.877434135166094, | |
| "grad_norm": 0.05072511360049248, | |
| "learning_rate": 1.0417787906602254e-06, | |
| "loss": 1.1910967826843262, | |
| "step": 2512 | |
| }, | |
| { | |
| "epoch": 2.879725085910653, | |
| "grad_norm": 0.11455868184566498, | |
| "learning_rate": 1.0402480448982916e-06, | |
| "loss": 1.0570517778396606, | |
| "step": 2514 | |
| }, | |
| { | |
| "epoch": 2.882016036655212, | |
| "grad_norm": 0.3516460657119751, | |
| "learning_rate": 1.0387457417471484e-06, | |
| "loss": 0.6423954963684082, | |
| "step": 2516 | |
| }, | |
| { | |
| "epoch": 2.884306987399771, | |
| "grad_norm": 0.06222286447882652, | |
| "learning_rate": 1.0372718907879148e-06, | |
| "loss": 0.615919828414917, | |
| "step": 2518 | |
| }, | |
| { | |
| "epoch": 2.88659793814433, | |
| "grad_norm": 0.11802791804075241, | |
| "learning_rate": 1.0358265014202562e-06, | |
| "loss": 1.0918095111846924, | |
| "step": 2520 | |
| }, | |
| { | |
| "epoch": 2.888888888888889, | |
| "grad_norm": 0.13629449903964996, | |
| "learning_rate": 1.0344095828623187e-06, | |
| "loss": 1.1885225772857666, | |
| "step": 2522 | |
| }, | |
| { | |
| "epoch": 2.891179839633448, | |
| "grad_norm": 0.1508660912513733, | |
| "learning_rate": 1.0330211441506725e-06, | |
| "loss": 1.2527198791503906, | |
| "step": 2524 | |
| }, | |
| { | |
| "epoch": 2.893470790378007, | |
| "grad_norm": 0.3050176799297333, | |
| "learning_rate": 1.0316611941402554e-06, | |
| "loss": 1.289149284362793, | |
| "step": 2526 | |
| }, | |
| { | |
| "epoch": 2.895761741122566, | |
| "grad_norm": 0.03513256832957268, | |
| "learning_rate": 1.0303297415043125e-06, | |
| "loss": 1.0728578567504883, | |
| "step": 2528 | |
| }, | |
| { | |
| "epoch": 2.8980526918671248, | |
| "grad_norm": 0.13335010409355164, | |
| "learning_rate": 1.0290267947343467e-06, | |
| "loss": 0.550873875617981, | |
| "step": 2530 | |
| }, | |
| { | |
| "epoch": 2.9003436426116838, | |
| "grad_norm": 0.10280898958444595, | |
| "learning_rate": 1.027752362140058e-06, | |
| "loss": 0.9256083965301514, | |
| "step": 2532 | |
| }, | |
| { | |
| "epoch": 2.9026345933562427, | |
| "grad_norm": 0.24355080723762512, | |
| "learning_rate": 1.0265064518492971e-06, | |
| "loss": 0.6109650135040283, | |
| "step": 2534 | |
| }, | |
| { | |
| "epoch": 2.9049255441008017, | |
| "grad_norm": 0.11080548912286758, | |
| "learning_rate": 1.0252890718080065e-06, | |
| "loss": 0.9810810089111328, | |
| "step": 2536 | |
| }, | |
| { | |
| "epoch": 2.9072164948453607, | |
| "grad_norm": 0.23204368352890015, | |
| "learning_rate": 1.024100229780177e-06, | |
| "loss": 0.7867921590805054, | |
| "step": 2538 | |
| }, | |
| { | |
| "epoch": 2.9095074455899197, | |
| "grad_norm": 0.11744631826877594, | |
| "learning_rate": 1.0229399333477933e-06, | |
| "loss": 1.1211484670639038, | |
| "step": 2540 | |
| }, | |
| { | |
| "epoch": 2.9117983963344787, | |
| "grad_norm": 0.04548896104097366, | |
| "learning_rate": 1.0218081899107866e-06, | |
| "loss": 1.0681244134902954, | |
| "step": 2542 | |
| }, | |
| { | |
| "epoch": 2.9140893470790377, | |
| "grad_norm": 0.13808302581310272, | |
| "learning_rate": 1.0207050066869884e-06, | |
| "loss": 1.128277063369751, | |
| "step": 2544 | |
| }, | |
| { | |
| "epoch": 2.9163802978235966, | |
| "grad_norm": 0.15436206758022308, | |
| "learning_rate": 1.0196303907120835e-06, | |
| "loss": 1.1815663576126099, | |
| "step": 2546 | |
| }, | |
| { | |
| "epoch": 2.9186712485681556, | |
| "grad_norm": 0.12546345591545105, | |
| "learning_rate": 1.0185843488395675e-06, | |
| "loss": 1.1786937713623047, | |
| "step": 2548 | |
| }, | |
| { | |
| "epoch": 2.9209621993127146, | |
| "grad_norm": 0.17458893358707428, | |
| "learning_rate": 1.017566887740698e-06, | |
| "loss": 1.1511332988739014, | |
| "step": 2550 | |
| }, | |
| { | |
| "epoch": 2.923253150057274, | |
| "grad_norm": 0.19275349378585815, | |
| "learning_rate": 1.0165780139044579e-06, | |
| "loss": 1.0778021812438965, | |
| "step": 2552 | |
| }, | |
| { | |
| "epoch": 2.925544100801833, | |
| "grad_norm": 0.24212509393692017, | |
| "learning_rate": 1.0156177336375104e-06, | |
| "loss": 0.9915075302124023, | |
| "step": 2554 | |
| }, | |
| { | |
| "epoch": 2.927835051546392, | |
| "grad_norm": 0.21904876828193665, | |
| "learning_rate": 1.0146860530641597e-06, | |
| "loss": 1.1115221977233887, | |
| "step": 2556 | |
| }, | |
| { | |
| "epoch": 2.930126002290951, | |
| "grad_norm": 0.16616712510585785, | |
| "learning_rate": 1.013782978126312e-06, | |
| "loss": 1.1849524974822998, | |
| "step": 2558 | |
| }, | |
| { | |
| "epoch": 2.93241695303551, | |
| "grad_norm": 0.034661658108234406, | |
| "learning_rate": 1.0129085145834374e-06, | |
| "loss": 0.8701274394989014, | |
| "step": 2560 | |
| }, | |
| { | |
| "epoch": 2.934707903780069, | |
| "grad_norm": 0.2624439299106598, | |
| "learning_rate": 1.012062668012534e-06, | |
| "loss": 0.7686551809310913, | |
| "step": 2562 | |
| }, | |
| { | |
| "epoch": 2.936998854524628, | |
| "grad_norm": 0.25557151436805725, | |
| "learning_rate": 1.0112454438080912e-06, | |
| "loss": 0.9486254453659058, | |
| "step": 2564 | |
| }, | |
| { | |
| "epoch": 2.939289805269187, | |
| "grad_norm": 0.07392004132270813, | |
| "learning_rate": 1.010456847182055e-06, | |
| "loss": 1.0521230697631836, | |
| "step": 2566 | |
| }, | |
| { | |
| "epoch": 2.941580756013746, | |
| "grad_norm": 0.05467333644628525, | |
| "learning_rate": 1.0096968831637976e-06, | |
| "loss": 1.0537450313568115, | |
| "step": 2568 | |
| }, | |
| { | |
| "epoch": 2.943871706758305, | |
| "grad_norm": 0.12582947313785553, | |
| "learning_rate": 1.008965556600081e-06, | |
| "loss": 1.0930761098861694, | |
| "step": 2570 | |
| }, | |
| { | |
| "epoch": 2.946162657502864, | |
| "grad_norm": 0.1186356469988823, | |
| "learning_rate": 1.0082628721550296e-06, | |
| "loss": 1.0622632503509521, | |
| "step": 2572 | |
| }, | |
| { | |
| "epoch": 2.948453608247423, | |
| "grad_norm": 0.03574733808636665, | |
| "learning_rate": 1.0075888343100988e-06, | |
| "loss": 1.1093733310699463, | |
| "step": 2574 | |
| }, | |
| { | |
| "epoch": 2.950744558991982, | |
| "grad_norm": 0.13341714441776276, | |
| "learning_rate": 1.0069434473640478e-06, | |
| "loss": 0.6080751419067383, | |
| "step": 2576 | |
| }, | |
| { | |
| "epoch": 2.953035509736541, | |
| "grad_norm": 0.11503258347511292, | |
| "learning_rate": 1.0063267154329093e-06, | |
| "loss": 1.136846899986267, | |
| "step": 2578 | |
| }, | |
| { | |
| "epoch": 2.9553264604811, | |
| "grad_norm": 0.03894869238138199, | |
| "learning_rate": 1.0057386424499674e-06, | |
| "loss": 1.02817702293396, | |
| "step": 2580 | |
| }, | |
| { | |
| "epoch": 2.957617411225659, | |
| "grad_norm": 0.03900009021162987, | |
| "learning_rate": 1.005179232165728e-06, | |
| "loss": 1.0276997089385986, | |
| "step": 2582 | |
| }, | |
| { | |
| "epoch": 2.959908361970218, | |
| "grad_norm": 0.03746483847498894, | |
| "learning_rate": 1.0046484881478987e-06, | |
| "loss": 1.0998444557189941, | |
| "step": 2584 | |
| }, | |
| { | |
| "epoch": 2.9621993127147768, | |
| "grad_norm": 0.13167481124401093, | |
| "learning_rate": 1.0041464137813643e-06, | |
| "loss": 1.077620506286621, | |
| "step": 2586 | |
| }, | |
| { | |
| "epoch": 2.9644902634593358, | |
| "grad_norm": 0.11638258397579193, | |
| "learning_rate": 1.0036730122681644e-06, | |
| "loss": 1.0882108211517334, | |
| "step": 2588 | |
| }, | |
| { | |
| "epoch": 2.9667812142038947, | |
| "grad_norm": 0.10818243771791458, | |
| "learning_rate": 1.003228286627476e-06, | |
| "loss": 1.0548460483551025, | |
| "step": 2590 | |
| }, | |
| { | |
| "epoch": 2.9690721649484537, | |
| "grad_norm": 0.16958948969841003, | |
| "learning_rate": 1.00281223969559e-06, | |
| "loss": 1.097860336303711, | |
| "step": 2592 | |
| }, | |
| { | |
| "epoch": 2.9713631156930127, | |
| "grad_norm": 0.24064916372299194, | |
| "learning_rate": 1.0024248741258968e-06, | |
| "loss": 0.8971477746963501, | |
| "step": 2594 | |
| }, | |
| { | |
| "epoch": 2.9736540664375717, | |
| "grad_norm": 0.13208243250846863, | |
| "learning_rate": 1.0020661923888674e-06, | |
| "loss": 1.1392490863800049, | |
| "step": 2596 | |
| }, | |
| { | |
| "epoch": 2.9759450171821307, | |
| "grad_norm": 0.14331702888011932, | |
| "learning_rate": 1.001736196772038e-06, | |
| "loss": 1.1136538982391357, | |
| "step": 2598 | |
| }, | |
| { | |
| "epoch": 2.9782359679266897, | |
| "grad_norm": 0.11038228869438171, | |
| "learning_rate": 1.0014348893799954e-06, | |
| "loss": 1.282451868057251, | |
| "step": 2600 | |
| }, | |
| { | |
| "epoch": 2.9805269186712486, | |
| "grad_norm": 0.12085051089525223, | |
| "learning_rate": 1.0011622721343655e-06, | |
| "loss": 1.0986806154251099, | |
| "step": 2602 | |
| }, | |
| { | |
| "epoch": 2.9828178694158076, | |
| "grad_norm": 0.03834696114063263, | |
| "learning_rate": 1.0009183467737969e-06, | |
| "loss": 0.8867760896682739, | |
| "step": 2604 | |
| }, | |
| { | |
| "epoch": 2.9851088201603666, | |
| "grad_norm": 0.04324295371770859, | |
| "learning_rate": 1.0007031148539529e-06, | |
| "loss": 1.044438362121582, | |
| "step": 2606 | |
| }, | |
| { | |
| "epoch": 2.9873997709049256, | |
| "grad_norm": 0.308033287525177, | |
| "learning_rate": 1.0005165777475022e-06, | |
| "loss": 0.5716328620910645, | |
| "step": 2608 | |
| }, | |
| { | |
| "epoch": 2.9896907216494846, | |
| "grad_norm": 0.034776829183101654, | |
| "learning_rate": 1.000358736644108e-06, | |
| "loss": 0.4520626366138458, | |
| "step": 2610 | |
| }, | |
| { | |
| "epoch": 2.9919816723940436, | |
| "grad_norm": 0.052479542791843414, | |
| "learning_rate": 1.00022959255042e-06, | |
| "loss": 1.0296052694320679, | |
| "step": 2612 | |
| }, | |
| { | |
| "epoch": 2.9942726231386025, | |
| "grad_norm": 0.05816009268164635, | |
| "learning_rate": 1.0001291462900717e-06, | |
| "loss": 1.0887324810028076, | |
| "step": 2614 | |
| }, | |
| { | |
| "epoch": 2.9965635738831615, | |
| "grad_norm": 0.2066674381494522, | |
| "learning_rate": 1.0000573985036708e-06, | |
| "loss": 1.0183582305908203, | |
| "step": 2616 | |
| }, | |
| { | |
| "epoch": 2.9988545246277205, | |
| "grad_norm": 0.12765929102897644, | |
| "learning_rate": 1.0000143496487968e-06, | |
| "loss": 1.177083969116211, | |
| "step": 2618 | |
| }, | |
| { | |
| "epoch": 3.0, | |
| "step": 2619, | |
| "total_flos": 4.569263266953429e+18, | |
| "train_loss": 1.1290170093980534, | |
| "train_runtime": 13562.7006, | |
| "train_samples_per_second": 3.09, | |
| "train_steps_per_second": 0.193 | |
| } | |
| ], | |
| "logging_steps": 2, | |
| "max_steps": 2619, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 3, | |
| "save_steps": 9999999, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": true | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 4.569263266953429e+18, | |
| "train_batch_size": 1, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |