Image-Text-to-Text
Transformers
Safetensors
qwen3_5
llama-factory
full
Generated from Trainer
conversational
Instructions to use furproxy/9b-133 with libraries, inference providers, notebooks, and local apps. Follow these links to get started.
- Libraries
- Transformers
How to use furproxy/9b-133 with Transformers:
```python
# Use a pipeline as a high-level helper
from transformers import pipeline

pipe = pipeline("image-text-to-text", model="furproxy/9b-133")
messages = [
    {
        "role": "user",
        "content": [
            {"type": "image", "url": "https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/p-blog/candy.JPG"},
            {"type": "text", "text": "What animal is on the candy?"}
        ]
    },
]
pipe(text=messages)
```
```python
# Load model directly
from transformers import AutoProcessor, AutoModelForImageTextToText

processor = AutoProcessor.from_pretrained("furproxy/9b-133")
model = AutoModelForImageTextToText.from_pretrained("furproxy/9b-133")
messages = [
    {
        "role": "user",
        "content": [
            {"type": "image", "url": "https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/p-blog/candy.JPG"},
            {"type": "text", "text": "What animal is on the candy?"}
        ]
    },
]
inputs = processor.apply_chat_template(
    messages,
    add_generation_prompt=True,
    tokenize=True,
    return_dict=True,
    return_tensors="pt",
).to(model.device)

outputs = model.generate(**inputs, max_new_tokens=40)
print(processor.decode(outputs[0][inputs["input_ids"].shape[-1]:]))
```
- Notebooks
- Google Colab
- Kaggle
- Local Apps Settings
- vLLM
How to use furproxy/9b-133 with vLLM:
Install from pip and serve model
```bash
# Install vLLM from pip:
pip install vllm

# Start the vLLM server:
vllm serve "furproxy/9b-133"

# Call the server using curl (OpenAI-compatible API):
curl -X POST "http://localhost:8000/v1/chat/completions" \
    -H "Content-Type: application/json" \
    --data '{
        "model": "furproxy/9b-133",
        "messages": [
            {
                "role": "user",
                "content": [
                    {
                        "type": "text",
                        "text": "Describe this image in one sentence."
                    },
                    {
                        "type": "image_url",
                        "image_url": {
                            "url": "https://cdn.britannica.com/61/93061-050-99147DCE/Statue-of-Liberty-Island-New-York-Bay.jpg"
                        }
                    }
                ]
            }
        ]
    }'
```
Use Docker
docker model run hf.co/furproxy/9b-133
- SGLang
How to use furproxy/9b-133 with SGLang:
Install from pip and serve model
```bash
# Install SGLang from pip:
pip install sglang

# Start the SGLang server:
python3 -m sglang.launch_server \
    --model-path "furproxy/9b-133" \
    --host 0.0.0.0 \
    --port 30000

# Call the server using curl (OpenAI-compatible API):
curl -X POST "http://localhost:30000/v1/chat/completions" \
    -H "Content-Type: application/json" \
    --data '{
        "model": "furproxy/9b-133",
        "messages": [
            {
                "role": "user",
                "content": [
                    {
                        "type": "text",
                        "text": "Describe this image in one sentence."
                    },
                    {
                        "type": "image_url",
                        "image_url": {
                            "url": "https://cdn.britannica.com/61/93061-050-99147DCE/Statue-of-Liberty-Island-New-York-Bay.jpg"
                        }
                    }
                ]
            }
        ]
    }'
```
Use Docker images
```bash
docker run --gpus all \
    --shm-size 32g \
    -p 30000:30000 \
    -v ~/.cache/huggingface:/root/.cache/huggingface \
    --env "HF_TOKEN=<secret>" \
    --ipc=host \
    lmsysorg/sglang:latest \
    python3 -m sglang.launch_server \
        --model-path "furproxy/9b-133" \
        --host 0.0.0.0 \
        --port 30000

# Call the server using curl (OpenAI-compatible API):
curl -X POST "http://localhost:30000/v1/chat/completions" \
    -H "Content-Type: application/json" \
    --data '{
        "model": "furproxy/9b-133",
        "messages": [
            {
                "role": "user",
                "content": [
                    {
                        "type": "text",
                        "text": "Describe this image in one sentence."
                    },
                    {
                        "type": "image_url",
                        "image_url": {
                            "url": "https://cdn.britannica.com/61/93061-050-99147DCE/Statue-of-Liberty-Island-New-York-Bay.jpg"
                        }
                    }
                ]
            }
        ]
    }'
```
- Docker Model Runner
How to use furproxy/9b-133 with Docker Model Runner:
docker model run hf.co/furproxy/9b-133
| { | |
| "best_global_step": null, | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 3.0, | |
| "eval_steps": 500, | |
| "global_step": 3564, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.0016835016835016834, | |
| "grad_norm": 10.01240348815918, | |
| "learning_rate": 5.5865921787709494e-09, | |
| "loss": 1.7057493925094604, | |
| "step": 2 | |
| }, | |
| { | |
| "epoch": 0.003367003367003367, | |
| "grad_norm": 14.913334846496582, | |
| "learning_rate": 1.6759776536312847e-08, | |
| "loss": 1.2436225414276123, | |
| "step": 4 | |
| }, | |
| { | |
| "epoch": 0.005050505050505051, | |
| "grad_norm": 22.982995986938477, | |
| "learning_rate": 2.7932960893854745e-08, | |
| "loss": 1.686056137084961, | |
| "step": 6 | |
| }, | |
| { | |
| "epoch": 0.006734006734006734, | |
| "grad_norm": 15.24986457824707, | |
| "learning_rate": 3.910614525139665e-08, | |
| "loss": 1.6055235862731934, | |
| "step": 8 | |
| }, | |
| { | |
| "epoch": 0.008417508417508417, | |
| "grad_norm": 30.967639923095703, | |
| "learning_rate": 5.027932960893855e-08, | |
| "loss": 4.50665283203125, | |
| "step": 10 | |
| }, | |
| { | |
| "epoch": 0.010101010101010102, | |
| "grad_norm": 4.303424835205078, | |
| "learning_rate": 6.145251396648044e-08, | |
| "loss": 1.9789408445358276, | |
| "step": 12 | |
| }, | |
| { | |
| "epoch": 0.011784511784511785, | |
| "grad_norm": 5.598588466644287, | |
| "learning_rate": 7.262569832402235e-08, | |
| "loss": 1.6753730773925781, | |
| "step": 14 | |
| }, | |
| { | |
| "epoch": 0.013468013468013467, | |
| "grad_norm": 4.323257923126221, | |
| "learning_rate": 8.379888268156423e-08, | |
| "loss": 1.6596330404281616, | |
| "step": 16 | |
| }, | |
| { | |
| "epoch": 0.015151515151515152, | |
| "grad_norm": 26.17571258544922, | |
| "learning_rate": 9.497206703910614e-08, | |
| "loss": 2.7241992950439453, | |
| "step": 18 | |
| }, | |
| { | |
| "epoch": 0.016835016835016835, | |
| "grad_norm": 9.184181213378906, | |
| "learning_rate": 1.0614525139664805e-07, | |
| "loss": 1.9634017944335938, | |
| "step": 20 | |
| }, | |
| { | |
| "epoch": 0.018518518518518517, | |
| "grad_norm": 4.683750152587891, | |
| "learning_rate": 1.1731843575418994e-07, | |
| "loss": 1.8491621017456055, | |
| "step": 22 | |
| }, | |
| { | |
| "epoch": 0.020202020202020204, | |
| "grad_norm": 14.232526779174805, | |
| "learning_rate": 1.2849162011173183e-07, | |
| "loss": 3.537993907928467, | |
| "step": 24 | |
| }, | |
| { | |
| "epoch": 0.021885521885521887, | |
| "grad_norm": 11.717961311340332, | |
| "learning_rate": 1.3966480446927373e-07, | |
| "loss": 2.8410818576812744, | |
| "step": 26 | |
| }, | |
| { | |
| "epoch": 0.02356902356902357, | |
| "grad_norm": 11.476764678955078, | |
| "learning_rate": 1.5083798882681565e-07, | |
| "loss": 2.1707875728607178, | |
| "step": 28 | |
| }, | |
| { | |
| "epoch": 0.025252525252525252, | |
| "grad_norm": 42.536720275878906, | |
| "learning_rate": 1.6201117318435754e-07, | |
| "loss": 3.401388645172119, | |
| "step": 30 | |
| }, | |
| { | |
| "epoch": 0.026936026936026935, | |
| "grad_norm": 15.799206733703613, | |
| "learning_rate": 1.7318435754189943e-07, | |
| "loss": 1.8762117624282837, | |
| "step": 32 | |
| }, | |
| { | |
| "epoch": 0.02861952861952862, | |
| "grad_norm": 56.47621154785156, | |
| "learning_rate": 1.8435754189944133e-07, | |
| "loss": 4.025151252746582, | |
| "step": 34 | |
| }, | |
| { | |
| "epoch": 0.030303030303030304, | |
| "grad_norm": 8.71907901763916, | |
| "learning_rate": 1.9553072625698322e-07, | |
| "loss": 1.9956148862838745, | |
| "step": 36 | |
| }, | |
| { | |
| "epoch": 0.03198653198653199, | |
| "grad_norm": 13.315755844116211, | |
| "learning_rate": 2.0670391061452514e-07, | |
| "loss": 1.5647544860839844, | |
| "step": 38 | |
| }, | |
| { | |
| "epoch": 0.03367003367003367, | |
| "grad_norm": 18.28321647644043, | |
| "learning_rate": 2.17877094972067e-07, | |
| "loss": 2.4461331367492676, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 0.03535353535353535, | |
| "grad_norm": 7.177945137023926, | |
| "learning_rate": 2.2905027932960893e-07, | |
| "loss": 3.1400742530822754, | |
| "step": 42 | |
| }, | |
| { | |
| "epoch": 0.037037037037037035, | |
| "grad_norm": 11.345965385437012, | |
| "learning_rate": 2.402234636871508e-07, | |
| "loss": 2.982694149017334, | |
| "step": 44 | |
| }, | |
| { | |
| "epoch": 0.03872053872053872, | |
| "grad_norm": 18.986379623413086, | |
| "learning_rate": 2.5139664804469275e-07, | |
| "loss": 1.7094351053237915, | |
| "step": 46 | |
| }, | |
| { | |
| "epoch": 0.04040404040404041, | |
| "grad_norm": 25.200927734375, | |
| "learning_rate": 2.6256983240223464e-07, | |
| "loss": 3.4711947441101074, | |
| "step": 48 | |
| }, | |
| { | |
| "epoch": 0.04208754208754209, | |
| "grad_norm": 25.79502296447754, | |
| "learning_rate": 2.7374301675977653e-07, | |
| "loss": 2.5125930309295654, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 0.04377104377104377, | |
| "grad_norm": 26.86095428466797, | |
| "learning_rate": 2.849162011173184e-07, | |
| "loss": 2.5184483528137207, | |
| "step": 52 | |
| }, | |
| { | |
| "epoch": 0.045454545454545456, | |
| "grad_norm": 23.869613647460938, | |
| "learning_rate": 2.960893854748603e-07, | |
| "loss": 2.1967999935150146, | |
| "step": 54 | |
| }, | |
| { | |
| "epoch": 0.04713804713804714, | |
| "grad_norm": 4.752484321594238, | |
| "learning_rate": 3.072625698324022e-07, | |
| "loss": 1.6605415344238281, | |
| "step": 56 | |
| }, | |
| { | |
| "epoch": 0.04882154882154882, | |
| "grad_norm": 30.32961654663086, | |
| "learning_rate": 3.184357541899441e-07, | |
| "loss": 2.6820101737976074, | |
| "step": 58 | |
| }, | |
| { | |
| "epoch": 0.050505050505050504, | |
| "grad_norm": 4.937363624572754, | |
| "learning_rate": 3.29608938547486e-07, | |
| "loss": 2.046969175338745, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 0.05218855218855219, | |
| "grad_norm": 26.058670043945312, | |
| "learning_rate": 3.407821229050279e-07, | |
| "loss": 2.126314163208008, | |
| "step": 62 | |
| }, | |
| { | |
| "epoch": 0.05387205387205387, | |
| "grad_norm": 3.972296714782715, | |
| "learning_rate": 3.5195530726256984e-07, | |
| "loss": 1.469801902770996, | |
| "step": 64 | |
| }, | |
| { | |
| "epoch": 0.05555555555555555, | |
| "grad_norm": 36.323368072509766, | |
| "learning_rate": 3.6312849162011174e-07, | |
| "loss": 2.0382440090179443, | |
| "step": 66 | |
| }, | |
| { | |
| "epoch": 0.05723905723905724, | |
| "grad_norm": 5.039744853973389, | |
| "learning_rate": 3.7430167597765363e-07, | |
| "loss": 1.679071068763733, | |
| "step": 68 | |
| }, | |
| { | |
| "epoch": 0.058922558922558925, | |
| "grad_norm": 5.542041778564453, | |
| "learning_rate": 3.8547486033519547e-07, | |
| "loss": 1.7368519306182861, | |
| "step": 70 | |
| }, | |
| { | |
| "epoch": 0.06060606060606061, | |
| "grad_norm": 11.228593826293945, | |
| "learning_rate": 3.966480446927374e-07, | |
| "loss": 1.9073054790496826, | |
| "step": 72 | |
| }, | |
| { | |
| "epoch": 0.06228956228956229, | |
| "grad_norm": 6.521553993225098, | |
| "learning_rate": 4.078212290502793e-07, | |
| "loss": 1.7021303176879883, | |
| "step": 74 | |
| }, | |
| { | |
| "epoch": 0.06397306397306397, | |
| "grad_norm": 4.614531993865967, | |
| "learning_rate": 4.189944134078212e-07, | |
| "loss": 1.3584303855895996, | |
| "step": 76 | |
| }, | |
| { | |
| "epoch": 0.06565656565656566, | |
| "grad_norm": 4.567502021789551, | |
| "learning_rate": 4.301675977653631e-07, | |
| "loss": 1.7855596542358398, | |
| "step": 78 | |
| }, | |
| { | |
| "epoch": 0.06734006734006734, | |
| "grad_norm": 4.453341484069824, | |
| "learning_rate": 4.41340782122905e-07, | |
| "loss": 1.5260930061340332, | |
| "step": 80 | |
| }, | |
| { | |
| "epoch": 0.06902356902356903, | |
| "grad_norm": 9.207719802856445, | |
| "learning_rate": 4.5251396648044694e-07, | |
| "loss": 1.7678306102752686, | |
| "step": 82 | |
| }, | |
| { | |
| "epoch": 0.0707070707070707, | |
| "grad_norm": 11.142820358276367, | |
| "learning_rate": 4.6368715083798884e-07, | |
| "loss": 1.4878003597259521, | |
| "step": 84 | |
| }, | |
| { | |
| "epoch": 0.0723905723905724, | |
| "grad_norm": 6.588044166564941, | |
| "learning_rate": 4.7486033519553073e-07, | |
| "loss": 1.6655892133712769, | |
| "step": 86 | |
| }, | |
| { | |
| "epoch": 0.07407407407407407, | |
| "grad_norm": 7.762340068817139, | |
| "learning_rate": 4.860335195530726e-07, | |
| "loss": 1.4147857427597046, | |
| "step": 88 | |
| }, | |
| { | |
| "epoch": 0.07575757575757576, | |
| "grad_norm": 19.327587127685547, | |
| "learning_rate": 4.972067039106145e-07, | |
| "loss": 1.6009736061096191, | |
| "step": 90 | |
| }, | |
| { | |
| "epoch": 0.07744107744107744, | |
| "grad_norm": 16.781408309936523, | |
| "learning_rate": 5.083798882681564e-07, | |
| "loss": 1.331944227218628, | |
| "step": 92 | |
| }, | |
| { | |
| "epoch": 0.07912457912457913, | |
| "grad_norm": 5.269062042236328, | |
| "learning_rate": 5.195530726256983e-07, | |
| "loss": 1.3683245182037354, | |
| "step": 94 | |
| }, | |
| { | |
| "epoch": 0.08080808080808081, | |
| "grad_norm": 2.652998685836792, | |
| "learning_rate": 5.307262569832402e-07, | |
| "loss": 1.4645051956176758, | |
| "step": 96 | |
| }, | |
| { | |
| "epoch": 0.08249158249158249, | |
| "grad_norm": 26.370506286621094, | |
| "learning_rate": 5.418994413407821e-07, | |
| "loss": 1.4499703645706177, | |
| "step": 98 | |
| }, | |
| { | |
| "epoch": 0.08417508417508418, | |
| "grad_norm": 4.437371253967285, | |
| "learning_rate": 5.53072625698324e-07, | |
| "loss": 1.3694539070129395, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.08585858585858586, | |
| "grad_norm": 7.602840900421143, | |
| "learning_rate": 5.642458100558659e-07, | |
| "loss": 1.2753658294677734, | |
| "step": 102 | |
| }, | |
| { | |
| "epoch": 0.08754208754208755, | |
| "grad_norm": 5.345534801483154, | |
| "learning_rate": 5.754189944134078e-07, | |
| "loss": 0.9927137494087219, | |
| "step": 104 | |
| }, | |
| { | |
| "epoch": 0.08922558922558922, | |
| "grad_norm": 57.12667465209961, | |
| "learning_rate": 5.865921787709497e-07, | |
| "loss": 1.144801378250122, | |
| "step": 106 | |
| }, | |
| { | |
| "epoch": 0.09090909090909091, | |
| "grad_norm": 3.486433267593384, | |
| "learning_rate": 5.977653631284916e-07, | |
| "loss": 1.3661882877349854, | |
| "step": 108 | |
| }, | |
| { | |
| "epoch": 0.09259259259259259, | |
| "grad_norm": 8.98828411102295, | |
| "learning_rate": 6.089385474860335e-07, | |
| "loss": 0.9164130687713623, | |
| "step": 110 | |
| }, | |
| { | |
| "epoch": 0.09427609427609428, | |
| "grad_norm": 4.9939141273498535, | |
| "learning_rate": 6.201117318435754e-07, | |
| "loss": 1.3426786661148071, | |
| "step": 112 | |
| }, | |
| { | |
| "epoch": 0.09595959595959595, | |
| "grad_norm": 29.148103713989258, | |
| "learning_rate": 6.312849162011172e-07, | |
| "loss": 1.138382911682129, | |
| "step": 114 | |
| }, | |
| { | |
| "epoch": 0.09764309764309764, | |
| "grad_norm": 34.31653594970703, | |
| "learning_rate": 6.424581005586592e-07, | |
| "loss": 0.7960847616195679, | |
| "step": 116 | |
| }, | |
| { | |
| "epoch": 0.09932659932659933, | |
| "grad_norm": 4.712627410888672, | |
| "learning_rate": 6.536312849162011e-07, | |
| "loss": 1.2441091537475586, | |
| "step": 118 | |
| }, | |
| { | |
| "epoch": 0.10101010101010101, | |
| "grad_norm": 5.5220794677734375, | |
| "learning_rate": 6.64804469273743e-07, | |
| "loss": 1.0892267227172852, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 0.1026936026936027, | |
| "grad_norm": 10.08218765258789, | |
| "learning_rate": 6.759776536312849e-07, | |
| "loss": 1.266754150390625, | |
| "step": 122 | |
| }, | |
| { | |
| "epoch": 0.10437710437710437, | |
| "grad_norm": 7.951529026031494, | |
| "learning_rate": 6.871508379888268e-07, | |
| "loss": 0.8909415006637573, | |
| "step": 124 | |
| }, | |
| { | |
| "epoch": 0.10606060606060606, | |
| "grad_norm": 3.5433144569396973, | |
| "learning_rate": 6.983240223463687e-07, | |
| "loss": 0.7614157795906067, | |
| "step": 126 | |
| }, | |
| { | |
| "epoch": 0.10774410774410774, | |
| "grad_norm": 84.19695281982422, | |
| "learning_rate": 7.094972067039106e-07, | |
| "loss": 1.1203527450561523, | |
| "step": 128 | |
| }, | |
| { | |
| "epoch": 0.10942760942760943, | |
| "grad_norm": 6.779047966003418, | |
| "learning_rate": 7.206703910614524e-07, | |
| "loss": 1.0394889116287231, | |
| "step": 130 | |
| }, | |
| { | |
| "epoch": 0.1111111111111111, | |
| "grad_norm": 3.759247303009033, | |
| "learning_rate": 7.318435754189943e-07, | |
| "loss": 0.9934459328651428, | |
| "step": 132 | |
| }, | |
| { | |
| "epoch": 0.1127946127946128, | |
| "grad_norm": 4.790719032287598, | |
| "learning_rate": 7.430167597765363e-07, | |
| "loss": 1.2970447540283203, | |
| "step": 134 | |
| }, | |
| { | |
| "epoch": 0.11447811447811448, | |
| "grad_norm": 11.66688346862793, | |
| "learning_rate": 7.541899441340782e-07, | |
| "loss": 1.2734112739562988, | |
| "step": 136 | |
| }, | |
| { | |
| "epoch": 0.11616161616161616, | |
| "grad_norm": 5.437692642211914, | |
| "learning_rate": 7.653631284916201e-07, | |
| "loss": 1.7463512420654297, | |
| "step": 138 | |
| }, | |
| { | |
| "epoch": 0.11784511784511785, | |
| "grad_norm": 2.954306125640869, | |
| "learning_rate": 7.76536312849162e-07, | |
| "loss": 1.2036831378936768, | |
| "step": 140 | |
| }, | |
| { | |
| "epoch": 0.11952861952861953, | |
| "grad_norm": 3.9827589988708496, | |
| "learning_rate": 7.877094972067039e-07, | |
| "loss": 1.1270943880081177, | |
| "step": 142 | |
| }, | |
| { | |
| "epoch": 0.12121212121212122, | |
| "grad_norm": 19.19826316833496, | |
| "learning_rate": 7.988826815642458e-07, | |
| "loss": 1.0638954639434814, | |
| "step": 144 | |
| }, | |
| { | |
| "epoch": 0.12289562289562289, | |
| "grad_norm": 2.969254970550537, | |
| "learning_rate": 8.100558659217876e-07, | |
| "loss": 1.2084304094314575, | |
| "step": 146 | |
| }, | |
| { | |
| "epoch": 0.12457912457912458, | |
| "grad_norm": 3.5464372634887695, | |
| "learning_rate": 8.212290502793295e-07, | |
| "loss": 1.0377205610275269, | |
| "step": 148 | |
| }, | |
| { | |
| "epoch": 0.12626262626262627, | |
| "grad_norm": 26.851030349731445, | |
| "learning_rate": 8.324022346368714e-07, | |
| "loss": 1.298867106437683, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 0.12794612794612795, | |
| "grad_norm": 12.729865074157715, | |
| "learning_rate": 8.435754189944134e-07, | |
| "loss": 1.0469536781311035, | |
| "step": 152 | |
| }, | |
| { | |
| "epoch": 0.12962962962962962, | |
| "grad_norm": 39.720340728759766, | |
| "learning_rate": 8.547486033519553e-07, | |
| "loss": 1.3842543363571167, | |
| "step": 154 | |
| }, | |
| { | |
| "epoch": 0.13131313131313133, | |
| "grad_norm": 30.861583709716797, | |
| "learning_rate": 8.659217877094972e-07, | |
| "loss": 1.2696869373321533, | |
| "step": 156 | |
| }, | |
| { | |
| "epoch": 0.132996632996633, | |
| "grad_norm": 2.758213520050049, | |
| "learning_rate": 8.770949720670391e-07, | |
| "loss": 1.1152485609054565, | |
| "step": 158 | |
| }, | |
| { | |
| "epoch": 0.13468013468013468, | |
| "grad_norm": 5.129064559936523, | |
| "learning_rate": 8.88268156424581e-07, | |
| "loss": 1.21260666847229, | |
| "step": 160 | |
| }, | |
| { | |
| "epoch": 0.13636363636363635, | |
| "grad_norm": 2.200296640396118, | |
| "learning_rate": 8.994413407821229e-07, | |
| "loss": 1.0739009380340576, | |
| "step": 162 | |
| }, | |
| { | |
| "epoch": 0.13804713804713806, | |
| "grad_norm": 22.802173614501953, | |
| "learning_rate": 9.106145251396647e-07, | |
| "loss": 1.0534250736236572, | |
| "step": 164 | |
| }, | |
| { | |
| "epoch": 0.13973063973063973, | |
| "grad_norm": 8.334705352783203, | |
| "learning_rate": 9.217877094972066e-07, | |
| "loss": 0.9987061023712158, | |
| "step": 166 | |
| }, | |
| { | |
| "epoch": 0.1414141414141414, | |
| "grad_norm": 3.1446645259857178, | |
| "learning_rate": 9.329608938547485e-07, | |
| "loss": 1.2239556312561035, | |
| "step": 168 | |
| }, | |
| { | |
| "epoch": 0.14309764309764308, | |
| "grad_norm": 11.334406852722168, | |
| "learning_rate": 9.441340782122904e-07, | |
| "loss": 1.1194162368774414, | |
| "step": 170 | |
| }, | |
| { | |
| "epoch": 0.1447811447811448, | |
| "grad_norm": 3.408362865447998, | |
| "learning_rate": 9.553072625698324e-07, | |
| "loss": 1.085777997970581, | |
| "step": 172 | |
| }, | |
| { | |
| "epoch": 0.14646464646464646, | |
| "grad_norm": 6.2441534996032715, | |
| "learning_rate": 9.664804469273742e-07, | |
| "loss": 0.7717651128768921, | |
| "step": 174 | |
| }, | |
| { | |
| "epoch": 0.14814814814814814, | |
| "grad_norm": 3.749255895614624, | |
| "learning_rate": 9.776536312849163e-07, | |
| "loss": 1.1312694549560547, | |
| "step": 176 | |
| }, | |
| { | |
| "epoch": 0.14983164983164984, | |
| "grad_norm": 3.902320384979248, | |
| "learning_rate": 9.888268156424581e-07, | |
| "loss": 1.3509280681610107, | |
| "step": 178 | |
| }, | |
| { | |
| "epoch": 0.15151515151515152, | |
| "grad_norm": 6.110651969909668, | |
| "learning_rate": 1e-06, | |
| "loss": 1.075784683227539, | |
| "step": 180 | |
| }, | |
| { | |
| "epoch": 0.1531986531986532, | |
| "grad_norm": 9.884479522705078, | |
| "learning_rate": 9.999992247803292e-07, | |
| "loss": 1.4511303901672363, | |
| "step": 182 | |
| }, | |
| { | |
| "epoch": 0.15488215488215487, | |
| "grad_norm": 22.860551834106445, | |
| "learning_rate": 9.999968991239885e-07, | |
| "loss": 1.0601496696472168, | |
| "step": 184 | |
| }, | |
| { | |
| "epoch": 0.15656565656565657, | |
| "grad_norm": 47.76069641113281, | |
| "learning_rate": 9.9999302303899e-07, | |
| "loss": 1.175671100616455, | |
| "step": 186 | |
| }, | |
| { | |
| "epoch": 0.15824915824915825, | |
| "grad_norm": 7.632693290710449, | |
| "learning_rate": 9.999875965386889e-07, | |
| "loss": 0.9617436528205872, | |
| "step": 188 | |
| }, | |
| { | |
| "epoch": 0.15993265993265993, | |
| "grad_norm": 14.18217945098877, | |
| "learning_rate": 9.999806196417815e-07, | |
| "loss": 0.8225744962692261, | |
| "step": 190 | |
| }, | |
| { | |
| "epoch": 0.16161616161616163, | |
| "grad_norm": 3.5702500343322754, | |
| "learning_rate": 9.999720923723065e-07, | |
| "loss": 1.3951547145843506, | |
| "step": 192 | |
| }, | |
| { | |
| "epoch": 0.1632996632996633, | |
| "grad_norm": 6.512271881103516, | |
| "learning_rate": 9.999620147596435e-07, | |
| "loss": 1.3134064674377441, | |
| "step": 194 | |
| }, | |
| { | |
| "epoch": 0.16498316498316498, | |
| "grad_norm": 4.347053050994873, | |
| "learning_rate": 9.999503868385147e-07, | |
| "loss": 1.1201355457305908, | |
| "step": 196 | |
| }, | |
| { | |
| "epoch": 0.16666666666666666, | |
| "grad_norm": 4.274275779724121, | |
| "learning_rate": 9.999372086489827e-07, | |
| "loss": 1.2217128276824951, | |
| "step": 198 | |
| }, | |
| { | |
| "epoch": 0.16835016835016836, | |
| "grad_norm": 36.957733154296875, | |
| "learning_rate": 9.999224802364522e-07, | |
| "loss": 0.9089727997779846, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.17003367003367004, | |
| "grad_norm": 10.688148498535156, | |
| "learning_rate": 9.999062016516683e-07, | |
| "loss": 0.9836642742156982, | |
| "step": 202 | |
| }, | |
| { | |
| "epoch": 0.1717171717171717, | |
| "grad_norm": 5.000755310058594, | |
| "learning_rate": 9.998883729507182e-07, | |
| "loss": 1.0589679479599, | |
| "step": 204 | |
| }, | |
| { | |
| "epoch": 0.1734006734006734, | |
| "grad_norm": 3.18554425239563, | |
| "learning_rate": 9.998689941950286e-07, | |
| "loss": 1.1106410026550293, | |
| "step": 206 | |
| }, | |
| { | |
| "epoch": 0.1750841750841751, | |
| "grad_norm": 3.399953842163086, | |
| "learning_rate": 9.99848065451368e-07, | |
| "loss": 1.259597897529602, | |
| "step": 208 | |
| }, | |
| { | |
| "epoch": 0.17676767676767677, | |
| "grad_norm": 34.06399917602539, | |
| "learning_rate": 9.998255867918447e-07, | |
| "loss": 0.7958086729049683, | |
| "step": 210 | |
| }, | |
| { | |
| "epoch": 0.17845117845117844, | |
| "grad_norm": 11.635184288024902, | |
| "learning_rate": 9.99801558293907e-07, | |
| "loss": 0.974760115146637, | |
| "step": 212 | |
| }, | |
| { | |
| "epoch": 0.18013468013468015, | |
| "grad_norm": 3.804048776626587, | |
| "learning_rate": 9.997759800403432e-07, | |
| "loss": 1.4053202867507935, | |
| "step": 214 | |
| }, | |
| { | |
| "epoch": 0.18181818181818182, | |
| "grad_norm": 3.969377279281616, | |
| "learning_rate": 9.99748852119281e-07, | |
| "loss": 0.8879891633987427, | |
| "step": 216 | |
| }, | |
| { | |
| "epoch": 0.1835016835016835, | |
| "grad_norm": 13.216470718383789, | |
| "learning_rate": 9.997201746241877e-07, | |
| "loss": 0.7051749229431152, | |
| "step": 218 | |
| }, | |
| { | |
| "epoch": 0.18518518518518517, | |
| "grad_norm": 21.844314575195312, | |
| "learning_rate": 9.996899476538694e-07, | |
| "loss": 1.4015233516693115, | |
| "step": 220 | |
| }, | |
| { | |
| "epoch": 0.18686868686868688, | |
| "grad_norm": 4.534096717834473, | |
| "learning_rate": 9.996581713124706e-07, | |
| "loss": 0.972633957862854, | |
| "step": 222 | |
| }, | |
| { | |
| "epoch": 0.18855218855218855, | |
| "grad_norm": 3.273697853088379, | |
| "learning_rate": 9.99624845709474e-07, | |
| "loss": 1.2434642314910889, | |
| "step": 224 | |
| }, | |
| { | |
| "epoch": 0.19023569023569023, | |
| "grad_norm": 4.797500133514404, | |
| "learning_rate": 9.995899709597006e-07, | |
| "loss": 1.0040223598480225, | |
| "step": 226 | |
| }, | |
| { | |
| "epoch": 0.1919191919191919, | |
| "grad_norm": 12.437410354614258, | |
| "learning_rate": 9.995535471833086e-07, | |
| "loss": 1.2370095252990723, | |
| "step": 228 | |
| }, | |
| { | |
| "epoch": 0.1936026936026936, | |
| "grad_norm": 7.460165023803711, | |
| "learning_rate": 9.995155745057929e-07, | |
| "loss": 1.4212405681610107, | |
| "step": 230 | |
| }, | |
| { | |
| "epoch": 0.19528619528619529, | |
| "grad_norm": 9.647342681884766, | |
| "learning_rate": 9.994760530579857e-07, | |
| "loss": 1.1002936363220215, | |
| "step": 232 | |
| }, | |
| { | |
| "epoch": 0.19696969696969696, | |
| "grad_norm": 11.12820053100586, | |
| "learning_rate": 9.994349829760549e-07, | |
| "loss": 1.237018346786499, | |
| "step": 234 | |
| }, | |
| { | |
| "epoch": 0.19865319865319866, | |
| "grad_norm": 5.350140571594238, | |
| "learning_rate": 9.993923644015042e-07, | |
| "loss": 1.0195953845977783, | |
| "step": 236 | |
| }, | |
| { | |
| "epoch": 0.20033670033670034, | |
| "grad_norm": 3.050861358642578, | |
| "learning_rate": 9.993481974811725e-07, | |
| "loss": 1.22686767578125, | |
| "step": 238 | |
| }, | |
| { | |
| "epoch": 0.20202020202020202, | |
| "grad_norm": 7.857388019561768, | |
| "learning_rate": 9.993024823672335e-07, | |
| "loss": 1.0028936862945557, | |
| "step": 240 | |
| }, | |
| { | |
| "epoch": 0.2037037037037037, | |
| "grad_norm": 7.335727214813232, | |
| "learning_rate": 9.99255219217195e-07, | |
| "loss": 1.2266963720321655, | |
| "step": 242 | |
| }, | |
| { | |
| "epoch": 0.2053872053872054, | |
| "grad_norm": 6.673895359039307, | |
| "learning_rate": 9.992064081938982e-07, | |
| "loss": 1.0401980876922607, | |
| "step": 244 | |
| }, | |
| { | |
| "epoch": 0.20707070707070707, | |
| "grad_norm": 11.121489524841309, | |
| "learning_rate": 9.99156049465518e-07, | |
| "loss": 0.704534649848938, | |
| "step": 246 | |
| }, | |
| { | |
| "epoch": 0.20875420875420875, | |
| "grad_norm": 6.052087306976318, | |
| "learning_rate": 9.99104143205561e-07, | |
| "loss": 1.2733914852142334, | |
| "step": 248 | |
| }, | |
| { | |
| "epoch": 0.21043771043771045, | |
| "grad_norm": 8.680047988891602, | |
| "learning_rate": 9.990506895928664e-07, | |
| "loss": 1.0285900831222534, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 0.21212121212121213, | |
| "grad_norm": 3.77591609954834, | |
| "learning_rate": 9.989956888116044e-07, | |
| "loss": 0.925588071346283, | |
| "step": 252 | |
| }, | |
| { | |
| "epoch": 0.2138047138047138, | |
| "grad_norm": 17.994216918945312, | |
| "learning_rate": 9.989391410512756e-07, | |
| "loss": 1.09348726272583, | |
| "step": 254 | |
| }, | |
| { | |
| "epoch": 0.21548821548821548, | |
| "grad_norm": 3.3857617378234863, | |
| "learning_rate": 9.988810465067111e-07, | |
| "loss": 1.2375221252441406, | |
| "step": 256 | |
| }, | |
| { | |
| "epoch": 0.21717171717171718, | |
| "grad_norm": 2.9572367668151855, | |
| "learning_rate": 9.988214053780707e-07, | |
| "loss": 0.8651703000068665, | |
| "step": 258 | |
| }, | |
| { | |
| "epoch": 0.21885521885521886, | |
| "grad_norm": 3.476825714111328, | |
| "learning_rate": 9.987602178708435e-07, | |
| "loss": 1.0651121139526367, | |
| "step": 260 | |
| }, | |
| { | |
| "epoch": 0.22053872053872053, | |
| "grad_norm": 3.713834047317505, | |
| "learning_rate": 9.986974841958463e-07, | |
| "loss": 1.0779788494110107, | |
| "step": 262 | |
| }, | |
| { | |
| "epoch": 0.2222222222222222, | |
| "grad_norm": 4.407167911529541, | |
| "learning_rate": 9.986332045692227e-07, | |
| "loss": 1.1462655067443848, | |
| "step": 264 | |
| }, | |
| { | |
| "epoch": 0.2239057239057239, | |
| "grad_norm": 3.255230665206909, | |
| "learning_rate": 9.98567379212443e-07, | |
| "loss": 1.245474100112915, | |
| "step": 266 | |
| }, | |
| { | |
| "epoch": 0.2255892255892256, | |
| "grad_norm": 30.358354568481445, | |
| "learning_rate": 9.985000083523037e-07, | |
| "loss": 0.6667277216911316, | |
| "step": 268 | |
| }, | |
| { | |
| "epoch": 0.22727272727272727, | |
| "grad_norm": 5.595312595367432, | |
| "learning_rate": 9.984310922209254e-07, | |
| "loss": 1.0221211910247803, | |
| "step": 270 | |
| }, | |
| { | |
| "epoch": 0.22895622895622897, | |
| "grad_norm": 16.317052841186523, | |
| "learning_rate": 9.983606310557533e-07, | |
| "loss": 1.3395957946777344, | |
| "step": 272 | |
| }, | |
| { | |
| "epoch": 0.23063973063973064, | |
| "grad_norm": 11.24964714050293, | |
| "learning_rate": 9.982886250995556e-07, | |
| "loss": 1.1954050064086914, | |
| "step": 274 | |
| }, | |
| { | |
| "epoch": 0.23232323232323232, | |
| "grad_norm": 44.74198913574219, | |
| "learning_rate": 9.982150746004232e-07, | |
| "loss": 0.9265189170837402, | |
| "step": 276 | |
| }, | |
| { | |
| "epoch": 0.234006734006734, | |
| "grad_norm": 3.8386383056640625, | |
| "learning_rate": 9.981399798117685e-07, | |
| "loss": 1.198085069656372, | |
| "step": 278 | |
| }, | |
| { | |
| "epoch": 0.2356902356902357, | |
| "grad_norm": 44.37248992919922, | |
| "learning_rate": 9.980633409923247e-07, | |
| "loss": 1.0136717557907104, | |
| "step": 280 | |
| }, | |
| { | |
| "epoch": 0.23737373737373738, | |
| "grad_norm": 7.57785701751709, | |
| "learning_rate": 9.979851584061449e-07, | |
| "loss": 0.9574207663536072, | |
| "step": 282 | |
| }, | |
| { | |
| "epoch": 0.23905723905723905, | |
| "grad_norm": 8.24811840057373, | |
| "learning_rate": 9.97905432322601e-07, | |
| "loss": 1.3114678859710693, | |
| "step": 284 | |
| }, | |
| { | |
| "epoch": 0.24074074074074073, | |
| "grad_norm": 5.775442600250244, | |
| "learning_rate": 9.978241630163826e-07, | |
| "loss": 0.9548346400260925, | |
| "step": 286 | |
| }, | |
| { | |
| "epoch": 0.24242424242424243, | |
| "grad_norm": 11.149243354797363, | |
| "learning_rate": 9.977413507674968e-07, | |
| "loss": 0.8632457852363586, | |
| "step": 288 | |
| }, | |
| { | |
| "epoch": 0.2441077441077441, | |
| "grad_norm": 78.07566833496094, | |
| "learning_rate": 9.976569958612667e-07, | |
| "loss": 1.2243592739105225, | |
| "step": 290 | |
| }, | |
| { | |
| "epoch": 0.24579124579124578, | |
| "grad_norm": 4.65302848815918, | |
| "learning_rate": 9.975710985883304e-07, | |
| "loss": 0.6913841366767883, | |
| "step": 292 | |
| }, | |
| { | |
| "epoch": 0.2474747474747475, | |
| "grad_norm": 15.239048957824707, | |
| "learning_rate": 9.974836592446402e-07, | |
| "loss": 1.3095204830169678, | |
| "step": 294 | |
| }, | |
| { | |
| "epoch": 0.24915824915824916, | |
| "grad_norm": 13.059560775756836, | |
| "learning_rate": 9.973946781314614e-07, | |
| "loss": 1.106144666671753, | |
| "step": 296 | |
| }, | |
| { | |
| "epoch": 0.25084175084175087, | |
| "grad_norm": 5.432850360870361, | |
| "learning_rate": 9.973041555553712e-07, | |
| "loss": 0.6466901898384094, | |
| "step": 298 | |
| }, | |
| { | |
| "epoch": 0.25252525252525254, | |
| "grad_norm": 9.237662315368652, | |
| "learning_rate": 9.972120918282583e-07, | |
| "loss": 0.8612852096557617, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 0.2542087542087542, | |
| "grad_norm": 19.600900650024414, | |
| "learning_rate": 9.971184872673208e-07, | |
| "loss": 1.105349063873291, | |
| "step": 302 | |
| }, | |
| { | |
| "epoch": 0.2558922558922559, | |
| "grad_norm": 40.91580581665039, | |
| "learning_rate": 9.970233421950659e-07, | |
| "loss": 0.9198004603385925, | |
| "step": 304 | |
| }, | |
| { | |
| "epoch": 0.25757575757575757, | |
| "grad_norm": 4.66962194442749, | |
| "learning_rate": 9.969266569393081e-07, | |
| "loss": 1.3845856189727783, | |
| "step": 306 | |
| }, | |
| { | |
| "epoch": 0.25925925925925924, | |
| "grad_norm": 60.427490234375, | |
| "learning_rate": 9.968284318331692e-07, | |
| "loss": 1.1327593326568604, | |
| "step": 308 | |
| }, | |
| { | |
| "epoch": 0.2609427609427609, | |
| "grad_norm": 22.725788116455078, | |
| "learning_rate": 9.967286672150757e-07, | |
| "loss": 1.1523091793060303, | |
| "step": 310 | |
| }, | |
| { | |
| "epoch": 0.26262626262626265, | |
| "grad_norm": 24.43414878845215, | |
| "learning_rate": 9.96627363428759e-07, | |
| "loss": 1.234093189239502, | |
| "step": 312 | |
| }, | |
| { | |
| "epoch": 0.26430976430976433, | |
| "grad_norm": 3.773989200592041, | |
| "learning_rate": 9.965245208232528e-07, | |
| "loss": 1.123462200164795, | |
| "step": 314 | |
| }, | |
| { | |
| "epoch": 0.265993265993266, | |
| "grad_norm": 4.06792688369751, | |
| "learning_rate": 9.964201397528935e-07, | |
| "loss": 1.274748682975769, | |
| "step": 316 | |
| }, | |
| { | |
| "epoch": 0.2676767676767677, | |
| "grad_norm": 6.183606147766113, | |
| "learning_rate": 9.963142205773178e-07, | |
| "loss": 1.0359277725219727, | |
| "step": 318 | |
| }, | |
| { | |
| "epoch": 0.26936026936026936, | |
| "grad_norm": 17.0985164642334, | |
| "learning_rate": 9.962067636614617e-07, | |
| "loss": 0.7821587920188904, | |
| "step": 320 | |
| }, | |
| { | |
| "epoch": 0.27104377104377103, | |
| "grad_norm": 8.39433765411377, | |
| "learning_rate": 9.960977693755597e-07, | |
| "loss": 1.007806420326233, | |
| "step": 322 | |
| }, | |
| { | |
| "epoch": 0.2727272727272727, | |
| "grad_norm": 6.79010534286499, | |
| "learning_rate": 9.959872380951425e-07, | |
| "loss": 1.306843638420105, | |
| "step": 324 | |
| }, | |
| { | |
| "epoch": 0.27441077441077444, | |
| "grad_norm": 3.4290051460266113, | |
| "learning_rate": 9.958751702010373e-07, | |
| "loss": 1.0737717151641846, | |
| "step": 326 | |
| }, | |
| { | |
| "epoch": 0.2760942760942761, | |
| "grad_norm": 3.778372287750244, | |
| "learning_rate": 9.957615660793653e-07, | |
| "loss": 0.842218816280365, | |
| "step": 328 | |
| }, | |
| { | |
| "epoch": 0.2777777777777778, | |
| "grad_norm": 4.193020343780518, | |
| "learning_rate": 9.9564642612154e-07, | |
| "loss": 0.9259565472602844, | |
| "step": 330 | |
| }, | |
| { | |
| "epoch": 0.27946127946127947, | |
| "grad_norm": 5.208146572113037, | |
| "learning_rate": 9.955297507242673e-07, | |
| "loss": 1.1419891119003296, | |
| "step": 332 | |
| }, | |
| { | |
| "epoch": 0.28114478114478114, | |
| "grad_norm": 5.717302322387695, | |
| "learning_rate": 9.95411540289543e-07, | |
| "loss": 1.1330386400222778, | |
| "step": 334 | |
| }, | |
| { | |
| "epoch": 0.2828282828282828, | |
| "grad_norm": 5.831217288970947, | |
| "learning_rate": 9.952917952246516e-07, | |
| "loss": 1.0413146018981934, | |
| "step": 336 | |
| }, | |
| { | |
| "epoch": 0.2845117845117845, | |
| "grad_norm": 3.645052433013916, | |
| "learning_rate": 9.951705159421654e-07, | |
| "loss": 1.235117793083191, | |
| "step": 338 | |
| }, | |
| { | |
| "epoch": 0.28619528619528617, | |
| "grad_norm": 22.020658493041992, | |
| "learning_rate": 9.950477028599428e-07, | |
| "loss": 1.043231725692749, | |
| "step": 340 | |
| }, | |
| { | |
| "epoch": 0.2878787878787879, | |
| "grad_norm": 21.875652313232422, | |
| "learning_rate": 9.94923356401126e-07, | |
| "loss": 1.175392985343933, | |
| "step": 342 | |
| }, | |
| { | |
| "epoch": 0.2895622895622896, | |
| "grad_norm": 27.17024803161621, | |
| "learning_rate": 9.947974769941413e-07, | |
| "loss": 0.9123649001121521, | |
| "step": 344 | |
| }, | |
| { | |
| "epoch": 0.29124579124579125, | |
| "grad_norm": 6.494509220123291, | |
| "learning_rate": 9.946700650726963e-07, | |
| "loss": 1.1428461074829102, | |
| "step": 346 | |
| }, | |
| { | |
| "epoch": 0.29292929292929293, | |
| "grad_norm": 6.450991630554199, | |
| "learning_rate": 9.94541121075778e-07, | |
| "loss": 1.08597731590271, | |
| "step": 348 | |
| }, | |
| { | |
| "epoch": 0.2946127946127946, | |
| "grad_norm": 4.014670372009277, | |
| "learning_rate": 9.944106454476535e-07, | |
| "loss": 0.8208044171333313, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 0.2962962962962963, | |
| "grad_norm": 5.806086540222168, | |
| "learning_rate": 9.94278638637866e-07, | |
| "loss": 0.6253402829170227, | |
| "step": 352 | |
| }, | |
| { | |
| "epoch": 0.29797979797979796, | |
| "grad_norm": 3.053389310836792, | |
| "learning_rate": 9.941451011012342e-07, | |
| "loss": 1.0509334802627563, | |
| "step": 354 | |
| }, | |
| { | |
| "epoch": 0.2996632996632997, | |
| "grad_norm": 7.8727521896362305, | |
| "learning_rate": 9.940100332978513e-07, | |
| "loss": 1.0956045389175415, | |
| "step": 356 | |
| }, | |
| { | |
| "epoch": 0.30134680134680136, | |
| "grad_norm": 32.331748962402344, | |
| "learning_rate": 9.938734356930828e-07, | |
| "loss": 1.004880666732788, | |
| "step": 358 | |
| }, | |
| { | |
| "epoch": 0.30303030303030304, | |
| "grad_norm": 4.172276020050049, | |
| "learning_rate": 9.93735308757565e-07, | |
| "loss": 0.8379921317100525, | |
| "step": 360 | |
| }, | |
| { | |
| "epoch": 0.3047138047138047, | |
| "grad_norm": 6.15704870223999, | |
| "learning_rate": 9.93595652967203e-07, | |
| "loss": 1.0078097581863403, | |
| "step": 362 | |
| }, | |
| { | |
| "epoch": 0.3063973063973064, | |
| "grad_norm": 4.966274261474609, | |
| "learning_rate": 9.9345446880317e-07, | |
| "loss": 1.2672780752182007, | |
| "step": 364 | |
| }, | |
| { | |
| "epoch": 0.30808080808080807, | |
| "grad_norm": 8.712943077087402, | |
| "learning_rate": 9.933117567519048e-07, | |
| "loss": 0.8534368276596069, | |
| "step": 366 | |
| }, | |
| { | |
| "epoch": 0.30976430976430974, | |
| "grad_norm": 6.715219020843506, | |
| "learning_rate": 9.931675173051105e-07, | |
| "loss": 0.8929988145828247, | |
| "step": 368 | |
| }, | |
| { | |
| "epoch": 0.3114478114478115, | |
| "grad_norm": 8.526223182678223, | |
| "learning_rate": 9.930217509597527e-07, | |
| "loss": 1.1088082790374756, | |
| "step": 370 | |
| }, | |
| { | |
| "epoch": 0.31313131313131315, | |
| "grad_norm": 13.495247840881348, | |
| "learning_rate": 9.928744582180574e-07, | |
| "loss": 1.2500221729278564, | |
| "step": 372 | |
| }, | |
| { | |
| "epoch": 0.3148148148148148, | |
| "grad_norm": 23.23642921447754, | |
| "learning_rate": 9.927256395875107e-07, | |
| "loss": 0.7106721997261047, | |
| "step": 374 | |
| }, | |
| { | |
| "epoch": 0.3164983164983165, | |
| "grad_norm": 6.651264190673828, | |
| "learning_rate": 9.925752955808548e-07, | |
| "loss": 1.0243923664093018, | |
| "step": 376 | |
| }, | |
| { | |
| "epoch": 0.3181818181818182, | |
| "grad_norm": 5.95202112197876, | |
| "learning_rate": 9.924234267160885e-07, | |
| "loss": 1.370633840560913, | |
| "step": 378 | |
| }, | |
| { | |
| "epoch": 0.31986531986531985, | |
| "grad_norm": 11.883193016052246, | |
| "learning_rate": 9.922700335164638e-07, | |
| "loss": 0.7322716116905212, | |
| "step": 380 | |
| }, | |
| { | |
| "epoch": 0.32154882154882153, | |
| "grad_norm": 7.471388816833496, | |
| "learning_rate": 9.92115116510485e-07, | |
| "loss": 0.913710355758667, | |
| "step": 382 | |
| }, | |
| { | |
| "epoch": 0.32323232323232326, | |
| "grad_norm": 44.468502044677734, | |
| "learning_rate": 9.919586762319058e-07, | |
| "loss": 1.1375393867492676, | |
| "step": 384 | |
| }, | |
| { | |
| "epoch": 0.32491582491582494, | |
| "grad_norm": 73.37066650390625, | |
| "learning_rate": 9.918007132197294e-07, | |
| "loss": 0.750845193862915, | |
| "step": 386 | |
| }, | |
| { | |
| "epoch": 0.3265993265993266, | |
| "grad_norm": 13.170440673828125, | |
| "learning_rate": 9.916412280182047e-07, | |
| "loss": 0.9285147190093994, | |
| "step": 388 | |
| }, | |
| { | |
| "epoch": 0.3282828282828283, | |
| "grad_norm": 27.329137802124023, | |
| "learning_rate": 9.91480221176825e-07, | |
| "loss": 1.1141570806503296, | |
| "step": 390 | |
| }, | |
| { | |
| "epoch": 0.32996632996632996, | |
| "grad_norm": 23.576858520507812, | |
| "learning_rate": 9.913176932503269e-07, | |
| "loss": 0.8426070809364319, | |
| "step": 392 | |
| }, | |
| { | |
| "epoch": 0.33164983164983164, | |
| "grad_norm": 4.582382678985596, | |
| "learning_rate": 9.911536447986874e-07, | |
| "loss": 1.3466606140136719, | |
| "step": 394 | |
| }, | |
| { | |
| "epoch": 0.3333333333333333, | |
| "grad_norm": 18.690176010131836, | |
| "learning_rate": 9.909880763871225e-07, | |
| "loss": 1.2158761024475098, | |
| "step": 396 | |
| }, | |
| { | |
| "epoch": 0.335016835016835, | |
| "grad_norm": 12.741125106811523, | |
| "learning_rate": 9.90820988586085e-07, | |
| "loss": 0.7843135595321655, | |
| "step": 398 | |
| }, | |
| { | |
| "epoch": 0.3367003367003367, | |
| "grad_norm": 8.261248588562012, | |
| "learning_rate": 9.906523819712627e-07, | |
| "loss": 0.9648294448852539, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 0.3383838383838384, | |
| "grad_norm": 13.866211891174316, | |
| "learning_rate": 9.904822571235764e-07, | |
| "loss": 0.9860712289810181, | |
| "step": 402 | |
| }, | |
| { | |
| "epoch": 0.3400673400673401, | |
| "grad_norm": 7.611033916473389, | |
| "learning_rate": 9.903106146291776e-07, | |
| "loss": 1.0380196571350098, | |
| "step": 404 | |
| }, | |
| { | |
| "epoch": 0.34175084175084175, | |
| "grad_norm": 4.44096565246582, | |
| "learning_rate": 9.901374550794471e-07, | |
| "loss": 1.0885226726531982, | |
| "step": 406 | |
| }, | |
| { | |
| "epoch": 0.3434343434343434, | |
| "grad_norm": 7.336009502410889, | |
| "learning_rate": 9.899627790709922e-07, | |
| "loss": 0.978155255317688, | |
| "step": 408 | |
| }, | |
| { | |
| "epoch": 0.3451178451178451, | |
| "grad_norm": 35.349571228027344, | |
| "learning_rate": 9.897865872056454e-07, | |
| "loss": 0.5597323179244995, | |
| "step": 410 | |
| }, | |
| { | |
| "epoch": 0.3468013468013468, | |
| "grad_norm": 5.807060718536377, | |
| "learning_rate": 9.896088800904617e-07, | |
| "loss": 0.8961684703826904, | |
| "step": 412 | |
| }, | |
| { | |
| "epoch": 0.3484848484848485, | |
| "grad_norm": 18.415029525756836, | |
| "learning_rate": 9.894296583377171e-07, | |
| "loss": 0.9247993230819702, | |
| "step": 414 | |
| }, | |
| { | |
| "epoch": 0.3501683501683502, | |
| "grad_norm": 16.985078811645508, | |
| "learning_rate": 9.892489225649058e-07, | |
| "loss": 1.2044103145599365, | |
| "step": 416 | |
| }, | |
| { | |
| "epoch": 0.35185185185185186, | |
| "grad_norm": 6.910268306732178, | |
| "learning_rate": 9.890666733947386e-07, | |
| "loss": 0.7405315637588501, | |
| "step": 418 | |
| }, | |
| { | |
| "epoch": 0.35353535353535354, | |
| "grad_norm": 9.06907844543457, | |
| "learning_rate": 9.888829114551404e-07, | |
| "loss": 0.9250643253326416, | |
| "step": 420 | |
| }, | |
| { | |
| "epoch": 0.3552188552188552, | |
| "grad_norm": 10.192124366760254, | |
| "learning_rate": 9.886976373792488e-07, | |
| "loss": 1.1218069791793823, | |
| "step": 422 | |
| }, | |
| { | |
| "epoch": 0.3569023569023569, | |
| "grad_norm": 9.159024238586426, | |
| "learning_rate": 9.885108518054106e-07, | |
| "loss": 0.6351463794708252, | |
| "step": 424 | |
| }, | |
| { | |
| "epoch": 0.35858585858585856, | |
| "grad_norm": 29.38273811340332, | |
| "learning_rate": 9.883225553771807e-07, | |
| "loss": 1.0669465065002441, | |
| "step": 426 | |
| }, | |
| { | |
| "epoch": 0.3602693602693603, | |
| "grad_norm": 8.669297218322754, | |
| "learning_rate": 9.881327487433198e-07, | |
| "loss": 0.8117149472236633, | |
| "step": 428 | |
| }, | |
| { | |
| "epoch": 0.36195286195286197, | |
| "grad_norm": 6.67222785949707, | |
| "learning_rate": 9.879414325577916e-07, | |
| "loss": 1.2592154741287231, | |
| "step": 430 | |
| }, | |
| { | |
| "epoch": 0.36363636363636365, | |
| "grad_norm": 6.638124942779541, | |
| "learning_rate": 9.877486074797602e-07, | |
| "loss": 0.9993456602096558, | |
| "step": 432 | |
| }, | |
| { | |
| "epoch": 0.3653198653198653, | |
| "grad_norm": 3.7449495792388916, | |
| "learning_rate": 9.8755427417359e-07, | |
| "loss": 0.8662674427032471, | |
| "step": 434 | |
| }, | |
| { | |
| "epoch": 0.367003367003367, | |
| "grad_norm": 4.553740978240967, | |
| "learning_rate": 9.873584333088407e-07, | |
| "loss": 1.0476055145263672, | |
| "step": 436 | |
| }, | |
| { | |
| "epoch": 0.3686868686868687, | |
| "grad_norm": 9.034341812133789, | |
| "learning_rate": 9.871610855602662e-07, | |
| "loss": 1.1130859851837158, | |
| "step": 438 | |
| }, | |
| { | |
| "epoch": 0.37037037037037035, | |
| "grad_norm": 7.609111785888672, | |
| "learning_rate": 9.869622316078128e-07, | |
| "loss": 0.9781308770179749, | |
| "step": 440 | |
| }, | |
| { | |
| "epoch": 0.3720538720538721, | |
| "grad_norm": 15.675320625305176, | |
| "learning_rate": 9.86761872136616e-07, | |
| "loss": 0.9868993759155273, | |
| "step": 442 | |
| }, | |
| { | |
| "epoch": 0.37373737373737376, | |
| "grad_norm": 4.52480936050415, | |
| "learning_rate": 9.865600078369985e-07, | |
| "loss": 0.7887587547302246, | |
| "step": 444 | |
| }, | |
| { | |
| "epoch": 0.37542087542087543, | |
| "grad_norm": 21.339006423950195, | |
| "learning_rate": 9.863566394044677e-07, | |
| "loss": 0.6558203101158142, | |
| "step": 446 | |
| }, | |
| { | |
| "epoch": 0.3771043771043771, | |
| "grad_norm": 5.116230010986328, | |
| "learning_rate": 9.861517675397135e-07, | |
| "loss": 1.1714262962341309, | |
| "step": 448 | |
| }, | |
| { | |
| "epoch": 0.3787878787878788, | |
| "grad_norm": 16.112041473388672, | |
| "learning_rate": 9.859453929486054e-07, | |
| "loss": 1.1047420501708984, | |
| "step": 450 | |
| }, | |
| { | |
| "epoch": 0.38047138047138046, | |
| "grad_norm": 3.787045478820801, | |
| "learning_rate": 9.857375163421912e-07, | |
| "loss": 0.7425003051757812, | |
| "step": 452 | |
| }, | |
| { | |
| "epoch": 0.38215488215488214, | |
| "grad_norm": 11.478412628173828, | |
| "learning_rate": 9.855281384366928e-07, | |
| "loss": 1.0151433944702148, | |
| "step": 454 | |
| }, | |
| { | |
| "epoch": 0.3838383838383838, | |
| "grad_norm": 3.6095988750457764, | |
| "learning_rate": 9.853172599535054e-07, | |
| "loss": 0.8090977668762207, | |
| "step": 456 | |
| }, | |
| { | |
| "epoch": 0.38552188552188554, | |
| "grad_norm": 7.952422618865967, | |
| "learning_rate": 9.85104881619194e-07, | |
| "loss": 0.9961310625076294, | |
| "step": 458 | |
| }, | |
| { | |
| "epoch": 0.3872053872053872, | |
| "grad_norm": 2.0787007808685303, | |
| "learning_rate": 9.848910041654915e-07, | |
| "loss": 1.1424083709716797, | |
| "step": 460 | |
| }, | |
| { | |
| "epoch": 0.3888888888888889, | |
| "grad_norm": 3.276982545852661, | |
| "learning_rate": 9.846756283292955e-07, | |
| "loss": 0.8972825407981873, | |
| "step": 462 | |
| }, | |
| { | |
| "epoch": 0.39057239057239057, | |
| "grad_norm": 6.26957368850708, | |
| "learning_rate": 9.844587548526665e-07, | |
| "loss": 0.8542879223823547, | |
| "step": 464 | |
| }, | |
| { | |
| "epoch": 0.39225589225589225, | |
| "grad_norm": 32.88930892944336, | |
| "learning_rate": 9.842403844828249e-07, | |
| "loss": 0.9769890308380127, | |
| "step": 466 | |
| }, | |
| { | |
| "epoch": 0.3939393939393939, | |
| "grad_norm": 10.898834228515625, | |
| "learning_rate": 9.840205179721486e-07, | |
| "loss": 0.9689866304397583, | |
| "step": 468 | |
| }, | |
| { | |
| "epoch": 0.3956228956228956, | |
| "grad_norm": 3.45035457611084, | |
| "learning_rate": 9.837991560781698e-07, | |
| "loss": 0.9729927778244019, | |
| "step": 470 | |
| }, | |
| { | |
| "epoch": 0.39730639730639733, | |
| "grad_norm": 7.222962379455566, | |
| "learning_rate": 9.835762995635739e-07, | |
| "loss": 0.8332297801971436, | |
| "step": 472 | |
| }, | |
| { | |
| "epoch": 0.398989898989899, | |
| "grad_norm": 3.249415636062622, | |
| "learning_rate": 9.833519491961951e-07, | |
| "loss": 1.0173261165618896, | |
| "step": 474 | |
| }, | |
| { | |
| "epoch": 0.4006734006734007, | |
| "grad_norm": 6.285678863525391, | |
| "learning_rate": 9.831261057490148e-07, | |
| "loss": 0.7735811471939087, | |
| "step": 476 | |
| }, | |
| { | |
| "epoch": 0.40235690235690236, | |
| "grad_norm": 3.5245249271392822, | |
| "learning_rate": 9.82898770000159e-07, | |
| "loss": 0.9958957433700562, | |
| "step": 478 | |
| }, | |
| { | |
| "epoch": 0.40404040404040403, | |
| "grad_norm": 13.678420066833496, | |
| "learning_rate": 9.826699427328944e-07, | |
| "loss": 1.0717885494232178, | |
| "step": 480 | |
| }, | |
| { | |
| "epoch": 0.4057239057239057, | |
| "grad_norm": 12.059322357177734, | |
| "learning_rate": 9.824396247356276e-07, | |
| "loss": 1.0886049270629883, | |
| "step": 482 | |
| }, | |
| { | |
| "epoch": 0.4074074074074074, | |
| "grad_norm": 9.42127513885498, | |
| "learning_rate": 9.822078168019012e-07, | |
| "loss": 0.8954146504402161, | |
| "step": 484 | |
| }, | |
| { | |
| "epoch": 0.4090909090909091, | |
| "grad_norm": 13.108272552490234, | |
| "learning_rate": 9.819745197303907e-07, | |
| "loss": 0.881049633026123, | |
| "step": 486 | |
| }, | |
| { | |
| "epoch": 0.4107744107744108, | |
| "grad_norm": 3.574754238128662, | |
| "learning_rate": 9.817397343249028e-07, | |
| "loss": 1.1146478652954102, | |
| "step": 488 | |
| }, | |
| { | |
| "epoch": 0.41245791245791247, | |
| "grad_norm": 3.4290618896484375, | |
| "learning_rate": 9.815034613943722e-07, | |
| "loss": 1.118224859237671, | |
| "step": 490 | |
| }, | |
| { | |
| "epoch": 0.41414141414141414, | |
| "grad_norm": 16.978740692138672, | |
| "learning_rate": 9.812657017528584e-07, | |
| "loss": 1.0728644132614136, | |
| "step": 492 | |
| }, | |
| { | |
| "epoch": 0.4158249158249158, | |
| "grad_norm": 5.449537754058838, | |
| "learning_rate": 9.810264562195432e-07, | |
| "loss": 0.9440809488296509, | |
| "step": 494 | |
| }, | |
| { | |
| "epoch": 0.4175084175084175, | |
| "grad_norm": 2.756265640258789, | |
| "learning_rate": 9.807857256187283e-07, | |
| "loss": 1.1065900325775146, | |
| "step": 496 | |
| }, | |
| { | |
| "epoch": 0.41919191919191917, | |
| "grad_norm": 3.4030373096466064, | |
| "learning_rate": 9.805435107798322e-07, | |
| "loss": 1.0974758863449097, | |
| "step": 498 | |
| }, | |
| { | |
| "epoch": 0.4208754208754209, | |
| "grad_norm": 9.233179092407227, | |
| "learning_rate": 9.802998125373864e-07, | |
| "loss": 0.851800799369812, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.4225589225589226, | |
| "grad_norm": 11.157843589782715, | |
| "learning_rate": 9.800546317310343e-07, | |
| "loss": 0.6602354645729065, | |
| "step": 502 | |
| }, | |
| { | |
| "epoch": 0.42424242424242425, | |
| "grad_norm": 15.149531364440918, | |
| "learning_rate": 9.798079692055267e-07, | |
| "loss": 0.9472991228103638, | |
| "step": 504 | |
| }, | |
| { | |
| "epoch": 0.42592592592592593, | |
| "grad_norm": 30.058595657348633, | |
| "learning_rate": 9.7955982581072e-07, | |
| "loss": 0.9938538670539856, | |
| "step": 506 | |
| }, | |
| { | |
| "epoch": 0.4276094276094276, | |
| "grad_norm": 23.41927719116211, | |
| "learning_rate": 9.793102024015724e-07, | |
| "loss": 1.4200940132141113, | |
| "step": 508 | |
| }, | |
| { | |
| "epoch": 0.4292929292929293, | |
| "grad_norm": 3.9220423698425293, | |
| "learning_rate": 9.790590998381417e-07, | |
| "loss": 1.0478514432907104, | |
| "step": 510 | |
| }, | |
| { | |
| "epoch": 0.43097643097643096, | |
| "grad_norm": 3.723065137863159, | |
| "learning_rate": 9.788065189855817e-07, | |
| "loss": 1.2064735889434814, | |
| "step": 512 | |
| }, | |
| { | |
| "epoch": 0.43265993265993263, | |
| "grad_norm": 3.486267566680908, | |
| "learning_rate": 9.7855246071414e-07, | |
| "loss": 1.140267014503479, | |
| "step": 514 | |
| }, | |
| { | |
| "epoch": 0.43434343434343436, | |
| "grad_norm": 8.95257568359375, | |
| "learning_rate": 9.78296925899154e-07, | |
| "loss": 1.0755705833435059, | |
| "step": 516 | |
| }, | |
| { | |
| "epoch": 0.43602693602693604, | |
| "grad_norm": 4.213111400604248, | |
| "learning_rate": 9.780399154210487e-07, | |
| "loss": 1.0637681484222412, | |
| "step": 518 | |
| }, | |
| { | |
| "epoch": 0.4377104377104377, | |
| "grad_norm": 26.23670196533203, | |
| "learning_rate": 9.777814301653336e-07, | |
| "loss": 0.9591152667999268, | |
| "step": 520 | |
| }, | |
| { | |
| "epoch": 0.4393939393939394, | |
| "grad_norm": 2.839754343032837, | |
| "learning_rate": 9.775214710225987e-07, | |
| "loss": 0.8415237665176392, | |
| "step": 522 | |
| }, | |
| { | |
| "epoch": 0.44107744107744107, | |
| "grad_norm": 5.952809810638428, | |
| "learning_rate": 9.77260038888513e-07, | |
| "loss": 1.133270502090454, | |
| "step": 524 | |
| }, | |
| { | |
| "epoch": 0.44276094276094274, | |
| "grad_norm": 8.995283126831055, | |
| "learning_rate": 9.769971346638203e-07, | |
| "loss": 0.7777677774429321, | |
| "step": 526 | |
| }, | |
| { | |
| "epoch": 0.4444444444444444, | |
| "grad_norm": 3.4373066425323486, | |
| "learning_rate": 9.767327592543359e-07, | |
| "loss": 1.2248082160949707, | |
| "step": 528 | |
| }, | |
| { | |
| "epoch": 0.44612794612794615, | |
| "grad_norm": 7.905541896820068, | |
| "learning_rate": 9.764669135709443e-07, | |
| "loss": 0.8326348066329956, | |
| "step": 530 | |
| }, | |
| { | |
| "epoch": 0.4478114478114478, | |
| "grad_norm": 2.997097969055176, | |
| "learning_rate": 9.76199598529596e-07, | |
| "loss": 0.8697119355201721, | |
| "step": 532 | |
| }, | |
| { | |
| "epoch": 0.4494949494949495, | |
| "grad_norm": 3.4758172035217285, | |
| "learning_rate": 9.759308150513039e-07, | |
| "loss": 0.9715222716331482, | |
| "step": 534 | |
| }, | |
| { | |
| "epoch": 0.4511784511784512, | |
| "grad_norm": 4.66405725479126, | |
| "learning_rate": 9.756605640621397e-07, | |
| "loss": 1.2556489706039429, | |
| "step": 536 | |
| }, | |
| { | |
| "epoch": 0.45286195286195285, | |
| "grad_norm": 11.930469512939453, | |
| "learning_rate": 9.753888464932322e-07, | |
| "loss": 1.1018869876861572, | |
| "step": 538 | |
| }, | |
| { | |
| "epoch": 0.45454545454545453, | |
| "grad_norm": 13.772843360900879, | |
| "learning_rate": 9.751156632807626e-07, | |
| "loss": 0.8878042101860046, | |
| "step": 540 | |
| }, | |
| { | |
| "epoch": 0.4562289562289562, | |
| "grad_norm": 4.206384181976318, | |
| "learning_rate": 9.748410153659618e-07, | |
| "loss": 1.0389076471328735, | |
| "step": 542 | |
| }, | |
| { | |
| "epoch": 0.45791245791245794, | |
| "grad_norm": 3.624582052230835, | |
| "learning_rate": 9.745649036951079e-07, | |
| "loss": 1.1431198120117188, | |
| "step": 544 | |
| }, | |
| { | |
| "epoch": 0.4595959595959596, | |
| "grad_norm": 12.98609733581543, | |
| "learning_rate": 9.742873292195213e-07, | |
| "loss": 1.1605827808380127, | |
| "step": 546 | |
| }, | |
| { | |
| "epoch": 0.4612794612794613, | |
| "grad_norm": 7.910975456237793, | |
| "learning_rate": 9.740082928955634e-07, | |
| "loss": 1.3202755451202393, | |
| "step": 548 | |
| }, | |
| { | |
| "epoch": 0.46296296296296297, | |
| "grad_norm": 5.325044631958008, | |
| "learning_rate": 9.737277956846313e-07, | |
| "loss": 0.9252653121948242, | |
| "step": 550 | |
| }, | |
| { | |
| "epoch": 0.46464646464646464, | |
| "grad_norm": 14.551304817199707, | |
| "learning_rate": 9.73445838553156e-07, | |
| "loss": 0.876882791519165, | |
| "step": 552 | |
| }, | |
| { | |
| "epoch": 0.4663299663299663, | |
| "grad_norm": 3.202234983444214, | |
| "learning_rate": 9.731624224725986e-07, | |
| "loss": 1.0558652877807617, | |
| "step": 554 | |
| }, | |
| { | |
| "epoch": 0.468013468013468, | |
| "grad_norm": 4.0583086013793945, | |
| "learning_rate": 9.728775484194464e-07, | |
| "loss": 0.740475594997406, | |
| "step": 556 | |
| }, | |
| { | |
| "epoch": 0.4696969696969697, | |
| "grad_norm": 3.9330027103424072, | |
| "learning_rate": 9.725912173752106e-07, | |
| "loss": 1.2117640972137451, | |
| "step": 558 | |
| }, | |
| { | |
| "epoch": 0.4713804713804714, | |
| "grad_norm": 7.207095146179199, | |
| "learning_rate": 9.723034303264225e-07, | |
| "loss": 0.4382402002811432, | |
| "step": 560 | |
| }, | |
| { | |
| "epoch": 0.4730639730639731, | |
| "grad_norm": 4.947695255279541, | |
| "learning_rate": 9.72014188264629e-07, | |
| "loss": 0.6228041648864746, | |
| "step": 562 | |
| }, | |
| { | |
| "epoch": 0.47474747474747475, | |
| "grad_norm": 9.088849067687988, | |
| "learning_rate": 9.71723492186391e-07, | |
| "loss": 1.3076156377792358, | |
| "step": 564 | |
| }, | |
| { | |
| "epoch": 0.4764309764309764, | |
| "grad_norm": 4.49135160446167, | |
| "learning_rate": 9.714313430932785e-07, | |
| "loss": 1.1357098817825317, | |
| "step": 566 | |
| }, | |
| { | |
| "epoch": 0.4781144781144781, | |
| "grad_norm": 9.188185691833496, | |
| "learning_rate": 9.711377419918683e-07, | |
| "loss": 0.4768811762332916, | |
| "step": 568 | |
| }, | |
| { | |
| "epoch": 0.4797979797979798, | |
| "grad_norm": 23.397979736328125, | |
| "learning_rate": 9.708426898937399e-07, | |
| "loss": 1.1221351623535156, | |
| "step": 570 | |
| }, | |
| { | |
| "epoch": 0.48148148148148145, | |
| "grad_norm": 27.77615737915039, | |
| "learning_rate": 9.705461878154714e-07, | |
| "loss": 0.7149933576583862, | |
| "step": 572 | |
| }, | |
| { | |
| "epoch": 0.4831649831649832, | |
| "grad_norm": 4.684352874755859, | |
| "learning_rate": 9.702482367786377e-07, | |
| "loss": 0.9776611924171448, | |
| "step": 574 | |
| }, | |
| { | |
| "epoch": 0.48484848484848486, | |
| "grad_norm": 7.567526817321777, | |
| "learning_rate": 9.699488378098055e-07, | |
| "loss": 0.8799599409103394, | |
| "step": 576 | |
| }, | |
| { | |
| "epoch": 0.48653198653198654, | |
| "grad_norm": 9.130019187927246, | |
| "learning_rate": 9.696479919405298e-07, | |
| "loss": 1.1031641960144043, | |
| "step": 578 | |
| }, | |
| { | |
| "epoch": 0.4882154882154882, | |
| "grad_norm": 9.574334144592285, | |
| "learning_rate": 9.693457002073517e-07, | |
| "loss": 0.8267420530319214, | |
| "step": 580 | |
| }, | |
| { | |
| "epoch": 0.4898989898989899, | |
| "grad_norm": 4.069400787353516, | |
| "learning_rate": 9.69041963651793e-07, | |
| "loss": 1.3716950416564941, | |
| "step": 582 | |
| }, | |
| { | |
| "epoch": 0.49158249158249157, | |
| "grad_norm": 4.066318988800049, | |
| "learning_rate": 9.68736783320354e-07, | |
| "loss": 1.017892837524414, | |
| "step": 584 | |
| }, | |
| { | |
| "epoch": 0.49326599326599324, | |
| "grad_norm": 2.714144706726074, | |
| "learning_rate": 9.684301602645098e-07, | |
| "loss": 0.861703097820282, | |
| "step": 586 | |
| }, | |
| { | |
| "epoch": 0.494949494949495, | |
| "grad_norm": 3.8651719093322754, | |
| "learning_rate": 9.681220955407053e-07, | |
| "loss": 0.6647518873214722, | |
| "step": 588 | |
| }, | |
| { | |
| "epoch": 0.49663299663299665, | |
| "grad_norm": 3.4340827465057373, | |
| "learning_rate": 9.67812590210353e-07, | |
| "loss": 1.1181421279907227, | |
| "step": 590 | |
| }, | |
| { | |
| "epoch": 0.4983164983164983, | |
| "grad_norm": 3.8552682399749756, | |
| "learning_rate": 9.675016453398296e-07, | |
| "loss": 1.1666280031204224, | |
| "step": 592 | |
| }, | |
| { | |
| "epoch": 0.5, | |
| "grad_norm": 13.408713340759277, | |
| "learning_rate": 9.671892620004706e-07, | |
| "loss": 0.8374857902526855, | |
| "step": 594 | |
| }, | |
| { | |
| "epoch": 0.5016835016835017, | |
| "grad_norm": 7.0116424560546875, | |
| "learning_rate": 9.66875441268568e-07, | |
| "loss": 0.960757851600647, | |
| "step": 596 | |
| }, | |
| { | |
| "epoch": 0.5033670033670034, | |
| "grad_norm": 2.764244556427002, | |
| "learning_rate": 9.665601842253666e-07, | |
| "loss": 1.3247270584106445, | |
| "step": 598 | |
| }, | |
| { | |
| "epoch": 0.5050505050505051, | |
| "grad_norm": 13.236382484436035, | |
| "learning_rate": 9.662434919570592e-07, | |
| "loss": 0.8124715685844421, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 0.5067340067340067, | |
| "grad_norm": 39.5108528137207, | |
| "learning_rate": 9.659253655547843e-07, | |
| "loss": 1.0799833536148071, | |
| "step": 602 | |
| }, | |
| { | |
| "epoch": 0.5084175084175084, | |
| "grad_norm": 13.359992027282715, | |
| "learning_rate": 9.656058061146207e-07, | |
| "loss": 1.0351530313491821, | |
| "step": 604 | |
| }, | |
| { | |
| "epoch": 0.51010101010101, | |
| "grad_norm": 4.374532699584961, | |
| "learning_rate": 9.652848147375853e-07, | |
| "loss": 1.1660369634628296, | |
| "step": 606 | |
| }, | |
| { | |
| "epoch": 0.5117845117845118, | |
| "grad_norm": 7.170238018035889, | |
| "learning_rate": 9.649623925296288e-07, | |
| "loss": 0.6313941478729248, | |
| "step": 608 | |
| }, | |
| { | |
| "epoch": 0.5134680134680135, | |
| "grad_norm": 2.792412519454956, | |
| "learning_rate": 9.646385406016313e-07, | |
| "loss": 0.9415972232818604, | |
| "step": 610 | |
| }, | |
| { | |
| "epoch": 0.5151515151515151, | |
| "grad_norm": 13.598429679870605, | |
| "learning_rate": 9.643132600693983e-07, | |
| "loss": 0.9117315411567688, | |
| "step": 612 | |
| }, | |
| { | |
| "epoch": 0.5168350168350169, | |
| "grad_norm": 4.011415481567383, | |
| "learning_rate": 9.639865520536588e-07, | |
| "loss": 0.7065603137016296, | |
| "step": 614 | |
| }, | |
| { | |
| "epoch": 0.5185185185185185, | |
| "grad_norm": 9.801816940307617, | |
| "learning_rate": 9.636584176800593e-07, | |
| "loss": 1.1204071044921875, | |
| "step": 616 | |
| }, | |
| { | |
| "epoch": 0.5202020202020202, | |
| "grad_norm": 10.913689613342285, | |
| "learning_rate": 9.633288580791603e-07, | |
| "loss": 1.031501054763794, | |
| "step": 618 | |
| }, | |
| { | |
| "epoch": 0.5218855218855218, | |
| "grad_norm": 2.2291688919067383, | |
| "learning_rate": 9.62997874386434e-07, | |
| "loss": 1.0308109521865845, | |
| "step": 620 | |
| }, | |
| { | |
| "epoch": 0.5235690235690236, | |
| "grad_norm": 12.420637130737305, | |
| "learning_rate": 9.62665467742258e-07, | |
| "loss": 1.0678637027740479, | |
| "step": 622 | |
| }, | |
| { | |
| "epoch": 0.5252525252525253, | |
| "grad_norm": 17.982452392578125, | |
| "learning_rate": 9.623316392919132e-07, | |
| "loss": 0.7635082006454468, | |
| "step": 624 | |
| }, | |
| { | |
| "epoch": 0.5269360269360269, | |
| "grad_norm": 31.17810821533203, | |
| "learning_rate": 9.619963901855789e-07, | |
| "loss": 0.9803504943847656, | |
| "step": 626 | |
| }, | |
| { | |
| "epoch": 0.5286195286195287, | |
| "grad_norm": 3.1647303104400635, | |
| "learning_rate": 9.616597215783295e-07, | |
| "loss": 0.8586722612380981, | |
| "step": 628 | |
| }, | |
| { | |
| "epoch": 0.5303030303030303, | |
| "grad_norm": 10.497335433959961, | |
| "learning_rate": 9.6132163463013e-07, | |
| "loss": 0.7892797589302063, | |
| "step": 630 | |
| }, | |
| { | |
| "epoch": 0.531986531986532, | |
| "grad_norm": 11.274188995361328, | |
| "learning_rate": 9.609821305058324e-07, | |
| "loss": 1.1465822458267212, | |
| "step": 632 | |
| }, | |
| { | |
| "epoch": 0.5336700336700336, | |
| "grad_norm": 4.127675533294678, | |
| "learning_rate": 9.606412103751707e-07, | |
| "loss": 0.9373839497566223, | |
| "step": 634 | |
| }, | |
| { | |
| "epoch": 0.5353535353535354, | |
| "grad_norm": 4.121032238006592, | |
| "learning_rate": 9.602988754127585e-07, | |
| "loss": 0.8166585564613342, | |
| "step": 636 | |
| }, | |
| { | |
| "epoch": 0.5370370370370371, | |
| "grad_norm": 29.52313804626465, | |
| "learning_rate": 9.59955126798084e-07, | |
| "loss": 1.0028636455535889, | |
| "step": 638 | |
| }, | |
| { | |
| "epoch": 0.5387205387205387, | |
| "grad_norm": 4.636293888092041, | |
| "learning_rate": 9.596099657155056e-07, | |
| "loss": 0.8631769418716431, | |
| "step": 640 | |
| }, | |
| { | |
| "epoch": 0.5404040404040404, | |
| "grad_norm": 2.6743357181549072, | |
| "learning_rate": 9.592633933542484e-07, | |
| "loss": 0.9822747707366943, | |
| "step": 642 | |
| }, | |
| { | |
| "epoch": 0.5420875420875421, | |
| "grad_norm": 12.097616195678711, | |
| "learning_rate": 9.589154109084e-07, | |
| "loss": 0.9199867844581604, | |
| "step": 644 | |
| }, | |
| { | |
| "epoch": 0.5437710437710438, | |
| "grad_norm": 4.201647758483887, | |
| "learning_rate": 9.585660195769066e-07, | |
| "loss": 0.9225333333015442, | |
| "step": 646 | |
| }, | |
| { | |
| "epoch": 0.5454545454545454, | |
| "grad_norm": 7.826382160186768, | |
| "learning_rate": 9.582152205635682e-07, | |
| "loss": 1.0213161706924438, | |
| "step": 648 | |
| }, | |
| { | |
| "epoch": 0.5471380471380471, | |
| "grad_norm": 8.643582344055176, | |
| "learning_rate": 9.578630150770348e-07, | |
| "loss": 1.1659046411514282, | |
| "step": 650 | |
| }, | |
| { | |
| "epoch": 0.5488215488215489, | |
| "grad_norm": 16.885889053344727, | |
| "learning_rate": 9.575094043308027e-07, | |
| "loss": 1.0685768127441406, | |
| "step": 652 | |
| }, | |
| { | |
| "epoch": 0.5505050505050505, | |
| "grad_norm": 3.666364908218384, | |
| "learning_rate": 9.5715438954321e-07, | |
| "loss": 1.0853323936462402, | |
| "step": 654 | |
| }, | |
| { | |
| "epoch": 0.5521885521885522, | |
| "grad_norm": 21.654556274414062, | |
| "learning_rate": 9.567979719374313e-07, | |
| "loss": 0.9922153353691101, | |
| "step": 656 | |
| }, | |
| { | |
| "epoch": 0.5538720538720538, | |
| "grad_norm": 7.106581211090088, | |
| "learning_rate": 9.564401527414757e-07, | |
| "loss": 0.8094037771224976, | |
| "step": 658 | |
| }, | |
| { | |
| "epoch": 0.5555555555555556, | |
| "grad_norm": 6.885115146636963, | |
| "learning_rate": 9.56080933188181e-07, | |
| "loss": 0.7689495086669922, | |
| "step": 660 | |
| }, | |
| { | |
| "epoch": 0.5572390572390572, | |
| "grad_norm": 3.9134387969970703, | |
| "learning_rate": 9.557203145152093e-07, | |
| "loss": 1.064096212387085, | |
| "step": 662 | |
| }, | |
| { | |
| "epoch": 0.5589225589225589, | |
| "grad_norm": 3.955990791320801, | |
| "learning_rate": 9.55358297965044e-07, | |
| "loss": 1.1137442588806152, | |
| "step": 664 | |
| }, | |
| { | |
| "epoch": 0.5606060606060606, | |
| "grad_norm": 4.690779209136963, | |
| "learning_rate": 9.549948847849842e-07, | |
| "loss": 0.5054531693458557, | |
| "step": 666 | |
| }, | |
| { | |
| "epoch": 0.5622895622895623, | |
| "grad_norm": 32.8538818359375, | |
| "learning_rate": 9.546300762271414e-07, | |
| "loss": 0.6846545934677124, | |
| "step": 668 | |
| }, | |
| { | |
| "epoch": 0.563973063973064, | |
| "grad_norm": 18.116151809692383, | |
| "learning_rate": 9.542638735484346e-07, | |
| "loss": 1.099835991859436, | |
| "step": 670 | |
| }, | |
| { | |
| "epoch": 0.5656565656565656, | |
| "grad_norm": 26.123899459838867, | |
| "learning_rate": 9.538962780105855e-07, | |
| "loss": 0.6106569766998291, | |
| "step": 672 | |
| }, | |
| { | |
| "epoch": 0.5673400673400674, | |
| "grad_norm": 6.80141019821167, | |
| "learning_rate": 9.535272908801164e-07, | |
| "loss": 0.6078236103057861, | |
| "step": 674 | |
| }, | |
| { | |
| "epoch": 0.569023569023569, | |
| "grad_norm": 3.6088900566101074, | |
| "learning_rate": 9.531569134283426e-07, | |
| "loss": 0.6979132890701294, | |
| "step": 676 | |
| }, | |
| { | |
| "epoch": 0.5707070707070707, | |
| "grad_norm": 35.824989318847656, | |
| "learning_rate": 9.527851469313703e-07, | |
| "loss": 1.3292642831802368, | |
| "step": 678 | |
| }, | |
| { | |
| "epoch": 0.5723905723905723, | |
| "grad_norm": 13.528051376342773, | |
| "learning_rate": 9.524119926700916e-07, | |
| "loss": 0.41806691884994507, | |
| "step": 680 | |
| }, | |
| { | |
| "epoch": 0.5740740740740741, | |
| "grad_norm": 10.345752716064453, | |
| "learning_rate": 9.520374519301801e-07, | |
| "loss": 1.0647339820861816, | |
| "step": 682 | |
| }, | |
| { | |
| "epoch": 0.5757575757575758, | |
| "grad_norm": 5.383781433105469, | |
| "learning_rate": 9.516615260020859e-07, | |
| "loss": 1.1695669889450073, | |
| "step": 684 | |
| }, | |
| { | |
| "epoch": 0.5774410774410774, | |
| "grad_norm": 4.6796770095825195, | |
| "learning_rate": 9.512842161810322e-07, | |
| "loss": 1.1320273876190186, | |
| "step": 686 | |
| }, | |
| { | |
| "epoch": 0.5791245791245792, | |
| "grad_norm": 3.494124412536621, | |
| "learning_rate": 9.509055237670101e-07, | |
| "loss": 0.8368796706199646, | |
| "step": 688 | |
| }, | |
| { | |
| "epoch": 0.5808080808080808, | |
| "grad_norm": 18.290544509887695, | |
| "learning_rate": 9.505254500647742e-07, | |
| "loss": 0.7732558250427246, | |
| "step": 690 | |
| }, | |
| { | |
| "epoch": 0.5824915824915825, | |
| "grad_norm": 3.7307989597320557, | |
| "learning_rate": 9.501439963838383e-07, | |
| "loss": 0.8185931444168091, | |
| "step": 692 | |
| }, | |
| { | |
| "epoch": 0.5841750841750841, | |
| "grad_norm": 5.913649559020996, | |
| "learning_rate": 9.497611640384712e-07, | |
| "loss": 1.0147478580474854, | |
| "step": 694 | |
| }, | |
| { | |
| "epoch": 0.5858585858585859, | |
| "grad_norm": 14.875641822814941, | |
| "learning_rate": 9.493769543476909e-07, | |
| "loss": 0.9212662577629089, | |
| "step": 696 | |
| }, | |
| { | |
| "epoch": 0.5875420875420876, | |
| "grad_norm": 10.849754333496094, | |
| "learning_rate": 9.489913686352616e-07, | |
| "loss": 0.8869191408157349, | |
| "step": 698 | |
| }, | |
| { | |
| "epoch": 0.5892255892255892, | |
| "grad_norm": 7.326023578643799, | |
| "learning_rate": 9.486044082296886e-07, | |
| "loss": 0.8455855846405029, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 0.5909090909090909, | |
| "grad_norm": 8.260787010192871, | |
| "learning_rate": 9.48216074464213e-07, | |
| "loss": 0.944000780582428, | |
| "step": 702 | |
| }, | |
| { | |
| "epoch": 0.5925925925925926, | |
| "grad_norm": 4.983551979064941, | |
| "learning_rate": 9.47826368676808e-07, | |
| "loss": 1.0806821584701538, | |
| "step": 704 | |
| }, | |
| { | |
| "epoch": 0.5942760942760943, | |
| "grad_norm": 7.548647880554199, | |
| "learning_rate": 9.474352922101741e-07, | |
| "loss": 1.0155982971191406, | |
| "step": 706 | |
| }, | |
| { | |
| "epoch": 0.5959595959595959, | |
| "grad_norm": 9.95559024810791, | |
| "learning_rate": 9.470428464117344e-07, | |
| "loss": 0.8041818141937256, | |
| "step": 708 | |
| }, | |
| { | |
| "epoch": 0.5976430976430976, | |
| "grad_norm": 22.083297729492188, | |
| "learning_rate": 9.466490326336298e-07, | |
| "loss": 0.8329028487205505, | |
| "step": 710 | |
| }, | |
| { | |
| "epoch": 0.5993265993265994, | |
| "grad_norm": 3.1762735843658447, | |
| "learning_rate": 9.462538522327144e-07, | |
| "loss": 1.1545898914337158, | |
| "step": 712 | |
| }, | |
| { | |
| "epoch": 0.601010101010101, | |
| "grad_norm": 2.4671504497528076, | |
| "learning_rate": 9.458573065705507e-07, | |
| "loss": 1.081796407699585, | |
| "step": 714 | |
| }, | |
| { | |
| "epoch": 0.6026936026936027, | |
| "grad_norm": 4.517568111419678, | |
| "learning_rate": 9.454593970134058e-07, | |
| "loss": 0.7743735313415527, | |
| "step": 716 | |
| }, | |
| { | |
| "epoch": 0.6043771043771043, | |
| "grad_norm": 11.208656311035156, | |
| "learning_rate": 9.45060124932245e-07, | |
| "loss": 0.9187523126602173, | |
| "step": 718 | |
| }, | |
| { | |
| "epoch": 0.6060606060606061, | |
| "grad_norm": 10.369696617126465, | |
| "learning_rate": 9.446594917027293e-07, | |
| "loss": 0.965773344039917, | |
| "step": 720 | |
| }, | |
| { | |
| "epoch": 0.6077441077441077, | |
| "grad_norm": 11.875805854797363, | |
| "learning_rate": 9.442574987052082e-07, | |
| "loss": 0.9600865840911865, | |
| "step": 722 | |
| }, | |
| { | |
| "epoch": 0.6094276094276094, | |
| "grad_norm": 9.8040189743042, | |
| "learning_rate": 9.438541473247169e-07, | |
| "loss": 0.9117884635925293, | |
| "step": 724 | |
| }, | |
| { | |
| "epoch": 0.6111111111111112, | |
| "grad_norm": 61.72325134277344, | |
| "learning_rate": 9.434494389509707e-07, | |
| "loss": 1.0104196071624756, | |
| "step": 726 | |
| }, | |
| { | |
| "epoch": 0.6127946127946128, | |
| "grad_norm": 19.176124572753906, | |
| "learning_rate": 9.430433749783601e-07, | |
| "loss": 0.9295721650123596, | |
| "step": 728 | |
| }, | |
| { | |
| "epoch": 0.6144781144781145, | |
| "grad_norm": 6.085904121398926, | |
| "learning_rate": 9.426359568059465e-07, | |
| "loss": 1.1639102697372437, | |
| "step": 730 | |
| }, | |
| { | |
| "epoch": 0.6161616161616161, | |
| "grad_norm": 2.7430849075317383, | |
| "learning_rate": 9.422271858374567e-07, | |
| "loss": 1.1210119724273682, | |
| "step": 732 | |
| }, | |
| { | |
| "epoch": 0.6178451178451179, | |
| "grad_norm": 6.412540435791016, | |
| "learning_rate": 9.418170634812789e-07, | |
| "loss": 0.8046259880065918, | |
| "step": 734 | |
| }, | |
| { | |
| "epoch": 0.6195286195286195, | |
| "grad_norm": 15.164510726928711, | |
| "learning_rate": 9.41405591150457e-07, | |
| "loss": 0.8280715942382812, | |
| "step": 736 | |
| }, | |
| { | |
| "epoch": 0.6212121212121212, | |
| "grad_norm": 13.97409725189209, | |
| "learning_rate": 9.409927702626865e-07, | |
| "loss": 0.6932380199432373, | |
| "step": 738 | |
| }, | |
| { | |
| "epoch": 0.622895622895623, | |
| "grad_norm": 2.947274684906006, | |
| "learning_rate": 9.405786022403089e-07, | |
| "loss": 1.2565734386444092, | |
| "step": 740 | |
| }, | |
| { | |
| "epoch": 0.6245791245791246, | |
| "grad_norm": 4.588158130645752, | |
| "learning_rate": 9.401630885103074e-07, | |
| "loss": 1.0269739627838135, | |
| "step": 742 | |
| }, | |
| { | |
| "epoch": 0.6262626262626263, | |
| "grad_norm": 4.135093688964844, | |
| "learning_rate": 9.397462305043016e-07, | |
| "loss": 1.2328283786773682, | |
| "step": 744 | |
| }, | |
| { | |
| "epoch": 0.6279461279461279, | |
| "grad_norm": 3.079167127609253, | |
| "learning_rate": 9.393280296585427e-07, | |
| "loss": 0.968951404094696, | |
| "step": 746 | |
| }, | |
| { | |
| "epoch": 0.6296296296296297, | |
| "grad_norm": 2.28676176071167, | |
| "learning_rate": 9.389084874139085e-07, | |
| "loss": 1.2347244024276733, | |
| "step": 748 | |
| }, | |
| { | |
| "epoch": 0.6313131313131313, | |
| "grad_norm": 8.729804992675781, | |
| "learning_rate": 9.384876052158987e-07, | |
| "loss": 1.3113691806793213, | |
| "step": 750 | |
| }, | |
| { | |
| "epoch": 0.632996632996633, | |
| "grad_norm": 7.039168357849121, | |
| "learning_rate": 9.380653845146294e-07, | |
| "loss": 0.7496945858001709, | |
| "step": 752 | |
| }, | |
| { | |
| "epoch": 0.6346801346801347, | |
| "grad_norm": 14.870685577392578, | |
| "learning_rate": 9.37641826764829e-07, | |
| "loss": 1.0088348388671875, | |
| "step": 754 | |
| }, | |
| { | |
| "epoch": 0.6363636363636364, | |
| "grad_norm": 3.9592251777648926, | |
| "learning_rate": 9.372169334258315e-07, | |
| "loss": 0.7920987606048584, | |
| "step": 756 | |
| }, | |
| { | |
| "epoch": 0.6380471380471381, | |
| "grad_norm": 10.84424114227295, | |
| "learning_rate": 9.367907059615737e-07, | |
| "loss": 0.85060054063797, | |
| "step": 758 | |
| }, | |
| { | |
| "epoch": 0.6397306397306397, | |
| "grad_norm": 235.0703582763672, | |
| "learning_rate": 9.363631458405885e-07, | |
| "loss": 0.6581774353981018, | |
| "step": 760 | |
| }, | |
| { | |
| "epoch": 0.6414141414141414, | |
| "grad_norm": 5.294051647186279, | |
| "learning_rate": 9.359342545360002e-07, | |
| "loss": 0.46980541944503784, | |
| "step": 762 | |
| }, | |
| { | |
| "epoch": 0.6430976430976431, | |
| "grad_norm": 29.527233123779297, | |
| "learning_rate": 9.355040335255201e-07, | |
| "loss": 1.0372706651687622, | |
| "step": 764 | |
| }, | |
| { | |
| "epoch": 0.6447811447811448, | |
| "grad_norm": 4.027895927429199, | |
| "learning_rate": 9.350724842914403e-07, | |
| "loss": 1.104457139968872, | |
| "step": 766 | |
| }, | |
| { | |
| "epoch": 0.6464646464646465, | |
| "grad_norm": 45.400699615478516, | |
| "learning_rate": 9.346396083206297e-07, | |
| "loss": 0.8071002960205078, | |
| "step": 768 | |
| }, | |
| { | |
| "epoch": 0.6481481481481481, | |
| "grad_norm": 4.046747207641602, | |
| "learning_rate": 9.342054071045281e-07, | |
| "loss": 0.8214056491851807, | |
| "step": 770 | |
| }, | |
| { | |
| "epoch": 0.6498316498316499, | |
| "grad_norm": 4.489753723144531, | |
| "learning_rate": 9.337698821391413e-07, | |
| "loss": 0.8206263780593872, | |
| "step": 772 | |
| }, | |
| { | |
| "epoch": 0.6515151515151515, | |
| "grad_norm": 3.739696502685547, | |
| "learning_rate": 9.333330349250363e-07, | |
| "loss": 0.7051388025283813, | |
| "step": 774 | |
| }, | |
| { | |
| "epoch": 0.6531986531986532, | |
| "grad_norm": 5.395556449890137, | |
| "learning_rate": 9.328948669673353e-07, | |
| "loss": 0.9473454356193542, | |
| "step": 776 | |
| }, | |
| { | |
| "epoch": 0.6548821548821548, | |
| "grad_norm": 3.432518720626831, | |
| "learning_rate": 9.324553797757113e-07, | |
| "loss": 1.0663374662399292, | |
| "step": 778 | |
| }, | |
| { | |
| "epoch": 0.6565656565656566, | |
| "grad_norm": 15.647449493408203, | |
| "learning_rate": 9.320145748643827e-07, | |
| "loss": 1.015528678894043, | |
| "step": 780 | |
| }, | |
| { | |
| "epoch": 0.6582491582491582, | |
| "grad_norm": 18.728303909301758, | |
| "learning_rate": 9.315724537521078e-07, | |
| "loss": 1.0769071578979492, | |
| "step": 782 | |
| }, | |
| { | |
| "epoch": 0.6599326599326599, | |
| "grad_norm": 9.825349807739258, | |
| "learning_rate": 9.311290179621801e-07, | |
| "loss": 1.0078058242797852, | |
| "step": 784 | |
| }, | |
| { | |
| "epoch": 0.6616161616161617, | |
| "grad_norm": 8.568079948425293, | |
| "learning_rate": 9.306842690224221e-07, | |
| "loss": 1.1149715185165405, | |
| "step": 786 | |
| }, | |
| { | |
| "epoch": 0.6632996632996633, | |
| "grad_norm": 14.216378211975098, | |
| "learning_rate": 9.302382084651813e-07, | |
| "loss": 0.9104188680648804, | |
| "step": 788 | |
| }, | |
| { | |
| "epoch": 0.664983164983165, | |
| "grad_norm": 3.3533241748809814, | |
| "learning_rate": 9.297908378273238e-07, | |
| "loss": 0.9613898992538452, | |
| "step": 790 | |
| }, | |
| { | |
| "epoch": 0.6666666666666666, | |
| "grad_norm": 22.51508140563965, | |
| "learning_rate": 9.293421586502299e-07, | |
| "loss": 1.0459431409835815, | |
| "step": 792 | |
| }, | |
| { | |
| "epoch": 0.6683501683501684, | |
| "grad_norm": 4.43943977355957, | |
| "learning_rate": 9.288921724797881e-07, | |
| "loss": 0.6562730073928833, | |
| "step": 794 | |
| }, | |
| { | |
| "epoch": 0.67003367003367, | |
| "grad_norm": 3.550076484680176, | |
| "learning_rate": 9.2844088086639e-07, | |
| "loss": 0.9962120056152344, | |
| "step": 796 | |
| }, | |
| { | |
| "epoch": 0.6717171717171717, | |
| "grad_norm": 4.60956335067749, | |
| "learning_rate": 9.279882853649251e-07, | |
| "loss": 1.0277674198150635, | |
| "step": 798 | |
| }, | |
| { | |
| "epoch": 0.6734006734006734, | |
| "grad_norm": 4.80393648147583, | |
| "learning_rate": 9.275343875347754e-07, | |
| "loss": 0.6581063866615295, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 0.6750841750841751, | |
| "grad_norm": 5.648859024047852, | |
| "learning_rate": 9.270791889398098e-07, | |
| "loss": 1.016190528869629, | |
| "step": 802 | |
| }, | |
| { | |
| "epoch": 0.6767676767676768, | |
| "grad_norm": 27.92025375366211, | |
| "learning_rate": 9.266226911483792e-07, | |
| "loss": 0.77015221118927, | |
| "step": 804 | |
| }, | |
| { | |
| "epoch": 0.6784511784511784, | |
| "grad_norm": 15.88348388671875, | |
| "learning_rate": 9.261648957333104e-07, | |
| "loss": 0.7054531574249268, | |
| "step": 806 | |
| }, | |
| { | |
| "epoch": 0.6801346801346801, | |
| "grad_norm": 11.626742362976074, | |
| "learning_rate": 9.257058042719014e-07, | |
| "loss": 1.162412405014038, | |
| "step": 808 | |
| }, | |
| { | |
| "epoch": 0.6818181818181818, | |
| "grad_norm": 3.297008752822876, | |
| "learning_rate": 9.252454183459151e-07, | |
| "loss": 1.0062317848205566, | |
| "step": 810 | |
| }, | |
| { | |
| "epoch": 0.6835016835016835, | |
| "grad_norm": 3.699937343597412, | |
| "learning_rate": 9.24783739541575e-07, | |
| "loss": 1.1737666130065918, | |
| "step": 812 | |
| }, | |
| { | |
| "epoch": 0.6851851851851852, | |
| "grad_norm": 12.622026443481445, | |
| "learning_rate": 9.243207694495587e-07, | |
| "loss": 0.5980294942855835, | |
| "step": 814 | |
| }, | |
| { | |
| "epoch": 0.6868686868686869, | |
| "grad_norm": 2.4388279914855957, | |
| "learning_rate": 9.238565096649931e-07, | |
| "loss": 1.1263744831085205, | |
| "step": 816 | |
| }, | |
| { | |
| "epoch": 0.6885521885521886, | |
| "grad_norm": 5.467193603515625, | |
| "learning_rate": 9.233909617874485e-07, | |
| "loss": 0.8187447786331177, | |
| "step": 818 | |
| }, | |
| { | |
| "epoch": 0.6902356902356902, | |
| "grad_norm": 19.933046340942383, | |
| "learning_rate": 9.229241274209331e-07, | |
| "loss": 0.7387347221374512, | |
| "step": 820 | |
| }, | |
| { | |
| "epoch": 0.6919191919191919, | |
| "grad_norm": 4.639487266540527, | |
| "learning_rate": 9.224560081738876e-07, | |
| "loss": 0.8205159902572632, | |
| "step": 822 | |
| }, | |
| { | |
| "epoch": 0.6936026936026936, | |
| "grad_norm": 5.4859089851379395, | |
| "learning_rate": 9.219866056591803e-07, | |
| "loss": 0.8951364755630493, | |
| "step": 824 | |
| }, | |
| { | |
| "epoch": 0.6952861952861953, | |
| "grad_norm": 10.06679916381836, | |
| "learning_rate": 9.215159214940999e-07, | |
| "loss": 0.924353837966919, | |
| "step": 826 | |
| }, | |
| { | |
| "epoch": 0.696969696969697, | |
| "grad_norm": 4.803708076477051, | |
| "learning_rate": 9.210439573003513e-07, | |
| "loss": 0.8230616450309753, | |
| "step": 828 | |
| }, | |
| { | |
| "epoch": 0.6986531986531986, | |
| "grad_norm": 2.6663763523101807, | |
| "learning_rate": 9.205707147040502e-07, | |
| "loss": 1.2476671934127808, | |
| "step": 830 | |
| }, | |
| { | |
| "epoch": 0.7003367003367004, | |
| "grad_norm": 11.887960433959961, | |
| "learning_rate": 9.200961953357161e-07, | |
| "loss": 0.9090033173561096, | |
| "step": 832 | |
| }, | |
| { | |
| "epoch": 0.702020202020202, | |
| "grad_norm": 12.790818214416504, | |
| "learning_rate": 9.196204008302679e-07, | |
| "loss": 0.7313128709793091, | |
| "step": 834 | |
| }, | |
| { | |
| "epoch": 0.7037037037037037, | |
| "grad_norm": 19.932966232299805, | |
| "learning_rate": 9.191433328270181e-07, | |
| "loss": 0.9331467151641846, | |
| "step": 836 | |
| }, | |
| { | |
| "epoch": 0.7053872053872053, | |
| "grad_norm": 14.500178337097168, | |
| "learning_rate": 9.186649929696663e-07, | |
| "loss": 0.6199721097946167, | |
| "step": 838 | |
| }, | |
| { | |
| "epoch": 0.7070707070707071, | |
| "grad_norm": 3.49665904045105, | |
| "learning_rate": 9.181853829062953e-07, | |
| "loss": 1.2793331146240234, | |
| "step": 840 | |
| }, | |
| { | |
| "epoch": 0.7087542087542088, | |
| "grad_norm": 4.229721546173096, | |
| "learning_rate": 9.177045042893626e-07, | |
| "loss": 1.1469063758850098, | |
| "step": 842 | |
| }, | |
| { | |
| "epoch": 0.7104377104377104, | |
| "grad_norm": 110.08422088623047, | |
| "learning_rate": 9.172223587756982e-07, | |
| "loss": 1.1059083938598633, | |
| "step": 844 | |
| }, | |
| { | |
| "epoch": 0.7121212121212122, | |
| "grad_norm": 18.97850799560547, | |
| "learning_rate": 9.167389480264958e-07, | |
| "loss": 0.8827245235443115, | |
| "step": 846 | |
| }, | |
| { | |
| "epoch": 0.7138047138047138, | |
| "grad_norm": 17.975536346435547, | |
| "learning_rate": 9.162542737073089e-07, | |
| "loss": 0.8001298904418945, | |
| "step": 848 | |
| }, | |
| { | |
| "epoch": 0.7154882154882155, | |
| "grad_norm": 7.855954647064209, | |
| "learning_rate": 9.157683374880446e-07, | |
| "loss": 0.9649063348770142, | |
| "step": 850 | |
| }, | |
| { | |
| "epoch": 0.7171717171717171, | |
| "grad_norm": 8.463844299316406, | |
| "learning_rate": 9.152811410429576e-07, | |
| "loss": 0.972816526889801, | |
| "step": 852 | |
| }, | |
| { | |
| "epoch": 0.7188552188552189, | |
| "grad_norm": 12.091350555419922, | |
| "learning_rate": 9.147926860506445e-07, | |
| "loss": 0.7975931763648987, | |
| "step": 854 | |
| }, | |
| { | |
| "epoch": 0.7205387205387206, | |
| "grad_norm": 4.873641014099121, | |
| "learning_rate": 9.143029741940385e-07, | |
| "loss": 1.1548885107040405, | |
| "step": 856 | |
| }, | |
| { | |
| "epoch": 0.7222222222222222, | |
| "grad_norm": 11.703914642333984, | |
| "learning_rate": 9.138120071604027e-07, | |
| "loss": 0.7869529724121094, | |
| "step": 858 | |
| }, | |
| { | |
| "epoch": 0.7239057239057239, | |
| "grad_norm": 8.07150650024414, | |
| "learning_rate": 9.133197866413254e-07, | |
| "loss": 1.0205129384994507, | |
| "step": 860 | |
| }, | |
| { | |
| "epoch": 0.7255892255892256, | |
| "grad_norm": 9.105744361877441, | |
| "learning_rate": 9.128263143327132e-07, | |
| "loss": 1.2168781757354736, | |
| "step": 862 | |
| }, | |
| { | |
| "epoch": 0.7272727272727273, | |
| "grad_norm": 13.992351531982422, | |
| "learning_rate": 9.12331591934786e-07, | |
| "loss": 1.0841448307037354, | |
| "step": 864 | |
| }, | |
| { | |
| "epoch": 0.7289562289562289, | |
| "grad_norm": 44.512203216552734, | |
| "learning_rate": 9.118356211520704e-07, | |
| "loss": 1.0125892162322998, | |
| "step": 866 | |
| }, | |
| { | |
| "epoch": 0.7306397306397306, | |
| "grad_norm": 3.5231881141662598, | |
| "learning_rate": 9.113384036933945e-07, | |
| "loss": 1.2724123001098633, | |
| "step": 868 | |
| }, | |
| { | |
| "epoch": 0.7323232323232324, | |
| "grad_norm": 5.931739330291748, | |
| "learning_rate": 9.108399412718818e-07, | |
| "loss": 1.1999413967132568, | |
| "step": 870 | |
| }, | |
| { | |
| "epoch": 0.734006734006734, | |
| "grad_norm": 5.34647798538208, | |
| "learning_rate": 9.103402356049452e-07, | |
| "loss": 1.127119541168213, | |
| "step": 872 | |
| }, | |
| { | |
| "epoch": 0.7356902356902357, | |
| "grad_norm": 4.207188606262207, | |
| "learning_rate": 9.098392884142805e-07, | |
| "loss": 1.114919900894165, | |
| "step": 874 | |
| }, | |
| { | |
| "epoch": 0.7373737373737373, | |
| "grad_norm": 21.882280349731445, | |
| "learning_rate": 9.093371014258618e-07, | |
| "loss": 0.9378777742385864, | |
| "step": 876 | |
| }, | |
| { | |
| "epoch": 0.7390572390572391, | |
| "grad_norm": 127.14752197265625, | |
| "learning_rate": 9.088336763699347e-07, | |
| "loss": 0.6694403886795044, | |
| "step": 878 | |
| }, | |
| { | |
| "epoch": 0.7407407407407407, | |
| "grad_norm": 81.61506652832031, | |
| "learning_rate": 9.083290149810101e-07, | |
| "loss": 0.6651909351348877, | |
| "step": 880 | |
| }, | |
| { | |
| "epoch": 0.7424242424242424, | |
| "grad_norm": 3.663316488265991, | |
| "learning_rate": 9.07823118997859e-07, | |
| "loss": 1.182866096496582, | |
| "step": 882 | |
| }, | |
| { | |
| "epoch": 0.7441077441077442, | |
| "grad_norm": 3.8022303581237793, | |
| "learning_rate": 9.07315990163506e-07, | |
| "loss": 1.1220306158065796, | |
| "step": 884 | |
| }, | |
| { | |
| "epoch": 0.7457912457912458, | |
| "grad_norm": 3.328054189682007, | |
| "learning_rate": 9.06807630225223e-07, | |
| "loss": 0.6599295139312744, | |
| "step": 886 | |
| }, | |
| { | |
| "epoch": 0.7474747474747475, | |
| "grad_norm": 3.3686916828155518, | |
| "learning_rate": 9.062980409345242e-07, | |
| "loss": 1.0259349346160889, | |
| "step": 888 | |
| }, | |
| { | |
| "epoch": 0.7491582491582491, | |
| "grad_norm": 20.480480194091797, | |
| "learning_rate": 9.05787224047159e-07, | |
| "loss": 0.9568924903869629, | |
| "step": 890 | |
| }, | |
| { | |
| "epoch": 0.7508417508417509, | |
| "grad_norm": 6.441938877105713, | |
| "learning_rate": 9.052751813231064e-07, | |
| "loss": 0.9797095060348511, | |
| "step": 892 | |
| }, | |
| { | |
| "epoch": 0.7525252525252525, | |
| "grad_norm": 9.020792007446289, | |
| "learning_rate": 9.047619145265693e-07, | |
| "loss": 0.786825954914093, | |
| "step": 894 | |
| }, | |
| { | |
| "epoch": 0.7542087542087542, | |
| "grad_norm": 12.181696891784668, | |
| "learning_rate": 9.042474254259673e-07, | |
| "loss": 0.9024474620819092, | |
| "step": 896 | |
| }, | |
| { | |
| "epoch": 0.7558922558922558, | |
| "grad_norm": 28.832189559936523, | |
| "learning_rate": 9.037317157939322e-07, | |
| "loss": 0.6734418869018555, | |
| "step": 898 | |
| }, | |
| { | |
| "epoch": 0.7575757575757576, | |
| "grad_norm": 3.2818045616149902, | |
| "learning_rate": 9.032147874073007e-07, | |
| "loss": 0.9285035133361816, | |
| "step": 900 | |
| }, | |
| { | |
| "epoch": 0.7592592592592593, | |
| "grad_norm": 18.371009826660156, | |
| "learning_rate": 9.026966420471087e-07, | |
| "loss": 0.7218674421310425, | |
| "step": 902 | |
| }, | |
| { | |
| "epoch": 0.7609427609427609, | |
| "grad_norm": 2.9429922103881836, | |
| "learning_rate": 9.021772814985844e-07, | |
| "loss": 1.222078800201416, | |
| "step": 904 | |
| }, | |
| { | |
| "epoch": 0.7626262626262627, | |
| "grad_norm": 2.7464704513549805, | |
| "learning_rate": 9.016567075511441e-07, | |
| "loss": 0.9446361064910889, | |
| "step": 906 | |
| }, | |
| { | |
| "epoch": 0.7643097643097643, | |
| "grad_norm": 6.568495750427246, | |
| "learning_rate": 9.011349219983836e-07, | |
| "loss": 0.929685115814209, | |
| "step": 908 | |
| }, | |
| { | |
| "epoch": 0.765993265993266, | |
| "grad_norm": 4.631781578063965, | |
| "learning_rate": 9.006119266380738e-07, | |
| "loss": 0.8691076040267944, | |
| "step": 910 | |
| }, | |
| { | |
| "epoch": 0.7676767676767676, | |
| "grad_norm": 19.05845069885254, | |
| "learning_rate": 9.000877232721539e-07, | |
| "loss": 1.0112216472625732, | |
| "step": 912 | |
| }, | |
| { | |
| "epoch": 0.7693602693602694, | |
| "grad_norm": 14.539863586425781, | |
| "learning_rate": 8.99562313706725e-07, | |
| "loss": 0.890055775642395, | |
| "step": 914 | |
| }, | |
| { | |
| "epoch": 0.7710437710437711, | |
| "grad_norm": 5.530696868896484, | |
| "learning_rate": 8.99035699752044e-07, | |
| "loss": 1.0191471576690674, | |
| "step": 916 | |
| }, | |
| { | |
| "epoch": 0.7727272727272727, | |
| "grad_norm": 14.078718185424805, | |
| "learning_rate": 8.985078832225178e-07, | |
| "loss": 0.6652472019195557, | |
| "step": 918 | |
| }, | |
| { | |
| "epoch": 0.7744107744107744, | |
| "grad_norm": 28.123485565185547, | |
| "learning_rate": 8.979788659366963e-07, | |
| "loss": 0.5262911319732666, | |
| "step": 920 | |
| }, | |
| { | |
| "epoch": 0.7760942760942761, | |
| "grad_norm": 12.658363342285156, | |
| "learning_rate": 8.974486497172664e-07, | |
| "loss": 0.6195323467254639, | |
| "step": 922 | |
| }, | |
| { | |
| "epoch": 0.7777777777777778, | |
| "grad_norm": 9.243937492370605, | |
| "learning_rate": 8.969172363910464e-07, | |
| "loss": 0.9786189198493958, | |
| "step": 924 | |
| }, | |
| { | |
| "epoch": 0.7794612794612794, | |
| "grad_norm": 6.694032669067383, | |
| "learning_rate": 8.963846277889788e-07, | |
| "loss": 1.1813392639160156, | |
| "step": 926 | |
| }, | |
| { | |
| "epoch": 0.7811447811447811, | |
| "grad_norm": 2.7287495136260986, | |
| "learning_rate": 8.95850825746124e-07, | |
| "loss": 0.5288863182067871, | |
| "step": 928 | |
| }, | |
| { | |
| "epoch": 0.7828282828282829, | |
| "grad_norm": 12.440982818603516, | |
| "learning_rate": 8.953158321016549e-07, | |
| "loss": 1.3665971755981445, | |
| "step": 930 | |
| }, | |
| { | |
| "epoch": 0.7845117845117845, | |
| "grad_norm": 6.197256565093994, | |
| "learning_rate": 8.947796486988499e-07, | |
| "loss": 0.934798002243042, | |
| "step": 932 | |
| }, | |
| { | |
| "epoch": 0.7861952861952862, | |
| "grad_norm": 5.526829719543457, | |
| "learning_rate": 8.942422773850861e-07, | |
| "loss": 1.0153696537017822, | |
| "step": 934 | |
| }, | |
| { | |
| "epoch": 0.7878787878787878, | |
| "grad_norm": 3.1978728771209717, | |
| "learning_rate": 8.937037200118339e-07, | |
| "loss": 0.8981832265853882, | |
| "step": 936 | |
| }, | |
| { | |
| "epoch": 0.7895622895622896, | |
| "grad_norm": 2.9995744228363037, | |
| "learning_rate": 8.931639784346499e-07, | |
| "loss": 0.8695104718208313, | |
| "step": 938 | |
| }, | |
| { | |
| "epoch": 0.7912457912457912, | |
| "grad_norm": 6.706093788146973, | |
| "learning_rate": 8.926230545131711e-07, | |
| "loss": 1.0370559692382812, | |
| "step": 940 | |
| }, | |
| { | |
| "epoch": 0.7929292929292929, | |
| "grad_norm": 17.68717384338379, | |
| "learning_rate": 8.920809501111082e-07, | |
| "loss": 0.43204930424690247, | |
| "step": 942 | |
| }, | |
| { | |
| "epoch": 0.7946127946127947, | |
| "grad_norm": 4.556012153625488, | |
| "learning_rate": 8.915376670962384e-07, | |
| "loss": 0.934272289276123, | |
| "step": 944 | |
| }, | |
| { | |
| "epoch": 0.7962962962962963, | |
| "grad_norm": 4.898090362548828, | |
| "learning_rate": 8.90993207340401e-07, | |
| "loss": 0.910577654838562, | |
| "step": 946 | |
| }, | |
| { | |
| "epoch": 0.797979797979798, | |
| "grad_norm": 3.080552577972412, | |
| "learning_rate": 8.904475727194881e-07, | |
| "loss": 1.0652995109558105, | |
| "step": 948 | |
| }, | |
| { | |
| "epoch": 0.7996632996632996, | |
| "grad_norm": 6.745321273803711, | |
| "learning_rate": 8.899007651134413e-07, | |
| "loss": 0.8568437099456787, | |
| "step": 950 | |
| }, | |
| { | |
| "epoch": 0.8013468013468014, | |
| "grad_norm": 13.813915252685547, | |
| "learning_rate": 8.893527864062427e-07, | |
| "loss": 0.47221675515174866, | |
| "step": 952 | |
| }, | |
| { | |
| "epoch": 0.803030303030303, | |
| "grad_norm": 5.70471715927124, | |
| "learning_rate": 8.88803638485909e-07, | |
| "loss": 0.9682356119155884, | |
| "step": 954 | |
| }, | |
| { | |
| "epoch": 0.8047138047138047, | |
| "grad_norm": 9.05542278289795, | |
| "learning_rate": 8.882533232444864e-07, | |
| "loss": 0.9946258068084717, | |
| "step": 956 | |
| }, | |
| { | |
| "epoch": 0.8063973063973064, | |
| "grad_norm": 30.702098846435547, | |
| "learning_rate": 8.877018425780425e-07, | |
| "loss": 1.1317826509475708, | |
| "step": 958 | |
| }, | |
| { | |
| "epoch": 0.8080808080808081, | |
| "grad_norm": 14.64018726348877, | |
| "learning_rate": 8.8714919838666e-07, | |
| "loss": 0.7012873888015747, | |
| "step": 960 | |
| }, | |
| { | |
| "epoch": 0.8097643097643098, | |
| "grad_norm": 3.149690866470337, | |
| "learning_rate": 8.865953925744305e-07, | |
| "loss": 0.795744776725769, | |
| "step": 962 | |
| }, | |
| { | |
| "epoch": 0.8114478114478114, | |
| "grad_norm": 6.090580463409424, | |
| "learning_rate": 8.860404270494483e-07, | |
| "loss": 0.7089242935180664, | |
| "step": 964 | |
| }, | |
| { | |
| "epoch": 0.8131313131313131, | |
| "grad_norm": 3.53495192527771, | |
| "learning_rate": 8.85484303723803e-07, | |
| "loss": 1.0081251859664917, | |
| "step": 966 | |
| }, | |
| { | |
| "epoch": 0.8148148148148148, | |
| "grad_norm": 4.274377346038818, | |
| "learning_rate": 8.849270245135737e-07, | |
| "loss": 1.2170288562774658, | |
| "step": 968 | |
| }, | |
| { | |
| "epoch": 0.8164983164983165, | |
| "grad_norm": 3.169619560241699, | |
| "learning_rate": 8.843685913388216e-07, | |
| "loss": 1.0120604038238525, | |
| "step": 970 | |
| }, | |
| { | |
| "epoch": 0.8181818181818182, | |
| "grad_norm": 9.656790733337402, | |
| "learning_rate": 8.838090061235839e-07, | |
| "loss": 1.0408661365509033, | |
| "step": 972 | |
| }, | |
| { | |
| "epoch": 0.8198653198653199, | |
| "grad_norm": 3.6206579208374023, | |
| "learning_rate": 8.832482707958671e-07, | |
| "loss": 0.7572422027587891, | |
| "step": 974 | |
| }, | |
| { | |
| "epoch": 0.8215488215488216, | |
| "grad_norm": 4.2206034660339355, | |
| "learning_rate": 8.826863872876405e-07, | |
| "loss": 0.9668401479721069, | |
| "step": 976 | |
| }, | |
| { | |
| "epoch": 0.8232323232323232, | |
| "grad_norm": 2.5796895027160645, | |
| "learning_rate": 8.82123357534829e-07, | |
| "loss": 1.2220442295074463, | |
| "step": 978 | |
| }, | |
| { | |
| "epoch": 0.8249158249158249, | |
| "grad_norm": 3.009799003601074, | |
| "learning_rate": 8.815591834773073e-07, | |
| "loss": 1.1853399276733398, | |
| "step": 980 | |
| }, | |
| { | |
| "epoch": 0.8265993265993266, | |
| "grad_norm": 3.527939796447754, | |
| "learning_rate": 8.80993867058892e-07, | |
| "loss": 1.1044703722000122, | |
| "step": 982 | |
| }, | |
| { | |
| "epoch": 0.8282828282828283, | |
| "grad_norm": 5.415159225463867, | |
| "learning_rate": 8.804274102273362e-07, | |
| "loss": 1.0707950592041016, | |
| "step": 984 | |
| }, | |
| { | |
| "epoch": 0.82996632996633, | |
| "grad_norm": 41.4835205078125, | |
| "learning_rate": 8.798598149343223e-07, | |
| "loss": 0.9894696474075317, | |
| "step": 986 | |
| }, | |
| { | |
| "epoch": 0.8316498316498316, | |
| "grad_norm": 11.90714168548584, | |
| "learning_rate": 8.792910831354544e-07, | |
| "loss": 0.8949055671691895, | |
| "step": 988 | |
| }, | |
| { | |
| "epoch": 0.8333333333333334, | |
| "grad_norm": 19.156835556030273, | |
| "learning_rate": 8.787212167902533e-07, | |
| "loss": 0.847869336605072, | |
| "step": 990 | |
| }, | |
| { | |
| "epoch": 0.835016835016835, | |
| "grad_norm": 9.593557357788086, | |
| "learning_rate": 8.781502178621481e-07, | |
| "loss": 0.7175034284591675, | |
| "step": 992 | |
| }, | |
| { | |
| "epoch": 0.8367003367003367, | |
| "grad_norm": 7.256720066070557, | |
| "learning_rate": 8.775780883184705e-07, | |
| "loss": 0.9604957103729248, | |
| "step": 994 | |
| }, | |
| { | |
| "epoch": 0.8383838383838383, | |
| "grad_norm": 6.030484676361084, | |
| "learning_rate": 8.770048301304473e-07, | |
| "loss": 0.69129478931427, | |
| "step": 996 | |
| }, | |
| { | |
| "epoch": 0.8400673400673401, | |
| "grad_norm": 19.379892349243164, | |
| "learning_rate": 8.764304452731941e-07, | |
| "loss": 0.9693500399589539, | |
| "step": 998 | |
| }, | |
| { | |
| "epoch": 0.8417508417508418, | |
| "grad_norm": 5.4144086837768555, | |
| "learning_rate": 8.758549357257088e-07, | |
| "loss": 1.0944030284881592, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 0.8434343434343434, | |
| "grad_norm": 3.4778013229370117, | |
| "learning_rate": 8.752783034708636e-07, | |
| "loss": 0.7972965240478516, | |
| "step": 1002 | |
| }, | |
| { | |
| "epoch": 0.8451178451178452, | |
| "grad_norm": 18.509031295776367, | |
| "learning_rate": 8.747005504953994e-07, | |
| "loss": 0.7230968475341797, | |
| "step": 1004 | |
| }, | |
| { | |
| "epoch": 0.8468013468013468, | |
| "grad_norm": 4.210479736328125, | |
| "learning_rate": 8.741216787899185e-07, | |
| "loss": 1.1015040874481201, | |
| "step": 1006 | |
| }, | |
| { | |
| "epoch": 0.8484848484848485, | |
| "grad_norm": 2.3543701171875, | |
| "learning_rate": 8.73541690348877e-07, | |
| "loss": 0.6013465523719788, | |
| "step": 1008 | |
| }, | |
| { | |
| "epoch": 0.8501683501683501, | |
| "grad_norm": 4.900216579437256, | |
| "learning_rate": 8.729605871705794e-07, | |
| "loss": 0.9569622278213501, | |
| "step": 1010 | |
| }, | |
| { | |
| "epoch": 0.8518518518518519, | |
| "grad_norm": 13.174873352050781, | |
| "learning_rate": 8.723783712571706e-07, | |
| "loss": 0.891572117805481, | |
| "step": 1012 | |
| }, | |
| { | |
| "epoch": 0.8535353535353535, | |
| "grad_norm": 7.153807163238525, | |
| "learning_rate": 8.717950446146296e-07, | |
| "loss": 0.7898436784744263, | |
| "step": 1014 | |
| }, | |
| { | |
| "epoch": 0.8552188552188552, | |
| "grad_norm": 17.859582901000977, | |
| "learning_rate": 8.712106092527618e-07, | |
| "loss": 0.6778484582901001, | |
| "step": 1016 | |
| }, | |
| { | |
| "epoch": 0.8569023569023569, | |
| "grad_norm": 25.399763107299805, | |
| "learning_rate": 8.706250671851929e-07, | |
| "loss": 1.0100421905517578, | |
| "step": 1018 | |
| }, | |
| { | |
| "epoch": 0.8585858585858586, | |
| "grad_norm": 4.458539962768555, | |
| "learning_rate": 8.70038420429362e-07, | |
| "loss": 1.280473232269287, | |
| "step": 1020 | |
| }, | |
| { | |
| "epoch": 0.8602693602693603, | |
| "grad_norm": 13.934873580932617, | |
| "learning_rate": 8.694506710065139e-07, | |
| "loss": 0.9307641386985779, | |
| "step": 1022 | |
| }, | |
| { | |
| "epoch": 0.8619528619528619, | |
| "grad_norm": 6.230085372924805, | |
| "learning_rate": 8.688618209416927e-07, | |
| "loss": 0.9810340404510498, | |
| "step": 1024 | |
| }, | |
| { | |
| "epoch": 0.8636363636363636, | |
| "grad_norm": 7.749796390533447, | |
| "learning_rate": 8.682718722637344e-07, | |
| "loss": 0.9103548526763916, | |
| "step": 1026 | |
| }, | |
| { | |
| "epoch": 0.8653198653198653, | |
| "grad_norm": 5.378295421600342, | |
| "learning_rate": 8.676808270052607e-07, | |
| "loss": 1.0003798007965088, | |
| "step": 1028 | |
| }, | |
| { | |
| "epoch": 0.867003367003367, | |
| "grad_norm": 5.721936225891113, | |
| "learning_rate": 8.670886872026711e-07, | |
| "loss": 0.6671168804168701, | |
| "step": 1030 | |
| }, | |
| { | |
| "epoch": 0.8686868686868687, | |
| "grad_norm": 10.666192054748535, | |
| "learning_rate": 8.664954548961363e-07, | |
| "loss": 0.8651524782180786, | |
| "step": 1032 | |
| }, | |
| { | |
| "epoch": 0.8703703703703703, | |
| "grad_norm": 7.22635555267334, | |
| "learning_rate": 8.659011321295913e-07, | |
| "loss": 0.9622019529342651, | |
| "step": 1034 | |
| }, | |
| { | |
| "epoch": 0.8720538720538721, | |
| "grad_norm": 4.455495357513428, | |
| "learning_rate": 8.65305720950728e-07, | |
| "loss": 0.9549316167831421, | |
| "step": 1036 | |
| }, | |
| { | |
| "epoch": 0.8737373737373737, | |
| "grad_norm": 7.26788854598999, | |
| "learning_rate": 8.647092234109884e-07, | |
| "loss": 1.1264393329620361, | |
| "step": 1038 | |
| }, | |
| { | |
| "epoch": 0.8754208754208754, | |
| "grad_norm": 6.3819499015808105, | |
| "learning_rate": 8.64111641565558e-07, | |
| "loss": 1.0972923040390015, | |
| "step": 1040 | |
| }, | |
| { | |
| "epoch": 0.877104377104377, | |
| "grad_norm": 4.891845226287842, | |
| "learning_rate": 8.63512977473357e-07, | |
| "loss": 0.9982548952102661, | |
| "step": 1042 | |
| }, | |
| { | |
| "epoch": 0.8787878787878788, | |
| "grad_norm": 16.61280059814453, | |
| "learning_rate": 8.629132331970353e-07, | |
| "loss": 1.1183404922485352, | |
| "step": 1044 | |
| }, | |
| { | |
| "epoch": 0.8804713804713805, | |
| "grad_norm": 3.0736172199249268, | |
| "learning_rate": 8.623124108029645e-07, | |
| "loss": 1.0902597904205322, | |
| "step": 1046 | |
| }, | |
| { | |
| "epoch": 0.8821548821548821, | |
| "grad_norm": 15.772442817687988, | |
| "learning_rate": 8.617105123612304e-07, | |
| "loss": 0.9946341514587402, | |
| "step": 1048 | |
| }, | |
| { | |
| "epoch": 0.8838383838383839, | |
| "grad_norm": 22.210824966430664, | |
| "learning_rate": 8.611075399456263e-07, | |
| "loss": 0.8030619025230408, | |
| "step": 1050 | |
| }, | |
| { | |
| "epoch": 0.8855218855218855, | |
| "grad_norm": 13.653421401977539, | |
| "learning_rate": 8.605034956336462e-07, | |
| "loss": 1.084486484527588, | |
| "step": 1052 | |
| }, | |
| { | |
| "epoch": 0.8872053872053872, | |
| "grad_norm": 13.737056732177734, | |
| "learning_rate": 8.598983815064766e-07, | |
| "loss": 0.5944472551345825, | |
| "step": 1054 | |
| }, | |
| { | |
| "epoch": 0.8888888888888888, | |
| "grad_norm": 2.5293309688568115, | |
| "learning_rate": 8.592921996489902e-07, | |
| "loss": 0.9724396467208862, | |
| "step": 1056 | |
| }, | |
| { | |
| "epoch": 0.8905723905723906, | |
| "grad_norm": 2.733849287033081, | |
| "learning_rate": 8.586849521497389e-07, | |
| "loss": 0.9384986162185669, | |
| "step": 1058 | |
| }, | |
| { | |
| "epoch": 0.8922558922558923, | |
| "grad_norm": 18.489913940429688, | |
| "learning_rate": 8.580766411009455e-07, | |
| "loss": 0.9987908601760864, | |
| "step": 1060 | |
| }, | |
| { | |
| "epoch": 0.8939393939393939, | |
| "grad_norm": 5.748605251312256, | |
| "learning_rate": 8.574672685984979e-07, | |
| "loss": 0.9200767278671265, | |
| "step": 1062 | |
| }, | |
| { | |
| "epoch": 0.8956228956228957, | |
| "grad_norm": 11.951451301574707, | |
| "learning_rate": 8.568568367419404e-07, | |
| "loss": 0.844304621219635, | |
| "step": 1064 | |
| }, | |
| { | |
| "epoch": 0.8973063973063973, | |
| "grad_norm": 2.693372964859009, | |
| "learning_rate": 8.562453476344677e-07, | |
| "loss": 1.1123064756393433, | |
| "step": 1066 | |
| }, | |
| { | |
| "epoch": 0.898989898989899, | |
| "grad_norm": 3.8241171836853027, | |
| "learning_rate": 8.556328033829172e-07, | |
| "loss": 0.8062398433685303, | |
| "step": 1068 | |
| }, | |
| { | |
| "epoch": 0.9006734006734006, | |
| "grad_norm": 11.482207298278809, | |
| "learning_rate": 8.550192060977614e-07, | |
| "loss": 0.9785133600234985, | |
| "step": 1070 | |
| }, | |
| { | |
| "epoch": 0.9023569023569024, | |
| "grad_norm": 3.1708807945251465, | |
| "learning_rate": 8.544045578931013e-07, | |
| "loss": 1.2256948947906494, | |
| "step": 1072 | |
| }, | |
| { | |
| "epoch": 0.9040404040404041, | |
| "grad_norm": 3.0588254928588867, | |
| "learning_rate": 8.537888608866584e-07, | |
| "loss": 0.8702206611633301, | |
| "step": 1074 | |
| }, | |
| { | |
| "epoch": 0.9057239057239057, | |
| "grad_norm": 6.964415073394775, | |
| "learning_rate": 8.531721171997681e-07, | |
| "loss": 0.5286012291908264, | |
| "step": 1076 | |
| }, | |
| { | |
| "epoch": 0.9074074074074074, | |
| "grad_norm": 19.570329666137695, | |
| "learning_rate": 8.525543289573718e-07, | |
| "loss": 1.1106371879577637, | |
| "step": 1078 | |
| }, | |
| { | |
| "epoch": 0.9090909090909091, | |
| "grad_norm": 3.5319879055023193, | |
| "learning_rate": 8.519354982880099e-07, | |
| "loss": 0.9486319422721863, | |
| "step": 1080 | |
| }, | |
| { | |
| "epoch": 0.9107744107744108, | |
| "grad_norm": 3.6544623374938965, | |
| "learning_rate": 8.513156273238146e-07, | |
| "loss": 0.9495224356651306, | |
| "step": 1082 | |
| }, | |
| { | |
| "epoch": 0.9124579124579124, | |
| "grad_norm": 27.266931533813477, | |
| "learning_rate": 8.50694718200502e-07, | |
| "loss": 0.766098141670227, | |
| "step": 1084 | |
| }, | |
| { | |
| "epoch": 0.9141414141414141, | |
| "grad_norm": 4.358726978302002, | |
| "learning_rate": 8.500727730573655e-07, | |
| "loss": 1.1725554466247559, | |
| "step": 1086 | |
| }, | |
| { | |
| "epoch": 0.9158249158249159, | |
| "grad_norm": 13.713922500610352, | |
| "learning_rate": 8.494497940372675e-07, | |
| "loss": 0.9348576068878174, | |
| "step": 1088 | |
| }, | |
| { | |
| "epoch": 0.9175084175084175, | |
| "grad_norm": 2.8525874614715576, | |
| "learning_rate": 8.488257832866332e-07, | |
| "loss": 0.9388105869293213, | |
| "step": 1090 | |
| }, | |
| { | |
| "epoch": 0.9191919191919192, | |
| "grad_norm": 19.22268295288086, | |
| "learning_rate": 8.482007429554419e-07, | |
| "loss": 1.0528115034103394, | |
| "step": 1092 | |
| }, | |
| { | |
| "epoch": 0.9208754208754208, | |
| "grad_norm": 7.083608627319336, | |
| "learning_rate": 8.475746751972207e-07, | |
| "loss": 0.9258947968482971, | |
| "step": 1094 | |
| }, | |
| { | |
| "epoch": 0.9225589225589226, | |
| "grad_norm": 17.767122268676758, | |
| "learning_rate": 8.469475821690364e-07, | |
| "loss": 0.7900251746177673, | |
| "step": 1096 | |
| }, | |
| { | |
| "epoch": 0.9242424242424242, | |
| "grad_norm": 11.199775695800781, | |
| "learning_rate": 8.463194660314884e-07, | |
| "loss": 0.43797174096107483, | |
| "step": 1098 | |
| }, | |
| { | |
| "epoch": 0.9259259259259259, | |
| "grad_norm": 6.160865306854248, | |
| "learning_rate": 8.456903289487008e-07, | |
| "loss": 1.0159149169921875, | |
| "step": 1100 | |
| }, | |
| { | |
| "epoch": 0.9276094276094277, | |
| "grad_norm": 23.086267471313477, | |
| "learning_rate": 8.45060173088316e-07, | |
| "loss": 0.5812975168228149, | |
| "step": 1102 | |
| }, | |
| { | |
| "epoch": 0.9292929292929293, | |
| "grad_norm": 5.783674240112305, | |
| "learning_rate": 8.444290006214858e-07, | |
| "loss": 1.1394703388214111, | |
| "step": 1104 | |
| }, | |
| { | |
| "epoch": 0.930976430976431, | |
| "grad_norm": 5.126986026763916, | |
| "learning_rate": 8.43796813722865e-07, | |
| "loss": 1.0383517742156982, | |
| "step": 1106 | |
| }, | |
| { | |
| "epoch": 0.9326599326599326, | |
| "grad_norm": 16.552364349365234, | |
| "learning_rate": 8.431636145706035e-07, | |
| "loss": 0.8570190072059631, | |
| "step": 1108 | |
| }, | |
| { | |
| "epoch": 0.9343434343434344, | |
| "grad_norm": 6.3068037033081055, | |
| "learning_rate": 8.425294053463387e-07, | |
| "loss": 1.227846384048462, | |
| "step": 1110 | |
| }, | |
| { | |
| "epoch": 0.936026936026936, | |
| "grad_norm": 21.39204978942871, | |
| "learning_rate": 8.418941882351883e-07, | |
| "loss": 1.2234206199645996, | |
| "step": 1112 | |
| }, | |
| { | |
| "epoch": 0.9377104377104377, | |
| "grad_norm": 3.4600205421447754, | |
| "learning_rate": 8.412579654257424e-07, | |
| "loss": 1.0893580913543701, | |
| "step": 1114 | |
| }, | |
| { | |
| "epoch": 0.9393939393939394, | |
| "grad_norm": 9.739093780517578, | |
| "learning_rate": 8.406207391100564e-07, | |
| "loss": 1.1603511571884155, | |
| "step": 1116 | |
| }, | |
| { | |
| "epoch": 0.9410774410774411, | |
| "grad_norm": 9.583012580871582, | |
| "learning_rate": 8.399825114836431e-07, | |
| "loss": 1.036285161972046, | |
| "step": 1118 | |
| }, | |
| { | |
| "epoch": 0.9427609427609428, | |
| "grad_norm": 3.670794725418091, | |
| "learning_rate": 8.393432847454651e-07, | |
| "loss": 1.2967090606689453, | |
| "step": 1120 | |
| }, | |
| { | |
| "epoch": 0.9444444444444444, | |
| "grad_norm": 3.190880060195923, | |
| "learning_rate": 8.387030610979276e-07, | |
| "loss": 0.7892323732376099, | |
| "step": 1122 | |
| }, | |
| { | |
| "epoch": 0.9461279461279462, | |
| "grad_norm": 2.7288999557495117, | |
| "learning_rate": 8.380618427468703e-07, | |
| "loss": 0.8631899356842041, | |
| "step": 1124 | |
| }, | |
| { | |
| "epoch": 0.9478114478114478, | |
| "grad_norm": 80.42435455322266, | |
| "learning_rate": 8.374196319015605e-07, | |
| "loss": 0.8700990080833435, | |
| "step": 1126 | |
| }, | |
| { | |
| "epoch": 0.9494949494949495, | |
| "grad_norm": 2.7032294273376465, | |
| "learning_rate": 8.367764307746843e-07, | |
| "loss": 0.9584017992019653, | |
| "step": 1128 | |
| }, | |
| { | |
| "epoch": 0.9511784511784511, | |
| "grad_norm": 29.493919372558594, | |
| "learning_rate": 8.361322415823407e-07, | |
| "loss": 0.9330191016197205, | |
| "step": 1130 | |
| }, | |
| { | |
| "epoch": 0.9528619528619529, | |
| "grad_norm": 2.8431601524353027, | |
| "learning_rate": 8.354870665440322e-07, | |
| "loss": 0.9470508098602295, | |
| "step": 1132 | |
| }, | |
| { | |
| "epoch": 0.9545454545454546, | |
| "grad_norm": 4.1329240798950195, | |
| "learning_rate": 8.348409078826586e-07, | |
| "loss": 1.003962755203247, | |
| "step": 1134 | |
| }, | |
| { | |
| "epoch": 0.9562289562289562, | |
| "grad_norm": 21.232402801513672, | |
| "learning_rate": 8.341937678245078e-07, | |
| "loss": 0.8706526756286621, | |
| "step": 1136 | |
| }, | |
| { | |
| "epoch": 0.9579124579124579, | |
| "grad_norm": 6.638863563537598, | |
| "learning_rate": 8.335456485992501e-07, | |
| "loss": 0.7324610948562622, | |
| "step": 1138 | |
| }, | |
| { | |
| "epoch": 0.9595959595959596, | |
| "grad_norm": 9.82058048248291, | |
| "learning_rate": 8.328965524399288e-07, | |
| "loss": 0.5701298713684082, | |
| "step": 1140 | |
| }, | |
| { | |
| "epoch": 0.9612794612794613, | |
| "grad_norm": 4.2321672439575195, | |
| "learning_rate": 8.322464815829531e-07, | |
| "loss": 0.8950085639953613, | |
| "step": 1142 | |
| }, | |
| { | |
| "epoch": 0.9629629629629629, | |
| "grad_norm": 4.749987602233887, | |
| "learning_rate": 8.315954382680909e-07, | |
| "loss": 0.6259889602661133, | |
| "step": 1144 | |
| }, | |
| { | |
| "epoch": 0.9646464646464646, | |
| "grad_norm": 3.1439943313598633, | |
| "learning_rate": 8.309434247384601e-07, | |
| "loss": 0.9208143949508667, | |
| "step": 1146 | |
| }, | |
| { | |
| "epoch": 0.9663299663299664, | |
| "grad_norm": 9.139312744140625, | |
| "learning_rate": 8.302904432405219e-07, | |
| "loss": 0.7828265428543091, | |
| "step": 1148 | |
| }, | |
| { | |
| "epoch": 0.968013468013468, | |
| "grad_norm": 8.519466400146484, | |
| "learning_rate": 8.296364960240722e-07, | |
| "loss": 0.9561738967895508, | |
| "step": 1150 | |
| }, | |
| { | |
| "epoch": 0.9696969696969697, | |
| "grad_norm": 21.469980239868164, | |
| "learning_rate": 8.289815853422342e-07, | |
| "loss": 0.608352541923523, | |
| "step": 1152 | |
| }, | |
| { | |
| "epoch": 0.9713804713804713, | |
| "grad_norm": 6.825742721557617, | |
| "learning_rate": 8.283257134514507e-07, | |
| "loss": 0.9338740110397339, | |
| "step": 1154 | |
| }, | |
| { | |
| "epoch": 0.9730639730639731, | |
| "grad_norm": 4.129487991333008, | |
| "learning_rate": 8.276688826114768e-07, | |
| "loss": 0.5884324312210083, | |
| "step": 1156 | |
| }, | |
| { | |
| "epoch": 0.9747474747474747, | |
| "grad_norm": 5.313873291015625, | |
| "learning_rate": 8.270110950853706e-07, | |
| "loss": 0.9547237753868103, | |
| "step": 1158 | |
| }, | |
| { | |
| "epoch": 0.9764309764309764, | |
| "grad_norm": 6.063114643096924, | |
| "learning_rate": 8.263523531394872e-07, | |
| "loss": 0.44445914030075073, | |
| "step": 1160 | |
| }, | |
| { | |
| "epoch": 0.9781144781144782, | |
| "grad_norm": 17.088842391967773, | |
| "learning_rate": 8.256926590434696e-07, | |
| "loss": 1.1655336618423462, | |
| "step": 1162 | |
| }, | |
| { | |
| "epoch": 0.9797979797979798, | |
| "grad_norm": 2.781656265258789, | |
| "learning_rate": 8.250320150702416e-07, | |
| "loss": 0.6978096961975098, | |
| "step": 1164 | |
| }, | |
| { | |
| "epoch": 0.9814814814814815, | |
| "grad_norm": 4.57460355758667, | |
| "learning_rate": 8.243704234959996e-07, | |
| "loss": 0.8053257465362549, | |
| "step": 1166 | |
| }, | |
| { | |
| "epoch": 0.9831649831649831, | |
| "grad_norm": 7.392634391784668, | |
| "learning_rate": 8.237078866002051e-07, | |
| "loss": 0.8369849920272827, | |
| "step": 1168 | |
| }, | |
| { | |
| "epoch": 0.9848484848484849, | |
| "grad_norm": 7.21369743347168, | |
| "learning_rate": 8.230444066655763e-07, | |
| "loss": 0.8643122911453247, | |
| "step": 1170 | |
| }, | |
| { | |
| "epoch": 0.9865319865319865, | |
| "grad_norm": 8.024483680725098, | |
| "learning_rate": 8.223799859780808e-07, | |
| "loss": 0.6412187814712524, | |
| "step": 1172 | |
| }, | |
| { | |
| "epoch": 0.9882154882154882, | |
| "grad_norm": 25.19280433654785, | |
| "learning_rate": 8.217146268269274e-07, | |
| "loss": 0.917904257774353, | |
| "step": 1174 | |
| }, | |
| { | |
| "epoch": 0.98989898989899, | |
| "grad_norm": 9.128271102905273, | |
| "learning_rate": 8.210483315045584e-07, | |
| "loss": 0.4360630214214325, | |
| "step": 1176 | |
| }, | |
| { | |
| "epoch": 0.9915824915824916, | |
| "grad_norm": 10.149953842163086, | |
| "learning_rate": 8.203811023066416e-07, | |
| "loss": 1.071942925453186, | |
| "step": 1178 | |
| }, | |
| { | |
| "epoch": 0.9932659932659933, | |
| "grad_norm": 8.710041999816895, | |
| "learning_rate": 8.197129415320622e-07, | |
| "loss": 0.4572172164916992, | |
| "step": 1180 | |
| }, | |
| { | |
| "epoch": 0.9949494949494949, | |
| "grad_norm": 3.669222116470337, | |
| "learning_rate": 8.190438514829151e-07, | |
| "loss": 0.9243024587631226, | |
| "step": 1182 | |
| }, | |
| { | |
| "epoch": 0.9966329966329966, | |
| "grad_norm": 12.717865943908691, | |
| "learning_rate": 8.183738344644973e-07, | |
| "loss": 1.0385701656341553, | |
| "step": 1184 | |
| }, | |
| { | |
| "epoch": 0.9983164983164983, | |
| "grad_norm": 4.85836935043335, | |
| "learning_rate": 8.177028927852992e-07, | |
| "loss": 0.6608575582504272, | |
| "step": 1186 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "grad_norm": 11.576709747314453, | |
| "learning_rate": 8.170310287569973e-07, | |
| "loss": 0.7577022910118103, | |
| "step": 1188 | |
| }, | |
| { | |
| "epoch": 1.0016835016835017, | |
| "grad_norm": 9.695958137512207, | |
| "learning_rate": 8.163582446944456e-07, | |
| "loss": 0.4615962505340576, | |
| "step": 1190 | |
| }, | |
| { | |
| "epoch": 1.0033670033670035, | |
| "grad_norm": 6.610690116882324, | |
| "learning_rate": 8.156845429156687e-07, | |
| "loss": 0.4831297993659973, | |
| "step": 1192 | |
| }, | |
| { | |
| "epoch": 1.005050505050505, | |
| "grad_norm": 3.4326443672180176, | |
| "learning_rate": 8.150099257418522e-07, | |
| "loss": 1.146728515625, | |
| "step": 1194 | |
| }, | |
| { | |
| "epoch": 1.0067340067340067, | |
| "grad_norm": 20.49312973022461, | |
| "learning_rate": 8.143343954973366e-07, | |
| "loss": 0.8859339356422424, | |
| "step": 1196 | |
| }, | |
| { | |
| "epoch": 1.0084175084175084, | |
| "grad_norm": 3.5065126419067383, | |
| "learning_rate": 8.136579545096076e-07, | |
| "loss": 1.0677597522735596, | |
| "step": 1198 | |
| }, | |
| { | |
| "epoch": 1.0101010101010102, | |
| "grad_norm": 13.90986156463623, | |
| "learning_rate": 8.129806051092889e-07, | |
| "loss": 1.1894700527191162, | |
| "step": 1200 | |
| }, | |
| { | |
| "epoch": 1.0117845117845117, | |
| "grad_norm": 3.6254143714904785, | |
| "learning_rate": 8.123023496301343e-07, | |
| "loss": 0.985792338848114, | |
| "step": 1202 | |
| }, | |
| { | |
| "epoch": 1.0134680134680134, | |
| "grad_norm": 2.666475296020508, | |
| "learning_rate": 8.116231904090192e-07, | |
| "loss": 1.0036242008209229, | |
| "step": 1204 | |
| }, | |
| { | |
| "epoch": 1.0151515151515151, | |
| "grad_norm": 15.559446334838867, | |
| "learning_rate": 8.109431297859332e-07, | |
| "loss": 1.0831941366195679, | |
| "step": 1206 | |
| }, | |
| { | |
| "epoch": 1.0168350168350169, | |
| "grad_norm": 16.54594612121582, | |
| "learning_rate": 8.10262170103971e-07, | |
| "loss": 0.6582114696502686, | |
| "step": 1208 | |
| }, | |
| { | |
| "epoch": 1.0185185185185186, | |
| "grad_norm": 4.971505641937256, | |
| "learning_rate": 8.095803137093252e-07, | |
| "loss": 0.7359082698822021, | |
| "step": 1210 | |
| }, | |
| { | |
| "epoch": 1.02020202020202, | |
| "grad_norm": 3.355790853500366, | |
| "learning_rate": 8.088975629512781e-07, | |
| "loss": 0.5685245990753174, | |
| "step": 1212 | |
| }, | |
| { | |
| "epoch": 1.0218855218855218, | |
| "grad_norm": 9.155191421508789, | |
| "learning_rate": 8.082139201821933e-07, | |
| "loss": 0.8225246667861938, | |
| "step": 1214 | |
| }, | |
| { | |
| "epoch": 1.0235690235690236, | |
| "grad_norm": 12.392461776733398, | |
| "learning_rate": 8.075293877575079e-07, | |
| "loss": 0.4670335352420807, | |
| "step": 1216 | |
| }, | |
| { | |
| "epoch": 1.0252525252525253, | |
| "grad_norm": 15.242469787597656, | |
| "learning_rate": 8.068439680357239e-07, | |
| "loss": 0.9990904331207275, | |
| "step": 1218 | |
| }, | |
| { | |
| "epoch": 1.026936026936027, | |
| "grad_norm": 5.101475238800049, | |
| "learning_rate": 8.06157663378401e-07, | |
| "loss": 0.8169501423835754, | |
| "step": 1220 | |
| }, | |
| { | |
| "epoch": 1.0286195286195285, | |
| "grad_norm": 45.69724655151367, | |
| "learning_rate": 8.054704761501471e-07, | |
| "loss": 0.9720203280448914, | |
| "step": 1222 | |
| }, | |
| { | |
| "epoch": 1.0303030303030303, | |
| "grad_norm": 9.621611595153809, | |
| "learning_rate": 8.047824087186116e-07, | |
| "loss": 1.1497771739959717, | |
| "step": 1224 | |
| }, | |
| { | |
| "epoch": 1.031986531986532, | |
| "grad_norm": 17.078630447387695, | |
| "learning_rate": 8.040934634544761e-07, | |
| "loss": 0.6966054439544678, | |
| "step": 1226 | |
| }, | |
| { | |
| "epoch": 1.0336700336700337, | |
| "grad_norm": 12.26323413848877, | |
| "learning_rate": 8.03403642731447e-07, | |
| "loss": 0.9055821299552917, | |
| "step": 1228 | |
| }, | |
| { | |
| "epoch": 1.0353535353535352, | |
| "grad_norm": 4.618709564208984, | |
| "learning_rate": 8.027129489262472e-07, | |
| "loss": 0.8367654085159302, | |
| "step": 1230 | |
| }, | |
| { | |
| "epoch": 1.037037037037037, | |
| "grad_norm": 14.03416919708252, | |
| "learning_rate": 8.020213844186071e-07, | |
| "loss": 0.5471811294555664, | |
| "step": 1232 | |
| }, | |
| { | |
| "epoch": 1.0387205387205387, | |
| "grad_norm": 2.462353229522705, | |
| "learning_rate": 8.013289515912575e-07, | |
| "loss": 0.9337582588195801, | |
| "step": 1234 | |
| }, | |
| { | |
| "epoch": 1.0404040404040404, | |
| "grad_norm": 3.580676794052124, | |
| "learning_rate": 8.006356528299211e-07, | |
| "loss": 0.9284713268280029, | |
| "step": 1236 | |
| }, | |
| { | |
| "epoch": 1.0420875420875422, | |
| "grad_norm": 14.55753231048584, | |
| "learning_rate": 7.999414905233035e-07, | |
| "loss": 0.5675897598266602, | |
| "step": 1238 | |
| }, | |
| { | |
| "epoch": 1.0437710437710437, | |
| "grad_norm": 3.7598259449005127, | |
| "learning_rate": 7.992464670630862e-07, | |
| "loss": 1.0432960987091064, | |
| "step": 1240 | |
| }, | |
| { | |
| "epoch": 1.0454545454545454, | |
| "grad_norm": 6.506076335906982, | |
| "learning_rate": 7.985505848439171e-07, | |
| "loss": 1.0147356986999512, | |
| "step": 1242 | |
| }, | |
| { | |
| "epoch": 1.0471380471380471, | |
| "grad_norm": 4.660027503967285, | |
| "learning_rate": 7.978538462634036e-07, | |
| "loss": 0.7054228782653809, | |
| "step": 1244 | |
| }, | |
| { | |
| "epoch": 1.0488215488215489, | |
| "grad_norm": 15.017945289611816, | |
| "learning_rate": 7.971562537221032e-07, | |
| "loss": 0.7315689325332642, | |
| "step": 1246 | |
| }, | |
| { | |
| "epoch": 1.0505050505050506, | |
| "grad_norm": 11.619869232177734, | |
| "learning_rate": 7.964578096235156e-07, | |
| "loss": 0.9787733554840088, | |
| "step": 1248 | |
| }, | |
| { | |
| "epoch": 1.0521885521885521, | |
| "grad_norm": 25.001440048217773, | |
| "learning_rate": 7.957585163740746e-07, | |
| "loss": 0.7732163667678833, | |
| "step": 1250 | |
| }, | |
| { | |
| "epoch": 1.0538720538720538, | |
| "grad_norm": 13.280570030212402, | |
| "learning_rate": 7.950583763831398e-07, | |
| "loss": 0.7055392861366272, | |
| "step": 1252 | |
| }, | |
| { | |
| "epoch": 1.0555555555555556, | |
| "grad_norm": 8.0188627243042, | |
| "learning_rate": 7.943573920629879e-07, | |
| "loss": 1.0268526077270508, | |
| "step": 1254 | |
| }, | |
| { | |
| "epoch": 1.0572390572390573, | |
| "grad_norm": 8.311823844909668, | |
| "learning_rate": 7.936555658288051e-07, | |
| "loss": 0.7499762177467346, | |
| "step": 1256 | |
| }, | |
| { | |
| "epoch": 1.0589225589225588, | |
| "grad_norm": 12.510072708129883, | |
| "learning_rate": 7.929529000986778e-07, | |
| "loss": 0.5642093420028687, | |
| "step": 1258 | |
| }, | |
| { | |
| "epoch": 1.0606060606060606, | |
| "grad_norm": 8.302406311035156, | |
| "learning_rate": 7.922493972935851e-07, | |
| "loss": 0.8775455355644226, | |
| "step": 1260 | |
| }, | |
| { | |
| "epoch": 1.0622895622895623, | |
| "grad_norm": 4.110003471374512, | |
| "learning_rate": 7.915450598373903e-07, | |
| "loss": 0.6986871957778931, | |
| "step": 1262 | |
| }, | |
| { | |
| "epoch": 1.063973063973064, | |
| "grad_norm": 5.865422248840332, | |
| "learning_rate": 7.908398901568324e-07, | |
| "loss": 0.8195330500602722, | |
| "step": 1264 | |
| }, | |
| { | |
| "epoch": 1.0656565656565657, | |
| "grad_norm": 9.913485527038574, | |
| "learning_rate": 7.901338906815174e-07, | |
| "loss": 0.8037704229354858, | |
| "step": 1266 | |
| }, | |
| { | |
| "epoch": 1.0673400673400673, | |
| "grad_norm": 111.66101837158203, | |
| "learning_rate": 7.894270638439106e-07, | |
| "loss": 0.6612458825111389, | |
| "step": 1268 | |
| }, | |
| { | |
| "epoch": 1.069023569023569, | |
| "grad_norm": 6.807026386260986, | |
| "learning_rate": 7.88719412079328e-07, | |
| "loss": 0.6571763157844543, | |
| "step": 1270 | |
| }, | |
| { | |
| "epoch": 1.0707070707070707, | |
| "grad_norm": 6.202319622039795, | |
| "learning_rate": 7.880109378259274e-07, | |
| "loss": 0.7407518625259399, | |
| "step": 1272 | |
| }, | |
| { | |
| "epoch": 1.0723905723905724, | |
| "grad_norm": 18.488807678222656, | |
| "learning_rate": 7.873016435247011e-07, | |
| "loss": 0.5137653350830078, | |
| "step": 1274 | |
| }, | |
| { | |
| "epoch": 1.074074074074074, | |
| "grad_norm": 6.398234844207764, | |
| "learning_rate": 7.865915316194661e-07, | |
| "loss": 0.7220208644866943, | |
| "step": 1276 | |
| }, | |
| { | |
| "epoch": 1.0757575757575757, | |
| "grad_norm": 24.44901466369629, | |
| "learning_rate": 7.858806045568568e-07, | |
| "loss": 1.0816729068756104, | |
| "step": 1278 | |
| }, | |
| { | |
| "epoch": 1.0774410774410774, | |
| "grad_norm": 42.94617462158203, | |
| "learning_rate": 7.85168864786316e-07, | |
| "loss": 0.569089412689209, | |
| "step": 1280 | |
| }, | |
| { | |
| "epoch": 1.0791245791245792, | |
| "grad_norm": 17.059085845947266, | |
| "learning_rate": 7.844563147600869e-07, | |
| "loss": 0.34395474195480347, | |
| "step": 1282 | |
| }, | |
| { | |
| "epoch": 1.0808080808080809, | |
| "grad_norm": 5.726075172424316, | |
| "learning_rate": 7.837429569332038e-07, | |
| "loss": 1.104400873184204, | |
| "step": 1284 | |
| }, | |
| { | |
| "epoch": 1.0824915824915824, | |
| "grad_norm": 5.970583915710449, | |
| "learning_rate": 7.830287937634848e-07, | |
| "loss": 0.9108725786209106, | |
| "step": 1286 | |
| }, | |
| { | |
| "epoch": 1.0841750841750841, | |
| "grad_norm": 3.13798451423645, | |
| "learning_rate": 7.823138277115227e-07, | |
| "loss": 0.6928012371063232, | |
| "step": 1288 | |
| }, | |
| { | |
| "epoch": 1.0858585858585859, | |
| "grad_norm": 3.2338767051696777, | |
| "learning_rate": 7.81598061240676e-07, | |
| "loss": 0.6945496797561646, | |
| "step": 1290 | |
| }, | |
| { | |
| "epoch": 1.0875420875420876, | |
| "grad_norm": 9.174521446228027, | |
| "learning_rate": 7.808814968170612e-07, | |
| "loss": 1.177178144454956, | |
| "step": 1292 | |
| }, | |
| { | |
| "epoch": 1.0892255892255893, | |
| "grad_norm": 2.838789463043213, | |
| "learning_rate": 7.801641369095449e-07, | |
| "loss": 0.8742045164108276, | |
| "step": 1294 | |
| }, | |
| { | |
| "epoch": 1.0909090909090908, | |
| "grad_norm": 33.68141555786133, | |
| "learning_rate": 7.794459839897334e-07, | |
| "loss": 0.5730578899383545, | |
| "step": 1296 | |
| }, | |
| { | |
| "epoch": 1.0925925925925926, | |
| "grad_norm": 8.239413261413574, | |
| "learning_rate": 7.787270405319656e-07, | |
| "loss": 0.6627512574195862, | |
| "step": 1298 | |
| }, | |
| { | |
| "epoch": 1.0942760942760943, | |
| "grad_norm": 10.630107879638672, | |
| "learning_rate": 7.780073090133045e-07, | |
| "loss": 0.6856255531311035, | |
| "step": 1300 | |
| }, | |
| { | |
| "epoch": 1.095959595959596, | |
| "grad_norm": 8.586835861206055, | |
| "learning_rate": 7.772867919135278e-07, | |
| "loss": 0.7367527484893799, | |
| "step": 1302 | |
| }, | |
| { | |
| "epoch": 1.0976430976430978, | |
| "grad_norm": 26.07152557373047, | |
| "learning_rate": 7.765654917151201e-07, | |
| "loss": 0.6313869953155518, | |
| "step": 1304 | |
| }, | |
| { | |
| "epoch": 1.0993265993265993, | |
| "grad_norm": 26.481813430786133, | |
| "learning_rate": 7.758434109032642e-07, | |
| "loss": 0.6839025020599365, | |
| "step": 1306 | |
| }, | |
| { | |
| "epoch": 1.101010101010101, | |
| "grad_norm": 11.492305755615234, | |
| "learning_rate": 7.751205519658321e-07, | |
| "loss": 0.5959317684173584, | |
| "step": 1308 | |
| }, | |
| { | |
| "epoch": 1.1026936026936027, | |
| "grad_norm": 5.645211219787598, | |
| "learning_rate": 7.743969173933771e-07, | |
| "loss": 0.5784125924110413, | |
| "step": 1310 | |
| }, | |
| { | |
| "epoch": 1.1043771043771045, | |
| "grad_norm": 4.408408164978027, | |
| "learning_rate": 7.736725096791249e-07, | |
| "loss": 1.2098188400268555, | |
| "step": 1312 | |
| }, | |
| { | |
| "epoch": 1.106060606060606, | |
| "grad_norm": 9.238399505615234, | |
| "learning_rate": 7.729473313189647e-07, | |
| "loss": 0.9550820589065552, | |
| "step": 1314 | |
| }, | |
| { | |
| "epoch": 1.1077441077441077, | |
| "grad_norm": 15.260536193847656, | |
| "learning_rate": 7.722213848114411e-07, | |
| "loss": 0.9281185865402222, | |
| "step": 1316 | |
| }, | |
| { | |
| "epoch": 1.1094276094276094, | |
| "grad_norm": 11.378418922424316, | |
| "learning_rate": 7.714946726577453e-07, | |
| "loss": 0.9321832656860352, | |
| "step": 1318 | |
| }, | |
| { | |
| "epoch": 1.1111111111111112, | |
| "grad_norm": 27.803199768066406, | |
| "learning_rate": 7.707671973617066e-07, | |
| "loss": 0.7850360870361328, | |
| "step": 1320 | |
| }, | |
| { | |
| "epoch": 1.112794612794613, | |
| "grad_norm": 11.86633586883545, | |
| "learning_rate": 7.700389614297832e-07, | |
| "loss": 0.8705657124519348, | |
| "step": 1322 | |
| }, | |
| { | |
| "epoch": 1.1144781144781144, | |
| "grad_norm": 13.372186660766602, | |
| "learning_rate": 7.693099673710545e-07, | |
| "loss": 0.5348168015480042, | |
| "step": 1324 | |
| }, | |
| { | |
| "epoch": 1.1161616161616161, | |
| "grad_norm": 7.737417697906494, | |
| "learning_rate": 7.685802176972117e-07, | |
| "loss": 0.8875303268432617, | |
| "step": 1326 | |
| }, | |
| { | |
| "epoch": 1.1178451178451179, | |
| "grad_norm": 4.609512805938721, | |
| "learning_rate": 7.678497149225494e-07, | |
| "loss": 0.7146286964416504, | |
| "step": 1328 | |
| }, | |
| { | |
| "epoch": 1.1195286195286196, | |
| "grad_norm": 3.953033447265625, | |
| "learning_rate": 7.671184615639573e-07, | |
| "loss": 1.0624680519104004, | |
| "step": 1330 | |
| }, | |
| { | |
| "epoch": 1.121212121212121, | |
| "grad_norm": 15.329386711120605, | |
| "learning_rate": 7.663864601409106e-07, | |
| "loss": 0.7291280031204224, | |
| "step": 1332 | |
| }, | |
| { | |
| "epoch": 1.1228956228956228, | |
| "grad_norm": 5.592386722564697, | |
| "learning_rate": 7.656537131754621e-07, | |
| "loss": 1.146779179573059, | |
| "step": 1334 | |
| }, | |
| { | |
| "epoch": 1.1245791245791246, | |
| "grad_norm": 19.50740623474121, | |
| "learning_rate": 7.649202231922338e-07, | |
| "loss": 0.6419116258621216, | |
| "step": 1336 | |
| }, | |
| { | |
| "epoch": 1.1262626262626263, | |
| "grad_norm": 3.845174789428711, | |
| "learning_rate": 7.641859927184071e-07, | |
| "loss": 0.7372583150863647, | |
| "step": 1338 | |
| }, | |
| { | |
| "epoch": 1.127946127946128, | |
| "grad_norm": 8.609213829040527, | |
| "learning_rate": 7.634510242837149e-07, | |
| "loss": 0.603482723236084, | |
| "step": 1340 | |
| }, | |
| { | |
| "epoch": 1.1296296296296295, | |
| "grad_norm": 7.67048454284668, | |
| "learning_rate": 7.627153204204329e-07, | |
| "loss": 0.9267317056655884, | |
| "step": 1342 | |
| }, | |
| { | |
| "epoch": 1.1313131313131313, | |
| "grad_norm": 3.1689493656158447, | |
| "learning_rate": 7.619788836633701e-07, | |
| "loss": 1.1948891878128052, | |
| "step": 1344 | |
| }, | |
| { | |
| "epoch": 1.132996632996633, | |
| "grad_norm": 44.90256118774414, | |
| "learning_rate": 7.612417165498611e-07, | |
| "loss": 1.0813300609588623, | |
| "step": 1346 | |
| }, | |
| { | |
| "epoch": 1.1346801346801347, | |
| "grad_norm": 30.334089279174805, | |
| "learning_rate": 7.605038216197569e-07, | |
| "loss": 0.7344606518745422, | |
| "step": 1348 | |
| }, | |
| { | |
| "epoch": 1.1363636363636362, | |
| "grad_norm": 7.781182765960693, | |
| "learning_rate": 7.597652014154162e-07, | |
| "loss": 0.5709810256958008, | |
| "step": 1350 | |
| }, | |
| { | |
| "epoch": 1.138047138047138, | |
| "grad_norm": 17.377174377441406, | |
| "learning_rate": 7.590258584816957e-07, | |
| "loss": 0.32737797498703003, | |
| "step": 1352 | |
| }, | |
| { | |
| "epoch": 1.1397306397306397, | |
| "grad_norm": 3.968998908996582, | |
| "learning_rate": 7.582857953659437e-07, | |
| "loss": 1.0901448726654053, | |
| "step": 1354 | |
| }, | |
| { | |
| "epoch": 1.1414141414141414, | |
| "grad_norm": 4.9800801277160645, | |
| "learning_rate": 7.575450146179887e-07, | |
| "loss": 1.098610281944275, | |
| "step": 1356 | |
| }, | |
| { | |
| "epoch": 1.1430976430976432, | |
| "grad_norm": 11.949906349182129, | |
| "learning_rate": 7.56803518790132e-07, | |
| "loss": 0.8105623722076416, | |
| "step": 1358 | |
| }, | |
| { | |
| "epoch": 1.144781144781145, | |
| "grad_norm": 3.4032137393951416, | |
| "learning_rate": 7.560613104371386e-07, | |
| "loss": 0.7330828905105591, | |
| "step": 1360 | |
| }, | |
| { | |
| "epoch": 1.1464646464646464, | |
| "grad_norm": 2.8660380840301514, | |
| "learning_rate": 7.553183921162289e-07, | |
| "loss": 0.9020315408706665, | |
| "step": 1362 | |
| }, | |
| { | |
| "epoch": 1.1481481481481481, | |
| "grad_norm": 12.72059154510498, | |
| "learning_rate": 7.545747663870687e-07, | |
| "loss": 0.9371917843818665, | |
| "step": 1364 | |
| }, | |
| { | |
| "epoch": 1.1498316498316499, | |
| "grad_norm": 23.1413631439209, | |
| "learning_rate": 7.53830435811762e-07, | |
| "loss": 0.7397361993789673, | |
| "step": 1366 | |
| }, | |
| { | |
| "epoch": 1.1515151515151516, | |
| "grad_norm": 13.042642593383789, | |
| "learning_rate": 7.530854029548404e-07, | |
| "loss": 0.8247054815292358, | |
| "step": 1368 | |
| }, | |
| { | |
| "epoch": 1.1531986531986531, | |
| "grad_norm": 4.0835795402526855, | |
| "learning_rate": 7.523396703832557e-07, | |
| "loss": 1.090425968170166, | |
| "step": 1370 | |
| }, | |
| { | |
| "epoch": 1.1548821548821548, | |
| "grad_norm": 3.6361794471740723, | |
| "learning_rate": 7.515932406663705e-07, | |
| "loss": 1.0872161388397217, | |
| "step": 1372 | |
| }, | |
| { | |
| "epoch": 1.1565656565656566, | |
| "grad_norm": 13.066899299621582, | |
| "learning_rate": 7.508461163759493e-07, | |
| "loss": 0.49930015206336975, | |
| "step": 1374 | |
| }, | |
| { | |
| "epoch": 1.1582491582491583, | |
| "grad_norm": 5.910285472869873, | |
| "learning_rate": 7.500983000861493e-07, | |
| "loss": 0.46187859773635864, | |
| "step": 1376 | |
| }, | |
| { | |
| "epoch": 1.15993265993266, | |
| "grad_norm": 12.718847274780273, | |
| "learning_rate": 7.493497943735124e-07, | |
| "loss": 0.9587620496749878, | |
| "step": 1378 | |
| }, | |
| { | |
| "epoch": 1.1616161616161615, | |
| "grad_norm": 2.7174603939056396, | |
| "learning_rate": 7.48600601816956e-07, | |
| "loss": 0.7705467939376831, | |
| "step": 1380 | |
| }, | |
| { | |
| "epoch": 1.1632996632996633, | |
| "grad_norm": 10.425454139709473, | |
| "learning_rate": 7.478507249977632e-07, | |
| "loss": 0.5908098220825195, | |
| "step": 1382 | |
| }, | |
| { | |
| "epoch": 1.164983164983165, | |
| "grad_norm": 4.83370304107666, | |
| "learning_rate": 7.471001664995757e-07, | |
| "loss": 0.4560571312904358, | |
| "step": 1384 | |
| }, | |
| { | |
| "epoch": 1.1666666666666667, | |
| "grad_norm": 16.3512020111084, | |
| "learning_rate": 7.46348928908383e-07, | |
| "loss": 0.6046204566955566, | |
| "step": 1386 | |
| }, | |
| { | |
| "epoch": 1.1683501683501682, | |
| "grad_norm": 3.3071091175079346, | |
| "learning_rate": 7.455970148125145e-07, | |
| "loss": 0.6498188972473145, | |
| "step": 1388 | |
| }, | |
| { | |
| "epoch": 1.17003367003367, | |
| "grad_norm": 3.1778576374053955, | |
| "learning_rate": 7.44844426802631e-07, | |
| "loss": 0.9177660942077637, | |
| "step": 1390 | |
| }, | |
| { | |
| "epoch": 1.1717171717171717, | |
| "grad_norm": 6.8912577629089355, | |
| "learning_rate": 7.440911674717148e-07, | |
| "loss": 0.9661788940429688, | |
| "step": 1392 | |
| }, | |
| { | |
| "epoch": 1.1734006734006734, | |
| "grad_norm": 2.982248306274414, | |
| "learning_rate": 7.433372394150613e-07, | |
| "loss": 0.7623599171638489, | |
| "step": 1394 | |
| }, | |
| { | |
| "epoch": 1.1750841750841752, | |
| "grad_norm": 6.73823356628418, | |
| "learning_rate": 7.425826452302695e-07, | |
| "loss": 0.6162515878677368, | |
| "step": 1396 | |
| }, | |
| { | |
| "epoch": 1.1767676767676767, | |
| "grad_norm": 7.467746734619141, | |
| "learning_rate": 7.418273875172344e-07, | |
| "loss": 0.7228857278823853, | |
| "step": 1398 | |
| }, | |
| { | |
| "epoch": 1.1784511784511784, | |
| "grad_norm": 10.521594047546387, | |
| "learning_rate": 7.410714688781362e-07, | |
| "loss": 0.547920823097229, | |
| "step": 1400 | |
| }, | |
| { | |
| "epoch": 1.1801346801346801, | |
| "grad_norm": 4.692141056060791, | |
| "learning_rate": 7.403148919174327e-07, | |
| "loss": 1.011480450630188, | |
| "step": 1402 | |
| }, | |
| { | |
| "epoch": 1.1818181818181819, | |
| "grad_norm": 6.844545841217041, | |
| "learning_rate": 7.3955765924185e-07, | |
| "loss": 0.7596945762634277, | |
| "step": 1404 | |
| }, | |
| { | |
| "epoch": 1.1835016835016834, | |
| "grad_norm": 8.648809432983398, | |
| "learning_rate": 7.387997734603734e-07, | |
| "loss": 0.771956205368042, | |
| "step": 1406 | |
| }, | |
| { | |
| "epoch": 1.1851851851851851, | |
| "grad_norm": 15.440680503845215, | |
| "learning_rate": 7.38041237184238e-07, | |
| "loss": 1.2356925010681152, | |
| "step": 1408 | |
| }, | |
| { | |
| "epoch": 1.1868686868686869, | |
| "grad_norm": 5.456315040588379, | |
| "learning_rate": 7.372820530269203e-07, | |
| "loss": 0.727834165096283, | |
| "step": 1410 | |
| }, | |
| { | |
| "epoch": 1.1885521885521886, | |
| "grad_norm": 33.579254150390625, | |
| "learning_rate": 7.365222236041298e-07, | |
| "loss": 0.780275821685791, | |
| "step": 1412 | |
| }, | |
| { | |
| "epoch": 1.1902356902356903, | |
| "grad_norm": 14.142115592956543, | |
| "learning_rate": 7.35761751533798e-07, | |
| "loss": 0.9008167386054993, | |
| "step": 1414 | |
| }, | |
| { | |
| "epoch": 1.1919191919191918, | |
| "grad_norm": 9.76620864868164, | |
| "learning_rate": 7.350006394360716e-07, | |
| "loss": 0.7642953991889954, | |
| "step": 1416 | |
| }, | |
| { | |
| "epoch": 1.1936026936026936, | |
| "grad_norm": 17.838695526123047, | |
| "learning_rate": 7.342388899333014e-07, | |
| "loss": 1.0995585918426514, | |
| "step": 1418 | |
| }, | |
| { | |
| "epoch": 1.1952861952861953, | |
| "grad_norm": 65.10449981689453, | |
| "learning_rate": 7.334765056500356e-07, | |
| "loss": 0.947974443435669, | |
| "step": 1420 | |
| }, | |
| { | |
| "epoch": 1.196969696969697, | |
| "grad_norm": 4.257263660430908, | |
| "learning_rate": 7.327134892130085e-07, | |
| "loss": 0.7925307750701904, | |
| "step": 1422 | |
| }, | |
| { | |
| "epoch": 1.1986531986531987, | |
| "grad_norm": 3.5786726474761963, | |
| "learning_rate": 7.319498432511329e-07, | |
| "loss": 0.6507192850112915, | |
| "step": 1424 | |
| }, | |
| { | |
| "epoch": 1.2003367003367003, | |
| "grad_norm": 9.806020736694336, | |
| "learning_rate": 7.311855703954901e-07, | |
| "loss": 0.9374374747276306, | |
| "step": 1426 | |
| }, | |
| { | |
| "epoch": 1.202020202020202, | |
| "grad_norm": 16.49274253845215, | |
| "learning_rate": 7.304206732793222e-07, | |
| "loss": 0.5745439529418945, | |
| "step": 1428 | |
| }, | |
| { | |
| "epoch": 1.2037037037037037, | |
| "grad_norm": 10.744287490844727, | |
| "learning_rate": 7.296551545380213e-07, | |
| "loss": 0.9440407752990723, | |
| "step": 1430 | |
| }, | |
| { | |
| "epoch": 1.2053872053872055, | |
| "grad_norm": 4.190220832824707, | |
| "learning_rate": 7.288890168091214e-07, | |
| "loss": 0.7019326686859131, | |
| "step": 1432 | |
| }, | |
| { | |
| "epoch": 1.2070707070707072, | |
| "grad_norm": 4.626961708068848, | |
| "learning_rate": 7.281222627322897e-07, | |
| "loss": 1.2138803005218506, | |
| "step": 1434 | |
| }, | |
| { | |
| "epoch": 1.2087542087542087, | |
| "grad_norm": 29.172809600830078, | |
| "learning_rate": 7.273548949493166e-07, | |
| "loss": 0.6954021453857422, | |
| "step": 1436 | |
| }, | |
| { | |
| "epoch": 1.2104377104377104, | |
| "grad_norm": 6.540690898895264, | |
| "learning_rate": 7.265869161041065e-07, | |
| "loss": 0.5005062818527222, | |
| "step": 1438 | |
| }, | |
| { | |
| "epoch": 1.2121212121212122, | |
| "grad_norm": 1.7837268114089966, | |
| "learning_rate": 7.258183288426703e-07, | |
| "loss": 0.4664597511291504, | |
| "step": 1440 | |
| }, | |
| { | |
| "epoch": 1.2138047138047139, | |
| "grad_norm": 6.852010250091553, | |
| "learning_rate": 7.25049135813114e-07, | |
| "loss": 0.7454104423522949, | |
| "step": 1442 | |
| }, | |
| { | |
| "epoch": 1.2154882154882154, | |
| "grad_norm": 3.7926137447357178, | |
| "learning_rate": 7.242793396656315e-07, | |
| "loss": 0.9171748757362366, | |
| "step": 1444 | |
| }, | |
| { | |
| "epoch": 1.2171717171717171, | |
| "grad_norm": 4.602051734924316, | |
| "learning_rate": 7.235089430524943e-07, | |
| "loss": 0.9297394156455994, | |
| "step": 1446 | |
| }, | |
| { | |
| "epoch": 1.2188552188552189, | |
| "grad_norm": 8.485408782958984, | |
| "learning_rate": 7.227379486280432e-07, | |
| "loss": 0.6902468800544739, | |
| "step": 1448 | |
| }, | |
| { | |
| "epoch": 1.2205387205387206, | |
| "grad_norm": 3.0322980880737305, | |
| "learning_rate": 7.219663590486778e-07, | |
| "loss": 0.9321104288101196, | |
| "step": 1450 | |
| }, | |
| { | |
| "epoch": 1.2222222222222223, | |
| "grad_norm": 21.71652603149414, | |
| "learning_rate": 7.211941769728493e-07, | |
| "loss": 0.9111616611480713, | |
| "step": 1452 | |
| }, | |
| { | |
| "epoch": 1.2239057239057238, | |
| "grad_norm": 5.9835405349731445, | |
| "learning_rate": 7.204214050610498e-07, | |
| "loss": 0.6736348867416382, | |
| "step": 1454 | |
| }, | |
| { | |
| "epoch": 1.2255892255892256, | |
| "grad_norm": 5.00324010848999, | |
| "learning_rate": 7.196480459758035e-07, | |
| "loss": 0.8823907375335693, | |
| "step": 1456 | |
| }, | |
| { | |
| "epoch": 1.2272727272727273, | |
| "grad_norm": 9.083556175231934, | |
| "learning_rate": 7.188741023816581e-07, | |
| "loss": 0.8732795715332031, | |
| "step": 1458 | |
| }, | |
| { | |
| "epoch": 1.228956228956229, | |
| "grad_norm": 6.7020440101623535, | |
| "learning_rate": 7.180995769451747e-07, | |
| "loss": 0.9818441867828369, | |
| "step": 1460 | |
| }, | |
| { | |
| "epoch": 1.2306397306397305, | |
| "grad_norm": 13.26759147644043, | |
| "learning_rate": 7.173244723349194e-07, | |
| "loss": 0.7110154628753662, | |
| "step": 1462 | |
| }, | |
| { | |
| "epoch": 1.2323232323232323, | |
| "grad_norm": 3.6703689098358154, | |
| "learning_rate": 7.165487912214538e-07, | |
| "loss": 0.6870818138122559, | |
| "step": 1464 | |
| }, | |
| { | |
| "epoch": 1.234006734006734, | |
| "grad_norm": 4.100058078765869, | |
| "learning_rate": 7.157725362773258e-07, | |
| "loss": 0.8629697561264038, | |
| "step": 1466 | |
| }, | |
| { | |
| "epoch": 1.2356902356902357, | |
| "grad_norm": 7.570556640625, | |
| "learning_rate": 7.1499571017706e-07, | |
| "loss": 0.9524326324462891, | |
| "step": 1468 | |
| }, | |
| { | |
| "epoch": 1.2373737373737375, | |
| "grad_norm": 3.626100778579712, | |
| "learning_rate": 7.142183155971493e-07, | |
| "loss": 1.1208899021148682, | |
| "step": 1470 | |
| }, | |
| { | |
| "epoch": 1.239057239057239, | |
| "grad_norm": 6.774829387664795, | |
| "learning_rate": 7.13440355216045e-07, | |
| "loss": 0.6910721063613892, | |
| "step": 1472 | |
| }, | |
| { | |
| "epoch": 1.2407407407407407, | |
| "grad_norm": 12.941313743591309, | |
| "learning_rate": 7.126618317141482e-07, | |
| "loss": 0.6839091777801514, | |
| "step": 1474 | |
| }, | |
| { | |
| "epoch": 1.2424242424242424, | |
| "grad_norm": 13.288043022155762, | |
| "learning_rate": 7.118827477737999e-07, | |
| "loss": 0.4849187135696411, | |
| "step": 1476 | |
| }, | |
| { | |
| "epoch": 1.2441077441077442, | |
| "grad_norm": 14.330803871154785, | |
| "learning_rate": 7.111031060792719e-07, | |
| "loss": 0.7669592499732971, | |
| "step": 1478 | |
| }, | |
| { | |
| "epoch": 1.2457912457912457, | |
| "grad_norm": 3.7719264030456543, | |
| "learning_rate": 7.103229093167579e-07, | |
| "loss": 0.7678747773170471, | |
| "step": 1480 | |
| }, | |
| { | |
| "epoch": 1.2474747474747474, | |
| "grad_norm": 5.733471393585205, | |
| "learning_rate": 7.095421601743643e-07, | |
| "loss": 0.7603921890258789, | |
| "step": 1482 | |
| }, | |
| { | |
| "epoch": 1.2491582491582491, | |
| "grad_norm": 3.3023183345794678, | |
| "learning_rate": 7.087608613421e-07, | |
| "loss": 0.475089430809021, | |
| "step": 1484 | |
| }, | |
| { | |
| "epoch": 1.2508417508417509, | |
| "grad_norm": 6.135479927062988, | |
| "learning_rate": 7.079790155118684e-07, | |
| "loss": 0.6280136108398438, | |
| "step": 1486 | |
| }, | |
| { | |
| "epoch": 1.2525252525252526, | |
| "grad_norm": 14.41522216796875, | |
| "learning_rate": 7.071966253774575e-07, | |
| "loss": 0.7469892501831055, | |
| "step": 1488 | |
| }, | |
| { | |
| "epoch": 1.2542087542087543, | |
| "grad_norm": 2.5887715816497803, | |
| "learning_rate": 7.064136936345304e-07, | |
| "loss": 0.7018432021141052, | |
| "step": 1490 | |
| }, | |
| { | |
| "epoch": 1.2558922558922558, | |
| "grad_norm": 3.5334408283233643, | |
| "learning_rate": 7.056302229806163e-07, | |
| "loss": 0.825816810131073, | |
| "step": 1492 | |
| }, | |
| { | |
| "epoch": 1.2575757575757576, | |
| "grad_norm": 3.581906795501709, | |
| "learning_rate": 7.048462161151012e-07, | |
| "loss": 0.8269777297973633, | |
| "step": 1494 | |
| }, | |
| { | |
| "epoch": 1.2592592592592593, | |
| "grad_norm": 8.52226734161377, | |
| "learning_rate": 7.040616757392188e-07, | |
| "loss": 0.7199699282646179, | |
| "step": 1496 | |
| }, | |
| { | |
| "epoch": 1.2609427609427608, | |
| "grad_norm": 2.9323740005493164, | |
| "learning_rate": 7.032766045560408e-07, | |
| "loss": 0.9787487387657166, | |
| "step": 1498 | |
| }, | |
| { | |
| "epoch": 1.2626262626262625, | |
| "grad_norm": 6.500389099121094, | |
| "learning_rate": 7.024910052704677e-07, | |
| "loss": 1.0706979036331177, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 1.2643097643097643, | |
| "grad_norm": 5.391655445098877, | |
| "learning_rate": 7.017048805892194e-07, | |
| "loss": 0.5319828987121582, | |
| "step": 1502 | |
| }, | |
| { | |
| "epoch": 1.265993265993266, | |
| "grad_norm": 5.92175817489624, | |
| "learning_rate": 7.009182332208266e-07, | |
| "loss": 0.7819663286209106, | |
| "step": 1504 | |
| }, | |
| { | |
| "epoch": 1.2676767676767677, | |
| "grad_norm": 4.497714996337891, | |
| "learning_rate": 7.001310658756201e-07, | |
| "loss": 1.1338582038879395, | |
| "step": 1506 | |
| }, | |
| { | |
| "epoch": 1.2693602693602695, | |
| "grad_norm": 4.954183578491211, | |
| "learning_rate": 6.993433812657226e-07, | |
| "loss": 1.1781617403030396, | |
| "step": 1508 | |
| }, | |
| { | |
| "epoch": 1.271043771043771, | |
| "grad_norm": 17.044879913330078, | |
| "learning_rate": 6.985551821050395e-07, | |
| "loss": 0.5676237344741821, | |
| "step": 1510 | |
| }, | |
| { | |
| "epoch": 1.2727272727272727, | |
| "grad_norm": 17.09630012512207, | |
| "learning_rate": 6.97766471109248e-07, | |
| "loss": 0.6173258423805237, | |
| "step": 1512 | |
| }, | |
| { | |
| "epoch": 1.2744107744107744, | |
| "grad_norm": 5.917657375335693, | |
| "learning_rate": 6.969772509957895e-07, | |
| "loss": 0.8361184597015381, | |
| "step": 1514 | |
| }, | |
| { | |
| "epoch": 1.2760942760942762, | |
| "grad_norm": 4.721149921417236, | |
| "learning_rate": 6.961875244838596e-07, | |
| "loss": 0.8495975732803345, | |
| "step": 1516 | |
| }, | |
| { | |
| "epoch": 1.2777777777777777, | |
| "grad_norm": 11.31229019165039, | |
| "learning_rate": 6.953972942943981e-07, | |
| "loss": 0.7243598699569702, | |
| "step": 1518 | |
| }, | |
| { | |
| "epoch": 1.2794612794612794, | |
| "grad_norm": 3.162838935852051, | |
| "learning_rate": 6.946065631500806e-07, | |
| "loss": 0.9145760536193848, | |
| "step": 1520 | |
| }, | |
| { | |
| "epoch": 1.2811447811447811, | |
| "grad_norm": 9.259127616882324, | |
| "learning_rate": 6.938153337753088e-07, | |
| "loss": 0.6645021438598633, | |
| "step": 1522 | |
| }, | |
| { | |
| "epoch": 1.2828282828282829, | |
| "grad_norm": 26.91777229309082, | |
| "learning_rate": 6.930236088962004e-07, | |
| "loss": 0.651879072189331, | |
| "step": 1524 | |
| }, | |
| { | |
| "epoch": 1.2845117845117846, | |
| "grad_norm": 5.544670104980469, | |
| "learning_rate": 6.922313912405811e-07, | |
| "loss": 0.8310514688491821, | |
| "step": 1526 | |
| }, | |
| { | |
| "epoch": 1.2861952861952861, | |
| "grad_norm": 16.370214462280273, | |
| "learning_rate": 6.914386835379738e-07, | |
| "loss": 0.7569658756256104, | |
| "step": 1528 | |
| }, | |
| { | |
| "epoch": 1.2878787878787878, | |
| "grad_norm": 8.142780303955078, | |
| "learning_rate": 6.906454885195904e-07, | |
| "loss": 0.4488654136657715, | |
| "step": 1530 | |
| }, | |
| { | |
| "epoch": 1.2895622895622896, | |
| "grad_norm": 5.413924217224121, | |
| "learning_rate": 6.898518089183211e-07, | |
| "loss": 0.8656577467918396, | |
| "step": 1532 | |
| }, | |
| { | |
| "epoch": 1.2912457912457913, | |
| "grad_norm": 4.607274532318115, | |
| "learning_rate": 6.890576474687263e-07, | |
| "loss": 1.0356013774871826, | |
| "step": 1534 | |
| }, | |
| { | |
| "epoch": 1.2929292929292928, | |
| "grad_norm": 6.521271705627441, | |
| "learning_rate": 6.882630069070262e-07, | |
| "loss": 0.9825664758682251, | |
| "step": 1536 | |
| }, | |
| { | |
| "epoch": 1.2946127946127945, | |
| "grad_norm": 5.2521071434021, | |
| "learning_rate": 6.874678899710923e-07, | |
| "loss": 0.6595628261566162, | |
| "step": 1538 | |
| }, | |
| { | |
| "epoch": 1.2962962962962963, | |
| "grad_norm": 16.32155990600586, | |
| "learning_rate": 6.866722994004364e-07, | |
| "loss": 0.7686331868171692, | |
| "step": 1540 | |
| }, | |
| { | |
| "epoch": 1.297979797979798, | |
| "grad_norm": 15.72677230834961, | |
| "learning_rate": 6.858762379362032e-07, | |
| "loss": 0.8358673453330994, | |
| "step": 1542 | |
| }, | |
| { | |
| "epoch": 1.2996632996632997, | |
| "grad_norm": 5.651062965393066, | |
| "learning_rate": 6.850797083211591e-07, | |
| "loss": 0.9706641435623169, | |
| "step": 1544 | |
| }, | |
| { | |
| "epoch": 1.3013468013468015, | |
| "grad_norm": 10.415674209594727, | |
| "learning_rate": 6.842827132996841e-07, | |
| "loss": 0.8287351131439209, | |
| "step": 1546 | |
| }, | |
| { | |
| "epoch": 1.303030303030303, | |
| "grad_norm": 16.539886474609375, | |
| "learning_rate": 6.83485255617761e-07, | |
| "loss": 0.8370147943496704, | |
| "step": 1548 | |
| }, | |
| { | |
| "epoch": 1.3047138047138047, | |
| "grad_norm": 6.127871036529541, | |
| "learning_rate": 6.826873380229673e-07, | |
| "loss": 0.6265941858291626, | |
| "step": 1550 | |
| }, | |
| { | |
| "epoch": 1.3063973063973064, | |
| "grad_norm": 6.429442882537842, | |
| "learning_rate": 6.818889632644649e-07, | |
| "loss": 0.9182727336883545, | |
| "step": 1552 | |
| }, | |
| { | |
| "epoch": 1.308080808080808, | |
| "grad_norm": 4.870426654815674, | |
| "learning_rate": 6.810901340929906e-07, | |
| "loss": 0.962719202041626, | |
| "step": 1554 | |
| }, | |
| { | |
| "epoch": 1.3097643097643097, | |
| "grad_norm": 4.017622947692871, | |
| "learning_rate": 6.802908532608472e-07, | |
| "loss": 1.0228416919708252, | |
| "step": 1556 | |
| }, | |
| { | |
| "epoch": 1.3114478114478114, | |
| "grad_norm": 6.815629482269287, | |
| "learning_rate": 6.794911235218932e-07, | |
| "loss": 0.9271608591079712, | |
| "step": 1558 | |
| }, | |
| { | |
| "epoch": 1.3131313131313131, | |
| "grad_norm": 18.521018981933594, | |
| "learning_rate": 6.786909476315342e-07, | |
| "loss": 0.473792165517807, | |
| "step": 1560 | |
| }, | |
| { | |
| "epoch": 1.3148148148148149, | |
| "grad_norm": 7.367074966430664, | |
| "learning_rate": 6.778903283467128e-07, | |
| "loss": 0.5411000847816467, | |
| "step": 1562 | |
| }, | |
| { | |
| "epoch": 1.3164983164983166, | |
| "grad_norm": 23.994110107421875, | |
| "learning_rate": 6.770892684258995e-07, | |
| "loss": 0.5685646533966064, | |
| "step": 1564 | |
| }, | |
| { | |
| "epoch": 1.3181818181818181, | |
| "grad_norm": 2.764239549636841, | |
| "learning_rate": 6.762877706290823e-07, | |
| "loss": 1.0790038108825684, | |
| "step": 1566 | |
| }, | |
| { | |
| "epoch": 1.3198653198653199, | |
| "grad_norm": 13.226496696472168, | |
| "learning_rate": 6.754858377177587e-07, | |
| "loss": 0.6365941762924194, | |
| "step": 1568 | |
| }, | |
| { | |
| "epoch": 1.3215488215488216, | |
| "grad_norm": 8.614484786987305, | |
| "learning_rate": 6.74683472454925e-07, | |
| "loss": 0.9468154907226562, | |
| "step": 1570 | |
| }, | |
| { | |
| "epoch": 1.3232323232323233, | |
| "grad_norm": 6.47797966003418, | |
| "learning_rate": 6.738806776050672e-07, | |
| "loss": 0.8475841283798218, | |
| "step": 1572 | |
| }, | |
| { | |
| "epoch": 1.3249158249158248, | |
| "grad_norm": 5.899251461029053, | |
| "learning_rate": 6.730774559341512e-07, | |
| "loss": 0.7157614231109619, | |
| "step": 1574 | |
| }, | |
| { | |
| "epoch": 1.3265993265993266, | |
| "grad_norm": 2.9437053203582764, | |
| "learning_rate": 6.722738102096135e-07, | |
| "loss": 1.0155985355377197, | |
| "step": 1576 | |
| }, | |
| { | |
| "epoch": 1.3282828282828283, | |
| "grad_norm": 7.690277576446533, | |
| "learning_rate": 6.714697432003519e-07, | |
| "loss": 0.8999049663543701, | |
| "step": 1578 | |
| }, | |
| { | |
| "epoch": 1.32996632996633, | |
| "grad_norm": 8.396078109741211, | |
| "learning_rate": 6.706652576767156e-07, | |
| "loss": 0.6247600317001343, | |
| "step": 1580 | |
| }, | |
| { | |
| "epoch": 1.3316498316498318, | |
| "grad_norm": 7.930534362792969, | |
| "learning_rate": 6.698603564104958e-07, | |
| "loss": 0.6954329013824463, | |
| "step": 1582 | |
| }, | |
| { | |
| "epoch": 1.3333333333333333, | |
| "grad_norm": 3.4395411014556885, | |
| "learning_rate": 6.690550421749157e-07, | |
| "loss": 1.0694022178649902, | |
| "step": 1584 | |
| }, | |
| { | |
| "epoch": 1.335016835016835, | |
| "grad_norm": 4.19919490814209, | |
| "learning_rate": 6.682493177446221e-07, | |
| "loss": 0.946961522102356, | |
| "step": 1586 | |
| }, | |
| { | |
| "epoch": 1.3367003367003367, | |
| "grad_norm": 6.434517860412598, | |
| "learning_rate": 6.674431858956743e-07, | |
| "loss": 0.5836731195449829, | |
| "step": 1588 | |
| }, | |
| { | |
| "epoch": 1.3383838383838385, | |
| "grad_norm": 4.509551048278809, | |
| "learning_rate": 6.666366494055358e-07, | |
| "loss": 0.72353595495224, | |
| "step": 1590 | |
| }, | |
| { | |
| "epoch": 1.34006734006734, | |
| "grad_norm": 17.6490478515625, | |
| "learning_rate": 6.658297110530646e-07, | |
| "loss": 0.598315954208374, | |
| "step": 1592 | |
| }, | |
| { | |
| "epoch": 1.3417508417508417, | |
| "grad_norm": 8.337894439697266, | |
| "learning_rate": 6.650223736185023e-07, | |
| "loss": 1.0166845321655273, | |
| "step": 1594 | |
| }, | |
| { | |
| "epoch": 1.3434343434343434, | |
| "grad_norm": 27.206972122192383, | |
| "learning_rate": 6.642146398834663e-07, | |
| "loss": 0.4449620842933655, | |
| "step": 1596 | |
| }, | |
| { | |
| "epoch": 1.3451178451178452, | |
| "grad_norm": 8.197900772094727, | |
| "learning_rate": 6.63406512630939e-07, | |
| "loss": 0.9014108180999756, | |
| "step": 1598 | |
| }, | |
| { | |
| "epoch": 1.3468013468013469, | |
| "grad_norm": 7.576647758483887, | |
| "learning_rate": 6.625979946452592e-07, | |
| "loss": 0.809765100479126, | |
| "step": 1600 | |
| }, | |
| { | |
| "epoch": 1.3484848484848486, | |
| "grad_norm": 44.33702087402344, | |
| "learning_rate": 6.617890887121111e-07, | |
| "loss": 0.7150375843048096, | |
| "step": 1602 | |
| }, | |
| { | |
| "epoch": 1.3501683501683501, | |
| "grad_norm": 7.649331569671631, | |
| "learning_rate": 6.60979797618516e-07, | |
| "loss": 0.8225715756416321, | |
| "step": 1604 | |
| }, | |
| { | |
| "epoch": 1.3518518518518519, | |
| "grad_norm": 4.691690444946289, | |
| "learning_rate": 6.601701241528228e-07, | |
| "loss": 1.2066047191619873, | |
| "step": 1606 | |
| }, | |
| { | |
| "epoch": 1.3535353535353536, | |
| "grad_norm": 4.63446569442749, | |
| "learning_rate": 6.593600711046969e-07, | |
| "loss": 0.924203097820282, | |
| "step": 1608 | |
| }, | |
| { | |
| "epoch": 1.355218855218855, | |
| "grad_norm": 10.622723579406738, | |
| "learning_rate": 6.585496412651116e-07, | |
| "loss": 0.5192527770996094, | |
| "step": 1610 | |
| }, | |
| { | |
| "epoch": 1.3569023569023568, | |
| "grad_norm": 3.938314914703369, | |
| "learning_rate": 6.57738837426339e-07, | |
| "loss": 1.000133752822876, | |
| "step": 1612 | |
| }, | |
| { | |
| "epoch": 1.3585858585858586, | |
| "grad_norm": 4.946197986602783, | |
| "learning_rate": 6.569276623819396e-07, | |
| "loss": 0.6809890270233154, | |
| "step": 1614 | |
| }, | |
| { | |
| "epoch": 1.3602693602693603, | |
| "grad_norm": 10.934066772460938, | |
| "learning_rate": 6.561161189267526e-07, | |
| "loss": 0.6985521912574768, | |
| "step": 1616 | |
| }, | |
| { | |
| "epoch": 1.361952861952862, | |
| "grad_norm": 3.1545114517211914, | |
| "learning_rate": 6.553042098568865e-07, | |
| "loss": 0.916617214679718, | |
| "step": 1618 | |
| }, | |
| { | |
| "epoch": 1.3636363636363638, | |
| "grad_norm": 3.2825722694396973, | |
| "learning_rate": 6.544919379697099e-07, | |
| "loss": 0.729028582572937, | |
| "step": 1620 | |
| }, | |
| { | |
| "epoch": 1.3653198653198653, | |
| "grad_norm": 3.8294615745544434, | |
| "learning_rate": 6.536793060638412e-07, | |
| "loss": 1.0753536224365234, | |
| "step": 1622 | |
| }, | |
| { | |
| "epoch": 1.367003367003367, | |
| "grad_norm": 2.503497838973999, | |
| "learning_rate": 6.528663169391391e-07, | |
| "loss": 0.9852238893508911, | |
| "step": 1624 | |
| }, | |
| { | |
| "epoch": 1.3686868686868687, | |
| "grad_norm": 8.0145263671875, | |
| "learning_rate": 6.520529733966932e-07, | |
| "loss": 0.6827946901321411, | |
| "step": 1626 | |
| }, | |
| { | |
| "epoch": 1.3703703703703702, | |
| "grad_norm": 3.5943119525909424, | |
| "learning_rate": 6.512392782388144e-07, | |
| "loss": 0.9226878881454468, | |
| "step": 1628 | |
| }, | |
| { | |
| "epoch": 1.372053872053872, | |
| "grad_norm": 4.993966102600098, | |
| "learning_rate": 6.504252342690247e-07, | |
| "loss": 0.9282613396644592, | |
| "step": 1630 | |
| }, | |
| { | |
| "epoch": 1.3737373737373737, | |
| "grad_norm": 8.258445739746094, | |
| "learning_rate": 6.496108442920482e-07, | |
| "loss": 1.0419143438339233, | |
| "step": 1632 | |
| }, | |
| { | |
| "epoch": 1.3754208754208754, | |
| "grad_norm": 11.405352592468262, | |
| "learning_rate": 6.48796111113801e-07, | |
| "loss": 0.7039163112640381, | |
| "step": 1634 | |
| }, | |
| { | |
| "epoch": 1.3771043771043772, | |
| "grad_norm": 2.947396755218506, | |
| "learning_rate": 6.479810375413819e-07, | |
| "loss": 0.39542487263679504, | |
| "step": 1636 | |
| }, | |
| { | |
| "epoch": 1.378787878787879, | |
| "grad_norm": 8.117940902709961, | |
| "learning_rate": 6.471656263830618e-07, | |
| "loss": 0.6473898887634277, | |
| "step": 1638 | |
| }, | |
| { | |
| "epoch": 1.3804713804713804, | |
| "grad_norm": 3.3129611015319824, | |
| "learning_rate": 6.463498804482757e-07, | |
| "loss": 0.7153133153915405, | |
| "step": 1640 | |
| }, | |
| { | |
| "epoch": 1.3821548821548821, | |
| "grad_norm": 7.839447498321533, | |
| "learning_rate": 6.455338025476116e-07, | |
| "loss": 0.9829051494598389, | |
| "step": 1642 | |
| }, | |
| { | |
| "epoch": 1.3838383838383839, | |
| "grad_norm": 6.2321343421936035, | |
| "learning_rate": 6.447173954928011e-07, | |
| "loss": 1.191624641418457, | |
| "step": 1644 | |
| }, | |
| { | |
| "epoch": 1.3855218855218856, | |
| "grad_norm": 8.412511825561523, | |
| "learning_rate": 6.439006620967097e-07, | |
| "loss": 0.8809744715690613, | |
| "step": 1646 | |
| }, | |
| { | |
| "epoch": 1.387205387205387, | |
| "grad_norm": 3.1402454376220703, | |
| "learning_rate": 6.430836051733282e-07, | |
| "loss": 1.0235364437103271, | |
| "step": 1648 | |
| }, | |
| { | |
| "epoch": 1.3888888888888888, | |
| "grad_norm": 4.362932205200195, | |
| "learning_rate": 6.42266227537761e-07, | |
| "loss": 0.9193822741508484, | |
| "step": 1650 | |
| }, | |
| { | |
| "epoch": 1.3905723905723906, | |
| "grad_norm": 2.9106979370117188, | |
| "learning_rate": 6.414485320062181e-07, | |
| "loss": 1.2303351163864136, | |
| "step": 1652 | |
| }, | |
| { | |
| "epoch": 1.3922558922558923, | |
| "grad_norm": 3.156247854232788, | |
| "learning_rate": 6.406305213960045e-07, | |
| "loss": 1.0456502437591553, | |
| "step": 1654 | |
| }, | |
| { | |
| "epoch": 1.393939393939394, | |
| "grad_norm": 13.951912879943848, | |
| "learning_rate": 6.398121985255116e-07, | |
| "loss": 0.6429623365402222, | |
| "step": 1656 | |
| }, | |
| { | |
| "epoch": 1.3956228956228955, | |
| "grad_norm": 2.782015323638916, | |
| "learning_rate": 6.389935662142053e-07, | |
| "loss": 0.6639566421508789, | |
| "step": 1658 | |
| }, | |
| { | |
| "epoch": 1.3973063973063973, | |
| "grad_norm": 9.586030960083008, | |
| "learning_rate": 6.381746272826186e-07, | |
| "loss": 0.9411950707435608, | |
| "step": 1660 | |
| }, | |
| { | |
| "epoch": 1.398989898989899, | |
| "grad_norm": 25.26241111755371, | |
| "learning_rate": 6.373553845523407e-07, | |
| "loss": 0.8540170192718506, | |
| "step": 1662 | |
| }, | |
| { | |
| "epoch": 1.4006734006734007, | |
| "grad_norm": 40.64924240112305, | |
| "learning_rate": 6.365358408460076e-07, | |
| "loss": 0.7800917625427246, | |
| "step": 1664 | |
| }, | |
| { | |
| "epoch": 1.4023569023569022, | |
| "grad_norm": 5.472312927246094, | |
| "learning_rate": 6.35715998987292e-07, | |
| "loss": 0.5686221718788147, | |
| "step": 1666 | |
| }, | |
| { | |
| "epoch": 1.404040404040404, | |
| "grad_norm": 15.37363338470459, | |
| "learning_rate": 6.348958618008943e-07, | |
| "loss": 0.8799217939376831, | |
| "step": 1668 | |
| }, | |
| { | |
| "epoch": 1.4057239057239057, | |
| "grad_norm": 2.726579189300537, | |
| "learning_rate": 6.340754321125318e-07, | |
| "loss": 0.8866001963615417, | |
| "step": 1670 | |
| }, | |
| { | |
| "epoch": 1.4074074074074074, | |
| "grad_norm": 2.6039483547210693, | |
| "learning_rate": 6.332547127489305e-07, | |
| "loss": 0.8179314136505127, | |
| "step": 1672 | |
| }, | |
| { | |
| "epoch": 1.4090909090909092, | |
| "grad_norm": 3.4876205921173096, | |
| "learning_rate": 6.324337065378136e-07, | |
| "loss": 1.2043547630310059, | |
| "step": 1674 | |
| }, | |
| { | |
| "epoch": 1.410774410774411, | |
| "grad_norm": 13.763050079345703, | |
| "learning_rate": 6.316124163078927e-07, | |
| "loss": 0.488219678401947, | |
| "step": 1676 | |
| }, | |
| { | |
| "epoch": 1.4124579124579124, | |
| "grad_norm": 8.983017921447754, | |
| "learning_rate": 6.307908448888588e-07, | |
| "loss": 1.0192590951919556, | |
| "step": 1678 | |
| }, | |
| { | |
| "epoch": 1.4141414141414141, | |
| "grad_norm": 25.50023651123047, | |
| "learning_rate": 6.299689951113709e-07, | |
| "loss": 1.12066650390625, | |
| "step": 1680 | |
| }, | |
| { | |
| "epoch": 1.4158249158249159, | |
| "grad_norm": 2.510024309158325, | |
| "learning_rate": 6.29146869807047e-07, | |
| "loss": 0.6489291191101074, | |
| "step": 1682 | |
| }, | |
| { | |
| "epoch": 1.4175084175084174, | |
| "grad_norm": 20.36254119873047, | |
| "learning_rate": 6.283244718084551e-07, | |
| "loss": 0.5022568702697754, | |
| "step": 1684 | |
| }, | |
| { | |
| "epoch": 1.4191919191919191, | |
| "grad_norm": 25.578750610351562, | |
| "learning_rate": 6.27501803949102e-07, | |
| "loss": 0.6631441712379456, | |
| "step": 1686 | |
| }, | |
| { | |
| "epoch": 1.4208754208754208, | |
| "grad_norm": 3.3692312240600586, | |
| "learning_rate": 6.266788690634247e-07, | |
| "loss": 1.16062593460083, | |
| "step": 1688 | |
| }, | |
| { | |
| "epoch": 1.4225589225589226, | |
| "grad_norm": 34.72169876098633, | |
| "learning_rate": 6.258556699867804e-07, | |
| "loss": 0.5728762149810791, | |
| "step": 1690 | |
| }, | |
| { | |
| "epoch": 1.4242424242424243, | |
| "grad_norm": 6.119333744049072, | |
| "learning_rate": 6.25032209555436e-07, | |
| "loss": 0.6605233550071716, | |
| "step": 1692 | |
| }, | |
| { | |
| "epoch": 1.425925925925926, | |
| "grad_norm": 5.281041622161865, | |
| "learning_rate": 6.242084906065592e-07, | |
| "loss": 0.6033918261528015, | |
| "step": 1694 | |
| }, | |
| { | |
| "epoch": 1.4276094276094276, | |
| "grad_norm": 7.152311325073242, | |
| "learning_rate": 6.233845159782085e-07, | |
| "loss": 1.2653751373291016, | |
| "step": 1696 | |
| }, | |
| { | |
| "epoch": 1.4292929292929293, | |
| "grad_norm": 5.30517053604126, | |
| "learning_rate": 6.22560288509323e-07, | |
| "loss": 1.0920519828796387, | |
| "step": 1698 | |
| }, | |
| { | |
| "epoch": 1.430976430976431, | |
| "grad_norm": 2.4701943397521973, | |
| "learning_rate": 6.217358110397133e-07, | |
| "loss": 0.8582168817520142, | |
| "step": 1700 | |
| }, | |
| { | |
| "epoch": 1.4326599326599325, | |
| "grad_norm": 3.8614046573638916, | |
| "learning_rate": 6.209110864100511e-07, | |
| "loss": 0.8965442776679993, | |
| "step": 1702 | |
| }, | |
| { | |
| "epoch": 1.4343434343434343, | |
| "grad_norm": 6.42789888381958, | |
| "learning_rate": 6.200861174618599e-07, | |
| "loss": 0.570695698261261, | |
| "step": 1704 | |
| }, | |
| { | |
| "epoch": 1.436026936026936, | |
| "grad_norm": 35.690406799316406, | |
| "learning_rate": 6.192609070375045e-07, | |
| "loss": 0.4622350335121155, | |
| "step": 1706 | |
| }, | |
| { | |
| "epoch": 1.4377104377104377, | |
| "grad_norm": 12.375661849975586, | |
| "learning_rate": 6.184354579801825e-07, | |
| "loss": 1.0623770952224731, | |
| "step": 1708 | |
| }, | |
| { | |
| "epoch": 1.4393939393939394, | |
| "grad_norm": 80.91400146484375, | |
| "learning_rate": 6.176097731339128e-07, | |
| "loss": 1.1389422416687012, | |
| "step": 1710 | |
| }, | |
| { | |
| "epoch": 1.4410774410774412, | |
| "grad_norm": 10.364182472229004, | |
| "learning_rate": 6.167838553435273e-07, | |
| "loss": 1.0922863483428955, | |
| "step": 1712 | |
| }, | |
| { | |
| "epoch": 1.4427609427609427, | |
| "grad_norm": 2.8876967430114746, | |
| "learning_rate": 6.159577074546601e-07, | |
| "loss": 1.0083891153335571, | |
| "step": 1714 | |
| }, | |
| { | |
| "epoch": 1.4444444444444444, | |
| "grad_norm": 26.479930877685547, | |
| "learning_rate": 6.151313323137387e-07, | |
| "loss": 0.958626925945282, | |
| "step": 1716 | |
| }, | |
| { | |
| "epoch": 1.4461279461279462, | |
| "grad_norm": 2.9834275245666504, | |
| "learning_rate": 6.14304732767973e-07, | |
| "loss": 0.8797729015350342, | |
| "step": 1718 | |
| }, | |
| { | |
| "epoch": 1.4478114478114479, | |
| "grad_norm": 7.285353660583496, | |
| "learning_rate": 6.134779116653459e-07, | |
| "loss": 0.7979905605316162, | |
| "step": 1720 | |
| }, | |
| { | |
| "epoch": 1.4494949494949494, | |
| "grad_norm": 13.242645263671875, | |
| "learning_rate": 6.126508718546044e-07, | |
| "loss": 0.6679774522781372, | |
| "step": 1722 | |
| }, | |
| { | |
| "epoch": 1.4511784511784511, | |
| "grad_norm": 5.430975437164307, | |
| "learning_rate": 6.118236161852486e-07, | |
| "loss": 0.7967842221260071, | |
| "step": 1724 | |
| }, | |
| { | |
| "epoch": 1.4528619528619529, | |
| "grad_norm": 15.7615385055542, | |
| "learning_rate": 6.10996147507522e-07, | |
| "loss": 0.9348810911178589, | |
| "step": 1726 | |
| }, | |
| { | |
| "epoch": 1.4545454545454546, | |
| "grad_norm": 14.294349670410156, | |
| "learning_rate": 6.101684686724027e-07, | |
| "loss": 0.7149630188941956, | |
| "step": 1728 | |
| }, | |
| { | |
| "epoch": 1.4562289562289563, | |
| "grad_norm": 4.464022636413574, | |
| "learning_rate": 6.093405825315923e-07, | |
| "loss": 1.0214498043060303, | |
| "step": 1730 | |
| }, | |
| { | |
| "epoch": 1.457912457912458, | |
| "grad_norm": 3.1939473152160645, | |
| "learning_rate": 6.08512491937507e-07, | |
| "loss": 1.2640581130981445, | |
| "step": 1732 | |
| }, | |
| { | |
| "epoch": 1.4595959595959596, | |
| "grad_norm": 4.230099678039551, | |
| "learning_rate": 6.076841997432677e-07, | |
| "loss": 0.9663617014884949, | |
| "step": 1734 | |
| }, | |
| { | |
| "epoch": 1.4612794612794613, | |
| "grad_norm": 11.766712188720703, | |
| "learning_rate": 6.06855708802689e-07, | |
| "loss": 0.7833054065704346, | |
| "step": 1736 | |
| }, | |
| { | |
| "epoch": 1.462962962962963, | |
| "grad_norm": 6.552529811859131, | |
| "learning_rate": 6.060270219702709e-07, | |
| "loss": 0.6994054317474365, | |
| "step": 1738 | |
| }, | |
| { | |
| "epoch": 1.4646464646464645, | |
| "grad_norm": 2.931861400604248, | |
| "learning_rate": 6.051981421011882e-07, | |
| "loss": 1.1358039379119873, | |
| "step": 1740 | |
| }, | |
| { | |
| "epoch": 1.4663299663299663, | |
| "grad_norm": 9.284839630126953, | |
| "learning_rate": 6.043690720512812e-07, | |
| "loss": 0.7364188432693481, | |
| "step": 1742 | |
| }, | |
| { | |
| "epoch": 1.468013468013468, | |
| "grad_norm": 5.37172794342041, | |
| "learning_rate": 6.035398146770444e-07, | |
| "loss": 0.5165277123451233, | |
| "step": 1744 | |
| }, | |
| { | |
| "epoch": 1.4696969696969697, | |
| "grad_norm": 5.121616363525391, | |
| "learning_rate": 6.027103728356189e-07, | |
| "loss": 1.0125455856323242, | |
| "step": 1746 | |
| }, | |
| { | |
| "epoch": 1.4713804713804715, | |
| "grad_norm": 2.773219347000122, | |
| "learning_rate": 6.018807493847804e-07, | |
| "loss": 1.035334825515747, | |
| "step": 1748 | |
| }, | |
| { | |
| "epoch": 1.4730639730639732, | |
| "grad_norm": 7.262451171875, | |
| "learning_rate": 6.010509471829312e-07, | |
| "loss": 0.7966405153274536, | |
| "step": 1750 | |
| }, | |
| { | |
| "epoch": 1.4747474747474747, | |
| "grad_norm": 7.338104248046875, | |
| "learning_rate": 6.002209690890889e-07, | |
| "loss": 0.7077836990356445, | |
| "step": 1752 | |
| }, | |
| { | |
| "epoch": 1.4764309764309764, | |
| "grad_norm": 7.950678825378418, | |
| "learning_rate": 5.993908179628772e-07, | |
| "loss": 0.7144612073898315, | |
| "step": 1754 | |
| }, | |
| { | |
| "epoch": 1.4781144781144782, | |
| "grad_norm": 9.630928039550781, | |
| "learning_rate": 5.985604966645159e-07, | |
| "loss": 0.8856356143951416, | |
| "step": 1756 | |
| }, | |
| { | |
| "epoch": 1.4797979797979797, | |
| "grad_norm": 15.059102058410645, | |
| "learning_rate": 5.977300080548113e-07, | |
| "loss": 0.7022537589073181, | |
| "step": 1758 | |
| }, | |
| { | |
| "epoch": 1.4814814814814814, | |
| "grad_norm": 8.7070894241333, | |
| "learning_rate": 5.968993549951463e-07, | |
| "loss": 0.764058530330658, | |
| "step": 1760 | |
| }, | |
| { | |
| "epoch": 1.4831649831649831, | |
| "grad_norm": 8.469696998596191, | |
| "learning_rate": 5.9606854034747e-07, | |
| "loss": 0.9842470288276672, | |
| "step": 1762 | |
| }, | |
| { | |
| "epoch": 1.4848484848484849, | |
| "grad_norm": 3.3772764205932617, | |
| "learning_rate": 5.952375669742885e-07, | |
| "loss": 0.9660754799842834, | |
| "step": 1764 | |
| }, | |
| { | |
| "epoch": 1.4865319865319866, | |
| "grad_norm": 15.527210235595703, | |
| "learning_rate": 5.944064377386546e-07, | |
| "loss": 0.7293991446495056, | |
| "step": 1766 | |
| }, | |
| { | |
| "epoch": 1.4882154882154883, | |
| "grad_norm": 7.509492874145508, | |
| "learning_rate": 5.935751555041584e-07, | |
| "loss": 0.8063384294509888, | |
| "step": 1768 | |
| }, | |
| { | |
| "epoch": 1.4898989898989898, | |
| "grad_norm": 4.355234622955322, | |
| "learning_rate": 5.927437231349168e-07, | |
| "loss": 1.001720666885376, | |
| "step": 1770 | |
| }, | |
| { | |
| "epoch": 1.4915824915824916, | |
| "grad_norm": 12.318822860717773, | |
| "learning_rate": 5.919121434955643e-07, | |
| "loss": 0.4859294295310974, | |
| "step": 1772 | |
| }, | |
| { | |
| "epoch": 1.4932659932659933, | |
| "grad_norm": 2.495269536972046, | |
| "learning_rate": 5.910804194512425e-07, | |
| "loss": 0.8450926542282104, | |
| "step": 1774 | |
| }, | |
| { | |
| "epoch": 1.494949494949495, | |
| "grad_norm": 11.203375816345215, | |
| "learning_rate": 5.902485538675909e-07, | |
| "loss": 0.8008178472518921, | |
| "step": 1776 | |
| }, | |
| { | |
| "epoch": 1.4966329966329965, | |
| "grad_norm": 7.061748504638672, | |
| "learning_rate": 5.894165496107362e-07, | |
| "loss": 0.9183659553527832, | |
| "step": 1778 | |
| }, | |
| { | |
| "epoch": 1.4983164983164983, | |
| "grad_norm": 10.182241439819336, | |
| "learning_rate": 5.885844095472832e-07, | |
| "loss": 0.9454483985900879, | |
| "step": 1780 | |
| }, | |
| { | |
| "epoch": 1.5, | |
| "grad_norm": 10.898093223571777, | |
| "learning_rate": 5.877521365443047e-07, | |
| "loss": 0.612937331199646, | |
| "step": 1782 | |
| }, | |
| { | |
| "epoch": 1.5016835016835017, | |
| "grad_norm": 4.307864665985107, | |
| "learning_rate": 5.869197334693311e-07, | |
| "loss": 1.2052326202392578, | |
| "step": 1784 | |
| }, | |
| { | |
| "epoch": 1.5033670033670035, | |
| "grad_norm": 5.633955478668213, | |
| "learning_rate": 5.860872031903415e-07, | |
| "loss": 0.8493650555610657, | |
| "step": 1786 | |
| }, | |
| { | |
| "epoch": 1.5050505050505052, | |
| "grad_norm": 4.648436069488525, | |
| "learning_rate": 5.85254548575753e-07, | |
| "loss": 1.030457615852356, | |
| "step": 1788 | |
| }, | |
| { | |
| "epoch": 1.5067340067340067, | |
| "grad_norm": 19.26193618774414, | |
| "learning_rate": 5.84421772494411e-07, | |
| "loss": 0.6253769397735596, | |
| "step": 1790 | |
| }, | |
| { | |
| "epoch": 1.5084175084175084, | |
| "grad_norm": 18.525527954101562, | |
| "learning_rate": 5.835888778155793e-07, | |
| "loss": 0.6486117839813232, | |
| "step": 1792 | |
| }, | |
| { | |
| "epoch": 1.51010101010101, | |
| "grad_norm": 11.307801246643066, | |
| "learning_rate": 5.827558674089309e-07, | |
| "loss": 0.9593780636787415, | |
| "step": 1794 | |
| }, | |
| { | |
| "epoch": 1.5117845117845117, | |
| "grad_norm": 8.364538192749023, | |
| "learning_rate": 5.81922744144537e-07, | |
| "loss": 0.9520887136459351, | |
| "step": 1796 | |
| }, | |
| { | |
| "epoch": 1.5134680134680134, | |
| "grad_norm": 20.700618743896484, | |
| "learning_rate": 5.810895108928576e-07, | |
| "loss": 1.0315901041030884, | |
| "step": 1798 | |
| }, | |
| { | |
| "epoch": 1.5151515151515151, | |
| "grad_norm": 5.827000617980957, | |
| "learning_rate": 5.802561705247322e-07, | |
| "loss": 0.8540360331535339, | |
| "step": 1800 | |
| }, | |
| { | |
| "epoch": 1.5168350168350169, | |
| "grad_norm": 5.0441365242004395, | |
| "learning_rate": 5.794227259113688e-07, | |
| "loss": 1.0596797466278076, | |
| "step": 1802 | |
| }, | |
| { | |
| "epoch": 1.5185185185185186, | |
| "grad_norm": 15.39765453338623, | |
| "learning_rate": 5.785891799243345e-07, | |
| "loss": 0.9995817542076111, | |
| "step": 1804 | |
| }, | |
| { | |
| "epoch": 1.5202020202020203, | |
| "grad_norm": 3.5208356380462646, | |
| "learning_rate": 5.777555354355465e-07, | |
| "loss": 0.8799208402633667, | |
| "step": 1806 | |
| }, | |
| { | |
| "epoch": 1.5218855218855218, | |
| "grad_norm": 19.627885818481445, | |
| "learning_rate": 5.769217953172606e-07, | |
| "loss": 0.7398556470870972, | |
| "step": 1808 | |
| }, | |
| { | |
| "epoch": 1.5235690235690236, | |
| "grad_norm": 6.568966865539551, | |
| "learning_rate": 5.760879624420619e-07, | |
| "loss": 0.7647089958190918, | |
| "step": 1810 | |
| }, | |
| { | |
| "epoch": 1.5252525252525253, | |
| "grad_norm": 1.675675868988037, | |
| "learning_rate": 5.752540396828562e-07, | |
| "loss": 0.31169167160987854, | |
| "step": 1812 | |
| }, | |
| { | |
| "epoch": 1.5269360269360268, | |
| "grad_norm": 2.579169273376465, | |
| "learning_rate": 5.744200299128579e-07, | |
| "loss": 1.1429425477981567, | |
| "step": 1814 | |
| }, | |
| { | |
| "epoch": 1.5286195286195285, | |
| "grad_norm": 23.35523796081543, | |
| "learning_rate": 5.735859360055814e-07, | |
| "loss": 0.8635933995246887, | |
| "step": 1816 | |
| }, | |
| { | |
| "epoch": 1.5303030303030303, | |
| "grad_norm": 21.272926330566406, | |
| "learning_rate": 5.727517608348317e-07, | |
| "loss": 0.947623610496521, | |
| "step": 1818 | |
| }, | |
| { | |
| "epoch": 1.531986531986532, | |
| "grad_norm": 5.091054916381836, | |
| "learning_rate": 5.719175072746926e-07, | |
| "loss": 0.8388112187385559, | |
| "step": 1820 | |
| }, | |
| { | |
| "epoch": 1.5336700336700337, | |
| "grad_norm": 5.891815185546875, | |
| "learning_rate": 5.710831781995191e-07, | |
| "loss": 0.7908442616462708, | |
| "step": 1822 | |
| }, | |
| { | |
| "epoch": 1.5353535353535355, | |
| "grad_norm": 5.613356113433838, | |
| "learning_rate": 5.702487764839258e-07, | |
| "loss": 1.0302139520645142, | |
| "step": 1824 | |
| }, | |
| { | |
| "epoch": 1.5370370370370372, | |
| "grad_norm": 5.067226886749268, | |
| "learning_rate": 5.694143050027778e-07, | |
| "loss": 0.9267786145210266, | |
| "step": 1826 | |
| }, | |
| { | |
| "epoch": 1.5387205387205387, | |
| "grad_norm": 5.48887300491333, | |
| "learning_rate": 5.685797666311801e-07, | |
| "loss": 0.9696795344352722, | |
| "step": 1828 | |
| }, | |
| { | |
| "epoch": 1.5404040404040404, | |
| "grad_norm": 6.487977981567383, | |
| "learning_rate": 5.677451642444689e-07, | |
| "loss": 0.7679098844528198, | |
| "step": 1830 | |
| }, | |
| { | |
| "epoch": 1.542087542087542, | |
| "grad_norm": 8.134166717529297, | |
| "learning_rate": 5.669105007182005e-07, | |
| "loss": 0.7442073822021484, | |
| "step": 1832 | |
| }, | |
| { | |
| "epoch": 1.5437710437710437, | |
| "grad_norm": 4.254693984985352, | |
| "learning_rate": 5.660757789281417e-07, | |
| "loss": 1.0978777408599854, | |
| "step": 1834 | |
| }, | |
| { | |
| "epoch": 1.5454545454545454, | |
| "grad_norm": 11.371539115905762, | |
| "learning_rate": 5.652410017502606e-07, | |
| "loss": 0.9501652717590332, | |
| "step": 1836 | |
| }, | |
| { | |
| "epoch": 1.5471380471380471, | |
| "grad_norm": 16.133960723876953, | |
| "learning_rate": 5.644061720607157e-07, | |
| "loss": 0.536079466342926, | |
| "step": 1838 | |
| }, | |
| { | |
| "epoch": 1.5488215488215489, | |
| "grad_norm": 3.337813377380371, | |
| "learning_rate": 5.635712927358466e-07, | |
| "loss": 0.7914686799049377, | |
| "step": 1840 | |
| }, | |
| { | |
| "epoch": 1.5505050505050506, | |
| "grad_norm": 3.199794292449951, | |
| "learning_rate": 5.627363666521635e-07, | |
| "loss": 0.6903548240661621, | |
| "step": 1842 | |
| }, | |
| { | |
| "epoch": 1.5521885521885523, | |
| "grad_norm": 6.261875152587891, | |
| "learning_rate": 5.619013966863388e-07, | |
| "loss": 0.5220504403114319, | |
| "step": 1844 | |
| }, | |
| { | |
| "epoch": 1.5538720538720538, | |
| "grad_norm": 3.182934284210205, | |
| "learning_rate": 5.610663857151945e-07, | |
| "loss": 0.9434134364128113, | |
| "step": 1846 | |
| }, | |
| { | |
| "epoch": 1.5555555555555556, | |
| "grad_norm": 10.680120468139648, | |
| "learning_rate": 5.602313366156953e-07, | |
| "loss": 1.0320630073547363, | |
| "step": 1848 | |
| }, | |
| { | |
| "epoch": 1.557239057239057, | |
| "grad_norm": 3.8439457416534424, | |
| "learning_rate": 5.593962522649366e-07, | |
| "loss": 0.837065577507019, | |
| "step": 1850 | |
| }, | |
| { | |
| "epoch": 1.5589225589225588, | |
| "grad_norm": 3.5652201175689697, | |
| "learning_rate": 5.585611355401352e-07, | |
| "loss": 0.9864023923873901, | |
| "step": 1852 | |
| }, | |
| { | |
| "epoch": 1.5606060606060606, | |
| "grad_norm": 20.397172927856445, | |
| "learning_rate": 5.577259893186196e-07, | |
| "loss": 0.6269755363464355, | |
| "step": 1854 | |
| }, | |
| { | |
| "epoch": 1.5622895622895623, | |
| "grad_norm": 5.430056571960449, | |
| "learning_rate": 5.568908164778201e-07, | |
| "loss": 0.6682024598121643, | |
| "step": 1856 | |
| }, | |
| { | |
| "epoch": 1.563973063973064, | |
| "grad_norm": 3.305800199508667, | |
| "learning_rate": 5.560556198952585e-07, | |
| "loss": 1.017985224723816, | |
| "step": 1858 | |
| }, | |
| { | |
| "epoch": 1.5656565656565657, | |
| "grad_norm": 10.345197677612305, | |
| "learning_rate": 5.552204024485382e-07, | |
| "loss": 0.46250391006469727, | |
| "step": 1860 | |
| }, | |
| { | |
| "epoch": 1.5673400673400675, | |
| "grad_norm": 3.9617226123809814, | |
| "learning_rate": 5.543851670153353e-07, | |
| "loss": 1.0285084247589111, | |
| "step": 1862 | |
| }, | |
| { | |
| "epoch": 1.569023569023569, | |
| "grad_norm": 4.295073509216309, | |
| "learning_rate": 5.535499164733869e-07, | |
| "loss": 0.44839808344841003, | |
| "step": 1864 | |
| }, | |
| { | |
| "epoch": 1.5707070707070707, | |
| "grad_norm": 9.806756973266602, | |
| "learning_rate": 5.527146537004823e-07, | |
| "loss": 1.037379503250122, | |
| "step": 1866 | |
| }, | |
| { | |
| "epoch": 1.5723905723905722, | |
| "grad_norm": 7.301255702972412, | |
| "learning_rate": 5.518793815744538e-07, | |
| "loss": 0.6518345475196838, | |
| "step": 1868 | |
| }, | |
| { | |
| "epoch": 1.574074074074074, | |
| "grad_norm": 2.5327539443969727, | |
| "learning_rate": 5.510441029731648e-07, | |
| "loss": 0.8190163969993591, | |
| "step": 1870 | |
| }, | |
| { | |
| "epoch": 1.5757575757575757, | |
| "grad_norm": 5.190461158752441, | |
| "learning_rate": 5.502088207745018e-07, | |
| "loss": 0.8958265781402588, | |
| "step": 1872 | |
| }, | |
| { | |
| "epoch": 1.5774410774410774, | |
| "grad_norm": 4.127246379852295, | |
| "learning_rate": 5.493735378563634e-07, | |
| "loss": 1.0178121328353882, | |
| "step": 1874 | |
| }, | |
| { | |
| "epoch": 1.5791245791245792, | |
| "grad_norm": 6.272322654724121, | |
| "learning_rate": 5.485382570966506e-07, | |
| "loss": 0.6380331516265869, | |
| "step": 1876 | |
| }, | |
| { | |
| "epoch": 1.5808080808080809, | |
| "grad_norm": 4.318612575531006, | |
| "learning_rate": 5.477029813732572e-07, | |
| "loss": 1.184647798538208, | |
| "step": 1878 | |
| }, | |
| { | |
| "epoch": 1.5824915824915826, | |
| "grad_norm": 13.378433227539062, | |
| "learning_rate": 5.468677135640595e-07, | |
| "loss": 0.8356841802597046, | |
| "step": 1880 | |
| }, | |
| { | |
| "epoch": 1.5841750841750841, | |
| "grad_norm": 6.793831825256348, | |
| "learning_rate": 5.460324565469065e-07, | |
| "loss": 0.5384290218353271, | |
| "step": 1882 | |
| }, | |
| { | |
| "epoch": 1.5858585858585859, | |
| "grad_norm": 10.296432495117188, | |
| "learning_rate": 5.4519721319961e-07, | |
| "loss": 0.574350893497467, | |
| "step": 1884 | |
| }, | |
| { | |
| "epoch": 1.5875420875420876, | |
| "grad_norm": 2.8328824043273926, | |
| "learning_rate": 5.443619863999349e-07, | |
| "loss": 0.7007859945297241, | |
| "step": 1886 | |
| }, | |
| { | |
| "epoch": 1.589225589225589, | |
| "grad_norm": 4.71426248550415, | |
| "learning_rate": 5.435267790255889e-07, | |
| "loss": 1.0490843057632446, | |
| "step": 1888 | |
| }, | |
| { | |
| "epoch": 1.5909090909090908, | |
| "grad_norm": 4.223301887512207, | |
| "learning_rate": 5.426915939542127e-07, | |
| "loss": 0.2507448196411133, | |
| "step": 1890 | |
| }, | |
| { | |
| "epoch": 1.5925925925925926, | |
| "grad_norm": 4.74931526184082, | |
| "learning_rate": 5.418564340633704e-07, | |
| "loss": 1.1350317001342773, | |
| "step": 1892 | |
| }, | |
| { | |
| "epoch": 1.5942760942760943, | |
| "grad_norm": 4.932158470153809, | |
| "learning_rate": 5.410213022305395e-07, | |
| "loss": 0.8503820300102234, | |
| "step": 1894 | |
| }, | |
| { | |
| "epoch": 1.595959595959596, | |
| "grad_norm": 14.765481948852539, | |
| "learning_rate": 5.401862013331e-07, | |
| "loss": 1.011979103088379, | |
| "step": 1896 | |
| }, | |
| { | |
| "epoch": 1.5976430976430978, | |
| "grad_norm": 19.991121292114258, | |
| "learning_rate": 5.393511342483262e-07, | |
| "loss": 0.9245116710662842, | |
| "step": 1898 | |
| }, | |
| { | |
| "epoch": 1.5993265993265995, | |
| "grad_norm": 8.519593238830566, | |
| "learning_rate": 5.385161038533756e-07, | |
| "loss": 1.0895578861236572, | |
| "step": 1900 | |
| }, | |
| { | |
| "epoch": 1.601010101010101, | |
| "grad_norm": 7.841440200805664, | |
| "learning_rate": 5.376811130252791e-07, | |
| "loss": 0.9659103155136108, | |
| "step": 1902 | |
| }, | |
| { | |
| "epoch": 1.6026936026936027, | |
| "grad_norm": 20.247276306152344, | |
| "learning_rate": 5.368461646409316e-07, | |
| "loss": 0.796362042427063, | |
| "step": 1904 | |
| }, | |
| { | |
| "epoch": 1.6043771043771042, | |
| "grad_norm": 3.420994281768799, | |
| "learning_rate": 5.360112615770814e-07, | |
| "loss": 1.1793514490127563, | |
| "step": 1906 | |
| }, | |
| { | |
| "epoch": 1.606060606060606, | |
| "grad_norm": 3.9010415077209473, | |
| "learning_rate": 5.351764067103209e-07, | |
| "loss": 0.9917897582054138, | |
| "step": 1908 | |
| }, | |
| { | |
| "epoch": 1.6077441077441077, | |
| "grad_norm": 2.7143442630767822, | |
| "learning_rate": 5.343416029170767e-07, | |
| "loss": 0.6407607793807983, | |
| "step": 1910 | |
| }, | |
| { | |
| "epoch": 1.6094276094276094, | |
| "grad_norm": 30.25970458984375, | |
| "learning_rate": 5.335068530735986e-07, | |
| "loss": 0.6329153776168823, | |
| "step": 1912 | |
| }, | |
| { | |
| "epoch": 1.6111111111111112, | |
| "grad_norm": 24.24694061279297, | |
| "learning_rate": 5.326721600559513e-07, | |
| "loss": 0.8712905645370483, | |
| "step": 1914 | |
| }, | |
| { | |
| "epoch": 1.612794612794613, | |
| "grad_norm": 8.489051818847656, | |
| "learning_rate": 5.318375267400035e-07, | |
| "loss": 0.7373044490814209, | |
| "step": 1916 | |
| }, | |
| { | |
| "epoch": 1.6144781144781146, | |
| "grad_norm": 17.98837661743164, | |
| "learning_rate": 5.310029560014182e-07, | |
| "loss": 0.6858376860618591, | |
| "step": 1918 | |
| }, | |
| { | |
| "epoch": 1.6161616161616161, | |
| "grad_norm": 8.963407516479492, | |
| "learning_rate": 5.301684507156424e-07, | |
| "loss": 0.7940559983253479, | |
| "step": 1920 | |
| }, | |
| { | |
| "epoch": 1.6178451178451179, | |
| "grad_norm": 3.7711410522460938, | |
| "learning_rate": 5.293340137578983e-07, | |
| "loss": 0.9433008432388306, | |
| "step": 1922 | |
| }, | |
| { | |
| "epoch": 1.6195286195286194, | |
| "grad_norm": 3.9224212169647217, | |
| "learning_rate": 5.284996480031722e-07, | |
| "loss": 0.7148711085319519, | |
| "step": 1924 | |
| }, | |
| { | |
| "epoch": 1.621212121212121, | |
| "grad_norm": 4.903892993927002, | |
| "learning_rate": 5.276653563262053e-07, | |
| "loss": 0.6378931403160095, | |
| "step": 1926 | |
| }, | |
| { | |
| "epoch": 1.6228956228956228, | |
| "grad_norm": 2.4689173698425293, | |
| "learning_rate": 5.268311416014831e-07, | |
| "loss": 0.8439034223556519, | |
| "step": 1928 | |
| }, | |
| { | |
| "epoch": 1.6245791245791246, | |
| "grad_norm": 10.568015098571777, | |
| "learning_rate": 5.259970067032267e-07, | |
| "loss": 0.8784427642822266, | |
| "step": 1930 | |
| }, | |
| { | |
| "epoch": 1.6262626262626263, | |
| "grad_norm": 3.8736679553985596, | |
| "learning_rate": 5.251629545053817e-07, | |
| "loss": 0.711959958076477, | |
| "step": 1932 | |
| }, | |
| { | |
| "epoch": 1.627946127946128, | |
| "grad_norm": 8.50756549835205, | |
| "learning_rate": 5.243289878816088e-07, | |
| "loss": 1.071230173110962, | |
| "step": 1934 | |
| }, | |
| { | |
| "epoch": 1.6296296296296298, | |
| "grad_norm": 7.397336006164551, | |
| "learning_rate": 5.23495109705274e-07, | |
| "loss": 1.075880765914917, | |
| "step": 1936 | |
| }, | |
| { | |
| "epoch": 1.6313131313131313, | |
| "grad_norm": 4.465485572814941, | |
| "learning_rate": 5.226613228494383e-07, | |
| "loss": 1.1016345024108887, | |
| "step": 1938 | |
| }, | |
| { | |
| "epoch": 1.632996632996633, | |
| "grad_norm": 3.5152881145477295, | |
| "learning_rate": 5.218276301868484e-07, | |
| "loss": 0.8878377676010132, | |
| "step": 1940 | |
| }, | |
| { | |
| "epoch": 1.6346801346801347, | |
| "grad_norm": 2.7900075912475586, | |
| "learning_rate": 5.209940345899263e-07, | |
| "loss": 1.0775192975997925, | |
| "step": 1942 | |
| }, | |
| { | |
| "epoch": 1.6363636363636362, | |
| "grad_norm": 16.28611183166504, | |
| "learning_rate": 5.201605389307595e-07, | |
| "loss": 0.8081328868865967, | |
| "step": 1944 | |
| }, | |
| { | |
| "epoch": 1.638047138047138, | |
| "grad_norm": 4.536927223205566, | |
| "learning_rate": 5.193271460810912e-07, | |
| "loss": 0.5076104998588562, | |
| "step": 1946 | |
| }, | |
| { | |
| "epoch": 1.6397306397306397, | |
| "grad_norm": 7.646073341369629, | |
| "learning_rate": 5.184938589123105e-07, | |
| "loss": 1.030837059020996, | |
| "step": 1948 | |
| }, | |
| { | |
| "epoch": 1.6414141414141414, | |
| "grad_norm": 3.751291036605835, | |
| "learning_rate": 5.176606802954427e-07, | |
| "loss": 1.0447328090667725, | |
| "step": 1950 | |
| }, | |
| { | |
| "epoch": 1.6430976430976432, | |
| "grad_norm": 10.364418983459473, | |
| "learning_rate": 5.168276131011378e-07, | |
| "loss": 0.5750001072883606, | |
| "step": 1952 | |
| }, | |
| { | |
| "epoch": 1.644781144781145, | |
| "grad_norm": 12.142930030822754, | |
| "learning_rate": 5.159946601996638e-07, | |
| "loss": 0.5072500705718994, | |
| "step": 1954 | |
| }, | |
| { | |
| "epoch": 1.6464646464646466, | |
| "grad_norm": 26.362775802612305, | |
| "learning_rate": 5.151618244608931e-07, | |
| "loss": 0.3224486708641052, | |
| "step": 1956 | |
| }, | |
| { | |
| "epoch": 1.6481481481481481, | |
| "grad_norm": 5.14034366607666, | |
| "learning_rate": 5.143291087542957e-07, | |
| "loss": 0.7505396604537964, | |
| "step": 1958 | |
| }, | |
| { | |
| "epoch": 1.6498316498316499, | |
| "grad_norm": 3.361147880554199, | |
| "learning_rate": 5.134965159489276e-07, | |
| "loss": 0.8362823128700256, | |
| "step": 1960 | |
| }, | |
| { | |
| "epoch": 1.6515151515151514, | |
| "grad_norm": 19.640413284301758, | |
| "learning_rate": 5.126640489134211e-07, | |
| "loss": 0.7406565546989441, | |
| "step": 1962 | |
| }, | |
| { | |
| "epoch": 1.6531986531986531, | |
| "grad_norm": 8.70249080657959, | |
| "learning_rate": 5.118317105159754e-07, | |
| "loss": 0.5722910761833191, | |
| "step": 1964 | |
| }, | |
| { | |
| "epoch": 1.6548821548821548, | |
| "grad_norm": 4.43184232711792, | |
| "learning_rate": 5.109995036243469e-07, | |
| "loss": 0.6934190392494202, | |
| "step": 1966 | |
| }, | |
| { | |
| "epoch": 1.6565656565656566, | |
| "grad_norm": 6.205933094024658, | |
| "learning_rate": 5.10167431105838e-07, | |
| "loss": 0.8717750310897827, | |
| "step": 1968 | |
| }, | |
| { | |
| "epoch": 1.6582491582491583, | |
| "grad_norm": 11.131174087524414, | |
| "learning_rate": 5.093354958272888e-07, | |
| "loss": 0.8401749730110168, | |
| "step": 1970 | |
| }, | |
| { | |
| "epoch": 1.65993265993266, | |
| "grad_norm": 7.66545295715332, | |
| "learning_rate": 5.085037006550664e-07, | |
| "loss": 0.9823508858680725, | |
| "step": 1972 | |
| }, | |
| { | |
| "epoch": 1.6616161616161618, | |
| "grad_norm": 2.336907148361206, | |
| "learning_rate": 5.076720484550552e-07, | |
| "loss": 0.8289145231246948, | |
| "step": 1974 | |
| }, | |
| { | |
| "epoch": 1.6632996632996633, | |
| "grad_norm": 4.420996189117432, | |
| "learning_rate": 5.068405420926468e-07, | |
| "loss": 0.787537693977356, | |
| "step": 1976 | |
| }, | |
| { | |
| "epoch": 1.664983164983165, | |
| "grad_norm": 16.187654495239258, | |
| "learning_rate": 5.060091844327308e-07, | |
| "loss": 0.8101489543914795, | |
| "step": 1978 | |
| }, | |
| { | |
| "epoch": 1.6666666666666665, | |
| "grad_norm": 6.166725158691406, | |
| "learning_rate": 5.051779783396839e-07, | |
| "loss": 0.9080666303634644, | |
| "step": 1980 | |
| }, | |
| { | |
| "epoch": 1.6683501683501682, | |
| "grad_norm": 14.882169723510742, | |
| "learning_rate": 5.043469266773607e-07, | |
| "loss": 0.5505136251449585, | |
| "step": 1982 | |
| }, | |
| { | |
| "epoch": 1.67003367003367, | |
| "grad_norm": 20.98061180114746, | |
| "learning_rate": 5.035160323090842e-07, | |
| "loss": 0.4539128839969635, | |
| "step": 1984 | |
| }, | |
| { | |
| "epoch": 1.6717171717171717, | |
| "grad_norm": 3.427556276321411, | |
| "learning_rate": 5.026852980976348e-07, | |
| "loss": 1.0426026582717896, | |
| "step": 1986 | |
| }, | |
| { | |
| "epoch": 1.6734006734006734, | |
| "grad_norm": 13.226459503173828, | |
| "learning_rate": 5.018547269052416e-07, | |
| "loss": 0.9861583113670349, | |
| "step": 1988 | |
| }, | |
| { | |
| "epoch": 1.6750841750841752, | |
| "grad_norm": 3.2640278339385986, | |
| "learning_rate": 5.010243215935715e-07, | |
| "loss": 0.6827632784843445, | |
| "step": 1990 | |
| }, | |
| { | |
| "epoch": 1.676767676767677, | |
| "grad_norm": 3.51690673828125, | |
| "learning_rate": 5.001940850237208e-07, | |
| "loss": 1.151839256286621, | |
| "step": 1992 | |
| }, | |
| { | |
| "epoch": 1.6784511784511784, | |
| "grad_norm": 9.070838928222656, | |
| "learning_rate": 4.993640200562031e-07, | |
| "loss": 0.7563179731369019, | |
| "step": 1994 | |
| }, | |
| { | |
| "epoch": 1.6801346801346801, | |
| "grad_norm": 7.1710896492004395, | |
| "learning_rate": 4.985341295509421e-07, | |
| "loss": 0.6942537426948547, | |
| "step": 1996 | |
| }, | |
| { | |
| "epoch": 1.6818181818181817, | |
| "grad_norm": 2.580467939376831, | |
| "learning_rate": 4.977044163672595e-07, | |
| "loss": 0.9790170192718506, | |
| "step": 1998 | |
| }, | |
| { | |
| "epoch": 1.6835016835016834, | |
| "grad_norm": 13.908555030822754, | |
| "learning_rate": 4.968748833638661e-07, | |
| "loss": 0.7780789136886597, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 1.6851851851851851, | |
| "grad_norm": 4.1657209396362305, | |
| "learning_rate": 4.960455333988525e-07, | |
| "loss": 0.6467783451080322, | |
| "step": 2002 | |
| }, | |
| { | |
| "epoch": 1.6868686868686869, | |
| "grad_norm": 8.925399780273438, | |
| "learning_rate": 4.952163693296782e-07, | |
| "loss": 0.7447915077209473, | |
| "step": 2004 | |
| }, | |
| { | |
| "epoch": 1.6885521885521886, | |
| "grad_norm": 9.181722640991211, | |
| "learning_rate": 4.943873940131618e-07, | |
| "loss": 0.6678234338760376, | |
| "step": 2006 | |
| }, | |
| { | |
| "epoch": 1.6902356902356903, | |
| "grad_norm": 4.147680282592773, | |
| "learning_rate": 4.935586103054729e-07, | |
| "loss": 0.9828382730484009, | |
| "step": 2008 | |
| }, | |
| { | |
| "epoch": 1.691919191919192, | |
| "grad_norm": 4.527743339538574, | |
| "learning_rate": 4.927300210621198e-07, | |
| "loss": 0.6916370987892151, | |
| "step": 2010 | |
| }, | |
| { | |
| "epoch": 1.6936026936026936, | |
| "grad_norm": 8.658282279968262, | |
| "learning_rate": 4.919016291379407e-07, | |
| "loss": 0.9242024421691895, | |
| "step": 2012 | |
| }, | |
| { | |
| "epoch": 1.6952861952861953, | |
| "grad_norm": 4.856821537017822, | |
| "learning_rate": 4.910734373870946e-07, | |
| "loss": 0.6717578172683716, | |
| "step": 2014 | |
| }, | |
| { | |
| "epoch": 1.696969696969697, | |
| "grad_norm": 6.037668704986572, | |
| "learning_rate": 4.902454486630506e-07, | |
| "loss": 0.8340665102005005, | |
| "step": 2016 | |
| }, | |
| { | |
| "epoch": 1.6986531986531985, | |
| "grad_norm": 46.316307067871094, | |
| "learning_rate": 4.894176658185781e-07, | |
| "loss": 0.8020853996276855, | |
| "step": 2018 | |
| }, | |
| { | |
| "epoch": 1.7003367003367003, | |
| "grad_norm": 7.765172958374023, | |
| "learning_rate": 4.885900917057374e-07, | |
| "loss": 0.8143132328987122, | |
| "step": 2020 | |
| }, | |
| { | |
| "epoch": 1.702020202020202, | |
| "grad_norm": 2.976177930831909, | |
| "learning_rate": 4.877627291758697e-07, | |
| "loss": 1.0872082710266113, | |
| "step": 2022 | |
| }, | |
| { | |
| "epoch": 1.7037037037037037, | |
| "grad_norm": 7.9460225105285645, | |
| "learning_rate": 4.869355810795866e-07, | |
| "loss": 0.8318688273429871, | |
| "step": 2024 | |
| }, | |
| { | |
| "epoch": 1.7053872053872055, | |
| "grad_norm": 5.210888385772705, | |
| "learning_rate": 4.861086502667617e-07, | |
| "loss": 0.9813876152038574, | |
| "step": 2026 | |
| }, | |
| { | |
| "epoch": 1.7070707070707072, | |
| "grad_norm": 6.269561767578125, | |
| "learning_rate": 4.852819395865196e-07, | |
| "loss": 1.1104636192321777, | |
| "step": 2028 | |
| }, | |
| { | |
| "epoch": 1.708754208754209, | |
| "grad_norm": 14.44339656829834, | |
| "learning_rate": 4.844554518872261e-07, | |
| "loss": 0.6626958847045898, | |
| "step": 2030 | |
| }, | |
| { | |
| "epoch": 1.7104377104377104, | |
| "grad_norm": 22.18317413330078, | |
| "learning_rate": 4.836291900164793e-07, | |
| "loss": 0.5179702639579773, | |
| "step": 2032 | |
| }, | |
| { | |
| "epoch": 1.7121212121212122, | |
| "grad_norm": 4.272150039672852, | |
| "learning_rate": 4.82803156821099e-07, | |
| "loss": 1.0629268884658813, | |
| "step": 2034 | |
| }, | |
| { | |
| "epoch": 1.7138047138047137, | |
| "grad_norm": 4.388714790344238, | |
| "learning_rate": 4.81977355147117e-07, | |
| "loss": 0.8111241459846497, | |
| "step": 2036 | |
| }, | |
| { | |
| "epoch": 1.7154882154882154, | |
| "grad_norm": 10.06100082397461, | |
| "learning_rate": 4.811517878397676e-07, | |
| "loss": 0.4932488799095154, | |
| "step": 2038 | |
| }, | |
| { | |
| "epoch": 1.7171717171717171, | |
| "grad_norm": 6.3692474365234375, | |
| "learning_rate": 4.803264577434778e-07, | |
| "loss": 0.5541532039642334, | |
| "step": 2040 | |
| }, | |
| { | |
| "epoch": 1.7188552188552189, | |
| "grad_norm": 3.8727078437805176, | |
| "learning_rate": 4.795013677018567e-07, | |
| "loss": 0.9600075483322144, | |
| "step": 2042 | |
| }, | |
| { | |
| "epoch": 1.7205387205387206, | |
| "grad_norm": 3.6546130180358887, | |
| "learning_rate": 4.786765205576866e-07, | |
| "loss": 0.9439678192138672, | |
| "step": 2044 | |
| }, | |
| { | |
| "epoch": 1.7222222222222223, | |
| "grad_norm": 2.5347650051116943, | |
| "learning_rate": 4.778519191529133e-07, | |
| "loss": 1.1322201490402222, | |
| "step": 2046 | |
| }, | |
| { | |
| "epoch": 1.723905723905724, | |
| "grad_norm": 3.2225019931793213, | |
| "learning_rate": 4.770275663286354e-07, | |
| "loss": 1.0858080387115479, | |
| "step": 2048 | |
| }, | |
| { | |
| "epoch": 1.7255892255892256, | |
| "grad_norm": 7.802936553955078, | |
| "learning_rate": 4.762034649250951e-07, | |
| "loss": 0.4231239855289459, | |
| "step": 2050 | |
| }, | |
| { | |
| "epoch": 1.7272727272727273, | |
| "grad_norm": 2.6615946292877197, | |
| "learning_rate": 4.753796177816688e-07, | |
| "loss": 1.0833523273468018, | |
| "step": 2052 | |
| }, | |
| { | |
| "epoch": 1.7289562289562288, | |
| "grad_norm": 12.313030242919922, | |
| "learning_rate": 4.745560277368563e-07, | |
| "loss": 0.9946305751800537, | |
| "step": 2054 | |
| }, | |
| { | |
| "epoch": 1.7306397306397305, | |
| "grad_norm": 4.885106563568115, | |
| "learning_rate": 4.7373269762827196e-07, | |
| "loss": 0.8092712163925171, | |
| "step": 2056 | |
| }, | |
| { | |
| "epoch": 1.7323232323232323, | |
| "grad_norm": 6.8623809814453125, | |
| "learning_rate": 4.7290963029263453e-07, | |
| "loss": 1.1297715902328491, | |
| "step": 2058 | |
| }, | |
| { | |
| "epoch": 1.734006734006734, | |
| "grad_norm": 3.386683702468872, | |
| "learning_rate": 4.720868285657571e-07, | |
| "loss": 0.6623663902282715, | |
| "step": 2060 | |
| }, | |
| { | |
| "epoch": 1.7356902356902357, | |
| "grad_norm": 7.138562202453613, | |
| "learning_rate": 4.7126429528253775e-07, | |
| "loss": 1.0328242778778076, | |
| "step": 2062 | |
| }, | |
| { | |
| "epoch": 1.7373737373737375, | |
| "grad_norm": 12.482364654541016, | |
| "learning_rate": 4.7044203327694995e-07, | |
| "loss": 0.7162414789199829, | |
| "step": 2064 | |
| }, | |
| { | |
| "epoch": 1.7390572390572392, | |
| "grad_norm": 2.729790449142456, | |
| "learning_rate": 4.6962004538203224e-07, | |
| "loss": 0.74675053358078, | |
| "step": 2066 | |
| }, | |
| { | |
| "epoch": 1.7407407407407407, | |
| "grad_norm": 11.824975967407227, | |
| "learning_rate": 4.687983344298786e-07, | |
| "loss": 0.8567626476287842, | |
| "step": 2068 | |
| }, | |
| { | |
| "epoch": 1.7424242424242424, | |
| "grad_norm": 18.96659278869629, | |
| "learning_rate": 4.679769032516293e-07, | |
| "loss": 0.7988073825836182, | |
| "step": 2070 | |
| }, | |
| { | |
| "epoch": 1.7441077441077442, | |
| "grad_norm": 9.409902572631836, | |
| "learning_rate": 4.6715575467746014e-07, | |
| "loss": 0.6924943923950195, | |
| "step": 2072 | |
| }, | |
| { | |
| "epoch": 1.7457912457912457, | |
| "grad_norm": 4.546468257904053, | |
| "learning_rate": 4.663348915365735e-07, | |
| "loss": 0.5785316228866577, | |
| "step": 2074 | |
| }, | |
| { | |
| "epoch": 1.7474747474747474, | |
| "grad_norm": 19.581872940063477, | |
| "learning_rate": 4.6551431665718833e-07, | |
| "loss": 1.1338218450546265, | |
| "step": 2076 | |
| }, | |
| { | |
| "epoch": 1.7491582491582491, | |
| "grad_norm": 5.740807056427002, | |
| "learning_rate": 4.646940328665302e-07, | |
| "loss": 0.8011679649353027, | |
| "step": 2078 | |
| }, | |
| { | |
| "epoch": 1.7508417508417509, | |
| "grad_norm": 11.178342819213867, | |
| "learning_rate": 4.638740429908222e-07, | |
| "loss": 1.0102814435958862, | |
| "step": 2080 | |
| }, | |
| { | |
| "epoch": 1.7525252525252526, | |
| "grad_norm": 5.02017879486084, | |
| "learning_rate": 4.6305434985527437e-07, | |
| "loss": 0.7039767503738403, | |
| "step": 2082 | |
| }, | |
| { | |
| "epoch": 1.7542087542087543, | |
| "grad_norm": 2.7927052974700928, | |
| "learning_rate": 4.6223495628407427e-07, | |
| "loss": 1.2280118465423584, | |
| "step": 2084 | |
| }, | |
| { | |
| "epoch": 1.7558922558922558, | |
| "grad_norm": 3.4108667373657227, | |
| "learning_rate": 4.614158651003778e-07, | |
| "loss": 0.8403428196907043, | |
| "step": 2086 | |
| }, | |
| { | |
| "epoch": 1.7575757575757576, | |
| "grad_norm": 7.315975189208984, | |
| "learning_rate": 4.605970791262984e-07, | |
| "loss": 0.5117719769477844, | |
| "step": 2088 | |
| }, | |
| { | |
| "epoch": 1.7592592592592593, | |
| "grad_norm": 26.32462501525879, | |
| "learning_rate": 4.5977860118289846e-07, | |
| "loss": 0.5781146287918091, | |
| "step": 2090 | |
| }, | |
| { | |
| "epoch": 1.7609427609427608, | |
| "grad_norm": 6.7758965492248535, | |
| "learning_rate": 4.5896043409017895e-07, | |
| "loss": 0.6854249238967896, | |
| "step": 2092 | |
| }, | |
| { | |
| "epoch": 1.7626262626262625, | |
| "grad_norm": 8.735897064208984, | |
| "learning_rate": 4.5814258066706946e-07, | |
| "loss": 0.4588479995727539, | |
| "step": 2094 | |
| }, | |
| { | |
| "epoch": 1.7643097643097643, | |
| "grad_norm": 3.5783393383026123, | |
| "learning_rate": 4.5732504373141957e-07, | |
| "loss": 0.6785897612571716, | |
| "step": 2096 | |
| }, | |
| { | |
| "epoch": 1.765993265993266, | |
| "grad_norm": 3.7991697788238525, | |
| "learning_rate": 4.5650782609998785e-07, | |
| "loss": 1.091996192932129, | |
| "step": 2098 | |
| }, | |
| { | |
| "epoch": 1.7676767676767677, | |
| "grad_norm": 4.503328800201416, | |
| "learning_rate": 4.556909305884327e-07, | |
| "loss": 0.9916384816169739, | |
| "step": 2100 | |
| }, | |
| { | |
| "epoch": 1.7693602693602695, | |
| "grad_norm": 4.141926288604736, | |
| "learning_rate": 4.5487436001130295e-07, | |
| "loss": 0.9449851512908936, | |
| "step": 2102 | |
| }, | |
| { | |
| "epoch": 1.7710437710437712, | |
| "grad_norm": 13.318826675415039, | |
| "learning_rate": 4.5405811718202804e-07, | |
| "loss": 0.5735121369361877, | |
| "step": 2104 | |
| }, | |
| { | |
| "epoch": 1.7727272727272727, | |
| "grad_norm": 4.924741268157959, | |
| "learning_rate": 4.5324220491290765e-07, | |
| "loss": 0.7375026941299438, | |
| "step": 2106 | |
| }, | |
| { | |
| "epoch": 1.7744107744107744, | |
| "grad_norm": 7.583310127258301, | |
| "learning_rate": 4.5242662601510305e-07, | |
| "loss": 0.9382034540176392, | |
| "step": 2108 | |
| }, | |
| { | |
| "epoch": 1.776094276094276, | |
| "grad_norm": 60.219932556152344, | |
| "learning_rate": 4.516113832986267e-07, | |
| "loss": 0.6118134260177612, | |
| "step": 2110 | |
| }, | |
| { | |
| "epoch": 1.7777777777777777, | |
| "grad_norm": 2.8085687160491943, | |
| "learning_rate": 4.5079647957233256e-07, | |
| "loss": 0.869990348815918, | |
| "step": 2112 | |
| }, | |
| { | |
| "epoch": 1.7794612794612794, | |
| "grad_norm": 2.663541078567505, | |
| "learning_rate": 4.499819176439071e-07, | |
| "loss": 0.9881576299667358, | |
| "step": 2114 | |
| }, | |
| { | |
| "epoch": 1.7811447811447811, | |
| "grad_norm": 3.3883938789367676, | |
| "learning_rate": 4.4916770031985887e-07, | |
| "loss": 0.9770991206169128, | |
| "step": 2116 | |
| }, | |
| { | |
| "epoch": 1.7828282828282829, | |
| "grad_norm": 3.3858611583709717, | |
| "learning_rate": 4.48353830405509e-07, | |
| "loss": 1.073500394821167, | |
| "step": 2118 | |
| }, | |
| { | |
| "epoch": 1.7845117845117846, | |
| "grad_norm": 29.282451629638672, | |
| "learning_rate": 4.475403107049819e-07, | |
| "loss": 0.6810465455055237, | |
| "step": 2120 | |
| }, | |
| { | |
| "epoch": 1.7861952861952863, | |
| "grad_norm": 12.527602195739746, | |
| "learning_rate": 4.4672714402119514e-07, | |
| "loss": 0.682815432548523, | |
| "step": 2122 | |
| }, | |
| { | |
| "epoch": 1.7878787878787878, | |
| "grad_norm": 8.23759937286377, | |
| "learning_rate": 4.4591433315585025e-07, | |
| "loss": 0.7326172590255737, | |
| "step": 2124 | |
| }, | |
| { | |
| "epoch": 1.7895622895622896, | |
| "grad_norm": 2.9576361179351807, | |
| "learning_rate": 4.4510188090942246e-07, | |
| "loss": 0.736370861530304, | |
| "step": 2126 | |
| }, | |
| { | |
| "epoch": 1.791245791245791, | |
| "grad_norm": 2.443329095840454, | |
| "learning_rate": 4.4428979008115173e-07, | |
| "loss": 0.7781453728675842, | |
| "step": 2128 | |
| }, | |
| { | |
| "epoch": 1.7929292929292928, | |
| "grad_norm": 8.095796585083008, | |
| "learning_rate": 4.434780634690326e-07, | |
| "loss": 0.7423359155654907, | |
| "step": 2130 | |
| }, | |
| { | |
| "epoch": 1.7946127946127945, | |
| "grad_norm": 4.694947719573975, | |
| "learning_rate": 4.426667038698049e-07, | |
| "loss": 0.5872843265533447, | |
| "step": 2132 | |
| }, | |
| { | |
| "epoch": 1.7962962962962963, | |
| "grad_norm": 4.841182708740234, | |
| "learning_rate": 4.418557140789436e-07, | |
| "loss": 0.769493579864502, | |
| "step": 2134 | |
| }, | |
| { | |
| "epoch": 1.797979797979798, | |
| "grad_norm": 35.13887023925781, | |
| "learning_rate": 4.4104509689065016e-07, | |
| "loss": 0.40486854314804077, | |
| "step": 2136 | |
| }, | |
| { | |
| "epoch": 1.7996632996632997, | |
| "grad_norm": 4.075418472290039, | |
| "learning_rate": 4.402348550978414e-07, | |
| "loss": 1.0084233283996582, | |
| "step": 2138 | |
| }, | |
| { | |
| "epoch": 1.8013468013468015, | |
| "grad_norm": 5.782071590423584, | |
| "learning_rate": 4.394249914921415e-07, | |
| "loss": 0.852903425693512, | |
| "step": 2140 | |
| }, | |
| { | |
| "epoch": 1.803030303030303, | |
| "grad_norm": 5.194396018981934, | |
| "learning_rate": 4.3861550886387133e-07, | |
| "loss": 0.8081188201904297, | |
| "step": 2142 | |
| }, | |
| { | |
| "epoch": 1.8047138047138047, | |
| "grad_norm": 5.665356636047363, | |
| "learning_rate": 4.378064100020391e-07, | |
| "loss": 0.7818201780319214, | |
| "step": 2144 | |
| }, | |
| { | |
| "epoch": 1.8063973063973064, | |
| "grad_norm": 13.495172500610352, | |
| "learning_rate": 4.369976976943307e-07, | |
| "loss": 0.9256261587142944, | |
| "step": 2146 | |
| }, | |
| { | |
| "epoch": 1.808080808080808, | |
| "grad_norm": 4.425163269042969, | |
| "learning_rate": 4.361893747271005e-07, | |
| "loss": 0.9166650772094727, | |
| "step": 2148 | |
| }, | |
| { | |
| "epoch": 1.8097643097643097, | |
| "grad_norm": 4.725872039794922, | |
| "learning_rate": 4.3538144388536105e-07, | |
| "loss": 1.0181063413619995, | |
| "step": 2150 | |
| }, | |
| { | |
| "epoch": 1.8114478114478114, | |
| "grad_norm": 11.999753952026367, | |
| "learning_rate": 4.3457390795277415e-07, | |
| "loss": 1.053621530532837, | |
| "step": 2152 | |
| }, | |
| { | |
| "epoch": 1.8131313131313131, | |
| "grad_norm": 7.518166542053223, | |
| "learning_rate": 4.3376676971164096e-07, | |
| "loss": 0.8652574419975281, | |
| "step": 2154 | |
| }, | |
| { | |
| "epoch": 1.8148148148148149, | |
| "grad_norm": 4.472687244415283, | |
| "learning_rate": 4.3296003194289224e-07, | |
| "loss": 0.7134494781494141, | |
| "step": 2156 | |
| }, | |
| { | |
| "epoch": 1.8164983164983166, | |
| "grad_norm": 5.567774772644043, | |
| "learning_rate": 4.321536974260788e-07, | |
| "loss": 0.5291237831115723, | |
| "step": 2158 | |
| }, | |
| { | |
| "epoch": 1.8181818181818183, | |
| "grad_norm": 3.6279776096343994, | |
| "learning_rate": 4.313477689393628e-07, | |
| "loss": 0.9376990795135498, | |
| "step": 2160 | |
| }, | |
| { | |
| "epoch": 1.8198653198653199, | |
| "grad_norm": 5.327882766723633, | |
| "learning_rate": 4.305422492595063e-07, | |
| "loss": 0.8061087131500244, | |
| "step": 2162 | |
| }, | |
| { | |
| "epoch": 1.8215488215488216, | |
| "grad_norm": 8.221955299377441, | |
| "learning_rate": 4.2973714116186433e-07, | |
| "loss": 0.9052633047103882, | |
| "step": 2164 | |
| }, | |
| { | |
| "epoch": 1.823232323232323, | |
| "grad_norm": 5.31283712387085, | |
| "learning_rate": 4.289324474203726e-07, | |
| "loss": 0.9636974930763245, | |
| "step": 2166 | |
| }, | |
| { | |
| "epoch": 1.8249158249158248, | |
| "grad_norm": 4.392337799072266, | |
| "learning_rate": 4.281281708075397e-07, | |
| "loss": 0.9123021364212036, | |
| "step": 2168 | |
| }, | |
| { | |
| "epoch": 1.8265993265993266, | |
| "grad_norm": 5.4881744384765625, | |
| "learning_rate": 4.2732431409443694e-07, | |
| "loss": 0.6539809703826904, | |
| "step": 2170 | |
| }, | |
| { | |
| "epoch": 1.8282828282828283, | |
| "grad_norm": 4.425382614135742, | |
| "learning_rate": 4.26520880050689e-07, | |
| "loss": 0.7706068158149719, | |
| "step": 2172 | |
| }, | |
| { | |
| "epoch": 1.82996632996633, | |
| "grad_norm": 5.568687915802002, | |
| "learning_rate": 4.25717871444464e-07, | |
| "loss": 0.47670307755470276, | |
| "step": 2174 | |
| }, | |
| { | |
| "epoch": 1.8316498316498318, | |
| "grad_norm": 3.117622137069702, | |
| "learning_rate": 4.249152910424648e-07, | |
| "loss": 0.49261391162872314, | |
| "step": 2176 | |
| }, | |
| { | |
| "epoch": 1.8333333333333335, | |
| "grad_norm": 14.853697776794434, | |
| "learning_rate": 4.2411314160991827e-07, | |
| "loss": 0.7614182233810425, | |
| "step": 2178 | |
| }, | |
| { | |
| "epoch": 1.835016835016835, | |
| "grad_norm": 3.1323137283325195, | |
| "learning_rate": 4.23311425910567e-07, | |
| "loss": 0.8719555735588074, | |
| "step": 2180 | |
| }, | |
| { | |
| "epoch": 1.8367003367003367, | |
| "grad_norm": 16.170852661132812, | |
| "learning_rate": 4.225101467066587e-07, | |
| "loss": 0.5341575741767883, | |
| "step": 2182 | |
| }, | |
| { | |
| "epoch": 1.8383838383838382, | |
| "grad_norm": 32.08811950683594, | |
| "learning_rate": 4.2170930675893745e-07, | |
| "loss": 0.9574685096740723, | |
| "step": 2184 | |
| }, | |
| { | |
| "epoch": 1.84006734006734, | |
| "grad_norm": 3.1121602058410645, | |
| "learning_rate": 4.209089088266337e-07, | |
| "loss": 1.0799657106399536, | |
| "step": 2186 | |
| }, | |
| { | |
| "epoch": 1.8417508417508417, | |
| "grad_norm": 18.168685913085938, | |
| "learning_rate": 4.201089556674553e-07, | |
| "loss": 0.9567815065383911, | |
| "step": 2188 | |
| }, | |
| { | |
| "epoch": 1.8434343434343434, | |
| "grad_norm": 11.96113109588623, | |
| "learning_rate": 4.193094500375772e-07, | |
| "loss": 0.6286576390266418, | |
| "step": 2190 | |
| }, | |
| { | |
| "epoch": 1.8451178451178452, | |
| "grad_norm": 4.731752395629883, | |
| "learning_rate": 4.1851039469163306e-07, | |
| "loss": 0.8796607255935669, | |
| "step": 2192 | |
| }, | |
| { | |
| "epoch": 1.8468013468013469, | |
| "grad_norm": 10.77106761932373, | |
| "learning_rate": 4.177117923827046e-07, | |
| "loss": 0.6798102855682373, | |
| "step": 2194 | |
| }, | |
| { | |
| "epoch": 1.8484848484848486, | |
| "grad_norm": 7.331570148468018, | |
| "learning_rate": 4.169136458623126e-07, | |
| "loss": 0.8384144902229309, | |
| "step": 2196 | |
| }, | |
| { | |
| "epoch": 1.8501683501683501, | |
| "grad_norm": 22.100555419921875, | |
| "learning_rate": 4.161159578804079e-07, | |
| "loss": 0.46593400835990906, | |
| "step": 2198 | |
| }, | |
| { | |
| "epoch": 1.8518518518518519, | |
| "grad_norm": 3.0996837615966797, | |
| "learning_rate": 4.153187311853611e-07, | |
| "loss": 1.0288646221160889, | |
| "step": 2200 | |
| }, | |
| { | |
| "epoch": 1.8535353535353534, | |
| "grad_norm": 8.950583457946777, | |
| "learning_rate": 4.145219685239535e-07, | |
| "loss": 0.7397197484970093, | |
| "step": 2202 | |
| }, | |
| { | |
| "epoch": 1.855218855218855, | |
| "grad_norm": 4.474309921264648, | |
| "learning_rate": 4.1372567264136806e-07, | |
| "loss": 0.652114987373352, | |
| "step": 2204 | |
| }, | |
| { | |
| "epoch": 1.8569023569023568, | |
| "grad_norm": 15.298965454101562, | |
| "learning_rate": 4.129298462811789e-07, | |
| "loss": 0.9816831350326538, | |
| "step": 2206 | |
| }, | |
| { | |
| "epoch": 1.8585858585858586, | |
| "grad_norm": 6.661653518676758, | |
| "learning_rate": 4.121344921853426e-07, | |
| "loss": 0.782197892665863, | |
| "step": 2208 | |
| }, | |
| { | |
| "epoch": 1.8602693602693603, | |
| "grad_norm": 13.741491317749023, | |
| "learning_rate": 4.1133961309418885e-07, | |
| "loss": 0.35760360956192017, | |
| "step": 2210 | |
| }, | |
| { | |
| "epoch": 1.861952861952862, | |
| "grad_norm": 10.694735527038574, | |
| "learning_rate": 4.1054521174641065e-07, | |
| "loss": 0.9551196098327637, | |
| "step": 2212 | |
| }, | |
| { | |
| "epoch": 1.8636363636363638, | |
| "grad_norm": 3.1088006496429443, | |
| "learning_rate": 4.097512908790546e-07, | |
| "loss": 1.12099027633667, | |
| "step": 2214 | |
| }, | |
| { | |
| "epoch": 1.8653198653198653, | |
| "grad_norm": 25.677371978759766, | |
| "learning_rate": 4.089578532275123e-07, | |
| "loss": 0.3952019214630127, | |
| "step": 2216 | |
| }, | |
| { | |
| "epoch": 1.867003367003367, | |
| "grad_norm": 9.417123794555664, | |
| "learning_rate": 4.081649015255104e-07, | |
| "loss": 0.6426748633384705, | |
| "step": 2218 | |
| }, | |
| { | |
| "epoch": 1.8686868686868687, | |
| "grad_norm": 11.567337989807129, | |
| "learning_rate": 4.0737243850510097e-07, | |
| "loss": 0.6122760772705078, | |
| "step": 2220 | |
| }, | |
| { | |
| "epoch": 1.8703703703703702, | |
| "grad_norm": 5.804279327392578, | |
| "learning_rate": 4.065804668966527e-07, | |
| "loss": 0.6974793672561646, | |
| "step": 2222 | |
| }, | |
| { | |
| "epoch": 1.872053872053872, | |
| "grad_norm": 2.7025163173675537, | |
| "learning_rate": 4.057889894288409e-07, | |
| "loss": 1.073783040046692, | |
| "step": 2224 | |
| }, | |
| { | |
| "epoch": 1.8737373737373737, | |
| "grad_norm": 4.876822471618652, | |
| "learning_rate": 4.049980088286384e-07, | |
| "loss": 0.6222144365310669, | |
| "step": 2226 | |
| }, | |
| { | |
| "epoch": 1.8754208754208754, | |
| "grad_norm": 9.058050155639648, | |
| "learning_rate": 4.042075278213065e-07, | |
| "loss": 0.44170594215393066, | |
| "step": 2228 | |
| }, | |
| { | |
| "epoch": 1.8771043771043772, | |
| "grad_norm": 7.270165920257568, | |
| "learning_rate": 4.0341754913038463e-07, | |
| "loss": 1.0838236808776855, | |
| "step": 2230 | |
| }, | |
| { | |
| "epoch": 1.878787878787879, | |
| "grad_norm": 5.1195969581604, | |
| "learning_rate": 4.0262807547768164e-07, | |
| "loss": 0.9825941324234009, | |
| "step": 2232 | |
| }, | |
| { | |
| "epoch": 1.8804713804713806, | |
| "grad_norm": 6.805301666259766, | |
| "learning_rate": 4.018391095832665e-07, | |
| "loss": 0.5576257705688477, | |
| "step": 2234 | |
| }, | |
| { | |
| "epoch": 1.8821548821548821, | |
| "grad_norm": 5.000736236572266, | |
| "learning_rate": 4.0105065416545904e-07, | |
| "loss": 0.7729544639587402, | |
| "step": 2236 | |
| }, | |
| { | |
| "epoch": 1.8838383838383839, | |
| "grad_norm": 26.782957077026367, | |
| "learning_rate": 4.002627119408196e-07, | |
| "loss": 0.9620450735092163, | |
| "step": 2238 | |
| }, | |
| { | |
| "epoch": 1.8855218855218854, | |
| "grad_norm": 2.8780593872070312, | |
| "learning_rate": 3.994752856241407e-07, | |
| "loss": 1.1825776100158691, | |
| "step": 2240 | |
| }, | |
| { | |
| "epoch": 1.887205387205387, | |
| "grad_norm": 22.294492721557617, | |
| "learning_rate": 3.9868837792843744e-07, | |
| "loss": 1.0324305295944214, | |
| "step": 2242 | |
| }, | |
| { | |
| "epoch": 1.8888888888888888, | |
| "grad_norm": 4.332054615020752, | |
| "learning_rate": 3.97901991564938e-07, | |
| "loss": 0.9554680585861206, | |
| "step": 2244 | |
| }, | |
| { | |
| "epoch": 1.8905723905723906, | |
| "grad_norm": 3.9427170753479004, | |
| "learning_rate": 3.971161292430738e-07, | |
| "loss": 1.0006061792373657, | |
| "step": 2246 | |
| }, | |
| { | |
| "epoch": 1.8922558922558923, | |
| "grad_norm": 13.30428695678711, | |
| "learning_rate": 3.9633079367047176e-07, | |
| "loss": 0.9314384460449219, | |
| "step": 2248 | |
| }, | |
| { | |
| "epoch": 1.893939393939394, | |
| "grad_norm": 7.6282057762146, | |
| "learning_rate": 3.9554598755294313e-07, | |
| "loss": 1.031144380569458, | |
| "step": 2250 | |
| }, | |
| { | |
| "epoch": 1.8956228956228958, | |
| "grad_norm": 4.266255855560303, | |
| "learning_rate": 3.947617135944751e-07, | |
| "loss": 1.2106260061264038, | |
| "step": 2252 | |
| }, | |
| { | |
| "epoch": 1.8973063973063973, | |
| "grad_norm": 4.872833251953125, | |
| "learning_rate": 3.9397797449722157e-07, | |
| "loss": 0.9372920393943787, | |
| "step": 2254 | |
| }, | |
| { | |
| "epoch": 1.898989898989899, | |
| "grad_norm": 6.311352729797363, | |
| "learning_rate": 3.931947729614935e-07, | |
| "loss": 0.8530165553092957, | |
| "step": 2256 | |
| }, | |
| { | |
| "epoch": 1.9006734006734005, | |
| "grad_norm": 4.680610656738281, | |
| "learning_rate": 3.924121116857496e-07, | |
| "loss": 1.026566505432129, | |
| "step": 2258 | |
| }, | |
| { | |
| "epoch": 1.9023569023569022, | |
| "grad_norm": 9.47015380859375, | |
| "learning_rate": 3.9162999336658754e-07, | |
| "loss": 0.778825044631958, | |
| "step": 2260 | |
| }, | |
| { | |
| "epoch": 1.904040404040404, | |
| "grad_norm": 4.094303607940674, | |
| "learning_rate": 3.908484206987338e-07, | |
| "loss": 0.837942361831665, | |
| "step": 2262 | |
| }, | |
| { | |
| "epoch": 1.9057239057239057, | |
| "grad_norm": 4.3366522789001465, | |
| "learning_rate": 3.9006739637503504e-07, | |
| "loss": 0.5546213388442993, | |
| "step": 2264 | |
| }, | |
| { | |
| "epoch": 1.9074074074074074, | |
| "grad_norm": 4.000308036804199, | |
| "learning_rate": 3.8928692308644873e-07, | |
| "loss": 0.8694909811019897, | |
| "step": 2266 | |
| }, | |
| { | |
| "epoch": 1.9090909090909092, | |
| "grad_norm": 9.866999626159668, | |
| "learning_rate": 3.8850700352203393e-07, | |
| "loss": 0.7251837253570557, | |
| "step": 2268 | |
| }, | |
| { | |
| "epoch": 1.910774410774411, | |
| "grad_norm": 25.70250129699707, | |
| "learning_rate": 3.8772764036894135e-07, | |
| "loss": 0.8718059659004211, | |
| "step": 2270 | |
| }, | |
| { | |
| "epoch": 1.9124579124579124, | |
| "grad_norm": 3.6470611095428467, | |
| "learning_rate": 3.8694883631240525e-07, | |
| "loss": 0.9727774858474731, | |
| "step": 2272 | |
| }, | |
| { | |
| "epoch": 1.9141414141414141, | |
| "grad_norm": 17.55716896057129, | |
| "learning_rate": 3.8617059403573315e-07, | |
| "loss": 0.7658140659332275, | |
| "step": 2274 | |
| }, | |
| { | |
| "epoch": 1.9158249158249159, | |
| "grad_norm": 321.7969665527344, | |
| "learning_rate": 3.8539291622029726e-07, | |
| "loss": 0.9249438047409058, | |
| "step": 2276 | |
| }, | |
| { | |
| "epoch": 1.9175084175084174, | |
| "grad_norm": 12.505878448486328, | |
| "learning_rate": 3.8461580554552473e-07, | |
| "loss": 0.6528811454772949, | |
| "step": 2278 | |
| }, | |
| { | |
| "epoch": 1.9191919191919191, | |
| "grad_norm": 11.336592674255371, | |
| "learning_rate": 3.8383926468888894e-07, | |
| "loss": 0.4342978596687317, | |
| "step": 2280 | |
| }, | |
| { | |
| "epoch": 1.9208754208754208, | |
| "grad_norm": 5.1289567947387695, | |
| "learning_rate": 3.830632963258998e-07, | |
| "loss": 1.0175809860229492, | |
| "step": 2282 | |
| }, | |
| { | |
| "epoch": 1.9225589225589226, | |
| "grad_norm": 13.561434745788574, | |
| "learning_rate": 3.82287903130095e-07, | |
| "loss": 0.5333043336868286, | |
| "step": 2284 | |
| }, | |
| { | |
| "epoch": 1.9242424242424243, | |
| "grad_norm": 1.7644530534744263, | |
| "learning_rate": 3.815130877730299e-07, | |
| "loss": 0.6704491376876831, | |
| "step": 2286 | |
| }, | |
| { | |
| "epoch": 1.925925925925926, | |
| "grad_norm": 5.901256084442139, | |
| "learning_rate": 3.807388529242699e-07, | |
| "loss": 1.0216944217681885, | |
| "step": 2288 | |
| }, | |
| { | |
| "epoch": 1.9276094276094278, | |
| "grad_norm": 7.095743179321289, | |
| "learning_rate": 3.799652012513795e-07, | |
| "loss": 0.9275904893875122, | |
| "step": 2290 | |
| }, | |
| { | |
| "epoch": 1.9292929292929293, | |
| "grad_norm": 11.874740600585938, | |
| "learning_rate": 3.791921354199145e-07, | |
| "loss": 0.5191354751586914, | |
| "step": 2292 | |
| }, | |
| { | |
| "epoch": 1.930976430976431, | |
| "grad_norm": 5.656970500946045, | |
| "learning_rate": 3.784196580934117e-07, | |
| "loss": 0.9575490951538086, | |
| "step": 2294 | |
| }, | |
| { | |
| "epoch": 1.9326599326599325, | |
| "grad_norm": 37.43537521362305, | |
| "learning_rate": 3.776477719333806e-07, | |
| "loss": 0.8639167547225952, | |
| "step": 2296 | |
| }, | |
| { | |
| "epoch": 1.9343434343434343, | |
| "grad_norm": 3.692530632019043, | |
| "learning_rate": 3.768764795992939e-07, | |
| "loss": 0.8566898107528687, | |
| "step": 2298 | |
| }, | |
| { | |
| "epoch": 1.936026936026936, | |
| "grad_norm": 7.679093837738037, | |
| "learning_rate": 3.761057837485782e-07, | |
| "loss": 0.7409002780914307, | |
| "step": 2300 | |
| }, | |
| { | |
| "epoch": 1.9377104377104377, | |
| "grad_norm": 4.928491592407227, | |
| "learning_rate": 3.753356870366049e-07, | |
| "loss": 1.2324477434158325, | |
| "step": 2302 | |
| }, | |
| { | |
| "epoch": 1.9393939393939394, | |
| "grad_norm": 6.215064525604248, | |
| "learning_rate": 3.745661921166813e-07, | |
| "loss": 1.0157601833343506, | |
| "step": 2304 | |
| }, | |
| { | |
| "epoch": 1.9410774410774412, | |
| "grad_norm": 9.274593353271484, | |
| "learning_rate": 3.73797301640041e-07, | |
| "loss": 0.39169979095458984, | |
| "step": 2306 | |
| }, | |
| { | |
| "epoch": 1.942760942760943, | |
| "grad_norm": 5.436382293701172, | |
| "learning_rate": 3.730290182558352e-07, | |
| "loss": 0.9424724578857422, | |
| "step": 2308 | |
| }, | |
| { | |
| "epoch": 1.9444444444444444, | |
| "grad_norm": 6.4669389724731445, | |
| "learning_rate": 3.722613446111238e-07, | |
| "loss": 1.0893113613128662, | |
| "step": 2310 | |
| }, | |
| { | |
| "epoch": 1.9461279461279462, | |
| "grad_norm": 3.2764360904693604, | |
| "learning_rate": 3.7149428335086505e-07, | |
| "loss": 0.9788646697998047, | |
| "step": 2312 | |
| }, | |
| { | |
| "epoch": 1.9478114478114477, | |
| "grad_norm": 6.531454086303711, | |
| "learning_rate": 3.70727837117908e-07, | |
| "loss": 0.9268249869346619, | |
| "step": 2314 | |
| }, | |
| { | |
| "epoch": 1.9494949494949494, | |
| "grad_norm": 10.755637168884277, | |
| "learning_rate": 3.6996200855298243e-07, | |
| "loss": 0.7596557140350342, | |
| "step": 2316 | |
| }, | |
| { | |
| "epoch": 1.9511784511784511, | |
| "grad_norm": 4.191674709320068, | |
| "learning_rate": 3.691968002946899e-07, | |
| "loss": 0.8969882130622864, | |
| "step": 2318 | |
| }, | |
| { | |
| "epoch": 1.9528619528619529, | |
| "grad_norm": 3.586559772491455, | |
| "learning_rate": 3.684322149794947e-07, | |
| "loss": 0.926864743232727, | |
| "step": 2320 | |
| }, | |
| { | |
| "epoch": 1.9545454545454546, | |
| "grad_norm": 3.739887237548828, | |
| "learning_rate": 3.676682552417152e-07, | |
| "loss": 1.0153056383132935, | |
| "step": 2322 | |
| }, | |
| { | |
| "epoch": 1.9562289562289563, | |
| "grad_norm": 5.188506603240967, | |
| "learning_rate": 3.669049237135139e-07, | |
| "loss": 0.7965476512908936, | |
| "step": 2324 | |
| }, | |
| { | |
| "epoch": 1.957912457912458, | |
| "grad_norm": 19.38260269165039, | |
| "learning_rate": 3.6614222302488915e-07, | |
| "loss": 0.5549055337905884, | |
| "step": 2326 | |
| }, | |
| { | |
| "epoch": 1.9595959595959596, | |
| "grad_norm": 3.7028939723968506, | |
| "learning_rate": 3.6538015580366585e-07, | |
| "loss": 1.1440973281860352, | |
| "step": 2328 | |
| }, | |
| { | |
| "epoch": 1.9612794612794613, | |
| "grad_norm": 3.7629427909851074, | |
| "learning_rate": 3.6461872467548625e-07, | |
| "loss": 1.0486090183258057, | |
| "step": 2330 | |
| }, | |
| { | |
| "epoch": 1.9629629629629628, | |
| "grad_norm": 4.191500186920166, | |
| "learning_rate": 3.638579322638007e-07, | |
| "loss": 1.029564619064331, | |
| "step": 2332 | |
| }, | |
| { | |
| "epoch": 1.9646464646464645, | |
| "grad_norm": 3.297617197036743, | |
| "learning_rate": 3.6309778118985943e-07, | |
| "loss": 1.0488507747650146, | |
| "step": 2334 | |
| }, | |
| { | |
| "epoch": 1.9663299663299663, | |
| "grad_norm": 3.2955570220947266, | |
| "learning_rate": 3.623382740727028e-07, | |
| "loss": 0.9328145384788513, | |
| "step": 2336 | |
| }, | |
| { | |
| "epoch": 1.968013468013468, | |
| "grad_norm": 3.8717432022094727, | |
| "learning_rate": 3.61579413529152e-07, | |
| "loss": 1.0710524320602417, | |
| "step": 2338 | |
| }, | |
| { | |
| "epoch": 1.9696969696969697, | |
| "grad_norm": 16.362682342529297, | |
| "learning_rate": 3.608212021738011e-07, | |
| "loss": 0.565844714641571, | |
| "step": 2340 | |
| }, | |
| { | |
| "epoch": 1.9713804713804715, | |
| "grad_norm": 4.6352057456970215, | |
| "learning_rate": 3.600636426190075e-07, | |
| "loss": 0.7352415919303894, | |
| "step": 2342 | |
| }, | |
| { | |
| "epoch": 1.9730639730639732, | |
| "grad_norm": 30.211545944213867, | |
| "learning_rate": 3.593067374748823e-07, | |
| "loss": 0.5901581645011902, | |
| "step": 2344 | |
| }, | |
| { | |
| "epoch": 1.9747474747474747, | |
| "grad_norm": 14.593511581420898, | |
| "learning_rate": 3.585504893492821e-07, | |
| "loss": 0.8802275657653809, | |
| "step": 2346 | |
| }, | |
| { | |
| "epoch": 1.9764309764309764, | |
| "grad_norm": 6.9926438331604, | |
| "learning_rate": 3.577949008478004e-07, | |
| "loss": 0.7798852920532227, | |
| "step": 2348 | |
| }, | |
| { | |
| "epoch": 1.9781144781144782, | |
| "grad_norm": 27.421436309814453, | |
| "learning_rate": 3.57039974573757e-07, | |
| "loss": 0.726132333278656, | |
| "step": 2350 | |
| }, | |
| { | |
| "epoch": 1.9797979797979797, | |
| "grad_norm": 3.9214117527008057, | |
| "learning_rate": 3.562857131281907e-07, | |
| "loss": 0.7651845216751099, | |
| "step": 2352 | |
| }, | |
| { | |
| "epoch": 1.9814814814814814, | |
| "grad_norm": 2.7648415565490723, | |
| "learning_rate": 3.555321191098498e-07, | |
| "loss": 0.4599582552909851, | |
| "step": 2354 | |
| }, | |
| { | |
| "epoch": 1.9831649831649831, | |
| "grad_norm": 3.028148651123047, | |
| "learning_rate": 3.547791951151824e-07, | |
| "loss": 1.0578691959381104, | |
| "step": 2356 | |
| }, | |
| { | |
| "epoch": 1.9848484848484849, | |
| "grad_norm": 3.8524043560028076, | |
| "learning_rate": 3.5402694373832863e-07, | |
| "loss": 0.9566428065299988, | |
| "step": 2358 | |
| }, | |
| { | |
| "epoch": 1.9865319865319866, | |
| "grad_norm": 6.2976250648498535, | |
| "learning_rate": 3.53275367571111e-07, | |
| "loss": 0.9507308602333069, | |
| "step": 2360 | |
| }, | |
| { | |
| "epoch": 1.9882154882154883, | |
| "grad_norm": 9.228460311889648, | |
| "learning_rate": 3.525244692030256e-07, | |
| "loss": 0.646575927734375, | |
| "step": 2362 | |
| }, | |
| { | |
| "epoch": 1.98989898989899, | |
| "grad_norm": 3.0814363956451416, | |
| "learning_rate": 3.517742512212333e-07, | |
| "loss": 0.9748328924179077, | |
| "step": 2364 | |
| }, | |
| { | |
| "epoch": 1.9915824915824916, | |
| "grad_norm": 15.091059684753418, | |
| "learning_rate": 3.5102471621055083e-07, | |
| "loss": 0.8788052797317505, | |
| "step": 2366 | |
| }, | |
| { | |
| "epoch": 1.9932659932659933, | |
| "grad_norm": 2.8277087211608887, | |
| "learning_rate": 3.5027586675344134e-07, | |
| "loss": 1.026127576828003, | |
| "step": 2368 | |
| }, | |
| { | |
| "epoch": 1.9949494949494948, | |
| "grad_norm": 155.9358673095703, | |
| "learning_rate": 3.495277054300065e-07, | |
| "loss": 0.41760489344596863, | |
| "step": 2370 | |
| }, | |
| { | |
| "epoch": 1.9966329966329965, | |
| "grad_norm": 47.22990036010742, | |
| "learning_rate": 3.487802348179771e-07, | |
| "loss": 0.6611791849136353, | |
| "step": 2372 | |
| }, | |
| { | |
| "epoch": 1.9983164983164983, | |
| "grad_norm": 3.25925612449646, | |
| "learning_rate": 3.480334574927034e-07, | |
| "loss": 0.9254864454269409, | |
| "step": 2374 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "grad_norm": 3.941661834716797, | |
| "learning_rate": 3.4728737602714777e-07, | |
| "loss": 0.8802586793899536, | |
| "step": 2376 | |
| }, | |
| { | |
| "epoch": 2.0016835016835017, | |
| "grad_norm": 49.501678466796875, | |
| "learning_rate": 3.465419929918748e-07, | |
| "loss": 0.709393322467804, | |
| "step": 2378 | |
| }, | |
| { | |
| "epoch": 2.0033670033670035, | |
| "grad_norm": 5.433994770050049, | |
| "learning_rate": 3.457973109550426e-07, | |
| "loss": 1.1732385158538818, | |
| "step": 2380 | |
| }, | |
| { | |
| "epoch": 2.005050505050505, | |
| "grad_norm": 29.29537582397461, | |
| "learning_rate": 3.4505333248239437e-07, | |
| "loss": 0.6126368641853333, | |
| "step": 2382 | |
| }, | |
| { | |
| "epoch": 2.006734006734007, | |
| "grad_norm": 15.888188362121582, | |
| "learning_rate": 3.443100601372486e-07, | |
| "loss": 0.534448504447937, | |
| "step": 2384 | |
| }, | |
| { | |
| "epoch": 2.008417508417508, | |
| "grad_norm": 4.571238040924072, | |
| "learning_rate": 3.435674964804913e-07, | |
| "loss": 0.6711810827255249, | |
| "step": 2386 | |
| }, | |
| { | |
| "epoch": 2.01010101010101, | |
| "grad_norm": 5.0248517990112305, | |
| "learning_rate": 3.4282564407056714e-07, | |
| "loss": 0.856137752532959, | |
| "step": 2388 | |
| }, | |
| { | |
| "epoch": 2.0117845117845117, | |
| "grad_norm": 3.410614490509033, | |
| "learning_rate": 3.420845054634693e-07, | |
| "loss": 1.0443634986877441, | |
| "step": 2390 | |
| }, | |
| { | |
| "epoch": 2.0134680134680134, | |
| "grad_norm": 8.341497421264648, | |
| "learning_rate": 3.413440832127323e-07, | |
| "loss": 0.6617559194564819, | |
| "step": 2392 | |
| }, | |
| { | |
| "epoch": 2.015151515151515, | |
| "grad_norm": 9.096837043762207, | |
| "learning_rate": 3.406043798694226e-07, | |
| "loss": 0.7012159824371338, | |
| "step": 2394 | |
| }, | |
| { | |
| "epoch": 2.016835016835017, | |
| "grad_norm": 6.935766220092773, | |
| "learning_rate": 3.39865397982129e-07, | |
| "loss": 0.8126204013824463, | |
| "step": 2396 | |
| }, | |
| { | |
| "epoch": 2.0185185185185186, | |
| "grad_norm": 10.908267974853516, | |
| "learning_rate": 3.3912714009695525e-07, | |
| "loss": 0.7988526225090027, | |
| "step": 2398 | |
| }, | |
| { | |
| "epoch": 2.0202020202020203, | |
| "grad_norm": 3.9570505619049072, | |
| "learning_rate": 3.3838960875751057e-07, | |
| "loss": 0.6374803781509399, | |
| "step": 2400 | |
| }, | |
| { | |
| "epoch": 2.021885521885522, | |
| "grad_norm": 149.55186462402344, | |
| "learning_rate": 3.3765280650490043e-07, | |
| "loss": 0.4227946400642395, | |
| "step": 2402 | |
| }, | |
| { | |
| "epoch": 2.0235690235690234, | |
| "grad_norm": 9.027767181396484, | |
| "learning_rate": 3.3691673587771866e-07, | |
| "loss": 0.8504242897033691, | |
| "step": 2404 | |
| }, | |
| { | |
| "epoch": 2.025252525252525, | |
| "grad_norm": 8.02371883392334, | |
| "learning_rate": 3.361813994120386e-07, | |
| "loss": 0.7173169851303101, | |
| "step": 2406 | |
| }, | |
| { | |
| "epoch": 2.026936026936027, | |
| "grad_norm": 2.9558398723602295, | |
| "learning_rate": 3.354467996414034e-07, | |
| "loss": 0.8256983757019043, | |
| "step": 2408 | |
| }, | |
| { | |
| "epoch": 2.0286195286195285, | |
| "grad_norm": 15.695834159851074, | |
| "learning_rate": 3.3471293909681844e-07, | |
| "loss": 0.8146846294403076, | |
| "step": 2410 | |
| }, | |
| { | |
| "epoch": 2.0303030303030303, | |
| "grad_norm": 4.859415531158447, | |
| "learning_rate": 3.339798203067422e-07, | |
| "loss": 0.9352428913116455, | |
| "step": 2412 | |
| }, | |
| { | |
| "epoch": 2.031986531986532, | |
| "grad_norm": 9.821430206298828, | |
| "learning_rate": 3.332474457970773e-07, | |
| "loss": 0.7644020318984985, | |
| "step": 2414 | |
| }, | |
| { | |
| "epoch": 2.0336700336700337, | |
| "grad_norm": 2.8421833515167236, | |
| "learning_rate": 3.32515818091162e-07, | |
| "loss": 0.936759889125824, | |
| "step": 2416 | |
| }, | |
| { | |
| "epoch": 2.0353535353535355, | |
| "grad_norm": 4.6429243087768555, | |
| "learning_rate": 3.3178493970976183e-07, | |
| "loss": 0.7487270832061768, | |
| "step": 2418 | |
| }, | |
| { | |
| "epoch": 2.037037037037037, | |
| "grad_norm": 8.991228103637695, | |
| "learning_rate": 3.310548131710601e-07, | |
| "loss": 0.4855067729949951, | |
| "step": 2420 | |
| }, | |
| { | |
| "epoch": 2.038720538720539, | |
| "grad_norm": 3.958752393722534, | |
| "learning_rate": 3.3032544099065003e-07, | |
| "loss": 0.7952554821968079, | |
| "step": 2422 | |
| }, | |
| { | |
| "epoch": 2.04040404040404, | |
| "grad_norm": 4.862611293792725, | |
| "learning_rate": 3.295968256815257e-07, | |
| "loss": 0.36966073513031006, | |
| "step": 2424 | |
| }, | |
| { | |
| "epoch": 2.042087542087542, | |
| "grad_norm": 10.325581550598145, | |
| "learning_rate": 3.288689697540733e-07, | |
| "loss": 0.4272541403770447, | |
| "step": 2426 | |
| }, | |
| { | |
| "epoch": 2.0437710437710437, | |
| "grad_norm": 46.957489013671875, | |
| "learning_rate": 3.281418757160629e-07, | |
| "loss": 0.6797230839729309, | |
| "step": 2428 | |
| }, | |
| { | |
| "epoch": 2.0454545454545454, | |
| "grad_norm": 3.5477898120880127, | |
| "learning_rate": 3.274155460726392e-07, | |
| "loss": 0.8319392204284668, | |
| "step": 2430 | |
| }, | |
| { | |
| "epoch": 2.047138047138047, | |
| "grad_norm": 6.360466480255127, | |
| "learning_rate": 3.2668998332631374e-07, | |
| "loss": 0.6863579154014587, | |
| "step": 2432 | |
| }, | |
| { | |
| "epoch": 2.048821548821549, | |
| "grad_norm": 6.163110733032227, | |
| "learning_rate": 3.259651899769552e-07, | |
| "loss": 0.845360279083252, | |
| "step": 2434 | |
| }, | |
| { | |
| "epoch": 2.0505050505050506, | |
| "grad_norm": 4.161473274230957, | |
| "learning_rate": 3.2524116852178163e-07, | |
| "loss": 1.2110919952392578, | |
| "step": 2436 | |
| }, | |
| { | |
| "epoch": 2.0521885521885523, | |
| "grad_norm": 9.452940940856934, | |
| "learning_rate": 3.245179214553519e-07, | |
| "loss": 0.7553325891494751, | |
| "step": 2438 | |
| }, | |
| { | |
| "epoch": 2.053872053872054, | |
| "grad_norm": 2.847379207611084, | |
| "learning_rate": 3.23795451269556e-07, | |
| "loss": 0.8473318219184875, | |
| "step": 2440 | |
| }, | |
| { | |
| "epoch": 2.0555555555555554, | |
| "grad_norm": 8.454484939575195, | |
| "learning_rate": 3.2307376045360804e-07, | |
| "loss": 0.7530231475830078, | |
| "step": 2442 | |
| }, | |
| { | |
| "epoch": 2.057239057239057, | |
| "grad_norm": 3.292670965194702, | |
| "learning_rate": 3.223528514940365e-07, | |
| "loss": 0.8452006578445435, | |
| "step": 2444 | |
| }, | |
| { | |
| "epoch": 2.058922558922559, | |
| "grad_norm": 5.7511820793151855, | |
| "learning_rate": 3.216327268746759e-07, | |
| "loss": 1.0079270601272583, | |
| "step": 2446 | |
| }, | |
| { | |
| "epoch": 2.0606060606060606, | |
| "grad_norm": 3.7970666885375977, | |
| "learning_rate": 3.2091338907665864e-07, | |
| "loss": 0.8261886835098267, | |
| "step": 2448 | |
| }, | |
| { | |
| "epoch": 2.0622895622895623, | |
| "grad_norm": 4.847807884216309, | |
| "learning_rate": 3.201948405784062e-07, | |
| "loss": 0.7386308908462524, | |
| "step": 2450 | |
| }, | |
| { | |
| "epoch": 2.063973063973064, | |
| "grad_norm": 4.138136386871338, | |
| "learning_rate": 3.1947708385562033e-07, | |
| "loss": 0.967164158821106, | |
| "step": 2452 | |
| }, | |
| { | |
| "epoch": 2.0656565656565657, | |
| "grad_norm": 6.592377185821533, | |
| "learning_rate": 3.1876012138127525e-07, | |
| "loss": 0.820540189743042, | |
| "step": 2454 | |
| }, | |
| { | |
| "epoch": 2.0673400673400675, | |
| "grad_norm": 3.9689667224884033, | |
| "learning_rate": 3.1804395562560795e-07, | |
| "loss": 0.884551465511322, | |
| "step": 2456 | |
| }, | |
| { | |
| "epoch": 2.069023569023569, | |
| "grad_norm": 3.013533353805542, | |
| "learning_rate": 3.173285890561109e-07, | |
| "loss": 0.7905436158180237, | |
| "step": 2458 | |
| }, | |
| { | |
| "epoch": 2.0707070707070705, | |
| "grad_norm": 12.277018547058105, | |
| "learning_rate": 3.166140241375233e-07, | |
| "loss": 0.6569070219993591, | |
| "step": 2460 | |
| }, | |
| { | |
| "epoch": 2.0723905723905722, | |
| "grad_norm": 6.178629398345947, | |
| "learning_rate": 3.159002633318214e-07, | |
| "loss": 0.6464763879776001, | |
| "step": 2462 | |
| }, | |
| { | |
| "epoch": 2.074074074074074, | |
| "grad_norm": 41.42927932739258, | |
| "learning_rate": 3.151873090982117e-07, | |
| "loss": 0.7555403709411621, | |
| "step": 2464 | |
| }, | |
| { | |
| "epoch": 2.0757575757575757, | |
| "grad_norm": 6.328332424163818, | |
| "learning_rate": 3.144751638931219e-07, | |
| "loss": 0.8773843050003052, | |
| "step": 2466 | |
| }, | |
| { | |
| "epoch": 2.0774410774410774, | |
| "grad_norm": 3.656052589416504, | |
| "learning_rate": 3.137638301701912e-07, | |
| "loss": 0.5875815749168396, | |
| "step": 2468 | |
| }, | |
| { | |
| "epoch": 2.079124579124579, | |
| "grad_norm": 5.517158031463623, | |
| "learning_rate": 3.13053310380264e-07, | |
| "loss": 1.0708808898925781, | |
| "step": 2470 | |
| }, | |
| { | |
| "epoch": 2.080808080808081, | |
| "grad_norm": 3.563227891921997, | |
| "learning_rate": 3.123436069713801e-07, | |
| "loss": 1.0506317615509033, | |
| "step": 2472 | |
| }, | |
| { | |
| "epoch": 2.0824915824915826, | |
| "grad_norm": 3.579038619995117, | |
| "learning_rate": 3.116347223887658e-07, | |
| "loss": 0.5262918472290039, | |
| "step": 2474 | |
| }, | |
| { | |
| "epoch": 2.0841750841750843, | |
| "grad_norm": 4.360744953155518, | |
| "learning_rate": 3.1092665907482705e-07, | |
| "loss": 0.6860552430152893, | |
| "step": 2476 | |
| }, | |
| { | |
| "epoch": 2.0858585858585856, | |
| "grad_norm": 5.244029521942139, | |
| "learning_rate": 3.102194194691402e-07, | |
| "loss": 0.8589056730270386, | |
| "step": 2478 | |
| }, | |
| { | |
| "epoch": 2.0875420875420874, | |
| "grad_norm": 2.6665806770324707, | |
| "learning_rate": 3.0951300600844277e-07, | |
| "loss": 0.7219854593276978, | |
| "step": 2480 | |
| }, | |
| { | |
| "epoch": 2.089225589225589, | |
| "grad_norm": 4.833096027374268, | |
| "learning_rate": 3.088074211266265e-07, | |
| "loss": 0.6794151067733765, | |
| "step": 2482 | |
| }, | |
| { | |
| "epoch": 2.090909090909091, | |
| "grad_norm": 4.1560773849487305, | |
| "learning_rate": 3.0810266725472843e-07, | |
| "loss": 1.1254472732543945, | |
| "step": 2484 | |
| }, | |
| { | |
| "epoch": 2.0925925925925926, | |
| "grad_norm": 13.069584846496582, | |
| "learning_rate": 3.073987468209218e-07, | |
| "loss": 0.7453956604003906, | |
| "step": 2486 | |
| }, | |
| { | |
| "epoch": 2.0942760942760943, | |
| "grad_norm": 2.8535449504852295, | |
| "learning_rate": 3.0669566225050904e-07, | |
| "loss": 0.7250915765762329, | |
| "step": 2488 | |
| }, | |
| { | |
| "epoch": 2.095959595959596, | |
| "grad_norm": 7.14851188659668, | |
| "learning_rate": 3.059934159659122e-07, | |
| "loss": 0.9290302991867065, | |
| "step": 2490 | |
| }, | |
| { | |
| "epoch": 2.0976430976430978, | |
| "grad_norm": 4.216705799102783, | |
| "learning_rate": 3.052920103866651e-07, | |
| "loss": 0.9226129055023193, | |
| "step": 2492 | |
| }, | |
| { | |
| "epoch": 2.0993265993265995, | |
| "grad_norm": 6.80454158782959, | |
| "learning_rate": 3.0459144792940506e-07, | |
| "loss": 0.6964681148529053, | |
| "step": 2494 | |
| }, | |
| { | |
| "epoch": 2.101010101010101, | |
| "grad_norm": 4.916928291320801, | |
| "learning_rate": 3.038917310078648e-07, | |
| "loss": 0.9581238627433777, | |
| "step": 2496 | |
| }, | |
| { | |
| "epoch": 2.1026936026936025, | |
| "grad_norm": 6.742152214050293, | |
| "learning_rate": 3.031928620328632e-07, | |
| "loss": 0.5878009796142578, | |
| "step": 2498 | |
| }, | |
| { | |
| "epoch": 2.1043771043771042, | |
| "grad_norm": 8.78212833404541, | |
| "learning_rate": 3.024948434122981e-07, | |
| "loss": 0.6806055307388306, | |
| "step": 2500 | |
| }, | |
| { | |
| "epoch": 2.106060606060606, | |
| "grad_norm": 5.455780029296875, | |
| "learning_rate": 3.017976775511374e-07, | |
| "loss": 1.1094366312026978, | |
| "step": 2502 | |
| }, | |
| { | |
| "epoch": 2.1077441077441077, | |
| "grad_norm": 16.17888641357422, | |
| "learning_rate": 3.011013668514106e-07, | |
| "loss": 0.9498310089111328, | |
| "step": 2504 | |
| }, | |
| { | |
| "epoch": 2.1094276094276094, | |
| "grad_norm": 33.786502838134766, | |
| "learning_rate": 3.0040591371220126e-07, | |
| "loss": 0.9682769775390625, | |
| "step": 2506 | |
| }, | |
| { | |
| "epoch": 2.111111111111111, | |
| "grad_norm": 4.037204265594482, | |
| "learning_rate": 2.997113205296381e-07, | |
| "loss": 0.6556534171104431, | |
| "step": 2508 | |
| }, | |
| { | |
| "epoch": 2.112794612794613, | |
| "grad_norm": 4.9933576583862305, | |
| "learning_rate": 2.990175896968867e-07, | |
| "loss": 0.6443968415260315, | |
| "step": 2510 | |
| }, | |
| { | |
| "epoch": 2.1144781144781146, | |
| "grad_norm": 9.915764808654785, | |
| "learning_rate": 2.983247236041416e-07, | |
| "loss": 0.8275219202041626, | |
| "step": 2512 | |
| }, | |
| { | |
| "epoch": 2.1161616161616164, | |
| "grad_norm": 8.728922843933105, | |
| "learning_rate": 2.9763272463861846e-07, | |
| "loss": 0.4485883116722107, | |
| "step": 2514 | |
| }, | |
| { | |
| "epoch": 2.1178451178451176, | |
| "grad_norm": 4.324676513671875, | |
| "learning_rate": 2.9694159518454436e-07, | |
| "loss": 1.0087292194366455, | |
| "step": 2516 | |
| }, | |
| { | |
| "epoch": 2.1195286195286194, | |
| "grad_norm": 29.97382164001465, | |
| "learning_rate": 2.9625133762315134e-07, | |
| "loss": 0.30623072385787964, | |
| "step": 2518 | |
| }, | |
| { | |
| "epoch": 2.121212121212121, | |
| "grad_norm": 13.100899696350098, | |
| "learning_rate": 2.9556195433266724e-07, | |
| "loss": 0.5369913578033447, | |
| "step": 2520 | |
| }, | |
| { | |
| "epoch": 2.122895622895623, | |
| "grad_norm": 5.657482147216797, | |
| "learning_rate": 2.94873447688307e-07, | |
| "loss": 0.3709213137626648, | |
| "step": 2522 | |
| }, | |
| { | |
| "epoch": 2.1245791245791246, | |
| "grad_norm": 8.130796432495117, | |
| "learning_rate": 2.9418582006226644e-07, | |
| "loss": 0.528016209602356, | |
| "step": 2524 | |
| }, | |
| { | |
| "epoch": 2.1262626262626263, | |
| "grad_norm": 12.914457321166992, | |
| "learning_rate": 2.9349907382371175e-07, | |
| "loss": 0.5530096888542175, | |
| "step": 2526 | |
| }, | |
| { | |
| "epoch": 2.127946127946128, | |
| "grad_norm": 13.510022163391113, | |
| "learning_rate": 2.9281321133877256e-07, | |
| "loss": 0.4185825288295746, | |
| "step": 2528 | |
| }, | |
| { | |
| "epoch": 2.1296296296296298, | |
| "grad_norm": 4.050384998321533, | |
| "learning_rate": 2.921282349705338e-07, | |
| "loss": 0.6386127471923828, | |
| "step": 2530 | |
| }, | |
| { | |
| "epoch": 2.1313131313131315, | |
| "grad_norm": 6.590632915496826, | |
| "learning_rate": 2.914441470790274e-07, | |
| "loss": 0.9100687503814697, | |
| "step": 2532 | |
| }, | |
| { | |
| "epoch": 2.1329966329966332, | |
| "grad_norm": 4.762322425842285, | |
| "learning_rate": 2.9076095002122373e-07, | |
| "loss": 0.5006492137908936, | |
| "step": 2534 | |
| }, | |
| { | |
| "epoch": 2.1346801346801345, | |
| "grad_norm": 5.085036754608154, | |
| "learning_rate": 2.900786461510243e-07, | |
| "loss": 0.7980141639709473, | |
| "step": 2536 | |
| }, | |
| { | |
| "epoch": 2.1363636363636362, | |
| "grad_norm": 7.086611270904541, | |
| "learning_rate": 2.8939723781925304e-07, | |
| "loss": 0.5176095962524414, | |
| "step": 2538 | |
| }, | |
| { | |
| "epoch": 2.138047138047138, | |
| "grad_norm": 8.522965431213379, | |
| "learning_rate": 2.8871672737364814e-07, | |
| "loss": 0.4830123782157898, | |
| "step": 2540 | |
| }, | |
| { | |
| "epoch": 2.1397306397306397, | |
| "grad_norm": 9.686579704284668, | |
| "learning_rate": 2.8803711715885457e-07, | |
| "loss": 0.7633793354034424, | |
| "step": 2542 | |
| }, | |
| { | |
| "epoch": 2.1414141414141414, | |
| "grad_norm": 3.3301565647125244, | |
| "learning_rate": 2.8735840951641566e-07, | |
| "loss": 0.21130666136741638, | |
| "step": 2544 | |
| }, | |
| { | |
| "epoch": 2.143097643097643, | |
| "grad_norm": 2.9118270874023438, | |
| "learning_rate": 2.866806067847645e-07, | |
| "loss": 0.4212937355041504, | |
| "step": 2546 | |
| }, | |
| { | |
| "epoch": 2.144781144781145, | |
| "grad_norm": 3.857438087463379, | |
| "learning_rate": 2.860037112992167e-07, | |
| "loss": 0.7907487154006958, | |
| "step": 2548 | |
| }, | |
| { | |
| "epoch": 2.1464646464646466, | |
| "grad_norm": 3.103694438934326, | |
| "learning_rate": 2.8532772539196236e-07, | |
| "loss": 0.9942638874053955, | |
| "step": 2550 | |
| }, | |
| { | |
| "epoch": 2.148148148148148, | |
| "grad_norm": 5.230748653411865, | |
| "learning_rate": 2.8465265139205696e-07, | |
| "loss": 0.6354756951332092, | |
| "step": 2552 | |
| }, | |
| { | |
| "epoch": 2.1498316498316496, | |
| "grad_norm": 22.947580337524414, | |
| "learning_rate": 2.839784916254147e-07, | |
| "loss": 0.3525312840938568, | |
| "step": 2554 | |
| }, | |
| { | |
| "epoch": 2.1515151515151514, | |
| "grad_norm": 4.191389083862305, | |
| "learning_rate": 2.8330524841479964e-07, | |
| "loss": 0.6104186773300171, | |
| "step": 2556 | |
| }, | |
| { | |
| "epoch": 2.153198653198653, | |
| "grad_norm": 3.742684841156006, | |
| "learning_rate": 2.8263292407981777e-07, | |
| "loss": 0.6527650356292725, | |
| "step": 2558 | |
| }, | |
| { | |
| "epoch": 2.154882154882155, | |
| "grad_norm": 17.96328353881836, | |
| "learning_rate": 2.819615209369093e-07, | |
| "loss": 0.5300241112709045, | |
| "step": 2560 | |
| }, | |
| { | |
| "epoch": 2.1565656565656566, | |
| "grad_norm": 27.818559646606445, | |
| "learning_rate": 2.812910412993409e-07, | |
| "loss": 0.5620636940002441, | |
| "step": 2562 | |
| }, | |
| { | |
| "epoch": 2.1582491582491583, | |
| "grad_norm": 3.4521737098693848, | |
| "learning_rate": 2.806214874771965e-07, | |
| "loss": 0.8342366218566895, | |
| "step": 2564 | |
| }, | |
| { | |
| "epoch": 2.15993265993266, | |
| "grad_norm": 7.760178089141846, | |
| "learning_rate": 2.799528617773711e-07, | |
| "loss": 0.6607711315155029, | |
| "step": 2566 | |
| }, | |
| { | |
| "epoch": 2.1616161616161618, | |
| "grad_norm": 4.2295379638671875, | |
| "learning_rate": 2.792851665035616e-07, | |
| "loss": 0.5361987352371216, | |
| "step": 2568 | |
| }, | |
| { | |
| "epoch": 2.1632996632996635, | |
| "grad_norm": 4.252224922180176, | |
| "learning_rate": 2.7861840395625887e-07, | |
| "loss": 1.0253345966339111, | |
| "step": 2570 | |
| }, | |
| { | |
| "epoch": 2.164983164983165, | |
| "grad_norm": 5.132496356964111, | |
| "learning_rate": 2.779525764327406e-07, | |
| "loss": 1.1341686248779297, | |
| "step": 2572 | |
| }, | |
| { | |
| "epoch": 2.1666666666666665, | |
| "grad_norm": 10.062471389770508, | |
| "learning_rate": 2.7728768622706294e-07, | |
| "loss": 0.8332287073135376, | |
| "step": 2574 | |
| }, | |
| { | |
| "epoch": 2.1683501683501682, | |
| "grad_norm": 11.787501335144043, | |
| "learning_rate": 2.7662373563005206e-07, | |
| "loss": 0.3077271282672882, | |
| "step": 2576 | |
| }, | |
| { | |
| "epoch": 2.17003367003367, | |
| "grad_norm": 6.277064323425293, | |
| "learning_rate": 2.7596072692929724e-07, | |
| "loss": 0.7766256332397461, | |
| "step": 2578 | |
| }, | |
| { | |
| "epoch": 2.1717171717171717, | |
| "grad_norm": 22.47462272644043, | |
| "learning_rate": 2.752986624091427e-07, | |
| "loss": 0.32620465755462646, | |
| "step": 2580 | |
| }, | |
| { | |
| "epoch": 2.1734006734006734, | |
| "grad_norm": 7.154234886169434, | |
| "learning_rate": 2.746375443506788e-07, | |
| "loss": 0.5342273116111755, | |
| "step": 2582 | |
| }, | |
| { | |
| "epoch": 2.175084175084175, | |
| "grad_norm": 10.44245719909668, | |
| "learning_rate": 2.739773750317358e-07, | |
| "loss": 0.45068609714508057, | |
| "step": 2584 | |
| }, | |
| { | |
| "epoch": 2.176767676767677, | |
| "grad_norm": 7.9054155349731445, | |
| "learning_rate": 2.7331815672687476e-07, | |
| "loss": 0.6677770614624023, | |
| "step": 2586 | |
| }, | |
| { | |
| "epoch": 2.1784511784511786, | |
| "grad_norm": 10.807048797607422, | |
| "learning_rate": 2.726598917073798e-07, | |
| "loss": 0.7541825175285339, | |
| "step": 2588 | |
| }, | |
| { | |
| "epoch": 2.18013468013468, | |
| "grad_norm": 7.9458746910095215, | |
| "learning_rate": 2.720025822412512e-07, | |
| "loss": 0.7445704340934753, | |
| "step": 2590 | |
| }, | |
| { | |
| "epoch": 2.1818181818181817, | |
| "grad_norm": 12.376832962036133, | |
| "learning_rate": 2.713462305931966e-07, | |
| "loss": 0.5584303736686707, | |
| "step": 2592 | |
| }, | |
| { | |
| "epoch": 2.1835016835016834, | |
| "grad_norm": 3.6782195568084717, | |
| "learning_rate": 2.706908390246232e-07, | |
| "loss": 0.42317822575569153, | |
| "step": 2594 | |
| }, | |
| { | |
| "epoch": 2.185185185185185, | |
| "grad_norm": 33.567867279052734, | |
| "learning_rate": 2.7003640979363133e-07, | |
| "loss": 0.8278957605361938, | |
| "step": 2596 | |
| }, | |
| { | |
| "epoch": 2.186868686868687, | |
| "grad_norm": 8.654823303222656, | |
| "learning_rate": 2.6938294515500463e-07, | |
| "loss": 0.8979749083518982, | |
| "step": 2598 | |
| }, | |
| { | |
| "epoch": 2.1885521885521886, | |
| "grad_norm": 4.184791564941406, | |
| "learning_rate": 2.687304473602039e-07, | |
| "loss": 0.7217346429824829, | |
| "step": 2600 | |
| }, | |
| { | |
| "epoch": 2.1902356902356903, | |
| "grad_norm": 22.945192337036133, | |
| "learning_rate": 2.6807891865735865e-07, | |
| "loss": 0.9164705276489258, | |
| "step": 2602 | |
| }, | |
| { | |
| "epoch": 2.191919191919192, | |
| "grad_norm": 8.126714706420898, | |
| "learning_rate": 2.674283612912591e-07, | |
| "loss": 0.853008508682251, | |
| "step": 2604 | |
| }, | |
| { | |
| "epoch": 2.1936026936026938, | |
| "grad_norm": 9.629704475402832, | |
| "learning_rate": 2.6677877750334935e-07, | |
| "loss": 0.6331396102905273, | |
| "step": 2606 | |
| }, | |
| { | |
| "epoch": 2.1952861952861955, | |
| "grad_norm": 3.6879630088806152, | |
| "learning_rate": 2.6613016953171894e-07, | |
| "loss": 0.9496104121208191, | |
| "step": 2608 | |
| }, | |
| { | |
| "epoch": 2.196969696969697, | |
| "grad_norm": 3.8279647827148438, | |
| "learning_rate": 2.65482539611095e-07, | |
| "loss": 0.7846404910087585, | |
| "step": 2610 | |
| }, | |
| { | |
| "epoch": 2.1986531986531985, | |
| "grad_norm": 20.262147903442383, | |
| "learning_rate": 2.648358899728351e-07, | |
| "loss": 0.489252507686615, | |
| "step": 2612 | |
| }, | |
| { | |
| "epoch": 2.2003367003367003, | |
| "grad_norm": 8.78268051147461, | |
| "learning_rate": 2.6419022284491965e-07, | |
| "loss": 0.8057292699813843, | |
| "step": 2614 | |
| }, | |
| { | |
| "epoch": 2.202020202020202, | |
| "grad_norm": 4.244235038757324, | |
| "learning_rate": 2.635455404519433e-07, | |
| "loss": 0.6223278641700745, | |
| "step": 2616 | |
| }, | |
| { | |
| "epoch": 2.2037037037037037, | |
| "grad_norm": 11.954898834228516, | |
| "learning_rate": 2.629018450151081e-07, | |
| "loss": 0.5752437114715576, | |
| "step": 2618 | |
| }, | |
| { | |
| "epoch": 2.2053872053872055, | |
| "grad_norm": 13.8149995803833, | |
| "learning_rate": 2.6225913875221594e-07, | |
| "loss": 0.3817511796951294, | |
| "step": 2620 | |
| }, | |
| { | |
| "epoch": 2.207070707070707, | |
| "grad_norm": 4.223384857177734, | |
| "learning_rate": 2.6161742387766e-07, | |
| "loss": 0.6272555589675903, | |
| "step": 2622 | |
| }, | |
| { | |
| "epoch": 2.208754208754209, | |
| "grad_norm": 21.69821548461914, | |
| "learning_rate": 2.609767026024182e-07, | |
| "loss": 0.7172547578811646, | |
| "step": 2624 | |
| }, | |
| { | |
| "epoch": 2.2104377104377106, | |
| "grad_norm": 8.276639938354492, | |
| "learning_rate": 2.6033697713404514e-07, | |
| "loss": 0.6655735373497009, | |
| "step": 2626 | |
| }, | |
| { | |
| "epoch": 2.212121212121212, | |
| "grad_norm": 8.716046333312988, | |
| "learning_rate": 2.5969824967666374e-07, | |
| "loss": 0.6124321818351746, | |
| "step": 2628 | |
| }, | |
| { | |
| "epoch": 2.2138047138047137, | |
| "grad_norm": 8.070818901062012, | |
| "learning_rate": 2.590605224309592e-07, | |
| "loss": 0.4091968536376953, | |
| "step": 2630 | |
| }, | |
| { | |
| "epoch": 2.2154882154882154, | |
| "grad_norm": 5.510800361633301, | |
| "learning_rate": 2.5842379759417023e-07, | |
| "loss": 0.521186113357544, | |
| "step": 2632 | |
| }, | |
| { | |
| "epoch": 2.217171717171717, | |
| "grad_norm": 39.2961540222168, | |
| "learning_rate": 2.5778807736008153e-07, | |
| "loss": 0.2366686761379242, | |
| "step": 2634 | |
| }, | |
| { | |
| "epoch": 2.218855218855219, | |
| "grad_norm": 3.912489891052246, | |
| "learning_rate": 2.5715336391901695e-07, | |
| "loss": 0.8710294961929321, | |
| "step": 2636 | |
| }, | |
| { | |
| "epoch": 2.2205387205387206, | |
| "grad_norm": 4.83061408996582, | |
| "learning_rate": 2.565196594578315e-07, | |
| "loss": 1.1489973068237305, | |
| "step": 2638 | |
| }, | |
| { | |
| "epoch": 2.2222222222222223, | |
| "grad_norm": 4.629734516143799, | |
| "learning_rate": 2.5588696615990336e-07, | |
| "loss": 1.078352928161621, | |
| "step": 2640 | |
| }, | |
| { | |
| "epoch": 2.223905723905724, | |
| "grad_norm": 7.968264102935791, | |
| "learning_rate": 2.5525528620512737e-07, | |
| "loss": 0.7425380349159241, | |
| "step": 2642 | |
| }, | |
| { | |
| "epoch": 2.225589225589226, | |
| "grad_norm": 13.291003227233887, | |
| "learning_rate": 2.5462462176990686e-07, | |
| "loss": 0.7818918228149414, | |
| "step": 2644 | |
| }, | |
| { | |
| "epoch": 2.227272727272727, | |
| "grad_norm": 11.734708786010742, | |
| "learning_rate": 2.539949750271458e-07, | |
| "loss": 0.7145400047302246, | |
| "step": 2646 | |
| }, | |
| { | |
| "epoch": 2.228956228956229, | |
| "grad_norm": 5.949611186981201, | |
| "learning_rate": 2.533663481462424e-07, | |
| "loss": 0.4055989980697632, | |
| "step": 2648 | |
| }, | |
| { | |
| "epoch": 2.2306397306397305, | |
| "grad_norm": 5.281031608581543, | |
| "learning_rate": 2.5273874329308083e-07, | |
| "loss": 1.0042195320129395, | |
| "step": 2650 | |
| }, | |
| { | |
| "epoch": 2.2323232323232323, | |
| "grad_norm": 8.864117622375488, | |
| "learning_rate": 2.5211216263002375e-07, | |
| "loss": 0.604977011680603, | |
| "step": 2652 | |
| }, | |
| { | |
| "epoch": 2.234006734006734, | |
| "grad_norm": 28.879344940185547, | |
| "learning_rate": 2.514866083159053e-07, | |
| "loss": 0.566184937953949, | |
| "step": 2654 | |
| }, | |
| { | |
| "epoch": 2.2356902356902357, | |
| "grad_norm": 7.084741592407227, | |
| "learning_rate": 2.508620825060231e-07, | |
| "loss": 0.6506372094154358, | |
| "step": 2656 | |
| }, | |
| { | |
| "epoch": 2.2373737373737375, | |
| "grad_norm": 22.613136291503906, | |
| "learning_rate": 2.5023858735213156e-07, | |
| "loss": 0.9167625904083252, | |
| "step": 2658 | |
| }, | |
| { | |
| "epoch": 2.239057239057239, | |
| "grad_norm": 6.915469169616699, | |
| "learning_rate": 2.4961612500243364e-07, | |
| "loss": 0.7674777507781982, | |
| "step": 2660 | |
| }, | |
| { | |
| "epoch": 2.240740740740741, | |
| "grad_norm": 8.177582740783691, | |
| "learning_rate": 2.4899469760157413e-07, | |
| "loss": 0.8097570538520813, | |
| "step": 2662 | |
| }, | |
| { | |
| "epoch": 2.242424242424242, | |
| "grad_norm": 14.568964004516602, | |
| "learning_rate": 2.48374307290632e-07, | |
| "loss": 0.4266725182533264, | |
| "step": 2664 | |
| }, | |
| { | |
| "epoch": 2.244107744107744, | |
| "grad_norm": 4.135527610778809, | |
| "learning_rate": 2.4775495620711254e-07, | |
| "loss": 0.7610059976577759, | |
| "step": 2666 | |
| }, | |
| { | |
| "epoch": 2.2457912457912457, | |
| "grad_norm": 7.860456466674805, | |
| "learning_rate": 2.4713664648494133e-07, | |
| "loss": 0.6509280204772949, | |
| "step": 2668 | |
| }, | |
| { | |
| "epoch": 2.2474747474747474, | |
| "grad_norm": 7.511784553527832, | |
| "learning_rate": 2.465193802544552e-07, | |
| "loss": 0.5061072111129761, | |
| "step": 2670 | |
| }, | |
| { | |
| "epoch": 2.249158249158249, | |
| "grad_norm": 4.732418060302734, | |
| "learning_rate": 2.4590315964239606e-07, | |
| "loss": 0.36101067066192627, | |
| "step": 2672 | |
| }, | |
| { | |
| "epoch": 2.250841750841751, | |
| "grad_norm": 10.225937843322754, | |
| "learning_rate": 2.452879867719034e-07, | |
| "loss": 0.6636744737625122, | |
| "step": 2674 | |
| }, | |
| { | |
| "epoch": 2.2525252525252526, | |
| "grad_norm": 6.152078628540039, | |
| "learning_rate": 2.4467386376250633e-07, | |
| "loss": 0.8210121989250183, | |
| "step": 2676 | |
| }, | |
| { | |
| "epoch": 2.2542087542087543, | |
| "grad_norm": 6.384221076965332, | |
| "learning_rate": 2.440607927301171e-07, | |
| "loss": 0.5604538917541504, | |
| "step": 2678 | |
| }, | |
| { | |
| "epoch": 2.255892255892256, | |
| "grad_norm": 3.0290005207061768, | |
| "learning_rate": 2.4344877578702355e-07, | |
| "loss": 0.9680004119873047, | |
| "step": 2680 | |
| }, | |
| { | |
| "epoch": 2.257575757575758, | |
| "grad_norm": 8.649748802185059, | |
| "learning_rate": 2.4283781504188126e-07, | |
| "loss": 0.2856512665748596, | |
| "step": 2682 | |
| }, | |
| { | |
| "epoch": 2.259259259259259, | |
| "grad_norm": 12.650278091430664, | |
| "learning_rate": 2.422279125997073e-07, | |
| "loss": 0.21757878363132477, | |
| "step": 2684 | |
| }, | |
| { | |
| "epoch": 2.260942760942761, | |
| "grad_norm": 5.625198841094971, | |
| "learning_rate": 2.416190705618722e-07, | |
| "loss": 0.7161245346069336, | |
| "step": 2686 | |
| }, | |
| { | |
| "epoch": 2.2626262626262625, | |
| "grad_norm": 3.8364768028259277, | |
| "learning_rate": 2.4101129102609273e-07, | |
| "loss": 0.44631901383399963, | |
| "step": 2688 | |
| }, | |
| { | |
| "epoch": 2.2643097643097643, | |
| "grad_norm": 2.489049196243286, | |
| "learning_rate": 2.404045760864253e-07, | |
| "loss": 1.060034155845642, | |
| "step": 2690 | |
| }, | |
| { | |
| "epoch": 2.265993265993266, | |
| "grad_norm": 22.317943572998047, | |
| "learning_rate": 2.397989278332583e-07, | |
| "loss": 0.8590011596679688, | |
| "step": 2692 | |
| }, | |
| { | |
| "epoch": 2.2676767676767677, | |
| "grad_norm": 3.2131800651550293, | |
| "learning_rate": 2.391943483533044e-07, | |
| "loss": 0.7794303297996521, | |
| "step": 2694 | |
| }, | |
| { | |
| "epoch": 2.2693602693602695, | |
| "grad_norm": 3.656132936477661, | |
| "learning_rate": 2.385908397295945e-07, | |
| "loss": 0.6720019578933716, | |
| "step": 2696 | |
| }, | |
| { | |
| "epoch": 2.271043771043771, | |
| "grad_norm": 3.8519668579101562, | |
| "learning_rate": 2.3798840404146995e-07, | |
| "loss": 0.7614578008651733, | |
| "step": 2698 | |
| }, | |
| { | |
| "epoch": 2.2727272727272725, | |
| "grad_norm": 4.142553329467773, | |
| "learning_rate": 2.3738704336457484e-07, | |
| "loss": 0.8712958097457886, | |
| "step": 2700 | |
| }, | |
| { | |
| "epoch": 2.274410774410774, | |
| "grad_norm": 6.8363237380981445, | |
| "learning_rate": 2.3678675977084986e-07, | |
| "loss": 0.5424622297286987, | |
| "step": 2702 | |
| }, | |
| { | |
| "epoch": 2.276094276094276, | |
| "grad_norm": 3.5155107975006104, | |
| "learning_rate": 2.3618755532852466e-07, | |
| "loss": 0.973854660987854, | |
| "step": 2704 | |
| }, | |
| { | |
| "epoch": 2.2777777777777777, | |
| "grad_norm": 7.004105091094971, | |
| "learning_rate": 2.3558943210211047e-07, | |
| "loss": 1.0108654499053955, | |
| "step": 2706 | |
| }, | |
| { | |
| "epoch": 2.2794612794612794, | |
| "grad_norm": 1.2474193572998047, | |
| "learning_rate": 2.3499239215239357e-07, | |
| "loss": 0.5368537306785583, | |
| "step": 2708 | |
| }, | |
| { | |
| "epoch": 2.281144781144781, | |
| "grad_norm": 5.437285423278809, | |
| "learning_rate": 2.3439643753642798e-07, | |
| "loss": 0.690973162651062, | |
| "step": 2710 | |
| }, | |
| { | |
| "epoch": 2.282828282828283, | |
| "grad_norm": 11.235260009765625, | |
| "learning_rate": 2.3380157030752775e-07, | |
| "loss": 0.6230310201644897, | |
| "step": 2712 | |
| }, | |
| { | |
| "epoch": 2.2845117845117846, | |
| "grad_norm": 9.484489440917969, | |
| "learning_rate": 2.33207792515261e-07, | |
| "loss": 0.5481805205345154, | |
| "step": 2714 | |
| }, | |
| { | |
| "epoch": 2.2861952861952863, | |
| "grad_norm": 9.018638610839844, | |
| "learning_rate": 2.3261510620544208e-07, | |
| "loss": 0.8037227392196655, | |
| "step": 2716 | |
| }, | |
| { | |
| "epoch": 2.287878787878788, | |
| "grad_norm": 12.419392585754395, | |
| "learning_rate": 2.3202351342012452e-07, | |
| "loss": 0.6880577802658081, | |
| "step": 2718 | |
| }, | |
| { | |
| "epoch": 2.28956228956229, | |
| "grad_norm": 29.25603485107422, | |
| "learning_rate": 2.3143301619759456e-07, | |
| "loss": 0.579788327217102, | |
| "step": 2720 | |
| }, | |
| { | |
| "epoch": 2.291245791245791, | |
| "grad_norm": 12.553728103637695, | |
| "learning_rate": 2.308436165723636e-07, | |
| "loss": 0.7886263132095337, | |
| "step": 2722 | |
| }, | |
| { | |
| "epoch": 2.292929292929293, | |
| "grad_norm": 14.242766380310059, | |
| "learning_rate": 2.3025531657516115e-07, | |
| "loss": 0.7852193117141724, | |
| "step": 2724 | |
| }, | |
| { | |
| "epoch": 2.2946127946127945, | |
| "grad_norm": 7.794075012207031, | |
| "learning_rate": 2.2966811823292842e-07, | |
| "loss": 0.7775453925132751, | |
| "step": 2726 | |
| }, | |
| { | |
| "epoch": 2.2962962962962963, | |
| "grad_norm": 7.859867572784424, | |
| "learning_rate": 2.2908202356881075e-07, | |
| "loss": 0.6673729419708252, | |
| "step": 2728 | |
| }, | |
| { | |
| "epoch": 2.297979797979798, | |
| "grad_norm": 6.257922172546387, | |
| "learning_rate": 2.2849703460215077e-07, | |
| "loss": 1.060187816619873, | |
| "step": 2730 | |
| }, | |
| { | |
| "epoch": 2.2996632996632997, | |
| "grad_norm": 5.627756595611572, | |
| "learning_rate": 2.2791315334848162e-07, | |
| "loss": 0.6064283847808838, | |
| "step": 2732 | |
| }, | |
| { | |
| "epoch": 2.3013468013468015, | |
| "grad_norm": 6.193628787994385, | |
| "learning_rate": 2.2733038181952e-07, | |
| "loss": 0.648173451423645, | |
| "step": 2734 | |
| }, | |
| { | |
| "epoch": 2.303030303030303, | |
| "grad_norm": 10.281158447265625, | |
| "learning_rate": 2.2674872202315892e-07, | |
| "loss": 0.49927544593811035, | |
| "step": 2736 | |
| }, | |
| { | |
| "epoch": 2.3047138047138045, | |
| "grad_norm": 7.590847969055176, | |
| "learning_rate": 2.2616817596346103e-07, | |
| "loss": 0.7152895927429199, | |
| "step": 2738 | |
| }, | |
| { | |
| "epoch": 2.3063973063973062, | |
| "grad_norm": 7.842513084411621, | |
| "learning_rate": 2.2558874564065215e-07, | |
| "loss": 0.5551795959472656, | |
| "step": 2740 | |
| }, | |
| { | |
| "epoch": 2.308080808080808, | |
| "grad_norm": 5.1881890296936035, | |
| "learning_rate": 2.2501043305111313e-07, | |
| "loss": 0.8357152938842773, | |
| "step": 2742 | |
| }, | |
| { | |
| "epoch": 2.3097643097643097, | |
| "grad_norm": 5.037477493286133, | |
| "learning_rate": 2.2443324018737436e-07, | |
| "loss": 0.8395123481750488, | |
| "step": 2744 | |
| }, | |
| { | |
| "epoch": 2.3114478114478114, | |
| "grad_norm": 4.545862674713135, | |
| "learning_rate": 2.2385716903810822e-07, | |
| "loss": 0.8929284811019897, | |
| "step": 2746 | |
| }, | |
| { | |
| "epoch": 2.313131313131313, | |
| "grad_norm": 10.017370223999023, | |
| "learning_rate": 2.2328222158812198e-07, | |
| "loss": 0.707942008972168, | |
| "step": 2748 | |
| }, | |
| { | |
| "epoch": 2.314814814814815, | |
| "grad_norm": 7.563255310058594, | |
| "learning_rate": 2.227083998183516e-07, | |
| "loss": 0.12098832428455353, | |
| "step": 2750 | |
| }, | |
| { | |
| "epoch": 2.3164983164983166, | |
| "grad_norm": 7.330215930938721, | |
| "learning_rate": 2.221357057058546e-07, | |
| "loss": 0.4100933074951172, | |
| "step": 2752 | |
| }, | |
| { | |
| "epoch": 2.3181818181818183, | |
| "grad_norm": 4.7282185554504395, | |
| "learning_rate": 2.2156414122380307e-07, | |
| "loss": 0.5965608358383179, | |
| "step": 2754 | |
| }, | |
| { | |
| "epoch": 2.31986531986532, | |
| "grad_norm": 3.0822274684906006, | |
| "learning_rate": 2.2099370834147712e-07, | |
| "loss": 0.945094645023346, | |
| "step": 2756 | |
| }, | |
| { | |
| "epoch": 2.3215488215488214, | |
| "grad_norm": 7.529977321624756, | |
| "learning_rate": 2.2042440902425822e-07, | |
| "loss": 0.7363934516906738, | |
| "step": 2758 | |
| }, | |
| { | |
| "epoch": 2.323232323232323, | |
| "grad_norm": 13.28249740600586, | |
| "learning_rate": 2.1985624523362185e-07, | |
| "loss": 0.7786830067634583, | |
| "step": 2760 | |
| }, | |
| { | |
| "epoch": 2.324915824915825, | |
| "grad_norm": 11.899820327758789, | |
| "learning_rate": 2.1928921892713132e-07, | |
| "loss": 0.6262949705123901, | |
| "step": 2762 | |
| }, | |
| { | |
| "epoch": 2.3265993265993266, | |
| "grad_norm": 4.841851234436035, | |
| "learning_rate": 2.187233320584311e-07, | |
| "loss": 0.9699975252151489, | |
| "step": 2764 | |
| }, | |
| { | |
| "epoch": 2.3282828282828283, | |
| "grad_norm": 9.435696601867676, | |
| "learning_rate": 2.181585865772393e-07, | |
| "loss": 0.8197389245033264, | |
| "step": 2766 | |
| }, | |
| { | |
| "epoch": 2.32996632996633, | |
| "grad_norm": 4.551506042480469, | |
| "learning_rate": 2.175949844293417e-07, | |
| "loss": 0.6494600772857666, | |
| "step": 2768 | |
| }, | |
| { | |
| "epoch": 2.3316498316498318, | |
| "grad_norm": 19.35220718383789, | |
| "learning_rate": 2.1703252755658512e-07, | |
| "loss": 0.7402999997138977, | |
| "step": 2770 | |
| }, | |
| { | |
| "epoch": 2.3333333333333335, | |
| "grad_norm": 5.450087070465088, | |
| "learning_rate": 2.1647121789686985e-07, | |
| "loss": 0.7242530584335327, | |
| "step": 2772 | |
| }, | |
| { | |
| "epoch": 2.3350168350168348, | |
| "grad_norm": 6.281241416931152, | |
| "learning_rate": 2.1591105738414395e-07, | |
| "loss": 0.7737699151039124, | |
| "step": 2774 | |
| }, | |
| { | |
| "epoch": 2.3367003367003365, | |
| "grad_norm": 4.79439640045166, | |
| "learning_rate": 2.153520479483962e-07, | |
| "loss": 0.7753046751022339, | |
| "step": 2776 | |
| }, | |
| { | |
| "epoch": 2.3383838383838382, | |
| "grad_norm": 7.926896095275879, | |
| "learning_rate": 2.1479419151564908e-07, | |
| "loss": 0.5965973138809204, | |
| "step": 2778 | |
| }, | |
| { | |
| "epoch": 2.34006734006734, | |
| "grad_norm": 13.744224548339844, | |
| "learning_rate": 2.1423749000795286e-07, | |
| "loss": 0.7432798743247986, | |
| "step": 2780 | |
| }, | |
| { | |
| "epoch": 2.3417508417508417, | |
| "grad_norm": 4.1591949462890625, | |
| "learning_rate": 2.1368194534337864e-07, | |
| "loss": 0.6963976621627808, | |
| "step": 2782 | |
| }, | |
| { | |
| "epoch": 2.3434343434343434, | |
| "grad_norm": 5.26281213760376, | |
| "learning_rate": 2.1312755943601113e-07, | |
| "loss": 0.8363964557647705, | |
| "step": 2784 | |
| }, | |
| { | |
| "epoch": 2.345117845117845, | |
| "grad_norm": 4.026867389678955, | |
| "learning_rate": 2.1257433419594329e-07, | |
| "loss": 0.6121779680252075, | |
| "step": 2786 | |
| }, | |
| { | |
| "epoch": 2.346801346801347, | |
| "grad_norm": 3.700312614440918, | |
| "learning_rate": 2.1202227152926898e-07, | |
| "loss": 1.0569815635681152, | |
| "step": 2788 | |
| }, | |
| { | |
| "epoch": 2.3484848484848486, | |
| "grad_norm": 5.786956310272217, | |
| "learning_rate": 2.114713733380761e-07, | |
| "loss": 0.8500775098800659, | |
| "step": 2790 | |
| }, | |
| { | |
| "epoch": 2.3501683501683504, | |
| "grad_norm": 3.6336448192596436, | |
| "learning_rate": 2.1092164152044082e-07, | |
| "loss": 0.6126809120178223, | |
| "step": 2792 | |
| }, | |
| { | |
| "epoch": 2.351851851851852, | |
| "grad_norm": 16.343307495117188, | |
| "learning_rate": 2.1037307797042073e-07, | |
| "loss": 0.7721902132034302, | |
| "step": 2794 | |
| }, | |
| { | |
| "epoch": 2.3535353535353534, | |
| "grad_norm": 4.7194600105285645, | |
| "learning_rate": 2.0982568457804772e-07, | |
| "loss": 1.0643179416656494, | |
| "step": 2796 | |
| }, | |
| { | |
| "epoch": 2.355218855218855, | |
| "grad_norm": 5.305932998657227, | |
| "learning_rate": 2.0927946322932257e-07, | |
| "loss": 0.6048824191093445, | |
| "step": 2798 | |
| }, | |
| { | |
| "epoch": 2.356902356902357, | |
| "grad_norm": 2.0404253005981445, | |
| "learning_rate": 2.0873441580620778e-07, | |
| "loss": 1.1490514278411865, | |
| "step": 2800 | |
| }, | |
| { | |
| "epoch": 2.3585858585858586, | |
| "grad_norm": 4.3384480476379395, | |
| "learning_rate": 2.0819054418662068e-07, | |
| "loss": 1.0097895860671997, | |
| "step": 2802 | |
| }, | |
| { | |
| "epoch": 2.3602693602693603, | |
| "grad_norm": 7.471581935882568, | |
| "learning_rate": 2.0764785024442816e-07, | |
| "loss": 0.8789470791816711, | |
| "step": 2804 | |
| }, | |
| { | |
| "epoch": 2.361952861952862, | |
| "grad_norm": 9.630654335021973, | |
| "learning_rate": 2.071063358494392e-07, | |
| "loss": 0.8657972812652588, | |
| "step": 2806 | |
| }, | |
| { | |
| "epoch": 2.3636363636363638, | |
| "grad_norm": 9.908742904663086, | |
| "learning_rate": 2.0656600286739846e-07, | |
| "loss": 0.9500114917755127, | |
| "step": 2808 | |
| }, | |
| { | |
| "epoch": 2.3653198653198655, | |
| "grad_norm": 3.0417370796203613, | |
| "learning_rate": 2.060268531599806e-07, | |
| "loss": 1.0881528854370117, | |
| "step": 2810 | |
| }, | |
| { | |
| "epoch": 2.3670033670033668, | |
| "grad_norm": 15.979384422302246, | |
| "learning_rate": 2.0548888858478314e-07, | |
| "loss": 0.8370237350463867, | |
| "step": 2812 | |
| }, | |
| { | |
| "epoch": 2.3686868686868685, | |
| "grad_norm": 2.701646327972412, | |
| "learning_rate": 2.0495211099532051e-07, | |
| "loss": 0.7017450332641602, | |
| "step": 2814 | |
| }, | |
| { | |
| "epoch": 2.3703703703703702, | |
| "grad_norm": 3.518488645553589, | |
| "learning_rate": 2.0441652224101739e-07, | |
| "loss": 0.7352346777915955, | |
| "step": 2816 | |
| }, | |
| { | |
| "epoch": 2.372053872053872, | |
| "grad_norm": 5.064514636993408, | |
| "learning_rate": 2.038821241672022e-07, | |
| "loss": 0.7799332141876221, | |
| "step": 2818 | |
| }, | |
| { | |
| "epoch": 2.3737373737373737, | |
| "grad_norm": 4.822017192840576, | |
| "learning_rate": 2.0334891861510124e-07, | |
| "loss": 0.8013976812362671, | |
| "step": 2820 | |
| }, | |
| { | |
| "epoch": 2.3754208754208754, | |
| "grad_norm": 13.20271110534668, | |
| "learning_rate": 2.0281690742183214e-07, | |
| "loss": 0.5635098814964294, | |
| "step": 2822 | |
| }, | |
| { | |
| "epoch": 2.377104377104377, | |
| "grad_norm": 4.322653293609619, | |
| "learning_rate": 2.0228609242039707e-07, | |
| "loss": 1.05335533618927, | |
| "step": 2824 | |
| }, | |
| { | |
| "epoch": 2.378787878787879, | |
| "grad_norm": 8.060440063476562, | |
| "learning_rate": 2.017564754396771e-07, | |
| "loss": 0.9288073778152466, | |
| "step": 2826 | |
| }, | |
| { | |
| "epoch": 2.3804713804713806, | |
| "grad_norm": 6.93074369430542, | |
| "learning_rate": 2.012280583044258e-07, | |
| "loss": 0.49736571311950684, | |
| "step": 2828 | |
| }, | |
| { | |
| "epoch": 2.3821548821548824, | |
| "grad_norm": 11.825316429138184, | |
| "learning_rate": 2.0070084283526223e-07, | |
| "loss": 1.044695258140564, | |
| "step": 2830 | |
| }, | |
| { | |
| "epoch": 2.3838383838383836, | |
| "grad_norm": 7.59405517578125, | |
| "learning_rate": 2.001748308486656e-07, | |
| "loss": 0.8302027583122253, | |
| "step": 2832 | |
| }, | |
| { | |
| "epoch": 2.3855218855218854, | |
| "grad_norm": 3.9063162803649902, | |
| "learning_rate": 1.9965002415696878e-07, | |
| "loss": 0.658703088760376, | |
| "step": 2834 | |
| }, | |
| { | |
| "epoch": 2.387205387205387, | |
| "grad_norm": 9.86563491821289, | |
| "learning_rate": 1.9912642456835125e-07, | |
| "loss": 0.6858144998550415, | |
| "step": 2836 | |
| }, | |
| { | |
| "epoch": 2.388888888888889, | |
| "grad_norm": 4.106326580047607, | |
| "learning_rate": 1.9860403388683408e-07, | |
| "loss": 0.5258500576019287, | |
| "step": 2838 | |
| }, | |
| { | |
| "epoch": 2.3905723905723906, | |
| "grad_norm": 3.920785427093506, | |
| "learning_rate": 1.980828539122731e-07, | |
| "loss": 0.9032931327819824, | |
| "step": 2840 | |
| }, | |
| { | |
| "epoch": 2.3922558922558923, | |
| "grad_norm": 1.4234728813171387, | |
| "learning_rate": 1.9756288644035244e-07, | |
| "loss": 0.43326181173324585, | |
| "step": 2842 | |
| }, | |
| { | |
| "epoch": 2.393939393939394, | |
| "grad_norm": 4.104327201843262, | |
| "learning_rate": 1.970441332625788e-07, | |
| "loss": 1.114197015762329, | |
| "step": 2844 | |
| }, | |
| { | |
| "epoch": 2.3956228956228958, | |
| "grad_norm": 7.699793815612793, | |
| "learning_rate": 1.965265961662753e-07, | |
| "loss": 0.8347800970077515, | |
| "step": 2846 | |
| }, | |
| { | |
| "epoch": 2.3973063973063975, | |
| "grad_norm": 4.057286262512207, | |
| "learning_rate": 1.9601027693457485e-07, | |
| "loss": 1.1171047687530518, | |
| "step": 2848 | |
| }, | |
| { | |
| "epoch": 2.398989898989899, | |
| "grad_norm": 4.676527976989746, | |
| "learning_rate": 1.9549517734641453e-07, | |
| "loss": 0.8913414478302002, | |
| "step": 2850 | |
| }, | |
| { | |
| "epoch": 2.4006734006734005, | |
| "grad_norm": 5.339909076690674, | |
| "learning_rate": 1.9498129917652917e-07, | |
| "loss": 0.5060603022575378, | |
| "step": 2852 | |
| }, | |
| { | |
| "epoch": 2.4023569023569022, | |
| "grad_norm": 7.147670269012451, | |
| "learning_rate": 1.9446864419544517e-07, | |
| "loss": 0.7295070886611938, | |
| "step": 2854 | |
| }, | |
| { | |
| "epoch": 2.404040404040404, | |
| "grad_norm": 6.569252014160156, | |
| "learning_rate": 1.9395721416947475e-07, | |
| "loss": 0.6507788896560669, | |
| "step": 2856 | |
| }, | |
| { | |
| "epoch": 2.4057239057239057, | |
| "grad_norm": 6.89575719833374, | |
| "learning_rate": 1.9344701086070957e-07, | |
| "loss": 0.7100333571434021, | |
| "step": 2858 | |
| }, | |
| { | |
| "epoch": 2.4074074074074074, | |
| "grad_norm": 7.443199634552002, | |
| "learning_rate": 1.9293803602701458e-07, | |
| "loss": 0.49127644300460815, | |
| "step": 2860 | |
| }, | |
| { | |
| "epoch": 2.409090909090909, | |
| "grad_norm": 3.398568868637085, | |
| "learning_rate": 1.924302914220222e-07, | |
| "loss": 0.8142455816268921, | |
| "step": 2862 | |
| }, | |
| { | |
| "epoch": 2.410774410774411, | |
| "grad_norm": 7.437132835388184, | |
| "learning_rate": 1.9192377879512656e-07, | |
| "loss": 0.5337988138198853, | |
| "step": 2864 | |
| }, | |
| { | |
| "epoch": 2.4124579124579126, | |
| "grad_norm": 4.250380516052246, | |
| "learning_rate": 1.914184998914764e-07, | |
| "loss": 0.7382153868675232, | |
| "step": 2866 | |
| }, | |
| { | |
| "epoch": 2.4141414141414144, | |
| "grad_norm": 4.774903297424316, | |
| "learning_rate": 1.9091445645197024e-07, | |
| "loss": 0.9528558254241943, | |
| "step": 2868 | |
| }, | |
| { | |
| "epoch": 2.4158249158249157, | |
| "grad_norm": 3.7426023483276367, | |
| "learning_rate": 1.9041165021324986e-07, | |
| "loss": 0.8381022214889526, | |
| "step": 2870 | |
| }, | |
| { | |
| "epoch": 2.4175084175084174, | |
| "grad_norm": 2.178778648376465, | |
| "learning_rate": 1.899100829076945e-07, | |
| "loss": 0.5464705228805542, | |
| "step": 2872 | |
| }, | |
| { | |
| "epoch": 2.419191919191919, | |
| "grad_norm": 4.025269031524658, | |
| "learning_rate": 1.894097562634142e-07, | |
| "loss": 1.0029910802841187, | |
| "step": 2874 | |
| }, | |
| { | |
| "epoch": 2.420875420875421, | |
| "grad_norm": 5.291448593139648, | |
| "learning_rate": 1.8891067200424498e-07, | |
| "loss": 0.8049919605255127, | |
| "step": 2876 | |
| }, | |
| { | |
| "epoch": 2.4225589225589226, | |
| "grad_norm": 3.155411720275879, | |
| "learning_rate": 1.8841283184974216e-07, | |
| "loss": 0.5165250301361084, | |
| "step": 2878 | |
| }, | |
| { | |
| "epoch": 2.4242424242424243, | |
| "grad_norm": 3.550431251525879, | |
| "learning_rate": 1.8791623751517432e-07, | |
| "loss": 0.9810848832130432, | |
| "step": 2880 | |
| }, | |
| { | |
| "epoch": 2.425925925925926, | |
| "grad_norm": 8.65785026550293, | |
| "learning_rate": 1.8742089071151812e-07, | |
| "loss": 0.6320451498031616, | |
| "step": 2882 | |
| }, | |
| { | |
| "epoch": 2.4276094276094278, | |
| "grad_norm": 24.364227294921875, | |
| "learning_rate": 1.8692679314545155e-07, | |
| "loss": 0.691448450088501, | |
| "step": 2884 | |
| }, | |
| { | |
| "epoch": 2.429292929292929, | |
| "grad_norm": 3.4331605434417725, | |
| "learning_rate": 1.8643394651934867e-07, | |
| "loss": 0.5786364078521729, | |
| "step": 2886 | |
| }, | |
| { | |
| "epoch": 2.430976430976431, | |
| "grad_norm": 16.977510452270508, | |
| "learning_rate": 1.8594235253127372e-07, | |
| "loss": 0.6802031993865967, | |
| "step": 2888 | |
| }, | |
| { | |
| "epoch": 2.4326599326599325, | |
| "grad_norm": 5.363550662994385, | |
| "learning_rate": 1.8545201287497442e-07, | |
| "loss": 0.5717660188674927, | |
| "step": 2890 | |
| }, | |
| { | |
| "epoch": 2.4343434343434343, | |
| "grad_norm": 23.09035873413086, | |
| "learning_rate": 1.849629292398774e-07, | |
| "loss": 0.7750734686851501, | |
| "step": 2892 | |
| }, | |
| { | |
| "epoch": 2.436026936026936, | |
| "grad_norm": 2.4943952560424805, | |
| "learning_rate": 1.8447510331108163e-07, | |
| "loss": 0.9770002365112305, | |
| "step": 2894 | |
| }, | |
| { | |
| "epoch": 2.4377104377104377, | |
| "grad_norm": 6.26854133605957, | |
| "learning_rate": 1.839885367693526e-07, | |
| "loss": 0.8726930618286133, | |
| "step": 2896 | |
| }, | |
| { | |
| "epoch": 2.4393939393939394, | |
| "grad_norm": 11.332048416137695, | |
| "learning_rate": 1.8350323129111672e-07, | |
| "loss": 0.7943978309631348, | |
| "step": 2898 | |
| }, | |
| { | |
| "epoch": 2.441077441077441, | |
| "grad_norm": 20.655651092529297, | |
| "learning_rate": 1.8301918854845577e-07, | |
| "loss": 0.5449969172477722, | |
| "step": 2900 | |
| }, | |
| { | |
| "epoch": 2.442760942760943, | |
| "grad_norm": 3.659409761428833, | |
| "learning_rate": 1.8253641020910043e-07, | |
| "loss": 0.9310587644577026, | |
| "step": 2902 | |
| }, | |
| { | |
| "epoch": 2.4444444444444446, | |
| "grad_norm": 11.209527969360352, | |
| "learning_rate": 1.820548979364253e-07, | |
| "loss": 0.5611803531646729, | |
| "step": 2904 | |
| }, | |
| { | |
| "epoch": 2.4461279461279464, | |
| "grad_norm": 6.078222751617432, | |
| "learning_rate": 1.815746533894429e-07, | |
| "loss": 0.4734145998954773, | |
| "step": 2906 | |
| }, | |
| { | |
| "epoch": 2.4478114478114477, | |
| "grad_norm": 43.15976333618164, | |
| "learning_rate": 1.8109567822279753e-07, | |
| "loss": 0.6027005910873413, | |
| "step": 2908 | |
| }, | |
| { | |
| "epoch": 2.4494949494949494, | |
| "grad_norm": 8.55388355255127, | |
| "learning_rate": 1.8061797408676023e-07, | |
| "loss": 0.7029461860656738, | |
| "step": 2910 | |
| }, | |
| { | |
| "epoch": 2.451178451178451, | |
| "grad_norm": 3.861863374710083, | |
| "learning_rate": 1.801415426272229e-07, | |
| "loss": 0.5813450813293457, | |
| "step": 2912 | |
| }, | |
| { | |
| "epoch": 2.452861952861953, | |
| "grad_norm": 4.0164103507995605, | |
| "learning_rate": 1.796663854856922e-07, | |
| "loss": 0.8507091999053955, | |
| "step": 2914 | |
| }, | |
| { | |
| "epoch": 2.4545454545454546, | |
| "grad_norm": 9.30286693572998, | |
| "learning_rate": 1.7919250429928446e-07, | |
| "loss": 0.7457901239395142, | |
| "step": 2916 | |
| }, | |
| { | |
| "epoch": 2.4562289562289563, | |
| "grad_norm": 1.883726954460144, | |
| "learning_rate": 1.7871990070071987e-07, | |
| "loss": 0.45636504888534546, | |
| "step": 2918 | |
| }, | |
| { | |
| "epoch": 2.457912457912458, | |
| "grad_norm": 4.311119079589844, | |
| "learning_rate": 1.7824857631831648e-07, | |
| "loss": 0.9269647002220154, | |
| "step": 2920 | |
| }, | |
| { | |
| "epoch": 2.45959595959596, | |
| "grad_norm": 19.505645751953125, | |
| "learning_rate": 1.7777853277598522e-07, | |
| "loss": 0.5110766887664795, | |
| "step": 2922 | |
| }, | |
| { | |
| "epoch": 2.461279461279461, | |
| "grad_norm": 3.043074131011963, | |
| "learning_rate": 1.7730977169322397e-07, | |
| "loss": 0.41358011960983276, | |
| "step": 2924 | |
| }, | |
| { | |
| "epoch": 2.462962962962963, | |
| "grad_norm": 3.94612455368042, | |
| "learning_rate": 1.768422946851117e-07, | |
| "loss": 0.7347300052642822, | |
| "step": 2926 | |
| }, | |
| { | |
| "epoch": 2.4646464646464645, | |
| "grad_norm": 13.160529136657715, | |
| "learning_rate": 1.763761033623034e-07, | |
| "loss": 0.652132511138916, | |
| "step": 2928 | |
| }, | |
| { | |
| "epoch": 2.4663299663299663, | |
| "grad_norm": 7.081724643707275, | |
| "learning_rate": 1.7591119933102455e-07, | |
| "loss": 0.4731465280056, | |
| "step": 2930 | |
| }, | |
| { | |
| "epoch": 2.468013468013468, | |
| "grad_norm": 7.2086358070373535, | |
| "learning_rate": 1.7544758419306493e-07, | |
| "loss": 0.8537788391113281, | |
| "step": 2932 | |
| }, | |
| { | |
| "epoch": 2.4696969696969697, | |
| "grad_norm": 5.239010810852051, | |
| "learning_rate": 1.749852595457738e-07, | |
| "loss": 0.7587542533874512, | |
| "step": 2934 | |
| }, | |
| { | |
| "epoch": 2.4713804713804715, | |
| "grad_norm": 7.071168899536133, | |
| "learning_rate": 1.7452422698205427e-07, | |
| "loss": 0.5985921621322632, | |
| "step": 2936 | |
| }, | |
| { | |
| "epoch": 2.473063973063973, | |
| "grad_norm": 3.5129053592681885, | |
| "learning_rate": 1.7406448809035723e-07, | |
| "loss": 0.674223780632019, | |
| "step": 2938 | |
| }, | |
| { | |
| "epoch": 2.474747474747475, | |
| "grad_norm": 4.072961807250977, | |
| "learning_rate": 1.736060444546768e-07, | |
| "loss": 0.6285250186920166, | |
| "step": 2940 | |
| }, | |
| { | |
| "epoch": 2.4764309764309766, | |
| "grad_norm": 5.048702239990234, | |
| "learning_rate": 1.731488976545442e-07, | |
| "loss": 0.5890775918960571, | |
| "step": 2942 | |
| }, | |
| { | |
| "epoch": 2.478114478114478, | |
| "grad_norm": 5.47603178024292, | |
| "learning_rate": 1.726930492650223e-07, | |
| "loss": 0.6147992610931396, | |
| "step": 2944 | |
| }, | |
| { | |
| "epoch": 2.4797979797979797, | |
| "grad_norm": 3.560030221939087, | |
| "learning_rate": 1.7223850085670082e-07, | |
| "loss": 0.9968768358230591, | |
| "step": 2946 | |
| }, | |
| { | |
| "epoch": 2.4814814814814814, | |
| "grad_norm": 2.8818583488464355, | |
| "learning_rate": 1.7178525399569026e-07, | |
| "loss": 1.031359314918518, | |
| "step": 2948 | |
| }, | |
| { | |
| "epoch": 2.483164983164983, | |
| "grad_norm": 2.790241241455078, | |
| "learning_rate": 1.7133331024361668e-07, | |
| "loss": 1.090069055557251, | |
| "step": 2950 | |
| }, | |
| { | |
| "epoch": 2.484848484848485, | |
| "grad_norm": 56.298179626464844, | |
| "learning_rate": 1.7088267115761645e-07, | |
| "loss": 0.9623196125030518, | |
| "step": 2952 | |
| }, | |
| { | |
| "epoch": 2.4865319865319866, | |
| "grad_norm": 4.772038459777832, | |
| "learning_rate": 1.7043333829033093e-07, | |
| "loss": 0.6764428019523621, | |
| "step": 2954 | |
| }, | |
| { | |
| "epoch": 2.4882154882154883, | |
| "grad_norm": 15.111934661865234, | |
| "learning_rate": 1.6998531318990084e-07, | |
| "loss": 0.9181029796600342, | |
| "step": 2956 | |
| }, | |
| { | |
| "epoch": 2.48989898989899, | |
| "grad_norm": 17.119279861450195, | |
| "learning_rate": 1.695385973999612e-07, | |
| "loss": 0.603553056716919, | |
| "step": 2958 | |
| }, | |
| { | |
| "epoch": 2.4915824915824913, | |
| "grad_norm": 3.2011559009552, | |
| "learning_rate": 1.690931924596359e-07, | |
| "loss": 0.9430979490280151, | |
| "step": 2960 | |
| }, | |
| { | |
| "epoch": 2.493265993265993, | |
| "grad_norm": 10.394431114196777, | |
| "learning_rate": 1.6864909990353222e-07, | |
| "loss": 0.6838173866271973, | |
| "step": 2962 | |
| }, | |
| { | |
| "epoch": 2.494949494949495, | |
| "grad_norm": 2.8894050121307373, | |
| "learning_rate": 1.6820632126173595e-07, | |
| "loss": 0.829933762550354, | |
| "step": 2964 | |
| }, | |
| { | |
| "epoch": 2.4966329966329965, | |
| "grad_norm": 6.6212544441223145, | |
| "learning_rate": 1.6776485805980593e-07, | |
| "loss": 0.7385812997817993, | |
| "step": 2966 | |
| }, | |
| { | |
| "epoch": 2.4983164983164983, | |
| "grad_norm": 3.0128917694091797, | |
| "learning_rate": 1.673247118187685e-07, | |
| "loss": 0.9367114901542664, | |
| "step": 2968 | |
| }, | |
| { | |
| "epoch": 2.5, | |
| "grad_norm": 7.358500003814697, | |
| "learning_rate": 1.6688588405511265e-07, | |
| "loss": 0.9481908082962036, | |
| "step": 2970 | |
| }, | |
| { | |
| "epoch": 2.5016835016835017, | |
| "grad_norm": 7.319785118103027, | |
| "learning_rate": 1.6644837628078485e-07, | |
| "loss": 0.4760739207267761, | |
| "step": 2972 | |
| }, | |
| { | |
| "epoch": 2.5033670033670035, | |
| "grad_norm": 24.174762725830078, | |
| "learning_rate": 1.6601219000318317e-07, | |
| "loss": 0.529428243637085, | |
| "step": 2974 | |
| }, | |
| { | |
| "epoch": 2.505050505050505, | |
| "grad_norm": 90.50502014160156, | |
| "learning_rate": 1.6557732672515305e-07, | |
| "loss": 0.8081066012382507, | |
| "step": 2976 | |
| }, | |
| { | |
| "epoch": 2.506734006734007, | |
| "grad_norm": 9.112408638000488, | |
| "learning_rate": 1.6514378794498152e-07, | |
| "loss": 0.46742603182792664, | |
| "step": 2978 | |
| }, | |
| { | |
| "epoch": 2.5084175084175087, | |
| "grad_norm": 3.2685351371765137, | |
| "learning_rate": 1.6471157515639195e-07, | |
| "loss": 0.8512880802154541, | |
| "step": 2980 | |
| }, | |
| { | |
| "epoch": 2.51010101010101, | |
| "grad_norm": 11.603774070739746, | |
| "learning_rate": 1.6428068984853923e-07, | |
| "loss": 0.8741171360015869, | |
| "step": 2982 | |
| }, | |
| { | |
| "epoch": 2.5117845117845117, | |
| "grad_norm": 6.443422317504883, | |
| "learning_rate": 1.6385113350600476e-07, | |
| "loss": 0.4871176779270172, | |
| "step": 2984 | |
| }, | |
| { | |
| "epoch": 2.5134680134680134, | |
| "grad_norm": 22.373445510864258, | |
| "learning_rate": 1.6342290760879064e-07, | |
| "loss": 0.8540467023849487, | |
| "step": 2986 | |
| }, | |
| { | |
| "epoch": 2.515151515151515, | |
| "grad_norm": 5.546900272369385, | |
| "learning_rate": 1.6299601363231542e-07, | |
| "loss": 0.7414556741714478, | |
| "step": 2988 | |
| }, | |
| { | |
| "epoch": 2.516835016835017, | |
| "grad_norm": 4.198864459991455, | |
| "learning_rate": 1.6257045304740842e-07, | |
| "loss": 0.11034494638442993, | |
| "step": 2990 | |
| }, | |
| { | |
| "epoch": 2.5185185185185186, | |
| "grad_norm": 3.5712265968322754, | |
| "learning_rate": 1.6214622732030483e-07, | |
| "loss": 0.988459050655365, | |
| "step": 2992 | |
| }, | |
| { | |
| "epoch": 2.5202020202020203, | |
| "grad_norm": 6.247505187988281, | |
| "learning_rate": 1.617233379126409e-07, | |
| "loss": 0.6715781092643738, | |
| "step": 2994 | |
| }, | |
| { | |
| "epoch": 2.5218855218855216, | |
| "grad_norm": 4.307699680328369, | |
| "learning_rate": 1.6130178628144858e-07, | |
| "loss": 0.7559702396392822, | |
| "step": 2996 | |
| }, | |
| { | |
| "epoch": 2.5235690235690234, | |
| "grad_norm": 32.38378143310547, | |
| "learning_rate": 1.6088157387915046e-07, | |
| "loss": 0.61976158618927, | |
| "step": 2998 | |
| }, | |
| { | |
| "epoch": 2.525252525252525, | |
| "grad_norm": 5.182736396789551, | |
| "learning_rate": 1.6046270215355522e-07, | |
| "loss": 0.5721726417541504, | |
| "step": 3000 | |
| }, | |
| { | |
| "epoch": 2.526936026936027, | |
| "grad_norm": 11.062474250793457, | |
| "learning_rate": 1.600451725478522e-07, | |
| "loss": 0.5903807878494263, | |
| "step": 3002 | |
| }, | |
| { | |
| "epoch": 2.5286195286195285, | |
| "grad_norm": 3.9762520790100098, | |
| "learning_rate": 1.5962898650060646e-07, | |
| "loss": 1.0528504848480225, | |
| "step": 3004 | |
| }, | |
| { | |
| "epoch": 2.5303030303030303, | |
| "grad_norm": 12.059609413146973, | |
| "learning_rate": 1.5921414544575406e-07, | |
| "loss": 0.8805992603302002, | |
| "step": 3006 | |
| }, | |
| { | |
| "epoch": 2.531986531986532, | |
| "grad_norm": 5.079036235809326, | |
| "learning_rate": 1.5880065081259714e-07, | |
| "loss": 0.8200486898422241, | |
| "step": 3008 | |
| }, | |
| { | |
| "epoch": 2.5336700336700337, | |
| "grad_norm": 6.0202741622924805, | |
| "learning_rate": 1.583885040257985e-07, | |
| "loss": 0.5228027105331421, | |
| "step": 3010 | |
| }, | |
| { | |
| "epoch": 2.5353535353535355, | |
| "grad_norm": 10.853965759277344, | |
| "learning_rate": 1.579777065053773e-07, | |
| "loss": 0.8398838639259338, | |
| "step": 3012 | |
| }, | |
| { | |
| "epoch": 2.537037037037037, | |
| "grad_norm": 7.994739055633545, | |
| "learning_rate": 1.5756825966670399e-07, | |
| "loss": 0.7166822552680969, | |
| "step": 3014 | |
| }, | |
| { | |
| "epoch": 2.538720538720539, | |
| "grad_norm": 8.935235977172852, | |
| "learning_rate": 1.5716016492049495e-07, | |
| "loss": 0.7087036371231079, | |
| "step": 3016 | |
| }, | |
| { | |
| "epoch": 2.5404040404040407, | |
| "grad_norm": 11.376523971557617, | |
| "learning_rate": 1.5675342367280838e-07, | |
| "loss": 1.0162254571914673, | |
| "step": 3018 | |
| }, | |
| { | |
| "epoch": 2.542087542087542, | |
| "grad_norm": 3.1120080947875977, | |
| "learning_rate": 1.563480373250392e-07, | |
| "loss": 0.7916754484176636, | |
| "step": 3020 | |
| }, | |
| { | |
| "epoch": 2.5437710437710437, | |
| "grad_norm": 16.1352596282959, | |
| "learning_rate": 1.559440072739137e-07, | |
| "loss": 0.8983919024467468, | |
| "step": 3022 | |
| }, | |
| { | |
| "epoch": 2.5454545454545454, | |
| "grad_norm": 5.045600891113281, | |
| "learning_rate": 1.5554133491148556e-07, | |
| "loss": 1.0679364204406738, | |
| "step": 3024 | |
| }, | |
| { | |
| "epoch": 2.547138047138047, | |
| "grad_norm": 24.617691040039062, | |
| "learning_rate": 1.5514002162513035e-07, | |
| "loss": 0.3964739143848419, | |
| "step": 3026 | |
| }, | |
| { | |
| "epoch": 2.548821548821549, | |
| "grad_norm": 4.4443359375, | |
| "learning_rate": 1.5474006879754137e-07, | |
| "loss": 0.7372143268585205, | |
| "step": 3028 | |
| }, | |
| { | |
| "epoch": 2.5505050505050506, | |
| "grad_norm": 6.077057838439941, | |
| "learning_rate": 1.5434147780672437e-07, | |
| "loss": 0.6355978846549988, | |
| "step": 3030 | |
| }, | |
| { | |
| "epoch": 2.5521885521885523, | |
| "grad_norm": 2.639094829559326, | |
| "learning_rate": 1.539442500259929e-07, | |
| "loss": 0.5554131269454956, | |
| "step": 3032 | |
| }, | |
| { | |
| "epoch": 2.5538720538720536, | |
| "grad_norm": 5.577948093414307, | |
| "learning_rate": 1.5354838682396384e-07, | |
| "loss": 0.9816339612007141, | |
| "step": 3034 | |
| }, | |
| { | |
| "epoch": 2.5555555555555554, | |
| "grad_norm": 4.363624572753906, | |
| "learning_rate": 1.5315388956455266e-07, | |
| "loss": 1.0391297340393066, | |
| "step": 3036 | |
| }, | |
| { | |
| "epoch": 2.557239057239057, | |
| "grad_norm": 9.215215682983398, | |
| "learning_rate": 1.5276075960696817e-07, | |
| "loss": 0.7156937122344971, | |
| "step": 3038 | |
| }, | |
| { | |
| "epoch": 2.558922558922559, | |
| "grad_norm": 2.8174784183502197, | |
| "learning_rate": 1.5236899830570854e-07, | |
| "loss": 1.0105350017547607, | |
| "step": 3040 | |
| }, | |
| { | |
| "epoch": 2.5606060606060606, | |
| "grad_norm": 3.025399923324585, | |
| "learning_rate": 1.5197860701055643e-07, | |
| "loss": 0.767303466796875, | |
| "step": 3042 | |
| }, | |
| { | |
| "epoch": 2.5622895622895623, | |
| "grad_norm": 8.97220230102539, | |
| "learning_rate": 1.515895870665739e-07, | |
| "loss": 0.99961256980896, | |
| "step": 3044 | |
| }, | |
| { | |
| "epoch": 2.563973063973064, | |
| "grad_norm": 10.237662315368652, | |
| "learning_rate": 1.5120193981409848e-07, | |
| "loss": 0.7313355207443237, | |
| "step": 3046 | |
| }, | |
| { | |
| "epoch": 2.5656565656565657, | |
| "grad_norm": 4.435790538787842, | |
| "learning_rate": 1.508156665887381e-07, | |
| "loss": 0.9470257759094238, | |
| "step": 3048 | |
| }, | |
| { | |
| "epoch": 2.5673400673400675, | |
| "grad_norm": 8.973566055297852, | |
| "learning_rate": 1.5043076872136646e-07, | |
| "loss": 0.4554850459098816, | |
| "step": 3050 | |
| }, | |
| { | |
| "epoch": 2.569023569023569, | |
| "grad_norm": 3.580697774887085, | |
| "learning_rate": 1.5004724753811864e-07, | |
| "loss": 1.0283160209655762, | |
| "step": 3052 | |
| }, | |
| { | |
| "epoch": 2.570707070707071, | |
| "grad_norm": 3.4427924156188965, | |
| "learning_rate": 1.496651043603866e-07, | |
| "loss": 0.12811371684074402, | |
| "step": 3054 | |
| }, | |
| { | |
| "epoch": 2.5723905723905722, | |
| "grad_norm": 30.826913833618164, | |
| "learning_rate": 1.4928434050481424e-07, | |
| "loss": 0.7465952634811401, | |
| "step": 3056 | |
| }, | |
| { | |
| "epoch": 2.574074074074074, | |
| "grad_norm": 12.796523094177246, | |
| "learning_rate": 1.4890495728329334e-07, | |
| "loss": 0.4082253873348236, | |
| "step": 3058 | |
| }, | |
| { | |
| "epoch": 2.5757575757575757, | |
| "grad_norm": 3.8712823390960693, | |
| "learning_rate": 1.485269560029587e-07, | |
| "loss": 0.8437204360961914, | |
| "step": 3060 | |
| }, | |
| { | |
| "epoch": 2.5774410774410774, | |
| "grad_norm": 4.653648376464844, | |
| "learning_rate": 1.481503379661838e-07, | |
| "loss": 0.7468912601470947, | |
| "step": 3062 | |
| }, | |
| { | |
| "epoch": 2.579124579124579, | |
| "grad_norm": 9.738509178161621, | |
| "learning_rate": 1.4777510447057616e-07, | |
| "loss": 0.6074585318565369, | |
| "step": 3064 | |
| }, | |
| { | |
| "epoch": 2.580808080808081, | |
| "grad_norm": 4.1727495193481445, | |
| "learning_rate": 1.4740125680897328e-07, | |
| "loss": 0.7406507730484009, | |
| "step": 3066 | |
| }, | |
| { | |
| "epoch": 2.5824915824915826, | |
| "grad_norm": 9.242506980895996, | |
| "learning_rate": 1.470287962694373e-07, | |
| "loss": 0.4214972257614136, | |
| "step": 3068 | |
| }, | |
| { | |
| "epoch": 2.584175084175084, | |
| "grad_norm": 12.610301971435547, | |
| "learning_rate": 1.4665772413525175e-07, | |
| "loss": 0.17865464091300964, | |
| "step": 3070 | |
| }, | |
| { | |
| "epoch": 2.5858585858585856, | |
| "grad_norm": 21.455978393554688, | |
| "learning_rate": 1.4628804168491636e-07, | |
| "loss": 0.6329761743545532, | |
| "step": 3072 | |
| }, | |
| { | |
| "epoch": 2.5875420875420874, | |
| "grad_norm": 5.749107837677002, | |
| "learning_rate": 1.4591975019214238e-07, | |
| "loss": 1.0531988143920898, | |
| "step": 3074 | |
| }, | |
| { | |
| "epoch": 2.589225589225589, | |
| "grad_norm": 6.151569366455078, | |
| "learning_rate": 1.4555285092584917e-07, | |
| "loss": 0.4620995819568634, | |
| "step": 3076 | |
| }, | |
| { | |
| "epoch": 2.590909090909091, | |
| "grad_norm": 9.935331344604492, | |
| "learning_rate": 1.451873451501592e-07, | |
| "loss": 0.9808303117752075, | |
| "step": 3078 | |
| }, | |
| { | |
| "epoch": 2.5925925925925926, | |
| "grad_norm": 8.355198860168457, | |
| "learning_rate": 1.448232341243933e-07, | |
| "loss": 0.7373911142349243, | |
| "step": 3080 | |
| }, | |
| { | |
| "epoch": 2.5942760942760943, | |
| "grad_norm": 3.359959125518799, | |
| "learning_rate": 1.4446051910306743e-07, | |
| "loss": 1.0398435592651367, | |
| "step": 3082 | |
| }, | |
| { | |
| "epoch": 2.595959595959596, | |
| "grad_norm": 6.27101469039917, | |
| "learning_rate": 1.440992013358875e-07, | |
| "loss": 0.6558928489685059, | |
| "step": 3084 | |
| }, | |
| { | |
| "epoch": 2.5976430976430978, | |
| "grad_norm": 4.128625869750977, | |
| "learning_rate": 1.4373928206774504e-07, | |
| "loss": 0.6560384035110474, | |
| "step": 3086 | |
| }, | |
| { | |
| "epoch": 2.5993265993265995, | |
| "grad_norm": 4.040182113647461, | |
| "learning_rate": 1.4338076253871345e-07, | |
| "loss": 0.9103618264198303, | |
| "step": 3088 | |
| }, | |
| { | |
| "epoch": 2.601010101010101, | |
| "grad_norm": 3.0742857456207275, | |
| "learning_rate": 1.4302364398404344e-07, | |
| "loss": 0.9666507244110107, | |
| "step": 3090 | |
| }, | |
| { | |
| "epoch": 2.602693602693603, | |
| "grad_norm": 6.105360507965088, | |
| "learning_rate": 1.4266792763415863e-07, | |
| "loss": 0.7367033362388611, | |
| "step": 3092 | |
| }, | |
| { | |
| "epoch": 2.6043771043771042, | |
| "grad_norm": 4.493860244750977, | |
| "learning_rate": 1.4231361471465143e-07, | |
| "loss": 0.614148736000061, | |
| "step": 3094 | |
| }, | |
| { | |
| "epoch": 2.606060606060606, | |
| "grad_norm": 2.295088052749634, | |
| "learning_rate": 1.4196070644627903e-07, | |
| "loss": 0.7760593891143799, | |
| "step": 3096 | |
| }, | |
| { | |
| "epoch": 2.6077441077441077, | |
| "grad_norm": 3.1130990982055664, | |
| "learning_rate": 1.4160920404495887e-07, | |
| "loss": 0.6581928730010986, | |
| "step": 3098 | |
| }, | |
| { | |
| "epoch": 2.6094276094276094, | |
| "grad_norm": 4.99691104888916, | |
| "learning_rate": 1.4125910872176466e-07, | |
| "loss": 0.7904366254806519, | |
| "step": 3100 | |
| }, | |
| { | |
| "epoch": 2.611111111111111, | |
| "grad_norm": 5.191680908203125, | |
| "learning_rate": 1.4091042168292211e-07, | |
| "loss": 0.6951947212219238, | |
| "step": 3102 | |
| }, | |
| { | |
| "epoch": 2.612794612794613, | |
| "grad_norm": 3.600395679473877, | |
| "learning_rate": 1.4056314412980463e-07, | |
| "loss": 0.9162784218788147, | |
| "step": 3104 | |
| }, | |
| { | |
| "epoch": 2.6144781144781146, | |
| "grad_norm": 7.186698913574219, | |
| "learning_rate": 1.402172772589297e-07, | |
| "loss": 0.917360782623291, | |
| "step": 3106 | |
| }, | |
| { | |
| "epoch": 2.616161616161616, | |
| "grad_norm": 19.83490753173828, | |
| "learning_rate": 1.3987282226195416e-07, | |
| "loss": 0.2932959198951721, | |
| "step": 3108 | |
| }, | |
| { | |
| "epoch": 2.6178451178451176, | |
| "grad_norm": 3.4233388900756836, | |
| "learning_rate": 1.395297803256703e-07, | |
| "loss": 0.9224929809570312, | |
| "step": 3110 | |
| }, | |
| { | |
| "epoch": 2.6195286195286194, | |
| "grad_norm": 5.625677585601807, | |
| "learning_rate": 1.39188152632002e-07, | |
| "loss": 0.526210367679596, | |
| "step": 3112 | |
| }, | |
| { | |
| "epoch": 2.621212121212121, | |
| "grad_norm": 3.952099323272705, | |
| "learning_rate": 1.3884794035800056e-07, | |
| "loss": 0.610154926776886, | |
| "step": 3114 | |
| }, | |
| { | |
| "epoch": 2.622895622895623, | |
| "grad_norm": 3.5759785175323486, | |
| "learning_rate": 1.3850914467584013e-07, | |
| "loss": 0.9689432382583618, | |
| "step": 3116 | |
| }, | |
| { | |
| "epoch": 2.6245791245791246, | |
| "grad_norm": 3.9002864360809326, | |
| "learning_rate": 1.3817176675281456e-07, | |
| "loss": 1.0947141647338867, | |
| "step": 3118 | |
| }, | |
| { | |
| "epoch": 2.6262626262626263, | |
| "grad_norm": 8.866259574890137, | |
| "learning_rate": 1.378358077513328e-07, | |
| "loss": 0.7083148956298828, | |
| "step": 3120 | |
| }, | |
| { | |
| "epoch": 2.627946127946128, | |
| "grad_norm": 2.6722095012664795, | |
| "learning_rate": 1.3750126882891475e-07, | |
| "loss": 0.9863229393959045, | |
| "step": 3122 | |
| }, | |
| { | |
| "epoch": 2.6296296296296298, | |
| "grad_norm": 3.054203510284424, | |
| "learning_rate": 1.371681511381879e-07, | |
| "loss": 0.9456894397735596, | |
| "step": 3124 | |
| }, | |
| { | |
| "epoch": 2.6313131313131315, | |
| "grad_norm": 7.191009521484375, | |
| "learning_rate": 1.3683645582688296e-07, | |
| "loss": 0.7224574685096741, | |
| "step": 3126 | |
| }, | |
| { | |
| "epoch": 2.6329966329966332, | |
| "grad_norm": 4.021665096282959, | |
| "learning_rate": 1.3650618403782963e-07, | |
| "loss": 0.8824139833450317, | |
| "step": 3128 | |
| }, | |
| { | |
| "epoch": 2.634680134680135, | |
| "grad_norm": 8.395366668701172, | |
| "learning_rate": 1.3617733690895327e-07, | |
| "loss": 0.6597309112548828, | |
| "step": 3130 | |
| }, | |
| { | |
| "epoch": 2.6363636363636362, | |
| "grad_norm": 5.360447883605957, | |
| "learning_rate": 1.3584991557327076e-07, | |
| "loss": 0.3653567433357239, | |
| "step": 3132 | |
| }, | |
| { | |
| "epoch": 2.638047138047138, | |
| "grad_norm": 5.391804218292236, | |
| "learning_rate": 1.355239211588861e-07, | |
| "loss": 0.9479780793190002, | |
| "step": 3134 | |
| }, | |
| { | |
| "epoch": 2.6397306397306397, | |
| "grad_norm": 5.806617736816406, | |
| "learning_rate": 1.3519935478898732e-07, | |
| "loss": 0.880384087562561, | |
| "step": 3136 | |
| }, | |
| { | |
| "epoch": 2.6414141414141414, | |
| "grad_norm": 5.112968921661377, | |
| "learning_rate": 1.348762175818422e-07, | |
| "loss": 0.5330120921134949, | |
| "step": 3138 | |
| }, | |
| { | |
| "epoch": 2.643097643097643, | |
| "grad_norm": 5.756229400634766, | |
| "learning_rate": 1.345545106507943e-07, | |
| "loss": 1.0363292694091797, | |
| "step": 3140 | |
| }, | |
| { | |
| "epoch": 2.644781144781145, | |
| "grad_norm": 3.318345785140991, | |
| "learning_rate": 1.3423423510425942e-07, | |
| "loss": 0.6152174472808838, | |
| "step": 3142 | |
| }, | |
| { | |
| "epoch": 2.6464646464646466, | |
| "grad_norm": 2.97463321685791, | |
| "learning_rate": 1.3391539204572155e-07, | |
| "loss": 0.9172265529632568, | |
| "step": 3144 | |
| }, | |
| { | |
| "epoch": 2.648148148148148, | |
| "grad_norm": 3.3665931224823, | |
| "learning_rate": 1.3359798257372913e-07, | |
| "loss": 1.0443644523620605, | |
| "step": 3146 | |
| }, | |
| { | |
| "epoch": 2.6498316498316496, | |
| "grad_norm": 18.04493522644043, | |
| "learning_rate": 1.332820077818914e-07, | |
| "loss": 0.6649324297904968, | |
| "step": 3148 | |
| }, | |
| { | |
| "epoch": 2.6515151515151514, | |
| "grad_norm": 3.4756081104278564, | |
| "learning_rate": 1.3296746875887445e-07, | |
| "loss": 0.9889142513275146, | |
| "step": 3150 | |
| }, | |
| { | |
| "epoch": 2.653198653198653, | |
| "grad_norm": 6.93226432800293, | |
| "learning_rate": 1.3265436658839757e-07, | |
| "loss": 0.3890528082847595, | |
| "step": 3152 | |
| }, | |
| { | |
| "epoch": 2.654882154882155, | |
| "grad_norm": 26.966766357421875, | |
| "learning_rate": 1.3234270234922947e-07, | |
| "loss": 1.0187561511993408, | |
| "step": 3154 | |
| }, | |
| { | |
| "epoch": 2.6565656565656566, | |
| "grad_norm": 7.550897121429443, | |
| "learning_rate": 1.3203247711518466e-07, | |
| "loss": 0.691092848777771, | |
| "step": 3156 | |
| }, | |
| { | |
| "epoch": 2.6582491582491583, | |
| "grad_norm": 26.227054595947266, | |
| "learning_rate": 1.3172369195511945e-07, | |
| "loss": 0.5036376118659973, | |
| "step": 3158 | |
| }, | |
| { | |
| "epoch": 2.65993265993266, | |
| "grad_norm": 28.069713592529297, | |
| "learning_rate": 1.3141634793292868e-07, | |
| "loss": 0.5947234034538269, | |
| "step": 3160 | |
| }, | |
| { | |
| "epoch": 2.6616161616161618, | |
| "grad_norm": 4.802524566650391, | |
| "learning_rate": 1.3111044610754202e-07, | |
| "loss": 0.7470720410346985, | |
| "step": 3162 | |
| }, | |
| { | |
| "epoch": 2.6632996632996635, | |
| "grad_norm": 7.207154273986816, | |
| "learning_rate": 1.3080598753291972e-07, | |
| "loss": 0.9500914812088013, | |
| "step": 3164 | |
| }, | |
| { | |
| "epoch": 2.6649831649831652, | |
| "grad_norm": 9.974961280822754, | |
| "learning_rate": 1.3050297325804975e-07, | |
| "loss": 0.7958386540412903, | |
| "step": 3166 | |
| }, | |
| { | |
| "epoch": 2.6666666666666665, | |
| "grad_norm": 4.640317440032959, | |
| "learning_rate": 1.3020140432694386e-07, | |
| "loss": 0.8439849615097046, | |
| "step": 3168 | |
| }, | |
| { | |
| "epoch": 2.6683501683501682, | |
| "grad_norm": 8.00158977508545, | |
| "learning_rate": 1.2990128177863372e-07, | |
| "loss": 0.7472466230392456, | |
| "step": 3170 | |
| }, | |
| { | |
| "epoch": 2.67003367003367, | |
| "grad_norm": 5.717433929443359, | |
| "learning_rate": 1.2960260664716803e-07, | |
| "loss": 1.0863356590270996, | |
| "step": 3172 | |
| }, | |
| { | |
| "epoch": 2.6717171717171717, | |
| "grad_norm": 3.502814769744873, | |
| "learning_rate": 1.293053799616082e-07, | |
| "loss": 1.0433530807495117, | |
| "step": 3174 | |
| }, | |
| { | |
| "epoch": 2.6734006734006734, | |
| "grad_norm": 8.597749710083008, | |
| "learning_rate": 1.2900960274602512e-07, | |
| "loss": 0.6032207608222961, | |
| "step": 3176 | |
| }, | |
| { | |
| "epoch": 2.675084175084175, | |
| "grad_norm": 2.7261288166046143, | |
| "learning_rate": 1.2871527601949583e-07, | |
| "loss": 1.049224853515625, | |
| "step": 3178 | |
| }, | |
| { | |
| "epoch": 2.676767676767677, | |
| "grad_norm": 3.249244213104248, | |
| "learning_rate": 1.284224007960998e-07, | |
| "loss": 0.7596105337142944, | |
| "step": 3180 | |
| }, | |
| { | |
| "epoch": 2.678451178451178, | |
| "grad_norm": 3.4446780681610107, | |
| "learning_rate": 1.281309780849153e-07, | |
| "loss": 0.9340767860412598, | |
| "step": 3182 | |
| }, | |
| { | |
| "epoch": 2.68013468013468, | |
| "grad_norm": 4.839624404907227, | |
| "learning_rate": 1.278410088900162e-07, | |
| "loss": 1.0885896682739258, | |
| "step": 3184 | |
| }, | |
| { | |
| "epoch": 2.6818181818181817, | |
| "grad_norm": 4.542941093444824, | |
| "learning_rate": 1.2755249421046854e-07, | |
| "loss": 0.9115286469459534, | |
| "step": 3186 | |
| }, | |
| { | |
| "epoch": 2.6835016835016834, | |
| "grad_norm": 42.488826751708984, | |
| "learning_rate": 1.2726543504032654e-07, | |
| "loss": 0.7943265438079834, | |
| "step": 3188 | |
| }, | |
| { | |
| "epoch": 2.685185185185185, | |
| "grad_norm": 1.467315435409546, | |
| "learning_rate": 1.2697983236862997e-07, | |
| "loss": 0.7184177041053772, | |
| "step": 3190 | |
| }, | |
| { | |
| "epoch": 2.686868686868687, | |
| "grad_norm": 4.0649213790893555, | |
| "learning_rate": 1.2669568717940022e-07, | |
| "loss": 0.7381956577301025, | |
| "step": 3192 | |
| }, | |
| { | |
| "epoch": 2.6885521885521886, | |
| "grad_norm": 3.688559055328369, | |
| "learning_rate": 1.2641300045163692e-07, | |
| "loss": 0.8747034072875977, | |
| "step": 3194 | |
| }, | |
| { | |
| "epoch": 2.6902356902356903, | |
| "grad_norm": 6.0027337074279785, | |
| "learning_rate": 1.2613177315931483e-07, | |
| "loss": 0.6696113348007202, | |
| "step": 3196 | |
| }, | |
| { | |
| "epoch": 2.691919191919192, | |
| "grad_norm": 4.327197551727295, | |
| "learning_rate": 1.258520062713804e-07, | |
| "loss": 0.8139593601226807, | |
| "step": 3198 | |
| }, | |
| { | |
| "epoch": 2.6936026936026938, | |
| "grad_norm": 3.2183837890625, | |
| "learning_rate": 1.255737007517482e-07, | |
| "loss": 0.9807404279708862, | |
| "step": 3200 | |
| }, | |
| { | |
| "epoch": 2.6952861952861955, | |
| "grad_norm": 7.404758453369141, | |
| "learning_rate": 1.2529685755929779e-07, | |
| "loss": 0.8705126047134399, | |
| "step": 3202 | |
| }, | |
| { | |
| "epoch": 2.6969696969696972, | |
| "grad_norm": 4.221065044403076, | |
| "learning_rate": 1.250214776478705e-07, | |
| "loss": 0.7467024326324463, | |
| "step": 3204 | |
| }, | |
| { | |
| "epoch": 2.6986531986531985, | |
| "grad_norm": 8.078044891357422, | |
| "learning_rate": 1.2474756196626604e-07, | |
| "loss": 0.9621119499206543, | |
| "step": 3206 | |
| }, | |
| { | |
| "epoch": 2.7003367003367003, | |
| "grad_norm": 4.394944190979004, | |
| "learning_rate": 1.2447511145823904e-07, | |
| "loss": 0.6447912454605103, | |
| "step": 3208 | |
| }, | |
| { | |
| "epoch": 2.702020202020202, | |
| "grad_norm": 3.8557052612304688, | |
| "learning_rate": 1.2420412706249637e-07, | |
| "loss": 0.9262001514434814, | |
| "step": 3210 | |
| }, | |
| { | |
| "epoch": 2.7037037037037037, | |
| "grad_norm": 4.75685977935791, | |
| "learning_rate": 1.2393460971269306e-07, | |
| "loss": 0.6955965161323547, | |
| "step": 3212 | |
| }, | |
| { | |
| "epoch": 2.7053872053872055, | |
| "grad_norm": 4.87318229675293, | |
| "learning_rate": 1.2366656033742985e-07, | |
| "loss": 0.6773475408554077, | |
| "step": 3214 | |
| }, | |
| { | |
| "epoch": 2.707070707070707, | |
| "grad_norm": 5.815934658050537, | |
| "learning_rate": 1.233999798602498e-07, | |
| "loss": 0.48384755849838257, | |
| "step": 3216 | |
| }, | |
| { | |
| "epoch": 2.708754208754209, | |
| "grad_norm": 3.4917256832122803, | |
| "learning_rate": 1.2313486919963455e-07, | |
| "loss": 0.8545089960098267, | |
| "step": 3218 | |
| }, | |
| { | |
| "epoch": 2.71043771043771, | |
| "grad_norm": 19.76070785522461, | |
| "learning_rate": 1.2287122926900205e-07, | |
| "loss": 0.4410606026649475, | |
| "step": 3220 | |
| }, | |
| { | |
| "epoch": 2.712121212121212, | |
| "grad_norm": 5.099394798278809, | |
| "learning_rate": 1.2260906097670272e-07, | |
| "loss": 0.8183356523513794, | |
| "step": 3222 | |
| }, | |
| { | |
| "epoch": 2.7138047138047137, | |
| "grad_norm": 4.549499034881592, | |
| "learning_rate": 1.2234836522601667e-07, | |
| "loss": 0.5615583062171936, | |
| "step": 3224 | |
| }, | |
| { | |
| "epoch": 2.7154882154882154, | |
| "grad_norm": 4.583916187286377, | |
| "learning_rate": 1.2208914291515035e-07, | |
| "loss": 0.4506787657737732, | |
| "step": 3226 | |
| }, | |
| { | |
| "epoch": 2.717171717171717, | |
| "grad_norm": 3.6649909019470215, | |
| "learning_rate": 1.218313949372339e-07, | |
| "loss": 0.8952913284301758, | |
| "step": 3228 | |
| }, | |
| { | |
| "epoch": 2.718855218855219, | |
| "grad_norm": 69.62271118164062, | |
| "learning_rate": 1.2157512218031732e-07, | |
| "loss": 0.4370509088039398, | |
| "step": 3230 | |
| }, | |
| { | |
| "epoch": 2.7205387205387206, | |
| "grad_norm": 8.150357246398926, | |
| "learning_rate": 1.2132032552736818e-07, | |
| "loss": 0.9717521071434021, | |
| "step": 3232 | |
| }, | |
| { | |
| "epoch": 2.7222222222222223, | |
| "grad_norm": 3.7782340049743652, | |
| "learning_rate": 1.2106700585626828e-07, | |
| "loss": 0.7311519384384155, | |
| "step": 3234 | |
| }, | |
| { | |
| "epoch": 2.723905723905724, | |
| "grad_norm": 3.866910219192505, | |
| "learning_rate": 1.208151640398103e-07, | |
| "loss": 0.8734760880470276, | |
| "step": 3236 | |
| }, | |
| { | |
| "epoch": 2.725589225589226, | |
| "grad_norm": 4.73469877243042, | |
| "learning_rate": 1.2056480094569536e-07, | |
| "loss": 0.855620265007019, | |
| "step": 3238 | |
| }, | |
| { | |
| "epoch": 2.7272727272727275, | |
| "grad_norm": 4.2583699226379395, | |
| "learning_rate": 1.203159174365296e-07, | |
| "loss": 0.8622401356697083, | |
| "step": 3240 | |
| }, | |
| { | |
| "epoch": 2.728956228956229, | |
| "grad_norm": 3.042707920074463, | |
| "learning_rate": 1.200685143698214e-07, | |
| "loss": 0.8962169885635376, | |
| "step": 3242 | |
| }, | |
| { | |
| "epoch": 2.7306397306397305, | |
| "grad_norm": 3.5346055030822754, | |
| "learning_rate": 1.1982259259797856e-07, | |
| "loss": 0.6588426232337952, | |
| "step": 3244 | |
| }, | |
| { | |
| "epoch": 2.7323232323232323, | |
| "grad_norm": 3.266772747039795, | |
| "learning_rate": 1.1957815296830494e-07, | |
| "loss": 0.8494440317153931, | |
| "step": 3246 | |
| }, | |
| { | |
| "epoch": 2.734006734006734, | |
| "grad_norm": 15.043532371520996, | |
| "learning_rate": 1.1933519632299793e-07, | |
| "loss": 0.9317235946655273, | |
| "step": 3248 | |
| }, | |
| { | |
| "epoch": 2.7356902356902357, | |
| "grad_norm": 5.527817249298096, | |
| "learning_rate": 1.1909372349914553e-07, | |
| "loss": 0.9118114709854126, | |
| "step": 3250 | |
| }, | |
| { | |
| "epoch": 2.7373737373737375, | |
| "grad_norm": 3.4315481185913086, | |
| "learning_rate": 1.1885373532872297e-07, | |
| "loss": 0.4174748957157135, | |
| "step": 3252 | |
| }, | |
| { | |
| "epoch": 2.739057239057239, | |
| "grad_norm": 3.0668060779571533, | |
| "learning_rate": 1.1861523263859069e-07, | |
| "loss": 0.6279425621032715, | |
| "step": 3254 | |
| }, | |
| { | |
| "epoch": 2.7407407407407405, | |
| "grad_norm": 5.9774651527404785, | |
| "learning_rate": 1.1837821625049076e-07, | |
| "loss": 0.6725097894668579, | |
| "step": 3256 | |
| }, | |
| { | |
| "epoch": 2.742424242424242, | |
| "grad_norm": 3.119798183441162, | |
| "learning_rate": 1.1814268698104425e-07, | |
| "loss": 0.70163893699646, | |
| "step": 3258 | |
| }, | |
| { | |
| "epoch": 2.744107744107744, | |
| "grad_norm": 5.229933261871338, | |
| "learning_rate": 1.1790864564174873e-07, | |
| "loss": 0.5799877643585205, | |
| "step": 3260 | |
| }, | |
| { | |
| "epoch": 2.7457912457912457, | |
| "grad_norm": 8.287593841552734, | |
| "learning_rate": 1.1767609303897506e-07, | |
| "loss": 0.7188424468040466, | |
| "step": 3262 | |
| }, | |
| { | |
| "epoch": 2.7474747474747474, | |
| "grad_norm": 19.248619079589844, | |
| "learning_rate": 1.1744502997396474e-07, | |
| "loss": 0.9669326543807983, | |
| "step": 3264 | |
| }, | |
| { | |
| "epoch": 2.749158249158249, | |
| "grad_norm": 6.006091594696045, | |
| "learning_rate": 1.1721545724282727e-07, | |
| "loss": 1.0581872463226318, | |
| "step": 3266 | |
| }, | |
| { | |
| "epoch": 2.750841750841751, | |
| "grad_norm": 20.20122528076172, | |
| "learning_rate": 1.1698737563653745e-07, | |
| "loss": 0.5354408621788025, | |
| "step": 3268 | |
| }, | |
| { | |
| "epoch": 2.7525252525252526, | |
| "grad_norm": 3.2330803871154785, | |
| "learning_rate": 1.1676078594093212e-07, | |
| "loss": 1.0935049057006836, | |
| "step": 3270 | |
| }, | |
| { | |
| "epoch": 2.7542087542087543, | |
| "grad_norm": 15.937528610229492, | |
| "learning_rate": 1.1653568893670834e-07, | |
| "loss": 0.5233392715454102, | |
| "step": 3272 | |
| }, | |
| { | |
| "epoch": 2.755892255892256, | |
| "grad_norm": 5.933197498321533, | |
| "learning_rate": 1.1631208539941993e-07, | |
| "loss": 0.8539717197418213, | |
| "step": 3274 | |
| }, | |
| { | |
| "epoch": 2.757575757575758, | |
| "grad_norm": 34.71628189086914, | |
| "learning_rate": 1.1608997609947508e-07, | |
| "loss": 0.35395973920822144, | |
| "step": 3276 | |
| }, | |
| { | |
| "epoch": 2.7592592592592595, | |
| "grad_norm": 3.8929779529571533, | |
| "learning_rate": 1.158693618021339e-07, | |
| "loss": 0.09008853882551193, | |
| "step": 3278 | |
| }, | |
| { | |
| "epoch": 2.760942760942761, | |
| "grad_norm": 3.974247694015503, | |
| "learning_rate": 1.1565024326750545e-07, | |
| "loss": 1.1840243339538574, | |
| "step": 3280 | |
| }, | |
| { | |
| "epoch": 2.7626262626262625, | |
| "grad_norm": 4.763762474060059, | |
| "learning_rate": 1.1543262125054523e-07, | |
| "loss": 1.1094727516174316, | |
| "step": 3282 | |
| }, | |
| { | |
| "epoch": 2.7643097643097643, | |
| "grad_norm": 26.06635093688965, | |
| "learning_rate": 1.1521649650105264e-07, | |
| "loss": 0.40256187319755554, | |
| "step": 3284 | |
| }, | |
| { | |
| "epoch": 2.765993265993266, | |
| "grad_norm": 3.0916554927825928, | |
| "learning_rate": 1.150018697636685e-07, | |
| "loss": 0.9139037132263184, | |
| "step": 3286 | |
| }, | |
| { | |
| "epoch": 2.7676767676767677, | |
| "grad_norm": 5.2920026779174805, | |
| "learning_rate": 1.1478874177787204e-07, | |
| "loss": 0.8635107278823853, | |
| "step": 3288 | |
| }, | |
| { | |
| "epoch": 2.7693602693602695, | |
| "grad_norm": 7.0219573974609375, | |
| "learning_rate": 1.1457711327797898e-07, | |
| "loss": 0.3769862651824951, | |
| "step": 3290 | |
| }, | |
| { | |
| "epoch": 2.771043771043771, | |
| "grad_norm": 4.999329090118408, | |
| "learning_rate": 1.1436698499313855e-07, | |
| "loss": 1.1161870956420898, | |
| "step": 3292 | |
| }, | |
| { | |
| "epoch": 2.7727272727272725, | |
| "grad_norm": 3.669652223587036, | |
| "learning_rate": 1.1415835764733103e-07, | |
| "loss": 0.949033796787262, | |
| "step": 3294 | |
| }, | |
| { | |
| "epoch": 2.774410774410774, | |
| "grad_norm": 3.6179237365722656, | |
| "learning_rate": 1.1395123195936543e-07, | |
| "loss": 0.9398729801177979, | |
| "step": 3296 | |
| }, | |
| { | |
| "epoch": 2.776094276094276, | |
| "grad_norm": 4.2878947257995605, | |
| "learning_rate": 1.1374560864287696e-07, | |
| "loss": 0.3119538426399231, | |
| "step": 3298 | |
| }, | |
| { | |
| "epoch": 2.7777777777777777, | |
| "grad_norm": 3.065671443939209, | |
| "learning_rate": 1.1354148840632437e-07, | |
| "loss": 0.5504776239395142, | |
| "step": 3300 | |
| }, | |
| { | |
| "epoch": 2.7794612794612794, | |
| "grad_norm": 8.210501670837402, | |
| "learning_rate": 1.1333887195298781e-07, | |
| "loss": 0.6545171737670898, | |
| "step": 3302 | |
| }, | |
| { | |
| "epoch": 2.781144781144781, | |
| "grad_norm": 24.731203079223633, | |
| "learning_rate": 1.1313775998096624e-07, | |
| "loss": 0.5451493263244629, | |
| "step": 3304 | |
| }, | |
| { | |
| "epoch": 2.782828282828283, | |
| "grad_norm": 24.600292205810547, | |
| "learning_rate": 1.1293815318317493e-07, | |
| "loss": 0.8595808148384094, | |
| "step": 3306 | |
| }, | |
| { | |
| "epoch": 2.7845117845117846, | |
| "grad_norm": 9.5689058303833, | |
| "learning_rate": 1.1274005224734338e-07, | |
| "loss": 0.573542058467865, | |
| "step": 3308 | |
| }, | |
| { | |
| "epoch": 2.7861952861952863, | |
| "grad_norm": 7.985528945922852, | |
| "learning_rate": 1.1254345785601264e-07, | |
| "loss": 0.7355629205703735, | |
| "step": 3310 | |
| }, | |
| { | |
| "epoch": 2.787878787878788, | |
| "grad_norm": 4.394935607910156, | |
| "learning_rate": 1.1234837068653313e-07, | |
| "loss": 0.5512019395828247, | |
| "step": 3312 | |
| }, | |
| { | |
| "epoch": 2.78956228956229, | |
| "grad_norm": 3.483132839202881, | |
| "learning_rate": 1.1215479141106207e-07, | |
| "loss": 0.8127498626708984, | |
| "step": 3314 | |
| }, | |
| { | |
| "epoch": 2.791245791245791, | |
| "grad_norm": 18.29281234741211, | |
| "learning_rate": 1.119627206965618e-07, | |
| "loss": 1.0899267196655273, | |
| "step": 3316 | |
| }, | |
| { | |
| "epoch": 2.792929292929293, | |
| "grad_norm": 9.126869201660156, | |
| "learning_rate": 1.1177215920479654e-07, | |
| "loss": 0.8100671172142029, | |
| "step": 3318 | |
| }, | |
| { | |
| "epoch": 2.7946127946127945, | |
| "grad_norm": 7.040780544281006, | |
| "learning_rate": 1.1158310759233083e-07, | |
| "loss": 0.43027007579803467, | |
| "step": 3320 | |
| }, | |
| { | |
| "epoch": 2.7962962962962963, | |
| "grad_norm": 23.27480697631836, | |
| "learning_rate": 1.113955665105271e-07, | |
| "loss": 0.7666506767272949, | |
| "step": 3322 | |
| }, | |
| { | |
| "epoch": 2.797979797979798, | |
| "grad_norm": 3.0902600288391113, | |
| "learning_rate": 1.1120953660554319e-07, | |
| "loss": 0.769917905330658, | |
| "step": 3324 | |
| }, | |
| { | |
| "epoch": 2.7996632996632997, | |
| "grad_norm": 12.026223182678223, | |
| "learning_rate": 1.110250185183305e-07, | |
| "loss": 0.4246026277542114, | |
| "step": 3326 | |
| }, | |
| { | |
| "epoch": 2.8013468013468015, | |
| "grad_norm": 3.31463360786438, | |
| "learning_rate": 1.108420128846314e-07, | |
| "loss": 0.8164354562759399, | |
| "step": 3328 | |
| }, | |
| { | |
| "epoch": 2.8030303030303028, | |
| "grad_norm": 4.4698486328125, | |
| "learning_rate": 1.1066052033497734e-07, | |
| "loss": 0.8739584684371948, | |
| "step": 3330 | |
| }, | |
| { | |
| "epoch": 2.8047138047138045, | |
| "grad_norm": 11.48538875579834, | |
| "learning_rate": 1.1048054149468646e-07, | |
| "loss": 0.6384426951408386, | |
| "step": 3332 | |
| }, | |
| { | |
| "epoch": 2.8063973063973062, | |
| "grad_norm": 3.5100231170654297, | |
| "learning_rate": 1.1030207698386169e-07, | |
| "loss": 0.7716495990753174, | |
| "step": 3334 | |
| }, | |
| { | |
| "epoch": 2.808080808080808, | |
| "grad_norm": 14.741443634033203, | |
| "learning_rate": 1.1012512741738827e-07, | |
| "loss": 0.7237218618392944, | |
| "step": 3336 | |
| }, | |
| { | |
| "epoch": 2.8097643097643097, | |
| "grad_norm": 4.032273292541504, | |
| "learning_rate": 1.0994969340493191e-07, | |
| "loss": 0.4440898895263672, | |
| "step": 3338 | |
| }, | |
| { | |
| "epoch": 2.8114478114478114, | |
| "grad_norm": 9.446307182312012, | |
| "learning_rate": 1.0977577555093672e-07, | |
| "loss": 0.791456937789917, | |
| "step": 3340 | |
| }, | |
| { | |
| "epoch": 2.813131313131313, | |
| "grad_norm": 5.13400936126709, | |
| "learning_rate": 1.0960337445462273e-07, | |
| "loss": 0.897986650466919, | |
| "step": 3342 | |
| }, | |
| { | |
| "epoch": 2.814814814814815, | |
| "grad_norm": 3.0911977291107178, | |
| "learning_rate": 1.0943249070998429e-07, | |
| "loss": 0.4430878162384033, | |
| "step": 3344 | |
| }, | |
| { | |
| "epoch": 2.8164983164983166, | |
| "grad_norm": 3.849289655685425, | |
| "learning_rate": 1.0926312490578795e-07, | |
| "loss": 0.9019819498062134, | |
| "step": 3346 | |
| }, | |
| { | |
| "epoch": 2.8181818181818183, | |
| "grad_norm": 11.346426010131836, | |
| "learning_rate": 1.0909527762556997e-07, | |
| "loss": 0.6365593671798706, | |
| "step": 3348 | |
| }, | |
| { | |
| "epoch": 2.81986531986532, | |
| "grad_norm": 33.87907028198242, | |
| "learning_rate": 1.089289494476349e-07, | |
| "loss": 0.9726608395576477, | |
| "step": 3350 | |
| }, | |
| { | |
| "epoch": 2.821548821548822, | |
| "grad_norm": 13.32236385345459, | |
| "learning_rate": 1.0876414094505339e-07, | |
| "loss": 0.9321683049201965, | |
| "step": 3352 | |
| }, | |
| { | |
| "epoch": 2.823232323232323, | |
| "grad_norm": 3.9304733276367188, | |
| "learning_rate": 1.0860085268566002e-07, | |
| "loss": 0.8083049058914185, | |
| "step": 3354 | |
| }, | |
| { | |
| "epoch": 2.824915824915825, | |
| "grad_norm": 14.073712348937988, | |
| "learning_rate": 1.084390852320515e-07, | |
| "loss": 0.8524267673492432, | |
| "step": 3356 | |
| }, | |
| { | |
| "epoch": 2.8265993265993266, | |
| "grad_norm": 3.060366630554199, | |
| "learning_rate": 1.0827883914158484e-07, | |
| "loss": 0.6664683818817139, | |
| "step": 3358 | |
| }, | |
| { | |
| "epoch": 2.8282828282828283, | |
| "grad_norm": 7.288575172424316, | |
| "learning_rate": 1.0812011496637521e-07, | |
| "loss": 0.6165136098861694, | |
| "step": 3360 | |
| }, | |
| { | |
| "epoch": 2.82996632996633, | |
| "grad_norm": 9.925680160522461, | |
| "learning_rate": 1.0796291325329419e-07, | |
| "loss": 0.782645583152771, | |
| "step": 3362 | |
| }, | |
| { | |
| "epoch": 2.8316498316498318, | |
| "grad_norm": 7.882792949676514, | |
| "learning_rate": 1.0780723454396788e-07, | |
| "loss": 0.6414890289306641, | |
| "step": 3364 | |
| }, | |
| { | |
| "epoch": 2.8333333333333335, | |
| "grad_norm": 3.061633825302124, | |
| "learning_rate": 1.0765307937477489e-07, | |
| "loss": 0.5577088594436646, | |
| "step": 3366 | |
| }, | |
| { | |
| "epoch": 2.8350168350168348, | |
| "grad_norm": 10.289335250854492, | |
| "learning_rate": 1.0750044827684457e-07, | |
| "loss": 0.5626717209815979, | |
| "step": 3368 | |
| }, | |
| { | |
| "epoch": 2.8367003367003365, | |
| "grad_norm": 4.862819194793701, | |
| "learning_rate": 1.073493417760554e-07, | |
| "loss": 0.8943830132484436, | |
| "step": 3370 | |
| }, | |
| { | |
| "epoch": 2.8383838383838382, | |
| "grad_norm": 5.187376499176025, | |
| "learning_rate": 1.0719976039303275e-07, | |
| "loss": 0.747265100479126, | |
| "step": 3372 | |
| }, | |
| { | |
| "epoch": 2.84006734006734, | |
| "grad_norm": 11.796051979064941, | |
| "learning_rate": 1.0705170464314741e-07, | |
| "loss": 0.46709078550338745, | |
| "step": 3374 | |
| }, | |
| { | |
| "epoch": 2.8417508417508417, | |
| "grad_norm": 2.545010805130005, | |
| "learning_rate": 1.069051750365139e-07, | |
| "loss": 1.0213559865951538, | |
| "step": 3376 | |
| }, | |
| { | |
| "epoch": 2.8434343434343434, | |
| "grad_norm": 4.854709148406982, | |
| "learning_rate": 1.0676017207798818e-07, | |
| "loss": 0.872999906539917, | |
| "step": 3378 | |
| }, | |
| { | |
| "epoch": 2.845117845117845, | |
| "grad_norm": 4.323747158050537, | |
| "learning_rate": 1.0661669626716654e-07, | |
| "loss": 0.6622998118400574, | |
| "step": 3380 | |
| }, | |
| { | |
| "epoch": 2.846801346801347, | |
| "grad_norm": 15.708161354064941, | |
| "learning_rate": 1.0647474809838358e-07, | |
| "loss": 0.6927282810211182, | |
| "step": 3382 | |
| }, | |
| { | |
| "epoch": 2.8484848484848486, | |
| "grad_norm": 5.333789348602295, | |
| "learning_rate": 1.0633432806071032e-07, | |
| "loss": 0.6410980224609375, | |
| "step": 3384 | |
| }, | |
| { | |
| "epoch": 2.8501683501683504, | |
| "grad_norm": 5.245711803436279, | |
| "learning_rate": 1.0619543663795291e-07, | |
| "loss": 0.8350679874420166, | |
| "step": 3386 | |
| }, | |
| { | |
| "epoch": 2.851851851851852, | |
| "grad_norm": 1.0896079540252686, | |
| "learning_rate": 1.0605807430865085e-07, | |
| "loss": 0.8719289302825928, | |
| "step": 3388 | |
| }, | |
| { | |
| "epoch": 2.8535353535353534, | |
| "grad_norm": 6.591011047363281, | |
| "learning_rate": 1.0592224154607507e-07, | |
| "loss": 0.6173574328422546, | |
| "step": 3390 | |
| }, | |
| { | |
| "epoch": 2.855218855218855, | |
| "grad_norm": 6.573550224304199, | |
| "learning_rate": 1.0578793881822661e-07, | |
| "loss": 0.5777392387390137, | |
| "step": 3392 | |
| }, | |
| { | |
| "epoch": 2.856902356902357, | |
| "grad_norm": 2.5468170642852783, | |
| "learning_rate": 1.056551665878349e-07, | |
| "loss": 1.0248732566833496, | |
| "step": 3394 | |
| }, | |
| { | |
| "epoch": 2.8585858585858586, | |
| "grad_norm": 3.26706862449646, | |
| "learning_rate": 1.055239253123561e-07, | |
| "loss": 1.0530986785888672, | |
| "step": 3396 | |
| }, | |
| { | |
| "epoch": 2.8602693602693603, | |
| "grad_norm": 4.729337215423584, | |
| "learning_rate": 1.0539421544397163e-07, | |
| "loss": 0.5177785158157349, | |
| "step": 3398 | |
| }, | |
| { | |
| "epoch": 2.861952861952862, | |
| "grad_norm": 10.192523002624512, | |
| "learning_rate": 1.052660374295866e-07, | |
| "loss": 0.48648959398269653, | |
| "step": 3400 | |
| }, | |
| { | |
| "epoch": 2.8636363636363638, | |
| "grad_norm": 9.012269973754883, | |
| "learning_rate": 1.0513939171082812e-07, | |
| "loss": 0.5270302295684814, | |
| "step": 3402 | |
| }, | |
| { | |
| "epoch": 2.865319865319865, | |
| "grad_norm": 4.075140476226807, | |
| "learning_rate": 1.0501427872404407e-07, | |
| "loss": 0.49075964093208313, | |
| "step": 3404 | |
| }, | |
| { | |
| "epoch": 2.8670033670033668, | |
| "grad_norm": 5.544951438903809, | |
| "learning_rate": 1.0489069890030129e-07, | |
| "loss": 0.883784294128418, | |
| "step": 3406 | |
| }, | |
| { | |
| "epoch": 2.8686868686868685, | |
| "grad_norm": 4.438905715942383, | |
| "learning_rate": 1.0476865266538431e-07, | |
| "loss": 0.43367594480514526, | |
| "step": 3408 | |
| }, | |
| { | |
| "epoch": 2.8703703703703702, | |
| "grad_norm": 4.913120269775391, | |
| "learning_rate": 1.0464814043979367e-07, | |
| "loss": 0.9170664548873901, | |
| "step": 3410 | |
| }, | |
| { | |
| "epoch": 2.872053872053872, | |
| "grad_norm": 4.941054821014404, | |
| "learning_rate": 1.0452916263874477e-07, | |
| "loss": 0.5428977608680725, | |
| "step": 3412 | |
| }, | |
| { | |
| "epoch": 2.8737373737373737, | |
| "grad_norm": 5.263132572174072, | |
| "learning_rate": 1.0441171967216618e-07, | |
| "loss": 0.7901989817619324, | |
| "step": 3414 | |
| }, | |
| { | |
| "epoch": 2.8754208754208754, | |
| "grad_norm": 9.088157653808594, | |
| "learning_rate": 1.042958119446983e-07, | |
| "loss": 0.2979390025138855, | |
| "step": 3416 | |
| }, | |
| { | |
| "epoch": 2.877104377104377, | |
| "grad_norm": 10.997780799865723, | |
| "learning_rate": 1.0418143985569209e-07, | |
| "loss": 0.6635469198226929, | |
| "step": 3418 | |
| }, | |
| { | |
| "epoch": 2.878787878787879, | |
| "grad_norm": 3.945129871368408, | |
| "learning_rate": 1.0406860379920746e-07, | |
| "loss": 0.4760744273662567, | |
| "step": 3420 | |
| }, | |
| { | |
| "epoch": 2.8804713804713806, | |
| "grad_norm": 8.165509223937988, | |
| "learning_rate": 1.0395730416401211e-07, | |
| "loss": 0.8622602820396423, | |
| "step": 3422 | |
| }, | |
| { | |
| "epoch": 2.8821548821548824, | |
| "grad_norm": 2.621253728866577, | |
| "learning_rate": 1.0384754133358014e-07, | |
| "loss": 0.6706223487854004, | |
| "step": 3424 | |
| }, | |
| { | |
| "epoch": 2.883838383838384, | |
| "grad_norm": 7.00462532043457, | |
| "learning_rate": 1.0373931568609063e-07, | |
| "loss": 0.7515609264373779, | |
| "step": 3426 | |
| }, | |
| { | |
| "epoch": 2.8855218855218854, | |
| "grad_norm": 6.707590103149414, | |
| "learning_rate": 1.0363262759442654e-07, | |
| "loss": 0.6428268551826477, | |
| "step": 3428 | |
| }, | |
| { | |
| "epoch": 2.887205387205387, | |
| "grad_norm": 15.433223724365234, | |
| "learning_rate": 1.0352747742617327e-07, | |
| "loss": 0.4187021851539612, | |
| "step": 3430 | |
| }, | |
| { | |
| "epoch": 2.888888888888889, | |
| "grad_norm": 5.754827976226807, | |
| "learning_rate": 1.0342386554361728e-07, | |
| "loss": 0.6734333634376526, | |
| "step": 3432 | |
| }, | |
| { | |
| "epoch": 2.8905723905723906, | |
| "grad_norm": 16.424394607543945, | |
| "learning_rate": 1.0332179230374509e-07, | |
| "loss": 0.6447641253471375, | |
| "step": 3434 | |
| }, | |
| { | |
| "epoch": 2.8922558922558923, | |
| "grad_norm": 7.980709552764893, | |
| "learning_rate": 1.032212580582421e-07, | |
| "loss": 1.063244104385376, | |
| "step": 3436 | |
| }, | |
| { | |
| "epoch": 2.893939393939394, | |
| "grad_norm": 11.630234718322754, | |
| "learning_rate": 1.0312226315349098e-07, | |
| "loss": 0.9426344037055969, | |
| "step": 3438 | |
| }, | |
| { | |
| "epoch": 2.8956228956228958, | |
| "grad_norm": 4.453112602233887, | |
| "learning_rate": 1.0302480793057082e-07, | |
| "loss": 0.8930955529212952, | |
| "step": 3440 | |
| }, | |
| { | |
| "epoch": 2.897306397306397, | |
| "grad_norm": 4.323969841003418, | |
| "learning_rate": 1.0292889272525597e-07, | |
| "loss": 1.0264780521392822, | |
| "step": 3442 | |
| }, | |
| { | |
| "epoch": 2.898989898989899, | |
| "grad_norm": 4.514182090759277, | |
| "learning_rate": 1.0283451786801456e-07, | |
| "loss": 0.4191988706588745, | |
| "step": 3444 | |
| }, | |
| { | |
| "epoch": 2.9006734006734005, | |
| "grad_norm": 3.260342597961426, | |
| "learning_rate": 1.0274168368400774e-07, | |
| "loss": 0.5836988687515259, | |
| "step": 3446 | |
| }, | |
| { | |
| "epoch": 2.9023569023569022, | |
| "grad_norm": 3.745016574859619, | |
| "learning_rate": 1.0265039049308834e-07, | |
| "loss": 1.1238579750061035, | |
| "step": 3448 | |
| }, | |
| { | |
| "epoch": 2.904040404040404, | |
| "grad_norm": 12.539746284484863, | |
| "learning_rate": 1.0256063860979977e-07, | |
| "loss": 0.40760430693626404, | |
| "step": 3450 | |
| }, | |
| { | |
| "epoch": 2.9057239057239057, | |
| "grad_norm": 2.8215339183807373, | |
| "learning_rate": 1.0247242834337502e-07, | |
| "loss": 0.7182443737983704, | |
| "step": 3452 | |
| }, | |
| { | |
| "epoch": 2.9074074074074074, | |
| "grad_norm": 27.12503433227539, | |
| "learning_rate": 1.0238575999773569e-07, | |
| "loss": 0.6834052205085754, | |
| "step": 3454 | |
| }, | |
| { | |
| "epoch": 2.909090909090909, | |
| "grad_norm": 2.9466779232025146, | |
| "learning_rate": 1.0230063387149058e-07, | |
| "loss": 1.065738320350647, | |
| "step": 3456 | |
| }, | |
| { | |
| "epoch": 2.910774410774411, | |
| "grad_norm": 7.221368789672852, | |
| "learning_rate": 1.0221705025793505e-07, | |
| "loss": 0.8638687133789062, | |
| "step": 3458 | |
| }, | |
| { | |
| "epoch": 2.9124579124579126, | |
| "grad_norm": 15.298805236816406, | |
| "learning_rate": 1.021350094450498e-07, | |
| "loss": 1.0362968444824219, | |
| "step": 3460 | |
| }, | |
| { | |
| "epoch": 2.9141414141414144, | |
| "grad_norm": 2.772352695465088, | |
| "learning_rate": 1.0205451171549999e-07, | |
| "loss": 1.0920348167419434, | |
| "step": 3462 | |
| }, | |
| { | |
| "epoch": 2.915824915824916, | |
| "grad_norm": 6.832037448883057, | |
| "learning_rate": 1.0197555734663415e-07, | |
| "loss": 0.8181166648864746, | |
| "step": 3464 | |
| }, | |
| { | |
| "epoch": 2.9175084175084174, | |
| "grad_norm": 10.260382652282715, | |
| "learning_rate": 1.0189814661048329e-07, | |
| "loss": 1.0308600664138794, | |
| "step": 3466 | |
| }, | |
| { | |
| "epoch": 2.919191919191919, | |
| "grad_norm": 8.912053108215332, | |
| "learning_rate": 1.0182227977375995e-07, | |
| "loss": 0.6785660982131958, | |
| "step": 3468 | |
| }, | |
| { | |
| "epoch": 2.920875420875421, | |
| "grad_norm": 3.7292585372924805, | |
| "learning_rate": 1.0174795709785737e-07, | |
| "loss": 0.2668553590774536, | |
| "step": 3470 | |
| }, | |
| { | |
| "epoch": 2.9225589225589226, | |
| "grad_norm": 15.397346496582031, | |
| "learning_rate": 1.0167517883884837e-07, | |
| "loss": 0.8357558250427246, | |
| "step": 3472 | |
| }, | |
| { | |
| "epoch": 2.9242424242424243, | |
| "grad_norm": 5.987993240356445, | |
| "learning_rate": 1.016039452474847e-07, | |
| "loss": 0.7866486310958862, | |
| "step": 3474 | |
| }, | |
| { | |
| "epoch": 2.925925925925926, | |
| "grad_norm": 5.408625602722168, | |
| "learning_rate": 1.0153425656919609e-07, | |
| "loss": 0.40831270813941956, | |
| "step": 3476 | |
| }, | |
| { | |
| "epoch": 2.9276094276094278, | |
| "grad_norm": 5.647230625152588, | |
| "learning_rate": 1.0146611304408931e-07, | |
| "loss": 0.8993617296218872, | |
| "step": 3478 | |
| }, | |
| { | |
| "epoch": 2.929292929292929, | |
| "grad_norm": 4.667529106140137, | |
| "learning_rate": 1.0139951490694746e-07, | |
| "loss": 0.570891261100769, | |
| "step": 3480 | |
| }, | |
| { | |
| "epoch": 2.930976430976431, | |
| "grad_norm": 3.3206403255462646, | |
| "learning_rate": 1.013344623872292e-07, | |
| "loss": 0.8598926663398743, | |
| "step": 3482 | |
| }, | |
| { | |
| "epoch": 2.9326599326599325, | |
| "grad_norm": 16.66160774230957, | |
| "learning_rate": 1.0127095570906781e-07, | |
| "loss": 0.6207292079925537, | |
| "step": 3484 | |
| }, | |
| { | |
| "epoch": 2.9343434343434343, | |
| "grad_norm": 5.509361267089844, | |
| "learning_rate": 1.0120899509127051e-07, | |
| "loss": 0.7470987439155579, | |
| "step": 3486 | |
| }, | |
| { | |
| "epoch": 2.936026936026936, | |
| "grad_norm": 4.825704097747803, | |
| "learning_rate": 1.0114858074731771e-07, | |
| "loss": 0.8294214606285095, | |
| "step": 3488 | |
| }, | |
| { | |
| "epoch": 2.9377104377104377, | |
| "grad_norm": 6.74330997467041, | |
| "learning_rate": 1.0108971288536224e-07, | |
| "loss": 0.8246122598648071, | |
| "step": 3490 | |
| }, | |
| { | |
| "epoch": 2.9393939393939394, | |
| "grad_norm": 14.303301811218262, | |
| "learning_rate": 1.0103239170822867e-07, | |
| "loss": 0.936402440071106, | |
| "step": 3492 | |
| }, | |
| { | |
| "epoch": 2.941077441077441, | |
| "grad_norm": 11.945917129516602, | |
| "learning_rate": 1.0097661741341254e-07, | |
| "loss": 0.5219341516494751, | |
| "step": 3494 | |
| }, | |
| { | |
| "epoch": 2.942760942760943, | |
| "grad_norm": 4.1998820304870605, | |
| "learning_rate": 1.0092239019307974e-07, | |
| "loss": 0.8593817949295044, | |
| "step": 3496 | |
| }, | |
| { | |
| "epoch": 2.9444444444444446, | |
| "grad_norm": 11.838610649108887, | |
| "learning_rate": 1.0086971023406596e-07, | |
| "loss": 0.4355551600456238, | |
| "step": 3498 | |
| }, | |
| { | |
| "epoch": 2.9461279461279464, | |
| "grad_norm": 7.149326801300049, | |
| "learning_rate": 1.0081857771787575e-07, | |
| "loss": 0.6722170114517212, | |
| "step": 3500 | |
| }, | |
| { | |
| "epoch": 2.9478114478114477, | |
| "grad_norm": 6.950955390930176, | |
| "learning_rate": 1.0076899282068215e-07, | |
| "loss": 0.8052189350128174, | |
| "step": 3502 | |
| }, | |
| { | |
| "epoch": 2.9494949494949494, | |
| "grad_norm": 8.336926460266113, | |
| "learning_rate": 1.00720955713326e-07, | |
| "loss": 0.3886244297027588, | |
| "step": 3504 | |
| }, | |
| { | |
| "epoch": 2.951178451178451, | |
| "grad_norm": 2.504477024078369, | |
| "learning_rate": 1.0067446656131536e-07, | |
| "loss": 0.7975258231163025, | |
| "step": 3506 | |
| }, | |
| { | |
| "epoch": 2.952861952861953, | |
| "grad_norm": 3.895413398742676, | |
| "learning_rate": 1.0062952552482489e-07, | |
| "loss": 0.9311509132385254, | |
| "step": 3508 | |
| }, | |
| { | |
| "epoch": 2.9545454545454546, | |
| "grad_norm": 3.7988312244415283, | |
| "learning_rate": 1.0058613275869534e-07, | |
| "loss": 0.7745894193649292, | |
| "step": 3510 | |
| }, | |
| { | |
| "epoch": 2.9562289562289563, | |
| "grad_norm": 5.594661235809326, | |
| "learning_rate": 1.0054428841243314e-07, | |
| "loss": 0.8809847235679626, | |
| "step": 3512 | |
| }, | |
| { | |
| "epoch": 2.957912457912458, | |
| "grad_norm": 10.233572006225586, | |
| "learning_rate": 1.0050399263020963e-07, | |
| "loss": 0.5408470630645752, | |
| "step": 3514 | |
| }, | |
| { | |
| "epoch": 2.9595959595959593, | |
| "grad_norm": 4.301779747009277, | |
| "learning_rate": 1.0046524555086075e-07, | |
| "loss": 0.9347457885742188, | |
| "step": 3516 | |
| }, | |
| { | |
| "epoch": 2.961279461279461, | |
| "grad_norm": 3.945042610168457, | |
| "learning_rate": 1.0042804730788647e-07, | |
| "loss": 0.9306644797325134, | |
| "step": 3518 | |
| }, | |
| { | |
| "epoch": 2.962962962962963, | |
| "grad_norm": 8.176459312438965, | |
| "learning_rate": 1.0039239802945032e-07, | |
| "loss": 0.5422787666320801, | |
| "step": 3520 | |
| }, | |
| { | |
| "epoch": 2.9646464646464645, | |
| "grad_norm": 7.75977087020874, | |
| "learning_rate": 1.003582978383792e-07, | |
| "loss": 0.8068456649780273, | |
| "step": 3522 | |
| }, | |
| { | |
| "epoch": 2.9663299663299663, | |
| "grad_norm": 4.503453731536865, | |
| "learning_rate": 1.003257468521625e-07, | |
| "loss": 0.329245924949646, | |
| "step": 3524 | |
| }, | |
| { | |
| "epoch": 2.968013468013468, | |
| "grad_norm": 5.413825035095215, | |
| "learning_rate": 1.0029474518295213e-07, | |
| "loss": 0.9549334049224854, | |
| "step": 3526 | |
| }, | |
| { | |
| "epoch": 2.9696969696969697, | |
| "grad_norm": 5.911332607269287, | |
| "learning_rate": 1.0026529293756189e-07, | |
| "loss": 0.8807719945907593, | |
| "step": 3528 | |
| }, | |
| { | |
| "epoch": 2.9713804713804715, | |
| "grad_norm": 4.6141462326049805, | |
| "learning_rate": 1.0023739021746709e-07, | |
| "loss": 0.9357779026031494, | |
| "step": 3530 | |
| }, | |
| { | |
| "epoch": 2.973063973063973, | |
| "grad_norm": 22.926517486572266, | |
| "learning_rate": 1.002110371188044e-07, | |
| "loss": 0.7154991626739502, | |
| "step": 3532 | |
| }, | |
| { | |
| "epoch": 2.974747474747475, | |
| "grad_norm": 10.70749282836914, | |
| "learning_rate": 1.0018623373237139e-07, | |
| "loss": 0.3366190493106842, | |
| "step": 3534 | |
| }, | |
| { | |
| "epoch": 2.9764309764309766, | |
| "grad_norm": 5.614308834075928, | |
| "learning_rate": 1.0016298014362602e-07, | |
| "loss": 0.9368351697921753, | |
| "step": 3536 | |
| }, | |
| { | |
| "epoch": 2.9781144781144784, | |
| "grad_norm": 17.715473175048828, | |
| "learning_rate": 1.0014127643268678e-07, | |
| "loss": 0.5009272694587708, | |
| "step": 3538 | |
| }, | |
| { | |
| "epoch": 2.9797979797979797, | |
| "grad_norm": 4.969324111938477, | |
| "learning_rate": 1.0012112267433204e-07, | |
| "loss": 1.1264997720718384, | |
| "step": 3540 | |
| }, | |
| { | |
| "epoch": 2.9814814814814814, | |
| "grad_norm": 4.244232177734375, | |
| "learning_rate": 1.0010251893799999e-07, | |
| "loss": 0.9415320158004761, | |
| "step": 3542 | |
| }, | |
| { | |
| "epoch": 2.983164983164983, | |
| "grad_norm": 5.749111652374268, | |
| "learning_rate": 1.0008546528778836e-07, | |
| "loss": 0.5878887176513672, | |
| "step": 3544 | |
| }, | |
| { | |
| "epoch": 2.984848484848485, | |
| "grad_norm": 4.314813613891602, | |
| "learning_rate": 1.0006996178245414e-07, | |
| "loss": 0.934430718421936, | |
| "step": 3546 | |
| }, | |
| { | |
| "epoch": 2.9865319865319866, | |
| "grad_norm": 11.60458755493164, | |
| "learning_rate": 1.0005600847541344e-07, | |
| "loss": 0.4331338703632355, | |
| "step": 3548 | |
| }, | |
| { | |
| "epoch": 2.9882154882154883, | |
| "grad_norm": 16.16608428955078, | |
| "learning_rate": 1.0004360541474121e-07, | |
| "loss": 0.4102497398853302, | |
| "step": 3550 | |
| }, | |
| { | |
| "epoch": 2.98989898989899, | |
| "grad_norm": 4.306326866149902, | |
| "learning_rate": 1.0003275264317129e-07, | |
| "loss": 0.6111245155334473, | |
| "step": 3552 | |
| }, | |
| { | |
| "epoch": 2.9915824915824913, | |
| "grad_norm": 3.421985387802124, | |
| "learning_rate": 1.00023450198096e-07, | |
| "loss": 1.004423975944519, | |
| "step": 3554 | |
| }, | |
| { | |
| "epoch": 2.993265993265993, | |
| "grad_norm": 5.036525249481201, | |
| "learning_rate": 1.0001569811156621e-07, | |
| "loss": 0.9042291045188904, | |
| "step": 3556 | |
| }, | |
| { | |
| "epoch": 2.994949494949495, | |
| "grad_norm": 2.6041200160980225, | |
| "learning_rate": 1.0000949641029108e-07, | |
| "loss": 0.8039933443069458, | |
| "step": 3558 | |
| }, | |
| { | |
| "epoch": 2.9966329966329965, | |
| "grad_norm": 4.743382930755615, | |
| "learning_rate": 1.000048451156381e-07, | |
| "loss": 0.5211207270622253, | |
| "step": 3560 | |
| }, | |
| { | |
| "epoch": 2.9983164983164983, | |
| "grad_norm": 7.2076416015625, | |
| "learning_rate": 1.0000174424363293e-07, | |
| "loss": 0.7096606492996216, | |
| "step": 3562 | |
| }, | |
| { | |
| "epoch": 3.0, | |
| "grad_norm": 8.714981079101562, | |
| "learning_rate": 1.0000019380495939e-07, | |
| "loss": 0.6827124953269958, | |
| "step": 3564 | |
| }, | |
| { | |
| "epoch": 3.0, | |
| "step": 3564, | |
| "total_flos": 4.2988160857187287e+18, | |
| "train_loss": 0.8751117374819068, | |
| "train_runtime": 6006.754, | |
| "train_samples_per_second": 9.493, | |
| "train_steps_per_second": 0.593 | |
| } | |
| ], | |
| "logging_steps": 2, | |
| "max_steps": 3564, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 3, | |
| "save_steps": 99999, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": false, | |
| "should_training_stop": false | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 4.2988160857187287e+18, | |
| "train_batch_size": 2, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |