Image-Text-to-Text
Transformers
Safetensors
qwen3_5
llama-factory
full
Generated from Trainer
conversational
Instructions to use furproxy/9b-41 with libraries, inference providers, notebooks, and local apps. Follow these links to get started.
- Libraries
- Transformers
How to use furproxy/9b-41 with Transformers:
```python
# Use a pipeline as a high-level helper
from transformers import pipeline

pipe = pipeline("image-text-to-text", model="furproxy/9b-41")
messages = [
    {
        "role": "user",
        "content": [
            {"type": "image", "url": "https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/p-blog/candy.JPG"},
            {"type": "text", "text": "What animal is on the candy?"}
        ]
    },
]
pipe(text=messages)
```
```python
# Load model directly
from transformers import AutoProcessor, AutoModelForImageTextToText

processor = AutoProcessor.from_pretrained("furproxy/9b-41")
model = AutoModelForImageTextToText.from_pretrained("furproxy/9b-41")
messages = [
    {
        "role": "user",
        "content": [
            {"type": "image", "url": "https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/p-blog/candy.JPG"},
            {"type": "text", "text": "What animal is on the candy?"}
        ]
    },
]
inputs = processor.apply_chat_template(
    messages,
    add_generation_prompt=True,
    tokenize=True,
    return_dict=True,
    return_tensors="pt",
).to(model.device)

outputs = model.generate(**inputs, max_new_tokens=40)
print(processor.decode(outputs[0][inputs["input_ids"].shape[-1]:]))
```
- Notebooks
- Google Colab
- Kaggle
- Local Apps Settings
- vLLM
How to use furproxy/9b-41 with vLLM:
Install from pip and serve model
```bash
# Install vLLM from pip:
pip install vllm

# Start the vLLM server:
vllm serve "furproxy/9b-41"

# Call the server using curl (OpenAI-compatible API):
curl -X POST "http://localhost:8000/v1/chat/completions" \
    -H "Content-Type: application/json" \
    --data '{
        "model": "furproxy/9b-41",
        "messages": [
            {
                "role": "user",
                "content": [
                    {
                        "type": "text",
                        "text": "Describe this image in one sentence."
                    },
                    {
                        "type": "image_url",
                        "image_url": {
                            "url": "https://cdn.britannica.com/61/93061-050-99147DCE/Statue-of-Liberty-Island-New-York-Bay.jpg"
                        }
                    }
                ]
            }
        ]
    }'
```
Use Docker
docker model run hf.co/furproxy/9b-41
- SGLang
How to use furproxy/9b-41 with SGLang:
Install from pip and serve model
```bash
# Install SGLang from pip:
pip install sglang

# Start the SGLang server:
python3 -m sglang.launch_server \
    --model-path "furproxy/9b-41" \
    --host 0.0.0.0 \
    --port 30000

# Call the server using curl (OpenAI-compatible API):
curl -X POST "http://localhost:30000/v1/chat/completions" \
    -H "Content-Type: application/json" \
    --data '{
        "model": "furproxy/9b-41",
        "messages": [
            {
                "role": "user",
                "content": [
                    {
                        "type": "text",
                        "text": "Describe this image in one sentence."
                    },
                    {
                        "type": "image_url",
                        "image_url": {
                            "url": "https://cdn.britannica.com/61/93061-050-99147DCE/Statue-of-Liberty-Island-New-York-Bay.jpg"
                        }
                    }
                ]
            }
        ]
    }'
```
Use Docker images
```bash
docker run --gpus all \
    --shm-size 32g \
    -p 30000:30000 \
    -v ~/.cache/huggingface:/root/.cache/huggingface \
    --env "HF_TOKEN=<secret>" \
    --ipc=host \
    lmsysorg/sglang:latest \
    python3 -m sglang.launch_server \
        --model-path "furproxy/9b-41" \
        --host 0.0.0.0 \
        --port 30000

# Call the server using curl (OpenAI-compatible API):
curl -X POST "http://localhost:30000/v1/chat/completions" \
    -H "Content-Type: application/json" \
    --data '{
        "model": "furproxy/9b-41",
        "messages": [
            {
                "role": "user",
                "content": [
                    {
                        "type": "text",
                        "text": "Describe this image in one sentence."
                    },
                    {
                        "type": "image_url",
                        "image_url": {
                            "url": "https://cdn.britannica.com/61/93061-050-99147DCE/Statue-of-Liberty-Island-New-York-Bay.jpg"
                        }
                    }
                ]
            }
        ]
    }'
```
- Docker Model Runner
How to use furproxy/9b-41 with Docker Model Runner:
docker model run hf.co/furproxy/9b-41
| { | |
| "best_global_step": null, | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 3.0, | |
| "eval_steps": 500, | |
| "global_step": 774, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.007751937984496124, | |
| "grad_norm": 2.3754148483276367, | |
| "learning_rate": 7.692307692307693e-07, | |
| "loss": 4.068140983581543, | |
| "step": 2 | |
| }, | |
| { | |
| "epoch": 0.015503875968992248, | |
| "grad_norm": 0.20276732742786407, | |
| "learning_rate": 2.307692307692308e-06, | |
| "loss": 2.0239908695220947, | |
| "step": 4 | |
| }, | |
| { | |
| "epoch": 0.023255813953488372, | |
| "grad_norm": 0.35163113474845886, | |
| "learning_rate": 3.846153846153846e-06, | |
| "loss": 1.9337211847305298, | |
| "step": 6 | |
| }, | |
| { | |
| "epoch": 0.031007751937984496, | |
| "grad_norm": 0.19401516020298004, | |
| "learning_rate": 5.384615384615385e-06, | |
| "loss": 1.9213242530822754, | |
| "step": 8 | |
| }, | |
| { | |
| "epoch": 0.03875968992248062, | |
| "grad_norm": 0.11387048661708832, | |
| "learning_rate": 6.923076923076923e-06, | |
| "loss": 2.1911349296569824, | |
| "step": 10 | |
| }, | |
| { | |
| "epoch": 0.046511627906976744, | |
| "grad_norm": 2.326526165008545, | |
| "learning_rate": 8.461538461538462e-06, | |
| "loss": 3.268449306488037, | |
| "step": 12 | |
| }, | |
| { | |
| "epoch": 0.05426356589147287, | |
| "grad_norm": 0.15703660249710083, | |
| "learning_rate": 9.999999999999999e-06, | |
| "loss": 1.7003194093704224, | |
| "step": 14 | |
| }, | |
| { | |
| "epoch": 0.06201550387596899, | |
| "grad_norm": 0.6381284594535828, | |
| "learning_rate": 1.153846153846154e-05, | |
| "loss": 1.8064090013504028, | |
| "step": 16 | |
| }, | |
| { | |
| "epoch": 0.06976744186046512, | |
| "grad_norm": 0.5508278608322144, | |
| "learning_rate": 1.3076923076923078e-05, | |
| "loss": 1.3964051008224487, | |
| "step": 18 | |
| }, | |
| { | |
| "epoch": 0.07751937984496124, | |
| "grad_norm": 0.3715132772922516, | |
| "learning_rate": 1.4615384615384615e-05, | |
| "loss": 1.59793221950531, | |
| "step": 20 | |
| }, | |
| { | |
| "epoch": 0.08527131782945736, | |
| "grad_norm": 7.088710308074951, | |
| "learning_rate": 1.6153846153846154e-05, | |
| "loss": 1.7358227968215942, | |
| "step": 22 | |
| }, | |
| { | |
| "epoch": 0.09302325581395349, | |
| "grad_norm": 0.14647279679775238, | |
| "learning_rate": 1.7692307692307694e-05, | |
| "loss": 1.3447600603103638, | |
| "step": 24 | |
| }, | |
| { | |
| "epoch": 0.10077519379844961, | |
| "grad_norm": 0.22108672559261322, | |
| "learning_rate": 1.923076923076923e-05, | |
| "loss": 1.4682307243347168, | |
| "step": 26 | |
| }, | |
| { | |
| "epoch": 0.10852713178294573, | |
| "grad_norm": 0.3697395622730255, | |
| "learning_rate": 2.076923076923077e-05, | |
| "loss": 1.2035093307495117, | |
| "step": 28 | |
| }, | |
| { | |
| "epoch": 0.11627906976744186, | |
| "grad_norm": 0.24884682893753052, | |
| "learning_rate": 2.230769230769231e-05, | |
| "loss": 1.1427452564239502, | |
| "step": 30 | |
| }, | |
| { | |
| "epoch": 0.12403100775193798, | |
| "grad_norm": 0.18956558406352997, | |
| "learning_rate": 2.3846153846153846e-05, | |
| "loss": 1.3711202144622803, | |
| "step": 32 | |
| }, | |
| { | |
| "epoch": 0.13178294573643412, | |
| "grad_norm": 0.18877695500850677, | |
| "learning_rate": 2.5384615384615386e-05, | |
| "loss": 1.2189266681671143, | |
| "step": 34 | |
| }, | |
| { | |
| "epoch": 0.13953488372093023, | |
| "grad_norm": 0.10388179123401642, | |
| "learning_rate": 2.6923076923076923e-05, | |
| "loss": 1.3252586126327515, | |
| "step": 36 | |
| }, | |
| { | |
| "epoch": 0.14728682170542637, | |
| "grad_norm": 0.2508637309074402, | |
| "learning_rate": 2.846153846153846e-05, | |
| "loss": 1.0033904314041138, | |
| "step": 38 | |
| }, | |
| { | |
| "epoch": 0.15503875968992248, | |
| "grad_norm": 0.09986624866724014, | |
| "learning_rate": 3e-05, | |
| "loss": 1.4535468816757202, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 0.16279069767441862, | |
| "grad_norm": 0.12885728478431702, | |
| "learning_rate": 2.999580739494117e-05, | |
| "loss": 1.0325186252593994, | |
| "step": 42 | |
| }, | |
| { | |
| "epoch": 0.17054263565891473, | |
| "grad_norm": 0.10903146117925644, | |
| "learning_rate": 2.998323233708815e-05, | |
| "loss": 1.2467223405838013, | |
| "step": 44 | |
| }, | |
| { | |
| "epoch": 0.17829457364341086, | |
| "grad_norm": 0.11212802678346634, | |
| "learning_rate": 2.9962283096597995e-05, | |
| "loss": 1.6686618328094482, | |
| "step": 46 | |
| }, | |
| { | |
| "epoch": 0.18604651162790697, | |
| "grad_norm": 0.2613708972930908, | |
| "learning_rate": 2.9932973451022333e-05, | |
| "loss": 0.8405603170394897, | |
| "step": 48 | |
| }, | |
| { | |
| "epoch": 0.1937984496124031, | |
| "grad_norm": 0.8741084337234497, | |
| "learning_rate": 2.9895322676246387e-05, | |
| "loss": 0.6372175812721252, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 0.20155038759689922, | |
| "grad_norm": 0.20064491033554077, | |
| "learning_rate": 2.9849355533811937e-05, | |
| "loss": 1.0768086910247803, | |
| "step": 52 | |
| }, | |
| { | |
| "epoch": 0.20930232558139536, | |
| "grad_norm": 0.20809470117092133, | |
| "learning_rate": 2.9795102254632528e-05, | |
| "loss": 0.58198082447052, | |
| "step": 54 | |
| }, | |
| { | |
| "epoch": 0.21705426356589147, | |
| "grad_norm": 0.10565000772476196, | |
| "learning_rate": 2.9732598519111736e-05, | |
| "loss": 1.3517603874206543, | |
| "step": 56 | |
| }, | |
| { | |
| "epoch": 0.2248062015503876, | |
| "grad_norm": 0.10898104310035706, | |
| "learning_rate": 2.9661885433677437e-05, | |
| "loss": 1.340335488319397, | |
| "step": 58 | |
| }, | |
| { | |
| "epoch": 0.23255813953488372, | |
| "grad_norm": 0.3277449309825897, | |
| "learning_rate": 2.9583009503747627e-05, | |
| "loss": 1.1451056003570557, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 0.24031007751937986, | |
| "grad_norm": 0.11206506192684174, | |
| "learning_rate": 2.9496022603145497e-05, | |
| "loss": 1.2255440950393677, | |
| "step": 62 | |
| }, | |
| { | |
| "epoch": 0.24806201550387597, | |
| "grad_norm": 0.14122240245342255, | |
| "learning_rate": 2.940098193998391e-05, | |
| "loss": 1.2778782844543457, | |
| "step": 64 | |
| }, | |
| { | |
| "epoch": 0.2558139534883721, | |
| "grad_norm": 0.17153455317020416, | |
| "learning_rate": 2.9297950019041724e-05, | |
| "loss": 1.178369402885437, | |
| "step": 66 | |
| }, | |
| { | |
| "epoch": 0.26356589147286824, | |
| "grad_norm": 0.2940099239349365, | |
| "learning_rate": 2.918699460065665e-05, | |
| "loss": 1.1788100004196167, | |
| "step": 68 | |
| }, | |
| { | |
| "epoch": 0.2713178294573643, | |
| "grad_norm": 0.07703827321529388, | |
| "learning_rate": 2.906818865616178e-05, | |
| "loss": 1.306922435760498, | |
| "step": 70 | |
| }, | |
| { | |
| "epoch": 0.27906976744186046, | |
| "grad_norm": 0.24490903317928314, | |
| "learning_rate": 2.8941610319894977e-05, | |
| "loss": 1.0475130081176758, | |
| "step": 72 | |
| }, | |
| { | |
| "epoch": 0.2868217054263566, | |
| "grad_norm": 0.13828147947788239, | |
| "learning_rate": 2.8807342837812783e-05, | |
| "loss": 1.1680102348327637, | |
| "step": 74 | |
| }, | |
| { | |
| "epoch": 0.29457364341085274, | |
| "grad_norm": 0.13997578620910645, | |
| "learning_rate": 2.8665474512742603e-05, | |
| "loss": 1.0921390056610107, | |
| "step": 76 | |
| }, | |
| { | |
| "epoch": 0.3023255813953488, | |
| "grad_norm": 0.08565083891153336, | |
| "learning_rate": 2.8516098646309108e-05, | |
| "loss": 1.1694703102111816, | |
| "step": 78 | |
| }, | |
| { | |
| "epoch": 0.31007751937984496, | |
| "grad_norm": 0.054738063365221024, | |
| "learning_rate": 2.8359313477573215e-05, | |
| "loss": 1.1712660789489746, | |
| "step": 80 | |
| }, | |
| { | |
| "epoch": 0.3178294573643411, | |
| "grad_norm": 0.06600243598222733, | |
| "learning_rate": 2.8195222118423792e-05, | |
| "loss": 1.32106351852417, | |
| "step": 82 | |
| }, | |
| { | |
| "epoch": 0.32558139534883723, | |
| "grad_norm": 0.1856859177350998, | |
| "learning_rate": 2.8023932485764768e-05, | |
| "loss": 1.002191424369812, | |
| "step": 84 | |
| }, | |
| { | |
| "epoch": 0.3333333333333333, | |
| "grad_norm": 0.1050916314125061, | |
| "learning_rate": 2.7845557230542076e-05, | |
| "loss": 1.1279501914978027, | |
| "step": 86 | |
| }, | |
| { | |
| "epoch": 0.34108527131782945, | |
| "grad_norm": 0.09759809821844101, | |
| "learning_rate": 2.7660213663657282e-05, | |
| "loss": 1.3432408571243286, | |
| "step": 88 | |
| }, | |
| { | |
| "epoch": 0.3488372093023256, | |
| "grad_norm": 0.09217233955860138, | |
| "learning_rate": 2.7468023678816447e-05, | |
| "loss": 0.8359699249267578, | |
| "step": 90 | |
| }, | |
| { | |
| "epoch": 0.35658914728682173, | |
| "grad_norm": 0.11377432197332382, | |
| "learning_rate": 2.726911367236509e-05, | |
| "loss": 1.1406829357147217, | |
| "step": 92 | |
| }, | |
| { | |
| "epoch": 0.3643410852713178, | |
| "grad_norm": 0.14340269565582275, | |
| "learning_rate": 2.706361446016193e-05, | |
| "loss": 1.142421841621399, | |
| "step": 94 | |
| }, | |
| { | |
| "epoch": 0.37209302325581395, | |
| "grad_norm": 0.39369332790374756, | |
| "learning_rate": 2.6851661191546038e-05, | |
| "loss": 1.2204563617706299, | |
| "step": 96 | |
| }, | |
| { | |
| "epoch": 0.3798449612403101, | |
| "grad_norm": 0.1904468685388565, | |
| "learning_rate": 2.6633393260454096e-05, | |
| "loss": 0.7862477898597717, | |
| "step": 98 | |
| }, | |
| { | |
| "epoch": 0.3875968992248062, | |
| "grad_norm": 0.13884544372558594, | |
| "learning_rate": 2.6408954213746028e-05, | |
| "loss": 0.7346755862236023, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.3953488372093023, | |
| "grad_norm": 0.07463762909173965, | |
| "learning_rate": 2.61784916567995e-05, | |
| "loss": 1.3203626871109009, | |
| "step": 102 | |
| }, | |
| { | |
| "epoch": 0.40310077519379844, | |
| "grad_norm": 0.1436045616865158, | |
| "learning_rate": 2.5942157156435248e-05, | |
| "loss": 1.2376055717468262, | |
| "step": 104 | |
| }, | |
| { | |
| "epoch": 0.4108527131782946, | |
| "grad_norm": 0.14889220893383026, | |
| "learning_rate": 2.570010614123707e-05, | |
| "loss": 1.0368235111236572, | |
| "step": 106 | |
| }, | |
| { | |
| "epoch": 0.4186046511627907, | |
| "grad_norm": 0.073515884578228, | |
| "learning_rate": 2.545249779933216e-05, | |
| "loss": 1.105363130569458, | |
| "step": 108 | |
| }, | |
| { | |
| "epoch": 0.4263565891472868, | |
| "grad_norm": 0.0721718966960907, | |
| "learning_rate": 2.5199494973698856e-05, | |
| "loss": 1.0211938619613647, | |
| "step": 110 | |
| }, | |
| { | |
| "epoch": 0.43410852713178294, | |
| "grad_norm": 0.08736886829137802, | |
| "learning_rate": 2.494126405507074e-05, | |
| "loss": 0.9343675971031189, | |
| "step": 112 | |
| }, | |
| { | |
| "epoch": 0.4418604651162791, | |
| "grad_norm": 0.6035546064376831, | |
| "learning_rate": 2.4677974872507553e-05, | |
| "loss": 1.0941760540008545, | |
| "step": 114 | |
| }, | |
| { | |
| "epoch": 0.4496124031007752, | |
| "grad_norm": 0.10358745604753494, | |
| "learning_rate": 2.440980058170478e-05, | |
| "loss": 1.0486119985580444, | |
| "step": 116 | |
| }, | |
| { | |
| "epoch": 0.4573643410852713, | |
| "grad_norm": 0.41432875394821167, | |
| "learning_rate": 2.4136917551115484e-05, | |
| "loss": 0.9473840594291687, | |
| "step": 118 | |
| }, | |
| { | |
| "epoch": 0.46511627906976744, | |
| "grad_norm": 0.06210324168205261, | |
| "learning_rate": 2.38595052459592e-05, | |
| "loss": 1.2813960313796997, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 0.4728682170542636, | |
| "grad_norm": 0.21510902047157288, | |
| "learning_rate": 2.357774611019419e-05, | |
| "loss": 1.0586227178573608, | |
| "step": 122 | |
| }, | |
| { | |
| "epoch": 0.4806201550387597, | |
| "grad_norm": 0.10468325763940811, | |
| "learning_rate": 2.3291825446530736e-05, | |
| "loss": 1.2756110429763794, | |
| "step": 124 | |
| }, | |
| { | |
| "epoch": 0.4883720930232558, | |
| "grad_norm": 1.1782441139221191, | |
| "learning_rate": 2.3001931294564265e-05, | |
| "loss": 1.168853759765625, | |
| "step": 126 | |
| }, | |
| { | |
| "epoch": 0.49612403100775193, | |
| "grad_norm": 0.11016172915697098, | |
| "learning_rate": 2.27082543071086e-05, | |
| "loss": 1.181935429573059, | |
| "step": 128 | |
| }, | |
| { | |
| "epoch": 0.5038759689922481, | |
| "grad_norm": 0.6895468831062317, | |
| "learning_rate": 2.2410987624810524e-05, | |
| "loss": 1.1901732683181763, | |
| "step": 130 | |
| }, | |
| { | |
| "epoch": 0.5116279069767442, | |
| "grad_norm": 0.18288742005825043, | |
| "learning_rate": 2.2110326749128233e-05, | |
| "loss": 0.7289036512374878, | |
| "step": 132 | |
| }, | |
| { | |
| "epoch": 0.5193798449612403, | |
| "grad_norm": 0.11772674322128296, | |
| "learning_rate": 2.1806469413757164e-05, | |
| "loss": 1.161149024963379, | |
| "step": 134 | |
| }, | |
| { | |
| "epoch": 0.5271317829457365, | |
| "grad_norm": 0.2412514090538025, | |
| "learning_rate": 2.149961545458773e-05, | |
| "loss": 1.1283718347549438, | |
| "step": 136 | |
| }, | |
| { | |
| "epoch": 0.5348837209302325, | |
| "grad_norm": 0.08974076807498932, | |
| "learning_rate": 2.118996667828058e-05, | |
| "loss": 1.362121343612671, | |
| "step": 138 | |
| }, | |
| { | |
| "epoch": 0.5426356589147286, | |
| "grad_norm": 0.12378139048814774, | |
| "learning_rate": 2.0877726729545665e-05, | |
| "loss": 1.2608673572540283, | |
| "step": 140 | |
| }, | |
| { | |
| "epoch": 0.5503875968992248, | |
| "grad_norm": 0.41343384981155396, | |
| "learning_rate": 2.0563100957212577e-05, | |
| "loss": 0.5950201153755188, | |
| "step": 142 | |
| }, | |
| { | |
| "epoch": 0.5581395348837209, | |
| "grad_norm": 0.09795749187469482, | |
| "learning_rate": 2.0246296279180093e-05, | |
| "loss": 1.3639545440673828, | |
| "step": 144 | |
| }, | |
| { | |
| "epoch": 0.5658914728682171, | |
| "grad_norm": 0.09332104027271271, | |
| "learning_rate": 1.9927521046333833e-05, | |
| "loss": 1.0145015716552734, | |
| "step": 146 | |
| }, | |
| { | |
| "epoch": 0.5736434108527132, | |
| "grad_norm": 0.15143655240535736, | |
| "learning_rate": 1.960698490552145e-05, | |
| "loss": 0.9937471151351929, | |
| "step": 148 | |
| }, | |
| { | |
| "epoch": 0.5813953488372093, | |
| "grad_norm": 0.13751712441444397, | |
| "learning_rate": 1.9284898661675586e-05, | |
| "loss": 1.0032529830932617, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 0.5891472868217055, | |
| "grad_norm": 0.0935468077659607, | |
| "learning_rate": 1.8961474139175106e-05, | |
| "loss": 1.2299753427505493, | |
| "step": 152 | |
| }, | |
| { | |
| "epoch": 0.5968992248062015, | |
| "grad_norm": 0.06415323913097382, | |
| "learning_rate": 1.863692404253597e-05, | |
| "loss": 1.2138370275497437, | |
| "step": 154 | |
| }, | |
| { | |
| "epoch": 0.6046511627906976, | |
| "grad_norm": 0.17963920533657074, | |
| "learning_rate": 1.8311461816523192e-05, | |
| "loss": 0.7944934964179993, | |
| "step": 156 | |
| }, | |
| { | |
| "epoch": 0.6124031007751938, | |
| "grad_norm": 0.13642992079257965, | |
| "learning_rate": 1.7985301505776026e-05, | |
| "loss": 0.8701238036155701, | |
| "step": 158 | |
| }, | |
| { | |
| "epoch": 0.6201550387596899, | |
| "grad_norm": 0.09022583067417145, | |
| "learning_rate": 1.765865761403861e-05, | |
| "loss": 1.279708981513977, | |
| "step": 160 | |
| }, | |
| { | |
| "epoch": 0.627906976744186, | |
| "grad_norm": 0.310406357049942, | |
| "learning_rate": 1.733174496308864e-05, | |
| "loss": 1.020676612854004, | |
| "step": 162 | |
| }, | |
| { | |
| "epoch": 0.6356589147286822, | |
| "grad_norm": 0.0761994794011116, | |
| "learning_rate": 1.700477855145699e-05, | |
| "loss": 1.2313765287399292, | |
| "step": 164 | |
| }, | |
| { | |
| "epoch": 0.6434108527131783, | |
| "grad_norm": 0.09753235429525375, | |
| "learning_rate": 1.6677973413030936e-05, | |
| "loss": 0.9673617482185364, | |
| "step": 166 | |
| }, | |
| { | |
| "epoch": 0.6511627906976745, | |
| "grad_norm": 0.16590853035449982, | |
| "learning_rate": 1.6351544475634266e-05, | |
| "loss": 1.194890022277832, | |
| "step": 168 | |
| }, | |
| { | |
| "epoch": 0.6589147286821705, | |
| "grad_norm": 0.14948436617851257, | |
| "learning_rate": 1.6025706419677057e-05, | |
| "loss": 0.5818596482276917, | |
| "step": 170 | |
| }, | |
| { | |
| "epoch": 0.6666666666666666, | |
| "grad_norm": 0.1858793944120407, | |
| "learning_rate": 1.5700673536968222e-05, | |
| "loss": 1.1378095149993896, | |
| "step": 172 | |
| }, | |
| { | |
| "epoch": 0.6744186046511628, | |
| "grad_norm": 0.2438780963420868, | |
| "learning_rate": 1.5376659589783572e-05, | |
| "loss": 0.864031970500946, | |
| "step": 174 | |
| }, | |
| { | |
| "epoch": 0.6821705426356589, | |
| "grad_norm": 0.13808684051036835, | |
| "learning_rate": 1.5053877670282186e-05, | |
| "loss": 0.9113052487373352, | |
| "step": 176 | |
| }, | |
| { | |
| "epoch": 0.689922480620155, | |
| "grad_norm": 0.211846262216568, | |
| "learning_rate": 1.4732540060363447e-05, | |
| "loss": 0.9309589862823486, | |
| "step": 178 | |
| }, | |
| { | |
| "epoch": 0.6976744186046512, | |
| "grad_norm": 0.13679386675357819, | |
| "learning_rate": 1.4412858092056991e-05, | |
| "loss": 1.002301573753357, | |
| "step": 180 | |
| }, | |
| { | |
| "epoch": 0.7054263565891473, | |
| "grad_norm": 0.09943073987960815, | |
| "learning_rate": 1.4095042008537343e-05, | |
| "loss": 1.0712729692459106, | |
| "step": 182 | |
| }, | |
| { | |
| "epoch": 0.7131782945736435, | |
| "grad_norm": 0.18878647685050964, | |
| "learning_rate": 1.3779300825854622e-05, | |
| "loss": 0.9123468995094299, | |
| "step": 184 | |
| }, | |
| { | |
| "epoch": 0.7209302325581395, | |
| "grad_norm": 0.08719319850206375, | |
| "learning_rate": 1.3465842195472321e-05, | |
| "loss": 1.2733235359191895, | |
| "step": 186 | |
| }, | |
| { | |
| "epoch": 0.7286821705426356, | |
| "grad_norm": 0.06448693573474884, | |
| "learning_rate": 1.3154872267702522e-05, | |
| "loss": 0.9789453148841858, | |
| "step": 188 | |
| }, | |
| { | |
| "epoch": 0.7364341085271318, | |
| "grad_norm": 0.12241167575120926, | |
| "learning_rate": 1.2846595556128331e-05, | |
| "loss": 1.0140795707702637, | |
| "step": 190 | |
| }, | |
| { | |
| "epoch": 0.7441860465116279, | |
| "grad_norm": 0.14142774045467377, | |
| "learning_rate": 1.254121480310276e-05, | |
| "loss": 1.1332778930664062, | |
| "step": 192 | |
| }, | |
| { | |
| "epoch": 0.751937984496124, | |
| "grad_norm": 0.07337574660778046, | |
| "learning_rate": 1.2238930846412475e-05, | |
| "loss": 1.201830506324768, | |
| "step": 194 | |
| }, | |
| { | |
| "epoch": 0.7596899224806202, | |
| "grad_norm": 0.07899358868598938, | |
| "learning_rate": 1.1939942487194116e-05, | |
| "loss": 1.2011100053787231, | |
| "step": 196 | |
| }, | |
| { | |
| "epoch": 0.7674418604651163, | |
| "grad_norm": 0.10521137714385986, | |
| "learning_rate": 1.1644446359190004e-05, | |
| "loss": 0.5936653017997742, | |
| "step": 198 | |
| }, | |
| { | |
| "epoch": 0.7751937984496124, | |
| "grad_norm": 0.16837139427661896, | |
| "learning_rate": 1.1352636799429354e-05, | |
| "loss": 1.3216241598129272, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.7829457364341085, | |
| "grad_norm": 0.11404802650213242, | |
| "learning_rate": 1.1064705720419829e-05, | |
| "loss": 1.084835171699524, | |
| "step": 202 | |
| }, | |
| { | |
| "epoch": 0.7906976744186046, | |
| "grad_norm": 0.24780981242656708, | |
| "learning_rate": 1.0780842483933755e-05, | |
| "loss": 1.2125266790390015, | |
| "step": 204 | |
| }, | |
| { | |
| "epoch": 0.7984496124031008, | |
| "grad_norm": 0.12619031965732574, | |
| "learning_rate": 1.050123377647171e-05, | |
| "loss": 1.0225963592529297, | |
| "step": 206 | |
| }, | |
| { | |
| "epoch": 0.8062015503875969, | |
| "grad_norm": 1.412670612335205, | |
| "learning_rate": 1.0226063486485695e-05, | |
| "loss": 0.7963980436325073, | |
| "step": 208 | |
| }, | |
| { | |
| "epoch": 0.813953488372093, | |
| "grad_norm": 0.18459799885749817, | |
| "learning_rate": 9.955512583442334e-06, | |
| "loss": 1.2788116931915283, | |
| "step": 210 | |
| }, | |
| { | |
| "epoch": 0.8217054263565892, | |
| "grad_norm": 0.058253731578588486, | |
| "learning_rate": 9.68975899880592e-06, | |
| "loss": 1.1842073202133179, | |
| "step": 212 | |
| }, | |
| { | |
| "epoch": 0.8294573643410853, | |
| "grad_norm": 0.09324084967374802, | |
| "learning_rate": 9.42897750901933e-06, | |
| "loss": 0.9420091509819031, | |
| "step": 214 | |
| }, | |
| { | |
| "epoch": 0.8372093023255814, | |
| "grad_norm": 0.14589789509773254, | |
| "learning_rate": 9.173339620559935e-06, | |
| "loss": 1.0436409711837769, | |
| "step": 216 | |
| }, | |
| { | |
| "epoch": 0.8449612403100775, | |
| "grad_norm": 0.08236993849277496, | |
| "learning_rate": 8.923013457146082e-06, | |
| "loss": 1.2834446430206299, | |
| "step": 218 | |
| }, | |
| { | |
| "epoch": 0.8527131782945736, | |
| "grad_norm": 0.07797209173440933, | |
| "learning_rate": 8.678163649168214e-06, | |
| "loss": 1.1693506240844727, | |
| "step": 220 | |
| }, | |
| { | |
| "epoch": 0.8604651162790697, | |
| "grad_norm": 0.21979407966136932, | |
| "learning_rate": 8.438951225417476e-06, | |
| "loss": 0.49415066838264465, | |
| "step": 222 | |
| }, | |
| { | |
| "epoch": 0.8682170542635659, | |
| "grad_norm": 0.16792796552181244, | |
| "learning_rate": 8.205533507182963e-06, | |
| "loss": 1.1654852628707886, | |
| "step": 224 | |
| }, | |
| { | |
| "epoch": 0.875968992248062, | |
| "grad_norm": 0.1074092760682106, | |
| "learning_rate": 7.978064004787238e-06, | |
| "loss": 1.2648242712020874, | |
| "step": 226 | |
| }, | |
| { | |
| "epoch": 0.8837209302325582, | |
| "grad_norm": 0.12686721980571747, | |
| "learning_rate": 7.756692316628162e-06, | |
| "loss": 0.8766679167747498, | |
| "step": 228 | |
| }, | |
| { | |
| "epoch": 0.8914728682170543, | |
| "grad_norm": 0.10413216799497604, | |
| "learning_rate": 7.541564030793536e-06, | |
| "loss": 0.9922328591346741, | |
| "step": 230 | |
| }, | |
| { | |
| "epoch": 0.8992248062015504, | |
| "grad_norm": 0.07999309152364731, | |
| "learning_rate": 7.33282062931308e-06, | |
| "loss": 0.837881863117218, | |
| "step": 232 | |
| }, | |
| { | |
| "epoch": 0.9069767441860465, | |
| "grad_norm": 0.16637900471687317, | |
| "learning_rate": 7.13059939511089e-06, | |
| "loss": 1.272527813911438, | |
| "step": 234 | |
| }, | |
| { | |
| "epoch": 0.9147286821705426, | |
| "grad_norm": 0.13920988142490387, | |
| "learning_rate": 6.935033321719421e-06, | |
| "loss": 0.6637862920761108, | |
| "step": 236 | |
| }, | |
| { | |
| "epoch": 0.9224806201550387, | |
| "grad_norm": 0.07921171188354492, | |
| "learning_rate": 6.746251025814548e-06, | |
| "loss": 1.2028839588165283, | |
| "step": 238 | |
| }, | |
| { | |
| "epoch": 0.9302325581395349, | |
| "grad_norm": 0.11715266853570938, | |
| "learning_rate": 6.564376662629032e-06, | |
| "loss": 1.0310890674591064, | |
| "step": 240 | |
| }, | |
| { | |
| "epoch": 0.937984496124031, | |
| "grad_norm": 0.1706083118915558, | |
| "learning_rate": 6.389529844300147e-06, | |
| "loss": 1.129476547241211, | |
| "step": 242 | |
| }, | |
| { | |
| "epoch": 0.9457364341085271, | |
| "grad_norm": 0.09015638381242752, | |
| "learning_rate": 6.2218255612051575e-06, | |
| "loss": 0.9788402915000916, | |
| "step": 244 | |
| }, | |
| { | |
| "epoch": 0.9534883720930233, | |
| "grad_norm": 0.09626635164022446, | |
| "learning_rate": 6.061374106336329e-06, | |
| "loss": 0.7472362518310547, | |
| "step": 246 | |
| }, | |
| { | |
| "epoch": 0.9612403100775194, | |
| "grad_norm": 0.17239803075790405, | |
| "learning_rate": 5.9082810027652495e-06, | |
| "loss": 0.7408154606819153, | |
| "step": 248 | |
| }, | |
| { | |
| "epoch": 0.9689922480620154, | |
| "grad_norm": 0.07973187416791916, | |
| "learning_rate": 5.762646934244157e-06, | |
| "loss": 1.1912089586257935, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 0.9767441860465116, | |
| "grad_norm": 0.08109164237976074, | |
| "learning_rate": 5.6245676789899e-06, | |
| "loss": 0.970727264881134, | |
| "step": 252 | |
| }, | |
| { | |
| "epoch": 0.9844961240310077, | |
| "grad_norm": 0.2656784951686859, | |
| "learning_rate": 5.494134046694101e-06, | |
| "loss": 0.9474197626113892, | |
| "step": 254 | |
| }, | |
| { | |
| "epoch": 0.9922480620155039, | |
| "grad_norm": 0.09388367086648941, | |
| "learning_rate": 5.371431818800934e-06, | |
| "loss": 0.7675265073776245, | |
| "step": 256 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "grad_norm": 0.07208788394927979, | |
| "learning_rate": 5.256541692091799e-06, | |
| "loss": 1.151860237121582, | |
| "step": 258 | |
| }, | |
| { | |
| "epoch": 1.0077519379844961, | |
| "grad_norm": 0.07177931815385818, | |
| "learning_rate": 5.149539225613974e-06, | |
| "loss": 0.6956380605697632, | |
| "step": 260 | |
| }, | |
| { | |
| "epoch": 1.0155038759689923, | |
| "grad_norm": 0.06252402067184448, | |
| "learning_rate": 5.050494790988212e-06, | |
| "loss": 0.9135383367538452, | |
| "step": 262 | |
| }, | |
| { | |
| "epoch": 1.0232558139534884, | |
| "grad_norm": 0.17128507792949677, | |
| "learning_rate": 4.95947352612787e-06, | |
| "loss": 0.721315324306488, | |
| "step": 264 | |
| }, | |
| { | |
| "epoch": 1.0310077519379846, | |
| "grad_norm": 0.08467314392328262, | |
| "learning_rate": 4.876535292400089e-06, | |
| "loss": 0.4410458207130432, | |
| "step": 266 | |
| }, | |
| { | |
| "epoch": 1.0387596899224807, | |
| "grad_norm": 0.10766004770994186, | |
| "learning_rate": 4.801734635257148e-06, | |
| "loss": 0.8536827564239502, | |
| "step": 268 | |
| }, | |
| { | |
| "epoch": 1.0465116279069768, | |
| "grad_norm": 0.15451736748218536, | |
| "learning_rate": 4.735120748363916e-06, | |
| "loss": 0.903506875038147, | |
| "step": 270 | |
| }, | |
| { | |
| "epoch": 1.054263565891473, | |
| "grad_norm": 0.06612464040517807, | |
| "learning_rate": 4.676737441244975e-06, | |
| "loss": 0.48186248540878296, | |
| "step": 272 | |
| }, | |
| { | |
| "epoch": 1.062015503875969, | |
| "grad_norm": 0.10002221167087555, | |
| "learning_rate": 4.626623110472677e-06, | |
| "loss": 0.8960871696472168, | |
| "step": 274 | |
| }, | |
| { | |
| "epoch": 1.069767441860465, | |
| "grad_norm": 0.07858562469482422, | |
| "learning_rate": 4.584810714415135e-06, | |
| "loss": 0.8507243990898132, | |
| "step": 276 | |
| }, | |
| { | |
| "epoch": 1.0775193798449612, | |
| "grad_norm": 0.06665261089801788, | |
| "learning_rate": 4.5513277515607014e-06, | |
| "loss": 0.9197998642921448, | |
| "step": 278 | |
| }, | |
| { | |
| "epoch": 1.0852713178294573, | |
| "grad_norm": 0.16345328092575073, | |
| "learning_rate": 4.526196242433211e-06, | |
| "loss": 0.778313398361206, | |
| "step": 280 | |
| }, | |
| { | |
| "epoch": 1.0930232558139534, | |
| "grad_norm": 0.10489022731781006, | |
| "learning_rate": 4.509432715109887e-06, | |
| "loss": 0.5479567050933838, | |
| "step": 282 | |
| }, | |
| { | |
| "epoch": 1.1007751937984496, | |
| "grad_norm": 0.05080074071884155, | |
| "learning_rate": 4.50104819435143e-06, | |
| "loss": 0.6334800720214844, | |
| "step": 284 | |
| }, | |
| { | |
| "epoch": 1.1085271317829457, | |
| "grad_norm": 0.12185381352901459, | |
| "learning_rate": 4.50104819435143e-06, | |
| "loss": 0.8215212225914001, | |
| "step": 286 | |
| }, | |
| { | |
| "epoch": 1.1162790697674418, | |
| "grad_norm": 0.08784171938896179, | |
| "learning_rate": 4.509432715109887e-06, | |
| "loss": 0.5245926976203918, | |
| "step": 288 | |
| }, | |
| { | |
| "epoch": 1.124031007751938, | |
| "grad_norm": 0.09065528959035873, | |
| "learning_rate": 4.526196242433211e-06, | |
| "loss": 1.0330955982208252, | |
| "step": 290 | |
| }, | |
| { | |
| "epoch": 1.1317829457364341, | |
| "grad_norm": 0.04104357957839966, | |
| "learning_rate": 4.5513277515607014e-06, | |
| "loss": 0.5526050329208374, | |
| "step": 292 | |
| }, | |
| { | |
| "epoch": 1.1395348837209303, | |
| "grad_norm": 0.31155890226364136, | |
| "learning_rate": 4.584810714415136e-06, | |
| "loss": 1.046125888824463, | |
| "step": 294 | |
| }, | |
| { | |
| "epoch": 1.1472868217054264, | |
| "grad_norm": 0.15053009986877441, | |
| "learning_rate": 4.626623110472676e-06, | |
| "loss": 0.3840217590332031, | |
| "step": 296 | |
| }, | |
| { | |
| "epoch": 1.1550387596899225, | |
| "grad_norm": 0.08694499731063843, | |
| "learning_rate": 4.676737441244973e-06, | |
| "loss": 0.6799867153167725, | |
| "step": 298 | |
| }, | |
| { | |
| "epoch": 1.1627906976744187, | |
| "grad_norm": 0.07247356325387955, | |
| "learning_rate": 4.735120748363917e-06, | |
| "loss": 0.6748986840248108, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 1.1705426356589148, | |
| "grad_norm": 0.06139397993683815, | |
| "learning_rate": 4.801734635257148e-06, | |
| "loss": 0.8421810865402222, | |
| "step": 302 | |
| }, | |
| { | |
| "epoch": 1.178294573643411, | |
| "grad_norm": 0.08629012107849121, | |
| "learning_rate": 4.876535292400087e-06, | |
| "loss": 0.5402819514274597, | |
| "step": 304 | |
| }, | |
| { | |
| "epoch": 1.1860465116279069, | |
| "grad_norm": 0.17801746726036072, | |
| "learning_rate": 4.95947352612787e-06, | |
| "loss": 0.9019787311553955, | |
| "step": 306 | |
| }, | |
| { | |
| "epoch": 1.193798449612403, | |
| "grad_norm": 0.13845831155776978, | |
| "learning_rate": 5.050494790988212e-06, | |
| "loss": 0.8330530524253845, | |
| "step": 308 | |
| }, | |
| { | |
| "epoch": 1.2015503875968991, | |
| "grad_norm": 0.0652163103222847, | |
| "learning_rate": 5.149539225613974e-06, | |
| "loss": 1.0060863494873047, | |
| "step": 310 | |
| }, | |
| { | |
| "epoch": 1.2093023255813953, | |
| "grad_norm": 0.0937967598438263, | |
| "learning_rate": 5.256541692091797e-06, | |
| "loss": 0.5403499007225037, | |
| "step": 312 | |
| }, | |
| { | |
| "epoch": 1.2170542635658914, | |
| "grad_norm": 0.19726139307022095, | |
| "learning_rate": 5.371431818800936e-06, | |
| "loss": 0.37406668066978455, | |
| "step": 314 | |
| }, | |
| { | |
| "epoch": 1.2248062015503876, | |
| "grad_norm": 0.11253905296325684, | |
| "learning_rate": 5.494134046694099e-06, | |
| "loss": 0.6960604786872864, | |
| "step": 316 | |
| }, | |
| { | |
| "epoch": 1.2325581395348837, | |
| "grad_norm": 0.08688368648290634, | |
| "learning_rate": 5.624567678989899e-06, | |
| "loss": 0.7832977771759033, | |
| "step": 318 | |
| }, | |
| { | |
| "epoch": 1.2403100775193798, | |
| "grad_norm": 0.19534340500831604, | |
| "learning_rate": 5.762646934244156e-06, | |
| "loss": 0.9501113295555115, | |
| "step": 320 | |
| }, | |
| { | |
| "epoch": 1.248062015503876, | |
| "grad_norm": 0.06447340548038483, | |
| "learning_rate": 5.908281002765248e-06, | |
| "loss": 1.0130536556243896, | |
| "step": 322 | |
| }, | |
| { | |
| "epoch": 1.255813953488372, | |
| "grad_norm": 0.11461887508630753, | |
| "learning_rate": 6.061374106336328e-06, | |
| "loss": 0.631900429725647, | |
| "step": 324 | |
| }, | |
| { | |
| "epoch": 1.2635658914728682, | |
| "grad_norm": 0.09350797533988953, | |
| "learning_rate": 6.2218255612051575e-06, | |
| "loss": 0.8754401803016663, | |
| "step": 326 | |
| }, | |
| { | |
| "epoch": 1.2713178294573644, | |
| "grad_norm": 0.11175557225942612, | |
| "learning_rate": 6.389529844300143e-06, | |
| "loss": 0.7127947807312012, | |
| "step": 328 | |
| }, | |
| { | |
| "epoch": 1.2790697674418605, | |
| "grad_norm": 0.09055038541555405, | |
| "learning_rate": 6.564376662629029e-06, | |
| "loss": 0.4656026363372803, | |
| "step": 330 | |
| }, | |
| { | |
| "epoch": 1.2868217054263567, | |
| "grad_norm": 0.09712733328342438, | |
| "learning_rate": 6.74625102581455e-06, | |
| "loss": 0.8079378008842468, | |
| "step": 332 | |
| }, | |
| { | |
| "epoch": 1.2945736434108528, | |
| "grad_norm": 0.18206307291984558, | |
| "learning_rate": 6.935033321719419e-06, | |
| "loss": 0.5637804865837097, | |
| "step": 334 | |
| }, | |
| { | |
| "epoch": 1.302325581395349, | |
| "grad_norm": 0.23368722200393677, | |
| "learning_rate": 7.130599395110884e-06, | |
| "loss": 0.8007771968841553, | |
| "step": 336 | |
| }, | |
| { | |
| "epoch": 1.310077519379845, | |
| "grad_norm": 0.05224426090717316, | |
| "learning_rate": 7.332820629313082e-06, | |
| "loss": 0.551106333732605, | |
| "step": 338 | |
| }, | |
| { | |
| "epoch": 1.3178294573643412, | |
| "grad_norm": 0.07984264940023422, | |
| "learning_rate": 7.541564030793533e-06, | |
| "loss": 0.7754759788513184, | |
| "step": 340 | |
| }, | |
| { | |
| "epoch": 1.3255813953488373, | |
| "grad_norm": 0.22976501286029816, | |
| "learning_rate": 7.75669231662816e-06, | |
| "loss": 0.7786872982978821, | |
| "step": 342 | |
| }, | |
| { | |
| "epoch": 1.3333333333333333, | |
| "grad_norm": 0.17023955285549164, | |
| "learning_rate": 7.978064004787231e-06, | |
| "loss": 0.7895460724830627, | |
| "step": 344 | |
| }, | |
| { | |
| "epoch": 1.3410852713178294, | |
| "grad_norm": 0.12108391523361206, | |
| "learning_rate": 8.205533507182961e-06, | |
| "loss": 0.20940443873405457, | |
| "step": 346 | |
| }, | |
| { | |
| "epoch": 1.3488372093023255, | |
| "grad_norm": 0.07635517418384552, | |
| "learning_rate": 8.438951225417474e-06, | |
| "loss": 0.819771409034729, | |
| "step": 348 | |
| }, | |
| { | |
| "epoch": 1.3565891472868217, | |
| "grad_norm": 0.11260077357292175, | |
| "learning_rate": 8.678163649168212e-06, | |
| "loss": 0.9801982641220093, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 1.3643410852713178, | |
| "grad_norm": 0.09885291010141373, | |
| "learning_rate": 8.923013457146075e-06, | |
| "loss": 0.7718797326087952, | |
| "step": 352 | |
| }, | |
| { | |
| "epoch": 1.372093023255814, | |
| "grad_norm": 0.09329655021429062, | |
| "learning_rate": 9.173339620559931e-06, | |
| "loss": 0.40787971019744873, | |
| "step": 354 | |
| }, | |
| { | |
| "epoch": 1.37984496124031, | |
| "grad_norm": 0.11724522709846497, | |
| "learning_rate": 9.428977509019326e-06, | |
| "loss": 0.797160804271698, | |
| "step": 356 | |
| }, | |
| { | |
| "epoch": 1.3875968992248062, | |
| "grad_norm": 0.11735495924949646, | |
| "learning_rate": 9.689758998805924e-06, | |
| "loss": 0.6483190059661865, | |
| "step": 358 | |
| }, | |
| { | |
| "epoch": 1.3953488372093024, | |
| "grad_norm": 0.08914632350206375, | |
| "learning_rate": 9.955512583442333e-06, | |
| "loss": 0.7835768461227417, | |
| "step": 360 | |
| }, | |
| { | |
| "epoch": 1.4031007751937985, | |
| "grad_norm": 0.07666268944740295, | |
| "learning_rate": 1.0226063486485691e-05, | |
| "loss": 0.6386092901229858, | |
| "step": 362 | |
| }, | |
| { | |
| "epoch": 1.4108527131782946, | |
| "grad_norm": 0.08281254768371582, | |
| "learning_rate": 1.0501233776471714e-05, | |
| "loss": 0.8520874977111816, | |
| "step": 364 | |
| }, | |
| { | |
| "epoch": 1.4186046511627908, | |
| "grad_norm": 0.14842084050178528, | |
| "learning_rate": 1.0780842483933755e-05, | |
| "loss": 0.37374499440193176, | |
| "step": 366 | |
| }, | |
| { | |
| "epoch": 1.4263565891472867, | |
| "grad_norm": 0.24841120839118958, | |
| "learning_rate": 1.1064705720419827e-05, | |
| "loss": 0.3320968449115753, | |
| "step": 368 | |
| }, | |
| { | |
| "epoch": 1.4341085271317828, | |
| "grad_norm": 0.11581484228372574, | |
| "learning_rate": 1.135263679942935e-05, | |
| "loss": 0.7746375799179077, | |
| "step": 370 | |
| }, | |
| { | |
| "epoch": 1.441860465116279, | |
| "grad_norm": 0.0945417657494545, | |
| "learning_rate": 1.1644446359190006e-05, | |
| "loss": 0.6704602241516113, | |
| "step": 372 | |
| }, | |
| { | |
| "epoch": 1.449612403100775, | |
| "grad_norm": 0.06997057050466537, | |
| "learning_rate": 1.1939942487194116e-05, | |
| "loss": 0.9213350415229797, | |
| "step": 374 | |
| }, | |
| { | |
| "epoch": 1.4573643410852712, | |
| "grad_norm": 0.07435750216245651, | |
| "learning_rate": 1.2238930846412471e-05, | |
| "loss": 0.7233853936195374, | |
| "step": 376 | |
| }, | |
| { | |
| "epoch": 1.4651162790697674, | |
| "grad_norm": 0.18093754351139069, | |
| "learning_rate": 1.2541214803102757e-05, | |
| "loss": 0.5185383558273315, | |
| "step": 378 | |
| }, | |
| { | |
| "epoch": 1.4728682170542635, | |
| "grad_norm": 0.052637044340372086, | |
| "learning_rate": 1.2846595556128331e-05, | |
| "loss": 0.7751470804214478, | |
| "step": 380 | |
| }, | |
| { | |
| "epoch": 1.4806201550387597, | |
| "grad_norm": 0.10150747746229172, | |
| "learning_rate": 1.3154872267702518e-05, | |
| "loss": 0.7363438010215759, | |
| "step": 382 | |
| }, | |
| { | |
| "epoch": 1.4883720930232558, | |
| "grad_norm": 0.08896318078041077, | |
| "learning_rate": 1.3465842195472318e-05, | |
| "loss": 0.697909951210022, | |
| "step": 384 | |
| }, | |
| { | |
| "epoch": 1.496124031007752, | |
| "grad_norm": 0.09349460154771805, | |
| "learning_rate": 1.3779300825854622e-05, | |
| "loss": 0.5058455467224121, | |
| "step": 386 | |
| }, | |
| { | |
| "epoch": 1.503875968992248, | |
| "grad_norm": 0.0640454888343811, | |
| "learning_rate": 1.4095042008537336e-05, | |
| "loss": 0.6899944543838501, | |
| "step": 388 | |
| }, | |
| { | |
| "epoch": 1.5116279069767442, | |
| "grad_norm": 0.08342494815587997, | |
| "learning_rate": 1.4412858092056988e-05, | |
| "loss": 0.5844802856445312, | |
| "step": 390 | |
| }, | |
| { | |
| "epoch": 1.5193798449612403, | |
| "grad_norm": 0.14086060225963593, | |
| "learning_rate": 1.4732540060363447e-05, | |
| "loss": 0.6977730393409729, | |
| "step": 392 | |
| }, | |
| { | |
| "epoch": 1.5271317829457365, | |
| "grad_norm": 0.135100856423378, | |
| "learning_rate": 1.5053877670282176e-05, | |
| "loss": 0.7261441349983215, | |
| "step": 394 | |
| }, | |
| { | |
| "epoch": 1.5348837209302326, | |
| "grad_norm": 0.1089802235364914, | |
| "learning_rate": 1.537665958978357e-05, | |
| "loss": 0.7607800960540771, | |
| "step": 396 | |
| }, | |
| { | |
| "epoch": 1.5426356589147288, | |
| "grad_norm": 0.17686955630779266, | |
| "learning_rate": 1.5700673536968222e-05, | |
| "loss": 0.5964785218238831, | |
| "step": 398 | |
| }, | |
| { | |
| "epoch": 1.550387596899225, | |
| "grad_norm": 0.06133165583014488, | |
| "learning_rate": 1.6025706419677047e-05, | |
| "loss": 0.7581831812858582, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 1.558139534883721, | |
| "grad_norm": 0.11867906898260117, | |
| "learning_rate": 1.6351544475634256e-05, | |
| "loss": 0.5359363555908203, | |
| "step": 402 | |
| }, | |
| { | |
| "epoch": 1.5658914728682172, | |
| "grad_norm": 0.08171830326318741, | |
| "learning_rate": 1.6677973413030932e-05, | |
| "loss": 0.9142735004425049, | |
| "step": 404 | |
| }, | |
| { | |
| "epoch": 1.5736434108527133, | |
| "grad_norm": 0.11325247585773468, | |
| "learning_rate": 1.7004778551456975e-05, | |
| "loss": 0.7637568712234497, | |
| "step": 406 | |
| }, | |
| { | |
| "epoch": 1.5813953488372094, | |
| "grad_norm": 0.054144054651260376, | |
| "learning_rate": 1.7331744963088644e-05, | |
| "loss": 0.31641456484794617, | |
| "step": 408 | |
| }, | |
| { | |
| "epoch": 1.5891472868217056, | |
| "grad_norm": 0.051439665257930756, | |
| "learning_rate": 1.7658657614038598e-05, | |
| "loss": 0.780099630355835, | |
| "step": 410 | |
| }, | |
| { | |
| "epoch": 1.5968992248062015, | |
| "grad_norm": 0.07669004052877426, | |
| "learning_rate": 1.7985301505776015e-05, | |
| "loss": 0.7998414635658264, | |
| "step": 412 | |
| }, | |
| { | |
| "epoch": 1.6046511627906976, | |
| "grad_norm": 0.08620447665452957, | |
| "learning_rate": 1.8311461816523192e-05, | |
| "loss": 0.5864279866218567, | |
| "step": 414 | |
| }, | |
| { | |
| "epoch": 1.6124031007751938, | |
| "grad_norm": 0.0925377830862999, | |
| "learning_rate": 1.8636924042535962e-05, | |
| "loss": 0.47105392813682556, | |
| "step": 416 | |
| }, | |
| { | |
| "epoch": 1.62015503875969, | |
| "grad_norm": 0.08717449009418488, | |
| "learning_rate": 1.8961474139175093e-05, | |
| "loss": 0.8024092316627502, | |
| "step": 418 | |
| }, | |
| { | |
| "epoch": 1.627906976744186, | |
| "grad_norm": 0.12033627182245255, | |
| "learning_rate": 1.9284898661675586e-05, | |
| "loss": 0.810451090335846, | |
| "step": 420 | |
| }, | |
| { | |
| "epoch": 1.6356589147286822, | |
| "grad_norm": 0.07522077113389969, | |
| "learning_rate": 1.9606984905521443e-05, | |
| "loss": 0.4688906967639923, | |
| "step": 422 | |
| }, | |
| { | |
| "epoch": 1.6434108527131783, | |
| "grad_norm": 0.07208564877510071, | |
| "learning_rate": 1.9927521046333837e-05, | |
| "loss": 0.7383279204368591, | |
| "step": 424 | |
| }, | |
| { | |
| "epoch": 1.6511627906976745, | |
| "grad_norm": 0.09510686248540878, | |
| "learning_rate": 2.0246296279180093e-05, | |
| "loss": 0.8395543694496155, | |
| "step": 426 | |
| }, | |
| { | |
| "epoch": 1.6589147286821704, | |
| "grad_norm": 0.15472382307052612, | |
| "learning_rate": 2.0563100957212567e-05, | |
| "loss": 0.8986775875091553, | |
| "step": 428 | |
| }, | |
| { | |
| "epoch": 1.6666666666666665, | |
| "grad_norm": 0.09020368754863739, | |
| "learning_rate": 2.0877726729545672e-05, | |
| "loss": 0.8169777393341064, | |
| "step": 430 | |
| }, | |
| { | |
| "epoch": 1.6744186046511627, | |
| "grad_norm": 0.198333740234375, | |
| "learning_rate": 2.1189966678280578e-05, | |
| "loss": 1.033119559288025, | |
| "step": 432 | |
| }, | |
| { | |
| "epoch": 1.6821705426356588, | |
| "grad_norm": 0.08684570342302322, | |
| "learning_rate": 2.149961545458772e-05, | |
| "loss": 0.5892492532730103, | |
| "step": 434 | |
| }, | |
| { | |
| "epoch": 1.689922480620155, | |
| "grad_norm": 0.0764966830611229, | |
| "learning_rate": 2.1806469413757164e-05, | |
| "loss": 0.7995302081108093, | |
| "step": 436 | |
| }, | |
| { | |
| "epoch": 1.697674418604651, | |
| "grad_norm": 0.13916683197021484, | |
| "learning_rate": 2.211032674912823e-05, | |
| "loss": 0.8415105938911438, | |
| "step": 438 | |
| }, | |
| { | |
| "epoch": 1.7054263565891472, | |
| "grad_norm": 0.24585378170013428, | |
| "learning_rate": 2.241098762481052e-05, | |
| "loss": 0.6350277066230774, | |
| "step": 440 | |
| }, | |
| { | |
| "epoch": 1.7131782945736433, | |
| "grad_norm": 0.050845544785261154, | |
| "learning_rate": 2.27082543071086e-05, | |
| "loss": 0.8463593125343323, | |
| "step": 442 | |
| }, | |
| { | |
| "epoch": 1.7209302325581395, | |
| "grad_norm": 0.07698489725589752, | |
| "learning_rate": 2.3001931294564265e-05, | |
| "loss": 0.5609403252601624, | |
| "step": 444 | |
| }, | |
| { | |
| "epoch": 1.7286821705426356, | |
| "grad_norm": 0.06638149172067642, | |
| "learning_rate": 2.3291825446530733e-05, | |
| "loss": 0.8690592050552368, | |
| "step": 446 | |
| }, | |
| { | |
| "epoch": 1.7364341085271318, | |
| "grad_norm": 0.08811336010694504, | |
| "learning_rate": 2.357774611019419e-05, | |
| "loss": 0.8064720630645752, | |
| "step": 448 | |
| }, | |
| { | |
| "epoch": 1.744186046511628, | |
| "grad_norm": 0.0755743682384491, | |
| "learning_rate": 2.385950524595919e-05, | |
| "loss": 1.0067108869552612, | |
| "step": 450 | |
| }, | |
| { | |
| "epoch": 1.751937984496124, | |
| "grad_norm": 0.06093823164701462, | |
| "learning_rate": 2.4136917551115478e-05, | |
| "loss": 0.967079222202301, | |
| "step": 452 | |
| }, | |
| { | |
| "epoch": 1.7596899224806202, | |
| "grad_norm": 0.09034255892038345, | |
| "learning_rate": 2.4409800581704777e-05, | |
| "loss": 0.6444424986839294, | |
| "step": 454 | |
| }, | |
| { | |
| "epoch": 1.7674418604651163, | |
| "grad_norm": 0.1733829230070114, | |
| "learning_rate": 2.4677974872507553e-05, | |
| "loss": 0.8322298526763916, | |
| "step": 456 | |
| }, | |
| { | |
| "epoch": 1.7751937984496124, | |
| "grad_norm": 0.23445071280002594, | |
| "learning_rate": 2.4941264055070734e-05, | |
| "loss": 0.4230212867259979, | |
| "step": 458 | |
| }, | |
| { | |
| "epoch": 1.7829457364341086, | |
| "grad_norm": 0.1249038353562355, | |
| "learning_rate": 2.5199494973698852e-05, | |
| "loss": 0.6065483093261719, | |
| "step": 460 | |
| }, | |
| { | |
| "epoch": 1.7906976744186047, | |
| "grad_norm": 0.08323405683040619, | |
| "learning_rate": 2.545249779933216e-05, | |
| "loss": 0.8183580040931702, | |
| "step": 462 | |
| }, | |
| { | |
| "epoch": 1.7984496124031009, | |
| "grad_norm": 0.10287293046712875, | |
| "learning_rate": 2.5700106141237063e-05, | |
| "loss": 0.9282822608947754, | |
| "step": 464 | |
| }, | |
| { | |
| "epoch": 1.806201550387597, | |
| "grad_norm": 0.053924717009067535, | |
| "learning_rate": 2.594215715643524e-05, | |
| "loss": 0.8734548687934875, | |
| "step": 466 | |
| }, | |
| { | |
| "epoch": 1.8139534883720931, | |
| "grad_norm": 0.10388979315757751, | |
| "learning_rate": 2.6178491656799497e-05, | |
| "loss": 0.8903089165687561, | |
| "step": 468 | |
| }, | |
| { | |
| "epoch": 1.8217054263565893, | |
| "grad_norm": 0.06755795329809189, | |
| "learning_rate": 2.640895421374602e-05, | |
| "loss": 0.4710087180137634, | |
| "step": 470 | |
| }, | |
| { | |
| "epoch": 1.8294573643410854, | |
| "grad_norm": 0.08703745901584625, | |
| "learning_rate": 2.6633393260454096e-05, | |
| "loss": 1.1290743350982666, | |
| "step": 472 | |
| }, | |
| { | |
| "epoch": 1.8372093023255816, | |
| "grad_norm": 0.10183677822351456, | |
| "learning_rate": 2.6851661191546034e-05, | |
| "loss": 0.6608400344848633, | |
| "step": 474 | |
| }, | |
| { | |
| "epoch": 1.8449612403100775, | |
| "grad_norm": 0.11454630643129349, | |
| "learning_rate": 2.706361446016192e-05, | |
| "loss": 0.850265383720398, | |
| "step": 476 | |
| }, | |
| { | |
| "epoch": 1.8527131782945736, | |
| "grad_norm": 0.06782646477222443, | |
| "learning_rate": 2.7269113672365096e-05, | |
| "loss": 0.6361703872680664, | |
| "step": 478 | |
| }, | |
| { | |
| "epoch": 1.8604651162790697, | |
| "grad_norm": 0.08559778332710266, | |
| "learning_rate": 2.7468023678816444e-05, | |
| "loss": 1.0639129877090454, | |
| "step": 480 | |
| }, | |
| { | |
| "epoch": 1.8682170542635659, | |
| "grad_norm": 0.06762553006410599, | |
| "learning_rate": 2.766021366365728e-05, | |
| "loss": 0.6422796845436096, | |
| "step": 482 | |
| }, | |
| { | |
| "epoch": 1.875968992248062, | |
| "grad_norm": 0.07438317686319351, | |
| "learning_rate": 2.784555723054208e-05, | |
| "loss": 0.7208263874053955, | |
| "step": 484 | |
| }, | |
| { | |
| "epoch": 1.8837209302325582, | |
| "grad_norm": 0.07318796217441559, | |
| "learning_rate": 2.8023932485764764e-05, | |
| "loss": 0.8420804738998413, | |
| "step": 486 | |
| }, | |
| { | |
| "epoch": 1.8914728682170543, | |
| "grad_norm": 0.10379486531019211, | |
| "learning_rate": 2.81952221184238e-05, | |
| "loss": 0.5533670783042908, | |
| "step": 488 | |
| }, | |
| { | |
| "epoch": 1.8992248062015504, | |
| "grad_norm": 1.0894800424575806, | |
| "learning_rate": 2.8359313477573215e-05, | |
| "loss": 0.688605785369873, | |
| "step": 490 | |
| }, | |
| { | |
| "epoch": 1.9069767441860463, | |
| "grad_norm": 0.23758739233016968, | |
| "learning_rate": 2.8516098646309108e-05, | |
| "loss": 0.5789573192596436, | |
| "step": 492 | |
| }, | |
| { | |
| "epoch": 1.9147286821705425, | |
| "grad_norm": 0.06857667863368988, | |
| "learning_rate": 2.8665474512742607e-05, | |
| "loss": 0.6448074579238892, | |
| "step": 494 | |
| }, | |
| { | |
| "epoch": 1.9224806201550386, | |
| "grad_norm": 0.08650626242160797, | |
| "learning_rate": 2.8807342837812783e-05, | |
| "loss": 0.6479641199111938, | |
| "step": 496 | |
| }, | |
| { | |
| "epoch": 1.9302325581395348, | |
| "grad_norm": 0.07275024801492691, | |
| "learning_rate": 2.894161031989497e-05, | |
| "loss": 0.4521400034427643, | |
| "step": 498 | |
| }, | |
| { | |
| "epoch": 1.937984496124031, | |
| "grad_norm": 0.05953352525830269, | |
| "learning_rate": 2.906818865616178e-05, | |
| "loss": 0.9132779240608215, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 1.945736434108527, | |
| "grad_norm": 0.12861226499080658, | |
| "learning_rate": 2.9186994600656647e-05, | |
| "loss": 0.6908618807792664, | |
| "step": 502 | |
| }, | |
| { | |
| "epoch": 1.9534883720930232, | |
| "grad_norm": 0.07091208547353745, | |
| "learning_rate": 2.929795001904172e-05, | |
| "loss": 0.6676538586616516, | |
| "step": 504 | |
| }, | |
| { | |
| "epoch": 1.9612403100775193, | |
| "grad_norm": 0.11093394458293915, | |
| "learning_rate": 2.9400981939983914e-05, | |
| "loss": 1.0052788257598877, | |
| "step": 506 | |
| }, | |
| { | |
| "epoch": 1.9689922480620154, | |
| "grad_norm": 0.05772824585437775, | |
| "learning_rate": 2.9496022603145494e-05, | |
| "loss": 0.7913935780525208, | |
| "step": 508 | |
| }, | |
| { | |
| "epoch": 1.9767441860465116, | |
| "grad_norm": 0.0762370154261589, | |
| "learning_rate": 2.9583009503747627e-05, | |
| "loss": 0.9280475974082947, | |
| "step": 510 | |
| }, | |
| { | |
| "epoch": 1.9844961240310077, | |
| "grad_norm": 0.18662315607070923, | |
| "learning_rate": 2.9661885433677437e-05, | |
| "loss": 0.7493736743927002, | |
| "step": 512 | |
| }, | |
| { | |
| "epoch": 1.9922480620155039, | |
| "grad_norm": 0.07221183180809021, | |
| "learning_rate": 2.9732598519111736e-05, | |
| "loss": 1.0501880645751953, | |
| "step": 514 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "grad_norm": 0.08491652458906174, | |
| "learning_rate": 2.9795102254632528e-05, | |
| "loss": 1.011595368385315, | |
| "step": 516 | |
| }, | |
| { | |
| "epoch": 2.007751937984496, | |
| "grad_norm": 0.09373293071985245, | |
| "learning_rate": 2.9849355533811937e-05, | |
| "loss": 0.5705936551094055, | |
| "step": 518 | |
| }, | |
| { | |
| "epoch": 2.0155038759689923, | |
| "grad_norm": 0.06463813781738281, | |
| "learning_rate": 2.9895322676246387e-05, | |
| "loss": 0.7379302978515625, | |
| "step": 520 | |
| }, | |
| { | |
| "epoch": 2.0232558139534884, | |
| "grad_norm": 0.09566348791122437, | |
| "learning_rate": 2.993297345102233e-05, | |
| "loss": 0.46209296584129333, | |
| "step": 522 | |
| }, | |
| { | |
| "epoch": 2.0310077519379846, | |
| "grad_norm": 0.05616720765829086, | |
| "learning_rate": 2.9962283096597995e-05, | |
| "loss": 0.773676335811615, | |
| "step": 524 | |
| }, | |
| { | |
| "epoch": 2.0387596899224807, | |
| "grad_norm": 0.09464262425899506, | |
| "learning_rate": 2.998323233708815e-05, | |
| "loss": 0.6592158675193787, | |
| "step": 526 | |
| }, | |
| { | |
| "epoch": 2.046511627906977, | |
| "grad_norm": 0.09258489310741425, | |
| "learning_rate": 2.999580739494117e-05, | |
| "loss": 0.7777129411697388, | |
| "step": 528 | |
| }, | |
| { | |
| "epoch": 2.054263565891473, | |
| "grad_norm": 0.13635995984077454, | |
| "learning_rate": 3e-05, | |
| "loss": 0.385895311832428, | |
| "step": 530 | |
| }, | |
| { | |
| "epoch": 2.062015503875969, | |
| "grad_norm": 0.1054837629199028, | |
| "learning_rate": 2.999580739494117e-05, | |
| "loss": 0.7748541235923767, | |
| "step": 532 | |
| }, | |
| { | |
| "epoch": 2.0697674418604652, | |
| "grad_norm": 0.08860507607460022, | |
| "learning_rate": 2.998323233708815e-05, | |
| "loss": 0.407875120639801, | |
| "step": 534 | |
| }, | |
| { | |
| "epoch": 2.0775193798449614, | |
| "grad_norm": 0.07644882053136826, | |
| "learning_rate": 2.9962283096598e-05, | |
| "loss": 0.41405466198921204, | |
| "step": 536 | |
| }, | |
| { | |
| "epoch": 2.0852713178294575, | |
| "grad_norm": 0.20681916177272797, | |
| "learning_rate": 2.9932973451022333e-05, | |
| "loss": 0.701027512550354, | |
| "step": 538 | |
| }, | |
| { | |
| "epoch": 2.0930232558139537, | |
| "grad_norm": 0.07310563325881958, | |
| "learning_rate": 2.9895322676246387e-05, | |
| "loss": 0.4735100567340851, | |
| "step": 540 | |
| }, | |
| { | |
| "epoch": 2.10077519379845, | |
| "grad_norm": 0.0755162462592125, | |
| "learning_rate": 2.9849355533811937e-05, | |
| "loss": 0.27081194519996643, | |
| "step": 542 | |
| }, | |
| { | |
| "epoch": 2.108527131782946, | |
| "grad_norm": 0.07929737865924835, | |
| "learning_rate": 2.9795102254632528e-05, | |
| "loss": 0.6002092957496643, | |
| "step": 544 | |
| }, | |
| { | |
| "epoch": 2.116279069767442, | |
| "grad_norm": 0.25740522146224976, | |
| "learning_rate": 2.973259851911174e-05, | |
| "loss": 0.4636404514312744, | |
| "step": 546 | |
| }, | |
| { | |
| "epoch": 2.124031007751938, | |
| "grad_norm": 0.07688764482736588, | |
| "learning_rate": 2.9661885433677434e-05, | |
| "loss": 0.4923861026763916, | |
| "step": 548 | |
| }, | |
| { | |
| "epoch": 2.1317829457364343, | |
| "grad_norm": 0.24001885950565338, | |
| "learning_rate": 2.9583009503747627e-05, | |
| "loss": 0.3250856101512909, | |
| "step": 550 | |
| }, | |
| { | |
| "epoch": 2.13953488372093, | |
| "grad_norm": 0.09132993221282959, | |
| "learning_rate": 2.9496022603145497e-05, | |
| "loss": 0.7897784113883972, | |
| "step": 552 | |
| }, | |
| { | |
| "epoch": 2.147286821705426, | |
| "grad_norm": 0.09284122288227081, | |
| "learning_rate": 2.940098193998391e-05, | |
| "loss": 0.8441802859306335, | |
| "step": 554 | |
| }, | |
| { | |
| "epoch": 2.1550387596899223, | |
| "grad_norm": 0.07503140717744827, | |
| "learning_rate": 2.9297950019041724e-05, | |
| "loss": 0.4028940498828888, | |
| "step": 556 | |
| }, | |
| { | |
| "epoch": 2.1627906976744184, | |
| "grad_norm": 0.11651087552309036, | |
| "learning_rate": 2.9186994600656647e-05, | |
| "loss": 0.6657426953315735, | |
| "step": 558 | |
| }, | |
| { | |
| "epoch": 2.1705426356589146, | |
| "grad_norm": 0.06494183093309402, | |
| "learning_rate": 2.906818865616178e-05, | |
| "loss": 0.5439774990081787, | |
| "step": 560 | |
| }, | |
| { | |
| "epoch": 2.1782945736434107, | |
| "grad_norm": 0.05145857110619545, | |
| "learning_rate": 2.8941610319894977e-05, | |
| "loss": 0.7213448882102966, | |
| "step": 562 | |
| }, | |
| { | |
| "epoch": 2.186046511627907, | |
| "grad_norm": 0.1473415493965149, | |
| "learning_rate": 2.8807342837812783e-05, | |
| "loss": 0.38557326793670654, | |
| "step": 564 | |
| }, | |
| { | |
| "epoch": 2.193798449612403, | |
| "grad_norm": 0.2709689438343048, | |
| "learning_rate": 2.8665474512742603e-05, | |
| "loss": 0.41664543747901917, | |
| "step": 566 | |
| }, | |
| { | |
| "epoch": 2.201550387596899, | |
| "grad_norm": 0.06767801940441132, | |
| "learning_rate": 2.851609864630911e-05, | |
| "loss": 0.4579377770423889, | |
| "step": 568 | |
| }, | |
| { | |
| "epoch": 2.2093023255813953, | |
| "grad_norm": 0.3255118727684021, | |
| "learning_rate": 2.8359313477573215e-05, | |
| "loss": 0.3196179270744324, | |
| "step": 570 | |
| }, | |
| { | |
| "epoch": 2.2170542635658914, | |
| "grad_norm": 0.1096249520778656, | |
| "learning_rate": 2.8195222118423792e-05, | |
| "loss": 0.5369107127189636, | |
| "step": 572 | |
| }, | |
| { | |
| "epoch": 2.2248062015503876, | |
| "grad_norm": 0.2894248068332672, | |
| "learning_rate": 2.8023932485764768e-05, | |
| "loss": 0.23676389455795288, | |
| "step": 574 | |
| }, | |
| { | |
| "epoch": 2.2325581395348837, | |
| "grad_norm": 0.19947735965251923, | |
| "learning_rate": 2.7845557230542076e-05, | |
| "loss": 0.44901129603385925, | |
| "step": 576 | |
| }, | |
| { | |
| "epoch": 2.24031007751938, | |
| "grad_norm": 0.06506390869617462, | |
| "learning_rate": 2.766021366365729e-05, | |
| "loss": 0.5859266519546509, | |
| "step": 578 | |
| }, | |
| { | |
| "epoch": 2.248062015503876, | |
| "grad_norm": 0.10611079633235931, | |
| "learning_rate": 2.746802367881645e-05, | |
| "loss": 0.6005488038063049, | |
| "step": 580 | |
| }, | |
| { | |
| "epoch": 2.255813953488372, | |
| "grad_norm": 0.05949712544679642, | |
| "learning_rate": 2.726911367236509e-05, | |
| "loss": 0.32260704040527344, | |
| "step": 582 | |
| }, | |
| { | |
| "epoch": 2.2635658914728682, | |
| "grad_norm": 0.09240850806236267, | |
| "learning_rate": 2.706361446016193e-05, | |
| "loss": 0.8233704566955566, | |
| "step": 584 | |
| }, | |
| { | |
| "epoch": 2.2713178294573644, | |
| "grad_norm": 0.08874181658029556, | |
| "learning_rate": 2.685166119154604e-05, | |
| "loss": 0.4317566156387329, | |
| "step": 586 | |
| }, | |
| { | |
| "epoch": 2.2790697674418605, | |
| "grad_norm": 0.05373215302824974, | |
| "learning_rate": 2.6633393260454096e-05, | |
| "loss": 0.8105683326721191, | |
| "step": 588 | |
| }, | |
| { | |
| "epoch": 2.2868217054263567, | |
| "grad_norm": 0.05979755148291588, | |
| "learning_rate": 2.6408954213746025e-05, | |
| "loss": 0.4510256350040436, | |
| "step": 590 | |
| }, | |
| { | |
| "epoch": 2.294573643410853, | |
| "grad_norm": 0.056298933923244476, | |
| "learning_rate": 2.6178491656799504e-05, | |
| "loss": 0.7199202179908752, | |
| "step": 592 | |
| }, | |
| { | |
| "epoch": 2.302325581395349, | |
| "grad_norm": 0.06022209674119949, | |
| "learning_rate": 2.5942157156435248e-05, | |
| "loss": 0.47333112359046936, | |
| "step": 594 | |
| }, | |
| { | |
| "epoch": 2.310077519379845, | |
| "grad_norm": 0.1291632205247879, | |
| "learning_rate": 2.570010614123707e-05, | |
| "loss": 0.4947061836719513, | |
| "step": 596 | |
| }, | |
| { | |
| "epoch": 2.317829457364341, | |
| "grad_norm": 0.7499107718467712, | |
| "learning_rate": 2.5452497799332167e-05, | |
| "loss": 0.6046218872070312, | |
| "step": 598 | |
| }, | |
| { | |
| "epoch": 2.3255813953488373, | |
| "grad_norm": 0.05242902785539627, | |
| "learning_rate": 2.519949497369886e-05, | |
| "loss": 0.37087422609329224, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 2.3333333333333335, | |
| "grad_norm": 0.4367184340953827, | |
| "learning_rate": 2.494126405507074e-05, | |
| "loss": 0.579389214515686, | |
| "step": 602 | |
| }, | |
| { | |
| "epoch": 2.3410852713178296, | |
| "grad_norm": 0.0486953966319561, | |
| "learning_rate": 2.467797487250756e-05, | |
| "loss": 0.7329738736152649, | |
| "step": 604 | |
| }, | |
| { | |
| "epoch": 2.3488372093023258, | |
| "grad_norm": 0.09891688823699951, | |
| "learning_rate": 2.4409800581704784e-05, | |
| "loss": 0.5676310658454895, | |
| "step": 606 | |
| }, | |
| { | |
| "epoch": 2.356589147286822, | |
| "grad_norm": 0.12641683220863342, | |
| "learning_rate": 2.4136917551115484e-05, | |
| "loss": 0.6383396983146667, | |
| "step": 608 | |
| }, | |
| { | |
| "epoch": 2.3643410852713176, | |
| "grad_norm": 0.07655075937509537, | |
| "learning_rate": 2.3859505245959206e-05, | |
| "loss": 0.6663593053817749, | |
| "step": 610 | |
| }, | |
| { | |
| "epoch": 2.3720930232558137, | |
| "grad_norm": 0.062206994742155075, | |
| "learning_rate": 2.3577746110194188e-05, | |
| "loss": 0.32523995637893677, | |
| "step": 612 | |
| }, | |
| { | |
| "epoch": 2.37984496124031, | |
| "grad_norm": 0.13163718581199646, | |
| "learning_rate": 2.329182544653074e-05, | |
| "loss": 0.5087898373603821, | |
| "step": 614 | |
| }, | |
| { | |
| "epoch": 2.387596899224806, | |
| "grad_norm": 0.04577813297510147, | |
| "learning_rate": 2.3001931294564278e-05, | |
| "loss": 0.5215730667114258, | |
| "step": 616 | |
| }, | |
| { | |
| "epoch": 2.395348837209302, | |
| "grad_norm": 0.06540275365114212, | |
| "learning_rate": 2.27082543071086e-05, | |
| "loss": 0.7069303393363953, | |
| "step": 618 | |
| }, | |
| { | |
| "epoch": 2.4031007751937983, | |
| "grad_norm": 0.04587893187999725, | |
| "learning_rate": 2.2410987624810527e-05, | |
| "loss": 0.6097102165222168, | |
| "step": 620 | |
| }, | |
| { | |
| "epoch": 2.4108527131782944, | |
| "grad_norm": 0.18531644344329834, | |
| "learning_rate": 2.2110326749128246e-05, | |
| "loss": 0.28449299931526184, | |
| "step": 622 | |
| }, | |
| { | |
| "epoch": 2.4186046511627906, | |
| "grad_norm": 0.06915592402219772, | |
| "learning_rate": 2.180646941375716e-05, | |
| "loss": 0.5394483208656311, | |
| "step": 624 | |
| }, | |
| { | |
| "epoch": 2.4263565891472867, | |
| "grad_norm": 0.0683450847864151, | |
| "learning_rate": 2.149961545458774e-05, | |
| "loss": 0.351560115814209, | |
| "step": 626 | |
| }, | |
| { | |
| "epoch": 2.434108527131783, | |
| "grad_norm": 0.0661771222949028, | |
| "learning_rate": 2.1189966678280585e-05, | |
| "loss": 0.6790451407432556, | |
| "step": 628 | |
| }, | |
| { | |
| "epoch": 2.441860465116279, | |
| "grad_norm": 0.2682180106639862, | |
| "learning_rate": 2.0877726729545665e-05, | |
| "loss": 0.34560778737068176, | |
| "step": 630 | |
| }, | |
| { | |
| "epoch": 2.449612403100775, | |
| "grad_norm": 0.05607810616493225, | |
| "learning_rate": 2.0563100957212584e-05, | |
| "loss": 0.35299909114837646, | |
| "step": 632 | |
| }, | |
| { | |
| "epoch": 2.4573643410852712, | |
| "grad_norm": 0.1276787519454956, | |
| "learning_rate": 2.02462962791801e-05, | |
| "loss": 0.45075708627700806, | |
| "step": 634 | |
| }, | |
| { | |
| "epoch": 2.4651162790697674, | |
| "grad_norm": 0.07231509685516357, | |
| "learning_rate": 1.9927521046333833e-05, | |
| "loss": 0.4892677664756775, | |
| "step": 636 | |
| }, | |
| { | |
| "epoch": 2.4728682170542635, | |
| "grad_norm": 0.12232723832130432, | |
| "learning_rate": 1.9606984905521463e-05, | |
| "loss": 0.6066938042640686, | |
| "step": 638 | |
| }, | |
| { | |
| "epoch": 2.4806201550387597, | |
| "grad_norm": 0.054693497717380524, | |
| "learning_rate": 1.928489866167559e-05, | |
| "loss": 0.3974202275276184, | |
| "step": 640 | |
| }, | |
| { | |
| "epoch": 2.488372093023256, | |
| "grad_norm": 0.07348073273897171, | |
| "learning_rate": 1.896147413917511e-05, | |
| "loss": 0.43941450119018555, | |
| "step": 642 | |
| }, | |
| { | |
| "epoch": 2.496124031007752, | |
| "grad_norm": 0.05807847902178764, | |
| "learning_rate": 1.863692404253597e-05, | |
| "loss": 0.5508748888969421, | |
| "step": 644 | |
| }, | |
| { | |
| "epoch": 2.503875968992248, | |
| "grad_norm": 0.08628101646900177, | |
| "learning_rate": 1.83114618165232e-05, | |
| "loss": 0.5954611897468567, | |
| "step": 646 | |
| }, | |
| { | |
| "epoch": 2.511627906976744, | |
| "grad_norm": 0.08698024600744247, | |
| "learning_rate": 1.798530150577603e-05, | |
| "loss": 0.7873520851135254, | |
| "step": 648 | |
| }, | |
| { | |
| "epoch": 2.5193798449612403, | |
| "grad_norm": 0.0802086666226387, | |
| "learning_rate": 1.765865761403861e-05, | |
| "loss": 0.27345526218414307, | |
| "step": 650 | |
| }, | |
| { | |
| "epoch": 2.5271317829457365, | |
| "grad_norm": 0.058408260345458984, | |
| "learning_rate": 1.7331744963088654e-05, | |
| "loss": 0.5833812355995178, | |
| "step": 652 | |
| }, | |
| { | |
| "epoch": 2.5348837209302326, | |
| "grad_norm": 0.10947899520397186, | |
| "learning_rate": 1.7004778551456995e-05, | |
| "loss": 0.3762988746166229, | |
| "step": 654 | |
| }, | |
| { | |
| "epoch": 2.5426356589147288, | |
| "grad_norm": 0.08571284264326096, | |
| "learning_rate": 1.667797341303094e-05, | |
| "loss": 0.5067244172096252, | |
| "step": 656 | |
| }, | |
| { | |
| "epoch": 2.550387596899225, | |
| "grad_norm": 0.06394554674625397, | |
| "learning_rate": 1.6351544475634277e-05, | |
| "loss": 0.42985814809799194, | |
| "step": 658 | |
| }, | |
| { | |
| "epoch": 2.558139534883721, | |
| "grad_norm": 0.1848040074110031, | |
| "learning_rate": 1.6025706419677054e-05, | |
| "loss": 0.8337141871452332, | |
| "step": 660 | |
| }, | |
| { | |
| "epoch": 2.565891472868217, | |
| "grad_norm": 0.04375322908163071, | |
| "learning_rate": 1.570067353696823e-05, | |
| "loss": 0.5003541707992554, | |
| "step": 662 | |
| }, | |
| { | |
| "epoch": 2.5736434108527133, | |
| "grad_norm": 0.04600893706083298, | |
| "learning_rate": 1.5376659589783585e-05, | |
| "loss": 0.3022569715976715, | |
| "step": 664 | |
| }, | |
| { | |
| "epoch": 2.5813953488372094, | |
| "grad_norm": 0.05343756452202797, | |
| "learning_rate": 1.5053877670282193e-05, | |
| "loss": 0.4718426465988159, | |
| "step": 666 | |
| }, | |
| { | |
| "epoch": 2.5891472868217056, | |
| "grad_norm": 0.09041419625282288, | |
| "learning_rate": 1.473254006036345e-05, | |
| "loss": 0.4901648163795471, | |
| "step": 668 | |
| }, | |
| { | |
| "epoch": 2.5968992248062017, | |
| "grad_norm": 0.06343325972557068, | |
| "learning_rate": 1.4412858092056995e-05, | |
| "loss": 0.6914687156677246, | |
| "step": 670 | |
| }, | |
| { | |
| "epoch": 2.604651162790698, | |
| "grad_norm": 0.06813778728246689, | |
| "learning_rate": 1.4095042008537343e-05, | |
| "loss": 0.4894769787788391, | |
| "step": 672 | |
| }, | |
| { | |
| "epoch": 2.612403100775194, | |
| "grad_norm": 0.1439589262008667, | |
| "learning_rate": 1.3779300825854615e-05, | |
| "loss": 0.7514118552207947, | |
| "step": 674 | |
| }, | |
| { | |
| "epoch": 2.62015503875969, | |
| "grad_norm": 0.07022061944007874, | |
| "learning_rate": 1.3465842195472315e-05, | |
| "loss": 0.7393191456794739, | |
| "step": 676 | |
| }, | |
| { | |
| "epoch": 2.6279069767441863, | |
| "grad_norm": 0.07147393375635147, | |
| "learning_rate": 1.3154872267702535e-05, | |
| "loss": 0.8212107419967651, | |
| "step": 678 | |
| }, | |
| { | |
| "epoch": 2.6356589147286824, | |
| "grad_norm": 0.06350687146186829, | |
| "learning_rate": 1.2846595556128338e-05, | |
| "loss": 0.7656596302986145, | |
| "step": 680 | |
| }, | |
| { | |
| "epoch": 2.6434108527131785, | |
| "grad_norm": 0.1861017942428589, | |
| "learning_rate": 1.2541214803102764e-05, | |
| "loss": 0.39778298139572144, | |
| "step": 682 | |
| }, | |
| { | |
| "epoch": 2.6511627906976747, | |
| "grad_norm": 0.12844400107860565, | |
| "learning_rate": 1.2238930846412478e-05, | |
| "loss": 0.4492897689342499, | |
| "step": 684 | |
| }, | |
| { | |
| "epoch": 2.6589147286821704, | |
| "grad_norm": 0.09717841446399689, | |
| "learning_rate": 1.1939942487194114e-05, | |
| "loss": 0.5477796792984009, | |
| "step": 686 | |
| }, | |
| { | |
| "epoch": 2.6666666666666665, | |
| "grad_norm": 0.0593947097659111, | |
| "learning_rate": 1.1644446359190002e-05, | |
| "loss": 0.28585392236709595, | |
| "step": 688 | |
| }, | |
| { | |
| "epoch": 2.6744186046511627, | |
| "grad_norm": 0.045567888766527176, | |
| "learning_rate": 1.1352636799429364e-05, | |
| "loss": 0.5053625106811523, | |
| "step": 690 | |
| }, | |
| { | |
| "epoch": 2.682170542635659, | |
| "grad_norm": 0.05959075689315796, | |
| "learning_rate": 1.1064705720419824e-05, | |
| "loss": 0.567241370677948, | |
| "step": 692 | |
| }, | |
| { | |
| "epoch": 2.689922480620155, | |
| "grad_norm": 0.15132786333560944, | |
| "learning_rate": 1.0780842483933762e-05, | |
| "loss": 0.6684018969535828, | |
| "step": 694 | |
| }, | |
| { | |
| "epoch": 2.697674418604651, | |
| "grad_norm": 0.08239150047302246, | |
| "learning_rate": 1.0501233776471719e-05, | |
| "loss": 0.33106908202171326, | |
| "step": 696 | |
| }, | |
| { | |
| "epoch": 2.705426356589147, | |
| "grad_norm": 0.06124155595898628, | |
| "learning_rate": 1.0226063486485696e-05, | |
| "loss": 0.566682755947113, | |
| "step": 698 | |
| }, | |
| { | |
| "epoch": 2.7131782945736433, | |
| "grad_norm": 0.07035624980926514, | |
| "learning_rate": 9.955512583442338e-06, | |
| "loss": 0.4341398775577545, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 2.7209302325581395, | |
| "grad_norm": 0.05901051685214043, | |
| "learning_rate": 9.689758998805937e-06, | |
| "loss": 0.4164765775203705, | |
| "step": 702 | |
| }, | |
| { | |
| "epoch": 2.7286821705426356, | |
| "grad_norm": 0.04497726634144783, | |
| "learning_rate": 9.428977509019321e-06, | |
| "loss": 0.40749120712280273, | |
| "step": 704 | |
| }, | |
| { | |
| "epoch": 2.7364341085271318, | |
| "grad_norm": 0.15438151359558105, | |
| "learning_rate": 9.173339620559945e-06, | |
| "loss": 0.28900110721588135, | |
| "step": 706 | |
| }, | |
| { | |
| "epoch": 2.744186046511628, | |
| "grad_norm": 0.05592001974582672, | |
| "learning_rate": 8.923013457146072e-06, | |
| "loss": 0.41211241483688354, | |
| "step": 708 | |
| }, | |
| { | |
| "epoch": 2.751937984496124, | |
| "grad_norm": 0.0629056990146637, | |
| "learning_rate": 8.678163649168217e-06, | |
| "loss": 0.5537896156311035, | |
| "step": 710 | |
| }, | |
| { | |
| "epoch": 2.75968992248062, | |
| "grad_norm": 0.06699282675981522, | |
| "learning_rate": 8.43895122541748e-06, | |
| "loss": 0.5788278579711914, | |
| "step": 712 | |
| }, | |
| { | |
| "epoch": 2.7674418604651163, | |
| "grad_norm": 0.13577990233898163, | |
| "learning_rate": 8.205533507182964e-06, | |
| "loss": 0.37125617265701294, | |
| "step": 714 | |
| }, | |
| { | |
| "epoch": 2.7751937984496124, | |
| "grad_norm": 0.16210103034973145, | |
| "learning_rate": 7.978064004787233e-06, | |
| "loss": 0.3962320387363434, | |
| "step": 716 | |
| }, | |
| { | |
| "epoch": 2.7829457364341086, | |
| "grad_norm": 0.06700747460126877, | |
| "learning_rate": 7.756692316628171e-06, | |
| "loss": 0.6869024634361267, | |
| "step": 718 | |
| }, | |
| { | |
| "epoch": 2.7906976744186047, | |
| "grad_norm": 0.06708226352930069, | |
| "learning_rate": 7.541564030793529e-06, | |
| "loss": 0.5122371912002563, | |
| "step": 720 | |
| }, | |
| { | |
| "epoch": 2.798449612403101, | |
| "grad_norm": 0.0619942843914032, | |
| "learning_rate": 7.332820629313089e-06, | |
| "loss": 0.4030957818031311, | |
| "step": 722 | |
| }, | |
| { | |
| "epoch": 2.806201550387597, | |
| "grad_norm": 0.08853983879089355, | |
| "learning_rate": 7.1305993951108914e-06, | |
| "loss": 0.4579683840274811, | |
| "step": 724 | |
| }, | |
| { | |
| "epoch": 2.813953488372093, | |
| "grad_norm": 0.1136261448264122, | |
| "learning_rate": 6.935033321719423e-06, | |
| "loss": 0.4582154452800751, | |
| "step": 726 | |
| }, | |
| { | |
| "epoch": 2.8217054263565893, | |
| "grad_norm": 0.03374806419014931, | |
| "learning_rate": 6.74625102581455e-06, | |
| "loss": 0.46171411871910095, | |
| "step": 728 | |
| }, | |
| { | |
| "epoch": 2.8294573643410854, | |
| "grad_norm": 0.05085311084985733, | |
| "learning_rate": 6.56437666262903e-06, | |
| "loss": 0.4829785227775574, | |
| "step": 730 | |
| }, | |
| { | |
| "epoch": 2.8372093023255816, | |
| "grad_norm": 0.051897477358579636, | |
| "learning_rate": 6.389529844300143e-06, | |
| "loss": 0.4446869194507599, | |
| "step": 732 | |
| }, | |
| { | |
| "epoch": 2.8449612403100772, | |
| "grad_norm": 0.06380399316549301, | |
| "learning_rate": 6.221825561205165e-06, | |
| "loss": 0.5170708298683167, | |
| "step": 734 | |
| }, | |
| { | |
| "epoch": 2.8527131782945734, | |
| "grad_norm": 0.07282527536153793, | |
| "learning_rate": 6.061374106336333e-06, | |
| "loss": 0.6230844259262085, | |
| "step": 736 | |
| }, | |
| { | |
| "epoch": 2.8604651162790695, | |
| "grad_norm": 0.09063038229942322, | |
| "learning_rate": 5.908281002765252e-06, | |
| "loss": 0.35932058095932007, | |
| "step": 738 | |
| }, | |
| { | |
| "epoch": 2.8682170542635657, | |
| "grad_norm": 1.006274938583374, | |
| "learning_rate": 5.762646934244159e-06, | |
| "loss": 0.3806362748146057, | |
| "step": 740 | |
| }, | |
| { | |
| "epoch": 2.875968992248062, | |
| "grad_norm": 0.3232397139072418, | |
| "learning_rate": 5.624567678989899e-06, | |
| "loss": 0.513190507888794, | |
| "step": 742 | |
| }, | |
| { | |
| "epoch": 2.883720930232558, | |
| "grad_norm": 0.120949886739254, | |
| "learning_rate": 5.494134046694099e-06, | |
| "loss": 0.6526894569396973, | |
| "step": 744 | |
| }, | |
| { | |
| "epoch": 2.891472868217054, | |
| "grad_norm": 0.12644588947296143, | |
| "learning_rate": 5.371431818800933e-06, | |
| "loss": 0.4791458249092102, | |
| "step": 746 | |
| }, | |
| { | |
| "epoch": 2.89922480620155, | |
| "grad_norm": 0.06687454879283905, | |
| "learning_rate": 5.256541692091802e-06, | |
| "loss": 0.5770004987716675, | |
| "step": 748 | |
| }, | |
| { | |
| "epoch": 2.9069767441860463, | |
| "grad_norm": 0.08843245357275009, | |
| "learning_rate": 5.149539225613978e-06, | |
| "loss": 0.3434167802333832, | |
| "step": 750 | |
| }, | |
| { | |
| "epoch": 2.9147286821705425, | |
| "grad_norm": 0.07200266420841217, | |
| "learning_rate": 5.050494790988215e-06, | |
| "loss": 0.4575299322605133, | |
| "step": 752 | |
| }, | |
| { | |
| "epoch": 2.9224806201550386, | |
| "grad_norm": 0.07060275971889496, | |
| "learning_rate": 4.959473526127871e-06, | |
| "loss": 0.3564453721046448, | |
| "step": 754 | |
| }, | |
| { | |
| "epoch": 2.9302325581395348, | |
| "grad_norm": 0.06662537902593613, | |
| "learning_rate": 4.876535292400089e-06, | |
| "loss": 0.7428521513938904, | |
| "step": 756 | |
| }, | |
| { | |
| "epoch": 2.937984496124031, | |
| "grad_norm": 0.05963343381881714, | |
| "learning_rate": 4.801734635257146e-06, | |
| "loss": 0.4571719169616699, | |
| "step": 758 | |
| }, | |
| { | |
| "epoch": 2.945736434108527, | |
| "grad_norm": 0.05507909134030342, | |
| "learning_rate": 4.73512074836392e-06, | |
| "loss": 0.5132399797439575, | |
| "step": 760 | |
| }, | |
| { | |
| "epoch": 2.953488372093023, | |
| "grad_norm": 0.05289539694786072, | |
| "learning_rate": 4.676737441244973e-06, | |
| "loss": 0.814540445804596, | |
| "step": 762 | |
| }, | |
| { | |
| "epoch": 2.9612403100775193, | |
| "grad_norm": 0.05587043985724449, | |
| "learning_rate": 4.626623110472678e-06, | |
| "loss": 0.5996021628379822, | |
| "step": 764 | |
| }, | |
| { | |
| "epoch": 2.9689922480620154, | |
| "grad_norm": 0.0820513367652893, | |
| "learning_rate": 4.584810714415136e-06, | |
| "loss": 0.2337801605463028, | |
| "step": 766 | |
| }, | |
| { | |
| "epoch": 2.9767441860465116, | |
| "grad_norm": 0.08467745780944824, | |
| "learning_rate": 4.551327751560703e-06, | |
| "loss": 0.43569573760032654, | |
| "step": 768 | |
| }, | |
| { | |
| "epoch": 2.9844961240310077, | |
| "grad_norm": 0.09394794702529907, | |
| "learning_rate": 4.526196242433211e-06, | |
| "loss": 0.42782190442085266, | |
| "step": 770 | |
| }, | |
| { | |
| "epoch": 2.992248062015504, | |
| "grad_norm": 0.05439142882823944, | |
| "learning_rate": 4.509432715109889e-06, | |
| "loss": 0.516304612159729, | |
| "step": 772 | |
| }, | |
| { | |
| "epoch": 3.0, | |
| "grad_norm": 0.03667069226503372, | |
| "learning_rate": 4.50104819435143e-06, | |
| "loss": 0.15898612141609192, | |
| "step": 774 | |
| }, | |
| { | |
| "epoch": 3.0, | |
| "step": 774, | |
| "total_flos": 3.2487544184132076e+18, | |
| "train_loss": 0.8073726282178277, | |
| "train_runtime": 15483.8831, | |
| "train_samples_per_second": 3.199, | |
| "train_steps_per_second": 0.05 | |
| } | |
| ], | |
| "logging_steps": 2, | |
| "max_steps": 774, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 3, | |
| "save_steps": 99999, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": true | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 3.2487544184132076e+18, | |
| "train_batch_size": 1, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |