Image-Text-to-Text
Transformers
Safetensors
qwen3_5
llama-factory
full
Generated from Trainer
conversational
Instructions to use furproxy/9b-61 with libraries, inference providers, notebooks, and local apps. Follow these links to get started.
- Libraries
- Transformers
How to use furproxy/9b-61 with Transformers:
# Use a pipeline as a high-level helper from transformers import pipeline pipe = pipeline("image-text-to-text", model="furproxy/9b-61") messages = [ { "role": "user", "content": [ {"type": "image", "url": "https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/p-blog/candy.JPG"}, {"type": "text", "text": "What animal is on the candy?"} ] }, ] pipe(text=messages)# Load model directly from transformers import AutoProcessor, AutoModelForImageTextToText processor = AutoProcessor.from_pretrained("furproxy/9b-61") model = AutoModelForImageTextToText.from_pretrained("furproxy/9b-61") messages = [ { "role": "user", "content": [ {"type": "image", "url": "https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/p-blog/candy.JPG"}, {"type": "text", "text": "What animal is on the candy?"} ] }, ] inputs = processor.apply_chat_template( messages, add_generation_prompt=True, tokenize=True, return_dict=True, return_tensors="pt", ).to(model.device) outputs = model.generate(**inputs, max_new_tokens=40) print(processor.decode(outputs[0][inputs["input_ids"].shape[-1]:])) - Notebooks
- Google Colab
- Kaggle
- Local Apps Settings
- vLLM
How to use furproxy/9b-61 with vLLM:
Install from pip and serve model
# Install vLLM from pip: pip install vllm # Start the vLLM server: vllm serve "furproxy/9b-61" # Call the server using curl (OpenAI-compatible API): curl -X POST "http://localhost:8000/v1/chat/completions" \ -H "Content-Type: application/json" \ --data '{ "model": "furproxy/9b-61", "messages": [ { "role": "user", "content": [ { "type": "text", "text": "Describe this image in one sentence." }, { "type": "image_url", "image_url": { "url": "https://cdn.britannica.com/61/93061-050-99147DCE/Statue-of-Liberty-Island-New-York-Bay.jpg" } } ] } ] }'Use Docker
docker model run hf.co/furproxy/9b-61
- SGLang
How to use furproxy/9b-61 with SGLang:
Install from pip and serve model
# Install SGLang from pip: pip install sglang # Start the SGLang server: python3 -m sglang.launch_server \ --model-path "furproxy/9b-61" \ --host 0.0.0.0 \ --port 30000 # Call the server using curl (OpenAI-compatible API): curl -X POST "http://localhost:30000/v1/chat/completions" \ -H "Content-Type: application/json" \ --data '{ "model": "furproxy/9b-61", "messages": [ { "role": "user", "content": [ { "type": "text", "text": "Describe this image in one sentence." }, { "type": "image_url", "image_url": { "url": "https://cdn.britannica.com/61/93061-050-99147DCE/Statue-of-Liberty-Island-New-York-Bay.jpg" } } ] } ] }'Use Docker images
docker run --gpus all \ --shm-size 32g \ -p 30000:30000 \ -v ~/.cache/huggingface:/root/.cache/huggingface \ --env "HF_TOKEN=<secret>" \ --ipc=host \ lmsysorg/sglang:latest \ python3 -m sglang.launch_server \ --model-path "furproxy/9b-61" \ --host 0.0.0.0 \ --port 30000 # Call the server using curl (OpenAI-compatible API): curl -X POST "http://localhost:30000/v1/chat/completions" \ -H "Content-Type: application/json" \ --data '{ "model": "furproxy/9b-61", "messages": [ { "role": "user", "content": [ { "type": "text", "text": "Describe this image in one sentence." }, { "type": "image_url", "image_url": { "url": "https://cdn.britannica.com/61/93061-050-99147DCE/Statue-of-Liberty-Island-New-York-Bay.jpg" } } ] } ] }' - Docker Model Runner
How to use furproxy/9b-61 with Docker Model Runner:
docker model run hf.co/furproxy/9b-61
| { | |
| "best_global_step": null, | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 4.0, | |
| "eval_steps": 500, | |
| "global_step": 928, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.008620689655172414, | |
| "grad_norm": 0.2490028440952301, | |
| "learning_rate": 8.510638297872341e-07, | |
| "loss": 3.7051703929901123, | |
| "step": 2 | |
| }, | |
| { | |
| "epoch": 0.017241379310344827, | |
| "grad_norm": 0.9354268908500671, | |
| "learning_rate": 2.553191489361702e-06, | |
| "loss": 2.5432679653167725, | |
| "step": 4 | |
| }, | |
| { | |
| "epoch": 0.02586206896551724, | |
| "grad_norm": 1.1884268522262573, | |
| "learning_rate": 4.255319148936171e-06, | |
| "loss": 2.127363681793213, | |
| "step": 6 | |
| }, | |
| { | |
| "epoch": 0.034482758620689655, | |
| "grad_norm": 0.21074028313159943, | |
| "learning_rate": 5.957446808510638e-06, | |
| "loss": 1.8431488275527954, | |
| "step": 8 | |
| }, | |
| { | |
| "epoch": 0.04310344827586207, | |
| "grad_norm": 1.0560111999511719, | |
| "learning_rate": 7.659574468085107e-06, | |
| "loss": 1.2361581325531006, | |
| "step": 10 | |
| }, | |
| { | |
| "epoch": 0.05172413793103448, | |
| "grad_norm": 0.1471565067768097, | |
| "learning_rate": 9.361702127659576e-06, | |
| "loss": 1.7195181846618652, | |
| "step": 12 | |
| }, | |
| { | |
| "epoch": 0.0603448275862069, | |
| "grad_norm": 0.28380081057548523, | |
| "learning_rate": 1.1063829787234044e-05, | |
| "loss": 1.7923121452331543, | |
| "step": 14 | |
| }, | |
| { | |
| "epoch": 0.06896551724137931, | |
| "grad_norm": 0.16648316383361816, | |
| "learning_rate": 1.2765957446808513e-05, | |
| "loss": 0.7930053472518921, | |
| "step": 16 | |
| }, | |
| { | |
| "epoch": 0.07758620689655173, | |
| "grad_norm": 0.662397027015686, | |
| "learning_rate": 1.4468085106382981e-05, | |
| "loss": 0.8174023032188416, | |
| "step": 18 | |
| }, | |
| { | |
| "epoch": 0.08620689655172414, | |
| "grad_norm": 0.13494855165481567, | |
| "learning_rate": 1.6170212765957446e-05, | |
| "loss": 1.5101033449172974, | |
| "step": 20 | |
| }, | |
| { | |
| "epoch": 0.09482758620689655, | |
| "grad_norm": 0.1281285285949707, | |
| "learning_rate": 1.7872340425531915e-05, | |
| "loss": 1.6496508121490479, | |
| "step": 22 | |
| }, | |
| { | |
| "epoch": 0.10344827586206896, | |
| "grad_norm": 0.19361676275730133, | |
| "learning_rate": 1.9574468085106384e-05, | |
| "loss": 1.5436862707138062, | |
| "step": 24 | |
| }, | |
| { | |
| "epoch": 0.11206896551724138, | |
| "grad_norm": 0.26801514625549316, | |
| "learning_rate": 2.1276595744680852e-05, | |
| "loss": 1.258086919784546, | |
| "step": 26 | |
| }, | |
| { | |
| "epoch": 0.1206896551724138, | |
| "grad_norm": 0.6263072490692139, | |
| "learning_rate": 2.2978723404255324e-05, | |
| "loss": 0.4653662145137787, | |
| "step": 28 | |
| }, | |
| { | |
| "epoch": 0.12931034482758622, | |
| "grad_norm": 0.09440683573484421, | |
| "learning_rate": 2.468085106382979e-05, | |
| "loss": 1.5492061376571655, | |
| "step": 30 | |
| }, | |
| { | |
| "epoch": 0.13793103448275862, | |
| "grad_norm": 0.18837958574295044, | |
| "learning_rate": 2.6382978723404255e-05, | |
| "loss": 1.1802101135253906, | |
| "step": 32 | |
| }, | |
| { | |
| "epoch": 0.14655172413793102, | |
| "grad_norm": 0.13221898674964905, | |
| "learning_rate": 2.8085106382978727e-05, | |
| "loss": 1.5317002534866333, | |
| "step": 34 | |
| }, | |
| { | |
| "epoch": 0.15517241379310345, | |
| "grad_norm": 0.09329993277788162, | |
| "learning_rate": 2.9787234042553192e-05, | |
| "loss": 1.4935221672058105, | |
| "step": 36 | |
| }, | |
| { | |
| "epoch": 0.16379310344827586, | |
| "grad_norm": 0.1460646688938141, | |
| "learning_rate": 3.1489361702127664e-05, | |
| "loss": 1.4348247051239014, | |
| "step": 38 | |
| }, | |
| { | |
| "epoch": 0.1724137931034483, | |
| "grad_norm": 0.11409169435501099, | |
| "learning_rate": 3.319148936170213e-05, | |
| "loss": 1.6168015003204346, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 0.1810344827586207, | |
| "grad_norm": 0.212503582239151, | |
| "learning_rate": 3.48936170212766e-05, | |
| "loss": 1.7296580076217651, | |
| "step": 42 | |
| }, | |
| { | |
| "epoch": 0.1896551724137931, | |
| "grad_norm": 0.2593461275100708, | |
| "learning_rate": 3.6595744680851066e-05, | |
| "loss": 1.0080050230026245, | |
| "step": 44 | |
| }, | |
| { | |
| "epoch": 0.19827586206896552, | |
| "grad_norm": 0.15509171783924103, | |
| "learning_rate": 3.829787234042554e-05, | |
| "loss": 1.2960833311080933, | |
| "step": 46 | |
| }, | |
| { | |
| "epoch": 0.20689655172413793, | |
| "grad_norm": 0.11303484439849854, | |
| "learning_rate": 4e-05, | |
| "loss": 1.463219404220581, | |
| "step": 48 | |
| }, | |
| { | |
| "epoch": 0.21551724137931033, | |
| "grad_norm": 0.10671091824769974, | |
| "learning_rate": 3.999954222867108e-05, | |
| "loss": 1.5820165872573853, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 0.22413793103448276, | |
| "grad_norm": 0.2280566394329071, | |
| "learning_rate": 3.999816893796815e-05, | |
| "loss": 0.7817078232765198, | |
| "step": 52 | |
| }, | |
| { | |
| "epoch": 0.23275862068965517, | |
| "grad_norm": 0.0907350480556488, | |
| "learning_rate": 3.9995880197741576e-05, | |
| "loss": 1.5489472150802612, | |
| "step": 54 | |
| }, | |
| { | |
| "epoch": 0.2413793103448276, | |
| "grad_norm": 0.11344298720359802, | |
| "learning_rate": 3.999267612440463e-05, | |
| "loss": 1.447698950767517, | |
| "step": 56 | |
| }, | |
| { | |
| "epoch": 0.25, | |
| "grad_norm": 0.14231091737747192, | |
| "learning_rate": 3.9988556880927647e-05, | |
| "loss": 1.4835537672042847, | |
| "step": 58 | |
| }, | |
| { | |
| "epoch": 0.25862068965517243, | |
| "grad_norm": 0.10744752734899521, | |
| "learning_rate": 3.998352267682969e-05, | |
| "loss": 1.4630577564239502, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 0.2672413793103448, | |
| "grad_norm": 0.33425694704055786, | |
| "learning_rate": 3.99775737681679e-05, | |
| "loss": 1.2140964269638062, | |
| "step": 62 | |
| }, | |
| { | |
| "epoch": 0.27586206896551724, | |
| "grad_norm": 0.0645914226770401, | |
| "learning_rate": 3.9970710457524474e-05, | |
| "loss": 1.4031065702438354, | |
| "step": 64 | |
| }, | |
| { | |
| "epoch": 0.28448275862068967, | |
| "grad_norm": 0.07510636001825333, | |
| "learning_rate": 3.9962933093991296e-05, | |
| "loss": 1.3692433834075928, | |
| "step": 66 | |
| }, | |
| { | |
| "epoch": 0.29310344827586204, | |
| "grad_norm": 0.06527955085039139, | |
| "learning_rate": 3.995424207315214e-05, | |
| "loss": 1.3801196813583374, | |
| "step": 68 | |
| }, | |
| { | |
| "epoch": 0.3017241379310345, | |
| "grad_norm": 0.12786391377449036, | |
| "learning_rate": 3.994463783706259e-05, | |
| "loss": 1.151434063911438, | |
| "step": 70 | |
| }, | |
| { | |
| "epoch": 0.3103448275862069, | |
| "grad_norm": 0.19329607486724854, | |
| "learning_rate": 3.9934120874227505e-05, | |
| "loss": 1.0419366359710693, | |
| "step": 72 | |
| }, | |
| { | |
| "epoch": 0.31896551724137934, | |
| "grad_norm": 0.08128409832715988, | |
| "learning_rate": 3.992269171957624e-05, | |
| "loss": 1.2281261682510376, | |
| "step": 74 | |
| }, | |
| { | |
| "epoch": 0.3275862068965517, | |
| "grad_norm": 0.1377028524875641, | |
| "learning_rate": 3.991035095443538e-05, | |
| "loss": 1.3645248413085938, | |
| "step": 76 | |
| }, | |
| { | |
| "epoch": 0.33620689655172414, | |
| "grad_norm": 0.05881645902991295, | |
| "learning_rate": 3.9897099206499204e-05, | |
| "loss": 1.2278920412063599, | |
| "step": 78 | |
| }, | |
| { | |
| "epoch": 0.3448275862068966, | |
| "grad_norm": 0.15875652432441711, | |
| "learning_rate": 3.9882937149797735e-05, | |
| "loss": 1.0160530805587769, | |
| "step": 80 | |
| }, | |
| { | |
| "epoch": 0.35344827586206895, | |
| "grad_norm": 0.09680726379156113, | |
| "learning_rate": 3.986786550466246e-05, | |
| "loss": 1.4256402254104614, | |
| "step": 82 | |
| }, | |
| { | |
| "epoch": 0.3620689655172414, | |
| "grad_norm": 0.30841922760009766, | |
| "learning_rate": 3.985188503768975e-05, | |
| "loss": 1.318834900856018, | |
| "step": 84 | |
| }, | |
| { | |
| "epoch": 0.3706896551724138, | |
| "grad_norm": 0.14286209642887115, | |
| "learning_rate": 3.983499656170176e-05, | |
| "loss": 1.474804162979126, | |
| "step": 86 | |
| }, | |
| { | |
| "epoch": 0.3793103448275862, | |
| "grad_norm": 0.11407710611820221, | |
| "learning_rate": 3.981720093570517e-05, | |
| "loss": 1.3328633308410645, | |
| "step": 88 | |
| }, | |
| { | |
| "epoch": 0.3879310344827586, | |
| "grad_norm": 0.4702969193458557, | |
| "learning_rate": 3.9798499064847466e-05, | |
| "loss": 1.1703399419784546, | |
| "step": 90 | |
| }, | |
| { | |
| "epoch": 0.39655172413793105, | |
| "grad_norm": 0.13214579224586487, | |
| "learning_rate": 3.9778891900370905e-05, | |
| "loss": 1.0334569215774536, | |
| "step": 92 | |
| }, | |
| { | |
| "epoch": 0.4051724137931034, | |
| "grad_norm": 0.17201748490333557, | |
| "learning_rate": 3.9758380439564117e-05, | |
| "loss": 1.1891283988952637, | |
| "step": 94 | |
| }, | |
| { | |
| "epoch": 0.41379310344827586, | |
| "grad_norm": 0.2221606969833374, | |
| "learning_rate": 3.97369657257114e-05, | |
| "loss": 1.4113942384719849, | |
| "step": 96 | |
| }, | |
| { | |
| "epoch": 0.4224137931034483, | |
| "grad_norm": 0.39266011118888855, | |
| "learning_rate": 3.9714648848039655e-05, | |
| "loss": 1.2900447845458984, | |
| "step": 98 | |
| }, | |
| { | |
| "epoch": 0.43103448275862066, | |
| "grad_norm": 0.09964156895875931, | |
| "learning_rate": 3.969143094166295e-05, | |
| "loss": 1.270521640777588, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.4396551724137931, | |
| "grad_norm": 0.5467281937599182, | |
| "learning_rate": 3.966731318752484e-05, | |
| "loss": 1.2587134838104248, | |
| "step": 102 | |
| }, | |
| { | |
| "epoch": 0.4482758620689655, | |
| "grad_norm": 0.10468795150518417, | |
| "learning_rate": 3.964229681233825e-05, | |
| "loss": 1.3480840921401978, | |
| "step": 104 | |
| }, | |
| { | |
| "epoch": 0.45689655172413796, | |
| "grad_norm": 0.14980582892894745, | |
| "learning_rate": 3.961638308852309e-05, | |
| "loss": 1.0994645357131958, | |
| "step": 106 | |
| }, | |
| { | |
| "epoch": 0.46551724137931033, | |
| "grad_norm": 0.22398428618907928, | |
| "learning_rate": 3.958957333414157e-05, | |
| "loss": 1.233306646347046, | |
| "step": 108 | |
| }, | |
| { | |
| "epoch": 0.47413793103448276, | |
| "grad_norm": 0.20820066332817078, | |
| "learning_rate": 3.9561868912831135e-05, | |
| "loss": 1.2420070171356201, | |
| "step": 110 | |
| }, | |
| { | |
| "epoch": 0.4827586206896552, | |
| "grad_norm": 0.07252290099859238, | |
| "learning_rate": 3.953327123373506e-05, | |
| "loss": 1.5031483173370361, | |
| "step": 112 | |
| }, | |
| { | |
| "epoch": 0.49137931034482757, | |
| "grad_norm": 0.30114564299583435, | |
| "learning_rate": 3.950378175143088e-05, | |
| "loss": 1.2730351686477661, | |
| "step": 114 | |
| }, | |
| { | |
| "epoch": 0.5, | |
| "grad_norm": 0.5594906806945801, | |
| "learning_rate": 3.947340196585631e-05, | |
| "loss": 1.0227445363998413, | |
| "step": 116 | |
| }, | |
| { | |
| "epoch": 0.5086206896551724, | |
| "grad_norm": 0.06979751586914062, | |
| "learning_rate": 3.944213342223299e-05, | |
| "loss": 1.3545396327972412, | |
| "step": 118 | |
| }, | |
| { | |
| "epoch": 0.5172413793103449, | |
| "grad_norm": 0.13030360639095306, | |
| "learning_rate": 3.9409977710987896e-05, | |
| "loss": 1.332112431526184, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 0.5258620689655172, | |
| "grad_norm": 0.0672382041811943, | |
| "learning_rate": 3.937693646767245e-05, | |
| "loss": 1.3639230728149414, | |
| "step": 122 | |
| }, | |
| { | |
| "epoch": 0.5344827586206896, | |
| "grad_norm": 0.13222964107990265, | |
| "learning_rate": 3.9343011372879275e-05, | |
| "loss": 1.5418974161148071, | |
| "step": 124 | |
| }, | |
| { | |
| "epoch": 0.5431034482758621, | |
| "grad_norm": 0.09776262193918228, | |
| "learning_rate": 3.930820415215681e-05, | |
| "loss": 1.6416376829147339, | |
| "step": 126 | |
| }, | |
| { | |
| "epoch": 0.5517241379310345, | |
| "grad_norm": 0.058448825031518936, | |
| "learning_rate": 3.927251657592146e-05, | |
| "loss": 1.1251301765441895, | |
| "step": 128 | |
| }, | |
| { | |
| "epoch": 0.5603448275862069, | |
| "grad_norm": 0.052552610635757446, | |
| "learning_rate": 3.923595045936757e-05, | |
| "loss": 1.3253697156906128, | |
| "step": 130 | |
| }, | |
| { | |
| "epoch": 0.5689655172413793, | |
| "grad_norm": 0.19469662010669708, | |
| "learning_rate": 3.919850766237512e-05, | |
| "loss": 1.263968586921692, | |
| "step": 132 | |
| }, | |
| { | |
| "epoch": 0.5775862068965517, | |
| "grad_norm": 0.17169688642024994, | |
| "learning_rate": 3.9160190089415106e-05, | |
| "loss": 0.7878425717353821, | |
| "step": 134 | |
| }, | |
| { | |
| "epoch": 0.5862068965517241, | |
| "grad_norm": 0.07661473006010056, | |
| "learning_rate": 3.912099968945268e-05, | |
| "loss": 1.0626349449157715, | |
| "step": 136 | |
| }, | |
| { | |
| "epoch": 0.5948275862068966, | |
| "grad_norm": 0.04046183452010155, | |
| "learning_rate": 3.908093845584798e-05, | |
| "loss": 1.05846107006073, | |
| "step": 138 | |
| }, | |
| { | |
| "epoch": 0.603448275862069, | |
| "grad_norm": 0.09312810003757477, | |
| "learning_rate": 3.9040008426254824e-05, | |
| "loss": 1.0136967897415161, | |
| "step": 140 | |
| }, | |
| { | |
| "epoch": 0.6120689655172413, | |
| "grad_norm": 0.06341353058815002, | |
| "learning_rate": 3.8998211682516976e-05, | |
| "loss": 1.0979063510894775, | |
| "step": 142 | |
| }, | |
| { | |
| "epoch": 0.6206896551724138, | |
| "grad_norm": 0.07724281400442123, | |
| "learning_rate": 3.895555035056233e-05, | |
| "loss": 1.2308785915374756, | |
| "step": 144 | |
| }, | |
| { | |
| "epoch": 0.6293103448275862, | |
| "grad_norm": 0.06727743148803711, | |
| "learning_rate": 3.891202660029474e-05, | |
| "loss": 1.3009754419326782, | |
| "step": 146 | |
| }, | |
| { | |
| "epoch": 0.6379310344827587, | |
| "grad_norm": 0.06898898631334305, | |
| "learning_rate": 3.886764264548363e-05, | |
| "loss": 1.3296358585357666, | |
| "step": 148 | |
| }, | |
| { | |
| "epoch": 0.646551724137931, | |
| "grad_norm": 0.09349878877401352, | |
| "learning_rate": 3.882240074365145e-05, | |
| "loss": 1.2398273944854736, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 0.6551724137931034, | |
| "grad_norm": 0.17715542018413544, | |
| "learning_rate": 3.8776303195958814e-05, | |
| "loss": 0.9495888948440552, | |
| "step": 152 | |
| }, | |
| { | |
| "epoch": 0.6637931034482759, | |
| "grad_norm": 0.7314029932022095, | |
| "learning_rate": 3.872935234708747e-05, | |
| "loss": 1.1250660419464111, | |
| "step": 154 | |
| }, | |
| { | |
| "epoch": 0.6724137931034483, | |
| "grad_norm": 0.24987009167671204, | |
| "learning_rate": 3.868155058512102e-05, | |
| "loss": 1.2095718383789062, | |
| "step": 156 | |
| }, | |
| { | |
| "epoch": 0.6810344827586207, | |
| "grad_norm": 0.14576859772205353, | |
| "learning_rate": 3.8632900341423464e-05, | |
| "loss": 1.295078992843628, | |
| "step": 158 | |
| }, | |
| { | |
| "epoch": 0.6896551724137931, | |
| "grad_norm": 0.11293840408325195, | |
| "learning_rate": 3.858340409051558e-05, | |
| "loss": 1.338647723197937, | |
| "step": 160 | |
| }, | |
| { | |
| "epoch": 0.6982758620689655, | |
| "grad_norm": 0.14143511652946472, | |
| "learning_rate": 3.853306434994895e-05, | |
| "loss": 1.6283466815948486, | |
| "step": 162 | |
| }, | |
| { | |
| "epoch": 0.7068965517241379, | |
| "grad_norm": 0.06157878786325455, | |
| "learning_rate": 3.848188368017803e-05, | |
| "loss": 1.055685043334961, | |
| "step": 164 | |
| }, | |
| { | |
| "epoch": 0.7155172413793104, | |
| "grad_norm": 0.10017464309930801, | |
| "learning_rate": 3.8429864684429846e-05, | |
| "loss": 1.3115266561508179, | |
| "step": 166 | |
| }, | |
| { | |
| "epoch": 0.7241379310344828, | |
| "grad_norm": 0.1262677162885666, | |
| "learning_rate": 3.837701000857159e-05, | |
| "loss": 1.3648704290390015, | |
| "step": 168 | |
| }, | |
| { | |
| "epoch": 0.7327586206896551, | |
| "grad_norm": 0.1431019902229309, | |
| "learning_rate": 3.832332234097606e-05, | |
| "loss": 1.2736293077468872, | |
| "step": 170 | |
| }, | |
| { | |
| "epoch": 0.7413793103448276, | |
| "grad_norm": 0.3130134046077728, | |
| "learning_rate": 3.8268804412384936e-05, | |
| "loss": 1.2950979471206665, | |
| "step": 172 | |
| }, | |
| { | |
| "epoch": 0.75, | |
| "grad_norm": 0.10647283494472504, | |
| "learning_rate": 3.821345899576982e-05, | |
| "loss": 0.9605190753936768, | |
| "step": 174 | |
| }, | |
| { | |
| "epoch": 0.7586206896551724, | |
| "grad_norm": 0.16715826094150543, | |
| "learning_rate": 3.815728890619127e-05, | |
| "loss": 0.7583910226821899, | |
| "step": 176 | |
| }, | |
| { | |
| "epoch": 0.7672413793103449, | |
| "grad_norm": 0.21642223000526428, | |
| "learning_rate": 3.8100297000655566e-05, | |
| "loss": 1.3208308219909668, | |
| "step": 178 | |
| }, | |
| { | |
| "epoch": 0.7758620689655172, | |
| "grad_norm": 0.09079600870609283, | |
| "learning_rate": 3.804248617796941e-05, | |
| "loss": 1.3078787326812744, | |
| "step": 180 | |
| }, | |
| { | |
| "epoch": 0.7844827586206896, | |
| "grad_norm": 0.08445187658071518, | |
| "learning_rate": 3.798385937859249e-05, | |
| "loss": 1.0150703191757202, | |
| "step": 182 | |
| }, | |
| { | |
| "epoch": 0.7931034482758621, | |
| "grad_norm": 0.07777632772922516, | |
| "learning_rate": 3.79244195844879e-05, | |
| "loss": 0.8019794225692749, | |
| "step": 184 | |
| }, | |
| { | |
| "epoch": 0.8017241379310345, | |
| "grad_norm": 0.0645889863371849, | |
| "learning_rate": 3.7864169818970465e-05, | |
| "loss": 1.423434853553772, | |
| "step": 186 | |
| }, | |
| { | |
| "epoch": 0.8103448275862069, | |
| "grad_norm": 0.14517554640769958, | |
| "learning_rate": 3.7803113146553e-05, | |
| "loss": 1.5573757886886597, | |
| "step": 188 | |
| }, | |
| { | |
| "epoch": 0.8189655172413793, | |
| "grad_norm": 0.08607929199934006, | |
| "learning_rate": 3.774125267279041e-05, | |
| "loss": 1.2926381826400757, | |
| "step": 190 | |
| }, | |
| { | |
| "epoch": 0.8275862068965517, | |
| "grad_norm": 0.07077501714229584, | |
| "learning_rate": 3.767859154412171e-05, | |
| "loss": 1.2611286640167236, | |
| "step": 192 | |
| }, | |
| { | |
| "epoch": 0.8362068965517241, | |
| "grad_norm": 0.08681857585906982, | |
| "learning_rate": 3.7615132947710036e-05, | |
| "loss": 1.3005847930908203, | |
| "step": 194 | |
| }, | |
| { | |
| "epoch": 0.8448275862068966, | |
| "grad_norm": 0.15100322663784027, | |
| "learning_rate": 3.755088011128049e-05, | |
| "loss": 1.3176685571670532, | |
| "step": 196 | |
| }, | |
| { | |
| "epoch": 0.853448275862069, | |
| "grad_norm": 0.1908247470855713, | |
| "learning_rate": 3.7485836302956016e-05, | |
| "loss": 1.2926079034805298, | |
| "step": 198 | |
| }, | |
| { | |
| "epoch": 0.8620689655172413, | |
| "grad_norm": 0.11072229593992233, | |
| "learning_rate": 3.7420004831091105e-05, | |
| "loss": 1.290125846862793, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.8706896551724138, | |
| "grad_norm": 0.10266255587339401, | |
| "learning_rate": 3.735338904410358e-05, | |
| "loss": 1.333167552947998, | |
| "step": 202 | |
| }, | |
| { | |
| "epoch": 0.8793103448275862, | |
| "grad_norm": 0.12212225794792175, | |
| "learning_rate": 3.728599233030425e-05, | |
| "loss": 0.894460916519165, | |
| "step": 204 | |
| }, | |
| { | |
| "epoch": 0.8879310344827587, | |
| "grad_norm": 0.07256551086902618, | |
| "learning_rate": 3.72178181177246e-05, | |
| "loss": 1.2725472450256348, | |
| "step": 206 | |
| }, | |
| { | |
| "epoch": 0.896551724137931, | |
| "grad_norm": 0.12780705094337463, | |
| "learning_rate": 3.714886987394238e-05, | |
| "loss": 1.3160998821258545, | |
| "step": 208 | |
| }, | |
| { | |
| "epoch": 0.9051724137931034, | |
| "grad_norm": 0.9440551400184631, | |
| "learning_rate": 3.70791511059053e-05, | |
| "loss": 0.6522892117500305, | |
| "step": 210 | |
| }, | |
| { | |
| "epoch": 0.9137931034482759, | |
| "grad_norm": 0.09754825383424759, | |
| "learning_rate": 3.700866535975256e-05, | |
| "loss": 0.9885504245758057, | |
| "step": 212 | |
| }, | |
| { | |
| "epoch": 0.9224137931034483, | |
| "grad_norm": 0.4700559675693512, | |
| "learning_rate": 3.69374162206346e-05, | |
| "loss": 1.2210713624954224, | |
| "step": 214 | |
| }, | |
| { | |
| "epoch": 0.9310344827586207, | |
| "grad_norm": 0.15046778321266174, | |
| "learning_rate": 3.6865407312530635e-05, | |
| "loss": 1.2765154838562012, | |
| "step": 216 | |
| }, | |
| { | |
| "epoch": 0.9396551724137931, | |
| "grad_norm": 0.11940351128578186, | |
| "learning_rate": 3.67926422980644e-05, | |
| "loss": 1.0038096904754639, | |
| "step": 218 | |
| }, | |
| { | |
| "epoch": 0.9482758620689655, | |
| "grad_norm": 0.11115628480911255, | |
| "learning_rate": 3.671912487831783e-05, | |
| "loss": 0.9949377179145813, | |
| "step": 220 | |
| }, | |
| { | |
| "epoch": 0.9568965517241379, | |
| "grad_norm": 0.07952730357646942, | |
| "learning_rate": 3.664485879264279e-05, | |
| "loss": 1.3989291191101074, | |
| "step": 222 | |
| }, | |
| { | |
| "epoch": 0.9655172413793104, | |
| "grad_norm": 0.09136626124382019, | |
| "learning_rate": 3.656984781847094e-05, | |
| "loss": 0.9785476922988892, | |
| "step": 224 | |
| }, | |
| { | |
| "epoch": 0.9741379310344828, | |
| "grad_norm": 0.057783424854278564, | |
| "learning_rate": 3.649409577112152e-05, | |
| "loss": 0.9384239315986633, | |
| "step": 226 | |
| }, | |
| { | |
| "epoch": 0.9827586206896551, | |
| "grad_norm": 0.07152694463729858, | |
| "learning_rate": 3.641760650360736e-05, | |
| "loss": 1.2639554738998413, | |
| "step": 228 | |
| }, | |
| { | |
| "epoch": 0.9913793103448276, | |
| "grad_norm": 0.33899354934692383, | |
| "learning_rate": 3.634038390643886e-05, | |
| "loss": 0.9682251811027527, | |
| "step": 230 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "grad_norm": 0.07940968871116638, | |
| "learning_rate": 3.626243190742613e-05, | |
| "loss": 0.870396614074707, | |
| "step": 232 | |
| }, | |
| { | |
| "epoch": 1.0086206896551724, | |
| "grad_norm": 0.12955711781978607, | |
| "learning_rate": 3.618375447147918e-05, | |
| "loss": 0.9028921723365784, | |
| "step": 234 | |
| }, | |
| { | |
| "epoch": 1.0172413793103448, | |
| "grad_norm": 0.18779002130031586, | |
| "learning_rate": 3.6104355600406284e-05, | |
| "loss": 0.7830209136009216, | |
| "step": 236 | |
| }, | |
| { | |
| "epoch": 1.0258620689655173, | |
| "grad_norm": 0.9097674489021301, | |
| "learning_rate": 3.6024239332710415e-05, | |
| "loss": 0.8674835562705994, | |
| "step": 238 | |
| }, | |
| { | |
| "epoch": 1.0344827586206897, | |
| "grad_norm": 0.27647626399993896, | |
| "learning_rate": 3.5943409743383826e-05, | |
| "loss": 0.6074855327606201, | |
| "step": 240 | |
| }, | |
| { | |
| "epoch": 1.043103448275862, | |
| "grad_norm": 0.11918103694915771, | |
| "learning_rate": 3.586187094370079e-05, | |
| "loss": 1.2056649923324585, | |
| "step": 242 | |
| }, | |
| { | |
| "epoch": 1.0517241379310345, | |
| "grad_norm": 0.11876146495342255, | |
| "learning_rate": 3.577962708100851e-05, | |
| "loss": 0.39286842942237854, | |
| "step": 244 | |
| }, | |
| { | |
| "epoch": 1.0603448275862069, | |
| "grad_norm": 0.12174484878778458, | |
| "learning_rate": 3.569668233851613e-05, | |
| "loss": 0.9662060141563416, | |
| "step": 246 | |
| }, | |
| { | |
| "epoch": 1.0689655172413792, | |
| "grad_norm": 0.06474713236093521, | |
| "learning_rate": 3.561304093508198e-05, | |
| "loss": 0.9487460851669312, | |
| "step": 248 | |
| }, | |
| { | |
| "epoch": 1.0775862068965518, | |
| "grad_norm": 0.1427253633737564, | |
| "learning_rate": 3.552870712499898e-05, | |
| "loss": 0.8610017895698547, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 1.0862068965517242, | |
| "grad_norm": 0.1900222897529602, | |
| "learning_rate": 3.54436851977783e-05, | |
| "loss": 0.448039710521698, | |
| "step": 252 | |
| }, | |
| { | |
| "epoch": 1.0948275862068966, | |
| "grad_norm": 0.22907602787017822, | |
| "learning_rate": 3.535797947793111e-05, | |
| "loss": 0.8761284351348877, | |
| "step": 254 | |
| }, | |
| { | |
| "epoch": 1.103448275862069, | |
| "grad_norm": 0.15932975709438324, | |
| "learning_rate": 3.527159432474865e-05, | |
| "loss": 0.8125666975975037, | |
| "step": 256 | |
| }, | |
| { | |
| "epoch": 1.1120689655172413, | |
| "grad_norm": 0.349263459444046, | |
| "learning_rate": 3.518453413208053e-05, | |
| "loss": 0.6569501757621765, | |
| "step": 258 | |
| }, | |
| { | |
| "epoch": 1.1206896551724137, | |
| "grad_norm": 0.14833390712738037, | |
| "learning_rate": 3.509680332811121e-05, | |
| "loss": 0.7028253674507141, | |
| "step": 260 | |
| }, | |
| { | |
| "epoch": 1.1293103448275863, | |
| "grad_norm": 0.05770875886082649, | |
| "learning_rate": 3.5008406375134756e-05, | |
| "loss": 1.1632713079452515, | |
| "step": 262 | |
| }, | |
| { | |
| "epoch": 1.1379310344827587, | |
| "grad_norm": 0.055273283272981644, | |
| "learning_rate": 3.491934776932791e-05, | |
| "loss": 0.8371788263320923, | |
| "step": 264 | |
| }, | |
| { | |
| "epoch": 1.146551724137931, | |
| "grad_norm": 0.05901844799518585, | |
| "learning_rate": 3.482963204052139e-05, | |
| "loss": 1.0003291368484497, | |
| "step": 266 | |
| }, | |
| { | |
| "epoch": 1.1551724137931034, | |
| "grad_norm": 0.06095067784190178, | |
| "learning_rate": 3.473926375196943e-05, | |
| "loss": 0.9226457476615906, | |
| "step": 268 | |
| }, | |
| { | |
| "epoch": 1.1637931034482758, | |
| "grad_norm": 0.07438337802886963, | |
| "learning_rate": 3.464824750011779e-05, | |
| "loss": 1.1149680614471436, | |
| "step": 270 | |
| }, | |
| { | |
| "epoch": 1.1724137931034484, | |
| "grad_norm": 0.06853963434696198, | |
| "learning_rate": 3.455658791436985e-05, | |
| "loss": 1.0302170515060425, | |
| "step": 272 | |
| }, | |
| { | |
| "epoch": 1.1810344827586208, | |
| "grad_norm": 0.0973813459277153, | |
| "learning_rate": 3.446428965685121e-05, | |
| "loss": 0.7569156289100647, | |
| "step": 274 | |
| }, | |
| { | |
| "epoch": 1.1896551724137931, | |
| "grad_norm": 0.056327857077121735, | |
| "learning_rate": 3.437135742217254e-05, | |
| "loss": 0.3794441223144531, | |
| "step": 276 | |
| }, | |
| { | |
| "epoch": 1.1982758620689655, | |
| "grad_norm": 0.07669582962989807, | |
| "learning_rate": 3.427779593719079e-05, | |
| "loss": 1.1280944347381592, | |
| "step": 278 | |
| }, | |
| { | |
| "epoch": 1.206896551724138, | |
| "grad_norm": 0.058203186839818954, | |
| "learning_rate": 3.4183609960768764e-05, | |
| "loss": 0.9517163634300232, | |
| "step": 280 | |
| }, | |
| { | |
| "epoch": 1.2155172413793103, | |
| "grad_norm": 0.06986816227436066, | |
| "learning_rate": 3.4088804283533094e-05, | |
| "loss": 0.6671708822250366, | |
| "step": 282 | |
| }, | |
| { | |
| "epoch": 1.2241379310344827, | |
| "grad_norm": 0.08936108648777008, | |
| "learning_rate": 3.399338372763055e-05, | |
| "loss": 0.7694864869117737, | |
| "step": 284 | |
| }, | |
| { | |
| "epoch": 1.2327586206896552, | |
| "grad_norm": 0.12140902131795883, | |
| "learning_rate": 3.389735314648274e-05, | |
| "loss": 0.8068587183952332, | |
| "step": 286 | |
| }, | |
| { | |
| "epoch": 1.2413793103448276, | |
| "grad_norm": 0.04297681525349617, | |
| "learning_rate": 3.380071742453931e-05, | |
| "loss": 0.40287792682647705, | |
| "step": 288 | |
| }, | |
| { | |
| "epoch": 1.25, | |
| "grad_norm": 0.11908482015132904, | |
| "learning_rate": 3.370348147702949e-05, | |
| "loss": 1.0401684045791626, | |
| "step": 290 | |
| }, | |
| { | |
| "epoch": 1.2586206896551724, | |
| "grad_norm": 0.057489216327667236, | |
| "learning_rate": 3.360565024971202e-05, | |
| "loss": 0.8889655470848083, | |
| "step": 292 | |
| }, | |
| { | |
| "epoch": 1.2672413793103448, | |
| "grad_norm": 0.15609467029571533, | |
| "learning_rate": 3.350722871862368e-05, | |
| "loss": 0.9757481813430786, | |
| "step": 294 | |
| }, | |
| { | |
| "epoch": 1.2758620689655173, | |
| "grad_norm": 0.1248452365398407, | |
| "learning_rate": 3.340822188982616e-05, | |
| "loss": 0.7736673355102539, | |
| "step": 296 | |
| }, | |
| { | |
| "epoch": 1.2844827586206897, | |
| "grad_norm": 0.09071607887744904, | |
| "learning_rate": 3.330863479915138e-05, | |
| "loss": 1.0164954662322998, | |
| "step": 298 | |
| }, | |
| { | |
| "epoch": 1.293103448275862, | |
| "grad_norm": 0.06744378060102463, | |
| "learning_rate": 3.320847251194546e-05, | |
| "loss": 0.9475960731506348, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 1.3017241379310345, | |
| "grad_norm": 0.07189597189426422, | |
| "learning_rate": 3.310774012281099e-05, | |
| "loss": 0.6825069785118103, | |
| "step": 302 | |
| }, | |
| { | |
| "epoch": 1.3103448275862069, | |
| "grad_norm": 0.07518645375967026, | |
| "learning_rate": 3.300644275534793e-05, | |
| "loss": 0.5717735290527344, | |
| "step": 304 | |
| }, | |
| { | |
| "epoch": 1.3189655172413794, | |
| "grad_norm": 0.09223438799381256, | |
| "learning_rate": 3.290458556189299e-05, | |
| "loss": 1.3711295127868652, | |
| "step": 306 | |
| }, | |
| { | |
| "epoch": 1.3275862068965516, | |
| "grad_norm": 0.14958783984184265, | |
| "learning_rate": 3.2802173723257604e-05, | |
| "loss": 0.6421374082565308, | |
| "step": 308 | |
| }, | |
| { | |
| "epoch": 1.3362068965517242, | |
| "grad_norm": 0.1238432452082634, | |
| "learning_rate": 3.2699212448464385e-05, | |
| "loss": 0.9758880734443665, | |
| "step": 310 | |
| }, | |
| { | |
| "epoch": 1.3448275862068966, | |
| "grad_norm": 0.06866496056318283, | |
| "learning_rate": 3.259570697448217e-05, | |
| "loss": 0.9329778552055359, | |
| "step": 312 | |
| }, | |
| { | |
| "epoch": 1.353448275862069, | |
| "grad_norm": 0.10072822868824005, | |
| "learning_rate": 3.249166256595967e-05, | |
| "loss": 1.2179062366485596, | |
| "step": 314 | |
| }, | |
| { | |
| "epoch": 1.3620689655172413, | |
| "grad_norm": 0.06878109276294708, | |
| "learning_rate": 3.2387084514957675e-05, | |
| "loss": 1.3471888303756714, | |
| "step": 316 | |
| }, | |
| { | |
| "epoch": 1.3706896551724137, | |
| "grad_norm": 0.06524922698736191, | |
| "learning_rate": 3.2281978140679894e-05, | |
| "loss": 0.9441757202148438, | |
| "step": 318 | |
| }, | |
| { | |
| "epoch": 1.3793103448275863, | |
| "grad_norm": 0.11429349333047867, | |
| "learning_rate": 3.21763487892024e-05, | |
| "loss": 0.7498874664306641, | |
| "step": 320 | |
| }, | |
| { | |
| "epoch": 1.3879310344827587, | |
| "grad_norm": 0.07464733719825745, | |
| "learning_rate": 3.207020183320171e-05, | |
| "loss": 1.4824918508529663, | |
| "step": 322 | |
| }, | |
| { | |
| "epoch": 1.396551724137931, | |
| "grad_norm": 0.11774388700723648, | |
| "learning_rate": 3.196354267168149e-05, | |
| "loss": 0.49022743105888367, | |
| "step": 324 | |
| }, | |
| { | |
| "epoch": 1.4051724137931034, | |
| "grad_norm": 0.16186490654945374, | |
| "learning_rate": 3.185637672969799e-05, | |
| "loss": 0.6543675661087036, | |
| "step": 326 | |
| }, | |
| { | |
| "epoch": 1.4137931034482758, | |
| "grad_norm": 0.10584386438131332, | |
| "learning_rate": 3.1748709458084045e-05, | |
| "loss": 0.8541685342788696, | |
| "step": 328 | |
| }, | |
| { | |
| "epoch": 1.4224137931034484, | |
| "grad_norm": 0.07407426834106445, | |
| "learning_rate": 3.1640546333171894e-05, | |
| "loss": 0.7656717300415039, | |
| "step": 330 | |
| }, | |
| { | |
| "epoch": 1.4310344827586206, | |
| "grad_norm": 0.16052280366420746, | |
| "learning_rate": 3.153189285651458e-05, | |
| "loss": 0.6957482695579529, | |
| "step": 332 | |
| }, | |
| { | |
| "epoch": 1.4396551724137931, | |
| "grad_norm": 0.13904230296611786, | |
| "learning_rate": 3.142275455460614e-05, | |
| "loss": 0.6638420224189758, | |
| "step": 334 | |
| }, | |
| { | |
| "epoch": 1.4482758620689655, | |
| "grad_norm": 0.11371087282896042, | |
| "learning_rate": 3.131313697860053e-05, | |
| "loss": 0.7661845088005066, | |
| "step": 336 | |
| }, | |
| { | |
| "epoch": 1.456896551724138, | |
| "grad_norm": 0.11575423926115036, | |
| "learning_rate": 3.120304570402924e-05, | |
| "loss": 1.1160173416137695, | |
| "step": 338 | |
| }, | |
| { | |
| "epoch": 1.4655172413793103, | |
| "grad_norm": 0.22061830759048462, | |
| "learning_rate": 3.1092486330517714e-05, | |
| "loss": 1.384441614151001, | |
| "step": 340 | |
| }, | |
| { | |
| "epoch": 1.4741379310344827, | |
| "grad_norm": 0.12608060240745544, | |
| "learning_rate": 3.098146448150055e-05, | |
| "loss": 1.145660638809204, | |
| "step": 342 | |
| }, | |
| { | |
| "epoch": 1.4827586206896552, | |
| "grad_norm": 0.06820492446422577, | |
| "learning_rate": 3.086998580393547e-05, | |
| "loss": 0.9891381859779358, | |
| "step": 344 | |
| }, | |
| { | |
| "epoch": 1.4913793103448276, | |
| "grad_norm": 0.11383876949548721, | |
| "learning_rate": 3.075805596801605e-05, | |
| "loss": 0.6093174815177917, | |
| "step": 346 | |
| }, | |
| { | |
| "epoch": 1.5, | |
| "grad_norm": 0.2013673037290573, | |
| "learning_rate": 3.0645680666883374e-05, | |
| "loss": 0.9298641681671143, | |
| "step": 348 | |
| }, | |
| { | |
| "epoch": 1.5086206896551724, | |
| "grad_norm": 0.08500847220420837, | |
| "learning_rate": 3.053286561633644e-05, | |
| "loss": 0.9974504113197327, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 1.5172413793103448, | |
| "grad_norm": 0.14812250435352325, | |
| "learning_rate": 3.041961655454143e-05, | |
| "loss": 0.9739059209823608, | |
| "step": 352 | |
| }, | |
| { | |
| "epoch": 1.5258620689655173, | |
| "grad_norm": 0.11965472251176834, | |
| "learning_rate": 3.030593924173984e-05, | |
| "loss": 1.133984088897705, | |
| "step": 354 | |
| }, | |
| { | |
| "epoch": 1.5344827586206895, | |
| "grad_norm": 0.36424365639686584, | |
| "learning_rate": 3.0191839459955514e-05, | |
| "loss": 0.8807175755500793, | |
| "step": 356 | |
| }, | |
| { | |
| "epoch": 1.543103448275862, | |
| "grad_norm": 0.05107448622584343, | |
| "learning_rate": 3.0077323012700534e-05, | |
| "loss": 0.8361281156539917, | |
| "step": 358 | |
| }, | |
| { | |
| "epoch": 1.5517241379310345, | |
| "grad_norm": 0.09036049991846085, | |
| "learning_rate": 2.996239572468003e-05, | |
| "loss": 1.2387166023254395, | |
| "step": 360 | |
| }, | |
| { | |
| "epoch": 1.5603448275862069, | |
| "grad_norm": 0.06331617385149002, | |
| "learning_rate": 2.984706344149595e-05, | |
| "loss": 1.0467900037765503, | |
| "step": 362 | |
| }, | |
| { | |
| "epoch": 1.5689655172413794, | |
| "grad_norm": 0.06433523446321487, | |
| "learning_rate": 2.9731332029349667e-05, | |
| "loss": 1.0626113414764404, | |
| "step": 364 | |
| }, | |
| { | |
| "epoch": 1.5775862068965516, | |
| "grad_norm": 0.09752818942070007, | |
| "learning_rate": 2.961520737474367e-05, | |
| "loss": 1.0128107070922852, | |
| "step": 366 | |
| }, | |
| { | |
| "epoch": 1.5862068965517242, | |
| "grad_norm": 0.05285457894206047, | |
| "learning_rate": 2.9498695384182123e-05, | |
| "loss": 0.9877223968505859, | |
| "step": 368 | |
| }, | |
| { | |
| "epoch": 1.5948275862068966, | |
| "grad_norm": 0.05934653803706169, | |
| "learning_rate": 2.9381801983870435e-05, | |
| "loss": 0.9603118300437927, | |
| "step": 370 | |
| }, | |
| { | |
| "epoch": 1.603448275862069, | |
| "grad_norm": 0.22097375988960266, | |
| "learning_rate": 2.9264533119413866e-05, | |
| "loss": 1.081476092338562, | |
| "step": 372 | |
| }, | |
| { | |
| "epoch": 1.6120689655172413, | |
| "grad_norm": 0.10628407448530197, | |
| "learning_rate": 2.914689475551506e-05, | |
| "loss": 0.7714329957962036, | |
| "step": 374 | |
| }, | |
| { | |
| "epoch": 1.6206896551724137, | |
| "grad_norm": 0.10955756157636642, | |
| "learning_rate": 2.902889287567072e-05, | |
| "loss": 0.9913143515586853, | |
| "step": 376 | |
| }, | |
| { | |
| "epoch": 1.6293103448275863, | |
| "grad_norm": 0.07451241463422775, | |
| "learning_rate": 2.8910533481867195e-05, | |
| "loss": 1.1765313148498535, | |
| "step": 378 | |
| }, | |
| { | |
| "epoch": 1.6379310344827587, | |
| "grad_norm": 0.07359088957309723, | |
| "learning_rate": 2.879182259427528e-05, | |
| "loss": 0.7655573487281799, | |
| "step": 380 | |
| }, | |
| { | |
| "epoch": 1.646551724137931, | |
| "grad_norm": 0.13642138242721558, | |
| "learning_rate": 2.8672766250943947e-05, | |
| "loss": 1.3452657461166382, | |
| "step": 382 | |
| }, | |
| { | |
| "epoch": 1.6551724137931034, | |
| "grad_norm": 0.08765345811843872, | |
| "learning_rate": 2.8553370507493246e-05, | |
| "loss": 0.9972445964813232, | |
| "step": 384 | |
| }, | |
| { | |
| "epoch": 1.6637931034482758, | |
| "grad_norm": 0.0989682674407959, | |
| "learning_rate": 2.8433641436806306e-05, | |
| "loss": 0.8845785856246948, | |
| "step": 386 | |
| }, | |
| { | |
| "epoch": 1.6724137931034484, | |
| "grad_norm": 0.06875207275152206, | |
| "learning_rate": 2.8313585128720444e-05, | |
| "loss": 1.3110713958740234, | |
| "step": 388 | |
| }, | |
| { | |
| "epoch": 1.6810344827586206, | |
| "grad_norm": 0.13957612216472626, | |
| "learning_rate": 2.8193207689717393e-05, | |
| "loss": 0.8128502368927002, | |
| "step": 390 | |
| }, | |
| { | |
| "epoch": 1.6896551724137931, | |
| "grad_norm": 0.6921377778053284, | |
| "learning_rate": 2.807251524261275e-05, | |
| "loss": 0.6244351863861084, | |
| "step": 392 | |
| }, | |
| { | |
| "epoch": 1.6982758620689655, | |
| "grad_norm": 0.30923035740852356, | |
| "learning_rate": 2.7951513926244484e-05, | |
| "loss": 1.127506136894226, | |
| "step": 394 | |
| }, | |
| { | |
| "epoch": 1.706896551724138, | |
| "grad_norm": 0.0620148703455925, | |
| "learning_rate": 2.7830209895160764e-05, | |
| "loss": 1.042289137840271, | |
| "step": 396 | |
| }, | |
| { | |
| "epoch": 1.7155172413793105, | |
| "grad_norm": 0.16145341098308563, | |
| "learning_rate": 2.770860931930687e-05, | |
| "loss": 1.0570330619812012, | |
| "step": 398 | |
| }, | |
| { | |
| "epoch": 1.7241379310344827, | |
| "grad_norm": 0.09267118573188782, | |
| "learning_rate": 2.7586718383711367e-05, | |
| "loss": 0.9959380626678467, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 1.7327586206896552, | |
| "grad_norm": 0.07319535315036774, | |
| "learning_rate": 2.7464543288171558e-05, | |
| "loss": 1.0200254917144775, | |
| "step": 402 | |
| }, | |
| { | |
| "epoch": 1.7413793103448276, | |
| "grad_norm": 0.055158186703920364, | |
| "learning_rate": 2.7342090246938076e-05, | |
| "loss": 0.6205574870109558, | |
| "step": 404 | |
| }, | |
| { | |
| "epoch": 1.75, | |
| "grad_norm": 0.07343259453773499, | |
| "learning_rate": 2.721936548839887e-05, | |
| "loss": 0.8922735452651978, | |
| "step": 406 | |
| }, | |
| { | |
| "epoch": 1.7586206896551724, | |
| "grad_norm": 0.06107189506292343, | |
| "learning_rate": 2.709637525476236e-05, | |
| "loss": 0.6991145014762878, | |
| "step": 408 | |
| }, | |
| { | |
| "epoch": 1.7672413793103448, | |
| "grad_norm": 0.0519319549202919, | |
| "learning_rate": 2.697312580173995e-05, | |
| "loss": 0.8093492984771729, | |
| "step": 410 | |
| }, | |
| { | |
| "epoch": 1.7758620689655173, | |
| "grad_norm": 0.07292782515287399, | |
| "learning_rate": 2.684962339822785e-05, | |
| "loss": 0.7507970929145813, | |
| "step": 412 | |
| }, | |
| { | |
| "epoch": 1.7844827586206895, | |
| "grad_norm": 0.07456238567829132, | |
| "learning_rate": 2.672587432598823e-05, | |
| "loss": 0.5883830189704895, | |
| "step": 414 | |
| }, | |
| { | |
| "epoch": 1.793103448275862, | |
| "grad_norm": 0.11243204772472382, | |
| "learning_rate": 2.6601884879329653e-05, | |
| "loss": 0.7915773391723633, | |
| "step": 416 | |
| }, | |
| { | |
| "epoch": 1.8017241379310345, | |
| "grad_norm": 0.07653719186782837, | |
| "learning_rate": 2.6477661364786996e-05, | |
| "loss": 1.0269769430160522, | |
| "step": 418 | |
| }, | |
| { | |
| "epoch": 1.8103448275862069, | |
| "grad_norm": 0.14341171085834503, | |
| "learning_rate": 2.635321010080062e-05, | |
| "loss": 1.053789496421814, | |
| "step": 420 | |
| }, | |
| { | |
| "epoch": 1.8189655172413794, | |
| "grad_norm": 0.12033911049365997, | |
| "learning_rate": 2.6228537417395034e-05, | |
| "loss": 1.158492088317871, | |
| "step": 422 | |
| }, | |
| { | |
| "epoch": 1.8275862068965516, | |
| "grad_norm": 0.047955527901649475, | |
| "learning_rate": 2.61036496558569e-05, | |
| "loss": 0.9592758417129517, | |
| "step": 424 | |
| }, | |
| { | |
| "epoch": 1.8362068965517242, | |
| "grad_norm": 0.088678739964962, | |
| "learning_rate": 2.59785531684125e-05, | |
| "loss": 0.6086317300796509, | |
| "step": 426 | |
| }, | |
| { | |
| "epoch": 1.8448275862068966, | |
| "grad_norm": 0.07942725718021393, | |
| "learning_rate": 2.585325431790464e-05, | |
| "loss": 1.0528879165649414, | |
| "step": 428 | |
| }, | |
| { | |
| "epoch": 1.853448275862069, | |
| "grad_norm": 0.0694958046078682, | |
| "learning_rate": 2.572775947746903e-05, | |
| "loss": 1.0576783418655396, | |
| "step": 430 | |
| }, | |
| { | |
| "epoch": 1.8620689655172413, | |
| "grad_norm": 0.17858955264091492, | |
| "learning_rate": 2.5602075030210096e-05, | |
| "loss": 0.9204137325286865, | |
| "step": 432 | |
| }, | |
| { | |
| "epoch": 1.8706896551724137, | |
| "grad_norm": 0.296277791261673, | |
| "learning_rate": 2.5476207368876334e-05, | |
| "loss": 1.114011287689209, | |
| "step": 434 | |
| }, | |
| { | |
| "epoch": 1.8793103448275863, | |
| "grad_norm": 0.07735295593738556, | |
| "learning_rate": 2.535016289553514e-05, | |
| "loss": 0.7933326363563538, | |
| "step": 436 | |
| }, | |
| { | |
| "epoch": 1.8879310344827587, | |
| "grad_norm": 0.12477041035890579, | |
| "learning_rate": 2.5223948021247197e-05, | |
| "loss": 0.9807726144790649, | |
| "step": 438 | |
| }, | |
| { | |
| "epoch": 1.896551724137931, | |
| "grad_norm": 0.09196372330188751, | |
| "learning_rate": 2.509756916574035e-05, | |
| "loss": 1.0345503091812134, | |
| "step": 440 | |
| }, | |
| { | |
| "epoch": 1.9051724137931034, | |
| "grad_norm": 0.06840290129184723, | |
| "learning_rate": 2.4971032757083123e-05, | |
| "loss": 1.1201728582382202, | |
| "step": 442 | |
| }, | |
| { | |
| "epoch": 1.9137931034482758, | |
| "grad_norm": 0.11144451051950455, | |
| "learning_rate": 2.4844345231357734e-05, | |
| "loss": 0.28341731429100037, | |
| "step": 444 | |
| }, | |
| { | |
| "epoch": 1.9224137931034484, | |
| "grad_norm": 0.14570969343185425, | |
| "learning_rate": 2.4717513032332736e-05, | |
| "loss": 0.7789583206176758, | |
| "step": 446 | |
| }, | |
| { | |
| "epoch": 1.9310344827586206, | |
| "grad_norm": 0.05790058895945549, | |
| "learning_rate": 2.4590542611135274e-05, | |
| "loss": 1.012285590171814, | |
| "step": 448 | |
| }, | |
| { | |
| "epoch": 1.9396551724137931, | |
| "grad_norm": 0.05153496563434601, | |
| "learning_rate": 2.446344042592295e-05, | |
| "loss": 1.0196033716201782, | |
| "step": 450 | |
| }, | |
| { | |
| "epoch": 1.9482758620689655, | |
| "grad_norm": 0.057060956954956055, | |
| "learning_rate": 2.433621294155535e-05, | |
| "loss": 0.8052966594696045, | |
| "step": 452 | |
| }, | |
| { | |
| "epoch": 1.956896551724138, | |
| "grad_norm": 0.0602966733276844, | |
| "learning_rate": 2.420886662926521e-05, | |
| "loss": 0.9915321469306946, | |
| "step": 454 | |
| }, | |
| { | |
| "epoch": 1.9655172413793105, | |
| "grad_norm": 0.07094614952802658, | |
| "learning_rate": 2.4081407966329256e-05, | |
| "loss": 0.9689676761627197, | |
| "step": 456 | |
| }, | |
| { | |
| "epoch": 1.9741379310344827, | |
| "grad_norm": 0.08627466857433319, | |
| "learning_rate": 2.3953843435738775e-05, | |
| "loss": 0.41972166299819946, | |
| "step": 458 | |
| }, | |
| { | |
| "epoch": 1.9827586206896552, | |
| "grad_norm": 0.10626411437988281, | |
| "learning_rate": 2.3826179525869836e-05, | |
| "loss": 1.1633706092834473, | |
| "step": 460 | |
| }, | |
| { | |
| "epoch": 1.9913793103448276, | |
| "grad_norm": 0.15631678700447083, | |
| "learning_rate": 2.36984227301533e-05, | |
| "loss": 0.7487952709197998, | |
| "step": 462 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "grad_norm": 0.16628113389015198, | |
| "learning_rate": 2.3570579546744504e-05, | |
| "loss": 0.8847077488899231, | |
| "step": 464 | |
| }, | |
| { | |
| "epoch": 2.0086206896551726, | |
| "grad_norm": 0.06411660462617874, | |
| "learning_rate": 2.3442656478192794e-05, | |
| "loss": 0.484560489654541, | |
| "step": 466 | |
| }, | |
| { | |
| "epoch": 2.0172413793103448, | |
| "grad_norm": 0.3941573202610016, | |
| "learning_rate": 2.331466003111073e-05, | |
| "loss": 0.6984850764274597, | |
| "step": 468 | |
| }, | |
| { | |
| "epoch": 2.0258620689655173, | |
| "grad_norm": 0.044237978756427765, | |
| "learning_rate": 2.318659671584316e-05, | |
| "loss": 0.4863373935222626, | |
| "step": 470 | |
| }, | |
| { | |
| "epoch": 2.0344827586206895, | |
| "grad_norm": 0.0645633190870285, | |
| "learning_rate": 2.305847304613609e-05, | |
| "loss": 0.4588513970375061, | |
| "step": 472 | |
| }, | |
| { | |
| "epoch": 2.043103448275862, | |
| "grad_norm": 0.05587729066610336, | |
| "learning_rate": 2.293029553880536e-05, | |
| "loss": 0.4486234486103058, | |
| "step": 474 | |
| }, | |
| { | |
| "epoch": 2.0517241379310347, | |
| "grad_norm": 0.06679260730743408, | |
| "learning_rate": 2.280207071340517e-05, | |
| "loss": 0.5298870205879211, | |
| "step": 476 | |
| }, | |
| { | |
| "epoch": 2.060344827586207, | |
| "grad_norm": 0.08075322210788727, | |
| "learning_rate": 2.26738050918965e-05, | |
| "loss": 0.4382156729698181, | |
| "step": 478 | |
| }, | |
| { | |
| "epoch": 2.0689655172413794, | |
| "grad_norm": 0.06546280533075333, | |
| "learning_rate": 2.2545505198315346e-05, | |
| "loss": 0.5762298107147217, | |
| "step": 480 | |
| }, | |
| { | |
| "epoch": 2.0775862068965516, | |
| "grad_norm": 0.11915218830108643, | |
| "learning_rate": 2.2417177558440907e-05, | |
| "loss": 0.36859992146492004, | |
| "step": 482 | |
| }, | |
| { | |
| "epoch": 2.086206896551724, | |
| "grad_norm": 0.22198820114135742, | |
| "learning_rate": 2.2288828699463652e-05, | |
| "loss": 0.5293700098991394, | |
| "step": 484 | |
| }, | |
| { | |
| "epoch": 2.0948275862068964, | |
| "grad_norm": 0.0842965617775917, | |
| "learning_rate": 2.2160465149653337e-05, | |
| "loss": 0.49147215485572815, | |
| "step": 486 | |
| }, | |
| { | |
| "epoch": 2.103448275862069, | |
| "grad_norm": 0.11753598600625992, | |
| "learning_rate": 2.203209343802692e-05, | |
| "loss": 0.5180780291557312, | |
| "step": 488 | |
| }, | |
| { | |
| "epoch": 2.1120689655172415, | |
| "grad_norm": 0.37540075182914734, | |
| "learning_rate": 2.1903720094016537e-05, | |
| "loss": 0.581203818321228, | |
| "step": 490 | |
| }, | |
| { | |
| "epoch": 2.1206896551724137, | |
| "grad_norm": 0.062044426798820496, | |
| "learning_rate": 2.1775351647137323e-05, | |
| "loss": 0.4889185130596161, | |
| "step": 492 | |
| }, | |
| { | |
| "epoch": 2.1293103448275863, | |
| "grad_norm": 0.07434380799531937, | |
| "learning_rate": 2.1646994626655332e-05, | |
| "loss": 0.6391059756278992, | |
| "step": 494 | |
| }, | |
| { | |
| "epoch": 2.1379310344827585, | |
| "grad_norm": 0.10223301500082016, | |
| "learning_rate": 2.151865556125544e-05, | |
| "loss": 0.6237853169441223, | |
| "step": 496 | |
| }, | |
| { | |
| "epoch": 2.146551724137931, | |
| "grad_norm": 0.14267216622829437, | |
| "learning_rate": 2.1390340978709254e-05, | |
| "loss": 0.36577755212783813, | |
| "step": 498 | |
| }, | |
| { | |
| "epoch": 2.1551724137931036, | |
| "grad_norm": 0.13929963111877441, | |
| "learning_rate": 2.1262057405543115e-05, | |
| "loss": 0.49633127450942993, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 2.163793103448276, | |
| "grad_norm": 0.05517968162894249, | |
| "learning_rate": 2.1133811366706097e-05, | |
| "loss": 0.38259175419807434, | |
| "step": 502 | |
| }, | |
| { | |
| "epoch": 2.1724137931034484, | |
| "grad_norm": 0.058835044503211975, | |
| "learning_rate": 2.100560938523817e-05, | |
| "loss": 0.4427034258842468, | |
| "step": 504 | |
| }, | |
| { | |
| "epoch": 2.1810344827586206, | |
| "grad_norm": 0.15045633912086487, | |
| "learning_rate": 2.0877457981938364e-05, | |
| "loss": 0.6942803263664246, | |
| "step": 506 | |
| }, | |
| { | |
| "epoch": 2.189655172413793, | |
| "grad_norm": 2.292686700820923, | |
| "learning_rate": 2.074936367503317e-05, | |
| "loss": 0.5671365261077881, | |
| "step": 508 | |
| }, | |
| { | |
| "epoch": 2.1982758620689653, | |
| "grad_norm": 0.046695832163095474, | |
| "learning_rate": 2.0621332979844904e-05, | |
| "loss": 0.6063480377197266, | |
| "step": 510 | |
| }, | |
| { | |
| "epoch": 2.206896551724138, | |
| "grad_norm": 0.16905461251735687, | |
| "learning_rate": 2.0493372408460425e-05, | |
| "loss": 0.6027957201004028, | |
| "step": 512 | |
| }, | |
| { | |
| "epoch": 2.2155172413793105, | |
| "grad_norm": 0.06160572171211243, | |
| "learning_rate": 2.0365488469399795e-05, | |
| "loss": 0.6078309416770935, | |
| "step": 514 | |
| }, | |
| { | |
| "epoch": 2.2241379310344827, | |
| "grad_norm": 0.07821284979581833, | |
| "learning_rate": 2.0237687667285345e-05, | |
| "loss": 0.3304949402809143, | |
| "step": 516 | |
| }, | |
| { | |
| "epoch": 2.2327586206896552, | |
| "grad_norm": 0.34748536348342896, | |
| "learning_rate": 2.010997650251072e-05, | |
| "loss": 0.12825970351696014, | |
| "step": 518 | |
| }, | |
| { | |
| "epoch": 2.2413793103448274, | |
| "grad_norm": 0.11893010139465332, | |
| "learning_rate": 1.9982361470910342e-05, | |
| "loss": 0.1828547865152359, | |
| "step": 520 | |
| }, | |
| { | |
| "epoch": 2.25, | |
| "grad_norm": 0.12491466104984283, | |
| "learning_rate": 1.9854849063428926e-05, | |
| "loss": 0.6522985696792603, | |
| "step": 522 | |
| }, | |
| { | |
| "epoch": 2.2586206896551726, | |
| "grad_norm": 0.15903355181217194, | |
| "learning_rate": 1.9727445765791405e-05, | |
| "loss": 0.47932472825050354, | |
| "step": 524 | |
| }, | |
| { | |
| "epoch": 2.2672413793103448, | |
| "grad_norm": 0.09779471158981323, | |
| "learning_rate": 1.9600158058172974e-05, | |
| "loss": 0.4181676208972931, | |
| "step": 526 | |
| }, | |
| { | |
| "epoch": 2.2758620689655173, | |
| "grad_norm": 0.07378951460123062, | |
| "learning_rate": 1.9472992414869534e-05, | |
| "loss": 0.46739447116851807, | |
| "step": 528 | |
| }, | |
| { | |
| "epoch": 2.2844827586206895, | |
| "grad_norm": 0.04063527286052704, | |
| "learning_rate": 1.9345955303968365e-05, | |
| "loss": 0.38251054286956787, | |
| "step": 530 | |
| }, | |
| { | |
| "epoch": 2.293103448275862, | |
| "grad_norm": 0.08258794993162155, | |
| "learning_rate": 1.9219053187019144e-05, | |
| "loss": 0.4366922080516815, | |
| "step": 532 | |
| }, | |
| { | |
| "epoch": 2.3017241379310347, | |
| "grad_norm": 0.09015543758869171, | |
| "learning_rate": 1.909229251870528e-05, | |
| "loss": 0.4965798556804657, | |
| "step": 534 | |
| }, | |
| { | |
| "epoch": 2.310344827586207, | |
| "grad_norm": 0.08743222802877426, | |
| "learning_rate": 1.8965679746515628e-05, | |
| "loss": 0.43146276473999023, | |
| "step": 536 | |
| }, | |
| { | |
| "epoch": 2.3189655172413794, | |
| "grad_norm": 0.084476038813591, | |
| "learning_rate": 1.88392213104165e-05, | |
| "loss": 0.2771337330341339, | |
| "step": 538 | |
| }, | |
| { | |
| "epoch": 2.3275862068965516, | |
| "grad_norm": 0.07576002180576324, | |
| "learning_rate": 1.8712923642524175e-05, | |
| "loss": 0.36878013610839844, | |
| "step": 540 | |
| }, | |
| { | |
| "epoch": 2.336206896551724, | |
| "grad_norm": 0.10497633367776871, | |
| "learning_rate": 1.858679316677767e-05, | |
| "loss": 0.6058629751205444, | |
| "step": 542 | |
| }, | |
| { | |
| "epoch": 2.344827586206897, | |
| "grad_norm": 0.13856923580169678, | |
| "learning_rate": 1.8460836298612056e-05, | |
| "loss": 0.6428977251052856, | |
| "step": 544 | |
| }, | |
| { | |
| "epoch": 2.353448275862069, | |
| "grad_norm": 0.1172226220369339, | |
| "learning_rate": 1.8335059444632078e-05, | |
| "loss": 0.2821408212184906, | |
| "step": 546 | |
| }, | |
| { | |
| "epoch": 2.3620689655172415, | |
| "grad_norm": 0.1798970252275467, | |
| "learning_rate": 1.820946900228639e-05, | |
| "loss": 0.8290093541145325, | |
| "step": 548 | |
| }, | |
| { | |
| "epoch": 2.3706896551724137, | |
| "grad_norm": 0.2738807499408722, | |
| "learning_rate": 1.808407135954204e-05, | |
| "loss": 0.5475698709487915, | |
| "step": 550 | |
| }, | |
| { | |
| "epoch": 2.3793103448275863, | |
| "grad_norm": 0.20505401492118835, | |
| "learning_rate": 1.7958872894559666e-05, | |
| "loss": 0.6245191693305969, | |
| "step": 552 | |
| }, | |
| { | |
| "epoch": 2.3879310344827585, | |
| "grad_norm": 0.05477019026875496, | |
| "learning_rate": 1.7833879975368994e-05, | |
| "loss": 0.5108689665794373, | |
| "step": 554 | |
| }, | |
| { | |
| "epoch": 2.396551724137931, | |
| "grad_norm": 0.09034960716962814, | |
| "learning_rate": 1.7709098959545015e-05, | |
| "loss": 0.5519805550575256, | |
| "step": 556 | |
| }, | |
| { | |
| "epoch": 2.405172413793103, | |
| "grad_norm": 0.1560261845588684, | |
| "learning_rate": 1.758453619388453e-05, | |
| "loss": 0.4397192597389221, | |
| "step": 558 | |
| }, | |
| { | |
| "epoch": 2.413793103448276, | |
| "grad_norm": 0.118907131254673, | |
| "learning_rate": 1.7460198014083424e-05, | |
| "loss": 0.38739266991615295, | |
| "step": 560 | |
| }, | |
| { | |
| "epoch": 2.4224137931034484, | |
| "grad_norm": 0.23784895241260529, | |
| "learning_rate": 1.733609074441433e-05, | |
| "loss": 0.5064358711242676, | |
| "step": 562 | |
| }, | |
| { | |
| "epoch": 2.4310344827586206, | |
| "grad_norm": 0.09993483871221542, | |
| "learning_rate": 1.7212220697405003e-05, | |
| "loss": 0.540324330329895, | |
| "step": 564 | |
| }, | |
| { | |
| "epoch": 2.439655172413793, | |
| "grad_norm": 0.7780280113220215, | |
| "learning_rate": 1.7088594173517225e-05, | |
| "loss": 0.5431786179542542, | |
| "step": 566 | |
| }, | |
| { | |
| "epoch": 2.4482758620689653, | |
| "grad_norm": 0.14646178483963013, | |
| "learning_rate": 1.6965217460826345e-05, | |
| "loss": 0.3365917205810547, | |
| "step": 568 | |
| }, | |
| { | |
| "epoch": 2.456896551724138, | |
| "grad_norm": 0.07466763257980347, | |
| "learning_rate": 1.6842096834701443e-05, | |
| "loss": 0.6636412739753723, | |
| "step": 570 | |
| }, | |
| { | |
| "epoch": 2.4655172413793105, | |
| "grad_norm": 0.3850714862346649, | |
| "learning_rate": 1.6719238557486143e-05, | |
| "loss": 0.3930183947086334, | |
| "step": 572 | |
| }, | |
| { | |
| "epoch": 2.4741379310344827, | |
| "grad_norm": 0.12653613090515137, | |
| "learning_rate": 1.6596648878180088e-05, | |
| "loss": 0.4772527813911438, | |
| "step": 574 | |
| }, | |
| { | |
| "epoch": 2.4827586206896552, | |
| "grad_norm": 0.10766978561878204, | |
| "learning_rate": 1.647433403212112e-05, | |
| "loss": 0.6689369082450867, | |
| "step": 576 | |
| }, | |
| { | |
| "epoch": 2.4913793103448274, | |
| "grad_norm": 0.1643172800540924, | |
| "learning_rate": 1.635230024066807e-05, | |
| "loss": 0.5050515532493591, | |
| "step": 578 | |
| }, | |
| { | |
| "epoch": 2.5, | |
| "grad_norm": 0.06176433712244034, | |
| "learning_rate": 1.6230553710884373e-05, | |
| "loss": 0.6936325430870056, | |
| "step": 580 | |
| }, | |
| { | |
| "epoch": 2.5086206896551726, | |
| "grad_norm": 0.17540457844734192, | |
| "learning_rate": 1.610910063522233e-05, | |
| "loss": 0.5566367506980896, | |
| "step": 582 | |
| }, | |
| { | |
| "epoch": 2.5172413793103448, | |
| "grad_norm": 0.09146937727928162, | |
| "learning_rate": 1.598794719120816e-05, | |
| "loss": 0.5264196991920471, | |
| "step": 584 | |
| }, | |
| { | |
| "epoch": 2.5258620689655173, | |
| "grad_norm": 0.08665334433317184, | |
| "learning_rate": 1.5867099541127737e-05, | |
| "loss": 0.4999127686023712, | |
| "step": 586 | |
| }, | |
| { | |
| "epoch": 2.5344827586206895, | |
| "grad_norm": 0.05140522122383118, | |
| "learning_rate": 1.5746563831713236e-05, | |
| "loss": 0.5660111308097839, | |
| "step": 588 | |
| }, | |
| { | |
| "epoch": 2.543103448275862, | |
| "grad_norm": 0.08618345856666565, | |
| "learning_rate": 1.56263461938304e-05, | |
| "loss": 0.7160353064537048, | |
| "step": 590 | |
| }, | |
| { | |
| "epoch": 2.5517241379310347, | |
| "grad_norm": 0.05319703742861748, | |
| "learning_rate": 1.5506452742166796e-05, | |
| "loss": 0.575738251209259, | |
| "step": 592 | |
| }, | |
| { | |
| "epoch": 2.560344827586207, | |
| "grad_norm": 0.29011279344558716, | |
| "learning_rate": 1.5386889574920692e-05, | |
| "loss": 0.35511380434036255, | |
| "step": 594 | |
| }, | |
| { | |
| "epoch": 2.5689655172413794, | |
| "grad_norm": 0.07296542078256607, | |
| "learning_rate": 1.5267662773491e-05, | |
| "loss": 0.40391749143600464, | |
| "step": 596 | |
| }, | |
| { | |
| "epoch": 2.5775862068965516, | |
| "grad_norm": 0.09713292866945267, | |
| "learning_rate": 1.514877840216785e-05, | |
| "loss": 0.5037810802459717, | |
| "step": 598 | |
| }, | |
| { | |
| "epoch": 2.586206896551724, | |
| "grad_norm": 0.1726667881011963, | |
| "learning_rate": 1.5030242507824215e-05, | |
| "loss": 0.6312216520309448, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 2.594827586206897, | |
| "grad_norm": 0.0342765673995018, | |
| "learning_rate": 1.4912061119608292e-05, | |
| "loss": 0.39456382393836975, | |
| "step": 602 | |
| }, | |
| { | |
| "epoch": 2.603448275862069, | |
| "grad_norm": 0.45015275478363037, | |
| "learning_rate": 1.4794240248636885e-05, | |
| "loss": 0.5595788359642029, | |
| "step": 604 | |
| }, | |
| { | |
| "epoch": 2.612068965517241, | |
| "grad_norm": 0.10634768009185791, | |
| "learning_rate": 1.4676785887689614e-05, | |
| "loss": 0.41876575350761414, | |
| "step": 606 | |
| }, | |
| { | |
| "epoch": 2.6206896551724137, | |
| "grad_norm": 0.06522602587938309, | |
| "learning_rate": 1.4559704010904145e-05, | |
| "loss": 0.6346225142478943, | |
| "step": 608 | |
| }, | |
| { | |
| "epoch": 2.6293103448275863, | |
| "grad_norm": 0.24831700325012207, | |
| "learning_rate": 1.444300057347229e-05, | |
| "loss": 0.5777739882469177, | |
| "step": 610 | |
| }, | |
| { | |
| "epoch": 2.637931034482759, | |
| "grad_norm": 0.06677041202783585, | |
| "learning_rate": 1.432668151133712e-05, | |
| "loss": 0.5916672945022583, | |
| "step": 612 | |
| }, | |
| { | |
| "epoch": 2.646551724137931, | |
| "grad_norm": 0.09093949943780899, | |
| "learning_rate": 1.4210752740891032e-05, | |
| "loss": 0.5175487995147705, | |
| "step": 614 | |
| }, | |
| { | |
| "epoch": 2.655172413793103, | |
| "grad_norm": 0.1291448175907135, | |
| "learning_rate": 1.4095220158674851e-05, | |
| "loss": 0.37486380338668823, | |
| "step": 616 | |
| }, | |
| { | |
| "epoch": 2.663793103448276, | |
| "grad_norm": 0.10089799761772156, | |
| "learning_rate": 1.3980089641077864e-05, | |
| "loss": 0.5902385115623474, | |
| "step": 618 | |
| }, | |
| { | |
| "epoch": 2.6724137931034484, | |
| "grad_norm": 0.3151969611644745, | |
| "learning_rate": 1.3865367044038972e-05, | |
| "loss": 0.3626130223274231, | |
| "step": 620 | |
| }, | |
| { | |
| "epoch": 2.6810344827586206, | |
| "grad_norm": 0.10858116298913956, | |
| "learning_rate": 1.3751058202748815e-05, | |
| "loss": 0.6260622143745422, | |
| "step": 622 | |
| }, | |
| { | |
| "epoch": 2.689655172413793, | |
| "grad_norm": 0.09145694226026535, | |
| "learning_rate": 1.3637168931352952e-05, | |
| "loss": 0.3847617506980896, | |
| "step": 624 | |
| }, | |
| { | |
| "epoch": 2.6982758620689653, | |
| "grad_norm": 0.10181720554828644, | |
| "learning_rate": 1.3523705022656194e-05, | |
| "loss": 0.5213911533355713, | |
| "step": 626 | |
| }, | |
| { | |
| "epoch": 2.706896551724138, | |
| "grad_norm": 0.07265552878379822, | |
| "learning_rate": 1.3410672247827887e-05, | |
| "loss": 0.3843521475791931, | |
| "step": 628 | |
| }, | |
| { | |
| "epoch": 2.7155172413793105, | |
| "grad_norm": 0.06394084542989731, | |
| "learning_rate": 1.3298076356108431e-05, | |
| "loss": 0.7390468716621399, | |
| "step": 630 | |
| }, | |
| { | |
| "epoch": 2.7241379310344827, | |
| "grad_norm": 0.08277060091495514, | |
| "learning_rate": 1.318592307451683e-05, | |
| "loss": 0.3152429461479187, | |
| "step": 632 | |
| }, | |
| { | |
| "epoch": 2.7327586206896552, | |
| "grad_norm": 0.06954030692577362, | |
| "learning_rate": 1.307421810755938e-05, | |
| "loss": 0.5903550982475281, | |
| "step": 634 | |
| }, | |
| { | |
| "epoch": 2.7413793103448274, | |
| "grad_norm": 0.14430810511112213, | |
| "learning_rate": 1.296296713693956e-05, | |
| "loss": 0.4196533262729645, | |
| "step": 636 | |
| }, | |
| { | |
| "epoch": 2.75, | |
| "grad_norm": 0.049837417900562286, | |
| "learning_rate": 1.2852175821268977e-05, | |
| "loss": 0.5849826335906982, | |
| "step": 638 | |
| }, | |
| { | |
| "epoch": 2.7586206896551726, | |
| "grad_norm": 0.16439993679523468, | |
| "learning_rate": 1.274184979577963e-05, | |
| "loss": 0.40721848607063293, | |
| "step": 640 | |
| }, | |
| { | |
| "epoch": 2.7672413793103448, | |
| "grad_norm": 0.15708234906196594, | |
| "learning_rate": 1.2631994672037205e-05, | |
| "loss": 0.5138668417930603, | |
| "step": 642 | |
| }, | |
| { | |
| "epoch": 2.7758620689655173, | |
| "grad_norm": 0.0595339760184288, | |
| "learning_rate": 1.2522616037655713e-05, | |
| "loss": 0.6097421646118164, | |
| "step": 644 | |
| }, | |
| { | |
| "epoch": 2.7844827586206895, | |
| "grad_norm": 0.14719434082508087, | |
| "learning_rate": 1.2413719456013231e-05, | |
| "loss": 0.5522211194038391, | |
| "step": 646 | |
| }, | |
| { | |
| "epoch": 2.793103448275862, | |
| "grad_norm": 0.06864980608224869, | |
| "learning_rate": 1.2305310465968985e-05, | |
| "loss": 0.3453619182109833, | |
| "step": 648 | |
| }, | |
| { | |
| "epoch": 2.8017241379310347, | |
| "grad_norm": 0.05219966545701027, | |
| "learning_rate": 1.2197394581581561e-05, | |
| "loss": 0.7121859788894653, | |
| "step": 650 | |
| }, | |
| { | |
| "epoch": 2.810344827586207, | |
| "grad_norm": 0.24679023027420044, | |
| "learning_rate": 1.2089977291828512e-05, | |
| "loss": 0.7990239262580872, | |
| "step": 652 | |
| }, | |
| { | |
| "epoch": 2.8189655172413794, | |
| "grad_norm": 0.15024927258491516, | |
| "learning_rate": 1.1983064060327098e-05, | |
| "loss": 0.6081220507621765, | |
| "step": 654 | |
| }, | |
| { | |
| "epoch": 2.8275862068965516, | |
| "grad_norm": 0.05443995073437691, | |
| "learning_rate": 1.187666032505645e-05, | |
| "loss": 0.43975335359573364, | |
| "step": 656 | |
| }, | |
| { | |
| "epoch": 2.836206896551724, | |
| "grad_norm": 0.05697048828005791, | |
| "learning_rate": 1.1770771498080921e-05, | |
| "loss": 0.6137202978134155, | |
| "step": 658 | |
| }, | |
| { | |
| "epoch": 2.844827586206897, | |
| "grad_norm": 0.11451619118452072, | |
| "learning_rate": 1.1665402965274866e-05, | |
| "loss": 0.20562584698200226, | |
| "step": 660 | |
| }, | |
| { | |
| "epoch": 2.853448275862069, | |
| "grad_norm": 0.22301547229290009, | |
| "learning_rate": 1.1560560086048632e-05, | |
| "loss": 0.42035165429115295, | |
| "step": 662 | |
| }, | |
| { | |
| "epoch": 2.862068965517241, | |
| "grad_norm": 0.15491816401481628, | |
| "learning_rate": 1.1456248193076027e-05, | |
| "loss": 0.6786882877349854, | |
| "step": 664 | |
| }, | |
| { | |
| "epoch": 2.8706896551724137, | |
| "grad_norm": 0.06417909264564514, | |
| "learning_rate": 1.1352472592023026e-05, | |
| "loss": 0.34481775760650635, | |
| "step": 666 | |
| }, | |
| { | |
| "epoch": 2.8793103448275863, | |
| "grad_norm": 0.2559848129749298, | |
| "learning_rate": 1.1249238561277957e-05, | |
| "loss": 0.37077146768569946, | |
| "step": 668 | |
| }, | |
| { | |
| "epoch": 2.887931034482759, | |
| "grad_norm": 0.07367434352636337, | |
| "learning_rate": 1.1146551351682962e-05, | |
| "loss": 0.6234573125839233, | |
| "step": 670 | |
| }, | |
| { | |
| "epoch": 2.896551724137931, | |
| "grad_norm": 0.13318119943141937, | |
| "learning_rate": 1.1044416186266985e-05, | |
| "loss": 0.43646591901779175, | |
| "step": 672 | |
| }, | |
| { | |
| "epoch": 2.905172413793103, | |
| "grad_norm": 0.04189766198396683, | |
| "learning_rate": 1.0942838259980065e-05, | |
| "loss": 0.6099374890327454, | |
| "step": 674 | |
| }, | |
| { | |
| "epoch": 2.913793103448276, | |
| "grad_norm": 0.16093385219573975, | |
| "learning_rate": 1.0841822739429131e-05, | |
| "loss": 0.5961918830871582, | |
| "step": 676 | |
| }, | |
| { | |
| "epoch": 2.9224137931034484, | |
| "grad_norm": 0.05338941141963005, | |
| "learning_rate": 1.0741374762615181e-05, | |
| "loss": 0.5247670412063599, | |
| "step": 678 | |
| }, | |
| { | |
| "epoch": 2.9310344827586206, | |
| "grad_norm": 0.06662659347057343, | |
| "learning_rate": 1.0641499438671994e-05, | |
| "loss": 0.4245750606060028, | |
| "step": 680 | |
| }, | |
| { | |
| "epoch": 2.939655172413793, | |
| "grad_norm": 0.03824161738157272, | |
| "learning_rate": 1.054220184760619e-05, | |
| "loss": 0.21983936429023743, | |
| "step": 682 | |
| }, | |
| { | |
| "epoch": 2.9482758620689653, | |
| "grad_norm": 0.061386823654174805, | |
| "learning_rate": 1.0443487040038919e-05, | |
| "loss": 0.3854738771915436, | |
| "step": 684 | |
| }, | |
| { | |
| "epoch": 2.956896551724138, | |
| "grad_norm": 0.06032966449856758, | |
| "learning_rate": 1.0345360036948912e-05, | |
| "loss": 0.6782163381576538, | |
| "step": 686 | |
| }, | |
| { | |
| "epoch": 2.9655172413793105, | |
| "grad_norm": 0.06708291918039322, | |
| "learning_rate": 1.0247825829417132e-05, | |
| "loss": 0.5401458740234375, | |
| "step": 688 | |
| }, | |
| { | |
| "epoch": 2.9741379310344827, | |
| "grad_norm": 0.0782044380903244, | |
| "learning_rate": 1.0150889378372878e-05, | |
| "loss": 0.7114209532737732, | |
| "step": 690 | |
| }, | |
| { | |
| "epoch": 2.9827586206896552, | |
| "grad_norm": 0.06770720332860947, | |
| "learning_rate": 1.00545556143415e-05, | |
| "loss": 0.660466730594635, | |
| "step": 692 | |
| }, | |
| { | |
| "epoch": 2.9913793103448274, | |
| "grad_norm": 0.07091684639453888, | |
| "learning_rate": 9.958829437193558e-06, | |
| "loss": 0.4320341944694519, | |
| "step": 694 | |
| }, | |
| { | |
| "epoch": 3.0, | |
| "grad_norm": 0.06834368407726288, | |
| "learning_rate": 9.863715715895658e-06, | |
| "loss": 0.6856396198272705, | |
| "step": 696 | |
| }, | |
| { | |
| "epoch": 3.0086206896551726, | |
| "grad_norm": 0.03995652124285698, | |
| "learning_rate": 9.769219288262745e-06, | |
| "loss": 0.16509434580802917, | |
| "step": 698 | |
| }, | |
| { | |
| "epoch": 3.0172413793103448, | |
| "grad_norm": 0.043883178383111954, | |
| "learning_rate": 9.675344960712074e-06, | |
| "loss": 0.29928964376449585, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 3.0258620689655173, | |
| "grad_norm": 0.0733269527554512, | |
| "learning_rate": 9.582097508018724e-06, | |
| "loss": 0.25162428617477417, | |
| "step": 702 | |
| }, | |
| { | |
| "epoch": 3.0344827586206895, | |
| "grad_norm": 0.12920475006103516, | |
| "learning_rate": 9.489481673072723e-06, | |
| "loss": 0.3514169454574585, | |
| "step": 704 | |
| }, | |
| { | |
| "epoch": 3.043103448275862, | |
| "grad_norm": 0.017986657097935677, | |
| "learning_rate": 9.397502166637837e-06, | |
| "loss": 0.07074951380491257, | |
| "step": 706 | |
| }, | |
| { | |
| "epoch": 3.0517241379310347, | |
| "grad_norm": 0.07337481528520584, | |
| "learning_rate": 9.30616366711195e-06, | |
| "loss": 0.20599356293678284, | |
| "step": 708 | |
| }, | |
| { | |
| "epoch": 3.060344827586207, | |
| "grad_norm": 0.03576648607850075, | |
| "learning_rate": 9.21547082028908e-06, | |
| "loss": 0.11480194330215454, | |
| "step": 710 | |
| }, | |
| { | |
| "epoch": 3.0689655172413794, | |
| "grad_norm": 0.38087305426597595, | |
| "learning_rate": 9.125428239123133e-06, | |
| "loss": 0.26979854702949524, | |
| "step": 712 | |
| }, | |
| { | |
| "epoch": 3.0775862068965516, | |
| "grad_norm": 0.0725908949971199, | |
| "learning_rate": 9.036040503493213e-06, | |
| "loss": 0.42210009694099426, | |
| "step": 714 | |
| }, | |
| { | |
| "epoch": 3.086206896551724, | |
| "grad_norm": 0.14822497963905334, | |
| "learning_rate": 8.947312159970725e-06, | |
| "loss": 0.1675470620393753, | |
| "step": 716 | |
| }, | |
| { | |
| "epoch": 3.0948275862068964, | |
| "grad_norm": 0.08073808997869492, | |
| "learning_rate": 8.859247721588064e-06, | |
| "loss": 0.20833522081375122, | |
| "step": 718 | |
| }, | |
| { | |
| "epoch": 3.103448275862069, | |
| "grad_norm": 0.057046178728342056, | |
| "learning_rate": 8.77185166760914e-06, | |
| "loss": 0.16950953006744385, | |
| "step": 720 | |
| }, | |
| { | |
| "epoch": 3.1120689655172415, | |
| "grad_norm": 0.10354648530483246, | |
| "learning_rate": 8.685128443301465e-06, | |
| "loss": 0.12641456723213196, | |
| "step": 722 | |
| }, | |
| { | |
| "epoch": 3.1206896551724137, | |
| "grad_norm": 0.05845208466053009, | |
| "learning_rate": 8.599082459710125e-06, | |
| "loss": 0.13568329811096191, | |
| "step": 724 | |
| }, | |
| { | |
| "epoch": 3.1293103448275863, | |
| "grad_norm": 0.04908813536167145, | |
| "learning_rate": 8.513718093433354e-06, | |
| "loss": 0.21239104866981506, | |
| "step": 726 | |
| }, | |
| { | |
| "epoch": 3.1379310344827585, | |
| "grad_norm": 0.13193517923355103, | |
| "learning_rate": 8.42903968639999e-06, | |
| "loss": 0.2763456702232361, | |
| "step": 728 | |
| }, | |
| { | |
| "epoch": 3.146551724137931, | |
| "grad_norm": 0.03571261465549469, | |
| "learning_rate": 8.345051545648565e-06, | |
| "loss": 0.12836386263370514, | |
| "step": 730 | |
| }, | |
| { | |
| "epoch": 3.1551724137931036, | |
| "grad_norm": 0.06112167611718178, | |
| "learning_rate": 8.261757943108296e-06, | |
| "loss": 0.16560682654380798, | |
| "step": 732 | |
| }, | |
| { | |
| "epoch": 3.163793103448276, | |
| "grad_norm": 0.0860171988606453, | |
| "learning_rate": 8.179163115381737e-06, | |
| "loss": 0.2081730216741562, | |
| "step": 734 | |
| }, | |
| { | |
| "epoch": 3.1724137931034484, | |
| "grad_norm": 0.03247256577014923, | |
| "learning_rate": 8.097271263529346e-06, | |
| "loss": 0.14392191171646118, | |
| "step": 736 | |
| }, | |
| { | |
| "epoch": 3.1810344827586206, | |
| "grad_norm": 0.0918356403708458, | |
| "learning_rate": 8.016086552855771e-06, | |
| "loss": 0.15577132999897003, | |
| "step": 738 | |
| }, | |
| { | |
| "epoch": 3.189655172413793, | |
| "grad_norm": 0.06287133693695068, | |
| "learning_rate": 7.935613112698003e-06, | |
| "loss": 0.0789552852511406, | |
| "step": 740 | |
| }, | |
| { | |
| "epoch": 3.1982758620689653, | |
| "grad_norm": 0.03986852988600731, | |
| "learning_rate": 7.855855036215328e-06, | |
| "loss": 0.10101716220378876, | |
| "step": 742 | |
| }, | |
| { | |
| "epoch": 3.206896551724138, | |
| "grad_norm": 0.13693907856941223, | |
| "learning_rate": 7.776816380181165e-06, | |
| "loss": 0.1658182144165039, | |
| "step": 744 | |
| }, | |
| { | |
| "epoch": 3.2155172413793105, | |
| "grad_norm": 0.14548790454864502, | |
| "learning_rate": 7.698501164776679e-06, | |
| "loss": 0.19248032569885254, | |
| "step": 746 | |
| }, | |
| { | |
| "epoch": 3.2241379310344827, | |
| "grad_norm": 0.05582420900464058, | |
| "learning_rate": 7.620913373386356e-06, | |
| "loss": 0.21470694243907928, | |
| "step": 748 | |
| }, | |
| { | |
| "epoch": 3.2327586206896552, | |
| "grad_norm": 0.04277574643492699, | |
| "learning_rate": 7.5440569523953315e-06, | |
| "loss": 0.15740104019641876, | |
| "step": 750 | |
| }, | |
| { | |
| "epoch": 3.2413793103448274, | |
| "grad_norm": 0.14733938872814178, | |
| "learning_rate": 7.467935810988729e-06, | |
| "loss": 0.18646365404129028, | |
| "step": 752 | |
| }, | |
| { | |
| "epoch": 3.25, | |
| "grad_norm": 0.06095249578356743, | |
| "learning_rate": 7.392553820952764e-06, | |
| "loss": 0.22709967195987701, | |
| "step": 754 | |
| }, | |
| { | |
| "epoch": 3.2586206896551726, | |
| "grad_norm": 0.04888584464788437, | |
| "learning_rate": 7.317914816477865e-06, | |
| "loss": 0.1782107949256897, | |
| "step": 756 | |
| }, | |
| { | |
| "epoch": 3.2672413793103448, | |
| "grad_norm": 0.2761983275413513, | |
| "learning_rate": 7.244022593963609e-06, | |
| "loss": 0.19192323088645935, | |
| "step": 758 | |
| }, | |
| { | |
| "epoch": 3.2758620689655173, | |
| "grad_norm": 0.041269440203905106, | |
| "learning_rate": 7.170880911825657e-06, | |
| "loss": 0.13779321312904358, | |
| "step": 760 | |
| }, | |
| { | |
| "epoch": 3.2844827586206895, | |
| "grad_norm": 0.2219523787498474, | |
| "learning_rate": 7.098493490304566e-06, | |
| "loss": 0.24427469074726105, | |
| "step": 762 | |
| }, | |
| { | |
| "epoch": 3.293103448275862, | |
| "grad_norm": 0.7461491227149963, | |
| "learning_rate": 7.026864011276575e-06, | |
| "loss": 0.32002437114715576, | |
| "step": 764 | |
| }, | |
| { | |
| "epoch": 3.3017241379310347, | |
| "grad_norm": 0.100465789437294, | |
| "learning_rate": 6.955996118066326e-06, | |
| "loss": 0.11214806139469147, | |
| "step": 766 | |
| }, | |
| { | |
| "epoch": 3.310344827586207, | |
| "grad_norm": 0.06019704416394234, | |
| "learning_rate": 6.8858934152615646e-06, | |
| "loss": 0.1987936794757843, | |
| "step": 768 | |
| }, | |
| { | |
| "epoch": 3.3189655172413794, | |
| "grad_norm": 0.12379293888807297, | |
| "learning_rate": 6.816559468529773e-06, | |
| "loss": 0.058321211487054825, | |
| "step": 770 | |
| }, | |
| { | |
| "epoch": 3.3275862068965516, | |
| "grad_norm": 0.3285755515098572, | |
| "learning_rate": 6.747997804436846e-06, | |
| "loss": 0.08903615176677704, | |
| "step": 772 | |
| }, | |
| { | |
| "epoch": 3.336206896551724, | |
| "grad_norm": 0.11563495546579361, | |
| "learning_rate": 6.680211910267665e-06, | |
| "loss": 0.35364535450935364, | |
| "step": 774 | |
| }, | |
| { | |
| "epoch": 3.344827586206897, | |
| "grad_norm": 0.07364711910486221, | |
| "learning_rate": 6.613205233848783e-06, | |
| "loss": 0.20209553837776184, | |
| "step": 776 | |
| }, | |
| { | |
| "epoch": 3.353448275862069, | |
| "grad_norm": 0.0495804026722908, | |
| "learning_rate": 6.546981183373009e-06, | |
| "loss": 0.19359779357910156, | |
| "step": 778 | |
| }, | |
| { | |
| "epoch": 3.3620689655172415, | |
| "grad_norm": 0.13539589941501617, | |
| "learning_rate": 6.481543127226073e-06, | |
| "loss": 0.28171947598457336, | |
| "step": 780 | |
| }, | |
| { | |
| "epoch": 3.3706896551724137, | |
| "grad_norm": 0.07525072246789932, | |
| "learning_rate": 6.4168943938153e-06, | |
| "loss": 0.1644493192434311, | |
| "step": 782 | |
| }, | |
| { | |
| "epoch": 3.3793103448275863, | |
| "grad_norm": 0.04455971717834473, | |
| "learning_rate": 6.353038271400319e-06, | |
| "loss": 0.17818251252174377, | |
| "step": 784 | |
| }, | |
| { | |
| "epoch": 3.3879310344827585, | |
| "grad_norm": 0.04888049513101578, | |
| "learning_rate": 6.289978007925791e-06, | |
| "loss": 0.08893375098705292, | |
| "step": 786 | |
| }, | |
| { | |
| "epoch": 3.396551724137931, | |
| "grad_norm": 0.0592099204659462, | |
| "learning_rate": 6.227716810856235e-06, | |
| "loss": 0.16159863770008087, | |
| "step": 788 | |
| }, | |
| { | |
| "epoch": 3.405172413793103, | |
| "grad_norm": 0.05931266024708748, | |
| "learning_rate": 6.1662578470128595e-06, | |
| "loss": 0.19914919137954712, | |
| "step": 790 | |
| }, | |
| { | |
| "epoch": 3.413793103448276, | |
| "grad_norm": 0.05199761316180229, | |
| "learning_rate": 6.105604242412507e-06, | |
| "loss": 0.1833517998456955, | |
| "step": 792 | |
| }, | |
| { | |
| "epoch": 3.4224137931034484, | |
| "grad_norm": 0.07053744047880173, | |
| "learning_rate": 6.0457590821086364e-06, | |
| "loss": 0.1568892002105713, | |
| "step": 794 | |
| }, | |
| { | |
| "epoch": 3.4310344827586206, | |
| "grad_norm": 0.11103974282741547, | |
| "learning_rate": 5.9867254100344305e-06, | |
| "loss": 0.5605343580245972, | |
| "step": 796 | |
| }, | |
| { | |
| "epoch": 3.439655172413793, | |
| "grad_norm": 0.1462671458721161, | |
| "learning_rate": 5.92850622884794e-06, | |
| "loss": 0.2542985677719116, | |
| "step": 798 | |
| }, | |
| { | |
| "epoch": 3.4482758620689653, | |
| "grad_norm": 0.07662937045097351, | |
| "learning_rate": 5.871104499779383e-06, | |
| "loss": 0.3042844533920288, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 3.456896551724138, | |
| "grad_norm": 0.40208032727241516, | |
| "learning_rate": 5.814523142480514e-06, | |
| "loss": 0.23688863217830658, | |
| "step": 802 | |
| }, | |
| { | |
| "epoch": 3.4655172413793105, | |
| "grad_norm": 0.0428071990609169, | |
| "learning_rate": 5.758765034876124e-06, | |
| "loss": 0.1413598358631134, | |
| "step": 804 | |
| }, | |
| { | |
| "epoch": 3.4741379310344827, | |
| "grad_norm": 0.18820738792419434, | |
| "learning_rate": 5.703833013017659e-06, | |
| "loss": 0.26621344685554504, | |
| "step": 806 | |
| }, | |
| { | |
| "epoch": 3.4827586206896552, | |
| "grad_norm": 0.0474395789206028, | |
| "learning_rate": 5.649729870938974e-06, | |
| "loss": 0.1856929361820221, | |
| "step": 808 | |
| }, | |
| { | |
| "epoch": 3.4913793103448274, | |
| "grad_norm": 0.041845474392175674, | |
| "learning_rate": 5.596458360514197e-06, | |
| "loss": 0.11116787791252136, | |
| "step": 810 | |
| }, | |
| { | |
| "epoch": 3.5, | |
| "grad_norm": 0.06532273441553116, | |
| "learning_rate": 5.544021191317797e-06, | |
| "loss": 0.2585083842277527, | |
| "step": 812 | |
| }, | |
| { | |
| "epoch": 3.5086206896551726, | |
| "grad_norm": 0.17792125046253204, | |
| "learning_rate": 5.492421030486723e-06, | |
| "loss": 0.24390508234500885, | |
| "step": 814 | |
| }, | |
| { | |
| "epoch": 3.5172413793103448, | |
| "grad_norm": 0.06345438957214355, | |
| "learning_rate": 5.441660502584782e-06, | |
| "loss": 0.19690856337547302, | |
| "step": 816 | |
| }, | |
| { | |
| "epoch": 3.5258620689655173, | |
| "grad_norm": 0.06546366214752197, | |
| "learning_rate": 5.391742189469118e-06, | |
| "loss": 0.18222372233867645, | |
| "step": 818 | |
| }, | |
| { | |
| "epoch": 3.5344827586206895, | |
| "grad_norm": 0.06039542332291603, | |
| "learning_rate": 5.342668630158901e-06, | |
| "loss": 0.14991328120231628, | |
| "step": 820 | |
| }, | |
| { | |
| "epoch": 3.543103448275862, | |
| "grad_norm": 0.08110994100570679, | |
| "learning_rate": 5.294442320706179e-06, | |
| "loss": 0.12025367468595505, | |
| "step": 822 | |
| }, | |
| { | |
| "epoch": 3.5517241379310347, | |
| "grad_norm": 0.04069434478878975, | |
| "learning_rate": 5.247065714068933e-06, | |
| "loss": 0.0922561064362526, | |
| "step": 824 | |
| }, | |
| { | |
| "epoch": 3.560344827586207, | |
| "grad_norm": 0.012679479084908962, | |
| "learning_rate": 5.200541219986286e-06, | |
| "loss": 0.03818206116557121, | |
| "step": 826 | |
| }, | |
| { | |
| "epoch": 3.5689655172413794, | |
| "grad_norm": 0.22782403230667114, | |
| "learning_rate": 5.1548712048559655e-06, | |
| "loss": 0.2238304615020752, | |
| "step": 828 | |
| }, | |
| { | |
| "epoch": 3.5775862068965516, | |
| "grad_norm": 0.05799900367856026, | |
| "learning_rate": 5.110057991613912e-06, | |
| "loss": 0.1549633890390396, | |
| "step": 830 | |
| }, | |
| { | |
| "epoch": 3.586206896551724, | |
| "grad_norm": 0.05750008672475815, | |
| "learning_rate": 5.0661038596161515e-06, | |
| "loss": 0.14927032589912415, | |
| "step": 832 | |
| }, | |
| { | |
| "epoch": 3.594827586206897, | |
| "grad_norm": 0.24478773772716522, | |
| "learning_rate": 5.023011044522834e-06, | |
| "loss": 0.2999204397201538, | |
| "step": 834 | |
| }, | |
| { | |
| "epoch": 3.603448275862069, | |
| "grad_norm": 0.07759716361761093, | |
| "learning_rate": 4.980781738184549e-06, | |
| "loss": 0.20024727284908295, | |
| "step": 836 | |
| }, | |
| { | |
| "epoch": 3.612068965517241, | |
| "grad_norm": 0.0670485645532608, | |
| "learning_rate": 4.939418088530811e-06, | |
| "loss": 0.13863810896873474, | |
| "step": 838 | |
| }, | |
| { | |
| "epoch": 3.6206896551724137, | |
| "grad_norm": 0.12198883295059204, | |
| "learning_rate": 4.898922199460831e-06, | |
| "loss": 0.18534965813159943, | |
| "step": 840 | |
| }, | |
| { | |
| "epoch": 3.6293103448275863, | |
| "grad_norm": 0.1402168571949005, | |
| "learning_rate": 4.859296130736489e-06, | |
| "loss": 0.15518294274806976, | |
| "step": 842 | |
| }, | |
| { | |
| "epoch": 3.637931034482759, | |
| "grad_norm": 0.06257359683513641, | |
| "learning_rate": 4.820541897877585e-06, | |
| "loss": 0.23298737406730652, | |
| "step": 844 | |
| }, | |
| { | |
| "epoch": 3.646551724137931, | |
| "grad_norm": 0.11765491217374802, | |
| "learning_rate": 4.782661472059298e-06, | |
| "loss": 0.264419823884964, | |
| "step": 846 | |
| }, | |
| { | |
| "epoch": 3.655172413793103, | |
| "grad_norm": 0.03430064767599106, | |
| "learning_rate": 4.745656780011951e-06, | |
| "loss": 0.15973711013793945, | |
| "step": 848 | |
| }, | |
| { | |
| "epoch": 3.663793103448276, | |
| "grad_norm": 0.03395868092775345, | |
| "learning_rate": 4.709529703922993e-06, | |
| "loss": 0.17208503186702728, | |
| "step": 850 | |
| }, | |
| { | |
| "epoch": 3.6724137931034484, | |
| "grad_norm": 0.08469868451356888, | |
| "learning_rate": 4.674282081341271e-06, | |
| "loss": 0.19475609064102173, | |
| "step": 852 | |
| }, | |
| { | |
| "epoch": 3.6810344827586206, | |
| "grad_norm": 0.06020957604050636, | |
| "learning_rate": 4.639915705083572e-06, | |
| "loss": 0.1562570333480835, | |
| "step": 854 | |
| }, | |
| { | |
| "epoch": 3.689655172413793, | |
| "grad_norm": 0.07793577015399933, | |
| "learning_rate": 4.606432323143412e-06, | |
| "loss": 0.15900962054729462, | |
| "step": 856 | |
| }, | |
| { | |
| "epoch": 3.6982758620689653, | |
| "grad_norm": 0.13332881033420563, | |
| "learning_rate": 4.573833638602159e-06, | |
| "loss": 0.22381483018398285, | |
| "step": 858 | |
| }, | |
| { | |
| "epoch": 3.706896551724138, | |
| "grad_norm": 0.02578054927289486, | |
| "learning_rate": 4.542121309542383e-06, | |
| "loss": 0.09598782658576965, | |
| "step": 860 | |
| }, | |
| { | |
| "epoch": 3.7155172413793105, | |
| "grad_norm": 0.15609142184257507, | |
| "learning_rate": 4.511296948963527e-06, | |
| "loss": 0.19943147897720337, | |
| "step": 862 | |
| }, | |
| { | |
| "epoch": 3.7241379310344827, | |
| "grad_norm": 0.01152154989540577, | |
| "learning_rate": 4.4813621246998765e-06, | |
| "loss": 0.07272744178771973, | |
| "step": 864 | |
| }, | |
| { | |
| "epoch": 3.7327586206896552, | |
| "grad_norm": 0.06737919896841049, | |
| "learning_rate": 4.45231835934079e-06, | |
| "loss": 0.1303609311580658, | |
| "step": 866 | |
| }, | |
| { | |
| "epoch": 3.7413793103448274, | |
| "grad_norm": 0.2695164084434509, | |
| "learning_rate": 4.424167130153277e-06, | |
| "loss": 0.18073761463165283, | |
| "step": 868 | |
| }, | |
| { | |
| "epoch": 3.75, | |
| "grad_norm": 0.044187769293785095, | |
| "learning_rate": 4.396909869006847e-06, | |
| "loss": 0.12275875359773636, | |
| "step": 870 | |
| }, | |
| { | |
| "epoch": 3.7586206896551726, | |
| "grad_norm": 0.008070076815783978, | |
| "learning_rate": 4.3705479623006866e-06, | |
| "loss": 0.06019383668899536, | |
| "step": 872 | |
| }, | |
| { | |
| "epoch": 3.7672413793103448, | |
| "grad_norm": 0.06783478707075119, | |
| "learning_rate": 4.345082750893132e-06, | |
| "loss": 0.10059908032417297, | |
| "step": 874 | |
| }, | |
| { | |
| "epoch": 3.7758620689655173, | |
| "grad_norm": 0.10238350927829742, | |
| "learning_rate": 4.320515530033487e-06, | |
| "loss": 0.30081015825271606, | |
| "step": 876 | |
| }, | |
| { | |
| "epoch": 3.7844827586206895, | |
| "grad_norm": 0.14017876982688904, | |
| "learning_rate": 4.296847549296115e-06, | |
| "loss": 0.30149415135383606, | |
| "step": 878 | |
| }, | |
| { | |
| "epoch": 3.793103448275862, | |
| "grad_norm": 0.06336027383804321, | |
| "learning_rate": 4.274080012516909e-06, | |
| "loss": 0.13996456563472748, | |
| "step": 880 | |
| }, | |
| { | |
| "epoch": 3.8017241379310347, | |
| "grad_norm": 0.04175444692373276, | |
| "learning_rate": 4.2522140777320404e-06, | |
| "loss": 0.09511252492666245, | |
| "step": 882 | |
| }, | |
| { | |
| "epoch": 3.810344827586207, | |
| "grad_norm": 0.05849481746554375, | |
| "learning_rate": 4.23125085711907e-06, | |
| "loss": 0.23412549495697021, | |
| "step": 884 | |
| }, | |
| { | |
| "epoch": 3.8189655172413794, | |
| "grad_norm": 0.06667976826429367, | |
| "learning_rate": 4.21119141694037e-06, | |
| "loss": 0.160780131816864, | |
| "step": 886 | |
| }, | |
| { | |
| "epoch": 3.8275862068965516, | |
| "grad_norm": 0.056150369346141815, | |
| "learning_rate": 4.192036777488896e-06, | |
| "loss": 0.11835036426782608, | |
| "step": 888 | |
| }, | |
| { | |
| "epoch": 3.836206896551724, | |
| "grad_norm": 0.12212974578142166, | |
| "learning_rate": 4.173787913036284e-06, | |
| "loss": 0.11370360106229782, | |
| "step": 890 | |
| }, | |
| { | |
| "epoch": 3.844827586206897, | |
| "grad_norm": 0.08489777147769928, | |
| "learning_rate": 4.156445751783308e-06, | |
| "loss": 0.17437399923801422, | |
| "step": 892 | |
| }, | |
| { | |
| "epoch": 3.853448275862069, | |
| "grad_norm": 0.03076266683638096, | |
| "learning_rate": 4.140011175812656e-06, | |
| "loss": 0.15946733951568604, | |
| "step": 894 | |
| }, | |
| { | |
| "epoch": 3.862068965517241, | |
| "grad_norm": 0.044967204332351685, | |
| "learning_rate": 4.124485021044069e-06, | |
| "loss": 0.16160649061203003, | |
| "step": 896 | |
| }, | |
| { | |
| "epoch": 3.8706896551724137, | |
| "grad_norm": 0.06540035456418991, | |
| "learning_rate": 4.1098680771918245e-06, | |
| "loss": 0.13039463758468628, | |
| "step": 898 | |
| }, | |
| { | |
| "epoch": 3.8793103448275863, | |
| "grad_norm": 0.0530594103038311, | |
| "learning_rate": 4.096161087724573e-06, | |
| "loss": 0.16959071159362793, | |
| "step": 900 | |
| }, | |
| { | |
| "epoch": 3.887931034482759, | |
| "grad_norm": 0.15922929346561432, | |
| "learning_rate": 4.0833647498275085e-06, | |
| "loss": 0.20945216715335846, | |
| "step": 902 | |
| }, | |
| { | |
| "epoch": 3.896551724137931, | |
| "grad_norm": 0.06301749497652054, | |
| "learning_rate": 4.07147971436692e-06, | |
| "loss": 0.22212789952754974, | |
| "step": 904 | |
| }, | |
| { | |
| "epoch": 3.905172413793103, | |
| "grad_norm": 0.15187880396842957, | |
| "learning_rate": 4.060506585857085e-06, | |
| "loss": 0.21481694281101227, | |
| "step": 906 | |
| }, | |
| { | |
| "epoch": 3.913793103448276, | |
| "grad_norm": 0.32235345244407654, | |
| "learning_rate": 4.0504459224295174e-06, | |
| "loss": 0.16184020042419434, | |
| "step": 908 | |
| }, | |
| { | |
| "epoch": 3.9224137931034484, | |
| "grad_norm": 0.07125352323055267, | |
| "learning_rate": 4.041298235804577e-06, | |
| "loss": 0.1316578984260559, | |
| "step": 910 | |
| }, | |
| { | |
| "epoch": 3.9310344827586206, | |
| "grad_norm": 0.04981033504009247, | |
| "learning_rate": 4.0330639912654516e-06, | |
| "loss": 0.11852114647626877, | |
| "step": 912 | |
| }, | |
| { | |
| "epoch": 3.939655172413793, | |
| "grad_norm": 0.11605612933635712, | |
| "learning_rate": 4.02574360763448e-06, | |
| "loss": 0.16175302863121033, | |
| "step": 914 | |
| }, | |
| { | |
| "epoch": 3.9482758620689653, | |
| "grad_norm": 0.05728490650653839, | |
| "learning_rate": 4.019337457251857e-06, | |
| "loss": 0.16411718726158142, | |
| "step": 916 | |
| }, | |
| { | |
| "epoch": 3.956896551724138, | |
| "grad_norm": 0.08225003629922867, | |
| "learning_rate": 4.013845865956692e-06, | |
| "loss": 0.22733992338180542, | |
| "step": 918 | |
| }, | |
| { | |
| "epoch": 3.9655172413793105, | |
| "grad_norm": 0.2269326150417328, | |
| "learning_rate": 4.00926911307043e-06, | |
| "loss": 0.1820860654115677, | |
| "step": 920 | |
| }, | |
| { | |
| "epoch": 3.9741379310344827, | |
| "grad_norm": 0.06667070835828781, | |
| "learning_rate": 4.005607431382659e-06, | |
| "loss": 0.15438126027584076, | |
| "step": 922 | |
| }, | |
| { | |
| "epoch": 3.9827586206896552, | |
| "grad_norm": 0.05220466107130051, | |
| "learning_rate": 4.002861007139253e-06, | |
| "loss": 0.17508190870285034, | |
| "step": 924 | |
| }, | |
| { | |
| "epoch": 3.9913793103448274, | |
| "grad_norm": 0.07188162952661514, | |
| "learning_rate": 4.001029980032909e-06, | |
| "loss": 0.1996261328458786, | |
| "step": 926 | |
| }, | |
| { | |
| "epoch": 4.0, | |
| "grad_norm": 0.3331678509712219, | |
| "learning_rate": 4.000114443196044e-06, | |
| "loss": 0.2806675136089325, | |
| "step": 928 | |
| }, | |
| { | |
| "epoch": 4.0, | |
| "step": 928, | |
| "total_flos": 3.61934117404672e+18, | |
| "train_loss": 0.7229323635552207, | |
| "train_runtime": 32622.9814, | |
| "train_samples_per_second": 1.707, | |
| "train_steps_per_second": 0.028 | |
| } | |
| ], | |
| "logging_steps": 2, | |
| "max_steps": 928, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 4, | |
| "save_steps": 99999, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": false, | |
| "should_training_stop": false | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 3.61934117404672e+18, | |
| "train_batch_size": 1, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |