Image-Text-to-Text
Transformers
Safetensors
qwen3_5
llama-factory
full
Generated from Trainer
conversational
Instructions to use furproxy/9b-58 with libraries, inference providers, notebooks, and local apps. Follow these links to get started.
- Libraries
- Transformers
How to use furproxy/9b-58 with Transformers:
```python
# Use a pipeline as a high-level helper
from transformers import pipeline

pipe = pipeline("image-text-to-text", model="furproxy/9b-58")
messages = [
    {
        "role": "user",
        "content": [
            {"type": "image", "url": "https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/p-blog/candy.JPG"},
            {"type": "text", "text": "What animal is on the candy?"}
        ]
    },
]
pipe(text=messages)
```

```python
# Load model directly
from transformers import AutoProcessor, AutoModelForImageTextToText

processor = AutoProcessor.from_pretrained("furproxy/9b-58")
model = AutoModelForImageTextToText.from_pretrained("furproxy/9b-58")
messages = [
    {
        "role": "user",
        "content": [
            {"type": "image", "url": "https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/p-blog/candy.JPG"},
            {"type": "text", "text": "What animal is on the candy?"}
        ]
    },
]
inputs = processor.apply_chat_template(
    messages,
    add_generation_prompt=True,
    tokenize=True,
    return_dict=True,
    return_tensors="pt",
).to(model.device)

outputs = model.generate(**inputs, max_new_tokens=40)
print(processor.decode(outputs[0][inputs["input_ids"].shape[-1]:]))
```
- Notebooks
- Google Colab
- Kaggle
- Local Apps Settings
- vLLM
How to use furproxy/9b-58 with vLLM:
Install from pip and serve model
```bash
# Install vLLM from pip:
pip install vllm

# Start the vLLM server:
vllm serve "furproxy/9b-58"

# Call the server using curl (OpenAI-compatible API):
curl -X POST "http://localhost:8000/v1/chat/completions" \
    -H "Content-Type: application/json" \
    --data '{
        "model": "furproxy/9b-58",
        "messages": [
            {
                "role": "user",
                "content": [
                    {
                        "type": "text",
                        "text": "Describe this image in one sentence."
                    },
                    {
                        "type": "image_url",
                        "image_url": {
                            "url": "https://cdn.britannica.com/61/93061-050-99147DCE/Statue-of-Liberty-Island-New-York-Bay.jpg"
                        }
                    }
                ]
            }
        ]
    }'
```
Use Docker
docker model run hf.co/furproxy/9b-58
- SGLang
How to use furproxy/9b-58 with SGLang:
Install from pip and serve model
```bash
# Install SGLang from pip:
pip install sglang

# Start the SGLang server:
python3 -m sglang.launch_server \
    --model-path "furproxy/9b-58" \
    --host 0.0.0.0 \
    --port 30000

# Call the server using curl (OpenAI-compatible API):
curl -X POST "http://localhost:30000/v1/chat/completions" \
    -H "Content-Type: application/json" \
    --data '{
        "model": "furproxy/9b-58",
        "messages": [
            {
                "role": "user",
                "content": [
                    {
                        "type": "text",
                        "text": "Describe this image in one sentence."
                    },
                    {
                        "type": "image_url",
                        "image_url": {
                            "url": "https://cdn.britannica.com/61/93061-050-99147DCE/Statue-of-Liberty-Island-New-York-Bay.jpg"
                        }
                    }
                ]
            }
        ]
    }'
```
Use Docker images
```bash
docker run --gpus all \
    --shm-size 32g \
    -p 30000:30000 \
    -v ~/.cache/huggingface:/root/.cache/huggingface \
    --env "HF_TOKEN=<secret>" \
    --ipc=host \
    lmsysorg/sglang:latest \
    python3 -m sglang.launch_server \
        --model-path "furproxy/9b-58" \
        --host 0.0.0.0 \
        --port 30000

# Call the server using curl (OpenAI-compatible API):
curl -X POST "http://localhost:30000/v1/chat/completions" \
    -H "Content-Type: application/json" \
    --data '{
        "model": "furproxy/9b-58",
        "messages": [
            {
                "role": "user",
                "content": [
                    {
                        "type": "text",
                        "text": "Describe this image in one sentence."
                    },
                    {
                        "type": "image_url",
                        "image_url": {
                            "url": "https://cdn.britannica.com/61/93061-050-99147DCE/Statue-of-Liberty-Island-New-York-Bay.jpg"
                        }
                    }
                ]
            }
        ]
    }'
```
- Docker Model Runner
How to use furproxy/9b-58 with Docker Model Runner:
docker model run hf.co/furproxy/9b-58
| { | |
| "best_global_step": null, | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 3.0, | |
| "eval_steps": 500, | |
| "global_step": 1098, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.00546448087431694, | |
| "grad_norm": 0.9203227758407593, | |
| "learning_rate": 4.545454545454545e-07, | |
| "loss": 2.568605422973633, | |
| "step": 2 | |
| }, | |
| { | |
| "epoch": 0.01092896174863388, | |
| "grad_norm": 0.8554143905639648, | |
| "learning_rate": 1.3636363636363636e-06, | |
| "loss": 1.9374263286590576, | |
| "step": 4 | |
| }, | |
| { | |
| "epoch": 0.01639344262295082, | |
| "grad_norm": 0.5728222727775574, | |
| "learning_rate": 2.2727272727272728e-06, | |
| "loss": 1.878420352935791, | |
| "step": 6 | |
| }, | |
| { | |
| "epoch": 0.02185792349726776, | |
| "grad_norm": 0.9100278615951538, | |
| "learning_rate": 3.1818181818181817e-06, | |
| "loss": 1.7412551641464233, | |
| "step": 8 | |
| }, | |
| { | |
| "epoch": 0.0273224043715847, | |
| "grad_norm": 0.7092919945716858, | |
| "learning_rate": 4.0909090909090915e-06, | |
| "loss": 1.7085704803466797, | |
| "step": 10 | |
| }, | |
| { | |
| "epoch": 0.03278688524590164, | |
| "grad_norm": 0.7366429567337036, | |
| "learning_rate": 5e-06, | |
| "loss": 1.5689289569854736, | |
| "step": 12 | |
| }, | |
| { | |
| "epoch": 0.03825136612021858, | |
| "grad_norm": 0.8158249258995056, | |
| "learning_rate": 5.909090909090909e-06, | |
| "loss": 1.4770691394805908, | |
| "step": 14 | |
| }, | |
| { | |
| "epoch": 0.04371584699453552, | |
| "grad_norm": 0.44367218017578125, | |
| "learning_rate": 6.818181818181818e-06, | |
| "loss": 1.1802119016647339, | |
| "step": 16 | |
| }, | |
| { | |
| "epoch": 0.04918032786885246, | |
| "grad_norm": 0.7750319838523865, | |
| "learning_rate": 7.727272727272727e-06, | |
| "loss": 1.1797850131988525, | |
| "step": 18 | |
| }, | |
| { | |
| "epoch": 0.0546448087431694, | |
| "grad_norm": 0.5176887512207031, | |
| "learning_rate": 8.636363636363637e-06, | |
| "loss": 1.4043047428131104, | |
| "step": 20 | |
| }, | |
| { | |
| "epoch": 0.060109289617486336, | |
| "grad_norm": 0.8839510679244995, | |
| "learning_rate": 9.545454545454547e-06, | |
| "loss": 1.0820410251617432, | |
| "step": 22 | |
| }, | |
| { | |
| "epoch": 0.06557377049180328, | |
| "grad_norm": 0.5803583860397339, | |
| "learning_rate": 1.0454545454545455e-05, | |
| "loss": 1.4104256629943848, | |
| "step": 24 | |
| }, | |
| { | |
| "epoch": 0.07103825136612021, | |
| "grad_norm": 1.299857258796692, | |
| "learning_rate": 1.1363636363636365e-05, | |
| "loss": 1.594111442565918, | |
| "step": 26 | |
| }, | |
| { | |
| "epoch": 0.07650273224043716, | |
| "grad_norm": 0.4243783950805664, | |
| "learning_rate": 1.2272727272727273e-05, | |
| "loss": 1.3635478019714355, | |
| "step": 28 | |
| }, | |
| { | |
| "epoch": 0.08196721311475409, | |
| "grad_norm": 0.36612921953201294, | |
| "learning_rate": 1.318181818181818e-05, | |
| "loss": 1.2694858312606812, | |
| "step": 30 | |
| }, | |
| { | |
| "epoch": 0.08743169398907104, | |
| "grad_norm": 0.3528824746608734, | |
| "learning_rate": 1.409090909090909e-05, | |
| "loss": 1.2929545640945435, | |
| "step": 32 | |
| }, | |
| { | |
| "epoch": 0.09289617486338798, | |
| "grad_norm": 0.9442020654678345, | |
| "learning_rate": 1.5e-05, | |
| "loss": 1.0059682130813599, | |
| "step": 34 | |
| }, | |
| { | |
| "epoch": 0.09836065573770492, | |
| "grad_norm": 0.3712392747402191, | |
| "learning_rate": 1.590909090909091e-05, | |
| "loss": 1.1053444147109985, | |
| "step": 36 | |
| }, | |
| { | |
| "epoch": 0.10382513661202186, | |
| "grad_norm": 0.4434971809387207, | |
| "learning_rate": 1.6818181818181818e-05, | |
| "loss": 1.367356777191162, | |
| "step": 38 | |
| }, | |
| { | |
| "epoch": 0.1092896174863388, | |
| "grad_norm": 0.33869898319244385, | |
| "learning_rate": 1.772727272727273e-05, | |
| "loss": 1.3058295249938965, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 0.11475409836065574, | |
| "grad_norm": 0.837212860584259, | |
| "learning_rate": 1.8636363636363638e-05, | |
| "loss": 0.9450257420539856, | |
| "step": 42 | |
| }, | |
| { | |
| "epoch": 0.12021857923497267, | |
| "grad_norm": 0.7006627917289734, | |
| "learning_rate": 1.9545454545454546e-05, | |
| "loss": 0.8914309144020081, | |
| "step": 44 | |
| }, | |
| { | |
| "epoch": 0.12568306010928962, | |
| "grad_norm": 0.5354660749435425, | |
| "learning_rate": 2.0454545454545457e-05, | |
| "loss": 1.3381656408309937, | |
| "step": 46 | |
| }, | |
| { | |
| "epoch": 0.13114754098360656, | |
| "grad_norm": 0.5518407821655273, | |
| "learning_rate": 2.1363636363636362e-05, | |
| "loss": 1.320247769355774, | |
| "step": 48 | |
| }, | |
| { | |
| "epoch": 0.1366120218579235, | |
| "grad_norm": 0.61113440990448, | |
| "learning_rate": 2.2272727272727274e-05, | |
| "loss": 1.279352068901062, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 0.14207650273224043, | |
| "grad_norm": 0.38642510771751404, | |
| "learning_rate": 2.318181818181818e-05, | |
| "loss": 1.2564854621887207, | |
| "step": 52 | |
| }, | |
| { | |
| "epoch": 0.14754098360655737, | |
| "grad_norm": 3.8424618244171143, | |
| "learning_rate": 2.4090909090909093e-05, | |
| "loss": 0.8689378499984741, | |
| "step": 54 | |
| }, | |
| { | |
| "epoch": 0.15300546448087432, | |
| "grad_norm": 0.7067618370056152, | |
| "learning_rate": 2.5e-05, | |
| "loss": 1.445729374885559, | |
| "step": 56 | |
| }, | |
| { | |
| "epoch": 0.15846994535519127, | |
| "grad_norm": 1.6429425477981567, | |
| "learning_rate": 2.4999795867415925e-05, | |
| "loss": 1.4359698295593262, | |
| "step": 58 | |
| }, | |
| { | |
| "epoch": 0.16393442622950818, | |
| "grad_norm": 0.42640018463134766, | |
| "learning_rate": 2.499918347707172e-05, | |
| "loss": 1.321084976196289, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 0.16939890710382513, | |
| "grad_norm": 0.5433887243270874, | |
| "learning_rate": 2.499816285119117e-05, | |
| "loss": 1.4983344078063965, | |
| "step": 62 | |
| }, | |
| { | |
| "epoch": 0.17486338797814208, | |
| "grad_norm": 6.383620738983154, | |
| "learning_rate": 2.499673402681304e-05, | |
| "loss": 1.1551955938339233, | |
| "step": 64 | |
| }, | |
| { | |
| "epoch": 0.18032786885245902, | |
| "grad_norm": 0.5254676938056946, | |
| "learning_rate": 2.49948970557897e-05, | |
| "loss": 1.3022432327270508, | |
| "step": 66 | |
| }, | |
| { | |
| "epoch": 0.18579234972677597, | |
| "grad_norm": 0.5274835824966431, | |
| "learning_rate": 2.499265200478526e-05, | |
| "loss": 1.2709100246429443, | |
| "step": 68 | |
| }, | |
| { | |
| "epoch": 0.1912568306010929, | |
| "grad_norm": 0.8439487814903259, | |
| "learning_rate": 2.498999895527316e-05, | |
| "loss": 1.3120120763778687, | |
| "step": 70 | |
| }, | |
| { | |
| "epoch": 0.19672131147540983, | |
| "grad_norm": 1.2039533853530884, | |
| "learning_rate": 2.4986938003533195e-05, | |
| "loss": 1.0912879705429077, | |
| "step": 72 | |
| }, | |
| { | |
| "epoch": 0.20218579234972678, | |
| "grad_norm": 0.45596545934677124, | |
| "learning_rate": 2.498346926064803e-05, | |
| "loss": 0.9286341667175293, | |
| "step": 74 | |
| }, | |
| { | |
| "epoch": 0.20765027322404372, | |
| "grad_norm": 0.41955122351646423, | |
| "learning_rate": 2.497959285249916e-05, | |
| "loss": 1.3309372663497925, | |
| "step": 76 | |
| }, | |
| { | |
| "epoch": 0.21311475409836064, | |
| "grad_norm": 2.2712788581848145, | |
| "learning_rate": 2.497530891976237e-05, | |
| "loss": 0.9704757332801819, | |
| "step": 78 | |
| }, | |
| { | |
| "epoch": 0.2185792349726776, | |
| "grad_norm": 0.6490824818611145, | |
| "learning_rate": 2.497061761790258e-05, | |
| "loss": 0.5369635224342346, | |
| "step": 80 | |
| }, | |
| { | |
| "epoch": 0.22404371584699453, | |
| "grad_norm": 0.5930651426315308, | |
| "learning_rate": 2.4965519117168257e-05, | |
| "loss": 1.442630648612976, | |
| "step": 82 | |
| }, | |
| { | |
| "epoch": 0.22950819672131148, | |
| "grad_norm": 0.8481980562210083, | |
| "learning_rate": 2.49600136025852e-05, | |
| "loss": 0.7645593881607056, | |
| "step": 84 | |
| }, | |
| { | |
| "epoch": 0.23497267759562843, | |
| "grad_norm": 0.29223793745040894, | |
| "learning_rate": 2.495410127394983e-05, | |
| "loss": 1.2828871011734009, | |
| "step": 86 | |
| }, | |
| { | |
| "epoch": 0.24043715846994534, | |
| "grad_norm": 0.5247991681098938, | |
| "learning_rate": 2.494778234582197e-05, | |
| "loss": 1.2668498754501343, | |
| "step": 88 | |
| }, | |
| { | |
| "epoch": 0.2459016393442623, | |
| "grad_norm": 2.0301473140716553, | |
| "learning_rate": 2.494105704751701e-05, | |
| "loss": 0.9710880517959595, | |
| "step": 90 | |
| }, | |
| { | |
| "epoch": 0.25136612021857924, | |
| "grad_norm": 0.6622992753982544, | |
| "learning_rate": 2.4933925623097626e-05, | |
| "loss": 1.313385009765625, | |
| "step": 92 | |
| }, | |
| { | |
| "epoch": 0.2568306010928962, | |
| "grad_norm": 0.4877598285675049, | |
| "learning_rate": 2.49263883313649e-05, | |
| "loss": 1.1548316478729248, | |
| "step": 94 | |
| }, | |
| { | |
| "epoch": 0.26229508196721313, | |
| "grad_norm": 0.9264543652534485, | |
| "learning_rate": 2.4918445445848933e-05, | |
| "loss": 1.5707098245620728, | |
| "step": 96 | |
| }, | |
| { | |
| "epoch": 0.2677595628415301, | |
| "grad_norm": 0.8082510828971863, | |
| "learning_rate": 2.491009725479893e-05, | |
| "loss": 0.9769288301467896, | |
| "step": 98 | |
| }, | |
| { | |
| "epoch": 0.273224043715847, | |
| "grad_norm": 0.27562037110328674, | |
| "learning_rate": 2.4901344061172718e-05, | |
| "loss": 1.083092451095581, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.2786885245901639, | |
| "grad_norm": 0.4006403684616089, | |
| "learning_rate": 2.489218618262578e-05, | |
| "loss": 1.2940970659255981, | |
| "step": 102 | |
| }, | |
| { | |
| "epoch": 0.28415300546448086, | |
| "grad_norm": 0.868722140789032, | |
| "learning_rate": 2.4882623951499687e-05, | |
| "loss": 0.9618574976921082, | |
| "step": 104 | |
| }, | |
| { | |
| "epoch": 0.2896174863387978, | |
| "grad_norm": 0.3060719668865204, | |
| "learning_rate": 2.4872657714810083e-05, | |
| "loss": 1.2874293327331543, | |
| "step": 106 | |
| }, | |
| { | |
| "epoch": 0.29508196721311475, | |
| "grad_norm": 0.3568110764026642, | |
| "learning_rate": 2.4862287834234058e-05, | |
| "loss": 1.4147411584854126, | |
| "step": 108 | |
| }, | |
| { | |
| "epoch": 0.3005464480874317, | |
| "grad_norm": 0.6043673753738403, | |
| "learning_rate": 2.4851514686097042e-05, | |
| "loss": 1.2825733423233032, | |
| "step": 110 | |
| }, | |
| { | |
| "epoch": 0.30601092896174864, | |
| "grad_norm": 0.35318249464035034, | |
| "learning_rate": 2.4840338661359128e-05, | |
| "loss": 1.2864692211151123, | |
| "step": 112 | |
| }, | |
| { | |
| "epoch": 0.3114754098360656, | |
| "grad_norm": 0.5835199356079102, | |
| "learning_rate": 2.4828760165600914e-05, | |
| "loss": 1.356719970703125, | |
| "step": 114 | |
| }, | |
| { | |
| "epoch": 0.31693989071038253, | |
| "grad_norm": 0.3887172341346741, | |
| "learning_rate": 2.481677961900875e-05, | |
| "loss": 1.2759113311767578, | |
| "step": 116 | |
| }, | |
| { | |
| "epoch": 0.3224043715846995, | |
| "grad_norm": 0.36369383335113525, | |
| "learning_rate": 2.4804397456359513e-05, | |
| "loss": 1.0305191278457642, | |
| "step": 118 | |
| }, | |
| { | |
| "epoch": 0.32786885245901637, | |
| "grad_norm": 0.8058388829231262, | |
| "learning_rate": 2.4791614127004825e-05, | |
| "loss": 1.281233787536621, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 0.3333333333333333, | |
| "grad_norm": 0.4051971733570099, | |
| "learning_rate": 2.477843009485474e-05, | |
| "loss": 1.3189853429794312, | |
| "step": 122 | |
| }, | |
| { | |
| "epoch": 0.33879781420765026, | |
| "grad_norm": 0.3125993013381958, | |
| "learning_rate": 2.4764845838360912e-05, | |
| "loss": 1.0454766750335693, | |
| "step": 124 | |
| }, | |
| { | |
| "epoch": 0.3442622950819672, | |
| "grad_norm": 0.4938465654850006, | |
| "learning_rate": 2.475086185049923e-05, | |
| "loss": 1.1030340194702148, | |
| "step": 126 | |
| }, | |
| { | |
| "epoch": 0.34972677595628415, | |
| "grad_norm": 0.35930484533309937, | |
| "learning_rate": 2.473647863875193e-05, | |
| "loss": 1.3264323472976685, | |
| "step": 128 | |
| }, | |
| { | |
| "epoch": 0.3551912568306011, | |
| "grad_norm": 0.22644348442554474, | |
| "learning_rate": 2.472169672508918e-05, | |
| "loss": 1.2775449752807617, | |
| "step": 130 | |
| }, | |
| { | |
| "epoch": 0.36065573770491804, | |
| "grad_norm": 0.316333532333374, | |
| "learning_rate": 2.4706516645950126e-05, | |
| "loss": 1.2168850898742676, | |
| "step": 132 | |
| }, | |
| { | |
| "epoch": 0.366120218579235, | |
| "grad_norm": 0.5648621320724487, | |
| "learning_rate": 2.4690938952223446e-05, | |
| "loss": 1.1108001470565796, | |
| "step": 134 | |
| }, | |
| { | |
| "epoch": 0.37158469945355194, | |
| "grad_norm": 0.6015145182609558, | |
| "learning_rate": 2.4674964209227338e-05, | |
| "loss": 0.996741771697998, | |
| "step": 136 | |
| }, | |
| { | |
| "epoch": 0.3770491803278688, | |
| "grad_norm": 0.5420268177986145, | |
| "learning_rate": 2.4658592996689018e-05, | |
| "loss": 0.985140860080719, | |
| "step": 138 | |
| }, | |
| { | |
| "epoch": 0.3825136612021858, | |
| "grad_norm": 0.8771296143531799, | |
| "learning_rate": 2.464182590872366e-05, | |
| "loss": 0.6611515879631042, | |
| "step": 140 | |
| }, | |
| { | |
| "epoch": 0.3879781420765027, | |
| "grad_norm": 0.3858940303325653, | |
| "learning_rate": 2.4624663553812876e-05, | |
| "loss": 1.24741530418396, | |
| "step": 142 | |
| }, | |
| { | |
| "epoch": 0.39344262295081966, | |
| "grad_norm": 0.381734699010849, | |
| "learning_rate": 2.4607106554782592e-05, | |
| "loss": 1.2738169431686401, | |
| "step": 144 | |
| }, | |
| { | |
| "epoch": 0.3989071038251366, | |
| "grad_norm": 0.2820863127708435, | |
| "learning_rate": 2.4589155548780468e-05, | |
| "loss": 1.227925419807434, | |
| "step": 146 | |
| }, | |
| { | |
| "epoch": 0.40437158469945356, | |
| "grad_norm": 0.32575523853302, | |
| "learning_rate": 2.4570811187252763e-05, | |
| "loss": 1.2266058921813965, | |
| "step": 148 | |
| }, | |
| { | |
| "epoch": 0.4098360655737705, | |
| "grad_norm": 0.6094731688499451, | |
| "learning_rate": 2.4552074135920718e-05, | |
| "loss": 1.1857141256332397, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 0.41530054644808745, | |
| "grad_norm": 0.3147105574607849, | |
| "learning_rate": 2.4532945074756356e-05, | |
| "loss": 1.2648040056228638, | |
| "step": 152 | |
| }, | |
| { | |
| "epoch": 0.4207650273224044, | |
| "grad_norm": 0.3357324004173279, | |
| "learning_rate": 2.4513424697957853e-05, | |
| "loss": 1.243959903717041, | |
| "step": 154 | |
| }, | |
| { | |
| "epoch": 0.4262295081967213, | |
| "grad_norm": 0.4083969295024872, | |
| "learning_rate": 2.4493513713924312e-05, | |
| "loss": 1.288560152053833, | |
| "step": 156 | |
| }, | |
| { | |
| "epoch": 0.43169398907103823, | |
| "grad_norm": 0.6605410575866699, | |
| "learning_rate": 2.4473212845230066e-05, | |
| "loss": 1.032557725906372, | |
| "step": 158 | |
| }, | |
| { | |
| "epoch": 0.4371584699453552, | |
| "grad_norm": 0.27547332644462585, | |
| "learning_rate": 2.4452522828598448e-05, | |
| "loss": 1.2204992771148682, | |
| "step": 160 | |
| }, | |
| { | |
| "epoch": 0.4426229508196721, | |
| "grad_norm": 0.5093227624893188, | |
| "learning_rate": 2.4431444414875076e-05, | |
| "loss": 0.7248828411102295, | |
| "step": 162 | |
| }, | |
| { | |
| "epoch": 0.44808743169398907, | |
| "grad_norm": 0.5530020594596863, | |
| "learning_rate": 2.440997836900058e-05, | |
| "loss": 1.6318763494491577, | |
| "step": 164 | |
| }, | |
| { | |
| "epoch": 0.453551912568306, | |
| "grad_norm": 1.3237181901931763, | |
| "learning_rate": 2.438812546998286e-05, | |
| "loss": 0.7943269610404968, | |
| "step": 166 | |
| }, | |
| { | |
| "epoch": 0.45901639344262296, | |
| "grad_norm": 0.2728120684623718, | |
| "learning_rate": 2.4365886510868798e-05, | |
| "loss": 1.1765998601913452, | |
| "step": 168 | |
| }, | |
| { | |
| "epoch": 0.4644808743169399, | |
| "grad_norm": 0.6889151334762573, | |
| "learning_rate": 2.4343262298715504e-05, | |
| "loss": 1.2575013637542725, | |
| "step": 170 | |
| }, | |
| { | |
| "epoch": 0.46994535519125685, | |
| "grad_norm": 0.42103421688079834, | |
| "learning_rate": 2.4320253654560986e-05, | |
| "loss": 1.3248785734176636, | |
| "step": 172 | |
| }, | |
| { | |
| "epoch": 0.47540983606557374, | |
| "grad_norm": 1.090513825416565, | |
| "learning_rate": 2.429686141339441e-05, | |
| "loss": 1.4404881000518799, | |
| "step": 174 | |
| }, | |
| { | |
| "epoch": 0.4808743169398907, | |
| "grad_norm": 0.34014394879341125, | |
| "learning_rate": 2.427308642412576e-05, | |
| "loss": 1.2410756349563599, | |
| "step": 176 | |
| }, | |
| { | |
| "epoch": 0.48633879781420764, | |
| "grad_norm": 0.5058013200759888, | |
| "learning_rate": 2.4248929549555024e-05, | |
| "loss": 1.2039165496826172, | |
| "step": 178 | |
| }, | |
| { | |
| "epoch": 0.4918032786885246, | |
| "grad_norm": 0.25756290555000305, | |
| "learning_rate": 2.422439166634091e-05, | |
| "loss": 1.2025060653686523, | |
| "step": 180 | |
| }, | |
| { | |
| "epoch": 0.4972677595628415, | |
| "grad_norm": 0.43436670303344727, | |
| "learning_rate": 2.4199473664969033e-05, | |
| "loss": 1.1908947229385376, | |
| "step": 182 | |
| }, | |
| { | |
| "epoch": 0.5027322404371585, | |
| "grad_norm": 1.5551801919937134, | |
| "learning_rate": 2.4174176449719557e-05, | |
| "loss": 1.3453747034072876, | |
| "step": 184 | |
| }, | |
| { | |
| "epoch": 0.5081967213114754, | |
| "grad_norm": 1.153810739517212, | |
| "learning_rate": 2.4148500938634432e-05, | |
| "loss": 1.2401556968688965, | |
| "step": 186 | |
| }, | |
| { | |
| "epoch": 0.5136612021857924, | |
| "grad_norm": 0.48002558946609497, | |
| "learning_rate": 2.412244806348404e-05, | |
| "loss": 1.2031079530715942, | |
| "step": 188 | |
| }, | |
| { | |
| "epoch": 0.5191256830601093, | |
| "grad_norm": 0.28547030687332153, | |
| "learning_rate": 2.4096018769733402e-05, | |
| "loss": 1.2502820491790771, | |
| "step": 190 | |
| }, | |
| { | |
| "epoch": 0.5245901639344263, | |
| "grad_norm": 0.5724475383758545, | |
| "learning_rate": 2.4069214016507857e-05, | |
| "loss": 1.216181755065918, | |
| "step": 192 | |
| }, | |
| { | |
| "epoch": 0.5300546448087432, | |
| "grad_norm": 0.35783788561820984, | |
| "learning_rate": 2.4042034776558252e-05, | |
| "loss": 1.232464075088501, | |
| "step": 194 | |
| }, | |
| { | |
| "epoch": 0.5355191256830601, | |
| "grad_norm": 0.30288538336753845, | |
| "learning_rate": 2.4014482036225657e-05, | |
| "loss": 1.2955890893936157, | |
| "step": 196 | |
| }, | |
| { | |
| "epoch": 0.5409836065573771, | |
| "grad_norm": 0.34098443388938904, | |
| "learning_rate": 2.398655679540555e-05, | |
| "loss": 1.135433316230774, | |
| "step": 198 | |
| }, | |
| { | |
| "epoch": 0.546448087431694, | |
| "grad_norm": 0.25192174315452576, | |
| "learning_rate": 2.3958260067511546e-05, | |
| "loss": 1.2628716230392456, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.5519125683060109, | |
| "grad_norm": 0.32063058018684387, | |
| "learning_rate": 2.392959287943861e-05, | |
| "loss": 1.0893492698669434, | |
| "step": 202 | |
| }, | |
| { | |
| "epoch": 0.5573770491803278, | |
| "grad_norm": 0.3690483570098877, | |
| "learning_rate": 2.390055627152579e-05, | |
| "loss": 1.004583716392517, | |
| "step": 204 | |
| }, | |
| { | |
| "epoch": 0.5628415300546448, | |
| "grad_norm": 0.40026623010635376, | |
| "learning_rate": 2.3871151297518478e-05, | |
| "loss": 1.2617326974868774, | |
| "step": 206 | |
| }, | |
| { | |
| "epoch": 0.5683060109289617, | |
| "grad_norm": 0.3042580485343933, | |
| "learning_rate": 2.384137902453016e-05, | |
| "loss": 0.9801578521728516, | |
| "step": 208 | |
| }, | |
| { | |
| "epoch": 0.5737704918032787, | |
| "grad_norm": 0.3382396399974823, | |
| "learning_rate": 2.381124053300368e-05, | |
| "loss": 1.219712734222412, | |
| "step": 210 | |
| }, | |
| { | |
| "epoch": 0.5792349726775956, | |
| "grad_norm": 0.282825231552124, | |
| "learning_rate": 2.378073691667204e-05, | |
| "loss": 1.5147571563720703, | |
| "step": 212 | |
| }, | |
| { | |
| "epoch": 0.5846994535519126, | |
| "grad_norm": 0.2633031904697418, | |
| "learning_rate": 2.3749869282518718e-05, | |
| "loss": 1.2349286079406738, | |
| "step": 214 | |
| }, | |
| { | |
| "epoch": 0.5901639344262295, | |
| "grad_norm": 0.5875511765480042, | |
| "learning_rate": 2.3718638750737472e-05, | |
| "loss": 1.5963759422302246, | |
| "step": 216 | |
| }, | |
| { | |
| "epoch": 0.5956284153005464, | |
| "grad_norm": 0.433083176612854, | |
| "learning_rate": 2.3687046454691712e-05, | |
| "loss": 1.249823808670044, | |
| "step": 218 | |
| }, | |
| { | |
| "epoch": 0.6010928961748634, | |
| "grad_norm": 0.5638194680213928, | |
| "learning_rate": 2.3655093540873353e-05, | |
| "loss": 1.2851283550262451, | |
| "step": 220 | |
| }, | |
| { | |
| "epoch": 0.6065573770491803, | |
| "grad_norm": 0.40069255232810974, | |
| "learning_rate": 2.3622781168861214e-05, | |
| "loss": 1.1884411573410034, | |
| "step": 222 | |
| }, | |
| { | |
| "epoch": 0.6120218579234973, | |
| "grad_norm": 1.8134185075759888, | |
| "learning_rate": 2.3590110511278933e-05, | |
| "loss": 1.1020855903625488, | |
| "step": 224 | |
| }, | |
| { | |
| "epoch": 0.6174863387978142, | |
| "grad_norm": 0.33945319056510925, | |
| "learning_rate": 2.3557082753752437e-05, | |
| "loss": 1.271837830543518, | |
| "step": 226 | |
| }, | |
| { | |
| "epoch": 0.6229508196721312, | |
| "grad_norm": 0.8068278431892395, | |
| "learning_rate": 2.352369909486687e-05, | |
| "loss": 1.2739131450653076, | |
| "step": 228 | |
| }, | |
| { | |
| "epoch": 0.6284153005464481, | |
| "grad_norm": 0.2856035530567169, | |
| "learning_rate": 2.3489960746123128e-05, | |
| "loss": 0.8101754784584045, | |
| "step": 230 | |
| }, | |
| { | |
| "epoch": 0.6338797814207651, | |
| "grad_norm": 0.3916250467300415, | |
| "learning_rate": 2.3455868931893894e-05, | |
| "loss": 1.2766276597976685, | |
| "step": 232 | |
| }, | |
| { | |
| "epoch": 0.639344262295082, | |
| "grad_norm": 6.051559925079346, | |
| "learning_rate": 2.342142488937919e-05, | |
| "loss": 1.4627418518066406, | |
| "step": 234 | |
| }, | |
| { | |
| "epoch": 0.644808743169399, | |
| "grad_norm": 1.6619840860366821, | |
| "learning_rate": 2.3386629868561492e-05, | |
| "loss": 1.1788352727890015, | |
| "step": 236 | |
| }, | |
| { | |
| "epoch": 0.6502732240437158, | |
| "grad_norm": 0.6968739628791809, | |
| "learning_rate": 2.3351485132160346e-05, | |
| "loss": 1.3334816694259644, | |
| "step": 238 | |
| }, | |
| { | |
| "epoch": 0.6557377049180327, | |
| "grad_norm": 0.5952749848365784, | |
| "learning_rate": 2.331599195558659e-05, | |
| "loss": 1.226516604423523, | |
| "step": 240 | |
| }, | |
| { | |
| "epoch": 0.6612021857923497, | |
| "grad_norm": 1.0215294361114502, | |
| "learning_rate": 2.3280151626896003e-05, | |
| "loss": 1.1722090244293213, | |
| "step": 242 | |
| }, | |
| { | |
| "epoch": 0.6666666666666666, | |
| "grad_norm": 3.0483946800231934, | |
| "learning_rate": 2.324396544674262e-05, | |
| "loss": 1.084344506263733, | |
| "step": 244 | |
| }, | |
| { | |
| "epoch": 0.6721311475409836, | |
| "grad_norm": 0.32553526759147644, | |
| "learning_rate": 2.3207434728331502e-05, | |
| "loss": 1.223487377166748, | |
| "step": 246 | |
| }, | |
| { | |
| "epoch": 0.6775956284153005, | |
| "grad_norm": 0.4716382324695587, | |
| "learning_rate": 2.317056079737108e-05, | |
| "loss": 1.267648696899414, | |
| "step": 248 | |
| }, | |
| { | |
| "epoch": 0.6830601092896175, | |
| "grad_norm": 0.38120850920677185, | |
| "learning_rate": 2.3133344992025057e-05, | |
| "loss": 1.2330824136734009, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 0.6885245901639344, | |
| "grad_norm": 0.4164958596229553, | |
| "learning_rate": 2.3095788662863838e-05, | |
| "loss": 1.1659272909164429, | |
| "step": 252 | |
| }, | |
| { | |
| "epoch": 0.6939890710382514, | |
| "grad_norm": 0.33292973041534424, | |
| "learning_rate": 2.305789317281551e-05, | |
| "loss": 1.2032712697982788, | |
| "step": 254 | |
| }, | |
| { | |
| "epoch": 0.6994535519125683, | |
| "grad_norm": 0.3936408460140228, | |
| "learning_rate": 2.30196598971164e-05, | |
| "loss": 1.1856945753097534, | |
| "step": 256 | |
| }, | |
| { | |
| "epoch": 0.7049180327868853, | |
| "grad_norm": 0.3123660385608673, | |
| "learning_rate": 2.2981090223261146e-05, | |
| "loss": 1.1024748086929321, | |
| "step": 258 | |
| }, | |
| { | |
| "epoch": 0.7103825136612022, | |
| "grad_norm": 0.8626431822776794, | |
| "learning_rate": 2.294218555095236e-05, | |
| "loss": 1.1855603456497192, | |
| "step": 260 | |
| }, | |
| { | |
| "epoch": 0.7158469945355191, | |
| "grad_norm": 0.4246155917644501, | |
| "learning_rate": 2.2902947292049827e-05, | |
| "loss": 1.285102367401123, | |
| "step": 262 | |
| }, | |
| { | |
| "epoch": 0.7213114754098361, | |
| "grad_norm": 0.2916768789291382, | |
| "learning_rate": 2.2863376870519273e-05, | |
| "loss": 1.2235549688339233, | |
| "step": 264 | |
| }, | |
| { | |
| "epoch": 0.726775956284153, | |
| "grad_norm": 0.3774672746658325, | |
| "learning_rate": 2.282347572238068e-05, | |
| "loss": 1.0752044916152954, | |
| "step": 266 | |
| }, | |
| { | |
| "epoch": 0.73224043715847, | |
| "grad_norm": 0.645050585269928, | |
| "learning_rate": 2.278324529565617e-05, | |
| "loss": 1.249227523803711, | |
| "step": 268 | |
| }, | |
| { | |
| "epoch": 0.7377049180327869, | |
| "grad_norm": 0.3120027184486389, | |
| "learning_rate": 2.274268705031748e-05, | |
| "loss": 1.2243732213974, | |
| "step": 270 | |
| }, | |
| { | |
| "epoch": 0.7431693989071039, | |
| "grad_norm": 0.48544391989707947, | |
| "learning_rate": 2.2701802458232945e-05, | |
| "loss": 1.180768370628357, | |
| "step": 272 | |
| }, | |
| { | |
| "epoch": 0.7486338797814208, | |
| "grad_norm": 0.3262249231338501, | |
| "learning_rate": 2.266059300311411e-05, | |
| "loss": 1.2011985778808594, | |
| "step": 274 | |
| }, | |
| { | |
| "epoch": 0.7540983606557377, | |
| "grad_norm": 0.2651177644729614, | |
| "learning_rate": 2.2619060180461873e-05, | |
| "loss": 1.250745415687561, | |
| "step": 276 | |
| }, | |
| { | |
| "epoch": 0.7595628415300546, | |
| "grad_norm": 0.9065034985542297, | |
| "learning_rate": 2.2577205497512227e-05, | |
| "loss": 1.216939091682434, | |
| "step": 278 | |
| }, | |
| { | |
| "epoch": 0.7650273224043715, | |
| "grad_norm": 1.4332304000854492, | |
| "learning_rate": 2.253503047318153e-05, | |
| "loss": 1.2051968574523926, | |
| "step": 280 | |
| }, | |
| { | |
| "epoch": 0.7704918032786885, | |
| "grad_norm": 0.4732670783996582, | |
| "learning_rate": 2.249253663801143e-05, | |
| "loss": 1.2257208824157715, | |
| "step": 282 | |
| }, | |
| { | |
| "epoch": 0.7759562841530054, | |
| "grad_norm": 0.3656598925590515, | |
| "learning_rate": 2.244972553411328e-05, | |
| "loss": 1.2091584205627441, | |
| "step": 284 | |
| }, | |
| { | |
| "epoch": 0.7814207650273224, | |
| "grad_norm": 0.500878095626831, | |
| "learning_rate": 2.2406598715112188e-05, | |
| "loss": 1.2196128368377686, | |
| "step": 286 | |
| }, | |
| { | |
| "epoch": 0.7868852459016393, | |
| "grad_norm": 0.4196425676345825, | |
| "learning_rate": 2.2363157746090647e-05, | |
| "loss": 1.1886357069015503, | |
| "step": 288 | |
| }, | |
| { | |
| "epoch": 0.7923497267759563, | |
| "grad_norm": 0.3086436986923218, | |
| "learning_rate": 2.231940420353173e-05, | |
| "loss": 1.176297664642334, | |
| "step": 290 | |
| }, | |
| { | |
| "epoch": 0.7978142076502732, | |
| "grad_norm": 0.5975199341773987, | |
| "learning_rate": 2.2275339675261865e-05, | |
| "loss": 1.1868562698364258, | |
| "step": 292 | |
| }, | |
| { | |
| "epoch": 0.8032786885245902, | |
| "grad_norm": 0.3168680667877197, | |
| "learning_rate": 2.2230965760393242e-05, | |
| "loss": 1.1880509853363037, | |
| "step": 294 | |
| }, | |
| { | |
| "epoch": 0.8087431693989071, | |
| "grad_norm": 0.6303284764289856, | |
| "learning_rate": 2.218628406926575e-05, | |
| "loss": 1.5363126993179321, | |
| "step": 296 | |
| }, | |
| { | |
| "epoch": 0.8142076502732241, | |
| "grad_norm": 1.672184944152832, | |
| "learning_rate": 2.214129622338856e-05, | |
| "loss": 1.4108844995498657, | |
| "step": 298 | |
| }, | |
| { | |
| "epoch": 0.819672131147541, | |
| "grad_norm": 0.33889323472976685, | |
| "learning_rate": 2.2096003855381268e-05, | |
| "loss": 1.6054472923278809, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 0.825136612021858, | |
| "grad_norm": 0.6121076941490173, | |
| "learning_rate": 2.2050408608914652e-05, | |
| "loss": 1.2369463443756104, | |
| "step": 302 | |
| }, | |
| { | |
| "epoch": 0.8306010928961749, | |
| "grad_norm": 0.9903159737586975, | |
| "learning_rate": 2.200451213865102e-05, | |
| "loss": 1.0083459615707397, | |
| "step": 304 | |
| }, | |
| { | |
| "epoch": 0.8360655737704918, | |
| "grad_norm": 3.208958864212036, | |
| "learning_rate": 2.195831611018416e-05, | |
| "loss": 1.557002305984497, | |
| "step": 306 | |
| }, | |
| { | |
| "epoch": 0.8415300546448088, | |
| "grad_norm": 0.6225981116294861, | |
| "learning_rate": 2.1911822199978905e-05, | |
| "loss": 0.9215947985649109, | |
| "step": 308 | |
| }, | |
| { | |
| "epoch": 0.8469945355191257, | |
| "grad_norm": 0.45970940589904785, | |
| "learning_rate": 2.186503209531028e-05, | |
| "loss": 1.2569689750671387, | |
| "step": 310 | |
| }, | |
| { | |
| "epoch": 0.8524590163934426, | |
| "grad_norm": 0.6107621788978577, | |
| "learning_rate": 2.1817947494202283e-05, | |
| "loss": 1.2291556596755981, | |
| "step": 312 | |
| }, | |
| { | |
| "epoch": 0.8579234972677595, | |
| "grad_norm": 0.5108576416969299, | |
| "learning_rate": 2.1770570105366253e-05, | |
| "loss": 1.2330490350723267, | |
| "step": 314 | |
| }, | |
| { | |
| "epoch": 0.8633879781420765, | |
| "grad_norm": 0.4778086841106415, | |
| "learning_rate": 2.1722901648138872e-05, | |
| "loss": 1.2342405319213867, | |
| "step": 316 | |
| }, | |
| { | |
| "epoch": 0.8688524590163934, | |
| "grad_norm": 0.44053593277931213, | |
| "learning_rate": 2.1674943852419754e-05, | |
| "loss": 1.2049052715301514, | |
| "step": 318 | |
| }, | |
| { | |
| "epoch": 0.8743169398907104, | |
| "grad_norm": 0.5842193961143494, | |
| "learning_rate": 2.1626698458608678e-05, | |
| "loss": 1.3800193071365356, | |
| "step": 320 | |
| }, | |
| { | |
| "epoch": 0.8797814207650273, | |
| "grad_norm": 0.8365160226821899, | |
| "learning_rate": 2.1578167217542434e-05, | |
| "loss": 1.1121832132339478, | |
| "step": 322 | |
| }, | |
| { | |
| "epoch": 0.8852459016393442, | |
| "grad_norm": 0.6218600273132324, | |
| "learning_rate": 2.1529351890431267e-05, | |
| "loss": 0.7286252975463867, | |
| "step": 324 | |
| }, | |
| { | |
| "epoch": 0.8907103825136612, | |
| "grad_norm": 1.4032604694366455, | |
| "learning_rate": 2.1480254248794972e-05, | |
| "loss": 1.2567256689071655, | |
| "step": 326 | |
| }, | |
| { | |
| "epoch": 0.8961748633879781, | |
| "grad_norm": 0.823307991027832, | |
| "learning_rate": 2.1430876074398613e-05, | |
| "loss": 1.1028892993927002, | |
| "step": 328 | |
| }, | |
| { | |
| "epoch": 0.9016393442622951, | |
| "grad_norm": 0.49343931674957275, | |
| "learning_rate": 2.138121915918785e-05, | |
| "loss": 1.2607791423797607, | |
| "step": 330 | |
| }, | |
| { | |
| "epoch": 0.907103825136612, | |
| "grad_norm": 2.3485193252563477, | |
| "learning_rate": 2.133128530522391e-05, | |
| "loss": 1.3200721740722656, | |
| "step": 332 | |
| }, | |
| { | |
| "epoch": 0.912568306010929, | |
| "grad_norm": 0.5272196531295776, | |
| "learning_rate": 2.1281076324618197e-05, | |
| "loss": 1.1300745010375977, | |
| "step": 334 | |
| }, | |
| { | |
| "epoch": 0.9180327868852459, | |
| "grad_norm": 0.32180359959602356, | |
| "learning_rate": 2.123059403946653e-05, | |
| "loss": 1.2066279649734497, | |
| "step": 336 | |
| }, | |
| { | |
| "epoch": 0.9234972677595629, | |
| "grad_norm": 0.5927256941795349, | |
| "learning_rate": 2.1179840281783002e-05, | |
| "loss": 1.2223135232925415, | |
| "step": 338 | |
| }, | |
| { | |
| "epoch": 0.9289617486338798, | |
| "grad_norm": 0.36580249667167664, | |
| "learning_rate": 2.1128816893433524e-05, | |
| "loss": 1.2143908739089966, | |
| "step": 340 | |
| }, | |
| { | |
| "epoch": 0.9344262295081968, | |
| "grad_norm": 0.3221298158168793, | |
| "learning_rate": 2.107752572606895e-05, | |
| "loss": 1.242573618888855, | |
| "step": 342 | |
| }, | |
| { | |
| "epoch": 0.9398907103825137, | |
| "grad_norm": 1.2049970626831055, | |
| "learning_rate": 2.1025968641057917e-05, | |
| "loss": 1.2492969036102295, | |
| "step": 344 | |
| }, | |
| { | |
| "epoch": 0.9453551912568307, | |
| "grad_norm": 0.30961552262306213, | |
| "learning_rate": 2.097414750941927e-05, | |
| "loss": 1.5939970016479492, | |
| "step": 346 | |
| }, | |
| { | |
| "epoch": 0.9508196721311475, | |
| "grad_norm": 2.088078498840332, | |
| "learning_rate": 2.092206421175416e-05, | |
| "loss": 0.9665464162826538, | |
| "step": 348 | |
| }, | |
| { | |
| "epoch": 0.9562841530054644, | |
| "grad_norm": 0.36332985758781433, | |
| "learning_rate": 2.0869720638177817e-05, | |
| "loss": 0.918722927570343, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 0.9617486338797814, | |
| "grad_norm": 0.3078669011592865, | |
| "learning_rate": 2.0817118688250933e-05, | |
| "loss": 1.2081761360168457, | |
| "step": 352 | |
| }, | |
| { | |
| "epoch": 0.9672131147540983, | |
| "grad_norm": 0.9750831127166748, | |
| "learning_rate": 2.0764260270910753e-05, | |
| "loss": 0.9622431993484497, | |
| "step": 354 | |
| }, | |
| { | |
| "epoch": 0.9726775956284153, | |
| "grad_norm": 0.31717461347579956, | |
| "learning_rate": 2.0711147304401775e-05, | |
| "loss": 1.1162071228027344, | |
| "step": 356 | |
| }, | |
| { | |
| "epoch": 0.9781420765027322, | |
| "grad_norm": 0.4209156930446625, | |
| "learning_rate": 2.0657781716206156e-05, | |
| "loss": 0.7970324754714966, | |
| "step": 358 | |
| }, | |
| { | |
| "epoch": 0.9836065573770492, | |
| "grad_norm": 0.3800775110721588, | |
| "learning_rate": 2.0604165442973738e-05, | |
| "loss": 1.2143193483352661, | |
| "step": 360 | |
| }, | |
| { | |
| "epoch": 0.9890710382513661, | |
| "grad_norm": 0.5515578985214233, | |
| "learning_rate": 2.0550300430451805e-05, | |
| "loss": 1.0052040815353394, | |
| "step": 362 | |
| }, | |
| { | |
| "epoch": 0.994535519125683, | |
| "grad_norm": 0.3717899024486542, | |
| "learning_rate": 2.049618863341443e-05, | |
| "loss": 1.1964337825775146, | |
| "step": 364 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "grad_norm": 0.28382399678230286, | |
| "learning_rate": 2.0441832015591565e-05, | |
| "loss": 1.1717913150787354, | |
| "step": 366 | |
| }, | |
| { | |
| "epoch": 1.005464480874317, | |
| "grad_norm": 0.7105363607406616, | |
| "learning_rate": 2.038723254959775e-05, | |
| "loss": 1.0392205715179443, | |
| "step": 368 | |
| }, | |
| { | |
| "epoch": 1.010928961748634, | |
| "grad_norm": 0.40812334418296814, | |
| "learning_rate": 2.033239221686057e-05, | |
| "loss": 0.9397889971733093, | |
| "step": 370 | |
| }, | |
| { | |
| "epoch": 1.0163934426229508, | |
| "grad_norm": 0.4597087800502777, | |
| "learning_rate": 2.0277313007548702e-05, | |
| "loss": 0.973659873008728, | |
| "step": 372 | |
| }, | |
| { | |
| "epoch": 1.0218579234972678, | |
| "grad_norm": 0.4998292326927185, | |
| "learning_rate": 2.022199692049971e-05, | |
| "loss": 0.9722001552581787, | |
| "step": 374 | |
| }, | |
| { | |
| "epoch": 1.0273224043715847, | |
| "grad_norm": 1.1149412393569946, | |
| "learning_rate": 2.0166445963147514e-05, | |
| "loss": 0.7914890050888062, | |
| "step": 376 | |
| }, | |
| { | |
| "epoch": 1.0327868852459017, | |
| "grad_norm": 0.4389345049858093, | |
| "learning_rate": 2.0110662151449538e-05, | |
| "loss": 0.985954999923706, | |
| "step": 378 | |
| }, | |
| { | |
| "epoch": 1.0382513661202186, | |
| "grad_norm": 0.44061946868896484, | |
| "learning_rate": 2.0054647509813533e-05, | |
| "loss": 0.6762386560440063, | |
| "step": 380 | |
| }, | |
| { | |
| "epoch": 1.0437158469945356, | |
| "grad_norm": 1.143591284751892, | |
| "learning_rate": 1.9998404071024132e-05, | |
| "loss": 0.8554490208625793, | |
| "step": 382 | |
| }, | |
| { | |
| "epoch": 1.0491803278688525, | |
| "grad_norm": 0.4450607895851135, | |
| "learning_rate": 1.9941933876169064e-05, | |
| "loss": 0.933778703212738, | |
| "step": 384 | |
| }, | |
| { | |
| "epoch": 1.0546448087431695, | |
| "grad_norm": 0.9178969860076904, | |
| "learning_rate": 1.9885238974565092e-05, | |
| "loss": 0.7338276505470276, | |
| "step": 386 | |
| }, | |
| { | |
| "epoch": 1.0601092896174864, | |
| "grad_norm": 1.0910370349884033, | |
| "learning_rate": 1.982832142368365e-05, | |
| "loss": 0.6850086450576782, | |
| "step": 388 | |
| }, | |
| { | |
| "epoch": 1.0655737704918034, | |
| "grad_norm": 0.4918898940086365, | |
| "learning_rate": 1.9771183289076148e-05, | |
| "loss": 1.1391122341156006, | |
| "step": 390 | |
| }, | |
| { | |
| "epoch": 1.0710382513661203, | |
| "grad_norm": 0.4459122121334076, | |
| "learning_rate": 1.9713826644299056e-05, | |
| "loss": 0.9440389275550842, | |
| "step": 392 | |
| }, | |
| { | |
| "epoch": 1.0765027322404372, | |
| "grad_norm": 0.4374397099018097, | |
| "learning_rate": 1.96562535708386e-05, | |
| "loss": 0.7485268115997314, | |
| "step": 394 | |
| }, | |
| { | |
| "epoch": 1.0819672131147542, | |
| "grad_norm": 0.3154756426811218, | |
| "learning_rate": 1.959846615803528e-05, | |
| "loss": 1.1058332920074463, | |
| "step": 396 | |
| }, | |
| { | |
| "epoch": 1.0874316939890711, | |
| "grad_norm": 0.43673595786094666, | |
| "learning_rate": 1.954046650300801e-05, | |
| "loss": 0.9705526232719421, | |
| "step": 398 | |
| }, | |
| { | |
| "epoch": 1.092896174863388, | |
| "grad_norm": 0.5820497274398804, | |
| "learning_rate": 1.9482256710578028e-05, | |
| "loss": 1.0358953475952148, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 1.098360655737705, | |
| "grad_norm": 0.9388911128044128, | |
| "learning_rate": 1.94238388931925e-05, | |
| "loss": 0.9990125894546509, | |
| "step": 402 | |
| }, | |
| { | |
| "epoch": 1.1038251366120218, | |
| "grad_norm": 0.34635043144226074, | |
| "learning_rate": 1.9365215170847887e-05, | |
| "loss": 1.1284903287887573, | |
| "step": 404 | |
| }, | |
| { | |
| "epoch": 1.1092896174863387, | |
| "grad_norm": 1.0394965410232544, | |
| "learning_rate": 1.9306387671012965e-05, | |
| "loss": 1.1225481033325195, | |
| "step": 406 | |
| }, | |
| { | |
| "epoch": 1.1147540983606556, | |
| "grad_norm": 0.5064483284950256, | |
| "learning_rate": 1.9247358528551662e-05, | |
| "loss": 0.6432326436042786, | |
| "step": 408 | |
| }, | |
| { | |
| "epoch": 1.1202185792349726, | |
| "grad_norm": 0.2968895733356476, | |
| "learning_rate": 1.918812988564555e-05, | |
| "loss": 1.1994177103042603, | |
| "step": 410 | |
| }, | |
| { | |
| "epoch": 1.1256830601092895, | |
| "grad_norm": 0.6034783720970154, | |
| "learning_rate": 1.9128703891716138e-05, | |
| "loss": 0.7406198382377625, | |
| "step": 412 | |
| }, | |
| { | |
| "epoch": 1.1311475409836065, | |
| "grad_norm": 0.3230381906032562, | |
| "learning_rate": 1.906908270334683e-05, | |
| "loss": 0.7596896886825562, | |
| "step": 414 | |
| }, | |
| { | |
| "epoch": 1.1366120218579234, | |
| "grad_norm": 9.573822975158691, | |
| "learning_rate": 1.90092684842047e-05, | |
| "loss": 0.6884374022483826, | |
| "step": 416 | |
| }, | |
| { | |
| "epoch": 1.1420765027322404, | |
| "grad_norm": 0.9430112838745117, | |
| "learning_rate": 1.8949263404961936e-05, | |
| "loss": 0.970964789390564, | |
| "step": 418 | |
| }, | |
| { | |
| "epoch": 1.1475409836065573, | |
| "grad_norm": 0.2956700325012207, | |
| "learning_rate": 1.888906964321711e-05, | |
| "loss": 0.9549157023429871, | |
| "step": 420 | |
| }, | |
| { | |
| "epoch": 1.1530054644808743, | |
| "grad_norm": 0.28723564743995667, | |
| "learning_rate": 1.88286893834161e-05, | |
| "loss": 0.9879151582717896, | |
| "step": 422 | |
| }, | |
| { | |
| "epoch": 1.1584699453551912, | |
| "grad_norm": 0.968940794467926, | |
| "learning_rate": 1.8768124816772854e-05, | |
| "loss": 0.7193975448608398, | |
| "step": 424 | |
| }, | |
| { | |
| "epoch": 1.1639344262295082, | |
| "grad_norm": 0.36670926213264465, | |
| "learning_rate": 1.870737814118987e-05, | |
| "loss": 0.6931469440460205, | |
| "step": 426 | |
| }, | |
| { | |
| "epoch": 1.169398907103825, | |
| "grad_norm": 0.34420526027679443, | |
| "learning_rate": 1.86464515611784e-05, | |
| "loss": 1.1592483520507812, | |
| "step": 428 | |
| }, | |
| { | |
| "epoch": 1.174863387978142, | |
| "grad_norm": 0.3030218183994293, | |
| "learning_rate": 1.8585347287778493e-05, | |
| "loss": 0.9302864074707031, | |
| "step": 430 | |
| }, | |
| { | |
| "epoch": 1.180327868852459, | |
| "grad_norm": 0.4653378427028656, | |
| "learning_rate": 1.8524067538478713e-05, | |
| "loss": 0.9986540675163269, | |
| "step": 432 | |
| }, | |
| { | |
| "epoch": 1.185792349726776, | |
| "grad_norm": 0.5103849172592163, | |
| "learning_rate": 1.8462614537135708e-05, | |
| "loss": 0.8803820013999939, | |
| "step": 434 | |
| }, | |
| { | |
| "epoch": 1.1912568306010929, | |
| "grad_norm": 0.3337787091732025, | |
| "learning_rate": 1.840099051389346e-05, | |
| "loss": 1.0739984512329102, | |
| "step": 436 | |
| }, | |
| { | |
| "epoch": 1.1967213114754098, | |
| "grad_norm": 0.38103100657463074, | |
| "learning_rate": 1.8339197705102396e-05, | |
| "loss": 0.928661048412323, | |
| "step": 438 | |
| }, | |
| { | |
| "epoch": 1.2021857923497268, | |
| "grad_norm": 0.6027284264564514, | |
| "learning_rate": 1.8277238353238193e-05, | |
| "loss": 0.9021416902542114, | |
| "step": 440 | |
| }, | |
| { | |
| "epoch": 1.2076502732240437, | |
| "grad_norm": 0.4615493416786194, | |
| "learning_rate": 1.8215114706820435e-05, | |
| "loss": 0.9990072250366211, | |
| "step": 442 | |
| }, | |
| { | |
| "epoch": 1.2131147540983607, | |
| "grad_norm": 0.338590145111084, | |
| "learning_rate": 1.8152829020330975e-05, | |
| "loss": 0.9475133419036865, | |
| "step": 444 | |
| }, | |
| { | |
| "epoch": 1.2185792349726776, | |
| "grad_norm": 0.4421752095222473, | |
| "learning_rate": 1.8090383554132158e-05, | |
| "loss": 0.6166588068008423, | |
| "step": 446 | |
| }, | |
| { | |
| "epoch": 1.2240437158469946, | |
| "grad_norm": 0.760307788848877, | |
| "learning_rate": 1.8027780574384753e-05, | |
| "loss": 0.9786165952682495, | |
| "step": 448 | |
| }, | |
| { | |
| "epoch": 1.2295081967213115, | |
| "grad_norm": 0.3657715320587158, | |
| "learning_rate": 1.796502235296575e-05, | |
| "loss": 0.9717714786529541, | |
| "step": 450 | |
| }, | |
| { | |
| "epoch": 1.2349726775956285, | |
| "grad_norm": 0.5236809253692627, | |
| "learning_rate": 1.7902111167385883e-05, | |
| "loss": 0.9157909750938416, | |
| "step": 452 | |
| }, | |
| { | |
| "epoch": 1.2404371584699454, | |
| "grad_norm": 0.8714286684989929, | |
| "learning_rate": 1.7839049300707003e-05, | |
| "loss": 0.5283688306808472, | |
| "step": 454 | |
| }, | |
| { | |
| "epoch": 1.2459016393442623, | |
| "grad_norm": 3.8211936950683594, | |
| "learning_rate": 1.7775839041459203e-05, | |
| "loss": 0.760151207447052, | |
| "step": 456 | |
| }, | |
| { | |
| "epoch": 1.2513661202185793, | |
| "grad_norm": 0.6044387221336365, | |
| "learning_rate": 1.7712482683557778e-05, | |
| "loss": 0.7560343742370605, | |
| "step": 458 | |
| }, | |
| { | |
| "epoch": 1.2568306010928962, | |
| "grad_norm": 0.35686007142066956, | |
| "learning_rate": 1.7648982526219995e-05, | |
| "loss": 0.9490774273872375, | |
| "step": 460 | |
| }, | |
| { | |
| "epoch": 1.2622950819672132, | |
| "grad_norm": 0.31403011083602905, | |
| "learning_rate": 1.7585340873881617e-05, | |
| "loss": 0.700722336769104, | |
| "step": 462 | |
| }, | |
| { | |
| "epoch": 1.2677595628415301, | |
| "grad_norm": 0.49292150139808655, | |
| "learning_rate": 1.7521560036113303e-05, | |
| "loss": 0.9336669445037842, | |
| "step": 464 | |
| }, | |
| { | |
| "epoch": 1.273224043715847, | |
| "grad_norm": 0.6907988786697388, | |
| "learning_rate": 1.7457642327536777e-05, | |
| "loss": 1.0117403268814087, | |
| "step": 466 | |
| }, | |
| { | |
| "epoch": 1.278688524590164, | |
| "grad_norm": 0.31901487708091736, | |
| "learning_rate": 1.7393590067740852e-05, | |
| "loss": 0.9249045252799988, | |
| "step": 468 | |
| }, | |
| { | |
| "epoch": 1.2841530054644807, | |
| "grad_norm": 0.5312005877494812, | |
| "learning_rate": 1.732940558119722e-05, | |
| "loss": 1.0543935298919678, | |
| "step": 470 | |
| }, | |
| { | |
| "epoch": 1.289617486338798, | |
| "grad_norm": 0.39483997225761414, | |
| "learning_rate": 1.726509119717612e-05, | |
| "loss": 1.0982601642608643, | |
| "step": 472 | |
| }, | |
| { | |
| "epoch": 1.2950819672131146, | |
| "grad_norm": 0.3161986172199249, | |
| "learning_rate": 1.7200649249661806e-05, | |
| "loss": 1.0299053192138672, | |
| "step": 474 | |
| }, | |
| { | |
| "epoch": 1.3005464480874318, | |
| "grad_norm": 0.7943216562271118, | |
| "learning_rate": 1.7136082077267837e-05, | |
| "loss": 0.44314101338386536, | |
| "step": 476 | |
| }, | |
| { | |
| "epoch": 1.3060109289617485, | |
| "grad_norm": 0.2828512191772461, | |
| "learning_rate": 1.7071392023152208e-05, | |
| "loss": 0.9447046518325806, | |
| "step": 478 | |
| }, | |
| { | |
| "epoch": 1.3114754098360657, | |
| "grad_norm": 0.48383840918540955, | |
| "learning_rate": 1.700658143493233e-05, | |
| "loss": 1.0983808040618896, | |
| "step": 480 | |
| }, | |
| { | |
| "epoch": 1.3169398907103824, | |
| "grad_norm": 0.469735711812973, | |
| "learning_rate": 1.6941652664599814e-05, | |
| "loss": 0.8579000234603882, | |
| "step": 482 | |
| }, | |
| { | |
| "epoch": 1.3224043715846996, | |
| "grad_norm": 0.8114180564880371, | |
| "learning_rate": 1.687660806843514e-05, | |
| "loss": 0.65126633644104, | |
| "step": 484 | |
| }, | |
| { | |
| "epoch": 1.3278688524590163, | |
| "grad_norm": 0.7241629958152771, | |
| "learning_rate": 1.681145000692213e-05, | |
| "loss": 0.9474157691001892, | |
| "step": 486 | |
| }, | |
| { | |
| "epoch": 1.3333333333333333, | |
| "grad_norm": 0.44979581236839294, | |
| "learning_rate": 1.6746180844662286e-05, | |
| "loss": 0.9454953074455261, | |
| "step": 488 | |
| }, | |
| { | |
| "epoch": 1.3387978142076502, | |
| "grad_norm": 0.5671207904815674, | |
| "learning_rate": 1.6680802950288988e-05, | |
| "loss": 0.7097105979919434, | |
| "step": 490 | |
| }, | |
| { | |
| "epoch": 1.3442622950819672, | |
| "grad_norm": 0.30622348189353943, | |
| "learning_rate": 1.661531869638153e-05, | |
| "loss": 1.0415374040603638, | |
| "step": 492 | |
| }, | |
| { | |
| "epoch": 1.349726775956284, | |
| "grad_norm": 0.1534983515739441, | |
| "learning_rate": 1.6549730459379012e-05, | |
| "loss": 0.7396904230117798, | |
| "step": 494 | |
| }, | |
| { | |
| "epoch": 1.355191256830601, | |
| "grad_norm": 0.3498384952545166, | |
| "learning_rate": 1.648404061949412e-05, | |
| "loss": 1.0003196001052856, | |
| "step": 496 | |
| }, | |
| { | |
| "epoch": 1.360655737704918, | |
| "grad_norm": 0.5373937487602234, | |
| "learning_rate": 1.641825156062671e-05, | |
| "loss": 1.0593230724334717, | |
| "step": 498 | |
| }, | |
| { | |
| "epoch": 1.366120218579235, | |
| "grad_norm": 0.4811841547489166, | |
| "learning_rate": 1.635236567027734e-05, | |
| "loss": 1.1468924283981323, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 1.3715846994535519, | |
| "grad_norm": 0.3064674735069275, | |
| "learning_rate": 1.6286385339460596e-05, | |
| "loss": 0.8854779005050659, | |
| "step": 502 | |
| }, | |
| { | |
| "epoch": 1.3770491803278688, | |
| "grad_norm": 0.7713015079498291, | |
| "learning_rate": 1.6220312962618332e-05, | |
| "loss": 0.5700801014900208, | |
| "step": 504 | |
| }, | |
| { | |
| "epoch": 1.3825136612021858, | |
| "grad_norm": 0.33278217911720276, | |
| "learning_rate": 1.6154150937532775e-05, | |
| "loss": 0.8875858187675476, | |
| "step": 506 | |
| }, | |
| { | |
| "epoch": 1.3879781420765027, | |
| "grad_norm": 0.23079992830753326, | |
| "learning_rate": 1.6087901665239502e-05, | |
| "loss": 0.9823563694953918, | |
| "step": 508 | |
| }, | |
| { | |
| "epoch": 1.3934426229508197, | |
| "grad_norm": 0.3576752245426178, | |
| "learning_rate": 1.602156754994032e-05, | |
| "loss": 1.0344420671463013, | |
| "step": 510 | |
| }, | |
| { | |
| "epoch": 1.3989071038251366, | |
| "grad_norm": 0.32972317934036255, | |
| "learning_rate": 1.5955150998916003e-05, | |
| "loss": 1.19247305393219, | |
| "step": 512 | |
| }, | |
| { | |
| "epoch": 1.4043715846994536, | |
| "grad_norm": 0.21017959713935852, | |
| "learning_rate": 1.5888654422438946e-05, | |
| "loss": 0.7628836035728455, | |
| "step": 514 | |
| }, | |
| { | |
| "epoch": 1.4098360655737705, | |
| "grad_norm": 0.3349050283432007, | |
| "learning_rate": 1.5822080233685678e-05, | |
| "loss": 0.625062108039856, | |
| "step": 516 | |
| }, | |
| { | |
| "epoch": 1.4153005464480874, | |
| "grad_norm": 0.33164167404174805, | |
| "learning_rate": 1.575543084864929e-05, | |
| "loss": 1.0269337892532349, | |
| "step": 518 | |
| }, | |
| { | |
| "epoch": 1.4207650273224044, | |
| "grad_norm": 1.8047852516174316, | |
| "learning_rate": 1.5688708686051784e-05, | |
| "loss": 0.6237314939498901, | |
| "step": 520 | |
| }, | |
| { | |
| "epoch": 1.4262295081967213, | |
| "grad_norm": 0.5525245070457458, | |
| "learning_rate": 1.5621916167256255e-05, | |
| "loss": 0.9513725638389587, | |
| "step": 522 | |
| }, | |
| { | |
| "epoch": 1.4316939890710383, | |
| "grad_norm": 0.5575222373008728, | |
| "learning_rate": 1.555505571617906e-05, | |
| "loss": 0.7160999178886414, | |
| "step": 524 | |
| }, | |
| { | |
| "epoch": 1.4371584699453552, | |
| "grad_norm": 5.68476676940918, | |
| "learning_rate": 1.5488129759201822e-05, | |
| "loss": 0.9523558616638184, | |
| "step": 526 | |
| }, | |
| { | |
| "epoch": 1.4426229508196722, | |
| "grad_norm": 0.42110544443130493, | |
| "learning_rate": 1.5421140725083388e-05, | |
| "loss": 0.6882522106170654, | |
| "step": 528 | |
| }, | |
| { | |
| "epoch": 1.4480874316939891, | |
| "grad_norm": 0.7138206958770752, | |
| "learning_rate": 1.53540910448717e-05, | |
| "loss": 0.8077874779701233, | |
| "step": 530 | |
| }, | |
| { | |
| "epoch": 1.453551912568306, | |
| "grad_norm": 0.8821026086807251, | |
| "learning_rate": 1.5286983151815553e-05, | |
| "loss": 0.6050822138786316, | |
| "step": 532 | |
| }, | |
| { | |
| "epoch": 1.459016393442623, | |
| "grad_norm": 0.26990213990211487, | |
| "learning_rate": 1.52198194812763e-05, | |
| "loss": 0.9843153357505798, | |
| "step": 534 | |
| }, | |
| { | |
| "epoch": 1.46448087431694, | |
| "grad_norm": 0.2653787434101105, | |
| "learning_rate": 1.515260247063948e-05, | |
| "loss": 0.9575402736663818, | |
| "step": 536 | |
| }, | |
| { | |
| "epoch": 1.469945355191257, | |
| "grad_norm": 0.3735542893409729, | |
| "learning_rate": 1.5085334559226342e-05, | |
| "loss": 0.9459043145179749, | |
| "step": 538 | |
| }, | |
| { | |
| "epoch": 1.4754098360655736, | |
| "grad_norm": 0.3478133976459503, | |
| "learning_rate": 1.5018018188205354e-05, | |
| "loss": 0.9919190406799316, | |
| "step": 540 | |
| }, | |
| { | |
| "epoch": 1.4808743169398908, | |
| "grad_norm": 0.3831535279750824, | |
| "learning_rate": 1.4950655800503577e-05, | |
| "loss": 0.9412453770637512, | |
| "step": 542 | |
| }, | |
| { | |
| "epoch": 1.4863387978142075, | |
| "grad_norm": 0.24345508217811584, | |
| "learning_rate": 1.4883249840718045e-05, | |
| "loss": 0.8606211543083191, | |
| "step": 544 | |
| }, | |
| { | |
| "epoch": 1.4918032786885247, | |
| "grad_norm": 1.155060887336731, | |
| "learning_rate": 1.4815802755027019e-05, | |
| "loss": 1.0808299779891968, | |
| "step": 546 | |
| }, | |
| { | |
| "epoch": 1.4972677595628414, | |
| "grad_norm": 1.0949746370315552, | |
| "learning_rate": 1.474831699110124e-05, | |
| "loss": 0.4519907534122467, | |
| "step": 548 | |
| }, | |
| { | |
| "epoch": 1.5027322404371586, | |
| "grad_norm": 0.49436184763908386, | |
| "learning_rate": 1.4680794998015081e-05, | |
| "loss": 0.9571016430854797, | |
| "step": 550 | |
| }, | |
| { | |
| "epoch": 1.5081967213114753, | |
| "grad_norm": 0.3276909291744232, | |
| "learning_rate": 1.4613239226157693e-05, | |
| "loss": 0.9168761968612671, | |
| "step": 552 | |
| }, | |
| { | |
| "epoch": 1.5136612021857925, | |
| "grad_norm": 0.524336040019989, | |
| "learning_rate": 1.4545652127144052e-05, | |
| "loss": 0.9712042808532715, | |
| "step": 554 | |
| }, | |
| { | |
| "epoch": 1.5191256830601092, | |
| "grad_norm": 0.5152002573013306, | |
| "learning_rate": 1.4478036153726013e-05, | |
| "loss": 0.9781864285469055, | |
| "step": 556 | |
| }, | |
| { | |
| "epoch": 1.5245901639344264, | |
| "grad_norm": 0.564622700214386, | |
| "learning_rate": 1.4410393759703286e-05, | |
| "loss": 0.7834886312484741, | |
| "step": 558 | |
| }, | |
| { | |
| "epoch": 1.530054644808743, | |
| "grad_norm": 0.35116609930992126, | |
| "learning_rate": 1.4342727399834396e-05, | |
| "loss": 0.4728796184062958, | |
| "step": 560 | |
| }, | |
| { | |
| "epoch": 1.5355191256830603, | |
| "grad_norm": 0.8285826444625854, | |
| "learning_rate": 1.4275039529747592e-05, | |
| "loss": 0.8351629376411438, | |
| "step": 562 | |
| }, | |
| { | |
| "epoch": 1.540983606557377, | |
| "grad_norm": 4.833861827850342, | |
| "learning_rate": 1.4207332605851725e-05, | |
| "loss": 0.7368656992912292, | |
| "step": 564 | |
| }, | |
| { | |
| "epoch": 1.5464480874316942, | |
| "grad_norm": 0.31337863206863403, | |
| "learning_rate": 1.413960908524713e-05, | |
| "loss": 0.9681909680366516, | |
| "step": 566 | |
| }, | |
| { | |
| "epoch": 1.5519125683060109, | |
| "grad_norm": 0.648554265499115, | |
| "learning_rate": 1.407187142563643e-05, | |
| "loss": 0.742579460144043, | |
| "step": 568 | |
| }, | |
| { | |
| "epoch": 1.5573770491803278, | |
| "grad_norm": 0.29901963472366333, | |
| "learning_rate": 1.4004122085235349e-05, | |
| "loss": 1.0365455150604248, | |
| "step": 570 | |
| }, | |
| { | |
| "epoch": 1.5628415300546448, | |
| "grad_norm": 7.1447272300720215, | |
| "learning_rate": 1.3936363522683532e-05, | |
| "loss": 0.9658475518226624, | |
| "step": 572 | |
| }, | |
| { | |
| "epoch": 1.5683060109289617, | |
| "grad_norm": 0.29757052659988403, | |
| "learning_rate": 1.3868598196955273e-05, | |
| "loss": 0.9510630965232849, | |
| "step": 574 | |
| }, | |
| { | |
| "epoch": 1.5737704918032787, | |
| "grad_norm": 0.2788982093334198, | |
| "learning_rate": 1.3800828567270318e-05, | |
| "loss": 0.7666032910346985, | |
| "step": 576 | |
| }, | |
| { | |
| "epoch": 1.5792349726775956, | |
| "grad_norm": 0.5154399275779724, | |
| "learning_rate": 1.3733057093004605e-05, | |
| "loss": 1.097449779510498, | |
| "step": 578 | |
| }, | |
| { | |
| "epoch": 1.5846994535519126, | |
| "grad_norm": 0.4370734393596649, | |
| "learning_rate": 1.3665286233601002e-05, | |
| "loss": 0.4699048399925232, | |
| "step": 580 | |
| }, | |
| { | |
| "epoch": 1.5901639344262295, | |
| "grad_norm": 0.3564499020576477, | |
| "learning_rate": 1.3597518448480075e-05, | |
| "loss": 0.5950034856796265, | |
| "step": 582 | |
| }, | |
| { | |
| "epoch": 1.5956284153005464, | |
| "grad_norm": 0.36406558752059937, | |
| "learning_rate": 1.3529756196950815e-05, | |
| "loss": 0.6178205013275146, | |
| "step": 584 | |
| }, | |
| { | |
| "epoch": 1.6010928961748634, | |
| "grad_norm": 0.8772296905517578, | |
| "learning_rate": 1.3462001938121405e-05, | |
| "loss": 0.9911807775497437, | |
| "step": 586 | |
| }, | |
| { | |
| "epoch": 1.6065573770491803, | |
| "grad_norm": 0.36376476287841797, | |
| "learning_rate": 1.3394258130809961e-05, | |
| "loss": 1.0207103490829468, | |
| "step": 588 | |
| }, | |
| { | |
| "epoch": 1.6120218579234973, | |
| "grad_norm": 0.30281901359558105, | |
| "learning_rate": 1.332652723345533e-05, | |
| "loss": 0.6261776089668274, | |
| "step": 590 | |
| }, | |
| { | |
| "epoch": 1.6174863387978142, | |
| "grad_norm": 0.37459859251976013, | |
| "learning_rate": 1.3258811704027823e-05, | |
| "loss": 0.9526323080062866, | |
| "step": 592 | |
| }, | |
| { | |
| "epoch": 1.6229508196721312, | |
| "grad_norm": 1.4346907138824463, | |
| "learning_rate": 1.3191113999940072e-05, | |
| "loss": 0.7816427946090698, | |
| "step": 594 | |
| }, | |
| { | |
| "epoch": 1.6284153005464481, | |
| "grad_norm": 0.895928680896759, | |
| "learning_rate": 1.3123436577957814e-05, | |
| "loss": 0.5765082240104675, | |
| "step": 596 | |
| }, | |
| { | |
| "epoch": 1.633879781420765, | |
| "grad_norm": 0.3961792588233948, | |
| "learning_rate": 1.305578189411075e-05, | |
| "loss": 0.9641250967979431, | |
| "step": 598 | |
| }, | |
| { | |
| "epoch": 1.639344262295082, | |
| "grad_norm": 0.9060362577438354, | |
| "learning_rate": 1.2988152403603387e-05, | |
| "loss": 1.064900517463684, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 1.644808743169399, | |
| "grad_norm": 0.4039094150066376, | |
| "learning_rate": 1.2920550560725985e-05, | |
| "loss": 1.0425022840499878, | |
| "step": 602 | |
| }, | |
| { | |
| "epoch": 1.650273224043716, | |
| "grad_norm": 0.5800713300704956, | |
| "learning_rate": 1.2852978818765457e-05, | |
| "loss": 0.9887942671775818, | |
| "step": 604 | |
| }, | |
| { | |
| "epoch": 1.6557377049180326, | |
| "grad_norm": 1.7551496028900146, | |
| "learning_rate": 1.2785439629916346e-05, | |
| "loss": 1.0200444459915161, | |
| "step": 606 | |
| }, | |
| { | |
| "epoch": 1.6612021857923498, | |
| "grad_norm": 0.33184605836868286, | |
| "learning_rate": 1.2717935445191823e-05, | |
| "loss": 0.7819110751152039, | |
| "step": 608 | |
| }, | |
| { | |
| "epoch": 1.6666666666666665, | |
| "grad_norm": 0.32902389764785767, | |
| "learning_rate": 1.2650468714334764e-05, | |
| "loss": 1.0617104768753052, | |
| "step": 610 | |
| }, | |
| { | |
| "epoch": 1.6721311475409837, | |
| "grad_norm": 3.964526414871216, | |
| "learning_rate": 1.2583041885728841e-05, | |
| "loss": 0.703948974609375, | |
| "step": 612 | |
| }, | |
| { | |
| "epoch": 1.6775956284153004, | |
| "grad_norm": 0.3932097554206848, | |
| "learning_rate": 1.2515657406309653e-05, | |
| "loss": 0.9675039649009705, | |
| "step": 614 | |
| }, | |
| { | |
| "epoch": 1.6830601092896176, | |
| "grad_norm": 0.6059487462043762, | |
| "learning_rate": 1.2448317721475938e-05, | |
| "loss": 0.9838345646858215, | |
| "step": 616 | |
| }, | |
| { | |
| "epoch": 1.6885245901639343, | |
| "grad_norm": 0.23712986707687378, | |
| "learning_rate": 1.2381025275000826e-05, | |
| "loss": 0.5614522695541382, | |
| "step": 618 | |
| }, | |
| { | |
| "epoch": 1.6939890710382515, | |
| "grad_norm": 0.5339243412017822, | |
| "learning_rate": 1.2313782508943165e-05, | |
| "loss": 0.9133069515228271, | |
| "step": 620 | |
| }, | |
| { | |
| "epoch": 1.6994535519125682, | |
| "grad_norm": 0.6373515129089355, | |
| "learning_rate": 1.224659186355888e-05, | |
| "loss": 1.2104243040084839, | |
| "step": 622 | |
| }, | |
| { | |
| "epoch": 1.7049180327868854, | |
| "grad_norm": 0.3109021484851837, | |
| "learning_rate": 1.2179455777212426e-05, | |
| "loss": 0.6893042325973511, | |
| "step": 624 | |
| }, | |
| { | |
| "epoch": 1.710382513661202, | |
| "grad_norm": 0.33058202266693115, | |
| "learning_rate": 1.2112376686288302e-05, | |
| "loss": 0.6424486041069031, | |
| "step": 626 | |
| }, | |
| { | |
| "epoch": 1.7158469945355193, | |
| "grad_norm": 0.31533631682395935, | |
| "learning_rate": 1.204535702510263e-05, | |
| "loss": 1.0202336311340332, | |
| "step": 628 | |
| }, | |
| { | |
| "epoch": 1.721311475409836, | |
| "grad_norm": 0.5181791186332703, | |
| "learning_rate": 1.1978399225814802e-05, | |
| "loss": 0.7214987277984619, | |
| "step": 630 | |
| }, | |
| { | |
| "epoch": 1.7267759562841531, | |
| "grad_norm": 0.577717661857605, | |
| "learning_rate": 1.1911505718339238e-05, | |
| "loss": 0.962523341178894, | |
| "step": 632 | |
| }, | |
| { | |
| "epoch": 1.7322404371584699, | |
| "grad_norm": 0.3180966079235077, | |
| "learning_rate": 1.1844678930257189e-05, | |
| "loss": 1.064076542854309, | |
| "step": 634 | |
| }, | |
| { | |
| "epoch": 1.737704918032787, | |
| "grad_norm": 0.3360261023044586, | |
| "learning_rate": 1.1777921286728649e-05, | |
| "loss": 0.8713840246200562, | |
| "step": 636 | |
| }, | |
| { | |
| "epoch": 1.7431693989071038, | |
| "grad_norm": 0.41811901330947876, | |
| "learning_rate": 1.1711235210404322e-05, | |
| "loss": 0.9733143448829651, | |
| "step": 638 | |
| }, | |
| { | |
| "epoch": 1.748633879781421, | |
| "grad_norm": 0.9973921179771423, | |
| "learning_rate": 1.1644623121337744e-05, | |
| "loss": 0.8127912282943726, | |
| "step": 640 | |
| }, | |
| { | |
| "epoch": 1.7540983606557377, | |
| "grad_norm": 0.350824236869812, | |
| "learning_rate": 1.1578087436897425e-05, | |
| "loss": 1.1365082263946533, | |
| "step": 642 | |
| }, | |
| { | |
| "epoch": 1.7595628415300546, | |
| "grad_norm": 0.5923929810523987, | |
| "learning_rate": 1.1511630571679133e-05, | |
| "loss": 0.9738070368766785, | |
| "step": 644 | |
| }, | |
| { | |
| "epoch": 1.7650273224043715, | |
| "grad_norm": 0.6375901699066162, | |
| "learning_rate": 1.1445254937418262e-05, | |
| "loss": 0.658166766166687, | |
| "step": 646 | |
| }, | |
| { | |
| "epoch": 1.7704918032786885, | |
| "grad_norm": 0.5522438883781433, | |
| "learning_rate": 1.1378962942902325e-05, | |
| "loss": 0.9430595636367798, | |
| "step": 648 | |
| }, | |
| { | |
| "epoch": 1.7759562841530054, | |
| "grad_norm": 0.38589879870414734, | |
| "learning_rate": 1.1312756993883528e-05, | |
| "loss": 0.9222274422645569, | |
| "step": 650 | |
| }, | |
| { | |
| "epoch": 1.7814207650273224, | |
| "grad_norm": 0.33732476830482483, | |
| "learning_rate": 1.1246639492991464e-05, | |
| "loss": 0.9152341485023499, | |
| "step": 652 | |
| }, | |
| { | |
| "epoch": 1.7868852459016393, | |
| "grad_norm": 0.5345599055290222, | |
| "learning_rate": 1.1180612839645918e-05, | |
| "loss": 0.9442293643951416, | |
| "step": 654 | |
| }, | |
| { | |
| "epoch": 1.7923497267759563, | |
| "grad_norm": 6.358261585235596, | |
| "learning_rate": 1.1114679429969799e-05, | |
| "loss": 0.9770071506500244, | |
| "step": 656 | |
| }, | |
| { | |
| "epoch": 1.7978142076502732, | |
| "grad_norm": 0.6152437329292297, | |
| "learning_rate": 1.1048841656702183e-05, | |
| "loss": 0.7851976156234741, | |
| "step": 658 | |
| }, | |
| { | |
| "epoch": 1.8032786885245902, | |
| "grad_norm": 0.7620416879653931, | |
| "learning_rate": 1.0983101909111476e-05, | |
| "loss": 0.9333474040031433, | |
| "step": 660 | |
| }, | |
| { | |
| "epoch": 1.8087431693989071, | |
| "grad_norm": 0.3988627791404724, | |
| "learning_rate": 1.0917462572908709e-05, | |
| "loss": 0.9973476529121399, | |
| "step": 662 | |
| }, | |
| { | |
| "epoch": 1.814207650273224, | |
| "grad_norm": 0.9142587780952454, | |
| "learning_rate": 1.0851926030160964e-05, | |
| "loss": 1.0069029331207275, | |
| "step": 664 | |
| }, | |
| { | |
| "epoch": 1.819672131147541, | |
| "grad_norm": 0.4281405806541443, | |
| "learning_rate": 1.0786494659204919e-05, | |
| "loss": 0.9625430107116699, | |
| "step": 666 | |
| }, | |
| { | |
| "epoch": 1.825136612021858, | |
| "grad_norm": 0.4477498531341553, | |
| "learning_rate": 1.0721170834560539e-05, | |
| "loss": 0.9234293699264526, | |
| "step": 668 | |
| }, | |
| { | |
| "epoch": 1.830601092896175, | |
| "grad_norm": 0.2133922129869461, | |
| "learning_rate": 1.0655956926844917e-05, | |
| "loss": 0.6118216514587402, | |
| "step": 670 | |
| }, | |
| { | |
| "epoch": 1.8360655737704918, | |
| "grad_norm": 1.268988847732544, | |
| "learning_rate": 1.0590855302686232e-05, | |
| "loss": 1.179476022720337, | |
| "step": 672 | |
| }, | |
| { | |
| "epoch": 1.8415300546448088, | |
| "grad_norm": 8.498745918273926, | |
| "learning_rate": 1.0525868324637867e-05, | |
| "loss": 1.0010485649108887, | |
| "step": 674 | |
| }, | |
| { | |
| "epoch": 1.8469945355191257, | |
| "grad_norm": 0.42643609642982483, | |
| "learning_rate": 1.046099835109266e-05, | |
| "loss": 0.9508043527603149, | |
| "step": 676 | |
| }, | |
| { | |
| "epoch": 1.8524590163934427, | |
| "grad_norm": 0.23793873190879822, | |
| "learning_rate": 1.0396247736197342e-05, | |
| "loss": 0.2523411810398102, | |
| "step": 678 | |
| }, | |
| { | |
| "epoch": 1.8579234972677594, | |
| "grad_norm": 0.4815990924835205, | |
| "learning_rate": 1.0331618829767084e-05, | |
| "loss": 0.9425541758537292, | |
| "step": 680 | |
| }, | |
| { | |
| "epoch": 1.8633879781420766, | |
| "grad_norm": 0.6651197671890259, | |
| "learning_rate": 1.026711397720023e-05, | |
| "loss": 0.9597347974777222, | |
| "step": 682 | |
| }, | |
| { | |
| "epoch": 1.8688524590163933, | |
| "grad_norm": 0.4627138674259186, | |
| "learning_rate": 1.0202735519393172e-05, | |
| "loss": 0.8845030069351196, | |
| "step": 684 | |
| }, | |
| { | |
| "epoch": 1.8743169398907105, | |
| "grad_norm": 0.45274391770362854, | |
| "learning_rate": 1.013848579265542e-05, | |
| "loss": 0.6856569647789001, | |
| "step": 686 | |
| }, | |
| { | |
| "epoch": 1.8797814207650272, | |
| "grad_norm": 0.784737229347229, | |
| "learning_rate": 1.0074367128624787e-05, | |
| "loss": 0.9269533753395081, | |
| "step": 688 | |
| }, | |
| { | |
| "epoch": 1.8852459016393444, | |
| "grad_norm": 0.38772767782211304, | |
| "learning_rate": 1.0010381854182808e-05, | |
| "loss": 0.996447741985321, | |
| "step": 690 | |
| }, | |
| { | |
| "epoch": 1.890710382513661, | |
| "grad_norm": 0.47373294830322266, | |
| "learning_rate": 9.946532291370258e-06, | |
| "loss": 0.7233355045318604, | |
| "step": 692 | |
| }, | |
| { | |
| "epoch": 1.8961748633879782, | |
| "grad_norm": 0.7931384444236755, | |
| "learning_rate": 9.882820757302921e-06, | |
| "loss": 0.9529883861541748, | |
| "step": 694 | |
| }, | |
| { | |
| "epoch": 1.901639344262295, | |
| "grad_norm": 0.4432533383369446, | |
| "learning_rate": 9.819249564087476e-06, | |
| "loss": 0.9290885925292969, | |
| "step": 696 | |
| }, | |
| { | |
| "epoch": 1.9071038251366121, | |
| "grad_norm": 0.5274680852890015, | |
| "learning_rate": 9.755821018737602e-06, | |
| "loss": 0.720592737197876, | |
| "step": 698 | |
| }, | |
| { | |
| "epoch": 1.9125683060109289, | |
| "grad_norm": 0.50493323802948, | |
| "learning_rate": 9.692537423090254e-06, | |
| "loss": 0.9398930668830872, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 1.918032786885246, | |
| "grad_norm": 0.6175666451454163, | |
| "learning_rate": 9.629401073722121e-06, | |
| "loss": 0.9208841919898987, | |
| "step": 702 | |
| }, | |
| { | |
| "epoch": 1.9234972677595628, | |
| "grad_norm": 0.41871604323387146, | |
| "learning_rate": 9.566414261866308e-06, | |
| "loss": 0.9789915680885315, | |
| "step": 704 | |
| }, | |
| { | |
| "epoch": 1.92896174863388, | |
| "grad_norm": 0.37654823064804077, | |
| "learning_rate": 9.503579273329149e-06, | |
| "loss": 1.0130558013916016, | |
| "step": 706 | |
| }, | |
| { | |
| "epoch": 1.9344262295081966, | |
| "grad_norm": 0.8550110459327698, | |
| "learning_rate": 9.440898388407277e-06, | |
| "loss": 1.022186040878296, | |
| "step": 708 | |
| }, | |
| { | |
| "epoch": 1.9398907103825138, | |
| "grad_norm": 0.2428680807352066, | |
| "learning_rate": 9.378373881804876e-06, | |
| "loss": 0.6459108591079712, | |
| "step": 710 | |
| }, | |
| { | |
| "epoch": 1.9453551912568305, | |
| "grad_norm": 0.4639998972415924, | |
| "learning_rate": 9.316008022551121e-06, | |
| "loss": 0.5808688998222351, | |
| "step": 712 | |
| }, | |
| { | |
| "epoch": 1.9508196721311475, | |
| "grad_norm": 0.4453340768814087, | |
| "learning_rate": 9.253803073917827e-06, | |
| "loss": 1.0057231187820435, | |
| "step": 714 | |
| }, | |
| { | |
| "epoch": 1.9562841530054644, | |
| "grad_norm": 0.38918569684028625, | |
| "learning_rate": 9.19176129333734e-06, | |
| "loss": 0.8170533776283264, | |
| "step": 716 | |
| }, | |
| { | |
| "epoch": 1.9617486338797814, | |
| "grad_norm": 0.3418762683868408, | |
| "learning_rate": 9.129884932320593e-06, | |
| "loss": 0.9313240647315979, | |
| "step": 718 | |
| }, | |
| { | |
| "epoch": 1.9672131147540983, | |
| "grad_norm": 4.1181488037109375, | |
| "learning_rate": 9.0681762363754e-06, | |
| "loss": 1.023482322692871, | |
| "step": 720 | |
| }, | |
| { | |
| "epoch": 1.9726775956284153, | |
| "grad_norm": 0.326570987701416, | |
| "learning_rate": 9.006637444924973e-06, | |
| "loss": 0.9234302639961243, | |
| "step": 722 | |
| }, | |
| { | |
| "epoch": 1.9781420765027322, | |
| "grad_norm": 0.37199193239212036, | |
| "learning_rate": 8.945270791226646e-06, | |
| "loss": 0.9178841710090637, | |
| "step": 724 | |
| }, | |
| { | |
| "epoch": 1.9836065573770492, | |
| "grad_norm": 1.147835612297058, | |
| "learning_rate": 8.884078502290838e-06, | |
| "loss": 0.90202796459198, | |
| "step": 726 | |
| }, | |
| { | |
| "epoch": 1.989071038251366, | |
| "grad_norm": 0.30575594305992126, | |
| "learning_rate": 8.823062798800233e-06, | |
| "loss": 0.9475547671318054, | |
| "step": 728 | |
| }, | |
| { | |
| "epoch": 1.994535519125683, | |
| "grad_norm": 2.0851244926452637, | |
| "learning_rate": 8.762225895029169e-06, | |
| "loss": 0.8127981424331665, | |
| "step": 730 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "grad_norm": 0.2703329920768738, | |
| "learning_rate": 8.701569998763323e-06, | |
| "loss": 0.9652237892150879, | |
| "step": 732 | |
| }, | |
| { | |
| "epoch": 2.0054644808743167, | |
| "grad_norm": 1.118648648262024, | |
| "learning_rate": 8.64109731121955e-06, | |
| "loss": 0.4204362630844116, | |
| "step": 734 | |
| }, | |
| { | |
| "epoch": 2.010928961748634, | |
| "grad_norm": 0.5438641309738159, | |
| "learning_rate": 8.580810026966034e-06, | |
| "loss": 0.6547160744667053, | |
| "step": 736 | |
| }, | |
| { | |
| "epoch": 2.0163934426229506, | |
| "grad_norm": 0.34117498993873596, | |
| "learning_rate": 8.520710333842605e-06, | |
| "loss": 0.6878098845481873, | |
| "step": 738 | |
| }, | |
| { | |
| "epoch": 2.021857923497268, | |
| "grad_norm": 0.32779020071029663, | |
| "learning_rate": 8.460800412881386e-06, | |
| "loss": 0.7106848955154419, | |
| "step": 740 | |
| }, | |
| { | |
| "epoch": 2.0273224043715845, | |
| "grad_norm": 0.5938042998313904, | |
| "learning_rate": 8.401082438227617e-06, | |
| "loss": 0.5050097703933716, | |
| "step": 742 | |
| }, | |
| { | |
| "epoch": 2.0327868852459017, | |
| "grad_norm": 0.7493045330047607, | |
| "learning_rate": 8.341558577060769e-06, | |
| "loss": 0.6207985281944275, | |
| "step": 744 | |
| }, | |
| { | |
| "epoch": 2.0382513661202184, | |
| "grad_norm": 0.37173107266426086, | |
| "learning_rate": 8.282230989515865e-06, | |
| "loss": 0.5415725708007812, | |
| "step": 746 | |
| }, | |
| { | |
| "epoch": 2.0437158469945356, | |
| "grad_norm": 0.3972921073436737, | |
| "learning_rate": 8.223101828605141e-06, | |
| "loss": 0.6260330677032471, | |
| "step": 748 | |
| }, | |
| { | |
| "epoch": 2.0491803278688523, | |
| "grad_norm": 0.26544108986854553, | |
| "learning_rate": 8.164173240139865e-06, | |
| "loss": 0.41082635521888733, | |
| "step": 750 | |
| }, | |
| { | |
| "epoch": 2.0546448087431695, | |
| "grad_norm": 0.21062718331813812, | |
| "learning_rate": 8.105447362652492e-06, | |
| "loss": 0.437963604927063, | |
| "step": 752 | |
| }, | |
| { | |
| "epoch": 2.060109289617486, | |
| "grad_norm": 1.982082486152649, | |
| "learning_rate": 8.046926327319037e-06, | |
| "loss": 0.5802850127220154, | |
| "step": 754 | |
| }, | |
| { | |
| "epoch": 2.0655737704918034, | |
| "grad_norm": 0.6770855784416199, | |
| "learning_rate": 7.988612257881757e-06, | |
| "loss": 0.7075764536857605, | |
| "step": 756 | |
| }, | |
| { | |
| "epoch": 2.07103825136612, | |
| "grad_norm": 0.5049643516540527, | |
| "learning_rate": 7.930507270572065e-06, | |
| "loss": 0.5730771422386169, | |
| "step": 758 | |
| }, | |
| { | |
| "epoch": 2.0765027322404372, | |
| "grad_norm": 1.2611545324325562, | |
| "learning_rate": 7.872613474033722e-06, | |
| "loss": 0.6248353719711304, | |
| "step": 760 | |
| }, | |
| { | |
| "epoch": 2.081967213114754, | |
| "grad_norm": 0.4194056987762451, | |
| "learning_rate": 7.814932969246342e-06, | |
| "loss": 0.5690703988075256, | |
| "step": 762 | |
| }, | |
| { | |
| "epoch": 2.087431693989071, | |
| "grad_norm": 0.28264543414115906, | |
| "learning_rate": 7.75746784944912e-06, | |
| "loss": 0.6123548746109009, | |
| "step": 764 | |
| }, | |
| { | |
| "epoch": 2.092896174863388, | |
| "grad_norm": 0.29934635758399963, | |
| "learning_rate": 7.700220200064889e-06, | |
| "loss": 0.6302112340927124, | |
| "step": 766 | |
| }, | |
| { | |
| "epoch": 2.098360655737705, | |
| "grad_norm": 1.8415086269378662, | |
| "learning_rate": 7.643192098624413e-06, | |
| "loss": 0.7583162188529968, | |
| "step": 768 | |
| }, | |
| { | |
| "epoch": 2.1038251366120218, | |
| "grad_norm": 1.0556191205978394, | |
| "learning_rate": 7.586385614691024e-06, | |
| "loss": 0.6580595970153809, | |
| "step": 770 | |
| }, | |
| { | |
| "epoch": 2.109289617486339, | |
| "grad_norm": 0.8480823040008545, | |
| "learning_rate": 7.529802809785497e-06, | |
| "loss": 0.7083532810211182, | |
| "step": 772 | |
| }, | |
| { | |
| "epoch": 2.1147540983606556, | |
| "grad_norm": 0.3261125683784485, | |
| "learning_rate": 7.473445737311244e-06, | |
| "loss": 0.7163298726081848, | |
| "step": 774 | |
| }, | |
| { | |
| "epoch": 2.120218579234973, | |
| "grad_norm": 0.3656948506832123, | |
| "learning_rate": 7.417316442479783e-06, | |
| "loss": 0.5138447284698486, | |
| "step": 776 | |
| }, | |
| { | |
| "epoch": 2.1256830601092895, | |
| "grad_norm": 1.383690595626831, | |
| "learning_rate": 7.361416962236542e-06, | |
| "loss": 0.6027840971946716, | |
| "step": 778 | |
| }, | |
| { | |
| "epoch": 2.1311475409836067, | |
| "grad_norm": 0.33852696418762207, | |
| "learning_rate": 7.305749325186914e-06, | |
| "loss": 0.6325546503067017, | |
| "step": 780 | |
| }, | |
| { | |
| "epoch": 2.1366120218579234, | |
| "grad_norm": 0.2947288155555725, | |
| "learning_rate": 7.2503155515226575e-06, | |
| "loss": 0.7005398273468018, | |
| "step": 782 | |
| }, | |
| { | |
| "epoch": 2.1420765027322406, | |
| "grad_norm": 0.4960687458515167, | |
| "learning_rate": 7.195117652948555e-06, | |
| "loss": 0.8759350776672363, | |
| "step": 784 | |
| }, | |
| { | |
| "epoch": 2.1475409836065573, | |
| "grad_norm": 0.24415802955627441, | |
| "learning_rate": 7.14015763260944e-06, | |
| "loss": 0.381770521402359, | |
| "step": 786 | |
| }, | |
| { | |
| "epoch": 2.1530054644808745, | |
| "grad_norm": 0.8835662603378296, | |
| "learning_rate": 7.085437485017487e-06, | |
| "loss": 0.6162024736404419, | |
| "step": 788 | |
| }, | |
| { | |
| "epoch": 2.158469945355191, | |
| "grad_norm": 0.36485755443573, | |
| "learning_rate": 7.030959195979833e-06, | |
| "loss": 0.6147274374961853, | |
| "step": 790 | |
| }, | |
| { | |
| "epoch": 2.1639344262295084, | |
| "grad_norm": 0.3451128304004669, | |
| "learning_rate": 6.976724742526503e-06, | |
| "loss": 0.5143959522247314, | |
| "step": 792 | |
| }, | |
| { | |
| "epoch": 2.169398907103825, | |
| "grad_norm": 1.797385573387146, | |
| "learning_rate": 6.92273609283868e-06, | |
| "loss": 0.5045967102050781, | |
| "step": 794 | |
| }, | |
| { | |
| "epoch": 2.1748633879781423, | |
| "grad_norm": 0.3312281668186188, | |
| "learning_rate": 6.868995206177264e-06, | |
| "loss": 0.7605709433555603, | |
| "step": 796 | |
| }, | |
| { | |
| "epoch": 2.180327868852459, | |
| "grad_norm": 0.3288355767726898, | |
| "learning_rate": 6.815504032811775e-06, | |
| "loss": 0.6243901252746582, | |
| "step": 798 | |
| }, | |
| { | |
| "epoch": 2.185792349726776, | |
| "grad_norm": 1.5838336944580078, | |
| "learning_rate": 6.7622645139495815e-06, | |
| "loss": 0.6908902525901794, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 2.191256830601093, | |
| "grad_norm": 0.6093941926956177, | |
| "learning_rate": 6.7092785816654515e-06, | |
| "loss": 0.5978571772575378, | |
| "step": 802 | |
| }, | |
| { | |
| "epoch": 2.19672131147541, | |
| "grad_norm": 0.4110351502895355, | |
| "learning_rate": 6.656548158831436e-06, | |
| "loss": 0.697502851486206, | |
| "step": 804 | |
| }, | |
| { | |
| "epoch": 2.202185792349727, | |
| "grad_norm": 0.3014414608478546, | |
| "learning_rate": 6.6040751590470825e-06, | |
| "loss": 0.6352847814559937, | |
| "step": 806 | |
| }, | |
| { | |
| "epoch": 2.2076502732240435, | |
| "grad_norm": 0.30577147006988525, | |
| "learning_rate": 6.551861486569997e-06, | |
| "loss": 0.4661785066127777, | |
| "step": 808 | |
| }, | |
| { | |
| "epoch": 2.2131147540983607, | |
| "grad_norm": 0.34597665071487427, | |
| "learning_rate": 6.499909036246738e-06, | |
| "loss": 0.36809349060058594, | |
| "step": 810 | |
| }, | |
| { | |
| "epoch": 2.2185792349726774, | |
| "grad_norm": 0.4040527045726776, | |
| "learning_rate": 6.448219693444056e-06, | |
| "loss": 0.6888415813446045, | |
| "step": 812 | |
| }, | |
| { | |
| "epoch": 2.2240437158469946, | |
| "grad_norm": 0.520148754119873, | |
| "learning_rate": 6.39679533398045e-06, | |
| "loss": 0.6813967823982239, | |
| "step": 814 | |
| }, | |
| { | |
| "epoch": 2.2295081967213113, | |
| "grad_norm": 0.5317114591598511, | |
| "learning_rate": 6.345637824058126e-06, | |
| "loss": 0.6947481036186218, | |
| "step": 816 | |
| }, | |
| { | |
| "epoch": 2.2349726775956285, | |
| "grad_norm": 0.7962093353271484, | |
| "learning_rate": 6.294749020195253e-06, | |
| "loss": 0.686174213886261, | |
| "step": 818 | |
| }, | |
| { | |
| "epoch": 2.240437158469945, | |
| "grad_norm": 0.3029099106788635, | |
| "learning_rate": 6.244130769158594e-06, | |
| "loss": 0.6232653260231018, | |
| "step": 820 | |
| }, | |
| { | |
| "epoch": 2.2459016393442623, | |
| "grad_norm": 0.8965455889701843, | |
| "learning_rate": 6.193784907896478e-06, | |
| "loss": 0.6392536163330078, | |
| "step": 822 | |
| }, | |
| { | |
| "epoch": 2.251366120218579, | |
| "grad_norm": 0.7625441551208496, | |
| "learning_rate": 6.143713263472156e-06, | |
| "loss": 0.8782878518104553, | |
| "step": 824 | |
| }, | |
| { | |
| "epoch": 2.2568306010928962, | |
| "grad_norm": 0.4552961587905884, | |
| "learning_rate": 6.0939176529974765e-06, | |
| "loss": 0.42297598719596863, | |
| "step": 826 | |
| }, | |
| { | |
| "epoch": 2.262295081967213, | |
| "grad_norm": 0.43949148058891296, | |
| "learning_rate": 6.044399883566959e-06, | |
| "loss": 0.447804719209671, | |
| "step": 828 | |
| }, | |
| { | |
| "epoch": 2.26775956284153, | |
| "grad_norm": 0.32859939336776733, | |
| "learning_rate": 5.995161752192192e-06, | |
| "loss": 0.7380506992340088, | |
| "step": 830 | |
| }, | |
| { | |
| "epoch": 2.273224043715847, | |
| "grad_norm": 0.4013267457485199, | |
| "learning_rate": 5.9462050457366446e-06, | |
| "loss": 0.3883093297481537, | |
| "step": 832 | |
| }, | |
| { | |
| "epoch": 2.278688524590164, | |
| "grad_norm": 0.34844857454299927, | |
| "learning_rate": 5.897531540850807e-06, | |
| "loss": 0.42827335000038147, | |
| "step": 834 | |
| }, | |
| { | |
| "epoch": 2.2841530054644807, | |
| "grad_norm": 0.533165454864502, | |
| "learning_rate": 5.849143003907717e-06, | |
| "loss": 0.680249810218811, | |
| "step": 836 | |
| }, | |
| { | |
| "epoch": 2.289617486338798, | |
| "grad_norm": 0.7737491130828857, | |
| "learning_rate": 5.801041190938849e-06, | |
| "loss": 0.7983654141426086, | |
| "step": 838 | |
| }, | |
| { | |
| "epoch": 2.2950819672131146, | |
| "grad_norm": 0.31483733654022217, | |
| "learning_rate": 5.75322784757041e-06, | |
| "loss": 0.4408833086490631, | |
| "step": 840 | |
| }, | |
| { | |
| "epoch": 2.300546448087432, | |
| "grad_norm": 0.34635281562805176, | |
| "learning_rate": 5.705704708959968e-06, | |
| "loss": 0.7192622423171997, | |
| "step": 842 | |
| }, | |
| { | |
| "epoch": 2.3060109289617485, | |
| "grad_norm": 0.49761828780174255, | |
| "learning_rate": 5.658473499733499e-06, | |
| "loss": 0.6560445427894592, | |
| "step": 844 | |
| }, | |
| { | |
| "epoch": 2.3114754098360657, | |
| "grad_norm": 0.4457787275314331, | |
| "learning_rate": 5.611535933922781e-06, | |
| "loss": 0.36786019802093506, | |
| "step": 846 | |
| }, | |
| { | |
| "epoch": 2.3169398907103824, | |
| "grad_norm": 0.39284324645996094, | |
| "learning_rate": 5.564893714903211e-06, | |
| "loss": 0.600739061832428, | |
| "step": 848 | |
| }, | |
| { | |
| "epoch": 2.3224043715846996, | |
| "grad_norm": 0.3350694477558136, | |
| "learning_rate": 5.518548535331984e-06, | |
| "loss": 0.6502145528793335, | |
| "step": 850 | |
| }, | |
| { | |
| "epoch": 2.3278688524590163, | |
| "grad_norm": 0.518315851688385, | |
| "learning_rate": 5.472502077086653e-06, | |
| "loss": 0.6010422706604004, | |
| "step": 852 | |
| }, | |
| { | |
| "epoch": 2.3333333333333335, | |
| "grad_norm": 0.7739505767822266, | |
| "learning_rate": 5.426756011204108e-06, | |
| "loss": 0.5903787016868591, | |
| "step": 854 | |
| }, | |
| { | |
| "epoch": 2.33879781420765, | |
| "grad_norm": 0.8213343024253845, | |
| "learning_rate": 5.381311997819933e-06, | |
| "loss": 0.8286283016204834, | |
| "step": 856 | |
| }, | |
| { | |
| "epoch": 2.3442622950819674, | |
| "grad_norm": 0.3943159878253937, | |
| "learning_rate": 5.336171686108153e-06, | |
| "loss": 0.5135136246681213, | |
| "step": 858 | |
| }, | |
| { | |
| "epoch": 2.349726775956284, | |
| "grad_norm": 0.3654269278049469, | |
| "learning_rate": 5.291336714221382e-06, | |
| "loss": 0.6539085507392883, | |
| "step": 860 | |
| }, | |
| { | |
| "epoch": 2.3551912568306013, | |
| "grad_norm": 0.49811530113220215, | |
| "learning_rate": 5.246808709231382e-06, | |
| "loss": 0.6904571652412415, | |
| "step": 862 | |
| }, | |
| { | |
| "epoch": 2.360655737704918, | |
| "grad_norm": 0.4545181393623352, | |
| "learning_rate": 5.202589287070019e-06, | |
| "loss": 0.5245177745819092, | |
| "step": 864 | |
| }, | |
| { | |
| "epoch": 2.366120218579235, | |
| "grad_norm": 0.575053334236145, | |
| "learning_rate": 5.158680052470609e-06, | |
| "loss": 0.5184382200241089, | |
| "step": 866 | |
| }, | |
| { | |
| "epoch": 2.371584699453552, | |
| "grad_norm": 0.2895403504371643, | |
| "learning_rate": 5.115082598909689e-06, | |
| "loss": 0.6546509265899658, | |
| "step": 868 | |
| }, | |
| { | |
| "epoch": 2.3770491803278686, | |
| "grad_norm": 0.43105390667915344, | |
| "learning_rate": 5.071798508549189e-06, | |
| "loss": 0.7096179127693176, | |
| "step": 870 | |
| }, | |
| { | |
| "epoch": 2.3825136612021858, | |
| "grad_norm": 0.6513200402259827, | |
| "learning_rate": 5.028829352179017e-06, | |
| "loss": 0.7800822257995605, | |
| "step": 872 | |
| }, | |
| { | |
| "epoch": 2.387978142076503, | |
| "grad_norm": 0.30751562118530273, | |
| "learning_rate": 4.986176689160042e-06, | |
| "loss": 0.6386380791664124, | |
| "step": 874 | |
| }, | |
| { | |
| "epoch": 2.3934426229508197, | |
| "grad_norm": 3.6829833984375, | |
| "learning_rate": 4.943842067367525e-06, | |
| "loss": 0.7829623818397522, | |
| "step": 876 | |
| }, | |
| { | |
| "epoch": 2.3989071038251364, | |
| "grad_norm": 0.2786366641521454, | |
| "learning_rate": 4.901827023134931e-06, | |
| "loss": 0.6524488925933838, | |
| "step": 878 | |
| }, | |
| { | |
| "epoch": 2.4043715846994536, | |
| "grad_norm": 0.3131651282310486, | |
| "learning_rate": 4.86013308119818e-06, | |
| "loss": 0.5887367129325867, | |
| "step": 880 | |
| }, | |
| { | |
| "epoch": 2.4098360655737707, | |
| "grad_norm": 0.3001626431941986, | |
| "learning_rate": 4.818761754640316e-06, | |
| "loss": 0.4331092834472656, | |
| "step": 882 | |
| }, | |
| { | |
| "epoch": 2.4153005464480874, | |
| "grad_norm": 0.4316854774951935, | |
| "learning_rate": 4.777714544836585e-06, | |
| "loss": 0.6600074768066406, | |
| "step": 884 | |
| }, | |
| { | |
| "epoch": 2.420765027322404, | |
| "grad_norm": 0.35097604990005493, | |
| "learning_rate": 4.736992941399972e-06, | |
| "loss": 0.694169819355011, | |
| "step": 886 | |
| }, | |
| { | |
| "epoch": 2.4262295081967213, | |
| "grad_norm": 0.29989713430404663, | |
| "learning_rate": 4.696598422127119e-06, | |
| "loss": 0.36676090955734253, | |
| "step": 888 | |
| }, | |
| { | |
| "epoch": 2.431693989071038, | |
| "grad_norm": 0.2177748829126358, | |
| "learning_rate": 4.656532452944716e-06, | |
| "loss": 0.4195106327533722, | |
| "step": 890 | |
| }, | |
| { | |
| "epoch": 2.4371584699453552, | |
| "grad_norm": 0.701615571975708, | |
| "learning_rate": 4.616796487856282e-06, | |
| "loss": 0.6059967279434204, | |
| "step": 892 | |
| }, | |
| { | |
| "epoch": 2.442622950819672, | |
| "grad_norm": 0.34977203607559204, | |
| "learning_rate": 4.577391968889416e-06, | |
| "loss": 0.4103294312953949, | |
| "step": 894 | |
| }, | |
| { | |
| "epoch": 2.448087431693989, | |
| "grad_norm": 0.30477455258369446, | |
| "learning_rate": 4.53832032604346e-06, | |
| "loss": 0.6227977275848389, | |
| "step": 896 | |
| }, | |
| { | |
| "epoch": 2.453551912568306, | |
| "grad_norm": 0.3769943416118622, | |
| "learning_rate": 4.499582977237594e-06, | |
| "loss": 0.6697421073913574, | |
| "step": 898 | |
| }, | |
| { | |
| "epoch": 2.459016393442623, | |
| "grad_norm": 0.4096207618713379, | |
| "learning_rate": 4.461181328259397e-06, | |
| "loss": 0.6066514849662781, | |
| "step": 900 | |
| }, | |
| { | |
| "epoch": 2.4644808743169397, | |
| "grad_norm": 0.41548797488212585, | |
| "learning_rate": 4.423116772713817e-06, | |
| "loss": 0.6535893678665161, | |
| "step": 902 | |
| }, | |
| { | |
| "epoch": 2.469945355191257, | |
| "grad_norm": 0.5664173364639282, | |
| "learning_rate": 4.3853906919726045e-06, | |
| "loss": 0.7845959067344666, | |
| "step": 904 | |
| }, | |
| { | |
| "epoch": 2.4754098360655736, | |
| "grad_norm": 0.938927412033081, | |
| "learning_rate": 4.348004455124173e-06, | |
| "loss": 0.47357067465782166, | |
| "step": 906 | |
| }, | |
| { | |
| "epoch": 2.480874316939891, | |
| "grad_norm": 0.10545186698436737, | |
| "learning_rate": 4.310959418923926e-06, | |
| "loss": 0.12602397799491882, | |
| "step": 908 | |
| }, | |
| { | |
| "epoch": 2.4863387978142075, | |
| "grad_norm": 0.13464003801345825, | |
| "learning_rate": 4.274256927745015e-06, | |
| "loss": 0.46652600169181824, | |
| "step": 910 | |
| }, | |
| { | |
| "epoch": 2.4918032786885247, | |
| "grad_norm": 0.29866641759872437, | |
| "learning_rate": 4.23789831352955e-06, | |
| "loss": 0.4135476052761078, | |
| "step": 912 | |
| }, | |
| { | |
| "epoch": 2.4972677595628414, | |
| "grad_norm": 0.478240430355072, | |
| "learning_rate": 4.2018848957402605e-06, | |
| "loss": 0.6472349166870117, | |
| "step": 914 | |
| }, | |
| { | |
| "epoch": 2.5027322404371586, | |
| "grad_norm": 0.5485062003135681, | |
| "learning_rate": 4.166217981312621e-06, | |
| "loss": 0.670555830001831, | |
| "step": 916 | |
| }, | |
| { | |
| "epoch": 2.5081967213114753, | |
| "grad_norm": 0.3627215027809143, | |
| "learning_rate": 4.130898864607417e-06, | |
| "loss": 0.7340973019599915, | |
| "step": 918 | |
| }, | |
| { | |
| "epoch": 2.5136612021857925, | |
| "grad_norm": 0.3155612647533417, | |
| "learning_rate": 4.09592882736377e-06, | |
| "loss": 0.760129451751709, | |
| "step": 920 | |
| }, | |
| { | |
| "epoch": 2.519125683060109, | |
| "grad_norm": 0.2517678737640381, | |
| "learning_rate": 4.061309138652622e-06, | |
| "loss": 0.7795225977897644, | |
| "step": 922 | |
| }, | |
| { | |
| "epoch": 2.5245901639344264, | |
| "grad_norm": 0.24501533806324005, | |
| "learning_rate": 4.027041054830689e-06, | |
| "loss": 0.3959949314594269, | |
| "step": 924 | |
| }, | |
| { | |
| "epoch": 2.530054644808743, | |
| "grad_norm": 0.35373446345329285, | |
| "learning_rate": 3.993125819494862e-06, | |
| "loss": 0.3947203457355499, | |
| "step": 926 | |
| }, | |
| { | |
| "epoch": 2.5355191256830603, | |
| "grad_norm": 0.3647337853908539, | |
| "learning_rate": 3.9595646634370745e-06, | |
| "loss": 0.444293737411499, | |
| "step": 928 | |
| }, | |
| { | |
| "epoch": 2.540983606557377, | |
| "grad_norm": 0.45152467489242554, | |
| "learning_rate": 3.926358804599636e-06, | |
| "loss": 0.5909146070480347, | |
| "step": 930 | |
| }, | |
| { | |
| "epoch": 2.546448087431694, | |
| "grad_norm": 0.4575729966163635, | |
| "learning_rate": 3.8935094480310425e-06, | |
| "loss": 0.5589189529418945, | |
| "step": 932 | |
| }, | |
| { | |
| "epoch": 2.551912568306011, | |
| "grad_norm": 0.45959311723709106, | |
| "learning_rate": 3.861017785842234e-06, | |
| "loss": 0.7737894654273987, | |
| "step": 934 | |
| }, | |
| { | |
| "epoch": 2.557377049180328, | |
| "grad_norm": 2.111557960510254, | |
| "learning_rate": 3.828884997163339e-06, | |
| "loss": 0.5457797050476074, | |
| "step": 936 | |
| }, | |
| { | |
| "epoch": 2.5628415300546448, | |
| "grad_norm": 0.6638699769973755, | |
| "learning_rate": 3.7971122481008757e-06, | |
| "loss": 0.4922334849834442, | |
| "step": 938 | |
| }, | |
| { | |
| "epoch": 2.5683060109289615, | |
| "grad_norm": 0.36368152499198914, | |
| "learning_rate": 3.7657006916954453e-06, | |
| "loss": 0.6106823086738586, | |
| "step": 940 | |
| }, | |
| { | |
| "epoch": 2.5737704918032787, | |
| "grad_norm": 1.6843950748443604, | |
| "learning_rate": 3.734651467879878e-06, | |
| "loss": 0.42035162448883057, | |
| "step": 942 | |
| }, | |
| { | |
| "epoch": 2.579234972677596, | |
| "grad_norm": 0.42817074060440063, | |
| "learning_rate": 3.7039657034378726e-06, | |
| "loss": 0.5863317251205444, | |
| "step": 944 | |
| }, | |
| { | |
| "epoch": 2.5846994535519126, | |
| "grad_norm": 0.13783489167690277, | |
| "learning_rate": 3.6736445119630964e-06, | |
| "loss": 0.13127169013023376, | |
| "step": 946 | |
| }, | |
| { | |
| "epoch": 2.5901639344262293, | |
| "grad_norm": 0.4673374593257904, | |
| "learning_rate": 3.6436889938187833e-06, | |
| "loss": 0.6087139844894409, | |
| "step": 948 | |
| }, | |
| { | |
| "epoch": 2.5956284153005464, | |
| "grad_norm": 0.27695614099502563, | |
| "learning_rate": 3.6141002360977864e-06, | |
| "loss": 0.41615864634513855, | |
| "step": 950 | |
| }, | |
| { | |
| "epoch": 2.6010928961748636, | |
| "grad_norm": 0.3966348171234131, | |
| "learning_rate": 3.584879312583141e-06, | |
| "loss": 0.8187358379364014, | |
| "step": 952 | |
| }, | |
| { | |
| "epoch": 2.6065573770491803, | |
| "grad_norm": 0.648745596408844, | |
| "learning_rate": 3.5560272837090935e-06, | |
| "loss": 0.3758191168308258, | |
| "step": 954 | |
| }, | |
| { | |
| "epoch": 2.612021857923497, | |
| "grad_norm": 0.153128981590271, | |
| "learning_rate": 3.527545196522615e-06, | |
| "loss": 0.16518115997314453, | |
| "step": 956 | |
| }, | |
| { | |
| "epoch": 2.6174863387978142, | |
| "grad_norm": 0.3588615953922272, | |
| "learning_rate": 3.499434084645407e-06, | |
| "loss": 0.3766753375530243, | |
| "step": 958 | |
| }, | |
| { | |
| "epoch": 2.6229508196721314, | |
| "grad_norm": 0.491727352142334, | |
| "learning_rate": 3.4716949682363803e-06, | |
| "loss": 0.694807231426239, | |
| "step": 960 | |
| }, | |
| { | |
| "epoch": 2.628415300546448, | |
| "grad_norm": 0.6780725121498108, | |
| "learning_rate": 3.4443288539546508e-06, | |
| "loss": 0.4412088692188263, | |
| "step": 962 | |
| }, | |
| { | |
| "epoch": 2.633879781420765, | |
| "grad_norm": 0.280350923538208, | |
| "learning_rate": 3.417336734922998e-06, | |
| "loss": 0.7602761387825012, | |
| "step": 964 | |
| }, | |
| { | |
| "epoch": 2.639344262295082, | |
| "grad_norm": 0.33981382846832275, | |
| "learning_rate": 3.3907195906918236e-06, | |
| "loss": 0.6312009692192078, | |
| "step": 966 | |
| }, | |
| { | |
| "epoch": 2.644808743169399, | |
| "grad_norm": 0.42649969458580017, | |
| "learning_rate": 3.364478387203602e-06, | |
| "loss": 0.5924428105354309, | |
| "step": 968 | |
| }, | |
| { | |
| "epoch": 2.650273224043716, | |
| "grad_norm": 0.5315715074539185, | |
| "learning_rate": 3.338614076757832e-06, | |
| "loss": 0.5652517676353455, | |
| "step": 970 | |
| }, | |
| { | |
| "epoch": 2.6557377049180326, | |
| "grad_norm": 0.3227130174636841, | |
| "learning_rate": 3.3131275979764746e-06, | |
| "loss": 0.6779371500015259, | |
| "step": 972 | |
| }, | |
| { | |
| "epoch": 2.66120218579235, | |
| "grad_norm": 0.2851775288581848, | |
| "learning_rate": 3.288019875769889e-06, | |
| "loss": 0.37949252128601074, | |
| "step": 974 | |
| }, | |
| { | |
| "epoch": 2.6666666666666665, | |
| "grad_norm": 0.32090258598327637, | |
| "learning_rate": 3.2632918213032687e-06, | |
| "loss": 0.6465896964073181, | |
| "step": 976 | |
| }, | |
| { | |
| "epoch": 2.6721311475409837, | |
| "grad_norm": 0.5186685919761658, | |
| "learning_rate": 3.2389443319635762e-06, | |
| "loss": 0.6305201649665833, | |
| "step": 978 | |
| }, | |
| { | |
| "epoch": 2.6775956284153004, | |
| "grad_norm": 0.3238012194633484, | |
| "learning_rate": 3.2149782913269715e-06, | |
| "loss": 0.7430388927459717, | |
| "step": 980 | |
| }, | |
| { | |
| "epoch": 2.6830601092896176, | |
| "grad_norm": 0.3504728078842163, | |
| "learning_rate": 3.1913945691267574e-06, | |
| "loss": 0.7213521003723145, | |
| "step": 982 | |
| }, | |
| { | |
| "epoch": 2.6885245901639343, | |
| "grad_norm": 0.22806833684444427, | |
| "learning_rate": 3.1681940212218013e-06, | |
| "loss": 0.30354568362236023, | |
| "step": 984 | |
| }, | |
| { | |
| "epoch": 2.6939890710382515, | |
| "grad_norm": 0.4042013883590698, | |
| "learning_rate": 3.145377489565492e-06, | |
| "loss": 0.686044454574585, | |
| "step": 986 | |
| }, | |
| { | |
| "epoch": 2.699453551912568, | |
| "grad_norm": 0.2897629737854004, | |
| "learning_rate": 3.1229458021751754e-06, | |
| "loss": 0.6415558457374573, | |
| "step": 988 | |
| }, | |
| { | |
| "epoch": 2.7049180327868854, | |
| "grad_norm": 0.31862911581993103, | |
| "learning_rate": 3.100899773102103e-06, | |
| "loss": 0.5853860974311829, | |
| "step": 990 | |
| }, | |
| { | |
| "epoch": 2.710382513661202, | |
| "grad_norm": 0.30188605189323425, | |
| "learning_rate": 3.0792402024019028e-06, | |
| "loss": 0.32691124081611633, | |
| "step": 992 | |
| }, | |
| { | |
| "epoch": 2.7158469945355193, | |
| "grad_norm": 0.44175922870635986, | |
| "learning_rate": 3.0579678761055294e-06, | |
| "loss": 0.6348515748977661, | |
| "step": 994 | |
| }, | |
| { | |
| "epoch": 2.721311475409836, | |
| "grad_norm": 0.32466843724250793, | |
| "learning_rate": 3.0370835661907522e-06, | |
| "loss": 0.6995946168899536, | |
| "step": 996 | |
| }, | |
| { | |
| "epoch": 2.726775956284153, | |
| "grad_norm": 0.32357221841812134, | |
| "learning_rate": 3.016588030554128e-06, | |
| "loss": 0.6425853967666626, | |
| "step": 998 | |
| }, | |
| { | |
| "epoch": 2.73224043715847, | |
| "grad_norm": 0.8249900937080383, | |
| "learning_rate": 2.996482012983508e-06, | |
| "loss": 0.598800003528595, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 2.737704918032787, | |
| "grad_norm": 0.31810635328292847, | |
| "learning_rate": 2.9767662431310373e-06, | |
| "loss": 0.6088637709617615, | |
| "step": 1002 | |
| }, | |
| { | |
| "epoch": 2.7431693989071038, | |
| "grad_norm": 0.37102603912353516, | |
| "learning_rate": 2.957441436486682e-06, | |
| "loss": 0.700023889541626, | |
| "step": 1004 | |
| }, | |
| { | |
| "epoch": 2.748633879781421, | |
| "grad_norm": 0.38252657651901245, | |
| "learning_rate": 2.9385082943522553e-06, | |
| "loss": 0.29863977432250977, | |
| "step": 1006 | |
| }, | |
| { | |
| "epoch": 2.7540983606557377, | |
| "grad_norm": 0.36938464641571045, | |
| "learning_rate": 2.9199675038159764e-06, | |
| "loss": 0.616381049156189, | |
| "step": 1008 | |
| }, | |
| { | |
| "epoch": 2.7595628415300544, | |
| "grad_norm": 0.4928862154483795, | |
| "learning_rate": 2.9018197377275334e-06, | |
| "loss": 0.6010699272155762, | |
| "step": 1010 | |
| }, | |
| { | |
| "epoch": 2.7650273224043715, | |
| "grad_norm": 0.5395115613937378, | |
| "learning_rate": 2.8840656546736607e-06, | |
| "loss": 0.5935303568840027, | |
| "step": 1012 | |
| }, | |
| { | |
| "epoch": 2.7704918032786887, | |
| "grad_norm": 0.3676987588405609, | |
| "learning_rate": 2.866705898954241e-06, | |
| "loss": 0.5782245993614197, | |
| "step": 1014 | |
| }, | |
| { | |
| "epoch": 2.7759562841530054, | |
| "grad_norm": 0.6417854428291321, | |
| "learning_rate": 2.8497411005589283e-06, | |
| "loss": 0.5679749846458435, | |
| "step": 1016 | |
| }, | |
| { | |
| "epoch": 2.781420765027322, | |
| "grad_norm": 0.6215810179710388, | |
| "learning_rate": 2.8331718751442793e-06, | |
| "loss": 0.5821006894111633, | |
| "step": 1018 | |
| }, | |
| { | |
| "epoch": 2.7868852459016393, | |
| "grad_norm": 0.23094283044338226, | |
| "learning_rate": 2.8169988240114145e-06, | |
| "loss": 0.3744300901889801, | |
| "step": 1020 | |
| }, | |
| { | |
| "epoch": 2.7923497267759565, | |
| "grad_norm": 0.3107250928878784, | |
| "learning_rate": 2.801222534084196e-06, | |
| "loss": 0.6346076130867004, | |
| "step": 1022 | |
| }, | |
| { | |
| "epoch": 2.797814207650273, | |
| "grad_norm": 0.274101197719574, | |
| "learning_rate": 2.785843577887926e-06, | |
| "loss": 0.36076998710632324, | |
| "step": 1024 | |
| }, | |
| { | |
| "epoch": 2.80327868852459, | |
| "grad_norm": 0.270465224981308, | |
| "learning_rate": 2.7708625135285717e-06, | |
| "loss": 0.7026212811470032, | |
| "step": 1026 | |
| }, | |
| { | |
| "epoch": 2.808743169398907, | |
| "grad_norm": 1.6409341096878052, | |
| "learning_rate": 2.7562798846725133e-06, | |
| "loss": 0.32709944248199463, | |
| "step": 1028 | |
| }, | |
| { | |
| "epoch": 2.8142076502732243, | |
| "grad_norm": 0.48914581537246704, | |
| "learning_rate": 2.742096220526808e-06, | |
| "loss": 0.6859836578369141, | |
| "step": 1030 | |
| }, | |
| { | |
| "epoch": 2.819672131147541, | |
| "grad_norm": 0.7330395579338074, | |
| "learning_rate": 2.728312035819995e-06, | |
| "loss": 0.6433828473091125, | |
| "step": 1032 | |
| }, | |
| { | |
| "epoch": 2.8251366120218577, | |
| "grad_norm": 0.5986846685409546, | |
| "learning_rate": 2.7149278307834047e-06, | |
| "loss": 0.5111646056175232, | |
| "step": 1034 | |
| }, | |
| { | |
| "epoch": 2.830601092896175, | |
| "grad_norm": 0.2934345602989197, | |
| "learning_rate": 2.701944091133011e-06, | |
| "loss": 0.6460334658622742, | |
| "step": 1036 | |
| }, | |
| { | |
| "epoch": 2.836065573770492, | |
| "grad_norm": 0.319169282913208, | |
| "learning_rate": 2.6893612880518064e-06, | |
| "loss": 0.7042778134346008, | |
| "step": 1038 | |
| }, | |
| { | |
| "epoch": 2.841530054644809, | |
| "grad_norm": 0.4729110598564148, | |
| "learning_rate": 2.677179878172699e-06, | |
| "loss": 0.6785165071487427, | |
| "step": 1040 | |
| }, | |
| { | |
| "epoch": 2.8469945355191255, | |
| "grad_norm": 0.29816320538520813, | |
| "learning_rate": 2.6654003035619427e-06, | |
| "loss": 0.6465342044830322, | |
| "step": 1042 | |
| }, | |
| { | |
| "epoch": 2.8524590163934427, | |
| "grad_norm": 0.2716350853443146, | |
| "learning_rate": 2.654022991703093e-06, | |
| "loss": 0.4655543565750122, | |
| "step": 1044 | |
| }, | |
| { | |
| "epoch": 2.8579234972677594, | |
| "grad_norm": 0.29152220487594604, | |
| "learning_rate": 2.6430483554814956e-06, | |
| "loss": 0.6557070016860962, | |
| "step": 1046 | |
| }, | |
| { | |
| "epoch": 2.8633879781420766, | |
| "grad_norm": 0.08173277974128723, | |
| "learning_rate": 2.632476793169303e-06, | |
| "loss": 0.29882189631462097, | |
| "step": 1048 | |
| }, | |
| { | |
| "epoch": 2.8688524590163933, | |
| "grad_norm": 0.3136751055717468, | |
| "learning_rate": 2.622308688411019e-06, | |
| "loss": 0.5553168058395386, | |
| "step": 1050 | |
| }, | |
| { | |
| "epoch": 2.8743169398907105, | |
| "grad_norm": 0.34415996074676514, | |
| "learning_rate": 2.612544410209574e-06, | |
| "loss": 0.5999379754066467, | |
| "step": 1052 | |
| }, | |
| { | |
| "epoch": 2.879781420765027, | |
| "grad_norm": 0.3563319742679596, | |
| "learning_rate": 2.6031843129129425e-06, | |
| "loss": 0.5782446265220642, | |
| "step": 1054 | |
| }, | |
| { | |
| "epoch": 2.8852459016393444, | |
| "grad_norm": 0.8886985778808594, | |
| "learning_rate": 2.594228736201274e-06, | |
| "loss": 0.6308610439300537, | |
| "step": 1056 | |
| }, | |
| { | |
| "epoch": 2.890710382513661, | |
| "grad_norm": 0.7436570525169373, | |
| "learning_rate": 2.5856780050745726e-06, | |
| "loss": 0.5041358470916748, | |
| "step": 1058 | |
| }, | |
| { | |
| "epoch": 2.8961748633879782, | |
| "grad_norm": 0.42356082797050476, | |
| "learning_rate": 2.577532429840896e-06, | |
| "loss": 0.6640922427177429, | |
| "step": 1060 | |
| }, | |
| { | |
| "epoch": 2.901639344262295, | |
| "grad_norm": 0.5104114413261414, | |
| "learning_rate": 2.5697923061051056e-06, | |
| "loss": 0.6469512581825256, | |
| "step": 1062 | |
| }, | |
| { | |
| "epoch": 2.907103825136612, | |
| "grad_norm": 0.4247635304927826, | |
| "learning_rate": 2.5624579147581233e-06, | |
| "loss": 0.7545850276947021, | |
| "step": 1064 | |
| }, | |
| { | |
| "epoch": 2.912568306010929, | |
| "grad_norm": 0.36332958936691284, | |
| "learning_rate": 2.555529521966754e-06, | |
| "loss": 0.31389331817626953, | |
| "step": 1066 | |
| }, | |
| { | |
| "epoch": 2.918032786885246, | |
| "grad_norm": 0.7010604739189148, | |
| "learning_rate": 2.5490073791640125e-06, | |
| "loss": 0.4420192539691925, | |
| "step": 1068 | |
| }, | |
| { | |
| "epoch": 2.9234972677595628, | |
| "grad_norm": 2.2278835773468018, | |
| "learning_rate": 2.542891723040009e-06, | |
| "loss": 0.577775239944458, | |
| "step": 1070 | |
| }, | |
| { | |
| "epoch": 2.92896174863388, | |
| "grad_norm": 0.5644426941871643, | |
| "learning_rate": 2.5371827755333562e-06, | |
| "loss": 0.672270655632019, | |
| "step": 1072 | |
| }, | |
| { | |
| "epoch": 2.9344262295081966, | |
| "grad_norm": 0.4127090275287628, | |
| "learning_rate": 2.5318807438231114e-06, | |
| "loss": 0.31474804878234863, | |
| "step": 1074 | |
| }, | |
| { | |
| "epoch": 2.939890710382514, | |
| "grad_norm": 0.21029745042324066, | |
| "learning_rate": 2.5269858203212634e-06, | |
| "loss": 0.36191999912261963, | |
| "step": 1076 | |
| }, | |
| { | |
| "epoch": 2.9453551912568305, | |
| "grad_norm": 4.180262565612793, | |
| "learning_rate": 2.522498182665746e-06, | |
| "loss": 0.49137362837791443, | |
| "step": 1078 | |
| }, | |
| { | |
| "epoch": 2.9508196721311473, | |
| "grad_norm": 0.6223632097244263, | |
| "learning_rate": 2.5184179937139957e-06, | |
| "loss": 0.5663042664527893, | |
| "step": 1080 | |
| }, | |
| { | |
| "epoch": 2.9562841530054644, | |
| "grad_norm": 0.24736368656158447, | |
| "learning_rate": 2.5147454015370368e-06, | |
| "loss": 0.47930705547332764, | |
| "step": 1082 | |
| }, | |
| { | |
| "epoch": 2.9617486338797816, | |
| "grad_norm": 0.2972562611103058, | |
| "learning_rate": 2.511480539414109e-06, | |
| "loss": 0.5615012645721436, | |
| "step": 1084 | |
| }, | |
| { | |
| "epoch": 2.9672131147540983, | |
| "grad_norm": 0.15695373713970184, | |
| "learning_rate": 2.508623525827835e-06, | |
| "loss": 0.3610137701034546, | |
| "step": 1086 | |
| }, | |
| { | |
| "epoch": 2.972677595628415, | |
| "grad_norm": 0.38721808791160583, | |
| "learning_rate": 2.5061744644599134e-06, | |
| "loss": 0.622841477394104, | |
| "step": 1088 | |
| }, | |
| { | |
| "epoch": 2.978142076502732, | |
| "grad_norm": 0.2315075397491455, | |
| "learning_rate": 2.504133444187364e-06, | |
| "loss": 0.47351524233818054, | |
| "step": 1090 | |
| }, | |
| { | |
| "epoch": 2.9836065573770494, | |
| "grad_norm": 0.33598557114601135, | |
| "learning_rate": 2.5025005390792964e-06, | |
| "loss": 0.5322554707527161, | |
| "step": 1092 | |
| }, | |
| { | |
| "epoch": 2.989071038251366, | |
| "grad_norm": 0.5088484287261963, | |
| "learning_rate": 2.5012758083942227e-06, | |
| "loss": 0.7013421058654785, | |
| "step": 1094 | |
| }, | |
| { | |
| "epoch": 2.994535519125683, | |
| "grad_norm": 0.34018367528915405, | |
| "learning_rate": 2.500459296577912e-06, | |
| "loss": 0.4154847264289856, | |
| "step": 1096 | |
| }, | |
| { | |
| "epoch": 3.0, | |
| "grad_norm": 0.34509673714637756, | |
| "learning_rate": 2.5000510332617706e-06, | |
| "loss": 0.582455575466156, | |
| "step": 1098 | |
| }, | |
| { | |
| "epoch": 3.0, | |
| "step": 1098, | |
| "total_flos": 4.957143256761631e+18, | |
| "train_loss": 0.8975460599354708, | |
| "train_runtime": 11538.2258, | |
| "train_samples_per_second": 5.71, | |
| "train_steps_per_second": 0.095 | |
| } | |
| ], | |
| "logging_steps": 2, | |
| "max_steps": 1098, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 3, | |
| "save_steps": 99999, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": false, | |
| "should_training_stop": false | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 4.957143256761631e+18, | |
| "train_batch_size": 4, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |