Image-Text-to-Text
Transformers
Safetensors
qwen3_5
llama-factory
full
Generated from Trainer
conversational
Instructions to use furproxy/9b-41-2 with libraries, inference providers, notebooks, and local apps. Follow these links to get started.
- Libraries
- Transformers
How to use furproxy/9b-41-2 with Transformers:
```python
# Use a pipeline as a high-level helper
from transformers import pipeline

pipe = pipeline("image-text-to-text", model="furproxy/9b-41-2")
messages = [
    {
        "role": "user",
        "content": [
            {"type": "image", "url": "https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/p-blog/candy.JPG"},
            {"type": "text", "text": "What animal is on the candy?"}
        ]
    },
]
pipe(text=messages)
```

```python
# Load model directly
from transformers import AutoProcessor, AutoModelForImageTextToText

processor = AutoProcessor.from_pretrained("furproxy/9b-41-2")
model = AutoModelForImageTextToText.from_pretrained("furproxy/9b-41-2")
messages = [
    {
        "role": "user",
        "content": [
            {"type": "image", "url": "https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/p-blog/candy.JPG"},
            {"type": "text", "text": "What animal is on the candy?"}
        ]
    },
]
inputs = processor.apply_chat_template(
    messages,
    add_generation_prompt=True,
    tokenize=True,
    return_dict=True,
    return_tensors="pt",
).to(model.device)

outputs = model.generate(**inputs, max_new_tokens=40)
print(processor.decode(outputs[0][inputs["input_ids"].shape[-1]:]))
```
- Notebooks
- Google Colab
- Kaggle
- Local Apps Settings
- vLLM
How to use furproxy/9b-41-2 with vLLM:
Install from pip and serve model
```bash
# Install vLLM from pip:
pip install vllm

# Start the vLLM server:
vllm serve "furproxy/9b-41-2"

# Call the server using curl (OpenAI-compatible API):
curl -X POST "http://localhost:8000/v1/chat/completions" \
    -H "Content-Type: application/json" \
    --data '{
        "model": "furproxy/9b-41-2",
        "messages": [
            {
                "role": "user",
                "content": [
                    {
                        "type": "text",
                        "text": "Describe this image in one sentence."
                    },
                    {
                        "type": "image_url",
                        "image_url": {
                            "url": "https://cdn.britannica.com/61/93061-050-99147DCE/Statue-of-Liberty-Island-New-York-Bay.jpg"
                        }
                    }
                ]
            }
        ]
    }'
```
Use Docker
docker model run hf.co/furproxy/9b-41-2
- SGLang
How to use furproxy/9b-41-2 with SGLang:
Install from pip and serve model
```bash
# Install SGLang from pip:
pip install sglang

# Start the SGLang server:
python3 -m sglang.launch_server \
    --model-path "furproxy/9b-41-2" \
    --host 0.0.0.0 \
    --port 30000

# Call the server using curl (OpenAI-compatible API):
curl -X POST "http://localhost:30000/v1/chat/completions" \
    -H "Content-Type: application/json" \
    --data '{
        "model": "furproxy/9b-41-2",
        "messages": [
            {
                "role": "user",
                "content": [
                    {
                        "type": "text",
                        "text": "Describe this image in one sentence."
                    },
                    {
                        "type": "image_url",
                        "image_url": {
                            "url": "https://cdn.britannica.com/61/93061-050-99147DCE/Statue-of-Liberty-Island-New-York-Bay.jpg"
                        }
                    }
                ]
            }
        ]
    }'
```
Use Docker images
```bash
docker run --gpus all \
    --shm-size 32g \
    -p 30000:30000 \
    -v ~/.cache/huggingface:/root/.cache/huggingface \
    --env "HF_TOKEN=<secret>" \
    --ipc=host \
    lmsysorg/sglang:latest \
    python3 -m sglang.launch_server \
        --model-path "furproxy/9b-41-2" \
        --host 0.0.0.0 \
        --port 30000

# Call the server using curl (OpenAI-compatible API):
curl -X POST "http://localhost:30000/v1/chat/completions" \
    -H "Content-Type: application/json" \
    --data '{
        "model": "furproxy/9b-41-2",
        "messages": [
            {
                "role": "user",
                "content": [
                    {
                        "type": "text",
                        "text": "Describe this image in one sentence."
                    },
                    {
                        "type": "image_url",
                        "image_url": {
                            "url": "https://cdn.britannica.com/61/93061-050-99147DCE/Statue-of-Liberty-Island-New-York-Bay.jpg"
                        }
                    }
                ]
            }
        ]
    }'
```
- Docker Model Runner
How to use furproxy/9b-41-2 with Docker Model Runner:
docker model run hf.co/furproxy/9b-41-2
| { | |
| "best_global_step": null, | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 3.0, | |
| "eval_steps": 500, | |
| "global_step": 705, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.00851063829787234, | |
| "grad_norm": 0.9370151162147522, | |
| "learning_rate": 8.333333333333333e-07, | |
| "loss": 4.115976333618164, | |
| "step": 2 | |
| }, | |
| { | |
| "epoch": 0.01702127659574468, | |
| "grad_norm": 0.41166678071022034, | |
| "learning_rate": 2.4999999999999998e-06, | |
| "loss": 1.878818154335022, | |
| "step": 4 | |
| }, | |
| { | |
| "epoch": 0.02553191489361702, | |
| "grad_norm": 0.4669341444969177, | |
| "learning_rate": 4.166666666666667e-06, | |
| "loss": 2.0509164333343506, | |
| "step": 6 | |
| }, | |
| { | |
| "epoch": 0.03404255319148936, | |
| "grad_norm": 0.6062728762626648, | |
| "learning_rate": 5.833333333333334e-06, | |
| "loss": 1.7545194625854492, | |
| "step": 8 | |
| }, | |
| { | |
| "epoch": 0.0425531914893617, | |
| "grad_norm": 0.37637069821357727, | |
| "learning_rate": 7.5e-06, | |
| "loss": 1.7690430879592896, | |
| "step": 10 | |
| }, | |
| { | |
| "epoch": 0.05106382978723404, | |
| "grad_norm": 2.065159797668457, | |
| "learning_rate": 9.166666666666668e-06, | |
| "loss": 2.355275869369507, | |
| "step": 12 | |
| }, | |
| { | |
| "epoch": 0.059574468085106386, | |
| "grad_norm": 1.091937780380249, | |
| "learning_rate": 1.0833333333333334e-05, | |
| "loss": 1.876306176185608, | |
| "step": 14 | |
| }, | |
| { | |
| "epoch": 0.06808510638297872, | |
| "grad_norm": 0.49564921855926514, | |
| "learning_rate": 1.25e-05, | |
| "loss": 1.785620927810669, | |
| "step": 16 | |
| }, | |
| { | |
| "epoch": 0.07659574468085106, | |
| "grad_norm": 1.3353960514068604, | |
| "learning_rate": 1.4166666666666666e-05, | |
| "loss": 1.523728370666504, | |
| "step": 18 | |
| }, | |
| { | |
| "epoch": 0.0851063829787234, | |
| "grad_norm": 0.30626556277275085, | |
| "learning_rate": 1.5833333333333333e-05, | |
| "loss": 1.510640025138855, | |
| "step": 20 | |
| }, | |
| { | |
| "epoch": 0.09361702127659574, | |
| "grad_norm": 0.1502075493335724, | |
| "learning_rate": 1.7500000000000002e-05, | |
| "loss": 1.4279940128326416, | |
| "step": 22 | |
| }, | |
| { | |
| "epoch": 0.10212765957446808, | |
| "grad_norm": 0.8587890863418579, | |
| "learning_rate": 1.9166666666666667e-05, | |
| "loss": 1.1794553995132446, | |
| "step": 24 | |
| }, | |
| { | |
| "epoch": 0.11063829787234042, | |
| "grad_norm": 1.0492326021194458, | |
| "learning_rate": 2.0833333333333333e-05, | |
| "loss": 1.2579714059829712, | |
| "step": 26 | |
| }, | |
| { | |
| "epoch": 0.11914893617021277, | |
| "grad_norm": 0.264726847410202, | |
| "learning_rate": 2.25e-05, | |
| "loss": 0.9510668516159058, | |
| "step": 28 | |
| }, | |
| { | |
| "epoch": 0.1276595744680851, | |
| "grad_norm": 0.2859492301940918, | |
| "learning_rate": 2.4166666666666667e-05, | |
| "loss": 1.656806230545044, | |
| "step": 30 | |
| }, | |
| { | |
| "epoch": 0.13617021276595745, | |
| "grad_norm": 1.1864591836929321, | |
| "learning_rate": 2.5833333333333336e-05, | |
| "loss": 0.683425784111023, | |
| "step": 32 | |
| }, | |
| { | |
| "epoch": 0.14468085106382977, | |
| "grad_norm": 0.22737205028533936, | |
| "learning_rate": 2.75e-05, | |
| "loss": 1.2381809949874878, | |
| "step": 34 | |
| }, | |
| { | |
| "epoch": 0.15319148936170213, | |
| "grad_norm": 0.32182246446609497, | |
| "learning_rate": 2.9166666666666666e-05, | |
| "loss": 1.422176480293274, | |
| "step": 36 | |
| }, | |
| { | |
| "epoch": 0.16170212765957448, | |
| "grad_norm": 0.36561885476112366, | |
| "learning_rate": 2.9998734788806287e-05, | |
| "loss": 1.2621259689331055, | |
| "step": 38 | |
| }, | |
| { | |
| "epoch": 0.1702127659574468, | |
| "grad_norm": 0.7920733690261841, | |
| "learning_rate": 2.9988614605803806e-05, | |
| "loss": 1.2012425661087036, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 0.17872340425531916, | |
| "grad_norm": 0.19826872646808624, | |
| "learning_rate": 2.99683822733885e-05, | |
| "loss": 1.2804166078567505, | |
| "step": 42 | |
| }, | |
| { | |
| "epoch": 0.18723404255319148, | |
| "grad_norm": 0.5331127643585205, | |
| "learning_rate": 2.9938053852362484e-05, | |
| "loss": 1.1188151836395264, | |
| "step": 44 | |
| }, | |
| { | |
| "epoch": 0.19574468085106383, | |
| "grad_norm": 0.22702525556087494, | |
| "learning_rate": 2.989765341799095e-05, | |
| "loss": 1.338016152381897, | |
| "step": 46 | |
| }, | |
| { | |
| "epoch": 0.20425531914893616, | |
| "grad_norm": 1.6932013034820557, | |
| "learning_rate": 2.9847213040890793e-05, | |
| "loss": 0.8224107027053833, | |
| "step": 48 | |
| }, | |
| { | |
| "epoch": 0.2127659574468085, | |
| "grad_norm": 0.49766504764556885, | |
| "learning_rate": 2.9786772761572335e-05, | |
| "loss": 0.9487060308456421, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 0.22127659574468084, | |
| "grad_norm": 0.20401130616664886, | |
| "learning_rate": 2.9716380558654445e-05, | |
| "loss": 1.3093581199645996, | |
| "step": 52 | |
| }, | |
| { | |
| "epoch": 0.2297872340425532, | |
| "grad_norm": 0.4012207090854645, | |
| "learning_rate": 2.9636092310778195e-05, | |
| "loss": 1.2255353927612305, | |
| "step": 54 | |
| }, | |
| { | |
| "epoch": 0.23829787234042554, | |
| "grad_norm": 0.8393444418907166, | |
| "learning_rate": 2.954597175224938e-05, | |
| "loss": 0.8253700137138367, | |
| "step": 56 | |
| }, | |
| { | |
| "epoch": 0.24680851063829787, | |
| "grad_norm": 0.27147796750068665, | |
| "learning_rate": 2.9446090422445016e-05, | |
| "loss": 1.0494465827941895, | |
| "step": 58 | |
| }, | |
| { | |
| "epoch": 0.2553191489361702, | |
| "grad_norm": 0.42597004771232605, | |
| "learning_rate": 2.9336527609024072e-05, | |
| "loss": 1.358299970626831, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 0.26382978723404255, | |
| "grad_norm": 0.20699705183506012, | |
| "learning_rate": 2.9217370284987434e-05, | |
| "loss": 1.335315465927124, | |
| "step": 62 | |
| }, | |
| { | |
| "epoch": 0.2723404255319149, | |
| "grad_norm": 0.16389895975589752, | |
| "learning_rate": 2.9088713039637117e-05, | |
| "loss": 1.2662416696548462, | |
| "step": 64 | |
| }, | |
| { | |
| "epoch": 0.28085106382978725, | |
| "grad_norm": 0.25043565034866333, | |
| "learning_rate": 2.8950658003489534e-05, | |
| "loss": 1.2614431381225586, | |
| "step": 66 | |
| }, | |
| { | |
| "epoch": 0.28936170212765955, | |
| "grad_norm": 0.19853799045085907, | |
| "learning_rate": 2.880331476720238e-05, | |
| "loss": 1.2736550569534302, | |
| "step": 68 | |
| }, | |
| { | |
| "epoch": 0.2978723404255319, | |
| "grad_norm": 2.3273322582244873, | |
| "learning_rate": 2.8646800294579517e-05, | |
| "loss": 1.2623916864395142, | |
| "step": 70 | |
| }, | |
| { | |
| "epoch": 0.30638297872340425, | |
| "grad_norm": 0.25601401925086975, | |
| "learning_rate": 2.848123882972295e-05, | |
| "loss": 1.2526882886886597, | |
| "step": 72 | |
| }, | |
| { | |
| "epoch": 0.3148936170212766, | |
| "grad_norm": 0.342012494802475, | |
| "learning_rate": 2.8306761798405526e-05, | |
| "loss": 0.9114877581596375, | |
| "step": 74 | |
| }, | |
| { | |
| "epoch": 0.32340425531914896, | |
| "grad_norm": 0.5187206864356995, | |
| "learning_rate": 2.812350770374273e-05, | |
| "loss": 1.2244638204574585, | |
| "step": 76 | |
| }, | |
| { | |
| "epoch": 0.33191489361702126, | |
| "grad_norm": 0.2703041434288025, | |
| "learning_rate": 2.793162201624631e-05, | |
| "loss": 1.2725831270217896, | |
| "step": 78 | |
| }, | |
| { | |
| "epoch": 0.3404255319148936, | |
| "grad_norm": 0.1924225091934204, | |
| "learning_rate": 2.77312570583471e-05, | |
| "loss": 0.7258127927780151, | |
| "step": 80 | |
| }, | |
| { | |
| "epoch": 0.34893617021276596, | |
| "grad_norm": 0.25936806201934814, | |
| "learning_rate": 2.752257188347862e-05, | |
| "loss": 1.2518136501312256, | |
| "step": 82 | |
| }, | |
| { | |
| "epoch": 0.3574468085106383, | |
| "grad_norm": 0.1409609615802765, | |
| "learning_rate": 2.730573214981751e-05, | |
| "loss": 0.9943649172782898, | |
| "step": 84 | |
| }, | |
| { | |
| "epoch": 0.3659574468085106, | |
| "grad_norm": 0.18737079203128815, | |
| "learning_rate": 2.7080909988780982e-05, | |
| "loss": 1.001371145248413, | |
| "step": 86 | |
| }, | |
| { | |
| "epoch": 0.37446808510638296, | |
| "grad_norm": 0.512141227722168, | |
| "learning_rate": 2.684828386838569e-05, | |
| "loss": 1.2457740306854248, | |
| "step": 88 | |
| }, | |
| { | |
| "epoch": 0.3829787234042553, | |
| "grad_norm": 0.7956476807594299, | |
| "learning_rate": 2.6608038451576528e-05, | |
| "loss": 0.9049152135848999, | |
| "step": 90 | |
| }, | |
| { | |
| "epoch": 0.39148936170212767, | |
| "grad_norm": 0.17900533974170685, | |
| "learning_rate": 2.636036444963769e-05, | |
| "loss": 1.0495070219039917, | |
| "step": 92 | |
| }, | |
| { | |
| "epoch": 0.4, | |
| "grad_norm": 0.28203219175338745, | |
| "learning_rate": 2.6105458470802563e-05, | |
| "loss": 1.163374900817871, | |
| "step": 94 | |
| }, | |
| { | |
| "epoch": 0.4085106382978723, | |
| "grad_norm": 0.2998636066913605, | |
| "learning_rate": 2.5843522864182394e-05, | |
| "loss": 1.064250111579895, | |
| "step": 96 | |
| }, | |
| { | |
| "epoch": 0.41702127659574467, | |
| "grad_norm": 0.34955132007598877, | |
| "learning_rate": 2.557476555913785e-05, | |
| "loss": 1.0862053632736206, | |
| "step": 98 | |
| }, | |
| { | |
| "epoch": 0.425531914893617, | |
| "grad_norm": 0.5751879215240479, | |
| "learning_rate": 2.5299399900220803e-05, | |
| "loss": 0.9672101736068726, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.4340425531914894, | |
| "grad_norm": 0.556722104549408, | |
| "learning_rate": 2.5017644477817424e-05, | |
| "loss": 1.38407564163208, | |
| "step": 102 | |
| }, | |
| { | |
| "epoch": 0.4425531914893617, | |
| "grad_norm": 0.24831920862197876, | |
| "learning_rate": 2.47297229546271e-05, | |
| "loss": 1.0887106657028198, | |
| "step": 104 | |
| }, | |
| { | |
| "epoch": 0.451063829787234, | |
| "grad_norm": 0.39581331610679626, | |
| "learning_rate": 2.4435863888114814e-05, | |
| "loss": 0.8722383975982666, | |
| "step": 106 | |
| }, | |
| { | |
| "epoch": 0.4595744680851064, | |
| "grad_norm": 0.16548699140548706, | |
| "learning_rate": 2.4136300549077976e-05, | |
| "loss": 1.2673208713531494, | |
| "step": 108 | |
| }, | |
| { | |
| "epoch": 0.46808510638297873, | |
| "grad_norm": 0.20999404788017273, | |
| "learning_rate": 2.3831270736471703e-05, | |
| "loss": 1.2440522909164429, | |
| "step": 110 | |
| }, | |
| { | |
| "epoch": 0.4765957446808511, | |
| "grad_norm": 0.18370361626148224, | |
| "learning_rate": 2.352101658863959e-05, | |
| "loss": 1.1419363021850586, | |
| "step": 112 | |
| }, | |
| { | |
| "epoch": 0.4851063829787234, | |
| "grad_norm": 0.24067164957523346, | |
| "learning_rate": 2.32057843910998e-05, | |
| "loss": 1.0633783340454102, | |
| "step": 114 | |
| }, | |
| { | |
| "epoch": 0.49361702127659574, | |
| "grad_norm": 0.47072628140449524, | |
| "learning_rate": 2.288582438103903e-05, | |
| "loss": 1.179775595664978, | |
| "step": 116 | |
| }, | |
| { | |
| "epoch": 0.502127659574468, | |
| "grad_norm": 0.2161823809146881, | |
| "learning_rate": 2.256139054866955e-05, | |
| "loss": 1.2002534866333008, | |
| "step": 118 | |
| }, | |
| { | |
| "epoch": 0.5106382978723404, | |
| "grad_norm": 1.2126752138137817, | |
| "learning_rate": 2.2232740435607067e-05, | |
| "loss": 0.9621443748474121, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 0.5191489361702127, | |
| "grad_norm": 0.1435491442680359, | |
| "learning_rate": 2.19001349304294e-05, | |
| "loss": 1.253274917602539, | |
| "step": 122 | |
| }, | |
| { | |
| "epoch": 0.5276595744680851, | |
| "grad_norm": 0.19416141510009766, | |
| "learning_rate": 2.156383806157826e-05, | |
| "loss": 1.3183720111846924, | |
| "step": 124 | |
| }, | |
| { | |
| "epoch": 0.5361702127659574, | |
| "grad_norm": 0.14461560547351837, | |
| "learning_rate": 2.1224116787768552e-05, | |
| "loss": 1.238662838935852, | |
| "step": 126 | |
| }, | |
| { | |
| "epoch": 0.5446808510638298, | |
| "grad_norm": 0.155701145529747, | |
| "learning_rate": 2.0881240786071588e-05, | |
| "loss": 1.1723605394363403, | |
| "step": 128 | |
| }, | |
| { | |
| "epoch": 0.5531914893617021, | |
| "grad_norm": 0.17010867595672607, | |
| "learning_rate": 2.05354822378404e-05, | |
| "loss": 1.021695852279663, | |
| "step": 130 | |
| }, | |
| { | |
| "epoch": 0.5617021276595745, | |
| "grad_norm": 0.17941494286060333, | |
| "learning_rate": 2.018711561264714e-05, | |
| "loss": 1.2407881021499634, | |
| "step": 132 | |
| }, | |
| { | |
| "epoch": 0.5702127659574469, | |
| "grad_norm": 0.3246072828769684, | |
| "learning_rate": 1.9836417450403978e-05, | |
| "loss": 1.186881422996521, | |
| "step": 134 | |
| }, | |
| { | |
| "epoch": 0.5787234042553191, | |
| "grad_norm": 0.32305997610092163, | |
| "learning_rate": 1.9483666141840615e-05, | |
| "loss": 0.965923547744751, | |
| "step": 136 | |
| }, | |
| { | |
| "epoch": 0.5872340425531914, | |
| "grad_norm": 0.15171490609645844, | |
| "learning_rate": 1.9129141707512508e-05, | |
| "loss": 0.5913432836532593, | |
| "step": 138 | |
| }, | |
| { | |
| "epoch": 0.5957446808510638, | |
| "grad_norm": 0.37349367141723633, | |
| "learning_rate": 1.8773125575515364e-05, | |
| "loss": 1.1055043935775757, | |
| "step": 140 | |
| }, | |
| { | |
| "epoch": 0.6042553191489362, | |
| "grad_norm": 0.15140922367572784, | |
| "learning_rate": 1.8415900358082268e-05, | |
| "loss": 1.3598577976226807, | |
| "step": 142 | |
| }, | |
| { | |
| "epoch": 0.6127659574468085, | |
| "grad_norm": 0.5196365118026733, | |
| "learning_rate": 1.805774962724083e-05, | |
| "loss": 0.9218302369117737, | |
| "step": 144 | |
| }, | |
| { | |
| "epoch": 0.6212765957446809, | |
| "grad_norm": 0.15164780616760254, | |
| "learning_rate": 1.7698957689708426e-05, | |
| "loss": 1.1118155717849731, | |
| "step": 146 | |
| }, | |
| { | |
| "epoch": 0.6297872340425532, | |
| "grad_norm": 0.2984839677810669, | |
| "learning_rate": 1.7339809361204252e-05, | |
| "loss": 1.2171759605407715, | |
| "step": 148 | |
| }, | |
| { | |
| "epoch": 0.6382978723404256, | |
| "grad_norm": 0.28176164627075195, | |
| "learning_rate": 1.6980589740357294e-05, | |
| "loss": 1.1775646209716797, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 0.6468085106382979, | |
| "grad_norm": 0.19051885604858398, | |
| "learning_rate": 1.6621583982389707e-05, | |
| "loss": 1.1114767789840698, | |
| "step": 152 | |
| }, | |
| { | |
| "epoch": 0.6553191489361702, | |
| "grad_norm": 0.28726738691329956, | |
| "learning_rate": 1.6263077072755326e-05, | |
| "loss": 0.7064566612243652, | |
| "step": 154 | |
| }, | |
| { | |
| "epoch": 0.6638297872340425, | |
| "grad_norm": 0.3889693021774292, | |
| "learning_rate": 1.5905353600912898e-05, | |
| "loss": 0.8802153468132019, | |
| "step": 156 | |
| }, | |
| { | |
| "epoch": 0.6723404255319149, | |
| "grad_norm": 0.28420913219451904, | |
| "learning_rate": 1.5548697534413646e-05, | |
| "loss": 0.9915045499801636, | |
| "step": 158 | |
| }, | |
| { | |
| "epoch": 0.6808510638297872, | |
| "grad_norm": 0.2218930721282959, | |
| "learning_rate": 1.5193391993482582e-05, | |
| "loss": 1.2188233137130737, | |
| "step": 160 | |
| }, | |
| { | |
| "epoch": 0.6893617021276596, | |
| "grad_norm": 0.1267762929201126, | |
| "learning_rate": 1.4839719026272377e-05, | |
| "loss": 1.0931644439697266, | |
| "step": 162 | |
| }, | |
| { | |
| "epoch": 0.6978723404255319, | |
| "grad_norm": 0.20958174765110016, | |
| "learning_rate": 1.4487959384968272e-05, | |
| "loss": 0.875884473323822, | |
| "step": 164 | |
| }, | |
| { | |
| "epoch": 0.7063829787234043, | |
| "grad_norm": 0.2075873166322708, | |
| "learning_rate": 1.4138392302921813e-05, | |
| "loss": 0.9931239485740662, | |
| "step": 166 | |
| }, | |
| { | |
| "epoch": 0.7148936170212766, | |
| "grad_norm": 0.45486778020858765, | |
| "learning_rate": 1.3791295272990175e-05, | |
| "loss": 1.1112878322601318, | |
| "step": 168 | |
| }, | |
| { | |
| "epoch": 0.723404255319149, | |
| "grad_norm": 0.12606321275234222, | |
| "learning_rate": 1.344694382725718e-05, | |
| "loss": 0.8394916653633118, | |
| "step": 170 | |
| }, | |
| { | |
| "epoch": 0.7319148936170212, | |
| "grad_norm": 1.405964732170105, | |
| "learning_rate": 1.3105611318310818e-05, | |
| "loss": 0.8859961032867432, | |
| "step": 172 | |
| }, | |
| { | |
| "epoch": 0.7404255319148936, | |
| "grad_norm": 0.24160844087600708, | |
| "learning_rate": 1.2767568702250844e-05, | |
| "loss": 1.1839947700500488, | |
| "step": 174 | |
| }, | |
| { | |
| "epoch": 0.7489361702127659, | |
| "grad_norm": 0.28286510705947876, | |
| "learning_rate": 1.2433084323598791e-05, | |
| "loss": 0.6160851120948792, | |
| "step": 176 | |
| }, | |
| { | |
| "epoch": 0.7574468085106383, | |
| "grad_norm": 0.36609017848968506, | |
| "learning_rate": 1.2102423702281116e-05, | |
| "loss": 1.0064830780029297, | |
| "step": 178 | |
| }, | |
| { | |
| "epoch": 0.7659574468085106, | |
| "grad_norm": 0.25650474429130554, | |
| "learning_rate": 1.1775849322854516e-05, | |
| "loss": 1.0609067678451538, | |
| "step": 180 | |
| }, | |
| { | |
| "epoch": 0.774468085106383, | |
| "grad_norm": 0.3248478174209595, | |
| "learning_rate": 1.1453620426140795e-05, | |
| "loss": 0.7533571720123291, | |
| "step": 182 | |
| }, | |
| { | |
| "epoch": 0.7829787234042553, | |
| "grad_norm": 0.22153323888778687, | |
| "learning_rate": 1.1135992803436695e-05, | |
| "loss": 0.9635902643203735, | |
| "step": 184 | |
| }, | |
| { | |
| "epoch": 0.7914893617021277, | |
| "grad_norm": 0.11977202445268631, | |
| "learning_rate": 1.0823218593461992e-05, | |
| "loss": 0.9765074849128723, | |
| "step": 186 | |
| }, | |
| { | |
| "epoch": 0.8, | |
| "grad_norm": 0.2166285663843155, | |
| "learning_rate": 1.0515546082207097e-05, | |
| "loss": 0.730889081954956, | |
| "step": 188 | |
| }, | |
| { | |
| "epoch": 0.8085106382978723, | |
| "grad_norm": 0.6851885914802551, | |
| "learning_rate": 1.0213219505838983e-05, | |
| "loss": 0.966530442237854, | |
| "step": 190 | |
| }, | |
| { | |
| "epoch": 0.8170212765957446, | |
| "grad_norm": 0.33713918924331665, | |
| "learning_rate": 9.91647885682201e-06, | |
| "loss": 1.3054033517837524, | |
| "step": 192 | |
| }, | |
| { | |
| "epoch": 0.825531914893617, | |
| "grad_norm": 0.196068674325943, | |
| "learning_rate": 9.625559693407413e-06, | |
| "loss": 1.2358617782592773, | |
| "step": 194 | |
| }, | |
| { | |
| "epoch": 0.8340425531914893, | |
| "grad_norm": 0.2631889283657074, | |
| "learning_rate": 9.340692952642789e-06, | |
| "loss": 0.789035975933075, | |
| "step": 196 | |
| }, | |
| { | |
| "epoch": 0.8425531914893617, | |
| "grad_norm": 0.165288507938385, | |
| "learning_rate": 9.062104767049956e-06, | |
| "loss": 1.2202699184417725, | |
| "step": 198 | |
| }, | |
| { | |
| "epoch": 0.851063829787234, | |
| "grad_norm": 0.4081096649169922, | |
| "learning_rate": 8.790016285116763e-06, | |
| "loss": 1.005751132965088, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.8595744680851064, | |
| "grad_norm": 0.16539311408996582, | |
| "learning_rate": 8.524643495745306e-06, | |
| "loss": 0.9970273971557617, | |
| "step": 202 | |
| }, | |
| { | |
| "epoch": 0.8680851063829788, | |
| "grad_norm": 0.1629868745803833, | |
| "learning_rate": 8.26619705679589e-06, | |
| "loss": 1.1567330360412598, | |
| "step": 204 | |
| }, | |
| { | |
| "epoch": 0.8765957446808511, | |
| "grad_norm": 0.13489580154418945, | |
| "learning_rate": 8.014882127862923e-06, | |
| "loss": 1.1192888021469116, | |
| "step": 206 | |
| }, | |
| { | |
| "epoch": 0.8851063829787233, | |
| "grad_norm": 0.1189538910984993, | |
| "learning_rate": 7.770898207415416e-06, | |
| "loss": 0.9340039491653442, | |
| "step": 208 | |
| }, | |
| { | |
| "epoch": 0.8936170212765957, | |
| "grad_norm": 0.12542982399463654, | |
| "learning_rate": 7.534438974431351e-06, | |
| "loss": 1.2615220546722412, | |
| "step": 210 | |
| }, | |
| { | |
| "epoch": 0.902127659574468, | |
| "grad_norm": 0.2067514955997467, | |
| "learning_rate": 7.305692134651742e-06, | |
| "loss": 0.7793570160865784, | |
| "step": 212 | |
| }, | |
| { | |
| "epoch": 0.9106382978723404, | |
| "grad_norm": 0.12745940685272217, | |
| "learning_rate": 7.084839271576291e-06, | |
| "loss": 0.9480925798416138, | |
| "step": 214 | |
| }, | |
| { | |
| "epoch": 0.9191489361702128, | |
| "grad_norm": 0.14118105173110962, | |
| "learning_rate": 6.872055702319054e-06, | |
| "loss": 0.7321507334709167, | |
| "step": 216 | |
| }, | |
| { | |
| "epoch": 0.9276595744680851, | |
| "grad_norm": 0.19066962599754333, | |
| "learning_rate": 6.667510338438419e-06, | |
| "loss": 1.2155194282531738, | |
| "step": 218 | |
| }, | |
| { | |
| "epoch": 0.9361702127659575, | |
| "grad_norm": 0.2428826540708542, | |
| "learning_rate": 6.471365551852012e-06, | |
| "loss": 0.39672887325286865, | |
| "step": 220 | |
| }, | |
| { | |
| "epoch": 0.9446808510638298, | |
| "grad_norm": 0.16802483797073364, | |
| "learning_rate": 6.2837770459428e-06, | |
| "loss": 1.254830241203308, | |
| "step": 222 | |
| }, | |
| { | |
| "epoch": 0.9531914893617022, | |
| "grad_norm": 0.1394779235124588, | |
| "learning_rate": 6.1048937319588676e-06, | |
| "loss": 1.138830304145813, | |
| "step": 224 | |
| }, | |
| { | |
| "epoch": 0.9617021276595744, | |
| "grad_norm": 0.2871001362800598, | |
| "learning_rate": 5.9348576108049065e-06, | |
| "loss": 0.8572854399681091, | |
| "step": 226 | |
| }, | |
| { | |
| "epoch": 0.9702127659574468, | |
| "grad_norm": 0.1366124153137207, | |
| "learning_rate": 5.773803660319234e-06, | |
| "loss": 1.0662239789962769, | |
| "step": 228 | |
| }, | |
| { | |
| "epoch": 0.9787234042553191, | |
| "grad_norm": 0.12555794417858124, | |
| "learning_rate": 5.621859728125884e-06, | |
| "loss": 1.222013235092163, | |
| "step": 230 | |
| }, | |
| { | |
| "epoch": 0.9872340425531915, | |
| "grad_norm": 0.29969581961631775, | |
| "learning_rate": 5.479146430146781e-06, | |
| "loss": 1.0417675971984863, | |
| "step": 232 | |
| }, | |
| { | |
| "epoch": 0.9957446808510638, | |
| "grad_norm": 0.4836138188838959, | |
| "learning_rate": 5.345777054854579e-06, | |
| "loss": 0.9495846629142761, | |
| "step": 234 | |
| }, | |
| { | |
| "epoch": 1.004255319148936, | |
| "grad_norm": 0.28257113695144653, | |
| "learning_rate": 5.221857473342149e-06, | |
| "loss": 0.843216061592102, | |
| "step": 236 | |
| }, | |
| { | |
| "epoch": 1.0127659574468084, | |
| "grad_norm": 0.1423388570547104, | |
| "learning_rate": 5.10748605528015e-06, | |
| "loss": 0.9633752107620239, | |
| "step": 238 | |
| }, | |
| { | |
| "epoch": 1.0212765957446808, | |
| "grad_norm": 0.12806400656700134, | |
| "learning_rate": 5.002753590829349e-06, | |
| "loss": 0.6067866683006287, | |
| "step": 240 | |
| }, | |
| { | |
| "epoch": 1.0297872340425531, | |
| "grad_norm": 0.243302121758461, | |
| "learning_rate": 4.9077432185697e-06, | |
| "loss": 0.9341376423835754, | |
| "step": 242 | |
| }, | |
| { | |
| "epoch": 1.0382978723404255, | |
| "grad_norm": 0.22620677947998047, | |
| "learning_rate": 4.822530359503393e-06, | |
| "loss": 1.046706199645996, | |
| "step": 244 | |
| }, | |
| { | |
| "epoch": 1.0468085106382978, | |
| "grad_norm": 0.1606007069349289, | |
| "learning_rate": 4.747182657184251e-06, | |
| "loss": 0.6613105535507202, | |
| "step": 246 | |
| }, | |
| { | |
| "epoch": 1.0553191489361702, | |
| "grad_norm": 0.3230328857898712, | |
| "learning_rate": 4.681759924021033e-06, | |
| "loss": 0.7598414421081543, | |
| "step": 248 | |
| }, | |
| { | |
| "epoch": 1.0638297872340425, | |
| "grad_norm": 0.13726186752319336, | |
| "learning_rate": 4.626314093797213e-06, | |
| "loss": 0.7334257364273071, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 1.0723404255319149, | |
| "grad_norm": 0.3747357726097107, | |
| "learning_rate": 4.580889180444988e-06, | |
| "loss": 0.5062970519065857, | |
| "step": 252 | |
| }, | |
| { | |
| "epoch": 1.0808510638297872, | |
| "grad_norm": 0.15663602948188782, | |
| "learning_rate": 4.545521243106197e-06, | |
| "loss": 1.0599923133850098, | |
| "step": 254 | |
| }, | |
| { | |
| "epoch": 1.0893617021276596, | |
| "grad_norm": 0.1843012273311615, | |
| "learning_rate": 4.520238357507899e-06, | |
| "loss": 0.940389096736908, | |
| "step": 256 | |
| }, | |
| { | |
| "epoch": 1.097872340425532, | |
| "grad_norm": 0.22409912943840027, | |
| "learning_rate": 4.505060593675342e-06, | |
| "loss": 1.0018244981765747, | |
| "step": 258 | |
| }, | |
| { | |
| "epoch": 1.1063829787234043, | |
| "grad_norm": 0.45610666275024414, | |
| "learning_rate": 4.5e-06, | |
| "loss": 0.8150784969329834, | |
| "step": 260 | |
| }, | |
| { | |
| "epoch": 1.1148936170212767, | |
| "grad_norm": 0.19215510785579681, | |
| "learning_rate": 4.505060593675342e-06, | |
| "loss": 0.7950176000595093, | |
| "step": 262 | |
| }, | |
| { | |
| "epoch": 1.123404255319149, | |
| "grad_norm": 0.23423391580581665, | |
| "learning_rate": 4.520238357507899e-06, | |
| "loss": 0.7621920704841614, | |
| "step": 264 | |
| }, | |
| { | |
| "epoch": 1.1319148936170214, | |
| "grad_norm": 0.15685321390628815, | |
| "learning_rate": 4.545521243106197e-06, | |
| "loss": 0.8206462264060974, | |
| "step": 266 | |
| }, | |
| { | |
| "epoch": 1.1404255319148937, | |
| "grad_norm": 0.4606432020664215, | |
| "learning_rate": 4.580889180444988e-06, | |
| "loss": 0.9801955819129944, | |
| "step": 268 | |
| }, | |
| { | |
| "epoch": 1.148936170212766, | |
| "grad_norm": 0.21424169838428497, | |
| "learning_rate": 4.6263140937972124e-06, | |
| "loss": 0.8219439387321472, | |
| "step": 270 | |
| }, | |
| { | |
| "epoch": 1.1574468085106382, | |
| "grad_norm": 0.2082989364862442, | |
| "learning_rate": 4.6817599240210315e-06, | |
| "loss": 0.6543676257133484, | |
| "step": 272 | |
| }, | |
| { | |
| "epoch": 1.1659574468085105, | |
| "grad_norm": 0.1622384637594223, | |
| "learning_rate": 4.747182657184251e-06, | |
| "loss": 0.9012349247932434, | |
| "step": 274 | |
| }, | |
| { | |
| "epoch": 1.174468085106383, | |
| "grad_norm": 0.13174618780612946, | |
| "learning_rate": 4.822530359503391e-06, | |
| "loss": 0.7129980325698853, | |
| "step": 276 | |
| }, | |
| { | |
| "epoch": 1.1829787234042553, | |
| "grad_norm": 0.2709389925003052, | |
| "learning_rate": 4.9077432185697e-06, | |
| "loss": 0.5482591986656189, | |
| "step": 278 | |
| }, | |
| { | |
| "epoch": 1.1914893617021276, | |
| "grad_norm": 0.2576906979084015, | |
| "learning_rate": 5.002753590829348e-06, | |
| "loss": 0.5497639775276184, | |
| "step": 280 | |
| }, | |
| { | |
| "epoch": 1.2, | |
| "grad_norm": 0.1526031494140625, | |
| "learning_rate": 5.1074860552801466e-06, | |
| "loss": 0.6242255568504333, | |
| "step": 282 | |
| }, | |
| { | |
| "epoch": 1.2085106382978723, | |
| "grad_norm": 0.22073198854923248, | |
| "learning_rate": 5.2218574733421455e-06, | |
| "loss": 0.693662703037262, | |
| "step": 284 | |
| }, | |
| { | |
| "epoch": 1.2170212765957447, | |
| "grad_norm": 0.22191239893436432, | |
| "learning_rate": 5.345777054854578e-06, | |
| "loss": 0.9797627329826355, | |
| "step": 286 | |
| }, | |
| { | |
| "epoch": 1.225531914893617, | |
| "grad_norm": 0.17303584516048431, | |
| "learning_rate": 5.479146430146783e-06, | |
| "loss": 0.9435557126998901, | |
| "step": 288 | |
| }, | |
| { | |
| "epoch": 1.2340425531914894, | |
| "grad_norm": 0.2218431681394577, | |
| "learning_rate": 5.6218597281258834e-06, | |
| "loss": 0.6460038423538208, | |
| "step": 290 | |
| }, | |
| { | |
| "epoch": 1.2425531914893617, | |
| "grad_norm": 0.22332793474197388, | |
| "learning_rate": 5.773803660319232e-06, | |
| "loss": 0.8468654155731201, | |
| "step": 292 | |
| }, | |
| { | |
| "epoch": 1.251063829787234, | |
| "grad_norm": 0.13609401881694794, | |
| "learning_rate": 5.934857610804904e-06, | |
| "loss": 0.8879528641700745, | |
| "step": 294 | |
| }, | |
| { | |
| "epoch": 1.2595744680851064, | |
| "grad_norm": 0.18097440898418427, | |
| "learning_rate": 6.1048937319588676e-06, | |
| "loss": 0.7949476838111877, | |
| "step": 296 | |
| }, | |
| { | |
| "epoch": 1.2680851063829788, | |
| "grad_norm": 0.3170534074306488, | |
| "learning_rate": 6.283777045942798e-06, | |
| "loss": 0.8318912386894226, | |
| "step": 298 | |
| }, | |
| { | |
| "epoch": 1.2765957446808511, | |
| "grad_norm": 0.19770559668540955, | |
| "learning_rate": 6.471365551852011e-06, | |
| "loss": 1.0059137344360352, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 1.2851063829787235, | |
| "grad_norm": 0.14630961418151855, | |
| "learning_rate": 6.66751033843842e-06, | |
| "loss": 0.39432206749916077, | |
| "step": 302 | |
| }, | |
| { | |
| "epoch": 1.2936170212765958, | |
| "grad_norm": 0.3450065553188324, | |
| "learning_rate": 6.872055702319048e-06, | |
| "loss": 0.9369863271713257, | |
| "step": 304 | |
| }, | |
| { | |
| "epoch": 1.302127659574468, | |
| "grad_norm": 0.18490372598171234, | |
| "learning_rate": 7.084839271576289e-06, | |
| "loss": 0.9826689958572388, | |
| "step": 306 | |
| }, | |
| { | |
| "epoch": 1.3106382978723405, | |
| "grad_norm": 0.1866351068019867, | |
| "learning_rate": 7.30569213465174e-06, | |
| "loss": 0.5573344826698303, | |
| "step": 308 | |
| }, | |
| { | |
| "epoch": 1.3191489361702127, | |
| "grad_norm": 0.1976342350244522, | |
| "learning_rate": 7.534438974431351e-06, | |
| "loss": 0.5633378624916077, | |
| "step": 310 | |
| }, | |
| { | |
| "epoch": 1.327659574468085, | |
| "grad_norm": 0.3162063956260681, | |
| "learning_rate": 7.770898207415414e-06, | |
| "loss": 0.7375507354736328, | |
| "step": 312 | |
| }, | |
| { | |
| "epoch": 1.3361702127659574, | |
| "grad_norm": 0.2016829252243042, | |
| "learning_rate": 8.014882127862923e-06, | |
| "loss": 0.4668198823928833, | |
| "step": 314 | |
| }, | |
| { | |
| "epoch": 1.3446808510638297, | |
| "grad_norm": 1.459976315498352, | |
| "learning_rate": 8.266197056795886e-06, | |
| "loss": 0.7280799150466919, | |
| "step": 316 | |
| }, | |
| { | |
| "epoch": 1.353191489361702, | |
| "grad_norm": 0.3398215174674988, | |
| "learning_rate": 8.5246434957453e-06, | |
| "loss": 0.5291551351547241, | |
| "step": 318 | |
| }, | |
| { | |
| "epoch": 1.3617021276595744, | |
| "grad_norm": 0.19973713159561157, | |
| "learning_rate": 8.790016285116766e-06, | |
| "loss": 0.5350017547607422, | |
| "step": 320 | |
| }, | |
| { | |
| "epoch": 1.3702127659574468, | |
| "grad_norm": 0.14308671653270721, | |
| "learning_rate": 9.062104767049957e-06, | |
| "loss": 0.9615387916564941, | |
| "step": 322 | |
| }, | |
| { | |
| "epoch": 1.3787234042553191, | |
| "grad_norm": 0.1939127892255783, | |
| "learning_rate": 9.340692952642787e-06, | |
| "loss": 0.6444461345672607, | |
| "step": 324 | |
| }, | |
| { | |
| "epoch": 1.3872340425531915, | |
| "grad_norm": 0.18625043332576752, | |
| "learning_rate": 9.625559693407413e-06, | |
| "loss": 0.9194571375846863, | |
| "step": 326 | |
| }, | |
| { | |
| "epoch": 1.3957446808510638, | |
| "grad_norm": 0.2951974868774414, | |
| "learning_rate": 9.916478856822006e-06, | |
| "loss": 0.9841423034667969, | |
| "step": 328 | |
| }, | |
| { | |
| "epoch": 1.4042553191489362, | |
| "grad_norm": 0.16953378915786743, | |
| "learning_rate": 1.0213219505838985e-05, | |
| "loss": 0.8577695488929749, | |
| "step": 330 | |
| }, | |
| { | |
| "epoch": 1.4127659574468086, | |
| "grad_norm": 0.2879262864589691, | |
| "learning_rate": 1.0515546082207094e-05, | |
| "loss": 0.8521637916564941, | |
| "step": 332 | |
| }, | |
| { | |
| "epoch": 1.421276595744681, | |
| "grad_norm": 0.22187970578670502, | |
| "learning_rate": 1.082321859346199e-05, | |
| "loss": 0.6609964966773987, | |
| "step": 334 | |
| }, | |
| { | |
| "epoch": 1.4297872340425533, | |
| "grad_norm": 0.19994378089904785, | |
| "learning_rate": 1.1135992803436696e-05, | |
| "loss": 0.5523483753204346, | |
| "step": 336 | |
| }, | |
| { | |
| "epoch": 1.4382978723404256, | |
| "grad_norm": 0.19332869350910187, | |
| "learning_rate": 1.1453620426140791e-05, | |
| "loss": 0.7419446110725403, | |
| "step": 338 | |
| }, | |
| { | |
| "epoch": 1.4468085106382977, | |
| "grad_norm": 0.17565295100212097, | |
| "learning_rate": 1.1775849322854508e-05, | |
| "loss": 0.7033047676086426, | |
| "step": 340 | |
| }, | |
| { | |
| "epoch": 1.4553191489361703, | |
| "grad_norm": 0.3124133050441742, | |
| "learning_rate": 1.210242370228112e-05, | |
| "loss": 0.5836398005485535, | |
| "step": 342 | |
| }, | |
| { | |
| "epoch": 1.4638297872340424, | |
| "grad_norm": 0.21754996478557587, | |
| "learning_rate": 1.2433084323598791e-05, | |
| "loss": 0.9612702131271362, | |
| "step": 344 | |
| }, | |
| { | |
| "epoch": 1.472340425531915, | |
| "grad_norm": 0.21039743721485138, | |
| "learning_rate": 1.2767568702250838e-05, | |
| "loss": 0.9444953799247742, | |
| "step": 346 | |
| }, | |
| { | |
| "epoch": 1.4808510638297872, | |
| "grad_norm": 0.42962566018104553, | |
| "learning_rate": 1.3105611318310811e-05, | |
| "loss": 0.8060452938079834, | |
| "step": 348 | |
| }, | |
| { | |
| "epoch": 1.4893617021276595, | |
| "grad_norm": 0.19513703882694244, | |
| "learning_rate": 1.3446943827257184e-05, | |
| "loss": 0.8150795698165894, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 1.4978723404255319, | |
| "grad_norm": 0.2640067934989929, | |
| "learning_rate": 1.3791295272990172e-05, | |
| "loss": 0.3241785168647766, | |
| "step": 352 | |
| }, | |
| { | |
| "epoch": 1.5063829787234042, | |
| "grad_norm": 0.25144851207733154, | |
| "learning_rate": 1.413839230292182e-05, | |
| "loss": 0.7416596412658691, | |
| "step": 354 | |
| }, | |
| { | |
| "epoch": 1.5148936170212766, | |
| "grad_norm": 0.16774724423885345, | |
| "learning_rate": 1.4487959384968276e-05, | |
| "loss": 0.518079400062561, | |
| "step": 356 | |
| }, | |
| { | |
| "epoch": 1.523404255319149, | |
| "grad_norm": 0.1518772393465042, | |
| "learning_rate": 1.4839719026272373e-05, | |
| "loss": 0.5670950412750244, | |
| "step": 358 | |
| }, | |
| { | |
| "epoch": 1.5319148936170213, | |
| "grad_norm": 0.23323562741279602, | |
| "learning_rate": 1.5193391993482579e-05, | |
| "loss": 1.1480307579040527, | |
| "step": 360 | |
| }, | |
| { | |
| "epoch": 1.5404255319148936, | |
| "grad_norm": 0.1399996131658554, | |
| "learning_rate": 1.5548697534413636e-05, | |
| "loss": 0.5208263397216797, | |
| "step": 362 | |
| }, | |
| { | |
| "epoch": 1.548936170212766, | |
| "grad_norm": 0.19347982108592987, | |
| "learning_rate": 1.5905353600912894e-05, | |
| "loss": 0.9330644607543945, | |
| "step": 364 | |
| }, | |
| { | |
| "epoch": 1.5574468085106383, | |
| "grad_norm": 0.15133146941661835, | |
| "learning_rate": 1.6263077072755323e-05, | |
| "loss": 0.8360081911087036, | |
| "step": 366 | |
| }, | |
| { | |
| "epoch": 1.5659574468085107, | |
| "grad_norm": 0.18284079432487488, | |
| "learning_rate": 1.6621583982389707e-05, | |
| "loss": 0.7362462878227234, | |
| "step": 368 | |
| }, | |
| { | |
| "epoch": 1.574468085106383, | |
| "grad_norm": 0.19372136890888214, | |
| "learning_rate": 1.6980589740357294e-05, | |
| "loss": 0.6744348406791687, | |
| "step": 370 | |
| }, | |
| { | |
| "epoch": 1.5829787234042554, | |
| "grad_norm": 0.12256433069705963, | |
| "learning_rate": 1.733980936120425e-05, | |
| "loss": 0.9976662397384644, | |
| "step": 372 | |
| }, | |
| { | |
| "epoch": 1.5914893617021275, | |
| "grad_norm": 0.20529648661613464, | |
| "learning_rate": 1.7698957689708416e-05, | |
| "loss": 1.0515512228012085, | |
| "step": 374 | |
| }, | |
| { | |
| "epoch": 1.6, | |
| "grad_norm": 0.16672298312187195, | |
| "learning_rate": 1.805774962724083e-05, | |
| "loss": 0.7891988754272461, | |
| "step": 376 | |
| }, | |
| { | |
| "epoch": 1.6085106382978722, | |
| "grad_norm": 0.1434396356344223, | |
| "learning_rate": 1.8415900358082265e-05, | |
| "loss": 0.6114462018013, | |
| "step": 378 | |
| }, | |
| { | |
| "epoch": 1.6170212765957448, | |
| "grad_norm": 0.16634748876094818, | |
| "learning_rate": 1.8773125575515357e-05, | |
| "loss": 0.5973749756813049, | |
| "step": 380 | |
| }, | |
| { | |
| "epoch": 1.625531914893617, | |
| "grad_norm": 0.13240687549114227, | |
| "learning_rate": 1.912914170751251e-05, | |
| "loss": 0.7943100929260254, | |
| "step": 382 | |
| }, | |
| { | |
| "epoch": 1.6340425531914895, | |
| "grad_norm": 0.12580837309360504, | |
| "learning_rate": 1.9483666141840612e-05, | |
| "loss": 0.9919679760932922, | |
| "step": 384 | |
| }, | |
| { | |
| "epoch": 1.6425531914893616, | |
| "grad_norm": 0.35519301891326904, | |
| "learning_rate": 1.9836417450403974e-05, | |
| "loss": 0.8425494432449341, | |
| "step": 386 | |
| }, | |
| { | |
| "epoch": 1.6510638297872342, | |
| "grad_norm": 0.2488897144794464, | |
| "learning_rate": 2.0187115612647133e-05, | |
| "loss": 0.7412829399108887, | |
| "step": 388 | |
| }, | |
| { | |
| "epoch": 1.6595744680851063, | |
| "grad_norm": 0.5074354410171509, | |
| "learning_rate": 2.05354822378404e-05, | |
| "loss": 0.7992886900901794, | |
| "step": 390 | |
| }, | |
| { | |
| "epoch": 1.6680851063829787, | |
| "grad_norm": 0.12573714554309845, | |
| "learning_rate": 2.0881240786071595e-05, | |
| "loss": 0.909046471118927, | |
| "step": 392 | |
| }, | |
| { | |
| "epoch": 1.676595744680851, | |
| "grad_norm": 0.33457598090171814, | |
| "learning_rate": 2.122411678776854e-05, | |
| "loss": 0.6243407726287842, | |
| "step": 394 | |
| }, | |
| { | |
| "epoch": 1.6851063829787234, | |
| "grad_norm": 0.15411563217639923, | |
| "learning_rate": 2.1563838061578258e-05, | |
| "loss": 0.8719924688339233, | |
| "step": 396 | |
| }, | |
| { | |
| "epoch": 1.6936170212765957, | |
| "grad_norm": 0.13289184868335724, | |
| "learning_rate": 2.1900134930429396e-05, | |
| "loss": 1.0623129606246948, | |
| "step": 398 | |
| }, | |
| { | |
| "epoch": 1.702127659574468, | |
| "grad_norm": 0.3402920663356781, | |
| "learning_rate": 2.223274043560706e-05, | |
| "loss": 0.9607300162315369, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 1.7106382978723405, | |
| "grad_norm": 0.20696532726287842, | |
| "learning_rate": 2.256139054866954e-05, | |
| "loss": 0.4819542169570923, | |
| "step": 402 | |
| }, | |
| { | |
| "epoch": 1.7191489361702128, | |
| "grad_norm": 0.1925085484981537, | |
| "learning_rate": 2.2885824381039028e-05, | |
| "loss": 0.6816413402557373, | |
| "step": 404 | |
| }, | |
| { | |
| "epoch": 1.7276595744680852, | |
| "grad_norm": 0.5462839603424072, | |
| "learning_rate": 2.3205784391099808e-05, | |
| "loss": 0.5322574377059937, | |
| "step": 406 | |
| }, | |
| { | |
| "epoch": 1.7361702127659573, | |
| "grad_norm": 0.12952132523059845, | |
| "learning_rate": 2.352101658863958e-05, | |
| "loss": 0.8366644382476807, | |
| "step": 408 | |
| }, | |
| { | |
| "epoch": 1.7446808510638299, | |
| "grad_norm": 0.15898458659648895, | |
| "learning_rate": 2.3831270736471703e-05, | |
| "loss": 0.8219179511070251, | |
| "step": 410 | |
| }, | |
| { | |
| "epoch": 1.753191489361702, | |
| "grad_norm": 0.10134760290384293, | |
| "learning_rate": 2.4136300549077973e-05, | |
| "loss": 0.8943849802017212, | |
| "step": 412 | |
| }, | |
| { | |
| "epoch": 1.7617021276595746, | |
| "grad_norm": 0.21209417283535004, | |
| "learning_rate": 2.443586388811481e-05, | |
| "loss": 0.7499610781669617, | |
| "step": 414 | |
| }, | |
| { | |
| "epoch": 1.7702127659574467, | |
| "grad_norm": 0.47646665573120117, | |
| "learning_rate": 2.47297229546271e-05, | |
| "loss": 0.6121576428413391, | |
| "step": 416 | |
| }, | |
| { | |
| "epoch": 1.7787234042553193, | |
| "grad_norm": 0.26675277948379517, | |
| "learning_rate": 2.5017644477817418e-05, | |
| "loss": 0.7867974638938904, | |
| "step": 418 | |
| }, | |
| { | |
| "epoch": 1.7872340425531914, | |
| "grad_norm": 0.840756356716156, | |
| "learning_rate": 2.5299399900220807e-05, | |
| "loss": 0.6045423150062561, | |
| "step": 420 | |
| }, | |
| { | |
| "epoch": 1.795744680851064, | |
| "grad_norm": 0.400618314743042, | |
| "learning_rate": 2.5574765559137848e-05, | |
| "loss": 0.7998336553573608, | |
| "step": 422 | |
| }, | |
| { | |
| "epoch": 1.804255319148936, | |
| "grad_norm": 0.15446501970291138, | |
| "learning_rate": 2.584352286418239e-05, | |
| "loss": 0.7926132678985596, | |
| "step": 424 | |
| }, | |
| { | |
| "epoch": 1.8127659574468085, | |
| "grad_norm": 0.15063929557800293, | |
| "learning_rate": 2.6105458470802563e-05, | |
| "loss": 0.8235700130462646, | |
| "step": 426 | |
| }, | |
| { | |
| "epoch": 1.8212765957446808, | |
| "grad_norm": 0.11289381980895996, | |
| "learning_rate": 2.6360364449637683e-05, | |
| "loss": 0.8320169448852539, | |
| "step": 428 | |
| }, | |
| { | |
| "epoch": 1.8297872340425532, | |
| "grad_norm": 0.14190740883350372, | |
| "learning_rate": 2.6608038451576528e-05, | |
| "loss": 0.8250552415847778, | |
| "step": 430 | |
| }, | |
| { | |
| "epoch": 1.8382978723404255, | |
| "grad_norm": 0.3685653507709503, | |
| "learning_rate": 2.684828386838569e-05, | |
| "loss": 0.8021817207336426, | |
| "step": 432 | |
| }, | |
| { | |
| "epoch": 1.8468085106382979, | |
| "grad_norm": 0.12853652238845825, | |
| "learning_rate": 2.7080909988780982e-05, | |
| "loss": 0.7879712581634521, | |
| "step": 434 | |
| }, | |
| { | |
| "epoch": 1.8553191489361702, | |
| "grad_norm": 0.3018054962158203, | |
| "learning_rate": 2.7305732149817502e-05, | |
| "loss": 0.7762024998664856, | |
| "step": 436 | |
| }, | |
| { | |
| "epoch": 1.8638297872340426, | |
| "grad_norm": 0.14753182232379913, | |
| "learning_rate": 2.7522571883478617e-05, | |
| "loss": 1.2395200729370117, | |
| "step": 438 | |
| }, | |
| { | |
| "epoch": 1.872340425531915, | |
| "grad_norm": 0.1134040355682373, | |
| "learning_rate": 2.77312570583471e-05, | |
| "loss": 0.8664835691452026, | |
| "step": 440 | |
| }, | |
| { | |
| "epoch": 1.8808510638297873, | |
| "grad_norm": 0.19325752556324005, | |
| "learning_rate": 2.7931622016246304e-05, | |
| "loss": 1.0493484735488892, | |
| "step": 442 | |
| }, | |
| { | |
| "epoch": 1.8893617021276596, | |
| "grad_norm": 0.15366853773593903, | |
| "learning_rate": 2.8123507703742727e-05, | |
| "loss": 0.8971328139305115, | |
| "step": 444 | |
| }, | |
| { | |
| "epoch": 1.8978723404255318, | |
| "grad_norm": 0.1713661104440689, | |
| "learning_rate": 2.8306761798405522e-05, | |
| "loss": 0.9821380376815796, | |
| "step": 446 | |
| }, | |
| { | |
| "epoch": 1.9063829787234043, | |
| "grad_norm": 0.4014803469181061, | |
| "learning_rate": 2.848123882972295e-05, | |
| "loss": 1.0046873092651367, | |
| "step": 448 | |
| }, | |
| { | |
| "epoch": 1.9148936170212765, | |
| "grad_norm": 0.1893383413553238, | |
| "learning_rate": 2.8646800294579514e-05, | |
| "loss": 0.647858738899231, | |
| "step": 450 | |
| }, | |
| { | |
| "epoch": 1.923404255319149, | |
| "grad_norm": 0.5622705221176147, | |
| "learning_rate": 2.8803314767202376e-05, | |
| "loss": 0.5715224742889404, | |
| "step": 452 | |
| }, | |
| { | |
| "epoch": 1.9319148936170212, | |
| "grad_norm": 0.14517684280872345, | |
| "learning_rate": 2.8950658003489534e-05, | |
| "loss": 0.8555135726928711, | |
| "step": 454 | |
| }, | |
| { | |
| "epoch": 1.9404255319148938, | |
| "grad_norm": 0.16155880689620972, | |
| "learning_rate": 2.908871303963711e-05, | |
| "loss": 0.9127392768859863, | |
| "step": 456 | |
| }, | |
| { | |
| "epoch": 1.9489361702127659, | |
| "grad_norm": 0.12455891817808151, | |
| "learning_rate": 2.9217370284987434e-05, | |
| "loss": 0.9905153512954712, | |
| "step": 458 | |
| }, | |
| { | |
| "epoch": 1.9574468085106385, | |
| "grad_norm": 0.1255619376897812, | |
| "learning_rate": 2.9336527609024075e-05, | |
| "loss": 1.022163987159729, | |
| "step": 460 | |
| }, | |
| { | |
| "epoch": 1.9659574468085106, | |
| "grad_norm": 0.38456299901008606, | |
| "learning_rate": 2.9446090422445016e-05, | |
| "loss": 0.8251804709434509, | |
| "step": 462 | |
| }, | |
| { | |
| "epoch": 1.974468085106383, | |
| "grad_norm": 0.12075914442539215, | |
| "learning_rate": 2.9545971752249376e-05, | |
| "loss": 1.0136280059814453, | |
| "step": 464 | |
| }, | |
| { | |
| "epoch": 1.9829787234042553, | |
| "grad_norm": 0.20235367119312286, | |
| "learning_rate": 2.9636092310778195e-05, | |
| "loss": 0.9567205905914307, | |
| "step": 466 | |
| }, | |
| { | |
| "epoch": 1.9914893617021276, | |
| "grad_norm": 0.542694091796875, | |
| "learning_rate": 2.9716380558654445e-05, | |
| "loss": 0.6730895042419434, | |
| "step": 468 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "grad_norm": 0.11073193699121475, | |
| "learning_rate": 2.9786772761572338e-05, | |
| "loss": 0.6498197317123413, | |
| "step": 470 | |
| }, | |
| { | |
| "epoch": 2.008510638297872, | |
| "grad_norm": 0.11644601076841354, | |
| "learning_rate": 2.9847213040890793e-05, | |
| "loss": 0.4679168164730072, | |
| "step": 472 | |
| }, | |
| { | |
| "epoch": 2.0170212765957447, | |
| "grad_norm": 0.14129982888698578, | |
| "learning_rate": 2.989765341799095e-05, | |
| "loss": 0.7698261737823486, | |
| "step": 474 | |
| }, | |
| { | |
| "epoch": 2.025531914893617, | |
| "grad_norm": 0.13531845808029175, | |
| "learning_rate": 2.9938053852362484e-05, | |
| "loss": 0.47750866413116455, | |
| "step": 476 | |
| }, | |
| { | |
| "epoch": 2.0340425531914894, | |
| "grad_norm": 0.37192943692207336, | |
| "learning_rate": 2.99683822733885e-05, | |
| "loss": 0.6370930075645447, | |
| "step": 478 | |
| }, | |
| { | |
| "epoch": 2.0425531914893615, | |
| "grad_norm": 0.13162164390087128, | |
| "learning_rate": 2.9988614605803806e-05, | |
| "loss": 0.44695600867271423, | |
| "step": 480 | |
| }, | |
| { | |
| "epoch": 2.051063829787234, | |
| "grad_norm": 0.20202530920505524, | |
| "learning_rate": 2.9998734788806287e-05, | |
| "loss": 0.4687855839729309, | |
| "step": 482 | |
| }, | |
| { | |
| "epoch": 2.0595744680851062, | |
| "grad_norm": 0.09361886233091354, | |
| "learning_rate": 2.9998734788806287e-05, | |
| "loss": 0.28051936626434326, | |
| "step": 484 | |
| }, | |
| { | |
| "epoch": 2.068085106382979, | |
| "grad_norm": 0.1879085749387741, | |
| "learning_rate": 2.9988614605803806e-05, | |
| "loss": 0.6579060554504395, | |
| "step": 486 | |
| }, | |
| { | |
| "epoch": 2.076595744680851, | |
| "grad_norm": 0.24100597202777863, | |
| "learning_rate": 2.99683822733885e-05, | |
| "loss": 0.5791319608688354, | |
| "step": 488 | |
| }, | |
| { | |
| "epoch": 2.0851063829787235, | |
| "grad_norm": 0.15436111390590668, | |
| "learning_rate": 2.9938053852362484e-05, | |
| "loss": 0.5440673828125, | |
| "step": 490 | |
| }, | |
| { | |
| "epoch": 2.0936170212765957, | |
| "grad_norm": 0.1665167361497879, | |
| "learning_rate": 2.989765341799095e-05, | |
| "loss": 0.7786204218864441, | |
| "step": 492 | |
| }, | |
| { | |
| "epoch": 2.1021276595744682, | |
| "grad_norm": 0.12963709235191345, | |
| "learning_rate": 2.9847213040890793e-05, | |
| "loss": 0.6289861798286438, | |
| "step": 494 | |
| }, | |
| { | |
| "epoch": 2.1106382978723404, | |
| "grad_norm": 0.23455171287059784, | |
| "learning_rate": 2.9786772761572338e-05, | |
| "loss": 0.7049335241317749, | |
| "step": 496 | |
| }, | |
| { | |
| "epoch": 2.119148936170213, | |
| "grad_norm": 0.10971709340810776, | |
| "learning_rate": 2.9716380558654445e-05, | |
| "loss": 0.9018527269363403, | |
| "step": 498 | |
| }, | |
| { | |
| "epoch": 2.127659574468085, | |
| "grad_norm": 0.3122045695781708, | |
| "learning_rate": 2.9636092310778195e-05, | |
| "loss": 0.6065503358840942, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 2.1361702127659576, | |
| "grad_norm": 0.15959368646144867, | |
| "learning_rate": 2.9545971752249383e-05, | |
| "loss": 0.5845508575439453, | |
| "step": 502 | |
| }, | |
| { | |
| "epoch": 2.1446808510638298, | |
| "grad_norm": 0.12507954239845276, | |
| "learning_rate": 2.9446090422445016e-05, | |
| "loss": 0.6952933669090271, | |
| "step": 504 | |
| }, | |
| { | |
| "epoch": 2.153191489361702, | |
| "grad_norm": 0.12080808728933334, | |
| "learning_rate": 2.933652760902408e-05, | |
| "loss": 0.7144219279289246, | |
| "step": 506 | |
| }, | |
| { | |
| "epoch": 2.1617021276595745, | |
| "grad_norm": 0.3896176815032959, | |
| "learning_rate": 2.9217370284987434e-05, | |
| "loss": 0.5674622654914856, | |
| "step": 508 | |
| }, | |
| { | |
| "epoch": 2.1702127659574466, | |
| "grad_norm": 0.14054062962532043, | |
| "learning_rate": 2.908871303963712e-05, | |
| "loss": 0.28315988183021545, | |
| "step": 510 | |
| }, | |
| { | |
| "epoch": 2.178723404255319, | |
| "grad_norm": 0.1403239667415619, | |
| "learning_rate": 2.8950658003489534e-05, | |
| "loss": 0.430463582277298, | |
| "step": 512 | |
| }, | |
| { | |
| "epoch": 2.1872340425531913, | |
| "grad_norm": 0.14137110114097595, | |
| "learning_rate": 2.8803314767202387e-05, | |
| "loss": 0.4114861488342285, | |
| "step": 514 | |
| }, | |
| { | |
| "epoch": 2.195744680851064, | |
| "grad_norm": 0.1280188113451004, | |
| "learning_rate": 2.864680029457952e-05, | |
| "loss": 0.5308094620704651, | |
| "step": 516 | |
| }, | |
| { | |
| "epoch": 2.204255319148936, | |
| "grad_norm": 0.25649434328079224, | |
| "learning_rate": 2.8481238829722946e-05, | |
| "loss": 0.6915931701660156, | |
| "step": 518 | |
| }, | |
| { | |
| "epoch": 2.2127659574468086, | |
| "grad_norm": 0.10003609210252762, | |
| "learning_rate": 2.8306761798405533e-05, | |
| "loss": 0.4150570333003998, | |
| "step": 520 | |
| }, | |
| { | |
| "epoch": 2.2212765957446807, | |
| "grad_norm": 0.15974430739879608, | |
| "learning_rate": 2.812350770374273e-05, | |
| "loss": 0.36256328225135803, | |
| "step": 522 | |
| }, | |
| { | |
| "epoch": 2.2297872340425533, | |
| "grad_norm": 0.38845136761665344, | |
| "learning_rate": 2.793162201624631e-05, | |
| "loss": 0.37502366304397583, | |
| "step": 524 | |
| }, | |
| { | |
| "epoch": 2.2382978723404254, | |
| "grad_norm": 0.1723078191280365, | |
| "learning_rate": 2.7731257058347098e-05, | |
| "loss": 0.7607256770133972, | |
| "step": 526 | |
| }, | |
| { | |
| "epoch": 2.246808510638298, | |
| "grad_norm": 0.255744993686676, | |
| "learning_rate": 2.752257188347862e-05, | |
| "loss": 0.6797502040863037, | |
| "step": 528 | |
| }, | |
| { | |
| "epoch": 2.25531914893617, | |
| "grad_norm": 0.3702438771724701, | |
| "learning_rate": 2.7305732149817512e-05, | |
| "loss": 0.7978914380073547, | |
| "step": 530 | |
| }, | |
| { | |
| "epoch": 2.2638297872340427, | |
| "grad_norm": 0.26327913999557495, | |
| "learning_rate": 2.708090998878098e-05, | |
| "loss": 0.5080232620239258, | |
| "step": 532 | |
| }, | |
| { | |
| "epoch": 2.272340425531915, | |
| "grad_norm": 0.14510440826416016, | |
| "learning_rate": 2.68482838683857e-05, | |
| "loss": 0.2904958724975586, | |
| "step": 534 | |
| }, | |
| { | |
| "epoch": 2.2808510638297874, | |
| "grad_norm": 0.1330595761537552, | |
| "learning_rate": 2.6608038451576528e-05, | |
| "loss": 0.5988547205924988, | |
| "step": 536 | |
| }, | |
| { | |
| "epoch": 2.2893617021276595, | |
| "grad_norm": 0.15609091520309448, | |
| "learning_rate": 2.6360364449637686e-05, | |
| "loss": 0.8217217922210693, | |
| "step": 538 | |
| }, | |
| { | |
| "epoch": 2.297872340425532, | |
| "grad_norm": 0.27816882729530334, | |
| "learning_rate": 2.6105458470802563e-05, | |
| "loss": 0.29454290866851807, | |
| "step": 540 | |
| }, | |
| { | |
| "epoch": 2.3063829787234043, | |
| "grad_norm": 0.16807164251804352, | |
| "learning_rate": 2.5843522864182397e-05, | |
| "loss": 0.2661294937133789, | |
| "step": 542 | |
| }, | |
| { | |
| "epoch": 2.3148936170212764, | |
| "grad_norm": 0.2340618073940277, | |
| "learning_rate": 2.557476555913786e-05, | |
| "loss": 0.4138932228088379, | |
| "step": 544 | |
| }, | |
| { | |
| "epoch": 2.323404255319149, | |
| "grad_norm": 0.12209529429674149, | |
| "learning_rate": 2.52993999002208e-05, | |
| "loss": 0.47122570872306824, | |
| "step": 546 | |
| }, | |
| { | |
| "epoch": 2.331914893617021, | |
| "grad_norm": 0.22724129259586334, | |
| "learning_rate": 2.501764447781743e-05, | |
| "loss": 0.5940241813659668, | |
| "step": 548 | |
| }, | |
| { | |
| "epoch": 2.3404255319148937, | |
| "grad_norm": 0.10988523811101913, | |
| "learning_rate": 2.4729722954627106e-05, | |
| "loss": 0.443272203207016, | |
| "step": 550 | |
| }, | |
| { | |
| "epoch": 2.348936170212766, | |
| "grad_norm": 0.3907153904438019, | |
| "learning_rate": 2.4435863888114814e-05, | |
| "loss": 0.612157940864563, | |
| "step": 552 | |
| }, | |
| { | |
| "epoch": 2.3574468085106384, | |
| "grad_norm": 0.15928088128566742, | |
| "learning_rate": 2.413630054907798e-05, | |
| "loss": 0.6884116530418396, | |
| "step": 554 | |
| }, | |
| { | |
| "epoch": 2.3659574468085105, | |
| "grad_norm": 0.12084610015153885, | |
| "learning_rate": 2.383127073647171e-05, | |
| "loss": 0.6459460854530334, | |
| "step": 556 | |
| }, | |
| { | |
| "epoch": 2.374468085106383, | |
| "grad_norm": 0.14340610802173615, | |
| "learning_rate": 2.3521016588639598e-05, | |
| "loss": 0.5485546588897705, | |
| "step": 558 | |
| }, | |
| { | |
| "epoch": 2.382978723404255, | |
| "grad_norm": 0.1330912858247757, | |
| "learning_rate": 2.32057843910998e-05, | |
| "loss": 0.6871886253356934, | |
| "step": 560 | |
| }, | |
| { | |
| "epoch": 2.391489361702128, | |
| "grad_norm": 0.1461075246334076, | |
| "learning_rate": 2.2885824381039024e-05, | |
| "loss": 0.5943357944488525, | |
| "step": 562 | |
| }, | |
| { | |
| "epoch": 2.4, | |
| "grad_norm": 0.4632057547569275, | |
| "learning_rate": 2.2561390548669552e-05, | |
| "loss": 0.44520270824432373, | |
| "step": 564 | |
| }, | |
| { | |
| "epoch": 2.4085106382978725, | |
| "grad_norm": 0.16760534048080444, | |
| "learning_rate": 2.2232740435607067e-05, | |
| "loss": 0.4427604377269745, | |
| "step": 566 | |
| }, | |
| { | |
| "epoch": 2.4170212765957446, | |
| "grad_norm": 0.17341409623622894, | |
| "learning_rate": 2.1900134930429403e-05, | |
| "loss": 0.7208576798439026, | |
| "step": 568 | |
| }, | |
| { | |
| "epoch": 2.425531914893617, | |
| "grad_norm": 0.12514737248420715, | |
| "learning_rate": 2.1563838061578264e-05, | |
| "loss": 0.6061448454856873, | |
| "step": 570 | |
| }, | |
| { | |
| "epoch": 2.4340425531914893, | |
| "grad_norm": 0.275291383266449, | |
| "learning_rate": 2.122411678776855e-05, | |
| "loss": 0.48493388295173645, | |
| "step": 572 | |
| }, | |
| { | |
| "epoch": 2.4425531914893615, | |
| "grad_norm": 0.35213977098464966, | |
| "learning_rate": 2.0881240786071588e-05, | |
| "loss": 0.6101267337799072, | |
| "step": 574 | |
| }, | |
| { | |
| "epoch": 2.451063829787234, | |
| "grad_norm": 0.3173239827156067, | |
| "learning_rate": 2.0535482237840398e-05, | |
| "loss": 0.6204816102981567, | |
| "step": 576 | |
| }, | |
| { | |
| "epoch": 2.4595744680851066, | |
| "grad_norm": 0.12788061797618866, | |
| "learning_rate": 2.018711561264715e-05, | |
| "loss": 0.5858104825019836, | |
| "step": 578 | |
| }, | |
| { | |
| "epoch": 2.4680851063829787, | |
| "grad_norm": 0.21373364329338074, | |
| "learning_rate": 1.9836417450403978e-05, | |
| "loss": 0.5342880487442017, | |
| "step": 580 | |
| }, | |
| { | |
| "epoch": 2.476595744680851, | |
| "grad_norm": 0.1782151162624359, | |
| "learning_rate": 1.948366614184062e-05, | |
| "loss": 0.6852924823760986, | |
| "step": 582 | |
| }, | |
| { | |
| "epoch": 2.4851063829787234, | |
| "grad_norm": 0.12118978798389435, | |
| "learning_rate": 1.9129141707512514e-05, | |
| "loss": 0.5905125737190247, | |
| "step": 584 | |
| }, | |
| { | |
| "epoch": 2.4936170212765956, | |
| "grad_norm": 0.6266365647315979, | |
| "learning_rate": 1.877312557551536e-05, | |
| "loss": 0.6729872822761536, | |
| "step": 586 | |
| }, | |
| { | |
| "epoch": 2.502127659574468, | |
| "grad_norm": 0.13392919301986694, | |
| "learning_rate": 1.841590035808227e-05, | |
| "loss": 0.7322168350219727, | |
| "step": 588 | |
| }, | |
| { | |
| "epoch": 2.5106382978723403, | |
| "grad_norm": 0.27286240458488464, | |
| "learning_rate": 1.8057749627240822e-05, | |
| "loss": 0.6285769939422607, | |
| "step": 590 | |
| }, | |
| { | |
| "epoch": 2.519148936170213, | |
| "grad_norm": 0.10404688119888306, | |
| "learning_rate": 1.7698957689708436e-05, | |
| "loss": 0.766042172908783, | |
| "step": 592 | |
| }, | |
| { | |
| "epoch": 2.527659574468085, | |
| "grad_norm": 0.11238724738359451, | |
| "learning_rate": 1.7339809361204255e-05, | |
| "loss": 0.43586933612823486, | |
| "step": 594 | |
| }, | |
| { | |
| "epoch": 2.5361702127659576, | |
| "grad_norm": 0.12042172253131866, | |
| "learning_rate": 1.6980589740357287e-05, | |
| "loss": 0.6923111081123352, | |
| "step": 596 | |
| }, | |
| { | |
| "epoch": 2.5446808510638297, | |
| "grad_norm": 0.2841528058052063, | |
| "learning_rate": 1.6621583982389714e-05, | |
| "loss": 0.47686851024627686, | |
| "step": 598 | |
| }, | |
| { | |
| "epoch": 2.5531914893617023, | |
| "grad_norm": 0.11969015002250671, | |
| "learning_rate": 1.626307707275533e-05, | |
| "loss": 0.5013985633850098, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 2.5617021276595744, | |
| "grad_norm": 0.09876365959644318, | |
| "learning_rate": 1.59053536009129e-05, | |
| "loss": 0.6609531044960022, | |
| "step": 602 | |
| }, | |
| { | |
| "epoch": 2.570212765957447, | |
| "grad_norm": 0.4649478495121002, | |
| "learning_rate": 1.5548697534413653e-05, | |
| "loss": 0.6927770972251892, | |
| "step": 604 | |
| }, | |
| { | |
| "epoch": 2.578723404255319, | |
| "grad_norm": 0.1714295893907547, | |
| "learning_rate": 1.5193391993482573e-05, | |
| "loss": 0.8361954689025879, | |
| "step": 606 | |
| }, | |
| { | |
| "epoch": 2.5872340425531917, | |
| "grad_norm": 0.15755438804626465, | |
| "learning_rate": 1.483971902627239e-05, | |
| "loss": 0.5158759951591492, | |
| "step": 608 | |
| }, | |
| { | |
| "epoch": 2.595744680851064, | |
| "grad_norm": 0.11576741188764572, | |
| "learning_rate": 1.4487959384968272e-05, | |
| "loss": 0.6141366362571716, | |
| "step": 610 | |
| }, | |
| { | |
| "epoch": 2.604255319148936, | |
| "grad_norm": 0.2397107034921646, | |
| "learning_rate": 1.4138392302921813e-05, | |
| "loss": 0.669286847114563, | |
| "step": 612 | |
| }, | |
| { | |
| "epoch": 2.6127659574468085, | |
| "grad_norm": 0.17677916586399078, | |
| "learning_rate": 1.3791295272990178e-05, | |
| "loss": 0.8155961036682129, | |
| "step": 614 | |
| }, | |
| { | |
| "epoch": 2.621276595744681, | |
| "grad_norm": 0.10534735023975372, | |
| "learning_rate": 1.344694382725719e-05, | |
| "loss": 0.6486954689025879, | |
| "step": 616 | |
| }, | |
| { | |
| "epoch": 2.629787234042553, | |
| "grad_norm": 0.17969125509262085, | |
| "learning_rate": 1.3105611318310828e-05, | |
| "loss": 0.7624613046646118, | |
| "step": 618 | |
| }, | |
| { | |
| "epoch": 2.6382978723404253, | |
| "grad_norm": 0.10477960854768753, | |
| "learning_rate": 1.2767568702250834e-05, | |
| "loss": 0.44974949955940247, | |
| "step": 620 | |
| }, | |
| { | |
| "epoch": 2.646808510638298, | |
| "grad_norm": 0.41400146484375, | |
| "learning_rate": 1.2433084323598806e-05, | |
| "loss": 0.5666506886482239, | |
| "step": 622 | |
| }, | |
| { | |
| "epoch": 2.65531914893617, | |
| "grad_norm": 0.12478289753198624, | |
| "learning_rate": 1.2102423702281114e-05, | |
| "loss": 0.7800987362861633, | |
| "step": 624 | |
| }, | |
| { | |
| "epoch": 2.6638297872340426, | |
| "grad_norm": 0.16413263976573944, | |
| "learning_rate": 1.1775849322854515e-05, | |
| "loss": 0.5514877438545227, | |
| "step": 626 | |
| }, | |
| { | |
| "epoch": 2.6723404255319148, | |
| "grad_norm": 0.09944586455821991, | |
| "learning_rate": 1.1453620426140796e-05, | |
| "loss": 0.554164469242096, | |
| "step": 628 | |
| }, | |
| { | |
| "epoch": 2.6808510638297873, | |
| "grad_norm": 0.2592844069004059, | |
| "learning_rate": 1.1135992803436701e-05, | |
| "loss": 0.5563373565673828, | |
| "step": 630 | |
| }, | |
| { | |
| "epoch": 2.6893617021276595, | |
| "grad_norm": 0.2102140486240387, | |
| "learning_rate": 1.0823218593462006e-05, | |
| "loss": 0.5627699494361877, | |
| "step": 632 | |
| }, | |
| { | |
| "epoch": 2.697872340425532, | |
| "grad_norm": 0.1483510136604309, | |
| "learning_rate": 1.0515546082207089e-05, | |
| "loss": 0.698486328125, | |
| "step": 634 | |
| }, | |
| { | |
| "epoch": 2.706382978723404, | |
| "grad_norm": 0.11229369789361954, | |
| "learning_rate": 1.0213219505839e-05, | |
| "loss": 0.7639659643173218, | |
| "step": 636 | |
| }, | |
| { | |
| "epoch": 2.7148936170212767, | |
| "grad_norm": 0.10941766947507858, | |
| "learning_rate": 9.916478856822011e-06, | |
| "loss": 0.6158884763717651, | |
| "step": 638 | |
| }, | |
| { | |
| "epoch": 2.723404255319149, | |
| "grad_norm": 0.1750131994485855, | |
| "learning_rate": 9.625559693407416e-06, | |
| "loss": 0.9152920842170715, | |
| "step": 640 | |
| }, | |
| { | |
| "epoch": 2.731914893617021, | |
| "grad_norm": 0.18939398229122162, | |
| "learning_rate": 9.34069295264279e-06, | |
| "loss": 0.8749684691429138, | |
| "step": 642 | |
| }, | |
| { | |
| "epoch": 2.7404255319148936, | |
| "grad_norm": 0.0939684808254242, | |
| "learning_rate": 9.062104767049962e-06, | |
| "loss": 0.8344287872314453, | |
| "step": 644 | |
| }, | |
| { | |
| "epoch": 2.748936170212766, | |
| "grad_norm": 0.10787810385227203, | |
| "learning_rate": 8.790016285116773e-06, | |
| "loss": 0.45629745721817017, | |
| "step": 646 | |
| }, | |
| { | |
| "epoch": 2.7574468085106383, | |
| "grad_norm": 0.10236497223377228, | |
| "learning_rate": 8.5246434957453e-06, | |
| "loss": 0.3841492533683777, | |
| "step": 648 | |
| }, | |
| { | |
| "epoch": 2.7659574468085104, | |
| "grad_norm": 0.31643790006637573, | |
| "learning_rate": 8.266197056795886e-06, | |
| "loss": 0.4197019338607788, | |
| "step": 650 | |
| }, | |
| { | |
| "epoch": 2.774468085106383, | |
| "grad_norm": 0.15869994461536407, | |
| "learning_rate": 8.014882127862926e-06, | |
| "loss": 0.6872882843017578, | |
| "step": 652 | |
| }, | |
| { | |
| "epoch": 2.7829787234042556, | |
| "grad_norm": 0.15020446479320526, | |
| "learning_rate": 7.77089820741542e-06, | |
| "loss": 0.6434124112129211, | |
| "step": 654 | |
| }, | |
| { | |
| "epoch": 2.7914893617021277, | |
| "grad_norm": 0.13434921205043793, | |
| "learning_rate": 7.534438974431356e-06, | |
| "loss": 0.5578271150588989, | |
| "step": 656 | |
| }, | |
| { | |
| "epoch": 2.8, | |
| "grad_norm": 0.1459910273551941, | |
| "learning_rate": 7.305692134651748e-06, | |
| "loss": 0.6800917983055115, | |
| "step": 658 | |
| }, | |
| { | |
| "epoch": 2.8085106382978724, | |
| "grad_norm": 0.13331423699855804, | |
| "learning_rate": 7.0848392715763e-06, | |
| "loss": 0.4372290074825287, | |
| "step": 660 | |
| }, | |
| { | |
| "epoch": 2.8170212765957445, | |
| "grad_norm": 0.11432936787605286, | |
| "learning_rate": 6.872055702319048e-06, | |
| "loss": 0.3426898121833801, | |
| "step": 662 | |
| }, | |
| { | |
| "epoch": 2.825531914893617, | |
| "grad_norm": 0.47585824131965637, | |
| "learning_rate": 6.66751033843842e-06, | |
| "loss": 0.5213098526000977, | |
| "step": 664 | |
| }, | |
| { | |
| "epoch": 2.8340425531914892, | |
| "grad_norm": 0.23138658702373505, | |
| "learning_rate": 6.471365551852014e-06, | |
| "loss": 0.40890708565711975, | |
| "step": 666 | |
| }, | |
| { | |
| "epoch": 2.842553191489362, | |
| "grad_norm": 0.1170782670378685, | |
| "learning_rate": 6.283777045942801e-06, | |
| "loss": 0.6171141266822815, | |
| "step": 668 | |
| }, | |
| { | |
| "epoch": 2.851063829787234, | |
| "grad_norm": 0.2987634837627411, | |
| "learning_rate": 6.104893731958872e-06, | |
| "loss": 0.79531329870224, | |
| "step": 670 | |
| }, | |
| { | |
| "epoch": 2.8595744680851065, | |
| "grad_norm": 0.16172188520431519, | |
| "learning_rate": 5.934857610804912e-06, | |
| "loss": 0.6109257936477661, | |
| "step": 672 | |
| }, | |
| { | |
| "epoch": 2.8680851063829786, | |
| "grad_norm": 0.19967441260814667, | |
| "learning_rate": 5.773803660319231e-06, | |
| "loss": 0.7840122580528259, | |
| "step": 674 | |
| }, | |
| { | |
| "epoch": 2.876595744680851, | |
| "grad_norm": 0.09976833313703537, | |
| "learning_rate": 5.6218597281258834e-06, | |
| "loss": 0.4490019679069519, | |
| "step": 676 | |
| }, | |
| { | |
| "epoch": 2.8851063829787233, | |
| "grad_norm": 0.1243966668844223, | |
| "learning_rate": 5.479146430146783e-06, | |
| "loss": 0.5424228310585022, | |
| "step": 678 | |
| }, | |
| { | |
| "epoch": 2.8936170212765955, | |
| "grad_norm": 0.17045721411705017, | |
| "learning_rate": 5.3457770548545805e-06, | |
| "loss": 0.5151770114898682, | |
| "step": 680 | |
| }, | |
| { | |
| "epoch": 2.902127659574468, | |
| "grad_norm": 0.1732950657606125, | |
| "learning_rate": 5.22185747334215e-06, | |
| "loss": 0.7383885979652405, | |
| "step": 682 | |
| }, | |
| { | |
| "epoch": 2.9106382978723406, | |
| "grad_norm": 0.21424034237861633, | |
| "learning_rate": 5.107486055280145e-06, | |
| "loss": 0.4885507822036743, | |
| "step": 684 | |
| }, | |
| { | |
| "epoch": 2.9191489361702128, | |
| "grad_norm": 1.2460668087005615, | |
| "learning_rate": 5.002753590829352e-06, | |
| "loss": 0.29081299901008606, | |
| "step": 686 | |
| }, | |
| { | |
| "epoch": 2.927659574468085, | |
| "grad_norm": 0.11700358986854553, | |
| "learning_rate": 4.9077432185697e-06, | |
| "loss": 0.436556339263916, | |
| "step": 688 | |
| }, | |
| { | |
| "epoch": 2.9361702127659575, | |
| "grad_norm": 0.2099040299654007, | |
| "learning_rate": 4.822530359503391e-06, | |
| "loss": 0.759993314743042, | |
| "step": 690 | |
| }, | |
| { | |
| "epoch": 2.94468085106383, | |
| "grad_norm": 0.12797491252422333, | |
| "learning_rate": 4.747182657184251e-06, | |
| "loss": 0.6529436111450195, | |
| "step": 692 | |
| }, | |
| { | |
| "epoch": 2.953191489361702, | |
| "grad_norm": 0.122190460562706, | |
| "learning_rate": 4.681759924021033e-06, | |
| "loss": 0.6622499227523804, | |
| "step": 694 | |
| }, | |
| { | |
| "epoch": 2.9617021276595743, | |
| "grad_norm": 0.19574476778507233, | |
| "learning_rate": 4.626314093797213e-06, | |
| "loss": 0.5456458926200867, | |
| "step": 696 | |
| }, | |
| { | |
| "epoch": 2.970212765957447, | |
| "grad_norm": 0.15566527843475342, | |
| "learning_rate": 4.580889180444988e-06, | |
| "loss": 0.738301157951355, | |
| "step": 698 | |
| }, | |
| { | |
| "epoch": 2.978723404255319, | |
| "grad_norm": 0.13139484822750092, | |
| "learning_rate": 4.5455212431061985e-06, | |
| "loss": 0.4914497435092926, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 2.9872340425531916, | |
| "grad_norm": 0.24261365830898285, | |
| "learning_rate": 4.520238357507899e-06, | |
| "loss": 0.5938633680343628, | |
| "step": 702 | |
| }, | |
| { | |
| "epoch": 2.9957446808510637, | |
| "grad_norm": 0.09112333506345749, | |
| "learning_rate": 4.505060593675342e-06, | |
| "loss": 0.4166713058948517, | |
| "step": 704 | |
| }, | |
| { | |
| "epoch": 3.0, | |
| "step": 705, | |
| "total_flos": 3.0241141054567875e+18, | |
| "train_loss": 0.8476084547685393, | |
| "train_runtime": 9225.3576, | |
| "train_samples_per_second": 4.891, | |
| "train_steps_per_second": 0.076 | |
| } | |
| ], | |
| "logging_steps": 2, | |
| "max_steps": 705, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 3, | |
| "save_steps": 99999, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": true | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 3.0241141054567875e+18, | |
| "train_batch_size": 2, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |