Image-Text-to-Text
Transformers
Safetensors
qwen3_5
llama-factory
full
Generated from Trainer
conversational
Instructions to use furproxy/9b-62 with libraries, inference providers, notebooks, and local apps. Follow these links to get started.
- Libraries
- Transformers
How to use furproxy/9b-62 with Transformers:
# Use a pipeline as a high-level helper
from transformers import pipeline

pipe = pipeline("image-text-to-text", model="furproxy/9b-62")
messages = [
    {
        "role": "user",
        "content": [
            {"type": "image", "url": "https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/p-blog/candy.JPG"},
            {"type": "text", "text": "What animal is on the candy?"}
        ]
    },
]
pipe(text=messages)

# Load model directly
from transformers import AutoProcessor, AutoModelForImageTextToText

processor = AutoProcessor.from_pretrained("furproxy/9b-62")
model = AutoModelForImageTextToText.from_pretrained("furproxy/9b-62")
messages = [
    {
        "role": "user",
        "content": [
            {"type": "image", "url": "https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/p-blog/candy.JPG"},
            {"type": "text", "text": "What animal is on the candy?"}
        ]
    },
]
inputs = processor.apply_chat_template(
    messages,
    add_generation_prompt=True,
    tokenize=True,
    return_dict=True,
    return_tensors="pt",
).to(model.device)

outputs = model.generate(**inputs, max_new_tokens=40)
print(processor.decode(outputs[0][inputs["input_ids"].shape[-1]:]))
- Notebooks
- Google Colab
- Kaggle
- Local Apps Settings
- vLLM
How to use furproxy/9b-62 with vLLM:
Install from pip and serve model
# Install vLLM from pip:
pip install vllm
# Start the vLLM server:
vllm serve "furproxy/9b-62"
# Call the server using curl (OpenAI-compatible API):
curl -X POST "http://localhost:8000/v1/chat/completions" \
    -H "Content-Type: application/json" \
    --data '{
        "model": "furproxy/9b-62",
        "messages": [
            {
                "role": "user",
                "content": [
                    {
                        "type": "text",
                        "text": "Describe this image in one sentence."
                    },
                    {
                        "type": "image_url",
                        "image_url": {
                            "url": "https://cdn.britannica.com/61/93061-050-99147DCE/Statue-of-Liberty-Island-New-York-Bay.jpg"
                        }
                    }
                ]
            }
        ]
    }'
Use Docker
docker model run hf.co/furproxy/9b-62
- SGLang
How to use furproxy/9b-62 with SGLang:
Install from pip and serve model
# Install SGLang from pip:
pip install sglang
# Start the SGLang server:
python3 -m sglang.launch_server \
    --model-path "furproxy/9b-62" \
    --host 0.0.0.0 \
    --port 30000
# Call the server using curl (OpenAI-compatible API):
curl -X POST "http://localhost:30000/v1/chat/completions" \
    -H "Content-Type: application/json" \
    --data '{
        "model": "furproxy/9b-62",
        "messages": [
            {
                "role": "user",
                "content": [
                    {
                        "type": "text",
                        "text": "Describe this image in one sentence."
                    },
                    {
                        "type": "image_url",
                        "image_url": {
                            "url": "https://cdn.britannica.com/61/93061-050-99147DCE/Statue-of-Liberty-Island-New-York-Bay.jpg"
                        }
                    }
                ]
            }
        ]
    }'
Use Docker images
docker run --gpus all \
    --shm-size 32g \
    -p 30000:30000 \
    -v ~/.cache/huggingface:/root/.cache/huggingface \
    --env "HF_TOKEN=<secret>" \
    --ipc=host \
    lmsysorg/sglang:latest \
    python3 -m sglang.launch_server \
        --model-path "furproxy/9b-62" \
        --host 0.0.0.0 \
        --port 30000
# Call the server using curl (OpenAI-compatible API):
curl -X POST "http://localhost:30000/v1/chat/completions" \
    -H "Content-Type: application/json" \
    --data '{
        "model": "furproxy/9b-62",
        "messages": [
            {
                "role": "user",
                "content": [
                    {
                        "type": "text",
                        "text": "Describe this image in one sentence."
                    },
                    {
                        "type": "image_url",
                        "image_url": {
                            "url": "https://cdn.britannica.com/61/93061-050-99147DCE/Statue-of-Liberty-Island-New-York-Bay.jpg"
                        }
                    }
                ]
            }
        ]
    }'
- Docker Model Runner
How to use furproxy/9b-62 with Docker Model Runner:
docker model run hf.co/furproxy/9b-62
| { | |
| "best_global_step": null, | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 4.0, | |
| "eval_steps": 500, | |
| "global_step": 928, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.008620689655172414, | |
| "grad_norm": 0.752173662185669, | |
| "learning_rate": 4.2553191489361704e-07, | |
| "loss": 3.6935362815856934, | |
| "step": 2 | |
| }, | |
| { | |
| "epoch": 0.017241379310344827, | |
| "grad_norm": 3.0686516761779785, | |
| "learning_rate": 1.276595744680851e-06, | |
| "loss": 2.4514498710632324, | |
| "step": 4 | |
| }, | |
| { | |
| "epoch": 0.02586206896551724, | |
| "grad_norm": 3.670806407928467, | |
| "learning_rate": 2.1276595744680853e-06, | |
| "loss": 2.2222416400909424, | |
| "step": 6 | |
| }, | |
| { | |
| "epoch": 0.034482758620689655, | |
| "grad_norm": 0.4838178753852844, | |
| "learning_rate": 2.978723404255319e-06, | |
| "loss": 1.9781272411346436, | |
| "step": 8 | |
| }, | |
| { | |
| "epoch": 0.04310344827586207, | |
| "grad_norm": 3.7360541820526123, | |
| "learning_rate": 3.8297872340425535e-06, | |
| "loss": 1.768671989440918, | |
| "step": 10 | |
| }, | |
| { | |
| "epoch": 0.05172413793103448, | |
| "grad_norm": 0.6814174056053162, | |
| "learning_rate": 4.680851063829788e-06, | |
| "loss": 1.7612519264221191, | |
| "step": 12 | |
| }, | |
| { | |
| "epoch": 0.0603448275862069, | |
| "grad_norm": 0.6352251768112183, | |
| "learning_rate": 5.531914893617022e-06, | |
| "loss": 1.8349356651306152, | |
| "step": 14 | |
| }, | |
| { | |
| "epoch": 0.06896551724137931, | |
| "grad_norm": 0.4332002103328705, | |
| "learning_rate": 6.382978723404256e-06, | |
| "loss": 0.766176164150238, | |
| "step": 16 | |
| }, | |
| { | |
| "epoch": 0.07758620689655173, | |
| "grad_norm": 0.5920107364654541, | |
| "learning_rate": 7.234042553191491e-06, | |
| "loss": 0.9314699769020081, | |
| "step": 18 | |
| }, | |
| { | |
| "epoch": 0.08620689655172414, | |
| "grad_norm": 0.40042662620544434, | |
| "learning_rate": 8.085106382978723e-06, | |
| "loss": 1.5076656341552734, | |
| "step": 20 | |
| }, | |
| { | |
| "epoch": 0.09482758620689655, | |
| "grad_norm": 0.33145302534103394, | |
| "learning_rate": 8.936170212765958e-06, | |
| "loss": 1.6648786067962646, | |
| "step": 22 | |
| }, | |
| { | |
| "epoch": 0.10344827586206896, | |
| "grad_norm": 0.7193792462348938, | |
| "learning_rate": 9.787234042553192e-06, | |
| "loss": 1.508145809173584, | |
| "step": 24 | |
| }, | |
| { | |
| "epoch": 0.11206896551724138, | |
| "grad_norm": 2.580322027206421, | |
| "learning_rate": 1.0638297872340426e-05, | |
| "loss": 1.3233157396316528, | |
| "step": 26 | |
| }, | |
| { | |
| "epoch": 0.1206896551724138, | |
| "grad_norm": 1.6866916418075562, | |
| "learning_rate": 1.1489361702127662e-05, | |
| "loss": 0.5060393214225769, | |
| "step": 28 | |
| }, | |
| { | |
| "epoch": 0.12931034482758622, | |
| "grad_norm": 0.40954306721687317, | |
| "learning_rate": 1.2340425531914895e-05, | |
| "loss": 1.489959716796875, | |
| "step": 30 | |
| }, | |
| { | |
| "epoch": 0.13793103448275862, | |
| "grad_norm": 0.9065203666687012, | |
| "learning_rate": 1.3191489361702127e-05, | |
| "loss": 1.14947509765625, | |
| "step": 32 | |
| }, | |
| { | |
| "epoch": 0.14655172413793102, | |
| "grad_norm": 0.3966251611709595, | |
| "learning_rate": 1.4042553191489363e-05, | |
| "loss": 1.3647483587265015, | |
| "step": 34 | |
| }, | |
| { | |
| "epoch": 0.15517241379310345, | |
| "grad_norm": 0.3009808659553528, | |
| "learning_rate": 1.4893617021276596e-05, | |
| "loss": 1.4109106063842773, | |
| "step": 36 | |
| }, | |
| { | |
| "epoch": 0.16379310344827586, | |
| "grad_norm": 0.33264291286468506, | |
| "learning_rate": 1.5744680851063832e-05, | |
| "loss": 1.3988299369812012, | |
| "step": 38 | |
| }, | |
| { | |
| "epoch": 0.1724137931034483, | |
| "grad_norm": 0.3600396513938904, | |
| "learning_rate": 1.6595744680851064e-05, | |
| "loss": 1.4429720640182495, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 0.1810344827586207, | |
| "grad_norm": 0.65086430311203, | |
| "learning_rate": 1.74468085106383e-05, | |
| "loss": 1.5474897623062134, | |
| "step": 42 | |
| }, | |
| { | |
| "epoch": 0.1896551724137931, | |
| "grad_norm": 0.8449164628982544, | |
| "learning_rate": 1.8297872340425533e-05, | |
| "loss": 0.9270882606506348, | |
| "step": 44 | |
| }, | |
| { | |
| "epoch": 0.19827586206896552, | |
| "grad_norm": 0.561241626739502, | |
| "learning_rate": 1.914893617021277e-05, | |
| "loss": 1.1169898509979248, | |
| "step": 46 | |
| }, | |
| { | |
| "epoch": 0.20689655172413793, | |
| "grad_norm": 0.2715710401535034, | |
| "learning_rate": 2e-05, | |
| "loss": 1.3745737075805664, | |
| "step": 48 | |
| }, | |
| { | |
| "epoch": 0.21551724137931033, | |
| "grad_norm": 0.3442396819591522, | |
| "learning_rate": 1.999977111433554e-05, | |
| "loss": 1.4785292148590088, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 0.22413793103448276, | |
| "grad_norm": 0.6824942827224731, | |
| "learning_rate": 1.9999084468984077e-05, | |
| "loss": 0.7019240260124207, | |
| "step": 52 | |
| }, | |
| { | |
| "epoch": 0.23275862068965517, | |
| "grad_norm": 0.31582292914390564, | |
| "learning_rate": 1.9997940098870788e-05, | |
| "loss": 1.4191925525665283, | |
| "step": 54 | |
| }, | |
| { | |
| "epoch": 0.2413793103448276, | |
| "grad_norm": 0.27809372544288635, | |
| "learning_rate": 1.9996338062202316e-05, | |
| "loss": 1.3712444305419922, | |
| "step": 56 | |
| }, | |
| { | |
| "epoch": 0.25, | |
| "grad_norm": 0.5872206091880798, | |
| "learning_rate": 1.9994278440463823e-05, | |
| "loss": 1.4150975942611694, | |
| "step": 58 | |
| }, | |
| { | |
| "epoch": 0.25862068965517243, | |
| "grad_norm": 0.2935428321361542, | |
| "learning_rate": 1.9991761338414844e-05, | |
| "loss": 1.3828074932098389, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 0.2672413793103448, | |
| "grad_norm": 1.7192302942276, | |
| "learning_rate": 1.998878688408395e-05, | |
| "loss": 1.0158110857009888, | |
| "step": 62 | |
| }, | |
| { | |
| "epoch": 0.27586206896551724, | |
| "grad_norm": 0.22481945157051086, | |
| "learning_rate": 1.9985355228762237e-05, | |
| "loss": 1.3290989398956299, | |
| "step": 64 | |
| }, | |
| { | |
| "epoch": 0.28448275862068967, | |
| "grad_norm": 0.2971683144569397, | |
| "learning_rate": 1.9981466546995648e-05, | |
| "loss": 1.303815484046936, | |
| "step": 66 | |
| }, | |
| { | |
| "epoch": 0.29310344827586204, | |
| "grad_norm": 0.23132647573947906, | |
| "learning_rate": 1.997712103657607e-05, | |
| "loss": 1.2936116456985474, | |
| "step": 68 | |
| }, | |
| { | |
| "epoch": 0.3017241379310345, | |
| "grad_norm": 0.2712137699127197, | |
| "learning_rate": 1.9972318918531294e-05, | |
| "loss": 1.0591557025909424, | |
| "step": 70 | |
| }, | |
| { | |
| "epoch": 0.3103448275862069, | |
| "grad_norm": 1.0306100845336914, | |
| "learning_rate": 1.9967060437113752e-05, | |
| "loss": 0.9161477088928223, | |
| "step": 72 | |
| }, | |
| { | |
| "epoch": 0.31896551724137934, | |
| "grad_norm": 0.7370746731758118, | |
| "learning_rate": 1.996134585978812e-05, | |
| "loss": 1.130851149559021, | |
| "step": 74 | |
| }, | |
| { | |
| "epoch": 0.3275862068965517, | |
| "grad_norm": 0.36369481682777405, | |
| "learning_rate": 1.995517547721769e-05, | |
| "loss": 1.167314052581787, | |
| "step": 76 | |
| }, | |
| { | |
| "epoch": 0.33620689655172414, | |
| "grad_norm": 0.31680384278297424, | |
| "learning_rate": 1.9948549603249602e-05, | |
| "loss": 1.1086735725402832, | |
| "step": 78 | |
| }, | |
| { | |
| "epoch": 0.3448275862068966, | |
| "grad_norm": 1.57016921043396, | |
| "learning_rate": 1.9941468574898867e-05, | |
| "loss": 1.0062172412872314, | |
| "step": 80 | |
| }, | |
| { | |
| "epoch": 0.35344827586206895, | |
| "grad_norm": 0.39987874031066895, | |
| "learning_rate": 1.993393275233123e-05, | |
| "loss": 1.3131077289581299, | |
| "step": 82 | |
| }, | |
| { | |
| "epoch": 0.3620689655172414, | |
| "grad_norm": 0.738694965839386, | |
| "learning_rate": 1.9925942518844875e-05, | |
| "loss": 1.1570929288864136, | |
| "step": 84 | |
| }, | |
| { | |
| "epoch": 0.3706896551724138, | |
| "grad_norm": 0.4991850256919861, | |
| "learning_rate": 1.991749828085088e-05, | |
| "loss": 1.3317445516586304, | |
| "step": 86 | |
| }, | |
| { | |
| "epoch": 0.3793103448275862, | |
| "grad_norm": 0.496286541223526, | |
| "learning_rate": 1.9908600467852585e-05, | |
| "loss": 1.2484678030014038, | |
| "step": 88 | |
| }, | |
| { | |
| "epoch": 0.3879310344827586, | |
| "grad_norm": 0.6867333054542542, | |
| "learning_rate": 1.9899249532423733e-05, | |
| "loss": 1.0302324295043945, | |
| "step": 90 | |
| }, | |
| { | |
| "epoch": 0.39655172413793105, | |
| "grad_norm": 0.2537033259868622, | |
| "learning_rate": 1.9889445950185452e-05, | |
| "loss": 0.8809990882873535, | |
| "step": 92 | |
| }, | |
| { | |
| "epoch": 0.4051724137931034, | |
| "grad_norm": 0.7086202502250671, | |
| "learning_rate": 1.9879190219782058e-05, | |
| "loss": 1.0718659162521362, | |
| "step": 94 | |
| }, | |
| { | |
| "epoch": 0.41379310344827586, | |
| "grad_norm": 0.6780977845191956, | |
| "learning_rate": 1.98684828628557e-05, | |
| "loss": 1.2635523080825806, | |
| "step": 96 | |
| }, | |
| { | |
| "epoch": 0.4224137931034483, | |
| "grad_norm": 0.9746212363243103, | |
| "learning_rate": 1.9857324424019827e-05, | |
| "loss": 1.186186671257019, | |
| "step": 98 | |
| }, | |
| { | |
| "epoch": 0.43103448275862066, | |
| "grad_norm": 1.5146777629852295, | |
| "learning_rate": 1.9845715470831476e-05, | |
| "loss": 1.1258275508880615, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.4396551724137931, | |
| "grad_norm": 0.6449897289276123, | |
| "learning_rate": 1.983365659376242e-05, | |
| "loss": 1.2144218683242798, | |
| "step": 102 | |
| }, | |
| { | |
| "epoch": 0.4482758620689655, | |
| "grad_norm": 0.3542836606502533, | |
| "learning_rate": 1.9821148406169124e-05, | |
| "loss": 1.2573832273483276, | |
| "step": 104 | |
| }, | |
| { | |
| "epoch": 0.45689655172413796, | |
| "grad_norm": 0.7458034753799438, | |
| "learning_rate": 1.9808191544261545e-05, | |
| "loss": 0.9863552451133728, | |
| "step": 106 | |
| }, | |
| { | |
| "epoch": 0.46551724137931033, | |
| "grad_norm": 1.6855500936508179, | |
| "learning_rate": 1.9794786667070786e-05, | |
| "loss": 1.1030296087265015, | |
| "step": 108 | |
| }, | |
| { | |
| "epoch": 0.47413793103448276, | |
| "grad_norm": 1.344683289527893, | |
| "learning_rate": 1.9780934456415567e-05, | |
| "loss": 1.1802244186401367, | |
| "step": 110 | |
| }, | |
| { | |
| "epoch": 0.4827586206896552, | |
| "grad_norm": 0.39590156078338623, | |
| "learning_rate": 1.976663561686753e-05, | |
| "loss": 1.410926103591919, | |
| "step": 112 | |
| }, | |
| { | |
| "epoch": 0.49137931034482757, | |
| "grad_norm": 0.6388300657272339, | |
| "learning_rate": 1.975189087571544e-05, | |
| "loss": 0.8382052183151245, | |
| "step": 114 | |
| }, | |
| { | |
| "epoch": 0.5, | |
| "grad_norm": 1.3271642923355103, | |
| "learning_rate": 1.9736700982928156e-05, | |
| "loss": 0.8902297616004944, | |
| "step": 116 | |
| }, | |
| { | |
| "epoch": 0.5086206896551724, | |
| "grad_norm": 0.28911092877388, | |
| "learning_rate": 1.9721066711116496e-05, | |
| "loss": 1.2678132057189941, | |
| "step": 118 | |
| }, | |
| { | |
| "epoch": 0.5172413793103449, | |
| "grad_norm": 0.41628220677375793, | |
| "learning_rate": 1.9704988855493948e-05, | |
| "loss": 1.1739540100097656, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 0.5258620689655172, | |
| "grad_norm": 0.2931750416755676, | |
| "learning_rate": 1.9688468233836224e-05, | |
| "loss": 1.2742866277694702, | |
| "step": 122 | |
| }, | |
| { | |
| "epoch": 0.5344827586206896, | |
| "grad_norm": 0.3954867422580719, | |
| "learning_rate": 1.9671505686439637e-05, | |
| "loss": 1.3943136930465698, | |
| "step": 124 | |
| }, | |
| { | |
| "epoch": 0.5431034482758621, | |
| "grad_norm": 0.45475512742996216, | |
| "learning_rate": 1.9654102076078406e-05, | |
| "loss": 1.564052939414978, | |
| "step": 126 | |
| }, | |
| { | |
| "epoch": 0.5517241379310345, | |
| "grad_norm": 0.2707035541534424, | |
| "learning_rate": 1.963625828796073e-05, | |
| "loss": 1.28196382522583, | |
| "step": 128 | |
| }, | |
| { | |
| "epoch": 0.5603448275862069, | |
| "grad_norm": 0.32626017928123474, | |
| "learning_rate": 1.9617975229683784e-05, | |
| "loss": 1.2830570936203003, | |
| "step": 130 | |
| }, | |
| { | |
| "epoch": 0.5689655172413793, | |
| "grad_norm": 0.9617651104927063, | |
| "learning_rate": 1.959925383118756e-05, | |
| "loss": 1.1657885313034058, | |
| "step": 132 | |
| }, | |
| { | |
| "epoch": 0.5775862068965517, | |
| "grad_norm": 0.7409307956695557, | |
| "learning_rate": 1.9580095044707553e-05, | |
| "loss": 0.7719605565071106, | |
| "step": 134 | |
| }, | |
| { | |
| "epoch": 0.5862068965517241, | |
| "grad_norm": 0.3257262110710144, | |
| "learning_rate": 1.956049984472634e-05, | |
| "loss": 0.9887097477912903, | |
| "step": 136 | |
| }, | |
| { | |
| "epoch": 0.5948275862068966, | |
| "grad_norm": 0.13194787502288818, | |
| "learning_rate": 1.954046922792399e-05, | |
| "loss": 0.9446215033531189, | |
| "step": 138 | |
| }, | |
| { | |
| "epoch": 0.603448275862069, | |
| "grad_norm": 0.3538772761821747, | |
| "learning_rate": 1.9520004213127412e-05, | |
| "loss": 0.9638822078704834, | |
| "step": 140 | |
| }, | |
| { | |
| "epoch": 0.6120689655172413, | |
| "grad_norm": 0.307935893535614, | |
| "learning_rate": 1.9499105841258488e-05, | |
| "loss": 1.028863549232483, | |
| "step": 142 | |
| }, | |
| { | |
| "epoch": 0.6206896551724138, | |
| "grad_norm": 0.2776618003845215, | |
| "learning_rate": 1.9477775175281166e-05, | |
| "loss": 1.0197412967681885, | |
| "step": 144 | |
| }, | |
| { | |
| "epoch": 0.6293103448275862, | |
| "grad_norm": 0.2247132509946823, | |
| "learning_rate": 1.945601330014737e-05, | |
| "loss": 1.2361186742782593, | |
| "step": 146 | |
| }, | |
| { | |
| "epoch": 0.6379310344827587, | |
| "grad_norm": 0.47068798542022705, | |
| "learning_rate": 1.9433821322741814e-05, | |
| "loss": 1.2686266899108887, | |
| "step": 148 | |
| }, | |
| { | |
| "epoch": 0.646551724137931, | |
| "grad_norm": 0.35479265451431274, | |
| "learning_rate": 1.9411200371825724e-05, | |
| "loss": 1.1161521673202515, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 0.6551724137931034, | |
| "grad_norm": 0.9881391525268555, | |
| "learning_rate": 1.9388151597979407e-05, | |
| "loss": 0.8658434152603149, | |
| "step": 152 | |
| }, | |
| { | |
| "epoch": 0.6637931034482759, | |
| "grad_norm": 1.1565440893173218, | |
| "learning_rate": 1.9364676173543734e-05, | |
| "loss": 1.0216702222824097, | |
| "step": 154 | |
| }, | |
| { | |
| "epoch": 0.6724137931034483, | |
| "grad_norm": 1.531794548034668, | |
| "learning_rate": 1.934077529256051e-05, | |
| "loss": 1.0728117227554321, | |
| "step": 156 | |
| }, | |
| { | |
| "epoch": 0.6810344827586207, | |
| "grad_norm": 0.47022947669029236, | |
| "learning_rate": 1.9316450170711732e-05, | |
| "loss": 1.2310281991958618, | |
| "step": 158 | |
| }, | |
| { | |
| "epoch": 0.6896551724137931, | |
| "grad_norm": 0.5281792283058167, | |
| "learning_rate": 1.929170204525779e-05, | |
| "loss": 1.2321813106536865, | |
| "step": 160 | |
| }, | |
| { | |
| "epoch": 0.6982758620689655, | |
| "grad_norm": 0.5046831369400024, | |
| "learning_rate": 1.9266532174974476e-05, | |
| "loss": 1.5227911472320557, | |
| "step": 162 | |
| }, | |
| { | |
| "epoch": 0.7068965517241379, | |
| "grad_norm": 0.4423198103904724, | |
| "learning_rate": 1.9240941840089015e-05, | |
| "loss": 0.9460239410400391, | |
| "step": 164 | |
| }, | |
| { | |
| "epoch": 0.7155172413793104, | |
| "grad_norm": 0.4108419120311737, | |
| "learning_rate": 1.9214932342214923e-05, | |
| "loss": 1.24778151512146, | |
| "step": 166 | |
| }, | |
| { | |
| "epoch": 0.7241379310344828, | |
| "grad_norm": 0.40896740555763245, | |
| "learning_rate": 1.9188505004285795e-05, | |
| "loss": 1.3147329092025757, | |
| "step": 168 | |
| }, | |
| { | |
| "epoch": 0.7327586206896551, | |
| "grad_norm": 0.5323073863983154, | |
| "learning_rate": 1.916166117048803e-05, | |
| "loss": 1.223319411277771, | |
| "step": 170 | |
| }, | |
| { | |
| "epoch": 0.7413793103448276, | |
| "grad_norm": 0.5909634232521057, | |
| "learning_rate": 1.9134402206192468e-05, | |
| "loss": 1.2373883724212646, | |
| "step": 172 | |
| }, | |
| { | |
| "epoch": 0.75, | |
| "grad_norm": 6.457983016967773, | |
| "learning_rate": 1.910672949788491e-05, | |
| "loss": 0.937210202217102, | |
| "step": 174 | |
| }, | |
| { | |
| "epoch": 0.7586206896551724, | |
| "grad_norm": 0.8323431015014648, | |
| "learning_rate": 1.9078644453095634e-05, | |
| "loss": 0.6929088234901428, | |
| "step": 176 | |
| }, | |
| { | |
| "epoch": 0.7672413793103449, | |
| "grad_norm": 1.8793997764587402, | |
| "learning_rate": 1.9050148500327783e-05, | |
| "loss": 1.1937777996063232, | |
| "step": 178 | |
| }, | |
| { | |
| "epoch": 0.7758620689655172, | |
| "grad_norm": 0.40397757291793823, | |
| "learning_rate": 1.9021243088984705e-05, | |
| "loss": 0.9337822198867798, | |
| "step": 180 | |
| }, | |
| { | |
| "epoch": 0.7844827586206896, | |
| "grad_norm": 0.3196965754032135, | |
| "learning_rate": 1.8991929689296245e-05, | |
| "loss": 0.9228455424308777, | |
| "step": 182 | |
| }, | |
| { | |
| "epoch": 0.7931034482758621, | |
| "grad_norm": 0.24245284497737885, | |
| "learning_rate": 1.896220979224395e-05, | |
| "loss": 0.77471923828125, | |
| "step": 184 | |
| }, | |
| { | |
| "epoch": 0.8017241379310345, | |
| "grad_norm": 0.35483860969543457, | |
| "learning_rate": 1.8932084909485233e-05, | |
| "loss": 1.335769772529602, | |
| "step": 186 | |
| }, | |
| { | |
| "epoch": 0.8103448275862069, | |
| "grad_norm": 1.2779498100280762, | |
| "learning_rate": 1.89015565732765e-05, | |
| "loss": 1.4579006433486938, | |
| "step": 188 | |
| }, | |
| { | |
| "epoch": 0.8189655172413793, | |
| "grad_norm": 0.5677570104598999, | |
| "learning_rate": 1.8870626336395204e-05, | |
| "loss": 1.2512203454971313, | |
| "step": 190 | |
| }, | |
| { | |
| "epoch": 0.8275862068965517, | |
| "grad_norm": 0.3824968934059143, | |
| "learning_rate": 1.8839295772060856e-05, | |
| "loss": 1.217123031616211, | |
| "step": 192 | |
| }, | |
| { | |
| "epoch": 0.8362068965517241, | |
| "grad_norm": 0.3712068200111389, | |
| "learning_rate": 1.8807566473855018e-05, | |
| "loss": 1.2150022983551025, | |
| "step": 194 | |
| }, | |
| { | |
| "epoch": 0.8448275862068966, | |
| "grad_norm": 0.4604543447494507, | |
| "learning_rate": 1.8775440055640245e-05, | |
| "loss": 1.2615362405776978, | |
| "step": 196 | |
| }, | |
| { | |
| "epoch": 0.853448275862069, | |
| "grad_norm": 0.6100837588310242, | |
| "learning_rate": 1.8742918151478008e-05, | |
| "loss": 0.9542128443717957, | |
| "step": 198 | |
| }, | |
| { | |
| "epoch": 0.8620689655172413, | |
| "grad_norm": 0.43527752161026, | |
| "learning_rate": 1.8710002415545552e-05, | |
| "loss": 1.2213218212127686, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.8706896551724138, | |
| "grad_norm": 0.37527814507484436, | |
| "learning_rate": 1.867669452205179e-05, | |
| "loss": 1.2830454111099243, | |
| "step": 202 | |
| }, | |
| { | |
| "epoch": 0.8793103448275862, | |
| "grad_norm": 1.642726182937622, | |
| "learning_rate": 1.8642996165152126e-05, | |
| "loss": 0.8405295610427856, | |
| "step": 204 | |
| }, | |
| { | |
| "epoch": 0.8879310344827587, | |
| "grad_norm": 0.20496216416358948, | |
| "learning_rate": 1.86089090588623e-05, | |
| "loss": 1.2193654775619507, | |
| "step": 206 | |
| }, | |
| { | |
| "epoch": 0.896551724137931, | |
| "grad_norm": 0.3890923261642456, | |
| "learning_rate": 1.857443493697119e-05, | |
| "loss": 1.2507728338241577, | |
| "step": 208 | |
| }, | |
| { | |
| "epoch": 0.9051724137931034, | |
| "grad_norm": 0.6140430569648743, | |
| "learning_rate": 1.853957555295265e-05, | |
| "loss": 0.5853071212768555, | |
| "step": 210 | |
| }, | |
| { | |
| "epoch": 0.9137931034482759, | |
| "grad_norm": 1.1236848831176758, | |
| "learning_rate": 1.850433267987628e-05, | |
| "loss": 0.9337871074676514, | |
| "step": 212 | |
| }, | |
| { | |
| "epoch": 0.9224137931034483, | |
| "grad_norm": 1.0812921524047852, | |
| "learning_rate": 1.84687081103173e-05, | |
| "loss": 1.1255706548690796, | |
| "step": 214 | |
| }, | |
| { | |
| "epoch": 0.9310344827586207, | |
| "grad_norm": 0.34510231018066406, | |
| "learning_rate": 1.8432703656265317e-05, | |
| "loss": 1.2182952165603638, | |
| "step": 216 | |
| }, | |
| { | |
| "epoch": 0.9396551724137931, | |
| "grad_norm": 1.5022380352020264, | |
| "learning_rate": 1.83963211490322e-05, | |
| "loss": 0.755200207233429, | |
| "step": 218 | |
| }, | |
| { | |
| "epoch": 0.9482758620689655, | |
| "grad_norm": 1.6012593507766724, | |
| "learning_rate": 1.8359562439158915e-05, | |
| "loss": 0.9116122722625732, | |
| "step": 220 | |
| }, | |
| { | |
| "epoch": 0.9568965517241379, | |
| "grad_norm": 0.5603123903274536, | |
| "learning_rate": 1.8322429396321395e-05, | |
| "loss": 1.314684271812439, | |
| "step": 222 | |
| }, | |
| { | |
| "epoch": 0.9655172413793104, | |
| "grad_norm": 1.2745404243469238, | |
| "learning_rate": 1.828492390923547e-05, | |
| "loss": 0.8659108877182007, | |
| "step": 224 | |
| }, | |
| { | |
| "epoch": 0.9741379310344828, | |
| "grad_norm": 0.2233542650938034, | |
| "learning_rate": 1.824704788556076e-05, | |
| "loss": 0.8610473275184631, | |
| "step": 226 | |
| }, | |
| { | |
| "epoch": 0.9827586206896551, | |
| "grad_norm": 0.27494016289711, | |
| "learning_rate": 1.820880325180368e-05, | |
| "loss": 1.2119630575180054, | |
| "step": 228 | |
| }, | |
| { | |
| "epoch": 0.9913793103448276, | |
| "grad_norm": 0.8948925733566284, | |
| "learning_rate": 1.817019195321943e-05, | |
| "loss": 0.886069118976593, | |
| "step": 230 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "grad_norm": 0.4077295958995819, | |
| "learning_rate": 1.8131215953713065e-05, | |
| "loss": 0.7485522031784058, | |
| "step": 232 | |
| }, | |
| { | |
| "epoch": 1.0086206896551724, | |
| "grad_norm": 0.6140213012695312, | |
| "learning_rate": 1.809187723573959e-05, | |
| "loss": 0.8965163826942444, | |
| "step": 234 | |
| }, | |
| { | |
| "epoch": 1.0172413793103448, | |
| "grad_norm": 0.9723572731018066, | |
| "learning_rate": 1.8052177800203142e-05, | |
| "loss": 0.745244026184082, | |
| "step": 236 | |
| }, | |
| { | |
| "epoch": 1.0258620689655173, | |
| "grad_norm": 1.8601301908493042, | |
| "learning_rate": 1.8012119666355207e-05, | |
| "loss": 0.8928873538970947, | |
| "step": 238 | |
| }, | |
| { | |
| "epoch": 1.0344827586206897, | |
| "grad_norm": 0.7225033044815063, | |
| "learning_rate": 1.7971704871691913e-05, | |
| "loss": 0.5402289628982544, | |
| "step": 240 | |
| }, | |
| { | |
| "epoch": 1.043103448275862, | |
| "grad_norm": 0.3545401394367218, | |
| "learning_rate": 1.7930935471850397e-05, | |
| "loss": 1.0471925735473633, | |
| "step": 242 | |
| }, | |
| { | |
| "epoch": 1.0517241379310345, | |
| "grad_norm": 0.36405056715011597, | |
| "learning_rate": 1.7889813540504255e-05, | |
| "loss": 0.3569323420524597, | |
| "step": 244 | |
| }, | |
| { | |
| "epoch": 1.0603448275862069, | |
| "grad_norm": 0.44639429450035095, | |
| "learning_rate": 1.7848341169258066e-05, | |
| "loss": 1.0155720710754395, | |
| "step": 246 | |
| }, | |
| { | |
| "epoch": 1.0689655172413792, | |
| "grad_norm": 0.3922642469406128, | |
| "learning_rate": 1.780652046754099e-05, | |
| "loss": 0.9613431096076965, | |
| "step": 248 | |
| }, | |
| { | |
| "epoch": 1.0775862068965518, | |
| "grad_norm": 0.46293577551841736, | |
| "learning_rate": 1.776435356249949e-05, | |
| "loss": 0.9193274974822998, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 1.0862068965517242, | |
| "grad_norm": 0.6368116736412048, | |
| "learning_rate": 1.772184259888915e-05, | |
| "loss": 0.42583054304122925, | |
| "step": 252 | |
| }, | |
| { | |
| "epoch": 1.0948275862068966, | |
| "grad_norm": 1.0385651588439941, | |
| "learning_rate": 1.7678989738965554e-05, | |
| "loss": 0.9037254452705383, | |
| "step": 254 | |
| }, | |
| { | |
| "epoch": 1.103448275862069, | |
| "grad_norm": 0.548372209072113, | |
| "learning_rate": 1.7635797162374325e-05, | |
| "loss": 0.6561298370361328, | |
| "step": 256 | |
| }, | |
| { | |
| "epoch": 1.1120689655172413, | |
| "grad_norm": 0.8821052312850952, | |
| "learning_rate": 1.7592267066040266e-05, | |
| "loss": 0.5268462896347046, | |
| "step": 258 | |
| }, | |
| { | |
| "epoch": 1.1206896551724137, | |
| "grad_norm": 16.59650993347168, | |
| "learning_rate": 1.7548401664055605e-05, | |
| "loss": 0.6683530211448669, | |
| "step": 260 | |
| }, | |
| { | |
| "epoch": 1.1293103448275863, | |
| "grad_norm": 0.24353168904781342, | |
| "learning_rate": 1.7504203187567378e-05, | |
| "loss": 1.1267895698547363, | |
| "step": 262 | |
| }, | |
| { | |
| "epoch": 1.1379310344827587, | |
| "grad_norm": 0.23041954636573792, | |
| "learning_rate": 1.7459673884663956e-05, | |
| "loss": 0.864532470703125, | |
| "step": 264 | |
| }, | |
| { | |
| "epoch": 1.146551724137931, | |
| "grad_norm": 0.24124076962471008, | |
| "learning_rate": 1.7414816020260694e-05, | |
| "loss": 1.043933629989624, | |
| "step": 266 | |
| }, | |
| { | |
| "epoch": 1.1551724137931034, | |
| "grad_norm": 0.36801600456237793, | |
| "learning_rate": 1.7369631875984715e-05, | |
| "loss": 1.0288755893707275, | |
| "step": 268 | |
| }, | |
| { | |
| "epoch": 1.1637931034482758, | |
| "grad_norm": 0.2811622619628906, | |
| "learning_rate": 1.7324123750058895e-05, | |
| "loss": 1.0703890323638916, | |
| "step": 270 | |
| }, | |
| { | |
| "epoch": 1.1724137931034484, | |
| "grad_norm": 0.8666506409645081, | |
| "learning_rate": 1.7278293957184924e-05, | |
| "loss": 1.0030405521392822, | |
| "step": 272 | |
| }, | |
| { | |
| "epoch": 1.1810344827586208, | |
| "grad_norm": 0.8689898252487183, | |
| "learning_rate": 1.7232144828425606e-05, | |
| "loss": 0.8162426352500916, | |
| "step": 274 | |
| }, | |
| { | |
| "epoch": 1.1896551724137931, | |
| "grad_norm": 0.28137898445129395, | |
| "learning_rate": 1.718567871108627e-05, | |
| "loss": 0.5113853216171265, | |
| "step": 276 | |
| }, | |
| { | |
| "epoch": 1.1982758620689655, | |
| "grad_norm": 0.3475714921951294, | |
| "learning_rate": 1.7138897968595394e-05, | |
| "loss": 1.0502547025680542, | |
| "step": 278 | |
| }, | |
| { | |
| "epoch": 1.206896551724138, | |
| "grad_norm": 0.31786224246025085, | |
| "learning_rate": 1.7091804980384382e-05, | |
| "loss": 0.9660123586654663, | |
| "step": 280 | |
| }, | |
| { | |
| "epoch": 1.2155172413793103, | |
| "grad_norm": 0.297315388917923, | |
| "learning_rate": 1.7044402141766547e-05, | |
| "loss": 0.5680896043777466, | |
| "step": 282 | |
| }, | |
| { | |
| "epoch": 1.2241379310344827, | |
| "grad_norm": 3.397669553756714, | |
| "learning_rate": 1.6996691863815274e-05, | |
| "loss": 0.6948413848876953, | |
| "step": 284 | |
| }, | |
| { | |
| "epoch": 1.2327586206896552, | |
| "grad_norm": 0.8230817914009094, | |
| "learning_rate": 1.694867657324137e-05, | |
| "loss": 0.8091163635253906, | |
| "step": 286 | |
| }, | |
| { | |
| "epoch": 1.2413793103448276, | |
| "grad_norm": 0.5911218523979187, | |
| "learning_rate": 1.6900358712269656e-05, | |
| "loss": 0.4585968852043152, | |
| "step": 288 | |
| }, | |
| { | |
| "epoch": 1.25, | |
| "grad_norm": 0.30154550075531006, | |
| "learning_rate": 1.6851740738514745e-05, | |
| "loss": 0.9265656471252441, | |
| "step": 290 | |
| }, | |
| { | |
| "epoch": 1.2586206896551724, | |
| "grad_norm": 0.33858662843704224, | |
| "learning_rate": 1.680282512485601e-05, | |
| "loss": 0.8860665559768677, | |
| "step": 292 | |
| }, | |
| { | |
| "epoch": 1.2672413793103448, | |
| "grad_norm": 0.6046611070632935, | |
| "learning_rate": 1.675361435931184e-05, | |
| "loss": 1.003743052482605, | |
| "step": 294 | |
| }, | |
| { | |
| "epoch": 1.2758620689655173, | |
| "grad_norm": 0.265019029378891, | |
| "learning_rate": 1.670411094491308e-05, | |
| "loss": 0.7721095085144043, | |
| "step": 296 | |
| }, | |
| { | |
| "epoch": 1.2844827586206897, | |
| "grad_norm": 0.1970166265964508, | |
| "learning_rate": 1.665431739957569e-05, | |
| "loss": 1.0421152114868164, | |
| "step": 298 | |
| }, | |
| { | |
| "epoch": 1.293103448275862, | |
| "grad_norm": 0.391703337430954, | |
| "learning_rate": 1.660423625597273e-05, | |
| "loss": 0.9568123817443848, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 1.3017241379310345, | |
| "grad_norm": 0.28192439675331116, | |
| "learning_rate": 1.6553870061405496e-05, | |
| "loss": 0.610411524772644, | |
| "step": 302 | |
| }, | |
| { | |
| "epoch": 1.3103448275862069, | |
| "grad_norm": 0.2877638339996338, | |
| "learning_rate": 1.6503221377673965e-05, | |
| "loss": 0.5043181777000427, | |
| "step": 304 | |
| }, | |
| { | |
| "epoch": 1.3189655172413794, | |
| "grad_norm": 0.3669877350330353, | |
| "learning_rate": 1.6452292780946495e-05, | |
| "loss": 1.1974719762802124, | |
| "step": 306 | |
| }, | |
| { | |
| "epoch": 1.3275862068965516, | |
| "grad_norm": 0.2513487637042999, | |
| "learning_rate": 1.6401086861628802e-05, | |
| "loss": 0.6518297791481018, | |
| "step": 308 | |
| }, | |
| { | |
| "epoch": 1.3362068965517242, | |
| "grad_norm": 0.6886904835700989, | |
| "learning_rate": 1.6349606224232193e-05, | |
| "loss": 0.9824197292327881, | |
| "step": 310 | |
| }, | |
| { | |
| "epoch": 1.3448275862068966, | |
| "grad_norm": 0.27164769172668457, | |
| "learning_rate": 1.6297853487241087e-05, | |
| "loss": 0.9464894533157349, | |
| "step": 312 | |
| }, | |
| { | |
| "epoch": 1.353448275862069, | |
| "grad_norm": 0.6101466417312622, | |
| "learning_rate": 1.6245831282979834e-05, | |
| "loss": 1.1367565393447876, | |
| "step": 314 | |
| }, | |
| { | |
| "epoch": 1.3620689655172413, | |
| "grad_norm": 0.4131507873535156, | |
| "learning_rate": 1.6193542257478837e-05, | |
| "loss": 1.3186949491500854, | |
| "step": 316 | |
| }, | |
| { | |
| "epoch": 1.3706896551724137, | |
| "grad_norm": 0.39837753772735596, | |
| "learning_rate": 1.6140989070339947e-05, | |
| "loss": 0.9798977375030518, | |
| "step": 318 | |
| }, | |
| { | |
| "epoch": 1.3793103448275863, | |
| "grad_norm": 1.0206124782562256, | |
| "learning_rate": 1.60881743946012e-05, | |
| "loss": 0.7535231709480286, | |
| "step": 320 | |
| }, | |
| { | |
| "epoch": 1.3879310344827587, | |
| "grad_norm": 0.7909939289093018, | |
| "learning_rate": 1.6035100916600856e-05, | |
| "loss": 1.412162184715271, | |
| "step": 322 | |
| }, | |
| { | |
| "epoch": 1.396551724137931, | |
| "grad_norm": 0.604773759841919, | |
| "learning_rate": 1.5981771335840744e-05, | |
| "loss": 0.5099858641624451, | |
| "step": 324 | |
| }, | |
| { | |
| "epoch": 1.4051724137931034, | |
| "grad_norm": 0.4729211926460266, | |
| "learning_rate": 1.5928188364848995e-05, | |
| "loss": 0.5017666220664978, | |
| "step": 326 | |
| }, | |
| { | |
| "epoch": 1.4137931034482758, | |
| "grad_norm": 0.7915529012680054, | |
| "learning_rate": 1.5874354729042023e-05, | |
| "loss": 0.8372330069541931, | |
| "step": 328 | |
| }, | |
| { | |
| "epoch": 1.4224137931034484, | |
| "grad_norm": 0.37416186928749084, | |
| "learning_rate": 1.5820273166585947e-05, | |
| "loss": 0.7255274057388306, | |
| "step": 330 | |
| }, | |
| { | |
| "epoch": 1.4310344827586206, | |
| "grad_norm": 0.6319824457168579, | |
| "learning_rate": 1.576594642825729e-05, | |
| "loss": 0.6583790183067322, | |
| "step": 332 | |
| }, | |
| { | |
| "epoch": 1.4396551724137931, | |
| "grad_norm": 0.2038448005914688, | |
| "learning_rate": 1.571137727730307e-05, | |
| "loss": 0.5846832990646362, | |
| "step": 334 | |
| }, | |
| { | |
| "epoch": 1.4482758620689655, | |
| "grad_norm": 0.442600816488266, | |
| "learning_rate": 1.5656568489300265e-05, | |
| "loss": 0.6714300513267517, | |
| "step": 336 | |
| }, | |
| { | |
| "epoch": 1.456896551724138, | |
| "grad_norm": 1.4567723274230957, | |
| "learning_rate": 1.560152285201462e-05, | |
| "loss": 1.0285781621932983, | |
| "step": 338 | |
| }, | |
| { | |
| "epoch": 1.4655172413793103, | |
| "grad_norm": 0.9464113712310791, | |
| "learning_rate": 1.5546243165258857e-05, | |
| "loss": 1.34324049949646, | |
| "step": 340 | |
| }, | |
| { | |
| "epoch": 1.4741379310344827, | |
| "grad_norm": 0.3753282129764557, | |
| "learning_rate": 1.5490732240750275e-05, | |
| "loss": 1.1209205389022827, | |
| "step": 342 | |
| }, | |
| { | |
| "epoch": 1.4827586206896552, | |
| "grad_norm": 0.3289780020713806, | |
| "learning_rate": 1.5434992901967735e-05, | |
| "loss": 0.944566547870636, | |
| "step": 344 | |
| }, | |
| { | |
| "epoch": 1.4913793103448276, | |
| "grad_norm": 0.5948997735977173, | |
| "learning_rate": 1.5379027984008024e-05, | |
| "loss": 0.4405466318130493, | |
| "step": 346 | |
| }, | |
| { | |
| "epoch": 1.5, | |
| "grad_norm": 1.1603318452835083, | |
| "learning_rate": 1.5322840333441687e-05, | |
| "loss": 0.8692161440849304, | |
| "step": 348 | |
| }, | |
| { | |
| "epoch": 1.5086206896551724, | |
| "grad_norm": 0.3296312093734741, | |
| "learning_rate": 1.526643280816822e-05, | |
| "loss": 1.0229344367980957, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 1.5172413793103448, | |
| "grad_norm": 1.4501453638076782, | |
| "learning_rate": 1.5209808277270714e-05, | |
| "loss": 0.9797338843345642, | |
| "step": 352 | |
| }, | |
| { | |
| "epoch": 1.5258620689655173, | |
| "grad_norm": 0.7663815021514893, | |
| "learning_rate": 1.515296962086992e-05, | |
| "loss": 1.0694770812988281, | |
| "step": 354 | |
| }, | |
| { | |
| "epoch": 1.5344827586206895, | |
| "grad_norm": 0.7203207612037659, | |
| "learning_rate": 1.5095919729977757e-05, | |
| "loss": 0.8981337547302246, | |
| "step": 356 | |
| }, | |
| { | |
| "epoch": 1.543103448275862, | |
| "grad_norm": 0.17651817202568054, | |
| "learning_rate": 1.5038661506350267e-05, | |
| "loss": 0.7348160743713379, | |
| "step": 358 | |
| }, | |
| { | |
| "epoch": 1.5517241379310345, | |
| "grad_norm": 0.3057866096496582, | |
| "learning_rate": 1.4981197862340015e-05, | |
| "loss": 1.3365731239318848, | |
| "step": 360 | |
| }, | |
| { | |
| "epoch": 1.5603448275862069, | |
| "grad_norm": 0.2411855012178421, | |
| "learning_rate": 1.4923531720747975e-05, | |
| "loss": 1.0422999858856201, | |
| "step": 362 | |
| }, | |
| { | |
| "epoch": 1.5689655172413794, | |
| "grad_norm": 0.30987802147865295, | |
| "learning_rate": 1.4865666014674833e-05, | |
| "loss": 1.064208984375, | |
| "step": 364 | |
| }, | |
| { | |
| "epoch": 1.5775862068965516, | |
| "grad_norm": 0.4095076024532318, | |
| "learning_rate": 1.4807603687371835e-05, | |
| "loss": 0.7693328857421875, | |
| "step": 366 | |
| }, | |
| { | |
| "epoch": 1.5862068965517242, | |
| "grad_norm": 0.2905380129814148, | |
| "learning_rate": 1.4749347692091061e-05, | |
| "loss": 1.014492154121399, | |
| "step": 368 | |
| }, | |
| { | |
| "epoch": 1.5948275862068966, | |
| "grad_norm": 0.2323673665523529, | |
| "learning_rate": 1.4690900991935218e-05, | |
| "loss": 0.9833155274391174, | |
| "step": 370 | |
| }, | |
| { | |
| "epoch": 1.603448275862069, | |
| "grad_norm": 1.5103213787078857, | |
| "learning_rate": 1.4632266559706933e-05, | |
| "loss": 1.0599433183670044, | |
| "step": 372 | |
| }, | |
| { | |
| "epoch": 1.6120689655172413, | |
| "grad_norm": 0.3394826352596283, | |
| "learning_rate": 1.457344737775753e-05, | |
| "loss": 0.7715789675712585, | |
| "step": 374 | |
| }, | |
| { | |
| "epoch": 1.6206896551724137, | |
| "grad_norm": 0.24792537093162537, | |
| "learning_rate": 1.451444643783536e-05, | |
| "loss": 1.008685827255249, | |
| "step": 376 | |
| }, | |
| { | |
| "epoch": 1.6293103448275863, | |
| "grad_norm": 0.2639085054397583, | |
| "learning_rate": 1.4455266740933598e-05, | |
| "loss": 1.1693607568740845, | |
| "step": 378 | |
| }, | |
| { | |
| "epoch": 1.6379310344827587, | |
| "grad_norm": 0.27107664942741394, | |
| "learning_rate": 1.439591129713764e-05, | |
| "loss": 0.7295485734939575, | |
| "step": 380 | |
| }, | |
| { | |
| "epoch": 1.646551724137931, | |
| "grad_norm": 2.177382707595825, | |
| "learning_rate": 1.4336383125471974e-05, | |
| "loss": 1.3163925409317017, | |
| "step": 382 | |
| }, | |
| { | |
| "epoch": 1.6551724137931034, | |
| "grad_norm": 0.5485315322875977, | |
| "learning_rate": 1.4276685253746623e-05, | |
| "loss": 1.00065279006958, | |
| "step": 384 | |
| }, | |
| { | |
| "epoch": 1.6637931034482758, | |
| "grad_norm": 0.25248005986213684, | |
| "learning_rate": 1.4216820718403153e-05, | |
| "loss": 0.9677169322967529, | |
| "step": 386 | |
| }, | |
| { | |
| "epoch": 1.6724137931034484, | |
| "grad_norm": 0.3703874945640564, | |
| "learning_rate": 1.4156792564360222e-05, | |
| "loss": 1.3287030458450317, | |
| "step": 388 | |
| }, | |
| { | |
| "epoch": 1.6810344827586206, | |
| "grad_norm": 1.0064659118652344, | |
| "learning_rate": 1.4096603844858696e-05, | |
| "loss": 0.8481764197349548, | |
| "step": 390 | |
| }, | |
| { | |
| "epoch": 1.6896551724137931, | |
| "grad_norm": 0.3846883773803711, | |
| "learning_rate": 1.4036257621306375e-05, | |
| "loss": 0.41724586486816406, | |
| "step": 392 | |
| }, | |
| { | |
| "epoch": 1.6982758620689655, | |
| "grad_norm": 1.3841114044189453, | |
| "learning_rate": 1.3975756963122242e-05, | |
| "loss": 1.0509485006332397, | |
| "step": 394 | |
| }, | |
| { | |
| "epoch": 1.706896551724138, | |
| "grad_norm": 0.7592343688011169, | |
| "learning_rate": 1.3915104947580382e-05, | |
| "loss": 1.0144211053848267, | |
| "step": 396 | |
| }, | |
| { | |
| "epoch": 1.7155172413793105, | |
| "grad_norm": 1.6238418817520142, | |
| "learning_rate": 1.3854304659653435e-05, | |
| "loss": 1.0040127038955688, | |
| "step": 398 | |
| }, | |
| { | |
| "epoch": 1.7241379310344827, | |
| "grad_norm": 0.42679479718208313, | |
| "learning_rate": 1.3793359191855684e-05, | |
| "loss": 1.0181949138641357, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 1.7327586206896552, | |
| "grad_norm": 0.29004621505737305, | |
| "learning_rate": 1.3732271644085779e-05, | |
| "loss": 0.7873343229293823, | |
| "step": 402 | |
| }, | |
| { | |
| "epoch": 1.7413793103448276, | |
| "grad_norm": 0.2753044366836548, | |
| "learning_rate": 1.3671045123469038e-05, | |
| "loss": 0.594070553779602, | |
| "step": 404 | |
| }, | |
| { | |
| "epoch": 1.75, | |
| "grad_norm": 0.860210657119751, | |
| "learning_rate": 1.3609682744199434e-05, | |
| "loss": 0.6642279028892517, | |
| "step": 406 | |
| }, | |
| { | |
| "epoch": 1.7586206896551724, | |
| "grad_norm": 0.3674435615539551, | |
| "learning_rate": 1.354818762738118e-05, | |
| "loss": 0.7117359042167664, | |
| "step": 408 | |
| }, | |
| { | |
| "epoch": 1.7672413793103448, | |
| "grad_norm": 0.23552259802818298, | |
| "learning_rate": 1.3486562900869975e-05, | |
| "loss": 0.7889873385429382, | |
| "step": 410 | |
| }, | |
| { | |
| "epoch": 1.7758620689655173, | |
| "grad_norm": 0.3450159728527069, | |
| "learning_rate": 1.3424811699113924e-05, | |
| "loss": 0.7825038433074951, | |
| "step": 412 | |
| }, | |
| { | |
| "epoch": 1.7844827586206895, | |
| "grad_norm": 0.6731851100921631, | |
| "learning_rate": 1.3362937162994115e-05, | |
| "loss": 0.6144068837165833, | |
| "step": 414 | |
| }, | |
| { | |
| "epoch": 1.793103448275862, | |
| "grad_norm": 0.29475781321525574, | |
| "learning_rate": 1.3300942439664826e-05, | |
| "loss": 0.7337537407875061, | |
| "step": 416 | |
| }, | |
| { | |
| "epoch": 1.8017241379310345, | |
| "grad_norm": 2.000307083129883, | |
| "learning_rate": 1.3238830682393498e-05, | |
| "loss": 0.9792155027389526, | |
| "step": 418 | |
| }, | |
| { | |
| "epoch": 1.8103448275862069, | |
| "grad_norm": 0.6585050821304321, | |
| "learning_rate": 1.317660505040031e-05, | |
| "loss": 1.0592936277389526, | |
| "step": 420 | |
| }, | |
| { | |
| "epoch": 1.8189655172413794, | |
| "grad_norm": 0.6050742268562317, | |
| "learning_rate": 1.3114268708697517e-05, | |
| "loss": 1.1423420906066895, | |
| "step": 422 | |
| }, | |
| { | |
| "epoch": 1.8275862068965516, | |
| "grad_norm": 0.22784169018268585, | |
| "learning_rate": 1.305182482792845e-05, | |
| "loss": 1.0336556434631348, | |
| "step": 424 | |
| }, | |
| { | |
| "epoch": 1.8362068965517242, | |
| "grad_norm": 0.28103554248809814, | |
| "learning_rate": 1.298927658420625e-05, | |
| "loss": 0.633764386177063, | |
| "step": 426 | |
| }, | |
| { | |
| "epoch": 1.8448275862068966, | |
| "grad_norm": 0.3265269696712494, | |
| "learning_rate": 1.292662715895232e-05, | |
| "loss": 1.000012993812561, | |
| "step": 428 | |
| }, | |
| { | |
| "epoch": 1.853448275862069, | |
| "grad_norm": 0.25722262263298035, | |
| "learning_rate": 1.2863879738734515e-05, | |
| "loss": 0.972018301486969, | |
| "step": 430 | |
| }, | |
| { | |
| "epoch": 1.8620689655172413, | |
| "grad_norm": 0.3606722056865692, | |
| "learning_rate": 1.2801037515105048e-05, | |
| "loss": 1.0186474323272705, | |
| "step": 432 | |
| }, | |
| { | |
| "epoch": 1.8706896551724137, | |
| "grad_norm": 0.5681195855140686, | |
| "learning_rate": 1.2738103684438167e-05, | |
| "loss": 1.1112819910049438, | |
| "step": 434 | |
| }, | |
| { | |
| "epoch": 1.8793103448275863, | |
| "grad_norm": 0.2608093321323395, | |
| "learning_rate": 1.267508144776757e-05, | |
| "loss": 0.6823700070381165, | |
| "step": 436 | |
| }, | |
| { | |
| "epoch": 1.8879310344827587, | |
| "grad_norm": 0.5412386059761047, | |
| "learning_rate": 1.2611974010623598e-05, | |
| "loss": 0.9664647579193115, | |
| "step": 438 | |
| }, | |
| { | |
| "epoch": 1.896551724137931, | |
| "grad_norm": 0.2498435080051422, | |
| "learning_rate": 1.2548784582870175e-05, | |
| "loss": 0.9077426791191101, | |
| "step": 440 | |
| }, | |
| { | |
| "epoch": 1.9051724137931034, | |
| "grad_norm": 0.25406646728515625, | |
| "learning_rate": 1.2485516378541562e-05, | |
| "loss": 1.095183253288269, | |
| "step": 442 | |
| }, | |
| { | |
| "epoch": 1.9137931034482758, | |
| "grad_norm": 0.3728492558002472, | |
| "learning_rate": 1.2422172615678867e-05, | |
| "loss": 0.2406305968761444, | |
| "step": 444 | |
| }, | |
| { | |
| "epoch": 1.9224137931034484, | |
| "grad_norm": 1.3624857664108276, | |
| "learning_rate": 1.2358756516166368e-05, | |
| "loss": 0.7011273503303528, | |
| "step": 446 | |
| }, | |
| { | |
| "epoch": 1.9310344827586206, | |
| "grad_norm": 0.31631484627723694, | |
| "learning_rate": 1.2295271305567637e-05, | |
| "loss": 1.040317177772522, | |
| "step": 448 | |
| }, | |
| { | |
| "epoch": 1.9396551724137931, | |
| "grad_norm": 1.0889196395874023, | |
| "learning_rate": 1.2231720212961476e-05, | |
| "loss": 1.0238956212997437, | |
| "step": 450 | |
| }, | |
| { | |
| "epoch": 1.9482758620689655, | |
| "grad_norm": 0.17187516391277313, | |
| "learning_rate": 1.2168106470777676e-05, | |
| "loss": 0.8691494464874268, | |
| "step": 452 | |
| }, | |
| { | |
| "epoch": 1.956896551724138, | |
| "grad_norm": 0.3179636299610138, | |
| "learning_rate": 1.2104433314632605e-05, | |
| "loss": 1.03307044506073, | |
| "step": 454 | |
| }, | |
| { | |
| "epoch": 1.9655172413793105, | |
| "grad_norm": 0.2639262080192566, | |
| "learning_rate": 1.2040703983164628e-05, | |
| "loss": 1.0065069198608398, | |
| "step": 456 | |
| }, | |
| { | |
| "epoch": 1.9741379310344827, | |
| "grad_norm": 8.715909004211426, | |
| "learning_rate": 1.1976921717869387e-05, | |
| "loss": 0.3249227702617645, | |
| "step": 458 | |
| }, | |
| { | |
| "epoch": 1.9827586206896552, | |
| "grad_norm": 0.4118382930755615, | |
| "learning_rate": 1.1913089762934918e-05, | |
| "loss": 0.9767223596572876, | |
| "step": 460 | |
| }, | |
| { | |
| "epoch": 1.9913793103448276, | |
| "grad_norm": 0.8583071827888489, | |
| "learning_rate": 1.184921136507665e-05, | |
| "loss": 0.6757299304008484, | |
| "step": 462 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "grad_norm": 1.8066082000732422, | |
| "learning_rate": 1.1785289773372252e-05, | |
| "loss": 0.8873772621154785, | |
| "step": 464 | |
| }, | |
| { | |
| "epoch": 2.0086206896551726, | |
| "grad_norm": 0.31068357825279236, | |
| "learning_rate": 1.1721328239096397e-05, | |
| "loss": 0.4577694237232208, | |
| "step": 466 | |
| }, | |
| { | |
| "epoch": 2.0172413793103448, | |
| "grad_norm": 1.1041500568389893, | |
| "learning_rate": 1.1657330015555365e-05, | |
| "loss": 0.729465663433075, | |
| "step": 468 | |
| }, | |
| { | |
| "epoch": 2.0258620689655173, | |
| "grad_norm": 0.2672136723995209, | |
| "learning_rate": 1.159329835792158e-05, | |
| "loss": 0.58217453956604, | |
| "step": 470 | |
| }, | |
| { | |
| "epoch": 2.0344827586206895, | |
| "grad_norm": 2.1604278087615967, | |
| "learning_rate": 1.1529236523068046e-05, | |
| "loss": 0.5115339159965515, | |
| "step": 472 | |
| }, | |
| { | |
| "epoch": 2.043103448275862, | |
| "grad_norm": 0.3200737237930298, | |
| "learning_rate": 1.146514776940268e-05, | |
| "loss": 0.5147908329963684, | |
| "step": 474 | |
| }, | |
| { | |
| "epoch": 2.0517241379310347, | |
| "grad_norm": 0.6346356272697449, | |
| "learning_rate": 1.1401035356702585e-05, | |
| "loss": 0.5318877100944519, | |
| "step": 476 | |
| }, | |
| { | |
| "epoch": 2.060344827586207, | |
| "grad_norm": 0.5195797681808472, | |
| "learning_rate": 1.133690254594825e-05, | |
| "loss": 0.3974412977695465, | |
| "step": 478 | |
| }, | |
| { | |
| "epoch": 2.0689655172413794, | |
| "grad_norm": 0.3041704297065735, | |
| "learning_rate": 1.1272752599157673e-05, | |
| "loss": 0.7926555871963501, | |
| "step": 480 | |
| }, | |
| { | |
| "epoch": 2.0775862068965516, | |
| "grad_norm": 0.3499699831008911, | |
| "learning_rate": 1.1208588779220454e-05, | |
| "loss": 0.4678128659725189, | |
| "step": 482 | |
| }, | |
| { | |
| "epoch": 2.086206896551724, | |
| "grad_norm": 0.33471551537513733, | |
| "learning_rate": 1.1144414349731826e-05, | |
| "loss": 0.6200590133666992, | |
| "step": 484 | |
| }, | |
| { | |
| "epoch": 2.0948275862068964, | |
| "grad_norm": 0.2929646372795105, | |
| "learning_rate": 1.1080232574826669e-05, | |
| "loss": 0.5513038039207458, | |
| "step": 486 | |
| }, | |
| { | |
| "epoch": 2.103448275862069, | |
| "grad_norm": 1.0158162117004395, | |
| "learning_rate": 1.101604671901346e-05, | |
| "loss": 0.6488674283027649, | |
| "step": 488 | |
| }, | |
| { | |
| "epoch": 2.1120689655172415, | |
| "grad_norm": 0.3108230531215668, | |
| "learning_rate": 1.0951860047008268e-05, | |
| "loss": 0.6901264786720276, | |
| "step": 490 | |
| }, | |
| { | |
| "epoch": 2.1206896551724137, | |
| "grad_norm": 1.0879099369049072, | |
| "learning_rate": 1.0887675823568662e-05, | |
| "loss": 0.6872219443321228, | |
| "step": 492 | |
| }, | |
| { | |
| "epoch": 2.1293103448275863, | |
| "grad_norm": 0.6477283835411072, | |
| "learning_rate": 1.0823497313327666e-05, | |
| "loss": 0.7422826886177063, | |
| "step": 494 | |
| }, | |
| { | |
| "epoch": 2.1379310344827585, | |
| "grad_norm": 0.44943997263908386, | |
| "learning_rate": 1.075932778062772e-05, | |
| "loss": 0.6594029664993286, | |
| "step": 496 | |
| }, | |
| { | |
| "epoch": 2.146551724137931, | |
| "grad_norm": 0.9052010774612427, | |
| "learning_rate": 1.0695170489354627e-05, | |
| "loss": 0.3747510313987732, | |
| "step": 498 | |
| }, | |
| { | |
| "epoch": 2.1551724137931036, | |
| "grad_norm": 0.7915022969245911, | |
| "learning_rate": 1.0631028702771558e-05, | |
| "loss": 0.3904026746749878, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 2.163793103448276, | |
| "grad_norm": 0.3351036012172699, | |
| "learning_rate": 1.0566905683353048e-05, | |
| "loss": 0.42320704460144043, | |
| "step": 502 | |
| }, | |
| { | |
| "epoch": 2.1724137931034484, | |
| "grad_norm": 0.39706820249557495, | |
| "learning_rate": 1.0502804692619085e-05, | |
| "loss": 0.5195796489715576, | |
| "step": 504 | |
| }, | |
| { | |
| "epoch": 2.1810344827586206, | |
| "grad_norm": 0.7268723249435425, | |
| "learning_rate": 1.0438728990969182e-05, | |
| "loss": 0.7010811567306519, | |
| "step": 506 | |
| }, | |
| { | |
| "epoch": 2.189655172413793, | |
| "grad_norm": 2.3827953338623047, | |
| "learning_rate": 1.0374681837516585e-05, | |
| "loss": 0.47865381836891174, | |
| "step": 508 | |
| }, | |
| { | |
| "epoch": 2.1982758620689653, | |
| "grad_norm": 0.3070574998855591, | |
| "learning_rate": 1.0310666489922452e-05, | |
| "loss": 0.7136699557304382, | |
| "step": 510 | |
| }, | |
| { | |
| "epoch": 2.206896551724138, | |
| "grad_norm": 0.36808905005455017, | |
| "learning_rate": 1.0246686204230213e-05, | |
| "loss": 0.5507462024688721, | |
| "step": 512 | |
| }, | |
| { | |
| "epoch": 2.2155172413793105, | |
| "grad_norm": 0.49329182505607605, | |
| "learning_rate": 1.0182744234699897e-05, | |
| "loss": 0.5582562685012817, | |
| "step": 514 | |
| }, | |
| { | |
| "epoch": 2.2241379310344827, | |
| "grad_norm": 1.3508820533752441, | |
| "learning_rate": 1.0118843833642672e-05, | |
| "loss": 0.403819739818573, | |
| "step": 516 | |
| }, | |
| { | |
| "epoch": 2.2327586206896552, | |
| "grad_norm": 0.1710726022720337, | |
| "learning_rate": 1.005498825125536e-05, | |
| "loss": 0.0842498317360878, | |
| "step": 518 | |
| }, | |
| { | |
| "epoch": 2.2413793103448274, | |
| "grad_norm": 0.8797479867935181, | |
| "learning_rate": 9.991180735455171e-06, | |
| "loss": 0.14793738722801208, | |
| "step": 520 | |
| }, | |
| { | |
| "epoch": 2.25, | |
| "grad_norm": 0.6412666440010071, | |
| "learning_rate": 9.927424531714463e-06, | |
| "loss": 0.7687300443649292, | |
| "step": 522 | |
| }, | |
| { | |
| "epoch": 2.2586206896551726, | |
| "grad_norm": 0.3679327070713043, | |
| "learning_rate": 9.863722882895703e-06, | |
| "loss": 0.5136147141456604, | |
| "step": 524 | |
| }, | |
| { | |
| "epoch": 2.2672413793103448, | |
| "grad_norm": 0.66014564037323, | |
| "learning_rate": 9.800079029086487e-06, | |
| "loss": 0.48577654361724854, | |
| "step": 526 | |
| }, | |
| { | |
| "epoch": 2.2758620689655173, | |
| "grad_norm": 0.36126595735549927, | |
| "learning_rate": 9.736496207434767e-06, | |
| "loss": 0.5481493473052979, | |
| "step": 528 | |
| }, | |
| { | |
| "epoch": 2.2844827586206895, | |
| "grad_norm": 0.18358245491981506, | |
| "learning_rate": 9.672977651984183e-06, | |
| "loss": 0.4460916519165039, | |
| "step": 530 | |
| }, | |
| { | |
| "epoch": 2.293103448275862, | |
| "grad_norm": 0.5238314867019653, | |
| "learning_rate": 9.609526593509572e-06, | |
| "loss": 0.44660893082618713, | |
| "step": 532 | |
| }, | |
| { | |
| "epoch": 2.3017241379310347, | |
| "grad_norm": 0.45486119389533997, | |
| "learning_rate": 9.54614625935264e-06, | |
| "loss": 0.4977568984031677, | |
| "step": 534 | |
| }, | |
| { | |
| "epoch": 2.310344827586207, | |
| "grad_norm": 0.3980855643749237, | |
| "learning_rate": 9.482839873257814e-06, | |
| "loss": 0.44564345479011536, | |
| "step": 536 | |
| }, | |
| { | |
| "epoch": 2.3189655172413794, | |
| "grad_norm": 0.1288735568523407, | |
| "learning_rate": 9.41961065520825e-06, | |
| "loss": 0.34829026460647583, | |
| "step": 538 | |
| }, | |
| { | |
| "epoch": 2.3275862068965516, | |
| "grad_norm": 0.3644492030143738, | |
| "learning_rate": 9.356461821262088e-06, | |
| "loss": 0.42181143164634705, | |
| "step": 540 | |
| }, | |
| { | |
| "epoch": 2.336206896551724, | |
| "grad_norm": 0.45030930638313293, | |
| "learning_rate": 9.293396583388835e-06, | |
| "loss": 0.6050075888633728, | |
| "step": 542 | |
| }, | |
| { | |
| "epoch": 2.344827586206897, | |
| "grad_norm": 0.49424102902412415, | |
| "learning_rate": 9.230418149306028e-06, | |
| "loss": 0.7277567982673645, | |
| "step": 544 | |
| }, | |
| { | |
| "epoch": 2.353448275862069, | |
| "grad_norm": 0.2475537806749344, | |
| "learning_rate": 9.167529722316039e-06, | |
| "loss": 0.25289204716682434, | |
| "step": 546 | |
| }, | |
| { | |
| "epoch": 2.3620689655172415, | |
| "grad_norm": 0.5678421258926392, | |
| "learning_rate": 9.104734501143195e-06, | |
| "loss": 0.732049822807312, | |
| "step": 548 | |
| }, | |
| { | |
| "epoch": 2.3706896551724137, | |
| "grad_norm": 5.805917739868164, | |
| "learning_rate": 9.04203567977102e-06, | |
| "loss": 0.561985969543457, | |
| "step": 550 | |
| }, | |
| { | |
| "epoch": 2.3793103448275863, | |
| "grad_norm": 1.29057776927948, | |
| "learning_rate": 8.979436447279833e-06, | |
| "loss": 0.7146962285041809, | |
| "step": 552 | |
| }, | |
| { | |
| "epoch": 2.3879310344827585, | |
| "grad_norm": 0.24367839097976685, | |
| "learning_rate": 8.916939987684497e-06, | |
| "loss": 0.5743066072463989, | |
| "step": 554 | |
| }, | |
| { | |
| "epoch": 2.396551724137931, | |
| "grad_norm": 0.29842236638069153, | |
| "learning_rate": 8.854549479772508e-06, | |
| "loss": 0.6506761312484741, | |
| "step": 556 | |
| }, | |
| { | |
| "epoch": 2.405172413793103, | |
| "grad_norm": 0.4994705319404602, | |
| "learning_rate": 8.792268096942265e-06, | |
| "loss": 0.4716731607913971, | |
| "step": 558 | |
| }, | |
| { | |
| "epoch": 2.413793103448276, | |
| "grad_norm": 0.3811204135417938, | |
| "learning_rate": 8.730099007041712e-06, | |
| "loss": 0.6651344299316406, | |
| "step": 560 | |
| }, | |
| { | |
| "epoch": 2.4224137931034484, | |
| "grad_norm": 0.7616989016532898, | |
| "learning_rate": 8.668045372207165e-06, | |
| "loss": 0.5968424081802368, | |
| "step": 562 | |
| }, | |
| { | |
| "epoch": 2.4310344827586206, | |
| "grad_norm": 0.31395435333251953, | |
| "learning_rate": 8.606110348702502e-06, | |
| "loss": 0.656068742275238, | |
| "step": 564 | |
| }, | |
| { | |
| "epoch": 2.439655172413793, | |
| "grad_norm": 3.1607255935668945, | |
| "learning_rate": 8.544297086758612e-06, | |
| "loss": 0.6287741661071777, | |
| "step": 566 | |
| }, | |
| { | |
| "epoch": 2.4482758620689653, | |
| "grad_norm": 0.47689536213874817, | |
| "learning_rate": 8.482608730413173e-06, | |
| "loss": 0.3020762503147125, | |
| "step": 568 | |
| }, | |
| { | |
| "epoch": 2.456896551724138, | |
| "grad_norm": 0.2523362636566162, | |
| "learning_rate": 8.421048417350722e-06, | |
| "loss": 1.0642119646072388, | |
| "step": 570 | |
| }, | |
| { | |
| "epoch": 2.4655172413793105, | |
| "grad_norm": 0.746324896812439, | |
| "learning_rate": 8.359619278743072e-06, | |
| "loss": 0.46571430563926697, | |
| "step": 572 | |
| }, | |
| { | |
| "epoch": 2.4741379310344827, | |
| "grad_norm": 0.2691429555416107, | |
| "learning_rate": 8.298324439090044e-06, | |
| "loss": 0.5474472641944885, | |
| "step": 574 | |
| }, | |
| { | |
| "epoch": 2.4827586206896552, | |
| "grad_norm": 0.3174271881580353, | |
| "learning_rate": 8.23716701606056e-06, | |
| "loss": 0.7401635050773621, | |
| "step": 576 | |
| }, | |
| { | |
| "epoch": 2.4913793103448274, | |
| "grad_norm": 1.0416983366012573, | |
| "learning_rate": 8.176150120334035e-06, | |
| "loss": 0.4356614053249359, | |
| "step": 578 | |
| }, | |
| { | |
| "epoch": 2.5, | |
| "grad_norm": 0.4628841280937195, | |
| "learning_rate": 8.115276855442186e-06, | |
| "loss": 0.7132285237312317, | |
| "step": 580 | |
| }, | |
| { | |
| "epoch": 2.5086206896551726, | |
| "grad_norm": 0.2986053228378296, | |
| "learning_rate": 8.054550317611166e-06, | |
| "loss": 0.6939521431922913, | |
| "step": 582 | |
| }, | |
| { | |
| "epoch": 2.5172413793103448, | |
| "grad_norm": 0.4543410837650299, | |
| "learning_rate": 7.99397359560408e-06, | |
| "loss": 0.5915881395339966, | |
| "step": 584 | |
| }, | |
| { | |
| "epoch": 2.5258620689655173, | |
| "grad_norm": 0.42003390192985535, | |
| "learning_rate": 7.933549770563868e-06, | |
| "loss": 0.5379845499992371, | |
| "step": 586 | |
| }, | |
| { | |
| "epoch": 2.5344827586206895, | |
| "grad_norm": 0.28614842891693115, | |
| "learning_rate": 7.873281915856618e-06, | |
| "loss": 0.6815016269683838, | |
| "step": 588 | |
| }, | |
| { | |
| "epoch": 2.543103448275862, | |
| "grad_norm": 0.32113906741142273, | |
| "learning_rate": 7.8131730969152e-06, | |
| "loss": 0.7555568218231201, | |
| "step": 590 | |
| }, | |
| { | |
| "epoch": 2.5517241379310347, | |
| "grad_norm": 0.3256307542324066, | |
| "learning_rate": 7.753226371083398e-06, | |
| "loss": 0.6491318941116333, | |
| "step": 592 | |
| }, | |
| { | |
| "epoch": 2.560344827586207, | |
| "grad_norm": 0.3508186638355255, | |
| "learning_rate": 7.693444787460346e-06, | |
| "loss": 0.38945457339286804, | |
| "step": 594 | |
| }, | |
| { | |
| "epoch": 2.5689655172413794, | |
| "grad_norm": 0.19432027637958527, | |
| "learning_rate": 7.6338313867455e-06, | |
| "loss": 0.5126080513000488, | |
| "step": 596 | |
| }, | |
| { | |
| "epoch": 2.5775862068965516, | |
| "grad_norm": 0.3698360323905945, | |
| "learning_rate": 7.574389201083925e-06, | |
| "loss": 0.6170058250427246, | |
| "step": 598 | |
| }, | |
| { | |
| "epoch": 2.586206896551724, | |
| "grad_norm": 0.7802625894546509, | |
| "learning_rate": 7.515121253912107e-06, | |
| "loss": 0.6505869030952454, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 2.594827586206897, | |
| "grad_norm": 0.12341656535863876, | |
| "learning_rate": 7.456030559804146e-06, | |
| "loss": 0.47480010986328125, | |
| "step": 602 | |
| }, | |
| { | |
| "epoch": 2.603448275862069, | |
| "grad_norm": 0.6200041174888611, | |
| "learning_rate": 7.3971201243184425e-06, | |
| "loss": 0.5783487558364868, | |
| "step": 604 | |
| }, | |
| { | |
| "epoch": 2.612068965517241, | |
| "grad_norm": 0.9620341062545776, | |
| "learning_rate": 7.338392943844807e-06, | |
| "loss": 0.5066084265708923, | |
| "step": 606 | |
| }, | |
| { | |
| "epoch": 2.6206896551724137, | |
| "grad_norm": 0.3163914382457733, | |
| "learning_rate": 7.279852005452072e-06, | |
| "loss": 0.7519516348838806, | |
| "step": 608 | |
| }, | |
| { | |
| "epoch": 2.6293103448275863, | |
| "grad_norm": 0.7371922731399536, | |
| "learning_rate": 7.221500286736145e-06, | |
| "loss": 0.6951370239257812, | |
| "step": 610 | |
| }, | |
| { | |
| "epoch": 2.637931034482759, | |
| "grad_norm": 0.87571781873703, | |
| "learning_rate": 7.16334075566856e-06, | |
| "loss": 0.7426925301551819, | |
| "step": 612 | |
| }, | |
| { | |
| "epoch": 2.646551724137931, | |
| "grad_norm": 0.34502312541007996, | |
| "learning_rate": 7.105376370445516e-06, | |
| "loss": 0.6338915824890137, | |
| "step": 614 | |
| }, | |
| { | |
| "epoch": 2.655172413793103, | |
| "grad_norm": 0.551734209060669, | |
| "learning_rate": 7.047610079337426e-06, | |
| "loss": 0.43320754170417786, | |
| "step": 616 | |
| }, | |
| { | |
| "epoch": 2.663793103448276, | |
| "grad_norm": 0.3614158034324646, | |
| "learning_rate": 6.990044820538932e-06, | |
| "loss": 0.7193683981895447, | |
| "step": 618 | |
| }, | |
| { | |
| "epoch": 2.6724137931034484, | |
| "grad_norm": 0.15170885622501373, | |
| "learning_rate": 6.932683522019486e-06, | |
| "loss": 0.4045131802558899, | |
| "step": 620 | |
| }, | |
| { | |
| "epoch": 2.6810344827586206, | |
| "grad_norm": 0.3657248318195343, | |
| "learning_rate": 6.875529101374407e-06, | |
| "loss": 0.7818766832351685, | |
| "step": 622 | |
| }, | |
| { | |
| "epoch": 2.689655172413793, | |
| "grad_norm": 0.34267038106918335, | |
| "learning_rate": 6.818584465676476e-06, | |
| "loss": 0.4748293161392212, | |
| "step": 624 | |
| }, | |
| { | |
| "epoch": 2.6982758620689653, | |
| "grad_norm": 0.2588467001914978, | |
| "learning_rate": 6.761852511328097e-06, | |
| "loss": 0.6603924036026001, | |
| "step": 626 | |
| }, | |
| { | |
| "epoch": 2.706896551724138, | |
| "grad_norm": 0.5171433687210083, | |
| "learning_rate": 6.705336123913943e-06, | |
| "loss": 0.4325425326824188, | |
| "step": 628 | |
| }, | |
| { | |
| "epoch": 2.7155172413793105, | |
| "grad_norm": 0.5215042233467102, | |
| "learning_rate": 6.649038178054216e-06, | |
| "loss": 0.8396017551422119, | |
| "step": 630 | |
| }, | |
| { | |
| "epoch": 2.7241379310344827, | |
| "grad_norm": 1.0772958993911743, | |
| "learning_rate": 6.592961537258415e-06, | |
| "loss": 0.34978973865509033, | |
| "step": 632 | |
| }, | |
| { | |
| "epoch": 2.7327586206896552, | |
| "grad_norm": 0.2992818057537079, | |
| "learning_rate": 6.53710905377969e-06, | |
| "loss": 0.7622851133346558, | |
| "step": 634 | |
| }, | |
| { | |
| "epoch": 2.7413793103448274, | |
| "grad_norm": 0.5735934972763062, | |
| "learning_rate": 6.48148356846978e-06, | |
| "loss": 0.5066753625869751, | |
| "step": 636 | |
| }, | |
| { | |
| "epoch": 2.75, | |
| "grad_norm": 0.30223897099494934, | |
| "learning_rate": 6.426087910634488e-06, | |
| "loss": 0.7104685306549072, | |
| "step": 638 | |
| }, | |
| { | |
| "epoch": 2.7586206896551726, | |
| "grad_norm": 0.575935959815979, | |
| "learning_rate": 6.370924897889815e-06, | |
| "loss": 0.45420828461647034, | |
| "step": 640 | |
| }, | |
| { | |
| "epoch": 2.7672413793103448, | |
| "grad_norm": 0.6481202244758606, | |
| "learning_rate": 6.315997336018602e-06, | |
| "loss": 0.6258912086486816, | |
| "step": 642 | |
| }, | |
| { | |
| "epoch": 2.7758620689655173, | |
| "grad_norm": 0.3766607940196991, | |
| "learning_rate": 6.261308018827857e-06, | |
| "loss": 0.8025404214859009, | |
| "step": 644 | |
| }, | |
| { | |
| "epoch": 2.7844827586206895, | |
| "grad_norm": 0.47904494404792786, | |
| "learning_rate": 6.206859728006616e-06, | |
| "loss": 0.6612002849578857, | |
| "step": 646 | |
| }, | |
| { | |
| "epoch": 2.793103448275862, | |
| "grad_norm": 0.2501460015773773, | |
| "learning_rate": 6.152655232984493e-06, | |
| "loss": 0.3472132980823517, | |
| "step": 648 | |
| }, | |
| { | |
| "epoch": 2.8017241379310347, | |
| "grad_norm": 1.059028148651123, | |
| "learning_rate": 6.0986972907907805e-06, | |
| "loss": 0.5596845746040344, | |
| "step": 650 | |
| }, | |
| { | |
| "epoch": 2.810344827586207, | |
| "grad_norm": 1.5540813207626343, | |
| "learning_rate": 6.044988645914256e-06, | |
| "loss": 0.7260704636573792, | |
| "step": 652 | |
| }, | |
| { | |
| "epoch": 2.8189655172413794, | |
| "grad_norm": 0.5435662865638733, | |
| "learning_rate": 5.991532030163549e-06, | |
| "loss": 0.7274701595306396, | |
| "step": 654 | |
| }, | |
| { | |
| "epoch": 2.8275862068965516, | |
| "grad_norm": 0.24130015075206757, | |
| "learning_rate": 5.938330162528225e-06, | |
| "loss": 0.5958088636398315, | |
| "step": 656 | |
| }, | |
| { | |
| "epoch": 2.836206896551724, | |
| "grad_norm": 0.32772472500801086, | |
| "learning_rate": 5.8853857490404605e-06, | |
| "loss": 0.7417409420013428, | |
| "step": 658 | |
| }, | |
| { | |
| "epoch": 2.844827586206897, | |
| "grad_norm": 0.34546273946762085, | |
| "learning_rate": 5.832701482637433e-06, | |
| "loss": 0.19964106380939484, | |
| "step": 660 | |
| }, | |
| { | |
| "epoch": 2.853448275862069, | |
| "grad_norm": 0.9264662861824036, | |
| "learning_rate": 5.780280043024316e-06, | |
| "loss": 0.3988272547721863, | |
| "step": 662 | |
| }, | |
| { | |
| "epoch": 2.862068965517241, | |
| "grad_norm": 0.47499191761016846, | |
| "learning_rate": 5.728124096538014e-06, | |
| "loss": 0.4928115904331207, | |
| "step": 664 | |
| }, | |
| { | |
| "epoch": 2.8706896551724137, | |
| "grad_norm": 0.24683460593223572, | |
| "learning_rate": 5.676236296011513e-06, | |
| "loss": 0.42225298285484314, | |
| "step": 666 | |
| }, | |
| { | |
| "epoch": 2.8793103448275863, | |
| "grad_norm": 0.4660603106021881, | |
| "learning_rate": 5.624619280638979e-06, | |
| "loss": 0.36263778805732727, | |
| "step": 668 | |
| }, | |
| { | |
| "epoch": 2.887931034482759, | |
| "grad_norm": 0.8654661774635315, | |
| "learning_rate": 5.573275675841481e-06, | |
| "loss": 0.720154345035553, | |
| "step": 670 | |
| }, | |
| { | |
| "epoch": 2.896551724137931, | |
| "grad_norm": 0.5549874901771545, | |
| "learning_rate": 5.522208093133492e-06, | |
| "loss": 0.44341281056404114, | |
| "step": 672 | |
| }, | |
| { | |
| "epoch": 2.905172413793103, | |
| "grad_norm": 0.3595507740974426, | |
| "learning_rate": 5.4714191299900325e-06, | |
| "loss": 0.712740957736969, | |
| "step": 674 | |
| }, | |
| { | |
| "epoch": 2.913793103448276, | |
| "grad_norm": 0.898064374923706, | |
| "learning_rate": 5.4209113697145656e-06, | |
| "loss": 0.7482072710990906, | |
| "step": 676 | |
| }, | |
| { | |
| "epoch": 2.9224137931034484, | |
| "grad_norm": 0.32744261622428894, | |
| "learning_rate": 5.370687381307591e-06, | |
| "loss": 0.6825791001319885, | |
| "step": 678 | |
| }, | |
| { | |
| "epoch": 2.9310344827586206, | |
| "grad_norm": 0.3922792971134186, | |
| "learning_rate": 5.320749719335997e-06, | |
| "loss": 0.5156247615814209, | |
| "step": 680 | |
| }, | |
| { | |
| "epoch": 2.939655172413793, | |
| "grad_norm": 0.21706661581993103, | |
| "learning_rate": 5.271100923803095e-06, | |
| "loss": 0.29097798466682434, | |
| "step": 682 | |
| }, | |
| { | |
| "epoch": 2.9482758620689653, | |
| "grad_norm": 0.2500763237476349, | |
| "learning_rate": 5.2217435200194595e-06, | |
| "loss": 0.42494314908981323, | |
| "step": 684 | |
| }, | |
| { | |
| "epoch": 2.956896551724138, | |
| "grad_norm": 0.28385624289512634, | |
| "learning_rate": 5.172680018474456e-06, | |
| "loss": 0.6773520708084106, | |
| "step": 686 | |
| }, | |
| { | |
| "epoch": 2.9655172413793105, | |
| "grad_norm": 0.32070091366767883, | |
| "learning_rate": 5.123912914708566e-06, | |
| "loss": 0.6991716027259827, | |
| "step": 688 | |
| }, | |
| { | |
| "epoch": 2.9741379310344827, | |
| "grad_norm": 0.28078293800354004, | |
| "learning_rate": 5.075444689186439e-06, | |
| "loss": 0.9952356815338135, | |
| "step": 690 | |
| }, | |
| { | |
| "epoch": 2.9827586206896552, | |
| "grad_norm": 0.30109333992004395, | |
| "learning_rate": 5.02727780717075e-06, | |
| "loss": 0.9628766179084778, | |
| "step": 692 | |
| }, | |
| { | |
| "epoch": 2.9913793103448274, | |
| "grad_norm": 0.4280763864517212, | |
| "learning_rate": 4.979414718596779e-06, | |
| "loss": 0.46527406573295593, | |
| "step": 694 | |
| }, | |
| { | |
| "epoch": 3.0, | |
| "grad_norm": 0.32468336820602417, | |
| "learning_rate": 4.931857857947829e-06, | |
| "loss": 0.8153132200241089, | |
| "step": 696 | |
| }, | |
| { | |
| "epoch": 3.0086206896551726, | |
| "grad_norm": 0.38036271929740906, | |
| "learning_rate": 4.884609644131373e-06, | |
| "loss": 0.2907155454158783, | |
| "step": 698 | |
| }, | |
| { | |
| "epoch": 3.0172413793103448, | |
| "grad_norm": 0.28837043046951294, | |
| "learning_rate": 4.837672480356037e-06, | |
| "loss": 0.4242869019508362, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 3.0258620689655173, | |
| "grad_norm": 0.582526683807373, | |
| "learning_rate": 4.791048754009362e-06, | |
| "loss": 0.3136082589626312, | |
| "step": 702 | |
| }, | |
| { | |
| "epoch": 3.0344827586206895, | |
| "grad_norm": 0.7384637594223022, | |
| "learning_rate": 4.7447408365363616e-06, | |
| "loss": 0.6226381063461304, | |
| "step": 704 | |
| }, | |
| { | |
| "epoch": 3.043103448275862, | |
| "grad_norm": 0.09337347000837326, | |
| "learning_rate": 4.698751083318918e-06, | |
| "loss": 0.0870320051908493, | |
| "step": 706 | |
| }, | |
| { | |
| "epoch": 3.0517241379310347, | |
| "grad_norm": 1.523980736732483, | |
| "learning_rate": 4.653081833555975e-06, | |
| "loss": 0.379879891872406, | |
| "step": 708 | |
| }, | |
| { | |
| "epoch": 3.060344827586207, | |
| "grad_norm": 0.2947269380092621, | |
| "learning_rate": 4.60773541014454e-06, | |
| "loss": 0.10685593634843826, | |
| "step": 710 | |
| }, | |
| { | |
| "epoch": 3.0689655172413794, | |
| "grad_norm": 0.9110690951347351, | |
| "learning_rate": 4.562714119561566e-06, | |
| "loss": 0.40309441089630127, | |
| "step": 712 | |
| }, | |
| { | |
| "epoch": 3.0775862068965516, | |
| "grad_norm": 0.4863778054714203, | |
| "learning_rate": 4.518020251746607e-06, | |
| "loss": 0.6567556262016296, | |
| "step": 714 | |
| }, | |
| { | |
| "epoch": 3.086206896551724, | |
| "grad_norm": 0.4967314898967743, | |
| "learning_rate": 4.473656079985362e-06, | |
| "loss": 0.14177639782428741, | |
| "step": 716 | |
| }, | |
| { | |
| "epoch": 3.0948275862068964, | |
| "grad_norm": 0.4328520596027374, | |
| "learning_rate": 4.429623860794032e-06, | |
| "loss": 0.3785516917705536, | |
| "step": 718 | |
| }, | |
| { | |
| "epoch": 3.103448275862069, | |
| "grad_norm": 0.7979432344436646, | |
| "learning_rate": 4.38592583380457e-06, | |
| "loss": 0.18618693947792053, | |
| "step": 720 | |
| }, | |
| { | |
| "epoch": 3.1120689655172415, | |
| "grad_norm": 0.3564269542694092, | |
| "learning_rate": 4.3425642216507325e-06, | |
| "loss": 0.20070935785770416, | |
| "step": 722 | |
| }, | |
| { | |
| "epoch": 3.1206896551724137, | |
| "grad_norm": 0.34848275780677795, | |
| "learning_rate": 4.2995412298550624e-06, | |
| "loss": 0.3287600874900818, | |
| "step": 724 | |
| }, | |
| { | |
| "epoch": 3.1293103448275863, | |
| "grad_norm": 0.4305904507637024, | |
| "learning_rate": 4.256859046716677e-06, | |
| "loss": 0.36046263575553894, | |
| "step": 726 | |
| }, | |
| { | |
| "epoch": 3.1379310344827585, | |
| "grad_norm": 0.708617627620697, | |
| "learning_rate": 4.214519843199995e-06, | |
| "loss": 0.4107988774776459, | |
| "step": 728 | |
| }, | |
| { | |
| "epoch": 3.146551724137931, | |
| "grad_norm": 0.0694451779127121, | |
| "learning_rate": 4.172525772824283e-06, | |
| "loss": 0.2075681984424591, | |
| "step": 730 | |
| }, | |
| { | |
| "epoch": 3.1551724137931036, | |
| "grad_norm": 0.164693221449852, | |
| "learning_rate": 4.130878971554148e-06, | |
| "loss": 0.17175163328647614, | |
| "step": 732 | |
| }, | |
| { | |
| "epoch": 3.163793103448276, | |
| "grad_norm": 0.36916273832321167, | |
| "learning_rate": 4.0895815576908686e-06, | |
| "loss": 0.3509948253631592, | |
| "step": 734 | |
| }, | |
| { | |
| "epoch": 3.1724137931034484, | |
| "grad_norm": 0.2566669285297394, | |
| "learning_rate": 4.048635631764673e-06, | |
| "loss": 0.22303853929042816, | |
| "step": 736 | |
| }, | |
| { | |
| "epoch": 3.1810344827586206, | |
| "grad_norm": 1.5648850202560425, | |
| "learning_rate": 4.008043276427886e-06, | |
| "loss": 0.523650050163269, | |
| "step": 738 | |
| }, | |
| { | |
| "epoch": 3.189655172413793, | |
| "grad_norm": 0.22325409948825836, | |
| "learning_rate": 3.967806556349001e-06, | |
| "loss": 0.08064466714859009, | |
| "step": 740 | |
| }, | |
| { | |
| "epoch": 3.1982758620689653, | |
| "grad_norm": 0.08049521595239639, | |
| "learning_rate": 3.927927518107664e-06, | |
| "loss": 0.17742744088172913, | |
| "step": 742 | |
| }, | |
| { | |
| "epoch": 3.206896551724138, | |
| "grad_norm": 0.3570193350315094, | |
| "learning_rate": 3.888408190090583e-06, | |
| "loss": 0.2840654253959656, | |
| "step": 744 | |
| }, | |
| { | |
| "epoch": 3.2155172413793105, | |
| "grad_norm": 0.5950610041618347, | |
| "learning_rate": 3.8492505823883394e-06, | |
| "loss": 0.27302879095077515, | |
| "step": 746 | |
| }, | |
| { | |
| "epoch": 3.2241379310344827, | |
| "grad_norm": 0.6008321046829224, | |
| "learning_rate": 3.810456686693178e-06, | |
| "loss": 0.38051924109458923, | |
| "step": 748 | |
| }, | |
| { | |
| "epoch": 3.2327586206896552, | |
| "grad_norm": 0.2667677104473114, | |
| "learning_rate": 3.7720284761976657e-06, | |
| "loss": 0.33380451798439026, | |
| "step": 750 | |
| }, | |
| { | |
| "epoch": 3.2413793103448274, | |
| "grad_norm": 1.0264617204666138, | |
| "learning_rate": 3.7339679054943645e-06, | |
| "loss": 0.2860594093799591, | |
| "step": 752 | |
| }, | |
| { | |
| "epoch": 3.25, | |
| "grad_norm": 0.5752837657928467, | |
| "learning_rate": 3.696276910476382e-06, | |
| "loss": 0.36991697549819946, | |
| "step": 754 | |
| }, | |
| { | |
| "epoch": 3.2586206896551726, | |
| "grad_norm": 0.29292166233062744, | |
| "learning_rate": 3.6589574082389324e-06, | |
| "loss": 0.3221489191055298, | |
| "step": 756 | |
| }, | |
| { | |
| "epoch": 3.2672413793103448, | |
| "grad_norm": 0.35932210087776184, | |
| "learning_rate": 3.6220112969818045e-06, | |
| "loss": 0.37856197357177734, | |
| "step": 758 | |
| }, | |
| { | |
| "epoch": 3.2758620689655173, | |
| "grad_norm": 0.11113092303276062, | |
| "learning_rate": 3.5854404559128287e-06, | |
| "loss": 0.24704982340335846, | |
| "step": 760 | |
| }, | |
| { | |
| "epoch": 3.2844827586206895, | |
| "grad_norm": 1.6768494844436646, | |
| "learning_rate": 3.549246745152283e-06, | |
| "loss": 0.36558791995048523, | |
| "step": 762 | |
| }, | |
| { | |
| "epoch": 3.293103448275862, | |
| "grad_norm": 0.4623495638370514, | |
| "learning_rate": 3.5134320056382877e-06, | |
| "loss": 0.3346759080886841, | |
| "step": 764 | |
| }, | |
| { | |
| "epoch": 3.3017241379310347, | |
| "grad_norm": 0.18975688517093658, | |
| "learning_rate": 3.477998059033163e-06, | |
| "loss": 0.2471802681684494, | |
| "step": 766 | |
| }, | |
| { | |
| "epoch": 3.310344827586207, | |
| "grad_norm": 0.2749144434928894, | |
| "learning_rate": 3.4429467076307823e-06, | |
| "loss": 0.34330496191978455, | |
| "step": 768 | |
| }, | |
| { | |
| "epoch": 3.3189655172413794, | |
| "grad_norm": 0.7870236039161682, | |
| "learning_rate": 3.4082797342648867e-06, | |
| "loss": 0.10211128741502762, | |
| "step": 770 | |
| }, | |
| { | |
| "epoch": 3.3275862068965516, | |
| "grad_norm": 0.4417794346809387, | |
| "learning_rate": 3.373998902218423e-06, | |
| "loss": 0.22283262014389038, | |
| "step": 772 | |
| }, | |
| { | |
| "epoch": 3.336206896551724, | |
| "grad_norm": 2.35225510597229, | |
| "learning_rate": 3.3401059551338326e-06, | |
| "loss": 0.3982272148132324, | |
| "step": 774 | |
| }, | |
| { | |
| "epoch": 3.344827586206897, | |
| "grad_norm": 0.32708537578582764, | |
| "learning_rate": 3.3066026169243916e-06, | |
| "loss": 0.35799640417099, | |
| "step": 776 | |
| }, | |
| { | |
| "epoch": 3.353448275862069, | |
| "grad_norm": 0.39861956238746643, | |
| "learning_rate": 3.2734905916865045e-06, | |
| "loss": 0.33809182047843933, | |
| "step": 778 | |
| }, | |
| { | |
| "epoch": 3.3620689655172415, | |
| "grad_norm": 2.7856783866882324, | |
| "learning_rate": 3.2407715636130365e-06, | |
| "loss": 0.24870137870311737, | |
| "step": 780 | |
| }, | |
| { | |
| "epoch": 3.3706896551724137, | |
| "grad_norm": 0.3867567777633667, | |
| "learning_rate": 3.20844719690765e-06, | |
| "loss": 0.25115785002708435, | |
| "step": 782 | |
| }, | |
| { | |
| "epoch": 3.3793103448275863, | |
| "grad_norm": 0.2818472385406494, | |
| "learning_rate": 3.1765191357001596e-06, | |
| "loss": 0.34631893038749695, | |
| "step": 784 | |
| }, | |
| { | |
| "epoch": 3.3879310344827585, | |
| "grad_norm": 0.6454493999481201, | |
| "learning_rate": 3.1449890039628956e-06, | |
| "loss": 0.21534167230129242, | |
| "step": 786 | |
| }, | |
| { | |
| "epoch": 3.396551724137931, | |
| "grad_norm": 0.2576633095741272, | |
| "learning_rate": 3.1138584054281177e-06, | |
| "loss": 0.38198593258857727, | |
| "step": 788 | |
| }, | |
| { | |
| "epoch": 3.405172413793103, | |
| "grad_norm": 0.3199053704738617, | |
| "learning_rate": 3.0831289235064297e-06, | |
| "loss": 0.3233962655067444, | |
| "step": 790 | |
| }, | |
| { | |
| "epoch": 3.413793103448276, | |
| "grad_norm": 0.24734322726726532, | |
| "learning_rate": 3.0528021212062536e-06, | |
| "loss": 0.39972445368766785, | |
| "step": 792 | |
| }, | |
| { | |
| "epoch": 3.4224137931034484, | |
| "grad_norm": 0.7525281310081482, | |
| "learning_rate": 3.0228795410543182e-06, | |
| "loss": 0.2797224819660187, | |
| "step": 794 | |
| }, | |
| { | |
| "epoch": 3.4310344827586206, | |
| "grad_norm": 0.38152387738227844, | |
| "learning_rate": 2.9933627050172152e-06, | |
| "loss": 0.4961601793766022, | |
| "step": 796 | |
| }, | |
| { | |
| "epoch": 3.439655172413793, | |
| "grad_norm": 0.6566803455352783, | |
| "learning_rate": 2.96425311442397e-06, | |
| "loss": 0.2608637511730194, | |
| "step": 798 | |
| }, | |
| { | |
| "epoch": 3.4482758620689653, | |
| "grad_norm": 0.2898580729961395, | |
| "learning_rate": 2.9355522498896914e-06, | |
| "loss": 0.389303594827652, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 3.456896551724138, | |
| "grad_norm": 0.2688350975513458, | |
| "learning_rate": 2.907261571240257e-06, | |
| "loss": 0.416867733001709, | |
| "step": 802 | |
| }, | |
| { | |
| "epoch": 3.4655172413793105, | |
| "grad_norm": 0.3749980032444, | |
| "learning_rate": 2.879382517438062e-06, | |
| "loss": 0.28168269991874695, | |
| "step": 804 | |
| }, | |
| { | |
| "epoch": 3.4741379310344827, | |
| "grad_norm": 0.5589833855628967, | |
| "learning_rate": 2.8519165065088293e-06, | |
| "loss": 0.39084625244140625, | |
| "step": 806 | |
| }, | |
| { | |
| "epoch": 3.4827586206896552, | |
| "grad_norm": 0.2921109199523926, | |
| "learning_rate": 2.824864935469487e-06, | |
| "loss": 0.3399796187877655, | |
| "step": 808 | |
| }, | |
| { | |
| "epoch": 3.4913793103448274, | |
| "grad_norm": 0.22237980365753174, | |
| "learning_rate": 2.7982291802570983e-06, | |
| "loss": 0.10816089808940887, | |
| "step": 810 | |
| }, | |
| { | |
| "epoch": 3.5, | |
| "grad_norm": 0.608170747756958, | |
| "learning_rate": 2.7720105956588983e-06, | |
| "loss": 0.3328501880168915, | |
| "step": 812 | |
| }, | |
| { | |
| "epoch": 3.5086206896551726, | |
| "grad_norm": 1.1251869201660156, | |
| "learning_rate": 2.7462105152433617e-06, | |
| "loss": 0.4947846531867981, | |
| "step": 814 | |
| }, | |
| { | |
| "epoch": 3.5172413793103448, | |
| "grad_norm": 0.5909225344657898, | |
| "learning_rate": 2.720830251292391e-06, | |
| "loss": 0.3645175099372864, | |
| "step": 816 | |
| }, | |
| { | |
| "epoch": 3.5258620689655173, | |
| "grad_norm": 0.3970613181591034, | |
| "learning_rate": 2.695871094734559e-06, | |
| "loss": 0.4159550368785858, | |
| "step": 818 | |
| }, | |
| { | |
| "epoch": 3.5344827586206895, | |
| "grad_norm": 0.3088339567184448, | |
| "learning_rate": 2.6713343150794506e-06, | |
| "loss": 0.31124040484428406, | |
| "step": 820 | |
| }, | |
| { | |
| "epoch": 3.543103448275862, | |
| "grad_norm": 0.8232600688934326, | |
| "learning_rate": 2.6472211603530894e-06, | |
| "loss": 0.2729651927947998, | |
| "step": 822 | |
| }, | |
| { | |
| "epoch": 3.5517241379310347, | |
| "grad_norm": 0.47030189633369446, | |
| "learning_rate": 2.6235328570344666e-06, | |
| "loss": 0.27673032879829407, | |
| "step": 824 | |
| }, | |
| { | |
| "epoch": 3.560344827586207, | |
| "grad_norm": 0.07642904669046402, | |
| "learning_rate": 2.600270609993143e-06, | |
| "loss": 0.0557853989303112, | |
| "step": 826 | |
| }, | |
| { | |
| "epoch": 3.5689655172413794, | |
| "grad_norm": 1.2752304077148438, | |
| "learning_rate": 2.5774356024279827e-06, | |
| "loss": 0.18158602714538574, | |
| "step": 828 | |
| }, | |
| { | |
| "epoch": 3.5775862068965516, | |
| "grad_norm": 0.2896014153957367, | |
| "learning_rate": 2.555028995806956e-06, | |
| "loss": 0.18923550844192505, | |
| "step": 830 | |
| }, | |
| { | |
| "epoch": 3.586206896551724, | |
| "grad_norm": 0.3300286829471588, | |
| "learning_rate": 2.5330519298080757e-06, | |
| "loss": 0.22178103029727936, | |
| "step": 832 | |
| }, | |
| { | |
| "epoch": 3.594827586206897, | |
| "grad_norm": 0.47191181778907776, | |
| "learning_rate": 2.511505522261417e-06, | |
| "loss": 0.43088600039482117, | |
| "step": 834 | |
| }, | |
| { | |
| "epoch": 3.603448275862069, | |
| "grad_norm": 0.5750867128372192, | |
| "learning_rate": 2.4903908690922746e-06, | |
| "loss": 0.3035900592803955, | |
| "step": 836 | |
| }, | |
| { | |
| "epoch": 3.612068965517241, | |
| "grad_norm": 0.31900304555892944, | |
| "learning_rate": 2.4697090442654055e-06, | |
| "loss": 0.3073073923587799, | |
| "step": 838 | |
| }, | |
| { | |
| "epoch": 3.6206896551724137, | |
| "grad_norm": 0.9884761571884155, | |
| "learning_rate": 2.4494610997304156e-06, | |
| "loss": 0.1451842337846756, | |
| "step": 840 | |
| }, | |
| { | |
| "epoch": 3.6293103448275863, | |
| "grad_norm": 0.5963663458824158, | |
| "learning_rate": 2.4296480653682444e-06, | |
| "loss": 0.19056405127048492, | |
| "step": 842 | |
| }, | |
| { | |
| "epoch": 3.637931034482759, | |
| "grad_norm": 0.4518829882144928, | |
| "learning_rate": 2.4102709489387925e-06, | |
| "loss": 0.3310995101928711, | |
| "step": 844 | |
| }, | |
| { | |
| "epoch": 3.646551724137931, | |
| "grad_norm": 13.950098991394043, | |
| "learning_rate": 2.391330736029649e-06, | |
| "loss": 0.27216413617134094, | |
| "step": 846 | |
| }, | |
| { | |
| "epoch": 3.655172413793103, | |
| "grad_norm": 1.5341519117355347, | |
| "learning_rate": 2.3728283900059756e-06, | |
| "loss": 0.13349619507789612, | |
| "step": 848 | |
| }, | |
| { | |
| "epoch": 3.663793103448276, | |
| "grad_norm": 0.07572399079799652, | |
| "learning_rate": 2.3547648519614967e-06, | |
| "loss": 0.20533311367034912, | |
| "step": 850 | |
| }, | |
| { | |
| "epoch": 3.6724137931034484, | |
| "grad_norm": 0.36925849318504333, | |
| "learning_rate": 2.3371410406706356e-06, | |
| "loss": 0.38371843099594116, | |
| "step": 852 | |
| }, | |
| { | |
| "epoch": 3.6810344827586206, | |
| "grad_norm": 0.9557275176048279, | |
| "learning_rate": 2.319957852541786e-06, | |
| "loss": 0.2169187366962433, | |
| "step": 854 | |
| }, | |
| { | |
| "epoch": 3.689655172413793, | |
| "grad_norm": 0.5187146663665771, | |
| "learning_rate": 2.303216161571706e-06, | |
| "loss": 0.37597841024398804, | |
| "step": 856 | |
| }, | |
| { | |
| "epoch": 3.6982758620689653, | |
| "grad_norm": 0.2956836223602295, | |
| "learning_rate": 2.2869168193010793e-06, | |
| "loss": 0.4011721611022949, | |
| "step": 858 | |
| }, | |
| { | |
| "epoch": 3.706896551724138, | |
| "grad_norm": 0.09843911975622177, | |
| "learning_rate": 2.2710606547711917e-06, | |
| "loss": 0.18101970851421356, | |
| "step": 860 | |
| }, | |
| { | |
| "epoch": 3.7155172413793105, | |
| "grad_norm": 0.6112839579582214, | |
| "learning_rate": 2.2556484744817635e-06, | |
| "loss": 0.23301751911640167, | |
| "step": 862 | |
| }, | |
| { | |
| "epoch": 3.7241379310344827, | |
| "grad_norm": 0.03941560536623001, | |
| "learning_rate": 2.2406810623499382e-06, | |
| "loss": 0.1944858729839325, | |
| "step": 864 | |
| }, | |
| { | |
| "epoch": 3.7327586206896552, | |
| "grad_norm": 0.2406272292137146, | |
| "learning_rate": 2.226159179670395e-06, | |
| "loss": 0.11213114112615585, | |
| "step": 866 | |
| }, | |
| { | |
| "epoch": 3.7413793103448274, | |
| "grad_norm": 0.9250267148017883, | |
| "learning_rate": 2.2120835650766386e-06, | |
| "loss": 0.25432637333869934, | |
| "step": 868 | |
| }, | |
| { | |
| "epoch": 3.75, | |
| "grad_norm": 0.3792153596878052, | |
| "learning_rate": 2.1984549345034234e-06, | |
| "loss": 0.2917807102203369, | |
| "step": 870 | |
| }, | |
| { | |
| "epoch": 3.7586206896551726, | |
| "grad_norm": 0.050461042672395706, | |
| "learning_rate": 2.1852739811503433e-06, | |
| "loss": 0.1030154600739479, | |
| "step": 872 | |
| }, | |
| { | |
| "epoch": 3.7672413793103448, | |
| "grad_norm": 0.03659242019057274, | |
| "learning_rate": 2.172541375446566e-06, | |
| "loss": 0.24243952333927155, | |
| "step": 874 | |
| }, | |
| { | |
| "epoch": 3.7758620689655173, | |
| "grad_norm": 0.3581734895706177, | |
| "learning_rate": 2.1602577650167436e-06, | |
| "loss": 0.4455357789993286, | |
| "step": 876 | |
| }, | |
| { | |
| "epoch": 3.7844827586206895, | |
| "grad_norm": 0.6977261304855347, | |
| "learning_rate": 2.1484237746480574e-06, | |
| "loss": 0.5109292268753052, | |
| "step": 878 | |
| }, | |
| { | |
| "epoch": 3.793103448275862, | |
| "grad_norm": 0.25545695424079895, | |
| "learning_rate": 2.1370400062584544e-06, | |
| "loss": 0.1860283464193344, | |
| "step": 880 | |
| }, | |
| { | |
| "epoch": 3.8017241379310347, | |
| "grad_norm": 0.5381384491920471, | |
| "learning_rate": 2.1261070388660202e-06, | |
| "loss": 0.20024971663951874, | |
| "step": 882 | |
| }, | |
| { | |
| "epoch": 3.810344827586207, | |
| "grad_norm": 0.3756001591682434, | |
| "learning_rate": 2.115625428559535e-06, | |
| "loss": 0.43951112031936646, | |
| "step": 884 | |
| }, | |
| { | |
| "epoch": 3.8189655172413794, | |
| "grad_norm": 0.5115352869033813, | |
| "learning_rate": 2.105595708470185e-06, | |
| "loss": 0.16899777948856354, | |
| "step": 886 | |
| }, | |
| { | |
| "epoch": 3.8275862068965516, | |
| "grad_norm": 0.2846692204475403, | |
| "learning_rate": 2.096018388744448e-06, | |
| "loss": 0.26339343190193176, | |
| "step": 888 | |
| }, | |
| { | |
| "epoch": 3.836206896551724, | |
| "grad_norm": 0.7379516959190369, | |
| "learning_rate": 2.086893956518142e-06, | |
| "loss": 0.11539773643016815, | |
| "step": 890 | |
| }, | |
| { | |
| "epoch": 3.844827586206897, | |
| "grad_norm": 0.5147801041603088, | |
| "learning_rate": 2.078222875891654e-06, | |
| "loss": 0.3523973822593689, | |
| "step": 892 | |
| }, | |
| { | |
| "epoch": 3.853448275862069, | |
| "grad_norm": 0.15211860835552216, | |
| "learning_rate": 2.070005587906328e-06, | |
| "loss": 0.33388248085975647, | |
| "step": 894 | |
| }, | |
| { | |
| "epoch": 3.862068965517241, | |
| "grad_norm": 0.3375397324562073, | |
| "learning_rate": 2.0622425105220346e-06, | |
| "loss": 0.2652340531349182, | |
| "step": 896 | |
| }, | |
| { | |
| "epoch": 3.8706896551724137, | |
| "grad_norm": 0.3318890333175659, | |
| "learning_rate": 2.0549340385959123e-06, | |
| "loss": 0.1966915875673294, | |
| "step": 898 | |
| }, | |
| { | |
| "epoch": 3.8793103448275863, | |
| "grad_norm": 0.3057389259338379, | |
| "learning_rate": 2.0480805438622865e-06, | |
| "loss": 0.391522079706192, | |
| "step": 900 | |
| }, | |
| { | |
| "epoch": 3.887931034482759, | |
| "grad_norm": 0.5989668369293213, | |
| "learning_rate": 2.0416823749137542e-06, | |
| "loss": 0.3690534234046936, | |
| "step": 902 | |
| }, | |
| { | |
| "epoch": 3.896551724137931, | |
| "grad_norm": 0.3611077666282654, | |
| "learning_rate": 2.03573985718346e-06, | |
| "loss": 0.35606420040130615, | |
| "step": 904 | |
| }, | |
| { | |
| "epoch": 3.905172413793103, | |
| "grad_norm": 0.518487274646759, | |
| "learning_rate": 2.0302532929285423e-06, | |
| "loss": 0.37323978543281555, | |
| "step": 906 | |
| }, | |
| { | |
| "epoch": 3.913793103448276, | |
| "grad_norm": 0.3315095603466034, | |
| "learning_rate": 2.0252229612147587e-06, | |
| "loss": 0.22919489443302155, | |
| "step": 908 | |
| }, | |
| { | |
| "epoch": 3.9224137931034484, | |
| "grad_norm": 0.22296565771102905, | |
| "learning_rate": 2.0206491179022887e-06, | |
| "loss": 0.17647181451320648, | |
| "step": 910 | |
| }, | |
| { | |
| "epoch": 3.9310344827586206, | |
| "grad_norm": 0.2577214539051056, | |
| "learning_rate": 2.0165319956327258e-06, | |
| "loss": 0.23643004894256592, | |
| "step": 912 | |
| }, | |
| { | |
| "epoch": 3.939655172413793, | |
| "grad_norm": 0.37906980514526367, | |
| "learning_rate": 2.01287180381724e-06, | |
| "loss": 0.3691279888153076, | |
| "step": 914 | |
| }, | |
| { | |
| "epoch": 3.9482758620689653, | |
| "grad_norm": 0.30090105533599854, | |
| "learning_rate": 2.0096687286259286e-06, | |
| "loss": 0.3844853937625885, | |
| "step": 916 | |
| }, | |
| { | |
| "epoch": 3.956896551724138, | |
| "grad_norm": 0.3276263475418091, | |
| "learning_rate": 2.006922932978346e-06, | |
| "loss": 0.28372105956077576, | |
| "step": 918 | |
| }, | |
| { | |
| "epoch": 3.9655172413793105, | |
| "grad_norm": 2.499201536178589, | |
| "learning_rate": 2.004634556535215e-06, | |
| "loss": 0.36251726746559143, | |
| "step": 920 | |
| }, | |
| { | |
| "epoch": 3.9741379310344827, | |
| "grad_norm": 0.3088098466396332, | |
| "learning_rate": 2.0028037156913294e-06, | |
| "loss": 0.2959789037704468, | |
| "step": 922 | |
| }, | |
| { | |
| "epoch": 3.9827586206896552, | |
| "grad_norm": 0.5072318911552429, | |
| "learning_rate": 2.0014305035696265e-06, | |
| "loss": 0.3481578230857849, | |
| "step": 924 | |
| }, | |
| { | |
| "epoch": 3.9913793103448274, | |
| "grad_norm": 0.469334214925766, | |
| "learning_rate": 2.0005149900164546e-06, | |
| "loss": 0.36124011874198914, | |
| "step": 926 | |
| }, | |
| { | |
| "epoch": 4.0, | |
| "grad_norm": 0.7579672932624817, | |
| "learning_rate": 2.000057221598022e-06, | |
| "loss": 0.39811837673187256, | |
| "step": 928 | |
| }, | |
| { | |
| "epoch": 4.0, | |
| "step": 928, | |
| "total_flos": 3.6942184710227886e+18, | |
| "train_loss": 0.7407096255137103, | |
| "train_runtime": 10071.2083, | |
| "train_samples_per_second": 5.529, | |
| "train_steps_per_second": 0.092 | |
| } | |
| ], | |
| "logging_steps": 2, | |
| "max_steps": 928, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 4, | |
| "save_steps": 99999, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": false, | |
| "should_training_stop": false | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 3.6942184710227886e+18, | |
| "train_batch_size": 3, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |