Image-Text-to-Text
Transformers
Safetensors
qwen3_5
llama-factory
full
Generated from Trainer
conversational
Instructions to use furproxy/9b-36 with libraries, inference providers, notebooks, and local apps. Follow these links to get started.
- Libraries
- Transformers
How to use furproxy/9b-36 with Transformers:
# Use a pipeline as a high-level helper from transformers import pipeline pipe = pipeline("image-text-to-text", model="furproxy/9b-36") messages = [ { "role": "user", "content": [ {"type": "image", "url": "https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/p-blog/candy.JPG"}, {"type": "text", "text": "What animal is on the candy?"} ] }, ] pipe(text=messages)# Load model directly from transformers import AutoProcessor, AutoModelForImageTextToText processor = AutoProcessor.from_pretrained("furproxy/9b-36") model = AutoModelForImageTextToText.from_pretrained("furproxy/9b-36") messages = [ { "role": "user", "content": [ {"type": "image", "url": "https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/p-blog/candy.JPG"}, {"type": "text", "text": "What animal is on the candy?"} ] }, ] inputs = processor.apply_chat_template( messages, add_generation_prompt=True, tokenize=True, return_dict=True, return_tensors="pt", ).to(model.device) outputs = model.generate(**inputs, max_new_tokens=40) print(processor.decode(outputs[0][inputs["input_ids"].shape[-1]:])) - Notebooks
- Google Colab
- Kaggle
- Local Apps Settings
- vLLM
How to use furproxy/9b-36 with vLLM:
Install from pip and serve model
# Install vLLM from pip: pip install vllm # Start the vLLM server: vllm serve "furproxy/9b-36" # Call the server using curl (OpenAI-compatible API): curl -X POST "http://localhost:8000/v1/chat/completions" \ -H "Content-Type: application/json" \ --data '{ "model": "furproxy/9b-36", "messages": [ { "role": "user", "content": [ { "type": "text", "text": "Describe this image in one sentence." }, { "type": "image_url", "image_url": { "url": "https://cdn.britannica.com/61/93061-050-99147DCE/Statue-of-Liberty-Island-New-York-Bay.jpg" } } ] } ] }'Use Docker
docker model run hf.co/furproxy/9b-36
- SGLang
How to use furproxy/9b-36 with SGLang:
Install from pip and serve model
# Install SGLang from pip: pip install sglang # Start the SGLang server: python3 -m sglang.launch_server \ --model-path "furproxy/9b-36" \ --host 0.0.0.0 \ --port 30000 # Call the server using curl (OpenAI-compatible API): curl -X POST "http://localhost:30000/v1/chat/completions" \ -H "Content-Type: application/json" \ --data '{ "model": "furproxy/9b-36", "messages": [ { "role": "user", "content": [ { "type": "text", "text": "Describe this image in one sentence." }, { "type": "image_url", "image_url": { "url": "https://cdn.britannica.com/61/93061-050-99147DCE/Statue-of-Liberty-Island-New-York-Bay.jpg" } } ] } ] }'Use Docker images
docker run --gpus all \ --shm-size 32g \ -p 30000:30000 \ -v ~/.cache/huggingface:/root/.cache/huggingface \ --env "HF_TOKEN=<secret>" \ --ipc=host \ lmsysorg/sglang:latest \ python3 -m sglang.launch_server \ --model-path "furproxy/9b-36" \ --host 0.0.0.0 \ --port 30000 # Call the server using curl (OpenAI-compatible API): curl -X POST "http://localhost:30000/v1/chat/completions" \ -H "Content-Type: application/json" \ --data '{ "model": "furproxy/9b-36", "messages": [ { "role": "user", "content": [ { "type": "text", "text": "Describe this image in one sentence." }, { "type": "image_url", "image_url": { "url": "https://cdn.britannica.com/61/93061-050-99147DCE/Statue-of-Liberty-Island-New-York-Bay.jpg" } } ] } ] }' - Docker Model Runner
How to use furproxy/9b-36 with Docker Model Runner:
docker model run hf.co/furproxy/9b-36
| { | |
| "best_global_step": null, | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 3.0, | |
| "eval_steps": 500, | |
| "global_step": 1410, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.00425531914893617, | |
| "grad_norm": 0.9765625, | |
| "learning_rate": 1.6901408450704225e-07, | |
| "loss": 1.472063422203064, | |
| "step": 2 | |
| }, | |
| { | |
| "epoch": 0.00851063829787234, | |
| "grad_norm": 0.94140625, | |
| "learning_rate": 5.070422535211268e-07, | |
| "loss": 1.9255280494689941, | |
| "step": 4 | |
| }, | |
| { | |
| "epoch": 0.01276595744680851, | |
| "grad_norm": 1.9296875, | |
| "learning_rate": 8.450704225352114e-07, | |
| "loss": 1.9632502794265747, | |
| "step": 6 | |
| }, | |
| { | |
| "epoch": 0.01702127659574468, | |
| "grad_norm": 1.34375, | |
| "learning_rate": 1.1830985915492958e-06, | |
| "loss": 1.6374425888061523, | |
| "step": 8 | |
| }, | |
| { | |
| "epoch": 0.02127659574468085, | |
| "grad_norm": 1.1953125, | |
| "learning_rate": 1.5211267605633803e-06, | |
| "loss": 1.959162950515747, | |
| "step": 10 | |
| }, | |
| { | |
| "epoch": 0.02553191489361702, | |
| "grad_norm": 0.9140625, | |
| "learning_rate": 1.8591549295774647e-06, | |
| "loss": 1.4726247787475586, | |
| "step": 12 | |
| }, | |
| { | |
| "epoch": 0.029787234042553193, | |
| "grad_norm": 1.765625, | |
| "learning_rate": 2.1971830985915494e-06, | |
| "loss": 2.0769670009613037, | |
| "step": 14 | |
| }, | |
| { | |
| "epoch": 0.03404255319148936, | |
| "grad_norm": 0.953125, | |
| "learning_rate": 2.535211267605634e-06, | |
| "loss": 1.9118707180023193, | |
| "step": 16 | |
| }, | |
| { | |
| "epoch": 0.03829787234042553, | |
| "grad_norm": 1.171875, | |
| "learning_rate": 2.8732394366197183e-06, | |
| "loss": 1.7807828187942505, | |
| "step": 18 | |
| }, | |
| { | |
| "epoch": 0.0425531914893617, | |
| "grad_norm": 1.5390625, | |
| "learning_rate": 3.2112676056338028e-06, | |
| "loss": 1.9391090869903564, | |
| "step": 20 | |
| }, | |
| { | |
| "epoch": 0.04680851063829787, | |
| "grad_norm": 0.72265625, | |
| "learning_rate": 3.549295774647887e-06, | |
| "loss": 1.6522634029388428, | |
| "step": 22 | |
| }, | |
| { | |
| "epoch": 0.05106382978723404, | |
| "grad_norm": 1.1328125, | |
| "learning_rate": 3.887323943661972e-06, | |
| "loss": 1.692237138748169, | |
| "step": 24 | |
| }, | |
| { | |
| "epoch": 0.05531914893617021, | |
| "grad_norm": 1.1015625, | |
| "learning_rate": 4.225352112676056e-06, | |
| "loss": 1.443329930305481, | |
| "step": 26 | |
| }, | |
| { | |
| "epoch": 0.059574468085106386, | |
| "grad_norm": 1.0625, | |
| "learning_rate": 4.563380281690141e-06, | |
| "loss": 1.758739948272705, | |
| "step": 28 | |
| }, | |
| { | |
| "epoch": 0.06382978723404255, | |
| "grad_norm": 0.8515625, | |
| "learning_rate": 4.901408450704226e-06, | |
| "loss": 1.6877835988998413, | |
| "step": 30 | |
| }, | |
| { | |
| "epoch": 0.06808510638297872, | |
| "grad_norm": 1.5078125, | |
| "learning_rate": 5.2394366197183095e-06, | |
| "loss": 1.468690037727356, | |
| "step": 32 | |
| }, | |
| { | |
| "epoch": 0.07234042553191489, | |
| "grad_norm": 0.64453125, | |
| "learning_rate": 5.577464788732395e-06, | |
| "loss": 1.6828500032424927, | |
| "step": 34 | |
| }, | |
| { | |
| "epoch": 0.07659574468085106, | |
| "grad_norm": 1.0546875, | |
| "learning_rate": 5.915492957746479e-06, | |
| "loss": 1.6752516031265259, | |
| "step": 36 | |
| }, | |
| { | |
| "epoch": 0.08085106382978724, | |
| "grad_norm": 2.984375, | |
| "learning_rate": 6.253521126760563e-06, | |
| "loss": 1.9001795053482056, | |
| "step": 38 | |
| }, | |
| { | |
| "epoch": 0.0851063829787234, | |
| "grad_norm": 0.9296875, | |
| "learning_rate": 6.591549295774649e-06, | |
| "loss": 1.6414787769317627, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 0.08936170212765958, | |
| "grad_norm": 0.8828125, | |
| "learning_rate": 6.929577464788733e-06, | |
| "loss": 1.3303271532058716, | |
| "step": 42 | |
| }, | |
| { | |
| "epoch": 0.09361702127659574, | |
| "grad_norm": 0.80078125, | |
| "learning_rate": 7.267605633802817e-06, | |
| "loss": 1.5457786321640015, | |
| "step": 44 | |
| }, | |
| { | |
| "epoch": 0.09787234042553192, | |
| "grad_norm": 0.54296875, | |
| "learning_rate": 7.605633802816902e-06, | |
| "loss": 1.4271644353866577, | |
| "step": 46 | |
| }, | |
| { | |
| "epoch": 0.10212765957446808, | |
| "grad_norm": 0.6484375, | |
| "learning_rate": 7.943661971830987e-06, | |
| "loss": 1.5979524850845337, | |
| "step": 48 | |
| }, | |
| { | |
| "epoch": 0.10638297872340426, | |
| "grad_norm": 1.0234375, | |
| "learning_rate": 8.28169014084507e-06, | |
| "loss": 1.6684672832489014, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 0.11063829787234042, | |
| "grad_norm": 0.7109375, | |
| "learning_rate": 8.619718309859156e-06, | |
| "loss": 1.3746291399002075, | |
| "step": 52 | |
| }, | |
| { | |
| "epoch": 0.1148936170212766, | |
| "grad_norm": 0.5546875, | |
| "learning_rate": 8.95774647887324e-06, | |
| "loss": 1.4159908294677734, | |
| "step": 54 | |
| }, | |
| { | |
| "epoch": 0.11914893617021277, | |
| "grad_norm": 0.58984375, | |
| "learning_rate": 9.295774647887323e-06, | |
| "loss": 1.2559518814086914, | |
| "step": 56 | |
| }, | |
| { | |
| "epoch": 0.12340425531914893, | |
| "grad_norm": 0.6484375, | |
| "learning_rate": 9.63380281690141e-06, | |
| "loss": 1.4071341753005981, | |
| "step": 58 | |
| }, | |
| { | |
| "epoch": 0.1276595744680851, | |
| "grad_norm": 0.74609375, | |
| "learning_rate": 9.971830985915494e-06, | |
| "loss": 1.325224757194519, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 0.13191489361702127, | |
| "grad_norm": 0.94921875, | |
| "learning_rate": 1.0309859154929577e-05, | |
| "loss": 1.2854632139205933, | |
| "step": 62 | |
| }, | |
| { | |
| "epoch": 0.13617021276595745, | |
| "grad_norm": 0.87890625, | |
| "learning_rate": 1.0647887323943662e-05, | |
| "loss": 1.0856443643569946, | |
| "step": 64 | |
| }, | |
| { | |
| "epoch": 0.14042553191489363, | |
| "grad_norm": 0.4296875, | |
| "learning_rate": 1.0985915492957746e-05, | |
| "loss": 1.4248228073120117, | |
| "step": 66 | |
| }, | |
| { | |
| "epoch": 0.14468085106382977, | |
| "grad_norm": 0.50390625, | |
| "learning_rate": 1.1323943661971831e-05, | |
| "loss": 1.3485311269760132, | |
| "step": 68 | |
| }, | |
| { | |
| "epoch": 0.14893617021276595, | |
| "grad_norm": 0.6328125, | |
| "learning_rate": 1.1661971830985917e-05, | |
| "loss": 1.3656905889511108, | |
| "step": 70 | |
| }, | |
| { | |
| "epoch": 0.15319148936170213, | |
| "grad_norm": 0.6171875, | |
| "learning_rate": 1.2e-05, | |
| "loss": 1.4325069189071655, | |
| "step": 72 | |
| }, | |
| { | |
| "epoch": 0.1574468085106383, | |
| "grad_norm": 0.69921875, | |
| "learning_rate": 1.1999947154376356e-05, | |
| "loss": 1.541415810585022, | |
| "step": 74 | |
| }, | |
| { | |
| "epoch": 0.16170212765957448, | |
| "grad_norm": 0.53515625, | |
| "learning_rate": 1.199978861866902e-05, | |
| "loss": 1.385392665863037, | |
| "step": 76 | |
| }, | |
| { | |
| "epoch": 0.16595744680851063, | |
| "grad_norm": 0.609375, | |
| "learning_rate": 1.19995243963688e-05, | |
| "loss": 1.2694331407546997, | |
| "step": 78 | |
| }, | |
| { | |
| "epoch": 0.1702127659574468, | |
| "grad_norm": 1.125, | |
| "learning_rate": 1.1999154493293607e-05, | |
| "loss": 1.5753132104873657, | |
| "step": 80 | |
| }, | |
| { | |
| "epoch": 0.17446808510638298, | |
| "grad_norm": 0.546875, | |
| "learning_rate": 1.1998678917588341e-05, | |
| "loss": 1.2333686351776123, | |
| "step": 82 | |
| }, | |
| { | |
| "epoch": 0.17872340425531916, | |
| "grad_norm": 0.63671875, | |
| "learning_rate": 1.1998097679724704e-05, | |
| "loss": 1.2822571992874146, | |
| "step": 84 | |
| }, | |
| { | |
| "epoch": 0.1829787234042553, | |
| "grad_norm": 0.5546875, | |
| "learning_rate": 1.1997410792500985e-05, | |
| "loss": 1.3188749551773071, | |
| "step": 86 | |
| }, | |
| { | |
| "epoch": 0.18723404255319148, | |
| "grad_norm": 0.71875, | |
| "learning_rate": 1.1996618271041757e-05, | |
| "loss": 1.3399384021759033, | |
| "step": 88 | |
| }, | |
| { | |
| "epoch": 0.19148936170212766, | |
| "grad_norm": 0.75390625, | |
| "learning_rate": 1.1995720132797555e-05, | |
| "loss": 1.3193027973175049, | |
| "step": 90 | |
| }, | |
| { | |
| "epoch": 0.19574468085106383, | |
| "grad_norm": 0.8515625, | |
| "learning_rate": 1.1994716397544498e-05, | |
| "loss": 1.30392324924469, | |
| "step": 92 | |
| }, | |
| { | |
| "epoch": 0.2, | |
| "grad_norm": 0.45703125, | |
| "learning_rate": 1.1993607087383841e-05, | |
| "loss": 1.1891350746154785, | |
| "step": 94 | |
| }, | |
| { | |
| "epoch": 0.20425531914893616, | |
| "grad_norm": 0.51171875, | |
| "learning_rate": 1.1992392226741494e-05, | |
| "loss": 1.2335644960403442, | |
| "step": 96 | |
| }, | |
| { | |
| "epoch": 0.20851063829787234, | |
| "grad_norm": 0.48046875, | |
| "learning_rate": 1.1991071842367492e-05, | |
| "loss": 1.3029327392578125, | |
| "step": 98 | |
| }, | |
| { | |
| "epoch": 0.2127659574468085, | |
| "grad_norm": 0.55859375, | |
| "learning_rate": 1.1989645963335381e-05, | |
| "loss": 1.2645999193191528, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.2170212765957447, | |
| "grad_norm": 0.83203125, | |
| "learning_rate": 1.1988114621041614e-05, | |
| "loss": 1.2268767356872559, | |
| "step": 102 | |
| }, | |
| { | |
| "epoch": 0.22127659574468084, | |
| "grad_norm": 0.90234375, | |
| "learning_rate": 1.1986477849204828e-05, | |
| "loss": 1.1907193660736084, | |
| "step": 104 | |
| }, | |
| { | |
| "epoch": 0.225531914893617, | |
| "grad_norm": 0.64453125, | |
| "learning_rate": 1.1984735683865123e-05, | |
| "loss": 1.31586754322052, | |
| "step": 106 | |
| }, | |
| { | |
| "epoch": 0.2297872340425532, | |
| "grad_norm": 1.0703125, | |
| "learning_rate": 1.1982888163383247e-05, | |
| "loss": 1.299729347229004, | |
| "step": 108 | |
| }, | |
| { | |
| "epoch": 0.23404255319148937, | |
| "grad_norm": 1.84375, | |
| "learning_rate": 1.1980935328439775e-05, | |
| "loss": 1.708440899848938, | |
| "step": 110 | |
| }, | |
| { | |
| "epoch": 0.23829787234042554, | |
| "grad_norm": 0.73828125, | |
| "learning_rate": 1.1978877222034202e-05, | |
| "loss": 1.2829785346984863, | |
| "step": 112 | |
| }, | |
| { | |
| "epoch": 0.2425531914893617, | |
| "grad_norm": 0.68359375, | |
| "learning_rate": 1.197671388948399e-05, | |
| "loss": 1.272111415863037, | |
| "step": 114 | |
| }, | |
| { | |
| "epoch": 0.24680851063829787, | |
| "grad_norm": 0.421875, | |
| "learning_rate": 1.1974445378423578e-05, | |
| "loss": 1.3535809516906738, | |
| "step": 116 | |
| }, | |
| { | |
| "epoch": 0.251063829787234, | |
| "grad_norm": 0.85546875, | |
| "learning_rate": 1.1972071738803339e-05, | |
| "loss": 1.2550489902496338, | |
| "step": 118 | |
| }, | |
| { | |
| "epoch": 0.2553191489361702, | |
| "grad_norm": 1.0859375, | |
| "learning_rate": 1.1969593022888462e-05, | |
| "loss": 1.2029892206192017, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 0.25957446808510637, | |
| "grad_norm": 0.80078125, | |
| "learning_rate": 1.1967009285257822e-05, | |
| "loss": 1.0597739219665527, | |
| "step": 122 | |
| }, | |
| { | |
| "epoch": 0.26382978723404255, | |
| "grad_norm": 0.5546875, | |
| "learning_rate": 1.1964320582802759e-05, | |
| "loss": 1.2965384721755981, | |
| "step": 124 | |
| }, | |
| { | |
| "epoch": 0.2680851063829787, | |
| "grad_norm": 0.3515625, | |
| "learning_rate": 1.196152697472584e-05, | |
| "loss": 1.3368679285049438, | |
| "step": 126 | |
| }, | |
| { | |
| "epoch": 0.2723404255319149, | |
| "grad_norm": 0.79296875, | |
| "learning_rate": 1.1958628522539549e-05, | |
| "loss": 1.3335758447647095, | |
| "step": 128 | |
| }, | |
| { | |
| "epoch": 0.2765957446808511, | |
| "grad_norm": 0.470703125, | |
| "learning_rate": 1.1955625290064935e-05, | |
| "loss": 1.3016529083251953, | |
| "step": 130 | |
| }, | |
| { | |
| "epoch": 0.28085106382978725, | |
| "grad_norm": 0.8671875, | |
| "learning_rate": 1.1952517343430199e-05, | |
| "loss": 1.253875494003296, | |
| "step": 132 | |
| }, | |
| { | |
| "epoch": 0.2851063829787234, | |
| "grad_norm": 0.478515625, | |
| "learning_rate": 1.1949304751069256e-05, | |
| "loss": 1.2634450197219849, | |
| "step": 134 | |
| }, | |
| { | |
| "epoch": 0.28936170212765955, | |
| "grad_norm": 1.2109375, | |
| "learning_rate": 1.1945987583720202e-05, | |
| "loss": 1.294474482536316, | |
| "step": 136 | |
| }, | |
| { | |
| "epoch": 0.2936170212765957, | |
| "grad_norm": 0.85546875, | |
| "learning_rate": 1.194256591442378e-05, | |
| "loss": 1.2694545984268188, | |
| "step": 138 | |
| }, | |
| { | |
| "epoch": 0.2978723404255319, | |
| "grad_norm": 0.484375, | |
| "learning_rate": 1.1939039818521758e-05, | |
| "loss": 1.4072679281234741, | |
| "step": 140 | |
| }, | |
| { | |
| "epoch": 0.3021276595744681, | |
| "grad_norm": 0.58203125, | |
| "learning_rate": 1.1935409373655282e-05, | |
| "loss": 1.3019527196884155, | |
| "step": 142 | |
| }, | |
| { | |
| "epoch": 0.30638297872340425, | |
| "grad_norm": 1.125, | |
| "learning_rate": 1.1931674659763148e-05, | |
| "loss": 1.4703279733657837, | |
| "step": 144 | |
| }, | |
| { | |
| "epoch": 0.31063829787234043, | |
| "grad_norm": 1.453125, | |
| "learning_rate": 1.1927835759080058e-05, | |
| "loss": 1.1757651567459106, | |
| "step": 146 | |
| }, | |
| { | |
| "epoch": 0.3148936170212766, | |
| "grad_norm": 0.58203125, | |
| "learning_rate": 1.1923892756134807e-05, | |
| "loss": 1.2418992519378662, | |
| "step": 148 | |
| }, | |
| { | |
| "epoch": 0.3191489361702128, | |
| "grad_norm": 0.55078125, | |
| "learning_rate": 1.1919845737748413e-05, | |
| "loss": 1.1974143981933594, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 0.32340425531914896, | |
| "grad_norm": 0.8359375, | |
| "learning_rate": 1.1915694793032215e-05, | |
| "loss": 1.3293455839157104, | |
| "step": 152 | |
| }, | |
| { | |
| "epoch": 0.3276595744680851, | |
| "grad_norm": 0.66015625, | |
| "learning_rate": 1.1911440013385906e-05, | |
| "loss": 1.1985448598861694, | |
| "step": 154 | |
| }, | |
| { | |
| "epoch": 0.33191489361702126, | |
| "grad_norm": 0.46875, | |
| "learning_rate": 1.1907081492495521e-05, | |
| "loss": 1.2568351030349731, | |
| "step": 156 | |
| }, | |
| { | |
| "epoch": 0.33617021276595743, | |
| "grad_norm": 0.453125, | |
| "learning_rate": 1.1902619326331371e-05, | |
| "loss": 1.2663094997406006, | |
| "step": 158 | |
| }, | |
| { | |
| "epoch": 0.3404255319148936, | |
| "grad_norm": 0.384765625, | |
| "learning_rate": 1.1898053613145944e-05, | |
| "loss": 1.1971551179885864, | |
| "step": 160 | |
| }, | |
| { | |
| "epoch": 0.3446808510638298, | |
| "grad_norm": 0.75390625, | |
| "learning_rate": 1.1893384453471717e-05, | |
| "loss": 1.2108319997787476, | |
| "step": 162 | |
| }, | |
| { | |
| "epoch": 0.34893617021276596, | |
| "grad_norm": 0.423828125, | |
| "learning_rate": 1.1888611950118964e-05, | |
| "loss": 1.2176121473312378, | |
| "step": 164 | |
| }, | |
| { | |
| "epoch": 0.35319148936170214, | |
| "grad_norm": 0.546875, | |
| "learning_rate": 1.188373620817349e-05, | |
| "loss": 1.2852199077606201, | |
| "step": 166 | |
| }, | |
| { | |
| "epoch": 0.3574468085106383, | |
| "grad_norm": 0.90234375, | |
| "learning_rate": 1.1878757334994293e-05, | |
| "loss": 1.137981653213501, | |
| "step": 168 | |
| }, | |
| { | |
| "epoch": 0.3617021276595745, | |
| "grad_norm": 0.490234375, | |
| "learning_rate": 1.1873675440211238e-05, | |
| "loss": 1.2986195087432861, | |
| "step": 170 | |
| }, | |
| { | |
| "epoch": 0.3659574468085106, | |
| "grad_norm": 0.54296875, | |
| "learning_rate": 1.1868490635722617e-05, | |
| "loss": 1.2511855363845825, | |
| "step": 172 | |
| }, | |
| { | |
| "epoch": 0.3702127659574468, | |
| "grad_norm": 0.365234375, | |
| "learning_rate": 1.186320303569269e-05, | |
| "loss": 1.2008732557296753, | |
| "step": 174 | |
| }, | |
| { | |
| "epoch": 0.37446808510638296, | |
| "grad_norm": 0.58203125, | |
| "learning_rate": 1.185781275654917e-05, | |
| "loss": 1.3959091901779175, | |
| "step": 176 | |
| }, | |
| { | |
| "epoch": 0.37872340425531914, | |
| "grad_norm": 0.53125, | |
| "learning_rate": 1.1852319916980676e-05, | |
| "loss": 1.3956475257873535, | |
| "step": 178 | |
| }, | |
| { | |
| "epoch": 0.3829787234042553, | |
| "grad_norm": 0.63671875, | |
| "learning_rate": 1.1846724637934086e-05, | |
| "loss": 1.1432154178619385, | |
| "step": 180 | |
| }, | |
| { | |
| "epoch": 0.3872340425531915, | |
| "grad_norm": 0.4140625, | |
| "learning_rate": 1.184102704261191e-05, | |
| "loss": 1.198095679283142, | |
| "step": 182 | |
| }, | |
| { | |
| "epoch": 0.39148936170212767, | |
| "grad_norm": 0.52734375, | |
| "learning_rate": 1.1835227256469556e-05, | |
| "loss": 1.126910924911499, | |
| "step": 184 | |
| }, | |
| { | |
| "epoch": 0.39574468085106385, | |
| "grad_norm": 0.546875, | |
| "learning_rate": 1.1829325407212569e-05, | |
| "loss": 1.340002179145813, | |
| "step": 186 | |
| }, | |
| { | |
| "epoch": 0.4, | |
| "grad_norm": 0.56640625, | |
| "learning_rate": 1.1823321624793831e-05, | |
| "loss": 1.2044755220413208, | |
| "step": 188 | |
| }, | |
| { | |
| "epoch": 0.40425531914893614, | |
| "grad_norm": 0.42578125, | |
| "learning_rate": 1.1817216041410678e-05, | |
| "loss": 1.1999846696853638, | |
| "step": 190 | |
| }, | |
| { | |
| "epoch": 0.4085106382978723, | |
| "grad_norm": 0.94140625, | |
| "learning_rate": 1.181100879150202e-05, | |
| "loss": 1.2849934101104736, | |
| "step": 192 | |
| }, | |
| { | |
| "epoch": 0.4127659574468085, | |
| "grad_norm": 0.3671875, | |
| "learning_rate": 1.180470001174535e-05, | |
| "loss": 1.3638895750045776, | |
| "step": 194 | |
| }, | |
| { | |
| "epoch": 0.41702127659574467, | |
| "grad_norm": 0.5390625, | |
| "learning_rate": 1.179828984105375e-05, | |
| "loss": 1.2097505331039429, | |
| "step": 196 | |
| }, | |
| { | |
| "epoch": 0.42127659574468085, | |
| "grad_norm": 1.140625, | |
| "learning_rate": 1.1791778420572834e-05, | |
| "loss": 1.2969235181808472, | |
| "step": 198 | |
| }, | |
| { | |
| "epoch": 0.425531914893617, | |
| "grad_norm": 0.70703125, | |
| "learning_rate": 1.1785165893677632e-05, | |
| "loss": 1.3074672222137451, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.4297872340425532, | |
| "grad_norm": 0.57421875, | |
| "learning_rate": 1.1778452405969437e-05, | |
| "loss": 1.2175475358963013, | |
| "step": 202 | |
| }, | |
| { | |
| "epoch": 0.4340425531914894, | |
| "grad_norm": 1.0859375, | |
| "learning_rate": 1.1771638105272605e-05, | |
| "loss": 1.1964837312698364, | |
| "step": 204 | |
| }, | |
| { | |
| "epoch": 0.43829787234042555, | |
| "grad_norm": 0.455078125, | |
| "learning_rate": 1.176472314163129e-05, | |
| "loss": 1.2108904123306274, | |
| "step": 206 | |
| }, | |
| { | |
| "epoch": 0.4425531914893617, | |
| "grad_norm": 0.4609375, | |
| "learning_rate": 1.1757707667306142e-05, | |
| "loss": 1.2564092874526978, | |
| "step": 208 | |
| }, | |
| { | |
| "epoch": 0.44680851063829785, | |
| "grad_norm": 0.625, | |
| "learning_rate": 1.1750591836770963e-05, | |
| "loss": 1.2397825717926025, | |
| "step": 210 | |
| }, | |
| { | |
| "epoch": 0.451063829787234, | |
| "grad_norm": 0.58984375, | |
| "learning_rate": 1.1743375806709292e-05, | |
| "loss": 1.141276478767395, | |
| "step": 212 | |
| }, | |
| { | |
| "epoch": 0.4553191489361702, | |
| "grad_norm": 0.43359375, | |
| "learning_rate": 1.1736059736010964e-05, | |
| "loss": 1.2472527027130127, | |
| "step": 214 | |
| }, | |
| { | |
| "epoch": 0.4595744680851064, | |
| "grad_norm": 0.4921875, | |
| "learning_rate": 1.1728643785768619e-05, | |
| "loss": 1.2373621463775635, | |
| "step": 216 | |
| }, | |
| { | |
| "epoch": 0.46382978723404256, | |
| "grad_norm": 0.578125, | |
| "learning_rate": 1.1721128119274132e-05, | |
| "loss": 1.3174031972885132, | |
| "step": 218 | |
| }, | |
| { | |
| "epoch": 0.46808510638297873, | |
| "grad_norm": 1.0390625, | |
| "learning_rate": 1.171351290201504e-05, | |
| "loss": 1.4028608798980713, | |
| "step": 220 | |
| }, | |
| { | |
| "epoch": 0.4723404255319149, | |
| "grad_norm": 0.75390625, | |
| "learning_rate": 1.170579830167089e-05, | |
| "loss": 1.2434858083724976, | |
| "step": 222 | |
| }, | |
| { | |
| "epoch": 0.4765957446808511, | |
| "grad_norm": 0.60546875, | |
| "learning_rate": 1.1697984488109536e-05, | |
| "loss": 1.2289927005767822, | |
| "step": 224 | |
| }, | |
| { | |
| "epoch": 0.4808510638297872, | |
| "grad_norm": 0.43359375, | |
| "learning_rate": 1.1690071633383422e-05, | |
| "loss": 1.1950970888137817, | |
| "step": 226 | |
| }, | |
| { | |
| "epoch": 0.4851063829787234, | |
| "grad_norm": 0.423828125, | |
| "learning_rate": 1.168205991172577e-05, | |
| "loss": 1.398798942565918, | |
| "step": 228 | |
| }, | |
| { | |
| "epoch": 0.48936170212765956, | |
| "grad_norm": 0.50390625, | |
| "learning_rate": 1.1673949499546763e-05, | |
| "loss": 1.2393437623977661, | |
| "step": 230 | |
| }, | |
| { | |
| "epoch": 0.49361702127659574, | |
| "grad_norm": 0.75, | |
| "learning_rate": 1.166574057542964e-05, | |
| "loss": 1.2385178804397583, | |
| "step": 232 | |
| }, | |
| { | |
| "epoch": 0.4978723404255319, | |
| "grad_norm": 0.6875, | |
| "learning_rate": 1.165743332012679e-05, | |
| "loss": 1.4011635780334473, | |
| "step": 234 | |
| }, | |
| { | |
| "epoch": 0.502127659574468, | |
| "grad_norm": 0.52734375, | |
| "learning_rate": 1.1649027916555742e-05, | |
| "loss": 1.2445231676101685, | |
| "step": 236 | |
| }, | |
| { | |
| "epoch": 0.5063829787234042, | |
| "grad_norm": 0.53515625, | |
| "learning_rate": 1.1640524549795163e-05, | |
| "loss": 1.2868069410324097, | |
| "step": 238 | |
| }, | |
| { | |
| "epoch": 0.5106382978723404, | |
| "grad_norm": 0.369140625, | |
| "learning_rate": 1.1631923407080772e-05, | |
| "loss": 1.3375487327575684, | |
| "step": 240 | |
| }, | |
| { | |
| "epoch": 0.5148936170212766, | |
| "grad_norm": 0.62109375, | |
| "learning_rate": 1.1623224677801212e-05, | |
| "loss": 1.109569787979126, | |
| "step": 242 | |
| }, | |
| { | |
| "epoch": 0.5191489361702127, | |
| "grad_norm": 2.34375, | |
| "learning_rate": 1.1614428553493886e-05, | |
| "loss": 1.1656110286712646, | |
| "step": 244 | |
| }, | |
| { | |
| "epoch": 0.5234042553191489, | |
| "grad_norm": 0.95703125, | |
| "learning_rate": 1.160553522784075e-05, | |
| "loss": 1.159610629081726, | |
| "step": 246 | |
| }, | |
| { | |
| "epoch": 0.5276595744680851, | |
| "grad_norm": 0.474609375, | |
| "learning_rate": 1.1596544896664021e-05, | |
| "loss": 1.24387788772583, | |
| "step": 248 | |
| }, | |
| { | |
| "epoch": 0.5319148936170213, | |
| "grad_norm": 0.408203125, | |
| "learning_rate": 1.1587457757921896e-05, | |
| "loss": 1.324474811553955, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 0.5361702127659574, | |
| "grad_norm": 0.4296875, | |
| "learning_rate": 1.1578274011704169e-05, | |
| "loss": 1.4482465982437134, | |
| "step": 252 | |
| }, | |
| { | |
| "epoch": 0.5404255319148936, | |
| "grad_norm": 0.88671875, | |
| "learning_rate": 1.1568993860227838e-05, | |
| "loss": 1.425924301147461, | |
| "step": 254 | |
| }, | |
| { | |
| "epoch": 0.5446808510638298, | |
| "grad_norm": 1.6796875, | |
| "learning_rate": 1.155961750783265e-05, | |
| "loss": 1.6282589435577393, | |
| "step": 256 | |
| }, | |
| { | |
| "epoch": 0.548936170212766, | |
| "grad_norm": 0.68359375, | |
| "learning_rate": 1.1550145160976607e-05, | |
| "loss": 1.294546127319336, | |
| "step": 258 | |
| }, | |
| { | |
| "epoch": 0.5531914893617021, | |
| "grad_norm": 0.416015625, | |
| "learning_rate": 1.1540577028231398e-05, | |
| "loss": 1.2809118032455444, | |
| "step": 260 | |
| }, | |
| { | |
| "epoch": 0.5574468085106383, | |
| "grad_norm": 1.0546875, | |
| "learning_rate": 1.1530913320277837e-05, | |
| "loss": 1.2208646535873413, | |
| "step": 262 | |
| }, | |
| { | |
| "epoch": 0.5617021276595745, | |
| "grad_norm": 0.55859375, | |
| "learning_rate": 1.1521154249901204e-05, | |
| "loss": 1.2243047952651978, | |
| "step": 264 | |
| }, | |
| { | |
| "epoch": 0.5659574468085107, | |
| "grad_norm": 0.640625, | |
| "learning_rate": 1.1511300031986567e-05, | |
| "loss": 1.325520634651184, | |
| "step": 266 | |
| }, | |
| { | |
| "epoch": 0.5702127659574469, | |
| "grad_norm": 0.50390625, | |
| "learning_rate": 1.1501350883514048e-05, | |
| "loss": 1.1810495853424072, | |
| "step": 268 | |
| }, | |
| { | |
| "epoch": 0.574468085106383, | |
| "grad_norm": 0.9140625, | |
| "learning_rate": 1.149130702355404e-05, | |
| "loss": 1.360308289527893, | |
| "step": 270 | |
| }, | |
| { | |
| "epoch": 0.5787234042553191, | |
| "grad_norm": 0.5859375, | |
| "learning_rate": 1.14811686732624e-05, | |
| "loss": 1.2189104557037354, | |
| "step": 272 | |
| }, | |
| { | |
| "epoch": 0.5829787234042553, | |
| "grad_norm": 0.66015625, | |
| "learning_rate": 1.1470936055875562e-05, | |
| "loss": 1.3855215311050415, | |
| "step": 274 | |
| }, | |
| { | |
| "epoch": 0.5872340425531914, | |
| "grad_norm": 0.44140625, | |
| "learning_rate": 1.1460609396705629e-05, | |
| "loss": 1.239030361175537, | |
| "step": 276 | |
| }, | |
| { | |
| "epoch": 0.5914893617021276, | |
| "grad_norm": 0.51171875, | |
| "learning_rate": 1.1450188923135407e-05, | |
| "loss": 1.2763073444366455, | |
| "step": 278 | |
| }, | |
| { | |
| "epoch": 0.5957446808510638, | |
| "grad_norm": 1.6796875, | |
| "learning_rate": 1.1439674864613413e-05, | |
| "loss": 1.1475056409835815, | |
| "step": 280 | |
| }, | |
| { | |
| "epoch": 0.6, | |
| "grad_norm": 0.91015625, | |
| "learning_rate": 1.14290674526488e-05, | |
| "loss": 1.3000105619430542, | |
| "step": 282 | |
| }, | |
| { | |
| "epoch": 0.6042553191489362, | |
| "grad_norm": 0.59765625, | |
| "learning_rate": 1.1418366920806277e-05, | |
| "loss": 1.2847286462783813, | |
| "step": 284 | |
| }, | |
| { | |
| "epoch": 0.6085106382978723, | |
| "grad_norm": 0.328125, | |
| "learning_rate": 1.1407573504700965e-05, | |
| "loss": 1.2533907890319824, | |
| "step": 286 | |
| }, | |
| { | |
| "epoch": 0.6127659574468085, | |
| "grad_norm": 0.447265625, | |
| "learning_rate": 1.1396687441993191e-05, | |
| "loss": 1.092968463897705, | |
| "step": 288 | |
| }, | |
| { | |
| "epoch": 0.6170212765957447, | |
| "grad_norm": 0.3984375, | |
| "learning_rate": 1.1385708972383283e-05, | |
| "loss": 1.4811941385269165, | |
| "step": 290 | |
| }, | |
| { | |
| "epoch": 0.6212765957446809, | |
| "grad_norm": 0.5234375, | |
| "learning_rate": 1.1374638337606272e-05, | |
| "loss": 1.2241995334625244, | |
| "step": 292 | |
| }, | |
| { | |
| "epoch": 0.625531914893617, | |
| "grad_norm": 0.39453125, | |
| "learning_rate": 1.1363475781426572e-05, | |
| "loss": 1.273016095161438, | |
| "step": 294 | |
| }, | |
| { | |
| "epoch": 0.6297872340425532, | |
| "grad_norm": 0.486328125, | |
| "learning_rate": 1.1352221549632619e-05, | |
| "loss": 1.3111282587051392, | |
| "step": 296 | |
| }, | |
| { | |
| "epoch": 0.6340425531914894, | |
| "grad_norm": 0.369140625, | |
| "learning_rate": 1.134087589003145e-05, | |
| "loss": 1.2370787858963013, | |
| "step": 298 | |
| }, | |
| { | |
| "epoch": 0.6382978723404256, | |
| "grad_norm": 0.9609375, | |
| "learning_rate": 1.132943905244326e-05, | |
| "loss": 1.2171998023986816, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 0.6425531914893617, | |
| "grad_norm": 0.61328125, | |
| "learning_rate": 1.1317911288695888e-05, | |
| "loss": 1.3028873205184937, | |
| "step": 302 | |
| }, | |
| { | |
| "epoch": 0.6468085106382979, | |
| "grad_norm": 0.388671875, | |
| "learning_rate": 1.1306292852619274e-05, | |
| "loss": 1.2210191488265991, | |
| "step": 304 | |
| }, | |
| { | |
| "epoch": 0.6510638297872341, | |
| "grad_norm": 0.51953125, | |
| "learning_rate": 1.129458400003988e-05, | |
| "loss": 1.2221373319625854, | |
| "step": 306 | |
| }, | |
| { | |
| "epoch": 0.6553191489361702, | |
| "grad_norm": 0.7109375, | |
| "learning_rate": 1.1282784988775045e-05, | |
| "loss": 1.236470341682434, | |
| "step": 308 | |
| }, | |
| { | |
| "epoch": 0.6595744680851063, | |
| "grad_norm": 0.390625, | |
| "learning_rate": 1.1270896078627315e-05, | |
| "loss": 1.0521761178970337, | |
| "step": 310 | |
| }, | |
| { | |
| "epoch": 0.6638297872340425, | |
| "grad_norm": 0.859375, | |
| "learning_rate": 1.125891753137872e-05, | |
| "loss": 1.1648889780044556, | |
| "step": 312 | |
| }, | |
| { | |
| "epoch": 0.6680851063829787, | |
| "grad_norm": 0.484375, | |
| "learning_rate": 1.1246849610785009e-05, | |
| "loss": 1.2399919033050537, | |
| "step": 314 | |
| }, | |
| { | |
| "epoch": 0.6723404255319149, | |
| "grad_norm": 0.69140625, | |
| "learning_rate": 1.1234692582569843e-05, | |
| "loss": 1.2077488899230957, | |
| "step": 316 | |
| }, | |
| { | |
| "epoch": 0.676595744680851, | |
| "grad_norm": 1.234375, | |
| "learning_rate": 1.1222446714418947e-05, | |
| "loss": 1.4379267692565918, | |
| "step": 318 | |
| }, | |
| { | |
| "epoch": 0.6808510638297872, | |
| "grad_norm": 0.349609375, | |
| "learning_rate": 1.1210112275974216e-05, | |
| "loss": 1.2180498838424683, | |
| "step": 320 | |
| }, | |
| { | |
| "epoch": 0.6851063829787234, | |
| "grad_norm": 0.49609375, | |
| "learning_rate": 1.1197689538827766e-05, | |
| "loss": 1.190024971961975, | |
| "step": 322 | |
| }, | |
| { | |
| "epoch": 0.6893617021276596, | |
| "grad_norm": 0.78515625, | |
| "learning_rate": 1.1185178776515973e-05, | |
| "loss": 1.2704949378967285, | |
| "step": 324 | |
| }, | |
| { | |
| "epoch": 0.6936170212765957, | |
| "grad_norm": 0.50390625, | |
| "learning_rate": 1.1172580264513435e-05, | |
| "loss": 1.2116349935531616, | |
| "step": 326 | |
| }, | |
| { | |
| "epoch": 0.6978723404255319, | |
| "grad_norm": 1.015625, | |
| "learning_rate": 1.1159894280226908e-05, | |
| "loss": 1.4247322082519531, | |
| "step": 328 | |
| }, | |
| { | |
| "epoch": 0.7021276595744681, | |
| "grad_norm": 0.76171875, | |
| "learning_rate": 1.114712110298921e-05, | |
| "loss": 1.222773551940918, | |
| "step": 330 | |
| }, | |
| { | |
| "epoch": 0.7063829787234043, | |
| "grad_norm": 0.443359375, | |
| "learning_rate": 1.1134261014053054e-05, | |
| "loss": 1.2406312227249146, | |
| "step": 332 | |
| }, | |
| { | |
| "epoch": 0.7106382978723405, | |
| "grad_norm": 0.41015625, | |
| "learning_rate": 1.1121314296584864e-05, | |
| "loss": 1.1038767099380493, | |
| "step": 334 | |
| }, | |
| { | |
| "epoch": 0.7148936170212766, | |
| "grad_norm": 0.96875, | |
| "learning_rate": 1.1108281235658543e-05, | |
| "loss": 1.2219905853271484, | |
| "step": 336 | |
| }, | |
| { | |
| "epoch": 0.7191489361702128, | |
| "grad_norm": 0.71484375, | |
| "learning_rate": 1.1095162118249182e-05, | |
| "loss": 1.2996376752853394, | |
| "step": 338 | |
| }, | |
| { | |
| "epoch": 0.723404255319149, | |
| "grad_norm": 0.69140625, | |
| "learning_rate": 1.1081957233226762e-05, | |
| "loss": 1.2108495235443115, | |
| "step": 340 | |
| }, | |
| { | |
| "epoch": 0.7276595744680852, | |
| "grad_norm": 4.40625, | |
| "learning_rate": 1.1068666871349777e-05, | |
| "loss": 1.1036784648895264, | |
| "step": 342 | |
| }, | |
| { | |
| "epoch": 0.7319148936170212, | |
| "grad_norm": 0.75, | |
| "learning_rate": 1.1055291325258833e-05, | |
| "loss": 1.1888855695724487, | |
| "step": 344 | |
| }, | |
| { | |
| "epoch": 0.7361702127659574, | |
| "grad_norm": 2.484375, | |
| "learning_rate": 1.1041830889470211e-05, | |
| "loss": 1.2789053916931152, | |
| "step": 346 | |
| }, | |
| { | |
| "epoch": 0.7404255319148936, | |
| "grad_norm": 0.66015625, | |
| "learning_rate": 1.1028285860369379e-05, | |
| "loss": 1.2360132932662964, | |
| "step": 348 | |
| }, | |
| { | |
| "epoch": 0.7446808510638298, | |
| "grad_norm": 0.6875, | |
| "learning_rate": 1.1014656536204471e-05, | |
| "loss": 1.271801233291626, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 0.7489361702127659, | |
| "grad_norm": 0.333984375, | |
| "learning_rate": 1.1000943217079704e-05, | |
| "loss": 1.177423119544983, | |
| "step": 352 | |
| }, | |
| { | |
| "epoch": 0.7531914893617021, | |
| "grad_norm": 0.431640625, | |
| "learning_rate": 1.098714620494879e-05, | |
| "loss": 1.1379421949386597, | |
| "step": 354 | |
| }, | |
| { | |
| "epoch": 0.7574468085106383, | |
| "grad_norm": 0.462890625, | |
| "learning_rate": 1.0973265803608273e-05, | |
| "loss": 1.293025255203247, | |
| "step": 356 | |
| }, | |
| { | |
| "epoch": 0.7617021276595745, | |
| "grad_norm": 0.34765625, | |
| "learning_rate": 1.0959302318690851e-05, | |
| "loss": 1.1501177549362183, | |
| "step": 358 | |
| }, | |
| { | |
| "epoch": 0.7659574468085106, | |
| "grad_norm": 0.36328125, | |
| "learning_rate": 1.0945256057658632e-05, | |
| "loss": 1.1921217441558838, | |
| "step": 360 | |
| }, | |
| { | |
| "epoch": 0.7702127659574468, | |
| "grad_norm": 0.6171875, | |
| "learning_rate": 1.0931127329796376e-05, | |
| "loss": 1.219430923461914, | |
| "step": 362 | |
| }, | |
| { | |
| "epoch": 0.774468085106383, | |
| "grad_norm": 3.84375, | |
| "learning_rate": 1.0916916446204684e-05, | |
| "loss": 1.2632174491882324, | |
| "step": 364 | |
| }, | |
| { | |
| "epoch": 0.7787234042553192, | |
| "grad_norm": 1.2421875, | |
| "learning_rate": 1.090262371979314e-05, | |
| "loss": 1.1648533344268799, | |
| "step": 366 | |
| }, | |
| { | |
| "epoch": 0.7829787234042553, | |
| "grad_norm": 0.392578125, | |
| "learning_rate": 1.0888249465273429e-05, | |
| "loss": 1.1504024267196655, | |
| "step": 368 | |
| }, | |
| { | |
| "epoch": 0.7872340425531915, | |
| "grad_norm": 0.66796875, | |
| "learning_rate": 1.08737939991524e-05, | |
| "loss": 1.2344441413879395, | |
| "step": 370 | |
| }, | |
| { | |
| "epoch": 0.7914893617021277, | |
| "grad_norm": 1.234375, | |
| "learning_rate": 1.0859257639725105e-05, | |
| "loss": 1.1171855926513672, | |
| "step": 372 | |
| }, | |
| { | |
| "epoch": 0.7957446808510639, | |
| "grad_norm": 0.42578125, | |
| "learning_rate": 1.0844640707067789e-05, | |
| "loss": 1.0803868770599365, | |
| "step": 374 | |
| }, | |
| { | |
| "epoch": 0.8, | |
| "grad_norm": 0.7109375, | |
| "learning_rate": 1.0829943523030833e-05, | |
| "loss": 1.1519043445587158, | |
| "step": 376 | |
| }, | |
| { | |
| "epoch": 0.8042553191489362, | |
| "grad_norm": 0.478515625, | |
| "learning_rate": 1.0815166411231678e-05, | |
| "loss": 1.2066103219985962, | |
| "step": 378 | |
| }, | |
| { | |
| "epoch": 0.8085106382978723, | |
| "grad_norm": 0.55859375, | |
| "learning_rate": 1.0800309697047694e-05, | |
| "loss": 1.2266093492507935, | |
| "step": 380 | |
| }, | |
| { | |
| "epoch": 0.8127659574468085, | |
| "grad_norm": 0.5078125, | |
| "learning_rate": 1.0785373707609015e-05, | |
| "loss": 1.1117401123046875, | |
| "step": 382 | |
| }, | |
| { | |
| "epoch": 0.8170212765957446, | |
| "grad_norm": 1.4609375, | |
| "learning_rate": 1.0770358771791342e-05, | |
| "loss": 1.210506796836853, | |
| "step": 384 | |
| }, | |
| { | |
| "epoch": 0.8212765957446808, | |
| "grad_norm": 0.5, | |
| "learning_rate": 1.0755265220208694e-05, | |
| "loss": 1.0881282091140747, | |
| "step": 386 | |
| }, | |
| { | |
| "epoch": 0.825531914893617, | |
| "grad_norm": 0.380859375, | |
| "learning_rate": 1.0740093385206134e-05, | |
| "loss": 1.1627310514450073, | |
| "step": 388 | |
| }, | |
| { | |
| "epoch": 0.8297872340425532, | |
| "grad_norm": 0.60546875, | |
| "learning_rate": 1.0724843600852442e-05, | |
| "loss": 1.3014237880706787, | |
| "step": 390 | |
| }, | |
| { | |
| "epoch": 0.8340425531914893, | |
| "grad_norm": 0.388671875, | |
| "learning_rate": 1.0709516202932775e-05, | |
| "loss": 1.1474575996398926, | |
| "step": 392 | |
| }, | |
| { | |
| "epoch": 0.8382978723404255, | |
| "grad_norm": 0.578125, | |
| "learning_rate": 1.0694111528941255e-05, | |
| "loss": 1.0830378532409668, | |
| "step": 394 | |
| }, | |
| { | |
| "epoch": 0.8425531914893617, | |
| "grad_norm": 0.44140625, | |
| "learning_rate": 1.0678629918073552e-05, | |
| "loss": 1.3125864267349243, | |
| "step": 396 | |
| }, | |
| { | |
| "epoch": 0.8468085106382979, | |
| "grad_norm": 0.447265625, | |
| "learning_rate": 1.0663071711219407e-05, | |
| "loss": 1.2408422231674194, | |
| "step": 398 | |
| }, | |
| { | |
| "epoch": 0.851063829787234, | |
| "grad_norm": 0.6484375, | |
| "learning_rate": 1.0647437250955132e-05, | |
| "loss": 1.164583444595337, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 0.8553191489361702, | |
| "grad_norm": 0.82421875, | |
| "learning_rate": 1.0631726881536062e-05, | |
| "loss": 1.215876579284668, | |
| "step": 402 | |
| }, | |
| { | |
| "epoch": 0.8595744680851064, | |
| "grad_norm": 0.66015625, | |
| "learning_rate": 1.0615940948888973e-05, | |
| "loss": 1.1813125610351562, | |
| "step": 404 | |
| }, | |
| { | |
| "epoch": 0.8638297872340426, | |
| "grad_norm": 0.59765625, | |
| "learning_rate": 1.0600079800604474e-05, | |
| "loss": 1.2217594385147095, | |
| "step": 406 | |
| }, | |
| { | |
| "epoch": 0.8680851063829788, | |
| "grad_norm": 2.921875, | |
| "learning_rate": 1.0584143785929342e-05, | |
| "loss": 1.2609615325927734, | |
| "step": 408 | |
| }, | |
| { | |
| "epoch": 0.8723404255319149, | |
| "grad_norm": 0.62890625, | |
| "learning_rate": 1.0568133255758849e-05, | |
| "loss": 1.143092393875122, | |
| "step": 410 | |
| }, | |
| { | |
| "epoch": 0.8765957446808511, | |
| "grad_norm": 0.62109375, | |
| "learning_rate": 1.0552048562629009e-05, | |
| "loss": 1.2375463247299194, | |
| "step": 412 | |
| }, | |
| { | |
| "epoch": 0.8808510638297873, | |
| "grad_norm": 0.74609375, | |
| "learning_rate": 1.0535890060708838e-05, | |
| "loss": 1.1186902523040771, | |
| "step": 414 | |
| }, | |
| { | |
| "epoch": 0.8851063829787233, | |
| "grad_norm": 0.439453125, | |
| "learning_rate": 1.0519658105792554e-05, | |
| "loss": 1.1387929916381836, | |
| "step": 416 | |
| }, | |
| { | |
| "epoch": 0.8893617021276595, | |
| "grad_norm": 1.421875, | |
| "learning_rate": 1.0503353055291729e-05, | |
| "loss": 1.181614875793457, | |
| "step": 418 | |
| }, | |
| { | |
| "epoch": 0.8936170212765957, | |
| "grad_norm": 1.03125, | |
| "learning_rate": 1.0486975268227431e-05, | |
| "loss": 1.308741807937622, | |
| "step": 420 | |
| }, | |
| { | |
| "epoch": 0.8978723404255319, | |
| "grad_norm": 1.21875, | |
| "learning_rate": 1.0470525105222318e-05, | |
| "loss": 1.0869234800338745, | |
| "step": 422 | |
| }, | |
| { | |
| "epoch": 0.902127659574468, | |
| "grad_norm": 0.443359375, | |
| "learning_rate": 1.0454002928492686e-05, | |
| "loss": 1.1498181819915771, | |
| "step": 424 | |
| }, | |
| { | |
| "epoch": 0.9063829787234042, | |
| "grad_norm": 0.6796875, | |
| "learning_rate": 1.0437409101840513e-05, | |
| "loss": 1.3278398513793945, | |
| "step": 426 | |
| }, | |
| { | |
| "epoch": 0.9106382978723404, | |
| "grad_norm": 0.44140625, | |
| "learning_rate": 1.0420743990645426e-05, | |
| "loss": 1.2144547700881958, | |
| "step": 428 | |
| }, | |
| { | |
| "epoch": 0.9148936170212766, | |
| "grad_norm": 0.486328125, | |
| "learning_rate": 1.0404007961856676e-05, | |
| "loss": 1.191633701324463, | |
| "step": 430 | |
| }, | |
| { | |
| "epoch": 0.9191489361702128, | |
| "grad_norm": 0.5078125, | |
| "learning_rate": 1.0387201383985043e-05, | |
| "loss": 1.2432807683944702, | |
| "step": 432 | |
| }, | |
| { | |
| "epoch": 0.9234042553191489, | |
| "grad_norm": 0.703125, | |
| "learning_rate": 1.0370324627094734e-05, | |
| "loss": 1.5649425983428955, | |
| "step": 434 | |
| }, | |
| { | |
| "epoch": 0.9276595744680851, | |
| "grad_norm": 0.55859375, | |
| "learning_rate": 1.0353378062795224e-05, | |
| "loss": 1.2039592266082764, | |
| "step": 436 | |
| }, | |
| { | |
| "epoch": 0.9319148936170213, | |
| "grad_norm": 0.49609375, | |
| "learning_rate": 1.033636206423308e-05, | |
| "loss": 1.1712656021118164, | |
| "step": 438 | |
| }, | |
| { | |
| "epoch": 0.9361702127659575, | |
| "grad_norm": 0.75390625, | |
| "learning_rate": 1.0319277006083738e-05, | |
| "loss": 1.030342936515808, | |
| "step": 440 | |
| }, | |
| { | |
| "epoch": 0.9404255319148936, | |
| "grad_norm": 0.74609375, | |
| "learning_rate": 1.0302123264543267e-05, | |
| "loss": 1.1908173561096191, | |
| "step": 442 | |
| }, | |
| { | |
| "epoch": 0.9446808510638298, | |
| "grad_norm": 1.5234375, | |
| "learning_rate": 1.028490121732007e-05, | |
| "loss": 1.174695611000061, | |
| "step": 444 | |
| }, | |
| { | |
| "epoch": 0.948936170212766, | |
| "grad_norm": 1.8203125, | |
| "learning_rate": 1.026761124362657e-05, | |
| "loss": 1.3273422718048096, | |
| "step": 446 | |
| }, | |
| { | |
| "epoch": 0.9531914893617022, | |
| "grad_norm": 0.3828125, | |
| "learning_rate": 1.0250253724170875e-05, | |
| "loss": 1.162235975265503, | |
| "step": 448 | |
| }, | |
| { | |
| "epoch": 0.9574468085106383, | |
| "grad_norm": 0.53515625, | |
| "learning_rate": 1.0232829041148372e-05, | |
| "loss": 1.1651887893676758, | |
| "step": 450 | |
| }, | |
| { | |
| "epoch": 0.9617021276595744, | |
| "grad_norm": 0.48828125, | |
| "learning_rate": 1.0215337578233328e-05, | |
| "loss": 1.1634246110916138, | |
| "step": 452 | |
| }, | |
| { | |
| "epoch": 0.9659574468085106, | |
| "grad_norm": 0.5625, | |
| "learning_rate": 1.019777972057044e-05, | |
| "loss": 1.0295268297195435, | |
| "step": 454 | |
| }, | |
| { | |
| "epoch": 0.9702127659574468, | |
| "grad_norm": 0.41796875, | |
| "learning_rate": 1.0180155854766348e-05, | |
| "loss": 1.178024411201477, | |
| "step": 456 | |
| }, | |
| { | |
| "epoch": 0.9744680851063829, | |
| "grad_norm": 0.62109375, | |
| "learning_rate": 1.0162466368881124e-05, | |
| "loss": 1.2120832204818726, | |
| "step": 458 | |
| }, | |
| { | |
| "epoch": 0.9787234042553191, | |
| "grad_norm": 0.50390625, | |
| "learning_rate": 1.0144711652419738e-05, | |
| "loss": 1.1555849313735962, | |
| "step": 460 | |
| }, | |
| { | |
| "epoch": 0.9829787234042553, | |
| "grad_norm": 4.0625, | |
| "learning_rate": 1.0126892096323463e-05, | |
| "loss": 1.2941299676895142, | |
| "step": 462 | |
| }, | |
| { | |
| "epoch": 0.9872340425531915, | |
| "grad_norm": 2.34375, | |
| "learning_rate": 1.0109008092961276e-05, | |
| "loss": 1.0498948097229004, | |
| "step": 464 | |
| }, | |
| { | |
| "epoch": 0.9914893617021276, | |
| "grad_norm": 0.71875, | |
| "learning_rate": 1.0091060036121233e-05, | |
| "loss": 1.2505208253860474, | |
| "step": 466 | |
| }, | |
| { | |
| "epoch": 0.9957446808510638, | |
| "grad_norm": 0.51953125, | |
| "learning_rate": 1.0073048321001766e-05, | |
| "loss": 1.1784660816192627, | |
| "step": 468 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "grad_norm": 0.6875, | |
| "learning_rate": 1.0054973344203011e-05, | |
| "loss": 1.2162238359451294, | |
| "step": 470 | |
| }, | |
| { | |
| "epoch": 1.004255319148936, | |
| "grad_norm": 0.33203125, | |
| "learning_rate": 1.003683550371806e-05, | |
| "loss": 0.902032196521759, | |
| "step": 472 | |
| }, | |
| { | |
| "epoch": 1.0085106382978724, | |
| "grad_norm": 0.84375, | |
| "learning_rate": 1.00186351989242e-05, | |
| "loss": 0.6829485893249512, | |
| "step": 474 | |
| }, | |
| { | |
| "epoch": 1.0127659574468084, | |
| "grad_norm": 0.380859375, | |
| "learning_rate": 1.0000372830574128e-05, | |
| "loss": 0.9958571195602417, | |
| "step": 476 | |
| }, | |
| { | |
| "epoch": 1.0170212765957447, | |
| "grad_norm": 0.341796875, | |
| "learning_rate": 9.982048800787103e-06, | |
| "loss": 0.8577584624290466, | |
| "step": 478 | |
| }, | |
| { | |
| "epoch": 1.0212765957446808, | |
| "grad_norm": 0.443359375, | |
| "learning_rate": 9.96366351304012e-06, | |
| "loss": 0.7623387575149536, | |
| "step": 480 | |
| }, | |
| { | |
| "epoch": 1.025531914893617, | |
| "grad_norm": 0.443359375, | |
| "learning_rate": 9.945217372159019e-06, | |
| "loss": 0.6408636569976807, | |
| "step": 482 | |
| }, | |
| { | |
| "epoch": 1.0297872340425531, | |
| "grad_norm": 0.40625, | |
| "learning_rate": 9.926710784309548e-06, | |
| "loss": 0.8527731895446777, | |
| "step": 484 | |
| }, | |
| { | |
| "epoch": 1.0340425531914894, | |
| "grad_norm": 0.71875, | |
| "learning_rate": 9.908144156988452e-06, | |
| "loss": 1.0902431011199951, | |
| "step": 486 | |
| }, | |
| { | |
| "epoch": 1.0382978723404255, | |
| "grad_norm": 0.5703125, | |
| "learning_rate": 9.88951789901448e-06, | |
| "loss": 0.9952311515808105, | |
| "step": 488 | |
| }, | |
| { | |
| "epoch": 1.0425531914893618, | |
| "grad_norm": 0.466796875, | |
| "learning_rate": 9.87083242051939e-06, | |
| "loss": 1.0575801134109497, | |
| "step": 490 | |
| }, | |
| { | |
| "epoch": 1.0468085106382978, | |
| "grad_norm": 0.8515625, | |
| "learning_rate": 9.852088132938916e-06, | |
| "loss": 0.8896694779396057, | |
| "step": 492 | |
| }, | |
| { | |
| "epoch": 1.0510638297872341, | |
| "grad_norm": 0.61328125, | |
| "learning_rate": 9.833285449003712e-06, | |
| "loss": 0.8272213935852051, | |
| "step": 494 | |
| }, | |
| { | |
| "epoch": 1.0553191489361702, | |
| "grad_norm": 0.490234375, | |
| "learning_rate": 9.814424782730261e-06, | |
| "loss": 0.897000789642334, | |
| "step": 496 | |
| }, | |
| { | |
| "epoch": 1.0595744680851065, | |
| "grad_norm": 0.6015625, | |
| "learning_rate": 9.79550654941176e-06, | |
| "loss": 0.7115342020988464, | |
| "step": 498 | |
| }, | |
| { | |
| "epoch": 1.0638297872340425, | |
| "grad_norm": 0.458984375, | |
| "learning_rate": 9.776531165608975e-06, | |
| "loss": 0.7840989232063293, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 1.0680851063829788, | |
| "grad_norm": 0.400390625, | |
| "learning_rate": 9.757499049141065e-06, | |
| "loss": 0.8686625361442566, | |
| "step": 502 | |
| }, | |
| { | |
| "epoch": 1.0723404255319149, | |
| "grad_norm": 0.455078125, | |
| "learning_rate": 9.738410619076393e-06, | |
| "loss": 0.5279070138931274, | |
| "step": 504 | |
| }, | |
| { | |
| "epoch": 1.076595744680851, | |
| "grad_norm": 1.2421875, | |
| "learning_rate": 9.71926629572329e-06, | |
| "loss": 0.7969399094581604, | |
| "step": 506 | |
| }, | |
| { | |
| "epoch": 1.0808510638297872, | |
| "grad_norm": 1.3828125, | |
| "learning_rate": 9.7000665006208e-06, | |
| "loss": 0.9214133024215698, | |
| "step": 508 | |
| }, | |
| { | |
| "epoch": 1.0851063829787233, | |
| "grad_norm": 0.88671875, | |
| "learning_rate": 9.680811656529397e-06, | |
| "loss": 0.8827441930770874, | |
| "step": 510 | |
| }, | |
| { | |
| "epoch": 1.0893617021276596, | |
| "grad_norm": 1.1328125, | |
| "learning_rate": 9.661502187421687e-06, | |
| "loss": 0.7750219702720642, | |
| "step": 512 | |
| }, | |
| { | |
| "epoch": 1.0936170212765957, | |
| "grad_norm": 0.384765625, | |
| "learning_rate": 9.64213851847306e-06, | |
| "loss": 0.7688886523246765, | |
| "step": 514 | |
| }, | |
| { | |
| "epoch": 1.097872340425532, | |
| "grad_norm": 0.64453125, | |
| "learning_rate": 9.62272107605233e-06, | |
| "loss": 0.9912289977073669, | |
| "step": 516 | |
| }, | |
| { | |
| "epoch": 1.102127659574468, | |
| "grad_norm": 0.9765625, | |
| "learning_rate": 9.603250287712357e-06, | |
| "loss": 0.8116132020950317, | |
| "step": 518 | |
| }, | |
| { | |
| "epoch": 1.1063829787234043, | |
| "grad_norm": 0.6171875, | |
| "learning_rate": 9.583726582180619e-06, | |
| "loss": 0.5431628227233887, | |
| "step": 520 | |
| }, | |
| { | |
| "epoch": 1.1106382978723404, | |
| "grad_norm": 0.82421875, | |
| "learning_rate": 9.564150389349784e-06, | |
| "loss": 0.7063818573951721, | |
| "step": 522 | |
| }, | |
| { | |
| "epoch": 1.1148936170212767, | |
| "grad_norm": 0.46484375, | |
| "learning_rate": 9.544522140268226e-06, | |
| "loss": 0.8259474635124207, | |
| "step": 524 | |
| }, | |
| { | |
| "epoch": 1.1191489361702127, | |
| "grad_norm": 0.65625, | |
| "learning_rate": 9.524842267130567e-06, | |
| "loss": 0.8532420992851257, | |
| "step": 526 | |
| }, | |
| { | |
| "epoch": 1.123404255319149, | |
| "grad_norm": 0.65625, | |
| "learning_rate": 9.505111203268119e-06, | |
| "loss": 0.7610599398612976, | |
| "step": 528 | |
| }, | |
| { | |
| "epoch": 1.127659574468085, | |
| "grad_norm": 0.59375, | |
| "learning_rate": 9.48532938313937e-06, | |
| "loss": 0.8436508178710938, | |
| "step": 530 | |
| }, | |
| { | |
| "epoch": 1.1319148936170214, | |
| "grad_norm": 0.353515625, | |
| "learning_rate": 9.465497242320423e-06, | |
| "loss": 1.2464487552642822, | |
| "step": 532 | |
| }, | |
| { | |
| "epoch": 1.1361702127659574, | |
| "grad_norm": 0.60546875, | |
| "learning_rate": 9.445615217495373e-06, | |
| "loss": 0.7736493945121765, | |
| "step": 534 | |
| }, | |
| { | |
| "epoch": 1.1404255319148937, | |
| "grad_norm": 0.5546875, | |
| "learning_rate": 9.42568374644672e-06, | |
| "loss": 0.9255214333534241, | |
| "step": 536 | |
| }, | |
| { | |
| "epoch": 1.1446808510638298, | |
| "grad_norm": 0.52734375, | |
| "learning_rate": 9.40570326804573e-06, | |
| "loss": 0.7744427919387817, | |
| "step": 538 | |
| }, | |
| { | |
| "epoch": 1.148936170212766, | |
| "grad_norm": 0.251953125, | |
| "learning_rate": 9.385674222242742e-06, | |
| "loss": 0.6865782737731934, | |
| "step": 540 | |
| }, | |
| { | |
| "epoch": 1.1531914893617021, | |
| "grad_norm": 6.25, | |
| "learning_rate": 9.365597050057524e-06, | |
| "loss": 0.8758373260498047, | |
| "step": 542 | |
| }, | |
| { | |
| "epoch": 1.1574468085106382, | |
| "grad_norm": 0.48046875, | |
| "learning_rate": 9.345472193569518e-06, | |
| "loss": 0.8117732405662537, | |
| "step": 544 | |
| }, | |
| { | |
| "epoch": 1.1617021276595745, | |
| "grad_norm": 0.44140625, | |
| "learning_rate": 9.325300095908145e-06, | |
| "loss": 0.9483519196510315, | |
| "step": 546 | |
| }, | |
| { | |
| "epoch": 1.1659574468085105, | |
| "grad_norm": 0.4921875, | |
| "learning_rate": 9.305081201243022e-06, | |
| "loss": 0.660556972026825, | |
| "step": 548 | |
| }, | |
| { | |
| "epoch": 1.1702127659574468, | |
| "grad_norm": 0.828125, | |
| "learning_rate": 9.284815954774185e-06, | |
| "loss": 0.7756091952323914, | |
| "step": 550 | |
| }, | |
| { | |
| "epoch": 1.174468085106383, | |
| "grad_norm": 0.62109375, | |
| "learning_rate": 9.264504802722297e-06, | |
| "loss": 0.8955855369567871, | |
| "step": 552 | |
| }, | |
| { | |
| "epoch": 1.1787234042553192, | |
| "grad_norm": 0.58203125, | |
| "learning_rate": 9.244148192318819e-06, | |
| "loss": 0.8398646712303162, | |
| "step": 554 | |
| }, | |
| { | |
| "epoch": 1.1829787234042553, | |
| "grad_norm": 0.97265625, | |
| "learning_rate": 9.223746571796152e-06, | |
| "loss": 0.8468598127365112, | |
| "step": 556 | |
| }, | |
| { | |
| "epoch": 1.1872340425531915, | |
| "grad_norm": 0.490234375, | |
| "learning_rate": 9.203300390377784e-06, | |
| "loss": 0.6725097298622131, | |
| "step": 558 | |
| }, | |
| { | |
| "epoch": 1.1914893617021276, | |
| "grad_norm": 0.59765625, | |
| "learning_rate": 9.182810098268377e-06, | |
| "loss": 0.7907771468162537, | |
| "step": 560 | |
| }, | |
| { | |
| "epoch": 1.195744680851064, | |
| "grad_norm": 0.453125, | |
| "learning_rate": 9.162276146643881e-06, | |
| "loss": 0.8897430896759033, | |
| "step": 562 | |
| }, | |
| { | |
| "epoch": 1.2, | |
| "grad_norm": 1.515625, | |
| "learning_rate": 9.141698987641577e-06, | |
| "loss": 0.9244027137756348, | |
| "step": 564 | |
| }, | |
| { | |
| "epoch": 1.2042553191489362, | |
| "grad_norm": 1.2734375, | |
| "learning_rate": 9.121079074350135e-06, | |
| "loss": 0.8451488614082336, | |
| "step": 566 | |
| }, | |
| { | |
| "epoch": 1.2085106382978723, | |
| "grad_norm": 1.6015625, | |
| "learning_rate": 9.100416860799625e-06, | |
| "loss": 0.9149748682975769, | |
| "step": 568 | |
| }, | |
| { | |
| "epoch": 1.2127659574468086, | |
| "grad_norm": 0.443359375, | |
| "learning_rate": 9.079712801951533e-06, | |
| "loss": 0.8140401244163513, | |
| "step": 570 | |
| }, | |
| { | |
| "epoch": 1.2170212765957447, | |
| "grad_norm": 0.640625, | |
| "learning_rate": 9.058967353688733e-06, | |
| "loss": 0.8866817355155945, | |
| "step": 572 | |
| }, | |
| { | |
| "epoch": 1.2212765957446807, | |
| "grad_norm": 0.70703125, | |
| "learning_rate": 9.038180972805454e-06, | |
| "loss": 0.8173488974571228, | |
| "step": 574 | |
| }, | |
| { | |
| "epoch": 1.225531914893617, | |
| "grad_norm": 0.55859375, | |
| "learning_rate": 9.017354116997226e-06, | |
| "loss": 0.7841181755065918, | |
| "step": 576 | |
| }, | |
| { | |
| "epoch": 1.2297872340425533, | |
| "grad_norm": 0.48046875, | |
| "learning_rate": 8.99648724485079e-06, | |
| "loss": 0.5890490412712097, | |
| "step": 578 | |
| }, | |
| { | |
| "epoch": 1.2340425531914894, | |
| "grad_norm": 0.578125, | |
| "learning_rate": 8.975580815834008e-06, | |
| "loss": 0.5997076034545898, | |
| "step": 580 | |
| }, | |
| { | |
| "epoch": 1.2382978723404254, | |
| "grad_norm": 1.1328125, | |
| "learning_rate": 8.954635290285748e-06, | |
| "loss": 0.6937717199325562, | |
| "step": 582 | |
| }, | |
| { | |
| "epoch": 1.2425531914893617, | |
| "grad_norm": 0.73046875, | |
| "learning_rate": 8.933651129405741e-06, | |
| "loss": 0.7356208562850952, | |
| "step": 584 | |
| }, | |
| { | |
| "epoch": 1.2468085106382978, | |
| "grad_norm": 0.875, | |
| "learning_rate": 8.912628795244435e-06, | |
| "loss": 0.8549614548683167, | |
| "step": 586 | |
| }, | |
| { | |
| "epoch": 1.251063829787234, | |
| "grad_norm": 0.40234375, | |
| "learning_rate": 8.891568750692811e-06, | |
| "loss": 0.645767092704773, | |
| "step": 588 | |
| }, | |
| { | |
| "epoch": 1.2553191489361701, | |
| "grad_norm": 0.61328125, | |
| "learning_rate": 8.870471459472202e-06, | |
| "loss": 0.9579916596412659, | |
| "step": 590 | |
| }, | |
| { | |
| "epoch": 1.2595744680851064, | |
| "grad_norm": 0.498046875, | |
| "learning_rate": 8.849337386124065e-06, | |
| "loss": 0.6670525670051575, | |
| "step": 592 | |
| }, | |
| { | |
| "epoch": 1.2638297872340425, | |
| "grad_norm": 0.5234375, | |
| "learning_rate": 8.828166995999771e-06, | |
| "loss": 0.9148899912834167, | |
| "step": 594 | |
| }, | |
| { | |
| "epoch": 1.2680851063829788, | |
| "grad_norm": 0.6015625, | |
| "learning_rate": 8.806960755250352e-06, | |
| "loss": 0.9241386651992798, | |
| "step": 596 | |
| }, | |
| { | |
| "epoch": 1.2723404255319148, | |
| "grad_norm": 0.8046875, | |
| "learning_rate": 8.785719130816227e-06, | |
| "loss": 0.8401479721069336, | |
| "step": 598 | |
| }, | |
| { | |
| "epoch": 1.2765957446808511, | |
| "grad_norm": 0.59765625, | |
| "learning_rate": 8.76444259041694e-06, | |
| "loss": 0.9863938689231873, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 1.2808510638297872, | |
| "grad_norm": 0.58984375, | |
| "learning_rate": 8.743131602540837e-06, | |
| "loss": 0.9384634494781494, | |
| "step": 602 | |
| }, | |
| { | |
| "epoch": 1.2851063829787235, | |
| "grad_norm": 0.6484375, | |
| "learning_rate": 8.721786636434773e-06, | |
| "loss": 0.7852924466133118, | |
| "step": 604 | |
| }, | |
| { | |
| "epoch": 1.2893617021276595, | |
| "grad_norm": 0.490234375, | |
| "learning_rate": 8.70040816209377e-06, | |
| "loss": 0.9877030849456787, | |
| "step": 606 | |
| }, | |
| { | |
| "epoch": 1.2936170212765958, | |
| "grad_norm": 0.494140625, | |
| "learning_rate": 8.67899665025066e-06, | |
| "loss": 0.7262607216835022, | |
| "step": 608 | |
| }, | |
| { | |
| "epoch": 1.297872340425532, | |
| "grad_norm": 1.578125, | |
| "learning_rate": 8.657552572365738e-06, | |
| "loss": 1.0153322219848633, | |
| "step": 610 | |
| }, | |
| { | |
| "epoch": 1.302127659574468, | |
| "grad_norm": 0.34375, | |
| "learning_rate": 8.636076400616361e-06, | |
| "loss": 0.8889206051826477, | |
| "step": 612 | |
| }, | |
| { | |
| "epoch": 1.3063829787234043, | |
| "grad_norm": 0.40234375, | |
| "learning_rate": 8.614568607886572e-06, | |
| "loss": 1.0539144277572632, | |
| "step": 614 | |
| }, | |
| { | |
| "epoch": 1.3106382978723405, | |
| "grad_norm": 0.392578125, | |
| "learning_rate": 8.593029667756665e-06, | |
| "loss": 0.9332261085510254, | |
| "step": 616 | |
| }, | |
| { | |
| "epoch": 1.3148936170212766, | |
| "grad_norm": 0.4609375, | |
| "learning_rate": 8.57146005449278e-06, | |
| "loss": 0.7537972331047058, | |
| "step": 618 | |
| }, | |
| { | |
| "epoch": 1.3191489361702127, | |
| "grad_norm": 0.361328125, | |
| "learning_rate": 8.549860243036443e-06, | |
| "loss": 0.8345380425453186, | |
| "step": 620 | |
| }, | |
| { | |
| "epoch": 1.323404255319149, | |
| "grad_norm": 0.70703125, | |
| "learning_rate": 8.528230708994113e-06, | |
| "loss": 0.8078710436820984, | |
| "step": 622 | |
| }, | |
| { | |
| "epoch": 1.327659574468085, | |
| "grad_norm": 1.5390625, | |
| "learning_rate": 8.506571928626716e-06, | |
| "loss": 0.6944683790206909, | |
| "step": 624 | |
| }, | |
| { | |
| "epoch": 1.3319148936170213, | |
| "grad_norm": 0.333984375, | |
| "learning_rate": 8.484884378839148e-06, | |
| "loss": 0.8724764585494995, | |
| "step": 626 | |
| }, | |
| { | |
| "epoch": 1.3361702127659574, | |
| "grad_norm": 0.7109375, | |
| "learning_rate": 8.463168537169782e-06, | |
| "loss": 0.9229905009269714, | |
| "step": 628 | |
| }, | |
| { | |
| "epoch": 1.3404255319148937, | |
| "grad_norm": 0.466796875, | |
| "learning_rate": 8.44142488177995e-06, | |
| "loss": 0.8973690271377563, | |
| "step": 630 | |
| }, | |
| { | |
| "epoch": 1.3446808510638297, | |
| "grad_norm": 0.5390625, | |
| "learning_rate": 8.419653891443415e-06, | |
| "loss": 0.8710704445838928, | |
| "step": 632 | |
| }, | |
| { | |
| "epoch": 1.348936170212766, | |
| "grad_norm": 1.890625, | |
| "learning_rate": 8.397856045535826e-06, | |
| "loss": 0.9143708348274231, | |
| "step": 634 | |
| }, | |
| { | |
| "epoch": 1.353191489361702, | |
| "grad_norm": 0.40234375, | |
| "learning_rate": 8.37603182402417e-06, | |
| "loss": 0.7919833660125732, | |
| "step": 636 | |
| }, | |
| { | |
| "epoch": 1.3574468085106384, | |
| "grad_norm": 0.341796875, | |
| "learning_rate": 8.354181707456192e-06, | |
| "loss": 0.7822130918502808, | |
| "step": 638 | |
| }, | |
| { | |
| "epoch": 1.3617021276595744, | |
| "grad_norm": 0.40625, | |
| "learning_rate": 8.332306176949824e-06, | |
| "loss": 0.635791003704071, | |
| "step": 640 | |
| }, | |
| { | |
| "epoch": 1.3659574468085105, | |
| "grad_norm": 0.341796875, | |
| "learning_rate": 8.310405714182593e-06, | |
| "loss": 0.765158474445343, | |
| "step": 642 | |
| }, | |
| { | |
| "epoch": 1.3702127659574468, | |
| "grad_norm": 0.56640625, | |
| "learning_rate": 8.288480801380998e-06, | |
| "loss": 0.526314914226532, | |
| "step": 644 | |
| }, | |
| { | |
| "epoch": 1.374468085106383, | |
| "grad_norm": 0.392578125, | |
| "learning_rate": 8.266531921309911e-06, | |
| "loss": 0.8815028071403503, | |
| "step": 646 | |
| }, | |
| { | |
| "epoch": 1.3787234042553191, | |
| "grad_norm": 0.59765625, | |
| "learning_rate": 8.244559557261944e-06, | |
| "loss": 0.8624444007873535, | |
| "step": 648 | |
| }, | |
| { | |
| "epoch": 1.3829787234042552, | |
| "grad_norm": 0.69921875, | |
| "learning_rate": 8.22256419304679e-06, | |
| "loss": 1.1067816019058228, | |
| "step": 650 | |
| }, | |
| { | |
| "epoch": 1.3872340425531915, | |
| "grad_norm": 0.408203125, | |
| "learning_rate": 8.200546312980595e-06, | |
| "loss": 0.8086753487586975, | |
| "step": 652 | |
| }, | |
| { | |
| "epoch": 1.3914893617021278, | |
| "grad_norm": 0.412109375, | |
| "learning_rate": 8.17850640187528e-06, | |
| "loss": 0.8894110321998596, | |
| "step": 654 | |
| }, | |
| { | |
| "epoch": 1.3957446808510638, | |
| "grad_norm": 0.38671875, | |
| "learning_rate": 8.156444945027855e-06, | |
| "loss": 0.9589279294013977, | |
| "step": 656 | |
| }, | |
| { | |
| "epoch": 1.4, | |
| "grad_norm": 0.82421875, | |
| "learning_rate": 8.134362428209765e-06, | |
| "loss": 0.8438636064529419, | |
| "step": 658 | |
| }, | |
| { | |
| "epoch": 1.4042553191489362, | |
| "grad_norm": 0.412109375, | |
| "learning_rate": 8.11225933765616e-06, | |
| "loss": 0.7788761258125305, | |
| "step": 660 | |
| }, | |
| { | |
| "epoch": 1.4085106382978723, | |
| "grad_norm": 0.361328125, | |
| "learning_rate": 8.090136160055213e-06, | |
| "loss": 0.8602153658866882, | |
| "step": 662 | |
| }, | |
| { | |
| "epoch": 1.4127659574468086, | |
| "grad_norm": 0.64453125, | |
| "learning_rate": 8.067993382537386e-06, | |
| "loss": 1.1651355028152466, | |
| "step": 664 | |
| }, | |
| { | |
| "epoch": 1.4170212765957446, | |
| "grad_norm": 0.376953125, | |
| "learning_rate": 8.045831492664716e-06, | |
| "loss": 0.8709754347801208, | |
| "step": 666 | |
| }, | |
| { | |
| "epoch": 1.421276595744681, | |
| "grad_norm": 0.36328125, | |
| "learning_rate": 8.023650978420076e-06, | |
| "loss": 0.8617551922798157, | |
| "step": 668 | |
| }, | |
| { | |
| "epoch": 1.425531914893617, | |
| "grad_norm": 0.322265625, | |
| "learning_rate": 8.001452328196425e-06, | |
| "loss": 0.7164908647537231, | |
| "step": 670 | |
| }, | |
| { | |
| "epoch": 1.4297872340425533, | |
| "grad_norm": 0.68359375, | |
| "learning_rate": 7.979236030786065e-06, | |
| "loss": 0.874544084072113, | |
| "step": 672 | |
| }, | |
| { | |
| "epoch": 1.4340425531914893, | |
| "grad_norm": 0.357421875, | |
| "learning_rate": 7.957002575369866e-06, | |
| "loss": 0.8772100806236267, | |
| "step": 674 | |
| }, | |
| { | |
| "epoch": 1.4382978723404256, | |
| "grad_norm": 0.82421875, | |
| "learning_rate": 7.934752451506499e-06, | |
| "loss": 0.8531442880630493, | |
| "step": 676 | |
| }, | |
| { | |
| "epoch": 1.4425531914893617, | |
| "grad_norm": 0.703125, | |
| "learning_rate": 7.912486149121662e-06, | |
| "loss": 0.8926745653152466, | |
| "step": 678 | |
| }, | |
| { | |
| "epoch": 1.4468085106382977, | |
| "grad_norm": 0.302734375, | |
| "learning_rate": 7.89020415849729e-06, | |
| "loss": 0.8355059623718262, | |
| "step": 680 | |
| }, | |
| { | |
| "epoch": 1.451063829787234, | |
| "grad_norm": 0.5234375, | |
| "learning_rate": 7.867906970260748e-06, | |
| "loss": 0.7553901076316833, | |
| "step": 682 | |
| }, | |
| { | |
| "epoch": 1.4553191489361703, | |
| "grad_norm": 0.427734375, | |
| "learning_rate": 7.845595075374053e-06, | |
| "loss": 0.7148939967155457, | |
| "step": 684 | |
| }, | |
| { | |
| "epoch": 1.4595744680851064, | |
| "grad_norm": 0.8203125, | |
| "learning_rate": 7.823268965123027e-06, | |
| "loss": 0.7749176621437073, | |
| "step": 686 | |
| }, | |
| { | |
| "epoch": 1.4638297872340424, | |
| "grad_norm": 1.6796875, | |
| "learning_rate": 7.800929131106519e-06, | |
| "loss": 1.0506820678710938, | |
| "step": 688 | |
| }, | |
| { | |
| "epoch": 1.4680851063829787, | |
| "grad_norm": 0.67578125, | |
| "learning_rate": 7.77857606522555e-06, | |
| "loss": 0.5485996603965759, | |
| "step": 690 | |
| }, | |
| { | |
| "epoch": 1.472340425531915, | |
| "grad_norm": 0.416015625, | |
| "learning_rate": 7.756210259672503e-06, | |
| "loss": 0.8781046271324158, | |
| "step": 692 | |
| }, | |
| { | |
| "epoch": 1.476595744680851, | |
| "grad_norm": 0.435546875, | |
| "learning_rate": 7.733832206920267e-06, | |
| "loss": 0.8102371692657471, | |
| "step": 694 | |
| }, | |
| { | |
| "epoch": 1.4808510638297872, | |
| "grad_norm": 0.4296875, | |
| "learning_rate": 7.711442399711406e-06, | |
| "loss": 0.8387575149536133, | |
| "step": 696 | |
| }, | |
| { | |
| "epoch": 1.4851063829787234, | |
| "grad_norm": 0.357421875, | |
| "learning_rate": 7.689041331047307e-06, | |
| "loss": 0.7191005945205688, | |
| "step": 698 | |
| }, | |
| { | |
| "epoch": 1.4893617021276595, | |
| "grad_norm": 0.77734375, | |
| "learning_rate": 7.66662949417732e-06, | |
| "loss": 0.560632586479187, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 1.4936170212765958, | |
| "grad_norm": 0.8515625, | |
| "learning_rate": 7.644207382587906e-06, | |
| "loss": 0.8454610705375671, | |
| "step": 702 | |
| }, | |
| { | |
| "epoch": 1.4978723404255319, | |
| "grad_norm": 0.455078125, | |
| "learning_rate": 7.621775489991757e-06, | |
| "loss": 0.5917819738388062, | |
| "step": 704 | |
| }, | |
| { | |
| "epoch": 1.5021276595744681, | |
| "grad_norm": 0.69140625, | |
| "learning_rate": 7.599334310316937e-06, | |
| "loss": 0.8950475454330444, | |
| "step": 706 | |
| }, | |
| { | |
| "epoch": 1.5063829787234042, | |
| "grad_norm": 0.83203125, | |
| "learning_rate": 7.576884337696004e-06, | |
| "loss": 0.9987728595733643, | |
| "step": 708 | |
| }, | |
| { | |
| "epoch": 1.5106382978723403, | |
| "grad_norm": 0.5, | |
| "learning_rate": 7.554426066455125e-06, | |
| "loss": 0.8234822154045105, | |
| "step": 710 | |
| }, | |
| { | |
| "epoch": 1.5148936170212766, | |
| "grad_norm": 1.15625, | |
| "learning_rate": 7.5319599911031986e-06, | |
| "loss": 0.948941707611084, | |
| "step": 712 | |
| }, | |
| { | |
| "epoch": 1.5191489361702128, | |
| "grad_norm": 0.95703125, | |
| "learning_rate": 7.509486606320955e-06, | |
| "loss": 0.8466644883155823, | |
| "step": 714 | |
| }, | |
| { | |
| "epoch": 1.523404255319149, | |
| "grad_norm": 2.265625, | |
| "learning_rate": 7.487006406950077e-06, | |
| "loss": 0.7706676721572876, | |
| "step": 716 | |
| }, | |
| { | |
| "epoch": 1.527659574468085, | |
| "grad_norm": 0.42578125, | |
| "learning_rate": 7.464519887982301e-06, | |
| "loss": 0.8639274835586548, | |
| "step": 718 | |
| }, | |
| { | |
| "epoch": 1.5319148936170213, | |
| "grad_norm": 0.28515625, | |
| "learning_rate": 7.442027544548502e-06, | |
| "loss": 0.8100276589393616, | |
| "step": 720 | |
| }, | |
| { | |
| "epoch": 1.5361702127659576, | |
| "grad_norm": 0.71484375, | |
| "learning_rate": 7.419529871907815e-06, | |
| "loss": 0.8926405310630798, | |
| "step": 722 | |
| }, | |
| { | |
| "epoch": 1.5404255319148936, | |
| "grad_norm": 0.341796875, | |
| "learning_rate": 7.397027365436715e-06, | |
| "loss": 0.8414310216903687, | |
| "step": 724 | |
| }, | |
| { | |
| "epoch": 1.5446808510638297, | |
| "grad_norm": 1.1953125, | |
| "learning_rate": 7.374520520618113e-06, | |
| "loss": 0.8629379868507385, | |
| "step": 726 | |
| }, | |
| { | |
| "epoch": 1.548936170212766, | |
| "grad_norm": 0.37890625, | |
| "learning_rate": 7.352009833030451e-06, | |
| "loss": 0.8124608397483826, | |
| "step": 728 | |
| }, | |
| { | |
| "epoch": 1.5531914893617023, | |
| "grad_norm": 0.361328125, | |
| "learning_rate": 7.329495798336777e-06, | |
| "loss": 1.0221534967422485, | |
| "step": 730 | |
| }, | |
| { | |
| "epoch": 1.5574468085106383, | |
| "grad_norm": 0.369140625, | |
| "learning_rate": 7.306978912273843e-06, | |
| "loss": 0.6406850218772888, | |
| "step": 732 | |
| }, | |
| { | |
| "epoch": 1.5617021276595744, | |
| "grad_norm": 2.296875, | |
| "learning_rate": 7.284459670641185e-06, | |
| "loss": 0.6190369129180908, | |
| "step": 734 | |
| }, | |
| { | |
| "epoch": 1.5659574468085107, | |
| "grad_norm": 0.384765625, | |
| "learning_rate": 7.261938569290206e-06, | |
| "loss": 0.8675222396850586, | |
| "step": 736 | |
| }, | |
| { | |
| "epoch": 1.570212765957447, | |
| "grad_norm": 0.609375, | |
| "learning_rate": 7.239416104113262e-06, | |
| "loss": 0.8379670977592468, | |
| "step": 738 | |
| }, | |
| { | |
| "epoch": 1.574468085106383, | |
| "grad_norm": 0.376953125, | |
| "learning_rate": 7.216892771032732e-06, | |
| "loss": 0.7264598608016968, | |
| "step": 740 | |
| }, | |
| { | |
| "epoch": 1.578723404255319, | |
| "grad_norm": 0.54296875, | |
| "learning_rate": 7.1943690659901095e-06, | |
| "loss": 0.8947696685791016, | |
| "step": 742 | |
| }, | |
| { | |
| "epoch": 1.5829787234042554, | |
| "grad_norm": 0.7109375, | |
| "learning_rate": 7.17184548493508e-06, | |
| "loss": 0.7789361476898193, | |
| "step": 744 | |
| }, | |
| { | |
| "epoch": 1.5872340425531914, | |
| "grad_norm": 0.400390625, | |
| "learning_rate": 7.149322523814594e-06, | |
| "loss": 0.8117201328277588, | |
| "step": 746 | |
| }, | |
| { | |
| "epoch": 1.5914893617021275, | |
| "grad_norm": 0.455078125, | |
| "learning_rate": 7.1268006785619575e-06, | |
| "loss": 0.7403523921966553, | |
| "step": 748 | |
| }, | |
| { | |
| "epoch": 1.5957446808510638, | |
| "grad_norm": 0.58984375, | |
| "learning_rate": 7.104280445085897e-06, | |
| "loss": 0.8037891387939453, | |
| "step": 750 | |
| }, | |
| { | |
| "epoch": 1.6, | |
| "grad_norm": 0.435546875, | |
| "learning_rate": 7.081762319259662e-06, | |
| "loss": 0.8160814642906189, | |
| "step": 752 | |
| }, | |
| { | |
| "epoch": 1.6042553191489362, | |
| "grad_norm": 0.326171875, | |
| "learning_rate": 7.0592467969100836e-06, | |
| "loss": 0.7555669546127319, | |
| "step": 754 | |
| }, | |
| { | |
| "epoch": 1.6085106382978722, | |
| "grad_norm": 0.78515625, | |
| "learning_rate": 7.036734373806672e-06, | |
| "loss": 0.8494399785995483, | |
| "step": 756 | |
| }, | |
| { | |
| "epoch": 1.6127659574468085, | |
| "grad_norm": 0.41015625, | |
| "learning_rate": 7.01422554565069e-06, | |
| "loss": 1.0269806385040283, | |
| "step": 758 | |
| }, | |
| { | |
| "epoch": 1.6170212765957448, | |
| "grad_norm": 0.466796875, | |
| "learning_rate": 6.991720808064251e-06, | |
| "loss": 0.9812240600585938, | |
| "step": 760 | |
| }, | |
| { | |
| "epoch": 1.6212765957446809, | |
| "grad_norm": 1.671875, | |
| "learning_rate": 6.969220656579391e-06, | |
| "loss": 0.8393826484680176, | |
| "step": 762 | |
| }, | |
| { | |
| "epoch": 1.625531914893617, | |
| "grad_norm": 0.486328125, | |
| "learning_rate": 6.946725586627165e-06, | |
| "loss": 0.9660863876342773, | |
| "step": 764 | |
| }, | |
| { | |
| "epoch": 1.6297872340425532, | |
| "grad_norm": 0.310546875, | |
| "learning_rate": 6.924236093526747e-06, | |
| "loss": 1.0426111221313477, | |
| "step": 766 | |
| }, | |
| { | |
| "epoch": 1.6340425531914895, | |
| "grad_norm": 0.470703125, | |
| "learning_rate": 6.901752672474499e-06, | |
| "loss": 0.6731575727462769, | |
| "step": 768 | |
| }, | |
| { | |
| "epoch": 1.6382978723404256, | |
| "grad_norm": 0.458984375, | |
| "learning_rate": 6.879275818533095e-06, | |
| "loss": 0.9503965377807617, | |
| "step": 770 | |
| }, | |
| { | |
| "epoch": 1.6425531914893616, | |
| "grad_norm": 0.486328125, | |
| "learning_rate": 6.8568060266206056e-06, | |
| "loss": 1.0612298250198364, | |
| "step": 772 | |
| }, | |
| { | |
| "epoch": 1.646808510638298, | |
| "grad_norm": 0.6875, | |
| "learning_rate": 6.834343791499595e-06, | |
| "loss": 0.7399391531944275, | |
| "step": 774 | |
| }, | |
| { | |
| "epoch": 1.6510638297872342, | |
| "grad_norm": 0.64453125, | |
| "learning_rate": 6.811889607766242e-06, | |
| "loss": 0.6109141707420349, | |
| "step": 776 | |
| }, | |
| { | |
| "epoch": 1.65531914893617, | |
| "grad_norm": 0.3515625, | |
| "learning_rate": 6.789443969839441e-06, | |
| "loss": 0.8604304790496826, | |
| "step": 778 | |
| }, | |
| { | |
| "epoch": 1.6595744680851063, | |
| "grad_norm": 0.59375, | |
| "learning_rate": 6.767007371949911e-06, | |
| "loss": 0.864715576171875, | |
| "step": 780 | |
| }, | |
| { | |
| "epoch": 1.6638297872340426, | |
| "grad_norm": 0.3984375, | |
| "learning_rate": 6.744580308129327e-06, | |
| "loss": 0.8427615165710449, | |
| "step": 782 | |
| }, | |
| { | |
| "epoch": 1.6680851063829787, | |
| "grad_norm": 0.53125, | |
| "learning_rate": 6.722163272199424e-06, | |
| "loss": 0.9220309853553772, | |
| "step": 784 | |
| }, | |
| { | |
| "epoch": 1.6723404255319148, | |
| "grad_norm": 1.046875, | |
| "learning_rate": 6.69975675776114e-06, | |
| "loss": 0.8783171772956848, | |
| "step": 786 | |
| }, | |
| { | |
| "epoch": 1.676595744680851, | |
| "grad_norm": 0.30078125, | |
| "learning_rate": 6.677361258183735e-06, | |
| "loss": 0.6494432687759399, | |
| "step": 788 | |
| }, | |
| { | |
| "epoch": 1.6808510638297873, | |
| "grad_norm": 0.462890625, | |
| "learning_rate": 6.6549772665939346e-06, | |
| "loss": 0.8931559920310974, | |
| "step": 790 | |
| }, | |
| { | |
| "epoch": 1.6851063829787234, | |
| "grad_norm": 0.6796875, | |
| "learning_rate": 6.632605275865074e-06, | |
| "loss": 0.7723158597946167, | |
| "step": 792 | |
| }, | |
| { | |
| "epoch": 1.6893617021276595, | |
| "grad_norm": 0.474609375, | |
| "learning_rate": 6.610245778606232e-06, | |
| "loss": 0.9853664636611938, | |
| "step": 794 | |
| }, | |
| { | |
| "epoch": 1.6936170212765957, | |
| "grad_norm": 0.54296875, | |
| "learning_rate": 6.587899267151401e-06, | |
| "loss": 0.7868849635124207, | |
| "step": 796 | |
| }, | |
| { | |
| "epoch": 1.697872340425532, | |
| "grad_norm": 0.51953125, | |
| "learning_rate": 6.56556623354864e-06, | |
| "loss": 0.852700412273407, | |
| "step": 798 | |
| }, | |
| { | |
| "epoch": 1.702127659574468, | |
| "grad_norm": 0.443359375, | |
| "learning_rate": 6.543247169549232e-06, | |
| "loss": 0.8994773626327515, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 1.7063829787234042, | |
| "grad_norm": 0.34765625, | |
| "learning_rate": 6.520942566596868e-06, | |
| "loss": 0.8999802470207214, | |
| "step": 802 | |
| }, | |
| { | |
| "epoch": 1.7106382978723405, | |
| "grad_norm": 0.5, | |
| "learning_rate": 6.4986529158168215e-06, | |
| "loss": 0.7869191765785217, | |
| "step": 804 | |
| }, | |
| { | |
| "epoch": 1.7148936170212767, | |
| "grad_norm": 0.64453125, | |
| "learning_rate": 6.476378708005135e-06, | |
| "loss": 0.8270288705825806, | |
| "step": 806 | |
| }, | |
| { | |
| "epoch": 1.7191489361702128, | |
| "grad_norm": 0.75, | |
| "learning_rate": 6.454120433617804e-06, | |
| "loss": 0.9229409694671631, | |
| "step": 808 | |
| }, | |
| { | |
| "epoch": 1.7234042553191489, | |
| "grad_norm": 0.59765625, | |
| "learning_rate": 6.431878582759994e-06, | |
| "loss": 0.7548995614051819, | |
| "step": 810 | |
| }, | |
| { | |
| "epoch": 1.7276595744680852, | |
| "grad_norm": 0.357421875, | |
| "learning_rate": 6.409653645175241e-06, | |
| "loss": 0.8321532607078552, | |
| "step": 812 | |
| }, | |
| { | |
| "epoch": 1.7319148936170212, | |
| "grad_norm": 0.62109375, | |
| "learning_rate": 6.387446110234658e-06, | |
| "loss": 0.6601775288581848, | |
| "step": 814 | |
| }, | |
| { | |
| "epoch": 1.7361702127659573, | |
| "grad_norm": 0.34765625, | |
| "learning_rate": 6.365256466926183e-06, | |
| "loss": 0.8633728623390198, | |
| "step": 816 | |
| }, | |
| { | |
| "epoch": 1.7404255319148936, | |
| "grad_norm": 0.447265625, | |
| "learning_rate": 6.343085203843786e-06, | |
| "loss": 0.9041755199432373, | |
| "step": 818 | |
| }, | |
| { | |
| "epoch": 1.7446808510638299, | |
| "grad_norm": 0.53515625, | |
| "learning_rate": 6.32093280917673e-06, | |
| "loss": 0.8834015727043152, | |
| "step": 820 | |
| }, | |
| { | |
| "epoch": 1.748936170212766, | |
| "grad_norm": 0.466796875, | |
| "learning_rate": 6.29879977069881e-06, | |
| "loss": 0.7971745133399963, | |
| "step": 822 | |
| }, | |
| { | |
| "epoch": 1.753191489361702, | |
| "grad_norm": 0.359375, | |
| "learning_rate": 6.2766865757576164e-06, | |
| "loss": 0.8187481164932251, | |
| "step": 824 | |
| }, | |
| { | |
| "epoch": 1.7574468085106383, | |
| "grad_norm": 0.58203125, | |
| "learning_rate": 6.254593711263813e-06, | |
| "loss": 0.8846163153648376, | |
| "step": 826 | |
| }, | |
| { | |
| "epoch": 1.7617021276595746, | |
| "grad_norm": 0.875, | |
| "learning_rate": 6.232521663680393e-06, | |
| "loss": 0.9830833077430725, | |
| "step": 828 | |
| }, | |
| { | |
| "epoch": 1.7659574468085106, | |
| "grad_norm": 0.953125, | |
| "learning_rate": 6.210470919011992e-06, | |
| "loss": 0.7482036352157593, | |
| "step": 830 | |
| }, | |
| { | |
| "epoch": 1.7702127659574467, | |
| "grad_norm": 0.50390625, | |
| "learning_rate": 6.188441962794179e-06, | |
| "loss": 0.8920266628265381, | |
| "step": 832 | |
| }, | |
| { | |
| "epoch": 1.774468085106383, | |
| "grad_norm": 0.3359375, | |
| "learning_rate": 6.166435280082749e-06, | |
| "loss": 0.8772265315055847, | |
| "step": 834 | |
| }, | |
| { | |
| "epoch": 1.7787234042553193, | |
| "grad_norm": 0.435546875, | |
| "learning_rate": 6.1444513554430745e-06, | |
| "loss": 0.8204891681671143, | |
| "step": 836 | |
| }, | |
| { | |
| "epoch": 1.7829787234042553, | |
| "grad_norm": 0.4765625, | |
| "learning_rate": 6.122490672939405e-06, | |
| "loss": 0.5873453617095947, | |
| "step": 838 | |
| }, | |
| { | |
| "epoch": 1.7872340425531914, | |
| "grad_norm": 0.44921875, | |
| "learning_rate": 6.100553716124224e-06, | |
| "loss": 0.8039622902870178, | |
| "step": 840 | |
| }, | |
| { | |
| "epoch": 1.7914893617021277, | |
| "grad_norm": 1.15625, | |
| "learning_rate": 6.078640968027598e-06, | |
| "loss": 0.6872312426567078, | |
| "step": 842 | |
| }, | |
| { | |
| "epoch": 1.795744680851064, | |
| "grad_norm": 0.78125, | |
| "learning_rate": 6.056752911146548e-06, | |
| "loss": 0.8578442931175232, | |
| "step": 844 | |
| }, | |
| { | |
| "epoch": 1.8, | |
| "grad_norm": 0.4921875, | |
| "learning_rate": 6.034890027434413e-06, | |
| "loss": 0.7564026117324829, | |
| "step": 846 | |
| }, | |
| { | |
| "epoch": 1.804255319148936, | |
| "grad_norm": 0.78515625, | |
| "learning_rate": 6.013052798290241e-06, | |
| "loss": 0.8832213878631592, | |
| "step": 848 | |
| }, | |
| { | |
| "epoch": 1.8085106382978724, | |
| "grad_norm": 0.62109375, | |
| "learning_rate": 5.9912417045482e-06, | |
| "loss": 0.8571723699569702, | |
| "step": 850 | |
| }, | |
| { | |
| "epoch": 1.8127659574468085, | |
| "grad_norm": 0.60546875, | |
| "learning_rate": 5.969457226466977e-06, | |
| "loss": 0.824770450592041, | |
| "step": 852 | |
| }, | |
| { | |
| "epoch": 1.8170212765957445, | |
| "grad_norm": 0.38671875, | |
| "learning_rate": 5.9476998437192066e-06, | |
| "loss": 0.8723496794700623, | |
| "step": 854 | |
| }, | |
| { | |
| "epoch": 1.8212765957446808, | |
| "grad_norm": 0.388671875, | |
| "learning_rate": 5.925970035380918e-06, | |
| "loss": 0.7535234093666077, | |
| "step": 856 | |
| }, | |
| { | |
| "epoch": 1.825531914893617, | |
| "grad_norm": 0.40625, | |
| "learning_rate": 5.904268279920973e-06, | |
| "loss": 0.9033308625221252, | |
| "step": 858 | |
| }, | |
| { | |
| "epoch": 1.8297872340425532, | |
| "grad_norm": 0.345703125, | |
| "learning_rate": 5.88259505519054e-06, | |
| "loss": 0.670329749584198, | |
| "step": 860 | |
| }, | |
| { | |
| "epoch": 1.8340425531914892, | |
| "grad_norm": 0.43359375, | |
| "learning_rate": 5.860950838412565e-06, | |
| "loss": 0.8669137358665466, | |
| "step": 862 | |
| }, | |
| { | |
| "epoch": 1.8382978723404255, | |
| "grad_norm": 0.357421875, | |
| "learning_rate": 5.839336106171274e-06, | |
| "loss": 0.8537063598632812, | |
| "step": 864 | |
| }, | |
| { | |
| "epoch": 1.8425531914893618, | |
| "grad_norm": 0.6640625, | |
| "learning_rate": 5.81775133440167e-06, | |
| "loss": 0.8618923425674438, | |
| "step": 866 | |
| }, | |
| { | |
| "epoch": 1.8468085106382979, | |
| "grad_norm": 0.400390625, | |
| "learning_rate": 5.79619699837905e-06, | |
| "loss": 0.7936420440673828, | |
| "step": 868 | |
| }, | |
| { | |
| "epoch": 1.851063829787234, | |
| "grad_norm": 0.46484375, | |
| "learning_rate": 5.774673572708554e-06, | |
| "loss": 0.7838106155395508, | |
| "step": 870 | |
| }, | |
| { | |
| "epoch": 1.8553191489361702, | |
| "grad_norm": 0.455078125, | |
| "learning_rate": 5.753181531314708e-06, | |
| "loss": 0.8583153486251831, | |
| "step": 872 | |
| }, | |
| { | |
| "epoch": 1.8595744680851065, | |
| "grad_norm": 0.43359375, | |
| "learning_rate": 5.7317213474309764e-06, | |
| "loss": 0.9282540678977966, | |
| "step": 874 | |
| }, | |
| { | |
| "epoch": 1.8638297872340426, | |
| "grad_norm": 0.75, | |
| "learning_rate": 5.710293493589363e-06, | |
| "loss": 0.6059424877166748, | |
| "step": 876 | |
| }, | |
| { | |
| "epoch": 1.8680851063829786, | |
| "grad_norm": 3.71875, | |
| "learning_rate": 5.688898441609994e-06, | |
| "loss": 0.9776955842971802, | |
| "step": 878 | |
| }, | |
| { | |
| "epoch": 1.872340425531915, | |
| "grad_norm": 0.72265625, | |
| "learning_rate": 5.6675366625907264e-06, | |
| "loss": 0.900459885597229, | |
| "step": 880 | |
| }, | |
| { | |
| "epoch": 1.8765957446808512, | |
| "grad_norm": 0.458984375, | |
| "learning_rate": 5.646208626896784e-06, | |
| "loss": 0.758176326751709, | |
| "step": 882 | |
| }, | |
| { | |
| "epoch": 1.8808510638297873, | |
| "grad_norm": 0.41796875, | |
| "learning_rate": 5.624914804150397e-06, | |
| "loss": 0.8674149513244629, | |
| "step": 884 | |
| }, | |
| { | |
| "epoch": 1.8851063829787233, | |
| "grad_norm": 0.6796875, | |
| "learning_rate": 5.6036556632204564e-06, | |
| "loss": 0.778677761554718, | |
| "step": 886 | |
| }, | |
| { | |
| "epoch": 1.8893617021276596, | |
| "grad_norm": 0.63671875, | |
| "learning_rate": 5.582431672212195e-06, | |
| "loss": 0.8965961933135986, | |
| "step": 888 | |
| }, | |
| { | |
| "epoch": 1.8936170212765957, | |
| "grad_norm": 0.408203125, | |
| "learning_rate": 5.5612432984568815e-06, | |
| "loss": 0.5581719279289246, | |
| "step": 890 | |
| }, | |
| { | |
| "epoch": 1.8978723404255318, | |
| "grad_norm": 1.609375, | |
| "learning_rate": 5.5400910085015275e-06, | |
| "loss": 0.8819167017936707, | |
| "step": 892 | |
| }, | |
| { | |
| "epoch": 1.902127659574468, | |
| "grad_norm": 0.490234375, | |
| "learning_rate": 5.518975268098611e-06, | |
| "loss": 0.9992945194244385, | |
| "step": 894 | |
| }, | |
| { | |
| "epoch": 1.9063829787234043, | |
| "grad_norm": 0.330078125, | |
| "learning_rate": 5.497896542195829e-06, | |
| "loss": 0.6863605976104736, | |
| "step": 896 | |
| }, | |
| { | |
| "epoch": 1.9106382978723404, | |
| "grad_norm": 0.447265625, | |
| "learning_rate": 5.476855294925857e-06, | |
| "loss": 0.7966746687889099, | |
| "step": 898 | |
| }, | |
| { | |
| "epoch": 1.9148936170212765, | |
| "grad_norm": 0.310546875, | |
| "learning_rate": 5.455851989596123e-06, | |
| "loss": 1.0022021532058716, | |
| "step": 900 | |
| }, | |
| { | |
| "epoch": 1.9191489361702128, | |
| "grad_norm": 0.3359375, | |
| "learning_rate": 5.434887088678614e-06, | |
| "loss": 0.7175713181495667, | |
| "step": 902 | |
| }, | |
| { | |
| "epoch": 1.923404255319149, | |
| "grad_norm": 0.43359375, | |
| "learning_rate": 5.413961053799693e-06, | |
| "loss": 0.787550687789917, | |
| "step": 904 | |
| }, | |
| { | |
| "epoch": 1.9276595744680851, | |
| "grad_norm": 0.53515625, | |
| "learning_rate": 5.393074345729926e-06, | |
| "loss": 0.9805369973182678, | |
| "step": 906 | |
| }, | |
| { | |
| "epoch": 1.9319148936170212, | |
| "grad_norm": 0.71875, | |
| "learning_rate": 5.372227424373942e-06, | |
| "loss": 0.90399169921875, | |
| "step": 908 | |
| }, | |
| { | |
| "epoch": 1.9361702127659575, | |
| "grad_norm": 0.49609375, | |
| "learning_rate": 5.351420748760311e-06, | |
| "loss": 0.8127355575561523, | |
| "step": 910 | |
| }, | |
| { | |
| "epoch": 1.9404255319148938, | |
| "grad_norm": 0.34765625, | |
| "learning_rate": 5.330654777031428e-06, | |
| "loss": 0.9437844157218933, | |
| "step": 912 | |
| }, | |
| { | |
| "epoch": 1.9446808510638298, | |
| "grad_norm": 0.412109375, | |
| "learning_rate": 5.309929966433428e-06, | |
| "loss": 1.0004428625106812, | |
| "step": 914 | |
| }, | |
| { | |
| "epoch": 1.9489361702127659, | |
| "grad_norm": 1.953125, | |
| "learning_rate": 5.289246773306118e-06, | |
| "loss": 0.8540473580360413, | |
| "step": 916 | |
| }, | |
| { | |
| "epoch": 1.9531914893617022, | |
| "grad_norm": 0.51171875, | |
| "learning_rate": 5.268605653072935e-06, | |
| "loss": 0.7977997660636902, | |
| "step": 918 | |
| }, | |
| { | |
| "epoch": 1.9574468085106385, | |
| "grad_norm": 0.494140625, | |
| "learning_rate": 5.248007060230907e-06, | |
| "loss": 0.9748218655586243, | |
| "step": 920 | |
| }, | |
| { | |
| "epoch": 1.9617021276595743, | |
| "grad_norm": 0.41796875, | |
| "learning_rate": 5.227451448340651e-06, | |
| "loss": 0.86171555519104, | |
| "step": 922 | |
| }, | |
| { | |
| "epoch": 1.9659574468085106, | |
| "grad_norm": 0.353515625, | |
| "learning_rate": 5.206939270016393e-06, | |
| "loss": 0.8841200470924377, | |
| "step": 924 | |
| }, | |
| { | |
| "epoch": 1.9702127659574469, | |
| "grad_norm": 0.50390625, | |
| "learning_rate": 5.186470976915983e-06, | |
| "loss": 0.9302433133125305, | |
| "step": 926 | |
| }, | |
| { | |
| "epoch": 1.974468085106383, | |
| "grad_norm": 0.484375, | |
| "learning_rate": 5.166047019730971e-06, | |
| "loss": 0.6985507011413574, | |
| "step": 928 | |
| }, | |
| { | |
| "epoch": 1.978723404255319, | |
| "grad_norm": 0.3984375, | |
| "learning_rate": 5.145667848176675e-06, | |
| "loss": 0.9847785830497742, | |
| "step": 930 | |
| }, | |
| { | |
| "epoch": 1.9829787234042553, | |
| "grad_norm": 0.4140625, | |
| "learning_rate": 5.1253339109822705e-06, | |
| "loss": 0.9930030703544617, | |
| "step": 932 | |
| }, | |
| { | |
| "epoch": 1.9872340425531916, | |
| "grad_norm": 0.5703125, | |
| "learning_rate": 5.10504565588092e-06, | |
| "loss": 0.7830001711845398, | |
| "step": 934 | |
| }, | |
| { | |
| "epoch": 1.9914893617021276, | |
| "grad_norm": 0.365234375, | |
| "learning_rate": 5.084803529599915e-06, | |
| "loss": 0.607052743434906, | |
| "step": 936 | |
| }, | |
| { | |
| "epoch": 1.9957446808510637, | |
| "grad_norm": 1.0078125, | |
| "learning_rate": 5.064607977850834e-06, | |
| "loss": 0.9631056785583496, | |
| "step": 938 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "grad_norm": 0.60546875, | |
| "learning_rate": 5.044459445319727e-06, | |
| "loss": 0.6884191036224365, | |
| "step": 940 | |
| }, | |
| { | |
| "epoch": 2.0042553191489363, | |
| "grad_norm": 0.4296875, | |
| "learning_rate": 5.024358375657334e-06, | |
| "loss": 0.5563607215881348, | |
| "step": 942 | |
| }, | |
| { | |
| "epoch": 2.008510638297872, | |
| "grad_norm": 0.412109375, | |
| "learning_rate": 5.004305211469303e-06, | |
| "loss": 0.5658197402954102, | |
| "step": 944 | |
| }, | |
| { | |
| "epoch": 2.0127659574468084, | |
| "grad_norm": 0.4453125, | |
| "learning_rate": 4.984300394306453e-06, | |
| "loss": 0.5938859581947327, | |
| "step": 946 | |
| }, | |
| { | |
| "epoch": 2.0170212765957447, | |
| "grad_norm": 0.416015625, | |
| "learning_rate": 4.964344364655053e-06, | |
| "loss": 0.5363519191741943, | |
| "step": 948 | |
| }, | |
| { | |
| "epoch": 2.021276595744681, | |
| "grad_norm": 0.66015625, | |
| "learning_rate": 4.944437561927118e-06, | |
| "loss": 0.6647061109542847, | |
| "step": 950 | |
| }, | |
| { | |
| "epoch": 2.025531914893617, | |
| "grad_norm": 0.357421875, | |
| "learning_rate": 4.92458042445073e-06, | |
| "loss": 0.557117223739624, | |
| "step": 952 | |
| }, | |
| { | |
| "epoch": 2.029787234042553, | |
| "grad_norm": 0.427734375, | |
| "learning_rate": 4.9047733894603946e-06, | |
| "loss": 0.3953529894351959, | |
| "step": 954 | |
| }, | |
| { | |
| "epoch": 2.0340425531914894, | |
| "grad_norm": 0.439453125, | |
| "learning_rate": 4.88501689308741e-06, | |
| "loss": 0.779535710811615, | |
| "step": 956 | |
| }, | |
| { | |
| "epoch": 2.0382978723404257, | |
| "grad_norm": 0.36328125, | |
| "learning_rate": 4.8653113703502695e-06, | |
| "loss": 0.5275522470474243, | |
| "step": 958 | |
| }, | |
| { | |
| "epoch": 2.0425531914893615, | |
| "grad_norm": 0.51953125, | |
| "learning_rate": 4.845657255145068e-06, | |
| "loss": 0.5947195291519165, | |
| "step": 960 | |
| }, | |
| { | |
| "epoch": 2.046808510638298, | |
| "grad_norm": 2.640625, | |
| "learning_rate": 4.8260549802359605e-06, | |
| "loss": 0.6270468235015869, | |
| "step": 962 | |
| }, | |
| { | |
| "epoch": 2.051063829787234, | |
| "grad_norm": 0.64453125, | |
| "learning_rate": 4.806504977245636e-06, | |
| "loss": 0.6905896067619324, | |
| "step": 964 | |
| }, | |
| { | |
| "epoch": 2.0553191489361704, | |
| "grad_norm": 0.51953125, | |
| "learning_rate": 4.7870076766457995e-06, | |
| "loss": 0.5533561110496521, | |
| "step": 966 | |
| }, | |
| { | |
| "epoch": 2.0595744680851062, | |
| "grad_norm": 0.609375, | |
| "learning_rate": 4.767563507747705e-06, | |
| "loss": 0.6810728311538696, | |
| "step": 968 | |
| }, | |
| { | |
| "epoch": 2.0638297872340425, | |
| "grad_norm": 0.337890625, | |
| "learning_rate": 4.748172898692704e-06, | |
| "loss": 0.3691895306110382, | |
| "step": 970 | |
| }, | |
| { | |
| "epoch": 2.068085106382979, | |
| "grad_norm": 0.6953125, | |
| "learning_rate": 4.728836276442803e-06, | |
| "loss": 0.5883108377456665, | |
| "step": 972 | |
| }, | |
| { | |
| "epoch": 2.072340425531915, | |
| "grad_norm": 0.498046875, | |
| "learning_rate": 4.7095540667712775e-06, | |
| "loss": 0.5326440334320068, | |
| "step": 974 | |
| }, | |
| { | |
| "epoch": 2.076595744680851, | |
| "grad_norm": 1.84375, | |
| "learning_rate": 4.690326694253294e-06, | |
| "loss": 0.41566312313079834, | |
| "step": 976 | |
| }, | |
| { | |
| "epoch": 2.0808510638297872, | |
| "grad_norm": 0.62109375, | |
| "learning_rate": 4.671154582256559e-06, | |
| "loss": 0.7029457688331604, | |
| "step": 978 | |
| }, | |
| { | |
| "epoch": 2.0851063829787235, | |
| "grad_norm": 0.2412109375, | |
| "learning_rate": 4.6520381529319954e-06, | |
| "loss": 0.4108755588531494, | |
| "step": 980 | |
| }, | |
| { | |
| "epoch": 2.0893617021276594, | |
| "grad_norm": 0.4609375, | |
| "learning_rate": 4.632977827204445e-06, | |
| "loss": 0.4902803599834442, | |
| "step": 982 | |
| }, | |
| { | |
| "epoch": 2.0936170212765957, | |
| "grad_norm": 0.98828125, | |
| "learning_rate": 4.613974024763411e-06, | |
| "loss": 0.5197535753250122, | |
| "step": 984 | |
| }, | |
| { | |
| "epoch": 2.097872340425532, | |
| "grad_norm": 0.341796875, | |
| "learning_rate": 4.595027164053805e-06, | |
| "loss": 0.4887603521347046, | |
| "step": 986 | |
| }, | |
| { | |
| "epoch": 2.1021276595744682, | |
| "grad_norm": 0.45703125, | |
| "learning_rate": 4.5761376622667406e-06, | |
| "loss": 0.276875376701355, | |
| "step": 988 | |
| }, | |
| { | |
| "epoch": 2.106382978723404, | |
| "grad_norm": 0.375, | |
| "learning_rate": 4.557305935330346e-06, | |
| "loss": 0.6325949430465698, | |
| "step": 990 | |
| }, | |
| { | |
| "epoch": 2.1106382978723404, | |
| "grad_norm": 0.451171875, | |
| "learning_rate": 4.538532397900599e-06, | |
| "loss": 0.6041569709777832, | |
| "step": 992 | |
| }, | |
| { | |
| "epoch": 2.1148936170212767, | |
| "grad_norm": 0.408203125, | |
| "learning_rate": 4.519817463352204e-06, | |
| "loss": 0.6599090099334717, | |
| "step": 994 | |
| }, | |
| { | |
| "epoch": 2.119148936170213, | |
| "grad_norm": 0.68359375, | |
| "learning_rate": 4.5011615437694915e-06, | |
| "loss": 0.5671730041503906, | |
| "step": 996 | |
| }, | |
| { | |
| "epoch": 2.123404255319149, | |
| "grad_norm": 0.51953125, | |
| "learning_rate": 4.48256504993734e-06, | |
| "loss": 0.5928320288658142, | |
| "step": 998 | |
| }, | |
| { | |
| "epoch": 2.127659574468085, | |
| "grad_norm": 0.396484375, | |
| "learning_rate": 4.464028391332129e-06, | |
| "loss": 0.20121610164642334, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 2.1319148936170214, | |
| "grad_norm": 0.4140625, | |
| "learning_rate": 4.445551976112725e-06, | |
| "loss": 0.7131472826004028, | |
| "step": 1002 | |
| }, | |
| { | |
| "epoch": 2.1361702127659576, | |
| "grad_norm": 0.52734375, | |
| "learning_rate": 4.4271362111115006e-06, | |
| "loss": 0.5065695643424988, | |
| "step": 1004 | |
| }, | |
| { | |
| "epoch": 2.1404255319148935, | |
| "grad_norm": 0.765625, | |
| "learning_rate": 4.408781501825362e-06, | |
| "loss": 0.733562707901001, | |
| "step": 1006 | |
| }, | |
| { | |
| "epoch": 2.1446808510638298, | |
| "grad_norm": 0.439453125, | |
| "learning_rate": 4.390488252406838e-06, | |
| "loss": 0.5062799453735352, | |
| "step": 1008 | |
| }, | |
| { | |
| "epoch": 2.148936170212766, | |
| "grad_norm": 0.310546875, | |
| "learning_rate": 4.372256865655169e-06, | |
| "loss": 0.39719632267951965, | |
| "step": 1010 | |
| }, | |
| { | |
| "epoch": 2.153191489361702, | |
| "grad_norm": 0.384765625, | |
| "learning_rate": 4.354087743007433e-06, | |
| "loss": 0.5480824112892151, | |
| "step": 1012 | |
| }, | |
| { | |
| "epoch": 2.157446808510638, | |
| "grad_norm": 0.640625, | |
| "learning_rate": 4.335981284529725e-06, | |
| "loss": 0.5634360909461975, | |
| "step": 1014 | |
| }, | |
| { | |
| "epoch": 2.1617021276595745, | |
| "grad_norm": 0.5390625, | |
| "learning_rate": 4.317937888908333e-06, | |
| "loss": 0.6165044903755188, | |
| "step": 1016 | |
| }, | |
| { | |
| "epoch": 2.1659574468085108, | |
| "grad_norm": 0.66796875, | |
| "learning_rate": 4.2999579534409626e-06, | |
| "loss": 0.3983045220375061, | |
| "step": 1018 | |
| }, | |
| { | |
| "epoch": 2.1702127659574466, | |
| "grad_norm": 0.73828125, | |
| "learning_rate": 4.282041874027989e-06, | |
| "loss": 0.41795188188552856, | |
| "step": 1020 | |
| }, | |
| { | |
| "epoch": 2.174468085106383, | |
| "grad_norm": 0.474609375, | |
| "learning_rate": 4.264190045163742e-06, | |
| "loss": 0.6024309396743774, | |
| "step": 1022 | |
| }, | |
| { | |
| "epoch": 2.178723404255319, | |
| "grad_norm": 1.015625, | |
| "learning_rate": 4.246402859927817e-06, | |
| "loss": 0.6394532918930054, | |
| "step": 1024 | |
| }, | |
| { | |
| "epoch": 2.1829787234042555, | |
| "grad_norm": 0.55078125, | |
| "learning_rate": 4.22868070997642e-06, | |
| "loss": 0.4610865116119385, | |
| "step": 1026 | |
| }, | |
| { | |
| "epoch": 2.1872340425531913, | |
| "grad_norm": 0.40234375, | |
| "learning_rate": 4.211023985533748e-06, | |
| "loss": 0.5758063197135925, | |
| "step": 1028 | |
| }, | |
| { | |
| "epoch": 2.1914893617021276, | |
| "grad_norm": 0.443359375, | |
| "learning_rate": 4.1934330753833885e-06, | |
| "loss": 0.6651563048362732, | |
| "step": 1030 | |
| }, | |
| { | |
| "epoch": 2.195744680851064, | |
| "grad_norm": 3.75, | |
| "learning_rate": 4.175908366859766e-06, | |
| "loss": 0.5991113185882568, | |
| "step": 1032 | |
| }, | |
| { | |
| "epoch": 2.2, | |
| "grad_norm": 0.470703125, | |
| "learning_rate": 4.158450245839608e-06, | |
| "loss": 0.6895382404327393, | |
| "step": 1034 | |
| }, | |
| { | |
| "epoch": 2.204255319148936, | |
| "grad_norm": 0.76953125, | |
| "learning_rate": 4.141059096733455e-06, | |
| "loss": 0.4550260305404663, | |
| "step": 1036 | |
| }, | |
| { | |
| "epoch": 2.2085106382978723, | |
| "grad_norm": 0.42578125, | |
| "learning_rate": 4.123735302477193e-06, | |
| "loss": 0.4480676054954529, | |
| "step": 1038 | |
| }, | |
| { | |
| "epoch": 2.2127659574468086, | |
| "grad_norm": 0.419921875, | |
| "learning_rate": 4.106479244523616e-06, | |
| "loss": 0.5520376563072205, | |
| "step": 1040 | |
| }, | |
| { | |
| "epoch": 2.217021276595745, | |
| "grad_norm": 0.470703125, | |
| "learning_rate": 4.0892913028340335e-06, | |
| "loss": 0.6519399285316467, | |
| "step": 1042 | |
| }, | |
| { | |
| "epoch": 2.2212765957446807, | |
| "grad_norm": 0.44921875, | |
| "learning_rate": 4.072171855869905e-06, | |
| "loss": 0.5653026700019836, | |
| "step": 1044 | |
| }, | |
| { | |
| "epoch": 2.225531914893617, | |
| "grad_norm": 0.44140625, | |
| "learning_rate": 4.055121280584499e-06, | |
| "loss": 0.5862460732460022, | |
| "step": 1046 | |
| }, | |
| { | |
| "epoch": 2.2297872340425533, | |
| "grad_norm": 1.1484375, | |
| "learning_rate": 4.038139952414603e-06, | |
| "loss": 0.8048577308654785, | |
| "step": 1048 | |
| }, | |
| { | |
| "epoch": 2.2340425531914896, | |
| "grad_norm": 0.396484375, | |
| "learning_rate": 4.02122824527225e-06, | |
| "loss": 0.530511736869812, | |
| "step": 1050 | |
| }, | |
| { | |
| "epoch": 2.2382978723404254, | |
| "grad_norm": 0.5234375, | |
| "learning_rate": 4.004386531536482e-06, | |
| "loss": 0.43314328789711, | |
| "step": 1052 | |
| }, | |
| { | |
| "epoch": 2.2425531914893617, | |
| "grad_norm": 0.57421875, | |
| "learning_rate": 3.987615182045163e-06, | |
| "loss": 0.5556919574737549, | |
| "step": 1054 | |
| }, | |
| { | |
| "epoch": 2.246808510638298, | |
| "grad_norm": 0.392578125, | |
| "learning_rate": 3.9709145660868015e-06, | |
| "loss": 0.6972762942314148, | |
| "step": 1056 | |
| }, | |
| { | |
| "epoch": 2.251063829787234, | |
| "grad_norm": 0.55859375, | |
| "learning_rate": 3.9542850513924275e-06, | |
| "loss": 0.31911152601242065, | |
| "step": 1058 | |
| }, | |
| { | |
| "epoch": 2.25531914893617, | |
| "grad_norm": 0.65234375, | |
| "learning_rate": 3.9377270041274875e-06, | |
| "loss": 0.6526750922203064, | |
| "step": 1060 | |
| }, | |
| { | |
| "epoch": 2.2595744680851064, | |
| "grad_norm": 0.76171875, | |
| "learning_rate": 3.921240788883785e-06, | |
| "loss": 0.5144931077957153, | |
| "step": 1062 | |
| }, | |
| { | |
| "epoch": 2.2638297872340427, | |
| "grad_norm": 0.69921875, | |
| "learning_rate": 3.904826768671458e-06, | |
| "loss": 0.7288011312484741, | |
| "step": 1064 | |
| }, | |
| { | |
| "epoch": 2.2680851063829786, | |
| "grad_norm": 0.578125, | |
| "learning_rate": 3.888485304910978e-06, | |
| "loss": 0.6101799607276917, | |
| "step": 1066 | |
| }, | |
| { | |
| "epoch": 2.272340425531915, | |
| "grad_norm": 0.6796875, | |
| "learning_rate": 3.8722167574252e-06, | |
| "loss": 0.5383592247962952, | |
| "step": 1068 | |
| }, | |
| { | |
| "epoch": 2.276595744680851, | |
| "grad_norm": 0.82421875, | |
| "learning_rate": 3.856021484431428e-06, | |
| "loss": 0.6244062185287476, | |
| "step": 1070 | |
| }, | |
| { | |
| "epoch": 2.2808510638297874, | |
| "grad_norm": 0.41796875, | |
| "learning_rate": 3.839899842533538e-06, | |
| "loss": 0.4686053991317749, | |
| "step": 1072 | |
| }, | |
| { | |
| "epoch": 2.2851063829787233, | |
| "grad_norm": 0.5078125, | |
| "learning_rate": 3.823852186714121e-06, | |
| "loss": 0.5087999105453491, | |
| "step": 1074 | |
| }, | |
| { | |
| "epoch": 2.2893617021276595, | |
| "grad_norm": 0.48828125, | |
| "learning_rate": 3.80787887032667e-06, | |
| "loss": 0.4900204837322235, | |
| "step": 1076 | |
| }, | |
| { | |
| "epoch": 2.293617021276596, | |
| "grad_norm": 0.70703125, | |
| "learning_rate": 3.7919802450877993e-06, | |
| "loss": 0.5593716502189636, | |
| "step": 1078 | |
| }, | |
| { | |
| "epoch": 2.297872340425532, | |
| "grad_norm": 0.859375, | |
| "learning_rate": 3.7761566610694882e-06, | |
| "loss": 0.3470194339752197, | |
| "step": 1080 | |
| }, | |
| { | |
| "epoch": 2.302127659574468, | |
| "grad_norm": 0.5546875, | |
| "learning_rate": 3.7604084666913924e-06, | |
| "loss": 0.28270450234413147, | |
| "step": 1082 | |
| }, | |
| { | |
| "epoch": 2.3063829787234043, | |
| "grad_norm": 0.4609375, | |
| "learning_rate": 3.74473600871316e-06, | |
| "loss": 0.6159269213676453, | |
| "step": 1084 | |
| }, | |
| { | |
| "epoch": 2.3106382978723405, | |
| "grad_norm": 0.38671875, | |
| "learning_rate": 3.729139632226795e-06, | |
| "loss": 0.46399620175361633, | |
| "step": 1086 | |
| }, | |
| { | |
| "epoch": 2.3148936170212764, | |
| "grad_norm": 1.3046875, | |
| "learning_rate": 3.713619680649067e-06, | |
| "loss": 0.39948195219039917, | |
| "step": 1088 | |
| }, | |
| { | |
| "epoch": 2.3191489361702127, | |
| "grad_norm": 1.890625, | |
| "learning_rate": 3.698176495713943e-06, | |
| "loss": 0.4936513602733612, | |
| "step": 1090 | |
| }, | |
| { | |
| "epoch": 2.323404255319149, | |
| "grad_norm": 0.453125, | |
| "learning_rate": 3.6828104174650614e-06, | |
| "loss": 0.6025733351707458, | |
| "step": 1092 | |
| }, | |
| { | |
| "epoch": 2.3276595744680852, | |
| "grad_norm": 0.42578125, | |
| "learning_rate": 3.667521784248253e-06, | |
| "loss": 0.5419857501983643, | |
| "step": 1094 | |
| }, | |
| { | |
| "epoch": 2.331914893617021, | |
| "grad_norm": 0.5625, | |
| "learning_rate": 3.652310932704083e-06, | |
| "loss": 0.5457516312599182, | |
| "step": 1096 | |
| }, | |
| { | |
| "epoch": 2.3361702127659574, | |
| "grad_norm": 0.6015625, | |
| "learning_rate": 3.637178197760443e-06, | |
| "loss": 0.5860179662704468, | |
| "step": 1098 | |
| }, | |
| { | |
| "epoch": 2.3404255319148937, | |
| "grad_norm": 0.36328125, | |
| "learning_rate": 3.6221239126251687e-06, | |
| "loss": 0.4711592197418213, | |
| "step": 1100 | |
| }, | |
| { | |
| "epoch": 2.34468085106383, | |
| "grad_norm": 0.64453125, | |
| "learning_rate": 3.6071484087787147e-06, | |
| "loss": 0.6296599507331848, | |
| "step": 1102 | |
| }, | |
| { | |
| "epoch": 2.348936170212766, | |
| "grad_norm": 1.96875, | |
| "learning_rate": 3.59225201596685e-06, | |
| "loss": 0.7384690046310425, | |
| "step": 1104 | |
| }, | |
| { | |
| "epoch": 2.353191489361702, | |
| "grad_norm": 0.63671875, | |
| "learning_rate": 3.577435062193391e-06, | |
| "loss": 0.5660156607627869, | |
| "step": 1106 | |
| }, | |
| { | |
| "epoch": 2.3574468085106384, | |
| "grad_norm": 1.875, | |
| "learning_rate": 3.562697873712993e-06, | |
| "loss": 0.5146188139915466, | |
| "step": 1108 | |
| }, | |
| { | |
| "epoch": 2.3617021276595747, | |
| "grad_norm": 1.046875, | |
| "learning_rate": 3.548040775023951e-06, | |
| "loss": 0.4210270643234253, | |
| "step": 1110 | |
| }, | |
| { | |
| "epoch": 2.3659574468085105, | |
| "grad_norm": 0.796875, | |
| "learning_rate": 3.5334640888610656e-06, | |
| "loss": 0.4388498365879059, | |
| "step": 1112 | |
| }, | |
| { | |
| "epoch": 2.370212765957447, | |
| "grad_norm": 1.90625, | |
| "learning_rate": 3.5189681361885336e-06, | |
| "loss": 0.3667604327201843, | |
| "step": 1114 | |
| }, | |
| { | |
| "epoch": 2.374468085106383, | |
| "grad_norm": 0.36328125, | |
| "learning_rate": 3.5045532361928817e-06, | |
| "loss": 0.4419676959514618, | |
| "step": 1116 | |
| }, | |
| { | |
| "epoch": 2.378723404255319, | |
| "grad_norm": 0.66015625, | |
| "learning_rate": 3.490219706275933e-06, | |
| "loss": 0.6218468546867371, | |
| "step": 1118 | |
| }, | |
| { | |
| "epoch": 2.382978723404255, | |
| "grad_norm": 0.435546875, | |
| "learning_rate": 3.4759678620478234e-06, | |
| "loss": 0.4756940007209778, | |
| "step": 1120 | |
| }, | |
| { | |
| "epoch": 2.3872340425531915, | |
| "grad_norm": 0.375, | |
| "learning_rate": 3.4617980173200518e-06, | |
| "loss": 0.6557533144950867, | |
| "step": 1122 | |
| }, | |
| { | |
| "epoch": 2.391489361702128, | |
| "grad_norm": 0.42578125, | |
| "learning_rate": 3.447710484098571e-06, | |
| "loss": 0.3975709080696106, | |
| "step": 1124 | |
| }, | |
| { | |
| "epoch": 2.395744680851064, | |
| "grad_norm": 0.58203125, | |
| "learning_rate": 3.43370557257691e-06, | |
| "loss": 0.5444962382316589, | |
| "step": 1126 | |
| }, | |
| { | |
| "epoch": 2.4, | |
| "grad_norm": 0.47265625, | |
| "learning_rate": 3.4197835911293578e-06, | |
| "loss": 0.48340773582458496, | |
| "step": 1128 | |
| }, | |
| { | |
| "epoch": 2.404255319148936, | |
| "grad_norm": 0.5625, | |
| "learning_rate": 3.4059448463041582e-06, | |
| "loss": 0.8209078311920166, | |
| "step": 1130 | |
| }, | |
| { | |
| "epoch": 2.4085106382978725, | |
| "grad_norm": 0.462890625, | |
| "learning_rate": 3.3921896428167704e-06, | |
| "loss": 0.6566969156265259, | |
| "step": 1132 | |
| }, | |
| { | |
| "epoch": 2.4127659574468083, | |
| "grad_norm": 0.439453125, | |
| "learning_rate": 3.378518283543155e-06, | |
| "loss": 0.7115936875343323, | |
| "step": 1134 | |
| }, | |
| { | |
| "epoch": 2.4170212765957446, | |
| "grad_norm": 0.6875, | |
| "learning_rate": 3.3649310695131094e-06, | |
| "loss": 0.48289287090301514, | |
| "step": 1136 | |
| }, | |
| { | |
| "epoch": 2.421276595744681, | |
| "grad_norm": 0.462890625, | |
| "learning_rate": 3.3514282999036305e-06, | |
| "loss": 0.3552096486091614, | |
| "step": 1138 | |
| }, | |
| { | |
| "epoch": 2.425531914893617, | |
| "grad_norm": 0.400390625, | |
| "learning_rate": 3.3380102720323343e-06, | |
| "loss": 0.635092556476593, | |
| "step": 1140 | |
| }, | |
| { | |
| "epoch": 2.429787234042553, | |
| "grad_norm": 0.66796875, | |
| "learning_rate": 3.324677281350911e-06, | |
| "loss": 0.4491591453552246, | |
| "step": 1142 | |
| }, | |
| { | |
| "epoch": 2.4340425531914893, | |
| "grad_norm": 0.859375, | |
| "learning_rate": 3.3114296214386135e-06, | |
| "loss": 0.5700670480728149, | |
| "step": 1144 | |
| }, | |
| { | |
| "epoch": 2.4382978723404256, | |
| "grad_norm": 1.2265625, | |
| "learning_rate": 3.2982675839957957e-06, | |
| "loss": 0.6150033473968506, | |
| "step": 1146 | |
| }, | |
| { | |
| "epoch": 2.4425531914893615, | |
| "grad_norm": 0.5703125, | |
| "learning_rate": 3.28519145883749e-06, | |
| "loss": 0.2981261909008026, | |
| "step": 1148 | |
| }, | |
| { | |
| "epoch": 2.4468085106382977, | |
| "grad_norm": 0.4140625, | |
| "learning_rate": 3.2722015338870253e-06, | |
| "loss": 0.43131235241889954, | |
| "step": 1150 | |
| }, | |
| { | |
| "epoch": 2.451063829787234, | |
| "grad_norm": 1.1640625, | |
| "learning_rate": 3.2592980951696847e-06, | |
| "loss": 0.5070037841796875, | |
| "step": 1152 | |
| }, | |
| { | |
| "epoch": 2.4553191489361703, | |
| "grad_norm": 1.078125, | |
| "learning_rate": 3.2464814268064147e-06, | |
| "loss": 0.4555862843990326, | |
| "step": 1154 | |
| }, | |
| { | |
| "epoch": 2.4595744680851066, | |
| "grad_norm": 0.49609375, | |
| "learning_rate": 3.2337518110075632e-06, | |
| "loss": 0.5812932252883911, | |
| "step": 1156 | |
| }, | |
| { | |
| "epoch": 2.4638297872340424, | |
| "grad_norm": 1.359375, | |
| "learning_rate": 3.221109528066664e-06, | |
| "loss": 0.5926228761672974, | |
| "step": 1158 | |
| }, | |
| { | |
| "epoch": 2.4680851063829787, | |
| "grad_norm": 0.828125, | |
| "learning_rate": 3.2085548563542688e-06, | |
| "loss": 0.6022335290908813, | |
| "step": 1160 | |
| }, | |
| { | |
| "epoch": 2.472340425531915, | |
| "grad_norm": 0.7890625, | |
| "learning_rate": 3.19608807231182e-06, | |
| "loss": 0.5389635562896729, | |
| "step": 1162 | |
| }, | |
| { | |
| "epoch": 2.476595744680851, | |
| "grad_norm": 0.50390625, | |
| "learning_rate": 3.1837094504455587e-06, | |
| "loss": 0.586044192314148, | |
| "step": 1164 | |
| }, | |
| { | |
| "epoch": 2.480851063829787, | |
| "grad_norm": 0.482421875, | |
| "learning_rate": 3.17141926332048e-06, | |
| "loss": 0.5692299604415894, | |
| "step": 1166 | |
| }, | |
| { | |
| "epoch": 2.4851063829787234, | |
| "grad_norm": 0.44140625, | |
| "learning_rate": 3.159217781554335e-06, | |
| "loss": 0.658069372177124, | |
| "step": 1168 | |
| }, | |
| { | |
| "epoch": 2.4893617021276597, | |
| "grad_norm": 0.416015625, | |
| "learning_rate": 3.1471052738116726e-06, | |
| "loss": 0.5921551585197449, | |
| "step": 1170 | |
| }, | |
| { | |
| "epoch": 2.4936170212765956, | |
| "grad_norm": 0.35546875, | |
| "learning_rate": 3.135082006797918e-06, | |
| "loss": 0.45771515369415283, | |
| "step": 1172 | |
| }, | |
| { | |
| "epoch": 2.497872340425532, | |
| "grad_norm": 0.490234375, | |
| "learning_rate": 3.123148245253508e-06, | |
| "loss": 0.3539358079433441, | |
| "step": 1174 | |
| }, | |
| { | |
| "epoch": 2.502127659574468, | |
| "grad_norm": 0.41796875, | |
| "learning_rate": 3.111304251948056e-06, | |
| "loss": 0.6486715078353882, | |
| "step": 1176 | |
| }, | |
| { | |
| "epoch": 2.506382978723404, | |
| "grad_norm": 0.75, | |
| "learning_rate": 3.0995502876745657e-06, | |
| "loss": 0.3491562008857727, | |
| "step": 1178 | |
| }, | |
| { | |
| "epoch": 2.5106382978723403, | |
| "grad_norm": 1.046875, | |
| "learning_rate": 3.087886611243692e-06, | |
| "loss": 0.554216742515564, | |
| "step": 1180 | |
| }, | |
| { | |
| "epoch": 2.5148936170212766, | |
| "grad_norm": 0.447265625, | |
| "learning_rate": 3.076313479478042e-06, | |
| "loss": 0.46358993649482727, | |
| "step": 1182 | |
| }, | |
| { | |
| "epoch": 2.519148936170213, | |
| "grad_norm": 2.625, | |
| "learning_rate": 3.064831147206519e-06, | |
| "loss": 0.7309602499008179, | |
| "step": 1184 | |
| }, | |
| { | |
| "epoch": 2.523404255319149, | |
| "grad_norm": 0.380859375, | |
| "learning_rate": 3.05343986725871e-06, | |
| "loss": 0.5900013446807861, | |
| "step": 1186 | |
| }, | |
| { | |
| "epoch": 2.527659574468085, | |
| "grad_norm": 1.734375, | |
| "learning_rate": 3.0421398904593186e-06, | |
| "loss": 0.8710350394248962, | |
| "step": 1188 | |
| }, | |
| { | |
| "epoch": 2.5319148936170213, | |
| "grad_norm": 0.34375, | |
| "learning_rate": 3.030931465622647e-06, | |
| "loss": 0.7665842175483704, | |
| "step": 1190 | |
| }, | |
| { | |
| "epoch": 2.5361702127659576, | |
| "grad_norm": 0.423828125, | |
| "learning_rate": 3.0198148395471105e-06, | |
| "loss": 0.5311375260353088, | |
| "step": 1192 | |
| }, | |
| { | |
| "epoch": 2.5404255319148934, | |
| "grad_norm": 0.41015625, | |
| "learning_rate": 3.00879025700981e-06, | |
| "loss": 0.2682938873767853, | |
| "step": 1194 | |
| }, | |
| { | |
| "epoch": 2.5446808510638297, | |
| "grad_norm": 0.9140625, | |
| "learning_rate": 2.997857960761137e-06, | |
| "loss": 0.5427325367927551, | |
| "step": 1196 | |
| }, | |
| { | |
| "epoch": 2.548936170212766, | |
| "grad_norm": 0.765625, | |
| "learning_rate": 2.98701819151943e-06, | |
| "loss": 0.49154531955718994, | |
| "step": 1198 | |
| }, | |
| { | |
| "epoch": 2.5531914893617023, | |
| "grad_norm": 0.39453125, | |
| "learning_rate": 2.976271187965673e-06, | |
| "loss": 0.5094670057296753, | |
| "step": 1200 | |
| }, | |
| { | |
| "epoch": 2.5574468085106385, | |
| "grad_norm": 0.71875, | |
| "learning_rate": 2.9656171867382446e-06, | |
| "loss": 0.4511142075061798, | |
| "step": 1202 | |
| }, | |
| { | |
| "epoch": 2.5617021276595744, | |
| "grad_norm": 0.5859375, | |
| "learning_rate": 2.955056422427704e-06, | |
| "loss": 0.5634865760803223, | |
| "step": 1204 | |
| }, | |
| { | |
| "epoch": 2.5659574468085107, | |
| "grad_norm": 0.51953125, | |
| "learning_rate": 2.9445891275716233e-06, | |
| "loss": 0.3763676583766937, | |
| "step": 1206 | |
| }, | |
| { | |
| "epoch": 2.570212765957447, | |
| "grad_norm": 0.703125, | |
| "learning_rate": 2.9342155326494704e-06, | |
| "loss": 0.5212900638580322, | |
| "step": 1208 | |
| }, | |
| { | |
| "epoch": 2.574468085106383, | |
| "grad_norm": 0.72265625, | |
| "learning_rate": 2.9239358660775357e-06, | |
| "loss": 0.4663785994052887, | |
| "step": 1210 | |
| }, | |
| { | |
| "epoch": 2.578723404255319, | |
| "grad_norm": 0.50390625, | |
| "learning_rate": 2.9137503542038966e-06, | |
| "loss": 0.5414974093437195, | |
| "step": 1212 | |
| }, | |
| { | |
| "epoch": 2.5829787234042554, | |
| "grad_norm": 0.470703125, | |
| "learning_rate": 2.903659221303441e-06, | |
| "loss": 0.6152816414833069, | |
| "step": 1214 | |
| }, | |
| { | |
| "epoch": 2.5872340425531917, | |
| "grad_norm": 0.455078125, | |
| "learning_rate": 2.893662689572925e-06, | |
| "loss": 0.42417243123054504, | |
| "step": 1216 | |
| }, | |
| { | |
| "epoch": 2.5914893617021275, | |
| "grad_norm": 0.419921875, | |
| "learning_rate": 2.883760979126076e-06, | |
| "loss": 0.6008761525154114, | |
| "step": 1218 | |
| }, | |
| { | |
| "epoch": 2.595744680851064, | |
| "grad_norm": 0.90234375, | |
| "learning_rate": 2.8739543079887554e-06, | |
| "loss": 0.749297022819519, | |
| "step": 1220 | |
| }, | |
| { | |
| "epoch": 2.6, | |
| "grad_norm": 0.94140625, | |
| "learning_rate": 2.8642428920941513e-06, | |
| "loss": 0.6406426429748535, | |
| "step": 1222 | |
| }, | |
| { | |
| "epoch": 2.604255319148936, | |
| "grad_norm": 0.63671875, | |
| "learning_rate": 2.8546269452780275e-06, | |
| "loss": 0.5915369391441345, | |
| "step": 1224 | |
| }, | |
| { | |
| "epoch": 2.608510638297872, | |
| "grad_norm": 2.078125, | |
| "learning_rate": 2.8451066792740108e-06, | |
| "loss": 0.7708158493041992, | |
| "step": 1226 | |
| }, | |
| { | |
| "epoch": 2.6127659574468085, | |
| "grad_norm": 0.796875, | |
| "learning_rate": 2.835682303708931e-06, | |
| "loss": 0.2944878339767456, | |
| "step": 1228 | |
| }, | |
| { | |
| "epoch": 2.617021276595745, | |
| "grad_norm": 0.56640625, | |
| "learning_rate": 2.826354026098208e-06, | |
| "loss": 0.4445026218891144, | |
| "step": 1230 | |
| }, | |
| { | |
| "epoch": 2.621276595744681, | |
| "grad_norm": 0.546875, | |
| "learning_rate": 2.817122051841277e-06, | |
| "loss": 0.5953022837638855, | |
| "step": 1232 | |
| }, | |
| { | |
| "epoch": 2.625531914893617, | |
| "grad_norm": 0.66015625, | |
| "learning_rate": 2.807986584217072e-06, | |
| "loss": 0.47725632786750793, | |
| "step": 1234 | |
| }, | |
| { | |
| "epoch": 2.629787234042553, | |
| "grad_norm": 3.375, | |
| "learning_rate": 2.7989478243795434e-06, | |
| "loss": 0.5917444229125977, | |
| "step": 1236 | |
| }, | |
| { | |
| "epoch": 2.6340425531914895, | |
| "grad_norm": 0.58984375, | |
| "learning_rate": 2.790005971353233e-06, | |
| "loss": 0.6352754831314087, | |
| "step": 1238 | |
| }, | |
| { | |
| "epoch": 2.6382978723404253, | |
| "grad_norm": 0.400390625, | |
| "learning_rate": 2.7811612220288905e-06, | |
| "loss": 0.5205258131027222, | |
| "step": 1240 | |
| }, | |
| { | |
| "epoch": 2.6425531914893616, | |
| "grad_norm": 0.53515625, | |
| "learning_rate": 2.77241377115914e-06, | |
| "loss": 0.716691255569458, | |
| "step": 1242 | |
| }, | |
| { | |
| "epoch": 2.646808510638298, | |
| "grad_norm": 0.74609375, | |
| "learning_rate": 2.7637638113541866e-06, | |
| "loss": 0.3764870762825012, | |
| "step": 1244 | |
| }, | |
| { | |
| "epoch": 2.651063829787234, | |
| "grad_norm": 0.95703125, | |
| "learning_rate": 2.755211533077581e-06, | |
| "loss": 0.5524653196334839, | |
| "step": 1246 | |
| }, | |
| { | |
| "epoch": 2.65531914893617, | |
| "grad_norm": 0.4375, | |
| "learning_rate": 2.746757124642024e-06, | |
| "loss": 0.5442506074905396, | |
| "step": 1248 | |
| }, | |
| { | |
| "epoch": 2.6595744680851063, | |
| "grad_norm": 0.53515625, | |
| "learning_rate": 2.7384007722052168e-06, | |
| "loss": 0.5800641775131226, | |
| "step": 1250 | |
| }, | |
| { | |
| "epoch": 2.6638297872340426, | |
| "grad_norm": 0.5546875, | |
| "learning_rate": 2.7301426597657662e-06, | |
| "loss": 0.5853485465049744, | |
| "step": 1252 | |
| }, | |
| { | |
| "epoch": 2.6680851063829785, | |
| "grad_norm": 0.53515625, | |
| "learning_rate": 2.721982969159132e-06, | |
| "loss": 0.38345175981521606, | |
| "step": 1254 | |
| }, | |
| { | |
| "epoch": 2.6723404255319148, | |
| "grad_norm": 0.421875, | |
| "learning_rate": 2.7139218800536224e-06, | |
| "loss": 0.6944982409477234, | |
| "step": 1256 | |
| }, | |
| { | |
| "epoch": 2.676595744680851, | |
| "grad_norm": 0.384765625, | |
| "learning_rate": 2.7059595699464363e-06, | |
| "loss": 0.5350843667984009, | |
| "step": 1258 | |
| }, | |
| { | |
| "epoch": 2.6808510638297873, | |
| "grad_norm": 0.380859375, | |
| "learning_rate": 2.6980962141597594e-06, | |
| "loss": 0.5438748598098755, | |
| "step": 1260 | |
| }, | |
| { | |
| "epoch": 2.6851063829787236, | |
| "grad_norm": 0.431640625, | |
| "learning_rate": 2.6903319858369005e-06, | |
| "loss": 0.7831379175186157, | |
| "step": 1262 | |
| }, | |
| { | |
| "epoch": 2.6893617021276595, | |
| "grad_norm": 0.83203125, | |
| "learning_rate": 2.6826670559384784e-06, | |
| "loss": 0.3888491094112396, | |
| "step": 1264 | |
| }, | |
| { | |
| "epoch": 2.6936170212765957, | |
| "grad_norm": 0.63671875, | |
| "learning_rate": 2.6751015932386615e-06, | |
| "loss": 0.4081690311431885, | |
| "step": 1266 | |
| }, | |
| { | |
| "epoch": 2.697872340425532, | |
| "grad_norm": 0.55859375, | |
| "learning_rate": 2.6676357643214467e-06, | |
| "loss": 0.757609486579895, | |
| "step": 1268 | |
| }, | |
| { | |
| "epoch": 2.702127659574468, | |
| "grad_norm": 0.359375, | |
| "learning_rate": 2.660269733576995e-06, | |
| "loss": 0.2168269008398056, | |
| "step": 1270 | |
| }, | |
| { | |
| "epoch": 2.706382978723404, | |
| "grad_norm": 0.4453125, | |
| "learning_rate": 2.6530036631980093e-06, | |
| "loss": 0.5121868848800659, | |
| "step": 1272 | |
| }, | |
| { | |
| "epoch": 2.7106382978723405, | |
| "grad_norm": 0.67578125, | |
| "learning_rate": 2.6458377131761655e-06, | |
| "loss": 0.588572084903717, | |
| "step": 1274 | |
| }, | |
| { | |
| "epoch": 2.7148936170212767, | |
| "grad_norm": 0.396484375, | |
| "learning_rate": 2.6387720412985873e-06, | |
| "loss": 0.5837306380271912, | |
| "step": 1276 | |
| }, | |
| { | |
| "epoch": 2.719148936170213, | |
| "grad_norm": 1.2734375, | |
| "learning_rate": 2.631806803144373e-06, | |
| "loss": 0.5779358148574829, | |
| "step": 1278 | |
| }, | |
| { | |
| "epoch": 2.723404255319149, | |
| "grad_norm": 1.1171875, | |
| "learning_rate": 2.624942152081171e-06, | |
| "loss": 0.42244261503219604, | |
| "step": 1280 | |
| }, | |
| { | |
| "epoch": 2.727659574468085, | |
| "grad_norm": 1.1796875, | |
| "learning_rate": 2.6181782392618002e-06, | |
| "loss": 0.5723677277565002, | |
| "step": 1282 | |
| }, | |
| { | |
| "epoch": 2.731914893617021, | |
| "grad_norm": 0.65625, | |
| "learning_rate": 2.611515213620924e-06, | |
| "loss": 0.6737433075904846, | |
| "step": 1284 | |
| }, | |
| { | |
| "epoch": 2.7361702127659573, | |
| "grad_norm": 0.953125, | |
| "learning_rate": 2.604953221871769e-06, | |
| "loss": 0.6697869300842285, | |
| "step": 1286 | |
| }, | |
| { | |
| "epoch": 2.7404255319148936, | |
| "grad_norm": 0.431640625, | |
| "learning_rate": 2.5984924085028968e-06, | |
| "loss": 0.41797778010368347, | |
| "step": 1288 | |
| }, | |
| { | |
| "epoch": 2.74468085106383, | |
| "grad_norm": 0.53515625, | |
| "learning_rate": 2.5921329157750205e-06, | |
| "loss": 0.6901787519454956, | |
| "step": 1290 | |
| }, | |
| { | |
| "epoch": 2.748936170212766, | |
| "grad_norm": 0.62109375, | |
| "learning_rate": 2.5858748837178724e-06, | |
| "loss": 0.48409298062324524, | |
| "step": 1292 | |
| }, | |
| { | |
| "epoch": 2.753191489361702, | |
| "grad_norm": 1.4765625, | |
| "learning_rate": 2.579718450127124e-06, | |
| "loss": 0.48840850591659546, | |
| "step": 1294 | |
| }, | |
| { | |
| "epoch": 2.7574468085106383, | |
| "grad_norm": 0.515625, | |
| "learning_rate": 2.5736637505613453e-06, | |
| "loss": 0.5451318621635437, | |
| "step": 1296 | |
| }, | |
| { | |
| "epoch": 2.7617021276595746, | |
| "grad_norm": 0.609375, | |
| "learning_rate": 2.5677109183390254e-06, | |
| "loss": 0.3569204807281494, | |
| "step": 1298 | |
| }, | |
| { | |
| "epoch": 2.7659574468085104, | |
| "grad_norm": 0.43359375, | |
| "learning_rate": 2.5618600845356374e-06, | |
| "loss": 0.6634436845779419, | |
| "step": 1300 | |
| }, | |
| { | |
| "epoch": 2.7702127659574467, | |
| "grad_norm": 0.416015625, | |
| "learning_rate": 2.5561113779807473e-06, | |
| "loss": 0.40077003836631775, | |
| "step": 1302 | |
| }, | |
| { | |
| "epoch": 2.774468085106383, | |
| "grad_norm": 2.5625, | |
| "learning_rate": 2.550464925255182e-06, | |
| "loss": 0.49653542041778564, | |
| "step": 1304 | |
| }, | |
| { | |
| "epoch": 2.7787234042553193, | |
| "grad_norm": 0.498046875, | |
| "learning_rate": 2.544920850688239e-06, | |
| "loss": 0.3718079626560211, | |
| "step": 1306 | |
| }, | |
| { | |
| "epoch": 2.7829787234042556, | |
| "grad_norm": 0.490234375, | |
| "learning_rate": 2.5394792763549506e-06, | |
| "loss": 0.6696460843086243, | |
| "step": 1308 | |
| }, | |
| { | |
| "epoch": 2.7872340425531914, | |
| "grad_norm": 0.494140625, | |
| "learning_rate": 2.534140322073397e-06, | |
| "loss": 0.4750995337963104, | |
| "step": 1310 | |
| }, | |
| { | |
| "epoch": 2.7914893617021277, | |
| "grad_norm": 0.412109375, | |
| "learning_rate": 2.5289041054020637e-06, | |
| "loss": 0.38971856236457825, | |
| "step": 1312 | |
| }, | |
| { | |
| "epoch": 2.795744680851064, | |
| "grad_norm": 0.44921875, | |
| "learning_rate": 2.523770741637259e-06, | |
| "loss": 0.5828387141227722, | |
| "step": 1314 | |
| }, | |
| { | |
| "epoch": 2.8, | |
| "grad_norm": 0.494140625, | |
| "learning_rate": 2.518740343810568e-06, | |
| "loss": 0.2812992334365845, | |
| "step": 1316 | |
| }, | |
| { | |
| "epoch": 2.804255319148936, | |
| "grad_norm": 0.69921875, | |
| "learning_rate": 2.513813022686371e-06, | |
| "loss": 0.6449145674705505, | |
| "step": 1318 | |
| }, | |
| { | |
| "epoch": 2.8085106382978724, | |
| "grad_norm": 0.59375, | |
| "learning_rate": 2.5089888867594004e-06, | |
| "loss": 0.42496779561042786, | |
| "step": 1320 | |
| }, | |
| { | |
| "epoch": 2.8127659574468087, | |
| "grad_norm": 0.90625, | |
| "learning_rate": 2.5042680422523538e-06, | |
| "loss": 0.6403509974479675, | |
| "step": 1322 | |
| }, | |
| { | |
| "epoch": 2.8170212765957445, | |
| "grad_norm": 0.65625, | |
| "learning_rate": 2.4996505931135513e-06, | |
| "loss": 0.5965058207511902, | |
| "step": 1324 | |
| }, | |
| { | |
| "epoch": 2.821276595744681, | |
| "grad_norm": 0.55078125, | |
| "learning_rate": 2.4951366410146506e-06, | |
| "loss": 0.38872432708740234, | |
| "step": 1326 | |
| }, | |
| { | |
| "epoch": 2.825531914893617, | |
| "grad_norm": 0.60546875, | |
| "learning_rate": 2.4907262853484093e-06, | |
| "loss": 0.47181040048599243, | |
| "step": 1328 | |
| }, | |
| { | |
| "epoch": 2.829787234042553, | |
| "grad_norm": 0.40234375, | |
| "learning_rate": 2.4864196232264913e-06, | |
| "loss": 0.5333115458488464, | |
| "step": 1330 | |
| }, | |
| { | |
| "epoch": 2.8340425531914892, | |
| "grad_norm": 0.6015625, | |
| "learning_rate": 2.4822167494773325e-06, | |
| "loss": 0.6577153205871582, | |
| "step": 1332 | |
| }, | |
| { | |
| "epoch": 2.8382978723404255, | |
| "grad_norm": 0.55078125, | |
| "learning_rate": 2.4781177566440513e-06, | |
| "loss": 0.544109046459198, | |
| "step": 1334 | |
| }, | |
| { | |
| "epoch": 2.842553191489362, | |
| "grad_norm": 0.96484375, | |
| "learning_rate": 2.474122734982411e-06, | |
| "loss": 0.26606178283691406, | |
| "step": 1336 | |
| }, | |
| { | |
| "epoch": 2.846808510638298, | |
| "grad_norm": 0.3984375, | |
| "learning_rate": 2.4702317724588332e-06, | |
| "loss": 0.486730694770813, | |
| "step": 1338 | |
| }, | |
| { | |
| "epoch": 2.851063829787234, | |
| "grad_norm": 0.58203125, | |
| "learning_rate": 2.4664449547484595e-06, | |
| "loss": 0.47592607140541077, | |
| "step": 1340 | |
| }, | |
| { | |
| "epoch": 2.8553191489361702, | |
| "grad_norm": 0.5625, | |
| "learning_rate": 2.462762365233268e-06, | |
| "loss": 0.4367084801197052, | |
| "step": 1342 | |
| }, | |
| { | |
| "epoch": 2.8595744680851065, | |
| "grad_norm": 0.4453125, | |
| "learning_rate": 2.459184085000232e-06, | |
| "loss": 0.3742711842060089, | |
| "step": 1344 | |
| }, | |
| { | |
| "epoch": 2.8638297872340424, | |
| "grad_norm": 0.6328125, | |
| "learning_rate": 2.455710192839539e-06, | |
| "loss": 0.5936036705970764, | |
| "step": 1346 | |
| }, | |
| { | |
| "epoch": 2.8680851063829786, | |
| "grad_norm": 0.404296875, | |
| "learning_rate": 2.452340765242855e-06, | |
| "loss": 0.6136466860771179, | |
| "step": 1348 | |
| }, | |
| { | |
| "epoch": 2.872340425531915, | |
| "grad_norm": 0.625, | |
| "learning_rate": 2.449075876401641e-06, | |
| "loss": 0.6735158562660217, | |
| "step": 1350 | |
| }, | |
| { | |
| "epoch": 2.876595744680851, | |
| "grad_norm": 0.62109375, | |
| "learning_rate": 2.4459155982055145e-06, | |
| "loss": 0.6614925861358643, | |
| "step": 1352 | |
| }, | |
| { | |
| "epoch": 2.8808510638297875, | |
| "grad_norm": 1.015625, | |
| "learning_rate": 2.4428600002406735e-06, | |
| "loss": 0.780015230178833, | |
| "step": 1354 | |
| }, | |
| { | |
| "epoch": 2.8851063829787233, | |
| "grad_norm": 0.51953125, | |
| "learning_rate": 2.4399091497883596e-06, | |
| "loss": 0.38140493631362915, | |
| "step": 1356 | |
| }, | |
| { | |
| "epoch": 2.8893617021276596, | |
| "grad_norm": 0.482421875, | |
| "learning_rate": 2.4370631118233766e-06, | |
| "loss": 0.38466039299964905, | |
| "step": 1358 | |
| }, | |
| { | |
| "epoch": 2.8936170212765955, | |
| "grad_norm": 0.5390625, | |
| "learning_rate": 2.4343219490126636e-06, | |
| "loss": 0.6831486821174622, | |
| "step": 1360 | |
| }, | |
| { | |
| "epoch": 2.8978723404255318, | |
| "grad_norm": 0.474609375, | |
| "learning_rate": 2.4316857217139125e-06, | |
| "loss": 0.5675507187843323, | |
| "step": 1362 | |
| }, | |
| { | |
| "epoch": 2.902127659574468, | |
| "grad_norm": 0.4765625, | |
| "learning_rate": 2.429154487974237e-06, | |
| "loss": 0.5387779474258423, | |
| "step": 1364 | |
| }, | |
| { | |
| "epoch": 2.9063829787234043, | |
| "grad_norm": 0.466796875, | |
| "learning_rate": 2.4267283035288974e-06, | |
| "loss": 0.5070762634277344, | |
| "step": 1366 | |
| }, | |
| { | |
| "epoch": 2.9106382978723406, | |
| "grad_norm": 0.404296875, | |
| "learning_rate": 2.4244072218000737e-06, | |
| "loss": 0.49968618154525757, | |
| "step": 1368 | |
| }, | |
| { | |
| "epoch": 2.9148936170212765, | |
| "grad_norm": 0.439453125, | |
| "learning_rate": 2.422191293895687e-06, | |
| "loss": 0.7925405502319336, | |
| "step": 1370 | |
| }, | |
| { | |
| "epoch": 2.9191489361702128, | |
| "grad_norm": 0.8203125, | |
| "learning_rate": 2.4200805686082757e-06, | |
| "loss": 0.4414962828159332, | |
| "step": 1372 | |
| }, | |
| { | |
| "epoch": 2.923404255319149, | |
| "grad_norm": 0.72265625, | |
| "learning_rate": 2.4180750924139205e-06, | |
| "loss": 0.5193897485733032, | |
| "step": 1374 | |
| }, | |
| { | |
| "epoch": 2.927659574468085, | |
| "grad_norm": 0.455078125, | |
| "learning_rate": 2.4161749094712216e-06, | |
| "loss": 0.5439836978912354, | |
| "step": 1376 | |
| }, | |
| { | |
| "epoch": 2.931914893617021, | |
| "grad_norm": 1.015625, | |
| "learning_rate": 2.414380061620327e-06, | |
| "loss": 0.5974451899528503, | |
| "step": 1378 | |
| }, | |
| { | |
| "epoch": 2.9361702127659575, | |
| "grad_norm": 0.482421875, | |
| "learning_rate": 2.4126905883820076e-06, | |
| "loss": 0.43398624658584595, | |
| "step": 1380 | |
| }, | |
| { | |
| "epoch": 2.9404255319148938, | |
| "grad_norm": 0.69921875, | |
| "learning_rate": 2.411106526956792e-06, | |
| "loss": 0.7541142702102661, | |
| "step": 1382 | |
| }, | |
| { | |
| "epoch": 2.94468085106383, | |
| "grad_norm": 0.498046875, | |
| "learning_rate": 2.4096279122241438e-06, | |
| "loss": 0.592811107635498, | |
| "step": 1384 | |
| }, | |
| { | |
| "epoch": 2.948936170212766, | |
| "grad_norm": 0.392578125, | |
| "learning_rate": 2.408254776741697e-06, | |
| "loss": 0.6341920495033264, | |
| "step": 1386 | |
| }, | |
| { | |
| "epoch": 2.953191489361702, | |
| "grad_norm": 0.76171875, | |
| "learning_rate": 2.4069871507445332e-06, | |
| "loss": 0.755580484867096, | |
| "step": 1388 | |
| }, | |
| { | |
| "epoch": 2.9574468085106385, | |
| "grad_norm": 0.455078125, | |
| "learning_rate": 2.4058250621445224e-06, | |
| "loss": 0.682244598865509, | |
| "step": 1390 | |
| }, | |
| { | |
| "epoch": 2.9617021276595743, | |
| "grad_norm": 0.484375, | |
| "learning_rate": 2.4047685365297056e-06, | |
| "loss": 0.5976744890213013, | |
| "step": 1392 | |
| }, | |
| { | |
| "epoch": 2.9659574468085106, | |
| "grad_norm": 0.4375, | |
| "learning_rate": 2.403817597163731e-06, | |
| "loss": 0.5079911351203918, | |
| "step": 1394 | |
| }, | |
| { | |
| "epoch": 2.970212765957447, | |
| "grad_norm": 1.4296875, | |
| "learning_rate": 2.402972264985341e-06, | |
| "loss": 0.4225712716579437, | |
| "step": 1396 | |
| }, | |
| { | |
| "epoch": 2.974468085106383, | |
| "grad_norm": 0.57421875, | |
| "learning_rate": 2.4022325586079132e-06, | |
| "loss": 0.6215579509735107, | |
| "step": 1398 | |
| }, | |
| { | |
| "epoch": 2.978723404255319, | |
| "grad_norm": 0.3828125, | |
| "learning_rate": 2.4015984943190496e-06, | |
| "loss": 0.455652117729187, | |
| "step": 1400 | |
| }, | |
| { | |
| "epoch": 2.9829787234042553, | |
| "grad_norm": 0.70703125, | |
| "learning_rate": 2.401070086080218e-06, | |
| "loss": 0.5189418792724609, | |
| "step": 1402 | |
| }, | |
| { | |
| "epoch": 2.9872340425531916, | |
| "grad_norm": 2.890625, | |
| "learning_rate": 2.400647345526445e-06, | |
| "loss": 0.5081955790519714, | |
| "step": 1404 | |
| }, | |
| { | |
| "epoch": 2.9914893617021274, | |
| "grad_norm": 0.443359375, | |
| "learning_rate": 2.400330281966059e-06, | |
| "loss": 0.5243685841560364, | |
| "step": 1406 | |
| }, | |
| { | |
| "epoch": 2.9957446808510637, | |
| "grad_norm": 0.6796875, | |
| "learning_rate": 2.400118902380485e-06, | |
| "loss": 0.6540034413337708, | |
| "step": 1408 | |
| }, | |
| { | |
| "epoch": 3.0, | |
| "grad_norm": 1.359375, | |
| "learning_rate": 2.400013211424094e-06, | |
| "loss": 0.3355269134044647, | |
| "step": 1410 | |
| }, | |
| { | |
| "epoch": 3.0, | |
| "step": 1410, | |
| "total_flos": 4.1743170019314893e+18, | |
| "train_loss": 0.8916917941037644, | |
| "train_runtime": 10519.9057, | |
| "train_samples_per_second": 4.289, | |
| "train_steps_per_second": 0.134 | |
| } | |
| ], | |
| "logging_steps": 2, | |
| "max_steps": 1410, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 3, | |
| "save_steps": 99999, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": true | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 4.1743170019314893e+18, | |
| "train_batch_size": 2, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |