Image-Text-to-Text
Transformers
Safetensors
qwen3_5
llama-factory
full
Generated from Trainer
conversational
Instructions to use furproxy/9b-68 with libraries, inference providers, notebooks, and local apps. Follow these links to get started.
- Libraries
- Transformers
How to use furproxy/9b-68 with Transformers:
```python
# Use a pipeline as a high-level helper
from transformers import pipeline

pipe = pipeline("image-text-to-text", model="furproxy/9b-68")
messages = [
    {
        "role": "user",
        "content": [
            {"type": "image", "url": "https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/p-blog/candy.JPG"},
            {"type": "text", "text": "What animal is on the candy?"}
        ]
    },
]
pipe(text=messages)
```

```python
# Load model directly
from transformers import AutoProcessor, AutoModelForImageTextToText

processor = AutoProcessor.from_pretrained("furproxy/9b-68")
model = AutoModelForImageTextToText.from_pretrained("furproxy/9b-68")
messages = [
    {
        "role": "user",
        "content": [
            {"type": "image", "url": "https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/p-blog/candy.JPG"},
            {"type": "text", "text": "What animal is on the candy?"}
        ]
    },
]
inputs = processor.apply_chat_template(
    messages,
    add_generation_prompt=True,
    tokenize=True,
    return_dict=True,
    return_tensors="pt",
).to(model.device)

outputs = model.generate(**inputs, max_new_tokens=40)
print(processor.decode(outputs[0][inputs["input_ids"].shape[-1]:]))
```
- Notebooks
- Google Colab
- Kaggle
- Local Apps Settings
- vLLM
How to use furproxy/9b-68 with vLLM:
Install from pip and serve model
```bash
# Install vLLM from pip:
pip install vllm

# Start the vLLM server:
vllm serve "furproxy/9b-68"

# Call the server using curl (OpenAI-compatible API):
curl -X POST "http://localhost:8000/v1/chat/completions" \
  -H "Content-Type: application/json" \
  --data '{
    "model": "furproxy/9b-68",
    "messages": [
      {
        "role": "user",
        "content": [
          {
            "type": "text",
            "text": "Describe this image in one sentence."
          },
          {
            "type": "image_url",
            "image_url": {
              "url": "https://cdn.britannica.com/61/93061-050-99147DCE/Statue-of-Liberty-Island-New-York-Bay.jpg"
            }
          }
        ]
      }
    ]
  }'
```
Use Docker
docker model run hf.co/furproxy/9b-68
- SGLang
How to use furproxy/9b-68 with SGLang:
Install from pip and serve model
```bash
# Install SGLang from pip:
pip install sglang

# Start the SGLang server:
python3 -m sglang.launch_server \
  --model-path "furproxy/9b-68" \
  --host 0.0.0.0 \
  --port 30000

# Call the server using curl (OpenAI-compatible API):
curl -X POST "http://localhost:30000/v1/chat/completions" \
  -H "Content-Type: application/json" \
  --data '{
    "model": "furproxy/9b-68",
    "messages": [
      {
        "role": "user",
        "content": [
          {
            "type": "text",
            "text": "Describe this image in one sentence."
          },
          {
            "type": "image_url",
            "image_url": {
              "url": "https://cdn.britannica.com/61/93061-050-99147DCE/Statue-of-Liberty-Island-New-York-Bay.jpg"
            }
          }
        ]
      }
    ]
  }'
```
Use Docker images
```bash
docker run --gpus all \
  --shm-size 32g \
  -p 30000:30000 \
  -v ~/.cache/huggingface:/root/.cache/huggingface \
  --env "HF_TOKEN=<secret>" \
  --ipc=host \
  lmsysorg/sglang:latest \
  python3 -m sglang.launch_server \
    --model-path "furproxy/9b-68" \
    --host 0.0.0.0 \
    --port 30000

# Call the server using curl (OpenAI-compatible API):
curl -X POST "http://localhost:30000/v1/chat/completions" \
  -H "Content-Type: application/json" \
  --data '{
    "model": "furproxy/9b-68",
    "messages": [
      {
        "role": "user",
        "content": [
          {
            "type": "text",
            "text": "Describe this image in one sentence."
          },
          {
            "type": "image_url",
            "image_url": {
              "url": "https://cdn.britannica.com/61/93061-050-99147DCE/Statue-of-Liberty-Island-New-York-Bay.jpg"
            }
          }
        ]
      }
    ]
  }'
```
- Docker Model Runner
How to use furproxy/9b-68 with Docker Model Runner:
docker model run hf.co/furproxy/9b-68
| { | |
| "best_global_step": null, | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 4.0, | |
| "eval_steps": 500, | |
| "global_step": 1004, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.00796812749003984, | |
| "grad_norm": 0.8635491728782654, | |
| "learning_rate": 5.882352941176471e-08, | |
| "loss": 2.092526435852051, | |
| "step": 2 | |
| }, | |
| { | |
| "epoch": 0.01593625498007968, | |
| "grad_norm": 0.65386563539505, | |
| "learning_rate": 1.764705882352941e-07, | |
| "loss": 2.009572982788086, | |
| "step": 4 | |
| }, | |
| { | |
| "epoch": 0.02390438247011952, | |
| "grad_norm": 2.1573679447174072, | |
| "learning_rate": 2.9411764705882356e-07, | |
| "loss": 2.2031428813934326, | |
| "step": 6 | |
| }, | |
| { | |
| "epoch": 0.03187250996015936, | |
| "grad_norm": 0.7024685740470886, | |
| "learning_rate": 4.11764705882353e-07, | |
| "loss": 1.9222521781921387, | |
| "step": 8 | |
| }, | |
| { | |
| "epoch": 0.0398406374501992, | |
| "grad_norm": 2.466196298599243, | |
| "learning_rate": 5.294117647058824e-07, | |
| "loss": 1.5961482524871826, | |
| "step": 10 | |
| }, | |
| { | |
| "epoch": 0.04780876494023904, | |
| "grad_norm": 0.8721945285797119, | |
| "learning_rate": 6.470588235294118e-07, | |
| "loss": 1.8626258373260498, | |
| "step": 12 | |
| }, | |
| { | |
| "epoch": 0.055776892430278883, | |
| "grad_norm": 0.9898734092712402, | |
| "learning_rate": 7.647058823529412e-07, | |
| "loss": 2.907916784286499, | |
| "step": 14 | |
| }, | |
| { | |
| "epoch": 0.06374501992031872, | |
| "grad_norm": 3.7050821781158447, | |
| "learning_rate": 8.823529411764706e-07, | |
| "loss": 5.169388771057129, | |
| "step": 16 | |
| }, | |
| { | |
| "epoch": 0.07171314741035857, | |
| "grad_norm": 1.2959784269332886, | |
| "learning_rate": 1e-06, | |
| "loss": 1.9087082147598267, | |
| "step": 18 | |
| }, | |
| { | |
| "epoch": 0.0796812749003984, | |
| "grad_norm": 0.5755355358123779, | |
| "learning_rate": 1.1176470588235294e-06, | |
| "loss": 1.741716980934143, | |
| "step": 20 | |
| }, | |
| { | |
| "epoch": 0.08764940239043825, | |
| "grad_norm": 0.5503631234169006, | |
| "learning_rate": 1.2352941176470588e-06, | |
| "loss": 2.108546733856201, | |
| "step": 22 | |
| }, | |
| { | |
| "epoch": 0.09561752988047809, | |
| "grad_norm": 1.9359296560287476, | |
| "learning_rate": 1.3529411764705883e-06, | |
| "loss": 1.5568987131118774, | |
| "step": 24 | |
| }, | |
| { | |
| "epoch": 0.10358565737051793, | |
| "grad_norm": 0.3511880934238434, | |
| "learning_rate": 1.4705882352941175e-06, | |
| "loss": 1.6464097499847412, | |
| "step": 26 | |
| }, | |
| { | |
| "epoch": 0.11155378486055777, | |
| "grad_norm": 1.7009022235870361, | |
| "learning_rate": 1.5882352941176472e-06, | |
| "loss": 1.681158185005188, | |
| "step": 28 | |
| }, | |
| { | |
| "epoch": 0.11952191235059761, | |
| "grad_norm": 0.9740716814994812, | |
| "learning_rate": 1.7058823529411764e-06, | |
| "loss": 1.4671032428741455, | |
| "step": 30 | |
| }, | |
| { | |
| "epoch": 0.12749003984063745, | |
| "grad_norm": 1.555565595626831, | |
| "learning_rate": 1.8235294117647058e-06, | |
| "loss": 1.2685446739196777, | |
| "step": 32 | |
| }, | |
| { | |
| "epoch": 0.13545816733067728, | |
| "grad_norm": 0.8954079151153564, | |
| "learning_rate": 1.9411764705882353e-06, | |
| "loss": 1.3833661079406738, | |
| "step": 34 | |
| }, | |
| { | |
| "epoch": 0.14342629482071714, | |
| "grad_norm": 0.5755887627601624, | |
| "learning_rate": 2.058823529411765e-06, | |
| "loss": 1.5257431268692017, | |
| "step": 36 | |
| }, | |
| { | |
| "epoch": 0.15139442231075698, | |
| "grad_norm": 2.27645206451416, | |
| "learning_rate": 2.176470588235294e-06, | |
| "loss": 1.2891892194747925, | |
| "step": 38 | |
| }, | |
| { | |
| "epoch": 0.1593625498007968, | |
| "grad_norm": 0.37568527460098267, | |
| "learning_rate": 2.2941176470588234e-06, | |
| "loss": 1.424462080001831, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 0.16733067729083664, | |
| "grad_norm": 2.9606502056121826, | |
| "learning_rate": 2.411764705882353e-06, | |
| "loss": 0.9945810437202454, | |
| "step": 42 | |
| }, | |
| { | |
| "epoch": 0.1752988047808765, | |
| "grad_norm": 0.474932461977005, | |
| "learning_rate": 2.5294117647058823e-06, | |
| "loss": 1.1145226955413818, | |
| "step": 44 | |
| }, | |
| { | |
| "epoch": 0.18326693227091634, | |
| "grad_norm": 0.6873041391372681, | |
| "learning_rate": 2.647058823529412e-06, | |
| "loss": 0.9955132603645325, | |
| "step": 46 | |
| }, | |
| { | |
| "epoch": 0.19123505976095617, | |
| "grad_norm": 1.075629711151123, | |
| "learning_rate": 2.764705882352941e-06, | |
| "loss": 1.2583197355270386, | |
| "step": 48 | |
| }, | |
| { | |
| "epoch": 0.199203187250996, | |
| "grad_norm": 0.3792019784450531, | |
| "learning_rate": 2.882352941176471e-06, | |
| "loss": 1.4063794612884521, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 0.20717131474103587, | |
| "grad_norm": 0.45150187611579895, | |
| "learning_rate": 3e-06, | |
| "loss": 1.3480579853057861, | |
| "step": 52 | |
| }, | |
| { | |
| "epoch": 0.2151394422310757, | |
| "grad_norm": 37.446346282958984, | |
| "learning_rate": 2.999970658917326e-06, | |
| "loss": 1.171717882156372, | |
| "step": 54 | |
| }, | |
| { | |
| "epoch": 0.22310756972111553, | |
| "grad_norm": 0.5553145408630371, | |
| "learning_rate": 2.9998826369447094e-06, | |
| "loss": 1.567508339881897, | |
| "step": 56 | |
| }, | |
| { | |
| "epoch": 0.23107569721115537, | |
| "grad_norm": 0.5614340901374817, | |
| "learning_rate": 2.9997359379083137e-06, | |
| "loss": 1.3946313858032227, | |
| "step": 58 | |
| }, | |
| { | |
| "epoch": 0.23904382470119523, | |
| "grad_norm": 0.4262371063232422, | |
| "learning_rate": 2.9995305681848922e-06, | |
| "loss": 0.8638534545898438, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 0.24701195219123506, | |
| "grad_norm": 0.37590524554252625, | |
| "learning_rate": 2.9992665367015114e-06, | |
| "loss": 1.3967615365982056, | |
| "step": 62 | |
| }, | |
| { | |
| "epoch": 0.2549800796812749, | |
| "grad_norm": 0.8519444465637207, | |
| "learning_rate": 2.998943854935163e-06, | |
| "loss": 1.1325318813323975, | |
| "step": 64 | |
| }, | |
| { | |
| "epoch": 0.26294820717131473, | |
| "grad_norm": 0.3111265301704407, | |
| "learning_rate": 2.9985625369122664e-06, | |
| "loss": 1.312957525253296, | |
| "step": 66 | |
| }, | |
| { | |
| "epoch": 0.27091633466135456, | |
| "grad_norm": 1.5971601009368896, | |
| "learning_rate": 2.998122599208055e-06, | |
| "loss": 0.9911661148071289, | |
| "step": 68 | |
| }, | |
| { | |
| "epoch": 0.2788844621513944, | |
| "grad_norm": 0.5279662013053894, | |
| "learning_rate": 2.9976240609458617e-06, | |
| "loss": 1.567615032196045, | |
| "step": 70 | |
| }, | |
| { | |
| "epoch": 0.2868525896414343, | |
| "grad_norm": 2.7161941528320312, | |
| "learning_rate": 2.9970669437962822e-06, | |
| "loss": 0.8429480791091919, | |
| "step": 72 | |
| }, | |
| { | |
| "epoch": 0.2948207171314741, | |
| "grad_norm": 1.455247163772583, | |
| "learning_rate": 2.9964512719762347e-06, | |
| "loss": 1.1409013271331787, | |
| "step": 74 | |
| }, | |
| { | |
| "epoch": 0.30278884462151395, | |
| "grad_norm": 0.41458117961883545, | |
| "learning_rate": 2.9957770722479088e-06, | |
| "loss": 1.4755173921585083, | |
| "step": 76 | |
| }, | |
| { | |
| "epoch": 0.3107569721115538, | |
| "grad_norm": 2.052013635635376, | |
| "learning_rate": 2.9950443739176006e-06, | |
| "loss": 0.9957228899002075, | |
| "step": 78 | |
| }, | |
| { | |
| "epoch": 0.3187250996015936, | |
| "grad_norm": 0.35525253415107727, | |
| "learning_rate": 2.99425320883444e-06, | |
| "loss": 0.8386018872261047, | |
| "step": 80 | |
| }, | |
| { | |
| "epoch": 0.32669322709163345, | |
| "grad_norm": 0.39254093170166016, | |
| "learning_rate": 2.993403611389005e-06, | |
| "loss": 1.3378560543060303, | |
| "step": 82 | |
| }, | |
| { | |
| "epoch": 0.3346613545816733, | |
| "grad_norm": 0.6873935461044312, | |
| "learning_rate": 2.992495618511827e-06, | |
| "loss": 0.8897277116775513, | |
| "step": 84 | |
| }, | |
| { | |
| "epoch": 0.3426294820717131, | |
| "grad_norm": 0.6063620448112488, | |
| "learning_rate": 2.991529269671786e-06, | |
| "loss": 1.3212584257125854, | |
| "step": 86 | |
| }, | |
| { | |
| "epoch": 0.350597609561753, | |
| "grad_norm": 0.685740053653717, | |
| "learning_rate": 2.9905046068743946e-06, | |
| "loss": 0.8896841406822205, | |
| "step": 88 | |
| }, | |
| { | |
| "epoch": 0.35856573705179284, | |
| "grad_norm": 0.3957422375679016, | |
| "learning_rate": 2.9894216746599727e-06, | |
| "loss": 1.3377217054367065, | |
| "step": 90 | |
| }, | |
| { | |
| "epoch": 0.3665338645418327, | |
| "grad_norm": 2.463545799255371, | |
| "learning_rate": 2.9882805201017116e-06, | |
| "loss": 1.133660912513733, | |
| "step": 92 | |
| }, | |
| { | |
| "epoch": 0.3745019920318725, | |
| "grad_norm": 0.4274525046348572, | |
| "learning_rate": 2.9870811928036256e-06, | |
| "loss": 1.2548623085021973, | |
| "step": 94 | |
| }, | |
| { | |
| "epoch": 0.38247011952191234, | |
| "grad_norm": 0.9313490986824036, | |
| "learning_rate": 2.985823744898399e-06, | |
| "loss": 1.1074374914169312, | |
| "step": 96 | |
| }, | |
| { | |
| "epoch": 0.3904382470119522, | |
| "grad_norm": 0.2799544632434845, | |
| "learning_rate": 2.984508231045117e-06, | |
| "loss": 1.2744643688201904, | |
| "step": 98 | |
| }, | |
| { | |
| "epoch": 0.398406374501992, | |
| "grad_norm": 0.5394402146339417, | |
| "learning_rate": 2.9831347084268923e-06, | |
| "loss": 0.9088782072067261, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.4063745019920319, | |
| "grad_norm": 0.5486662983894348, | |
| "learning_rate": 2.981703236748378e-06, | |
| "loss": 1.2874795198440552, | |
| "step": 102 | |
| }, | |
| { | |
| "epoch": 0.41434262948207173, | |
| "grad_norm": 0.40334078669548035, | |
| "learning_rate": 2.9802138782331712e-06, | |
| "loss": 0.776800274848938, | |
| "step": 104 | |
| }, | |
| { | |
| "epoch": 0.42231075697211157, | |
| "grad_norm": 1.1005347967147827, | |
| "learning_rate": 2.978666697621112e-06, | |
| "loss": 0.733248770236969, | |
| "step": 106 | |
| }, | |
| { | |
| "epoch": 0.4302788844621514, | |
| "grad_norm": 0.7740170955657959, | |
| "learning_rate": 2.9770617621654656e-06, | |
| "loss": 0.9902894496917725, | |
| "step": 108 | |
| }, | |
| { | |
| "epoch": 0.43824701195219123, | |
| "grad_norm": 0.4173789918422699, | |
| "learning_rate": 2.9753991416300007e-06, | |
| "loss": 1.2939473390579224, | |
| "step": 110 | |
| }, | |
| { | |
| "epoch": 0.44621513944223107, | |
| "grad_norm": 1.0325746536254883, | |
| "learning_rate": 2.9736789082859568e-06, | |
| "loss": 0.8199123740196228, | |
| "step": 112 | |
| }, | |
| { | |
| "epoch": 0.4541832669322709, | |
| "grad_norm": 0.8412153720855713, | |
| "learning_rate": 2.9719011369089025e-06, | |
| "loss": 1.1337602138519287, | |
| "step": 114 | |
| }, | |
| { | |
| "epoch": 0.46215139442231074, | |
| "grad_norm": 0.42725688219070435, | |
| "learning_rate": 2.970065904775485e-06, | |
| "loss": 1.2960267066955566, | |
| "step": 116 | |
| }, | |
| { | |
| "epoch": 0.4701195219123506, | |
| "grad_norm": 0.4095574617385864, | |
| "learning_rate": 2.968173291660071e-06, | |
| "loss": 1.36135995388031, | |
| "step": 118 | |
| }, | |
| { | |
| "epoch": 0.47808764940239046, | |
| "grad_norm": 0.5657700300216675, | |
| "learning_rate": 2.9662233798312805e-06, | |
| "loss": 1.2439430952072144, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 0.4860557768924303, | |
| "grad_norm": 0.4052128791809082, | |
| "learning_rate": 2.9642162540484077e-06, | |
| "loss": 0.9588472247123718, | |
| "step": 122 | |
| }, | |
| { | |
| "epoch": 0.4940239043824701, | |
| "grad_norm": 1.1219152212142944, | |
| "learning_rate": 2.96215200155774e-06, | |
| "loss": 1.0325348377227783, | |
| "step": 124 | |
| }, | |
| { | |
| "epoch": 0.50199203187251, | |
| "grad_norm": 1.6725101470947266, | |
| "learning_rate": 2.9600307120887623e-06, | |
| "loss": 0.9328906536102295, | |
| "step": 126 | |
| }, | |
| { | |
| "epoch": 0.5099601593625498, | |
| "grad_norm": 0.5302041172981262, | |
| "learning_rate": 2.9578524778502605e-06, | |
| "loss": 1.2722545862197876, | |
| "step": 128 | |
| }, | |
| { | |
| "epoch": 0.5179282868525896, | |
| "grad_norm": 0.4805465042591095, | |
| "learning_rate": 2.9556173935263094e-06, | |
| "loss": 0.9367498159408569, | |
| "step": 130 | |
| }, | |
| { | |
| "epoch": 0.5258964143426295, | |
| "grad_norm": 0.34964123368263245, | |
| "learning_rate": 2.9533255562721594e-06, | |
| "loss": 0.8116304278373718, | |
| "step": 132 | |
| }, | |
| { | |
| "epoch": 0.5338645418326693, | |
| "grad_norm": 0.3873235881328583, | |
| "learning_rate": 2.950977065710012e-06, | |
| "loss": 0.8979853391647339, | |
| "step": 134 | |
| }, | |
| { | |
| "epoch": 0.5418326693227091, | |
| "grad_norm": 0.49073103070259094, | |
| "learning_rate": 2.9485720239246913e-06, | |
| "loss": 1.2764328718185425, | |
| "step": 136 | |
| }, | |
| { | |
| "epoch": 0.549800796812749, | |
| "grad_norm": 0.9829900860786438, | |
| "learning_rate": 2.946110535459204e-06, | |
| "loss": 0.9340943694114685, | |
| "step": 138 | |
| }, | |
| { | |
| "epoch": 0.5577689243027888, | |
| "grad_norm": 0.6929047703742981, | |
| "learning_rate": 2.9435927073101974e-06, | |
| "loss": 1.0783215761184692, | |
| "step": 140 | |
| }, | |
| { | |
| "epoch": 0.5657370517928287, | |
| "grad_norm": 0.33450594544410706, | |
| "learning_rate": 2.9410186489233063e-06, | |
| "loss": 1.015251636505127, | |
| "step": 142 | |
| }, | |
| { | |
| "epoch": 0.5737051792828686, | |
| "grad_norm": 0.4002437889575958, | |
| "learning_rate": 2.9383884721883973e-06, | |
| "loss": 1.2262054681777954, | |
| "step": 144 | |
| }, | |
| { | |
| "epoch": 0.5816733067729084, | |
| "grad_norm": 0.5956969261169434, | |
| "learning_rate": 2.9357022914347046e-06, | |
| "loss": 1.382331132888794, | |
| "step": 146 | |
| }, | |
| { | |
| "epoch": 0.5896414342629482, | |
| "grad_norm": 0.49758458137512207, | |
| "learning_rate": 2.9329602234258606e-06, | |
| "loss": 0.9430940747261047, | |
| "step": 148 | |
| }, | |
| { | |
| "epoch": 0.5976095617529881, | |
| "grad_norm": 0.6348124146461487, | |
| "learning_rate": 2.9301623873548187e-06, | |
| "loss": 0.9293842315673828, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 0.6055776892430279, | |
| "grad_norm": 0.8025413155555725, | |
| "learning_rate": 2.9273089048386757e-06, | |
| "loss": 0.9237926602363586, | |
| "step": 152 | |
| }, | |
| { | |
| "epoch": 0.6135458167330677, | |
| "grad_norm": 9.821154594421387, | |
| "learning_rate": 2.9243998999133803e-06, | |
| "loss": 1.1090776920318604, | |
| "step": 154 | |
| }, | |
| { | |
| "epoch": 0.6215139442231076, | |
| "grad_norm": 1.79253351688385, | |
| "learning_rate": 2.921435499028347e-06, | |
| "loss": 0.8607668280601501, | |
| "step": 156 | |
| }, | |
| { | |
| "epoch": 0.6294820717131474, | |
| "grad_norm": 0.6355870366096497, | |
| "learning_rate": 2.918415831040955e-06, | |
| "loss": 1.1218218803405762, | |
| "step": 158 | |
| }, | |
| { | |
| "epoch": 0.6374501992031872, | |
| "grad_norm": 0.4850366711616516, | |
| "learning_rate": 2.91534102721095e-06, | |
| "loss": 1.0082756280899048, | |
| "step": 160 | |
| }, | |
| { | |
| "epoch": 0.6454183266932271, | |
| "grad_norm": 0.22921444475650787, | |
| "learning_rate": 2.9122112211947373e-06, | |
| "loss": 0.7417871952056885, | |
| "step": 162 | |
| }, | |
| { | |
| "epoch": 0.6533864541832669, | |
| "grad_norm": 0.2860549986362457, | |
| "learning_rate": 2.9090265490395713e-06, | |
| "loss": 1.1341336965560913, | |
| "step": 164 | |
| }, | |
| { | |
| "epoch": 0.6613545816733067, | |
| "grad_norm": 0.9676735401153564, | |
| "learning_rate": 2.9057871491776436e-06, | |
| "loss": 0.7781538963317871, | |
| "step": 166 | |
| }, | |
| { | |
| "epoch": 0.6693227091633466, | |
| "grad_norm": 0.3801421523094177, | |
| "learning_rate": 2.9024931624200637e-06, | |
| "loss": 1.2313846349716187, | |
| "step": 168 | |
| }, | |
| { | |
| "epoch": 0.6772908366533864, | |
| "grad_norm": 1.8267383575439453, | |
| "learning_rate": 2.899144731950739e-06, | |
| "loss": 0.8328909873962402, | |
| "step": 170 | |
| }, | |
| { | |
| "epoch": 0.6852589641434262, | |
| "grad_norm": 0.43613070249557495, | |
| "learning_rate": 2.895742003320152e-06, | |
| "loss": 0.8186226487159729, | |
| "step": 172 | |
| }, | |
| { | |
| "epoch": 0.6932270916334662, | |
| "grad_norm": 0.6106582283973694, | |
| "learning_rate": 2.8922851244390312e-06, | |
| "loss": 1.2231653928756714, | |
| "step": 174 | |
| }, | |
| { | |
| "epoch": 0.701195219123506, | |
| "grad_norm": 0.9366831183433533, | |
| "learning_rate": 2.888774245571924e-06, | |
| "loss": 0.7547324299812317, | |
| "step": 176 | |
| }, | |
| { | |
| "epoch": 0.7091633466135459, | |
| "grad_norm": 1.2000120878219604, | |
| "learning_rate": 2.8852095193306633e-06, | |
| "loss": 0.8711674809455872, | |
| "step": 178 | |
| }, | |
| { | |
| "epoch": 0.7171314741035857, | |
| "grad_norm": 0.8977218270301819, | |
| "learning_rate": 2.8815911006677326e-06, | |
| "loss": 1.0363984107971191, | |
| "step": 180 | |
| }, | |
| { | |
| "epoch": 0.7250996015936255, | |
| "grad_norm": 0.23066113889217377, | |
| "learning_rate": 2.877919146869535e-06, | |
| "loss": 0.4867554008960724, | |
| "step": 182 | |
| }, | |
| { | |
| "epoch": 0.7330677290836654, | |
| "grad_norm": 0.45316410064697266, | |
| "learning_rate": 2.874193817549551e-06, | |
| "loss": 1.250420331954956, | |
| "step": 184 | |
| }, | |
| { | |
| "epoch": 0.7410358565737052, | |
| "grad_norm": 0.40230974555015564, | |
| "learning_rate": 2.870415274641405e-06, | |
| "loss": 1.2383266687393188, | |
| "step": 186 | |
| }, | |
| { | |
| "epoch": 0.749003984063745, | |
| "grad_norm": 0.46227118372917175, | |
| "learning_rate": 2.866583682391821e-06, | |
| "loss": 1.0794142484664917, | |
| "step": 188 | |
| }, | |
| { | |
| "epoch": 0.7569721115537849, | |
| "grad_norm": 1.3957983255386353, | |
| "learning_rate": 2.8626992073534888e-06, | |
| "loss": 0.8541224598884583, | |
| "step": 190 | |
| }, | |
| { | |
| "epoch": 0.7649402390438247, | |
| "grad_norm": 0.618249237537384, | |
| "learning_rate": 2.858762018377821e-06, | |
| "loss": 1.187286615371704, | |
| "step": 192 | |
| }, | |
| { | |
| "epoch": 0.7729083665338645, | |
| "grad_norm": 0.6879091858863831, | |
| "learning_rate": 2.8547722866076125e-06, | |
| "loss": 0.8607790470123291, | |
| "step": 194 | |
| }, | |
| { | |
| "epoch": 0.7808764940239044, | |
| "grad_norm": 0.6802543997764587, | |
| "learning_rate": 2.850730185469604e-06, | |
| "loss": 0.5753005743026733, | |
| "step": 196 | |
| }, | |
| { | |
| "epoch": 0.7888446215139442, | |
| "grad_norm": 2.574225664138794, | |
| "learning_rate": 2.8466358906669423e-06, | |
| "loss": 0.7759158611297607, | |
| "step": 198 | |
| }, | |
| { | |
| "epoch": 0.796812749003984, | |
| "grad_norm": 1.023215651512146, | |
| "learning_rate": 2.842489580171541e-06, | |
| "loss": 1.0175625085830688, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.8047808764940239, | |
| "grad_norm": 0.2602544128894806, | |
| "learning_rate": 2.838291434216347e-06, | |
| "loss": 0.95418781042099, | |
| "step": 202 | |
| }, | |
| { | |
| "epoch": 0.8127490039840638, | |
| "grad_norm": 0.4951968193054199, | |
| "learning_rate": 2.8340416352875057e-06, | |
| "loss": 1.4974547624588013, | |
| "step": 204 | |
| }, | |
| { | |
| "epoch": 0.8207171314741036, | |
| "grad_norm": 0.4747754633426666, | |
| "learning_rate": 2.8297403681164256e-06, | |
| "loss": 1.2074265480041504, | |
| "step": 206 | |
| }, | |
| { | |
| "epoch": 0.8286852589641435, | |
| "grad_norm": 0.5524476766586304, | |
| "learning_rate": 2.825387819671754e-06, | |
| "loss": 0.5993782877922058, | |
| "step": 208 | |
| }, | |
| { | |
| "epoch": 0.8366533864541833, | |
| "grad_norm": 0.6101752519607544, | |
| "learning_rate": 2.820984179151243e-06, | |
| "loss": 1.2540825605392456, | |
| "step": 210 | |
| }, | |
| { | |
| "epoch": 0.8446215139442231, | |
| "grad_norm": 0.4768051505088806, | |
| "learning_rate": 2.816529637973531e-06, | |
| "loss": 1.2111059427261353, | |
| "step": 212 | |
| }, | |
| { | |
| "epoch": 0.852589641434263, | |
| "grad_norm": 0.4470526874065399, | |
| "learning_rate": 2.8120243897698197e-06, | |
| "loss": 0.936885416507721, | |
| "step": 214 | |
| }, | |
| { | |
| "epoch": 0.8605577689243028, | |
| "grad_norm": 0.8324171900749207, | |
| "learning_rate": 2.807468630375457e-06, | |
| "loss": 1.4127908945083618, | |
| "step": 216 | |
| }, | |
| { | |
| "epoch": 0.8685258964143426, | |
| "grad_norm": 0.8571661710739136, | |
| "learning_rate": 2.802862557821425e-06, | |
| "loss": 0.9163059592247009, | |
| "step": 218 | |
| }, | |
| { | |
| "epoch": 0.8764940239043825, | |
| "grad_norm": 0.656310498714447, | |
| "learning_rate": 2.7982063723257324e-06, | |
| "loss": 1.2317224740982056, | |
| "step": 220 | |
| }, | |
| { | |
| "epoch": 0.8844621513944223, | |
| "grad_norm": 0.94222491979599, | |
| "learning_rate": 2.7935002762847104e-06, | |
| "loss": 1.4826358556747437, | |
| "step": 222 | |
| }, | |
| { | |
| "epoch": 0.8924302788844621, | |
| "grad_norm": 0.6392826437950134, | |
| "learning_rate": 2.7887444742642153e-06, | |
| "loss": 0.7295237183570862, | |
| "step": 224 | |
| }, | |
| { | |
| "epoch": 0.900398406374502, | |
| "grad_norm": 0.40648454427719116, | |
| "learning_rate": 2.783939172990736e-06, | |
| "loss": 0.9684391021728516, | |
| "step": 226 | |
| }, | |
| { | |
| "epoch": 0.9083665338645418, | |
| "grad_norm": 1.8654367923736572, | |
| "learning_rate": 2.7790845813424085e-06, | |
| "loss": 1.076025128364563, | |
| "step": 228 | |
| }, | |
| { | |
| "epoch": 0.9163346613545816, | |
| "grad_norm": 0.5885425806045532, | |
| "learning_rate": 2.7741809103399372e-06, | |
| "loss": 1.1793254613876343, | |
| "step": 230 | |
| }, | |
| { | |
| "epoch": 0.9243027888446215, | |
| "grad_norm": 0.4949474334716797, | |
| "learning_rate": 2.76922837313742e-06, | |
| "loss": 1.3203648328781128, | |
| "step": 232 | |
| }, | |
| { | |
| "epoch": 0.9322709163346613, | |
| "grad_norm": 0.6972324252128601, | |
| "learning_rate": 2.7642271850130845e-06, | |
| "loss": 0.8468176126480103, | |
| "step": 234 | |
| }, | |
| { | |
| "epoch": 0.9402390438247012, | |
| "grad_norm": 0.3574720323085785, | |
| "learning_rate": 2.7591775633599295e-06, | |
| "loss": 0.8511747121810913, | |
| "step": 236 | |
| }, | |
| { | |
| "epoch": 0.9482071713147411, | |
| "grad_norm": 1.726371169090271, | |
| "learning_rate": 2.7540797276762748e-06, | |
| "loss": 0.43165379762649536, | |
| "step": 238 | |
| }, | |
| { | |
| "epoch": 0.9561752988047809, | |
| "grad_norm": 1.3261985778808594, | |
| "learning_rate": 2.7489338995562223e-06, | |
| "loss": 0.8357299566268921, | |
| "step": 240 | |
| }, | |
| { | |
| "epoch": 0.9641434262948207, | |
| "grad_norm": 0.796555757522583, | |
| "learning_rate": 2.743740302680021e-06, | |
| "loss": 1.1258844137191772, | |
| "step": 242 | |
| }, | |
| { | |
| "epoch": 0.9721115537848606, | |
| "grad_norm": 0.3952471911907196, | |
| "learning_rate": 2.738499162804346e-06, | |
| "loss": 1.2129818201065063, | |
| "step": 244 | |
| }, | |
| { | |
| "epoch": 0.9800796812749004, | |
| "grad_norm": 3.2808635234832764, | |
| "learning_rate": 2.733210707752483e-06, | |
| "loss": 0.9465621113777161, | |
| "step": 246 | |
| }, | |
| { | |
| "epoch": 0.9880478087649402, | |
| "grad_norm": 0.36960551142692566, | |
| "learning_rate": 2.7278751674044277e-06, | |
| "loss": 0.9761192202568054, | |
| "step": 248 | |
| }, | |
| { | |
| "epoch": 0.9960159362549801, | |
| "grad_norm": 0.40931713581085205, | |
| "learning_rate": 2.7224927736868926e-06, | |
| "loss": 0.9702551364898682, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 1.00398406374502, | |
| "grad_norm": 0.870992124080658, | |
| "learning_rate": 2.7170637605632236e-06, | |
| "loss": 0.9026790857315063, | |
| "step": 252 | |
| }, | |
| { | |
| "epoch": 1.0119521912350598, | |
| "grad_norm": 0.5800741910934448, | |
| "learning_rate": 2.7115883640232326e-06, | |
| "loss": 0.696426272392273, | |
| "step": 254 | |
| }, | |
| { | |
| "epoch": 1.0199203187250996, | |
| "grad_norm": 0.5435763001441956, | |
| "learning_rate": 2.706066822072938e-06, | |
| "loss": 1.1814464330673218, | |
| "step": 256 | |
| }, | |
| { | |
| "epoch": 1.0278884462151394, | |
| "grad_norm": 0.7086870074272156, | |
| "learning_rate": 2.70049937472422e-06, | |
| "loss": 1.1569794416427612, | |
| "step": 258 | |
| }, | |
| { | |
| "epoch": 1.0358565737051793, | |
| "grad_norm": 0.6994827389717102, | |
| "learning_rate": 2.694886263984387e-06, | |
| "loss": 1.092099905014038, | |
| "step": 260 | |
| }, | |
| { | |
| "epoch": 1.043824701195219, | |
| "grad_norm": 0.5085208415985107, | |
| "learning_rate": 2.6892277338456545e-06, | |
| "loss": 1.0143777132034302, | |
| "step": 262 | |
| }, | |
| { | |
| "epoch": 1.051792828685259, | |
| "grad_norm": 0.7572088837623596, | |
| "learning_rate": 2.6835240302745432e-06, | |
| "loss": 1.1160310506820679, | |
| "step": 264 | |
| }, | |
| { | |
| "epoch": 1.0597609561752988, | |
| "grad_norm": 0.969849705696106, | |
| "learning_rate": 2.6777754012011822e-06, | |
| "loss": 0.6508786678314209, | |
| "step": 266 | |
| }, | |
| { | |
| "epoch": 1.0677290836653386, | |
| "grad_norm": 0.40249520540237427, | |
| "learning_rate": 2.6719820965085373e-06, | |
| "loss": 0.9302061796188354, | |
| "step": 268 | |
| }, | |
| { | |
| "epoch": 1.0756972111553784, | |
| "grad_norm": 0.39779043197631836, | |
| "learning_rate": 2.6661443680215436e-06, | |
| "loss": 0.5909485220909119, | |
| "step": 270 | |
| }, | |
| { | |
| "epoch": 1.0836653386454183, | |
| "grad_norm": 0.4452027976512909, | |
| "learning_rate": 2.6602624694961634e-06, | |
| "loss": 1.2358585596084595, | |
| "step": 272 | |
| }, | |
| { | |
| "epoch": 1.091633466135458, | |
| "grad_norm": 1.504374623298645, | |
| "learning_rate": 2.6543366566083536e-06, | |
| "loss": 0.602242648601532, | |
| "step": 274 | |
| }, | |
| { | |
| "epoch": 1.099601593625498, | |
| "grad_norm": 0.4246741831302643, | |
| "learning_rate": 2.6483671869429515e-06, | |
| "loss": 0.925746500492096, | |
| "step": 276 | |
| }, | |
| { | |
| "epoch": 1.1075697211155378, | |
| "grad_norm": 0.4856192171573639, | |
| "learning_rate": 2.6423543199824814e-06, | |
| "loss": 0.8335383534431458, | |
| "step": 278 | |
| }, | |
| { | |
| "epoch": 1.1155378486055776, | |
| "grad_norm": 0.46481937170028687, | |
| "learning_rate": 2.6362983170958708e-06, | |
| "loss": 1.0746686458587646, | |
| "step": 280 | |
| }, | |
| { | |
| "epoch": 1.1235059760956174, | |
| "grad_norm": 0.40899351239204407, | |
| "learning_rate": 2.6301994415270927e-06, | |
| "loss": 1.1849327087402344, | |
| "step": 282 | |
| }, | |
| { | |
| "epoch": 1.1314741035856573, | |
| "grad_norm": 1.2962706089019775, | |
| "learning_rate": 2.62405795838372e-06, | |
| "loss": 1.1970727443695068, | |
| "step": 284 | |
| }, | |
| { | |
| "epoch": 1.139442231075697, | |
| "grad_norm": 1.8368911743164062, | |
| "learning_rate": 2.617874134625405e-06, | |
| "loss": 0.3501308858394623, | |
| "step": 286 | |
| }, | |
| { | |
| "epoch": 1.1474103585657371, | |
| "grad_norm": 0.2166207730770111, | |
| "learning_rate": 2.6116482390522715e-06, | |
| "loss": 0.7515382170677185, | |
| "step": 288 | |
| }, | |
| { | |
| "epoch": 1.155378486055777, | |
| "grad_norm": 0.823740541934967, | |
| "learning_rate": 2.605380542293234e-06, | |
| "loss": 0.9569700360298157, | |
| "step": 290 | |
| }, | |
| { | |
| "epoch": 1.1633466135458168, | |
| "grad_norm": 0.6585810780525208, | |
| "learning_rate": 2.5990713167942306e-06, | |
| "loss": 0.5356516242027283, | |
| "step": 292 | |
| }, | |
| { | |
| "epoch": 1.1713147410358566, | |
| "grad_norm": 0.4426569640636444, | |
| "learning_rate": 2.5927208368063825e-06, | |
| "loss": 1.1944938898086548, | |
| "step": 294 | |
| }, | |
| { | |
| "epoch": 1.1792828685258965, | |
| "grad_norm": 0.3524056673049927, | |
| "learning_rate": 2.586329378374074e-06, | |
| "loss": 1.2448694705963135, | |
| "step": 296 | |
| }, | |
| { | |
| "epoch": 1.1872509960159363, | |
| "grad_norm": 0.3419857621192932, | |
| "learning_rate": 2.5798972193229485e-06, | |
| "loss": 1.384804129600525, | |
| "step": 298 | |
| }, | |
| { | |
| "epoch": 1.1952191235059761, | |
| "grad_norm": 1.195278286933899, | |
| "learning_rate": 2.573424639247837e-06, | |
| "loss": 0.6737868189811707, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 1.203187250996016, | |
| "grad_norm": 0.5209558606147766, | |
| "learning_rate": 2.5669119195006016e-06, | |
| "loss": 0.9476598501205444, | |
| "step": 302 | |
| }, | |
| { | |
| "epoch": 1.2111553784860558, | |
| "grad_norm": 0.6349844336509705, | |
| "learning_rate": 2.560359343177907e-06, | |
| "loss": 0.6187256574630737, | |
| "step": 304 | |
| }, | |
| { | |
| "epoch": 1.2191235059760956, | |
| "grad_norm": 0.6629199385643005, | |
| "learning_rate": 2.553767195108914e-06, | |
| "loss": 1.175504207611084, | |
| "step": 306 | |
| }, | |
| { | |
| "epoch": 1.2270916334661355, | |
| "grad_norm": 2.9705917835235596, | |
| "learning_rate": 2.547135761842899e-06, | |
| "loss": 1.0829224586486816, | |
| "step": 308 | |
| }, | |
| { | |
| "epoch": 1.2350597609561753, | |
| "grad_norm": 0.5714198350906372, | |
| "learning_rate": 2.5404653316367983e-06, | |
| "loss": 1.107988953590393, | |
| "step": 310 | |
| }, | |
| { | |
| "epoch": 1.2430278884462151, | |
| "grad_norm": 0.5963674783706665, | |
| "learning_rate": 2.533756194442676e-06, | |
| "loss": 1.066701054573059, | |
| "step": 312 | |
| }, | |
| { | |
| "epoch": 1.250996015936255, | |
| "grad_norm": 0.4868275225162506, | |
| "learning_rate": 2.527008641895124e-06, | |
| "loss": 0.8929526209831238, | |
| "step": 314 | |
| }, | |
| { | |
| "epoch": 1.2589641434262948, | |
| "grad_norm": 0.5656901001930237, | |
| "learning_rate": 2.5202229672985834e-06, | |
| "loss": 0.7201364040374756, | |
| "step": 316 | |
| }, | |
| { | |
| "epoch": 1.2669322709163346, | |
| "grad_norm": 1.8498417139053345, | |
| "learning_rate": 2.513399465614593e-06, | |
| "loss": 0.7000647187232971, | |
| "step": 318 | |
| }, | |
| { | |
| "epoch": 1.2749003984063745, | |
| "grad_norm": 1.8695796728134155, | |
| "learning_rate": 2.5065384334489707e-06, | |
| "loss": 0.6135216355323792, | |
| "step": 320 | |
| }, | |
| { | |
| "epoch": 1.2828685258964143, | |
| "grad_norm": 1.0598129034042358, | |
| "learning_rate": 2.499640169038919e-06, | |
| "loss": 0.9898048639297485, | |
| "step": 322 | |
| }, | |
| { | |
| "epoch": 1.2908366533864541, | |
| "grad_norm": 0.43019378185272217, | |
| "learning_rate": 2.4927049722400632e-06, | |
| "loss": 0.9241266250610352, | |
| "step": 324 | |
| }, | |
| { | |
| "epoch": 1.298804780876494, | |
| "grad_norm": 8.146377563476562, | |
| "learning_rate": 2.4857331445134148e-06, | |
| "loss": 1.3060351610183716, | |
| "step": 326 | |
| }, | |
| { | |
| "epoch": 1.3067729083665338, | |
| "grad_norm": 0.3936547338962555, | |
| "learning_rate": 2.4787249889122664e-06, | |
| "loss": 0.7562607526779175, | |
| "step": 328 | |
| }, | |
| { | |
| "epoch": 1.3147410358565736, | |
| "grad_norm": 0.45019447803497314, | |
| "learning_rate": 2.4716808100690243e-06, | |
| "loss": 0.9096888303756714, | |
| "step": 330 | |
| }, | |
| { | |
| "epoch": 1.3227091633466135, | |
| "grad_norm": 0.6943168044090271, | |
| "learning_rate": 2.4646009141819594e-06, | |
| "loss": 1.000761866569519, | |
| "step": 332 | |
| }, | |
| { | |
| "epoch": 1.3306772908366533, | |
| "grad_norm": 1.2407184839248657, | |
| "learning_rate": 2.4574856090019033e-06, | |
| "loss": 0.7026602029800415, | |
| "step": 334 | |
| }, | |
| { | |
| "epoch": 1.3386454183266931, | |
| "grad_norm": 1.955567479133606, | |
| "learning_rate": 2.4503352038188665e-06, | |
| "loss": 0.8961894512176514, | |
| "step": 336 | |
| }, | |
| { | |
| "epoch": 1.3466135458167332, | |
| "grad_norm": 0.6083151698112488, | |
| "learning_rate": 2.4431500094485963e-06, | |
| "loss": 1.196513056755066, | |
| "step": 338 | |
| }, | |
| { | |
| "epoch": 1.354581673306773, | |
| "grad_norm": 0.3481491804122925, | |
| "learning_rate": 2.435930338219066e-06, | |
| "loss": 1.1471824645996094, | |
| "step": 340 | |
| }, | |
| { | |
| "epoch": 1.3625498007968129, | |
| "grad_norm": 0.3202623128890991, | |
| "learning_rate": 2.4286765039568996e-06, | |
| "loss": 1.1803168058395386, | |
| "step": 342 | |
| }, | |
| { | |
| "epoch": 1.3705179282868527, | |
| "grad_norm": 0.5573057532310486, | |
| "learning_rate": 2.4213888219737273e-06, | |
| "loss": 1.145288109779358, | |
| "step": 344 | |
| }, | |
| { | |
| "epoch": 1.3784860557768925, | |
| "grad_norm": 0.7530761361122131, | |
| "learning_rate": 2.4140676090524813e-06, | |
| "loss": 0.7112540006637573, | |
| "step": 346 | |
| }, | |
| { | |
| "epoch": 1.3864541832669324, | |
| "grad_norm": 0.31881389021873474, | |
| "learning_rate": 2.4067131834336265e-06, | |
| "loss": 1.1765650510787964, | |
| "step": 348 | |
| }, | |
| { | |
| "epoch": 1.3944223107569722, | |
| "grad_norm": 0.5150675773620605, | |
| "learning_rate": 2.3993258648013265e-06, | |
| "loss": 0.7162399291992188, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 1.402390438247012, | |
| "grad_norm": 0.4000639021396637, | |
| "learning_rate": 2.391905974269546e-06, | |
| "loss": 1.1982700824737549, | |
| "step": 352 | |
| }, | |
| { | |
| "epoch": 1.4103585657370519, | |
| "grad_norm": 0.5254287719726562, | |
| "learning_rate": 2.3844538343680954e-06, | |
| "loss": 1.1246976852416992, | |
| "step": 354 | |
| }, | |
| { | |
| "epoch": 1.4183266932270917, | |
| "grad_norm": 0.7500051259994507, | |
| "learning_rate": 2.376969769028608e-06, | |
| "loss": 0.9707033634185791, | |
| "step": 356 | |
| }, | |
| { | |
| "epoch": 1.4262948207171315, | |
| "grad_norm": 0.4242282211780548, | |
| "learning_rate": 2.3694541035704623e-06, | |
| "loss": 1.1708297729492188, | |
| "step": 358 | |
| }, | |
| { | |
| "epoch": 1.4342629482071714, | |
| "grad_norm": 0.5363942384719849, | |
| "learning_rate": 2.361907164686638e-06, | |
| "loss": 0.6365261077880859, | |
| "step": 360 | |
| }, | |
| { | |
| "epoch": 1.4422310756972112, | |
| "grad_norm": 0.34740063548088074, | |
| "learning_rate": 2.3543292804295164e-06, | |
| "loss": 0.7796180844306946, | |
| "step": 362 | |
| }, | |
| { | |
| "epoch": 1.450199203187251, | |
| "grad_norm": 0.8936453461647034, | |
| "learning_rate": 2.3467207801966217e-06, | |
| "loss": 1.069722056388855, | |
| "step": 364 | |
| }, | |
| { | |
| "epoch": 1.4581673306772909, | |
| "grad_norm": 0.5224515199661255, | |
| "learning_rate": 2.339081994716301e-06, | |
| "loss": 1.1437441110610962, | |
| "step": 366 | |
| }, | |
| { | |
| "epoch": 1.4661354581673307, | |
| "grad_norm": 0.3716191351413727, | |
| "learning_rate": 2.3314132560333486e-06, | |
| "loss": 0.6178401708602905, | |
| "step": 368 | |
| }, | |
| { | |
| "epoch": 1.4741035856573705, | |
| "grad_norm": 0.38474348187446594, | |
| "learning_rate": 2.3237148974945732e-06, | |
| "loss": 0.6917285323143005, | |
| "step": 370 | |
| }, | |
| { | |
| "epoch": 1.4820717131474104, | |
| "grad_norm": 0.6348185539245605, | |
| "learning_rate": 2.315987253734307e-06, | |
| "loss": 1.248483657836914, | |
| "step": 372 | |
| }, | |
| { | |
| "epoch": 1.4900398406374502, | |
| "grad_norm": 0.5666549205780029, | |
| "learning_rate": 2.308230660659861e-06, | |
| "loss": 1.1805977821350098, | |
| "step": 374 | |
| }, | |
| { | |
| "epoch": 1.49800796812749, | |
| "grad_norm": 0.3927704393863678, | |
| "learning_rate": 2.300445455436921e-06, | |
| "loss": 1.144189715385437, | |
| "step": 376 | |
| }, | |
| { | |
| "epoch": 1.5059760956175299, | |
| "grad_norm": 1.197426676750183, | |
| "learning_rate": 2.292631976474895e-06, | |
| "loss": 0.9748780131340027, | |
| "step": 378 | |
| }, | |
| { | |
| "epoch": 1.5139442231075697, | |
| "grad_norm": 0.5384941101074219, | |
| "learning_rate": 2.284790563412201e-06, | |
| "loss": 1.1320137977600098, | |
| "step": 380 | |
| }, | |
| { | |
| "epoch": 1.5219123505976095, | |
| "grad_norm": 0.40527671575546265, | |
| "learning_rate": 2.2769215571015054e-06, | |
| "loss": 0.756669282913208, | |
| "step": 382 | |
| }, | |
| { | |
| "epoch": 1.5298804780876494, | |
| "grad_norm": 0.5759975910186768, | |
| "learning_rate": 2.2690252995949015e-06, | |
| "loss": 0.7979004979133606, | |
| "step": 384 | |
| }, | |
| { | |
| "epoch": 1.5378486055776892, | |
| "grad_norm": 0.7272588014602661, | |
| "learning_rate": 2.2611021341290494e-06, | |
| "loss": 1.049770474433899, | |
| "step": 386 | |
| }, | |
| { | |
| "epoch": 1.545816733067729, | |
| "grad_norm": 0.495037317276001, | |
| "learning_rate": 2.2531524051102493e-06, | |
| "loss": 1.1562248468399048, | |
| "step": 388 | |
| }, | |
| { | |
| "epoch": 1.5537848605577689, | |
| "grad_norm": 0.6938880681991577, | |
| "learning_rate": 2.245176458099474e-06, | |
| "loss": 0.5849276781082153, | |
| "step": 390 | |
| }, | |
| { | |
| "epoch": 1.5617529880478087, | |
| "grad_norm": 0.6492685675621033, | |
| "learning_rate": 2.237174639797346e-06, | |
| "loss": 0.6089207530021667, | |
| "step": 392 | |
| }, | |
| { | |
| "epoch": 1.5697211155378485, | |
| "grad_norm": 0.7078109383583069, | |
| "learning_rate": 2.2291472980290696e-06, | |
| "loss": 1.174211025238037, | |
| "step": 394 | |
| }, | |
| { | |
| "epoch": 1.5776892430278884, | |
| "grad_norm": 0.5934572219848633, | |
| "learning_rate": 2.221094781729308e-06, | |
| "loss": 1.045981526374817, | |
| "step": 396 | |
| }, | |
| { | |
| "epoch": 1.5856573705179282, | |
| "grad_norm": 0.40928709506988525, | |
| "learning_rate": 2.2130174409270204e-06, | |
| "loss": 1.1423792839050293, | |
| "step": 398 | |
| }, | |
| { | |
| "epoch": 1.593625498007968, | |
| "grad_norm": 0.9985561370849609, | |
| "learning_rate": 2.204915626730242e-06, | |
| "loss": 0.6377730369567871, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 1.6015936254980079, | |
| "grad_norm": 0.6911800503730774, | |
| "learning_rate": 2.1967896913108245e-06, | |
| "loss": 1.1229146718978882, | |
| "step": 402 | |
| }, | |
| { | |
| "epoch": 1.6095617529880477, | |
| "grad_norm": 0.45020508766174316, | |
| "learning_rate": 2.1886399878891288e-06, | |
| "loss": 1.2710224390029907, | |
| "step": 404 | |
| }, | |
| { | |
| "epoch": 1.6175298804780875, | |
| "grad_norm": 1.1965842247009277, | |
| "learning_rate": 2.1804668707186685e-06, | |
| "loss": 0.7574101686477661, | |
| "step": 406 | |
| }, | |
| { | |
| "epoch": 1.6254980079681274, | |
| "grad_norm": 0.3679886758327484, | |
| "learning_rate": 2.1722706950707116e-06, | |
| "loss": 0.5999529957771301, | |
| "step": 408 | |
| }, | |
| { | |
| "epoch": 1.6334661354581672, | |
| "grad_norm": 0.3366493284702301, | |
| "learning_rate": 2.1640518172188405e-06, | |
| "loss": 0.8446294665336609, | |
| "step": 410 | |
| }, | |
| { | |
| "epoch": 1.641434262948207, | |
| "grad_norm": 0.5908185243606567, | |
| "learning_rate": 2.1558105944234613e-06, | |
| "loss": 0.5261144638061523, | |
| "step": 412 | |
| }, | |
| { | |
| "epoch": 1.6494023904382469, | |
| "grad_norm": 0.41685259342193604, | |
| "learning_rate": 2.147547384916277e-06, | |
| "loss": 0.8532723188400269, | |
| "step": 414 | |
| }, | |
| { | |
| "epoch": 1.6573705179282867, | |
| "grad_norm": 0.45444801449775696, | |
| "learning_rate": 2.1392625478847147e-06, | |
| "loss": 0.7636860609054565, | |
| "step": 416 | |
| }, | |
| { | |
| "epoch": 1.6653386454183265, | |
| "grad_norm": 0.7797396779060364, | |
| "learning_rate": 2.130956443456313e-06, | |
| "loss": 1.0055797100067139, | |
| "step": 418 | |
| }, | |
| { | |
| "epoch": 1.6733067729083664, | |
| "grad_norm": 0.48384949564933777, | |
| "learning_rate": 2.1226294326830663e-06, | |
| "loss": 1.1412853002548218, | |
| "step": 420 | |
| }, | |
| { | |
| "epoch": 1.6812749003984062, | |
| "grad_norm": 0.5391873121261597, | |
| "learning_rate": 2.1142818775257326e-06, | |
| "loss": 1.0306801795959473, | |
| "step": 422 | |
| }, | |
| { | |
| "epoch": 1.6892430278884463, | |
| "grad_norm": 0.4576083719730377, | |
| "learning_rate": 2.105914140838099e-06, | |
| "loss": 0.6449341773986816, | |
| "step": 424 | |
| }, | |
| { | |
| "epoch": 1.697211155378486, | |
| "grad_norm": 0.40177881717681885, | |
| "learning_rate": 2.0975265863512077e-06, | |
| "loss": 1.2080342769622803, | |
| "step": 426 | |
| }, | |
| { | |
| "epoch": 1.705179282868526, | |
| "grad_norm": 0.41376203298568726, | |
| "learning_rate": 2.0891195786575484e-06, | |
| "loss": 1.1480873823165894, | |
| "step": 428 | |
| }, | |
| { | |
| "epoch": 1.7131474103585658, | |
| "grad_norm": 0.5125391483306885, | |
| "learning_rate": 2.080693483195205e-06, | |
| "loss": 0.7938195466995239, | |
| "step": 430 | |
| }, | |
| { | |
| "epoch": 1.7211155378486056, | |
| "grad_norm": 4.272192478179932, | |
| "learning_rate": 2.072248666231976e-06, | |
| "loss": 0.9956310987472534, | |
| "step": 432 | |
| }, | |
| { | |
| "epoch": 1.7290836653386454, | |
| "grad_norm": 0.7369029521942139, | |
| "learning_rate": 2.0637854948494514e-06, | |
| "loss": 0.6916837096214294, | |
| "step": 434 | |
| }, | |
| { | |
| "epoch": 1.7370517928286853, | |
| "grad_norm": 0.5697792768478394, | |
| "learning_rate": 2.0553043369270544e-06, | |
| "loss": 0.37542012333869934, | |
| "step": 436 | |
| }, | |
| { | |
| "epoch": 1.745019920318725, | |
| "grad_norm": 0.8396166563034058, | |
| "learning_rate": 2.0468055611260523e-06, | |
| "loss": 0.7680933475494385, | |
| "step": 438 | |
| }, | |
| { | |
| "epoch": 1.752988047808765, | |
| "grad_norm": 0.7971356511116028, | |
| "learning_rate": 2.038289536873533e-06, | |
| "loss": 0.6482405066490173, | |
| "step": 440 | |
| }, | |
| { | |
| "epoch": 1.7609561752988048, | |
| "grad_norm": 0.6245846152305603, | |
| "learning_rate": 2.029756634346343e-06, | |
| "loss": 1.559065818786621, | |
| "step": 442 | |
| }, | |
| { | |
| "epoch": 1.7689243027888446, | |
| "grad_norm": 0.3909813165664673, | |
| "learning_rate": 2.021207224454998e-06, | |
| "loss": 0.8277990818023682, | |
| "step": 444 | |
| }, | |
| { | |
| "epoch": 1.7768924302788844, | |
| "grad_norm": 0.4216088652610779, | |
| "learning_rate": 2.0126416788275607e-06, | |
| "loss": 1.0609842538833618, | |
| "step": 446 | |
| }, | |
| { | |
| "epoch": 1.7848605577689243, | |
| "grad_norm": 0.5274325013160706, | |
| "learning_rate": 2.0040603697934875e-06, | |
| "loss": 1.1131477355957031, | |
| "step": 448 | |
| }, | |
| { | |
| "epoch": 1.792828685258964, | |
| "grad_norm": 0.40741127729415894, | |
| "learning_rate": 1.995463670367441e-06, | |
| "loss": 0.8610782623291016, | |
| "step": 450 | |
| }, | |
| { | |
| "epoch": 1.800796812749004, | |
| "grad_norm": 0.3124333620071411, | |
| "learning_rate": 1.986851954233079e-06, | |
| "loss": 0.5843238830566406, | |
| "step": 452 | |
| }, | |
| { | |
| "epoch": 1.8087649402390438, | |
| "grad_norm": 0.5109219551086426, | |
| "learning_rate": 1.9782255957268082e-06, | |
| "loss": 0.9527801275253296, | |
| "step": 454 | |
| }, | |
| { | |
| "epoch": 1.8167330677290838, | |
| "grad_norm": 0.6287890672683716, | |
| "learning_rate": 1.969584969821516e-06, | |
| "loss": 0.684226930141449, | |
| "step": 456 | |
| }, | |
| { | |
| "epoch": 1.8247011952191237, | |
| "grad_norm": 0.6896864175796509, | |
| "learning_rate": 1.9609304521102664e-06, | |
| "loss": 1.0557795763015747, | |
| "step": 458 | |
| }, | |
| { | |
| "epoch": 1.8326693227091635, | |
| "grad_norm": 0.5463947057723999, | |
| "learning_rate": 1.9522624187899774e-06, | |
| "loss": 0.730643630027771, | |
| "step": 460 | |
| }, | |
| { | |
| "epoch": 1.8406374501992033, | |
| "grad_norm": 0.5176675319671631, | |
| "learning_rate": 1.943581246645068e-06, | |
| "loss": 0.792547345161438, | |
| "step": 462 | |
| }, | |
| { | |
| "epoch": 1.8486055776892432, | |
| "grad_norm": 0.33698317408561707, | |
| "learning_rate": 1.9348873130310776e-06, | |
| "loss": 0.11931800842285156, | |
| "step": 464 | |
| }, | |
| { | |
| "epoch": 1.856573705179283, | |
| "grad_norm": 0.4697937071323395, | |
| "learning_rate": 1.926180995858266e-06, | |
| "loss": 1.1590396165847778, | |
| "step": 466 | |
| }, | |
| { | |
| "epoch": 1.8645418326693228, | |
| "grad_norm": 0.6702647805213928, | |
| "learning_rate": 1.9174626735751844e-06, | |
| "loss": 0.5630046129226685, | |
| "step": 468 | |
| }, | |
| { | |
| "epoch": 1.8725099601593627, | |
| "grad_norm": 0.5800269246101379, | |
| "learning_rate": 1.9087327251522246e-06, | |
| "loss": 1.14718759059906, | |
| "step": 470 | |
| }, | |
| { | |
| "epoch": 1.8804780876494025, | |
| "grad_norm": 1.194881796836853, | |
| "learning_rate": 1.8999915300651478e-06, | |
| "loss": 1.059720516204834, | |
| "step": 472 | |
| }, | |
| { | |
| "epoch": 1.8884462151394423, | |
| "grad_norm": 0.7230931520462036, | |
| "learning_rate": 1.8912394682785866e-06, | |
| "loss": 1.2374215126037598, | |
| "step": 474 | |
| }, | |
| { | |
| "epoch": 1.8964143426294822, | |
| "grad_norm": 0.41785743832588196, | |
| "learning_rate": 1.8824769202295325e-06, | |
| "loss": 0.9771887063980103, | |
| "step": 476 | |
| }, | |
| { | |
| "epoch": 1.904382470119522, | |
| "grad_norm": 0.36748501658439636, | |
| "learning_rate": 1.8737042668107945e-06, | |
| "loss": 0.8857436180114746, | |
| "step": 478 | |
| }, | |
| { | |
| "epoch": 1.9123505976095618, | |
| "grad_norm": 2.2149970531463623, | |
| "learning_rate": 1.8649218893544465e-06, | |
| "loss": 0.6210463047027588, | |
| "step": 480 | |
| }, | |
| { | |
| "epoch": 1.9203187250996017, | |
| "grad_norm": 0.5847256779670715, | |
| "learning_rate": 1.8561301696152485e-06, | |
| "loss": 0.758573591709137, | |
| "step": 482 | |
| }, | |
| { | |
| "epoch": 1.9282868525896415, | |
| "grad_norm": 1.2704209089279175, | |
| "learning_rate": 1.847329489754052e-06, | |
| "loss": 1.1186835765838623, | |
| "step": 484 | |
| }, | |
| { | |
| "epoch": 1.9362549800796813, | |
| "grad_norm": 0.3778979778289795, | |
| "learning_rate": 1.8385202323211921e-06, | |
| "loss": 1.2423903942108154, | |
| "step": 486 | |
| }, | |
| { | |
| "epoch": 1.9442231075697212, | |
| "grad_norm": 0.33054208755493164, | |
| "learning_rate": 1.8297027802398551e-06, | |
| "loss": 1.1193066835403442, | |
| "step": 488 | |
| }, | |
| { | |
| "epoch": 1.952191235059761, | |
| "grad_norm": 0.29046395421028137, | |
| "learning_rate": 1.8208775167894336e-06, | |
| "loss": 1.1333787441253662, | |
| "step": 490 | |
| }, | |
| { | |
| "epoch": 1.9601593625498008, | |
| "grad_norm": 0.4164031147956848, | |
| "learning_rate": 1.8120448255888684e-06, | |
| "loss": 0.8040657639503479, | |
| "step": 492 | |
| }, | |
| { | |
| "epoch": 1.9681274900398407, | |
| "grad_norm": 2.26190185546875, | |
| "learning_rate": 1.8032050905799704e-06, | |
| "loss": 0.6147015690803528, | |
| "step": 494 | |
| }, | |
| { | |
| "epoch": 1.9760956175298805, | |
| "grad_norm": 0.3113175332546234, | |
| "learning_rate": 1.7943586960107338e-06, | |
| "loss": 1.0835860967636108, | |
| "step": 496 | |
| }, | |
| { | |
| "epoch": 1.9840637450199203, | |
| "grad_norm": 0.9167985916137695, | |
| "learning_rate": 1.785506026418631e-06, | |
| "loss": 0.8612701296806335, | |
| "step": 498 | |
| }, | |
| { | |
| "epoch": 1.9920318725099602, | |
| "grad_norm": 0.40828704833984375, | |
| "learning_rate": 1.7766474666139e-06, | |
| "loss": 1.1791200637817383, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "grad_norm": 1.0138927698135376, | |
| "learning_rate": 1.7677834016628158e-06, | |
| "loss": 0.616978108882904, | |
| "step": 502 | |
| }, | |
| { | |
| "epoch": 2.00796812749004, | |
| "grad_norm": 0.34795933961868286, | |
| "learning_rate": 1.7589142168709526e-06, | |
| "loss": 0.7058793306350708, | |
| "step": 504 | |
| }, | |
| { | |
| "epoch": 2.0159362549800797, | |
| "grad_norm": 0.6295813322067261, | |
| "learning_rate": 1.7500402977664356e-06, | |
| "loss": 0.6776050925254822, | |
| "step": 506 | |
| }, | |
| { | |
| "epoch": 2.0239043824701195, | |
| "grad_norm": 0.09192030876874924, | |
| "learning_rate": 1.741162030083181e-06, | |
| "loss": 0.5675944685935974, | |
| "step": 508 | |
| }, | |
| { | |
| "epoch": 2.0318725099601593, | |
| "grad_norm": 0.3637365400791168, | |
| "learning_rate": 1.7322797997441324e-06, | |
| "loss": 0.9497091770172119, | |
| "step": 510 | |
| }, | |
| { | |
| "epoch": 2.039840637450199, | |
| "grad_norm": 0.9041894674301147, | |
| "learning_rate": 1.7233939928444803e-06, | |
| "loss": 0.8332242369651794, | |
| "step": 512 | |
| }, | |
| { | |
| "epoch": 2.047808764940239, | |
| "grad_norm": 0.5637346506118774, | |
| "learning_rate": 1.7145049956348851e-06, | |
| "loss": 0.2743958532810211, | |
| "step": 514 | |
| }, | |
| { | |
| "epoch": 2.055776892430279, | |
| "grad_norm": 1.7006338834762573, | |
| "learning_rate": 1.7056131945046828e-06, | |
| "loss": 0.932583212852478, | |
| "step": 516 | |
| }, | |
| { | |
| "epoch": 2.0637450199203187, | |
| "grad_norm": 0.5010867118835449, | |
| "learning_rate": 1.6967189759650917e-06, | |
| "loss": 0.607980489730835, | |
| "step": 518 | |
| }, | |
| { | |
| "epoch": 2.0717131474103585, | |
| "grad_norm": 1.4660816192626953, | |
| "learning_rate": 1.6878227266324096e-06, | |
| "loss": 0.9215792417526245, | |
| "step": 520 | |
| }, | |
| { | |
| "epoch": 2.0796812749003983, | |
| "grad_norm": 0.5361967086791992, | |
| "learning_rate": 1.6789248332112101e-06, | |
| "loss": 1.0473201274871826, | |
| "step": 522 | |
| }, | |
| { | |
| "epoch": 2.087649402390438, | |
| "grad_norm": 0.8016868233680725, | |
| "learning_rate": 1.6700256824775327e-06, | |
| "loss": 1.0217143297195435, | |
| "step": 524 | |
| }, | |
| { | |
| "epoch": 2.095617529880478, | |
| "grad_norm": 1.3889236450195312, | |
| "learning_rate": 1.6611256612620702e-06, | |
| "loss": 0.4960322380065918, | |
| "step": 526 | |
| }, | |
| { | |
| "epoch": 2.103585657370518, | |
| "grad_norm": 0.5141186714172363, | |
| "learning_rate": 1.6522251564333527e-06, | |
| "loss": 0.8312227725982666, | |
| "step": 528 | |
| }, | |
| { | |
| "epoch": 2.1115537848605577, | |
| "grad_norm": 0.3196059763431549, | |
| "learning_rate": 1.6433245548809335e-06, | |
| "loss": 0.7172638773918152, | |
| "step": 530 | |
| }, | |
| { | |
| "epoch": 2.1195219123505975, | |
| "grad_norm": 0.35435977578163147, | |
| "learning_rate": 1.6344242434985692e-06, | |
| "loss": 0.44665032625198364, | |
| "step": 532 | |
| }, | |
| { | |
| "epoch": 2.1274900398406373, | |
| "grad_norm": 0.41664063930511475, | |
| "learning_rate": 1.6255246091674037e-06, | |
| "loss": 0.8600306510925293, | |
| "step": 534 | |
| }, | |
| { | |
| "epoch": 2.135458167330677, | |
| "grad_norm": 14.959745407104492, | |
| "learning_rate": 1.61662603873915e-06, | |
| "loss": 0.6659224629402161, | |
| "step": 536 | |
| }, | |
| { | |
| "epoch": 2.143426294820717, | |
| "grad_norm": 0.8924800753593445, | |
| "learning_rate": 1.607728919019277e-06, | |
| "loss": 0.6953543424606323, | |
| "step": 538 | |
| }, | |
| { | |
| "epoch": 2.151394422310757, | |
| "grad_norm": 0.5326530337333679, | |
| "learning_rate": 1.5988336367501924e-06, | |
| "loss": 1.0279146432876587, | |
| "step": 540 | |
| }, | |
| { | |
| "epoch": 2.1593625498007967, | |
| "grad_norm": 1.0475772619247437, | |
| "learning_rate": 1.5899405785944315e-06, | |
| "loss": 0.9707013368606567, | |
| "step": 542 | |
| }, | |
| { | |
| "epoch": 2.1673306772908365, | |
| "grad_norm": 0.4958283305168152, | |
| "learning_rate": 1.5810501311178543e-06, | |
| "loss": 0.9405574798583984, | |
| "step": 544 | |
| }, | |
| { | |
| "epoch": 2.1752988047808763, | |
| "grad_norm": 0.49418023228645325, | |
| "learning_rate": 1.5721626807728383e-06, | |
| "loss": 0.7293884754180908, | |
| "step": 546 | |
| }, | |
| { | |
| "epoch": 2.183266932270916, | |
| "grad_norm": 0.5774815678596497, | |
| "learning_rate": 1.5632786138814786e-06, | |
| "loss": 1.044211983680725, | |
| "step": 548 | |
| }, | |
| { | |
| "epoch": 2.191235059760956, | |
| "grad_norm": 0.4045267105102539, | |
| "learning_rate": 1.5543983166187998e-06, | |
| "loss": 1.1075928211212158, | |
| "step": 550 | |
| }, | |
| { | |
| "epoch": 2.199203187250996, | |
| "grad_norm": 1.0713002681732178, | |
| "learning_rate": 1.5455221749959674e-06, | |
| "loss": 0.7906201481819153, | |
| "step": 552 | |
| }, | |
| { | |
| "epoch": 2.2071713147410357, | |
| "grad_norm": 1.4011352062225342, | |
| "learning_rate": 1.5366505748435069e-06, | |
| "loss": 0.529036819934845, | |
| "step": 554 | |
| }, | |
| { | |
| "epoch": 2.2151394422310755, | |
| "grad_norm": 0.5856610536575317, | |
| "learning_rate": 1.5277839017945342e-06, | |
| "loss": 0.6787835359573364, | |
| "step": 556 | |
| }, | |
| { | |
| "epoch": 2.2231075697211153, | |
| "grad_norm": 5.952252388000488, | |
| "learning_rate": 1.5189225412679937e-06, | |
| "loss": 0.8648924231529236, | |
| "step": 558 | |
| }, | |
| { | |
| "epoch": 2.231075697211155, | |
| "grad_norm": 0.513113260269165, | |
| "learning_rate": 1.5100668784519027e-06, | |
| "loss": 0.9975270628929138, | |
| "step": 560 | |
| }, | |
| { | |
| "epoch": 2.239043824701195, | |
| "grad_norm": 0.4902089536190033, | |
| "learning_rate": 1.5012172982866095e-06, | |
| "loss": 1.0877983570098877, | |
| "step": 562 | |
| }, | |
| { | |
| "epoch": 2.247011952191235, | |
| "grad_norm": 1.1996077299118042, | |
| "learning_rate": 1.4923741854480581e-06, | |
| "loss": 0.8970789909362793, | |
| "step": 564 | |
| }, | |
| { | |
| "epoch": 2.2549800796812747, | |
| "grad_norm": 0.8139443397521973, | |
| "learning_rate": 1.4835379243310724e-06, | |
| "loss": 0.7339902520179749, | |
| "step": 566 | |
| }, | |
| { | |
| "epoch": 2.2629482071713145, | |
| "grad_norm": 0.3560384511947632, | |
| "learning_rate": 1.4747088990326413e-06, | |
| "loss": 0.8087087869644165, | |
| "step": 568 | |
| }, | |
| { | |
| "epoch": 2.2709163346613543, | |
| "grad_norm": 0.4162435531616211, | |
| "learning_rate": 1.4658874933352252e-06, | |
| "loss": 1.0762511491775513, | |
| "step": 570 | |
| }, | |
| { | |
| "epoch": 2.278884462151394, | |
| "grad_norm": 0.9825732707977295, | |
| "learning_rate": 1.4570740906900752e-06, | |
| "loss": 0.6623214483261108, | |
| "step": 572 | |
| }, | |
| { | |
| "epoch": 2.2868525896414345, | |
| "grad_norm": 0.3757762908935547, | |
| "learning_rate": 1.448269074200563e-06, | |
| "loss": 0.7657751441001892, | |
| "step": 574 | |
| }, | |
| { | |
| "epoch": 2.2948207171314743, | |
| "grad_norm": 0.5589696168899536, | |
| "learning_rate": 1.4394728266055265e-06, | |
| "loss": 0.3244088888168335, | |
| "step": 576 | |
| }, | |
| { | |
| "epoch": 2.302788844621514, | |
| "grad_norm": 0.7052986025810242, | |
| "learning_rate": 1.4306857302626383e-06, | |
| "loss": 0.6619569659233093, | |
| "step": 578 | |
| }, | |
| { | |
| "epoch": 2.310756972111554, | |
| "grad_norm": 0.425970196723938, | |
| "learning_rate": 1.4219081671317795e-06, | |
| "loss": 0.6385777592658997, | |
| "step": 580 | |
| }, | |
| { | |
| "epoch": 2.318725099601594, | |
| "grad_norm": 0.6843652129173279, | |
| "learning_rate": 1.4131405187584408e-06, | |
| "loss": 0.7549704909324646, | |
| "step": 582 | |
| }, | |
| { | |
| "epoch": 2.3266932270916336, | |
| "grad_norm": 0.3465102016925812, | |
| "learning_rate": 1.4043831662571323e-06, | |
| "loss": 0.8688426613807678, | |
| "step": 584 | |
| }, | |
| { | |
| "epoch": 2.3346613545816735, | |
| "grad_norm": 0.3654971122741699, | |
| "learning_rate": 1.3956364902948247e-06, | |
| "loss": 0.6648116707801819, | |
| "step": 586 | |
| }, | |
| { | |
| "epoch": 2.3426294820717133, | |
| "grad_norm": 0.744594395160675, | |
| "learning_rate": 1.3869008710743948e-06, | |
| "loss": 0.9290102124214172, | |
| "step": 588 | |
| }, | |
| { | |
| "epoch": 2.350597609561753, | |
| "grad_norm": 0.4022054672241211, | |
| "learning_rate": 1.378176688318103e-06, | |
| "loss": 1.0422040224075317, | |
| "step": 590 | |
| }, | |
| { | |
| "epoch": 2.358565737051793, | |
| "grad_norm": 0.5497289896011353, | |
| "learning_rate": 1.3694643212510864e-06, | |
| "loss": 1.1029999256134033, | |
| "step": 592 | |
| }, | |
| { | |
| "epoch": 2.366533864541833, | |
| "grad_norm": 0.5594106912612915, | |
| "learning_rate": 1.3607641485848747e-06, | |
| "loss": 0.7941989898681641, | |
| "step": 594 | |
| }, | |
| { | |
| "epoch": 2.3745019920318726, | |
| "grad_norm": 0.5195335149765015, | |
| "learning_rate": 1.352076548500928e-06, | |
| "loss": 1.055949091911316, | |
| "step": 596 | |
| }, | |
| { | |
| "epoch": 2.3824701195219125, | |
| "grad_norm": 0.7671335935592651, | |
| "learning_rate": 1.343401898634197e-06, | |
| "loss": 0.747549831867218, | |
| "step": 598 | |
| }, | |
| { | |
| "epoch": 2.3904382470119523, | |
| "grad_norm": 5.928802490234375, | |
| "learning_rate": 1.3347405760567109e-06, | |
| "loss": 1.0375477075576782, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 2.398406374501992, | |
| "grad_norm": 1.452104926109314, | |
| "learning_rate": 1.326092957261183e-06, | |
| "loss": 1.0605626106262207, | |
| "step": 602 | |
| }, | |
| { | |
| "epoch": 2.406374501992032, | |
| "grad_norm": 0.45336440205574036, | |
| "learning_rate": 1.317459418144647e-06, | |
| "loss": 0.6427274942398071, | |
| "step": 604 | |
| }, | |
| { | |
| "epoch": 2.414342629482072, | |
| "grad_norm": 0.9673005938529968, | |
| "learning_rate": 1.308840333992118e-06, | |
| "loss": 0.2522476315498352, | |
| "step": 606 | |
| }, | |
| { | |
| "epoch": 2.4223107569721116, | |
| "grad_norm": 0.3540583848953247, | |
| "learning_rate": 1.3002360794602787e-06, | |
| "loss": 0.6453299522399902, | |
| "step": 608 | |
| }, | |
| { | |
| "epoch": 2.4302788844621515, | |
| "grad_norm": 0.20231011509895325, | |
| "learning_rate": 1.2916470285611936e-06, | |
| "loss": 0.9828154444694519, | |
| "step": 610 | |
| }, | |
| { | |
| "epoch": 2.4382470119521913, | |
| "grad_norm": 0.3596932291984558, | |
| "learning_rate": 1.283073554646051e-06, | |
| "loss": 0.7582497000694275, | |
| "step": 612 | |
| }, | |
| { | |
| "epoch": 2.446215139442231, | |
| "grad_norm": 0.47144603729248047, | |
| "learning_rate": 1.274516030388936e-06, | |
| "loss": 0.6129385232925415, | |
| "step": 614 | |
| }, | |
| { | |
| "epoch": 2.454183266932271, | |
| "grad_norm": 0.6285446286201477, | |
| "learning_rate": 1.2659748277706292e-06, | |
| "loss": 0.9842470288276672, | |
| "step": 616 | |
| }, | |
| { | |
| "epoch": 2.462151394422311, | |
| "grad_norm": 0.7511582970619202, | |
| "learning_rate": 1.257450318062436e-06, | |
| "loss": 0.8260526657104492, | |
| "step": 618 | |
| }, | |
| { | |
| "epoch": 2.4701195219123506, | |
| "grad_norm": 1.097211480140686, | |
| "learning_rate": 1.2489428718100534e-06, | |
| "loss": 0.9079670310020447, | |
| "step": 620 | |
| }, | |
| { | |
| "epoch": 2.4780876494023905, | |
| "grad_norm": 0.4593997895717621, | |
| "learning_rate": 1.2404528588174562e-06, | |
| "loss": 0.9143153429031372, | |
| "step": 622 | |
| }, | |
| { | |
| "epoch": 2.4860557768924303, | |
| "grad_norm": 0.5682267546653748, | |
| "learning_rate": 1.2319806481308265e-06, | |
| "loss": 0.8137405514717102, | |
| "step": 624 | |
| }, | |
| { | |
| "epoch": 2.49402390438247, | |
| "grad_norm": 3.3198330402374268, | |
| "learning_rate": 1.2235266080225118e-06, | |
| "loss": 0.705787718296051, | |
| "step": 626 | |
| }, | |
| { | |
| "epoch": 2.50199203187251, | |
| "grad_norm": 0.6641273498535156, | |
| "learning_rate": 1.2150911059750159e-06, | |
| "loss": 1.0320712327957153, | |
| "step": 628 | |
| }, | |
| { | |
| "epoch": 2.50996015936255, | |
| "grad_norm": 0.8049502372741699, | |
| "learning_rate": 1.2066745086650239e-06, | |
| "loss": 0.5817446708679199, | |
| "step": 630 | |
| }, | |
| { | |
| "epoch": 2.5179282868525896, | |
| "grad_norm": 1.7005666494369507, | |
| "learning_rate": 1.1982771819474656e-06, | |
| "loss": 0.49497243762016296, | |
| "step": 632 | |
| }, | |
| { | |
| "epoch": 2.5258964143426295, | |
| "grad_norm": 0.48148971796035767, | |
| "learning_rate": 1.1898994908396118e-06, | |
| "loss": 1.0635162591934204, | |
| "step": 634 | |
| }, | |
| { | |
| "epoch": 2.5338645418326693, | |
| "grad_norm": 1.0913236141204834, | |
| "learning_rate": 1.1815417995052062e-06, | |
| "loss": 0.9964134693145752, | |
| "step": 636 | |
| }, | |
| { | |
| "epoch": 2.541832669322709, | |
| "grad_norm": 1.4037988185882568, | |
| "learning_rate": 1.173204471238638e-06, | |
| "loss": 1.0414139032363892, | |
| "step": 638 | |
| }, | |
| { | |
| "epoch": 2.549800796812749, | |
| "grad_norm": 0.8760337233543396, | |
| "learning_rate": 1.1648878684491478e-06, | |
| "loss": 0.8619127869606018, | |
| "step": 640 | |
| }, | |
| { | |
| "epoch": 2.557768924302789, | |
| "grad_norm": 0.4692465662956238, | |
| "learning_rate": 1.1565923526450775e-06, | |
| "loss": 1.0320566892623901, | |
| "step": 642 | |
| }, | |
| { | |
| "epoch": 2.5657370517928286, | |
| "grad_norm": 0.36346501111984253, | |
| "learning_rate": 1.148318284418153e-06, | |
| "loss": 1.2459909915924072, | |
| "step": 644 | |
| }, | |
| { | |
| "epoch": 2.5737051792828685, | |
| "grad_norm": 0.4973103106021881, | |
| "learning_rate": 1.1400660234278099e-06, | |
| "loss": 0.5828899145126343, | |
| "step": 646 | |
| }, | |
| { | |
| "epoch": 2.5816733067729083, | |
| "grad_norm": 0.5622968077659607, | |
| "learning_rate": 1.1318359283855633e-06, | |
| "loss": 1.0787078142166138, | |
| "step": 648 | |
| }, | |
| { | |
| "epoch": 2.589641434262948, | |
| "grad_norm": 0.41192811727523804, | |
| "learning_rate": 1.1236283570394122e-06, | |
| "loss": 0.8327584862709045, | |
| "step": 650 | |
| }, | |
| { | |
| "epoch": 2.597609561752988, | |
| "grad_norm": 4.0215840339660645, | |
| "learning_rate": 1.1154436661582873e-06, | |
| "loss": 0.997661828994751, | |
| "step": 652 | |
| }, | |
| { | |
| "epoch": 2.605577689243028, | |
| "grad_norm": 0.40926310420036316, | |
| "learning_rate": 1.1072822115165488e-06, | |
| "loss": 0.6634190678596497, | |
| "step": 654 | |
| }, | |
| { | |
| "epoch": 2.6135458167330676, | |
| "grad_norm": 0.556077241897583, | |
| "learning_rate": 1.0991443478785146e-06, | |
| "loss": 0.38381195068359375, | |
| "step": 656 | |
| }, | |
| { | |
| "epoch": 2.6215139442231075, | |
| "grad_norm": 0.8958475589752197, | |
| "learning_rate": 1.0910304289830445e-06, | |
| "loss": 0.5694887042045593, | |
| "step": 658 | |
| }, | |
| { | |
| "epoch": 2.6294820717131473, | |
| "grad_norm": 0.31501585245132446, | |
| "learning_rate": 1.08294080752816e-06, | |
| "loss": 0.9161617159843445, | |
| "step": 660 | |
| }, | |
| { | |
| "epoch": 2.637450199203187, | |
| "grad_norm": 0.8538661003112793, | |
| "learning_rate": 1.074875835155716e-06, | |
| "loss": 1.10606050491333, | |
| "step": 662 | |
| }, | |
| { | |
| "epoch": 2.645418326693227, | |
| "grad_norm": 0.8182650804519653, | |
| "learning_rate": 1.0668358624361148e-06, | |
| "loss": 0.672410249710083, | |
| "step": 664 | |
| }, | |
| { | |
| "epoch": 2.653386454183267, | |
| "grad_norm": 1.2558993101119995, | |
| "learning_rate": 1.0588212388530662e-06, | |
| "loss": 0.969312310218811, | |
| "step": 666 | |
| }, | |
| { | |
| "epoch": 2.6613545816733066, | |
| "grad_norm": 0.24255388975143433, | |
| "learning_rate": 1.050832312788396e-06, | |
| "loss": 0.5811322331428528, | |
| "step": 668 | |
| }, | |
| { | |
| "epoch": 2.6693227091633465, | |
| "grad_norm": 0.8314858675003052, | |
| "learning_rate": 1.042869431506906e-06, | |
| "loss": 0.7844950556755066, | |
| "step": 670 | |
| }, | |
| { | |
| "epoch": 2.6772908366533863, | |
| "grad_norm": 0.1939828097820282, | |
| "learning_rate": 1.034932941141274e-06, | |
| "loss": 0.44791269302368164, | |
| "step": 672 | |
| }, | |
| { | |
| "epoch": 2.685258964143426, | |
| "grad_norm": 0.3819931745529175, | |
| "learning_rate": 1.0270231866770115e-06, | |
| "loss": 1.0550973415374756, | |
| "step": 674 | |
| }, | |
| { | |
| "epoch": 2.6932270916334664, | |
| "grad_norm": 0.8612610101699829, | |
| "learning_rate": 1.019140511937465e-06, | |
| "loss": 0.7458590269088745, | |
| "step": 676 | |
| }, | |
| { | |
| "epoch": 2.7011952191235062, | |
| "grad_norm": 0.5282925963401794, | |
| "learning_rate": 1.011285259568875e-06, | |
| "loss": 0.6542291641235352, | |
| "step": 678 | |
| }, | |
| { | |
| "epoch": 2.709163346613546, | |
| "grad_norm": 0.45834335684776306, | |
| "learning_rate": 1.0034577710254754e-06, | |
| "loss": 1.0501289367675781, | |
| "step": 680 | |
| }, | |
| { | |
| "epoch": 2.717131474103586, | |
| "grad_norm": 0.4280013144016266, | |
| "learning_rate": 9.956583865546576e-07, | |
| "loss": 1.0795950889587402, | |
| "step": 682 | |
| }, | |
| { | |
| "epoch": 2.7250996015936257, | |
| "grad_norm": 0.4833294451236725, | |
| "learning_rate": 9.878874451821777e-07, | |
| "loss": 0.7288949489593506, | |
| "step": 684 | |
| }, | |
| { | |
| "epoch": 2.7330677290836656, | |
| "grad_norm": 1.0351470708847046, | |
| "learning_rate": 9.801452846974161e-07, | |
| "loss": 0.5564233660697937, | |
| "step": 686 | |
| }, | |
| { | |
| "epoch": 2.7410358565737054, | |
| "grad_norm": 0.41580677032470703, | |
| "learning_rate": 9.724322416387011e-07, | |
| "loss": 1.0649595260620117, | |
| "step": 688 | |
| }, | |
| { | |
| "epoch": 2.7490039840637452, | |
| "grad_norm": 1.2443572282791138, | |
| "learning_rate": 9.647486512786754e-07, | |
| "loss": 0.5668457746505737, | |
| "step": 690 | |
| }, | |
| { | |
| "epoch": 2.756972111553785, | |
| "grad_norm": 0.47028157114982605, | |
| "learning_rate": 9.570948476097252e-07, | |
| "loss": 1.165488839149475, | |
| "step": 692 | |
| }, | |
| { | |
| "epoch": 2.764940239043825, | |
| "grad_norm": 1.4404064416885376, | |
| "learning_rate": 9.494711633294586e-07, | |
| "loss": 0.9548084735870361, | |
| "step": 694 | |
| }, | |
| { | |
| "epoch": 2.7729083665338647, | |
| "grad_norm": 2.1831867694854736, | |
| "learning_rate": 9.41877929826247e-07, | |
| "loss": 0.8034579753875732, | |
| "step": 696 | |
| }, | |
| { | |
| "epoch": 2.7808764940239046, | |
| "grad_norm": 0.36618274450302124, | |
| "learning_rate": 9.343154771648201e-07, | |
| "loss": 0.9508860111236572, | |
| "step": 698 | |
| }, | |
| { | |
| "epoch": 2.7888446215139444, | |
| "grad_norm": 0.451259970664978, | |
| "learning_rate": 9.267841340719161e-07, | |
| "loss": 1.0815647840499878, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 2.7968127490039842, | |
| "grad_norm": 0.457039475440979, | |
| "learning_rate": 9.192842279219954e-07, | |
| "loss": 0.7261126041412354, | |
| "step": 702 | |
| }, | |
| { | |
| "epoch": 2.804780876494024, | |
| "grad_norm": 0.9726790189743042, | |
| "learning_rate": 9.118160847230074e-07, | |
| "loss": 1.3043347597122192, | |
| "step": 704 | |
| }, | |
| { | |
| "epoch": 2.812749003984064, | |
| "grad_norm": 0.2558709681034088, | |
| "learning_rate": 9.043800291022225e-07, | |
| "loss": 1.290697455406189, | |
| "step": 706 | |
| }, | |
| { | |
| "epoch": 2.8207171314741037, | |
| "grad_norm": 0.5993645787239075, | |
| "learning_rate": 8.969763842921183e-07, | |
| "loss": 1.1300464868545532, | |
| "step": 708 | |
| }, | |
| { | |
| "epoch": 2.8286852589641436, | |
| "grad_norm": 0.21556946635246277, | |
| "learning_rate": 8.89605472116331e-07, | |
| "loss": 0.5522478818893433, | |
| "step": 710 | |
| }, | |
| { | |
| "epoch": 2.8366533864541834, | |
| "grad_norm": 0.5506062507629395, | |
| "learning_rate": 8.822676129756673e-07, | |
| "loss": 1.087594985961914, | |
| "step": 712 | |
| }, | |
| { | |
| "epoch": 2.8446215139442232, | |
| "grad_norm": 0.34245195984840393, | |
| "learning_rate": 8.749631258341722e-07, | |
| "loss": 0.6451638340950012, | |
| "step": 714 | |
| }, | |
| { | |
| "epoch": 2.852589641434263, | |
| "grad_norm": 0.330020934343338, | |
| "learning_rate": 8.676923282052703e-07, | |
| "loss": 1.0628772974014282, | |
| "step": 716 | |
| }, | |
| { | |
| "epoch": 2.860557768924303, | |
| "grad_norm": 1.4934767484664917, | |
| "learning_rate": 8.604555361379602e-07, | |
| "loss": 0.6799197196960449, | |
| "step": 718 | |
| }, | |
| { | |
| "epoch": 2.8685258964143427, | |
| "grad_norm": 0.47209084033966064, | |
| "learning_rate": 8.532530642030793e-07, | |
| "loss": 0.6012248396873474, | |
| "step": 720 | |
| }, | |
| { | |
| "epoch": 2.8764940239043826, | |
| "grad_norm": 1.5172817707061768, | |
| "learning_rate": 8.46085225479626e-07, | |
| "loss": 0.8212974667549133, | |
| "step": 722 | |
| }, | |
| { | |
| "epoch": 2.8844621513944224, | |
| "grad_norm": 0.3270165026187897, | |
| "learning_rate": 8.389523315411531e-07, | |
| "loss": 0.6892848610877991, | |
| "step": 724 | |
| }, | |
| { | |
| "epoch": 2.8924302788844622, | |
| "grad_norm": 0.34172943234443665, | |
| "learning_rate": 8.318546924422257e-07, | |
| "loss": 1.040250539779663, | |
| "step": 726 | |
| }, | |
| { | |
| "epoch": 2.900398406374502, | |
| "grad_norm": 0.3343369960784912, | |
| "learning_rate": 8.247926167049404e-07, | |
| "loss": 0.591463029384613, | |
| "step": 728 | |
| }, | |
| { | |
| "epoch": 2.908366533864542, | |
| "grad_norm": 0.37521132826805115, | |
| "learning_rate": 8.177664113055171e-07, | |
| "loss": 1.0062123537063599, | |
| "step": 730 | |
| }, | |
| { | |
| "epoch": 2.9163346613545817, | |
| "grad_norm": 0.31653112173080444, | |
| "learning_rate": 8.107763816609526e-07, | |
| "loss": 0.48367780447006226, | |
| "step": 732 | |
| }, | |
| { | |
| "epoch": 2.9243027888446216, | |
| "grad_norm": 0.6464031338691711, | |
| "learning_rate": 8.038228316157484e-07, | |
| "loss": 1.0752416849136353, | |
| "step": 734 | |
| }, | |
| { | |
| "epoch": 2.9322709163346614, | |
| "grad_norm": 1.5205926895141602, | |
| "learning_rate": 7.96906063428699e-07, | |
| "loss": 0.4515492022037506, | |
| "step": 736 | |
| }, | |
| { | |
| "epoch": 2.9402390438247012, | |
| "grad_norm": 1.017087459564209, | |
| "learning_rate": 7.900263777597554e-07, | |
| "loss": 0.9680182337760925, | |
| "step": 738 | |
| }, | |
| { | |
| "epoch": 2.948207171314741, | |
| "grad_norm": 0.33580952882766724, | |
| "learning_rate": 7.831840736569573e-07, | |
| "loss": 1.2022464275360107, | |
| "step": 740 | |
| }, | |
| { | |
| "epoch": 2.956175298804781, | |
| "grad_norm": 0.9194537997245789, | |
| "learning_rate": 7.763794485434306e-07, | |
| "loss": 0.5004313588142395, | |
| "step": 742 | |
| }, | |
| { | |
| "epoch": 2.9641434262948207, | |
| "grad_norm": 0.4175858199596405, | |
| "learning_rate": 7.696127982044607e-07, | |
| "loss": 0.9552463889122009, | |
| "step": 744 | |
| }, | |
| { | |
| "epoch": 2.9721115537848606, | |
| "grad_norm": 0.7210242748260498, | |
| "learning_rate": 7.628844167746361e-07, | |
| "loss": 0.9815269112586975, | |
| "step": 746 | |
| }, | |
| { | |
| "epoch": 2.9800796812749004, | |
| "grad_norm": 1.3052399158477783, | |
| "learning_rate": 7.561945967250626e-07, | |
| "loss": 0.5944569706916809, | |
| "step": 748 | |
| }, | |
| { | |
| "epoch": 2.9880478087649402, | |
| "grad_norm": 0.40495985746383667, | |
| "learning_rate": 7.495436288506475e-07, | |
| "loss": 0.1485452651977539, | |
| "step": 750 | |
| }, | |
| { | |
| "epoch": 2.99601593625498, | |
| "grad_norm": 0.40999165177345276, | |
| "learning_rate": 7.429318022574623e-07, | |
| "loss": 1.0614784955978394, | |
| "step": 752 | |
| }, | |
| { | |
| "epoch": 3.00398406374502, | |
| "grad_norm": 0.4904988408088684, | |
| "learning_rate": 7.363594043501752e-07, | |
| "loss": 0.5685489177703857, | |
| "step": 754 | |
| }, | |
| { | |
| "epoch": 3.0119521912350598, | |
| "grad_norm": 0.6486766338348389, | |
| "learning_rate": 7.298267208195567e-07, | |
| "loss": 0.6701474785804749, | |
| "step": 756 | |
| }, | |
| { | |
| "epoch": 3.0199203187250996, | |
| "grad_norm": 0.38344699144363403, | |
| "learning_rate": 7.233340356300632e-07, | |
| "loss": 0.4652033746242523, | |
| "step": 758 | |
| }, | |
| { | |
| "epoch": 3.0278884462151394, | |
| "grad_norm": 0.3574877679347992, | |
| "learning_rate": 7.16881631007491e-07, | |
| "loss": 0.7689741849899292, | |
| "step": 760 | |
| }, | |
| { | |
| "epoch": 3.0358565737051793, | |
| "grad_norm": 0.7075855731964111, | |
| "learning_rate": 7.1046978742671e-07, | |
| "loss": 0.571841835975647, | |
| "step": 762 | |
| }, | |
| { | |
| "epoch": 3.043824701195219, | |
| "grad_norm": 0.4493393898010254, | |
| "learning_rate": 7.040987835994727e-07, | |
| "loss": 1.018668532371521, | |
| "step": 764 | |
| }, | |
| { | |
| "epoch": 3.051792828685259, | |
| "grad_norm": 1.3887406587600708, | |
| "learning_rate": 6.977688964622978e-07, | |
| "loss": 0.19836187362670898, | |
| "step": 766 | |
| }, | |
| { | |
| "epoch": 3.0597609561752988, | |
| "grad_norm": 0.565453290939331, | |
| "learning_rate": 6.914804011644326e-07, | |
| "loss": 0.7899960875511169, | |
| "step": 768 | |
| }, | |
| { | |
| "epoch": 3.0677290836653386, | |
| "grad_norm": 0.6862173676490784, | |
| "learning_rate": 6.852335710558922e-07, | |
| "loss": 0.9098981618881226, | |
| "step": 770 | |
| }, | |
| { | |
| "epoch": 3.0756972111553784, | |
| "grad_norm": 0.5152872204780579, | |
| "learning_rate": 6.790286776755779e-07, | |
| "loss": 1.0554358959197998, | |
| "step": 772 | |
| }, | |
| { | |
| "epoch": 3.0836653386454183, | |
| "grad_norm": 1.018154263496399, | |
| "learning_rate": 6.728659907394755e-07, | |
| "loss": 0.9298610091209412, | |
| "step": 774 | |
| }, | |
| { | |
| "epoch": 3.091633466135458, | |
| "grad_norm": 0.14560498297214508, | |
| "learning_rate": 6.667457781289271e-07, | |
| "loss": 0.60512375831604, | |
| "step": 776 | |
| }, | |
| { | |
| "epoch": 3.099601593625498, | |
| "grad_norm": 0.48420849442481995, | |
| "learning_rate": 6.606683058789922e-07, | |
| "loss": 0.6643580198287964, | |
| "step": 778 | |
| }, | |
| { | |
| "epoch": 3.1075697211155378, | |
| "grad_norm": 0.40179142355918884, | |
| "learning_rate": 6.546338381668782e-07, | |
| "loss": 1.0011398792266846, | |
| "step": 780 | |
| }, | |
| { | |
| "epoch": 3.1155378486055776, | |
| "grad_norm": 0.8947471380233765, | |
| "learning_rate": 6.486426373004613e-07, | |
| "loss": 0.41442957520484924, | |
| "step": 782 | |
| }, | |
| { | |
| "epoch": 3.1235059760956174, | |
| "grad_norm": 0.4902297556400299, | |
| "learning_rate": 6.42694963706882e-07, | |
| "loss": 1.1005361080169678, | |
| "step": 784 | |
| }, | |
| { | |
| "epoch": 3.1314741035856573, | |
| "grad_norm": 0.35919007658958435, | |
| "learning_rate": 6.367910759212253e-07, | |
| "loss": 0.8759674429893494, | |
| "step": 786 | |
| }, | |
| { | |
| "epoch": 3.139442231075697, | |
| "grad_norm": 1.3167163133621216, | |
| "learning_rate": 6.309312305752845e-07, | |
| "loss": 0.718060314655304, | |
| "step": 788 | |
| }, | |
| { | |
| "epoch": 3.147410358565737, | |
| "grad_norm": 0.24750226736068726, | |
| "learning_rate": 6.251156823864013e-07, | |
| "loss": 0.6157811284065247, | |
| "step": 790 | |
| }, | |
| { | |
| "epoch": 3.1553784860557768, | |
| "grad_norm": 1.525368094444275, | |
| "learning_rate": 6.19344684146399e-07, | |
| "loss": 0.8752233982086182, | |
| "step": 792 | |
| }, | |
| { | |
| "epoch": 3.1633466135458166, | |
| "grad_norm": 0.071062371134758, | |
| "learning_rate": 6.136184867105907e-07, | |
| "loss": 0.3096681237220764, | |
| "step": 794 | |
| }, | |
| { | |
| "epoch": 3.1713147410358564, | |
| "grad_norm": 0.767979085445404, | |
| "learning_rate": 6.079373389868767e-07, | |
| "loss": 0.7163103818893433, | |
| "step": 796 | |
| }, | |
| { | |
| "epoch": 3.1792828685258963, | |
| "grad_norm": 0.4465477168560028, | |
| "learning_rate": 6.023014879249236e-07, | |
| "loss": 0.5860901474952698, | |
| "step": 798 | |
| }, | |
| { | |
| "epoch": 3.187250996015936, | |
| "grad_norm": 1.0776124000549316, | |
| "learning_rate": 5.967111785054299e-07, | |
| "loss": 0.9502109289169312, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 3.195219123505976, | |
| "grad_norm": 0.3106825649738312, | |
| "learning_rate": 5.91166653729479e-07, | |
| "loss": 0.7821487188339233, | |
| "step": 802 | |
| }, | |
| { | |
| "epoch": 3.2031872509960158, | |
| "grad_norm": 5.8575263023376465, | |
| "learning_rate": 5.85668154607974e-07, | |
| "loss": 0.7681574821472168, | |
| "step": 804 | |
| }, | |
| { | |
| "epoch": 3.2111553784860556, | |
| "grad_norm": 1.7226320505142212, | |
| "learning_rate": 5.802159201511634e-07, | |
| "loss": 1.3093531131744385, | |
| "step": 806 | |
| }, | |
| { | |
| "epoch": 3.2191235059760954, | |
| "grad_norm": 0.9542534947395325, | |
| "learning_rate": 5.748101873582492e-07, | |
| "loss": 0.5746437907218933, | |
| "step": 808 | |
| }, | |
| { | |
| "epoch": 3.2270916334661353, | |
| "grad_norm": 0.6066504716873169, | |
| "learning_rate": 5.69451191207088e-07, | |
| "loss": 0.8110766410827637, | |
| "step": 810 | |
| }, | |
| { | |
| "epoch": 3.235059760956175, | |
| "grad_norm": 0.8804686665534973, | |
| "learning_rate": 5.641391646439746e-07, | |
| "loss": 0.42266717553138733, | |
| "step": 812 | |
| }, | |
| { | |
| "epoch": 3.243027888446215, | |
| "grad_norm": 0.3170583248138428, | |
| "learning_rate": 5.588743385735169e-07, | |
| "loss": 0.9301043748855591, | |
| "step": 814 | |
| }, | |
| { | |
| "epoch": 3.2509960159362548, | |
| "grad_norm": 3.740297317504883, | |
| "learning_rate": 5.536569418486005e-07, | |
| "loss": 0.4588174819946289, | |
| "step": 816 | |
| }, | |
| { | |
| "epoch": 3.2589641434262946, | |
| "grad_norm": 0.40485963225364685, | |
| "learning_rate": 5.484872012604372e-07, | |
| "loss": 1.029528260231018, | |
| "step": 818 | |
| }, | |
| { | |
| "epoch": 3.2669322709163344, | |
| "grad_norm": 1.3435243368148804, | |
| "learning_rate": 5.433653415287097e-07, | |
| "loss": 0.6181462407112122, | |
| "step": 820 | |
| }, | |
| { | |
| "epoch": 3.2749003984063743, | |
| "grad_norm": 0.7431517243385315, | |
| "learning_rate": 5.38291585291804e-07, | |
| "loss": 0.8793559074401855, | |
| "step": 822 | |
| }, | |
| { | |
| "epoch": 3.2828685258964145, | |
| "grad_norm": 0.7615469098091125, | |
| "learning_rate": 5.332661530971281e-07, | |
| "loss": 0.6390237808227539, | |
| "step": 824 | |
| }, | |
| { | |
| "epoch": 3.2908366533864544, | |
| "grad_norm": 0.2795827388763428, | |
| "learning_rate": 5.282892633915299e-07, | |
| "loss": 1.0185526609420776, | |
| "step": 826 | |
| }, | |
| { | |
| "epoch": 3.298804780876494, | |
| "grad_norm": 0.4049527943134308, | |
| "learning_rate": 5.23361132511797e-07, | |
| "loss": 1.022945523262024, | |
| "step": 828 | |
| }, | |
| { | |
| "epoch": 3.306772908366534, | |
| "grad_norm": 0.4463924169540405, | |
| "learning_rate": 5.184819746752567e-07, | |
| "loss": 0.9758146405220032, | |
| "step": 830 | |
| }, | |
| { | |
| "epoch": 3.314741035856574, | |
| "grad_norm": 1.3453718423843384, | |
| "learning_rate": 5.136520019704622e-07, | |
| "loss": 0.6447398662567139, | |
| "step": 832 | |
| }, | |
| { | |
| "epoch": 3.3227091633466137, | |
| "grad_norm": 0.7485759258270264, | |
| "learning_rate": 5.088714243479742e-07, | |
| "loss": 0.3966885209083557, | |
| "step": 834 | |
| }, | |
| { | |
| "epoch": 3.3306772908366535, | |
| "grad_norm": 2.261605739593506, | |
| "learning_rate": 5.041404496112345e-07, | |
| "loss": 0.6890445351600647, | |
| "step": 836 | |
| }, | |
| { | |
| "epoch": 3.3386454183266934, | |
| "grad_norm": 0.7466967701911926, | |
| "learning_rate": 4.994592834075328e-07, | |
| "loss": 0.9410973191261292, | |
| "step": 838 | |
| }, | |
| { | |
| "epoch": 3.346613545816733, | |
| "grad_norm": 1.0626827478408813, | |
| "learning_rate": 4.948281292190692e-07, | |
| "loss": 0.24332815408706665, | |
| "step": 840 | |
| }, | |
| { | |
| "epoch": 3.354581673306773, | |
| "grad_norm": 0.5983356833457947, | |
| "learning_rate": 4.90247188354107e-07, | |
| "loss": 0.8691646456718445, | |
| "step": 842 | |
| }, | |
| { | |
| "epoch": 3.362549800796813, | |
| "grad_norm": 0.5014941692352295, | |
| "learning_rate": 4.857166599382236e-07, | |
| "loss": 0.7781885862350464, | |
| "step": 844 | |
| }, | |
| { | |
| "epoch": 3.3705179282868527, | |
| "grad_norm": 0.3870190382003784, | |
| "learning_rate": 4.812367409056543e-07, | |
| "loss": 1.034981608390808, | |
| "step": 846 | |
| }, | |
| { | |
| "epoch": 3.3784860557768925, | |
| "grad_norm": 0.40831807255744934, | |
| "learning_rate": 4.7680762599073167e-07, | |
| "loss": 1.0040619373321533, | |
| "step": 848 | |
| }, | |
| { | |
| "epoch": 3.3864541832669324, | |
| "grad_norm": 1.0064787864685059, | |
| "learning_rate": 4.72429507719422e-07, | |
| "loss": 1.1909987926483154, | |
| "step": 850 | |
| }, | |
| { | |
| "epoch": 3.394422310756972, | |
| "grad_norm": 0.34172093868255615, | |
| "learning_rate": 4.681025764009543e-07, | |
| "loss": 1.0179778337478638, | |
| "step": 852 | |
| }, | |
| { | |
| "epoch": 3.402390438247012, | |
| "grad_norm": 0.6938625574111938, | |
| "learning_rate": 4.638270201195511e-07, | |
| "loss": 0.6021140813827515, | |
| "step": 854 | |
| }, | |
| { | |
| "epoch": 3.410358565737052, | |
| "grad_norm": 0.6337777972221375, | |
| "learning_rate": 4.5960302472624914e-07, | |
| "loss": 0.9684332013130188, | |
| "step": 856 | |
| }, | |
| { | |
| "epoch": 3.4183266932270917, | |
| "grad_norm": 0.36189448833465576, | |
| "learning_rate": 4.554307738308239e-07, | |
| "loss": 1.1349108219146729, | |
| "step": 858 | |
| }, | |
| { | |
| "epoch": 3.4262948207171315, | |
| "grad_norm": 1.1650807857513428, | |
| "learning_rate": 4.513104487938059e-07, | |
| "loss": 1.068455696105957, | |
| "step": 860 | |
| }, | |
| { | |
| "epoch": 3.4342629482071714, | |
| "grad_norm": 0.4551082253456116, | |
| "learning_rate": 4.4724222871859915e-07, | |
| "loss": 0.7791970372200012, | |
| "step": 862 | |
| }, | |
| { | |
| "epoch": 3.442231075697211, | |
| "grad_norm": 2.264080047607422, | |
| "learning_rate": 4.4322629044369557e-07, | |
| "loss": 0.7574501633644104, | |
| "step": 864 | |
| }, | |
| { | |
| "epoch": 3.450199203187251, | |
| "grad_norm": 0.4233779013156891, | |
| "learning_rate": 4.392628085349856e-07, | |
| "loss": 0.947033166885376, | |
| "step": 866 | |
| }, | |
| { | |
| "epoch": 3.458167330677291, | |
| "grad_norm": 0.6929165720939636, | |
| "learning_rate": 4.3535195527817413e-07, | |
| "loss": 0.8224045634269714, | |
| "step": 868 | |
| }, | |
| { | |
| "epoch": 3.4661354581673307, | |
| "grad_norm": 0.4947459399700165, | |
| "learning_rate": 4.3149390067128893e-07, | |
| "loss": 0.5549885630607605, | |
| "step": 870 | |
| }, | |
| { | |
| "epoch": 3.4741035856573705, | |
| "grad_norm": 0.18183910846710205, | |
| "learning_rate": 4.2768881241729196e-07, | |
| "loss": 0.6329528093338013, | |
| "step": 872 | |
| }, | |
| { | |
| "epoch": 3.4820717131474104, | |
| "grad_norm": 0.28913554549217224, | |
| "learning_rate": 4.239368559167891e-07, | |
| "loss": 0.760714590549469, | |
| "step": 874 | |
| }, | |
| { | |
| "epoch": 3.49003984063745, | |
| "grad_norm": 0.44314852356910706, | |
| "learning_rate": 4.2023819426084043e-07, | |
| "loss": 0.6140300631523132, | |
| "step": 876 | |
| }, | |
| { | |
| "epoch": 3.49800796812749, | |
| "grad_norm": 0.6128693222999573, | |
| "learning_rate": 4.165929882238723e-07, | |
| "loss": 0.5876585245132446, | |
| "step": 878 | |
| }, | |
| { | |
| "epoch": 3.50597609561753, | |
| "grad_norm": 0.18565566837787628, | |
| "learning_rate": 4.130013962566869e-07, | |
| "loss": 0.5421848297119141, | |
| "step": 880 | |
| }, | |
| { | |
| "epoch": 3.5139442231075697, | |
| "grad_norm": 0.5768609046936035, | |
| "learning_rate": 4.094635744795763e-07, | |
| "loss": 0.8825575709342957, | |
| "step": 882 | |
| }, | |
| { | |
| "epoch": 3.5219123505976095, | |
| "grad_norm": 0.9254052042961121, | |
| "learning_rate": 4.059796766755343e-07, | |
| "loss": 0.308013379573822, | |
| "step": 884 | |
| }, | |
| { | |
| "epoch": 3.5298804780876494, | |
| "grad_norm": 1.7089602947235107, | |
| "learning_rate": 4.0254985428357405e-07, | |
| "loss": 0.9074363708496094, | |
| "step": 886 | |
| }, | |
| { | |
| "epoch": 3.537848605577689, | |
| "grad_norm": 0.46798571944236755, | |
| "learning_rate": 3.9917425639214304e-07, | |
| "loss": 0.9869987368583679, | |
| "step": 888 | |
| }, | |
| { | |
| "epoch": 3.545816733067729, | |
| "grad_norm": 1.8204545974731445, | |
| "learning_rate": 3.9585302973264424e-07, | |
| "loss": 0.5235753059387207, | |
| "step": 890 | |
| }, | |
| { | |
| "epoch": 3.553784860557769, | |
| "grad_norm": 0.8664906620979309, | |
| "learning_rate": 3.9258631867305723e-07, | |
| "loss": 1.2101613283157349, | |
| "step": 892 | |
| }, | |
| { | |
| "epoch": 3.5617529880478087, | |
| "grad_norm": 0.4034668207168579, | |
| "learning_rate": 3.893742652116616e-07, | |
| "loss": 1.0782972574234009, | |
| "step": 894 | |
| }, | |
| { | |
| "epoch": 3.5697211155378485, | |
| "grad_norm": 0.37115031480789185, | |
| "learning_rate": 3.8621700897086686e-07, | |
| "loss": 0.25802165269851685, | |
| "step": 896 | |
| }, | |
| { | |
| "epoch": 3.5776892430278884, | |
| "grad_norm": 0.4131646156311035, | |
| "learning_rate": 3.8311468719114215e-07, | |
| "loss": 0.6915317177772522, | |
| "step": 898 | |
| }, | |
| { | |
| "epoch": 3.585657370517928, | |
| "grad_norm": 0.5455676913261414, | |
| "learning_rate": 3.8006743472504945e-07, | |
| "loss": 0.3316478133201599, | |
| "step": 900 | |
| }, | |
| { | |
| "epoch": 3.593625498007968, | |
| "grad_norm": 0.9060930609703064, | |
| "learning_rate": 3.7707538403138413e-07, | |
| "loss": 0.11103206127882004, | |
| "step": 902 | |
| }, | |
| { | |
| "epoch": 3.601593625498008, | |
| "grad_norm": 0.5972309708595276, | |
| "learning_rate": 3.7413866516941513e-07, | |
| "loss": 0.8609724640846252, | |
| "step": 904 | |
| }, | |
| { | |
| "epoch": 3.6095617529880477, | |
| "grad_norm": 2.9889116287231445, | |
| "learning_rate": 3.712574057932332e-07, | |
| "loss": 0.7271422147750854, | |
| "step": 906 | |
| }, | |
| { | |
| "epoch": 3.6175298804780875, | |
| "grad_norm": 0.5200643539428711, | |
| "learning_rate": 3.684317311461999e-07, | |
| "loss": 0.9715712070465088, | |
| "step": 908 | |
| }, | |
| { | |
| "epoch": 3.6254980079681274, | |
| "grad_norm": 1.3307124376296997, | |
| "learning_rate": 3.656617640555063e-07, | |
| "loss": 0.9659046530723572, | |
| "step": 910 | |
| }, | |
| { | |
| "epoch": 3.633466135458167, | |
| "grad_norm": 0.22726085782051086, | |
| "learning_rate": 3.6294762492683114e-07, | |
| "loss": 0.06467059254646301, | |
| "step": 912 | |
| }, | |
| { | |
| "epoch": 3.641434262948207, | |
| "grad_norm": 0.4885714054107666, | |
| "learning_rate": 3.6028943173910846e-07, | |
| "loss": 0.8727531433105469, | |
| "step": 914 | |
| }, | |
| { | |
| "epoch": 3.649402390438247, | |
| "grad_norm": 1.1886482238769531, | |
| "learning_rate": 3.5768730003939934e-07, | |
| "loss": 0.8834037184715271, | |
| "step": 916 | |
| }, | |
| { | |
| "epoch": 3.6573705179282867, | |
| "grad_norm": 6.707517147064209, | |
| "learning_rate": 3.551413429378685e-07, | |
| "loss": 0.28755173087120056, | |
| "step": 918 | |
| }, | |
| { | |
| "epoch": 3.6653386454183265, | |
| "grad_norm": 0.6478546857833862, | |
| "learning_rate": 3.526516711028687e-07, | |
| "loss": 1.0173288583755493, | |
| "step": 920 | |
| }, | |
| { | |
| "epoch": 3.6733067729083664, | |
| "grad_norm": 0.3290223479270935, | |
| "learning_rate": 3.502183927561286e-07, | |
| "loss": 0.20666202902793884, | |
| "step": 922 | |
| }, | |
| { | |
| "epoch": 3.681274900398406, | |
| "grad_norm": 0.8621473908424377, | |
| "learning_rate": 3.478416136680499e-07, | |
| "loss": 1.1904857158660889, | |
| "step": 924 | |
| }, | |
| { | |
| "epoch": 3.6892430278884465, | |
| "grad_norm": 6.629832744598389, | |
| "learning_rate": 3.455214371531096e-07, | |
| "loss": 0.448214590549469, | |
| "step": 926 | |
| }, | |
| { | |
| "epoch": 3.6972111553784863, | |
| "grad_norm": 0.4641083776950836, | |
| "learning_rate": 3.432579640653678e-07, | |
| "loss": 0.6058546304702759, | |
| "step": 928 | |
| }, | |
| { | |
| "epoch": 3.705179282868526, | |
| "grad_norm": 0.16589830815792084, | |
| "learning_rate": 3.4105129279408574e-07, | |
| "loss": 0.9167323112487793, | |
| "step": 930 | |
| }, | |
| { | |
| "epoch": 3.713147410358566, | |
| "grad_norm": 0.8476380109786987, | |
| "learning_rate": 3.389015192594471e-07, | |
| "loss": 1.018453598022461, | |
| "step": 932 | |
| }, | |
| { | |
| "epoch": 3.721115537848606, | |
| "grad_norm": 1.218807578086853, | |
| "learning_rate": 3.3680873690839e-07, | |
| "loss": 0.6338685154914856, | |
| "step": 934 | |
| }, | |
| { | |
| "epoch": 3.7290836653386457, | |
| "grad_norm": 0.4406608045101166, | |
| "learning_rate": 3.347730367105437e-07, | |
| "loss": 0.9283577799797058, | |
| "step": 936 | |
| }, | |
| { | |
| "epoch": 3.7370517928286855, | |
| "grad_norm": 1.626468539237976, | |
| "learning_rate": 3.327945071542754e-07, | |
| "loss": 0.9253164529800415, | |
| "step": 938 | |
| }, | |
| { | |
| "epoch": 3.7450199203187253, | |
| "grad_norm": 0.49745914340019226, | |
| "learning_rate": 3.308732342428437e-07, | |
| "loss": 0.8693242073059082, | |
| "step": 940 | |
| }, | |
| { | |
| "epoch": 3.752988047808765, | |
| "grad_norm": 1.1181275844573975, | |
| "learning_rate": 3.2900930149065883e-07, | |
| "loss": 0.9114285111427307, | |
| "step": 942 | |
| }, | |
| { | |
| "epoch": 3.760956175298805, | |
| "grad_norm": 0.4131788909435272, | |
| "learning_rate": 3.2720278991965424e-07, | |
| "loss": 0.5458086729049683, | |
| "step": 944 | |
| }, | |
| { | |
| "epoch": 3.768924302788845, | |
| "grad_norm": 0.3882419466972351, | |
| "learning_rate": 3.2545377805576414e-07, | |
| "loss": 1.0222781896591187, | |
| "step": 946 | |
| }, | |
| { | |
| "epoch": 3.7768924302788847, | |
| "grad_norm": 0.4516999423503876, | |
| "learning_rate": 3.2376234192550955e-07, | |
| "loss": 0.6500365138053894, | |
| "step": 948 | |
| }, | |
| { | |
| "epoch": 3.7848605577689245, | |
| "grad_norm": 0.7559425234794617, | |
| "learning_rate": 3.221285550526936e-07, | |
| "loss": 0.9952846765518188, | |
| "step": 950 | |
| }, | |
| { | |
| "epoch": 3.7928286852589643, | |
| "grad_norm": 0.43123456835746765, | |
| "learning_rate": 3.205524884552062e-07, | |
| "loss": 1.0094038248062134, | |
| "step": 952 | |
| }, | |
| { | |
| "epoch": 3.800796812749004, | |
| "grad_norm": 0.43853896856307983, | |
| "learning_rate": 3.1903421064193714e-07, | |
| "loss": 0.6855465769767761, | |
| "step": 954 | |
| }, | |
| { | |
| "epoch": 3.808764940239044, | |
| "grad_norm": 0.9686547517776489, | |
| "learning_rate": 3.1757378760979694e-07, | |
| "loss": 0.8955079317092896, | |
| "step": 956 | |
| }, | |
| { | |
| "epoch": 3.816733067729084, | |
| "grad_norm": 1.594388484954834, | |
| "learning_rate": 3.161712828408494e-07, | |
| "loss": 0.6716915965080261, | |
| "step": 958 | |
| }, | |
| { | |
| "epoch": 3.8247011952191237, | |
| "grad_norm": 1.1490174531936646, | |
| "learning_rate": 3.1482675729955115e-07, | |
| "loss": 0.8457735776901245, | |
| "step": 960 | |
| }, | |
| { | |
| "epoch": 3.8326693227091635, | |
| "grad_norm": 0.44997137784957886, | |
| "learning_rate": 3.135402694301026e-07, | |
| "loss": 0.6334148645401001, | |
| "step": 962 | |
| }, | |
| { | |
| "epoch": 3.8406374501992033, | |
| "grad_norm": 0.7384113073348999, | |
| "learning_rate": 3.123118751539064e-07, | |
| "loss": 1.0639876127243042, | |
| "step": 964 | |
| }, | |
| { | |
| "epoch": 3.848605577689243, | |
| "grad_norm": 0.7392749786376953, | |
| "learning_rate": 3.111416278671374e-07, | |
| "loss": 0.40739232301712036, | |
| "step": 966 | |
| }, | |
| { | |
| "epoch": 3.856573705179283, | |
| "grad_norm": 0.3943544030189514, | |
| "learning_rate": 3.1002957843842143e-07, | |
| "loss": 0.5916213393211365, | |
| "step": 968 | |
| }, | |
| { | |
| "epoch": 3.864541832669323, | |
| "grad_norm": 1.1247427463531494, | |
| "learning_rate": 3.0897577520662403e-07, | |
| "loss": 0.8500593304634094, | |
| "step": 970 | |
| }, | |
| { | |
| "epoch": 3.8725099601593627, | |
| "grad_norm": 0.3284965753555298, | |
| "learning_rate": 3.0798026397874904e-07, | |
| "loss": 0.631747841835022, | |
| "step": 972 | |
| }, | |
| { | |
| "epoch": 3.8804780876494025, | |
| "grad_norm": 0.27052104473114014, | |
| "learning_rate": 3.070430880279484e-07, | |
| "loss": 0.5036011934280396, | |
| "step": 974 | |
| }, | |
| { | |
| "epoch": 3.8884462151394423, | |
| "grad_norm": 0.4196729063987732, | |
| "learning_rate": 3.0616428809163936e-07, | |
| "loss": 1.0151411294937134, | |
| "step": 976 | |
| }, | |
| { | |
| "epoch": 3.896414342629482, | |
| "grad_norm": 0.604816198348999, | |
| "learning_rate": 3.053439023697359e-07, | |
| "loss": 0.5701797008514404, | |
| "step": 978 | |
| }, | |
| { | |
| "epoch": 3.904382470119522, | |
| "grad_norm": 0.21591706573963165, | |
| "learning_rate": 3.0458196652298623e-07, | |
| "loss": 0.46466773748397827, | |
| "step": 980 | |
| }, | |
| { | |
| "epoch": 3.912350597609562, | |
| "grad_norm": 0.5833430886268616, | |
| "learning_rate": 3.038785136714242e-07, | |
| "loss": 0.5528253316879272, | |
| "step": 982 | |
| }, | |
| { | |
| "epoch": 3.9203187250996017, | |
| "grad_norm": 0.3425147235393524, | |
| "learning_rate": 3.0323357439292857e-07, | |
| "loss": 0.9604100584983826, | |
| "step": 984 | |
| }, | |
| { | |
| "epoch": 3.9282868525896415, | |
| "grad_norm": 0.6084439754486084, | |
| "learning_rate": 3.026471767218946e-07, | |
| "loss": 0.1784566193819046, | |
| "step": 986 | |
| }, | |
| { | |
| "epoch": 3.9362549800796813, | |
| "grad_norm": 0.6684430241584778, | |
| "learning_rate": 3.0211934614801484e-07, | |
| "loss": 0.6319288611412048, | |
| "step": 988 | |
| }, | |
| { | |
| "epoch": 3.944223107569721, | |
| "grad_norm": 0.1623208075761795, | |
| "learning_rate": 3.016501056151714e-07, | |
| "loss": 0.6323052048683167, | |
| "step": 990 | |
| }, | |
| { | |
| "epoch": 3.952191235059761, | |
| "grad_norm": 0.598673939704895, | |
| "learning_rate": 3.01239475520439e-07, | |
| "loss": 1.1039389371871948, | |
| "step": 992 | |
| }, | |
| { | |
| "epoch": 3.960159362549801, | |
| "grad_norm": 0.10632996261119843, | |
| "learning_rate": 3.008874737131976e-07, | |
| "loss": 0.30895280838012695, | |
| "step": 994 | |
| }, | |
| { | |
| "epoch": 3.9681274900398407, | |
| "grad_norm": 0.7597146034240723, | |
| "learning_rate": 3.00594115494357e-07, | |
| "loss": 0.9472017884254456, | |
| "step": 996 | |
| }, | |
| { | |
| "epoch": 3.9760956175298805, | |
| "grad_norm": 0.3815767765045166, | |
| "learning_rate": 3.0035941361569174e-07, | |
| "loss": 0.6094083189964294, | |
| "step": 998 | |
| }, | |
| { | |
| "epoch": 3.9840637450199203, | |
| "grad_norm": 1.381508708000183, | |
| "learning_rate": 3.0018337827928646e-07, | |
| "loss": 0.5018728971481323, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 3.99203187250996, | |
| "grad_norm": 0.7763594388961792, | |
| "learning_rate": 3.0006601713709283e-07, | |
| "loss": 1.0228667259216309, | |
| "step": 1002 | |
| }, | |
| { | |
| "epoch": 4.0, | |
| "grad_norm": 0.1577903777360916, | |
| "learning_rate": 3.000073352905969e-07, | |
| "loss": 0.34115365147590637, | |
| "step": 1004 | |
| }, | |
| { | |
| "epoch": 4.0, | |
| "step": 1004, | |
| "total_flos": 4.038502240003031e+18, | |
| "train_loss": 0.9239894755600221, | |
| "train_runtime": 10281.2731, | |
| "train_samples_per_second": 5.859, | |
| "train_steps_per_second": 0.098 | |
| } | |
| ], | |
| "logging_steps": 2, | |
| "max_steps": 1004, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 4, | |
| "save_steps": 99999, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": false, | |
| "should_training_stop": false | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 4.038502240003031e+18, | |
| "train_batch_size": 3, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |