Image-Text-to-Text
Transformers
Safetensors
qwen3_5
llama-factory
full
Generated from Trainer
conversational
Instructions to use furproxy/9b-65 with libraries, inference providers, notebooks, and local apps. Follow these links to get started.
- Libraries
- Transformers
How to use furproxy/9b-65 with Transformers:
# Use a pipeline as a high-level helper from transformers import pipeline pipe = pipeline("image-text-to-text", model="furproxy/9b-65") messages = [ { "role": "user", "content": [ {"type": "image", "url": "https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/p-blog/candy.JPG"}, {"type": "text", "text": "What animal is on the candy?"} ] }, ] pipe(text=messages)# Load model directly from transformers import AutoProcessor, AutoModelForImageTextToText processor = AutoProcessor.from_pretrained("furproxy/9b-65") model = AutoModelForImageTextToText.from_pretrained("furproxy/9b-65") messages = [ { "role": "user", "content": [ {"type": "image", "url": "https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/p-blog/candy.JPG"}, {"type": "text", "text": "What animal is on the candy?"} ] }, ] inputs = processor.apply_chat_template( messages, add_generation_prompt=True, tokenize=True, return_dict=True, return_tensors="pt", ).to(model.device) outputs = model.generate(**inputs, max_new_tokens=40) print(processor.decode(outputs[0][inputs["input_ids"].shape[-1]:])) - Notebooks
- Google Colab
- Kaggle
- Local Apps Settings
- vLLM
How to use furproxy/9b-65 with vLLM:
Install from pip and serve model
# Install vLLM from pip: pip install vllm # Start the vLLM server: vllm serve "furproxy/9b-65" # Call the server using curl (OpenAI-compatible API): curl -X POST "http://localhost:8000/v1/chat/completions" \ -H "Content-Type: application/json" \ --data '{ "model": "furproxy/9b-65", "messages": [ { "role": "user", "content": [ { "type": "text", "text": "Describe this image in one sentence." }, { "type": "image_url", "image_url": { "url": "https://cdn.britannica.com/61/93061-050-99147DCE/Statue-of-Liberty-Island-New-York-Bay.jpg" } } ] } ] }'Use Docker
docker model run hf.co/furproxy/9b-65
- SGLang
How to use furproxy/9b-65 with SGLang:
Install from pip and serve model
# Install SGLang from pip: pip install sglang # Start the SGLang server: python3 -m sglang.launch_server \ --model-path "furproxy/9b-65" \ --host 0.0.0.0 \ --port 30000 # Call the server using curl (OpenAI-compatible API): curl -X POST "http://localhost:30000/v1/chat/completions" \ -H "Content-Type: application/json" \ --data '{ "model": "furproxy/9b-65", "messages": [ { "role": "user", "content": [ { "type": "text", "text": "Describe this image in one sentence." }, { "type": "image_url", "image_url": { "url": "https://cdn.britannica.com/61/93061-050-99147DCE/Statue-of-Liberty-Island-New-York-Bay.jpg" } } ] } ] }'Use Docker images
docker run --gpus all \ --shm-size 32g \ -p 30000:30000 \ -v ~/.cache/huggingface:/root/.cache/huggingface \ --env "HF_TOKEN=<secret>" \ --ipc=host \ lmsysorg/sglang:latest \ python3 -m sglang.launch_server \ --model-path "furproxy/9b-65" \ --host 0.0.0.0 \ --port 30000 # Call the server using curl (OpenAI-compatible API): curl -X POST "http://localhost:30000/v1/chat/completions" \ -H "Content-Type: application/json" \ --data '{ "model": "furproxy/9b-65", "messages": [ { "role": "user", "content": [ { "type": "text", "text": "Describe this image in one sentence." }, { "type": "image_url", "image_url": { "url": "https://cdn.britannica.com/61/93061-050-99147DCE/Statue-of-Liberty-Island-New-York-Bay.jpg" } } ] } ] }' - Docker Model Runner
How to use furproxy/9b-65 with Docker Model Runner:
docker model run hf.co/furproxy/9b-65
| { | |
| "best_global_step": null, | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 4.0, | |
| "eval_steps": 500, | |
| "global_step": 1004, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.00796812749003984, | |
| "grad_norm": 0.8701496124267578, | |
| "learning_rate": 2.9411764705882356e-07, | |
| "loss": 2.091559410095215, | |
| "step": 2 | |
| }, | |
| { | |
| "epoch": 0.01593625498007968, | |
| "grad_norm": 0.9730402231216431, | |
| "learning_rate": 8.823529411764706e-07, | |
| "loss": 2.0084309577941895, | |
| "step": 4 | |
| }, | |
| { | |
| "epoch": 0.02390438247011952, | |
| "grad_norm": 23.87286949157715, | |
| "learning_rate": 1.4705882352941177e-06, | |
| "loss": 2.1494643688201904, | |
| "step": 6 | |
| }, | |
| { | |
| "epoch": 0.03187250996015936, | |
| "grad_norm": 0.5717560052871704, | |
| "learning_rate": 2.058823529411765e-06, | |
| "loss": 1.8582701683044434, | |
| "step": 8 | |
| }, | |
| { | |
| "epoch": 0.0398406374501992, | |
| "grad_norm": 1.2766757011413574, | |
| "learning_rate": 2.647058823529412e-06, | |
| "loss": 1.387536644935608, | |
| "step": 10 | |
| }, | |
| { | |
| "epoch": 0.04780876494023904, | |
| "grad_norm": 0.6310649514198303, | |
| "learning_rate": 3.235294117647059e-06, | |
| "loss": 1.6384257078170776, | |
| "step": 12 | |
| }, | |
| { | |
| "epoch": 0.055776892430278883, | |
| "grad_norm": 0.5051367282867432, | |
| "learning_rate": 3.8235294117647055e-06, | |
| "loss": 1.8997098207473755, | |
| "step": 14 | |
| }, | |
| { | |
| "epoch": 0.06374501992031872, | |
| "grad_norm": 3.0508971214294434, | |
| "learning_rate": 4.411764705882353e-06, | |
| "loss": 1.5508460998535156, | |
| "step": 16 | |
| }, | |
| { | |
| "epoch": 0.07171314741035857, | |
| "grad_norm": 0.6429860591888428, | |
| "learning_rate": 4.9999999999999996e-06, | |
| "loss": 1.544956922531128, | |
| "step": 18 | |
| }, | |
| { | |
| "epoch": 0.0796812749003984, | |
| "grad_norm": 0.5023921132087708, | |
| "learning_rate": 5.588235294117647e-06, | |
| "loss": 1.4685248136520386, | |
| "step": 20 | |
| }, | |
| { | |
| "epoch": 0.08764940239043825, | |
| "grad_norm": 0.4997398853302002, | |
| "learning_rate": 6.176470588235294e-06, | |
| "loss": 1.1129087209701538, | |
| "step": 22 | |
| }, | |
| { | |
| "epoch": 0.09561752988047809, | |
| "grad_norm": 1.2241058349609375, | |
| "learning_rate": 6.7647058823529414e-06, | |
| "loss": 1.2971528768539429, | |
| "step": 24 | |
| }, | |
| { | |
| "epoch": 0.10358565737051793, | |
| "grad_norm": 0.583693265914917, | |
| "learning_rate": 7.3529411764705884e-06, | |
| "loss": 1.399789810180664, | |
| "step": 26 | |
| }, | |
| { | |
| "epoch": 0.11155378486055777, | |
| "grad_norm": 0.41760727763175964, | |
| "learning_rate": 7.941176470588236e-06, | |
| "loss": 1.6126567125320435, | |
| "step": 28 | |
| }, | |
| { | |
| "epoch": 0.11952191235059761, | |
| "grad_norm": 0.6942929625511169, | |
| "learning_rate": 8.529411764705882e-06, | |
| "loss": 1.3107324838638306, | |
| "step": 30 | |
| }, | |
| { | |
| "epoch": 0.12749003984063745, | |
| "grad_norm": 2.4918148517608643, | |
| "learning_rate": 9.117647058823529e-06, | |
| "loss": 1.1656028032302856, | |
| "step": 32 | |
| }, | |
| { | |
| "epoch": 0.13545816733067728, | |
| "grad_norm": 0.6997283101081848, | |
| "learning_rate": 9.705882352941177e-06, | |
| "loss": 1.2270398139953613, | |
| "step": 34 | |
| }, | |
| { | |
| "epoch": 0.14342629482071714, | |
| "grad_norm": 0.41730615496635437, | |
| "learning_rate": 1.0294117647058824e-05, | |
| "loss": 1.3723477125167847, | |
| "step": 36 | |
| }, | |
| { | |
| "epoch": 0.15139442231075698, | |
| "grad_norm": 0.5808508992195129, | |
| "learning_rate": 1.0882352941176471e-05, | |
| "loss": 1.166778802871704, | |
| "step": 38 | |
| }, | |
| { | |
| "epoch": 0.1593625498007968, | |
| "grad_norm": 0.29741156101226807, | |
| "learning_rate": 1.1470588235294117e-05, | |
| "loss": 1.2935056686401367, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 0.16733067729083664, | |
| "grad_norm": 1.2481650114059448, | |
| "learning_rate": 1.2058823529411765e-05, | |
| "loss": 0.765558123588562, | |
| "step": 42 | |
| }, | |
| { | |
| "epoch": 0.1752988047808765, | |
| "grad_norm": 0.4549512267112732, | |
| "learning_rate": 1.2647058823529412e-05, | |
| "loss": 0.9544646739959717, | |
| "step": 44 | |
| }, | |
| { | |
| "epoch": 0.18326693227091634, | |
| "grad_norm": 2.7968297004699707, | |
| "learning_rate": 1.323529411764706e-05, | |
| "loss": 0.9361187815666199, | |
| "step": 46 | |
| }, | |
| { | |
| "epoch": 0.19123505976095617, | |
| "grad_norm": 0.6919461488723755, | |
| "learning_rate": 1.3823529411764705e-05, | |
| "loss": 1.17107093334198, | |
| "step": 48 | |
| }, | |
| { | |
| "epoch": 0.199203187250996, | |
| "grad_norm": 0.5921279191970825, | |
| "learning_rate": 1.4411764705882353e-05, | |
| "loss": 1.3282028436660767, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 0.20717131474103587, | |
| "grad_norm": 0.5274451971054077, | |
| "learning_rate": 1.5e-05, | |
| "loss": 1.2876099348068237, | |
| "step": 52 | |
| }, | |
| { | |
| "epoch": 0.2151394422310757, | |
| "grad_norm": 1.5639928579330444, | |
| "learning_rate": 1.4999853294586629e-05, | |
| "loss": 1.109473466873169, | |
| "step": 54 | |
| }, | |
| { | |
| "epoch": 0.22310756972111553, | |
| "grad_norm": 0.6973602771759033, | |
| "learning_rate": 1.4999413184723549e-05, | |
| "loss": 1.5242366790771484, | |
| "step": 56 | |
| }, | |
| { | |
| "epoch": 0.23107569721115537, | |
| "grad_norm": 0.5269781351089478, | |
| "learning_rate": 1.4998679689541569e-05, | |
| "loss": 1.3331416845321655, | |
| "step": 58 | |
| }, | |
| { | |
| "epoch": 0.23904382470119523, | |
| "grad_norm": 0.4338107109069824, | |
| "learning_rate": 1.499765284092446e-05, | |
| "loss": 0.9126222729682922, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 0.24701195219123506, | |
| "grad_norm": 0.3536894917488098, | |
| "learning_rate": 1.4996332683507557e-05, | |
| "loss": 1.3404982089996338, | |
| "step": 62 | |
| }, | |
| { | |
| "epoch": 0.2549800796812749, | |
| "grad_norm": 0.7808045148849487, | |
| "learning_rate": 1.4994719274675816e-05, | |
| "loss": 1.1124142408370972, | |
| "step": 64 | |
| }, | |
| { | |
| "epoch": 0.26294820717131473, | |
| "grad_norm": 0.3446694314479828, | |
| "learning_rate": 1.4992812684561331e-05, | |
| "loss": 1.2747009992599487, | |
| "step": 66 | |
| }, | |
| { | |
| "epoch": 0.27091633466135456, | |
| "grad_norm": 13.088342666625977, | |
| "learning_rate": 1.4990612996040276e-05, | |
| "loss": 1.282449722290039, | |
| "step": 68 | |
| }, | |
| { | |
| "epoch": 0.2788844621513944, | |
| "grad_norm": 2.078386068344116, | |
| "learning_rate": 1.498812030472931e-05, | |
| "loss": 1.5724037885665894, | |
| "step": 70 | |
| }, | |
| { | |
| "epoch": 0.2868525896414343, | |
| "grad_norm": 0.6237571239471436, | |
| "learning_rate": 1.498533471898141e-05, | |
| "loss": 0.8898400068283081, | |
| "step": 72 | |
| }, | |
| { | |
| "epoch": 0.2948207171314741, | |
| "grad_norm": 1.2999846935272217, | |
| "learning_rate": 1.4982256359881172e-05, | |
| "loss": 1.1757071018218994, | |
| "step": 74 | |
| }, | |
| { | |
| "epoch": 0.30278884462151395, | |
| "grad_norm": 0.5385910868644714, | |
| "learning_rate": 1.4978885361239544e-05, | |
| "loss": 1.4709817171096802, | |
| "step": 76 | |
| }, | |
| { | |
| "epoch": 0.3107569721115538, | |
| "grad_norm": 1.2187010049819946, | |
| "learning_rate": 1.4975221869588004e-05, | |
| "loss": 0.9453757405281067, | |
| "step": 78 | |
| }, | |
| { | |
| "epoch": 0.3187250996015936, | |
| "grad_norm": 0.3541651964187622, | |
| "learning_rate": 1.4971266044172201e-05, | |
| "loss": 0.8519526720046997, | |
| "step": 80 | |
| }, | |
| { | |
| "epoch": 0.32669322709163345, | |
| "grad_norm": 0.4355093836784363, | |
| "learning_rate": 1.4967018056945026e-05, | |
| "loss": 1.3587875366210938, | |
| "step": 82 | |
| }, | |
| { | |
| "epoch": 0.3346613545816733, | |
| "grad_norm": 0.6306200623512268, | |
| "learning_rate": 1.4962478092559135e-05, | |
| "loss": 0.9281608462333679, | |
| "step": 84 | |
| }, | |
| { | |
| "epoch": 0.3426294820717131, | |
| "grad_norm": 0.4139735698699951, | |
| "learning_rate": 1.495764634835893e-05, | |
| "loss": 1.3322420120239258, | |
| "step": 86 | |
| }, | |
| { | |
| "epoch": 0.350597609561753, | |
| "grad_norm": 0.9012618660926819, | |
| "learning_rate": 1.4952523034371973e-05, | |
| "loss": 0.9445306658744812, | |
| "step": 88 | |
| }, | |
| { | |
| "epoch": 0.35856573705179284, | |
| "grad_norm": 0.46748796105384827, | |
| "learning_rate": 1.4947108373299864e-05, | |
| "loss": 1.3331313133239746, | |
| "step": 90 | |
| }, | |
| { | |
| "epoch": 0.3665338645418327, | |
| "grad_norm": 1.0903550386428833, | |
| "learning_rate": 1.4941402600508558e-05, | |
| "loss": 1.128015398979187, | |
| "step": 92 | |
| }, | |
| { | |
| "epoch": 0.3745019920318725, | |
| "grad_norm": 0.4805486798286438, | |
| "learning_rate": 1.4935405964018128e-05, | |
| "loss": 1.2455147504806519, | |
| "step": 94 | |
| }, | |
| { | |
| "epoch": 0.38247011952191234, | |
| "grad_norm": 0.7429084181785583, | |
| "learning_rate": 1.4929118724491996e-05, | |
| "loss": 1.1041914224624634, | |
| "step": 96 | |
| }, | |
| { | |
| "epoch": 0.3904382470119522, | |
| "grad_norm": 0.27306675910949707, | |
| "learning_rate": 1.4922541155225586e-05, | |
| "loss": 1.2655969858169556, | |
| "step": 98 | |
| }, | |
| { | |
| "epoch": 0.398406374501992, | |
| "grad_norm": 0.41318008303642273, | |
| "learning_rate": 1.4915673542134462e-05, | |
| "loss": 0.8851726651191711, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.4063745019920319, | |
| "grad_norm": 0.4386235773563385, | |
| "learning_rate": 1.4908516183741889e-05, | |
| "loss": 1.265491008758545, | |
| "step": 102 | |
| }, | |
| { | |
| "epoch": 0.41434262948207173, | |
| "grad_norm": 0.6781812906265259, | |
| "learning_rate": 1.4901069391165857e-05, | |
| "loss": 0.8081492185592651, | |
| "step": 104 | |
| }, | |
| { | |
| "epoch": 0.42231075697211157, | |
| "grad_norm": 1.4451416730880737, | |
| "learning_rate": 1.4893333488105559e-05, | |
| "loss": 0.7170528173446655, | |
| "step": 106 | |
| }, | |
| { | |
| "epoch": 0.4302788844621514, | |
| "grad_norm": 0.6063726544380188, | |
| "learning_rate": 1.4885308810827328e-05, | |
| "loss": 0.9935809969902039, | |
| "step": 108 | |
| }, | |
| { | |
| "epoch": 0.43824701195219123, | |
| "grad_norm": 0.40737852454185486, | |
| "learning_rate": 1.4876995708150003e-05, | |
| "loss": 1.2845995426177979, | |
| "step": 110 | |
| }, | |
| { | |
| "epoch": 0.44621513944223107, | |
| "grad_norm": 0.4796580374240875, | |
| "learning_rate": 1.4868394541429784e-05, | |
| "loss": 0.8904252052307129, | |
| "step": 112 | |
| }, | |
| { | |
| "epoch": 0.4541832669322709, | |
| "grad_norm": 3.001218318939209, | |
| "learning_rate": 1.4859505684544512e-05, | |
| "loss": 1.1530516147613525, | |
| "step": 114 | |
| }, | |
| { | |
| "epoch": 0.46215139442231074, | |
| "grad_norm": 0.4466836452484131, | |
| "learning_rate": 1.4850329523877425e-05, | |
| "loss": 1.2753629684448242, | |
| "step": 116 | |
| }, | |
| { | |
| "epoch": 0.4701195219123506, | |
| "grad_norm": 0.28066951036453247, | |
| "learning_rate": 1.4840866458300357e-05, | |
| "loss": 1.3401973247528076, | |
| "step": 118 | |
| }, | |
| { | |
| "epoch": 0.47808764940239046, | |
| "grad_norm": 0.2835182249546051, | |
| "learning_rate": 1.4831116899156402e-05, | |
| "loss": 1.2199780941009521, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 0.4860557768924303, | |
| "grad_norm": 0.36116963624954224, | |
| "learning_rate": 1.4821081270242039e-05, | |
| "loss": 0.9814391136169434, | |
| "step": 122 | |
| }, | |
| { | |
| "epoch": 0.4940239043824701, | |
| "grad_norm": 0.6912099123001099, | |
| "learning_rate": 1.48107600077887e-05, | |
| "loss": 1.0494424104690552, | |
| "step": 124 | |
| }, | |
| { | |
| "epoch": 0.50199203187251, | |
| "grad_norm": 0.8504573702812195, | |
| "learning_rate": 1.480015356044381e-05, | |
| "loss": 0.9379956126213074, | |
| "step": 126 | |
| }, | |
| { | |
| "epoch": 0.5099601593625498, | |
| "grad_norm": 0.5862733125686646, | |
| "learning_rate": 1.4789262389251301e-05, | |
| "loss": 1.2821743488311768, | |
| "step": 128 | |
| }, | |
| { | |
| "epoch": 0.5179282868525896, | |
| "grad_norm": 0.5818023681640625, | |
| "learning_rate": 1.4778086967631548e-05, | |
| "loss": 0.9355220198631287, | |
| "step": 130 | |
| }, | |
| { | |
| "epoch": 0.5258964143426295, | |
| "grad_norm": 0.31655120849609375, | |
| "learning_rate": 1.4766627781360796e-05, | |
| "loss": 0.826532244682312, | |
| "step": 132 | |
| }, | |
| { | |
| "epoch": 0.5338645418326693, | |
| "grad_norm": 0.5141142010688782, | |
| "learning_rate": 1.4754885328550062e-05, | |
| "loss": 0.9170287251472473, | |
| "step": 134 | |
| }, | |
| { | |
| "epoch": 0.5418326693227091, | |
| "grad_norm": 0.47723662853240967, | |
| "learning_rate": 1.4742860119623458e-05, | |
| "loss": 1.3180201053619385, | |
| "step": 136 | |
| }, | |
| { | |
| "epoch": 0.549800796812749, | |
| "grad_norm": 0.32824379205703735, | |
| "learning_rate": 1.473055267729602e-05, | |
| "loss": 0.9599122405052185, | |
| "step": 138 | |
| }, | |
| { | |
| "epoch": 0.5577689243027888, | |
| "grad_norm": 1.1303349733352661, | |
| "learning_rate": 1.4717963536550988e-05, | |
| "loss": 1.0953630208969116, | |
| "step": 140 | |
| }, | |
| { | |
| "epoch": 0.5657370517928287, | |
| "grad_norm": 0.49718862771987915, | |
| "learning_rate": 1.470509324461653e-05, | |
| "loss": 1.0326279401779175, | |
| "step": 142 | |
| }, | |
| { | |
| "epoch": 0.5737051792828686, | |
| "grad_norm": 0.2485317885875702, | |
| "learning_rate": 1.4691942360941986e-05, | |
| "loss": 1.2258632183074951, | |
| "step": 144 | |
| }, | |
| { | |
| "epoch": 0.5816733067729084, | |
| "grad_norm": 3.5433390140533447, | |
| "learning_rate": 1.4678511457173523e-05, | |
| "loss": 1.202100396156311, | |
| "step": 146 | |
| }, | |
| { | |
| "epoch": 0.5896414342629482, | |
| "grad_norm": 0.3908817172050476, | |
| "learning_rate": 1.4664801117129303e-05, | |
| "loss": 0.9758645296096802, | |
| "step": 148 | |
| }, | |
| { | |
| "epoch": 0.5976095617529881, | |
| "grad_norm": 0.5502234697341919, | |
| "learning_rate": 1.4650811936774093e-05, | |
| "loss": 0.9454991817474365, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 0.6055776892430279, | |
| "grad_norm": 4.790173530578613, | |
| "learning_rate": 1.4636544524193378e-05, | |
| "loss": 0.9398374557495117, | |
| "step": 152 | |
| }, | |
| { | |
| "epoch": 0.6135458167330677, | |
| "grad_norm": 0.638011634349823, | |
| "learning_rate": 1.46219994995669e-05, | |
| "loss": 1.090728998184204, | |
| "step": 154 | |
| }, | |
| { | |
| "epoch": 0.6215139442231076, | |
| "grad_norm": 2.4593403339385986, | |
| "learning_rate": 1.4607177495141734e-05, | |
| "loss": 1.1246390342712402, | |
| "step": 156 | |
| }, | |
| { | |
| "epoch": 0.6294820717131474, | |
| "grad_norm": 0.8616807460784912, | |
| "learning_rate": 1.4592079155204776e-05, | |
| "loss": 1.1782993078231812, | |
| "step": 158 | |
| }, | |
| { | |
| "epoch": 0.6374501992031872, | |
| "grad_norm": 0.2915763854980469, | |
| "learning_rate": 1.457670513605475e-05, | |
| "loss": 1.0174801349639893, | |
| "step": 160 | |
| }, | |
| { | |
| "epoch": 0.6454183266932271, | |
| "grad_norm": 0.27435067296028137, | |
| "learning_rate": 1.4561056105973688e-05, | |
| "loss": 0.8091227412223816, | |
| "step": 162 | |
| }, | |
| { | |
| "epoch": 0.6533864541832669, | |
| "grad_norm": 0.2575240731239319, | |
| "learning_rate": 1.4545132745197857e-05, | |
| "loss": 1.1529077291488647, | |
| "step": 164 | |
| }, | |
| { | |
| "epoch": 0.6613545816733067, | |
| "grad_norm": 0.777723491191864, | |
| "learning_rate": 1.4528935745888218e-05, | |
| "loss": 0.8908942937850952, | |
| "step": 166 | |
| }, | |
| { | |
| "epoch": 0.6693227091633466, | |
| "grad_norm": 0.2517397105693817, | |
| "learning_rate": 1.4512465812100317e-05, | |
| "loss": 1.2097852230072021, | |
| "step": 168 | |
| }, | |
| { | |
| "epoch": 0.6772908366533864, | |
| "grad_norm": 3.4033937454223633, | |
| "learning_rate": 1.4495723659753695e-05, | |
| "loss": 1.2028913497924805, | |
| "step": 170 | |
| }, | |
| { | |
| "epoch": 0.6852589641434262, | |
| "grad_norm": 0.3606719374656677, | |
| "learning_rate": 1.447871001660076e-05, | |
| "loss": 0.8955773115158081, | |
| "step": 172 | |
| }, | |
| { | |
| "epoch": 0.6932270916334662, | |
| "grad_norm": 0.2552003860473633, | |
| "learning_rate": 1.4461425622195157e-05, | |
| "loss": 1.2185531854629517, | |
| "step": 174 | |
| }, | |
| { | |
| "epoch": 0.701195219123506, | |
| "grad_norm": 1.0111852884292603, | |
| "learning_rate": 1.4443871227859621e-05, | |
| "loss": 0.7776660919189453, | |
| "step": 176 | |
| }, | |
| { | |
| "epoch": 0.7091633466135459, | |
| "grad_norm": 0.7659691572189331, | |
| "learning_rate": 1.4426047596653316e-05, | |
| "loss": 0.9216206669807434, | |
| "step": 178 | |
| }, | |
| { | |
| "epoch": 0.7171314741035857, | |
| "grad_norm": 1.132752776145935, | |
| "learning_rate": 1.4407955503338663e-05, | |
| "loss": 1.0899910926818848, | |
| "step": 180 | |
| }, | |
| { | |
| "epoch": 0.7250996015936255, | |
| "grad_norm": 0.16658742725849152, | |
| "learning_rate": 1.4389595734347675e-05, | |
| "loss": 0.5195258855819702, | |
| "step": 182 | |
| }, | |
| { | |
| "epoch": 0.7330677290836654, | |
| "grad_norm": 0.6180145144462585, | |
| "learning_rate": 1.4370969087747755e-05, | |
| "loss": 1.3304177522659302, | |
| "step": 184 | |
| }, | |
| { | |
| "epoch": 0.7410358565737052, | |
| "grad_norm": 0.35436052083969116, | |
| "learning_rate": 1.4352076373207023e-05, | |
| "loss": 1.2653801441192627, | |
| "step": 186 | |
| }, | |
| { | |
| "epoch": 0.749003984063745, | |
| "grad_norm": 0.2843472361564636, | |
| "learning_rate": 1.4332918411959106e-05, | |
| "loss": 1.1138914823532104, | |
| "step": 188 | |
| }, | |
| { | |
| "epoch": 0.7569721115537849, | |
| "grad_norm": 1.0151716470718384, | |
| "learning_rate": 1.4313496036767444e-05, | |
| "loss": 0.8904833197593689, | |
| "step": 190 | |
| }, | |
| { | |
| "epoch": 0.7649402390438247, | |
| "grad_norm": 0.7267096042633057, | |
| "learning_rate": 1.4293810091889105e-05, | |
| "loss": 1.2340463399887085, | |
| "step": 192 | |
| }, | |
| { | |
| "epoch": 0.7729083665338645, | |
| "grad_norm": 0.47353217005729675, | |
| "learning_rate": 1.4273861433038063e-05, | |
| "loss": 0.9082501530647278, | |
| "step": 194 | |
| }, | |
| { | |
| "epoch": 0.7808764940239044, | |
| "grad_norm": 0.9817029237747192, | |
| "learning_rate": 1.425365092734802e-05, | |
| "loss": 0.663750946521759, | |
| "step": 196 | |
| }, | |
| { | |
| "epoch": 0.7888446215139442, | |
| "grad_norm": 0.7875825762748718, | |
| "learning_rate": 1.423317945333471e-05, | |
| "loss": 0.7919776439666748, | |
| "step": 198 | |
| }, | |
| { | |
| "epoch": 0.796812749003984, | |
| "grad_norm": 0.5649994015693665, | |
| "learning_rate": 1.4212447900857703e-05, | |
| "loss": 1.0543051958084106, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.8047808764940239, | |
| "grad_norm": 0.1523721069097519, | |
| "learning_rate": 1.4191457171081736e-05, | |
| "loss": 1.0212864875793457, | |
| "step": 202 | |
| }, | |
| { | |
| "epoch": 0.8127490039840638, | |
| "grad_norm": 0.28413787484169006, | |
| "learning_rate": 1.417020817643753e-05, | |
| "loss": 1.5364233255386353, | |
| "step": 204 | |
| }, | |
| { | |
| "epoch": 0.8207171314741036, | |
| "grad_norm": 0.2831563651561737, | |
| "learning_rate": 1.4148701840582129e-05, | |
| "loss": 1.2227693796157837, | |
| "step": 206 | |
| }, | |
| { | |
| "epoch": 0.8286852589641435, | |
| "grad_norm": 2.0232136249542236, | |
| "learning_rate": 1.412693909835877e-05, | |
| "loss": 0.7362918853759766, | |
| "step": 208 | |
| }, | |
| { | |
| "epoch": 0.8366533864541833, | |
| "grad_norm": 0.6372008323669434, | |
| "learning_rate": 1.4104920895756216e-05, | |
| "loss": 1.265373945236206, | |
| "step": 210 | |
| }, | |
| { | |
| "epoch": 0.8446215139442231, | |
| "grad_norm": 0.22620588541030884, | |
| "learning_rate": 1.4082648189867656e-05, | |
| "loss": 1.2132854461669922, | |
| "step": 212 | |
| }, | |
| { | |
| "epoch": 0.852589641434263, | |
| "grad_norm": 0.287081241607666, | |
| "learning_rate": 1.4060121948849098e-05, | |
| "loss": 0.9602269530296326, | |
| "step": 214 | |
| }, | |
| { | |
| "epoch": 0.8605577689243028, | |
| "grad_norm": 0.8160057067871094, | |
| "learning_rate": 1.4037343151877285e-05, | |
| "loss": 1.452444076538086, | |
| "step": 216 | |
| }, | |
| { | |
| "epoch": 0.8685258964143426, | |
| "grad_norm": 1.8605669736862183, | |
| "learning_rate": 1.4014312789107124e-05, | |
| "loss": 1.3142669200897217, | |
| "step": 218 | |
| }, | |
| { | |
| "epoch": 0.8764940239043825, | |
| "grad_norm": 0.28666868805885315, | |
| "learning_rate": 1.3991031861628662e-05, | |
| "loss": 1.2287095785140991, | |
| "step": 220 | |
| }, | |
| { | |
| "epoch": 0.8844621513944223, | |
| "grad_norm": 0.29921239614486694, | |
| "learning_rate": 1.3967501381423552e-05, | |
| "loss": 1.48736572265625, | |
| "step": 222 | |
| }, | |
| { | |
| "epoch": 0.8924302788844621, | |
| "grad_norm": 1.2563499212265015, | |
| "learning_rate": 1.3943722371321075e-05, | |
| "loss": 0.9397075176239014, | |
| "step": 224 | |
| }, | |
| { | |
| "epoch": 0.900398406374502, | |
| "grad_norm": 0.39466801285743713, | |
| "learning_rate": 1.3919695864953679e-05, | |
| "loss": 1.0238375663757324, | |
| "step": 226 | |
| }, | |
| { | |
| "epoch": 0.9083665338645418, | |
| "grad_norm": 2.8415801525115967, | |
| "learning_rate": 1.3895422906712042e-05, | |
| "loss": 1.1098148822784424, | |
| "step": 228 | |
| }, | |
| { | |
| "epoch": 0.9163346613545816, | |
| "grad_norm": 0.6246854662895203, | |
| "learning_rate": 1.3870904551699686e-05, | |
| "loss": 1.1869398355484009, | |
| "step": 230 | |
| }, | |
| { | |
| "epoch": 0.9243027888446215, | |
| "grad_norm": 0.308601975440979, | |
| "learning_rate": 1.38461418656871e-05, | |
| "loss": 1.3266777992248535, | |
| "step": 232 | |
| }, | |
| { | |
| "epoch": 0.9322709163346613, | |
| "grad_norm": 0.3320607841014862, | |
| "learning_rate": 1.3821135925065423e-05, | |
| "loss": 0.8920221924781799, | |
| "step": 234 | |
| }, | |
| { | |
| "epoch": 0.9402390438247012, | |
| "grad_norm": 0.2533508837223053, | |
| "learning_rate": 1.3795887816799647e-05, | |
| "loss": 0.8552533984184265, | |
| "step": 236 | |
| }, | |
| { | |
| "epoch": 0.9482071713147411, | |
| "grad_norm": 0.37766775488853455, | |
| "learning_rate": 1.3770398638381374e-05, | |
| "loss": 0.5838753581047058, | |
| "step": 238 | |
| }, | |
| { | |
| "epoch": 0.9561752988047809, | |
| "grad_norm": 0.5343811511993408, | |
| "learning_rate": 1.3744669497781111e-05, | |
| "loss": 0.8912972807884216, | |
| "step": 240 | |
| }, | |
| { | |
| "epoch": 0.9641434262948207, | |
| "grad_norm": 0.5110613107681274, | |
| "learning_rate": 1.3718701513400104e-05, | |
| "loss": 1.1340361833572388, | |
| "step": 242 | |
| }, | |
| { | |
| "epoch": 0.9721115537848606, | |
| "grad_norm": 0.34986478090286255, | |
| "learning_rate": 1.369249581402173e-05, | |
| "loss": 1.2093524932861328, | |
| "step": 244 | |
| }, | |
| { | |
| "epoch": 0.9800796812749004, | |
| "grad_norm": 0.6902351975440979, | |
| "learning_rate": 1.3666053538762414e-05, | |
| "loss": 0.973604142665863, | |
| "step": 246 | |
| }, | |
| { | |
| "epoch": 0.9880478087649402, | |
| "grad_norm": 0.364798903465271, | |
| "learning_rate": 1.363937583702214e-05, | |
| "loss": 1.004298448562622, | |
| "step": 248 | |
| }, | |
| { | |
| "epoch": 0.9960159362549801, | |
| "grad_norm": 0.594591498374939, | |
| "learning_rate": 1.3612463868434462e-05, | |
| "loss": 1.005676031112671, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 1.00398406374502, | |
| "grad_norm": 0.9396325349807739, | |
| "learning_rate": 1.3585318802816118e-05, | |
| "loss": 0.9656413197517395, | |
| "step": 252 | |
| }, | |
| { | |
| "epoch": 1.0119521912350598, | |
| "grad_norm": 0.5345960855484009, | |
| "learning_rate": 1.3557941820116163e-05, | |
| "loss": 0.7036761045455933, | |
| "step": 254 | |
| }, | |
| { | |
| "epoch": 1.0199203187250996, | |
| "grad_norm": 0.8415208458900452, | |
| "learning_rate": 1.3530334110364691e-05, | |
| "loss": 1.0861495733261108, | |
| "step": 256 | |
| }, | |
| { | |
| "epoch": 1.0278884462151394, | |
| "grad_norm": 0.4500897228717804, | |
| "learning_rate": 1.35024968736211e-05, | |
| "loss": 1.0180453062057495, | |
| "step": 258 | |
| }, | |
| { | |
| "epoch": 1.0358565737051793, | |
| "grad_norm": 0.3588436245918274, | |
| "learning_rate": 1.3474431319921936e-05, | |
| "loss": 0.9354724884033203, | |
| "step": 260 | |
| }, | |
| { | |
| "epoch": 1.043824701195219, | |
| "grad_norm": 0.3891165852546692, | |
| "learning_rate": 1.3446138669228274e-05, | |
| "loss": 0.9144407510757446, | |
| "step": 262 | |
| }, | |
| { | |
| "epoch": 1.051792828685259, | |
| "grad_norm": 1.5371865034103394, | |
| "learning_rate": 1.3417620151372716e-05, | |
| "loss": 0.9848403930664062, | |
| "step": 264 | |
| }, | |
| { | |
| "epoch": 1.0597609561752988, | |
| "grad_norm": 0.6903578639030457, | |
| "learning_rate": 1.3388877006005911e-05, | |
| "loss": 0.6154371500015259, | |
| "step": 266 | |
| }, | |
| { | |
| "epoch": 1.0677290836653386, | |
| "grad_norm": 0.19243323802947998, | |
| "learning_rate": 1.3359910482542686e-05, | |
| "loss": 0.8479989171028137, | |
| "step": 268 | |
| }, | |
| { | |
| "epoch": 1.0756972111553784, | |
| "grad_norm": 0.5195255279541016, | |
| "learning_rate": 1.3330721840107718e-05, | |
| "loss": 0.5587765574455261, | |
| "step": 270 | |
| }, | |
| { | |
| "epoch": 1.0836653386454183, | |
| "grad_norm": 0.3604806661605835, | |
| "learning_rate": 1.3301312347480817e-05, | |
| "loss": 1.1884621381759644, | |
| "step": 272 | |
| }, | |
| { | |
| "epoch": 1.091633466135458, | |
| "grad_norm": 1.2894952297210693, | |
| "learning_rate": 1.3271683283041767e-05, | |
| "loss": 0.625873863697052, | |
| "step": 274 | |
| }, | |
| { | |
| "epoch": 1.099601593625498, | |
| "grad_norm": 0.27667495608329773, | |
| "learning_rate": 1.3241835934714759e-05, | |
| "loss": 0.7773606181144714, | |
| "step": 276 | |
| }, | |
| { | |
| "epoch": 1.1075697211155378, | |
| "grad_norm": 0.23738747835159302, | |
| "learning_rate": 1.3211771599912408e-05, | |
| "loss": 0.7299227714538574, | |
| "step": 278 | |
| }, | |
| { | |
| "epoch": 1.1155378486055776, | |
| "grad_norm": 0.3089618980884552, | |
| "learning_rate": 1.3181491585479354e-05, | |
| "loss": 0.8809335231781006, | |
| "step": 280 | |
| }, | |
| { | |
| "epoch": 1.1235059760956174, | |
| "grad_norm": 0.25363025069236755, | |
| "learning_rate": 1.3150997207635463e-05, | |
| "loss": 1.0729031562805176, | |
| "step": 282 | |
| }, | |
| { | |
| "epoch": 1.1314741035856573, | |
| "grad_norm": 0.47339093685150146, | |
| "learning_rate": 1.31202897919186e-05, | |
| "loss": 1.0833154916763306, | |
| "step": 284 | |
| }, | |
| { | |
| "epoch": 1.139442231075697, | |
| "grad_norm": 0.18187947571277618, | |
| "learning_rate": 1.3089370673127026e-05, | |
| "loss": 0.3476455509662628, | |
| "step": 286 | |
| }, | |
| { | |
| "epoch": 1.1474103585657371, | |
| "grad_norm": 0.226917564868927, | |
| "learning_rate": 1.3058241195261357e-05, | |
| "loss": 0.6067731976509094, | |
| "step": 288 | |
| }, | |
| { | |
| "epoch": 1.155378486055777, | |
| "grad_norm": 1.2993286848068237, | |
| "learning_rate": 1.3026902711466169e-05, | |
| "loss": 0.8683360815048218, | |
| "step": 290 | |
| }, | |
| { | |
| "epoch": 1.1633466135458168, | |
| "grad_norm": 0.4915187954902649, | |
| "learning_rate": 1.2995356583971152e-05, | |
| "loss": 0.6069297790527344, | |
| "step": 292 | |
| }, | |
| { | |
| "epoch": 1.1713147410358566, | |
| "grad_norm": 0.24846410751342773, | |
| "learning_rate": 1.2963604184031913e-05, | |
| "loss": 1.096907615661621, | |
| "step": 294 | |
| }, | |
| { | |
| "epoch": 1.1792828685258965, | |
| "grad_norm": 0.40995633602142334, | |
| "learning_rate": 1.2931646891870371e-05, | |
| "loss": 1.1847357749938965, | |
| "step": 296 | |
| }, | |
| { | |
| "epoch": 1.1872509960159363, | |
| "grad_norm": 0.42281821370124817, | |
| "learning_rate": 1.2899486096614742e-05, | |
| "loss": 1.1937490701675415, | |
| "step": 298 | |
| }, | |
| { | |
| "epoch": 1.1952191235059761, | |
| "grad_norm": 0.5707376003265381, | |
| "learning_rate": 1.2867123196239186e-05, | |
| "loss": 0.5830255746841431, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 1.203187250996016, | |
| "grad_norm": 0.5589886903762817, | |
| "learning_rate": 1.2834559597503008e-05, | |
| "loss": 0.8486528992652893, | |
| "step": 302 | |
| }, | |
| { | |
| "epoch": 1.2111553784860558, | |
| "grad_norm": 0.4859887361526489, | |
| "learning_rate": 1.2801796715889535e-05, | |
| "loss": 0.7010272145271301, | |
| "step": 304 | |
| }, | |
| { | |
| "epoch": 1.2191235059760956, | |
| "grad_norm": 0.3184964060783386, | |
| "learning_rate": 1.2768835975544572e-05, | |
| "loss": 1.087632179260254, | |
| "step": 306 | |
| }, | |
| { | |
| "epoch": 1.2270916334661355, | |
| "grad_norm": 0.6567210555076599, | |
| "learning_rate": 1.2735678809214497e-05, | |
| "loss": 0.8818908333778381, | |
| "step": 308 | |
| }, | |
| { | |
| "epoch": 1.2350597609561753, | |
| "grad_norm": 0.8336063027381897, | |
| "learning_rate": 1.270232665818399e-05, | |
| "loss": 1.1307973861694336, | |
| "step": 310 | |
| }, | |
| { | |
| "epoch": 1.2430278884462151, | |
| "grad_norm": 0.46054601669311523, | |
| "learning_rate": 1.266878097221338e-05, | |
| "loss": 1.0041382312774658, | |
| "step": 312 | |
| }, | |
| { | |
| "epoch": 1.250996015936255, | |
| "grad_norm": 1.004090666770935, | |
| "learning_rate": 1.263504320947562e-05, | |
| "loss": 0.8790667057037354, | |
| "step": 314 | |
| }, | |
| { | |
| "epoch": 1.2589641434262948, | |
| "grad_norm": 0.6220927834510803, | |
| "learning_rate": 1.2601114836492917e-05, | |
| "loss": 0.7389086484909058, | |
| "step": 316 | |
| }, | |
| { | |
| "epoch": 1.2669322709163346, | |
| "grad_norm": 1.0630143880844116, | |
| "learning_rate": 1.2566997328072966e-05, | |
| "loss": 0.6448332667350769, | |
| "step": 318 | |
| }, | |
| { | |
| "epoch": 1.2749003984063745, | |
| "grad_norm": 0.4616350829601288, | |
| "learning_rate": 1.2532692167244852e-05, | |
| "loss": 0.5268493890762329, | |
| "step": 320 | |
| }, | |
| { | |
| "epoch": 1.2828685258964143, | |
| "grad_norm": 3.0412392616271973, | |
| "learning_rate": 1.2498200845194596e-05, | |
| "loss": 0.9104723930358887, | |
| "step": 322 | |
| }, | |
| { | |
| "epoch": 1.2908366533864541, | |
| "grad_norm": 0.4327695369720459, | |
| "learning_rate": 1.2463524861200316e-05, | |
| "loss": 0.8771180510520935, | |
| "step": 324 | |
| }, | |
| { | |
| "epoch": 1.298804780876494, | |
| "grad_norm": 0.6371755599975586, | |
| "learning_rate": 1.2428665722567073e-05, | |
| "loss": 1.1892993450164795, | |
| "step": 326 | |
| }, | |
| { | |
| "epoch": 1.3067729083665338, | |
| "grad_norm": 0.503496527671814, | |
| "learning_rate": 1.2393624944561334e-05, | |
| "loss": 0.7128881216049194, | |
| "step": 328 | |
| }, | |
| { | |
| "epoch": 1.3147410358565736, | |
| "grad_norm": 0.43169552087783813, | |
| "learning_rate": 1.2358404050345122e-05, | |
| "loss": 0.7095832824707031, | |
| "step": 330 | |
| }, | |
| { | |
| "epoch": 1.3227091633466135, | |
| "grad_norm": 0.5526296496391296, | |
| "learning_rate": 1.2323004570909798e-05, | |
| "loss": 0.8684831261634827, | |
| "step": 332 | |
| }, | |
| { | |
| "epoch": 1.3306772908366533, | |
| "grad_norm": 1.0183297395706177, | |
| "learning_rate": 1.2287428045009517e-05, | |
| "loss": 0.665216863155365, | |
| "step": 334 | |
| }, | |
| { | |
| "epoch": 1.3386454183266931, | |
| "grad_norm": 0.7202191352844238, | |
| "learning_rate": 1.2251676019094331e-05, | |
| "loss": 0.8956350684165955, | |
| "step": 336 | |
| }, | |
| { | |
| "epoch": 1.3466135458167332, | |
| "grad_norm": 0.38658207654953003, | |
| "learning_rate": 1.2215750047242982e-05, | |
| "loss": 1.0827162265777588, | |
| "step": 338 | |
| }, | |
| { | |
| "epoch": 1.354581673306773, | |
| "grad_norm": 0.2367570847272873, | |
| "learning_rate": 1.2179651691095329e-05, | |
| "loss": 1.0241369009017944, | |
| "step": 340 | |
| }, | |
| { | |
| "epoch": 1.3625498007968129, | |
| "grad_norm": 0.3254954218864441, | |
| "learning_rate": 1.2143382519784498e-05, | |
| "loss": 1.1053788661956787, | |
| "step": 342 | |
| }, | |
| { | |
| "epoch": 1.3705179282868527, | |
| "grad_norm": 0.25897926092147827, | |
| "learning_rate": 1.2106944109868636e-05, | |
| "loss": 1.037227988243103, | |
| "step": 344 | |
| }, | |
| { | |
| "epoch": 1.3784860557768925, | |
| "grad_norm": 0.4815937876701355, | |
| "learning_rate": 1.2070338045262406e-05, | |
| "loss": 0.7165056467056274, | |
| "step": 346 | |
| }, | |
| { | |
| "epoch": 1.3864541832669324, | |
| "grad_norm": 0.2625236213207245, | |
| "learning_rate": 1.2033565917168133e-05, | |
| "loss": 1.0718673467636108, | |
| "step": 348 | |
| }, | |
| { | |
| "epoch": 1.3944223107569722, | |
| "grad_norm": 0.4188198447227478, | |
| "learning_rate": 1.1996629324006632e-05, | |
| "loss": 0.6164529323577881, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 1.402390438247012, | |
| "grad_norm": 0.25191178917884827, | |
| "learning_rate": 1.195952987134773e-05, | |
| "loss": 1.0730476379394531, | |
| "step": 352 | |
| }, | |
| { | |
| "epoch": 1.4103585657370519, | |
| "grad_norm": 0.5282136797904968, | |
| "learning_rate": 1.1922269171840477e-05, | |
| "loss": 1.1133763790130615, | |
| "step": 354 | |
| }, | |
| { | |
| "epoch": 1.4183266932270917, | |
| "grad_norm": 0.39372941851615906, | |
| "learning_rate": 1.1884848845143039e-05, | |
| "loss": 0.9437786936759949, | |
| "step": 356 | |
| }, | |
| { | |
| "epoch": 1.4262948207171315, | |
| "grad_norm": 0.27430135011672974, | |
| "learning_rate": 1.1847270517852312e-05, | |
| "loss": 1.101191759109497, | |
| "step": 358 | |
| }, | |
| { | |
| "epoch": 1.4342629482071714, | |
| "grad_norm": 0.4338213801383972, | |
| "learning_rate": 1.180953582343319e-05, | |
| "loss": 0.5615993738174438, | |
| "step": 360 | |
| }, | |
| { | |
| "epoch": 1.4422310756972112, | |
| "grad_norm": 0.20297643542289734, | |
| "learning_rate": 1.177164640214758e-05, | |
| "loss": 0.648676335811615, | |
| "step": 362 | |
| }, | |
| { | |
| "epoch": 1.450199203187251, | |
| "grad_norm": 0.43412458896636963, | |
| "learning_rate": 1.1733603900983107e-05, | |
| "loss": 0.9797654747962952, | |
| "step": 364 | |
| }, | |
| { | |
| "epoch": 1.4581673306772909, | |
| "grad_norm": 0.27069559693336487, | |
| "learning_rate": 1.1695409973581504e-05, | |
| "loss": 1.0201314687728882, | |
| "step": 366 | |
| }, | |
| { | |
| "epoch": 1.4661354581673307, | |
| "grad_norm": 0.21320168673992157, | |
| "learning_rate": 1.1657066280166745e-05, | |
| "loss": 0.5693846940994263, | |
| "step": 368 | |
| }, | |
| { | |
| "epoch": 1.4741035856573705, | |
| "grad_norm": 0.609273374080658, | |
| "learning_rate": 1.1618574487472867e-05, | |
| "loss": 0.6598872542381287, | |
| "step": 370 | |
| }, | |
| { | |
| "epoch": 1.4820717131474104, | |
| "grad_norm": 1.0151580572128296, | |
| "learning_rate": 1.1579936268671537e-05, | |
| "loss": 1.1873997449874878, | |
| "step": 372 | |
| }, | |
| { | |
| "epoch": 1.4900398406374502, | |
| "grad_norm": 0.5126774907112122, | |
| "learning_rate": 1.1541153303299305e-05, | |
| "loss": 1.0114318132400513, | |
| "step": 374 | |
| }, | |
| { | |
| "epoch": 1.49800796812749, | |
| "grad_norm": 0.4790279269218445, | |
| "learning_rate": 1.1502227277184605e-05, | |
| "loss": 1.0180116891860962, | |
| "step": 376 | |
| }, | |
| { | |
| "epoch": 1.5059760956175299, | |
| "grad_norm": 3.794914722442627, | |
| "learning_rate": 1.1463159882374477e-05, | |
| "loss": 0.8887977004051208, | |
| "step": 378 | |
| }, | |
| { | |
| "epoch": 1.5139442231075697, | |
| "grad_norm": 0.2821894884109497, | |
| "learning_rate": 1.1423952817061005e-05, | |
| "loss": 1.0826634168624878, | |
| "step": 380 | |
| }, | |
| { | |
| "epoch": 1.5219123505976095, | |
| "grad_norm": 0.26013344526290894, | |
| "learning_rate": 1.1384607785507527e-05, | |
| "loss": 0.6501424312591553, | |
| "step": 382 | |
| }, | |
| { | |
| "epoch": 1.5298804780876494, | |
| "grad_norm": 0.21201461553573608, | |
| "learning_rate": 1.1345126497974507e-05, | |
| "loss": 0.6929817795753479, | |
| "step": 384 | |
| }, | |
| { | |
| "epoch": 1.5378486055776892, | |
| "grad_norm": 0.603386402130127, | |
| "learning_rate": 1.1305510670645247e-05, | |
| "loss": 0.9329879879951477, | |
| "step": 386 | |
| }, | |
| { | |
| "epoch": 1.545816733067729, | |
| "grad_norm": 0.3552367389202118, | |
| "learning_rate": 1.1265762025551246e-05, | |
| "loss": 1.1002554893493652, | |
| "step": 388 | |
| }, | |
| { | |
| "epoch": 1.5537848605577689, | |
| "grad_norm": 0.8357146382331848, | |
| "learning_rate": 1.122588229049737e-05, | |
| "loss": 0.5634505152702332, | |
| "step": 390 | |
| }, | |
| { | |
| "epoch": 1.5617529880478087, | |
| "grad_norm": 0.9403584003448486, | |
| "learning_rate": 1.118587319898673e-05, | |
| "loss": 0.6033604145050049, | |
| "step": 392 | |
| }, | |
| { | |
| "epoch": 1.5697211155378485, | |
| "grad_norm": 2.087606430053711, | |
| "learning_rate": 1.1145736490145346e-05, | |
| "loss": 1.0487326383590698, | |
| "step": 394 | |
| }, | |
| { | |
| "epoch": 1.5776892430278884, | |
| "grad_norm": 0.7443987727165222, | |
| "learning_rate": 1.110547390864654e-05, | |
| "loss": 0.9917337894439697, | |
| "step": 396 | |
| }, | |
| { | |
| "epoch": 1.5856573705179282, | |
| "grad_norm": 0.4282863438129425, | |
| "learning_rate": 1.1065087204635103e-05, | |
| "loss": 1.0512839555740356, | |
| "step": 398 | |
| }, | |
| { | |
| "epoch": 1.593625498007968, | |
| "grad_norm": 0.6512730121612549, | |
| "learning_rate": 1.1024578133651209e-05, | |
| "loss": 0.6531898975372314, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 1.6015936254980079, | |
| "grad_norm": 0.47180187702178955, | |
| "learning_rate": 1.0983948456554123e-05, | |
| "loss": 1.0244213342666626, | |
| "step": 402 | |
| }, | |
| { | |
| "epoch": 1.6095617529880477, | |
| "grad_norm": 0.41504454612731934, | |
| "learning_rate": 1.0943199939445644e-05, | |
| "loss": 1.141480803489685, | |
| "step": 404 | |
| }, | |
| { | |
| "epoch": 1.6175298804780875, | |
| "grad_norm": 0.7667415142059326, | |
| "learning_rate": 1.0902334353593342e-05, | |
| "loss": 0.6996335387229919, | |
| "step": 406 | |
| }, | |
| { | |
| "epoch": 1.6254980079681274, | |
| "grad_norm": 0.23972085118293762, | |
| "learning_rate": 1.0861353475353559e-05, | |
| "loss": 0.5143875479698181, | |
| "step": 408 | |
| }, | |
| { | |
| "epoch": 1.6334661354581672, | |
| "grad_norm": 0.1878281980752945, | |
| "learning_rate": 1.08202590860942e-05, | |
| "loss": 0.6930667757987976, | |
| "step": 410 | |
| }, | |
| { | |
| "epoch": 1.641434262948207, | |
| "grad_norm": 0.3578081429004669, | |
| "learning_rate": 1.0779052972117306e-05, | |
| "loss": 0.4972156882286072, | |
| "step": 412 | |
| }, | |
| { | |
| "epoch": 1.6494023904382469, | |
| "grad_norm": 0.26842987537384033, | |
| "learning_rate": 1.0737736924581386e-05, | |
| "loss": 0.7380754351615906, | |
| "step": 414 | |
| }, | |
| { | |
| "epoch": 1.6573705179282867, | |
| "grad_norm": 0.31403297185897827, | |
| "learning_rate": 1.0696312739423573e-05, | |
| "loss": 0.7590941190719604, | |
| "step": 416 | |
| }, | |
| { | |
| "epoch": 1.6653386454183265, | |
| "grad_norm": 0.5314321517944336, | |
| "learning_rate": 1.0654782217281563e-05, | |
| "loss": 0.8922839760780334, | |
| "step": 418 | |
| }, | |
| { | |
| "epoch": 1.6733067729083664, | |
| "grad_norm": 0.2761631906032562, | |
| "learning_rate": 1.0613147163415331e-05, | |
| "loss": 1.112337350845337, | |
| "step": 420 | |
| }, | |
| { | |
| "epoch": 1.6812749003984062, | |
| "grad_norm": 0.3739781081676483, | |
| "learning_rate": 1.0571409387628661e-05, | |
| "loss": 0.9243249893188477, | |
| "step": 422 | |
| }, | |
| { | |
| "epoch": 1.6892430278884463, | |
| "grad_norm": 0.8964663147926331, | |
| "learning_rate": 1.0529570704190493e-05, | |
| "loss": 0.5647684335708618, | |
| "step": 424 | |
| }, | |
| { | |
| "epoch": 1.697211155378486, | |
| "grad_norm": 0.333854079246521, | |
| "learning_rate": 1.0487632931756039e-05, | |
| "loss": 1.0856620073318481, | |
| "step": 426 | |
| }, | |
| { | |
| "epoch": 1.705179282868526, | |
| "grad_norm": 0.26213064789772034, | |
| "learning_rate": 1.0445597893287742e-05, | |
| "loss": 1.0230387449264526, | |
| "step": 428 | |
| }, | |
| { | |
| "epoch": 1.7131474103585658, | |
| "grad_norm": 0.4736036956310272, | |
| "learning_rate": 1.0403467415976025e-05, | |
| "loss": 0.6771261692047119, | |
| "step": 430 | |
| }, | |
| { | |
| "epoch": 1.7211155378486056, | |
| "grad_norm": 0.8969900608062744, | |
| "learning_rate": 1.036124333115988e-05, | |
| "loss": 0.8703440427780151, | |
| "step": 432 | |
| }, | |
| { | |
| "epoch": 1.7290836653386454, | |
| "grad_norm": 0.9138644337654114, | |
| "learning_rate": 1.0318927474247258e-05, | |
| "loss": 0.6527059674263, | |
| "step": 434 | |
| }, | |
| { | |
| "epoch": 1.7370517928286853, | |
| "grad_norm": 1.2199382781982422, | |
| "learning_rate": 1.0276521684635272e-05, | |
| "loss": 0.42034152150154114, | |
| "step": 436 | |
| }, | |
| { | |
| "epoch": 1.745019920318725, | |
| "grad_norm": 0.753322422504425, | |
| "learning_rate": 1.0234027805630263e-05, | |
| "loss": 0.8424271941184998, | |
| "step": 438 | |
| }, | |
| { | |
| "epoch": 1.752988047808765, | |
| "grad_norm": 0.6605293154716492, | |
| "learning_rate": 1.0191447684367665e-05, | |
| "loss": 0.6778283715248108, | |
| "step": 440 | |
| }, | |
| { | |
| "epoch": 1.7609561752988048, | |
| "grad_norm": 0.8106198310852051, | |
| "learning_rate": 1.0148783171731716e-05, | |
| "loss": 1.4355847835540771, | |
| "step": 442 | |
| }, | |
| { | |
| "epoch": 1.7689243027888446, | |
| "grad_norm": 0.3683789074420929, | |
| "learning_rate": 1.0106036122274989e-05, | |
| "loss": 0.6579235196113586, | |
| "step": 444 | |
| }, | |
| { | |
| "epoch": 1.7768924302788844, | |
| "grad_norm": 0.2205553501844406, | |
| "learning_rate": 1.0063208394137804e-05, | |
| "loss": 0.9973717927932739, | |
| "step": 446 | |
| }, | |
| { | |
| "epoch": 1.7848605577689243, | |
| "grad_norm": 0.8739639520645142, | |
| "learning_rate": 1.0020301848967437e-05, | |
| "loss": 1.029483437538147, | |
| "step": 448 | |
| }, | |
| { | |
| "epoch": 1.792828685258964, | |
| "grad_norm": 0.2899617552757263, | |
| "learning_rate": 9.977318351837206e-06, | |
| "loss": 0.7871066331863403, | |
| "step": 450 | |
| }, | |
| { | |
| "epoch": 1.800796812749004, | |
| "grad_norm": 0.42468908429145813, | |
| "learning_rate": 9.934259771165394e-06, | |
| "loss": 0.3967509865760803, | |
| "step": 452 | |
| }, | |
| { | |
| "epoch": 1.8087649402390438, | |
| "grad_norm": 0.8459072113037109, | |
| "learning_rate": 9.89112797863404e-06, | |
| "loss": 0.9443418383598328, | |
| "step": 454 | |
| }, | |
| { | |
| "epoch": 1.8167330677290838, | |
| "grad_norm": 0.7007260322570801, | |
| "learning_rate": 9.847924849107578e-06, | |
| "loss": 0.7411941289901733, | |
| "step": 456 | |
| }, | |
| { | |
| "epoch": 1.8247011952191237, | |
| "grad_norm": 1.2606959342956543, | |
| "learning_rate": 9.804652260551332e-06, | |
| "loss": 0.9570497274398804, | |
| "step": 458 | |
| }, | |
| { | |
| "epoch": 1.8326693227091635, | |
| "grad_norm": 1.1064777374267578, | |
| "learning_rate": 9.761312093949886e-06, | |
| "loss": 0.7529144883155823, | |
| "step": 460 | |
| }, | |
| { | |
| "epoch": 1.8406374501992033, | |
| "grad_norm": 0.7540960907936096, | |
| "learning_rate": 9.717906233225339e-06, | |
| "loss": 0.7726236581802368, | |
| "step": 462 | |
| }, | |
| { | |
| "epoch": 1.8486055776892432, | |
| "grad_norm": 0.4733653962612152, | |
| "learning_rate": 9.674436565155389e-06, | |
| "loss": 0.15728430449962616, | |
| "step": 464 | |
| }, | |
| { | |
| "epoch": 1.856573705179283, | |
| "grad_norm": 0.2718278169631958, | |
| "learning_rate": 9.63090497929133e-06, | |
| "loss": 1.0682100057601929, | |
| "step": 466 | |
| }, | |
| { | |
| "epoch": 1.8645418326693228, | |
| "grad_norm": 1.8510208129882812, | |
| "learning_rate": 9.587313367875922e-06, | |
| "loss": 0.4695431590080261, | |
| "step": 468 | |
| }, | |
| { | |
| "epoch": 1.8725099601593627, | |
| "grad_norm": 0.4119950234889984, | |
| "learning_rate": 9.543663625761121e-06, | |
| "loss": 1.0789568424224854, | |
| "step": 470 | |
| }, | |
| { | |
| "epoch": 1.8804780876494025, | |
| "grad_norm": 0.7990518808364868, | |
| "learning_rate": 9.499957650325738e-06, | |
| "loss": 1.02091383934021, | |
| "step": 472 | |
| }, | |
| { | |
| "epoch": 1.8884462151394423, | |
| "grad_norm": 0.7012404799461365, | |
| "learning_rate": 9.456197341392932e-06, | |
| "loss": 0.9402192831039429, | |
| "step": 474 | |
| }, | |
| { | |
| "epoch": 1.8964143426294822, | |
| "grad_norm": 0.3745291531085968, | |
| "learning_rate": 9.412384601147663e-06, | |
| "loss": 0.9166637063026428, | |
| "step": 476 | |
| }, | |
| { | |
| "epoch": 1.904382470119522, | |
| "grad_norm": 0.30497679114341736, | |
| "learning_rate": 9.368521334053973e-06, | |
| "loss": 0.812641978263855, | |
| "step": 478 | |
| }, | |
| { | |
| "epoch": 1.9123505976095618, | |
| "grad_norm": 1.237668514251709, | |
| "learning_rate": 9.324609446772233e-06, | |
| "loss": 0.5746023058891296, | |
| "step": 480 | |
| }, | |
| { | |
| "epoch": 1.9203187250996017, | |
| "grad_norm": 0.6451582908630371, | |
| "learning_rate": 9.280650848076242e-06, | |
| "loss": 0.760349690914154, | |
| "step": 482 | |
| }, | |
| { | |
| "epoch": 1.9282868525896415, | |
| "grad_norm": 0.288142591714859, | |
| "learning_rate": 9.23664744877026e-06, | |
| "loss": 1.0170018672943115, | |
| "step": 484 | |
| }, | |
| { | |
| "epoch": 1.9362549800796813, | |
| "grad_norm": 0.407728374004364, | |
| "learning_rate": 9.19260116160596e-06, | |
| "loss": 0.9356874227523804, | |
| "step": 486 | |
| }, | |
| { | |
| "epoch": 1.9442231075697212, | |
| "grad_norm": 0.23040206730365753, | |
| "learning_rate": 9.148513901199276e-06, | |
| "loss": 1.0043561458587646, | |
| "step": 488 | |
| }, | |
| { | |
| "epoch": 1.952191235059761, | |
| "grad_norm": 0.2875385880470276, | |
| "learning_rate": 9.104387583947168e-06, | |
| "loss": 1.023063063621521, | |
| "step": 490 | |
| }, | |
| { | |
| "epoch": 1.9601593625498008, | |
| "grad_norm": 0.3855358362197876, | |
| "learning_rate": 9.060224127944343e-06, | |
| "loss": 0.6780633330345154, | |
| "step": 492 | |
| }, | |
| { | |
| "epoch": 1.9681274900398407, | |
| "grad_norm": 2.7685351371765137, | |
| "learning_rate": 9.016025452899853e-06, | |
| "loss": 0.7522924542427063, | |
| "step": 494 | |
| }, | |
| { | |
| "epoch": 1.9760956175298805, | |
| "grad_norm": 0.37701013684272766, | |
| "learning_rate": 8.971793480053668e-06, | |
| "loss": 0.9699747562408447, | |
| "step": 496 | |
| }, | |
| { | |
| "epoch": 1.9840637450199203, | |
| "grad_norm": 5.959843635559082, | |
| "learning_rate": 8.927530132093156e-06, | |
| "loss": 0.8083460927009583, | |
| "step": 498 | |
| }, | |
| { | |
| "epoch": 1.9920318725099602, | |
| "grad_norm": 0.27620622515678406, | |
| "learning_rate": 8.8832373330695e-06, | |
| "loss": 1.1264008283615112, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "grad_norm": 0.4609326422214508, | |
| "learning_rate": 8.83891700831408e-06, | |
| "loss": 0.5836660265922546, | |
| "step": 502 | |
| }, | |
| { | |
| "epoch": 2.00796812749004, | |
| "grad_norm": 0.4023412764072418, | |
| "learning_rate": 8.794571084354764e-06, | |
| "loss": 0.47467219829559326, | |
| "step": 504 | |
| }, | |
| { | |
| "epoch": 2.0159362549800797, | |
| "grad_norm": 1.0555591583251953, | |
| "learning_rate": 8.750201488832178e-06, | |
| "loss": 0.44583338499069214, | |
| "step": 506 | |
| }, | |
| { | |
| "epoch": 2.0239043824701195, | |
| "grad_norm": 0.038634952157735825, | |
| "learning_rate": 8.705810150415905e-06, | |
| "loss": 0.42819151282310486, | |
| "step": 508 | |
| }, | |
| { | |
| "epoch": 2.0318725099601593, | |
| "grad_norm": 0.418973833322525, | |
| "learning_rate": 8.661398998720662e-06, | |
| "loss": 0.6882845163345337, | |
| "step": 510 | |
| }, | |
| { | |
| "epoch": 2.039840637450199, | |
| "grad_norm": 0.32119250297546387, | |
| "learning_rate": 8.616969964222403e-06, | |
| "loss": 0.5964008569717407, | |
| "step": 512 | |
| }, | |
| { | |
| "epoch": 2.047808764940239, | |
| "grad_norm": 0.9128912091255188, | |
| "learning_rate": 8.572524978174426e-06, | |
| "loss": 0.33640968799591064, | |
| "step": 514 | |
| }, | |
| { | |
| "epoch": 2.055776892430279, | |
| "grad_norm": 0.3310595452785492, | |
| "learning_rate": 8.528065972523414e-06, | |
| "loss": 0.7787442207336426, | |
| "step": 516 | |
| }, | |
| { | |
| "epoch": 2.0637450199203187, | |
| "grad_norm": 1.0674067735671997, | |
| "learning_rate": 8.483594879825458e-06, | |
| "loss": 0.4966733455657959, | |
| "step": 518 | |
| }, | |
| { | |
| "epoch": 2.0717131474103585, | |
| "grad_norm": 1.0013618469238281, | |
| "learning_rate": 8.439113633162048e-06, | |
| "loss": 0.6508659720420837, | |
| "step": 520 | |
| }, | |
| { | |
| "epoch": 2.0796812749003983, | |
| "grad_norm": 0.3296944797039032, | |
| "learning_rate": 8.39462416605605e-06, | |
| "loss": 0.7466489672660828, | |
| "step": 522 | |
| }, | |
| { | |
| "epoch": 2.087649402390438, | |
| "grad_norm": 0.7697274684906006, | |
| "learning_rate": 8.350128412387663e-06, | |
| "loss": 0.754063606262207, | |
| "step": 524 | |
| }, | |
| { | |
| "epoch": 2.095617529880478, | |
| "grad_norm": 1.24392831325531, | |
| "learning_rate": 8.305628306310352e-06, | |
| "loss": 0.3448694050312042, | |
| "step": 526 | |
| }, | |
| { | |
| "epoch": 2.103585657370518, | |
| "grad_norm": 0.42689138650894165, | |
| "learning_rate": 8.261125782166764e-06, | |
| "loss": 0.6862057447433472, | |
| "step": 528 | |
| }, | |
| { | |
| "epoch": 2.1115537848605577, | |
| "grad_norm": 0.13302293419837952, | |
| "learning_rate": 8.216622774404667e-06, | |
| "loss": 0.42651891708374023, | |
| "step": 530 | |
| }, | |
| { | |
| "epoch": 2.1195219123505975, | |
| "grad_norm": 1.496959924697876, | |
| "learning_rate": 8.172121217492846e-06, | |
| "loss": 0.2123342901468277, | |
| "step": 532 | |
| }, | |
| { | |
| "epoch": 2.1274900398406373, | |
| "grad_norm": 0.46577370166778564, | |
| "learning_rate": 8.127623045837018e-06, | |
| "loss": 0.7218248844146729, | |
| "step": 534 | |
| }, | |
| { | |
| "epoch": 2.135458167330677, | |
| "grad_norm": 1.5315691232681274, | |
| "learning_rate": 8.08313019369575e-06, | |
| "loss": 0.610504686832428, | |
| "step": 536 | |
| }, | |
| { | |
| "epoch": 2.143426294820717, | |
| "grad_norm": 0.7654959559440613, | |
| "learning_rate": 8.038644595096385e-06, | |
| "loss": 0.6098729372024536, | |
| "step": 538 | |
| }, | |
| { | |
| "epoch": 2.151394422310757, | |
| "grad_norm": 0.5512191653251648, | |
| "learning_rate": 7.994168183750962e-06, | |
| "loss": 0.7628468871116638, | |
| "step": 540 | |
| }, | |
| { | |
| "epoch": 2.1593625498007967, | |
| "grad_norm": 0.3205984830856323, | |
| "learning_rate": 7.949702892972157e-06, | |
| "loss": 0.6645801067352295, | |
| "step": 542 | |
| }, | |
| { | |
| "epoch": 2.1673306772908365, | |
| "grad_norm": 0.1639721542596817, | |
| "learning_rate": 7.905250655589271e-06, | |
| "loss": 0.5173146724700928, | |
| "step": 544 | |
| }, | |
| { | |
| "epoch": 2.1752988047808763, | |
| "grad_norm": 0.9050138592720032, | |
| "learning_rate": 7.860813403864191e-06, | |
| "loss": 0.6048539876937866, | |
| "step": 546 | |
| }, | |
| { | |
| "epoch": 2.183266932270916, | |
| "grad_norm": 0.3164230287075043, | |
| "learning_rate": 7.816393069407394e-06, | |
| "loss": 0.7414080500602722, | |
| "step": 548 | |
| }, | |
| { | |
| "epoch": 2.191235059760956, | |
| "grad_norm": 0.24208378791809082, | |
| "learning_rate": 7.771991583094e-06, | |
| "loss": 0.7846360206604004, | |
| "step": 550 | |
| }, | |
| { | |
| "epoch": 2.199203187250996, | |
| "grad_norm": 0.35901176929473877, | |
| "learning_rate": 7.727610874979838e-06, | |
| "loss": 0.48403286933898926, | |
| "step": 552 | |
| }, | |
| { | |
| "epoch": 2.2071713147410357, | |
| "grad_norm": 0.506497323513031, | |
| "learning_rate": 7.683252874217535e-06, | |
| "loss": 0.43215182423591614, | |
| "step": 554 | |
| }, | |
| { | |
| "epoch": 2.2151394422310755, | |
| "grad_norm": 0.31206437945365906, | |
| "learning_rate": 7.638919508972672e-06, | |
| "loss": 0.5736108422279358, | |
| "step": 556 | |
| }, | |
| { | |
| "epoch": 2.2231075697211153, | |
| "grad_norm": 1.6536140441894531, | |
| "learning_rate": 7.594612706339969e-06, | |
| "loss": 0.8024041056632996, | |
| "step": 558 | |
| }, | |
| { | |
| "epoch": 2.231075697211155, | |
| "grad_norm": 0.21574831008911133, | |
| "learning_rate": 7.550334392259514e-06, | |
| "loss": 0.8128300905227661, | |
| "step": 560 | |
| }, | |
| { | |
| "epoch": 2.239043824701195, | |
| "grad_norm": 0.6206152439117432, | |
| "learning_rate": 7.506086491433047e-06, | |
| "loss": 0.833297610282898, | |
| "step": 562 | |
| }, | |
| { | |
| "epoch": 2.247011952191235, | |
| "grad_norm": 0.8244820237159729, | |
| "learning_rate": 7.461870927240291e-06, | |
| "loss": 0.7118552327156067, | |
| "step": 564 | |
| }, | |
| { | |
| "epoch": 2.2549800796812747, | |
| "grad_norm": 0.2986677587032318, | |
| "learning_rate": 7.417689621655362e-06, | |
| "loss": 0.5102535486221313, | |
| "step": 566 | |
| }, | |
| { | |
| "epoch": 2.2629482071713145, | |
| "grad_norm": 0.2273208200931549, | |
| "learning_rate": 7.373544495163206e-06, | |
| "loss": 0.6329899430274963, | |
| "step": 568 | |
| }, | |
| { | |
| "epoch": 2.2709163346613543, | |
| "grad_norm": 0.23210270702838898, | |
| "learning_rate": 7.329437466676127e-06, | |
| "loss": 0.7478767037391663, | |
| "step": 570 | |
| }, | |
| { | |
| "epoch": 2.278884462151394, | |
| "grad_norm": 0.6402852535247803, | |
| "learning_rate": 7.285370453450376e-06, | |
| "loss": 0.6049424409866333, | |
| "step": 572 | |
| }, | |
| { | |
| "epoch": 2.2868525896414345, | |
| "grad_norm": 0.48132938146591187, | |
| "learning_rate": 7.2413453710028155e-06, | |
| "loss": 0.5839511156082153, | |
| "step": 574 | |
| }, | |
| { | |
| "epoch": 2.2948207171314743, | |
| "grad_norm": 0.29688745737075806, | |
| "learning_rate": 7.197364133027632e-06, | |
| "loss": 0.25525566935539246, | |
| "step": 576 | |
| }, | |
| { | |
| "epoch": 2.302788844621514, | |
| "grad_norm": 0.6722139120101929, | |
| "learning_rate": 7.153428651313191e-06, | |
| "loss": 0.5150002241134644, | |
| "step": 578 | |
| }, | |
| { | |
| "epoch": 2.310756972111554, | |
| "grad_norm": 0.4063420593738556, | |
| "learning_rate": 7.109540835658898e-06, | |
| "loss": 0.5354428887367249, | |
| "step": 580 | |
| }, | |
| { | |
| "epoch": 2.318725099601594, | |
| "grad_norm": 0.9487866163253784, | |
| "learning_rate": 7.065702593792204e-06, | |
| "loss": 0.5104379653930664, | |
| "step": 582 | |
| }, | |
| { | |
| "epoch": 2.3266932270916336, | |
| "grad_norm": 0.2526935040950775, | |
| "learning_rate": 7.021915831285661e-06, | |
| "loss": 0.6450150609016418, | |
| "step": 584 | |
| }, | |
| { | |
| "epoch": 2.3346613545816735, | |
| "grad_norm": 0.3406190276145935, | |
| "learning_rate": 6.978182451474124e-06, | |
| "loss": 0.5338073968887329, | |
| "step": 586 | |
| }, | |
| { | |
| "epoch": 2.3426294820717133, | |
| "grad_norm": 1.3200128078460693, | |
| "learning_rate": 6.934504355371974e-06, | |
| "loss": 0.7506805062294006, | |
| "step": 588 | |
| }, | |
| { | |
| "epoch": 2.350597609561753, | |
| "grad_norm": 0.27950209379196167, | |
| "learning_rate": 6.890883441590515e-06, | |
| "loss": 0.7645633220672607, | |
| "step": 590 | |
| }, | |
| { | |
| "epoch": 2.358565737051793, | |
| "grad_norm": 0.29245108366012573, | |
| "learning_rate": 6.847321606255432e-06, | |
| "loss": 0.7928623557090759, | |
| "step": 592 | |
| }, | |
| { | |
| "epoch": 2.366533864541833, | |
| "grad_norm": 0.4357150197029114, | |
| "learning_rate": 6.803820742924374e-06, | |
| "loss": 0.5477173924446106, | |
| "step": 594 | |
| }, | |
| { | |
| "epoch": 2.3745019920318726, | |
| "grad_norm": 0.3675963878631592, | |
| "learning_rate": 6.76038274250464e-06, | |
| "loss": 0.8036378622055054, | |
| "step": 596 | |
| }, | |
| { | |
| "epoch": 2.3824701195219125, | |
| "grad_norm": 0.5962640047073364, | |
| "learning_rate": 6.717009493170986e-06, | |
| "loss": 0.5513007044792175, | |
| "step": 598 | |
| }, | |
| { | |
| "epoch": 2.3904382470119523, | |
| "grad_norm": 0.8920307159423828, | |
| "learning_rate": 6.673702880283554e-06, | |
| "loss": 0.8076795935630798, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 2.398406374501992, | |
| "grad_norm": 0.22857658565044403, | |
| "learning_rate": 6.6304647863059155e-06, | |
| "loss": 0.7613834142684937, | |
| "step": 602 | |
| }, | |
| { | |
| "epoch": 2.406374501992032, | |
| "grad_norm": 0.9126567244529724, | |
| "learning_rate": 6.587297090723235e-06, | |
| "loss": 0.47278252243995667, | |
| "step": 604 | |
| }, | |
| { | |
| "epoch": 2.414342629482072, | |
| "grad_norm": 0.8739012479782104, | |
| "learning_rate": 6.54420166996059e-06, | |
| "loss": 0.23272567987442017, | |
| "step": 606 | |
| }, | |
| { | |
| "epoch": 2.4223107569721116, | |
| "grad_norm": 0.09651335328817368, | |
| "learning_rate": 6.501180397301394e-06, | |
| "loss": 0.32919982075691223, | |
| "step": 608 | |
| }, | |
| { | |
| "epoch": 2.4302788844621515, | |
| "grad_norm": 0.1508469432592392, | |
| "learning_rate": 6.458235142805968e-06, | |
| "loss": 0.6115418672561646, | |
| "step": 610 | |
| }, | |
| { | |
| "epoch": 2.4382470119521913, | |
| "grad_norm": 0.223999485373497, | |
| "learning_rate": 6.415367773230254e-06, | |
| "loss": 0.656358540058136, | |
| "step": 612 | |
| }, | |
| { | |
| "epoch": 2.446215139442231, | |
| "grad_norm": 0.3630542755126953, | |
| "learning_rate": 6.372580151944681e-06, | |
| "loss": 0.4408586919307709, | |
| "step": 614 | |
| }, | |
| { | |
| "epoch": 2.454183266932271, | |
| "grad_norm": 0.5294836163520813, | |
| "learning_rate": 6.329874138853146e-06, | |
| "loss": 0.7569445371627808, | |
| "step": 616 | |
| }, | |
| { | |
| "epoch": 2.462151394422311, | |
| "grad_norm": 0.7806637287139893, | |
| "learning_rate": 6.287251590312181e-06, | |
| "loss": 0.5635365843772888, | |
| "step": 618 | |
| }, | |
| { | |
| "epoch": 2.4701195219123506, | |
| "grad_norm": 0.8465815186500549, | |
| "learning_rate": 6.244714359050267e-06, | |
| "loss": 0.6494905352592468, | |
| "step": 620 | |
| }, | |
| { | |
| "epoch": 2.4780876494023905, | |
| "grad_norm": 0.30154383182525635, | |
| "learning_rate": 6.20226429408728e-06, | |
| "loss": 0.722070038318634, | |
| "step": 622 | |
| }, | |
| { | |
| "epoch": 2.4860557768924303, | |
| "grad_norm": 0.49222832918167114, | |
| "learning_rate": 6.159903240654132e-06, | |
| "loss": 0.6191802620887756, | |
| "step": 624 | |
| }, | |
| { | |
| "epoch": 2.49402390438247, | |
| "grad_norm": 0.4883638322353363, | |
| "learning_rate": 6.117633040112559e-06, | |
| "loss": 0.3768939673900604, | |
| "step": 626 | |
| }, | |
| { | |
| "epoch": 2.50199203187251, | |
| "grad_norm": 0.7983854413032532, | |
| "learning_rate": 6.0754555298750795e-06, | |
| "loss": 0.7864499688148499, | |
| "step": 628 | |
| }, | |
| { | |
| "epoch": 2.50996015936255, | |
| "grad_norm": 0.3459266126155853, | |
| "learning_rate": 6.033372543325119e-06, | |
| "loss": 0.3463517427444458, | |
| "step": 630 | |
| }, | |
| { | |
| "epoch": 2.5179282868525896, | |
| "grad_norm": 1.2402698993682861, | |
| "learning_rate": 5.991385909737327e-06, | |
| "loss": 0.3873278796672821, | |
| "step": 632 | |
| }, | |
| { | |
| "epoch": 2.5258964143426295, | |
| "grad_norm": 0.28206056356430054, | |
| "learning_rate": 5.949497454198058e-06, | |
| "loss": 0.7801554799079895, | |
| "step": 634 | |
| }, | |
| { | |
| "epoch": 2.5338645418326693, | |
| "grad_norm": 0.5089584589004517, | |
| "learning_rate": 5.907708997526031e-06, | |
| "loss": 0.7173982262611389, | |
| "step": 636 | |
| }, | |
| { | |
| "epoch": 2.541832669322709, | |
| "grad_norm": 0.7955684065818787, | |
| "learning_rate": 5.86602235619319e-06, | |
| "loss": 0.9195908904075623, | |
| "step": 638 | |
| }, | |
| { | |
| "epoch": 2.549800796812749, | |
| "grad_norm": 0.17236770689487457, | |
| "learning_rate": 5.824439342245739e-06, | |
| "loss": 0.40686023235321045, | |
| "step": 640 | |
| }, | |
| { | |
| "epoch": 2.557768924302789, | |
| "grad_norm": 0.4617612063884735, | |
| "learning_rate": 5.782961763225388e-06, | |
| "loss": 0.7664303183555603, | |
| "step": 642 | |
| }, | |
| { | |
| "epoch": 2.5657370517928286, | |
| "grad_norm": 0.2930012345314026, | |
| "learning_rate": 5.741591422090765e-06, | |
| "loss": 0.7867609858512878, | |
| "step": 644 | |
| }, | |
| { | |
| "epoch": 2.5737051792828685, | |
| "grad_norm": 0.436357706785202, | |
| "learning_rate": 5.70033011713905e-06, | |
| "loss": 0.5984311699867249, | |
| "step": 646 | |
| }, | |
| { | |
| "epoch": 2.5816733067729083, | |
| "grad_norm": 0.40557265281677246, | |
| "learning_rate": 5.659179641927816e-06, | |
| "loss": 0.7649792432785034, | |
| "step": 648 | |
| }, | |
| { | |
| "epoch": 2.589641434262948, | |
| "grad_norm": 0.17836439609527588, | |
| "learning_rate": 5.61814178519706e-06, | |
| "loss": 0.5768654346466064, | |
| "step": 650 | |
| }, | |
| { | |
| "epoch": 2.597609561752988, | |
| "grad_norm": 0.40341848134994507, | |
| "learning_rate": 5.577218330791436e-06, | |
| "loss": 0.5763181447982788, | |
| "step": 652 | |
| }, | |
| { | |
| "epoch": 2.605577689243028, | |
| "grad_norm": 0.5692223906517029, | |
| "learning_rate": 5.536411057582744e-06, | |
| "loss": 0.5641070008277893, | |
| "step": 654 | |
| }, | |
| { | |
| "epoch": 2.6135458167330676, | |
| "grad_norm": 0.29569053649902344, | |
| "learning_rate": 5.4957217393925734e-06, | |
| "loss": 0.2429419606924057, | |
| "step": 656 | |
| }, | |
| { | |
| "epoch": 2.6215139442231075, | |
| "grad_norm": 0.3552258610725403, | |
| "learning_rate": 5.4551521449152216e-06, | |
| "loss": 0.42948848009109497, | |
| "step": 658 | |
| }, | |
| { | |
| "epoch": 2.6294820717131473, | |
| "grad_norm": 0.41975289583206177, | |
| "learning_rate": 5.4147040376408e-06, | |
| "loss": 0.7414237260818481, | |
| "step": 660 | |
| }, | |
| { | |
| "epoch": 2.637450199203187, | |
| "grad_norm": 0.8263479471206665, | |
| "learning_rate": 5.37437917577858e-06, | |
| "loss": 0.6220693588256836, | |
| "step": 662 | |
| }, | |
| { | |
| "epoch": 2.645418326693227, | |
| "grad_norm": 1.41019868850708, | |
| "learning_rate": 5.334179312180574e-06, | |
| "loss": 0.4769461154937744, | |
| "step": 664 | |
| }, | |
| { | |
| "epoch": 2.653386454183267, | |
| "grad_norm": 9.828413009643555, | |
| "learning_rate": 5.2941061942653315e-06, | |
| "loss": 0.7357695698738098, | |
| "step": 666 | |
| }, | |
| { | |
| "epoch": 2.6613545816733066, | |
| "grad_norm": 0.20442984998226166, | |
| "learning_rate": 5.254161563941981e-06, | |
| "loss": 0.545133650302887, | |
| "step": 668 | |
| }, | |
| { | |
| "epoch": 2.6693227091633465, | |
| "grad_norm": 1.070529818534851, | |
| "learning_rate": 5.2143471575345295e-06, | |
| "loss": 0.5713125467300415, | |
| "step": 670 | |
| }, | |
| { | |
| "epoch": 2.6772908366533863, | |
| "grad_norm": 0.08597006648778915, | |
| "learning_rate": 5.174664705706371e-06, | |
| "loss": 0.2371898740530014, | |
| "step": 672 | |
| }, | |
| { | |
| "epoch": 2.685258964143426, | |
| "grad_norm": 0.2467171996831894, | |
| "learning_rate": 5.135115933385058e-06, | |
| "loss": 0.7705000638961792, | |
| "step": 674 | |
| }, | |
| { | |
| "epoch": 2.6932270916334664, | |
| "grad_norm": 1.5602085590362549, | |
| "learning_rate": 5.0957025596873256e-06, | |
| "loss": 0.5420997142791748, | |
| "step": 676 | |
| }, | |
| { | |
| "epoch": 2.7011952191235062, | |
| "grad_norm": 0.6235253810882568, | |
| "learning_rate": 5.0564262978443745e-06, | |
| "loss": 0.4899404048919678, | |
| "step": 678 | |
| }, | |
| { | |
| "epoch": 2.709163346613546, | |
| "grad_norm": 0.2874850332736969, | |
| "learning_rate": 5.017288855127377e-06, | |
| "loss": 0.778532862663269, | |
| "step": 680 | |
| }, | |
| { | |
| "epoch": 2.717131474103586, | |
| "grad_norm": 0.26746895909309387, | |
| "learning_rate": 4.978291932773289e-06, | |
| "loss": 0.7769652605056763, | |
| "step": 682 | |
| }, | |
| { | |
| "epoch": 2.7250996015936257, | |
| "grad_norm": 0.25973984599113464, | |
| "learning_rate": 4.9394372259108886e-06, | |
| "loss": 0.5638492107391357, | |
| "step": 684 | |
| }, | |
| { | |
| "epoch": 2.7330677290836656, | |
| "grad_norm": 0.8309025168418884, | |
| "learning_rate": 4.9007264234870805e-06, | |
| "loss": 0.41929128766059875, | |
| "step": 686 | |
| }, | |
| { | |
| "epoch": 2.7410358565737054, | |
| "grad_norm": 0.3012772798538208, | |
| "learning_rate": 4.862161208193505e-06, | |
| "loss": 0.7767641544342041, | |
| "step": 688 | |
| }, | |
| { | |
| "epoch": 2.7490039840637452, | |
| "grad_norm": 0.35578370094299316, | |
| "learning_rate": 4.823743256393377e-06, | |
| "loss": 0.47287169098854065, | |
| "step": 690 | |
| }, | |
| { | |
| "epoch": 2.756972111553785, | |
| "grad_norm": 0.49327176809310913, | |
| "learning_rate": 4.785474238048626e-06, | |
| "loss": 0.8931385278701782, | |
| "step": 692 | |
| }, | |
| { | |
| "epoch": 2.764940239043825, | |
| "grad_norm": 1.3697088956832886, | |
| "learning_rate": 4.747355816647293e-06, | |
| "loss": 0.6319751143455505, | |
| "step": 694 | |
| }, | |
| { | |
| "epoch": 2.7729083665338647, | |
| "grad_norm": 1.342233657836914, | |
| "learning_rate": 4.709389649131235e-06, | |
| "loss": 0.4150761365890503, | |
| "step": 696 | |
| }, | |
| { | |
| "epoch": 2.7808764940239046, | |
| "grad_norm": 0.27556970715522766, | |
| "learning_rate": 4.6715773858241e-06, | |
| "loss": 0.8045108318328857, | |
| "step": 698 | |
| }, | |
| { | |
| "epoch": 2.7888446215139444, | |
| "grad_norm": 0.31476858258247375, | |
| "learning_rate": 4.63392067035958e-06, | |
| "loss": 0.8101509213447571, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 2.7968127490039842, | |
| "grad_norm": 0.5621429681777954, | |
| "learning_rate": 4.596421139609977e-06, | |
| "loss": 0.4465515911579132, | |
| "step": 702 | |
| }, | |
| { | |
| "epoch": 2.804780876494024, | |
| "grad_norm": 0.8817136287689209, | |
| "learning_rate": 4.5590804236150365e-06, | |
| "loss": 0.9612689018249512, | |
| "step": 704 | |
| }, | |
| { | |
| "epoch": 2.812749003984064, | |
| "grad_norm": 0.19133038818836212, | |
| "learning_rate": 4.521900145511112e-06, | |
| "loss": 0.8152596950531006, | |
| "step": 706 | |
| }, | |
| { | |
| "epoch": 2.8207171314741037, | |
| "grad_norm": 0.4524690508842468, | |
| "learning_rate": 4.484881921460591e-06, | |
| "loss": 0.8935415744781494, | |
| "step": 708 | |
| }, | |
| { | |
| "epoch": 2.8286852589641436, | |
| "grad_norm": 0.24354888498783112, | |
| "learning_rate": 4.4480273605816556e-06, | |
| "loss": 0.4386708736419678, | |
| "step": 710 | |
| }, | |
| { | |
| "epoch": 2.8366533864541834, | |
| "grad_norm": 0.2424662858247757, | |
| "learning_rate": 4.411338064878337e-06, | |
| "loss": 0.8338403701782227, | |
| "step": 712 | |
| }, | |
| { | |
| "epoch": 2.8446215139442232, | |
| "grad_norm": 0.319381445646286, | |
| "learning_rate": 4.374815629170861e-06, | |
| "loss": 0.5186902284622192, | |
| "step": 714 | |
| }, | |
| { | |
| "epoch": 2.852589641434263, | |
| "grad_norm": 0.2536839246749878, | |
| "learning_rate": 4.338461641026351e-06, | |
| "loss": 0.769604504108429, | |
| "step": 716 | |
| }, | |
| { | |
| "epoch": 2.860557768924303, | |
| "grad_norm": 0.8778960108757019, | |
| "learning_rate": 4.302277680689801e-06, | |
| "loss": 0.6171420216560364, | |
| "step": 718 | |
| }, | |
| { | |
| "epoch": 2.8685258964143427, | |
| "grad_norm": 0.39766034483909607, | |
| "learning_rate": 4.2662653210153965e-06, | |
| "loss": 0.5202685594558716, | |
| "step": 720 | |
| }, | |
| { | |
| "epoch": 2.8764940239043826, | |
| "grad_norm": 0.8559178113937378, | |
| "learning_rate": 4.23042612739813e-06, | |
| "loss": 0.4717506766319275, | |
| "step": 722 | |
| }, | |
| { | |
| "epoch": 2.8844621513944224, | |
| "grad_norm": 0.3448426127433777, | |
| "learning_rate": 4.194761657705765e-06, | |
| "loss": 0.5054087042808533, | |
| "step": 724 | |
| }, | |
| { | |
| "epoch": 2.8924302788844622, | |
| "grad_norm": 0.29262322187423706, | |
| "learning_rate": 4.159273462211129e-06, | |
| "loss": 0.7536461353302002, | |
| "step": 726 | |
| }, | |
| { | |
| "epoch": 2.900398406374502, | |
| "grad_norm": 0.4943152964115143, | |
| "learning_rate": 4.123963083524702e-06, | |
| "loss": 0.43974122405052185, | |
| "step": 728 | |
| }, | |
| { | |
| "epoch": 2.908366533864542, | |
| "grad_norm": 0.24242062866687775, | |
| "learning_rate": 4.0888320565275854e-06, | |
| "loss": 0.7488172650337219, | |
| "step": 730 | |
| }, | |
| { | |
| "epoch": 2.9163346613545817, | |
| "grad_norm": 0.6715952754020691, | |
| "learning_rate": 4.053881908304764e-06, | |
| "loss": 0.2420373111963272, | |
| "step": 732 | |
| }, | |
| { | |
| "epoch": 2.9243027888446216, | |
| "grad_norm": 0.2856823205947876, | |
| "learning_rate": 4.019114158078742e-06, | |
| "loss": 0.8018136620521545, | |
| "step": 734 | |
| }, | |
| { | |
| "epoch": 2.9322709163346614, | |
| "grad_norm": 0.7715031504631042, | |
| "learning_rate": 3.984530317143495e-06, | |
| "loss": 0.41188791394233704, | |
| "step": 736 | |
| }, | |
| { | |
| "epoch": 2.9402390438247012, | |
| "grad_norm": 1.3740425109863281, | |
| "learning_rate": 3.950131888798777e-06, | |
| "loss": 0.6634250283241272, | |
| "step": 738 | |
| }, | |
| { | |
| "epoch": 2.948207171314741, | |
| "grad_norm": 0.7085353136062622, | |
| "learning_rate": 3.915920368284786e-06, | |
| "loss": 0.8047435283660889, | |
| "step": 740 | |
| }, | |
| { | |
| "epoch": 2.956175298804781, | |
| "grad_norm": 1.6132349967956543, | |
| "learning_rate": 3.881897242717153e-06, | |
| "loss": 0.2846962511539459, | |
| "step": 742 | |
| }, | |
| { | |
| "epoch": 2.9641434262948207, | |
| "grad_norm": 0.3325771987438202, | |
| "learning_rate": 3.848063991022304e-06, | |
| "loss": 0.679719865322113, | |
| "step": 744 | |
| }, | |
| { | |
| "epoch": 2.9721115537848606, | |
| "grad_norm": 0.3333672881126404, | |
| "learning_rate": 3.814422083873181e-06, | |
| "loss": 0.716017484664917, | |
| "step": 746 | |
| }, | |
| { | |
| "epoch": 2.9800796812749004, | |
| "grad_norm": 0.31956031918525696, | |
| "learning_rate": 3.7809729836253126e-06, | |
| "loss": 0.44896891713142395, | |
| "step": 748 | |
| }, | |
| { | |
| "epoch": 2.9880478087649402, | |
| "grad_norm": 0.14244171977043152, | |
| "learning_rate": 3.7477181442532373e-06, | |
| "loss": 0.11532896757125854, | |
| "step": 750 | |
| }, | |
| { | |
| "epoch": 2.99601593625498, | |
| "grad_norm": 0.196710005402565, | |
| "learning_rate": 3.7146590112873117e-06, | |
| "loss": 0.7710368633270264, | |
| "step": 752 | |
| }, | |
| { | |
| "epoch": 3.00398406374502, | |
| "grad_norm": 0.2597305178642273, | |
| "learning_rate": 3.6817970217508766e-06, | |
| "loss": 0.37589359283447266, | |
| "step": 754 | |
| }, | |
| { | |
| "epoch": 3.0119521912350598, | |
| "grad_norm": 0.36714112758636475, | |
| "learning_rate": 3.649133604097784e-06, | |
| "loss": 0.34749507904052734, | |
| "step": 756 | |
| }, | |
| { | |
| "epoch": 3.0199203187250996, | |
| "grad_norm": 2.629531145095825, | |
| "learning_rate": 3.616670178150316e-06, | |
| "loss": 0.18874035775661469, | |
| "step": 758 | |
| }, | |
| { | |
| "epoch": 3.0278884462151394, | |
| "grad_norm": 0.3082272410392761, | |
| "learning_rate": 3.5844081550374545e-06, | |
| "loss": 0.37505829334259033, | |
| "step": 760 | |
| }, | |
| { | |
| "epoch": 3.0358565737051793, | |
| "grad_norm": 0.6151975989341736, | |
| "learning_rate": 3.5523489371335502e-06, | |
| "loss": 0.3742624819278717, | |
| "step": 762 | |
| }, | |
| { | |
| "epoch": 3.043824701195219, | |
| "grad_norm": 0.3428267538547516, | |
| "learning_rate": 3.5204939179973634e-06, | |
| "loss": 0.4816422462463379, | |
| "step": 764 | |
| }, | |
| { | |
| "epoch": 3.051792828685259, | |
| "grad_norm": 0.6483787894248962, | |
| "learning_rate": 3.488844482311489e-06, | |
| "loss": 0.16634498536586761, | |
| "step": 766 | |
| }, | |
| { | |
| "epoch": 3.0597609561752988, | |
| "grad_norm": 0.5806704163551331, | |
| "learning_rate": 3.457402005822163e-06, | |
| "loss": 0.31581252813339233, | |
| "step": 768 | |
| }, | |
| { | |
| "epoch": 3.0677290836653386, | |
| "grad_norm": 0.3666588068008423, | |
| "learning_rate": 3.4261678552794615e-06, | |
| "loss": 0.3485649824142456, | |
| "step": 770 | |
| }, | |
| { | |
| "epoch": 3.0756972111553784, | |
| "grad_norm": 0.3737334609031677, | |
| "learning_rate": 3.39514338837789e-06, | |
| "loss": 0.5422434210777283, | |
| "step": 772 | |
| }, | |
| { | |
| "epoch": 3.0836653386454183, | |
| "grad_norm": 1.312560796737671, | |
| "learning_rate": 3.364329953697377e-06, | |
| "loss": 0.5372627973556519, | |
| "step": 774 | |
| }, | |
| { | |
| "epoch": 3.091633466135458, | |
| "grad_norm": 0.025555025786161423, | |
| "learning_rate": 3.3337288906446356e-06, | |
| "loss": 0.30303874611854553, | |
| "step": 776 | |
| }, | |
| { | |
| "epoch": 3.099601593625498, | |
| "grad_norm": 0.36579927802085876, | |
| "learning_rate": 3.303341529394961e-06, | |
| "loss": 0.3074573278427124, | |
| "step": 778 | |
| }, | |
| { | |
| "epoch": 3.1075697211155378, | |
| "grad_norm": 0.36329302191734314, | |
| "learning_rate": 3.2731691908343907e-06, | |
| "loss": 0.4981156885623932, | |
| "step": 780 | |
| }, | |
| { | |
| "epoch": 3.1155378486055776, | |
| "grad_norm": 0.08173166960477829, | |
| "learning_rate": 3.2432131865023065e-06, | |
| "loss": 0.160829097032547, | |
| "step": 782 | |
| }, | |
| { | |
| "epoch": 3.1235059760956174, | |
| "grad_norm": 0.3885779082775116, | |
| "learning_rate": 3.2134748185344098e-06, | |
| "loss": 0.5554381608963013, | |
| "step": 784 | |
| }, | |
| { | |
| "epoch": 3.1314741035856573, | |
| "grad_norm": 0.317030668258667, | |
| "learning_rate": 3.1839553796061266e-06, | |
| "loss": 0.45913565158843994, | |
| "step": 786 | |
| }, | |
| { | |
| "epoch": 3.139442231075697, | |
| "grad_norm": 0.6573988795280457, | |
| "learning_rate": 3.1546561528764227e-06, | |
| "loss": 0.32907965779304504, | |
| "step": 788 | |
| }, | |
| { | |
| "epoch": 3.147410358565737, | |
| "grad_norm": 0.35516512393951416, | |
| "learning_rate": 3.1255784119320064e-06, | |
| "loss": 0.25920620560646057, | |
| "step": 790 | |
| }, | |
| { | |
| "epoch": 3.1553784860557768, | |
| "grad_norm": 0.789368748664856, | |
| "learning_rate": 3.0967234207319946e-06, | |
| "loss": 0.3322998285293579, | |
| "step": 792 | |
| }, | |
| { | |
| "epoch": 3.1633466135458166, | |
| "grad_norm": 0.08553847670555115, | |
| "learning_rate": 3.0680924335529536e-06, | |
| "loss": 0.20808134973049164, | |
| "step": 794 | |
| }, | |
| { | |
| "epoch": 3.1713147410358564, | |
| "grad_norm": 1.4314020872116089, | |
| "learning_rate": 3.0396866949343833e-06, | |
| "loss": 0.33690834045410156, | |
| "step": 796 | |
| }, | |
| { | |
| "epoch": 3.1792828685258963, | |
| "grad_norm": 0.279748797416687, | |
| "learning_rate": 3.0115074396246176e-06, | |
| "loss": 0.36214491724967957, | |
| "step": 798 | |
| }, | |
| { | |
| "epoch": 3.187250996015936, | |
| "grad_norm": 0.6961238384246826, | |
| "learning_rate": 2.9835558925271495e-06, | |
| "loss": 0.501541018486023, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 3.195219123505976, | |
| "grad_norm": 0.18416091799736023, | |
| "learning_rate": 2.955833268647395e-06, | |
| "loss": 0.3577136993408203, | |
| "step": 802 | |
| }, | |
| { | |
| "epoch": 3.2031872509960158, | |
| "grad_norm": 0.6885810494422913, | |
| "learning_rate": 2.9283407730398702e-06, | |
| "loss": 0.29195672273635864, | |
| "step": 804 | |
| }, | |
| { | |
| "epoch": 3.2111553784860556, | |
| "grad_norm": 1.7156380414962769, | |
| "learning_rate": 2.901079600755817e-06, | |
| "loss": 0.5778890252113342, | |
| "step": 806 | |
| }, | |
| { | |
| "epoch": 3.2191235059760954, | |
| "grad_norm": 0.6983752846717834, | |
| "learning_rate": 2.8740509367912457e-06, | |
| "loss": 0.18633845448493958, | |
| "step": 808 | |
| }, | |
| { | |
| "epoch": 3.2270916334661353, | |
| "grad_norm": 0.6704440712928772, | |
| "learning_rate": 2.8472559560354404e-06, | |
| "loss": 0.3643829822540283, | |
| "step": 810 | |
| }, | |
| { | |
| "epoch": 3.235059760956175, | |
| "grad_norm": 1.632941484451294, | |
| "learning_rate": 2.820695823219873e-06, | |
| "loss": 0.2959984838962555, | |
| "step": 812 | |
| }, | |
| { | |
| "epoch": 3.243027888446215, | |
| "grad_norm": 0.39763104915618896, | |
| "learning_rate": 2.794371692867585e-06, | |
| "loss": 0.44851499795913696, | |
| "step": 814 | |
| }, | |
| { | |
| "epoch": 3.2509960159362548, | |
| "grad_norm": 1.5863844156265259, | |
| "learning_rate": 2.768284709243002e-06, | |
| "loss": 0.13297411799430847, | |
| "step": 816 | |
| }, | |
| { | |
| "epoch": 3.2589641434262946, | |
| "grad_norm": 0.5324887633323669, | |
| "learning_rate": 2.7424360063021855e-06, | |
| "loss": 0.5013939142227173, | |
| "step": 818 | |
| }, | |
| { | |
| "epoch": 3.2669322709163344, | |
| "grad_norm": 2.0388095378875732, | |
| "learning_rate": 2.7168267076435485e-06, | |
| "loss": 0.2653783857822418, | |
| "step": 820 | |
| }, | |
| { | |
| "epoch": 3.2749003984063743, | |
| "grad_norm": 0.6833744049072266, | |
| "learning_rate": 2.69145792645902e-06, | |
| "loss": 0.41534146666526794, | |
| "step": 822 | |
| }, | |
| { | |
| "epoch": 3.2828685258964145, | |
| "grad_norm": 0.8634832501411438, | |
| "learning_rate": 2.6663307654856407e-06, | |
| "loss": 0.3562511205673218, | |
| "step": 824 | |
| }, | |
| { | |
| "epoch": 3.2908366533864544, | |
| "grad_norm": 0.39872676134109497, | |
| "learning_rate": 2.6414463169576492e-06, | |
| "loss": 0.4844256043434143, | |
| "step": 826 | |
| }, | |
| { | |
| "epoch": 3.298804780876494, | |
| "grad_norm": 0.434477299451828, | |
| "learning_rate": 2.616805662558985e-06, | |
| "loss": 0.6063498854637146, | |
| "step": 828 | |
| }, | |
| { | |
| "epoch": 3.306772908366534, | |
| "grad_norm": 2.5125367641448975, | |
| "learning_rate": 2.5924098733762835e-06, | |
| "loss": 0.4092828035354614, | |
| "step": 830 | |
| }, | |
| { | |
| "epoch": 3.314741035856574, | |
| "grad_norm": 0.5066865682601929, | |
| "learning_rate": 2.5682600098523105e-06, | |
| "loss": 0.28628939390182495, | |
| "step": 832 | |
| }, | |
| { | |
| "epoch": 3.3227091633466137, | |
| "grad_norm": 0.6276751160621643, | |
| "learning_rate": 2.5443571217398705e-06, | |
| "loss": 0.2303668111562729, | |
| "step": 834 | |
| }, | |
| { | |
| "epoch": 3.3306772908366535, | |
| "grad_norm": 3.6938159465789795, | |
| "learning_rate": 2.5207022480561722e-06, | |
| "loss": 0.3531423807144165, | |
| "step": 836 | |
| }, | |
| { | |
| "epoch": 3.3386454183266934, | |
| "grad_norm": 0.4289490580558777, | |
| "learning_rate": 2.497296417037664e-06, | |
| "loss": 0.4265778660774231, | |
| "step": 838 | |
| }, | |
| { | |
| "epoch": 3.346613545816733, | |
| "grad_norm": 0.859740674495697, | |
| "learning_rate": 2.474140646095346e-06, | |
| "loss": 0.11164703965187073, | |
| "step": 840 | |
| }, | |
| { | |
| "epoch": 3.354581673306773, | |
| "grad_norm": 0.573935866355896, | |
| "learning_rate": 2.451235941770535e-06, | |
| "loss": 0.36163708567619324, | |
| "step": 842 | |
| }, | |
| { | |
| "epoch": 3.362549800796813, | |
| "grad_norm": 0.38408342003822327, | |
| "learning_rate": 2.428583299691118e-06, | |
| "loss": 0.4686431884765625, | |
| "step": 844 | |
| }, | |
| { | |
| "epoch": 3.3705179282868527, | |
| "grad_norm": 0.3920894265174866, | |
| "learning_rate": 2.4061837045282717e-06, | |
| "loss": 0.544544517993927, | |
| "step": 846 | |
| }, | |
| { | |
| "epoch": 3.3784860557768925, | |
| "grad_norm": 0.2612384259700775, | |
| "learning_rate": 2.3840381299536584e-06, | |
| "loss": 0.4954265058040619, | |
| "step": 848 | |
| }, | |
| { | |
| "epoch": 3.3864541832669324, | |
| "grad_norm": 0.9370325207710266, | |
| "learning_rate": 2.36214753859711e-06, | |
| "loss": 0.45011717081069946, | |
| "step": 850 | |
| }, | |
| { | |
| "epoch": 3.394422310756972, | |
| "grad_norm": 0.42760828137397766, | |
| "learning_rate": 2.3405128820047716e-06, | |
| "loss": 0.4825401306152344, | |
| "step": 852 | |
| }, | |
| { | |
| "epoch": 3.402390438247012, | |
| "grad_norm": 0.4402712285518646, | |
| "learning_rate": 2.3191351005977556e-06, | |
| "loss": 0.31368541717529297, | |
| "step": 854 | |
| }, | |
| { | |
| "epoch": 3.410358565737052, | |
| "grad_norm": 0.4192966818809509, | |
| "learning_rate": 2.298015123631246e-06, | |
| "loss": 0.4709932208061218, | |
| "step": 856 | |
| }, | |
| { | |
| "epoch": 3.4183266932270917, | |
| "grad_norm": 0.5742263197898865, | |
| "learning_rate": 2.2771538691541196e-06, | |
| "loss": 0.439094603061676, | |
| "step": 858 | |
| }, | |
| { | |
| "epoch": 3.4262948207171315, | |
| "grad_norm": 0.41355282068252563, | |
| "learning_rate": 2.256552243969029e-06, | |
| "loss": 0.5255416035652161, | |
| "step": 860 | |
| }, | |
| { | |
| "epoch": 3.4342629482071714, | |
| "grad_norm": 0.2783606946468353, | |
| "learning_rate": 2.2362111435929956e-06, | |
| "loss": 0.3297284245491028, | |
| "step": 862 | |
| }, | |
| { | |
| "epoch": 3.442231075697211, | |
| "grad_norm": 1.933573842048645, | |
| "learning_rate": 2.2161314522184778e-06, | |
| "loss": 0.4290310740470886, | |
| "step": 864 | |
| }, | |
| { | |
| "epoch": 3.450199203187251, | |
| "grad_norm": 0.4777624309062958, | |
| "learning_rate": 2.1963140426749277e-06, | |
| "loss": 0.5890864729881287, | |
| "step": 866 | |
| }, | |
| { | |
| "epoch": 3.458167330677291, | |
| "grad_norm": 0.30510279536247253, | |
| "learning_rate": 2.176759776390871e-06, | |
| "loss": 0.4166991114616394, | |
| "step": 868 | |
| }, | |
| { | |
| "epoch": 3.4661354581673307, | |
| "grad_norm": 0.32527998089790344, | |
| "learning_rate": 2.1574695033564447e-06, | |
| "loss": 0.343144029378891, | |
| "step": 870 | |
| }, | |
| { | |
| "epoch": 3.4741035856573705, | |
| "grad_norm": 0.06616739183664322, | |
| "learning_rate": 2.1384440620864597e-06, | |
| "loss": 0.32270875573158264, | |
| "step": 872 | |
| }, | |
| { | |
| "epoch": 3.4820717131474104, | |
| "grad_norm": 0.08085694909095764, | |
| "learning_rate": 2.1196842795839454e-06, | |
| "loss": 0.28370317816734314, | |
| "step": 874 | |
| }, | |
| { | |
| "epoch": 3.49003984063745, | |
| "grad_norm": 0.543026328086853, | |
| "learning_rate": 2.101190971304202e-06, | |
| "loss": 0.2624368369579315, | |
| "step": 876 | |
| }, | |
| { | |
| "epoch": 3.49800796812749, | |
| "grad_norm": 0.3456118702888489, | |
| "learning_rate": 2.0829649411193613e-06, | |
| "loss": 0.3216794431209564, | |
| "step": 878 | |
| }, | |
| { | |
| "epoch": 3.50597609561753, | |
| "grad_norm": 0.2047196626663208, | |
| "learning_rate": 2.0650069812834345e-06, | |
| "loss": 0.2091296762228012, | |
| "step": 880 | |
| }, | |
| { | |
| "epoch": 3.5139442231075697, | |
| "grad_norm": 0.286630779504776, | |
| "learning_rate": 2.0473178723978813e-06, | |
| "loss": 0.20823848247528076, | |
| "step": 882 | |
| }, | |
| { | |
| "epoch": 3.5219123505976095, | |
| "grad_norm": 0.5212514400482178, | |
| "learning_rate": 2.0298983833776717e-06, | |
| "loss": 0.08361003547906876, | |
| "step": 884 | |
| }, | |
| { | |
| "epoch": 3.5298804780876494, | |
| "grad_norm": 0.5501599311828613, | |
| "learning_rate": 2.01274927141787e-06, | |
| "loss": 0.4509070813655853, | |
| "step": 886 | |
| }, | |
| { | |
| "epoch": 3.537848605577689, | |
| "grad_norm": 0.39404916763305664, | |
| "learning_rate": 1.995871281960715e-06, | |
| "loss": 0.44182029366493225, | |
| "step": 888 | |
| }, | |
| { | |
| "epoch": 3.545816733067729, | |
| "grad_norm": 0.39789876341819763, | |
| "learning_rate": 1.9792651486632213e-06, | |
| "loss": 0.27486419677734375, | |
| "step": 890 | |
| }, | |
| { | |
| "epoch": 3.553784860557769, | |
| "grad_norm": 1.1401015520095825, | |
| "learning_rate": 1.962931593365286e-06, | |
| "loss": 0.3863001763820648, | |
| "step": 892 | |
| }, | |
| { | |
| "epoch": 3.5617529880478087, | |
| "grad_norm": 0.37662816047668457, | |
| "learning_rate": 1.946871326058308e-06, | |
| "loss": 0.6621991395950317, | |
| "step": 894 | |
| }, | |
| { | |
| "epoch": 3.5697211155378485, | |
| "grad_norm": 0.10905114561319351, | |
| "learning_rate": 1.9310850448543344e-06, | |
| "loss": 0.10537078976631165, | |
| "step": 896 | |
| }, | |
| { | |
| "epoch": 3.5776892430278884, | |
| "grad_norm": 0.3512639105319977, | |
| "learning_rate": 1.915573435955711e-06, | |
| "loss": 0.2836357057094574, | |
| "step": 898 | |
| }, | |
| { | |
| "epoch": 3.585657370517928, | |
| "grad_norm": 0.43090760707855225, | |
| "learning_rate": 1.9003371736252472e-06, | |
| "loss": 0.15316523611545563, | |
| "step": 900 | |
| }, | |
| { | |
| "epoch": 3.593625498007968, | |
| "grad_norm": 0.6001132726669312, | |
| "learning_rate": 1.8853769201569208e-06, | |
| "loss": 0.09330594539642334, | |
| "step": 902 | |
| }, | |
| { | |
| "epoch": 3.601593625498008, | |
| "grad_norm": 0.5064031481742859, | |
| "learning_rate": 1.8706933258470757e-06, | |
| "loss": 0.4762483835220337, | |
| "step": 904 | |
| }, | |
| { | |
| "epoch": 3.6095617529880477, | |
| "grad_norm": 0.3355056643486023, | |
| "learning_rate": 1.8562870289661659e-06, | |
| "loss": 0.27884015440940857, | |
| "step": 906 | |
| }, | |
| { | |
| "epoch": 3.6175298804780875, | |
| "grad_norm": 0.3930132985115051, | |
| "learning_rate": 1.8421586557309996e-06, | |
| "loss": 0.5141717195510864, | |
| "step": 908 | |
| }, | |
| { | |
| "epoch": 3.6254980079681274, | |
| "grad_norm": 0.46708425879478455, | |
| "learning_rate": 1.8283088202775314e-06, | |
| "loss": 0.4603351652622223, | |
| "step": 910 | |
| }, | |
| { | |
| "epoch": 3.633466135458167, | |
| "grad_norm": 0.037517350167036057, | |
| "learning_rate": 1.8147381246341558e-06, | |
| "loss": 0.02768601104617119, | |
| "step": 912 | |
| }, | |
| { | |
| "epoch": 3.641434262948207, | |
| "grad_norm": 0.3065638542175293, | |
| "learning_rate": 1.8014471586955424e-06, | |
| "loss": 0.3721899390220642, | |
| "step": 914 | |
| }, | |
| { | |
| "epoch": 3.649402390438247, | |
| "grad_norm": 0.43989261984825134, | |
| "learning_rate": 1.7884365001969967e-06, | |
| "loss": 0.43738237023353577, | |
| "step": 916 | |
| }, | |
| { | |
| "epoch": 3.6573705179282867, | |
| "grad_norm": 0.6677345633506775, | |
| "learning_rate": 1.7757067146893425e-06, | |
| "loss": 0.0748777762055397, | |
| "step": 918 | |
| }, | |
| { | |
| "epoch": 3.6653386454183265, | |
| "grad_norm": 0.3021090030670166, | |
| "learning_rate": 1.7632583555143435e-06, | |
| "loss": 0.5561968684196472, | |
| "step": 920 | |
| }, | |
| { | |
| "epoch": 3.6733067729083664, | |
| "grad_norm": 0.42820993065834045, | |
| "learning_rate": 1.751091963780643e-06, | |
| "loss": 0.07096469402313232, | |
| "step": 922 | |
| }, | |
| { | |
| "epoch": 3.681274900398406, | |
| "grad_norm": 0.8393615484237671, | |
| "learning_rate": 1.7392080683402496e-06, | |
| "loss": 0.46100661158561707, | |
| "step": 924 | |
| }, | |
| { | |
| "epoch": 3.6892430278884465, | |
| "grad_norm": 0.6544818878173828, | |
| "learning_rate": 1.7276071857655479e-06, | |
| "loss": 0.1528330296278, | |
| "step": 926 | |
| }, | |
| { | |
| "epoch": 3.6972111553784863, | |
| "grad_norm": 0.31229308247566223, | |
| "learning_rate": 1.716289820326839e-06, | |
| "loss": 0.29350802302360535, | |
| "step": 928 | |
| }, | |
| { | |
| "epoch": 3.705179282868526, | |
| "grad_norm": 0.1188875362277031, | |
| "learning_rate": 1.7052564639704286e-06, | |
| "loss": 0.3660446107387543, | |
| "step": 930 | |
| }, | |
| { | |
| "epoch": 3.713147410358566, | |
| "grad_norm": 0.5841293931007385, | |
| "learning_rate": 1.6945075962972356e-06, | |
| "loss": 0.45137277245521545, | |
| "step": 932 | |
| }, | |
| { | |
| "epoch": 3.721115537848606, | |
| "grad_norm": 0.7146270275115967, | |
| "learning_rate": 1.6840436845419498e-06, | |
| "loss": 0.4348509907722473, | |
| "step": 934 | |
| }, | |
| { | |
| "epoch": 3.7290836653386457, | |
| "grad_norm": 0.326523095369339, | |
| "learning_rate": 1.6738651835527184e-06, | |
| "loss": 0.4922831654548645, | |
| "step": 936 | |
| }, | |
| { | |
| "epoch": 3.7370517928286855, | |
| "grad_norm": 0.8038604855537415, | |
| "learning_rate": 1.6639725357713769e-06, | |
| "loss": 0.21507446467876434, | |
| "step": 938 | |
| }, | |
| { | |
| "epoch": 3.7450199203187253, | |
| "grad_norm": 0.36527737975120544, | |
| "learning_rate": 1.6543661712142184e-06, | |
| "loss": 0.4618900418281555, | |
| "step": 940 | |
| }, | |
| { | |
| "epoch": 3.752988047808765, | |
| "grad_norm": 0.6607845425605774, | |
| "learning_rate": 1.645046507453294e-06, | |
| "loss": 0.36659500002861023, | |
| "step": 942 | |
| }, | |
| { | |
| "epoch": 3.760956175298805, | |
| "grad_norm": 0.421165406703949, | |
| "learning_rate": 1.6360139495982712e-06, | |
| "loss": 0.28992268443107605, | |
| "step": 944 | |
| }, | |
| { | |
| "epoch": 3.768924302788845, | |
| "grad_norm": 0.5764026641845703, | |
| "learning_rate": 1.6272688902788207e-06, | |
| "loss": 0.5770589709281921, | |
| "step": 946 | |
| }, | |
| { | |
| "epoch": 3.7768924302788847, | |
| "grad_norm": 0.3415585160255432, | |
| "learning_rate": 1.6188117096275477e-06, | |
| "loss": 0.3260127305984497, | |
| "step": 948 | |
| }, | |
| { | |
| "epoch": 3.7848605577689245, | |
| "grad_norm": 0.2808169424533844, | |
| "learning_rate": 1.610642775263468e-06, | |
| "loss": 0.5168456435203552, | |
| "step": 950 | |
| }, | |
| { | |
| "epoch": 3.7928286852589643, | |
| "grad_norm": 0.3828094005584717, | |
| "learning_rate": 1.6027624422760312e-06, | |
| "loss": 0.5155588388442993, | |
| "step": 952 | |
| }, | |
| { | |
| "epoch": 3.800796812749004, | |
| "grad_norm": 0.616820216178894, | |
| "learning_rate": 1.5951710532096857e-06, | |
| "loss": 0.2644089460372925, | |
| "step": 954 | |
| }, | |
| { | |
| "epoch": 3.808764940239044, | |
| "grad_norm": 0.531527042388916, | |
| "learning_rate": 1.5878689380489846e-06, | |
| "loss": 0.38867413997650146, | |
| "step": 956 | |
| }, | |
| { | |
| "epoch": 3.816733067729084, | |
| "grad_norm": 0.9145589470863342, | |
| "learning_rate": 1.580856414204247e-06, | |
| "loss": 0.3010810315608978, | |
| "step": 958 | |
| }, | |
| { | |
| "epoch": 3.8247011952191237, | |
| "grad_norm": 1.3794469833374023, | |
| "learning_rate": 1.5741337864977558e-06, | |
| "loss": 0.21975839138031006, | |
| "step": 960 | |
| }, | |
| { | |
| "epoch": 3.8326693227091635, | |
| "grad_norm": 0.11370343714952469, | |
| "learning_rate": 1.567701347150513e-06, | |
| "loss": 0.3248888850212097, | |
| "step": 962 | |
| }, | |
| { | |
| "epoch": 3.8406374501992033, | |
| "grad_norm": 0.4226270616054535, | |
| "learning_rate": 1.5615593757695319e-06, | |
| "loss": 0.6149446964263916, | |
| "step": 964 | |
| }, | |
| { | |
| "epoch": 3.848605577689243, | |
| "grad_norm": 0.4638464152812958, | |
| "learning_rate": 1.555708139335687e-06, | |
| "loss": 0.21839484572410583, | |
| "step": 966 | |
| }, | |
| { | |
| "epoch": 3.856573705179283, | |
| "grad_norm": 0.28881698846817017, | |
| "learning_rate": 1.5501478921921071e-06, | |
| "loss": 0.3512417674064636, | |
| "step": 968 | |
| }, | |
| { | |
| "epoch": 3.864541832669323, | |
| "grad_norm": 0.8101674318313599, | |
| "learning_rate": 1.54487887603312e-06, | |
| "loss": 0.36906710267066956, | |
| "step": 970 | |
| }, | |
| { | |
| "epoch": 3.8725099601593627, | |
| "grad_norm": 0.10503221303224564, | |
| "learning_rate": 1.5399013198937452e-06, | |
| "loss": 0.3287951946258545, | |
| "step": 972 | |
| }, | |
| { | |
| "epoch": 3.8804780876494025, | |
| "grad_norm": 0.1844586879014969, | |
| "learning_rate": 1.5352154401397418e-06, | |
| "loss": 0.32523638010025024, | |
| "step": 974 | |
| }, | |
| { | |
| "epoch": 3.8884462151394423, | |
| "grad_norm": 0.38847818970680237, | |
| "learning_rate": 1.5308214404581968e-06, | |
| "loss": 0.5000988245010376, | |
| "step": 976 | |
| }, | |
| { | |
| "epoch": 3.896414342629482, | |
| "grad_norm": 0.4692430794239044, | |
| "learning_rate": 1.5267195118486794e-06, | |
| "loss": 0.2642746567726135, | |
| "step": 978 | |
| }, | |
| { | |
| "epoch": 3.904382470119522, | |
| "grad_norm": 0.2286023050546646, | |
| "learning_rate": 1.522909832614931e-06, | |
| "loss": 0.2238185554742813, | |
| "step": 980 | |
| }, | |
| { | |
| "epoch": 3.912350597609562, | |
| "grad_norm": 0.3902443051338196, | |
| "learning_rate": 1.5193925683571211e-06, | |
| "loss": 0.19102515280246735, | |
| "step": 982 | |
| }, | |
| { | |
| "epoch": 3.9203187250996017, | |
| "grad_norm": 0.4479309320449829, | |
| "learning_rate": 1.516167871964643e-06, | |
| "loss": 0.5202714800834656, | |
| "step": 984 | |
| }, | |
| { | |
| "epoch": 3.9282868525896415, | |
| "grad_norm": 0.5768634080886841, | |
| "learning_rate": 1.5132358836094728e-06, | |
| "loss": 0.16960352659225464, | |
| "step": 986 | |
| }, | |
| { | |
| "epoch": 3.9362549800796813, | |
| "grad_norm": 3.171630620956421, | |
| "learning_rate": 1.510596730740074e-06, | |
| "loss": 0.4183100461959839, | |
| "step": 988 | |
| }, | |
| { | |
| "epoch": 3.944223107569721, | |
| "grad_norm": 0.10318754613399506, | |
| "learning_rate": 1.508250528075857e-06, | |
| "loss": 0.3005601763725281, | |
| "step": 990 | |
| }, | |
| { | |
| "epoch": 3.952191235059761, | |
| "grad_norm": 0.5118750929832458, | |
| "learning_rate": 1.5061973776021949e-06, | |
| "loss": 0.4696381390094757, | |
| "step": 992 | |
| }, | |
| { | |
| "epoch": 3.960159362549801, | |
| "grad_norm": 0.3834742307662964, | |
| "learning_rate": 1.504437368565988e-06, | |
| "loss": 0.1685551553964615, | |
| "step": 994 | |
| }, | |
| { | |
| "epoch": 3.9681274900398407, | |
| "grad_norm": 0.34701693058013916, | |
| "learning_rate": 1.502970577471785e-06, | |
| "loss": 0.4004333019256592, | |
| "step": 996 | |
| }, | |
| { | |
| "epoch": 3.9760956175298805, | |
| "grad_norm": 0.14854289591312408, | |
| "learning_rate": 1.5017970680784587e-06, | |
| "loss": 0.32395121455192566, | |
| "step": 998 | |
| }, | |
| { | |
| "epoch": 3.9840637450199203, | |
| "grad_norm": 0.3810655474662781, | |
| "learning_rate": 1.5009168913964322e-06, | |
| "loss": 0.23012831807136536, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 3.99203187250996, | |
| "grad_norm": 0.3381198048591614, | |
| "learning_rate": 1.5003300856854642e-06, | |
| "loss": 0.48588454723358154, | |
| "step": 1002 | |
| }, | |
| { | |
| "epoch": 4.0, | |
| "grad_norm": 0.07251780480146408, | |
| "learning_rate": 1.5000366764529846e-06, | |
| "loss": 0.11095666140317917, | |
| "step": 1004 | |
| }, | |
| { | |
| "epoch": 4.0, | |
| "step": 1004, | |
| "total_flos": 4.038502240003031e+18, | |
| "train_loss": 0.7418717171225059, | |
| "train_runtime": 10936.9117, | |
| "train_samples_per_second": 5.508, | |
| "train_steps_per_second": 0.092 | |
| } | |
| ], | |
| "logging_steps": 2, | |
| "max_steps": 1004, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 4, | |
| "save_steps": 99999, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": false, | |
| "should_training_stop": false | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 4.038502240003031e+18, | |
| "train_batch_size": 3, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |