Image-Text-to-Text
Transformers
Safetensors
qwen3_5
llama-factory
full
Generated from Trainer
conversational
Instructions to use furproxy/9b-85 with libraries, inference providers, notebooks, and local apps. Follow these links to get started.
- Libraries
- Transformers
How to use furproxy/9b-85 with Transformers:
```python
# Use a pipeline as a high-level helper
from transformers import pipeline

pipe = pipeline("image-text-to-text", model="furproxy/9b-85")
messages = [
    {
        "role": "user",
        "content": [
            {"type": "image", "url": "https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/p-blog/candy.JPG"},
            {"type": "text", "text": "What animal is on the candy?"}
        ]
    },
]
pipe(text=messages)
```
```python
# Load model directly
from transformers import AutoProcessor, AutoModelForImageTextToText

processor = AutoProcessor.from_pretrained("furproxy/9b-85")
model = AutoModelForImageTextToText.from_pretrained("furproxy/9b-85")
messages = [
    {
        "role": "user",
        "content": [
            {"type": "image", "url": "https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/p-blog/candy.JPG"},
            {"type": "text", "text": "What animal is on the candy?"}
        ]
    },
]
inputs = processor.apply_chat_template(
    messages,
    add_generation_prompt=True,
    tokenize=True,
    return_dict=True,
    return_tensors="pt",
).to(model.device)

outputs = model.generate(**inputs, max_new_tokens=40)
print(processor.decode(outputs[0][inputs["input_ids"].shape[-1]:]))
```
- Notebooks
- Google Colab
- Kaggle
- Local Apps Settings
- vLLM
How to use furproxy/9b-85 with vLLM:
Install from pip and serve model
```bash
# Install vLLM from pip:
pip install vllm

# Start the vLLM server:
vllm serve "furproxy/9b-85"

# Call the server using curl (OpenAI-compatible API):
curl -X POST "http://localhost:8000/v1/chat/completions" \
    -H "Content-Type: application/json" \
    --data '{
        "model": "furproxy/9b-85",
        "messages": [
            {
                "role": "user",
                "content": [
                    {
                        "type": "text",
                        "text": "Describe this image in one sentence."
                    },
                    {
                        "type": "image_url",
                        "image_url": {
                            "url": "https://cdn.britannica.com/61/93061-050-99147DCE/Statue-of-Liberty-Island-New-York-Bay.jpg"
                        }
                    }
                ]
            }
        ]
    }'
```
Use Docker
docker model run hf.co/furproxy/9b-85
- SGLang
How to use furproxy/9b-85 with SGLang:
Install from pip and serve model
```bash
# Install SGLang from pip:
pip install sglang

# Start the SGLang server:
python3 -m sglang.launch_server \
    --model-path "furproxy/9b-85" \
    --host 0.0.0.0 \
    --port 30000

# Call the server using curl (OpenAI-compatible API):
curl -X POST "http://localhost:30000/v1/chat/completions" \
    -H "Content-Type: application/json" \
    --data '{
        "model": "furproxy/9b-85",
        "messages": [
            {
                "role": "user",
                "content": [
                    {
                        "type": "text",
                        "text": "Describe this image in one sentence."
                    },
                    {
                        "type": "image_url",
                        "image_url": {
                            "url": "https://cdn.britannica.com/61/93061-050-99147DCE/Statue-of-Liberty-Island-New-York-Bay.jpg"
                        }
                    }
                ]
            }
        ]
    }'
```
Use Docker images
```bash
docker run --gpus all \
    --shm-size 32g \
    -p 30000:30000 \
    -v ~/.cache/huggingface:/root/.cache/huggingface \
    --env "HF_TOKEN=<secret>" \
    --ipc=host \
    lmsysorg/sglang:latest \
    python3 -m sglang.launch_server \
        --model-path "furproxy/9b-85" \
        --host 0.0.0.0 \
        --port 30000

# Call the server using curl (OpenAI-compatible API):
curl -X POST "http://localhost:30000/v1/chat/completions" \
    -H "Content-Type: application/json" \
    --data '{
        "model": "furproxy/9b-85",
        "messages": [
            {
                "role": "user",
                "content": [
                    {
                        "type": "text",
                        "text": "Describe this image in one sentence."
                    },
                    {
                        "type": "image_url",
                        "image_url": {
                            "url": "https://cdn.britannica.com/61/93061-050-99147DCE/Statue-of-Liberty-Island-New-York-Bay.jpg"
                        }
                    }
                ]
            }
        ]
    }'
```
- Docker Model Runner
How to use furproxy/9b-85 with Docker Model Runner:
docker model run hf.co/furproxy/9b-85
| { | |
| "best_global_step": null, | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 4.0, | |
| "eval_steps": 500, | |
| "global_step": 1804, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.004434589800443459, | |
| "grad_norm": 8.667221069335938, | |
| "learning_rate": 2.197802197802198e-07, | |
| "loss": 1.8642287254333496, | |
| "step": 2 | |
| }, | |
| { | |
| "epoch": 0.008869179600886918, | |
| "grad_norm": 8.565436363220215, | |
| "learning_rate": 6.593406593406594e-07, | |
| "loss": 2.1231369972229004, | |
| "step": 4 | |
| }, | |
| { | |
| "epoch": 0.013303769401330377, | |
| "grad_norm": 3.455594301223755, | |
| "learning_rate": 1.098901098901099e-06, | |
| "loss": 1.89163339138031, | |
| "step": 6 | |
| }, | |
| { | |
| "epoch": 0.017738359201773836, | |
| "grad_norm": 1.2133512496948242, | |
| "learning_rate": 1.5384615384615387e-06, | |
| "loss": 1.7869961261749268, | |
| "step": 8 | |
| }, | |
| { | |
| "epoch": 0.022172949002217297, | |
| "grad_norm": 4.369198799133301, | |
| "learning_rate": 1.9780219780219782e-06, | |
| "loss": 1.5530983209609985, | |
| "step": 10 | |
| }, | |
| { | |
| "epoch": 0.026607538802660754, | |
| "grad_norm": 6.942768096923828, | |
| "learning_rate": 2.4175824175824177e-06, | |
| "loss": 1.5389991998672485, | |
| "step": 12 | |
| }, | |
| { | |
| "epoch": 0.031042128603104215, | |
| "grad_norm": 2.6731338500976562, | |
| "learning_rate": 2.8571428571428573e-06, | |
| "loss": 1.016729474067688, | |
| "step": 14 | |
| }, | |
| { | |
| "epoch": 0.03547671840354767, | |
| "grad_norm": 6.184485912322998, | |
| "learning_rate": 3.2967032967032968e-06, | |
| "loss": 0.9146304130554199, | |
| "step": 16 | |
| }, | |
| { | |
| "epoch": 0.03991130820399113, | |
| "grad_norm": 1.2878278493881226, | |
| "learning_rate": 3.7362637362637367e-06, | |
| "loss": 0.9207720756530762, | |
| "step": 18 | |
| }, | |
| { | |
| "epoch": 0.04434589800443459, | |
| "grad_norm": 3.814846992492676, | |
| "learning_rate": 4.175824175824177e-06, | |
| "loss": 1.1863677501678467, | |
| "step": 20 | |
| }, | |
| { | |
| "epoch": 0.04878048780487805, | |
| "grad_norm": 6.385776042938232, | |
| "learning_rate": 4.615384615384616e-06, | |
| "loss": 1.320576548576355, | |
| "step": 22 | |
| }, | |
| { | |
| "epoch": 0.05321507760532151, | |
| "grad_norm": 1.8087103366851807, | |
| "learning_rate": 5.054945054945055e-06, | |
| "loss": 1.4690011739730835, | |
| "step": 24 | |
| }, | |
| { | |
| "epoch": 0.057649667405764965, | |
| "grad_norm": 1.2201143503189087, | |
| "learning_rate": 5.494505494505495e-06, | |
| "loss": 1.2042573690414429, | |
| "step": 26 | |
| }, | |
| { | |
| "epoch": 0.06208425720620843, | |
| "grad_norm": 1.1334809064865112, | |
| "learning_rate": 5.934065934065935e-06, | |
| "loss": 1.040234923362732, | |
| "step": 28 | |
| }, | |
| { | |
| "epoch": 0.06651884700665188, | |
| "grad_norm": 1.8232964277267456, | |
| "learning_rate": 6.373626373626373e-06, | |
| "loss": 1.169386386871338, | |
| "step": 30 | |
| }, | |
| { | |
| "epoch": 0.07095343680709534, | |
| "grad_norm": 2.845280885696411, | |
| "learning_rate": 6.813186813186814e-06, | |
| "loss": 1.0687059164047241, | |
| "step": 32 | |
| }, | |
| { | |
| "epoch": 0.07538802660753881, | |
| "grad_norm": 0.9850583672523499, | |
| "learning_rate": 7.252747252747253e-06, | |
| "loss": 1.4395848512649536, | |
| "step": 34 | |
| }, | |
| { | |
| "epoch": 0.07982261640798226, | |
| "grad_norm": 3.9745352268218994, | |
| "learning_rate": 7.692307692307694e-06, | |
| "loss": 1.1598668098449707, | |
| "step": 36 | |
| }, | |
| { | |
| "epoch": 0.08425720620842572, | |
| "grad_norm": 4.545168399810791, | |
| "learning_rate": 8.131868131868132e-06, | |
| "loss": 1.515150785446167, | |
| "step": 38 | |
| }, | |
| { | |
| "epoch": 0.08869179600886919, | |
| "grad_norm": 2.1682798862457275, | |
| "learning_rate": 8.571428571428571e-06, | |
| "loss": 0.7324872612953186, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 0.09312638580931264, | |
| "grad_norm": 0.8849499225616455, | |
| "learning_rate": 9.010989010989011e-06, | |
| "loss": 1.3759689331054688, | |
| "step": 42 | |
| }, | |
| { | |
| "epoch": 0.0975609756097561, | |
| "grad_norm": 1.3699463605880737, | |
| "learning_rate": 9.450549450549452e-06, | |
| "loss": 1.3197706937789917, | |
| "step": 44 | |
| }, | |
| { | |
| "epoch": 0.10199556541019955, | |
| "grad_norm": 1.9112352132797241, | |
| "learning_rate": 9.890109890109892e-06, | |
| "loss": 1.355299472808838, | |
| "step": 46 | |
| }, | |
| { | |
| "epoch": 0.10643015521064302, | |
| "grad_norm": 4.099548816680908, | |
| "learning_rate": 1.0329670329670332e-05, | |
| "loss": 1.583066701889038, | |
| "step": 48 | |
| }, | |
| { | |
| "epoch": 0.11086474501108648, | |
| "grad_norm": 1.642897605895996, | |
| "learning_rate": 1.076923076923077e-05, | |
| "loss": 1.4291114807128906, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 0.11529933481152993, | |
| "grad_norm": 2.4580774307250977, | |
| "learning_rate": 1.120879120879121e-05, | |
| "loss": 1.3548572063446045, | |
| "step": 52 | |
| }, | |
| { | |
| "epoch": 0.1197339246119734, | |
| "grad_norm": 1.3751822710037231, | |
| "learning_rate": 1.164835164835165e-05, | |
| "loss": 1.367175579071045, | |
| "step": 54 | |
| }, | |
| { | |
| "epoch": 0.12416851441241686, | |
| "grad_norm": 5.466642379760742, | |
| "learning_rate": 1.2087912087912089e-05, | |
| "loss": 1.136663794517517, | |
| "step": 56 | |
| }, | |
| { | |
| "epoch": 0.1286031042128603, | |
| "grad_norm": 1.1212538480758667, | |
| "learning_rate": 1.2527472527472529e-05, | |
| "loss": 1.3549809455871582, | |
| "step": 58 | |
| }, | |
| { | |
| "epoch": 0.13303769401330376, | |
| "grad_norm": 0.9152220487594604, | |
| "learning_rate": 1.296703296703297e-05, | |
| "loss": 1.4315998554229736, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 0.13747228381374724, | |
| "grad_norm": 1.2018588781356812, | |
| "learning_rate": 1.3406593406593406e-05, | |
| "loss": 1.3350356817245483, | |
| "step": 62 | |
| }, | |
| { | |
| "epoch": 0.1419068736141907, | |
| "grad_norm": 1.993096947669983, | |
| "learning_rate": 1.3846153846153847e-05, | |
| "loss": 1.3968464136123657, | |
| "step": 64 | |
| }, | |
| { | |
| "epoch": 0.14634146341463414, | |
| "grad_norm": 1.467322826385498, | |
| "learning_rate": 1.4285714285714287e-05, | |
| "loss": 1.4635425806045532, | |
| "step": 66 | |
| }, | |
| { | |
| "epoch": 0.15077605321507762, | |
| "grad_norm": 0.7607141137123108, | |
| "learning_rate": 1.4725274725274727e-05, | |
| "loss": 1.3317251205444336, | |
| "step": 68 | |
| }, | |
| { | |
| "epoch": 0.15521064301552107, | |
| "grad_norm": 7.009274959564209, | |
| "learning_rate": 1.5164835164835166e-05, | |
| "loss": 1.3160146474838257, | |
| "step": 70 | |
| }, | |
| { | |
| "epoch": 0.15964523281596452, | |
| "grad_norm": 1.0283435583114624, | |
| "learning_rate": 1.5604395604395605e-05, | |
| "loss": 1.3538073301315308, | |
| "step": 72 | |
| }, | |
| { | |
| "epoch": 0.164079822616408, | |
| "grad_norm": 0.7895150184631348, | |
| "learning_rate": 1.6043956043956047e-05, | |
| "loss": 1.2550619840621948, | |
| "step": 74 | |
| }, | |
| { | |
| "epoch": 0.16851441241685144, | |
| "grad_norm": 0.7530434131622314, | |
| "learning_rate": 1.6483516483516486e-05, | |
| "loss": 1.355035424232483, | |
| "step": 76 | |
| }, | |
| { | |
| "epoch": 0.1729490022172949, | |
| "grad_norm": 0.6738516688346863, | |
| "learning_rate": 1.6923076923076924e-05, | |
| "loss": 1.396584391593933, | |
| "step": 78 | |
| }, | |
| { | |
| "epoch": 0.17738359201773837, | |
| "grad_norm": 1.0755456686019897, | |
| "learning_rate": 1.7362637362637363e-05, | |
| "loss": 1.3568543195724487, | |
| "step": 80 | |
| }, | |
| { | |
| "epoch": 0.18181818181818182, | |
| "grad_norm": 1.3478541374206543, | |
| "learning_rate": 1.78021978021978e-05, | |
| "loss": 0.8403951525688171, | |
| "step": 82 | |
| }, | |
| { | |
| "epoch": 0.18625277161862527, | |
| "grad_norm": 0.7471117973327637, | |
| "learning_rate": 1.8241758241758244e-05, | |
| "loss": 1.0819566249847412, | |
| "step": 84 | |
| }, | |
| { | |
| "epoch": 0.19068736141906872, | |
| "grad_norm": 3.1562721729278564, | |
| "learning_rate": 1.8681318681318682e-05, | |
| "loss": 1.0565105676651, | |
| "step": 86 | |
| }, | |
| { | |
| "epoch": 0.1951219512195122, | |
| "grad_norm": 0.9117481708526611, | |
| "learning_rate": 1.9120879120879124e-05, | |
| "loss": 1.336931586265564, | |
| "step": 88 | |
| }, | |
| { | |
| "epoch": 0.19955654101995565, | |
| "grad_norm": 1.8324049711227417, | |
| "learning_rate": 1.9560439560439563e-05, | |
| "loss": 1.4609527587890625, | |
| "step": 90 | |
| }, | |
| { | |
| "epoch": 0.2039911308203991, | |
| "grad_norm": 0.8476412892341614, | |
| "learning_rate": 2e-05, | |
| "loss": 1.317558765411377, | |
| "step": 92 | |
| }, | |
| { | |
| "epoch": 0.20842572062084258, | |
| "grad_norm": 0.6812918782234192, | |
| "learning_rate": 1.999993945796182e-05, | |
| "loss": 1.309884786605835, | |
| "step": 94 | |
| }, | |
| { | |
| "epoch": 0.21286031042128603, | |
| "grad_norm": 1.9555091857910156, | |
| "learning_rate": 1.9999757832661787e-05, | |
| "loss": 1.8222039937973022, | |
| "step": 96 | |
| }, | |
| { | |
| "epoch": 0.21729490022172948, | |
| "grad_norm": 1.6802914142608643, | |
| "learning_rate": 1.9999455126543454e-05, | |
| "loss": 1.0341295003890991, | |
| "step": 98 | |
| }, | |
| { | |
| "epoch": 0.22172949002217296, | |
| "grad_norm": 0.9253756403923035, | |
| "learning_rate": 1.9999031343679364e-05, | |
| "loss": 1.2889328002929688, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.2261640798226164, | |
| "grad_norm": 0.9691144824028015, | |
| "learning_rate": 1.9998486489770998e-05, | |
| "loss": 1.4229637384414673, | |
| "step": 102 | |
| }, | |
| { | |
| "epoch": 0.23059866962305986, | |
| "grad_norm": 0.7583999037742615, | |
| "learning_rate": 1.999782057214871e-05, | |
| "loss": 1.1750223636627197, | |
| "step": 104 | |
| }, | |
| { | |
| "epoch": 0.23503325942350334, | |
| "grad_norm": 0.7559353709220886, | |
| "learning_rate": 1.999703359977161e-05, | |
| "loss": 1.3722642660140991, | |
| "step": 106 | |
| }, | |
| { | |
| "epoch": 0.2394678492239468, | |
| "grad_norm": 1.8747915029525757, | |
| "learning_rate": 1.9996125583227458e-05, | |
| "loss": 1.5751910209655762, | |
| "step": 108 | |
| }, | |
| { | |
| "epoch": 0.24390243902439024, | |
| "grad_norm": 0.7324615120887756, | |
| "learning_rate": 1.999509653473251e-05, | |
| "loss": 1.1686367988586426, | |
| "step": 110 | |
| }, | |
| { | |
| "epoch": 0.24833702882483372, | |
| "grad_norm": 5.023177623748779, | |
| "learning_rate": 1.999394646813137e-05, | |
| "loss": 1.368462324142456, | |
| "step": 112 | |
| }, | |
| { | |
| "epoch": 0.25277161862527714, | |
| "grad_norm": 2.301079750061035, | |
| "learning_rate": 1.9992675398896784e-05, | |
| "loss": 0.8811516761779785, | |
| "step": 114 | |
| }, | |
| { | |
| "epoch": 0.2572062084257206, | |
| "grad_norm": 0.6491958498954773, | |
| "learning_rate": 1.9991283344129452e-05, | |
| "loss": 1.4907201528549194, | |
| "step": 116 | |
| }, | |
| { | |
| "epoch": 0.2616407982261641, | |
| "grad_norm": 0.6563892364501953, | |
| "learning_rate": 1.998977032255777e-05, | |
| "loss": 1.224129557609558, | |
| "step": 118 | |
| }, | |
| { | |
| "epoch": 0.2660753880266075, | |
| "grad_norm": 0.708153486251831, | |
| "learning_rate": 1.9988136354537615e-05, | |
| "loss": 1.3663833141326904, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 0.270509977827051, | |
| "grad_norm": 1.3739961385726929, | |
| "learning_rate": 1.9986381462052048e-05, | |
| "loss": 1.2798233032226562, | |
| "step": 122 | |
| }, | |
| { | |
| "epoch": 0.2749445676274945, | |
| "grad_norm": 1.1927521228790283, | |
| "learning_rate": 1.9984505668711006e-05, | |
| "loss": 1.6487520933151245, | |
| "step": 124 | |
| }, | |
| { | |
| "epoch": 0.2793791574279379, | |
| "grad_norm": 1.0914132595062256, | |
| "learning_rate": 1.998250899975102e-05, | |
| "loss": 0.9563515186309814, | |
| "step": 126 | |
| }, | |
| { | |
| "epoch": 0.2838137472283814, | |
| "grad_norm": 0.6142106056213379, | |
| "learning_rate": 1.9980391482034844e-05, | |
| "loss": 1.2922307252883911, | |
| "step": 128 | |
| }, | |
| { | |
| "epoch": 0.28824833702882485, | |
| "grad_norm": 0.9818975925445557, | |
| "learning_rate": 1.9978153144051108e-05, | |
| "loss": 1.0446155071258545, | |
| "step": 130 | |
| }, | |
| { | |
| "epoch": 0.2926829268292683, | |
| "grad_norm": 1.9593212604522705, | |
| "learning_rate": 1.9975794015913936e-05, | |
| "loss": 1.0657705068588257, | |
| "step": 132 | |
| }, | |
| { | |
| "epoch": 0.29711751662971175, | |
| "grad_norm": 2.4713385105133057, | |
| "learning_rate": 1.9973314129362533e-05, | |
| "loss": 1.0481352806091309, | |
| "step": 134 | |
| }, | |
| { | |
| "epoch": 0.30155210643015523, | |
| "grad_norm": 9.34296703338623, | |
| "learning_rate": 1.997071351776076e-05, | |
| "loss": 1.2620774507522583, | |
| "step": 136 | |
| }, | |
| { | |
| "epoch": 0.30598669623059865, | |
| "grad_norm": 2.3951597213745117, | |
| "learning_rate": 1.996799221609669e-05, | |
| "loss": 0.8199646472930908, | |
| "step": 138 | |
| }, | |
| { | |
| "epoch": 0.31042128603104213, | |
| "grad_norm": 1.0207390785217285, | |
| "learning_rate": 1.9965150260982137e-05, | |
| "loss": 1.2821062803268433, | |
| "step": 140 | |
| }, | |
| { | |
| "epoch": 0.3148558758314856, | |
| "grad_norm": 0.808794379234314, | |
| "learning_rate": 1.9962187690652157e-05, | |
| "loss": 1.0488530397415161, | |
| "step": 142 | |
| }, | |
| { | |
| "epoch": 0.31929046563192903, | |
| "grad_norm": 1.9180113077163696, | |
| "learning_rate": 1.9959104544964536e-05, | |
| "loss": 1.0375815629959106, | |
| "step": 144 | |
| }, | |
| { | |
| "epoch": 0.3237250554323725, | |
| "grad_norm": 1.6617244482040405, | |
| "learning_rate": 1.9955900865399257e-05, | |
| "loss": 1.0013810396194458, | |
| "step": 146 | |
| }, | |
| { | |
| "epoch": 0.328159645232816, | |
| "grad_norm": 0.9878438711166382, | |
| "learning_rate": 1.9952576695057944e-05, | |
| "loss": 1.4907773733139038, | |
| "step": 148 | |
| }, | |
| { | |
| "epoch": 0.3325942350332594, | |
| "grad_norm": 1.895961046218872, | |
| "learning_rate": 1.9949132078663268e-05, | |
| "loss": 1.254366397857666, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 0.3370288248337029, | |
| "grad_norm": 1.2166792154312134, | |
| "learning_rate": 1.9945567062558368e-05, | |
| "loss": 1.1661312580108643, | |
| "step": 152 | |
| }, | |
| { | |
| "epoch": 0.34146341463414637, | |
| "grad_norm": 0.8109827637672424, | |
| "learning_rate": 1.9941881694706206e-05, | |
| "loss": 1.3392776250839233, | |
| "step": 154 | |
| }, | |
| { | |
| "epoch": 0.3458980044345898, | |
| "grad_norm": 2.391664505004883, | |
| "learning_rate": 1.993807602468893e-05, | |
| "loss": 1.3111441135406494, | |
| "step": 156 | |
| }, | |
| { | |
| "epoch": 0.35033259423503327, | |
| "grad_norm": 0.941863477230072, | |
| "learning_rate": 1.9934150103707217e-05, | |
| "loss": 1.3535107374191284, | |
| "step": 158 | |
| }, | |
| { | |
| "epoch": 0.35476718403547675, | |
| "grad_norm": 0.6483902335166931, | |
| "learning_rate": 1.9930103984579564e-05, | |
| "loss": 1.3064088821411133, | |
| "step": 160 | |
| }, | |
| { | |
| "epoch": 0.35920177383592017, | |
| "grad_norm": 0.682521641254425, | |
| "learning_rate": 1.9925937721741595e-05, | |
| "loss": 0.9179922938346863, | |
| "step": 162 | |
| }, | |
| { | |
| "epoch": 0.36363636363636365, | |
| "grad_norm": 1.1320902109146118, | |
| "learning_rate": 1.992165137124532e-05, | |
| "loss": 1.0206555128097534, | |
| "step": 164 | |
| }, | |
| { | |
| "epoch": 0.36807095343680707, | |
| "grad_norm": 0.8146328926086426, | |
| "learning_rate": 1.9917244990758385e-05, | |
| "loss": 1.3475308418273926, | |
| "step": 166 | |
| }, | |
| { | |
| "epoch": 0.37250554323725055, | |
| "grad_norm": 1.6250571012496948, | |
| "learning_rate": 1.9912718639563285e-05, | |
| "loss": 1.31868577003479, | |
| "step": 168 | |
| }, | |
| { | |
| "epoch": 0.376940133037694, | |
| "grad_norm": 0.8682546615600586, | |
| "learning_rate": 1.9908072378556585e-05, | |
| "loss": 1.2381749153137207, | |
| "step": 170 | |
| }, | |
| { | |
| "epoch": 0.38137472283813745, | |
| "grad_norm": 2.6199824810028076, | |
| "learning_rate": 1.990330627024809e-05, | |
| "loss": 0.8625264167785645, | |
| "step": 172 | |
| }, | |
| { | |
| "epoch": 0.3858093126385809, | |
| "grad_norm": 1.7685837745666504, | |
| "learning_rate": 1.989842037876e-05, | |
| "loss": 1.7184687852859497, | |
| "step": 174 | |
| }, | |
| { | |
| "epoch": 0.3902439024390244, | |
| "grad_norm": 0.9849699139595032, | |
| "learning_rate": 1.9893414769826053e-05, | |
| "loss": 1.369092583656311, | |
| "step": 176 | |
| }, | |
| { | |
| "epoch": 0.3946784922394678, | |
| "grad_norm": 1.686566948890686, | |
| "learning_rate": 1.9888289510790643e-05, | |
| "loss": 1.383589744567871, | |
| "step": 178 | |
| }, | |
| { | |
| "epoch": 0.3991130820399113, | |
| "grad_norm": 0.793823778629303, | |
| "learning_rate": 1.988304467060791e-05, | |
| "loss": 1.1963413953781128, | |
| "step": 180 | |
| }, | |
| { | |
| "epoch": 0.4035476718403548, | |
| "grad_norm": 0.6959115266799927, | |
| "learning_rate": 1.9877680319840813e-05, | |
| "loss": 1.335618257522583, | |
| "step": 182 | |
| }, | |
| { | |
| "epoch": 0.4079822616407982, | |
| "grad_norm": 1.3807117938995361, | |
| "learning_rate": 1.987219653066018e-05, | |
| "loss": 0.8666111826896667, | |
| "step": 184 | |
| }, | |
| { | |
| "epoch": 0.4124168514412417, | |
| "grad_norm": 1.2673057317733765, | |
| "learning_rate": 1.9866593376843743e-05, | |
| "loss": 1.0503551959991455, | |
| "step": 186 | |
| }, | |
| { | |
| "epoch": 0.41685144124168516, | |
| "grad_norm": 0.8807701468467712, | |
| "learning_rate": 1.9860870933775128e-05, | |
| "loss": 1.0260038375854492, | |
| "step": 188 | |
| }, | |
| { | |
| "epoch": 0.4212860310421286, | |
| "grad_norm": 2.0024898052215576, | |
| "learning_rate": 1.9855029278442865e-05, | |
| "loss": 1.1095020771026611, | |
| "step": 190 | |
| }, | |
| { | |
| "epoch": 0.42572062084257206, | |
| "grad_norm": 2.057466745376587, | |
| "learning_rate": 1.984906848943934e-05, | |
| "loss": 1.100471019744873, | |
| "step": 192 | |
| }, | |
| { | |
| "epoch": 0.43015521064301554, | |
| "grad_norm": 1.057753324508667, | |
| "learning_rate": 1.9842988646959723e-05, | |
| "loss": 1.3441250324249268, | |
| "step": 194 | |
| }, | |
| { | |
| "epoch": 0.43458980044345896, | |
| "grad_norm": 1.4172452688217163, | |
| "learning_rate": 1.983678983280093e-05, | |
| "loss": 1.6131374835968018, | |
| "step": 196 | |
| }, | |
| { | |
| "epoch": 0.43902439024390244, | |
| "grad_norm": 1.8611360788345337, | |
| "learning_rate": 1.983047213036047e-05, | |
| "loss": 1.3363574743270874, | |
| "step": 198 | |
| }, | |
| { | |
| "epoch": 0.4434589800443459, | |
| "grad_norm": 1.018568992614746, | |
| "learning_rate": 1.9824035624635368e-05, | |
| "loss": 1.2478539943695068, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.44789356984478934, | |
| "grad_norm": 1.5161771774291992, | |
| "learning_rate": 1.9817480402220995e-05, | |
| "loss": 1.3159914016723633, | |
| "step": 202 | |
| }, | |
| { | |
| "epoch": 0.4523281596452328, | |
| "grad_norm": 1.3039575815200806, | |
| "learning_rate": 1.9810806551309903e-05, | |
| "loss": 1.2693634033203125, | |
| "step": 204 | |
| }, | |
| { | |
| "epoch": 0.4567627494456763, | |
| "grad_norm": 1.2496814727783203, | |
| "learning_rate": 1.9804014161690672e-05, | |
| "loss": 1.1507153511047363, | |
| "step": 206 | |
| }, | |
| { | |
| "epoch": 0.4611973392461197, | |
| "grad_norm": 4.592546463012695, | |
| "learning_rate": 1.979710332474665e-05, | |
| "loss": 1.1844661235809326, | |
| "step": 208 | |
| }, | |
| { | |
| "epoch": 0.4656319290465632, | |
| "grad_norm": 0.856142520904541, | |
| "learning_rate": 1.9790074133454765e-05, | |
| "loss": 0.7224380970001221, | |
| "step": 210 | |
| }, | |
| { | |
| "epoch": 0.4700665188470067, | |
| "grad_norm": 10.285343170166016, | |
| "learning_rate": 1.9782926682384248e-05, | |
| "loss": 0.8978222012519836, | |
| "step": 212 | |
| }, | |
| { | |
| "epoch": 0.4745011086474501, | |
| "grad_norm": 0.5395671129226685, | |
| "learning_rate": 1.977566106769538e-05, | |
| "loss": 1.1894056797027588, | |
| "step": 214 | |
| }, | |
| { | |
| "epoch": 0.4789356984478936, | |
| "grad_norm": 0.6740292310714722, | |
| "learning_rate": 1.976827738713819e-05, | |
| "loss": 1.2027900218963623, | |
| "step": 216 | |
| }, | |
| { | |
| "epoch": 0.48337028824833705, | |
| "grad_norm": 0.9326871037483215, | |
| "learning_rate": 1.976077574005114e-05, | |
| "loss": 1.1885857582092285, | |
| "step": 218 | |
| }, | |
| { | |
| "epoch": 0.4878048780487805, | |
| "grad_norm": 2.4017117023468018, | |
| "learning_rate": 1.9753156227359783e-05, | |
| "loss": 1.32407546043396, | |
| "step": 220 | |
| }, | |
| { | |
| "epoch": 0.49223946784922396, | |
| "grad_norm": 1.3293203115463257, | |
| "learning_rate": 1.9745418951575415e-05, | |
| "loss": 1.2708196640014648, | |
| "step": 222 | |
| }, | |
| { | |
| "epoch": 0.49667405764966743, | |
| "grad_norm": 0.804009199142456, | |
| "learning_rate": 1.9737564016793696e-05, | |
| "loss": 1.2493350505828857, | |
| "step": 224 | |
| }, | |
| { | |
| "epoch": 0.5011086474501109, | |
| "grad_norm": 0.6624335050582886, | |
| "learning_rate": 1.972959152869323e-05, | |
| "loss": 1.236510992050171, | |
| "step": 226 | |
| }, | |
| { | |
| "epoch": 0.5055432372505543, | |
| "grad_norm": 1.1144077777862549, | |
| "learning_rate": 1.972150159453417e-05, | |
| "loss": 1.2882966995239258, | |
| "step": 228 | |
| }, | |
| { | |
| "epoch": 0.5099778270509978, | |
| "grad_norm": 2.013320207595825, | |
| "learning_rate": 1.9713294323156768e-05, | |
| "loss": 1.8960356712341309, | |
| "step": 230 | |
| }, | |
| { | |
| "epoch": 0.5144124168514412, | |
| "grad_norm": 0.9120582342147827, | |
| "learning_rate": 1.9704969824979893e-05, | |
| "loss": 1.0289053916931152, | |
| "step": 232 | |
| }, | |
| { | |
| "epoch": 0.5188470066518847, | |
| "grad_norm": 1.22536301612854, | |
| "learning_rate": 1.9696528211999567e-05, | |
| "loss": 1.3444561958312988, | |
| "step": 234 | |
| }, | |
| { | |
| "epoch": 0.5232815964523282, | |
| "grad_norm": 0.7821425199508667, | |
| "learning_rate": 1.9687969597787445e-05, | |
| "loss": 1.1790920495986938, | |
| "step": 236 | |
| }, | |
| { | |
| "epoch": 0.5277161862527716, | |
| "grad_norm": 0.8863709568977356, | |
| "learning_rate": 1.967929409748929e-05, | |
| "loss": 1.0798450708389282, | |
| "step": 238 | |
| }, | |
| { | |
| "epoch": 0.532150776053215, | |
| "grad_norm": 0.5844965577125549, | |
| "learning_rate": 1.967050182782344e-05, | |
| "loss": 1.3156877756118774, | |
| "step": 240 | |
| }, | |
| { | |
| "epoch": 0.5365853658536586, | |
| "grad_norm": 0.46499544382095337, | |
| "learning_rate": 1.96615929070792e-05, | |
| "loss": 1.3678405284881592, | |
| "step": 242 | |
| }, | |
| { | |
| "epoch": 0.541019955654102, | |
| "grad_norm": 12.881924629211426, | |
| "learning_rate": 1.9652567455115287e-05, | |
| "loss": 1.0557224750518799, | |
| "step": 244 | |
| }, | |
| { | |
| "epoch": 0.5454545454545454, | |
| "grad_norm": 1.112845778465271, | |
| "learning_rate": 1.9643425593358212e-05, | |
| "loss": 1.308203101158142, | |
| "step": 246 | |
| }, | |
| { | |
| "epoch": 0.549889135254989, | |
| "grad_norm": 1.1576392650604248, | |
| "learning_rate": 1.9634167444800618e-05, | |
| "loss": 1.5463697910308838, | |
| "step": 248 | |
| }, | |
| { | |
| "epoch": 0.5543237250554324, | |
| "grad_norm": 1.7358508110046387, | |
| "learning_rate": 1.9624793133999663e-05, | |
| "loss": 1.3133127689361572, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 0.5587583148558758, | |
| "grad_norm": 1.8306182622909546, | |
| "learning_rate": 1.9615302787075317e-05, | |
| "loss": 0.7901706695556641, | |
| "step": 252 | |
| }, | |
| { | |
| "epoch": 0.5631929046563193, | |
| "grad_norm": 1.574388861656189, | |
| "learning_rate": 1.9605696531708687e-05, | |
| "loss": 1.5300947427749634, | |
| "step": 254 | |
| }, | |
| { | |
| "epoch": 0.5676274944567627, | |
| "grad_norm": 0.6506041884422302, | |
| "learning_rate": 1.9595974497140275e-05, | |
| "loss": 1.3747804164886475, | |
| "step": 256 | |
| }, | |
| { | |
| "epoch": 0.5720620842572062, | |
| "grad_norm": 0.5146905779838562, | |
| "learning_rate": 1.958613681416825e-05, | |
| "loss": 1.3938028812408447, | |
| "step": 258 | |
| }, | |
| { | |
| "epoch": 0.5764966740576497, | |
| "grad_norm": 0.540286123752594, | |
| "learning_rate": 1.95761836151467e-05, | |
| "loss": 1.3356225490570068, | |
| "step": 260 | |
| }, | |
| { | |
| "epoch": 0.5809312638580931, | |
| "grad_norm": 1.7904235124588013, | |
| "learning_rate": 1.9566115033983843e-05, | |
| "loss": 0.817384660243988, | |
| "step": 262 | |
| }, | |
| { | |
| "epoch": 0.5853658536585366, | |
| "grad_norm": 1.602072834968567, | |
| "learning_rate": 1.955593120614021e-05, | |
| "loss": 1.4035075902938843, | |
| "step": 264 | |
| }, | |
| { | |
| "epoch": 0.5898004434589801, | |
| "grad_norm": 0.951567530632019, | |
| "learning_rate": 1.954563226862685e-05, | |
| "loss": 1.164678692817688, | |
| "step": 266 | |
| }, | |
| { | |
| "epoch": 0.5942350332594235, | |
| "grad_norm": 0.9935126900672913, | |
| "learning_rate": 1.953521836000346e-05, | |
| "loss": 1.6089775562286377, | |
| "step": 268 | |
| }, | |
| { | |
| "epoch": 0.5986696230598669, | |
| "grad_norm": 1.1680865287780762, | |
| "learning_rate": 1.9524689620376552e-05, | |
| "loss": 1.2622849941253662, | |
| "step": 270 | |
| }, | |
| { | |
| "epoch": 0.6031042128603105, | |
| "grad_norm": 0.8502325415611267, | |
| "learning_rate": 1.9514046191397532e-05, | |
| "loss": 1.2814254760742188, | |
| "step": 272 | |
| }, | |
| { | |
| "epoch": 0.6075388026607539, | |
| "grad_norm": 0.822547972202301, | |
| "learning_rate": 1.950328821626081e-05, | |
| "loss": 1.278984785079956, | |
| "step": 274 | |
| }, | |
| { | |
| "epoch": 0.6119733924611973, | |
| "grad_norm": 0.9921445250511169, | |
| "learning_rate": 1.9492415839701902e-05, | |
| "loss": 1.2716035842895508, | |
| "step": 276 | |
| }, | |
| { | |
| "epoch": 0.6164079822616408, | |
| "grad_norm": 2.4694504737854004, | |
| "learning_rate": 1.9481429207995424e-05, | |
| "loss": 1.2899194955825806, | |
| "step": 278 | |
| }, | |
| { | |
| "epoch": 0.6208425720620843, | |
| "grad_norm": 0.6362584829330444, | |
| "learning_rate": 1.9470328468953176e-05, | |
| "loss": 1.3732231855392456, | |
| "step": 280 | |
| }, | |
| { | |
| "epoch": 0.6252771618625277, | |
| "grad_norm": 1.02562415599823, | |
| "learning_rate": 1.9459113771922128e-05, | |
| "loss": 1.0229641199111938, | |
| "step": 282 | |
| }, | |
| { | |
| "epoch": 0.6297117516629712, | |
| "grad_norm": 0.6536508798599243, | |
| "learning_rate": 1.944778526778242e-05, | |
| "loss": 1.315395474433899, | |
| "step": 284 | |
| }, | |
| { | |
| "epoch": 0.6341463414634146, | |
| "grad_norm": 0.7477055788040161, | |
| "learning_rate": 1.9436343108945323e-05, | |
| "loss": 1.3944462537765503, | |
| "step": 286 | |
| }, | |
| { | |
| "epoch": 0.6385809312638581, | |
| "grad_norm": 0.5282856822013855, | |
| "learning_rate": 1.9424787449351194e-05, | |
| "loss": 1.3006008863449097, | |
| "step": 288 | |
| }, | |
| { | |
| "epoch": 0.6430155210643016, | |
| "grad_norm": 0.4401320219039917, | |
| "learning_rate": 1.9413118444467408e-05, | |
| "loss": 1.2911877632141113, | |
| "step": 290 | |
| }, | |
| { | |
| "epoch": 0.647450110864745, | |
| "grad_norm": 1.1002235412597656, | |
| "learning_rate": 1.9401336251286264e-05, | |
| "loss": 1.43943190574646, | |
| "step": 292 | |
| }, | |
| { | |
| "epoch": 0.6518847006651884, | |
| "grad_norm": 0.5872219204902649, | |
| "learning_rate": 1.9389441028322874e-05, | |
| "loss": 1.026016116142273, | |
| "step": 294 | |
| }, | |
| { | |
| "epoch": 0.656319290465632, | |
| "grad_norm": 0.5707578659057617, | |
| "learning_rate": 1.9377432935613016e-05, | |
| "loss": 1.0756226778030396, | |
| "step": 296 | |
| }, | |
| { | |
| "epoch": 0.6607538802660754, | |
| "grad_norm": 0.6579997539520264, | |
| "learning_rate": 1.936531213471101e-05, | |
| "loss": 1.2744524478912354, | |
| "step": 298 | |
| }, | |
| { | |
| "epoch": 0.6651884700665188, | |
| "grad_norm": 4.350220680236816, | |
| "learning_rate": 1.935307878868752e-05, | |
| "loss": 1.3224852085113525, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 0.6696230598669624, | |
| "grad_norm": 0.5770370364189148, | |
| "learning_rate": 1.9340733062127373e-05, | |
| "loss": 1.2629750967025757, | |
| "step": 302 | |
| }, | |
| { | |
| "epoch": 0.6740576496674058, | |
| "grad_norm": 0.7492507696151733, | |
| "learning_rate": 1.9328275121127325e-05, | |
| "loss": 1.3276405334472656, | |
| "step": 304 | |
| }, | |
| { | |
| "epoch": 0.6784922394678492, | |
| "grad_norm": 0.9730760455131531, | |
| "learning_rate": 1.9315705133293857e-05, | |
| "loss": 1.2819868326187134, | |
| "step": 306 | |
| }, | |
| { | |
| "epoch": 0.6829268292682927, | |
| "grad_norm": 0.6775749921798706, | |
| "learning_rate": 1.9303023267740902e-05, | |
| "loss": 1.0328669548034668, | |
| "step": 308 | |
| }, | |
| { | |
| "epoch": 0.6873614190687362, | |
| "grad_norm": 0.6441645622253418, | |
| "learning_rate": 1.9290229695087562e-05, | |
| "loss": 1.2884297370910645, | |
| "step": 310 | |
| }, | |
| { | |
| "epoch": 0.6917960088691796, | |
| "grad_norm": 0.6459354162216187, | |
| "learning_rate": 1.9277324587455833e-05, | |
| "loss": 1.3426930904388428, | |
| "step": 312 | |
| }, | |
| { | |
| "epoch": 0.6962305986696231, | |
| "grad_norm": 0.715065598487854, | |
| "learning_rate": 1.9264308118468274e-05, | |
| "loss": 1.2753427028656006, | |
| "step": 314 | |
| }, | |
| { | |
| "epoch": 0.7006651884700665, | |
| "grad_norm": 0.7442984580993652, | |
| "learning_rate": 1.9251180463245675e-05, | |
| "loss": 1.4162836074829102, | |
| "step": 316 | |
| }, | |
| { | |
| "epoch": 0.70509977827051, | |
| "grad_norm": 0.6542792916297913, | |
| "learning_rate": 1.9237941798404708e-05, | |
| "loss": 1.1363985538482666, | |
| "step": 318 | |
| }, | |
| { | |
| "epoch": 0.7095343680709535, | |
| "grad_norm": 1.2111639976501465, | |
| "learning_rate": 1.922459230205553e-05, | |
| "loss": 1.0583592653274536, | |
| "step": 320 | |
| }, | |
| { | |
| "epoch": 0.7139689578713969, | |
| "grad_norm": 1.301080346107483, | |
| "learning_rate": 1.921113215379943e-05, | |
| "loss": 1.300571322441101, | |
| "step": 322 | |
| }, | |
| { | |
| "epoch": 0.7184035476718403, | |
| "grad_norm": 0.6510736346244812, | |
| "learning_rate": 1.9197561534726347e-05, | |
| "loss": 1.1844992637634277, | |
| "step": 324 | |
| }, | |
| { | |
| "epoch": 0.7228381374722838, | |
| "grad_norm": 2.18395733833313, | |
| "learning_rate": 1.9183880627412496e-05, | |
| "loss": 1.2481743097305298, | |
| "step": 326 | |
| }, | |
| { | |
| "epoch": 0.7272727272727273, | |
| "grad_norm": 0.287090003490448, | |
| "learning_rate": 1.9170089615917884e-05, | |
| "loss": 0.9507350325584412, | |
| "step": 328 | |
| }, | |
| { | |
| "epoch": 0.7317073170731707, | |
| "grad_norm": 1.0061886310577393, | |
| "learning_rate": 1.915618868578383e-05, | |
| "loss": 0.961956799030304, | |
| "step": 330 | |
| }, | |
| { | |
| "epoch": 0.7361419068736141, | |
| "grad_norm": 1.3770501613616943, | |
| "learning_rate": 1.9142178024030475e-05, | |
| "loss": 1.4702783823013306, | |
| "step": 332 | |
| }, | |
| { | |
| "epoch": 0.7405764966740577, | |
| "grad_norm": 4.349529266357422, | |
| "learning_rate": 1.9128057819154264e-05, | |
| "loss": 1.3034319877624512, | |
| "step": 334 | |
| }, | |
| { | |
| "epoch": 0.7450110864745011, | |
| "grad_norm": 0.5903530716896057, | |
| "learning_rate": 1.911382826112542e-05, | |
| "loss": 1.2915682792663574, | |
| "step": 336 | |
| }, | |
| { | |
| "epoch": 0.7494456762749445, | |
| "grad_norm": 1.9257632493972778, | |
| "learning_rate": 1.909948954138538e-05, | |
| "loss": 0.859005868434906, | |
| "step": 338 | |
| }, | |
| { | |
| "epoch": 0.753880266075388, | |
| "grad_norm": 4.127213001251221, | |
| "learning_rate": 1.908504185284421e-05, | |
| "loss": 0.4267387092113495, | |
| "step": 340 | |
| }, | |
| { | |
| "epoch": 0.7583148558758315, | |
| "grad_norm": 0.7010712027549744, | |
| "learning_rate": 1.9070485389878023e-05, | |
| "loss": 0.9848529696464539, | |
| "step": 342 | |
| }, | |
| { | |
| "epoch": 0.7627494456762749, | |
| "grad_norm": 0.5236337780952454, | |
| "learning_rate": 1.9055820348326358e-05, | |
| "loss": 1.400795340538025, | |
| "step": 344 | |
| }, | |
| { | |
| "epoch": 0.7671840354767184, | |
| "grad_norm": 0.49078261852264404, | |
| "learning_rate": 1.9041046925489552e-05, | |
| "loss": 1.304659128189087, | |
| "step": 346 | |
| }, | |
| { | |
| "epoch": 0.7716186252771619, | |
| "grad_norm": 0.8199257850646973, | |
| "learning_rate": 1.902616532012608e-05, | |
| "loss": 1.1828995943069458, | |
| "step": 348 | |
| }, | |
| { | |
| "epoch": 0.7760532150776053, | |
| "grad_norm": 0.66054368019104, | |
| "learning_rate": 1.9011175732449878e-05, | |
| "loss": 1.2884124517440796, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 0.7804878048780488, | |
| "grad_norm": 1.6785452365875244, | |
| "learning_rate": 1.8996078364127655e-05, | |
| "loss": 1.2245346307754517, | |
| "step": 352 | |
| }, | |
| { | |
| "epoch": 0.7849223946784922, | |
| "grad_norm": 1.8945343494415283, | |
| "learning_rate": 1.898087341827618e-05, | |
| "loss": 1.0871098041534424, | |
| "step": 354 | |
| }, | |
| { | |
| "epoch": 0.7893569844789357, | |
| "grad_norm": 1.0700933933258057, | |
| "learning_rate": 1.896556109945954e-05, | |
| "loss": 1.2871757745742798, | |
| "step": 356 | |
| }, | |
| { | |
| "epoch": 0.7937915742793792, | |
| "grad_norm": 1.8673183917999268, | |
| "learning_rate": 1.8950141613686404e-05, | |
| "loss": 1.358439564704895, | |
| "step": 358 | |
| }, | |
| { | |
| "epoch": 0.7982261640798226, | |
| "grad_norm": 0.603571891784668, | |
| "learning_rate": 1.8934615168407237e-05, | |
| "loss": 1.295249104499817, | |
| "step": 360 | |
| }, | |
| { | |
| "epoch": 0.802660753880266, | |
| "grad_norm": 1.2410091161727905, | |
| "learning_rate": 1.891898197251151e-05, | |
| "loss": 0.8056436777114868, | |
| "step": 362 | |
| }, | |
| { | |
| "epoch": 0.8070953436807096, | |
| "grad_norm": 1.200040578842163, | |
| "learning_rate": 1.8903242236324907e-05, | |
| "loss": 1.4234434366226196, | |
| "step": 364 | |
| }, | |
| { | |
| "epoch": 0.811529933481153, | |
| "grad_norm": 0.47995078563690186, | |
| "learning_rate": 1.888739617160647e-05, | |
| "loss": 1.261313557624817, | |
| "step": 366 | |
| }, | |
| { | |
| "epoch": 0.8159645232815964, | |
| "grad_norm": 1.1774096488952637, | |
| "learning_rate": 1.8871443991545768e-05, | |
| "loss": 1.0709372758865356, | |
| "step": 368 | |
| }, | |
| { | |
| "epoch": 0.8203991130820399, | |
| "grad_norm": 1.2842013835906982, | |
| "learning_rate": 1.885538591076002e-05, | |
| "loss": 0.9137963652610779, | |
| "step": 370 | |
| }, | |
| { | |
| "epoch": 0.8248337028824834, | |
| "grad_norm": 0.7302650809288025, | |
| "learning_rate": 1.8839222145291217e-05, | |
| "loss": 1.29634690284729, | |
| "step": 372 | |
| }, | |
| { | |
| "epoch": 0.8292682926829268, | |
| "grad_norm": 0.611242413520813, | |
| "learning_rate": 1.88229529126032e-05, | |
| "loss": 1.2931721210479736, | |
| "step": 374 | |
| }, | |
| { | |
| "epoch": 0.8337028824833703, | |
| "grad_norm": 0.4736635684967041, | |
| "learning_rate": 1.8806578431578747e-05, | |
| "loss": 1.2644020318984985, | |
| "step": 376 | |
| }, | |
| { | |
| "epoch": 0.8381374722838137, | |
| "grad_norm": 1.342694640159607, | |
| "learning_rate": 1.8790098922516637e-05, | |
| "loss": 1.3544180393218994, | |
| "step": 378 | |
| }, | |
| { | |
| "epoch": 0.8425720620842572, | |
| "grad_norm": 1.1542247533798218, | |
| "learning_rate": 1.8773514607128647e-05, | |
| "loss": 0.9301992654800415, | |
| "step": 380 | |
| }, | |
| { | |
| "epoch": 0.8470066518847007, | |
| "grad_norm": 0.5767038464546204, | |
| "learning_rate": 1.875682570853662e-05, | |
| "loss": 1.3983073234558105, | |
| "step": 382 | |
| }, | |
| { | |
| "epoch": 0.8514412416851441, | |
| "grad_norm": 0.7877940535545349, | |
| "learning_rate": 1.8740032451269438e-05, | |
| "loss": 1.195070743560791, | |
| "step": 384 | |
| }, | |
| { | |
| "epoch": 0.8558758314855875, | |
| "grad_norm": 0.44601938128471375, | |
| "learning_rate": 1.8723135061259977e-05, | |
| "loss": 1.3003090620040894, | |
| "step": 386 | |
| }, | |
| { | |
| "epoch": 0.8603104212860311, | |
| "grad_norm": 0.35469481348991394, | |
| "learning_rate": 1.8706133765842126e-05, | |
| "loss": 1.2766008377075195, | |
| "step": 388 | |
| }, | |
| { | |
| "epoch": 0.8647450110864745, | |
| "grad_norm": 0.4653916358947754, | |
| "learning_rate": 1.8689028793747673e-05, | |
| "loss": 1.3040666580200195, | |
| "step": 390 | |
| }, | |
| { | |
| "epoch": 0.8691796008869179, | |
| "grad_norm": 0.6527778506278992, | |
| "learning_rate": 1.8671820375103256e-05, | |
| "loss": 1.0266871452331543, | |
| "step": 392 | |
| }, | |
| { | |
| "epoch": 0.8736141906873615, | |
| "grad_norm": 0.5240263938903809, | |
| "learning_rate": 1.8654508741427272e-05, | |
| "loss": 1.2564506530761719, | |
| "step": 394 | |
| }, | |
| { | |
| "epoch": 0.8780487804878049, | |
| "grad_norm": 0.437155157327652, | |
| "learning_rate": 1.863709412562672e-05, | |
| "loss": 1.246124505996704, | |
| "step": 396 | |
| }, | |
| { | |
| "epoch": 0.8824833702882483, | |
| "grad_norm": 0.8538821935653687, | |
| "learning_rate": 1.8619576761994137e-05, | |
| "loss": 1.2513529062271118, | |
| "step": 398 | |
| }, | |
| { | |
| "epoch": 0.8869179600886918, | |
| "grad_norm": 0.49160391092300415, | |
| "learning_rate": 1.860195688620438e-05, | |
| "loss": 0.6274079084396362, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 0.8913525498891353, | |
| "grad_norm": 0.4428112506866455, | |
| "learning_rate": 1.8584234735311497e-05, | |
| "loss": 1.119248390197754, | |
| "step": 402 | |
| }, | |
| { | |
| "epoch": 0.8957871396895787, | |
| "grad_norm": 0.24119903147220612, | |
| "learning_rate": 1.8566410547745514e-05, | |
| "loss": 1.0662287473678589, | |
| "step": 404 | |
| }, | |
| { | |
| "epoch": 0.9002217294900222, | |
| "grad_norm": 1.1826022863388062, | |
| "learning_rate": 1.8548484563309243e-05, | |
| "loss": 1.3069649934768677, | |
| "step": 406 | |
| }, | |
| { | |
| "epoch": 0.9046563192904656, | |
| "grad_norm": 2.6790738105773926, | |
| "learning_rate": 1.853045702317505e-05, | |
| "loss": 1.210648536682129, | |
| "step": 408 | |
| }, | |
| { | |
| "epoch": 0.9090909090909091, | |
| "grad_norm": 0.8086345195770264, | |
| "learning_rate": 1.85123281698816e-05, | |
| "loss": 1.22344172000885, | |
| "step": 410 | |
| }, | |
| { | |
| "epoch": 0.9135254988913526, | |
| "grad_norm": 0.46482929587364197, | |
| "learning_rate": 1.8494098247330613e-05, | |
| "loss": 1.2734506130218506, | |
| "step": 412 | |
| }, | |
| { | |
| "epoch": 0.917960088691796, | |
| "grad_norm": 0.6504107117652893, | |
| "learning_rate": 1.847576750078357e-05, | |
| "loss": 1.2879432439804077, | |
| "step": 414 | |
| }, | |
| { | |
| "epoch": 0.9223946784922394, | |
| "grad_norm": 2.2455458641052246, | |
| "learning_rate": 1.8457336176858425e-05, | |
| "loss": 1.043541431427002, | |
| "step": 416 | |
| }, | |
| { | |
| "epoch": 0.926829268292683, | |
| "grad_norm": 0.47505319118499756, | |
| "learning_rate": 1.8438804523526258e-05, | |
| "loss": 1.339963674545288, | |
| "step": 418 | |
| }, | |
| { | |
| "epoch": 0.9312638580931264, | |
| "grad_norm": 1.4694486856460571, | |
| "learning_rate": 1.8420172790107983e-05, | |
| "loss": 0.8636243939399719, | |
| "step": 420 | |
| }, | |
| { | |
| "epoch": 0.9356984478935698, | |
| "grad_norm": 1.0414270162582397, | |
| "learning_rate": 1.8401441227270953e-05, | |
| "loss": 1.5467491149902344, | |
| "step": 422 | |
| }, | |
| { | |
| "epoch": 0.9401330376940134, | |
| "grad_norm": 1.4648017883300781, | |
| "learning_rate": 1.838261008702561e-05, | |
| "loss": 1.1460201740264893, | |
| "step": 424 | |
| }, | |
| { | |
| "epoch": 0.9445676274944568, | |
| "grad_norm": 0.5038813352584839, | |
| "learning_rate": 1.8363679622722096e-05, | |
| "loss": 1.2603991031646729, | |
| "step": 426 | |
| }, | |
| { | |
| "epoch": 0.9490022172949002, | |
| "grad_norm": 0.6404750347137451, | |
| "learning_rate": 1.8344650089046826e-05, | |
| "loss": 1.1844969987869263, | |
| "step": 428 | |
| }, | |
| { | |
| "epoch": 0.9534368070953437, | |
| "grad_norm": 2.21321439743042, | |
| "learning_rate": 1.832552174201908e-05, | |
| "loss": 0.8131325840950012, | |
| "step": 430 | |
| }, | |
| { | |
| "epoch": 0.9578713968957872, | |
| "grad_norm": 0.49369490146636963, | |
| "learning_rate": 1.830629483898755e-05, | |
| "loss": 1.2790230512619019, | |
| "step": 432 | |
| }, | |
| { | |
| "epoch": 0.9623059866962306, | |
| "grad_norm": 6.766321659088135, | |
| "learning_rate": 1.8286969638626882e-05, | |
| "loss": 1.2089905738830566, | |
| "step": 434 | |
| }, | |
| { | |
| "epoch": 0.9667405764966741, | |
| "grad_norm": 0.7347844839096069, | |
| "learning_rate": 1.826754640093419e-05, | |
| "loss": 1.3173238039016724, | |
| "step": 436 | |
| }, | |
| { | |
| "epoch": 0.9711751662971175, | |
| "grad_norm": 0.564915657043457, | |
| "learning_rate": 1.824802538722556e-05, | |
| "loss": 1.2954607009887695, | |
| "step": 438 | |
| }, | |
| { | |
| "epoch": 0.975609756097561, | |
| "grad_norm": 2.1599206924438477, | |
| "learning_rate": 1.8228406860132545e-05, | |
| "loss": 0.8611724376678467, | |
| "step": 440 | |
| }, | |
| { | |
| "epoch": 0.9800443458980045, | |
| "grad_norm": 0.5106037259101868, | |
| "learning_rate": 1.8208691083598607e-05, | |
| "loss": 1.1488136053085327, | |
| "step": 442 | |
| }, | |
| { | |
| "epoch": 0.9844789356984479, | |
| "grad_norm": 0.9815554618835449, | |
| "learning_rate": 1.8188878322875594e-05, | |
| "loss": 1.3558589220046997, | |
| "step": 444 | |
| }, | |
| { | |
| "epoch": 0.9889135254988913, | |
| "grad_norm": 0.6858358979225159, | |
| "learning_rate": 1.8168968844520157e-05, | |
| "loss": 1.2466365098953247, | |
| "step": 446 | |
| }, | |
| { | |
| "epoch": 0.9933481152993349, | |
| "grad_norm": 1.2758557796478271, | |
| "learning_rate": 1.8148962916390154e-05, | |
| "loss": 1.2831544876098633, | |
| "step": 448 | |
| }, | |
| { | |
| "epoch": 0.9977827050997783, | |
| "grad_norm": 0.47892308235168457, | |
| "learning_rate": 1.8128860807641076e-05, | |
| "loss": 1.1054222583770752, | |
| "step": 450 | |
| }, | |
| { | |
| "epoch": 1.0022172949002217, | |
| "grad_norm": 1.9382197856903076, | |
| "learning_rate": 1.810866278872239e-05, | |
| "loss": 1.0697418451309204, | |
| "step": 452 | |
| }, | |
| { | |
| "epoch": 1.0066518847006651, | |
| "grad_norm": 1.1436439752578735, | |
| "learning_rate": 1.8088369131373925e-05, | |
| "loss": 1.2170673608779907, | |
| "step": 454 | |
| }, | |
| { | |
| "epoch": 1.0110864745011086, | |
| "grad_norm": 0.7328348159790039, | |
| "learning_rate": 1.8067980108622217e-05, | |
| "loss": 1.1548501253128052, | |
| "step": 456 | |
| }, | |
| { | |
| "epoch": 1.0155210643015522, | |
| "grad_norm": 0.8741162419319153, | |
| "learning_rate": 1.8047495994776817e-05, | |
| "loss": 0.7017601132392883, | |
| "step": 458 | |
| }, | |
| { | |
| "epoch": 1.0199556541019956, | |
| "grad_norm": 0.3321545124053955, | |
| "learning_rate": 1.8026917065426605e-05, | |
| "loss": 0.7321120500564575, | |
| "step": 460 | |
| }, | |
| { | |
| "epoch": 1.024390243902439, | |
| "grad_norm": 1.3679202795028687, | |
| "learning_rate": 1.800624359743611e-05, | |
| "loss": 0.5792034864425659, | |
| "step": 462 | |
| }, | |
| { | |
| "epoch": 1.0288248337028825, | |
| "grad_norm": 0.7569698095321655, | |
| "learning_rate": 1.798547586894175e-05, | |
| "loss": 0.7689359188079834, | |
| "step": 464 | |
| }, | |
| { | |
| "epoch": 1.033259423503326, | |
| "grad_norm": 0.7644620537757874, | |
| "learning_rate": 1.7964614159348103e-05, | |
| "loss": 0.698060154914856, | |
| "step": 466 | |
| }, | |
| { | |
| "epoch": 1.0376940133037693, | |
| "grad_norm": 1.2388887405395508, | |
| "learning_rate": 1.794365874932415e-05, | |
| "loss": 0.8797460198402405, | |
| "step": 468 | |
| }, | |
| { | |
| "epoch": 1.042128603104213, | |
| "grad_norm": 0.9471485018730164, | |
| "learning_rate": 1.7922609920799493e-05, | |
| "loss": 0.6286487579345703, | |
| "step": 470 | |
| }, | |
| { | |
| "epoch": 1.0465631929046564, | |
| "grad_norm": 2.5266878604888916, | |
| "learning_rate": 1.790146795696059e-05, | |
| "loss": 1.0638426542282104, | |
| "step": 472 | |
| }, | |
| { | |
| "epoch": 1.0509977827050998, | |
| "grad_norm": 0.6257596015930176, | |
| "learning_rate": 1.7880233142246884e-05, | |
| "loss": 1.0050872564315796, | |
| "step": 474 | |
| }, | |
| { | |
| "epoch": 1.0554323725055432, | |
| "grad_norm": 0.5379915237426758, | |
| "learning_rate": 1.7858905762347044e-05, | |
| "loss": 0.9805111289024353, | |
| "step": 476 | |
| }, | |
| { | |
| "epoch": 1.0598669623059866, | |
| "grad_norm": 0.8328865170478821, | |
| "learning_rate": 1.783748610419508e-05, | |
| "loss": 1.1784859895706177, | |
| "step": 478 | |
| }, | |
| { | |
| "epoch": 1.06430155210643, | |
| "grad_norm": 0.44074714183807373, | |
| "learning_rate": 1.7815974455966488e-05, | |
| "loss": 0.6814610958099365, | |
| "step": 480 | |
| }, | |
| { | |
| "epoch": 1.0687361419068737, | |
| "grad_norm": 1.1742632389068604, | |
| "learning_rate": 1.7794371107074398e-05, | |
| "loss": 1.1012016534805298, | |
| "step": 482 | |
| }, | |
| { | |
| "epoch": 1.0731707317073171, | |
| "grad_norm": 0.37530067563056946, | |
| "learning_rate": 1.7772676348165637e-05, | |
| "loss": 0.9307145476341248, | |
| "step": 484 | |
| }, | |
| { | |
| "epoch": 1.0776053215077606, | |
| "grad_norm": 0.42450839281082153, | |
| "learning_rate": 1.7750890471116858e-05, | |
| "loss": 0.963620662689209, | |
| "step": 486 | |
| }, | |
| { | |
| "epoch": 1.082039911308204, | |
| "grad_norm": 0.47807762026786804, | |
| "learning_rate": 1.7729013769030596e-05, | |
| "loss": 0.7537004351615906, | |
| "step": 488 | |
| }, | |
| { | |
| "epoch": 1.0864745011086474, | |
| "grad_norm": 0.4078989028930664, | |
| "learning_rate": 1.7707046536231325e-05, | |
| "loss": 0.854632556438446, | |
| "step": 490 | |
| }, | |
| { | |
| "epoch": 1.0909090909090908, | |
| "grad_norm": 0.6203530430793762, | |
| "learning_rate": 1.76849890682615e-05, | |
| "loss": 0.9603514671325684, | |
| "step": 492 | |
| }, | |
| { | |
| "epoch": 1.0953436807095343, | |
| "grad_norm": 1.7032476663589478, | |
| "learning_rate": 1.7662841661877574e-05, | |
| "loss": 1.0737708806991577, | |
| "step": 494 | |
| }, | |
| { | |
| "epoch": 1.099778270509978, | |
| "grad_norm": 1.1234840154647827, | |
| "learning_rate": 1.7640604615046025e-05, | |
| "loss": 0.9386560320854187, | |
| "step": 496 | |
| }, | |
| { | |
| "epoch": 1.1042128603104213, | |
| "grad_norm": 0.427051842212677, | |
| "learning_rate": 1.7618278226939327e-05, | |
| "loss": 0.9625406265258789, | |
| "step": 498 | |
| }, | |
| { | |
| "epoch": 1.1086474501108647, | |
| "grad_norm": 0.7077636122703552, | |
| "learning_rate": 1.7595862797931936e-05, | |
| "loss": 0.6286700367927551, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 1.1130820399113082, | |
| "grad_norm": 0.5965766310691833, | |
| "learning_rate": 1.757335862959624e-05, | |
| "loss": 0.9419457316398621, | |
| "step": 502 | |
| }, | |
| { | |
| "epoch": 1.1175166297117516, | |
| "grad_norm": 0.7379962801933289, | |
| "learning_rate": 1.755076602469851e-05, | |
| "loss": 0.8069853186607361, | |
| "step": 504 | |
| }, | |
| { | |
| "epoch": 1.1219512195121952, | |
| "grad_norm": 1.0986132621765137, | |
| "learning_rate": 1.7528085287194827e-05, | |
| "loss": 0.8290332555770874, | |
| "step": 506 | |
| }, | |
| { | |
| "epoch": 1.1263858093126387, | |
| "grad_norm": 1.4528342485427856, | |
| "learning_rate": 1.750531672222698e-05, | |
| "loss": 0.6308746933937073, | |
| "step": 508 | |
| }, | |
| { | |
| "epoch": 1.130820399113082, | |
| "grad_norm": 0.7668278217315674, | |
| "learning_rate": 1.7482460636118377e-05, | |
| "loss": 1.0766762495040894, | |
| "step": 510 | |
| }, | |
| { | |
| "epoch": 1.1352549889135255, | |
| "grad_norm": 1.3378920555114746, | |
| "learning_rate": 1.745951733636992e-05, | |
| "loss": 0.5383997559547424, | |
| "step": 512 | |
| }, | |
| { | |
| "epoch": 1.139689578713969, | |
| "grad_norm": 1.2324367761611938, | |
| "learning_rate": 1.7436487131655855e-05, | |
| "loss": 0.4129646420478821, | |
| "step": 514 | |
| }, | |
| { | |
| "epoch": 1.1441241685144123, | |
| "grad_norm": 0.6832541823387146, | |
| "learning_rate": 1.7413370331819634e-05, | |
| "loss": 0.8020773530006409, | |
| "step": 516 | |
| }, | |
| { | |
| "epoch": 1.1485587583148558, | |
| "grad_norm": 1.0301239490509033, | |
| "learning_rate": 1.7390167247869743e-05, | |
| "loss": 0.9460446238517761, | |
| "step": 518 | |
| }, | |
| { | |
| "epoch": 1.1529933481152994, | |
| "grad_norm": 1.7787998914718628, | |
| "learning_rate": 1.7366878191975516e-05, | |
| "loss": 1.080168604850769, | |
| "step": 520 | |
| }, | |
| { | |
| "epoch": 1.1574279379157428, | |
| "grad_norm": 1.1747550964355469, | |
| "learning_rate": 1.7343503477462927e-05, | |
| "loss": 0.534135639667511, | |
| "step": 522 | |
| }, | |
| { | |
| "epoch": 1.1618625277161863, | |
| "grad_norm": 0.5435235500335693, | |
| "learning_rate": 1.7320043418810394e-05, | |
| "loss": 0.9134470820426941, | |
| "step": 524 | |
| }, | |
| { | |
| "epoch": 1.1662971175166297, | |
| "grad_norm": 0.5852527022361755, | |
| "learning_rate": 1.729649833164453e-05, | |
| "loss": 1.0747884511947632, | |
| "step": 526 | |
| }, | |
| { | |
| "epoch": 1.170731707317073, | |
| "grad_norm": 0.5314655900001526, | |
| "learning_rate": 1.727286853273591e-05, | |
| "loss": 0.6440135836601257, | |
| "step": 528 | |
| }, | |
| { | |
| "epoch": 1.1751662971175167, | |
| "grad_norm": 0.5095431208610535, | |
| "learning_rate": 1.7249154339994788e-05, | |
| "loss": 0.8419979810714722, | |
| "step": 530 | |
| }, | |
| { | |
| "epoch": 1.1796008869179602, | |
| "grad_norm": 0.4051227569580078, | |
| "learning_rate": 1.7225356072466856e-05, | |
| "loss": 0.8316261768341064, | |
| "step": 532 | |
| }, | |
| { | |
| "epoch": 1.1840354767184036, | |
| "grad_norm": 0.3643783628940582, | |
| "learning_rate": 1.720147405032891e-05, | |
| "loss": 0.9231957197189331, | |
| "step": 534 | |
| }, | |
| { | |
| "epoch": 1.188470066518847, | |
| "grad_norm": 0.32051339745521545, | |
| "learning_rate": 1.7177508594884576e-05, | |
| "loss": 0.6917131543159485, | |
| "step": 536 | |
| }, | |
| { | |
| "epoch": 1.1929046563192904, | |
| "grad_norm": 0.6921893358230591, | |
| "learning_rate": 1.7153460028559964e-05, | |
| "loss": 1.00527024269104, | |
| "step": 538 | |
| }, | |
| { | |
| "epoch": 1.1973392461197339, | |
| "grad_norm": 0.6226311922073364, | |
| "learning_rate": 1.7129328674899354e-05, | |
| "loss": 0.7679756879806519, | |
| "step": 540 | |
| }, | |
| { | |
| "epoch": 1.2017738359201773, | |
| "grad_norm": 1.1230734586715698, | |
| "learning_rate": 1.7105114858560813e-05, | |
| "loss": 0.6591505408287048, | |
| "step": 542 | |
| }, | |
| { | |
| "epoch": 1.206208425720621, | |
| "grad_norm": 0.9631316661834717, | |
| "learning_rate": 1.7080818905311853e-05, | |
| "loss": 0.9413385987281799, | |
| "step": 544 | |
| }, | |
| { | |
| "epoch": 1.2106430155210643, | |
| "grad_norm": 0.3299412727355957, | |
| "learning_rate": 1.7056441142025037e-05, | |
| "loss": 0.7805101275444031, | |
| "step": 546 | |
| }, | |
| { | |
| "epoch": 1.2150776053215078, | |
| "grad_norm": 0.6347978115081787, | |
| "learning_rate": 1.703198189667358e-05, | |
| "loss": 1.2124230861663818, | |
| "step": 548 | |
| }, | |
| { | |
| "epoch": 1.2195121951219512, | |
| "grad_norm": 1.2306925058364868, | |
| "learning_rate": 1.7007441498326943e-05, | |
| "loss": 0.6341520547866821, | |
| "step": 550 | |
| }, | |
| { | |
| "epoch": 1.2239467849223946, | |
| "grad_norm": 0.6283694505691528, | |
| "learning_rate": 1.6982820277146403e-05, | |
| "loss": 0.971120297908783, | |
| "step": 552 | |
| }, | |
| { | |
| "epoch": 1.2283813747228383, | |
| "grad_norm": 2.13574481010437, | |
| "learning_rate": 1.6958118564380596e-05, | |
| "loss": 0.7344387173652649, | |
| "step": 554 | |
| }, | |
| { | |
| "epoch": 1.2328159645232817, | |
| "grad_norm": 0.3253254294395447, | |
| "learning_rate": 1.6933336692361097e-05, | |
| "loss": 0.7349171042442322, | |
| "step": 556 | |
| }, | |
| { | |
| "epoch": 1.237250554323725, | |
| "grad_norm": 2.8170223236083984, | |
| "learning_rate": 1.6908474994497912e-05, | |
| "loss": 0.588421106338501, | |
| "step": 558 | |
| }, | |
| { | |
| "epoch": 1.2416851441241685, | |
| "grad_norm": 1.3332557678222656, | |
| "learning_rate": 1.688353380527501e-05, | |
| "loss": 1.1083375215530396, | |
| "step": 560 | |
| }, | |
| { | |
| "epoch": 1.246119733924612, | |
| "grad_norm": 1.18131685256958, | |
| "learning_rate": 1.6858513460245818e-05, | |
| "loss": 0.8837442398071289, | |
| "step": 562 | |
| }, | |
| { | |
| "epoch": 1.2505543237250554, | |
| "grad_norm": 0.6048891544342041, | |
| "learning_rate": 1.6833414296028717e-05, | |
| "loss": 0.6526999473571777, | |
| "step": 564 | |
| }, | |
| { | |
| "epoch": 1.2549889135254988, | |
| "grad_norm": 0.5266470909118652, | |
| "learning_rate": 1.680823665030249e-05, | |
| "loss": 0.8695023655891418, | |
| "step": 566 | |
| }, | |
| { | |
| "epoch": 1.2594235033259422, | |
| "grad_norm": 0.5137091279029846, | |
| "learning_rate": 1.6782980861801804e-05, | |
| "loss": 0.8212327361106873, | |
| "step": 568 | |
| }, | |
| { | |
| "epoch": 1.2638580931263859, | |
| "grad_norm": 0.5950433015823364, | |
| "learning_rate": 1.6757647270312637e-05, | |
| "loss": 1.1734381914138794, | |
| "step": 570 | |
| }, | |
| { | |
| "epoch": 1.2682926829268293, | |
| "grad_norm": 0.4560319185256958, | |
| "learning_rate": 1.6732236216667722e-05, | |
| "loss": 0.739474892616272, | |
| "step": 572 | |
| }, | |
| { | |
| "epoch": 1.2727272727272727, | |
| "grad_norm": 0.6213061809539795, | |
| "learning_rate": 1.6706748042741935e-05, | |
| "loss": 1.2839826345443726, | |
| "step": 574 | |
| }, | |
| { | |
| "epoch": 1.2771618625277161, | |
| "grad_norm": 0.5989497900009155, | |
| "learning_rate": 1.6681183091447722e-05, | |
| "loss": 0.9160253405570984, | |
| "step": 576 | |
| }, | |
| { | |
| "epoch": 1.2815964523281598, | |
| "grad_norm": 1.3319306373596191, | |
| "learning_rate": 1.6655541706730476e-05, | |
| "loss": 1.093945860862732, | |
| "step": 578 | |
| }, | |
| { | |
| "epoch": 1.2860310421286032, | |
| "grad_norm": 0.5771936774253845, | |
| "learning_rate": 1.6629824233563908e-05, | |
| "loss": 1.0052553415298462, | |
| "step": 580 | |
| }, | |
| { | |
| "epoch": 1.2904656319290466, | |
| "grad_norm": 2.056089401245117, | |
| "learning_rate": 1.6604031017945403e-05, | |
| "loss": 1.3277779817581177, | |
| "step": 582 | |
| }, | |
| { | |
| "epoch": 1.29490022172949, | |
| "grad_norm": 0.4700315594673157, | |
| "learning_rate": 1.657816240689137e-05, | |
| "loss": 0.7094478607177734, | |
| "step": 584 | |
| }, | |
| { | |
| "epoch": 1.2993348115299335, | |
| "grad_norm": 0.4772210419178009, | |
| "learning_rate": 1.6552218748432572e-05, | |
| "loss": 0.7443241477012634, | |
| "step": 586 | |
| }, | |
| { | |
| "epoch": 1.3037694013303769, | |
| "grad_norm": 1.3316142559051514, | |
| "learning_rate": 1.6526200391609445e-05, | |
| "loss": 0.5478697419166565, | |
| "step": 588 | |
| }, | |
| { | |
| "epoch": 1.3082039911308203, | |
| "grad_norm": 2.8271443843841553, | |
| "learning_rate": 1.6500107686467407e-05, | |
| "loss": 1.0316827297210693, | |
| "step": 590 | |
| }, | |
| { | |
| "epoch": 1.3126385809312637, | |
| "grad_norm": 0.5958804488182068, | |
| "learning_rate": 1.6473940984052125e-05, | |
| "loss": 0.9526193141937256, | |
| "step": 592 | |
| }, | |
| { | |
| "epoch": 1.3170731707317074, | |
| "grad_norm": 0.8103643655776978, | |
| "learning_rate": 1.644770063640483e-05, | |
| "loss": 0.956438422203064, | |
| "step": 594 | |
| }, | |
| { | |
| "epoch": 1.3215077605321508, | |
| "grad_norm": 0.49165335297584534, | |
| "learning_rate": 1.6421386996557546e-05, | |
| "loss": 1.1481645107269287, | |
| "step": 596 | |
| }, | |
| { | |
| "epoch": 1.3259423503325942, | |
| "grad_norm": 0.7782723903656006, | |
| "learning_rate": 1.6395000418528362e-05, | |
| "loss": 0.9521985650062561, | |
| "step": 598 | |
| }, | |
| { | |
| "epoch": 1.3303769401330376, | |
| "grad_norm": 0.4783051609992981, | |
| "learning_rate": 1.636854125731666e-05, | |
| "loss": 0.47762957215309143, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 1.3348115299334813, | |
| "grad_norm": 0.8502888679504395, | |
| "learning_rate": 1.6342009868898332e-05, | |
| "loss": 0.7853302955627441, | |
| "step": 602 | |
| }, | |
| { | |
| "epoch": 1.3392461197339247, | |
| "grad_norm": 0.7362395524978638, | |
| "learning_rate": 1.6315406610221017e-05, | |
| "loss": 0.842612087726593, | |
| "step": 604 | |
| }, | |
| { | |
| "epoch": 1.3436807095343681, | |
| "grad_norm": 0.31031566858291626, | |
| "learning_rate": 1.6288731839199265e-05, | |
| "loss": 0.8278278708457947, | |
| "step": 606 | |
| }, | |
| { | |
| "epoch": 1.3481152993348116, | |
| "grad_norm": 0.6640880703926086, | |
| "learning_rate": 1.6261985914709745e-05, | |
| "loss": 1.028430461883545, | |
| "step": 608 | |
| }, | |
| { | |
| "epoch": 1.352549889135255, | |
| "grad_norm": 1.618883490562439, | |
| "learning_rate": 1.6235169196586408e-05, | |
| "loss": 1.1671243906021118, | |
| "step": 610 | |
| }, | |
| { | |
| "epoch": 1.3569844789356984, | |
| "grad_norm": 0.8194751739501953, | |
| "learning_rate": 1.6208282045615648e-05, | |
| "loss": 0.717631459236145, | |
| "step": 612 | |
| }, | |
| { | |
| "epoch": 1.3614190687361418, | |
| "grad_norm": 0.5236591100692749, | |
| "learning_rate": 1.618132482353145e-05, | |
| "loss": 1.0824005603790283, | |
| "step": 614 | |
| }, | |
| { | |
| "epoch": 1.3658536585365852, | |
| "grad_norm": 0.30997705459594727, | |
| "learning_rate": 1.6154297893010516e-05, | |
| "loss": 0.705600917339325, | |
| "step": 616 | |
| }, | |
| { | |
| "epoch": 1.370288248337029, | |
| "grad_norm": 0.5286486744880676, | |
| "learning_rate": 1.6127201617667396e-05, | |
| "loss": 0.8719974756240845, | |
| "step": 618 | |
| }, | |
| { | |
| "epoch": 1.3747228381374723, | |
| "grad_norm": 0.5527012348175049, | |
| "learning_rate": 1.6100036362049576e-05, | |
| "loss": 0.10983101278543472, | |
| "step": 620 | |
| }, | |
| { | |
| "epoch": 1.3791574279379157, | |
| "grad_norm": 0.4935061037540436, | |
| "learning_rate": 1.6072802491632612e-05, | |
| "loss": 0.9561376571655273, | |
| "step": 622 | |
| }, | |
| { | |
| "epoch": 1.3835920177383592, | |
| "grad_norm": 0.8581332564353943, | |
| "learning_rate": 1.6045500372815173e-05, | |
| "loss": 0.9489790201187134, | |
| "step": 624 | |
| }, | |
| { | |
| "epoch": 1.3880266075388026, | |
| "grad_norm": 1.1202986240386963, | |
| "learning_rate": 1.6018130372914123e-05, | |
| "loss": 0.9768886566162109, | |
| "step": 626 | |
| }, | |
| { | |
| "epoch": 1.3924611973392462, | |
| "grad_norm": 0.5203282833099365, | |
| "learning_rate": 1.5990692860159597e-05, | |
| "loss": 0.8944608569145203, | |
| "step": 628 | |
| }, | |
| { | |
| "epoch": 1.3968957871396896, | |
| "grad_norm": 0.44260817766189575, | |
| "learning_rate": 1.5963188203690025e-05, | |
| "loss": 1.0010405778884888, | |
| "step": 630 | |
| }, | |
| { | |
| "epoch": 1.401330376940133, | |
| "grad_norm": 0.5329799652099609, | |
| "learning_rate": 1.5935616773547182e-05, | |
| "loss": 0.8816275000572205, | |
| "step": 632 | |
| }, | |
| { | |
| "epoch": 1.4057649667405765, | |
| "grad_norm": 0.8102928400039673, | |
| "learning_rate": 1.5907978940671183e-05, | |
| "loss": 0.9644457101821899, | |
| "step": 634 | |
| }, | |
| { | |
| "epoch": 1.41019955654102, | |
| "grad_norm": 0.551501989364624, | |
| "learning_rate": 1.5880275076895537e-05, | |
| "loss": 0.9486368894577026, | |
| "step": 636 | |
| }, | |
| { | |
| "epoch": 1.4146341463414633, | |
| "grad_norm": 4.090445041656494, | |
| "learning_rate": 1.58525055549421e-05, | |
| "loss": 0.6854583024978638, | |
| "step": 638 | |
| }, | |
| { | |
| "epoch": 1.4190687361419068, | |
| "grad_norm": 0.5645637512207031, | |
| "learning_rate": 1.5824670748416085e-05, | |
| "loss": 0.900244414806366, | |
| "step": 640 | |
| }, | |
| { | |
| "epoch": 1.4235033259423504, | |
| "grad_norm": 0.7116575837135315, | |
| "learning_rate": 1.5796771031801034e-05, | |
| "loss": 0.8295862674713135, | |
| "step": 642 | |
| }, | |
| { | |
| "epoch": 1.4279379157427938, | |
| "grad_norm": 0.7264999747276306, | |
| "learning_rate": 1.5768806780453766e-05, | |
| "loss": 0.6157872676849365, | |
| "step": 644 | |
| }, | |
| { | |
| "epoch": 1.4323725055432373, | |
| "grad_norm": 0.608518123626709, | |
| "learning_rate": 1.5740778370599344e-05, | |
| "loss": 1.0620026588439941, | |
| "step": 646 | |
| }, | |
| { | |
| "epoch": 1.4368070953436807, | |
| "grad_norm": 0.5453920364379883, | |
| "learning_rate": 1.5712686179326004e-05, | |
| "loss": 1.2050490379333496, | |
| "step": 648 | |
| }, | |
| { | |
| "epoch": 1.441241685144124, | |
| "grad_norm": 0.42610880732536316, | |
| "learning_rate": 1.5684530584580077e-05, | |
| "loss": 1.1291793584823608, | |
| "step": 650 | |
| }, | |
| { | |
| "epoch": 1.4456762749445677, | |
| "grad_norm": 2.327178716659546, | |
| "learning_rate": 1.565631196516093e-05, | |
| "loss": 0.8947151899337769, | |
| "step": 652 | |
| }, | |
| { | |
| "epoch": 1.4501108647450112, | |
| "grad_norm": 0.7120440602302551, | |
| "learning_rate": 1.5628030700715824e-05, | |
| "loss": 0.8887991905212402, | |
| "step": 654 | |
| }, | |
| { | |
| "epoch": 1.4545454545454546, | |
| "grad_norm": 1.0359218120574951, | |
| "learning_rate": 1.5599687171734853e-05, | |
| "loss": 0.7058618664741516, | |
| "step": 656 | |
| }, | |
| { | |
| "epoch": 1.458980044345898, | |
| "grad_norm": 0.5742671489715576, | |
| "learning_rate": 1.5571281759545793e-05, | |
| "loss": 0.7722383141517639, | |
| "step": 658 | |
| }, | |
| { | |
| "epoch": 1.4634146341463414, | |
| "grad_norm": 0.6867632865905762, | |
| "learning_rate": 1.5542814846308996e-05, | |
| "loss": 0.9778433442115784, | |
| "step": 660 | |
| }, | |
| { | |
| "epoch": 1.4678492239467849, | |
| "grad_norm": 0.42144981026649475, | |
| "learning_rate": 1.5514286815012222e-05, | |
| "loss": 0.9305572509765625, | |
| "step": 662 | |
| }, | |
| { | |
| "epoch": 1.4722838137472283, | |
| "grad_norm": 0.5244068503379822, | |
| "learning_rate": 1.548569804946551e-05, | |
| "loss": 0.7543381452560425, | |
| "step": 664 | |
| }, | |
| { | |
| "epoch": 1.476718403547672, | |
| "grad_norm": 0.4360713064670563, | |
| "learning_rate": 1.5457048934296e-05, | |
| "loss": 0.4798527956008911, | |
| "step": 666 | |
| }, | |
| { | |
| "epoch": 1.4811529933481153, | |
| "grad_norm": 0.905125081539154, | |
| "learning_rate": 1.5428339854942757e-05, | |
| "loss": 0.5245689749717712, | |
| "step": 668 | |
| }, | |
| { | |
| "epoch": 1.4855875831485588, | |
| "grad_norm": 0.6136901378631592, | |
| "learning_rate": 1.539957119765161e-05, | |
| "loss": 0.9089503884315491, | |
| "step": 670 | |
| }, | |
| { | |
| "epoch": 1.4900221729490022, | |
| "grad_norm": 0.4613928496837616, | |
| "learning_rate": 1.537074334946992e-05, | |
| "loss": 0.9715514779090881, | |
| "step": 672 | |
| }, | |
| { | |
| "epoch": 1.4944567627494456, | |
| "grad_norm": 0.6848336458206177, | |
| "learning_rate": 1.5341856698241397e-05, | |
| "loss": 0.6604840755462646, | |
| "step": 674 | |
| }, | |
| { | |
| "epoch": 1.4988913525498893, | |
| "grad_norm": 0.7074861526489258, | |
| "learning_rate": 1.531291163260087e-05, | |
| "loss": 0.6721962094306946, | |
| "step": 676 | |
| }, | |
| { | |
| "epoch": 1.5033259423503327, | |
| "grad_norm": 0.7671158909797668, | |
| "learning_rate": 1.5283908541969064e-05, | |
| "loss": 1.0287514925003052, | |
| "step": 678 | |
| }, | |
| { | |
| "epoch": 1.507760532150776, | |
| "grad_norm": 0.46018627285957336, | |
| "learning_rate": 1.5254847816547366e-05, | |
| "loss": 0.5789790153503418, | |
| "step": 680 | |
| }, | |
| { | |
| "epoch": 1.5121951219512195, | |
| "grad_norm": 0.5391964316368103, | |
| "learning_rate": 1.522572984731256e-05, | |
| "loss": 0.5692949295043945, | |
| "step": 682 | |
| }, | |
| { | |
| "epoch": 1.516629711751663, | |
| "grad_norm": 0.523459792137146, | |
| "learning_rate": 1.5196555026011585e-05, | |
| "loss": 0.934548556804657, | |
| "step": 684 | |
| }, | |
| { | |
| "epoch": 1.5210643015521064, | |
| "grad_norm": 0.328876793384552, | |
| "learning_rate": 1.5167323745156248e-05, | |
| "loss": 0.9151366949081421, | |
| "step": 686 | |
| }, | |
| { | |
| "epoch": 1.5254988913525498, | |
| "grad_norm": 0.5242407321929932, | |
| "learning_rate": 1.5138036398017953e-05, | |
| "loss": 0.5513712763786316, | |
| "step": 688 | |
| }, | |
| { | |
| "epoch": 1.5299334811529932, | |
| "grad_norm": 0.38611844182014465, | |
| "learning_rate": 1.510869337862241e-05, | |
| "loss": 0.281048059463501, | |
| "step": 690 | |
| }, | |
| { | |
| "epoch": 1.5343680709534369, | |
| "grad_norm": 1.463240146636963, | |
| "learning_rate": 1.507929508174433e-05, | |
| "loss": 0.8684556484222412, | |
| "step": 692 | |
| }, | |
| { | |
| "epoch": 1.5388026607538803, | |
| "grad_norm": 1.3095505237579346, | |
| "learning_rate": 1.5049841902902119e-05, | |
| "loss": 0.8829594254493713, | |
| "step": 694 | |
| }, | |
| { | |
| "epoch": 1.5432372505543237, | |
| "grad_norm": 1.3540315628051758, | |
| "learning_rate": 1.5020334238352546e-05, | |
| "loss": 0.5511650443077087, | |
| "step": 696 | |
| }, | |
| { | |
| "epoch": 1.5476718403547673, | |
| "grad_norm": 0.36952298879623413, | |
| "learning_rate": 1.499077248508542e-05, | |
| "loss": 1.02639639377594, | |
| "step": 698 | |
| }, | |
| { | |
| "epoch": 1.5521064301552108, | |
| "grad_norm": 1.0932236909866333, | |
| "learning_rate": 1.496115704081826e-05, | |
| "loss": 1.0058600902557373, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 1.5565410199556542, | |
| "grad_norm": 0.49011874198913574, | |
| "learning_rate": 1.4931488303990916e-05, | |
| "loss": 1.0263029336929321, | |
| "step": 702 | |
| }, | |
| { | |
| "epoch": 1.5609756097560976, | |
| "grad_norm": 1.3680771589279175, | |
| "learning_rate": 1.4901766673760232e-05, | |
| "loss": 0.824455738067627, | |
| "step": 704 | |
| }, | |
| { | |
| "epoch": 1.565410199556541, | |
| "grad_norm": 0.5223835110664368, | |
| "learning_rate": 1.4871992549994673e-05, | |
| "loss": 0.4502509832382202, | |
| "step": 706 | |
| }, | |
| { | |
| "epoch": 1.5698447893569845, | |
| "grad_norm": 0.5144345164299011, | |
| "learning_rate": 1.4842166333268932e-05, | |
| "loss": 1.0360265970230103, | |
| "step": 708 | |
| }, | |
| { | |
| "epoch": 1.5742793791574279, | |
| "grad_norm": 1.030713438987732, | |
| "learning_rate": 1.481228842485856e-05, | |
| "loss": 0.8033937215805054, | |
| "step": 710 | |
| }, | |
| { | |
| "epoch": 1.5787139689578713, | |
| "grad_norm": 0.8714462518692017, | |
| "learning_rate": 1.4782359226734544e-05, | |
| "loss": 0.6804985404014587, | |
| "step": 712 | |
| }, | |
| { | |
| "epoch": 1.5831485587583147, | |
| "grad_norm": 0.4418451488018036, | |
| "learning_rate": 1.475237914155792e-05, | |
| "loss": 0.9747523665428162, | |
| "step": 714 | |
| }, | |
| { | |
| "epoch": 1.5875831485587582, | |
| "grad_norm": 0.4844651520252228, | |
| "learning_rate": 1.472234857267435e-05, | |
| "loss": 0.9988541603088379, | |
| "step": 716 | |
| }, | |
| { | |
| "epoch": 1.5920177383592018, | |
| "grad_norm": 1.146903395652771, | |
| "learning_rate": 1.4692267924108683e-05, | |
| "loss": 1.0589611530303955, | |
| "step": 718 | |
| }, | |
| { | |
| "epoch": 1.5964523281596452, | |
| "grad_norm": 1.1565581560134888, | |
| "learning_rate": 1.466213760055954e-05, | |
| "loss": 0.5897700786590576, | |
| "step": 720 | |
| }, | |
| { | |
| "epoch": 1.6008869179600886, | |
| "grad_norm": 0.23559361696243286, | |
| "learning_rate": 1.4631958007393854e-05, | |
| "loss": 0.4846925735473633, | |
| "step": 722 | |
| }, | |
| { | |
| "epoch": 1.6053215077605323, | |
| "grad_norm": 0.4940757751464844, | |
| "learning_rate": 1.4601729550641417e-05, | |
| "loss": 1.0242489576339722, | |
| "step": 724 | |
| }, | |
| { | |
| "epoch": 1.6097560975609757, | |
| "grad_norm": 1.7630901336669922, | |
| "learning_rate": 1.4571452636989433e-05, | |
| "loss": 1.0372512340545654, | |
| "step": 726 | |
| }, | |
| { | |
| "epoch": 1.6141906873614191, | |
| "grad_norm": 0.36424028873443604, | |
| "learning_rate": 1.4541127673777021e-05, | |
| "loss": 0.7359429001808167, | |
| "step": 728 | |
| }, | |
| { | |
| "epoch": 1.6186252771618626, | |
| "grad_norm": 0.4631586968898773, | |
| "learning_rate": 1.451075506898975e-05, | |
| "loss": 0.9926391839981079, | |
| "step": 730 | |
| }, | |
| { | |
| "epoch": 1.623059866962306, | |
| "grad_norm": 0.43977200984954834, | |
| "learning_rate": 1.4480335231254164e-05, | |
| "loss": 0.9845470786094666, | |
| "step": 732 | |
| }, | |
| { | |
| "epoch": 1.6274944567627494, | |
| "grad_norm": 0.5064222812652588, | |
| "learning_rate": 1.4449868569832253e-05, | |
| "loss": 0.9982655048370361, | |
| "step": 734 | |
| }, | |
| { | |
| "epoch": 1.6319290465631928, | |
| "grad_norm": 0.2603287994861603, | |
| "learning_rate": 1.4419355494615963e-05, | |
| "loss": 0.45653244853019714, | |
| "step": 736 | |
| }, | |
| { | |
| "epoch": 1.6363636363636362, | |
| "grad_norm": 0.5068104863166809, | |
| "learning_rate": 1.4388796416121696e-05, | |
| "loss": 1.2514511346817017, | |
| "step": 738 | |
| }, | |
| { | |
| "epoch": 1.6407982261640797, | |
| "grad_norm": 0.39673784375190735, | |
| "learning_rate": 1.4358191745484755e-05, | |
| "loss": 0.9661815166473389, | |
| "step": 740 | |
| }, | |
| { | |
| "epoch": 1.6452328159645233, | |
| "grad_norm": 0.9892500638961792, | |
| "learning_rate": 1.432754189445384e-05, | |
| "loss": 1.1122088432312012, | |
| "step": 742 | |
| }, | |
| { | |
| "epoch": 1.6496674057649667, | |
| "grad_norm": 0.6944724917411804, | |
| "learning_rate": 1.4296847275385495e-05, | |
| "loss": 0.7954747080802917, | |
| "step": 744 | |
| }, | |
| { | |
| "epoch": 1.6541019955654102, | |
| "grad_norm": 1.078669548034668, | |
| "learning_rate": 1.4266108301238564e-05, | |
| "loss": 0.856575071811676, | |
| "step": 746 | |
| }, | |
| { | |
| "epoch": 1.6585365853658538, | |
| "grad_norm": 0.7615432143211365, | |
| "learning_rate": 1.4235325385568636e-05, | |
| "loss": 0.6531709432601929, | |
| "step": 748 | |
| }, | |
| { | |
| "epoch": 1.6629711751662972, | |
| "grad_norm": 0.47316062450408936, | |
| "learning_rate": 1.4204498942522482e-05, | |
| "loss": 0.971373975276947, | |
| "step": 750 | |
| }, | |
| { | |
| "epoch": 1.6674057649667406, | |
| "grad_norm": 0.4431406259536743, | |
| "learning_rate": 1.4173629386832473e-05, | |
| "loss": 0.7244459390640259, | |
| "step": 752 | |
| }, | |
| { | |
| "epoch": 1.671840354767184, | |
| "grad_norm": 0.5017882585525513, | |
| "learning_rate": 1.4142717133811013e-05, | |
| "loss": 0.5894262790679932, | |
| "step": 754 | |
| }, | |
| { | |
| "epoch": 1.6762749445676275, | |
| "grad_norm": 0.7016173005104065, | |
| "learning_rate": 1.4111762599344952e-05, | |
| "loss": 1.006710171699524, | |
| "step": 756 | |
| }, | |
| { | |
| "epoch": 1.680709534368071, | |
| "grad_norm": 0.8765194416046143, | |
| "learning_rate": 1.4080766199889976e-05, | |
| "loss": 0.9072303771972656, | |
| "step": 758 | |
| }, | |
| { | |
| "epoch": 1.6851441241685143, | |
| "grad_norm": 1.2686158418655396, | |
| "learning_rate": 1.404972835246502e-05, | |
| "loss": 0.8974109292030334, | |
| "step": 760 | |
| }, | |
| { | |
| "epoch": 1.6895787139689578, | |
| "grad_norm": 0.8306912183761597, | |
| "learning_rate": 1.401864947464665e-05, | |
| "loss": 0.8825592994689941, | |
| "step": 762 | |
| }, | |
| { | |
| "epoch": 1.6940133037694012, | |
| "grad_norm": 1.107991337776184, | |
| "learning_rate": 1.3987529984563444e-05, | |
| "loss": 0.9357943534851074, | |
| "step": 764 | |
| }, | |
| { | |
| "epoch": 1.6984478935698448, | |
| "grad_norm": 1.4103295803070068, | |
| "learning_rate": 1.3956370300890374e-05, | |
| "loss": 1.0407212972640991, | |
| "step": 766 | |
| }, | |
| { | |
| "epoch": 1.7028824833702882, | |
| "grad_norm": 1.0025876760482788, | |
| "learning_rate": 1.392517084284316e-05, | |
| "loss": 0.6954239010810852, | |
| "step": 768 | |
| }, | |
| { | |
| "epoch": 1.7073170731707317, | |
| "grad_norm": 0.5951728224754333, | |
| "learning_rate": 1.3893932030172642e-05, | |
| "loss": 0.9474072456359863, | |
| "step": 770 | |
| }, | |
| { | |
| "epoch": 1.7117516629711753, | |
| "grad_norm": 1.6196831464767456, | |
| "learning_rate": 1.386265428315913e-05, | |
| "loss": 0.9979518055915833, | |
| "step": 772 | |
| }, | |
| { | |
| "epoch": 1.7161862527716187, | |
| "grad_norm": 0.4795306622982025, | |
| "learning_rate": 1.3831338022606748e-05, | |
| "loss": 0.8625308275222778, | |
| "step": 774 | |
| }, | |
| { | |
| "epoch": 1.7206208425720622, | |
| "grad_norm": 0.9456951022148132, | |
| "learning_rate": 1.3799983669837768e-05, | |
| "loss": 0.9803452491760254, | |
| "step": 776 | |
| }, | |
| { | |
| "epoch": 1.7250554323725056, | |
| "grad_norm": 0.46205422282218933, | |
| "learning_rate": 1.3768591646686957e-05, | |
| "loss": 1.0163923501968384, | |
| "step": 778 | |
| }, | |
| { | |
| "epoch": 1.729490022172949, | |
| "grad_norm": 0.6149927377700806, | |
| "learning_rate": 1.3737162375495883e-05, | |
| "loss": 0.5648257732391357, | |
| "step": 780 | |
| }, | |
| { | |
| "epoch": 1.7339246119733924, | |
| "grad_norm": 0.35180729627609253, | |
| "learning_rate": 1.3705696279107238e-05, | |
| "loss": 0.9397526979446411, | |
| "step": 782 | |
| }, | |
| { | |
| "epoch": 1.7383592017738358, | |
| "grad_norm": 0.3703164756298065, | |
| "learning_rate": 1.3674193780859163e-05, | |
| "loss": 0.6409098505973816, | |
| "step": 784 | |
| }, | |
| { | |
| "epoch": 1.7427937915742793, | |
| "grad_norm": 0.4282858371734619, | |
| "learning_rate": 1.3642655304579535e-05, | |
| "loss": 0.7513792514801025, | |
| "step": 786 | |
| }, | |
| { | |
| "epoch": 1.7472283813747227, | |
| "grad_norm": 0.3827633857727051, | |
| "learning_rate": 1.3611081274580269e-05, | |
| "loss": 0.6845064759254456, | |
| "step": 788 | |
| }, | |
| { | |
| "epoch": 1.7516629711751663, | |
| "grad_norm": 1.2396421432495117, | |
| "learning_rate": 1.3579472115651623e-05, | |
| "loss": 0.6268539428710938, | |
| "step": 790 | |
| }, | |
| { | |
| "epoch": 1.7560975609756098, | |
| "grad_norm": 0.40521490573883057, | |
| "learning_rate": 1.354782825305646e-05, | |
| "loss": 0.6478447914123535, | |
| "step": 792 | |
| }, | |
| { | |
| "epoch": 1.7605321507760532, | |
| "grad_norm": 0.32460105419158936, | |
| "learning_rate": 1.3516150112524542e-05, | |
| "loss": 0.8190337419509888, | |
| "step": 794 | |
| }, | |
| { | |
| "epoch": 1.7649667405764968, | |
| "grad_norm": 0.8050366640090942, | |
| "learning_rate": 1.3484438120246806e-05, | |
| "loss": 0.8022271394729614, | |
| "step": 796 | |
| }, | |
| { | |
| "epoch": 1.7694013303769403, | |
| "grad_norm": 0.4470427930355072, | |
| "learning_rate": 1.3452692702869619e-05, | |
| "loss": 0.9513342380523682, | |
| "step": 798 | |
| }, | |
| { | |
| "epoch": 1.7738359201773837, | |
| "grad_norm": 0.43522873520851135, | |
| "learning_rate": 1.3420914287489037e-05, | |
| "loss": 0.9605931043624878, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 1.778270509977827, | |
| "grad_norm": 0.6569511890411377, | |
| "learning_rate": 1.3389103301645065e-05, | |
| "loss": 0.9895227551460266, | |
| "step": 802 | |
| }, | |
| { | |
| "epoch": 1.7827050997782705, | |
| "grad_norm": 0.5629826188087463, | |
| "learning_rate": 1.3357260173315918e-05, | |
| "loss": 1.1033282279968262, | |
| "step": 804 | |
| }, | |
| { | |
| "epoch": 1.787139689578714, | |
| "grad_norm": 0.4114173352718353, | |
| "learning_rate": 1.332538533091223e-05, | |
| "loss": 0.74909508228302, | |
| "step": 806 | |
| }, | |
| { | |
| "epoch": 1.7915742793791574, | |
| "grad_norm": 0.39374831318855286, | |
| "learning_rate": 1.3293479203271322e-05, | |
| "loss": 0.9650196433067322, | |
| "step": 808 | |
| }, | |
| { | |
| "epoch": 1.7960088691796008, | |
| "grad_norm": 1.316881537437439, | |
| "learning_rate": 1.3261542219651415e-05, | |
| "loss": 0.5823323130607605, | |
| "step": 810 | |
| }, | |
| { | |
| "epoch": 1.8004434589800442, | |
| "grad_norm": 0.8751013278961182, | |
| "learning_rate": 1.3229574809725859e-05, | |
| "loss": 0.5940043926239014, | |
| "step": 812 | |
| }, | |
| { | |
| "epoch": 1.8048780487804879, | |
| "grad_norm": 1.2625625133514404, | |
| "learning_rate": 1.3197577403577355e-05, | |
| "loss": 0.9879517555236816, | |
| "step": 814 | |
| }, | |
| { | |
| "epoch": 1.8093126385809313, | |
| "grad_norm": 2.798226833343506, | |
| "learning_rate": 1.3165550431692164e-05, | |
| "loss": 0.8953067064285278, | |
| "step": 816 | |
| }, | |
| { | |
| "epoch": 1.8137472283813747, | |
| "grad_norm": 0.4607974588871002, | |
| "learning_rate": 1.3133494324954328e-05, | |
| "loss": 0.4630458652973175, | |
| "step": 818 | |
| }, | |
| { | |
| "epoch": 1.8181818181818183, | |
| "grad_norm": 0.7473888993263245, | |
| "learning_rate": 1.3101409514639847e-05, | |
| "loss": 1.0197738409042358, | |
| "step": 820 | |
| }, | |
| { | |
| "epoch": 1.8226164079822618, | |
| "grad_norm": 0.7188895344734192, | |
| "learning_rate": 1.3069296432410905e-05, | |
| "loss": 1.0835227966308594, | |
| "step": 822 | |
| }, | |
| { | |
| "epoch": 1.8270509977827052, | |
| "grad_norm": 0.7948015928268433, | |
| "learning_rate": 1.3037155510310047e-05, | |
| "loss": 1.1620758771896362, | |
| "step": 824 | |
| }, | |
| { | |
| "epoch": 1.8314855875831486, | |
| "grad_norm": 2.9718968868255615, | |
| "learning_rate": 1.3004987180754367e-05, | |
| "loss": 0.9052017331123352, | |
| "step": 826 | |
| }, | |
| { | |
| "epoch": 1.835920177383592, | |
| "grad_norm": 2.999119281768799, | |
| "learning_rate": 1.29727918765297e-05, | |
| "loss": 0.8258069753646851, | |
| "step": 828 | |
| }, | |
| { | |
| "epoch": 1.8403547671840355, | |
| "grad_norm": 0.6131216287612915, | |
| "learning_rate": 1.2940570030784783e-05, | |
| "loss": 0.9284101128578186, | |
| "step": 830 | |
| }, | |
| { | |
| "epoch": 1.8447893569844789, | |
| "grad_norm": 1.4488681554794312, | |
| "learning_rate": 1.290832207702544e-05, | |
| "loss": 0.9328111410140991, | |
| "step": 832 | |
| }, | |
| { | |
| "epoch": 1.8492239467849223, | |
| "grad_norm": 0.4498242139816284, | |
| "learning_rate": 1.2876048449108756e-05, | |
| "loss": 0.9122157096862793, | |
| "step": 834 | |
| }, | |
| { | |
| "epoch": 1.8536585365853657, | |
| "grad_norm": 0.4527730643749237, | |
| "learning_rate": 1.2843749581237216e-05, | |
| "loss": 0.951221227645874, | |
| "step": 836 | |
| }, | |
| { | |
| "epoch": 1.8580931263858091, | |
| "grad_norm": 0.5404245257377625, | |
| "learning_rate": 1.2811425907952887e-05, | |
| "loss": 0.904753565788269, | |
| "step": 838 | |
| }, | |
| { | |
| "epoch": 1.8625277161862528, | |
| "grad_norm": 0.6924629807472229, | |
| "learning_rate": 1.2779077864131566e-05, | |
| "loss": 1.0605340003967285, | |
| "step": 840 | |
| }, | |
| { | |
| "epoch": 1.8669623059866962, | |
| "grad_norm": 0.4970324635505676, | |
| "learning_rate": 1.274670588497691e-05, | |
| "loss": 0.5903202295303345, | |
| "step": 842 | |
| }, | |
| { | |
| "epoch": 1.8713968957871396, | |
| "grad_norm": 0.793752133846283, | |
| "learning_rate": 1.2714310406014613e-05, | |
| "loss": 0.7120020389556885, | |
| "step": 844 | |
| }, | |
| { | |
| "epoch": 1.8758314855875833, | |
| "grad_norm": 0.8532220721244812, | |
| "learning_rate": 1.2681891863086526e-05, | |
| "loss": 0.7570974230766296, | |
| "step": 846 | |
| }, | |
| { | |
| "epoch": 1.8802660753880267, | |
| "grad_norm": 0.6667500734329224, | |
| "learning_rate": 1.2649450692344798e-05, | |
| "loss": 1.010290265083313, | |
| "step": 848 | |
| }, | |
| { | |
| "epoch": 1.8847006651884701, | |
| "grad_norm": 0.5184866786003113, | |
| "learning_rate": 1.2616987330246e-05, | |
| "loss": 0.9949779510498047, | |
| "step": 850 | |
| }, | |
| { | |
| "epoch": 1.8891352549889135, | |
| "grad_norm": 0.41842129826545715, | |
| "learning_rate": 1.2584502213545273e-05, | |
| "loss": 0.6566750407218933, | |
| "step": 852 | |
| }, | |
| { | |
| "epoch": 1.893569844789357, | |
| "grad_norm": 0.47411566972732544, | |
| "learning_rate": 1.2551995779290431e-05, | |
| "loss": 0.9789588451385498, | |
| "step": 854 | |
| }, | |
| { | |
| "epoch": 1.8980044345898004, | |
| "grad_norm": 0.41428887844085693, | |
| "learning_rate": 1.2519468464816094e-05, | |
| "loss": 0.8622305989265442, | |
| "step": 856 | |
| }, | |
| { | |
| "epoch": 1.9024390243902438, | |
| "grad_norm": 0.5540589094161987, | |
| "learning_rate": 1.2486920707737795e-05, | |
| "loss": 0.7378232479095459, | |
| "step": 858 | |
| }, | |
| { | |
| "epoch": 1.9068736141906872, | |
| "grad_norm": 0.9826019406318665, | |
| "learning_rate": 1.2454352945946105e-05, | |
| "loss": 0.7468891143798828, | |
| "step": 860 | |
| }, | |
| { | |
| "epoch": 1.9113082039911307, | |
| "grad_norm": 1.3631356954574585, | |
| "learning_rate": 1.2421765617600732e-05, | |
| "loss": 0.9804845452308655, | |
| "step": 862 | |
| }, | |
| { | |
| "epoch": 1.9157427937915743, | |
| "grad_norm": 0.6108648777008057, | |
| "learning_rate": 1.238915916112462e-05, | |
| "loss": 0.7339483499526978, | |
| "step": 864 | |
| }, | |
| { | |
| "epoch": 1.9201773835920177, | |
| "grad_norm": 1.0804190635681152, | |
| "learning_rate": 1.2356534015198067e-05, | |
| "loss": 0.6702901721000671, | |
| "step": 866 | |
| }, | |
| { | |
| "epoch": 1.9246119733924612, | |
| "grad_norm": 0.8905138373374939, | |
| "learning_rate": 1.2323890618752818e-05, | |
| "loss": 1.140580415725708, | |
| "step": 868 | |
| }, | |
| { | |
| "epoch": 1.9290465631929048, | |
| "grad_norm": 0.4676206409931183, | |
| "learning_rate": 1.229122941096615e-05, | |
| "loss": 0.9294151663780212, | |
| "step": 870 | |
| }, | |
| { | |
| "epoch": 1.9334811529933482, | |
| "grad_norm": 0.30312380194664, | |
| "learning_rate": 1.225855083125497e-05, | |
| "loss": 0.6089338660240173, | |
| "step": 872 | |
| }, | |
| { | |
| "epoch": 1.9379157427937916, | |
| "grad_norm": 0.8847364783287048, | |
| "learning_rate": 1.22258553192699e-05, | |
| "loss": 0.645588219165802, | |
| "step": 874 | |
| }, | |
| { | |
| "epoch": 1.942350332594235, | |
| "grad_norm": 0.6345183253288269, | |
| "learning_rate": 1.219314331488938e-05, | |
| "loss": 0.6743212938308716, | |
| "step": 876 | |
| }, | |
| { | |
| "epoch": 1.9467849223946785, | |
| "grad_norm": 1.4533907175064087, | |
| "learning_rate": 1.2160415258213719e-05, | |
| "loss": 0.8229029774665833, | |
| "step": 878 | |
| }, | |
| { | |
| "epoch": 1.951219512195122, | |
| "grad_norm": 0.656122624874115, | |
| "learning_rate": 1.2127671589559195e-05, | |
| "loss": 0.8455672860145569, | |
| "step": 880 | |
| }, | |
| { | |
| "epoch": 1.9556541019955653, | |
| "grad_norm": 1.9663106203079224, | |
| "learning_rate": 1.2094912749452134e-05, | |
| "loss": 0.6619812846183777, | |
| "step": 882 | |
| }, | |
| { | |
| "epoch": 1.9600886917960088, | |
| "grad_norm": 0.43535202741622925, | |
| "learning_rate": 1.2062139178622963e-05, | |
| "loss": 0.81618332862854, | |
| "step": 884 | |
| }, | |
| { | |
| "epoch": 1.9645232815964522, | |
| "grad_norm": 0.42277711629867554, | |
| "learning_rate": 1.20293513180003e-05, | |
| "loss": 0.9992027878761292, | |
| "step": 886 | |
| }, | |
| { | |
| "epoch": 1.9689578713968958, | |
| "grad_norm": 0.40196138620376587, | |
| "learning_rate": 1.199654960870502e-05, | |
| "loss": 0.9606343507766724, | |
| "step": 888 | |
| }, | |
| { | |
| "epoch": 1.9733924611973392, | |
| "grad_norm": 0.42394229769706726, | |
| "learning_rate": 1.1963734492044299e-05, | |
| "loss": 0.9592314958572388, | |
| "step": 890 | |
| }, | |
| { | |
| "epoch": 1.9778270509977827, | |
| "grad_norm": 0.549923300743103, | |
| "learning_rate": 1.193090640950571e-05, | |
| "loss": 1.0462260246276855, | |
| "step": 892 | |
| }, | |
| { | |
| "epoch": 1.9822616407982263, | |
| "grad_norm": 0.6976901292800903, | |
| "learning_rate": 1.1898065802751254e-05, | |
| "loss": 0.9654414653778076, | |
| "step": 894 | |
| }, | |
| { | |
| "epoch": 1.9866962305986697, | |
| "grad_norm": 1.4644861221313477, | |
| "learning_rate": 1.1865213113611438e-05, | |
| "loss": 0.8772508502006531, | |
| "step": 896 | |
| }, | |
| { | |
| "epoch": 1.9911308203991132, | |
| "grad_norm": 0.6265084147453308, | |
| "learning_rate": 1.1832348784079319e-05, | |
| "loss": 0.9136525988578796, | |
| "step": 898 | |
| }, | |
| { | |
| "epoch": 1.9955654101995566, | |
| "grad_norm": 0.4937969148159027, | |
| "learning_rate": 1.1799473256304567e-05, | |
| "loss": 0.7895318269729614, | |
| "step": 900 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "grad_norm": 0.5102665424346924, | |
| "learning_rate": 1.17665869725875e-05, | |
| "loss": 0.9466162919998169, | |
| "step": 902 | |
| }, | |
| { | |
| "epoch": 2.0044345898004434, | |
| "grad_norm": 0.4070099890232086, | |
| "learning_rate": 1.1733690375373147e-05, | |
| "loss": 0.715006411075592, | |
| "step": 904 | |
| }, | |
| { | |
| "epoch": 2.008869179600887, | |
| "grad_norm": 0.5904584527015686, | |
| "learning_rate": 1.1700783907245304e-05, | |
| "loss": 0.6284165978431702, | |
| "step": 906 | |
| }, | |
| { | |
| "epoch": 2.0133037694013303, | |
| "grad_norm": 0.4084486961364746, | |
| "learning_rate": 1.1667868010920555e-05, | |
| "loss": 0.4244351387023926, | |
| "step": 908 | |
| }, | |
| { | |
| "epoch": 2.0177383592017737, | |
| "grad_norm": 0.8332369923591614, | |
| "learning_rate": 1.1634943129242337e-05, | |
| "loss": 0.5955982208251953, | |
| "step": 910 | |
| }, | |
| { | |
| "epoch": 2.022172949002217, | |
| "grad_norm": 0.8778854012489319, | |
| "learning_rate": 1.160200970517497e-05, | |
| "loss": 0.50541752576828, | |
| "step": 912 | |
| }, | |
| { | |
| "epoch": 2.0266075388026605, | |
| "grad_norm": 4.370595932006836, | |
| "learning_rate": 1.1569068181797699e-05, | |
| "loss": 0.5145138502120972, | |
| "step": 914 | |
| }, | |
| { | |
| "epoch": 2.0310421286031044, | |
| "grad_norm": 1.4196687936782837, | |
| "learning_rate": 1.1536119002298737e-05, | |
| "loss": 0.47636979818344116, | |
| "step": 916 | |
| }, | |
| { | |
| "epoch": 2.035476718403548, | |
| "grad_norm": 0.7198065519332886, | |
| "learning_rate": 1.1503162609969314e-05, | |
| "loss": 0.5563622713088989, | |
| "step": 918 | |
| }, | |
| { | |
| "epoch": 2.0399113082039912, | |
| "grad_norm": 0.43456801772117615, | |
| "learning_rate": 1.1470199448197677e-05, | |
| "loss": 0.5351572632789612, | |
| "step": 920 | |
| }, | |
| { | |
| "epoch": 2.0443458980044347, | |
| "grad_norm": 0.5137150287628174, | |
| "learning_rate": 1.1437229960463163e-05, | |
| "loss": 0.5629701614379883, | |
| "step": 922 | |
| }, | |
| { | |
| "epoch": 2.048780487804878, | |
| "grad_norm": 0.3429313004016876, | |
| "learning_rate": 1.1404254590330213e-05, | |
| "loss": 0.15150287747383118, | |
| "step": 924 | |
| }, | |
| { | |
| "epoch": 2.0532150776053215, | |
| "grad_norm": 0.5494690537452698, | |
| "learning_rate": 1.137127378144241e-05, | |
| "loss": 0.5665069222450256, | |
| "step": 926 | |
| }, | |
| { | |
| "epoch": 2.057649667405765, | |
| "grad_norm": 1.4760738611221313, | |
| "learning_rate": 1.1338287977516507e-05, | |
| "loss": 0.23657920956611633, | |
| "step": 928 | |
| }, | |
| { | |
| "epoch": 2.0620842572062084, | |
| "grad_norm": 0.3918812870979309, | |
| "learning_rate": 1.1305297622336457e-05, | |
| "loss": 0.3985291123390198, | |
| "step": 930 | |
| }, | |
| { | |
| "epoch": 2.066518847006652, | |
| "grad_norm": 3.808762788772583, | |
| "learning_rate": 1.1272303159747451e-05, | |
| "loss": 0.46506452560424805, | |
| "step": 932 | |
| }, | |
| { | |
| "epoch": 2.070953436807095, | |
| "grad_norm": 0.577021062374115, | |
| "learning_rate": 1.1239305033649934e-05, | |
| "loss": 0.5112553834915161, | |
| "step": 934 | |
| }, | |
| { | |
| "epoch": 2.0753880266075386, | |
| "grad_norm": 0.7988712787628174, | |
| "learning_rate": 1.1206303687993644e-05, | |
| "loss": 0.7404617071151733, | |
| "step": 936 | |
| }, | |
| { | |
| "epoch": 2.079822616407982, | |
| "grad_norm": 0.4242592751979828, | |
| "learning_rate": 1.1173299566771626e-05, | |
| "loss": 0.33282893896102905, | |
| "step": 938 | |
| }, | |
| { | |
| "epoch": 2.084257206208426, | |
| "grad_norm": 0.46631020307540894, | |
| "learning_rate": 1.1140293114014282e-05, | |
| "loss": 0.4563349485397339, | |
| "step": 940 | |
| }, | |
| { | |
| "epoch": 2.0886917960088693, | |
| "grad_norm": 1.1207689046859741, | |
| "learning_rate": 1.1107284773783367e-05, | |
| "loss": 0.5358268022537231, | |
| "step": 942 | |
| }, | |
| { | |
| "epoch": 2.0931263858093128, | |
| "grad_norm": 0.6466286182403564, | |
| "learning_rate": 1.1074274990166036e-05, | |
| "loss": 0.406946063041687, | |
| "step": 944 | |
| }, | |
| { | |
| "epoch": 2.097560975609756, | |
| "grad_norm": 0.6163548827171326, | |
| "learning_rate": 1.1041264207268861e-05, | |
| "loss": 0.5453028678894043, | |
| "step": 946 | |
| }, | |
| { | |
| "epoch": 2.1019955654101996, | |
| "grad_norm": 0.7833722233772278, | |
| "learning_rate": 1.1008252869211864e-05, | |
| "loss": 0.5683756470680237, | |
| "step": 948 | |
| }, | |
| { | |
| "epoch": 2.106430155210643, | |
| "grad_norm": 0.8931224942207336, | |
| "learning_rate": 1.0975241420122524e-05, | |
| "loss": 0.4366806149482727, | |
| "step": 950 | |
| }, | |
| { | |
| "epoch": 2.1108647450110865, | |
| "grad_norm": 0.5928601026535034, | |
| "learning_rate": 1.0942230304129831e-05, | |
| "loss": 0.4392179846763611, | |
| "step": 952 | |
| }, | |
| { | |
| "epoch": 2.11529933481153, | |
| "grad_norm": 1.2183982133865356, | |
| "learning_rate": 1.0909219965358275e-05, | |
| "loss": 0.49065983295440674, | |
| "step": 954 | |
| }, | |
| { | |
| "epoch": 2.1197339246119733, | |
| "grad_norm": 0.6264125108718872, | |
| "learning_rate": 1.0876210847921905e-05, | |
| "loss": 0.5899641513824463, | |
| "step": 956 | |
| }, | |
| { | |
| "epoch": 2.1241685144124167, | |
| "grad_norm": 0.6409426927566528, | |
| "learning_rate": 1.0843203395918327e-05, | |
| "loss": 0.4045730531215668, | |
| "step": 958 | |
| }, | |
| { | |
| "epoch": 2.12860310421286, | |
| "grad_norm": 1.60128653049469, | |
| "learning_rate": 1.0810198053422747e-05, | |
| "loss": 0.22457213699817657, | |
| "step": 960 | |
| }, | |
| { | |
| "epoch": 2.1330376940133036, | |
| "grad_norm": 1.197357177734375, | |
| "learning_rate": 1.0777195264481988e-05, | |
| "loss": 0.3387850224971771, | |
| "step": 962 | |
| }, | |
| { | |
| "epoch": 2.1374722838137474, | |
| "grad_norm": 0.8524078130722046, | |
| "learning_rate": 1.0744195473108522e-05, | |
| "loss": 0.44860363006591797, | |
| "step": 964 | |
| }, | |
| { | |
| "epoch": 2.141906873614191, | |
| "grad_norm": 0.548141598701477, | |
| "learning_rate": 1.071119912327448e-05, | |
| "loss": 0.7017822861671448, | |
| "step": 966 | |
| }, | |
| { | |
| "epoch": 2.1463414634146343, | |
| "grad_norm": 0.4515199363231659, | |
| "learning_rate": 1.0678206658905712e-05, | |
| "loss": 0.3781665563583374, | |
| "step": 968 | |
| }, | |
| { | |
| "epoch": 2.1507760532150777, | |
| "grad_norm": 0.6646062731742859, | |
| "learning_rate": 1.0645218523875773e-05, | |
| "loss": 0.51128089427948, | |
| "step": 970 | |
| }, | |
| { | |
| "epoch": 2.155210643015521, | |
| "grad_norm": 0.5504773855209351, | |
| "learning_rate": 1.0612235161999987e-05, | |
| "loss": 0.3802485764026642, | |
| "step": 972 | |
| }, | |
| { | |
| "epoch": 2.1596452328159645, | |
| "grad_norm": 0.527137279510498, | |
| "learning_rate": 1.057925701702945e-05, | |
| "loss": 0.6255434155464172, | |
| "step": 974 | |
| }, | |
| { | |
| "epoch": 2.164079822616408, | |
| "grad_norm": 0.8251080513000488, | |
| "learning_rate": 1.0546284532645077e-05, | |
| "loss": 0.49471452832221985, | |
| "step": 976 | |
| }, | |
| { | |
| "epoch": 2.1685144124168514, | |
| "grad_norm": 1.3507685661315918, | |
| "learning_rate": 1.0513318152451627e-05, | |
| "loss": 0.3210045397281647, | |
| "step": 978 | |
| }, | |
| { | |
| "epoch": 2.172949002217295, | |
| "grad_norm": 0.6633515357971191, | |
| "learning_rate": 1.0480358319971731e-05, | |
| "loss": 0.6007053852081299, | |
| "step": 980 | |
| }, | |
| { | |
| "epoch": 2.1773835920177382, | |
| "grad_norm": 0.36952632665634155, | |
| "learning_rate": 1.0447405478639929e-05, | |
| "loss": 0.2838934361934662, | |
| "step": 982 | |
| }, | |
| { | |
| "epoch": 2.1818181818181817, | |
| "grad_norm": 0.5688261985778809, | |
| "learning_rate": 1.0414460071796712e-05, | |
| "loss": 0.18350011110305786, | |
| "step": 984 | |
| }, | |
| { | |
| "epoch": 2.186252771618625, | |
| "grad_norm": 1.1831949949264526, | |
| "learning_rate": 1.0381522542682536e-05, | |
| "loss": 0.40068039298057556, | |
| "step": 986 | |
| }, | |
| { | |
| "epoch": 2.1906873614190685, | |
| "grad_norm": 1.4388840198516846, | |
| "learning_rate": 1.0348593334431878e-05, | |
| "loss": 0.23880073428153992, | |
| "step": 988 | |
| }, | |
| { | |
| "epoch": 2.1951219512195124, | |
| "grad_norm": 0.6307854652404785, | |
| "learning_rate": 1.0315672890067271e-05, | |
| "loss": 0.5894753932952881, | |
| "step": 990 | |
| }, | |
| { | |
| "epoch": 2.199556541019956, | |
| "grad_norm": 2.421830415725708, | |
| "learning_rate": 1.0282761652493334e-05, | |
| "loss": 0.4432171583175659, | |
| "step": 992 | |
| }, | |
| { | |
| "epoch": 2.203991130820399, | |
| "grad_norm": 0.5128687620162964, | |
| "learning_rate": 1.024986006449083e-05, | |
| "loss": 0.48450496792793274, | |
| "step": 994 | |
| }, | |
| { | |
| "epoch": 2.2084257206208426, | |
| "grad_norm": 0.5676178932189941, | |
| "learning_rate": 1.0216968568710679e-05, | |
| "loss": 0.5746522545814514, | |
| "step": 996 | |
| }, | |
| { | |
| "epoch": 2.212860310421286, | |
| "grad_norm": 0.5976463556289673, | |
| "learning_rate": 1.0184087607668039e-05, | |
| "loss": 0.5264995694160461, | |
| "step": 998 | |
| }, | |
| { | |
| "epoch": 2.2172949002217295, | |
| "grad_norm": 0.7051799297332764, | |
| "learning_rate": 1.0151217623736338e-05, | |
| "loss": 0.46825850009918213, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 2.221729490022173, | |
| "grad_norm": 0.8515892624855042, | |
| "learning_rate": 1.0118359059141313e-05, | |
| "loss": 0.27047228813171387, | |
| "step": 1002 | |
| }, | |
| { | |
| "epoch": 2.2261640798226163, | |
| "grad_norm": 0.4068000316619873, | |
| "learning_rate": 1.0085512355955067e-05, | |
| "loss": 0.5676589608192444, | |
| "step": 1004 | |
| }, | |
| { | |
| "epoch": 2.2305986696230597, | |
| "grad_norm": 0.8601819276809692, | |
| "learning_rate": 1.0052677956090125e-05, | |
| "loss": 0.46005040407180786, | |
| "step": 1006 | |
| }, | |
| { | |
| "epoch": 2.235033259423503, | |
| "grad_norm": 0.7253012657165527, | |
| "learning_rate": 1.0019856301293482e-05, | |
| "loss": 0.5689443945884705, | |
| "step": 1008 | |
| }, | |
| { | |
| "epoch": 2.2394678492239466, | |
| "grad_norm": 0.46540704369544983, | |
| "learning_rate": 9.987047833140668e-06, | |
| "loss": 0.3451939523220062, | |
| "step": 1010 | |
| }, | |
| { | |
| "epoch": 2.2439024390243905, | |
| "grad_norm": 2.0232350826263428, | |
| "learning_rate": 9.954252993029803e-06, | |
| "loss": 0.5826783776283264, | |
| "step": 1012 | |
| }, | |
| { | |
| "epoch": 2.248337028824834, | |
| "grad_norm": 3.809951066970825, | |
| "learning_rate": 9.921472222175654e-06, | |
| "loss": 0.5647210478782654, | |
| "step": 1014 | |
| }, | |
| { | |
| "epoch": 2.2527716186252773, | |
| "grad_norm": 1.0120117664337158, | |
| "learning_rate": 9.888705961603709e-06, | |
| "loss": 0.6450280547142029, | |
| "step": 1016 | |
| }, | |
| { | |
| "epoch": 2.2572062084257207, | |
| "grad_norm": 0.6231004595756531, | |
| "learning_rate": 9.85595465214423e-06, | |
| "loss": 0.24749194085597992, | |
| "step": 1018 | |
| }, | |
| { | |
| "epoch": 2.261640798226164, | |
| "grad_norm": 0.5251925587654114, | |
| "learning_rate": 9.823218734426336e-06, | |
| "loss": 0.5488971471786499, | |
| "step": 1020 | |
| }, | |
| { | |
| "epoch": 2.2660753880266076, | |
| "grad_norm": 0.22870703041553497, | |
| "learning_rate": 9.79049864887207e-06, | |
| "loss": 0.39323848485946655, | |
| "step": 1022 | |
| }, | |
| { | |
| "epoch": 2.270509977827051, | |
| "grad_norm": 1.2425155639648438, | |
| "learning_rate": 9.757794835690463e-06, | |
| "loss": 0.8195447325706482, | |
| "step": 1024 | |
| }, | |
| { | |
| "epoch": 2.2749445676274944, | |
| "grad_norm": 1.2200350761413574, | |
| "learning_rate": 9.72510773487164e-06, | |
| "loss": 0.39812397956848145, | |
| "step": 1026 | |
| }, | |
| { | |
| "epoch": 2.279379157427938, | |
| "grad_norm": 0.5721977353096008, | |
| "learning_rate": 9.692437786180852e-06, | |
| "loss": 0.5707634687423706, | |
| "step": 1028 | |
| }, | |
| { | |
| "epoch": 2.2838137472283813, | |
| "grad_norm": 0.47224897146224976, | |
| "learning_rate": 9.659785429152615e-06, | |
| "loss": 0.6199125051498413, | |
| "step": 1030 | |
| }, | |
| { | |
| "epoch": 2.2882483370288247, | |
| "grad_norm": 1.0250192880630493, | |
| "learning_rate": 9.627151103084763e-06, | |
| "loss": 0.41856324672698975, | |
| "step": 1032 | |
| }, | |
| { | |
| "epoch": 2.292682926829268, | |
| "grad_norm": 0.947811484336853, | |
| "learning_rate": 9.594535247032543e-06, | |
| "loss": 0.32791462540626526, | |
| "step": 1034 | |
| }, | |
| { | |
| "epoch": 2.2971175166297115, | |
| "grad_norm": 0.6266341805458069, | |
| "learning_rate": 9.561938299802709e-06, | |
| "loss": 0.5352550745010376, | |
| "step": 1036 | |
| }, | |
| { | |
| "epoch": 2.3015521064301554, | |
| "grad_norm": 4.217014789581299, | |
| "learning_rate": 9.529360699947624e-06, | |
| "loss": 0.6385710835456848, | |
| "step": 1038 | |
| }, | |
| { | |
| "epoch": 2.305986696230599, | |
| "grad_norm": 0.8212743401527405, | |
| "learning_rate": 9.496802885759349e-06, | |
| "loss": 0.4557139277458191, | |
| "step": 1040 | |
| }, | |
| { | |
| "epoch": 2.3104212860310422, | |
| "grad_norm": 1.0060659646987915, | |
| "learning_rate": 9.464265295263762e-06, | |
| "loss": 0.7039799690246582, | |
| "step": 1042 | |
| }, | |
| { | |
| "epoch": 2.3148558758314857, | |
| "grad_norm": 12.946681022644043, | |
| "learning_rate": 9.431748366214648e-06, | |
| "loss": 0.4291222095489502, | |
| "step": 1044 | |
| }, | |
| { | |
| "epoch": 2.319290465631929, | |
| "grad_norm": 0.5580220222473145, | |
| "learning_rate": 9.399252536087822e-06, | |
| "loss": 0.6024729013442993, | |
| "step": 1046 | |
| }, | |
| { | |
| "epoch": 2.3237250554323725, | |
| "grad_norm": 0.607992947101593, | |
| "learning_rate": 9.366778242075236e-06, | |
| "loss": 0.5440095663070679, | |
| "step": 1048 | |
| }, | |
| { | |
| "epoch": 2.328159645232816, | |
| "grad_norm": 0.6783135533332825, | |
| "learning_rate": 9.334325921079104e-06, | |
| "loss": 0.6058806777000427, | |
| "step": 1050 | |
| }, | |
| { | |
| "epoch": 2.3325942350332594, | |
| "grad_norm": 0.6938934922218323, | |
| "learning_rate": 9.301896009706012e-06, | |
| "loss": 0.4494543671607971, | |
| "step": 1052 | |
| }, | |
| { | |
| "epoch": 2.337028824833703, | |
| "grad_norm": 0.477782279253006, | |
| "learning_rate": 9.269488944261058e-06, | |
| "loss": 0.4361210763454437, | |
| "step": 1054 | |
| }, | |
| { | |
| "epoch": 2.341463414634146, | |
| "grad_norm": 0.5728092193603516, | |
| "learning_rate": 9.237105160741976e-06, | |
| "loss": 0.5449360609054565, | |
| "step": 1056 | |
| }, | |
| { | |
| "epoch": 2.3458980044345896, | |
| "grad_norm": 0.18092034757137299, | |
| "learning_rate": 9.204745094833265e-06, | |
| "loss": 0.3745296895503998, | |
| "step": 1058 | |
| }, | |
| { | |
| "epoch": 2.3503325942350335, | |
| "grad_norm": 0.5357985496520996, | |
| "learning_rate": 9.172409181900337e-06, | |
| "loss": 0.6852684020996094, | |
| "step": 1060 | |
| }, | |
| { | |
| "epoch": 2.354767184035477, | |
| "grad_norm": 0.790863037109375, | |
| "learning_rate": 9.140097856983647e-06, | |
| "loss": 0.2813524603843689, | |
| "step": 1062 | |
| }, | |
| { | |
| "epoch": 2.3592017738359203, | |
| "grad_norm": 0.2192503809928894, | |
| "learning_rate": 9.107811554792863e-06, | |
| "loss": 0.3573903739452362, | |
| "step": 1064 | |
| }, | |
| { | |
| "epoch": 2.3636363636363638, | |
| "grad_norm": 1.4538520574569702, | |
| "learning_rate": 9.075550709700992e-06, | |
| "loss": 0.5834711790084839, | |
| "step": 1066 | |
| }, | |
| { | |
| "epoch": 2.368070953436807, | |
| "grad_norm": 0.641722559928894, | |
| "learning_rate": 9.043315755738545e-06, | |
| "loss": 0.5266854763031006, | |
| "step": 1068 | |
| }, | |
| { | |
| "epoch": 2.3725055432372506, | |
| "grad_norm": 0.6017807126045227, | |
| "learning_rate": 9.011107126587705e-06, | |
| "loss": 0.5866771936416626, | |
| "step": 1070 | |
| }, | |
| { | |
| "epoch": 2.376940133037694, | |
| "grad_norm": 0.707431435585022, | |
| "learning_rate": 8.978925255576484e-06, | |
| "loss": 0.4829937517642975, | |
| "step": 1072 | |
| }, | |
| { | |
| "epoch": 2.3813747228381374, | |
| "grad_norm": 0.2395654022693634, | |
| "learning_rate": 8.946770575672897e-06, | |
| "loss": 0.04968187212944031, | |
| "step": 1074 | |
| }, | |
| { | |
| "epoch": 2.385809312638581, | |
| "grad_norm": 0.5818225741386414, | |
| "learning_rate": 8.914643519479134e-06, | |
| "loss": 0.3766881227493286, | |
| "step": 1076 | |
| }, | |
| { | |
| "epoch": 2.3902439024390243, | |
| "grad_norm": 0.4298112094402313, | |
| "learning_rate": 8.882544519225737e-06, | |
| "loss": 0.1799193024635315, | |
| "step": 1078 | |
| }, | |
| { | |
| "epoch": 2.3946784922394677, | |
| "grad_norm": 1.3011754751205444, | |
| "learning_rate": 8.850474006765806e-06, | |
| "loss": 0.5404252409934998, | |
| "step": 1080 | |
| }, | |
| { | |
| "epoch": 2.399113082039911, | |
| "grad_norm": 0.6072801351547241, | |
| "learning_rate": 8.818432413569153e-06, | |
| "loss": 0.42710888385772705, | |
| "step": 1082 | |
| }, | |
| { | |
| "epoch": 2.4035476718403546, | |
| "grad_norm": 0.8172256350517273, | |
| "learning_rate": 8.78642017071653e-06, | |
| "loss": 0.4754990339279175, | |
| "step": 1084 | |
| }, | |
| { | |
| "epoch": 2.4079822616407984, | |
| "grad_norm": 0.4423505961894989, | |
| "learning_rate": 8.754437708893803e-06, | |
| "loss": 0.5498704314231873, | |
| "step": 1086 | |
| }, | |
| { | |
| "epoch": 2.412416851441242, | |
| "grad_norm": 0.292689710855484, | |
| "learning_rate": 8.722485458386183e-06, | |
| "loss": 0.14969071745872498, | |
| "step": 1088 | |
| }, | |
| { | |
| "epoch": 2.4168514412416853, | |
| "grad_norm": 0.5658117532730103, | |
| "learning_rate": 8.690563849072416e-06, | |
| "loss": 0.593338131904602, | |
| "step": 1090 | |
| }, | |
| { | |
| "epoch": 2.4212860310421287, | |
| "grad_norm": 1.8885061740875244, | |
| "learning_rate": 8.65867331041901e-06, | |
| "loss": 0.3968830704689026, | |
| "step": 1092 | |
| }, | |
| { | |
| "epoch": 2.425720620842572, | |
| "grad_norm": 1.8343939781188965, | |
| "learning_rate": 8.62681427147446e-06, | |
| "loss": 0.28023120760917664, | |
| "step": 1094 | |
| }, | |
| { | |
| "epoch": 2.4301552106430155, | |
| "grad_norm": 1.2832564115524292, | |
| "learning_rate": 8.594987160863464e-06, | |
| "loss": 0.3517853617668152, | |
| "step": 1096 | |
| }, | |
| { | |
| "epoch": 2.434589800443459, | |
| "grad_norm": 0.32917505502700806, | |
| "learning_rate": 8.563192406781164e-06, | |
| "loss": 0.3207606077194214, | |
| "step": 1098 | |
| }, | |
| { | |
| "epoch": 2.4390243902439024, | |
| "grad_norm": 0.9043774008750916, | |
| "learning_rate": 8.53143043698739e-06, | |
| "loss": 0.4255558252334595, | |
| "step": 1100 | |
| }, | |
| { | |
| "epoch": 2.443458980044346, | |
| "grad_norm": 0.5287153124809265, | |
| "learning_rate": 8.499701678800891e-06, | |
| "loss": 0.6775237917900085, | |
| "step": 1102 | |
| }, | |
| { | |
| "epoch": 2.4478935698447892, | |
| "grad_norm": 1.211562991142273, | |
| "learning_rate": 8.4680065590936e-06, | |
| "loss": 0.28972724080085754, | |
| "step": 1104 | |
| }, | |
| { | |
| "epoch": 2.4523281596452327, | |
| "grad_norm": 0.5662131309509277, | |
| "learning_rate": 8.436345504284884e-06, | |
| "loss": 0.685287594795227, | |
| "step": 1106 | |
| }, | |
| { | |
| "epoch": 2.4567627494456765, | |
| "grad_norm": 1.0978025197982788, | |
| "learning_rate": 8.404718940335805e-06, | |
| "loss": 0.647050142288208, | |
| "step": 1108 | |
| }, | |
| { | |
| "epoch": 2.4611973392461195, | |
| "grad_norm": 0.48306140303611755, | |
| "learning_rate": 8.373127292743392e-06, | |
| "loss": 0.7415695190429688, | |
| "step": 1110 | |
| }, | |
| { | |
| "epoch": 2.4656319290465634, | |
| "grad_norm": 0.4147641360759735, | |
| "learning_rate": 8.341570986534926e-06, | |
| "loss": 0.47963038086891174, | |
| "step": 1112 | |
| }, | |
| { | |
| "epoch": 2.470066518847007, | |
| "grad_norm": 0.6168814301490784, | |
| "learning_rate": 8.310050446262204e-06, | |
| "loss": 0.5705453157424927, | |
| "step": 1114 | |
| }, | |
| { | |
| "epoch": 2.47450110864745, | |
| "grad_norm": 0.8609782457351685, | |
| "learning_rate": 8.278566095995837e-06, | |
| "loss": 0.24776363372802734, | |
| "step": 1116 | |
| }, | |
| { | |
| "epoch": 2.4789356984478936, | |
| "grad_norm": 0.41248947381973267, | |
| "learning_rate": 8.247118359319542e-06, | |
| "loss": 0.573097825050354, | |
| "step": 1118 | |
| }, | |
| { | |
| "epoch": 2.483370288248337, | |
| "grad_norm": 0.5210030674934387, | |
| "learning_rate": 8.215707659324448e-06, | |
| "loss": 0.45975643396377563, | |
| "step": 1120 | |
| }, | |
| { | |
| "epoch": 2.4878048780487805, | |
| "grad_norm": 0.48813074827194214, | |
| "learning_rate": 8.1843344186034e-06, | |
| "loss": 0.5684525370597839, | |
| "step": 1122 | |
| }, | |
| { | |
| "epoch": 2.492239467849224, | |
| "grad_norm": 1.437232494354248, | |
| "learning_rate": 8.152999059245273e-06, | |
| "loss": 0.6159149408340454, | |
| "step": 1124 | |
| }, | |
| { | |
| "epoch": 2.4966740576496673, | |
| "grad_norm": 0.6437961459159851, | |
| "learning_rate": 8.121702002829291e-06, | |
| "loss": 0.6514344811439514, | |
| "step": 1126 | |
| }, | |
| { | |
| "epoch": 2.5011086474501107, | |
| "grad_norm": 0.4339181184768677, | |
| "learning_rate": 8.090443670419368e-06, | |
| "loss": 0.3893609642982483, | |
| "step": 1128 | |
| }, | |
| { | |
| "epoch": 2.505543237250554, | |
| "grad_norm": 0.9250460863113403, | |
| "learning_rate": 8.05922448255842e-06, | |
| "loss": 0.5106027722358704, | |
| "step": 1130 | |
| }, | |
| { | |
| "epoch": 2.5099778270509976, | |
| "grad_norm": 0.7213279008865356, | |
| "learning_rate": 8.028044859262736e-06, | |
| "loss": 0.5997860431671143, | |
| "step": 1132 | |
| }, | |
| { | |
| "epoch": 2.5144124168514415, | |
| "grad_norm": 0.5925162434577942, | |
| "learning_rate": 7.996905220016295e-06, | |
| "loss": 0.37115636467933655, | |
| "step": 1134 | |
| }, | |
| { | |
| "epoch": 2.5188470066518844, | |
| "grad_norm": 0.4195973575115204, | |
| "learning_rate": 7.965805983765156e-06, | |
| "loss": 0.6658072471618652, | |
| "step": 1136 | |
| }, | |
| { | |
| "epoch": 2.5232815964523283, | |
| "grad_norm": 0.3894807994365692, | |
| "learning_rate": 7.934747568911792e-06, | |
| "loss": 0.48177286982536316, | |
| "step": 1138 | |
| }, | |
| { | |
| "epoch": 2.5277161862527717, | |
| "grad_norm": 0.4482039213180542, | |
| "learning_rate": 7.903730393309475e-06, | |
| "loss": 0.5770375728607178, | |
| "step": 1140 | |
| }, | |
| { | |
| "epoch": 2.532150776053215, | |
| "grad_norm": 1.4334046840667725, | |
| "learning_rate": 7.872754874256658e-06, | |
| "loss": 0.37715059518814087, | |
| "step": 1142 | |
| }, | |
| { | |
| "epoch": 2.5365853658536586, | |
| "grad_norm": 0.18956467509269714, | |
| "learning_rate": 7.841821428491358e-06, | |
| "loss": 0.3323401212692261, | |
| "step": 1144 | |
| }, | |
| { | |
| "epoch": 2.541019955654102, | |
| "grad_norm": 0.9211217761039734, | |
| "learning_rate": 7.810930472185542e-06, | |
| "loss": 0.7031457424163818, | |
| "step": 1146 | |
| }, | |
| { | |
| "epoch": 2.5454545454545454, | |
| "grad_norm": 0.5245496034622192, | |
| "learning_rate": 7.78008242093953e-06, | |
| "loss": 0.6004937887191772, | |
| "step": 1148 | |
| }, | |
| { | |
| "epoch": 2.549889135254989, | |
| "grad_norm": 0.4028185307979584, | |
| "learning_rate": 7.749277689776411e-06, | |
| "loss": 0.496783971786499, | |
| "step": 1150 | |
| }, | |
| { | |
| "epoch": 2.5543237250554323, | |
| "grad_norm": 0.5988771915435791, | |
| "learning_rate": 7.718516693136455e-06, | |
| "loss": 0.38715416193008423, | |
| "step": 1152 | |
| }, | |
| { | |
| "epoch": 2.5587583148558757, | |
| "grad_norm": 0.1802368313074112, | |
| "learning_rate": 7.687799844871534e-06, | |
| "loss": 0.14051398634910583, | |
| "step": 1154 | |
| }, | |
| { | |
| "epoch": 2.5631929046563195, | |
| "grad_norm": 0.41661515831947327, | |
| "learning_rate": 7.657127558239563e-06, | |
| "loss": 0.3350878059864044, | |
| "step": 1156 | |
| }, | |
| { | |
| "epoch": 2.5676274944567625, | |
| "grad_norm": 1.085957646369934, | |
| "learning_rate": 7.626500245898927e-06, | |
| "loss": 0.3848508596420288, | |
| "step": 1158 | |
| }, | |
| { | |
| "epoch": 2.5720620842572064, | |
| "grad_norm": 0.8385711908340454, | |
| "learning_rate": 7.595918319902939e-06, | |
| "loss": 0.26338139176368713, | |
| "step": 1160 | |
| }, | |
| { | |
| "epoch": 2.57649667405765, | |
| "grad_norm": 0.716660737991333, | |
| "learning_rate": 7.565382191694302e-06, | |
| "loss": 0.6448018550872803, | |
| "step": 1162 | |
| }, | |
| { | |
| "epoch": 2.5809312638580932, | |
| "grad_norm": 0.6266429424285889, | |
| "learning_rate": 7.53489227209955e-06, | |
| "loss": 0.7049829363822937, | |
| "step": 1164 | |
| }, | |
| { | |
| "epoch": 2.5853658536585367, | |
| "grad_norm": 0.4636557996273041, | |
| "learning_rate": 7.50444897132355e-06, | |
| "loss": 0.38826262950897217, | |
| "step": 1166 | |
| }, | |
| { | |
| "epoch": 2.58980044345898, | |
| "grad_norm": 0.44733473658561707, | |
| "learning_rate": 7.474052698943961e-06, | |
| "loss": 0.5173879265785217, | |
| "step": 1168 | |
| }, | |
| { | |
| "epoch": 2.5942350332594235, | |
| "grad_norm": 0.5354277491569519, | |
| "learning_rate": 7.443703863905738e-06, | |
| "loss": 0.5096431374549866, | |
| "step": 1170 | |
| }, | |
| { | |
| "epoch": 2.598669623059867, | |
| "grad_norm": 1.4914095401763916, | |
| "learning_rate": 7.413402874515616e-06, | |
| "loss": 0.21273551881313324, | |
| "step": 1172 | |
| }, | |
| { | |
| "epoch": 2.6031042128603104, | |
| "grad_norm": 0.40523943305015564, | |
| "learning_rate": 7.383150138436628e-06, | |
| "loss": 0.49439945816993713, | |
| "step": 1174 | |
| }, | |
| { | |
| "epoch": 2.6075388026607538, | |
| "grad_norm": 0.5631287693977356, | |
| "learning_rate": 7.352946062682626e-06, | |
| "loss": 0.49207258224487305, | |
| "step": 1176 | |
| }, | |
| { | |
| "epoch": 2.611973392461197, | |
| "grad_norm": 0.5385340452194214, | |
| "learning_rate": 7.32279105361279e-06, | |
| "loss": 0.3323192000389099, | |
| "step": 1178 | |
| }, | |
| { | |
| "epoch": 2.6164079822616406, | |
| "grad_norm": 1.356742024421692, | |
| "learning_rate": 7.292685516926161e-06, | |
| "loss": 0.5721710324287415, | |
| "step": 1180 | |
| }, | |
| { | |
| "epoch": 2.6208425720620845, | |
| "grad_norm": 0.4816894829273224, | |
| "learning_rate": 7.262629857656198e-06, | |
| "loss": 0.5175535082817078, | |
| "step": 1182 | |
| }, | |
| { | |
| "epoch": 2.6252771618625275, | |
| "grad_norm": 0.4633226990699768, | |
| "learning_rate": 7.232624480165318e-06, | |
| "loss": 0.6447592973709106, | |
| "step": 1184 | |
| }, | |
| { | |
| "epoch": 2.6297117516629713, | |
| "grad_norm": 0.6813458800315857, | |
| "learning_rate": 7.202669788139456e-06, | |
| "loss": 0.5713311433792114, | |
| "step": 1186 | |
| }, | |
| { | |
| "epoch": 2.6341463414634148, | |
| "grad_norm": 2.180230140686035, | |
| "learning_rate": 7.172766184582629e-06, | |
| "loss": 0.6713429093360901, | |
| "step": 1188 | |
| }, | |
| { | |
| "epoch": 2.638580931263858, | |
| "grad_norm": 0.5426626801490784, | |
| "learning_rate": 7.142914071811535e-06, | |
| "loss": 0.37241318821907043, | |
| "step": 1190 | |
| }, | |
| { | |
| "epoch": 2.6430155210643016, | |
| "grad_norm": 1.2816245555877686, | |
| "learning_rate": 7.113113851450122e-06, | |
| "loss": 0.49532002210617065, | |
| "step": 1192 | |
| }, | |
| { | |
| "epoch": 2.647450110864745, | |
| "grad_norm": 1.509843111038208, | |
| "learning_rate": 7.083365924424175e-06, | |
| "loss": 0.40875858068466187, | |
| "step": 1194 | |
| }, | |
| { | |
| "epoch": 2.6518847006651884, | |
| "grad_norm": 0.5089661478996277, | |
| "learning_rate": 7.053670690955956e-06, | |
| "loss": 0.4947509467601776, | |
| "step": 1196 | |
| }, | |
| { | |
| "epoch": 2.656319290465632, | |
| "grad_norm": 0.381073921918869, | |
| "learning_rate": 7.024028550558781e-06, | |
| "loss": 0.2214895784854889, | |
| "step": 1198 | |
| }, | |
| { | |
| "epoch": 2.6607538802660753, | |
| "grad_norm": 0.521045446395874, | |
| "learning_rate": 6.994439902031679e-06, | |
| "loss": 0.6109291911125183, | |
| "step": 1200 | |
| }, | |
| { | |
| "epoch": 2.6651884700665187, | |
| "grad_norm": 1.0478029251098633, | |
| "learning_rate": 6.964905143453995e-06, | |
| "loss": 0.6086549162864685, | |
| "step": 1202 | |
| }, | |
| { | |
| "epoch": 2.6696230598669626, | |
| "grad_norm": 0.4481736719608307, | |
| "learning_rate": 6.9354246721800685e-06, | |
| "loss": 0.29336196184158325, | |
| "step": 1204 | |
| }, | |
| { | |
| "epoch": 2.6740576496674056, | |
| "grad_norm": 1.675062894821167, | |
| "learning_rate": 6.9059988848338466e-06, | |
| "loss": 0.48426881432533264, | |
| "step": 1206 | |
| }, | |
| { | |
| "epoch": 2.6784922394678494, | |
| "grad_norm": 0.8172009587287903, | |
| "learning_rate": 6.8766281773035906e-06, | |
| "loss": 0.4322719871997833, | |
| "step": 1208 | |
| }, | |
| { | |
| "epoch": 2.682926829268293, | |
| "grad_norm": 0.5452362298965454, | |
| "learning_rate": 6.847312944736524e-06, | |
| "loss": 0.3221188187599182, | |
| "step": 1210 | |
| }, | |
| { | |
| "epoch": 2.6873614190687363, | |
| "grad_norm": 1.4369398355484009, | |
| "learning_rate": 6.818053581533512e-06, | |
| "loss": 0.20389345288276672, | |
| "step": 1212 | |
| }, | |
| { | |
| "epoch": 2.6917960088691797, | |
| "grad_norm": 0.5867207050323486, | |
| "learning_rate": 6.788850481343782e-06, | |
| "loss": 0.42180705070495605, | |
| "step": 1214 | |
| }, | |
| { | |
| "epoch": 2.696230598669623, | |
| "grad_norm": 2.326925754547119, | |
| "learning_rate": 6.759704037059598e-06, | |
| "loss": 0.36190155148506165, | |
| "step": 1216 | |
| }, | |
| { | |
| "epoch": 2.7006651884700665, | |
| "grad_norm": 1.7214257717132568, | |
| "learning_rate": 6.7306146408109885e-06, | |
| "loss": 0.34991076588630676, | |
| "step": 1218 | |
| }, | |
| { | |
| "epoch": 2.70509977827051, | |
| "grad_norm": 0.5046329498291016, | |
| "learning_rate": 6.701582683960481e-06, | |
| "loss": 0.6116279363632202, | |
| "step": 1220 | |
| }, | |
| { | |
| "epoch": 2.7095343680709534, | |
| "grad_norm": 0.8512217998504639, | |
| "learning_rate": 6.672608557097806e-06, | |
| "loss": 0.37688618898391724, | |
| "step": 1222 | |
| }, | |
| { | |
| "epoch": 2.713968957871397, | |
| "grad_norm": 1.6093370914459229, | |
| "learning_rate": 6.643692650034684e-06, | |
| "loss": 0.7054269909858704, | |
| "step": 1224 | |
| }, | |
| { | |
| "epoch": 2.7184035476718402, | |
| "grad_norm": 3.110217809677124, | |
| "learning_rate": 6.614835351799549e-06, | |
| "loss": 0.31694677472114563, | |
| "step": 1226 | |
| }, | |
| { | |
| "epoch": 2.7228381374722836, | |
| "grad_norm": 0.5730735659599304, | |
| "learning_rate": 6.586037050632315e-06, | |
| "loss": 0.8013717532157898, | |
| "step": 1228 | |
| }, | |
| { | |
| "epoch": 2.7272727272727275, | |
| "grad_norm": 0.7116084098815918, | |
| "learning_rate": 6.557298133979177e-06, | |
| "loss": 0.45755088329315186, | |
| "step": 1230 | |
| }, | |
| { | |
| "epoch": 2.7317073170731705, | |
| "grad_norm": 0.4136090874671936, | |
| "learning_rate": 6.528618988487373e-06, | |
| "loss": 0.48779523372650146, | |
| "step": 1232 | |
| }, | |
| { | |
| "epoch": 2.7361419068736144, | |
| "grad_norm": 0.9168877601623535, | |
| "learning_rate": 6.500000000000003e-06, | |
| "loss": 0.2947143614292145, | |
| "step": 1234 | |
| }, | |
| { | |
| "epoch": 2.740576496674058, | |
| "grad_norm": 0.6739610433578491, | |
| "learning_rate": 6.471441553550813e-06, | |
| "loss": 0.6185624599456787, | |
| "step": 1236 | |
| }, | |
| { | |
| "epoch": 2.745011086474501, | |
| "grad_norm": 0.5895893573760986, | |
| "learning_rate": 6.442944033359042e-06, | |
| "loss": 0.35551586747169495, | |
| "step": 1238 | |
| }, | |
| { | |
| "epoch": 2.7494456762749446, | |
| "grad_norm": 0.37865594029426575, | |
| "learning_rate": 6.4145078228242375e-06, | |
| "loss": 0.3368171751499176, | |
| "step": 1240 | |
| }, | |
| { | |
| "epoch": 2.753880266075388, | |
| "grad_norm": 0.45283424854278564, | |
| "learning_rate": 6.386133304521094e-06, | |
| "loss": 0.5998995304107666, | |
| "step": 1242 | |
| }, | |
| { | |
| "epoch": 2.7583148558758315, | |
| "grad_norm": 0.7602055668830872, | |
| "learning_rate": 6.357820860194321e-06, | |
| "loss": 0.7485865354537964, | |
| "step": 1244 | |
| }, | |
| { | |
| "epoch": 2.762749445676275, | |
| "grad_norm": 0.12720580399036407, | |
| "learning_rate": 6.32957087075349e-06, | |
| "loss": 0.18481549620628357, | |
| "step": 1246 | |
| }, | |
| { | |
| "epoch": 2.7671840354767183, | |
| "grad_norm": 1.2511968612670898, | |
| "learning_rate": 6.301383716267917e-06, | |
| "loss": 0.3667486011981964, | |
| "step": 1248 | |
| }, | |
| { | |
| "epoch": 2.7716186252771617, | |
| "grad_norm": 0.6795738339424133, | |
| "learning_rate": 6.273259775961562e-06, | |
| "loss": 0.43524369597435, | |
| "step": 1250 | |
| }, | |
| { | |
| "epoch": 2.776053215077605, | |
| "grad_norm": 0.4668692946434021, | |
| "learning_rate": 6.245199428207898e-06, | |
| "loss": 0.7469791173934937, | |
| "step": 1252 | |
| }, | |
| { | |
| "epoch": 2.7804878048780486, | |
| "grad_norm": 0.4733211100101471, | |
| "learning_rate": 6.2172030505248515e-06, | |
| "loss": 0.6893079876899719, | |
| "step": 1254 | |
| }, | |
| { | |
| "epoch": 2.7849223946784925, | |
| "grad_norm": 0.4810378849506378, | |
| "learning_rate": 6.189271019569707e-06, | |
| "loss": 0.6243588328361511, | |
| "step": 1256 | |
| }, | |
| { | |
| "epoch": 2.7893569844789354, | |
| "grad_norm": 0.21061930060386658, | |
| "learning_rate": 6.161403711134031e-06, | |
| "loss": 0.09384872019290924, | |
| "step": 1258 | |
| }, | |
| { | |
| "epoch": 2.7937915742793793, | |
| "grad_norm": 0.4916951358318329, | |
| "learning_rate": 6.133601500138643e-06, | |
| "loss": 0.5685229301452637, | |
| "step": 1260 | |
| }, | |
| { | |
| "epoch": 2.7982261640798227, | |
| "grad_norm": 0.8098857402801514, | |
| "learning_rate": 6.1058647606285394e-06, | |
| "loss": 0.3363065719604492, | |
| "step": 1262 | |
| }, | |
| { | |
| "epoch": 2.802660753880266, | |
| "grad_norm": 0.5222221612930298, | |
| "learning_rate": 6.078193865767893e-06, | |
| "loss": 0.36431118845939636, | |
| "step": 1264 | |
| }, | |
| { | |
| "epoch": 2.8070953436807096, | |
| "grad_norm": 0.48917877674102783, | |
| "learning_rate": 6.050589187835001e-06, | |
| "loss": 0.48057618737220764, | |
| "step": 1266 | |
| }, | |
| { | |
| "epoch": 2.811529933481153, | |
| "grad_norm": 1.3627451658248901, | |
| "learning_rate": 6.023051098217307e-06, | |
| "loss": 0.4955880343914032, | |
| "step": 1268 | |
| }, | |
| { | |
| "epoch": 2.8159645232815964, | |
| "grad_norm": 0.5931581854820251, | |
| "learning_rate": 5.995579967406379e-06, | |
| "loss": 0.5985972881317139, | |
| "step": 1270 | |
| }, | |
| { | |
| "epoch": 2.82039911308204, | |
| "grad_norm": 1.0736427307128906, | |
| "learning_rate": 5.968176164992938e-06, | |
| "loss": 0.24213649332523346, | |
| "step": 1272 | |
| }, | |
| { | |
| "epoch": 2.8248337028824833, | |
| "grad_norm": 0.6388216614723206, | |
| "learning_rate": 5.940840059661892e-06, | |
| "loss": 0.41631895303726196, | |
| "step": 1274 | |
| }, | |
| { | |
| "epoch": 2.8292682926829267, | |
| "grad_norm": 0.49787789583206177, | |
| "learning_rate": 5.913572019187355e-06, | |
| "loss": 0.6338592171669006, | |
| "step": 1276 | |
| }, | |
| { | |
| "epoch": 2.8337028824833705, | |
| "grad_norm": 0.4130885601043701, | |
| "learning_rate": 5.886372410427709e-06, | |
| "loss": 0.5558915734291077, | |
| "step": 1278 | |
| }, | |
| { | |
| "epoch": 2.8381374722838135, | |
| "grad_norm": 0.4531559944152832, | |
| "learning_rate": 5.859241599320686e-06, | |
| "loss": 0.24562785029411316, | |
| "step": 1280 | |
| }, | |
| { | |
| "epoch": 2.8425720620842574, | |
| "grad_norm": 1.1224136352539062, | |
| "learning_rate": 5.832179950878414e-06, | |
| "loss": 0.38200998306274414, | |
| "step": 1282 | |
| }, | |
| { | |
| "epoch": 2.847006651884701, | |
| "grad_norm": 0.5757291913032532, | |
| "learning_rate": 5.805187829182531e-06, | |
| "loss": 0.40263280272483826, | |
| "step": 1284 | |
| }, | |
| { | |
| "epoch": 2.8514412416851442, | |
| "grad_norm": 0.4876343607902527, | |
| "learning_rate": 5.778265597379269e-06, | |
| "loss": 0.5635562539100647, | |
| "step": 1286 | |
| }, | |
| { | |
| "epoch": 2.8558758314855877, | |
| "grad_norm": 1.429746150970459, | |
| "learning_rate": 5.751413617674584e-06, | |
| "loss": 0.13587771356105804, | |
| "step": 1288 | |
| }, | |
| { | |
| "epoch": 2.860310421286031, | |
| "grad_norm": 0.43107762932777405, | |
| "learning_rate": 5.724632251329272e-06, | |
| "loss": 0.5738257765769958, | |
| "step": 1290 | |
| }, | |
| { | |
| "epoch": 2.8647450110864745, | |
| "grad_norm": 1.0720781087875366, | |
| "learning_rate": 5.697921858654106e-06, | |
| "loss": 0.36557459831237793, | |
| "step": 1292 | |
| }, | |
| { | |
| "epoch": 2.869179600886918, | |
| "grad_norm": 0.4924733638763428, | |
| "learning_rate": 5.671282799005009e-06, | |
| "loss": 0.5723231434822083, | |
| "step": 1294 | |
| }, | |
| { | |
| "epoch": 2.8736141906873613, | |
| "grad_norm": 0.4669732451438904, | |
| "learning_rate": 5.644715430778187e-06, | |
| "loss": 0.5587807893753052, | |
| "step": 1296 | |
| }, | |
| { | |
| "epoch": 2.8780487804878048, | |
| "grad_norm": 0.8375265598297119, | |
| "learning_rate": 5.6182201114053405e-06, | |
| "loss": 0.407155841588974, | |
| "step": 1298 | |
| }, | |
| { | |
| "epoch": 2.882483370288248, | |
| "grad_norm": 0.6367316246032715, | |
| "learning_rate": 5.59179719734883e-06, | |
| "loss": 0.581174373626709, | |
| "step": 1300 | |
| }, | |
| { | |
| "epoch": 2.8869179600886916, | |
| "grad_norm": 1.9464964866638184, | |
| "learning_rate": 5.565447044096888e-06, | |
| "loss": 0.23274049162864685, | |
| "step": 1302 | |
| }, | |
| { | |
| "epoch": 2.8913525498891355, | |
| "grad_norm": 0.4807678461074829, | |
| "learning_rate": 5.539170006158859e-06, | |
| "loss": 0.5287979245185852, | |
| "step": 1304 | |
| }, | |
| { | |
| "epoch": 2.8957871396895785, | |
| "grad_norm": 0.5676413774490356, | |
| "learning_rate": 5.512966437060383e-06, | |
| "loss": 0.4669223129749298, | |
| "step": 1306 | |
| }, | |
| { | |
| "epoch": 2.9002217294900223, | |
| "grad_norm": 0.19804784655570984, | |
| "learning_rate": 5.4868366893386795e-06, | |
| "loss": 0.1954198181629181, | |
| "step": 1308 | |
| }, | |
| { | |
| "epoch": 2.9046563192904657, | |
| "grad_norm": 0.5282815098762512, | |
| "learning_rate": 5.460781114537794e-06, | |
| "loss": 0.3124288320541382, | |
| "step": 1310 | |
| }, | |
| { | |
| "epoch": 2.909090909090909, | |
| "grad_norm": 0.6704612374305725, | |
| "learning_rate": 5.434800063203855e-06, | |
| "loss": 0.5746976733207703, | |
| "step": 1312 | |
| }, | |
| { | |
| "epoch": 2.9135254988913526, | |
| "grad_norm": 0.48029983043670654, | |
| "learning_rate": 5.408893884880382e-06, | |
| "loss": 0.5503944158554077, | |
| "step": 1314 | |
| }, | |
| { | |
| "epoch": 2.917960088691796, | |
| "grad_norm": 1.208801031112671, | |
| "learning_rate": 5.383062928103551e-06, | |
| "loss": 0.4464556872844696, | |
| "step": 1316 | |
| }, | |
| { | |
| "epoch": 2.9223946784922394, | |
| "grad_norm": 0.5504411458969116, | |
| "learning_rate": 5.357307540397541e-06, | |
| "loss": 0.6808157563209534, | |
| "step": 1318 | |
| }, | |
| { | |
| "epoch": 2.926829268292683, | |
| "grad_norm": 0.4721316397190094, | |
| "learning_rate": 5.331628068269832e-06, | |
| "loss": 0.3994528353214264, | |
| "step": 1320 | |
| }, | |
| { | |
| "epoch": 2.9312638580931263, | |
| "grad_norm": 0.40078234672546387, | |
| "learning_rate": 5.306024857206551e-06, | |
| "loss": 0.589479386806488, | |
| "step": 1322 | |
| }, | |
| { | |
| "epoch": 2.9356984478935697, | |
| "grad_norm": 0.4144805073738098, | |
| "learning_rate": 5.28049825166783e-06, | |
| "loss": 0.6008284687995911, | |
| "step": 1324 | |
| }, | |
| { | |
| "epoch": 2.9401330376940136, | |
| "grad_norm": 0.4621680676937103, | |
| "learning_rate": 5.255048595083161e-06, | |
| "loss": 0.48713505268096924, | |
| "step": 1326 | |
| }, | |
| { | |
| "epoch": 2.9445676274944566, | |
| "grad_norm": 0.6959161758422852, | |
| "learning_rate": 5.229676229846788e-06, | |
| "loss": 0.5818562507629395, | |
| "step": 1328 | |
| }, | |
| { | |
| "epoch": 2.9490022172949004, | |
| "grad_norm": 0.8349772095680237, | |
| "learning_rate": 5.204381497313089e-06, | |
| "loss": 0.6031002402305603, | |
| "step": 1330 | |
| }, | |
| { | |
| "epoch": 2.953436807095344, | |
| "grad_norm": 0.5815767645835876, | |
| "learning_rate": 5.179164737791984e-06, | |
| "loss": 0.6579894423484802, | |
| "step": 1332 | |
| }, | |
| { | |
| "epoch": 2.9578713968957873, | |
| "grad_norm": 0.5155860781669617, | |
| "learning_rate": 5.15402629054437e-06, | |
| "loss": 0.3109511137008667, | |
| "step": 1334 | |
| }, | |
| { | |
| "epoch": 2.9623059866962307, | |
| "grad_norm": 0.5490220189094543, | |
| "learning_rate": 5.128966493777544e-06, | |
| "loss": 0.5789236426353455, | |
| "step": 1336 | |
| }, | |
| { | |
| "epoch": 2.966740576496674, | |
| "grad_norm": 0.5740970969200134, | |
| "learning_rate": 5.103985684640653e-06, | |
| "loss": 0.5203069448471069, | |
| "step": 1338 | |
| }, | |
| { | |
| "epoch": 2.9711751662971175, | |
| "grad_norm": 0.5606107711791992, | |
| "learning_rate": 5.079084199220168e-06, | |
| "loss": 0.4374566376209259, | |
| "step": 1340 | |
| }, | |
| { | |
| "epoch": 2.975609756097561, | |
| "grad_norm": 1.1846078634262085, | |
| "learning_rate": 5.0542623725353455e-06, | |
| "loss": 0.42820480465888977, | |
| "step": 1342 | |
| }, | |
| { | |
| "epoch": 2.9800443458980044, | |
| "grad_norm": 0.19243869185447693, | |
| "learning_rate": 5.029520538533742e-06, | |
| "loss": 0.125463604927063, | |
| "step": 1344 | |
| }, | |
| { | |
| "epoch": 2.984478935698448, | |
| "grad_norm": 0.4858459532260895, | |
| "learning_rate": 5.0048590300867e-06, | |
| "loss": 0.37778711318969727, | |
| "step": 1346 | |
| }, | |
| { | |
| "epoch": 2.988913525498891, | |
| "grad_norm": 0.4838855564594269, | |
| "learning_rate": 4.980278178984886e-06, | |
| "loss": 0.33112236857414246, | |
| "step": 1348 | |
| }, | |
| { | |
| "epoch": 2.9933481152993346, | |
| "grad_norm": 1.0332651138305664, | |
| "learning_rate": 4.9557783159338134e-06, | |
| "loss": 0.28946980834007263, | |
| "step": 1350 | |
| }, | |
| { | |
| "epoch": 2.9977827050997785, | |
| "grad_norm": 1.0827792882919312, | |
| "learning_rate": 4.9313597705494045e-06, | |
| "loss": 0.44148802757263184, | |
| "step": 1352 | |
| }, | |
| { | |
| "epoch": 3.002217294900222, | |
| "grad_norm": 0.3786047399044037, | |
| "learning_rate": 4.907022871353554e-06, | |
| "loss": 0.42598864436149597, | |
| "step": 1354 | |
| }, | |
| { | |
| "epoch": 3.0066518847006654, | |
| "grad_norm": 0.35562005639076233, | |
| "learning_rate": 4.882767945769696e-06, | |
| "loss": 0.1402987688779831, | |
| "step": 1356 | |
| }, | |
| { | |
| "epoch": 3.011086474501109, | |
| "grad_norm": 1.162191390991211, | |
| "learning_rate": 4.858595320118419e-06, | |
| "loss": 0.2594584822654724, | |
| "step": 1358 | |
| }, | |
| { | |
| "epoch": 3.015521064301552, | |
| "grad_norm": 0.3751342296600342, | |
| "learning_rate": 4.834505319613061e-06, | |
| "loss": 0.3178204894065857, | |
| "step": 1360 | |
| }, | |
| { | |
| "epoch": 3.0199556541019956, | |
| "grad_norm": 0.3661974370479584, | |
| "learning_rate": 4.810498268355337e-06, | |
| "loss": 0.2332019954919815, | |
| "step": 1362 | |
| }, | |
| { | |
| "epoch": 3.024390243902439, | |
| "grad_norm": 0.5547940135002136, | |
| "learning_rate": 4.786574489330988e-06, | |
| "loss": 0.2809712886810303, | |
| "step": 1364 | |
| }, | |
| { | |
| "epoch": 3.0288248337028825, | |
| "grad_norm": 0.08006221801042557, | |
| "learning_rate": 4.762734304405419e-06, | |
| "loss": 0.1403912454843521, | |
| "step": 1366 | |
| }, | |
| { | |
| "epoch": 3.033259423503326, | |
| "grad_norm": 0.5086005926132202, | |
| "learning_rate": 4.738978034319384e-06, | |
| "loss": 0.13945481181144714, | |
| "step": 1368 | |
| }, | |
| { | |
| "epoch": 3.0376940133037693, | |
| "grad_norm": 0.6609373688697815, | |
| "learning_rate": 4.715305998684668e-06, | |
| "loss": 0.14236144721508026, | |
| "step": 1370 | |
| }, | |
| { | |
| "epoch": 3.0421286031042127, | |
| "grad_norm": 0.7926512956619263, | |
| "learning_rate": 4.691718515979772e-06, | |
| "loss": 0.2316332459449768, | |
| "step": 1372 | |
| }, | |
| { | |
| "epoch": 3.046563192904656, | |
| "grad_norm": 0.6564216613769531, | |
| "learning_rate": 4.668215903545652e-06, | |
| "loss": 0.1165812611579895, | |
| "step": 1374 | |
| }, | |
| { | |
| "epoch": 3.0509977827050996, | |
| "grad_norm": 1.1338090896606445, | |
| "learning_rate": 4.644798477581427e-06, | |
| "loss": 0.13446903228759766, | |
| "step": 1376 | |
| }, | |
| { | |
| "epoch": 3.0554323725055434, | |
| "grad_norm": 0.34968799352645874, | |
| "learning_rate": 4.6214665531401465e-06, | |
| "loss": 0.0695309042930603, | |
| "step": 1378 | |
| }, | |
| { | |
| "epoch": 3.059866962305987, | |
| "grad_norm": 0.15553732216358185, | |
| "learning_rate": 4.5982204441245294e-06, | |
| "loss": 0.1173941045999527, | |
| "step": 1380 | |
| }, | |
| { | |
| "epoch": 3.0643015521064303, | |
| "grad_norm": 1.247266411781311, | |
| "learning_rate": 4.5750604632827615e-06, | |
| "loss": 0.05880206078290939, | |
| "step": 1382 | |
| }, | |
| { | |
| "epoch": 3.0687361419068737, | |
| "grad_norm": 0.9541630744934082, | |
| "learning_rate": 4.551986922204276e-06, | |
| "loss": 0.11438459157943726, | |
| "step": 1384 | |
| }, | |
| { | |
| "epoch": 3.073170731707317, | |
| "grad_norm": 0.11932838708162308, | |
| "learning_rate": 4.529000131315559e-06, | |
| "loss": 0.05259817838668823, | |
| "step": 1386 | |
| }, | |
| { | |
| "epoch": 3.0776053215077606, | |
| "grad_norm": 0.3025910258293152, | |
| "learning_rate": 4.5061003998759864e-06, | |
| "loss": 0.0788898915052414, | |
| "step": 1388 | |
| }, | |
| { | |
| "epoch": 3.082039911308204, | |
| "grad_norm": 0.41884443163871765, | |
| "learning_rate": 4.483288035973647e-06, | |
| "loss": 0.18548215925693512, | |
| "step": 1390 | |
| }, | |
| { | |
| "epoch": 3.0864745011086474, | |
| "grad_norm": 0.69329434633255, | |
| "learning_rate": 4.46056334652121e-06, | |
| "loss": 0.07898163050413132, | |
| "step": 1392 | |
| }, | |
| { | |
| "epoch": 3.090909090909091, | |
| "grad_norm": 1.9537714719772339, | |
| "learning_rate": 4.43792663725179e-06, | |
| "loss": 0.1453198343515396, | |
| "step": 1394 | |
| }, | |
| { | |
| "epoch": 3.0953436807095343, | |
| "grad_norm": 0.5684086084365845, | |
| "learning_rate": 4.415378212714833e-06, | |
| "loss": 0.2133360058069229, | |
| "step": 1396 | |
| }, | |
| { | |
| "epoch": 3.0997782705099777, | |
| "grad_norm": 0.4299287497997284, | |
| "learning_rate": 4.392918376272028e-06, | |
| "loss": 0.18916372954845428, | |
| "step": 1398 | |
| }, | |
| { | |
| "epoch": 3.104212860310421, | |
| "grad_norm": 0.2804919481277466, | |
| "learning_rate": 4.370547430093213e-06, | |
| "loss": 0.15570159256458282, | |
| "step": 1400 | |
| }, | |
| { | |
| "epoch": 3.1086474501108645, | |
| "grad_norm": 0.8112667798995972, | |
| "learning_rate": 4.348265675152312e-06, | |
| "loss": 0.05692750960588455, | |
| "step": 1402 | |
| }, | |
| { | |
| "epoch": 3.1130820399113084, | |
| "grad_norm": 1.0895768404006958, | |
| "learning_rate": 4.326073411223299e-06, | |
| "loss": 0.072386234998703, | |
| "step": 1404 | |
| }, | |
| { | |
| "epoch": 3.117516629711752, | |
| "grad_norm": 1.3162689208984375, | |
| "learning_rate": 4.303970936876145e-06, | |
| "loss": 0.2204161435365677, | |
| "step": 1406 | |
| }, | |
| { | |
| "epoch": 3.1219512195121952, | |
| "grad_norm": 0.4283730983734131, | |
| "learning_rate": 4.281958549472821e-06, | |
| "loss": 0.24357332289218903, | |
| "step": 1408 | |
| }, | |
| { | |
| "epoch": 3.1263858093126387, | |
| "grad_norm": 0.5136526226997375, | |
| "learning_rate": 4.2600365451632755e-06, | |
| "loss": 0.1705726683139801, | |
| "step": 1410 | |
| }, | |
| { | |
| "epoch": 3.130820399113082, | |
| "grad_norm": 0.5153740644454956, | |
| "learning_rate": 4.238205218881477e-06, | |
| "loss": 0.1938788741827011, | |
| "step": 1412 | |
| }, | |
| { | |
| "epoch": 3.1352549889135255, | |
| "grad_norm": 0.3389737606048584, | |
| "learning_rate": 4.216464864341415e-06, | |
| "loss": 0.1461533159017563, | |
| "step": 1414 | |
| }, | |
| { | |
| "epoch": 3.139689578713969, | |
| "grad_norm": 0.24095015227794647, | |
| "learning_rate": 4.1948157740331765e-06, | |
| "loss": 0.016989566385746002, | |
| "step": 1416 | |
| }, | |
| { | |
| "epoch": 3.1441241685144123, | |
| "grad_norm": 0.4946073591709137, | |
| "learning_rate": 4.173258239218998e-06, | |
| "loss": 0.16947562992572784, | |
| "step": 1418 | |
| }, | |
| { | |
| "epoch": 3.1485587583148558, | |
| "grad_norm": 1.0035178661346436, | |
| "learning_rate": 4.151792549929343e-06, | |
| "loss": 0.17151474952697754, | |
| "step": 1420 | |
| }, | |
| { | |
| "epoch": 3.152993348115299, | |
| "grad_norm": 0.925403356552124, | |
| "learning_rate": 4.130418994959004e-06, | |
| "loss": 0.12084448337554932, | |
| "step": 1422 | |
| }, | |
| { | |
| "epoch": 3.1574279379157426, | |
| "grad_norm": 0.30737417936325073, | |
| "learning_rate": 4.1091378618632276e-06, | |
| "loss": 0.03554686903953552, | |
| "step": 1424 | |
| }, | |
| { | |
| "epoch": 3.1618625277161865, | |
| "grad_norm": 0.9840001463890076, | |
| "learning_rate": 4.087949436953822e-06, | |
| "loss": 0.17049196362495422, | |
| "step": 1426 | |
| }, | |
| { | |
| "epoch": 3.16629711751663, | |
| "grad_norm": 1.108886957168579, | |
| "learning_rate": 4.066854005295336e-06, | |
| "loss": 0.12697622179985046, | |
| "step": 1428 | |
| }, | |
| { | |
| "epoch": 3.1707317073170733, | |
| "grad_norm": 0.6791403293609619, | |
| "learning_rate": 4.045851850701189e-06, | |
| "loss": 0.10053610801696777, | |
| "step": 1430 | |
| }, | |
| { | |
| "epoch": 3.1751662971175167, | |
| "grad_norm": 0.23437856137752533, | |
| "learning_rate": 4.024943255729886e-06, | |
| "loss": 0.1366463154554367, | |
| "step": 1432 | |
| }, | |
| { | |
| "epoch": 3.17960088691796, | |
| "grad_norm": 0.5337254405021667, | |
| "learning_rate": 4.004128501681197e-06, | |
| "loss": 0.1613321751356125, | |
| "step": 1434 | |
| }, | |
| { | |
| "epoch": 3.1840354767184036, | |
| "grad_norm": 0.6539866924285889, | |
| "learning_rate": 3.983407868592367e-06, | |
| "loss": 0.03396349772810936, | |
| "step": 1436 | |
| }, | |
| { | |
| "epoch": 3.188470066518847, | |
| "grad_norm": 0.5891013145446777, | |
| "learning_rate": 3.9627816352343714e-06, | |
| "loss": 0.1685631275177002, | |
| "step": 1438 | |
| }, | |
| { | |
| "epoch": 3.1929046563192904, | |
| "grad_norm": 0.8137240409851074, | |
| "learning_rate": 3.94225007910814e-06, | |
| "loss": 0.16547633707523346, | |
| "step": 1440 | |
| }, | |
| { | |
| "epoch": 3.197339246119734, | |
| "grad_norm": 0.4780210852622986, | |
| "learning_rate": 3.921813476440845e-06, | |
| "loss": 0.2140340805053711, | |
| "step": 1442 | |
| }, | |
| { | |
| "epoch": 3.2017738359201773, | |
| "grad_norm": 0.7639121413230896, | |
| "learning_rate": 3.901472102182168e-06, | |
| "loss": 0.2164526730775833, | |
| "step": 1444 | |
| }, | |
| { | |
| "epoch": 3.2062084257206207, | |
| "grad_norm": 0.44395381212234497, | |
| "learning_rate": 3.881226230000607e-06, | |
| "loss": 0.18533624708652496, | |
| "step": 1446 | |
| }, | |
| { | |
| "epoch": 3.210643015521064, | |
| "grad_norm": 0.5062630772590637, | |
| "learning_rate": 3.861076132279808e-06, | |
| "loss": 0.053058087825775146, | |
| "step": 1448 | |
| }, | |
| { | |
| "epoch": 3.2150776053215075, | |
| "grad_norm": 0.4987446069717407, | |
| "learning_rate": 3.8410220801148735e-06, | |
| "loss": 0.21477347612380981, | |
| "step": 1450 | |
| }, | |
| { | |
| "epoch": 3.2195121951219514, | |
| "grad_norm": 4.220211029052734, | |
| "learning_rate": 3.821064343308734e-06, | |
| "loss": 0.04978083446621895, | |
| "step": 1452 | |
| }, | |
| { | |
| "epoch": 3.223946784922395, | |
| "grad_norm": 0.555292010307312, | |
| "learning_rate": 3.8012031903685174e-06, | |
| "loss": 0.19708330929279327, | |
| "step": 1454 | |
| }, | |
| { | |
| "epoch": 3.2283813747228383, | |
| "grad_norm": 0.9038100838661194, | |
| "learning_rate": 3.7814388885019284e-06, | |
| "loss": 0.16057579219341278, | |
| "step": 1456 | |
| }, | |
| { | |
| "epoch": 3.2328159645232817, | |
| "grad_norm": 0.3948892652988434, | |
| "learning_rate": 3.7617717036136623e-06, | |
| "loss": 0.1567579060792923, | |
| "step": 1458 | |
| }, | |
| { | |
| "epoch": 3.237250554323725, | |
| "grad_norm": 0.6105815768241882, | |
| "learning_rate": 3.7422019003018174e-06, | |
| "loss": 0.15115660429000854, | |
| "step": 1460 | |
| }, | |
| { | |
| "epoch": 3.2416851441241685, | |
| "grad_norm": 0.7068625688552856, | |
| "learning_rate": 3.7227297418543464e-06, | |
| "loss": 0.17774607241153717, | |
| "step": 1462 | |
| }, | |
| { | |
| "epoch": 3.246119733924612, | |
| "grad_norm": 1.291515588760376, | |
| "learning_rate": 3.7033554902455105e-06, | |
| "loss": 0.20271697640419006, | |
| "step": 1464 | |
| }, | |
| { | |
| "epoch": 3.2505543237250554, | |
| "grad_norm": 0.4515579342842102, | |
| "learning_rate": 3.684079406132344e-06, | |
| "loss": 0.23176366090774536, | |
| "step": 1466 | |
| }, | |
| { | |
| "epoch": 3.254988913525499, | |
| "grad_norm": 0.17358291149139404, | |
| "learning_rate": 3.6649017488511684e-06, | |
| "loss": 0.035076484084129333, | |
| "step": 1468 | |
| }, | |
| { | |
| "epoch": 3.259423503325942, | |
| "grad_norm": 0.7106318473815918, | |
| "learning_rate": 3.6458227764140796e-06, | |
| "loss": 0.11743002384901047, | |
| "step": 1470 | |
| }, | |
| { | |
| "epoch": 3.2638580931263856, | |
| "grad_norm": 0.524408221244812, | |
| "learning_rate": 3.626842745505501e-06, | |
| "loss": 0.2437806874513626, | |
| "step": 1472 | |
| }, | |
| { | |
| "epoch": 3.2682926829268295, | |
| "grad_norm": 0.37512272596359253, | |
| "learning_rate": 3.607961911478708e-06, | |
| "loss": 0.03446941822767258, | |
| "step": 1474 | |
| }, | |
| { | |
| "epoch": 3.2727272727272725, | |
| "grad_norm": 0.48498690128326416, | |
| "learning_rate": 3.5891805283524055e-06, | |
| "loss": 0.15878258645534515, | |
| "step": 1476 | |
| }, | |
| { | |
| "epoch": 3.2771618625277164, | |
| "grad_norm": 0.1239403486251831, | |
| "learning_rate": 3.570498848807308e-06, | |
| "loss": 0.11845864355564117, | |
| "step": 1478 | |
| }, | |
| { | |
| "epoch": 3.2815964523281598, | |
| "grad_norm": 0.23787540197372437, | |
| "learning_rate": 3.5519171241827445e-06, | |
| "loss": 0.13304200768470764, | |
| "step": 1480 | |
| }, | |
| { | |
| "epoch": 3.286031042128603, | |
| "grad_norm": 0.46581289172172546, | |
| "learning_rate": 3.533435604473259e-06, | |
| "loss": 0.20721173286437988, | |
| "step": 1482 | |
| }, | |
| { | |
| "epoch": 3.2904656319290466, | |
| "grad_norm": 0.6229859590530396, | |
| "learning_rate": 3.515054538325272e-06, | |
| "loss": 0.19322358071804047, | |
| "step": 1484 | |
| }, | |
| { | |
| "epoch": 3.29490022172949, | |
| "grad_norm": 0.4470021426677704, | |
| "learning_rate": 3.496774173033717e-06, | |
| "loss": 0.17478328943252563, | |
| "step": 1486 | |
| }, | |
| { | |
| "epoch": 3.2993348115299335, | |
| "grad_norm": 1.0204616785049438, | |
| "learning_rate": 3.478594754538722e-06, | |
| "loss": 0.10508938133716583, | |
| "step": 1488 | |
| }, | |
| { | |
| "epoch": 3.303769401330377, | |
| "grad_norm": 0.4292312264442444, | |
| "learning_rate": 3.460516527422298e-06, | |
| "loss": 0.05400429666042328, | |
| "step": 1490 | |
| }, | |
| { | |
| "epoch": 3.3082039911308203, | |
| "grad_norm": 0.514301061630249, | |
| "learning_rate": 3.442539734905049e-06, | |
| "loss": 0.15547773241996765, | |
| "step": 1492 | |
| }, | |
| { | |
| "epoch": 3.3126385809312637, | |
| "grad_norm": 0.8231419920921326, | |
| "learning_rate": 3.424664618842897e-06, | |
| "loss": 0.1262798309326172, | |
| "step": 1494 | |
| }, | |
| { | |
| "epoch": 3.317073170731707, | |
| "grad_norm": 0.6278258562088013, | |
| "learning_rate": 3.4068914197238352e-06, | |
| "loss": 0.17141902446746826, | |
| "step": 1496 | |
| }, | |
| { | |
| "epoch": 3.3215077605321506, | |
| "grad_norm": 0.7143641710281372, | |
| "learning_rate": 3.389220376664687e-06, | |
| "loss": 0.2325032353401184, | |
| "step": 1498 | |
| }, | |
| { | |
| "epoch": 3.3259423503325944, | |
| "grad_norm": 0.6291862726211548, | |
| "learning_rate": 3.3716517274078842e-06, | |
| "loss": 0.1395445019006729, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 3.330376940133038, | |
| "grad_norm": 1.111968994140625, | |
| "learning_rate": 3.354185708318284e-06, | |
| "loss": 0.19360409677028656, | |
| "step": 1502 | |
| }, | |
| { | |
| "epoch": 3.3348115299334813, | |
| "grad_norm": 0.4316374659538269, | |
| "learning_rate": 3.3368225543799716e-06, | |
| "loss": 0.19091464579105377, | |
| "step": 1504 | |
| }, | |
| { | |
| "epoch": 3.3392461197339247, | |
| "grad_norm": 0.07719559222459793, | |
| "learning_rate": 3.3195624991931074e-06, | |
| "loss": 0.0855455994606018, | |
| "step": 1506 | |
| }, | |
| { | |
| "epoch": 3.343680709534368, | |
| "grad_norm": 0.48246321082115173, | |
| "learning_rate": 3.302405774970788e-06, | |
| "loss": 0.08791041374206543, | |
| "step": 1508 | |
| }, | |
| { | |
| "epoch": 3.3481152993348116, | |
| "grad_norm": 0.36730292439460754, | |
| "learning_rate": 3.2853526125359105e-06, | |
| "loss": 0.12776361405849457, | |
| "step": 1510 | |
| }, | |
| { | |
| "epoch": 3.352549889135255, | |
| "grad_norm": 0.09562593698501587, | |
| "learning_rate": 3.26840324131808e-06, | |
| "loss": 0.0983489602804184, | |
| "step": 1512 | |
| }, | |
| { | |
| "epoch": 3.3569844789356984, | |
| "grad_norm": 0.7086212038993835, | |
| "learning_rate": 3.251557889350514e-06, | |
| "loss": 0.23420387506484985, | |
| "step": 1514 | |
| }, | |
| { | |
| "epoch": 3.361419068736142, | |
| "grad_norm": 5.378333568572998, | |
| "learning_rate": 3.2348167832669754e-06, | |
| "loss": 0.10752184689044952, | |
| "step": 1516 | |
| }, | |
| { | |
| "epoch": 3.3658536585365852, | |
| "grad_norm": 0.5152938961982727, | |
| "learning_rate": 3.218180148298732e-06, | |
| "loss": 0.21186313033103943, | |
| "step": 1518 | |
| }, | |
| { | |
| "epoch": 3.3702882483370287, | |
| "grad_norm": 1.4693471193313599, | |
| "learning_rate": 3.201648208271507e-06, | |
| "loss": 0.19114084541797638, | |
| "step": 1520 | |
| }, | |
| { | |
| "epoch": 3.374722838137472, | |
| "grad_norm": 0.12920020520687103, | |
| "learning_rate": 3.185221185602497e-06, | |
| "loss": 0.12129313498735428, | |
| "step": 1522 | |
| }, | |
| { | |
| "epoch": 3.3791574279379155, | |
| "grad_norm": 0.8857243061065674, | |
| "learning_rate": 3.168899301297347e-06, | |
| "loss": 0.21523553133010864, | |
| "step": 1524 | |
| }, | |
| { | |
| "epoch": 3.3835920177383594, | |
| "grad_norm": 0.7426590919494629, | |
| "learning_rate": 3.152682774947202e-06, | |
| "loss": 0.1364864557981491, | |
| "step": 1526 | |
| }, | |
| { | |
| "epoch": 3.388026607538803, | |
| "grad_norm": 0.7999682426452637, | |
| "learning_rate": 3.136571824725744e-06, | |
| "loss": 0.0897040143609047, | |
| "step": 1528 | |
| }, | |
| { | |
| "epoch": 3.3924611973392462, | |
| "grad_norm": 0.6461058855056763, | |
| "learning_rate": 3.1205666673862484e-06, | |
| "loss": 0.09447822719812393, | |
| "step": 1530 | |
| }, | |
| { | |
| "epoch": 3.3968957871396896, | |
| "grad_norm": 0.3650994300842285, | |
| "learning_rate": 3.104667518258688e-06, | |
| "loss": 0.041886042803525925, | |
| "step": 1532 | |
| }, | |
| { | |
| "epoch": 3.401330376940133, | |
| "grad_norm": 1.1809720993041992, | |
| "learning_rate": 3.0888745912468123e-06, | |
| "loss": 0.13893677294254303, | |
| "step": 1534 | |
| }, | |
| { | |
| "epoch": 3.4057649667405765, | |
| "grad_norm": 0.5130560398101807, | |
| "learning_rate": 3.073188098825285e-06, | |
| "loss": 0.19634631276130676, | |
| "step": 1536 | |
| }, | |
| { | |
| "epoch": 3.41019955654102, | |
| "grad_norm": 0.7646129131317139, | |
| "learning_rate": 3.0576082520368265e-06, | |
| "loss": 0.11035222560167313, | |
| "step": 1538 | |
| }, | |
| { | |
| "epoch": 3.4146341463414633, | |
| "grad_norm": 1.119156837463379, | |
| "learning_rate": 3.0421352604893602e-06, | |
| "loss": 0.23807543516159058, | |
| "step": 1540 | |
| }, | |
| { | |
| "epoch": 3.4190687361419068, | |
| "grad_norm": 0.4573220908641815, | |
| "learning_rate": 3.0267693323532116e-06, | |
| "loss": 0.14719665050506592, | |
| "step": 1542 | |
| }, | |
| { | |
| "epoch": 3.42350332594235, | |
| "grad_norm": 0.683412492275238, | |
| "learning_rate": 3.0115106743582922e-06, | |
| "loss": 0.21427640318870544, | |
| "step": 1544 | |
| }, | |
| { | |
| "epoch": 3.4279379157427936, | |
| "grad_norm": 0.5579946637153625, | |
| "learning_rate": 2.9963594917913248e-06, | |
| "loss": 0.02915109321475029, | |
| "step": 1546 | |
| }, | |
| { | |
| "epoch": 3.4323725055432375, | |
| "grad_norm": 0.10574361681938171, | |
| "learning_rate": 2.981315988493084e-06, | |
| "loss": 0.04074406251311302, | |
| "step": 1548 | |
| }, | |
| { | |
| "epoch": 3.436807095343681, | |
| "grad_norm": 0.366202175617218, | |
| "learning_rate": 2.9663803668556424e-06, | |
| "loss": 0.22145552933216095, | |
| "step": 1550 | |
| }, | |
| { | |
| "epoch": 3.4412416851441243, | |
| "grad_norm": 0.5682427287101746, | |
| "learning_rate": 2.9515528278196665e-06, | |
| "loss": 0.25287312269210815, | |
| "step": 1552 | |
| }, | |
| { | |
| "epoch": 3.4456762749445677, | |
| "grad_norm": 0.10395639389753342, | |
| "learning_rate": 2.936833570871694e-06, | |
| "loss": 0.11668358743190765, | |
| "step": 1554 | |
| }, | |
| { | |
| "epoch": 3.450110864745011, | |
| "grad_norm": 0.631152868270874, | |
| "learning_rate": 2.922222794041464e-06, | |
| "loss": 0.23132863640785217, | |
| "step": 1556 | |
| }, | |
| { | |
| "epoch": 3.4545454545454546, | |
| "grad_norm": 0.881669282913208, | |
| "learning_rate": 2.907720693899243e-06, | |
| "loss": 0.330628901720047, | |
| "step": 1558 | |
| }, | |
| { | |
| "epoch": 3.458980044345898, | |
| "grad_norm": 0.462612122297287, | |
| "learning_rate": 2.8933274655531874e-06, | |
| "loss": 0.25399714708328247, | |
| "step": 1560 | |
| }, | |
| { | |
| "epoch": 3.4634146341463414, | |
| "grad_norm": 0.5779225826263428, | |
| "learning_rate": 2.879043302646717e-06, | |
| "loss": 0.039646755903959274, | |
| "step": 1562 | |
| }, | |
| { | |
| "epoch": 3.467849223946785, | |
| "grad_norm": 0.32095006108283997, | |
| "learning_rate": 2.8648683973559054e-06, | |
| "loss": 0.23187652230262756, | |
| "step": 1564 | |
| }, | |
| { | |
| "epoch": 3.4722838137472283, | |
| "grad_norm": 0.3223656415939331, | |
| "learning_rate": 2.8508029403868962e-06, | |
| "loss": 0.09090401232242584, | |
| "step": 1566 | |
| }, | |
| { | |
| "epoch": 3.4767184035476717, | |
| "grad_norm": 0.5520133376121521, | |
| "learning_rate": 2.836847120973345e-06, | |
| "loss": 0.15556883811950684, | |
| "step": 1568 | |
| }, | |
| { | |
| "epoch": 3.481152993348115, | |
| "grad_norm": 0.47338053584098816, | |
| "learning_rate": 2.8230011268738593e-06, | |
| "loss": 0.09746363013982773, | |
| "step": 1570 | |
| }, | |
| { | |
| "epoch": 3.4855875831485585, | |
| "grad_norm": 0.1202714741230011, | |
| "learning_rate": 2.8092651443694886e-06, | |
| "loss": 0.13933829963207245, | |
| "step": 1572 | |
| }, | |
| { | |
| "epoch": 3.4900221729490024, | |
| "grad_norm": 0.6928906440734863, | |
| "learning_rate": 2.795639358261202e-06, | |
| "loss": 0.43705928325653076, | |
| "step": 1574 | |
| }, | |
| { | |
| "epoch": 3.494456762749446, | |
| "grad_norm": 0.22218959033489227, | |
| "learning_rate": 2.782123951867415e-06, | |
| "loss": 0.12843255698680878, | |
| "step": 1576 | |
| }, | |
| { | |
| "epoch": 3.4988913525498893, | |
| "grad_norm": 0.4401395618915558, | |
| "learning_rate": 2.7687191070215174e-06, | |
| "loss": 0.11058890074491501, | |
| "step": 1578 | |
| }, | |
| { | |
| "epoch": 3.5033259423503327, | |
| "grad_norm": 0.4982577860355377, | |
| "learning_rate": 2.755425004069424e-06, | |
| "loss": 0.20767910778522491, | |
| "step": 1580 | |
| }, | |
| { | |
| "epoch": 3.507760532150776, | |
| "grad_norm": 0.5209600925445557, | |
| "learning_rate": 2.7422418218671586e-06, | |
| "loss": 0.3028036952018738, | |
| "step": 1582 | |
| }, | |
| { | |
| "epoch": 3.5121951219512195, | |
| "grad_norm": 0.6526494026184082, | |
| "learning_rate": 2.7291697377784325e-06, | |
| "loss": 0.13182812929153442, | |
| "step": 1584 | |
| }, | |
| { | |
| "epoch": 3.516629711751663, | |
| "grad_norm": 0.5955665707588196, | |
| "learning_rate": 2.7162089276722746e-06, | |
| "loss": 0.11612501740455627, | |
| "step": 1586 | |
| }, | |
| { | |
| "epoch": 3.5210643015521064, | |
| "grad_norm": 0.5240582227706909, | |
| "learning_rate": 2.703359565920651e-06, | |
| "loss": 0.19106577336788177, | |
| "step": 1588 | |
| }, | |
| { | |
| "epoch": 3.52549889135255, | |
| "grad_norm": 0.5816933512687683, | |
| "learning_rate": 2.6906218253961285e-06, | |
| "loss": 0.052692461758852005, | |
| "step": 1590 | |
| }, | |
| { | |
| "epoch": 3.529933481152993, | |
| "grad_norm": 1.794288992881775, | |
| "learning_rate": 2.6779958774695487e-06, | |
| "loss": 0.15381264686584473, | |
| "step": 1592 | |
| }, | |
| { | |
| "epoch": 3.5343680709534366, | |
| "grad_norm": 0.6399196982383728, | |
| "learning_rate": 2.665481892007714e-06, | |
| "loss": 0.25606346130371094, | |
| "step": 1594 | |
| }, | |
| { | |
| "epoch": 3.5388026607538805, | |
| "grad_norm": 0.4062730371952057, | |
| "learning_rate": 2.6530800373711097e-06, | |
| "loss": 0.021856600418686867, | |
| "step": 1596 | |
| }, | |
| { | |
| "epoch": 3.5432372505543235, | |
| "grad_norm": 0.5443702936172485, | |
| "learning_rate": 2.640790480411638e-06, | |
| "loss": 0.08779677748680115, | |
| "step": 1598 | |
| }, | |
| { | |
| "epoch": 3.5476718403547673, | |
| "grad_norm": 1.7016083002090454, | |
| "learning_rate": 2.628613386470371e-06, | |
| "loss": 0.1265704333782196, | |
| "step": 1600 | |
| }, | |
| { | |
| "epoch": 3.5521064301552108, | |
| "grad_norm": 0.5498143434524536, | |
| "learning_rate": 2.61654891937533e-06, | |
| "loss": 0.19086270034313202, | |
| "step": 1602 | |
| }, | |
| { | |
| "epoch": 3.556541019955654, | |
| "grad_norm": 0.5192769765853882, | |
| "learning_rate": 2.6045972414392735e-06, | |
| "loss": 0.3860751688480377, | |
| "step": 1604 | |
| }, | |
| { | |
| "epoch": 3.5609756097560976, | |
| "grad_norm": 1.643974781036377, | |
| "learning_rate": 2.5927585134575233e-06, | |
| "loss": 0.2832165062427521, | |
| "step": 1606 | |
| }, | |
| { | |
| "epoch": 3.565410199556541, | |
| "grad_norm": 0.16696669161319733, | |
| "learning_rate": 2.581032894705798e-06, | |
| "loss": 0.013047085143625736, | |
| "step": 1608 | |
| }, | |
| { | |
| "epoch": 3.5698447893569845, | |
| "grad_norm": 0.5006920099258423, | |
| "learning_rate": 2.5694205429380616e-06, | |
| "loss": 0.17075103521347046, | |
| "step": 1610 | |
| }, | |
| { | |
| "epoch": 3.574279379157428, | |
| "grad_norm": 0.4067634642124176, | |
| "learning_rate": 2.5579216143844153e-06, | |
| "loss": 0.049309611320495605, | |
| "step": 1612 | |
| }, | |
| { | |
| "epoch": 3.5787139689578713, | |
| "grad_norm": 0.8766622543334961, | |
| "learning_rate": 2.5465362637489847e-06, | |
| "loss": 0.1669972687959671, | |
| "step": 1614 | |
| }, | |
| { | |
| "epoch": 3.5831485587583147, | |
| "grad_norm": 0.7486819624900818, | |
| "learning_rate": 2.5352646442078472e-06, | |
| "loss": 0.20184892416000366, | |
| "step": 1616 | |
| }, | |
| { | |
| "epoch": 3.587583148558758, | |
| "grad_norm": 0.6373207569122314, | |
| "learning_rate": 2.524106907406959e-06, | |
| "loss": 0.1479307860136032, | |
| "step": 1618 | |
| }, | |
| { | |
| "epoch": 3.5920177383592016, | |
| "grad_norm": 1.1294218301773071, | |
| "learning_rate": 2.513063203460127e-06, | |
| "loss": 0.15324336290359497, | |
| "step": 1620 | |
| }, | |
| { | |
| "epoch": 3.5964523281596454, | |
| "grad_norm": 0.4940034747123718, | |
| "learning_rate": 2.502133680946985e-06, | |
| "loss": 0.260329931974411, | |
| "step": 1622 | |
| }, | |
| { | |
| "epoch": 3.6008869179600884, | |
| "grad_norm": 0.5072565674781799, | |
| "learning_rate": 2.4913184869109925e-06, | |
| "loss": 0.14236906170845032, | |
| "step": 1624 | |
| }, | |
| { | |
| "epoch": 3.6053215077605323, | |
| "grad_norm": 0.14243106544017792, | |
| "learning_rate": 2.4806177668574564e-06, | |
| "loss": 0.03839609771966934, | |
| "step": 1626 | |
| }, | |
| { | |
| "epoch": 3.6097560975609757, | |
| "grad_norm": 0.462656706571579, | |
| "learning_rate": 2.4700316647515805e-06, | |
| "loss": 0.1687300205230713, | |
| "step": 1628 | |
| }, | |
| { | |
| "epoch": 3.614190687361419, | |
| "grad_norm": 2.395517587661743, | |
| "learning_rate": 2.459560323016518e-06, | |
| "loss": 0.11912352591753006, | |
| "step": 1630 | |
| }, | |
| { | |
| "epoch": 3.6186252771618626, | |
| "grad_norm": 0.4201103746891022, | |
| "learning_rate": 2.4492038825314637e-06, | |
| "loss": 0.148905947804451, | |
| "step": 1632 | |
| }, | |
| { | |
| "epoch": 3.623059866962306, | |
| "grad_norm": 1.4418302774429321, | |
| "learning_rate": 2.438962482629751e-06, | |
| "loss": 0.19345171749591827, | |
| "step": 1634 | |
| }, | |
| { | |
| "epoch": 3.6274944567627494, | |
| "grad_norm": 0.47817596793174744, | |
| "learning_rate": 2.42883626109699e-06, | |
| "loss": 0.12935222685337067, | |
| "step": 1636 | |
| }, | |
| { | |
| "epoch": 3.631929046563193, | |
| "grad_norm": 0.543739914894104, | |
| "learning_rate": 2.4188253541691973e-06, | |
| "loss": 0.1430729478597641, | |
| "step": 1638 | |
| }, | |
| { | |
| "epoch": 3.6363636363636362, | |
| "grad_norm": 0.43734827637672424, | |
| "learning_rate": 2.4089298965309753e-06, | |
| "loss": 0.19318100810050964, | |
| "step": 1640 | |
| }, | |
| { | |
| "epoch": 3.6407982261640797, | |
| "grad_norm": 0.2320224642753601, | |
| "learning_rate": 2.399150021313699e-06, | |
| "loss": 0.0949181392788887, | |
| "step": 1642 | |
| }, | |
| { | |
| "epoch": 3.6452328159645235, | |
| "grad_norm": 0.6401042938232422, | |
| "learning_rate": 2.389485860093715e-06, | |
| "loss": 0.2700011730194092, | |
| "step": 1644 | |
| }, | |
| { | |
| "epoch": 3.6496674057649665, | |
| "grad_norm": 0.12314002215862274, | |
| "learning_rate": 2.3799375428905864e-06, | |
| "loss": 0.07954643666744232, | |
| "step": 1646 | |
| }, | |
| { | |
| "epoch": 3.6541019955654104, | |
| "grad_norm": 0.7883126735687256, | |
| "learning_rate": 2.3705051981653315e-06, | |
| "loss": 0.07769718766212463, | |
| "step": 1648 | |
| }, | |
| { | |
| "epoch": 3.658536585365854, | |
| "grad_norm": 0.7627129554748535, | |
| "learning_rate": 2.361188952818697e-06, | |
| "loss": 0.2676461338996887, | |
| "step": 1650 | |
| }, | |
| { | |
| "epoch": 3.662971175166297, | |
| "grad_norm": 0.8268294334411621, | |
| "learning_rate": 2.3519889321894603e-06, | |
| "loss": 0.4033682346343994, | |
| "step": 1652 | |
| }, | |
| { | |
| "epoch": 3.6674057649667406, | |
| "grad_norm": 2.1596076488494873, | |
| "learning_rate": 2.34290526005273e-06, | |
| "loss": 0.09330250322818756, | |
| "step": 1654 | |
| }, | |
| { | |
| "epoch": 3.671840354767184, | |
| "grad_norm": 0.6786802411079407, | |
| "learning_rate": 2.3339380586182904e-06, | |
| "loss": 0.23048776388168335, | |
| "step": 1656 | |
| }, | |
| { | |
| "epoch": 3.6762749445676275, | |
| "grad_norm": 0.8763942718505859, | |
| "learning_rate": 2.3250874485289545e-06, | |
| "loss": 0.13142776489257812, | |
| "step": 1658 | |
| }, | |
| { | |
| "epoch": 3.680709534368071, | |
| "grad_norm": 0.49550583958625793, | |
| "learning_rate": 2.3163535488589363e-06, | |
| "loss": 0.17957837879657745, | |
| "step": 1660 | |
| }, | |
| { | |
| "epoch": 3.6851441241685143, | |
| "grad_norm": 0.08660886436700821, | |
| "learning_rate": 2.3077364771122573e-06, | |
| "loss": 0.12105847150087357, | |
| "step": 1662 | |
| }, | |
| { | |
| "epoch": 3.6895787139689578, | |
| "grad_norm": 0.2725079655647278, | |
| "learning_rate": 2.299236349221157e-06, | |
| "loss": 0.06378458440303802, | |
| "step": 1664 | |
| }, | |
| { | |
| "epoch": 3.694013303769401, | |
| "grad_norm": 0.40256035327911377, | |
| "learning_rate": 2.2908532795445414e-06, | |
| "loss": 0.187424436211586, | |
| "step": 1666 | |
| }, | |
| { | |
| "epoch": 3.6984478935698446, | |
| "grad_norm": 0.4576587975025177, | |
| "learning_rate": 2.2825873808664363e-06, | |
| "loss": 0.25221118330955505, | |
| "step": 1668 | |
| }, | |
| { | |
| "epoch": 3.7028824833702885, | |
| "grad_norm": 0.5043409466743469, | |
| "learning_rate": 2.2744387643944757e-06, | |
| "loss": 0.1796739250421524, | |
| "step": 1670 | |
| }, | |
| { | |
| "epoch": 3.7073170731707314, | |
| "grad_norm": 0.5289079546928406, | |
| "learning_rate": 2.2664075397584066e-06, | |
| "loss": 0.15418490767478943, | |
| "step": 1672 | |
| }, | |
| { | |
| "epoch": 3.7117516629711753, | |
| "grad_norm": 0.5016271471977234, | |
| "learning_rate": 2.258493815008605e-06, | |
| "loss": 0.23040637373924255, | |
| "step": 1674 | |
| }, | |
| { | |
| "epoch": 3.7161862527716187, | |
| "grad_norm": 0.5144860744476318, | |
| "learning_rate": 2.2506976966146355e-06, | |
| "loss": 0.21655163168907166, | |
| "step": 1676 | |
| }, | |
| { | |
| "epoch": 3.720620842572062, | |
| "grad_norm": 0.5468173027038574, | |
| "learning_rate": 2.2430192894638077e-06, | |
| "loss": 0.19511225819587708, | |
| "step": 1678 | |
| }, | |
| { | |
| "epoch": 3.7250554323725056, | |
| "grad_norm": 0.6539567112922668, | |
| "learning_rate": 2.235458696859768e-06, | |
| "loss": 0.05055548995733261, | |
| "step": 1680 | |
| }, | |
| { | |
| "epoch": 3.729490022172949, | |
| "grad_norm": 0.5066478848457336, | |
| "learning_rate": 2.228016020521116e-06, | |
| "loss": 0.17900614440441132, | |
| "step": 1682 | |
| }, | |
| { | |
| "epoch": 3.7339246119733924, | |
| "grad_norm": 0.5024972558021545, | |
| "learning_rate": 2.2206913605800267e-06, | |
| "loss": 0.12050139158964157, | |
| "step": 1684 | |
| }, | |
| { | |
| "epoch": 3.738359201773836, | |
| "grad_norm": 0.5398685932159424, | |
| "learning_rate": 2.213484815580911e-06, | |
| "loss": 0.12008091807365417, | |
| "step": 1686 | |
| }, | |
| { | |
| "epoch": 3.7427937915742793, | |
| "grad_norm": 0.10834494233131409, | |
| "learning_rate": 2.206396482479084e-06, | |
| "loss": 0.03123791143298149, | |
| "step": 1688 | |
| }, | |
| { | |
| "epoch": 3.7472283813747227, | |
| "grad_norm": 0.6228474974632263, | |
| "learning_rate": 2.199426456639465e-06, | |
| "loss": 0.22591347992420197, | |
| "step": 1690 | |
| }, | |
| { | |
| "epoch": 3.7516629711751666, | |
| "grad_norm": 0.8757428526878357, | |
| "learning_rate": 2.192574831835291e-06, | |
| "loss": 0.1378636211156845, | |
| "step": 1692 | |
| }, | |
| { | |
| "epoch": 3.7560975609756095, | |
| "grad_norm": 0.5694209933280945, | |
| "learning_rate": 2.185841700246857e-06, | |
| "loss": 0.24412274360656738, | |
| "step": 1694 | |
| }, | |
| { | |
| "epoch": 3.7605321507760534, | |
| "grad_norm": 0.494783490896225, | |
| "learning_rate": 2.1792271524602786e-06, | |
| "loss": 0.23211520910263062, | |
| "step": 1696 | |
| }, | |
| { | |
| "epoch": 3.764966740576497, | |
| "grad_norm": 0.5232568979263306, | |
| "learning_rate": 2.1727312774662656e-06, | |
| "loss": 0.12440581619739532, | |
| "step": 1698 | |
| }, | |
| { | |
| "epoch": 3.7694013303769403, | |
| "grad_norm": 0.4039710462093353, | |
| "learning_rate": 2.1663541626589337e-06, | |
| "loss": 0.11090154200792313, | |
| "step": 1700 | |
| }, | |
| { | |
| "epoch": 3.7738359201773837, | |
| "grad_norm": 0.48914211988449097, | |
| "learning_rate": 2.1600958938346202e-06, | |
| "loss": 0.5025262832641602, | |
| "step": 1702 | |
| }, | |
| { | |
| "epoch": 3.778270509977827, | |
| "grad_norm": 0.18319113552570343, | |
| "learning_rate": 2.153956555190738e-06, | |
| "loss": 0.02325468324124813, | |
| "step": 1704 | |
| }, | |
| { | |
| "epoch": 3.7827050997782705, | |
| "grad_norm": 0.10409087687730789, | |
| "learning_rate": 2.147936229324637e-06, | |
| "loss": 0.1210860013961792, | |
| "step": 1706 | |
| }, | |
| { | |
| "epoch": 3.787139689578714, | |
| "grad_norm": 0.5911566615104675, | |
| "learning_rate": 2.1420349972324942e-06, | |
| "loss": 0.11488822847604752, | |
| "step": 1708 | |
| }, | |
| { | |
| "epoch": 3.7915742793791574, | |
| "grad_norm": 0.5132036209106445, | |
| "learning_rate": 2.1362529383082255e-06, | |
| "loss": 0.30858707427978516, | |
| "step": 1710 | |
| }, | |
| { | |
| "epoch": 3.796008869179601, | |
| "grad_norm": 0.28792333602905273, | |
| "learning_rate": 2.1305901303424143e-06, | |
| "loss": 0.1212579756975174, | |
| "step": 1712 | |
| }, | |
| { | |
| "epoch": 3.800443458980044, | |
| "grad_norm": 0.7928282618522644, | |
| "learning_rate": 2.1250466495212697e-06, | |
| "loss": 0.1450139433145523, | |
| "step": 1714 | |
| }, | |
| { | |
| "epoch": 3.8048780487804876, | |
| "grad_norm": 2.0321249961853027, | |
| "learning_rate": 2.119622570425598e-06, | |
| "loss": 0.19779935479164124, | |
| "step": 1716 | |
| }, | |
| { | |
| "epoch": 3.8093126385809315, | |
| "grad_norm": 0.3824866712093353, | |
| "learning_rate": 2.1143179660298e-06, | |
| "loss": 0.1265445351600647, | |
| "step": 1718 | |
| }, | |
| { | |
| "epoch": 3.8137472283813745, | |
| "grad_norm": 0.5937016606330872, | |
| "learning_rate": 2.109132907700888e-06, | |
| "loss": 0.11517294496297836, | |
| "step": 1720 | |
| }, | |
| { | |
| "epoch": 3.8181818181818183, | |
| "grad_norm": 0.7648696899414062, | |
| "learning_rate": 2.1040674651975297e-06, | |
| "loss": 0.21361251175403595, | |
| "step": 1722 | |
| }, | |
| { | |
| "epoch": 3.8226164079822618, | |
| "grad_norm": 0.8102192282676697, | |
| "learning_rate": 2.099121706669106e-06, | |
| "loss": 0.24782630801200867, | |
| "step": 1724 | |
| }, | |
| { | |
| "epoch": 3.827050997782705, | |
| "grad_norm": 0.5768070220947266, | |
| "learning_rate": 2.0942956986547953e-06, | |
| "loss": 0.3066186010837555, | |
| "step": 1726 | |
| }, | |
| { | |
| "epoch": 3.8314855875831486, | |
| "grad_norm": 0.4514220356941223, | |
| "learning_rate": 2.0895895060826777e-06, | |
| "loss": 0.08890463411808014, | |
| "step": 1728 | |
| }, | |
| { | |
| "epoch": 3.835920177383592, | |
| "grad_norm": 0.387746661901474, | |
| "learning_rate": 2.085003192268862e-06, | |
| "loss": 0.11902990192174911, | |
| "step": 1730 | |
| }, | |
| { | |
| "epoch": 3.8403547671840355, | |
| "grad_norm": 0.5934492349624634, | |
| "learning_rate": 2.0805368189166347e-06, | |
| "loss": 0.26432839035987854, | |
| "step": 1732 | |
| }, | |
| { | |
| "epoch": 3.844789356984479, | |
| "grad_norm": 0.8290284276008606, | |
| "learning_rate": 2.076190446115625e-06, | |
| "loss": 0.13800962269306183, | |
| "step": 1734 | |
| }, | |
| { | |
| "epoch": 3.8492239467849223, | |
| "grad_norm": 0.42487943172454834, | |
| "learning_rate": 2.0719641323410084e-06, | |
| "loss": 0.1366715282201767, | |
| "step": 1736 | |
| }, | |
| { | |
| "epoch": 3.8536585365853657, | |
| "grad_norm": 0.5068730711936951, | |
| "learning_rate": 2.0678579344527038e-06, | |
| "loss": 0.18744944036006927, | |
| "step": 1738 | |
| }, | |
| { | |
| "epoch": 3.858093126385809, | |
| "grad_norm": 0.45418834686279297, | |
| "learning_rate": 2.0638719076946213e-06, | |
| "loss": 0.12399666011333466, | |
| "step": 1740 | |
| }, | |
| { | |
| "epoch": 3.8625277161862526, | |
| "grad_norm": 0.5102381706237793, | |
| "learning_rate": 2.060006105693913e-06, | |
| "loss": 0.11724897474050522, | |
| "step": 1742 | |
| }, | |
| { | |
| "epoch": 3.8669623059866964, | |
| "grad_norm": 0.5589990615844727, | |
| "learning_rate": 2.056260580460251e-06, | |
| "loss": 0.15366147458553314, | |
| "step": 1744 | |
| }, | |
| { | |
| "epoch": 3.8713968957871394, | |
| "grad_norm": 0.4272408187389374, | |
| "learning_rate": 2.052635382385134e-06, | |
| "loss": 0.16997916996479034, | |
| "step": 1746 | |
| }, | |
| { | |
| "epoch": 3.8758314855875833, | |
| "grad_norm": 0.8717123866081238, | |
| "learning_rate": 2.0491305602411997e-06, | |
| "loss": 0.11671534180641174, | |
| "step": 1748 | |
| }, | |
| { | |
| "epoch": 3.8802660753880267, | |
| "grad_norm": 0.5559657216072083, | |
| "learning_rate": 2.0457461611815782e-06, | |
| "loss": 0.15400242805480957, | |
| "step": 1750 | |
| }, | |
| { | |
| "epoch": 3.88470066518847, | |
| "grad_norm": 0.6432749032974243, | |
| "learning_rate": 2.0424822307392493e-06, | |
| "loss": 0.18111613392829895, | |
| "step": 1752 | |
| }, | |
| { | |
| "epoch": 3.8891352549889135, | |
| "grad_norm": 0.5203759074211121, | |
| "learning_rate": 2.039338812826436e-06, | |
| "loss": 0.17084263265132904, | |
| "step": 1754 | |
| }, | |
| { | |
| "epoch": 3.893569844789357, | |
| "grad_norm": 1.0190702676773071, | |
| "learning_rate": 2.036315949734011e-06, | |
| "loss": 0.1340053379535675, | |
| "step": 1756 | |
| }, | |
| { | |
| "epoch": 3.8980044345898004, | |
| "grad_norm": 2.1606268882751465, | |
| "learning_rate": 2.0334136821309286e-06, | |
| "loss": 0.23111629486083984, | |
| "step": 1758 | |
| }, | |
| { | |
| "epoch": 3.902439024390244, | |
| "grad_norm": 0.013512303121387959, | |
| "learning_rate": 2.0306320490636767e-06, | |
| "loss": 0.04244675859808922, | |
| "step": 1760 | |
| }, | |
| { | |
| "epoch": 3.9068736141906872, | |
| "grad_norm": 0.33751603960990906, | |
| "learning_rate": 2.027971087955753e-06, | |
| "loss": 0.050674207508563995, | |
| "step": 1762 | |
| }, | |
| { | |
| "epoch": 3.9113082039911307, | |
| "grad_norm": 0.043730415403842926, | |
| "learning_rate": 2.0254308346071574e-06, | |
| "loss": 0.13882163166999817, | |
| "step": 1764 | |
| }, | |
| { | |
| "epoch": 3.9157427937915745, | |
| "grad_norm": 0.37790897488594055, | |
| "learning_rate": 2.023011323193917e-06, | |
| "loss": 0.16915282607078552, | |
| "step": 1766 | |
| }, | |
| { | |
| "epoch": 3.9201773835920175, | |
| "grad_norm": 0.5498037338256836, | |
| "learning_rate": 2.020712586267621e-06, | |
| "loss": 0.24210064113140106, | |
| "step": 1768 | |
| }, | |
| { | |
| "epoch": 3.9246119733924614, | |
| "grad_norm": 0.5351380109786987, | |
| "learning_rate": 2.018534654754984e-06, | |
| "loss": 0.2681524157524109, | |
| "step": 1770 | |
| }, | |
| { | |
| "epoch": 3.929046563192905, | |
| "grad_norm": 0.7002694606781006, | |
| "learning_rate": 2.016477557957432e-06, | |
| "loss": 0.0865524411201477, | |
| "step": 1772 | |
| }, | |
| { | |
| "epoch": 3.933481152993348, | |
| "grad_norm": 1.273296594619751, | |
| "learning_rate": 2.0145413235507057e-06, | |
| "loss": 0.15231235325336456, | |
| "step": 1774 | |
| }, | |
| { | |
| "epoch": 3.9379157427937916, | |
| "grad_norm": 0.560060977935791, | |
| "learning_rate": 2.0127259775844882e-06, | |
| "loss": 0.2978004813194275, | |
| "step": 1776 | |
| }, | |
| { | |
| "epoch": 3.942350332594235, | |
| "grad_norm": 0.08104455471038818, | |
| "learning_rate": 2.0110315444820557e-06, | |
| "loss": 0.015620124526321888, | |
| "step": 1778 | |
| }, | |
| { | |
| "epoch": 3.9467849223946785, | |
| "grad_norm": 1.2214912176132202, | |
| "learning_rate": 2.0094580470399507e-06, | |
| "loss": 0.08288650959730148, | |
| "step": 1780 | |
| }, | |
| { | |
| "epoch": 3.951219512195122, | |
| "grad_norm": 0.08862635493278503, | |
| "learning_rate": 2.0080055064276703e-06, | |
| "loss": 0.11820105463266373, | |
| "step": 1782 | |
| }, | |
| { | |
| "epoch": 3.9556541019955653, | |
| "grad_norm": 0.6492655277252197, | |
| "learning_rate": 2.0066739421873856e-06, | |
| "loss": 0.23602721095085144, | |
| "step": 1784 | |
| }, | |
| { | |
| "epoch": 3.9600886917960088, | |
| "grad_norm": 0.4690077602863312, | |
| "learning_rate": 2.0054633722336776e-06, | |
| "loss": 0.17881526052951813, | |
| "step": 1786 | |
| }, | |
| { | |
| "epoch": 3.964523281596452, | |
| "grad_norm": 0.4643179774284363, | |
| "learning_rate": 2.0043738128532943e-06, | |
| "loss": 0.1461382508277893, | |
| "step": 1788 | |
| }, | |
| { | |
| "epoch": 3.9689578713968956, | |
| "grad_norm": 0.33378270268440247, | |
| "learning_rate": 2.003405278704937e-06, | |
| "loss": 0.12822888791561127, | |
| "step": 1790 | |
| }, | |
| { | |
| "epoch": 3.9733924611973395, | |
| "grad_norm": 0.7190065979957581, | |
| "learning_rate": 2.002557782819055e-06, | |
| "loss": 0.1802365928888321, | |
| "step": 1792 | |
| }, | |
| { | |
| "epoch": 3.9778270509977824, | |
| "grad_norm": 0.6157906651496887, | |
| "learning_rate": 2.001831336597679e-06, | |
| "loss": 0.09615038335323334, | |
| "step": 1794 | |
| }, | |
| { | |
| "epoch": 3.9822616407982263, | |
| "grad_norm": 0.4767264127731323, | |
| "learning_rate": 2.0012259498142596e-06, | |
| "loss": 0.13788002729415894, | |
| "step": 1796 | |
| }, | |
| { | |
| "epoch": 3.9866962305986697, | |
| "grad_norm": 0.499039888381958, | |
| "learning_rate": 2.00074163061354e-06, | |
| "loss": 0.18137040734291077, | |
| "step": 1798 | |
| }, | |
| { | |
| "epoch": 3.991130820399113, | |
| "grad_norm": 0.6075534224510193, | |
| "learning_rate": 2.000378385511451e-06, | |
| "loss": 0.10324703902006149, | |
| "step": 1800 | |
| }, | |
| { | |
| "epoch": 3.9955654101995566, | |
| "grad_norm": 0.5070518851280212, | |
| "learning_rate": 2.000136219395011e-06, | |
| "loss": 0.16305242478847504, | |
| "step": 1802 | |
| }, | |
| { | |
| "epoch": 4.0, | |
| "grad_norm": 0.4194043278694153, | |
| "learning_rate": 2.0000151355222728e-06, | |
| "loss": 0.06611192226409912, | |
| "step": 1804 | |
| }, | |
| { | |
| "epoch": 4.0, | |
| "step": 1804, | |
| "total_flos": 3.4175049861232067e+18, | |
| "train_loss": 0.6816160985415268, | |
| "train_runtime": 8301.6433, | |
| "train_samples_per_second": 6.519, | |
| "train_steps_per_second": 0.217 | |
| } | |
| ], | |
| "logging_steps": 2, | |
| "max_steps": 1804, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 4, | |
| "save_steps": 99999, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": false, | |
| "should_training_stop": false | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 3.4175049861232067e+18, | |
| "train_batch_size": 3, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |