Instructions to use furproxy/9b-18 with libraries, inference providers, notebooks, and local apps. Follow these links to get started.
- Libraries
- PEFT
How to use furproxy/9b-18 with PEFT:
from peft import PeftModel from transformers import AutoModelForCausalLM base_model = AutoModelForCausalLM.from_pretrained("/workspace/models/Qwen3.5-9B") model = PeftModel.from_pretrained(base_model, "furproxy/9b-18") - Transformers
How to use furproxy/9b-18 with Transformers:
# Use a pipeline as a high-level helper from transformers import pipeline pipe = pipeline("text-generation", model="furproxy/9b-18") messages = [ {"role": "user", "content": "Who are you?"}, ] pipe(messages)# Load model directly from transformers import AutoModel model = AutoModel.from_pretrained("furproxy/9b-18", dtype="auto") - Notebooks
- Google Colab
- Kaggle
- Local Apps Settings
- vLLM
How to use furproxy/9b-18 with vLLM:
Install from pip and serve model
# Install vLLM from pip: pip install vllm # Start the vLLM server: vllm serve "furproxy/9b-18" # Call the server using curl (OpenAI-compatible API): curl -X POST "http://localhost:8000/v1/chat/completions" \ -H "Content-Type: application/json" \ --data '{ "model": "furproxy/9b-18", "messages": [ { "role": "user", "content": "What is the capital of France?" } ] }'Use Docker
docker model run hf.co/furproxy/9b-18
- SGLang
How to use furproxy/9b-18 with SGLang:
Install from pip and serve model
# Install SGLang from pip: pip install sglang # Start the SGLang server: python3 -m sglang.launch_server \ --model-path "furproxy/9b-18" \ --host 0.0.0.0 \ --port 30000 # Call the server using curl (OpenAI-compatible API): curl -X POST "http://localhost:30000/v1/chat/completions" \ -H "Content-Type: application/json" \ --data '{ "model": "furproxy/9b-18", "messages": [ { "role": "user", "content": "What is the capital of France?" } ] }'Use Docker images
docker run --gpus all \ --shm-size 32g \ -p 30000:30000 \ -v ~/.cache/huggingface:/root/.cache/huggingface \ --env "HF_TOKEN=<secret>" \ --ipc=host \ lmsysorg/sglang:latest \ python3 -m sglang.launch_server \ --model-path "furproxy/9b-18" \ --host 0.0.0.0 \ --port 30000 # Call the server using curl (OpenAI-compatible API): curl -X POST "http://localhost:30000/v1/chat/completions" \ -H "Content-Type: application/json" \ --data '{ "model": "furproxy/9b-18", "messages": [ { "role": "user", "content": "What is the capital of France?" } ] }' - Docker Model Runner
How to use furproxy/9b-18 with Docker Model Runner:
docker model run hf.co/furproxy/9b-18
| { | |
| "best_global_step": null, | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 3.0, | |
| "eval_steps": 500, | |
| "global_step": 2058, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.0029154518950437317, | |
| "grad_norm": 0.08236155658960342, | |
| "learning_rate": 1.4563106796116505e-07, | |
| "loss": 1.120621681213379, | |
| "step": 2 | |
| }, | |
| { | |
| "epoch": 0.0058309037900874635, | |
| "grad_norm": 0.5269019603729248, | |
| "learning_rate": 4.368932038834952e-07, | |
| "loss": 1.9105433225631714, | |
| "step": 4 | |
| }, | |
| { | |
| "epoch": 0.008746355685131196, | |
| "grad_norm": 0.3449331521987915, | |
| "learning_rate": 7.281553398058253e-07, | |
| "loss": 1.8805404901504517, | |
| "step": 6 | |
| }, | |
| { | |
| "epoch": 0.011661807580174927, | |
| "grad_norm": 0.10853756964206696, | |
| "learning_rate": 1.0194174757281554e-06, | |
| "loss": 1.5699371099472046, | |
| "step": 8 | |
| }, | |
| { | |
| "epoch": 0.014577259475218658, | |
| "grad_norm": 1.1428029537200928, | |
| "learning_rate": 1.3106796116504856e-06, | |
| "loss": 1.4362584352493286, | |
| "step": 10 | |
| }, | |
| { | |
| "epoch": 0.01749271137026239, | |
| "grad_norm": 0.5868045091629028, | |
| "learning_rate": 1.6019417475728156e-06, | |
| "loss": 2.0035324096679688, | |
| "step": 12 | |
| }, | |
| { | |
| "epoch": 0.02040816326530612, | |
| "grad_norm": 0.08258485049009323, | |
| "learning_rate": 1.8932038834951458e-06, | |
| "loss": 1.5183849334716797, | |
| "step": 14 | |
| }, | |
| { | |
| "epoch": 0.023323615160349854, | |
| "grad_norm": 2.6764633655548096, | |
| "learning_rate": 2.1844660194174755e-06, | |
| "loss": 1.8052839040756226, | |
| "step": 16 | |
| }, | |
| { | |
| "epoch": 0.026239067055393587, | |
| "grad_norm": 0.342227965593338, | |
| "learning_rate": 2.475728155339806e-06, | |
| "loss": 1.8929893970489502, | |
| "step": 18 | |
| }, | |
| { | |
| "epoch": 0.029154518950437316, | |
| "grad_norm": 0.1563744992017746, | |
| "learning_rate": 2.766990291262136e-06, | |
| "loss": 1.7904902696609497, | |
| "step": 20 | |
| }, | |
| { | |
| "epoch": 0.03206997084548105, | |
| "grad_norm": 0.1933348923921585, | |
| "learning_rate": 3.058252427184466e-06, | |
| "loss": 1.4513907432556152, | |
| "step": 22 | |
| }, | |
| { | |
| "epoch": 0.03498542274052478, | |
| "grad_norm": 0.5887855291366577, | |
| "learning_rate": 3.3495145631067963e-06, | |
| "loss": 2.2697947025299072, | |
| "step": 24 | |
| }, | |
| { | |
| "epoch": 0.037900874635568516, | |
| "grad_norm": 0.33995822072029114, | |
| "learning_rate": 3.6407766990291263e-06, | |
| "loss": 1.7317644357681274, | |
| "step": 26 | |
| }, | |
| { | |
| "epoch": 0.04081632653061224, | |
| "grad_norm": 0.6888100504875183, | |
| "learning_rate": 3.932038834951457e-06, | |
| "loss": 1.8117475509643555, | |
| "step": 28 | |
| }, | |
| { | |
| "epoch": 0.043731778425655975, | |
| "grad_norm": 0.7777397036552429, | |
| "learning_rate": 4.223300970873786e-06, | |
| "loss": 1.8055756092071533, | |
| "step": 30 | |
| }, | |
| { | |
| "epoch": 0.04664723032069971, | |
| "grad_norm": 0.4058018624782562, | |
| "learning_rate": 4.514563106796117e-06, | |
| "loss": 1.9432220458984375, | |
| "step": 32 | |
| }, | |
| { | |
| "epoch": 0.04956268221574344, | |
| "grad_norm": 0.22737905383110046, | |
| "learning_rate": 4.805825242718447e-06, | |
| "loss": 1.6058305501937866, | |
| "step": 34 | |
| }, | |
| { | |
| "epoch": 0.052478134110787174, | |
| "grad_norm": 0.3183080852031708, | |
| "learning_rate": 5.097087378640777e-06, | |
| "loss": 1.8658274412155151, | |
| "step": 36 | |
| }, | |
| { | |
| "epoch": 0.05539358600583091, | |
| "grad_norm": 0.17585590481758118, | |
| "learning_rate": 5.388349514563107e-06, | |
| "loss": 2.2423486709594727, | |
| "step": 38 | |
| }, | |
| { | |
| "epoch": 0.05830903790087463, | |
| "grad_norm": 0.10230281203985214, | |
| "learning_rate": 5.679611650485437e-06, | |
| "loss": 1.5302915573120117, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 0.061224489795918366, | |
| "grad_norm": 0.4932399392127991, | |
| "learning_rate": 5.970873786407767e-06, | |
| "loss": 1.813106656074524, | |
| "step": 42 | |
| }, | |
| { | |
| "epoch": 0.0641399416909621, | |
| "grad_norm": 0.15170824527740479, | |
| "learning_rate": 6.262135922330097e-06, | |
| "loss": 1.6509969234466553, | |
| "step": 44 | |
| }, | |
| { | |
| "epoch": 0.06705539358600583, | |
| "grad_norm": 0.1539481282234192, | |
| "learning_rate": 6.553398058252427e-06, | |
| "loss": 1.7683537006378174, | |
| "step": 46 | |
| }, | |
| { | |
| "epoch": 0.06997084548104957, | |
| "grad_norm": 0.6599376201629639, | |
| "learning_rate": 6.844660194174757e-06, | |
| "loss": 2.1816630363464355, | |
| "step": 48 | |
| }, | |
| { | |
| "epoch": 0.0728862973760933, | |
| "grad_norm": 0.24105864763259888, | |
| "learning_rate": 7.135922330097088e-06, | |
| "loss": 1.910886526107788, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 0.07580174927113703, | |
| "grad_norm": 0.09656477719545364, | |
| "learning_rate": 7.427184466019417e-06, | |
| "loss": 1.199069857597351, | |
| "step": 52 | |
| }, | |
| { | |
| "epoch": 0.07871720116618076, | |
| "grad_norm": 0.3129803240299225, | |
| "learning_rate": 7.718446601941748e-06, | |
| "loss": 1.7870614528656006, | |
| "step": 54 | |
| }, | |
| { | |
| "epoch": 0.08163265306122448, | |
| "grad_norm": 0.253489226102829, | |
| "learning_rate": 8.009708737864077e-06, | |
| "loss": 2.0801727771759033, | |
| "step": 56 | |
| }, | |
| { | |
| "epoch": 0.08454810495626822, | |
| "grad_norm": 0.12343698740005493, | |
| "learning_rate": 8.300970873786407e-06, | |
| "loss": 1.4909915924072266, | |
| "step": 58 | |
| }, | |
| { | |
| "epoch": 0.08746355685131195, | |
| "grad_norm": 0.19224074482917786, | |
| "learning_rate": 8.592233009708738e-06, | |
| "loss": 2.0119330883026123, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 0.09037900874635568, | |
| "grad_norm": 0.2891639471054077, | |
| "learning_rate": 8.883495145631068e-06, | |
| "loss": 1.9431190490722656, | |
| "step": 62 | |
| }, | |
| { | |
| "epoch": 0.09329446064139942, | |
| "grad_norm": 0.8908348083496094, | |
| "learning_rate": 9.174757281553397e-06, | |
| "loss": 1.8723704814910889, | |
| "step": 64 | |
| }, | |
| { | |
| "epoch": 0.09620991253644315, | |
| "grad_norm": 0.09907913953065872, | |
| "learning_rate": 9.466019417475729e-06, | |
| "loss": 1.556423306465149, | |
| "step": 66 | |
| }, | |
| { | |
| "epoch": 0.09912536443148688, | |
| "grad_norm": 0.18893972039222717, | |
| "learning_rate": 9.75728155339806e-06, | |
| "loss": 1.8031634092330933, | |
| "step": 68 | |
| }, | |
| { | |
| "epoch": 0.10204081632653061, | |
| "grad_norm": 0.3021998107433319, | |
| "learning_rate": 1.004854368932039e-05, | |
| "loss": 1.6836217641830444, | |
| "step": 70 | |
| }, | |
| { | |
| "epoch": 0.10495626822157435, | |
| "grad_norm": 0.19465358555316925, | |
| "learning_rate": 1.0339805825242719e-05, | |
| "loss": 1.3162983655929565, | |
| "step": 72 | |
| }, | |
| { | |
| "epoch": 0.10787172011661808, | |
| "grad_norm": 0.35194098949432373, | |
| "learning_rate": 1.0631067961165048e-05, | |
| "loss": 1.6223976612091064, | |
| "step": 74 | |
| }, | |
| { | |
| "epoch": 0.11078717201166181, | |
| "grad_norm": 0.11608141660690308, | |
| "learning_rate": 1.092233009708738e-05, | |
| "loss": 1.5001176595687866, | |
| "step": 76 | |
| }, | |
| { | |
| "epoch": 0.11370262390670553, | |
| "grad_norm": 0.17615102231502533, | |
| "learning_rate": 1.121359223300971e-05, | |
| "loss": 1.6835155487060547, | |
| "step": 78 | |
| }, | |
| { | |
| "epoch": 0.11661807580174927, | |
| "grad_norm": 0.10972107201814651, | |
| "learning_rate": 1.1504854368932039e-05, | |
| "loss": 1.0958292484283447, | |
| "step": 80 | |
| }, | |
| { | |
| "epoch": 0.119533527696793, | |
| "grad_norm": 0.2486797422170639, | |
| "learning_rate": 1.1796116504854368e-05, | |
| "loss": 1.5743815898895264, | |
| "step": 82 | |
| }, | |
| { | |
| "epoch": 0.12244897959183673, | |
| "grad_norm": 0.6029097437858582, | |
| "learning_rate": 1.20873786407767e-05, | |
| "loss": 1.4908254146575928, | |
| "step": 84 | |
| }, | |
| { | |
| "epoch": 0.12536443148688048, | |
| "grad_norm": 0.47159314155578613, | |
| "learning_rate": 1.237864077669903e-05, | |
| "loss": 1.3921440839767456, | |
| "step": 86 | |
| }, | |
| { | |
| "epoch": 0.1282798833819242, | |
| "grad_norm": 0.23478780686855316, | |
| "learning_rate": 1.2669902912621359e-05, | |
| "loss": 1.60302734375, | |
| "step": 88 | |
| }, | |
| { | |
| "epoch": 0.13119533527696792, | |
| "grad_norm": 0.06909849494695663, | |
| "learning_rate": 1.2961165048543688e-05, | |
| "loss": 1.3646469116210938, | |
| "step": 90 | |
| }, | |
| { | |
| "epoch": 0.13411078717201166, | |
| "grad_norm": 0.12045982480049133, | |
| "learning_rate": 1.3252427184466021e-05, | |
| "loss": 1.3031418323516846, | |
| "step": 92 | |
| }, | |
| { | |
| "epoch": 0.13702623906705538, | |
| "grad_norm": 0.2616878151893616, | |
| "learning_rate": 1.3543689320388351e-05, | |
| "loss": 1.4213391542434692, | |
| "step": 94 | |
| }, | |
| { | |
| "epoch": 0.13994169096209913, | |
| "grad_norm": 0.19713328778743744, | |
| "learning_rate": 1.383495145631068e-05, | |
| "loss": 1.8326067924499512, | |
| "step": 96 | |
| }, | |
| { | |
| "epoch": 0.14285714285714285, | |
| "grad_norm": 0.42456164956092834, | |
| "learning_rate": 1.412621359223301e-05, | |
| "loss": 2.064007043838501, | |
| "step": 98 | |
| }, | |
| { | |
| "epoch": 0.1457725947521866, | |
| "grad_norm": 0.1171143651008606, | |
| "learning_rate": 1.4417475728155341e-05, | |
| "loss": 1.3881018161773682, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.14868804664723032, | |
| "grad_norm": 0.5466513633728027, | |
| "learning_rate": 1.470873786407767e-05, | |
| "loss": 1.7975414991378784, | |
| "step": 102 | |
| }, | |
| { | |
| "epoch": 0.15160349854227406, | |
| "grad_norm": 0.2429724484682083, | |
| "learning_rate": 1.5e-05, | |
| "loss": 1.581913709640503, | |
| "step": 104 | |
| }, | |
| { | |
| "epoch": 0.15451895043731778, | |
| "grad_norm": 0.16082407534122467, | |
| "learning_rate": 1.4999965139018001e-05, | |
| "loss": 1.6313072443008423, | |
| "step": 106 | |
| }, | |
| { | |
| "epoch": 0.15743440233236153, | |
| "grad_norm": 0.20626085996627808, | |
| "learning_rate": 1.4999860556432087e-05, | |
| "loss": 1.4128293991088867, | |
| "step": 108 | |
| }, | |
| { | |
| "epoch": 0.16034985422740525, | |
| "grad_norm": 0.08978555351495743, | |
| "learning_rate": 1.4999686253322514e-05, | |
| "loss": 1.6325119733810425, | |
| "step": 110 | |
| }, | |
| { | |
| "epoch": 0.16326530612244897, | |
| "grad_norm": 0.17410112917423248, | |
| "learning_rate": 1.4999442231489687e-05, | |
| "loss": 1.6410691738128662, | |
| "step": 112 | |
| }, | |
| { | |
| "epoch": 0.1661807580174927, | |
| "grad_norm": 0.11147186905145645, | |
| "learning_rate": 1.4999128493454151e-05, | |
| "loss": 1.3302874565124512, | |
| "step": 114 | |
| }, | |
| { | |
| "epoch": 0.16909620991253643, | |
| "grad_norm": 0.44229331612586975, | |
| "learning_rate": 1.4998745042456563e-05, | |
| "loss": 1.6997064352035522, | |
| "step": 116 | |
| }, | |
| { | |
| "epoch": 0.17201166180758018, | |
| "grad_norm": 0.1818253993988037, | |
| "learning_rate": 1.499829188245766e-05, | |
| "loss": 1.3123167753219604, | |
| "step": 118 | |
| }, | |
| { | |
| "epoch": 0.1749271137026239, | |
| "grad_norm": 0.15915799140930176, | |
| "learning_rate": 1.4997769018138212e-05, | |
| "loss": 1.6660683155059814, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 0.17784256559766765, | |
| "grad_norm": 0.2367630898952484, | |
| "learning_rate": 1.4997176454898977e-05, | |
| "loss": 1.4073443412780762, | |
| "step": 122 | |
| }, | |
| { | |
| "epoch": 0.18075801749271136, | |
| "grad_norm": 0.653868556022644, | |
| "learning_rate": 1.4996514198860649e-05, | |
| "loss": 1.351149082183838, | |
| "step": 124 | |
| }, | |
| { | |
| "epoch": 0.1836734693877551, | |
| "grad_norm": 0.08681757003068924, | |
| "learning_rate": 1.4995782256863785e-05, | |
| "loss": 1.3422613143920898, | |
| "step": 126 | |
| }, | |
| { | |
| "epoch": 0.18658892128279883, | |
| "grad_norm": 0.06514488905668259, | |
| "learning_rate": 1.4994980636468756e-05, | |
| "loss": 1.3343521356582642, | |
| "step": 128 | |
| }, | |
| { | |
| "epoch": 0.18950437317784258, | |
| "grad_norm": 0.9027652740478516, | |
| "learning_rate": 1.4994109345955632e-05, | |
| "loss": 1.4679464101791382, | |
| "step": 130 | |
| }, | |
| { | |
| "epoch": 0.1924198250728863, | |
| "grad_norm": 0.35018599033355713, | |
| "learning_rate": 1.4993168394324137e-05, | |
| "loss": 1.1963084936141968, | |
| "step": 132 | |
| }, | |
| { | |
| "epoch": 0.19533527696793002, | |
| "grad_norm": 0.13998304307460785, | |
| "learning_rate": 1.4992157791293523e-05, | |
| "loss": 1.333540678024292, | |
| "step": 134 | |
| }, | |
| { | |
| "epoch": 0.19825072886297376, | |
| "grad_norm": 0.11608795821666718, | |
| "learning_rate": 1.4991077547302497e-05, | |
| "loss": 1.5141417980194092, | |
| "step": 136 | |
| }, | |
| { | |
| "epoch": 0.20116618075801748, | |
| "grad_norm": 0.08046405762434006, | |
| "learning_rate": 1.4989927673509089e-05, | |
| "loss": 1.3266879320144653, | |
| "step": 138 | |
| }, | |
| { | |
| "epoch": 0.20408163265306123, | |
| "grad_norm": 0.1371355652809143, | |
| "learning_rate": 1.4988708181790555e-05, | |
| "loss": 1.2892866134643555, | |
| "step": 140 | |
| }, | |
| { | |
| "epoch": 0.20699708454810495, | |
| "grad_norm": 0.1368686705827713, | |
| "learning_rate": 1.4987419084743244e-05, | |
| "loss": 1.0467798709869385, | |
| "step": 142 | |
| }, | |
| { | |
| "epoch": 0.2099125364431487, | |
| "grad_norm": 0.23302382230758667, | |
| "learning_rate": 1.4986060395682469e-05, | |
| "loss": 1.1930760145187378, | |
| "step": 144 | |
| }, | |
| { | |
| "epoch": 0.21282798833819241, | |
| "grad_norm": 1.9061791896820068, | |
| "learning_rate": 1.4984632128642375e-05, | |
| "loss": 1.4475537538528442, | |
| "step": 146 | |
| }, | |
| { | |
| "epoch": 0.21574344023323616, | |
| "grad_norm": 0.18942643702030182, | |
| "learning_rate": 1.4983134298375787e-05, | |
| "loss": 1.376928448677063, | |
| "step": 148 | |
| }, | |
| { | |
| "epoch": 0.21865889212827988, | |
| "grad_norm": 0.21135789155960083, | |
| "learning_rate": 1.498156692035407e-05, | |
| "loss": 1.5480635166168213, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 0.22157434402332363, | |
| "grad_norm": 0.13644421100616455, | |
| "learning_rate": 1.4979930010766947e-05, | |
| "loss": 1.7161264419555664, | |
| "step": 152 | |
| }, | |
| { | |
| "epoch": 0.22448979591836735, | |
| "grad_norm": 0.12430273741483688, | |
| "learning_rate": 1.4978223586522351e-05, | |
| "loss": 1.242932677268982, | |
| "step": 154 | |
| }, | |
| { | |
| "epoch": 0.22740524781341107, | |
| "grad_norm": 0.7622217535972595, | |
| "learning_rate": 1.4976447665246251e-05, | |
| "loss": 0.5300056338310242, | |
| "step": 156 | |
| }, | |
| { | |
| "epoch": 0.2303206997084548, | |
| "grad_norm": 0.13458958268165588, | |
| "learning_rate": 1.4974602265282451e-05, | |
| "loss": 1.571650743484497, | |
| "step": 158 | |
| }, | |
| { | |
| "epoch": 0.23323615160349853, | |
| "grad_norm": 0.2972854673862457, | |
| "learning_rate": 1.4972687405692425e-05, | |
| "loss": 1.2033076286315918, | |
| "step": 160 | |
| }, | |
| { | |
| "epoch": 0.23615160349854228, | |
| "grad_norm": 0.29232847690582275, | |
| "learning_rate": 1.4970703106255095e-05, | |
| "loss": 1.4756550788879395, | |
| "step": 162 | |
| }, | |
| { | |
| "epoch": 0.239067055393586, | |
| "grad_norm": 0.07210766524076462, | |
| "learning_rate": 1.4968649387466655e-05, | |
| "loss": 1.3033177852630615, | |
| "step": 164 | |
| }, | |
| { | |
| "epoch": 0.24198250728862974, | |
| "grad_norm": 0.5424373745918274, | |
| "learning_rate": 1.4966526270540327e-05, | |
| "loss": 1.0460329055786133, | |
| "step": 166 | |
| }, | |
| { | |
| "epoch": 0.24489795918367346, | |
| "grad_norm": 0.28463321924209595, | |
| "learning_rate": 1.4964333777406174e-05, | |
| "loss": 1.250373363494873, | |
| "step": 168 | |
| }, | |
| { | |
| "epoch": 0.2478134110787172, | |
| "grad_norm": 0.3408065140247345, | |
| "learning_rate": 1.496207193071085e-05, | |
| "loss": 0.8593610525131226, | |
| "step": 170 | |
| }, | |
| { | |
| "epoch": 0.25072886297376096, | |
| "grad_norm": 0.14829058945178986, | |
| "learning_rate": 1.4959740753817374e-05, | |
| "loss": 1.304344892501831, | |
| "step": 172 | |
| }, | |
| { | |
| "epoch": 0.2536443148688047, | |
| "grad_norm": 0.8436731696128845, | |
| "learning_rate": 1.4957340270804896e-05, | |
| "loss": 1.2743805646896362, | |
| "step": 174 | |
| }, | |
| { | |
| "epoch": 0.2565597667638484, | |
| "grad_norm": 0.11323361843824387, | |
| "learning_rate": 1.4954870506468434e-05, | |
| "loss": 1.329984188079834, | |
| "step": 176 | |
| }, | |
| { | |
| "epoch": 0.2594752186588921, | |
| "grad_norm": 0.09321129322052002, | |
| "learning_rate": 1.4952331486318626e-05, | |
| "loss": 1.2258719205856323, | |
| "step": 178 | |
| }, | |
| { | |
| "epoch": 0.26239067055393583, | |
| "grad_norm": 0.37252843379974365, | |
| "learning_rate": 1.4949723236581472e-05, | |
| "loss": 1.0671582221984863, | |
| "step": 180 | |
| }, | |
| { | |
| "epoch": 0.2653061224489796, | |
| "grad_norm": 0.3797838091850281, | |
| "learning_rate": 1.4947045784198052e-05, | |
| "loss": 1.2696138620376587, | |
| "step": 182 | |
| }, | |
| { | |
| "epoch": 0.26822157434402333, | |
| "grad_norm": 0.16805821657180786, | |
| "learning_rate": 1.4944299156824251e-05, | |
| "loss": 1.4738816022872925, | |
| "step": 184 | |
| }, | |
| { | |
| "epoch": 0.27113702623906705, | |
| "grad_norm": 0.2671731114387512, | |
| "learning_rate": 1.4941483382830475e-05, | |
| "loss": 1.3171305656433105, | |
| "step": 186 | |
| }, | |
| { | |
| "epoch": 0.27405247813411077, | |
| "grad_norm": 0.07962363958358765, | |
| "learning_rate": 1.4938598491301369e-05, | |
| "loss": 1.2901722192764282, | |
| "step": 188 | |
| }, | |
| { | |
| "epoch": 0.27696793002915454, | |
| "grad_norm": 0.280506432056427, | |
| "learning_rate": 1.4935644512035486e-05, | |
| "loss": 1.3184595108032227, | |
| "step": 190 | |
| }, | |
| { | |
| "epoch": 0.27988338192419826, | |
| "grad_norm": 0.13458193838596344, | |
| "learning_rate": 1.4932621475545014e-05, | |
| "loss": 1.1937448978424072, | |
| "step": 192 | |
| }, | |
| { | |
| "epoch": 0.282798833819242, | |
| "grad_norm": 0.7079519033432007, | |
| "learning_rate": 1.4929529413055442e-05, | |
| "loss": 1.1439327001571655, | |
| "step": 194 | |
| }, | |
| { | |
| "epoch": 0.2857142857142857, | |
| "grad_norm": 0.18462230265140533, | |
| "learning_rate": 1.4926368356505236e-05, | |
| "loss": 1.5497668981552124, | |
| "step": 196 | |
| }, | |
| { | |
| "epoch": 0.2886297376093295, | |
| "grad_norm": 0.16043758392333984, | |
| "learning_rate": 1.492313833854552e-05, | |
| "loss": 1.4568783044815063, | |
| "step": 198 | |
| }, | |
| { | |
| "epoch": 0.2915451895043732, | |
| "grad_norm": 0.42396068572998047, | |
| "learning_rate": 1.491983939253973e-05, | |
| "loss": 1.6005096435546875, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.2944606413994169, | |
| "grad_norm": 0.21155761182308197, | |
| "learning_rate": 1.4916471552563272e-05, | |
| "loss": 1.3397752046585083, | |
| "step": 202 | |
| }, | |
| { | |
| "epoch": 0.29737609329446063, | |
| "grad_norm": 0.17219677567481995, | |
| "learning_rate": 1.4913034853403173e-05, | |
| "loss": 1.3317774534225464, | |
| "step": 204 | |
| }, | |
| { | |
| "epoch": 0.30029154518950435, | |
| "grad_norm": 0.12617312371730804, | |
| "learning_rate": 1.4909529330557714e-05, | |
| "loss": 1.2119510173797607, | |
| "step": 206 | |
| }, | |
| { | |
| "epoch": 0.3032069970845481, | |
| "grad_norm": 0.14850527048110962, | |
| "learning_rate": 1.4905955020236072e-05, | |
| "loss": 1.385998010635376, | |
| "step": 208 | |
| }, | |
| { | |
| "epoch": 0.30612244897959184, | |
| "grad_norm": 0.1191219687461853, | |
| "learning_rate": 1.490231195935794e-05, | |
| "loss": 1.5534725189208984, | |
| "step": 210 | |
| }, | |
| { | |
| "epoch": 0.30903790087463556, | |
| "grad_norm": 0.06989572942256927, | |
| "learning_rate": 1.4898600185553152e-05, | |
| "loss": 1.4775235652923584, | |
| "step": 212 | |
| }, | |
| { | |
| "epoch": 0.3119533527696793, | |
| "grad_norm": 0.08547376841306686, | |
| "learning_rate": 1.4894819737161285e-05, | |
| "loss": 1.033743977546692, | |
| "step": 214 | |
| }, | |
| { | |
| "epoch": 0.31486880466472306, | |
| "grad_norm": 0.11992272734642029, | |
| "learning_rate": 1.489097065323127e-05, | |
| "loss": 1.0980379581451416, | |
| "step": 216 | |
| }, | |
| { | |
| "epoch": 0.3177842565597668, | |
| "grad_norm": 0.30880632996559143, | |
| "learning_rate": 1.488705297352099e-05, | |
| "loss": 1.317891001701355, | |
| "step": 218 | |
| }, | |
| { | |
| "epoch": 0.3206997084548105, | |
| "grad_norm": 0.6510909795761108, | |
| "learning_rate": 1.4883066738496858e-05, | |
| "loss": 0.9413776993751526, | |
| "step": 220 | |
| }, | |
| { | |
| "epoch": 0.3236151603498542, | |
| "grad_norm": 0.43388184905052185, | |
| "learning_rate": 1.4879011989333418e-05, | |
| "loss": 1.381697177886963, | |
| "step": 222 | |
| }, | |
| { | |
| "epoch": 0.32653061224489793, | |
| "grad_norm": 0.21984761953353882, | |
| "learning_rate": 1.4874888767912902e-05, | |
| "loss": 1.2626378536224365, | |
| "step": 224 | |
| }, | |
| { | |
| "epoch": 0.3294460641399417, | |
| "grad_norm": 0.2687482237815857, | |
| "learning_rate": 1.48706971168248e-05, | |
| "loss": 1.2034857273101807, | |
| "step": 226 | |
| }, | |
| { | |
| "epoch": 0.3323615160349854, | |
| "grad_norm": 0.08195902407169342, | |
| "learning_rate": 1.4866437079365439e-05, | |
| "loss": 1.2773680686950684, | |
| "step": 228 | |
| }, | |
| { | |
| "epoch": 0.33527696793002915, | |
| "grad_norm": 0.1009335145354271, | |
| "learning_rate": 1.4862108699537504e-05, | |
| "loss": 1.0853190422058105, | |
| "step": 230 | |
| }, | |
| { | |
| "epoch": 0.33819241982507287, | |
| "grad_norm": 0.3376968204975128, | |
| "learning_rate": 1.4857712022049617e-05, | |
| "loss": 1.5481150150299072, | |
| "step": 232 | |
| }, | |
| { | |
| "epoch": 0.34110787172011664, | |
| "grad_norm": 0.7441994547843933, | |
| "learning_rate": 1.4853247092315843e-05, | |
| "loss": 0.9510725140571594, | |
| "step": 234 | |
| }, | |
| { | |
| "epoch": 0.34402332361516036, | |
| "grad_norm": 0.04717664048075676, | |
| "learning_rate": 1.484871395645525e-05, | |
| "loss": 1.4734127521514893, | |
| "step": 236 | |
| }, | |
| { | |
| "epoch": 0.3469387755102041, | |
| "grad_norm": 0.7886844873428345, | |
| "learning_rate": 1.4844112661291409e-05, | |
| "loss": 1.3192212581634521, | |
| "step": 238 | |
| }, | |
| { | |
| "epoch": 0.3498542274052478, | |
| "grad_norm": 0.4841660261154175, | |
| "learning_rate": 1.4839443254351925e-05, | |
| "loss": 1.691177487373352, | |
| "step": 240 | |
| }, | |
| { | |
| "epoch": 0.35276967930029157, | |
| "grad_norm": 0.06492076069116592, | |
| "learning_rate": 1.4834705783867948e-05, | |
| "loss": 1.329490065574646, | |
| "step": 242 | |
| }, | |
| { | |
| "epoch": 0.3556851311953353, | |
| "grad_norm": 0.13113148510456085, | |
| "learning_rate": 1.4829900298773655e-05, | |
| "loss": 1.4308984279632568, | |
| "step": 244 | |
| }, | |
| { | |
| "epoch": 0.358600583090379, | |
| "grad_norm": 0.2137414813041687, | |
| "learning_rate": 1.4825026848705774e-05, | |
| "loss": 1.5191004276275635, | |
| "step": 246 | |
| }, | |
| { | |
| "epoch": 0.36151603498542273, | |
| "grad_norm": 0.1302558183670044, | |
| "learning_rate": 1.482008548400304e-05, | |
| "loss": 1.1112821102142334, | |
| "step": 248 | |
| }, | |
| { | |
| "epoch": 0.36443148688046645, | |
| "grad_norm": 0.24971581995487213, | |
| "learning_rate": 1.4815076255705704e-05, | |
| "loss": 1.2628142833709717, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 0.3673469387755102, | |
| "grad_norm": 0.06637357920408249, | |
| "learning_rate": 1.4809999215554978e-05, | |
| "loss": 1.0483888387680054, | |
| "step": 252 | |
| }, | |
| { | |
| "epoch": 0.37026239067055394, | |
| "grad_norm": 0.16134153306484222, | |
| "learning_rate": 1.4804854415992531e-05, | |
| "loss": 0.8284896612167358, | |
| "step": 254 | |
| }, | |
| { | |
| "epoch": 0.37317784256559766, | |
| "grad_norm": 0.22190812230110168, | |
| "learning_rate": 1.479964191015992e-05, | |
| "loss": 1.228007197380066, | |
| "step": 256 | |
| }, | |
| { | |
| "epoch": 0.3760932944606414, | |
| "grad_norm": 0.3965594172477722, | |
| "learning_rate": 1.4794361751898052e-05, | |
| "loss": 1.461411952972412, | |
| "step": 258 | |
| }, | |
| { | |
| "epoch": 0.37900874635568516, | |
| "grad_norm": 0.08565931022167206, | |
| "learning_rate": 1.4789013995746636e-05, | |
| "loss": 1.33036208152771, | |
| "step": 260 | |
| }, | |
| { | |
| "epoch": 0.3819241982507289, | |
| "grad_norm": 0.11709296703338623, | |
| "learning_rate": 1.4783598696943603e-05, | |
| "loss": 1.1803240776062012, | |
| "step": 262 | |
| }, | |
| { | |
| "epoch": 0.3848396501457726, | |
| "grad_norm": 0.15489286184310913, | |
| "learning_rate": 1.4778115911424552e-05, | |
| "loss": 1.234659194946289, | |
| "step": 264 | |
| }, | |
| { | |
| "epoch": 0.3877551020408163, | |
| "grad_norm": 0.19184595346450806, | |
| "learning_rate": 1.4772565695822158e-05, | |
| "loss": 1.2707804441452026, | |
| "step": 266 | |
| }, | |
| { | |
| "epoch": 0.39067055393586003, | |
| "grad_norm": 0.1356089860200882, | |
| "learning_rate": 1.4766948107465598e-05, | |
| "loss": 1.192071557044983, | |
| "step": 268 | |
| }, | |
| { | |
| "epoch": 0.3935860058309038, | |
| "grad_norm": 0.11989542841911316, | |
| "learning_rate": 1.476126320437995e-05, | |
| "loss": 1.391566276550293, | |
| "step": 270 | |
| }, | |
| { | |
| "epoch": 0.3965014577259475, | |
| "grad_norm": 0.47645920515060425, | |
| "learning_rate": 1.4755511045285605e-05, | |
| "loss": 1.1564279794692993, | |
| "step": 272 | |
| }, | |
| { | |
| "epoch": 0.39941690962099125, | |
| "grad_norm": 0.4125911593437195, | |
| "learning_rate": 1.4749691689597646e-05, | |
| "loss": 1.536888599395752, | |
| "step": 274 | |
| }, | |
| { | |
| "epoch": 0.40233236151603496, | |
| "grad_norm": 0.08971330523490906, | |
| "learning_rate": 1.4743805197425243e-05, | |
| "loss": 1.2086325883865356, | |
| "step": 276 | |
| }, | |
| { | |
| "epoch": 0.40524781341107874, | |
| "grad_norm": 0.08347416669130325, | |
| "learning_rate": 1.4737851629571035e-05, | |
| "loss": 1.190657615661621, | |
| "step": 278 | |
| }, | |
| { | |
| "epoch": 0.40816326530612246, | |
| "grad_norm": 0.20587654411792755, | |
| "learning_rate": 1.4731831047530493e-05, | |
| "loss": 1.3656525611877441, | |
| "step": 280 | |
| }, | |
| { | |
| "epoch": 0.4110787172011662, | |
| "grad_norm": 0.22432878613471985, | |
| "learning_rate": 1.4725743513491294e-05, | |
| "loss": 1.1042253971099854, | |
| "step": 282 | |
| }, | |
| { | |
| "epoch": 0.4139941690962099, | |
| "grad_norm": 0.26549288630485535, | |
| "learning_rate": 1.471958909033267e-05, | |
| "loss": 1.3797943592071533, | |
| "step": 284 | |
| }, | |
| { | |
| "epoch": 0.41690962099125367, | |
| "grad_norm": 0.15680500864982605, | |
| "learning_rate": 1.4713367841624764e-05, | |
| "loss": 1.3377087116241455, | |
| "step": 286 | |
| }, | |
| { | |
| "epoch": 0.4198250728862974, | |
| "grad_norm": 0.4737466275691986, | |
| "learning_rate": 1.4707079831627975e-05, | |
| "loss": 1.3034449815750122, | |
| "step": 288 | |
| }, | |
| { | |
| "epoch": 0.4227405247813411, | |
| "grad_norm": 0.1271553486585617, | |
| "learning_rate": 1.4700725125292288e-05, | |
| "loss": 1.1474194526672363, | |
| "step": 290 | |
| }, | |
| { | |
| "epoch": 0.42565597667638483, | |
| "grad_norm": 0.06102332845330238, | |
| "learning_rate": 1.469430378825661e-05, | |
| "loss": 1.1918046474456787, | |
| "step": 292 | |
| }, | |
| { | |
| "epoch": 0.42857142857142855, | |
| "grad_norm": 0.15844929218292236, | |
| "learning_rate": 1.4687815886848083e-05, | |
| "loss": 1.206626296043396, | |
| "step": 294 | |
| }, | |
| { | |
| "epoch": 0.4314868804664723, | |
| "grad_norm": 0.24055972695350647, | |
| "learning_rate": 1.4681261488081409e-05, | |
| "loss": 1.5187625885009766, | |
| "step": 296 | |
| }, | |
| { | |
| "epoch": 0.43440233236151604, | |
| "grad_norm": 0.7840580344200134, | |
| "learning_rate": 1.4674640659658149e-05, | |
| "loss": 1.0932797193527222, | |
| "step": 298 | |
| }, | |
| { | |
| "epoch": 0.43731778425655976, | |
| "grad_norm": 0.10844213515520096, | |
| "learning_rate": 1.4667953469966035e-05, | |
| "loss": 1.1951229572296143, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 0.4402332361516035, | |
| "grad_norm": 0.11183289438486099, | |
| "learning_rate": 1.466119998807825e-05, | |
| "loss": 1.1717019081115723, | |
| "step": 302 | |
| }, | |
| { | |
| "epoch": 0.44314868804664725, | |
| "grad_norm": 0.30403003096580505, | |
| "learning_rate": 1.4654380283752722e-05, | |
| "loss": 1.4022222757339478, | |
| "step": 304 | |
| }, | |
| { | |
| "epoch": 0.446064139941691, | |
| "grad_norm": 0.13156169652938843, | |
| "learning_rate": 1.4647494427431404e-05, | |
| "loss": 1.4486730098724365, | |
| "step": 306 | |
| }, | |
| { | |
| "epoch": 0.4489795918367347, | |
| "grad_norm": 0.1186894103884697, | |
| "learning_rate": 1.4640542490239546e-05, | |
| "loss": 1.2088007926940918, | |
| "step": 308 | |
| }, | |
| { | |
| "epoch": 0.4518950437317784, | |
| "grad_norm": 0.3326444625854492, | |
| "learning_rate": 1.4633524543984956e-05, | |
| "loss": 1.3544650077819824, | |
| "step": 310 | |
| }, | |
| { | |
| "epoch": 0.45481049562682213, | |
| "grad_norm": 0.1379825323820114, | |
| "learning_rate": 1.4626440661157263e-05, | |
| "loss": 1.330404281616211, | |
| "step": 312 | |
| }, | |
| { | |
| "epoch": 0.4577259475218659, | |
| "grad_norm": 0.1476340889930725, | |
| "learning_rate": 1.4619290914927168e-05, | |
| "loss": 1.3507134914398193, | |
| "step": 314 | |
| }, | |
| { | |
| "epoch": 0.4606413994169096, | |
| "grad_norm": 0.1802261918783188, | |
| "learning_rate": 1.4612075379145683e-05, | |
| "loss": 1.2097649574279785, | |
| "step": 316 | |
| }, | |
| { | |
| "epoch": 0.46355685131195334, | |
| "grad_norm": 0.12077829986810684, | |
| "learning_rate": 1.460479412834338e-05, | |
| "loss": 1.3490198850631714, | |
| "step": 318 | |
| }, | |
| { | |
| "epoch": 0.46647230320699706, | |
| "grad_norm": 0.22901231050491333, | |
| "learning_rate": 1.4597447237729602e-05, | |
| "loss": 1.3041571378707886, | |
| "step": 320 | |
| }, | |
| { | |
| "epoch": 0.46938775510204084, | |
| "grad_norm": 0.1394783854484558, | |
| "learning_rate": 1.4590034783191705e-05, | |
| "loss": 1.3151127099990845, | |
| "step": 322 | |
| }, | |
| { | |
| "epoch": 0.47230320699708456, | |
| "grad_norm": 0.15815502405166626, | |
| "learning_rate": 1.4582556841294272e-05, | |
| "loss": 1.4624110460281372, | |
| "step": 324 | |
| }, | |
| { | |
| "epoch": 0.4752186588921283, | |
| "grad_norm": 0.2137562483549118, | |
| "learning_rate": 1.45750134892783e-05, | |
| "loss": 1.4430997371673584, | |
| "step": 326 | |
| }, | |
| { | |
| "epoch": 0.478134110787172, | |
| "grad_norm": 0.3299601376056671, | |
| "learning_rate": 1.4567404805060432e-05, | |
| "loss": 1.3537228107452393, | |
| "step": 328 | |
| }, | |
| { | |
| "epoch": 0.48104956268221577, | |
| "grad_norm": 0.21562345325946808, | |
| "learning_rate": 1.4559730867232141e-05, | |
| "loss": 1.169204592704773, | |
| "step": 330 | |
| }, | |
| { | |
| "epoch": 0.4839650145772595, | |
| "grad_norm": 0.1736089438199997, | |
| "learning_rate": 1.4551991755058902e-05, | |
| "loss": 1.1071885824203491, | |
| "step": 332 | |
| }, | |
| { | |
| "epoch": 0.4868804664723032, | |
| "grad_norm": 0.1834300458431244, | |
| "learning_rate": 1.45441875484794e-05, | |
| "loss": 1.5676034688949585, | |
| "step": 334 | |
| }, | |
| { | |
| "epoch": 0.4897959183673469, | |
| "grad_norm": 0.0843748077750206, | |
| "learning_rate": 1.4536318328104693e-05, | |
| "loss": 1.2121503353118896, | |
| "step": 336 | |
| }, | |
| { | |
| "epoch": 0.49271137026239065, | |
| "grad_norm": 0.36758843064308167, | |
| "learning_rate": 1.452838417521737e-05, | |
| "loss": 1.1275235414505005, | |
| "step": 338 | |
| }, | |
| { | |
| "epoch": 0.4956268221574344, | |
| "grad_norm": 0.18445612490177155, | |
| "learning_rate": 1.452038517177072e-05, | |
| "loss": 1.3472223281860352, | |
| "step": 340 | |
| }, | |
| { | |
| "epoch": 0.49854227405247814, | |
| "grad_norm": 0.05781463533639908, | |
| "learning_rate": 1.4512321400387896e-05, | |
| "loss": 1.0872787237167358, | |
| "step": 342 | |
| }, | |
| { | |
| "epoch": 0.5014577259475219, | |
| "grad_norm": 0.19518744945526123, | |
| "learning_rate": 1.4504192944361035e-05, | |
| "loss": 1.1387406587600708, | |
| "step": 344 | |
| }, | |
| { | |
| "epoch": 0.5043731778425656, | |
| "grad_norm": 0.12471595406532288, | |
| "learning_rate": 1.4495999887650425e-05, | |
| "loss": 1.2551310062408447, | |
| "step": 346 | |
| }, | |
| { | |
| "epoch": 0.5072886297376094, | |
| "grad_norm": 0.21368560194969177, | |
| "learning_rate": 1.4487742314883622e-05, | |
| "loss": 1.4745806455612183, | |
| "step": 348 | |
| }, | |
| { | |
| "epoch": 0.5102040816326531, | |
| "grad_norm": 0.20728199183940887, | |
| "learning_rate": 1.447942031135458e-05, | |
| "loss": 1.3776572942733765, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 0.5131195335276968, | |
| "grad_norm": 0.3676038384437561, | |
| "learning_rate": 1.447103396302277e-05, | |
| "loss": 1.393446922302246, | |
| "step": 352 | |
| }, | |
| { | |
| "epoch": 0.5160349854227405, | |
| "grad_norm": 0.4812930226325989, | |
| "learning_rate": 1.4462583356512293e-05, | |
| "loss": 1.6455305814743042, | |
| "step": 354 | |
| }, | |
| { | |
| "epoch": 0.5189504373177842, | |
| "grad_norm": 0.14569929242134094, | |
| "learning_rate": 1.4454068579110982e-05, | |
| "loss": 1.1214039325714111, | |
| "step": 356 | |
| }, | |
| { | |
| "epoch": 0.521865889212828, | |
| "grad_norm": 0.08566080778837204, | |
| "learning_rate": 1.4445489718769505e-05, | |
| "loss": 1.0862312316894531, | |
| "step": 358 | |
| }, | |
| { | |
| "epoch": 0.5247813411078717, | |
| "grad_norm": 0.1737866848707199, | |
| "learning_rate": 1.4436846864100454e-05, | |
| "loss": 1.4677766561508179, | |
| "step": 360 | |
| }, | |
| { | |
| "epoch": 0.5276967930029155, | |
| "grad_norm": 0.24478068947792053, | |
| "learning_rate": 1.4428140104377428e-05, | |
| "loss": 1.4088914394378662, | |
| "step": 362 | |
| }, | |
| { | |
| "epoch": 0.5306122448979592, | |
| "grad_norm": 0.07167135179042816, | |
| "learning_rate": 1.4419369529534117e-05, | |
| "loss": 1.0589109659194946, | |
| "step": 364 | |
| }, | |
| { | |
| "epoch": 0.5335276967930029, | |
| "grad_norm": 0.4344414472579956, | |
| "learning_rate": 1.4410535230163361e-05, | |
| "loss": 1.0916839838027954, | |
| "step": 366 | |
| }, | |
| { | |
| "epoch": 0.5364431486880467, | |
| "grad_norm": 0.1588602066040039, | |
| "learning_rate": 1.440163729751623e-05, | |
| "loss": 1.2339898347854614, | |
| "step": 368 | |
| }, | |
| { | |
| "epoch": 0.5393586005830904, | |
| "grad_norm": 0.08355646580457687, | |
| "learning_rate": 1.4392675823501075e-05, | |
| "loss": 1.0559823513031006, | |
| "step": 370 | |
| }, | |
| { | |
| "epoch": 0.5422740524781341, | |
| "grad_norm": 0.09950409084558487, | |
| "learning_rate": 1.4383650900682563e-05, | |
| "loss": 1.1664844751358032, | |
| "step": 372 | |
| }, | |
| { | |
| "epoch": 0.5451895043731778, | |
| "grad_norm": 0.21663829684257507, | |
| "learning_rate": 1.4374562622280753e-05, | |
| "loss": 1.2800816297531128, | |
| "step": 374 | |
| }, | |
| { | |
| "epoch": 0.5481049562682215, | |
| "grad_norm": 0.45721420645713806, | |
| "learning_rate": 1.4365411082170105e-05, | |
| "loss": 1.0968526601791382, | |
| "step": 376 | |
| }, | |
| { | |
| "epoch": 0.5510204081632653, | |
| "grad_norm": 0.34029263257980347, | |
| "learning_rate": 1.435619637487852e-05, | |
| "loss": 1.4795793294906616, | |
| "step": 378 | |
| }, | |
| { | |
| "epoch": 0.5539358600583091, | |
| "grad_norm": 0.07205039262771606, | |
| "learning_rate": 1.4346918595586371e-05, | |
| "loss": 0.8370588421821594, | |
| "step": 380 | |
| }, | |
| { | |
| "epoch": 0.5568513119533528, | |
| "grad_norm": 0.12168021500110626, | |
| "learning_rate": 1.4337577840125506e-05, | |
| "loss": 1.2106021642684937, | |
| "step": 382 | |
| }, | |
| { | |
| "epoch": 0.5597667638483965, | |
| "grad_norm": 0.32209160923957825, | |
| "learning_rate": 1.4328174204978268e-05, | |
| "loss": 1.321066975593567, | |
| "step": 384 | |
| }, | |
| { | |
| "epoch": 0.5626822157434402, | |
| "grad_norm": 0.2250237762928009, | |
| "learning_rate": 1.4318707787276499e-05, | |
| "loss": 1.292655348777771, | |
| "step": 386 | |
| }, | |
| { | |
| "epoch": 0.565597667638484, | |
| "grad_norm": 0.2742823362350464, | |
| "learning_rate": 1.4309178684800527e-05, | |
| "loss": 1.2520337104797363, | |
| "step": 388 | |
| }, | |
| { | |
| "epoch": 0.5685131195335277, | |
| "grad_norm": 0.27688226103782654, | |
| "learning_rate": 1.4299586995978166e-05, | |
| "loss": 1.38676917552948, | |
| "step": 390 | |
| }, | |
| { | |
| "epoch": 0.5714285714285714, | |
| "grad_norm": 0.2949990928173065, | |
| "learning_rate": 1.4289932819883696e-05, | |
| "loss": 0.8451089262962341, | |
| "step": 392 | |
| }, | |
| { | |
| "epoch": 0.5743440233236151, | |
| "grad_norm": 0.1089571937918663, | |
| "learning_rate": 1.4280216256236834e-05, | |
| "loss": 1.2847154140472412, | |
| "step": 394 | |
| }, | |
| { | |
| "epoch": 0.577259475218659, | |
| "grad_norm": 0.19184090197086334, | |
| "learning_rate": 1.427043740540172e-05, | |
| "loss": 1.387587547302246, | |
| "step": 396 | |
| }, | |
| { | |
| "epoch": 0.5801749271137027, | |
| "grad_norm": 0.54814612865448, | |
| "learning_rate": 1.4260596368385856e-05, | |
| "loss": 1.3909755945205688, | |
| "step": 398 | |
| }, | |
| { | |
| "epoch": 0.5830903790087464, | |
| "grad_norm": 0.12275420129299164, | |
| "learning_rate": 1.4250693246839092e-05, | |
| "loss": 1.2625775337219238, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 0.5860058309037901, | |
| "grad_norm": 0.7932881712913513, | |
| "learning_rate": 1.4240728143052544e-05, | |
| "loss": 1.2152988910675049, | |
| "step": 402 | |
| }, | |
| { | |
| "epoch": 0.5889212827988338, | |
| "grad_norm": 0.37155717611312866, | |
| "learning_rate": 1.4230701159957563e-05, | |
| "loss": 1.3423740863800049, | |
| "step": 404 | |
| }, | |
| { | |
| "epoch": 0.5918367346938775, | |
| "grad_norm": 0.18500366806983948, | |
| "learning_rate": 1.4220612401124663e-05, | |
| "loss": 1.3449385166168213, | |
| "step": 406 | |
| }, | |
| { | |
| "epoch": 0.5947521865889213, | |
| "grad_norm": 0.11731770634651184, | |
| "learning_rate": 1.4210461970762447e-05, | |
| "loss": 1.1119245290756226, | |
| "step": 408 | |
| }, | |
| { | |
| "epoch": 0.597667638483965, | |
| "grad_norm": 0.10353056341409683, | |
| "learning_rate": 1.4200249973716534e-05, | |
| "loss": 1.263884425163269, | |
| "step": 410 | |
| }, | |
| { | |
| "epoch": 0.6005830903790087, | |
| "grad_norm": 0.14419683814048767, | |
| "learning_rate": 1.418997651546848e-05, | |
| "loss": 1.307144284248352, | |
| "step": 412 | |
| }, | |
| { | |
| "epoch": 0.6034985422740525, | |
| "grad_norm": 0.10403470695018768, | |
| "learning_rate": 1.4179641702134683e-05, | |
| "loss": 1.1156686544418335, | |
| "step": 414 | |
| }, | |
| { | |
| "epoch": 0.6064139941690962, | |
| "grad_norm": 0.14356708526611328, | |
| "learning_rate": 1.4169245640465292e-05, | |
| "loss": 1.1539418697357178, | |
| "step": 416 | |
| }, | |
| { | |
| "epoch": 0.60932944606414, | |
| "grad_norm": 0.20612405240535736, | |
| "learning_rate": 1.415878843784309e-05, | |
| "loss": 1.2595444917678833, | |
| "step": 418 | |
| }, | |
| { | |
| "epoch": 0.6122448979591837, | |
| "grad_norm": 0.11746654659509659, | |
| "learning_rate": 1.414827020228241e-05, | |
| "loss": 1.2829625606536865, | |
| "step": 420 | |
| }, | |
| { | |
| "epoch": 0.6151603498542274, | |
| "grad_norm": 0.16831901669502258, | |
| "learning_rate": 1.4137691042427996e-05, | |
| "loss": 1.3437942266464233, | |
| "step": 422 | |
| }, | |
| { | |
| "epoch": 0.6180758017492711, | |
| "grad_norm": 0.35040462017059326, | |
| "learning_rate": 1.4127051067553895e-05, | |
| "loss": 1.4076067209243774, | |
| "step": 424 | |
| }, | |
| { | |
| "epoch": 0.6209912536443148, | |
| "grad_norm": 0.061461448669433594, | |
| "learning_rate": 1.4116350387562316e-05, | |
| "loss": 1.0884675979614258, | |
| "step": 426 | |
| }, | |
| { | |
| "epoch": 0.6239067055393586, | |
| "grad_norm": 0.15810243785381317, | |
| "learning_rate": 1.4105589112982514e-05, | |
| "loss": 1.2547569274902344, | |
| "step": 428 | |
| }, | |
| { | |
| "epoch": 0.6268221574344023, | |
| "grad_norm": 0.8622474074363708, | |
| "learning_rate": 1.4094767354969625e-05, | |
| "loss": 1.3274284601211548, | |
| "step": 430 | |
| }, | |
| { | |
| "epoch": 0.6297376093294461, | |
| "grad_norm": 0.13593973219394684, | |
| "learning_rate": 1.4083885225303535e-05, | |
| "loss": 1.2320295572280884, | |
| "step": 432 | |
| }, | |
| { | |
| "epoch": 0.6326530612244898, | |
| "grad_norm": 0.07243333756923676, | |
| "learning_rate": 1.407294283638772e-05, | |
| "loss": 1.4667418003082275, | |
| "step": 434 | |
| }, | |
| { | |
| "epoch": 0.6355685131195336, | |
| "grad_norm": 0.07801775634288788, | |
| "learning_rate": 1.406194030124808e-05, | |
| "loss": 1.3038822412490845, | |
| "step": 436 | |
| }, | |
| { | |
| "epoch": 0.6384839650145773, | |
| "grad_norm": 0.304385781288147, | |
| "learning_rate": 1.4050877733531783e-05, | |
| "loss": 1.3447275161743164, | |
| "step": 438 | |
| }, | |
| { | |
| "epoch": 0.641399416909621, | |
| "grad_norm": 0.10865950584411621, | |
| "learning_rate": 1.4039755247506077e-05, | |
| "loss": 0.6549509167671204, | |
| "step": 440 | |
| }, | |
| { | |
| "epoch": 0.6443148688046647, | |
| "grad_norm": 0.28575700521469116, | |
| "learning_rate": 1.4028572958057122e-05, | |
| "loss": 1.1795369386672974, | |
| "step": 442 | |
| }, | |
| { | |
| "epoch": 0.6472303206997084, | |
| "grad_norm": 0.5246424078941345, | |
| "learning_rate": 1.4017330980688798e-05, | |
| "loss": 1.1711264848709106, | |
| "step": 444 | |
| }, | |
| { | |
| "epoch": 0.6501457725947521, | |
| "grad_norm": 0.18553860485553741, | |
| "learning_rate": 1.400602943152151e-05, | |
| "loss": 1.2232381105422974, | |
| "step": 446 | |
| }, | |
| { | |
| "epoch": 0.6530612244897959, | |
| "grad_norm": 0.12490701675415039, | |
| "learning_rate": 1.3994668427290992e-05, | |
| "loss": 1.3382079601287842, | |
| "step": 448 | |
| }, | |
| { | |
| "epoch": 0.6559766763848397, | |
| "grad_norm": 0.22397291660308838, | |
| "learning_rate": 1.3983248085347099e-05, | |
| "loss": 1.3612568378448486, | |
| "step": 450 | |
| }, | |
| { | |
| "epoch": 0.6588921282798834, | |
| "grad_norm": 0.35306331515312195, | |
| "learning_rate": 1.3971768523652598e-05, | |
| "loss": 1.2464739084243774, | |
| "step": 452 | |
| }, | |
| { | |
| "epoch": 0.6618075801749271, | |
| "grad_norm": 0.2772669494152069, | |
| "learning_rate": 1.3960229860781952e-05, | |
| "loss": 1.2844020128250122, | |
| "step": 454 | |
| }, | |
| { | |
| "epoch": 0.6647230320699709, | |
| "grad_norm": 0.10081592947244644, | |
| "learning_rate": 1.3948632215920074e-05, | |
| "loss": 1.2844829559326172, | |
| "step": 456 | |
| }, | |
| { | |
| "epoch": 0.6676384839650146, | |
| "grad_norm": 0.4896067678928375, | |
| "learning_rate": 1.3936975708861129e-05, | |
| "loss": 1.2661151885986328, | |
| "step": 458 | |
| }, | |
| { | |
| "epoch": 0.6705539358600583, | |
| "grad_norm": 0.09726856648921967, | |
| "learning_rate": 1.3925260460007276e-05, | |
| "loss": 1.3103440999984741, | |
| "step": 460 | |
| }, | |
| { | |
| "epoch": 0.673469387755102, | |
| "grad_norm": 0.15830014646053314, | |
| "learning_rate": 1.3913486590367426e-05, | |
| "loss": 1.2458621263504028, | |
| "step": 462 | |
| }, | |
| { | |
| "epoch": 0.6763848396501457, | |
| "grad_norm": 0.3230348229408264, | |
| "learning_rate": 1.3901654221555998e-05, | |
| "loss": 1.534423589706421, | |
| "step": 464 | |
| }, | |
| { | |
| "epoch": 0.6793002915451894, | |
| "grad_norm": 0.1509629487991333, | |
| "learning_rate": 1.3889763475791653e-05, | |
| "loss": 1.2820494174957275, | |
| "step": 466 | |
| }, | |
| { | |
| "epoch": 0.6822157434402333, | |
| "grad_norm": 0.34530624747276306, | |
| "learning_rate": 1.3877814475896049e-05, | |
| "loss": 1.2601618766784668, | |
| "step": 468 | |
| }, | |
| { | |
| "epoch": 0.685131195335277, | |
| "grad_norm": 0.10481005907058716, | |
| "learning_rate": 1.3865807345292548e-05, | |
| "loss": 1.1044316291809082, | |
| "step": 470 | |
| }, | |
| { | |
| "epoch": 0.6880466472303207, | |
| "grad_norm": 0.07815049588680267, | |
| "learning_rate": 1.3853742208004967e-05, | |
| "loss": 0.741702139377594, | |
| "step": 472 | |
| }, | |
| { | |
| "epoch": 0.6909620991253644, | |
| "grad_norm": 0.22590938210487366, | |
| "learning_rate": 1.3841619188656277e-05, | |
| "loss": 1.2955025434494019, | |
| "step": 474 | |
| }, | |
| { | |
| "epoch": 0.6938775510204082, | |
| "grad_norm": 0.08640377968549728, | |
| "learning_rate": 1.3829438412467324e-05, | |
| "loss": 1.1016216278076172, | |
| "step": 476 | |
| }, | |
| { | |
| "epoch": 0.6967930029154519, | |
| "grad_norm": 0.09496122598648071, | |
| "learning_rate": 1.3817200005255538e-05, | |
| "loss": 1.1232506036758423, | |
| "step": 478 | |
| }, | |
| { | |
| "epoch": 0.6997084548104956, | |
| "grad_norm": 0.07495642453432083, | |
| "learning_rate": 1.380490409343363e-05, | |
| "loss": 1.2044416666030884, | |
| "step": 480 | |
| }, | |
| { | |
| "epoch": 0.7026239067055393, | |
| "grad_norm": 0.339239239692688, | |
| "learning_rate": 1.3792550804008275e-05, | |
| "loss": 1.2485543489456177, | |
| "step": 482 | |
| }, | |
| { | |
| "epoch": 0.7055393586005831, | |
| "grad_norm": 0.17572255432605743, | |
| "learning_rate": 1.3780140264578833e-05, | |
| "loss": 1.2681964635849, | |
| "step": 484 | |
| }, | |
| { | |
| "epoch": 0.7084548104956269, | |
| "grad_norm": 0.16934579610824585, | |
| "learning_rate": 1.3767672603335994e-05, | |
| "loss": 1.4810711145401, | |
| "step": 486 | |
| }, | |
| { | |
| "epoch": 0.7113702623906706, | |
| "grad_norm": 0.04486797749996185, | |
| "learning_rate": 1.375514794906047e-05, | |
| "loss": 1.046045184135437, | |
| "step": 488 | |
| }, | |
| { | |
| "epoch": 0.7142857142857143, | |
| "grad_norm": 0.700762927532196, | |
| "learning_rate": 1.374256643112167e-05, | |
| "loss": 1.0363354682922363, | |
| "step": 490 | |
| }, | |
| { | |
| "epoch": 0.717201166180758, | |
| "grad_norm": 0.2569397985935211, | |
| "learning_rate": 1.3729928179476355e-05, | |
| "loss": 1.3074244260787964, | |
| "step": 492 | |
| }, | |
| { | |
| "epoch": 0.7201166180758017, | |
| "grad_norm": 0.20563913881778717, | |
| "learning_rate": 1.3717233324667303e-05, | |
| "loss": 1.1921494007110596, | |
| "step": 494 | |
| }, | |
| { | |
| "epoch": 0.7230320699708455, | |
| "grad_norm": 0.201784148812294, | |
| "learning_rate": 1.3704481997821944e-05, | |
| "loss": 1.3657381534576416, | |
| "step": 496 | |
| }, | |
| { | |
| "epoch": 0.7259475218658892, | |
| "grad_norm": 0.20627616345882416, | |
| "learning_rate": 1.3691674330651038e-05, | |
| "loss": 1.062203288078308, | |
| "step": 498 | |
| }, | |
| { | |
| "epoch": 0.7288629737609329, | |
| "grad_norm": 0.04925013706088066, | |
| "learning_rate": 1.3678810455447272e-05, | |
| "loss": 1.0565184354782104, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.7317784256559767, | |
| "grad_norm": 0.2994559407234192, | |
| "learning_rate": 1.3665890505083932e-05, | |
| "loss": 0.7342221140861511, | |
| "step": 502 | |
| }, | |
| { | |
| "epoch": 0.7346938775510204, | |
| "grad_norm": 0.2312147170305252, | |
| "learning_rate": 1.365291461301351e-05, | |
| "loss": 1.1462215185165405, | |
| "step": 504 | |
| }, | |
| { | |
| "epoch": 0.7376093294460642, | |
| "grad_norm": 0.1264645904302597, | |
| "learning_rate": 1.3639882913266321e-05, | |
| "loss": 1.2779966592788696, | |
| "step": 506 | |
| }, | |
| { | |
| "epoch": 0.7405247813411079, | |
| "grad_norm": 0.09908440709114075, | |
| "learning_rate": 1.3626795540449146e-05, | |
| "loss": 1.0050630569458008, | |
| "step": 508 | |
| }, | |
| { | |
| "epoch": 0.7434402332361516, | |
| "grad_norm": 0.0948040708899498, | |
| "learning_rate": 1.3613652629743807e-05, | |
| "loss": 0.9955649375915527, | |
| "step": 510 | |
| }, | |
| { | |
| "epoch": 0.7463556851311953, | |
| "grad_norm": 0.442697137594223, | |
| "learning_rate": 1.3600454316905794e-05, | |
| "loss": 1.2189491987228394, | |
| "step": 512 | |
| }, | |
| { | |
| "epoch": 0.749271137026239, | |
| "grad_norm": 0.08219840377569199, | |
| "learning_rate": 1.3587200738262852e-05, | |
| "loss": 1.2169828414916992, | |
| "step": 514 | |
| }, | |
| { | |
| "epoch": 0.7521865889212828, | |
| "grad_norm": 0.39055153727531433, | |
| "learning_rate": 1.3573892030713581e-05, | |
| "loss": 1.1840598583221436, | |
| "step": 516 | |
| }, | |
| { | |
| "epoch": 0.7551020408163265, | |
| "grad_norm": 0.16979742050170898, | |
| "learning_rate": 1.3560528331726012e-05, | |
| "loss": 1.2608612775802612, | |
| "step": 518 | |
| }, | |
| { | |
| "epoch": 0.7580174927113703, | |
| "grad_norm": 0.18750780820846558, | |
| "learning_rate": 1.3547109779336198e-05, | |
| "loss": 1.0730546712875366, | |
| "step": 520 | |
| }, | |
| { | |
| "epoch": 0.760932944606414, | |
| "grad_norm": 0.16917291283607483, | |
| "learning_rate": 1.3533636512146778e-05, | |
| "loss": 0.8358052968978882, | |
| "step": 522 | |
| }, | |
| { | |
| "epoch": 0.7638483965014577, | |
| "grad_norm": 0.21615351736545563, | |
| "learning_rate": 1.3520108669325555e-05, | |
| "loss": 1.2778382301330566, | |
| "step": 524 | |
| }, | |
| { | |
| "epoch": 0.7667638483965015, | |
| "grad_norm": 0.2199150174856186, | |
| "learning_rate": 1.350652639060405e-05, | |
| "loss": 1.3584939241409302, | |
| "step": 526 | |
| }, | |
| { | |
| "epoch": 0.7696793002915452, | |
| "grad_norm": 0.12701602280139923, | |
| "learning_rate": 1.3492889816276057e-05, | |
| "loss": 1.2652432918548584, | |
| "step": 528 | |
| }, | |
| { | |
| "epoch": 0.7725947521865889, | |
| "grad_norm": 0.2043219953775406, | |
| "learning_rate": 1.3479199087196211e-05, | |
| "loss": 0.9363166093826294, | |
| "step": 530 | |
| }, | |
| { | |
| "epoch": 0.7755102040816326, | |
| "grad_norm": 0.25679811835289, | |
| "learning_rate": 1.3465454344778514e-05, | |
| "loss": 1.30280601978302, | |
| "step": 532 | |
| }, | |
| { | |
| "epoch": 0.7784256559766763, | |
| "grad_norm": 0.1782459169626236, | |
| "learning_rate": 1.3451655730994879e-05, | |
| "loss": 0.8852262496948242, | |
| "step": 534 | |
| }, | |
| { | |
| "epoch": 0.7813411078717201, | |
| "grad_norm": 0.15585428476333618, | |
| "learning_rate": 1.3437803388373673e-05, | |
| "loss": 1.2652050256729126, | |
| "step": 536 | |
| }, | |
| { | |
| "epoch": 0.7842565597667639, | |
| "grad_norm": 0.28724268078804016, | |
| "learning_rate": 1.3423897459998234e-05, | |
| "loss": 1.5547116994857788, | |
| "step": 538 | |
| }, | |
| { | |
| "epoch": 0.7871720116618076, | |
| "grad_norm": 0.2500779628753662, | |
| "learning_rate": 1.3409938089505396e-05, | |
| "loss": 1.2525265216827393, | |
| "step": 540 | |
| }, | |
| { | |
| "epoch": 0.7900874635568513, | |
| "grad_norm": 0.45470234751701355, | |
| "learning_rate": 1.3395925421084008e-05, | |
| "loss": 1.2771704196929932, | |
| "step": 542 | |
| }, | |
| { | |
| "epoch": 0.793002915451895, | |
| "grad_norm": 0.29030269384384155, | |
| "learning_rate": 1.3381859599473444e-05, | |
| "loss": 1.17940354347229, | |
| "step": 544 | |
| }, | |
| { | |
| "epoch": 0.7959183673469388, | |
| "grad_norm": 0.49152040481567383, | |
| "learning_rate": 1.3367740769962097e-05, | |
| "loss": 1.2586897611618042, | |
| "step": 546 | |
| }, | |
| { | |
| "epoch": 0.7988338192419825, | |
| "grad_norm": 0.6251534819602966, | |
| "learning_rate": 1.335356907838591e-05, | |
| "loss": 1.15794837474823, | |
| "step": 548 | |
| }, | |
| { | |
| "epoch": 0.8017492711370262, | |
| "grad_norm": 1.237188696861267, | |
| "learning_rate": 1.3339344671126823e-05, | |
| "loss": 1.2396069765090942, | |
| "step": 550 | |
| }, | |
| { | |
| "epoch": 0.8046647230320699, | |
| "grad_norm": 0.18844130635261536, | |
| "learning_rate": 1.3325067695111302e-05, | |
| "loss": 1.3848127126693726, | |
| "step": 552 | |
| }, | |
| { | |
| "epoch": 0.8075801749271136, | |
| "grad_norm": 0.0720212385058403, | |
| "learning_rate": 1.3310738297808797e-05, | |
| "loss": 1.2827481031417847, | |
| "step": 554 | |
| }, | |
| { | |
| "epoch": 0.8104956268221575, | |
| "grad_norm": 0.30795788764953613, | |
| "learning_rate": 1.3296356627230233e-05, | |
| "loss": 1.2539678812026978, | |
| "step": 556 | |
| }, | |
| { | |
| "epoch": 0.8134110787172012, | |
| "grad_norm": 0.12987054884433746, | |
| "learning_rate": 1.328192283192647e-05, | |
| "loss": 1.1838477849960327, | |
| "step": 558 | |
| }, | |
| { | |
| "epoch": 0.8163265306122449, | |
| "grad_norm": 0.11866369843482971, | |
| "learning_rate": 1.3267437060986776e-05, | |
| "loss": 1.2138683795928955, | |
| "step": 560 | |
| }, | |
| { | |
| "epoch": 0.8192419825072886, | |
| "grad_norm": 1.3589751720428467, | |
| "learning_rate": 1.3252899464037285e-05, | |
| "loss": 1.241382122039795, | |
| "step": 562 | |
| }, | |
| { | |
| "epoch": 0.8221574344023324, | |
| "grad_norm": 0.11315155029296875, | |
| "learning_rate": 1.3238310191239449e-05, | |
| "loss": 1.2092612981796265, | |
| "step": 564 | |
| }, | |
| { | |
| "epoch": 0.8250728862973761, | |
| "grad_norm": 0.16663309931755066, | |
| "learning_rate": 1.3223669393288492e-05, | |
| "loss": 1.3294919729232788, | |
| "step": 566 | |
| }, | |
| { | |
| "epoch": 0.8279883381924198, | |
| "grad_norm": 0.18580849468708038, | |
| "learning_rate": 1.320897722141185e-05, | |
| "loss": 1.165387749671936, | |
| "step": 568 | |
| }, | |
| { | |
| "epoch": 0.8309037900874635, | |
| "grad_norm": 0.14969834685325623, | |
| "learning_rate": 1.3194233827367605e-05, | |
| "loss": 1.1585993766784668, | |
| "step": 570 | |
| }, | |
| { | |
| "epoch": 0.8338192419825073, | |
| "grad_norm": 0.18476836383342743, | |
| "learning_rate": 1.317943936344293e-05, | |
| "loss": 1.2080127000808716, | |
| "step": 572 | |
| }, | |
| { | |
| "epoch": 0.8367346938775511, | |
| "grad_norm": 0.19693532586097717, | |
| "learning_rate": 1.3164593982452502e-05, | |
| "loss": 1.4070855379104614, | |
| "step": 574 | |
| }, | |
| { | |
| "epoch": 0.8396501457725948, | |
| "grad_norm": 0.3612503111362457, | |
| "learning_rate": 1.3149697837736932e-05, | |
| "loss": 1.375995397567749, | |
| "step": 576 | |
| }, | |
| { | |
| "epoch": 0.8425655976676385, | |
| "grad_norm": 0.2689799964427948, | |
| "learning_rate": 1.3134751083161177e-05, | |
| "loss": 1.5882023572921753, | |
| "step": 578 | |
| }, | |
| { | |
| "epoch": 0.8454810495626822, | |
| "grad_norm": 0.45044106245040894, | |
| "learning_rate": 1.3119753873112952e-05, | |
| "loss": 1.530938744544983, | |
| "step": 580 | |
| }, | |
| { | |
| "epoch": 0.8483965014577259, | |
| "grad_norm": 0.15131127834320068, | |
| "learning_rate": 1.3104706362501138e-05, | |
| "loss": 1.1275839805603027, | |
| "step": 582 | |
| }, | |
| { | |
| "epoch": 0.8513119533527697, | |
| "grad_norm": 0.12577542662620544, | |
| "learning_rate": 1.3089608706754179e-05, | |
| "loss": 1.4129434823989868, | |
| "step": 584 | |
| }, | |
| { | |
| "epoch": 0.8542274052478134, | |
| "grad_norm": 0.2110750824213028, | |
| "learning_rate": 1.3074461061818475e-05, | |
| "loss": 1.1559196710586548, | |
| "step": 586 | |
| }, | |
| { | |
| "epoch": 0.8571428571428571, | |
| "grad_norm": 0.21649499237537384, | |
| "learning_rate": 1.3059263584156778e-05, | |
| "loss": 1.3160138130187988, | |
| "step": 588 | |
| }, | |
| { | |
| "epoch": 0.8600583090379009, | |
| "grad_norm": 0.24884088337421417, | |
| "learning_rate": 1.3044016430746563e-05, | |
| "loss": 1.362827181816101, | |
| "step": 590 | |
| }, | |
| { | |
| "epoch": 0.8629737609329446, | |
| "grad_norm": 0.13489077985286713, | |
| "learning_rate": 1.3028719759078428e-05, | |
| "loss": 0.9931049942970276, | |
| "step": 592 | |
| }, | |
| { | |
| "epoch": 0.8658892128279884, | |
| "grad_norm": 0.09495119750499725, | |
| "learning_rate": 1.3013373727154437e-05, | |
| "loss": 1.088317632675171, | |
| "step": 594 | |
| }, | |
| { | |
| "epoch": 0.8688046647230321, | |
| "grad_norm": 0.08689741790294647, | |
| "learning_rate": 1.2997978493486516e-05, | |
| "loss": 1.135114312171936, | |
| "step": 596 | |
| }, | |
| { | |
| "epoch": 0.8717201166180758, | |
| "grad_norm": 0.11740924417972565, | |
| "learning_rate": 1.2982534217094805e-05, | |
| "loss": 1.1683244705200195, | |
| "step": 598 | |
| }, | |
| { | |
| "epoch": 0.8746355685131195, | |
| "grad_norm": 0.19883382320404053, | |
| "learning_rate": 1.2967041057506012e-05, | |
| "loss": 1.200365662574768, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 0.8775510204081632, | |
| "grad_norm": 0.1676117181777954, | |
| "learning_rate": 1.2951499174751767e-05, | |
| "loss": 1.17380952835083, | |
| "step": 602 | |
| }, | |
| { | |
| "epoch": 0.880466472303207, | |
| "grad_norm": 0.10896378010511398, | |
| "learning_rate": 1.2935908729366975e-05, | |
| "loss": 1.1691476106643677, | |
| "step": 604 | |
| }, | |
| { | |
| "epoch": 0.8833819241982507, | |
| "grad_norm": 0.48385846614837646, | |
| "learning_rate": 1.2920269882388147e-05, | |
| "loss": 1.2547780275344849, | |
| "step": 606 | |
| }, | |
| { | |
| "epoch": 0.8862973760932945, | |
| "grad_norm": 0.5236583352088928, | |
| "learning_rate": 1.290458279535175e-05, | |
| "loss": 0.9720197916030884, | |
| "step": 608 | |
| }, | |
| { | |
| "epoch": 0.8892128279883382, | |
| "grad_norm": 0.14302794635295868, | |
| "learning_rate": 1.2888847630292523e-05, | |
| "loss": 0.7114431858062744, | |
| "step": 610 | |
| }, | |
| { | |
| "epoch": 0.892128279883382, | |
| "grad_norm": 0.24016736447811127, | |
| "learning_rate": 1.287306454974182e-05, | |
| "loss": 1.1511893272399902, | |
| "step": 612 | |
| }, | |
| { | |
| "epoch": 0.8950437317784257, | |
| "grad_norm": 0.23368032276630402, | |
| "learning_rate": 1.2857233716725915e-05, | |
| "loss": 1.270735740661621, | |
| "step": 614 | |
| }, | |
| { | |
| "epoch": 0.8979591836734694, | |
| "grad_norm": 0.31318148970603943, | |
| "learning_rate": 1.2841355294764332e-05, | |
| "loss": 0.9339938163757324, | |
| "step": 616 | |
| }, | |
| { | |
| "epoch": 0.9008746355685131, | |
| "grad_norm": 0.14631935954093933, | |
| "learning_rate": 1.2825429447868144e-05, | |
| "loss": 1.0888878107070923, | |
| "step": 618 | |
| }, | |
| { | |
| "epoch": 0.9037900874635568, | |
| "grad_norm": 0.05644264817237854, | |
| "learning_rate": 1.2809456340538295e-05, | |
| "loss": 0.6944148540496826, | |
| "step": 620 | |
| }, | |
| { | |
| "epoch": 0.9067055393586005, | |
| "grad_norm": 0.5780438780784607, | |
| "learning_rate": 1.2793436137763877e-05, | |
| "loss": 1.4030423164367676, | |
| "step": 622 | |
| }, | |
| { | |
| "epoch": 0.9096209912536443, | |
| "grad_norm": 0.25053542852401733, | |
| "learning_rate": 1.2777369005020443e-05, | |
| "loss": 1.366930603981018, | |
| "step": 624 | |
| }, | |
| { | |
| "epoch": 0.9125364431486881, | |
| "grad_norm": 0.668838381767273, | |
| "learning_rate": 1.2761255108268305e-05, | |
| "loss": 1.4005160331726074, | |
| "step": 626 | |
| }, | |
| { | |
| "epoch": 0.9154518950437318, | |
| "grad_norm": 0.39348724484443665, | |
| "learning_rate": 1.2745094613950798e-05, | |
| "loss": 1.3920326232910156, | |
| "step": 628 | |
| }, | |
| { | |
| "epoch": 0.9183673469387755, | |
| "grad_norm": 0.21188022196292877, | |
| "learning_rate": 1.2728887688992571e-05, | |
| "loss": 1.2693376541137695, | |
| "step": 630 | |
| }, | |
| { | |
| "epoch": 0.9212827988338192, | |
| "grad_norm": 0.13943858444690704, | |
| "learning_rate": 1.2712634500797868e-05, | |
| "loss": 1.3852614164352417, | |
| "step": 632 | |
| }, | |
| { | |
| "epoch": 0.924198250728863, | |
| "grad_norm": 0.09973420947790146, | |
| "learning_rate": 1.2696335217248797e-05, | |
| "loss": 1.0728514194488525, | |
| "step": 634 | |
| }, | |
| { | |
| "epoch": 0.9271137026239067, | |
| "grad_norm": 0.0977744311094284, | |
| "learning_rate": 1.2679990006703583e-05, | |
| "loss": 1.1080187559127808, | |
| "step": 636 | |
| }, | |
| { | |
| "epoch": 0.9300291545189504, | |
| "grad_norm": 0.09669560194015503, | |
| "learning_rate": 1.2663599037994848e-05, | |
| "loss": 1.101372480392456, | |
| "step": 638 | |
| }, | |
| { | |
| "epoch": 0.9329446064139941, | |
| "grad_norm": 0.2537369430065155, | |
| "learning_rate": 1.264716248042786e-05, | |
| "loss": 1.2607650756835938, | |
| "step": 640 | |
| }, | |
| { | |
| "epoch": 0.9358600583090378, | |
| "grad_norm": 0.10567066818475723, | |
| "learning_rate": 1.263068050377877e-05, | |
| "loss": 1.176032304763794, | |
| "step": 642 | |
| }, | |
| { | |
| "epoch": 0.9387755102040817, | |
| "grad_norm": 0.23190894722938538, | |
| "learning_rate": 1.2614153278292888e-05, | |
| "loss": 1.569797158241272, | |
| "step": 644 | |
| }, | |
| { | |
| "epoch": 0.9416909620991254, | |
| "grad_norm": 0.11260157078504562, | |
| "learning_rate": 1.259758097468289e-05, | |
| "loss": 1.124619960784912, | |
| "step": 646 | |
| }, | |
| { | |
| "epoch": 0.9446064139941691, | |
| "grad_norm": 0.10838615894317627, | |
| "learning_rate": 1.2580963764127086e-05, | |
| "loss": 1.0758150815963745, | |
| "step": 648 | |
| }, | |
| { | |
| "epoch": 0.9475218658892128, | |
| "grad_norm": 0.862457275390625, | |
| "learning_rate": 1.2564301818267634e-05, | |
| "loss": 0.809301495552063, | |
| "step": 650 | |
| }, | |
| { | |
| "epoch": 0.9504373177842566, | |
| "grad_norm": 0.13666097819805145, | |
| "learning_rate": 1.2547595309208762e-05, | |
| "loss": 1.1373188495635986, | |
| "step": 652 | |
| }, | |
| { | |
| "epoch": 0.9533527696793003, | |
| "grad_norm": 0.14616422355175018, | |
| "learning_rate": 1.2530844409515015e-05, | |
| "loss": 1.0827115774154663, | |
| "step": 654 | |
| }, | |
| { | |
| "epoch": 0.956268221574344, | |
| "grad_norm": 0.10559694468975067, | |
| "learning_rate": 1.2514049292209443e-05, | |
| "loss": 0.9751679301261902, | |
| "step": 656 | |
| }, | |
| { | |
| "epoch": 0.9591836734693877, | |
| "grad_norm": 0.08088317513465881, | |
| "learning_rate": 1.2497210130771838e-05, | |
| "loss": 1.495046854019165, | |
| "step": 658 | |
| }, | |
| { | |
| "epoch": 0.9620991253644315, | |
| "grad_norm": 0.6228170990943909, | |
| "learning_rate": 1.2480327099136921e-05, | |
| "loss": 1.2217864990234375, | |
| "step": 660 | |
| }, | |
| { | |
| "epoch": 0.9650145772594753, | |
| "grad_norm": 0.29220765829086304, | |
| "learning_rate": 1.2463400371692567e-05, | |
| "loss": 1.3038297891616821, | |
| "step": 662 | |
| }, | |
| { | |
| "epoch": 0.967930029154519, | |
| "grad_norm": 0.1476386934518814, | |
| "learning_rate": 1.2446430123277989e-05, | |
| "loss": 1.0814988613128662, | |
| "step": 664 | |
| }, | |
| { | |
| "epoch": 0.9708454810495627, | |
| "grad_norm": 0.5601685643196106, | |
| "learning_rate": 1.2429416529181928e-05, | |
| "loss": 1.3198177814483643, | |
| "step": 666 | |
| }, | |
| { | |
| "epoch": 0.9737609329446064, | |
| "grad_norm": 0.11794130504131317, | |
| "learning_rate": 1.2412359765140863e-05, | |
| "loss": 1.2900370359420776, | |
| "step": 668 | |
| }, | |
| { | |
| "epoch": 0.9766763848396501, | |
| "grad_norm": 0.1333070546388626, | |
| "learning_rate": 1.2395260007337178e-05, | |
| "loss": 1.0969475507736206, | |
| "step": 670 | |
| }, | |
| { | |
| "epoch": 0.9795918367346939, | |
| "grad_norm": 0.2164296805858612, | |
| "learning_rate": 1.2378117432397344e-05, | |
| "loss": 1.3217947483062744, | |
| "step": 672 | |
| }, | |
| { | |
| "epoch": 0.9825072886297376, | |
| "grad_norm": 0.1207147017121315, | |
| "learning_rate": 1.2360932217390101e-05, | |
| "loss": 1.1721763610839844, | |
| "step": 674 | |
| }, | |
| { | |
| "epoch": 0.9854227405247813, | |
| "grad_norm": 0.19854536652565002, | |
| "learning_rate": 1.2343704539824629e-05, | |
| "loss": 0.8384242057800293, | |
| "step": 676 | |
| }, | |
| { | |
| "epoch": 0.9883381924198251, | |
| "grad_norm": 0.11634889990091324, | |
| "learning_rate": 1.2326434577648703e-05, | |
| "loss": 0.5937544107437134, | |
| "step": 678 | |
| }, | |
| { | |
| "epoch": 0.9912536443148688, | |
| "grad_norm": 0.21319809556007385, | |
| "learning_rate": 1.2309122509246873e-05, | |
| "loss": 1.211629033088684, | |
| "step": 680 | |
| }, | |
| { | |
| "epoch": 0.9941690962099126, | |
| "grad_norm": 0.0654364675283432, | |
| "learning_rate": 1.2291768513438603e-05, | |
| "loss": 1.155535340309143, | |
| "step": 682 | |
| }, | |
| { | |
| "epoch": 0.9970845481049563, | |
| "grad_norm": 0.25669339299201965, | |
| "learning_rate": 1.2274372769476438e-05, | |
| "loss": 1.164899230003357, | |
| "step": 684 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "grad_norm": 0.12079296261072159, | |
| "learning_rate": 1.2256935457044149e-05, | |
| "loss": 1.3323872089385986, | |
| "step": 686 | |
| }, | |
| { | |
| "epoch": 1.0029154518950438, | |
| "grad_norm": 0.15898126363754272, | |
| "learning_rate": 1.223945675625487e-05, | |
| "loss": 0.9407209753990173, | |
| "step": 688 | |
| }, | |
| { | |
| "epoch": 1.0058309037900874, | |
| "grad_norm": 0.27969345450401306, | |
| "learning_rate": 1.2221936847649244e-05, | |
| "loss": 1.1378577947616577, | |
| "step": 690 | |
| }, | |
| { | |
| "epoch": 1.0087463556851313, | |
| "grad_norm": 0.25754043459892273, | |
| "learning_rate": 1.220437591219356e-05, | |
| "loss": 1.4397190809249878, | |
| "step": 692 | |
| }, | |
| { | |
| "epoch": 1.0116618075801749, | |
| "grad_norm": 0.10848913341760635, | |
| "learning_rate": 1.2186774131277878e-05, | |
| "loss": 1.1280958652496338, | |
| "step": 694 | |
| }, | |
| { | |
| "epoch": 1.0145772594752187, | |
| "grad_norm": 0.1306256800889969, | |
| "learning_rate": 1.2169131686714156e-05, | |
| "loss": 1.099426031112671, | |
| "step": 696 | |
| }, | |
| { | |
| "epoch": 1.0174927113702623, | |
| "grad_norm": 0.4202571511268616, | |
| "learning_rate": 1.2151448760734381e-05, | |
| "loss": 1.1389104127883911, | |
| "step": 698 | |
| }, | |
| { | |
| "epoch": 1.0204081632653061, | |
| "grad_norm": 0.24799339473247528, | |
| "learning_rate": 1.2133725535988675e-05, | |
| "loss": 1.1550320386886597, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 1.0233236151603498, | |
| "grad_norm": 0.3226027190685272, | |
| "learning_rate": 1.211596219554341e-05, | |
| "loss": 1.3826884031295776, | |
| "step": 702 | |
| }, | |
| { | |
| "epoch": 1.0262390670553936, | |
| "grad_norm": 0.16781915724277496, | |
| "learning_rate": 1.209815892287933e-05, | |
| "loss": 1.2842170000076294, | |
| "step": 704 | |
| }, | |
| { | |
| "epoch": 1.0291545189504374, | |
| "grad_norm": 0.08502925932407379, | |
| "learning_rate": 1.2080315901889638e-05, | |
| "loss": 1.3487895727157593, | |
| "step": 706 | |
| }, | |
| { | |
| "epoch": 1.032069970845481, | |
| "grad_norm": 0.16372652351856232, | |
| "learning_rate": 1.2062433316878107e-05, | |
| "loss": 1.0846039056777954, | |
| "step": 708 | |
| }, | |
| { | |
| "epoch": 1.0349854227405249, | |
| "grad_norm": 0.2926742434501648, | |
| "learning_rate": 1.204451135255717e-05, | |
| "loss": 1.3418132066726685, | |
| "step": 710 | |
| }, | |
| { | |
| "epoch": 1.0379008746355685, | |
| "grad_norm": 0.13081398606300354, | |
| "learning_rate": 1.2026550194046027e-05, | |
| "loss": 1.2699744701385498, | |
| "step": 712 | |
| }, | |
| { | |
| "epoch": 1.0408163265306123, | |
| "grad_norm": 0.3602919578552246, | |
| "learning_rate": 1.2008550026868707e-05, | |
| "loss": 1.1103326082229614, | |
| "step": 714 | |
| }, | |
| { | |
| "epoch": 1.043731778425656, | |
| "grad_norm": 0.24668650329113007, | |
| "learning_rate": 1.1990511036952182e-05, | |
| "loss": 1.1811496019363403, | |
| "step": 716 | |
| }, | |
| { | |
| "epoch": 1.0466472303206997, | |
| "grad_norm": 0.2009333372116089, | |
| "learning_rate": 1.1972433410624415e-05, | |
| "loss": 1.3141359090805054, | |
| "step": 718 | |
| }, | |
| { | |
| "epoch": 1.0495626822157433, | |
| "grad_norm": 0.4131545126438141, | |
| "learning_rate": 1.1954317334612466e-05, | |
| "loss": 1.1311266422271729, | |
| "step": 720 | |
| }, | |
| { | |
| "epoch": 1.0524781341107872, | |
| "grad_norm": 0.26808369159698486, | |
| "learning_rate": 1.193616299604054e-05, | |
| "loss": 1.2641208171844482, | |
| "step": 722 | |
| }, | |
| { | |
| "epoch": 1.055393586005831, | |
| "grad_norm": 0.18929173052310944, | |
| "learning_rate": 1.1917970582428065e-05, | |
| "loss": 1.022256851196289, | |
| "step": 724 | |
| }, | |
| { | |
| "epoch": 1.0583090379008746, | |
| "grad_norm": 0.07950548082590103, | |
| "learning_rate": 1.1899740281687752e-05, | |
| "loss": 1.1594070196151733, | |
| "step": 726 | |
| }, | |
| { | |
| "epoch": 1.0612244897959184, | |
| "grad_norm": 0.3975690007209778, | |
| "learning_rate": 1.1881472282123659e-05, | |
| "loss": 1.09200918674469, | |
| "step": 728 | |
| }, | |
| { | |
| "epoch": 1.064139941690962, | |
| "grad_norm": 0.1322367936372757, | |
| "learning_rate": 1.1863166772429237e-05, | |
| "loss": 1.144595980644226, | |
| "step": 730 | |
| }, | |
| { | |
| "epoch": 1.0670553935860059, | |
| "grad_norm": 0.13084831833839417, | |
| "learning_rate": 1.1844823941685388e-05, | |
| "loss": 1.233044981956482, | |
| "step": 732 | |
| }, | |
| { | |
| "epoch": 1.0699708454810495, | |
| "grad_norm": 0.17538310587406158, | |
| "learning_rate": 1.1826443979358511e-05, | |
| "loss": 0.648325502872467, | |
| "step": 734 | |
| }, | |
| { | |
| "epoch": 1.0728862973760933, | |
| "grad_norm": 0.1613551825284958, | |
| "learning_rate": 1.1808027075298542e-05, | |
| "loss": 1.339321255683899, | |
| "step": 736 | |
| }, | |
| { | |
| "epoch": 1.075801749271137, | |
| "grad_norm": 0.062147416174411774, | |
| "learning_rate": 1.1789573419736995e-05, | |
| "loss": 1.0158833265304565, | |
| "step": 738 | |
| }, | |
| { | |
| "epoch": 1.0787172011661808, | |
| "grad_norm": 0.2725241184234619, | |
| "learning_rate": 1.1771083203284994e-05, | |
| "loss": 1.049664855003357, | |
| "step": 740 | |
| }, | |
| { | |
| "epoch": 1.0816326530612246, | |
| "grad_norm": 0.14118708670139313, | |
| "learning_rate": 1.1752556616931319e-05, | |
| "loss": 1.4558746814727783, | |
| "step": 742 | |
| }, | |
| { | |
| "epoch": 1.0845481049562682, | |
| "grad_norm": 0.12485146522521973, | |
| "learning_rate": 1.17339938520404e-05, | |
| "loss": 1.067897081375122, | |
| "step": 744 | |
| }, | |
| { | |
| "epoch": 1.087463556851312, | |
| "grad_norm": 0.14729249477386475, | |
| "learning_rate": 1.1715395100350386e-05, | |
| "loss": 1.2803950309753418, | |
| "step": 746 | |
| }, | |
| { | |
| "epoch": 1.0903790087463556, | |
| "grad_norm": 0.2967908978462219, | |
| "learning_rate": 1.1696760553971122e-05, | |
| "loss": 1.4100807905197144, | |
| "step": 748 | |
| }, | |
| { | |
| "epoch": 1.0932944606413995, | |
| "grad_norm": 0.18390890955924988, | |
| "learning_rate": 1.1678090405382191e-05, | |
| "loss": 1.0381572246551514, | |
| "step": 750 | |
| }, | |
| { | |
| "epoch": 1.096209912536443, | |
| "grad_norm": 0.08851258456707001, | |
| "learning_rate": 1.1659384847430916e-05, | |
| "loss": 1.2206934690475464, | |
| "step": 752 | |
| }, | |
| { | |
| "epoch": 1.099125364431487, | |
| "grad_norm": 0.1275774985551834, | |
| "learning_rate": 1.1640644073330365e-05, | |
| "loss": 1.258091688156128, | |
| "step": 754 | |
| }, | |
| { | |
| "epoch": 1.1020408163265305, | |
| "grad_norm": 0.3569571077823639, | |
| "learning_rate": 1.1621868276657371e-05, | |
| "loss": 1.2325845956802368, | |
| "step": 756 | |
| }, | |
| { | |
| "epoch": 1.1049562682215743, | |
| "grad_norm": 0.2721734642982483, | |
| "learning_rate": 1.1603057651350508e-05, | |
| "loss": 1.0642601251602173, | |
| "step": 758 | |
| }, | |
| { | |
| "epoch": 1.1078717201166182, | |
| "grad_norm": 0.2617255449295044, | |
| "learning_rate": 1.158421239170811e-05, | |
| "loss": 1.3023701906204224, | |
| "step": 760 | |
| }, | |
| { | |
| "epoch": 1.1107871720116618, | |
| "grad_norm": 0.1031145453453064, | |
| "learning_rate": 1.156533269238626e-05, | |
| "loss": 0.8144070506095886, | |
| "step": 762 | |
| }, | |
| { | |
| "epoch": 1.1137026239067056, | |
| "grad_norm": 0.1646541804075241, | |
| "learning_rate": 1.1546418748396758e-05, | |
| "loss": 1.0213180780410767, | |
| "step": 764 | |
| }, | |
| { | |
| "epoch": 1.1166180758017492, | |
| "grad_norm": 0.3250854015350342, | |
| "learning_rate": 1.1527470755105138e-05, | |
| "loss": 0.9498108625411987, | |
| "step": 766 | |
| }, | |
| { | |
| "epoch": 1.119533527696793, | |
| "grad_norm": 0.10029526799917221, | |
| "learning_rate": 1.1508488908228629e-05, | |
| "loss": 1.1771409511566162, | |
| "step": 768 | |
| }, | |
| { | |
| "epoch": 1.1224489795918366, | |
| "grad_norm": 0.09416939318180084, | |
| "learning_rate": 1.1489473403834142e-05, | |
| "loss": 0.5949094891548157, | |
| "step": 770 | |
| }, | |
| { | |
| "epoch": 1.1253644314868805, | |
| "grad_norm": 0.20775017142295837, | |
| "learning_rate": 1.1470424438336244e-05, | |
| "loss": 0.8676192760467529, | |
| "step": 772 | |
| }, | |
| { | |
| "epoch": 1.128279883381924, | |
| "grad_norm": 0.24049599468708038, | |
| "learning_rate": 1.145134220849512e-05, | |
| "loss": 1.1979655027389526, | |
| "step": 774 | |
| }, | |
| { | |
| "epoch": 1.131195335276968, | |
| "grad_norm": 0.320576548576355, | |
| "learning_rate": 1.1432226911414561e-05, | |
| "loss": 1.150422215461731, | |
| "step": 776 | |
| }, | |
| { | |
| "epoch": 1.1341107871720117, | |
| "grad_norm": 0.08741223067045212, | |
| "learning_rate": 1.1413078744539906e-05, | |
| "loss": 1.1655181646347046, | |
| "step": 778 | |
| }, | |
| { | |
| "epoch": 1.1370262390670554, | |
| "grad_norm": 0.13662189245224, | |
| "learning_rate": 1.139389790565601e-05, | |
| "loss": 1.1560207605361938, | |
| "step": 780 | |
| }, | |
| { | |
| "epoch": 1.1399416909620992, | |
| "grad_norm": 0.1589939296245575, | |
| "learning_rate": 1.1374684592885214e-05, | |
| "loss": 1.3467984199523926, | |
| "step": 782 | |
| }, | |
| { | |
| "epoch": 1.1428571428571428, | |
| "grad_norm": 0.29279693961143494, | |
| "learning_rate": 1.1355439004685278e-05, | |
| "loss": 1.0917768478393555, | |
| "step": 784 | |
| }, | |
| { | |
| "epoch": 1.1457725947521866, | |
| "grad_norm": 0.5396981835365295, | |
| "learning_rate": 1.1336161339847343e-05, | |
| "loss": 1.131831169128418, | |
| "step": 786 | |
| }, | |
| { | |
| "epoch": 1.1486880466472302, | |
| "grad_norm": 1.319527506828308, | |
| "learning_rate": 1.1316851797493877e-05, | |
| "loss": 1.287348747253418, | |
| "step": 788 | |
| }, | |
| { | |
| "epoch": 1.151603498542274, | |
| "grad_norm": 0.24090451002120972, | |
| "learning_rate": 1.1297510577076617e-05, | |
| "loss": 1.196481466293335, | |
| "step": 790 | |
| }, | |
| { | |
| "epoch": 1.1545189504373177, | |
| "grad_norm": 0.15632812678813934, | |
| "learning_rate": 1.1278137878374507e-05, | |
| "loss": 1.2842094898223877, | |
| "step": 792 | |
| }, | |
| { | |
| "epoch": 1.1574344023323615, | |
| "grad_norm": 0.1558282971382141, | |
| "learning_rate": 1.1258733901491634e-05, | |
| "loss": 1.160306453704834, | |
| "step": 794 | |
| }, | |
| { | |
| "epoch": 1.1603498542274053, | |
| "grad_norm": 0.0693809762597084, | |
| "learning_rate": 1.1239298846855166e-05, | |
| "loss": 1.3671103715896606, | |
| "step": 796 | |
| }, | |
| { | |
| "epoch": 1.163265306122449, | |
| "grad_norm": 0.11606906354427338, | |
| "learning_rate": 1.121983291521328e-05, | |
| "loss": 1.2540158033370972, | |
| "step": 798 | |
| }, | |
| { | |
| "epoch": 1.1661807580174928, | |
| "grad_norm": 0.5656346082687378, | |
| "learning_rate": 1.1200336307633083e-05, | |
| "loss": 1.095619797706604, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 1.1690962099125364, | |
| "grad_norm": 0.3416520953178406, | |
| "learning_rate": 1.1180809225498542e-05, | |
| "loss": 1.33209228515625, | |
| "step": 802 | |
| }, | |
| { | |
| "epoch": 1.1720116618075802, | |
| "grad_norm": 0.14092491567134857, | |
| "learning_rate": 1.11612518705084e-05, | |
| "loss": 1.121877670288086, | |
| "step": 804 | |
| }, | |
| { | |
| "epoch": 1.1749271137026238, | |
| "grad_norm": 0.26185205578804016, | |
| "learning_rate": 1.1141664444674091e-05, | |
| "loss": 1.3565205335617065, | |
| "step": 806 | |
| }, | |
| { | |
| "epoch": 1.1778425655976676, | |
| "grad_norm": 0.15331599116325378, | |
| "learning_rate": 1.1122047150317665e-05, | |
| "loss": 0.7860437631607056, | |
| "step": 808 | |
| }, | |
| { | |
| "epoch": 1.1807580174927113, | |
| "grad_norm": 0.25274330377578735, | |
| "learning_rate": 1.110240019006968e-05, | |
| "loss": 0.7633789777755737, | |
| "step": 810 | |
| }, | |
| { | |
| "epoch": 1.183673469387755, | |
| "grad_norm": 0.1963554322719574, | |
| "learning_rate": 1.1082723766867123e-05, | |
| "loss": 1.133277177810669, | |
| "step": 812 | |
| }, | |
| { | |
| "epoch": 1.186588921282799, | |
| "grad_norm": 0.33926016092300415, | |
| "learning_rate": 1.1063018083951309e-05, | |
| "loss": 1.0211750268936157, | |
| "step": 814 | |
| }, | |
| { | |
| "epoch": 1.1895043731778425, | |
| "grad_norm": 0.23344306647777557, | |
| "learning_rate": 1.1043283344865776e-05, | |
| "loss": 1.1373283863067627, | |
| "step": 816 | |
| }, | |
| { | |
| "epoch": 1.1924198250728864, | |
| "grad_norm": 0.2557908594608307, | |
| "learning_rate": 1.1023519753454203e-05, | |
| "loss": 0.9404536485671997, | |
| "step": 818 | |
| }, | |
| { | |
| "epoch": 1.19533527696793, | |
| "grad_norm": 1.4168596267700195, | |
| "learning_rate": 1.1003727513858268e-05, | |
| "loss": 1.1765224933624268, | |
| "step": 820 | |
| }, | |
| { | |
| "epoch": 1.1982507288629738, | |
| "grad_norm": 0.13063687086105347, | |
| "learning_rate": 1.0983906830515584e-05, | |
| "loss": 1.222176432609558, | |
| "step": 822 | |
| }, | |
| { | |
| "epoch": 1.2011661807580174, | |
| "grad_norm": 0.07739931344985962, | |
| "learning_rate": 1.0964057908157548e-05, | |
| "loss": 1.151648998260498, | |
| "step": 824 | |
| }, | |
| { | |
| "epoch": 1.2040816326530612, | |
| "grad_norm": 0.07822076976299286, | |
| "learning_rate": 1.094418095180725e-05, | |
| "loss": 1.061394453048706, | |
| "step": 826 | |
| }, | |
| { | |
| "epoch": 1.2069970845481048, | |
| "grad_norm": 0.14568239450454712, | |
| "learning_rate": 1.0924276166777349e-05, | |
| "loss": 0.7191852927207947, | |
| "step": 828 | |
| }, | |
| { | |
| "epoch": 1.2099125364431487, | |
| "grad_norm": 0.30981016159057617, | |
| "learning_rate": 1.090434375866795e-05, | |
| "loss": 0.9558042287826538, | |
| "step": 830 | |
| }, | |
| { | |
| "epoch": 1.2128279883381925, | |
| "grad_norm": 0.2437950074672699, | |
| "learning_rate": 1.0884383933364477e-05, | |
| "loss": 1.1506716012954712, | |
| "step": 832 | |
| }, | |
| { | |
| "epoch": 1.215743440233236, | |
| "grad_norm": 0.24170175194740295, | |
| "learning_rate": 1.0864396897035558e-05, | |
| "loss": 1.1895190477371216, | |
| "step": 834 | |
| }, | |
| { | |
| "epoch": 1.21865889212828, | |
| "grad_norm": 0.1518929898738861, | |
| "learning_rate": 1.0844382856130886e-05, | |
| "loss": 1.2491060495376587, | |
| "step": 836 | |
| }, | |
| { | |
| "epoch": 1.2215743440233235, | |
| "grad_norm": 0.14055992662906647, | |
| "learning_rate": 1.0824342017379089e-05, | |
| "loss": 1.4196858406066895, | |
| "step": 838 | |
| }, | |
| { | |
| "epoch": 1.2244897959183674, | |
| "grad_norm": 0.18487177789211273, | |
| "learning_rate": 1.0804274587785595e-05, | |
| "loss": 1.0294526815414429, | |
| "step": 840 | |
| }, | |
| { | |
| "epoch": 1.227405247813411, | |
| "grad_norm": 0.6372827887535095, | |
| "learning_rate": 1.0784180774630495e-05, | |
| "loss": 0.26844465732574463, | |
| "step": 842 | |
| }, | |
| { | |
| "epoch": 1.2303206997084548, | |
| "grad_norm": 0.15034730732440948, | |
| "learning_rate": 1.0764060785466391e-05, | |
| "loss": 1.2424967288970947, | |
| "step": 844 | |
| }, | |
| { | |
| "epoch": 1.2332361516034984, | |
| "grad_norm": 0.16668657958507538, | |
| "learning_rate": 1.0743914828116281e-05, | |
| "loss": 1.0989577770233154, | |
| "step": 846 | |
| }, | |
| { | |
| "epoch": 1.2361516034985423, | |
| "grad_norm": 0.15799511969089508, | |
| "learning_rate": 1.0723743110671378e-05, | |
| "loss": 1.2244020700454712, | |
| "step": 848 | |
| }, | |
| { | |
| "epoch": 1.239067055393586, | |
| "grad_norm": 0.09745261073112488, | |
| "learning_rate": 1.0703545841488974e-05, | |
| "loss": 1.1401562690734863, | |
| "step": 850 | |
| }, | |
| { | |
| "epoch": 1.2419825072886297, | |
| "grad_norm": 0.5921195149421692, | |
| "learning_rate": 1.06833232291903e-05, | |
| "loss": 0.7718449234962463, | |
| "step": 852 | |
| }, | |
| { | |
| "epoch": 1.2448979591836735, | |
| "grad_norm": 0.08858446776866913, | |
| "learning_rate": 1.0663075482658355e-05, | |
| "loss": 1.074745774269104, | |
| "step": 854 | |
| }, | |
| { | |
| "epoch": 1.2478134110787171, | |
| "grad_norm": 0.22339816391468048, | |
| "learning_rate": 1.0642802811035753e-05, | |
| "loss": 0.6682339310646057, | |
| "step": 856 | |
| }, | |
| { | |
| "epoch": 1.250728862973761, | |
| "grad_norm": 0.22134488821029663, | |
| "learning_rate": 1.0622505423722566e-05, | |
| "loss": 1.1483386754989624, | |
| "step": 858 | |
| }, | |
| { | |
| "epoch": 1.2536443148688048, | |
| "grad_norm": 0.34351247549057007, | |
| "learning_rate": 1.0602183530374159e-05, | |
| "loss": 0.9953691959381104, | |
| "step": 860 | |
| }, | |
| { | |
| "epoch": 1.2565597667638484, | |
| "grad_norm": 0.1252131313085556, | |
| "learning_rate": 1.0581837340899022e-05, | |
| "loss": 1.152267575263977, | |
| "step": 862 | |
| }, | |
| { | |
| "epoch": 1.259475218658892, | |
| "grad_norm": 0.10258015990257263, | |
| "learning_rate": 1.0561467065456607e-05, | |
| "loss": 1.0798017978668213, | |
| "step": 864 | |
| }, | |
| { | |
| "epoch": 1.2623906705539358, | |
| "grad_norm": 0.3338652551174164, | |
| "learning_rate": 1.0541072914455152e-05, | |
| "loss": 0.6286276578903198, | |
| "step": 866 | |
| }, | |
| { | |
| "epoch": 1.2653061224489797, | |
| "grad_norm": 0.18449436128139496, | |
| "learning_rate": 1.0520655098549508e-05, | |
| "loss": 1.1572736501693726, | |
| "step": 868 | |
| }, | |
| { | |
| "epoch": 1.2682215743440233, | |
| "grad_norm": 0.1656051129102707, | |
| "learning_rate": 1.0500213828638972e-05, | |
| "loss": 1.2729966640472412, | |
| "step": 870 | |
| }, | |
| { | |
| "epoch": 1.271137026239067, | |
| "grad_norm": 0.1694529801607132, | |
| "learning_rate": 1.0479749315865093e-05, | |
| "loss": 1.1974416971206665, | |
| "step": 872 | |
| }, | |
| { | |
| "epoch": 1.2740524781341107, | |
| "grad_norm": 0.07350558042526245, | |
| "learning_rate": 1.045926177160951e-05, | |
| "loss": 1.127896785736084, | |
| "step": 874 | |
| }, | |
| { | |
| "epoch": 1.2769679300291545, | |
| "grad_norm": 0.1753559112548828, | |
| "learning_rate": 1.0438751407491745e-05, | |
| "loss": 1.1373307704925537, | |
| "step": 876 | |
| }, | |
| { | |
| "epoch": 1.2798833819241984, | |
| "grad_norm": 0.16192442178726196, | |
| "learning_rate": 1.0418218435367043e-05, | |
| "loss": 1.0873537063598633, | |
| "step": 878 | |
| }, | |
| { | |
| "epoch": 1.282798833819242, | |
| "grad_norm": 0.2647189497947693, | |
| "learning_rate": 1.0397663067324163e-05, | |
| "loss": 0.8994747400283813, | |
| "step": 880 | |
| }, | |
| { | |
| "epoch": 1.2857142857142856, | |
| "grad_norm": 0.16055135428905487, | |
| "learning_rate": 1.03770855156832e-05, | |
| "loss": 1.1629761457443237, | |
| "step": 882 | |
| }, | |
| { | |
| "epoch": 1.2886297376093294, | |
| "grad_norm": 0.1312457174062729, | |
| "learning_rate": 1.0356485992993386e-05, | |
| "loss": 1.2289665937423706, | |
| "step": 884 | |
| }, | |
| { | |
| "epoch": 1.2915451895043732, | |
| "grad_norm": 0.3237832486629486, | |
| "learning_rate": 1.0335864712030895e-05, | |
| "loss": 1.3477158546447754, | |
| "step": 886 | |
| }, | |
| { | |
| "epoch": 1.2944606413994169, | |
| "grad_norm": 0.11200102418661118, | |
| "learning_rate": 1.0315221885796648e-05, | |
| "loss": 1.1597537994384766, | |
| "step": 888 | |
| }, | |
| { | |
| "epoch": 1.2973760932944607, | |
| "grad_norm": 0.1582571268081665, | |
| "learning_rate": 1.029455772751411e-05, | |
| "loss": 1.0584282875061035, | |
| "step": 890 | |
| }, | |
| { | |
| "epoch": 1.3002915451895043, | |
| "grad_norm": 0.2713635563850403, | |
| "learning_rate": 1.0273872450627086e-05, | |
| "loss": 1.065276026725769, | |
| "step": 892 | |
| }, | |
| { | |
| "epoch": 1.3032069970845481, | |
| "grad_norm": 0.617933988571167, | |
| "learning_rate": 1.025316626879752e-05, | |
| "loss": 1.1870301961898804, | |
| "step": 894 | |
| }, | |
| { | |
| "epoch": 1.306122448979592, | |
| "grad_norm": 0.24628496170043945, | |
| "learning_rate": 1.0232439395903295e-05, | |
| "loss": 1.3716992139816284, | |
| "step": 896 | |
| }, | |
| { | |
| "epoch": 1.3090379008746356, | |
| "grad_norm": 0.07092081010341644, | |
| "learning_rate": 1.0211692046036002e-05, | |
| "loss": 1.2022879123687744, | |
| "step": 898 | |
| }, | |
| { | |
| "epoch": 1.3119533527696792, | |
| "grad_norm": 0.07380987703800201, | |
| "learning_rate": 1.019092443349875e-05, | |
| "loss": 0.9747592806816101, | |
| "step": 900 | |
| }, | |
| { | |
| "epoch": 1.314868804664723, | |
| "grad_norm": 0.07589751482009888, | |
| "learning_rate": 1.0170136772803948e-05, | |
| "loss": 1.033135175704956, | |
| "step": 902 | |
| }, | |
| { | |
| "epoch": 1.3177842565597668, | |
| "grad_norm": 0.12000124901533127, | |
| "learning_rate": 1.0149329278671082e-05, | |
| "loss": 1.1944102048873901, | |
| "step": 904 | |
| }, | |
| { | |
| "epoch": 1.3206997084548104, | |
| "grad_norm": 0.24365442991256714, | |
| "learning_rate": 1.0128502166024497e-05, | |
| "loss": 0.7611994743347168, | |
| "step": 906 | |
| }, | |
| { | |
| "epoch": 1.3236151603498543, | |
| "grad_norm": 0.5757351517677307, | |
| "learning_rate": 1.0107655649991186e-05, | |
| "loss": 1.0334023237228394, | |
| "step": 908 | |
| }, | |
| { | |
| "epoch": 1.3265306122448979, | |
| "grad_norm": 0.09015009552240372, | |
| "learning_rate": 1.0086789945898568e-05, | |
| "loss": 1.1387327909469604, | |
| "step": 910 | |
| }, | |
| { | |
| "epoch": 1.3294460641399417, | |
| "grad_norm": 0.6966755390167236, | |
| "learning_rate": 1.0065905269272245e-05, | |
| "loss": 1.0652743577957153, | |
| "step": 912 | |
| }, | |
| { | |
| "epoch": 1.3323615160349855, | |
| "grad_norm": 0.08158166706562042, | |
| "learning_rate": 1.0045001835833804e-05, | |
| "loss": 1.154505968093872, | |
| "step": 914 | |
| }, | |
| { | |
| "epoch": 1.3352769679300291, | |
| "grad_norm": 0.17343761026859283, | |
| "learning_rate": 1.0024079861498566e-05, | |
| "loss": 1.0197257995605469, | |
| "step": 916 | |
| }, | |
| { | |
| "epoch": 1.3381924198250728, | |
| "grad_norm": 0.3027811050415039, | |
| "learning_rate": 1.0003139562373365e-05, | |
| "loss": 1.3120397329330444, | |
| "step": 918 | |
| }, | |
| { | |
| "epoch": 1.3411078717201166, | |
| "grad_norm": 0.7201161980628967, | |
| "learning_rate": 9.982181154754323e-06, | |
| "loss": 0.6248821020126343, | |
| "step": 920 | |
| }, | |
| { | |
| "epoch": 1.3440233236151604, | |
| "grad_norm": 0.06654369831085205, | |
| "learning_rate": 9.961204855124595e-06, | |
| "loss": 1.3484827280044556, | |
| "step": 922 | |
| }, | |
| { | |
| "epoch": 1.346938775510204, | |
| "grad_norm": 0.3403482437133789, | |
| "learning_rate": 9.940210880152157e-06, | |
| "loss": 1.023748517036438, | |
| "step": 924 | |
| }, | |
| { | |
| "epoch": 1.3498542274052479, | |
| "grad_norm": 0.3134101629257202, | |
| "learning_rate": 9.91919944668755e-06, | |
| "loss": 1.462807536125183, | |
| "step": 926 | |
| }, | |
| { | |
| "epoch": 1.3527696793002915, | |
| "grad_norm": 0.12223192304372787, | |
| "learning_rate": 9.89817077176165e-06, | |
| "loss": 1.0908539295196533, | |
| "step": 928 | |
| }, | |
| { | |
| "epoch": 1.3556851311953353, | |
| "grad_norm": 0.14625874161720276, | |
| "learning_rate": 9.877125072583421e-06, | |
| "loss": 1.2502838373184204, | |
| "step": 930 | |
| }, | |
| { | |
| "epoch": 1.3586005830903791, | |
| "grad_norm": 0.2647968828678131, | |
| "learning_rate": 9.856062566537677e-06, | |
| "loss": 1.3731303215026855, | |
| "step": 932 | |
| }, | |
| { | |
| "epoch": 1.3615160349854227, | |
| "grad_norm": 0.14242695271968842, | |
| "learning_rate": 9.834983471182831e-06, | |
| "loss": 1.0232398509979248, | |
| "step": 934 | |
| }, | |
| { | |
| "epoch": 1.3644314868804663, | |
| "grad_norm": 0.22755105793476105, | |
| "learning_rate": 9.813888004248648e-06, | |
| "loss": 1.1105183362960815, | |
| "step": 936 | |
| }, | |
| { | |
| "epoch": 1.3673469387755102, | |
| "grad_norm": 0.10210377722978592, | |
| "learning_rate": 9.792776383634002e-06, | |
| "loss": 0.9822967648506165, | |
| "step": 938 | |
| }, | |
| { | |
| "epoch": 1.370262390670554, | |
| "grad_norm": 0.2081102728843689, | |
| "learning_rate": 9.771648827404617e-06, | |
| "loss": 0.6831743121147156, | |
| "step": 940 | |
| }, | |
| { | |
| "epoch": 1.3731778425655976, | |
| "grad_norm": 0.195752814412117, | |
| "learning_rate": 9.750505553790823e-06, | |
| "loss": 1.017356514930725, | |
| "step": 942 | |
| }, | |
| { | |
| "epoch": 1.3760932944606414, | |
| "grad_norm": 0.149446040391922, | |
| "learning_rate": 9.729346781185295e-06, | |
| "loss": 1.2844679355621338, | |
| "step": 944 | |
| }, | |
| { | |
| "epoch": 1.379008746355685, | |
| "grad_norm": 0.08231537789106369, | |
| "learning_rate": 9.708172728140804e-06, | |
| "loss": 1.2107067108154297, | |
| "step": 946 | |
| }, | |
| { | |
| "epoch": 1.3819241982507289, | |
| "grad_norm": 0.1436920166015625, | |
| "learning_rate": 9.686983613367947e-06, | |
| "loss": 0.9730831384658813, | |
| "step": 948 | |
| }, | |
| { | |
| "epoch": 1.3848396501457727, | |
| "grad_norm": 0.13865897059440613, | |
| "learning_rate": 9.665779655732905e-06, | |
| "loss": 1.134727954864502, | |
| "step": 950 | |
| }, | |
| { | |
| "epoch": 1.3877551020408163, | |
| "grad_norm": 0.1278238445520401, | |
| "learning_rate": 9.644561074255168e-06, | |
| "loss": 1.1596717834472656, | |
| "step": 952 | |
| }, | |
| { | |
| "epoch": 1.39067055393586, | |
| "grad_norm": 0.13528533279895782, | |
| "learning_rate": 9.62332808810528e-06, | |
| "loss": 1.0845617055892944, | |
| "step": 954 | |
| }, | |
| { | |
| "epoch": 1.3935860058309038, | |
| "grad_norm": 0.14649415016174316, | |
| "learning_rate": 9.602080916602573e-06, | |
| "loss": 1.223073124885559, | |
| "step": 956 | |
| }, | |
| { | |
| "epoch": 1.3965014577259476, | |
| "grad_norm": 0.1999201625585556, | |
| "learning_rate": 9.580819779212905e-06, | |
| "loss": 1.0572779178619385, | |
| "step": 958 | |
| }, | |
| { | |
| "epoch": 1.3994169096209912, | |
| "grad_norm": 0.42912936210632324, | |
| "learning_rate": 9.559544895546393e-06, | |
| "loss": 1.211446762084961, | |
| "step": 960 | |
| }, | |
| { | |
| "epoch": 1.402332361516035, | |
| "grad_norm": 0.3703382611274719, | |
| "learning_rate": 9.538256485355125e-06, | |
| "loss": 1.1024117469787598, | |
| "step": 962 | |
| }, | |
| { | |
| "epoch": 1.4052478134110786, | |
| "grad_norm": 0.09566738456487656, | |
| "learning_rate": 9.516954768530924e-06, | |
| "loss": 1.0713633298873901, | |
| "step": 964 | |
| }, | |
| { | |
| "epoch": 1.4081632653061225, | |
| "grad_norm": 0.13610726594924927, | |
| "learning_rate": 9.49563996510306e-06, | |
| "loss": 1.2085410356521606, | |
| "step": 966 | |
| }, | |
| { | |
| "epoch": 1.4110787172011663, | |
| "grad_norm": 0.19745762646198273, | |
| "learning_rate": 9.47431229523596e-06, | |
| "loss": 1.0144951343536377, | |
| "step": 968 | |
| }, | |
| { | |
| "epoch": 1.41399416909621, | |
| "grad_norm": 0.41680532693862915, | |
| "learning_rate": 9.452971979226972e-06, | |
| "loss": 1.0802420377731323, | |
| "step": 970 | |
| }, | |
| { | |
| "epoch": 1.4169096209912537, | |
| "grad_norm": 0.18726322054862976, | |
| "learning_rate": 9.431619237504052e-06, | |
| "loss": 1.2159126996994019, | |
| "step": 972 | |
| }, | |
| { | |
| "epoch": 1.4198250728862973, | |
| "grad_norm": 0.4570455551147461, | |
| "learning_rate": 9.410254290623512e-06, | |
| "loss": 1.1028673648834229, | |
| "step": 974 | |
| }, | |
| { | |
| "epoch": 1.4227405247813412, | |
| "grad_norm": 0.1720321923494339, | |
| "learning_rate": 9.388877359267732e-06, | |
| "loss": 1.053758978843689, | |
| "step": 976 | |
| }, | |
| { | |
| "epoch": 1.4256559766763848, | |
| "grad_norm": 0.7719082832336426, | |
| "learning_rate": 9.367488664242878e-06, | |
| "loss": 1.0918673276901245, | |
| "step": 978 | |
| }, | |
| { | |
| "epoch": 1.4285714285714286, | |
| "grad_norm": 0.11719834804534912, | |
| "learning_rate": 9.346088426476627e-06, | |
| "loss": 1.1107982397079468, | |
| "step": 980 | |
| }, | |
| { | |
| "epoch": 1.4314868804664722, | |
| "grad_norm": 0.26357176899909973, | |
| "learning_rate": 9.32467686701589e-06, | |
| "loss": 1.3265354633331299, | |
| "step": 982 | |
| }, | |
| { | |
| "epoch": 1.434402332361516, | |
| "grad_norm": 0.7194681167602539, | |
| "learning_rate": 9.303254207024509e-06, | |
| "loss": 0.6845600605010986, | |
| "step": 984 | |
| }, | |
| { | |
| "epoch": 1.4373177842565599, | |
| "grad_norm": 0.19328005611896515, | |
| "learning_rate": 9.28182066778099e-06, | |
| "loss": 1.1066367626190186, | |
| "step": 986 | |
| }, | |
| { | |
| "epoch": 1.4402332361516035, | |
| "grad_norm": 0.3166584372520447, | |
| "learning_rate": 9.260376470676225e-06, | |
| "loss": 1.0711687803268433, | |
| "step": 988 | |
| }, | |
| { | |
| "epoch": 1.4431486880466473, | |
| "grad_norm": 0.20059515535831451, | |
| "learning_rate": 9.238921837211175e-06, | |
| "loss": 1.2519899606704712, | |
| "step": 990 | |
| }, | |
| { | |
| "epoch": 1.446064139941691, | |
| "grad_norm": 0.15826623141765594, | |
| "learning_rate": 9.217456988994608e-06, | |
| "loss": 1.3235565423965454, | |
| "step": 992 | |
| }, | |
| { | |
| "epoch": 1.4489795918367347, | |
| "grad_norm": 0.19210676848888397, | |
| "learning_rate": 9.1959821477408e-06, | |
| "loss": 1.0224212408065796, | |
| "step": 994 | |
| }, | |
| { | |
| "epoch": 1.4518950437317784, | |
| "grad_norm": 0.26280826330184937, | |
| "learning_rate": 9.174497535267257e-06, | |
| "loss": 1.1540876626968384, | |
| "step": 996 | |
| }, | |
| { | |
| "epoch": 1.4548104956268222, | |
| "grad_norm": 0.09911534935235977, | |
| "learning_rate": 9.153003373492395e-06, | |
| "loss": 1.197079062461853, | |
| "step": 998 | |
| }, | |
| { | |
| "epoch": 1.4577259475218658, | |
| "grad_norm": 0.15191975235939026, | |
| "learning_rate": 9.131499884433285e-06, | |
| "loss": 1.2020612955093384, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 1.4606413994169096, | |
| "grad_norm": 0.1272922158241272, | |
| "learning_rate": 9.109987290203325e-06, | |
| "loss": 1.1222330331802368, | |
| "step": 1002 | |
| }, | |
| { | |
| "epoch": 1.4635568513119535, | |
| "grad_norm": 0.17026354372501373, | |
| "learning_rate": 9.088465813009979e-06, | |
| "loss": 1.2111908197402954, | |
| "step": 1004 | |
| }, | |
| { | |
| "epoch": 1.466472303206997, | |
| "grad_norm": 0.1192101240158081, | |
| "learning_rate": 9.06693567515245e-06, | |
| "loss": 1.186848759651184, | |
| "step": 1006 | |
| }, | |
| { | |
| "epoch": 1.469387755102041, | |
| "grad_norm": 0.5374306440353394, | |
| "learning_rate": 9.045397099019405e-06, | |
| "loss": 1.1735105514526367, | |
| "step": 1008 | |
| }, | |
| { | |
| "epoch": 1.4723032069970845, | |
| "grad_norm": 0.14989781379699707, | |
| "learning_rate": 9.02385030708667e-06, | |
| "loss": 1.3269665241241455, | |
| "step": 1010 | |
| }, | |
| { | |
| "epoch": 1.4752186588921283, | |
| "grad_norm": 0.23181524872779846, | |
| "learning_rate": 9.002295521914934e-06, | |
| "loss": 1.234397292137146, | |
| "step": 1012 | |
| }, | |
| { | |
| "epoch": 1.478134110787172, | |
| "grad_norm": 0.8318726420402527, | |
| "learning_rate": 8.980732966147451e-06, | |
| "loss": 1.2126901149749756, | |
| "step": 1014 | |
| }, | |
| { | |
| "epoch": 1.4810495626822158, | |
| "grad_norm": 0.2093929797410965, | |
| "learning_rate": 8.959162862507738e-06, | |
| "loss": 1.0737382173538208, | |
| "step": 1016 | |
| }, | |
| { | |
| "epoch": 1.4839650145772594, | |
| "grad_norm": 0.2963290214538574, | |
| "learning_rate": 8.937585433797273e-06, | |
| "loss": 0.9138633012771606, | |
| "step": 1018 | |
| }, | |
| { | |
| "epoch": 1.4868804664723032, | |
| "grad_norm": 0.2868603467941284, | |
| "learning_rate": 8.916000902893199e-06, | |
| "loss": 1.3595247268676758, | |
| "step": 1020 | |
| }, | |
| { | |
| "epoch": 1.489795918367347, | |
| "grad_norm": 0.11513882875442505, | |
| "learning_rate": 8.894409492746018e-06, | |
| "loss": 1.0969007015228271, | |
| "step": 1022 | |
| }, | |
| { | |
| "epoch": 1.4927113702623906, | |
| "grad_norm": 0.15273737907409668, | |
| "learning_rate": 8.87281142637729e-06, | |
| "loss": 1.0396068096160889, | |
| "step": 1024 | |
| }, | |
| { | |
| "epoch": 1.4956268221574345, | |
| "grad_norm": 0.12743119895458221, | |
| "learning_rate": 8.851206926877325e-06, | |
| "loss": 1.21293306350708, | |
| "step": 1026 | |
| }, | |
| { | |
| "epoch": 1.498542274052478, | |
| "grad_norm": 0.07293698191642761, | |
| "learning_rate": 8.82959621740288e-06, | |
| "loss": 0.8554050922393799, | |
| "step": 1028 | |
| }, | |
| { | |
| "epoch": 1.501457725947522, | |
| "grad_norm": 0.1396367996931076, | |
| "learning_rate": 8.807979521174866e-06, | |
| "loss": 0.8444166779518127, | |
| "step": 1030 | |
| }, | |
| { | |
| "epoch": 1.5043731778425657, | |
| "grad_norm": 0.34662795066833496, | |
| "learning_rate": 8.786357061476029e-06, | |
| "loss": 1.1405446529388428, | |
| "step": 1032 | |
| }, | |
| { | |
| "epoch": 1.5072886297376094, | |
| "grad_norm": 0.2602401673793793, | |
| "learning_rate": 8.764729061648632e-06, | |
| "loss": 1.2988492250442505, | |
| "step": 1034 | |
| }, | |
| { | |
| "epoch": 1.510204081632653, | |
| "grad_norm": 0.19908583164215088, | |
| "learning_rate": 8.743095745092185e-06, | |
| "loss": 1.2301197052001953, | |
| "step": 1036 | |
| }, | |
| { | |
| "epoch": 1.5131195335276968, | |
| "grad_norm": 0.20294634997844696, | |
| "learning_rate": 8.721457335261104e-06, | |
| "loss": 0.9326356053352356, | |
| "step": 1038 | |
| }, | |
| { | |
| "epoch": 1.5160349854227406, | |
| "grad_norm": 0.5687612295150757, | |
| "learning_rate": 8.699814055662417e-06, | |
| "loss": 1.187393069267273, | |
| "step": 1040 | |
| }, | |
| { | |
| "epoch": 1.5189504373177842, | |
| "grad_norm": 0.27902352809906006, | |
| "learning_rate": 8.678166129853442e-06, | |
| "loss": 1.0565565824508667, | |
| "step": 1042 | |
| }, | |
| { | |
| "epoch": 1.5218658892128278, | |
| "grad_norm": 0.06307139247655869, | |
| "learning_rate": 8.656513781439512e-06, | |
| "loss": 1.0471357107162476, | |
| "step": 1044 | |
| }, | |
| { | |
| "epoch": 1.5247813411078717, | |
| "grad_norm": 0.3132034242153168, | |
| "learning_rate": 8.634857234071619e-06, | |
| "loss": 1.3265520334243774, | |
| "step": 1046 | |
| }, | |
| { | |
| "epoch": 1.5276967930029155, | |
| "grad_norm": 0.25837764143943787, | |
| "learning_rate": 8.613196711444138e-06, | |
| "loss": 1.1429646015167236, | |
| "step": 1048 | |
| }, | |
| { | |
| "epoch": 1.5306122448979593, | |
| "grad_norm": 0.08677840977907181, | |
| "learning_rate": 8.591532437292502e-06, | |
| "loss": 0.9910908937454224, | |
| "step": 1050 | |
| }, | |
| { | |
| "epoch": 1.533527696793003, | |
| "grad_norm": 0.283247172832489, | |
| "learning_rate": 8.5698646353909e-06, | |
| "loss": 0.8875013589859009, | |
| "step": 1052 | |
| }, | |
| { | |
| "epoch": 1.5364431486880465, | |
| "grad_norm": 0.16179129481315613, | |
| "learning_rate": 8.548193529549947e-06, | |
| "loss": 1.1073272228240967, | |
| "step": 1054 | |
| }, | |
| { | |
| "epoch": 1.5393586005830904, | |
| "grad_norm": 0.12490551173686981, | |
| "learning_rate": 8.526519343614398e-06, | |
| "loss": 0.9769071340560913, | |
| "step": 1056 | |
| }, | |
| { | |
| "epoch": 1.5422740524781342, | |
| "grad_norm": 0.25089073181152344, | |
| "learning_rate": 8.504842301460815e-06, | |
| "loss": 1.069384217262268, | |
| "step": 1058 | |
| }, | |
| { | |
| "epoch": 1.5451895043731778, | |
| "grad_norm": 0.22324740886688232, | |
| "learning_rate": 8.483162626995268e-06, | |
| "loss": 1.0800434350967407, | |
| "step": 1060 | |
| }, | |
| { | |
| "epoch": 1.5481049562682214, | |
| "grad_norm": 0.358711302280426, | |
| "learning_rate": 8.461480544151012e-06, | |
| "loss": 0.8311281204223633, | |
| "step": 1062 | |
| }, | |
| { | |
| "epoch": 1.5510204081632653, | |
| "grad_norm": 0.35619816184043884, | |
| "learning_rate": 8.439796276886177e-06, | |
| "loss": 1.378959059715271, | |
| "step": 1064 | |
| }, | |
| { | |
| "epoch": 1.553935860058309, | |
| "grad_norm": 0.07740774750709534, | |
| "learning_rate": 8.418110049181464e-06, | |
| "loss": 0.7135167121887207, | |
| "step": 1066 | |
| }, | |
| { | |
| "epoch": 1.556851311953353, | |
| "grad_norm": 0.11709576100111008, | |
| "learning_rate": 8.396422085037822e-06, | |
| "loss": 1.1297550201416016, | |
| "step": 1068 | |
| }, | |
| { | |
| "epoch": 1.5597667638483965, | |
| "grad_norm": 0.1865878850221634, | |
| "learning_rate": 8.374732608474128e-06, | |
| "loss": 1.1906490325927734, | |
| "step": 1070 | |
| }, | |
| { | |
| "epoch": 1.5626822157434401, | |
| "grad_norm": 0.16431988775730133, | |
| "learning_rate": 8.353041843524886e-06, | |
| "loss": 1.1722774505615234, | |
| "step": 1072 | |
| }, | |
| { | |
| "epoch": 1.565597667638484, | |
| "grad_norm": 0.36135971546173096, | |
| "learning_rate": 8.331350014237912e-06, | |
| "loss": 1.1067001819610596, | |
| "step": 1074 | |
| }, | |
| { | |
| "epoch": 1.5685131195335278, | |
| "grad_norm": 0.3832073211669922, | |
| "learning_rate": 8.30965734467201e-06, | |
| "loss": 1.2439948320388794, | |
| "step": 1076 | |
| }, | |
| { | |
| "epoch": 1.5714285714285714, | |
| "grad_norm": 0.2755753993988037, | |
| "learning_rate": 8.28796405889466e-06, | |
| "loss": 0.6848400831222534, | |
| "step": 1078 | |
| }, | |
| { | |
| "epoch": 1.574344023323615, | |
| "grad_norm": 0.07128661125898361, | |
| "learning_rate": 8.266270380979723e-06, | |
| "loss": 1.2033002376556396, | |
| "step": 1080 | |
| }, | |
| { | |
| "epoch": 1.5772594752186588, | |
| "grad_norm": 0.16955770552158356, | |
| "learning_rate": 8.244576535005093e-06, | |
| "loss": 1.2546216249465942, | |
| "step": 1082 | |
| }, | |
| { | |
| "epoch": 1.5801749271137027, | |
| "grad_norm": 0.702198326587677, | |
| "learning_rate": 8.22288274505041e-06, | |
| "loss": 1.0031241178512573, | |
| "step": 1084 | |
| }, | |
| { | |
| "epoch": 1.5830903790087465, | |
| "grad_norm": 0.09851932525634766, | |
| "learning_rate": 8.201189235194729e-06, | |
| "loss": 1.171536922454834, | |
| "step": 1086 | |
| }, | |
| { | |
| "epoch": 1.58600583090379, | |
| "grad_norm": 0.5338625907897949, | |
| "learning_rate": 8.179496229514217e-06, | |
| "loss": 1.0307410955429077, | |
| "step": 1088 | |
| }, | |
| { | |
| "epoch": 1.5889212827988337, | |
| "grad_norm": 0.17403900623321533, | |
| "learning_rate": 8.157803952079832e-06, | |
| "loss": 1.2256954908370972, | |
| "step": 1090 | |
| }, | |
| { | |
| "epoch": 1.5918367346938775, | |
| "grad_norm": 0.1747167557477951, | |
| "learning_rate": 8.136112626955005e-06, | |
| "loss": 1.2137948274612427, | |
| "step": 1092 | |
| }, | |
| { | |
| "epoch": 1.5947521865889214, | |
| "grad_norm": 0.07115664333105087, | |
| "learning_rate": 8.114422478193336e-06, | |
| "loss": 1.0697215795516968, | |
| "step": 1094 | |
| }, | |
| { | |
| "epoch": 1.597667638483965, | |
| "grad_norm": 0.12972617149353027, | |
| "learning_rate": 8.09273372983628e-06, | |
| "loss": 1.1039892435073853, | |
| "step": 1096 | |
| }, | |
| { | |
| "epoch": 1.6005830903790086, | |
| "grad_norm": 0.13853909075260162, | |
| "learning_rate": 8.071046605910804e-06, | |
| "loss": 1.186689853668213, | |
| "step": 1098 | |
| }, | |
| { | |
| "epoch": 1.6034985422740524, | |
| "grad_norm": 0.1802920252084732, | |
| "learning_rate": 8.049361330427129e-06, | |
| "loss": 1.047842025756836, | |
| "step": 1100 | |
| }, | |
| { | |
| "epoch": 1.6064139941690962, | |
| "grad_norm": 0.15627241134643555, | |
| "learning_rate": 8.027678127376353e-06, | |
| "loss": 1.081397294998169, | |
| "step": 1102 | |
| }, | |
| { | |
| "epoch": 1.60932944606414, | |
| "grad_norm": 0.13871587812900543, | |
| "learning_rate": 8.005997220728181e-06, | |
| "loss": 1.129719614982605, | |
| "step": 1104 | |
| }, | |
| { | |
| "epoch": 1.6122448979591837, | |
| "grad_norm": 20.326587677001953, | |
| "learning_rate": 7.984318834428607e-06, | |
| "loss": 1.1785022020339966, | |
| "step": 1106 | |
| }, | |
| { | |
| "epoch": 1.6151603498542273, | |
| "grad_norm": 0.13852129876613617, | |
| "learning_rate": 7.962643192397574e-06, | |
| "loss": 1.0734182596206665, | |
| "step": 1108 | |
| }, | |
| { | |
| "epoch": 1.6180758017492711, | |
| "grad_norm": 0.6223950982093811, | |
| "learning_rate": 7.940970518526686e-06, | |
| "loss": 1.1438935995101929, | |
| "step": 1110 | |
| }, | |
| { | |
| "epoch": 1.620991253644315, | |
| "grad_norm": 0.0528414323925972, | |
| "learning_rate": 7.919301036676892e-06, | |
| "loss": 0.9696015119552612, | |
| "step": 1112 | |
| }, | |
| { | |
| "epoch": 1.6239067055393586, | |
| "grad_norm": 0.13710257411003113, | |
| "learning_rate": 7.897634970676166e-06, | |
| "loss": 1.1505471467971802, | |
| "step": 1114 | |
| }, | |
| { | |
| "epoch": 1.6268221574344022, | |
| "grad_norm": 0.16004100441932678, | |
| "learning_rate": 7.875972544317203e-06, | |
| "loss": 1.2167091369628906, | |
| "step": 1116 | |
| }, | |
| { | |
| "epoch": 1.629737609329446, | |
| "grad_norm": 0.45379891991615295, | |
| "learning_rate": 7.854313981355101e-06, | |
| "loss": 1.131983757019043, | |
| "step": 1118 | |
| }, | |
| { | |
| "epoch": 1.6326530612244898, | |
| "grad_norm": 0.13307584822177887, | |
| "learning_rate": 7.832659505505048e-06, | |
| "loss": 1.1805908679962158, | |
| "step": 1120 | |
| }, | |
| { | |
| "epoch": 1.6355685131195337, | |
| "grad_norm": 0.2649403214454651, | |
| "learning_rate": 7.811009340440022e-06, | |
| "loss": 1.2160626649856567, | |
| "step": 1122 | |
| }, | |
| { | |
| "epoch": 1.6384839650145773, | |
| "grad_norm": 0.16499841213226318, | |
| "learning_rate": 7.789363709788472e-06, | |
| "loss": 1.2312496900558472, | |
| "step": 1124 | |
| }, | |
| { | |
| "epoch": 1.6413994169096209, | |
| "grad_norm": 0.14581745862960815, | |
| "learning_rate": 7.767722837132008e-06, | |
| "loss": 0.5785539150238037, | |
| "step": 1126 | |
| }, | |
| { | |
| "epoch": 1.6443148688046647, | |
| "grad_norm": 0.40138673782348633, | |
| "learning_rate": 7.746086946003103e-06, | |
| "loss": 1.102718472480774, | |
| "step": 1128 | |
| }, | |
| { | |
| "epoch": 1.6472303206997085, | |
| "grad_norm": 0.39575713872909546, | |
| "learning_rate": 7.724456259882758e-06, | |
| "loss": 0.9496442675590515, | |
| "step": 1130 | |
| }, | |
| { | |
| "epoch": 1.6501457725947521, | |
| "grad_norm": 0.16450181603431702, | |
| "learning_rate": 7.702831002198225e-06, | |
| "loss": 1.1438281536102295, | |
| "step": 1132 | |
| }, | |
| { | |
| "epoch": 1.6530612244897958, | |
| "grad_norm": 0.10068156570196152, | |
| "learning_rate": 7.68121139632068e-06, | |
| "loss": 1.2390490770339966, | |
| "step": 1134 | |
| }, | |
| { | |
| "epoch": 1.6559766763848396, | |
| "grad_norm": 0.25964057445526123, | |
| "learning_rate": 7.65959766556292e-06, | |
| "loss": 1.0381125211715698, | |
| "step": 1136 | |
| }, | |
| { | |
| "epoch": 1.6588921282798834, | |
| "grad_norm": 0.43424177169799805, | |
| "learning_rate": 7.637990033177057e-06, | |
| "loss": 1.109690546989441, | |
| "step": 1138 | |
| }, | |
| { | |
| "epoch": 1.6618075801749272, | |
| "grad_norm": 0.21539334952831268, | |
| "learning_rate": 7.616388722352214e-06, | |
| "loss": 1.2123034000396729, | |
| "step": 1140 | |
| }, | |
| { | |
| "epoch": 1.6647230320699709, | |
| "grad_norm": 0.20255622267723083, | |
| "learning_rate": 7.594793956212212e-06, | |
| "loss": 1.217584490776062, | |
| "step": 1142 | |
| }, | |
| { | |
| "epoch": 1.6676384839650145, | |
| "grad_norm": 0.47754237055778503, | |
| "learning_rate": 7.573205957813276e-06, | |
| "loss": 0.9803376197814941, | |
| "step": 1144 | |
| }, | |
| { | |
| "epoch": 1.6705539358600583, | |
| "grad_norm": 0.09026843309402466, | |
| "learning_rate": 7.551624950141726e-06, | |
| "loss": 1.1912260055541992, | |
| "step": 1146 | |
| }, | |
| { | |
| "epoch": 1.6734693877551021, | |
| "grad_norm": 0.11982105672359467, | |
| "learning_rate": 7.530051156111669e-06, | |
| "loss": 1.1396859884262085, | |
| "step": 1148 | |
| }, | |
| { | |
| "epoch": 1.6763848396501457, | |
| "grad_norm": 0.42154011130332947, | |
| "learning_rate": 7.508484798562707e-06, | |
| "loss": 1.3917794227600098, | |
| "step": 1150 | |
| }, | |
| { | |
| "epoch": 1.6793002915451893, | |
| "grad_norm": 0.34086376428604126, | |
| "learning_rate": 7.486926100257621e-06, | |
| "loss": 1.1625425815582275, | |
| "step": 1152 | |
| }, | |
| { | |
| "epoch": 1.6822157434402332, | |
| "grad_norm": 0.33954572677612305, | |
| "learning_rate": 7.465375283880084e-06, | |
| "loss": 1.1317555904388428, | |
| "step": 1154 | |
| }, | |
| { | |
| "epoch": 1.685131195335277, | |
| "grad_norm": 0.15621435642242432, | |
| "learning_rate": 7.44383257203236e-06, | |
| "loss": 1.0376930236816406, | |
| "step": 1156 | |
| }, | |
| { | |
| "epoch": 1.6880466472303208, | |
| "grad_norm": 0.16445010900497437, | |
| "learning_rate": 7.422298187232988e-06, | |
| "loss": 0.6347440481185913, | |
| "step": 1158 | |
| }, | |
| { | |
| "epoch": 1.6909620991253644, | |
| "grad_norm": 0.11221948266029358, | |
| "learning_rate": 7.4007723519145005e-06, | |
| "loss": 1.2130205631256104, | |
| "step": 1160 | |
| }, | |
| { | |
| "epoch": 1.693877551020408, | |
| "grad_norm": 0.10298870503902435, | |
| "learning_rate": 7.37925528842113e-06, | |
| "loss": 1.0703403949737549, | |
| "step": 1162 | |
| }, | |
| { | |
| "epoch": 1.6967930029154519, | |
| "grad_norm": 0.05989653244614601, | |
| "learning_rate": 7.357747219006487e-06, | |
| "loss": 1.0500437021255493, | |
| "step": 1164 | |
| }, | |
| { | |
| "epoch": 1.6997084548104957, | |
| "grad_norm": 0.18388091027736664, | |
| "learning_rate": 7.336248365831293e-06, | |
| "loss": 1.0820516347885132, | |
| "step": 1166 | |
| }, | |
| { | |
| "epoch": 1.7026239067055393, | |
| "grad_norm": 0.30676501989364624, | |
| "learning_rate": 7.314758950961069e-06, | |
| "loss": 0.8827295303344727, | |
| "step": 1168 | |
| }, | |
| { | |
| "epoch": 1.7055393586005831, | |
| "grad_norm": 0.1762169450521469, | |
| "learning_rate": 7.293279196363844e-06, | |
| "loss": 1.1642931699752808, | |
| "step": 1170 | |
| }, | |
| { | |
| "epoch": 1.7084548104956268, | |
| "grad_norm": 0.138104647397995, | |
| "learning_rate": 7.271809323907868e-06, | |
| "loss": 1.3497681617736816, | |
| "step": 1172 | |
| }, | |
| { | |
| "epoch": 1.7113702623906706, | |
| "grad_norm": 0.04815658926963806, | |
| "learning_rate": 7.250349555359316e-06, | |
| "loss": 0.9686152935028076, | |
| "step": 1174 | |
| }, | |
| { | |
| "epoch": 1.7142857142857144, | |
| "grad_norm": 0.4449727535247803, | |
| "learning_rate": 7.228900112379993e-06, | |
| "loss": 0.8205754160881042, | |
| "step": 1176 | |
| }, | |
| { | |
| "epoch": 1.717201166180758, | |
| "grad_norm": 0.19454075396060944, | |
| "learning_rate": 7.2074612165250596e-06, | |
| "loss": 1.1948063373565674, | |
| "step": 1178 | |
| }, | |
| { | |
| "epoch": 1.7201166180758016, | |
| "grad_norm": 0.1630457043647766, | |
| "learning_rate": 7.18603308924072e-06, | |
| "loss": 1.122542381286621, | |
| "step": 1180 | |
| }, | |
| { | |
| "epoch": 1.7230320699708455, | |
| "grad_norm": 0.2632548213005066, | |
| "learning_rate": 7.164615951861958e-06, | |
| "loss": 1.2288137674331665, | |
| "step": 1182 | |
| }, | |
| { | |
| "epoch": 1.7259475218658893, | |
| "grad_norm": 0.185108482837677, | |
| "learning_rate": 7.143210025610238e-06, | |
| "loss": 1.029456615447998, | |
| "step": 1184 | |
| }, | |
| { | |
| "epoch": 1.728862973760933, | |
| "grad_norm": 0.06753533333539963, | |
| "learning_rate": 7.121815531591222e-06, | |
| "loss": 0.9876729846000671, | |
| "step": 1186 | |
| }, | |
| { | |
| "epoch": 1.7317784256559767, | |
| "grad_norm": 0.16401244699954987, | |
| "learning_rate": 7.100432690792484e-06, | |
| "loss": 0.6059045791625977, | |
| "step": 1188 | |
| }, | |
| { | |
| "epoch": 1.7346938775510203, | |
| "grad_norm": 0.2957839369773865, | |
| "learning_rate": 7.0790617240812374e-06, | |
| "loss": 1.0509564876556396, | |
| "step": 1190 | |
| }, | |
| { | |
| "epoch": 1.7376093294460642, | |
| "grad_norm": 0.13618314266204834, | |
| "learning_rate": 7.057702852202037e-06, | |
| "loss": 1.1775768995285034, | |
| "step": 1192 | |
| }, | |
| { | |
| "epoch": 1.740524781341108, | |
| "grad_norm": 0.171565443277359, | |
| "learning_rate": 7.0363562957745105e-06, | |
| "loss": 0.9801825881004333, | |
| "step": 1194 | |
| }, | |
| { | |
| "epoch": 1.7434402332361516, | |
| "grad_norm": 0.09507802128791809, | |
| "learning_rate": 7.015022275291084e-06, | |
| "loss": 0.969845175743103, | |
| "step": 1196 | |
| }, | |
| { | |
| "epoch": 1.7463556851311952, | |
| "grad_norm": 0.49828192591667175, | |
| "learning_rate": 6.993701011114686e-06, | |
| "loss": 0.9284896850585938, | |
| "step": 1198 | |
| }, | |
| { | |
| "epoch": 1.749271137026239, | |
| "grad_norm": 0.10986272245645523, | |
| "learning_rate": 6.972392723476494e-06, | |
| "loss": 1.1610954999923706, | |
| "step": 1200 | |
| }, | |
| { | |
| "epoch": 1.7521865889212829, | |
| "grad_norm": 0.36414283514022827, | |
| "learning_rate": 6.9510976324736415e-06, | |
| "loss": 0.9902899861335754, | |
| "step": 1202 | |
| }, | |
| { | |
| "epoch": 1.7551020408163265, | |
| "grad_norm": 0.15007393062114716, | |
| "learning_rate": 6.929815958066951e-06, | |
| "loss": 1.1686747074127197, | |
| "step": 1204 | |
| }, | |
| { | |
| "epoch": 1.7580174927113703, | |
| "grad_norm": 0.09150854498147964, | |
| "learning_rate": 6.908547920078671e-06, | |
| "loss": 0.9296596050262451, | |
| "step": 1206 | |
| }, | |
| { | |
| "epoch": 1.760932944606414, | |
| "grad_norm": 0.13725019991397858, | |
| "learning_rate": 6.887293738190183e-06, | |
| "loss": 0.6867948174476624, | |
| "step": 1208 | |
| }, | |
| { | |
| "epoch": 1.7638483965014577, | |
| "grad_norm": 0.2506777346134186, | |
| "learning_rate": 6.866053631939756e-06, | |
| "loss": 1.1812880039215088, | |
| "step": 1210 | |
| }, | |
| { | |
| "epoch": 1.7667638483965016, | |
| "grad_norm": 0.24459925293922424, | |
| "learning_rate": 6.844827820720275e-06, | |
| "loss": 1.233087420463562, | |
| "step": 1212 | |
| }, | |
| { | |
| "epoch": 1.7696793002915452, | |
| "grad_norm": 0.18725088238716125, | |
| "learning_rate": 6.8236165237769555e-06, | |
| "loss": 1.0703694820404053, | |
| "step": 1214 | |
| }, | |
| { | |
| "epoch": 1.7725947521865888, | |
| "grad_norm": 0.08817660808563232, | |
| "learning_rate": 6.802419960205095e-06, | |
| "loss": 0.9150586724281311, | |
| "step": 1216 | |
| }, | |
| { | |
| "epoch": 1.7755102040816326, | |
| "grad_norm": 0.24206826090812683, | |
| "learning_rate": 6.7812383489478216e-06, | |
| "loss": 1.2116329669952393, | |
| "step": 1218 | |
| }, | |
| { | |
| "epoch": 1.7784256559766765, | |
| "grad_norm": 0.13627009093761444, | |
| "learning_rate": 6.760071908793796e-06, | |
| "loss": 0.6978607177734375, | |
| "step": 1220 | |
| }, | |
| { | |
| "epoch": 1.78134110787172, | |
| "grad_norm": 0.19865363836288452, | |
| "learning_rate": 6.738920858374991e-06, | |
| "loss": 1.0590617656707764, | |
| "step": 1222 | |
| }, | |
| { | |
| "epoch": 1.784256559766764, | |
| "grad_norm": 0.4059164524078369, | |
| "learning_rate": 6.717785416164414e-06, | |
| "loss": 1.38783860206604, | |
| "step": 1224 | |
| }, | |
| { | |
| "epoch": 1.7871720116618075, | |
| "grad_norm": 0.2919604480266571, | |
| "learning_rate": 6.696665800473842e-06, | |
| "loss": 1.1487404108047485, | |
| "step": 1226 | |
| }, | |
| { | |
| "epoch": 1.7900874635568513, | |
| "grad_norm": 0.1517525017261505, | |
| "learning_rate": 6.675562229451589e-06, | |
| "loss": 1.206036925315857, | |
| "step": 1228 | |
| }, | |
| { | |
| "epoch": 1.7930029154518952, | |
| "grad_norm": 0.2847557067871094, | |
| "learning_rate": 6.6544749210802305e-06, | |
| "loss": 0.8351743817329407, | |
| "step": 1230 | |
| }, | |
| { | |
| "epoch": 1.7959183673469388, | |
| "grad_norm": 0.2792437672615051, | |
| "learning_rate": 6.633404093174371e-06, | |
| "loss": 0.9937669634819031, | |
| "step": 1232 | |
| }, | |
| { | |
| "epoch": 1.7988338192419824, | |
| "grad_norm": 0.39450135827064514, | |
| "learning_rate": 6.612349963378381e-06, | |
| "loss": 0.9253970980644226, | |
| "step": 1234 | |
| }, | |
| { | |
| "epoch": 1.8017492711370262, | |
| "grad_norm": 0.26529014110565186, | |
| "learning_rate": 6.591312749164154e-06, | |
| "loss": 1.1452049016952515, | |
| "step": 1236 | |
| }, | |
| { | |
| "epoch": 1.80466472303207, | |
| "grad_norm": 0.23458294570446014, | |
| "learning_rate": 6.570292667828856e-06, | |
| "loss": 1.2078217267990112, | |
| "step": 1238 | |
| }, | |
| { | |
| "epoch": 1.8075801749271136, | |
| "grad_norm": 0.13832348585128784, | |
| "learning_rate": 6.549289936492693e-06, | |
| "loss": 1.2237412929534912, | |
| "step": 1240 | |
| }, | |
| { | |
| "epoch": 1.8104956268221575, | |
| "grad_norm": 0.08728086948394775, | |
| "learning_rate": 6.5283047720966505e-06, | |
| "loss": 1.1127595901489258, | |
| "step": 1242 | |
| }, | |
| { | |
| "epoch": 1.813411078717201, | |
| "grad_norm": 0.2100764364004135, | |
| "learning_rate": 6.5073373914002656e-06, | |
| "loss": 1.0868037939071655, | |
| "step": 1244 | |
| }, | |
| { | |
| "epoch": 1.816326530612245, | |
| "grad_norm": 0.13499869406223297, | |
| "learning_rate": 6.486388010979388e-06, | |
| "loss": 1.119627833366394, | |
| "step": 1246 | |
| }, | |
| { | |
| "epoch": 1.8192419825072887, | |
| "grad_norm": 0.34346649050712585, | |
| "learning_rate": 6.465456847223932e-06, | |
| "loss": 1.0318715572357178, | |
| "step": 1248 | |
| }, | |
| { | |
| "epoch": 1.8221574344023324, | |
| "grad_norm": 0.07944006472826004, | |
| "learning_rate": 6.444544116335655e-06, | |
| "loss": 1.1757546663284302, | |
| "step": 1250 | |
| }, | |
| { | |
| "epoch": 1.825072886297376, | |
| "grad_norm": 0.2944159209728241, | |
| "learning_rate": 6.423650034325915e-06, | |
| "loss": 1.2396355867385864, | |
| "step": 1252 | |
| }, | |
| { | |
| "epoch": 1.8279883381924198, | |
| "grad_norm": 0.18287204205989838, | |
| "learning_rate": 6.402774817013442e-06, | |
| "loss": 1.097105860710144, | |
| "step": 1254 | |
| }, | |
| { | |
| "epoch": 1.8309037900874636, | |
| "grad_norm": 0.141254261136055, | |
| "learning_rate": 6.381918680022112e-06, | |
| "loss": 1.0068081617355347, | |
| "step": 1256 | |
| }, | |
| { | |
| "epoch": 1.8338192419825075, | |
| "grad_norm": 0.17386725544929504, | |
| "learning_rate": 6.36108183877871e-06, | |
| "loss": 1.1032158136367798, | |
| "step": 1258 | |
| }, | |
| { | |
| "epoch": 1.836734693877551, | |
| "grad_norm": 0.22268234193325043, | |
| "learning_rate": 6.3402645085107224e-06, | |
| "loss": 1.2912282943725586, | |
| "step": 1260 | |
| }, | |
| { | |
| "epoch": 1.8396501457725947, | |
| "grad_norm": 0.411150723695755, | |
| "learning_rate": 6.3194669042440976e-06, | |
| "loss": 1.129095196723938, | |
| "step": 1262 | |
| }, | |
| { | |
| "epoch": 1.8425655976676385, | |
| "grad_norm": 0.3001119792461395, | |
| "learning_rate": 6.298689240801026e-06, | |
| "loss": 1.365820050239563, | |
| "step": 1264 | |
| }, | |
| { | |
| "epoch": 1.8454810495626823, | |
| "grad_norm": 0.36252474784851074, | |
| "learning_rate": 6.277931732797732e-06, | |
| "loss": 1.3998820781707764, | |
| "step": 1266 | |
| }, | |
| { | |
| "epoch": 1.848396501457726, | |
| "grad_norm": 0.29093074798583984, | |
| "learning_rate": 6.257194594642254e-06, | |
| "loss": 1.0682395696640015, | |
| "step": 1268 | |
| }, | |
| { | |
| "epoch": 1.8513119533527695, | |
| "grad_norm": 0.13126376271247864, | |
| "learning_rate": 6.236478040532214e-06, | |
| "loss": 1.0302337408065796, | |
| "step": 1270 | |
| }, | |
| { | |
| "epoch": 1.8542274052478134, | |
| "grad_norm": 0.1628250777721405, | |
| "learning_rate": 6.215782284452628e-06, | |
| "loss": 1.098158359527588, | |
| "step": 1272 | |
| }, | |
| { | |
| "epoch": 1.8571428571428572, | |
| "grad_norm": 0.20393933355808258, | |
| "learning_rate": 6.195107540173687e-06, | |
| "loss": 1.1833226680755615, | |
| "step": 1274 | |
| }, | |
| { | |
| "epoch": 1.860058309037901, | |
| "grad_norm": 0.2242426872253418, | |
| "learning_rate": 6.174454021248537e-06, | |
| "loss": 1.2466531991958618, | |
| "step": 1276 | |
| }, | |
| { | |
| "epoch": 1.8629737609329446, | |
| "grad_norm": 0.1543884128332138, | |
| "learning_rate": 6.15382194101109e-06, | |
| "loss": 0.9692124724388123, | |
| "step": 1278 | |
| }, | |
| { | |
| "epoch": 1.8658892128279883, | |
| "grad_norm": 0.10594581812620163, | |
| "learning_rate": 6.133211512573819e-06, | |
| "loss": 1.0277884006500244, | |
| "step": 1280 | |
| }, | |
| { | |
| "epoch": 1.868804664723032, | |
| "grad_norm": 0.1760384440422058, | |
| "learning_rate": 6.1126229488255416e-06, | |
| "loss": 1.0745232105255127, | |
| "step": 1282 | |
| }, | |
| { | |
| "epoch": 1.871720116618076, | |
| "grad_norm": 0.11243575066328049, | |
| "learning_rate": 6.092056462429238e-06, | |
| "loss": 1.11955988407135, | |
| "step": 1284 | |
| }, | |
| { | |
| "epoch": 1.8746355685131195, | |
| "grad_norm": 0.3004339337348938, | |
| "learning_rate": 6.071512265819841e-06, | |
| "loss": 1.1129993200302124, | |
| "step": 1286 | |
| }, | |
| { | |
| "epoch": 1.8775510204081631, | |
| "grad_norm": 0.1870323270559311, | |
| "learning_rate": 6.0509905712020554e-06, | |
| "loss": 1.1004483699798584, | |
| "step": 1288 | |
| }, | |
| { | |
| "epoch": 1.880466472303207, | |
| "grad_norm": 0.15390393137931824, | |
| "learning_rate": 6.030491590548157e-06, | |
| "loss": 1.1051290035247803, | |
| "step": 1290 | |
| }, | |
| { | |
| "epoch": 1.8833819241982508, | |
| "grad_norm": 0.17591705918312073, | |
| "learning_rate": 6.010015535595802e-06, | |
| "loss": 1.19423246383667, | |
| "step": 1292 | |
| }, | |
| { | |
| "epoch": 1.8862973760932946, | |
| "grad_norm": 0.517492413520813, | |
| "learning_rate": 5.989562617845843e-06, | |
| "loss": 0.7528221011161804, | |
| "step": 1294 | |
| }, | |
| { | |
| "epoch": 1.8892128279883382, | |
| "grad_norm": 0.2763058543205261, | |
| "learning_rate": 5.969133048560151e-06, | |
| "loss": 0.6028561592102051, | |
| "step": 1296 | |
| }, | |
| { | |
| "epoch": 1.8921282798833818, | |
| "grad_norm": 0.1741061955690384, | |
| "learning_rate": 5.948727038759415e-06, | |
| "loss": 0.9944829344749451, | |
| "step": 1298 | |
| }, | |
| { | |
| "epoch": 1.8950437317784257, | |
| "grad_norm": 0.3421262204647064, | |
| "learning_rate": 5.928344799220985e-06, | |
| "loss": 1.118728756904602, | |
| "step": 1300 | |
| }, | |
| { | |
| "epoch": 1.8979591836734695, | |
| "grad_norm": 0.42300957441329956, | |
| "learning_rate": 5.907986540476678e-06, | |
| "loss": 0.7158623337745667, | |
| "step": 1302 | |
| }, | |
| { | |
| "epoch": 1.900874635568513, | |
| "grad_norm": 0.14869055151939392, | |
| "learning_rate": 5.887652472810609e-06, | |
| "loss": 1.0393644571304321, | |
| "step": 1304 | |
| }, | |
| { | |
| "epoch": 1.9037900874635567, | |
| "grad_norm": 0.07201150804758072, | |
| "learning_rate": 5.86734280625702e-06, | |
| "loss": 0.5461652874946594, | |
| "step": 1306 | |
| }, | |
| { | |
| "epoch": 1.9067055393586005, | |
| "grad_norm": 0.6429765820503235, | |
| "learning_rate": 5.847057750598111e-06, | |
| "loss": 1.1324551105499268, | |
| "step": 1308 | |
| }, | |
| { | |
| "epoch": 1.9096209912536444, | |
| "grad_norm": 0.18680232763290405, | |
| "learning_rate": 5.826797515361868e-06, | |
| "loss": 1.274292230606079, | |
| "step": 1310 | |
| }, | |
| { | |
| "epoch": 1.9125364431486882, | |
| "grad_norm": 0.1953829973936081, | |
| "learning_rate": 5.806562309819909e-06, | |
| "loss": 1.2884361743927002, | |
| "step": 1312 | |
| }, | |
| { | |
| "epoch": 1.9154518950437318, | |
| "grad_norm": 0.28342682123184204, | |
| "learning_rate": 5.7863523429853055e-06, | |
| "loss": 1.279549479484558, | |
| "step": 1314 | |
| }, | |
| { | |
| "epoch": 1.9183673469387754, | |
| "grad_norm": 0.45169350504875183, | |
| "learning_rate": 5.766167823610443e-06, | |
| "loss": 1.074336051940918, | |
| "step": 1316 | |
| }, | |
| { | |
| "epoch": 1.9212827988338192, | |
| "grad_norm": 0.18884071707725525, | |
| "learning_rate": 5.746008960184852e-06, | |
| "loss": 1.262738585472107, | |
| "step": 1318 | |
| }, | |
| { | |
| "epoch": 1.924198250728863, | |
| "grad_norm": 0.059031542390584946, | |
| "learning_rate": 5.725875960933058e-06, | |
| "loss": 1.0195709466934204, | |
| "step": 1320 | |
| }, | |
| { | |
| "epoch": 1.9271137026239067, | |
| "grad_norm": 0.11774204671382904, | |
| "learning_rate": 5.705769033812431e-06, | |
| "loss": 1.04592764377594, | |
| "step": 1322 | |
| }, | |
| { | |
| "epoch": 1.9300291545189503, | |
| "grad_norm": 0.13104864954948425, | |
| "learning_rate": 5.685688386511041e-06, | |
| "loss": 1.0482321977615356, | |
| "step": 1324 | |
| }, | |
| { | |
| "epoch": 1.9329446064139941, | |
| "grad_norm": 0.15567655861377716, | |
| "learning_rate": 5.665634226445501e-06, | |
| "loss": 1.2044618129730225, | |
| "step": 1326 | |
| }, | |
| { | |
| "epoch": 1.935860058309038, | |
| "grad_norm": 0.14479920268058777, | |
| "learning_rate": 5.645606760758836e-06, | |
| "loss": 1.0985395908355713, | |
| "step": 1328 | |
| }, | |
| { | |
| "epoch": 1.9387755102040818, | |
| "grad_norm": 0.1920030266046524, | |
| "learning_rate": 5.625606196318347e-06, | |
| "loss": 1.4523109197616577, | |
| "step": 1330 | |
| }, | |
| { | |
| "epoch": 1.9416909620991254, | |
| "grad_norm": 0.2637879252433777, | |
| "learning_rate": 5.605632739713456e-06, | |
| "loss": 1.0658267736434937, | |
| "step": 1332 | |
| }, | |
| { | |
| "epoch": 1.944606413994169, | |
| "grad_norm": 0.08796999603509903, | |
| "learning_rate": 5.585686597253593e-06, | |
| "loss": 1.0220710039138794, | |
| "step": 1334 | |
| }, | |
| { | |
| "epoch": 1.9475218658892128, | |
| "grad_norm": 0.4936763644218445, | |
| "learning_rate": 5.5657679749660455e-06, | |
| "loss": 0.5359926223754883, | |
| "step": 1336 | |
| }, | |
| { | |
| "epoch": 1.9504373177842567, | |
| "grad_norm": 0.25524938106536865, | |
| "learning_rate": 5.545877078593849e-06, | |
| "loss": 1.0832246541976929, | |
| "step": 1338 | |
| }, | |
| { | |
| "epoch": 1.9533527696793003, | |
| "grad_norm": 0.3815828263759613, | |
| "learning_rate": 5.52601411359365e-06, | |
| "loss": 1.0333139896392822, | |
| "step": 1340 | |
| }, | |
| { | |
| "epoch": 1.9562682215743439, | |
| "grad_norm": 0.1364160180091858, | |
| "learning_rate": 5.506179285133582e-06, | |
| "loss": 0.8447660207748413, | |
| "step": 1342 | |
| }, | |
| { | |
| "epoch": 1.9591836734693877, | |
| "grad_norm": 0.22036899626255035, | |
| "learning_rate": 5.486372798091161e-06, | |
| "loss": 1.4143515825271606, | |
| "step": 1344 | |
| }, | |
| { | |
| "epoch": 1.9620991253644315, | |
| "grad_norm": 0.4314256012439728, | |
| "learning_rate": 5.466594857051153e-06, | |
| "loss": 0.9990249276161194, | |
| "step": 1346 | |
| }, | |
| { | |
| "epoch": 1.9650145772594754, | |
| "grad_norm": 0.15996676683425903, | |
| "learning_rate": 5.4468456663034635e-06, | |
| "loss": 1.2198452949523926, | |
| "step": 1348 | |
| }, | |
| { | |
| "epoch": 1.967930029154519, | |
| "grad_norm": 0.19972719252109528, | |
| "learning_rate": 5.427125429841039e-06, | |
| "loss": 1.0296826362609863, | |
| "step": 1350 | |
| }, | |
| { | |
| "epoch": 1.9708454810495626, | |
| "grad_norm": 0.1828991174697876, | |
| "learning_rate": 5.4074343513577536e-06, | |
| "loss": 1.2304623126983643, | |
| "step": 1352 | |
| }, | |
| { | |
| "epoch": 1.9737609329446064, | |
| "grad_norm": 0.2502359449863434, | |
| "learning_rate": 5.387772634246287e-06, | |
| "loss": 1.1169551610946655, | |
| "step": 1354 | |
| }, | |
| { | |
| "epoch": 1.9766763848396502, | |
| "grad_norm": 0.1563616245985031, | |
| "learning_rate": 5.36814048159606e-06, | |
| "loss": 0.818549633026123, | |
| "step": 1356 | |
| }, | |
| { | |
| "epoch": 1.9795918367346939, | |
| "grad_norm": 0.08790906518697739, | |
| "learning_rate": 5.348538096191109e-06, | |
| "loss": 1.2132847309112549, | |
| "step": 1358 | |
| }, | |
| { | |
| "epoch": 1.9825072886297375, | |
| "grad_norm": 0.3884468376636505, | |
| "learning_rate": 5.328965680507991e-06, | |
| "loss": 1.1513258218765259, | |
| "step": 1360 | |
| }, | |
| { | |
| "epoch": 1.9854227405247813, | |
| "grad_norm": 0.24757881462574005, | |
| "learning_rate": 5.309423436713714e-06, | |
| "loss": 0.6811099052429199, | |
| "step": 1362 | |
| }, | |
| { | |
| "epoch": 1.9883381924198251, | |
| "grad_norm": 0.0917486697435379, | |
| "learning_rate": 5.289911566663626e-06, | |
| "loss": 0.5249199271202087, | |
| "step": 1364 | |
| }, | |
| { | |
| "epoch": 1.991253644314869, | |
| "grad_norm": 0.3590066432952881, | |
| "learning_rate": 5.270430271899342e-06, | |
| "loss": 1.1386462450027466, | |
| "step": 1366 | |
| }, | |
| { | |
| "epoch": 1.9941690962099126, | |
| "grad_norm": 0.0781368613243103, | |
| "learning_rate": 5.250979753646664e-06, | |
| "loss": 1.0840882062911987, | |
| "step": 1368 | |
| }, | |
| { | |
| "epoch": 1.9970845481049562, | |
| "grad_norm": 0.3470701277256012, | |
| "learning_rate": 5.231560212813487e-06, | |
| "loss": 1.0490968227386475, | |
| "step": 1370 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "grad_norm": 0.13662609457969666, | |
| "learning_rate": 5.212171849987743e-06, | |
| "loss": 1.1986355781555176, | |
| "step": 1372 | |
| }, | |
| { | |
| "epoch": 2.002915451895044, | |
| "grad_norm": 0.15793374180793762, | |
| "learning_rate": 5.1928148654353196e-06, | |
| "loss": 0.921393871307373, | |
| "step": 1374 | |
| }, | |
| { | |
| "epoch": 2.0058309037900877, | |
| "grad_norm": 0.4891752600669861, | |
| "learning_rate": 5.17348945909799e-06, | |
| "loss": 0.9690005779266357, | |
| "step": 1376 | |
| }, | |
| { | |
| "epoch": 2.008746355685131, | |
| "grad_norm": 0.2033310979604721, | |
| "learning_rate": 5.1541958305913536e-06, | |
| "loss": 1.3568806648254395, | |
| "step": 1378 | |
| }, | |
| { | |
| "epoch": 2.011661807580175, | |
| "grad_norm": 0.1594112515449524, | |
| "learning_rate": 5.134934179202771e-06, | |
| "loss": 1.033390998840332, | |
| "step": 1380 | |
| }, | |
| { | |
| "epoch": 2.0145772594752187, | |
| "grad_norm": 0.2081524133682251, | |
| "learning_rate": 5.115704703889299e-06, | |
| "loss": 1.0304166078567505, | |
| "step": 1382 | |
| }, | |
| { | |
| "epoch": 2.0174927113702625, | |
| "grad_norm": 0.38243576884269714, | |
| "learning_rate": 5.096507603275648e-06, | |
| "loss": 0.9502314925193787, | |
| "step": 1384 | |
| }, | |
| { | |
| "epoch": 2.020408163265306, | |
| "grad_norm": 0.06100543960928917, | |
| "learning_rate": 5.077343075652124e-06, | |
| "loss": 1.1048611402511597, | |
| "step": 1386 | |
| }, | |
| { | |
| "epoch": 2.0233236151603498, | |
| "grad_norm": 0.386870414018631, | |
| "learning_rate": 5.058211318972581e-06, | |
| "loss": 1.2929866313934326, | |
| "step": 1388 | |
| }, | |
| { | |
| "epoch": 2.0262390670553936, | |
| "grad_norm": 0.1502365618944168, | |
| "learning_rate": 5.0391125308523744e-06, | |
| "loss": 1.2062195539474487, | |
| "step": 1390 | |
| }, | |
| { | |
| "epoch": 2.0291545189504374, | |
| "grad_norm": 0.46698620915412903, | |
| "learning_rate": 5.020046908566317e-06, | |
| "loss": 1.2675377130508423, | |
| "step": 1392 | |
| }, | |
| { | |
| "epoch": 2.0320699708454812, | |
| "grad_norm": 0.2170051783323288, | |
| "learning_rate": 5.001014649046655e-06, | |
| "loss": 1.0185376405715942, | |
| "step": 1394 | |
| }, | |
| { | |
| "epoch": 2.0349854227405246, | |
| "grad_norm": 0.5570895671844482, | |
| "learning_rate": 4.98201594888102e-06, | |
| "loss": 1.1238821744918823, | |
| "step": 1396 | |
| }, | |
| { | |
| "epoch": 2.0379008746355685, | |
| "grad_norm": 0.19649037718772888, | |
| "learning_rate": 4.963051004310397e-06, | |
| "loss": 1.1577717065811157, | |
| "step": 1398 | |
| }, | |
| { | |
| "epoch": 2.0408163265306123, | |
| "grad_norm": 0.3043438494205475, | |
| "learning_rate": 4.944120011227115e-06, | |
| "loss": 0.945805549621582, | |
| "step": 1400 | |
| }, | |
| { | |
| "epoch": 2.043731778425656, | |
| "grad_norm": 0.8879981637001038, | |
| "learning_rate": 4.925223165172808e-06, | |
| "loss": 1.0322425365447998, | |
| "step": 1402 | |
| }, | |
| { | |
| "epoch": 2.0466472303206995, | |
| "grad_norm": 0.26241424679756165, | |
| "learning_rate": 4.906360661336394e-06, | |
| "loss": 1.2149442434310913, | |
| "step": 1404 | |
| }, | |
| { | |
| "epoch": 2.0495626822157433, | |
| "grad_norm": 0.8886216878890991, | |
| "learning_rate": 4.887532694552066e-06, | |
| "loss": 1.0274255275726318, | |
| "step": 1406 | |
| }, | |
| { | |
| "epoch": 2.052478134110787, | |
| "grad_norm": 0.21257859468460083, | |
| "learning_rate": 4.868739459297286e-06, | |
| "loss": 1.1855621337890625, | |
| "step": 1408 | |
| }, | |
| { | |
| "epoch": 2.055393586005831, | |
| "grad_norm": 0.14593669772148132, | |
| "learning_rate": 4.8499811496907506e-06, | |
| "loss": 0.7928017377853394, | |
| "step": 1410 | |
| }, | |
| { | |
| "epoch": 2.058309037900875, | |
| "grad_norm": 0.06642908602952957, | |
| "learning_rate": 4.831257959490425e-06, | |
| "loss": 1.0738983154296875, | |
| "step": 1412 | |
| }, | |
| { | |
| "epoch": 2.061224489795918, | |
| "grad_norm": 0.3109600841999054, | |
| "learning_rate": 4.812570082091498e-06, | |
| "loss": 0.8972907662391663, | |
| "step": 1414 | |
| }, | |
| { | |
| "epoch": 2.064139941690962, | |
| "grad_norm": 0.13277745246887207, | |
| "learning_rate": 4.793917710524422e-06, | |
| "loss": 1.0650956630706787, | |
| "step": 1416 | |
| }, | |
| { | |
| "epoch": 2.067055393586006, | |
| "grad_norm": 0.14433449506759644, | |
| "learning_rate": 4.775301037452898e-06, | |
| "loss": 1.1586172580718994, | |
| "step": 1418 | |
| }, | |
| { | |
| "epoch": 2.0699708454810497, | |
| "grad_norm": 0.15220968425273895, | |
| "learning_rate": 4.756720255171887e-06, | |
| "loss": 0.5742167234420776, | |
| "step": 1420 | |
| }, | |
| { | |
| "epoch": 2.072886297376093, | |
| "grad_norm": 0.126608744263649, | |
| "learning_rate": 4.738175555605632e-06, | |
| "loss": 1.242780327796936, | |
| "step": 1422 | |
| }, | |
| { | |
| "epoch": 2.075801749271137, | |
| "grad_norm": 0.10246127843856812, | |
| "learning_rate": 4.719667130305671e-06, | |
| "loss": 0.9981814622879028, | |
| "step": 1424 | |
| }, | |
| { | |
| "epoch": 2.0787172011661808, | |
| "grad_norm": 0.2460668534040451, | |
| "learning_rate": 4.701195170448857e-06, | |
| "loss": 0.8302922248840332, | |
| "step": 1426 | |
| }, | |
| { | |
| "epoch": 2.0816326530612246, | |
| "grad_norm": 0.155581995844841, | |
| "learning_rate": 4.682759866835388e-06, | |
| "loss": 1.3268355131149292, | |
| "step": 1428 | |
| }, | |
| { | |
| "epoch": 2.0845481049562684, | |
| "grad_norm": 0.10044138133525848, | |
| "learning_rate": 4.664361409886829e-06, | |
| "loss": 0.9983614087104797, | |
| "step": 1430 | |
| }, | |
| { | |
| "epoch": 2.087463556851312, | |
| "grad_norm": 0.2085467278957367, | |
| "learning_rate": 4.645999989644148e-06, | |
| "loss": 1.1001629829406738, | |
| "step": 1432 | |
| }, | |
| { | |
| "epoch": 2.0903790087463556, | |
| "grad_norm": 0.33730220794677734, | |
| "learning_rate": 4.627675795765761e-06, | |
| "loss": 1.3111716508865356, | |
| "step": 1434 | |
| }, | |
| { | |
| "epoch": 2.0932944606413995, | |
| "grad_norm": 0.2143622636795044, | |
| "learning_rate": 4.60938901752556e-06, | |
| "loss": 0.8293286561965942, | |
| "step": 1436 | |
| }, | |
| { | |
| "epoch": 2.0962099125364433, | |
| "grad_norm": 0.07966610789299011, | |
| "learning_rate": 4.591139843810967e-06, | |
| "loss": 1.1742640733718872, | |
| "step": 1438 | |
| }, | |
| { | |
| "epoch": 2.0991253644314867, | |
| "grad_norm": 0.18288615345954895, | |
| "learning_rate": 4.572928463120982e-06, | |
| "loss": 1.1798888444900513, | |
| "step": 1440 | |
| }, | |
| { | |
| "epoch": 2.1020408163265305, | |
| "grad_norm": 0.2549722194671631, | |
| "learning_rate": 4.554755063564226e-06, | |
| "loss": 1.0986790657043457, | |
| "step": 1442 | |
| }, | |
| { | |
| "epoch": 2.1049562682215743, | |
| "grad_norm": 0.1803271621465683, | |
| "learning_rate": 4.536619832857015e-06, | |
| "loss": 1.0121634006500244, | |
| "step": 1444 | |
| }, | |
| { | |
| "epoch": 2.107871720116618, | |
| "grad_norm": 0.33244436979293823, | |
| "learning_rate": 4.518522958321409e-06, | |
| "loss": 1.2030587196350098, | |
| "step": 1446 | |
| }, | |
| { | |
| "epoch": 2.110787172011662, | |
| "grad_norm": 0.07119657844305038, | |
| "learning_rate": 4.500464626883276e-06, | |
| "loss": 0.6789675354957581, | |
| "step": 1448 | |
| }, | |
| { | |
| "epoch": 2.1137026239067054, | |
| "grad_norm": 0.3919859230518341, | |
| "learning_rate": 4.4824450250703755e-06, | |
| "loss": 0.8600730895996094, | |
| "step": 1450 | |
| }, | |
| { | |
| "epoch": 2.116618075801749, | |
| "grad_norm": 0.1530391424894333, | |
| "learning_rate": 4.464464339010414e-06, | |
| "loss": 0.9321385622024536, | |
| "step": 1452 | |
| }, | |
| { | |
| "epoch": 2.119533527696793, | |
| "grad_norm": 0.12812215089797974, | |
| "learning_rate": 4.446522754429127e-06, | |
| "loss": 1.1020374298095703, | |
| "step": 1454 | |
| }, | |
| { | |
| "epoch": 2.122448979591837, | |
| "grad_norm": 0.2687873840332031, | |
| "learning_rate": 4.4286204566483715e-06, | |
| "loss": 0.548167884349823, | |
| "step": 1456 | |
| }, | |
| { | |
| "epoch": 2.1253644314868803, | |
| "grad_norm": 0.351572722196579, | |
| "learning_rate": 4.410757630584204e-06, | |
| "loss": 0.671511709690094, | |
| "step": 1458 | |
| }, | |
| { | |
| "epoch": 2.128279883381924, | |
| "grad_norm": 0.3009466230869293, | |
| "learning_rate": 4.392934460744958e-06, | |
| "loss": 1.0809369087219238, | |
| "step": 1460 | |
| }, | |
| { | |
| "epoch": 2.131195335276968, | |
| "grad_norm": 0.1647637039422989, | |
| "learning_rate": 4.375151131229369e-06, | |
| "loss": 1.0825597047805786, | |
| "step": 1462 | |
| }, | |
| { | |
| "epoch": 2.1341107871720117, | |
| "grad_norm": 0.15290948748588562, | |
| "learning_rate": 4.357407825724648e-06, | |
| "loss": 1.132341742515564, | |
| "step": 1464 | |
| }, | |
| { | |
| "epoch": 2.1370262390670556, | |
| "grad_norm": 0.30983132123947144, | |
| "learning_rate": 4.339704727504581e-06, | |
| "loss": 1.115373969078064, | |
| "step": 1466 | |
| }, | |
| { | |
| "epoch": 2.139941690962099, | |
| "grad_norm": 0.1616809368133545, | |
| "learning_rate": 4.32204201942766e-06, | |
| "loss": 1.2571251392364502, | |
| "step": 1468 | |
| }, | |
| { | |
| "epoch": 2.142857142857143, | |
| "grad_norm": 0.44996944069862366, | |
| "learning_rate": 4.304419883935167e-06, | |
| "loss": 0.7702177166938782, | |
| "step": 1470 | |
| }, | |
| { | |
| "epoch": 2.1457725947521866, | |
| "grad_norm": 0.08497241884469986, | |
| "learning_rate": 4.286838503049309e-06, | |
| "loss": 1.0834498405456543, | |
| "step": 1472 | |
| }, | |
| { | |
| "epoch": 2.1486880466472305, | |
| "grad_norm": 0.4060671925544739, | |
| "learning_rate": 4.26929805837134e-06, | |
| "loss": 1.1200850009918213, | |
| "step": 1474 | |
| }, | |
| { | |
| "epoch": 2.151603498542274, | |
| "grad_norm": 0.17709168791770935, | |
| "learning_rate": 4.2517987310796595e-06, | |
| "loss": 1.1172959804534912, | |
| "step": 1476 | |
| }, | |
| { | |
| "epoch": 2.1545189504373177, | |
| "grad_norm": 0.1522580236196518, | |
| "learning_rate": 4.23434070192797e-06, | |
| "loss": 1.168565034866333, | |
| "step": 1478 | |
| }, | |
| { | |
| "epoch": 2.1574344023323615, | |
| "grad_norm": 0.1714070737361908, | |
| "learning_rate": 4.216924151243395e-06, | |
| "loss": 1.1115281581878662, | |
| "step": 1480 | |
| }, | |
| { | |
| "epoch": 2.1603498542274053, | |
| "grad_norm": 0.13482044637203217, | |
| "learning_rate": 4.199549258924615e-06, | |
| "loss": 1.2671080827713013, | |
| "step": 1482 | |
| }, | |
| { | |
| "epoch": 2.163265306122449, | |
| "grad_norm": 0.1459122747182846, | |
| "learning_rate": 4.18221620444002e-06, | |
| "loss": 1.172806739807129, | |
| "step": 1484 | |
| }, | |
| { | |
| "epoch": 2.1661807580174925, | |
| "grad_norm": 0.08871738612651825, | |
| "learning_rate": 4.1649251668258475e-06, | |
| "loss": 1.045624852180481, | |
| "step": 1486 | |
| }, | |
| { | |
| "epoch": 2.1690962099125364, | |
| "grad_norm": 0.3394921123981476, | |
| "learning_rate": 4.147676324684335e-06, | |
| "loss": 1.1889164447784424, | |
| "step": 1488 | |
| }, | |
| { | |
| "epoch": 2.17201166180758, | |
| "grad_norm": 0.1473836749792099, | |
| "learning_rate": 4.130469856181873e-06, | |
| "loss": 1.079075813293457, | |
| "step": 1490 | |
| }, | |
| { | |
| "epoch": 2.174927113702624, | |
| "grad_norm": 0.18347686529159546, | |
| "learning_rate": 4.113305939047174e-06, | |
| "loss": 1.2786171436309814, | |
| "step": 1492 | |
| }, | |
| { | |
| "epoch": 2.1778425655976674, | |
| "grad_norm": 0.16250960528850555, | |
| "learning_rate": 4.096184750569422e-06, | |
| "loss": 0.677879273891449, | |
| "step": 1494 | |
| }, | |
| { | |
| "epoch": 2.1807580174927113, | |
| "grad_norm": 0.383709192276001, | |
| "learning_rate": 4.07910646759645e-06, | |
| "loss": 0.6416628360748291, | |
| "step": 1496 | |
| }, | |
| { | |
| "epoch": 2.183673469387755, | |
| "grad_norm": 0.07085460424423218, | |
| "learning_rate": 4.062071266532916e-06, | |
| "loss": 1.0884201526641846, | |
| "step": 1498 | |
| }, | |
| { | |
| "epoch": 2.186588921282799, | |
| "grad_norm": 0.10339315980672836, | |
| "learning_rate": 4.045079323338477e-06, | |
| "loss": 0.8533938527107239, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 2.1895043731778427, | |
| "grad_norm": 0.20028476417064667, | |
| "learning_rate": 4.0281308135259705e-06, | |
| "loss": 0.9680588841438293, | |
| "step": 1502 | |
| }, | |
| { | |
| "epoch": 2.192419825072886, | |
| "grad_norm": 0.3516143560409546, | |
| "learning_rate": 4.0112259121596e-06, | |
| "loss": 0.7940521240234375, | |
| "step": 1504 | |
| }, | |
| { | |
| "epoch": 2.19533527696793, | |
| "grad_norm": 0.10385473072528839, | |
| "learning_rate": 3.994364793853135e-06, | |
| "loss": 1.1375114917755127, | |
| "step": 1506 | |
| }, | |
| { | |
| "epoch": 2.198250728862974, | |
| "grad_norm": 0.10895653814077377, | |
| "learning_rate": 3.977547632768095e-06, | |
| "loss": 1.1559362411499023, | |
| "step": 1508 | |
| }, | |
| { | |
| "epoch": 2.2011661807580176, | |
| "grad_norm": 0.11289890855550766, | |
| "learning_rate": 3.960774602611966e-06, | |
| "loss": 1.1142271757125854, | |
| "step": 1510 | |
| }, | |
| { | |
| "epoch": 2.204081632653061, | |
| "grad_norm": 0.11957119405269623, | |
| "learning_rate": 3.94404587663639e-06, | |
| "loss": 0.997885525226593, | |
| "step": 1512 | |
| }, | |
| { | |
| "epoch": 2.206997084548105, | |
| "grad_norm": 0.1454574018716812, | |
| "learning_rate": 3.9273616276353904e-06, | |
| "loss": 0.6211732625961304, | |
| "step": 1514 | |
| }, | |
| { | |
| "epoch": 2.2099125364431487, | |
| "grad_norm": 0.2732894718647003, | |
| "learning_rate": 3.910722027943569e-06, | |
| "loss": 0.7947649955749512, | |
| "step": 1516 | |
| }, | |
| { | |
| "epoch": 2.2128279883381925, | |
| "grad_norm": 0.31755542755126953, | |
| "learning_rate": 3.894127249434352e-06, | |
| "loss": 0.9824427366256714, | |
| "step": 1518 | |
| }, | |
| { | |
| "epoch": 2.2157434402332363, | |
| "grad_norm": 0.31029990315437317, | |
| "learning_rate": 3.877577463518183e-06, | |
| "loss": 1.0954536199569702, | |
| "step": 1520 | |
| }, | |
| { | |
| "epoch": 2.2186588921282797, | |
| "grad_norm": 0.13882219791412354, | |
| "learning_rate": 3.861072841140779e-06, | |
| "loss": 1.1737290620803833, | |
| "step": 1522 | |
| }, | |
| { | |
| "epoch": 2.2215743440233235, | |
| "grad_norm": 0.199194073677063, | |
| "learning_rate": 3.8446135527813596e-06, | |
| "loss": 1.2562403678894043, | |
| "step": 1524 | |
| }, | |
| { | |
| "epoch": 2.2244897959183674, | |
| "grad_norm": 0.09712310880422592, | |
| "learning_rate": 3.828199768450866e-06, | |
| "loss": 0.887328028678894, | |
| "step": 1526 | |
| }, | |
| { | |
| "epoch": 2.227405247813411, | |
| "grad_norm": 0.3643515110015869, | |
| "learning_rate": 3.8118316576902345e-06, | |
| "loss": 0.13481314480304718, | |
| "step": 1528 | |
| }, | |
| { | |
| "epoch": 2.2303206997084546, | |
| "grad_norm": 0.4534083604812622, | |
| "learning_rate": 3.7955093895686242e-06, | |
| "loss": 1.0862985849380493, | |
| "step": 1530 | |
| }, | |
| { | |
| "epoch": 2.2332361516034984, | |
| "grad_norm": 0.15879718959331512, | |
| "learning_rate": 3.779233132681675e-06, | |
| "loss": 1.045498013496399, | |
| "step": 1532 | |
| }, | |
| { | |
| "epoch": 2.2361516034985423, | |
| "grad_norm": 0.18001393973827362, | |
| "learning_rate": 3.7630030551497728e-06, | |
| "loss": 1.1538960933685303, | |
| "step": 1534 | |
| }, | |
| { | |
| "epoch": 2.239067055393586, | |
| "grad_norm": 0.08799666166305542, | |
| "learning_rate": 3.746819324616308e-06, | |
| "loss": 1.0975581407546997, | |
| "step": 1536 | |
| }, | |
| { | |
| "epoch": 2.24198250728863, | |
| "grad_norm": 0.24161297082901, | |
| "learning_rate": 3.730682108245944e-06, | |
| "loss": 0.6484414339065552, | |
| "step": 1538 | |
| }, | |
| { | |
| "epoch": 2.2448979591836733, | |
| "grad_norm": 0.08378497511148453, | |
| "learning_rate": 3.714591572722891e-06, | |
| "loss": 0.9581442475318909, | |
| "step": 1540 | |
| }, | |
| { | |
| "epoch": 2.247813411078717, | |
| "grad_norm": 0.10033685714006424, | |
| "learning_rate": 3.698547884249187e-06, | |
| "loss": 0.6113779544830322, | |
| "step": 1542 | |
| }, | |
| { | |
| "epoch": 2.250728862973761, | |
| "grad_norm": 0.275552362203598, | |
| "learning_rate": 3.6825512085429703e-06, | |
| "loss": 1.1037795543670654, | |
| "step": 1544 | |
| }, | |
| { | |
| "epoch": 2.253644314868805, | |
| "grad_norm": 0.5268692374229431, | |
| "learning_rate": 3.6666017108367837e-06, | |
| "loss": 0.8392840027809143, | |
| "step": 1546 | |
| }, | |
| { | |
| "epoch": 2.256559766763848, | |
| "grad_norm": 0.24270810186862946, | |
| "learning_rate": 3.6506995558758586e-06, | |
| "loss": 1.0857195854187012, | |
| "step": 1548 | |
| }, | |
| { | |
| "epoch": 2.259475218658892, | |
| "grad_norm": 0.11209052801132202, | |
| "learning_rate": 3.6348449079164116e-06, | |
| "loss": 1.0408934354782104, | |
| "step": 1550 | |
| }, | |
| { | |
| "epoch": 2.262390670553936, | |
| "grad_norm": 0.3595077097415924, | |
| "learning_rate": 3.619037930723958e-06, | |
| "loss": 0.41006362438201904, | |
| "step": 1552 | |
| }, | |
| { | |
| "epoch": 2.2653061224489797, | |
| "grad_norm": 0.20681369304656982, | |
| "learning_rate": 3.603278787571601e-06, | |
| "loss": 1.08263099193573, | |
| "step": 1554 | |
| }, | |
| { | |
| "epoch": 2.2682215743440235, | |
| "grad_norm": 0.1791142076253891, | |
| "learning_rate": 3.587567641238369e-06, | |
| "loss": 1.1789532899856567, | |
| "step": 1556 | |
| }, | |
| { | |
| "epoch": 2.271137026239067, | |
| "grad_norm": 0.15824060142040253, | |
| "learning_rate": 3.5719046540075155e-06, | |
| "loss": 1.138330101966858, | |
| "step": 1558 | |
| }, | |
| { | |
| "epoch": 2.2740524781341107, | |
| "grad_norm": 0.08995150774717331, | |
| "learning_rate": 3.5562899876648556e-06, | |
| "loss": 1.0861237049102783, | |
| "step": 1560 | |
| }, | |
| { | |
| "epoch": 2.2769679300291545, | |
| "grad_norm": 0.20422294735908508, | |
| "learning_rate": 3.540723803497084e-06, | |
| "loss": 1.068771481513977, | |
| "step": 1562 | |
| }, | |
| { | |
| "epoch": 2.2798833819241984, | |
| "grad_norm": 0.29918450117111206, | |
| "learning_rate": 3.5252062622901196e-06, | |
| "loss": 1.0257431268692017, | |
| "step": 1564 | |
| }, | |
| { | |
| "epoch": 2.2827988338192418, | |
| "grad_norm": 0.2508153021335602, | |
| "learning_rate": 3.5097375243274322e-06, | |
| "loss": 0.7228989601135254, | |
| "step": 1566 | |
| }, | |
| { | |
| "epoch": 2.2857142857142856, | |
| "grad_norm": 0.20312649011611938, | |
| "learning_rate": 3.494317749388401e-06, | |
| "loss": 0.9408363103866577, | |
| "step": 1568 | |
| }, | |
| { | |
| "epoch": 2.2886297376093294, | |
| "grad_norm": 0.18280087411403656, | |
| "learning_rate": 3.4789470967466528e-06, | |
| "loss": 1.1609010696411133, | |
| "step": 1570 | |
| }, | |
| { | |
| "epoch": 2.2915451895043732, | |
| "grad_norm": 0.4031111001968384, | |
| "learning_rate": 3.4636257251684247e-06, | |
| "loss": 1.1523736715316772, | |
| "step": 1572 | |
| }, | |
| { | |
| "epoch": 2.294460641399417, | |
| "grad_norm": 0.14943495392799377, | |
| "learning_rate": 3.4483537929109212e-06, | |
| "loss": 1.0938516855239868, | |
| "step": 1574 | |
| }, | |
| { | |
| "epoch": 2.2973760932944605, | |
| "grad_norm": 0.32287096977233887, | |
| "learning_rate": 3.433131457720673e-06, | |
| "loss": 0.8949427604675293, | |
| "step": 1576 | |
| }, | |
| { | |
| "epoch": 2.3002915451895043, | |
| "grad_norm": 0.13816498219966888, | |
| "learning_rate": 3.4179588768319194e-06, | |
| "loss": 1.004232406616211, | |
| "step": 1578 | |
| }, | |
| { | |
| "epoch": 2.303206997084548, | |
| "grad_norm": 0.17348824441432953, | |
| "learning_rate": 3.4028362069649807e-06, | |
| "loss": 1.1232084035873413, | |
| "step": 1580 | |
| }, | |
| { | |
| "epoch": 2.306122448979592, | |
| "grad_norm": 0.2952488362789154, | |
| "learning_rate": 3.387763604324628e-06, | |
| "loss": 1.2846827507019043, | |
| "step": 1582 | |
| }, | |
| { | |
| "epoch": 2.3090379008746353, | |
| "grad_norm": 0.0930081456899643, | |
| "learning_rate": 3.3727412245984863e-06, | |
| "loss": 1.0255701541900635, | |
| "step": 1584 | |
| }, | |
| { | |
| "epoch": 2.311953352769679, | |
| "grad_norm": 0.19518348574638367, | |
| "learning_rate": 3.3577692229554225e-06, | |
| "loss": 0.9602378606796265, | |
| "step": 1586 | |
| }, | |
| { | |
| "epoch": 2.314868804664723, | |
| "grad_norm": 0.08679629117250443, | |
| "learning_rate": 3.3428477540439295e-06, | |
| "loss": 1.0191975831985474, | |
| "step": 1588 | |
| }, | |
| { | |
| "epoch": 2.317784256559767, | |
| "grad_norm": 0.07790417969226837, | |
| "learning_rate": 3.3279769719905438e-06, | |
| "loss": 1.1509268283843994, | |
| "step": 1590 | |
| }, | |
| { | |
| "epoch": 2.3206997084548107, | |
| "grad_norm": 0.2912391126155853, | |
| "learning_rate": 3.3131570303982517e-06, | |
| "loss": 0.6687411665916443, | |
| "step": 1592 | |
| }, | |
| { | |
| "epoch": 2.323615160349854, | |
| "grad_norm": 0.4317520260810852, | |
| "learning_rate": 3.2983880823448896e-06, | |
| "loss": 0.8183987736701965, | |
| "step": 1594 | |
| }, | |
| { | |
| "epoch": 2.326530612244898, | |
| "grad_norm": 0.11885584890842438, | |
| "learning_rate": 3.283670280381581e-06, | |
| "loss": 1.1012320518493652, | |
| "step": 1596 | |
| }, | |
| { | |
| "epoch": 2.3294460641399417, | |
| "grad_norm": 0.35252460837364197, | |
| "learning_rate": 3.269003776531148e-06, | |
| "loss": 0.9789476990699768, | |
| "step": 1598 | |
| }, | |
| { | |
| "epoch": 2.3323615160349855, | |
| "grad_norm": 0.15434707701206207, | |
| "learning_rate": 3.2543887222865496e-06, | |
| "loss": 1.1043654680252075, | |
| "step": 1600 | |
| }, | |
| { | |
| "epoch": 2.335276967930029, | |
| "grad_norm": 0.16315020620822906, | |
| "learning_rate": 3.239825268609309e-06, | |
| "loss": 1.0038485527038574, | |
| "step": 1602 | |
| }, | |
| { | |
| "epoch": 2.3381924198250728, | |
| "grad_norm": 0.39029252529144287, | |
| "learning_rate": 3.2253135659279558e-06, | |
| "loss": 1.1852213144302368, | |
| "step": 1604 | |
| }, | |
| { | |
| "epoch": 2.3411078717201166, | |
| "grad_norm": 0.2913620173931122, | |
| "learning_rate": 3.2108537641364786e-06, | |
| "loss": 0.45255744457244873, | |
| "step": 1606 | |
| }, | |
| { | |
| "epoch": 2.3440233236151604, | |
| "grad_norm": 0.06582468003034592, | |
| "learning_rate": 3.19644601259277e-06, | |
| "loss": 1.269538402557373, | |
| "step": 1608 | |
| }, | |
| { | |
| "epoch": 2.3469387755102042, | |
| "grad_norm": 0.5571786761283875, | |
| "learning_rate": 3.1820904601170884e-06, | |
| "loss": 0.8519521355628967, | |
| "step": 1610 | |
| }, | |
| { | |
| "epoch": 2.3498542274052476, | |
| "grad_norm": 0.31546610593795776, | |
| "learning_rate": 3.1677872549905154e-06, | |
| "loss": 1.3262689113616943, | |
| "step": 1612 | |
| }, | |
| { | |
| "epoch": 2.3527696793002915, | |
| "grad_norm": 0.09515654295682907, | |
| "learning_rate": 3.153536544953433e-06, | |
| "loss": 0.9249638319015503, | |
| "step": 1614 | |
| }, | |
| { | |
| "epoch": 2.3556851311953353, | |
| "grad_norm": 0.15578609704971313, | |
| "learning_rate": 3.139338477203983e-06, | |
| "loss": 1.1823093891143799, | |
| "step": 1616 | |
| }, | |
| { | |
| "epoch": 2.358600583090379, | |
| "grad_norm": 0.2227763533592224, | |
| "learning_rate": 3.125193198396564e-06, | |
| "loss": 1.2877289056777954, | |
| "step": 1618 | |
| }, | |
| { | |
| "epoch": 2.3615160349854225, | |
| "grad_norm": 0.4745902121067047, | |
| "learning_rate": 3.111100854640303e-06, | |
| "loss": 0.9719488024711609, | |
| "step": 1620 | |
| }, | |
| { | |
| "epoch": 2.3644314868804663, | |
| "grad_norm": 0.24592548608779907, | |
| "learning_rate": 3.097061591497555e-06, | |
| "loss": 1.0211539268493652, | |
| "step": 1622 | |
| }, | |
| { | |
| "epoch": 2.36734693877551, | |
| "grad_norm": 0.21700948476791382, | |
| "learning_rate": 3.0830755539823942e-06, | |
| "loss": 0.9550508260726929, | |
| "step": 1624 | |
| }, | |
| { | |
| "epoch": 2.370262390670554, | |
| "grad_norm": 0.20466458797454834, | |
| "learning_rate": 3.0691428865591153e-06, | |
| "loss": 0.5767884254455566, | |
| "step": 1626 | |
| }, | |
| { | |
| "epoch": 2.373177842565598, | |
| "grad_norm": 0.14715692400932312, | |
| "learning_rate": 3.0552637331407466e-06, | |
| "loss": 0.894551694393158, | |
| "step": 1628 | |
| }, | |
| { | |
| "epoch": 2.376093294460641, | |
| "grad_norm": 0.1368647813796997, | |
| "learning_rate": 3.0414382370875628e-06, | |
| "loss": 1.2126644849777222, | |
| "step": 1630 | |
| }, | |
| { | |
| "epoch": 2.379008746355685, | |
| "grad_norm": 0.2084326297044754, | |
| "learning_rate": 3.027666541205592e-06, | |
| "loss": 1.1460554599761963, | |
| "step": 1632 | |
| }, | |
| { | |
| "epoch": 2.381924198250729, | |
| "grad_norm": 0.12772594392299652, | |
| "learning_rate": 3.013948787745166e-06, | |
| "loss": 0.8425911664962769, | |
| "step": 1634 | |
| }, | |
| { | |
| "epoch": 2.3848396501457727, | |
| "grad_norm": 0.21220910549163818, | |
| "learning_rate": 3.000285118399425e-06, | |
| "loss": 1.0760411024093628, | |
| "step": 1636 | |
| }, | |
| { | |
| "epoch": 2.387755102040816, | |
| "grad_norm": 0.16325032711029053, | |
| "learning_rate": 2.9866756743028644e-06, | |
| "loss": 1.1195225715637207, | |
| "step": 1638 | |
| }, | |
| { | |
| "epoch": 2.39067055393586, | |
| "grad_norm": 0.1648532897233963, | |
| "learning_rate": 2.973120596029882e-06, | |
| "loss": 1.0467681884765625, | |
| "step": 1640 | |
| }, | |
| { | |
| "epoch": 2.3935860058309038, | |
| "grad_norm": 0.5487902164459229, | |
| "learning_rate": 2.9596200235933215e-06, | |
| "loss": 1.1597939729690552, | |
| "step": 1642 | |
| }, | |
| { | |
| "epoch": 2.3965014577259476, | |
| "grad_norm": 0.15476688742637634, | |
| "learning_rate": 2.9461740964430176e-06, | |
| "loss": 1.0105078220367432, | |
| "step": 1644 | |
| }, | |
| { | |
| "epoch": 2.3994169096209914, | |
| "grad_norm": 1.1137182712554932, | |
| "learning_rate": 2.932782953464373e-06, | |
| "loss": 1.0070343017578125, | |
| "step": 1646 | |
| }, | |
| { | |
| "epoch": 2.402332361516035, | |
| "grad_norm": 0.3256247043609619, | |
| "learning_rate": 2.9194467329769166e-06, | |
| "loss": 0.9948145151138306, | |
| "step": 1648 | |
| }, | |
| { | |
| "epoch": 2.4052478134110786, | |
| "grad_norm": 0.14843417704105377, | |
| "learning_rate": 2.9061655727328617e-06, | |
| "loss": 1.0339670181274414, | |
| "step": 1650 | |
| }, | |
| { | |
| "epoch": 2.4081632653061225, | |
| "grad_norm": 0.14106328785419464, | |
| "learning_rate": 2.8929396099157056e-06, | |
| "loss": 1.149165391921997, | |
| "step": 1652 | |
| }, | |
| { | |
| "epoch": 2.4110787172011663, | |
| "grad_norm": 0.1781884729862213, | |
| "learning_rate": 2.8797689811387944e-06, | |
| "loss": 0.9708322286605835, | |
| "step": 1654 | |
| }, | |
| { | |
| "epoch": 2.4139941690962097, | |
| "grad_norm": 0.16324618458747864, | |
| "learning_rate": 2.8666538224439207e-06, | |
| "loss": 0.9147579669952393, | |
| "step": 1656 | |
| }, | |
| { | |
| "epoch": 2.4169096209912535, | |
| "grad_norm": 0.10199990123510361, | |
| "learning_rate": 2.853594269299919e-06, | |
| "loss": 1.1740384101867676, | |
| "step": 1658 | |
| }, | |
| { | |
| "epoch": 2.4198250728862973, | |
| "grad_norm": 0.36128106713294983, | |
| "learning_rate": 2.8405904566012634e-06, | |
| "loss": 0.9795001149177551, | |
| "step": 1660 | |
| }, | |
| { | |
| "epoch": 2.422740524781341, | |
| "grad_norm": 0.11705031245946884, | |
| "learning_rate": 2.827642518666673e-06, | |
| "loss": 1.0222880840301514, | |
| "step": 1662 | |
| }, | |
| { | |
| "epoch": 2.425655976676385, | |
| "grad_norm": 0.19340762495994568, | |
| "learning_rate": 2.814750589237729e-06, | |
| "loss": 1.0553447008132935, | |
| "step": 1664 | |
| }, | |
| { | |
| "epoch": 2.4285714285714284, | |
| "grad_norm": 0.09246297180652618, | |
| "learning_rate": 2.8019148014774856e-06, | |
| "loss": 1.0741846561431885, | |
| "step": 1666 | |
| }, | |
| { | |
| "epoch": 2.431486880466472, | |
| "grad_norm": 0.23843225836753845, | |
| "learning_rate": 2.789135287969106e-06, | |
| "loss": 1.1993522644042969, | |
| "step": 1668 | |
| }, | |
| { | |
| "epoch": 2.434402332361516, | |
| "grad_norm": 0.7431137561798096, | |
| "learning_rate": 2.7764121807144815e-06, | |
| "loss": 0.42419517040252686, | |
| "step": 1670 | |
| }, | |
| { | |
| "epoch": 2.43731778425656, | |
| "grad_norm": 0.11922803521156311, | |
| "learning_rate": 2.7637456111328773e-06, | |
| "loss": 1.0701881647109985, | |
| "step": 1672 | |
| }, | |
| { | |
| "epoch": 2.4402332361516033, | |
| "grad_norm": 0.238107368350029, | |
| "learning_rate": 2.7511357100595675e-06, | |
| "loss": 1.0204083919525146, | |
| "step": 1674 | |
| }, | |
| { | |
| "epoch": 2.443148688046647, | |
| "grad_norm": 0.18065865337848663, | |
| "learning_rate": 2.738582607744491e-06, | |
| "loss": 1.1767973899841309, | |
| "step": 1676 | |
| }, | |
| { | |
| "epoch": 2.446064139941691, | |
| "grad_norm": 0.6328040361404419, | |
| "learning_rate": 2.7260864338508944e-06, | |
| "loss": 1.2465075254440308, | |
| "step": 1678 | |
| }, | |
| { | |
| "epoch": 2.4489795918367347, | |
| "grad_norm": 0.32334592938423157, | |
| "learning_rate": 2.71364731745401e-06, | |
| "loss": 0.9165597558021545, | |
| "step": 1680 | |
| }, | |
| { | |
| "epoch": 2.4518950437317786, | |
| "grad_norm": 0.29830703139305115, | |
| "learning_rate": 2.701265387039703e-06, | |
| "loss": 1.0425974130630493, | |
| "step": 1682 | |
| }, | |
| { | |
| "epoch": 2.454810495626822, | |
| "grad_norm": 0.09913703799247742, | |
| "learning_rate": 2.688940770503163e-06, | |
| "loss": 1.1421351432800293, | |
| "step": 1684 | |
| }, | |
| { | |
| "epoch": 2.457725947521866, | |
| "grad_norm": 0.19002677500247955, | |
| "learning_rate": 2.676673595147574e-06, | |
| "loss": 1.14607572555542, | |
| "step": 1686 | |
| }, | |
| { | |
| "epoch": 2.4606413994169096, | |
| "grad_norm": 0.17399148643016815, | |
| "learning_rate": 2.6644639876827903e-06, | |
| "loss": 1.0854803323745728, | |
| "step": 1688 | |
| }, | |
| { | |
| "epoch": 2.4635568513119535, | |
| "grad_norm": 0.18045774102210999, | |
| "learning_rate": 2.6523120742240457e-06, | |
| "loss": 1.156597375869751, | |
| "step": 1690 | |
| }, | |
| { | |
| "epoch": 2.466472303206997, | |
| "grad_norm": 0.36970221996307373, | |
| "learning_rate": 2.6402179802906417e-06, | |
| "loss": 1.1326744556427002, | |
| "step": 1692 | |
| }, | |
| { | |
| "epoch": 2.4693877551020407, | |
| "grad_norm": 0.16106556355953217, | |
| "learning_rate": 2.6281818308046466e-06, | |
| "loss": 1.1174097061157227, | |
| "step": 1694 | |
| }, | |
| { | |
| "epoch": 2.4723032069970845, | |
| "grad_norm": 0.23179616034030914, | |
| "learning_rate": 2.6162037500896134e-06, | |
| "loss": 1.247542381286621, | |
| "step": 1696 | |
| }, | |
| { | |
| "epoch": 2.4752186588921283, | |
| "grad_norm": 0.20750805735588074, | |
| "learning_rate": 2.6042838618692964e-06, | |
| "loss": 1.120650291442871, | |
| "step": 1698 | |
| }, | |
| { | |
| "epoch": 2.478134110787172, | |
| "grad_norm": 0.4005797207355499, | |
| "learning_rate": 2.5924222892663607e-06, | |
| "loss": 1.1234309673309326, | |
| "step": 1700 | |
| }, | |
| { | |
| "epoch": 2.481049562682216, | |
| "grad_norm": 0.11094089597463608, | |
| "learning_rate": 2.580619154801124e-06, | |
| "loss": 1.0382579565048218, | |
| "step": 1702 | |
| }, | |
| { | |
| "epoch": 2.4839650145772594, | |
| "grad_norm": 0.1598607450723648, | |
| "learning_rate": 2.5688745803902863e-06, | |
| "loss": 0.8054310083389282, | |
| "step": 1704 | |
| }, | |
| { | |
| "epoch": 2.486880466472303, | |
| "grad_norm": 0.29358312487602234, | |
| "learning_rate": 2.557188687345666e-06, | |
| "loss": 1.2227270603179932, | |
| "step": 1706 | |
| }, | |
| { | |
| "epoch": 2.489795918367347, | |
| "grad_norm": 0.10478518158197403, | |
| "learning_rate": 2.545561596372957e-06, | |
| "loss": 1.0256011486053467, | |
| "step": 1708 | |
| }, | |
| { | |
| "epoch": 2.4927113702623904, | |
| "grad_norm": 0.19069114327430725, | |
| "learning_rate": 2.533993427570471e-06, | |
| "loss": 1.003487467765808, | |
| "step": 1710 | |
| }, | |
| { | |
| "epoch": 2.4956268221574343, | |
| "grad_norm": 0.19944234192371368, | |
| "learning_rate": 2.522484300427905e-06, | |
| "loss": 1.1340402364730835, | |
| "step": 1712 | |
| }, | |
| { | |
| "epoch": 2.498542274052478, | |
| "grad_norm": 0.206906259059906, | |
| "learning_rate": 2.5110343338251055e-06, | |
| "loss": 0.7293667793273926, | |
| "step": 1714 | |
| }, | |
| { | |
| "epoch": 2.501457725947522, | |
| "grad_norm": 0.22807729244232178, | |
| "learning_rate": 2.499643646030833e-06, | |
| "loss": 0.6911664009094238, | |
| "step": 1716 | |
| }, | |
| { | |
| "epoch": 2.5043731778425657, | |
| "grad_norm": 0.12783202528953552, | |
| "learning_rate": 2.488312354701552e-06, | |
| "loss": 1.0861356258392334, | |
| "step": 1718 | |
| }, | |
| { | |
| "epoch": 2.5072886297376096, | |
| "grad_norm": 0.24884046614170074, | |
| "learning_rate": 2.4770405768802087e-06, | |
| "loss": 1.2009036540985107, | |
| "step": 1720 | |
| }, | |
| { | |
| "epoch": 2.510204081632653, | |
| "grad_norm": 0.19883911311626434, | |
| "learning_rate": 2.4658284289950235e-06, | |
| "loss": 1.171090006828308, | |
| "step": 1722 | |
| }, | |
| { | |
| "epoch": 2.513119533527697, | |
| "grad_norm": 0.2198370397090912, | |
| "learning_rate": 2.454676026858288e-06, | |
| "loss": 0.6773008704185486, | |
| "step": 1724 | |
| }, | |
| { | |
| "epoch": 2.5160349854227406, | |
| "grad_norm": 0.3970673084259033, | |
| "learning_rate": 2.443583485665172e-06, | |
| "loss": 0.9177547693252563, | |
| "step": 1726 | |
| }, | |
| { | |
| "epoch": 2.518950437317784, | |
| "grad_norm": 0.14196209609508514, | |
| "learning_rate": 2.432550919992524e-06, | |
| "loss": 1.0238224267959595, | |
| "step": 1728 | |
| }, | |
| { | |
| "epoch": 2.521865889212828, | |
| "grad_norm": 0.08479610830545425, | |
| "learning_rate": 2.4215784437977023e-06, | |
| "loss": 1.0351308584213257, | |
| "step": 1730 | |
| }, | |
| { | |
| "epoch": 2.5247813411078717, | |
| "grad_norm": 0.2791972756385803, | |
| "learning_rate": 2.4106661704173856e-06, | |
| "loss": 1.2357579469680786, | |
| "step": 1732 | |
| }, | |
| { | |
| "epoch": 2.5276967930029155, | |
| "grad_norm": 0.300520658493042, | |
| "learning_rate": 2.3998142125664094e-06, | |
| "loss": 0.9955886602401733, | |
| "step": 1734 | |
| }, | |
| { | |
| "epoch": 2.5306122448979593, | |
| "grad_norm": 0.07155195623636246, | |
| "learning_rate": 2.3890226823365984e-06, | |
| "loss": 0.9533568024635315, | |
| "step": 1736 | |
| }, | |
| { | |
| "epoch": 2.533527696793003, | |
| "grad_norm": 0.37421008944511414, | |
| "learning_rate": 2.3782916911956072e-06, | |
| "loss": 0.7588440179824829, | |
| "step": 1738 | |
| }, | |
| { | |
| "epoch": 2.5364431486880465, | |
| "grad_norm": 0.21846982836723328, | |
| "learning_rate": 2.3676213499857742e-06, | |
| "loss": 1.0482406616210938, | |
| "step": 1740 | |
| }, | |
| { | |
| "epoch": 2.5393586005830904, | |
| "grad_norm": 0.22150775790214539, | |
| "learning_rate": 2.357011768922975e-06, | |
| "loss": 0.9425265789031982, | |
| "step": 1742 | |
| }, | |
| { | |
| "epoch": 2.542274052478134, | |
| "grad_norm": 0.0946943610906601, | |
| "learning_rate": 2.3464630575954748e-06, | |
| "loss": 1.0236523151397705, | |
| "step": 1744 | |
| }, | |
| { | |
| "epoch": 2.5451895043731776, | |
| "grad_norm": 0.2336379438638687, | |
| "learning_rate": 2.3359753249628156e-06, | |
| "loss": 0.9605098962783813, | |
| "step": 1746 | |
| }, | |
| { | |
| "epoch": 2.5481049562682214, | |
| "grad_norm": 0.38517579436302185, | |
| "learning_rate": 2.3255486793546735e-06, | |
| "loss": 0.7055401802062988, | |
| "step": 1748 | |
| }, | |
| { | |
| "epoch": 2.5510204081632653, | |
| "grad_norm": 0.22488614916801453, | |
| "learning_rate": 2.3151832284697437e-06, | |
| "loss": 1.3222585916519165, | |
| "step": 1750 | |
| }, | |
| { | |
| "epoch": 2.553935860058309, | |
| "grad_norm": 0.14808881282806396, | |
| "learning_rate": 2.304879079374634e-06, | |
| "loss": 0.6318288445472717, | |
| "step": 1752 | |
| }, | |
| { | |
| "epoch": 2.556851311953353, | |
| "grad_norm": 0.12122584134340286, | |
| "learning_rate": 2.2946363385027555e-06, | |
| "loss": 1.0979853868484497, | |
| "step": 1754 | |
| }, | |
| { | |
| "epoch": 2.5597667638483967, | |
| "grad_norm": 0.17218822240829468, | |
| "learning_rate": 2.2844551116532164e-06, | |
| "loss": 1.1333314180374146, | |
| "step": 1756 | |
| }, | |
| { | |
| "epoch": 2.56268221574344, | |
| "grad_norm": 0.2076103240251541, | |
| "learning_rate": 2.274335503989743e-06, | |
| "loss": 1.1102957725524902, | |
| "step": 1758 | |
| }, | |
| { | |
| "epoch": 2.565597667638484, | |
| "grad_norm": 0.3147886395454407, | |
| "learning_rate": 2.2642776200395825e-06, | |
| "loss": 1.0110862255096436, | |
| "step": 1760 | |
| }, | |
| { | |
| "epoch": 2.568513119533528, | |
| "grad_norm": 0.199388787150383, | |
| "learning_rate": 2.2542815636924273e-06, | |
| "loss": 1.1791144609451294, | |
| "step": 1762 | |
| }, | |
| { | |
| "epoch": 2.571428571428571, | |
| "grad_norm": 0.14399054646492004, | |
| "learning_rate": 2.2443474381993418e-06, | |
| "loss": 0.6136134266853333, | |
| "step": 1764 | |
| }, | |
| { | |
| "epoch": 2.574344023323615, | |
| "grad_norm": 0.12786594033241272, | |
| "learning_rate": 2.2344753461716924e-06, | |
| "loss": 1.169732928276062, | |
| "step": 1766 | |
| }, | |
| { | |
| "epoch": 2.577259475218659, | |
| "grad_norm": 0.42270779609680176, | |
| "learning_rate": 2.2246653895800945e-06, | |
| "loss": 1.167303442955017, | |
| "step": 1768 | |
| }, | |
| { | |
| "epoch": 2.5801749271137027, | |
| "grad_norm": 0.3366575539112091, | |
| "learning_rate": 2.2149176697533547e-06, | |
| "loss": 0.7395915985107422, | |
| "step": 1770 | |
| }, | |
| { | |
| "epoch": 2.5830903790087465, | |
| "grad_norm": 0.11204802244901657, | |
| "learning_rate": 2.2052322873774243e-06, | |
| "loss": 1.130765676498413, | |
| "step": 1772 | |
| }, | |
| { | |
| "epoch": 2.5860058309037903, | |
| "grad_norm": 0.40100663900375366, | |
| "learning_rate": 2.195609342494358e-06, | |
| "loss": 0.9160555601119995, | |
| "step": 1774 | |
| }, | |
| { | |
| "epoch": 2.5889212827988337, | |
| "grad_norm": 0.3878629505634308, | |
| "learning_rate": 2.1860489345012882e-06, | |
| "loss": 1.1737711429595947, | |
| "step": 1776 | |
| }, | |
| { | |
| "epoch": 2.5918367346938775, | |
| "grad_norm": 0.2504361569881439, | |
| "learning_rate": 2.1765511621493837e-06, | |
| "loss": 1.1497868299484253, | |
| "step": 1778 | |
| }, | |
| { | |
| "epoch": 2.5947521865889214, | |
| "grad_norm": 0.399038165807724, | |
| "learning_rate": 2.1671161235428466e-06, | |
| "loss": 1.0515235662460327, | |
| "step": 1780 | |
| }, | |
| { | |
| "epoch": 2.5976676384839648, | |
| "grad_norm": 0.18093329668045044, | |
| "learning_rate": 2.1577439161378857e-06, | |
| "loss": 1.0114405155181885, | |
| "step": 1782 | |
| }, | |
| { | |
| "epoch": 2.6005830903790086, | |
| "grad_norm": 0.20376266539096832, | |
| "learning_rate": 2.1484346367417174e-06, | |
| "loss": 1.1349772214889526, | |
| "step": 1784 | |
| }, | |
| { | |
| "epoch": 2.6034985422740524, | |
| "grad_norm": 0.12697869539260864, | |
| "learning_rate": 2.139188381511565e-06, | |
| "loss": 1.0220611095428467, | |
| "step": 1786 | |
| }, | |
| { | |
| "epoch": 2.6064139941690962, | |
| "grad_norm": 0.17522640526294708, | |
| "learning_rate": 2.1300052459536577e-06, | |
| "loss": 1.04948890209198, | |
| "step": 1788 | |
| }, | |
| { | |
| "epoch": 2.60932944606414, | |
| "grad_norm": 0.33081164956092834, | |
| "learning_rate": 2.120885324922257e-06, | |
| "loss": 1.067612648010254, | |
| "step": 1790 | |
| }, | |
| { | |
| "epoch": 2.612244897959184, | |
| "grad_norm": 0.19511879980564117, | |
| "learning_rate": 2.1118287126186663e-06, | |
| "loss": 1.1198432445526123, | |
| "step": 1792 | |
| }, | |
| { | |
| "epoch": 2.6151603498542273, | |
| "grad_norm": 0.12612418830394745, | |
| "learning_rate": 2.102835502590264e-06, | |
| "loss": 0.9212133884429932, | |
| "step": 1794 | |
| }, | |
| { | |
| "epoch": 2.618075801749271, | |
| "grad_norm": 1.4945578575134277, | |
| "learning_rate": 2.0939057877295337e-06, | |
| "loss": 0.9755832552909851, | |
| "step": 1796 | |
| }, | |
| { | |
| "epoch": 2.620991253644315, | |
| "grad_norm": 0.11096255481243134, | |
| "learning_rate": 2.085039660273107e-06, | |
| "loss": 0.8870418071746826, | |
| "step": 1798 | |
| }, | |
| { | |
| "epoch": 2.6239067055393583, | |
| "grad_norm": 0.16551688313484192, | |
| "learning_rate": 2.076237211800807e-06, | |
| "loss": 1.1013219356536865, | |
| "step": 1800 | |
| }, | |
| { | |
| "epoch": 2.626822157434402, | |
| "grad_norm": 0.12267225235700607, | |
| "learning_rate": 2.067498533234708e-06, | |
| "loss": 1.1636854410171509, | |
| "step": 1802 | |
| }, | |
| { | |
| "epoch": 2.629737609329446, | |
| "grad_norm": 0.21022585034370422, | |
| "learning_rate": 2.0588237148381937e-06, | |
| "loss": 1.0870646238327026, | |
| "step": 1804 | |
| }, | |
| { | |
| "epoch": 2.63265306122449, | |
| "grad_norm": 0.12315444648265839, | |
| "learning_rate": 2.05021284621502e-06, | |
| "loss": 1.0031044483184814, | |
| "step": 1806 | |
| }, | |
| { | |
| "epoch": 2.6355685131195337, | |
| "grad_norm": 0.08722248673439026, | |
| "learning_rate": 2.0416660163084007e-06, | |
| "loss": 1.1768810749053955, | |
| "step": 1808 | |
| }, | |
| { | |
| "epoch": 2.6384839650145775, | |
| "grad_norm": 0.14608271420001984, | |
| "learning_rate": 2.0331833134000806e-06, | |
| "loss": 1.1812292337417603, | |
| "step": 1810 | |
| }, | |
| { | |
| "epoch": 2.641399416909621, | |
| "grad_norm": 0.12209862470626831, | |
| "learning_rate": 2.0247648251094187e-06, | |
| "loss": 0.5496333241462708, | |
| "step": 1812 | |
| }, | |
| { | |
| "epoch": 2.6443148688046647, | |
| "grad_norm": 0.14420591294765472, | |
| "learning_rate": 2.0164106383924995e-06, | |
| "loss": 1.0734022855758667, | |
| "step": 1814 | |
| }, | |
| { | |
| "epoch": 2.6472303206997085, | |
| "grad_norm": 0.34557104110717773, | |
| "learning_rate": 2.008120839541217e-06, | |
| "loss": 0.8214896321296692, | |
| "step": 1816 | |
| }, | |
| { | |
| "epoch": 2.650145772594752, | |
| "grad_norm": 0.19864369928836823, | |
| "learning_rate": 1.9998955141823947e-06, | |
| "loss": 1.1074302196502686, | |
| "step": 1818 | |
| }, | |
| { | |
| "epoch": 2.6530612244897958, | |
| "grad_norm": 0.1151181161403656, | |
| "learning_rate": 1.9917347472768996e-06, | |
| "loss": 1.1880613565444946, | |
| "step": 1820 | |
| }, | |
| { | |
| "epoch": 2.6559766763848396, | |
| "grad_norm": 0.3938349783420563, | |
| "learning_rate": 1.983638623118759e-06, | |
| "loss": 0.8221843242645264, | |
| "step": 1822 | |
| }, | |
| { | |
| "epoch": 2.6588921282798834, | |
| "grad_norm": 0.4980735182762146, | |
| "learning_rate": 1.9756072253342956e-06, | |
| "loss": 1.0243555307388306, | |
| "step": 1824 | |
| }, | |
| { | |
| "epoch": 2.6618075801749272, | |
| "grad_norm": 0.2903914451599121, | |
| "learning_rate": 1.967640636881263e-06, | |
| "loss": 1.1823608875274658, | |
| "step": 1826 | |
| }, | |
| { | |
| "epoch": 2.664723032069971, | |
| "grad_norm": 0.1528269499540329, | |
| "learning_rate": 1.9597389400479843e-06, | |
| "loss": 1.1882878541946411, | |
| "step": 1828 | |
| }, | |
| { | |
| "epoch": 2.6676384839650145, | |
| "grad_norm": 0.37738537788391113, | |
| "learning_rate": 1.9519022164525086e-06, | |
| "loss": 0.8332970142364502, | |
| "step": 1830 | |
| }, | |
| { | |
| "epoch": 2.6705539358600583, | |
| "grad_norm": 0.10077593475580215, | |
| "learning_rate": 1.9441305470417622e-06, | |
| "loss": 1.1155685186386108, | |
| "step": 1832 | |
| }, | |
| { | |
| "epoch": 2.673469387755102, | |
| "grad_norm": 0.24888084828853607, | |
| "learning_rate": 1.936424012090716e-06, | |
| "loss": 1.0899043083190918, | |
| "step": 1834 | |
| }, | |
| { | |
| "epoch": 2.6763848396501455, | |
| "grad_norm": 0.3049887418746948, | |
| "learning_rate": 1.9287826912015588e-06, | |
| "loss": 1.3089343309402466, | |
| "step": 1836 | |
| }, | |
| { | |
| "epoch": 2.6793002915451893, | |
| "grad_norm": 0.15812550485134125, | |
| "learning_rate": 1.9212066633028635e-06, | |
| "loss": 1.0993826389312744, | |
| "step": 1838 | |
| }, | |
| { | |
| "epoch": 2.682215743440233, | |
| "grad_norm": 0.265886515378952, | |
| "learning_rate": 1.9136960066487884e-06, | |
| "loss": 1.0602340698242188, | |
| "step": 1840 | |
| }, | |
| { | |
| "epoch": 2.685131195335277, | |
| "grad_norm": 0.8439386487007141, | |
| "learning_rate": 1.9062507988182545e-06, | |
| "loss": 1.0067952871322632, | |
| "step": 1842 | |
| }, | |
| { | |
| "epoch": 2.688046647230321, | |
| "grad_norm": 0.45330727100372314, | |
| "learning_rate": 1.8988711167141542e-06, | |
| "loss": 0.5957139134407043, | |
| "step": 1844 | |
| }, | |
| { | |
| "epoch": 2.6909620991253647, | |
| "grad_norm": 0.14824670553207397, | |
| "learning_rate": 1.8915570365625508e-06, | |
| "loss": 1.1712740659713745, | |
| "step": 1846 | |
| }, | |
| { | |
| "epoch": 2.693877551020408, | |
| "grad_norm": 0.10511742532253265, | |
| "learning_rate": 1.8843086339118943e-06, | |
| "loss": 1.0602518320083618, | |
| "step": 1848 | |
| }, | |
| { | |
| "epoch": 2.696793002915452, | |
| "grad_norm": 0.07894819229841232, | |
| "learning_rate": 1.8771259836322376e-06, | |
| "loss": 1.014635682106018, | |
| "step": 1850 | |
| }, | |
| { | |
| "epoch": 2.6997084548104957, | |
| "grad_norm": 0.10334635525941849, | |
| "learning_rate": 1.8700091599144688e-06, | |
| "loss": 1.0106903314590454, | |
| "step": 1852 | |
| }, | |
| { | |
| "epoch": 2.702623906705539, | |
| "grad_norm": 0.30136221647262573, | |
| "learning_rate": 1.8629582362695395e-06, | |
| "loss": 0.673401951789856, | |
| "step": 1854 | |
| }, | |
| { | |
| "epoch": 2.705539358600583, | |
| "grad_norm": 0.5134400129318237, | |
| "learning_rate": 1.8559732855277067e-06, | |
| "loss": 1.1158447265625, | |
| "step": 1856 | |
| }, | |
| { | |
| "epoch": 2.7084548104956268, | |
| "grad_norm": 0.35808032751083374, | |
| "learning_rate": 1.8490543798377848e-06, | |
| "loss": 1.2872017621994019, | |
| "step": 1858 | |
| }, | |
| { | |
| "epoch": 2.7113702623906706, | |
| "grad_norm": 0.04801107197999954, | |
| "learning_rate": 1.8422015906663964e-06, | |
| "loss": 0.932016909122467, | |
| "step": 1860 | |
| }, | |
| { | |
| "epoch": 2.7142857142857144, | |
| "grad_norm": 0.34277820587158203, | |
| "learning_rate": 1.8354149887972297e-06, | |
| "loss": 0.6936520338058472, | |
| "step": 1862 | |
| }, | |
| { | |
| "epoch": 2.7172011661807582, | |
| "grad_norm": 0.16731053590774536, | |
| "learning_rate": 1.8286946443303187e-06, | |
| "loss": 1.1427615880966187, | |
| "step": 1864 | |
| }, | |
| { | |
| "epoch": 2.7201166180758016, | |
| "grad_norm": 0.8489914536476135, | |
| "learning_rate": 1.822040626681308e-06, | |
| "loss": 1.0948349237442017, | |
| "step": 1866 | |
| }, | |
| { | |
| "epoch": 2.7230320699708455, | |
| "grad_norm": 0.41851627826690674, | |
| "learning_rate": 1.8154530045807438e-06, | |
| "loss": 1.157147765159607, | |
| "step": 1868 | |
| }, | |
| { | |
| "epoch": 2.7259475218658893, | |
| "grad_norm": 0.09261982142925262, | |
| "learning_rate": 1.808931846073361e-06, | |
| "loss": 1.0182065963745117, | |
| "step": 1870 | |
| }, | |
| { | |
| "epoch": 2.7288629737609327, | |
| "grad_norm": 0.07328807562589645, | |
| "learning_rate": 1.8024772185173758e-06, | |
| "loss": 0.9535019397735596, | |
| "step": 1872 | |
| }, | |
| { | |
| "epoch": 2.7317784256559765, | |
| "grad_norm": 0.3953118324279785, | |
| "learning_rate": 1.7960891885837988e-06, | |
| "loss": 0.5561579465866089, | |
| "step": 1874 | |
| }, | |
| { | |
| "epoch": 2.7346938775510203, | |
| "grad_norm": 0.7391979694366455, | |
| "learning_rate": 1.7897678222557402e-06, | |
| "loss": 0.9951037764549255, | |
| "step": 1876 | |
| }, | |
| { | |
| "epoch": 2.737609329446064, | |
| "grad_norm": 0.16622287034988403, | |
| "learning_rate": 1.7835131848277288e-06, | |
| "loss": 1.129691243171692, | |
| "step": 1878 | |
| }, | |
| { | |
| "epoch": 2.740524781341108, | |
| "grad_norm": 0.08795658499002457, | |
| "learning_rate": 1.7773253409050398e-06, | |
| "loss": 0.9720866680145264, | |
| "step": 1880 | |
| }, | |
| { | |
| "epoch": 2.743440233236152, | |
| "grad_norm": 0.10475818812847137, | |
| "learning_rate": 1.7712043544030265e-06, | |
| "loss": 0.9624143242835999, | |
| "step": 1882 | |
| }, | |
| { | |
| "epoch": 2.746355685131195, | |
| "grad_norm": 0.5169785618782043, | |
| "learning_rate": 1.7651502885464582e-06, | |
| "loss": 0.7830743789672852, | |
| "step": 1884 | |
| }, | |
| { | |
| "epoch": 2.749271137026239, | |
| "grad_norm": 0.06864479184150696, | |
| "learning_rate": 1.7591632058688719e-06, | |
| "loss": 1.1376532316207886, | |
| "step": 1886 | |
| }, | |
| { | |
| "epoch": 2.752186588921283, | |
| "grad_norm": 4.637813091278076, | |
| "learning_rate": 1.7532431682119205e-06, | |
| "loss": 0.8696690797805786, | |
| "step": 1888 | |
| }, | |
| { | |
| "epoch": 2.7551020408163263, | |
| "grad_norm": 0.15929657220840454, | |
| "learning_rate": 1.7473902367247361e-06, | |
| "loss": 1.1236258745193481, | |
| "step": 1890 | |
| }, | |
| { | |
| "epoch": 2.75801749271137, | |
| "grad_norm": 0.3590356707572937, | |
| "learning_rate": 1.7416044718633025e-06, | |
| "loss": 0.8365395665168762, | |
| "step": 1892 | |
| }, | |
| { | |
| "epoch": 2.760932944606414, | |
| "grad_norm": 0.1510230451822281, | |
| "learning_rate": 1.735885933389825e-06, | |
| "loss": 0.6292239427566528, | |
| "step": 1894 | |
| }, | |
| { | |
| "epoch": 2.7638483965014577, | |
| "grad_norm": 0.18348506093025208, | |
| "learning_rate": 1.730234680372116e-06, | |
| "loss": 1.1290793418884277, | |
| "step": 1896 | |
| }, | |
| { | |
| "epoch": 2.7667638483965016, | |
| "grad_norm": 0.16462060809135437, | |
| "learning_rate": 1.7246507711829852e-06, | |
| "loss": 1.1606987714767456, | |
| "step": 1898 | |
| }, | |
| { | |
| "epoch": 2.7696793002915454, | |
| "grad_norm": 0.16783565282821655, | |
| "learning_rate": 1.719134263499633e-06, | |
| "loss": 0.9577206373214722, | |
| "step": 1900 | |
| }, | |
| { | |
| "epoch": 2.772594752186589, | |
| "grad_norm": 0.08972535282373428, | |
| "learning_rate": 1.7136852143030605e-06, | |
| "loss": 0.9086419343948364, | |
| "step": 1902 | |
| }, | |
| { | |
| "epoch": 2.7755102040816326, | |
| "grad_norm": 0.25966984033584595, | |
| "learning_rate": 1.7083036798774771e-06, | |
| "loss": 1.16250479221344, | |
| "step": 1904 | |
| }, | |
| { | |
| "epoch": 2.7784256559766765, | |
| "grad_norm": 0.14714005589485168, | |
| "learning_rate": 1.7029897158097191e-06, | |
| "loss": 0.6218932867050171, | |
| "step": 1906 | |
| }, | |
| { | |
| "epoch": 2.78134110787172, | |
| "grad_norm": 0.1505810022354126, | |
| "learning_rate": 1.6977433769886777e-06, | |
| "loss": 0.9435967206954956, | |
| "step": 1908 | |
| }, | |
| { | |
| "epoch": 2.7842565597667637, | |
| "grad_norm": 0.5554741621017456, | |
| "learning_rate": 1.6925647176047304e-06, | |
| "loss": 1.2954356670379639, | |
| "step": 1910 | |
| }, | |
| { | |
| "epoch": 2.7871720116618075, | |
| "grad_norm": 0.7726877331733704, | |
| "learning_rate": 1.6874537911491804e-06, | |
| "loss": 1.100317120552063, | |
| "step": 1912 | |
| }, | |
| { | |
| "epoch": 2.7900874635568513, | |
| "grad_norm": 0.1900632381439209, | |
| "learning_rate": 1.682410650413707e-06, | |
| "loss": 1.1734505891799927, | |
| "step": 1914 | |
| }, | |
| { | |
| "epoch": 2.793002915451895, | |
| "grad_norm": 0.2996356189250946, | |
| "learning_rate": 1.6774353474898176e-06, | |
| "loss": 0.6496275067329407, | |
| "step": 1916 | |
| }, | |
| { | |
| "epoch": 2.795918367346939, | |
| "grad_norm": 0.28916487097740173, | |
| "learning_rate": 1.6725279337683096e-06, | |
| "loss": 0.8404643535614014, | |
| "step": 1918 | |
| }, | |
| { | |
| "epoch": 2.7988338192419824, | |
| "grad_norm": 0.30399462580680847, | |
| "learning_rate": 1.6676884599387447e-06, | |
| "loss": 0.8097843527793884, | |
| "step": 1920 | |
| }, | |
| { | |
| "epoch": 2.801749271137026, | |
| "grad_norm": 0.15744291245937347, | |
| "learning_rate": 1.6629169759889167e-06, | |
| "loss": 1.1007176637649536, | |
| "step": 1922 | |
| }, | |
| { | |
| "epoch": 2.80466472303207, | |
| "grad_norm": 0.22451713681221008, | |
| "learning_rate": 1.6582135312043415e-06, | |
| "loss": 1.1043728590011597, | |
| "step": 1924 | |
| }, | |
| { | |
| "epoch": 2.8075801749271134, | |
| "grad_norm": 0.16485294699668884, | |
| "learning_rate": 1.6535781741677468e-06, | |
| "loss": 1.1978418827056885, | |
| "step": 1926 | |
| }, | |
| { | |
| "epoch": 2.8104956268221573, | |
| "grad_norm": 0.11872020363807678, | |
| "learning_rate": 1.6490109527585685e-06, | |
| "loss": 1.0319398641586304, | |
| "step": 1928 | |
| }, | |
| { | |
| "epoch": 2.813411078717201, | |
| "grad_norm": 0.22041387856006622, | |
| "learning_rate": 1.6445119141524586e-06, | |
| "loss": 1.0383124351501465, | |
| "step": 1930 | |
| }, | |
| { | |
| "epoch": 2.816326530612245, | |
| "grad_norm": 0.1371716856956482, | |
| "learning_rate": 1.6400811048207957e-06, | |
| "loss": 1.0704172849655151, | |
| "step": 1932 | |
| }, | |
| { | |
| "epoch": 2.8192419825072887, | |
| "grad_norm": 0.33869630098342896, | |
| "learning_rate": 1.6357185705302059e-06, | |
| "loss": 0.9032880663871765, | |
| "step": 1934 | |
| }, | |
| { | |
| "epoch": 2.8221574344023326, | |
| "grad_norm": 0.19506464898586273, | |
| "learning_rate": 1.6314243563420908e-06, | |
| "loss": 1.1649752855300903, | |
| "step": 1936 | |
| }, | |
| { | |
| "epoch": 2.825072886297376, | |
| "grad_norm": 0.16767188906669617, | |
| "learning_rate": 1.627198506612162e-06, | |
| "loss": 1.197486162185669, | |
| "step": 1938 | |
| }, | |
| { | |
| "epoch": 2.82798833819242, | |
| "grad_norm": 0.17042168974876404, | |
| "learning_rate": 1.62304106498998e-06, | |
| "loss": 1.065731167793274, | |
| "step": 1940 | |
| }, | |
| { | |
| "epoch": 2.8309037900874636, | |
| "grad_norm": 0.25560781359672546, | |
| "learning_rate": 1.6189520744185072e-06, | |
| "loss": 0.9224144220352173, | |
| "step": 1942 | |
| }, | |
| { | |
| "epoch": 2.8338192419825075, | |
| "grad_norm": 0.20863035321235657, | |
| "learning_rate": 1.614931577133663e-06, | |
| "loss": 1.0565248727798462, | |
| "step": 1944 | |
| }, | |
| { | |
| "epoch": 2.836734693877551, | |
| "grad_norm": 0.19189637899398804, | |
| "learning_rate": 1.6109796146638871e-06, | |
| "loss": 1.232025384902954, | |
| "step": 1946 | |
| }, | |
| { | |
| "epoch": 2.8396501457725947, | |
| "grad_norm": 0.6458204984664917, | |
| "learning_rate": 1.6070962278297113e-06, | |
| "loss": 1.0065245628356934, | |
| "step": 1948 | |
| }, | |
| { | |
| "epoch": 2.8425655976676385, | |
| "grad_norm": 0.3259865939617157, | |
| "learning_rate": 1.6032814567433348e-06, | |
| "loss": 1.2361031770706177, | |
| "step": 1950 | |
| }, | |
| { | |
| "epoch": 2.8454810495626823, | |
| "grad_norm": 0.4714111089706421, | |
| "learning_rate": 1.5995353408082157e-06, | |
| "loss": 1.3339447975158691, | |
| "step": 1952 | |
| }, | |
| { | |
| "epoch": 2.848396501457726, | |
| "grad_norm": 0.16928227245807648, | |
| "learning_rate": 1.5958579187186582e-06, | |
| "loss": 1.0442076921463013, | |
| "step": 1954 | |
| }, | |
| { | |
| "epoch": 2.8513119533527695, | |
| "grad_norm": 0.3731814920902252, | |
| "learning_rate": 1.5922492284594174e-06, | |
| "loss": 0.878253698348999, | |
| "step": 1956 | |
| }, | |
| { | |
| "epoch": 2.8542274052478134, | |
| "grad_norm": 0.6527604460716248, | |
| "learning_rate": 1.5887093073053036e-06, | |
| "loss": 1.0772031545639038, | |
| "step": 1958 | |
| }, | |
| { | |
| "epoch": 2.857142857142857, | |
| "grad_norm": 0.18542839586734772, | |
| "learning_rate": 1.5852381918207995e-06, | |
| "loss": 1.116060733795166, | |
| "step": 1960 | |
| }, | |
| { | |
| "epoch": 2.860058309037901, | |
| "grad_norm": 0.250535786151886, | |
| "learning_rate": 1.5818359178596806e-06, | |
| "loss": 1.1924026012420654, | |
| "step": 1962 | |
| }, | |
| { | |
| "epoch": 2.8629737609329444, | |
| "grad_norm": 0.07601413875818253, | |
| "learning_rate": 1.5785025205646468e-06, | |
| "loss": 0.9614888429641724, | |
| "step": 1964 | |
| }, | |
| { | |
| "epoch": 2.8658892128279883, | |
| "grad_norm": 0.17522846162319183, | |
| "learning_rate": 1.5752380343669574e-06, | |
| "loss": 1.0021862983703613, | |
| "step": 1966 | |
| }, | |
| { | |
| "epoch": 2.868804664723032, | |
| "grad_norm": 0.22332464158535004, | |
| "learning_rate": 1.5720424929860793e-06, | |
| "loss": 1.0522475242614746, | |
| "step": 1968 | |
| }, | |
| { | |
| "epoch": 2.871720116618076, | |
| "grad_norm": 0.39566364884376526, | |
| "learning_rate": 1.5689159294293333e-06, | |
| "loss": 1.0991871356964111, | |
| "step": 1970 | |
| }, | |
| { | |
| "epoch": 2.8746355685131197, | |
| "grad_norm": 0.3006777763366699, | |
| "learning_rate": 1.5658583759915563e-06, | |
| "loss": 1.068638801574707, | |
| "step": 1972 | |
| }, | |
| { | |
| "epoch": 2.877551020408163, | |
| "grad_norm": 0.18835684657096863, | |
| "learning_rate": 1.5628698642547674e-06, | |
| "loss": 1.0682188272476196, | |
| "step": 1974 | |
| }, | |
| { | |
| "epoch": 2.880466472303207, | |
| "grad_norm": 0.13527542352676392, | |
| "learning_rate": 1.5599504250878434e-06, | |
| "loss": 1.0796337127685547, | |
| "step": 1976 | |
| }, | |
| { | |
| "epoch": 2.883381924198251, | |
| "grad_norm": 0.2289610654115677, | |
| "learning_rate": 1.5571000886461946e-06, | |
| "loss": 1.1682178974151611, | |
| "step": 1978 | |
| }, | |
| { | |
| "epoch": 2.8862973760932946, | |
| "grad_norm": 0.3208562731742859, | |
| "learning_rate": 1.5543188843714597e-06, | |
| "loss": 0.6415768265724182, | |
| "step": 1980 | |
| }, | |
| { | |
| "epoch": 2.889212827988338, | |
| "grad_norm": 0.2707623541355133, | |
| "learning_rate": 1.551606840991198e-06, | |
| "loss": 0.5584684014320374, | |
| "step": 1982 | |
| }, | |
| { | |
| "epoch": 2.892128279883382, | |
| "grad_norm": 0.24681639671325684, | |
| "learning_rate": 1.5489639865185929e-06, | |
| "loss": 0.9024500846862793, | |
| "step": 1984 | |
| }, | |
| { | |
| "epoch": 2.8950437317784257, | |
| "grad_norm": 0.2885083556175232, | |
| "learning_rate": 1.5463903482521637e-06, | |
| "loss": 1.0408830642700195, | |
| "step": 1986 | |
| }, | |
| { | |
| "epoch": 2.8979591836734695, | |
| "grad_norm": 0.2863474190235138, | |
| "learning_rate": 1.543885952775484e-06, | |
| "loss": 0.5923194289207458, | |
| "step": 1988 | |
| }, | |
| { | |
| "epoch": 2.9008746355685133, | |
| "grad_norm": 0.13149987161159515, | |
| "learning_rate": 1.5414508259569033e-06, | |
| "loss": 1.0203630924224854, | |
| "step": 1990 | |
| }, | |
| { | |
| "epoch": 2.9037900874635567, | |
| "grad_norm": 0.08542142808437347, | |
| "learning_rate": 1.5390849929492853e-06, | |
| "loss": 0.4749288260936737, | |
| "step": 1992 | |
| }, | |
| { | |
| "epoch": 2.9067055393586005, | |
| "grad_norm": 0.39572906494140625, | |
| "learning_rate": 1.5367884781897442e-06, | |
| "loss": 0.9975032210350037, | |
| "step": 1994 | |
| }, | |
| { | |
| "epoch": 2.9096209912536444, | |
| "grad_norm": 0.3944467604160309, | |
| "learning_rate": 1.5345613053993947e-06, | |
| "loss": 1.2269786596298218, | |
| "step": 1996 | |
| }, | |
| { | |
| "epoch": 2.912536443148688, | |
| "grad_norm": 0.14900818467140198, | |
| "learning_rate": 1.5324034975831053e-06, | |
| "loss": 1.2356706857681274, | |
| "step": 1998 | |
| }, | |
| { | |
| "epoch": 2.9154518950437316, | |
| "grad_norm": 0.31048882007598877, | |
| "learning_rate": 1.53031507702926e-06, | |
| "loss": 1.218428611755371, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 2.9183673469387754, | |
| "grad_norm": 0.1689174771308899, | |
| "learning_rate": 1.5282960653095309e-06, | |
| "loss": 0.9620698094367981, | |
| "step": 2002 | |
| }, | |
| { | |
| "epoch": 2.9212827988338192, | |
| "grad_norm": 0.2305694818496704, | |
| "learning_rate": 1.5263464832786536e-06, | |
| "loss": 1.2038404941558838, | |
| "step": 2004 | |
| }, | |
| { | |
| "epoch": 2.924198250728863, | |
| "grad_norm": 0.12036718428134918, | |
| "learning_rate": 1.5244663510742102e-06, | |
| "loss": 0.9968715310096741, | |
| "step": 2006 | |
| }, | |
| { | |
| "epoch": 2.927113702623907, | |
| "grad_norm": 0.12467171996831894, | |
| "learning_rate": 1.5226556881164256e-06, | |
| "loss": 1.0186277627944946, | |
| "step": 2008 | |
| }, | |
| { | |
| "epoch": 2.9300291545189503, | |
| "grad_norm": 0.13296104967594147, | |
| "learning_rate": 1.5209145131079634e-06, | |
| "loss": 1.026340365409851, | |
| "step": 2010 | |
| }, | |
| { | |
| "epoch": 2.932944606413994, | |
| "grad_norm": 0.12233509868383408, | |
| "learning_rate": 1.5192428440337316e-06, | |
| "loss": 1.182348608970642, | |
| "step": 2012 | |
| }, | |
| { | |
| "epoch": 2.935860058309038, | |
| "grad_norm": 0.1486111879348755, | |
| "learning_rate": 1.5176406981607024e-06, | |
| "loss": 1.0666353702545166, | |
| "step": 2014 | |
| }, | |
| { | |
| "epoch": 2.938775510204082, | |
| "grad_norm": 0.5397063493728638, | |
| "learning_rate": 1.5161080920377289e-06, | |
| "loss": 1.389245629310608, | |
| "step": 2016 | |
| }, | |
| { | |
| "epoch": 2.941690962099125, | |
| "grad_norm": 0.15026716887950897, | |
| "learning_rate": 1.5146450414953738e-06, | |
| "loss": 1.0400997400283813, | |
| "step": 2018 | |
| }, | |
| { | |
| "epoch": 2.944606413994169, | |
| "grad_norm": 0.11009442806243896, | |
| "learning_rate": 1.5132515616457505e-06, | |
| "loss": 1.001649260520935, | |
| "step": 2020 | |
| }, | |
| { | |
| "epoch": 2.947521865889213, | |
| "grad_norm": 0.7643895745277405, | |
| "learning_rate": 1.5119276668823628e-06, | |
| "loss": 0.37964844703674316, | |
| "step": 2022 | |
| }, | |
| { | |
| "epoch": 2.9504373177842567, | |
| "grad_norm": 0.2546994984149933, | |
| "learning_rate": 1.510673370879957e-06, | |
| "loss": 1.0618635416030884, | |
| "step": 2024 | |
| }, | |
| { | |
| "epoch": 2.9533527696793005, | |
| "grad_norm": 0.15609286725521088, | |
| "learning_rate": 1.5094886865943835e-06, | |
| "loss": 1.013123869895935, | |
| "step": 2026 | |
| }, | |
| { | |
| "epoch": 2.956268221574344, | |
| "grad_norm": 0.09666828066110611, | |
| "learning_rate": 1.5083736262624577e-06, | |
| "loss": 0.7794107794761658, | |
| "step": 2028 | |
| }, | |
| { | |
| "epoch": 2.9591836734693877, | |
| "grad_norm": 0.07339915633201599, | |
| "learning_rate": 1.5073282014018395e-06, | |
| "loss": 1.3735166788101196, | |
| "step": 2030 | |
| }, | |
| { | |
| "epoch": 2.9620991253644315, | |
| "grad_norm": 0.6088920831680298, | |
| "learning_rate": 1.5063524228109107e-06, | |
| "loss": 0.8808611035346985, | |
| "step": 2032 | |
| }, | |
| { | |
| "epoch": 2.9650145772594754, | |
| "grad_norm": 0.1744547188282013, | |
| "learning_rate": 1.5054463005686626e-06, | |
| "loss": 1.1831696033477783, | |
| "step": 2034 | |
| }, | |
| { | |
| "epoch": 2.9679300291545188, | |
| "grad_norm": 0.24790845811367035, | |
| "learning_rate": 1.5046098440345955e-06, | |
| "loss": 1.00650155544281, | |
| "step": 2036 | |
| }, | |
| { | |
| "epoch": 2.9708454810495626, | |
| "grad_norm": 0.18026836216449738, | |
| "learning_rate": 1.5038430618486194e-06, | |
| "loss": 1.1893560886383057, | |
| "step": 2038 | |
| }, | |
| { | |
| "epoch": 2.9737609329446064, | |
| "grad_norm": 0.1259116381406784, | |
| "learning_rate": 1.5031459619309653e-06, | |
| "loss": 1.0219632387161255, | |
| "step": 2040 | |
| }, | |
| { | |
| "epoch": 2.9766763848396502, | |
| "grad_norm": 0.15073135495185852, | |
| "learning_rate": 1.502518551482103e-06, | |
| "loss": 0.7194128036499023, | |
| "step": 2042 | |
| }, | |
| { | |
| "epoch": 2.979591836734694, | |
| "grad_norm": 0.05049153417348862, | |
| "learning_rate": 1.5019608369826692e-06, | |
| "loss": 1.1609373092651367, | |
| "step": 2044 | |
| }, | |
| { | |
| "epoch": 2.9825072886297375, | |
| "grad_norm": 0.11255478858947754, | |
| "learning_rate": 1.501472824193396e-06, | |
| "loss": 1.1452926397323608, | |
| "step": 2046 | |
| }, | |
| { | |
| "epoch": 2.9854227405247813, | |
| "grad_norm": 0.16929762065410614, | |
| "learning_rate": 1.5010545181550563e-06, | |
| "loss": 0.5922563076019287, | |
| "step": 2048 | |
| }, | |
| { | |
| "epoch": 2.988338192419825, | |
| "grad_norm": 0.1267116516828537, | |
| "learning_rate": 1.5007059231884077e-06, | |
| "loss": 0.49650248885154724, | |
| "step": 2050 | |
| }, | |
| { | |
| "epoch": 2.991253644314869, | |
| "grad_norm": 0.1838807910680771, | |
| "learning_rate": 1.5004270428941505e-06, | |
| "loss": 1.1091796159744263, | |
| "step": 2052 | |
| }, | |
| { | |
| "epoch": 2.9941690962099123, | |
| "grad_norm": 0.08408603817224503, | |
| "learning_rate": 1.500217880152889e-06, | |
| "loss": 1.0519981384277344, | |
| "step": 2054 | |
| }, | |
| { | |
| "epoch": 2.997084548104956, | |
| "grad_norm": 0.36840710043907166, | |
| "learning_rate": 1.5000784371251037e-06, | |
| "loss": 0.9989621639251709, | |
| "step": 2056 | |
| }, | |
| { | |
| "epoch": 3.0, | |
| "grad_norm": 0.15688389539718628, | |
| "learning_rate": 1.5000087152511266e-06, | |
| "loss": 1.1339861154556274, | |
| "step": 2058 | |
| }, | |
| { | |
| "epoch": 3.0, | |
| "step": 2058, | |
| "total_flos": 3.1865440491043553e+18, | |
| "train_loss": 1.1440774658359985, | |
| "train_runtime": 18974.7516, | |
| "train_samples_per_second": 1.735, | |
| "train_steps_per_second": 0.108 | |
| } | |
| ], | |
| "logging_steps": 2, | |
| "max_steps": 2058, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 3, | |
| "save_steps": 9999999, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": true | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 3.1865440491043553e+18, | |
| "train_batch_size": 1, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |