Instructions to use furproxy/27b-9-lora with libraries, inference providers, notebooks, and local apps. Follow these links to get started.
- Libraries
- PEFT
How to use furproxy/27b-9-lora with PEFT:
```python
from peft import PeftModel
from transformers import AutoModelForCausalLM

base_model = AutoModelForCausalLM.from_pretrained("/workspace/models/Qwen3.6-27B")
model = PeftModel.from_pretrained(base_model, "furproxy/27b-9-lora")
```
- Transformers
How to use furproxy/27b-9-lora with Transformers:
```python
# Use a pipeline as a high-level helper
from transformers import pipeline

pipe = pipeline("text-generation", model="furproxy/27b-9-lora")
messages = [
    {"role": "user", "content": "Who are you?"},
]
pipe(messages)
```
```python
# Load model directly
from transformers import AutoModel

model = AutoModel.from_pretrained("furproxy/27b-9-lora", dtype="auto")
```
- Notebooks
- Google Colab
- Kaggle
- Local Apps Settings
- vLLM
How to use furproxy/27b-9-lora with vLLM:
Install from pip and serve model
```bash
# Install vLLM from pip:
pip install vllm

# Start the vLLM server:
vllm serve "furproxy/27b-9-lora"

# Call the server using curl (OpenAI-compatible API):
curl -X POST "http://localhost:8000/v1/chat/completions" \
  -H "Content-Type: application/json" \
  --data '{
    "model": "furproxy/27b-9-lora",
    "messages": [
      {
        "role": "user",
        "content": "What is the capital of France?"
      }
    ]
  }'
```
Use Docker
docker model run hf.co/furproxy/27b-9-lora
- SGLang
How to use furproxy/27b-9-lora with SGLang:
Install from pip and serve model
```bash
# Install SGLang from pip:
pip install sglang

# Start the SGLang server:
python3 -m sglang.launch_server \
  --model-path "furproxy/27b-9-lora" \
  --host 0.0.0.0 \
  --port 30000

# Call the server using curl (OpenAI-compatible API):
curl -X POST "http://localhost:30000/v1/chat/completions" \
  -H "Content-Type: application/json" \
  --data '{
    "model": "furproxy/27b-9-lora",
    "messages": [
      {
        "role": "user",
        "content": "What is the capital of France?"
      }
    ]
  }'
```
Use Docker images
```bash
docker run --gpus all \
  --shm-size 32g \
  -p 30000:30000 \
  -v ~/.cache/huggingface:/root/.cache/huggingface \
  --env "HF_TOKEN=<secret>" \
  --ipc=host \
  lmsysorg/sglang:latest \
  python3 -m sglang.launch_server \
    --model-path "furproxy/27b-9-lora" \
    --host 0.0.0.0 \
    --port 30000

# Call the server using curl (OpenAI-compatible API):
curl -X POST "http://localhost:30000/v1/chat/completions" \
  -H "Content-Type: application/json" \
  --data '{
    "model": "furproxy/27b-9-lora",
    "messages": [
      {
        "role": "user",
        "content": "What is the capital of France?"
      }
    ]
  }'
```
- Docker Model Runner
How to use furproxy/27b-9-lora with Docker Model Runner:
docker model run hf.co/furproxy/27b-9-lora
| { | |
| "best_global_step": null, | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 3.0, | |
| "eval_steps": 500, | |
| "global_step": 2457, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.002442002442002442, | |
| "grad_norm": 2.1811931133270264, | |
| "learning_rate": 1.3513513513513515e-06, | |
| "loss": 1.868557095527649, | |
| "step": 2 | |
| }, | |
| { | |
| "epoch": 0.004884004884004884, | |
| "grad_norm": 0.7884963750839233, | |
| "learning_rate": 4.0540540540540545e-06, | |
| "loss": 2.1157455444335938, | |
| "step": 4 | |
| }, | |
| { | |
| "epoch": 0.007326007326007326, | |
| "grad_norm": 1.584022879600525, | |
| "learning_rate": 6.7567567567567575e-06, | |
| "loss": 2.497404098510742, | |
| "step": 6 | |
| }, | |
| { | |
| "epoch": 0.009768009768009768, | |
| "grad_norm": 1.1417503356933594, | |
| "learning_rate": 9.45945945945946e-06, | |
| "loss": 1.7899458408355713, | |
| "step": 8 | |
| }, | |
| { | |
| "epoch": 0.01221001221001221, | |
| "grad_norm": 0.47124025225639343, | |
| "learning_rate": 1.2162162162162164e-05, | |
| "loss": 1.809164047241211, | |
| "step": 10 | |
| }, | |
| { | |
| "epoch": 0.014652014652014652, | |
| "grad_norm": 1.2848045825958252, | |
| "learning_rate": 1.4864864864864867e-05, | |
| "loss": 1.5581135749816895, | |
| "step": 12 | |
| }, | |
| { | |
| "epoch": 0.017094017094017096, | |
| "grad_norm": 0.13765056431293488, | |
| "learning_rate": 1.756756756756757e-05, | |
| "loss": 1.4552407264709473, | |
| "step": 14 | |
| }, | |
| { | |
| "epoch": 0.019536019536019536, | |
| "grad_norm": 0.8791428804397583, | |
| "learning_rate": 2.0270270270270273e-05, | |
| "loss": 1.2719577550888062, | |
| "step": 16 | |
| }, | |
| { | |
| "epoch": 0.02197802197802198, | |
| "grad_norm": 1.1116337776184082, | |
| "learning_rate": 2.2972972972972976e-05, | |
| "loss": 1.2390166521072388, | |
| "step": 18 | |
| }, | |
| { | |
| "epoch": 0.02442002442002442, | |
| "grad_norm": 1.4942550659179688, | |
| "learning_rate": 2.5675675675675675e-05, | |
| "loss": 1.5379928350448608, | |
| "step": 20 | |
| }, | |
| { | |
| "epoch": 0.026862026862026864, | |
| "grad_norm": 0.3107071816921234, | |
| "learning_rate": 2.8378378378378378e-05, | |
| "loss": 1.2056142091751099, | |
| "step": 22 | |
| }, | |
| { | |
| "epoch": 0.029304029304029304, | |
| "grad_norm": 0.19078664481639862, | |
| "learning_rate": 3.108108108108108e-05, | |
| "loss": 1.0852341651916504, | |
| "step": 24 | |
| }, | |
| { | |
| "epoch": 0.031746031746031744, | |
| "grad_norm": 0.19628752768039703, | |
| "learning_rate": 3.3783783783783784e-05, | |
| "loss": 1.573239803314209, | |
| "step": 26 | |
| }, | |
| { | |
| "epoch": 0.03418803418803419, | |
| "grad_norm": 0.35955142974853516, | |
| "learning_rate": 3.648648648648649e-05, | |
| "loss": 1.6702971458435059, | |
| "step": 28 | |
| }, | |
| { | |
| "epoch": 0.03663003663003663, | |
| "grad_norm": 0.9169114232063293, | |
| "learning_rate": 3.918918918918919e-05, | |
| "loss": 1.0869706869125366, | |
| "step": 30 | |
| }, | |
| { | |
| "epoch": 0.03907203907203907, | |
| "grad_norm": 0.1373993307352066, | |
| "learning_rate": 4.189189189189189e-05, | |
| "loss": 0.9406343102455139, | |
| "step": 32 | |
| }, | |
| { | |
| "epoch": 0.04151404151404151, | |
| "grad_norm": 0.17059364914894104, | |
| "learning_rate": 4.4594594594594596e-05, | |
| "loss": 1.4778518676757812, | |
| "step": 34 | |
| }, | |
| { | |
| "epoch": 0.04395604395604396, | |
| "grad_norm": 0.6226996779441833, | |
| "learning_rate": 4.72972972972973e-05, | |
| "loss": 1.2717756032943726, | |
| "step": 36 | |
| }, | |
| { | |
| "epoch": 0.0463980463980464, | |
| "grad_norm": 0.23670853674411774, | |
| "learning_rate": 5e-05, | |
| "loss": 1.4467027187347412, | |
| "step": 38 | |
| }, | |
| { | |
| "epoch": 0.04884004884004884, | |
| "grad_norm": 1.4851033687591553, | |
| "learning_rate": 5.27027027027027e-05, | |
| "loss": 1.3635867834091187, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 0.05128205128205128, | |
| "grad_norm": 0.5448616147041321, | |
| "learning_rate": 5.540540540540541e-05, | |
| "loss": 1.4161449670791626, | |
| "step": 42 | |
| }, | |
| { | |
| "epoch": 0.05372405372405373, | |
| "grad_norm": 0.20034204423427582, | |
| "learning_rate": 5.8108108108108105e-05, | |
| "loss": 1.3606215715408325, | |
| "step": 44 | |
| }, | |
| { | |
| "epoch": 0.05616605616605617, | |
| "grad_norm": 0.402030885219574, | |
| "learning_rate": 6.0810810810810814e-05, | |
| "loss": 1.391851782798767, | |
| "step": 46 | |
| }, | |
| { | |
| "epoch": 0.05860805860805861, | |
| "grad_norm": 0.3388453423976898, | |
| "learning_rate": 6.351351351351352e-05, | |
| "loss": 0.9975868463516235, | |
| "step": 48 | |
| }, | |
| { | |
| "epoch": 0.06105006105006105, | |
| "grad_norm": 0.2083309441804886, | |
| "learning_rate": 6.621621621621621e-05, | |
| "loss": 0.9237704277038574, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 0.06349206349206349, | |
| "grad_norm": 0.41451364755630493, | |
| "learning_rate": 6.891891891891892e-05, | |
| "loss": 1.1483968496322632, | |
| "step": 52 | |
| }, | |
| { | |
| "epoch": 0.06593406593406594, | |
| "grad_norm": 1.6289576292037964, | |
| "learning_rate": 7.162162162162162e-05, | |
| "loss": 1.0937950611114502, | |
| "step": 54 | |
| }, | |
| { | |
| "epoch": 0.06837606837606838, | |
| "grad_norm": 0.2840253412723541, | |
| "learning_rate": 7.432432432432433e-05, | |
| "loss": 1.2342710494995117, | |
| "step": 56 | |
| }, | |
| { | |
| "epoch": 0.07081807081807082, | |
| "grad_norm": 0.2380845546722412, | |
| "learning_rate": 7.702702702702703e-05, | |
| "loss": 1.3051679134368896, | |
| "step": 58 | |
| }, | |
| { | |
| "epoch": 0.07326007326007326, | |
| "grad_norm": 0.31609681248664856, | |
| "learning_rate": 7.972972972972974e-05, | |
| "loss": 1.3347299098968506, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 0.0757020757020757, | |
| "grad_norm": 0.1922251433134079, | |
| "learning_rate": 8.243243243243243e-05, | |
| "loss": 1.014764666557312, | |
| "step": 62 | |
| }, | |
| { | |
| "epoch": 0.07814407814407814, | |
| "grad_norm": 0.7087109088897705, | |
| "learning_rate": 8.513513513513514e-05, | |
| "loss": 1.5816763639450073, | |
| "step": 64 | |
| }, | |
| { | |
| "epoch": 0.08058608058608059, | |
| "grad_norm": 3.1793134212493896, | |
| "learning_rate": 8.783783783783784e-05, | |
| "loss": 1.036803126335144, | |
| "step": 66 | |
| }, | |
| { | |
| "epoch": 0.08302808302808302, | |
| "grad_norm": 0.4886671304702759, | |
| "learning_rate": 9.054054054054055e-05, | |
| "loss": 1.0347474813461304, | |
| "step": 68 | |
| }, | |
| { | |
| "epoch": 0.08547008547008547, | |
| "grad_norm": 0.10275212675333023, | |
| "learning_rate": 9.324324324324324e-05, | |
| "loss": 1.0979657173156738, | |
| "step": 70 | |
| }, | |
| { | |
| "epoch": 0.08791208791208792, | |
| "grad_norm": 0.3693665862083435, | |
| "learning_rate": 9.594594594594595e-05, | |
| "loss": 1.335373878479004, | |
| "step": 72 | |
| }, | |
| { | |
| "epoch": 0.09035409035409035, | |
| "grad_norm": 0.6129628419876099, | |
| "learning_rate": 9.864864864864865e-05, | |
| "loss": 1.3981304168701172, | |
| "step": 74 | |
| }, | |
| { | |
| "epoch": 0.0927960927960928, | |
| "grad_norm": 1.7713521718978882, | |
| "learning_rate": 9.999996089483563e-05, | |
| "loss": 1.3795212507247925, | |
| "step": 76 | |
| }, | |
| { | |
| "epoch": 0.09523809523809523, | |
| "grad_norm": 0.28953030705451965, | |
| "learning_rate": 9.999964805392829e-05, | |
| "loss": 1.2494409084320068, | |
| "step": 78 | |
| }, | |
| { | |
| "epoch": 0.09768009768009768, | |
| "grad_norm": 0.4251790940761566, | |
| "learning_rate": 9.999902237428851e-05, | |
| "loss": 1.121893286705017, | |
| "step": 80 | |
| }, | |
| { | |
| "epoch": 0.10012210012210013, | |
| "grad_norm": 0.4000133275985718, | |
| "learning_rate": 9.999808386026605e-05, | |
| "loss": 0.9894894361495972, | |
| "step": 82 | |
| }, | |
| { | |
| "epoch": 0.10256410256410256, | |
| "grad_norm": 0.2130272090435028, | |
| "learning_rate": 9.999683251838543e-05, | |
| "loss": 1.0684369802474976, | |
| "step": 84 | |
| }, | |
| { | |
| "epoch": 0.10500610500610501, | |
| "grad_norm": 0.295070618391037, | |
| "learning_rate": 9.999526835734604e-05, | |
| "loss": 1.2120764255523682, | |
| "step": 86 | |
| }, | |
| { | |
| "epoch": 0.10744810744810745, | |
| "grad_norm": 1.4132590293884277, | |
| "learning_rate": 9.999339138802198e-05, | |
| "loss": 1.380957007408142, | |
| "step": 88 | |
| }, | |
| { | |
| "epoch": 0.10989010989010989, | |
| "grad_norm": 0.12594525516033173, | |
| "learning_rate": 9.999120162346197e-05, | |
| "loss": 1.0840160846710205, | |
| "step": 90 | |
| }, | |
| { | |
| "epoch": 0.11233211233211234, | |
| "grad_norm": 0.6391283869743347, | |
| "learning_rate": 9.998869907888932e-05, | |
| "loss": 1.4286606311798096, | |
| "step": 92 | |
| }, | |
| { | |
| "epoch": 0.11477411477411477, | |
| "grad_norm": 0.14439892768859863, | |
| "learning_rate": 9.998588377170179e-05, | |
| "loss": 0.8911224007606506, | |
| "step": 94 | |
| }, | |
| { | |
| "epoch": 0.11721611721611722, | |
| "grad_norm": 0.5071588158607483, | |
| "learning_rate": 9.998275572147145e-05, | |
| "loss": 1.350576400756836, | |
| "step": 96 | |
| }, | |
| { | |
| "epoch": 0.11965811965811966, | |
| "grad_norm": 0.25438928604125977, | |
| "learning_rate": 9.997931494994461e-05, | |
| "loss": 1.2859123945236206, | |
| "step": 98 | |
| }, | |
| { | |
| "epoch": 0.1221001221001221, | |
| "grad_norm": 0.3218170702457428, | |
| "learning_rate": 9.99755614810416e-05, | |
| "loss": 1.0188755989074707, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.12454212454212454, | |
| "grad_norm": 0.9470664262771606, | |
| "learning_rate": 9.997149534085663e-05, | |
| "loss": 1.244392991065979, | |
| "step": 102 | |
| }, | |
| { | |
| "epoch": 0.12698412698412698, | |
| "grad_norm": 0.26736143231391907, | |
| "learning_rate": 9.99671165576576e-05, | |
| "loss": 0.9236730337142944, | |
| "step": 104 | |
| }, | |
| { | |
| "epoch": 0.12942612942612944, | |
| "grad_norm": 0.11875687539577484, | |
| "learning_rate": 9.996242516188592e-05, | |
| "loss": 0.8855689167976379, | |
| "step": 106 | |
| }, | |
| { | |
| "epoch": 0.13186813186813187, | |
| "grad_norm": 0.27750205993652344, | |
| "learning_rate": 9.995742118615635e-05, | |
| "loss": 1.2882789373397827, | |
| "step": 108 | |
| }, | |
| { | |
| "epoch": 0.1343101343101343, | |
| "grad_norm": 0.26098600029945374, | |
| "learning_rate": 9.995210466525661e-05, | |
| "loss": 1.5547314882278442, | |
| "step": 110 | |
| }, | |
| { | |
| "epoch": 0.13675213675213677, | |
| "grad_norm": 0.34054574370384216, | |
| "learning_rate": 9.99464756361473e-05, | |
| "loss": 1.303471326828003, | |
| "step": 112 | |
| }, | |
| { | |
| "epoch": 0.1391941391941392, | |
| "grad_norm": 0.7845221757888794, | |
| "learning_rate": 9.99405341379616e-05, | |
| "loss": 1.1013230085372925, | |
| "step": 114 | |
| }, | |
| { | |
| "epoch": 0.14163614163614163, | |
| "grad_norm": 0.255355566740036, | |
| "learning_rate": 9.993428021200492e-05, | |
| "loss": 1.2645354270935059, | |
| "step": 116 | |
| }, | |
| { | |
| "epoch": 0.14407814407814407, | |
| "grad_norm": 5.062347412109375, | |
| "learning_rate": 9.992771390175476e-05, | |
| "loss": 1.3062028884887695, | |
| "step": 118 | |
| }, | |
| { | |
| "epoch": 0.14652014652014653, | |
| "grad_norm": 0.17388251423835754, | |
| "learning_rate": 9.992083525286026e-05, | |
| "loss": 1.2420780658721924, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 0.14896214896214896, | |
| "grad_norm": 0.227823868393898, | |
| "learning_rate": 9.991364431314197e-05, | |
| "loss": 1.266753077507019, | |
| "step": 122 | |
| }, | |
| { | |
| "epoch": 0.1514041514041514, | |
| "grad_norm": 0.2214212864637375, | |
| "learning_rate": 9.99061411325915e-05, | |
| "loss": 1.327446699142456, | |
| "step": 124 | |
| }, | |
| { | |
| "epoch": 0.15384615384615385, | |
| "grad_norm": 0.33451107144355774, | |
| "learning_rate": 9.989832576337114e-05, | |
| "loss": 1.1129432916641235, | |
| "step": 126 | |
| }, | |
| { | |
| "epoch": 0.1562881562881563, | |
| "grad_norm": 0.14846676588058472, | |
| "learning_rate": 9.989019825981355e-05, | |
| "loss": 1.3108670711517334, | |
| "step": 128 | |
| }, | |
| { | |
| "epoch": 0.15873015873015872, | |
| "grad_norm": 0.42650359869003296, | |
| "learning_rate": 9.988175867842136e-05, | |
| "loss": 1.1997311115264893, | |
| "step": 130 | |
| }, | |
| { | |
| "epoch": 0.16117216117216118, | |
| "grad_norm": 0.11989300698041916, | |
| "learning_rate": 9.987300707786673e-05, | |
| "loss": 0.9887065887451172, | |
| "step": 132 | |
| }, | |
| { | |
| "epoch": 0.16361416361416362, | |
| "grad_norm": 0.3521728217601776, | |
| "learning_rate": 9.986394351899106e-05, | |
| "loss": 1.1310060024261475, | |
| "step": 134 | |
| }, | |
| { | |
| "epoch": 0.16605616605616605, | |
| "grad_norm": 1.3506042957305908, | |
| "learning_rate": 9.985456806480441e-05, | |
| "loss": 1.1330771446228027, | |
| "step": 136 | |
| }, | |
| { | |
| "epoch": 0.1684981684981685, | |
| "grad_norm": 0.5201511979103088, | |
| "learning_rate": 9.984488078048521e-05, | |
| "loss": 1.093977928161621, | |
| "step": 138 | |
| }, | |
| { | |
| "epoch": 0.17094017094017094, | |
| "grad_norm": 0.1980191022157669, | |
| "learning_rate": 9.983488173337968e-05, | |
| "loss": 1.0323731899261475, | |
| "step": 140 | |
| }, | |
| { | |
| "epoch": 0.17338217338217338, | |
| "grad_norm": 0.3852697014808655, | |
| "learning_rate": 9.982457099300151e-05, | |
| "loss": 1.2874304056167603, | |
| "step": 142 | |
| }, | |
| { | |
| "epoch": 0.17582417582417584, | |
| "grad_norm": 1.0511770248413086, | |
| "learning_rate": 9.981394863103119e-05, | |
| "loss": 0.9831873774528503, | |
| "step": 144 | |
| }, | |
| { | |
| "epoch": 0.17826617826617827, | |
| "grad_norm": 0.5995257496833801, | |
| "learning_rate": 9.980301472131569e-05, | |
| "loss": 0.9431930184364319, | |
| "step": 146 | |
| }, | |
| { | |
| "epoch": 0.1807081807081807, | |
| "grad_norm": 0.21805083751678467, | |
| "learning_rate": 9.979176933986781e-05, | |
| "loss": 0.9249687790870667, | |
| "step": 148 | |
| }, | |
| { | |
| "epoch": 0.18315018315018314, | |
| "grad_norm": 0.3012978732585907, | |
| "learning_rate": 9.978021256486574e-05, | |
| "loss": 1.2490816116333008, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 0.1855921855921856, | |
| "grad_norm": 0.3798808157444, | |
| "learning_rate": 9.976834447665254e-05, | |
| "loss": 1.4017581939697266, | |
| "step": 152 | |
| }, | |
| { | |
| "epoch": 0.18803418803418803, | |
| "grad_norm": 0.2104426920413971, | |
| "learning_rate": 9.975616515773537e-05, | |
| "loss": 1.2626969814300537, | |
| "step": 154 | |
| }, | |
| { | |
| "epoch": 0.19047619047619047, | |
| "grad_norm": 0.24719548225402832, | |
| "learning_rate": 9.974367469278525e-05, | |
| "loss": 1.563340187072754, | |
| "step": 156 | |
| }, | |
| { | |
| "epoch": 0.19291819291819293, | |
| "grad_norm": 0.21048368513584137, | |
| "learning_rate": 9.973087316863619e-05, | |
| "loss": 1.1235123872756958, | |
| "step": 158 | |
| }, | |
| { | |
| "epoch": 0.19536019536019536, | |
| "grad_norm": 0.41723477840423584, | |
| "learning_rate": 9.971776067428474e-05, | |
| "loss": 0.8511600494384766, | |
| "step": 160 | |
| }, | |
| { | |
| "epoch": 0.1978021978021978, | |
| "grad_norm": 0.2877791225910187, | |
| "learning_rate": 9.970433730088928e-05, | |
| "loss": 1.313219428062439, | |
| "step": 162 | |
| }, | |
| { | |
| "epoch": 0.20024420024420025, | |
| "grad_norm": 0.3880084455013275, | |
| "learning_rate": 9.969060314176948e-05, | |
| "loss": 1.1544859409332275, | |
| "step": 164 | |
| }, | |
| { | |
| "epoch": 0.2026862026862027, | |
| "grad_norm": 0.2529478371143341, | |
| "learning_rate": 9.967655829240554e-05, | |
| "loss": 1.2319352626800537, | |
| "step": 166 | |
| }, | |
| { | |
| "epoch": 0.20512820512820512, | |
| "grad_norm": 1.051802158355713, | |
| "learning_rate": 9.966220285043766e-05, | |
| "loss": 1.1815460920333862, | |
| "step": 168 | |
| }, | |
| { | |
| "epoch": 0.20757020757020758, | |
| "grad_norm": 0.34059885144233704, | |
| "learning_rate": 9.964753691566522e-05, | |
| "loss": 0.8983937501907349, | |
| "step": 170 | |
| }, | |
| { | |
| "epoch": 0.21001221001221002, | |
| "grad_norm": 0.33452534675598145, | |
| "learning_rate": 9.96325605900462e-05, | |
| "loss": 0.9824310541152954, | |
| "step": 172 | |
| }, | |
| { | |
| "epoch": 0.21245421245421245, | |
| "grad_norm": 0.24487395584583282, | |
| "learning_rate": 9.961727397769638e-05, | |
| "loss": 1.2502901554107666, | |
| "step": 174 | |
| }, | |
| { | |
| "epoch": 0.2148962148962149, | |
| "grad_norm": 0.21993722021579742, | |
| "learning_rate": 9.960167718488873e-05, | |
| "loss": 1.1866809129714966, | |
| "step": 176 | |
| }, | |
| { | |
| "epoch": 0.21733821733821734, | |
| "grad_norm": 0.15603308379650116, | |
| "learning_rate": 9.958577032005254e-05, | |
| "loss": 1.2905993461608887, | |
| "step": 178 | |
| }, | |
| { | |
| "epoch": 0.21978021978021978, | |
| "grad_norm": 0.09865382313728333, | |
| "learning_rate": 9.956955349377276e-05, | |
| "loss": 0.9236026406288147, | |
| "step": 180 | |
| }, | |
| { | |
| "epoch": 0.2222222222222222, | |
| "grad_norm": 0.36675918102264404, | |
| "learning_rate": 9.955302681878916e-05, | |
| "loss": 0.8462271094322205, | |
| "step": 182 | |
| }, | |
| { | |
| "epoch": 0.22466422466422467, | |
| "grad_norm": 0.24180293083190918, | |
| "learning_rate": 9.953619040999566e-05, | |
| "loss": 1.2509307861328125, | |
| "step": 184 | |
| }, | |
| { | |
| "epoch": 0.2271062271062271, | |
| "grad_norm": 1.0099902153015137, | |
| "learning_rate": 9.95190443844394e-05, | |
| "loss": 1.2364705801010132, | |
| "step": 186 | |
| }, | |
| { | |
| "epoch": 0.22954822954822954, | |
| "grad_norm": 0.4814469516277313, | |
| "learning_rate": 9.950158886132e-05, | |
| "loss": 1.322394609451294, | |
| "step": 188 | |
| }, | |
| { | |
| "epoch": 0.231990231990232, | |
| "grad_norm": 0.3766232430934906, | |
| "learning_rate": 9.94838239619887e-05, | |
| "loss": 1.2770557403564453, | |
| "step": 190 | |
| }, | |
| { | |
| "epoch": 0.23443223443223443, | |
| "grad_norm": 0.6789996027946472, | |
| "learning_rate": 9.946574980994755e-05, | |
| "loss": 1.2627307176589966, | |
| "step": 192 | |
| }, | |
| { | |
| "epoch": 0.23687423687423687, | |
| "grad_norm": 0.27175506949424744, | |
| "learning_rate": 9.944736653084856e-05, | |
| "loss": 1.2486218214035034, | |
| "step": 194 | |
| }, | |
| { | |
| "epoch": 0.23931623931623933, | |
| "grad_norm": 1.9304533004760742, | |
| "learning_rate": 9.942867425249272e-05, | |
| "loss": 1.0011327266693115, | |
| "step": 196 | |
| }, | |
| { | |
| "epoch": 0.24175824175824176, | |
| "grad_norm": 0.6165415048599243, | |
| "learning_rate": 9.940967310482926e-05, | |
| "loss": 1.2334614992141724, | |
| "step": 198 | |
| }, | |
| { | |
| "epoch": 0.2442002442002442, | |
| "grad_norm": 0.36087173223495483, | |
| "learning_rate": 9.939036321995468e-05, | |
| "loss": 1.3565868139266968, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.24664224664224665, | |
| "grad_norm": 0.6039561033248901, | |
| "learning_rate": 9.937074473211177e-05, | |
| "loss": 1.2386367321014404, | |
| "step": 202 | |
| }, | |
| { | |
| "epoch": 0.2490842490842491, | |
| "grad_norm": 3.3820178508758545, | |
| "learning_rate": 9.935081777768879e-05, | |
| "loss": 0.9418210387229919, | |
| "step": 204 | |
| }, | |
| { | |
| "epoch": 0.2515262515262515, | |
| "grad_norm": 0.23542089760303497, | |
| "learning_rate": 9.933058249521843e-05, | |
| "loss": 1.141281247138977, | |
| "step": 206 | |
| }, | |
| { | |
| "epoch": 0.25396825396825395, | |
| "grad_norm": 0.20796838402748108, | |
| "learning_rate": 9.931003902537695e-05, | |
| "loss": 1.0437651872634888, | |
| "step": 208 | |
| }, | |
| { | |
| "epoch": 0.2564102564102564, | |
| "grad_norm": 1.398657202720642, | |
| "learning_rate": 9.928918751098302e-05, | |
| "loss": 0.7830906510353088, | |
| "step": 210 | |
| }, | |
| { | |
| "epoch": 0.2588522588522589, | |
| "grad_norm": 0.6968111395835876, | |
| "learning_rate": 9.926802809699698e-05, | |
| "loss": 0.5629807114601135, | |
| "step": 212 | |
| }, | |
| { | |
| "epoch": 0.2612942612942613, | |
| "grad_norm": 0.042602118104696274, | |
| "learning_rate": 9.924656093051961e-05, | |
| "loss": 1.0612519979476929, | |
| "step": 214 | |
| }, | |
| { | |
| "epoch": 0.26373626373626374, | |
| "grad_norm": 0.4785561263561249, | |
| "learning_rate": 9.92247861607912e-05, | |
| "loss": 1.226085901260376, | |
| "step": 216 | |
| }, | |
| { | |
| "epoch": 0.2661782661782662, | |
| "grad_norm": 0.28073281049728394, | |
| "learning_rate": 9.920270393919052e-05, | |
| "loss": 1.1725265979766846, | |
| "step": 218 | |
| }, | |
| { | |
| "epoch": 0.2686202686202686, | |
| "grad_norm": 0.19930191338062286, | |
| "learning_rate": 9.918031441923377e-05, | |
| "loss": 1.2625384330749512, | |
| "step": 220 | |
| }, | |
| { | |
| "epoch": 0.27106227106227104, | |
| "grad_norm": 0.227145716547966, | |
| "learning_rate": 9.915761775657347e-05, | |
| "loss": 0.8954972624778748, | |
| "step": 222 | |
| }, | |
| { | |
| "epoch": 0.27350427350427353, | |
| "grad_norm": 0.46113353967666626, | |
| "learning_rate": 9.913461410899741e-05, | |
| "loss": 0.8901416659355164, | |
| "step": 224 | |
| }, | |
| { | |
| "epoch": 0.27594627594627597, | |
| "grad_norm": 0.4459180533885956, | |
| "learning_rate": 9.911130363642756e-05, | |
| "loss": 1.268752098083496, | |
| "step": 226 | |
| }, | |
| { | |
| "epoch": 0.2783882783882784, | |
| "grad_norm": 13.680084228515625, | |
| "learning_rate": 9.908768650091893e-05, | |
| "loss": 1.4194141626358032, | |
| "step": 228 | |
| }, | |
| { | |
| "epoch": 0.28083028083028083, | |
| "grad_norm": 0.41491127014160156, | |
| "learning_rate": 9.906376286665846e-05, | |
| "loss": 1.31623375415802, | |
| "step": 230 | |
| }, | |
| { | |
| "epoch": 0.28327228327228327, | |
| "grad_norm": 1.1663336753845215, | |
| "learning_rate": 9.90395328999639e-05, | |
| "loss": 1.1711093187332153, | |
| "step": 232 | |
| }, | |
| { | |
| "epoch": 0.2857142857142857, | |
| "grad_norm": 0.2338106781244278, | |
| "learning_rate": 9.90149967692826e-05, | |
| "loss": 0.904794454574585, | |
| "step": 234 | |
| }, | |
| { | |
| "epoch": 0.28815628815628813, | |
| "grad_norm": 0.3005489408969879, | |
| "learning_rate": 9.899015464519038e-05, | |
| "loss": 1.2332934141159058, | |
| "step": 236 | |
| }, | |
| { | |
| "epoch": 0.2905982905982906, | |
| "grad_norm": 0.279463529586792, | |
| "learning_rate": 9.896500670039034e-05, | |
| "loss": 1.205855131149292, | |
| "step": 238 | |
| }, | |
| { | |
| "epoch": 0.29304029304029305, | |
| "grad_norm": 0.27901721000671387, | |
| "learning_rate": 9.893955310971166e-05, | |
| "loss": 1.0446724891662598, | |
| "step": 240 | |
| }, | |
| { | |
| "epoch": 0.2954822954822955, | |
| "grad_norm": 0.34487685561180115, | |
| "learning_rate": 9.891379405010836e-05, | |
| "loss": 0.9597420692443848, | |
| "step": 242 | |
| }, | |
| { | |
| "epoch": 0.2979242979242979, | |
| "grad_norm": 0.32779836654663086, | |
| "learning_rate": 9.888772970065811e-05, | |
| "loss": 1.279968023300171, | |
| "step": 244 | |
| }, | |
| { | |
| "epoch": 0.30036630036630035, | |
| "grad_norm": 0.1265312284231186, | |
| "learning_rate": 9.886136024256092e-05, | |
| "loss": 0.7203795909881592, | |
| "step": 246 | |
| }, | |
| { | |
| "epoch": 0.3028083028083028, | |
| "grad_norm": 0.47152769565582275, | |
| "learning_rate": 9.883468585913799e-05, | |
| "loss": 1.133167028427124, | |
| "step": 248 | |
| }, | |
| { | |
| "epoch": 0.3052503052503053, | |
| "grad_norm": 0.26629748940467834, | |
| "learning_rate": 9.880770673583032e-05, | |
| "loss": 0.9463790059089661, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 0.3076923076923077, | |
| "grad_norm": 0.7086778879165649, | |
| "learning_rate": 9.878042306019748e-05, | |
| "loss": 1.2091960906982422, | |
| "step": 252 | |
| }, | |
| { | |
| "epoch": 0.31013431013431014, | |
| "grad_norm": 0.13133220374584198, | |
| "learning_rate": 9.87528350219163e-05, | |
| "loss": 1.237898826599121, | |
| "step": 254 | |
| }, | |
| { | |
| "epoch": 0.3125763125763126, | |
| "grad_norm": 0.13263174891471863, | |
| "learning_rate": 9.872494281277953e-05, | |
| "loss": 1.2304308414459229, | |
| "step": 256 | |
| }, | |
| { | |
| "epoch": 0.315018315018315, | |
| "grad_norm": 0.25059109926223755, | |
| "learning_rate": 9.869674662669456e-05, | |
| "loss": 1.265878677368164, | |
| "step": 258 | |
| }, | |
| { | |
| "epoch": 0.31746031746031744, | |
| "grad_norm": 0.12049660086631775, | |
| "learning_rate": 9.866824665968199e-05, | |
| "loss": 1.2052003145217896, | |
| "step": 260 | |
| }, | |
| { | |
| "epoch": 0.3199023199023199, | |
| "grad_norm": 0.2586797773838043, | |
| "learning_rate": 9.863944310987437e-05, | |
| "loss": 1.4983171224594116, | |
| "step": 262 | |
| }, | |
| { | |
| "epoch": 0.32234432234432236, | |
| "grad_norm": 0.6865373253822327, | |
| "learning_rate": 9.861033617751472e-05, | |
| "loss": 0.7312511801719666, | |
| "step": 264 | |
| }, | |
| { | |
| "epoch": 0.3247863247863248, | |
| "grad_norm": 0.41726404428482056, | |
| "learning_rate": 9.85809260649552e-05, | |
| "loss": 0.9812588691711426, | |
| "step": 266 | |
| }, | |
| { | |
| "epoch": 0.32722832722832723, | |
| "grad_norm": 0.2417762130498886, | |
| "learning_rate": 9.855121297665567e-05, | |
| "loss": 1.4306082725524902, | |
| "step": 268 | |
| }, | |
| { | |
| "epoch": 0.32967032967032966, | |
| "grad_norm": 0.17309008538722992, | |
| "learning_rate": 9.852119711918228e-05, | |
| "loss": 0.9648420810699463, | |
| "step": 270 | |
| }, | |
| { | |
| "epoch": 0.3321123321123321, | |
| "grad_norm": 0.2355567216873169, | |
| "learning_rate": 9.84908787012061e-05, | |
| "loss": 1.2845078706741333, | |
| "step": 272 | |
| }, | |
| { | |
| "epoch": 0.33455433455433453, | |
| "grad_norm": 0.42767664790153503, | |
| "learning_rate": 9.846025793350155e-05, | |
| "loss": 1.0052818059921265, | |
| "step": 274 | |
| }, | |
| { | |
| "epoch": 0.336996336996337, | |
| "grad_norm": 0.775022029876709, | |
| "learning_rate": 9.842933502894502e-05, | |
| "loss": 1.0569660663604736, | |
| "step": 276 | |
| }, | |
| { | |
| "epoch": 0.33943833943833945, | |
| "grad_norm": 0.27430880069732666, | |
| "learning_rate": 9.839811020251332e-05, | |
| "loss": 1.3184398412704468, | |
| "step": 278 | |
| }, | |
| { | |
| "epoch": 0.3418803418803419, | |
| "grad_norm": 0.18294291198253632, | |
| "learning_rate": 9.836658367128227e-05, | |
| "loss": 1.2585026025772095, | |
| "step": 280 | |
| }, | |
| { | |
| "epoch": 0.3443223443223443, | |
| "grad_norm": 0.36614471673965454, | |
| "learning_rate": 9.833475565442519e-05, | |
| "loss": 1.2577348947525024, | |
| "step": 282 | |
| }, | |
| { | |
| "epoch": 0.34676434676434675, | |
| "grad_norm": 0.4083240330219269, | |
| "learning_rate": 9.830262637321123e-05, | |
| "loss": 1.3025801181793213, | |
| "step": 284 | |
| }, | |
| { | |
| "epoch": 0.3492063492063492, | |
| "grad_norm": 0.3884086608886719, | |
| "learning_rate": 9.827019605100402e-05, | |
| "loss": 1.1091903448104858, | |
| "step": 286 | |
| }, | |
| { | |
| "epoch": 0.3516483516483517, | |
| "grad_norm": 0.6931549310684204, | |
| "learning_rate": 9.823746491326002e-05, | |
| "loss": 1.6183369159698486, | |
| "step": 288 | |
| }, | |
| { | |
| "epoch": 0.3540903540903541, | |
| "grad_norm": 0.2619084119796753, | |
| "learning_rate": 9.820443318752693e-05, | |
| "loss": 0.9341627955436707, | |
| "step": 290 | |
| }, | |
| { | |
| "epoch": 0.35653235653235654, | |
| "grad_norm": 0.2873152494430542, | |
| "learning_rate": 9.817110110344218e-05, | |
| "loss": 1.2621850967407227, | |
| "step": 292 | |
| }, | |
| { | |
| "epoch": 0.358974358974359, | |
| "grad_norm": 0.32480373978614807, | |
| "learning_rate": 9.813746889273128e-05, | |
| "loss": 1.0202349424362183, | |
| "step": 294 | |
| }, | |
| { | |
| "epoch": 0.3614163614163614, | |
| "grad_norm": 0.22089935839176178, | |
| "learning_rate": 9.81035367892063e-05, | |
| "loss": 1.0331007242202759, | |
| "step": 296 | |
| }, | |
| { | |
| "epoch": 0.36385836385836384, | |
| "grad_norm": 8.16120433807373, | |
| "learning_rate": 9.806930502876405e-05, | |
| "loss": 1.3227959871292114, | |
| "step": 298 | |
| }, | |
| { | |
| "epoch": 0.3663003663003663, | |
| "grad_norm": 0.7218601107597351, | |
| "learning_rate": 9.803477384938464e-05, | |
| "loss": 1.1196651458740234, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 0.36874236874236876, | |
| "grad_norm": 1.8668947219848633, | |
| "learning_rate": 9.799994349112975e-05, | |
| "loss": 0.8535418510437012, | |
| "step": 302 | |
| }, | |
| { | |
| "epoch": 0.3711843711843712, | |
| "grad_norm": 0.48257267475128174, | |
| "learning_rate": 9.796481419614094e-05, | |
| "loss": 1.167357325553894, | |
| "step": 304 | |
| }, | |
| { | |
| "epoch": 0.37362637362637363, | |
| "grad_norm": 0.15137240290641785, | |
| "learning_rate": 9.792938620863798e-05, | |
| "loss": 1.2498914003372192, | |
| "step": 306 | |
| }, | |
| { | |
| "epoch": 0.37606837606837606, | |
| "grad_norm": 0.27039146423339844, | |
| "learning_rate": 9.789365977491716e-05, | |
| "loss": 1.1255189180374146, | |
| "step": 308 | |
| }, | |
| { | |
| "epoch": 0.3785103785103785, | |
| "grad_norm": 0.2629050016403198, | |
| "learning_rate": 9.78576351433496e-05, | |
| "loss": 1.3129310607910156, | |
| "step": 310 | |
| }, | |
| { | |
| "epoch": 0.38095238095238093, | |
| "grad_norm": 0.28735068440437317, | |
| "learning_rate": 9.782131256437947e-05, | |
| "loss": 0.9864372611045837, | |
| "step": 312 | |
| }, | |
| { | |
| "epoch": 0.3833943833943834, | |
| "grad_norm": 0.335689514875412, | |
| "learning_rate": 9.778469229052231e-05, | |
| "loss": 1.3696770668029785, | |
| "step": 314 | |
| }, | |
| { | |
| "epoch": 0.38583638583638585, | |
| "grad_norm": 0.7812069058418274, | |
| "learning_rate": 9.77477745763632e-05, | |
| "loss": 0.5830596685409546, | |
| "step": 316 | |
| }, | |
| { | |
| "epoch": 0.3882783882783883, | |
| "grad_norm": 0.41175198554992676, | |
| "learning_rate": 9.771055967855507e-05, | |
| "loss": 0.8974475860595703, | |
| "step": 318 | |
| }, | |
| { | |
| "epoch": 0.3907203907203907, | |
| "grad_norm": 0.5099735856056213, | |
| "learning_rate": 9.767304785581686e-05, | |
| "loss": 1.2769310474395752, | |
| "step": 320 | |
| }, | |
| { | |
| "epoch": 0.39316239316239315, | |
| "grad_norm": 0.2129960060119629, | |
| "learning_rate": 9.763523936893175e-05, | |
| "loss": 1.3239271640777588, | |
| "step": 322 | |
| }, | |
| { | |
| "epoch": 0.3956043956043956, | |
| "grad_norm": 0.26624882221221924, | |
| "learning_rate": 9.759713448074533e-05, | |
| "loss": 1.2988332509994507, | |
| "step": 324 | |
| }, | |
| { | |
| "epoch": 0.398046398046398, | |
| "grad_norm": 0.2127254754304886, | |
| "learning_rate": 9.755873345616378e-05, | |
| "loss": 1.1850067377090454, | |
| "step": 326 | |
| }, | |
| { | |
| "epoch": 0.4004884004884005, | |
| "grad_norm": 0.3029441237449646, | |
| "learning_rate": 9.7520036562152e-05, | |
| "loss": 1.1345984935760498, | |
| "step": 328 | |
| }, | |
| { | |
| "epoch": 0.40293040293040294, | |
| "grad_norm": 0.8693342804908752, | |
| "learning_rate": 9.748104406773187e-05, | |
| "loss": 0.8637459874153137, | |
| "step": 330 | |
| }, | |
| { | |
| "epoch": 0.4053724053724054, | |
| "grad_norm": 0.5175452828407288, | |
| "learning_rate": 9.74417562439802e-05, | |
| "loss": 0.6529451608657837, | |
| "step": 332 | |
| }, | |
| { | |
| "epoch": 0.4078144078144078, | |
| "grad_norm": 0.19759483635425568, | |
| "learning_rate": 9.740217336402696e-05, | |
| "loss": 1.0596596002578735, | |
| "step": 334 | |
| }, | |
| { | |
| "epoch": 0.41025641025641024, | |
| "grad_norm": 0.30637237429618835, | |
| "learning_rate": 9.73622957030534e-05, | |
| "loss": 1.292237401008606, | |
| "step": 336 | |
| }, | |
| { | |
| "epoch": 0.4126984126984127, | |
| "grad_norm": 0.2845645546913147, | |
| "learning_rate": 9.732212353829003e-05, | |
| "loss": 0.9293122887611389, | |
| "step": 338 | |
| }, | |
| { | |
| "epoch": 0.41514041514041516, | |
| "grad_norm": 0.4744598865509033, | |
| "learning_rate": 9.728165714901483e-05, | |
| "loss": 1.1430048942565918, | |
| "step": 340 | |
| }, | |
| { | |
| "epoch": 0.4175824175824176, | |
| "grad_norm": 0.3360954821109772, | |
| "learning_rate": 9.724089681655115e-05, | |
| "loss": 1.6140475273132324, | |
| "step": 342 | |
| }, | |
| { | |
| "epoch": 0.42002442002442003, | |
| "grad_norm": 0.1928117275238037, | |
| "learning_rate": 9.719984282426592e-05, | |
| "loss": 0.9177114367485046, | |
| "step": 344 | |
| }, | |
| { | |
| "epoch": 0.42246642246642246, | |
| "grad_norm": 0.3101710081100464, | |
| "learning_rate": 9.715849545756757e-05, | |
| "loss": 1.0519685745239258, | |
| "step": 346 | |
| }, | |
| { | |
| "epoch": 0.4249084249084249, | |
| "grad_norm": 0.20583422482013702, | |
| "learning_rate": 9.711685500390407e-05, | |
| "loss": 1.2847375869750977, | |
| "step": 348 | |
| }, | |
| { | |
| "epoch": 0.42735042735042733, | |
| "grad_norm": 0.28896406292915344, | |
| "learning_rate": 9.707492175276094e-05, | |
| "loss": 0.8652978539466858, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 0.4297924297924298, | |
| "grad_norm": 0.19667665660381317, | |
| "learning_rate": 9.703269599565926e-05, | |
| "loss": 1.0848617553710938, | |
| "step": 352 | |
| }, | |
| { | |
| "epoch": 0.43223443223443225, | |
| "grad_norm": 0.5856490731239319, | |
| "learning_rate": 9.699017802615358e-05, | |
| "loss": 1.230289340019226, | |
| "step": 354 | |
| }, | |
| { | |
| "epoch": 0.4346764346764347, | |
| "grad_norm": 0.3849090337753296, | |
| "learning_rate": 9.694736813982996e-05, | |
| "loss": 0.8349976539611816, | |
| "step": 356 | |
| }, | |
| { | |
| "epoch": 0.4371184371184371, | |
| "grad_norm": 0.5175620317459106, | |
| "learning_rate": 9.690426663430382e-05, | |
| "loss": 1.0158485174179077, | |
| "step": 358 | |
| }, | |
| { | |
| "epoch": 0.43956043956043955, | |
| "grad_norm": 0.24635784327983856, | |
| "learning_rate": 9.686087380921798e-05, | |
| "loss": 1.0942423343658447, | |
| "step": 360 | |
| }, | |
| { | |
| "epoch": 0.442002442002442, | |
| "grad_norm": 0.29661303758621216, | |
| "learning_rate": 9.68171899662405e-05, | |
| "loss": 1.025334119796753, | |
| "step": 362 | |
| }, | |
| { | |
| "epoch": 0.4444444444444444, | |
| "grad_norm": 0.5717456340789795, | |
| "learning_rate": 9.677321540906258e-05, | |
| "loss": 0.9988979697227478, | |
| "step": 364 | |
| }, | |
| { | |
| "epoch": 0.4468864468864469, | |
| "grad_norm": 0.3307958245277405, | |
| "learning_rate": 9.672895044339651e-05, | |
| "loss": 1.4021763801574707, | |
| "step": 366 | |
| }, | |
| { | |
| "epoch": 0.44932844932844934, | |
| "grad_norm": 0.24330608546733856, | |
| "learning_rate": 9.668439537697348e-05, | |
| "loss": 0.9170594215393066, | |
| "step": 368 | |
| }, | |
| { | |
| "epoch": 0.4517704517704518, | |
| "grad_norm": 0.053159430623054504, | |
| "learning_rate": 9.66395505195415e-05, | |
| "loss": 0.9720377922058105, | |
| "step": 370 | |
| }, | |
| { | |
| "epoch": 0.4542124542124542, | |
| "grad_norm": 0.20135316252708435, | |
| "learning_rate": 9.659441618286317e-05, | |
| "loss": 1.2760885953903198, | |
| "step": 372 | |
| }, | |
| { | |
| "epoch": 0.45665445665445664, | |
| "grad_norm": 0.1744951605796814, | |
| "learning_rate": 9.65489926807136e-05, | |
| "loss": 0.8951278924942017, | |
| "step": 374 | |
| }, | |
| { | |
| "epoch": 0.4590964590964591, | |
| "grad_norm": 0.9618193507194519, | |
| "learning_rate": 9.650328032887815e-05, | |
| "loss": 1.1860800981521606, | |
| "step": 376 | |
| }, | |
| { | |
| "epoch": 0.46153846153846156, | |
| "grad_norm": 1.1108251810073853, | |
| "learning_rate": 9.645727944515029e-05, | |
| "loss": 0.834053099155426, | |
| "step": 378 | |
| }, | |
| { | |
| "epoch": 0.463980463980464, | |
| "grad_norm": 0.3237703740596771, | |
| "learning_rate": 9.641099034932939e-05, | |
| "loss": 0.8309035897254944, | |
| "step": 380 | |
| }, | |
| { | |
| "epoch": 0.46642246642246643, | |
| "grad_norm": 0.30231574177742004, | |
| "learning_rate": 9.636441336321844e-05, | |
| "loss": 1.2859207391738892, | |
| "step": 382 | |
| }, | |
| { | |
| "epoch": 0.46886446886446886, | |
| "grad_norm": 0.17075681686401367, | |
| "learning_rate": 9.631754881062186e-05, | |
| "loss": 0.9477740526199341, | |
| "step": 384 | |
| }, | |
| { | |
| "epoch": 0.4713064713064713, | |
| "grad_norm": 0.3386945128440857, | |
| "learning_rate": 9.627039701734327e-05, | |
| "loss": 1.0787287950515747, | |
| "step": 386 | |
| }, | |
| { | |
| "epoch": 0.47374847374847373, | |
| "grad_norm": 0.23227858543395996, | |
| "learning_rate": 9.622295831118315e-05, | |
| "loss": 1.545161485671997, | |
| "step": 388 | |
| }, | |
| { | |
| "epoch": 0.47619047619047616, | |
| "grad_norm": 0.2268853336572647, | |
| "learning_rate": 9.617523302193665e-05, | |
| "loss": 0.6267635822296143, | |
| "step": 390 | |
| }, | |
| { | |
| "epoch": 0.47863247863247865, | |
| "grad_norm": 0.3856842815876007, | |
| "learning_rate": 9.612722148139122e-05, | |
| "loss": 0.7686330080032349, | |
| "step": 392 | |
| }, | |
| { | |
| "epoch": 0.4810744810744811, | |
| "grad_norm": 0.18474534153938293, | |
| "learning_rate": 9.607892402332434e-05, | |
| "loss": 0.8829730153083801, | |
| "step": 394 | |
| }, | |
| { | |
| "epoch": 0.4835164835164835, | |
| "grad_norm": 0.6889500021934509, | |
| "learning_rate": 9.603034098350121e-05, | |
| "loss": 1.18562650680542, | |
| "step": 396 | |
| }, | |
| { | |
| "epoch": 0.48595848595848595, | |
| "grad_norm": 0.6979353427886963, | |
| "learning_rate": 9.59814726996724e-05, | |
| "loss": 1.2505449056625366, | |
| "step": 398 | |
| }, | |
| { | |
| "epoch": 0.4884004884004884, | |
| "grad_norm": 0.7509117126464844, | |
| "learning_rate": 9.593231951157151e-05, | |
| "loss": 0.5224822759628296, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 0.4908424908424908, | |
| "grad_norm": 0.22064976394176483, | |
| "learning_rate": 9.588288176091277e-05, | |
| "loss": 1.210641860961914, | |
| "step": 402 | |
| }, | |
| { | |
| "epoch": 0.4932844932844933, | |
| "grad_norm": 0.14853781461715698, | |
| "learning_rate": 9.583315979138873e-05, | |
| "loss": 1.286338210105896, | |
| "step": 404 | |
| }, | |
| { | |
| "epoch": 0.49572649572649574, | |
| "grad_norm": 0.5779667496681213, | |
| "learning_rate": 9.578315394866782e-05, | |
| "loss": 0.651908278465271, | |
| "step": 406 | |
| }, | |
| { | |
| "epoch": 0.4981684981684982, | |
| "grad_norm": 0.34377893805503845, | |
| "learning_rate": 9.573286458039198e-05, | |
| "loss": 0.8280749320983887, | |
| "step": 408 | |
| }, | |
| { | |
| "epoch": 0.5006105006105006, | |
| "grad_norm": 0.17841589450836182, | |
| "learning_rate": 9.56822920361742e-05, | |
| "loss": 1.2296653985977173, | |
| "step": 410 | |
| }, | |
| { | |
| "epoch": 0.503052503052503, | |
| "grad_norm": 0.14455167949199677, | |
| "learning_rate": 9.563143666759615e-05, | |
| "loss": 0.5456517934799194, | |
| "step": 412 | |
| }, | |
| { | |
| "epoch": 0.5054945054945055, | |
| "grad_norm": 0.17755503952503204, | |
| "learning_rate": 9.558029882820565e-05, | |
| "loss": 1.2897342443466187, | |
| "step": 414 | |
| }, | |
| { | |
| "epoch": 0.5079365079365079, | |
| "grad_norm": 0.1847013235092163, | |
| "learning_rate": 9.552887887351435e-05, | |
| "loss": 1.2527668476104736, | |
| "step": 416 | |
| }, | |
| { | |
| "epoch": 0.5103785103785103, | |
| "grad_norm": 0.09219064563512802, | |
| "learning_rate": 9.547717716099508e-05, | |
| "loss": 1.1090110540390015, | |
| "step": 418 | |
| }, | |
| { | |
| "epoch": 0.5128205128205128, | |
| "grad_norm": 0.12072814255952835, | |
| "learning_rate": 9.542519405007954e-05, | |
| "loss": 1.2077261209487915, | |
| "step": 420 | |
| }, | |
| { | |
| "epoch": 0.5152625152625152, | |
| "grad_norm": 0.2985288202762604, | |
| "learning_rate": 9.537292990215564e-05, | |
| "loss": 1.2155014276504517, | |
| "step": 422 | |
| }, | |
| { | |
| "epoch": 0.5177045177045178, | |
| "grad_norm": 0.1399923712015152, | |
| "learning_rate": 9.532038508056513e-05, | |
| "loss": 1.2267438173294067, | |
| "step": 424 | |
| }, | |
| { | |
| "epoch": 0.5201465201465202, | |
| "grad_norm": 0.36764466762542725, | |
| "learning_rate": 9.526755995060098e-05, | |
| "loss": 1.07572340965271, | |
| "step": 426 | |
| }, | |
| { | |
| "epoch": 0.5225885225885226, | |
| "grad_norm": 0.08539887517690659, | |
| "learning_rate": 9.521445487950487e-05, | |
| "loss": 1.229189157485962, | |
| "step": 428 | |
| }, | |
| { | |
| "epoch": 0.525030525030525, | |
| "grad_norm": 0.3200857639312744, | |
| "learning_rate": 9.516107023646467e-05, | |
| "loss": 0.8851771950721741, | |
| "step": 430 | |
| }, | |
| { | |
| "epoch": 0.5274725274725275, | |
| "grad_norm": 0.23066289722919464, | |
| "learning_rate": 9.510740639261178e-05, | |
| "loss": 0.568865180015564, | |
| "step": 432 | |
| }, | |
| { | |
| "epoch": 0.5299145299145299, | |
| "grad_norm": 0.12386373430490494, | |
| "learning_rate": 9.505346372101868e-05, | |
| "loss": 1.2345771789550781, | |
| "step": 434 | |
| }, | |
| { | |
| "epoch": 0.5323565323565324, | |
| "grad_norm": 0.16147340834140778, | |
| "learning_rate": 9.499924259669621e-05, | |
| "loss": 1.0792996883392334, | |
| "step": 436 | |
| }, | |
| { | |
| "epoch": 0.5347985347985348, | |
| "grad_norm": 0.11039295792579651, | |
| "learning_rate": 9.494474339659109e-05, | |
| "loss": 1.2031606435775757, | |
| "step": 438 | |
| }, | |
| { | |
| "epoch": 0.5372405372405372, | |
| "grad_norm": 0.2149389535188675, | |
| "learning_rate": 9.488996649958312e-05, | |
| "loss": 1.164471983909607, | |
| "step": 440 | |
| }, | |
| { | |
| "epoch": 0.5396825396825397, | |
| "grad_norm": 0.9730677008628845, | |
| "learning_rate": 9.483491228648279e-05, | |
| "loss": 1.027786374092102, | |
| "step": 442 | |
| }, | |
| { | |
| "epoch": 0.5421245421245421, | |
| "grad_norm": 0.42046958208084106, | |
| "learning_rate": 9.477958114002841e-05, | |
| "loss": 0.7723256945610046, | |
| "step": 444 | |
| }, | |
| { | |
| "epoch": 0.5445665445665445, | |
| "grad_norm": 0.5654972195625305, | |
| "learning_rate": 9.472397344488355e-05, | |
| "loss": 0.867957592010498, | |
| "step": 446 | |
| }, | |
| { | |
| "epoch": 0.5470085470085471, | |
| "grad_norm": 0.7515931725502014, | |
| "learning_rate": 9.466808958763436e-05, | |
| "loss": 1.1303768157958984, | |
| "step": 448 | |
| }, | |
| { | |
| "epoch": 0.5494505494505495, | |
| "grad_norm": 0.6230604648590088, | |
| "learning_rate": 9.461192995678689e-05, | |
| "loss": 0.9241065382957458, | |
| "step": 450 | |
| }, | |
| { | |
| "epoch": 0.5518925518925519, | |
| "grad_norm": 1.763684868812561, | |
| "learning_rate": 9.455549494276438e-05, | |
| "loss": 0.7027381658554077, | |
| "step": 452 | |
| }, | |
| { | |
| "epoch": 0.5543345543345544, | |
| "grad_norm": 0.4505990147590637, | |
| "learning_rate": 9.44987849379045e-05, | |
| "loss": 1.214455008506775, | |
| "step": 454 | |
| }, | |
| { | |
| "epoch": 0.5567765567765568, | |
| "grad_norm": 0.1572820544242859, | |
| "learning_rate": 9.44418003364567e-05, | |
| "loss": 1.1924667358398438, | |
| "step": 456 | |
| }, | |
| { | |
| "epoch": 0.5592185592185592, | |
| "grad_norm": 0.1346113532781601, | |
| "learning_rate": 9.438454153457942e-05, | |
| "loss": 0.6208730340003967, | |
| "step": 458 | |
| }, | |
| { | |
| "epoch": 0.5616605616605617, | |
| "grad_norm": 0.9395261406898499, | |
| "learning_rate": 9.432700893033735e-05, | |
| "loss": 1.1647229194641113, | |
| "step": 460 | |
| }, | |
| { | |
| "epoch": 0.5641025641025641, | |
| "grad_norm": 0.25545039772987366, | |
| "learning_rate": 9.426920292369868e-05, | |
| "loss": 1.0791125297546387, | |
| "step": 462 | |
| }, | |
| { | |
| "epoch": 0.5665445665445665, | |
| "grad_norm": 0.21288283169269562, | |
| "learning_rate": 9.421112391653226e-05, | |
| "loss": 0.5335599184036255, | |
| "step": 464 | |
| }, | |
| { | |
| "epoch": 0.568986568986569, | |
| "grad_norm": 0.1555309295654297, | |
| "learning_rate": 9.415277231260488e-05, | |
| "loss": 1.532774567604065, | |
| "step": 466 | |
| }, | |
| { | |
| "epoch": 0.5714285714285714, | |
| "grad_norm": 0.6552872657775879, | |
| "learning_rate": 9.409414851757841e-05, | |
| "loss": 1.2282241582870483, | |
| "step": 468 | |
| }, | |
| { | |
| "epoch": 0.5738705738705738, | |
| "grad_norm": 0.23758231103420258, | |
| "learning_rate": 9.403525293900704e-05, | |
| "loss": 0.8937569260597229, | |
| "step": 470 | |
| }, | |
| { | |
| "epoch": 0.5763125763125763, | |
| "grad_norm": 0.2393610179424286, | |
| "learning_rate": 9.397608598633434e-05, | |
| "loss": 1.2297227382659912, | |
| "step": 472 | |
| }, | |
| { | |
| "epoch": 0.5787545787545788, | |
| "grad_norm": 0.1583927571773529, | |
| "learning_rate": 9.391664807089052e-05, | |
| "loss": 0.8401937484741211, | |
| "step": 474 | |
| }, | |
| { | |
| "epoch": 0.5811965811965812, | |
| "grad_norm": 0.49851861596107483, | |
| "learning_rate": 9.385693960588956e-05, | |
| "loss": 0.9421334266662598, | |
| "step": 476 | |
| }, | |
| { | |
| "epoch": 0.5836385836385837, | |
| "grad_norm": 0.16811226308345795, | |
| "learning_rate": 9.379696100642622e-05, | |
| "loss": 1.1455268859863281, | |
| "step": 478 | |
| }, | |
| { | |
| "epoch": 0.5860805860805861, | |
| "grad_norm": 0.14929954707622528, | |
| "learning_rate": 9.373671268947333e-05, | |
| "loss": 1.2628676891326904, | |
| "step": 480 | |
| }, | |
| { | |
| "epoch": 0.5885225885225885, | |
| "grad_norm": 0.1939006894826889, | |
| "learning_rate": 9.367619507387876e-05, | |
| "loss": 0.4674907922744751, | |
| "step": 482 | |
| }, | |
| { | |
| "epoch": 0.590964590964591, | |
| "grad_norm": 0.18351028859615326, | |
| "learning_rate": 9.361540858036258e-05, | |
| "loss": 0.8831008672714233, | |
| "step": 484 | |
| }, | |
| { | |
| "epoch": 0.5934065934065934, | |
| "grad_norm": 0.15032213926315308, | |
| "learning_rate": 9.355435363151405e-05, | |
| "loss": 0.9298666715621948, | |
| "step": 486 | |
| }, | |
| { | |
| "epoch": 0.5958485958485958, | |
| "grad_norm": 0.17858006060123444, | |
| "learning_rate": 9.34930306517888e-05, | |
| "loss": 0.8963842391967773, | |
| "step": 488 | |
| }, | |
| { | |
| "epoch": 0.5982905982905983, | |
| "grad_norm": 0.16214722394943237, | |
| "learning_rate": 9.343144006750577e-05, | |
| "loss": 0.9911201596260071, | |
| "step": 490 | |
| }, | |
| { | |
| "epoch": 0.6007326007326007, | |
| "grad_norm": 0.29397615790367126, | |
| "learning_rate": 9.336958230684432e-05, | |
| "loss": 0.6251745223999023, | |
| "step": 492 | |
| }, | |
| { | |
| "epoch": 0.6031746031746031, | |
| "grad_norm": 0.16859205067157745, | |
| "learning_rate": 9.330745779984126e-05, | |
| "loss": 0.8774055242538452, | |
| "step": 494 | |
| }, | |
| { | |
| "epoch": 0.6056166056166056, | |
| "grad_norm": 0.20924700796604156, | |
| "learning_rate": 9.324506697838774e-05, | |
| "loss": 1.194309949874878, | |
| "step": 496 | |
| }, | |
| { | |
| "epoch": 0.608058608058608, | |
| "grad_norm": 1.1096948385238647, | |
| "learning_rate": 9.318241027622642e-05, | |
| "loss": 0.5684680938720703, | |
| "step": 498 | |
| }, | |
| { | |
| "epoch": 0.6105006105006106, | |
| "grad_norm": 0.14514295756816864, | |
| "learning_rate": 9.31194881289483e-05, | |
| "loss": 0.8453354835510254, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.612942612942613, | |
| "grad_norm": 0.15760479867458344, | |
| "learning_rate": 9.305630097398986e-05, | |
| "loss": 1.168281078338623, | |
| "step": 502 | |
| }, | |
| { | |
| "epoch": 0.6153846153846154, | |
| "grad_norm": 0.19755949079990387, | |
| "learning_rate": 9.299284925062977e-05, | |
| "loss": 1.1970603466033936, | |
| "step": 504 | |
| }, | |
| { | |
| "epoch": 0.6178266178266179, | |
| "grad_norm": 0.402976393699646, | |
| "learning_rate": 9.292913339998614e-05, | |
| "loss": 1.2204663753509521, | |
| "step": 506 | |
| }, | |
| { | |
| "epoch": 0.6202686202686203, | |
| "grad_norm": 0.19864894449710846, | |
| "learning_rate": 9.286515386501323e-05, | |
| "loss": 1.0551217794418335, | |
| "step": 508 | |
| }, | |
| { | |
| "epoch": 0.6227106227106227, | |
| "grad_norm": 0.1931726485490799, | |
| "learning_rate": 9.280091109049841e-05, | |
| "loss": 1.2066923379898071, | |
| "step": 510 | |
| }, | |
| { | |
| "epoch": 0.6251526251526252, | |
| "grad_norm": 0.21012164652347565, | |
| "learning_rate": 9.273640552305916e-05, | |
| "loss": 1.0003726482391357, | |
| "step": 512 | |
| }, | |
| { | |
| "epoch": 0.6275946275946276, | |
| "grad_norm": 0.21010857820510864, | |
| "learning_rate": 9.267163761113985e-05, | |
| "loss": 0.9760635495185852, | |
| "step": 514 | |
| }, | |
| { | |
| "epoch": 0.63003663003663, | |
| "grad_norm": 0.5790798664093018, | |
| "learning_rate": 9.260660780500872e-05, | |
| "loss": 0.9110443592071533, | |
| "step": 516 | |
| }, | |
| { | |
| "epoch": 0.6324786324786325, | |
| "grad_norm": 0.32287415862083435, | |
| "learning_rate": 9.25413165567547e-05, | |
| "loss": 1.1938380002975464, | |
| "step": 518 | |
| }, | |
| { | |
| "epoch": 0.6349206349206349, | |
| "grad_norm": 0.14292655885219574, | |
| "learning_rate": 9.247576432028421e-05, | |
| "loss": 0.7279146909713745, | |
| "step": 520 | |
| }, | |
| { | |
| "epoch": 0.6373626373626373, | |
| "grad_norm": 0.23636585474014282, | |
| "learning_rate": 9.240995155131816e-05, | |
| "loss": 0.9307098984718323, | |
| "step": 522 | |
| }, | |
| { | |
| "epoch": 0.6398046398046398, | |
| "grad_norm": 0.48398420214653015, | |
| "learning_rate": 9.234387870738861e-05, | |
| "loss": 0.9981100559234619, | |
| "step": 524 | |
| }, | |
| { | |
| "epoch": 0.6422466422466423, | |
| "grad_norm": 0.2807958126068115, | |
| "learning_rate": 9.227754624783575e-05, | |
| "loss": 1.2286438941955566, | |
| "step": 526 | |
| }, | |
| { | |
| "epoch": 0.6446886446886447, | |
| "grad_norm": 0.22547727823257446, | |
| "learning_rate": 9.221095463380454e-05, | |
| "loss": 1.275038480758667, | |
| "step": 528 | |
| }, | |
| { | |
| "epoch": 0.6471306471306472, | |
| "grad_norm": 0.29100245237350464, | |
| "learning_rate": 9.214410432824163e-05, | |
| "loss": 1.0418328046798706, | |
| "step": 530 | |
| }, | |
| { | |
| "epoch": 0.6495726495726496, | |
| "grad_norm": 0.6905118227005005, | |
| "learning_rate": 9.207699579589212e-05, | |
| "loss": 0.9832897186279297, | |
| "step": 532 | |
| }, | |
| { | |
| "epoch": 0.652014652014652, | |
| "grad_norm": 0.23352235555648804, | |
| "learning_rate": 9.200962950329624e-05, | |
| "loss": 1.5091339349746704, | |
| "step": 534 | |
| }, | |
| { | |
| "epoch": 0.6544566544566545, | |
| "grad_norm": 0.25114279985427856, | |
| "learning_rate": 9.194200591878626e-05, | |
| "loss": 0.7402273416519165, | |
| "step": 536 | |
| }, | |
| { | |
| "epoch": 0.6568986568986569, | |
| "grad_norm": 0.22499670088291168, | |
| "learning_rate": 9.18741255124831e-05, | |
| "loss": 1.2244811058044434, | |
| "step": 538 | |
| }, | |
| { | |
| "epoch": 0.6593406593406593, | |
| "grad_norm": 0.29726406931877136, | |
| "learning_rate": 9.180598875629314e-05, | |
| "loss": 1.2527215480804443, | |
| "step": 540 | |
| }, | |
| { | |
| "epoch": 0.6617826617826618, | |
| "grad_norm": 0.193584606051445, | |
| "learning_rate": 9.173759612390484e-05, | |
| "loss": 0.9180420637130737, | |
| "step": 542 | |
| }, | |
| { | |
| "epoch": 0.6642246642246642, | |
| "grad_norm": 0.40724650025367737, | |
| "learning_rate": 9.166894809078564e-05, | |
| "loss": 1.2139979600906372, | |
| "step": 544 | |
| }, | |
| { | |
| "epoch": 0.6666666666666666, | |
| "grad_norm": 0.6462997794151306, | |
| "learning_rate": 9.160004513417842e-05, | |
| "loss": 1.1352370977401733, | |
| "step": 546 | |
| }, | |
| { | |
| "epoch": 0.6691086691086691, | |
| "grad_norm": 0.26992952823638916, | |
| "learning_rate": 9.153088773309836e-05, | |
| "loss": 1.2043757438659668, | |
| "step": 548 | |
| }, | |
| { | |
| "epoch": 0.6715506715506715, | |
| "grad_norm": 0.422783762216568, | |
| "learning_rate": 9.146147636832952e-05, | |
| "loss": 1.1186383962631226, | |
| "step": 550 | |
| }, | |
| { | |
| "epoch": 0.673992673992674, | |
| "grad_norm": 0.3179819881916046, | |
| "learning_rate": 9.139181152242152e-05, | |
| "loss": 1.026376485824585, | |
| "step": 552 | |
| }, | |
| { | |
| "epoch": 0.6764346764346765, | |
| "grad_norm": 0.26202264428138733, | |
| "learning_rate": 9.13218936796862e-05, | |
| "loss": 0.9343441724777222, | |
| "step": 554 | |
| }, | |
| { | |
| "epoch": 0.6788766788766789, | |
| "grad_norm": 0.14061686396598816, | |
| "learning_rate": 9.125172332619425e-05, | |
| "loss": 0.7112762928009033, | |
| "step": 556 | |
| }, | |
| { | |
| "epoch": 0.6813186813186813, | |
| "grad_norm": 0.5914305448532104, | |
| "learning_rate": 9.118130094977178e-05, | |
| "loss": 1.2085468769073486, | |
| "step": 558 | |
| }, | |
| { | |
| "epoch": 0.6837606837606838, | |
| "grad_norm": 0.04549518972635269, | |
| "learning_rate": 9.111062703999702e-05, | |
| "loss": 1.0624598264694214, | |
| "step": 560 | |
| }, | |
| { | |
| "epoch": 0.6862026862026862, | |
| "grad_norm": 0.35139355063438416, | |
| "learning_rate": 9.10397020881968e-05, | |
| "loss": 0.9602486491203308, | |
| "step": 562 | |
| }, | |
| { | |
| "epoch": 0.6886446886446886, | |
| "grad_norm": 0.8143149018287659, | |
| "learning_rate": 9.096852658744331e-05, | |
| "loss": 1.2754855155944824, | |
| "step": 564 | |
| }, | |
| { | |
| "epoch": 0.6910866910866911, | |
| "grad_norm": 0.5778771042823792, | |
| "learning_rate": 9.089710103255044e-05, | |
| "loss": 1.0818145275115967, | |
| "step": 566 | |
| }, | |
| { | |
| "epoch": 0.6935286935286935, | |
| "grad_norm": 0.39189010858535767, | |
| "learning_rate": 9.082542592007055e-05, | |
| "loss": 1.3231815099716187, | |
| "step": 568 | |
| }, | |
| { | |
| "epoch": 0.6959706959706959, | |
| "grad_norm": 0.17417526245117188, | |
| "learning_rate": 9.075350174829088e-05, | |
| "loss": 0.890396773815155, | |
| "step": 570 | |
| }, | |
| { | |
| "epoch": 0.6984126984126984, | |
| "grad_norm": 0.20697654783725739, | |
| "learning_rate": 9.068132901723018e-05, | |
| "loss": 0.9492074251174927, | |
| "step": 572 | |
| }, | |
| { | |
| "epoch": 0.7008547008547008, | |
| "grad_norm": 0.3690604865550995, | |
| "learning_rate": 9.060890822863515e-05, | |
| "loss": 1.4233372211456299, | |
| "step": 574 | |
| }, | |
| { | |
| "epoch": 0.7032967032967034, | |
| "grad_norm": 0.5194886326789856, | |
| "learning_rate": 9.0536239885977e-05, | |
| "loss": 1.1718875169754028, | |
| "step": 576 | |
| }, | |
| { | |
| "epoch": 0.7057387057387058, | |
| "grad_norm": 0.7631717324256897, | |
| "learning_rate": 9.046332449444796e-05, | |
| "loss": 1.153130292892456, | |
| "step": 578 | |
| }, | |
| { | |
| "epoch": 0.7081807081807082, | |
| "grad_norm": 0.41176915168762207, | |
| "learning_rate": 9.039016256095777e-05, | |
| "loss": 1.0873521566390991, | |
| "step": 580 | |
| }, | |
| { | |
| "epoch": 0.7106227106227107, | |
| "grad_norm": 0.32372066378593445, | |
| "learning_rate": 9.03167545941301e-05, | |
| "loss": 1.2140915393829346, | |
| "step": 582 | |
| }, | |
| { | |
| "epoch": 0.7130647130647131, | |
| "grad_norm": 0.21838268637657166, | |
| "learning_rate": 9.024310110429902e-05, | |
| "loss": 0.9264190196990967, | |
| "step": 584 | |
| }, | |
| { | |
| "epoch": 0.7155067155067155, | |
| "grad_norm": 0.36397573351860046, | |
| "learning_rate": 9.016920260350557e-05, | |
| "loss": 1.2471363544464111, | |
| "step": 586 | |
| }, | |
| { | |
| "epoch": 0.717948717948718, | |
| "grad_norm": 0.6672040820121765, | |
| "learning_rate": 9.009505960549401e-05, | |
| "loss": 1.0811785459518433, | |
| "step": 588 | |
| }, | |
| { | |
| "epoch": 0.7203907203907204, | |
| "grad_norm": 0.12362607568502426, | |
| "learning_rate": 9.002067262570846e-05, | |
| "loss": 0.9149802327156067, | |
| "step": 590 | |
| }, | |
| { | |
| "epoch": 0.7228327228327228, | |
| "grad_norm": 0.13345609605312347, | |
| "learning_rate": 8.994604218128912e-05, | |
| "loss": 0.8213675022125244, | |
| "step": 592 | |
| }, | |
| { | |
| "epoch": 0.7252747252747253, | |
| "grad_norm": 0.19838625192642212, | |
| "learning_rate": 8.987116879106882e-05, | |
| "loss": 1.178354024887085, | |
| "step": 594 | |
| }, | |
| { | |
| "epoch": 0.7277167277167277, | |
| "grad_norm": 0.18763922154903412, | |
| "learning_rate": 8.97960529755693e-05, | |
| "loss": 0.9279124140739441, | |
| "step": 596 | |
| }, | |
| { | |
| "epoch": 0.7301587301587301, | |
| "grad_norm": 0.12447916716337204, | |
| "learning_rate": 8.97206952569977e-05, | |
| "loss": 0.8444339036941528, | |
| "step": 598 | |
| }, | |
| { | |
| "epoch": 0.7326007326007326, | |
| "grad_norm": 0.44550734758377075, | |
| "learning_rate": 8.964509615924285e-05, | |
| "loss": 1.1798958778381348, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 0.7350427350427351, | |
| "grad_norm": 0.38194093108177185, | |
| "learning_rate": 8.956925620787166e-05, | |
| "loss": 1.3009207248687744, | |
| "step": 602 | |
| }, | |
| { | |
| "epoch": 0.7374847374847375, | |
| "grad_norm": 0.18306893110275269, | |
| "learning_rate": 8.949317593012544e-05, | |
| "loss": 1.0973061323165894, | |
| "step": 604 | |
| }, | |
| { | |
| "epoch": 0.73992673992674, | |
| "grad_norm": 0.3112310767173767, | |
| "learning_rate": 8.94168558549163e-05, | |
| "loss": 1.5330253839492798, | |
| "step": 606 | |
| }, | |
| { | |
| "epoch": 0.7423687423687424, | |
| "grad_norm": 1.3974329233169556, | |
| "learning_rate": 8.93402965128234e-05, | |
| "loss": 1.1588298082351685, | |
| "step": 608 | |
| }, | |
| { | |
| "epoch": 0.7448107448107448, | |
| "grad_norm": 0.2988543212413788, | |
| "learning_rate": 8.926349843608927e-05, | |
| "loss": 0.8357654213905334, | |
| "step": 610 | |
| }, | |
| { | |
| "epoch": 0.7472527472527473, | |
| "grad_norm": 0.23195451498031616, | |
| "learning_rate": 8.918646215861618e-05, | |
| "loss": 1.2047741413116455, | |
| "step": 612 | |
| }, | |
| { | |
| "epoch": 0.7496947496947497, | |
| "grad_norm": 0.3196408152580261, | |
| "learning_rate": 8.910918821596234e-05, | |
| "loss": 1.191171407699585, | |
| "step": 614 | |
| }, | |
| { | |
| "epoch": 0.7521367521367521, | |
| "grad_norm": 0.24305181205272675, | |
| "learning_rate": 8.903167714533821e-05, | |
| "loss": 0.7358626127243042, | |
| "step": 616 | |
| }, | |
| { | |
| "epoch": 0.7545787545787546, | |
| "grad_norm": 0.15109124779701233, | |
| "learning_rate": 8.895392948560281e-05, | |
| "loss": 1.4771884679794312, | |
| "step": 618 | |
| }, | |
| { | |
| "epoch": 0.757020757020757, | |
| "grad_norm": 0.5152910351753235, | |
| "learning_rate": 8.887594577725987e-05, | |
| "loss": 0.9606069326400757, | |
| "step": 620 | |
| }, | |
| { | |
| "epoch": 0.7594627594627594, | |
| "grad_norm": 0.4518302381038666, | |
| "learning_rate": 8.87977265624542e-05, | |
| "loss": 0.9361006021499634, | |
| "step": 622 | |
| }, | |
| { | |
| "epoch": 0.7619047619047619, | |
| "grad_norm": 0.1910531371831894, | |
| "learning_rate": 8.871927238496784e-05, | |
| "loss": 1.1847237348556519, | |
| "step": 624 | |
| }, | |
| { | |
| "epoch": 0.7643467643467643, | |
| "grad_norm": 0.2079242467880249, | |
| "learning_rate": 8.864058379021627e-05, | |
| "loss": 1.254426121711731, | |
| "step": 626 | |
| }, | |
| { | |
| "epoch": 0.7667887667887668, | |
| "grad_norm": 0.26695823669433594, | |
| "learning_rate": 8.856166132524467e-05, | |
| "loss": 0.9584991335868835, | |
| "step": 628 | |
| }, | |
| { | |
| "epoch": 0.7692307692307693, | |
| "grad_norm": 0.49299949407577515, | |
| "learning_rate": 8.84825055387241e-05, | |
| "loss": 1.1340306997299194, | |
| "step": 630 | |
| }, | |
| { | |
| "epoch": 0.7716727716727717, | |
| "grad_norm": 0.16500461101531982, | |
| "learning_rate": 8.840311698094762e-05, | |
| "loss": 0.8147421479225159, | |
| "step": 632 | |
| }, | |
| { | |
| "epoch": 0.7741147741147741, | |
| "grad_norm": 0.14543406665325165, | |
| "learning_rate": 8.832349620382659e-05, | |
| "loss": 0.8940989375114441, | |
| "step": 634 | |
| }, | |
| { | |
| "epoch": 0.7765567765567766, | |
| "grad_norm": 0.5508949160575867, | |
| "learning_rate": 8.824364376088675e-05, | |
| "loss": 1.2726483345031738, | |
| "step": 636 | |
| }, | |
| { | |
| "epoch": 0.778998778998779, | |
| "grad_norm": 0.5127528309822083, | |
| "learning_rate": 8.816356020726436e-05, | |
| "loss": 1.2133723497390747, | |
| "step": 638 | |
| }, | |
| { | |
| "epoch": 0.7814407814407814, | |
| "grad_norm": 0.21939769387245178, | |
| "learning_rate": 8.808324609970237e-05, | |
| "loss": 0.8959197998046875, | |
| "step": 640 | |
| }, | |
| { | |
| "epoch": 0.7838827838827839, | |
| "grad_norm": 0.3500021994113922, | |
| "learning_rate": 8.80027019965466e-05, | |
| "loss": 0.9891265630722046, | |
| "step": 642 | |
| }, | |
| { | |
| "epoch": 0.7863247863247863, | |
| "grad_norm": 0.5465804934501648, | |
| "learning_rate": 8.792192845774175e-05, | |
| "loss": 1.0522013902664185, | |
| "step": 644 | |
| }, | |
| { | |
| "epoch": 0.7887667887667887, | |
| "grad_norm": 1.1952425241470337, | |
| "learning_rate": 8.784092604482754e-05, | |
| "loss": 0.8848550319671631, | |
| "step": 646 | |
| }, | |
| { | |
| "epoch": 0.7912087912087912, | |
| "grad_norm": 0.4305606782436371, | |
| "learning_rate": 8.775969532093491e-05, | |
| "loss": 1.1473037004470825, | |
| "step": 648 | |
| }, | |
| { | |
| "epoch": 0.7936507936507936, | |
| "grad_norm": 0.19582562148571014, | |
| "learning_rate": 8.767823685078198e-05, | |
| "loss": 0.5288962721824646, | |
| "step": 650 | |
| }, | |
| { | |
| "epoch": 0.796092796092796, | |
| "grad_norm": 0.38399538397789, | |
| "learning_rate": 8.759655120067016e-05, | |
| "loss": 0.7343263030052185, | |
| "step": 652 | |
| }, | |
| { | |
| "epoch": 0.7985347985347986, | |
| "grad_norm": 0.19986340403556824, | |
| "learning_rate": 8.751463893848021e-05, | |
| "loss": 0.9599148035049438, | |
| "step": 654 | |
| }, | |
| { | |
| "epoch": 0.800976800976801, | |
| "grad_norm": 0.16012515127658844, | |
| "learning_rate": 8.743250063366836e-05, | |
| "loss": 1.1695860624313354, | |
| "step": 656 | |
| }, | |
| { | |
| "epoch": 0.8034188034188035, | |
| "grad_norm": 0.11873755604028702, | |
| "learning_rate": 8.735013685726223e-05, | |
| "loss": 0.39983829855918884, | |
| "step": 658 | |
| }, | |
| { | |
| "epoch": 0.8058608058608059, | |
| "grad_norm": 0.2125730663537979, | |
| "learning_rate": 8.726754818185697e-05, | |
| "loss": 0.9837296605110168, | |
| "step": 660 | |
| }, | |
| { | |
| "epoch": 0.8083028083028083, | |
| "grad_norm": 0.30212274193763733, | |
| "learning_rate": 8.71847351816112e-05, | |
| "loss": 1.1627798080444336, | |
| "step": 662 | |
| }, | |
| { | |
| "epoch": 0.8107448107448108, | |
| "grad_norm": 0.39462244510650635, | |
| "learning_rate": 8.710169843224308e-05, | |
| "loss": 1.276789903640747, | |
| "step": 664 | |
| }, | |
| { | |
| "epoch": 0.8131868131868132, | |
| "grad_norm": 0.19324733316898346, | |
| "learning_rate": 8.701843851102625e-05, | |
| "loss": 0.8581050038337708, | |
| "step": 666 | |
| }, | |
| { | |
| "epoch": 0.8156288156288156, | |
| "grad_norm": 0.5387722849845886, | |
| "learning_rate": 8.693495599678588e-05, | |
| "loss": 1.058676838874817, | |
| "step": 668 | |
| }, | |
| { | |
| "epoch": 0.818070818070818, | |
| "grad_norm": 0.4488937258720398, | |
| "learning_rate": 8.68512514698946e-05, | |
| "loss": 1.3410178422927856, | |
| "step": 670 | |
| }, | |
| { | |
| "epoch": 0.8205128205128205, | |
| "grad_norm": 0.22639647126197815, | |
| "learning_rate": 8.676732551226846e-05, | |
| "loss": 1.2160142660140991, | |
| "step": 672 | |
| }, | |
| { | |
| "epoch": 0.8229548229548229, | |
| "grad_norm": 0.41213053464889526, | |
| "learning_rate": 8.668317870736292e-05, | |
| "loss": 0.9283809065818787, | |
| "step": 674 | |
| }, | |
| { | |
| "epoch": 0.8253968253968254, | |
| "grad_norm": 0.23253750801086426, | |
| "learning_rate": 8.659881164016878e-05, | |
| "loss": 0.8342546820640564, | |
| "step": 676 | |
| }, | |
| { | |
| "epoch": 0.8278388278388278, | |
| "grad_norm": 0.3493381142616272, | |
| "learning_rate": 8.651422489720811e-05, | |
| "loss": 1.1806530952453613, | |
| "step": 678 | |
| }, | |
| { | |
| "epoch": 0.8302808302808303, | |
| "grad_norm": 0.8462190628051758, | |
| "learning_rate": 8.642941906653014e-05, | |
| "loss": 1.1770362854003906, | |
| "step": 680 | |
| }, | |
| { | |
| "epoch": 0.8327228327228328, | |
| "grad_norm": 0.17604343593120575, | |
| "learning_rate": 8.634439473770724e-05, | |
| "loss": 0.980627715587616, | |
| "step": 682 | |
| }, | |
| { | |
| "epoch": 0.8351648351648352, | |
| "grad_norm": 0.26461589336395264, | |
| "learning_rate": 8.625915250183077e-05, | |
| "loss": 0.6270996928215027, | |
| "step": 684 | |
| }, | |
| { | |
| "epoch": 0.8376068376068376, | |
| "grad_norm": 0.17119862139225006, | |
| "learning_rate": 8.6173692951507e-05, | |
| "loss": 1.3099520206451416, | |
| "step": 686 | |
| }, | |
| { | |
| "epoch": 0.8400488400488401, | |
| "grad_norm": 0.256907194852829, | |
| "learning_rate": 8.608801668085294e-05, | |
| "loss": 1.2098278999328613, | |
| "step": 688 | |
| }, | |
| { | |
| "epoch": 0.8424908424908425, | |
| "grad_norm": 0.5490866303443909, | |
| "learning_rate": 8.600212428549228e-05, | |
| "loss": 0.9178333282470703, | |
| "step": 690 | |
| }, | |
| { | |
| "epoch": 0.8449328449328449, | |
| "grad_norm": 0.19187872111797333, | |
| "learning_rate": 8.591601636255122e-05, | |
| "loss": 1.2199687957763672, | |
| "step": 692 | |
| }, | |
| { | |
| "epoch": 0.8473748473748474, | |
| "grad_norm": 0.14713522791862488, | |
| "learning_rate": 8.582969351065425e-05, | |
| "loss": 0.942196249961853, | |
| "step": 694 | |
| }, | |
| { | |
| "epoch": 0.8498168498168498, | |
| "grad_norm": 0.1639857441186905, | |
| "learning_rate": 8.574315632992016e-05, | |
| "loss": 1.0774165391921997, | |
| "step": 696 | |
| }, | |
| { | |
| "epoch": 0.8522588522588522, | |
| "grad_norm": 0.3029420077800751, | |
| "learning_rate": 8.565640542195764e-05, | |
| "loss": 0.9257597327232361, | |
| "step": 698 | |
| }, | |
| { | |
| "epoch": 0.8547008547008547, | |
| "grad_norm": 0.17788438498973846, | |
| "learning_rate": 8.55694413898613e-05, | |
| "loss": 0.9854938983917236, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 0.8571428571428571, | |
| "grad_norm": 0.5631709098815918, | |
| "learning_rate": 8.54822648382074e-05, | |
| "loss": 1.0237292051315308, | |
| "step": 702 | |
| }, | |
| { | |
| "epoch": 0.8595848595848596, | |
| "grad_norm": 0.5782780647277832, | |
| "learning_rate": 8.539487637304957e-05, | |
| "loss": 0.9616599678993225, | |
| "step": 704 | |
| }, | |
| { | |
| "epoch": 0.8620268620268621, | |
| "grad_norm": 0.11893776059150696, | |
| "learning_rate": 8.530727660191471e-05, | |
| "loss": 0.42293256521224976, | |
| "step": 706 | |
| }, | |
| { | |
| "epoch": 0.8644688644688645, | |
| "grad_norm": 0.1753421127796173, | |
| "learning_rate": 8.521946613379878e-05, | |
| "loss": 1.1575310230255127, | |
| "step": 708 | |
| }, | |
| { | |
| "epoch": 0.8669108669108669, | |
| "grad_norm": 0.13845719397068024, | |
| "learning_rate": 8.513144557916238e-05, | |
| "loss": 0.8323335647583008, | |
| "step": 710 | |
| }, | |
| { | |
| "epoch": 0.8693528693528694, | |
| "grad_norm": 0.2918700575828552, | |
| "learning_rate": 8.504321554992675e-05, | |
| "loss": 1.2685977220535278, | |
| "step": 712 | |
| }, | |
| { | |
| "epoch": 0.8717948717948718, | |
| "grad_norm": 0.3813958764076233, | |
| "learning_rate": 8.495477665946935e-05, | |
| "loss": 1.4935425519943237, | |
| "step": 714 | |
| }, | |
| { | |
| "epoch": 0.8742368742368742, | |
| "grad_norm": 1.1048682928085327, | |
| "learning_rate": 8.486612952261969e-05, | |
| "loss": 1.4269726276397705, | |
| "step": 716 | |
| }, | |
| { | |
| "epoch": 0.8766788766788767, | |
| "grad_norm": 0.17622821033000946, | |
| "learning_rate": 8.477727475565497e-05, | |
| "loss": 0.8385149240493774, | |
| "step": 718 | |
| }, | |
| { | |
| "epoch": 0.8791208791208791, | |
| "grad_norm": 0.27277296781539917, | |
| "learning_rate": 8.468821297629586e-05, | |
| "loss": 1.1558010578155518, | |
| "step": 720 | |
| }, | |
| { | |
| "epoch": 0.8815628815628815, | |
| "grad_norm": 0.19739723205566406, | |
| "learning_rate": 8.459894480370217e-05, | |
| "loss": 0.8325861692428589, | |
| "step": 722 | |
| }, | |
| { | |
| "epoch": 0.884004884004884, | |
| "grad_norm": 0.4474552571773529, | |
| "learning_rate": 8.45094708584686e-05, | |
| "loss": 1.185271143913269, | |
| "step": 724 | |
| }, | |
| { | |
| "epoch": 0.8864468864468864, | |
| "grad_norm": 0.49738621711730957, | |
| "learning_rate": 8.441979176262036e-05, | |
| "loss": 1.141027808189392, | |
| "step": 726 | |
| }, | |
| { | |
| "epoch": 0.8888888888888888, | |
| "grad_norm": 0.2048785239458084, | |
| "learning_rate": 8.432990813960887e-05, | |
| "loss": 1.117997646331787, | |
| "step": 728 | |
| }, | |
| { | |
| "epoch": 0.8913308913308914, | |
| "grad_norm": 0.24652199447155, | |
| "learning_rate": 8.423982061430741e-05, | |
| "loss": 1.3922690153121948, | |
| "step": 730 | |
| }, | |
| { | |
| "epoch": 0.8937728937728938, | |
| "grad_norm": 0.2622013986110687, | |
| "learning_rate": 8.414952981300684e-05, | |
| "loss": 1.2243542671203613, | |
| "step": 732 | |
| }, | |
| { | |
| "epoch": 0.8962148962148963, | |
| "grad_norm": 0.22121727466583252, | |
| "learning_rate": 8.405903636341119e-05, | |
| "loss": 0.9962153434753418, | |
| "step": 734 | |
| }, | |
| { | |
| "epoch": 0.8986568986568987, | |
| "grad_norm": 0.1412484496831894, | |
| "learning_rate": 8.396834089463327e-05, | |
| "loss": 0.7077422142028809, | |
| "step": 736 | |
| }, | |
| { | |
| "epoch": 0.9010989010989011, | |
| "grad_norm": 0.3614236116409302, | |
| "learning_rate": 8.387744403719036e-05, | |
| "loss": 0.909615159034729, | |
| "step": 738 | |
| }, | |
| { | |
| "epoch": 0.9035409035409036, | |
| "grad_norm": 0.8937837481498718, | |
| "learning_rate": 8.378634642299977e-05, | |
| "loss": 0.9833166003227234, | |
| "step": 740 | |
| }, | |
| { | |
| "epoch": 0.905982905982906, | |
| "grad_norm": 0.18500567972660065, | |
| "learning_rate": 8.369504868537456e-05, | |
| "loss": 0.8549164533615112, | |
| "step": 742 | |
| }, | |
| { | |
| "epoch": 0.9084249084249084, | |
| "grad_norm": 0.2052248865365982, | |
| "learning_rate": 8.360355145901893e-05, | |
| "loss": 0.8864015936851501, | |
| "step": 744 | |
| }, | |
| { | |
| "epoch": 0.9108669108669109, | |
| "grad_norm": 0.31549692153930664, | |
| "learning_rate": 8.351185538002405e-05, | |
| "loss": 1.166724681854248, | |
| "step": 746 | |
| }, | |
| { | |
| "epoch": 0.9133089133089133, | |
| "grad_norm": 0.11704683303833008, | |
| "learning_rate": 8.341996108586344e-05, | |
| "loss": 0.8230197429656982, | |
| "step": 748 | |
| }, | |
| { | |
| "epoch": 0.9157509157509157, | |
| "grad_norm": 0.22029860317707062, | |
| "learning_rate": 8.332786921538861e-05, | |
| "loss": 1.325174331665039, | |
| "step": 750 | |
| }, | |
| { | |
| "epoch": 0.9181929181929182, | |
| "grad_norm": 0.15344086289405823, | |
| "learning_rate": 8.32355804088247e-05, | |
| "loss": 1.187482237815857, | |
| "step": 752 | |
| }, | |
| { | |
| "epoch": 0.9206349206349206, | |
| "grad_norm": 0.2685832381248474, | |
| "learning_rate": 8.314309530776591e-05, | |
| "loss": 1.203112006187439, | |
| "step": 754 | |
| }, | |
| { | |
| "epoch": 0.9230769230769231, | |
| "grad_norm": 0.2993275821208954, | |
| "learning_rate": 8.305041455517109e-05, | |
| "loss": 1.1844916343688965, | |
| "step": 756 | |
| }, | |
| { | |
| "epoch": 0.9255189255189256, | |
| "grad_norm": 0.2729142904281616, | |
| "learning_rate": 8.295753879535924e-05, | |
| "loss": 1.2081713676452637, | |
| "step": 758 | |
| }, | |
| { | |
| "epoch": 0.927960927960928, | |
| "grad_norm": 0.3772994577884674, | |
| "learning_rate": 8.286446867400505e-05, | |
| "loss": 0.7893592715263367, | |
| "step": 760 | |
| }, | |
| { | |
| "epoch": 0.9304029304029304, | |
| "grad_norm": 0.2306855022907257, | |
| "learning_rate": 8.27712048381345e-05, | |
| "loss": 0.8467912077903748, | |
| "step": 762 | |
| }, | |
| { | |
| "epoch": 0.9328449328449329, | |
| "grad_norm": 0.38122788071632385, | |
| "learning_rate": 8.267774793612019e-05, | |
| "loss": 0.8108948469161987, | |
| "step": 764 | |
| }, | |
| { | |
| "epoch": 0.9352869352869353, | |
| "grad_norm": 0.5010560750961304, | |
| "learning_rate": 8.258409861767694e-05, | |
| "loss": 0.5701981782913208, | |
| "step": 766 | |
| }, | |
| { | |
| "epoch": 0.9377289377289377, | |
| "grad_norm": 0.3828265070915222, | |
| "learning_rate": 8.249025753385731e-05, | |
| "loss": 0.8258084058761597, | |
| "step": 768 | |
| }, | |
| { | |
| "epoch": 0.9401709401709402, | |
| "grad_norm": 0.40815332531929016, | |
| "learning_rate": 8.239622533704694e-05, | |
| "loss": 0.5677390098571777, | |
| "step": 770 | |
| }, | |
| { | |
| "epoch": 0.9426129426129426, | |
| "grad_norm": 0.20372486114501953, | |
| "learning_rate": 8.230200268096012e-05, | |
| "loss": 1.0656176805496216, | |
| "step": 772 | |
| }, | |
| { | |
| "epoch": 0.945054945054945, | |
| "grad_norm": 0.7339096069335938, | |
| "learning_rate": 8.220759022063526e-05, | |
| "loss": 1.189461350440979, | |
| "step": 774 | |
| }, | |
| { | |
| "epoch": 0.9474969474969475, | |
| "grad_norm": 0.19655680656433105, | |
| "learning_rate": 8.21129886124302e-05, | |
| "loss": 1.1418293714523315, | |
| "step": 776 | |
| }, | |
| { | |
| "epoch": 0.9499389499389499, | |
| "grad_norm": 0.21596293151378632, | |
| "learning_rate": 8.201819851401787e-05, | |
| "loss": 0.7796148061752319, | |
| "step": 778 | |
| }, | |
| { | |
| "epoch": 0.9523809523809523, | |
| "grad_norm": 0.09174662828445435, | |
| "learning_rate": 8.192322058438147e-05, | |
| "loss": 0.8845089673995972, | |
| "step": 780 | |
| }, | |
| { | |
| "epoch": 0.9548229548229549, | |
| "grad_norm": 1.103407859802246, | |
| "learning_rate": 8.182805548381005e-05, | |
| "loss": 1.1154630184173584, | |
| "step": 782 | |
| }, | |
| { | |
| "epoch": 0.9572649572649573, | |
| "grad_norm": 0.6213619112968445, | |
| "learning_rate": 8.173270387389393e-05, | |
| "loss": 1.180299162864685, | |
| "step": 784 | |
| }, | |
| { | |
| "epoch": 0.9597069597069597, | |
| "grad_norm": 0.24691630899906158, | |
| "learning_rate": 8.163716641751993e-05, | |
| "loss": 1.2167127132415771, | |
| "step": 786 | |
| }, | |
| { | |
| "epoch": 0.9621489621489622, | |
| "grad_norm": 0.6181882619857788, | |
| "learning_rate": 8.154144377886703e-05, | |
| "loss": 1.3297795057296753, | |
| "step": 788 | |
| }, | |
| { | |
| "epoch": 0.9645909645909646, | |
| "grad_norm": 0.4106929302215576, | |
| "learning_rate": 8.144553662340148e-05, | |
| "loss": 1.2432931661605835, | |
| "step": 790 | |
| }, | |
| { | |
| "epoch": 0.967032967032967, | |
| "grad_norm": 0.288196861743927, | |
| "learning_rate": 8.134944561787232e-05, | |
| "loss": 0.9817522764205933, | |
| "step": 792 | |
| }, | |
| { | |
| "epoch": 0.9694749694749695, | |
| "grad_norm": 0.3909185528755188, | |
| "learning_rate": 8.125317143030679e-05, | |
| "loss": 1.2259184122085571, | |
| "step": 794 | |
| }, | |
| { | |
| "epoch": 0.9719169719169719, | |
| "grad_norm": 0.4058467745780945, | |
| "learning_rate": 8.11567147300055e-05, | |
| "loss": 1.1331555843353271, | |
| "step": 796 | |
| }, | |
| { | |
| "epoch": 0.9743589743589743, | |
| "grad_norm": 0.26042062044143677, | |
| "learning_rate": 8.106007618753801e-05, | |
| "loss": 1.1959770917892456, | |
| "step": 798 | |
| }, | |
| { | |
| "epoch": 0.9768009768009768, | |
| "grad_norm": 0.2748604118824005, | |
| "learning_rate": 8.096325647473799e-05, | |
| "loss": 1.2063909769058228, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 0.9792429792429792, | |
| "grad_norm": 0.3420720398426056, | |
| "learning_rate": 8.086625626469861e-05, | |
| "loss": 0.8798475861549377, | |
| "step": 802 | |
| }, | |
| { | |
| "epoch": 0.9816849816849816, | |
| "grad_norm": 0.27383849024772644, | |
| "learning_rate": 8.076907623176787e-05, | |
| "loss": 1.0582551956176758, | |
| "step": 804 | |
| }, | |
| { | |
| "epoch": 0.9841269841269841, | |
| "grad_norm": 0.36179980635643005, | |
| "learning_rate": 8.067171705154392e-05, | |
| "loss": 1.5065923929214478, | |
| "step": 806 | |
| }, | |
| { | |
| "epoch": 0.9865689865689866, | |
| "grad_norm": 0.4236770570278168, | |
| "learning_rate": 8.05741794008703e-05, | |
| "loss": 0.8796338438987732, | |
| "step": 808 | |
| }, | |
| { | |
| "epoch": 0.989010989010989, | |
| "grad_norm": 0.24213649332523346, | |
| "learning_rate": 8.047646395783138e-05, | |
| "loss": 1.220908284187317, | |
| "step": 810 | |
| }, | |
| { | |
| "epoch": 0.9914529914529915, | |
| "grad_norm": 0.5446254014968872, | |
| "learning_rate": 8.037857140174739e-05, | |
| "loss": 1.1596776247024536, | |
| "step": 812 | |
| }, | |
| { | |
| "epoch": 0.9938949938949939, | |
| "grad_norm": 0.4332633316516876, | |
| "learning_rate": 8.028050241317001e-05, | |
| "loss": 1.2945256233215332, | |
| "step": 814 | |
| }, | |
| { | |
| "epoch": 0.9963369963369964, | |
| "grad_norm": 0.21300239861011505, | |
| "learning_rate": 8.01822576738774e-05, | |
| "loss": 1.1899398565292358, | |
| "step": 816 | |
| }, | |
| { | |
| "epoch": 0.9987789987789988, | |
| "grad_norm": 0.9659496545791626, | |
| "learning_rate": 8.008383786686955e-05, | |
| "loss": 1.0717302560806274, | |
| "step": 818 | |
| }, | |
| { | |
| "epoch": 1.0012210012210012, | |
| "grad_norm": 0.27998143434524536, | |
| "learning_rate": 7.998524367636357e-05, | |
| "loss": 0.8679777979850769, | |
| "step": 820 | |
| }, | |
| { | |
| "epoch": 1.0036630036630036, | |
| "grad_norm": 0.30651572346687317, | |
| "learning_rate": 7.988647578778883e-05, | |
| "loss": 1.1609539985656738, | |
| "step": 822 | |
| }, | |
| { | |
| "epoch": 1.006105006105006, | |
| "grad_norm": 0.12804636359214783, | |
| "learning_rate": 7.978753488778229e-05, | |
| "loss": 0.42196178436279297, | |
| "step": 824 | |
| }, | |
| { | |
| "epoch": 1.0085470085470085, | |
| "grad_norm": 0.22472728788852692, | |
| "learning_rate": 7.968842166418366e-05, | |
| "loss": 1.0349066257476807, | |
| "step": 826 | |
| }, | |
| { | |
| "epoch": 1.010989010989011, | |
| "grad_norm": 0.2975021004676819, | |
| "learning_rate": 7.958913680603068e-05, | |
| "loss": 0.8017234206199646, | |
| "step": 828 | |
| }, | |
| { | |
| "epoch": 1.0134310134310134, | |
| "grad_norm": 0.14798957109451294, | |
| "learning_rate": 7.94896810035543e-05, | |
| "loss": 0.821688711643219, | |
| "step": 830 | |
| }, | |
| { | |
| "epoch": 1.0158730158730158, | |
| "grad_norm": 1.7618190050125122, | |
| "learning_rate": 7.939005494817384e-05, | |
| "loss": 0.887861430644989, | |
| "step": 832 | |
| }, | |
| { | |
| "epoch": 1.0183150183150182, | |
| "grad_norm": 0.23109908401966095, | |
| "learning_rate": 7.929025933249228e-05, | |
| "loss": 0.8126564025878906, | |
| "step": 834 | |
| }, | |
| { | |
| "epoch": 1.0207570207570207, | |
| "grad_norm": 0.7098497152328491, | |
| "learning_rate": 7.919029485029132e-05, | |
| "loss": 1.1389166116714478, | |
| "step": 836 | |
| }, | |
| { | |
| "epoch": 1.0231990231990231, | |
| "grad_norm": 0.4822708070278168, | |
| "learning_rate": 7.909016219652668e-05, | |
| "loss": 1.1576635837554932, | |
| "step": 838 | |
| }, | |
| { | |
| "epoch": 1.0256410256410255, | |
| "grad_norm": 1.9382874965667725, | |
| "learning_rate": 7.898986206732318e-05, | |
| "loss": 0.8626747727394104, | |
| "step": 840 | |
| }, | |
| { | |
| "epoch": 1.028083028083028, | |
| "grad_norm": 0.2783157527446747, | |
| "learning_rate": 7.888939515996997e-05, | |
| "loss": 0.8771259188652039, | |
| "step": 842 | |
| }, | |
| { | |
| "epoch": 1.0305250305250304, | |
| "grad_norm": 0.6428969502449036, | |
| "learning_rate": 7.878876217291559e-05, | |
| "loss": 1.07393479347229, | |
| "step": 844 | |
| }, | |
| { | |
| "epoch": 1.032967032967033, | |
| "grad_norm": 0.33416125178337097, | |
| "learning_rate": 7.868796380576323e-05, | |
| "loss": 0.8236369490623474, | |
| "step": 846 | |
| }, | |
| { | |
| "epoch": 1.0354090354090355, | |
| "grad_norm": 0.1370590627193451, | |
| "learning_rate": 7.858700075926578e-05, | |
| "loss": 1.1043959856033325, | |
| "step": 848 | |
| }, | |
| { | |
| "epoch": 1.037851037851038, | |
| "grad_norm": 0.3286353051662445, | |
| "learning_rate": 7.848587373532096e-05, | |
| "loss": 0.8794589638710022, | |
| "step": 850 | |
| }, | |
| { | |
| "epoch": 1.0402930402930404, | |
| "grad_norm": 0.12774719297885895, | |
| "learning_rate": 7.83845834369665e-05, | |
| "loss": 0.3835936486721039, | |
| "step": 852 | |
| }, | |
| { | |
| "epoch": 1.0427350427350428, | |
| "grad_norm": 0.3142074942588806, | |
| "learning_rate": 7.828313056837523e-05, | |
| "loss": 1.0747510194778442, | |
| "step": 854 | |
| }, | |
| { | |
| "epoch": 1.0451770451770452, | |
| "grad_norm": 0.41835519671440125, | |
| "learning_rate": 7.818151583485012e-05, | |
| "loss": 0.7233187556266785, | |
| "step": 856 | |
| }, | |
| { | |
| "epoch": 1.0476190476190477, | |
| "grad_norm": 0.23530925810337067, | |
| "learning_rate": 7.807973994281948e-05, | |
| "loss": 1.1343649625778198, | |
| "step": 858 | |
| }, | |
| { | |
| "epoch": 1.05006105006105, | |
| "grad_norm": 0.4393465220928192, | |
| "learning_rate": 7.797780359983198e-05, | |
| "loss": 0.7696434259414673, | |
| "step": 860 | |
| }, | |
| { | |
| "epoch": 1.0525030525030525, | |
| "grad_norm": 0.5696142911911011, | |
| "learning_rate": 7.787570751455173e-05, | |
| "loss": 0.997528612613678, | |
| "step": 862 | |
| }, | |
| { | |
| "epoch": 1.054945054945055, | |
| "grad_norm": 0.673509955406189, | |
| "learning_rate": 7.777345239675342e-05, | |
| "loss": 1.0887210369110107, | |
| "step": 864 | |
| }, | |
| { | |
| "epoch": 1.0573870573870574, | |
| "grad_norm": 0.8321965336799622, | |
| "learning_rate": 7.767103895731727e-05, | |
| "loss": 1.3814585208892822, | |
| "step": 866 | |
| }, | |
| { | |
| "epoch": 1.0598290598290598, | |
| "grad_norm": 0.40577077865600586, | |
| "learning_rate": 7.756846790822422e-05, | |
| "loss": 0.39930352568626404, | |
| "step": 868 | |
| }, | |
| { | |
| "epoch": 1.0622710622710623, | |
| "grad_norm": 0.15732550621032715, | |
| "learning_rate": 7.746573996255089e-05, | |
| "loss": 0.8849136233329773, | |
| "step": 870 | |
| }, | |
| { | |
| "epoch": 1.0647130647130647, | |
| "grad_norm": 0.11497446149587631, | |
| "learning_rate": 7.736285583446463e-05, | |
| "loss": 0.9299435019493103, | |
| "step": 872 | |
| }, | |
| { | |
| "epoch": 1.0671550671550671, | |
| "grad_norm": 0.21858395636081696, | |
| "learning_rate": 7.725981623921862e-05, | |
| "loss": 0.7411776781082153, | |
| "step": 874 | |
| }, | |
| { | |
| "epoch": 1.0695970695970696, | |
| "grad_norm": 0.18695200979709625, | |
| "learning_rate": 7.715662189314682e-05, | |
| "loss": 1.057569146156311, | |
| "step": 876 | |
| }, | |
| { | |
| "epoch": 1.072039072039072, | |
| "grad_norm": 0.8311925530433655, | |
| "learning_rate": 7.705327351365901e-05, | |
| "loss": 1.0278160572052002, | |
| "step": 878 | |
| }, | |
| { | |
| "epoch": 1.0744810744810744, | |
| "grad_norm": 0.31226208806037903, | |
| "learning_rate": 7.694977181923585e-05, | |
| "loss": 0.723739743232727, | |
| "step": 880 | |
| }, | |
| { | |
| "epoch": 1.0769230769230769, | |
| "grad_norm": 0.3679559528827667, | |
| "learning_rate": 7.684611752942383e-05, | |
| "loss": 1.0882328748703003, | |
| "step": 882 | |
| }, | |
| { | |
| "epoch": 1.0793650793650793, | |
| "grad_norm": 1.0180855989456177, | |
| "learning_rate": 7.674231136483029e-05, | |
| "loss": 0.7524626851081848, | |
| "step": 884 | |
| }, | |
| { | |
| "epoch": 1.0818070818070817, | |
| "grad_norm": 0.28578001260757446, | |
| "learning_rate": 7.663835404711841e-05, | |
| "loss": 0.6836865544319153, | |
| "step": 886 | |
| }, | |
| { | |
| "epoch": 1.0842490842490842, | |
| "grad_norm": 0.2188597023487091, | |
| "learning_rate": 7.653424629900217e-05, | |
| "loss": 1.1328239440917969, | |
| "step": 888 | |
| }, | |
| { | |
| "epoch": 1.0866910866910866, | |
| "grad_norm": 0.1949382871389389, | |
| "learning_rate": 7.642998884424139e-05, | |
| "loss": 0.8361105918884277, | |
| "step": 890 | |
| }, | |
| { | |
| "epoch": 1.089133089133089, | |
| "grad_norm": 0.7119947671890259, | |
| "learning_rate": 7.632558240763659e-05, | |
| "loss": 1.0361770391464233, | |
| "step": 892 | |
| }, | |
| { | |
| "epoch": 1.0915750915750915, | |
| "grad_norm": 0.7097267508506775, | |
| "learning_rate": 7.622102771502407e-05, | |
| "loss": 1.0411162376403809, | |
| "step": 894 | |
| }, | |
| { | |
| "epoch": 1.0940170940170941, | |
| "grad_norm": 1.0294795036315918, | |
| "learning_rate": 7.61163254932708e-05, | |
| "loss": 1.0457152128219604, | |
| "step": 896 | |
| }, | |
| { | |
| "epoch": 1.0964590964590966, | |
| "grad_norm": 0.20822793245315552, | |
| "learning_rate": 7.601147647026938e-05, | |
| "loss": 0.8243889212608337, | |
| "step": 898 | |
| }, | |
| { | |
| "epoch": 1.098901098901099, | |
| "grad_norm": 0.2013041228055954, | |
| "learning_rate": 7.590648137493294e-05, | |
| "loss": 0.6829918622970581, | |
| "step": 900 | |
| }, | |
| { | |
| "epoch": 1.1013431013431014, | |
| "grad_norm": 0.36496207118034363, | |
| "learning_rate": 7.580134093719017e-05, | |
| "loss": 0.6160250306129456, | |
| "step": 902 | |
| }, | |
| { | |
| "epoch": 1.1037851037851039, | |
| "grad_norm": 0.1050109714269638, | |
| "learning_rate": 7.569605588798009e-05, | |
| "loss": 0.639164388179779, | |
| "step": 904 | |
| }, | |
| { | |
| "epoch": 1.1062271062271063, | |
| "grad_norm": 0.47659313678741455, | |
| "learning_rate": 7.559062695924717e-05, | |
| "loss": 0.8138771057128906, | |
| "step": 906 | |
| }, | |
| { | |
| "epoch": 1.1086691086691087, | |
| "grad_norm": 0.23948967456817627, | |
| "learning_rate": 7.548505488393607e-05, | |
| "loss": 1.0633188486099243, | |
| "step": 908 | |
| }, | |
| { | |
| "epoch": 1.1111111111111112, | |
| "grad_norm": 0.6550633907318115, | |
| "learning_rate": 7.537934039598662e-05, | |
| "loss": 0.8208356499671936, | |
| "step": 910 | |
| }, | |
| { | |
| "epoch": 1.1135531135531136, | |
| "grad_norm": 0.4368497431278229, | |
| "learning_rate": 7.527348423032872e-05, | |
| "loss": 1.124341607093811, | |
| "step": 912 | |
| }, | |
| { | |
| "epoch": 1.115995115995116, | |
| "grad_norm": 71.85137939453125, | |
| "learning_rate": 7.51674871228772e-05, | |
| "loss": 0.7379211187362671, | |
| "step": 914 | |
| }, | |
| { | |
| "epoch": 1.1184371184371185, | |
| "grad_norm": 0.7239526510238647, | |
| "learning_rate": 7.506134981052673e-05, | |
| "loss": 0.7759638428688049, | |
| "step": 916 | |
| }, | |
| { | |
| "epoch": 1.120879120879121, | |
| "grad_norm": 0.3855206072330475, | |
| "learning_rate": 7.495507303114667e-05, | |
| "loss": 1.0572266578674316, | |
| "step": 918 | |
| }, | |
| { | |
| "epoch": 1.1233211233211233, | |
| "grad_norm": 0.5260382294654846, | |
| "learning_rate": 7.484865752357602e-05, | |
| "loss": 1.0437157154083252, | |
| "step": 920 | |
| }, | |
| { | |
| "epoch": 1.1257631257631258, | |
| "grad_norm": 0.15241922438144684, | |
| "learning_rate": 7.474210402761814e-05, | |
| "loss": 0.7936856150627136, | |
| "step": 922 | |
| }, | |
| { | |
| "epoch": 1.1282051282051282, | |
| "grad_norm": 0.4152061641216278, | |
| "learning_rate": 7.463541328403573e-05, | |
| "loss": 0.6679997444152832, | |
| "step": 924 | |
| }, | |
| { | |
| "epoch": 1.1306471306471306, | |
| "grad_norm": 0.2306256741285324, | |
| "learning_rate": 7.452858603454563e-05, | |
| "loss": 0.559407114982605, | |
| "step": 926 | |
| }, | |
| { | |
| "epoch": 1.133089133089133, | |
| "grad_norm": 1.4361807107925415, | |
| "learning_rate": 7.44216230218137e-05, | |
| "loss": 0.5593606233596802, | |
| "step": 928 | |
| }, | |
| { | |
| "epoch": 1.1355311355311355, | |
| "grad_norm": 0.1379753202199936, | |
| "learning_rate": 7.43145249894496e-05, | |
| "loss": 1.0598596334457397, | |
| "step": 930 | |
| }, | |
| { | |
| "epoch": 1.137973137973138, | |
| "grad_norm": 0.22989769279956818, | |
| "learning_rate": 7.420729268200164e-05, | |
| "loss": 1.1493088006973267, | |
| "step": 932 | |
| }, | |
| { | |
| "epoch": 1.1404151404151404, | |
| "grad_norm": 0.40291470289230347, | |
| "learning_rate": 7.409992684495167e-05, | |
| "loss": 0.8961124420166016, | |
| "step": 934 | |
| }, | |
| { | |
| "epoch": 1.1428571428571428, | |
| "grad_norm": 0.17242197692394257, | |
| "learning_rate": 7.399242822470975e-05, | |
| "loss": 0.8901186585426331, | |
| "step": 936 | |
| }, | |
| { | |
| "epoch": 1.1452991452991452, | |
| "grad_norm": 0.14181964099407196, | |
| "learning_rate": 7.388479756860915e-05, | |
| "loss": 0.9390299320220947, | |
| "step": 938 | |
| }, | |
| { | |
| "epoch": 1.1477411477411477, | |
| "grad_norm": 0.3699209690093994, | |
| "learning_rate": 7.377703562490099e-05, | |
| "loss": 1.0898199081420898, | |
| "step": 940 | |
| }, | |
| { | |
| "epoch": 1.15018315018315, | |
| "grad_norm": 0.42112553119659424, | |
| "learning_rate": 7.366914314274912e-05, | |
| "loss": 1.0293309688568115, | |
| "step": 942 | |
| }, | |
| { | |
| "epoch": 1.1526251526251525, | |
| "grad_norm": 0.2779008746147156, | |
| "learning_rate": 7.356112087222493e-05, | |
| "loss": 0.8111650347709656, | |
| "step": 944 | |
| }, | |
| { | |
| "epoch": 1.155067155067155, | |
| "grad_norm": 0.16659800708293915, | |
| "learning_rate": 7.345296956430203e-05, | |
| "loss": 0.7641980051994324, | |
| "step": 946 | |
| }, | |
| { | |
| "epoch": 1.1575091575091574, | |
| "grad_norm": 0.7026783227920532, | |
| "learning_rate": 7.33446899708512e-05, | |
| "loss": 0.4367336332798004, | |
| "step": 948 | |
| }, | |
| { | |
| "epoch": 1.1599511599511598, | |
| "grad_norm": 0.3488795757293701, | |
| "learning_rate": 7.323628284463499e-05, | |
| "loss": 0.9880833625793457, | |
| "step": 950 | |
| }, | |
| { | |
| "epoch": 1.1623931623931625, | |
| "grad_norm": 0.17974501848220825, | |
| "learning_rate": 7.312774893930255e-05, | |
| "loss": 0.978955864906311, | |
| "step": 952 | |
| }, | |
| { | |
| "epoch": 1.164835164835165, | |
| "grad_norm": 0.4115867018699646, | |
| "learning_rate": 7.301908900938448e-05, | |
| "loss": 0.818198025226593, | |
| "step": 954 | |
| }, | |
| { | |
| "epoch": 1.1672771672771673, | |
| "grad_norm": 0.1330568641424179, | |
| "learning_rate": 7.291030381028744e-05, | |
| "loss": 0.8854855895042419, | |
| "step": 956 | |
| }, | |
| { | |
| "epoch": 1.1697191697191698, | |
| "grad_norm": 0.41006651520729065, | |
| "learning_rate": 7.2801394098289e-05, | |
| "loss": 0.8362188339233398, | |
| "step": 958 | |
| }, | |
| { | |
| "epoch": 1.1721611721611722, | |
| "grad_norm": 0.3192770779132843, | |
| "learning_rate": 7.26923606305323e-05, | |
| "loss": 1.1527349948883057, | |
| "step": 960 | |
| }, | |
| { | |
| "epoch": 1.1746031746031746, | |
| "grad_norm": 0.41627976298332214, | |
| "learning_rate": 7.258320416502089e-05, | |
| "loss": 0.8889287710189819, | |
| "step": 962 | |
| }, | |
| { | |
| "epoch": 1.177045177045177, | |
| "grad_norm": 0.12926903367042542, | |
| "learning_rate": 7.247392546061337e-05, | |
| "loss": 0.7678587436676025, | |
| "step": 964 | |
| }, | |
| { | |
| "epoch": 1.1794871794871795, | |
| "grad_norm": 0.06340300291776657, | |
| "learning_rate": 7.236452527701816e-05, | |
| "loss": 1.0936024188995361, | |
| "step": 966 | |
| }, | |
| { | |
| "epoch": 1.181929181929182, | |
| "grad_norm": 0.2604690194129944, | |
| "learning_rate": 7.22550043747882e-05, | |
| "loss": 1.0897053480148315, | |
| "step": 968 | |
| }, | |
| { | |
| "epoch": 1.1843711843711844, | |
| "grad_norm": 0.30255287885665894, | |
| "learning_rate": 7.214536351531568e-05, | |
| "loss": 0.7890142202377319, | |
| "step": 970 | |
| }, | |
| { | |
| "epoch": 1.1868131868131868, | |
| "grad_norm": 0.21171697974205017, | |
| "learning_rate": 7.203560346082672e-05, | |
| "loss": 1.1312793493270874, | |
| "step": 972 | |
| }, | |
| { | |
| "epoch": 1.1892551892551892, | |
| "grad_norm": 0.33636537194252014, | |
| "learning_rate": 7.19257249743761e-05, | |
| "loss": 0.8746294379234314, | |
| "step": 974 | |
| }, | |
| { | |
| "epoch": 1.1916971916971917, | |
| "grad_norm": 0.21301348507404327, | |
| "learning_rate": 7.181572881984194e-05, | |
| "loss": 0.7623686194419861, | |
| "step": 976 | |
| }, | |
| { | |
| "epoch": 1.1941391941391941, | |
| "grad_norm": 0.3435983955860138, | |
| "learning_rate": 7.170561576192037e-05, | |
| "loss": 0.8620277643203735, | |
| "step": 978 | |
| }, | |
| { | |
| "epoch": 1.1965811965811965, | |
| "grad_norm": 0.395470529794693, | |
| "learning_rate": 7.159538656612025e-05, | |
| "loss": 1.0953612327575684, | |
| "step": 980 | |
| }, | |
| { | |
| "epoch": 1.199023199023199, | |
| "grad_norm": 0.31399983167648315, | |
| "learning_rate": 7.148504199875789e-05, | |
| "loss": 1.097609043121338, | |
| "step": 982 | |
| }, | |
| { | |
| "epoch": 1.2014652014652014, | |
| "grad_norm": 0.26807838678359985, | |
| "learning_rate": 7.137458282695156e-05, | |
| "loss": 1.1081359386444092, | |
| "step": 984 | |
| }, | |
| { | |
| "epoch": 1.2039072039072038, | |
| "grad_norm": 0.5702657103538513, | |
| "learning_rate": 7.126400981861634e-05, | |
| "loss": 0.7700930833816528, | |
| "step": 986 | |
| }, | |
| { | |
| "epoch": 1.2063492063492063, | |
| "grad_norm": 0.6189203262329102, | |
| "learning_rate": 7.115332374245866e-05, | |
| "loss": 0.7745048999786377, | |
| "step": 988 | |
| }, | |
| { | |
| "epoch": 1.2087912087912087, | |
| "grad_norm": 0.549611508846283, | |
| "learning_rate": 7.104252536797105e-05, | |
| "loss": 0.8819143772125244, | |
| "step": 990 | |
| }, | |
| { | |
| "epoch": 1.2112332112332111, | |
| "grad_norm": 0.27053648233413696, | |
| "learning_rate": 7.093161546542671e-05, | |
| "loss": 1.1001336574554443, | |
| "step": 992 | |
| }, | |
| { | |
| "epoch": 1.2136752136752136, | |
| "grad_norm": 0.7893326878547668, | |
| "learning_rate": 7.082059480587418e-05, | |
| "loss": 1.0525215864181519, | |
| "step": 994 | |
| }, | |
| { | |
| "epoch": 1.2161172161172162, | |
| "grad_norm": 0.24564678966999054, | |
| "learning_rate": 7.070946416113199e-05, | |
| "loss": 0.8900183439254761, | |
| "step": 996 | |
| }, | |
| { | |
| "epoch": 1.2185592185592187, | |
| "grad_norm": 0.22014155983924866, | |
| "learning_rate": 7.05982243037833e-05, | |
| "loss": 1.0661380290985107, | |
| "step": 998 | |
| }, | |
| { | |
| "epoch": 1.221001221001221, | |
| "grad_norm": 0.09143731743097305, | |
| "learning_rate": 7.048687600717052e-05, | |
| "loss": 0.6782379150390625, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 1.2234432234432235, | |
| "grad_norm": 0.3361298739910126, | |
| "learning_rate": 7.037542004538991e-05, | |
| "loss": 0.8010128736495972, | |
| "step": 1002 | |
| }, | |
| { | |
| "epoch": 1.225885225885226, | |
| "grad_norm": 0.4829467535018921, | |
| "learning_rate": 7.026385719328623e-05, | |
| "loss": 1.0825215578079224, | |
| "step": 1004 | |
| }, | |
| { | |
| "epoch": 1.2283272283272284, | |
| "grad_norm": 0.7137899398803711, | |
| "learning_rate": 7.01521882264474e-05, | |
| "loss": 0.7480688095092773, | |
| "step": 1006 | |
| }, | |
| { | |
| "epoch": 1.2307692307692308, | |
| "grad_norm": 0.14110709726810455, | |
| "learning_rate": 7.004041392119894e-05, | |
| "loss": 0.7143623232841492, | |
| "step": 1008 | |
| }, | |
| { | |
| "epoch": 1.2332112332112333, | |
| "grad_norm": 0.33323201537132263, | |
| "learning_rate": 6.992853505459878e-05, | |
| "loss": 1.1174697875976562, | |
| "step": 1010 | |
| }, | |
| { | |
| "epoch": 1.2356532356532357, | |
| "grad_norm": 0.0313718244433403, | |
| "learning_rate": 6.981655240443175e-05, | |
| "loss": 0.641499936580658, | |
| "step": 1012 | |
| }, | |
| { | |
| "epoch": 1.2380952380952381, | |
| "grad_norm": 0.25837182998657227, | |
| "learning_rate": 6.970446674920414e-05, | |
| "loss": 1.1100095510482788, | |
| "step": 1014 | |
| }, | |
| { | |
| "epoch": 1.2405372405372406, | |
| "grad_norm": 0.4039323925971985, | |
| "learning_rate": 6.959227886813837e-05, | |
| "loss": 0.9780574440956116, | |
| "step": 1016 | |
| }, | |
| { | |
| "epoch": 1.242979242979243, | |
| "grad_norm": 0.4271914064884186, | |
| "learning_rate": 6.947998954116754e-05, | |
| "loss": 1.0637195110321045, | |
| "step": 1018 | |
| }, | |
| { | |
| "epoch": 1.2454212454212454, | |
| "grad_norm": 0.1750369668006897, | |
| "learning_rate": 6.936759954892997e-05, | |
| "loss": 0.758945643901825, | |
| "step": 1020 | |
| }, | |
| { | |
| "epoch": 1.2478632478632479, | |
| "grad_norm": 1.4834343194961548, | |
| "learning_rate": 6.925510967276384e-05, | |
| "loss": 0.955576479434967, | |
| "step": 1022 | |
| }, | |
| { | |
| "epoch": 1.2503052503052503, | |
| "grad_norm": 0.2839743494987488, | |
| "learning_rate": 6.914252069470175e-05, | |
| "loss": 1.065222978591919, | |
| "step": 1024 | |
| }, | |
| { | |
| "epoch": 1.2527472527472527, | |
| "grad_norm": 0.22138509154319763, | |
| "learning_rate": 6.902983339746518e-05, | |
| "loss": 1.080528736114502, | |
| "step": 1026 | |
| }, | |
| { | |
| "epoch": 1.2551892551892552, | |
| "grad_norm": 0.7222219705581665, | |
| "learning_rate": 6.891704856445921e-05, | |
| "loss": 0.9944185614585876, | |
| "step": 1028 | |
| }, | |
| { | |
| "epoch": 1.2576312576312576, | |
| "grad_norm": 0.36048662662506104, | |
| "learning_rate": 6.88041669797669e-05, | |
| "loss": 0.9482412934303284, | |
| "step": 1030 | |
| }, | |
| { | |
| "epoch": 1.26007326007326, | |
| "grad_norm": 0.24719834327697754, | |
| "learning_rate": 6.869118942814404e-05, | |
| "loss": 1.1315852403640747, | |
| "step": 1032 | |
| }, | |
| { | |
| "epoch": 1.2625152625152625, | |
| "grad_norm": 0.5581805109977722, | |
| "learning_rate": 6.857811669501352e-05, | |
| "loss": 1.092515468597412, | |
| "step": 1034 | |
| }, | |
| { | |
| "epoch": 1.264957264957265, | |
| "grad_norm": 0.06896381825208664, | |
| "learning_rate": 6.846494956645993e-05, | |
| "loss": 0.808214545249939, | |
| "step": 1036 | |
| }, | |
| { | |
| "epoch": 1.2673992673992673, | |
| "grad_norm": 0.6676501035690308, | |
| "learning_rate": 6.83516888292241e-05, | |
| "loss": 0.6143781542778015, | |
| "step": 1038 | |
| }, | |
| { | |
| "epoch": 1.2698412698412698, | |
| "grad_norm": 0.19579023122787476, | |
| "learning_rate": 6.823833527069767e-05, | |
| "loss": 0.7590903043746948, | |
| "step": 1040 | |
| }, | |
| { | |
| "epoch": 1.2722832722832722, | |
| "grad_norm": 0.20698176324367523, | |
| "learning_rate": 6.812488967891755e-05, | |
| "loss": 1.0690947771072388, | |
| "step": 1042 | |
| }, | |
| { | |
| "epoch": 1.2747252747252746, | |
| "grad_norm": 0.1280958503484726, | |
| "learning_rate": 6.801135284256044e-05, | |
| "loss": 0.879156231880188, | |
| "step": 1044 | |
| }, | |
| { | |
| "epoch": 1.277167277167277, | |
| "grad_norm": 0.499832421541214, | |
| "learning_rate": 6.789772555093741e-05, | |
| "loss": 1.0459083318710327, | |
| "step": 1046 | |
| }, | |
| { | |
| "epoch": 1.2796092796092795, | |
| "grad_norm": 0.35604536533355713, | |
| "learning_rate": 6.77840085939884e-05, | |
| "loss": 0.7542599439620972, | |
| "step": 1048 | |
| }, | |
| { | |
| "epoch": 1.282051282051282, | |
| "grad_norm": 0.4993343651294708, | |
| "learning_rate": 6.767020276227665e-05, | |
| "loss": 0.34675005078315735, | |
| "step": 1050 | |
| }, | |
| { | |
| "epoch": 1.2844932844932844, | |
| "grad_norm": 0.468500554561615, | |
| "learning_rate": 6.755630884698327e-05, | |
| "loss": 1.0820245742797852, | |
| "step": 1052 | |
| }, | |
| { | |
| "epoch": 1.2869352869352868, | |
| "grad_norm": 0.3695545792579651, | |
| "learning_rate": 6.744232763990176e-05, | |
| "loss": 0.8342756628990173, | |
| "step": 1054 | |
| }, | |
| { | |
| "epoch": 1.2893772893772895, | |
| "grad_norm": 0.3611419200897217, | |
| "learning_rate": 6.732825993343246e-05, | |
| "loss": 0.7318598628044128, | |
| "step": 1056 | |
| }, | |
| { | |
| "epoch": 1.291819291819292, | |
| "grad_norm": 0.7220599055290222, | |
| "learning_rate": 6.721410652057705e-05, | |
| "loss": 1.1916691064834595, | |
| "step": 1058 | |
| }, | |
| { | |
| "epoch": 1.2942612942612943, | |
| "grad_norm": 0.22138787806034088, | |
| "learning_rate": 6.709986819493306e-05, | |
| "loss": 1.0766773223876953, | |
| "step": 1060 | |
| }, | |
| { | |
| "epoch": 1.2967032967032968, | |
| "grad_norm": 0.5020396113395691, | |
| "learning_rate": 6.698554575068829e-05, | |
| "loss": 0.6150152087211609, | |
| "step": 1062 | |
| }, | |
| { | |
| "epoch": 1.2991452991452992, | |
| "grad_norm": 0.15298037230968475, | |
| "learning_rate": 6.687113998261541e-05, | |
| "loss": 1.1057627201080322, | |
| "step": 1064 | |
| }, | |
| { | |
| "epoch": 1.3015873015873016, | |
| "grad_norm": 0.1805209517478943, | |
| "learning_rate": 6.67566516860663e-05, | |
| "loss": 0.7469316124916077, | |
| "step": 1066 | |
| }, | |
| { | |
| "epoch": 1.304029304029304, | |
| "grad_norm": 0.585453987121582, | |
| "learning_rate": 6.664208165696658e-05, | |
| "loss": 1.003629446029663, | |
| "step": 1068 | |
| }, | |
| { | |
| "epoch": 1.3064713064713065, | |
| "grad_norm": 0.42859435081481934, | |
| "learning_rate": 6.65274306918101e-05, | |
| "loss": 0.6870042085647583, | |
| "step": 1070 | |
| }, | |
| { | |
| "epoch": 1.308913308913309, | |
| "grad_norm": 0.4040563106536865, | |
| "learning_rate": 6.641269958765342e-05, | |
| "loss": 0.9547469019889832, | |
| "step": 1072 | |
| }, | |
| { | |
| "epoch": 1.3113553113553114, | |
| "grad_norm": 0.35660508275032043, | |
| "learning_rate": 6.629788914211012e-05, | |
| "loss": 0.8478542566299438, | |
| "step": 1074 | |
| }, | |
| { | |
| "epoch": 1.3137973137973138, | |
| "grad_norm": 0.1982157677412033, | |
| "learning_rate": 6.618300015334546e-05, | |
| "loss": 0.725452721118927, | |
| "step": 1076 | |
| }, | |
| { | |
| "epoch": 1.3162393162393162, | |
| "grad_norm": 1.687403678894043, | |
| "learning_rate": 6.60680334200707e-05, | |
| "loss": 1.0525561571121216, | |
| "step": 1078 | |
| }, | |
| { | |
| "epoch": 1.3186813186813187, | |
| "grad_norm": 0.7481430172920227, | |
| "learning_rate": 6.595298974153757e-05, | |
| "loss": 0.8642970323562622, | |
| "step": 1080 | |
| }, | |
| { | |
| "epoch": 1.321123321123321, | |
| "grad_norm": 0.12711447477340698, | |
| "learning_rate": 6.583786991753273e-05, | |
| "loss": 0.896920382976532, | |
| "step": 1082 | |
| }, | |
| { | |
| "epoch": 1.3235653235653235, | |
| "grad_norm": 0.26414385437965393, | |
| "learning_rate": 6.572267474837225e-05, | |
| "loss": 1.0309571027755737, | |
| "step": 1084 | |
| }, | |
| { | |
| "epoch": 1.326007326007326, | |
| "grad_norm": 0.13098475337028503, | |
| "learning_rate": 6.560740503489593e-05, | |
| "loss": 1.0929380655288696, | |
| "step": 1086 | |
| }, | |
| { | |
| "epoch": 1.3284493284493284, | |
| "grad_norm": 0.6176242232322693, | |
| "learning_rate": 6.549206157846186e-05, | |
| "loss": 1.1012804508209229, | |
| "step": 1088 | |
| }, | |
| { | |
| "epoch": 1.3308913308913308, | |
| "grad_norm": 0.6268819570541382, | |
| "learning_rate": 6.537664518094073e-05, | |
| "loss": 0.6613734364509583, | |
| "step": 1090 | |
| }, | |
| { | |
| "epoch": 1.3333333333333333, | |
| "grad_norm": 1.734652042388916, | |
| "learning_rate": 6.526115664471042e-05, | |
| "loss": 0.8728964328765869, | |
| "step": 1092 | |
| }, | |
| { | |
| "epoch": 1.3357753357753357, | |
| "grad_norm": 0.2716307044029236, | |
| "learning_rate": 6.514559677265022e-05, | |
| "loss": 1.092790961265564, | |
| "step": 1094 | |
| }, | |
| { | |
| "epoch": 1.3382173382173383, | |
| "grad_norm": 0.5069316029548645, | |
| "learning_rate": 6.502996636813539e-05, | |
| "loss": 0.39242786169052124, | |
| "step": 1096 | |
| }, | |
| { | |
| "epoch": 1.3406593406593408, | |
| "grad_norm": 0.43597611784935, | |
| "learning_rate": 6.491426623503157e-05, | |
| "loss": 1.2618279457092285, | |
| "step": 1098 | |
| }, | |
| { | |
| "epoch": 1.3431013431013432, | |
| "grad_norm": 0.13656683266162872, | |
| "learning_rate": 6.479849717768907e-05, | |
| "loss": 1.0743504762649536, | |
| "step": 1100 | |
| }, | |
| { | |
| "epoch": 1.3455433455433456, | |
| "grad_norm": 0.12377850711345673, | |
| "learning_rate": 6.468266000093744e-05, | |
| "loss": 1.0664759874343872, | |
| "step": 1102 | |
| }, | |
| { | |
| "epoch": 1.347985347985348, | |
| "grad_norm": 0.19693827629089355, | |
| "learning_rate": 6.456675551007974e-05, | |
| "loss": 0.7982177138328552, | |
| "step": 1104 | |
| }, | |
| { | |
| "epoch": 1.3504273504273505, | |
| "grad_norm": 0.25434446334838867, | |
| "learning_rate": 6.445078451088707e-05, | |
| "loss": 1.0961756706237793, | |
| "step": 1106 | |
| }, | |
| { | |
| "epoch": 1.352869352869353, | |
| "grad_norm": 0.19862128794193268, | |
| "learning_rate": 6.433474780959282e-05, | |
| "loss": 0.9376218914985657, | |
| "step": 1108 | |
| }, | |
| { | |
| "epoch": 1.3553113553113554, | |
| "grad_norm": 0.19884797930717468, | |
| "learning_rate": 6.421864621288718e-05, | |
| "loss": 1.148040533065796, | |
| "step": 1110 | |
| }, | |
| { | |
| "epoch": 1.3577533577533578, | |
| "grad_norm": 0.2507520020008087, | |
| "learning_rate": 6.41024805279115e-05, | |
| "loss": 1.0505282878875732, | |
| "step": 1112 | |
| }, | |
| { | |
| "epoch": 1.3601953601953602, | |
| "grad_norm": 0.26704150438308716, | |
| "learning_rate": 6.39862515622527e-05, | |
| "loss": 0.9849526286125183, | |
| "step": 1114 | |
| }, | |
| { | |
| "epoch": 1.3626373626373627, | |
| "grad_norm": 1.389377474784851, | |
| "learning_rate": 6.386996012393752e-05, | |
| "loss": 0.4344385862350464, | |
| "step": 1116 | |
| }, | |
| { | |
| "epoch": 1.3650793650793651, | |
| "grad_norm": 0.4976778030395508, | |
| "learning_rate": 6.375360702142715e-05, | |
| "loss": 1.0218122005462646, | |
| "step": 1118 | |
| }, | |
| { | |
| "epoch": 1.3675213675213675, | |
| "grad_norm": 0.19328756630420685, | |
| "learning_rate": 6.363719306361139e-05, | |
| "loss": 0.9801905155181885, | |
| "step": 1120 | |
| }, | |
| { | |
| "epoch": 1.36996336996337, | |
| "grad_norm": 0.3681502938270569, | |
| "learning_rate": 6.35207190598031e-05, | |
| "loss": 0.9958073496818542, | |
| "step": 1122 | |
| }, | |
| { | |
| "epoch": 1.3724053724053724, | |
| "grad_norm": 0.3005128800868988, | |
| "learning_rate": 6.340418581973265e-05, | |
| "loss": 1.0011686086654663, | |
| "step": 1124 | |
| }, | |
| { | |
| "epoch": 1.3748473748473748, | |
| "grad_norm": 0.27037566900253296, | |
| "learning_rate": 6.328759415354217e-05, | |
| "loss": 1.0258492231369019, | |
| "step": 1126 | |
| }, | |
| { | |
| "epoch": 1.3772893772893773, | |
| "grad_norm": 0.33061879873275757, | |
| "learning_rate": 6.317094487177992e-05, | |
| "loss": 1.0367637872695923, | |
| "step": 1128 | |
| }, | |
| { | |
| "epoch": 1.3797313797313797, | |
| "grad_norm": 0.2548726499080658, | |
| "learning_rate": 6.305423878539484e-05, | |
| "loss": 0.8461532592773438, | |
| "step": 1130 | |
| }, | |
| { | |
| "epoch": 1.3821733821733821, | |
| "grad_norm": 0.0829450935125351, | |
| "learning_rate": 6.293747670573067e-05, | |
| "loss": 0.7600595355033875, | |
| "step": 1132 | |
| }, | |
| { | |
| "epoch": 1.3846153846153846, | |
| "grad_norm": 0.21981346607208252, | |
| "learning_rate": 6.282065944452045e-05, | |
| "loss": 0.4816887378692627, | |
| "step": 1134 | |
| }, | |
| { | |
| "epoch": 1.387057387057387, | |
| "grad_norm": 0.28539714217185974, | |
| "learning_rate": 6.270378781388082e-05, | |
| "loss": 0.785520613193512, | |
| "step": 1136 | |
| }, | |
| { | |
| "epoch": 1.3894993894993894, | |
| "grad_norm": 0.28171128034591675, | |
| "learning_rate": 6.258686262630645e-05, | |
| "loss": 1.1236557960510254, | |
| "step": 1138 | |
| }, | |
| { | |
| "epoch": 1.3919413919413919, | |
| "grad_norm": 0.34872862696647644, | |
| "learning_rate": 6.246988469466429e-05, | |
| "loss": 0.7208234667778015, | |
| "step": 1140 | |
| }, | |
| { | |
| "epoch": 1.3943833943833943, | |
| "grad_norm": 0.3961777687072754, | |
| "learning_rate": 6.235285483218799e-05, | |
| "loss": 0.8725804686546326, | |
| "step": 1142 | |
| }, | |
| { | |
| "epoch": 1.3968253968253967, | |
| "grad_norm": 0.35713279247283936, | |
| "learning_rate": 6.223577385247223e-05, | |
| "loss": 0.645413875579834, | |
| "step": 1144 | |
| }, | |
| { | |
| "epoch": 1.3992673992673992, | |
| "grad_norm": 0.14098773896694183, | |
| "learning_rate": 6.211864256946703e-05, | |
| "loss": 1.0651791095733643, | |
| "step": 1146 | |
| }, | |
| { | |
| "epoch": 1.4017094017094016, | |
| "grad_norm": 0.6047578454017639, | |
| "learning_rate": 6.200146179747216e-05, | |
| "loss": 0.812719464302063, | |
| "step": 1148 | |
| }, | |
| { | |
| "epoch": 1.404151404151404, | |
| "grad_norm": 0.30947285890579224, | |
| "learning_rate": 6.188423235113138e-05, | |
| "loss": 0.7721526026725769, | |
| "step": 1150 | |
| }, | |
| { | |
| "epoch": 1.4065934065934065, | |
| "grad_norm": 0.08517705649137497, | |
| "learning_rate": 6.176695504542693e-05, | |
| "loss": 0.7817604541778564, | |
| "step": 1152 | |
| }, | |
| { | |
| "epoch": 1.409035409035409, | |
| "grad_norm": 0.13853046298027039, | |
| "learning_rate": 6.164963069567367e-05, | |
| "loss": 0.8681179285049438, | |
| "step": 1154 | |
| }, | |
| { | |
| "epoch": 1.4114774114774113, | |
| "grad_norm": 0.34450244903564453, | |
| "learning_rate": 6.153226011751357e-05, | |
| "loss": 0.6927434802055359, | |
| "step": 1156 | |
| }, | |
| { | |
| "epoch": 1.4139194139194138, | |
| "grad_norm": 0.31808528304100037, | |
| "learning_rate": 6.141484412690996e-05, | |
| "loss": 0.9719551205635071, | |
| "step": 1158 | |
| }, | |
| { | |
| "epoch": 1.4163614163614164, | |
| "grad_norm": 0.1396312266588211, | |
| "learning_rate": 6.12973835401419e-05, | |
| "loss": 0.9163044691085815, | |
| "step": 1160 | |
| }, | |
| { | |
| "epoch": 1.4188034188034189, | |
| "grad_norm": 0.21555638313293457, | |
| "learning_rate": 6.117987917379841e-05, | |
| "loss": 0.6509947776794434, | |
| "step": 1162 | |
| }, | |
| { | |
| "epoch": 1.4212454212454213, | |
| "grad_norm": 1.1163426637649536, | |
| "learning_rate": 6.106233184477301e-05, | |
| "loss": 0.5620357394218445, | |
| "step": 1164 | |
| }, | |
| { | |
| "epoch": 1.4236874236874237, | |
| "grad_norm": 0.177830308675766, | |
| "learning_rate": 6.094474237025775e-05, | |
| "loss": 1.1495651006698608, | |
| "step": 1166 | |
| }, | |
| { | |
| "epoch": 1.4261294261294262, | |
| "grad_norm": 0.05539826676249504, | |
| "learning_rate": 6.082711156773774e-05, | |
| "loss": 0.8995115160942078, | |
| "step": 1168 | |
| }, | |
| { | |
| "epoch": 1.4285714285714286, | |
| "grad_norm": 0.188784658908844, | |
| "learning_rate": 6.070944025498542e-05, | |
| "loss": 0.7915201783180237, | |
| "step": 1170 | |
| }, | |
| { | |
| "epoch": 1.431013431013431, | |
| "grad_norm": 0.2244511991739273, | |
| "learning_rate": 6.059172925005485e-05, | |
| "loss": 1.0928409099578857, | |
| "step": 1172 | |
| }, | |
| { | |
| "epoch": 1.4334554334554335, | |
| "grad_norm": 0.13012833893299103, | |
| "learning_rate": 6.0473979371276e-05, | |
| "loss": 1.0331077575683594, | |
| "step": 1174 | |
| }, | |
| { | |
| "epoch": 1.435897435897436, | |
| "grad_norm": 0.23487408459186554, | |
| "learning_rate": 6.035619143724912e-05, | |
| "loss": 1.1558940410614014, | |
| "step": 1176 | |
| }, | |
| { | |
| "epoch": 1.4383394383394383, | |
| "grad_norm": 0.29506245255470276, | |
| "learning_rate": 6.023836626683902e-05, | |
| "loss": 0.8217901587486267, | |
| "step": 1178 | |
| }, | |
| { | |
| "epoch": 1.4407814407814408, | |
| "grad_norm": 0.30551743507385254, | |
| "learning_rate": 6.0120504679169366e-05, | |
| "loss": 0.9451017379760742, | |
| "step": 1180 | |
| }, | |
| { | |
| "epoch": 1.4432234432234432, | |
| "grad_norm": 0.40952038764953613, | |
| "learning_rate": 6.0002607493616994e-05, | |
| "loss": 0.7660074234008789, | |
| "step": 1182 | |
| }, | |
| { | |
| "epoch": 1.4456654456654456, | |
| "grad_norm": 0.49127933382987976, | |
| "learning_rate": 5.9884675529806245e-05, | |
| "loss": 1.1192713975906372, | |
| "step": 1184 | |
| }, | |
| { | |
| "epoch": 1.448107448107448, | |
| "grad_norm": 0.30256426334381104, | |
| "learning_rate": 5.9766709607603224e-05, | |
| "loss": 1.030974268913269, | |
| "step": 1186 | |
| }, | |
| { | |
| "epoch": 1.4505494505494505, | |
| "grad_norm": 0.5313738584518433, | |
| "learning_rate": 5.964871054711008e-05, | |
| "loss": 0.8067046999931335, | |
| "step": 1188 | |
| }, | |
| { | |
| "epoch": 1.452991452991453, | |
| "grad_norm": 0.3709588050842285, | |
| "learning_rate": 5.953067916865941e-05, | |
| "loss": 0.966224730014801, | |
| "step": 1190 | |
| }, | |
| { | |
| "epoch": 1.4554334554334554, | |
| "grad_norm": 0.3335258662700653, | |
| "learning_rate": 5.9412616292808446e-05, | |
| "loss": 0.8578511476516724, | |
| "step": 1192 | |
| }, | |
| { | |
| "epoch": 1.4578754578754578, | |
| "grad_norm": 0.8506854176521301, | |
| "learning_rate": 5.929452274033336e-05, | |
| "loss": 0.746453583240509, | |
| "step": 1194 | |
| }, | |
| { | |
| "epoch": 1.4603174603174602, | |
| "grad_norm": 0.3150297701358795, | |
| "learning_rate": 5.917639933222368e-05, | |
| "loss": 0.7317304611206055, | |
| "step": 1196 | |
| }, | |
| { | |
| "epoch": 1.462759462759463, | |
| "grad_norm": 0.16696445643901825, | |
| "learning_rate": 5.905824688967638e-05, | |
| "loss": 1.0088220834732056, | |
| "step": 1198 | |
| }, | |
| { | |
| "epoch": 1.4652014652014653, | |
| "grad_norm": 0.3829812705516815, | |
| "learning_rate": 5.894006623409037e-05, | |
| "loss": 1.0668638944625854, | |
| "step": 1200 | |
| }, | |
| { | |
| "epoch": 1.4676434676434678, | |
| "grad_norm": 0.1337672919034958, | |
| "learning_rate": 5.882185818706064e-05, | |
| "loss": 1.0435950756072998, | |
| "step": 1202 | |
| }, | |
| { | |
| "epoch": 1.4700854700854702, | |
| "grad_norm": 0.1704324334859848, | |
| "learning_rate": 5.870362357037267e-05, | |
| "loss": 0.8998317122459412, | |
| "step": 1204 | |
| }, | |
| { | |
| "epoch": 1.4725274725274726, | |
| "grad_norm": 0.5092012882232666, | |
| "learning_rate": 5.858536320599658e-05, | |
| "loss": 0.8602074980735779, | |
| "step": 1206 | |
| }, | |
| { | |
| "epoch": 1.474969474969475, | |
| "grad_norm": 0.5197181105613708, | |
| "learning_rate": 5.846707791608151e-05, | |
| "loss": 0.8431311249732971, | |
| "step": 1208 | |
| }, | |
| { | |
| "epoch": 1.4774114774114775, | |
| "grad_norm": 0.3496190011501312, | |
| "learning_rate": 5.834876852294995e-05, | |
| "loss": 0.8800300359725952, | |
| "step": 1210 | |
| }, | |
| { | |
| "epoch": 1.47985347985348, | |
| "grad_norm": 0.2381490170955658, | |
| "learning_rate": 5.8230435849091833e-05, | |
| "loss": 1.0404175519943237, | |
| "step": 1212 | |
| }, | |
| { | |
| "epoch": 1.4822954822954824, | |
| "grad_norm": 0.20975252985954285, | |
| "learning_rate": 5.811208071715907e-05, | |
| "loss": 1.0842742919921875, | |
| "step": 1214 | |
| }, | |
| { | |
| "epoch": 1.4847374847374848, | |
| "grad_norm": 0.1687372624874115, | |
| "learning_rate": 5.799370394995959e-05, | |
| "loss": 1.0320112705230713, | |
| "step": 1216 | |
| }, | |
| { | |
| "epoch": 1.4871794871794872, | |
| "grad_norm": 0.13617894053459167, | |
| "learning_rate": 5.787530637045182e-05, | |
| "loss": 0.2343486249446869, | |
| "step": 1218 | |
| }, | |
| { | |
| "epoch": 1.4896214896214897, | |
| "grad_norm": 0.08795233815908432, | |
| "learning_rate": 5.775688880173882e-05, | |
| "loss": 0.5835520029067993, | |
| "step": 1220 | |
| }, | |
| { | |
| "epoch": 1.492063492063492, | |
| "grad_norm": 0.2893770933151245, | |
| "learning_rate": 5.763845206706261e-05, | |
| "loss": 0.9278719425201416, | |
| "step": 1222 | |
| }, | |
| { | |
| "epoch": 1.4945054945054945, | |
| "grad_norm": 0.7804391384124756, | |
| "learning_rate": 5.7519996989798494e-05, | |
| "loss": 0.8234866857528687, | |
| "step": 1224 | |
| }, | |
| { | |
| "epoch": 1.496947496947497, | |
| "grad_norm": 1.0618951320648193, | |
| "learning_rate": 5.740152439344929e-05, | |
| "loss": 1.0766711235046387, | |
| "step": 1226 | |
| }, | |
| { | |
| "epoch": 1.4993894993894994, | |
| "grad_norm": 0.24072515964508057, | |
| "learning_rate": 5.728303510163956e-05, | |
| "loss": 0.48025545477867126, | |
| "step": 1228 | |
| }, | |
| { | |
| "epoch": 1.5018315018315018, | |
| "grad_norm": 0.2178301364183426, | |
| "learning_rate": 5.716452993810998e-05, | |
| "loss": 0.7202725410461426, | |
| "step": 1230 | |
| }, | |
| { | |
| "epoch": 1.5042735042735043, | |
| "grad_norm": 0.10087763518095016, | |
| "learning_rate": 5.7046009726711545e-05, | |
| "loss": 0.6398283839225769, | |
| "step": 1232 | |
| }, | |
| { | |
| "epoch": 1.5067155067155067, | |
| "grad_norm": 0.17874756455421448, | |
| "learning_rate": 5.692747529139989e-05, | |
| "loss": 1.0056767463684082, | |
| "step": 1234 | |
| }, | |
| { | |
| "epoch": 1.5091575091575091, | |
| "grad_norm": 0.20484614372253418, | |
| "learning_rate": 5.680892745622949e-05, | |
| "loss": 0.8894158601760864, | |
| "step": 1236 | |
| }, | |
| { | |
| "epoch": 1.5115995115995116, | |
| "grad_norm": 0.2931312620639801, | |
| "learning_rate": 5.6690367045348014e-05, | |
| "loss": 0.8917006850242615, | |
| "step": 1238 | |
| }, | |
| { | |
| "epoch": 1.514041514041514, | |
| "grad_norm": 0.10633936524391174, | |
| "learning_rate": 5.657179488299055e-05, | |
| "loss": 1.0501837730407715, | |
| "step": 1240 | |
| }, | |
| { | |
| "epoch": 1.5164835164835164, | |
| "grad_norm": 0.12917259335517883, | |
| "learning_rate": 5.645321179347386e-05, | |
| "loss": 1.0691356658935547, | |
| "step": 1242 | |
| }, | |
| { | |
| "epoch": 1.5189255189255189, | |
| "grad_norm": 0.1591254472732544, | |
| "learning_rate": 5.6334618601190705e-05, | |
| "loss": 0.609035313129425, | |
| "step": 1244 | |
| }, | |
| { | |
| "epoch": 1.5213675213675213, | |
| "grad_norm": 0.3975456953048706, | |
| "learning_rate": 5.621601613060407e-05, | |
| "loss": 0.7681704759597778, | |
| "step": 1246 | |
| }, | |
| { | |
| "epoch": 1.5238095238095237, | |
| "grad_norm": 0.19978463649749756, | |
| "learning_rate": 5.6097405206241426e-05, | |
| "loss": 0.8475586175918579, | |
| "step": 1248 | |
| }, | |
| { | |
| "epoch": 1.5262515262515262, | |
| "grad_norm": 0.23777256906032562, | |
| "learning_rate": 5.597878665268903e-05, | |
| "loss": 1.0776598453521729, | |
| "step": 1250 | |
| }, | |
| { | |
| "epoch": 1.5286935286935286, | |
| "grad_norm": 0.3354805111885071, | |
| "learning_rate": 5.586016129458619e-05, | |
| "loss": 0.8357318043708801, | |
| "step": 1252 | |
| }, | |
| { | |
| "epoch": 1.531135531135531, | |
| "grad_norm": 0.12921759486198425, | |
| "learning_rate": 5.5741529956619476e-05, | |
| "loss": 1.051845669746399, | |
| "step": 1254 | |
| }, | |
| { | |
| "epoch": 1.5335775335775335, | |
| "grad_norm": 0.2622128427028656, | |
| "learning_rate": 5.562289346351707e-05, | |
| "loss": 1.0979580879211426, | |
| "step": 1256 | |
| }, | |
| { | |
| "epoch": 1.536019536019536, | |
| "grad_norm": 0.1916062831878662, | |
| "learning_rate": 5.550425264004299e-05, | |
| "loss": 0.4232596158981323, | |
| "step": 1258 | |
| }, | |
| { | |
| "epoch": 1.5384615384615383, | |
| "grad_norm": 0.2835545241832733, | |
| "learning_rate": 5.5385608310991334e-05, | |
| "loss": 0.6855109333992004, | |
| "step": 1260 | |
| }, | |
| { | |
| "epoch": 1.5409035409035408, | |
| "grad_norm": 0.8020928502082825, | |
| "learning_rate": 5.526696130118061e-05, | |
| "loss": 0.6893743872642517, | |
| "step": 1262 | |
| }, | |
| { | |
| "epoch": 1.5433455433455432, | |
| "grad_norm": 0.33751028776168823, | |
| "learning_rate": 5.514831243544791e-05, | |
| "loss": 0.7404376268386841, | |
| "step": 1264 | |
| }, | |
| { | |
| "epoch": 1.5457875457875456, | |
| "grad_norm": 0.18948665261268616, | |
| "learning_rate": 5.5029662538643276e-05, | |
| "loss": 0.8048269152641296, | |
| "step": 1266 | |
| }, | |
| { | |
| "epoch": 1.5482295482295483, | |
| "grad_norm": 0.22791559994220734, | |
| "learning_rate": 5.49110124356239e-05, | |
| "loss": 1.0504255294799805, | |
| "step": 1268 | |
| }, | |
| { | |
| "epoch": 1.5506715506715507, | |
| "grad_norm": 0.9400246739387512, | |
| "learning_rate": 5.479236295124841e-05, | |
| "loss": 0.46398311853408813, | |
| "step": 1270 | |
| }, | |
| { | |
| "epoch": 1.5531135531135531, | |
| "grad_norm": 0.5621083378791809, | |
| "learning_rate": 5.467371491037112e-05, | |
| "loss": 1.138421654701233, | |
| "step": 1272 | |
| }, | |
| { | |
| "epoch": 1.5555555555555556, | |
| "grad_norm": 0.16255539655685425, | |
| "learning_rate": 5.4555069137836314e-05, | |
| "loss": 0.8798990845680237, | |
| "step": 1274 | |
| }, | |
| { | |
| "epoch": 1.557997557997558, | |
| "grad_norm": 0.3270881175994873, | |
| "learning_rate": 5.443642645847253e-05, | |
| "loss": 1.3364378213882446, | |
| "step": 1276 | |
| }, | |
| { | |
| "epoch": 1.5604395604395604, | |
| "grad_norm": 0.17161868512630463, | |
| "learning_rate": 5.431778769708675e-05, | |
| "loss": 0.7309210300445557, | |
| "step": 1278 | |
| }, | |
| { | |
| "epoch": 1.5628815628815629, | |
| "grad_norm": 0.28098684549331665, | |
| "learning_rate": 5.4199153678458814e-05, | |
| "loss": 1.0521801710128784, | |
| "step": 1280 | |
| }, | |
| { | |
| "epoch": 1.5653235653235653, | |
| "grad_norm": 0.9097205400466919, | |
| "learning_rate": 5.4080525227335464e-05, | |
| "loss": 0.6273832321166992, | |
| "step": 1282 | |
| }, | |
| { | |
| "epoch": 1.5677655677655677, | |
| "grad_norm": 0.4730888307094574, | |
| "learning_rate": 5.3961903168424855e-05, | |
| "loss": 1.0628879070281982, | |
| "step": 1284 | |
| }, | |
| { | |
| "epoch": 1.5702075702075702, | |
| "grad_norm": 0.3619194030761719, | |
| "learning_rate": 5.384328832639061e-05, | |
| "loss": 1.1048762798309326, | |
| "step": 1286 | |
| }, | |
| { | |
| "epoch": 1.5726495726495726, | |
| "grad_norm": 0.4583139717578888, | |
| "learning_rate": 5.372468152584622e-05, | |
| "loss": 0.5223218202590942, | |
| "step": 1288 | |
| }, | |
| { | |
| "epoch": 1.575091575091575, | |
| "grad_norm": 0.45348143577575684, | |
| "learning_rate": 5.360608359134928e-05, | |
| "loss": 0.8029018044471741, | |
| "step": 1290 | |
| }, | |
| { | |
| "epoch": 1.5775335775335775, | |
| "grad_norm": 0.2899197041988373, | |
| "learning_rate": 5.3487495347395736e-05, | |
| "loss": 0.37328973412513733, | |
| "step": 1292 | |
| }, | |
| { | |
| "epoch": 1.5799755799755801, | |
| "grad_norm": 0.09255759418010712, | |
| "learning_rate": 5.3368917618414184e-05, | |
| "loss": 0.6895552277565002, | |
| "step": 1294 | |
| }, | |
| { | |
| "epoch": 1.5824175824175826, | |
| "grad_norm": 0.13683350384235382, | |
| "learning_rate": 5.3250351228760086e-05, | |
| "loss": 1.072478175163269, | |
| "step": 1296 | |
| }, | |
| { | |
| "epoch": 1.584859584859585, | |
| "grad_norm": 0.7693029046058655, | |
| "learning_rate": 5.31317970027101e-05, | |
| "loss": 1.0769416093826294, | |
| "step": 1298 | |
| }, | |
| { | |
| "epoch": 1.5873015873015874, | |
| "grad_norm": 0.19626326858997345, | |
| "learning_rate": 5.30132557644563e-05, | |
| "loss": 0.8601043820381165, | |
| "step": 1300 | |
| }, | |
| { | |
| "epoch": 1.5897435897435899, | |
| "grad_norm": 0.4950772821903229, | |
| "learning_rate": 5.2894728338100496e-05, | |
| "loss": 1.0493766069412231, | |
| "step": 1302 | |
| }, | |
| { | |
| "epoch": 1.5921855921855923, | |
| "grad_norm": 0.35559603571891785, | |
| "learning_rate": 5.277621554764842e-05, | |
| "loss": 1.0305781364440918, | |
| "step": 1304 | |
| }, | |
| { | |
| "epoch": 1.5946275946275947, | |
| "grad_norm": 0.39851295948028564, | |
| "learning_rate": 5.265771821700416e-05, | |
| "loss": 0.7244423627853394, | |
| "step": 1306 | |
| }, | |
| { | |
| "epoch": 1.5970695970695972, | |
| "grad_norm": 0.10679028183221817, | |
| "learning_rate": 5.2539237169964216e-05, | |
| "loss": 1.0926436185836792, | |
| "step": 1308 | |
| }, | |
| { | |
| "epoch": 1.5995115995115996, | |
| "grad_norm": 0.3007868528366089, | |
| "learning_rate": 5.242077323021197e-05, | |
| "loss": 0.9811714291572571, | |
| "step": 1310 | |
| }, | |
| { | |
| "epoch": 1.601953601953602, | |
| "grad_norm": 0.20424215495586395, | |
| "learning_rate": 5.2302327221311796e-05, | |
| "loss": 0.947564423084259, | |
| "step": 1312 | |
| }, | |
| { | |
| "epoch": 1.6043956043956045, | |
| "grad_norm": 0.24308611452579498, | |
| "learning_rate": 5.218389996670349e-05, | |
| "loss": 1.02983820438385, | |
| "step": 1314 | |
| }, | |
| { | |
| "epoch": 1.606837606837607, | |
| "grad_norm": 0.16875241696834564, | |
| "learning_rate": 5.206549228969639e-05, | |
| "loss": 1.1396514177322388, | |
| "step": 1316 | |
| }, | |
| { | |
| "epoch": 1.6092796092796093, | |
| "grad_norm": 0.149415984749794, | |
| "learning_rate": 5.194710501346377e-05, | |
| "loss": 0.9516569375991821, | |
| "step": 1318 | |
| }, | |
| { | |
| "epoch": 1.6117216117216118, | |
| "grad_norm": 0.3515506386756897, | |
| "learning_rate": 5.182873896103713e-05, | |
| "loss": 1.199094295501709, | |
| "step": 1320 | |
| }, | |
| { | |
| "epoch": 1.6141636141636142, | |
| "grad_norm": 0.16218596696853638, | |
| "learning_rate": 5.1710394955300315e-05, | |
| "loss": 1.0511850118637085, | |
| "step": 1322 | |
| }, | |
| { | |
| "epoch": 1.6166056166056166, | |
| "grad_norm": 0.15163403749465942, | |
| "learning_rate": 5.1592073818983976e-05, | |
| "loss": 1.0825765132904053, | |
| "step": 1324 | |
| }, | |
| { | |
| "epoch": 1.619047619047619, | |
| "grad_norm": 0.5036738514900208, | |
| "learning_rate": 5.147377637465975e-05, | |
| "loss": 0.6521511077880859, | |
| "step": 1326 | |
| }, | |
| { | |
| "epoch": 1.6214896214896215, | |
| "grad_norm": 0.31287693977355957, | |
| "learning_rate": 5.1355503444734576e-05, | |
| "loss": 1.101414680480957, | |
| "step": 1328 | |
| }, | |
| { | |
| "epoch": 1.623931623931624, | |
| "grad_norm": 0.33664900064468384, | |
| "learning_rate": 5.123725585144494e-05, | |
| "loss": 1.081400752067566, | |
| "step": 1330 | |
| }, | |
| { | |
| "epoch": 1.6263736263736264, | |
| "grad_norm": 0.17957070469856262, | |
| "learning_rate": 5.111903441685124e-05, | |
| "loss": 0.8277972936630249, | |
| "step": 1332 | |
| }, | |
| { | |
| "epoch": 1.6288156288156288, | |
| "grad_norm": 0.1797637641429901, | |
| "learning_rate": 5.100083996283196e-05, | |
| "loss": 1.068657398223877, | |
| "step": 1334 | |
| }, | |
| { | |
| "epoch": 1.6312576312576312, | |
| "grad_norm": 0.061219751834869385, | |
| "learning_rate": 5.0882673311078053e-05, | |
| "loss": 0.6104809045791626, | |
| "step": 1336 | |
| }, | |
| { | |
| "epoch": 1.6336996336996337, | |
| "grad_norm": 0.5505644679069519, | |
| "learning_rate": 5.076453528308719e-05, | |
| "loss": 1.0238608121871948, | |
| "step": 1338 | |
| }, | |
| { | |
| "epoch": 1.636141636141636, | |
| "grad_norm": 0.23708876967430115, | |
| "learning_rate": 5.064642670015801e-05, | |
| "loss": 0.48725709319114685, | |
| "step": 1340 | |
| }, | |
| { | |
| "epoch": 1.6385836385836385, | |
| "grad_norm": 0.14664816856384277, | |
| "learning_rate": 5.0528348383384507e-05, | |
| "loss": 0.36775800585746765, | |
| "step": 1342 | |
| }, | |
| { | |
| "epoch": 1.641025641025641, | |
| "grad_norm": 0.14682380855083466, | |
| "learning_rate": 5.0410301153650195e-05, | |
| "loss": 1.0905888080596924, | |
| "step": 1344 | |
| }, | |
| { | |
| "epoch": 1.6434676434676434, | |
| "grad_norm": 0.20299766957759857, | |
| "learning_rate": 5.029228583162256e-05, | |
| "loss": 1.0694624185562134, | |
| "step": 1346 | |
| }, | |
| { | |
| "epoch": 1.6459096459096458, | |
| "grad_norm": 0.2950347661972046, | |
| "learning_rate": 5.017430323774718e-05, | |
| "loss": 0.7154189944267273, | |
| "step": 1348 | |
| }, | |
| { | |
| "epoch": 1.6483516483516483, | |
| "grad_norm": 0.10156578570604324, | |
| "learning_rate": 5.005635419224214e-05, | |
| "loss": 0.6325153708457947, | |
| "step": 1350 | |
| }, | |
| { | |
| "epoch": 1.6507936507936507, | |
| "grad_norm": 0.13161711394786835, | |
| "learning_rate": 4.993843951509229e-05, | |
| "loss": 1.0733890533447266, | |
| "step": 1352 | |
| }, | |
| { | |
| "epoch": 1.6532356532356531, | |
| "grad_norm": 0.25999629497528076, | |
| "learning_rate": 4.982056002604356e-05, | |
| "loss": 1.1192110776901245, | |
| "step": 1354 | |
| }, | |
| { | |
| "epoch": 1.6556776556776556, | |
| "grad_norm": 0.17848606407642365, | |
| "learning_rate": 4.970271654459726e-05, | |
| "loss": 1.0675522089004517, | |
| "step": 1356 | |
| }, | |
| { | |
| "epoch": 1.658119658119658, | |
| "grad_norm": 0.10351155698299408, | |
| "learning_rate": 4.958490989000436e-05, | |
| "loss": 0.5787196755409241, | |
| "step": 1358 | |
| }, | |
| { | |
| "epoch": 1.6605616605616604, | |
| "grad_norm": 0.6748509407043457, | |
| "learning_rate": 4.9467140881259796e-05, | |
| "loss": 0.7209932208061218, | |
| "step": 1360 | |
| }, | |
| { | |
| "epoch": 1.6630036630036629, | |
| "grad_norm": 0.20171122252941132, | |
| "learning_rate": 4.9349410337096825e-05, | |
| "loss": 1.0964769124984741, | |
| "step": 1362 | |
| }, | |
| { | |
| "epoch": 1.6654456654456653, | |
| "grad_norm": 0.19162552058696747, | |
| "learning_rate": 4.9231719075981244e-05, | |
| "loss": 1.0881688594818115, | |
| "step": 1364 | |
| }, | |
| { | |
| "epoch": 1.6678876678876677, | |
| "grad_norm": 0.8815730214118958, | |
| "learning_rate": 4.91140679161058e-05, | |
| "loss": 1.0124543905258179, | |
| "step": 1366 | |
| }, | |
| { | |
| "epoch": 1.6703296703296702, | |
| "grad_norm": 0.16690880060195923, | |
| "learning_rate": 4.8996457675384444e-05, | |
| "loss": 1.0523033142089844, | |
| "step": 1368 | |
| }, | |
| { | |
| "epoch": 1.6727716727716728, | |
| "grad_norm": 0.17800509929656982, | |
| "learning_rate": 4.887888917144663e-05, | |
| "loss": 1.0857226848602295, | |
| "step": 1370 | |
| }, | |
| { | |
| "epoch": 1.6752136752136753, | |
| "grad_norm": 0.348590224981308, | |
| "learning_rate": 4.876136322163171e-05, | |
| "loss": 0.6205699443817139, | |
| "step": 1372 | |
| }, | |
| { | |
| "epoch": 1.6776556776556777, | |
| "grad_norm": 0.11171364039182663, | |
| "learning_rate": 4.864388064298315e-05, | |
| "loss": 0.2655639946460724, | |
| "step": 1374 | |
| }, | |
| { | |
| "epoch": 1.6800976800976801, | |
| "grad_norm": 0.23623786866664886, | |
| "learning_rate": 4.852644225224291e-05, | |
| "loss": 1.1150223016738892, | |
| "step": 1376 | |
| }, | |
| { | |
| "epoch": 1.6825396825396826, | |
| "grad_norm": 0.14239074289798737, | |
| "learning_rate": 4.840904886584579e-05, | |
| "loss": 1.0644621849060059, | |
| "step": 1378 | |
| }, | |
| { | |
| "epoch": 1.684981684981685, | |
| "grad_norm": 0.05473846197128296, | |
| "learning_rate": 4.829170129991364e-05, | |
| "loss": 0.911546528339386, | |
| "step": 1380 | |
| }, | |
| { | |
| "epoch": 1.6874236874236874, | |
| "grad_norm": 0.18205346167087555, | |
| "learning_rate": 4.817440037024985e-05, | |
| "loss": 0.9958337545394897, | |
| "step": 1382 | |
| }, | |
| { | |
| "epoch": 1.6898656898656899, | |
| "grad_norm": 0.1949300616979599, | |
| "learning_rate": 4.805714689233351e-05, | |
| "loss": 1.159174919128418, | |
| "step": 1384 | |
| }, | |
| { | |
| "epoch": 1.6923076923076923, | |
| "grad_norm": 0.5501182079315186, | |
| "learning_rate": 4.793994168131395e-05, | |
| "loss": 1.0625038146972656, | |
| "step": 1386 | |
| }, | |
| { | |
| "epoch": 1.6947496947496947, | |
| "grad_norm": 0.21715255081653595, | |
| "learning_rate": 4.782278555200481e-05, | |
| "loss": 1.0934077501296997, | |
| "step": 1388 | |
| }, | |
| { | |
| "epoch": 1.6971916971916972, | |
| "grad_norm": 0.11062823981046677, | |
| "learning_rate": 4.770567931887858e-05, | |
| "loss": 0.34447038173675537, | |
| "step": 1390 | |
| }, | |
| { | |
| "epoch": 1.6996336996336996, | |
| "grad_norm": 0.29721036553382874, | |
| "learning_rate": 4.758862379606088e-05, | |
| "loss": 0.9947092533111572, | |
| "step": 1392 | |
| }, | |
| { | |
| "epoch": 1.702075702075702, | |
| "grad_norm": 0.5796966552734375, | |
| "learning_rate": 4.747161979732475e-05, | |
| "loss": 0.8076590895652771, | |
| "step": 1394 | |
| }, | |
| { | |
| "epoch": 1.7045177045177047, | |
| "grad_norm": 1.3212742805480957, | |
| "learning_rate": 4.735466813608505e-05, | |
| "loss": 1.1376543045043945, | |
| "step": 1396 | |
| }, | |
| { | |
| "epoch": 1.7069597069597071, | |
| "grad_norm": 0.1878126710653305, | |
| "learning_rate": 4.723776962539282e-05, | |
| "loss": 0.7179307341575623, | |
| "step": 1398 | |
| }, | |
| { | |
| "epoch": 1.7094017094017095, | |
| "grad_norm": 2.141376495361328, | |
| "learning_rate": 4.712092507792956e-05, | |
| "loss": 0.38371261954307556, | |
| "step": 1400 | |
| }, | |
| { | |
| "epoch": 1.711843711843712, | |
| "grad_norm": 0.36681321263313293, | |
| "learning_rate": 4.700413530600162e-05, | |
| "loss": 0.8716605305671692, | |
| "step": 1402 | |
| }, | |
| { | |
| "epoch": 1.7142857142857144, | |
| "grad_norm": 0.16726715862751007, | |
| "learning_rate": 4.6887401121534554e-05, | |
| "loss": 1.057639241218567, | |
| "step": 1404 | |
| }, | |
| { | |
| "epoch": 1.7167277167277168, | |
| "grad_norm": 0.17409463226795197, | |
| "learning_rate": 4.677072333606749e-05, | |
| "loss": 0.5047734975814819, | |
| "step": 1406 | |
| }, | |
| { | |
| "epoch": 1.7191697191697193, | |
| "grad_norm": 0.15982046723365784, | |
| "learning_rate": 4.665410276074742e-05, | |
| "loss": 1.0847749710083008, | |
| "step": 1408 | |
| }, | |
| { | |
| "epoch": 1.7216117216117217, | |
| "grad_norm": 0.46375003457069397, | |
| "learning_rate": 4.6537540206323635e-05, | |
| "loss": 0.7411858439445496, | |
| "step": 1410 | |
| }, | |
| { | |
| "epoch": 1.7240537240537241, | |
| "grad_norm": 0.47292381525039673, | |
| "learning_rate": 4.642103648314211e-05, | |
| "loss": 0.8171213865280151, | |
| "step": 1412 | |
| }, | |
| { | |
| "epoch": 1.7264957264957266, | |
| "grad_norm": 0.36377474665641785, | |
| "learning_rate": 4.630459240113977e-05, | |
| "loss": 1.047723412513733, | |
| "step": 1414 | |
| }, | |
| { | |
| "epoch": 1.728937728937729, | |
| "grad_norm": 0.32019728422164917, | |
| "learning_rate": 4.618820876983889e-05, | |
| "loss": 0.7329713106155396, | |
| "step": 1416 | |
| }, | |
| { | |
| "epoch": 1.7313797313797314, | |
| "grad_norm": 0.2337159365415573, | |
| "learning_rate": 4.607188639834157e-05, | |
| "loss": 1.1218899488449097, | |
| "step": 1418 | |
| }, | |
| { | |
| "epoch": 1.7338217338217339, | |
| "grad_norm": 0.3704914450645447, | |
| "learning_rate": 4.595562609532396e-05, | |
| "loss": 0.7929244041442871, | |
| "step": 1420 | |
| }, | |
| { | |
| "epoch": 1.7362637362637363, | |
| "grad_norm": 0.1733865886926651, | |
| "learning_rate": 4.5839428669030705e-05, | |
| "loss": 0.8216549158096313, | |
| "step": 1422 | |
| }, | |
| { | |
| "epoch": 1.7387057387057387, | |
| "grad_norm": 0.3664633631706238, | |
| "learning_rate": 4.572329492726941e-05, | |
| "loss": 0.7749980092048645, | |
| "step": 1424 | |
| }, | |
| { | |
| "epoch": 1.7411477411477412, | |
| "grad_norm": 0.13494762778282166, | |
| "learning_rate": 4.560722567740486e-05, | |
| "loss": 0.7580897212028503, | |
| "step": 1426 | |
| }, | |
| { | |
| "epoch": 1.7435897435897436, | |
| "grad_norm": 0.16795885562896729, | |
| "learning_rate": 4.549122172635354e-05, | |
| "loss": 1.1150444746017456, | |
| "step": 1428 | |
| }, | |
| { | |
| "epoch": 1.746031746031746, | |
| "grad_norm": 0.19896958768367767, | |
| "learning_rate": 4.5375283880577945e-05, | |
| "loss": 1.135327935218811, | |
| "step": 1430 | |
| }, | |
| { | |
| "epoch": 1.7484737484737485, | |
| "grad_norm": 0.18567411601543427, | |
| "learning_rate": 4.5259412946081025e-05, | |
| "loss": 0.7407920956611633, | |
| "step": 1432 | |
| }, | |
| { | |
| "epoch": 1.750915750915751, | |
| "grad_norm": 0.13866519927978516, | |
| "learning_rate": 4.514360972840054e-05, | |
| "loss": 0.6161085963249207, | |
| "step": 1434 | |
| }, | |
| { | |
| "epoch": 1.7533577533577533, | |
| "grad_norm": 0.13026298582553864, | |
| "learning_rate": 4.502787503260349e-05, | |
| "loss": 0.758384644985199, | |
| "step": 1436 | |
| }, | |
| { | |
| "epoch": 1.7557997557997558, | |
| "grad_norm": 0.1002855971455574, | |
| "learning_rate": 4.4912209663280545e-05, | |
| "loss": 0.4888274371623993, | |
| "step": 1438 | |
| }, | |
| { | |
| "epoch": 1.7582417582417582, | |
| "grad_norm": 0.15915000438690186, | |
| "learning_rate": 4.479661442454035e-05, | |
| "loss": 1.0535722970962524, | |
| "step": 1440 | |
| }, | |
| { | |
| "epoch": 1.7606837606837606, | |
| "grad_norm": 0.7222951650619507, | |
| "learning_rate": 4.468109012000406e-05, | |
| "loss": 0.8508101105690002, | |
| "step": 1442 | |
| }, | |
| { | |
| "epoch": 1.763125763125763, | |
| "grad_norm": 0.17121757566928864, | |
| "learning_rate": 4.4565637552799634e-05, | |
| "loss": 0.7293910980224609, | |
| "step": 1444 | |
| }, | |
| { | |
| "epoch": 1.7655677655677655, | |
| "grad_norm": 0.0991348847746849, | |
| "learning_rate": 4.4450257525556384e-05, | |
| "loss": 1.0878574848175049, | |
| "step": 1446 | |
| }, | |
| { | |
| "epoch": 1.768009768009768, | |
| "grad_norm": 0.1505967676639557, | |
| "learning_rate": 4.433495084039925e-05, | |
| "loss": 1.101405382156372, | |
| "step": 1448 | |
| }, | |
| { | |
| "epoch": 1.7704517704517704, | |
| "grad_norm": 0.1785927265882492, | |
| "learning_rate": 4.4219718298943333e-05, | |
| "loss": 0.9359110593795776, | |
| "step": 1450 | |
| }, | |
| { | |
| "epoch": 1.7728937728937728, | |
| "grad_norm": 0.3384542763233185, | |
| "learning_rate": 4.4104560702288313e-05, | |
| "loss": 0.3786666989326477, | |
| "step": 1452 | |
| }, | |
| { | |
| "epoch": 1.7753357753357752, | |
| "grad_norm": 0.257131427526474, | |
| "learning_rate": 4.398947885101278e-05, | |
| "loss": 0.7697170376777649, | |
| "step": 1454 | |
| }, | |
| { | |
| "epoch": 1.7777777777777777, | |
| "grad_norm": 2.5905237197875977, | |
| "learning_rate": 4.38744735451688e-05, | |
| "loss": 0.5154775381088257, | |
| "step": 1456 | |
| }, | |
| { | |
| "epoch": 1.7802197802197801, | |
| "grad_norm": 0.3309757113456726, | |
| "learning_rate": 4.375954558427628e-05, | |
| "loss": 1.0552986860275269, | |
| "step": 1458 | |
| }, | |
| { | |
| "epoch": 1.7826617826617825, | |
| "grad_norm": 0.8500292301177979, | |
| "learning_rate": 4.36446957673174e-05, | |
| "loss": 0.9365640878677368, | |
| "step": 1460 | |
| }, | |
| { | |
| "epoch": 1.785103785103785, | |
| "grad_norm": 0.194388285279274, | |
| "learning_rate": 4.3529924892731077e-05, | |
| "loss": 0.8781149387359619, | |
| "step": 1462 | |
| }, | |
| { | |
| "epoch": 1.7875457875457874, | |
| "grad_norm": 0.2340954691171646, | |
| "learning_rate": 4.341523375840748e-05, | |
| "loss": 0.7231025099754333, | |
| "step": 1464 | |
| }, | |
| { | |
| "epoch": 1.7899877899877898, | |
| "grad_norm": 0.3413144052028656, | |
| "learning_rate": 4.3300623161682374e-05, | |
| "loss": 0.75654137134552, | |
| "step": 1466 | |
| }, | |
| { | |
| "epoch": 1.7924297924297923, | |
| "grad_norm": 5.676682949066162, | |
| "learning_rate": 4.3186093899331605e-05, | |
| "loss": 0.762105405330658, | |
| "step": 1468 | |
| }, | |
| { | |
| "epoch": 1.7948717948717947, | |
| "grad_norm": 0.3567386269569397, | |
| "learning_rate": 4.307164676756563e-05, | |
| "loss": 0.973125159740448, | |
| "step": 1470 | |
| }, | |
| { | |
| "epoch": 1.7973137973137974, | |
| "grad_norm": 0.21971167623996735, | |
| "learning_rate": 4.2957282562023905e-05, | |
| "loss": 1.0411320924758911, | |
| "step": 1472 | |
| }, | |
| { | |
| "epoch": 1.7997557997557998, | |
| "grad_norm": 0.23026219010353088, | |
| "learning_rate": 4.284300207776937e-05, | |
| "loss": 0.7280438542366028, | |
| "step": 1474 | |
| }, | |
| { | |
| "epoch": 1.8021978021978022, | |
| "grad_norm": 0.15556035935878754, | |
| "learning_rate": 4.272880610928294e-05, | |
| "loss": 0.5909247994422913, | |
| "step": 1476 | |
| }, | |
| { | |
| "epoch": 1.8046398046398047, | |
| "grad_norm": 0.29173851013183594, | |
| "learning_rate": 4.2614695450458e-05, | |
| "loss": 0.7470681667327881, | |
| "step": 1478 | |
| }, | |
| { | |
| "epoch": 1.807081807081807, | |
| "grad_norm": 0.17577774822711945, | |
| "learning_rate": 4.250067089459485e-05, | |
| "loss": 0.40631747245788574, | |
| "step": 1480 | |
| }, | |
| { | |
| "epoch": 1.8095238095238095, | |
| "grad_norm": 0.5311745405197144, | |
| "learning_rate": 4.238673323439516e-05, | |
| "loss": 0.6865248084068298, | |
| "step": 1482 | |
| }, | |
| { | |
| "epoch": 1.811965811965812, | |
| "grad_norm": 0.13050416111946106, | |
| "learning_rate": 4.227288326195655e-05, | |
| "loss": 1.124280571937561, | |
| "step": 1484 | |
| }, | |
| { | |
| "epoch": 1.8144078144078144, | |
| "grad_norm": 0.3412638306617737, | |
| "learning_rate": 4.215912176876702e-05, | |
| "loss": 1.0452717542648315, | |
| "step": 1486 | |
| }, | |
| { | |
| "epoch": 1.8168498168498168, | |
| "grad_norm": 0.6988785266876221, | |
| "learning_rate": 4.20454495456994e-05, | |
| "loss": 0.6673856973648071, | |
| "step": 1488 | |
| }, | |
| { | |
| "epoch": 1.8192918192918193, | |
| "grad_norm": 0.3266439139842987, | |
| "learning_rate": 4.193186738300603e-05, | |
| "loss": 0.850813090801239, | |
| "step": 1490 | |
| }, | |
| { | |
| "epoch": 1.8217338217338217, | |
| "grad_norm": 3.462282419204712, | |
| "learning_rate": 4.1818376070313046e-05, | |
| "loss": 1.0719836950302124, | |
| "step": 1492 | |
| }, | |
| { | |
| "epoch": 1.8241758241758241, | |
| "grad_norm": 0.2799535393714905, | |
| "learning_rate": 4.1704976396615035e-05, | |
| "loss": 0.6984477639198303, | |
| "step": 1494 | |
| }, | |
| { | |
| "epoch": 1.8266178266178266, | |
| "grad_norm": 0.2727070748806, | |
| "learning_rate": 4.1591669150269505e-05, | |
| "loss": 0.8232375979423523, | |
| "step": 1496 | |
| }, | |
| { | |
| "epoch": 1.8290598290598292, | |
| "grad_norm": 0.14200325310230255, | |
| "learning_rate": 4.14784551189914e-05, | |
| "loss": 1.4073742628097534, | |
| "step": 1498 | |
| }, | |
| { | |
| "epoch": 1.8315018315018317, | |
| "grad_norm": 0.46531203389167786, | |
| "learning_rate": 4.1365335089847615e-05, | |
| "loss": 1.1864604949951172, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 1.833943833943834, | |
| "grad_norm": 0.6122459173202515, | |
| "learning_rate": 4.125230984925156e-05, | |
| "loss": 0.8614127039909363, | |
| "step": 1502 | |
| }, | |
| { | |
| "epoch": 1.8363858363858365, | |
| "grad_norm": 0.03934510424733162, | |
| "learning_rate": 4.11393801829577e-05, | |
| "loss": 0.8807006478309631, | |
| "step": 1504 | |
| }, | |
| { | |
| "epoch": 1.838827838827839, | |
| "grad_norm": 0.12789197266101837, | |
| "learning_rate": 4.1026546876056006e-05, | |
| "loss": 0.7302087545394897, | |
| "step": 1506 | |
| }, | |
| { | |
| "epoch": 1.8412698412698414, | |
| "grad_norm": 0.28864189982414246, | |
| "learning_rate": 4.0913810712966594e-05, | |
| "loss": 0.7880204916000366, | |
| "step": 1508 | |
| }, | |
| { | |
| "epoch": 1.8437118437118438, | |
| "grad_norm": 0.1285659670829773, | |
| "learning_rate": 4.080117247743422e-05, | |
| "loss": 1.0747519731521606, | |
| "step": 1510 | |
| }, | |
| { | |
| "epoch": 1.8461538461538463, | |
| "grad_norm": 0.2884065806865692, | |
| "learning_rate": 4.0688632952522854e-05, | |
| "loss": 1.0847268104553223, | |
| "step": 1512 | |
| }, | |
| { | |
| "epoch": 1.8485958485958487, | |
| "grad_norm": 0.10085301846265793, | |
| "learning_rate": 4.0576192920610225e-05, | |
| "loss": 1.0555427074432373, | |
| "step": 1514 | |
| }, | |
| { | |
| "epoch": 1.8510378510378511, | |
| "grad_norm": 0.39521071314811707, | |
| "learning_rate": 4.046385316338235e-05, | |
| "loss": 0.7720159292221069, | |
| "step": 1516 | |
| }, | |
| { | |
| "epoch": 1.8534798534798536, | |
| "grad_norm": 0.16272993385791779, | |
| "learning_rate": 4.03516144618282e-05, | |
| "loss": 1.0903829336166382, | |
| "step": 1518 | |
| }, | |
| { | |
| "epoch": 1.855921855921856, | |
| "grad_norm": 1.2611911296844482, | |
| "learning_rate": 4.0239477596234166e-05, | |
| "loss": 0.850383460521698, | |
| "step": 1520 | |
| }, | |
| { | |
| "epoch": 1.8583638583638584, | |
| "grad_norm": 0.2067229151725769, | |
| "learning_rate": 4.0127443346178675e-05, | |
| "loss": 0.909993588924408, | |
| "step": 1522 | |
| }, | |
| { | |
| "epoch": 1.8608058608058609, | |
| "grad_norm": 0.3549044132232666, | |
| "learning_rate": 4.0015512490526764e-05, | |
| "loss": 0.8329422473907471, | |
| "step": 1524 | |
| }, | |
| { | |
| "epoch": 1.8632478632478633, | |
| "grad_norm": 0.33927121758461, | |
| "learning_rate": 3.9903685807424665e-05, | |
| "loss": 0.8696761727333069, | |
| "step": 1526 | |
| }, | |
| { | |
| "epoch": 1.8656898656898657, | |
| "grad_norm": 0.22251230478286743, | |
| "learning_rate": 3.979196407429441e-05, | |
| "loss": 0.7492315769195557, | |
| "step": 1528 | |
| }, | |
| { | |
| "epoch": 1.8681318681318682, | |
| "grad_norm": 0.33873438835144043, | |
| "learning_rate": 3.9680348067828445e-05, | |
| "loss": 0.8280366659164429, | |
| "step": 1530 | |
| }, | |
| { | |
| "epoch": 1.8705738705738706, | |
| "grad_norm": 0.30588895082473755, | |
| "learning_rate": 3.9568838563984145e-05, | |
| "loss": 0.7478283643722534, | |
| "step": 1532 | |
| }, | |
| { | |
| "epoch": 1.873015873015873, | |
| "grad_norm": 0.17078544199466705, | |
| "learning_rate": 3.945743633797849e-05, | |
| "loss": 0.9188095927238464, | |
| "step": 1534 | |
| }, | |
| { | |
| "epoch": 1.8754578754578755, | |
| "grad_norm": 0.13569653034210205, | |
| "learning_rate": 3.934614216428272e-05, | |
| "loss": 1.0689340829849243, | |
| "step": 1536 | |
| }, | |
| { | |
| "epoch": 1.877899877899878, | |
| "grad_norm": 0.16706790030002594, | |
| "learning_rate": 3.92349568166168e-05, | |
| "loss": 1.0157550573349, | |
| "step": 1538 | |
| }, | |
| { | |
| "epoch": 1.8803418803418803, | |
| "grad_norm": 0.11259295791387558, | |
| "learning_rate": 3.9123881067944214e-05, | |
| "loss": 1.0532252788543701, | |
| "step": 1540 | |
| }, | |
| { | |
| "epoch": 1.8827838827838828, | |
| "grad_norm": 0.23742644488811493, | |
| "learning_rate": 3.901291569046646e-05, | |
| "loss": 1.0480480194091797, | |
| "step": 1542 | |
| }, | |
| { | |
| "epoch": 1.8852258852258852, | |
| "grad_norm": 0.23344580829143524, | |
| "learning_rate": 3.890206145561774e-05, | |
| "loss": 0.8115158677101135, | |
| "step": 1544 | |
| }, | |
| { | |
| "epoch": 1.8876678876678876, | |
| "grad_norm": 0.21767543256282806, | |
| "learning_rate": 3.8791319134059635e-05, | |
| "loss": 1.0859732627868652, | |
| "step": 1546 | |
| }, | |
| { | |
| "epoch": 1.89010989010989, | |
| "grad_norm": 0.6981015205383301, | |
| "learning_rate": 3.8680689495675624e-05, | |
| "loss": 1.0850564241409302, | |
| "step": 1548 | |
| }, | |
| { | |
| "epoch": 1.8925518925518925, | |
| "grad_norm": 0.2215709686279297, | |
| "learning_rate": 3.8570173309565895e-05, | |
| "loss": 0.6140363812446594, | |
| "step": 1550 | |
| }, | |
| { | |
| "epoch": 1.894993894993895, | |
| "grad_norm": 0.2647048830986023, | |
| "learning_rate": 3.8459771344041827e-05, | |
| "loss": 1.1245466470718384, | |
| "step": 1552 | |
| }, | |
| { | |
| "epoch": 1.8974358974358974, | |
| "grad_norm": 0.33657070994377136, | |
| "learning_rate": 3.834948436662078e-05, | |
| "loss": 0.6867294311523438, | |
| "step": 1554 | |
| }, | |
| { | |
| "epoch": 1.8998778998778998, | |
| "grad_norm": 0.27911558747291565, | |
| "learning_rate": 3.823931314402076e-05, | |
| "loss": 1.1472887992858887, | |
| "step": 1556 | |
| }, | |
| { | |
| "epoch": 1.9023199023199022, | |
| "grad_norm": 0.8100700974464417, | |
| "learning_rate": 3.812925844215496e-05, | |
| "loss": 0.9275177121162415, | |
| "step": 1558 | |
| }, | |
| { | |
| "epoch": 1.9047619047619047, | |
| "grad_norm": 0.639488697052002, | |
| "learning_rate": 3.8019321026126556e-05, | |
| "loss": 0.6028476357460022, | |
| "step": 1560 | |
| }, | |
| { | |
| "epoch": 1.907203907203907, | |
| "grad_norm": 0.22780871391296387, | |
| "learning_rate": 3.790950166022334e-05, | |
| "loss": 1.064815640449524, | |
| "step": 1562 | |
| }, | |
| { | |
| "epoch": 1.9096459096459095, | |
| "grad_norm": 0.19272786378860474, | |
| "learning_rate": 3.7799801107912404e-05, | |
| "loss": 0.7198202013969421, | |
| "step": 1564 | |
| }, | |
| { | |
| "epoch": 1.912087912087912, | |
| "grad_norm": 0.3562992513179779, | |
| "learning_rate": 3.76902201318349e-05, | |
| "loss": 1.0721912384033203, | |
| "step": 1566 | |
| }, | |
| { | |
| "epoch": 1.9145299145299144, | |
| "grad_norm": 1.4828698635101318, | |
| "learning_rate": 3.758075949380061e-05, | |
| "loss": 0.905088484287262, | |
| "step": 1568 | |
| }, | |
| { | |
| "epoch": 1.9169719169719168, | |
| "grad_norm": 0.3064350485801697, | |
| "learning_rate": 3.7471419954782796e-05, | |
| "loss": 0.7685626745223999, | |
| "step": 1570 | |
| }, | |
| { | |
| "epoch": 1.9194139194139193, | |
| "grad_norm": 0.3019864857196808, | |
| "learning_rate": 3.736220227491276e-05, | |
| "loss": 1.0030697584152222, | |
| "step": 1572 | |
| }, | |
| { | |
| "epoch": 1.9218559218559217, | |
| "grad_norm": 0.7463777661323547, | |
| "learning_rate": 3.72531072134747e-05, | |
| "loss": 0.783904492855072, | |
| "step": 1574 | |
| }, | |
| { | |
| "epoch": 1.9242979242979243, | |
| "grad_norm": 0.5904989838600159, | |
| "learning_rate": 3.714413552890035e-05, | |
| "loss": 0.939898669719696, | |
| "step": 1576 | |
| }, | |
| { | |
| "epoch": 1.9267399267399268, | |
| "grad_norm": 0.22297552227973938, | |
| "learning_rate": 3.703528797876368e-05, | |
| "loss": 0.6810073852539062, | |
| "step": 1578 | |
| }, | |
| { | |
| "epoch": 1.9291819291819292, | |
| "grad_norm": 0.4896375238895416, | |
| "learning_rate": 3.692656531977574e-05, | |
| "loss": 1.0190911293029785, | |
| "step": 1580 | |
| }, | |
| { | |
| "epoch": 1.9316239316239316, | |
| "grad_norm": 0.11533039808273315, | |
| "learning_rate": 3.681796830777927e-05, | |
| "loss": 0.25521060824394226, | |
| "step": 1582 | |
| }, | |
| { | |
| "epoch": 1.934065934065934, | |
| "grad_norm": 0.35863131284713745, | |
| "learning_rate": 3.67094976977436e-05, | |
| "loss": 0.4837794303894043, | |
| "step": 1584 | |
| }, | |
| { | |
| "epoch": 1.9365079365079365, | |
| "grad_norm": 0.1478620022535324, | |
| "learning_rate": 3.6601154243759215e-05, | |
| "loss": 0.6568963527679443, | |
| "step": 1586 | |
| }, | |
| { | |
| "epoch": 1.938949938949939, | |
| "grad_norm": 0.27968350052833557, | |
| "learning_rate": 3.649293869903265e-05, | |
| "loss": 1.0917468070983887, | |
| "step": 1588 | |
| }, | |
| { | |
| "epoch": 1.9413919413919414, | |
| "grad_norm": 0.10699980705976486, | |
| "learning_rate": 3.6384851815881214e-05, | |
| "loss": 1.0869860649108887, | |
| "step": 1590 | |
| }, | |
| { | |
| "epoch": 1.9438339438339438, | |
| "grad_norm": 0.4071694612503052, | |
| "learning_rate": 3.6276894345727755e-05, | |
| "loss": 0.9575576782226562, | |
| "step": 1592 | |
| }, | |
| { | |
| "epoch": 1.9462759462759462, | |
| "grad_norm": 0.30121275782585144, | |
| "learning_rate": 3.6169067039095404e-05, | |
| "loss": 0.672225832939148, | |
| "step": 1594 | |
| }, | |
| { | |
| "epoch": 1.9487179487179487, | |
| "grad_norm": 0.2678603231906891, | |
| "learning_rate": 3.606137064560245e-05, | |
| "loss": 0.5794538259506226, | |
| "step": 1596 | |
| }, | |
| { | |
| "epoch": 1.9511599511599511, | |
| "grad_norm": 0.34622427821159363, | |
| "learning_rate": 3.595380591395706e-05, | |
| "loss": 0.7376147508621216, | |
| "step": 1598 | |
| }, | |
| { | |
| "epoch": 1.9536019536019538, | |
| "grad_norm": 0.203504741191864, | |
| "learning_rate": 3.5846373591952034e-05, | |
| "loss": 0.7228766679763794, | |
| "step": 1600 | |
| }, | |
| { | |
| "epoch": 1.9560439560439562, | |
| "grad_norm": 0.19558553397655487, | |
| "learning_rate": 3.57390744264597e-05, | |
| "loss": 1.0548901557922363, | |
| "step": 1602 | |
| }, | |
| { | |
| "epoch": 1.9584859584859586, | |
| "grad_norm": 0.5159153342247009, | |
| "learning_rate": 3.563190916342667e-05, | |
| "loss": 1.1147674322128296, | |
| "step": 1604 | |
| }, | |
| { | |
| "epoch": 1.960927960927961, | |
| "grad_norm": 0.45352670550346375, | |
| "learning_rate": 3.5524878547868665e-05, | |
| "loss": 1.2071866989135742, | |
| "step": 1606 | |
| }, | |
| { | |
| "epoch": 1.9633699633699635, | |
| "grad_norm": 0.11871611326932907, | |
| "learning_rate": 3.541798332386529e-05, | |
| "loss": 1.1454509496688843, | |
| "step": 1608 | |
| }, | |
| { | |
| "epoch": 1.965811965811966, | |
| "grad_norm": 0.18767890334129333, | |
| "learning_rate": 3.531122423455502e-05, | |
| "loss": 1.0524815320968628, | |
| "step": 1610 | |
| }, | |
| { | |
| "epoch": 1.9682539682539684, | |
| "grad_norm": 0.7703342437744141, | |
| "learning_rate": 3.5204602022129776e-05, | |
| "loss": 0.9447095990180969, | |
| "step": 1612 | |
| }, | |
| { | |
| "epoch": 1.9706959706959708, | |
| "grad_norm": 0.15847618877887726, | |
| "learning_rate": 3.5098117427830014e-05, | |
| "loss": 1.049167513847351, | |
| "step": 1614 | |
| }, | |
| { | |
| "epoch": 1.9731379731379732, | |
| "grad_norm": 0.12073543667793274, | |
| "learning_rate": 3.499177119193938e-05, | |
| "loss": 0.8764771819114685, | |
| "step": 1616 | |
| }, | |
| { | |
| "epoch": 1.9755799755799757, | |
| "grad_norm": 0.138034388422966, | |
| "learning_rate": 3.488556405377974e-05, | |
| "loss": 0.9779093861579895, | |
| "step": 1618 | |
| }, | |
| { | |
| "epoch": 1.978021978021978, | |
| "grad_norm": 0.15564721822738647, | |
| "learning_rate": 3.477949675170585e-05, | |
| "loss": 1.1160447597503662, | |
| "step": 1620 | |
| }, | |
| { | |
| "epoch": 1.9804639804639805, | |
| "grad_norm": 0.2628539204597473, | |
| "learning_rate": 3.467357002310043e-05, | |
| "loss": 0.8582868576049805, | |
| "step": 1622 | |
| }, | |
| { | |
| "epoch": 1.982905982905983, | |
| "grad_norm": 0.20883886516094208, | |
| "learning_rate": 3.4567784604368844e-05, | |
| "loss": 0.4796310365200043, | |
| "step": 1624 | |
| }, | |
| { | |
| "epoch": 1.9853479853479854, | |
| "grad_norm": 0.23412923514842987, | |
| "learning_rate": 3.4462141230934096e-05, | |
| "loss": 0.7904374599456787, | |
| "step": 1626 | |
| }, | |
| { | |
| "epoch": 1.9877899877899878, | |
| "grad_norm": 1.0725616216659546, | |
| "learning_rate": 3.4356640637231673e-05, | |
| "loss": 0.8463651537895203, | |
| "step": 1628 | |
| }, | |
| { | |
| "epoch": 1.9902319902319903, | |
| "grad_norm": 0.1747504323720932, | |
| "learning_rate": 3.425128355670451e-05, | |
| "loss": 1.1446012258529663, | |
| "step": 1630 | |
| }, | |
| { | |
| "epoch": 1.9926739926739927, | |
| "grad_norm": 0.5847082138061523, | |
| "learning_rate": 3.414607072179773e-05, | |
| "loss": 1.027217984199524, | |
| "step": 1632 | |
| }, | |
| { | |
| "epoch": 1.9951159951159951, | |
| "grad_norm": 0.16668929159641266, | |
| "learning_rate": 3.4041002863953766e-05, | |
| "loss": 0.7982567548751831, | |
| "step": 1634 | |
| }, | |
| { | |
| "epoch": 1.9975579975579976, | |
| "grad_norm": 0.1288972645998001, | |
| "learning_rate": 3.39360807136071e-05, | |
| "loss": 1.1141502857208252, | |
| "step": 1636 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "grad_norm": 0.19240225851535797, | |
| "learning_rate": 3.383130500017928e-05, | |
| "loss": 1.0660604238510132, | |
| "step": 1638 | |
| }, | |
| { | |
| "epoch": 2.0024420024420024, | |
| "grad_norm": 0.1633676290512085, | |
| "learning_rate": 3.3726676452073794e-05, | |
| "loss": 0.8098340630531311, | |
| "step": 1640 | |
| }, | |
| { | |
| "epoch": 2.004884004884005, | |
| "grad_norm": 0.433170348405838, | |
| "learning_rate": 3.362219579667108e-05, | |
| "loss": 0.6766613721847534, | |
| "step": 1642 | |
| }, | |
| { | |
| "epoch": 2.0073260073260073, | |
| "grad_norm": 0.06701789796352386, | |
| "learning_rate": 3.351786376032336e-05, | |
| "loss": 0.8549144268035889, | |
| "step": 1644 | |
| }, | |
| { | |
| "epoch": 2.0097680097680097, | |
| "grad_norm": 0.5762924551963806, | |
| "learning_rate": 3.341368106834972e-05, | |
| "loss": 0.6137865781784058, | |
| "step": 1646 | |
| }, | |
| { | |
| "epoch": 2.012210012210012, | |
| "grad_norm": 0.21846307814121246, | |
| "learning_rate": 3.33096484450309e-05, | |
| "loss": 0.5671168565750122, | |
| "step": 1648 | |
| }, | |
| { | |
| "epoch": 2.0146520146520146, | |
| "grad_norm": 4.144954204559326, | |
| "learning_rate": 3.320576661360451e-05, | |
| "loss": 0.5974899530410767, | |
| "step": 1650 | |
| }, | |
| { | |
| "epoch": 2.017094017094017, | |
| "grad_norm": 0.4958506226539612, | |
| "learning_rate": 3.310203629625971e-05, | |
| "loss": 0.7066149711608887, | |
| "step": 1652 | |
| }, | |
| { | |
| "epoch": 2.0195360195360195, | |
| "grad_norm": 0.22288993000984192, | |
| "learning_rate": 3.299845821413239e-05, | |
| "loss": 1.0410499572753906, | |
| "step": 1654 | |
| }, | |
| { | |
| "epoch": 2.021978021978022, | |
| "grad_norm": 0.06826517730951309, | |
| "learning_rate": 3.28950330873001e-05, | |
| "loss": 0.44449853897094727, | |
| "step": 1656 | |
| }, | |
| { | |
| "epoch": 2.0244200244200243, | |
| "grad_norm": 0.40834012627601624, | |
| "learning_rate": 3.279176163477704e-05, | |
| "loss": 0.7431243062019348, | |
| "step": 1658 | |
| }, | |
| { | |
| "epoch": 2.0268620268620268, | |
| "grad_norm": 0.1631796807050705, | |
| "learning_rate": 3.2688644574509005e-05, | |
| "loss": 0.8592994809150696, | |
| "step": 1660 | |
| }, | |
| { | |
| "epoch": 2.029304029304029, | |
| "grad_norm": 0.39541497826576233, | |
| "learning_rate": 3.258568262336857e-05, | |
| "loss": 0.9036648869514465, | |
| "step": 1662 | |
| }, | |
| { | |
| "epoch": 2.0317460317460316, | |
| "grad_norm": 0.1921057105064392, | |
| "learning_rate": 3.248287649714987e-05, | |
| "loss": 0.42328229546546936, | |
| "step": 1664 | |
| }, | |
| { | |
| "epoch": 2.034188034188034, | |
| "grad_norm": 0.2879117727279663, | |
| "learning_rate": 3.238022691056383e-05, | |
| "loss": 0.6711524128913879, | |
| "step": 1666 | |
| }, | |
| { | |
| "epoch": 2.0366300366300365, | |
| "grad_norm": 0.18322761356830597, | |
| "learning_rate": 3.227773457723301e-05, | |
| "loss": 0.9243687391281128, | |
| "step": 1668 | |
| }, | |
| { | |
| "epoch": 2.039072039072039, | |
| "grad_norm": 0.24786271154880524, | |
| "learning_rate": 3.2175400209686845e-05, | |
| "loss": 0.6299894452095032, | |
| "step": 1670 | |
| }, | |
| { | |
| "epoch": 2.0415140415140414, | |
| "grad_norm": 0.14640846848487854, | |
| "learning_rate": 3.207322451935651e-05, | |
| "loss": 0.8961169123649597, | |
| "step": 1672 | |
| }, | |
| { | |
| "epoch": 2.043956043956044, | |
| "grad_norm": 0.2015378326177597, | |
| "learning_rate": 3.197120821657008e-05, | |
| "loss": 0.8252861499786377, | |
| "step": 1674 | |
| }, | |
| { | |
| "epoch": 2.0463980463980462, | |
| "grad_norm": 0.04018518701195717, | |
| "learning_rate": 3.186935201054761e-05, | |
| "loss": 0.6224890351295471, | |
| "step": 1676 | |
| }, | |
| { | |
| "epoch": 2.0488400488400487, | |
| "grad_norm": 0.2952715754508972, | |
| "learning_rate": 3.1767656609396055e-05, | |
| "loss": 0.9035543203353882, | |
| "step": 1678 | |
| }, | |
| { | |
| "epoch": 2.051282051282051, | |
| "grad_norm": 0.23838657140731812, | |
| "learning_rate": 3.1666122720104544e-05, | |
| "loss": 0.9151688814163208, | |
| "step": 1680 | |
| }, | |
| { | |
| "epoch": 2.0537240537240535, | |
| "grad_norm": 0.14511041343212128, | |
| "learning_rate": 3.156475104853933e-05, | |
| "loss": 0.5381996631622314, | |
| "step": 1682 | |
| }, | |
| { | |
| "epoch": 2.056166056166056, | |
| "grad_norm": 0.2589055299758911, | |
| "learning_rate": 3.1463542299438916e-05, | |
| "loss": 0.6053510904312134, | |
| "step": 1684 | |
| }, | |
| { | |
| "epoch": 2.0586080586080584, | |
| "grad_norm": 0.5927218794822693, | |
| "learning_rate": 3.1362497176409196e-05, | |
| "loss": 0.49944713711738586, | |
| "step": 1686 | |
| }, | |
| { | |
| "epoch": 2.061050061050061, | |
| "grad_norm": 0.29157495498657227, | |
| "learning_rate": 3.126161638191851e-05, | |
| "loss": 0.6878185868263245, | |
| "step": 1688 | |
| }, | |
| { | |
| "epoch": 2.0634920634920633, | |
| "grad_norm": 0.20957320928573608, | |
| "learning_rate": 3.116090061729278e-05, | |
| "loss": 0.5788707733154297, | |
| "step": 1690 | |
| }, | |
| { | |
| "epoch": 2.065934065934066, | |
| "grad_norm": 0.13952338695526123, | |
| "learning_rate": 3.106035058271064e-05, | |
| "loss": 0.565746545791626, | |
| "step": 1692 | |
| }, | |
| { | |
| "epoch": 2.0683760683760686, | |
| "grad_norm": 0.22634997963905334, | |
| "learning_rate": 3.095996697719856e-05, | |
| "loss": 0.6304118037223816, | |
| "step": 1694 | |
| }, | |
| { | |
| "epoch": 2.070818070818071, | |
| "grad_norm": 0.32739636301994324, | |
| "learning_rate": 3.085975049862602e-05, | |
| "loss": 0.8893492817878723, | |
| "step": 1696 | |
| }, | |
| { | |
| "epoch": 2.0732600732600734, | |
| "grad_norm": 0.1666596531867981, | |
| "learning_rate": 3.075970184370057e-05, | |
| "loss": 0.8977892398834229, | |
| "step": 1698 | |
| }, | |
| { | |
| "epoch": 2.075702075702076, | |
| "grad_norm": 0.6082616448402405, | |
| "learning_rate": 3.065982170796309e-05, | |
| "loss": 0.881981372833252, | |
| "step": 1700 | |
| }, | |
| { | |
| "epoch": 2.0781440781440783, | |
| "grad_norm": 0.7366942167282104, | |
| "learning_rate": 3.056011078578292e-05, | |
| "loss": 0.3656110167503357, | |
| "step": 1702 | |
| }, | |
| { | |
| "epoch": 2.0805860805860807, | |
| "grad_norm": 0.10232818126678467, | |
| "learning_rate": 3.046056977035301e-05, | |
| "loss": 0.4570177495479584, | |
| "step": 1704 | |
| }, | |
| { | |
| "epoch": 2.083028083028083, | |
| "grad_norm": 0.3028525114059448, | |
| "learning_rate": 3.0361199353685065e-05, | |
| "loss": 0.30000945925712585, | |
| "step": 1706 | |
| }, | |
| { | |
| "epoch": 2.0854700854700856, | |
| "grad_norm": 0.2878250479698181, | |
| "learning_rate": 3.026200022660488e-05, | |
| "loss": 0.7617815136909485, | |
| "step": 1708 | |
| }, | |
| { | |
| "epoch": 2.087912087912088, | |
| "grad_norm": 0.37716731429100037, | |
| "learning_rate": 3.0162973078747337e-05, | |
| "loss": 0.5552463531494141, | |
| "step": 1710 | |
| }, | |
| { | |
| "epoch": 2.0903540903540905, | |
| "grad_norm": 0.7748197317123413, | |
| "learning_rate": 3.0064118598551795e-05, | |
| "loss": 0.6170698404312134, | |
| "step": 1712 | |
| }, | |
| { | |
| "epoch": 2.092796092796093, | |
| "grad_norm": 0.254808247089386, | |
| "learning_rate": 2.9965437473257146e-05, | |
| "loss": 0.6163607835769653, | |
| "step": 1714 | |
| }, | |
| { | |
| "epoch": 2.0952380952380953, | |
| "grad_norm": 0.33699703216552734, | |
| "learning_rate": 2.986693038889723e-05, | |
| "loss": 0.6443266272544861, | |
| "step": 1716 | |
| }, | |
| { | |
| "epoch": 2.0976800976800978, | |
| "grad_norm": 0.12282342463731766, | |
| "learning_rate": 2.976859803029582e-05, | |
| "loss": 0.8652450442314148, | |
| "step": 1718 | |
| }, | |
| { | |
| "epoch": 2.1001221001221, | |
| "grad_norm": 4.766298294067383, | |
| "learning_rate": 2.9670441081062085e-05, | |
| "loss": 0.7717719674110413, | |
| "step": 1720 | |
| }, | |
| { | |
| "epoch": 2.1025641025641026, | |
| "grad_norm": 0.1252395659685135, | |
| "learning_rate": 2.9572460223585686e-05, | |
| "loss": 0.9670816659927368, | |
| "step": 1722 | |
| }, | |
| { | |
| "epoch": 2.105006105006105, | |
| "grad_norm": 0.21436013281345367, | |
| "learning_rate": 2.9474656139032124e-05, | |
| "loss": 0.6342899203300476, | |
| "step": 1724 | |
| }, | |
| { | |
| "epoch": 2.1074481074481075, | |
| "grad_norm": 1.1389250755310059, | |
| "learning_rate": 2.937702950733793e-05, | |
| "loss": 0.38155803084373474, | |
| "step": 1726 | |
| }, | |
| { | |
| "epoch": 2.10989010989011, | |
| "grad_norm": 0.0522131472826004, | |
| "learning_rate": 2.927958100720606e-05, | |
| "loss": 0.4930419325828552, | |
| "step": 1728 | |
| }, | |
| { | |
| "epoch": 2.1123321123321124, | |
| "grad_norm": 0.21740849316120148, | |
| "learning_rate": 2.9182311316100987e-05, | |
| "loss": 0.7748350501060486, | |
| "step": 1730 | |
| }, | |
| { | |
| "epoch": 2.114774114774115, | |
| "grad_norm": 0.3918207883834839, | |
| "learning_rate": 2.908522111024419e-05, | |
| "loss": 0.4242591857910156, | |
| "step": 1732 | |
| }, | |
| { | |
| "epoch": 2.1172161172161172, | |
| "grad_norm": 0.03445032611489296, | |
| "learning_rate": 2.8988311064609296e-05, | |
| "loss": 0.34727221727371216, | |
| "step": 1734 | |
| }, | |
| { | |
| "epoch": 2.1196581196581197, | |
| "grad_norm": 0.8715845942497253, | |
| "learning_rate": 2.8891581852917517e-05, | |
| "loss": 0.903748095035553, | |
| "step": 1736 | |
| }, | |
| { | |
| "epoch": 2.122100122100122, | |
| "grad_norm": 0.1845117211341858, | |
| "learning_rate": 2.8795034147632816e-05, | |
| "loss": 0.7900539040565491, | |
| "step": 1738 | |
| }, | |
| { | |
| "epoch": 2.1245421245421245, | |
| "grad_norm": 0.11484416574239731, | |
| "learning_rate": 2.8698668619957413e-05, | |
| "loss": 0.7997967600822449, | |
| "step": 1740 | |
| }, | |
| { | |
| "epoch": 2.126984126984127, | |
| "grad_norm": 1.132584810256958, | |
| "learning_rate": 2.860248593982695e-05, | |
| "loss": 0.7892305254936218, | |
| "step": 1742 | |
| }, | |
| { | |
| "epoch": 2.1294261294261294, | |
| "grad_norm": 0.1391441971063614, | |
| "learning_rate": 2.850648677590599e-05, | |
| "loss": 0.6479541063308716, | |
| "step": 1744 | |
| }, | |
| { | |
| "epoch": 2.131868131868132, | |
| "grad_norm": 0.2044140249490738, | |
| "learning_rate": 2.8410671795583176e-05, | |
| "loss": 0.9084620475769043, | |
| "step": 1746 | |
| }, | |
| { | |
| "epoch": 2.1343101343101343, | |
| "grad_norm": 1.7502288818359375, | |
| "learning_rate": 2.831504166496681e-05, | |
| "loss": 0.2873627245426178, | |
| "step": 1748 | |
| }, | |
| { | |
| "epoch": 2.1367521367521367, | |
| "grad_norm": 0.3529217541217804, | |
| "learning_rate": 2.821959704888003e-05, | |
| "loss": 0.802934467792511, | |
| "step": 1750 | |
| }, | |
| { | |
| "epoch": 2.139194139194139, | |
| "grad_norm": 0.10790785402059555, | |
| "learning_rate": 2.8124338610856328e-05, | |
| "loss": 0.8992090225219727, | |
| "step": 1752 | |
| }, | |
| { | |
| "epoch": 2.1416361416361416, | |
| "grad_norm": 0.8812047839164734, | |
| "learning_rate": 2.8029267013134848e-05, | |
| "loss": 0.7742523550987244, | |
| "step": 1754 | |
| }, | |
| { | |
| "epoch": 2.144078144078144, | |
| "grad_norm": 0.43704017996788025, | |
| "learning_rate": 2.793438291665586e-05, | |
| "loss": 0.9906054735183716, | |
| "step": 1756 | |
| }, | |
| { | |
| "epoch": 2.1465201465201464, | |
| "grad_norm": 0.18866999447345734, | |
| "learning_rate": 2.7839686981056045e-05, | |
| "loss": 0.32941097021102905, | |
| "step": 1758 | |
| }, | |
| { | |
| "epoch": 2.148962148962149, | |
| "grad_norm": 0.15389196574687958, | |
| "learning_rate": 2.7745179864664063e-05, | |
| "loss": 0.8320349454879761, | |
| "step": 1760 | |
| }, | |
| { | |
| "epoch": 2.1514041514041513, | |
| "grad_norm": 0.1416536122560501, | |
| "learning_rate": 2.7650862224495843e-05, | |
| "loss": 0.6188011765480042, | |
| "step": 1762 | |
| }, | |
| { | |
| "epoch": 2.1538461538461537, | |
| "grad_norm": 0.28659787774086, | |
| "learning_rate": 2.755673471625011e-05, | |
| "loss": 0.6457144618034363, | |
| "step": 1764 | |
| }, | |
| { | |
| "epoch": 2.156288156288156, | |
| "grad_norm": 0.35756003856658936, | |
| "learning_rate": 2.7462797994303746e-05, | |
| "loss": 0.4471341371536255, | |
| "step": 1766 | |
| }, | |
| { | |
| "epoch": 2.1587301587301586, | |
| "grad_norm": 0.5172722339630127, | |
| "learning_rate": 2.7369052711707305e-05, | |
| "loss": 0.49003028869628906, | |
| "step": 1768 | |
| }, | |
| { | |
| "epoch": 2.161172161172161, | |
| "grad_norm": 0.236545130610466, | |
| "learning_rate": 2.7275499520180464e-05, | |
| "loss": 0.49786072969436646, | |
| "step": 1770 | |
| }, | |
| { | |
| "epoch": 2.1636141636141635, | |
| "grad_norm": 0.2714576721191406, | |
| "learning_rate": 2.7182139070107472e-05, | |
| "loss": 0.5811169147491455, | |
| "step": 1772 | |
| }, | |
| { | |
| "epoch": 2.166056166056166, | |
| "grad_norm": 0.18183131515979767, | |
| "learning_rate": 2.7088972010532588e-05, | |
| "loss": 0.7186815738677979, | |
| "step": 1774 | |
| }, | |
| { | |
| "epoch": 2.1684981684981683, | |
| "grad_norm": 0.14280414581298828, | |
| "learning_rate": 2.699599898915568e-05, | |
| "loss": 0.907622754573822, | |
| "step": 1776 | |
| }, | |
| { | |
| "epoch": 2.1709401709401708, | |
| "grad_norm": 0.5836525559425354, | |
| "learning_rate": 2.6903220652327616e-05, | |
| "loss": 0.5992893576622009, | |
| "step": 1778 | |
| }, | |
| { | |
| "epoch": 2.173382173382173, | |
| "grad_norm": 0.15364791452884674, | |
| "learning_rate": 2.6810637645045823e-05, | |
| "loss": 0.9291015863418579, | |
| "step": 1780 | |
| }, | |
| { | |
| "epoch": 2.1758241758241756, | |
| "grad_norm": 1.7347960472106934, | |
| "learning_rate": 2.6718250610949813e-05, | |
| "loss": 0.7348466515541077, | |
| "step": 1782 | |
| }, | |
| { | |
| "epoch": 2.178266178266178, | |
| "grad_norm": 0.0890185683965683, | |
| "learning_rate": 2.662606019231665e-05, | |
| "loss": 0.9083889126777649, | |
| "step": 1784 | |
| }, | |
| { | |
| "epoch": 2.1807081807081805, | |
| "grad_norm": 1.1266306638717651, | |
| "learning_rate": 2.6534067030056548e-05, | |
| "loss": 0.730735182762146, | |
| "step": 1786 | |
| }, | |
| { | |
| "epoch": 2.183150183150183, | |
| "grad_norm": 0.40739908814430237, | |
| "learning_rate": 2.6442271763708365e-05, | |
| "loss": 0.672556459903717, | |
| "step": 1788 | |
| }, | |
| { | |
| "epoch": 2.185592185592186, | |
| "grad_norm": 1.7451484203338623, | |
| "learning_rate": 2.6350675031435212e-05, | |
| "loss": 0.8369135856628418, | |
| "step": 1790 | |
| }, | |
| { | |
| "epoch": 2.1880341880341883, | |
| "grad_norm": 0.8240930438041687, | |
| "learning_rate": 2.6259277470019973e-05, | |
| "loss": 0.681310772895813, | |
| "step": 1792 | |
| }, | |
| { | |
| "epoch": 2.1904761904761907, | |
| "grad_norm": 0.13275334239006042, | |
| "learning_rate": 2.61680797148609e-05, | |
| "loss": 0.5112169981002808, | |
| "step": 1794 | |
| }, | |
| { | |
| "epoch": 2.192918192918193, | |
| "grad_norm": 0.29042962193489075, | |
| "learning_rate": 2.607708239996715e-05, | |
| "loss": 0.6654087901115417, | |
| "step": 1796 | |
| }, | |
| { | |
| "epoch": 2.1953601953601956, | |
| "grad_norm": 0.1640828251838684, | |
| "learning_rate": 2.598628615795447e-05, | |
| "loss": 0.6851241588592529, | |
| "step": 1798 | |
| }, | |
| { | |
| "epoch": 2.197802197802198, | |
| "grad_norm": 0.6114941835403442, | |
| "learning_rate": 2.5895691620040686e-05, | |
| "loss": 0.8884647488594055, | |
| "step": 1800 | |
| }, | |
| { | |
| "epoch": 2.2002442002442004, | |
| "grad_norm": 0.4844699501991272, | |
| "learning_rate": 2.5805299416041427e-05, | |
| "loss": 0.6807603240013123, | |
| "step": 1802 | |
| }, | |
| { | |
| "epoch": 2.202686202686203, | |
| "grad_norm": 0.4512222707271576, | |
| "learning_rate": 2.571511017436563e-05, | |
| "loss": 0.5238524079322815, | |
| "step": 1804 | |
| }, | |
| { | |
| "epoch": 2.2051282051282053, | |
| "grad_norm": 0.17243556678295135, | |
| "learning_rate": 2.562512452201129e-05, | |
| "loss": 0.9162152409553528, | |
| "step": 1806 | |
| }, | |
| { | |
| "epoch": 2.2075702075702077, | |
| "grad_norm": 0.2860824763774872, | |
| "learning_rate": 2.5535343084561024e-05, | |
| "loss": 0.8726363778114319, | |
| "step": 1808 | |
| }, | |
| { | |
| "epoch": 2.21001221001221, | |
| "grad_norm": 0.35127371549606323, | |
| "learning_rate": 2.5445766486177735e-05, | |
| "loss": 0.6082300543785095, | |
| "step": 1810 | |
| }, | |
| { | |
| "epoch": 2.2124542124542126, | |
| "grad_norm": 0.2696506977081299, | |
| "learning_rate": 2.5356395349600265e-05, | |
| "loss": 0.6777657270431519, | |
| "step": 1812 | |
| }, | |
| { | |
| "epoch": 2.214896214896215, | |
| "grad_norm": 0.3356308937072754, | |
| "learning_rate": 2.526723029613912e-05, | |
| "loss": 0.325158953666687, | |
| "step": 1814 | |
| }, | |
| { | |
| "epoch": 2.2173382173382175, | |
| "grad_norm": 0.11445268988609314, | |
| "learning_rate": 2.5178271945672065e-05, | |
| "loss": 0.9346632957458496, | |
| "step": 1816 | |
| }, | |
| { | |
| "epoch": 2.21978021978022, | |
| "grad_norm": 0.20148615539073944, | |
| "learning_rate": 2.5089520916639898e-05, | |
| "loss": 0.7768259644508362, | |
| "step": 1818 | |
| }, | |
| { | |
| "epoch": 2.2222222222222223, | |
| "grad_norm": 0.19314219057559967, | |
| "learning_rate": 2.5000977826042092e-05, | |
| "loss": 0.8967069387435913, | |
| "step": 1820 | |
| }, | |
| { | |
| "epoch": 2.2246642246642248, | |
| "grad_norm": 0.2755802273750305, | |
| "learning_rate": 2.491264328943256e-05, | |
| "loss": 0.46456241607666016, | |
| "step": 1822 | |
| }, | |
| { | |
| "epoch": 2.227106227106227, | |
| "grad_norm": 0.2613433003425598, | |
| "learning_rate": 2.4824517920915273e-05, | |
| "loss": 0.5768070220947266, | |
| "step": 1824 | |
| }, | |
| { | |
| "epoch": 2.2295482295482296, | |
| "grad_norm": 0.29999735951423645, | |
| "learning_rate": 2.4736602333140137e-05, | |
| "loss": 0.5904824137687683, | |
| "step": 1826 | |
| }, | |
| { | |
| "epoch": 2.231990231990232, | |
| "grad_norm": 1.1658880710601807, | |
| "learning_rate": 2.4648897137298595e-05, | |
| "loss": 0.5655071139335632, | |
| "step": 1828 | |
| }, | |
| { | |
| "epoch": 2.2344322344322345, | |
| "grad_norm": 0.4124068021774292, | |
| "learning_rate": 2.456140294311949e-05, | |
| "loss": 0.5846872329711914, | |
| "step": 1830 | |
| }, | |
| { | |
| "epoch": 2.236874236874237, | |
| "grad_norm": 0.21505007147789001, | |
| "learning_rate": 2.4474120358864717e-05, | |
| "loss": 0.8542973399162292, | |
| "step": 1832 | |
| }, | |
| { | |
| "epoch": 2.2393162393162394, | |
| "grad_norm": 0.3325560390949249, | |
| "learning_rate": 2.4387049991325138e-05, | |
| "loss": 0.9094607830047607, | |
| "step": 1834 | |
| }, | |
| { | |
| "epoch": 2.241758241758242, | |
| "grad_norm": 0.15366816520690918, | |
| "learning_rate": 2.430019244581619e-05, | |
| "loss": 0.8724948167800903, | |
| "step": 1836 | |
| }, | |
| { | |
| "epoch": 2.244200244200244, | |
| "grad_norm": 0.23001395165920258, | |
| "learning_rate": 2.421354832617384e-05, | |
| "loss": 0.9115993976593018, | |
| "step": 1838 | |
| }, | |
| { | |
| "epoch": 2.2466422466422467, | |
| "grad_norm": 1.9149470329284668, | |
| "learning_rate": 2.412711823475024e-05, | |
| "loss": 0.4212198555469513, | |
| "step": 1840 | |
| }, | |
| { | |
| "epoch": 2.249084249084249, | |
| "grad_norm": 0.34548574686050415, | |
| "learning_rate": 2.404090277240968e-05, | |
| "loss": 0.9030619859695435, | |
| "step": 1842 | |
| }, | |
| { | |
| "epoch": 2.2515262515262515, | |
| "grad_norm": 0.14559565484523773, | |
| "learning_rate": 2.39549025385243e-05, | |
| "loss": 0.8886456489562988, | |
| "step": 1844 | |
| }, | |
| { | |
| "epoch": 2.253968253968254, | |
| "grad_norm": 0.3923342525959015, | |
| "learning_rate": 2.386911813096997e-05, | |
| "loss": 0.5417113900184631, | |
| "step": 1846 | |
| }, | |
| { | |
| "epoch": 2.2564102564102564, | |
| "grad_norm": 0.060627490282058716, | |
| "learning_rate": 2.378355014612216e-05, | |
| "loss": 0.5653957724571228, | |
| "step": 1848 | |
| }, | |
| { | |
| "epoch": 2.258852258852259, | |
| "grad_norm": 0.27988186478614807, | |
| "learning_rate": 2.369819917885174e-05, | |
| "loss": 0.6443363428115845, | |
| "step": 1850 | |
| }, | |
| { | |
| "epoch": 2.2612942612942613, | |
| "grad_norm": 0.2397046983242035, | |
| "learning_rate": 2.3613065822520854e-05, | |
| "loss": 0.7101399302482605, | |
| "step": 1852 | |
| }, | |
| { | |
| "epoch": 2.2637362637362637, | |
| "grad_norm": 0.2521998882293701, | |
| "learning_rate": 2.3528150668978852e-05, | |
| "loss": 0.8785298466682434, | |
| "step": 1854 | |
| }, | |
| { | |
| "epoch": 2.266178266178266, | |
| "grad_norm": 0.37365812063217163, | |
| "learning_rate": 2.3443454308558074e-05, | |
| "loss": 0.8742945194244385, | |
| "step": 1856 | |
| }, | |
| { | |
| "epoch": 2.2686202686202686, | |
| "grad_norm": 0.18845844268798828, | |
| "learning_rate": 2.3358977330069862e-05, | |
| "loss": 0.6373099088668823, | |
| "step": 1858 | |
| }, | |
| { | |
| "epoch": 2.271062271062271, | |
| "grad_norm": 0.20244193077087402, | |
| "learning_rate": 2.327472032080037e-05, | |
| "loss": 0.9736915826797485, | |
| "step": 1860 | |
| }, | |
| { | |
| "epoch": 2.2735042735042734, | |
| "grad_norm": 0.2624374330043793, | |
| "learning_rate": 2.319068386650656e-05, | |
| "loss": 0.59718918800354, | |
| "step": 1862 | |
| }, | |
| { | |
| "epoch": 2.275946275946276, | |
| "grad_norm": 0.12046441435813904, | |
| "learning_rate": 2.3106868551412028e-05, | |
| "loss": 0.5769340991973877, | |
| "step": 1864 | |
| }, | |
| { | |
| "epoch": 2.2783882783882783, | |
| "grad_norm": 0.1807142198085785, | |
| "learning_rate": 2.3023274958203073e-05, | |
| "loss": 0.8824018836021423, | |
| "step": 1866 | |
| }, | |
| { | |
| "epoch": 2.2808302808302807, | |
| "grad_norm": 0.19607388973236084, | |
| "learning_rate": 2.2939903668024516e-05, | |
| "loss": 0.9017168283462524, | |
| "step": 1868 | |
| }, | |
| { | |
| "epoch": 2.283272283272283, | |
| "grad_norm": 0.16330653429031372, | |
| "learning_rate": 2.2856755260475782e-05, | |
| "loss": 0.9111011624336243, | |
| "step": 1870 | |
| }, | |
| { | |
| "epoch": 2.2857142857142856, | |
| "grad_norm": 0.1461145281791687, | |
| "learning_rate": 2.2773830313606746e-05, | |
| "loss": 0.912349283695221, | |
| "step": 1872 | |
| }, | |
| { | |
| "epoch": 2.288156288156288, | |
| "grad_norm": 0.3137403130531311, | |
| "learning_rate": 2.2691129403913837e-05, | |
| "loss": 0.1755107045173645, | |
| "step": 1874 | |
| }, | |
| { | |
| "epoch": 2.2905982905982905, | |
| "grad_norm": 0.12145491689443588, | |
| "learning_rate": 2.2608653106335944e-05, | |
| "loss": 0.9272356629371643, | |
| "step": 1876 | |
| }, | |
| { | |
| "epoch": 2.293040293040293, | |
| "grad_norm": 0.3060324788093567, | |
| "learning_rate": 2.2526401994250432e-05, | |
| "loss": 0.584820568561554, | |
| "step": 1878 | |
| }, | |
| { | |
| "epoch": 2.2954822954822953, | |
| "grad_norm": 0.2952076494693756, | |
| "learning_rate": 2.2444376639469193e-05, | |
| "loss": 0.7527849078178406, | |
| "step": 1880 | |
| }, | |
| { | |
| "epoch": 2.2979242979242978, | |
| "grad_norm": 0.18966065347194672, | |
| "learning_rate": 2.2362577612234664e-05, | |
| "loss": 0.9209306240081787, | |
| "step": 1882 | |
| }, | |
| { | |
| "epoch": 2.3003663003663, | |
| "grad_norm": 0.18079566955566406, | |
| "learning_rate": 2.2281005481215798e-05, | |
| "loss": 0.6318021416664124, | |
| "step": 1884 | |
| }, | |
| { | |
| "epoch": 2.3028083028083026, | |
| "grad_norm": 0.41257551312446594, | |
| "learning_rate": 2.21996608135042e-05, | |
| "loss": 0.9299580454826355, | |
| "step": 1886 | |
| }, | |
| { | |
| "epoch": 2.305250305250305, | |
| "grad_norm": 0.1051776185631752, | |
| "learning_rate": 2.2118544174610145e-05, | |
| "loss": 0.8678762912750244, | |
| "step": 1888 | |
| }, | |
| { | |
| "epoch": 2.3076923076923075, | |
| "grad_norm": 1.0651531219482422, | |
| "learning_rate": 2.20376561284586e-05, | |
| "loss": 0.9510694742202759, | |
| "step": 1890 | |
| }, | |
| { | |
| "epoch": 2.31013431013431, | |
| "grad_norm": 0.27115678787231445, | |
| "learning_rate": 2.1956997237385417e-05, | |
| "loss": 0.8932832479476929, | |
| "step": 1892 | |
| }, | |
| { | |
| "epoch": 2.3125763125763124, | |
| "grad_norm": 0.16390186548233032, | |
| "learning_rate": 2.187656806213329e-05, | |
| "loss": 0.9423433542251587, | |
| "step": 1894 | |
| }, | |
| { | |
| "epoch": 2.315018315018315, | |
| "grad_norm": 0.16460950672626495, | |
| "learning_rate": 2.1796369161847978e-05, | |
| "loss": 0.9168346524238586, | |
| "step": 1896 | |
| }, | |
| { | |
| "epoch": 2.317460317460317, | |
| "grad_norm": 0.2277718037366867, | |
| "learning_rate": 2.1716401094074295e-05, | |
| "loss": 0.8421288132667542, | |
| "step": 1898 | |
| }, | |
| { | |
| "epoch": 2.3199023199023197, | |
| "grad_norm": 0.10204636305570602, | |
| "learning_rate": 2.1636664414752407e-05, | |
| "loss": 0.9080808758735657, | |
| "step": 1900 | |
| }, | |
| { | |
| "epoch": 2.3223443223443225, | |
| "grad_norm": 0.2854328155517578, | |
| "learning_rate": 2.1557159678213747e-05, | |
| "loss": 0.890828549861908, | |
| "step": 1902 | |
| }, | |
| { | |
| "epoch": 2.324786324786325, | |
| "grad_norm": 0.4696880578994751, | |
| "learning_rate": 2.1477887437177358e-05, | |
| "loss": 0.43194860219955444, | |
| "step": 1904 | |
| }, | |
| { | |
| "epoch": 2.3272283272283274, | |
| "grad_norm": 0.2895866930484772, | |
| "learning_rate": 2.139884824274591e-05, | |
| "loss": 0.39698588848114014, | |
| "step": 1906 | |
| }, | |
| { | |
| "epoch": 2.32967032967033, | |
| "grad_norm": 0.19736841320991516, | |
| "learning_rate": 2.132004264440198e-05, | |
| "loss": 0.5856382250785828, | |
| "step": 1908 | |
| }, | |
| { | |
| "epoch": 2.3321123321123323, | |
| "grad_norm": 0.19205878674983978, | |
| "learning_rate": 2.124147119000413e-05, | |
| "loss": 0.3817310035228729, | |
| "step": 1910 | |
| }, | |
| { | |
| "epoch": 2.3345543345543347, | |
| "grad_norm": 0.3305794894695282, | |
| "learning_rate": 2.1163134425783175e-05, | |
| "loss": 0.7116564512252808, | |
| "step": 1912 | |
| }, | |
| { | |
| "epoch": 2.336996336996337, | |
| "grad_norm": 0.22208884358406067, | |
| "learning_rate": 2.1085032896338362e-05, | |
| "loss": 0.7496541142463684, | |
| "step": 1914 | |
| }, | |
| { | |
| "epoch": 2.3394383394383396, | |
| "grad_norm": 0.8903825879096985, | |
| "learning_rate": 2.1007167144633584e-05, | |
| "loss": 0.8046509027481079, | |
| "step": 1916 | |
| }, | |
| { | |
| "epoch": 2.341880341880342, | |
| "grad_norm": 0.16632907092571259, | |
| "learning_rate": 2.0929537711993545e-05, | |
| "loss": 0.6315186619758606, | |
| "step": 1918 | |
| }, | |
| { | |
| "epoch": 2.3443223443223444, | |
| "grad_norm": 0.26480963826179504, | |
| "learning_rate": 2.0852145138100136e-05, | |
| "loss": 0.2272191047668457, | |
| "step": 1920 | |
| }, | |
| { | |
| "epoch": 2.346764346764347, | |
| "grad_norm": 0.39798590540885925, | |
| "learning_rate": 2.077498996098853e-05, | |
| "loss": 0.5758194923400879, | |
| "step": 1922 | |
| }, | |
| { | |
| "epoch": 2.3492063492063493, | |
| "grad_norm": 0.07637017965316772, | |
| "learning_rate": 2.0698072717043555e-05, | |
| "loss": 0.49005892872810364, | |
| "step": 1924 | |
| }, | |
| { | |
| "epoch": 2.3516483516483517, | |
| "grad_norm": 0.20235508680343628, | |
| "learning_rate": 2.0621393940995897e-05, | |
| "loss": 0.8811180591583252, | |
| "step": 1926 | |
| }, | |
| { | |
| "epoch": 2.354090354090354, | |
| "grad_norm": 0.4823862314224243, | |
| "learning_rate": 2.0544954165918438e-05, | |
| "loss": 0.35716819763183594, | |
| "step": 1928 | |
| }, | |
| { | |
| "epoch": 2.3565323565323566, | |
| "grad_norm": 0.0784493088722229, | |
| "learning_rate": 2.046875392322247e-05, | |
| "loss": 0.45765265822410583, | |
| "step": 1930 | |
| }, | |
| { | |
| "epoch": 2.358974358974359, | |
| "grad_norm": 0.16950422525405884, | |
| "learning_rate": 2.0392793742654097e-05, | |
| "loss": 0.5912071466445923, | |
| "step": 1932 | |
| }, | |
| { | |
| "epoch": 2.3614163614163615, | |
| "grad_norm": 0.27943259477615356, | |
| "learning_rate": 2.031707415229047e-05, | |
| "loss": 0.7345171570777893, | |
| "step": 1934 | |
| }, | |
| { | |
| "epoch": 2.363858363858364, | |
| "grad_norm": 0.118248350918293, | |
| "learning_rate": 2.0241595678536185e-05, | |
| "loss": 0.5935987830162048, | |
| "step": 1936 | |
| }, | |
| { | |
| "epoch": 2.3663003663003663, | |
| "grad_norm": 0.3124336004257202, | |
| "learning_rate": 2.016635884611953e-05, | |
| "loss": 0.8682062029838562, | |
| "step": 1938 | |
| }, | |
| { | |
| "epoch": 2.3687423687423688, | |
| "grad_norm": 0.5722883343696594, | |
| "learning_rate": 2.0091364178088982e-05, | |
| "loss": 0.6845846176147461, | |
| "step": 1940 | |
| }, | |
| { | |
| "epoch": 2.371184371184371, | |
| "grad_norm": 0.023987067863345146, | |
| "learning_rate": 2.0016612195809413e-05, | |
| "loss": 0.721107542514801, | |
| "step": 1942 | |
| }, | |
| { | |
| "epoch": 2.3736263736263736, | |
| "grad_norm": 0.15775702893733978, | |
| "learning_rate": 1.994210341895858e-05, | |
| "loss": 0.7121893763542175, | |
| "step": 1944 | |
| }, | |
| { | |
| "epoch": 2.376068376068376, | |
| "grad_norm": 0.22646969556808472, | |
| "learning_rate": 1.986783836552342e-05, | |
| "loss": 0.17516975104808807, | |
| "step": 1946 | |
| }, | |
| { | |
| "epoch": 2.3785103785103785, | |
| "grad_norm": 0.08263858407735825, | |
| "learning_rate": 1.979381755179657e-05, | |
| "loss": 0.4393911063671112, | |
| "step": 1948 | |
| }, | |
| { | |
| "epoch": 2.380952380952381, | |
| "grad_norm": 0.3274051547050476, | |
| "learning_rate": 1.9720041492372622e-05, | |
| "loss": 0.38030242919921875, | |
| "step": 1950 | |
| }, | |
| { | |
| "epoch": 2.3833943833943834, | |
| "grad_norm": 0.19202570617198944, | |
| "learning_rate": 1.9646510700144725e-05, | |
| "loss": 0.3027057647705078, | |
| "step": 1952 | |
| }, | |
| { | |
| "epoch": 2.385836385836386, | |
| "grad_norm": 0.4772564768791199, | |
| "learning_rate": 1.9573225686300828e-05, | |
| "loss": 0.7000046372413635, | |
| "step": 1954 | |
| }, | |
| { | |
| "epoch": 2.3882783882783882, | |
| "grad_norm": 0.21746446192264557, | |
| "learning_rate": 1.950018696032031e-05, | |
| "loss": 0.9199942946434021, | |
| "step": 1956 | |
| }, | |
| { | |
| "epoch": 2.3907203907203907, | |
| "grad_norm": 0.246440127491951, | |
| "learning_rate": 1.9427395029970268e-05, | |
| "loss": 0.6997683644294739, | |
| "step": 1958 | |
| }, | |
| { | |
| "epoch": 2.393162393162393, | |
| "grad_norm": 0.6625522971153259, | |
| "learning_rate": 1.9354850401302137e-05, | |
| "loss": 1.0697118043899536, | |
| "step": 1960 | |
| }, | |
| { | |
| "epoch": 2.3956043956043955, | |
| "grad_norm": 0.21006573736667633, | |
| "learning_rate": 1.928255357864806e-05, | |
| "loss": 0.623195469379425, | |
| "step": 1962 | |
| }, | |
| { | |
| "epoch": 2.398046398046398, | |
| "grad_norm": 0.11298193037509918, | |
| "learning_rate": 1.9210505064617445e-05, | |
| "loss": 0.9140588045120239, | |
| "step": 1964 | |
| }, | |
| { | |
| "epoch": 2.4004884004884004, | |
| "grad_norm": 0.41702476143836975, | |
| "learning_rate": 1.9138705360093466e-05, | |
| "loss": 0.38306570053100586, | |
| "step": 1966 | |
| }, | |
| { | |
| "epoch": 2.402930402930403, | |
| "grad_norm": 0.2691957354545593, | |
| "learning_rate": 1.9067154964229527e-05, | |
| "loss": 0.6268753409385681, | |
| "step": 1968 | |
| }, | |
| { | |
| "epoch": 2.4053724053724053, | |
| "grad_norm": 10.021284103393555, | |
| "learning_rate": 1.8995854374445866e-05, | |
| "loss": 0.41380101442337036, | |
| "step": 1970 | |
| }, | |
| { | |
| "epoch": 2.4078144078144077, | |
| "grad_norm": 0.8261527419090271, | |
| "learning_rate": 1.8924804086426058e-05, | |
| "loss": 0.20622682571411133, | |
| "step": 1972 | |
| }, | |
| { | |
| "epoch": 2.41025641025641, | |
| "grad_norm": 0.7584331035614014, | |
| "learning_rate": 1.885400459411355e-05, | |
| "loss": 0.1642729490995407, | |
| "step": 1974 | |
| }, | |
| { | |
| "epoch": 2.4126984126984126, | |
| "grad_norm": 0.33901268243789673, | |
| "learning_rate": 1.8783456389708292e-05, | |
| "loss": 0.37540411949157715, | |
| "step": 1976 | |
| }, | |
| { | |
| "epoch": 2.415140415140415, | |
| "grad_norm": 0.5125512480735779, | |
| "learning_rate": 1.8713159963663225e-05, | |
| "loss": 0.8112108707427979, | |
| "step": 1978 | |
| }, | |
| { | |
| "epoch": 2.4175824175824174, | |
| "grad_norm": 0.0952700823545456, | |
| "learning_rate": 1.8643115804680956e-05, | |
| "loss": 0.4625963270664215, | |
| "step": 1980 | |
| }, | |
| { | |
| "epoch": 2.42002442002442, | |
| "grad_norm": 0.32406389713287354, | |
| "learning_rate": 1.8573324399710324e-05, | |
| "loss": 0.8792840242385864, | |
| "step": 1982 | |
| }, | |
| { | |
| "epoch": 2.4224664224664223, | |
| "grad_norm": 0.27870288491249084, | |
| "learning_rate": 1.850378623394298e-05, | |
| "loss": 0.5139374136924744, | |
| "step": 1984 | |
| }, | |
| { | |
| "epoch": 2.4249084249084247, | |
| "grad_norm": 0.14946326613426208, | |
| "learning_rate": 1.8434501790810103e-05, | |
| "loss": 0.9647743701934814, | |
| "step": 1986 | |
| }, | |
| { | |
| "epoch": 2.427350427350427, | |
| "grad_norm": 0.13209617137908936, | |
| "learning_rate": 1.836547155197893e-05, | |
| "loss": 0.820813775062561, | |
| "step": 1988 | |
| }, | |
| { | |
| "epoch": 2.42979242979243, | |
| "grad_norm": 0.5546777248382568, | |
| "learning_rate": 1.8296695997349505e-05, | |
| "loss": 0.9282412528991699, | |
| "step": 1990 | |
| }, | |
| { | |
| "epoch": 2.4322344322344325, | |
| "grad_norm": 0.4288235306739807, | |
| "learning_rate": 1.822817560505129e-05, | |
| "loss": 0.6502490043640137, | |
| "step": 1992 | |
| }, | |
| { | |
| "epoch": 2.434676434676435, | |
| "grad_norm": 0.3928282856941223, | |
| "learning_rate": 1.8159910851439842e-05, | |
| "loss": 0.836300790309906, | |
| "step": 1994 | |
| }, | |
| { | |
| "epoch": 2.4371184371184373, | |
| "grad_norm": 0.2621248662471771, | |
| "learning_rate": 1.8091902211093513e-05, | |
| "loss": 0.6482276320457458, | |
| "step": 1996 | |
| }, | |
| { | |
| "epoch": 2.4395604395604398, | |
| "grad_norm": 0.32245925068855286, | |
| "learning_rate": 1.8024150156810172e-05, | |
| "loss": 0.9061039686203003, | |
| "step": 1998 | |
| }, | |
| { | |
| "epoch": 2.442002442002442, | |
| "grad_norm": 0.17262418568134308, | |
| "learning_rate": 1.795665515960384e-05, | |
| "loss": 0.5754269361495972, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 2.4444444444444446, | |
| "grad_norm": 0.16230104863643646, | |
| "learning_rate": 1.788941768870155e-05, | |
| "loss": 0.6996177434921265, | |
| "step": 2002 | |
| }, | |
| { | |
| "epoch": 2.446886446886447, | |
| "grad_norm": 0.1100786030292511, | |
| "learning_rate": 1.7822438211539904e-05, | |
| "loss": 0.5718708038330078, | |
| "step": 2004 | |
| }, | |
| { | |
| "epoch": 2.4493284493284495, | |
| "grad_norm": 0.1992870420217514, | |
| "learning_rate": 1.775571719376205e-05, | |
| "loss": 0.6165488958358765, | |
| "step": 2006 | |
| }, | |
| { | |
| "epoch": 2.451770451770452, | |
| "grad_norm": 0.2712002098560333, | |
| "learning_rate": 1.7689255099214203e-05, | |
| "loss": 0.712196409702301, | |
| "step": 2008 | |
| }, | |
| { | |
| "epoch": 2.4542124542124544, | |
| "grad_norm": 0.16029568016529083, | |
| "learning_rate": 1.762305238994261e-05, | |
| "loss": 0.5514350533485413, | |
| "step": 2010 | |
| }, | |
| { | |
| "epoch": 2.456654456654457, | |
| "grad_norm": 0.1269841194152832, | |
| "learning_rate": 1.7557109526190222e-05, | |
| "loss": 0.5049785375595093, | |
| "step": 2012 | |
| }, | |
| { | |
| "epoch": 2.4590964590964592, | |
| "grad_norm": 0.2610805034637451, | |
| "learning_rate": 1.7491426966393582e-05, | |
| "loss": 0.9741380214691162, | |
| "step": 2014 | |
| }, | |
| { | |
| "epoch": 2.4615384615384617, | |
| "grad_norm": 0.23219804465770721, | |
| "learning_rate": 1.742600516717953e-05, | |
| "loss": 0.9162542819976807, | |
| "step": 2016 | |
| }, | |
| { | |
| "epoch": 2.463980463980464, | |
| "grad_norm": 0.15160159766674042, | |
| "learning_rate": 1.7360844583362164e-05, | |
| "loss": 0.9087503552436829, | |
| "step": 2018 | |
| }, | |
| { | |
| "epoch": 2.4664224664224665, | |
| "grad_norm": 0.46155115962028503, | |
| "learning_rate": 1.729594566793955e-05, | |
| "loss": 0.6472157835960388, | |
| "step": 2020 | |
| }, | |
| { | |
| "epoch": 2.468864468864469, | |
| "grad_norm": 0.11180655658245087, | |
| "learning_rate": 1.7231308872090666e-05, | |
| "loss": 0.5804570913314819, | |
| "step": 2022 | |
| }, | |
| { | |
| "epoch": 2.4713064713064714, | |
| "grad_norm": 0.2438187450170517, | |
| "learning_rate": 1.716693464517219e-05, | |
| "loss": 0.6094151735305786, | |
| "step": 2024 | |
| }, | |
| { | |
| "epoch": 2.473748473748474, | |
| "grad_norm": 0.2587358057498932, | |
| "learning_rate": 1.7102823434715443e-05, | |
| "loss": 0.7836107611656189, | |
| "step": 2026 | |
| }, | |
| { | |
| "epoch": 2.4761904761904763, | |
| "grad_norm": 0.06034945324063301, | |
| "learning_rate": 1.703897568642324e-05, | |
| "loss": 0.6507312059402466, | |
| "step": 2028 | |
| }, | |
| { | |
| "epoch": 2.4786324786324787, | |
| "grad_norm": 0.2733549475669861, | |
| "learning_rate": 1.6975391844166797e-05, | |
| "loss": 0.9156701564788818, | |
| "step": 2030 | |
| }, | |
| { | |
| "epoch": 2.481074481074481, | |
| "grad_norm": 0.17805607616901398, | |
| "learning_rate": 1.6912072349982654e-05, | |
| "loss": 0.9989659190177917, | |
| "step": 2032 | |
| }, | |
| { | |
| "epoch": 2.4835164835164836, | |
| "grad_norm": 0.8150944113731384, | |
| "learning_rate": 1.6849017644069613e-05, | |
| "loss": 0.5687444806098938, | |
| "step": 2034 | |
| }, | |
| { | |
| "epoch": 2.485958485958486, | |
| "grad_norm": 0.23178020119667053, | |
| "learning_rate": 1.6786228164785607e-05, | |
| "loss": 0.8616185188293457, | |
| "step": 2036 | |
| }, | |
| { | |
| "epoch": 2.4884004884004884, | |
| "grad_norm": 0.15798431634902954, | |
| "learning_rate": 1.6723704348644763e-05, | |
| "loss": 0.6111150979995728, | |
| "step": 2038 | |
| }, | |
| { | |
| "epoch": 2.490842490842491, | |
| "grad_norm": 0.9420503377914429, | |
| "learning_rate": 1.666144663031428e-05, | |
| "loss": 0.7210328578948975, | |
| "step": 2040 | |
| }, | |
| { | |
| "epoch": 2.4932844932844933, | |
| "grad_norm": 0.15810662508010864, | |
| "learning_rate": 1.6599455442611465e-05, | |
| "loss": 0.9326408505439758, | |
| "step": 2042 | |
| }, | |
| { | |
| "epoch": 2.4957264957264957, | |
| "grad_norm": 0.15084275603294373, | |
| "learning_rate": 1.653773121650064e-05, | |
| "loss": 0.6402502059936523, | |
| "step": 2044 | |
| }, | |
| { | |
| "epoch": 2.498168498168498, | |
| "grad_norm": 0.3722740411758423, | |
| "learning_rate": 1.647627438109029e-05, | |
| "loss": 0.4539627432823181, | |
| "step": 2046 | |
| }, | |
| { | |
| "epoch": 2.5006105006105006, | |
| "grad_norm": 0.486492395401001, | |
| "learning_rate": 1.6415085363629917e-05, | |
| "loss": 0.6245033740997314, | |
| "step": 2048 | |
| }, | |
| { | |
| "epoch": 2.503052503052503, | |
| "grad_norm": 0.1764748990535736, | |
| "learning_rate": 1.6354164589507184e-05, | |
| "loss": 0.7035425901412964, | |
| "step": 2050 | |
| }, | |
| { | |
| "epoch": 2.5054945054945055, | |
| "grad_norm": 0.19042401015758514, | |
| "learning_rate": 1.62935124822449e-05, | |
| "loss": 0.9986182451248169, | |
| "step": 2052 | |
| }, | |
| { | |
| "epoch": 2.507936507936508, | |
| "grad_norm": 0.3112033009529114, | |
| "learning_rate": 1.6233129463498114e-05, | |
| "loss": 0.6655936241149902, | |
| "step": 2054 | |
| }, | |
| { | |
| "epoch": 2.5103785103785103, | |
| "grad_norm": 0.2647741138935089, | |
| "learning_rate": 1.6173015953051145e-05, | |
| "loss": 0.9137226939201355, | |
| "step": 2056 | |
| }, | |
| { | |
| "epoch": 2.5128205128205128, | |
| "grad_norm": 0.17615091800689697, | |
| "learning_rate": 1.6113172368814696e-05, | |
| "loss": 0.2434530407190323, | |
| "step": 2058 | |
| }, | |
| { | |
| "epoch": 2.515262515262515, | |
| "grad_norm": 0.3077661991119385, | |
| "learning_rate": 1.6053599126822935e-05, | |
| "loss": 0.28476378321647644, | |
| "step": 2060 | |
| }, | |
| { | |
| "epoch": 2.5177045177045176, | |
| "grad_norm": 0.07086680084466934, | |
| "learning_rate": 1.5994296641230612e-05, | |
| "loss": 0.6296054720878601, | |
| "step": 2062 | |
| }, | |
| { | |
| "epoch": 2.52014652014652, | |
| "grad_norm": 0.20632986724376678, | |
| "learning_rate": 1.5935265324310136e-05, | |
| "loss": 0.6522476673126221, | |
| "step": 2064 | |
| }, | |
| { | |
| "epoch": 2.5225885225885225, | |
| "grad_norm": 1.1197623014450073, | |
| "learning_rate": 1.5876505586448794e-05, | |
| "loss": 0.6207690238952637, | |
| "step": 2066 | |
| }, | |
| { | |
| "epoch": 2.525030525030525, | |
| "grad_norm": 0.8066185116767883, | |
| "learning_rate": 1.5818017836145802e-05, | |
| "loss": 0.531438946723938, | |
| "step": 2068 | |
| }, | |
| { | |
| "epoch": 2.5274725274725274, | |
| "grad_norm": 0.44343841075897217, | |
| "learning_rate": 1.5759802480009546e-05, | |
| "loss": 1.0111730098724365, | |
| "step": 2070 | |
| }, | |
| { | |
| "epoch": 2.52991452991453, | |
| "grad_norm": 0.14161907136440277, | |
| "learning_rate": 1.570185992275471e-05, | |
| "loss": 0.5810820460319519, | |
| "step": 2072 | |
| }, | |
| { | |
| "epoch": 2.5323565323565322, | |
| "grad_norm": 0.11181794852018356, | |
| "learning_rate": 1.5644190567199456e-05, | |
| "loss": 0.9442424178123474, | |
| "step": 2074 | |
| }, | |
| { | |
| "epoch": 2.5347985347985347, | |
| "grad_norm": 0.2842358350753784, | |
| "learning_rate": 1.5586794814262674e-05, | |
| "loss": 0.7653964161872864, | |
| "step": 2076 | |
| }, | |
| { | |
| "epoch": 2.537240537240537, | |
| "grad_norm": 0.46459442377090454, | |
| "learning_rate": 1.552967306296112e-05, | |
| "loss": 0.7721389532089233, | |
| "step": 2078 | |
| }, | |
| { | |
| "epoch": 2.5396825396825395, | |
| "grad_norm": 1.7199498414993286, | |
| "learning_rate": 1.547282571040672e-05, | |
| "loss": 0.5821112394332886, | |
| "step": 2080 | |
| }, | |
| { | |
| "epoch": 2.542124542124542, | |
| "grad_norm": 0.31185182929039, | |
| "learning_rate": 1.5416253151803756e-05, | |
| "loss": 0.8948327898979187, | |
| "step": 2082 | |
| }, | |
| { | |
| "epoch": 2.5445665445665444, | |
| "grad_norm": 0.6430724859237671, | |
| "learning_rate": 1.5359955780446145e-05, | |
| "loss": 0.7543439269065857, | |
| "step": 2084 | |
| }, | |
| { | |
| "epoch": 2.547008547008547, | |
| "grad_norm": 0.15543311834335327, | |
| "learning_rate": 1.5303933987714675e-05, | |
| "loss": 0.6919330358505249, | |
| "step": 2086 | |
| }, | |
| { | |
| "epoch": 2.5494505494505493, | |
| "grad_norm": 0.3517249822616577, | |
| "learning_rate": 1.5248188163074332e-05, | |
| "loss": 0.7540191411972046, | |
| "step": 2088 | |
| }, | |
| { | |
| "epoch": 2.5518925518925517, | |
| "grad_norm": 0.19396033883094788, | |
| "learning_rate": 1.5192718694071533e-05, | |
| "loss": 0.6194606423377991, | |
| "step": 2090 | |
| }, | |
| { | |
| "epoch": 2.554334554334554, | |
| "grad_norm": 0.15331897139549255, | |
| "learning_rate": 1.5137525966331496e-05, | |
| "loss": 0.5733762979507446, | |
| "step": 2092 | |
| }, | |
| { | |
| "epoch": 2.5567765567765566, | |
| "grad_norm": 0.20664499700069427, | |
| "learning_rate": 1.5082610363555493e-05, | |
| "loss": 0.5452690124511719, | |
| "step": 2094 | |
| }, | |
| { | |
| "epoch": 2.559218559218559, | |
| "grad_norm": 0.1511068046092987, | |
| "learning_rate": 1.5027972267518242e-05, | |
| "loss": 0.8426185846328735, | |
| "step": 2096 | |
| }, | |
| { | |
| "epoch": 2.5616605616605614, | |
| "grad_norm": 0.026180965825915337, | |
| "learning_rate": 1.4973612058065223e-05, | |
| "loss": 0.021800726652145386, | |
| "step": 2098 | |
| }, | |
| { | |
| "epoch": 2.564102564102564, | |
| "grad_norm": 0.21482513844966888, | |
| "learning_rate": 1.4919530113110036e-05, | |
| "loss": 0.5521060824394226, | |
| "step": 2100 | |
| }, | |
| { | |
| "epoch": 2.5665445665445663, | |
| "grad_norm": 0.8316694498062134, | |
| "learning_rate": 1.4865726808631772e-05, | |
| "loss": 0.9114159941673279, | |
| "step": 2102 | |
| }, | |
| { | |
| "epoch": 2.5689865689865687, | |
| "grad_norm": 0.7761131525039673, | |
| "learning_rate": 1.4812202518672427e-05, | |
| "loss": 0.6670872569084167, | |
| "step": 2104 | |
| }, | |
| { | |
| "epoch": 2.571428571428571, | |
| "grad_norm": 0.2237665355205536, | |
| "learning_rate": 1.4758957615334252e-05, | |
| "loss": 0.7423463463783264, | |
| "step": 2106 | |
| }, | |
| { | |
| "epoch": 2.5738705738705736, | |
| "grad_norm": 0.5679123997688293, | |
| "learning_rate": 1.4705992468777221e-05, | |
| "loss": 0.5345439910888672, | |
| "step": 2108 | |
| }, | |
| { | |
| "epoch": 2.576312576312576, | |
| "grad_norm": 0.2362390011548996, | |
| "learning_rate": 1.46533074472164e-05, | |
| "loss": 0.5681825876235962, | |
| "step": 2110 | |
| }, | |
| { | |
| "epoch": 2.578754578754579, | |
| "grad_norm": 0.16195961833000183, | |
| "learning_rate": 1.4600902916919474e-05, | |
| "loss": 0.9305347204208374, | |
| "step": 2112 | |
| }, | |
| { | |
| "epoch": 2.5811965811965814, | |
| "grad_norm": 0.19195407629013062, | |
| "learning_rate": 1.4548779242204084e-05, | |
| "loss": 0.2443677932024002, | |
| "step": 2114 | |
| }, | |
| { | |
| "epoch": 2.583638583638584, | |
| "grad_norm": 0.2873615026473999, | |
| "learning_rate": 1.4496936785435397e-05, | |
| "loss": 0.641851007938385, | |
| "step": 2116 | |
| }, | |
| { | |
| "epoch": 2.586080586080586, | |
| "grad_norm": 0.22643738985061646, | |
| "learning_rate": 1.4445375907023518e-05, | |
| "loss": 0.9270722270011902, | |
| "step": 2118 | |
| }, | |
| { | |
| "epoch": 2.5885225885225887, | |
| "grad_norm": 0.5615414381027222, | |
| "learning_rate": 1.4394096965421044e-05, | |
| "loss": 0.5653753280639648, | |
| "step": 2120 | |
| }, | |
| { | |
| "epoch": 2.590964590964591, | |
| "grad_norm": 0.4698256850242615, | |
| "learning_rate": 1.4343100317120487e-05, | |
| "loss": 0.7999030947685242, | |
| "step": 2122 | |
| }, | |
| { | |
| "epoch": 2.5934065934065935, | |
| "grad_norm": 0.22171932458877563, | |
| "learning_rate": 1.4292386316651923e-05, | |
| "loss": 0.7787465453147888, | |
| "step": 2124 | |
| }, | |
| { | |
| "epoch": 2.595848595848596, | |
| "grad_norm": 0.08361851423978806, | |
| "learning_rate": 1.4241955316580386e-05, | |
| "loss": 0.3853815197944641, | |
| "step": 2126 | |
| }, | |
| { | |
| "epoch": 2.5982905982905984, | |
| "grad_norm": 0.11281153559684753, | |
| "learning_rate": 1.419180766750352e-05, | |
| "loss": 0.9270216822624207, | |
| "step": 2128 | |
| }, | |
| { | |
| "epoch": 2.600732600732601, | |
| "grad_norm": 0.23104451596736908, | |
| "learning_rate": 1.4141943718049095e-05, | |
| "loss": 0.603350043296814, | |
| "step": 2130 | |
| }, | |
| { | |
| "epoch": 2.6031746031746033, | |
| "grad_norm": 0.34137165546417236, | |
| "learning_rate": 1.4092363814872588e-05, | |
| "loss": 0.5130594968795776, | |
| "step": 2132 | |
| }, | |
| { | |
| "epoch": 2.6056166056166057, | |
| "grad_norm": 0.28743472695350647, | |
| "learning_rate": 1.4043068302654791e-05, | |
| "loss": 0.8078405261039734, | |
| "step": 2134 | |
| }, | |
| { | |
| "epoch": 2.608058608058608, | |
| "grad_norm": 0.3752667009830475, | |
| "learning_rate": 1.3994057524099386e-05, | |
| "loss": 0.5297508239746094, | |
| "step": 2136 | |
| }, | |
| { | |
| "epoch": 2.6105006105006106, | |
| "grad_norm": 0.1670704185962677, | |
| "learning_rate": 1.3945331819930605e-05, | |
| "loss": 0.4818686544895172, | |
| "step": 2138 | |
| }, | |
| { | |
| "epoch": 2.612942612942613, | |
| "grad_norm": 0.11463461816310883, | |
| "learning_rate": 1.389689152889082e-05, | |
| "loss": 0.5629554986953735, | |
| "step": 2140 | |
| }, | |
| { | |
| "epoch": 2.6153846153846154, | |
| "grad_norm": 0.3031578063964844, | |
| "learning_rate": 1.3848736987738186e-05, | |
| "loss": 0.7412528395652771, | |
| "step": 2142 | |
| }, | |
| { | |
| "epoch": 2.617826617826618, | |
| "grad_norm": 0.13335902988910675, | |
| "learning_rate": 1.380086853124436e-05, | |
| "loss": 0.8015611171722412, | |
| "step": 2144 | |
| }, | |
| { | |
| "epoch": 2.6202686202686203, | |
| "grad_norm": 0.6250423789024353, | |
| "learning_rate": 1.3753286492192083e-05, | |
| "loss": 0.9416741132736206, | |
| "step": 2146 | |
| }, | |
| { | |
| "epoch": 2.6227106227106227, | |
| "grad_norm": 0.15486980974674225, | |
| "learning_rate": 1.3705991201372941e-05, | |
| "loss": 0.5695586204528809, | |
| "step": 2148 | |
| }, | |
| { | |
| "epoch": 2.625152625152625, | |
| "grad_norm": 0.20108355581760406, | |
| "learning_rate": 1.3658982987585044e-05, | |
| "loss": 0.9314045906066895, | |
| "step": 2150 | |
| }, | |
| { | |
| "epoch": 2.6275946275946276, | |
| "grad_norm": 0.2014009952545166, | |
| "learning_rate": 1.3612262177630733e-05, | |
| "loss": 0.6691548228263855, | |
| "step": 2152 | |
| }, | |
| { | |
| "epoch": 2.63003663003663, | |
| "grad_norm": 0.638117253780365, | |
| "learning_rate": 1.3565829096314284e-05, | |
| "loss": 0.7646288275718689, | |
| "step": 2154 | |
| }, | |
| { | |
| "epoch": 2.6324786324786325, | |
| "grad_norm": 0.23792199790477753, | |
| "learning_rate": 1.351968406643971e-05, | |
| "loss": 0.9055653810501099, | |
| "step": 2156 | |
| }, | |
| { | |
| "epoch": 2.634920634920635, | |
| "grad_norm": 0.25359049439430237, | |
| "learning_rate": 1.3473827408808453e-05, | |
| "loss": 0.7020068168640137, | |
| "step": 2158 | |
| }, | |
| { | |
| "epoch": 2.6373626373626373, | |
| "grad_norm": 0.20003177225589752, | |
| "learning_rate": 1.3428259442217217e-05, | |
| "loss": 0.43044763803482056, | |
| "step": 2160 | |
| }, | |
| { | |
| "epoch": 2.6398046398046398, | |
| "grad_norm": 0.17046095430850983, | |
| "learning_rate": 1.3382980483455682e-05, | |
| "loss": 0.543694257736206, | |
| "step": 2162 | |
| }, | |
| { | |
| "epoch": 2.642246642246642, | |
| "grad_norm": 6.694880962371826, | |
| "learning_rate": 1.333799084730436e-05, | |
| "loss": 0.37838348746299744, | |
| "step": 2164 | |
| }, | |
| { | |
| "epoch": 2.6446886446886446, | |
| "grad_norm": 0.23152433335781097, | |
| "learning_rate": 1.32932908465324e-05, | |
| "loss": 0.90546053647995, | |
| "step": 2166 | |
| }, | |
| { | |
| "epoch": 2.647130647130647, | |
| "grad_norm": 0.44327032566070557, | |
| "learning_rate": 1.3248880791895346e-05, | |
| "loss": 0.5909172892570496, | |
| "step": 2168 | |
| }, | |
| { | |
| "epoch": 2.6495726495726495, | |
| "grad_norm": 0.2053682655096054, | |
| "learning_rate": 1.3204760992133086e-05, | |
| "loss": 0.9391003251075745, | |
| "step": 2170 | |
| }, | |
| { | |
| "epoch": 2.652014652014652, | |
| "grad_norm": 0.3836727738380432, | |
| "learning_rate": 1.316093175396762e-05, | |
| "loss": 0.9521498680114746, | |
| "step": 2172 | |
| }, | |
| { | |
| "epoch": 2.6544566544566544, | |
| "grad_norm": 2.1571011543273926, | |
| "learning_rate": 1.311739338210095e-05, | |
| "loss": 0.8946664929389954, | |
| "step": 2174 | |
| }, | |
| { | |
| "epoch": 2.656898656898657, | |
| "grad_norm": 0.24789409339427948, | |
| "learning_rate": 1.3074146179212985e-05, | |
| "loss": 0.6126503348350525, | |
| "step": 2176 | |
| }, | |
| { | |
| "epoch": 2.659340659340659, | |
| "grad_norm": 0.1362510621547699, | |
| "learning_rate": 1.303119044595942e-05, | |
| "loss": 0.936055600643158, | |
| "step": 2178 | |
| }, | |
| { | |
| "epoch": 2.6617826617826617, | |
| "grad_norm": 0.4179238975048065, | |
| "learning_rate": 1.2988526480969623e-05, | |
| "loss": 0.7655514478683472, | |
| "step": 2180 | |
| }, | |
| { | |
| "epoch": 2.664224664224664, | |
| "grad_norm": 0.16768568754196167, | |
| "learning_rate": 1.2946154580844611e-05, | |
| "loss": 0.8849906325340271, | |
| "step": 2182 | |
| }, | |
| { | |
| "epoch": 2.6666666666666665, | |
| "grad_norm": 0.2775038480758667, | |
| "learning_rate": 1.290407504015493e-05, | |
| "loss": 0.5696703195571899, | |
| "step": 2184 | |
| }, | |
| { | |
| "epoch": 2.669108669108669, | |
| "grad_norm": 0.1575010120868683, | |
| "learning_rate": 1.2862288151438665e-05, | |
| "loss": 0.8959333300590515, | |
| "step": 2186 | |
| }, | |
| { | |
| "epoch": 2.6715506715506714, | |
| "grad_norm": 0.13436463475227356, | |
| "learning_rate": 1.282079420519934e-05, | |
| "loss": 0.7810011506080627, | |
| "step": 2188 | |
| }, | |
| { | |
| "epoch": 2.6739926739926743, | |
| "grad_norm": 0.2730397582054138, | |
| "learning_rate": 1.2779593489903982e-05, | |
| "loss": 0.9554069638252258, | |
| "step": 2190 | |
| }, | |
| { | |
| "epoch": 2.6764346764346767, | |
| "grad_norm": 0.1561058759689331, | |
| "learning_rate": 1.2738686291981028e-05, | |
| "loss": 0.8356117010116577, | |
| "step": 2192 | |
| }, | |
| { | |
| "epoch": 2.678876678876679, | |
| "grad_norm": 0.5023959875106812, | |
| "learning_rate": 1.2698072895818397e-05, | |
| "loss": 0.28213539719581604, | |
| "step": 2194 | |
| }, | |
| { | |
| "epoch": 2.6813186813186816, | |
| "grad_norm": 0.5667269825935364, | |
| "learning_rate": 1.2657753583761476e-05, | |
| "loss": 0.8802306056022644, | |
| "step": 2196 | |
| }, | |
| { | |
| "epoch": 2.683760683760684, | |
| "grad_norm": 0.12068428844213486, | |
| "learning_rate": 1.2617728636111187e-05, | |
| "loss": 0.8706485033035278, | |
| "step": 2198 | |
| }, | |
| { | |
| "epoch": 2.6862026862026864, | |
| "grad_norm": 0.5108808279037476, | |
| "learning_rate": 1.2577998331121998e-05, | |
| "loss": 0.8768801093101501, | |
| "step": 2200 | |
| }, | |
| { | |
| "epoch": 2.688644688644689, | |
| "grad_norm": 0.545958399772644, | |
| "learning_rate": 1.2538562945000051e-05, | |
| "loss": 0.33499714732170105, | |
| "step": 2202 | |
| }, | |
| { | |
| "epoch": 2.6910866910866913, | |
| "grad_norm": 1.0399829149246216, | |
| "learning_rate": 1.2499422751901172e-05, | |
| "loss": 0.6333140134811401, | |
| "step": 2204 | |
| }, | |
| { | |
| "epoch": 2.6935286935286937, | |
| "grad_norm": 0.985112190246582, | |
| "learning_rate": 1.2460578023929017e-05, | |
| "loss": 0.45254790782928467, | |
| "step": 2206 | |
| }, | |
| { | |
| "epoch": 2.695970695970696, | |
| "grad_norm": 0.21014508605003357, | |
| "learning_rate": 1.2422029031133143e-05, | |
| "loss": 0.7272936105728149, | |
| "step": 2208 | |
| }, | |
| { | |
| "epoch": 2.6984126984126986, | |
| "grad_norm": 0.39050862193107605, | |
| "learning_rate": 1.2383776041507172e-05, | |
| "loss": 0.534014880657196, | |
| "step": 2210 | |
| }, | |
| { | |
| "epoch": 2.700854700854701, | |
| "grad_norm": 0.11521206796169281, | |
| "learning_rate": 1.2345819320986872e-05, | |
| "loss": 0.8974270224571228, | |
| "step": 2212 | |
| }, | |
| { | |
| "epoch": 2.7032967032967035, | |
| "grad_norm": 0.14597372710704803, | |
| "learning_rate": 1.2308159133448367e-05, | |
| "loss": 0.5622401237487793, | |
| "step": 2214 | |
| }, | |
| { | |
| "epoch": 2.705738705738706, | |
| "grad_norm": 0.017224080860614777, | |
| "learning_rate": 1.2270795740706271e-05, | |
| "loss": 0.47363150119781494, | |
| "step": 2216 | |
| }, | |
| { | |
| "epoch": 2.7081807081807083, | |
| "grad_norm": 0.4101310968399048, | |
| "learning_rate": 1.2233729402511876e-05, | |
| "loss": 0.520636796951294, | |
| "step": 2218 | |
| }, | |
| { | |
| "epoch": 2.7106227106227108, | |
| "grad_norm": 0.8713362216949463, | |
| "learning_rate": 1.2196960376551318e-05, | |
| "loss": 0.257546991109848, | |
| "step": 2220 | |
| }, | |
| { | |
| "epoch": 2.713064713064713, | |
| "grad_norm": 0.36402884125709534, | |
| "learning_rate": 1.2160488918443844e-05, | |
| "loss": 0.5223596096038818, | |
| "step": 2222 | |
| }, | |
| { | |
| "epoch": 2.7155067155067156, | |
| "grad_norm": 0.30874237418174744, | |
| "learning_rate": 1.2124315281739976e-05, | |
| "loss": 0.6043662428855896, | |
| "step": 2224 | |
| }, | |
| { | |
| "epoch": 2.717948717948718, | |
| "grad_norm": 0.15704752504825592, | |
| "learning_rate": 1.2088439717919797e-05, | |
| "loss": 0.7366093397140503, | |
| "step": 2226 | |
| }, | |
| { | |
| "epoch": 2.7203907203907205, | |
| "grad_norm": 0.4803425371646881, | |
| "learning_rate": 1.2052862476391146e-05, | |
| "loss": 0.5565959215164185, | |
| "step": 2228 | |
| }, | |
| { | |
| "epoch": 2.722832722832723, | |
| "grad_norm": 0.18192808330059052, | |
| "learning_rate": 1.2017583804487963e-05, | |
| "loss": 0.914871335029602, | |
| "step": 2230 | |
| }, | |
| { | |
| "epoch": 2.7252747252747254, | |
| "grad_norm": 0.2773580253124237, | |
| "learning_rate": 1.1982603947468482e-05, | |
| "loss": 0.5831704139709473, | |
| "step": 2232 | |
| }, | |
| { | |
| "epoch": 2.727716727716728, | |
| "grad_norm": 0.13562490046024323, | |
| "learning_rate": 1.1947923148513598e-05, | |
| "loss": 0.7942695617675781, | |
| "step": 2234 | |
| }, | |
| { | |
| "epoch": 2.7301587301587302, | |
| "grad_norm": 0.33229681849479675, | |
| "learning_rate": 1.1913541648725125e-05, | |
| "loss": 0.9253073334693909, | |
| "step": 2236 | |
| }, | |
| { | |
| "epoch": 2.7326007326007327, | |
| "grad_norm": 0.351931631565094, | |
| "learning_rate": 1.1879459687124157e-05, | |
| "loss": 0.8293993473052979, | |
| "step": 2238 | |
| }, | |
| { | |
| "epoch": 2.735042735042735, | |
| "grad_norm": 0.17059414088726044, | |
| "learning_rate": 1.184567750064937e-05, | |
| "loss": 0.45666587352752686, | |
| "step": 2240 | |
| }, | |
| { | |
| "epoch": 2.7374847374847375, | |
| "grad_norm": 0.10000285506248474, | |
| "learning_rate": 1.1812195324155426e-05, | |
| "loss": 0.9256134033203125, | |
| "step": 2242 | |
| }, | |
| { | |
| "epoch": 2.73992673992674, | |
| "grad_norm": 0.42588916420936584, | |
| "learning_rate": 1.1779013390411284e-05, | |
| "loss": 0.5543507933616638, | |
| "step": 2244 | |
| }, | |
| { | |
| "epoch": 2.7423687423687424, | |
| "grad_norm": 0.24285273253917694, | |
| "learning_rate": 1.1746131930098625e-05, | |
| "loss": 0.550348162651062, | |
| "step": 2246 | |
| }, | |
| { | |
| "epoch": 2.744810744810745, | |
| "grad_norm": 0.3814103603363037, | |
| "learning_rate": 1.1713551171810215e-05, | |
| "loss": 0.4132462739944458, | |
| "step": 2248 | |
| }, | |
| { | |
| "epoch": 2.7472527472527473, | |
| "grad_norm": 0.2242743968963623, | |
| "learning_rate": 1.1681271342048347e-05, | |
| "loss": 0.853303849697113, | |
| "step": 2250 | |
| }, | |
| { | |
| "epoch": 2.7496947496947497, | |
| "grad_norm": 0.16938181221485138, | |
| "learning_rate": 1.1649292665223234e-05, | |
| "loss": 0.9305144548416138, | |
| "step": 2252 | |
| }, | |
| { | |
| "epoch": 2.752136752136752, | |
| "grad_norm": 0.17428834736347198, | |
| "learning_rate": 1.1617615363651478e-05, | |
| "loss": 0.5639536380767822, | |
| "step": 2254 | |
| }, | |
| { | |
| "epoch": 2.7545787545787546, | |
| "grad_norm": 0.2849515676498413, | |
| "learning_rate": 1.1586239657554517e-05, | |
| "loss": 0.6453112363815308, | |
| "step": 2256 | |
| }, | |
| { | |
| "epoch": 2.757020757020757, | |
| "grad_norm": 0.3031458258628845, | |
| "learning_rate": 1.1555165765057072e-05, | |
| "loss": 0.9232110381126404, | |
| "step": 2258 | |
| }, | |
| { | |
| "epoch": 2.7594627594627594, | |
| "grad_norm": 0.44533681869506836, | |
| "learning_rate": 1.1524393902185663e-05, | |
| "loss": 0.8147604465484619, | |
| "step": 2260 | |
| }, | |
| { | |
| "epoch": 2.761904761904762, | |
| "grad_norm": 0.38014358282089233, | |
| "learning_rate": 1.1493924282867089e-05, | |
| "loss": 0.8898773789405823, | |
| "step": 2262 | |
| }, | |
| { | |
| "epoch": 2.7643467643467643, | |
| "grad_norm": 0.19250597059726715, | |
| "learning_rate": 1.1463757118926937e-05, | |
| "loss": 0.8258563280105591, | |
| "step": 2264 | |
| }, | |
| { | |
| "epoch": 2.7667887667887667, | |
| "grad_norm": 0.5206453800201416, | |
| "learning_rate": 1.1433892620088125e-05, | |
| "loss": 0.720075786113739, | |
| "step": 2266 | |
| }, | |
| { | |
| "epoch": 2.769230769230769, | |
| "grad_norm": 0.0654718354344368, | |
| "learning_rate": 1.140433099396943e-05, | |
| "loss": 0.5580767393112183, | |
| "step": 2268 | |
| }, | |
| { | |
| "epoch": 2.7716727716727716, | |
| "grad_norm": 0.12150853127241135, | |
| "learning_rate": 1.1375072446084048e-05, | |
| "loss": 0.5127288699150085, | |
| "step": 2270 | |
| }, | |
| { | |
| "epoch": 2.774114774114774, | |
| "grad_norm": 0.044079847633838654, | |
| "learning_rate": 1.1346117179838177e-05, | |
| "loss": 0.1200222373008728, | |
| "step": 2272 | |
| }, | |
| { | |
| "epoch": 2.7765567765567765, | |
| "grad_norm": 0.20107226073741913, | |
| "learning_rate": 1.1317465396529584e-05, | |
| "loss": 0.9214982986450195, | |
| "step": 2274 | |
| }, | |
| { | |
| "epoch": 2.778998778998779, | |
| "grad_norm": 0.17506659030914307, | |
| "learning_rate": 1.128911729534622e-05, | |
| "loss": 0.6488751769065857, | |
| "step": 2276 | |
| }, | |
| { | |
| "epoch": 2.7814407814407813, | |
| "grad_norm": 0.0763765498995781, | |
| "learning_rate": 1.1261073073364818e-05, | |
| "loss": 0.5175724625587463, | |
| "step": 2278 | |
| }, | |
| { | |
| "epoch": 2.7838827838827838, | |
| "grad_norm": 0.38485652208328247, | |
| "learning_rate": 1.1233332925549555e-05, | |
| "loss": 0.2695869207382202, | |
| "step": 2280 | |
| }, | |
| { | |
| "epoch": 2.786324786324786, | |
| "grad_norm": 0.16343873739242554, | |
| "learning_rate": 1.1205897044750652e-05, | |
| "loss": 0.8788585662841797, | |
| "step": 2282 | |
| }, | |
| { | |
| "epoch": 2.7887667887667886, | |
| "grad_norm": 0.1511439085006714, | |
| "learning_rate": 1.1178765621703091e-05, | |
| "loss": 0.8803359866142273, | |
| "step": 2284 | |
| }, | |
| { | |
| "epoch": 2.791208791208791, | |
| "grad_norm": 0.1273798793554306, | |
| "learning_rate": 1.115193884502522e-05, | |
| "loss": 0.879483699798584, | |
| "step": 2286 | |
| }, | |
| { | |
| "epoch": 2.7936507936507935, | |
| "grad_norm": 0.087889663875103, | |
| "learning_rate": 1.1125416901217505e-05, | |
| "loss": 0.8987056612968445, | |
| "step": 2288 | |
| }, | |
| { | |
| "epoch": 2.796092796092796, | |
| "grad_norm": 0.37511512637138367, | |
| "learning_rate": 1.1099199974661181e-05, | |
| "loss": 0.284568727016449, | |
| "step": 2290 | |
| }, | |
| { | |
| "epoch": 2.7985347985347984, | |
| "grad_norm": 0.44007736444473267, | |
| "learning_rate": 1.1073288247617021e-05, | |
| "loss": 0.5415482521057129, | |
| "step": 2292 | |
| }, | |
| { | |
| "epoch": 2.800976800976801, | |
| "grad_norm": 0.12672172486782074, | |
| "learning_rate": 1.1047681900224018e-05, | |
| "loss": 0.5931652188301086, | |
| "step": 2294 | |
| }, | |
| { | |
| "epoch": 2.8034188034188032, | |
| "grad_norm": 0.35362815856933594, | |
| "learning_rate": 1.1022381110498182e-05, | |
| "loss": 0.7559791207313538, | |
| "step": 2296 | |
| }, | |
| { | |
| "epoch": 2.8058608058608057, | |
| "grad_norm": 0.5573434829711914, | |
| "learning_rate": 1.0997386054331255e-05, | |
| "loss": 0.3488505184650421, | |
| "step": 2298 | |
| }, | |
| { | |
| "epoch": 2.808302808302808, | |
| "grad_norm": 0.2508881986141205, | |
| "learning_rate": 1.097269690548954e-05, | |
| "loss": 0.5698355436325073, | |
| "step": 2300 | |
| }, | |
| { | |
| "epoch": 2.8107448107448105, | |
| "grad_norm": 0.5273947715759277, | |
| "learning_rate": 1.0948313835612624e-05, | |
| "loss": 0.9447473287582397, | |
| "step": 2302 | |
| }, | |
| { | |
| "epoch": 2.813186813186813, | |
| "grad_norm": 0.049647558480501175, | |
| "learning_rate": 1.0924237014212265e-05, | |
| "loss": 0.5654804706573486, | |
| "step": 2304 | |
| }, | |
| { | |
| "epoch": 2.8156288156288154, | |
| "grad_norm": 0.2335987389087677, | |
| "learning_rate": 1.0900466608671147e-05, | |
| "loss": 0.691981315612793, | |
| "step": 2306 | |
| }, | |
| { | |
| "epoch": 2.818070818070818, | |
| "grad_norm": 0.14841300249099731, | |
| "learning_rate": 1.0877002784241758e-05, | |
| "loss": 0.5586347579956055, | |
| "step": 2308 | |
| }, | |
| { | |
| "epoch": 2.8205128205128203, | |
| "grad_norm": 0.14393511414527893, | |
| "learning_rate": 1.0853845704045213e-05, | |
| "loss": 0.9617201685905457, | |
| "step": 2310 | |
| }, | |
| { | |
| "epoch": 2.8229548229548227, | |
| "grad_norm": 0.3243793547153473, | |
| "learning_rate": 1.0830995529070153e-05, | |
| "loss": 0.5552883148193359, | |
| "step": 2312 | |
| }, | |
| { | |
| "epoch": 2.825396825396825, | |
| "grad_norm": 0.8651401400566101, | |
| "learning_rate": 1.080845241817158e-05, | |
| "loss": 0.6081613898277283, | |
| "step": 2314 | |
| }, | |
| { | |
| "epoch": 2.8278388278388276, | |
| "grad_norm": 0.1977468729019165, | |
| "learning_rate": 1.0786216528069798e-05, | |
| "loss": 0.5818818211555481, | |
| "step": 2316 | |
| }, | |
| { | |
| "epoch": 2.8302808302808304, | |
| "grad_norm": 0.5484998822212219, | |
| "learning_rate": 1.0764288013349295e-05, | |
| "loss": 0.8033775091171265, | |
| "step": 2318 | |
| }, | |
| { | |
| "epoch": 2.832722832722833, | |
| "grad_norm": 0.24685503542423248, | |
| "learning_rate": 1.0742667026457678e-05, | |
| "loss": 0.5256894826889038, | |
| "step": 2320 | |
| }, | |
| { | |
| "epoch": 2.8351648351648353, | |
| "grad_norm": 0.47894465923309326, | |
| "learning_rate": 1.0721353717704625e-05, | |
| "loss": 0.5885627269744873, | |
| "step": 2322 | |
| }, | |
| { | |
| "epoch": 2.8376068376068377, | |
| "grad_norm": 0.04807301610708237, | |
| "learning_rate": 1.0700348235260809e-05, | |
| "loss": 0.1685209572315216, | |
| "step": 2324 | |
| }, | |
| { | |
| "epoch": 2.84004884004884, | |
| "grad_norm": 0.1125311627984047, | |
| "learning_rate": 1.0679650725156892e-05, | |
| "loss": 0.7372404932975769, | |
| "step": 2326 | |
| }, | |
| { | |
| "epoch": 2.8424908424908426, | |
| "grad_norm": 2.8107287883758545, | |
| "learning_rate": 1.065926133128251e-05, | |
| "loss": 0.6371179223060608, | |
| "step": 2328 | |
| }, | |
| { | |
| "epoch": 2.844932844932845, | |
| "grad_norm": 0.08771976083517075, | |
| "learning_rate": 1.063918019538527e-05, | |
| "loss": 0.5829328298568726, | |
| "step": 2330 | |
| }, | |
| { | |
| "epoch": 2.8473748473748475, | |
| "grad_norm": 0.34634438157081604, | |
| "learning_rate": 1.0619407457069744e-05, | |
| "loss": 0.9081605672836304, | |
| "step": 2332 | |
| }, | |
| { | |
| "epoch": 2.84981684981685, | |
| "grad_norm": 0.442281037569046, | |
| "learning_rate": 1.0599943253796524e-05, | |
| "loss": 0.06729144603013992, | |
| "step": 2334 | |
| }, | |
| { | |
| "epoch": 2.8522588522588523, | |
| "grad_norm": 0.43480950593948364, | |
| "learning_rate": 1.0580787720881278e-05, | |
| "loss": 0.8884823322296143, | |
| "step": 2336 | |
| }, | |
| { | |
| "epoch": 2.8547008547008548, | |
| "grad_norm": 0.2847522497177124, | |
| "learning_rate": 1.056194099149376e-05, | |
| "loss": 0.6051849722862244, | |
| "step": 2338 | |
| }, | |
| { | |
| "epoch": 2.857142857142857, | |
| "grad_norm": 0.2190149575471878, | |
| "learning_rate": 1.0543403196656928e-05, | |
| "loss": 0.9653596878051758, | |
| "step": 2340 | |
| }, | |
| { | |
| "epoch": 2.8595848595848596, | |
| "grad_norm": 1.3416590690612793, | |
| "learning_rate": 1.0525174465246024e-05, | |
| "loss": 0.8496460914611816, | |
| "step": 2342 | |
| }, | |
| { | |
| "epoch": 2.862026862026862, | |
| "grad_norm": 0.239431232213974, | |
| "learning_rate": 1.050725492398766e-05, | |
| "loss": 0.14625638723373413, | |
| "step": 2344 | |
| }, | |
| { | |
| "epoch": 2.8644688644688645, | |
| "grad_norm": 0.2299855649471283, | |
| "learning_rate": 1.0489644697458956e-05, | |
| "loss": 0.7827672958374023, | |
| "step": 2346 | |
| }, | |
| { | |
| "epoch": 2.866910866910867, | |
| "grad_norm": 0.03336143493652344, | |
| "learning_rate": 1.047234390808667e-05, | |
| "loss": 0.49717092514038086, | |
| "step": 2348 | |
| }, | |
| { | |
| "epoch": 2.8693528693528694, | |
| "grad_norm": 0.14469321072101593, | |
| "learning_rate": 1.045535267614634e-05, | |
| "loss": 0.582015335559845, | |
| "step": 2350 | |
| }, | |
| { | |
| "epoch": 2.871794871794872, | |
| "grad_norm": 0.3091880679130554, | |
| "learning_rate": 1.0438671119761468e-05, | |
| "loss": 0.6304939389228821, | |
| "step": 2352 | |
| }, | |
| { | |
| "epoch": 2.8742368742368742, | |
| "grad_norm": 0.12924562394618988, | |
| "learning_rate": 1.0422299354902653e-05, | |
| "loss": 0.47916877269744873, | |
| "step": 2354 | |
| }, | |
| { | |
| "epoch": 2.8766788766788767, | |
| "grad_norm": 0.38469940423965454, | |
| "learning_rate": 1.040623749538685e-05, | |
| "loss": 0.9549888968467712, | |
| "step": 2356 | |
| }, | |
| { | |
| "epoch": 2.879120879120879, | |
| "grad_norm": 0.5296687483787537, | |
| "learning_rate": 1.0390485652876516e-05, | |
| "loss": 0.634440004825592, | |
| "step": 2358 | |
| }, | |
| { | |
| "epoch": 2.8815628815628815, | |
| "grad_norm": 0.28588035702705383, | |
| "learning_rate": 1.037504393687888e-05, | |
| "loss": 0.4866846799850464, | |
| "step": 2360 | |
| }, | |
| { | |
| "epoch": 2.884004884004884, | |
| "grad_norm": 0.25893455743789673, | |
| "learning_rate": 1.035991245474515e-05, | |
| "loss": 0.6054922342300415, | |
| "step": 2362 | |
| }, | |
| { | |
| "epoch": 2.8864468864468864, | |
| "grad_norm": 0.26358985900878906, | |
| "learning_rate": 1.0345091311669789e-05, | |
| "loss": 0.9277290105819702, | |
| "step": 2364 | |
| }, | |
| { | |
| "epoch": 2.888888888888889, | |
| "grad_norm": 0.10939505696296692, | |
| "learning_rate": 1.0330580610689777e-05, | |
| "loss": 0.7286083698272705, | |
| "step": 2366 | |
| }, | |
| { | |
| "epoch": 2.8913308913308913, | |
| "grad_norm": 1.217232584953308, | |
| "learning_rate": 1.0316380452683879e-05, | |
| "loss": 0.6054674983024597, | |
| "step": 2368 | |
| }, | |
| { | |
| "epoch": 2.8937728937728937, | |
| "grad_norm": 0.18041126430034637, | |
| "learning_rate": 1.0302490936371966e-05, | |
| "loss": 0.956548273563385, | |
| "step": 2370 | |
| }, | |
| { | |
| "epoch": 2.896214896214896, | |
| "grad_norm": 0.25252801179885864, | |
| "learning_rate": 1.028891215831433e-05, | |
| "loss": 0.8740776777267456, | |
| "step": 2372 | |
| }, | |
| { | |
| "epoch": 2.8986568986568986, | |
| "grad_norm": 0.07368501275777817, | |
| "learning_rate": 1.027564421291098e-05, | |
| "loss": 0.6913008093833923, | |
| "step": 2374 | |
| }, | |
| { | |
| "epoch": 2.901098901098901, | |
| "grad_norm": 0.21639108657836914, | |
| "learning_rate": 1.0262687192401022e-05, | |
| "loss": 0.8055187463760376, | |
| "step": 2376 | |
| }, | |
| { | |
| "epoch": 2.9035409035409034, | |
| "grad_norm": 0.14491795003414154, | |
| "learning_rate": 1.0250041186862011e-05, | |
| "loss": 0.8874975442886353, | |
| "step": 2378 | |
| }, | |
| { | |
| "epoch": 2.905982905982906, | |
| "grad_norm": 0.16996614634990692, | |
| "learning_rate": 1.0237706284209298e-05, | |
| "loss": 0.5474310517311096, | |
| "step": 2380 | |
| }, | |
| { | |
| "epoch": 2.9084249084249083, | |
| "grad_norm": 0.16943812370300293, | |
| "learning_rate": 1.0225682570195457e-05, | |
| "loss": 0.8697795271873474, | |
| "step": 2382 | |
| }, | |
| { | |
| "epoch": 2.9108669108669107, | |
| "grad_norm": 0.14685095846652985, | |
| "learning_rate": 1.0213970128409658e-05, | |
| "loss": 0.8361122608184814, | |
| "step": 2384 | |
| }, | |
| { | |
| "epoch": 2.913308913308913, | |
| "grad_norm": 0.12646551430225372, | |
| "learning_rate": 1.0202569040277104e-05, | |
| "loss": 0.9396628141403198, | |
| "step": 2386 | |
| }, | |
| { | |
| "epoch": 2.9157509157509156, | |
| "grad_norm": 0.1660378873348236, | |
| "learning_rate": 1.0191479385058462e-05, | |
| "loss": 0.635297417640686, | |
| "step": 2388 | |
| }, | |
| { | |
| "epoch": 2.918192918192918, | |
| "grad_norm": 0.18587251007556915, | |
| "learning_rate": 1.0180701239849304e-05, | |
| "loss": 0.3286590576171875, | |
| "step": 2390 | |
| }, | |
| { | |
| "epoch": 2.9206349206349205, | |
| "grad_norm": 0.27579906582832336, | |
| "learning_rate": 1.0170234679579574e-05, | |
| "loss": 0.470412939786911, | |
| "step": 2392 | |
| }, | |
| { | |
| "epoch": 2.9230769230769234, | |
| "grad_norm": 0.2389817088842392, | |
| "learning_rate": 1.0160079777013081e-05, | |
| "loss": 0.7788825631141663, | |
| "step": 2394 | |
| }, | |
| { | |
| "epoch": 2.925518925518926, | |
| "grad_norm": 0.45952925086021423, | |
| "learning_rate": 1.0150236602746965e-05, | |
| "loss": 0.6100042462348938, | |
| "step": 2396 | |
| }, | |
| { | |
| "epoch": 2.927960927960928, | |
| "grad_norm": 0.18304722011089325, | |
| "learning_rate": 1.0140705225211237e-05, | |
| "loss": 0.5717623233795166, | |
| "step": 2398 | |
| }, | |
| { | |
| "epoch": 2.9304029304029307, | |
| "grad_norm": 0.37822744250297546, | |
| "learning_rate": 1.013148571066828e-05, | |
| "loss": 0.3323323726654053, | |
| "step": 2400 | |
| }, | |
| { | |
| "epoch": 2.932844932844933, | |
| "grad_norm": 0.40260812640190125, | |
| "learning_rate": 1.012257812321241e-05, | |
| "loss": 0.5289126634597778, | |
| "step": 2402 | |
| }, | |
| { | |
| "epoch": 2.9352869352869355, | |
| "grad_norm": 0.11691813170909882, | |
| "learning_rate": 1.0113982524769395e-05, | |
| "loss": 0.25224849581718445, | |
| "step": 2404 | |
| }, | |
| { | |
| "epoch": 2.937728937728938, | |
| "grad_norm": 0.34821441769599915, | |
| "learning_rate": 1.0105698975096067e-05, | |
| "loss": 0.8060319423675537, | |
| "step": 2406 | |
| }, | |
| { | |
| "epoch": 2.9401709401709404, | |
| "grad_norm": 0.12309245765209198, | |
| "learning_rate": 1.0097727531779882e-05, | |
| "loss": 0.5822362303733826, | |
| "step": 2408 | |
| }, | |
| { | |
| "epoch": 2.942612942612943, | |
| "grad_norm": 0.27418801188468933, | |
| "learning_rate": 1.0090068250238521e-05, | |
| "loss": 0.5575027465820312, | |
| "step": 2410 | |
| }, | |
| { | |
| "epoch": 2.9450549450549453, | |
| "grad_norm": 0.23748725652694702, | |
| "learning_rate": 1.0082721183719512e-05, | |
| "loss": 0.8845927715301514, | |
| "step": 2412 | |
| }, | |
| { | |
| "epoch": 2.9474969474969477, | |
| "grad_norm": 0.2792826294898987, | |
| "learning_rate": 1.007568638329985e-05, | |
| "loss": 0.9328206181526184, | |
| "step": 2414 | |
| }, | |
| { | |
| "epoch": 2.94993894993895, | |
| "grad_norm": 0.15178164839744568, | |
| "learning_rate": 1.006896389788565e-05, | |
| "loss": 0.8631373643875122, | |
| "step": 2416 | |
| }, | |
| { | |
| "epoch": 2.9523809523809526, | |
| "grad_norm": 0.5004929900169373, | |
| "learning_rate": 1.0062553774211814e-05, | |
| "loss": 0.5906471014022827, | |
| "step": 2418 | |
| }, | |
| { | |
| "epoch": 2.954822954822955, | |
| "grad_norm": 0.6168224215507507, | |
| "learning_rate": 1.005645605684169e-05, | |
| "loss": 0.7751826643943787, | |
| "step": 2420 | |
| }, | |
| { | |
| "epoch": 2.9572649572649574, | |
| "grad_norm": 0.13668419420719147, | |
| "learning_rate": 1.0050670788166761e-05, | |
| "loss": 0.8252941966056824, | |
| "step": 2422 | |
| }, | |
| { | |
| "epoch": 2.95970695970696, | |
| "grad_norm": 0.2519516944885254, | |
| "learning_rate": 1.0045198008406373e-05, | |
| "loss": 0.45424923300743103, | |
| "step": 2424 | |
| }, | |
| { | |
| "epoch": 2.9621489621489623, | |
| "grad_norm": 0.6055773496627808, | |
| "learning_rate": 1.0040037755607428e-05, | |
| "loss": 0.8147112131118774, | |
| "step": 2426 | |
| }, | |
| { | |
| "epoch": 2.9645909645909647, | |
| "grad_norm": 0.37422075867652893, | |
| "learning_rate": 1.0035190065644145e-05, | |
| "loss": 0.4457106590270996, | |
| "step": 2428 | |
| }, | |
| { | |
| "epoch": 2.967032967032967, | |
| "grad_norm": 0.410826176404953, | |
| "learning_rate": 1.0030654972217791e-05, | |
| "loss": 0.8721326589584351, | |
| "step": 2430 | |
| }, | |
| { | |
| "epoch": 2.9694749694749696, | |
| "grad_norm": 0.1232210174202919, | |
| "learning_rate": 1.0026432506856445e-05, | |
| "loss": 0.8431730270385742, | |
| "step": 2432 | |
| }, | |
| { | |
| "epoch": 2.971916971916972, | |
| "grad_norm": 0.22085325419902802, | |
| "learning_rate": 1.0022522698914816e-05, | |
| "loss": 0.8151767253875732, | |
| "step": 2434 | |
| }, | |
| { | |
| "epoch": 2.9743589743589745, | |
| "grad_norm": 0.1448344588279724, | |
| "learning_rate": 1.0018925575573973e-05, | |
| "loss": 0.49180135130882263, | |
| "step": 2436 | |
| }, | |
| { | |
| "epoch": 2.976800976800977, | |
| "grad_norm": 0.3198087811470032, | |
| "learning_rate": 1.0015641161841218e-05, | |
| "loss": 0.43782997131347656, | |
| "step": 2438 | |
| }, | |
| { | |
| "epoch": 2.9792429792429793, | |
| "grad_norm": 0.13322417438030243, | |
| "learning_rate": 1.0012669480549882e-05, | |
| "loss": 0.9003132581710815, | |
| "step": 2440 | |
| }, | |
| { | |
| "epoch": 2.9816849816849818, | |
| "grad_norm": 0.1805696189403534, | |
| "learning_rate": 1.0010010552359173e-05, | |
| "loss": 0.8543267846107483, | |
| "step": 2442 | |
| }, | |
| { | |
| "epoch": 2.984126984126984, | |
| "grad_norm": 0.6951938271522522, | |
| "learning_rate": 1.000766439575403e-05, | |
| "loss": 0.664506196975708, | |
| "step": 2444 | |
| }, | |
| { | |
| "epoch": 2.9865689865689866, | |
| "grad_norm": 0.20818327367305756, | |
| "learning_rate": 1.000563102704499e-05, | |
| "loss": 0.8623005747795105, | |
| "step": 2446 | |
| }, | |
| { | |
| "epoch": 2.989010989010989, | |
| "grad_norm": 0.21340824663639069, | |
| "learning_rate": 1.0003910460368087e-05, | |
| "loss": 0.8571358323097229, | |
| "step": 2448 | |
| }, | |
| { | |
| "epoch": 2.9914529914529915, | |
| "grad_norm": 0.4306589365005493, | |
| "learning_rate": 1.000250270768475e-05, | |
| "loss": 0.5608749985694885, | |
| "step": 2450 | |
| }, | |
| { | |
| "epoch": 2.993894993894994, | |
| "grad_norm": 0.10902591794729233, | |
| "learning_rate": 1.0001407778781705e-05, | |
| "loss": 0.8583354949951172, | |
| "step": 2452 | |
| }, | |
| { | |
| "epoch": 2.9963369963369964, | |
| "grad_norm": 0.11453904956579208, | |
| "learning_rate": 1.0000625681270934e-05, | |
| "loss": 0.8831861615180969, | |
| "step": 2454 | |
| }, | |
| { | |
| "epoch": 2.998778998778999, | |
| "grad_norm": 0.09050007164478302, | |
| "learning_rate": 1.0000156420589591e-05, | |
| "loss": 0.4622391164302826, | |
| "step": 2456 | |
| }, | |
| { | |
| "epoch": 3.0, | |
| "step": 2457, | |
| "total_flos": 6.574576867100066e+18, | |
| "train_loss": 0.8942287936473116, | |
| "train_runtime": 42713.458, | |
| "train_samples_per_second": 0.92, | |
| "train_steps_per_second": 0.058 | |
| } | |
| ], | |
| "logging_steps": 2, | |
| "max_steps": 2457, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 3, | |
| "save_steps": 99999, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": false, | |
| "should_training_stop": false | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 6.574576867100066e+18, | |
| "train_batch_size": 1, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |