Text Generation
Transformers
Safetensors
llama
llama-factory
full
Generated from Trainer
conversational
text-generation-inference
Instructions to use formalmathatepfl/DeepseekProverV2Finetuned01 with libraries, inference providers, notebooks, and local apps. Follow these links to get started.
- Libraries
- Transformers
How to use formalmathatepfl/DeepseekProverV2Finetuned01 with Transformers:
```python
# Use a pipeline as a high-level helper
from transformers import pipeline

pipe = pipeline("text-generation", model="formalmathatepfl/DeepseekProverV2Finetuned01")
messages = [
    {"role": "user", "content": "Who are you?"},
]
pipe(messages)
```

```python
# Load model directly
from transformers import AutoTokenizer, AutoModelForCausalLM

tokenizer = AutoTokenizer.from_pretrained("formalmathatepfl/DeepseekProverV2Finetuned01")
model = AutoModelForCausalLM.from_pretrained("formalmathatepfl/DeepseekProverV2Finetuned01")
messages = [
    {"role": "user", "content": "Who are you?"},
]
inputs = tokenizer.apply_chat_template(
    messages,
    add_generation_prompt=True,
    tokenize=True,
    return_dict=True,
    return_tensors="pt",
).to(model.device)

outputs = model.generate(**inputs, max_new_tokens=40)
print(tokenizer.decode(outputs[0][inputs["input_ids"].shape[-1]:]))
```
- Notebooks
- Google Colab
- Kaggle
- Local Apps
- vLLM
How to use formalmathatepfl/DeepseekProverV2Finetuned01 with vLLM:
Install from pip and serve model
```bash
# Install vLLM from pip:
pip install vllm

# Start the vLLM server:
vllm serve "formalmathatepfl/DeepseekProverV2Finetuned01"

# Call the server using curl (OpenAI-compatible API):
curl -X POST "http://localhost:8000/v1/chat/completions" \
    -H "Content-Type: application/json" \
    --data '{
        "model": "formalmathatepfl/DeepseekProverV2Finetuned01",
        "messages": [
            {
                "role": "user",
                "content": "What is the capital of France?"
            }
        ]
    }'
```
Use Docker
docker model run hf.co/formalmathatepfl/DeepseekProverV2Finetuned01
- SGLang
How to use formalmathatepfl/DeepseekProverV2Finetuned01 with SGLang:
Install from pip and serve model
```bash
# Install SGLang from pip:
pip install sglang

# Start the SGLang server:
python3 -m sglang.launch_server \
    --model-path "formalmathatepfl/DeepseekProverV2Finetuned01" \
    --host 0.0.0.0 \
    --port 30000

# Call the server using curl (OpenAI-compatible API):
curl -X POST "http://localhost:30000/v1/chat/completions" \
    -H "Content-Type: application/json" \
    --data '{
        "model": "formalmathatepfl/DeepseekProverV2Finetuned01",
        "messages": [
            {
                "role": "user",
                "content": "What is the capital of France?"
            }
        ]
    }'
```
Use Docker images
```bash
docker run --gpus all \
    --shm-size 32g \
    -p 30000:30000 \
    -v ~/.cache/huggingface:/root/.cache/huggingface \
    --env "HF_TOKEN=<secret>" \
    --ipc=host \
    lmsysorg/sglang:latest \
    python3 -m sglang.launch_server \
        --model-path "formalmathatepfl/DeepseekProverV2Finetuned01" \
        --host 0.0.0.0 \
        --port 30000

# Call the server using curl (OpenAI-compatible API):
curl -X POST "http://localhost:30000/v1/chat/completions" \
    -H "Content-Type: application/json" \
    --data '{
        "model": "formalmathatepfl/DeepseekProverV2Finetuned01",
        "messages": [
            {
                "role": "user",
                "content": "What is the capital of France?"
            }
        ]
    }'
```
- Docker Model Runner
How to use formalmathatepfl/DeepseekProverV2Finetuned01 with Docker Model Runner:
docker model run hf.co/formalmathatepfl/DeepseekProverV2Finetuned01
| { | |
| "best_global_step": null, | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 1.0, | |
| "eval_steps": 500, | |
| "global_step": 13705, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.001459321415541773, | |
| "grad_norm": 2.6745453947710165, | |
| "learning_rate": 4.611650485436894e-06, | |
| "loss": 1.0637, | |
| "step": 20 | |
| }, | |
| { | |
| "epoch": 0.002918642831083546, | |
| "grad_norm": 0.8636258229268736, | |
| "learning_rate": 9.466019417475729e-06, | |
| "loss": 0.3278, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 0.004377964246625319, | |
| "grad_norm": 0.6429542529341992, | |
| "learning_rate": 1.4320388349514562e-05, | |
| "loss": 0.1701, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 0.005837285662167092, | |
| "grad_norm": 0.4870228029221957, | |
| "learning_rate": 1.91747572815534e-05, | |
| "loss": 0.1413, | |
| "step": 80 | |
| }, | |
| { | |
| "epoch": 0.0072966070777088655, | |
| "grad_norm": 0.4961345702002848, | |
| "learning_rate": 2.4029126213592234e-05, | |
| "loss": 0.1396, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.008755928493250638, | |
| "grad_norm": 0.40834540818199705, | |
| "learning_rate": 2.8883495145631068e-05, | |
| "loss": 0.1266, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 0.010215249908792412, | |
| "grad_norm": 0.6018679639107668, | |
| "learning_rate": 3.373786407766991e-05, | |
| "loss": 0.1103, | |
| "step": 140 | |
| }, | |
| { | |
| "epoch": 0.011674571324334184, | |
| "grad_norm": 0.5960255000220145, | |
| "learning_rate": 3.859223300970874e-05, | |
| "loss": 0.1325, | |
| "step": 160 | |
| }, | |
| { | |
| "epoch": 0.013133892739875957, | |
| "grad_norm": 0.4212109164544222, | |
| "learning_rate": 4.344660194174757e-05, | |
| "loss": 0.1237, | |
| "step": 180 | |
| }, | |
| { | |
| "epoch": 0.014593214155417731, | |
| "grad_norm": 0.6565890138369451, | |
| "learning_rate": 4.830097087378641e-05, | |
| "loss": 0.1233, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.016052535570959505, | |
| "grad_norm": 0.3857563740550121, | |
| "learning_rate": 5.3155339805825244e-05, | |
| "loss": 0.1185, | |
| "step": 220 | |
| }, | |
| { | |
| "epoch": 0.017511856986501276, | |
| "grad_norm": 0.35890344101071686, | |
| "learning_rate": 5.800970873786408e-05, | |
| "loss": 0.1415, | |
| "step": 240 | |
| }, | |
| { | |
| "epoch": 0.01897117840204305, | |
| "grad_norm": 0.2979671831060535, | |
| "learning_rate": 6.286407766990293e-05, | |
| "loss": 0.1362, | |
| "step": 260 | |
| }, | |
| { | |
| "epoch": 0.020430499817584824, | |
| "grad_norm": 0.38285277593806, | |
| "learning_rate": 6.771844660194175e-05, | |
| "loss": 0.1302, | |
| "step": 280 | |
| }, | |
| { | |
| "epoch": 0.021889821233126595, | |
| "grad_norm": 0.5932557337111825, | |
| "learning_rate": 7.25728155339806e-05, | |
| "loss": 0.1286, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 0.02334914264866837, | |
| "grad_norm": 0.39643282763459103, | |
| "learning_rate": 7.742718446601942e-05, | |
| "loss": 0.1494, | |
| "step": 320 | |
| }, | |
| { | |
| "epoch": 0.024808464064210143, | |
| "grad_norm": 0.4853895916428549, | |
| "learning_rate": 8.228155339805825e-05, | |
| "loss": 0.1304, | |
| "step": 340 | |
| }, | |
| { | |
| "epoch": 0.026267785479751914, | |
| "grad_norm": 0.48381251386974544, | |
| "learning_rate": 8.713592233009709e-05, | |
| "loss": 0.1394, | |
| "step": 360 | |
| }, | |
| { | |
| "epoch": 0.027727106895293688, | |
| "grad_norm": 0.4904529766502894, | |
| "learning_rate": 9.199029126213593e-05, | |
| "loss": 0.1509, | |
| "step": 380 | |
| }, | |
| { | |
| "epoch": 0.029186428310835462, | |
| "grad_norm": 0.7958367491650168, | |
| "learning_rate": 9.684466019417477e-05, | |
| "loss": 0.1395, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 0.030645749726377236, | |
| "grad_norm": 0.624215442246557, | |
| "learning_rate": 9.999993157895145e-05, | |
| "loss": 0.1745, | |
| "step": 420 | |
| }, | |
| { | |
| "epoch": 0.03210507114191901, | |
| "grad_norm": 0.6815596480986827, | |
| "learning_rate": 9.999898206558094e-05, | |
| "loss": 0.1433, | |
| "step": 440 | |
| }, | |
| { | |
| "epoch": 0.03356439255746078, | |
| "grad_norm": 0.7939990598700336, | |
| "learning_rate": 9.999691549843376e-05, | |
| "loss": 0.1609, | |
| "step": 460 | |
| }, | |
| { | |
| "epoch": 0.03502371397300255, | |
| "grad_norm": 0.4965525216601077, | |
| "learning_rate": 9.999373192368015e-05, | |
| "loss": 0.1523, | |
| "step": 480 | |
| }, | |
| { | |
| "epoch": 0.036483035388544326, | |
| "grad_norm": 1.8640551410419721, | |
| "learning_rate": 9.998943141244607e-05, | |
| "loss": 0.1809, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.0379423568040861, | |
| "grad_norm": 0.6022579601783719, | |
| "learning_rate": 9.99840140608115e-05, | |
| "loss": 0.1956, | |
| "step": 520 | |
| }, | |
| { | |
| "epoch": 0.039401678219627874, | |
| "grad_norm": 0.40549268604230454, | |
| "learning_rate": 9.997747998980835e-05, | |
| "loss": 0.1648, | |
| "step": 540 | |
| }, | |
| { | |
| "epoch": 0.04086099963516965, | |
| "grad_norm": 0.49940827687014067, | |
| "learning_rate": 9.996982934541781e-05, | |
| "loss": 0.1475, | |
| "step": 560 | |
| }, | |
| { | |
| "epoch": 0.04232032105071142, | |
| "grad_norm": 0.5149466244422736, | |
| "learning_rate": 9.996106229856695e-05, | |
| "loss": 0.1518, | |
| "step": 580 | |
| }, | |
| { | |
| "epoch": 0.04377964246625319, | |
| "grad_norm": 0.29222877668330227, | |
| "learning_rate": 9.995117904512503e-05, | |
| "loss": 0.1682, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 0.045238963881794964, | |
| "grad_norm": 0.5286646588687645, | |
| "learning_rate": 9.994017980589906e-05, | |
| "loss": 0.1421, | |
| "step": 620 | |
| }, | |
| { | |
| "epoch": 0.04669828529733674, | |
| "grad_norm": 1.1957202323680398, | |
| "learning_rate": 9.992806482662887e-05, | |
| "loss": 0.1699, | |
| "step": 640 | |
| }, | |
| { | |
| "epoch": 0.04815760671287851, | |
| "grad_norm": 0.5883304358400666, | |
| "learning_rate": 9.991483437798162e-05, | |
| "loss": 0.1338, | |
| "step": 660 | |
| }, | |
| { | |
| "epoch": 0.049616928128420286, | |
| "grad_norm": 0.6301249373045459, | |
| "learning_rate": 9.99004887555458e-05, | |
| "loss": 0.1547, | |
| "step": 680 | |
| }, | |
| { | |
| "epoch": 0.05107624954396206, | |
| "grad_norm": 0.4012564183658102, | |
| "learning_rate": 9.988502827982458e-05, | |
| "loss": 0.1367, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 0.05253557095950383, | |
| "grad_norm": 0.3168597769245346, | |
| "learning_rate": 9.986845329622862e-05, | |
| "loss": 0.1475, | |
| "step": 720 | |
| }, | |
| { | |
| "epoch": 0.0539948923750456, | |
| "grad_norm": 0.38246057665835786, | |
| "learning_rate": 9.985076417506844e-05, | |
| "loss": 0.1397, | |
| "step": 740 | |
| }, | |
| { | |
| "epoch": 0.055454213790587376, | |
| "grad_norm": 0.3864484630630238, | |
| "learning_rate": 9.983196131154607e-05, | |
| "loss": 0.1368, | |
| "step": 760 | |
| }, | |
| { | |
| "epoch": 0.05691353520612915, | |
| "grad_norm": 0.34381534736001396, | |
| "learning_rate": 9.981204512574626e-05, | |
| "loss": 0.1202, | |
| "step": 780 | |
| }, | |
| { | |
| "epoch": 0.058372856621670924, | |
| "grad_norm": 0.5385327614437169, | |
| "learning_rate": 9.979101606262708e-05, | |
| "loss": 0.1444, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 0.0598321780372127, | |
| "grad_norm": 0.3291873594168373, | |
| "learning_rate": 9.976887459200999e-05, | |
| "loss": 0.1344, | |
| "step": 820 | |
| }, | |
| { | |
| "epoch": 0.06129149945275447, | |
| "grad_norm": 0.35530404400780635, | |
| "learning_rate": 9.97456212085693e-05, | |
| "loss": 0.1455, | |
| "step": 840 | |
| }, | |
| { | |
| "epoch": 0.06275082086829624, | |
| "grad_norm": 0.25357381945588353, | |
| "learning_rate": 9.972125643182121e-05, | |
| "loss": 0.1405, | |
| "step": 860 | |
| }, | |
| { | |
| "epoch": 0.06421014228383802, | |
| "grad_norm": 0.33587236054655184, | |
| "learning_rate": 9.969578080611211e-05, | |
| "loss": 0.1273, | |
| "step": 880 | |
| }, | |
| { | |
| "epoch": 0.06566946369937979, | |
| "grad_norm": 0.2636319125685315, | |
| "learning_rate": 9.966919490060646e-05, | |
| "loss": 0.1155, | |
| "step": 900 | |
| }, | |
| { | |
| "epoch": 0.06712878511492155, | |
| "grad_norm": 0.4749750626433427, | |
| "learning_rate": 9.96414993092741e-05, | |
| "loss": 0.1577, | |
| "step": 920 | |
| }, | |
| { | |
| "epoch": 0.06858810653046334, | |
| "grad_norm": 0.26401350478445823, | |
| "learning_rate": 9.961269465087691e-05, | |
| "loss": 0.1313, | |
| "step": 940 | |
| }, | |
| { | |
| "epoch": 0.0700474279460051, | |
| "grad_norm": 0.3330162979528586, | |
| "learning_rate": 9.958278156895502e-05, | |
| "loss": 0.135, | |
| "step": 960 | |
| }, | |
| { | |
| "epoch": 0.07150674936154688, | |
| "grad_norm": 0.5511486073921424, | |
| "learning_rate": 9.955176073181249e-05, | |
| "loss": 0.1274, | |
| "step": 980 | |
| }, | |
| { | |
| "epoch": 0.07296607077708865, | |
| "grad_norm": 0.7818317017171568, | |
| "learning_rate": 9.951963283250227e-05, | |
| "loss": 0.1565, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 0.07442539219263043, | |
| "grad_norm": 0.42949055730605057, | |
| "learning_rate": 9.948639858881083e-05, | |
| "loss": 0.1487, | |
| "step": 1020 | |
| }, | |
| { | |
| "epoch": 0.0758847136081722, | |
| "grad_norm": 0.41682813706150323, | |
| "learning_rate": 9.945205874324201e-05, | |
| "loss": 0.143, | |
| "step": 1040 | |
| }, | |
| { | |
| "epoch": 0.07734403502371397, | |
| "grad_norm": 1.059389264065714, | |
| "learning_rate": 9.941661406300052e-05, | |
| "loss": 0.1312, | |
| "step": 1060 | |
| }, | |
| { | |
| "epoch": 0.07880335643925575, | |
| "grad_norm": 0.3764528008769406, | |
| "learning_rate": 9.938006533997475e-05, | |
| "loss": 0.1475, | |
| "step": 1080 | |
| }, | |
| { | |
| "epoch": 0.08026267785479752, | |
| "grad_norm": 0.3433293556468226, | |
| "learning_rate": 9.934241339071912e-05, | |
| "loss": 0.1379, | |
| "step": 1100 | |
| }, | |
| { | |
| "epoch": 0.0817219992703393, | |
| "grad_norm": 0.4669143421528186, | |
| "learning_rate": 9.930365905643578e-05, | |
| "loss": 0.1528, | |
| "step": 1120 | |
| }, | |
| { | |
| "epoch": 0.08318132068588106, | |
| "grad_norm": 0.3165847851959828, | |
| "learning_rate": 9.92638032029559e-05, | |
| "loss": 0.1424, | |
| "step": 1140 | |
| }, | |
| { | |
| "epoch": 0.08464064210142284, | |
| "grad_norm": 0.4112176255009246, | |
| "learning_rate": 9.922284672072021e-05, | |
| "loss": 0.1466, | |
| "step": 1160 | |
| }, | |
| { | |
| "epoch": 0.08609996351696461, | |
| "grad_norm": 0.24922324308271643, | |
| "learning_rate": 9.918079052475922e-05, | |
| "loss": 0.1151, | |
| "step": 1180 | |
| }, | |
| { | |
| "epoch": 0.08755928493250638, | |
| "grad_norm": 0.29440735283548447, | |
| "learning_rate": 9.913763555467269e-05, | |
| "loss": 0.1502, | |
| "step": 1200 | |
| }, | |
| { | |
| "epoch": 0.08901860634804816, | |
| "grad_norm": 0.31114703090098295, | |
| "learning_rate": 9.909338277460872e-05, | |
| "loss": 0.1163, | |
| "step": 1220 | |
| }, | |
| { | |
| "epoch": 0.09047792776358993, | |
| "grad_norm": 0.26314581527572667, | |
| "learning_rate": 9.904803317324211e-05, | |
| "loss": 0.1124, | |
| "step": 1240 | |
| }, | |
| { | |
| "epoch": 0.09193724917913171, | |
| "grad_norm": 0.37910467700641326, | |
| "learning_rate": 9.90015877637524e-05, | |
| "loss": 0.1624, | |
| "step": 1260 | |
| }, | |
| { | |
| "epoch": 0.09339657059467348, | |
| "grad_norm": 0.32917497252554034, | |
| "learning_rate": 9.895404758380109e-05, | |
| "loss": 0.1417, | |
| "step": 1280 | |
| }, | |
| { | |
| "epoch": 0.09485589201021526, | |
| "grad_norm": 0.4253161624681656, | |
| "learning_rate": 9.890541369550854e-05, | |
| "loss": 0.1243, | |
| "step": 1300 | |
| }, | |
| { | |
| "epoch": 0.09631521342575702, | |
| "grad_norm": 0.2579086839658108, | |
| "learning_rate": 9.885568718543025e-05, | |
| "loss": 0.1386, | |
| "step": 1320 | |
| }, | |
| { | |
| "epoch": 0.09777453484129879, | |
| "grad_norm": 0.22942493216872414, | |
| "learning_rate": 9.88048691645326e-05, | |
| "loss": 0.13, | |
| "step": 1340 | |
| }, | |
| { | |
| "epoch": 0.09923385625684057, | |
| "grad_norm": 0.45710318102883435, | |
| "learning_rate": 9.87529607681679e-05, | |
| "loss": 0.1777, | |
| "step": 1360 | |
| }, | |
| { | |
| "epoch": 0.10069317767238234, | |
| "grad_norm": 0.27950949145967136, | |
| "learning_rate": 9.869996315604915e-05, | |
| "loss": 0.1397, | |
| "step": 1380 | |
| }, | |
| { | |
| "epoch": 0.10215249908792412, | |
| "grad_norm": 0.35374191781286285, | |
| "learning_rate": 9.864587751222415e-05, | |
| "loss": 0.1269, | |
| "step": 1400 | |
| }, | |
| { | |
| "epoch": 0.10361182050346589, | |
| "grad_norm": 0.45122040063810864, | |
| "learning_rate": 9.859070504504894e-05, | |
| "loss": 0.1479, | |
| "step": 1420 | |
| }, | |
| { | |
| "epoch": 0.10507114191900765, | |
| "grad_norm": 0.3679594319404153, | |
| "learning_rate": 9.85344469871609e-05, | |
| "loss": 0.1333, | |
| "step": 1440 | |
| }, | |
| { | |
| "epoch": 0.10653046333454944, | |
| "grad_norm": 0.2773979819671957, | |
| "learning_rate": 9.847710459545109e-05, | |
| "loss": 0.1293, | |
| "step": 1460 | |
| }, | |
| { | |
| "epoch": 0.1079897847500912, | |
| "grad_norm": 0.18675042963035182, | |
| "learning_rate": 9.841867915103632e-05, | |
| "loss": 0.1262, | |
| "step": 1480 | |
| }, | |
| { | |
| "epoch": 0.10944910616563298, | |
| "grad_norm": 0.30565129208355624, | |
| "learning_rate": 9.835917195923044e-05, | |
| "loss": 0.1197, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 0.11090842758117475, | |
| "grad_norm": 0.2626135808770724, | |
| "learning_rate": 9.829858434951516e-05, | |
| "loss": 0.1132, | |
| "step": 1520 | |
| }, | |
| { | |
| "epoch": 0.11236774899671653, | |
| "grad_norm": 0.36381732806040473, | |
| "learning_rate": 9.823691767551042e-05, | |
| "loss": 0.1397, | |
| "step": 1540 | |
| }, | |
| { | |
| "epoch": 0.1138270704122583, | |
| "grad_norm": 0.2531250566483984, | |
| "learning_rate": 9.817417331494409e-05, | |
| "loss": 0.0946, | |
| "step": 1560 | |
| }, | |
| { | |
| "epoch": 0.11528639182780007, | |
| "grad_norm": 0.31305747632958897, | |
| "learning_rate": 9.81103526696212e-05, | |
| "loss": 0.1154, | |
| "step": 1580 | |
| }, | |
| { | |
| "epoch": 0.11674571324334185, | |
| "grad_norm": 0.3203482106895159, | |
| "learning_rate": 9.804545716539265e-05, | |
| "loss": 0.1263, | |
| "step": 1600 | |
| }, | |
| { | |
| "epoch": 0.11820503465888361, | |
| "grad_norm": 0.21193993774401784, | |
| "learning_rate": 9.797948825212331e-05, | |
| "loss": 0.1282, | |
| "step": 1620 | |
| }, | |
| { | |
| "epoch": 0.1196643560744254, | |
| "grad_norm": 0.328003998882712, | |
| "learning_rate": 9.791244740365965e-05, | |
| "loss": 0.1217, | |
| "step": 1640 | |
| }, | |
| { | |
| "epoch": 0.12112367748996716, | |
| "grad_norm": 0.25049879501157474, | |
| "learning_rate": 9.784433611779684e-05, | |
| "loss": 0.1395, | |
| "step": 1660 | |
| }, | |
| { | |
| "epoch": 0.12258299890550894, | |
| "grad_norm": 0.2889080458455597, | |
| "learning_rate": 9.777515591624522e-05, | |
| "loss": 0.1281, | |
| "step": 1680 | |
| }, | |
| { | |
| "epoch": 0.12404232032105071, | |
| "grad_norm": 0.32303024299802585, | |
| "learning_rate": 9.77049083445964e-05, | |
| "loss": 0.1279, | |
| "step": 1700 | |
| }, | |
| { | |
| "epoch": 0.12550164173659248, | |
| "grad_norm": 0.4839798792492212, | |
| "learning_rate": 9.76335949722886e-05, | |
| "loss": 0.1077, | |
| "step": 1720 | |
| }, | |
| { | |
| "epoch": 0.12696096315213426, | |
| "grad_norm": 0.3053946282511456, | |
| "learning_rate": 9.756121739257173e-05, | |
| "loss": 0.1306, | |
| "step": 1740 | |
| }, | |
| { | |
| "epoch": 0.12842028456767604, | |
| "grad_norm": 0.37752644019410203, | |
| "learning_rate": 9.748777722247164e-05, | |
| "loss": 0.1219, | |
| "step": 1760 | |
| }, | |
| { | |
| "epoch": 0.1298796059832178, | |
| "grad_norm": 0.3582486729875512, | |
| "learning_rate": 9.741327610275417e-05, | |
| "loss": 0.1098, | |
| "step": 1780 | |
| }, | |
| { | |
| "epoch": 0.13133892739875958, | |
| "grad_norm": 0.32665426597686364, | |
| "learning_rate": 9.73377156978883e-05, | |
| "loss": 0.1131, | |
| "step": 1800 | |
| }, | |
| { | |
| "epoch": 0.13279824881430136, | |
| "grad_norm": 0.29447834111629645, | |
| "learning_rate": 9.726109769600915e-05, | |
| "loss": 0.1408, | |
| "step": 1820 | |
| }, | |
| { | |
| "epoch": 0.1342575702298431, | |
| "grad_norm": 0.3757371639048124, | |
| "learning_rate": 9.718342380888013e-05, | |
| "loss": 0.1181, | |
| "step": 1840 | |
| }, | |
| { | |
| "epoch": 0.1357168916453849, | |
| "grad_norm": 0.2721970485578736, | |
| "learning_rate": 9.710469577185473e-05, | |
| "loss": 0.1397, | |
| "step": 1860 | |
| }, | |
| { | |
| "epoch": 0.13717621306092667, | |
| "grad_norm": 0.25541942141740964, | |
| "learning_rate": 9.702491534383779e-05, | |
| "loss": 0.123, | |
| "step": 1880 | |
| }, | |
| { | |
| "epoch": 0.13863553447646845, | |
| "grad_norm": 0.28833999806064703, | |
| "learning_rate": 9.69440843072462e-05, | |
| "loss": 0.117, | |
| "step": 1900 | |
| }, | |
| { | |
| "epoch": 0.1400948558920102, | |
| "grad_norm": 0.269673320995455, | |
| "learning_rate": 9.686220446796896e-05, | |
| "loss": 0.1137, | |
| "step": 1920 | |
| }, | |
| { | |
| "epoch": 0.141554177307552, | |
| "grad_norm": 0.48330990807963714, | |
| "learning_rate": 9.677927765532701e-05, | |
| "loss": 0.1528, | |
| "step": 1940 | |
| }, | |
| { | |
| "epoch": 0.14301349872309377, | |
| "grad_norm": 0.3363262147438343, | |
| "learning_rate": 9.669530572203227e-05, | |
| "loss": 0.1104, | |
| "step": 1960 | |
| }, | |
| { | |
| "epoch": 0.14447282013863552, | |
| "grad_norm": 0.25580892613717404, | |
| "learning_rate": 9.661029054414622e-05, | |
| "loss": 0.1193, | |
| "step": 1980 | |
| }, | |
| { | |
| "epoch": 0.1459321415541773, | |
| "grad_norm": 0.45388106676996987, | |
| "learning_rate": 9.652423402103805e-05, | |
| "loss": 0.1592, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 0.14739146296971908, | |
| "grad_norm": 0.3709494593344652, | |
| "learning_rate": 9.643713807534219e-05, | |
| "loss": 0.1073, | |
| "step": 2020 | |
| }, | |
| { | |
| "epoch": 0.14885078438526086, | |
| "grad_norm": 0.6788345381577189, | |
| "learning_rate": 9.634900465291534e-05, | |
| "loss": 0.1315, | |
| "step": 2040 | |
| }, | |
| { | |
| "epoch": 0.15031010580080262, | |
| "grad_norm": 0.2869088910525234, | |
| "learning_rate": 9.625983572279304e-05, | |
| "loss": 0.1184, | |
| "step": 2060 | |
| }, | |
| { | |
| "epoch": 0.1517694272163444, | |
| "grad_norm": 0.2964111500353762, | |
| "learning_rate": 9.616963327714566e-05, | |
| "loss": 0.1115, | |
| "step": 2080 | |
| }, | |
| { | |
| "epoch": 0.15322874863188618, | |
| "grad_norm": 0.3152701415407213, | |
| "learning_rate": 9.607839933123386e-05, | |
| "loss": 0.117, | |
| "step": 2100 | |
| }, | |
| { | |
| "epoch": 0.15468807004742793, | |
| "grad_norm": 0.30792427867208466, | |
| "learning_rate": 9.598613592336364e-05, | |
| "loss": 0.1219, | |
| "step": 2120 | |
| }, | |
| { | |
| "epoch": 0.15614739146296971, | |
| "grad_norm": 0.28660782708863575, | |
| "learning_rate": 9.589284511484071e-05, | |
| "loss": 0.1436, | |
| "step": 2140 | |
| }, | |
| { | |
| "epoch": 0.1576067128785115, | |
| "grad_norm": 0.32617953316288606, | |
| "learning_rate": 9.579852898992452e-05, | |
| "loss": 0.1287, | |
| "step": 2160 | |
| }, | |
| { | |
| "epoch": 0.15906603429405328, | |
| "grad_norm": 0.41379725314108085, | |
| "learning_rate": 9.570318965578163e-05, | |
| "loss": 0.1097, | |
| "step": 2180 | |
| }, | |
| { | |
| "epoch": 0.16052535570959503, | |
| "grad_norm": 0.22219925652341765, | |
| "learning_rate": 9.560682924243866e-05, | |
| "loss": 0.1171, | |
| "step": 2200 | |
| }, | |
| { | |
| "epoch": 0.1619846771251368, | |
| "grad_norm": 0.2835465449542872, | |
| "learning_rate": 9.550944990273473e-05, | |
| "loss": 0.1275, | |
| "step": 2220 | |
| }, | |
| { | |
| "epoch": 0.1634439985406786, | |
| "grad_norm": 0.3971492169748388, | |
| "learning_rate": 9.54110538122733e-05, | |
| "loss": 0.1029, | |
| "step": 2240 | |
| }, | |
| { | |
| "epoch": 0.16490331995622035, | |
| "grad_norm": 0.27416529849431764, | |
| "learning_rate": 9.531164316937362e-05, | |
| "loss": 0.1209, | |
| "step": 2260 | |
| }, | |
| { | |
| "epoch": 0.16636264137176213, | |
| "grad_norm": 0.2653777493304375, | |
| "learning_rate": 9.52112201950216e-05, | |
| "loss": 0.1132, | |
| "step": 2280 | |
| }, | |
| { | |
| "epoch": 0.1678219627873039, | |
| "grad_norm": 0.5642093807940471, | |
| "learning_rate": 9.510978713282017e-05, | |
| "loss": 0.1299, | |
| "step": 2300 | |
| }, | |
| { | |
| "epoch": 0.1692812842028457, | |
| "grad_norm": 0.24584617694789176, | |
| "learning_rate": 9.500734624893914e-05, | |
| "loss": 0.1251, | |
| "step": 2320 | |
| }, | |
| { | |
| "epoch": 0.17074060561838744, | |
| "grad_norm": 0.272116000365995, | |
| "learning_rate": 9.490389983206466e-05, | |
| "loss": 0.1281, | |
| "step": 2340 | |
| }, | |
| { | |
| "epoch": 0.17219992703392922, | |
| "grad_norm": 0.3358227142033562, | |
| "learning_rate": 9.4799450193348e-05, | |
| "loss": 0.1296, | |
| "step": 2360 | |
| }, | |
| { | |
| "epoch": 0.173659248449471, | |
| "grad_norm": 0.2051814222925398, | |
| "learning_rate": 9.469399966635391e-05, | |
| "loss": 0.1191, | |
| "step": 2380 | |
| }, | |
| { | |
| "epoch": 0.17511856986501276, | |
| "grad_norm": 0.20408049257855926, | |
| "learning_rate": 9.458755060700856e-05, | |
| "loss": 0.1141, | |
| "step": 2400 | |
| }, | |
| { | |
| "epoch": 0.17657789128055454, | |
| "grad_norm": 0.1849829651088512, | |
| "learning_rate": 9.448010539354685e-05, | |
| "loss": 0.1127, | |
| "step": 2420 | |
| }, | |
| { | |
| "epoch": 0.17803721269609632, | |
| "grad_norm": 0.18080713942428248, | |
| "learning_rate": 9.437166642645926e-05, | |
| "loss": 0.1394, | |
| "step": 2440 | |
| }, | |
| { | |
| "epoch": 0.1794965341116381, | |
| "grad_norm": 0.27541852629660835, | |
| "learning_rate": 9.426223612843828e-05, | |
| "loss": 0.1214, | |
| "step": 2460 | |
| }, | |
| { | |
| "epoch": 0.18095585552717985, | |
| "grad_norm": 0.37092708297153004, | |
| "learning_rate": 9.415181694432423e-05, | |
| "loss": 0.146, | |
| "step": 2480 | |
| }, | |
| { | |
| "epoch": 0.18241517694272164, | |
| "grad_norm": 0.36083890038936484, | |
| "learning_rate": 9.404041134105066e-05, | |
| "loss": 0.1248, | |
| "step": 2500 | |
| }, | |
| { | |
| "epoch": 0.18387449835826342, | |
| "grad_norm": 0.2776438139983535, | |
| "learning_rate": 9.392802180758926e-05, | |
| "loss": 0.1368, | |
| "step": 2520 | |
| }, | |
| { | |
| "epoch": 0.18533381977380517, | |
| "grad_norm": 0.34820547586785217, | |
| "learning_rate": 9.38146508548942e-05, | |
| "loss": 0.1155, | |
| "step": 2540 | |
| }, | |
| { | |
| "epoch": 0.18679314118934695, | |
| "grad_norm": 0.294238763398288, | |
| "learning_rate": 9.370030101584605e-05, | |
| "loss": 0.1172, | |
| "step": 2560 | |
| }, | |
| { | |
| "epoch": 0.18825246260488873, | |
| "grad_norm": 0.22876093865750505, | |
| "learning_rate": 9.358497484519524e-05, | |
| "loss": 0.1241, | |
| "step": 2580 | |
| }, | |
| { | |
| "epoch": 0.1897117840204305, | |
| "grad_norm": 0.2856249418184077, | |
| "learning_rate": 9.34686749195049e-05, | |
| "loss": 0.1251, | |
| "step": 2600 | |
| }, | |
| { | |
| "epoch": 0.19117110543597227, | |
| "grad_norm": 0.2919772530638231, | |
| "learning_rate": 9.335140383709333e-05, | |
| "loss": 0.12, | |
| "step": 2620 | |
| }, | |
| { | |
| "epoch": 0.19263042685151405, | |
| "grad_norm": 0.3294893864056268, | |
| "learning_rate": 9.323316421797602e-05, | |
| "loss": 0.1097, | |
| "step": 2640 | |
| }, | |
| { | |
| "epoch": 0.19408974826705583, | |
| "grad_norm": 0.3920752736984575, | |
| "learning_rate": 9.311395870380698e-05, | |
| "loss": 0.1151, | |
| "step": 2660 | |
| }, | |
| { | |
| "epoch": 0.19554906968259758, | |
| "grad_norm": 0.1668745397084369, | |
| "learning_rate": 9.299378995781984e-05, | |
| "loss": 0.1191, | |
| "step": 2680 | |
| }, | |
| { | |
| "epoch": 0.19700839109813936, | |
| "grad_norm": 0.19167495599752757, | |
| "learning_rate": 9.28726606647683e-05, | |
| "loss": 0.1413, | |
| "step": 2700 | |
| }, | |
| { | |
| "epoch": 0.19846771251368114, | |
| "grad_norm": 0.39783090766324053, | |
| "learning_rate": 9.275057353086611e-05, | |
| "loss": 0.149, | |
| "step": 2720 | |
| }, | |
| { | |
| "epoch": 0.19992703392922292, | |
| "grad_norm": 0.2810998132604279, | |
| "learning_rate": 9.262753128372672e-05, | |
| "loss": 0.1194, | |
| "step": 2740 | |
| }, | |
| { | |
| "epoch": 0.20138635534476468, | |
| "grad_norm": 0.2235431056863839, | |
| "learning_rate": 9.25035366723022e-05, | |
| "loss": 0.1339, | |
| "step": 2760 | |
| }, | |
| { | |
| "epoch": 0.20284567676030646, | |
| "grad_norm": 0.21528060380233013, | |
| "learning_rate": 9.237859246682193e-05, | |
| "loss": 0.1254, | |
| "step": 2780 | |
| }, | |
| { | |
| "epoch": 0.20430499817584824, | |
| "grad_norm": 0.5942022277992831, | |
| "learning_rate": 9.22527014587307e-05, | |
| "loss": 0.1279, | |
| "step": 2800 | |
| }, | |
| { | |
| "epoch": 0.20576431959139, | |
| "grad_norm": 0.22522986172233075, | |
| "learning_rate": 9.212586646062626e-05, | |
| "loss": 0.1016, | |
| "step": 2820 | |
| }, | |
| { | |
| "epoch": 0.20722364100693177, | |
| "grad_norm": 0.38913350777355465, | |
| "learning_rate": 9.19980903061966e-05, | |
| "loss": 0.1321, | |
| "step": 2840 | |
| }, | |
| { | |
| "epoch": 0.20868296242247356, | |
| "grad_norm": 0.2761445042724322, | |
| "learning_rate": 9.186937585015654e-05, | |
| "loss": 0.1006, | |
| "step": 2860 | |
| }, | |
| { | |
| "epoch": 0.2101422838380153, | |
| "grad_norm": 0.2904489675134637, | |
| "learning_rate": 9.173972596818399e-05, | |
| "loss": 0.1391, | |
| "step": 2880 | |
| }, | |
| { | |
| "epoch": 0.2116016052535571, | |
| "grad_norm": 0.3863861717225745, | |
| "learning_rate": 9.160914355685577e-05, | |
| "loss": 0.1338, | |
| "step": 2900 | |
| }, | |
| { | |
| "epoch": 0.21306092666909887, | |
| "grad_norm": 0.23375929184834016, | |
| "learning_rate": 9.147763153358276e-05, | |
| "loss": 0.1271, | |
| "step": 2920 | |
| }, | |
| { | |
| "epoch": 0.21452024808464065, | |
| "grad_norm": 0.2103193670388651, | |
| "learning_rate": 9.134519283654483e-05, | |
| "loss": 0.1115, | |
| "step": 2940 | |
| }, | |
| { | |
| "epoch": 0.2159795695001824, | |
| "grad_norm": 0.253417546073443, | |
| "learning_rate": 9.121183042462517e-05, | |
| "loss": 0.0965, | |
| "step": 2960 | |
| }, | |
| { | |
| "epoch": 0.2174388909157242, | |
| "grad_norm": 0.36352688754174645, | |
| "learning_rate": 9.107754727734414e-05, | |
| "loss": 0.1257, | |
| "step": 2980 | |
| }, | |
| { | |
| "epoch": 0.21889821233126597, | |
| "grad_norm": 0.33374450352895074, | |
| "learning_rate": 9.094234639479273e-05, | |
| "loss": 0.12, | |
| "step": 3000 | |
| }, | |
| { | |
| "epoch": 0.22035753374680772, | |
| "grad_norm": 0.24900650250532352, | |
| "learning_rate": 9.080623079756561e-05, | |
| "loss": 0.1071, | |
| "step": 3020 | |
| }, | |
| { | |
| "epoch": 0.2218168551623495, | |
| "grad_norm": 0.21315684544430627, | |
| "learning_rate": 9.066920352669353e-05, | |
| "loss": 0.1382, | |
| "step": 3040 | |
| }, | |
| { | |
| "epoch": 0.22327617657789128, | |
| "grad_norm": 0.2263034759808627, | |
| "learning_rate": 9.053126764357537e-05, | |
| "loss": 0.145, | |
| "step": 3060 | |
| }, | |
| { | |
| "epoch": 0.22473549799343306, | |
| "grad_norm": 0.17408999521123467, | |
| "learning_rate": 9.03924262299099e-05, | |
| "loss": 0.1125, | |
| "step": 3080 | |
| }, | |
| { | |
| "epoch": 0.22619481940897482, | |
| "grad_norm": 0.26382605132819903, | |
| "learning_rate": 9.025268238762678e-05, | |
| "loss": 0.1345, | |
| "step": 3100 | |
| }, | |
| { | |
| "epoch": 0.2276541408245166, | |
| "grad_norm": 0.428881304462081, | |
| "learning_rate": 9.011203923881728e-05, | |
| "loss": 0.1223, | |
| "step": 3120 | |
| }, | |
| { | |
| "epoch": 0.22911346224005838, | |
| "grad_norm": 0.23176671690985062, | |
| "learning_rate": 8.997049992566462e-05, | |
| "loss": 0.1259, | |
| "step": 3140 | |
| }, | |
| { | |
| "epoch": 0.23057278365560013, | |
| "grad_norm": 0.27711123572562557, | |
| "learning_rate": 8.982806761037363e-05, | |
| "loss": 0.1228, | |
| "step": 3160 | |
| }, | |
| { | |
| "epoch": 0.23203210507114191, | |
| "grad_norm": 0.28590819940098056, | |
| "learning_rate": 8.968474547510022e-05, | |
| "loss": 0.1312, | |
| "step": 3180 | |
| }, | |
| { | |
| "epoch": 0.2334914264866837, | |
| "grad_norm": 0.29608561342126305, | |
| "learning_rate": 8.954053672188022e-05, | |
| "loss": 0.1123, | |
| "step": 3200 | |
| }, | |
| { | |
| "epoch": 0.23495074790222548, | |
| "grad_norm": 0.22352582010864266, | |
| "learning_rate": 8.93954445725579e-05, | |
| "loss": 0.102, | |
| "step": 3220 | |
| }, | |
| { | |
| "epoch": 0.23641006931776723, | |
| "grad_norm": 0.17310790477532703, | |
| "learning_rate": 8.924947226871392e-05, | |
| "loss": 0.1326, | |
| "step": 3240 | |
| }, | |
| { | |
| "epoch": 0.237869390733309, | |
| "grad_norm": 0.21667547494734962, | |
| "learning_rate": 8.91026230715929e-05, | |
| "loss": 0.1367, | |
| "step": 3260 | |
| }, | |
| { | |
| "epoch": 0.2393287121488508, | |
| "grad_norm": 0.20281398075174428, | |
| "learning_rate": 8.895490026203067e-05, | |
| "loss": 0.1289, | |
| "step": 3280 | |
| }, | |
| { | |
| "epoch": 0.24078803356439255, | |
| "grad_norm": 0.2869894200024577, | |
| "learning_rate": 8.880630714038087e-05, | |
| "loss": 0.1356, | |
| "step": 3300 | |
| }, | |
| { | |
| "epoch": 0.24224735497993433, | |
| "grad_norm": 0.25752530118391737, | |
| "learning_rate": 8.865684702644121e-05, | |
| "loss": 0.1265, | |
| "step": 3320 | |
| }, | |
| { | |
| "epoch": 0.2437066763954761, | |
| "grad_norm": 0.3193768272298343, | |
| "learning_rate": 8.85065232593794e-05, | |
| "loss": 0.112, | |
| "step": 3340 | |
| }, | |
| { | |
| "epoch": 0.2451659978110179, | |
| "grad_norm": 0.36091443188401096, | |
| "learning_rate": 8.835533919765844e-05, | |
| "loss": 0.095, | |
| "step": 3360 | |
| }, | |
| { | |
| "epoch": 0.24662531922655964, | |
| "grad_norm": 1.3698974484227704, | |
| "learning_rate": 8.820329821896163e-05, | |
| "loss": 0.1168, | |
| "step": 3380 | |
| }, | |
| { | |
| "epoch": 0.24808464064210142, | |
| "grad_norm": 0.2487563159790042, | |
| "learning_rate": 8.805040372011712e-05, | |
| "loss": 0.113, | |
| "step": 3400 | |
| }, | |
| { | |
| "epoch": 0.2495439620576432, | |
| "grad_norm": 0.3698970509982561, | |
| "learning_rate": 8.789665911702199e-05, | |
| "loss": 0.1215, | |
| "step": 3420 | |
| }, | |
| { | |
| "epoch": 0.25100328347318496, | |
| "grad_norm": 0.1876341055377898, | |
| "learning_rate": 8.774206784456597e-05, | |
| "loss": 0.1209, | |
| "step": 3440 | |
| }, | |
| { | |
| "epoch": 0.25246260488872674, | |
| "grad_norm": 0.24295665772880776, | |
| "learning_rate": 8.758663335655469e-05, | |
| "loss": 0.1229, | |
| "step": 3460 | |
| }, | |
| { | |
| "epoch": 0.2539219263042685, | |
| "grad_norm": 0.2689776977679599, | |
| "learning_rate": 8.743035912563244e-05, | |
| "loss": 0.0832, | |
| "step": 3480 | |
| }, | |
| { | |
| "epoch": 0.2553812477198103, | |
| "grad_norm": 0.379412893920958, | |
| "learning_rate": 8.727324864320472e-05, | |
| "loss": 0.1106, | |
| "step": 3500 | |
| }, | |
| { | |
| "epoch": 0.2568405691353521, | |
| "grad_norm": 0.31695262624414844, | |
| "learning_rate": 8.711530541936017e-05, | |
| "loss": 0.1023, | |
| "step": 3520 | |
| }, | |
| { | |
| "epoch": 0.2582998905508938, | |
| "grad_norm": 0.21783049074759805, | |
| "learning_rate": 8.695653298279208e-05, | |
| "loss": 0.1009, | |
| "step": 3540 | |
| }, | |
| { | |
| "epoch": 0.2597592119664356, | |
| "grad_norm": 0.2703353850483838, | |
| "learning_rate": 8.67969348807197e-05, | |
| "loss": 0.1211, | |
| "step": 3560 | |
| }, | |
| { | |
| "epoch": 0.26121853338197737, | |
| "grad_norm": 0.1932384187079349, | |
| "learning_rate": 8.663651467880885e-05, | |
| "loss": 0.1039, | |
| "step": 3580 | |
| }, | |
| { | |
| "epoch": 0.26267785479751915, | |
| "grad_norm": 0.28285556612386037, | |
| "learning_rate": 8.647527596109237e-05, | |
| "loss": 0.1158, | |
| "step": 3600 | |
| }, | |
| { | |
| "epoch": 0.26413717621306093, | |
| "grad_norm": 0.32813759107134893, | |
| "learning_rate": 8.631322232988994e-05, | |
| "loss": 0.1311, | |
| "step": 3620 | |
| }, | |
| { | |
| "epoch": 0.2655964976286027, | |
| "grad_norm": 0.2312850774172843, | |
| "learning_rate": 8.615035740572773e-05, | |
| "loss": 0.1129, | |
| "step": 3640 | |
| }, | |
| { | |
| "epoch": 0.2670558190441445, | |
| "grad_norm": 0.3303464994517715, | |
| "learning_rate": 8.598668482725732e-05, | |
| "loss": 0.1278, | |
| "step": 3660 | |
| }, | |
| { | |
| "epoch": 0.2685151404596862, | |
| "grad_norm": 0.24992627997941425, | |
| "learning_rate": 8.582220825117467e-05, | |
| "loss": 0.0928, | |
| "step": 3680 | |
| }, | |
| { | |
| "epoch": 0.269974461875228, | |
| "grad_norm": 0.4135400436284091, | |
| "learning_rate": 8.565693135213815e-05, | |
| "loss": 0.1032, | |
| "step": 3700 | |
| }, | |
| { | |
| "epoch": 0.2714337832907698, | |
| "grad_norm": 0.16234055427123434, | |
| "learning_rate": 8.549085782268663e-05, | |
| "loss": 0.1187, | |
| "step": 3720 | |
| }, | |
| { | |
| "epoch": 0.27289310470631156, | |
| "grad_norm": 0.21509892954974083, | |
| "learning_rate": 8.532399137315693e-05, | |
| "loss": 0.1312, | |
| "step": 3740 | |
| }, | |
| { | |
| "epoch": 0.27435242612185334, | |
| "grad_norm": 0.484277155110313, | |
| "learning_rate": 8.51563357316009e-05, | |
| "loss": 0.0971, | |
| "step": 3760 | |
| }, | |
| { | |
| "epoch": 0.2758117475373951, | |
| "grad_norm": 0.4137856953789829, | |
| "learning_rate": 8.498789464370212e-05, | |
| "loss": 0.1153, | |
| "step": 3780 | |
| }, | |
| { | |
| "epoch": 0.2772710689529369, | |
| "grad_norm": 0.5336023621351729, | |
| "learning_rate": 8.48186718726923e-05, | |
| "loss": 0.1133, | |
| "step": 3800 | |
| }, | |
| { | |
| "epoch": 0.27873039036847863, | |
| "grad_norm": 0.4245926967265952, | |
| "learning_rate": 8.464867119926711e-05, | |
| "loss": 0.1188, | |
| "step": 3820 | |
| }, | |
| { | |
| "epoch": 0.2801897117840204, | |
| "grad_norm": 0.37902596075543216, | |
| "learning_rate": 8.447789642150176e-05, | |
| "loss": 0.1054, | |
| "step": 3840 | |
| }, | |
| { | |
| "epoch": 0.2816490331995622, | |
| "grad_norm": 0.31818209527759106, | |
| "learning_rate": 8.430635135476615e-05, | |
| "loss": 0.1362, | |
| "step": 3860 | |
| }, | |
| { | |
| "epoch": 0.283108354615104, | |
| "grad_norm": 0.22795920895858368, | |
| "learning_rate": 8.413403983163958e-05, | |
| "loss": 0.111, | |
| "step": 3880 | |
| }, | |
| { | |
| "epoch": 0.28456767603064576, | |
| "grad_norm": 0.37896391196946616, | |
| "learning_rate": 8.396096570182519e-05, | |
| "loss": 0.1027, | |
| "step": 3900 | |
| }, | |
| { | |
| "epoch": 0.28602699744618754, | |
| "grad_norm": 0.3346173686783235, | |
| "learning_rate": 8.378713283206389e-05, | |
| "loss": 0.1245, | |
| "step": 3920 | |
| }, | |
| { | |
| "epoch": 0.2874863188617293, | |
| "grad_norm": 0.1912037431397104, | |
| "learning_rate": 8.361254510604804e-05, | |
| "loss": 0.1106, | |
| "step": 3940 | |
| }, | |
| { | |
| "epoch": 0.28894564027727104, | |
| "grad_norm": 0.2345929112818003, | |
| "learning_rate": 8.343720642433462e-05, | |
| "loss": 0.0864, | |
| "step": 3960 | |
| }, | |
| { | |
| "epoch": 0.2904049616928128, | |
| "grad_norm": 0.2699123096192621, | |
| "learning_rate": 8.326112070425811e-05, | |
| "loss": 0.1085, | |
| "step": 3980 | |
| }, | |
| { | |
| "epoch": 0.2918642831083546, | |
| "grad_norm": 0.23429437678203588, | |
| "learning_rate": 8.308429187984297e-05, | |
| "loss": 0.1356, | |
| "step": 4000 | |
| }, | |
| { | |
| "epoch": 0.2933236045238964, | |
| "grad_norm": 0.28130939764083146, | |
| "learning_rate": 8.290672390171576e-05, | |
| "loss": 0.1102, | |
| "step": 4020 | |
| }, | |
| { | |
| "epoch": 0.29478292593943817, | |
| "grad_norm": 0.31157607453363195, | |
| "learning_rate": 8.272842073701688e-05, | |
| "loss": 0.1004, | |
| "step": 4040 | |
| }, | |
| { | |
| "epoch": 0.29624224735497995, | |
| "grad_norm": 0.34115856403247896, | |
| "learning_rate": 8.254938636931184e-05, | |
| "loss": 0.0911, | |
| "step": 4060 | |
| }, | |
| { | |
| "epoch": 0.29770156877052173, | |
| "grad_norm": 0.23348388763532596, | |
| "learning_rate": 8.236962479850247e-05, | |
| "loss": 0.0934, | |
| "step": 4080 | |
| }, | |
| { | |
| "epoch": 0.29916089018606346, | |
| "grad_norm": 0.25310795043145723, | |
| "learning_rate": 8.218914004073734e-05, | |
| "loss": 0.107, | |
| "step": 4100 | |
| }, | |
| { | |
| "epoch": 0.30062021160160524, | |
| "grad_norm": 0.2859341390716333, | |
| "learning_rate": 8.200793612832213e-05, | |
| "loss": 0.1034, | |
| "step": 4120 | |
| }, | |
| { | |
| "epoch": 0.302079533017147, | |
| "grad_norm": 0.1589307537613677, | |
| "learning_rate": 8.182601710962958e-05, | |
| "loss": 0.1024, | |
| "step": 4140 | |
| }, | |
| { | |
| "epoch": 0.3035388544326888, | |
| "grad_norm": 0.21346275027988693, | |
| "learning_rate": 8.164338704900894e-05, | |
| "loss": 0.1008, | |
| "step": 4160 | |
| }, | |
| { | |
| "epoch": 0.3049981758482306, | |
| "grad_norm": 0.23819056236182787, | |
| "learning_rate": 8.14600500266953e-05, | |
| "loss": 0.1106, | |
| "step": 4180 | |
| }, | |
| { | |
| "epoch": 0.30645749726377236, | |
| "grad_norm": 0.28549063766432503, | |
| "learning_rate": 8.127601013871829e-05, | |
| "loss": 0.1127, | |
| "step": 4200 | |
| }, | |
| { | |
| "epoch": 0.30791681867931414, | |
| "grad_norm": 0.42956976973150385, | |
| "learning_rate": 8.109127149681066e-05, | |
| "loss": 0.119, | |
| "step": 4220 | |
| }, | |
| { | |
| "epoch": 0.30937614009485587, | |
| "grad_norm": 0.4318716465775294, | |
| "learning_rate": 8.090583822831637e-05, | |
| "loss": 0.1213, | |
| "step": 4240 | |
| }, | |
| { | |
| "epoch": 0.31083546151039765, | |
| "grad_norm": 0.3083984081079968, | |
| "learning_rate": 8.071971447609847e-05, | |
| "loss": 0.1161, | |
| "step": 4260 | |
| }, | |
| { | |
| "epoch": 0.31229478292593943, | |
| "grad_norm": 0.38276763388234036, | |
| "learning_rate": 8.053290439844639e-05, | |
| "loss": 0.1277, | |
| "step": 4280 | |
| }, | |
| { | |
| "epoch": 0.3137541043414812, | |
| "grad_norm": 0.1908229423823102, | |
| "learning_rate": 8.034541216898315e-05, | |
| "loss": 0.0972, | |
| "step": 4300 | |
| }, | |
| { | |
| "epoch": 0.315213425757023, | |
| "grad_norm": 0.1542948433528475, | |
| "learning_rate": 8.01572419765721e-05, | |
| "loss": 0.0921, | |
| "step": 4320 | |
| }, | |
| { | |
| "epoch": 0.3166727471725648, | |
| "grad_norm": 0.21419210109077533, | |
| "learning_rate": 7.996839802522331e-05, | |
| "loss": 0.1182, | |
| "step": 4340 | |
| }, | |
| { | |
| "epoch": 0.31813206858810655, | |
| "grad_norm": 0.3984295641541688, | |
| "learning_rate": 7.977888453399967e-05, | |
| "loss": 0.1277, | |
| "step": 4360 | |
| }, | |
| { | |
| "epoch": 0.3195913900036483, | |
| "grad_norm": 0.23812270948662653, | |
| "learning_rate": 7.958870573692258e-05, | |
| "loss": 0.1189, | |
| "step": 4380 | |
| }, | |
| { | |
| "epoch": 0.32105071141919006, | |
| "grad_norm": 0.2698116268855808, | |
| "learning_rate": 7.939786588287743e-05, | |
| "loss": 0.1028, | |
| "step": 4400 | |
| }, | |
| { | |
| "epoch": 0.32251003283473184, | |
| "grad_norm": 0.23000489317809045, | |
| "learning_rate": 7.92063692355186e-05, | |
| "loss": 0.1179, | |
| "step": 4420 | |
| }, | |
| { | |
| "epoch": 0.3239693542502736, | |
| "grad_norm": 0.2804136973700269, | |
| "learning_rate": 7.901422007317426e-05, | |
| "loss": 0.1039, | |
| "step": 4440 | |
| }, | |
| { | |
| "epoch": 0.3254286756658154, | |
| "grad_norm": 0.24908274716197615, | |
| "learning_rate": 7.882142268875075e-05, | |
| "loss": 0.1189, | |
| "step": 4460 | |
| }, | |
| { | |
| "epoch": 0.3268879970813572, | |
| "grad_norm": 0.23252680742999735, | |
| "learning_rate": 7.862798138963672e-05, | |
| "loss": 0.1131, | |
| "step": 4480 | |
| }, | |
| { | |
| "epoch": 0.32834731849689897, | |
| "grad_norm": 0.1786470217408672, | |
| "learning_rate": 7.843390049760679e-05, | |
| "loss": 0.1073, | |
| "step": 4500 | |
| }, | |
| { | |
| "epoch": 0.3298066399124407, | |
| "grad_norm": 0.26097955557070734, | |
| "learning_rate": 7.823918434872515e-05, | |
| "loss": 0.1315, | |
| "step": 4520 | |
| }, | |
| { | |
| "epoch": 0.33126596132798247, | |
| "grad_norm": 0.32002806565738656, | |
| "learning_rate": 7.80438372932485e-05, | |
| "loss": 0.1045, | |
| "step": 4540 | |
| }, | |
| { | |
| "epoch": 0.33272528274352425, | |
| "grad_norm": 0.2131844778500901, | |
| "learning_rate": 7.784786369552905e-05, | |
| "loss": 0.0941, | |
| "step": 4560 | |
| }, | |
| { | |
| "epoch": 0.33418460415906603, | |
| "grad_norm": 0.1798301571622198, | |
| "learning_rate": 7.765126793391691e-05, | |
| "loss": 0.1088, | |
| "step": 4580 | |
| }, | |
| { | |
| "epoch": 0.3356439255746078, | |
| "grad_norm": 0.17051341313737456, | |
| "learning_rate": 7.74540544006622e-05, | |
| "loss": 0.1042, | |
| "step": 4600 | |
| }, | |
| { | |
| "epoch": 0.3371032469901496, | |
| "grad_norm": 0.22572869425970987, | |
| "learning_rate": 7.725622750181712e-05, | |
| "loss": 0.1007, | |
| "step": 4620 | |
| }, | |
| { | |
| "epoch": 0.3385625684056914, | |
| "grad_norm": 0.1968135681484681, | |
| "learning_rate": 7.70577916571373e-05, | |
| "loss": 0.0884, | |
| "step": 4640 | |
| }, | |
| { | |
| "epoch": 0.3400218898212331, | |
| "grad_norm": 0.2868939162865515, | |
| "learning_rate": 7.68587512999832e-05, | |
| "loss": 0.0973, | |
| "step": 4660 | |
| }, | |
| { | |
| "epoch": 0.3414812112367749, | |
| "grad_norm": 0.1585501010853745, | |
| "learning_rate": 7.665911087722103e-05, | |
| "loss": 0.1008, | |
| "step": 4680 | |
| }, | |
| { | |
| "epoch": 0.34294053265231667, | |
| "grad_norm": 0.4516898386815449, | |
| "learning_rate": 7.645887484912334e-05, | |
| "loss": 0.1146, | |
| "step": 4700 | |
| }, | |
| { | |
| "epoch": 0.34439985406785845, | |
| "grad_norm": 0.3086424574036419, | |
| "learning_rate": 7.625804768926944e-05, | |
| "loss": 0.1184, | |
| "step": 4720 | |
| }, | |
| { | |
| "epoch": 0.3458591754834002, | |
| "grad_norm": 0.20584040402606343, | |
| "learning_rate": 7.605663388444541e-05, | |
| "loss": 0.1147, | |
| "step": 4740 | |
| }, | |
| { | |
| "epoch": 0.347318496898942, | |
| "grad_norm": 0.1966429839833748, | |
| "learning_rate": 7.585463793454393e-05, | |
| "loss": 0.0878, | |
| "step": 4760 | |
| }, | |
| { | |
| "epoch": 0.3487778183144838, | |
| "grad_norm": 0.2445766881016643, | |
| "learning_rate": 7.56520643524636e-05, | |
| "loss": 0.13, | |
| "step": 4780 | |
| }, | |
| { | |
| "epoch": 0.3502371397300255, | |
| "grad_norm": 0.21158701765416985, | |
| "learning_rate": 7.544891766400827e-05, | |
| "loss": 0.0956, | |
| "step": 4800 | |
| }, | |
| { | |
| "epoch": 0.3516964611455673, | |
| "grad_norm": 0.4238803079261724, | |
| "learning_rate": 7.524520240778587e-05, | |
| "loss": 0.1174, | |
| "step": 4820 | |
| }, | |
| { | |
| "epoch": 0.3531557825611091, | |
| "grad_norm": 0.3095728288400587, | |
| "learning_rate": 7.504092313510697e-05, | |
| "loss": 0.1155, | |
| "step": 4840 | |
| }, | |
| { | |
| "epoch": 0.35461510397665086, | |
| "grad_norm": 0.4293980639357039, | |
| "learning_rate": 7.483608440988316e-05, | |
| "loss": 0.1079, | |
| "step": 4860 | |
| }, | |
| { | |
| "epoch": 0.35607442539219264, | |
| "grad_norm": 0.16320882425277555, | |
| "learning_rate": 7.463069080852503e-05, | |
| "loss": 0.1044, | |
| "step": 4880 | |
| }, | |
| { | |
| "epoch": 0.3575337468077344, | |
| "grad_norm": 0.1566741416723225, | |
| "learning_rate": 7.442474691983996e-05, | |
| "loss": 0.1043, | |
| "step": 4900 | |
| }, | |
| { | |
| "epoch": 0.3589930682232762, | |
| "grad_norm": 0.20253896848392447, | |
| "learning_rate": 7.421825734492963e-05, | |
| "loss": 0.1061, | |
| "step": 4920 | |
| }, | |
| { | |
| "epoch": 0.3604523896388179, | |
| "grad_norm": 0.23451042679193784, | |
| "learning_rate": 7.40112266970871e-05, | |
| "loss": 0.0984, | |
| "step": 4940 | |
| }, | |
| { | |
| "epoch": 0.3619117110543597, | |
| "grad_norm": 0.31761969552147234, | |
| "learning_rate": 7.380365960169391e-05, | |
| "loss": 0.0982, | |
| "step": 4960 | |
| }, | |
| { | |
| "epoch": 0.3633710324699015, | |
| "grad_norm": 0.19087054811891263, | |
| "learning_rate": 7.35955606961166e-05, | |
| "loss": 0.0834, | |
| "step": 4980 | |
| }, | |
| { | |
| "epoch": 0.36483035388544327, | |
| "grad_norm": 0.26333137912323057, | |
| "learning_rate": 7.338693462960324e-05, | |
| "loss": 0.115, | |
| "step": 5000 | |
| }, | |
| { | |
| "epoch": 0.36628967530098505, | |
| "grad_norm": 0.35946333892955823, | |
| "learning_rate": 7.317778606317937e-05, | |
| "loss": 0.109, | |
| "step": 5020 | |
| }, | |
| { | |
| "epoch": 0.36774899671652683, | |
| "grad_norm": 0.29217079786710687, | |
| "learning_rate": 7.296811966954411e-05, | |
| "loss": 0.1061, | |
| "step": 5040 | |
| }, | |
| { | |
| "epoch": 0.3692083181320686, | |
| "grad_norm": 0.16099974509802562, | |
| "learning_rate": 7.27579401329655e-05, | |
| "loss": 0.1023, | |
| "step": 5060 | |
| }, | |
| { | |
| "epoch": 0.37066763954761034, | |
| "grad_norm": 0.14270002573644924, | |
| "learning_rate": 7.254725214917607e-05, | |
| "loss": 0.1363, | |
| "step": 5080 | |
| }, | |
| { | |
| "epoch": 0.3721269609631521, | |
| "grad_norm": 0.2765615965792803, | |
| "learning_rate": 7.233606042526781e-05, | |
| "loss": 0.1226, | |
| "step": 5100 | |
| }, | |
| { | |
| "epoch": 0.3735862823786939, | |
| "grad_norm": 0.352531222497615, | |
| "learning_rate": 7.212436967958703e-05, | |
| "loss": 0.116, | |
| "step": 5120 | |
| }, | |
| { | |
| "epoch": 0.3750456037942357, | |
| "grad_norm": 0.3179852115996289, | |
| "learning_rate": 7.191218464162897e-05, | |
| "loss": 0.1106, | |
| "step": 5140 | |
| }, | |
| { | |
| "epoch": 0.37650492520977746, | |
| "grad_norm": 0.21044459839378338, | |
| "learning_rate": 7.169951005193207e-05, | |
| "loss": 0.0794, | |
| "step": 5160 | |
| }, | |
| { | |
| "epoch": 0.37796424662531924, | |
| "grad_norm": 0.29435428319089024, | |
| "learning_rate": 7.148635066197216e-05, | |
| "loss": 0.1098, | |
| "step": 5180 | |
| }, | |
| { | |
| "epoch": 0.379423568040861, | |
| "grad_norm": 0.3678808726276575, | |
| "learning_rate": 7.127271123405622e-05, | |
| "loss": 0.1201, | |
| "step": 5200 | |
| }, | |
| { | |
| "epoch": 0.38088288945640275, | |
| "grad_norm": 0.3444852709706006, | |
| "learning_rate": 7.105859654121602e-05, | |
| "loss": 0.1186, | |
| "step": 5220 | |
| }, | |
| { | |
| "epoch": 0.38234221087194453, | |
| "grad_norm": 0.1407926086621674, | |
| "learning_rate": 7.084401136710149e-05, | |
| "loss": 0.1187, | |
| "step": 5240 | |
| }, | |
| { | |
| "epoch": 0.3838015322874863, | |
| "grad_norm": 0.1677280774460152, | |
| "learning_rate": 7.062896050587377e-05, | |
| "loss": 0.1027, | |
| "step": 5260 | |
| }, | |
| { | |
| "epoch": 0.3852608537030281, | |
| "grad_norm": 0.2039504177031652, | |
| "learning_rate": 7.041344876209827e-05, | |
| "loss": 0.0913, | |
| "step": 5280 | |
| }, | |
| { | |
| "epoch": 0.3867201751185699, | |
| "grad_norm": 0.23188710135621454, | |
| "learning_rate": 7.019748095063712e-05, | |
| "loss": 0.1141, | |
| "step": 5300 | |
| }, | |
| { | |
| "epoch": 0.38817949653411166, | |
| "grad_norm": 0.24724368926384838, | |
| "learning_rate": 6.998106189654176e-05, | |
| "loss": 0.1037, | |
| "step": 5320 | |
| }, | |
| { | |
| "epoch": 0.38963881794965344, | |
| "grad_norm": 0.32541077271508595, | |
| "learning_rate": 6.976419643494504e-05, | |
| "loss": 0.1199, | |
| "step": 5340 | |
| }, | |
| { | |
| "epoch": 0.39109813936519516, | |
| "grad_norm": 0.2385801144209165, | |
| "learning_rate": 6.954688941095327e-05, | |
| "loss": 0.0933, | |
| "step": 5360 | |
| }, | |
| { | |
| "epoch": 0.39255746078073694, | |
| "grad_norm": 0.20684187377778415, | |
| "learning_rate": 6.932914567953792e-05, | |
| "loss": 0.1046, | |
| "step": 5380 | |
| }, | |
| { | |
| "epoch": 0.3940167821962787, | |
| "grad_norm": 0.22717043667652123, | |
| "learning_rate": 6.91109701054272e-05, | |
| "loss": 0.0973, | |
| "step": 5400 | |
| }, | |
| { | |
| "epoch": 0.3954761036118205, | |
| "grad_norm": 0.1784316153067281, | |
| "learning_rate": 6.889236756299732e-05, | |
| "loss": 0.122, | |
| "step": 5420 | |
| }, | |
| { | |
| "epoch": 0.3969354250273623, | |
| "grad_norm": 0.2992073258265761, | |
| "learning_rate": 6.867334293616361e-05, | |
| "loss": 0.1064, | |
| "step": 5440 | |
| }, | |
| { | |
| "epoch": 0.39839474644290407, | |
| "grad_norm": 0.35809119189138666, | |
| "learning_rate": 6.845390111827142e-05, | |
| "loss": 0.1215, | |
| "step": 5460 | |
| }, | |
| { | |
| "epoch": 0.39985406785844585, | |
| "grad_norm": 0.24770942776501734, | |
| "learning_rate": 6.823404701198683e-05, | |
| "loss": 0.1015, | |
| "step": 5480 | |
| }, | |
| { | |
| "epoch": 0.4013133892739876, | |
| "grad_norm": 0.2676532625134847, | |
| "learning_rate": 6.801378552918697e-05, | |
| "loss": 0.1024, | |
| "step": 5500 | |
| }, | |
| { | |
| "epoch": 0.40277271068952936, | |
| "grad_norm": 0.17695706700427993, | |
| "learning_rate": 6.779312159085051e-05, | |
| "loss": 0.0866, | |
| "step": 5520 | |
| }, | |
| { | |
| "epoch": 0.40423203210507114, | |
| "grad_norm": 0.22513044418817704, | |
| "learning_rate": 6.757206012694751e-05, | |
| "loss": 0.0898, | |
| "step": 5540 | |
| }, | |
| { | |
| "epoch": 0.4056913535206129, | |
| "grad_norm": 0.24221056958067821, | |
| "learning_rate": 6.735060607632937e-05, | |
| "loss": 0.0923, | |
| "step": 5560 | |
| }, | |
| { | |
| "epoch": 0.4071506749361547, | |
| "grad_norm": 0.2516010085453918, | |
| "learning_rate": 6.71287643866185e-05, | |
| "loss": 0.0821, | |
| "step": 5580 | |
| }, | |
| { | |
| "epoch": 0.4086099963516965, | |
| "grad_norm": 0.16792435621352286, | |
| "learning_rate": 6.690654001409773e-05, | |
| "loss": 0.1064, | |
| "step": 5600 | |
| }, | |
| { | |
| "epoch": 0.41006931776723826, | |
| "grad_norm": 0.24148115931630454, | |
| "learning_rate": 6.668393792359967e-05, | |
| "loss": 0.1002, | |
| "step": 5620 | |
| }, | |
| { | |
| "epoch": 0.41152863918278, | |
| "grad_norm": 0.20454443340824457, | |
| "learning_rate": 6.646096308839564e-05, | |
| "loss": 0.0955, | |
| "step": 5640 | |
| }, | |
| { | |
| "epoch": 0.41298796059832177, | |
| "grad_norm": 0.22632163953996756, | |
| "learning_rate": 6.623762049008475e-05, | |
| "loss": 0.1067, | |
| "step": 5660 | |
| }, | |
| { | |
| "epoch": 0.41444728201386355, | |
| "grad_norm": 0.18189039381318844, | |
| "learning_rate": 6.60139151184824e-05, | |
| "loss": 0.0999, | |
| "step": 5680 | |
| }, | |
| { | |
| "epoch": 0.41590660342940533, | |
| "grad_norm": 0.2179009200010762, | |
| "learning_rate": 6.578985197150893e-05, | |
| "loss": 0.0897, | |
| "step": 5700 | |
| }, | |
| { | |
| "epoch": 0.4173659248449471, | |
| "grad_norm": 0.25728882549010346, | |
| "learning_rate": 6.5565436055078e-05, | |
| "loss": 0.092, | |
| "step": 5720 | |
| }, | |
| { | |
| "epoch": 0.4188252462604889, | |
| "grad_norm": 0.21132545096135683, | |
| "learning_rate": 6.53406723829846e-05, | |
| "loss": 0.1093, | |
| "step": 5740 | |
| }, | |
| { | |
| "epoch": 0.4202845676760306, | |
| "grad_norm": 0.3259354513604496, | |
| "learning_rate": 6.511556597679313e-05, | |
| "loss": 0.0939, | |
| "step": 5760 | |
| }, | |
| { | |
| "epoch": 0.4217438890915724, | |
| "grad_norm": 0.14428503360326553, | |
| "learning_rate": 6.48901218657252e-05, | |
| "loss": 0.113, | |
| "step": 5780 | |
| }, | |
| { | |
| "epoch": 0.4232032105071142, | |
| "grad_norm": 0.20596362998777773, | |
| "learning_rate": 6.466434508654729e-05, | |
| "loss": 0.1221, | |
| "step": 5800 | |
| }, | |
| { | |
| "epoch": 0.42466253192265596, | |
| "grad_norm": 0.2076732092116106, | |
| "learning_rate": 6.443824068345814e-05, | |
| "loss": 0.0981, | |
| "step": 5820 | |
| }, | |
| { | |
| "epoch": 0.42612185333819774, | |
| "grad_norm": 0.23210274197312583, | |
| "learning_rate": 6.421181370797616e-05, | |
| "loss": 0.1091, | |
| "step": 5840 | |
| }, | |
| { | |
| "epoch": 0.4275811747537395, | |
| "grad_norm": 0.34773891076479957, | |
| "learning_rate": 6.39850692188265e-05, | |
| "loss": 0.1152, | |
| "step": 5860 | |
| }, | |
| { | |
| "epoch": 0.4290404961692813, | |
| "grad_norm": 0.25768381087645986, | |
| "learning_rate": 6.375801228182804e-05, | |
| "loss": 0.0833, | |
| "step": 5880 | |
| }, | |
| { | |
| "epoch": 0.43049981758482303, | |
| "grad_norm": 0.20272718271445447, | |
| "learning_rate": 6.353064796978025e-05, | |
| "loss": 0.0821, | |
| "step": 5900 | |
| }, | |
| { | |
| "epoch": 0.4319591390003648, | |
| "grad_norm": 0.3769192263755196, | |
| "learning_rate": 6.330298136234981e-05, | |
| "loss": 0.1047, | |
| "step": 5920 | |
| }, | |
| { | |
| "epoch": 0.4334184604159066, | |
| "grad_norm": 0.18218184597043124, | |
| "learning_rate": 6.307501754595712e-05, | |
| "loss": 0.1114, | |
| "step": 5940 | |
| }, | |
| { | |
| "epoch": 0.4348777818314484, | |
| "grad_norm": 0.20670653864614483, | |
| "learning_rate": 6.284676161366276e-05, | |
| "loss": 0.0885, | |
| "step": 5960 | |
| }, | |
| { | |
| "epoch": 0.43633710324699015, | |
| "grad_norm": 0.21541995858015078, | |
| "learning_rate": 6.261821866505353e-05, | |
| "loss": 0.1153, | |
| "step": 5980 | |
| }, | |
| { | |
| "epoch": 0.43779642466253194, | |
| "grad_norm": 0.2073422866021902, | |
| "learning_rate": 6.23893938061287e-05, | |
| "loss": 0.0958, | |
| "step": 6000 | |
| }, | |
| { | |
| "epoch": 0.4392557460780737, | |
| "grad_norm": 0.3614983325797712, | |
| "learning_rate": 6.216029214918576e-05, | |
| "loss": 0.1039, | |
| "step": 6020 | |
| }, | |
| { | |
| "epoch": 0.44071506749361544, | |
| "grad_norm": 0.25937791265177973, | |
| "learning_rate": 6.193091881270639e-05, | |
| "loss": 0.0884, | |
| "step": 6040 | |
| }, | |
| { | |
| "epoch": 0.4421743889091572, | |
| "grad_norm": 0.27236848714431067, | |
| "learning_rate": 6.17012789212419e-05, | |
| "loss": 0.104, | |
| "step": 6060 | |
| }, | |
| { | |
| "epoch": 0.443633710324699, | |
| "grad_norm": 0.2193972961735453, | |
| "learning_rate": 6.147137760529893e-05, | |
| "loss": 0.1051, | |
| "step": 6080 | |
| }, | |
| { | |
| "epoch": 0.4450930317402408, | |
| "grad_norm": 0.3288936349913114, | |
| "learning_rate": 6.124122000122474e-05, | |
| "loss": 0.1194, | |
| "step": 6100 | |
| }, | |
| { | |
| "epoch": 0.44655235315578257, | |
| "grad_norm": 0.28030638179884143, | |
| "learning_rate": 6.101081125109238e-05, | |
| "loss": 0.0987, | |
| "step": 6120 | |
| }, | |
| { | |
| "epoch": 0.44801167457132435, | |
| "grad_norm": 0.28963057861922986, | |
| "learning_rate": 6.0780156502585974e-05, | |
| "loss": 0.104, | |
| "step": 6140 | |
| }, | |
| { | |
| "epoch": 0.44947099598686613, | |
| "grad_norm": 0.2107381404732794, | |
| "learning_rate": 6.054926090888559e-05, | |
| "loss": 0.0944, | |
| "step": 6160 | |
| }, | |
| { | |
| "epoch": 0.45093031740240785, | |
| "grad_norm": 0.22426758617758438, | |
| "learning_rate": 6.031812962855212e-05, | |
| "loss": 0.1088, | |
| "step": 6180 | |
| }, | |
| { | |
| "epoch": 0.45238963881794964, | |
| "grad_norm": 0.19433617911544113, | |
| "learning_rate": 6.008676782541214e-05, | |
| "loss": 0.0934, | |
| "step": 6200 | |
| }, | |
| { | |
| "epoch": 0.4538489602334914, | |
| "grad_norm": 0.16786745480860363, | |
| "learning_rate": 5.985518066844235e-05, | |
| "loss": 0.1065, | |
| "step": 6220 | |
| }, | |
| { | |
| "epoch": 0.4553082816490332, | |
| "grad_norm": 0.28967266828058946, | |
| "learning_rate": 5.9623373331654296e-05, | |
| "loss": 0.1104, | |
| "step": 6240 | |
| }, | |
| { | |
| "epoch": 0.456767603064575, | |
| "grad_norm": 0.2857631028326485, | |
| "learning_rate": 5.9391350993978586e-05, | |
| "loss": 0.1059, | |
| "step": 6260 | |
| }, | |
| { | |
| "epoch": 0.45822692448011676, | |
| "grad_norm": 0.16887743736632202, | |
| "learning_rate": 5.915911883914937e-05, | |
| "loss": 0.0921, | |
| "step": 6280 | |
| }, | |
| { | |
| "epoch": 0.45968624589565854, | |
| "grad_norm": 0.45462588569274826, | |
| "learning_rate": 5.892668205558838e-05, | |
| "loss": 0.1062, | |
| "step": 6300 | |
| }, | |
| { | |
| "epoch": 0.46114556731120027, | |
| "grad_norm": 0.1874149476062009, | |
| "learning_rate": 5.869404583628906e-05, | |
| "loss": 0.0877, | |
| "step": 6320 | |
| }, | |
| { | |
| "epoch": 0.46260488872674205, | |
| "grad_norm": 0.2035453150986139, | |
| "learning_rate": 5.846121537870059e-05, | |
| "loss": 0.0826, | |
| "step": 6340 | |
| }, | |
| { | |
| "epoch": 0.46406421014228383, | |
| "grad_norm": 0.2009650369528326, | |
| "learning_rate": 5.822819588461167e-05, | |
| "loss": 0.0988, | |
| "step": 6360 | |
| }, | |
| { | |
| "epoch": 0.4655235315578256, | |
| "grad_norm": 0.33321536680847463, | |
| "learning_rate": 5.799499256003447e-05, | |
| "loss": 0.0827, | |
| "step": 6380 | |
| }, | |
| { | |
| "epoch": 0.4669828529733674, | |
| "grad_norm": 0.20073386151251446, | |
| "learning_rate": 5.77616106150881e-05, | |
| "loss": 0.0831, | |
| "step": 6400 | |
| }, | |
| { | |
| "epoch": 0.46844217438890917, | |
| "grad_norm": 0.32718978675294735, | |
| "learning_rate": 5.7528055263882394e-05, | |
| "loss": 0.1012, | |
| "step": 6420 | |
| }, | |
| { | |
| "epoch": 0.46990149580445095, | |
| "grad_norm": 0.21478649269135933, | |
| "learning_rate": 5.729433172440133e-05, | |
| "loss": 0.1003, | |
| "step": 6440 | |
| }, | |
| { | |
| "epoch": 0.4713608172199927, | |
| "grad_norm": 0.35055435747379776, | |
| "learning_rate": 5.706044521838645e-05, | |
| "loss": 0.1186, | |
| "step": 6460 | |
| }, | |
| { | |
| "epoch": 0.47282013863553446, | |
| "grad_norm": 0.17583973159572328, | |
| "learning_rate": 5.682640097122024e-05, | |
| "loss": 0.1025, | |
| "step": 6480 | |
| }, | |
| { | |
| "epoch": 0.47427946005107624, | |
| "grad_norm": 0.19007944644812688, | |
| "learning_rate": 5.659220421180935e-05, | |
| "loss": 0.0897, | |
| "step": 6500 | |
| }, | |
| { | |
| "epoch": 0.475738781466618, | |
| "grad_norm": 0.20795427294782404, | |
| "learning_rate": 5.635786017246782e-05, | |
| "loss": 0.1066, | |
| "step": 6520 | |
| }, | |
| { | |
| "epoch": 0.4771981028821598, | |
| "grad_norm": 0.18309256793902484, | |
| "learning_rate": 5.612337408880011e-05, | |
| "loss": 0.0845, | |
| "step": 6540 | |
| }, | |
| { | |
| "epoch": 0.4786574242977016, | |
| "grad_norm": 0.3124216891183694, | |
| "learning_rate": 5.5888751199584156e-05, | |
| "loss": 0.1096, | |
| "step": 6560 | |
| }, | |
| { | |
| "epoch": 0.48011674571324336, | |
| "grad_norm": 0.18822513299788007, | |
| "learning_rate": 5.56539967466544e-05, | |
| "loss": 0.0957, | |
| "step": 6580 | |
| }, | |
| { | |
| "epoch": 0.4815760671287851, | |
| "grad_norm": 0.26561985090282353, | |
| "learning_rate": 5.541911597478458e-05, | |
| "loss": 0.0862, | |
| "step": 6600 | |
| }, | |
| { | |
| "epoch": 0.48303538854432687, | |
| "grad_norm": 0.2720185239193176, | |
| "learning_rate": 5.5184114131570574e-05, | |
| "loss": 0.0968, | |
| "step": 6620 | |
| }, | |
| { | |
| "epoch": 0.48449470995986865, | |
| "grad_norm": 0.23325174950351915, | |
| "learning_rate": 5.494899646731322e-05, | |
| "loss": 0.0987, | |
| "step": 6640 | |
| }, | |
| { | |
| "epoch": 0.48595403137541043, | |
| "grad_norm": 0.5924963942808815, | |
| "learning_rate": 5.4713768234900956e-05, | |
| "loss": 0.0865, | |
| "step": 6660 | |
| }, | |
| { | |
| "epoch": 0.4874133527909522, | |
| "grad_norm": 0.2626819591515115, | |
| "learning_rate": 5.447843468969247e-05, | |
| "loss": 0.0933, | |
| "step": 6680 | |
| }, | |
| { | |
| "epoch": 0.488872674206494, | |
| "grad_norm": 0.30178159697329016, | |
| "learning_rate": 5.4243001089399305e-05, | |
| "loss": 0.094, | |
| "step": 6700 | |
| }, | |
| { | |
| "epoch": 0.4903319956220358, | |
| "grad_norm": 0.32298125922899934, | |
| "learning_rate": 5.400747269396842e-05, | |
| "loss": 0.0892, | |
| "step": 6720 | |
| }, | |
| { | |
| "epoch": 0.4917913170375775, | |
| "grad_norm": 0.29452417657729046, | |
| "learning_rate": 5.37718547654646e-05, | |
| "loss": 0.1021, | |
| "step": 6740 | |
| }, | |
| { | |
| "epoch": 0.4932506384531193, | |
| "grad_norm": 0.15177550207237628, | |
| "learning_rate": 5.353615256795297e-05, | |
| "loss": 0.0992, | |
| "step": 6760 | |
| }, | |
| { | |
| "epoch": 0.49470995986866106, | |
| "grad_norm": 0.3145374189570284, | |
| "learning_rate": 5.3300371367381306e-05, | |
| "loss": 0.0978, | |
| "step": 6780 | |
| }, | |
| { | |
| "epoch": 0.49616928128420285, | |
| "grad_norm": 0.28863235717217767, | |
| "learning_rate": 5.306451643146247e-05, | |
| "loss": 0.0908, | |
| "step": 6800 | |
| }, | |
| { | |
| "epoch": 0.4976286026997446, | |
| "grad_norm": 0.20954696658671135, | |
| "learning_rate": 5.2828593029556705e-05, | |
| "loss": 0.1084, | |
| "step": 6820 | |
| }, | |
| { | |
| "epoch": 0.4990879241152864, | |
| "grad_norm": 0.2199618710390789, | |
| "learning_rate": 5.2592606432553846e-05, | |
| "loss": 0.0972, | |
| "step": 6840 | |
| }, | |
| { | |
| "epoch": 0.5005472455308282, | |
| "grad_norm": 0.20271409907110083, | |
| "learning_rate": 5.235656191275561e-05, | |
| "loss": 0.0999, | |
| "step": 6860 | |
| }, | |
| { | |
| "epoch": 0.5020065669463699, | |
| "grad_norm": 0.29349205157450786, | |
| "learning_rate": 5.21204647437578e-05, | |
| "loss": 0.0931, | |
| "step": 6880 | |
| }, | |
| { | |
| "epoch": 0.5034658883619118, | |
| "grad_norm": 0.33489812333335206, | |
| "learning_rate": 5.1884320200332517e-05, | |
| "loss": 0.0996, | |
| "step": 6900 | |
| }, | |
| { | |
| "epoch": 0.5049252097774535, | |
| "grad_norm": 0.19799483926382305, | |
| "learning_rate": 5.164813355831023e-05, | |
| "loss": 0.1108, | |
| "step": 6920 | |
| }, | |
| { | |
| "epoch": 0.5063845311929952, | |
| "grad_norm": 0.3720539958012567, | |
| "learning_rate": 5.141191009446198e-05, | |
| "loss": 0.1104, | |
| "step": 6940 | |
| }, | |
| { | |
| "epoch": 0.507843852608537, | |
| "grad_norm": 0.2954464123920451, | |
| "learning_rate": 5.1175655086381466e-05, | |
| "loss": 0.11, | |
| "step": 6960 | |
| }, | |
| { | |
| "epoch": 0.5093031740240788, | |
| "grad_norm": 0.1642741168702313, | |
| "learning_rate": 5.093937381236712e-05, | |
| "loss": 0.1031, | |
| "step": 6980 | |
| }, | |
| { | |
| "epoch": 0.5107624954396206, | |
| "grad_norm": 0.20935330232427551, | |
| "learning_rate": 5.0703071551304214e-05, | |
| "loss": 0.0978, | |
| "step": 7000 | |
| }, | |
| { | |
| "epoch": 0.5122218168551623, | |
| "grad_norm": 0.1993779996730668, | |
| "learning_rate": 5.04667535825469e-05, | |
| "loss": 0.0972, | |
| "step": 7020 | |
| }, | |
| { | |
| "epoch": 0.5136811382707042, | |
| "grad_norm": 0.19115582036939682, | |
| "learning_rate": 5.023042518580022e-05, | |
| "loss": 0.0847, | |
| "step": 7040 | |
| }, | |
| { | |
| "epoch": 0.5151404596862459, | |
| "grad_norm": 0.27120517340424255, | |
| "learning_rate": 4.999409164100226e-05, | |
| "loss": 0.1042, | |
| "step": 7060 | |
| }, | |
| { | |
| "epoch": 0.5165997811017876, | |
| "grad_norm": 0.10934691309913587, | |
| "learning_rate": 4.9757758228206084e-05, | |
| "loss": 0.081, | |
| "step": 7080 | |
| }, | |
| { | |
| "epoch": 0.5180591025173295, | |
| "grad_norm": 0.24394002533972173, | |
| "learning_rate": 4.952143022746181e-05, | |
| "loss": 0.094, | |
| "step": 7100 | |
| }, | |
| { | |
| "epoch": 0.5195184239328712, | |
| "grad_norm": 0.16029024193022853, | |
| "learning_rate": 4.928511291869865e-05, | |
| "loss": 0.0826, | |
| "step": 7120 | |
| }, | |
| { | |
| "epoch": 0.520977745348413, | |
| "grad_norm": 0.23900126446494024, | |
| "learning_rate": 4.9048811581606934e-05, | |
| "loss": 0.0961, | |
| "step": 7140 | |
| }, | |
| { | |
| "epoch": 0.5224370667639547, | |
| "grad_norm": 0.19889558694631473, | |
| "learning_rate": 4.8812531495520155e-05, | |
| "loss": 0.1087, | |
| "step": 7160 | |
| }, | |
| { | |
| "epoch": 0.5238963881794966, | |
| "grad_norm": 0.23105375198475658, | |
| "learning_rate": 4.857627793929705e-05, | |
| "loss": 0.0869, | |
| "step": 7180 | |
| }, | |
| { | |
| "epoch": 0.5253557095950383, | |
| "grad_norm": 0.22834103771419306, | |
| "learning_rate": 4.8340056191203615e-05, | |
| "loss": 0.0899, | |
| "step": 7200 | |
| }, | |
| { | |
| "epoch": 0.52681503101058, | |
| "grad_norm": 0.14832148410977408, | |
| "learning_rate": 4.810387152879521e-05, | |
| "loss": 0.0824, | |
| "step": 7220 | |
| }, | |
| { | |
| "epoch": 0.5282743524261219, | |
| "grad_norm": 0.14992555758737747, | |
| "learning_rate": 4.786772922879863e-05, | |
| "loss": 0.0887, | |
| "step": 7240 | |
| }, | |
| { | |
| "epoch": 0.5297336738416636, | |
| "grad_norm": 0.2316964809656849, | |
| "learning_rate": 4.763163456699427e-05, | |
| "loss": 0.1093, | |
| "step": 7260 | |
| }, | |
| { | |
| "epoch": 0.5311929952572054, | |
| "grad_norm": 0.1330630758475358, | |
| "learning_rate": 4.739559281809818e-05, | |
| "loss": 0.1009, | |
| "step": 7280 | |
| }, | |
| { | |
| "epoch": 0.5326523166727472, | |
| "grad_norm": 0.18333429681957206, | |
| "learning_rate": 4.715960925564427e-05, | |
| "loss": 0.1004, | |
| "step": 7300 | |
| }, | |
| { | |
| "epoch": 0.534111638088289, | |
| "grad_norm": 0.270128715362079, | |
| "learning_rate": 4.6923689151866444e-05, | |
| "loss": 0.1018, | |
| "step": 7320 | |
| }, | |
| { | |
| "epoch": 0.5355709595038307, | |
| "grad_norm": 0.23984397395024673, | |
| "learning_rate": 4.6687837777580886e-05, | |
| "loss": 0.0887, | |
| "step": 7340 | |
| }, | |
| { | |
| "epoch": 0.5370302809193724, | |
| "grad_norm": 0.4475338081181222, | |
| "learning_rate": 4.645206040206824e-05, | |
| "loss": 0.1036, | |
| "step": 7360 | |
| }, | |
| { | |
| "epoch": 0.5384896023349143, | |
| "grad_norm": 0.18351540680366502, | |
| "learning_rate": 4.621636229295591e-05, | |
| "loss": 0.0868, | |
| "step": 7380 | |
| }, | |
| { | |
| "epoch": 0.539948923750456, | |
| "grad_norm": 0.32433205707718027, | |
| "learning_rate": 4.5980748716100346e-05, | |
| "loss": 0.112, | |
| "step": 7400 | |
| }, | |
| { | |
| "epoch": 0.5414082451659978, | |
| "grad_norm": 0.26936021946381955, | |
| "learning_rate": 4.574522493546944e-05, | |
| "loss": 0.0752, | |
| "step": 7420 | |
| }, | |
| { | |
| "epoch": 0.5428675665815396, | |
| "grad_norm": 0.23965825882528494, | |
| "learning_rate": 4.550979621302488e-05, | |
| "loss": 0.0987, | |
| "step": 7440 | |
| }, | |
| { | |
| "epoch": 0.5443268879970814, | |
| "grad_norm": 0.15591543995056015, | |
| "learning_rate": 4.527446780860464e-05, | |
| "loss": 0.1019, | |
| "step": 7460 | |
| }, | |
| { | |
| "epoch": 0.5457862094126231, | |
| "grad_norm": 0.1448561831202914, | |
| "learning_rate": 4.5039244979805403e-05, | |
| "loss": 0.0764, | |
| "step": 7480 | |
| }, | |
| { | |
| "epoch": 0.5472455308281649, | |
| "grad_norm": 0.13626905980903883, | |
| "learning_rate": 4.480413298186516e-05, | |
| "loss": 0.0774, | |
| "step": 7500 | |
| }, | |
| { | |
| "epoch": 0.5487048522437067, | |
| "grad_norm": 0.28395775526753664, | |
| "learning_rate": 4.456913706754573e-05, | |
| "loss": 0.0746, | |
| "step": 7520 | |
| }, | |
| { | |
| "epoch": 0.5501641736592484, | |
| "grad_norm": 0.3285254794237905, | |
| "learning_rate": 4.4334262487015474e-05, | |
| "loss": 0.0835, | |
| "step": 7540 | |
| }, | |
| { | |
| "epoch": 0.5516234950747902, | |
| "grad_norm": 0.2496952670799407, | |
| "learning_rate": 4.4099514487732e-05, | |
| "loss": 0.0935, | |
| "step": 7560 | |
| }, | |
| { | |
| "epoch": 0.553082816490332, | |
| "grad_norm": 0.19932665975830546, | |
| "learning_rate": 4.386489831432483e-05, | |
| "loss": 0.0921, | |
| "step": 7580 | |
| }, | |
| { | |
| "epoch": 0.5545421379058738, | |
| "grad_norm": 0.18400201820649104, | |
| "learning_rate": 4.3630419208478356e-05, | |
| "loss": 0.0919, | |
| "step": 7600 | |
| }, | |
| { | |
| "epoch": 0.5560014593214155, | |
| "grad_norm": 0.22155855058445367, | |
| "learning_rate": 4.339608240881462e-05, | |
| "loss": 0.0764, | |
| "step": 7620 | |
| }, | |
| { | |
| "epoch": 0.5574607807369573, | |
| "grad_norm": 0.19580138253597132, | |
| "learning_rate": 4.316189315077636e-05, | |
| "loss": 0.0897, | |
| "step": 7640 | |
| }, | |
| { | |
| "epoch": 0.5589201021524991, | |
| "grad_norm": 0.19401483076890932, | |
| "learning_rate": 4.2927856666510005e-05, | |
| "loss": 0.0757, | |
| "step": 7660 | |
| }, | |
| { | |
| "epoch": 0.5603794235680408, | |
| "grad_norm": 0.20987182850074507, | |
| "learning_rate": 4.269397818474878e-05, | |
| "loss": 0.0882, | |
| "step": 7680 | |
| }, | |
| { | |
| "epoch": 0.5618387449835827, | |
| "grad_norm": 0.35798764065337424, | |
| "learning_rate": 4.246026293069588e-05, | |
| "loss": 0.0966, | |
| "step": 7700 | |
| }, | |
| { | |
| "epoch": 0.5632980663991244, | |
| "grad_norm": 0.257029410117375, | |
| "learning_rate": 4.222671612590775e-05, | |
| "loss": 0.0947, | |
| "step": 7720 | |
| }, | |
| { | |
| "epoch": 0.5647573878146662, | |
| "grad_norm": 0.1521057062552542, | |
| "learning_rate": 4.1993342988177434e-05, | |
| "loss": 0.0804, | |
| "step": 7740 | |
| }, | |
| { | |
| "epoch": 0.566216709230208, | |
| "grad_norm": 0.1742108319695617, | |
| "learning_rate": 4.176014873141798e-05, | |
| "loss": 0.0866, | |
| "step": 7760 | |
| }, | |
| { | |
| "epoch": 0.5676760306457497, | |
| "grad_norm": 0.13073108248641252, | |
| "learning_rate": 4.152713856554595e-05, | |
| "loss": 0.0878, | |
| "step": 7780 | |
| }, | |
| { | |
| "epoch": 0.5691353520612915, | |
| "grad_norm": 0.24385879986625963, | |
| "learning_rate": 4.129431769636505e-05, | |
| "loss": 0.0854, | |
| "step": 7800 | |
| }, | |
| { | |
| "epoch": 0.5705946734768332, | |
| "grad_norm": 0.22983093362986062, | |
| "learning_rate": 4.106169132544979e-05, | |
| "loss": 0.0882, | |
| "step": 7820 | |
| }, | |
| { | |
| "epoch": 0.5720539948923751, | |
| "grad_norm": 0.2362534084443868, | |
| "learning_rate": 4.082926465002932e-05, | |
| "loss": 0.0841, | |
| "step": 7840 | |
| }, | |
| { | |
| "epoch": 0.5735133163079168, | |
| "grad_norm": 0.19860411744987813, | |
| "learning_rate": 4.0597042862871257e-05, | |
| "loss": 0.0911, | |
| "step": 7860 | |
| }, | |
| { | |
| "epoch": 0.5749726377234586, | |
| "grad_norm": 0.14469692761467015, | |
| "learning_rate": 4.0365031152165724e-05, | |
| "loss": 0.0705, | |
| "step": 7880 | |
| }, | |
| { | |
| "epoch": 0.5764319591390004, | |
| "grad_norm": 0.3026702131043989, | |
| "learning_rate": 4.0133234701409386e-05, | |
| "loss": 0.1141, | |
| "step": 7900 | |
| }, | |
| { | |
| "epoch": 0.5778912805545421, | |
| "grad_norm": 0.20135466725204315, | |
| "learning_rate": 3.99016586892897e-05, | |
| "loss": 0.0981, | |
| "step": 7920 | |
| }, | |
| { | |
| "epoch": 0.5793506019700839, | |
| "grad_norm": 0.2627174484653081, | |
| "learning_rate": 3.967030828956918e-05, | |
| "loss": 0.0886, | |
| "step": 7940 | |
| }, | |
| { | |
| "epoch": 0.5808099233856256, | |
| "grad_norm": 0.13106325907585273, | |
| "learning_rate": 3.943918867096981e-05, | |
| "loss": 0.098, | |
| "step": 7960 | |
| }, | |
| { | |
| "epoch": 0.5822692448011675, | |
| "grad_norm": 0.17654659766268968, | |
| "learning_rate": 3.9208304997057566e-05, | |
| "loss": 0.093, | |
| "step": 7980 | |
| }, | |
| { | |
| "epoch": 0.5837285662167092, | |
| "grad_norm": 0.20015436956986157, | |
| "learning_rate": 3.897766242612706e-05, | |
| "loss": 0.0874, | |
| "step": 8000 | |
| }, | |
| { | |
| "epoch": 0.585187887632251, | |
| "grad_norm": 0.17037302015456898, | |
| "learning_rate": 3.874726611108628e-05, | |
| "loss": 0.0913, | |
| "step": 8020 | |
| }, | |
| { | |
| "epoch": 0.5866472090477928, | |
| "grad_norm": 0.1687960234761524, | |
| "learning_rate": 3.8517121199341535e-05, | |
| "loss": 0.0786, | |
| "step": 8040 | |
| }, | |
| { | |
| "epoch": 0.5881065304633345, | |
| "grad_norm": 0.29620459255170106, | |
| "learning_rate": 3.8287232832682335e-05, | |
| "loss": 0.0905, | |
| "step": 8060 | |
| }, | |
| { | |
| "epoch": 0.5895658518788763, | |
| "grad_norm": 0.1947016220691186, | |
| "learning_rate": 3.805760614716662e-05, | |
| "loss": 0.0852, | |
| "step": 8080 | |
| }, | |
| { | |
| "epoch": 0.5910251732944181, | |
| "grad_norm": 0.14437887795334717, | |
| "learning_rate": 3.782824627300593e-05, | |
| "loss": 0.0931, | |
| "step": 8100 | |
| }, | |
| { | |
| "epoch": 0.5924844947099599, | |
| "grad_norm": 0.21909261220655682, | |
| "learning_rate": 3.759915833445092e-05, | |
| "loss": 0.0878, | |
| "step": 8120 | |
| }, | |
| { | |
| "epoch": 0.5939438161255016, | |
| "grad_norm": 0.14839231943289036, | |
| "learning_rate": 3.737034744967669e-05, | |
| "loss": 0.0962, | |
| "step": 8140 | |
| }, | |
| { | |
| "epoch": 0.5954031375410435, | |
| "grad_norm": 0.23104939520938056, | |
| "learning_rate": 3.714181873066857e-05, | |
| "loss": 0.0912, | |
| "step": 8160 | |
| }, | |
| { | |
| "epoch": 0.5968624589565852, | |
| "grad_norm": 0.25933402155627416, | |
| "learning_rate": 3.691357728310789e-05, | |
| "loss": 0.081, | |
| "step": 8180 | |
| }, | |
| { | |
| "epoch": 0.5983217803721269, | |
| "grad_norm": 0.24053747298935832, | |
| "learning_rate": 3.668562820625785e-05, | |
| "loss": 0.0855, | |
| "step": 8200 | |
| }, | |
| { | |
| "epoch": 0.5997811017876687, | |
| "grad_norm": 0.23989767806744336, | |
| "learning_rate": 3.6457976592849754e-05, | |
| "loss": 0.0983, | |
| "step": 8220 | |
| }, | |
| { | |
| "epoch": 0.6012404232032105, | |
| "grad_norm": 0.3322203458314159, | |
| "learning_rate": 3.6230627528968964e-05, | |
| "loss": 0.1073, | |
| "step": 8240 | |
| }, | |
| { | |
| "epoch": 0.6026997446187523, | |
| "grad_norm": 0.2388704578332255, | |
| "learning_rate": 3.6003586093941534e-05, | |
| "loss": 0.0839, | |
| "step": 8260 | |
| }, | |
| { | |
| "epoch": 0.604159066034294, | |
| "grad_norm": 0.29568333505671857, | |
| "learning_rate": 3.577685736022056e-05, | |
| "loss": 0.0986, | |
| "step": 8280 | |
| }, | |
| { | |
| "epoch": 0.6056183874498359, | |
| "grad_norm": 0.2764304009893813, | |
| "learning_rate": 3.555044639327293e-05, | |
| "loss": 0.0914, | |
| "step": 8300 | |
| }, | |
| { | |
| "epoch": 0.6070777088653776, | |
| "grad_norm": 0.1994542234940755, | |
| "learning_rate": 3.532435825146618e-05, | |
| "loss": 0.0722, | |
| "step": 8320 | |
| }, | |
| { | |
| "epoch": 0.6085370302809193, | |
| "grad_norm": 0.1850674654066651, | |
| "learning_rate": 3.509859798595537e-05, | |
| "loss": 0.1007, | |
| "step": 8340 | |
| }, | |
| { | |
| "epoch": 0.6099963516964612, | |
| "grad_norm": 0.2602392735140372, | |
| "learning_rate": 3.487317064057033e-05, | |
| "loss": 0.0795, | |
| "step": 8360 | |
| }, | |
| { | |
| "epoch": 0.6114556731120029, | |
| "grad_norm": 0.236171327726822, | |
| "learning_rate": 3.464808125170295e-05, | |
| "loss": 0.0868, | |
| "step": 8380 | |
| }, | |
| { | |
| "epoch": 0.6129149945275447, | |
| "grad_norm": 0.24511982789375833, | |
| "learning_rate": 3.442333484819462e-05, | |
| "loss": 0.1099, | |
| "step": 8400 | |
| }, | |
| { | |
| "epoch": 0.6143743159430864, | |
| "grad_norm": 0.18929289550803868, | |
| "learning_rate": 3.4198936451224006e-05, | |
| "loss": 0.0639, | |
| "step": 8420 | |
| }, | |
| { | |
| "epoch": 0.6158336373586283, | |
| "grad_norm": 0.29573231890882484, | |
| "learning_rate": 3.397489107419466e-05, | |
| "loss": 0.086, | |
| "step": 8440 | |
| }, | |
| { | |
| "epoch": 0.61729295877417, | |
| "grad_norm": 0.15110985532433188, | |
| "learning_rate": 3.3751203722623185e-05, | |
| "loss": 0.0826, | |
| "step": 8460 | |
| }, | |
| { | |
| "epoch": 0.6187522801897117, | |
| "grad_norm": 0.2240730141612783, | |
| "learning_rate": 3.352787939402734e-05, | |
| "loss": 0.1002, | |
| "step": 8480 | |
| }, | |
| { | |
| "epoch": 0.6202116016052536, | |
| "grad_norm": 0.17617940816910643, | |
| "learning_rate": 3.330492307781442e-05, | |
| "loss": 0.0814, | |
| "step": 8500 | |
| }, | |
| { | |
| "epoch": 0.6216709230207953, | |
| "grad_norm": 0.20532201236991618, | |
| "learning_rate": 3.3082339755169724e-05, | |
| "loss": 0.0866, | |
| "step": 8520 | |
| }, | |
| { | |
| "epoch": 0.6231302444363371, | |
| "grad_norm": 0.2625908408503322, | |
| "learning_rate": 3.286013439894532e-05, | |
| "loss": 0.0824, | |
| "step": 8540 | |
| }, | |
| { | |
| "epoch": 0.6245895658518789, | |
| "grad_norm": 0.1370998701397795, | |
| "learning_rate": 3.2638311973548904e-05, | |
| "loss": 0.0775, | |
| "step": 8560 | |
| }, | |
| { | |
| "epoch": 0.6260488872674207, | |
| "grad_norm": 0.16525999384073028, | |
| "learning_rate": 3.241687743483293e-05, | |
| "loss": 0.0859, | |
| "step": 8580 | |
| }, | |
| { | |
| "epoch": 0.6275082086829624, | |
| "grad_norm": 0.1019124816557733, | |
| "learning_rate": 3.2195835729983914e-05, | |
| "loss": 0.0758, | |
| "step": 8600 | |
| }, | |
| { | |
| "epoch": 0.6289675300985041, | |
| "grad_norm": 0.16025292416956477, | |
| "learning_rate": 3.1975191797411786e-05, | |
| "loss": 0.0768, | |
| "step": 8620 | |
| }, | |
| { | |
| "epoch": 0.630426851514046, | |
| "grad_norm": 0.16545554798031012, | |
| "learning_rate": 3.1754950566639685e-05, | |
| "loss": 0.0736, | |
| "step": 8640 | |
| }, | |
| { | |
| "epoch": 0.6318861729295877, | |
| "grad_norm": 0.17355211388140368, | |
| "learning_rate": 3.153511695819374e-05, | |
| "loss": 0.0735, | |
| "step": 8660 | |
| }, | |
| { | |
| "epoch": 0.6333454943451295, | |
| "grad_norm": 0.4006051455854273, | |
| "learning_rate": 3.131569588349319e-05, | |
| "loss": 0.0765, | |
| "step": 8680 | |
| }, | |
| { | |
| "epoch": 0.6348048157606713, | |
| "grad_norm": 0.24052078851822786, | |
| "learning_rate": 3.1096692244740664e-05, | |
| "loss": 0.1022, | |
| "step": 8700 | |
| }, | |
| { | |
| "epoch": 0.6362641371762131, | |
| "grad_norm": 0.1533414015506768, | |
| "learning_rate": 3.08781109348126e-05, | |
| "loss": 0.0809, | |
| "step": 8720 | |
| }, | |
| { | |
| "epoch": 0.6377234585917548, | |
| "grad_norm": 0.1340242757509518, | |
| "learning_rate": 3.0659956837149985e-05, | |
| "loss": 0.0781, | |
| "step": 8740 | |
| }, | |
| { | |
| "epoch": 0.6391827800072966, | |
| "grad_norm": 0.1691744273675545, | |
| "learning_rate": 3.0442234825649185e-05, | |
| "loss": 0.0905, | |
| "step": 8760 | |
| }, | |
| { | |
| "epoch": 0.6406421014228384, | |
| "grad_norm": 0.23981672728497094, | |
| "learning_rate": 3.0224949764553144e-05, | |
| "loss": 0.0892, | |
| "step": 8780 | |
| }, | |
| { | |
| "epoch": 0.6421014228383801, | |
| "grad_norm": 0.24286106288672432, | |
| "learning_rate": 3.000810650834269e-05, | |
| "loss": 0.0817, | |
| "step": 8800 | |
| }, | |
| { | |
| "epoch": 0.643560744253922, | |
| "grad_norm": 0.1455899222141873, | |
| "learning_rate": 2.979170990162799e-05, | |
| "loss": 0.0836, | |
| "step": 8820 | |
| }, | |
| { | |
| "epoch": 0.6450200656694637, | |
| "grad_norm": 0.22817756184035107, | |
| "learning_rate": 2.9575764779040427e-05, | |
| "loss": 0.0789, | |
| "step": 8840 | |
| }, | |
| { | |
| "epoch": 0.6464793870850055, | |
| "grad_norm": 0.24718132089424502, | |
| "learning_rate": 2.9360275965124484e-05, | |
| "loss": 0.0966, | |
| "step": 8860 | |
| }, | |
| { | |
| "epoch": 0.6479387085005472, | |
| "grad_norm": 0.2830882130796971, | |
| "learning_rate": 2.914524827423006e-05, | |
| "loss": 0.0844, | |
| "step": 8880 | |
| }, | |
| { | |
| "epoch": 0.649398029916089, | |
| "grad_norm": 0.13154886405230762, | |
| "learning_rate": 2.8930686510404848e-05, | |
| "loss": 0.0882, | |
| "step": 8900 | |
| }, | |
| { | |
| "epoch": 0.6508573513316308, | |
| "grad_norm": 0.1628509299026824, | |
| "learning_rate": 2.871659546728701e-05, | |
| "loss": 0.1051, | |
| "step": 8920 | |
| }, | |
| { | |
| "epoch": 0.6523166727471725, | |
| "grad_norm": 0.2870676908057582, | |
| "learning_rate": 2.8502979927998096e-05, | |
| "loss": 0.0856, | |
| "step": 8940 | |
| }, | |
| { | |
| "epoch": 0.6537759941627144, | |
| "grad_norm": 0.25963714561124496, | |
| "learning_rate": 2.8289844665036136e-05, | |
| "loss": 0.0961, | |
| "step": 8960 | |
| }, | |
| { | |
| "epoch": 0.6552353155782561, | |
| "grad_norm": 0.2751758771997854, | |
| "learning_rate": 2.8077194440169117e-05, | |
| "loss": 0.0788, | |
| "step": 8980 | |
| }, | |
| { | |
| "epoch": 0.6566946369937979, | |
| "grad_norm": 0.30269194089469403, | |
| "learning_rate": 2.7865034004328496e-05, | |
| "loss": 0.0832, | |
| "step": 9000 | |
| }, | |
| { | |
| "epoch": 0.6581539584093397, | |
| "grad_norm": 0.21562618811033948, | |
| "learning_rate": 2.7653368097503085e-05, | |
| "loss": 0.0885, | |
| "step": 9020 | |
| }, | |
| { | |
| "epoch": 0.6596132798248814, | |
| "grad_norm": 0.2128702249563008, | |
| "learning_rate": 2.7442201448633165e-05, | |
| "loss": 0.0847, | |
| "step": 9040 | |
| }, | |
| { | |
| "epoch": 0.6610726012404232, | |
| "grad_norm": 0.19016118127632922, | |
| "learning_rate": 2.7231538775504846e-05, | |
| "loss": 0.0836, | |
| "step": 9060 | |
| }, | |
| { | |
| "epoch": 0.6625319226559649, | |
| "grad_norm": 0.1497778616340849, | |
| "learning_rate": 2.7021384784644632e-05, | |
| "loss": 0.0754, | |
| "step": 9080 | |
| }, | |
| { | |
| "epoch": 0.6639912440715068, | |
| "grad_norm": 0.23437427731049273, | |
| "learning_rate": 2.6811744171214303e-05, | |
| "loss": 0.0734, | |
| "step": 9100 | |
| }, | |
| { | |
| "epoch": 0.6654505654870485, | |
| "grad_norm": 0.3171739171707465, | |
| "learning_rate": 2.6602621618905988e-05, | |
| "loss": 0.0907, | |
| "step": 9120 | |
| }, | |
| { | |
| "epoch": 0.6669098869025903, | |
| "grad_norm": 0.34300590939562675, | |
| "learning_rate": 2.639402179983754e-05, | |
| "loss": 0.0913, | |
| "step": 9140 | |
| }, | |
| { | |
| "epoch": 0.6683692083181321, | |
| "grad_norm": 0.2917703835582748, | |
| "learning_rate": 2.6185949374448136e-05, | |
| "loss": 0.0789, | |
| "step": 9160 | |
| }, | |
| { | |
| "epoch": 0.6698285297336738, | |
| "grad_norm": 0.22378572737202726, | |
| "learning_rate": 2.5978408991394233e-05, | |
| "loss": 0.0815, | |
| "step": 9180 | |
| }, | |
| { | |
| "epoch": 0.6712878511492156, | |
| "grad_norm": 0.12173835938546967, | |
| "learning_rate": 2.5771405287445576e-05, | |
| "loss": 0.0758, | |
| "step": 9200 | |
| }, | |
| { | |
| "epoch": 0.6727471725647574, | |
| "grad_norm": 0.14662571413630113, | |
| "learning_rate": 2.5564942887381705e-05, | |
| "loss": 0.0714, | |
| "step": 9220 | |
| }, | |
| { | |
| "epoch": 0.6742064939802992, | |
| "grad_norm": 0.27764203523675757, | |
| "learning_rate": 2.535902640388861e-05, | |
| "loss": 0.089, | |
| "step": 9240 | |
| }, | |
| { | |
| "epoch": 0.6756658153958409, | |
| "grad_norm": 0.19946437768799996, | |
| "learning_rate": 2.5153660437455634e-05, | |
| "loss": 0.0703, | |
| "step": 9260 | |
| }, | |
| { | |
| "epoch": 0.6771251368113828, | |
| "grad_norm": 0.20290178502096412, | |
| "learning_rate": 2.494884957627282e-05, | |
| "loss": 0.0821, | |
| "step": 9280 | |
| }, | |
| { | |
| "epoch": 0.6785844582269245, | |
| "grad_norm": 0.2154294699026174, | |
| "learning_rate": 2.4744598396128183e-05, | |
| "loss": 0.0974, | |
| "step": 9300 | |
| }, | |
| { | |
| "epoch": 0.6800437796424662, | |
| "grad_norm": 0.2511874205987635, | |
| "learning_rate": 2.4540911460305694e-05, | |
| "loss": 0.0825, | |
| "step": 9320 | |
| }, | |
| { | |
| "epoch": 0.681503101058008, | |
| "grad_norm": 0.1810086992559282, | |
| "learning_rate": 2.4337793319483186e-05, | |
| "loss": 0.0874, | |
| "step": 9340 | |
| }, | |
| { | |
| "epoch": 0.6829624224735498, | |
| "grad_norm": 0.16642882654866892, | |
| "learning_rate": 2.4135248511630824e-05, | |
| "loss": 0.0736, | |
| "step": 9360 | |
| }, | |
| { | |
| "epoch": 0.6844217438890916, | |
| "grad_norm": 0.18730698333216458, | |
| "learning_rate": 2.3933281561909566e-05, | |
| "loss": 0.0682, | |
| "step": 9380 | |
| }, | |
| { | |
| "epoch": 0.6858810653046333, | |
| "grad_norm": 0.1991805058045345, | |
| "learning_rate": 2.373189698257014e-05, | |
| "loss": 0.0763, | |
| "step": 9400 | |
| }, | |
| { | |
| "epoch": 0.6873403867201752, | |
| "grad_norm": 0.1692253705188508, | |
| "learning_rate": 2.353109927285226e-05, | |
| "loss": 0.0825, | |
| "step": 9420 | |
| }, | |
| { | |
| "epoch": 0.6887997081357169, | |
| "grad_norm": 0.24581712588330082, | |
| "learning_rate": 2.333089291888403e-05, | |
| "loss": 0.072, | |
| "step": 9440 | |
| }, | |
| { | |
| "epoch": 0.6902590295512586, | |
| "grad_norm": 0.15001533830747885, | |
| "learning_rate": 2.3131282393581822e-05, | |
| "loss": 0.0835, | |
| "step": 9460 | |
| }, | |
| { | |
| "epoch": 0.6917183509668005, | |
| "grad_norm": 0.2572418200751706, | |
| "learning_rate": 2.293227215655026e-05, | |
| "loss": 0.0777, | |
| "step": 9480 | |
| }, | |
| { | |
| "epoch": 0.6931776723823422, | |
| "grad_norm": 0.23284739021368367, | |
| "learning_rate": 2.273386665398256e-05, | |
| "loss": 0.0909, | |
| "step": 9500 | |
| }, | |
| { | |
| "epoch": 0.694636993797884, | |
| "grad_norm": 0.18808290689449877, | |
| "learning_rate": 2.253607031856131e-05, | |
| "loss": 0.0806, | |
| "step": 9520 | |
| }, | |
| { | |
| "epoch": 0.6960963152134257, | |
| "grad_norm": 0.21599851137275142, | |
| "learning_rate": 2.2338887569359313e-05, | |
| "loss": 0.0818, | |
| "step": 9540 | |
| }, | |
| { | |
| "epoch": 0.6975556366289676, | |
| "grad_norm": 0.16231435724106183, | |
| "learning_rate": 2.2142322811740994e-05, | |
| "loss": 0.0651, | |
| "step": 9560 | |
| }, | |
| { | |
| "epoch": 0.6990149580445093, | |
| "grad_norm": 0.16358563863117298, | |
| "learning_rate": 2.194638043726384e-05, | |
| "loss": 0.0941, | |
| "step": 9580 | |
| }, | |
| { | |
| "epoch": 0.700474279460051, | |
| "grad_norm": 0.18696897549337352, | |
| "learning_rate": 2.175106482358037e-05, | |
| "loss": 0.077, | |
| "step": 9600 | |
| }, | |
| { | |
| "epoch": 0.7019336008755929, | |
| "grad_norm": 0.27938821625111243, | |
| "learning_rate": 2.1556380334340287e-05, | |
| "loss": 0.0691, | |
| "step": 9620 | |
| }, | |
| { | |
| "epoch": 0.7033929222911346, | |
| "grad_norm": 0.276170709188811, | |
| "learning_rate": 2.136233131909301e-05, | |
| "loss": 0.0891, | |
| "step": 9640 | |
| }, | |
| { | |
| "epoch": 0.7048522437066764, | |
| "grad_norm": 0.2066704076900185, | |
| "learning_rate": 2.116892211319054e-05, | |
| "loss": 0.08, | |
| "step": 9660 | |
| }, | |
| { | |
| "epoch": 0.7063115651222182, | |
| "grad_norm": 0.34640850265903106, | |
| "learning_rate": 2.0976157037690537e-05, | |
| "loss": 0.097, | |
| "step": 9680 | |
| }, | |
| { | |
| "epoch": 0.70777088653776, | |
| "grad_norm": 0.19562017264957848, | |
| "learning_rate": 2.078404039925974e-05, | |
| "loss": 0.077, | |
| "step": 9700 | |
| }, | |
| { | |
| "epoch": 0.7092302079533017, | |
| "grad_norm": 0.15661479322837638, | |
| "learning_rate": 2.0592576490077886e-05, | |
| "loss": 0.0709, | |
| "step": 9720 | |
| }, | |
| { | |
| "epoch": 0.7106895293688434, | |
| "grad_norm": 0.2717282580952972, | |
| "learning_rate": 2.040176958774171e-05, | |
| "loss": 0.0787, | |
| "step": 9740 | |
| }, | |
| { | |
| "epoch": 0.7121488507843853, | |
| "grad_norm": 0.27436935761606934, | |
| "learning_rate": 2.021162395516944e-05, | |
| "loss": 0.0742, | |
| "step": 9760 | |
| }, | |
| { | |
| "epoch": 0.713608172199927, | |
| "grad_norm": 0.14755232060666437, | |
| "learning_rate": 2.002214384050549e-05, | |
| "loss": 0.0805, | |
| "step": 9780 | |
| }, | |
| { | |
| "epoch": 0.7150674936154688, | |
| "grad_norm": 0.24785156225144722, | |
| "learning_rate": 1.98333334770256e-05, | |
| "loss": 0.0774, | |
| "step": 9800 | |
| }, | |
| { | |
| "epoch": 0.7165268150310106, | |
| "grad_norm": 0.276700390822216, | |
| "learning_rate": 1.9645197083042217e-05, | |
| "loss": 0.081, | |
| "step": 9820 | |
| }, | |
| { | |
| "epoch": 0.7179861364465524, | |
| "grad_norm": 0.18486086426882578, | |
| "learning_rate": 1.9457738861810344e-05, | |
| "loss": 0.0663, | |
| "step": 9840 | |
| }, | |
| { | |
| "epoch": 0.7194454578620941, | |
| "grad_norm": 0.20602502291890026, | |
| "learning_rate": 1.9270963001433506e-05, | |
| "loss": 0.0826, | |
| "step": 9860 | |
| }, | |
| { | |
| "epoch": 0.7209047792776359, | |
| "grad_norm": 0.17095661359091108, | |
| "learning_rate": 1.9084873674770258e-05, | |
| "loss": 0.0764, | |
| "step": 9880 | |
| }, | |
| { | |
| "epoch": 0.7223641006931777, | |
| "grad_norm": 0.1780004922244762, | |
| "learning_rate": 1.889947503934097e-05, | |
| "loss": 0.0849, | |
| "step": 9900 | |
| }, | |
| { | |
| "epoch": 0.7238234221087194, | |
| "grad_norm": 0.17930810952796022, | |
| "learning_rate": 1.871477123723483e-05, | |
| "loss": 0.0848, | |
| "step": 9920 | |
| }, | |
| { | |
| "epoch": 0.7252827435242613, | |
| "grad_norm": 0.11631516474736948, | |
| "learning_rate": 1.853076639501749e-05, | |
| "loss": 0.0726, | |
| "step": 9940 | |
| }, | |
| { | |
| "epoch": 0.726742064939803, | |
| "grad_norm": 0.25877277599531223, | |
| "learning_rate": 1.8347464623638716e-05, | |
| "loss": 0.0799, | |
| "step": 9960 | |
| }, | |
| { | |
| "epoch": 0.7282013863553448, | |
| "grad_norm": 0.2888977002420134, | |
| "learning_rate": 1.8164870018340595e-05, | |
| "loss": 0.0808, | |
| "step": 9980 | |
| }, | |
| { | |
| "epoch": 0.7296607077708865, | |
| "grad_norm": 0.24770955121408422, | |
| "learning_rate": 1.798298665856605e-05, | |
| "loss": 0.0933, | |
| "step": 10000 | |
| }, | |
| { | |
| "epoch": 0.7311200291864283, | |
| "grad_norm": 0.17145855088561357, | |
| "learning_rate": 1.780181860786767e-05, | |
| "loss": 0.0666, | |
| "step": 10020 | |
| }, | |
| { | |
| "epoch": 0.7325793506019701, | |
| "grad_norm": 0.1888311975799085, | |
| "learning_rate": 1.7621369913816998e-05, | |
| "loss": 0.0688, | |
| "step": 10040 | |
| }, | |
| { | |
| "epoch": 0.7340386720175118, | |
| "grad_norm": 0.21899758481758985, | |
| "learning_rate": 1.7441644607913997e-05, | |
| "loss": 0.0819, | |
| "step": 10060 | |
| }, | |
| { | |
| "epoch": 0.7354979934330537, | |
| "grad_norm": 0.15837265511931634, | |
| "learning_rate": 1.7262646705497054e-05, | |
| "loss": 0.0773, | |
| "step": 10080 | |
| }, | |
| { | |
| "epoch": 0.7369573148485954, | |
| "grad_norm": 0.16230722759639937, | |
| "learning_rate": 1.708438020565325e-05, | |
| "loss": 0.083, | |
| "step": 10100 | |
| }, | |
| { | |
| "epoch": 0.7384166362641372, | |
| "grad_norm": 0.1889470693308101, | |
| "learning_rate": 1.690684909112896e-05, | |
| "loss": 0.0648, | |
| "step": 10120 | |
| }, | |
| { | |
| "epoch": 0.739875957679679, | |
| "grad_norm": 0.35825558366398547, | |
| "learning_rate": 1.6730057328241032e-05, | |
| "loss": 0.0914, | |
| "step": 10140 | |
| }, | |
| { | |
| "epoch": 0.7413352790952207, | |
| "grad_norm": 0.17595763950131643, | |
| "learning_rate": 1.6554008866787978e-05, | |
| "loss": 0.0626, | |
| "step": 10160 | |
| }, | |
| { | |
| "epoch": 0.7427946005107625, | |
| "grad_norm": 0.2191406674992623, | |
| "learning_rate": 1.6378707639961847e-05, | |
| "loss": 0.118, | |
| "step": 10180 | |
| }, | |
| { | |
| "epoch": 0.7442539219263042, | |
| "grad_norm": 0.1977690967160498, | |
| "learning_rate": 1.620415756426032e-05, | |
| "loss": 0.0825, | |
| "step": 10200 | |
| }, | |
| { | |
| "epoch": 0.7457132433418461, | |
| "grad_norm": 0.16945524182505178, | |
| "learning_rate": 1.6030362539399235e-05, | |
| "loss": 0.0721, | |
| "step": 10220 | |
| }, | |
| { | |
| "epoch": 0.7471725647573878, | |
| "grad_norm": 0.2137107271036752, | |
| "learning_rate": 1.5857326448225413e-05, | |
| "loss": 0.0933, | |
| "step": 10240 | |
| }, | |
| { | |
| "epoch": 0.7486318861729296, | |
| "grad_norm": 0.14388503061137067, | |
| "learning_rate": 1.5685053156629936e-05, | |
| "loss": 0.0697, | |
| "step": 10260 | |
| }, | |
| { | |
| "epoch": 0.7500912075884714, | |
| "grad_norm": 0.16396133345953567, | |
| "learning_rate": 1.551354651346178e-05, | |
| "loss": 0.072, | |
| "step": 10280 | |
| }, | |
| { | |
| "epoch": 0.7515505290040131, | |
| "grad_norm": 0.20758612199072954, | |
| "learning_rate": 1.534281035044183e-05, | |
| "loss": 0.0782, | |
| "step": 10300 | |
| }, | |
| { | |
| "epoch": 0.7530098504195549, | |
| "grad_norm": 0.2677007162576603, | |
| "learning_rate": 1.5172848482077251e-05, | |
| "loss": 0.088, | |
| "step": 10320 | |
| }, | |
| { | |
| "epoch": 0.7544691718350967, | |
| "grad_norm": 0.22981080747403948, | |
| "learning_rate": 1.5003664705576292e-05, | |
| "loss": 0.0675, | |
| "step": 10340 | |
| }, | |
| { | |
| "epoch": 0.7559284932506385, | |
| "grad_norm": 0.14595975992209873, | |
| "learning_rate": 1.4835262800763433e-05, | |
| "loss": 0.0598, | |
| "step": 10360 | |
| }, | |
| { | |
| "epoch": 0.7573878146661802, | |
| "grad_norm": 0.2442624180440446, | |
| "learning_rate": 1.4667646529994955e-05, | |
| "loss": 0.0803, | |
| "step": 10380 | |
| }, | |
| { | |
| "epoch": 0.758847136081722, | |
| "grad_norm": 0.1769769305897374, | |
| "learning_rate": 1.4500819638074836e-05, | |
| "loss": 0.0717, | |
| "step": 10400 | |
| }, | |
| { | |
| "epoch": 0.7603064574972638, | |
| "grad_norm": 0.22752965532855682, | |
| "learning_rate": 1.4334785852171189e-05, | |
| "loss": 0.0773, | |
| "step": 10420 | |
| }, | |
| { | |
| "epoch": 0.7617657789128055, | |
| "grad_norm": 0.17198904935541695, | |
| "learning_rate": 1.4169548881732863e-05, | |
| "loss": 0.0679, | |
| "step": 10440 | |
| }, | |
| { | |
| "epoch": 0.7632251003283473, | |
| "grad_norm": 0.18227879189264046, | |
| "learning_rate": 1.4005112418406658e-05, | |
| "loss": 0.0779, | |
| "step": 10460 | |
| }, | |
| { | |
| "epoch": 0.7646844217438891, | |
| "grad_norm": 0.17918974376239177, | |
| "learning_rate": 1.3841480135954815e-05, | |
| "loss": 0.0755, | |
| "step": 10480 | |
| }, | |
| { | |
| "epoch": 0.7661437431594309, | |
| "grad_norm": 0.1808913214191542, | |
| "learning_rate": 1.3678655690172937e-05, | |
| "loss": 0.073, | |
| "step": 10500 | |
| }, | |
| { | |
| "epoch": 0.7676030645749726, | |
| "grad_norm": 0.19182604410775567, | |
| "learning_rate": 1.351664271880833e-05, | |
| "loss": 0.076, | |
| "step": 10520 | |
| }, | |
| { | |
| "epoch": 0.7690623859905145, | |
| "grad_norm": 0.2258827316579577, | |
| "learning_rate": 1.335544484147872e-05, | |
| "loss": 0.0736, | |
| "step": 10540 | |
| }, | |
| { | |
| "epoch": 0.7705217074060562, | |
| "grad_norm": 0.18920980773130505, | |
| "learning_rate": 1.3195065659591377e-05, | |
| "loss": 0.0979, | |
| "step": 10560 | |
| }, | |
| { | |
| "epoch": 0.7719810288215979, | |
| "grad_norm": 0.16526779110459391, | |
| "learning_rate": 1.303550875626266e-05, | |
| "loss": 0.0822, | |
| "step": 10580 | |
| }, | |
| { | |
| "epoch": 0.7734403502371398, | |
| "grad_norm": 0.11874076858796079, | |
| "learning_rate": 1.2876777696237957e-05, | |
| "loss": 0.0784, | |
| "step": 10600 | |
| }, | |
| { | |
| "epoch": 0.7748996716526815, | |
| "grad_norm": 0.13406374720218356, | |
| "learning_rate": 1.271887602581211e-05, | |
| "loss": 0.0607, | |
| "step": 10620 | |
| }, | |
| { | |
| "epoch": 0.7763589930682233, | |
| "grad_norm": 0.16665764155851395, | |
| "learning_rate": 1.2561807272750053e-05, | |
| "loss": 0.0775, | |
| "step": 10640 | |
| }, | |
| { | |
| "epoch": 0.777818314483765, | |
| "grad_norm": 0.1644277062999908, | |
| "learning_rate": 1.2405574946208116e-05, | |
| "loss": 0.0778, | |
| "step": 10660 | |
| }, | |
| { | |
| "epoch": 0.7792776358993069, | |
| "grad_norm": 0.36519522425925205, | |
| "learning_rate": 1.2250182536655563e-05, | |
| "loss": 0.0693, | |
| "step": 10680 | |
| }, | |
| { | |
| "epoch": 0.7807369573148486, | |
| "grad_norm": 0.16841701778065343, | |
| "learning_rate": 1.2095633515796639e-05, | |
| "loss": 0.0789, | |
| "step": 10700 | |
| }, | |
| { | |
| "epoch": 0.7821962787303903, | |
| "grad_norm": 0.23811108549083682, | |
| "learning_rate": 1.1941931336492984e-05, | |
| "loss": 0.07, | |
| "step": 10720 | |
| }, | |
| { | |
| "epoch": 0.7836556001459322, | |
| "grad_norm": 0.18785323479769722, | |
| "learning_rate": 1.1789079432686501e-05, | |
| "loss": 0.0679, | |
| "step": 10740 | |
| }, | |
| { | |
| "epoch": 0.7851149215614739, | |
| "grad_norm": 0.2187740182681998, | |
| "learning_rate": 1.1637081219322648e-05, | |
| "loss": 0.0783, | |
| "step": 10760 | |
| }, | |
| { | |
| "epoch": 0.7865742429770157, | |
| "grad_norm": 0.2083550955615201, | |
| "learning_rate": 1.1485940092274117e-05, | |
| "loss": 0.0847, | |
| "step": 10780 | |
| }, | |
| { | |
| "epoch": 0.7880335643925575, | |
| "grad_norm": 0.22098387990816473, | |
| "learning_rate": 1.1335659428265012e-05, | |
| "loss": 0.0741, | |
| "step": 10800 | |
| }, | |
| { | |
| "epoch": 0.7894928858080993, | |
| "grad_norm": 0.25484065655198496, | |
| "learning_rate": 1.1186242584795331e-05, | |
| "loss": 0.0743, | |
| "step": 10820 | |
| }, | |
| { | |
| "epoch": 0.790952207223641, | |
| "grad_norm": 0.3490731944764208, | |
| "learning_rate": 1.1037692900066038e-05, | |
| "loss": 0.0847, | |
| "step": 10840 | |
| }, | |
| { | |
| "epoch": 0.7924115286391827, | |
| "grad_norm": 0.16585089936476097, | |
| "learning_rate": 1.0890013692904411e-05, | |
| "loss": 0.0615, | |
| "step": 10860 | |
| }, | |
| { | |
| "epoch": 0.7938708500547246, | |
| "grad_norm": 0.11879388400453102, | |
| "learning_rate": 1.0743208262689958e-05, | |
| "loss": 0.0866, | |
| "step": 10880 | |
| }, | |
| { | |
| "epoch": 0.7953301714702663, | |
| "grad_norm": 0.16974280932555014, | |
| "learning_rate": 1.0597279889280649e-05, | |
| "loss": 0.0711, | |
| "step": 10900 | |
| }, | |
| { | |
| "epoch": 0.7967894928858081, | |
| "grad_norm": 0.2879906296131935, | |
| "learning_rate": 1.0452231832939669e-05, | |
| "loss": 0.087, | |
| "step": 10920 | |
| }, | |
| { | |
| "epoch": 0.7982488143013499, | |
| "grad_norm": 0.30448023595487583, | |
| "learning_rate": 1.0308067334262578e-05, | |
| "loss": 0.079, | |
| "step": 10940 | |
| }, | |
| { | |
| "epoch": 0.7997081357168917, | |
| "grad_norm": 0.1346240174177682, | |
| "learning_rate": 1.0164789614104909e-05, | |
| "loss": 0.0663, | |
| "step": 10960 | |
| }, | |
| { | |
| "epoch": 0.8011674571324334, | |
| "grad_norm": 0.11374165016723602, | |
| "learning_rate": 1.002240187351018e-05, | |
| "loss": 0.0716, | |
| "step": 10980 | |
| }, | |
| { | |
| "epoch": 0.8026267785479752, | |
| "grad_norm": 0.1995760830889922, | |
| "learning_rate": 9.880907293638447e-06, | |
| "loss": 0.0779, | |
| "step": 11000 | |
| }, | |
| { | |
| "epoch": 0.804086099963517, | |
| "grad_norm": 0.16050456710211813, | |
| "learning_rate": 9.740309035695156e-06, | |
| "loss": 0.0754, | |
| "step": 11020 | |
| }, | |
| { | |
| "epoch": 0.8055454213790587, | |
| "grad_norm": 0.20346144604531705, | |
| "learning_rate": 9.600610240860557e-06, | |
| "loss": 0.0744, | |
| "step": 11040 | |
| }, | |
| { | |
| "epoch": 0.8070047427946005, | |
| "grad_norm": 0.16473468373629052, | |
| "learning_rate": 9.461814030219518e-06, | |
| "loss": 0.066, | |
| "step": 11060 | |
| }, | |
| { | |
| "epoch": 0.8084640642101423, | |
| "grad_norm": 0.2408642242586706, | |
| "learning_rate": 9.323923504691795e-06, | |
| "loss": 0.0873, | |
| "step": 11080 | |
| }, | |
| { | |
| "epoch": 0.8099233856256841, | |
| "grad_norm": 0.18983244827766566, | |
| "learning_rate": 9.186941744962752e-06, | |
| "loss": 0.0727, | |
| "step": 11100 | |
| }, | |
| { | |
| "epoch": 0.8113827070412258, | |
| "grad_norm": 0.13201659836858892, | |
| "learning_rate": 9.050871811414535e-06, | |
| "loss": 0.0771, | |
| "step": 11120 | |
| }, | |
| { | |
| "epoch": 0.8128420284567676, | |
| "grad_norm": 0.28031452690082004, | |
| "learning_rate": 8.915716744057706e-06, | |
| "loss": 0.0854, | |
| "step": 11140 | |
| }, | |
| { | |
| "epoch": 0.8143013498723094, | |
| "grad_norm": 0.32592701500229143, | |
| "learning_rate": 8.781479562463285e-06, | |
| "loss": 0.0929, | |
| "step": 11160 | |
| }, | |
| { | |
| "epoch": 0.8157606712878511, | |
| "grad_norm": 0.2502727337909842, | |
| "learning_rate": 8.648163265695369e-06, | |
| "loss": 0.0823, | |
| "step": 11180 | |
| }, | |
| { | |
| "epoch": 0.817219992703393, | |
| "grad_norm": 0.15230810954172375, | |
| "learning_rate": 8.515770832244047e-06, | |
| "loss": 0.0713, | |
| "step": 11200 | |
| }, | |
| { | |
| "epoch": 0.8186793141189347, | |
| "grad_norm": 0.16309233407580462, | |
| "learning_rate": 8.384305219958889e-06, | |
| "loss": 0.0596, | |
| "step": 11220 | |
| }, | |
| { | |
| "epoch": 0.8201386355344765, | |
| "grad_norm": 0.23482748758834446, | |
| "learning_rate": 8.25376936598286e-06, | |
| "loss": 0.0655, | |
| "step": 11240 | |
| }, | |
| { | |
| "epoch": 0.8215979569500182, | |
| "grad_norm": 0.16819291222882218, | |
| "learning_rate": 8.1241661866867e-06, | |
| "loss": 0.0767, | |
| "step": 11260 | |
| }, | |
| { | |
| "epoch": 0.82305727836556, | |
| "grad_norm": 0.155955940914946, | |
| "learning_rate": 7.995498577603816e-06, | |
| "loss": 0.07, | |
| "step": 11280 | |
| }, | |
| { | |
| "epoch": 0.8245165997811018, | |
| "grad_norm": 0.16190605473805825, | |
| "learning_rate": 7.867769413365461e-06, | |
| "loss": 0.0695, | |
| "step": 11300 | |
| }, | |
| { | |
| "epoch": 0.8259759211966435, | |
| "grad_norm": 0.20401149032189506, | |
| "learning_rate": 7.740981547636656e-06, | |
| "loss": 0.0725, | |
| "step": 11320 | |
| }, | |
| { | |
| "epoch": 0.8274352426121854, | |
| "grad_norm": 0.18866486867252952, | |
| "learning_rate": 7.615137813052353e-06, | |
| "loss": 0.0765, | |
| "step": 11340 | |
| }, | |
| { | |
| "epoch": 0.8288945640277271, | |
| "grad_norm": 0.22917668312380834, | |
| "learning_rate": 7.490241021154154e-06, | |
| "loss": 0.0731, | |
| "step": 11360 | |
| }, | |
| { | |
| "epoch": 0.8303538854432689, | |
| "grad_norm": 0.15573965400489234, | |
| "learning_rate": 7.366293962327564e-06, | |
| "loss": 0.078, | |
| "step": 11380 | |
| }, | |
| { | |
| "epoch": 0.8318132068588107, | |
| "grad_norm": 0.20293931138222282, | |
| "learning_rate": 7.243299405739539e-06, | |
| "loss": 0.0653, | |
| "step": 11400 | |
| }, | |
| { | |
| "epoch": 0.8332725282743524, | |
| "grad_norm": 0.3734354194730838, | |
| "learning_rate": 7.1212600992767165e-06, | |
| "loss": 0.0729, | |
| "step": 11420 | |
| }, | |
| { | |
| "epoch": 0.8347318496898942, | |
| "grad_norm": 0.18655222782248948, | |
| "learning_rate": 7.0001787694839504e-06, | |
| "loss": 0.0697, | |
| "step": 11440 | |
| }, | |
| { | |
| "epoch": 0.836191171105436, | |
| "grad_norm": 0.20224114343218869, | |
| "learning_rate": 6.880058121503452e-06, | |
| "loss": 0.0672, | |
| "step": 11460 | |
| }, | |
| { | |
| "epoch": 0.8376504925209778, | |
| "grad_norm": 0.23796394228026443, | |
| "learning_rate": 6.760900839014356e-06, | |
| "loss": 0.0822, | |
| "step": 11480 | |
| }, | |
| { | |
| "epoch": 0.8391098139365195, | |
| "grad_norm": 0.20702446741177674, | |
| "learning_rate": 6.642709584172674e-06, | |
| "loss": 0.0709, | |
| "step": 11500 | |
| }, | |
| { | |
| "epoch": 0.8405691353520612, | |
| "grad_norm": 0.21404136231074256, | |
| "learning_rate": 6.525486997551933e-06, | |
| "loss": 0.0647, | |
| "step": 11520 | |
| }, | |
| { | |
| "epoch": 0.8420284567676031, | |
| "grad_norm": 0.19107414563165565, | |
| "learning_rate": 6.409235698084093e-06, | |
| "loss": 0.0704, | |
| "step": 11540 | |
| }, | |
| { | |
| "epoch": 0.8434877781831448, | |
| "grad_norm": 0.14301208487157718, | |
| "learning_rate": 6.293958283001122e-06, | |
| "loss": 0.0638, | |
| "step": 11560 | |
| }, | |
| { | |
| "epoch": 0.8449470995986866, | |
| "grad_norm": 0.16690480624670154, | |
| "learning_rate": 6.179657327776872e-06, | |
| "loss": 0.0718, | |
| "step": 11580 | |
| }, | |
| { | |
| "epoch": 0.8464064210142284, | |
| "grad_norm": 0.14535518790707497, | |
| "learning_rate": 6.066335386069616e-06, | |
| "loss": 0.064, | |
| "step": 11600 | |
| }, | |
| { | |
| "epoch": 0.8478657424297702, | |
| "grad_norm": 0.25689609440186906, | |
| "learning_rate": 5.953994989664952e-06, | |
| "loss": 0.0739, | |
| "step": 11620 | |
| }, | |
| { | |
| "epoch": 0.8493250638453119, | |
| "grad_norm": 0.2874364243397915, | |
| "learning_rate": 5.842638648419252e-06, | |
| "loss": 0.0798, | |
| "step": 11640 | |
| }, | |
| { | |
| "epoch": 0.8507843852608536, | |
| "grad_norm": 0.32559338716955627, | |
| "learning_rate": 5.7322688502036145e-06, | |
| "loss": 0.0795, | |
| "step": 11660 | |
| }, | |
| { | |
| "epoch": 0.8522437066763955, | |
| "grad_norm": 0.26451174523619075, | |
| "learning_rate": 5.622888060848225e-06, | |
| "loss": 0.0638, | |
| "step": 11680 | |
| }, | |
| { | |
| "epoch": 0.8537030280919372, | |
| "grad_norm": 0.15011807077671308, | |
| "learning_rate": 5.51449872408733e-06, | |
| "loss": 0.0799, | |
| "step": 11700 | |
| }, | |
| { | |
| "epoch": 0.855162349507479, | |
| "grad_norm": 0.15800610605357415, | |
| "learning_rate": 5.407103261504565e-06, | |
| "loss": 0.0633, | |
| "step": 11720 | |
| }, | |
| { | |
| "epoch": 0.8566216709230208, | |
| "grad_norm": 0.2885871145585813, | |
| "learning_rate": 5.300704072478918e-06, | |
| "loss": 0.0814, | |
| "step": 11740 | |
| }, | |
| { | |
| "epoch": 0.8580809923385626, | |
| "grad_norm": 0.19478064630131137, | |
| "learning_rate": 5.195303534131124e-06, | |
| "loss": 0.0708, | |
| "step": 11760 | |
| }, | |
| { | |
| "epoch": 0.8595403137541043, | |
| "grad_norm": 0.1669746056578264, | |
| "learning_rate": 5.090904001270502e-06, | |
| "loss": 0.0662, | |
| "step": 11780 | |
| }, | |
| { | |
| "epoch": 0.8609996351696461, | |
| "grad_norm": 0.19509662064642885, | |
| "learning_rate": 4.987507806342395e-06, | |
| "loss": 0.0604, | |
| "step": 11800 | |
| }, | |
| { | |
| "epoch": 0.8624589565851879, | |
| "grad_norm": 0.13302036796781258, | |
| "learning_rate": 4.885117259376021e-06, | |
| "loss": 0.0665, | |
| "step": 11820 | |
| }, | |
| { | |
| "epoch": 0.8639182780007296, | |
| "grad_norm": 0.2705229793863414, | |
| "learning_rate": 4.783734647932891e-06, | |
| "loss": 0.0812, | |
| "step": 11840 | |
| }, | |
| { | |
| "epoch": 0.8653775994162715, | |
| "grad_norm": 0.21217950595702748, | |
| "learning_rate": 4.683362237055716e-06, | |
| "loss": 0.0851, | |
| "step": 11860 | |
| }, | |
| { | |
| "epoch": 0.8668369208318132, | |
| "grad_norm": 0.10430228014392347, | |
| "learning_rate": 4.584002269217758e-06, | |
| "loss": 0.0797, | |
| "step": 11880 | |
| }, | |
| { | |
| "epoch": 0.868296242247355, | |
| "grad_norm": 0.18383288154386884, | |
| "learning_rate": 4.485656964272761e-06, | |
| "loss": 0.0687, | |
| "step": 11900 | |
| }, | |
| { | |
| "epoch": 0.8697555636628967, | |
| "grad_norm": 0.15062393283221726, | |
| "learning_rate": 4.388328519405321e-06, | |
| "loss": 0.0726, | |
| "step": 11920 | |
| }, | |
| { | |
| "epoch": 0.8712148850784385, | |
| "grad_norm": 0.18392906928261096, | |
| "learning_rate": 4.292019109081863e-06, | |
| "loss": 0.0728, | |
| "step": 11940 | |
| }, | |
| { | |
| "epoch": 0.8726742064939803, | |
| "grad_norm": 0.19301702379177835, | |
| "learning_rate": 4.196730885002003e-06, | |
| "loss": 0.0743, | |
| "step": 11960 | |
| }, | |
| { | |
| "epoch": 0.874133527909522, | |
| "grad_norm": 0.28144373325731764, | |
| "learning_rate": 4.102465976050495e-06, | |
| "loss": 0.0765, | |
| "step": 11980 | |
| }, | |
| { | |
| "epoch": 0.8755928493250639, | |
| "grad_norm": 0.33870680329228003, | |
| "learning_rate": 4.009226488249656e-06, | |
| "loss": 0.0741, | |
| "step": 12000 | |
| }, | |
| { | |
| "epoch": 0.8770521707406056, | |
| "grad_norm": 0.14028718131513995, | |
| "learning_rate": 3.917014504712341e-06, | |
| "loss": 0.0826, | |
| "step": 12020 | |
| }, | |
| { | |
| "epoch": 0.8785114921561474, | |
| "grad_norm": 0.16242741860000037, | |
| "learning_rate": 3.825832085595382e-06, | |
| "loss": 0.0827, | |
| "step": 12040 | |
| }, | |
| { | |
| "epoch": 0.8799708135716892, | |
| "grad_norm": 0.2236973430421186, | |
| "learning_rate": 3.73568126805357e-06, | |
| "loss": 0.0738, | |
| "step": 12060 | |
| }, | |
| { | |
| "epoch": 0.8814301349872309, | |
| "grad_norm": 0.18121546816554587, | |
| "learning_rate": 3.6465640661941305e-06, | |
| "loss": 0.0759, | |
| "step": 12080 | |
| }, | |
| { | |
| "epoch": 0.8828894564027727, | |
| "grad_norm": 0.1671473845979495, | |
| "learning_rate": 3.5584824710317433e-06, | |
| "loss": 0.0707, | |
| "step": 12100 | |
| }, | |
| { | |
| "epoch": 0.8843487778183144, | |
| "grad_norm": 0.287452266941361, | |
| "learning_rate": 3.4714384504440145e-06, | |
| "loss": 0.0702, | |
| "step": 12120 | |
| }, | |
| { | |
| "epoch": 0.8858080992338563, | |
| "grad_norm": 0.2603122182092065, | |
| "learning_rate": 3.3854339491276034e-06, | |
| "loss": 0.0763, | |
| "step": 12140 | |
| }, | |
| { | |
| "epoch": 0.887267420649398, | |
| "grad_norm": 0.23937881451583296, | |
| "learning_rate": 3.30047088855468e-06, | |
| "loss": 0.0688, | |
| "step": 12160 | |
| }, | |
| { | |
| "epoch": 0.8887267420649398, | |
| "grad_norm": 0.3034105099595713, | |
| "learning_rate": 3.2165511669300374e-06, | |
| "loss": 0.07, | |
| "step": 12180 | |
| }, | |
| { | |
| "epoch": 0.8901860634804816, | |
| "grad_norm": 0.23546891436663464, | |
| "learning_rate": 3.1336766591486986e-06, | |
| "loss": 0.0764, | |
| "step": 12200 | |
| }, | |
| { | |
| "epoch": 0.8916453848960233, | |
| "grad_norm": 0.22874430917476343, | |
| "learning_rate": 3.051849216753977e-06, | |
| "loss": 0.0813, | |
| "step": 12220 | |
| }, | |
| { | |
| "epoch": 0.8931047063115651, | |
| "grad_norm": 0.18731436997461934, | |
| "learning_rate": 2.971070667896181e-06, | |
| "loss": 0.0748, | |
| "step": 12240 | |
| }, | |
| { | |
| "epoch": 0.8945640277271069, | |
| "grad_norm": 0.28697155444258127, | |
| "learning_rate": 2.8913428172917088e-06, | |
| "loss": 0.0714, | |
| "step": 12260 | |
| }, | |
| { | |
| "epoch": 0.8960233491426487, | |
| "grad_norm": 0.1665356610579465, | |
| "learning_rate": 2.812667446182754e-06, | |
| "loss": 0.0619, | |
| "step": 12280 | |
| }, | |
| { | |
| "epoch": 0.8974826705581904, | |
| "grad_norm": 0.3780123004550475, | |
| "learning_rate": 2.735046312297512e-06, | |
| "loss": 0.0897, | |
| "step": 12300 | |
| }, | |
| { | |
| "epoch": 0.8989419919737323, | |
| "grad_norm": 0.17553586444633024, | |
| "learning_rate": 2.658481149810904e-06, | |
| "loss": 0.0795, | |
| "step": 12320 | |
| }, | |
| { | |
| "epoch": 0.900401313389274, | |
| "grad_norm": 0.22562570429434525, | |
| "learning_rate": 2.5829736693058324e-06, | |
| "loss": 0.0791, | |
| "step": 12340 | |
| }, | |
| { | |
| "epoch": 0.9018606348048157, | |
| "grad_norm": 0.18563882080258232, | |
| "learning_rate": 2.508525557734964e-06, | |
| "loss": 0.0844, | |
| "step": 12360 | |
| }, | |
| { | |
| "epoch": 0.9033199562203575, | |
| "grad_norm": 0.16316948443096166, | |
| "learning_rate": 2.4351384783830476e-06, | |
| "loss": 0.078, | |
| "step": 12380 | |
| }, | |
| { | |
| "epoch": 0.9047792776358993, | |
| "grad_norm": 0.33268252514205654, | |
| "learning_rate": 2.3628140708297387e-06, | |
| "loss": 0.0804, | |
| "step": 12400 | |
| }, | |
| { | |
| "epoch": 0.9062385990514411, | |
| "grad_norm": 0.22218951884610635, | |
| "learning_rate": 2.2915539509130056e-06, | |
| "loss": 0.0743, | |
| "step": 12420 | |
| }, | |
| { | |
| "epoch": 0.9076979204669828, | |
| "grad_norm": 0.2798071798612865, | |
| "learning_rate": 2.221359710692961e-06, | |
| "loss": 0.0797, | |
| "step": 12440 | |
| }, | |
| { | |
| "epoch": 0.9091572418825247, | |
| "grad_norm": 0.18122090589025527, | |
| "learning_rate": 2.1522329184163693e-06, | |
| "loss": 0.0829, | |
| "step": 12460 | |
| }, | |
| { | |
| "epoch": 0.9106165632980664, | |
| "grad_norm": 0.16423755657097971, | |
| "learning_rate": 2.084175118481552e-06, | |
| "loss": 0.0711, | |
| "step": 12480 | |
| }, | |
| { | |
| "epoch": 0.9120758847136081, | |
| "grad_norm": 0.26153582906662814, | |
| "learning_rate": 2.0171878314039216e-06, | |
| "loss": 0.1026, | |
| "step": 12500 | |
| }, | |
| { | |
| "epoch": 0.91353520612915, | |
| "grad_norm": 0.14423016740050856, | |
| "learning_rate": 1.951272553781974e-06, | |
| "loss": 0.0568, | |
| "step": 12520 | |
| }, | |
| { | |
| "epoch": 0.9149945275446917, | |
| "grad_norm": 0.1767728721541835, | |
| "learning_rate": 1.8864307582639018e-06, | |
| "loss": 0.0709, | |
| "step": 12540 | |
| }, | |
| { | |
| "epoch": 0.9164538489602335, | |
| "grad_norm": 0.2557552892882785, | |
| "learning_rate": 1.8226638935146368e-06, | |
| "loss": 0.0655, | |
| "step": 12560 | |
| }, | |
| { | |
| "epoch": 0.9179131703757752, | |
| "grad_norm": 0.16552993833577492, | |
| "learning_rate": 1.759973384183533e-06, | |
| "loss": 0.0778, | |
| "step": 12580 | |
| }, | |
| { | |
| "epoch": 0.9193724917913171, | |
| "grad_norm": 0.20666952535042302, | |
| "learning_rate": 1.6983606308724975e-06, | |
| "loss": 0.0594, | |
| "step": 12600 | |
| }, | |
| { | |
| "epoch": 0.9208318132068588, | |
| "grad_norm": 0.17169473621194706, | |
| "learning_rate": 1.6378270101047476e-06, | |
| "loss": 0.0615, | |
| "step": 12620 | |
| }, | |
| { | |
| "epoch": 0.9222911346224005, | |
| "grad_norm": 0.19249775498672067, | |
| "learning_rate": 1.5783738742940035e-06, | |
| "loss": 0.0768, | |
| "step": 12640 | |
| }, | |
| { | |
| "epoch": 0.9237504560379424, | |
| "grad_norm": 0.23939912325403057, | |
| "learning_rate": 1.5200025517143002e-06, | |
| "loss": 0.073, | |
| "step": 12660 | |
| }, | |
| { | |
| "epoch": 0.9252097774534841, | |
| "grad_norm": 0.1470790842962273, | |
| "learning_rate": 1.4627143464703175e-06, | |
| "loss": 0.0643, | |
| "step": 12680 | |
| }, | |
| { | |
| "epoch": 0.9266690988690259, | |
| "grad_norm": 0.14616263478217778, | |
| "learning_rate": 1.4065105384682365e-06, | |
| "loss": 0.0748, | |
| "step": 12700 | |
| }, | |
| { | |
| "epoch": 0.9281284202845677, | |
| "grad_norm": 0.19447868042741956, | |
| "learning_rate": 1.3513923833871344e-06, | |
| "loss": 0.0785, | |
| "step": 12720 | |
| }, | |
| { | |
| "epoch": 0.9295877417001095, | |
| "grad_norm": 0.2699774519408524, | |
| "learning_rate": 1.2973611126509465e-06, | |
| "loss": 0.0573, | |
| "step": 12740 | |
| }, | |
| { | |
| "epoch": 0.9310470631156512, | |
| "grad_norm": 0.2363625074829158, | |
| "learning_rate": 1.2444179334009598e-06, | |
| "loss": 0.0774, | |
| "step": 12760 | |
| }, | |
| { | |
| "epoch": 0.9325063845311929, | |
| "grad_norm": 0.21819478694246727, | |
| "learning_rate": 1.1925640284688067e-06, | |
| "loss": 0.0646, | |
| "step": 12780 | |
| }, | |
| { | |
| "epoch": 0.9339657059467348, | |
| "grad_norm": 0.21027646446608875, | |
| "learning_rate": 1.1418005563500977e-06, | |
| "loss": 0.0831, | |
| "step": 12800 | |
| }, | |
| { | |
| "epoch": 0.9354250273622765, | |
| "grad_norm": 0.16671790982554727, | |
| "learning_rate": 1.0921286511784757e-06, | |
| "loss": 0.0604, | |
| "step": 12820 | |
| }, | |
| { | |
| "epoch": 0.9368843487778183, | |
| "grad_norm": 0.21424601887889774, | |
| "learning_rate": 1.0435494227003183e-06, | |
| "loss": 0.0794, | |
| "step": 12840 | |
| }, | |
| { | |
| "epoch": 0.9383436701933601, | |
| "grad_norm": 0.2758342830425722, | |
| "learning_rate": 9.960639562499374e-07, | |
| "loss": 0.0558, | |
| "step": 12860 | |
| }, | |
| { | |
| "epoch": 0.9398029916089019, | |
| "grad_norm": 0.2067849866378926, | |
| "learning_rate": 9.496733127253243e-07, | |
| "loss": 0.0708, | |
| "step": 12880 | |
| }, | |
| { | |
| "epoch": 0.9412623130244436, | |
| "grad_norm": 0.25852268360753444, | |
| "learning_rate": 9.043785285644534e-07, | |
| "loss": 0.0658, | |
| "step": 12900 | |
| }, | |
| { | |
| "epoch": 0.9427216344399854, | |
| "grad_norm": 0.13562651531215247, | |
| "learning_rate": 8.601806157221171e-07, | |
| "loss": 0.0543, | |
| "step": 12920 | |
| }, | |
| { | |
| "epoch": 0.9441809558555272, | |
| "grad_norm": 0.1276801534608157, | |
| "learning_rate": 8.170805616473265e-07, | |
| "loss": 0.0589, | |
| "step": 12940 | |
| }, | |
| { | |
| "epoch": 0.9456402772710689, | |
| "grad_norm": 0.21656400304795456, | |
| "learning_rate": 7.750793292612469e-07, | |
| "loss": 0.0653, | |
| "step": 12960 | |
| }, | |
| { | |
| "epoch": 0.9470995986866108, | |
| "grad_norm": 0.15236754016229437, | |
| "learning_rate": 7.341778569356916e-07, | |
| "loss": 0.0861, | |
| "step": 12980 | |
| }, | |
| { | |
| "epoch": 0.9485589201021525, | |
| "grad_norm": 0.13042448104911, | |
| "learning_rate": 6.943770584721565e-07, | |
| "loss": 0.0558, | |
| "step": 13000 | |
| }, | |
| { | |
| "epoch": 0.9500182415176943, | |
| "grad_norm": 0.16814342944607893, | |
| "learning_rate": 6.556778230813743e-07, | |
| "loss": 0.0693, | |
| "step": 13020 | |
| }, | |
| { | |
| "epoch": 0.951477562933236, | |
| "grad_norm": 0.24541824676137683, | |
| "learning_rate": 6.180810153634919e-07, | |
| "loss": 0.0654, | |
| "step": 13040 | |
| }, | |
| { | |
| "epoch": 0.9529368843487778, | |
| "grad_norm": 0.23375974468426802, | |
| "learning_rate": 5.815874752887362e-07, | |
| "loss": 0.0774, | |
| "step": 13060 | |
| }, | |
| { | |
| "epoch": 0.9543962057643196, | |
| "grad_norm": 0.19559616963339815, | |
| "learning_rate": 5.461980181786397e-07, | |
| "loss": 0.079, | |
| "step": 13080 | |
| }, | |
| { | |
| "epoch": 0.9558555271798613, | |
| "grad_norm": 0.27441559379098057, | |
| "learning_rate": 5.119134346878273e-07, | |
| "loss": 0.0865, | |
| "step": 13100 | |
| }, | |
| { | |
| "epoch": 0.9573148485954032, | |
| "grad_norm": 0.27156969288522603, | |
| "learning_rate": 4.7873449078637e-07, | |
| "loss": 0.0629, | |
| "step": 13120 | |
| }, | |
| { | |
| "epoch": 0.9587741700109449, | |
| "grad_norm": 0.28696180329193466, | |
| "learning_rate": 4.466619277426476e-07, | |
| "loss": 0.0631, | |
| "step": 13140 | |
| }, | |
| { | |
| "epoch": 0.9602334914264867, | |
| "grad_norm": 0.15271798864372269, | |
| "learning_rate": 4.1569646210680156e-07, | |
| "loss": 0.063, | |
| "step": 13160 | |
| }, | |
| { | |
| "epoch": 0.9616928128420285, | |
| "grad_norm": 0.23490129701109025, | |
| "learning_rate": 3.858387856947254e-07, | |
| "loss": 0.0731, | |
| "step": 13180 | |
| }, | |
| { | |
| "epoch": 0.9631521342575702, | |
| "grad_norm": 0.22618295012189688, | |
| "learning_rate": 3.570895655725992e-07, | |
| "loss": 0.0702, | |
| "step": 13200 | |
| }, | |
| { | |
| "epoch": 0.964611455673112, | |
| "grad_norm": 0.20700633424973702, | |
| "learning_rate": 3.2944944404200153e-07, | |
| "loss": 0.0818, | |
| "step": 13220 | |
| }, | |
| { | |
| "epoch": 0.9660707770886537, | |
| "grad_norm": 0.15624758185553145, | |
| "learning_rate": 3.0291903862554873e-07, | |
| "loss": 0.0711, | |
| "step": 13240 | |
| }, | |
| { | |
| "epoch": 0.9675300985041956, | |
| "grad_norm": 0.16367727608879512, | |
| "learning_rate": 2.774989420530949e-07, | |
| "loss": 0.0682, | |
| "step": 13260 | |
| }, | |
| { | |
| "epoch": 0.9689894199197373, | |
| "grad_norm": 0.152753645721427, | |
| "learning_rate": 2.531897222485036e-07, | |
| "loss": 0.0678, | |
| "step": 13280 | |
| }, | |
| { | |
| "epoch": 0.9704487413352791, | |
| "grad_norm": 0.12746459313433045, | |
| "learning_rate": 2.2999192231694667e-07, | |
| "loss": 0.0673, | |
| "step": 13300 | |
| }, | |
| { | |
| "epoch": 0.9719080627508209, | |
| "grad_norm": 0.18022389515283288, | |
| "learning_rate": 2.0790606053276984e-07, | |
| "loss": 0.0733, | |
| "step": 13320 | |
| }, | |
| { | |
| "epoch": 0.9733673841663626, | |
| "grad_norm": 0.2043683161630788, | |
| "learning_rate": 1.8693263032793506e-07, | |
| "loss": 0.0664, | |
| "step": 13340 | |
| }, | |
| { | |
| "epoch": 0.9748267055819044, | |
| "grad_norm": 0.21315387218220452, | |
| "learning_rate": 1.6707210028095722e-07, | |
| "loss": 0.0766, | |
| "step": 13360 | |
| }, | |
| { | |
| "epoch": 0.9762860269974462, | |
| "grad_norm": 0.10838765239735793, | |
| "learning_rate": 1.4832491410649018e-07, | |
| "loss": 0.0668, | |
| "step": 13380 | |
| }, | |
| { | |
| "epoch": 0.977745348412988, | |
| "grad_norm": 0.20493249133697297, | |
| "learning_rate": 1.3069149064534603e-07, | |
| "loss": 0.0668, | |
| "step": 13400 | |
| }, | |
| { | |
| "epoch": 0.9792046698285297, | |
| "grad_norm": 0.12533859768480493, | |
| "learning_rate": 1.1417222385520232e-07, | |
| "loss": 0.0624, | |
| "step": 13420 | |
| }, | |
| { | |
| "epoch": 0.9806639912440716, | |
| "grad_norm": 0.13640407728401674, | |
| "learning_rate": 9.876748280175374e-08, | |
| "loss": 0.0648, | |
| "step": 13440 | |
| }, | |
| { | |
| "epoch": 0.9821233126596133, | |
| "grad_norm": 0.26255640137890773, | |
| "learning_rate": 8.447761165049084e-08, | |
| "loss": 0.0748, | |
| "step": 13460 | |
| }, | |
| { | |
| "epoch": 0.983582634075155, | |
| "grad_norm": 0.2045065387998203, | |
| "learning_rate": 7.130292965901176e-08, | |
| "loss": 0.0736, | |
| "step": 13480 | |
| }, | |
| { | |
| "epoch": 0.9850419554906968, | |
| "grad_norm": 0.2589541202073395, | |
| "learning_rate": 5.924373116986126e-08, | |
| "loss": 0.0804, | |
| "step": 13500 | |
| }, | |
| { | |
| "epoch": 0.9865012769062386, | |
| "grad_norm": 0.23813447695351278, | |
| "learning_rate": 4.830028560399713e-08, | |
| "loss": 0.0717, | |
| "step": 13520 | |
| }, | |
| { | |
| "epoch": 0.9879605983217804, | |
| "grad_norm": 0.22908649033900605, | |
| "learning_rate": 3.84728374547394e-08, | |
| "loss": 0.0615, | |
| "step": 13540 | |
| }, | |
| { | |
| "epoch": 0.9894199197373221, | |
| "grad_norm": 0.134381505549619, | |
| "learning_rate": 2.9761606282319164e-08, | |
| "loss": 0.0696, | |
| "step": 13560 | |
| }, | |
| { | |
| "epoch": 0.990879241152864, | |
| "grad_norm": 0.20686107675298593, | |
| "learning_rate": 2.2166786708976983e-08, | |
| "loss": 0.0608, | |
| "step": 13580 | |
| }, | |
| { | |
| "epoch": 0.9923385625684057, | |
| "grad_norm": 0.1470958235212741, | |
| "learning_rate": 1.5688548414594107e-08, | |
| "loss": 0.0722, | |
| "step": 13600 | |
| }, | |
| { | |
| "epoch": 0.9937978839839474, | |
| "grad_norm": 0.28992636074658135, | |
| "learning_rate": 1.0327036132939949e-08, | |
| "loss": 0.079, | |
| "step": 13620 | |
| }, | |
| { | |
| "epoch": 0.9952572053994893, | |
| "grad_norm": 0.1518264410524196, | |
| "learning_rate": 6.082369648396924e-09, | |
| "loss": 0.0716, | |
| "step": 13640 | |
| }, | |
| { | |
| "epoch": 0.996716526815031, | |
| "grad_norm": 0.1355086227621071, | |
| "learning_rate": 2.9546437933070104e-09, | |
| "loss": 0.0631, | |
| "step": 13660 | |
| }, | |
| { | |
| "epoch": 0.9981758482305728, | |
| "grad_norm": 0.1903490129846117, | |
| "learning_rate": 9.439284458623299e-10, | |
| "loss": 0.0774, | |
| "step": 13680 | |
| }, | |
| { | |
| "epoch": 0.9996351696461145, | |
| "grad_norm": 0.1922983216458204, | |
| "learning_rate": 5.02685285175275e-11, | |
| "loss": 0.0681, | |
| "step": 13700 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "step": 13705, | |
| "total_flos": 336607794167808.0, | |
| "train_loss": 0.10042130719436776, | |
| "train_runtime": 24357.1331, | |
| "train_samples_per_second": 4.501, | |
| "train_steps_per_second": 0.563 | |
| } | |
| ], | |
| "logging_steps": 20, | |
| "max_steps": 13705, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 1, | |
| "save_steps": 1000, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": true | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 336607794167808.0, | |
| "train_batch_size": 1, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |