Image-Text-to-Text
Transformers
Safetensors
qwen3_5
llama-factory
full
Generated from Trainer
conversational
Instructions to use furproxy/9b-125-3 with libraries, inference providers, notebooks, and local apps. Follow these links to get started.
- Libraries
- Transformers
How to use furproxy/9b-125-3 with Transformers:
# Use a pipeline as a high-level helper
from transformers import pipeline

pipe = pipeline("image-text-to-text", model="furproxy/9b-125-3")
messages = [
    {
        "role": "user",
        "content": [
            {"type": "image", "url": "https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/p-blog/candy.JPG"},
            {"type": "text", "text": "What animal is on the candy?"}
        ]
    },
]
pipe(text=messages)

# Load model directly
from transformers import AutoProcessor, AutoModelForImageTextToText

processor = AutoProcessor.from_pretrained("furproxy/9b-125-3")
model = AutoModelForImageTextToText.from_pretrained("furproxy/9b-125-3")
messages = [
    {
        "role": "user",
        "content": [
            {"type": "image", "url": "https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/p-blog/candy.JPG"},
            {"type": "text", "text": "What animal is on the candy?"}
        ]
    },
]
inputs = processor.apply_chat_template(
    messages,
    add_generation_prompt=True,
    tokenize=True,
    return_dict=True,
    return_tensors="pt",
).to(model.device)

outputs = model.generate(**inputs, max_new_tokens=40)
print(processor.decode(outputs[0][inputs["input_ids"].shape[-1]:]))
- Notebooks
- Google Colab
- Kaggle
- Local Apps Settings
- vLLM
How to use furproxy/9b-125-3 with vLLM:
Install from pip and serve model
# Install vLLM from pip:
pip install vllm

# Start the vLLM server:
vllm serve "furproxy/9b-125-3"

# Call the server using curl (OpenAI-compatible API):
curl -X POST "http://localhost:8000/v1/chat/completions" \
    -H "Content-Type: application/json" \
    --data '{
        "model": "furproxy/9b-125-3",
        "messages": [
            {
                "role": "user",
                "content": [
                    {
                        "type": "text",
                        "text": "Describe this image in one sentence."
                    },
                    {
                        "type": "image_url",
                        "image_url": {
                            "url": "https://cdn.britannica.com/61/93061-050-99147DCE/Statue-of-Liberty-Island-New-York-Bay.jpg"
                        }
                    }
                ]
            }
        ]
    }'
Use Docker
docker model run hf.co/furproxy/9b-125-3
- SGLang
How to use furproxy/9b-125-3 with SGLang:
Install from pip and serve model
# Install SGLang from pip:
pip install sglang

# Start the SGLang server:
python3 -m sglang.launch_server \
    --model-path "furproxy/9b-125-3" \
    --host 0.0.0.0 \
    --port 30000

# Call the server using curl (OpenAI-compatible API):
curl -X POST "http://localhost:30000/v1/chat/completions" \
    -H "Content-Type: application/json" \
    --data '{
        "model": "furproxy/9b-125-3",
        "messages": [
            {
                "role": "user",
                "content": [
                    {
                        "type": "text",
                        "text": "Describe this image in one sentence."
                    },
                    {
                        "type": "image_url",
                        "image_url": {
                            "url": "https://cdn.britannica.com/61/93061-050-99147DCE/Statue-of-Liberty-Island-New-York-Bay.jpg"
                        }
                    }
                ]
            }
        ]
    }'
Use Docker images
docker run --gpus all \
    --shm-size 32g \
    -p 30000:30000 \
    -v ~/.cache/huggingface:/root/.cache/huggingface \
    --env "HF_TOKEN=<secret>" \
    --ipc=host \
    lmsysorg/sglang:latest \
    python3 -m sglang.launch_server \
        --model-path "furproxy/9b-125-3" \
        --host 0.0.0.0 \
        --port 30000

# Call the server using curl (OpenAI-compatible API):
curl -X POST "http://localhost:30000/v1/chat/completions" \
    -H "Content-Type: application/json" \
    --data '{
        "model": "furproxy/9b-125-3",
        "messages": [
            {
                "role": "user",
                "content": [
                    {
                        "type": "text",
                        "text": "Describe this image in one sentence."
                    },
                    {
                        "type": "image_url",
                        "image_url": {
                            "url": "https://cdn.britannica.com/61/93061-050-99147DCE/Statue-of-Liberty-Island-New-York-Bay.jpg"
                        }
                    }
                ]
            }
        ]
    }'
- Docker Model Runner
How to use furproxy/9b-125-3 with Docker Model Runner:
docker model run hf.co/furproxy/9b-125-3
| { | |
| "best_global_step": null, | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 3.0, | |
| "eval_steps": 500, | |
| "global_step": 2781, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.002157497303128371, | |
| "grad_norm": 2.570061445236206, | |
| "learning_rate": 3.571428571428571e-09, | |
| "loss": 0.9756889939308167, | |
| "step": 2 | |
| }, | |
| { | |
| "epoch": 0.004314994606256742, | |
| "grad_norm": 2.8094983100891113, | |
| "learning_rate": 1.0714285714285715e-08, | |
| "loss": 0.6637275218963623, | |
| "step": 4 | |
| }, | |
| { | |
| "epoch": 0.006472491909385114, | |
| "grad_norm": 4.699804782867432, | |
| "learning_rate": 1.7857142857142856e-08, | |
| "loss": 0.8357824087142944, | |
| "step": 6 | |
| }, | |
| { | |
| "epoch": 0.008629989212513484, | |
| "grad_norm": 4.096808433532715, | |
| "learning_rate": 2.5e-08, | |
| "loss": 0.8224667906761169, | |
| "step": 8 | |
| }, | |
| { | |
| "epoch": 0.010787486515641856, | |
| "grad_norm": 2.105877161026001, | |
| "learning_rate": 3.214285714285714e-08, | |
| "loss": 0.7087568044662476, | |
| "step": 10 | |
| }, | |
| { | |
| "epoch": 0.012944983818770227, | |
| "grad_norm": 4.518003463745117, | |
| "learning_rate": 3.9285714285714285e-08, | |
| "loss": 0.7910435795783997, | |
| "step": 12 | |
| }, | |
| { | |
| "epoch": 0.015102481121898598, | |
| "grad_norm": 5.335687160491943, | |
| "learning_rate": 4.642857142857143e-08, | |
| "loss": 0.8569395542144775, | |
| "step": 14 | |
| }, | |
| { | |
| "epoch": 0.017259978425026967, | |
| "grad_norm": 2.8184239864349365, | |
| "learning_rate": 5.3571428571428564e-08, | |
| "loss": 1.1704014539718628, | |
| "step": 16 | |
| }, | |
| { | |
| "epoch": 0.019417475728155338, | |
| "grad_norm": 2.6054298877716064, | |
| "learning_rate": 6.071428571428572e-08, | |
| "loss": 1.0189613103866577, | |
| "step": 18 | |
| }, | |
| { | |
| "epoch": 0.021574973031283712, | |
| "grad_norm": 2.1738038063049316, | |
| "learning_rate": 6.785714285714285e-08, | |
| "loss": 0.6196831464767456, | |
| "step": 20 | |
| }, | |
| { | |
| "epoch": 0.023732470334412083, | |
| "grad_norm": 3.19342041015625, | |
| "learning_rate": 7.5e-08, | |
| "loss": 0.9074231386184692, | |
| "step": 22 | |
| }, | |
| { | |
| "epoch": 0.025889967637540454, | |
| "grad_norm": 6.756741046905518, | |
| "learning_rate": 8.214285714285714e-08, | |
| "loss": 0.80443274974823, | |
| "step": 24 | |
| }, | |
| { | |
| "epoch": 0.028047464940668825, | |
| "grad_norm": 4.307950496673584, | |
| "learning_rate": 8.928571428571429e-08, | |
| "loss": 0.7722480893135071, | |
| "step": 26 | |
| }, | |
| { | |
| "epoch": 0.030204962243797196, | |
| "grad_norm": 4.798768043518066, | |
| "learning_rate": 9.642857142857142e-08, | |
| "loss": 0.7002389430999756, | |
| "step": 28 | |
| }, | |
| { | |
| "epoch": 0.032362459546925564, | |
| "grad_norm": 4.575655460357666, | |
| "learning_rate": 1.0357142857142857e-07, | |
| "loss": 1.014316439628601, | |
| "step": 30 | |
| }, | |
| { | |
| "epoch": 0.034519956850053934, | |
| "grad_norm": 1.0345690250396729, | |
| "learning_rate": 1.107142857142857e-07, | |
| "loss": 0.6911119222640991, | |
| "step": 32 | |
| }, | |
| { | |
| "epoch": 0.036677454153182305, | |
| "grad_norm": 9.628297805786133, | |
| "learning_rate": 1.1785714285714285e-07, | |
| "loss": 1.0148026943206787, | |
| "step": 34 | |
| }, | |
| { | |
| "epoch": 0.038834951456310676, | |
| "grad_norm": 2.975783109664917, | |
| "learning_rate": 1.25e-07, | |
| "loss": 0.8927556276321411, | |
| "step": 36 | |
| }, | |
| { | |
| "epoch": 0.040992448759439054, | |
| "grad_norm": 9.633585929870605, | |
| "learning_rate": 1.3214285714285714e-07, | |
| "loss": 1.5045725107192993, | |
| "step": 38 | |
| }, | |
| { | |
| "epoch": 0.043149946062567425, | |
| "grad_norm": 6.078215599060059, | |
| "learning_rate": 1.392857142857143e-07, | |
| "loss": 0.510919988155365, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 0.045307443365695796, | |
| "grad_norm": 2.061255693435669, | |
| "learning_rate": 1.4642857142857143e-07, | |
| "loss": 0.7295307517051697, | |
| "step": 42 | |
| }, | |
| { | |
| "epoch": 0.04746494066882417, | |
| "grad_norm": 1.5910701751708984, | |
| "learning_rate": 1.5357142857142858e-07, | |
| "loss": 0.8399662375450134, | |
| "step": 44 | |
| }, | |
| { | |
| "epoch": 0.04962243797195254, | |
| "grad_norm": 2.5256459712982178, | |
| "learning_rate": 1.6071428571428573e-07, | |
| "loss": 0.6381903290748596, | |
| "step": 46 | |
| }, | |
| { | |
| "epoch": 0.05177993527508091, | |
| "grad_norm": 2.43086314201355, | |
| "learning_rate": 1.6785714285714285e-07, | |
| "loss": 0.8072210550308228, | |
| "step": 48 | |
| }, | |
| { | |
| "epoch": 0.05393743257820928, | |
| "grad_norm": 2.4569172859191895, | |
| "learning_rate": 1.75e-07, | |
| "loss": 0.6129989624023438, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 0.05609492988133765, | |
| "grad_norm": 1.5575639009475708, | |
| "learning_rate": 1.8214285714285714e-07, | |
| "loss": 0.39090248942375183, | |
| "step": 52 | |
| }, | |
| { | |
| "epoch": 0.05825242718446602, | |
| "grad_norm": 7.692088603973389, | |
| "learning_rate": 1.8928571428571426e-07, | |
| "loss": 0.9982097148895264, | |
| "step": 54 | |
| }, | |
| { | |
| "epoch": 0.06040992448759439, | |
| "grad_norm": 28.100284576416016, | |
| "learning_rate": 1.964285714285714e-07, | |
| "loss": 1.190761923789978, | |
| "step": 56 | |
| }, | |
| { | |
| "epoch": 0.06256742179072276, | |
| "grad_norm": 4.549140930175781, | |
| "learning_rate": 2.0357142857142855e-07, | |
| "loss": 0.6789318323135376, | |
| "step": 58 | |
| }, | |
| { | |
| "epoch": 0.06472491909385113, | |
| "grad_norm": 12.27080249786377, | |
| "learning_rate": 2.107142857142857e-07, | |
| "loss": 0.8964890241622925, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 0.0668824163969795, | |
| "grad_norm": 8.274192810058594, | |
| "learning_rate": 2.1785714285714284e-07, | |
| "loss": 1.1355807781219482, | |
| "step": 62 | |
| }, | |
| { | |
| "epoch": 0.06903991370010787, | |
| "grad_norm": 2.232541084289551, | |
| "learning_rate": 2.25e-07, | |
| "loss": 0.9159867763519287, | |
| "step": 64 | |
| }, | |
| { | |
| "epoch": 0.07119741100323625, | |
| "grad_norm": 3.91237735748291, | |
| "learning_rate": 2.3214285714285714e-07, | |
| "loss": 1.0847519636154175, | |
| "step": 66 | |
| }, | |
| { | |
| "epoch": 0.07335490830636461, | |
| "grad_norm": 3.247027635574341, | |
| "learning_rate": 2.392857142857143e-07, | |
| "loss": 0.8140153884887695, | |
| "step": 68 | |
| }, | |
| { | |
| "epoch": 0.07551240560949299, | |
| "grad_norm": 13.169454574584961, | |
| "learning_rate": 2.4642857142857143e-07, | |
| "loss": 0.9606142640113831, | |
| "step": 70 | |
| }, | |
| { | |
| "epoch": 0.07766990291262135, | |
| "grad_norm": 2.072512626647949, | |
| "learning_rate": 2.5357142857142855e-07, | |
| "loss": 0.7430305480957031, | |
| "step": 72 | |
| }, | |
| { | |
| "epoch": 0.07982740021574973, | |
| "grad_norm": 2.4250895977020264, | |
| "learning_rate": 2.607142857142857e-07, | |
| "loss": 0.6543456315994263, | |
| "step": 74 | |
| }, | |
| { | |
| "epoch": 0.08198489751887811, | |
| "grad_norm": 6.248291492462158, | |
| "learning_rate": 2.6785714285714284e-07, | |
| "loss": 0.7182219624519348, | |
| "step": 76 | |
| }, | |
| { | |
| "epoch": 0.08414239482200647, | |
| "grad_norm": 2.01570725440979, | |
| "learning_rate": 2.75e-07, | |
| "loss": 0.6486653685569763, | |
| "step": 78 | |
| }, | |
| { | |
| "epoch": 0.08629989212513485, | |
| "grad_norm": 4.23671817779541, | |
| "learning_rate": 2.8214285714285713e-07, | |
| "loss": 1.1015154123306274, | |
| "step": 80 | |
| }, | |
| { | |
| "epoch": 0.08845738942826321, | |
| "grad_norm": 6.89118766784668, | |
| "learning_rate": 2.892857142857143e-07, | |
| "loss": 0.7223177552223206, | |
| "step": 82 | |
| }, | |
| { | |
| "epoch": 0.09061488673139159, | |
| "grad_norm": 3.134542226791382, | |
| "learning_rate": 2.9642857142857143e-07, | |
| "loss": 0.5234851837158203, | |
| "step": 84 | |
| }, | |
| { | |
| "epoch": 0.09277238403451996, | |
| "grad_norm": 1.6814379692077637, | |
| "learning_rate": 3.0357142857142855e-07, | |
| "loss": 0.7054818868637085, | |
| "step": 86 | |
| }, | |
| { | |
| "epoch": 0.09492988133764833, | |
| "grad_norm": 2.541091203689575, | |
| "learning_rate": 3.107142857142857e-07, | |
| "loss": 0.7358066439628601, | |
| "step": 88 | |
| }, | |
| { | |
| "epoch": 0.0970873786407767, | |
| "grad_norm": 7.0923991203308105, | |
| "learning_rate": 3.1785714285714284e-07, | |
| "loss": 0.7044313549995422, | |
| "step": 90 | |
| }, | |
| { | |
| "epoch": 0.09924487594390508, | |
| "grad_norm": 1.4156235456466675, | |
| "learning_rate": 3.25e-07, | |
| "loss": 0.7540506720542908, | |
| "step": 92 | |
| }, | |
| { | |
| "epoch": 0.10140237324703344, | |
| "grad_norm": 2.159705877304077, | |
| "learning_rate": 3.3214285714285713e-07, | |
| "loss": 0.8926774859428406, | |
| "step": 94 | |
| }, | |
| { | |
| "epoch": 0.10355987055016182, | |
| "grad_norm": 1.109196662902832, | |
| "learning_rate": 3.392857142857143e-07, | |
| "loss": 0.8637357950210571, | |
| "step": 96 | |
| }, | |
| { | |
| "epoch": 0.10571736785329018, | |
| "grad_norm": 2.337041139602661, | |
| "learning_rate": 3.464285714285714e-07, | |
| "loss": 0.748981237411499, | |
| "step": 98 | |
| }, | |
| { | |
| "epoch": 0.10787486515641856, | |
| "grad_norm": 11.489009857177734, | |
| "learning_rate": 3.535714285714286e-07, | |
| "loss": 0.6166712641716003, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.11003236245954692, | |
| "grad_norm": 2.5563786029815674, | |
| "learning_rate": 3.607142857142857e-07, | |
| "loss": 0.8990151286125183, | |
| "step": 102 | |
| }, | |
| { | |
| "epoch": 0.1121898597626753, | |
| "grad_norm": 11.782515525817871, | |
| "learning_rate": 3.678571428571429e-07, | |
| "loss": 0.9428755044937134, | |
| "step": 104 | |
| }, | |
| { | |
| "epoch": 0.11434735706580366, | |
| "grad_norm": 4.793514251708984, | |
| "learning_rate": 3.75e-07, | |
| "loss": 0.515690803527832, | |
| "step": 106 | |
| }, | |
| { | |
| "epoch": 0.11650485436893204, | |
| "grad_norm": 10.346397399902344, | |
| "learning_rate": 3.821428571428571e-07, | |
| "loss": 1.0367025136947632, | |
| "step": 108 | |
| }, | |
| { | |
| "epoch": 0.1186623516720604, | |
| "grad_norm": 1.8307247161865234, | |
| "learning_rate": 3.8928571428571425e-07, | |
| "loss": 0.682608962059021, | |
| "step": 110 | |
| }, | |
| { | |
| "epoch": 0.12081984897518878, | |
| "grad_norm": 2.618833541870117, | |
| "learning_rate": 3.9642857142857137e-07, | |
| "loss": 0.5873494148254395, | |
| "step": 112 | |
| }, | |
| { | |
| "epoch": 0.12297734627831715, | |
| "grad_norm": 4.1070427894592285, | |
| "learning_rate": 4.0357142857142854e-07, | |
| "loss": 0.5457082390785217, | |
| "step": 114 | |
| }, | |
| { | |
| "epoch": 0.12513484358144553, | |
| "grad_norm": 3.505392074584961, | |
| "learning_rate": 4.1071428571428566e-07, | |
| "loss": 0.7192925214767456, | |
| "step": 116 | |
| }, | |
| { | |
| "epoch": 0.1272923408845739, | |
| "grad_norm": 4.676717758178711, | |
| "learning_rate": 4.1785714285714283e-07, | |
| "loss": 0.6860368847846985, | |
| "step": 118 | |
| }, | |
| { | |
| "epoch": 0.12944983818770225, | |
| "grad_norm": 1.218853235244751, | |
| "learning_rate": 4.2499999999999995e-07, | |
| "loss": 0.5903947353363037, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 0.13160733549083065, | |
| "grad_norm": 1.845142126083374, | |
| "learning_rate": 4.3214285714285713e-07, | |
| "loss": 0.7133704423904419, | |
| "step": 122 | |
| }, | |
| { | |
| "epoch": 0.133764832793959, | |
| "grad_norm": 6.926656246185303, | |
| "learning_rate": 4.3928571428571425e-07, | |
| "loss": 0.7799294590950012, | |
| "step": 124 | |
| }, | |
| { | |
| "epoch": 0.13592233009708737, | |
| "grad_norm": 0.8671985268592834, | |
| "learning_rate": 4.464285714285714e-07, | |
| "loss": 0.9356327056884766, | |
| "step": 126 | |
| }, | |
| { | |
| "epoch": 0.13807982740021574, | |
| "grad_norm": 2.000596761703491, | |
| "learning_rate": 4.5357142857142854e-07, | |
| "loss": 0.65338534116745, | |
| "step": 128 | |
| }, | |
| { | |
| "epoch": 0.14023732470334413, | |
| "grad_norm": 2.7190310955047607, | |
| "learning_rate": 4.6071428571428566e-07, | |
| "loss": 0.3992256820201874, | |
| "step": 130 | |
| }, | |
| { | |
| "epoch": 0.1423948220064725, | |
| "grad_norm": 4.049299240112305, | |
| "learning_rate": 4.6785714285714283e-07, | |
| "loss": 0.68809974193573, | |
| "step": 132 | |
| }, | |
| { | |
| "epoch": 0.14455231930960086, | |
| "grad_norm": 3.6315855979919434, | |
| "learning_rate": 4.7499999999999995e-07, | |
| "loss": 0.8035519123077393, | |
| "step": 134 | |
| }, | |
| { | |
| "epoch": 0.14670981661272922, | |
| "grad_norm": 1.8729430437088013, | |
| "learning_rate": 4.821428571428571e-07, | |
| "loss": 0.9223624467849731, | |
| "step": 136 | |
| }, | |
| { | |
| "epoch": 0.1488673139158576, | |
| "grad_norm": 2.9594123363494873, | |
| "learning_rate": 4.892857142857142e-07, | |
| "loss": 0.7450867891311646, | |
| "step": 138 | |
| }, | |
| { | |
| "epoch": 0.15102481121898598, | |
| "grad_norm": 1.094379186630249, | |
| "learning_rate": 4.964285714285715e-07, | |
| "loss": 0.521569550037384, | |
| "step": 140 | |
| }, | |
| { | |
| "epoch": 0.15318230852211434, | |
| "grad_norm": 3.2890093326568604, | |
| "learning_rate": 4.999998408101351e-07, | |
| "loss": 0.7194236516952515, | |
| "step": 142 | |
| }, | |
| { | |
| "epoch": 0.1553398058252427, | |
| "grad_norm": 4.202434062957764, | |
| "learning_rate": 4.999985672925673e-07, | |
| "loss": 0.6269642114639282, | |
| "step": 144 | |
| }, | |
| { | |
| "epoch": 0.1574973031283711, | |
| "grad_norm": 9.459905624389648, | |
| "learning_rate": 4.999960202646399e-07, | |
| "loss": 0.798893392086029, | |
| "step": 146 | |
| }, | |
| { | |
| "epoch": 0.15965480043149946, | |
| "grad_norm": 1.1723222732543945, | |
| "learning_rate": 4.999921997407693e-07, | |
| "loss": 0.827728271484375, | |
| "step": 148 | |
| }, | |
| { | |
| "epoch": 0.16181229773462782, | |
| "grad_norm": 2.3163399696350098, | |
| "learning_rate": 4.999871057425801e-07, | |
| "loss": 0.6281718015670776, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 0.16396979503775622, | |
| "grad_norm": 1.8294389247894287, | |
| "learning_rate": 4.999807382989047e-07, | |
| "loss": 0.4277426302433014, | |
| "step": 152 | |
| }, | |
| { | |
| "epoch": 0.16612729234088458, | |
| "grad_norm": 7.350142955780029, | |
| "learning_rate": 4.999730974457832e-07, | |
| "loss": 0.9866698980331421, | |
| "step": 154 | |
| }, | |
| { | |
| "epoch": 0.16828478964401294, | |
| "grad_norm": 13.259355545043945, | |
| "learning_rate": 4.999641832264634e-07, | |
| "loss": 0.8262766599655151, | |
| "step": 156 | |
| }, | |
| { | |
| "epoch": 0.1704422869471413, | |
| "grad_norm": 1.965742588043213, | |
| "learning_rate": 4.999539956914009e-07, | |
| "loss": 0.6971994042396545, | |
| "step": 158 | |
| }, | |
| { | |
| "epoch": 0.1725997842502697, | |
| "grad_norm": 5.429571628570557, | |
| "learning_rate": 4.999425348982576e-07, | |
| "loss": 0.8861981630325317, | |
| "step": 160 | |
| }, | |
| { | |
| "epoch": 0.17475728155339806, | |
| "grad_norm": 2.0696070194244385, | |
| "learning_rate": 4.999298009119028e-07, | |
| "loss": 0.7111194133758545, | |
| "step": 162 | |
| }, | |
| { | |
| "epoch": 0.17691477885652643, | |
| "grad_norm": 1.6481966972351074, | |
| "learning_rate": 4.999157938044117e-07, | |
| "loss": 0.5676076412200928, | |
| "step": 164 | |
| }, | |
| { | |
| "epoch": 0.1790722761596548, | |
| "grad_norm": 1.6638914346694946, | |
| "learning_rate": 4.999005136550658e-07, | |
| "loss": 0.6949500441551208, | |
| "step": 166 | |
| }, | |
| { | |
| "epoch": 0.18122977346278318, | |
| "grad_norm": 1.638273000717163, | |
| "learning_rate": 4.998839605503519e-07, | |
| "loss": 0.5900384783744812, | |
| "step": 168 | |
| }, | |
| { | |
| "epoch": 0.18338727076591155, | |
| "grad_norm": 4.502748012542725, | |
| "learning_rate": 4.998661345839621e-07, | |
| "loss": 0.6821661591529846, | |
| "step": 170 | |
| }, | |
| { | |
| "epoch": 0.1855447680690399, | |
| "grad_norm": 7.763872146606445, | |
| "learning_rate": 4.998470358567927e-07, | |
| "loss": 0.942156195640564, | |
| "step": 172 | |
| }, | |
| { | |
| "epoch": 0.18770226537216828, | |
| "grad_norm": 2.503589630126953, | |
| "learning_rate": 4.998266644769442e-07, | |
| "loss": 0.7288610935211182, | |
| "step": 174 | |
| }, | |
| { | |
| "epoch": 0.18985976267529667, | |
| "grad_norm": 1.9182608127593994, | |
| "learning_rate": 4.998050205597199e-07, | |
| "loss": 0.6357927918434143, | |
| "step": 176 | |
| }, | |
| { | |
| "epoch": 0.19201725997842503, | |
| "grad_norm": 6.610482692718506, | |
| "learning_rate": 4.997821042276267e-07, | |
| "loss": 0.6117627024650574, | |
| "step": 178 | |
| }, | |
| { | |
| "epoch": 0.1941747572815534, | |
| "grad_norm": 1.5938338041305542, | |
| "learning_rate": 4.997579156103726e-07, | |
| "loss": 0.5153307914733887, | |
| "step": 180 | |
| }, | |
| { | |
| "epoch": 0.19633225458468176, | |
| "grad_norm": 3.5181422233581543, | |
| "learning_rate": 4.99732454844867e-07, | |
| "loss": 0.8765067458152771, | |
| "step": 182 | |
| }, | |
| { | |
| "epoch": 0.19848975188781015, | |
| "grad_norm": 2.738868236541748, | |
| "learning_rate": 4.997057220752203e-07, | |
| "loss": 0.6484391689300537, | |
| "step": 184 | |
| }, | |
| { | |
| "epoch": 0.20064724919093851, | |
| "grad_norm": 4.510643005371094, | |
| "learning_rate": 4.996777174527419e-07, | |
| "loss": 0.7230756878852844, | |
| "step": 186 | |
| }, | |
| { | |
| "epoch": 0.20280474649406688, | |
| "grad_norm": 8.01621150970459, | |
| "learning_rate": 4.996484411359404e-07, | |
| "loss": 0.5043923854827881, | |
| "step": 188 | |
| }, | |
| { | |
| "epoch": 0.20496224379719524, | |
| "grad_norm": 3.4308345317840576, | |
| "learning_rate": 4.996178932905221e-07, | |
| "loss": 0.43938198685646057, | |
| "step": 190 | |
| }, | |
| { | |
| "epoch": 0.20711974110032363, | |
| "grad_norm": 1.9731006622314453, | |
| "learning_rate": 4.995860740893904e-07, | |
| "loss": 0.5857755541801453, | |
| "step": 192 | |
| }, | |
| { | |
| "epoch": 0.209277238403452, | |
| "grad_norm": 5.68419075012207, | |
| "learning_rate": 4.995529837126445e-07, | |
| "loss": 0.6594390273094177, | |
| "step": 194 | |
| }, | |
| { | |
| "epoch": 0.21143473570658036, | |
| "grad_norm": 2.10448956489563, | |
| "learning_rate": 4.995186223475785e-07, | |
| "loss": 0.5797517895698547, | |
| "step": 196 | |
| }, | |
| { | |
| "epoch": 0.21359223300970873, | |
| "grad_norm": 3.166001081466675, | |
| "learning_rate": 4.99482990188681e-07, | |
| "loss": 0.6418941617012024, | |
| "step": 198 | |
| }, | |
| { | |
| "epoch": 0.21574973031283712, | |
| "grad_norm": 4.582778453826904, | |
| "learning_rate": 4.994460874376325e-07, | |
| "loss": 0.6962154507637024, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.21790722761596548, | |
| "grad_norm": 2.809333086013794, | |
| "learning_rate": 4.994079143033057e-07, | |
| "loss": 0.5948184132575989, | |
| "step": 202 | |
| }, | |
| { | |
| "epoch": 0.22006472491909385, | |
| "grad_norm": 1.9424008131027222, | |
| "learning_rate": 4.993684710017639e-07, | |
| "loss": 0.6439061164855957, | |
| "step": 204 | |
| }, | |
| { | |
| "epoch": 0.2222222222222222, | |
| "grad_norm": 1.9433950185775757, | |
| "learning_rate": 4.993277577562591e-07, | |
| "loss": 0.6355498433113098, | |
| "step": 206 | |
| }, | |
| { | |
| "epoch": 0.2243797195253506, | |
| "grad_norm": 1.5145349502563477, | |
| "learning_rate": 4.992857747972318e-07, | |
| "loss": 0.5916112661361694, | |
| "step": 208 | |
| }, | |
| { | |
| "epoch": 0.22653721682847897, | |
| "grad_norm": 4.159152507781982, | |
| "learning_rate": 4.99242522362309e-07, | |
| "loss": 0.6694331169128418, | |
| "step": 210 | |
| }, | |
| { | |
| "epoch": 0.22869471413160733, | |
| "grad_norm": 2.050935983657837, | |
| "learning_rate": 4.991980006963029e-07, | |
| "loss": 0.5539838075637817, | |
| "step": 212 | |
| }, | |
| { | |
| "epoch": 0.2308522114347357, | |
| "grad_norm": 3.0738911628723145, | |
| "learning_rate": 4.9915221005121e-07, | |
| "loss": 0.5762456655502319, | |
| "step": 214 | |
| }, | |
| { | |
| "epoch": 0.23300970873786409, | |
| "grad_norm": 5.252814292907715, | |
| "learning_rate": 4.991051506862089e-07, | |
| "loss": 0.593986988067627, | |
| "step": 216 | |
| }, | |
| { | |
| "epoch": 0.23516720604099245, | |
| "grad_norm": 4.718217372894287, | |
| "learning_rate": 4.990568228676597e-07, | |
| "loss": 0.6690990328788757, | |
| "step": 218 | |
| }, | |
| { | |
| "epoch": 0.2373247033441208, | |
| "grad_norm": 3.038825035095215, | |
| "learning_rate": 4.990072268691015e-07, | |
| "loss": 0.703670859336853, | |
| "step": 220 | |
| }, | |
| { | |
| "epoch": 0.23948220064724918, | |
| "grad_norm": 2.9468464851379395, | |
| "learning_rate": 4.98956362971252e-07, | |
| "loss": 0.7173536419868469, | |
| "step": 222 | |
| }, | |
| { | |
| "epoch": 0.24163969795037757, | |
| "grad_norm": 2.068927526473999, | |
| "learning_rate": 4.989042314620048e-07, | |
| "loss": 0.658054769039154, | |
| "step": 224 | |
| }, | |
| { | |
| "epoch": 0.24379719525350593, | |
| "grad_norm": 6.555632591247559, | |
| "learning_rate": 4.988508326364288e-07, | |
| "loss": 0.6826736927032471, | |
| "step": 226 | |
| }, | |
| { | |
| "epoch": 0.2459546925566343, | |
| "grad_norm": 16.916873931884766, | |
| "learning_rate": 4.987961667967655e-07, | |
| "loss": 0.5197000503540039, | |
| "step": 228 | |
| }, | |
| { | |
| "epoch": 0.2481121898597627, | |
| "grad_norm": 1.6320703029632568, | |
| "learning_rate": 4.987402342524282e-07, | |
| "loss": 0.6915658712387085, | |
| "step": 230 | |
| }, | |
| { | |
| "epoch": 0.25026968716289105, | |
| "grad_norm": 8.044790267944336, | |
| "learning_rate": 4.986830353199997e-07, | |
| "loss": 0.9126973748207092, | |
| "step": 232 | |
| }, | |
| { | |
| "epoch": 0.2524271844660194, | |
| "grad_norm": 6.615198135375977, | |
| "learning_rate": 4.986245703232305e-07, | |
| "loss": 0.8132709264755249, | |
| "step": 234 | |
| }, | |
| { | |
| "epoch": 0.2545846817691478, | |
| "grad_norm": 10.388949394226074, | |
| "learning_rate": 4.985648395930373e-07, | |
| "loss": 0.7186510562896729, | |
| "step": 236 | |
| }, | |
| { | |
| "epoch": 0.25674217907227614, | |
| "grad_norm": 1.9609507322311401, | |
| "learning_rate": 4.985038434675011e-07, | |
| "loss": 0.6574066877365112, | |
| "step": 238 | |
| }, | |
| { | |
| "epoch": 0.2588996763754045, | |
| "grad_norm": 1.8315008878707886, | |
| "learning_rate": 4.984415822918648e-07, | |
| "loss": 0.7553728818893433, | |
| "step": 240 | |
| }, | |
| { | |
| "epoch": 0.26105717367853293, | |
| "grad_norm": 1.6982944011688232, | |
| "learning_rate": 4.983780564185318e-07, | |
| "loss": 0.6477434635162354, | |
| "step": 242 | |
| }, | |
| { | |
| "epoch": 0.2632146709816613, | |
| "grad_norm": 3.038011312484741, | |
| "learning_rate": 4.983132662070639e-07, | |
| "loss": 0.6487295031547546, | |
| "step": 244 | |
| }, | |
| { | |
| "epoch": 0.26537216828478966, | |
| "grad_norm": 2.443270683288574, | |
| "learning_rate": 4.982472120241788e-07, | |
| "loss": 0.6732483506202698, | |
| "step": 246 | |
| }, | |
| { | |
| "epoch": 0.267529665587918, | |
| "grad_norm": 23.088842391967773, | |
| "learning_rate": 4.981798942437488e-07, | |
| "loss": 0.8093491792678833, | |
| "step": 248 | |
| }, | |
| { | |
| "epoch": 0.2696871628910464, | |
| "grad_norm": 1.265063762664795, | |
| "learning_rate": 4.981113132467979e-07, | |
| "loss": 0.6368775963783264, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 0.27184466019417475, | |
| "grad_norm": 4.324744701385498, | |
| "learning_rate": 4.980414694215002e-07, | |
| "loss": 0.7633779644966125, | |
| "step": 252 | |
| }, | |
| { | |
| "epoch": 0.2740021574973031, | |
| "grad_norm": 1.693095326423645, | |
| "learning_rate": 4.979703631631776e-07, | |
| "loss": 0.4923373758792877, | |
| "step": 254 | |
| }, | |
| { | |
| "epoch": 0.2761596548004315, | |
| "grad_norm": 1.543394923210144, | |
| "learning_rate": 4.978979948742973e-07, | |
| "loss": 0.580363392829895, | |
| "step": 256 | |
| }, | |
| { | |
| "epoch": 0.2783171521035599, | |
| "grad_norm": 4.408818244934082, | |
| "learning_rate": 4.978243649644698e-07, | |
| "loss": 0.45743411779403687, | |
| "step": 258 | |
| }, | |
| { | |
| "epoch": 0.28047464940668826, | |
| "grad_norm": 1.3202674388885498, | |
| "learning_rate": 4.977494738504462e-07, | |
| "loss": 0.7671762108802795, | |
| "step": 260 | |
| }, | |
| { | |
| "epoch": 0.2826321467098166, | |
| "grad_norm": 1.4401205778121948, | |
| "learning_rate": 4.976733219561166e-07, | |
| "loss": 0.5078914761543274, | |
| "step": 262 | |
| }, | |
| { | |
| "epoch": 0.284789644012945, | |
| "grad_norm": 2.1078848838806152, | |
| "learning_rate": 4.97595909712507e-07, | |
| "loss": 0.6809296607971191, | |
| "step": 264 | |
| }, | |
| { | |
| "epoch": 0.28694714131607335, | |
| "grad_norm": 1.4545615911483765, | |
| "learning_rate": 4.975172375577768e-07, | |
| "loss": 0.5965730547904968, | |
| "step": 266 | |
| }, | |
| { | |
| "epoch": 0.2891046386192017, | |
| "grad_norm": 3.4635396003723145, | |
| "learning_rate": 4.974373059372171e-07, | |
| "loss": 0.6723196506500244, | |
| "step": 268 | |
| }, | |
| { | |
| "epoch": 0.2912621359223301, | |
| "grad_norm": 1.4718010425567627, | |
| "learning_rate": 4.973561153032472e-07, | |
| "loss": 0.5375315546989441, | |
| "step": 270 | |
| }, | |
| { | |
| "epoch": 0.29341963322545844, | |
| "grad_norm": 2.598886489868164, | |
| "learning_rate": 4.972736661154131e-07, | |
| "loss": 0.6744803786277771, | |
| "step": 272 | |
| }, | |
| { | |
| "epoch": 0.29557713052858686, | |
| "grad_norm": 2.894680976867676, | |
| "learning_rate": 4.971899588403836e-07, | |
| "loss": 0.6589304804801941, | |
| "step": 274 | |
| }, | |
| { | |
| "epoch": 0.2977346278317152, | |
| "grad_norm": 6.139466762542725, | |
| "learning_rate": 4.97104993951949e-07, | |
| "loss": 0.6724509000778198, | |
| "step": 276 | |
| }, | |
| { | |
| "epoch": 0.2998921251348436, | |
| "grad_norm": 4.252213954925537, | |
| "learning_rate": 4.970187719310173e-07, | |
| "loss": 0.7082564830780029, | |
| "step": 278 | |
| }, | |
| { | |
| "epoch": 0.30204962243797195, | |
| "grad_norm": 4.135262489318848, | |
| "learning_rate": 4.969312932656125e-07, | |
| "loss": 0.6953362822532654, | |
| "step": 280 | |
| }, | |
| { | |
| "epoch": 0.3042071197411003, | |
| "grad_norm": 6.377979755401611, | |
| "learning_rate": 4.968425584508709e-07, | |
| "loss": 0.6467074155807495, | |
| "step": 282 | |
| }, | |
| { | |
| "epoch": 0.3063646170442287, | |
| "grad_norm": 2.111952066421509, | |
| "learning_rate": 4.967525679890388e-07, | |
| "loss": 0.6030433177947998, | |
| "step": 284 | |
| }, | |
| { | |
| "epoch": 0.30852211434735705, | |
| "grad_norm": 2.0884768962860107, | |
| "learning_rate": 4.966613223894696e-07, | |
| "loss": 0.4877060651779175, | |
| "step": 286 | |
| }, | |
| { | |
| "epoch": 0.3106796116504854, | |
| "grad_norm": 8.26274585723877, | |
| "learning_rate": 4.96568822168621e-07, | |
| "loss": 0.6577244400978088, | |
| "step": 288 | |
| }, | |
| { | |
| "epoch": 0.31283710895361383, | |
| "grad_norm": 1.4854352474212646, | |
| "learning_rate": 4.964750678500517e-07, | |
| "loss": 0.6866559386253357, | |
| "step": 290 | |
| }, | |
| { | |
| "epoch": 0.3149946062567422, | |
| "grad_norm": 2.8770534992218018, | |
| "learning_rate": 4.963800599644189e-07, | |
| "loss": 0.6283307075500488, | |
| "step": 292 | |
| }, | |
| { | |
| "epoch": 0.31715210355987056, | |
| "grad_norm": 3.0127577781677246, | |
| "learning_rate": 4.96283799049475e-07, | |
| "loss": 0.7268189191818237, | |
| "step": 294 | |
| }, | |
| { | |
| "epoch": 0.3193096008629989, | |
| "grad_norm": 1.608444333076477, | |
| "learning_rate": 4.961862856500647e-07, | |
| "loss": 0.4435058832168579, | |
| "step": 296 | |
| }, | |
| { | |
| "epoch": 0.3214670981661273, | |
| "grad_norm": 4.365738391876221, | |
| "learning_rate": 4.960875203181219e-07, | |
| "loss": 0.6397342085838318, | |
| "step": 298 | |
| }, | |
| { | |
| "epoch": 0.32362459546925565, | |
| "grad_norm": 17.852121353149414, | |
| "learning_rate": 4.959875036126664e-07, | |
| "loss": 0.7305557131767273, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 0.325782092772384, | |
| "grad_norm": 5.299609661102295, | |
| "learning_rate": 4.958862360998011e-07, | |
| "loss": 0.45774808526039124, | |
| "step": 302 | |
| }, | |
| { | |
| "epoch": 0.32793959007551243, | |
| "grad_norm": 3.3113415241241455, | |
| "learning_rate": 4.957837183527081e-07, | |
| "loss": 0.577039897441864, | |
| "step": 304 | |
| }, | |
| { | |
| "epoch": 0.3300970873786408, | |
| "grad_norm": 1.8541628122329712, | |
| "learning_rate": 4.956799509516467e-07, | |
| "loss": 0.6706950664520264, | |
| "step": 306 | |
| }, | |
| { | |
| "epoch": 0.33225458468176916, | |
| "grad_norm": 2.7589690685272217, | |
| "learning_rate": 4.955749344839487e-07, | |
| "loss": 0.6480457186698914, | |
| "step": 308 | |
| }, | |
| { | |
| "epoch": 0.3344120819848975, | |
| "grad_norm": 2.4146876335144043, | |
| "learning_rate": 4.954686695440159e-07, | |
| "loss": 0.7360544204711914, | |
| "step": 310 | |
| }, | |
| { | |
| "epoch": 0.3365695792880259, | |
| "grad_norm": 2.073716878890991, | |
| "learning_rate": 4.953611567333166e-07, | |
| "loss": 0.6692315340042114, | |
| "step": 312 | |
| }, | |
| { | |
| "epoch": 0.33872707659115425, | |
| "grad_norm": 5.253190994262695, | |
| "learning_rate": 4.952523966603822e-07, | |
| "loss": 0.6974945068359375, | |
| "step": 314 | |
| }, | |
| { | |
| "epoch": 0.3408845738942826, | |
| "grad_norm": 2.9199769496917725, | |
| "learning_rate": 4.951423899408035e-07, | |
| "loss": 0.7332634329795837, | |
| "step": 316 | |
| }, | |
| { | |
| "epoch": 0.343042071197411, | |
| "grad_norm": 4.3365254402160645, | |
| "learning_rate": 4.950311371972277e-07, | |
| "loss": 0.7718333005905151, | |
| "step": 318 | |
| }, | |
| { | |
| "epoch": 0.3451995685005394, | |
| "grad_norm": 3.805959701538086, | |
| "learning_rate": 4.949186390593544e-07, | |
| "loss": 0.5337668657302856, | |
| "step": 320 | |
| }, | |
| { | |
| "epoch": 0.34735706580366776, | |
| "grad_norm": 0.888100266456604, | |
| "learning_rate": 4.948048961639323e-07, | |
| "loss": 0.40571683645248413, | |
| "step": 322 | |
| }, | |
| { | |
| "epoch": 0.34951456310679613, | |
| "grad_norm": 1.4906032085418701, | |
| "learning_rate": 4.946899091547556e-07, | |
| "loss": 0.5962201356887817, | |
| "step": 324 | |
| }, | |
| { | |
| "epoch": 0.3516720604099245, | |
| "grad_norm": 13.393712997436523, | |
| "learning_rate": 4.945736786826601e-07, | |
| "loss": 0.7609850168228149, | |
| "step": 326 | |
| }, | |
| { | |
| "epoch": 0.35382955771305286, | |
| "grad_norm": 6.983129024505615, | |
| "learning_rate": 4.944562054055198e-07, | |
| "loss": 0.6627951860427856, | |
| "step": 328 | |
| }, | |
| { | |
| "epoch": 0.3559870550161812, | |
| "grad_norm": 1.3096721172332764, | |
| "learning_rate": 4.943374899882432e-07, | |
| "loss": 0.6522207856178284, | |
| "step": 330 | |
| }, | |
| { | |
| "epoch": 0.3581445523193096, | |
| "grad_norm": 4.269274711608887, | |
| "learning_rate": 4.942175331027693e-07, | |
| "loss": 0.4938734173774719, | |
| "step": 332 | |
| }, | |
| { | |
| "epoch": 0.36030204962243795, | |
| "grad_norm": 5.267324924468994, | |
| "learning_rate": 4.940963354280638e-07, | |
| "loss": 0.6275608539581299, | |
| "step": 334 | |
| }, | |
| { | |
| "epoch": 0.36245954692556637, | |
| "grad_norm": 5.786050796508789, | |
| "learning_rate": 4.939738976501156e-07, | |
| "loss": 0.49768128991127014, | |
| "step": 336 | |
| }, | |
| { | |
| "epoch": 0.36461704422869473, | |
| "grad_norm": 2.491793155670166, | |
| "learning_rate": 4.938502204619325e-07, | |
| "loss": 0.5501651763916016, | |
| "step": 338 | |
| }, | |
| { | |
| "epoch": 0.3667745415318231, | |
| "grad_norm": 33.490753173828125, | |
| "learning_rate": 4.937253045635375e-07, | |
| "loss": 0.6647762656211853, | |
| "step": 340 | |
| }, | |
| { | |
| "epoch": 0.36893203883495146, | |
| "grad_norm": 2.3751609325408936, | |
| "learning_rate": 4.93599150661965e-07, | |
| "loss": 0.6672598719596863, | |
| "step": 342 | |
| }, | |
| { | |
| "epoch": 0.3710895361380798, | |
| "grad_norm": 2.2334680557250977, | |
| "learning_rate": 4.934717594712564e-07, | |
| "loss": 0.5913785099983215, | |
| "step": 344 | |
| }, | |
| { | |
| "epoch": 0.3732470334412082, | |
| "grad_norm": 2.6074905395507812, | |
| "learning_rate": 4.933431317124562e-07, | |
| "loss": 0.6229710578918457, | |
| "step": 346 | |
| }, | |
| { | |
| "epoch": 0.37540453074433655, | |
| "grad_norm": 13.647238731384277, | |
| "learning_rate": 4.932132681136079e-07, | |
| "loss": 0.7039205431938171, | |
| "step": 348 | |
| }, | |
| { | |
| "epoch": 0.3775620280474649, | |
| "grad_norm": 1.9593160152435303, | |
| "learning_rate": 4.930821694097507e-07, | |
| "loss": 0.6190288662910461, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 0.37971952535059333, | |
| "grad_norm": 5.780766487121582, | |
| "learning_rate": 4.929498363429135e-07, | |
| "loss": 0.6188508868217468, | |
| "step": 352 | |
| }, | |
| { | |
| "epoch": 0.3818770226537217, | |
| "grad_norm": 13.814835548400879, | |
| "learning_rate": 4.928162696621125e-07, | |
| "loss": 0.6810972094535828, | |
| "step": 354 | |
| }, | |
| { | |
| "epoch": 0.38403451995685006, | |
| "grad_norm": 2.1199731826782227, | |
| "learning_rate": 4.926814701233461e-07, | |
| "loss": 0.5854727029800415, | |
| "step": 356 | |
| }, | |
| { | |
| "epoch": 0.3861920172599784, | |
| "grad_norm": 6.030026435852051, | |
| "learning_rate": 4.925454384895906e-07, | |
| "loss": 0.5390771627426147, | |
| "step": 358 | |
| }, | |
| { | |
| "epoch": 0.3883495145631068, | |
| "grad_norm": 1.6937010288238525, | |
| "learning_rate": 4.924081755307964e-07, | |
| "loss": 0.6610192060470581, | |
| "step": 360 | |
| }, | |
| { | |
| "epoch": 0.39050701186623515, | |
| "grad_norm": 2.114851236343384, | |
| "learning_rate": 4.922696820238831e-07, | |
| "loss": 0.6132209300994873, | |
| "step": 362 | |
| }, | |
| { | |
| "epoch": 0.3926645091693635, | |
| "grad_norm": 5.5387749671936035, | |
| "learning_rate": 4.921299587527352e-07, | |
| "loss": 0.5963850021362305, | |
| "step": 364 | |
| }, | |
| { | |
| "epoch": 0.3948220064724919, | |
| "grad_norm": 6.011134147644043, | |
| "learning_rate": 4.919890065081979e-07, | |
| "loss": 0.7333153486251831, | |
| "step": 366 | |
| }, | |
| { | |
| "epoch": 0.3969795037756203, | |
| "grad_norm": 6.8634209632873535, | |
| "learning_rate": 4.918468260880726e-07, | |
| "loss": 0.7082594633102417, | |
| "step": 368 | |
| }, | |
| { | |
| "epoch": 0.39913700107874867, | |
| "grad_norm": 0.9249289631843567, | |
| "learning_rate": 4.917034182971122e-07, | |
| "loss": 0.6958880424499512, | |
| "step": 370 | |
| }, | |
| { | |
| "epoch": 0.40129449838187703, | |
| "grad_norm": 2.1919021606445312, | |
| "learning_rate": 4.915587839470163e-07, | |
| "loss": 0.6318715214729309, | |
| "step": 372 | |
| }, | |
| { | |
| "epoch": 0.4034519956850054, | |
| "grad_norm": 1.5956162214279175, | |
| "learning_rate": 4.914129238564272e-07, | |
| "loss": 0.6127752661705017, | |
| "step": 374 | |
| }, | |
| { | |
| "epoch": 0.40560949298813376, | |
| "grad_norm": 1.8250914812088013, | |
| "learning_rate": 4.912658388509253e-07, | |
| "loss": 0.5225449204444885, | |
| "step": 376 | |
| }, | |
| { | |
| "epoch": 0.4077669902912621, | |
| "grad_norm": 3.8166756629943848, | |
| "learning_rate": 4.911175297630236e-07, | |
| "loss": 0.3997286558151245, | |
| "step": 378 | |
| }, | |
| { | |
| "epoch": 0.4099244875943905, | |
| "grad_norm": 2.1530375480651855, | |
| "learning_rate": 4.909679974321636e-07, | |
| "loss": 0.5367651581764221, | |
| "step": 380 | |
| }, | |
| { | |
| "epoch": 0.4120819848975189, | |
| "grad_norm": 12.938714981079102, | |
| "learning_rate": 4.908172427047109e-07, | |
| "loss": 0.5885851383209229, | |
| "step": 382 | |
| }, | |
| { | |
| "epoch": 0.41423948220064727, | |
| "grad_norm": 2.9967093467712402, | |
| "learning_rate": 4.906652664339493e-07, | |
| "loss": 0.6215783357620239, | |
| "step": 384 | |
| }, | |
| { | |
| "epoch": 0.41639697950377563, | |
| "grad_norm": 4.113240718841553, | |
| "learning_rate": 4.905120694800772e-07, | |
| "loss": 0.6612198352813721, | |
| "step": 386 | |
| }, | |
| { | |
| "epoch": 0.418554476806904, | |
| "grad_norm": 2.52058482170105, | |
| "learning_rate": 4.903576527102018e-07, | |
| "loss": 0.5840904712677002, | |
| "step": 388 | |
| }, | |
| { | |
| "epoch": 0.42071197411003236, | |
| "grad_norm": 5.358467102050781, | |
| "learning_rate": 4.902020169983346e-07, | |
| "loss": 0.3897056579589844, | |
| "step": 390 | |
| }, | |
| { | |
| "epoch": 0.4228694714131607, | |
| "grad_norm": 9.863550186157227, | |
| "learning_rate": 4.900451632253868e-07, | |
| "loss": 0.6255682706832886, | |
| "step": 392 | |
| }, | |
| { | |
| "epoch": 0.4250269687162891, | |
| "grad_norm": 3.4259397983551025, | |
| "learning_rate": 4.898870922791634e-07, | |
| "loss": 0.6414270997047424, | |
| "step": 394 | |
| }, | |
| { | |
| "epoch": 0.42718446601941745, | |
| "grad_norm": 8.052302360534668, | |
| "learning_rate": 4.89727805054359e-07, | |
| "loss": 0.761306881904602, | |
| "step": 396 | |
| }, | |
| { | |
| "epoch": 0.42934196332254587, | |
| "grad_norm": 3.4023313522338867, | |
| "learning_rate": 4.895673024525522e-07, | |
| "loss": 0.5789651274681091, | |
| "step": 398 | |
| }, | |
| { | |
| "epoch": 0.43149946062567424, | |
| "grad_norm": 3.1743762493133545, | |
| "learning_rate": 4.894055853822012e-07, | |
| "loss": 0.5212793946266174, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 0.4336569579288026, | |
| "grad_norm": 2.2208149433135986, | |
| "learning_rate": 4.892426547586378e-07, | |
| "loss": 0.5916732549667358, | |
| "step": 402 | |
| }, | |
| { | |
| "epoch": 0.43581445523193096, | |
| "grad_norm": 2.620041608810425, | |
| "learning_rate": 4.890785115040626e-07, | |
| "loss": 0.6305989027023315, | |
| "step": 404 | |
| }, | |
| { | |
| "epoch": 0.43797195253505933, | |
| "grad_norm": 3.6425275802612305, | |
| "learning_rate": 4.889131565475401e-07, | |
| "loss": 0.756013035774231, | |
| "step": 406 | |
| }, | |
| { | |
| "epoch": 0.4401294498381877, | |
| "grad_norm": 5.899089336395264, | |
| "learning_rate": 4.887465908249925e-07, | |
| "loss": 0.5666382908821106, | |
| "step": 408 | |
| }, | |
| { | |
| "epoch": 0.44228694714131606, | |
| "grad_norm": 2.683706283569336, | |
| "learning_rate": 4.885788152791959e-07, | |
| "loss": 0.774447500705719, | |
| "step": 410 | |
| }, | |
| { | |
| "epoch": 0.4444444444444444, | |
| "grad_norm": 2.967109203338623, | |
| "learning_rate": 4.884098308597734e-07, | |
| "loss": 0.616926908493042, | |
| "step": 412 | |
| }, | |
| { | |
| "epoch": 0.44660194174757284, | |
| "grad_norm": 1.796120047569275, | |
| "learning_rate": 4.882396385231909e-07, | |
| "loss": 0.5476500391960144, | |
| "step": 414 | |
| }, | |
| { | |
| "epoch": 0.4487594390507012, | |
| "grad_norm": 1.2322068214416504, | |
| "learning_rate": 4.880682392327509e-07, | |
| "loss": 0.6756587028503418, | |
| "step": 416 | |
| }, | |
| { | |
| "epoch": 0.45091693635382957, | |
| "grad_norm": 2.247699737548828, | |
| "learning_rate": 4.878956339585874e-07, | |
| "loss": 0.6002364754676819, | |
| "step": 418 | |
| }, | |
| { | |
| "epoch": 0.45307443365695793, | |
| "grad_norm": 1.9976741075515747, | |
| "learning_rate": 4.877218236776603e-07, | |
| "loss": 0.5875912308692932, | |
| "step": 420 | |
| }, | |
| { | |
| "epoch": 0.4552319309600863, | |
| "grad_norm": 1.5975956916809082, | |
| "learning_rate": 4.875468093737504e-07, | |
| "loss": 0.6899119019508362, | |
| "step": 422 | |
| }, | |
| { | |
| "epoch": 0.45738942826321466, | |
| "grad_norm": 1.6121068000793457, | |
| "learning_rate": 4.873705920374528e-07, | |
| "loss": 0.5816035270690918, | |
| "step": 424 | |
| }, | |
| { | |
| "epoch": 0.459546925566343, | |
| "grad_norm": 9.736807823181152, | |
| "learning_rate": 4.87193172666172e-07, | |
| "loss": 0.6405006647109985, | |
| "step": 426 | |
| }, | |
| { | |
| "epoch": 0.4617044228694714, | |
| "grad_norm": 3.0928378105163574, | |
| "learning_rate": 4.870145522641164e-07, | |
| "loss": 0.8041344881057739, | |
| "step": 428 | |
| }, | |
| { | |
| "epoch": 0.4638619201725998, | |
| "grad_norm": 1.493276596069336, | |
| "learning_rate": 4.868347318422921e-07, | |
| "loss": 0.6042853593826294, | |
| "step": 430 | |
| }, | |
| { | |
| "epoch": 0.46601941747572817, | |
| "grad_norm": 5.470495700836182, | |
| "learning_rate": 4.866537124184973e-07, | |
| "loss": 0.6030522584915161, | |
| "step": 432 | |
| }, | |
| { | |
| "epoch": 0.46817691477885653, | |
| "grad_norm": 2.748800754547119, | |
| "learning_rate": 4.864714950173171e-07, | |
| "loss": 0.449870765209198, | |
| "step": 434 | |
| }, | |
| { | |
| "epoch": 0.4703344120819849, | |
| "grad_norm": 6.604589939117432, | |
| "learning_rate": 4.862880806701166e-07, | |
| "loss": 0.7801079154014587, | |
| "step": 436 | |
| }, | |
| { | |
| "epoch": 0.47249190938511326, | |
| "grad_norm": 15.256587028503418, | |
| "learning_rate": 4.861034704150363e-07, | |
| "loss": 0.8985238075256348, | |
| "step": 438 | |
| }, | |
| { | |
| "epoch": 0.4746494066882416, | |
| "grad_norm": 2.490304946899414, | |
| "learning_rate": 4.859176652969853e-07, | |
| "loss": 0.5823092460632324, | |
| "step": 440 | |
| }, | |
| { | |
| "epoch": 0.47680690399137, | |
| "grad_norm": 1.5544885396957397, | |
| "learning_rate": 4.857306663676358e-07, | |
| "loss": 0.5551883578300476, | |
| "step": 442 | |
| }, | |
| { | |
| "epoch": 0.47896440129449835, | |
| "grad_norm": 3.0149269104003906, | |
| "learning_rate": 4.855424746854171e-07, | |
| "loss": 0.6494267582893372, | |
| "step": 444 | |
| }, | |
| { | |
| "epoch": 0.4811218985976268, | |
| "grad_norm": 6.602444171905518, | |
| "learning_rate": 4.853530913155097e-07, | |
| "loss": 0.6156390905380249, | |
| "step": 446 | |
| }, | |
| { | |
| "epoch": 0.48327939590075514, | |
| "grad_norm": 2.8398349285125732, | |
| "learning_rate": 4.851625173298389e-07, | |
| "loss": 0.6284780502319336, | |
| "step": 448 | |
| }, | |
| { | |
| "epoch": 0.4854368932038835, | |
| "grad_norm": 2.238544225692749, | |
| "learning_rate": 4.84970753807069e-07, | |
| "loss": 0.6789742112159729, | |
| "step": 450 | |
| }, | |
| { | |
| "epoch": 0.48759439050701187, | |
| "grad_norm": 5.265446186065674, | |
| "learning_rate": 4.847778018325974e-07, | |
| "loss": 0.6509313583374023, | |
| "step": 452 | |
| }, | |
| { | |
| "epoch": 0.48975188781014023, | |
| "grad_norm": 1.3713135719299316, | |
| "learning_rate": 4.845836624985484e-07, | |
| "loss": 0.5825515985488892, | |
| "step": 454 | |
| }, | |
| { | |
| "epoch": 0.4919093851132686, | |
| "grad_norm": 2.7798221111297607, | |
| "learning_rate": 4.84388336903766e-07, | |
| "loss": 0.46305614709854126, | |
| "step": 456 | |
| }, | |
| { | |
| "epoch": 0.49406688241639696, | |
| "grad_norm": 5.081538200378418, | |
| "learning_rate": 4.841918261538093e-07, | |
| "loss": 0.6566568613052368, | |
| "step": 458 | |
| }, | |
| { | |
| "epoch": 0.4962243797195254, | |
| "grad_norm": 8.522509574890137, | |
| "learning_rate": 4.839941313609456e-07, | |
| "loss": 0.7881090641021729, | |
| "step": 460 | |
| }, | |
| { | |
| "epoch": 0.49838187702265374, | |
| "grad_norm": 3.8689656257629395, | |
| "learning_rate": 4.837952536441432e-07, | |
| "loss": 0.592060387134552, | |
| "step": 462 | |
| }, | |
| { | |
| "epoch": 0.5005393743257821, | |
| "grad_norm": 3.3025739192962646, | |
| "learning_rate": 4.835951941290665e-07, | |
| "loss": 0.6704021096229553, | |
| "step": 464 | |
| }, | |
| { | |
| "epoch": 0.5026968716289104, | |
| "grad_norm": 1.400242805480957, | |
| "learning_rate": 4.833939539480689e-07, | |
| "loss": 0.7260591983795166, | |
| "step": 466 | |
| }, | |
| { | |
| "epoch": 0.5048543689320388, | |
| "grad_norm": 1.9812822341918945, | |
| "learning_rate": 4.831915342401862e-07, | |
| "loss": 0.6312894225120544, | |
| "step": 468 | |
| }, | |
| { | |
| "epoch": 0.5070118662351673, | |
| "grad_norm": 5.561008930206299, | |
| "learning_rate": 4.829879361511305e-07, | |
| "loss": 0.5982025861740112, | |
| "step": 470 | |
| }, | |
| { | |
| "epoch": 0.5091693635382956, | |
| "grad_norm": 1.6053967475891113, | |
| "learning_rate": 4.827831608332839e-07, | |
| "loss": 0.6564121246337891, | |
| "step": 472 | |
| }, | |
| { | |
| "epoch": 0.511326860841424, | |
| "grad_norm": 1.3539955615997314, | |
| "learning_rate": 4.825772094456913e-07, | |
| "loss": 0.7475908994674683, | |
| "step": 474 | |
| }, | |
| { | |
| "epoch": 0.5134843581445523, | |
| "grad_norm": 1.227378249168396, | |
| "learning_rate": 4.823700831540547e-07, | |
| "loss": 0.8186240792274475, | |
| "step": 476 | |
| }, | |
| { | |
| "epoch": 0.5156418554476807, | |
| "grad_norm": 1.6025233268737793, | |
| "learning_rate": 4.821617831307256e-07, | |
| "loss": 0.6983221769332886, | |
| "step": 478 | |
| }, | |
| { | |
| "epoch": 0.517799352750809, | |
| "grad_norm": 1.7689329385757446, | |
| "learning_rate": 4.819523105546994e-07, | |
| "loss": 0.4151468276977539, | |
| "step": 480 | |
| }, | |
| { | |
| "epoch": 0.5199568500539374, | |
| "grad_norm": 1.0149801969528198, | |
| "learning_rate": 4.817416666116082e-07, | |
| "loss": 0.6161211729049683, | |
| "step": 482 | |
| }, | |
| { | |
| "epoch": 0.5221143473570659, | |
| "grad_norm": 3.3374340534210205, | |
| "learning_rate": 4.815298524937138e-07, | |
| "loss": 0.6058178544044495, | |
| "step": 484 | |
| }, | |
| { | |
| "epoch": 0.5242718446601942, | |
| "grad_norm": 1.603325605392456, | |
| "learning_rate": 4.813168693999016e-07, | |
| "loss": 0.7110425233840942, | |
| "step": 486 | |
| }, | |
| { | |
| "epoch": 0.5264293419633226, | |
| "grad_norm": 8.539067268371582, | |
| "learning_rate": 4.811027185356733e-07, | |
| "loss": 0.6150040626525879, | |
| "step": 488 | |
| }, | |
| { | |
| "epoch": 0.5285868392664509, | |
| "grad_norm": 1.6875640153884888, | |
| "learning_rate": 4.808874011131405e-07, | |
| "loss": 0.5068952441215515, | |
| "step": 490 | |
| }, | |
| { | |
| "epoch": 0.5307443365695793, | |
| "grad_norm": 3.954596996307373, | |
| "learning_rate": 4.806709183510174e-07, | |
| "loss": 0.6284441947937012, | |
| "step": 492 | |
| }, | |
| { | |
| "epoch": 0.5329018338727076, | |
| "grad_norm": 11.474177360534668, | |
| "learning_rate": 4.804532714746142e-07, | |
| "loss": 0.7361968159675598, | |
| "step": 494 | |
| }, | |
| { | |
| "epoch": 0.535059331175836, | |
| "grad_norm": 5.204519271850586, | |
| "learning_rate": 4.8023446171583e-07, | |
| "loss": 0.6124238967895508, | |
| "step": 496 | |
| }, | |
| { | |
| "epoch": 0.5372168284789643, | |
| "grad_norm": 1.8010599613189697, | |
| "learning_rate": 4.800144903131462e-07, | |
| "loss": 0.7173029780387878, | |
| "step": 498 | |
| }, | |
| { | |
| "epoch": 0.5393743257820928, | |
| "grad_norm": 3.062251567840576, | |
| "learning_rate": 4.79793358511619e-07, | |
| "loss": 0.5613416433334351, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.5415318230852212, | |
| "grad_norm": 1.9310272932052612, | |
| "learning_rate": 4.795710675628724e-07, | |
| "loss": 0.6559145450592041, | |
| "step": 502 | |
| }, | |
| { | |
| "epoch": 0.5436893203883495, | |
| "grad_norm": 2.027998924255371, | |
| "learning_rate": 4.793476187250913e-07, | |
| "loss": 0.5871425271034241, | |
| "step": 504 | |
| }, | |
| { | |
| "epoch": 0.5458468176914779, | |
| "grad_norm": 1.8883675336837769, | |
| "learning_rate": 4.791230132630148e-07, | |
| "loss": 0.6756701469421387, | |
| "step": 506 | |
| }, | |
| { | |
| "epoch": 0.5480043149946062, | |
| "grad_norm": 1.6534463167190552, | |
| "learning_rate": 4.78897252447928e-07, | |
| "loss": 0.747499942779541, | |
| "step": 508 | |
| }, | |
| { | |
| "epoch": 0.5501618122977346, | |
| "grad_norm": 1.547232747077942, | |
| "learning_rate": 4.786703375576557e-07, | |
| "loss": 0.5784983038902283, | |
| "step": 510 | |
| }, | |
| { | |
| "epoch": 0.552319309600863, | |
| "grad_norm": 2.1741976737976074, | |
| "learning_rate": 4.784422698765549e-07, | |
| "loss": 0.6561665534973145, | |
| "step": 512 | |
| }, | |
| { | |
| "epoch": 0.5544768069039914, | |
| "grad_norm": 7.314396381378174, | |
| "learning_rate": 4.782130506955072e-07, | |
| "loss": 0.6497671008110046, | |
| "step": 514 | |
| }, | |
| { | |
| "epoch": 0.5566343042071198, | |
| "grad_norm": 4.359201908111572, | |
| "learning_rate": 4.779826813119122e-07, | |
| "loss": 0.5485538840293884, | |
| "step": 516 | |
| }, | |
| { | |
| "epoch": 0.5587918015102481, | |
| "grad_norm": 1.9365326166152954, | |
| "learning_rate": 4.777511630296795e-07, | |
| "loss": 0.6849959492683411, | |
| "step": 518 | |
| }, | |
| { | |
| "epoch": 0.5609492988133765, | |
| "grad_norm": 3.751213312149048, | |
| "learning_rate": 4.775184971592214e-07, | |
| "loss": 0.6562321186065674, | |
| "step": 520 | |
| }, | |
| { | |
| "epoch": 0.5631067961165048, | |
| "grad_norm": 4.876345634460449, | |
| "learning_rate": 4.772846850174459e-07, | |
| "loss": 0.6159510016441345, | |
| "step": 522 | |
| }, | |
| { | |
| "epoch": 0.5652642934196332, | |
| "grad_norm": 1.8032896518707275, | |
| "learning_rate": 4.77049727927749e-07, | |
| "loss": 0.37065228819847107, | |
| "step": 524 | |
| }, | |
| { | |
| "epoch": 0.5674217907227616, | |
| "grad_norm": 1.5449891090393066, | |
| "learning_rate": 4.7681362722000703e-07, | |
| "loss": 0.6935975551605225, | |
| "step": 526 | |
| }, | |
| { | |
| "epoch": 0.56957928802589, | |
| "grad_norm": 1.7247692346572876, | |
| "learning_rate": 4.7657638423056947e-07, | |
| "loss": 0.5516192317008972, | |
| "step": 528 | |
| }, | |
| { | |
| "epoch": 0.5717367853290184, | |
| "grad_norm": 2.4441468715667725, | |
| "learning_rate": 4.76338000302251e-07, | |
| "loss": 0.6022404432296753, | |
| "step": 530 | |
| }, | |
| { | |
| "epoch": 0.5738942826321467, | |
| "grad_norm": 2.1737172603607178, | |
| "learning_rate": 4.760984767843242e-07, | |
| "loss": 0.6502859592437744, | |
| "step": 532 | |
| }, | |
| { | |
| "epoch": 0.5760517799352751, | |
| "grad_norm": 1.620774269104004, | |
| "learning_rate": 4.7585781503251197e-07, | |
| "loss": 0.660953938961029, | |
| "step": 534 | |
| }, | |
| { | |
| "epoch": 0.5782092772384034, | |
| "grad_norm": 2.282656669616699, | |
| "learning_rate": 4.7561601640897956e-07, | |
| "loss": 0.5781145095825195, | |
| "step": 536 | |
| }, | |
| { | |
| "epoch": 0.5803667745415318, | |
| "grad_norm": 1.9041107892990112, | |
| "learning_rate": 4.75373082282327e-07, | |
| "loss": 0.6320368647575378, | |
| "step": 538 | |
| }, | |
| { | |
| "epoch": 0.5825242718446602, | |
| "grad_norm": 5.685824871063232, | |
| "learning_rate": 4.751290140275813e-07, | |
| "loss": 0.7650933265686035, | |
| "step": 540 | |
| }, | |
| { | |
| "epoch": 0.5846817691477886, | |
| "grad_norm": 39.65275955200195, | |
| "learning_rate": 4.7488381302618887e-07, | |
| "loss": 0.6836517453193665, | |
| "step": 542 | |
| }, | |
| { | |
| "epoch": 0.5868392664509169, | |
| "grad_norm": 1.5295366048812866, | |
| "learning_rate": 4.7463748066600754e-07, | |
| "loss": 0.5143399834632874, | |
| "step": 544 | |
| }, | |
| { | |
| "epoch": 0.5889967637540453, | |
| "grad_norm": 1.5446751117706299, | |
| "learning_rate": 4.7439001834129876e-07, | |
| "loss": 0.7021965980529785, | |
| "step": 546 | |
| }, | |
| { | |
| "epoch": 0.5911542610571737, | |
| "grad_norm": 0.48094552755355835, | |
| "learning_rate": 4.7414142745271944e-07, | |
| "loss": 0.7895625829696655, | |
| "step": 548 | |
| }, | |
| { | |
| "epoch": 0.593311758360302, | |
| "grad_norm": 1.5283570289611816, | |
| "learning_rate": 4.738917094073146e-07, | |
| "loss": 0.5346397757530212, | |
| "step": 550 | |
| }, | |
| { | |
| "epoch": 0.5954692556634305, | |
| "grad_norm": 1.5133683681488037, | |
| "learning_rate": 4.7364086561850866e-07, | |
| "loss": 0.6586446762084961, | |
| "step": 552 | |
| }, | |
| { | |
| "epoch": 0.5976267529665588, | |
| "grad_norm": 1.5767534971237183, | |
| "learning_rate": 4.733888975060981e-07, | |
| "loss": 0.6797043681144714, | |
| "step": 554 | |
| }, | |
| { | |
| "epoch": 0.5997842502696872, | |
| "grad_norm": 1.8032898902893066, | |
| "learning_rate": 4.7313580649624335e-07, | |
| "loss": 0.5901877284049988, | |
| "step": 556 | |
| }, | |
| { | |
| "epoch": 0.6019417475728155, | |
| "grad_norm": 12.88221549987793, | |
| "learning_rate": 4.7288159402146e-07, | |
| "loss": 0.7293663024902344, | |
| "step": 558 | |
| }, | |
| { | |
| "epoch": 0.6040992448759439, | |
| "grad_norm": 3.4143638610839844, | |
| "learning_rate": 4.726262615206117e-07, | |
| "loss": 0.7597730159759521, | |
| "step": 560 | |
| }, | |
| { | |
| "epoch": 0.6062567421790723, | |
| "grad_norm": 1.426260232925415, | |
| "learning_rate": 4.723698104389013e-07, | |
| "loss": 0.6413673162460327, | |
| "step": 562 | |
| }, | |
| { | |
| "epoch": 0.6084142394822006, | |
| "grad_norm": 1.8875607252120972, | |
| "learning_rate": 4.72112242227863e-07, | |
| "loss": 0.705469012260437, | |
| "step": 564 | |
| }, | |
| { | |
| "epoch": 0.6105717367853291, | |
| "grad_norm": 1.8347193002700806, | |
| "learning_rate": 4.71853558345354e-07, | |
| "loss": 0.5616622567176819, | |
| "step": 566 | |
| }, | |
| { | |
| "epoch": 0.6127292340884574, | |
| "grad_norm": 1.3484468460083008, | |
| "learning_rate": 4.715937602555464e-07, | |
| "loss": 0.615619957447052, | |
| "step": 568 | |
| }, | |
| { | |
| "epoch": 0.6148867313915858, | |
| "grad_norm": 1.4341267347335815, | |
| "learning_rate": 4.7133284942891846e-07, | |
| "loss": 0.6071439981460571, | |
| "step": 570 | |
| }, | |
| { | |
| "epoch": 0.6170442286947141, | |
| "grad_norm": 7.157674312591553, | |
| "learning_rate": 4.7107082734224713e-07, | |
| "loss": 0.668420672416687, | |
| "step": 572 | |
| }, | |
| { | |
| "epoch": 0.6192017259978425, | |
| "grad_norm": 1.7236965894699097, | |
| "learning_rate": 4.7080769547859884e-07, | |
| "loss": 0.6496031284332275, | |
| "step": 574 | |
| }, | |
| { | |
| "epoch": 0.6213592233009708, | |
| "grad_norm": 3.2555484771728516, | |
| "learning_rate": 4.7054345532732155e-07, | |
| "loss": 0.5818045735359192, | |
| "step": 576 | |
| }, | |
| { | |
| "epoch": 0.6235167206040992, | |
| "grad_norm": 3.8229000568389893, | |
| "learning_rate": 4.7027810838403613e-07, | |
| "loss": 0.6198642253875732, | |
| "step": 578 | |
| }, | |
| { | |
| "epoch": 0.6256742179072277, | |
| "grad_norm": 3.169841766357422, | |
| "learning_rate": 4.700116561506282e-07, | |
| "loss": 0.666779637336731, | |
| "step": 580 | |
| }, | |
| { | |
| "epoch": 0.627831715210356, | |
| "grad_norm": 8.474517822265625, | |
| "learning_rate": 4.697441001352392e-07, | |
| "loss": 0.564462423324585, | |
| "step": 582 | |
| }, | |
| { | |
| "epoch": 0.6299892125134844, | |
| "grad_norm": 2.2855448722839355, | |
| "learning_rate": 4.6947544185225805e-07, | |
| "loss": 0.6741440296173096, | |
| "step": 584 | |
| }, | |
| { | |
| "epoch": 0.6321467098166127, | |
| "grad_norm": 3.856265068054199, | |
| "learning_rate": 4.692056828223129e-07, | |
| "loss": 0.7456379532814026, | |
| "step": 586 | |
| }, | |
| { | |
| "epoch": 0.6343042071197411, | |
| "grad_norm": 2.731037139892578, | |
| "learning_rate": 4.6893482457226174e-07, | |
| "loss": 0.5937924385070801, | |
| "step": 588 | |
| }, | |
| { | |
| "epoch": 0.6364617044228694, | |
| "grad_norm": 1.8991349935531616, | |
| "learning_rate": 4.6866286863518465e-07, | |
| "loss": 0.7059175372123718, | |
| "step": 590 | |
| }, | |
| { | |
| "epoch": 0.6386192017259978, | |
| "grad_norm": 11.739773750305176, | |
| "learning_rate": 4.6838981655037463e-07, | |
| "loss": 0.7047215104103088, | |
| "step": 592 | |
| }, | |
| { | |
| "epoch": 0.6407766990291263, | |
| "grad_norm": 12.105711936950684, | |
| "learning_rate": 4.6811566986332875e-07, | |
| "loss": 0.8416004180908203, | |
| "step": 594 | |
| }, | |
| { | |
| "epoch": 0.6429341963322546, | |
| "grad_norm": 1.6077960729599, | |
| "learning_rate": 4.678404301257398e-07, | |
| "loss": 0.6131125688552856, | |
| "step": 596 | |
| }, | |
| { | |
| "epoch": 0.645091693635383, | |
| "grad_norm": 1.12235689163208, | |
| "learning_rate": 4.6756409889548734e-07, | |
| "loss": 0.4833483397960663, | |
| "step": 598 | |
| }, | |
| { | |
| "epoch": 0.6472491909385113, | |
| "grad_norm": 3.067166328430176, | |
| "learning_rate": 4.6728667773662873e-07, | |
| "loss": 0.7458387017250061, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 0.6494066882416397, | |
| "grad_norm": 4.32982873916626, | |
| "learning_rate": 4.6700816821939056e-07, | |
| "loss": 0.6110833883285522, | |
| "step": 602 | |
| }, | |
| { | |
| "epoch": 0.651564185544768, | |
| "grad_norm": 1.638934850692749, | |
| "learning_rate": 4.667285719201595e-07, | |
| "loss": 0.8020920753479004, | |
| "step": 604 | |
| }, | |
| { | |
| "epoch": 0.6537216828478964, | |
| "grad_norm": 4.146369457244873, | |
| "learning_rate": 4.6644789042147366e-07, | |
| "loss": 0.6712560653686523, | |
| "step": 606 | |
| }, | |
| { | |
| "epoch": 0.6558791801510249, | |
| "grad_norm": 1.4545388221740723, | |
| "learning_rate": 4.6616612531201324e-07, | |
| "loss": 0.4938512444496155, | |
| "step": 608 | |
| }, | |
| { | |
| "epoch": 0.6580366774541532, | |
| "grad_norm": 2.343878746032715, | |
| "learning_rate": 4.6588327818659195e-07, | |
| "loss": 0.6228682994842529, | |
| "step": 610 | |
| }, | |
| { | |
| "epoch": 0.6601941747572816, | |
| "grad_norm": 1.3540754318237305, | |
| "learning_rate": 4.655993506461478e-07, | |
| "loss": 0.5692847371101379, | |
| "step": 612 | |
| }, | |
| { | |
| "epoch": 0.6623516720604099, | |
| "grad_norm": 8.236356735229492, | |
| "learning_rate": 4.6531434429773384e-07, | |
| "loss": 0.7082728147506714, | |
| "step": 614 | |
| }, | |
| { | |
| "epoch": 0.6645091693635383, | |
| "grad_norm": 2.163950204849243, | |
| "learning_rate": 4.650282607545096e-07, | |
| "loss": 0.7548322081565857, | |
| "step": 616 | |
| }, | |
| { | |
| "epoch": 0.6666666666666666, | |
| "grad_norm": 1.8856990337371826, | |
| "learning_rate": 4.6474110163573114e-07, | |
| "loss": 0.6661372184753418, | |
| "step": 618 | |
| }, | |
| { | |
| "epoch": 0.668824163969795, | |
| "grad_norm": 3.379833698272705, | |
| "learning_rate": 4.644528685667428e-07, | |
| "loss": 0.6216427087783813, | |
| "step": 620 | |
| }, | |
| { | |
| "epoch": 0.6709816612729234, | |
| "grad_norm": 1.583329439163208, | |
| "learning_rate": 4.641635631789675e-07, | |
| "loss": 0.4488504230976105, | |
| "step": 622 | |
| }, | |
| { | |
| "epoch": 0.6731391585760518, | |
| "grad_norm": 1.1011863946914673, | |
| "learning_rate": 4.638731871098973e-07, | |
| "loss": 0.7307155728340149, | |
| "step": 624 | |
| }, | |
| { | |
| "epoch": 0.6752966558791802, | |
| "grad_norm": 2.518486976623535, | |
| "learning_rate": 4.635817420030847e-07, | |
| "loss": 0.605812668800354, | |
| "step": 626 | |
| }, | |
| { | |
| "epoch": 0.6774541531823085, | |
| "grad_norm": 3.132564067840576, | |
| "learning_rate": 4.6328922950813276e-07, | |
| "loss": 0.6636737585067749, | |
| "step": 628 | |
| }, | |
| { | |
| "epoch": 0.6796116504854369, | |
| "grad_norm": 2.8926432132720947, | |
| "learning_rate": 4.629956512806865e-07, | |
| "loss": 0.5622783899307251, | |
| "step": 630 | |
| }, | |
| { | |
| "epoch": 0.6817691477885652, | |
| "grad_norm": 0.8876265287399292, | |
| "learning_rate": 4.6270100898242257e-07, | |
| "loss": 0.7432127594947815, | |
| "step": 632 | |
| }, | |
| { | |
| "epoch": 0.6839266450916937, | |
| "grad_norm": 2.402222156524658, | |
| "learning_rate": 4.6240530428104064e-07, | |
| "loss": 0.495096892118454, | |
| "step": 634 | |
| }, | |
| { | |
| "epoch": 0.686084142394822, | |
| "grad_norm": 1.8342965841293335, | |
| "learning_rate": 4.6210853885025357e-07, | |
| "loss": 0.41085928678512573, | |
| "step": 636 | |
| }, | |
| { | |
| "epoch": 0.6882416396979504, | |
| "grad_norm": 6.494576930999756, | |
| "learning_rate": 4.6181071436977803e-07, | |
| "loss": 0.7552844285964966, | |
| "step": 638 | |
| }, | |
| { | |
| "epoch": 0.6903991370010788, | |
| "grad_norm": 1.355686902999878, | |
| "learning_rate": 4.615118325253251e-07, | |
| "loss": 0.7046399116516113, | |
| "step": 640 | |
| }, | |
| { | |
| "epoch": 0.6925566343042071, | |
| "grad_norm": 1.4658502340316772, | |
| "learning_rate": 4.612118950085905e-07, | |
| "loss": 0.6357789039611816, | |
| "step": 642 | |
| }, | |
| { | |
| "epoch": 0.6947141316073355, | |
| "grad_norm": 10.526252746582031, | |
| "learning_rate": 4.6091090351724523e-07, | |
| "loss": 0.8061111569404602, | |
| "step": 644 | |
| }, | |
| { | |
| "epoch": 0.6968716289104638, | |
| "grad_norm": 5.159208297729492, | |
| "learning_rate": 4.606088597549258e-07, | |
| "loss": 0.6721555590629578, | |
| "step": 646 | |
| }, | |
| { | |
| "epoch": 0.6990291262135923, | |
| "grad_norm": 13.965704917907715, | |
| "learning_rate": 4.603057654312247e-07, | |
| "loss": 0.5817508697509766, | |
| "step": 648 | |
| }, | |
| { | |
| "epoch": 0.7011866235167206, | |
| "grad_norm": 2.7752487659454346, | |
| "learning_rate": 4.600016222616807e-07, | |
| "loss": 0.6185034513473511, | |
| "step": 650 | |
| }, | |
| { | |
| "epoch": 0.703344120819849, | |
| "grad_norm": 1.5174920558929443, | |
| "learning_rate": 4.5969643196776907e-07, | |
| "loss": 0.6131242513656616, | |
| "step": 652 | |
| }, | |
| { | |
| "epoch": 0.7055016181229773, | |
| "grad_norm": 3.6596693992614746, | |
| "learning_rate": 4.5939019627689196e-07, | |
| "loss": 0.6017345190048218, | |
| "step": 654 | |
| }, | |
| { | |
| "epoch": 0.7076591154261057, | |
| "grad_norm": 1.590834379196167, | |
| "learning_rate": 4.590829169223686e-07, | |
| "loss": 0.6057524681091309, | |
| "step": 656 | |
| }, | |
| { | |
| "epoch": 0.7098166127292341, | |
| "grad_norm": 3.4560601711273193, | |
| "learning_rate": 4.587745956434252e-07, | |
| "loss": 0.48293402791023254, | |
| "step": 658 | |
| }, | |
| { | |
| "epoch": 0.7119741100323624, | |
| "grad_norm": 3.8374826908111572, | |
| "learning_rate": 4.584652341851855e-07, | |
| "loss": 0.6879529356956482, | |
| "step": 660 | |
| }, | |
| { | |
| "epoch": 0.7141316073354909, | |
| "grad_norm": 1.8356127738952637, | |
| "learning_rate": 4.581548342986609e-07, | |
| "loss": 0.6557474136352539, | |
| "step": 662 | |
| }, | |
| { | |
| "epoch": 0.7162891046386192, | |
| "grad_norm": 2.937380313873291, | |
| "learning_rate": 4.578433977407401e-07, | |
| "loss": 0.5540982484817505, | |
| "step": 664 | |
| }, | |
| { | |
| "epoch": 0.7184466019417476, | |
| "grad_norm": 9.279194831848145, | |
| "learning_rate": 4.5753092627417966e-07, | |
| "loss": 0.6095687747001648, | |
| "step": 666 | |
| }, | |
| { | |
| "epoch": 0.7206040992448759, | |
| "grad_norm": 1.7384564876556396, | |
| "learning_rate": 4.572174216675938e-07, | |
| "loss": 0.5323720574378967, | |
| "step": 668 | |
| }, | |
| { | |
| "epoch": 0.7227615965480043, | |
| "grad_norm": 3.279447555541992, | |
| "learning_rate": 4.5690288569544423e-07, | |
| "loss": 0.6184368133544922, | |
| "step": 670 | |
| }, | |
| { | |
| "epoch": 0.7249190938511327, | |
| "grad_norm": 6.155360698699951, | |
| "learning_rate": 4.5658732013803027e-07, | |
| "loss": 0.6536476612091064, | |
| "step": 672 | |
| }, | |
| { | |
| "epoch": 0.727076591154261, | |
| "grad_norm": 1.8201708793640137, | |
| "learning_rate": 4.5627072678147904e-07, | |
| "loss": 0.5651783347129822, | |
| "step": 674 | |
| }, | |
| { | |
| "epoch": 0.7292340884573895, | |
| "grad_norm": 3.5887646675109863, | |
| "learning_rate": 4.559531074177349e-07, | |
| "loss": 0.5361768007278442, | |
| "step": 676 | |
| }, | |
| { | |
| "epoch": 0.7313915857605178, | |
| "grad_norm": 9.772974967956543, | |
| "learning_rate": 4.5563446384454945e-07, | |
| "loss": 0.5051864981651306, | |
| "step": 678 | |
| }, | |
| { | |
| "epoch": 0.7335490830636462, | |
| "grad_norm": 1.5057132244110107, | |
| "learning_rate": 4.553147978654715e-07, | |
| "loss": 0.6263077855110168, | |
| "step": 680 | |
| }, | |
| { | |
| "epoch": 0.7357065803667745, | |
| "grad_norm": 1.7822685241699219, | |
| "learning_rate": 4.5499411128983674e-07, | |
| "loss": 0.6016006469726562, | |
| "step": 682 | |
| }, | |
| { | |
| "epoch": 0.7378640776699029, | |
| "grad_norm": 10.83908462524414, | |
| "learning_rate": 4.546724059327575e-07, | |
| "loss": 0.5432024002075195, | |
| "step": 684 | |
| }, | |
| { | |
| "epoch": 0.7400215749730313, | |
| "grad_norm": 2.1380414962768555, | |
| "learning_rate": 4.5434968361511263e-07, | |
| "loss": 0.7576265931129456, | |
| "step": 686 | |
| }, | |
| { | |
| "epoch": 0.7421790722761596, | |
| "grad_norm": 1.9547456502914429, | |
| "learning_rate": 4.5402594616353676e-07, | |
| "loss": 0.5621410012245178, | |
| "step": 688 | |
| }, | |
| { | |
| "epoch": 0.7443365695792881, | |
| "grad_norm": 1.8721739053726196, | |
| "learning_rate": 4.537011954104105e-07, | |
| "loss": 0.5246554613113403, | |
| "step": 690 | |
| }, | |
| { | |
| "epoch": 0.7464940668824164, | |
| "grad_norm": 1.6793891191482544, | |
| "learning_rate": 4.533754331938498e-07, | |
| "loss": 0.6016390323638916, | |
| "step": 692 | |
| }, | |
| { | |
| "epoch": 0.7486515641855448, | |
| "grad_norm": 1.9931007623672485, | |
| "learning_rate": 4.530486613576954e-07, | |
| "loss": 0.6583068370819092, | |
| "step": 694 | |
| }, | |
| { | |
| "epoch": 0.7508090614886731, | |
| "grad_norm": 4.051970481872559, | |
| "learning_rate": 4.5272088175150305e-07, | |
| "loss": 0.5990911722183228, | |
| "step": 696 | |
| }, | |
| { | |
| "epoch": 0.7529665587918015, | |
| "grad_norm": 1.6209897994995117, | |
| "learning_rate": 4.523920962305319e-07, | |
| "loss": 0.6055378913879395, | |
| "step": 698 | |
| }, | |
| { | |
| "epoch": 0.7551240560949298, | |
| "grad_norm": 1.4128165245056152, | |
| "learning_rate": 4.520623066557351e-07, | |
| "loss": 0.7489557862281799, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 0.7572815533980582, | |
| "grad_norm": 2.651704788208008, | |
| "learning_rate": 4.5173151489374874e-07, | |
| "loss": 0.5671336054801941, | |
| "step": 702 | |
| }, | |
| { | |
| "epoch": 0.7594390507011867, | |
| "grad_norm": 2.9760892391204834, | |
| "learning_rate": 4.5139972281688125e-07, | |
| "loss": 0.5748198628425598, | |
| "step": 704 | |
| }, | |
| { | |
| "epoch": 0.761596548004315, | |
| "grad_norm": 2.070570230484009, | |
| "learning_rate": 4.510669323031032e-07, | |
| "loss": 0.6631715297698975, | |
| "step": 706 | |
| }, | |
| { | |
| "epoch": 0.7637540453074434, | |
| "grad_norm": 2.8771958351135254, | |
| "learning_rate": 4.50733145236036e-07, | |
| "loss": 0.538175106048584, | |
| "step": 708 | |
| }, | |
| { | |
| "epoch": 0.7659115426105717, | |
| "grad_norm": 1.689902901649475, | |
| "learning_rate": 4.50398363504942e-07, | |
| "loss": 0.5201085209846497, | |
| "step": 710 | |
| }, | |
| { | |
| "epoch": 0.7680690399137001, | |
| "grad_norm": 3.293177366256714, | |
| "learning_rate": 4.500625890047133e-07, | |
| "loss": 0.825022280216217, | |
| "step": 712 | |
| }, | |
| { | |
| "epoch": 0.7702265372168284, | |
| "grad_norm": 0.5633196234703064, | |
| "learning_rate": 4.49725823635861e-07, | |
| "loss": 0.6611315011978149, | |
| "step": 714 | |
| }, | |
| { | |
| "epoch": 0.7723840345199569, | |
| "grad_norm": 1.3877530097961426, | |
| "learning_rate": 4.4938806930450476e-07, | |
| "loss": 0.5903546810150146, | |
| "step": 716 | |
| }, | |
| { | |
| "epoch": 0.7745415318230853, | |
| "grad_norm": 2.2322981357574463, | |
| "learning_rate": 4.4904932792236187e-07, | |
| "loss": 0.4113418459892273, | |
| "step": 718 | |
| }, | |
| { | |
| "epoch": 0.7766990291262136, | |
| "grad_norm": 1.2284919023513794, | |
| "learning_rate": 4.487096014067363e-07, | |
| "loss": 0.5899285078048706, | |
| "step": 720 | |
| }, | |
| { | |
| "epoch": 0.778856526429342, | |
| "grad_norm": 1.843123197555542, | |
| "learning_rate": 4.483688916805081e-07, | |
| "loss": 0.5846186280250549, | |
| "step": 722 | |
| }, | |
| { | |
| "epoch": 0.7810140237324703, | |
| "grad_norm": 3.0655667781829834, | |
| "learning_rate": 4.4802720067212237e-07, | |
| "loss": 0.6830175518989563, | |
| "step": 724 | |
| }, | |
| { | |
| "epoch": 0.7831715210355987, | |
| "grad_norm": 2.1866564750671387, | |
| "learning_rate": 4.4768453031557797e-07, | |
| "loss": 0.6690636873245239, | |
| "step": 726 | |
| }, | |
| { | |
| "epoch": 0.785329018338727, | |
| "grad_norm": 8.055279731750488, | |
| "learning_rate": 4.4734088255041747e-07, | |
| "loss": 0.6548261642456055, | |
| "step": 728 | |
| }, | |
| { | |
| "epoch": 0.7874865156418555, | |
| "grad_norm": 1.804400086402893, | |
| "learning_rate": 4.4699625932171534e-07, | |
| "loss": 0.8144820928573608, | |
| "step": 730 | |
| }, | |
| { | |
| "epoch": 0.7896440129449838, | |
| "grad_norm": 1.7731058597564697, | |
| "learning_rate": 4.466506625800674e-07, | |
| "loss": 0.6711707711219788, | |
| "step": 732 | |
| }, | |
| { | |
| "epoch": 0.7918015102481122, | |
| "grad_norm": 4.458347320556641, | |
| "learning_rate": 4.463040942815796e-07, | |
| "loss": 0.6960354447364807, | |
| "step": 734 | |
| }, | |
| { | |
| "epoch": 0.7939590075512406, | |
| "grad_norm": 1.831221103668213, | |
| "learning_rate": 4.459565563878568e-07, | |
| "loss": 0.6320536732673645, | |
| "step": 736 | |
| }, | |
| { | |
| "epoch": 0.7961165048543689, | |
| "grad_norm": 1.1004321575164795, | |
| "learning_rate": 4.456080508659922e-07, | |
| "loss": 0.6839619874954224, | |
| "step": 738 | |
| }, | |
| { | |
| "epoch": 0.7982740021574973, | |
| "grad_norm": 3.9790191650390625, | |
| "learning_rate": 4.452585796885555e-07, | |
| "loss": 0.787294328212738, | |
| "step": 740 | |
| }, | |
| { | |
| "epoch": 0.8004314994606256, | |
| "grad_norm": 2.5182931423187256, | |
| "learning_rate": 4.449081448335824e-07, | |
| "loss": 0.5662147402763367, | |
| "step": 742 | |
| }, | |
| { | |
| "epoch": 0.8025889967637541, | |
| "grad_norm": 1.3335113525390625, | |
| "learning_rate": 4.4455674828456285e-07, | |
| "loss": 0.4203084111213684, | |
| "step": 744 | |
| }, | |
| { | |
| "epoch": 0.8047464940668824, | |
| "grad_norm": 3.6229031085968018, | |
| "learning_rate": 4.442043920304302e-07, | |
| "loss": 0.3892061412334442, | |
| "step": 746 | |
| }, | |
| { | |
| "epoch": 0.8069039913700108, | |
| "grad_norm": 2.473930835723877, | |
| "learning_rate": 4.4385107806554964e-07, | |
| "loss": 0.6021113395690918, | |
| "step": 748 | |
| }, | |
| { | |
| "epoch": 0.8090614886731392, | |
| "grad_norm": 5.694954872131348, | |
| "learning_rate": 4.4349680838970745e-07, | |
| "loss": 0.586351752281189, | |
| "step": 750 | |
| }, | |
| { | |
| "epoch": 0.8112189859762675, | |
| "grad_norm": 1.9584993124008179, | |
| "learning_rate": 4.431415850080989e-07, | |
| "loss": 0.5266885757446289, | |
| "step": 752 | |
| }, | |
| { | |
| "epoch": 0.8133764832793959, | |
| "grad_norm": 2.9414353370666504, | |
| "learning_rate": 4.427854099313175e-07, | |
| "loss": 0.7066282033920288, | |
| "step": 754 | |
| }, | |
| { | |
| "epoch": 0.8155339805825242, | |
| "grad_norm": 2.3507425785064697, | |
| "learning_rate": 4.424282851753435e-07, | |
| "loss": 0.6412563323974609, | |
| "step": 756 | |
| }, | |
| { | |
| "epoch": 0.8176914778856527, | |
| "grad_norm": 1.6683588027954102, | |
| "learning_rate": 4.420702127615323e-07, | |
| "loss": 0.6926671862602234, | |
| "step": 758 | |
| }, | |
| { | |
| "epoch": 0.819848975188781, | |
| "grad_norm": 2.730984687805176, | |
| "learning_rate": 4.4171119471660315e-07, | |
| "loss": 0.506192684173584, | |
| "step": 760 | |
| }, | |
| { | |
| "epoch": 0.8220064724919094, | |
| "grad_norm": 1.8006408214569092, | |
| "learning_rate": 4.413512330726276e-07, | |
| "loss": 0.5694448351860046, | |
| "step": 762 | |
| }, | |
| { | |
| "epoch": 0.8241639697950378, | |
| "grad_norm": 1.66466224193573, | |
| "learning_rate": 4.4099032986701817e-07, | |
| "loss": 0.8421421051025391, | |
| "step": 764 | |
| }, | |
| { | |
| "epoch": 0.8263214670981661, | |
| "grad_norm": 2.7753241062164307, | |
| "learning_rate": 4.406284871425166e-07, | |
| "loss": 0.5470436215400696, | |
| "step": 766 | |
| }, | |
| { | |
| "epoch": 0.8284789644012945, | |
| "grad_norm": 2.583517551422119, | |
| "learning_rate": 4.402657069471824e-07, | |
| "loss": 0.5224552154541016, | |
| "step": 768 | |
| }, | |
| { | |
| "epoch": 0.8306364617044228, | |
| "grad_norm": 3.6843671798706055, | |
| "learning_rate": 4.3990199133438133e-07, | |
| "loss": 0.5659505724906921, | |
| "step": 770 | |
| }, | |
| { | |
| "epoch": 0.8327939590075513, | |
| "grad_norm": 2.2777364253997803, | |
| "learning_rate": 4.395373423627735e-07, | |
| "loss": 0.5481325387954712, | |
| "step": 772 | |
| }, | |
| { | |
| "epoch": 0.8349514563106796, | |
| "grad_norm": 12.834761619567871, | |
| "learning_rate": 4.3917176209630216e-07, | |
| "loss": 0.7331764101982117, | |
| "step": 774 | |
| }, | |
| { | |
| "epoch": 0.837108953613808, | |
| "grad_norm": 4.730658531188965, | |
| "learning_rate": 4.3880525260418143e-07, | |
| "loss": 0.6668429970741272, | |
| "step": 776 | |
| }, | |
| { | |
| "epoch": 0.8392664509169363, | |
| "grad_norm": 8.263324737548828, | |
| "learning_rate": 4.3843781596088526e-07, | |
| "loss": 0.6778562068939209, | |
| "step": 778 | |
| }, | |
| { | |
| "epoch": 0.8414239482200647, | |
| "grad_norm": 1.9440997838974, | |
| "learning_rate": 4.380694542461352e-07, | |
| "loss": 0.6994505524635315, | |
| "step": 780 | |
| }, | |
| { | |
| "epoch": 0.8435814455231931, | |
| "grad_norm": 0.9290769100189209, | |
| "learning_rate": 4.3770016954488887e-07, | |
| "loss": 0.5277756452560425, | |
| "step": 782 | |
| }, | |
| { | |
| "epoch": 0.8457389428263214, | |
| "grad_norm": 1.2959551811218262, | |
| "learning_rate": 4.373299639473277e-07, | |
| "loss": 0.5180898904800415, | |
| "step": 784 | |
| }, | |
| { | |
| "epoch": 0.8478964401294499, | |
| "grad_norm": 2.0248026847839355, | |
| "learning_rate": 4.3695883954884616e-07, | |
| "loss": 0.5872831344604492, | |
| "step": 786 | |
| }, | |
| { | |
| "epoch": 0.8500539374325782, | |
| "grad_norm": 1.9588837623596191, | |
| "learning_rate": 4.365867984500385e-07, | |
| "loss": 0.7239266633987427, | |
| "step": 788 | |
| }, | |
| { | |
| "epoch": 0.8522114347357066, | |
| "grad_norm": 1.0221301317214966, | |
| "learning_rate": 4.3621384275668796e-07, | |
| "loss": 0.38601911067962646, | |
| "step": 790 | |
| }, | |
| { | |
| "epoch": 0.8543689320388349, | |
| "grad_norm": 1.3124701976776123, | |
| "learning_rate": 4.3583997457975454e-07, | |
| "loss": 0.5240159630775452, | |
| "step": 792 | |
| }, | |
| { | |
| "epoch": 0.8565264293419633, | |
| "grad_norm": 7.634000778198242, | |
| "learning_rate": 4.354651960353625e-07, | |
| "loss": 0.3657144010066986, | |
| "step": 794 | |
| }, | |
| { | |
| "epoch": 0.8586839266450917, | |
| "grad_norm": 1.649867057800293, | |
| "learning_rate": 4.3508950924478943e-07, | |
| "loss": 0.5847235321998596, | |
| "step": 796 | |
| }, | |
| { | |
| "epoch": 0.86084142394822, | |
| "grad_norm": 2.088364601135254, | |
| "learning_rate": 4.3471291633445334e-07, | |
| "loss": 0.6124690771102905, | |
| "step": 798 | |
| }, | |
| { | |
| "epoch": 0.8629989212513485, | |
| "grad_norm": 3.3762848377227783, | |
| "learning_rate": 4.343354194359009e-07, | |
| "loss": 0.5077890753746033, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 0.8651564185544768, | |
| "grad_norm": 1.6368306875228882, | |
| "learning_rate": 4.339570206857957e-07, | |
| "loss": 0.4499396085739136, | |
| "step": 802 | |
| }, | |
| { | |
| "epoch": 0.8673139158576052, | |
| "grad_norm": 1.6739598512649536, | |
| "learning_rate": 4.335777222259056e-07, | |
| "loss": 0.4595860242843628, | |
| "step": 804 | |
| }, | |
| { | |
| "epoch": 0.8694714131607335, | |
| "grad_norm": 2.5155189037323, | |
| "learning_rate": 4.331975262030911e-07, | |
| "loss": 0.5732553005218506, | |
| "step": 806 | |
| }, | |
| { | |
| "epoch": 0.8716289104638619, | |
| "grad_norm": 1.8127964735031128, | |
| "learning_rate": 4.3281643476929286e-07, | |
| "loss": 0.6196991801261902, | |
| "step": 808 | |
| }, | |
| { | |
| "epoch": 0.8737864077669902, | |
| "grad_norm": 3.939059257507324, | |
| "learning_rate": 4.324344500815197e-07, | |
| "loss": 0.5942954421043396, | |
| "step": 810 | |
| }, | |
| { | |
| "epoch": 0.8759439050701187, | |
| "grad_norm": 4.195562362670898, | |
| "learning_rate": 4.3205157430183627e-07, | |
| "loss": 0.38596200942993164, | |
| "step": 812 | |
| }, | |
| { | |
| "epoch": 0.8781014023732471, | |
| "grad_norm": 1.3970879316329956, | |
| "learning_rate": 4.316678095973509e-07, | |
| "loss": 0.613640308380127, | |
| "step": 814 | |
| }, | |
| { | |
| "epoch": 0.8802588996763754, | |
| "grad_norm": 1.3632667064666748, | |
| "learning_rate": 4.312831581402034e-07, | |
| "loss": 0.653675377368927, | |
| "step": 816 | |
| }, | |
| { | |
| "epoch": 0.8824163969795038, | |
| "grad_norm": 3.1686818599700928, | |
| "learning_rate": 4.3089762210755246e-07, | |
| "loss": 0.5764310956001282, | |
| "step": 818 | |
| }, | |
| { | |
| "epoch": 0.8845738942826321, | |
| "grad_norm": 3.4244537353515625, | |
| "learning_rate": 4.305112036815639e-07, | |
| "loss": 0.5124704241752625, | |
| "step": 820 | |
| }, | |
| { | |
| "epoch": 0.8867313915857605, | |
| "grad_norm": 1.641554832458496, | |
| "learning_rate": 4.3012390504939745e-07, | |
| "loss": 0.6814495921134949, | |
| "step": 822 | |
| }, | |
| { | |
| "epoch": 0.8888888888888888, | |
| "grad_norm": 1.3232611417770386, | |
| "learning_rate": 4.2973572840319536e-07, | |
| "loss": 0.6368851661682129, | |
| "step": 824 | |
| }, | |
| { | |
| "epoch": 0.8910463861920173, | |
| "grad_norm": 2.085556983947754, | |
| "learning_rate": 4.2934667594006917e-07, | |
| "loss": 0.6687034368515015, | |
| "step": 826 | |
| }, | |
| { | |
| "epoch": 0.8932038834951457, | |
| "grad_norm": 2.6139376163482666, | |
| "learning_rate": 4.2895674986208786e-07, | |
| "loss": 0.2821641266345978, | |
| "step": 828 | |
| }, | |
| { | |
| "epoch": 0.895361380798274, | |
| "grad_norm": 1.9903957843780518, | |
| "learning_rate": 4.28565952376265e-07, | |
| "loss": 0.5252734422683716, | |
| "step": 830 | |
| }, | |
| { | |
| "epoch": 0.8975188781014024, | |
| "grad_norm": 2.297610282897949, | |
| "learning_rate": 4.281742856945465e-07, | |
| "loss": 0.745256781578064, | |
| "step": 832 | |
| }, | |
| { | |
| "epoch": 0.8996763754045307, | |
| "grad_norm": 6.354626655578613, | |
| "learning_rate": 4.277817520337978e-07, | |
| "loss": 0.5809981226921082, | |
| "step": 834 | |
| }, | |
| { | |
| "epoch": 0.9018338727076591, | |
| "grad_norm": 2.735447883605957, | |
| "learning_rate": 4.273883536157917e-07, | |
| "loss": 0.6928726434707642, | |
| "step": 836 | |
| }, | |
| { | |
| "epoch": 0.9039913700107874, | |
| "grad_norm": 1.2760671377182007, | |
| "learning_rate": 4.269940926671957e-07, | |
| "loss": 0.6470978856086731, | |
| "step": 838 | |
| }, | |
| { | |
| "epoch": 0.9061488673139159, | |
| "grad_norm": 9.878061294555664, | |
| "learning_rate": 4.2659897141955876e-07, | |
| "loss": 0.6638087630271912, | |
| "step": 840 | |
| }, | |
| { | |
| "epoch": 0.9083063646170443, | |
| "grad_norm": 6.520370960235596, | |
| "learning_rate": 4.262029921092999e-07, | |
| "loss": 0.6910456418991089, | |
| "step": 842 | |
| }, | |
| { | |
| "epoch": 0.9104638619201726, | |
| "grad_norm": 6.460091590881348, | |
| "learning_rate": 4.258061569776944e-07, | |
| "loss": 0.8283501267433167, | |
| "step": 844 | |
| }, | |
| { | |
| "epoch": 0.912621359223301, | |
| "grad_norm": 5.205661296844482, | |
| "learning_rate": 4.254084682708617e-07, | |
| "loss": 0.8275612592697144, | |
| "step": 846 | |
| }, | |
| { | |
| "epoch": 0.9147788565264293, | |
| "grad_norm": 1.463245153427124, | |
| "learning_rate": 4.250099282397526e-07, | |
| "loss": 0.46582770347595215, | |
| "step": 848 | |
| }, | |
| { | |
| "epoch": 0.9169363538295577, | |
| "grad_norm": 5.333581924438477, | |
| "learning_rate": 4.246105391401362e-07, | |
| "loss": 0.7306879758834839, | |
| "step": 850 | |
| }, | |
| { | |
| "epoch": 0.919093851132686, | |
| "grad_norm": 5.498586654663086, | |
| "learning_rate": 4.2421030323258773e-07, | |
| "loss": 0.4175741672515869, | |
| "step": 852 | |
| }, | |
| { | |
| "epoch": 0.9212513484358145, | |
| "grad_norm": 1.0770583152770996, | |
| "learning_rate": 4.2380922278247524e-07, | |
| "loss": 0.6730119585990906, | |
| "step": 854 | |
| }, | |
| { | |
| "epoch": 0.9234088457389428, | |
| "grad_norm": 0.42357996106147766, | |
| "learning_rate": 4.234073000599469e-07, | |
| "loss": 0.8127405643463135, | |
| "step": 856 | |
| }, | |
| { | |
| "epoch": 0.9255663430420712, | |
| "grad_norm": 7.6792120933532715, | |
| "learning_rate": 4.230045373399185e-07, | |
| "loss": 0.576317310333252, | |
| "step": 858 | |
| }, | |
| { | |
| "epoch": 0.9277238403451996, | |
| "grad_norm": 5.140259265899658, | |
| "learning_rate": 4.2260093690206007e-07, | |
| "loss": 0.523854672908783, | |
| "step": 860 | |
| }, | |
| { | |
| "epoch": 0.9298813376483279, | |
| "grad_norm": 3.8896889686584473, | |
| "learning_rate": 4.221965010307831e-07, | |
| "loss": 0.8840075731277466, | |
| "step": 862 | |
| }, | |
| { | |
| "epoch": 0.9320388349514563, | |
| "grad_norm": 1.3811416625976562, | |
| "learning_rate": 4.2179123201522784e-07, | |
| "loss": 0.7325385212898254, | |
| "step": 864 | |
| }, | |
| { | |
| "epoch": 0.9341963322545846, | |
| "grad_norm": 1.7766026258468628, | |
| "learning_rate": 4.213851321492503e-07, | |
| "loss": 0.30791109800338745, | |
| "step": 866 | |
| }, | |
| { | |
| "epoch": 0.9363538295577131, | |
| "grad_norm": 1.1082113981246948, | |
| "learning_rate": 4.209782037314089e-07, | |
| "loss": 0.6958837509155273, | |
| "step": 868 | |
| }, | |
| { | |
| "epoch": 0.9385113268608414, | |
| "grad_norm": 7.657420635223389, | |
| "learning_rate": 4.2057044906495197e-07, | |
| "loss": 0.42088598012924194, | |
| "step": 870 | |
| }, | |
| { | |
| "epoch": 0.9406688241639698, | |
| "grad_norm": 9.207883834838867, | |
| "learning_rate": 4.2016187045780445e-07, | |
| "loss": 0.6101109385490417, | |
| "step": 872 | |
| }, | |
| { | |
| "epoch": 0.9428263214670982, | |
| "grad_norm": 1.3625860214233398, | |
| "learning_rate": 4.197524702225547e-07, | |
| "loss": 0.6703606247901917, | |
| "step": 874 | |
| }, | |
| { | |
| "epoch": 0.9449838187702265, | |
| "grad_norm": 4.89463996887207, | |
| "learning_rate": 4.1934225067644163e-07, | |
| "loss": 0.5207024216651917, | |
| "step": 876 | |
| }, | |
| { | |
| "epoch": 0.9471413160733549, | |
| "grad_norm": 5.676528453826904, | |
| "learning_rate": 4.1893121414134165e-07, | |
| "loss": 0.67097008228302, | |
| "step": 878 | |
| }, | |
| { | |
| "epoch": 0.9492988133764833, | |
| "grad_norm": 3.8142192363739014, | |
| "learning_rate": 4.1851936294375525e-07, | |
| "loss": 0.8798677325248718, | |
| "step": 880 | |
| }, | |
| { | |
| "epoch": 0.9514563106796117, | |
| "grad_norm": 3.3056960105895996, | |
| "learning_rate": 4.181066994147939e-07, | |
| "loss": 0.5886131525039673, | |
| "step": 882 | |
| }, | |
| { | |
| "epoch": 0.95361380798274, | |
| "grad_norm": 1.9534400701522827, | |
| "learning_rate": 4.176932258901673e-07, | |
| "loss": 0.553901731967926, | |
| "step": 884 | |
| }, | |
| { | |
| "epoch": 0.9557713052858684, | |
| "grad_norm": 1.5135817527770996, | |
| "learning_rate": 4.1727894471016933e-07, | |
| "loss": 0.35753270983695984, | |
| "step": 886 | |
| }, | |
| { | |
| "epoch": 0.9579288025889967, | |
| "grad_norm": 4.861209869384766, | |
| "learning_rate": 4.168638582196654e-07, | |
| "loss": 0.8348190188407898, | |
| "step": 888 | |
| }, | |
| { | |
| "epoch": 0.9600862998921251, | |
| "grad_norm": 2.2612881660461426, | |
| "learning_rate": 4.164479687680794e-07, | |
| "loss": 0.8166549205780029, | |
| "step": 890 | |
| }, | |
| { | |
| "epoch": 0.9622437971952535, | |
| "grad_norm": 1.4900096654891968, | |
| "learning_rate": 4.160312787093796e-07, | |
| "loss": 0.47946974635124207, | |
| "step": 892 | |
| }, | |
| { | |
| "epoch": 0.9644012944983819, | |
| "grad_norm": 2.3414793014526367, | |
| "learning_rate": 4.156137904020659e-07, | |
| "loss": 0.5899794697761536, | |
| "step": 894 | |
| }, | |
| { | |
| "epoch": 0.9665587918015103, | |
| "grad_norm": 1.6720659732818604, | |
| "learning_rate": 4.1519550620915643e-07, | |
| "loss": 0.6609233617782593, | |
| "step": 896 | |
| }, | |
| { | |
| "epoch": 0.9687162891046386, | |
| "grad_norm": 1.4975440502166748, | |
| "learning_rate": 4.1477642849817414e-07, | |
| "loss": 0.6876581907272339, | |
| "step": 898 | |
| }, | |
| { | |
| "epoch": 0.970873786407767, | |
| "grad_norm": 4.46371603012085, | |
| "learning_rate": 4.143565596411331e-07, | |
| "loss": 0.6830723881721497, | |
| "step": 900 | |
| }, | |
| { | |
| "epoch": 0.9730312837108953, | |
| "grad_norm": 1.2813857793807983, | |
| "learning_rate": 4.139359020145257e-07, | |
| "loss": 0.5846165418624878, | |
| "step": 902 | |
| }, | |
| { | |
| "epoch": 0.9751887810140237, | |
| "grad_norm": 3.6278672218322754, | |
| "learning_rate": 4.1351445799930837e-07, | |
| "loss": 0.7689055800437927, | |
| "step": 904 | |
| }, | |
| { | |
| "epoch": 0.9773462783171522, | |
| "grad_norm": 6.095682621002197, | |
| "learning_rate": 4.1309222998088923e-07, | |
| "loss": 0.5822760462760925, | |
| "step": 906 | |
| }, | |
| { | |
| "epoch": 0.9795037756202805, | |
| "grad_norm": 1.989863634109497, | |
| "learning_rate": 4.126692203491132e-07, | |
| "loss": 0.6141869425773621, | |
| "step": 908 | |
| }, | |
| { | |
| "epoch": 0.9816612729234089, | |
| "grad_norm": 1.743506908416748, | |
| "learning_rate": 4.1224543149824945e-07, | |
| "loss": 0.6478677988052368, | |
| "step": 910 | |
| }, | |
| { | |
| "epoch": 0.9838187702265372, | |
| "grad_norm": 1.9053099155426025, | |
| "learning_rate": 4.11820865826978e-07, | |
| "loss": 0.48178091645240784, | |
| "step": 912 | |
| }, | |
| { | |
| "epoch": 0.9859762675296656, | |
| "grad_norm": 2.2662453651428223, | |
| "learning_rate": 4.1139552573837515e-07, | |
| "loss": 0.607950747013092, | |
| "step": 914 | |
| }, | |
| { | |
| "epoch": 0.9881337648327939, | |
| "grad_norm": 2.4114274978637695, | |
| "learning_rate": 4.109694136399008e-07, | |
| "loss": 0.6108921766281128, | |
| "step": 916 | |
| }, | |
| { | |
| "epoch": 0.9902912621359223, | |
| "grad_norm": 7.512186050415039, | |
| "learning_rate": 4.105425319433844e-07, | |
| "loss": 0.6795108318328857, | |
| "step": 918 | |
| }, | |
| { | |
| "epoch": 0.9924487594390508, | |
| "grad_norm": 1.7533015012741089, | |
| "learning_rate": 4.1011488306501136e-07, | |
| "loss": 0.437101811170578, | |
| "step": 920 | |
| }, | |
| { | |
| "epoch": 0.9946062567421791, | |
| "grad_norm": 2.403139352798462, | |
| "learning_rate": 4.096864694253095e-07, | |
| "loss": 0.4864053428173065, | |
| "step": 922 | |
| }, | |
| { | |
| "epoch": 0.9967637540453075, | |
| "grad_norm": 1.6964081525802612, | |
| "learning_rate": 4.0925729344913507e-07, | |
| "loss": 0.643713116645813, | |
| "step": 924 | |
| }, | |
| { | |
| "epoch": 0.9989212513484358, | |
| "grad_norm": 1.4134613275527954, | |
| "learning_rate": 4.088273575656594e-07, | |
| "loss": 0.6958010196685791, | |
| "step": 926 | |
| }, | |
| { | |
| "epoch": 1.0010787486515642, | |
| "grad_norm": 1.5019845962524414, | |
| "learning_rate": 4.083966642083549e-07, | |
| "loss": 0.491763174533844, | |
| "step": 928 | |
| }, | |
| { | |
| "epoch": 1.0032362459546926, | |
| "grad_norm": 1.6836707592010498, | |
| "learning_rate": 4.079652158149813e-07, | |
| "loss": 0.6048216819763184, | |
| "step": 930 | |
| }, | |
| { | |
| "epoch": 1.0053937432578208, | |
| "grad_norm": 1.5291476249694824, | |
| "learning_rate": 4.075330148275719e-07, | |
| "loss": 0.6078330278396606, | |
| "step": 932 | |
| }, | |
| { | |
| "epoch": 1.0075512405609492, | |
| "grad_norm": 1.7825332880020142, | |
| "learning_rate": 4.0710006369241984e-07, | |
| "loss": 0.6125837564468384, | |
| "step": 934 | |
| }, | |
| { | |
| "epoch": 1.0097087378640777, | |
| "grad_norm": 2.508885622024536, | |
| "learning_rate": 4.06666364860064e-07, | |
| "loss": 0.42874282598495483, | |
| "step": 936 | |
| }, | |
| { | |
| "epoch": 1.011866235167206, | |
| "grad_norm": 3.95334529876709, | |
| "learning_rate": 4.062319207852754e-07, | |
| "loss": 0.7187290191650391, | |
| "step": 938 | |
| }, | |
| { | |
| "epoch": 1.0140237324703345, | |
| "grad_norm": 1.270442247390747, | |
| "learning_rate": 4.0579673392704315e-07, | |
| "loss": 0.5802730321884155, | |
| "step": 940 | |
| }, | |
| { | |
| "epoch": 1.0161812297734627, | |
| "grad_norm": 1.6048680543899536, | |
| "learning_rate": 4.0536080674856064e-07, | |
| "loss": 0.5463104248046875, | |
| "step": 942 | |
| }, | |
| { | |
| "epoch": 1.0183387270765911, | |
| "grad_norm": 4.189075469970703, | |
| "learning_rate": 4.0492414171721137e-07, | |
| "loss": 0.4786475896835327, | |
| "step": 944 | |
| }, | |
| { | |
| "epoch": 1.0204962243797195, | |
| "grad_norm": 7.608390808105469, | |
| "learning_rate": 4.044867413045554e-07, | |
| "loss": 0.6445322632789612, | |
| "step": 946 | |
| }, | |
| { | |
| "epoch": 1.022653721682848, | |
| "grad_norm": 3.295640230178833, | |
| "learning_rate": 4.0404860798631497e-07, | |
| "loss": 0.5995020866394043, | |
| "step": 948 | |
| }, | |
| { | |
| "epoch": 1.0248112189859762, | |
| "grad_norm": 3.5857110023498535, | |
| "learning_rate": 4.0360974424236045e-07, | |
| "loss": 0.5838800072669983, | |
| "step": 950 | |
| }, | |
| { | |
| "epoch": 1.0269687162891046, | |
| "grad_norm": 2.5072519779205322, | |
| "learning_rate": 4.031701525566968e-07, | |
| "loss": 0.550299882888794, | |
| "step": 952 | |
| }, | |
| { | |
| "epoch": 1.029126213592233, | |
| "grad_norm": 2.477893352508545, | |
| "learning_rate": 4.0272983541744906e-07, | |
| "loss": 0.48044463992118835, | |
| "step": 954 | |
| }, | |
| { | |
| "epoch": 1.0312837108953614, | |
| "grad_norm": 2.064870595932007, | |
| "learning_rate": 4.0228879531684825e-07, | |
| "loss": 0.607839822769165, | |
| "step": 956 | |
| }, | |
| { | |
| "epoch": 1.0334412081984898, | |
| "grad_norm": 1.39699125289917, | |
| "learning_rate": 4.018470347512177e-07, | |
| "loss": 0.6562771797180176, | |
| "step": 958 | |
| }, | |
| { | |
| "epoch": 1.035598705501618, | |
| "grad_norm": 4.258260250091553, | |
| "learning_rate": 4.0140455622095833e-07, | |
| "loss": 0.6375301480293274, | |
| "step": 960 | |
| }, | |
| { | |
| "epoch": 1.0377562028047465, | |
| "grad_norm": 2.1129860877990723, | |
| "learning_rate": 4.0096136223053503e-07, | |
| "loss": 0.5195407271385193, | |
| "step": 962 | |
| }, | |
| { | |
| "epoch": 1.0399137001078749, | |
| "grad_norm": 4.592264652252197, | |
| "learning_rate": 4.005174552884621e-07, | |
| "loss": 0.47522222995758057, | |
| "step": 964 | |
| }, | |
| { | |
| "epoch": 1.0420711974110033, | |
| "grad_norm": 1.5298984050750732, | |
| "learning_rate": 4.0007283790728937e-07, | |
| "loss": 0.45716097950935364, | |
| "step": 966 | |
| }, | |
| { | |
| "epoch": 1.0442286947141317, | |
| "grad_norm": 2.2780728340148926, | |
| "learning_rate": 3.996275126035877e-07, | |
| "loss": 0.6386222243309021, | |
| "step": 968 | |
| }, | |
| { | |
| "epoch": 1.04638619201726, | |
| "grad_norm": 39.363914489746094, | |
| "learning_rate": 3.9918148189793473e-07, | |
| "loss": 0.6134737133979797, | |
| "step": 970 | |
| }, | |
| { | |
| "epoch": 1.0485436893203883, | |
| "grad_norm": 1.4374394416809082, | |
| "learning_rate": 3.98734748314901e-07, | |
| "loss": 0.49869057536125183, | |
| "step": 972 | |
| }, | |
| { | |
| "epoch": 1.0507011866235167, | |
| "grad_norm": 2.8738322257995605, | |
| "learning_rate": 3.9828731438303513e-07, | |
| "loss": 0.4262846112251282, | |
| "step": 974 | |
| }, | |
| { | |
| "epoch": 1.0528586839266452, | |
| "grad_norm": 4.88612174987793, | |
| "learning_rate": 3.978391826348501e-07, | |
| "loss": 0.44011008739471436, | |
| "step": 976 | |
| }, | |
| { | |
| "epoch": 1.0550161812297734, | |
| "grad_norm": 1.591728925704956, | |
| "learning_rate": 3.973903556068082e-07, | |
| "loss": 0.579682469367981, | |
| "step": 978 | |
| }, | |
| { | |
| "epoch": 1.0571736785329018, | |
| "grad_norm": 2.144808769226074, | |
| "learning_rate": 3.9694083583930734e-07, | |
| "loss": 0.6116613149642944, | |
| "step": 980 | |
| }, | |
| { | |
| "epoch": 1.0593311758360302, | |
| "grad_norm": 4.2694525718688965, | |
| "learning_rate": 3.964906258766663e-07, | |
| "loss": 0.5095481872558594, | |
| "step": 982 | |
| }, | |
| { | |
| "epoch": 1.0614886731391586, | |
| "grad_norm": 1.4594628810882568, | |
| "learning_rate": 3.960397282671104e-07, | |
| "loss": 0.49267393350601196, | |
| "step": 984 | |
| }, | |
| { | |
| "epoch": 1.063646170442287, | |
| "grad_norm": 2.070873737335205, | |
| "learning_rate": 3.9558814556275705e-07, | |
| "loss": 0.5884362459182739, | |
| "step": 986 | |
| }, | |
| { | |
| "epoch": 1.0658036677454152, | |
| "grad_norm": 1.6621878147125244, | |
| "learning_rate": 3.9513588031960164e-07, | |
| "loss": 0.5078074336051941, | |
| "step": 988 | |
| }, | |
| { | |
| "epoch": 1.0679611650485437, | |
| "grad_norm": 1.82585608959198, | |
| "learning_rate": 3.946829350975024e-07, | |
| "loss": 0.5152098536491394, | |
| "step": 990 | |
| }, | |
| { | |
| "epoch": 1.070118662351672, | |
| "grad_norm": 1.609985589981079, | |
| "learning_rate": 3.942293124601664e-07, | |
| "loss": 0.5379571318626404, | |
| "step": 992 | |
| }, | |
| { | |
| "epoch": 1.0722761596548005, | |
| "grad_norm": 3.880840301513672, | |
| "learning_rate": 3.937750149751353e-07, | |
| "loss": 0.53695148229599, | |
| "step": 994 | |
| }, | |
| { | |
| "epoch": 1.074433656957929, | |
| "grad_norm": 1.5093027353286743, | |
| "learning_rate": 3.9332004521376976e-07, | |
| "loss": 0.6305853128433228, | |
| "step": 996 | |
| }, | |
| { | |
| "epoch": 1.0765911542610571, | |
| "grad_norm": 10.337991714477539, | |
| "learning_rate": 3.9286440575123625e-07, | |
| "loss": 0.46823710203170776, | |
| "step": 998 | |
| }, | |
| { | |
| "epoch": 1.0787486515641855, | |
| "grad_norm": 6.093511581420898, | |
| "learning_rate": 3.9240809916649146e-07, | |
| "loss": 0.7002033591270447, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 1.080906148867314, | |
| "grad_norm": 1.6978514194488525, | |
| "learning_rate": 3.919511280422681e-07, | |
| "loss": 0.4546293318271637, | |
| "step": 1002 | |
| }, | |
| { | |
| "epoch": 1.0830636461704424, | |
| "grad_norm": 1.1726937294006348, | |
| "learning_rate": 3.914934949650603e-07, | |
| "loss": 0.6637395620346069, | |
| "step": 1004 | |
| }, | |
| { | |
| "epoch": 1.0852211434735706, | |
| "grad_norm": 1.5652375221252441, | |
| "learning_rate": 3.910352025251087e-07, | |
| "loss": 0.5811644196510315, | |
| "step": 1006 | |
| }, | |
| { | |
| "epoch": 1.087378640776699, | |
| "grad_norm": 1.6414304971694946, | |
| "learning_rate": 3.905762533163863e-07, | |
| "loss": 0.595992386341095, | |
| "step": 1008 | |
| }, | |
| { | |
| "epoch": 1.0895361380798274, | |
| "grad_norm": 2.038053512573242, | |
| "learning_rate": 3.9011664993658315e-07, | |
| "loss": 0.5396429896354675, | |
| "step": 1010 | |
| }, | |
| { | |
| "epoch": 1.0916936353829558, | |
| "grad_norm": 1.9649733304977417, | |
| "learning_rate": 3.8965639498709213e-07, | |
| "loss": 0.5478025674819946, | |
| "step": 1012 | |
| }, | |
| { | |
| "epoch": 1.0938511326860842, | |
| "grad_norm": 3.463732957839966, | |
| "learning_rate": 3.891954910729942e-07, | |
| "loss": 0.49627092480659485, | |
| "step": 1014 | |
| }, | |
| { | |
| "epoch": 1.0960086299892124, | |
| "grad_norm": 3.943431854248047, | |
| "learning_rate": 3.8873394080304304e-07, | |
| "loss": 0.6524069905281067, | |
| "step": 1016 | |
| }, | |
| { | |
| "epoch": 1.0981661272923409, | |
| "grad_norm": 3.5497028827667236, | |
| "learning_rate": 3.8827174678965144e-07, | |
| "loss": 0.5223618149757385, | |
| "step": 1018 | |
| }, | |
| { | |
| "epoch": 1.1003236245954693, | |
| "grad_norm": 7.74370002746582, | |
| "learning_rate": 3.878089116488752e-07, | |
| "loss": 0.5389726161956787, | |
| "step": 1020 | |
| }, | |
| { | |
| "epoch": 1.1024811218985977, | |
| "grad_norm": 2.1320722103118896, | |
| "learning_rate": 3.873454380003992e-07, | |
| "loss": 0.5913807153701782, | |
| "step": 1022 | |
| }, | |
| { | |
| "epoch": 1.104638619201726, | |
| "grad_norm": 2.8024096488952637, | |
| "learning_rate": 3.8688132846752246e-07, | |
| "loss": 0.5501705408096313, | |
| "step": 1024 | |
| }, | |
| { | |
| "epoch": 1.1067961165048543, | |
| "grad_norm": 1.3087513446807861, | |
| "learning_rate": 3.864165856771429e-07, | |
| "loss": 0.5886018872261047, | |
| "step": 1026 | |
| }, | |
| { | |
| "epoch": 1.1089536138079827, | |
| "grad_norm": 1.2707762718200684, | |
| "learning_rate": 3.85951212259743e-07, | |
| "loss": 0.5950880646705627, | |
| "step": 1028 | |
| }, | |
| { | |
| "epoch": 1.1111111111111112, | |
| "grad_norm": 5.31006383895874, | |
| "learning_rate": 3.8548521084937434e-07, | |
| "loss": 0.4451335668563843, | |
| "step": 1030 | |
| }, | |
| { | |
| "epoch": 1.1132686084142396, | |
| "grad_norm": 3.055915117263794, | |
| "learning_rate": 3.8501858408364333e-07, | |
| "loss": 0.47768980264663696, | |
| "step": 1032 | |
| }, | |
| { | |
| "epoch": 1.1154261057173678, | |
| "grad_norm": 1.2441359758377075, | |
| "learning_rate": 3.845513346036957e-07, | |
| "loss": 0.6385471820831299, | |
| "step": 1034 | |
| }, | |
| { | |
| "epoch": 1.1175836030204962, | |
| "grad_norm": 6.279306411743164, | |
| "learning_rate": 3.840834650542018e-07, | |
| "loss": 0.45993420481681824, | |
| "step": 1036 | |
| }, | |
| { | |
| "epoch": 1.1197411003236246, | |
| "grad_norm": 1.52280592918396, | |
| "learning_rate": 3.836149780833418e-07, | |
| "loss": 0.5168780088424683, | |
| "step": 1038 | |
| }, | |
| { | |
| "epoch": 1.121898597626753, | |
| "grad_norm": 2.0793957710266113, | |
| "learning_rate": 3.8314587634279027e-07, | |
| "loss": 0.5576176643371582, | |
| "step": 1040 | |
| }, | |
| { | |
| "epoch": 1.1240560949298812, | |
| "grad_norm": 1.4773210287094116, | |
| "learning_rate": 3.8267616248770165e-07, | |
| "loss": 0.5373958945274353, | |
| "step": 1042 | |
| }, | |
| { | |
| "epoch": 1.1262135922330097, | |
| "grad_norm": 2.390502691268921, | |
| "learning_rate": 3.8220583917669486e-07, | |
| "loss": 0.5401830673217773, | |
| "step": 1044 | |
| }, | |
| { | |
| "epoch": 1.128371089536138, | |
| "grad_norm": 1.2080022096633911, | |
| "learning_rate": 3.8173490907183854e-07, | |
| "loss": 0.5357274413108826, | |
| "step": 1046 | |
| }, | |
| { | |
| "epoch": 1.1305285868392665, | |
| "grad_norm": 5.848990440368652, | |
| "learning_rate": 3.8126337483863565e-07, | |
| "loss": 0.5787194967269897, | |
| "step": 1048 | |
| }, | |
| { | |
| "epoch": 1.132686084142395, | |
| "grad_norm": 2.1143975257873535, | |
| "learning_rate": 3.8079123914600874e-07, | |
| "loss": 0.5402184724807739, | |
| "step": 1050 | |
| }, | |
| { | |
| "epoch": 1.134843581445523, | |
| "grad_norm": 1.5472838878631592, | |
| "learning_rate": 3.8031850466628446e-07, | |
| "loss": 0.5939875245094299, | |
| "step": 1052 | |
| }, | |
| { | |
| "epoch": 1.1370010787486515, | |
| "grad_norm": 1.5804020166397095, | |
| "learning_rate": 3.798451740751789e-07, | |
| "loss": 0.5679658651351929, | |
| "step": 1054 | |
| }, | |
| { | |
| "epoch": 1.13915857605178, | |
| "grad_norm": 1.2764010429382324, | |
| "learning_rate": 3.79371250051782e-07, | |
| "loss": 0.5673144459724426, | |
| "step": 1056 | |
| }, | |
| { | |
| "epoch": 1.1413160733549084, | |
| "grad_norm": 2.0156118869781494, | |
| "learning_rate": 3.788967352785426e-07, | |
| "loss": 0.6328020095825195, | |
| "step": 1058 | |
| }, | |
| { | |
| "epoch": 1.1434735706580366, | |
| "grad_norm": 9.465262413024902, | |
| "learning_rate": 3.7842163244125336e-07, | |
| "loss": 0.4857198894023895, | |
| "step": 1060 | |
| }, | |
| { | |
| "epoch": 1.145631067961165, | |
| "grad_norm": 1.7701698541641235, | |
| "learning_rate": 3.7794594422903524e-07, | |
| "loss": 0.2504948377609253, | |
| "step": 1062 | |
| }, | |
| { | |
| "epoch": 1.1477885652642934, | |
| "grad_norm": 3.641615629196167, | |
| "learning_rate": 3.7746967333432267e-07, | |
| "loss": 0.5472912192344666, | |
| "step": 1064 | |
| }, | |
| { | |
| "epoch": 1.1499460625674218, | |
| "grad_norm": 2.0676019191741943, | |
| "learning_rate": 3.769928224528479e-07, | |
| "loss": 0.6364511847496033, | |
| "step": 1066 | |
| }, | |
| { | |
| "epoch": 1.1521035598705502, | |
| "grad_norm": 4.401365756988525, | |
| "learning_rate": 3.7651539428362613e-07, | |
| "loss": 0.5439355969429016, | |
| "step": 1068 | |
| }, | |
| { | |
| "epoch": 1.1542610571736784, | |
| "grad_norm": 5.850034236907959, | |
| "learning_rate": 3.7603739152894e-07, | |
| "loss": 0.6640523076057434, | |
| "step": 1070 | |
| }, | |
| { | |
| "epoch": 1.1564185544768069, | |
| "grad_norm": 1.1790587902069092, | |
| "learning_rate": 3.755588168943242e-07, | |
| "loss": 0.5868851542472839, | |
| "step": 1072 | |
| }, | |
| { | |
| "epoch": 1.1585760517799353, | |
| "grad_norm": 1.469138503074646, | |
| "learning_rate": 3.7507967308855054e-07, | |
| "loss": 0.5512825846672058, | |
| "step": 1074 | |
| }, | |
| { | |
| "epoch": 1.1607335490830637, | |
| "grad_norm": 1.2130903005599976, | |
| "learning_rate": 3.7459996282361243e-07, | |
| "loss": 0.5239831805229187, | |
| "step": 1076 | |
| }, | |
| { | |
| "epoch": 1.162891046386192, | |
| "grad_norm": 7.653203964233398, | |
| "learning_rate": 3.741196888147091e-07, | |
| "loss": 0.6439744234085083, | |
| "step": 1078 | |
| }, | |
| { | |
| "epoch": 1.1650485436893203, | |
| "grad_norm": 1.5135337114334106, | |
| "learning_rate": 3.7363885378023103e-07, | |
| "loss": 0.5788765549659729, | |
| "step": 1080 | |
| }, | |
| { | |
| "epoch": 1.1672060409924487, | |
| "grad_norm": 3.182453155517578, | |
| "learning_rate": 3.731574604417439e-07, | |
| "loss": 0.5617559552192688, | |
| "step": 1082 | |
| }, | |
| { | |
| "epoch": 1.1693635382955772, | |
| "grad_norm": 2.3747951984405518, | |
| "learning_rate": 3.7267551152397357e-07, | |
| "loss": 0.5370228886604309, | |
| "step": 1084 | |
| }, | |
| { | |
| "epoch": 1.1715210355987056, | |
| "grad_norm": 1.1393845081329346, | |
| "learning_rate": 3.721930097547905e-07, | |
| "loss": 0.6155055165290833, | |
| "step": 1086 | |
| }, | |
| { | |
| "epoch": 1.173678532901834, | |
| "grad_norm": 2.894418954849243, | |
| "learning_rate": 3.717099578651941e-07, | |
| "loss": 0.6374943256378174, | |
| "step": 1088 | |
| }, | |
| { | |
| "epoch": 1.1758360302049622, | |
| "grad_norm": 2.416472911834717, | |
| "learning_rate": 3.71226358589298e-07, | |
| "loss": 0.6947451829910278, | |
| "step": 1090 | |
| }, | |
| { | |
| "epoch": 1.1779935275080906, | |
| "grad_norm": 1.3670376539230347, | |
| "learning_rate": 3.7074221466431373e-07, | |
| "loss": 0.5406089425086975, | |
| "step": 1092 | |
| }, | |
| { | |
| "epoch": 1.180151024811219, | |
| "grad_norm": 1.2521384954452515, | |
| "learning_rate": 3.702575288305355e-07, | |
| "loss": 0.5395722389221191, | |
| "step": 1094 | |
| }, | |
| { | |
| "epoch": 1.1823085221143474, | |
| "grad_norm": 1.9795541763305664, | |
| "learning_rate": 3.697723038313251e-07, | |
| "loss": 0.29258376359939575, | |
| "step": 1096 | |
| }, | |
| { | |
| "epoch": 1.1844660194174756, | |
| "grad_norm": 1.5469352006912231, | |
| "learning_rate": 3.692865424130957e-07, | |
| "loss": 0.28986942768096924, | |
| "step": 1098 | |
| }, | |
| { | |
| "epoch": 1.186623516720604, | |
| "grad_norm": 2.116645574569702, | |
| "learning_rate": 3.6880024732529693e-07, | |
| "loss": 0.4253014922142029, | |
| "step": 1100 | |
| }, | |
| { | |
| "epoch": 1.1887810140237325, | |
| "grad_norm": 1.2636767625808716, | |
| "learning_rate": 3.683134213203987e-07, | |
| "loss": 0.3352068066596985, | |
| "step": 1102 | |
| }, | |
| { | |
| "epoch": 1.190938511326861, | |
| "grad_norm": 4.438840866088867, | |
| "learning_rate": 3.6782606715387635e-07, | |
| "loss": 0.5909640192985535, | |
| "step": 1104 | |
| }, | |
| { | |
| "epoch": 1.1930960086299893, | |
| "grad_norm": 2.9154276847839355, | |
| "learning_rate": 3.673381875841945e-07, | |
| "loss": 0.6895350217819214, | |
| "step": 1106 | |
| }, | |
| { | |
| "epoch": 1.1952535059331175, | |
| "grad_norm": 1.7983450889587402, | |
| "learning_rate": 3.668497853727913e-07, | |
| "loss": 0.6385772824287415, | |
| "step": 1108 | |
| }, | |
| { | |
| "epoch": 1.197411003236246, | |
| "grad_norm": 3.447049140930176, | |
| "learning_rate": 3.6636086328406374e-07, | |
| "loss": 0.6397103071212769, | |
| "step": 1110 | |
| }, | |
| { | |
| "epoch": 1.1995685005393744, | |
| "grad_norm": 3.8200409412384033, | |
| "learning_rate": 3.6587142408535054e-07, | |
| "loss": 0.5476571321487427, | |
| "step": 1112 | |
| }, | |
| { | |
| "epoch": 1.2017259978425028, | |
| "grad_norm": 1.2859944105148315, | |
| "learning_rate": 3.6538147054691815e-07, | |
| "loss": 0.5042168498039246, | |
| "step": 1114 | |
| }, | |
| { | |
| "epoch": 1.203883495145631, | |
| "grad_norm": 1.283574104309082, | |
| "learning_rate": 3.648910054419435e-07, | |
| "loss": 0.5920245051383972, | |
| "step": 1116 | |
| }, | |
| { | |
| "epoch": 1.2060409924487594, | |
| "grad_norm": 1.5358695983886719, | |
| "learning_rate": 3.6440003154649953e-07, | |
| "loss": 0.4955591559410095, | |
| "step": 1118 | |
| }, | |
| { | |
| "epoch": 1.2081984897518878, | |
| "grad_norm": 1.0370635986328125, | |
| "learning_rate": 3.639085516395387e-07, | |
| "loss": 0.6023776531219482, | |
| "step": 1120 | |
| }, | |
| { | |
| "epoch": 1.2103559870550162, | |
| "grad_norm": 3.8796122074127197, | |
| "learning_rate": 3.6341656850287774e-07, | |
| "loss": 0.762068510055542, | |
| "step": 1122 | |
| }, | |
| { | |
| "epoch": 1.2125134843581447, | |
| "grad_norm": 3.400327444076538, | |
| "learning_rate": 3.629240849211814e-07, | |
| "loss": 0.6205697059631348, | |
| "step": 1124 | |
| }, | |
| { | |
| "epoch": 1.2146709816612729, | |
| "grad_norm": 2.327786684036255, | |
| "learning_rate": 3.6243110368194737e-07, | |
| "loss": 0.5409681797027588, | |
| "step": 1126 | |
| }, | |
| { | |
| "epoch": 1.2168284789644013, | |
| "grad_norm": 3.4878270626068115, | |
| "learning_rate": 3.619376275754897e-07, | |
| "loss": 0.5776150226593018, | |
| "step": 1128 | |
| }, | |
| { | |
| "epoch": 1.2189859762675297, | |
| "grad_norm": 3.1736040115356445, | |
| "learning_rate": 3.614436593949239e-07, | |
| "loss": 0.6846837997436523, | |
| "step": 1130 | |
| }, | |
| { | |
| "epoch": 1.2211434735706581, | |
| "grad_norm": 2.631258964538574, | |
| "learning_rate": 3.609492019361503e-07, | |
| "loss": 0.415913850069046, | |
| "step": 1132 | |
| }, | |
| { | |
| "epoch": 1.2233009708737863, | |
| "grad_norm": 2.7962353229522705, | |
| "learning_rate": 3.604542579978387e-07, | |
| "loss": 0.4860919415950775, | |
| "step": 1134 | |
| }, | |
| { | |
| "epoch": 1.2254584681769147, | |
| "grad_norm": 2.561530113220215, | |
| "learning_rate": 3.599588303814125e-07, | |
| "loss": 0.2348194271326065, | |
| "step": 1136 | |
| }, | |
| { | |
| "epoch": 1.2276159654800431, | |
| "grad_norm": 1.6811720132827759, | |
| "learning_rate": 3.594629218910325e-07, | |
| "loss": 0.5006701946258545, | |
| "step": 1138 | |
| }, | |
| { | |
| "epoch": 1.2297734627831716, | |
| "grad_norm": 4.786057472229004, | |
| "learning_rate": 3.589665353335817e-07, | |
| "loss": 0.7145098447799683, | |
| "step": 1140 | |
| }, | |
| { | |
| "epoch": 1.2319309600863, | |
| "grad_norm": 1.3728108406066895, | |
| "learning_rate": 3.584696735186486e-07, | |
| "loss": 0.7182168960571289, | |
| "step": 1142 | |
| }, | |
| { | |
| "epoch": 1.2340884573894282, | |
| "grad_norm": 2.7093677520751953, | |
| "learning_rate": 3.579723392585119e-07, | |
| "loss": 0.5651712417602539, | |
| "step": 1144 | |
| }, | |
| { | |
| "epoch": 1.2362459546925566, | |
| "grad_norm": 1.986265778541565, | |
| "learning_rate": 3.574745353681243e-07, | |
| "loss": 0.5348817110061646, | |
| "step": 1146 | |
| }, | |
| { | |
| "epoch": 1.238403451995685, | |
| "grad_norm": 6.097311496734619, | |
| "learning_rate": 3.5697626466509663e-07, | |
| "loss": 0.577741801738739, | |
| "step": 1148 | |
| }, | |
| { | |
| "epoch": 1.2405609492988134, | |
| "grad_norm": 11.053370475769043, | |
| "learning_rate": 3.564775299696821e-07, | |
| "loss": 0.5550810098648071, | |
| "step": 1150 | |
| }, | |
| { | |
| "epoch": 1.2427184466019416, | |
| "grad_norm": 1.4628392457962036, | |
| "learning_rate": 3.5597833410476006e-07, | |
| "loss": 0.620225727558136, | |
| "step": 1152 | |
| }, | |
| { | |
| "epoch": 1.24487594390507, | |
| "grad_norm": 7.8950276374816895, | |
| "learning_rate": 3.554786798958199e-07, | |
| "loss": 0.607008159160614, | |
| "step": 1154 | |
| }, | |
| { | |
| "epoch": 1.2470334412081985, | |
| "grad_norm": 1.5595459938049316, | |
| "learning_rate": 3.549785701709456e-07, | |
| "loss": 0.4987570643424988, | |
| "step": 1156 | |
| }, | |
| { | |
| "epoch": 1.249190938511327, | |
| "grad_norm": 1.6523284912109375, | |
| "learning_rate": 3.544780077607992e-07, | |
| "loss": 0.43261778354644775, | |
| "step": 1158 | |
| }, | |
| { | |
| "epoch": 1.2513484358144553, | |
| "grad_norm": 12.226542472839355, | |
| "learning_rate": 3.53976995498605e-07, | |
| "loss": 0.6562701463699341, | |
| "step": 1160 | |
| }, | |
| { | |
| "epoch": 1.2535059331175837, | |
| "grad_norm": 1.1661919355392456, | |
| "learning_rate": 3.534755362201336e-07, | |
| "loss": 0.501526951789856, | |
| "step": 1162 | |
| }, | |
| { | |
| "epoch": 1.255663430420712, | |
| "grad_norm": 2.927499532699585, | |
| "learning_rate": 3.529736327636856e-07, | |
| "loss": 0.5619246959686279, | |
| "step": 1164 | |
| }, | |
| { | |
| "epoch": 1.2578209277238404, | |
| "grad_norm": 2.0249011516571045, | |
| "learning_rate": 3.524712879700758e-07, | |
| "loss": 0.5738718509674072, | |
| "step": 1166 | |
| }, | |
| { | |
| "epoch": 1.2599784250269688, | |
| "grad_norm": 0.44852355122566223, | |
| "learning_rate": 3.5196850468261694e-07, | |
| "loss": 0.5509487390518188, | |
| "step": 1168 | |
| }, | |
| { | |
| "epoch": 1.262135922330097, | |
| "grad_norm": 2.7338967323303223, | |
| "learning_rate": 3.514652857471038e-07, | |
| "loss": 0.8337733149528503, | |
| "step": 1170 | |
| }, | |
| { | |
| "epoch": 1.2642934196332254, | |
| "grad_norm": 1.9451134204864502, | |
| "learning_rate": 3.509616340117968e-07, | |
| "loss": 0.650130033493042, | |
| "step": 1172 | |
| }, | |
| { | |
| "epoch": 1.2664509169363538, | |
| "grad_norm": 1.3618203401565552, | |
| "learning_rate": 3.50457552327406e-07, | |
| "loss": 0.5888417363166809, | |
| "step": 1174 | |
| }, | |
| { | |
| "epoch": 1.2686084142394822, | |
| "grad_norm": 3.983638048171997, | |
| "learning_rate": 3.499530435470753e-07, | |
| "loss": 0.5166311264038086, | |
| "step": 1176 | |
| }, | |
| { | |
| "epoch": 1.2707659115426106, | |
| "grad_norm": 0.49615857005119324, | |
| "learning_rate": 3.4944811052636557e-07, | |
| "loss": 0.24507802724838257, | |
| "step": 1178 | |
| }, | |
| { | |
| "epoch": 1.272923408845739, | |
| "grad_norm": 1.7828303575515747, | |
| "learning_rate": 3.4894275612323937e-07, | |
| "loss": 0.542197585105896, | |
| "step": 1180 | |
| }, | |
| { | |
| "epoch": 1.2750809061488673, | |
| "grad_norm": 3.6452059745788574, | |
| "learning_rate": 3.4843698319804406e-07, | |
| "loss": 0.4652722179889679, | |
| "step": 1182 | |
| }, | |
| { | |
| "epoch": 1.2772384034519957, | |
| "grad_norm": 2.0969831943511963, | |
| "learning_rate": 3.479307946134958e-07, | |
| "loss": 0.615992546081543, | |
| "step": 1184 | |
| }, | |
| { | |
| "epoch": 1.279395900755124, | |
| "grad_norm": 5.83058500289917, | |
| "learning_rate": 3.4742419323466364e-07, | |
| "loss": 0.6636797189712524, | |
| "step": 1186 | |
| }, | |
| { | |
| "epoch": 1.2815533980582523, | |
| "grad_norm": 1.9127455949783325, | |
| "learning_rate": 3.469171819289529e-07, | |
| "loss": 0.5533426403999329, | |
| "step": 1188 | |
| }, | |
| { | |
| "epoch": 1.2837108953613807, | |
| "grad_norm": 3.0942981243133545, | |
| "learning_rate": 3.4640976356608925e-07, | |
| "loss": 0.4518588185310364, | |
| "step": 1190 | |
| }, | |
| { | |
| "epoch": 1.2858683926645091, | |
| "grad_norm": 4.2268218994140625, | |
| "learning_rate": 3.4590194101810225e-07, | |
| "loss": 0.6738635897636414, | |
| "step": 1192 | |
| }, | |
| { | |
| "epoch": 1.2880258899676376, | |
| "grad_norm": 1.095913290977478, | |
| "learning_rate": 3.453937171593092e-07, | |
| "loss": 0.5171671509742737, | |
| "step": 1194 | |
| }, | |
| { | |
| "epoch": 1.290183387270766, | |
| "grad_norm": 1.8377701044082642, | |
| "learning_rate": 3.448850948662989e-07, | |
| "loss": 0.4299532175064087, | |
| "step": 1196 | |
| }, | |
| { | |
| "epoch": 1.2923408845738944, | |
| "grad_norm": 5.051590442657471, | |
| "learning_rate": 3.443760770179152e-07, | |
| "loss": 0.5502167344093323, | |
| "step": 1198 | |
| }, | |
| { | |
| "epoch": 1.2944983818770226, | |
| "grad_norm": 1.6523767709732056, | |
| "learning_rate": 3.438666664952409e-07, | |
| "loss": 0.6330602765083313, | |
| "step": 1200 | |
| }, | |
| { | |
| "epoch": 1.296655879180151, | |
| "grad_norm": 1.3870859146118164, | |
| "learning_rate": 3.4335686618158146e-07, | |
| "loss": 0.5067728757858276, | |
| "step": 1202 | |
| }, | |
| { | |
| "epoch": 1.2988133764832794, | |
| "grad_norm": 3.1465868949890137, | |
| "learning_rate": 3.428466789624484e-07, | |
| "loss": 0.4402804374694824, | |
| "step": 1204 | |
| }, | |
| { | |
| "epoch": 1.3009708737864076, | |
| "grad_norm": 9.775895118713379, | |
| "learning_rate": 3.4233610772554327e-07, | |
| "loss": 0.6617931127548218, | |
| "step": 1206 | |
| }, | |
| { | |
| "epoch": 1.303128371089536, | |
| "grad_norm": 6.532571792602539, | |
| "learning_rate": 3.418251553607414e-07, | |
| "loss": 0.5412026047706604, | |
| "step": 1208 | |
| }, | |
| { | |
| "epoch": 1.3052858683926645, | |
| "grad_norm": 23.66463279724121, | |
| "learning_rate": 3.4131382476007483e-07, | |
| "loss": 0.5621963739395142, | |
| "step": 1210 | |
| }, | |
| { | |
| "epoch": 1.307443365695793, | |
| "grad_norm": 4.139750957489014, | |
| "learning_rate": 3.408021188177168e-07, | |
| "loss": 0.43633171916007996, | |
| "step": 1212 | |
| }, | |
| { | |
| "epoch": 1.3096008629989213, | |
| "grad_norm": 1.5540739297866821, | |
| "learning_rate": 3.40290040429965e-07, | |
| "loss": 0.585831880569458, | |
| "step": 1214 | |
| }, | |
| { | |
| "epoch": 1.3117583603020497, | |
| "grad_norm": 0.7869778871536255, | |
| "learning_rate": 3.397775924952252e-07, | |
| "loss": 0.6663514375686646, | |
| "step": 1216 | |
| }, | |
| { | |
| "epoch": 1.313915857605178, | |
| "grad_norm": 3.8079118728637695, | |
| "learning_rate": 3.3926477791399466e-07, | |
| "loss": 0.6986393928527832, | |
| "step": 1218 | |
| }, | |
| { | |
| "epoch": 1.3160733549083063, | |
| "grad_norm": 80.64041137695312, | |
| "learning_rate": 3.3875159958884604e-07, | |
| "loss": 0.2534840703010559, | |
| "step": 1220 | |
| }, | |
| { | |
| "epoch": 1.3182308522114348, | |
| "grad_norm": 1.4813616275787354, | |
| "learning_rate": 3.382380604244108e-07, | |
| "loss": 0.5039587020874023, | |
| "step": 1222 | |
| }, | |
| { | |
| "epoch": 1.3203883495145632, | |
| "grad_norm": 1.3485782146453857, | |
| "learning_rate": 3.3772416332736267e-07, | |
| "loss": 0.49135297536849976, | |
| "step": 1224 | |
| }, | |
| { | |
| "epoch": 1.3225458468176914, | |
| "grad_norm": 2.1019444465637207, | |
| "learning_rate": 3.372099112064016e-07, | |
| "loss": 0.5730096101760864, | |
| "step": 1226 | |
| }, | |
| { | |
| "epoch": 1.3247033441208198, | |
| "grad_norm": 1.2675708532333374, | |
| "learning_rate": 3.3669530697223666e-07, | |
| "loss": 0.3039630055427551, | |
| "step": 1228 | |
| }, | |
| { | |
| "epoch": 1.3268608414239482, | |
| "grad_norm": 4.25254487991333, | |
| "learning_rate": 3.3618035353757004e-07, | |
| "loss": 0.5067458748817444, | |
| "step": 1230 | |
| }, | |
| { | |
| "epoch": 1.3290183387270766, | |
| "grad_norm": 1.8785464763641357, | |
| "learning_rate": 3.3566505381708053e-07, | |
| "loss": 0.592536449432373, | |
| "step": 1232 | |
| }, | |
| { | |
| "epoch": 1.331175836030205, | |
| "grad_norm": 3.387016773223877, | |
| "learning_rate": 3.351494107274067e-07, | |
| "loss": 0.5786437392234802, | |
| "step": 1234 | |
| }, | |
| { | |
| "epoch": 1.3333333333333333, | |
| "grad_norm": 3.627840757369995, | |
| "learning_rate": 3.3463342718713093e-07, | |
| "loss": 0.41272541880607605, | |
| "step": 1236 | |
| }, | |
| { | |
| "epoch": 1.3354908306364617, | |
| "grad_norm": 2.921630620956421, | |
| "learning_rate": 3.3411710611676245e-07, | |
| "loss": 0.5590270757675171, | |
| "step": 1238 | |
| }, | |
| { | |
| "epoch": 1.33764832793959, | |
| "grad_norm": 1.4048740863800049, | |
| "learning_rate": 3.3360045043872073e-07, | |
| "loss": 0.5370460748672485, | |
| "step": 1240 | |
| }, | |
| { | |
| "epoch": 1.3398058252427185, | |
| "grad_norm": 1.9135799407958984, | |
| "learning_rate": 3.3308346307731937e-07, | |
| "loss": 0.6795614361763, | |
| "step": 1242 | |
| }, | |
| { | |
| "epoch": 1.3419633225458467, | |
| "grad_norm": 5.181615829467773, | |
| "learning_rate": 3.325661469587493e-07, | |
| "loss": 0.43978267908096313, | |
| "step": 1244 | |
| }, | |
| { | |
| "epoch": 1.3441208198489751, | |
| "grad_norm": 2.0108721256256104, | |
| "learning_rate": 3.320485050110623e-07, | |
| "loss": 0.7196254730224609, | |
| "step": 1246 | |
| }, | |
| { | |
| "epoch": 1.3462783171521036, | |
| "grad_norm": 5.627135753631592, | |
| "learning_rate": 3.3153054016415404e-07, | |
| "loss": 0.6685677170753479, | |
| "step": 1248 | |
| }, | |
| { | |
| "epoch": 1.348435814455232, | |
| "grad_norm": 1.5753936767578125, | |
| "learning_rate": 3.3101225534974824e-07, | |
| "loss": 0.47921374440193176, | |
| "step": 1250 | |
| }, | |
| { | |
| "epoch": 1.3505933117583604, | |
| "grad_norm": 1.3769195079803467, | |
| "learning_rate": 3.304936535013796e-07, | |
| "loss": 0.5930690765380859, | |
| "step": 1252 | |
| }, | |
| { | |
| "epoch": 1.3527508090614886, | |
| "grad_norm": 3.187718391418457, | |
| "learning_rate": 3.2997473755437694e-07, | |
| "loss": 0.443682998418808, | |
| "step": 1254 | |
| }, | |
| { | |
| "epoch": 1.354908306364617, | |
| "grad_norm": 1.696946144104004, | |
| "learning_rate": 3.294555104458472e-07, | |
| "loss": 0.5157840251922607, | |
| "step": 1256 | |
| }, | |
| { | |
| "epoch": 1.3570658036677454, | |
| "grad_norm": 1.3455950021743774, | |
| "learning_rate": 3.289359751146585e-07, | |
| "loss": 0.4192175269126892, | |
| "step": 1258 | |
| }, | |
| { | |
| "epoch": 1.3592233009708738, | |
| "grad_norm": 2.213014602661133, | |
| "learning_rate": 3.2841613450142326e-07, | |
| "loss": 0.5426623821258545, | |
| "step": 1260 | |
| }, | |
| { | |
| "epoch": 1.361380798274002, | |
| "grad_norm": 1.4176040887832642, | |
| "learning_rate": 3.278959915484822e-07, | |
| "loss": 0.37126630544662476, | |
| "step": 1262 | |
| }, | |
| { | |
| "epoch": 1.3635382955771305, | |
| "grad_norm": 1.7383960485458374, | |
| "learning_rate": 3.2737554919988713e-07, | |
| "loss": 0.4560404419898987, | |
| "step": 1264 | |
| }, | |
| { | |
| "epoch": 1.3656957928802589, | |
| "grad_norm": 1.2408937215805054, | |
| "learning_rate": 3.2685481040138437e-07, | |
| "loss": 0.6066496968269348, | |
| "step": 1266 | |
| }, | |
| { | |
| "epoch": 1.3678532901833873, | |
| "grad_norm": 14.88650131225586, | |
| "learning_rate": 3.2633377810039837e-07, | |
| "loss": 0.5824995636940002, | |
| "step": 1268 | |
| }, | |
| { | |
| "epoch": 1.3700107874865157, | |
| "grad_norm": 2.481553792953491, | |
| "learning_rate": 3.2581245524601457e-07, | |
| "loss": 0.5986048579216003, | |
| "step": 1270 | |
| }, | |
| { | |
| "epoch": 1.3721682847896441, | |
| "grad_norm": 6.574549198150635, | |
| "learning_rate": 3.252908447889633e-07, | |
| "loss": 0.6264061331748962, | |
| "step": 1272 | |
| }, | |
| { | |
| "epoch": 1.3743257820927723, | |
| "grad_norm": 11.117745399475098, | |
| "learning_rate": 3.2476894968160245e-07, | |
| "loss": 0.47890105843544006, | |
| "step": 1274 | |
| }, | |
| { | |
| "epoch": 1.3764832793959008, | |
| "grad_norm": 2.9714407920837402, | |
| "learning_rate": 3.2424677287790105e-07, | |
| "loss": 0.6954044699668884, | |
| "step": 1276 | |
| }, | |
| { | |
| "epoch": 1.3786407766990292, | |
| "grad_norm": 4.487089157104492, | |
| "learning_rate": 3.237243173334229e-07, | |
| "loss": 0.3699471354484558, | |
| "step": 1278 | |
| }, | |
| { | |
| "epoch": 1.3807982740021574, | |
| "grad_norm": 3.5095317363739014, | |
| "learning_rate": 3.232015860053093e-07, | |
| "loss": 0.49049827456474304, | |
| "step": 1280 | |
| }, | |
| { | |
| "epoch": 1.3829557713052858, | |
| "grad_norm": 3.8155364990234375, | |
| "learning_rate": 3.226785818522622e-07, | |
| "loss": 0.5451414585113525, | |
| "step": 1282 | |
| }, | |
| { | |
| "epoch": 1.3851132686084142, | |
| "grad_norm": 1.868401050567627, | |
| "learning_rate": 3.221553078345282e-07, | |
| "loss": 0.4515441060066223, | |
| "step": 1284 | |
| }, | |
| { | |
| "epoch": 1.3872707659115426, | |
| "grad_norm": 7.603207111358643, | |
| "learning_rate": 3.216317669138812e-07, | |
| "loss": 0.6191388964653015, | |
| "step": 1286 | |
| }, | |
| { | |
| "epoch": 1.389428263214671, | |
| "grad_norm": 8.218348503112793, | |
| "learning_rate": 3.211079620536058e-07, | |
| "loss": 0.43059730529785156, | |
| "step": 1288 | |
| }, | |
| { | |
| "epoch": 1.3915857605177995, | |
| "grad_norm": 4.354824542999268, | |
| "learning_rate": 3.205838962184804e-07, | |
| "loss": 0.47998175024986267, | |
| "step": 1290 | |
| }, | |
| { | |
| "epoch": 1.3937432578209277, | |
| "grad_norm": 2.3470609188079834, | |
| "learning_rate": 3.2005957237476073e-07, | |
| "loss": 0.6489396095275879, | |
| "step": 1292 | |
| }, | |
| { | |
| "epoch": 1.395900755124056, | |
| "grad_norm": 5.762950897216797, | |
| "learning_rate": 3.1953499349016284e-07, | |
| "loss": 0.6003392934799194, | |
| "step": 1294 | |
| }, | |
| { | |
| "epoch": 1.3980582524271845, | |
| "grad_norm": 0.4735300838947296, | |
| "learning_rate": 3.190101625338461e-07, | |
| "loss": 0.5412855744361877, | |
| "step": 1296 | |
| }, | |
| { | |
| "epoch": 1.4002157497303127, | |
| "grad_norm": 1.6221954822540283, | |
| "learning_rate": 3.18485082476397e-07, | |
| "loss": 0.452088862657547, | |
| "step": 1298 | |
| }, | |
| { | |
| "epoch": 1.4023732470334411, | |
| "grad_norm": 5.19826078414917, | |
| "learning_rate": 3.179597562898116e-07, | |
| "loss": 0.45721590518951416, | |
| "step": 1300 | |
| }, | |
| { | |
| "epoch": 1.4045307443365695, | |
| "grad_norm": 2.296773910522461, | |
| "learning_rate": 3.1743418694747935e-07, | |
| "loss": 0.6805709600448608, | |
| "step": 1302 | |
| }, | |
| { | |
| "epoch": 1.406688241639698, | |
| "grad_norm": 1.289931058883667, | |
| "learning_rate": 3.169083774241658e-07, | |
| "loss": 0.48374688625335693, | |
| "step": 1304 | |
| }, | |
| { | |
| "epoch": 1.4088457389428264, | |
| "grad_norm": 2.8533899784088135, | |
| "learning_rate": 3.1638233069599603e-07, | |
| "loss": 0.5229544639587402, | |
| "step": 1306 | |
| }, | |
| { | |
| "epoch": 1.4110032362459548, | |
| "grad_norm": 6.371025085449219, | |
| "learning_rate": 3.158560497404377e-07, | |
| "loss": 0.5882778763771057, | |
| "step": 1308 | |
| }, | |
| { | |
| "epoch": 1.413160733549083, | |
| "grad_norm": 1.4979420900344849, | |
| "learning_rate": 3.153295375362843e-07, | |
| "loss": 0.4229152500629425, | |
| "step": 1310 | |
| }, | |
| { | |
| "epoch": 1.4153182308522114, | |
| "grad_norm": 1.8354501724243164, | |
| "learning_rate": 3.14802797063638e-07, | |
| "loss": 0.5944955945014954, | |
| "step": 1312 | |
| }, | |
| { | |
| "epoch": 1.4174757281553398, | |
| "grad_norm": 2.193164825439453, | |
| "learning_rate": 3.1427583130389324e-07, | |
| "loss": 0.5871320366859436, | |
| "step": 1314 | |
| }, | |
| { | |
| "epoch": 1.419633225458468, | |
| "grad_norm": 3.204235315322876, | |
| "learning_rate": 3.137486432397193e-07, | |
| "loss": 0.2334681898355484, | |
| "step": 1316 | |
| }, | |
| { | |
| "epoch": 1.4217907227615965, | |
| "grad_norm": 4.964504718780518, | |
| "learning_rate": 3.1322123585504395e-07, | |
| "loss": 0.5250051617622375, | |
| "step": 1318 | |
| }, | |
| { | |
| "epoch": 1.4239482200647249, | |
| "grad_norm": 2.2570719718933105, | |
| "learning_rate": 3.1269361213503643e-07, | |
| "loss": 0.5495631694793701, | |
| "step": 1320 | |
| }, | |
| { | |
| "epoch": 1.4261057173678533, | |
| "grad_norm": 1.536373496055603, | |
| "learning_rate": 3.121657750660901e-07, | |
| "loss": 0.48065459728240967, | |
| "step": 1322 | |
| }, | |
| { | |
| "epoch": 1.4282632146709817, | |
| "grad_norm": 4.908086776733398, | |
| "learning_rate": 3.116377276358063e-07, | |
| "loss": 0.41572993993759155, | |
| "step": 1324 | |
| }, | |
| { | |
| "epoch": 1.4304207119741101, | |
| "grad_norm": 8.478764533996582, | |
| "learning_rate": 3.111094728329767e-07, | |
| "loss": 0.6322842240333557, | |
| "step": 1326 | |
| }, | |
| { | |
| "epoch": 1.4325782092772383, | |
| "grad_norm": 1.3569598197937012, | |
| "learning_rate": 3.1058101364756684e-07, | |
| "loss": 0.48605573177337646, | |
| "step": 1328 | |
| }, | |
| { | |
| "epoch": 1.4347357065803668, | |
| "grad_norm": 2.1617515087127686, | |
| "learning_rate": 3.100523530706991e-07, | |
| "loss": 0.5922558307647705, | |
| "step": 1330 | |
| }, | |
| { | |
| "epoch": 1.4368932038834952, | |
| "grad_norm": 3.862966775894165, | |
| "learning_rate": 3.095234940946358e-07, | |
| "loss": 0.504891574382782, | |
| "step": 1332 | |
| }, | |
| { | |
| "epoch": 1.4390507011866236, | |
| "grad_norm": 1.3128629922866821, | |
| "learning_rate": 3.089944397127621e-07, | |
| "loss": 0.5910184383392334, | |
| "step": 1334 | |
| }, | |
| { | |
| "epoch": 1.4412081984897518, | |
| "grad_norm": 1.3905627727508545, | |
| "learning_rate": 3.0846519291956923e-07, | |
| "loss": 0.4852849543094635, | |
| "step": 1336 | |
| }, | |
| { | |
| "epoch": 1.4433656957928802, | |
| "grad_norm": 4.070714950561523, | |
| "learning_rate": 3.079357567106375e-07, | |
| "loss": 0.609307050704956, | |
| "step": 1338 | |
| }, | |
| { | |
| "epoch": 1.4455231930960086, | |
| "grad_norm": 1.5611417293548584, | |
| "learning_rate": 3.074061340826193e-07, | |
| "loss": 0.6423069834709167, | |
| "step": 1340 | |
| }, | |
| { | |
| "epoch": 1.447680690399137, | |
| "grad_norm": 4.069622993469238, | |
| "learning_rate": 3.0687632803322214e-07, | |
| "loss": 0.4610182046890259, | |
| "step": 1342 | |
| }, | |
| { | |
| "epoch": 1.4498381877022655, | |
| "grad_norm": 1.5368692874908447, | |
| "learning_rate": 3.0634634156119183e-07, | |
| "loss": 0.596781313419342, | |
| "step": 1344 | |
| }, | |
| { | |
| "epoch": 1.4519956850053937, | |
| "grad_norm": 4.0829572677612305, | |
| "learning_rate": 3.0581617766629525e-07, | |
| "loss": 0.46168115735054016, | |
| "step": 1346 | |
| }, | |
| { | |
| "epoch": 1.454153182308522, | |
| "grad_norm": 1.1894795894622803, | |
| "learning_rate": 3.052858393493036e-07, | |
| "loss": 0.3813992738723755, | |
| "step": 1348 | |
| }, | |
| { | |
| "epoch": 1.4563106796116505, | |
| "grad_norm": 7.187083721160889, | |
| "learning_rate": 3.0475532961197525e-07, | |
| "loss": 0.4869483411312103, | |
| "step": 1350 | |
| }, | |
| { | |
| "epoch": 1.458468176914779, | |
| "grad_norm": 2.1215786933898926, | |
| "learning_rate": 3.042246514570388e-07, | |
| "loss": 0.44144994020462036, | |
| "step": 1352 | |
| }, | |
| { | |
| "epoch": 1.4606256742179071, | |
| "grad_norm": 1.7207623720169067, | |
| "learning_rate": 3.036938078881764e-07, | |
| "loss": 0.5316063165664673, | |
| "step": 1354 | |
| }, | |
| { | |
| "epoch": 1.4627831715210355, | |
| "grad_norm": 1.4604369401931763, | |
| "learning_rate": 3.0316280191000595e-07, | |
| "loss": 0.5334872007369995, | |
| "step": 1356 | |
| }, | |
| { | |
| "epoch": 1.464940668824164, | |
| "grad_norm": 1.4460911750793457, | |
| "learning_rate": 3.0263163652806497e-07, | |
| "loss": 0.5650609135627747, | |
| "step": 1358 | |
| }, | |
| { | |
| "epoch": 1.4670981661272924, | |
| "grad_norm": 1.443108320236206, | |
| "learning_rate": 3.0210031474879323e-07, | |
| "loss": 0.3324916660785675, | |
| "step": 1360 | |
| }, | |
| { | |
| "epoch": 1.4692556634304208, | |
| "grad_norm": 1.309964656829834, | |
| "learning_rate": 3.015688395795154e-07, | |
| "loss": 0.6782206892967224, | |
| "step": 1362 | |
| }, | |
| { | |
| "epoch": 1.4714131607335492, | |
| "grad_norm": 1.0900267362594604, | |
| "learning_rate": 3.010372140284247e-07, | |
| "loss": 0.42707037925720215, | |
| "step": 1364 | |
| }, | |
| { | |
| "epoch": 1.4735706580366774, | |
| "grad_norm": 1.5196927785873413, | |
| "learning_rate": 3.0050544110456544e-07, | |
| "loss": 0.561892032623291, | |
| "step": 1366 | |
| }, | |
| { | |
| "epoch": 1.4757281553398058, | |
| "grad_norm": 1.3471835851669312, | |
| "learning_rate": 2.999735238178159e-07, | |
| "loss": 0.5165982842445374, | |
| "step": 1368 | |
| }, | |
| { | |
| "epoch": 1.4778856526429343, | |
| "grad_norm": 1.6917212009429932, | |
| "learning_rate": 2.9944146517887166e-07, | |
| "loss": 0.42515087127685547, | |
| "step": 1370 | |
| }, | |
| { | |
| "epoch": 1.4800431499460625, | |
| "grad_norm": 3.3652212619781494, | |
| "learning_rate": 2.989092681992283e-07, | |
| "loss": 0.5667294859886169, | |
| "step": 1372 | |
| }, | |
| { | |
| "epoch": 1.4822006472491909, | |
| "grad_norm": 6.991846561431885, | |
| "learning_rate": 2.983769358911643e-07, | |
| "loss": 0.4916223883628845, | |
| "step": 1374 | |
| }, | |
| { | |
| "epoch": 1.4843581445523193, | |
| "grad_norm": 3.88139271736145, | |
| "learning_rate": 2.9784447126772434e-07, | |
| "loss": 0.5577268600463867, | |
| "step": 1376 | |
| }, | |
| { | |
| "epoch": 1.4865156418554477, | |
| "grad_norm": 1.634870171546936, | |
| "learning_rate": 2.9731187734270173e-07, | |
| "loss": 0.5713073015213013, | |
| "step": 1378 | |
| }, | |
| { | |
| "epoch": 1.4886731391585761, | |
| "grad_norm": 2.828623056411743, | |
| "learning_rate": 2.967791571306221e-07, | |
| "loss": 0.7753892540931702, | |
| "step": 1380 | |
| }, | |
| { | |
| "epoch": 1.4908306364617046, | |
| "grad_norm": 2.1929805278778076, | |
| "learning_rate": 2.962463136467253e-07, | |
| "loss": 0.5757138729095459, | |
| "step": 1382 | |
| }, | |
| { | |
| "epoch": 1.4929881337648327, | |
| "grad_norm": 1.8372981548309326, | |
| "learning_rate": 2.9571334990694927e-07, | |
| "loss": 0.4425245523452759, | |
| "step": 1384 | |
| }, | |
| { | |
| "epoch": 1.4951456310679612, | |
| "grad_norm": 1.2296621799468994, | |
| "learning_rate": 2.951802689279126e-07, | |
| "loss": 0.6176034808158875, | |
| "step": 1386 | |
| }, | |
| { | |
| "epoch": 1.4973031283710896, | |
| "grad_norm": 4.398133277893066, | |
| "learning_rate": 2.9464707372689734e-07, | |
| "loss": 0.6378481984138489, | |
| "step": 1388 | |
| }, | |
| { | |
| "epoch": 1.4994606256742178, | |
| "grad_norm": 2.845616102218628, | |
| "learning_rate": 2.9411376732183206e-07, | |
| "loss": 0.6340577602386475, | |
| "step": 1390 | |
| }, | |
| { | |
| "epoch": 1.5016181229773462, | |
| "grad_norm": 1.8457330465316772, | |
| "learning_rate": 2.935803527312748e-07, | |
| "loss": 0.41831356287002563, | |
| "step": 1392 | |
| }, | |
| { | |
| "epoch": 1.5037756202804746, | |
| "grad_norm": 11.14624309539795, | |
| "learning_rate": 2.930468329743959e-07, | |
| "loss": 0.5453674793243408, | |
| "step": 1394 | |
| }, | |
| { | |
| "epoch": 1.505933117583603, | |
| "grad_norm": 1.1637507677078247, | |
| "learning_rate": 2.9251321107096105e-07, | |
| "loss": 0.6097413301467896, | |
| "step": 1396 | |
| }, | |
| { | |
| "epoch": 1.5080906148867315, | |
| "grad_norm": 1.4595943689346313, | |
| "learning_rate": 2.91979490041314e-07, | |
| "loss": 0.5867338180541992, | |
| "step": 1398 | |
| }, | |
| { | |
| "epoch": 1.5102481121898599, | |
| "grad_norm": 8.647173881530762, | |
| "learning_rate": 2.9144567290635956e-07, | |
| "loss": 0.6274332404136658, | |
| "step": 1400 | |
| }, | |
| { | |
| "epoch": 1.512405609492988, | |
| "grad_norm": 1.433868169784546, | |
| "learning_rate": 2.909117626875466e-07, | |
| "loss": 0.530730664730072, | |
| "step": 1402 | |
| }, | |
| { | |
| "epoch": 1.5145631067961165, | |
| "grad_norm": 2.0332090854644775, | |
| "learning_rate": 2.903777624068507e-07, | |
| "loss": 0.38328850269317627, | |
| "step": 1404 | |
| }, | |
| { | |
| "epoch": 1.516720604099245, | |
| "grad_norm": 2.187605381011963, | |
| "learning_rate": 2.8984367508675735e-07, | |
| "loss": 0.507274866104126, | |
| "step": 1406 | |
| }, | |
| { | |
| "epoch": 1.5188781014023731, | |
| "grad_norm": 2.2323250770568848, | |
| "learning_rate": 2.8930950375024444e-07, | |
| "loss": 0.47206202149391174, | |
| "step": 1408 | |
| }, | |
| { | |
| "epoch": 1.5210355987055015, | |
| "grad_norm": 3.1241204738616943, | |
| "learning_rate": 2.8877525142076584e-07, | |
| "loss": 0.45956486463546753, | |
| "step": 1410 | |
| }, | |
| { | |
| "epoch": 1.52319309600863, | |
| "grad_norm": 1.791256070137024, | |
| "learning_rate": 2.882409211222335e-07, | |
| "loss": 0.29296764731407166, | |
| "step": 1412 | |
| }, | |
| { | |
| "epoch": 1.5253505933117584, | |
| "grad_norm": 4.025422096252441, | |
| "learning_rate": 2.8770651587900075e-07, | |
| "loss": 0.471237450838089, | |
| "step": 1414 | |
| }, | |
| { | |
| "epoch": 1.5275080906148868, | |
| "grad_norm": 1.249983787536621, | |
| "learning_rate": 2.8717203871584504e-07, | |
| "loss": 0.5922088027000427, | |
| "step": 1416 | |
| }, | |
| { | |
| "epoch": 1.5296655879180152, | |
| "grad_norm": 5.017333030700684, | |
| "learning_rate": 2.866374926579512e-07, | |
| "loss": 0.4936787188053131, | |
| "step": 1418 | |
| }, | |
| { | |
| "epoch": 1.5318230852211436, | |
| "grad_norm": 1.736030101776123, | |
| "learning_rate": 2.8610288073089363e-07, | |
| "loss": 0.5232613682746887, | |
| "step": 1420 | |
| }, | |
| { | |
| "epoch": 1.5339805825242718, | |
| "grad_norm": 4.345666885375977, | |
| "learning_rate": 2.855682059606196e-07, | |
| "loss": 0.5884013772010803, | |
| "step": 1422 | |
| }, | |
| { | |
| "epoch": 1.5361380798274002, | |
| "grad_norm": 13.682802200317383, | |
| "learning_rate": 2.850334713734325e-07, | |
| "loss": 0.6141678094863892, | |
| "step": 1424 | |
| }, | |
| { | |
| "epoch": 1.5382955771305284, | |
| "grad_norm": 1.5001555681228638, | |
| "learning_rate": 2.844986799959738e-07, | |
| "loss": 0.41682037711143494, | |
| "step": 1426 | |
| }, | |
| { | |
| "epoch": 1.5404530744336569, | |
| "grad_norm": 1.6063748598098755, | |
| "learning_rate": 2.839638348552067e-07, | |
| "loss": 0.5947140455245972, | |
| "step": 1428 | |
| }, | |
| { | |
| "epoch": 1.5426105717367853, | |
| "grad_norm": 1.9265503883361816, | |
| "learning_rate": 2.8342893897839855e-07, | |
| "loss": 0.504668653011322, | |
| "step": 1430 | |
| }, | |
| { | |
| "epoch": 1.5447680690399137, | |
| "grad_norm": 2.9574995040893555, | |
| "learning_rate": 2.828939953931038e-07, | |
| "loss": 0.43785667419433594, | |
| "step": 1432 | |
| }, | |
| { | |
| "epoch": 1.5469255663430421, | |
| "grad_norm": 1.5642776489257812, | |
| "learning_rate": 2.823590071271472e-07, | |
| "loss": 0.42886197566986084, | |
| "step": 1434 | |
| }, | |
| { | |
| "epoch": 1.5490830636461705, | |
| "grad_norm": 0.6376549005508423, | |
| "learning_rate": 2.818239772086063e-07, | |
| "loss": 0.41987836360931396, | |
| "step": 1436 | |
| }, | |
| { | |
| "epoch": 1.551240560949299, | |
| "grad_norm": 3.890024423599243, | |
| "learning_rate": 2.8128890866579406e-07, | |
| "loss": 0.5718374252319336, | |
| "step": 1438 | |
| }, | |
| { | |
| "epoch": 1.5533980582524272, | |
| "grad_norm": 3.199538230895996, | |
| "learning_rate": 2.807538045272427e-07, | |
| "loss": 0.6301546096801758, | |
| "step": 1440 | |
| }, | |
| { | |
| "epoch": 1.5555555555555556, | |
| "grad_norm": 2.968087911605835, | |
| "learning_rate": 2.8021866782168547e-07, | |
| "loss": 0.5787625908851624, | |
| "step": 1442 | |
| }, | |
| { | |
| "epoch": 1.5577130528586838, | |
| "grad_norm": 4.71113920211792, | |
| "learning_rate": 2.796835015780398e-07, | |
| "loss": 0.5471571087837219, | |
| "step": 1444 | |
| }, | |
| { | |
| "epoch": 1.5598705501618122, | |
| "grad_norm": 3.893944025039673, | |
| "learning_rate": 2.79148308825391e-07, | |
| "loss": 0.4724005162715912, | |
| "step": 1446 | |
| }, | |
| { | |
| "epoch": 1.5620280474649406, | |
| "grad_norm": 0.665489673614502, | |
| "learning_rate": 2.7861309259297354e-07, | |
| "loss": 0.6328169107437134, | |
| "step": 1448 | |
| }, | |
| { | |
| "epoch": 1.564185544768069, | |
| "grad_norm": 1.5428534746170044, | |
| "learning_rate": 2.780778559101556e-07, | |
| "loss": 0.4981670379638672, | |
| "step": 1450 | |
| }, | |
| { | |
| "epoch": 1.5663430420711975, | |
| "grad_norm": 1.2379951477050781, | |
| "learning_rate": 2.7754260180642046e-07, | |
| "loss": 0.6006782054901123, | |
| "step": 1452 | |
| }, | |
| { | |
| "epoch": 1.5685005393743259, | |
| "grad_norm": 1.8184620141983032, | |
| "learning_rate": 2.770073333113504e-07, | |
| "loss": 0.4560186564922333, | |
| "step": 1454 | |
| }, | |
| { | |
| "epoch": 1.5706580366774543, | |
| "grad_norm": 7.522675037384033, | |
| "learning_rate": 2.7647205345460906e-07, | |
| "loss": 0.611346423625946, | |
| "step": 1456 | |
| }, | |
| { | |
| "epoch": 1.5728155339805825, | |
| "grad_norm": 1.3059622049331665, | |
| "learning_rate": 2.7593676526592423e-07, | |
| "loss": 0.2933533191680908, | |
| "step": 1458 | |
| }, | |
| { | |
| "epoch": 1.574973031283711, | |
| "grad_norm": 3.4842753410339355, | |
| "learning_rate": 2.7540147177507123e-07, | |
| "loss": 0.5341723561286926, | |
| "step": 1460 | |
| }, | |
| { | |
| "epoch": 1.577130528586839, | |
| "grad_norm": 5.351451396942139, | |
| "learning_rate": 2.74866176011855e-07, | |
| "loss": 0.6091599464416504, | |
| "step": 1462 | |
| }, | |
| { | |
| "epoch": 1.5792880258899675, | |
| "grad_norm": 2.680004119873047, | |
| "learning_rate": 2.743308810060935e-07, | |
| "loss": 0.7066933512687683, | |
| "step": 1464 | |
| }, | |
| { | |
| "epoch": 1.581445523193096, | |
| "grad_norm": 2.588871955871582, | |
| "learning_rate": 2.737955897876005e-07, | |
| "loss": 0.6565461754798889, | |
| "step": 1466 | |
| }, | |
| { | |
| "epoch": 1.5836030204962244, | |
| "grad_norm": 1.496903896331787, | |
| "learning_rate": 2.732603053861681e-07, | |
| "loss": 0.47990620136260986, | |
| "step": 1468 | |
| }, | |
| { | |
| "epoch": 1.5857605177993528, | |
| "grad_norm": 6.212797164916992, | |
| "learning_rate": 2.7272503083155004e-07, | |
| "loss": 0.4671979546546936, | |
| "step": 1470 | |
| }, | |
| { | |
| "epoch": 1.5879180151024812, | |
| "grad_norm": 4.061429977416992, | |
| "learning_rate": 2.7218976915344416e-07, | |
| "loss": 0.6535285711288452, | |
| "step": 1472 | |
| }, | |
| { | |
| "epoch": 1.5900755124056096, | |
| "grad_norm": 1.503240942955017, | |
| "learning_rate": 2.7165452338147555e-07, | |
| "loss": 0.5079244375228882, | |
| "step": 1474 | |
| }, | |
| { | |
| "epoch": 1.5922330097087378, | |
| "grad_norm": 3.7657103538513184, | |
| "learning_rate": 2.7111929654517925e-07, | |
| "loss": 0.6188565492630005, | |
| "step": 1476 | |
| }, | |
| { | |
| "epoch": 1.5943905070118662, | |
| "grad_norm": 1.6457622051239014, | |
| "learning_rate": 2.7058409167398305e-07, | |
| "loss": 0.5721461772918701, | |
| "step": 1478 | |
| }, | |
| { | |
| "epoch": 1.5965480043149944, | |
| "grad_norm": 2.564640760421753, | |
| "learning_rate": 2.7004891179719044e-07, | |
| "loss": 0.594935417175293, | |
| "step": 1480 | |
| }, | |
| { | |
| "epoch": 1.5987055016181229, | |
| "grad_norm": 1.901548147201538, | |
| "learning_rate": 2.695137599439635e-07, | |
| "loss": 0.5292646884918213, | |
| "step": 1482 | |
| }, | |
| { | |
| "epoch": 1.6008629989212513, | |
| "grad_norm": 2.8939266204833984, | |
| "learning_rate": 2.689786391433055e-07, | |
| "loss": 0.3793540894985199, | |
| "step": 1484 | |
| }, | |
| { | |
| "epoch": 1.6030204962243797, | |
| "grad_norm": 1.6566553115844727, | |
| "learning_rate": 2.6844355242404434e-07, | |
| "loss": 0.6384937167167664, | |
| "step": 1486 | |
| }, | |
| { | |
| "epoch": 1.6051779935275081, | |
| "grad_norm": 1.6839215755462646, | |
| "learning_rate": 2.6790850281481455e-07, | |
| "loss": 0.5815557837486267, | |
| "step": 1488 | |
| }, | |
| { | |
| "epoch": 1.6073354908306365, | |
| "grad_norm": 2.1547319889068604, | |
| "learning_rate": 2.6737349334404086e-07, | |
| "loss": 0.4502698481082916, | |
| "step": 1490 | |
| }, | |
| { | |
| "epoch": 1.609492988133765, | |
| "grad_norm": 6.962564945220947, | |
| "learning_rate": 2.66838527039921e-07, | |
| "loss": 0.5677059888839722, | |
| "step": 1492 | |
| }, | |
| { | |
| "epoch": 1.6116504854368932, | |
| "grad_norm": 2.3401830196380615, | |
| "learning_rate": 2.663036069304079e-07, | |
| "loss": 0.7475476264953613, | |
| "step": 1494 | |
| }, | |
| { | |
| "epoch": 1.6138079827400216, | |
| "grad_norm": 3.999359130859375, | |
| "learning_rate": 2.657687360431935e-07, | |
| "loss": 0.6050864458084106, | |
| "step": 1496 | |
| }, | |
| { | |
| "epoch": 1.61596548004315, | |
| "grad_norm": 1.3075038194656372, | |
| "learning_rate": 2.6523391740569074e-07, | |
| "loss": 0.5616152286529541, | |
| "step": 1498 | |
| }, | |
| { | |
| "epoch": 1.6181229773462782, | |
| "grad_norm": 1.1486589908599854, | |
| "learning_rate": 2.646991540450172e-07, | |
| "loss": 0.5269895792007446, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 1.6202804746494066, | |
| "grad_norm": 7.192336082458496, | |
| "learning_rate": 2.6416444898797716e-07, | |
| "loss": 0.47785210609436035, | |
| "step": 1502 | |
| }, | |
| { | |
| "epoch": 1.622437971952535, | |
| "grad_norm": 1.773577332496643, | |
| "learning_rate": 2.6362980526104536e-07, | |
| "loss": 0.3188018500804901, | |
| "step": 1504 | |
| }, | |
| { | |
| "epoch": 1.6245954692556634, | |
| "grad_norm": 4.216702938079834, | |
| "learning_rate": 2.630952258903491e-07, | |
| "loss": 0.5588706135749817, | |
| "step": 1506 | |
| }, | |
| { | |
| "epoch": 1.6267529665587919, | |
| "grad_norm": 1.6883203983306885, | |
| "learning_rate": 2.6256071390165147e-07, | |
| "loss": 0.3531300723552704, | |
| "step": 1508 | |
| }, | |
| { | |
| "epoch": 1.6289104638619203, | |
| "grad_norm": 1.359221339225769, | |
| "learning_rate": 2.620262723203342e-07, | |
| "loss": 0.3672279715538025, | |
| "step": 1510 | |
| }, | |
| { | |
| "epoch": 1.6310679611650487, | |
| "grad_norm": 8.669332504272461, | |
| "learning_rate": 2.6149190417138057e-07, | |
| "loss": 0.6095560193061829, | |
| "step": 1512 | |
| }, | |
| { | |
| "epoch": 1.633225458468177, | |
| "grad_norm": 3.492126226425171, | |
| "learning_rate": 2.609576124793581e-07, | |
| "loss": 0.41963210701942444, | |
| "step": 1514 | |
| }, | |
| { | |
| "epoch": 1.6353829557713053, | |
| "grad_norm": 3.135106086730957, | |
| "learning_rate": 2.604234002684016e-07, | |
| "loss": 0.4734860360622406, | |
| "step": 1516 | |
| }, | |
| { | |
| "epoch": 1.6375404530744335, | |
| "grad_norm": 1.9704272747039795, | |
| "learning_rate": 2.5988927056219613e-07, | |
| "loss": 0.596852719783783, | |
| "step": 1518 | |
| }, | |
| { | |
| "epoch": 1.639697950377562, | |
| "grad_norm": 2.8910233974456787, | |
| "learning_rate": 2.593552263839596e-07, | |
| "loss": 0.643505871295929, | |
| "step": 1520 | |
| }, | |
| { | |
| "epoch": 1.6418554476806904, | |
| "grad_norm": 2.2476956844329834, | |
| "learning_rate": 2.588212707564259e-07, | |
| "loss": 0.4490068554878235, | |
| "step": 1522 | |
| }, | |
| { | |
| "epoch": 1.6440129449838188, | |
| "grad_norm": 6.380528926849365, | |
| "learning_rate": 2.582874067018278e-07, | |
| "loss": 0.48139023780822754, | |
| "step": 1524 | |
| }, | |
| { | |
| "epoch": 1.6461704422869472, | |
| "grad_norm": 2.361678123474121, | |
| "learning_rate": 2.577536372418795e-07, | |
| "loss": 0.4531154930591583, | |
| "step": 1526 | |
| }, | |
| { | |
| "epoch": 1.6483279395900756, | |
| "grad_norm": 1.433486819267273, | |
| "learning_rate": 2.572199653977602e-07, | |
| "loss": 0.5615776181221008, | |
| "step": 1528 | |
| }, | |
| { | |
| "epoch": 1.650485436893204, | |
| "grad_norm": 1.2627114057540894, | |
| "learning_rate": 2.5668639419009606e-07, | |
| "loss": 0.5969760417938232, | |
| "step": 1530 | |
| }, | |
| { | |
| "epoch": 1.6526429341963322, | |
| "grad_norm": 6.023662090301514, | |
| "learning_rate": 2.5615292663894406e-07, | |
| "loss": 0.7165044546127319, | |
| "step": 1532 | |
| }, | |
| { | |
| "epoch": 1.6548004314994607, | |
| "grad_norm": 3.729762315750122, | |
| "learning_rate": 2.556195657637744e-07, | |
| "loss": 0.5139379501342773, | |
| "step": 1534 | |
| }, | |
| { | |
| "epoch": 1.6569579288025889, | |
| "grad_norm": 3.813971996307373, | |
| "learning_rate": 2.5508631458345325e-07, | |
| "loss": 0.40219447016716003, | |
| "step": 1536 | |
| }, | |
| { | |
| "epoch": 1.6591154261057173, | |
| "grad_norm": 2.2718698978424072, | |
| "learning_rate": 2.545531761162263e-07, | |
| "loss": 0.6281888484954834, | |
| "step": 1538 | |
| }, | |
| { | |
| "epoch": 1.6612729234088457, | |
| "grad_norm": 1.6029448509216309, | |
| "learning_rate": 2.540201533797007e-07, | |
| "loss": 0.5198391675949097, | |
| "step": 1540 | |
| }, | |
| { | |
| "epoch": 1.6634304207119741, | |
| "grad_norm": 1.972841501235962, | |
| "learning_rate": 2.5348724939082916e-07, | |
| "loss": 0.5897455811500549, | |
| "step": 1542 | |
| }, | |
| { | |
| "epoch": 1.6655879180151025, | |
| "grad_norm": 1.6188420057296753, | |
| "learning_rate": 2.5295446716589194e-07, | |
| "loss": 0.36811563372612, | |
| "step": 1544 | |
| }, | |
| { | |
| "epoch": 1.667745415318231, | |
| "grad_norm": 2.6556005477905273, | |
| "learning_rate": 2.5242180972048e-07, | |
| "loss": 0.48183539509773254, | |
| "step": 1546 | |
| }, | |
| { | |
| "epoch": 1.6699029126213594, | |
| "grad_norm": 1.075056791305542, | |
| "learning_rate": 2.5188928006947846e-07, | |
| "loss": 0.6169477105140686, | |
| "step": 1548 | |
| }, | |
| { | |
| "epoch": 1.6720604099244876, | |
| "grad_norm": 4.6703009605407715, | |
| "learning_rate": 2.513568812270487e-07, | |
| "loss": 0.481448233127594, | |
| "step": 1550 | |
| }, | |
| { | |
| "epoch": 1.674217907227616, | |
| "grad_norm": 2.274782180786133, | |
| "learning_rate": 2.5082461620661196e-07, | |
| "loss": 0.6557754874229431, | |
| "step": 1552 | |
| }, | |
| { | |
| "epoch": 1.6763754045307442, | |
| "grad_norm": 1.1385339498519897, | |
| "learning_rate": 2.502924880208318e-07, | |
| "loss": 0.4550181031227112, | |
| "step": 1554 | |
| }, | |
| { | |
| "epoch": 1.6785329018338726, | |
| "grad_norm": 9.544242858886719, | |
| "learning_rate": 2.497604996815976e-07, | |
| "loss": 0.5606685876846313, | |
| "step": 1556 | |
| }, | |
| { | |
| "epoch": 1.680690399137001, | |
| "grad_norm": 23.09193229675293, | |
| "learning_rate": 2.4922865420000693e-07, | |
| "loss": 0.4275263547897339, | |
| "step": 1558 | |
| }, | |
| { | |
| "epoch": 1.6828478964401294, | |
| "grad_norm": 1.2717275619506836, | |
| "learning_rate": 2.486969545863489e-07, | |
| "loss": 0.604001522064209, | |
| "step": 1560 | |
| }, | |
| { | |
| "epoch": 1.6850053937432579, | |
| "grad_norm": 3.0152664184570312, | |
| "learning_rate": 2.4816540385008696e-07, | |
| "loss": 0.5382636189460754, | |
| "step": 1562 | |
| }, | |
| { | |
| "epoch": 1.6871628910463863, | |
| "grad_norm": 2.152043581008911, | |
| "learning_rate": 2.4763400499984184e-07, | |
| "loss": 0.48857036232948303, | |
| "step": 1564 | |
| }, | |
| { | |
| "epoch": 1.6893203883495147, | |
| "grad_norm": 1.6351813077926636, | |
| "learning_rate": 2.471027610433748e-07, | |
| "loss": 0.604580819606781, | |
| "step": 1566 | |
| }, | |
| { | |
| "epoch": 1.691477885652643, | |
| "grad_norm": 1.648799180984497, | |
| "learning_rate": 2.465716749875701e-07, | |
| "loss": 0.5242129564285278, | |
| "step": 1568 | |
| }, | |
| { | |
| "epoch": 1.6936353829557713, | |
| "grad_norm": 1.386277198791504, | |
| "learning_rate": 2.4604074983841853e-07, | |
| "loss": 0.5764685869216919, | |
| "step": 1570 | |
| }, | |
| { | |
| "epoch": 1.6957928802588995, | |
| "grad_norm": 1.8634097576141357, | |
| "learning_rate": 2.4550998860099993e-07, | |
| "loss": 0.45359018445014954, | |
| "step": 1572 | |
| }, | |
| { | |
| "epoch": 1.697950377562028, | |
| "grad_norm": 5.895047187805176, | |
| "learning_rate": 2.4497939427946654e-07, | |
| "loss": 0.6302123069763184, | |
| "step": 1574 | |
| }, | |
| { | |
| "epoch": 1.7001078748651564, | |
| "grad_norm": 4.849222183227539, | |
| "learning_rate": 2.444489698770256e-07, | |
| "loss": 0.5112524032592773, | |
| "step": 1576 | |
| }, | |
| { | |
| "epoch": 1.7022653721682848, | |
| "grad_norm": 4.763610363006592, | |
| "learning_rate": 2.43918718395923e-07, | |
| "loss": 0.507486879825592, | |
| "step": 1578 | |
| }, | |
| { | |
| "epoch": 1.7044228694714132, | |
| "grad_norm": 1.9259899854660034, | |
| "learning_rate": 2.4338864283742554e-07, | |
| "loss": 0.5151503086090088, | |
| "step": 1580 | |
| }, | |
| { | |
| "epoch": 1.7065803667745416, | |
| "grad_norm": 1.45798659324646, | |
| "learning_rate": 2.428587462018044e-07, | |
| "loss": 0.578480064868927, | |
| "step": 1582 | |
| }, | |
| { | |
| "epoch": 1.70873786407767, | |
| "grad_norm": 1.3601330518722534, | |
| "learning_rate": 2.4232903148831805e-07, | |
| "loss": 0.5815561413764954, | |
| "step": 1584 | |
| }, | |
| { | |
| "epoch": 1.7108953613807982, | |
| "grad_norm": 2.1496477127075195, | |
| "learning_rate": 2.4179950169519514e-07, | |
| "loss": 0.48085469007492065, | |
| "step": 1586 | |
| }, | |
| { | |
| "epoch": 1.7130528586839266, | |
| "grad_norm": 1.8896552324295044, | |
| "learning_rate": 2.4127015981961797e-07, | |
| "loss": 0.44769376516342163, | |
| "step": 1588 | |
| }, | |
| { | |
| "epoch": 1.715210355987055, | |
| "grad_norm": 3.8881163597106934, | |
| "learning_rate": 2.407410088577047e-07, | |
| "loss": 0.578763484954834, | |
| "step": 1590 | |
| }, | |
| { | |
| "epoch": 1.7173678532901833, | |
| "grad_norm": 3.5086944103240967, | |
| "learning_rate": 2.402120518044935e-07, | |
| "loss": 0.5796621441841125, | |
| "step": 1592 | |
| }, | |
| { | |
| "epoch": 1.7195253505933117, | |
| "grad_norm": 1.3541215658187866, | |
| "learning_rate": 2.396832916539247e-07, | |
| "loss": 0.6715743541717529, | |
| "step": 1594 | |
| }, | |
| { | |
| "epoch": 1.72168284789644, | |
| "grad_norm": 1.8187955617904663, | |
| "learning_rate": 2.391547313988239e-07, | |
| "loss": 0.5842028260231018, | |
| "step": 1596 | |
| }, | |
| { | |
| "epoch": 1.7238403451995685, | |
| "grad_norm": 2.136215925216675, | |
| "learning_rate": 2.386263740308859e-07, | |
| "loss": 0.5518381595611572, | |
| "step": 1598 | |
| }, | |
| { | |
| "epoch": 1.725997842502697, | |
| "grad_norm": 5.43556022644043, | |
| "learning_rate": 2.3809822254065637e-07, | |
| "loss": 0.4855960011482239, | |
| "step": 1600 | |
| }, | |
| { | |
| "epoch": 1.7281553398058254, | |
| "grad_norm": 1.7936867475509644, | |
| "learning_rate": 2.375702799175164e-07, | |
| "loss": 0.5670949816703796, | |
| "step": 1602 | |
| }, | |
| { | |
| "epoch": 1.7303128371089536, | |
| "grad_norm": 3.9360201358795166, | |
| "learning_rate": 2.3704254914966436e-07, | |
| "loss": 0.4383196234703064, | |
| "step": 1604 | |
| }, | |
| { | |
| "epoch": 1.732470334412082, | |
| "grad_norm": 1.8262487649917603, | |
| "learning_rate": 2.365150332240999e-07, | |
| "loss": 0.4360693395137787, | |
| "step": 1606 | |
| }, | |
| { | |
| "epoch": 1.7346278317152104, | |
| "grad_norm": 5.341520309448242, | |
| "learning_rate": 2.3598773512660636e-07, | |
| "loss": 0.5174295902252197, | |
| "step": 1608 | |
| }, | |
| { | |
| "epoch": 1.7367853290183386, | |
| "grad_norm": 1.4834777116775513, | |
| "learning_rate": 2.3546065784173425e-07, | |
| "loss": 0.5596581697463989, | |
| "step": 1610 | |
| }, | |
| { | |
| "epoch": 1.738942826321467, | |
| "grad_norm": 2.0027642250061035, | |
| "learning_rate": 2.349338043527843e-07, | |
| "loss": 0.5774880051612854, | |
| "step": 1612 | |
| }, | |
| { | |
| "epoch": 1.7411003236245954, | |
| "grad_norm": 2.79825758934021, | |
| "learning_rate": 2.3440717764179053e-07, | |
| "loss": 0.34554505348205566, | |
| "step": 1614 | |
| }, | |
| { | |
| "epoch": 1.7432578209277239, | |
| "grad_norm": 1.6454131603240967, | |
| "learning_rate": 2.338807806895033e-07, | |
| "loss": 0.470796674489975, | |
| "step": 1616 | |
| }, | |
| { | |
| "epoch": 1.7454153182308523, | |
| "grad_norm": 0.6968181133270264, | |
| "learning_rate": 2.3335461647537252e-07, | |
| "loss": 0.458304226398468, | |
| "step": 1618 | |
| }, | |
| { | |
| "epoch": 1.7475728155339807, | |
| "grad_norm": 1.9778035879135132, | |
| "learning_rate": 2.3282868797753092e-07, | |
| "loss": 0.3832884132862091, | |
| "step": 1620 | |
| }, | |
| { | |
| "epoch": 1.7497303128371091, | |
| "grad_norm": 1.3171417713165283, | |
| "learning_rate": 2.3230299817277694e-07, | |
| "loss": 0.33851158618927, | |
| "step": 1622 | |
| }, | |
| { | |
| "epoch": 1.7518878101402373, | |
| "grad_norm": 1.1305524110794067, | |
| "learning_rate": 2.3177755003655803e-07, | |
| "loss": 0.38404303789138794, | |
| "step": 1624 | |
| }, | |
| { | |
| "epoch": 1.7540453074433657, | |
| "grad_norm": 3.3389196395874023, | |
| "learning_rate": 2.3125234654295378e-07, | |
| "loss": 0.4796540141105652, | |
| "step": 1626 | |
| }, | |
| { | |
| "epoch": 1.756202804746494, | |
| "grad_norm": 2.8726422786712646, | |
| "learning_rate": 2.3072739066465906e-07, | |
| "loss": 0.5632970333099365, | |
| "step": 1628 | |
| }, | |
| { | |
| "epoch": 1.7583603020496223, | |
| "grad_norm": 2.0043997764587402, | |
| "learning_rate": 2.3020268537296728e-07, | |
| "loss": 0.610961377620697, | |
| "step": 1630 | |
| }, | |
| { | |
| "epoch": 1.7605177993527508, | |
| "grad_norm": 1.977318286895752, | |
| "learning_rate": 2.2967823363775334e-07, | |
| "loss": 0.5584444403648376, | |
| "step": 1632 | |
| }, | |
| { | |
| "epoch": 1.7626752966558792, | |
| "grad_norm": 1.4552688598632812, | |
| "learning_rate": 2.2915403842745718e-07, | |
| "loss": 0.5306387543678284, | |
| "step": 1634 | |
| }, | |
| { | |
| "epoch": 1.7648327939590076, | |
| "grad_norm": 1.6729410886764526, | |
| "learning_rate": 2.286301027090668e-07, | |
| "loss": 0.7393071055412292, | |
| "step": 1636 | |
| }, | |
| { | |
| "epoch": 1.766990291262136, | |
| "grad_norm": 1.8149133920669556, | |
| "learning_rate": 2.2810642944810122e-07, | |
| "loss": 0.6838573217391968, | |
| "step": 1638 | |
| }, | |
| { | |
| "epoch": 1.7691477885652644, | |
| "grad_norm": 1.5463119745254517, | |
| "learning_rate": 2.2758302160859426e-07, | |
| "loss": 0.6229934692382812, | |
| "step": 1640 | |
| }, | |
| { | |
| "epoch": 1.7713052858683926, | |
| "grad_norm": 1.5769580602645874, | |
| "learning_rate": 2.2705988215307703e-07, | |
| "loss": 0.5759105682373047, | |
| "step": 1642 | |
| }, | |
| { | |
| "epoch": 1.773462783171521, | |
| "grad_norm": 1.2159730195999146, | |
| "learning_rate": 2.2653701404256204e-07, | |
| "loss": 0.5320509076118469, | |
| "step": 1644 | |
| }, | |
| { | |
| "epoch": 1.7756202804746493, | |
| "grad_norm": 20.918190002441406, | |
| "learning_rate": 2.260144202365254e-07, | |
| "loss": 0.7069261074066162, | |
| "step": 1646 | |
| }, | |
| { | |
| "epoch": 1.7777777777777777, | |
| "grad_norm": 1.1241180896759033, | |
| "learning_rate": 2.2549210369289124e-07, | |
| "loss": 0.5508931875228882, | |
| "step": 1648 | |
| }, | |
| { | |
| "epoch": 1.779935275080906, | |
| "grad_norm": 4.296023368835449, | |
| "learning_rate": 2.24970067368014e-07, | |
| "loss": 0.27031293511390686, | |
| "step": 1650 | |
| }, | |
| { | |
| "epoch": 1.7820927723840345, | |
| "grad_norm": 1.679374098777771, | |
| "learning_rate": 2.24448314216662e-07, | |
| "loss": 0.4512391686439514, | |
| "step": 1652 | |
| }, | |
| { | |
| "epoch": 1.784250269687163, | |
| "grad_norm": 2.161573886871338, | |
| "learning_rate": 2.2392684719200116e-07, | |
| "loss": 0.3788191080093384, | |
| "step": 1654 | |
| }, | |
| { | |
| "epoch": 1.7864077669902914, | |
| "grad_norm": 6.881853103637695, | |
| "learning_rate": 2.2340566924557735e-07, | |
| "loss": 0.5204552412033081, | |
| "step": 1656 | |
| }, | |
| { | |
| "epoch": 1.7885652642934198, | |
| "grad_norm": 2.566661834716797, | |
| "learning_rate": 2.228847833273007e-07, | |
| "loss": 0.5021325945854187, | |
| "step": 1658 | |
| }, | |
| { | |
| "epoch": 1.790722761596548, | |
| "grad_norm": 1.374242901802063, | |
| "learning_rate": 2.223641923854282e-07, | |
| "loss": 0.5507632493972778, | |
| "step": 1660 | |
| }, | |
| { | |
| "epoch": 1.7928802588996764, | |
| "grad_norm": 2.6447982788085938, | |
| "learning_rate": 2.2184389936654736e-07, | |
| "loss": 0.5545051097869873, | |
| "step": 1662 | |
| }, | |
| { | |
| "epoch": 1.7950377562028046, | |
| "grad_norm": 1.4282158613204956, | |
| "learning_rate": 2.2132390721555933e-07, | |
| "loss": 0.5268256068229675, | |
| "step": 1664 | |
| }, | |
| { | |
| "epoch": 1.797195253505933, | |
| "grad_norm": 9.742328643798828, | |
| "learning_rate": 2.2080421887566236e-07, | |
| "loss": 0.28805431723594666, | |
| "step": 1666 | |
| }, | |
| { | |
| "epoch": 1.7993527508090614, | |
| "grad_norm": 2.320671319961548, | |
| "learning_rate": 2.2028483728833524e-07, | |
| "loss": 0.61153244972229, | |
| "step": 1668 | |
| }, | |
| { | |
| "epoch": 1.8015102481121898, | |
| "grad_norm": 1.3254764080047607, | |
| "learning_rate": 2.197657653933202e-07, | |
| "loss": 0.5806170105934143, | |
| "step": 1670 | |
| }, | |
| { | |
| "epoch": 1.8036677454153183, | |
| "grad_norm": 1.328969120979309, | |
| "learning_rate": 2.1924700612860692e-07, | |
| "loss": 0.5318849086761475, | |
| "step": 1672 | |
| }, | |
| { | |
| "epoch": 1.8058252427184467, | |
| "grad_norm": 0.41818344593048096, | |
| "learning_rate": 2.1872856243041532e-07, | |
| "loss": 0.27527254819869995, | |
| "step": 1674 | |
| }, | |
| { | |
| "epoch": 1.807982740021575, | |
| "grad_norm": 4.447193622589111, | |
| "learning_rate": 2.1821043723317935e-07, | |
| "loss": 0.6419240236282349, | |
| "step": 1676 | |
| }, | |
| { | |
| "epoch": 1.8101402373247033, | |
| "grad_norm": 1.7327150106430054, | |
| "learning_rate": 2.1769263346953004e-07, | |
| "loss": 0.5605652332305908, | |
| "step": 1678 | |
| }, | |
| { | |
| "epoch": 1.8122977346278317, | |
| "grad_norm": 1.8278568983078003, | |
| "learning_rate": 2.1717515407027937e-07, | |
| "loss": 0.5259844660758972, | |
| "step": 1680 | |
| }, | |
| { | |
| "epoch": 1.81445523193096, | |
| "grad_norm": 2.8355183601379395, | |
| "learning_rate": 2.1665800196440314e-07, | |
| "loss": 0.48816215991973877, | |
| "step": 1682 | |
| }, | |
| { | |
| "epoch": 1.8166127292340883, | |
| "grad_norm": 4.203651428222656, | |
| "learning_rate": 2.161411800790247e-07, | |
| "loss": 0.622113049030304, | |
| "step": 1684 | |
| }, | |
| { | |
| "epoch": 1.8187702265372168, | |
| "grad_norm": 1.8128265142440796, | |
| "learning_rate": 2.1562469133939836e-07, | |
| "loss": 0.23197607696056366, | |
| "step": 1686 | |
| }, | |
| { | |
| "epoch": 1.8209277238403452, | |
| "grad_norm": 4.64774227142334, | |
| "learning_rate": 2.1510853866889278e-07, | |
| "loss": 0.5721830129623413, | |
| "step": 1688 | |
| }, | |
| { | |
| "epoch": 1.8230852211434736, | |
| "grad_norm": 1.5357946157455444, | |
| "learning_rate": 2.1459272498897452e-07, | |
| "loss": 0.47745242714881897, | |
| "step": 1690 | |
| }, | |
| { | |
| "epoch": 1.825242718446602, | |
| "grad_norm": 6.159779071807861, | |
| "learning_rate": 2.1407725321919107e-07, | |
| "loss": 0.6153979301452637, | |
| "step": 1692 | |
| }, | |
| { | |
| "epoch": 1.8274002157497304, | |
| "grad_norm": 7.977688312530518, | |
| "learning_rate": 2.1356212627715524e-07, | |
| "loss": 0.5228186845779419, | |
| "step": 1694 | |
| }, | |
| { | |
| "epoch": 1.8295577130528586, | |
| "grad_norm": 1.6970831155776978, | |
| "learning_rate": 2.1304734707852785e-07, | |
| "loss": 0.49107879400253296, | |
| "step": 1696 | |
| }, | |
| { | |
| "epoch": 1.831715210355987, | |
| "grad_norm": 1.6975973844528198, | |
| "learning_rate": 2.125329185370011e-07, | |
| "loss": 0.5908475518226624, | |
| "step": 1698 | |
| }, | |
| { | |
| "epoch": 1.8338727076591155, | |
| "grad_norm": 1.8754093647003174, | |
| "learning_rate": 2.1201884356428313e-07, | |
| "loss": 0.46887385845184326, | |
| "step": 1700 | |
| }, | |
| { | |
| "epoch": 1.8360302049622437, | |
| "grad_norm": 2.0672781467437744, | |
| "learning_rate": 2.1150512507008016e-07, | |
| "loss": 0.6688355207443237, | |
| "step": 1702 | |
| }, | |
| { | |
| "epoch": 1.838187702265372, | |
| "grad_norm": 6.069046497344971, | |
| "learning_rate": 2.1099176596208134e-07, | |
| "loss": 0.5004164576530457, | |
| "step": 1704 | |
| }, | |
| { | |
| "epoch": 1.8403451995685005, | |
| "grad_norm": 2.647517204284668, | |
| "learning_rate": 2.104787691459411e-07, | |
| "loss": 0.6052039265632629, | |
| "step": 1706 | |
| }, | |
| { | |
| "epoch": 1.842502696871629, | |
| "grad_norm": 24.532392501831055, | |
| "learning_rate": 2.099661375252636e-07, | |
| "loss": 0.5694432854652405, | |
| "step": 1708 | |
| }, | |
| { | |
| "epoch": 1.8446601941747574, | |
| "grad_norm": 1.9516592025756836, | |
| "learning_rate": 2.0945387400158597e-07, | |
| "loss": 0.38723820447921753, | |
| "step": 1710 | |
| }, | |
| { | |
| "epoch": 1.8468176914778858, | |
| "grad_norm": 5.9051408767700195, | |
| "learning_rate": 2.0894198147436177e-07, | |
| "loss": 0.5640073418617249, | |
| "step": 1712 | |
| }, | |
| { | |
| "epoch": 1.8489751887810142, | |
| "grad_norm": 1.8454663753509521, | |
| "learning_rate": 2.0843046284094474e-07, | |
| "loss": 0.5534006357192993, | |
| "step": 1714 | |
| }, | |
| { | |
| "epoch": 1.8511326860841424, | |
| "grad_norm": 2.155219316482544, | |
| "learning_rate": 2.0791932099657221e-07, | |
| "loss": 0.4856148660182953, | |
| "step": 1716 | |
| }, | |
| { | |
| "epoch": 1.8532901833872708, | |
| "grad_norm": 6.839637279510498, | |
| "learning_rate": 2.074085588343491e-07, | |
| "loss": 0.41986072063446045, | |
| "step": 1718 | |
| }, | |
| { | |
| "epoch": 1.855447680690399, | |
| "grad_norm": 1.4975872039794922, | |
| "learning_rate": 2.0689817924523112e-07, | |
| "loss": 0.4183667004108429, | |
| "step": 1720 | |
| }, | |
| { | |
| "epoch": 1.8576051779935274, | |
| "grad_norm": 1.6008542776107788, | |
| "learning_rate": 2.0638818511800865e-07, | |
| "loss": 0.5753411650657654, | |
| "step": 1722 | |
| }, | |
| { | |
| "epoch": 1.8597626752966558, | |
| "grad_norm": 1.6697242259979248, | |
| "learning_rate": 2.0587857933929037e-07, | |
| "loss": 0.6102425456047058, | |
| "step": 1724 | |
| }, | |
| { | |
| "epoch": 1.8619201725997843, | |
| "grad_norm": 1.3475979566574097, | |
| "learning_rate": 2.0536936479348672e-07, | |
| "loss": 0.5690769553184509, | |
| "step": 1726 | |
| }, | |
| { | |
| "epoch": 1.8640776699029127, | |
| "grad_norm": 2.36917781829834, | |
| "learning_rate": 2.0486054436279394e-07, | |
| "loss": 0.26033759117126465, | |
| "step": 1728 | |
| }, | |
| { | |
| "epoch": 1.866235167206041, | |
| "grad_norm": 0.5969313383102417, | |
| "learning_rate": 2.0435212092717729e-07, | |
| "loss": 0.2784996032714844, | |
| "step": 1730 | |
| }, | |
| { | |
| "epoch": 1.8683926645091695, | |
| "grad_norm": 1.8333349227905273, | |
| "learning_rate": 2.0384409736435526e-07, | |
| "loss": 0.5710358619689941, | |
| "step": 1732 | |
| }, | |
| { | |
| "epoch": 1.8705501618122977, | |
| "grad_norm": 1.0351983308792114, | |
| "learning_rate": 2.033364765497828e-07, | |
| "loss": 0.352516233921051, | |
| "step": 1734 | |
| }, | |
| { | |
| "epoch": 1.8727076591154261, | |
| "grad_norm": 5.123870849609375, | |
| "learning_rate": 2.0282926135663554e-07, | |
| "loss": 0.5661641359329224, | |
| "step": 1736 | |
| }, | |
| { | |
| "epoch": 1.8748651564185543, | |
| "grad_norm": 1.9763929843902588, | |
| "learning_rate": 2.0232245465579306e-07, | |
| "loss": 0.6391258239746094, | |
| "step": 1738 | |
| }, | |
| { | |
| "epoch": 1.8770226537216828, | |
| "grad_norm": 2.7457945346832275, | |
| "learning_rate": 2.0181605931582284e-07, | |
| "loss": 0.4859541952610016, | |
| "step": 1740 | |
| }, | |
| { | |
| "epoch": 1.8791801510248112, | |
| "grad_norm": 1.343418836593628, | |
| "learning_rate": 2.013100782029641e-07, | |
| "loss": 0.57615065574646, | |
| "step": 1742 | |
| }, | |
| { | |
| "epoch": 1.8813376483279396, | |
| "grad_norm": 2.796921491622925, | |
| "learning_rate": 2.0080451418111143e-07, | |
| "loss": 0.47713714838027954, | |
| "step": 1744 | |
| }, | |
| { | |
| "epoch": 1.883495145631068, | |
| "grad_norm": 1.2428369522094727, | |
| "learning_rate": 2.0029937011179882e-07, | |
| "loss": 0.46215853095054626, | |
| "step": 1746 | |
| }, | |
| { | |
| "epoch": 1.8856526429341964, | |
| "grad_norm": 1.3065931797027588, | |
| "learning_rate": 1.9979464885418295e-07, | |
| "loss": 0.37958696484565735, | |
| "step": 1748 | |
| }, | |
| { | |
| "epoch": 1.8878101402373249, | |
| "grad_norm": 0.4351181089878082, | |
| "learning_rate": 1.9929035326502773e-07, | |
| "loss": 0.5532968044281006, | |
| "step": 1750 | |
| }, | |
| { | |
| "epoch": 1.889967637540453, | |
| "grad_norm": 2.7067880630493164, | |
| "learning_rate": 1.9878648619868765e-07, | |
| "loss": 0.5427120923995972, | |
| "step": 1752 | |
| }, | |
| { | |
| "epoch": 1.8921251348435815, | |
| "grad_norm": 1.2321635484695435, | |
| "learning_rate": 1.9828305050709144e-07, | |
| "loss": 0.4515300989151001, | |
| "step": 1754 | |
| }, | |
| { | |
| "epoch": 1.8942826321467097, | |
| "grad_norm": 1.237924337387085, | |
| "learning_rate": 1.9778004903972667e-07, | |
| "loss": 0.6264490485191345, | |
| "step": 1756 | |
| }, | |
| { | |
| "epoch": 1.896440129449838, | |
| "grad_norm": 2.399315118789673, | |
| "learning_rate": 1.9727748464362276e-07, | |
| "loss": 0.5656343698501587, | |
| "step": 1758 | |
| }, | |
| { | |
| "epoch": 1.8985976267529665, | |
| "grad_norm": 2.262449264526367, | |
| "learning_rate": 1.9677536016333556e-07, | |
| "loss": 0.4433645009994507, | |
| "step": 1760 | |
| }, | |
| { | |
| "epoch": 1.900755124056095, | |
| "grad_norm": 0.9049375653266907, | |
| "learning_rate": 1.9627367844093078e-07, | |
| "loss": 0.5328507423400879, | |
| "step": 1762 | |
| }, | |
| { | |
| "epoch": 1.9029126213592233, | |
| "grad_norm": 3.406168222427368, | |
| "learning_rate": 1.9577244231596807e-07, | |
| "loss": 0.5393190979957581, | |
| "step": 1764 | |
| }, | |
| { | |
| "epoch": 1.9050701186623518, | |
| "grad_norm": 5.9175872802734375, | |
| "learning_rate": 1.9527165462548528e-07, | |
| "loss": 0.5409752130508423, | |
| "step": 1766 | |
| }, | |
| { | |
| "epoch": 1.9072276159654802, | |
| "grad_norm": 3.4059221744537354, | |
| "learning_rate": 1.9477131820398158e-07, | |
| "loss": 0.4544711410999298, | |
| "step": 1768 | |
| }, | |
| { | |
| "epoch": 1.9093851132686084, | |
| "grad_norm": 2.6678411960601807, | |
| "learning_rate": 1.942714358834024e-07, | |
| "loss": 0.4812181890010834, | |
| "step": 1770 | |
| }, | |
| { | |
| "epoch": 1.9115426105717368, | |
| "grad_norm": 2.3084359169006348, | |
| "learning_rate": 1.9377201049312252e-07, | |
| "loss": 0.5532037615776062, | |
| "step": 1772 | |
| }, | |
| { | |
| "epoch": 1.913700107874865, | |
| "grad_norm": 1.5713317394256592, | |
| "learning_rate": 1.9327304485993084e-07, | |
| "loss": 0.5627604722976685, | |
| "step": 1774 | |
| }, | |
| { | |
| "epoch": 1.9158576051779934, | |
| "grad_norm": 4.625167369842529, | |
| "learning_rate": 1.9277454180801367e-07, | |
| "loss": 0.5986460447311401, | |
| "step": 1776 | |
| }, | |
| { | |
| "epoch": 1.9180151024811218, | |
| "grad_norm": 4.1043267250061035, | |
| "learning_rate": 1.9227650415893914e-07, | |
| "loss": 0.5130695700645447, | |
| "step": 1778 | |
| }, | |
| { | |
| "epoch": 1.9201725997842503, | |
| "grad_norm": 1.6170152425765991, | |
| "learning_rate": 1.9177893473164142e-07, | |
| "loss": 0.3731135129928589, | |
| "step": 1780 | |
| }, | |
| { | |
| "epoch": 1.9223300970873787, | |
| "grad_norm": 1.3302977085113525, | |
| "learning_rate": 1.9128183634240414e-07, | |
| "loss": 0.4674024283885956, | |
| "step": 1782 | |
| }, | |
| { | |
| "epoch": 1.924487594390507, | |
| "grad_norm": 1.3277769088745117, | |
| "learning_rate": 1.907852118048451e-07, | |
| "loss": 0.5244305729866028, | |
| "step": 1784 | |
| }, | |
| { | |
| "epoch": 1.9266450916936355, | |
| "grad_norm": 1.839621663093567, | |
| "learning_rate": 1.902890639298998e-07, | |
| "loss": 0.6807081699371338, | |
| "step": 1786 | |
| }, | |
| { | |
| "epoch": 1.9288025889967637, | |
| "grad_norm": 22.250324249267578, | |
| "learning_rate": 1.8979339552580615e-07, | |
| "loss": 0.3220374882221222, | |
| "step": 1788 | |
| }, | |
| { | |
| "epoch": 1.9309600862998921, | |
| "grad_norm": 1.6265594959259033, | |
| "learning_rate": 1.8929820939808783e-07, | |
| "loss": 0.6456558108329773, | |
| "step": 1790 | |
| }, | |
| { | |
| "epoch": 1.9331175836030206, | |
| "grad_norm": 2.020714282989502, | |
| "learning_rate": 1.8880350834953912e-07, | |
| "loss": 0.5007312297821045, | |
| "step": 1792 | |
| }, | |
| { | |
| "epoch": 1.9352750809061487, | |
| "grad_norm": 2.1667919158935547, | |
| "learning_rate": 1.8830929518020833e-07, | |
| "loss": 0.46376651525497437, | |
| "step": 1794 | |
| }, | |
| { | |
| "epoch": 1.9374325782092772, | |
| "grad_norm": 5.430749893188477, | |
| "learning_rate": 1.8781557268738275e-07, | |
| "loss": 0.6586015820503235, | |
| "step": 1796 | |
| }, | |
| { | |
| "epoch": 1.9395900755124056, | |
| "grad_norm": 0.6731663942337036, | |
| "learning_rate": 1.8732234366557225e-07, | |
| "loss": 0.6122515797615051, | |
| "step": 1798 | |
| }, | |
| { | |
| "epoch": 1.941747572815534, | |
| "grad_norm": 2.9628219604492188, | |
| "learning_rate": 1.8682961090649342e-07, | |
| "loss": 0.7105916142463684, | |
| "step": 1800 | |
| }, | |
| { | |
| "epoch": 1.9439050701186624, | |
| "grad_norm": 1.580090045928955, | |
| "learning_rate": 1.8633737719905428e-07, | |
| "loss": 0.4230397939682007, | |
| "step": 1802 | |
| }, | |
| { | |
| "epoch": 1.9460625674217908, | |
| "grad_norm": 4.340382099151611, | |
| "learning_rate": 1.8584564532933784e-07, | |
| "loss": 0.6302311420440674, | |
| "step": 1804 | |
| }, | |
| { | |
| "epoch": 1.948220064724919, | |
| "grad_norm": 1.663780689239502, | |
| "learning_rate": 1.853544180805871e-07, | |
| "loss": 0.3612719774246216, | |
| "step": 1806 | |
| }, | |
| { | |
| "epoch": 1.9503775620280475, | |
| "grad_norm": 1.57147216796875, | |
| "learning_rate": 1.8486369823318833e-07, | |
| "loss": 0.5454095005989075, | |
| "step": 1808 | |
| }, | |
| { | |
| "epoch": 1.9525350593311759, | |
| "grad_norm": 1.2029516696929932, | |
| "learning_rate": 1.8437348856465623e-07, | |
| "loss": 0.564690351486206, | |
| "step": 1810 | |
| }, | |
| { | |
| "epoch": 1.954692556634304, | |
| "grad_norm": 1.5036405324935913, | |
| "learning_rate": 1.8388379184961795e-07, | |
| "loss": 0.2203519642353058, | |
| "step": 1812 | |
| }, | |
| { | |
| "epoch": 1.9568500539374325, | |
| "grad_norm": 1.5001392364501953, | |
| "learning_rate": 1.8339461085979686e-07, | |
| "loss": 0.5622031092643738, | |
| "step": 1814 | |
| }, | |
| { | |
| "epoch": 1.959007551240561, | |
| "grad_norm": 3.3546156883239746, | |
| "learning_rate": 1.8290594836399765e-07, | |
| "loss": 0.5708537697792053, | |
| "step": 1816 | |
| }, | |
| { | |
| "epoch": 1.9611650485436893, | |
| "grad_norm": 3.217376232147217, | |
| "learning_rate": 1.8241780712809007e-07, | |
| "loss": 0.5283911228179932, | |
| "step": 1818 | |
| }, | |
| { | |
| "epoch": 1.9633225458468178, | |
| "grad_norm": 1.1382657289505005, | |
| "learning_rate": 1.8193018991499364e-07, | |
| "loss": 0.6459410786628723, | |
| "step": 1820 | |
| }, | |
| { | |
| "epoch": 1.9654800431499462, | |
| "grad_norm": 1.5658422708511353, | |
| "learning_rate": 1.8144309948466175e-07, | |
| "loss": 0.5700141191482544, | |
| "step": 1822 | |
| }, | |
| { | |
| "epoch": 1.9676375404530746, | |
| "grad_norm": 1.9039454460144043, | |
| "learning_rate": 1.8095653859406628e-07, | |
| "loss": 0.4810370206832886, | |
| "step": 1824 | |
| }, | |
| { | |
| "epoch": 1.9697950377562028, | |
| "grad_norm": 3.176187038421631, | |
| "learning_rate": 1.8047050999718184e-07, | |
| "loss": 0.6500232815742493, | |
| "step": 1826 | |
| }, | |
| { | |
| "epoch": 1.9719525350593312, | |
| "grad_norm": 1.7816401720046997, | |
| "learning_rate": 1.7998501644497006e-07, | |
| "loss": 0.4381594657897949, | |
| "step": 1828 | |
| }, | |
| { | |
| "epoch": 1.9741100323624594, | |
| "grad_norm": 2.0123534202575684, | |
| "learning_rate": 1.795000606853646e-07, | |
| "loss": 0.5198497772216797, | |
| "step": 1830 | |
| }, | |
| { | |
| "epoch": 1.9762675296655878, | |
| "grad_norm": 1.8685188293457031, | |
| "learning_rate": 1.7901564546325436e-07, | |
| "loss": 0.5583903193473816, | |
| "step": 1832 | |
| }, | |
| { | |
| "epoch": 1.9784250269687162, | |
| "grad_norm": 7.242468357086182, | |
| "learning_rate": 1.7853177352046971e-07, | |
| "loss": 0.6218190789222717, | |
| "step": 1834 | |
| }, | |
| { | |
| "epoch": 1.9805825242718447, | |
| "grad_norm": 4.002262592315674, | |
| "learning_rate": 1.7804844759576538e-07, | |
| "loss": 0.5924632549285889, | |
| "step": 1836 | |
| }, | |
| { | |
| "epoch": 1.982740021574973, | |
| "grad_norm": 1.834091067314148, | |
| "learning_rate": 1.775656704248057e-07, | |
| "loss": 0.5840417146682739, | |
| "step": 1838 | |
| }, | |
| { | |
| "epoch": 1.9848975188781015, | |
| "grad_norm": 1.513541340827942, | |
| "learning_rate": 1.7708344474014924e-07, | |
| "loss": 0.5099426507949829, | |
| "step": 1840 | |
| }, | |
| { | |
| "epoch": 1.98705501618123, | |
| "grad_norm": 1.4789743423461914, | |
| "learning_rate": 1.7660177327123287e-07, | |
| "loss": 0.5921831130981445, | |
| "step": 1842 | |
| }, | |
| { | |
| "epoch": 1.9892125134843581, | |
| "grad_norm": 1.290024995803833, | |
| "learning_rate": 1.7612065874435677e-07, | |
| "loss": 0.5426990985870361, | |
| "step": 1844 | |
| }, | |
| { | |
| "epoch": 1.9913700107874865, | |
| "grad_norm": 1.4434101581573486, | |
| "learning_rate": 1.7564010388266837e-07, | |
| "loss": 0.5949893593788147, | |
| "step": 1846 | |
| }, | |
| { | |
| "epoch": 1.9935275080906147, | |
| "grad_norm": 7.057824611663818, | |
| "learning_rate": 1.7516011140614795e-07, | |
| "loss": 0.4401338994503021, | |
| "step": 1848 | |
| }, | |
| { | |
| "epoch": 1.9956850053937432, | |
| "grad_norm": 2.867464303970337, | |
| "learning_rate": 1.7468068403159218e-07, | |
| "loss": 0.4908779263496399, | |
| "step": 1850 | |
| }, | |
| { | |
| "epoch": 1.9978425026968716, | |
| "grad_norm": 1.0820274353027344, | |
| "learning_rate": 1.7420182447259926e-07, | |
| "loss": 0.41853272914886475, | |
| "step": 1852 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "grad_norm": 1.5740938186645508, | |
| "learning_rate": 1.7372353543955375e-07, | |
| "loss": 0.48160526156425476, | |
| "step": 1854 | |
| }, | |
| { | |
| "epoch": 2.0021574973031284, | |
| "grad_norm": 2.7098584175109863, | |
| "learning_rate": 1.7324581963961088e-07, | |
| "loss": 0.5192286372184753, | |
| "step": 1856 | |
| }, | |
| { | |
| "epoch": 2.004314994606257, | |
| "grad_norm": 1.2259626388549805, | |
| "learning_rate": 1.7276867977668117e-07, | |
| "loss": 0.38666832447052, | |
| "step": 1858 | |
| }, | |
| { | |
| "epoch": 2.0064724919093853, | |
| "grad_norm": 2.3588082790374756, | |
| "learning_rate": 1.7229211855141535e-07, | |
| "loss": 0.5268582105636597, | |
| "step": 1860 | |
| }, | |
| { | |
| "epoch": 2.0086299892125137, | |
| "grad_norm": 3.2187092304229736, | |
| "learning_rate": 1.718161386611892e-07, | |
| "loss": 0.39220139384269714, | |
| "step": 1862 | |
| }, | |
| { | |
| "epoch": 2.0107874865156417, | |
| "grad_norm": 3.2906999588012695, | |
| "learning_rate": 1.71340742800088e-07, | |
| "loss": 0.5459554195404053, | |
| "step": 1864 | |
| }, | |
| { | |
| "epoch": 2.01294498381877, | |
| "grad_norm": 1.1693605184555054, | |
| "learning_rate": 1.708659336588912e-07, | |
| "loss": 0.3647141456604004, | |
| "step": 1866 | |
| }, | |
| { | |
| "epoch": 2.0151024811218985, | |
| "grad_norm": 2.798161268234253, | |
| "learning_rate": 1.703917139250576e-07, | |
| "loss": 0.29213812947273254, | |
| "step": 1868 | |
| }, | |
| { | |
| "epoch": 2.017259978425027, | |
| "grad_norm": 1.1381080150604248, | |
| "learning_rate": 1.6991808628270987e-07, | |
| "loss": 0.5609018802642822, | |
| "step": 1870 | |
| }, | |
| { | |
| "epoch": 2.0194174757281553, | |
| "grad_norm": 1.3353042602539062, | |
| "learning_rate": 1.694450534126193e-07, | |
| "loss": 0.6125231981277466, | |
| "step": 1872 | |
| }, | |
| { | |
| "epoch": 2.0215749730312838, | |
| "grad_norm": 0.5496029853820801, | |
| "learning_rate": 1.689726179921906e-07, | |
| "loss": 0.6636590957641602, | |
| "step": 1874 | |
| }, | |
| { | |
| "epoch": 2.023732470334412, | |
| "grad_norm": 4.846103668212891, | |
| "learning_rate": 1.6850078269544736e-07, | |
| "loss": 0.5935502648353577, | |
| "step": 1876 | |
| }, | |
| { | |
| "epoch": 2.0258899676375406, | |
| "grad_norm": 1.36087167263031, | |
| "learning_rate": 1.6802955019301574e-07, | |
| "loss": 0.4898212254047394, | |
| "step": 1878 | |
| }, | |
| { | |
| "epoch": 2.028047464940669, | |
| "grad_norm": 1.213739037513733, | |
| "learning_rate": 1.6755892315211056e-07, | |
| "loss": 0.5537405014038086, | |
| "step": 1880 | |
| }, | |
| { | |
| "epoch": 2.030204962243797, | |
| "grad_norm": 2.0111231803894043, | |
| "learning_rate": 1.6708890423651965e-07, | |
| "loss": 0.5984706282615662, | |
| "step": 1882 | |
| }, | |
| { | |
| "epoch": 2.0323624595469254, | |
| "grad_norm": 1.080739974975586, | |
| "learning_rate": 1.6661949610658831e-07, | |
| "loss": 0.6536235809326172, | |
| "step": 1884 | |
| }, | |
| { | |
| "epoch": 2.034519956850054, | |
| "grad_norm": 1.0429832935333252, | |
| "learning_rate": 1.6615070141920538e-07, | |
| "loss": 0.4953509569168091, | |
| "step": 1886 | |
| }, | |
| { | |
| "epoch": 2.0366774541531822, | |
| "grad_norm": 0.8104021549224854, | |
| "learning_rate": 1.656825228277871e-07, | |
| "loss": 0.5021868944168091, | |
| "step": 1888 | |
| }, | |
| { | |
| "epoch": 2.0388349514563107, | |
| "grad_norm": 2.875866413116455, | |
| "learning_rate": 1.6521496298226293e-07, | |
| "loss": 0.4888242483139038, | |
| "step": 1890 | |
| }, | |
| { | |
| "epoch": 2.040992448759439, | |
| "grad_norm": 1.8168655633926392, | |
| "learning_rate": 1.647480245290596e-07, | |
| "loss": 0.5183455944061279, | |
| "step": 1892 | |
| }, | |
| { | |
| "epoch": 2.0431499460625675, | |
| "grad_norm": 2.0136873722076416, | |
| "learning_rate": 1.642817101110875e-07, | |
| "loss": 0.4676356911659241, | |
| "step": 1894 | |
| }, | |
| { | |
| "epoch": 2.045307443365696, | |
| "grad_norm": 1.8156472444534302, | |
| "learning_rate": 1.6381602236772428e-07, | |
| "loss": 0.39894169569015503, | |
| "step": 1896 | |
| }, | |
| { | |
| "epoch": 2.0474649406688243, | |
| "grad_norm": 1.542965054512024, | |
| "learning_rate": 1.6335096393480077e-07, | |
| "loss": 0.6107752919197083, | |
| "step": 1898 | |
| }, | |
| { | |
| "epoch": 2.0496224379719523, | |
| "grad_norm": 4.939133644104004, | |
| "learning_rate": 1.6288653744458603e-07, | |
| "loss": 0.4178003668785095, | |
| "step": 1900 | |
| }, | |
| { | |
| "epoch": 2.0517799352750807, | |
| "grad_norm": 1.781587839126587, | |
| "learning_rate": 1.62422745525772e-07, | |
| "loss": 0.45616793632507324, | |
| "step": 1902 | |
| }, | |
| { | |
| "epoch": 2.053937432578209, | |
| "grad_norm": 1.2864458560943604, | |
| "learning_rate": 1.619595908034591e-07, | |
| "loss": 0.4433179795742035, | |
| "step": 1904 | |
| }, | |
| { | |
| "epoch": 2.0560949298813376, | |
| "grad_norm": 1.3701529502868652, | |
| "learning_rate": 1.6149707589914092e-07, | |
| "loss": 0.30546942353248596, | |
| "step": 1906 | |
| }, | |
| { | |
| "epoch": 2.058252427184466, | |
| "grad_norm": 1.4088618755340576, | |
| "learning_rate": 1.6103520343068992e-07, | |
| "loss": 0.4966147840023041, | |
| "step": 1908 | |
| }, | |
| { | |
| "epoch": 2.0604099244875944, | |
| "grad_norm": 1.663954734802246, | |
| "learning_rate": 1.6057397601234218e-07, | |
| "loss": 0.5685679912567139, | |
| "step": 1910 | |
| }, | |
| { | |
| "epoch": 2.062567421790723, | |
| "grad_norm": 1.3583576679229736, | |
| "learning_rate": 1.6011339625468262e-07, | |
| "loss": 0.42057788372039795, | |
| "step": 1912 | |
| }, | |
| { | |
| "epoch": 2.0647249190938513, | |
| "grad_norm": 1.4086862802505493, | |
| "learning_rate": 1.5965346676463065e-07, | |
| "loss": 0.47804367542266846, | |
| "step": 1914 | |
| }, | |
| { | |
| "epoch": 2.0668824163969797, | |
| "grad_norm": 1.02560555934906, | |
| "learning_rate": 1.5919419014542485e-07, | |
| "loss": 0.5550174117088318, | |
| "step": 1916 | |
| }, | |
| { | |
| "epoch": 2.0690399137001076, | |
| "grad_norm": 1.144000768661499, | |
| "learning_rate": 1.5873556899660858e-07, | |
| "loss": 0.538378894329071, | |
| "step": 1918 | |
| }, | |
| { | |
| "epoch": 2.071197411003236, | |
| "grad_norm": 3.2516837120056152, | |
| "learning_rate": 1.5827760591401513e-07, | |
| "loss": 0.4809839129447937, | |
| "step": 1920 | |
| }, | |
| { | |
| "epoch": 2.0733549083063645, | |
| "grad_norm": 3.4512033462524414, | |
| "learning_rate": 1.578203034897533e-07, | |
| "loss": 0.5308358669281006, | |
| "step": 1922 | |
| }, | |
| { | |
| "epoch": 2.075512405609493, | |
| "grad_norm": 7.391180038452148, | |
| "learning_rate": 1.573636643121922e-07, | |
| "loss": 0.5464600324630737, | |
| "step": 1924 | |
| }, | |
| { | |
| "epoch": 2.0776699029126213, | |
| "grad_norm": 1.2600277662277222, | |
| "learning_rate": 1.5690769096594703e-07, | |
| "loss": 0.4952971339225769, | |
| "step": 1926 | |
| }, | |
| { | |
| "epoch": 2.0798274002157497, | |
| "grad_norm": 1.352362036705017, | |
| "learning_rate": 1.5645238603186456e-07, | |
| "loss": 0.5618917346000671, | |
| "step": 1928 | |
| }, | |
| { | |
| "epoch": 2.081984897518878, | |
| "grad_norm": 2.5637662410736084, | |
| "learning_rate": 1.5599775208700793e-07, | |
| "loss": 0.4722367525100708, | |
| "step": 1930 | |
| }, | |
| { | |
| "epoch": 2.0841423948220066, | |
| "grad_norm": 1.4300997257232666, | |
| "learning_rate": 1.5554379170464265e-07, | |
| "loss": 0.5235872864723206, | |
| "step": 1932 | |
| }, | |
| { | |
| "epoch": 2.086299892125135, | |
| "grad_norm": 1.3848403692245483, | |
| "learning_rate": 1.5509050745422164e-07, | |
| "loss": 0.5249854922294617, | |
| "step": 1934 | |
| }, | |
| { | |
| "epoch": 2.0884573894282634, | |
| "grad_norm": 1.8573588132858276, | |
| "learning_rate": 1.546379019013712e-07, | |
| "loss": 0.4274769127368927, | |
| "step": 1936 | |
| }, | |
| { | |
| "epoch": 2.0906148867313914, | |
| "grad_norm": 2.3221805095672607, | |
| "learning_rate": 1.5418597760787555e-07, | |
| "loss": 0.4279857277870178, | |
| "step": 1938 | |
| }, | |
| { | |
| "epoch": 2.09277238403452, | |
| "grad_norm": 1.2415211200714111, | |
| "learning_rate": 1.537347371316635e-07, | |
| "loss": 0.43542686104774475, | |
| "step": 1940 | |
| }, | |
| { | |
| "epoch": 2.0949298813376482, | |
| "grad_norm": 1.8443889617919922, | |
| "learning_rate": 1.532841830267934e-07, | |
| "loss": 0.46662214398384094, | |
| "step": 1942 | |
| }, | |
| { | |
| "epoch": 2.0970873786407767, | |
| "grad_norm": 1.2522131204605103, | |
| "learning_rate": 1.5283431784343802e-07, | |
| "loss": 0.5438724160194397, | |
| "step": 1944 | |
| }, | |
| { | |
| "epoch": 2.099244875943905, | |
| "grad_norm": 1.2235276699066162, | |
| "learning_rate": 1.5238514412787158e-07, | |
| "loss": 0.6343849897384644, | |
| "step": 1946 | |
| }, | |
| { | |
| "epoch": 2.1014023732470335, | |
| "grad_norm": 3.470675468444824, | |
| "learning_rate": 1.5193666442245402e-07, | |
| "loss": 0.43045446276664734, | |
| "step": 1948 | |
| }, | |
| { | |
| "epoch": 2.103559870550162, | |
| "grad_norm": 1.038780927658081, | |
| "learning_rate": 1.5148888126561726e-07, | |
| "loss": 0.4236026406288147, | |
| "step": 1950 | |
| }, | |
| { | |
| "epoch": 2.1057173678532903, | |
| "grad_norm": 2.081264019012451, | |
| "learning_rate": 1.5104179719185075e-07, | |
| "loss": 0.5168135166168213, | |
| "step": 1952 | |
| }, | |
| { | |
| "epoch": 2.1078748651564188, | |
| "grad_norm": 12.081764221191406, | |
| "learning_rate": 1.5059541473168715e-07, | |
| "loss": 0.5867135524749756, | |
| "step": 1954 | |
| }, | |
| { | |
| "epoch": 2.1100323624595467, | |
| "grad_norm": 2.5666370391845703, | |
| "learning_rate": 1.5014973641168776e-07, | |
| "loss": 0.5193699598312378, | |
| "step": 1956 | |
| }, | |
| { | |
| "epoch": 2.112189859762675, | |
| "grad_norm": 1.6125311851501465, | |
| "learning_rate": 1.497047647544283e-07, | |
| "loss": 0.529248833656311, | |
| "step": 1958 | |
| }, | |
| { | |
| "epoch": 2.1143473570658036, | |
| "grad_norm": 2.1865692138671875, | |
| "learning_rate": 1.4926050227848519e-07, | |
| "loss": 0.0995928943157196, | |
| "step": 1960 | |
| }, | |
| { | |
| "epoch": 2.116504854368932, | |
| "grad_norm": 1.370114803314209, | |
| "learning_rate": 1.4881695149842027e-07, | |
| "loss": 0.4155382513999939, | |
| "step": 1962 | |
| }, | |
| { | |
| "epoch": 2.1186623516720604, | |
| "grad_norm": 3.971156358718872, | |
| "learning_rate": 1.4837411492476743e-07, | |
| "loss": 0.42390692234039307, | |
| "step": 1964 | |
| }, | |
| { | |
| "epoch": 2.120819848975189, | |
| "grad_norm": 1.6040301322937012, | |
| "learning_rate": 1.4793199506401797e-07, | |
| "loss": 0.47764524817466736, | |
| "step": 1966 | |
| }, | |
| { | |
| "epoch": 2.1229773462783172, | |
| "grad_norm": 10.519671440124512, | |
| "learning_rate": 1.474905944186067e-07, | |
| "loss": 0.34308892488479614, | |
| "step": 1968 | |
| }, | |
| { | |
| "epoch": 2.1251348435814457, | |
| "grad_norm": 1.1796921491622925, | |
| "learning_rate": 1.4704991548689745e-07, | |
| "loss": 0.5871822834014893, | |
| "step": 1970 | |
| }, | |
| { | |
| "epoch": 2.127292340884574, | |
| "grad_norm": 1.7361336946487427, | |
| "learning_rate": 1.4660996076316912e-07, | |
| "loss": 0.3765156865119934, | |
| "step": 1972 | |
| }, | |
| { | |
| "epoch": 2.129449838187702, | |
| "grad_norm": 0.9722249507904053, | |
| "learning_rate": 1.461707327376016e-07, | |
| "loss": 0.5491130352020264, | |
| "step": 1974 | |
| }, | |
| { | |
| "epoch": 2.1316073354908305, | |
| "grad_norm": 0.8201406598091125, | |
| "learning_rate": 1.457322338962616e-07, | |
| "loss": 0.2312294989824295, | |
| "step": 1976 | |
| }, | |
| { | |
| "epoch": 2.133764832793959, | |
| "grad_norm": 1.1285258531570435, | |
| "learning_rate": 1.4529446672108852e-07, | |
| "loss": 0.408553808927536, | |
| "step": 1978 | |
| }, | |
| { | |
| "epoch": 2.1359223300970873, | |
| "grad_norm": 2.267793655395508, | |
| "learning_rate": 1.448574336898804e-07, | |
| "loss": 0.4971245229244232, | |
| "step": 1980 | |
| }, | |
| { | |
| "epoch": 2.1380798274002157, | |
| "grad_norm": 1.5453788042068481, | |
| "learning_rate": 1.4442113727628024e-07, | |
| "loss": 0.5261057615280151, | |
| "step": 1982 | |
| }, | |
| { | |
| "epoch": 2.140237324703344, | |
| "grad_norm": 1.3704675436019897, | |
| "learning_rate": 1.439855799497615e-07, | |
| "loss": 0.4675547480583191, | |
| "step": 1984 | |
| }, | |
| { | |
| "epoch": 2.1423948220064726, | |
| "grad_norm": 1.8390225172042847, | |
| "learning_rate": 1.4355076417561429e-07, | |
| "loss": 0.45672228932380676, | |
| "step": 1986 | |
| }, | |
| { | |
| "epoch": 2.144552319309601, | |
| "grad_norm": 2.047685384750366, | |
| "learning_rate": 1.4311669241493184e-07, | |
| "loss": 0.4718751013278961, | |
| "step": 1988 | |
| }, | |
| { | |
| "epoch": 2.1467098166127294, | |
| "grad_norm": 2.519134044647217, | |
| "learning_rate": 1.426833671245956e-07, | |
| "loss": 0.20050865411758423, | |
| "step": 1990 | |
| }, | |
| { | |
| "epoch": 2.148867313915858, | |
| "grad_norm": 1.5338327884674072, | |
| "learning_rate": 1.422507907572626e-07, | |
| "loss": 0.6630242466926575, | |
| "step": 1992 | |
| }, | |
| { | |
| "epoch": 2.151024811218986, | |
| "grad_norm": 1.7562376260757446, | |
| "learning_rate": 1.418189657613504e-07, | |
| "loss": 0.48454782366752625, | |
| "step": 1994 | |
| }, | |
| { | |
| "epoch": 2.1531823085221142, | |
| "grad_norm": 1.1941181421279907, | |
| "learning_rate": 1.4138789458102395e-07, | |
| "loss": 0.39869314432144165, | |
| "step": 1996 | |
| }, | |
| { | |
| "epoch": 2.1553398058252426, | |
| "grad_norm": 1.5241355895996094, | |
| "learning_rate": 1.409575796561815e-07, | |
| "loss": 0.5645185708999634, | |
| "step": 1998 | |
| }, | |
| { | |
| "epoch": 2.157497303128371, | |
| "grad_norm": 1.2509868144989014, | |
| "learning_rate": 1.4052802342244085e-07, | |
| "loss": 0.4137888550758362, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 2.1596548004314995, | |
| "grad_norm": 0.9289142489433289, | |
| "learning_rate": 1.4009922831112576e-07, | |
| "loss": 0.39357897639274597, | |
| "step": 2002 | |
| }, | |
| { | |
| "epoch": 2.161812297734628, | |
| "grad_norm": 1.4196053743362427, | |
| "learning_rate": 1.3967119674925144e-07, | |
| "loss": 0.4930650293827057, | |
| "step": 2004 | |
| }, | |
| { | |
| "epoch": 2.1639697950377563, | |
| "grad_norm": 1.6377662420272827, | |
| "learning_rate": 1.3924393115951183e-07, | |
| "loss": 0.5852062702178955, | |
| "step": 2006 | |
| }, | |
| { | |
| "epoch": 2.1661272923408847, | |
| "grad_norm": 3.294273614883423, | |
| "learning_rate": 1.3881743396026519e-07, | |
| "loss": 0.2635650932788849, | |
| "step": 2008 | |
| }, | |
| { | |
| "epoch": 2.168284789644013, | |
| "grad_norm": 3.2055675983428955, | |
| "learning_rate": 1.383917075655207e-07, | |
| "loss": 0.45562589168548584, | |
| "step": 2010 | |
| }, | |
| { | |
| "epoch": 2.170442286947141, | |
| "grad_norm": 1.2780847549438477, | |
| "learning_rate": 1.3796675438492466e-07, | |
| "loss": 0.2905101478099823, | |
| "step": 2012 | |
| }, | |
| { | |
| "epoch": 2.1725997842502696, | |
| "grad_norm": 1.1974769830703735, | |
| "learning_rate": 1.37542576823747e-07, | |
| "loss": 0.5826109647750854, | |
| "step": 2014 | |
| }, | |
| { | |
| "epoch": 2.174757281553398, | |
| "grad_norm": 12.196817398071289, | |
| "learning_rate": 1.3711917728286758e-07, | |
| "loss": 0.6894016861915588, | |
| "step": 2016 | |
| }, | |
| { | |
| "epoch": 2.1769147788565264, | |
| "grad_norm": 1.2338263988494873, | |
| "learning_rate": 1.3669655815876238e-07, | |
| "loss": 0.5621905326843262, | |
| "step": 2018 | |
| }, | |
| { | |
| "epoch": 2.179072276159655, | |
| "grad_norm": 1.2616736888885498, | |
| "learning_rate": 1.3627472184349054e-07, | |
| "loss": 0.5268079042434692, | |
| "step": 2020 | |
| }, | |
| { | |
| "epoch": 2.1812297734627832, | |
| "grad_norm": 2.433539867401123, | |
| "learning_rate": 1.3585367072468014e-07, | |
| "loss": 0.5845661163330078, | |
| "step": 2022 | |
| }, | |
| { | |
| "epoch": 2.1833872707659117, | |
| "grad_norm": 5.173591136932373, | |
| "learning_rate": 1.3543340718551505e-07, | |
| "loss": 0.5688271522521973, | |
| "step": 2024 | |
| }, | |
| { | |
| "epoch": 2.18554476806904, | |
| "grad_norm": 3.6794235706329346, | |
| "learning_rate": 1.3501393360472135e-07, | |
| "loss": 0.5398727655410767, | |
| "step": 2026 | |
| }, | |
| { | |
| "epoch": 2.1877022653721685, | |
| "grad_norm": 3.5358774662017822, | |
| "learning_rate": 1.345952523565541e-07, | |
| "loss": 0.4828336238861084, | |
| "step": 2028 | |
| }, | |
| { | |
| "epoch": 2.1898597626752965, | |
| "grad_norm": 2.045574188232422, | |
| "learning_rate": 1.3417736581078343e-07, | |
| "loss": 0.4576345682144165, | |
| "step": 2030 | |
| }, | |
| { | |
| "epoch": 2.192017259978425, | |
| "grad_norm": 2.9018726348876953, | |
| "learning_rate": 1.3376027633268145e-07, | |
| "loss": 0.5629587769508362, | |
| "step": 2032 | |
| }, | |
| { | |
| "epoch": 2.1941747572815533, | |
| "grad_norm": 0.7962714433670044, | |
| "learning_rate": 1.33343986283009e-07, | |
| "loss": 0.3971530795097351, | |
| "step": 2034 | |
| }, | |
| { | |
| "epoch": 2.1963322545846817, | |
| "grad_norm": 8.073356628417969, | |
| "learning_rate": 1.3292849801800172e-07, | |
| "loss": 0.34259432554244995, | |
| "step": 2036 | |
| }, | |
| { | |
| "epoch": 2.19848975188781, | |
| "grad_norm": 2.6538028717041016, | |
| "learning_rate": 1.325138138893574e-07, | |
| "loss": 0.3950064778327942, | |
| "step": 2038 | |
| }, | |
| { | |
| "epoch": 2.2006472491909386, | |
| "grad_norm": 2.4987733364105225, | |
| "learning_rate": 1.3209993624422226e-07, | |
| "loss": 0.5430999398231506, | |
| "step": 2040 | |
| }, | |
| { | |
| "epoch": 2.202804746494067, | |
| "grad_norm": 2.3891706466674805, | |
| "learning_rate": 1.3168686742517777e-07, | |
| "loss": 0.4212225377559662, | |
| "step": 2042 | |
| }, | |
| { | |
| "epoch": 2.2049622437971954, | |
| "grad_norm": 3.8016743659973145, | |
| "learning_rate": 1.312746097702273e-07, | |
| "loss": 0.47842153906822205, | |
| "step": 2044 | |
| }, | |
| { | |
| "epoch": 2.207119741100324, | |
| "grad_norm": 1.4113874435424805, | |
| "learning_rate": 1.3086316561278298e-07, | |
| "loss": 0.6112795472145081, | |
| "step": 2046 | |
| }, | |
| { | |
| "epoch": 2.209277238403452, | |
| "grad_norm": 1.0874158143997192, | |
| "learning_rate": 1.304525372816527e-07, | |
| "loss": 0.37503018975257874, | |
| "step": 2048 | |
| }, | |
| { | |
| "epoch": 2.2114347357065802, | |
| "grad_norm": 1.5210480690002441, | |
| "learning_rate": 1.3004272710102627e-07, | |
| "loss": 0.5227410793304443, | |
| "step": 2050 | |
| }, | |
| { | |
| "epoch": 2.2135922330097086, | |
| "grad_norm": 2.596205234527588, | |
| "learning_rate": 1.2963373739046308e-07, | |
| "loss": 0.5574774742126465, | |
| "step": 2052 | |
| }, | |
| { | |
| "epoch": 2.215749730312837, | |
| "grad_norm": 1.1875081062316895, | |
| "learning_rate": 1.2922557046487847e-07, | |
| "loss": 0.5108221769332886, | |
| "step": 2054 | |
| }, | |
| { | |
| "epoch": 2.2179072276159655, | |
| "grad_norm": 1.1905848979949951, | |
| "learning_rate": 1.2881822863453066e-07, | |
| "loss": 0.6219309568405151, | |
| "step": 2056 | |
| }, | |
| { | |
| "epoch": 2.220064724919094, | |
| "grad_norm": 1.272186279296875, | |
| "learning_rate": 1.2841171420500799e-07, | |
| "loss": 0.5619434118270874, | |
| "step": 2058 | |
| }, | |
| { | |
| "epoch": 2.2222222222222223, | |
| "grad_norm": 1.7432719469070435, | |
| "learning_rate": 1.2800602947721539e-07, | |
| "loss": 0.47846826910972595, | |
| "step": 2060 | |
| }, | |
| { | |
| "epoch": 2.2243797195253507, | |
| "grad_norm": 3.082793951034546, | |
| "learning_rate": 1.2760117674736174e-07, | |
| "loss": 0.40510886907577515, | |
| "step": 2062 | |
| }, | |
| { | |
| "epoch": 2.226537216828479, | |
| "grad_norm": 2.457563638687134, | |
| "learning_rate": 1.2719715830694665e-07, | |
| "loss": 0.5271166563034058, | |
| "step": 2064 | |
| }, | |
| { | |
| "epoch": 2.228694714131607, | |
| "grad_norm": 2.0129661560058594, | |
| "learning_rate": 1.2679397644274786e-07, | |
| "loss": 0.5444518327713013, | |
| "step": 2066 | |
| }, | |
| { | |
| "epoch": 2.2308522114347356, | |
| "grad_norm": 1.3749797344207764, | |
| "learning_rate": 1.2639163343680764e-07, | |
| "loss": 0.4370856285095215, | |
| "step": 2068 | |
| }, | |
| { | |
| "epoch": 2.233009708737864, | |
| "grad_norm": 2.305034637451172, | |
| "learning_rate": 1.259901315664204e-07, | |
| "loss": 0.3118676543235779, | |
| "step": 2070 | |
| }, | |
| { | |
| "epoch": 2.2351672060409924, | |
| "grad_norm": 2.7576963901519775, | |
| "learning_rate": 1.2558947310411988e-07, | |
| "loss": 0.5651037096977234, | |
| "step": 2072 | |
| }, | |
| { | |
| "epoch": 2.237324703344121, | |
| "grad_norm": 1.6983442306518555, | |
| "learning_rate": 1.251896603176657e-07, | |
| "loss": 0.4641881585121155, | |
| "step": 2074 | |
| }, | |
| { | |
| "epoch": 2.2394822006472492, | |
| "grad_norm": 1.8414127826690674, | |
| "learning_rate": 1.2479069547003113e-07, | |
| "loss": 0.43197086453437805, | |
| "step": 2076 | |
| }, | |
| { | |
| "epoch": 2.2416396979503777, | |
| "grad_norm": 2.0715110301971436, | |
| "learning_rate": 1.2439258081938982e-07, | |
| "loss": 0.4157189428806305, | |
| "step": 2078 | |
| }, | |
| { | |
| "epoch": 2.243797195253506, | |
| "grad_norm": 1.076478123664856, | |
| "learning_rate": 1.2399531861910356e-07, | |
| "loss": 0.47206589579582214, | |
| "step": 2080 | |
| }, | |
| { | |
| "epoch": 2.2459546925566345, | |
| "grad_norm": 1.650862693786621, | |
| "learning_rate": 1.2359891111770893e-07, | |
| "loss": 0.34252646565437317, | |
| "step": 2082 | |
| }, | |
| { | |
| "epoch": 2.2481121898597625, | |
| "grad_norm": 10.067177772521973, | |
| "learning_rate": 1.2320336055890485e-07, | |
| "loss": 0.39974507689476013, | |
| "step": 2084 | |
| }, | |
| { | |
| "epoch": 2.250269687162891, | |
| "grad_norm": 2.362675666809082, | |
| "learning_rate": 1.228086691815401e-07, | |
| "loss": 0.3123304843902588, | |
| "step": 2086 | |
| }, | |
| { | |
| "epoch": 2.2524271844660193, | |
| "grad_norm": 1.422263741493225, | |
| "learning_rate": 1.224148392196002e-07, | |
| "loss": 0.512368381023407, | |
| "step": 2088 | |
| }, | |
| { | |
| "epoch": 2.2545846817691477, | |
| "grad_norm": 3.862522602081299, | |
| "learning_rate": 1.2202187290219506e-07, | |
| "loss": 0.6224650144577026, | |
| "step": 2090 | |
| }, | |
| { | |
| "epoch": 2.256742179072276, | |
| "grad_norm": 1.9737151861190796, | |
| "learning_rate": 1.2162977245354618e-07, | |
| "loss": 0.5244027376174927, | |
| "step": 2092 | |
| }, | |
| { | |
| "epoch": 2.2588996763754046, | |
| "grad_norm": 1.3677443265914917, | |
| "learning_rate": 1.212385400929746e-07, | |
| "loss": 0.45020678639411926, | |
| "step": 2094 | |
| }, | |
| { | |
| "epoch": 2.261057173678533, | |
| "grad_norm": 2.039045572280884, | |
| "learning_rate": 1.208481780348872e-07, | |
| "loss": 0.5531943440437317, | |
| "step": 2096 | |
| }, | |
| { | |
| "epoch": 2.2632146709816614, | |
| "grad_norm": 1.5377328395843506, | |
| "learning_rate": 1.2045868848876553e-07, | |
| "loss": 0.37629038095474243, | |
| "step": 2098 | |
| }, | |
| { | |
| "epoch": 2.26537216828479, | |
| "grad_norm": 2.3397927284240723, | |
| "learning_rate": 1.2007007365915235e-07, | |
| "loss": 0.3320096433162689, | |
| "step": 2100 | |
| }, | |
| { | |
| "epoch": 2.267529665587918, | |
| "grad_norm": 3.1143479347229004, | |
| "learning_rate": 1.1968233574563937e-07, | |
| "loss": 0.40027597546577454, | |
| "step": 2102 | |
| }, | |
| { | |
| "epoch": 2.269687162891046, | |
| "grad_norm": 4.891796112060547, | |
| "learning_rate": 1.1929547694285518e-07, | |
| "loss": 0.4987124502658844, | |
| "step": 2104 | |
| }, | |
| { | |
| "epoch": 2.2718446601941746, | |
| "grad_norm": 1.840555191040039, | |
| "learning_rate": 1.1890949944045232e-07, | |
| "loss": 0.49355462193489075, | |
| "step": 2106 | |
| }, | |
| { | |
| "epoch": 2.274002157497303, | |
| "grad_norm": 3.1459503173828125, | |
| "learning_rate": 1.1852440542309507e-07, | |
| "loss": 0.36765629053115845, | |
| "step": 2108 | |
| }, | |
| { | |
| "epoch": 2.2761596548004315, | |
| "grad_norm": 1.4921073913574219, | |
| "learning_rate": 1.1814019707044715e-07, | |
| "loss": 0.4247042238712311, | |
| "step": 2110 | |
| }, | |
| { | |
| "epoch": 2.27831715210356, | |
| "grad_norm": 1.55894935131073, | |
| "learning_rate": 1.1775687655715948e-07, | |
| "loss": 0.4907058775424957, | |
| "step": 2112 | |
| }, | |
| { | |
| "epoch": 2.2804746494066883, | |
| "grad_norm": 1.8256114721298218, | |
| "learning_rate": 1.1737444605285757e-07, | |
| "loss": 0.46969401836395264, | |
| "step": 2114 | |
| }, | |
| { | |
| "epoch": 2.2826321467098167, | |
| "grad_norm": 2.421661376953125, | |
| "learning_rate": 1.1699290772212944e-07, | |
| "loss": 0.5222725868225098, | |
| "step": 2116 | |
| }, | |
| { | |
| "epoch": 2.284789644012945, | |
| "grad_norm": 1.7159234285354614, | |
| "learning_rate": 1.1661226372451344e-07, | |
| "loss": 0.38712745904922485, | |
| "step": 2118 | |
| }, | |
| { | |
| "epoch": 2.286947141316073, | |
| "grad_norm": 1.709659218788147, | |
| "learning_rate": 1.1623251621448581e-07, | |
| "loss": 0.2972549796104431, | |
| "step": 2120 | |
| }, | |
| { | |
| "epoch": 2.2891046386192015, | |
| "grad_norm": 5.333364486694336, | |
| "learning_rate": 1.1585366734144861e-07, | |
| "loss": 0.5384417772293091, | |
| "step": 2122 | |
| }, | |
| { | |
| "epoch": 2.29126213592233, | |
| "grad_norm": 1.5736010074615479, | |
| "learning_rate": 1.154757192497175e-07, | |
| "loss": 0.48839452862739563, | |
| "step": 2124 | |
| }, | |
| { | |
| "epoch": 2.2934196332254584, | |
| "grad_norm": 2.719190835952759, | |
| "learning_rate": 1.1509867407850982e-07, | |
| "loss": 0.4550670385360718, | |
| "step": 2126 | |
| }, | |
| { | |
| "epoch": 2.295577130528587, | |
| "grad_norm": 1.8119968175888062, | |
| "learning_rate": 1.1472253396193217e-07, | |
| "loss": 0.6911446452140808, | |
| "step": 2128 | |
| }, | |
| { | |
| "epoch": 2.2977346278317152, | |
| "grad_norm": 4.632421970367432, | |
| "learning_rate": 1.1434730102896833e-07, | |
| "loss": 0.4868852198123932, | |
| "step": 2130 | |
| }, | |
| { | |
| "epoch": 2.2998921251348436, | |
| "grad_norm": 1.4012049436569214, | |
| "learning_rate": 1.1397297740346771e-07, | |
| "loss": 0.4392762780189514, | |
| "step": 2132 | |
| }, | |
| { | |
| "epoch": 2.302049622437972, | |
| "grad_norm": 1.4517533779144287, | |
| "learning_rate": 1.1359956520413267e-07, | |
| "loss": 0.6284406185150146, | |
| "step": 2134 | |
| }, | |
| { | |
| "epoch": 2.3042071197411005, | |
| "grad_norm": 23.886926651000977, | |
| "learning_rate": 1.1322706654450692e-07, | |
| "loss": 0.5837987661361694, | |
| "step": 2136 | |
| }, | |
| { | |
| "epoch": 2.3063646170442285, | |
| "grad_norm": 2.8387584686279297, | |
| "learning_rate": 1.1285548353296335e-07, | |
| "loss": 0.49964287877082825, | |
| "step": 2138 | |
| }, | |
| { | |
| "epoch": 2.308522114347357, | |
| "grad_norm": 1.3726048469543457, | |
| "learning_rate": 1.1248481827269252e-07, | |
| "loss": 0.4638864994049072, | |
| "step": 2140 | |
| }, | |
| { | |
| "epoch": 2.3106796116504853, | |
| "grad_norm": 2.2775590419769287, | |
| "learning_rate": 1.1211507286168997e-07, | |
| "loss": 0.3237634003162384, | |
| "step": 2142 | |
| }, | |
| { | |
| "epoch": 2.3128371089536137, | |
| "grad_norm": 4.829261779785156, | |
| "learning_rate": 1.1174624939274521e-07, | |
| "loss": 0.37512320280075073, | |
| "step": 2144 | |
| }, | |
| { | |
| "epoch": 2.314994606256742, | |
| "grad_norm": 1.2612223625183105, | |
| "learning_rate": 1.1137834995342951e-07, | |
| "loss": 0.44408831000328064, | |
| "step": 2146 | |
| }, | |
| { | |
| "epoch": 2.3171521035598706, | |
| "grad_norm": 1.091060757637024, | |
| "learning_rate": 1.1101137662608356e-07, | |
| "loss": 0.45439931750297546, | |
| "step": 2148 | |
| }, | |
| { | |
| "epoch": 2.319309600862999, | |
| "grad_norm": 1.9243289232254028, | |
| "learning_rate": 1.1064533148780674e-07, | |
| "loss": 0.5608944296836853, | |
| "step": 2150 | |
| }, | |
| { | |
| "epoch": 2.3214670981661274, | |
| "grad_norm": 1.0947431325912476, | |
| "learning_rate": 1.1028021661044448e-07, | |
| "loss": 0.43637141585350037, | |
| "step": 2152 | |
| }, | |
| { | |
| "epoch": 2.323624595469256, | |
| "grad_norm": 1.2952344417572021, | |
| "learning_rate": 1.0991603406057712e-07, | |
| "loss": 0.4882223904132843, | |
| "step": 2154 | |
| }, | |
| { | |
| "epoch": 2.325782092772384, | |
| "grad_norm": 2.949446201324463, | |
| "learning_rate": 1.0955278589950754e-07, | |
| "loss": 0.4377874732017517, | |
| "step": 2156 | |
| }, | |
| { | |
| "epoch": 2.3279395900755127, | |
| "grad_norm": 1.3994691371917725, | |
| "learning_rate": 1.0919047418325027e-07, | |
| "loss": 0.4923911392688751, | |
| "step": 2158 | |
| }, | |
| { | |
| "epoch": 2.3300970873786406, | |
| "grad_norm": 4.1141438484191895, | |
| "learning_rate": 1.088291009625195e-07, | |
| "loss": 0.450039267539978, | |
| "step": 2160 | |
| }, | |
| { | |
| "epoch": 2.332254584681769, | |
| "grad_norm": 1.905044436454773, | |
| "learning_rate": 1.0846866828271706e-07, | |
| "loss": 0.3132597506046295, | |
| "step": 2162 | |
| }, | |
| { | |
| "epoch": 2.3344120819848975, | |
| "grad_norm": 2.2338316440582275, | |
| "learning_rate": 1.081091781839217e-07, | |
| "loss": 0.47647953033447266, | |
| "step": 2164 | |
| }, | |
| { | |
| "epoch": 2.336569579288026, | |
| "grad_norm": 1.30208158493042, | |
| "learning_rate": 1.0775063270087683e-07, | |
| "loss": 0.49173516035079956, | |
| "step": 2166 | |
| }, | |
| { | |
| "epoch": 2.3387270765911543, | |
| "grad_norm": 1.6397186517715454, | |
| "learning_rate": 1.073930338629793e-07, | |
| "loss": 0.6098090410232544, | |
| "step": 2168 | |
| }, | |
| { | |
| "epoch": 2.3408845738942827, | |
| "grad_norm": 1.269619107246399, | |
| "learning_rate": 1.0703638369426782e-07, | |
| "loss": 0.36568519473075867, | |
| "step": 2170 | |
| }, | |
| { | |
| "epoch": 2.343042071197411, | |
| "grad_norm": 1.4696781635284424, | |
| "learning_rate": 1.0668068421341176e-07, | |
| "loss": 0.5783711671829224, | |
| "step": 2172 | |
| }, | |
| { | |
| "epoch": 2.3451995685005396, | |
| "grad_norm": 7.201903343200684, | |
| "learning_rate": 1.0632593743369927e-07, | |
| "loss": 0.5010417699813843, | |
| "step": 2174 | |
| }, | |
| { | |
| "epoch": 2.347357065803668, | |
| "grad_norm": 1.4425990581512451, | |
| "learning_rate": 1.0597214536302627e-07, | |
| "loss": 0.4950565695762634, | |
| "step": 2176 | |
| }, | |
| { | |
| "epoch": 2.349514563106796, | |
| "grad_norm": 1.3873226642608643, | |
| "learning_rate": 1.0561931000388497e-07, | |
| "loss": 0.5758650898933411, | |
| "step": 2178 | |
| }, | |
| { | |
| "epoch": 2.3516720604099244, | |
| "grad_norm": 2.6027872562408447, | |
| "learning_rate": 1.0526743335335244e-07, | |
| "loss": 0.43806731700897217, | |
| "step": 2180 | |
| }, | |
| { | |
| "epoch": 2.353829557713053, | |
| "grad_norm": 1.688138723373413, | |
| "learning_rate": 1.0491651740307942e-07, | |
| "loss": 0.45398759841918945, | |
| "step": 2182 | |
| }, | |
| { | |
| "epoch": 2.355987055016181, | |
| "grad_norm": 1.561120867729187, | |
| "learning_rate": 1.0456656413927885e-07, | |
| "loss": 0.5060495734214783, | |
| "step": 2184 | |
| }, | |
| { | |
| "epoch": 2.3581445523193096, | |
| "grad_norm": 0.7075545787811279, | |
| "learning_rate": 1.0421757554271513e-07, | |
| "loss": 0.6581755876541138, | |
| "step": 2186 | |
| }, | |
| { | |
| "epoch": 2.360302049622438, | |
| "grad_norm": 1.6746031045913696, | |
| "learning_rate": 1.0386955358869228e-07, | |
| "loss": 0.4242514669895172, | |
| "step": 2188 | |
| }, | |
| { | |
| "epoch": 2.3624595469255665, | |
| "grad_norm": 1.3905069828033447, | |
| "learning_rate": 1.0352250024704305e-07, | |
| "loss": 0.5258157849311829, | |
| "step": 2190 | |
| }, | |
| { | |
| "epoch": 2.364617044228695, | |
| "grad_norm": 1.4214626550674438, | |
| "learning_rate": 1.0317641748211797e-07, | |
| "loss": 0.4111720025539398, | |
| "step": 2192 | |
| }, | |
| { | |
| "epoch": 2.3667745415318233, | |
| "grad_norm": 2.1955699920654297, | |
| "learning_rate": 1.0283130725277387e-07, | |
| "loss": 0.4222407937049866, | |
| "step": 2194 | |
| }, | |
| { | |
| "epoch": 2.3689320388349513, | |
| "grad_norm": 3.329183578491211, | |
| "learning_rate": 1.0248717151236292e-07, | |
| "loss": 0.48131048679351807, | |
| "step": 2196 | |
| }, | |
| { | |
| "epoch": 2.3710895361380797, | |
| "grad_norm": 1.4323745965957642, | |
| "learning_rate": 1.0214401220872165e-07, | |
| "loss": 0.5055102109909058, | |
| "step": 2198 | |
| }, | |
| { | |
| "epoch": 2.373247033441208, | |
| "grad_norm": 3.3138020038604736, | |
| "learning_rate": 1.0180183128415996e-07, | |
| "loss": 0.4181690514087677, | |
| "step": 2200 | |
| }, | |
| { | |
| "epoch": 2.3754045307443366, | |
| "grad_norm": 1.2691850662231445, | |
| "learning_rate": 1.0146063067544994e-07, | |
| "loss": 0.5093836188316345, | |
| "step": 2202 | |
| }, | |
| { | |
| "epoch": 2.377562028047465, | |
| "grad_norm": 1.5404523611068726, | |
| "learning_rate": 1.0112041231381497e-07, | |
| "loss": 0.5514087677001953, | |
| "step": 2204 | |
| }, | |
| { | |
| "epoch": 2.3797195253505934, | |
| "grad_norm": 0.5054550170898438, | |
| "learning_rate": 1.007811781249191e-07, | |
| "loss": 0.5555412173271179, | |
| "step": 2206 | |
| }, | |
| { | |
| "epoch": 2.381877022653722, | |
| "grad_norm": 1.4161839485168457, | |
| "learning_rate": 1.0044293002885543e-07, | |
| "loss": 0.40494877099990845, | |
| "step": 2208 | |
| }, | |
| { | |
| "epoch": 2.3840345199568502, | |
| "grad_norm": 2.184622049331665, | |
| "learning_rate": 1.0010566994013612e-07, | |
| "loss": 0.4903999865055084, | |
| "step": 2210 | |
| }, | |
| { | |
| "epoch": 2.3861920172599786, | |
| "grad_norm": 1.4374651908874512, | |
| "learning_rate": 9.976939976768092e-08, | |
| "loss": 0.3994739353656769, | |
| "step": 2212 | |
| }, | |
| { | |
| "epoch": 2.3883495145631066, | |
| "grad_norm": 1.230521559715271, | |
| "learning_rate": 9.943412141480658e-08, | |
| "loss": 0.3945184051990509, | |
| "step": 2214 | |
| }, | |
| { | |
| "epoch": 2.390507011866235, | |
| "grad_norm": 1.331252932548523, | |
| "learning_rate": 9.909983677921607e-08, | |
| "loss": 0.3576943278312683, | |
| "step": 2216 | |
| }, | |
| { | |
| "epoch": 2.3926645091693635, | |
| "grad_norm": 2.683382987976074, | |
| "learning_rate": 9.876654775298799e-08, | |
| "loss": 0.5146565437316895, | |
| "step": 2218 | |
| }, | |
| { | |
| "epoch": 2.394822006472492, | |
| "grad_norm": 1.1658935546875, | |
| "learning_rate": 9.843425622256546e-08, | |
| "loss": 0.4697680175304413, | |
| "step": 2220 | |
| }, | |
| { | |
| "epoch": 2.3969795037756203, | |
| "grad_norm": 1.2742339372634888, | |
| "learning_rate": 9.810296406874583e-08, | |
| "loss": 0.5487444400787354, | |
| "step": 2222 | |
| }, | |
| { | |
| "epoch": 2.3991370010787487, | |
| "grad_norm": 2.137641668319702, | |
| "learning_rate": 9.777267316667e-08, | |
| "loss": 0.4315913915634155, | |
| "step": 2224 | |
| }, | |
| { | |
| "epoch": 2.401294498381877, | |
| "grad_norm": 2.66500186920166, | |
| "learning_rate": 9.744338538581147e-08, | |
| "loss": 0.5236972570419312, | |
| "step": 2226 | |
| }, | |
| { | |
| "epoch": 2.4034519956850056, | |
| "grad_norm": 1.3068699836730957, | |
| "learning_rate": 9.711510258996617e-08, | |
| "loss": 0.46810081601142883, | |
| "step": 2228 | |
| }, | |
| { | |
| "epoch": 2.405609492988134, | |
| "grad_norm": 2.274906635284424, | |
| "learning_rate": 9.678782663724156e-08, | |
| "loss": 0.3743009567260742, | |
| "step": 2230 | |
| }, | |
| { | |
| "epoch": 2.407766990291262, | |
| "grad_norm": 7.867446422576904, | |
| "learning_rate": 9.646155938004655e-08, | |
| "loss": 0.5277361273765564, | |
| "step": 2232 | |
| }, | |
| { | |
| "epoch": 2.4099244875943904, | |
| "grad_norm": 1.2440294027328491, | |
| "learning_rate": 9.613630266508053e-08, | |
| "loss": 0.5060732960700989, | |
| "step": 2234 | |
| }, | |
| { | |
| "epoch": 2.412081984897519, | |
| "grad_norm": 2.0601508617401123, | |
| "learning_rate": 9.581205833332316e-08, | |
| "loss": 0.422480046749115, | |
| "step": 2236 | |
| }, | |
| { | |
| "epoch": 2.414239482200647, | |
| "grad_norm": 3.935227632522583, | |
| "learning_rate": 9.548882822002405e-08, | |
| "loss": 0.5240671634674072, | |
| "step": 2238 | |
| }, | |
| { | |
| "epoch": 2.4163969795037756, | |
| "grad_norm": 1.6686252355575562, | |
| "learning_rate": 9.516661415469216e-08, | |
| "loss": 0.5927475690841675, | |
| "step": 2240 | |
| }, | |
| { | |
| "epoch": 2.418554476806904, | |
| "grad_norm": 1.506426215171814, | |
| "learning_rate": 9.484541796108551e-08, | |
| "loss": 0.4393031597137451, | |
| "step": 2242 | |
| }, | |
| { | |
| "epoch": 2.4207119741100325, | |
| "grad_norm": 2.3679986000061035, | |
| "learning_rate": 9.45252414572009e-08, | |
| "loss": 0.41118109226226807, | |
| "step": 2244 | |
| }, | |
| { | |
| "epoch": 2.422869471413161, | |
| "grad_norm": 3.8800225257873535, | |
| "learning_rate": 9.420608645526373e-08, | |
| "loss": 0.5971561670303345, | |
| "step": 2246 | |
| }, | |
| { | |
| "epoch": 2.4250269687162893, | |
| "grad_norm": 3.2100000381469727, | |
| "learning_rate": 9.388795476171742e-08, | |
| "loss": 0.4730355739593506, | |
| "step": 2248 | |
| }, | |
| { | |
| "epoch": 2.4271844660194173, | |
| "grad_norm": 2.2373881340026855, | |
| "learning_rate": 9.357084817721342e-08, | |
| "loss": 0.5331867337226868, | |
| "step": 2250 | |
| }, | |
| { | |
| "epoch": 2.4293419633225457, | |
| "grad_norm": 1.286616563796997, | |
| "learning_rate": 9.325476849660124e-08, | |
| "loss": 0.3063022196292877, | |
| "step": 2252 | |
| }, | |
| { | |
| "epoch": 2.431499460625674, | |
| "grad_norm": 16.60342788696289, | |
| "learning_rate": 9.293971750891755e-08, | |
| "loss": 0.44442903995513916, | |
| "step": 2254 | |
| }, | |
| { | |
| "epoch": 2.4336569579288025, | |
| "grad_norm": 1.4422513246536255, | |
| "learning_rate": 9.262569699737699e-08, | |
| "loss": 0.45078244805336, | |
| "step": 2256 | |
| }, | |
| { | |
| "epoch": 2.435814455231931, | |
| "grad_norm": 3.8183093070983887, | |
| "learning_rate": 9.231270873936134e-08, | |
| "loss": 0.6303662657737732, | |
| "step": 2258 | |
| }, | |
| { | |
| "epoch": 2.4379719525350594, | |
| "grad_norm": 2.514275550842285, | |
| "learning_rate": 9.200075450640982e-08, | |
| "loss": 0.41539642214775085, | |
| "step": 2260 | |
| }, | |
| { | |
| "epoch": 2.440129449838188, | |
| "grad_norm": 15.767145156860352, | |
| "learning_rate": 9.16898360642091e-08, | |
| "loss": 0.5099453330039978, | |
| "step": 2262 | |
| }, | |
| { | |
| "epoch": 2.4422869471413162, | |
| "grad_norm": 1.8690749406814575, | |
| "learning_rate": 9.137995517258301e-08, | |
| "loss": 0.4256049394607544, | |
| "step": 2264 | |
| }, | |
| { | |
| "epoch": 2.4444444444444446, | |
| "grad_norm": 1.6698436737060547, | |
| "learning_rate": 9.107111358548284e-08, | |
| "loss": 0.24495507776737213, | |
| "step": 2266 | |
| }, | |
| { | |
| "epoch": 2.4466019417475726, | |
| "grad_norm": 1.4875117540359497, | |
| "learning_rate": 9.076331305097726e-08, | |
| "loss": 0.5850554704666138, | |
| "step": 2268 | |
| }, | |
| { | |
| "epoch": 2.448759439050701, | |
| "grad_norm": 1.4028273820877075, | |
| "learning_rate": 9.045655531124265e-08, | |
| "loss": 0.6093006134033203, | |
| "step": 2270 | |
| }, | |
| { | |
| "epoch": 2.4509169363538295, | |
| "grad_norm": 1.2962590456008911, | |
| "learning_rate": 9.015084210255303e-08, | |
| "loss": 0.47294872999191284, | |
| "step": 2272 | |
| }, | |
| { | |
| "epoch": 2.453074433656958, | |
| "grad_norm": 1.212159514427185, | |
| "learning_rate": 8.984617515527011e-08, | |
| "loss": 0.49960917234420776, | |
| "step": 2274 | |
| }, | |
| { | |
| "epoch": 2.4552319309600863, | |
| "grad_norm": 1.358234167098999, | |
| "learning_rate": 8.954255619383396e-08, | |
| "loss": 0.5016006231307983, | |
| "step": 2276 | |
| }, | |
| { | |
| "epoch": 2.4573894282632147, | |
| "grad_norm": 1.423470139503479, | |
| "learning_rate": 8.92399869367528e-08, | |
| "loss": 0.5450627207756042, | |
| "step": 2278 | |
| }, | |
| { | |
| "epoch": 2.459546925566343, | |
| "grad_norm": 10.734492301940918, | |
| "learning_rate": 8.893846909659339e-08, | |
| "loss": 0.5570814609527588, | |
| "step": 2280 | |
| }, | |
| { | |
| "epoch": 2.4617044228694716, | |
| "grad_norm": 1.709943413734436, | |
| "learning_rate": 8.863800437997145e-08, | |
| "loss": 0.487891286611557, | |
| "step": 2282 | |
| }, | |
| { | |
| "epoch": 2.4638619201726, | |
| "grad_norm": 4.394408226013184, | |
| "learning_rate": 8.833859448754206e-08, | |
| "loss": 0.4305161237716675, | |
| "step": 2284 | |
| }, | |
| { | |
| "epoch": 2.466019417475728, | |
| "grad_norm": 4.226398468017578, | |
| "learning_rate": 8.804024111398971e-08, | |
| "loss": 0.529059648513794, | |
| "step": 2286 | |
| }, | |
| { | |
| "epoch": 2.4681769147788564, | |
| "grad_norm": 1.9100017547607422, | |
| "learning_rate": 8.77429459480189e-08, | |
| "loss": 0.5420784950256348, | |
| "step": 2288 | |
| }, | |
| { | |
| "epoch": 2.470334412081985, | |
| "grad_norm": 2.035076141357422, | |
| "learning_rate": 8.744671067234483e-08, | |
| "loss": 0.3656485080718994, | |
| "step": 2290 | |
| }, | |
| { | |
| "epoch": 2.472491909385113, | |
| "grad_norm": 0.4504566490650177, | |
| "learning_rate": 8.715153696368342e-08, | |
| "loss": 0.10097479820251465, | |
| "step": 2292 | |
| }, | |
| { | |
| "epoch": 2.4746494066882416, | |
| "grad_norm": 1.2601561546325684, | |
| "learning_rate": 8.685742649274209e-08, | |
| "loss": 0.5880253314971924, | |
| "step": 2294 | |
| }, | |
| { | |
| "epoch": 2.47680690399137, | |
| "grad_norm": 1.1769603490829468, | |
| "learning_rate": 8.656438092421015e-08, | |
| "loss": 0.2316816747188568, | |
| "step": 2296 | |
| }, | |
| { | |
| "epoch": 2.4789644012944985, | |
| "grad_norm": 4.42578125, | |
| "learning_rate": 8.627240191674979e-08, | |
| "loss": 0.5261933207511902, | |
| "step": 2298 | |
| }, | |
| { | |
| "epoch": 2.481121898597627, | |
| "grad_norm": 2.054516553878784, | |
| "learning_rate": 8.598149112298586e-08, | |
| "loss": 0.3512814939022064, | |
| "step": 2300 | |
| }, | |
| { | |
| "epoch": 2.4832793959007553, | |
| "grad_norm": 1.8083107471466064, | |
| "learning_rate": 8.569165018949755e-08, | |
| "loss": 0.5700247287750244, | |
| "step": 2302 | |
| }, | |
| { | |
| "epoch": 2.4854368932038833, | |
| "grad_norm": 0.7789508700370789, | |
| "learning_rate": 8.540288075680832e-08, | |
| "loss": 0.5419098138809204, | |
| "step": 2304 | |
| }, | |
| { | |
| "epoch": 2.4875943905070117, | |
| "grad_norm": 1.425304889678955, | |
| "learning_rate": 8.511518445937682e-08, | |
| "loss": 0.5239717960357666, | |
| "step": 2306 | |
| }, | |
| { | |
| "epoch": 2.48975188781014, | |
| "grad_norm": 2.2157175540924072, | |
| "learning_rate": 8.482856292558771e-08, | |
| "loss": 0.5087226629257202, | |
| "step": 2308 | |
| }, | |
| { | |
| "epoch": 2.4919093851132685, | |
| "grad_norm": 1.6569185256958008, | |
| "learning_rate": 8.454301777774237e-08, | |
| "loss": 0.5517893433570862, | |
| "step": 2310 | |
| }, | |
| { | |
| "epoch": 2.494066882416397, | |
| "grad_norm": 1.5045207738876343, | |
| "learning_rate": 8.425855063204987e-08, | |
| "loss": 0.44327977299690247, | |
| "step": 2312 | |
| }, | |
| { | |
| "epoch": 2.4962243797195254, | |
| "grad_norm": 3.6006226539611816, | |
| "learning_rate": 8.397516309861743e-08, | |
| "loss": 0.5266181230545044, | |
| "step": 2314 | |
| }, | |
| { | |
| "epoch": 2.498381877022654, | |
| "grad_norm": 1.071789264678955, | |
| "learning_rate": 8.369285678144197e-08, | |
| "loss": 0.3867831826210022, | |
| "step": 2316 | |
| }, | |
| { | |
| "epoch": 2.500539374325782, | |
| "grad_norm": 1.2131776809692383, | |
| "learning_rate": 8.341163327840026e-08, | |
| "loss": 0.4348089396953583, | |
| "step": 2318 | |
| }, | |
| { | |
| "epoch": 2.5026968716289106, | |
| "grad_norm": 1.3953055143356323, | |
| "learning_rate": 8.313149418124043e-08, | |
| "loss": 0.3074108362197876, | |
| "step": 2320 | |
| }, | |
| { | |
| "epoch": 2.5048543689320386, | |
| "grad_norm": 1.4362233877182007, | |
| "learning_rate": 8.285244107557284e-08, | |
| "loss": 0.559751033782959, | |
| "step": 2322 | |
| }, | |
| { | |
| "epoch": 2.5070118662351675, | |
| "grad_norm": 2.6127679347991943, | |
| "learning_rate": 8.257447554086095e-08, | |
| "loss": 0.49618762731552124, | |
| "step": 2324 | |
| }, | |
| { | |
| "epoch": 2.5091693635382954, | |
| "grad_norm": 1.4416208267211914, | |
| "learning_rate": 8.229759915041243e-08, | |
| "loss": 0.37788355350494385, | |
| "step": 2326 | |
| }, | |
| { | |
| "epoch": 2.511326860841424, | |
| "grad_norm": 1.3234007358551025, | |
| "learning_rate": 8.202181347137041e-08, | |
| "loss": 0.529360830783844, | |
| "step": 2328 | |
| }, | |
| { | |
| "epoch": 2.5134843581445523, | |
| "grad_norm": 1.065699577331543, | |
| "learning_rate": 8.174712006470453e-08, | |
| "loss": 0.3420860767364502, | |
| "step": 2330 | |
| }, | |
| { | |
| "epoch": 2.5156418554476807, | |
| "grad_norm": 0.9615293741226196, | |
| "learning_rate": 8.147352048520198e-08, | |
| "loss": 0.3195402920246124, | |
| "step": 2332 | |
| }, | |
| { | |
| "epoch": 2.517799352750809, | |
| "grad_norm": 1.6290311813354492, | |
| "learning_rate": 8.12010162814588e-08, | |
| "loss": 0.41644540429115295, | |
| "step": 2334 | |
| }, | |
| { | |
| "epoch": 2.5199568500539375, | |
| "grad_norm": 3.109558343887329, | |
| "learning_rate": 8.092960899587121e-08, | |
| "loss": 0.4512273371219635, | |
| "step": 2336 | |
| }, | |
| { | |
| "epoch": 2.522114347357066, | |
| "grad_norm": 2.5993058681488037, | |
| "learning_rate": 8.065930016462671e-08, | |
| "loss": 0.5339113473892212, | |
| "step": 2338 | |
| }, | |
| { | |
| "epoch": 2.524271844660194, | |
| "grad_norm": 5.516918659210205, | |
| "learning_rate": 8.039009131769548e-08, | |
| "loss": 0.6151620745658875, | |
| "step": 2340 | |
| }, | |
| { | |
| "epoch": 2.526429341963323, | |
| "grad_norm": 18.284648895263672, | |
| "learning_rate": 8.012198397882164e-08, | |
| "loss": 0.5498458743095398, | |
| "step": 2342 | |
| }, | |
| { | |
| "epoch": 2.528586839266451, | |
| "grad_norm": 5.122011661529541, | |
| "learning_rate": 7.98549796655148e-08, | |
| "loss": 0.5199579000473022, | |
| "step": 2344 | |
| }, | |
| { | |
| "epoch": 2.530744336569579, | |
| "grad_norm": 1.5160562992095947, | |
| "learning_rate": 7.958907988904126e-08, | |
| "loss": 0.6041461825370789, | |
| "step": 2346 | |
| }, | |
| { | |
| "epoch": 2.5329018338727076, | |
| "grad_norm": 1.5068459510803223, | |
| "learning_rate": 7.932428615441553e-08, | |
| "loss": 0.593430757522583, | |
| "step": 2348 | |
| }, | |
| { | |
| "epoch": 2.535059331175836, | |
| "grad_norm": 1.434343934059143, | |
| "learning_rate": 7.9060599960392e-08, | |
| "loss": 0.31969255208969116, | |
| "step": 2350 | |
| }, | |
| { | |
| "epoch": 2.5372168284789645, | |
| "grad_norm": 2.9943349361419678, | |
| "learning_rate": 7.879802279945609e-08, | |
| "loss": 0.5539549589157104, | |
| "step": 2352 | |
| }, | |
| { | |
| "epoch": 2.539374325782093, | |
| "grad_norm": 1.722773790359497, | |
| "learning_rate": 7.85365561578161e-08, | |
| "loss": 0.5361152291297913, | |
| "step": 2354 | |
| }, | |
| { | |
| "epoch": 2.5415318230852213, | |
| "grad_norm": 1.305713176727295, | |
| "learning_rate": 7.827620151539466e-08, | |
| "loss": 0.4791654050350189, | |
| "step": 2356 | |
| }, | |
| { | |
| "epoch": 2.5436893203883493, | |
| "grad_norm": 3.0847299098968506, | |
| "learning_rate": 7.801696034582053e-08, | |
| "loss": 0.5922753810882568, | |
| "step": 2358 | |
| }, | |
| { | |
| "epoch": 2.545846817691478, | |
| "grad_norm": 2.508219003677368, | |
| "learning_rate": 7.77588341164198e-08, | |
| "loss": 0.5202733874320984, | |
| "step": 2360 | |
| }, | |
| { | |
| "epoch": 2.548004314994606, | |
| "grad_norm": 1.4630476236343384, | |
| "learning_rate": 7.750182428820827e-08, | |
| "loss": 0.5938437581062317, | |
| "step": 2362 | |
| }, | |
| { | |
| "epoch": 2.5501618122977345, | |
| "grad_norm": 4.091277122497559, | |
| "learning_rate": 7.724593231588272e-08, | |
| "loss": 0.5015133619308472, | |
| "step": 2364 | |
| }, | |
| { | |
| "epoch": 2.552319309600863, | |
| "grad_norm": 1.16217839717865, | |
| "learning_rate": 7.699115964781254e-08, | |
| "loss": 0.27357974648475647, | |
| "step": 2366 | |
| }, | |
| { | |
| "epoch": 2.5544768069039914, | |
| "grad_norm": 1.87477707862854, | |
| "learning_rate": 7.673750772603207e-08, | |
| "loss": 0.612422525882721, | |
| "step": 2368 | |
| }, | |
| { | |
| "epoch": 2.55663430420712, | |
| "grad_norm": 1.7888860702514648, | |
| "learning_rate": 7.6484977986232e-08, | |
| "loss": 0.7179882526397705, | |
| "step": 2370 | |
| }, | |
| { | |
| "epoch": 2.558791801510248, | |
| "grad_norm": 0.9722982048988342, | |
| "learning_rate": 7.623357185775133e-08, | |
| "loss": 0.4070899486541748, | |
| "step": 2372 | |
| }, | |
| { | |
| "epoch": 2.5609492988133766, | |
| "grad_norm": 1.3785275220870972, | |
| "learning_rate": 7.598329076356936e-08, | |
| "loss": 0.5034173727035522, | |
| "step": 2374 | |
| }, | |
| { | |
| "epoch": 2.5631067961165046, | |
| "grad_norm": 1.4615991115570068, | |
| "learning_rate": 7.573413612029774e-08, | |
| "loss": 0.42231953144073486, | |
| "step": 2376 | |
| }, | |
| { | |
| "epoch": 2.5652642934196335, | |
| "grad_norm": 1.8301200866699219, | |
| "learning_rate": 7.548610933817214e-08, | |
| "loss": 0.4184509217739105, | |
| "step": 2378 | |
| }, | |
| { | |
| "epoch": 2.5674217907227614, | |
| "grad_norm": 2.232179880142212, | |
| "learning_rate": 7.523921182104446e-08, | |
| "loss": 0.4791230261325836, | |
| "step": 2380 | |
| }, | |
| { | |
| "epoch": 2.56957928802589, | |
| "grad_norm": 1.1662960052490234, | |
| "learning_rate": 7.499344496637498e-08, | |
| "loss": 0.45605581998825073, | |
| "step": 2382 | |
| }, | |
| { | |
| "epoch": 2.5717367853290183, | |
| "grad_norm": 1.7335278987884521, | |
| "learning_rate": 7.474881016522429e-08, | |
| "loss": 0.4822132885456085, | |
| "step": 2384 | |
| }, | |
| { | |
| "epoch": 2.5738942826321467, | |
| "grad_norm": 1.393849492073059, | |
| "learning_rate": 7.45053088022454e-08, | |
| "loss": 0.61832594871521, | |
| "step": 2386 | |
| }, | |
| { | |
| "epoch": 2.576051779935275, | |
| "grad_norm": 2.2385711669921875, | |
| "learning_rate": 7.426294225567596e-08, | |
| "loss": 0.41741040349006653, | |
| "step": 2388 | |
| }, | |
| { | |
| "epoch": 2.5782092772384035, | |
| "grad_norm": 1.4406133890151978, | |
| "learning_rate": 7.40217118973306e-08, | |
| "loss": 0.4191496670246124, | |
| "step": 2390 | |
| }, | |
| { | |
| "epoch": 2.580366774541532, | |
| "grad_norm": 1.7564983367919922, | |
| "learning_rate": 7.378161909259297e-08, | |
| "loss": 0.5521677732467651, | |
| "step": 2392 | |
| }, | |
| { | |
| "epoch": 2.58252427184466, | |
| "grad_norm": 1.317151665687561, | |
| "learning_rate": 7.354266520040793e-08, | |
| "loss": 0.5924421548843384, | |
| "step": 2394 | |
| }, | |
| { | |
| "epoch": 2.584681769147789, | |
| "grad_norm": 7.4354753494262695, | |
| "learning_rate": 7.330485157327426e-08, | |
| "loss": 0.46872678399086, | |
| "step": 2396 | |
| }, | |
| { | |
| "epoch": 2.5868392664509168, | |
| "grad_norm": 1.6784964799880981, | |
| "learning_rate": 7.306817955723654e-08, | |
| "loss": 0.4340111017227173, | |
| "step": 2398 | |
| }, | |
| { | |
| "epoch": 2.588996763754045, | |
| "grad_norm": 2.780867099761963, | |
| "learning_rate": 7.283265049187784e-08, | |
| "loss": 0.35171282291412354, | |
| "step": 2400 | |
| }, | |
| { | |
| "epoch": 2.5911542610571736, | |
| "grad_norm": 1.664075493812561, | |
| "learning_rate": 7.259826571031191e-08, | |
| "loss": 0.39083340764045715, | |
| "step": 2402 | |
| }, | |
| { | |
| "epoch": 2.593311758360302, | |
| "grad_norm": 3.530792713165283, | |
| "learning_rate": 7.236502653917599e-08, | |
| "loss": 0.4641299247741699, | |
| "step": 2404 | |
| }, | |
| { | |
| "epoch": 2.5954692556634305, | |
| "grad_norm": 1.017684817314148, | |
| "learning_rate": 7.213293429862288e-08, | |
| "loss": 0.3411005437374115, | |
| "step": 2406 | |
| }, | |
| { | |
| "epoch": 2.597626752966559, | |
| "grad_norm": 3.9479050636291504, | |
| "learning_rate": 7.190199030231364e-08, | |
| "loss": 0.5616810321807861, | |
| "step": 2408 | |
| }, | |
| { | |
| "epoch": 2.5997842502696873, | |
| "grad_norm": 5.205540180206299, | |
| "learning_rate": 7.167219585741041e-08, | |
| "loss": 0.5188603401184082, | |
| "step": 2410 | |
| }, | |
| { | |
| "epoch": 2.6019417475728153, | |
| "grad_norm": 1.752669334411621, | |
| "learning_rate": 7.144355226456839e-08, | |
| "loss": 0.622796893119812, | |
| "step": 2412 | |
| }, | |
| { | |
| "epoch": 2.604099244875944, | |
| "grad_norm": 1.7586170434951782, | |
| "learning_rate": 7.121606081792928e-08, | |
| "loss": 0.4979010820388794, | |
| "step": 2414 | |
| }, | |
| { | |
| "epoch": 2.606256742179072, | |
| "grad_norm": 1.7928980588912964, | |
| "learning_rate": 7.098972280511323e-08, | |
| "loss": 0.40664538741111755, | |
| "step": 2416 | |
| }, | |
| { | |
| "epoch": 2.6084142394822005, | |
| "grad_norm": 1.9738396406173706, | |
| "learning_rate": 7.076453950721202e-08, | |
| "loss": 0.5753185153007507, | |
| "step": 2418 | |
| }, | |
| { | |
| "epoch": 2.610571736785329, | |
| "grad_norm": 1.1539170742034912, | |
| "learning_rate": 7.054051219878153e-08, | |
| "loss": 0.47662532329559326, | |
| "step": 2420 | |
| }, | |
| { | |
| "epoch": 2.6127292340884574, | |
| "grad_norm": 2.355470895767212, | |
| "learning_rate": 7.031764214783478e-08, | |
| "loss": 0.4526709318161011, | |
| "step": 2422 | |
| }, | |
| { | |
| "epoch": 2.614886731391586, | |
| "grad_norm": 1.3842222690582275, | |
| "learning_rate": 7.009593061583462e-08, | |
| "loss": 0.4917500615119934, | |
| "step": 2424 | |
| }, | |
| { | |
| "epoch": 2.617044228694714, | |
| "grad_norm": 7.645388603210449, | |
| "learning_rate": 6.987537885768635e-08, | |
| "loss": 0.504601240158081, | |
| "step": 2426 | |
| }, | |
| { | |
| "epoch": 2.6192017259978426, | |
| "grad_norm": 1.4394519329071045, | |
| "learning_rate": 6.965598812173118e-08, | |
| "loss": 0.6155430674552917, | |
| "step": 2428 | |
| }, | |
| { | |
| "epoch": 2.6213592233009706, | |
| "grad_norm": 1.2602229118347168, | |
| "learning_rate": 6.943775964973861e-08, | |
| "loss": 0.5159276723861694, | |
| "step": 2430 | |
| }, | |
| { | |
| "epoch": 2.6235167206040995, | |
| "grad_norm": 1.7222973108291626, | |
| "learning_rate": 6.922069467689969e-08, | |
| "loss": 0.46511101722717285, | |
| "step": 2432 | |
| }, | |
| { | |
| "epoch": 2.6256742179072274, | |
| "grad_norm": 3.4029550552368164, | |
| "learning_rate": 6.900479443182e-08, | |
| "loss": 0.5705016851425171, | |
| "step": 2434 | |
| }, | |
| { | |
| "epoch": 2.627831715210356, | |
| "grad_norm": 3.3381807804107666, | |
| "learning_rate": 6.879006013651269e-08, | |
| "loss": 0.588231086730957, | |
| "step": 2436 | |
| }, | |
| { | |
| "epoch": 2.6299892125134843, | |
| "grad_norm": 1.722324013710022, | |
| "learning_rate": 6.857649300639145e-08, | |
| "loss": 0.4552815556526184, | |
| "step": 2438 | |
| }, | |
| { | |
| "epoch": 2.6321467098166127, | |
| "grad_norm": 2.675380229949951, | |
| "learning_rate": 6.836409425026375e-08, | |
| "loss": 0.3620685040950775, | |
| "step": 2440 | |
| }, | |
| { | |
| "epoch": 2.634304207119741, | |
| "grad_norm": 4.263212203979492, | |
| "learning_rate": 6.815286507032405e-08, | |
| "loss": 0.33681440353393555, | |
| "step": 2442 | |
| }, | |
| { | |
| "epoch": 2.6364617044228695, | |
| "grad_norm": 3.89007830619812, | |
| "learning_rate": 6.794280666214682e-08, | |
| "loss": 0.4459841251373291, | |
| "step": 2444 | |
| }, | |
| { | |
| "epoch": 2.638619201725998, | |
| "grad_norm": 3.6217668056488037, | |
| "learning_rate": 6.773392021467987e-08, | |
| "loss": 0.5162920951843262, | |
| "step": 2446 | |
| }, | |
| { | |
| "epoch": 2.6407766990291264, | |
| "grad_norm": 3.093386173248291, | |
| "learning_rate": 6.752620691023762e-08, | |
| "loss": 0.25055232644081116, | |
| "step": 2448 | |
| }, | |
| { | |
| "epoch": 2.642934196332255, | |
| "grad_norm": 3.092965602874756, | |
| "learning_rate": 6.731966792449451e-08, | |
| "loss": 0.6372309923171997, | |
| "step": 2450 | |
| }, | |
| { | |
| "epoch": 2.6450916936353828, | |
| "grad_norm": 1.5319708585739136, | |
| "learning_rate": 6.711430442647809e-08, | |
| "loss": 0.4929147958755493, | |
| "step": 2452 | |
| }, | |
| { | |
| "epoch": 2.647249190938511, | |
| "grad_norm": 13.12871265411377, | |
| "learning_rate": 6.691011757856258e-08, | |
| "loss": 0.434012770652771, | |
| "step": 2454 | |
| }, | |
| { | |
| "epoch": 2.6494066882416396, | |
| "grad_norm": 1.5877397060394287, | |
| "learning_rate": 6.670710853646239e-08, | |
| "loss": 0.43648290634155273, | |
| "step": 2456 | |
| }, | |
| { | |
| "epoch": 2.651564185544768, | |
| "grad_norm": 1.0320699214935303, | |
| "learning_rate": 6.650527844922533e-08, | |
| "loss": 0.4268641471862793, | |
| "step": 2458 | |
| }, | |
| { | |
| "epoch": 2.6537216828478964, | |
| "grad_norm": 3.019049644470215, | |
| "learning_rate": 6.630462845922622e-08, | |
| "loss": 0.6072458624839783, | |
| "step": 2460 | |
| }, | |
| { | |
| "epoch": 2.655879180151025, | |
| "grad_norm": 1.9484155178070068, | |
| "learning_rate": 6.610515970216046e-08, | |
| "loss": 0.42939677834510803, | |
| "step": 2462 | |
| }, | |
| { | |
| "epoch": 2.6580366774541533, | |
| "grad_norm": 1.258055567741394, | |
| "learning_rate": 6.59068733070377e-08, | |
| "loss": 0.45139870047569275, | |
| "step": 2464 | |
| }, | |
| { | |
| "epoch": 2.6601941747572817, | |
| "grad_norm": 0.20732815563678741, | |
| "learning_rate": 6.570977039617512e-08, | |
| "loss": 0.19261834025382996, | |
| "step": 2466 | |
| }, | |
| { | |
| "epoch": 2.66235167206041, | |
| "grad_norm": 1.5855305194854736, | |
| "learning_rate": 6.551385208519136e-08, | |
| "loss": 0.609540581703186, | |
| "step": 2468 | |
| }, | |
| { | |
| "epoch": 2.664509169363538, | |
| "grad_norm": 1.3604331016540527, | |
| "learning_rate": 6.531911948300026e-08, | |
| "loss": 0.4661960303783417, | |
| "step": 2470 | |
| }, | |
| { | |
| "epoch": 2.6666666666666665, | |
| "grad_norm": 1.1402088403701782, | |
| "learning_rate": 6.512557369180416e-08, | |
| "loss": 0.3893601894378662, | |
| "step": 2472 | |
| }, | |
| { | |
| "epoch": 2.668824163969795, | |
| "grad_norm": 1.1722886562347412, | |
| "learning_rate": 6.493321580708825e-08, | |
| "loss": 0.50113445520401, | |
| "step": 2474 | |
| }, | |
| { | |
| "epoch": 2.6709816612729234, | |
| "grad_norm": 1.2451552152633667, | |
| "learning_rate": 6.474204691761392e-08, | |
| "loss": 0.5579499006271362, | |
| "step": 2476 | |
| }, | |
| { | |
| "epoch": 2.6731391585760518, | |
| "grad_norm": 1.9142377376556396, | |
| "learning_rate": 6.455206810541275e-08, | |
| "loss": 0.5365015864372253, | |
| "step": 2478 | |
| }, | |
| { | |
| "epoch": 2.67529665587918, | |
| "grad_norm": 4.749199867248535, | |
| "learning_rate": 6.436328044578045e-08, | |
| "loss": 0.5498421788215637, | |
| "step": 2480 | |
| }, | |
| { | |
| "epoch": 2.6774541531823086, | |
| "grad_norm": 4.736466884613037, | |
| "learning_rate": 6.417568500727065e-08, | |
| "loss": 0.474033921957016, | |
| "step": 2482 | |
| }, | |
| { | |
| "epoch": 2.679611650485437, | |
| "grad_norm": 1.416872262954712, | |
| "learning_rate": 6.398928285168894e-08, | |
| "loss": 0.5008449554443359, | |
| "step": 2484 | |
| }, | |
| { | |
| "epoch": 2.6817691477885655, | |
| "grad_norm": 1.2803456783294678, | |
| "learning_rate": 6.380407503408675e-08, | |
| "loss": 0.4675408601760864, | |
| "step": 2486 | |
| }, | |
| { | |
| "epoch": 2.6839266450916934, | |
| "grad_norm": 2.346578598022461, | |
| "learning_rate": 6.362006260275566e-08, | |
| "loss": 0.48824068903923035, | |
| "step": 2488 | |
| }, | |
| { | |
| "epoch": 2.686084142394822, | |
| "grad_norm": 1.5030266046524048, | |
| "learning_rate": 6.343724659922105e-08, | |
| "loss": 0.4942224323749542, | |
| "step": 2490 | |
| }, | |
| { | |
| "epoch": 2.6882416396979503, | |
| "grad_norm": 4.458725929260254, | |
| "learning_rate": 6.325562805823647e-08, | |
| "loss": 0.5143862962722778, | |
| "step": 2492 | |
| }, | |
| { | |
| "epoch": 2.6903991370010787, | |
| "grad_norm": 0.25812989473342896, | |
| "learning_rate": 6.307520800777791e-08, | |
| "loss": 0.06615746021270752, | |
| "step": 2494 | |
| }, | |
| { | |
| "epoch": 2.692556634304207, | |
| "grad_norm": 1.5842936038970947, | |
| "learning_rate": 6.289598746903753e-08, | |
| "loss": 0.488372266292572, | |
| "step": 2496 | |
| }, | |
| { | |
| "epoch": 2.6947141316073355, | |
| "grad_norm": 2.356571674346924, | |
| "learning_rate": 6.271796745641836e-08, | |
| "loss": 0.33276641368865967, | |
| "step": 2498 | |
| }, | |
| { | |
| "epoch": 2.696871628910464, | |
| "grad_norm": 1.8246636390686035, | |
| "learning_rate": 6.254114897752822e-08, | |
| "loss": 0.534456193447113, | |
| "step": 2500 | |
| }, | |
| { | |
| "epoch": 2.6990291262135924, | |
| "grad_norm": 1.4518077373504639, | |
| "learning_rate": 6.23655330331743e-08, | |
| "loss": 0.48372286558151245, | |
| "step": 2502 | |
| }, | |
| { | |
| "epoch": 2.701186623516721, | |
| "grad_norm": 1.9352519512176514, | |
| "learning_rate": 6.21911206173572e-08, | |
| "loss": 0.44714611768722534, | |
| "step": 2504 | |
| }, | |
| { | |
| "epoch": 2.7033441208198488, | |
| "grad_norm": 63.32822036743164, | |
| "learning_rate": 6.20179127172655e-08, | |
| "loss": 0.49458324909210205, | |
| "step": 2506 | |
| }, | |
| { | |
| "epoch": 2.705501618122977, | |
| "grad_norm": 1.128757357597351, | |
| "learning_rate": 6.184591031327023e-08, | |
| "loss": 0.53676438331604, | |
| "step": 2508 | |
| }, | |
| { | |
| "epoch": 2.7076591154261056, | |
| "grad_norm": 1.42232346534729, | |
| "learning_rate": 6.1675114378919e-08, | |
| "loss": 0.6496074199676514, | |
| "step": 2510 | |
| }, | |
| { | |
| "epoch": 2.709816612729234, | |
| "grad_norm": 1.3712254762649536, | |
| "learning_rate": 6.150552588093088e-08, | |
| "loss": 0.30613094568252563, | |
| "step": 2512 | |
| }, | |
| { | |
| "epoch": 2.7119741100323624, | |
| "grad_norm": 1.141538381576538, | |
| "learning_rate": 6.133714577919062e-08, | |
| "loss": 0.6155597567558289, | |
| "step": 2514 | |
| }, | |
| { | |
| "epoch": 2.714131607335491, | |
| "grad_norm": 1.5372565984725952, | |
| "learning_rate": 6.116997502674356e-08, | |
| "loss": 0.5866535305976868, | |
| "step": 2516 | |
| }, | |
| { | |
| "epoch": 2.7162891046386193, | |
| "grad_norm": 1.712314248085022, | |
| "learning_rate": 6.100401456978973e-08, | |
| "loss": 0.6070207357406616, | |
| "step": 2518 | |
| }, | |
| { | |
| "epoch": 2.7184466019417477, | |
| "grad_norm": 1.3143550157546997, | |
| "learning_rate": 6.0839265347679e-08, | |
| "loss": 0.46849966049194336, | |
| "step": 2520 | |
| }, | |
| { | |
| "epoch": 2.720604099244876, | |
| "grad_norm": 1.805611491203308, | |
| "learning_rate": 6.06757282929055e-08, | |
| "loss": 0.44359534978866577, | |
| "step": 2522 | |
| }, | |
| { | |
| "epoch": 2.722761596548004, | |
| "grad_norm": 3.4996039867401123, | |
| "learning_rate": 6.051340433110235e-08, | |
| "loss": 0.4810839295387268, | |
| "step": 2524 | |
| }, | |
| { | |
| "epoch": 2.724919093851133, | |
| "grad_norm": 0.6923230886459351, | |
| "learning_rate": 6.035229438103654e-08, | |
| "loss": 0.47448840737342834, | |
| "step": 2526 | |
| }, | |
| { | |
| "epoch": 2.727076591154261, | |
| "grad_norm": 1.4109259843826294, | |
| "learning_rate": 6.019239935460361e-08, | |
| "loss": 0.4482736885547638, | |
| "step": 2528 | |
| }, | |
| { | |
| "epoch": 2.7292340884573894, | |
| "grad_norm": 2.148198127746582, | |
| "learning_rate": 6.003372015682248e-08, | |
| "loss": 0.47598910331726074, | |
| "step": 2530 | |
| }, | |
| { | |
| "epoch": 2.7313915857605178, | |
| "grad_norm": 2.2834556102752686, | |
| "learning_rate": 5.987625768583047e-08, | |
| "loss": 0.5227712392807007, | |
| "step": 2532 | |
| }, | |
| { | |
| "epoch": 2.733549083063646, | |
| "grad_norm": 3.4445295333862305, | |
| "learning_rate": 5.972001283287814e-08, | |
| "loss": 0.4431000053882599, | |
| "step": 2534 | |
| }, | |
| { | |
| "epoch": 2.7357065803667746, | |
| "grad_norm": 2.449721097946167, | |
| "learning_rate": 5.956498648232411e-08, | |
| "loss": 0.4020468294620514, | |
| "step": 2536 | |
| }, | |
| { | |
| "epoch": 2.737864077669903, | |
| "grad_norm": 6.9917073249816895, | |
| "learning_rate": 5.9411179511630237e-08, | |
| "loss": 0.3725473880767822, | |
| "step": 2538 | |
| }, | |
| { | |
| "epoch": 2.7400215749730314, | |
| "grad_norm": 2.097342014312744, | |
| "learning_rate": 5.9258592791356675e-08, | |
| "loss": 0.47959214448928833, | |
| "step": 2540 | |
| }, | |
| { | |
| "epoch": 2.7421790722761594, | |
| "grad_norm": 1.6001266241073608, | |
| "learning_rate": 5.910722718515675e-08, | |
| "loss": 0.49233609437942505, | |
| "step": 2542 | |
| }, | |
| { | |
| "epoch": 2.7443365695792883, | |
| "grad_norm": 2.2737677097320557, | |
| "learning_rate": 5.8957083549772227e-08, | |
| "loss": 0.5828397870063782, | |
| "step": 2544 | |
| }, | |
| { | |
| "epoch": 2.7464940668824163, | |
| "grad_norm": 4.293592929840088, | |
| "learning_rate": 5.880816273502835e-08, | |
| "loss": 0.40149906277656555, | |
| "step": 2546 | |
| }, | |
| { | |
| "epoch": 2.7486515641855447, | |
| "grad_norm": 2.3264200687408447, | |
| "learning_rate": 5.866046558382924e-08, | |
| "loss": 0.5630208849906921, | |
| "step": 2548 | |
| }, | |
| { | |
| "epoch": 2.750809061488673, | |
| "grad_norm": 0.47289416193962097, | |
| "learning_rate": 5.851399293215284e-08, | |
| "loss": 0.3701988160610199, | |
| "step": 2550 | |
| }, | |
| { | |
| "epoch": 2.7529665587918015, | |
| "grad_norm": 4.011696815490723, | |
| "learning_rate": 5.8368745609046394e-08, | |
| "loss": 0.4746440351009369, | |
| "step": 2552 | |
| }, | |
| { | |
| "epoch": 2.75512405609493, | |
| "grad_norm": 4.506484508514404, | |
| "learning_rate": 5.8224724436621686e-08, | |
| "loss": 0.41828322410583496, | |
| "step": 2554 | |
| }, | |
| { | |
| "epoch": 2.7572815533980584, | |
| "grad_norm": 1.0973607301712036, | |
| "learning_rate": 5.808193023005037e-08, | |
| "loss": 0.35553479194641113, | |
| "step": 2556 | |
| }, | |
| { | |
| "epoch": 2.759439050701187, | |
| "grad_norm": 3.0977184772491455, | |
| "learning_rate": 5.7940363797559355e-08, | |
| "loss": 0.6049969792366028, | |
| "step": 2558 | |
| }, | |
| { | |
| "epoch": 2.7615965480043148, | |
| "grad_norm": 3.566246271133423, | |
| "learning_rate": 5.780002594042628e-08, | |
| "loss": 0.4752573072910309, | |
| "step": 2560 | |
| }, | |
| { | |
| "epoch": 2.7637540453074436, | |
| "grad_norm": 3.1380441188812256, | |
| "learning_rate": 5.766091745297499e-08, | |
| "loss": 0.42298072576522827, | |
| "step": 2562 | |
| }, | |
| { | |
| "epoch": 2.7659115426105716, | |
| "grad_norm": 1.4781795740127563, | |
| "learning_rate": 5.752303912257083e-08, | |
| "loss": 0.27772021293640137, | |
| "step": 2564 | |
| }, | |
| { | |
| "epoch": 2.7680690399137, | |
| "grad_norm": 0.4035155773162842, | |
| "learning_rate": 5.738639172961655e-08, | |
| "loss": 0.2534405291080475, | |
| "step": 2566 | |
| }, | |
| { | |
| "epoch": 2.7702265372168284, | |
| "grad_norm": 2.6446008682250977, | |
| "learning_rate": 5.725097604754762e-08, | |
| "loss": 0.445311039686203, | |
| "step": 2568 | |
| }, | |
| { | |
| "epoch": 2.772384034519957, | |
| "grad_norm": 0.6513259410858154, | |
| "learning_rate": 5.7116792842827847e-08, | |
| "loss": 0.3810059428215027, | |
| "step": 2570 | |
| }, | |
| { | |
| "epoch": 2.7745415318230853, | |
| "grad_norm": 1.0772795677185059, | |
| "learning_rate": 5.698384287494524e-08, | |
| "loss": 0.4859530031681061, | |
| "step": 2572 | |
| }, | |
| { | |
| "epoch": 2.7766990291262137, | |
| "grad_norm": 1.3191403150558472, | |
| "learning_rate": 5.68521268964075e-08, | |
| "loss": 0.3801627457141876, | |
| "step": 2574 | |
| }, | |
| { | |
| "epoch": 2.778856526429342, | |
| "grad_norm": 1.1567744016647339, | |
| "learning_rate": 5.672164565273794e-08, | |
| "loss": 0.43105101585388184, | |
| "step": 2576 | |
| }, | |
| { | |
| "epoch": 2.78101402373247, | |
| "grad_norm": 1.525492548942566, | |
| "learning_rate": 5.6592399882471005e-08, | |
| "loss": 0.46906399726867676, | |
| "step": 2578 | |
| }, | |
| { | |
| "epoch": 2.783171521035599, | |
| "grad_norm": 1.5278234481811523, | |
| "learning_rate": 5.646439031714843e-08, | |
| "loss": 0.44850075244903564, | |
| "step": 2580 | |
| }, | |
| { | |
| "epoch": 2.785329018338727, | |
| "grad_norm": 1.656800627708435, | |
| "learning_rate": 5.633761768131492e-08, | |
| "loss": 0.4555439352989197, | |
| "step": 2582 | |
| }, | |
| { | |
| "epoch": 2.7874865156418553, | |
| "grad_norm": 5.497950077056885, | |
| "learning_rate": 5.6212082692513836e-08, | |
| "loss": 0.5264440774917603, | |
| "step": 2584 | |
| }, | |
| { | |
| "epoch": 2.7896440129449838, | |
| "grad_norm": 3.069579601287842, | |
| "learning_rate": 5.608778606128367e-08, | |
| "loss": 0.4567970633506775, | |
| "step": 2586 | |
| }, | |
| { | |
| "epoch": 2.791801510248112, | |
| "grad_norm": 0.7134677767753601, | |
| "learning_rate": 5.59647284911535e-08, | |
| "loss": 0.5471997261047363, | |
| "step": 2588 | |
| }, | |
| { | |
| "epoch": 2.7939590075512406, | |
| "grad_norm": 1.767069935798645, | |
| "learning_rate": 5.5842910678639274e-08, | |
| "loss": 0.5194593667984009, | |
| "step": 2590 | |
| }, | |
| { | |
| "epoch": 2.796116504854369, | |
| "grad_norm": 1.2429695129394531, | |
| "learning_rate": 5.5722333313239796e-08, | |
| "loss": 0.472802996635437, | |
| "step": 2592 | |
| }, | |
| { | |
| "epoch": 2.7982740021574974, | |
| "grad_norm": 3.2857539653778076, | |
| "learning_rate": 5.5602997077432874e-08, | |
| "loss": 0.6141800880432129, | |
| "step": 2594 | |
| }, | |
| { | |
| "epoch": 2.8004314994606254, | |
| "grad_norm": 1.292257308959961, | |
| "learning_rate": 5.548490264667141e-08, | |
| "loss": 0.4846678376197815, | |
| "step": 2596 | |
| }, | |
| { | |
| "epoch": 2.8025889967637543, | |
| "grad_norm": 22.469823837280273, | |
| "learning_rate": 5.536805068937954e-08, | |
| "loss": 0.5878589749336243, | |
| "step": 2598 | |
| }, | |
| { | |
| "epoch": 2.8047464940668823, | |
| "grad_norm": 3.061379909515381, | |
| "learning_rate": 5.525244186694894e-08, | |
| "loss": 0.4901062548160553, | |
| "step": 2600 | |
| }, | |
| { | |
| "epoch": 2.8069039913700107, | |
| "grad_norm": 2.615751266479492, | |
| "learning_rate": 5.5138076833735084e-08, | |
| "loss": 0.40489572286605835, | |
| "step": 2602 | |
| }, | |
| { | |
| "epoch": 2.809061488673139, | |
| "grad_norm": 1.320258378982544, | |
| "learning_rate": 5.5024956237053384e-08, | |
| "loss": 0.5788986682891846, | |
| "step": 2604 | |
| }, | |
| { | |
| "epoch": 2.8112189859762675, | |
| "grad_norm": 1.3249293565750122, | |
| "learning_rate": 5.491308071717573e-08, | |
| "loss": 0.42938145995140076, | |
| "step": 2606 | |
| }, | |
| { | |
| "epoch": 2.813376483279396, | |
| "grad_norm": 1.0026346445083618, | |
| "learning_rate": 5.480245090732673e-08, | |
| "loss": 0.495646595954895, | |
| "step": 2608 | |
| }, | |
| { | |
| "epoch": 2.8155339805825244, | |
| "grad_norm": 1.6520612239837646, | |
| "learning_rate": 5.469306743368023e-08, | |
| "loss": 0.4816511273384094, | |
| "step": 2610 | |
| }, | |
| { | |
| "epoch": 2.8176914778856528, | |
| "grad_norm": 1.3639336824417114, | |
| "learning_rate": 5.458493091535563e-08, | |
| "loss": 0.3476675748825073, | |
| "step": 2612 | |
| }, | |
| { | |
| "epoch": 2.8198489751887807, | |
| "grad_norm": 1.3383265733718872, | |
| "learning_rate": 5.447804196441453e-08, | |
| "loss": 0.5728883147239685, | |
| "step": 2614 | |
| }, | |
| { | |
| "epoch": 2.8220064724919096, | |
| "grad_norm": 3.793182134628296, | |
| "learning_rate": 5.4372401185857145e-08, | |
| "loss": 0.6043237447738647, | |
| "step": 2616 | |
| }, | |
| { | |
| "epoch": 2.8241639697950376, | |
| "grad_norm": 2.1428301334381104, | |
| "learning_rate": 5.426800917761897e-08, | |
| "loss": 0.529897928237915, | |
| "step": 2618 | |
| }, | |
| { | |
| "epoch": 2.826321467098166, | |
| "grad_norm": 2.7142257690429688, | |
| "learning_rate": 5.41648665305673e-08, | |
| "loss": 0.5976702570915222, | |
| "step": 2620 | |
| }, | |
| { | |
| "epoch": 2.8284789644012944, | |
| "grad_norm": 1.4826109409332275, | |
| "learning_rate": 5.406297382849803e-08, | |
| "loss": 0.4717695116996765, | |
| "step": 2622 | |
| }, | |
| { | |
| "epoch": 2.830636461704423, | |
| "grad_norm": 1.6335054636001587, | |
| "learning_rate": 5.396233164813221e-08, | |
| "loss": 0.48008373379707336, | |
| "step": 2624 | |
| }, | |
| { | |
| "epoch": 2.8327939590075513, | |
| "grad_norm": 1.709679126739502, | |
| "learning_rate": 5.3862940559112795e-08, | |
| "loss": 0.5768192410469055, | |
| "step": 2626 | |
| }, | |
| { | |
| "epoch": 2.8349514563106797, | |
| "grad_norm": 3.3123061656951904, | |
| "learning_rate": 5.376480112400159e-08, | |
| "loss": 0.5282171368598938, | |
| "step": 2628 | |
| }, | |
| { | |
| "epoch": 2.837108953613808, | |
| "grad_norm": 6.133842945098877, | |
| "learning_rate": 5.366791389827578e-08, | |
| "loss": 0.47790658473968506, | |
| "step": 2630 | |
| }, | |
| { | |
| "epoch": 2.839266450916936, | |
| "grad_norm": 2.2591352462768555, | |
| "learning_rate": 5.3572279430325055e-08, | |
| "loss": 0.5901204347610474, | |
| "step": 2632 | |
| }, | |
| { | |
| "epoch": 2.841423948220065, | |
| "grad_norm": 1.7429542541503906, | |
| "learning_rate": 5.3477898261448344e-08, | |
| "loss": 0.40829578042030334, | |
| "step": 2634 | |
| }, | |
| { | |
| "epoch": 2.843581445523193, | |
| "grad_norm": 1.7901886701583862, | |
| "learning_rate": 5.3384770925850796e-08, | |
| "loss": 0.6178877353668213, | |
| "step": 2636 | |
| }, | |
| { | |
| "epoch": 2.8457389428263213, | |
| "grad_norm": 2.5778701305389404, | |
| "learning_rate": 5.3292897950640776e-08, | |
| "loss": 0.5174447298049927, | |
| "step": 2638 | |
| }, | |
| { | |
| "epoch": 2.8478964401294498, | |
| "grad_norm": 1.1254876852035522, | |
| "learning_rate": 5.3202279855826885e-08, | |
| "loss": 0.48666954040527344, | |
| "step": 2640 | |
| }, | |
| { | |
| "epoch": 2.850053937432578, | |
| "grad_norm": 1.12764310836792, | |
| "learning_rate": 5.311291715431497e-08, | |
| "loss": 0.5326154828071594, | |
| "step": 2642 | |
| }, | |
| { | |
| "epoch": 2.8522114347357066, | |
| "grad_norm": 4.193994522094727, | |
| "learning_rate": 5.3024810351905257e-08, | |
| "loss": 0.552856981754303, | |
| "step": 2644 | |
| }, | |
| { | |
| "epoch": 2.854368932038835, | |
| "grad_norm": 2.1312756538391113, | |
| "learning_rate": 5.2937959947289485e-08, | |
| "loss": 0.31122079491615295, | |
| "step": 2646 | |
| }, | |
| { | |
| "epoch": 2.8565264293419634, | |
| "grad_norm": 2.0096354484558105, | |
| "learning_rate": 5.2852366432048054e-08, | |
| "loss": 0.4695837199687958, | |
| "step": 2648 | |
| }, | |
| { | |
| "epoch": 2.858683926645092, | |
| "grad_norm": 2.734739065170288, | |
| "learning_rate": 5.2768030290647315e-08, | |
| "loss": 0.4716711640357971, | |
| "step": 2650 | |
| }, | |
| { | |
| "epoch": 2.8608414239482203, | |
| "grad_norm": 2.2249631881713867, | |
| "learning_rate": 5.26849520004367e-08, | |
| "loss": 0.44928935170173645, | |
| "step": 2652 | |
| }, | |
| { | |
| "epoch": 2.8629989212513482, | |
| "grad_norm": 4.771669387817383, | |
| "learning_rate": 5.260313203164621e-08, | |
| "loss": 0.49516862630844116, | |
| "step": 2654 | |
| }, | |
| { | |
| "epoch": 2.8651564185544767, | |
| "grad_norm": 1.4668776988983154, | |
| "learning_rate": 5.252257084738355e-08, | |
| "loss": 0.4240492582321167, | |
| "step": 2656 | |
| }, | |
| { | |
| "epoch": 2.867313915857605, | |
| "grad_norm": 1.4637136459350586, | |
| "learning_rate": 5.244326890363166e-08, | |
| "loss": 0.4604833126068115, | |
| "step": 2658 | |
| }, | |
| { | |
| "epoch": 2.8694714131607335, | |
| "grad_norm": 2.386207342147827, | |
| "learning_rate": 5.2365226649246e-08, | |
| "loss": 0.5221148133277893, | |
| "step": 2660 | |
| }, | |
| { | |
| "epoch": 2.871628910463862, | |
| "grad_norm": 1.389047622680664, | |
| "learning_rate": 5.2288444525952225e-08, | |
| "loss": 0.5388311147689819, | |
| "step": 2662 | |
| }, | |
| { | |
| "epoch": 2.8737864077669903, | |
| "grad_norm": 1.2806384563446045, | |
| "learning_rate": 5.221292296834336e-08, | |
| "loss": 0.4201410114765167, | |
| "step": 2664 | |
| }, | |
| { | |
| "epoch": 2.8759439050701188, | |
| "grad_norm": 2.330186605453491, | |
| "learning_rate": 5.213866240387767e-08, | |
| "loss": 0.48175758123397827, | |
| "step": 2666 | |
| }, | |
| { | |
| "epoch": 2.878101402373247, | |
| "grad_norm": 1.2908588647842407, | |
| "learning_rate": 5.206566325287606e-08, | |
| "loss": 0.5429530739784241, | |
| "step": 2668 | |
| }, | |
| { | |
| "epoch": 2.8802588996763756, | |
| "grad_norm": 1.2484322786331177, | |
| "learning_rate": 5.199392592851967e-08, | |
| "loss": 0.3116611838340759, | |
| "step": 2670 | |
| }, | |
| { | |
| "epoch": 2.8824163969795036, | |
| "grad_norm": 1.2387123107910156, | |
| "learning_rate": 5.192345083684766e-08, | |
| "loss": 0.5519980192184448, | |
| "step": 2672 | |
| }, | |
| { | |
| "epoch": 2.884573894282632, | |
| "grad_norm": 1.2122451066970825, | |
| "learning_rate": 5.1854238376754894e-08, | |
| "loss": 0.4367588758468628, | |
| "step": 2674 | |
| }, | |
| { | |
| "epoch": 2.8867313915857604, | |
| "grad_norm": 1.0126200914382935, | |
| "learning_rate": 5.178628893998947e-08, | |
| "loss": 0.5291083455085754, | |
| "step": 2676 | |
| }, | |
| { | |
| "epoch": 2.888888888888889, | |
| "grad_norm": 17.17489242553711, | |
| "learning_rate": 5.171960291115085e-08, | |
| "loss": 0.440005362033844, | |
| "step": 2678 | |
| }, | |
| { | |
| "epoch": 2.8910463861920173, | |
| "grad_norm": 2.179588794708252, | |
| "learning_rate": 5.165418066768743e-08, | |
| "loss": 0.33802393078804016, | |
| "step": 2680 | |
| }, | |
| { | |
| "epoch": 2.8932038834951457, | |
| "grad_norm": 1.7356486320495605, | |
| "learning_rate": 5.1590022579894453e-08, | |
| "loss": 0.6102227568626404, | |
| "step": 2682 | |
| }, | |
| { | |
| "epoch": 2.895361380798274, | |
| "grad_norm": 1.4570553302764893, | |
| "learning_rate": 5.152712901091197e-08, | |
| "loss": 0.519218921661377, | |
| "step": 2684 | |
| }, | |
| { | |
| "epoch": 2.8975188781014025, | |
| "grad_norm": 1.47493577003479, | |
| "learning_rate": 5.146550031672273e-08, | |
| "loss": 0.5683881640434265, | |
| "step": 2686 | |
| }, | |
| { | |
| "epoch": 2.899676375404531, | |
| "grad_norm": 2.640059471130371, | |
| "learning_rate": 5.1405136846150246e-08, | |
| "loss": 0.3549501597881317, | |
| "step": 2688 | |
| }, | |
| { | |
| "epoch": 2.901833872707659, | |
| "grad_norm": 3.362346887588501, | |
| "learning_rate": 5.1346038940856663e-08, | |
| "loss": 0.4709499180316925, | |
| "step": 2690 | |
| }, | |
| { | |
| "epoch": 2.9039913700107873, | |
| "grad_norm": 2.4766998291015625, | |
| "learning_rate": 5.1288206935341004e-08, | |
| "loss": 0.43772682547569275, | |
| "step": 2692 | |
| }, | |
| { | |
| "epoch": 2.9061488673139158, | |
| "grad_norm": 12.43819808959961, | |
| "learning_rate": 5.123164115693719e-08, | |
| "loss": 0.5715151429176331, | |
| "step": 2694 | |
| }, | |
| { | |
| "epoch": 2.908306364617044, | |
| "grad_norm": 1.2701572179794312, | |
| "learning_rate": 5.11763419258121e-08, | |
| "loss": 0.4489721655845642, | |
| "step": 2696 | |
| }, | |
| { | |
| "epoch": 2.9104638619201726, | |
| "grad_norm": 2.798393964767456, | |
| "learning_rate": 5.112230955496399e-08, | |
| "loss": 0.483008474111557, | |
| "step": 2698 | |
| }, | |
| { | |
| "epoch": 2.912621359223301, | |
| "grad_norm": 2.360163688659668, | |
| "learning_rate": 5.106954435022051e-08, | |
| "loss": 0.23574459552764893, | |
| "step": 2700 | |
| }, | |
| { | |
| "epoch": 2.9147788565264294, | |
| "grad_norm": 1.3117761611938477, | |
| "learning_rate": 5.1018046610236994e-08, | |
| "loss": 0.4940152168273926, | |
| "step": 2702 | |
| }, | |
| { | |
| "epoch": 2.916936353829558, | |
| "grad_norm": 4.25691556930542, | |
| "learning_rate": 5.0967816626494914e-08, | |
| "loss": 0.6037485003471375, | |
| "step": 2704 | |
| }, | |
| { | |
| "epoch": 2.9190938511326863, | |
| "grad_norm": 1.7997087240219116, | |
| "learning_rate": 5.09188546833001e-08, | |
| "loss": 0.5916652083396912, | |
| "step": 2706 | |
| }, | |
| { | |
| "epoch": 2.9212513484358142, | |
| "grad_norm": 1.2903374433517456, | |
| "learning_rate": 5.0871161057781174e-08, | |
| "loss": 0.5085786581039429, | |
| "step": 2708 | |
| }, | |
| { | |
| "epoch": 2.9234088457389427, | |
| "grad_norm": 2.052659511566162, | |
| "learning_rate": 5.0824736019887965e-08, | |
| "loss": 0.4842919409275055, | |
| "step": 2710 | |
| }, | |
| { | |
| "epoch": 2.925566343042071, | |
| "grad_norm": 1.6598625183105469, | |
| "learning_rate": 5.077957983239001e-08, | |
| "loss": 0.485705703496933, | |
| "step": 2712 | |
| }, | |
| { | |
| "epoch": 2.9277238403451995, | |
| "grad_norm": 1.2494041919708252, | |
| "learning_rate": 5.0735692750875014e-08, | |
| "loss": 0.5640828609466553, | |
| "step": 2714 | |
| }, | |
| { | |
| "epoch": 2.929881337648328, | |
| "grad_norm": 2.9620018005371094, | |
| "learning_rate": 5.0693075023747485e-08, | |
| "loss": 0.4791678786277771, | |
| "step": 2716 | |
| }, | |
| { | |
| "epoch": 2.9320388349514563, | |
| "grad_norm": 3.0161349773406982, | |
| "learning_rate": 5.0651726892227225e-08, | |
| "loss": 0.5130857229232788, | |
| "step": 2718 | |
| }, | |
| { | |
| "epoch": 2.9341963322545848, | |
| "grad_norm": 3.2139105796813965, | |
| "learning_rate": 5.061164859034808e-08, | |
| "loss": 0.5509821176528931, | |
| "step": 2720 | |
| }, | |
| { | |
| "epoch": 2.936353829557713, | |
| "grad_norm": 2.8673787117004395, | |
| "learning_rate": 5.057284034495652e-08, | |
| "loss": 0.4344579875469208, | |
| "step": 2722 | |
| }, | |
| { | |
| "epoch": 2.9385113268608416, | |
| "grad_norm": 2.557595729827881, | |
| "learning_rate": 5.05353023757104e-08, | |
| "loss": 0.4874427616596222, | |
| "step": 2724 | |
| }, | |
| { | |
| "epoch": 2.9406688241639696, | |
| "grad_norm": 1.2036933898925781, | |
| "learning_rate": 5.04990348950777e-08, | |
| "loss": 0.3965007960796356, | |
| "step": 2726 | |
| }, | |
| { | |
| "epoch": 2.9428263214670984, | |
| "grad_norm": 3.1260766983032227, | |
| "learning_rate": 5.0464038108335355e-08, | |
| "loss": 0.5202281475067139, | |
| "step": 2728 | |
| }, | |
| { | |
| "epoch": 2.9449838187702264, | |
| "grad_norm": 10.786460876464844, | |
| "learning_rate": 5.043031221356804e-08, | |
| "loss": 0.37282276153564453, | |
| "step": 2730 | |
| }, | |
| { | |
| "epoch": 2.947141316073355, | |
| "grad_norm": 1.7192671298980713, | |
| "learning_rate": 5.039785740166707e-08, | |
| "loss": 0.40285778045654297, | |
| "step": 2732 | |
| }, | |
| { | |
| "epoch": 2.9492988133764833, | |
| "grad_norm": 1.8213419914245605, | |
| "learning_rate": 5.036667385632939e-08, | |
| "loss": 0.49603918194770813, | |
| "step": 2734 | |
| }, | |
| { | |
| "epoch": 2.9514563106796117, | |
| "grad_norm": 1.3751518726348877, | |
| "learning_rate": 5.0336761754056387e-08, | |
| "loss": 0.4726869463920593, | |
| "step": 2736 | |
| }, | |
| { | |
| "epoch": 2.95361380798274, | |
| "grad_norm": 2.531486749649048, | |
| "learning_rate": 5.030812126415301e-08, | |
| "loss": 0.4985443949699402, | |
| "step": 2738 | |
| }, | |
| { | |
| "epoch": 2.9557713052858685, | |
| "grad_norm": 1.1253113746643066, | |
| "learning_rate": 5.028075254872682e-08, | |
| "loss": 0.43464401364326477, | |
| "step": 2740 | |
| }, | |
| { | |
| "epoch": 2.957928802588997, | |
| "grad_norm": 1.2070587873458862, | |
| "learning_rate": 5.025465576268697e-08, | |
| "loss": 0.4627860486507416, | |
| "step": 2742 | |
| }, | |
| { | |
| "epoch": 2.960086299892125, | |
| "grad_norm": 6.266397953033447, | |
| "learning_rate": 5.0229831053743396e-08, | |
| "loss": 0.4669394791126251, | |
| "step": 2744 | |
| }, | |
| { | |
| "epoch": 2.9622437971952538, | |
| "grad_norm": 1.160976767539978, | |
| "learning_rate": 5.020627856240602e-08, | |
| "loss": 0.5476797819137573, | |
| "step": 2746 | |
| }, | |
| { | |
| "epoch": 2.9644012944983817, | |
| "grad_norm": 1.2844939231872559, | |
| "learning_rate": 5.018399842198384e-08, | |
| "loss": 0.4904819130897522, | |
| "step": 2748 | |
| }, | |
| { | |
| "epoch": 2.96655879180151, | |
| "grad_norm": 2.1023972034454346, | |
| "learning_rate": 5.016299075858434e-08, | |
| "loss": 0.5619233250617981, | |
| "step": 2750 | |
| }, | |
| { | |
| "epoch": 2.9687162891046386, | |
| "grad_norm": 2.740511417388916, | |
| "learning_rate": 5.0143255691112545e-08, | |
| "loss": 0.5617838501930237, | |
| "step": 2752 | |
| }, | |
| { | |
| "epoch": 2.970873786407767, | |
| "grad_norm": 1.1180198192596436, | |
| "learning_rate": 5.012479333127061e-08, | |
| "loss": 0.6266708374023438, | |
| "step": 2754 | |
| }, | |
| { | |
| "epoch": 2.9730312837108954, | |
| "grad_norm": 2.0200552940368652, | |
| "learning_rate": 5.0107603783556983e-08, | |
| "loss": 0.2789224684238434, | |
| "step": 2756 | |
| }, | |
| { | |
| "epoch": 2.975188781014024, | |
| "grad_norm": 1.1091794967651367, | |
| "learning_rate": 5.009168714526591e-08, | |
| "loss": 0.5262956023216248, | |
| "step": 2758 | |
| }, | |
| { | |
| "epoch": 2.9773462783171523, | |
| "grad_norm": 1.1029998064041138, | |
| "learning_rate": 5.0077043506486894e-08, | |
| "loss": 0.1211983859539032, | |
| "step": 2760 | |
| }, | |
| { | |
| "epoch": 2.9795037756202802, | |
| "grad_norm": 1.243391990661621, | |
| "learning_rate": 5.006367295010413e-08, | |
| "loss": 0.36533698439598083, | |
| "step": 2762 | |
| }, | |
| { | |
| "epoch": 2.981661272923409, | |
| "grad_norm": 1.189220666885376, | |
| "learning_rate": 5.005157555179603e-08, | |
| "loss": 0.6093316078186035, | |
| "step": 2764 | |
| }, | |
| { | |
| "epoch": 2.983818770226537, | |
| "grad_norm": 1.3239524364471436, | |
| "learning_rate": 5.0040751380034905e-08, | |
| "loss": 0.29459065198898315, | |
| "step": 2766 | |
| }, | |
| { | |
| "epoch": 2.9859762675296655, | |
| "grad_norm": 3.5429818630218506, | |
| "learning_rate": 5.0031200496086436e-08, | |
| "loss": 0.5054223537445068, | |
| "step": 2768 | |
| }, | |
| { | |
| "epoch": 2.988133764832794, | |
| "grad_norm": 3.4252212047576904, | |
| "learning_rate": 5.0022922954009416e-08, | |
| "loss": 0.34435006976127625, | |
| "step": 2770 | |
| }, | |
| { | |
| "epoch": 2.9902912621359223, | |
| "grad_norm": 2.453650951385498, | |
| "learning_rate": 5.001591880065541e-08, | |
| "loss": 0.5675299167633057, | |
| "step": 2772 | |
| }, | |
| { | |
| "epoch": 2.9924487594390508, | |
| "grad_norm": 1.6163811683654785, | |
| "learning_rate": 5.001018807566848e-08, | |
| "loss": 0.5318145751953125, | |
| "step": 2774 | |
| }, | |
| { | |
| "epoch": 2.994606256742179, | |
| "grad_norm": 4.034750461578369, | |
| "learning_rate": 5.000573081148502e-08, | |
| "loss": 0.5025465488433838, | |
| "step": 2776 | |
| }, | |
| { | |
| "epoch": 2.9967637540453076, | |
| "grad_norm": 1.6249115467071533, | |
| "learning_rate": 5.0002547033333525e-08, | |
| "loss": 0.235714852809906, | |
| "step": 2778 | |
| }, | |
| { | |
| "epoch": 2.9989212513484356, | |
| "grad_norm": 2.0912704467773438, | |
| "learning_rate": 5.000063675923442e-08, | |
| "loss": 0.2616070806980133, | |
| "step": 2780 | |
| }, | |
| { | |
| "epoch": 3.0, | |
| "step": 2781, | |
| "total_flos": 3.284111394515778e+18, | |
| "train_loss": 0.5546778752215686, | |
| "train_runtime": 35021.8231, | |
| "train_samples_per_second": 1.271, | |
| "train_steps_per_second": 0.079 | |
| } | |
| ], | |
| "logging_steps": 2, | |
| "max_steps": 2781, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 3, | |
| "save_steps": 99999, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": false, | |
| "should_training_stop": false | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 3.284111394515778e+18, | |
| "train_batch_size": 2, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |