Image-Text-to-Text
Transformers
Safetensors
qwen3_5
llama-factory
full
Generated from Trainer
conversational
Instructions to use furproxy/9b-125-2 with libraries, inference providers, notebooks, and local apps. Follow these links to get started.
- Libraries
- Transformers
How to use furproxy/9b-125-2 with Transformers:
```python
# Use a pipeline as a high-level helper
from transformers import pipeline

pipe = pipeline("image-text-to-text", model="furproxy/9b-125-2")
messages = [
    {
        "role": "user",
        "content": [
            {"type": "image", "url": "https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/p-blog/candy.JPG"},
            {"type": "text", "text": "What animal is on the candy?"}
        ]
    },
]
pipe(text=messages)
```

```python
# Load model directly
from transformers import AutoProcessor, AutoModelForImageTextToText

processor = AutoProcessor.from_pretrained("furproxy/9b-125-2")
model = AutoModelForImageTextToText.from_pretrained("furproxy/9b-125-2")
messages = [
    {
        "role": "user",
        "content": [
            {"type": "image", "url": "https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/p-blog/candy.JPG"},
            {"type": "text", "text": "What animal is on the candy?"}
        ]
    },
]
inputs = processor.apply_chat_template(
    messages,
    add_generation_prompt=True,
    tokenize=True,
    return_dict=True,
    return_tensors="pt",
).to(model.device)

outputs = model.generate(**inputs, max_new_tokens=40)
print(processor.decode(outputs[0][inputs["input_ids"].shape[-1]:]))
```
- Notebooks
- Google Colab
- Kaggle
- Local Apps Settings
- vLLM
How to use furproxy/9b-125-2 with vLLM:
Install from pip and serve model
```bash
# Install vLLM from pip:
pip install vllm

# Start the vLLM server:
vllm serve "furproxy/9b-125-2"

# Call the server using curl (OpenAI-compatible API):
curl -X POST "http://localhost:8000/v1/chat/completions" \
    -H "Content-Type: application/json" \
    --data '{
        "model": "furproxy/9b-125-2",
        "messages": [
            {
                "role": "user",
                "content": [
                    {
                        "type": "text",
                        "text": "Describe this image in one sentence."
                    },
                    {
                        "type": "image_url",
                        "image_url": {
                            "url": "https://cdn.britannica.com/61/93061-050-99147DCE/Statue-of-Liberty-Island-New-York-Bay.jpg"
                        }
                    }
                ]
            }
        ]
    }'
```
Use Docker
docker model run hf.co/furproxy/9b-125-2
- SGLang
How to use furproxy/9b-125-2 with SGLang:
Install from pip and serve model
```bash
# Install SGLang from pip:
pip install sglang

# Start the SGLang server:
python3 -m sglang.launch_server \
    --model-path "furproxy/9b-125-2" \
    --host 0.0.0.0 \
    --port 30000

# Call the server using curl (OpenAI-compatible API):
curl -X POST "http://localhost:30000/v1/chat/completions" \
    -H "Content-Type: application/json" \
    --data '{
        "model": "furproxy/9b-125-2",
        "messages": [
            {
                "role": "user",
                "content": [
                    {
                        "type": "text",
                        "text": "Describe this image in one sentence."
                    },
                    {
                        "type": "image_url",
                        "image_url": {
                            "url": "https://cdn.britannica.com/61/93061-050-99147DCE/Statue-of-Liberty-Island-New-York-Bay.jpg"
                        }
                    }
                ]
            }
        ]
    }'
```
Use Docker images
```bash
docker run --gpus all \
    --shm-size 32g \
    -p 30000:30000 \
    -v ~/.cache/huggingface:/root/.cache/huggingface \
    --env "HF_TOKEN=<secret>" \
    --ipc=host \
    lmsysorg/sglang:latest \
    python3 -m sglang.launch_server \
        --model-path "furproxy/9b-125-2" \
        --host 0.0.0.0 \
        --port 30000

# Call the server using curl (OpenAI-compatible API):
curl -X POST "http://localhost:30000/v1/chat/completions" \
    -H "Content-Type: application/json" \
    --data '{
        "model": "furproxy/9b-125-2",
        "messages": [
            {
                "role": "user",
                "content": [
                    {
                        "type": "text",
                        "text": "Describe this image in one sentence."
                    },
                    {
                        "type": "image_url",
                        "image_url": {
                            "url": "https://cdn.britannica.com/61/93061-050-99147DCE/Statue-of-Liberty-Island-New-York-Bay.jpg"
                        }
                    }
                ]
            }
        ]
    }'
```
- Docker Model Runner
How to use furproxy/9b-125-2 with Docker Model Runner:
docker model run hf.co/furproxy/9b-125-2
| { | |
| "best_global_step": null, | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 3.0, | |
| "eval_steps": 500, | |
| "global_step": 2781, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.002157497303128371, | |
| "grad_norm": 2.5762534141540527, | |
| "learning_rate": 1.4285714285714284e-08, | |
| "loss": 0.9756889939308167, | |
| "step": 2 | |
| }, | |
| { | |
| "epoch": 0.004314994606256742, | |
| "grad_norm": 4.507562637329102, | |
| "learning_rate": 4.285714285714286e-08, | |
| "loss": 0.661911129951477, | |
| "step": 4 | |
| }, | |
| { | |
| "epoch": 0.006472491909385114, | |
| "grad_norm": 5.074595928192139, | |
| "learning_rate": 7.142857142857142e-08, | |
| "loss": 0.8359099626541138, | |
| "step": 6 | |
| }, | |
| { | |
| "epoch": 0.008629989212513484, | |
| "grad_norm": 1.9605220556259155, | |
| "learning_rate": 1e-07, | |
| "loss": 0.8231168985366821, | |
| "step": 8 | |
| }, | |
| { | |
| "epoch": 0.010787486515641856, | |
| "grad_norm": 1.6714932918548584, | |
| "learning_rate": 1.2857142857142855e-07, | |
| "loss": 0.7096143960952759, | |
| "step": 10 | |
| }, | |
| { | |
| "epoch": 0.012944983818770227, | |
| "grad_norm": 4.321066379547119, | |
| "learning_rate": 1.5714285714285714e-07, | |
| "loss": 0.7912081480026245, | |
| "step": 12 | |
| }, | |
| { | |
| "epoch": 0.015102481121898598, | |
| "grad_norm": 8.806991577148438, | |
| "learning_rate": 1.8571428571428572e-07, | |
| "loss": 0.856565535068512, | |
| "step": 14 | |
| }, | |
| { | |
| "epoch": 0.017259978425026967, | |
| "grad_norm": 4.99681282043457, | |
| "learning_rate": 2.1428571428571426e-07, | |
| "loss": 1.1687901020050049, | |
| "step": 16 | |
| }, | |
| { | |
| "epoch": 0.019417475728155338, | |
| "grad_norm": 2.5645041465759277, | |
| "learning_rate": 2.4285714285714287e-07, | |
| "loss": 1.0152899026870728, | |
| "step": 18 | |
| }, | |
| { | |
| "epoch": 0.021574973031283712, | |
| "grad_norm": 1.8295631408691406, | |
| "learning_rate": 2.714285714285714e-07, | |
| "loss": 0.6205134987831116, | |
| "step": 20 | |
| }, | |
| { | |
| "epoch": 0.023732470334412083, | |
| "grad_norm": 4.249131202697754, | |
| "learning_rate": 3e-07, | |
| "loss": 0.9060122966766357, | |
| "step": 22 | |
| }, | |
| { | |
| "epoch": 0.025889967637540454, | |
| "grad_norm": 3.811307191848755, | |
| "learning_rate": 3.2857142857142857e-07, | |
| "loss": 0.8020558953285217, | |
| "step": 24 | |
| }, | |
| { | |
| "epoch": 0.028047464940668825, | |
| "grad_norm": 3.780266284942627, | |
| "learning_rate": 3.5714285714285716e-07, | |
| "loss": 0.7707346677780151, | |
| "step": 26 | |
| }, | |
| { | |
| "epoch": 0.030204962243797196, | |
| "grad_norm": 2.9128313064575195, | |
| "learning_rate": 3.857142857142857e-07, | |
| "loss": 0.6942681670188904, | |
| "step": 28 | |
| }, | |
| { | |
| "epoch": 0.032362459546925564, | |
| "grad_norm": 3.999141216278076, | |
| "learning_rate": 4.142857142857143e-07, | |
| "loss": 1.0023633241653442, | |
| "step": 30 | |
| }, | |
| { | |
| "epoch": 0.034519956850053934, | |
| "grad_norm": 1.0977139472961426, | |
| "learning_rate": 4.428571428571428e-07, | |
| "loss": 0.6870981454849243, | |
| "step": 32 | |
| }, | |
| { | |
| "epoch": 0.036677454153182305, | |
| "grad_norm": 7.128520965576172, | |
| "learning_rate": 4.714285714285714e-07, | |
| "loss": 0.9697052836418152, | |
| "step": 34 | |
| }, | |
| { | |
| "epoch": 0.038834951456310676, | |
| "grad_norm": 2.706787109375, | |
| "learning_rate": 5e-07, | |
| "loss": 0.8704000115394592, | |
| "step": 36 | |
| }, | |
| { | |
| "epoch": 0.040992448759439054, | |
| "grad_norm": 28.9028377532959, | |
| "learning_rate": 5.285714285714286e-07, | |
| "loss": 1.3768728971481323, | |
| "step": 38 | |
| }, | |
| { | |
| "epoch": 0.043149946062567425, | |
| "grad_norm": 5.019951343536377, | |
| "learning_rate": 5.571428571428571e-07, | |
| "loss": 0.5050771832466125, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 0.045307443365695796, | |
| "grad_norm": 1.896100401878357, | |
| "learning_rate": 5.857142857142857e-07, | |
| "loss": 0.7155470848083496, | |
| "step": 42 | |
| }, | |
| { | |
| "epoch": 0.04746494066882417, | |
| "grad_norm": 1.3924132585525513, | |
| "learning_rate": 6.142857142857143e-07, | |
| "loss": 0.8246269822120667, | |
| "step": 44 | |
| }, | |
| { | |
| "epoch": 0.04962243797195254, | |
| "grad_norm": 2.244379758834839, | |
| "learning_rate": 6.428571428571429e-07, | |
| "loss": 0.6300725340843201, | |
| "step": 46 | |
| }, | |
| { | |
| "epoch": 0.05177993527508091, | |
| "grad_norm": 4.4260573387146, | |
| "learning_rate": 6.714285714285714e-07, | |
| "loss": 0.7466659545898438, | |
| "step": 48 | |
| }, | |
| { | |
| "epoch": 0.05393743257820928, | |
| "grad_norm": 3.2849934101104736, | |
| "learning_rate": 7e-07, | |
| "loss": 0.5939810276031494, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 0.05609492988133765, | |
| "grad_norm": 1.4281387329101562, | |
| "learning_rate": 7.285714285714286e-07, | |
| "loss": 0.36054807901382446, | |
| "step": 52 | |
| }, | |
| { | |
| "epoch": 0.05825242718446602, | |
| "grad_norm": 6.341257095336914, | |
| "learning_rate": 7.57142857142857e-07, | |
| "loss": 0.8747532367706299, | |
| "step": 54 | |
| }, | |
| { | |
| "epoch": 0.06040992448759439, | |
| "grad_norm": 14.657182693481445, | |
| "learning_rate": 7.857142857142856e-07, | |
| "loss": 0.991641640663147, | |
| "step": 56 | |
| }, | |
| { | |
| "epoch": 0.06256742179072276, | |
| "grad_norm": 1.8714779615402222, | |
| "learning_rate": 8.142857142857142e-07, | |
| "loss": 0.6384595632553101, | |
| "step": 58 | |
| }, | |
| { | |
| "epoch": 0.06472491909385113, | |
| "grad_norm": 6.577902793884277, | |
| "learning_rate": 8.428571428571428e-07, | |
| "loss": 0.7958289384841919, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 0.0668824163969795, | |
| "grad_norm": 6.575786590576172, | |
| "learning_rate": 8.714285714285714e-07, | |
| "loss": 0.9528671503067017, | |
| "step": 62 | |
| }, | |
| { | |
| "epoch": 0.06903991370010787, | |
| "grad_norm": 2.090458631515503, | |
| "learning_rate": 9e-07, | |
| "loss": 0.7380706071853638, | |
| "step": 64 | |
| }, | |
| { | |
| "epoch": 0.07119741100323625, | |
| "grad_norm": 2.861210584640503, | |
| "learning_rate": 9.285714285714285e-07, | |
| "loss": 0.8518832921981812, | |
| "step": 66 | |
| }, | |
| { | |
| "epoch": 0.07335490830636461, | |
| "grad_norm": 2.01114559173584, | |
| "learning_rate": 9.571428571428572e-07, | |
| "loss": 0.7344475388526917, | |
| "step": 68 | |
| }, | |
| { | |
| "epoch": 0.07551240560949299, | |
| "grad_norm": 9.70598316192627, | |
| "learning_rate": 9.857142857142857e-07, | |
| "loss": 0.6952767968177795, | |
| "step": 70 | |
| }, | |
| { | |
| "epoch": 0.07766990291262135, | |
| "grad_norm": 3.434774160385132, | |
| "learning_rate": 1.0142857142857142e-06, | |
| "loss": 0.6583250761032104, | |
| "step": 72 | |
| }, | |
| { | |
| "epoch": 0.07982740021574973, | |
| "grad_norm": 2.2592010498046875, | |
| "learning_rate": 1.0428571428571429e-06, | |
| "loss": 0.6166390180587769, | |
| "step": 74 | |
| }, | |
| { | |
| "epoch": 0.08198489751887811, | |
| "grad_norm": 2.191253185272217, | |
| "learning_rate": 1.0714285714285714e-06, | |
| "loss": 0.6770592927932739, | |
| "step": 76 | |
| }, | |
| { | |
| "epoch": 0.08414239482200647, | |
| "grad_norm": 1.9236204624176025, | |
| "learning_rate": 1.1e-06, | |
| "loss": 0.6031355261802673, | |
| "step": 78 | |
| }, | |
| { | |
| "epoch": 0.08629989212513485, | |
| "grad_norm": 4.434933662414551, | |
| "learning_rate": 1.1285714285714285e-06, | |
| "loss": 0.949522078037262, | |
| "step": 80 | |
| }, | |
| { | |
| "epoch": 0.08845738942826321, | |
| "grad_norm": 3.6469240188598633, | |
| "learning_rate": 1.1571428571428572e-06, | |
| "loss": 0.5610405206680298, | |
| "step": 82 | |
| }, | |
| { | |
| "epoch": 0.09061488673139159, | |
| "grad_norm": 2.5983078479766846, | |
| "learning_rate": 1.1857142857142857e-06, | |
| "loss": 0.40821573138237, | |
| "step": 84 | |
| }, | |
| { | |
| "epoch": 0.09277238403451996, | |
| "grad_norm": 1.9814131259918213, | |
| "learning_rate": 1.2142857142857142e-06, | |
| "loss": 0.6474723815917969, | |
| "step": 86 | |
| }, | |
| { | |
| "epoch": 0.09492988133764833, | |
| "grad_norm": 1.5465178489685059, | |
| "learning_rate": 1.2428571428571429e-06, | |
| "loss": 0.6871901154518127, | |
| "step": 88 | |
| }, | |
| { | |
| "epoch": 0.0970873786407767, | |
| "grad_norm": 2.41676664352417, | |
| "learning_rate": 1.2714285714285714e-06, | |
| "loss": 0.6116282343864441, | |
| "step": 90 | |
| }, | |
| { | |
| "epoch": 0.09924487594390508, | |
| "grad_norm": 1.3548113107681274, | |
| "learning_rate": 1.3e-06, | |
| "loss": 0.6866545677185059, | |
| "step": 92 | |
| }, | |
| { | |
| "epoch": 0.10140237324703344, | |
| "grad_norm": 3.994168519973755, | |
| "learning_rate": 1.3285714285714285e-06, | |
| "loss": 0.7566230297088623, | |
| "step": 94 | |
| }, | |
| { | |
| "epoch": 0.10355987055016182, | |
| "grad_norm": 0.6952499151229858, | |
| "learning_rate": 1.3571428571428572e-06, | |
| "loss": 0.5508694648742676, | |
| "step": 96 | |
| }, | |
| { | |
| "epoch": 0.10571736785329018, | |
| "grad_norm": 2.8167052268981934, | |
| "learning_rate": 1.3857142857142857e-06, | |
| "loss": 0.6589823961257935, | |
| "step": 98 | |
| }, | |
| { | |
| "epoch": 0.10787486515641856, | |
| "grad_norm": 20.95288848876953, | |
| "learning_rate": 1.4142857142857144e-06, | |
| "loss": 0.558512806892395, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.11003236245954692, | |
| "grad_norm": 2.7887301445007324, | |
| "learning_rate": 1.4428571428571429e-06, | |
| "loss": 0.7576460838317871, | |
| "step": 102 | |
| }, | |
| { | |
| "epoch": 0.1121898597626753, | |
| "grad_norm": 3.9458041191101074, | |
| "learning_rate": 1.4714285714285716e-06, | |
| "loss": 0.7658395171165466, | |
| "step": 104 | |
| }, | |
| { | |
| "epoch": 0.11434735706580366, | |
| "grad_norm": 3.11449933052063, | |
| "learning_rate": 1.5e-06, | |
| "loss": 0.40781867504119873, | |
| "step": 106 | |
| }, | |
| { | |
| "epoch": 0.11650485436893204, | |
| "grad_norm": 55.18882369995117, | |
| "learning_rate": 1.5285714285714283e-06, | |
| "loss": 0.7694418430328369, | |
| "step": 108 | |
| }, | |
| { | |
| "epoch": 0.1186623516720604, | |
| "grad_norm": 2.1952178478240967, | |
| "learning_rate": 1.557142857142857e-06, | |
| "loss": 0.6382092833518982, | |
| "step": 110 | |
| }, | |
| { | |
| "epoch": 0.12081984897518878, | |
| "grad_norm": 4.497931957244873, | |
| "learning_rate": 1.5857142857142855e-06, | |
| "loss": 0.5454550981521606, | |
| "step": 112 | |
| }, | |
| { | |
| "epoch": 0.12297734627831715, | |
| "grad_norm": 3.0473287105560303, | |
| "learning_rate": 1.6142857142857142e-06, | |
| "loss": 0.5170645713806152, | |
| "step": 114 | |
| }, | |
| { | |
| "epoch": 0.12513484358144553, | |
| "grad_norm": 3.3767971992492676, | |
| "learning_rate": 1.6428571428571426e-06, | |
| "loss": 0.5989764332771301, | |
| "step": 116 | |
| }, | |
| { | |
| "epoch": 0.1272923408845739, | |
| "grad_norm": 2.4410431385040283, | |
| "learning_rate": 1.6714285714285713e-06, | |
| "loss": 0.538360595703125, | |
| "step": 118 | |
| }, | |
| { | |
| "epoch": 0.12944983818770225, | |
| "grad_norm": 2.4122188091278076, | |
| "learning_rate": 1.6999999999999998e-06, | |
| "loss": 0.5263152122497559, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 0.13160733549083065, | |
| "grad_norm": 1.3902043104171753, | |
| "learning_rate": 1.7285714285714285e-06, | |
| "loss": 0.6690125465393066, | |
| "step": 122 | |
| }, | |
| { | |
| "epoch": 0.133764832793959, | |
| "grad_norm": 1.8167104721069336, | |
| "learning_rate": 1.757142857142857e-06, | |
| "loss": 0.6804316639900208, | |
| "step": 124 | |
| }, | |
| { | |
| "epoch": 0.13592233009708737, | |
| "grad_norm": 0.8370219469070435, | |
| "learning_rate": 1.7857142857142857e-06, | |
| "loss": 0.8587678670883179, | |
| "step": 126 | |
| }, | |
| { | |
| "epoch": 0.13807982740021574, | |
| "grad_norm": 1.6989076137542725, | |
| "learning_rate": 1.8142857142857142e-06, | |
| "loss": 0.5992355942726135, | |
| "step": 128 | |
| }, | |
| { | |
| "epoch": 0.14023732470334413, | |
| "grad_norm": 2.9481143951416016, | |
| "learning_rate": 1.8428571428571426e-06, | |
| "loss": 0.35928595066070557, | |
| "step": 130 | |
| }, | |
| { | |
| "epoch": 0.1423948220064725, | |
| "grad_norm": 4.938568592071533, | |
| "learning_rate": 1.8714285714285713e-06, | |
| "loss": 0.6318232417106628, | |
| "step": 132 | |
| }, | |
| { | |
| "epoch": 0.14455231930960086, | |
| "grad_norm": 6.683016777038574, | |
| "learning_rate": 1.8999999999999998e-06, | |
| "loss": 0.7641289234161377, | |
| "step": 134 | |
| }, | |
| { | |
| "epoch": 0.14670981661272922, | |
| "grad_norm": 3.555145740509033, | |
| "learning_rate": 1.9285714285714285e-06, | |
| "loss": 0.7520711421966553, | |
| "step": 136 | |
| }, | |
| { | |
| "epoch": 0.1488673139158576, | |
| "grad_norm": 2.5793240070343018, | |
| "learning_rate": 1.957142857142857e-06, | |
| "loss": 0.7046728730201721, | |
| "step": 138 | |
| }, | |
| { | |
| "epoch": 0.15102481121898598, | |
| "grad_norm": 4.05105447769165, | |
| "learning_rate": 1.985714285714286e-06, | |
| "loss": 0.4872206449508667, | |
| "step": 140 | |
| }, | |
| { | |
| "epoch": 0.15318230852211434, | |
| "grad_norm": 3.6239945888519287, | |
| "learning_rate": 1.9999993632405402e-06, | |
| "loss": 0.6896734237670898, | |
| "step": 142 | |
| }, | |
| { | |
| "epoch": 0.1553398058252427, | |
| "grad_norm": 1.404502272605896, | |
| "learning_rate": 1.999994269170269e-06, | |
| "loss": 0.5899009108543396, | |
| "step": 144 | |
| }, | |
| { | |
| "epoch": 0.1574973031283711, | |
| "grad_norm": 4.971773147583008, | |
| "learning_rate": 1.9999840810585597e-06, | |
| "loss": 0.7049793601036072, | |
| "step": 146 | |
| }, | |
| { | |
| "epoch": 0.15965480043149946, | |
| "grad_norm": 1.3405131101608276, | |
| "learning_rate": 1.9999687989630773e-06, | |
| "loss": 0.7568652033805847, | |
| "step": 148 | |
| }, | |
| { | |
| "epoch": 0.16181229773462782, | |
| "grad_norm": 1.5060237646102905, | |
| "learning_rate": 1.9999484229703205e-06, | |
| "loss": 0.5981850624084473, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 0.16396979503775622, | |
| "grad_norm": 5.329726696014404, | |
| "learning_rate": 1.9999229531956187e-06, | |
| "loss": 0.4102513790130615, | |
| "step": 152 | |
| }, | |
| { | |
| "epoch": 0.16612729234088458, | |
| "grad_norm": 6.227677822113037, | |
| "learning_rate": 1.9998923897831327e-06, | |
| "loss": 0.6681348085403442, | |
| "step": 154 | |
| }, | |
| { | |
| "epoch": 0.16828478964401294, | |
| "grad_norm": 12.442171096801758, | |
| "learning_rate": 1.9998567329058537e-06, | |
| "loss": 0.7901923060417175, | |
| "step": 156 | |
| }, | |
| { | |
| "epoch": 0.1704422869471413, | |
| "grad_norm": 1.463600754737854, | |
| "learning_rate": 1.9998159827656035e-06, | |
| "loss": 0.6713565587997437, | |
| "step": 158 | |
| }, | |
| { | |
| "epoch": 0.1725997842502697, | |
| "grad_norm": 4.213168621063232, | |
| "learning_rate": 1.9997701395930303e-06, | |
| "loss": 0.8061548471450806, | |
| "step": 160 | |
| }, | |
| { | |
| "epoch": 0.17475728155339806, | |
| "grad_norm": 1.8637281656265259, | |
| "learning_rate": 1.9997192036476113e-06, | |
| "loss": 0.6461450457572937, | |
| "step": 162 | |
| }, | |
| { | |
| "epoch": 0.17691477885652643, | |
| "grad_norm": 1.6268417835235596, | |
| "learning_rate": 1.999663175217647e-06, | |
| "loss": 0.5500176548957825, | |
| "step": 164 | |
| }, | |
| { | |
| "epoch": 0.1790722761596548, | |
| "grad_norm": 1.7090154886245728, | |
| "learning_rate": 1.999602054620263e-06, | |
| "loss": 0.6174845695495605, | |
| "step": 166 | |
| }, | |
| { | |
| "epoch": 0.18122977346278318, | |
| "grad_norm": 1.6175590753555298, | |
| "learning_rate": 1.9995358422014078e-06, | |
| "loss": 0.5844609141349792, | |
| "step": 168 | |
| }, | |
| { | |
| "epoch": 0.18338727076591155, | |
| "grad_norm": 1.7991397380828857, | |
| "learning_rate": 1.9994645383358485e-06, | |
| "loss": 0.6766707897186279, | |
| "step": 170 | |
| }, | |
| { | |
| "epoch": 0.1855447680690399, | |
| "grad_norm": 2.1886162757873535, | |
| "learning_rate": 1.9993881434271707e-06, | |
| "loss": 0.7125424146652222, | |
| "step": 172 | |
| }, | |
| { | |
| "epoch": 0.18770226537216828, | |
| "grad_norm": 2.0554721355438232, | |
| "learning_rate": 1.9993066579077766e-06, | |
| "loss": 0.6898304224014282, | |
| "step": 174 | |
| }, | |
| { | |
| "epoch": 0.18985976267529667, | |
| "grad_norm": 1.493452548980713, | |
| "learning_rate": 1.9992200822388794e-06, | |
| "loss": 0.6477411985397339, | |
| "step": 176 | |
| }, | |
| { | |
| "epoch": 0.19201725997842503, | |
| "grad_norm": 5.064616680145264, | |
| "learning_rate": 1.999128416910507e-06, | |
| "loss": 0.5987610816955566, | |
| "step": 178 | |
| }, | |
| { | |
| "epoch": 0.1941747572815534, | |
| "grad_norm": 1.9240336418151855, | |
| "learning_rate": 1.9990316624414902e-06, | |
| "loss": 0.5100513100624084, | |
| "step": 180 | |
| }, | |
| { | |
| "epoch": 0.19633225458468176, | |
| "grad_norm": 2.9364237785339355, | |
| "learning_rate": 1.998929819379468e-06, | |
| "loss": 0.8424034714698792, | |
| "step": 182 | |
| }, | |
| { | |
| "epoch": 0.19848975188781015, | |
| "grad_norm": 1.8846420049667358, | |
| "learning_rate": 1.998822888300881e-06, | |
| "loss": 0.6540043354034424, | |
| "step": 184 | |
| }, | |
| { | |
| "epoch": 0.20064724919093851, | |
| "grad_norm": 25.783281326293945, | |
| "learning_rate": 1.9987108698109675e-06, | |
| "loss": 0.7192497253417969, | |
| "step": 186 | |
| }, | |
| { | |
| "epoch": 0.20280474649406688, | |
| "grad_norm": 3.445650100708008, | |
| "learning_rate": 1.9985937645437617e-06, | |
| "loss": 0.4845433831214905, | |
| "step": 188 | |
| }, | |
| { | |
| "epoch": 0.20496224379719524, | |
| "grad_norm": 2.80410099029541, | |
| "learning_rate": 1.9984715731620883e-06, | |
| "loss": 0.42129552364349365, | |
| "step": 190 | |
| }, | |
| { | |
| "epoch": 0.20711974110032363, | |
| "grad_norm": 4.098501682281494, | |
| "learning_rate": 1.9983442963575616e-06, | |
| "loss": 0.5982234477996826, | |
| "step": 192 | |
| }, | |
| { | |
| "epoch": 0.209277238403452, | |
| "grad_norm": 3.001051664352417, | |
| "learning_rate": 1.998211934850578e-06, | |
| "loss": 0.65160071849823, | |
| "step": 194 | |
| }, | |
| { | |
| "epoch": 0.21143473570658036, | |
| "grad_norm": 3.8019604682922363, | |
| "learning_rate": 1.998074489390314e-06, | |
| "loss": 0.5729217529296875, | |
| "step": 196 | |
| }, | |
| { | |
| "epoch": 0.21359223300970873, | |
| "grad_norm": 7.936295986175537, | |
| "learning_rate": 1.997931960754724e-06, | |
| "loss": 0.6380269527435303, | |
| "step": 198 | |
| }, | |
| { | |
| "epoch": 0.21574973031283712, | |
| "grad_norm": 1.6919151544570923, | |
| "learning_rate": 1.99778434975053e-06, | |
| "loss": 0.692238450050354, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.21790722761596548, | |
| "grad_norm": 2.152122974395752, | |
| "learning_rate": 1.997631657213223e-06, | |
| "loss": 0.5761340856552124, | |
| "step": 202 | |
| }, | |
| { | |
| "epoch": 0.22006472491909385, | |
| "grad_norm": 1.63760244846344, | |
| "learning_rate": 1.9974738840070554e-06, | |
| "loss": 0.6452651619911194, | |
| "step": 204 | |
| }, | |
| { | |
| "epoch": 0.2222222222222222, | |
| "grad_norm": 2.098172187805176, | |
| "learning_rate": 1.9973110310250364e-06, | |
| "loss": 0.6504206657409668, | |
| "step": 206 | |
| }, | |
| { | |
| "epoch": 0.2243797195253506, | |
| "grad_norm": 5.233450889587402, | |
| "learning_rate": 1.9971430991889274e-06, | |
| "loss": 0.603036642074585, | |
| "step": 208 | |
| }, | |
| { | |
| "epoch": 0.22653721682847897, | |
| "grad_norm": 5.004756927490234, | |
| "learning_rate": 1.996970089449236e-06, | |
| "loss": 0.6306214332580566, | |
| "step": 210 | |
| }, | |
| { | |
| "epoch": 0.22869471413160733, | |
| "grad_norm": 2.422592878341675, | |
| "learning_rate": 1.9967920027852115e-06, | |
| "loss": 0.5596987009048462, | |
| "step": 212 | |
| }, | |
| { | |
| "epoch": 0.2308522114347357, | |
| "grad_norm": 1.5725358724594116, | |
| "learning_rate": 1.99660884020484e-06, | |
| "loss": 0.5717631578445435, | |
| "step": 214 | |
| }, | |
| { | |
| "epoch": 0.23300970873786409, | |
| "grad_norm": 1.93349289894104, | |
| "learning_rate": 1.9964206027448355e-06, | |
| "loss": 0.5819022059440613, | |
| "step": 216 | |
| }, | |
| { | |
| "epoch": 0.23516720604099245, | |
| "grad_norm": 4.832953453063965, | |
| "learning_rate": 1.9962272914706387e-06, | |
| "loss": 0.6450964212417603, | |
| "step": 218 | |
| }, | |
| { | |
| "epoch": 0.2373247033441208, | |
| "grad_norm": 3.763730525970459, | |
| "learning_rate": 1.996028907476406e-06, | |
| "loss": 0.7145527601242065, | |
| "step": 220 | |
| }, | |
| { | |
| "epoch": 0.23948220064724918, | |
| "grad_norm": 3.421351909637451, | |
| "learning_rate": 1.995825451885008e-06, | |
| "loss": 0.7275266051292419, | |
| "step": 222 | |
| }, | |
| { | |
| "epoch": 0.24163969795037757, | |
| "grad_norm": 1.2539039850234985, | |
| "learning_rate": 1.995616925848019e-06, | |
| "loss": 0.6702066659927368, | |
| "step": 224 | |
| }, | |
| { | |
| "epoch": 0.24379719525350593, | |
| "grad_norm": 3.2891104221343994, | |
| "learning_rate": 1.9954033305457154e-06, | |
| "loss": 0.6769608855247498, | |
| "step": 226 | |
| }, | |
| { | |
| "epoch": 0.2459546925566343, | |
| "grad_norm": 3.350397825241089, | |
| "learning_rate": 1.995184667187062e-06, | |
| "loss": 0.5695778727531433, | |
| "step": 228 | |
| }, | |
| { | |
| "epoch": 0.2481121898597627, | |
| "grad_norm": 1.5770859718322754, | |
| "learning_rate": 1.994960937009713e-06, | |
| "loss": 0.690039873123169, | |
| "step": 230 | |
| }, | |
| { | |
| "epoch": 0.25026968716289105, | |
| "grad_norm": 12.937152862548828, | |
| "learning_rate": 1.9947321412799988e-06, | |
| "loss": 0.883323073387146, | |
| "step": 232 | |
| }, | |
| { | |
| "epoch": 0.2524271844660194, | |
| "grad_norm": 3.2988932132720947, | |
| "learning_rate": 1.994498281292922e-06, | |
| "loss": 0.798008918762207, | |
| "step": 234 | |
| }, | |
| { | |
| "epoch": 0.2545846817691478, | |
| "grad_norm": 8.973938941955566, | |
| "learning_rate": 1.9942593583721493e-06, | |
| "loss": 0.6434545516967773, | |
| "step": 236 | |
| }, | |
| { | |
| "epoch": 0.25674217907227614, | |
| "grad_norm": 1.573055624961853, | |
| "learning_rate": 1.9940153738700045e-06, | |
| "loss": 0.6816240549087524, | |
| "step": 238 | |
| }, | |
| { | |
| "epoch": 0.2588996763754045, | |
| "grad_norm": 1.305035948753357, | |
| "learning_rate": 1.9937663291674593e-06, | |
| "loss": 0.7506214380264282, | |
| "step": 240 | |
| }, | |
| { | |
| "epoch": 0.26105717367853293, | |
| "grad_norm": 1.362859845161438, | |
| "learning_rate": 1.993512225674127e-06, | |
| "loss": 0.6811486482620239, | |
| "step": 242 | |
| }, | |
| { | |
| "epoch": 0.2632146709816613, | |
| "grad_norm": 1.6364890336990356, | |
| "learning_rate": 1.9932530648282555e-06, | |
| "loss": 0.648339569568634, | |
| "step": 244 | |
| }, | |
| { | |
| "epoch": 0.26537216828478966, | |
| "grad_norm": 1.4568816423416138, | |
| "learning_rate": 1.992988848096715e-06, | |
| "loss": 0.6864685416221619, | |
| "step": 246 | |
| }, | |
| { | |
| "epoch": 0.267529665587918, | |
| "grad_norm": 37.19289779663086, | |
| "learning_rate": 1.9927195769749953e-06, | |
| "loss": 0.8183077573776245, | |
| "step": 248 | |
| }, | |
| { | |
| "epoch": 0.2696871628910464, | |
| "grad_norm": 2.8543128967285156, | |
| "learning_rate": 1.9924452529871915e-06, | |
| "loss": 0.6495329141616821, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 0.27184466019417475, | |
| "grad_norm": 2.8163821697235107, | |
| "learning_rate": 1.992165877686001e-06, | |
| "loss": 0.7900782227516174, | |
| "step": 252 | |
| }, | |
| { | |
| "epoch": 0.2740021574973031, | |
| "grad_norm": 1.5852282047271729, | |
| "learning_rate": 1.9918814526527105e-06, | |
| "loss": 0.47972819209098816, | |
| "step": 254 | |
| }, | |
| { | |
| "epoch": 0.2761596548004315, | |
| "grad_norm": 1.4288207292556763, | |
| "learning_rate": 1.9915919794971892e-06, | |
| "loss": 0.5876221656799316, | |
| "step": 256 | |
| }, | |
| { | |
| "epoch": 0.2783171521035599, | |
| "grad_norm": 3.196465015411377, | |
| "learning_rate": 1.9912974598578793e-06, | |
| "loss": 0.46865469217300415, | |
| "step": 258 | |
| }, | |
| { | |
| "epoch": 0.28047464940668826, | |
| "grad_norm": 1.5016988515853882, | |
| "learning_rate": 1.9909978954017847e-06, | |
| "loss": 0.7941604852676392, | |
| "step": 260 | |
| }, | |
| { | |
| "epoch": 0.2826321467098166, | |
| "grad_norm": 2.889617919921875, | |
| "learning_rate": 1.9906932878244665e-06, | |
| "loss": 0.5378029942512512, | |
| "step": 262 | |
| }, | |
| { | |
| "epoch": 0.284789644012945, | |
| "grad_norm": 1.2625280618667603, | |
| "learning_rate": 1.990383638850028e-06, | |
| "loss": 0.7057135701179504, | |
| "step": 264 | |
| }, | |
| { | |
| "epoch": 0.28694714131607335, | |
| "grad_norm": 2.495908260345459, | |
| "learning_rate": 1.990068950231107e-06, | |
| "loss": 0.6187635660171509, | |
| "step": 266 | |
| }, | |
| { | |
| "epoch": 0.2891046386192017, | |
| "grad_norm": 3.3619916439056396, | |
| "learning_rate": 1.9897492237488683e-06, | |
| "loss": 0.7007441520690918, | |
| "step": 268 | |
| }, | |
| { | |
| "epoch": 0.2912621359223301, | |
| "grad_norm": 2.7590885162353516, | |
| "learning_rate": 1.9894244612129886e-06, | |
| "loss": 0.5531818270683289, | |
| "step": 270 | |
| }, | |
| { | |
| "epoch": 0.29341963322545844, | |
| "grad_norm": 13.184637069702148, | |
| "learning_rate": 1.9890946644616523e-06, | |
| "loss": 0.7034265398979187, | |
| "step": 272 | |
| }, | |
| { | |
| "epoch": 0.29557713052858686, | |
| "grad_norm": 3.412360191345215, | |
| "learning_rate": 1.9887598353615344e-06, | |
| "loss": 0.7009316086769104, | |
| "step": 274 | |
| }, | |
| { | |
| "epoch": 0.2977346278317152, | |
| "grad_norm": 4.455333232879639, | |
| "learning_rate": 1.988419975807796e-06, | |
| "loss": 0.662095844745636, | |
| "step": 276 | |
| }, | |
| { | |
| "epoch": 0.2998921251348436, | |
| "grad_norm": 4.525757789611816, | |
| "learning_rate": 1.988075087724069e-06, | |
| "loss": 0.6825252771377563, | |
| "step": 278 | |
| }, | |
| { | |
| "epoch": 0.30204962243797195, | |
| "grad_norm": 2.143056869506836, | |
| "learning_rate": 1.98772517306245e-06, | |
| "loss": 0.674209713935852, | |
| "step": 280 | |
| }, | |
| { | |
| "epoch": 0.3042071197411003, | |
| "grad_norm": 8.63754940032959, | |
| "learning_rate": 1.9873702338034837e-06, | |
| "loss": 0.6299592852592468, | |
| "step": 282 | |
| }, | |
| { | |
| "epoch": 0.3063646170442287, | |
| "grad_norm": 3.6352531909942627, | |
| "learning_rate": 1.9870102719561552e-06, | |
| "loss": 0.6460418701171875, | |
| "step": 284 | |
| }, | |
| { | |
| "epoch": 0.30852211434735705, | |
| "grad_norm": 6.43894624710083, | |
| "learning_rate": 1.9866452895578784e-06, | |
| "loss": 0.4357595145702362, | |
| "step": 286 | |
| }, | |
| { | |
| "epoch": 0.3106796116504854, | |
| "grad_norm": 4.515871047973633, | |
| "learning_rate": 1.986275288674484e-06, | |
| "loss": 0.6767151355743408, | |
| "step": 288 | |
| }, | |
| { | |
| "epoch": 0.31283710895361383, | |
| "grad_norm": 1.191287636756897, | |
| "learning_rate": 1.9859002714002067e-06, | |
| "loss": 0.6962684392929077, | |
| "step": 290 | |
| }, | |
| { | |
| "epoch": 0.3149946062567422, | |
| "grad_norm": 6.864166259765625, | |
| "learning_rate": 1.9855202398576756e-06, | |
| "loss": 0.6553777456283569, | |
| "step": 292 | |
| }, | |
| { | |
| "epoch": 0.31715210355987056, | |
| "grad_norm": 2.185790777206421, | |
| "learning_rate": 1.9851351961979e-06, | |
| "loss": 0.7482725977897644, | |
| "step": 294 | |
| }, | |
| { | |
| "epoch": 0.3193096008629989, | |
| "grad_norm": 3.45621395111084, | |
| "learning_rate": 1.9847451426002587e-06, | |
| "loss": 0.4616151452064514, | |
| "step": 296 | |
| }, | |
| { | |
| "epoch": 0.3214670981661273, | |
| "grad_norm": 6.849677085876465, | |
| "learning_rate": 1.9843500812724876e-06, | |
| "loss": 0.6612831354141235, | |
| "step": 298 | |
| }, | |
| { | |
| "epoch": 0.32362459546925565, | |
| "grad_norm": 5.291024208068848, | |
| "learning_rate": 1.9839500144506657e-06, | |
| "loss": 0.6871935129165649, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 0.325782092772384, | |
| "grad_norm": 4.101494789123535, | |
| "learning_rate": 1.9835449443992042e-06, | |
| "loss": 0.4521007537841797, | |
| "step": 302 | |
| }, | |
| { | |
| "epoch": 0.32793959007551243, | |
| "grad_norm": 2.2320597171783447, | |
| "learning_rate": 1.9831348734108325e-06, | |
| "loss": 0.6099227666854858, | |
| "step": 304 | |
| }, | |
| { | |
| "epoch": 0.3300970873786408, | |
| "grad_norm": 2.0806515216827393, | |
| "learning_rate": 1.9827198038065867e-06, | |
| "loss": 0.6959011554718018, | |
| "step": 306 | |
| }, | |
| { | |
| "epoch": 0.33225458468176916, | |
| "grad_norm": 2.5396556854248047, | |
| "learning_rate": 1.9822997379357946e-06, | |
| "loss": 0.6063118577003479, | |
| "step": 308 | |
| }, | |
| { | |
| "epoch": 0.3344120819848975, | |
| "grad_norm": 1.9925243854522705, | |
| "learning_rate": 1.9818746781760637e-06, | |
| "loss": 0.7447793483734131, | |
| "step": 310 | |
| }, | |
| { | |
| "epoch": 0.3365695792880259, | |
| "grad_norm": 1.5124423503875732, | |
| "learning_rate": 1.9814446269332665e-06, | |
| "loss": 0.6756496429443359, | |
| "step": 312 | |
| }, | |
| { | |
| "epoch": 0.33872707659115425, | |
| "grad_norm": 1.5381604433059692, | |
| "learning_rate": 1.9810095866415288e-06, | |
| "loss": 0.7244548797607422, | |
| "step": 314 | |
| }, | |
| { | |
| "epoch": 0.3408845738942826, | |
| "grad_norm": 2.3492956161499023, | |
| "learning_rate": 1.980569559763214e-06, | |
| "loss": 0.7345068454742432, | |
| "step": 316 | |
| }, | |
| { | |
| "epoch": 0.343042071197411, | |
| "grad_norm": 3.104962110519409, | |
| "learning_rate": 1.980124548788911e-06, | |
| "loss": 0.7250902056694031, | |
| "step": 318 | |
| }, | |
| { | |
| "epoch": 0.3451995685005394, | |
| "grad_norm": 5.452524662017822, | |
| "learning_rate": 1.9796745562374177e-06, | |
| "loss": 0.5213475823402405, | |
| "step": 320 | |
| }, | |
| { | |
| "epoch": 0.34735706580366776, | |
| "grad_norm": 1.2911655902862549, | |
| "learning_rate": 1.9792195846557292e-06, | |
| "loss": 0.41105973720550537, | |
| "step": 322 | |
| }, | |
| { | |
| "epoch": 0.34951456310679613, | |
| "grad_norm": 1.4482433795928955, | |
| "learning_rate": 1.9787596366190224e-06, | |
| "loss": 0.6460384130477905, | |
| "step": 324 | |
| }, | |
| { | |
| "epoch": 0.3516720604099245, | |
| "grad_norm": 7.015091419219971, | |
| "learning_rate": 1.9782947147306403e-06, | |
| "loss": 0.7474948763847351, | |
| "step": 326 | |
| }, | |
| { | |
| "epoch": 0.35382955771305286, | |
| "grad_norm": 3.078240156173706, | |
| "learning_rate": 1.9778248216220793e-06, | |
| "loss": 0.6818826198577881, | |
| "step": 328 | |
| }, | |
| { | |
| "epoch": 0.3559870550161812, | |
| "grad_norm": 1.273003339767456, | |
| "learning_rate": 1.977349959952973e-06, | |
| "loss": 0.6558285355567932, | |
| "step": 330 | |
| }, | |
| { | |
| "epoch": 0.3581445523193096, | |
| "grad_norm": 5.311271667480469, | |
| "learning_rate": 1.976870132411077e-06, | |
| "loss": 0.48177772760391235, | |
| "step": 332 | |
| }, | |
| { | |
| "epoch": 0.36030204962243795, | |
| "grad_norm": 4.013199329376221, | |
| "learning_rate": 1.976385341712255e-06, | |
| "loss": 0.6249281167984009, | |
| "step": 334 | |
| }, | |
| { | |
| "epoch": 0.36245954692556637, | |
| "grad_norm": 10.565508842468262, | |
| "learning_rate": 1.9758955906004624e-06, | |
| "loss": 0.5407902002334595, | |
| "step": 336 | |
| }, | |
| { | |
| "epoch": 0.36461704422869473, | |
| "grad_norm": 3.089301824569702, | |
| "learning_rate": 1.97540088184773e-06, | |
| "loss": 0.5577709674835205, | |
| "step": 338 | |
| }, | |
| { | |
| "epoch": 0.3667745415318231, | |
| "grad_norm": 9.143464088439941, | |
| "learning_rate": 1.97490121825415e-06, | |
| "loss": 0.7211488485336304, | |
| "step": 340 | |
| }, | |
| { | |
| "epoch": 0.36893203883495146, | |
| "grad_norm": 1.91471529006958, | |
| "learning_rate": 1.97439660264786e-06, | |
| "loss": 0.6752923727035522, | |
| "step": 342 | |
| }, | |
| { | |
| "epoch": 0.3710895361380798, | |
| "grad_norm": 1.919268250465393, | |
| "learning_rate": 1.9738870378850255e-06, | |
| "loss": 0.6122534871101379, | |
| "step": 344 | |
| }, | |
| { | |
| "epoch": 0.3732470334412082, | |
| "grad_norm": 1.600658893585205, | |
| "learning_rate": 1.973372526849825e-06, | |
| "loss": 0.6465229988098145, | |
| "step": 346 | |
| }, | |
| { | |
| "epoch": 0.37540453074433655, | |
| "grad_norm": 2.6975924968719482, | |
| "learning_rate": 1.9728530724544317e-06, | |
| "loss": 0.7250155806541443, | |
| "step": 348 | |
| }, | |
| { | |
| "epoch": 0.3775620280474649, | |
| "grad_norm": 1.543426752090454, | |
| "learning_rate": 1.972328677639003e-06, | |
| "loss": 0.6498576998710632, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 0.37971952535059333, | |
| "grad_norm": 2.0587544441223145, | |
| "learning_rate": 1.971799345371654e-06, | |
| "loss": 0.6255270838737488, | |
| "step": 352 | |
| }, | |
| { | |
| "epoch": 0.3818770226537217, | |
| "grad_norm": 4.156800746917725, | |
| "learning_rate": 1.97126507864845e-06, | |
| "loss": 0.7264662981033325, | |
| "step": 354 | |
| }, | |
| { | |
| "epoch": 0.38403451995685006, | |
| "grad_norm": 1.7672313451766968, | |
| "learning_rate": 1.9707258804933843e-06, | |
| "loss": 0.6123859882354736, | |
| "step": 356 | |
| }, | |
| { | |
| "epoch": 0.3861920172599784, | |
| "grad_norm": 2.0073418617248535, | |
| "learning_rate": 1.9701817539583623e-06, | |
| "loss": 0.584026038646698, | |
| "step": 358 | |
| }, | |
| { | |
| "epoch": 0.3883495145631068, | |
| "grad_norm": 1.1840739250183105, | |
| "learning_rate": 1.9696327021231857e-06, | |
| "loss": 0.7097981572151184, | |
| "step": 360 | |
| }, | |
| { | |
| "epoch": 0.39050701186623515, | |
| "grad_norm": 1.7339706420898438, | |
| "learning_rate": 1.9690787280955324e-06, | |
| "loss": 0.6338366866111755, | |
| "step": 362 | |
| }, | |
| { | |
| "epoch": 0.3926645091693635, | |
| "grad_norm": 2.4935598373413086, | |
| "learning_rate": 1.9685198350109406e-06, | |
| "loss": 0.5935678482055664, | |
| "step": 364 | |
| }, | |
| { | |
| "epoch": 0.3948220064724919, | |
| "grad_norm": 6.941248893737793, | |
| "learning_rate": 1.9679560260327916e-06, | |
| "loss": 0.7488420009613037, | |
| "step": 366 | |
| }, | |
| { | |
| "epoch": 0.3969795037756203, | |
| "grad_norm": 5.579442024230957, | |
| "learning_rate": 1.9673873043522904e-06, | |
| "loss": 0.6892845630645752, | |
| "step": 368 | |
| }, | |
| { | |
| "epoch": 0.39913700107874867, | |
| "grad_norm": 1.7267175912857056, | |
| "learning_rate": 1.9668136731884486e-06, | |
| "loss": 0.7125424742698669, | |
| "step": 370 | |
| }, | |
| { | |
| "epoch": 0.40129449838187703, | |
| "grad_norm": 1.7778825759887695, | |
| "learning_rate": 1.966235135788065e-06, | |
| "loss": 0.6329432129859924, | |
| "step": 372 | |
| }, | |
| { | |
| "epoch": 0.4034519956850054, | |
| "grad_norm": 4.815840244293213, | |
| "learning_rate": 1.965651695425709e-06, | |
| "loss": 0.6711968183517456, | |
| "step": 374 | |
| }, | |
| { | |
| "epoch": 0.40560949298813376, | |
| "grad_norm": 1.8194682598114014, | |
| "learning_rate": 1.965063355403701e-06, | |
| "loss": 0.5624091625213623, | |
| "step": 376 | |
| }, | |
| { | |
| "epoch": 0.4077669902912621, | |
| "grad_norm": 3.847508192062378, | |
| "learning_rate": 1.9644701190520943e-06, | |
| "loss": 0.43470942974090576, | |
| "step": 378 | |
| }, | |
| { | |
| "epoch": 0.4099244875943905, | |
| "grad_norm": 1.7189173698425293, | |
| "learning_rate": 1.9638719897286545e-06, | |
| "loss": 0.5556265115737915, | |
| "step": 380 | |
| }, | |
| { | |
| "epoch": 0.4120819848975189, | |
| "grad_norm": 2.6449780464172363, | |
| "learning_rate": 1.9632689708188435e-06, | |
| "loss": 0.5694633722305298, | |
| "step": 382 | |
| }, | |
| { | |
| "epoch": 0.41423948220064727, | |
| "grad_norm": 6.1240410804748535, | |
| "learning_rate": 1.962661065735797e-06, | |
| "loss": 0.6872696280479431, | |
| "step": 384 | |
| }, | |
| { | |
| "epoch": 0.41639697950377563, | |
| "grad_norm": 5.022050380706787, | |
| "learning_rate": 1.9620482779203086e-06, | |
| "loss": 0.6486364603042603, | |
| "step": 386 | |
| }, | |
| { | |
| "epoch": 0.418554476806904, | |
| "grad_norm": 1.9404337406158447, | |
| "learning_rate": 1.961430610840807e-06, | |
| "loss": 0.6287031173706055, | |
| "step": 388 | |
| }, | |
| { | |
| "epoch": 0.42071197411003236, | |
| "grad_norm": 4.457851886749268, | |
| "learning_rate": 1.9608080679933385e-06, | |
| "loss": 0.40318727493286133, | |
| "step": 390 | |
| }, | |
| { | |
| "epoch": 0.4228694714131607, | |
| "grad_norm": 8.74242115020752, | |
| "learning_rate": 1.960180652901547e-06, | |
| "loss": 0.6349734663963318, | |
| "step": 392 | |
| }, | |
| { | |
| "epoch": 0.4250269687162891, | |
| "grad_norm": 4.8024725914001465, | |
| "learning_rate": 1.9595483691166534e-06, | |
| "loss": 0.6840596199035645, | |
| "step": 394 | |
| }, | |
| { | |
| "epoch": 0.42718446601941745, | |
| "grad_norm": 7.498271465301514, | |
| "learning_rate": 1.958911220217436e-06, | |
| "loss": 0.817265510559082, | |
| "step": 396 | |
| }, | |
| { | |
| "epoch": 0.42934196332254587, | |
| "grad_norm": 2.826599359512329, | |
| "learning_rate": 1.958269209810209e-06, | |
| "loss": 0.5891008377075195, | |
| "step": 398 | |
| }, | |
| { | |
| "epoch": 0.43149946062567424, | |
| "grad_norm": 4.398319244384766, | |
| "learning_rate": 1.957622341528805e-06, | |
| "loss": 0.5453633069992065, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 0.4336569579288026, | |
| "grad_norm": 2.75591778755188, | |
| "learning_rate": 1.9569706190345512e-06, | |
| "loss": 0.6217541098594666, | |
| "step": 402 | |
| }, | |
| { | |
| "epoch": 0.43581445523193096, | |
| "grad_norm": 2.1976888179779053, | |
| "learning_rate": 1.9563140460162505e-06, | |
| "loss": 0.658210813999176, | |
| "step": 404 | |
| }, | |
| { | |
| "epoch": 0.43797195253505933, | |
| "grad_norm": 4.405237197875977, | |
| "learning_rate": 1.9556526261901602e-06, | |
| "loss": 0.735411524772644, | |
| "step": 406 | |
| }, | |
| { | |
| "epoch": 0.4401294498381877, | |
| "grad_norm": 3.2632224559783936, | |
| "learning_rate": 1.95498636329997e-06, | |
| "loss": 0.5909388065338135, | |
| "step": 408 | |
| }, | |
| { | |
| "epoch": 0.44228694714131606, | |
| "grad_norm": 2.1249871253967285, | |
| "learning_rate": 1.9543152611167837e-06, | |
| "loss": 0.7845476865768433, | |
| "step": 410 | |
| }, | |
| { | |
| "epoch": 0.4444444444444444, | |
| "grad_norm": 4.967487335205078, | |
| "learning_rate": 1.9536393234390937e-06, | |
| "loss": 0.6481199860572815, | |
| "step": 412 | |
| }, | |
| { | |
| "epoch": 0.44660194174757284, | |
| "grad_norm": 1.769586443901062, | |
| "learning_rate": 1.9529585540927636e-06, | |
| "loss": 0.5764113068580627, | |
| "step": 414 | |
| }, | |
| { | |
| "epoch": 0.4487594390507012, | |
| "grad_norm": 4.130702972412109, | |
| "learning_rate": 1.9522729569310036e-06, | |
| "loss": 0.7091037034988403, | |
| "step": 416 | |
| }, | |
| { | |
| "epoch": 0.45091693635382957, | |
| "grad_norm": 2.021075487136841, | |
| "learning_rate": 1.9515825358343494e-06, | |
| "loss": 0.6121603846549988, | |
| "step": 418 | |
| }, | |
| { | |
| "epoch": 0.45307443365695793, | |
| "grad_norm": 2.6177845001220703, | |
| "learning_rate": 1.9508872947106413e-06, | |
| "loss": 0.6298436522483826, | |
| "step": 420 | |
| }, | |
| { | |
| "epoch": 0.4552319309600863, | |
| "grad_norm": 2.469846487045288, | |
| "learning_rate": 1.9501872374950016e-06, | |
| "loss": 0.6969653367996216, | |
| "step": 422 | |
| }, | |
| { | |
| "epoch": 0.45738942826321466, | |
| "grad_norm": 1.5605947971343994, | |
| "learning_rate": 1.949482368149811e-06, | |
| "loss": 0.6391591429710388, | |
| "step": 424 | |
| }, | |
| { | |
| "epoch": 0.459546925566343, | |
| "grad_norm": 7.972052097320557, | |
| "learning_rate": 1.948772690664688e-06, | |
| "loss": 0.6320364475250244, | |
| "step": 426 | |
| }, | |
| { | |
| "epoch": 0.4617044228694714, | |
| "grad_norm": 3.05439829826355, | |
| "learning_rate": 1.9480582090564657e-06, | |
| "loss": 0.7582883238792419, | |
| "step": 428 | |
| }, | |
| { | |
| "epoch": 0.4638619201725998, | |
| "grad_norm": 1.7435839176177979, | |
| "learning_rate": 1.9473389273691686e-06, | |
| "loss": 0.653886616230011, | |
| "step": 430 | |
| }, | |
| { | |
| "epoch": 0.46601941747572817, | |
| "grad_norm": 1.643883466720581, | |
| "learning_rate": 1.9466148496739893e-06, | |
| "loss": 0.6401156187057495, | |
| "step": 432 | |
| }, | |
| { | |
| "epoch": 0.46817691477885653, | |
| "grad_norm": 1.892043113708496, | |
| "learning_rate": 1.9458859800692685e-06, | |
| "loss": 0.42768222093582153, | |
| "step": 434 | |
| }, | |
| { | |
| "epoch": 0.4703344120819849, | |
| "grad_norm": 7.755466938018799, | |
| "learning_rate": 1.9451523226804665e-06, | |
| "loss": 0.7829925417900085, | |
| "step": 436 | |
| }, | |
| { | |
| "epoch": 0.47249190938511326, | |
| "grad_norm": 25.02216339111328, | |
| "learning_rate": 1.944413881660145e-06, | |
| "loss": 0.8609887361526489, | |
| "step": 438 | |
| }, | |
| { | |
| "epoch": 0.4746494066882416, | |
| "grad_norm": 1.8407223224639893, | |
| "learning_rate": 1.9436706611879413e-06, | |
| "loss": 0.5991024374961853, | |
| "step": 440 | |
| }, | |
| { | |
| "epoch": 0.47680690399137, | |
| "grad_norm": 1.7243049144744873, | |
| "learning_rate": 1.9429226654705433e-06, | |
| "loss": 0.5861119627952576, | |
| "step": 442 | |
| }, | |
| { | |
| "epoch": 0.47896440129449835, | |
| "grad_norm": 3.1721413135528564, | |
| "learning_rate": 1.9421698987416685e-06, | |
| "loss": 0.6749376058578491, | |
| "step": 444 | |
| }, | |
| { | |
| "epoch": 0.4811218985976268, | |
| "grad_norm": 3.368656873703003, | |
| "learning_rate": 1.941412365262039e-06, | |
| "loss": 0.6256532073020935, | |
| "step": 446 | |
| }, | |
| { | |
| "epoch": 0.48327939590075514, | |
| "grad_norm": 2.6724510192871094, | |
| "learning_rate": 1.9406500693193555e-06, | |
| "loss": 0.6529517769813538, | |
| "step": 448 | |
| }, | |
| { | |
| "epoch": 0.4854368932038835, | |
| "grad_norm": 2.0183353424072266, | |
| "learning_rate": 1.939883015228276e-06, | |
| "loss": 0.7027242183685303, | |
| "step": 450 | |
| }, | |
| { | |
| "epoch": 0.48759439050701187, | |
| "grad_norm": 2.5906269550323486, | |
| "learning_rate": 1.9391112073303897e-06, | |
| "loss": 0.6666867733001709, | |
| "step": 452 | |
| }, | |
| { | |
| "epoch": 0.48975188781014023, | |
| "grad_norm": 3.279174327850342, | |
| "learning_rate": 1.9383346499941934e-06, | |
| "loss": 0.6068412065505981, | |
| "step": 454 | |
| }, | |
| { | |
| "epoch": 0.4919093851132686, | |
| "grad_norm": 2.020169496536255, | |
| "learning_rate": 1.937553347615064e-06, | |
| "loss": 0.49952036142349243, | |
| "step": 456 | |
| }, | |
| { | |
| "epoch": 0.49406688241639696, | |
| "grad_norm": 1.5360465049743652, | |
| "learning_rate": 1.936767304615237e-06, | |
| "loss": 0.6741431951522827, | |
| "step": 458 | |
| }, | |
| { | |
| "epoch": 0.4962243797195254, | |
| "grad_norm": 2.7520928382873535, | |
| "learning_rate": 1.935976525443782e-06, | |
| "loss": 0.6988986730575562, | |
| "step": 460 | |
| }, | |
| { | |
| "epoch": 0.49838187702265374, | |
| "grad_norm": 3.1902847290039062, | |
| "learning_rate": 1.935181014576573e-06, | |
| "loss": 0.6338163614273071, | |
| "step": 462 | |
| }, | |
| { | |
| "epoch": 0.5005393743257821, | |
| "grad_norm": 2.226433515548706, | |
| "learning_rate": 1.934380776516266e-06, | |
| "loss": 0.6862495541572571, | |
| "step": 464 | |
| }, | |
| { | |
| "epoch": 0.5026968716289104, | |
| "grad_norm": 1.4880640506744385, | |
| "learning_rate": 1.9335758157922757e-06, | |
| "loss": 0.7557521462440491, | |
| "step": 466 | |
| }, | |
| { | |
| "epoch": 0.5048543689320388, | |
| "grad_norm": 1.4232990741729736, | |
| "learning_rate": 1.932766136960745e-06, | |
| "loss": 0.675652027130127, | |
| "step": 468 | |
| }, | |
| { | |
| "epoch": 0.5070118662351673, | |
| "grad_norm": 2.1612069606781006, | |
| "learning_rate": 1.931951744604522e-06, | |
| "loss": 0.621537446975708, | |
| "step": 470 | |
| }, | |
| { | |
| "epoch": 0.5091693635382956, | |
| "grad_norm": 1.3291016817092896, | |
| "learning_rate": 1.9311326433331355e-06, | |
| "loss": 0.6317250728607178, | |
| "step": 472 | |
| }, | |
| { | |
| "epoch": 0.511326860841424, | |
| "grad_norm": 1.6099094152450562, | |
| "learning_rate": 1.9303088377827653e-06, | |
| "loss": 0.7552534937858582, | |
| "step": 474 | |
| }, | |
| { | |
| "epoch": 0.5134843581445523, | |
| "grad_norm": 1.411257028579712, | |
| "learning_rate": 1.9294803326162187e-06, | |
| "loss": 0.7963615655899048, | |
| "step": 476 | |
| }, | |
| { | |
| "epoch": 0.5156418554476807, | |
| "grad_norm": 2.952651262283325, | |
| "learning_rate": 1.9286471325229026e-06, | |
| "loss": 0.7329859733581543, | |
| "step": 478 | |
| }, | |
| { | |
| "epoch": 0.517799352750809, | |
| "grad_norm": 0.9981465935707092, | |
| "learning_rate": 1.9278092422187978e-06, | |
| "loss": 0.4232223331928253, | |
| "step": 480 | |
| }, | |
| { | |
| "epoch": 0.5199568500539374, | |
| "grad_norm": 1.3622761964797974, | |
| "learning_rate": 1.926966666446433e-06, | |
| "loss": 0.6612151265144348, | |
| "step": 482 | |
| }, | |
| { | |
| "epoch": 0.5221143473570659, | |
| "grad_norm": 2.9351611137390137, | |
| "learning_rate": 1.9261194099748554e-06, | |
| "loss": 0.6452651023864746, | |
| "step": 484 | |
| }, | |
| { | |
| "epoch": 0.5242718446601942, | |
| "grad_norm": 2.3402223587036133, | |
| "learning_rate": 1.9252674775996062e-06, | |
| "loss": 0.7631157040596008, | |
| "step": 486 | |
| }, | |
| { | |
| "epoch": 0.5264293419633226, | |
| "grad_norm": 2.175255537033081, | |
| "learning_rate": 1.9244108741426933e-06, | |
| "loss": 0.6183757781982422, | |
| "step": 488 | |
| }, | |
| { | |
| "epoch": 0.5285868392664509, | |
| "grad_norm": 1.3048573732376099, | |
| "learning_rate": 1.923549604452562e-06, | |
| "loss": 0.5466787219047546, | |
| "step": 490 | |
| }, | |
| { | |
| "epoch": 0.5307443365695793, | |
| "grad_norm": 6.882724761962891, | |
| "learning_rate": 1.9226836734040696e-06, | |
| "loss": 0.6256377696990967, | |
| "step": 492 | |
| }, | |
| { | |
| "epoch": 0.5329018338727076, | |
| "grad_norm": 5.080470085144043, | |
| "learning_rate": 1.9218130858984566e-06, | |
| "loss": 0.7089909315109253, | |
| "step": 494 | |
| }, | |
| { | |
| "epoch": 0.535059331175836, | |
| "grad_norm": 5.151968955993652, | |
| "learning_rate": 1.92093784686332e-06, | |
| "loss": 0.5963342785835266, | |
| "step": 496 | |
| }, | |
| { | |
| "epoch": 0.5372168284789643, | |
| "grad_norm": 2.0868022441864014, | |
| "learning_rate": 1.9200579612525847e-06, | |
| "loss": 0.7230027318000793, | |
| "step": 498 | |
| }, | |
| { | |
| "epoch": 0.5393743257820928, | |
| "grad_norm": 1.5343974828720093, | |
| "learning_rate": 1.919173434046476e-06, | |
| "loss": 0.5582040548324585, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.5415318230852212, | |
| "grad_norm": 2.0801985263824463, | |
| "learning_rate": 1.9182842702514894e-06, | |
| "loss": 0.7240785956382751, | |
| "step": 502 | |
| }, | |
| { | |
| "epoch": 0.5436893203883495, | |
| "grad_norm": 4.034970760345459, | |
| "learning_rate": 1.917390474900365e-06, | |
| "loss": 0.6458247900009155, | |
| "step": 504 | |
| }, | |
| { | |
| "epoch": 0.5458468176914779, | |
| "grad_norm": 1.5025601387023926, | |
| "learning_rate": 1.916492053052059e-06, | |
| "loss": 0.7182348370552063, | |
| "step": 506 | |
| }, | |
| { | |
| "epoch": 0.5480043149946062, | |
| "grad_norm": 1.2147194147109985, | |
| "learning_rate": 1.915589009791712e-06, | |
| "loss": 0.7499125599861145, | |
| "step": 508 | |
| }, | |
| { | |
| "epoch": 0.5501618122977346, | |
| "grad_norm": 1.183869481086731, | |
| "learning_rate": 1.914681350230623e-06, | |
| "loss": 0.6138162612915039, | |
| "step": 510 | |
| }, | |
| { | |
| "epoch": 0.552319309600863, | |
| "grad_norm": 1.6860522031784058, | |
| "learning_rate": 1.9137690795062195e-06, | |
| "loss": 0.665122389793396, | |
| "step": 512 | |
| }, | |
| { | |
| "epoch": 0.5544768069039914, | |
| "grad_norm": 2.0282976627349854, | |
| "learning_rate": 1.9128522027820286e-06, | |
| "loss": 0.6816024780273438, | |
| "step": 514 | |
| }, | |
| { | |
| "epoch": 0.5566343042071198, | |
| "grad_norm": 4.281038284301758, | |
| "learning_rate": 1.911930725247649e-06, | |
| "loss": 0.5960591435432434, | |
| "step": 516 | |
| }, | |
| { | |
| "epoch": 0.5587918015102481, | |
| "grad_norm": 1.3044649362564087, | |
| "learning_rate": 1.911004652118718e-06, | |
| "loss": 0.7166500687599182, | |
| "step": 518 | |
| }, | |
| { | |
| "epoch": 0.5609492988133765, | |
| "grad_norm": 4.467653751373291, | |
| "learning_rate": 1.9100739886368856e-06, | |
| "loss": 0.6787055134773254, | |
| "step": 520 | |
| }, | |
| { | |
| "epoch": 0.5631067961165048, | |
| "grad_norm": 6.644638538360596, | |
| "learning_rate": 1.9091387400697836e-06, | |
| "loss": 0.6345533728599548, | |
| "step": 522 | |
| }, | |
| { | |
| "epoch": 0.5652642934196332, | |
| "grad_norm": 2.0383713245391846, | |
| "learning_rate": 1.908198911710996e-06, | |
| "loss": 0.432686984539032, | |
| "step": 524 | |
| }, | |
| { | |
| "epoch": 0.5674217907227616, | |
| "grad_norm": 1.5780389308929443, | |
| "learning_rate": 1.9072545088800281e-06, | |
| "loss": 0.7076600790023804, | |
| "step": 526 | |
| }, | |
| { | |
| "epoch": 0.56957928802589, | |
| "grad_norm": 1.6204893589019775, | |
| "learning_rate": 1.9063055369222779e-06, | |
| "loss": 0.6012558341026306, | |
| "step": 528 | |
| }, | |
| { | |
| "epoch": 0.5717367853290184, | |
| "grad_norm": 6.985592842102051, | |
| "learning_rate": 1.905352001209004e-06, | |
| "loss": 0.6433860063552856, | |
| "step": 530 | |
| }, | |
| { | |
| "epoch": 0.5738942826321467, | |
| "grad_norm": 1.4386237859725952, | |
| "learning_rate": 1.9043939071372968e-06, | |
| "loss": 0.6871167421340942, | |
| "step": 532 | |
| }, | |
| { | |
| "epoch": 0.5760517799352751, | |
| "grad_norm": 1.2262943983078003, | |
| "learning_rate": 1.9034312601300479e-06, | |
| "loss": 0.7119494080543518, | |
| "step": 534 | |
| }, | |
| { | |
| "epoch": 0.5782092772384034, | |
| "grad_norm": 2.725543975830078, | |
| "learning_rate": 1.9024640656359182e-06, | |
| "loss": 0.5970579385757446, | |
| "step": 536 | |
| }, | |
| { | |
| "epoch": 0.5803667745415318, | |
| "grad_norm": 1.5997346639633179, | |
| "learning_rate": 1.901492329129308e-06, | |
| "loss": 0.6494900584220886, | |
| "step": 538 | |
| }, | |
| { | |
| "epoch": 0.5825242718446602, | |
| "grad_norm": 3.0856845378875732, | |
| "learning_rate": 1.9005160561103253e-06, | |
| "loss": 0.7896479368209839, | |
| "step": 540 | |
| }, | |
| { | |
| "epoch": 0.5846817691477886, | |
| "grad_norm": 2.178337574005127, | |
| "learning_rate": 1.8995352521047555e-06, | |
| "loss": 0.7269325256347656, | |
| "step": 542 | |
| }, | |
| { | |
| "epoch": 0.5868392664509169, | |
| "grad_norm": 2.0112404823303223, | |
| "learning_rate": 1.8985499226640302e-06, | |
| "loss": 0.5430014133453369, | |
| "step": 544 | |
| }, | |
| { | |
| "epoch": 0.5889967637540453, | |
| "grad_norm": 2.0597128868103027, | |
| "learning_rate": 1.897560073365195e-06, | |
| "loss": 0.7385756969451904, | |
| "step": 546 | |
| }, | |
| { | |
| "epoch": 0.5911542610571737, | |
| "grad_norm": 0.3906221091747284, | |
| "learning_rate": 1.8965657098108778e-06, | |
| "loss": 0.739960253238678, | |
| "step": 548 | |
| }, | |
| { | |
| "epoch": 0.593311758360302, | |
| "grad_norm": 6.757683753967285, | |
| "learning_rate": 1.8955668376292584e-06, | |
| "loss": 0.5648355484008789, | |
| "step": 550 | |
| }, | |
| { | |
| "epoch": 0.5954692556634305, | |
| "grad_norm": 1.4673168659210205, | |
| "learning_rate": 1.8945634624740346e-06, | |
| "loss": 0.6756861209869385, | |
| "step": 552 | |
| }, | |
| { | |
| "epoch": 0.5976267529665588, | |
| "grad_norm": 2.3454344272613525, | |
| "learning_rate": 1.8935555900243924e-06, | |
| "loss": 0.693338930606842, | |
| "step": 554 | |
| }, | |
| { | |
| "epoch": 0.5997842502696872, | |
| "grad_norm": 3.2340376377105713, | |
| "learning_rate": 1.8925432259849734e-06, | |
| "loss": 0.6485008001327515, | |
| "step": 556 | |
| }, | |
| { | |
| "epoch": 0.6019417475728155, | |
| "grad_norm": 3.2170920372009277, | |
| "learning_rate": 1.89152637608584e-06, | |
| "loss": 0.6817625164985657, | |
| "step": 558 | |
| }, | |
| { | |
| "epoch": 0.6040992448759439, | |
| "grad_norm": 1.9984098672866821, | |
| "learning_rate": 1.8905050460824468e-06, | |
| "loss": 0.7717204093933105, | |
| "step": 560 | |
| }, | |
| { | |
| "epoch": 0.6062567421790723, | |
| "grad_norm": 2.4421756267547607, | |
| "learning_rate": 1.8894792417556051e-06, | |
| "loss": 0.6852340698242188, | |
| "step": 562 | |
| }, | |
| { | |
| "epoch": 0.6084142394822006, | |
| "grad_norm": 2.060135841369629, | |
| "learning_rate": 1.888448968911452e-06, | |
| "loss": 0.7176313996315002, | |
| "step": 564 | |
| }, | |
| { | |
| "epoch": 0.6105717367853291, | |
| "grad_norm": 3.1218700408935547, | |
| "learning_rate": 1.887414233381416e-06, | |
| "loss": 0.6021454334259033, | |
| "step": 566 | |
| }, | |
| { | |
| "epoch": 0.6127292340884574, | |
| "grad_norm": 1.8716174364089966, | |
| "learning_rate": 1.8863750410221855e-06, | |
| "loss": 0.6650149822235107, | |
| "step": 568 | |
| }, | |
| { | |
| "epoch": 0.6148867313915858, | |
| "grad_norm": 1.871856689453125, | |
| "learning_rate": 1.8853313977156739e-06, | |
| "loss": 0.6372621655464172, | |
| "step": 570 | |
| }, | |
| { | |
| "epoch": 0.6170442286947141, | |
| "grad_norm": 2.7764410972595215, | |
| "learning_rate": 1.8842833093689885e-06, | |
| "loss": 0.6875618100166321, | |
| "step": 572 | |
| }, | |
| { | |
| "epoch": 0.6192017259978425, | |
| "grad_norm": 1.4079262018203735, | |
| "learning_rate": 1.8832307819143953e-06, | |
| "loss": 0.685975968837738, | |
| "step": 574 | |
| }, | |
| { | |
| "epoch": 0.6213592233009708, | |
| "grad_norm": 5.893849849700928, | |
| "learning_rate": 1.8821738213092862e-06, | |
| "loss": 0.631260871887207, | |
| "step": 576 | |
| }, | |
| { | |
| "epoch": 0.6235167206040992, | |
| "grad_norm": 2.4246366024017334, | |
| "learning_rate": 1.8811124335361445e-06, | |
| "loss": 0.6432245373725891, | |
| "step": 578 | |
| }, | |
| { | |
| "epoch": 0.6256742179072277, | |
| "grad_norm": 56.797996520996094, | |
| "learning_rate": 1.8800466246025129e-06, | |
| "loss": 0.6804959177970886, | |
| "step": 580 | |
| }, | |
| { | |
| "epoch": 0.627831715210356, | |
| "grad_norm": 1.7273633480072021, | |
| "learning_rate": 1.8789764005409568e-06, | |
| "loss": 0.5822848677635193, | |
| "step": 582 | |
| }, | |
| { | |
| "epoch": 0.6299892125134844, | |
| "grad_norm": 1.6046229600906372, | |
| "learning_rate": 1.8779017674090322e-06, | |
| "loss": 0.7005263566970825, | |
| "step": 584 | |
| }, | |
| { | |
| "epoch": 0.6321467098166127, | |
| "grad_norm": 1.5924113988876343, | |
| "learning_rate": 1.8768227312892515e-06, | |
| "loss": 0.7687848210334778, | |
| "step": 586 | |
| }, | |
| { | |
| "epoch": 0.6343042071197411, | |
| "grad_norm": 2.035219430923462, | |
| "learning_rate": 1.875739298289047e-06, | |
| "loss": 0.5710114240646362, | |
| "step": 588 | |
| }, | |
| { | |
| "epoch": 0.6364617044228694, | |
| "grad_norm": 2.202737808227539, | |
| "learning_rate": 1.8746514745407386e-06, | |
| "loss": 0.7539809346199036, | |
| "step": 590 | |
| }, | |
| { | |
| "epoch": 0.6386192017259978, | |
| "grad_norm": 5.263622760772705, | |
| "learning_rate": 1.8735592662014985e-06, | |
| "loss": 0.7617581486701965, | |
| "step": 592 | |
| }, | |
| { | |
| "epoch": 0.6407766990291263, | |
| "grad_norm": 17.145244598388672, | |
| "learning_rate": 1.872462679453315e-06, | |
| "loss": 0.8727496266365051, | |
| "step": 594 | |
| }, | |
| { | |
| "epoch": 0.6429341963322546, | |
| "grad_norm": 1.9058817625045776, | |
| "learning_rate": 1.871361720502959e-06, | |
| "loss": 0.6560637950897217, | |
| "step": 596 | |
| }, | |
| { | |
| "epoch": 0.645091693635383, | |
| "grad_norm": 2.8487465381622314, | |
| "learning_rate": 1.8702563955819493e-06, | |
| "loss": 0.5254390835762024, | |
| "step": 598 | |
| }, | |
| { | |
| "epoch": 0.6472491909385113, | |
| "grad_norm": 2.9062187671661377, | |
| "learning_rate": 1.869146710946515e-06, | |
| "loss": 0.7910107970237732, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 0.6494066882416397, | |
| "grad_norm": 4.04607629776001, | |
| "learning_rate": 1.8680326728775622e-06, | |
| "loss": 0.6240645051002502, | |
| "step": 602 | |
| }, | |
| { | |
| "epoch": 0.651564185544768, | |
| "grad_norm": 1.921399474143982, | |
| "learning_rate": 1.866914287680638e-06, | |
| "loss": 0.8376886248588562, | |
| "step": 604 | |
| }, | |
| { | |
| "epoch": 0.6537216828478964, | |
| "grad_norm": 7.570333957672119, | |
| "learning_rate": 1.8657915616858946e-06, | |
| "loss": 0.7127501368522644, | |
| "step": 606 | |
| }, | |
| { | |
| "epoch": 0.6558791801510249, | |
| "grad_norm": 1.5097516775131226, | |
| "learning_rate": 1.864664501248053e-06, | |
| "loss": 0.5545579195022583, | |
| "step": 608 | |
| }, | |
| { | |
| "epoch": 0.6580366774541532, | |
| "grad_norm": 3.1739096641540527, | |
| "learning_rate": 1.8635331127463678e-06, | |
| "loss": 0.6854344010353088, | |
| "step": 610 | |
| }, | |
| { | |
| "epoch": 0.6601941747572816, | |
| "grad_norm": 2.4847121238708496, | |
| "learning_rate": 1.8623974025845913e-06, | |
| "loss": 0.6225752234458923, | |
| "step": 612 | |
| }, | |
| { | |
| "epoch": 0.6623516720604099, | |
| "grad_norm": 2.919856071472168, | |
| "learning_rate": 1.8612573771909354e-06, | |
| "loss": 0.7242894172668457, | |
| "step": 614 | |
| }, | |
| { | |
| "epoch": 0.6645091693635383, | |
| "grad_norm": 1.5826700925827026, | |
| "learning_rate": 1.8601130430180384e-06, | |
| "loss": 0.7404430508613586, | |
| "step": 616 | |
| }, | |
| { | |
| "epoch": 0.6666666666666666, | |
| "grad_norm": 1.9459686279296875, | |
| "learning_rate": 1.8589644065429246e-06, | |
| "loss": 0.7019950747489929, | |
| "step": 618 | |
| }, | |
| { | |
| "epoch": 0.668824163969795, | |
| "grad_norm": 2.677245855331421, | |
| "learning_rate": 1.8578114742669712e-06, | |
| "loss": 0.6545602083206177, | |
| "step": 620 | |
| }, | |
| { | |
| "epoch": 0.6709816612729234, | |
| "grad_norm": 6.284696578979492, | |
| "learning_rate": 1.85665425271587e-06, | |
| "loss": 0.4951339364051819, | |
| "step": 622 | |
| }, | |
| { | |
| "epoch": 0.6731391585760518, | |
| "grad_norm": 1.498757243156433, | |
| "learning_rate": 1.8554927484395892e-06, | |
| "loss": 0.7832834720611572, | |
| "step": 624 | |
| }, | |
| { | |
| "epoch": 0.6752966558791802, | |
| "grad_norm": 2.4137635231018066, | |
| "learning_rate": 1.8543269680123387e-06, | |
| "loss": 0.6441301107406616, | |
| "step": 626 | |
| }, | |
| { | |
| "epoch": 0.6774541531823085, | |
| "grad_norm": 4.308967590332031, | |
| "learning_rate": 1.853156918032531e-06, | |
| "loss": 0.7098633050918579, | |
| "step": 628 | |
| }, | |
| { | |
| "epoch": 0.6796116504854369, | |
| "grad_norm": 2.8264269828796387, | |
| "learning_rate": 1.851982605122746e-06, | |
| "loss": 0.610696017742157, | |
| "step": 630 | |
| }, | |
| { | |
| "epoch": 0.6817691477885652, | |
| "grad_norm": 1.2851277589797974, | |
| "learning_rate": 1.8508040359296903e-06, | |
| "loss": 0.7390373945236206, | |
| "step": 632 | |
| }, | |
| { | |
| "epoch": 0.6839266450916937, | |
| "grad_norm": 1.6539459228515625, | |
| "learning_rate": 1.8496212171241626e-06, | |
| "loss": 0.5240519046783447, | |
| "step": 634 | |
| }, | |
| { | |
| "epoch": 0.686084142394822, | |
| "grad_norm": 1.8807573318481445, | |
| "learning_rate": 1.8484341554010143e-06, | |
| "loss": 0.4707701504230499, | |
| "step": 636 | |
| }, | |
| { | |
| "epoch": 0.6882416396979504, | |
| "grad_norm": 2.358454704284668, | |
| "learning_rate": 1.8472428574791121e-06, | |
| "loss": 0.7253568172454834, | |
| "step": 638 | |
| }, | |
| { | |
| "epoch": 0.6903991370010788, | |
| "grad_norm": 2.440108299255371, | |
| "learning_rate": 1.8460473301013004e-06, | |
| "loss": 0.7356727123260498, | |
| "step": 640 | |
| }, | |
| { | |
| "epoch": 0.6925566343042071, | |
| "grad_norm": 2.899152994155884, | |
| "learning_rate": 1.844847580034362e-06, | |
| "loss": 0.6664748191833496, | |
| "step": 642 | |
| }, | |
| { | |
| "epoch": 0.6947141316073355, | |
| "grad_norm": 5.704761028289795, | |
| "learning_rate": 1.843643614068981e-06, | |
| "loss": 0.7694708108901978, | |
| "step": 644 | |
| }, | |
| { | |
| "epoch": 0.6968716289104638, | |
| "grad_norm": 1.51004159450531, | |
| "learning_rate": 1.842435439019703e-06, | |
| "loss": 0.6821762323379517, | |
| "step": 646 | |
| }, | |
| { | |
| "epoch": 0.6990291262135923, | |
| "grad_norm": 5.242131233215332, | |
| "learning_rate": 1.8412230617248988e-06, | |
| "loss": 0.6199461221694946, | |
| "step": 648 | |
| }, | |
| { | |
| "epoch": 0.7011866235167206, | |
| "grad_norm": 2.5778682231903076, | |
| "learning_rate": 1.8400064890467229e-06, | |
| "loss": 0.6760554313659668, | |
| "step": 650 | |
| }, | |
| { | |
| "epoch": 0.703344120819849, | |
| "grad_norm": 1.4639006853103638, | |
| "learning_rate": 1.8387857278710763e-06, | |
| "loss": 0.662639856338501, | |
| "step": 652 | |
| }, | |
| { | |
| "epoch": 0.7055016181229773, | |
| "grad_norm": 2.5555951595306396, | |
| "learning_rate": 1.8375607851075678e-06, | |
| "loss": 0.5903278589248657, | |
| "step": 654 | |
| }, | |
| { | |
| "epoch": 0.7076591154261057, | |
| "grad_norm": 1.839576244354248, | |
| "learning_rate": 1.8363316676894743e-06, | |
| "loss": 0.659648597240448, | |
| "step": 656 | |
| }, | |
| { | |
| "epoch": 0.7098166127292341, | |
| "grad_norm": 4.13273811340332, | |
| "learning_rate": 1.8350983825737008e-06, | |
| "loss": 0.5222451090812683, | |
| "step": 658 | |
| }, | |
| { | |
| "epoch": 0.7119741100323624, | |
| "grad_norm": 1.8703253269195557, | |
| "learning_rate": 1.833860936740742e-06, | |
| "loss": 0.7516009211540222, | |
| "step": 660 | |
| }, | |
| { | |
| "epoch": 0.7141316073354909, | |
| "grad_norm": 1.5587713718414307, | |
| "learning_rate": 1.8326193371946435e-06, | |
| "loss": 0.6802030801773071, | |
| "step": 662 | |
| }, | |
| { | |
| "epoch": 0.7162891046386192, | |
| "grad_norm": 1.9971494674682617, | |
| "learning_rate": 1.8313735909629605e-06, | |
| "loss": 0.5823180675506592, | |
| "step": 664 | |
| }, | |
| { | |
| "epoch": 0.7184466019417476, | |
| "grad_norm": 3.314469575881958, | |
| "learning_rate": 1.8301237050967186e-06, | |
| "loss": 0.6089075207710266, | |
| "step": 666 | |
| }, | |
| { | |
| "epoch": 0.7206040992448759, | |
| "grad_norm": 2.5151665210723877, | |
| "learning_rate": 1.8288696866703752e-06, | |
| "loss": 0.5487096309661865, | |
| "step": 668 | |
| }, | |
| { | |
| "epoch": 0.7227615965480043, | |
| "grad_norm": 1.755199909210205, | |
| "learning_rate": 1.827611542781777e-06, | |
| "loss": 0.6520088911056519, | |
| "step": 670 | |
| }, | |
| { | |
| "epoch": 0.7249190938511327, | |
| "grad_norm": 2.233076333999634, | |
| "learning_rate": 1.826349280552121e-06, | |
| "loss": 0.6878398656845093, | |
| "step": 672 | |
| }, | |
| { | |
| "epoch": 0.727076591154261, | |
| "grad_norm": 2.0914413928985596, | |
| "learning_rate": 1.8250829071259162e-06, | |
| "loss": 0.6050041317939758, | |
| "step": 674 | |
| }, | |
| { | |
| "epoch": 0.7292340884573895, | |
| "grad_norm": 3.670649528503418, | |
| "learning_rate": 1.8238124296709396e-06, | |
| "loss": 0.5783309936523438, | |
| "step": 676 | |
| }, | |
| { | |
| "epoch": 0.7313915857605178, | |
| "grad_norm": 4.5103559494018555, | |
| "learning_rate": 1.8225378553781978e-06, | |
| "loss": 0.5625826120376587, | |
| "step": 678 | |
| }, | |
| { | |
| "epoch": 0.7335490830636462, | |
| "grad_norm": 2.3067467212677, | |
| "learning_rate": 1.821259191461886e-06, | |
| "loss": 0.6222144365310669, | |
| "step": 680 | |
| }, | |
| { | |
| "epoch": 0.7357065803667745, | |
| "grad_norm": 4.616910934448242, | |
| "learning_rate": 1.819976445159347e-06, | |
| "loss": 0.6577675938606262, | |
| "step": 682 | |
| }, | |
| { | |
| "epoch": 0.7378640776699029, | |
| "grad_norm": 2.574132204055786, | |
| "learning_rate": 1.81868962373103e-06, | |
| "loss": 0.5882217884063721, | |
| "step": 684 | |
| }, | |
| { | |
| "epoch": 0.7400215749730313, | |
| "grad_norm": 1.4304159879684448, | |
| "learning_rate": 1.8173987344604505e-06, | |
| "loss": 0.7386992573738098, | |
| "step": 686 | |
| }, | |
| { | |
| "epoch": 0.7421790722761596, | |
| "grad_norm": 2.1306235790252686, | |
| "learning_rate": 1.816103784654147e-06, | |
| "loss": 0.586725115776062, | |
| "step": 688 | |
| }, | |
| { | |
| "epoch": 0.7443365695792881, | |
| "grad_norm": 1.9864001274108887, | |
| "learning_rate": 1.814804781641642e-06, | |
| "loss": 0.5822692513465881, | |
| "step": 690 | |
| }, | |
| { | |
| "epoch": 0.7464940668824164, | |
| "grad_norm": 1.6799951791763306, | |
| "learning_rate": 1.8135017327753992e-06, | |
| "loss": 0.630893886089325, | |
| "step": 692 | |
| }, | |
| { | |
| "epoch": 0.7486515641855448, | |
| "grad_norm": 1.0661367177963257, | |
| "learning_rate": 1.8121946454307816e-06, | |
| "loss": 0.682563066482544, | |
| "step": 694 | |
| }, | |
| { | |
| "epoch": 0.7508090614886731, | |
| "grad_norm": 4.472043514251709, | |
| "learning_rate": 1.8108835270060122e-06, | |
| "loss": 0.6360002756118774, | |
| "step": 696 | |
| }, | |
| { | |
| "epoch": 0.7529665587918015, | |
| "grad_norm": 1.2949084043502808, | |
| "learning_rate": 1.8095683849221276e-06, | |
| "loss": 0.6381992101669312, | |
| "step": 698 | |
| }, | |
| { | |
| "epoch": 0.7551240560949298, | |
| "grad_norm": 1.5483993291854858, | |
| "learning_rate": 1.8082492266229404e-06, | |
| "loss": 0.7825127243995667, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 0.7572815533980582, | |
| "grad_norm": 2.104930877685547, | |
| "learning_rate": 1.806926059574995e-06, | |
| "loss": 0.5905802845954895, | |
| "step": 702 | |
| }, | |
| { | |
| "epoch": 0.7594390507011867, | |
| "grad_norm": 2.691180467605591, | |
| "learning_rate": 1.805598891267525e-06, | |
| "loss": 0.6105803847312927, | |
| "step": 704 | |
| }, | |
| { | |
| "epoch": 0.761596548004315, | |
| "grad_norm": 2.662587881088257, | |
| "learning_rate": 1.8042677292124127e-06, | |
| "loss": 0.7156485319137573, | |
| "step": 706 | |
| }, | |
| { | |
| "epoch": 0.7637540453074434, | |
| "grad_norm": 4.052894115447998, | |
| "learning_rate": 1.802932580944144e-06, | |
| "loss": 0.6582145690917969, | |
| "step": 708 | |
| }, | |
| { | |
| "epoch": 0.7659115426105717, | |
| "grad_norm": 1.772103190422058, | |
| "learning_rate": 1.801593454019768e-06, | |
| "loss": 0.5497456789016724, | |
| "step": 710 | |
| }, | |
| { | |
| "epoch": 0.7680690399137001, | |
| "grad_norm": 4.38840913772583, | |
| "learning_rate": 1.8002503560188531e-06, | |
| "loss": 0.8528274893760681, | |
| "step": 712 | |
| }, | |
| { | |
| "epoch": 0.7702265372168284, | |
| "grad_norm": 0.47714903950691223, | |
| "learning_rate": 1.798903294543444e-06, | |
| "loss": 0.6722896695137024, | |
| "step": 714 | |
| }, | |
| { | |
| "epoch": 0.7723840345199569, | |
| "grad_norm": 1.1433959007263184, | |
| "learning_rate": 1.797552277218019e-06, | |
| "loss": 0.640397310256958, | |
| "step": 716 | |
| }, | |
| { | |
| "epoch": 0.7745415318230853, | |
| "grad_norm": 2.8816723823547363, | |
| "learning_rate": 1.7961973116894475e-06, | |
| "loss": 0.43922677636146545, | |
| "step": 718 | |
| }, | |
| { | |
| "epoch": 0.7766990291262136, | |
| "grad_norm": 1.3017030954360962, | |
| "learning_rate": 1.7948384056269452e-06, | |
| "loss": 0.6236469745635986, | |
| "step": 720 | |
| }, | |
| { | |
| "epoch": 0.778856526429342, | |
| "grad_norm": 5.502106189727783, | |
| "learning_rate": 1.7934755667220324e-06, | |
| "loss": 0.6106448769569397, | |
| "step": 722 | |
| }, | |
| { | |
| "epoch": 0.7810140237324703, | |
| "grad_norm": 3.15694522857666, | |
| "learning_rate": 1.7921088026884895e-06, | |
| "loss": 0.7106237411499023, | |
| "step": 724 | |
| }, | |
| { | |
| "epoch": 0.7831715210355987, | |
| "grad_norm": 1.253527283668518, | |
| "learning_rate": 1.7907381212623119e-06, | |
| "loss": 0.6325215101242065, | |
| "step": 726 | |
| }, | |
| { | |
| "epoch": 0.785329018338727, | |
| "grad_norm": 5.992726802825928, | |
| "learning_rate": 1.7893635302016699e-06, | |
| "loss": 0.698371946811676, | |
| "step": 728 | |
| }, | |
| { | |
| "epoch": 0.7874865156418555, | |
| "grad_norm": 2.6129043102264404, | |
| "learning_rate": 1.7879850372868614e-06, | |
| "loss": 0.8592634797096252, | |
| "step": 730 | |
| }, | |
| { | |
| "epoch": 0.7896440129449838, | |
| "grad_norm": 1.9722578525543213, | |
| "learning_rate": 1.7866026503202696e-06, | |
| "loss": 0.7127001881599426, | |
| "step": 732 | |
| }, | |
| { | |
| "epoch": 0.7918015102481122, | |
| "grad_norm": 2.3035688400268555, | |
| "learning_rate": 1.7852163771263183e-06, | |
| "loss": 0.7264171242713928, | |
| "step": 734 | |
| }, | |
| { | |
| "epoch": 0.7939590075512406, | |
| "grad_norm": 1.6729274988174438, | |
| "learning_rate": 1.7838262255514273e-06, | |
| "loss": 0.6522683501243591, | |
| "step": 736 | |
| }, | |
| { | |
| "epoch": 0.7961165048543689, | |
| "grad_norm": 1.2953232526779175, | |
| "learning_rate": 1.7824322034639688e-06, | |
| "loss": 0.7508292198181152, | |
| "step": 738 | |
| }, | |
| { | |
| "epoch": 0.7982740021574973, | |
| "grad_norm": 1.8854900598526, | |
| "learning_rate": 1.781034318754222e-06, | |
| "loss": 0.8205673098564148, | |
| "step": 740 | |
| }, | |
| { | |
| "epoch": 0.8004314994606256, | |
| "grad_norm": 2.379824161529541, | |
| "learning_rate": 1.7796325793343296e-06, | |
| "loss": 0.627574622631073, | |
| "step": 742 | |
| }, | |
| { | |
| "epoch": 0.8025889967637541, | |
| "grad_norm": 1.9717144966125488, | |
| "learning_rate": 1.7782269931382514e-06, | |
| "loss": 0.41914719343185425, | |
| "step": 744 | |
| }, | |
| { | |
| "epoch": 0.8047464940668824, | |
| "grad_norm": 3.594667911529541, | |
| "learning_rate": 1.7768175681217208e-06, | |
| "loss": 0.40262705087661743, | |
| "step": 746 | |
| }, | |
| { | |
| "epoch": 0.8069039913700108, | |
| "grad_norm": 1.6693843603134155, | |
| "learning_rate": 1.7754043122621986e-06, | |
| "loss": 0.6387592554092407, | |
| "step": 748 | |
| }, | |
| { | |
| "epoch": 0.8090614886731392, | |
| "grad_norm": 5.557443141937256, | |
| "learning_rate": 1.7739872335588298e-06, | |
| "loss": 0.6391375064849854, | |
| "step": 750 | |
| }, | |
| { | |
| "epoch": 0.8112189859762675, | |
| "grad_norm": 1.475829839706421, | |
| "learning_rate": 1.7725663400323957e-06, | |
| "loss": 0.5560780167579651, | |
| "step": 752 | |
| }, | |
| { | |
| "epoch": 0.8133764832793959, | |
| "grad_norm": 3.5974369049072266, | |
| "learning_rate": 1.77114163972527e-06, | |
| "loss": 0.7343906164169312, | |
| "step": 754 | |
| }, | |
| { | |
| "epoch": 0.8155339805825242, | |
| "grad_norm": 1.595281720161438, | |
| "learning_rate": 1.769713140701374e-06, | |
| "loss": 0.6695587038993835, | |
| "step": 756 | |
| }, | |
| { | |
| "epoch": 0.8176914778856527, | |
| "grad_norm": 1.641003131866455, | |
| "learning_rate": 1.7682808510461292e-06, | |
| "loss": 0.7364107370376587, | |
| "step": 758 | |
| }, | |
| { | |
| "epoch": 0.819848975188781, | |
| "grad_norm": 1.8976866006851196, | |
| "learning_rate": 1.7668447788664126e-06, | |
| "loss": 0.5367798209190369, | |
| "step": 760 | |
| }, | |
| { | |
| "epoch": 0.8220064724919094, | |
| "grad_norm": 2.350424289703369, | |
| "learning_rate": 1.7654049322905105e-06, | |
| "loss": 0.6110427379608154, | |
| "step": 762 | |
| }, | |
| { | |
| "epoch": 0.8241639697950378, | |
| "grad_norm": 1.7859790325164795, | |
| "learning_rate": 1.7639613194680727e-06, | |
| "loss": 0.8835413455963135, | |
| "step": 764 | |
| }, | |
| { | |
| "epoch": 0.8263214670981661, | |
| "grad_norm": 1.9460476636886597, | |
| "learning_rate": 1.7625139485700664e-06, | |
| "loss": 0.5881315469741821, | |
| "step": 766 | |
| }, | |
| { | |
| "epoch": 0.8284789644012945, | |
| "grad_norm": 17.784387588500977, | |
| "learning_rate": 1.7610628277887297e-06, | |
| "loss": 0.5561118721961975, | |
| "step": 768 | |
| }, | |
| { | |
| "epoch": 0.8306364617044228, | |
| "grad_norm": 2.5915396213531494, | |
| "learning_rate": 1.7596079653375253e-06, | |
| "loss": 0.6103290319442749, | |
| "step": 770 | |
| }, | |
| { | |
| "epoch": 0.8327939590075513, | |
| "grad_norm": 7.325887680053711, | |
| "learning_rate": 1.758149369451094e-06, | |
| "loss": 0.52987140417099, | |
| "step": 772 | |
| }, | |
| { | |
| "epoch": 0.8349514563106796, | |
| "grad_norm": 3.882723093032837, | |
| "learning_rate": 1.7566870483852086e-06, | |
| "loss": 0.7465340495109558, | |
| "step": 774 | |
| }, | |
| { | |
| "epoch": 0.837108953613808, | |
| "grad_norm": 5.062621593475342, | |
| "learning_rate": 1.7552210104167257e-06, | |
| "loss": 0.6753080487251282, | |
| "step": 776 | |
| }, | |
| { | |
| "epoch": 0.8392664509169363, | |
| "grad_norm": 6.415841102600098, | |
| "learning_rate": 1.753751263843541e-06, | |
| "loss": 0.693338930606842, | |
| "step": 778 | |
| }, | |
| { | |
| "epoch": 0.8414239482200647, | |
| "grad_norm": 2.172607898712158, | |
| "learning_rate": 1.7522778169845408e-06, | |
| "loss": 0.7129068374633789, | |
| "step": 780 | |
| }, | |
| { | |
| "epoch": 0.8435814455231931, | |
| "grad_norm": 2.3066418170928955, | |
| "learning_rate": 1.7508006781795555e-06, | |
| "loss": 0.5250005722045898, | |
| "step": 782 | |
| }, | |
| { | |
| "epoch": 0.8457389428263214, | |
| "grad_norm": 1.514641523361206, | |
| "learning_rate": 1.7493198557893109e-06, | |
| "loss": 0.5880756378173828, | |
| "step": 784 | |
| }, | |
| { | |
| "epoch": 0.8478964401294499, | |
| "grad_norm": 2.358647584915161, | |
| "learning_rate": 1.7478353581953846e-06, | |
| "loss": 0.6020887494087219, | |
| "step": 786 | |
| }, | |
| { | |
| "epoch": 0.8500539374325782, | |
| "grad_norm": 2.5027408599853516, | |
| "learning_rate": 1.746347193800154e-06, | |
| "loss": 0.7379757165908813, | |
| "step": 788 | |
| }, | |
| { | |
| "epoch": 0.8522114347357066, | |
| "grad_norm": 1.7015382051467896, | |
| "learning_rate": 1.7448553710267519e-06, | |
| "loss": 0.3867076337337494, | |
| "step": 790 | |
| }, | |
| { | |
| "epoch": 0.8543689320388349, | |
| "grad_norm": 24.41814613342285, | |
| "learning_rate": 1.7433598983190181e-06, | |
| "loss": 0.5596577525138855, | |
| "step": 792 | |
| }, | |
| { | |
| "epoch": 0.8565264293419633, | |
| "grad_norm": 0.5729015469551086, | |
| "learning_rate": 1.74186078414145e-06, | |
| "loss": 0.37773168087005615, | |
| "step": 794 | |
| }, | |
| { | |
| "epoch": 0.8586839266450917, | |
| "grad_norm": 1.438088297843933, | |
| "learning_rate": 1.7403580369791577e-06, | |
| "loss": 0.6138755679130554, | |
| "step": 796 | |
| }, | |
| { | |
| "epoch": 0.86084142394822, | |
| "grad_norm": 1.3894504308700562, | |
| "learning_rate": 1.7388516653378134e-06, | |
| "loss": 0.6411980986595154, | |
| "step": 798 | |
| }, | |
| { | |
| "epoch": 0.8629989212513485, | |
| "grad_norm": 7.195361137390137, | |
| "learning_rate": 1.7373416777436036e-06, | |
| "loss": 0.5361164808273315, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 0.8651564185544768, | |
| "grad_norm": 3.299745798110962, | |
| "learning_rate": 1.7358280827431829e-06, | |
| "loss": 0.45560529828071594, | |
| "step": 802 | |
| }, | |
| { | |
| "epoch": 0.8673139158576052, | |
| "grad_norm": 2.5834922790527344, | |
| "learning_rate": 1.7343108889036223e-06, | |
| "loss": 0.5199063420295715, | |
| "step": 804 | |
| }, | |
| { | |
| "epoch": 0.8694714131607335, | |
| "grad_norm": 2.0384316444396973, | |
| "learning_rate": 1.7327901048123644e-06, | |
| "loss": 0.6027982234954834, | |
| "step": 806 | |
| }, | |
| { | |
| "epoch": 0.8716289104638619, | |
| "grad_norm": 3.682217597961426, | |
| "learning_rate": 1.7312657390771714e-06, | |
| "loss": 0.6176765561103821, | |
| "step": 808 | |
| }, | |
| { | |
| "epoch": 0.8737864077669902, | |
| "grad_norm": 3.4343974590301514, | |
| "learning_rate": 1.7297378003260787e-06, | |
| "loss": 0.6307402849197388, | |
| "step": 810 | |
| }, | |
| { | |
| "epoch": 0.8759439050701187, | |
| "grad_norm": 18.784271240234375, | |
| "learning_rate": 1.728206297207345e-06, | |
| "loss": 0.4677152633666992, | |
| "step": 812 | |
| }, | |
| { | |
| "epoch": 0.8781014023732471, | |
| "grad_norm": 1.2662842273712158, | |
| "learning_rate": 1.7266712383894037e-06, | |
| "loss": 0.6467829346656799, | |
| "step": 814 | |
| }, | |
| { | |
| "epoch": 0.8802588996763754, | |
| "grad_norm": 1.4935745000839233, | |
| "learning_rate": 1.7251326325608135e-06, | |
| "loss": 0.6746770143508911, | |
| "step": 816 | |
| }, | |
| { | |
| "epoch": 0.8824163969795038, | |
| "grad_norm": 3.506131410598755, | |
| "learning_rate": 1.7235904884302098e-06, | |
| "loss": 0.6060282588005066, | |
| "step": 818 | |
| }, | |
| { | |
| "epoch": 0.8845738942826321, | |
| "grad_norm": 3.4990806579589844, | |
| "learning_rate": 1.7220448147262555e-06, | |
| "loss": 0.5744661688804626, | |
| "step": 820 | |
| }, | |
| { | |
| "epoch": 0.8867313915857605, | |
| "grad_norm": 1.9610271453857422, | |
| "learning_rate": 1.7204956201975898e-06, | |
| "loss": 0.6914322376251221, | |
| "step": 822 | |
| }, | |
| { | |
| "epoch": 0.8888888888888888, | |
| "grad_norm": 2.512073040008545, | |
| "learning_rate": 1.7189429136127814e-06, | |
| "loss": 0.6700202226638794, | |
| "step": 824 | |
| }, | |
| { | |
| "epoch": 0.8910463861920173, | |
| "grad_norm": 2.086268663406372, | |
| "learning_rate": 1.7173867037602767e-06, | |
| "loss": 0.7067221403121948, | |
| "step": 826 | |
| }, | |
| { | |
| "epoch": 0.8932038834951457, | |
| "grad_norm": 3.7312817573547363, | |
| "learning_rate": 1.7158269994483514e-06, | |
| "loss": 0.31625503301620483, | |
| "step": 828 | |
| }, | |
| { | |
| "epoch": 0.895361380798274, | |
| "grad_norm": 6.126044750213623, | |
| "learning_rate": 1.71426380950506e-06, | |
| "loss": 0.5323830842971802, | |
| "step": 830 | |
| }, | |
| { | |
| "epoch": 0.8975188781014024, | |
| "grad_norm": 1.7195242643356323, | |
| "learning_rate": 1.712697142778186e-06, | |
| "loss": 0.782951831817627, | |
| "step": 832 | |
| }, | |
| { | |
| "epoch": 0.8996763754045307, | |
| "grad_norm": 8.366249084472656, | |
| "learning_rate": 1.7111270081351913e-06, | |
| "loss": 0.5681637525558472, | |
| "step": 834 | |
| }, | |
| { | |
| "epoch": 0.9018338727076591, | |
| "grad_norm": 2.791904926300049, | |
| "learning_rate": 1.7095534144631668e-06, | |
| "loss": 0.7307286858558655, | |
| "step": 836 | |
| }, | |
| { | |
| "epoch": 0.9039913700107874, | |
| "grad_norm": 1.9204684495925903, | |
| "learning_rate": 1.7079763706687827e-06, | |
| "loss": 0.6743446588516235, | |
| "step": 838 | |
| }, | |
| { | |
| "epoch": 0.9061488673139159, | |
| "grad_norm": 5.066476821899414, | |
| "learning_rate": 1.706395885678235e-06, | |
| "loss": 0.6655571460723877, | |
| "step": 840 | |
| }, | |
| { | |
| "epoch": 0.9083063646170443, | |
| "grad_norm": 4.142644882202148, | |
| "learning_rate": 1.7048119684371996e-06, | |
| "loss": 0.6895488500595093, | |
| "step": 842 | |
| }, | |
| { | |
| "epoch": 0.9104638619201726, | |
| "grad_norm": 7.594639778137207, | |
| "learning_rate": 1.7032246279107776e-06, | |
| "loss": 0.8503600358963013, | |
| "step": 844 | |
| }, | |
| { | |
| "epoch": 0.912621359223301, | |
| "grad_norm": 8.625396728515625, | |
| "learning_rate": 1.7016338730834468e-06, | |
| "loss": 0.8498875498771667, | |
| "step": 846 | |
| }, | |
| { | |
| "epoch": 0.9147788565264293, | |
| "grad_norm": 2.5336923599243164, | |
| "learning_rate": 1.7000397129590104e-06, | |
| "loss": 0.49179524183273315, | |
| "step": 848 | |
| }, | |
| { | |
| "epoch": 0.9169363538295577, | |
| "grad_norm": 12.546621322631836, | |
| "learning_rate": 1.6984421565605447e-06, | |
| "loss": 0.7858133912086487, | |
| "step": 850 | |
| }, | |
| { | |
| "epoch": 0.919093851132686, | |
| "grad_norm": 1.803154706954956, | |
| "learning_rate": 1.696841212930351e-06, | |
| "loss": 0.42831236124038696, | |
| "step": 852 | |
| }, | |
| { | |
| "epoch": 0.9212513484358145, | |
| "grad_norm": 1.1497598886489868, | |
| "learning_rate": 1.695236891129901e-06, | |
| "loss": 0.6902183294296265, | |
| "step": 854 | |
| }, | |
| { | |
| "epoch": 0.9234088457389428, | |
| "grad_norm": 0.7733110785484314, | |
| "learning_rate": 1.6936292002397876e-06, | |
| "loss": 0.7910528182983398, | |
| "step": 856 | |
| }, | |
| { | |
| "epoch": 0.9255663430420712, | |
| "grad_norm": 4.334436893463135, | |
| "learning_rate": 1.692018149359674e-06, | |
| "loss": 0.6410449743270874, | |
| "step": 858 | |
| }, | |
| { | |
| "epoch": 0.9277238403451996, | |
| "grad_norm": 3.1473817825317383, | |
| "learning_rate": 1.6904037476082403e-06, | |
| "loss": 0.5418177247047424, | |
| "step": 860 | |
| }, | |
| { | |
| "epoch": 0.9298813376483279, | |
| "grad_norm": 3.289321184158325, | |
| "learning_rate": 1.6887860041231324e-06, | |
| "loss": 0.8675633072853088, | |
| "step": 862 | |
| }, | |
| { | |
| "epoch": 0.9320388349514563, | |
| "grad_norm": 0.5947059392929077, | |
| "learning_rate": 1.6871649280609114e-06, | |
| "loss": 0.7250087857246399, | |
| "step": 864 | |
| }, | |
| { | |
| "epoch": 0.9341963322545846, | |
| "grad_norm": 1.812920331954956, | |
| "learning_rate": 1.6855405285970012e-06, | |
| "loss": 0.3274366855621338, | |
| "step": 866 | |
| }, | |
| { | |
| "epoch": 0.9363538295577131, | |
| "grad_norm": 1.37776780128479, | |
| "learning_rate": 1.6839128149256357e-06, | |
| "loss": 0.7339057326316833, | |
| "step": 868 | |
| }, | |
| { | |
| "epoch": 0.9385113268608414, | |
| "grad_norm": 2.6405365467071533, | |
| "learning_rate": 1.6822817962598079e-06, | |
| "loss": 0.4312754273414612, | |
| "step": 870 | |
| }, | |
| { | |
| "epoch": 0.9406688241639698, | |
| "grad_norm": 32.455711364746094, | |
| "learning_rate": 1.6806474818312178e-06, | |
| "loss": 0.6649459600448608, | |
| "step": 872 | |
| }, | |
| { | |
| "epoch": 0.9428263214670982, | |
| "grad_norm": 1.1529123783111572, | |
| "learning_rate": 1.6790098808902187e-06, | |
| "loss": 0.694479763507843, | |
| "step": 874 | |
| }, | |
| { | |
| "epoch": 0.9449838187702265, | |
| "grad_norm": 6.395750045776367, | |
| "learning_rate": 1.6773690027057665e-06, | |
| "loss": 0.5320945978164673, | |
| "step": 876 | |
| }, | |
| { | |
| "epoch": 0.9471413160733549, | |
| "grad_norm": 12.70807933807373, | |
| "learning_rate": 1.6757248565653666e-06, | |
| "loss": 0.7014382481575012, | |
| "step": 878 | |
| }, | |
| { | |
| "epoch": 0.9492988133764833, | |
| "grad_norm": 2.392099142074585, | |
| "learning_rate": 1.674077451775021e-06, | |
| "loss": 0.9157409071922302, | |
| "step": 880 | |
| }, | |
| { | |
| "epoch": 0.9514563106796117, | |
| "grad_norm": 1.741999864578247, | |
| "learning_rate": 1.6724267976591756e-06, | |
| "loss": 0.616689145565033, | |
| "step": 882 | |
| }, | |
| { | |
| "epoch": 0.95361380798274, | |
| "grad_norm": 1.99687922000885, | |
| "learning_rate": 1.6707729035606691e-06, | |
| "loss": 0.5802426338195801, | |
| "step": 884 | |
| }, | |
| { | |
| "epoch": 0.9557713052858684, | |
| "grad_norm": 1.6920971870422363, | |
| "learning_rate": 1.6691157788406773e-06, | |
| "loss": 0.42533692717552185, | |
| "step": 886 | |
| }, | |
| { | |
| "epoch": 0.9579288025889967, | |
| "grad_norm": 3.21420955657959, | |
| "learning_rate": 1.6674554328786616e-06, | |
| "loss": 0.8310537338256836, | |
| "step": 888 | |
| }, | |
| { | |
| "epoch": 0.9600862998921251, | |
| "grad_norm": 2.7011826038360596, | |
| "learning_rate": 1.6657918750723176e-06, | |
| "loss": 0.8436251282691956, | |
| "step": 890 | |
| }, | |
| { | |
| "epoch": 0.9622437971952535, | |
| "grad_norm": 1.146492838859558, | |
| "learning_rate": 1.6641251148375184e-06, | |
| "loss": 0.4956342577934265, | |
| "step": 892 | |
| }, | |
| { | |
| "epoch": 0.9644012944983819, | |
| "grad_norm": 11.642980575561523, | |
| "learning_rate": 1.6624551616082635e-06, | |
| "loss": 0.643322765827179, | |
| "step": 894 | |
| }, | |
| { | |
| "epoch": 0.9665587918015103, | |
| "grad_norm": 1.810482144355774, | |
| "learning_rate": 1.6607820248366257e-06, | |
| "loss": 0.6843705177307129, | |
| "step": 896 | |
| }, | |
| { | |
| "epoch": 0.9687162891046386, | |
| "grad_norm": 1.3741674423217773, | |
| "learning_rate": 1.6591057139926966e-06, | |
| "loss": 0.7010579109191895, | |
| "step": 898 | |
| }, | |
| { | |
| "epoch": 0.970873786407767, | |
| "grad_norm": 3.2511136531829834, | |
| "learning_rate": 1.6574262385645323e-06, | |
| "loss": 0.6527800559997559, | |
| "step": 900 | |
| }, | |
| { | |
| "epoch": 0.9730312837108953, | |
| "grad_norm": 3.3792011737823486, | |
| "learning_rate": 1.6557436080581027e-06, | |
| "loss": 0.6726928949356079, | |
| "step": 902 | |
| }, | |
| { | |
| "epoch": 0.9751887810140237, | |
| "grad_norm": 1.9965680837631226, | |
| "learning_rate": 1.6540578319972335e-06, | |
| "loss": 0.7932605147361755, | |
| "step": 904 | |
| }, | |
| { | |
| "epoch": 0.9773462783171522, | |
| "grad_norm": 14.346431732177734, | |
| "learning_rate": 1.652368919923557e-06, | |
| "loss": 0.6329518556594849, | |
| "step": 906 | |
| }, | |
| { | |
| "epoch": 0.9795037756202805, | |
| "grad_norm": 1.1356829404830933, | |
| "learning_rate": 1.6506768813964527e-06, | |
| "loss": 0.6013335585594177, | |
| "step": 908 | |
| }, | |
| { | |
| "epoch": 0.9816612729234089, | |
| "grad_norm": 1.9456955194473267, | |
| "learning_rate": 1.6489817259929978e-06, | |
| "loss": 0.6943175792694092, | |
| "step": 910 | |
| }, | |
| { | |
| "epoch": 0.9838187702265372, | |
| "grad_norm": 75.11457824707031, | |
| "learning_rate": 1.647283463307912e-06, | |
| "loss": 0.499568372964859, | |
| "step": 912 | |
| }, | |
| { | |
| "epoch": 0.9859762675296656, | |
| "grad_norm": 2.1002883911132812, | |
| "learning_rate": 1.6455821029535006e-06, | |
| "loss": 0.6039252281188965, | |
| "step": 914 | |
| }, | |
| { | |
| "epoch": 0.9881337648327939, | |
| "grad_norm": 2.215057849884033, | |
| "learning_rate": 1.6438776545596032e-06, | |
| "loss": 0.6023073196411133, | |
| "step": 916 | |
| }, | |
| { | |
| "epoch": 0.9902912621359223, | |
| "grad_norm": 5.074563980102539, | |
| "learning_rate": 1.6421701277735377e-06, | |
| "loss": 0.6670839190483093, | |
| "step": 918 | |
| }, | |
| { | |
| "epoch": 0.9924487594390508, | |
| "grad_norm": 1.9246139526367188, | |
| "learning_rate": 1.6404595322600454e-06, | |
| "loss": 0.45060187578201294, | |
| "step": 920 | |
| }, | |
| { | |
| "epoch": 0.9946062567421791, | |
| "grad_norm": 2.673752784729004, | |
| "learning_rate": 1.638745877701238e-06, | |
| "loss": 0.5095839500427246, | |
| "step": 922 | |
| }, | |
| { | |
| "epoch": 0.9967637540453075, | |
| "grad_norm": 1.1209965944290161, | |
| "learning_rate": 1.6370291737965403e-06, | |
| "loss": 0.6856327652931213, | |
| "step": 924 | |
| }, | |
| { | |
| "epoch": 0.9989212513484358, | |
| "grad_norm": 1.4867414236068726, | |
| "learning_rate": 1.6353094302626375e-06, | |
| "loss": 0.7345451712608337, | |
| "step": 926 | |
| }, | |
| { | |
| "epoch": 1.0010787486515642, | |
| "grad_norm": 1.4351245164871216, | |
| "learning_rate": 1.6335866568334196e-06, | |
| "loss": 0.4384617805480957, | |
| "step": 928 | |
| }, | |
| { | |
| "epoch": 1.0032362459546926, | |
| "grad_norm": 1.608482837677002, | |
| "learning_rate": 1.6318608632599252e-06, | |
| "loss": 0.5233771800994873, | |
| "step": 930 | |
| }, | |
| { | |
| "epoch": 1.0053937432578208, | |
| "grad_norm": 1.6175332069396973, | |
| "learning_rate": 1.6301320593102877e-06, | |
| "loss": 0.5526682734489441, | |
| "step": 932 | |
| }, | |
| { | |
| "epoch": 1.0075512405609492, | |
| "grad_norm": 1.7772831916809082, | |
| "learning_rate": 1.6284002547696794e-06, | |
| "loss": 0.5304218530654907, | |
| "step": 934 | |
| }, | |
| { | |
| "epoch": 1.0097087378640777, | |
| "grad_norm": 1.2558060884475708, | |
| "learning_rate": 1.626665459440256e-06, | |
| "loss": 0.3196244239807129, | |
| "step": 936 | |
| }, | |
| { | |
| "epoch": 1.011866235167206, | |
| "grad_norm": 4.1625471115112305, | |
| "learning_rate": 1.6249276831411015e-06, | |
| "loss": 0.49367865920066833, | |
| "step": 938 | |
| }, | |
| { | |
| "epoch": 1.0140237324703345, | |
| "grad_norm": 2.80029296875, | |
| "learning_rate": 1.6231869357081726e-06, | |
| "loss": 0.5806005597114563, | |
| "step": 940 | |
| }, | |
| { | |
| "epoch": 1.0161812297734627, | |
| "grad_norm": 2.1595256328582764, | |
| "learning_rate": 1.6214432269942426e-06, | |
| "loss": 0.558415412902832, | |
| "step": 942 | |
| }, | |
| { | |
| "epoch": 1.0183387270765911, | |
| "grad_norm": 4.51309061050415, | |
| "learning_rate": 1.6196965668688455e-06, | |
| "loss": 0.4171544909477234, | |
| "step": 944 | |
| }, | |
| { | |
| "epoch": 1.0204962243797195, | |
| "grad_norm": 9.326614379882812, | |
| "learning_rate": 1.6179469652182215e-06, | |
| "loss": 0.49132904410362244, | |
| "step": 946 | |
| }, | |
| { | |
| "epoch": 1.022653721682848, | |
| "grad_norm": 2.721449613571167, | |
| "learning_rate": 1.6161944319452599e-06, | |
| "loss": 0.526667058467865, | |
| "step": 948 | |
| }, | |
| { | |
| "epoch": 1.0248112189859762, | |
| "grad_norm": 8.595200538635254, | |
| "learning_rate": 1.6144389769694418e-06, | |
| "loss": 0.519080400466919, | |
| "step": 950 | |
| }, | |
| { | |
| "epoch": 1.0269687162891046, | |
| "grad_norm": 1.9319312572479248, | |
| "learning_rate": 1.6126806102267871e-06, | |
| "loss": 0.4982292950153351, | |
| "step": 952 | |
| }, | |
| { | |
| "epoch": 1.029126213592233, | |
| "grad_norm": 2.2782833576202393, | |
| "learning_rate": 1.6109193416697962e-06, | |
| "loss": 0.47339990735054016, | |
| "step": 954 | |
| }, | |
| { | |
| "epoch": 1.0312837108953614, | |
| "grad_norm": 1.7561050653457642, | |
| "learning_rate": 1.609155181267393e-06, | |
| "loss": 0.4229566156864166, | |
| "step": 956 | |
| }, | |
| { | |
| "epoch": 1.0334412081984898, | |
| "grad_norm": 2.3419620990753174, | |
| "learning_rate": 1.6073881390048708e-06, | |
| "loss": 0.5675852298736572, | |
| "step": 958 | |
| }, | |
| { | |
| "epoch": 1.035598705501618, | |
| "grad_norm": 4.762004852294922, | |
| "learning_rate": 1.6056182248838333e-06, | |
| "loss": 0.47640660405158997, | |
| "step": 960 | |
| }, | |
| { | |
| "epoch": 1.0377562028047465, | |
| "grad_norm": 1.654563307762146, | |
| "learning_rate": 1.6038454489221401e-06, | |
| "loss": 0.39150771498680115, | |
| "step": 962 | |
| }, | |
| { | |
| "epoch": 1.0399137001078749, | |
| "grad_norm": 6.535782337188721, | |
| "learning_rate": 1.6020698211538485e-06, | |
| "loss": 0.43942204117774963, | |
| "step": 964 | |
| }, | |
| { | |
| "epoch": 1.0420711974110033, | |
| "grad_norm": 1.442032814025879, | |
| "learning_rate": 1.6002913516291575e-06, | |
| "loss": 0.3959490954875946, | |
| "step": 966 | |
| }, | |
| { | |
| "epoch": 1.0442286947141317, | |
| "grad_norm": 2.8204493522644043, | |
| "learning_rate": 1.5985100504143508e-06, | |
| "loss": 0.46986186504364014, | |
| "step": 968 | |
| }, | |
| { | |
| "epoch": 1.04638619201726, | |
| "grad_norm": 5.788197994232178, | |
| "learning_rate": 1.596725927591739e-06, | |
| "loss": 0.5587306022644043, | |
| "step": 970 | |
| }, | |
| { | |
| "epoch": 1.0485436893203883, | |
| "grad_norm": 3.22556209564209, | |
| "learning_rate": 1.594938993259604e-06, | |
| "loss": 0.42848098278045654, | |
| "step": 972 | |
| }, | |
| { | |
| "epoch": 1.0507011866235167, | |
| "grad_norm": 5.465310573577881, | |
| "learning_rate": 1.5931492575321405e-06, | |
| "loss": 0.3230629861354828, | |
| "step": 974 | |
| }, | |
| { | |
| "epoch": 1.0528586839266452, | |
| "grad_norm": 2.285598039627075, | |
| "learning_rate": 1.5913567305394004e-06, | |
| "loss": 0.4129447937011719, | |
| "step": 976 | |
| }, | |
| { | |
| "epoch": 1.0550161812297734, | |
| "grad_norm": 2.510387659072876, | |
| "learning_rate": 1.5895614224272329e-06, | |
| "loss": 0.5222740173339844, | |
| "step": 978 | |
| }, | |
| { | |
| "epoch": 1.0571736785329018, | |
| "grad_norm": 3.7488322257995605, | |
| "learning_rate": 1.5877633433572293e-06, | |
| "loss": 0.47047188878059387, | |
| "step": 980 | |
| }, | |
| { | |
| "epoch": 1.0593311758360302, | |
| "grad_norm": 1.3235845565795898, | |
| "learning_rate": 1.5859625035066652e-06, | |
| "loss": 0.4286286234855652, | |
| "step": 982 | |
| }, | |
| { | |
| "epoch": 1.0614886731391586, | |
| "grad_norm": 1.2796275615692139, | |
| "learning_rate": 1.5841589130684417e-06, | |
| "loss": 0.411946564912796, | |
| "step": 984 | |
| }, | |
| { | |
| "epoch": 1.063646170442287, | |
| "grad_norm": 2.5920588970184326, | |
| "learning_rate": 1.5823525822510282e-06, | |
| "loss": 0.4910277724266052, | |
| "step": 986 | |
| }, | |
| { | |
| "epoch": 1.0658036677454152, | |
| "grad_norm": 13.590333938598633, | |
| "learning_rate": 1.5805435212784066e-06, | |
| "loss": 0.381788045167923, | |
| "step": 988 | |
| }, | |
| { | |
| "epoch": 1.0679611650485437, | |
| "grad_norm": 1.8935883045196533, | |
| "learning_rate": 1.5787317403900095e-06, | |
| "loss": 0.4319833517074585, | |
| "step": 990 | |
| }, | |
| { | |
| "epoch": 1.070118662351672, | |
| "grad_norm": 1.8740428686141968, | |
| "learning_rate": 1.5769172498406657e-06, | |
| "loss": 0.5537865161895752, | |
| "step": 992 | |
| }, | |
| { | |
| "epoch": 1.0722761596548005, | |
| "grad_norm": 2.8530309200286865, | |
| "learning_rate": 1.5751000599005411e-06, | |
| "loss": 0.45889872312545776, | |
| "step": 994 | |
| }, | |
| { | |
| "epoch": 1.074433656957929, | |
| "grad_norm": 3.0372843742370605, | |
| "learning_rate": 1.573280180855079e-06, | |
| "loss": 0.4843668043613434, | |
| "step": 996 | |
| }, | |
| { | |
| "epoch": 1.0765911542610571, | |
| "grad_norm": 1.9461435079574585, | |
| "learning_rate": 1.571457623004945e-06, | |
| "loss": 0.3833789527416229, | |
| "step": 998 | |
| }, | |
| { | |
| "epoch": 1.0787486515641855, | |
| "grad_norm": 4.167815208435059, | |
| "learning_rate": 1.5696323966659659e-06, | |
| "loss": 0.7622794508934021, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 1.080906148867314, | |
| "grad_norm": 2.6408567428588867, | |
| "learning_rate": 1.5678045121690723e-06, | |
| "loss": 0.38144806027412415, | |
| "step": 1002 | |
| }, | |
| { | |
| "epoch": 1.0830636461704424, | |
| "grad_norm": 1.6359201669692993, | |
| "learning_rate": 1.5659739798602412e-06, | |
| "loss": 0.5962096452713013, | |
| "step": 1004 | |
| }, | |
| { | |
| "epoch": 1.0852211434735706, | |
| "grad_norm": 1.9073861837387085, | |
| "learning_rate": 1.5641408101004348e-06, | |
| "loss": 0.5042172074317932, | |
| "step": 1006 | |
| }, | |
| { | |
| "epoch": 1.087378640776699, | |
| "grad_norm": 1.1828426122665405, | |
| "learning_rate": 1.5623050132655452e-06, | |
| "loss": 0.4065170884132385, | |
| "step": 1008 | |
| }, | |
| { | |
| "epoch": 1.0895361380798274, | |
| "grad_norm": 1.109727144241333, | |
| "learning_rate": 1.5604665997463326e-06, | |
| "loss": 0.3995954990386963, | |
| "step": 1010 | |
| }, | |
| { | |
| "epoch": 1.0916936353829558, | |
| "grad_norm": 2.5301997661590576, | |
| "learning_rate": 1.5586255799483685e-06, | |
| "loss": 0.4737590253353119, | |
| "step": 1012 | |
| }, | |
| { | |
| "epoch": 1.0938511326860842, | |
| "grad_norm": 3.5179555416107178, | |
| "learning_rate": 1.5567819642919768e-06, | |
| "loss": 0.3755728006362915, | |
| "step": 1014 | |
| }, | |
| { | |
| "epoch": 1.0960086299892124, | |
| "grad_norm": 2.1552042961120605, | |
| "learning_rate": 1.5549357632121722e-06, | |
| "loss": 0.5351279973983765, | |
| "step": 1016 | |
| }, | |
| { | |
| "epoch": 1.0981661272923409, | |
| "grad_norm": 1.9615085124969482, | |
| "learning_rate": 1.5530869871586058e-06, | |
| "loss": 0.480570912361145, | |
| "step": 1018 | |
| }, | |
| { | |
| "epoch": 1.1003236245954693, | |
| "grad_norm": 5.5772552490234375, | |
| "learning_rate": 1.5512356465955008e-06, | |
| "loss": 0.4701279103755951, | |
| "step": 1020 | |
| }, | |
| { | |
| "epoch": 1.1024811218985977, | |
| "grad_norm": 2.0782828330993652, | |
| "learning_rate": 1.5493817520015969e-06, | |
| "loss": 0.6023370027542114, | |
| "step": 1022 | |
| }, | |
| { | |
| "epoch": 1.104638619201726, | |
| "grad_norm": 4.706164360046387, | |
| "learning_rate": 1.5475253138700899e-06, | |
| "loss": 0.4403872489929199, | |
| "step": 1024 | |
| }, | |
| { | |
| "epoch": 1.1067961165048543, | |
| "grad_norm": 2.136815309524536, | |
| "learning_rate": 1.5456663427085716e-06, | |
| "loss": 0.49264582991600037, | |
| "step": 1026 | |
| }, | |
| { | |
| "epoch": 1.1089536138079827, | |
| "grad_norm": 2.051373243331909, | |
| "learning_rate": 1.543804849038972e-06, | |
| "loss": 0.4840565621852875, | |
| "step": 1028 | |
| }, | |
| { | |
| "epoch": 1.1111111111111112, | |
| "grad_norm": 2.1924808025360107, | |
| "learning_rate": 1.5419408433974974e-06, | |
| "loss": 0.49226483702659607, | |
| "step": 1030 | |
| }, | |
| { | |
| "epoch": 1.1132686084142396, | |
| "grad_norm": 3.719738245010376, | |
| "learning_rate": 1.5400743363345733e-06, | |
| "loss": 0.429510235786438, | |
| "step": 1032 | |
| }, | |
| { | |
| "epoch": 1.1154261057173678, | |
| "grad_norm": 1.6024198532104492, | |
| "learning_rate": 1.5382053384147828e-06, | |
| "loss": 0.5755860805511475, | |
| "step": 1034 | |
| }, | |
| { | |
| "epoch": 1.1175836030204962, | |
| "grad_norm": 5.685046672821045, | |
| "learning_rate": 1.5363338602168072e-06, | |
| "loss": 0.40157079696655273, | |
| "step": 1036 | |
| }, | |
| { | |
| "epoch": 1.1197411003236246, | |
| "grad_norm": 1.610744833946228, | |
| "learning_rate": 1.5344599123333671e-06, | |
| "loss": 0.4434182643890381, | |
| "step": 1038 | |
| }, | |
| { | |
| "epoch": 1.121898597626753, | |
| "grad_norm": 23.230365753173828, | |
| "learning_rate": 1.532583505371161e-06, | |
| "loss": 0.4990198314189911, | |
| "step": 1040 | |
| }, | |
| { | |
| "epoch": 1.1240560949298812, | |
| "grad_norm": 1.455960988998413, | |
| "learning_rate": 1.5307046499508066e-06, | |
| "loss": 0.4062468409538269, | |
| "step": 1042 | |
| }, | |
| { | |
| "epoch": 1.1262135922330097, | |
| "grad_norm": 1.460098385810852, | |
| "learning_rate": 1.5288233567067794e-06, | |
| "loss": 0.45499229431152344, | |
| "step": 1044 | |
| }, | |
| { | |
| "epoch": 1.128371089536138, | |
| "grad_norm": 1.3446215391159058, | |
| "learning_rate": 1.5269396362873542e-06, | |
| "loss": 0.4300175905227661, | |
| "step": 1046 | |
| }, | |
| { | |
| "epoch": 1.1305285868392665, | |
| "grad_norm": 1.6477187871932983, | |
| "learning_rate": 1.5250534993545426e-06, | |
| "loss": 0.4830603301525116, | |
| "step": 1048 | |
| }, | |
| { | |
| "epoch": 1.132686084142395, | |
| "grad_norm": 2.070373296737671, | |
| "learning_rate": 1.523164956584035e-06, | |
| "loss": 0.47534123063087463, | |
| "step": 1050 | |
| }, | |
| { | |
| "epoch": 1.134843581445523, | |
| "grad_norm": 2.0876166820526123, | |
| "learning_rate": 1.5212740186651378e-06, | |
| "loss": 0.4968222975730896, | |
| "step": 1052 | |
| }, | |
| { | |
| "epoch": 1.1370010787486515, | |
| "grad_norm": 1.7046785354614258, | |
| "learning_rate": 1.5193806963007156e-06, | |
| "loss": 0.4516274034976959, | |
| "step": 1054 | |
| }, | |
| { | |
| "epoch": 1.13915857605178, | |
| "grad_norm": 6.408827781677246, | |
| "learning_rate": 1.517485000207128e-06, | |
| "loss": 0.45875146985054016, | |
| "step": 1056 | |
| }, | |
| { | |
| "epoch": 1.1413160733549084, | |
| "grad_norm": 1.6034789085388184, | |
| "learning_rate": 1.5155869411141704e-06, | |
| "loss": 0.5700262188911438, | |
| "step": 1058 | |
| }, | |
| { | |
| "epoch": 1.1434735706580366, | |
| "grad_norm": 9.753545761108398, | |
| "learning_rate": 1.5136865297650134e-06, | |
| "loss": 0.3870803117752075, | |
| "step": 1060 | |
| }, | |
| { | |
| "epoch": 1.145631067961165, | |
| "grad_norm": 2.6454174518585205, | |
| "learning_rate": 1.511783776916141e-06, | |
| "loss": 0.1962374895811081, | |
| "step": 1062 | |
| }, | |
| { | |
| "epoch": 1.1477885652642934, | |
| "grad_norm": 9.807194709777832, | |
| "learning_rate": 1.5098786933372907e-06, | |
| "loss": 0.3792603611946106, | |
| "step": 1064 | |
| }, | |
| { | |
| "epoch": 1.1499460625674218, | |
| "grad_norm": 1.371470332145691, | |
| "learning_rate": 1.5079712898113916e-06, | |
| "loss": 0.4742359519004822, | |
| "step": 1066 | |
| }, | |
| { | |
| "epoch": 1.1521035598705502, | |
| "grad_norm": 9.515076637268066, | |
| "learning_rate": 1.5060615771345045e-06, | |
| "loss": 0.49537792801856995, | |
| "step": 1068 | |
| }, | |
| { | |
| "epoch": 1.1542610571736784, | |
| "grad_norm": 3.214311361312866, | |
| "learning_rate": 1.50414956611576e-06, | |
| "loss": 0.5695366859436035, | |
| "step": 1070 | |
| }, | |
| { | |
| "epoch": 1.1564185544768069, | |
| "grad_norm": 3.578993797302246, | |
| "learning_rate": 1.5022352675772967e-06, | |
| "loss": 0.4019346535205841, | |
| "step": 1072 | |
| }, | |
| { | |
| "epoch": 1.1585760517799353, | |
| "grad_norm": 1.514540195465088, | |
| "learning_rate": 1.5003186923542022e-06, | |
| "loss": 0.4417833089828491, | |
| "step": 1074 | |
| }, | |
| { | |
| "epoch": 1.1607335490830637, | |
| "grad_norm": 1.5279725790023804, | |
| "learning_rate": 1.4983998512944497e-06, | |
| "loss": 0.40684929490089417, | |
| "step": 1076 | |
| }, | |
| { | |
| "epoch": 1.162891046386192, | |
| "grad_norm": 2.6913864612579346, | |
| "learning_rate": 1.4964787552588364e-06, | |
| "loss": 0.6169437766075134, | |
| "step": 1078 | |
| }, | |
| { | |
| "epoch": 1.1650485436893203, | |
| "grad_norm": 6.149393558502197, | |
| "learning_rate": 1.4945554151209241e-06, | |
| "loss": 0.4913300573825836, | |
| "step": 1080 | |
| }, | |
| { | |
| "epoch": 1.1672060409924487, | |
| "grad_norm": 3.6629035472869873, | |
| "learning_rate": 1.4926298417669757e-06, | |
| "loss": 0.4479219615459442, | |
| "step": 1082 | |
| }, | |
| { | |
| "epoch": 1.1693635382955772, | |
| "grad_norm": 2.302075147628784, | |
| "learning_rate": 1.4907020460958943e-06, | |
| "loss": 0.4335775077342987, | |
| "step": 1084 | |
| }, | |
| { | |
| "epoch": 1.1715210355987056, | |
| "grad_norm": 1.0914833545684814, | |
| "learning_rate": 1.488772039019162e-06, | |
| "loss": 0.466959148645401, | |
| "step": 1086 | |
| }, | |
| { | |
| "epoch": 1.173678532901834, | |
| "grad_norm": 5.46653938293457, | |
| "learning_rate": 1.4868398314607765e-06, | |
| "loss": 0.6127966046333313, | |
| "step": 1088 | |
| }, | |
| { | |
| "epoch": 1.1758360302049622, | |
| "grad_norm": 1.7374179363250732, | |
| "learning_rate": 1.484905434357192e-06, | |
| "loss": 0.5522704124450684, | |
| "step": 1090 | |
| }, | |
| { | |
| "epoch": 1.1779935275080906, | |
| "grad_norm": 1.311828374862671, | |
| "learning_rate": 1.482968858657255e-06, | |
| "loss": 0.4033716320991516, | |
| "step": 1092 | |
| }, | |
| { | |
| "epoch": 1.180151024811219, | |
| "grad_norm": 1.440038800239563, | |
| "learning_rate": 1.481030115322142e-06, | |
| "loss": 0.4107467234134674, | |
| "step": 1094 | |
| }, | |
| { | |
| "epoch": 1.1823085221143474, | |
| "grad_norm": 17.832111358642578, | |
| "learning_rate": 1.4790892153253004e-06, | |
| "loss": 0.26430749893188477, | |
| "step": 1096 | |
| }, | |
| { | |
| "epoch": 1.1844660194174756, | |
| "grad_norm": 21.0089054107666, | |
| "learning_rate": 1.4771461696523828e-06, | |
| "loss": 0.2329411655664444, | |
| "step": 1098 | |
| }, | |
| { | |
| "epoch": 1.186623516720604, | |
| "grad_norm": 3.482215166091919, | |
| "learning_rate": 1.4752009893011877e-06, | |
| "loss": 0.33426716923713684, | |
| "step": 1100 | |
| }, | |
| { | |
| "epoch": 1.1887810140237325, | |
| "grad_norm": 1.4247711896896362, | |
| "learning_rate": 1.4732536852815948e-06, | |
| "loss": 0.3406693637371063, | |
| "step": 1102 | |
| }, | |
| { | |
| "epoch": 1.190938511326861, | |
| "grad_norm": 2.5058937072753906, | |
| "learning_rate": 1.4713042686155054e-06, | |
| "loss": 0.4682016670703888, | |
| "step": 1104 | |
| }, | |
| { | |
| "epoch": 1.1930960086299893, | |
| "grad_norm": 3.2917213439941406, | |
| "learning_rate": 1.469352750336778e-06, | |
| "loss": 0.5560429096221924, | |
| "step": 1106 | |
| }, | |
| { | |
| "epoch": 1.1952535059331175, | |
| "grad_norm": 2.8005712032318115, | |
| "learning_rate": 1.4673991414911653e-06, | |
| "loss": 0.49286743998527527, | |
| "step": 1108 | |
| }, | |
| { | |
| "epoch": 1.197411003236246, | |
| "grad_norm": 1.9056379795074463, | |
| "learning_rate": 1.465443453136255e-06, | |
| "loss": 0.5415875911712646, | |
| "step": 1110 | |
| }, | |
| { | |
| "epoch": 1.1995685005393744, | |
| "grad_norm": 3.3546078205108643, | |
| "learning_rate": 1.4634856963414022e-06, | |
| "loss": 0.5321105122566223, | |
| "step": 1112 | |
| }, | |
| { | |
| "epoch": 1.2017259978425028, | |
| "grad_norm": 1.4719895124435425, | |
| "learning_rate": 1.4615258821876726e-06, | |
| "loss": 0.4267783761024475, | |
| "step": 1114 | |
| }, | |
| { | |
| "epoch": 1.203883495145631, | |
| "grad_norm": 1.423250436782837, | |
| "learning_rate": 1.459564021767774e-06, | |
| "loss": 0.498091459274292, | |
| "step": 1116 | |
| }, | |
| { | |
| "epoch": 1.2060409924487594, | |
| "grad_norm": 2.6084094047546387, | |
| "learning_rate": 1.4576001261859981e-06, | |
| "loss": 0.4652736186981201, | |
| "step": 1118 | |
| }, | |
| { | |
| "epoch": 1.2081984897518878, | |
| "grad_norm": 1.2524727582931519, | |
| "learning_rate": 1.4556342065581548e-06, | |
| "loss": 0.5334936380386353, | |
| "step": 1120 | |
| }, | |
| { | |
| "epoch": 1.2103559870550162, | |
| "grad_norm": 4.288187026977539, | |
| "learning_rate": 1.453666274011511e-06, | |
| "loss": 0.6997748017311096, | |
| "step": 1122 | |
| }, | |
| { | |
| "epoch": 1.2125134843581447, | |
| "grad_norm": 2.6082146167755127, | |
| "learning_rate": 1.4516963396847255e-06, | |
| "loss": 0.6567426323890686, | |
| "step": 1124 | |
| }, | |
| { | |
| "epoch": 1.2146709816612729, | |
| "grad_norm": 1.885820746421814, | |
| "learning_rate": 1.4497244147277895e-06, | |
| "loss": 0.41897153854370117, | |
| "step": 1126 | |
| }, | |
| { | |
| "epoch": 1.2168284789644013, | |
| "grad_norm": 3.6321957111358643, | |
| "learning_rate": 1.4477505103019587e-06, | |
| "loss": 0.4789751172065735, | |
| "step": 1128 | |
| }, | |
| { | |
| "epoch": 1.2189859762675297, | |
| "grad_norm": 3.317688226699829, | |
| "learning_rate": 1.4457746375796956e-06, | |
| "loss": 0.551139235496521, | |
| "step": 1130 | |
| }, | |
| { | |
| "epoch": 1.2211434735706581, | |
| "grad_norm": 1.2082242965698242, | |
| "learning_rate": 1.4437968077446013e-06, | |
| "loss": 0.3944661617279053, | |
| "step": 1132 | |
| }, | |
| { | |
| "epoch": 1.2233009708737863, | |
| "grad_norm": 1.8302658796310425, | |
| "learning_rate": 1.4418170319913548e-06, | |
| "loss": 0.23596011102199554, | |
| "step": 1134 | |
| }, | |
| { | |
| "epoch": 1.2254584681769147, | |
| "grad_norm": 3.345332622528076, | |
| "learning_rate": 1.43983532152565e-06, | |
| "loss": 0.20758569240570068, | |
| "step": 1136 | |
| }, | |
| { | |
| "epoch": 1.2276159654800431, | |
| "grad_norm": 2.142779588699341, | |
| "learning_rate": 1.43785168756413e-06, | |
| "loss": 0.4067525267601013, | |
| "step": 1138 | |
| }, | |
| { | |
| "epoch": 1.2297734627831716, | |
| "grad_norm": 14.230850219726562, | |
| "learning_rate": 1.4358661413343269e-06, | |
| "loss": 0.5197821855545044, | |
| "step": 1140 | |
| }, | |
| { | |
| "epoch": 1.2319309600863, | |
| "grad_norm": 1.4702306985855103, | |
| "learning_rate": 1.4338786940745943e-06, | |
| "loss": 0.6153298020362854, | |
| "step": 1142 | |
| }, | |
| { | |
| "epoch": 1.2340884573894282, | |
| "grad_norm": 3.1709959506988525, | |
| "learning_rate": 1.4318893570340476e-06, | |
| "loss": 0.47198399901390076, | |
| "step": 1144 | |
| }, | |
| { | |
| "epoch": 1.2362459546925566, | |
| "grad_norm": 2.0311388969421387, | |
| "learning_rate": 1.4298981414724972e-06, | |
| "loss": 0.4431988596916199, | |
| "step": 1146 | |
| }, | |
| { | |
| "epoch": 1.238403451995685, | |
| "grad_norm": 2.6444904804229736, | |
| "learning_rate": 1.4279050586603865e-06, | |
| "loss": 0.49952733516693115, | |
| "step": 1148 | |
| }, | |
| { | |
| "epoch": 1.2405609492988134, | |
| "grad_norm": 2.9312846660614014, | |
| "learning_rate": 1.4259101198787284e-06, | |
| "loss": 0.40768349170684814, | |
| "step": 1150 | |
| }, | |
| { | |
| "epoch": 1.2427184466019416, | |
| "grad_norm": 1.304535150527954, | |
| "learning_rate": 1.4239133364190402e-06, | |
| "loss": 0.32800549268722534, | |
| "step": 1152 | |
| }, | |
| { | |
| "epoch": 1.24487594390507, | |
| "grad_norm": 6.382114887237549, | |
| "learning_rate": 1.4219147195832796e-06, | |
| "loss": 0.5660591125488281, | |
| "step": 1154 | |
| }, | |
| { | |
| "epoch": 1.2470334412081985, | |
| "grad_norm": 1.935137152671814, | |
| "learning_rate": 1.4199142806837825e-06, | |
| "loss": 0.46538597345352173, | |
| "step": 1156 | |
| }, | |
| { | |
| "epoch": 1.249190938511327, | |
| "grad_norm": 1.4178097248077393, | |
| "learning_rate": 1.4179120310431967e-06, | |
| "loss": 0.3020792007446289, | |
| "step": 1158 | |
| }, | |
| { | |
| "epoch": 1.2513484358144553, | |
| "grad_norm": 6.318742752075195, | |
| "learning_rate": 1.41590798199442e-06, | |
| "loss": 0.5570347309112549, | |
| "step": 1160 | |
| }, | |
| { | |
| "epoch": 1.2535059331175837, | |
| "grad_norm": 1.248417615890503, | |
| "learning_rate": 1.4139021448805344e-06, | |
| "loss": 0.3992771506309509, | |
| "step": 1162 | |
| }, | |
| { | |
| "epoch": 1.255663430420712, | |
| "grad_norm": 5.451845645904541, | |
| "learning_rate": 1.4118945310547424e-06, | |
| "loss": 0.5283824801445007, | |
| "step": 1164 | |
| }, | |
| { | |
| "epoch": 1.2578209277238404, | |
| "grad_norm": 2.265537738800049, | |
| "learning_rate": 1.4098851518803032e-06, | |
| "loss": 0.41607847809791565, | |
| "step": 1166 | |
| }, | |
| { | |
| "epoch": 1.2599784250269688, | |
| "grad_norm": 0.5451850295066833, | |
| "learning_rate": 1.4078740187304678e-06, | |
| "loss": 0.44866782426834106, | |
| "step": 1168 | |
| }, | |
| { | |
| "epoch": 1.262135922330097, | |
| "grad_norm": 6.6960835456848145, | |
| "learning_rate": 1.4058611429884153e-06, | |
| "loss": 0.6595394015312195, | |
| "step": 1170 | |
| }, | |
| { | |
| "epoch": 1.2642934196332254, | |
| "grad_norm": 6.947851657867432, | |
| "learning_rate": 1.4038465360471872e-06, | |
| "loss": 0.6133137345314026, | |
| "step": 1172 | |
| }, | |
| { | |
| "epoch": 1.2664509169363538, | |
| "grad_norm": 2.5718576908111572, | |
| "learning_rate": 1.401830209309624e-06, | |
| "loss": 0.4383125901222229, | |
| "step": 1174 | |
| }, | |
| { | |
| "epoch": 1.2686084142394822, | |
| "grad_norm": 2.443553924560547, | |
| "learning_rate": 1.3998121741883012e-06, | |
| "loss": 0.38315558433532715, | |
| "step": 1176 | |
| }, | |
| { | |
| "epoch": 1.2707659115426106, | |
| "grad_norm": 0.8398682475090027, | |
| "learning_rate": 1.3977924421054623e-06, | |
| "loss": 0.22079361975193024, | |
| "step": 1178 | |
| }, | |
| { | |
| "epoch": 1.272923408845739, | |
| "grad_norm": 3.0209848880767822, | |
| "learning_rate": 1.3957710244929575e-06, | |
| "loss": 0.4939245581626892, | |
| "step": 1180 | |
| }, | |
| { | |
| "epoch": 1.2750809061488673, | |
| "grad_norm": 4.289799213409424, | |
| "learning_rate": 1.3937479327921762e-06, | |
| "loss": 0.42832162976264954, | |
| "step": 1182 | |
| }, | |
| { | |
| "epoch": 1.2772384034519957, | |
| "grad_norm": 2.087005376815796, | |
| "learning_rate": 1.3917231784539831e-06, | |
| "loss": 0.5092071294784546, | |
| "step": 1184 | |
| }, | |
| { | |
| "epoch": 1.279395900755124, | |
| "grad_norm": 1.6985106468200684, | |
| "learning_rate": 1.3896967729386545e-06, | |
| "loss": 0.6054165363311768, | |
| "step": 1186 | |
| }, | |
| { | |
| "epoch": 1.2815533980582523, | |
| "grad_norm": 3.5646963119506836, | |
| "learning_rate": 1.3876687277158117e-06, | |
| "loss": 0.47859057784080505, | |
| "step": 1188 | |
| }, | |
| { | |
| "epoch": 1.2837108953613807, | |
| "grad_norm": 3.154890537261963, | |
| "learning_rate": 1.385639054264357e-06, | |
| "loss": 0.43968018889427185, | |
| "step": 1190 | |
| }, | |
| { | |
| "epoch": 1.2858683926645091, | |
| "grad_norm": 6.229619026184082, | |
| "learning_rate": 1.383607764072409e-06, | |
| "loss": 0.5543320775032043, | |
| "step": 1192 | |
| }, | |
| { | |
| "epoch": 1.2880258899676376, | |
| "grad_norm": 12.460729598999023, | |
| "learning_rate": 1.3815748686372368e-06, | |
| "loss": 0.4493723511695862, | |
| "step": 1194 | |
| }, | |
| { | |
| "epoch": 1.290183387270766, | |
| "grad_norm": 1.6863099336624146, | |
| "learning_rate": 1.3795403794651955e-06, | |
| "loss": 0.3126695156097412, | |
| "step": 1196 | |
| }, | |
| { | |
| "epoch": 1.2923408845738944, | |
| "grad_norm": 3.3788959980010986, | |
| "learning_rate": 1.3775043080716608e-06, | |
| "loss": 0.46441030502319336, | |
| "step": 1198 | |
| }, | |
| { | |
| "epoch": 1.2944983818770226, | |
| "grad_norm": 1.3057730197906494, | |
| "learning_rate": 1.3754666659809636e-06, | |
| "loss": 0.4863712191581726, | |
| "step": 1200 | |
| }, | |
| { | |
| "epoch": 1.296655879180151, | |
| "grad_norm": 1.384608507156372, | |
| "learning_rate": 1.3734274647263258e-06, | |
| "loss": 0.41433578729629517, | |
| "step": 1202 | |
| }, | |
| { | |
| "epoch": 1.2988133764832794, | |
| "grad_norm": 0.7437410950660706, | |
| "learning_rate": 1.3713867158497935e-06, | |
| "loss": 0.3361971378326416, | |
| "step": 1204 | |
| }, | |
| { | |
| "epoch": 1.3009708737864076, | |
| "grad_norm": 5.0748090744018555, | |
| "learning_rate": 1.369344430902173e-06, | |
| "loss": 0.582435667514801, | |
| "step": 1206 | |
| }, | |
| { | |
| "epoch": 1.303128371089536, | |
| "grad_norm": 1.445181131362915, | |
| "learning_rate": 1.3673006214429657e-06, | |
| "loss": 0.49374300241470337, | |
| "step": 1208 | |
| }, | |
| { | |
| "epoch": 1.3052858683926645, | |
| "grad_norm": 2.7276389598846436, | |
| "learning_rate": 1.3652552990402993e-06, | |
| "loss": 0.49756351113319397, | |
| "step": 1210 | |
| }, | |
| { | |
| "epoch": 1.307443365695793, | |
| "grad_norm": 3.362050771713257, | |
| "learning_rate": 1.3632084752708672e-06, | |
| "loss": 0.4800053536891937, | |
| "step": 1212 | |
| }, | |
| { | |
| "epoch": 1.3096008629989213, | |
| "grad_norm": 1.3913723230361938, | |
| "learning_rate": 1.36116016171986e-06, | |
| "loss": 0.569862961769104, | |
| "step": 1214 | |
| }, | |
| { | |
| "epoch": 1.3117583603020497, | |
| "grad_norm": 0.8088376522064209, | |
| "learning_rate": 1.3591103699809009e-06, | |
| "loss": 0.43602418899536133, | |
| "step": 1216 | |
| }, | |
| { | |
| "epoch": 1.313915857605178, | |
| "grad_norm": 2.7153704166412354, | |
| "learning_rate": 1.3570591116559786e-06, | |
| "loss": 0.627713680267334, | |
| "step": 1218 | |
| }, | |
| { | |
| "epoch": 1.3160733549083063, | |
| "grad_norm": 2.235117197036743, | |
| "learning_rate": 1.3550063983553842e-06, | |
| "loss": 0.20072109997272491, | |
| "step": 1220 | |
| }, | |
| { | |
| "epoch": 1.3182308522114348, | |
| "grad_norm": 2.215144157409668, | |
| "learning_rate": 1.352952241697643e-06, | |
| "loss": 0.45614534616470337, | |
| "step": 1222 | |
| }, | |
| { | |
| "epoch": 1.3203883495145632, | |
| "grad_norm": 1.2694110870361328, | |
| "learning_rate": 1.3508966533094507e-06, | |
| "loss": 0.4190627932548523, | |
| "step": 1224 | |
| }, | |
| { | |
| "epoch": 1.3225458468176914, | |
| "grad_norm": 1.3221111297607422, | |
| "learning_rate": 1.3488396448256063e-06, | |
| "loss": 0.41167372465133667, | |
| "step": 1226 | |
| }, | |
| { | |
| "epoch": 1.3247033441208198, | |
| "grad_norm": 0.3495451509952545, | |
| "learning_rate": 1.3467812278889466e-06, | |
| "loss": 0.2586868107318878, | |
| "step": 1228 | |
| }, | |
| { | |
| "epoch": 1.3268608414239482, | |
| "grad_norm": 3.7483558654785156, | |
| "learning_rate": 1.3447214141502801e-06, | |
| "loss": 0.42079082131385803, | |
| "step": 1230 | |
| }, | |
| { | |
| "epoch": 1.3290183387270766, | |
| "grad_norm": 1.2944005727767944, | |
| "learning_rate": 1.3426602152683221e-06, | |
| "loss": 0.4828168451786041, | |
| "step": 1232 | |
| }, | |
| { | |
| "epoch": 1.331175836030205, | |
| "grad_norm": 2.611660957336426, | |
| "learning_rate": 1.3405976429096268e-06, | |
| "loss": 0.5353527665138245, | |
| "step": 1234 | |
| }, | |
| { | |
| "epoch": 1.3333333333333333, | |
| "grad_norm": 3.6428263187408447, | |
| "learning_rate": 1.3385337087485237e-06, | |
| "loss": 0.28263047337532043, | |
| "step": 1236 | |
| }, | |
| { | |
| "epoch": 1.3354908306364617, | |
| "grad_norm": 1.8777357339859009, | |
| "learning_rate": 1.3364684244670498e-06, | |
| "loss": 0.47503718733787537, | |
| "step": 1238 | |
| }, | |
| { | |
| "epoch": 1.33764832793959, | |
| "grad_norm": 1.553531527519226, | |
| "learning_rate": 1.334401801754883e-06, | |
| "loss": 0.4773551821708679, | |
| "step": 1240 | |
| }, | |
| { | |
| "epoch": 1.3398058252427185, | |
| "grad_norm": 26.932111740112305, | |
| "learning_rate": 1.3323338523092775e-06, | |
| "loss": 0.5582832098007202, | |
| "step": 1242 | |
| }, | |
| { | |
| "epoch": 1.3419633225458467, | |
| "grad_norm": 5.682314395904541, | |
| "learning_rate": 1.3302645878349972e-06, | |
| "loss": 0.3482803702354431, | |
| "step": 1244 | |
| }, | |
| { | |
| "epoch": 1.3441208198489751, | |
| "grad_norm": 1.8704055547714233, | |
| "learning_rate": 1.3281940200442492e-06, | |
| "loss": 0.5859532952308655, | |
| "step": 1246 | |
| }, | |
| { | |
| "epoch": 1.3462783171521036, | |
| "grad_norm": 2.0849342346191406, | |
| "learning_rate": 1.3261221606566161e-06, | |
| "loss": 0.571201503276825, | |
| "step": 1248 | |
| }, | |
| { | |
| "epoch": 1.348435814455232, | |
| "grad_norm": 1.3928718566894531, | |
| "learning_rate": 1.324049021398993e-06, | |
| "loss": 0.3548327088356018, | |
| "step": 1250 | |
| }, | |
| { | |
| "epoch": 1.3505933117583604, | |
| "grad_norm": 1.392311930656433, | |
| "learning_rate": 1.3219746140055185e-06, | |
| "loss": 0.5696713328361511, | |
| "step": 1252 | |
| }, | |
| { | |
| "epoch": 1.3527508090614886, | |
| "grad_norm": 1.4951963424682617, | |
| "learning_rate": 1.3198989502175077e-06, | |
| "loss": 0.34389352798461914, | |
| "step": 1254 | |
| }, | |
| { | |
| "epoch": 1.354908306364617, | |
| "grad_norm": 2.442704916000366, | |
| "learning_rate": 1.3178220417833887e-06, | |
| "loss": 0.4191893935203552, | |
| "step": 1256 | |
| }, | |
| { | |
| "epoch": 1.3570658036677454, | |
| "grad_norm": 2.9892749786376953, | |
| "learning_rate": 1.315743900458634e-06, | |
| "loss": 0.35198745131492615, | |
| "step": 1258 | |
| }, | |
| { | |
| "epoch": 1.3592233009708738, | |
| "grad_norm": 2.776257038116455, | |
| "learning_rate": 1.313664538005693e-06, | |
| "loss": 0.3809160888195038, | |
| "step": 1260 | |
| }, | |
| { | |
| "epoch": 1.361380798274002, | |
| "grad_norm": 2.1207423210144043, | |
| "learning_rate": 1.3115839661939288e-06, | |
| "loss": 0.3112916350364685, | |
| "step": 1262 | |
| }, | |
| { | |
| "epoch": 1.3635382955771305, | |
| "grad_norm": 2.34796404838562, | |
| "learning_rate": 1.3095021967995485e-06, | |
| "loss": 0.3474862575531006, | |
| "step": 1264 | |
| }, | |
| { | |
| "epoch": 1.3656957928802589, | |
| "grad_norm": 1.681514024734497, | |
| "learning_rate": 1.3074192416055375e-06, | |
| "loss": 0.6013367176055908, | |
| "step": 1266 | |
| }, | |
| { | |
| "epoch": 1.3678532901833873, | |
| "grad_norm": 1.5219907760620117, | |
| "learning_rate": 1.3053351124015935e-06, | |
| "loss": 0.44022852182388306, | |
| "step": 1268 | |
| }, | |
| { | |
| "epoch": 1.3700107874865157, | |
| "grad_norm": 10.068926811218262, | |
| "learning_rate": 1.3032498209840583e-06, | |
| "loss": 0.4306741952896118, | |
| "step": 1270 | |
| }, | |
| { | |
| "epoch": 1.3721682847896441, | |
| "grad_norm": 3.296771287918091, | |
| "learning_rate": 1.3011633791558532e-06, | |
| "loss": 0.5527811050415039, | |
| "step": 1272 | |
| }, | |
| { | |
| "epoch": 1.3743257820927723, | |
| "grad_norm": 136.3231201171875, | |
| "learning_rate": 1.2990757987264098e-06, | |
| "loss": 0.41877317428588867, | |
| "step": 1274 | |
| }, | |
| { | |
| "epoch": 1.3764832793959008, | |
| "grad_norm": 2.0969786643981934, | |
| "learning_rate": 1.2969870915116042e-06, | |
| "loss": 0.578849732875824, | |
| "step": 1276 | |
| }, | |
| { | |
| "epoch": 1.3786407766990292, | |
| "grad_norm": 4.652449131011963, | |
| "learning_rate": 1.2948972693336916e-06, | |
| "loss": 0.33083122968673706, | |
| "step": 1278 | |
| }, | |
| { | |
| "epoch": 1.3807982740021574, | |
| "grad_norm": 1.5804355144500732, | |
| "learning_rate": 1.292806344021237e-06, | |
| "loss": 0.3789401948451996, | |
| "step": 1280 | |
| }, | |
| { | |
| "epoch": 1.3829557713052858, | |
| "grad_norm": 1.5308772325515747, | |
| "learning_rate": 1.2907143274090487e-06, | |
| "loss": 0.5875998735427856, | |
| "step": 1282 | |
| }, | |
| { | |
| "epoch": 1.3851132686084142, | |
| "grad_norm": 1.4146822690963745, | |
| "learning_rate": 1.2886212313381128e-06, | |
| "loss": 0.38486555218696594, | |
| "step": 1284 | |
| }, | |
| { | |
| "epoch": 1.3872707659115426, | |
| "grad_norm": 4.086416244506836, | |
| "learning_rate": 1.2865270676555249e-06, | |
| "loss": 0.596904456615448, | |
| "step": 1286 | |
| }, | |
| { | |
| "epoch": 1.389428263214671, | |
| "grad_norm": 3.7820913791656494, | |
| "learning_rate": 1.2844318482144233e-06, | |
| "loss": 0.43893247842788696, | |
| "step": 1288 | |
| }, | |
| { | |
| "epoch": 1.3915857605177995, | |
| "grad_norm": 2.7569808959960938, | |
| "learning_rate": 1.2823355848739217e-06, | |
| "loss": 0.3261288106441498, | |
| "step": 1290 | |
| }, | |
| { | |
| "epoch": 1.3937432578209277, | |
| "grad_norm": 10.709510803222656, | |
| "learning_rate": 1.280238289499043e-06, | |
| "loss": 0.5592629909515381, | |
| "step": 1292 | |
| }, | |
| { | |
| "epoch": 1.395900755124056, | |
| "grad_norm": 3.0105295181274414, | |
| "learning_rate": 1.2781399739606513e-06, | |
| "loss": 0.5706429481506348, | |
| "step": 1294 | |
| }, | |
| { | |
| "epoch": 1.3980582524271845, | |
| "grad_norm": 0.3642590045928955, | |
| "learning_rate": 1.2760406501353845e-06, | |
| "loss": 0.4913448393344879, | |
| "step": 1296 | |
| }, | |
| { | |
| "epoch": 1.4002157497303127, | |
| "grad_norm": 1.7649108171463013, | |
| "learning_rate": 1.273940329905588e-06, | |
| "loss": 0.4015069007873535, | |
| "step": 1298 | |
| }, | |
| { | |
| "epoch": 1.4023732470334411, | |
| "grad_norm": 5.478614807128906, | |
| "learning_rate": 1.2718390251592465e-06, | |
| "loss": 0.3647070527076721, | |
| "step": 1300 | |
| }, | |
| { | |
| "epoch": 1.4045307443365695, | |
| "grad_norm": 2.2883858680725098, | |
| "learning_rate": 1.2697367477899174e-06, | |
| "loss": 0.5743715763092041, | |
| "step": 1302 | |
| }, | |
| { | |
| "epoch": 1.406688241639698, | |
| "grad_norm": 1.2849724292755127, | |
| "learning_rate": 1.2676335096966633e-06, | |
| "loss": 0.3841140866279602, | |
| "step": 1304 | |
| }, | |
| { | |
| "epoch": 1.4088457389428264, | |
| "grad_norm": 1.79099702835083, | |
| "learning_rate": 1.2655293227839841e-06, | |
| "loss": 0.4001426100730896, | |
| "step": 1306 | |
| }, | |
| { | |
| "epoch": 1.4110032362459548, | |
| "grad_norm": 2.938184976577759, | |
| "learning_rate": 1.2634241989617508e-06, | |
| "loss": 0.5245987176895142, | |
| "step": 1308 | |
| }, | |
| { | |
| "epoch": 1.413160733549083, | |
| "grad_norm": 1.6925368309020996, | |
| "learning_rate": 1.2613181501451373e-06, | |
| "loss": 0.41294950246810913, | |
| "step": 1310 | |
| }, | |
| { | |
| "epoch": 1.4153182308522114, | |
| "grad_norm": 1.1948857307434082, | |
| "learning_rate": 1.259211188254552e-06, | |
| "loss": 0.4697638154029846, | |
| "step": 1312 | |
| }, | |
| { | |
| "epoch": 1.4174757281553398, | |
| "grad_norm": 3.2041354179382324, | |
| "learning_rate": 1.257103325215573e-06, | |
| "loss": 0.47677257657051086, | |
| "step": 1314 | |
| }, | |
| { | |
| "epoch": 1.419633225458468, | |
| "grad_norm": 4.060916423797607, | |
| "learning_rate": 1.2549945729588771e-06, | |
| "loss": 0.22076305747032166, | |
| "step": 1316 | |
| }, | |
| { | |
| "epoch": 1.4217907227615965, | |
| "grad_norm": 4.826178073883057, | |
| "learning_rate": 1.2528849434201758e-06, | |
| "loss": 0.4530554711818695, | |
| "step": 1318 | |
| }, | |
| { | |
| "epoch": 1.4239482200647249, | |
| "grad_norm": 1.5924415588378906, | |
| "learning_rate": 1.2507744485401457e-06, | |
| "loss": 0.4310169517993927, | |
| "step": 1320 | |
| }, | |
| { | |
| "epoch": 1.4261057173678533, | |
| "grad_norm": 1.6999584436416626, | |
| "learning_rate": 1.2486631002643604e-06, | |
| "loss": 0.32071733474731445, | |
| "step": 1322 | |
| }, | |
| { | |
| "epoch": 1.4282632146709817, | |
| "grad_norm": 1.5567405223846436, | |
| "learning_rate": 1.2465509105432252e-06, | |
| "loss": 0.2832459509372711, | |
| "step": 1324 | |
| }, | |
| { | |
| "epoch": 1.4304207119741101, | |
| "grad_norm": 5.614641189575195, | |
| "learning_rate": 1.2444378913319067e-06, | |
| "loss": 0.47128552198410034, | |
| "step": 1326 | |
| }, | |
| { | |
| "epoch": 1.4325782092772383, | |
| "grad_norm": 1.8778231143951416, | |
| "learning_rate": 1.2423240545902674e-06, | |
| "loss": 0.38101163506507874, | |
| "step": 1328 | |
| }, | |
| { | |
| "epoch": 1.4347357065803668, | |
| "grad_norm": 3.9056172370910645, | |
| "learning_rate": 1.2402094122827964e-06, | |
| "loss": 0.537193775177002, | |
| "step": 1330 | |
| }, | |
| { | |
| "epoch": 1.4368932038834952, | |
| "grad_norm": 3.836848735809326, | |
| "learning_rate": 1.2380939763785433e-06, | |
| "loss": 0.4837642014026642, | |
| "step": 1332 | |
| }, | |
| { | |
| "epoch": 1.4390507011866236, | |
| "grad_norm": 1.4066507816314697, | |
| "learning_rate": 1.2359777588510484e-06, | |
| "loss": 0.5043050646781921, | |
| "step": 1334 | |
| }, | |
| { | |
| "epoch": 1.4412081984897518, | |
| "grad_norm": 1.7807657718658447, | |
| "learning_rate": 1.233860771678277e-06, | |
| "loss": 0.42978519201278687, | |
| "step": 1336 | |
| }, | |
| { | |
| "epoch": 1.4433656957928802, | |
| "grad_norm": 2.4499216079711914, | |
| "learning_rate": 1.23174302684255e-06, | |
| "loss": 0.5630735754966736, | |
| "step": 1338 | |
| }, | |
| { | |
| "epoch": 1.4455231930960086, | |
| "grad_norm": 2.219531297683716, | |
| "learning_rate": 1.2296245363304772e-06, | |
| "loss": 0.6489322185516357, | |
| "step": 1340 | |
| }, | |
| { | |
| "epoch": 1.447680690399137, | |
| "grad_norm": 3.5208077430725098, | |
| "learning_rate": 1.2275053121328886e-06, | |
| "loss": 0.424197793006897, | |
| "step": 1342 | |
| }, | |
| { | |
| "epoch": 1.4498381877022655, | |
| "grad_norm": 2.0139458179473877, | |
| "learning_rate": 1.2253853662447673e-06, | |
| "loss": 0.51392662525177, | |
| "step": 1344 | |
| }, | |
| { | |
| "epoch": 1.4519956850053937, | |
| "grad_norm": 3.8764588832855225, | |
| "learning_rate": 1.223264710665181e-06, | |
| "loss": 0.4180300533771515, | |
| "step": 1346 | |
| }, | |
| { | |
| "epoch": 1.454153182308522, | |
| "grad_norm": 1.3136292695999146, | |
| "learning_rate": 1.2211433573972145e-06, | |
| "loss": 0.3597021698951721, | |
| "step": 1348 | |
| }, | |
| { | |
| "epoch": 1.4563106796116505, | |
| "grad_norm": 2.6723670959472656, | |
| "learning_rate": 1.219021318447901e-06, | |
| "loss": 0.4391145408153534, | |
| "step": 1350 | |
| }, | |
| { | |
| "epoch": 1.458468176914779, | |
| "grad_norm": 2.19071102142334, | |
| "learning_rate": 1.2168986058281552e-06, | |
| "loss": 0.31397783756256104, | |
| "step": 1352 | |
| }, | |
| { | |
| "epoch": 1.4606256742179071, | |
| "grad_norm": 2.55515718460083, | |
| "learning_rate": 1.2147752315527056e-06, | |
| "loss": 0.49626126885414124, | |
| "step": 1354 | |
| }, | |
| { | |
| "epoch": 1.4627831715210355, | |
| "grad_norm": 1.1953641176223755, | |
| "learning_rate": 1.2126512076400238e-06, | |
| "loss": 0.36800915002822876, | |
| "step": 1356 | |
| }, | |
| { | |
| "epoch": 1.464940668824164, | |
| "grad_norm": 1.1821345090866089, | |
| "learning_rate": 1.2105265461122599e-06, | |
| "loss": 0.36970698833465576, | |
| "step": 1358 | |
| }, | |
| { | |
| "epoch": 1.4670981661272924, | |
| "grad_norm": 1.9997817277908325, | |
| "learning_rate": 1.208401258995173e-06, | |
| "loss": 0.24953503906726837, | |
| "step": 1360 | |
| }, | |
| { | |
| "epoch": 1.4692556634304208, | |
| "grad_norm": 1.4127711057662964, | |
| "learning_rate": 1.2062753583180617e-06, | |
| "loss": 0.6299887895584106, | |
| "step": 1362 | |
| }, | |
| { | |
| "epoch": 1.4714131607335492, | |
| "grad_norm": 1.239811897277832, | |
| "learning_rate": 1.2041488561136987e-06, | |
| "loss": 0.2647631764411926, | |
| "step": 1364 | |
| }, | |
| { | |
| "epoch": 1.4735706580366774, | |
| "grad_norm": 1.8730353116989136, | |
| "learning_rate": 1.2020217644182618e-06, | |
| "loss": 0.47313305735588074, | |
| "step": 1366 | |
| }, | |
| { | |
| "epoch": 1.4757281553398058, | |
| "grad_norm": 1.2671191692352295, | |
| "learning_rate": 1.1998940952712636e-06, | |
| "loss": 0.4221327602863312, | |
| "step": 1368 | |
| }, | |
| { | |
| "epoch": 1.4778856526429343, | |
| "grad_norm": 3.7683935165405273, | |
| "learning_rate": 1.1977658607154866e-06, | |
| "loss": 0.3292485773563385, | |
| "step": 1370 | |
| }, | |
| { | |
| "epoch": 1.4800431499460625, | |
| "grad_norm": 2.1078288555145264, | |
| "learning_rate": 1.1956370727969132e-06, | |
| "loss": 0.4748386740684509, | |
| "step": 1372 | |
| }, | |
| { | |
| "epoch": 1.4822006472491909, | |
| "grad_norm": 3.0718023777008057, | |
| "learning_rate": 1.1935077435646573e-06, | |
| "loss": 0.41127315163612366, | |
| "step": 1374 | |
| }, | |
| { | |
| "epoch": 1.4843581445523193, | |
| "grad_norm": 9.03038501739502, | |
| "learning_rate": 1.1913778850708974e-06, | |
| "loss": 0.38048920035362244, | |
| "step": 1376 | |
| }, | |
| { | |
| "epoch": 1.4865156418554477, | |
| "grad_norm": 2.5646114349365234, | |
| "learning_rate": 1.189247509370807e-06, | |
| "loss": 0.5044585466384888, | |
| "step": 1378 | |
| }, | |
| { | |
| "epoch": 1.4886731391585761, | |
| "grad_norm": 3.9271023273468018, | |
| "learning_rate": 1.1871166285224885e-06, | |
| "loss": 0.5840790271759033, | |
| "step": 1380 | |
| }, | |
| { | |
| "epoch": 1.4908306364617046, | |
| "grad_norm": 1.9364007711410522, | |
| "learning_rate": 1.1849852545869013e-06, | |
| "loss": 0.4913451671600342, | |
| "step": 1382 | |
| }, | |
| { | |
| "epoch": 1.4929881337648327, | |
| "grad_norm": 7.079308986663818, | |
| "learning_rate": 1.182853399627797e-06, | |
| "loss": 0.40108633041381836, | |
| "step": 1384 | |
| }, | |
| { | |
| "epoch": 1.4951456310679612, | |
| "grad_norm": 1.5303609371185303, | |
| "learning_rate": 1.1807210757116505e-06, | |
| "loss": 0.5875151753425598, | |
| "step": 1386 | |
| }, | |
| { | |
| "epoch": 1.4973031283710896, | |
| "grad_norm": 1.7939358949661255, | |
| "learning_rate": 1.1785882949075894e-06, | |
| "loss": 0.43406108021736145, | |
| "step": 1388 | |
| }, | |
| { | |
| "epoch": 1.4994606256742178, | |
| "grad_norm": 1.709847092628479, | |
| "learning_rate": 1.1764550692873282e-06, | |
| "loss": 0.4609090983867645, | |
| "step": 1390 | |
| }, | |
| { | |
| "epoch": 1.5016181229773462, | |
| "grad_norm": 1.4324554204940796, | |
| "learning_rate": 1.1743214109250992e-06, | |
| "loss": 0.2564505934715271, | |
| "step": 1392 | |
| }, | |
| { | |
| "epoch": 1.5037756202804746, | |
| "grad_norm": 2.9874749183654785, | |
| "learning_rate": 1.1721873318975835e-06, | |
| "loss": 0.46675199270248413, | |
| "step": 1394 | |
| }, | |
| { | |
| "epoch": 1.505933117583603, | |
| "grad_norm": 3.750638008117676, | |
| "learning_rate": 1.1700528442838442e-06, | |
| "loss": 0.5055999755859375, | |
| "step": 1396 | |
| }, | |
| { | |
| "epoch": 1.5080906148867315, | |
| "grad_norm": 2.8105647563934326, | |
| "learning_rate": 1.167917960165256e-06, | |
| "loss": 0.5268608331680298, | |
| "step": 1398 | |
| }, | |
| { | |
| "epoch": 1.5102481121898599, | |
| "grad_norm": 3.50753116607666, | |
| "learning_rate": 1.1657826916254382e-06, | |
| "loss": 0.5102010369300842, | |
| "step": 1400 | |
| }, | |
| { | |
| "epoch": 1.512405609492988, | |
| "grad_norm": 10.553208351135254, | |
| "learning_rate": 1.1636470507501863e-06, | |
| "loss": 0.4071239233016968, | |
| "step": 1402 | |
| }, | |
| { | |
| "epoch": 1.5145631067961165, | |
| "grad_norm": 3.4755797386169434, | |
| "learning_rate": 1.1615110496274028e-06, | |
| "loss": 0.3140917420387268, | |
| "step": 1404 | |
| }, | |
| { | |
| "epoch": 1.516720604099245, | |
| "grad_norm": 2.3255038261413574, | |
| "learning_rate": 1.1593747003470294e-06, | |
| "loss": 0.49230116605758667, | |
| "step": 1406 | |
| }, | |
| { | |
| "epoch": 1.5188781014023731, | |
| "grad_norm": 1.2084012031555176, | |
| "learning_rate": 1.1572380150009777e-06, | |
| "loss": 0.39797013998031616, | |
| "step": 1408 | |
| }, | |
| { | |
| "epoch": 1.5210355987055015, | |
| "grad_norm": 5.355250358581543, | |
| "learning_rate": 1.1551010056830634e-06, | |
| "loss": 0.36559203267097473, | |
| "step": 1410 | |
| }, | |
| { | |
| "epoch": 1.52319309600863, | |
| "grad_norm": 1.0859466791152954, | |
| "learning_rate": 1.152963684488934e-06, | |
| "loss": 0.20361725986003876, | |
| "step": 1412 | |
| }, | |
| { | |
| "epoch": 1.5253505933117584, | |
| "grad_norm": 3.301490306854248, | |
| "learning_rate": 1.150826063516003e-06, | |
| "loss": 0.36109161376953125, | |
| "step": 1414 | |
| }, | |
| { | |
| "epoch": 1.5275080906148868, | |
| "grad_norm": 2.034646511077881, | |
| "learning_rate": 1.1486881548633802e-06, | |
| "loss": 0.4435052275657654, | |
| "step": 1416 | |
| }, | |
| { | |
| "epoch": 1.5296655879180152, | |
| "grad_norm": 1.7059470415115356, | |
| "learning_rate": 1.1465499706318048e-06, | |
| "loss": 0.4154685437679291, | |
| "step": 1418 | |
| }, | |
| { | |
| "epoch": 1.5318230852211436, | |
| "grad_norm": 1.5160272121429443, | |
| "learning_rate": 1.1444115229235745e-06, | |
| "loss": 0.37496164441108704, | |
| "step": 1420 | |
| }, | |
| { | |
| "epoch": 1.5339805825242718, | |
| "grad_norm": 2.6280198097229004, | |
| "learning_rate": 1.1422728238424785e-06, | |
| "loss": 0.48741182684898376, | |
| "step": 1422 | |
| }, | |
| { | |
| "epoch": 1.5361380798274002, | |
| "grad_norm": 14.306265830993652, | |
| "learning_rate": 1.14013388549373e-06, | |
| "loss": 0.5213165879249573, | |
| "step": 1424 | |
| }, | |
| { | |
| "epoch": 1.5382955771305284, | |
| "grad_norm": 1.211489200592041, | |
| "learning_rate": 1.1379947199838952e-06, | |
| "loss": 0.345187783241272, | |
| "step": 1426 | |
| }, | |
| { | |
| "epoch": 1.5404530744336569, | |
| "grad_norm": 3.2337164878845215, | |
| "learning_rate": 1.1358553394208268e-06, | |
| "loss": 0.5196102857589722, | |
| "step": 1428 | |
| }, | |
| { | |
| "epoch": 1.5426105717367853, | |
| "grad_norm": 1.5404866933822632, | |
| "learning_rate": 1.1337157559135942e-06, | |
| "loss": 0.4148750603199005, | |
| "step": 1430 | |
| }, | |
| { | |
| "epoch": 1.5447680690399137, | |
| "grad_norm": 1.683718204498291, | |
| "learning_rate": 1.1315759815724152e-06, | |
| "loss": 0.32485026121139526, | |
| "step": 1432 | |
| }, | |
| { | |
| "epoch": 1.5469255663430421, | |
| "grad_norm": 2.2065541744232178, | |
| "learning_rate": 1.1294360285085888e-06, | |
| "loss": 0.2961767017841339, | |
| "step": 1434 | |
| }, | |
| { | |
| "epoch": 1.5490830636461705, | |
| "grad_norm": 1.0630570650100708, | |
| "learning_rate": 1.1272959088344253e-06, | |
| "loss": 0.37115591764450073, | |
| "step": 1436 | |
| }, | |
| { | |
| "epoch": 1.551240560949299, | |
| "grad_norm": 2.599900245666504, | |
| "learning_rate": 1.1251556346631762e-06, | |
| "loss": 0.5358873605728149, | |
| "step": 1438 | |
| }, | |
| { | |
| "epoch": 1.5533980582524272, | |
| "grad_norm": 1.2480677366256714, | |
| "learning_rate": 1.1230152181089708e-06, | |
| "loss": 0.46197211742401123, | |
| "step": 1440 | |
| }, | |
| { | |
| "epoch": 1.5555555555555556, | |
| "grad_norm": 2.2794196605682373, | |
| "learning_rate": 1.1208746712867419e-06, | |
| "loss": 0.44740840792655945, | |
| "step": 1442 | |
| }, | |
| { | |
| "epoch": 1.5577130528586838, | |
| "grad_norm": 1.7489802837371826, | |
| "learning_rate": 1.1187340063121593e-06, | |
| "loss": 0.4339655339717865, | |
| "step": 1444 | |
| }, | |
| { | |
| "epoch": 1.5598705501618122, | |
| "grad_norm": 3.410910129547119, | |
| "learning_rate": 1.116593235301564e-06, | |
| "loss": 0.3300541639328003, | |
| "step": 1446 | |
| }, | |
| { | |
| "epoch": 1.5620280474649406, | |
| "grad_norm": 0.5800649523735046, | |
| "learning_rate": 1.1144523703718942e-06, | |
| "loss": 0.5032283663749695, | |
| "step": 1448 | |
| }, | |
| { | |
| "epoch": 1.564185544768069, | |
| "grad_norm": 1.7073270082473755, | |
| "learning_rate": 1.1123114236406224e-06, | |
| "loss": 0.4437793791294098, | |
| "step": 1450 | |
| }, | |
| { | |
| "epoch": 1.5663430420711975, | |
| "grad_norm": 1.9129263162612915, | |
| "learning_rate": 1.1101704072256819e-06, | |
| "loss": 0.49655881524086, | |
| "step": 1452 | |
| }, | |
| { | |
| "epoch": 1.5685005393743259, | |
| "grad_norm": 3.6259055137634277, | |
| "learning_rate": 1.1080293332454016e-06, | |
| "loss": 0.331562340259552, | |
| "step": 1454 | |
| }, | |
| { | |
| "epoch": 1.5706580366774543, | |
| "grad_norm": 1.8879085779190063, | |
| "learning_rate": 1.1058882138184363e-06, | |
| "loss": 0.5420922040939331, | |
| "step": 1456 | |
| }, | |
| { | |
| "epoch": 1.5728155339805825, | |
| "grad_norm": 1.6056373119354248, | |
| "learning_rate": 1.103747061063697e-06, | |
| "loss": 0.2305726557970047, | |
| "step": 1458 | |
| }, | |
| { | |
| "epoch": 1.574973031283711, | |
| "grad_norm": 2.3105075359344482, | |
| "learning_rate": 1.101605887100285e-06, | |
| "loss": 0.4295492470264435, | |
| "step": 1460 | |
| }, | |
| { | |
| "epoch": 1.577130528586839, | |
| "grad_norm": 3.3066842555999756, | |
| "learning_rate": 1.09946470404742e-06, | |
| "loss": 0.5346636772155762, | |
| "step": 1462 | |
| }, | |
| { | |
| "epoch": 1.5792880258899675, | |
| "grad_norm": 5.481215476989746, | |
| "learning_rate": 1.097323524024374e-06, | |
| "loss": 0.669352114200592, | |
| "step": 1464 | |
| }, | |
| { | |
| "epoch": 1.581445523193096, | |
| "grad_norm": 5.5241851806640625, | |
| "learning_rate": 1.095182359150402e-06, | |
| "loss": 0.5989066958427429, | |
| "step": 1466 | |
| }, | |
| { | |
| "epoch": 1.5836030204962244, | |
| "grad_norm": 1.298604130744934, | |
| "learning_rate": 1.0930412215446723e-06, | |
| "loss": 0.3661651015281677, | |
| "step": 1468 | |
| }, | |
| { | |
| "epoch": 1.5857605177993528, | |
| "grad_norm": 4.695067405700684, | |
| "learning_rate": 1.0909001233262001e-06, | |
| "loss": 0.449363648891449, | |
| "step": 1470 | |
| }, | |
| { | |
| "epoch": 1.5879180151024812, | |
| "grad_norm": 2.782097578048706, | |
| "learning_rate": 1.0887590766137766e-06, | |
| "loss": 0.5595487356185913, | |
| "step": 1472 | |
| }, | |
| { | |
| "epoch": 1.5900755124056096, | |
| "grad_norm": 1.2103036642074585, | |
| "learning_rate": 1.0866180935259022e-06, | |
| "loss": 0.38902321457862854, | |
| "step": 1474 | |
| }, | |
| { | |
| "epoch": 1.5922330097087378, | |
| "grad_norm": 1.6246592998504639, | |
| "learning_rate": 1.084477186180717e-06, | |
| "loss": 0.5024740099906921, | |
| "step": 1476 | |
| }, | |
| { | |
| "epoch": 1.5943905070118662, | |
| "grad_norm": 1.3438127040863037, | |
| "learning_rate": 1.0823363666959322e-06, | |
| "loss": 0.47724461555480957, | |
| "step": 1478 | |
| }, | |
| { | |
| "epoch": 1.5965480043149944, | |
| "grad_norm": 1.5329099893569946, | |
| "learning_rate": 1.0801956471887618e-06, | |
| "loss": 0.43613773584365845, | |
| "step": 1480 | |
| }, | |
| { | |
| "epoch": 1.5987055016181229, | |
| "grad_norm": 2.6041982173919678, | |
| "learning_rate": 1.078055039775854e-06, | |
| "loss": 0.5445818305015564, | |
| "step": 1482 | |
| }, | |
| { | |
| "epoch": 1.6008629989212513, | |
| "grad_norm": 3.287353277206421, | |
| "learning_rate": 1.075914556573222e-06, | |
| "loss": 0.35657113790512085, | |
| "step": 1484 | |
| }, | |
| { | |
| "epoch": 1.6030204962243797, | |
| "grad_norm": 6.16733455657959, | |
| "learning_rate": 1.0737742096961774e-06, | |
| "loss": 0.5397022366523743, | |
| "step": 1486 | |
| }, | |
| { | |
| "epoch": 1.6051779935275081, | |
| "grad_norm": 1.3404687643051147, | |
| "learning_rate": 1.0716340112592582e-06, | |
| "loss": 0.40695685148239136, | |
| "step": 1488 | |
| }, | |
| { | |
| "epoch": 1.6073354908306365, | |
| "grad_norm": 4.531323432922363, | |
| "learning_rate": 1.0694939733761635e-06, | |
| "loss": 0.43187639117240906, | |
| "step": 1490 | |
| }, | |
| { | |
| "epoch": 1.609492988133765, | |
| "grad_norm": 4.229406833648682, | |
| "learning_rate": 1.067354108159684e-06, | |
| "loss": 0.3659261465072632, | |
| "step": 1492 | |
| }, | |
| { | |
| "epoch": 1.6116504854368932, | |
| "grad_norm": 1.4188188314437866, | |
| "learning_rate": 1.0652144277216315e-06, | |
| "loss": 0.5332222580909729, | |
| "step": 1494 | |
| }, | |
| { | |
| "epoch": 1.6138079827400216, | |
| "grad_norm": 2.903252363204956, | |
| "learning_rate": 1.063074944172774e-06, | |
| "loss": 0.4275670647621155, | |
| "step": 1496 | |
| }, | |
| { | |
| "epoch": 1.61596548004315, | |
| "grad_norm": 1.9704622030258179, | |
| "learning_rate": 1.060935669622763e-06, | |
| "loss": 0.5114681720733643, | |
| "step": 1498 | |
| }, | |
| { | |
| "epoch": 1.6181229773462782, | |
| "grad_norm": 1.4989230632781982, | |
| "learning_rate": 1.0587966161800688e-06, | |
| "loss": 0.4305647909641266, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 1.6202804746494066, | |
| "grad_norm": 4.043560981750488, | |
| "learning_rate": 1.0566577959519086e-06, | |
| "loss": 0.34898895025253296, | |
| "step": 1502 | |
| }, | |
| { | |
| "epoch": 1.622437971952535, | |
| "grad_norm": 3.2984836101531982, | |
| "learning_rate": 1.0545192210441814e-06, | |
| "loss": 0.3457680642604828, | |
| "step": 1504 | |
| }, | |
| { | |
| "epoch": 1.6245954692556634, | |
| "grad_norm": 2.0970866680145264, | |
| "learning_rate": 1.0523809035613964e-06, | |
| "loss": 0.45543625950813293, | |
| "step": 1506 | |
| }, | |
| { | |
| "epoch": 1.6267529665587919, | |
| "grad_norm": 4.432509422302246, | |
| "learning_rate": 1.0502428556066059e-06, | |
| "loss": 0.33377963304519653, | |
| "step": 1508 | |
| }, | |
| { | |
| "epoch": 1.6289104638619203, | |
| "grad_norm": 1.2249876260757446, | |
| "learning_rate": 1.0481050892813368e-06, | |
| "loss": 0.3518203794956207, | |
| "step": 1510 | |
| }, | |
| { | |
| "epoch": 1.6310679611650487, | |
| "grad_norm": 3.6273698806762695, | |
| "learning_rate": 1.0459676166855223e-06, | |
| "loss": 0.47581151127815247, | |
| "step": 1512 | |
| }, | |
| { | |
| "epoch": 1.633225458468177, | |
| "grad_norm": 1.7668628692626953, | |
| "learning_rate": 1.0438304499174325e-06, | |
| "loss": 0.31876808404922485, | |
| "step": 1514 | |
| }, | |
| { | |
| "epoch": 1.6353829557713053, | |
| "grad_norm": 4.061316013336182, | |
| "learning_rate": 1.0416936010736064e-06, | |
| "loss": 0.47807684540748596, | |
| "step": 1516 | |
| }, | |
| { | |
| "epoch": 1.6375404530744335, | |
| "grad_norm": 1.9564175605773926, | |
| "learning_rate": 1.0395570822487845e-06, | |
| "loss": 0.47794413566589355, | |
| "step": 1518 | |
| }, | |
| { | |
| "epoch": 1.639697950377562, | |
| "grad_norm": 1.4420032501220703, | |
| "learning_rate": 1.0374209055358385e-06, | |
| "loss": 0.6091484427452087, | |
| "step": 1520 | |
| }, | |
| { | |
| "epoch": 1.6418554476806904, | |
| "grad_norm": 2.4212918281555176, | |
| "learning_rate": 1.0352850830257037e-06, | |
| "loss": 0.3609981834888458, | |
| "step": 1522 | |
| }, | |
| { | |
| "epoch": 1.6440129449838188, | |
| "grad_norm": 17.685544967651367, | |
| "learning_rate": 1.0331496268073113e-06, | |
| "loss": 0.3519137501716614, | |
| "step": 1524 | |
| }, | |
| { | |
| "epoch": 1.6461704422869472, | |
| "grad_norm": 2.5702126026153564, | |
| "learning_rate": 1.031014548967518e-06, | |
| "loss": 0.4019058346748352, | |
| "step": 1526 | |
| }, | |
| { | |
| "epoch": 1.6483279395900756, | |
| "grad_norm": 1.5977301597595215, | |
| "learning_rate": 1.0288798615910409e-06, | |
| "loss": 0.4482097923755646, | |
| "step": 1528 | |
| }, | |
| { | |
| "epoch": 1.650485436893204, | |
| "grad_norm": 3.8261749744415283, | |
| "learning_rate": 1.0267455767603842e-06, | |
| "loss": 0.5603641867637634, | |
| "step": 1530 | |
| }, | |
| { | |
| "epoch": 1.6526429341963322, | |
| "grad_norm": 2.4676754474639893, | |
| "learning_rate": 1.0246117065557762e-06, | |
| "loss": 0.6466296315193176, | |
| "step": 1532 | |
| }, | |
| { | |
| "epoch": 1.6548004314994607, | |
| "grad_norm": 9.348182678222656, | |
| "learning_rate": 1.0224782630550976e-06, | |
| "loss": 0.4512023627758026, | |
| "step": 1534 | |
| }, | |
| { | |
| "epoch": 1.6569579288025889, | |
| "grad_norm": 1.4680399894714355, | |
| "learning_rate": 1.020345258333813e-06, | |
| "loss": 0.3725220561027527, | |
| "step": 1536 | |
| }, | |
| { | |
| "epoch": 1.6591154261057173, | |
| "grad_norm": 1.6723597049713135, | |
| "learning_rate": 1.0182127044649052e-06, | |
| "loss": 0.5063510537147522, | |
| "step": 1538 | |
| }, | |
| { | |
| "epoch": 1.6612729234088457, | |
| "grad_norm": 1.5339092016220093, | |
| "learning_rate": 1.0160806135188028e-06, | |
| "loss": 0.46868813037872314, | |
| "step": 1540 | |
| }, | |
| { | |
| "epoch": 1.6634304207119741, | |
| "grad_norm": 1.4481370449066162, | |
| "learning_rate": 1.0139489975633166e-06, | |
| "loss": 0.44415712356567383, | |
| "step": 1542 | |
| }, | |
| { | |
| "epoch": 1.6655879180151025, | |
| "grad_norm": 4.979800701141357, | |
| "learning_rate": 1.0118178686635677e-06, | |
| "loss": 0.3348858952522278, | |
| "step": 1544 | |
| }, | |
| { | |
| "epoch": 1.667745415318231, | |
| "grad_norm": 2.358186721801758, | |
| "learning_rate": 1.00968723888192e-06, | |
| "loss": 0.42780208587646484, | |
| "step": 1546 | |
| }, | |
| { | |
| "epoch": 1.6699029126213594, | |
| "grad_norm": 1.201817512512207, | |
| "learning_rate": 1.0075571202779138e-06, | |
| "loss": 0.46995261311531067, | |
| "step": 1548 | |
| }, | |
| { | |
| "epoch": 1.6720604099244876, | |
| "grad_norm": 3.709390878677368, | |
| "learning_rate": 1.0054275249081947e-06, | |
| "loss": 0.35104840993881226, | |
| "step": 1550 | |
| }, | |
| { | |
| "epoch": 1.674217907227616, | |
| "grad_norm": 1.4292689561843872, | |
| "learning_rate": 1.0032984648264479e-06, | |
| "loss": 0.4314435124397278, | |
| "step": 1552 | |
| }, | |
| { | |
| "epoch": 1.6763754045307442, | |
| "grad_norm": 1.3379240036010742, | |
| "learning_rate": 1.0011699520833272e-06, | |
| "loss": 0.4032558798789978, | |
| "step": 1554 | |
| }, | |
| { | |
| "epoch": 1.6785329018338726, | |
| "grad_norm": 7.688792705535889, | |
| "learning_rate": 9.990419987263904e-07, | |
| "loss": 0.4385361671447754, | |
| "step": 1556 | |
| }, | |
| { | |
| "epoch": 1.680690399137001, | |
| "grad_norm": 3.1750102043151855, | |
| "learning_rate": 9.969146168000277e-07, | |
| "loss": 0.31719791889190674, | |
| "step": 1558 | |
| }, | |
| { | |
| "epoch": 1.6828478964401294, | |
| "grad_norm": 1.8544740676879883, | |
| "learning_rate": 9.947878183453955e-07, | |
| "loss": 0.5202147364616394, | |
| "step": 1560 | |
| }, | |
| { | |
| "epoch": 1.6850053937432579, | |
| "grad_norm": 1.8201504945755005, | |
| "learning_rate": 9.926616154003478e-07, | |
| "loss": 0.34038931131362915, | |
| "step": 1562 | |
| }, | |
| { | |
| "epoch": 1.6871628910463863, | |
| "grad_norm": 1.8023303747177124, | |
| "learning_rate": 9.905360199993674e-07, | |
| "loss": 0.3473019599914551, | |
| "step": 1564 | |
| }, | |
| { | |
| "epoch": 1.6893203883495147, | |
| "grad_norm": 2.248263120651245, | |
| "learning_rate": 9.884110441734992e-07, | |
| "loss": 0.49435266852378845, | |
| "step": 1566 | |
| }, | |
| { | |
| "epoch": 1.691477885652643, | |
| "grad_norm": 2.6698451042175293, | |
| "learning_rate": 9.862866999502805e-07, | |
| "loss": 0.4461665451526642, | |
| "step": 1568 | |
| }, | |
| { | |
| "epoch": 1.6936353829557713, | |
| "grad_norm": 1.4548275470733643, | |
| "learning_rate": 9.841629993536741e-07, | |
| "loss": 0.5574808120727539, | |
| "step": 1570 | |
| }, | |
| { | |
| "epoch": 1.6957928802588995, | |
| "grad_norm": 1.4881387948989868, | |
| "learning_rate": 9.820399544039997e-07, | |
| "loss": 0.3747144043445587, | |
| "step": 1572 | |
| }, | |
| { | |
| "epoch": 1.697950377562028, | |
| "grad_norm": 43.702919006347656, | |
| "learning_rate": 9.799175771178662e-07, | |
| "loss": 0.543049693107605, | |
| "step": 1574 | |
| }, | |
| { | |
| "epoch": 1.7001078748651564, | |
| "grad_norm": 3.537771463394165, | |
| "learning_rate": 9.777958795081024e-07, | |
| "loss": 0.38331982493400574, | |
| "step": 1576 | |
| }, | |
| { | |
| "epoch": 1.7022653721682848, | |
| "grad_norm": 1.9265162944793701, | |
| "learning_rate": 9.75674873583692e-07, | |
| "loss": 0.3932670056819916, | |
| "step": 1578 | |
| }, | |
| { | |
| "epoch": 1.7044228694714132, | |
| "grad_norm": 1.3593825101852417, | |
| "learning_rate": 9.735545713497021e-07, | |
| "loss": 0.4138597249984741, | |
| "step": 1580 | |
| }, | |
| { | |
| "epoch": 1.7065803667745416, | |
| "grad_norm": 2.415477991104126, | |
| "learning_rate": 9.714349848072175e-07, | |
| "loss": 0.4992269277572632, | |
| "step": 1582 | |
| }, | |
| { | |
| "epoch": 1.70873786407767, | |
| "grad_norm": 1.0291266441345215, | |
| "learning_rate": 9.693161259532722e-07, | |
| "loss": 0.4245167076587677, | |
| "step": 1584 | |
| }, | |
| { | |
| "epoch": 1.7108953613807982, | |
| "grad_norm": 1.4725301265716553, | |
| "learning_rate": 9.671980067807806e-07, | |
| "loss": 0.35596776008605957, | |
| "step": 1586 | |
| }, | |
| { | |
| "epoch": 1.7130528586839266, | |
| "grad_norm": 1.3389267921447754, | |
| "learning_rate": 9.650806392784719e-07, | |
| "loss": 0.3590199947357178, | |
| "step": 1588 | |
| }, | |
| { | |
| "epoch": 1.715210355987055, | |
| "grad_norm": 1.9211981296539307, | |
| "learning_rate": 9.629640354308188e-07, | |
| "loss": 0.5305579900741577, | |
| "step": 1590 | |
| }, | |
| { | |
| "epoch": 1.7173678532901833, | |
| "grad_norm": 1.3896666765213013, | |
| "learning_rate": 9.60848207217974e-07, | |
| "loss": 0.3872862458229065, | |
| "step": 1592 | |
| }, | |
| { | |
| "epoch": 1.7195253505933117, | |
| "grad_norm": 1.2243990898132324, | |
| "learning_rate": 9.587331666156988e-07, | |
| "loss": 0.5288591384887695, | |
| "step": 1594 | |
| }, | |
| { | |
| "epoch": 1.72168284789644, | |
| "grad_norm": 1.8954887390136719, | |
| "learning_rate": 9.566189255952956e-07, | |
| "loss": 0.43896806240081787, | |
| "step": 1596 | |
| }, | |
| { | |
| "epoch": 1.7238403451995685, | |
| "grad_norm": 1.1927108764648438, | |
| "learning_rate": 9.545054961235435e-07, | |
| "loss": 0.4235879182815552, | |
| "step": 1598 | |
| }, | |
| { | |
| "epoch": 1.725997842502697, | |
| "grad_norm": 7.993542194366455, | |
| "learning_rate": 9.523928901626255e-07, | |
| "loss": 0.35887616872787476, | |
| "step": 1600 | |
| }, | |
| { | |
| "epoch": 1.7281553398058254, | |
| "grad_norm": 2.1313676834106445, | |
| "learning_rate": 9.502811196700656e-07, | |
| "loss": 0.46110397577285767, | |
| "step": 1602 | |
| }, | |
| { | |
| "epoch": 1.7303128371089536, | |
| "grad_norm": 1.278878092765808, | |
| "learning_rate": 9.481701965986574e-07, | |
| "loss": 0.3147183656692505, | |
| "step": 1604 | |
| }, | |
| { | |
| "epoch": 1.732470334412082, | |
| "grad_norm": 3.170421838760376, | |
| "learning_rate": 9.460601328963996e-07, | |
| "loss": 0.24724824726581573, | |
| "step": 1606 | |
| }, | |
| { | |
| "epoch": 1.7346278317152104, | |
| "grad_norm": 1.7401503324508667, | |
| "learning_rate": 9.439509405064254e-07, | |
| "loss": 0.41423508524894714, | |
| "step": 1608 | |
| }, | |
| { | |
| "epoch": 1.7367853290183386, | |
| "grad_norm": 1.8899052143096924, | |
| "learning_rate": 9.41842631366937e-07, | |
| "loss": 0.5291723608970642, | |
| "step": 1610 | |
| }, | |
| { | |
| "epoch": 1.738942826321467, | |
| "grad_norm": 2.190075635910034, | |
| "learning_rate": 9.397352174111372e-07, | |
| "loss": 0.49489831924438477, | |
| "step": 1612 | |
| }, | |
| { | |
| "epoch": 1.7411003236245954, | |
| "grad_norm": 4.175290584564209, | |
| "learning_rate": 9.376287105671621e-07, | |
| "loss": 0.2998746633529663, | |
| "step": 1614 | |
| }, | |
| { | |
| "epoch": 1.7432578209277239, | |
| "grad_norm": 1.679629921913147, | |
| "learning_rate": 9.355231227580132e-07, | |
| "loss": 0.4566305875778198, | |
| "step": 1616 | |
| }, | |
| { | |
| "epoch": 1.7454153182308523, | |
| "grad_norm": 1.7757675647735596, | |
| "learning_rate": 9.334184659014901e-07, | |
| "loss": 0.36898234486579895, | |
| "step": 1618 | |
| }, | |
| { | |
| "epoch": 1.7475728155339807, | |
| "grad_norm": 24.852197647094727, | |
| "learning_rate": 9.313147519101237e-07, | |
| "loss": 0.2811485826969147, | |
| "step": 1620 | |
| }, | |
| { | |
| "epoch": 1.7497303128371091, | |
| "grad_norm": 1.6118603944778442, | |
| "learning_rate": 9.292119926911078e-07, | |
| "loss": 0.2936355173587799, | |
| "step": 1622 | |
| }, | |
| { | |
| "epoch": 1.7518878101402373, | |
| "grad_norm": 1.2674829959869385, | |
| "learning_rate": 9.271102001462321e-07, | |
| "loss": 0.3665968179702759, | |
| "step": 1624 | |
| }, | |
| { | |
| "epoch": 1.7540453074433657, | |
| "grad_norm": 2.609710216522217, | |
| "learning_rate": 9.250093861718151e-07, | |
| "loss": 0.38114845752716064, | |
| "step": 1626 | |
| }, | |
| { | |
| "epoch": 1.756202804746494, | |
| "grad_norm": 2.0557167530059814, | |
| "learning_rate": 9.229095626586362e-07, | |
| "loss": 0.4779360294342041, | |
| "step": 1628 | |
| }, | |
| { | |
| "epoch": 1.7583603020496223, | |
| "grad_norm": 2.9698874950408936, | |
| "learning_rate": 9.208107414918691e-07, | |
| "loss": 0.5487996935844421, | |
| "step": 1630 | |
| }, | |
| { | |
| "epoch": 1.7605177993527508, | |
| "grad_norm": 1.6979955434799194, | |
| "learning_rate": 9.187129345510134e-07, | |
| "loss": 0.5224738121032715, | |
| "step": 1632 | |
| }, | |
| { | |
| "epoch": 1.7626752966558792, | |
| "grad_norm": 2.131030321121216, | |
| "learning_rate": 9.166161537098287e-07, | |
| "loss": 0.33794957399368286, | |
| "step": 1634 | |
| }, | |
| { | |
| "epoch": 1.7648327939590076, | |
| "grad_norm": 1.3157271146774292, | |
| "learning_rate": 9.145204108362672e-07, | |
| "loss": 0.49309784173965454, | |
| "step": 1636 | |
| }, | |
| { | |
| "epoch": 1.766990291262136, | |
| "grad_norm": 1.6136844158172607, | |
| "learning_rate": 9.124257177924049e-07, | |
| "loss": 0.5821846723556519, | |
| "step": 1638 | |
| }, | |
| { | |
| "epoch": 1.7691477885652644, | |
| "grad_norm": 1.258776068687439, | |
| "learning_rate": 9.10332086434377e-07, | |
| "loss": 0.46728694438934326, | |
| "step": 1640 | |
| }, | |
| { | |
| "epoch": 1.7713052858683926, | |
| "grad_norm": 1.5475536584854126, | |
| "learning_rate": 9.082395286123081e-07, | |
| "loss": 0.4196864068508148, | |
| "step": 1642 | |
| }, | |
| { | |
| "epoch": 1.773462783171521, | |
| "grad_norm": 3.12204909324646, | |
| "learning_rate": 9.061480561702482e-07, | |
| "loss": 0.42648231983184814, | |
| "step": 1644 | |
| }, | |
| { | |
| "epoch": 1.7756202804746493, | |
| "grad_norm": 4.430125713348389, | |
| "learning_rate": 9.040576809461016e-07, | |
| "loss": 0.5809032917022705, | |
| "step": 1646 | |
| }, | |
| { | |
| "epoch": 1.7777777777777777, | |
| "grad_norm": 1.5230090618133545, | |
| "learning_rate": 9.019684147715649e-07, | |
| "loss": 0.4213182330131531, | |
| "step": 1648 | |
| }, | |
| { | |
| "epoch": 1.779935275080906, | |
| "grad_norm": 2.2308318614959717, | |
| "learning_rate": 8.99880269472056e-07, | |
| "loss": 0.2347421497106552, | |
| "step": 1650 | |
| }, | |
| { | |
| "epoch": 1.7820927723840345, | |
| "grad_norm": 1.6177752017974854, | |
| "learning_rate": 8.97793256866648e-07, | |
| "loss": 0.4257172644138336, | |
| "step": 1652 | |
| }, | |
| { | |
| "epoch": 1.784250269687163, | |
| "grad_norm": 2.0257010459899902, | |
| "learning_rate": 8.957073887680046e-07, | |
| "loss": 0.3010298013687134, | |
| "step": 1654 | |
| }, | |
| { | |
| "epoch": 1.7864077669902914, | |
| "grad_norm": 2.075418472290039, | |
| "learning_rate": 8.936226769823094e-07, | |
| "loss": 0.5388916730880737, | |
| "step": 1656 | |
| }, | |
| { | |
| "epoch": 1.7885652642934198, | |
| "grad_norm": 1.9110989570617676, | |
| "learning_rate": 8.915391333092028e-07, | |
| "loss": 0.40239423513412476, | |
| "step": 1658 | |
| }, | |
| { | |
| "epoch": 1.790722761596548, | |
| "grad_norm": 1.4136828184127808, | |
| "learning_rate": 8.894567695417128e-07, | |
| "loss": 0.44491565227508545, | |
| "step": 1660 | |
| }, | |
| { | |
| "epoch": 1.7928802588996764, | |
| "grad_norm": 1.7407686710357666, | |
| "learning_rate": 8.873755974661894e-07, | |
| "loss": 0.4648374021053314, | |
| "step": 1662 | |
| }, | |
| { | |
| "epoch": 1.7950377562028046, | |
| "grad_norm": 1.3232940435409546, | |
| "learning_rate": 8.852956288622373e-07, | |
| "loss": 0.4256327450275421, | |
| "step": 1664 | |
| }, | |
| { | |
| "epoch": 1.797195253505933, | |
| "grad_norm": 2.126704692840576, | |
| "learning_rate": 8.832168755026495e-07, | |
| "loss": 0.1840769350528717, | |
| "step": 1666 | |
| }, | |
| { | |
| "epoch": 1.7993527508090614, | |
| "grad_norm": 1.6251252889633179, | |
| "learning_rate": 8.81139349153341e-07, | |
| "loss": 0.4822881519794464, | |
| "step": 1668 | |
| }, | |
| { | |
| "epoch": 1.8015102481121898, | |
| "grad_norm": 1.9323124885559082, | |
| "learning_rate": 8.790630615732808e-07, | |
| "loss": 0.4157404899597168, | |
| "step": 1670 | |
| }, | |
| { | |
| "epoch": 1.8036677454153183, | |
| "grad_norm": 4.677561283111572, | |
| "learning_rate": 8.769880245144277e-07, | |
| "loss": 0.3802054226398468, | |
| "step": 1672 | |
| }, | |
| { | |
| "epoch": 1.8058252427184467, | |
| "grad_norm": 0.5432685613632202, | |
| "learning_rate": 8.749142497216613e-07, | |
| "loss": 0.22273704409599304, | |
| "step": 1674 | |
| }, | |
| { | |
| "epoch": 1.807982740021575, | |
| "grad_norm": 3.5203936100006104, | |
| "learning_rate": 8.728417489327174e-07, | |
| "loss": 0.546721339225769, | |
| "step": 1676 | |
| }, | |
| { | |
| "epoch": 1.8101402373247033, | |
| "grad_norm": 1.5164800882339478, | |
| "learning_rate": 8.707705338781202e-07, | |
| "loss": 0.5539653897285461, | |
| "step": 1678 | |
| }, | |
| { | |
| "epoch": 1.8122977346278317, | |
| "grad_norm": 4.041696071624756, | |
| "learning_rate": 8.687006162811175e-07, | |
| "loss": 0.48323866724967957, | |
| "step": 1680 | |
| }, | |
| { | |
| "epoch": 1.81445523193096, | |
| "grad_norm": 2.8498449325561523, | |
| "learning_rate": 8.666320078576125e-07, | |
| "loss": 0.37030312418937683, | |
| "step": 1682 | |
| }, | |
| { | |
| "epoch": 1.8166127292340883, | |
| "grad_norm": 2.4165847301483154, | |
| "learning_rate": 8.645647203160988e-07, | |
| "loss": 0.535261869430542, | |
| "step": 1684 | |
| }, | |
| { | |
| "epoch": 1.8187702265372168, | |
| "grad_norm": 1.3950622081756592, | |
| "learning_rate": 8.624987653575935e-07, | |
| "loss": 0.09442806243896484, | |
| "step": 1686 | |
| }, | |
| { | |
| "epoch": 1.8209277238403452, | |
| "grad_norm": 3.6123199462890625, | |
| "learning_rate": 8.604341546755711e-07, | |
| "loss": 0.4735386073589325, | |
| "step": 1688 | |
| }, | |
| { | |
| "epoch": 1.8230852211434736, | |
| "grad_norm": 1.8474417924880981, | |
| "learning_rate": 8.583708999558981e-07, | |
| "loss": 0.42983824014663696, | |
| "step": 1690 | |
| }, | |
| { | |
| "epoch": 1.825242718446602, | |
| "grad_norm": 4.4611406326293945, | |
| "learning_rate": 8.563090128767643e-07, | |
| "loss": 0.4846471846103668, | |
| "step": 1692 | |
| }, | |
| { | |
| "epoch": 1.8274002157497304, | |
| "grad_norm": 4.02655553817749, | |
| "learning_rate": 8.54248505108621e-07, | |
| "loss": 0.4285997152328491, | |
| "step": 1694 | |
| }, | |
| { | |
| "epoch": 1.8295577130528586, | |
| "grad_norm": 1.1664454936981201, | |
| "learning_rate": 8.521893883141114e-07, | |
| "loss": 0.3732617199420929, | |
| "step": 1696 | |
| }, | |
| { | |
| "epoch": 1.831715210355987, | |
| "grad_norm": 2.430764675140381, | |
| "learning_rate": 8.501316741480044e-07, | |
| "loss": 0.5520771741867065, | |
| "step": 1698 | |
| }, | |
| { | |
| "epoch": 1.8338727076591155, | |
| "grad_norm": 1.7392953634262085, | |
| "learning_rate": 8.480753742571325e-07, | |
| "loss": 0.4468059241771698, | |
| "step": 1700 | |
| }, | |
| { | |
| "epoch": 1.8360302049622437, | |
| "grad_norm": 1.8151521682739258, | |
| "learning_rate": 8.460205002803206e-07, | |
| "loss": 0.623181939125061, | |
| "step": 1702 | |
| }, | |
| { | |
| "epoch": 1.838187702265372, | |
| "grad_norm": 1.6103137731552124, | |
| "learning_rate": 8.439670638483254e-07, | |
| "loss": 0.47068604826927185, | |
| "step": 1704 | |
| }, | |
| { | |
| "epoch": 1.8403451995685005, | |
| "grad_norm": 1.699935793876648, | |
| "learning_rate": 8.419150765837644e-07, | |
| "loss": 0.461783230304718, | |
| "step": 1706 | |
| }, | |
| { | |
| "epoch": 1.842502696871629, | |
| "grad_norm": 1.5268728733062744, | |
| "learning_rate": 8.398645501010544e-07, | |
| "loss": 0.4249412715435028, | |
| "step": 1708 | |
| }, | |
| { | |
| "epoch": 1.8446601941747574, | |
| "grad_norm": 1.973617434501648, | |
| "learning_rate": 8.378154960063439e-07, | |
| "loss": 0.3225463628768921, | |
| "step": 1710 | |
| }, | |
| { | |
| "epoch": 1.8468176914778858, | |
| "grad_norm": 2.0861403942108154, | |
| "learning_rate": 8.357679258974471e-07, | |
| "loss": 0.41262945532798767, | |
| "step": 1712 | |
| }, | |
| { | |
| "epoch": 1.8489751887810142, | |
| "grad_norm": 1.1829684972763062, | |
| "learning_rate": 8.33721851363779e-07, | |
| "loss": 0.3057762086391449, | |
| "step": 1714 | |
| }, | |
| { | |
| "epoch": 1.8511326860841424, | |
| "grad_norm": 2.940964937210083, | |
| "learning_rate": 8.316772839862889e-07, | |
| "loss": 0.49465298652648926, | |
| "step": 1716 | |
| }, | |
| { | |
| "epoch": 1.8532901833872708, | |
| "grad_norm": 2.7260243892669678, | |
| "learning_rate": 8.296342353373964e-07, | |
| "loss": 0.3695753216743469, | |
| "step": 1718 | |
| }, | |
| { | |
| "epoch": 1.855447680690399, | |
| "grad_norm": 1.9509867429733276, | |
| "learning_rate": 8.275927169809245e-07, | |
| "loss": 0.33289045095443726, | |
| "step": 1720 | |
| }, | |
| { | |
| "epoch": 1.8576051779935274, | |
| "grad_norm": 1.32254159450531, | |
| "learning_rate": 8.255527404720346e-07, | |
| "loss": 0.48791223764419556, | |
| "step": 1722 | |
| }, | |
| { | |
| "epoch": 1.8597626752966558, | |
| "grad_norm": 1.0751956701278687, | |
| "learning_rate": 8.235143173571615e-07, | |
| "loss": 0.4154895544052124, | |
| "step": 1724 | |
| }, | |
| { | |
| "epoch": 1.8619201725997843, | |
| "grad_norm": 2.691671371459961, | |
| "learning_rate": 8.214774591739469e-07, | |
| "loss": 0.4291550815105438, | |
| "step": 1726 | |
| }, | |
| { | |
| "epoch": 1.8640776699029127, | |
| "grad_norm": 2.053277015686035, | |
| "learning_rate": 8.194421774511757e-07, | |
| "loss": 0.19994314014911652, | |
| "step": 1728 | |
| }, | |
| { | |
| "epoch": 1.866235167206041, | |
| "grad_norm": 0.4679964482784271, | |
| "learning_rate": 8.174084837087091e-07, | |
| "loss": 0.25225332379341125, | |
| "step": 1730 | |
| }, | |
| { | |
| "epoch": 1.8683926645091695, | |
| "grad_norm": 8.203169822692871, | |
| "learning_rate": 8.15376389457421e-07, | |
| "loss": 0.4160417914390564, | |
| "step": 1732 | |
| }, | |
| { | |
| "epoch": 1.8705501618122977, | |
| "grad_norm": 2.1094777584075928, | |
| "learning_rate": 8.133459061991312e-07, | |
| "loss": 0.3063911199569702, | |
| "step": 1734 | |
| }, | |
| { | |
| "epoch": 1.8727076591154261, | |
| "grad_norm": 5.972533702850342, | |
| "learning_rate": 8.113170454265421e-07, | |
| "loss": 0.48280882835388184, | |
| "step": 1736 | |
| }, | |
| { | |
| "epoch": 1.8748651564185543, | |
| "grad_norm": 1.7371788024902344, | |
| "learning_rate": 8.092898186231722e-07, | |
| "loss": 0.5959540605545044, | |
| "step": 1738 | |
| }, | |
| { | |
| "epoch": 1.8770226537216828, | |
| "grad_norm": 3.3627707958221436, | |
| "learning_rate": 8.072642372632914e-07, | |
| "loss": 0.4884318709373474, | |
| "step": 1740 | |
| }, | |
| { | |
| "epoch": 1.8791801510248112, | |
| "grad_norm": 2.1984074115753174, | |
| "learning_rate": 8.052403128118564e-07, | |
| "loss": 0.6091974377632141, | |
| "step": 1742 | |
| }, | |
| { | |
| "epoch": 1.8813376483279396, | |
| "grad_norm": 2.3978047370910645, | |
| "learning_rate": 8.032180567244457e-07, | |
| "loss": 0.44491517543792725, | |
| "step": 1744 | |
| }, | |
| { | |
| "epoch": 1.883495145631068, | |
| "grad_norm": 1.8564910888671875, | |
| "learning_rate": 8.011974804471953e-07, | |
| "loss": 0.356891930103302, | |
| "step": 1746 | |
| }, | |
| { | |
| "epoch": 1.8856526429341964, | |
| "grad_norm": 1.3446942567825317, | |
| "learning_rate": 7.991785954167318e-07, | |
| "loss": 0.23551291227340698, | |
| "step": 1748 | |
| }, | |
| { | |
| "epoch": 1.8878101402373249, | |
| "grad_norm": 0.45997709035873413, | |
| "learning_rate": 7.971614130601109e-07, | |
| "loss": 0.4230949878692627, | |
| "step": 1750 | |
| }, | |
| { | |
| "epoch": 1.889967637540453, | |
| "grad_norm": 38.918373107910156, | |
| "learning_rate": 7.951459447947506e-07, | |
| "loss": 0.47550415992736816, | |
| "step": 1752 | |
| }, | |
| { | |
| "epoch": 1.8921251348435815, | |
| "grad_norm": 2.1779584884643555, | |
| "learning_rate": 7.931322020283658e-07, | |
| "loss": 0.34226706624031067, | |
| "step": 1754 | |
| }, | |
| { | |
| "epoch": 1.8942826321467097, | |
| "grad_norm": 1.3864846229553223, | |
| "learning_rate": 7.911201961589067e-07, | |
| "loss": 0.4829237163066864, | |
| "step": 1756 | |
| }, | |
| { | |
| "epoch": 1.896440129449838, | |
| "grad_norm": 13.083473205566406, | |
| "learning_rate": 7.89109938574491e-07, | |
| "loss": 0.4935177266597748, | |
| "step": 1758 | |
| }, | |
| { | |
| "epoch": 1.8985976267529665, | |
| "grad_norm": 1.8217555284500122, | |
| "learning_rate": 7.871014406533422e-07, | |
| "loss": 0.33267736434936523, | |
| "step": 1760 | |
| }, | |
| { | |
| "epoch": 1.900755124056095, | |
| "grad_norm": 1.1673423051834106, | |
| "learning_rate": 7.850947137637231e-07, | |
| "loss": 0.5361051559448242, | |
| "step": 1762 | |
| }, | |
| { | |
| "epoch": 1.9029126213592233, | |
| "grad_norm": 2.398650646209717, | |
| "learning_rate": 7.830897692638723e-07, | |
| "loss": 0.45928269624710083, | |
| "step": 1764 | |
| }, | |
| { | |
| "epoch": 1.9050701186623518, | |
| "grad_norm": 1.6035159826278687, | |
| "learning_rate": 7.810866185019411e-07, | |
| "loss": 0.40345799922943115, | |
| "step": 1766 | |
| }, | |
| { | |
| "epoch": 1.9072276159654802, | |
| "grad_norm": 3.5140364170074463, | |
| "learning_rate": 7.790852728159263e-07, | |
| "loss": 0.4371829032897949, | |
| "step": 1768 | |
| }, | |
| { | |
| "epoch": 1.9093851132686084, | |
| "grad_norm": 2.60213041305542, | |
| "learning_rate": 7.770857435336096e-07, | |
| "loss": 0.4061744213104248, | |
| "step": 1770 | |
| }, | |
| { | |
| "epoch": 1.9115426105717368, | |
| "grad_norm": 1.3775845766067505, | |
| "learning_rate": 7.750880419724901e-07, | |
| "loss": 0.4554259181022644, | |
| "step": 1772 | |
| }, | |
| { | |
| "epoch": 1.913700107874865, | |
| "grad_norm": 1.0794130563735962, | |
| "learning_rate": 7.730921794397233e-07, | |
| "loss": 0.5084207057952881, | |
| "step": 1774 | |
| }, | |
| { | |
| "epoch": 1.9158576051779934, | |
| "grad_norm": 2.0664308071136475, | |
| "learning_rate": 7.710981672320547e-07, | |
| "loss": 0.41404515504837036, | |
| "step": 1776 | |
| }, | |
| { | |
| "epoch": 1.9180151024811218, | |
| "grad_norm": 2.6501731872558594, | |
| "learning_rate": 7.691060166357565e-07, | |
| "loss": 0.43099674582481384, | |
| "step": 1778 | |
| }, | |
| { | |
| "epoch": 1.9201725997842503, | |
| "grad_norm": 1.3328322172164917, | |
| "learning_rate": 7.671157389265657e-07, | |
| "loss": 0.28375762701034546, | |
| "step": 1780 | |
| }, | |
| { | |
| "epoch": 1.9223300970873787, | |
| "grad_norm": 1.3328646421432495, | |
| "learning_rate": 7.651273453696166e-07, | |
| "loss": 0.3038649260997772, | |
| "step": 1782 | |
| }, | |
| { | |
| "epoch": 1.924487594390507, | |
| "grad_norm": 1.6350358724594116, | |
| "learning_rate": 7.631408472193804e-07, | |
| "loss": 0.37957847118377686, | |
| "step": 1784 | |
| }, | |
| { | |
| "epoch": 1.9266450916936355, | |
| "grad_norm": 2.626065731048584, | |
| "learning_rate": 7.611562557195992e-07, | |
| "loss": 0.5506111979484558, | |
| "step": 1786 | |
| }, | |
| { | |
| "epoch": 1.9288025889967637, | |
| "grad_norm": 1.2828840017318726, | |
| "learning_rate": 7.591735821032246e-07, | |
| "loss": 0.27725642919540405, | |
| "step": 1788 | |
| }, | |
| { | |
| "epoch": 1.9309600862998921, | |
| "grad_norm": 2.3256094455718994, | |
| "learning_rate": 7.571928375923513e-07, | |
| "loss": 0.5789600014686584, | |
| "step": 1790 | |
| }, | |
| { | |
| "epoch": 1.9331175836030206, | |
| "grad_norm": 1.5279923677444458, | |
| "learning_rate": 7.552140333981565e-07, | |
| "loss": 0.3936736583709717, | |
| "step": 1792 | |
| }, | |
| { | |
| "epoch": 1.9352750809061487, | |
| "grad_norm": 1.2446404695510864, | |
| "learning_rate": 7.532371807208333e-07, | |
| "loss": 0.3211576044559479, | |
| "step": 1794 | |
| }, | |
| { | |
| "epoch": 1.9374325782092772, | |
| "grad_norm": 4.188495635986328, | |
| "learning_rate": 7.51262290749531e-07, | |
| "loss": 0.6068055629730225, | |
| "step": 1796 | |
| }, | |
| { | |
| "epoch": 1.9395900755124056, | |
| "grad_norm": 0.5956944227218628, | |
| "learning_rate": 7.49289374662289e-07, | |
| "loss": 0.49566900730133057, | |
| "step": 1798 | |
| }, | |
| { | |
| "epoch": 1.941747572815534, | |
| "grad_norm": 1.9421483278274536, | |
| "learning_rate": 7.473184436259737e-07, | |
| "loss": 0.6433679461479187, | |
| "step": 1800 | |
| }, | |
| { | |
| "epoch": 1.9439050701186624, | |
| "grad_norm": 1.5543241500854492, | |
| "learning_rate": 7.453495087962171e-07, | |
| "loss": 0.24959444999694824, | |
| "step": 1802 | |
| }, | |
| { | |
| "epoch": 1.9460625674217908, | |
| "grad_norm": 1.5686687231063843, | |
| "learning_rate": 7.433825813173513e-07, | |
| "loss": 0.5422605872154236, | |
| "step": 1804 | |
| }, | |
| { | |
| "epoch": 1.948220064724919, | |
| "grad_norm": 1.1901352405548096, | |
| "learning_rate": 7.414176723223484e-07, | |
| "loss": 0.28422844409942627, | |
| "step": 1806 | |
| }, | |
| { | |
| "epoch": 1.9503775620280475, | |
| "grad_norm": 1.0920721292495728, | |
| "learning_rate": 7.394547929327533e-07, | |
| "loss": 0.3562416732311249, | |
| "step": 1808 | |
| }, | |
| { | |
| "epoch": 1.9525350593311759, | |
| "grad_norm": 1.3346633911132812, | |
| "learning_rate": 7.374939542586249e-07, | |
| "loss": 0.43261829018592834, | |
| "step": 1810 | |
| }, | |
| { | |
| "epoch": 1.954692556634304, | |
| "grad_norm": 0.9234395623207092, | |
| "learning_rate": 7.355351673984718e-07, | |
| "loss": 0.1937822848558426, | |
| "step": 1812 | |
| }, | |
| { | |
| "epoch": 1.9568500539374325, | |
| "grad_norm": 1.3584299087524414, | |
| "learning_rate": 7.335784434391874e-07, | |
| "loss": 0.48144611716270447, | |
| "step": 1814 | |
| }, | |
| { | |
| "epoch": 1.959007551240561, | |
| "grad_norm": 3.7962646484375, | |
| "learning_rate": 7.316237934559906e-07, | |
| "loss": 0.5200175642967224, | |
| "step": 1816 | |
| }, | |
| { | |
| "epoch": 1.9611650485436893, | |
| "grad_norm": 2.161349058151245, | |
| "learning_rate": 7.296712285123603e-07, | |
| "loss": 0.535617470741272, | |
| "step": 1818 | |
| }, | |
| { | |
| "epoch": 1.9633225458468178, | |
| "grad_norm": 1.4636130332946777, | |
| "learning_rate": 7.277207596599746e-07, | |
| "loss": 0.5756503343582153, | |
| "step": 1820 | |
| }, | |
| { | |
| "epoch": 1.9654800431499462, | |
| "grad_norm": 1.4813960790634155, | |
| "learning_rate": 7.25772397938647e-07, | |
| "loss": 0.47899457812309265, | |
| "step": 1822 | |
| }, | |
| { | |
| "epoch": 1.9676375404530746, | |
| "grad_norm": 1.616752028465271, | |
| "learning_rate": 7.238261543762651e-07, | |
| "loss": 0.446144700050354, | |
| "step": 1824 | |
| }, | |
| { | |
| "epoch": 1.9697950377562028, | |
| "grad_norm": 3.5466485023498535, | |
| "learning_rate": 7.218820399887274e-07, | |
| "loss": 0.6149036884307861, | |
| "step": 1826 | |
| }, | |
| { | |
| "epoch": 1.9719525350593312, | |
| "grad_norm": 1.1728744506835938, | |
| "learning_rate": 7.199400657798802e-07, | |
| "loss": 0.3574240207672119, | |
| "step": 1828 | |
| }, | |
| { | |
| "epoch": 1.9741100323624594, | |
| "grad_norm": 1.4667657613754272, | |
| "learning_rate": 7.180002427414584e-07, | |
| "loss": 0.4160582721233368, | |
| "step": 1830 | |
| }, | |
| { | |
| "epoch": 1.9762675296655878, | |
| "grad_norm": 1.9609451293945312, | |
| "learning_rate": 7.160625818530175e-07, | |
| "loss": 0.4743785560131073, | |
| "step": 1832 | |
| }, | |
| { | |
| "epoch": 1.9784250269687162, | |
| "grad_norm": 1.3401987552642822, | |
| "learning_rate": 7.141270940818789e-07, | |
| "loss": 0.4877952039241791, | |
| "step": 1834 | |
| }, | |
| { | |
| "epoch": 1.9805825242718447, | |
| "grad_norm": 2.090475559234619, | |
| "learning_rate": 7.121937903830615e-07, | |
| "loss": 0.4774564206600189, | |
| "step": 1836 | |
| }, | |
| { | |
| "epoch": 1.982740021574973, | |
| "grad_norm": 1.7114410400390625, | |
| "learning_rate": 7.102626816992228e-07, | |
| "loss": 0.5767732262611389, | |
| "step": 1838 | |
| }, | |
| { | |
| "epoch": 1.9848975188781015, | |
| "grad_norm": 1.462022304534912, | |
| "learning_rate": 7.08333778960597e-07, | |
| "loss": 0.4107472598552704, | |
| "step": 1840 | |
| }, | |
| { | |
| "epoch": 1.98705501618123, | |
| "grad_norm": 20.44681739807129, | |
| "learning_rate": 7.064070930849315e-07, | |
| "loss": 0.44799551367759705, | |
| "step": 1842 | |
| }, | |
| { | |
| "epoch": 1.9892125134843581, | |
| "grad_norm": 1.4650821685791016, | |
| "learning_rate": 7.044826349774271e-07, | |
| "loss": 0.45217186212539673, | |
| "step": 1844 | |
| }, | |
| { | |
| "epoch": 1.9913700107874865, | |
| "grad_norm": 1.2852694988250732, | |
| "learning_rate": 7.025604155306735e-07, | |
| "loss": 0.5372745394706726, | |
| "step": 1846 | |
| }, | |
| { | |
| "epoch": 1.9935275080906147, | |
| "grad_norm": 3.7176291942596436, | |
| "learning_rate": 7.006404456245918e-07, | |
| "loss": 0.3280995488166809, | |
| "step": 1848 | |
| }, | |
| { | |
| "epoch": 1.9956850053937432, | |
| "grad_norm": 2.549321174621582, | |
| "learning_rate": 6.987227361263687e-07, | |
| "loss": 0.419173002243042, | |
| "step": 1850 | |
| }, | |
| { | |
| "epoch": 1.9978425026968716, | |
| "grad_norm": 0.8345087170600891, | |
| "learning_rate": 6.968072978903971e-07, | |
| "loss": 0.2861520051956177, | |
| "step": 1852 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "grad_norm": 1.6546863317489624, | |
| "learning_rate": 6.94894141758215e-07, | |
| "loss": 0.38616907596588135, | |
| "step": 1854 | |
| }, | |
| { | |
| "epoch": 2.0021574973031284, | |
| "grad_norm": 2.098503351211548, | |
| "learning_rate": 6.929832785584435e-07, | |
| "loss": 0.41417112946510315, | |
| "step": 1856 | |
| }, | |
| { | |
| "epoch": 2.004314994606257, | |
| "grad_norm": 1.7302627563476562, | |
| "learning_rate": 6.910747191067247e-07, | |
| "loss": 0.2879858613014221, | |
| "step": 1858 | |
| }, | |
| { | |
| "epoch": 2.0064724919093853, | |
| "grad_norm": 1.0965220928192139, | |
| "learning_rate": 6.891684742056614e-07, | |
| "loss": 0.3841347396373749, | |
| "step": 1860 | |
| }, | |
| { | |
| "epoch": 2.0086299892125137, | |
| "grad_norm": 1.9664329290390015, | |
| "learning_rate": 6.872645546447569e-07, | |
| "loss": 0.13829857110977173, | |
| "step": 1862 | |
| }, | |
| { | |
| "epoch": 2.0107874865156417, | |
| "grad_norm": 1.8741497993469238, | |
| "learning_rate": 6.85362971200352e-07, | |
| "loss": 0.3561688959598541, | |
| "step": 1864 | |
| }, | |
| { | |
| "epoch": 2.01294498381877, | |
| "grad_norm": 1.2023261785507202, | |
| "learning_rate": 6.834637346355648e-07, | |
| "loss": 0.24942456185817719, | |
| "step": 1866 | |
| }, | |
| { | |
| "epoch": 2.0151024811218985, | |
| "grad_norm": 3.097130298614502, | |
| "learning_rate": 6.815668557002304e-07, | |
| "loss": 0.16739408671855927, | |
| "step": 1868 | |
| }, | |
| { | |
| "epoch": 2.017259978425027, | |
| "grad_norm": 2.240835189819336, | |
| "learning_rate": 6.796723451308395e-07, | |
| "loss": 0.287383109331131, | |
| "step": 1870 | |
| }, | |
| { | |
| "epoch": 2.0194174757281553, | |
| "grad_norm": 1.164444923400879, | |
| "learning_rate": 6.777802136504772e-07, | |
| "loss": 0.27731871604919434, | |
| "step": 1872 | |
| }, | |
| { | |
| "epoch": 2.0215749730312838, | |
| "grad_norm": 3.692326307296753, | |
| "learning_rate": 6.758904719687624e-07, | |
| "loss": 0.42448198795318604, | |
| "step": 1874 | |
| }, | |
| { | |
| "epoch": 2.023732470334412, | |
| "grad_norm": 12.38591480255127, | |
| "learning_rate": 6.740031307817894e-07, | |
| "loss": 0.46731823682785034, | |
| "step": 1876 | |
| }, | |
| { | |
| "epoch": 2.0258899676375406, | |
| "grad_norm": 1.388956904411316, | |
| "learning_rate": 6.72118200772063e-07, | |
| "loss": 0.45638781785964966, | |
| "step": 1878 | |
| }, | |
| { | |
| "epoch": 2.028047464940669, | |
| "grad_norm": 2.0737364292144775, | |
| "learning_rate": 6.702356926084422e-07, | |
| "loss": 0.26328131556510925, | |
| "step": 1880 | |
| }, | |
| { | |
| "epoch": 2.030204962243797, | |
| "grad_norm": 4.843571662902832, | |
| "learning_rate": 6.683556169460786e-07, | |
| "loss": 0.3340507447719574, | |
| "step": 1882 | |
| }, | |
| { | |
| "epoch": 2.0323624595469254, | |
| "grad_norm": 0.6562245488166809, | |
| "learning_rate": 6.664779844263533e-07, | |
| "loss": 0.36223921179771423, | |
| "step": 1884 | |
| }, | |
| { | |
| "epoch": 2.034519956850054, | |
| "grad_norm": 1.3073861598968506, | |
| "learning_rate": 6.646028056768215e-07, | |
| "loss": 0.3697828948497772, | |
| "step": 1886 | |
| }, | |
| { | |
| "epoch": 2.0366774541531822, | |
| "grad_norm": 0.8905764818191528, | |
| "learning_rate": 6.627300913111484e-07, | |
| "loss": 0.23265878856182098, | |
| "step": 1888 | |
| }, | |
| { | |
| "epoch": 2.0388349514563107, | |
| "grad_norm": 1.8539386987686157, | |
| "learning_rate": 6.608598519290517e-07, | |
| "loss": 0.2889014780521393, | |
| "step": 1890 | |
| }, | |
| { | |
| "epoch": 2.040992448759439, | |
| "grad_norm": 1.5576704740524292, | |
| "learning_rate": 6.589920981162384e-07, | |
| "loss": 0.2241078019142151, | |
| "step": 1892 | |
| }, | |
| { | |
| "epoch": 2.0431499460625675, | |
| "grad_norm": 1.355921983718872, | |
| "learning_rate": 6.5712684044435e-07, | |
| "loss": 0.3171182870864868, | |
| "step": 1894 | |
| }, | |
| { | |
| "epoch": 2.045307443365696, | |
| "grad_norm": 1.5620806217193604, | |
| "learning_rate": 6.552640894708971e-07, | |
| "loss": 0.2683061361312866, | |
| "step": 1896 | |
| }, | |
| { | |
| "epoch": 2.0474649406688243, | |
| "grad_norm": 1.372431755065918, | |
| "learning_rate": 6.534038557392031e-07, | |
| "loss": 0.3898204267024994, | |
| "step": 1898 | |
| }, | |
| { | |
| "epoch": 2.0496224379719523, | |
| "grad_norm": 3.4002630710601807, | |
| "learning_rate": 6.515461497783441e-07, | |
| "loss": 0.18718461692333221, | |
| "step": 1900 | |
| }, | |
| { | |
| "epoch": 2.0517799352750807, | |
| "grad_norm": 0.8313205242156982, | |
| "learning_rate": 6.49690982103088e-07, | |
| "loss": 0.26798462867736816, | |
| "step": 1902 | |
| }, | |
| { | |
| "epoch": 2.053937432578209, | |
| "grad_norm": 0.6434590816497803, | |
| "learning_rate": 6.478383632138364e-07, | |
| "loss": 0.20526859164237976, | |
| "step": 1904 | |
| }, | |
| { | |
| "epoch": 2.0560949298813376, | |
| "grad_norm": 1.930409550666809, | |
| "learning_rate": 6.459883035965637e-07, | |
| "loss": 0.13682284951210022, | |
| "step": 1906 | |
| }, | |
| { | |
| "epoch": 2.058252427184466, | |
| "grad_norm": 1.254040241241455, | |
| "learning_rate": 6.441408137227597e-07, | |
| "loss": 0.21237482130527496, | |
| "step": 1908 | |
| }, | |
| { | |
| "epoch": 2.0604099244875944, | |
| "grad_norm": 2.053589105606079, | |
| "learning_rate": 6.422959040493687e-07, | |
| "loss": 0.30055493116378784, | |
| "step": 1910 | |
| }, | |
| { | |
| "epoch": 2.062567421790723, | |
| "grad_norm": 2.110161304473877, | |
| "learning_rate": 6.404535850187305e-07, | |
| "loss": 0.30984535813331604, | |
| "step": 1912 | |
| }, | |
| { | |
| "epoch": 2.0647249190938513, | |
| "grad_norm": 1.3705302476882935, | |
| "learning_rate": 6.386138670585226e-07, | |
| "loss": 0.3130619525909424, | |
| "step": 1914 | |
| }, | |
| { | |
| "epoch": 2.0668824163969797, | |
| "grad_norm": 2.5152392387390137, | |
| "learning_rate": 6.367767605816994e-07, | |
| "loss": 0.30358609557151794, | |
| "step": 1916 | |
| }, | |
| { | |
| "epoch": 2.0690399137001076, | |
| "grad_norm": 2.5105814933776855, | |
| "learning_rate": 6.349422759864343e-07, | |
| "loss": 0.3234387934207916, | |
| "step": 1918 | |
| }, | |
| { | |
| "epoch": 2.071197411003236, | |
| "grad_norm": 10.742331504821777, | |
| "learning_rate": 6.331104236560605e-07, | |
| "loss": 0.2621289789676666, | |
| "step": 1920 | |
| }, | |
| { | |
| "epoch": 2.0733549083063645, | |
| "grad_norm": 5.28401517868042, | |
| "learning_rate": 6.312812139590132e-07, | |
| "loss": 0.3532802164554596, | |
| "step": 1922 | |
| }, | |
| { | |
| "epoch": 2.075512405609493, | |
| "grad_norm": 1.3391534090042114, | |
| "learning_rate": 6.294546572487688e-07, | |
| "loss": 0.32384493947029114, | |
| "step": 1924 | |
| }, | |
| { | |
| "epoch": 2.0776699029126213, | |
| "grad_norm": 2.9051060676574707, | |
| "learning_rate": 6.276307638637881e-07, | |
| "loss": 0.38077038526535034, | |
| "step": 1926 | |
| }, | |
| { | |
| "epoch": 2.0798274002157497, | |
| "grad_norm": 1.0339443683624268, | |
| "learning_rate": 6.258095441274582e-07, | |
| "loss": 0.348030686378479, | |
| "step": 1928 | |
| }, | |
| { | |
| "epoch": 2.081984897518878, | |
| "grad_norm": 1.0094170570373535, | |
| "learning_rate": 6.239910083480317e-07, | |
| "loss": 0.22280654311180115, | |
| "step": 1930 | |
| }, | |
| { | |
| "epoch": 2.0841423948220066, | |
| "grad_norm": 6.025458812713623, | |
| "learning_rate": 6.221751668185706e-07, | |
| "loss": 0.2871300280094147, | |
| "step": 1932 | |
| }, | |
| { | |
| "epoch": 2.086299892125135, | |
| "grad_norm": 1.6982316970825195, | |
| "learning_rate": 6.203620298168865e-07, | |
| "loss": 0.36530792713165283, | |
| "step": 1934 | |
| }, | |
| { | |
| "epoch": 2.0884573894282634, | |
| "grad_norm": 1.1299806833267212, | |
| "learning_rate": 6.185516076054848e-07, | |
| "loss": 0.291080117225647, | |
| "step": 1936 | |
| }, | |
| { | |
| "epoch": 2.0906148867313914, | |
| "grad_norm": 2.090158700942993, | |
| "learning_rate": 6.167439104315022e-07, | |
| "loss": 0.28274258971214294, | |
| "step": 1938 | |
| }, | |
| { | |
| "epoch": 2.09277238403452, | |
| "grad_norm": 1.412570595741272, | |
| "learning_rate": 6.14938948526654e-07, | |
| "loss": 0.14839334785938263, | |
| "step": 1940 | |
| }, | |
| { | |
| "epoch": 2.0949298813376482, | |
| "grad_norm": 1.48866868019104, | |
| "learning_rate": 6.131367321071736e-07, | |
| "loss": 0.20612022280693054, | |
| "step": 1942 | |
| }, | |
| { | |
| "epoch": 2.0970873786407767, | |
| "grad_norm": 1.3251118659973145, | |
| "learning_rate": 6.113372713737521e-07, | |
| "loss": 0.2410675585269928, | |
| "step": 1944 | |
| }, | |
| { | |
| "epoch": 2.099244875943905, | |
| "grad_norm": 1.649924635887146, | |
| "learning_rate": 6.095405765114863e-07, | |
| "loss": 0.4107120931148529, | |
| "step": 1946 | |
| }, | |
| { | |
| "epoch": 2.1014023732470335, | |
| "grad_norm": 2.8146562576293945, | |
| "learning_rate": 6.077466576898161e-07, | |
| "loss": 0.22407367825508118, | |
| "step": 1948 | |
| }, | |
| { | |
| "epoch": 2.103559870550162, | |
| "grad_norm": 1.3203238248825073, | |
| "learning_rate": 6.05955525062469e-07, | |
| "loss": 0.3075147867202759, | |
| "step": 1950 | |
| }, | |
| { | |
| "epoch": 2.1057173678532903, | |
| "grad_norm": 2.156553268432617, | |
| "learning_rate": 6.04167188767403e-07, | |
| "loss": 0.32935836911201477, | |
| "step": 1952 | |
| }, | |
| { | |
| "epoch": 2.1078748651564188, | |
| "grad_norm": 3.6382105350494385, | |
| "learning_rate": 6.023816589267486e-07, | |
| "loss": 0.3246581554412842, | |
| "step": 1954 | |
| }, | |
| { | |
| "epoch": 2.1100323624595467, | |
| "grad_norm": 4.373478412628174, | |
| "learning_rate": 6.005989456467511e-07, | |
| "loss": 0.2509233355522156, | |
| "step": 1956 | |
| }, | |
| { | |
| "epoch": 2.112189859762675, | |
| "grad_norm": 1.778868556022644, | |
| "learning_rate": 5.988190590177132e-07, | |
| "loss": 0.3160122036933899, | |
| "step": 1958 | |
| }, | |
| { | |
| "epoch": 2.1143473570658036, | |
| "grad_norm": 0.4364719092845917, | |
| "learning_rate": 5.970420091139407e-07, | |
| "loss": 0.04425504431128502, | |
| "step": 1960 | |
| }, | |
| { | |
| "epoch": 2.116504854368932, | |
| "grad_norm": 1.821292519569397, | |
| "learning_rate": 5.952678059936811e-07, | |
| "loss": 0.18517985939979553, | |
| "step": 1962 | |
| }, | |
| { | |
| "epoch": 2.1186623516720604, | |
| "grad_norm": 3.6087453365325928, | |
| "learning_rate": 5.934964596990697e-07, | |
| "loss": 0.2705124616622925, | |
| "step": 1964 | |
| }, | |
| { | |
| "epoch": 2.120819848975189, | |
| "grad_norm": 1.464837670326233, | |
| "learning_rate": 5.917279802560719e-07, | |
| "loss": 0.21107757091522217, | |
| "step": 1966 | |
| }, | |
| { | |
| "epoch": 2.1229773462783172, | |
| "grad_norm": 4.029551029205322, | |
| "learning_rate": 5.899623776744268e-07, | |
| "loss": 0.23950833082199097, | |
| "step": 1968 | |
| }, | |
| { | |
| "epoch": 2.1251348435814457, | |
| "grad_norm": 1.4416351318359375, | |
| "learning_rate": 5.881996619475898e-07, | |
| "loss": 0.3448520600795746, | |
| "step": 1970 | |
| }, | |
| { | |
| "epoch": 2.127292340884574, | |
| "grad_norm": 2.3087425231933594, | |
| "learning_rate": 5.864398430526765e-07, | |
| "loss": 0.18799349665641785, | |
| "step": 1972 | |
| }, | |
| { | |
| "epoch": 2.129449838187702, | |
| "grad_norm": 0.4913981556892395, | |
| "learning_rate": 5.846829309504064e-07, | |
| "loss": 0.23318088054656982, | |
| "step": 1974 | |
| }, | |
| { | |
| "epoch": 2.1316073354908305, | |
| "grad_norm": 0.16859030723571777, | |
| "learning_rate": 5.829289355850464e-07, | |
| "loss": 0.22492466866970062, | |
| "step": 1976 | |
| }, | |
| { | |
| "epoch": 2.133764832793959, | |
| "grad_norm": 1.511398196220398, | |
| "learning_rate": 5.811778668843541e-07, | |
| "loss": 0.20076408982276917, | |
| "step": 1978 | |
| }, | |
| { | |
| "epoch": 2.1359223300970873, | |
| "grad_norm": 3.2070164680480957, | |
| "learning_rate": 5.794297347595216e-07, | |
| "loss": 0.2566869258880615, | |
| "step": 1980 | |
| }, | |
| { | |
| "epoch": 2.1380798274002157, | |
| "grad_norm": 1.4940425157546997, | |
| "learning_rate": 5.77684549105121e-07, | |
| "loss": 0.34161150455474854, | |
| "step": 1982 | |
| }, | |
| { | |
| "epoch": 2.140237324703344, | |
| "grad_norm": 1.6653574705123901, | |
| "learning_rate": 5.75942319799046e-07, | |
| "loss": 0.2889230251312256, | |
| "step": 1984 | |
| }, | |
| { | |
| "epoch": 2.1423948220064726, | |
| "grad_norm": 1.7484220266342163, | |
| "learning_rate": 5.742030567024571e-07, | |
| "loss": 0.2357415109872818, | |
| "step": 1986 | |
| }, | |
| { | |
| "epoch": 2.144552319309601, | |
| "grad_norm": 2.1993203163146973, | |
| "learning_rate": 5.724667696597274e-07, | |
| "loss": 0.43604907393455505, | |
| "step": 1988 | |
| }, | |
| { | |
| "epoch": 2.1467098166127294, | |
| "grad_norm": 3.952744960784912, | |
| "learning_rate": 5.707334684983824e-07, | |
| "loss": 0.1417762041091919, | |
| "step": 1990 | |
| }, | |
| { | |
| "epoch": 2.148867313915858, | |
| "grad_norm": 1.5836926698684692, | |
| "learning_rate": 5.690031630290504e-07, | |
| "loss": 0.3969094753265381, | |
| "step": 1992 | |
| }, | |
| { | |
| "epoch": 2.151024811218986, | |
| "grad_norm": 2.5084621906280518, | |
| "learning_rate": 5.672758630454016e-07, | |
| "loss": 0.3280077576637268, | |
| "step": 1994 | |
| }, | |
| { | |
| "epoch": 2.1531823085221142, | |
| "grad_norm": 2.075791835784912, | |
| "learning_rate": 5.655515783240958e-07, | |
| "loss": 0.23175282776355743, | |
| "step": 1996 | |
| }, | |
| { | |
| "epoch": 2.1553398058252426, | |
| "grad_norm": 1.316325306892395, | |
| "learning_rate": 5.63830318624726e-07, | |
| "loss": 0.3914681077003479, | |
| "step": 1998 | |
| }, | |
| { | |
| "epoch": 2.157497303128371, | |
| "grad_norm": 1.3925992250442505, | |
| "learning_rate": 5.621120936897634e-07, | |
| "loss": 0.2599402368068695, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 2.1596548004314995, | |
| "grad_norm": 0.6926285624504089, | |
| "learning_rate": 5.60396913244503e-07, | |
| "loss": 0.20738252997398376, | |
| "step": 2002 | |
| }, | |
| { | |
| "epoch": 2.161812297734628, | |
| "grad_norm": 2.7593421936035156, | |
| "learning_rate": 5.586847869970058e-07, | |
| "loss": 0.3029998242855072, | |
| "step": 2004 | |
| }, | |
| { | |
| "epoch": 2.1639697950377563, | |
| "grad_norm": 1.116198182106018, | |
| "learning_rate": 5.569757246380473e-07, | |
| "loss": 0.3626508414745331, | |
| "step": 2006 | |
| }, | |
| { | |
| "epoch": 2.1661272923408847, | |
| "grad_norm": 1.4919781684875488, | |
| "learning_rate": 5.552697358410607e-07, | |
| "loss": 0.1747465282678604, | |
| "step": 2008 | |
| }, | |
| { | |
| "epoch": 2.168284789644013, | |
| "grad_norm": 2.0446255207061768, | |
| "learning_rate": 5.535668302620828e-07, | |
| "loss": 0.3265528082847595, | |
| "step": 2010 | |
| }, | |
| { | |
| "epoch": 2.170442286947141, | |
| "grad_norm": 1.902366280555725, | |
| "learning_rate": 5.518670175396986e-07, | |
| "loss": 0.20548182725906372, | |
| "step": 2012 | |
| }, | |
| { | |
| "epoch": 2.1725997842502696, | |
| "grad_norm": 1.5543972253799438, | |
| "learning_rate": 5.50170307294988e-07, | |
| "loss": 0.30606332421302795, | |
| "step": 2014 | |
| }, | |
| { | |
| "epoch": 2.174757281553398, | |
| "grad_norm": 4.172656059265137, | |
| "learning_rate": 5.484767091314703e-07, | |
| "loss": 0.43032437562942505, | |
| "step": 2016 | |
| }, | |
| { | |
| "epoch": 2.1769147788565264, | |
| "grad_norm": 1.8856433629989624, | |
| "learning_rate": 5.467862326350495e-07, | |
| "loss": 0.3198752701282501, | |
| "step": 2018 | |
| }, | |
| { | |
| "epoch": 2.179072276159655, | |
| "grad_norm": 2.642690420150757, | |
| "learning_rate": 5.450988873739622e-07, | |
| "loss": 0.39496558904647827, | |
| "step": 2020 | |
| }, | |
| { | |
| "epoch": 2.1812297734627832, | |
| "grad_norm": 1.6509486436843872, | |
| "learning_rate": 5.434146828987205e-07, | |
| "loss": 0.3583213686943054, | |
| "step": 2022 | |
| }, | |
| { | |
| "epoch": 2.1833872707659117, | |
| "grad_norm": 5.141426086425781, | |
| "learning_rate": 5.417336287420602e-07, | |
| "loss": 0.3227855861186981, | |
| "step": 2024 | |
| }, | |
| { | |
| "epoch": 2.18554476806904, | |
| "grad_norm": 2.045142412185669, | |
| "learning_rate": 5.400557344188854e-07, | |
| "loss": 0.3605496883392334, | |
| "step": 2026 | |
| }, | |
| { | |
| "epoch": 2.1877022653721685, | |
| "grad_norm": 1.7817591428756714, | |
| "learning_rate": 5.383810094262164e-07, | |
| "loss": 0.2572648525238037, | |
| "step": 2028 | |
| }, | |
| { | |
| "epoch": 2.1898597626752965, | |
| "grad_norm": 1.3152915239334106, | |
| "learning_rate": 5.367094632431337e-07, | |
| "loss": 0.23091773688793182, | |
| "step": 2030 | |
| }, | |
| { | |
| "epoch": 2.192017259978425, | |
| "grad_norm": 2.0624170303344727, | |
| "learning_rate": 5.350411053307258e-07, | |
| "loss": 0.32704049348831177, | |
| "step": 2032 | |
| }, | |
| { | |
| "epoch": 2.1941747572815533, | |
| "grad_norm": 0.41580072045326233, | |
| "learning_rate": 5.33375945132036e-07, | |
| "loss": 0.1712155044078827, | |
| "step": 2034 | |
| }, | |
| { | |
| "epoch": 2.1963322545846817, | |
| "grad_norm": 6.342857360839844, | |
| "learning_rate": 5.317139920720069e-07, | |
| "loss": 0.11315549165010452, | |
| "step": 2036 | |
| }, | |
| { | |
| "epoch": 2.19848975188781, | |
| "grad_norm": 2.2447385787963867, | |
| "learning_rate": 5.300552555574296e-07, | |
| "loss": 0.2954585552215576, | |
| "step": 2038 | |
| }, | |
| { | |
| "epoch": 2.2006472491909386, | |
| "grad_norm": 1.2444943189620972, | |
| "learning_rate": 5.28399744976889e-07, | |
| "loss": 0.3177575170993805, | |
| "step": 2040 | |
| }, | |
| { | |
| "epoch": 2.202804746494067, | |
| "grad_norm": 1.3165860176086426, | |
| "learning_rate": 5.267474697007111e-07, | |
| "loss": 0.18876095116138458, | |
| "step": 2042 | |
| }, | |
| { | |
| "epoch": 2.2049622437971954, | |
| "grad_norm": 1.2250696420669556, | |
| "learning_rate": 5.250984390809092e-07, | |
| "loss": 0.36252525448799133, | |
| "step": 2044 | |
| }, | |
| { | |
| "epoch": 2.207119741100324, | |
| "grad_norm": 2.139589309692383, | |
| "learning_rate": 5.234526624511319e-07, | |
| "loss": 0.30584216117858887, | |
| "step": 2046 | |
| }, | |
| { | |
| "epoch": 2.209277238403452, | |
| "grad_norm": 1.1470484733581543, | |
| "learning_rate": 5.218101491266108e-07, | |
| "loss": 0.27216002345085144, | |
| "step": 2048 | |
| }, | |
| { | |
| "epoch": 2.2114347357065802, | |
| "grad_norm": 1.030771017074585, | |
| "learning_rate": 5.201709084041051e-07, | |
| "loss": 0.26957935094833374, | |
| "step": 2050 | |
| }, | |
| { | |
| "epoch": 2.2135922330097086, | |
| "grad_norm": 3.0721845626831055, | |
| "learning_rate": 5.185349495618523e-07, | |
| "loss": 0.35413840413093567, | |
| "step": 2052 | |
| }, | |
| { | |
| "epoch": 2.215749730312837, | |
| "grad_norm": 1.2705237865447998, | |
| "learning_rate": 5.169022818595139e-07, | |
| "loss": 0.2741287648677826, | |
| "step": 2054 | |
| }, | |
| { | |
| "epoch": 2.2179072276159655, | |
| "grad_norm": 1.2934070825576782, | |
| "learning_rate": 5.152729145381226e-07, | |
| "loss": 0.4308694899082184, | |
| "step": 2056 | |
| }, | |
| { | |
| "epoch": 2.220064724919094, | |
| "grad_norm": 1.0611752271652222, | |
| "learning_rate": 5.136468568200319e-07, | |
| "loss": 0.3000924587249756, | |
| "step": 2058 | |
| }, | |
| { | |
| "epoch": 2.2222222222222223, | |
| "grad_norm": 1.2346817255020142, | |
| "learning_rate": 5.120241179088615e-07, | |
| "loss": 0.24799837172031403, | |
| "step": 2060 | |
| }, | |
| { | |
| "epoch": 2.2243797195253507, | |
| "grad_norm": 2.2104601860046387, | |
| "learning_rate": 5.10404706989447e-07, | |
| "loss": 0.26346859335899353, | |
| "step": 2062 | |
| }, | |
| { | |
| "epoch": 2.226537216828479, | |
| "grad_norm": 1.240453839302063, | |
| "learning_rate": 5.087886332277866e-07, | |
| "loss": 0.22416910529136658, | |
| "step": 2064 | |
| }, | |
| { | |
| "epoch": 2.228694714131607, | |
| "grad_norm": 1.543516755104065, | |
| "learning_rate": 5.071759057709915e-07, | |
| "loss": 0.2962421178817749, | |
| "step": 2066 | |
| }, | |
| { | |
| "epoch": 2.2308522114347356, | |
| "grad_norm": 1.8429937362670898, | |
| "learning_rate": 5.055665337472306e-07, | |
| "loss": 0.21297654509544373, | |
| "step": 2068 | |
| }, | |
| { | |
| "epoch": 2.233009708737864, | |
| "grad_norm": 1.4537338018417358, | |
| "learning_rate": 5.039605262656816e-07, | |
| "loss": 0.15425504744052887, | |
| "step": 2070 | |
| }, | |
| { | |
| "epoch": 2.2351672060409924, | |
| "grad_norm": 2.0566556453704834, | |
| "learning_rate": 5.023578924164795e-07, | |
| "loss": 0.4284798502922058, | |
| "step": 2072 | |
| }, | |
| { | |
| "epoch": 2.237324703344121, | |
| "grad_norm": 1.6528476476669312, | |
| "learning_rate": 5.007586412706629e-07, | |
| "loss": 0.3135349750518799, | |
| "step": 2074 | |
| }, | |
| { | |
| "epoch": 2.2394822006472492, | |
| "grad_norm": 2.057396173477173, | |
| "learning_rate": 4.991627818801245e-07, | |
| "loss": 0.3012422025203705, | |
| "step": 2076 | |
| }, | |
| { | |
| "epoch": 2.2416396979503777, | |
| "grad_norm": 2.47934889793396, | |
| "learning_rate": 4.975703232775593e-07, | |
| "loss": 0.24459701776504517, | |
| "step": 2078 | |
| }, | |
| { | |
| "epoch": 2.243797195253506, | |
| "grad_norm": 1.943104863166809, | |
| "learning_rate": 4.959812744764143e-07, | |
| "loss": 0.3131766617298126, | |
| "step": 2080 | |
| }, | |
| { | |
| "epoch": 2.2459546925566345, | |
| "grad_norm": 1.1001262664794922, | |
| "learning_rate": 4.943956444708357e-07, | |
| "loss": 0.21205957233905792, | |
| "step": 2082 | |
| }, | |
| { | |
| "epoch": 2.2481121898597625, | |
| "grad_norm": 4.736835956573486, | |
| "learning_rate": 4.928134422356194e-07, | |
| "loss": 0.2981138825416565, | |
| "step": 2084 | |
| }, | |
| { | |
| "epoch": 2.250269687162891, | |
| "grad_norm": 2.2798216342926025, | |
| "learning_rate": 4.912346767261605e-07, | |
| "loss": 0.1790456920862198, | |
| "step": 2086 | |
| }, | |
| { | |
| "epoch": 2.2524271844660193, | |
| "grad_norm": 1.6988016366958618, | |
| "learning_rate": 4.896593568784008e-07, | |
| "loss": 0.35665163397789, | |
| "step": 2088 | |
| }, | |
| { | |
| "epoch": 2.2545846817691477, | |
| "grad_norm": 8.441971778869629, | |
| "learning_rate": 4.880874916087802e-07, | |
| "loss": 0.42117640376091003, | |
| "step": 2090 | |
| }, | |
| { | |
| "epoch": 2.256742179072276, | |
| "grad_norm": 1.6971404552459717, | |
| "learning_rate": 4.865190898141847e-07, | |
| "loss": 0.3492169678211212, | |
| "step": 2092 | |
| }, | |
| { | |
| "epoch": 2.2588996763754046, | |
| "grad_norm": 0.9748818874359131, | |
| "learning_rate": 4.849541603718984e-07, | |
| "loss": 0.24743738770484924, | |
| "step": 2094 | |
| }, | |
| { | |
| "epoch": 2.261057173678533, | |
| "grad_norm": 1.4215339422225952, | |
| "learning_rate": 4.833927121395488e-07, | |
| "loss": 0.3036370277404785, | |
| "step": 2096 | |
| }, | |
| { | |
| "epoch": 2.2632146709816614, | |
| "grad_norm": 1.2809211015701294, | |
| "learning_rate": 4.818347539550621e-07, | |
| "loss": 0.2065061330795288, | |
| "step": 2098 | |
| }, | |
| { | |
| "epoch": 2.26537216828479, | |
| "grad_norm": 1.3214771747589111, | |
| "learning_rate": 4.802802946366094e-07, | |
| "loss": 0.15301240980625153, | |
| "step": 2100 | |
| }, | |
| { | |
| "epoch": 2.267529665587918, | |
| "grad_norm": 1.464438557624817, | |
| "learning_rate": 4.787293429825575e-07, | |
| "loss": 0.24719694256782532, | |
| "step": 2102 | |
| }, | |
| { | |
| "epoch": 2.269687162891046, | |
| "grad_norm": 1.064639925956726, | |
| "learning_rate": 4.771819077714207e-07, | |
| "loss": 0.25772497057914734, | |
| "step": 2104 | |
| }, | |
| { | |
| "epoch": 2.2718446601941746, | |
| "grad_norm": 1.1112815141677856, | |
| "learning_rate": 4.756379977618093e-07, | |
| "loss": 0.3478604257106781, | |
| "step": 2106 | |
| }, | |
| { | |
| "epoch": 2.274002157497303, | |
| "grad_norm": 2.554323196411133, | |
| "learning_rate": 4.740976216923803e-07, | |
| "loss": 0.2710329592227936, | |
| "step": 2108 | |
| }, | |
| { | |
| "epoch": 2.2761596548004315, | |
| "grad_norm": 1.0012887716293335, | |
| "learning_rate": 4.725607882817886e-07, | |
| "loss": 0.17554689943790436, | |
| "step": 2110 | |
| }, | |
| { | |
| "epoch": 2.27831715210356, | |
| "grad_norm": 7.408605098724365, | |
| "learning_rate": 4.710275062286379e-07, | |
| "loss": 0.32163527607917786, | |
| "step": 2112 | |
| }, | |
| { | |
| "epoch": 2.2804746494066883, | |
| "grad_norm": 1.3676038980484009, | |
| "learning_rate": 4.694977842114303e-07, | |
| "loss": 0.2676321864128113, | |
| "step": 2114 | |
| }, | |
| { | |
| "epoch": 2.2826321467098167, | |
| "grad_norm": 3.824159860610962, | |
| "learning_rate": 4.6797163088851777e-07, | |
| "loss": 0.3604358434677124, | |
| "step": 2116 | |
| }, | |
| { | |
| "epoch": 2.284789644012945, | |
| "grad_norm": 2.4927074909210205, | |
| "learning_rate": 4.6644905489805377e-07, | |
| "loss": 0.19784438610076904, | |
| "step": 2118 | |
| }, | |
| { | |
| "epoch": 2.286947141316073, | |
| "grad_norm": 2.12221097946167, | |
| "learning_rate": 4.6493006485794325e-07, | |
| "loss": 0.14516694843769073, | |
| "step": 2120 | |
| }, | |
| { | |
| "epoch": 2.2891046386192015, | |
| "grad_norm": 2.6523265838623047, | |
| "learning_rate": 4.6341466936579445e-07, | |
| "loss": 0.4613579511642456, | |
| "step": 2122 | |
| }, | |
| { | |
| "epoch": 2.29126213592233, | |
| "grad_norm": 1.4518303871154785, | |
| "learning_rate": 4.6190287699887e-07, | |
| "loss": 0.29724588990211487, | |
| "step": 2124 | |
| }, | |
| { | |
| "epoch": 2.2934196332254584, | |
| "grad_norm": 1.7566689252853394, | |
| "learning_rate": 4.6039469631403926e-07, | |
| "loss": 0.29199060797691345, | |
| "step": 2126 | |
| }, | |
| { | |
| "epoch": 2.295577130528587, | |
| "grad_norm": 1.4850506782531738, | |
| "learning_rate": 4.588901358477287e-07, | |
| "loss": 0.34357935190200806, | |
| "step": 2128 | |
| }, | |
| { | |
| "epoch": 2.2977346278317152, | |
| "grad_norm": 2.186891794204712, | |
| "learning_rate": 4.5738920411587333e-07, | |
| "loss": 0.35363560914993286, | |
| "step": 2130 | |
| }, | |
| { | |
| "epoch": 2.2998921251348436, | |
| "grad_norm": 1.4798485040664673, | |
| "learning_rate": 4.5589190961387085e-07, | |
| "loss": 0.345289021730423, | |
| "step": 2132 | |
| }, | |
| { | |
| "epoch": 2.302049622437972, | |
| "grad_norm": 1.1641236543655396, | |
| "learning_rate": 4.543982608165307e-07, | |
| "loss": 0.3444761633872986, | |
| "step": 2134 | |
| }, | |
| { | |
| "epoch": 2.3042071197411005, | |
| "grad_norm": 5.22445821762085, | |
| "learning_rate": 4.529082661780277e-07, | |
| "loss": 0.4176110625267029, | |
| "step": 2136 | |
| }, | |
| { | |
| "epoch": 2.3063646170442285, | |
| "grad_norm": 2.9714982509613037, | |
| "learning_rate": 4.514219341318534e-07, | |
| "loss": 0.3582867383956909, | |
| "step": 2138 | |
| }, | |
| { | |
| "epoch": 2.308522114347357, | |
| "grad_norm": 1.2270290851593018, | |
| "learning_rate": 4.499392730907701e-07, | |
| "loss": 0.19157586991786957, | |
| "step": 2140 | |
| }, | |
| { | |
| "epoch": 2.3106796116504853, | |
| "grad_norm": 2.424367904663086, | |
| "learning_rate": 4.484602914467599e-07, | |
| "loss": 0.17550167441368103, | |
| "step": 2142 | |
| }, | |
| { | |
| "epoch": 2.3128371089536137, | |
| "grad_norm": 6.250462532043457, | |
| "learning_rate": 4.4698499757098085e-07, | |
| "loss": 0.2245817482471466, | |
| "step": 2144 | |
| }, | |
| { | |
| "epoch": 2.314994606256742, | |
| "grad_norm": 1.6150588989257812, | |
| "learning_rate": 4.4551339981371805e-07, | |
| "loss": 0.22309915721416473, | |
| "step": 2146 | |
| }, | |
| { | |
| "epoch": 2.3171521035598706, | |
| "grad_norm": 2.557511568069458, | |
| "learning_rate": 4.4404550650433423e-07, | |
| "loss": 0.31364479660987854, | |
| "step": 2148 | |
| }, | |
| { | |
| "epoch": 2.319309600862999, | |
| "grad_norm": 1.9506558179855347, | |
| "learning_rate": 4.4258132595122697e-07, | |
| "loss": 0.3908032774925232, | |
| "step": 2150 | |
| }, | |
| { | |
| "epoch": 2.3214670981661274, | |
| "grad_norm": 1.9863252639770508, | |
| "learning_rate": 4.411208664417779e-07, | |
| "loss": 0.33292022347450256, | |
| "step": 2152 | |
| }, | |
| { | |
| "epoch": 2.323624595469256, | |
| "grad_norm": 2.143460512161255, | |
| "learning_rate": 4.3966413624230847e-07, | |
| "loss": 0.1916477084159851, | |
| "step": 2154 | |
| }, | |
| { | |
| "epoch": 2.325782092772384, | |
| "grad_norm": 3.893472194671631, | |
| "learning_rate": 4.3821114359803016e-07, | |
| "loss": 0.33617085218429565, | |
| "step": 2156 | |
| }, | |
| { | |
| "epoch": 2.3279395900755127, | |
| "grad_norm": 2.143810272216797, | |
| "learning_rate": 4.367618967330011e-07, | |
| "loss": 0.3440120220184326, | |
| "step": 2158 | |
| }, | |
| { | |
| "epoch": 2.3300970873786406, | |
| "grad_norm": 1.6093735694885254, | |
| "learning_rate": 4.35316403850078e-07, | |
| "loss": 0.18562518060207367, | |
| "step": 2160 | |
| }, | |
| { | |
| "epoch": 2.332254584681769, | |
| "grad_norm": 1.975888729095459, | |
| "learning_rate": 4.3387467313086825e-07, | |
| "loss": 0.15831519663333893, | |
| "step": 2162 | |
| }, | |
| { | |
| "epoch": 2.3344120819848975, | |
| "grad_norm": 1.708641529083252, | |
| "learning_rate": 4.324367127356868e-07, | |
| "loss": 0.2527565360069275, | |
| "step": 2164 | |
| }, | |
| { | |
| "epoch": 2.336569579288026, | |
| "grad_norm": 1.4729366302490234, | |
| "learning_rate": 4.310025308035073e-07, | |
| "loss": 0.2772301435470581, | |
| "step": 2166 | |
| }, | |
| { | |
| "epoch": 2.3387270765911543, | |
| "grad_norm": 1.6301394701004028, | |
| "learning_rate": 4.295721354519172e-07, | |
| "loss": 0.3133164644241333, | |
| "step": 2168 | |
| }, | |
| { | |
| "epoch": 2.3408845738942827, | |
| "grad_norm": 3.0249712467193604, | |
| "learning_rate": 4.281455347770713e-07, | |
| "loss": 0.24287529289722443, | |
| "step": 2170 | |
| }, | |
| { | |
| "epoch": 2.343042071197411, | |
| "grad_norm": 1.6311941146850586, | |
| "learning_rate": 4.2672273685364703e-07, | |
| "loss": 0.4204927086830139, | |
| "step": 2172 | |
| }, | |
| { | |
| "epoch": 2.3451995685005396, | |
| "grad_norm": 5.362087249755859, | |
| "learning_rate": 4.253037497347971e-07, | |
| "loss": 0.37392908334732056, | |
| "step": 2174 | |
| }, | |
| { | |
| "epoch": 2.347357065803668, | |
| "grad_norm": 1.4209610223770142, | |
| "learning_rate": 4.2388858145210506e-07, | |
| "loss": 0.21947862207889557, | |
| "step": 2176 | |
| }, | |
| { | |
| "epoch": 2.349514563106796, | |
| "grad_norm": 1.5687469244003296, | |
| "learning_rate": 4.224772400155399e-07, | |
| "loss": 0.36956965923309326, | |
| "step": 2178 | |
| }, | |
| { | |
| "epoch": 2.3516720604099244, | |
| "grad_norm": 1.6838412284851074, | |
| "learning_rate": 4.2106973341340976e-07, | |
| "loss": 0.23953932523727417, | |
| "step": 2180 | |
| }, | |
| { | |
| "epoch": 2.353829557713053, | |
| "grad_norm": 4.918888568878174, | |
| "learning_rate": 4.1966606961231766e-07, | |
| "loss": 0.3331076502799988, | |
| "step": 2182 | |
| }, | |
| { | |
| "epoch": 2.355987055016181, | |
| "grad_norm": 1.4841556549072266, | |
| "learning_rate": 4.182662565571154e-07, | |
| "loss": 0.2150951325893402, | |
| "step": 2184 | |
| }, | |
| { | |
| "epoch": 2.3581445523193096, | |
| "grad_norm": 2.378197193145752, | |
| "learning_rate": 4.168703021708605e-07, | |
| "loss": 0.442268043756485, | |
| "step": 2186 | |
| }, | |
| { | |
| "epoch": 2.360302049622438, | |
| "grad_norm": 1.1213322877883911, | |
| "learning_rate": 4.154782143547691e-07, | |
| "loss": 0.21381919085979462, | |
| "step": 2188 | |
| }, | |
| { | |
| "epoch": 2.3624595469255665, | |
| "grad_norm": 1.2793383598327637, | |
| "learning_rate": 4.140900009881722e-07, | |
| "loss": 0.297492116689682, | |
| "step": 2190 | |
| }, | |
| { | |
| "epoch": 2.364617044228695, | |
| "grad_norm": 1.0152313709259033, | |
| "learning_rate": 4.127056699284719e-07, | |
| "loss": 0.2662775218486786, | |
| "step": 2192 | |
| }, | |
| { | |
| "epoch": 2.3667745415318233, | |
| "grad_norm": 3.4524388313293457, | |
| "learning_rate": 4.1132522901109547e-07, | |
| "loss": 0.1951354295015335, | |
| "step": 2194 | |
| }, | |
| { | |
| "epoch": 2.3689320388349513, | |
| "grad_norm": 5.245743751525879, | |
| "learning_rate": 4.099486860494517e-07, | |
| "loss": 0.3872916102409363, | |
| "step": 2196 | |
| }, | |
| { | |
| "epoch": 2.3710895361380797, | |
| "grad_norm": 1.206447720527649, | |
| "learning_rate": 4.085760488348866e-07, | |
| "loss": 0.24260494112968445, | |
| "step": 2198 | |
| }, | |
| { | |
| "epoch": 2.373247033441208, | |
| "grad_norm": 1.1352065801620483, | |
| "learning_rate": 4.0720732513663985e-07, | |
| "loss": 0.24157175421714783, | |
| "step": 2200 | |
| }, | |
| { | |
| "epoch": 2.3754045307443366, | |
| "grad_norm": 1.7284468412399292, | |
| "learning_rate": 4.0584252270179975e-07, | |
| "loss": 0.24914561212062836, | |
| "step": 2202 | |
| }, | |
| { | |
| "epoch": 2.377562028047465, | |
| "grad_norm": 3.4234535694122314, | |
| "learning_rate": 4.0448164925525987e-07, | |
| "loss": 0.4321536421775818, | |
| "step": 2204 | |
| }, | |
| { | |
| "epoch": 2.3797195253505934, | |
| "grad_norm": 0.3995194137096405, | |
| "learning_rate": 4.031247124996764e-07, | |
| "loss": 0.3386417329311371, | |
| "step": 2206 | |
| }, | |
| { | |
| "epoch": 2.381877022653722, | |
| "grad_norm": 1.4748889207839966, | |
| "learning_rate": 4.017717201154217e-07, | |
| "loss": 0.2493990957736969, | |
| "step": 2208 | |
| }, | |
| { | |
| "epoch": 2.3840345199568502, | |
| "grad_norm": 2.027784824371338, | |
| "learning_rate": 4.004226797605445e-07, | |
| "loss": 0.36100074648857117, | |
| "step": 2210 | |
| }, | |
| { | |
| "epoch": 2.3861920172599786, | |
| "grad_norm": 1.2897789478302002, | |
| "learning_rate": 3.990775990707237e-07, | |
| "loss": 0.23212602734565735, | |
| "step": 2212 | |
| }, | |
| { | |
| "epoch": 2.3883495145631066, | |
| "grad_norm": 1.4718191623687744, | |
| "learning_rate": 3.9773648565922634e-07, | |
| "loss": 0.17623895406723022, | |
| "step": 2214 | |
| }, | |
| { | |
| "epoch": 2.390507011866235, | |
| "grad_norm": 1.6537420749664307, | |
| "learning_rate": 3.963993471168643e-07, | |
| "loss": 0.3103001117706299, | |
| "step": 2216 | |
| }, | |
| { | |
| "epoch": 2.3926645091693635, | |
| "grad_norm": 3.3019044399261475, | |
| "learning_rate": 3.9506619101195196e-07, | |
| "loss": 0.3791100084781647, | |
| "step": 2218 | |
| }, | |
| { | |
| "epoch": 2.394822006472492, | |
| "grad_norm": 0.7170013785362244, | |
| "learning_rate": 3.9373702489026184e-07, | |
| "loss": 0.23801524937152863, | |
| "step": 2220 | |
| }, | |
| { | |
| "epoch": 2.3969795037756203, | |
| "grad_norm": 1.0052597522735596, | |
| "learning_rate": 3.9241185627498333e-07, | |
| "loss": 0.2777608036994934, | |
| "step": 2222 | |
| }, | |
| { | |
| "epoch": 2.3991370010787487, | |
| "grad_norm": 5.178793907165527, | |
| "learning_rate": 3.9109069266668e-07, | |
| "loss": 0.35337719321250916, | |
| "step": 2224 | |
| }, | |
| { | |
| "epoch": 2.401294498381877, | |
| "grad_norm": 1.5237584114074707, | |
| "learning_rate": 3.8977354154324586e-07, | |
| "loss": 0.3222746253013611, | |
| "step": 2226 | |
| }, | |
| { | |
| "epoch": 2.4034519956850056, | |
| "grad_norm": 2.202849864959717, | |
| "learning_rate": 3.884604103598647e-07, | |
| "loss": 0.2847940921783447, | |
| "step": 2228 | |
| }, | |
| { | |
| "epoch": 2.405609492988134, | |
| "grad_norm": 2.125887870788574, | |
| "learning_rate": 3.8715130654896623e-07, | |
| "loss": 0.3124774992465973, | |
| "step": 2230 | |
| }, | |
| { | |
| "epoch": 2.407766990291262, | |
| "grad_norm": 2.585139513015747, | |
| "learning_rate": 3.858462375201862e-07, | |
| "loss": 0.3422589898109436, | |
| "step": 2232 | |
| }, | |
| { | |
| "epoch": 2.4099244875943904, | |
| "grad_norm": 2.663825035095215, | |
| "learning_rate": 3.8454521066032214e-07, | |
| "loss": 0.2664566934108734, | |
| "step": 2234 | |
| }, | |
| { | |
| "epoch": 2.412081984897519, | |
| "grad_norm": 8.156516075134277, | |
| "learning_rate": 3.8324823333329263e-07, | |
| "loss": 0.2662767171859741, | |
| "step": 2236 | |
| }, | |
| { | |
| "epoch": 2.414239482200647, | |
| "grad_norm": 1.3628453016281128, | |
| "learning_rate": 3.819553128800962e-07, | |
| "loss": 0.3383438289165497, | |
| "step": 2238 | |
| }, | |
| { | |
| "epoch": 2.4163969795037756, | |
| "grad_norm": 1.212145447731018, | |
| "learning_rate": 3.806664566187686e-07, | |
| "loss": 0.3051017224788666, | |
| "step": 2240 | |
| }, | |
| { | |
| "epoch": 2.418554476806904, | |
| "grad_norm": 4.809718608856201, | |
| "learning_rate": 3.7938167184434206e-07, | |
| "loss": 0.2975846230983734, | |
| "step": 2242 | |
| }, | |
| { | |
| "epoch": 2.4207119741100325, | |
| "grad_norm": 1.992264986038208, | |
| "learning_rate": 3.781009658288036e-07, | |
| "loss": 0.2492162585258484, | |
| "step": 2244 | |
| }, | |
| { | |
| "epoch": 2.422869471413161, | |
| "grad_norm": 2.0577735900878906, | |
| "learning_rate": 3.768243458210549e-07, | |
| "loss": 0.35954225063323975, | |
| "step": 2246 | |
| }, | |
| { | |
| "epoch": 2.4250269687162893, | |
| "grad_norm": 1.2022833824157715, | |
| "learning_rate": 3.755518190468697e-07, | |
| "loss": 0.1975034475326538, | |
| "step": 2248 | |
| }, | |
| { | |
| "epoch": 2.4271844660194173, | |
| "grad_norm": 2.0728843212127686, | |
| "learning_rate": 3.7428339270885367e-07, | |
| "loss": 0.3646237254142761, | |
| "step": 2250 | |
| }, | |
| { | |
| "epoch": 2.4293419633225457, | |
| "grad_norm": 1.2893973588943481, | |
| "learning_rate": 3.73019073986405e-07, | |
| "loss": 0.22355937957763672, | |
| "step": 2252 | |
| }, | |
| { | |
| "epoch": 2.431499460625674, | |
| "grad_norm": 2.275219440460205, | |
| "learning_rate": 3.717588700356702e-07, | |
| "loss": 0.1818881779909134, | |
| "step": 2254 | |
| }, | |
| { | |
| "epoch": 2.4336569579288025, | |
| "grad_norm": 1.8378785848617554, | |
| "learning_rate": 3.7050278798950795e-07, | |
| "loss": 0.22650231420993805, | |
| "step": 2256 | |
| }, | |
| { | |
| "epoch": 2.435814455231931, | |
| "grad_norm": 2.7440717220306396, | |
| "learning_rate": 3.6925083495744534e-07, | |
| "loss": 0.44353553652763367, | |
| "step": 2258 | |
| }, | |
| { | |
| "epoch": 2.4379719525350594, | |
| "grad_norm": 1.588118076324463, | |
| "learning_rate": 3.6800301802563927e-07, | |
| "loss": 0.2522018551826477, | |
| "step": 2260 | |
| }, | |
| { | |
| "epoch": 2.440129449838188, | |
| "grad_norm": 1.7098896503448486, | |
| "learning_rate": 3.667593442568364e-07, | |
| "loss": 0.32094866037368774, | |
| "step": 2262 | |
| }, | |
| { | |
| "epoch": 2.4422869471413162, | |
| "grad_norm": 1.7896616458892822, | |
| "learning_rate": 3.6551982069033205e-07, | |
| "loss": 0.2640255093574524, | |
| "step": 2264 | |
| }, | |
| { | |
| "epoch": 2.4444444444444446, | |
| "grad_norm": 0.299568235874176, | |
| "learning_rate": 3.6428445434193136e-07, | |
| "loss": 0.14886681735515594, | |
| "step": 2266 | |
| }, | |
| { | |
| "epoch": 2.4466019417475726, | |
| "grad_norm": 1.7262334823608398, | |
| "learning_rate": 3.6305325220390905e-07, | |
| "loss": 0.26833122968673706, | |
| "step": 2268 | |
| }, | |
| { | |
| "epoch": 2.448759439050701, | |
| "grad_norm": 1.531208872795105, | |
| "learning_rate": 3.618262212449706e-07, | |
| "loss": 0.4644531011581421, | |
| "step": 2270 | |
| }, | |
| { | |
| "epoch": 2.4509169363538295, | |
| "grad_norm": 2.4541876316070557, | |
| "learning_rate": 3.606033684102121e-07, | |
| "loss": 0.21291311085224152, | |
| "step": 2272 | |
| }, | |
| { | |
| "epoch": 2.453074433656958, | |
| "grad_norm": 1.4597750902175903, | |
| "learning_rate": 3.5938470062108043e-07, | |
| "loss": 0.22411037981510162, | |
| "step": 2274 | |
| }, | |
| { | |
| "epoch": 2.4552319309600863, | |
| "grad_norm": 3.633920431137085, | |
| "learning_rate": 3.5817022477533585e-07, | |
| "loss": 0.23133979737758636, | |
| "step": 2276 | |
| }, | |
| { | |
| "epoch": 2.4573894282632147, | |
| "grad_norm": 1.5003464221954346, | |
| "learning_rate": 3.569599477470112e-07, | |
| "loss": 0.3434515595436096, | |
| "step": 2278 | |
| }, | |
| { | |
| "epoch": 2.459546925566343, | |
| "grad_norm": 2.270390033721924, | |
| "learning_rate": 3.5575387638637357e-07, | |
| "loss": 0.3822650611400604, | |
| "step": 2280 | |
| }, | |
| { | |
| "epoch": 2.4617044228694716, | |
| "grad_norm": 1.8612627983093262, | |
| "learning_rate": 3.545520175198858e-07, | |
| "loss": 0.2585938274860382, | |
| "step": 2282 | |
| }, | |
| { | |
| "epoch": 2.4638619201726, | |
| "grad_norm": 2.235671281814575, | |
| "learning_rate": 3.5335437795016823e-07, | |
| "loss": 0.28054654598236084, | |
| "step": 2284 | |
| }, | |
| { | |
| "epoch": 2.466019417475728, | |
| "grad_norm": 4.319032669067383, | |
| "learning_rate": 3.5216096445595884e-07, | |
| "loss": 0.4531608819961548, | |
| "step": 2286 | |
| }, | |
| { | |
| "epoch": 2.4681769147788564, | |
| "grad_norm": 1.4703388214111328, | |
| "learning_rate": 3.509717837920756e-07, | |
| "loss": 0.3540151119232178, | |
| "step": 2288 | |
| }, | |
| { | |
| "epoch": 2.470334412081985, | |
| "grad_norm": 2.039675712585449, | |
| "learning_rate": 3.497868426893793e-07, | |
| "loss": 0.13517698645591736, | |
| "step": 2290 | |
| }, | |
| { | |
| "epoch": 2.472491909385113, | |
| "grad_norm": 0.5410081148147583, | |
| "learning_rate": 3.486061478547337e-07, | |
| "loss": 0.02865418791770935, | |
| "step": 2292 | |
| }, | |
| { | |
| "epoch": 2.4746494066882416, | |
| "grad_norm": 1.2113001346588135, | |
| "learning_rate": 3.4742970597096834e-07, | |
| "loss": 0.33490967750549316, | |
| "step": 2294 | |
| }, | |
| { | |
| "epoch": 2.47680690399137, | |
| "grad_norm": 1.3965346813201904, | |
| "learning_rate": 3.462575236968406e-07, | |
| "loss": 0.18190385401248932, | |
| "step": 2296 | |
| }, | |
| { | |
| "epoch": 2.4789644012944985, | |
| "grad_norm": 3.908724069595337, | |
| "learning_rate": 3.4508960766699914e-07, | |
| "loss": 0.31986406445503235, | |
| "step": 2298 | |
| }, | |
| { | |
| "epoch": 2.481121898597627, | |
| "grad_norm": 1.823320746421814, | |
| "learning_rate": 3.4392596449194346e-07, | |
| "loss": 0.24571648240089417, | |
| "step": 2300 | |
| }, | |
| { | |
| "epoch": 2.4832793959007553, | |
| "grad_norm": 9.324934959411621, | |
| "learning_rate": 3.427666007579902e-07, | |
| "loss": 0.2971467077732086, | |
| "step": 2302 | |
| }, | |
| { | |
| "epoch": 2.4854368932038833, | |
| "grad_norm": 1.0990798473358154, | |
| "learning_rate": 3.416115230272333e-07, | |
| "loss": 0.3316362500190735, | |
| "step": 2304 | |
| }, | |
| { | |
| "epoch": 2.4875943905070117, | |
| "grad_norm": 0.9614824056625366, | |
| "learning_rate": 3.4046073783750726e-07, | |
| "loss": 0.21634887158870697, | |
| "step": 2306 | |
| }, | |
| { | |
| "epoch": 2.48975188781014, | |
| "grad_norm": 3.744295120239258, | |
| "learning_rate": 3.3931425170235083e-07, | |
| "loss": 0.3917967677116394, | |
| "step": 2308 | |
| }, | |
| { | |
| "epoch": 2.4919093851132685, | |
| "grad_norm": 2.0177054405212402, | |
| "learning_rate": 3.381720711109695e-07, | |
| "loss": 0.3639075458049774, | |
| "step": 2310 | |
| }, | |
| { | |
| "epoch": 2.494066882416397, | |
| "grad_norm": 3.873263359069824, | |
| "learning_rate": 3.3703420252819947e-07, | |
| "loss": 0.28831595182418823, | |
| "step": 2312 | |
| }, | |
| { | |
| "epoch": 2.4962243797195254, | |
| "grad_norm": 9.90953540802002, | |
| "learning_rate": 3.359006523944697e-07, | |
| "loss": 0.2942795753479004, | |
| "step": 2314 | |
| }, | |
| { | |
| "epoch": 2.498381877022654, | |
| "grad_norm": 1.6383076906204224, | |
| "learning_rate": 3.347714271257679e-07, | |
| "loss": 0.19500726461410522, | |
| "step": 2316 | |
| }, | |
| { | |
| "epoch": 2.500539374325782, | |
| "grad_norm": 1.960985779762268, | |
| "learning_rate": 3.3364653311360104e-07, | |
| "loss": 0.30468082427978516, | |
| "step": 2318 | |
| }, | |
| { | |
| "epoch": 2.5026968716289106, | |
| "grad_norm": 1.2923824787139893, | |
| "learning_rate": 3.325259767249617e-07, | |
| "loss": 0.20314782857894897, | |
| "step": 2320 | |
| }, | |
| { | |
| "epoch": 2.5048543689320386, | |
| "grad_norm": 1.4000043869018555, | |
| "learning_rate": 3.3140976430229136e-07, | |
| "loss": 0.34522709250450134, | |
| "step": 2322 | |
| }, | |
| { | |
| "epoch": 2.5070118662351675, | |
| "grad_norm": 1.8520127534866333, | |
| "learning_rate": 3.302979021634438e-07, | |
| "loss": 0.3963944911956787, | |
| "step": 2324 | |
| }, | |
| { | |
| "epoch": 2.5091693635382954, | |
| "grad_norm": 1.3927682638168335, | |
| "learning_rate": 3.2919039660164973e-07, | |
| "loss": 0.126472607254982, | |
| "step": 2326 | |
| }, | |
| { | |
| "epoch": 2.511326860841424, | |
| "grad_norm": 1.8297348022460938, | |
| "learning_rate": 3.2808725388548164e-07, | |
| "loss": 0.3507118821144104, | |
| "step": 2328 | |
| }, | |
| { | |
| "epoch": 2.5134843581445523, | |
| "grad_norm": 0.6373293399810791, | |
| "learning_rate": 3.269884802588181e-07, | |
| "loss": 0.15893447399139404, | |
| "step": 2330 | |
| }, | |
| { | |
| "epoch": 2.5156418554476807, | |
| "grad_norm": 0.42088356614112854, | |
| "learning_rate": 3.258940819408079e-07, | |
| "loss": 0.14911764860153198, | |
| "step": 2332 | |
| }, | |
| { | |
| "epoch": 2.517799352750809, | |
| "grad_norm": 2.131605863571167, | |
| "learning_rate": 3.248040651258352e-07, | |
| "loss": 0.2661122679710388, | |
| "step": 2334 | |
| }, | |
| { | |
| "epoch": 2.5199568500539375, | |
| "grad_norm": 7.058028697967529, | |
| "learning_rate": 3.2371843598348485e-07, | |
| "loss": 0.35104191303253174, | |
| "step": 2336 | |
| }, | |
| { | |
| "epoch": 2.522114347357066, | |
| "grad_norm": 12.030430793762207, | |
| "learning_rate": 3.2263720065850686e-07, | |
| "loss": 0.28974059224128723, | |
| "step": 2338 | |
| }, | |
| { | |
| "epoch": 2.524271844660194, | |
| "grad_norm": 2.7837140560150146, | |
| "learning_rate": 3.215603652707819e-07, | |
| "loss": 0.4368301033973694, | |
| "step": 2340 | |
| }, | |
| { | |
| "epoch": 2.526429341963323, | |
| "grad_norm": 7.616663455963135, | |
| "learning_rate": 3.2048793591528655e-07, | |
| "loss": 0.47825562953948975, | |
| "step": 2342 | |
| }, | |
| { | |
| "epoch": 2.528586839266451, | |
| "grad_norm": 3.9860897064208984, | |
| "learning_rate": 3.194199186620592e-07, | |
| "loss": 0.31387850642204285, | |
| "step": 2344 | |
| }, | |
| { | |
| "epoch": 2.530744336569579, | |
| "grad_norm": 1.596435546875, | |
| "learning_rate": 3.1835631955616505e-07, | |
| "loss": 0.3126782178878784, | |
| "step": 2346 | |
| }, | |
| { | |
| "epoch": 2.5329018338727076, | |
| "grad_norm": 1.315140724182129, | |
| "learning_rate": 3.172971446176621e-07, | |
| "loss": 0.26786503195762634, | |
| "step": 2348 | |
| }, | |
| { | |
| "epoch": 2.535059331175836, | |
| "grad_norm": 0.4910350441932678, | |
| "learning_rate": 3.16242399841568e-07, | |
| "loss": 0.1304475963115692, | |
| "step": 2350 | |
| }, | |
| { | |
| "epoch": 2.5372168284789645, | |
| "grad_norm": 1.8181979656219482, | |
| "learning_rate": 3.1519209119782435e-07, | |
| "loss": 0.26876750588417053, | |
| "step": 2352 | |
| }, | |
| { | |
| "epoch": 2.539374325782093, | |
| "grad_norm": 2.385711193084717, | |
| "learning_rate": 3.141462246312644e-07, | |
| "loss": 0.2904283106327057, | |
| "step": 2354 | |
| }, | |
| { | |
| "epoch": 2.5415318230852213, | |
| "grad_norm": 1.548781156539917, | |
| "learning_rate": 3.1310480606157864e-07, | |
| "loss": 0.2804209589958191, | |
| "step": 2356 | |
| }, | |
| { | |
| "epoch": 2.5436893203883493, | |
| "grad_norm": 2.7937028408050537, | |
| "learning_rate": 3.120678413832821e-07, | |
| "loss": 0.36265939474105835, | |
| "step": 2358 | |
| }, | |
| { | |
| "epoch": 2.545846817691478, | |
| "grad_norm": 1.6474692821502686, | |
| "learning_rate": 3.110353364656792e-07, | |
| "loss": 0.2979966700077057, | |
| "step": 2360 | |
| }, | |
| { | |
| "epoch": 2.548004314994606, | |
| "grad_norm": 1.5488511323928833, | |
| "learning_rate": 3.1000729715283306e-07, | |
| "loss": 0.37080666422843933, | |
| "step": 2362 | |
| }, | |
| { | |
| "epoch": 2.5501618122977345, | |
| "grad_norm": 2.0763769149780273, | |
| "learning_rate": 3.089837292635309e-07, | |
| "loss": 0.293621301651001, | |
| "step": 2364 | |
| }, | |
| { | |
| "epoch": 2.552319309600863, | |
| "grad_norm": 1.1936835050582886, | |
| "learning_rate": 3.079646385912502e-07, | |
| "loss": 0.1373947560787201, | |
| "step": 2366 | |
| }, | |
| { | |
| "epoch": 2.5544768069039914, | |
| "grad_norm": 1.4387550354003906, | |
| "learning_rate": 3.069500309041283e-07, | |
| "loss": 0.34591472148895264, | |
| "step": 2368 | |
| }, | |
| { | |
| "epoch": 2.55663430420712, | |
| "grad_norm": 36.577571868896484, | |
| "learning_rate": 3.05939911944928e-07, | |
| "loss": 0.6267740726470947, | |
| "step": 2370 | |
| }, | |
| { | |
| "epoch": 2.558791801510248, | |
| "grad_norm": 1.3116552829742432, | |
| "learning_rate": 3.049342874310053e-07, | |
| "loss": 0.25853827595710754, | |
| "step": 2372 | |
| }, | |
| { | |
| "epoch": 2.5609492988133766, | |
| "grad_norm": 1.2759634256362915, | |
| "learning_rate": 3.0393316305427743e-07, | |
| "loss": 0.29107633233070374, | |
| "step": 2374 | |
| }, | |
| { | |
| "epoch": 2.5631067961165046, | |
| "grad_norm": 2.6377315521240234, | |
| "learning_rate": 3.0293654448119094e-07, | |
| "loss": 0.3561844527721405, | |
| "step": 2376 | |
| }, | |
| { | |
| "epoch": 2.5652642934196335, | |
| "grad_norm": 1.450838565826416, | |
| "learning_rate": 3.0194443735268855e-07, | |
| "loss": 0.24433653056621552, | |
| "step": 2378 | |
| }, | |
| { | |
| "epoch": 2.5674217907227614, | |
| "grad_norm": 5.896989822387695, | |
| "learning_rate": 3.009568472841778e-07, | |
| "loss": 0.3096998929977417, | |
| "step": 2380 | |
| }, | |
| { | |
| "epoch": 2.56957928802589, | |
| "grad_norm": 1.285710096359253, | |
| "learning_rate": 2.999737798654999e-07, | |
| "loss": 0.23295409977436066, | |
| "step": 2382 | |
| }, | |
| { | |
| "epoch": 2.5717367853290183, | |
| "grad_norm": 1.9540512561798096, | |
| "learning_rate": 2.9899524066089715e-07, | |
| "loss": 0.3955782949924469, | |
| "step": 2384 | |
| }, | |
| { | |
| "epoch": 2.5738942826321467, | |
| "grad_norm": 1.3187015056610107, | |
| "learning_rate": 2.980212352089816e-07, | |
| "loss": 0.34291237592697144, | |
| "step": 2386 | |
| }, | |
| { | |
| "epoch": 2.576051779935275, | |
| "grad_norm": 35.879234313964844, | |
| "learning_rate": 2.9705176902270386e-07, | |
| "loss": 0.2686152160167694, | |
| "step": 2388 | |
| }, | |
| { | |
| "epoch": 2.5782092772384035, | |
| "grad_norm": 1.8663164377212524, | |
| "learning_rate": 2.960868475893224e-07, | |
| "loss": 0.20186705887317657, | |
| "step": 2390 | |
| }, | |
| { | |
| "epoch": 2.580366774541532, | |
| "grad_norm": 1.9428675174713135, | |
| "learning_rate": 2.951264763703719e-07, | |
| "loss": 0.2464224100112915, | |
| "step": 2392 | |
| }, | |
| { | |
| "epoch": 2.58252427184466, | |
| "grad_norm": 1.2760838270187378, | |
| "learning_rate": 2.941706608016317e-07, | |
| "loss": 0.2949107885360718, | |
| "step": 2394 | |
| }, | |
| { | |
| "epoch": 2.584681769147789, | |
| "grad_norm": 1.605370044708252, | |
| "learning_rate": 2.9321940629309705e-07, | |
| "loss": 0.2963062822818756, | |
| "step": 2396 | |
| }, | |
| { | |
| "epoch": 2.5868392664509168, | |
| "grad_norm": 1.2159507274627686, | |
| "learning_rate": 2.9227271822894615e-07, | |
| "loss": 0.25476306676864624, | |
| "step": 2398 | |
| }, | |
| { | |
| "epoch": 2.588996763754045, | |
| "grad_norm": 1.5449872016906738, | |
| "learning_rate": 2.913306019675114e-07, | |
| "loss": 0.1481795310974121, | |
| "step": 2400 | |
| }, | |
| { | |
| "epoch": 2.5911542610571736, | |
| "grad_norm": 1.4837470054626465, | |
| "learning_rate": 2.9039306284124764e-07, | |
| "loss": 0.1671726405620575, | |
| "step": 2402 | |
| }, | |
| { | |
| "epoch": 2.593311758360302, | |
| "grad_norm": 2.013652801513672, | |
| "learning_rate": 2.8946010615670397e-07, | |
| "loss": 0.3186720609664917, | |
| "step": 2404 | |
| }, | |
| { | |
| "epoch": 2.5954692556634305, | |
| "grad_norm": 1.3679430484771729, | |
| "learning_rate": 2.8853173719449153e-07, | |
| "loss": 0.19645805656909943, | |
| "step": 2406 | |
| }, | |
| { | |
| "epoch": 2.597626752966559, | |
| "grad_norm": 4.073277473449707, | |
| "learning_rate": 2.8760796120925455e-07, | |
| "loss": 0.27875351905822754, | |
| "step": 2408 | |
| }, | |
| { | |
| "epoch": 2.5997842502696873, | |
| "grad_norm": 2.3851447105407715, | |
| "learning_rate": 2.8668878342964165e-07, | |
| "loss": 0.3042440414428711, | |
| "step": 2410 | |
| }, | |
| { | |
| "epoch": 2.6019417475728153, | |
| "grad_norm": 3.0329368114471436, | |
| "learning_rate": 2.8577420905827356e-07, | |
| "loss": 0.40673866868019104, | |
| "step": 2412 | |
| }, | |
| { | |
| "epoch": 2.604099244875944, | |
| "grad_norm": 1.5875539779663086, | |
| "learning_rate": 2.848642432717171e-07, | |
| "loss": 0.3267652988433838, | |
| "step": 2414 | |
| }, | |
| { | |
| "epoch": 2.606256742179072, | |
| "grad_norm": 1.2932881116867065, | |
| "learning_rate": 2.8395889122045293e-07, | |
| "loss": 0.3058151602745056, | |
| "step": 2416 | |
| }, | |
| { | |
| "epoch": 2.6084142394822005, | |
| "grad_norm": 1.7162359952926636, | |
| "learning_rate": 2.8305815802884807e-07, | |
| "loss": 0.3365314304828644, | |
| "step": 2418 | |
| }, | |
| { | |
| "epoch": 2.610571736785329, | |
| "grad_norm": 2.6204607486724854, | |
| "learning_rate": 2.8216204879512613e-07, | |
| "loss": 0.26252228021621704, | |
| "step": 2420 | |
| }, | |
| { | |
| "epoch": 2.6127292340884574, | |
| "grad_norm": 1.4677083492279053, | |
| "learning_rate": 2.8127056859133914e-07, | |
| "loss": 0.22944192588329315, | |
| "step": 2422 | |
| }, | |
| { | |
| "epoch": 2.614886731391586, | |
| "grad_norm": 1.4243353605270386, | |
| "learning_rate": 2.803837224633385e-07, | |
| "loss": 0.30490928888320923, | |
| "step": 2424 | |
| }, | |
| { | |
| "epoch": 2.617044228694714, | |
| "grad_norm": 1.6455210447311401, | |
| "learning_rate": 2.795015154307454e-07, | |
| "loss": 0.3725619614124298, | |
| "step": 2426 | |
| }, | |
| { | |
| "epoch": 2.6192017259978426, | |
| "grad_norm": 1.263656497001648, | |
| "learning_rate": 2.786239524869247e-07, | |
| "loss": 0.38048383593559265, | |
| "step": 2428 | |
| }, | |
| { | |
| "epoch": 2.6213592233009706, | |
| "grad_norm": 4.27044677734375, | |
| "learning_rate": 2.7775103859895443e-07, | |
| "loss": 0.306596577167511, | |
| "step": 2430 | |
| }, | |
| { | |
| "epoch": 2.6235167206040995, | |
| "grad_norm": 1.5108482837677002, | |
| "learning_rate": 2.7688277870759877e-07, | |
| "loss": 0.25143009424209595, | |
| "step": 2432 | |
| }, | |
| { | |
| "epoch": 2.6256742179072274, | |
| "grad_norm": 2.3310492038726807, | |
| "learning_rate": 2.7601917772728e-07, | |
| "loss": 0.38734516501426697, | |
| "step": 2434 | |
| }, | |
| { | |
| "epoch": 2.627831715210356, | |
| "grad_norm": 3.2000060081481934, | |
| "learning_rate": 2.7516024054605076e-07, | |
| "loss": 0.3311081528663635, | |
| "step": 2436 | |
| }, | |
| { | |
| "epoch": 2.6299892125134843, | |
| "grad_norm": 1.907240390777588, | |
| "learning_rate": 2.743059720255658e-07, | |
| "loss": 0.1861996203660965, | |
| "step": 2438 | |
| }, | |
| { | |
| "epoch": 2.6321467098166127, | |
| "grad_norm": 1.7378534078598022, | |
| "learning_rate": 2.73456377001055e-07, | |
| "loss": 0.21612344682216644, | |
| "step": 2440 | |
| }, | |
| { | |
| "epoch": 2.634304207119741, | |
| "grad_norm": 1.9995208978652954, | |
| "learning_rate": 2.726114602812962e-07, | |
| "loss": 0.20262135565280914, | |
| "step": 2442 | |
| }, | |
| { | |
| "epoch": 2.6364617044228695, | |
| "grad_norm": 2.0192410945892334, | |
| "learning_rate": 2.7177122664858727e-07, | |
| "loss": 0.3542102575302124, | |
| "step": 2444 | |
| }, | |
| { | |
| "epoch": 2.638619201725998, | |
| "grad_norm": 1.935210943222046, | |
| "learning_rate": 2.709356808587195e-07, | |
| "loss": 0.39216798543930054, | |
| "step": 2446 | |
| }, | |
| { | |
| "epoch": 2.6407766990291264, | |
| "grad_norm": 1.6480642557144165, | |
| "learning_rate": 2.7010482764095047e-07, | |
| "loss": 0.12209905683994293, | |
| "step": 2448 | |
| }, | |
| { | |
| "epoch": 2.642934196332255, | |
| "grad_norm": 7.414170265197754, | |
| "learning_rate": 2.6927867169797805e-07, | |
| "loss": 0.5208877325057983, | |
| "step": 2450 | |
| }, | |
| { | |
| "epoch": 2.6450916936353828, | |
| "grad_norm": 1.8177531957626343, | |
| "learning_rate": 2.6845721770591236e-07, | |
| "loss": 0.4026768207550049, | |
| "step": 2452 | |
| }, | |
| { | |
| "epoch": 2.647249190938511, | |
| "grad_norm": 7.202023506164551, | |
| "learning_rate": 2.676404703142503e-07, | |
| "loss": 0.25339025259017944, | |
| "step": 2454 | |
| }, | |
| { | |
| "epoch": 2.6494066882416396, | |
| "grad_norm": 1.1629081964492798, | |
| "learning_rate": 2.6682843414584954e-07, | |
| "loss": 0.27695736289024353, | |
| "step": 2456 | |
| }, | |
| { | |
| "epoch": 2.651564185544768, | |
| "grad_norm": 2.3341569900512695, | |
| "learning_rate": 2.660211137969013e-07, | |
| "loss": 0.1916518658399582, | |
| "step": 2458 | |
| }, | |
| { | |
| "epoch": 2.6537216828478964, | |
| "grad_norm": 3.8498494625091553, | |
| "learning_rate": 2.6521851383690486e-07, | |
| "loss": 0.28546687960624695, | |
| "step": 2460 | |
| }, | |
| { | |
| "epoch": 2.655879180151025, | |
| "grad_norm": 1.4487419128417969, | |
| "learning_rate": 2.6442063880864183e-07, | |
| "loss": 0.2577356994152069, | |
| "step": 2462 | |
| }, | |
| { | |
| "epoch": 2.6580366774541533, | |
| "grad_norm": 1.4185736179351807, | |
| "learning_rate": 2.636274932281508e-07, | |
| "loss": 0.335868775844574, | |
| "step": 2464 | |
| }, | |
| { | |
| "epoch": 2.6601941747572817, | |
| "grad_norm": 0.31347620487213135, | |
| "learning_rate": 2.628390815847005e-07, | |
| "loss": 0.08725874125957489, | |
| "step": 2466 | |
| }, | |
| { | |
| "epoch": 2.66235167206041, | |
| "grad_norm": 1.61336088180542, | |
| "learning_rate": 2.6205540834076545e-07, | |
| "loss": 0.38296324014663696, | |
| "step": 2468 | |
| }, | |
| { | |
| "epoch": 2.664509169363538, | |
| "grad_norm": 1.5005100965499878, | |
| "learning_rate": 2.6127647793200105e-07, | |
| "loss": 0.29919686913490295, | |
| "step": 2470 | |
| }, | |
| { | |
| "epoch": 2.6666666666666665, | |
| "grad_norm": 1.2096229791641235, | |
| "learning_rate": 2.6050229476721666e-07, | |
| "loss": 0.20811551809310913, | |
| "step": 2472 | |
| }, | |
| { | |
| "epoch": 2.668824163969795, | |
| "grad_norm": 1.0160880088806152, | |
| "learning_rate": 2.59732863228353e-07, | |
| "loss": 0.2570361793041229, | |
| "step": 2474 | |
| }, | |
| { | |
| "epoch": 2.6709816612729234, | |
| "grad_norm": 1.900524377822876, | |
| "learning_rate": 2.589681876704557e-07, | |
| "loss": 0.2715557813644409, | |
| "step": 2476 | |
| }, | |
| { | |
| "epoch": 2.6731391585760518, | |
| "grad_norm": 4.474449157714844, | |
| "learning_rate": 2.58208272421651e-07, | |
| "loss": 0.3477630913257599, | |
| "step": 2478 | |
| }, | |
| { | |
| "epoch": 2.67529665587918, | |
| "grad_norm": 1.686800241470337, | |
| "learning_rate": 2.574531217831218e-07, | |
| "loss": 0.3386651277542114, | |
| "step": 2480 | |
| }, | |
| { | |
| "epoch": 2.6774541531823086, | |
| "grad_norm": 1.3463099002838135, | |
| "learning_rate": 2.567027400290826e-07, | |
| "loss": 0.24490870535373688, | |
| "step": 2482 | |
| }, | |
| { | |
| "epoch": 2.679611650485437, | |
| "grad_norm": 1.4641351699829102, | |
| "learning_rate": 2.5595713140675575e-07, | |
| "loss": 0.2602543234825134, | |
| "step": 2484 | |
| }, | |
| { | |
| "epoch": 2.6817691477885655, | |
| "grad_norm": 1.584945797920227, | |
| "learning_rate": 2.55216300136347e-07, | |
| "loss": 0.2942560911178589, | |
| "step": 2486 | |
| }, | |
| { | |
| "epoch": 2.6839266450916934, | |
| "grad_norm": 1.3279131650924683, | |
| "learning_rate": 2.544802504110226e-07, | |
| "loss": 0.2843012809753418, | |
| "step": 2488 | |
| }, | |
| { | |
| "epoch": 2.686084142394822, | |
| "grad_norm": 1.5675629377365112, | |
| "learning_rate": 2.537489863968842e-07, | |
| "loss": 0.3618108034133911, | |
| "step": 2490 | |
| }, | |
| { | |
| "epoch": 2.6882416396979503, | |
| "grad_norm": 1.386384129524231, | |
| "learning_rate": 2.530225122329459e-07, | |
| "loss": 0.18842831254005432, | |
| "step": 2492 | |
| }, | |
| { | |
| "epoch": 2.6903991370010787, | |
| "grad_norm": 0.44938626885414124, | |
| "learning_rate": 2.5230083203111163e-07, | |
| "loss": 0.010065621696412563, | |
| "step": 2494 | |
| }, | |
| { | |
| "epoch": 2.692556634304207, | |
| "grad_norm": 1.9967701435089111, | |
| "learning_rate": 2.5158394987615014e-07, | |
| "loss": 0.3003666400909424, | |
| "step": 2496 | |
| }, | |
| { | |
| "epoch": 2.6947141316073355, | |
| "grad_norm": 0.5517582893371582, | |
| "learning_rate": 2.5087186982567345e-07, | |
| "loss": 0.1282682716846466, | |
| "step": 2498 | |
| }, | |
| { | |
| "epoch": 2.696871628910464, | |
| "grad_norm": 1.3890687227249146, | |
| "learning_rate": 2.5016459591011287e-07, | |
| "loss": 0.3012073338031769, | |
| "step": 2500 | |
| }, | |
| { | |
| "epoch": 2.6990291262135924, | |
| "grad_norm": 2.243579387664795, | |
| "learning_rate": 2.494621321326972e-07, | |
| "loss": 0.3187774121761322, | |
| "step": 2502 | |
| }, | |
| { | |
| "epoch": 2.701186623516721, | |
| "grad_norm": 1.2543731927871704, | |
| "learning_rate": 2.487644824694288e-07, | |
| "loss": 0.2931416630744934, | |
| "step": 2504 | |
| }, | |
| { | |
| "epoch": 2.7033441208198488, | |
| "grad_norm": 2.056020975112915, | |
| "learning_rate": 2.48071650869062e-07, | |
| "loss": 0.24611467123031616, | |
| "step": 2506 | |
| }, | |
| { | |
| "epoch": 2.705501618122977, | |
| "grad_norm": 2.098752498626709, | |
| "learning_rate": 2.473836412530809e-07, | |
| "loss": 0.3165457248687744, | |
| "step": 2508 | |
| }, | |
| { | |
| "epoch": 2.7076591154261056, | |
| "grad_norm": 4.687522888183594, | |
| "learning_rate": 2.46700457515676e-07, | |
| "loss": 0.4136981666088104, | |
| "step": 2510 | |
| }, | |
| { | |
| "epoch": 2.709816612729234, | |
| "grad_norm": 0.5573480725288391, | |
| "learning_rate": 2.460221035237235e-07, | |
| "loss": 0.15423323214054108, | |
| "step": 2512 | |
| }, | |
| { | |
| "epoch": 2.7119741100323624, | |
| "grad_norm": 1.1807475090026855, | |
| "learning_rate": 2.453485831167625e-07, | |
| "loss": 0.2989809811115265, | |
| "step": 2514 | |
| }, | |
| { | |
| "epoch": 2.714131607335491, | |
| "grad_norm": 1.6326533555984497, | |
| "learning_rate": 2.446799001069742e-07, | |
| "loss": 0.3671968877315521, | |
| "step": 2516 | |
| }, | |
| { | |
| "epoch": 2.7162891046386193, | |
| "grad_norm": 4.470088958740234, | |
| "learning_rate": 2.440160582791589e-07, | |
| "loss": 0.3751377463340759, | |
| "step": 2518 | |
| }, | |
| { | |
| "epoch": 2.7184466019417477, | |
| "grad_norm": 0.9408198595046997, | |
| "learning_rate": 2.43357061390716e-07, | |
| "loss": 0.3237053155899048, | |
| "step": 2520 | |
| }, | |
| { | |
| "epoch": 2.720604099244876, | |
| "grad_norm": 1.8276516199111938, | |
| "learning_rate": 2.42702913171622e-07, | |
| "loss": 0.3124433755874634, | |
| "step": 2522 | |
| }, | |
| { | |
| "epoch": 2.722761596548004, | |
| "grad_norm": 1.5803215503692627, | |
| "learning_rate": 2.420536173244094e-07, | |
| "loss": 0.2791770100593567, | |
| "step": 2524 | |
| }, | |
| { | |
| "epoch": 2.724919093851133, | |
| "grad_norm": 3.284719705581665, | |
| "learning_rate": 2.414091775241462e-07, | |
| "loss": 0.3442307114601135, | |
| "step": 2526 | |
| }, | |
| { | |
| "epoch": 2.727076591154261, | |
| "grad_norm": 1.687919020652771, | |
| "learning_rate": 2.4076959741841445e-07, | |
| "loss": 0.3351602852344513, | |
| "step": 2528 | |
| }, | |
| { | |
| "epoch": 2.7292340884573894, | |
| "grad_norm": 7.2448225021362305, | |
| "learning_rate": 2.4013488062728993e-07, | |
| "loss": 0.3161589205265045, | |
| "step": 2530 | |
| }, | |
| { | |
| "epoch": 2.7313915857605178, | |
| "grad_norm": 4.134527206420898, | |
| "learning_rate": 2.395050307433219e-07, | |
| "loss": 0.36085984110832214, | |
| "step": 2532 | |
| }, | |
| { | |
| "epoch": 2.733549083063646, | |
| "grad_norm": 1.2739371061325073, | |
| "learning_rate": 2.3888005133151255e-07, | |
| "loss": 0.22625665366649628, | |
| "step": 2534 | |
| }, | |
| { | |
| "epoch": 2.7357065803667746, | |
| "grad_norm": 1.8671566247940063, | |
| "learning_rate": 2.3825994592929645e-07, | |
| "loss": 0.2694007158279419, | |
| "step": 2536 | |
| }, | |
| { | |
| "epoch": 2.737864077669903, | |
| "grad_norm": 7.57783842086792, | |
| "learning_rate": 2.3764471804652095e-07, | |
| "loss": 0.18972235918045044, | |
| "step": 2538 | |
| }, | |
| { | |
| "epoch": 2.7400215749730314, | |
| "grad_norm": 2.3178181648254395, | |
| "learning_rate": 2.370343711654267e-07, | |
| "loss": 0.277940571308136, | |
| "step": 2540 | |
| }, | |
| { | |
| "epoch": 2.7421790722761594, | |
| "grad_norm": 1.989964246749878, | |
| "learning_rate": 2.36428908740627e-07, | |
| "loss": 0.22704048454761505, | |
| "step": 2542 | |
| }, | |
| { | |
| "epoch": 2.7443365695792883, | |
| "grad_norm": 1.57944655418396, | |
| "learning_rate": 2.358283341990889e-07, | |
| "loss": 0.25424429774284363, | |
| "step": 2544 | |
| }, | |
| { | |
| "epoch": 2.7464940668824163, | |
| "grad_norm": 2.715576410293579, | |
| "learning_rate": 2.352326509401134e-07, | |
| "loss": 0.2091311663389206, | |
| "step": 2546 | |
| }, | |
| { | |
| "epoch": 2.7486515641855447, | |
| "grad_norm": 3.7704293727874756, | |
| "learning_rate": 2.3464186233531696e-07, | |
| "loss": 0.316684752702713, | |
| "step": 2548 | |
| }, | |
| { | |
| "epoch": 2.750809061488673, | |
| "grad_norm": 0.6256927847862244, | |
| "learning_rate": 2.3405597172861135e-07, | |
| "loss": 0.2244507223367691, | |
| "step": 2550 | |
| }, | |
| { | |
| "epoch": 2.7529665587918015, | |
| "grad_norm": 1.8245450258255005, | |
| "learning_rate": 2.3347498243618558e-07, | |
| "loss": 0.21601910889148712, | |
| "step": 2552 | |
| }, | |
| { | |
| "epoch": 2.75512405609493, | |
| "grad_norm": 2.7671761512756348, | |
| "learning_rate": 2.3289889774648675e-07, | |
| "loss": 0.25035250186920166, | |
| "step": 2554 | |
| }, | |
| { | |
| "epoch": 2.7572815533980584, | |
| "grad_norm": 1.276296854019165, | |
| "learning_rate": 2.3232772092020148e-07, | |
| "loss": 0.18391655385494232, | |
| "step": 2556 | |
| }, | |
| { | |
| "epoch": 2.759439050701187, | |
| "grad_norm": 3.1530673503875732, | |
| "learning_rate": 2.3176145519023742e-07, | |
| "loss": 0.2945748567581177, | |
| "step": 2558 | |
| }, | |
| { | |
| "epoch": 2.7615965480043148, | |
| "grad_norm": 1.5141795873641968, | |
| "learning_rate": 2.312001037617051e-07, | |
| "loss": 0.3175848424434662, | |
| "step": 2560 | |
| }, | |
| { | |
| "epoch": 2.7637540453074436, | |
| "grad_norm": 1.762587547302246, | |
| "learning_rate": 2.3064366981189995e-07, | |
| "loss": 0.2632935643196106, | |
| "step": 2562 | |
| }, | |
| { | |
| "epoch": 2.7659115426105716, | |
| "grad_norm": 2.4396347999572754, | |
| "learning_rate": 2.3009215649028332e-07, | |
| "loss": 0.12333346903324127, | |
| "step": 2564 | |
| }, | |
| { | |
| "epoch": 2.7680690399137, | |
| "grad_norm": 0.8734754323959351, | |
| "learning_rate": 2.295455669184662e-07, | |
| "loss": 0.1719101369380951, | |
| "step": 2566 | |
| }, | |
| { | |
| "epoch": 2.7702265372168284, | |
| "grad_norm": 2.0594780445098877, | |
| "learning_rate": 2.2900390419019047e-07, | |
| "loss": 0.24180670082569122, | |
| "step": 2568 | |
| }, | |
| { | |
| "epoch": 2.772384034519957, | |
| "grad_norm": 3.8657302856445312, | |
| "learning_rate": 2.2846717137131139e-07, | |
| "loss": 0.17427459359169006, | |
| "step": 2570 | |
| }, | |
| { | |
| "epoch": 2.7745415318230853, | |
| "grad_norm": 1.2864545583724976, | |
| "learning_rate": 2.2793537149978097e-07, | |
| "loss": 0.3185139298439026, | |
| "step": 2572 | |
| }, | |
| { | |
| "epoch": 2.7766990291262137, | |
| "grad_norm": 1.2937431335449219, | |
| "learning_rate": 2.2740850758563e-07, | |
| "loss": 0.25411853194236755, | |
| "step": 2574 | |
| }, | |
| { | |
| "epoch": 2.778856526429342, | |
| "grad_norm": 2.3176355361938477, | |
| "learning_rate": 2.2688658261095177e-07, | |
| "loss": 0.20836421847343445, | |
| "step": 2576 | |
| }, | |
| { | |
| "epoch": 2.78101402373247, | |
| "grad_norm": 2.0247695446014404, | |
| "learning_rate": 2.2636959952988402e-07, | |
| "loss": 0.27753064036369324, | |
| "step": 2578 | |
| }, | |
| { | |
| "epoch": 2.783171521035599, | |
| "grad_norm": 1.718490481376648, | |
| "learning_rate": 2.2585756126859373e-07, | |
| "loss": 0.26367393136024475, | |
| "step": 2580 | |
| }, | |
| { | |
| "epoch": 2.785329018338727, | |
| "grad_norm": 2.086444854736328, | |
| "learning_rate": 2.2535047072525968e-07, | |
| "loss": 0.2552420198917389, | |
| "step": 2582 | |
| }, | |
| { | |
| "epoch": 2.7874865156418553, | |
| "grad_norm": 2.691962957382202, | |
| "learning_rate": 2.2484833077005534e-07, | |
| "loss": 0.38996651768684387, | |
| "step": 2584 | |
| }, | |
| { | |
| "epoch": 2.7896440129449838, | |
| "grad_norm": 3.1365230083465576, | |
| "learning_rate": 2.2435114424513468e-07, | |
| "loss": 0.2881295680999756, | |
| "step": 2586 | |
| }, | |
| { | |
| "epoch": 2.791801510248112, | |
| "grad_norm": 0.678428053855896, | |
| "learning_rate": 2.23858913964614e-07, | |
| "loss": 0.3442489802837372, | |
| "step": 2588 | |
| }, | |
| { | |
| "epoch": 2.7939590075512406, | |
| "grad_norm": 1.9768662452697754, | |
| "learning_rate": 2.233716427145571e-07, | |
| "loss": 0.3336244225502014, | |
| "step": 2590 | |
| }, | |
| { | |
| "epoch": 2.796116504854369, | |
| "grad_norm": 7.05756139755249, | |
| "learning_rate": 2.2288933325295919e-07, | |
| "loss": 0.3653881251811981, | |
| "step": 2592 | |
| }, | |
| { | |
| "epoch": 2.7982740021574974, | |
| "grad_norm": 1.4996511936187744, | |
| "learning_rate": 2.224119883097315e-07, | |
| "loss": 0.337455153465271, | |
| "step": 2594 | |
| }, | |
| { | |
| "epoch": 2.8004314994606254, | |
| "grad_norm": 1.5251402854919434, | |
| "learning_rate": 2.2193961058668565e-07, | |
| "loss": 0.24892055988311768, | |
| "step": 2596 | |
| }, | |
| { | |
| "epoch": 2.8025889967637543, | |
| "grad_norm": 2.474886655807495, | |
| "learning_rate": 2.2147220275751817e-07, | |
| "loss": 0.38033241033554077, | |
| "step": 2598 | |
| }, | |
| { | |
| "epoch": 2.8047464940668823, | |
| "grad_norm": 2.1626007556915283, | |
| "learning_rate": 2.2100976746779575e-07, | |
| "loss": 0.2779306471347809, | |
| "step": 2600 | |
| }, | |
| { | |
| "epoch": 2.8069039913700107, | |
| "grad_norm": 1.5766234397888184, | |
| "learning_rate": 2.2055230733494034e-07, | |
| "loss": 0.198373481631279, | |
| "step": 2602 | |
| }, | |
| { | |
| "epoch": 2.809061488673139, | |
| "grad_norm": 1.3127539157867432, | |
| "learning_rate": 2.2009982494821354e-07, | |
| "loss": 0.2616628110408783, | |
| "step": 2604 | |
| }, | |
| { | |
| "epoch": 2.8112189859762675, | |
| "grad_norm": 1.8110085725784302, | |
| "learning_rate": 2.1965232286870293e-07, | |
| "loss": 0.34928035736083984, | |
| "step": 2606 | |
| }, | |
| { | |
| "epoch": 2.813376483279396, | |
| "grad_norm": 0.9856870770454407, | |
| "learning_rate": 2.1920980362930693e-07, | |
| "loss": 0.24830693006515503, | |
| "step": 2608 | |
| }, | |
| { | |
| "epoch": 2.8155339805825244, | |
| "grad_norm": 1.0019290447235107, | |
| "learning_rate": 2.1877226973472092e-07, | |
| "loss": 0.37566351890563965, | |
| "step": 2610 | |
| }, | |
| { | |
| "epoch": 2.8176914778856528, | |
| "grad_norm": 1.683439016342163, | |
| "learning_rate": 2.1833972366142252e-07, | |
| "loss": 0.20518970489501953, | |
| "step": 2612 | |
| }, | |
| { | |
| "epoch": 2.8198489751887807, | |
| "grad_norm": 1.2993359565734863, | |
| "learning_rate": 2.1791216785765812e-07, | |
| "loss": 0.3496171832084656, | |
| "step": 2614 | |
| }, | |
| { | |
| "epoch": 2.8220064724919096, | |
| "grad_norm": 1.8535475730895996, | |
| "learning_rate": 2.1748960474342858e-07, | |
| "loss": 0.40503692626953125, | |
| "step": 2616 | |
| }, | |
| { | |
| "epoch": 2.8241639697950376, | |
| "grad_norm": 1.230737328529358, | |
| "learning_rate": 2.1707203671047588e-07, | |
| "loss": 0.3467937111854553, | |
| "step": 2618 | |
| }, | |
| { | |
| "epoch": 2.826321467098166, | |
| "grad_norm": 2.092404365539551, | |
| "learning_rate": 2.166594661222692e-07, | |
| "loss": 0.45293277502059937, | |
| "step": 2620 | |
| }, | |
| { | |
| "epoch": 2.8284789644012944, | |
| "grad_norm": 1.2861336469650269, | |
| "learning_rate": 2.162518953139921e-07, | |
| "loss": 0.2429104447364807, | |
| "step": 2622 | |
| }, | |
| { | |
| "epoch": 2.830636461704423, | |
| "grad_norm": 1.7713249921798706, | |
| "learning_rate": 2.1584932659252883e-07, | |
| "loss": 0.31055137515068054, | |
| "step": 2624 | |
| }, | |
| { | |
| "epoch": 2.8327939590075513, | |
| "grad_norm": 2.2935972213745117, | |
| "learning_rate": 2.1545176223645118e-07, | |
| "loss": 0.44860854744911194, | |
| "step": 2626 | |
| }, | |
| { | |
| "epoch": 2.8349514563106797, | |
| "grad_norm": 3.7632486820220947, | |
| "learning_rate": 2.1505920449600637e-07, | |
| "loss": 0.45059871673583984, | |
| "step": 2628 | |
| }, | |
| { | |
| "epoch": 2.837108953613808, | |
| "grad_norm": 1.9739702939987183, | |
| "learning_rate": 2.146716555931031e-07, | |
| "loss": 0.32334843277931213, | |
| "step": 2630 | |
| }, | |
| { | |
| "epoch": 2.839266450916936, | |
| "grad_norm": 1.8699138164520264, | |
| "learning_rate": 2.1428911772130022e-07, | |
| "loss": 0.37822097539901733, | |
| "step": 2632 | |
| }, | |
| { | |
| "epoch": 2.841423948220065, | |
| "grad_norm": 1.3838216066360474, | |
| "learning_rate": 2.1391159304579338e-07, | |
| "loss": 0.2415277361869812, | |
| "step": 2634 | |
| }, | |
| { | |
| "epoch": 2.843581445523193, | |
| "grad_norm": 2.178896427154541, | |
| "learning_rate": 2.1353908370340319e-07, | |
| "loss": 0.36013925075531006, | |
| "step": 2636 | |
| }, | |
| { | |
| "epoch": 2.8457389428263213, | |
| "grad_norm": 1.6107887029647827, | |
| "learning_rate": 2.131715918025631e-07, | |
| "loss": 0.28840532898902893, | |
| "step": 2638 | |
| }, | |
| { | |
| "epoch": 2.8478964401294498, | |
| "grad_norm": 1.2073341608047485, | |
| "learning_rate": 2.1280911942330754e-07, | |
| "loss": 0.24024561047554016, | |
| "step": 2640 | |
| }, | |
| { | |
| "epoch": 2.850053937432578, | |
| "grad_norm": 1.346824049949646, | |
| "learning_rate": 2.1245166861725987e-07, | |
| "loss": 0.29280197620391846, | |
| "step": 2642 | |
| }, | |
| { | |
| "epoch": 2.8522114347357066, | |
| "grad_norm": 2.1668944358825684, | |
| "learning_rate": 2.1209924140762103e-07, | |
| "loss": 0.27317503094673157, | |
| "step": 2644 | |
| }, | |
| { | |
| "epoch": 2.854368932038835, | |
| "grad_norm": 1.5950590372085571, | |
| "learning_rate": 2.1175183978915794e-07, | |
| "loss": 0.16236615180969238, | |
| "step": 2646 | |
| }, | |
| { | |
| "epoch": 2.8565264293419634, | |
| "grad_norm": 1.4013762474060059, | |
| "learning_rate": 2.1140946572819222e-07, | |
| "loss": 0.23140932619571686, | |
| "step": 2648 | |
| }, | |
| { | |
| "epoch": 2.858683926645092, | |
| "grad_norm": 1.3808273077011108, | |
| "learning_rate": 2.1107212116258926e-07, | |
| "loss": 0.34648364782333374, | |
| "step": 2650 | |
| }, | |
| { | |
| "epoch": 2.8608414239482203, | |
| "grad_norm": 1.66391921043396, | |
| "learning_rate": 2.107398080017468e-07, | |
| "loss": 0.3459605872631073, | |
| "step": 2652 | |
| }, | |
| { | |
| "epoch": 2.8629989212513482, | |
| "grad_norm": 3.6172289848327637, | |
| "learning_rate": 2.1041252812658484e-07, | |
| "loss": 0.25366389751434326, | |
| "step": 2654 | |
| }, | |
| { | |
| "epoch": 2.8651564185544767, | |
| "grad_norm": 1.0681893825531006, | |
| "learning_rate": 2.100902833895342e-07, | |
| "loss": 0.185197651386261, | |
| "step": 2656 | |
| }, | |
| { | |
| "epoch": 2.867313915857605, | |
| "grad_norm": 1.549970030784607, | |
| "learning_rate": 2.0977307561452663e-07, | |
| "loss": 0.1989063322544098, | |
| "step": 2658 | |
| }, | |
| { | |
| "epoch": 2.8694714131607335, | |
| "grad_norm": 1.3811312913894653, | |
| "learning_rate": 2.09460906596984e-07, | |
| "loss": 0.31181615591049194, | |
| "step": 2660 | |
| }, | |
| { | |
| "epoch": 2.871628910463862, | |
| "grad_norm": 1.6068974733352661, | |
| "learning_rate": 2.091537781038089e-07, | |
| "loss": 0.30436015129089355, | |
| "step": 2662 | |
| }, | |
| { | |
| "epoch": 2.8737864077669903, | |
| "grad_norm": 2.1774463653564453, | |
| "learning_rate": 2.0885169187337344e-07, | |
| "loss": 0.1961633861064911, | |
| "step": 2664 | |
| }, | |
| { | |
| "epoch": 2.8759439050701188, | |
| "grad_norm": 1.3629629611968994, | |
| "learning_rate": 2.0855464961551068e-07, | |
| "loss": 0.2554187774658203, | |
| "step": 2666 | |
| }, | |
| { | |
| "epoch": 2.878101402373247, | |
| "grad_norm": 1.1737473011016846, | |
| "learning_rate": 2.0826265301150424e-07, | |
| "loss": 0.2499612420797348, | |
| "step": 2668 | |
| }, | |
| { | |
| "epoch": 2.8802588996763756, | |
| "grad_norm": 0.43503567576408386, | |
| "learning_rate": 2.0797570371407868e-07, | |
| "loss": 0.11031116545200348, | |
| "step": 2670 | |
| }, | |
| { | |
| "epoch": 2.8824163969795036, | |
| "grad_norm": 1.2711269855499268, | |
| "learning_rate": 2.0769380334739064e-07, | |
| "loss": 0.27530673146247864, | |
| "step": 2672 | |
| }, | |
| { | |
| "epoch": 2.884573894282632, | |
| "grad_norm": 1.6386080980300903, | |
| "learning_rate": 2.0741695350701957e-07, | |
| "loss": 0.21418559551239014, | |
| "step": 2674 | |
| }, | |
| { | |
| "epoch": 2.8867313915857604, | |
| "grad_norm": 0.6615068912506104, | |
| "learning_rate": 2.0714515575995788e-07, | |
| "loss": 0.3122788667678833, | |
| "step": 2676 | |
| }, | |
| { | |
| "epoch": 2.888888888888889, | |
| "grad_norm": 1.6554388999938965, | |
| "learning_rate": 2.068784116446034e-07, | |
| "loss": 0.2509201467037201, | |
| "step": 2678 | |
| }, | |
| { | |
| "epoch": 2.8910463861920173, | |
| "grad_norm": 1.535337209701538, | |
| "learning_rate": 2.0661672267074972e-07, | |
| "loss": 0.2228378802537918, | |
| "step": 2680 | |
| }, | |
| { | |
| "epoch": 2.8932038834951457, | |
| "grad_norm": 1.8591066598892212, | |
| "learning_rate": 2.0636009031957781e-07, | |
| "loss": 0.4586015045642853, | |
| "step": 2682 | |
| }, | |
| { | |
| "epoch": 2.895361380798274, | |
| "grad_norm": 1.2504751682281494, | |
| "learning_rate": 2.0610851604364787e-07, | |
| "loss": 0.21972203254699707, | |
| "step": 2684 | |
| }, | |
| { | |
| "epoch": 2.8975188781014025, | |
| "grad_norm": 1.314433217048645, | |
| "learning_rate": 2.0586200126689092e-07, | |
| "loss": 0.30095550417900085, | |
| "step": 2686 | |
| }, | |
| { | |
| "epoch": 2.899676375404531, | |
| "grad_norm": 1.6293613910675049, | |
| "learning_rate": 2.0562054738460098e-07, | |
| "loss": 0.12489507347345352, | |
| "step": 2688 | |
| }, | |
| { | |
| "epoch": 2.901833872707659, | |
| "grad_norm": 2.388120651245117, | |
| "learning_rate": 2.0538415576342665e-07, | |
| "loss": 0.2403588593006134, | |
| "step": 2690 | |
| }, | |
| { | |
| "epoch": 2.9039913700107873, | |
| "grad_norm": 1.676954984664917, | |
| "learning_rate": 2.0515282774136402e-07, | |
| "loss": 0.24414768815040588, | |
| "step": 2692 | |
| }, | |
| { | |
| "epoch": 2.9061488673139158, | |
| "grad_norm": 2.731567621231079, | |
| "learning_rate": 2.0492656462774877e-07, | |
| "loss": 0.3925679326057434, | |
| "step": 2694 | |
| }, | |
| { | |
| "epoch": 2.908306364617044, | |
| "grad_norm": 1.193949818611145, | |
| "learning_rate": 2.047053677032484e-07, | |
| "loss": 0.31919193267822266, | |
| "step": 2696 | |
| }, | |
| { | |
| "epoch": 2.9104638619201726, | |
| "grad_norm": 1.6734915971755981, | |
| "learning_rate": 2.0448923821985597e-07, | |
| "loss": 0.2700918912887573, | |
| "step": 2698 | |
| }, | |
| { | |
| "epoch": 2.912621359223301, | |
| "grad_norm": 2.165048837661743, | |
| "learning_rate": 2.0427817740088204e-07, | |
| "loss": 0.09998652338981628, | |
| "step": 2700 | |
| }, | |
| { | |
| "epoch": 2.9147788565264294, | |
| "grad_norm": 1.4805729389190674, | |
| "learning_rate": 2.0407218644094798e-07, | |
| "loss": 0.26490524411201477, | |
| "step": 2702 | |
| }, | |
| { | |
| "epoch": 2.916936353829558, | |
| "grad_norm": 1.9566199779510498, | |
| "learning_rate": 2.0387126650597966e-07, | |
| "loss": 0.37730756402015686, | |
| "step": 2704 | |
| }, | |
| { | |
| "epoch": 2.9190938511326863, | |
| "grad_norm": 1.5146074295043945, | |
| "learning_rate": 2.036754187332004e-07, | |
| "loss": 0.2974068224430084, | |
| "step": 2706 | |
| }, | |
| { | |
| "epoch": 2.9212513484358142, | |
| "grad_norm": 1.5399764776229858, | |
| "learning_rate": 2.034846442311247e-07, | |
| "loss": 0.2960386276245117, | |
| "step": 2708 | |
| }, | |
| { | |
| "epoch": 2.9234088457389427, | |
| "grad_norm": 1.4276615381240845, | |
| "learning_rate": 2.0329894407955186e-07, | |
| "loss": 0.24433766305446625, | |
| "step": 2710 | |
| }, | |
| { | |
| "epoch": 2.925566343042071, | |
| "grad_norm": 1.12831711769104, | |
| "learning_rate": 2.0311831932956003e-07, | |
| "loss": 0.194054514169693, | |
| "step": 2712 | |
| }, | |
| { | |
| "epoch": 2.9277238403451995, | |
| "grad_norm": 4.204019546508789, | |
| "learning_rate": 2.0294277100350006e-07, | |
| "loss": 0.3664979338645935, | |
| "step": 2714 | |
| }, | |
| { | |
| "epoch": 2.929881337648328, | |
| "grad_norm": 1.073026180267334, | |
| "learning_rate": 2.0277230009498994e-07, | |
| "loss": 0.2648014426231384, | |
| "step": 2716 | |
| }, | |
| { | |
| "epoch": 2.9320388349514563, | |
| "grad_norm": 2.1484158039093018, | |
| "learning_rate": 2.026069075689089e-07, | |
| "loss": 0.28026407957077026, | |
| "step": 2718 | |
| }, | |
| { | |
| "epoch": 2.9341963322545848, | |
| "grad_norm": 2.006321907043457, | |
| "learning_rate": 2.0244659436139232e-07, | |
| "loss": 0.451577752828598, | |
| "step": 2720 | |
| }, | |
| { | |
| "epoch": 2.936353829557713, | |
| "grad_norm": 0.9612744450569153, | |
| "learning_rate": 2.0229136137982607e-07, | |
| "loss": 0.2079283595085144, | |
| "step": 2722 | |
| }, | |
| { | |
| "epoch": 2.9385113268608416, | |
| "grad_norm": 1.127065896987915, | |
| "learning_rate": 2.021412095028416e-07, | |
| "loss": 0.29667848348617554, | |
| "step": 2724 | |
| }, | |
| { | |
| "epoch": 2.9406688241639696, | |
| "grad_norm": 0.9780626893043518, | |
| "learning_rate": 2.019961395803108e-07, | |
| "loss": 0.15733566880226135, | |
| "step": 2726 | |
| }, | |
| { | |
| "epoch": 2.9428263214670984, | |
| "grad_norm": 1.398445725440979, | |
| "learning_rate": 2.0185615243334142e-07, | |
| "loss": 0.3436535894870758, | |
| "step": 2728 | |
| }, | |
| { | |
| "epoch": 2.9449838187702264, | |
| "grad_norm": 22.650222778320312, | |
| "learning_rate": 2.0172124885427215e-07, | |
| "loss": 0.23946398496627808, | |
| "step": 2730 | |
| }, | |
| { | |
| "epoch": 2.947141316073355, | |
| "grad_norm": 1.695454478263855, | |
| "learning_rate": 2.0159142960666828e-07, | |
| "loss": 0.22133874893188477, | |
| "step": 2732 | |
| }, | |
| { | |
| "epoch": 2.9492988133764833, | |
| "grad_norm": 3.043394088745117, | |
| "learning_rate": 2.0146669542531755e-07, | |
| "loss": 0.23711824417114258, | |
| "step": 2734 | |
| }, | |
| { | |
| "epoch": 2.9514563106796117, | |
| "grad_norm": 0.8506256937980652, | |
| "learning_rate": 2.0134704701622555e-07, | |
| "loss": 0.15003100037574768, | |
| "step": 2736 | |
| }, | |
| { | |
| "epoch": 2.95361380798274, | |
| "grad_norm": 1.9315284490585327, | |
| "learning_rate": 2.0123248505661205e-07, | |
| "loss": 0.27814292907714844, | |
| "step": 2738 | |
| }, | |
| { | |
| "epoch": 2.9557713052858685, | |
| "grad_norm": 1.3882677555084229, | |
| "learning_rate": 2.011230101949073e-07, | |
| "loss": 0.23976953327655792, | |
| "step": 2740 | |
| }, | |
| { | |
| "epoch": 2.957928802588997, | |
| "grad_norm": 2.189103126525879, | |
| "learning_rate": 2.0101862305074788e-07, | |
| "loss": 0.30090874433517456, | |
| "step": 2742 | |
| }, | |
| { | |
| "epoch": 2.960086299892125, | |
| "grad_norm": 1.476915955543518, | |
| "learning_rate": 2.0091932421497359e-07, | |
| "loss": 0.2663874328136444, | |
| "step": 2744 | |
| }, | |
| { | |
| "epoch": 2.9622437971952538, | |
| "grad_norm": 1.6229114532470703, | |
| "learning_rate": 2.0082511424962407e-07, | |
| "loss": 0.2835708558559418, | |
| "step": 2746 | |
| }, | |
| { | |
| "epoch": 2.9644012944983817, | |
| "grad_norm": 1.7014294862747192, | |
| "learning_rate": 2.0073599368793536e-07, | |
| "loss": 0.3245730698108673, | |
| "step": 2748 | |
| }, | |
| { | |
| "epoch": 2.96655879180151, | |
| "grad_norm": 1.3853977918624878, | |
| "learning_rate": 2.0065196303433735e-07, | |
| "loss": 0.41964831948280334, | |
| "step": 2750 | |
| }, | |
| { | |
| "epoch": 2.9687162891046386, | |
| "grad_norm": 1.847307801246643, | |
| "learning_rate": 2.0057302276445018e-07, | |
| "loss": 0.33610066771507263, | |
| "step": 2752 | |
| }, | |
| { | |
| "epoch": 2.970873786407767, | |
| "grad_norm": 1.3295162916183472, | |
| "learning_rate": 2.0049917332508245e-07, | |
| "loss": 0.32951587438583374, | |
| "step": 2754 | |
| }, | |
| { | |
| "epoch": 2.9730312837108954, | |
| "grad_norm": 1.515709638595581, | |
| "learning_rate": 2.0043041513422793e-07, | |
| "loss": 0.1392497420310974, | |
| "step": 2756 | |
| }, | |
| { | |
| "epoch": 2.975188781014024, | |
| "grad_norm": 1.3852156400680542, | |
| "learning_rate": 2.0036674858106364e-07, | |
| "loss": 0.23723219335079193, | |
| "step": 2758 | |
| }, | |
| { | |
| "epoch": 2.9773462783171523, | |
| "grad_norm": 0.7373172044754028, | |
| "learning_rate": 2.0030817402594758e-07, | |
| "loss": 0.0664176344871521, | |
| "step": 2760 | |
| }, | |
| { | |
| "epoch": 2.9795037756202802, | |
| "grad_norm": 0.5779815912246704, | |
| "learning_rate": 2.0025469180041652e-07, | |
| "loss": 0.19533909857273102, | |
| "step": 2762 | |
| }, | |
| { | |
| "epoch": 2.981661272923409, | |
| "grad_norm": 1.1505489349365234, | |
| "learning_rate": 2.0020630220718412e-07, | |
| "loss": 0.40156224370002747, | |
| "step": 2764 | |
| }, | |
| { | |
| "epoch": 2.983818770226537, | |
| "grad_norm": 1.542429804801941, | |
| "learning_rate": 2.0016300552013962e-07, | |
| "loss": 0.11573772132396698, | |
| "step": 2766 | |
| }, | |
| { | |
| "epoch": 2.9859762675296655, | |
| "grad_norm": 4.137073040008545, | |
| "learning_rate": 2.0012480198434574e-07, | |
| "loss": 0.4161064624786377, | |
| "step": 2768 | |
| }, | |
| { | |
| "epoch": 2.988133764832794, | |
| "grad_norm": 3.1401453018188477, | |
| "learning_rate": 2.0009169181603766e-07, | |
| "loss": 0.26901060342788696, | |
| "step": 2770 | |
| }, | |
| { | |
| "epoch": 2.9902912621359223, | |
| "grad_norm": 1.872072696685791, | |
| "learning_rate": 2.0006367520262163e-07, | |
| "loss": 0.36560726165771484, | |
| "step": 2772 | |
| }, | |
| { | |
| "epoch": 2.9924487594390508, | |
| "grad_norm": 1.6279696226119995, | |
| "learning_rate": 2.0004075230267392e-07, | |
| "loss": 0.22248563170433044, | |
| "step": 2774 | |
| }, | |
| { | |
| "epoch": 2.994606256742179, | |
| "grad_norm": 2.087898015975952, | |
| "learning_rate": 2.0002292324594007e-07, | |
| "loss": 0.2563750445842743, | |
| "step": 2776 | |
| }, | |
| { | |
| "epoch": 2.9967637540453076, | |
| "grad_norm": 2.9592933654785156, | |
| "learning_rate": 2.000101881333341e-07, | |
| "loss": 0.12756453454494476, | |
| "step": 2778 | |
| }, | |
| { | |
| "epoch": 2.9989212513484356, | |
| "grad_norm": 2.473083734512329, | |
| "learning_rate": 2.0000254703693767e-07, | |
| "loss": 0.1695672571659088, | |
| "step": 2780 | |
| }, | |
| { | |
| "epoch": 3.0, | |
| "step": 2781, | |
| "total_flos": 3.284111394515778e+18, | |
| "train_loss": 0.46147939157918366, | |
| "train_runtime": 26036.604, | |
| "train_samples_per_second": 1.709, | |
| "train_steps_per_second": 0.107 | |
| } | |
| ], | |
| "logging_steps": 2, | |
| "max_steps": 2781, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 3, | |
| "save_steps": 99999, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": false, | |
| "should_training_stop": false | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 3.284111394515778e+18, | |
| "train_batch_size": 2, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |