Instructions to use Ba2han/lfm-sft with libraries, inference providers, notebooks, and local apps. Follow these links to get started.
- Libraries
- Transformers
How to use Ba2han/lfm-sft with Transformers:
```python
# Use a pipeline as a high-level helper
from transformers import pipeline

pipe = pipeline("image-text-to-text", model="Ba2han/lfm-sft")
messages = [
    {
        "role": "user",
        "content": [
            {"type": "image", "url": "https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/p-blog/candy.JPG"},
            {"type": "text", "text": "What animal is on the candy?"}
        ]
    },
]
pipe(text=messages)
```

```python
# Load model directly
from transformers import AutoProcessor, AutoModelForMultimodalLM

processor = AutoProcessor.from_pretrained("Ba2han/lfm-sft")
model = AutoModelForMultimodalLM.from_pretrained("Ba2han/lfm-sft")
messages = [
    {
        "role": "user",
        "content": [
            {"type": "image", "url": "https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/p-blog/candy.JPG"},
            {"type": "text", "text": "What animal is on the candy?"}
        ]
    },
]
inputs = processor.apply_chat_template(
    messages,
    add_generation_prompt=True,
    tokenize=True,
    return_dict=True,
    return_tensors="pt",
).to(model.device)

outputs = model.generate(**inputs, max_new_tokens=40)
print(processor.decode(outputs[0][inputs["input_ids"].shape[-1]:]))
```
- Notebooks
- Google Colab
- Kaggle
- Local Apps Settings
- vLLM
How to use Ba2han/lfm-sft with vLLM:
Install from pip and serve model
```bash
# Install vLLM from pip:
pip install vllm

# Start the vLLM server:
vllm serve "Ba2han/lfm-sft"

# Call the server using curl (OpenAI-compatible API):
curl -X POST "http://localhost:8000/v1/chat/completions" \
  -H "Content-Type: application/json" \
  --data '{
    "model": "Ba2han/lfm-sft",
    "messages": [
      {
        "role": "user",
        "content": [
          {
            "type": "text",
            "text": "Describe this image in one sentence."
          },
          {
            "type": "image_url",
            "image_url": {
              "url": "https://cdn.britannica.com/61/93061-050-99147DCE/Statue-of-Liberty-Island-New-York-Bay.jpg"
            }
          }
        ]
      }
    ]
  }'
```
Use Docker
docker model run hf.co/Ba2han/lfm-sft
- SGLang
How to use Ba2han/lfm-sft with SGLang:
Install from pip and serve model
```bash
# Install SGLang from pip:
pip install sglang

# Start the SGLang server:
python3 -m sglang.launch_server \
  --model-path "Ba2han/lfm-sft" \
  --host 0.0.0.0 \
  --port 30000

# Call the server using curl (OpenAI-compatible API):
curl -X POST "http://localhost:30000/v1/chat/completions" \
  -H "Content-Type: application/json" \
  --data '{
    "model": "Ba2han/lfm-sft",
    "messages": [
      {
        "role": "user",
        "content": [
          {
            "type": "text",
            "text": "Describe this image in one sentence."
          },
          {
            "type": "image_url",
            "image_url": {
              "url": "https://cdn.britannica.com/61/93061-050-99147DCE/Statue-of-Liberty-Island-New-York-Bay.jpg"
            }
          }
        ]
      }
    ]
  }'
```
Use Docker images
```bash
docker run --gpus all \
  --shm-size 32g \
  -p 30000:30000 \
  -v ~/.cache/huggingface:/root/.cache/huggingface \
  --env "HF_TOKEN=<secret>" \
  --ipc=host \
  lmsysorg/sglang:latest \
  python3 -m sglang.launch_server \
    --model-path "Ba2han/lfm-sft" \
    --host 0.0.0.0 \
    --port 30000

# Call the server using curl (OpenAI-compatible API):
curl -X POST "http://localhost:30000/v1/chat/completions" \
  -H "Content-Type: application/json" \
  --data '{
    "model": "Ba2han/lfm-sft",
    "messages": [
      {
        "role": "user",
        "content": [
          {
            "type": "text",
            "text": "Describe this image in one sentence."
          },
          {
            "type": "image_url",
            "image_url": {
              "url": "https://cdn.britannica.com/61/93061-050-99147DCE/Statue-of-Liberty-Island-New-York-Bay.jpg"
            }
          }
        ]
      }
    ]
  }'
```
- Unsloth Studio
How to use Ba2han/lfm-sft with Unsloth Studio:
Install Unsloth Studio (macOS, Linux, WSL)
```bash
curl -fsSL https://unsloth.ai/install.sh | sh

# Run unsloth studio
unsloth studio -H 0.0.0.0 -p 8888

# Then open http://localhost:8888 in your browser
# Search for Ba2han/lfm-sft to start chatting
```
Install Unsloth Studio (Windows)
```powershell
irm https://unsloth.ai/install.ps1 | iex

# Run unsloth studio
unsloth studio -H 0.0.0.0 -p 8888

# Then open http://localhost:8888 in your browser
# Search for Ba2han/lfm-sft to start chatting
```
Using HuggingFace Spaces for Unsloth
```text
# No setup required
# Open https://huggingface.co/spaces/unsloth/studio in your browser
# Search for Ba2han/lfm-sft to start chatting
```
Load model with FastModel
```bash
pip install unsloth
```

```python
from unsloth import FastModel

model, tokenizer = FastModel.from_pretrained(
    model_name="Ba2han/lfm-sft",
    max_seq_length=2048,
)
```
- Docker Model Runner
How to use Ba2han/lfm-sft with Docker Model Runner:
docker model run hf.co/Ba2han/lfm-sft
| { | |
| "best_global_step": null, | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 1.1001598295151838, | |
| "eval_steps": 500, | |
| "global_step": 4130, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.0005327650506126798, | |
| "grad_norm": 16.25, | |
| "learning_rate": 3.8647342995169085e-07, | |
| "loss": 2.6138787269592285, | |
| "step": 2 | |
| }, | |
| { | |
| "epoch": 0.0010655301012253596, | |
| "grad_norm": 16.875, | |
| "learning_rate": 1.1594202898550726e-06, | |
| "loss": 2.5900964736938477, | |
| "step": 4 | |
| }, | |
| { | |
| "epoch": 0.0015982951518380393, | |
| "grad_norm": 15.375, | |
| "learning_rate": 1.932367149758454e-06, | |
| "loss": 2.5705907344818115, | |
| "step": 6 | |
| }, | |
| { | |
| "epoch": 0.002131060202450719, | |
| "grad_norm": 13.4375, | |
| "learning_rate": 2.7053140096618356e-06, | |
| "loss": 2.5161614418029785, | |
| "step": 8 | |
| }, | |
| { | |
| "epoch": 0.002663825253063399, | |
| "grad_norm": 11.9375, | |
| "learning_rate": 3.4782608695652175e-06, | |
| "loss": 2.573242425918579, | |
| "step": 10 | |
| }, | |
| { | |
| "epoch": 0.0031965903036760787, | |
| "grad_norm": 10.6875, | |
| "learning_rate": 4.251207729468599e-06, | |
| "loss": 2.527428388595581, | |
| "step": 12 | |
| }, | |
| { | |
| "epoch": 0.0037293553542887587, | |
| "grad_norm": 6.1875, | |
| "learning_rate": 5.024154589371981e-06, | |
| "loss": 2.3534696102142334, | |
| "step": 14 | |
| }, | |
| { | |
| "epoch": 0.004262120404901438, | |
| "grad_norm": 5.4375, | |
| "learning_rate": 5.797101449275363e-06, | |
| "loss": 2.3852028846740723, | |
| "step": 16 | |
| }, | |
| { | |
| "epoch": 0.004794885455514118, | |
| "grad_norm": 4.28125, | |
| "learning_rate": 6.570048309178745e-06, | |
| "loss": 2.350311040878296, | |
| "step": 18 | |
| }, | |
| { | |
| "epoch": 0.005327650506126798, | |
| "grad_norm": 2.9375, | |
| "learning_rate": 7.342995169082127e-06, | |
| "loss": 2.2285757064819336, | |
| "step": 20 | |
| }, | |
| { | |
| "epoch": 0.005860415556739478, | |
| "grad_norm": 2.546875, | |
| "learning_rate": 8.115942028985508e-06, | |
| "loss": 2.2054595947265625, | |
| "step": 22 | |
| }, | |
| { | |
| "epoch": 0.006393180607352157, | |
| "grad_norm": 2.875, | |
| "learning_rate": 8.888888888888888e-06, | |
| "loss": 2.237429618835449, | |
| "step": 24 | |
| }, | |
| { | |
| "epoch": 0.006925945657964837, | |
| "grad_norm": 2.015625, | |
| "learning_rate": 9.66183574879227e-06, | |
| "loss": 2.1398394107818604, | |
| "step": 26 | |
| }, | |
| { | |
| "epoch": 0.007458710708577517, | |
| "grad_norm": 1.78125, | |
| "learning_rate": 1.0434782608695653e-05, | |
| "loss": 2.1224007606506348, | |
| "step": 28 | |
| }, | |
| { | |
| "epoch": 0.007991475759190196, | |
| "grad_norm": 1.65625, | |
| "learning_rate": 1.1207729468599035e-05, | |
| "loss": 2.0460424423217773, | |
| "step": 30 | |
| }, | |
| { | |
| "epoch": 0.008524240809802876, | |
| "grad_norm": 1.6015625, | |
| "learning_rate": 1.1980676328502416e-05, | |
| "loss": 2.047302484512329, | |
| "step": 32 | |
| }, | |
| { | |
| "epoch": 0.009057005860415556, | |
| "grad_norm": 1.5078125, | |
| "learning_rate": 1.2753623188405797e-05, | |
| "loss": 2.0680582523345947, | |
| "step": 34 | |
| }, | |
| { | |
| "epoch": 0.009589770911028236, | |
| "grad_norm": 1.4921875, | |
| "learning_rate": 1.352657004830918e-05, | |
| "loss": 1.9368038177490234, | |
| "step": 36 | |
| }, | |
| { | |
| "epoch": 0.010122535961640916, | |
| "grad_norm": 1.4609375, | |
| "learning_rate": 1.4299516908212561e-05, | |
| "loss": 1.9544492959976196, | |
| "step": 38 | |
| }, | |
| { | |
| "epoch": 0.010655301012253596, | |
| "grad_norm": 1.484375, | |
| "learning_rate": 1.5072463768115944e-05, | |
| "loss": 1.9077532291412354, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 0.011188066062866276, | |
| "grad_norm": 1.4765625, | |
| "learning_rate": 1.5845410628019324e-05, | |
| "loss": 1.8669679164886475, | |
| "step": 42 | |
| }, | |
| { | |
| "epoch": 0.011720831113478956, | |
| "grad_norm": 1.40625, | |
| "learning_rate": 1.6618357487922706e-05, | |
| "loss": 1.860713243484497, | |
| "step": 44 | |
| }, | |
| { | |
| "epoch": 0.012253596164091636, | |
| "grad_norm": 1.390625, | |
| "learning_rate": 1.739130434782609e-05, | |
| "loss": 1.8645515441894531, | |
| "step": 46 | |
| }, | |
| { | |
| "epoch": 0.012786361214704315, | |
| "grad_norm": 1.4140625, | |
| "learning_rate": 1.816425120772947e-05, | |
| "loss": 1.8843761682510376, | |
| "step": 48 | |
| }, | |
| { | |
| "epoch": 0.013319126265316995, | |
| "grad_norm": 1.34375, | |
| "learning_rate": 1.893719806763285e-05, | |
| "loss": 1.81703519821167, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 0.013851891315929675, | |
| "grad_norm": 1.390625, | |
| "learning_rate": 1.9710144927536236e-05, | |
| "loss": 1.791835069656372, | |
| "step": 52 | |
| }, | |
| { | |
| "epoch": 0.014384656366542355, | |
| "grad_norm": 1.484375, | |
| "learning_rate": 2.0483091787439618e-05, | |
| "loss": 1.8730442523956299, | |
| "step": 54 | |
| }, | |
| { | |
| "epoch": 0.014917421417155035, | |
| "grad_norm": 1.5, | |
| "learning_rate": 2.1256038647342997e-05, | |
| "loss": 1.7048213481903076, | |
| "step": 56 | |
| }, | |
| { | |
| "epoch": 0.015450186467767715, | |
| "grad_norm": 1.375, | |
| "learning_rate": 2.202898550724638e-05, | |
| "loss": 1.7030128240585327, | |
| "step": 58 | |
| }, | |
| { | |
| "epoch": 0.015982951518380393, | |
| "grad_norm": 1.453125, | |
| "learning_rate": 2.280193236714976e-05, | |
| "loss": 1.730646014213562, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 0.016515716568993075, | |
| "grad_norm": 1.4453125, | |
| "learning_rate": 2.357487922705314e-05, | |
| "loss": 1.6557202339172363, | |
| "step": 62 | |
| }, | |
| { | |
| "epoch": 0.017048481619605753, | |
| "grad_norm": 1.4140625, | |
| "learning_rate": 2.4347826086956526e-05, | |
| "loss": 1.7334563732147217, | |
| "step": 64 | |
| }, | |
| { | |
| "epoch": 0.017581246670218435, | |
| "grad_norm": 1.40625, | |
| "learning_rate": 2.5120772946859905e-05, | |
| "loss": 1.6493436098098755, | |
| "step": 66 | |
| }, | |
| { | |
| "epoch": 0.018114011720831113, | |
| "grad_norm": 1.4296875, | |
| "learning_rate": 2.5893719806763288e-05, | |
| "loss": 1.6171550750732422, | |
| "step": 68 | |
| }, | |
| { | |
| "epoch": 0.018646776771443795, | |
| "grad_norm": 1.40625, | |
| "learning_rate": 2.6666666666666667e-05, | |
| "loss": 1.6566861867904663, | |
| "step": 70 | |
| }, | |
| { | |
| "epoch": 0.019179541822056473, | |
| "grad_norm": 1.4140625, | |
| "learning_rate": 2.7439613526570052e-05, | |
| "loss": 1.5906771421432495, | |
| "step": 72 | |
| }, | |
| { | |
| "epoch": 0.019712306872669155, | |
| "grad_norm": 1.4296875, | |
| "learning_rate": 2.8212560386473435e-05, | |
| "loss": 1.568983554840088, | |
| "step": 74 | |
| }, | |
| { | |
| "epoch": 0.020245071923281833, | |
| "grad_norm": 1.3125, | |
| "learning_rate": 2.8985507246376814e-05, | |
| "loss": 1.5367965698242188, | |
| "step": 76 | |
| }, | |
| { | |
| "epoch": 0.02077783697389451, | |
| "grad_norm": 1.296875, | |
| "learning_rate": 2.9758454106280196e-05, | |
| "loss": 1.5819628238677979, | |
| "step": 78 | |
| }, | |
| { | |
| "epoch": 0.021310602024507193, | |
| "grad_norm": 1.3515625, | |
| "learning_rate": 3.053140096618358e-05, | |
| "loss": 1.5393096208572388, | |
| "step": 80 | |
| }, | |
| { | |
| "epoch": 0.02184336707511987, | |
| "grad_norm": 1.3046875, | |
| "learning_rate": 3.130434782608696e-05, | |
| "loss": 1.5357110500335693, | |
| "step": 82 | |
| }, | |
| { | |
| "epoch": 0.022376132125732553, | |
| "grad_norm": 1.3125, | |
| "learning_rate": 3.207729468599034e-05, | |
| "loss": 1.5443792343139648, | |
| "step": 84 | |
| }, | |
| { | |
| "epoch": 0.02290889717634523, | |
| "grad_norm": 1.34375, | |
| "learning_rate": 3.2850241545893725e-05, | |
| "loss": 1.5320842266082764, | |
| "step": 86 | |
| }, | |
| { | |
| "epoch": 0.023441662226957913, | |
| "grad_norm": 1.265625, | |
| "learning_rate": 3.36231884057971e-05, | |
| "loss": 1.4929611682891846, | |
| "step": 88 | |
| }, | |
| { | |
| "epoch": 0.02397442727757059, | |
| "grad_norm": 1.40625, | |
| "learning_rate": 3.439613526570049e-05, | |
| "loss": 1.4932339191436768, | |
| "step": 90 | |
| }, | |
| { | |
| "epoch": 0.024507192328183273, | |
| "grad_norm": 1.3671875, | |
| "learning_rate": 3.5169082125603865e-05, | |
| "loss": 1.4885770082473755, | |
| "step": 92 | |
| }, | |
| { | |
| "epoch": 0.02503995737879595, | |
| "grad_norm": 1.375, | |
| "learning_rate": 3.594202898550725e-05, | |
| "loss": 1.462911605834961, | |
| "step": 94 | |
| }, | |
| { | |
| "epoch": 0.02557272242940863, | |
| "grad_norm": 1.3125, | |
| "learning_rate": 3.671497584541063e-05, | |
| "loss": 1.4575042724609375, | |
| "step": 96 | |
| }, | |
| { | |
| "epoch": 0.02610548748002131, | |
| "grad_norm": 1.3125, | |
| "learning_rate": 3.748792270531401e-05, | |
| "loss": 1.4930751323699951, | |
| "step": 98 | |
| }, | |
| { | |
| "epoch": 0.02663825253063399, | |
| "grad_norm": 1.3203125, | |
| "learning_rate": 3.8260869565217395e-05, | |
| "loss": 1.47833251953125, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.02717101758124667, | |
| "grad_norm": 1.3125, | |
| "learning_rate": 3.903381642512078e-05, | |
| "loss": 1.4232707023620605, | |
| "step": 102 | |
| }, | |
| { | |
| "epoch": 0.02770378263185935, | |
| "grad_norm": 1.3359375, | |
| "learning_rate": 3.980676328502416e-05, | |
| "loss": 1.4310252666473389, | |
| "step": 104 | |
| }, | |
| { | |
| "epoch": 0.02823654768247203, | |
| "grad_norm": 1.3359375, | |
| "learning_rate": 4.057971014492754e-05, | |
| "loss": 1.4417388439178467, | |
| "step": 106 | |
| }, | |
| { | |
| "epoch": 0.02876931273308471, | |
| "grad_norm": 1.3515625, | |
| "learning_rate": 4.135265700483092e-05, | |
| "loss": 1.3514190912246704, | |
| "step": 108 | |
| }, | |
| { | |
| "epoch": 0.02930207778369739, | |
| "grad_norm": 1.3359375, | |
| "learning_rate": 4.2125603864734306e-05, | |
| "loss": 1.4256916046142578, | |
| "step": 110 | |
| }, | |
| { | |
| "epoch": 0.02983484283431007, | |
| "grad_norm": 1.3203125, | |
| "learning_rate": 4.289855072463769e-05, | |
| "loss": 1.3847278356552124, | |
| "step": 112 | |
| }, | |
| { | |
| "epoch": 0.030367607884922748, | |
| "grad_norm": 1.375, | |
| "learning_rate": 4.3671497584541064e-05, | |
| "loss": 1.3873201608657837, | |
| "step": 114 | |
| }, | |
| { | |
| "epoch": 0.03090037293553543, | |
| "grad_norm": 1.3046875, | |
| "learning_rate": 4.444444444444445e-05, | |
| "loss": 1.3949869871139526, | |
| "step": 116 | |
| }, | |
| { | |
| "epoch": 0.03143313798614811, | |
| "grad_norm": 1.28125, | |
| "learning_rate": 4.521739130434783e-05, | |
| "loss": 1.3873813152313232, | |
| "step": 118 | |
| }, | |
| { | |
| "epoch": 0.031965903036760786, | |
| "grad_norm": 1.296875, | |
| "learning_rate": 4.599033816425121e-05, | |
| "loss": 1.3549132347106934, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 0.03249866808737347, | |
| "grad_norm": 1.296875, | |
| "learning_rate": 4.6763285024154594e-05, | |
| "loss": 1.402418851852417, | |
| "step": 122 | |
| }, | |
| { | |
| "epoch": 0.03303143313798615, | |
| "grad_norm": 1.3203125, | |
| "learning_rate": 4.7536231884057976e-05, | |
| "loss": 1.361701250076294, | |
| "step": 124 | |
| }, | |
| { | |
| "epoch": 0.03356419818859883, | |
| "grad_norm": 1.3359375, | |
| "learning_rate": 4.830917874396136e-05, | |
| "loss": 1.3121955394744873, | |
| "step": 126 | |
| }, | |
| { | |
| "epoch": 0.034096963239211506, | |
| "grad_norm": 1.453125, | |
| "learning_rate": 4.9082125603864734e-05, | |
| "loss": 1.342367172241211, | |
| "step": 128 | |
| }, | |
| { | |
| "epoch": 0.03462972828982419, | |
| "grad_norm": 1.3671875, | |
| "learning_rate": 4.9855072463768116e-05, | |
| "loss": 1.3617416620254517, | |
| "step": 130 | |
| }, | |
| { | |
| "epoch": 0.03516249334043687, | |
| "grad_norm": 1.390625, | |
| "learning_rate": 5.0628019323671505e-05, | |
| "loss": 1.3062248229980469, | |
| "step": 132 | |
| }, | |
| { | |
| "epoch": 0.03569525839104955, | |
| "grad_norm": 1.265625, | |
| "learning_rate": 5.140096618357488e-05, | |
| "loss": 1.287408471107483, | |
| "step": 134 | |
| }, | |
| { | |
| "epoch": 0.036228023441662226, | |
| "grad_norm": 1.328125, | |
| "learning_rate": 5.217391304347826e-05, | |
| "loss": 1.323954463005066, | |
| "step": 136 | |
| }, | |
| { | |
| "epoch": 0.036760788492274904, | |
| "grad_norm": 1.359375, | |
| "learning_rate": 5.294685990338165e-05, | |
| "loss": 1.3211263418197632, | |
| "step": 138 | |
| }, | |
| { | |
| "epoch": 0.03729355354288759, | |
| "grad_norm": 1.34375, | |
| "learning_rate": 5.371980676328503e-05, | |
| "loss": 1.276440143585205, | |
| "step": 140 | |
| }, | |
| { | |
| "epoch": 0.03782631859350027, | |
| "grad_norm": 1.3359375, | |
| "learning_rate": 5.449275362318841e-05, | |
| "loss": 1.2463949918746948, | |
| "step": 142 | |
| }, | |
| { | |
| "epoch": 0.038359083644112946, | |
| "grad_norm": 1.2734375, | |
| "learning_rate": 5.5265700483091786e-05, | |
| "loss": 1.2704516649246216, | |
| "step": 144 | |
| }, | |
| { | |
| "epoch": 0.038891848694725624, | |
| "grad_norm": 1.34375, | |
| "learning_rate": 5.6038647342995175e-05, | |
| "loss": 1.266597867012024, | |
| "step": 146 | |
| }, | |
| { | |
| "epoch": 0.03942461374533831, | |
| "grad_norm": 1.328125, | |
| "learning_rate": 5.681159420289856e-05, | |
| "loss": 1.3489875793457031, | |
| "step": 148 | |
| }, | |
| { | |
| "epoch": 0.03995737879595099, | |
| "grad_norm": 1.296875, | |
| "learning_rate": 5.758454106280193e-05, | |
| "loss": 1.2545640468597412, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 0.040490143846563666, | |
| "grad_norm": 1.2890625, | |
| "learning_rate": 5.835748792270532e-05, | |
| "loss": 1.28780198097229, | |
| "step": 152 | |
| }, | |
| { | |
| "epoch": 0.041022908897176344, | |
| "grad_norm": 1.421875, | |
| "learning_rate": 5.91304347826087e-05, | |
| "loss": 1.2295873165130615, | |
| "step": 154 | |
| }, | |
| { | |
| "epoch": 0.04155567394778902, | |
| "grad_norm": 1.2421875, | |
| "learning_rate": 5.990338164251208e-05, | |
| "loss": 1.2586952447891235, | |
| "step": 156 | |
| }, | |
| { | |
| "epoch": 0.04208843899840171, | |
| "grad_norm": 1.2265625, | |
| "learning_rate": 6.067632850241547e-05, | |
| "loss": 1.2376638650894165, | |
| "step": 158 | |
| }, | |
| { | |
| "epoch": 0.042621204049014386, | |
| "grad_norm": 1.2265625, | |
| "learning_rate": 6.144927536231884e-05, | |
| "loss": 1.191737413406372, | |
| "step": 160 | |
| }, | |
| { | |
| "epoch": 0.043153969099627064, | |
| "grad_norm": 1.2421875, | |
| "learning_rate": 6.222222222222223e-05, | |
| "loss": 1.207621455192566, | |
| "step": 162 | |
| }, | |
| { | |
| "epoch": 0.04368673415023974, | |
| "grad_norm": 1.203125, | |
| "learning_rate": 6.299516908212561e-05, | |
| "loss": 1.2221109867095947, | |
| "step": 164 | |
| }, | |
| { | |
| "epoch": 0.04421949920085243, | |
| "grad_norm": 1.34375, | |
| "learning_rate": 6.376811594202898e-05, | |
| "loss": 1.2498793601989746, | |
| "step": 166 | |
| }, | |
| { | |
| "epoch": 0.044752264251465106, | |
| "grad_norm": 1.296875, | |
| "learning_rate": 6.454106280193237e-05, | |
| "loss": 1.2533750534057617, | |
| "step": 168 | |
| }, | |
| { | |
| "epoch": 0.045285029302077784, | |
| "grad_norm": 1.203125, | |
| "learning_rate": 6.531400966183575e-05, | |
| "loss": 1.2369980812072754, | |
| "step": 170 | |
| }, | |
| { | |
| "epoch": 0.04581779435269046, | |
| "grad_norm": 1.296875, | |
| "learning_rate": 6.608695652173914e-05, | |
| "loss": 1.199107050895691, | |
| "step": 172 | |
| }, | |
| { | |
| "epoch": 0.04635055940330314, | |
| "grad_norm": 1.2578125, | |
| "learning_rate": 6.685990338164253e-05, | |
| "loss": 1.2124717235565186, | |
| "step": 174 | |
| }, | |
| { | |
| "epoch": 0.046883324453915826, | |
| "grad_norm": 1.234375, | |
| "learning_rate": 6.76328502415459e-05, | |
| "loss": 1.2636237144470215, | |
| "step": 176 | |
| }, | |
| { | |
| "epoch": 0.047416089504528504, | |
| "grad_norm": 1.2578125, | |
| "learning_rate": 6.840579710144928e-05, | |
| "loss": 1.2178090810775757, | |
| "step": 178 | |
| }, | |
| { | |
| "epoch": 0.04794885455514118, | |
| "grad_norm": 1.1875, | |
| "learning_rate": 6.917874396135265e-05, | |
| "loss": 1.2136110067367554, | |
| "step": 180 | |
| }, | |
| { | |
| "epoch": 0.04848161960575386, | |
| "grad_norm": 1.2109375, | |
| "learning_rate": 6.995169082125604e-05, | |
| "loss": 1.1956346035003662, | |
| "step": 182 | |
| }, | |
| { | |
| "epoch": 0.049014384656366546, | |
| "grad_norm": 1.15625, | |
| "learning_rate": 7.072463768115943e-05, | |
| "loss": 1.1326745748519897, | |
| "step": 184 | |
| }, | |
| { | |
| "epoch": 0.049547149706979224, | |
| "grad_norm": 1.15625, | |
| "learning_rate": 7.149758454106281e-05, | |
| "loss": 1.1829109191894531, | |
| "step": 186 | |
| }, | |
| { | |
| "epoch": 0.0500799147575919, | |
| "grad_norm": 1.2109375, | |
| "learning_rate": 7.22705314009662e-05, | |
| "loss": 1.1837726831436157, | |
| "step": 188 | |
| }, | |
| { | |
| "epoch": 0.05061267980820458, | |
| "grad_norm": 1.2421875, | |
| "learning_rate": 7.304347826086957e-05, | |
| "loss": 1.1446757316589355, | |
| "step": 190 | |
| }, | |
| { | |
| "epoch": 0.05114544485881726, | |
| "grad_norm": 1.203125, | |
| "learning_rate": 7.381642512077295e-05, | |
| "loss": 1.1455689668655396, | |
| "step": 192 | |
| }, | |
| { | |
| "epoch": 0.051678209909429944, | |
| "grad_norm": 1.1875, | |
| "learning_rate": 7.458937198067634e-05, | |
| "loss": 1.163525104522705, | |
| "step": 194 | |
| }, | |
| { | |
| "epoch": 0.05221097496004262, | |
| "grad_norm": 1.234375, | |
| "learning_rate": 7.536231884057971e-05, | |
| "loss": 1.2155964374542236, | |
| "step": 196 | |
| }, | |
| { | |
| "epoch": 0.0527437400106553, | |
| "grad_norm": 1.1953125, | |
| "learning_rate": 7.61352657004831e-05, | |
| "loss": 1.1462944746017456, | |
| "step": 198 | |
| }, | |
| { | |
| "epoch": 0.05327650506126798, | |
| "grad_norm": 1.234375, | |
| "learning_rate": 7.690821256038648e-05, | |
| "loss": 1.1662917137145996, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.053809270111880664, | |
| "grad_norm": 1.2578125, | |
| "learning_rate": 7.768115942028987e-05, | |
| "loss": 1.2034168243408203, | |
| "step": 202 | |
| }, | |
| { | |
| "epoch": 0.05434203516249334, | |
| "grad_norm": 1.1640625, | |
| "learning_rate": 7.845410628019324e-05, | |
| "loss": 1.0869637727737427, | |
| "step": 204 | |
| }, | |
| { | |
| "epoch": 0.05487480021310602, | |
| "grad_norm": 1.3046875, | |
| "learning_rate": 7.922705314009662e-05, | |
| "loss": 1.16934072971344, | |
| "step": 206 | |
| }, | |
| { | |
| "epoch": 0.0554075652637187, | |
| "grad_norm": 1.140625, | |
| "learning_rate": 8e-05, | |
| "loss": 1.2003157138824463, | |
| "step": 208 | |
| }, | |
| { | |
| "epoch": 0.05594033031433138, | |
| "grad_norm": 1.171875, | |
| "learning_rate": 8e-05, | |
| "loss": 1.1935644149780273, | |
| "step": 210 | |
| }, | |
| { | |
| "epoch": 0.05647309536494406, | |
| "grad_norm": 1.1953125, | |
| "learning_rate": 8e-05, | |
| "loss": 1.1314443349838257, | |
| "step": 212 | |
| }, | |
| { | |
| "epoch": 0.05700586041555674, | |
| "grad_norm": 1.09375, | |
| "learning_rate": 8e-05, | |
| "loss": 1.179926872253418, | |
| "step": 214 | |
| }, | |
| { | |
| "epoch": 0.05753862546616942, | |
| "grad_norm": 1.0859375, | |
| "learning_rate": 8e-05, | |
| "loss": 1.1462368965148926, | |
| "step": 216 | |
| }, | |
| { | |
| "epoch": 0.0580713905167821, | |
| "grad_norm": 1.109375, | |
| "learning_rate": 8e-05, | |
| "loss": 1.1517932415008545, | |
| "step": 218 | |
| }, | |
| { | |
| "epoch": 0.05860415556739478, | |
| "grad_norm": 1.171875, | |
| "learning_rate": 8e-05, | |
| "loss": 1.1624958515167236, | |
| "step": 220 | |
| }, | |
| { | |
| "epoch": 0.05913692061800746, | |
| "grad_norm": 1.171875, | |
| "learning_rate": 8e-05, | |
| "loss": 1.118838906288147, | |
| "step": 222 | |
| }, | |
| { | |
| "epoch": 0.05966968566862014, | |
| "grad_norm": 1.2109375, | |
| "learning_rate": 8e-05, | |
| "loss": 1.163698434829712, | |
| "step": 224 | |
| }, | |
| { | |
| "epoch": 0.06020245071923282, | |
| "grad_norm": 1.234375, | |
| "learning_rate": 8e-05, | |
| "loss": 1.1283934116363525, | |
| "step": 226 | |
| }, | |
| { | |
| "epoch": 0.060735215769845495, | |
| "grad_norm": 1.2265625, | |
| "learning_rate": 8e-05, | |
| "loss": 1.1495060920715332, | |
| "step": 228 | |
| }, | |
| { | |
| "epoch": 0.06126798082045818, | |
| "grad_norm": 1.1015625, | |
| "learning_rate": 8e-05, | |
| "loss": 1.105478048324585, | |
| "step": 230 | |
| }, | |
| { | |
| "epoch": 0.06180074587107086, | |
| "grad_norm": 1.109375, | |
| "learning_rate": 8e-05, | |
| "loss": 1.0947949886322021, | |
| "step": 232 | |
| }, | |
| { | |
| "epoch": 0.06233351092168354, | |
| "grad_norm": 1.0546875, | |
| "learning_rate": 8e-05, | |
| "loss": 1.0812091827392578, | |
| "step": 234 | |
| }, | |
| { | |
| "epoch": 0.06286627597229622, | |
| "grad_norm": 1.078125, | |
| "learning_rate": 8e-05, | |
| "loss": 1.1314194202423096, | |
| "step": 236 | |
| }, | |
| { | |
| "epoch": 0.0633990410229089, | |
| "grad_norm": 1.109375, | |
| "learning_rate": 8e-05, | |
| "loss": 1.1186072826385498, | |
| "step": 238 | |
| }, | |
| { | |
| "epoch": 0.06393180607352157, | |
| "grad_norm": 1.1484375, | |
| "learning_rate": 8e-05, | |
| "loss": 1.1118886470794678, | |
| "step": 240 | |
| }, | |
| { | |
| "epoch": 0.06446457112413426, | |
| "grad_norm": 1.0546875, | |
| "learning_rate": 8e-05, | |
| "loss": 1.0832873582839966, | |
| "step": 242 | |
| }, | |
| { | |
| "epoch": 0.06499733617474694, | |
| "grad_norm": 1.0625, | |
| "learning_rate": 8e-05, | |
| "loss": 1.1219228506088257, | |
| "step": 244 | |
| }, | |
| { | |
| "epoch": 0.06553010122535961, | |
| "grad_norm": 1.1796875, | |
| "learning_rate": 8e-05, | |
| "loss": 1.1250672340393066, | |
| "step": 246 | |
| }, | |
| { | |
| "epoch": 0.0660628662759723, | |
| "grad_norm": 1.171875, | |
| "learning_rate": 8e-05, | |
| "loss": 1.120064377784729, | |
| "step": 248 | |
| }, | |
| { | |
| "epoch": 0.06659563132658497, | |
| "grad_norm": 1.125, | |
| "learning_rate": 8e-05, | |
| "loss": 1.0575106143951416, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 0.06712839637719765, | |
| "grad_norm": 1.0546875, | |
| "learning_rate": 8e-05, | |
| "loss": 1.0453040599822998, | |
| "step": 252 | |
| }, | |
| { | |
| "epoch": 0.06766116142781034, | |
| "grad_norm": 1.0, | |
| "learning_rate": 8e-05, | |
| "loss": 1.0442687273025513, | |
| "step": 254 | |
| }, | |
| { | |
| "epoch": 0.06819392647842301, | |
| "grad_norm": 1.0390625, | |
| "learning_rate": 8e-05, | |
| "loss": 1.158420205116272, | |
| "step": 256 | |
| }, | |
| { | |
| "epoch": 0.0687266915290357, | |
| "grad_norm": 1.046875, | |
| "learning_rate": 8e-05, | |
| "loss": 1.140805959701538, | |
| "step": 258 | |
| }, | |
| { | |
| "epoch": 0.06925945657964838, | |
| "grad_norm": 1.1328125, | |
| "learning_rate": 8e-05, | |
| "loss": 1.1012375354766846, | |
| "step": 260 | |
| }, | |
| { | |
| "epoch": 0.06979222163026105, | |
| "grad_norm": 1.1015625, | |
| "learning_rate": 8e-05, | |
| "loss": 1.1367709636688232, | |
| "step": 262 | |
| }, | |
| { | |
| "epoch": 0.07032498668087374, | |
| "grad_norm": 1.03125, | |
| "learning_rate": 8e-05, | |
| "loss": 1.0242253541946411, | |
| "step": 264 | |
| }, | |
| { | |
| "epoch": 0.07085775173148641, | |
| "grad_norm": 1.1015625, | |
| "learning_rate": 8e-05, | |
| "loss": 1.0810563564300537, | |
| "step": 266 | |
| }, | |
| { | |
| "epoch": 0.0713905167820991, | |
| "grad_norm": 1.0390625, | |
| "learning_rate": 8e-05, | |
| "loss": 1.0501123666763306, | |
| "step": 268 | |
| }, | |
| { | |
| "epoch": 0.07192328183271178, | |
| "grad_norm": 1.09375, | |
| "learning_rate": 8e-05, | |
| "loss": 1.0658546686172485, | |
| "step": 270 | |
| }, | |
| { | |
| "epoch": 0.07245604688332445, | |
| "grad_norm": 1.1015625, | |
| "learning_rate": 8e-05, | |
| "loss": 1.0879520177841187, | |
| "step": 272 | |
| }, | |
| { | |
| "epoch": 0.07298881193393714, | |
| "grad_norm": 1.15625, | |
| "learning_rate": 8e-05, | |
| "loss": 1.1073130369186401, | |
| "step": 274 | |
| }, | |
| { | |
| "epoch": 0.07352157698454981, | |
| "grad_norm": 1.015625, | |
| "learning_rate": 8e-05, | |
| "loss": 1.041626214981079, | |
| "step": 276 | |
| }, | |
| { | |
| "epoch": 0.0740543420351625, | |
| "grad_norm": 0.98828125, | |
| "learning_rate": 8e-05, | |
| "loss": 1.061267375946045, | |
| "step": 278 | |
| }, | |
| { | |
| "epoch": 0.07458710708577518, | |
| "grad_norm": 0.953125, | |
| "learning_rate": 8e-05, | |
| "loss": 1.0703907012939453, | |
| "step": 280 | |
| }, | |
| { | |
| "epoch": 0.07511987213638785, | |
| "grad_norm": 1.0234375, | |
| "learning_rate": 8e-05, | |
| "loss": 1.088350772857666, | |
| "step": 282 | |
| }, | |
| { | |
| "epoch": 0.07565263718700053, | |
| "grad_norm": 0.95703125, | |
| "learning_rate": 8e-05, | |
| "loss": 1.0447893142700195, | |
| "step": 284 | |
| }, | |
| { | |
| "epoch": 0.0761854022376132, | |
| "grad_norm": 0.95703125, | |
| "learning_rate": 8e-05, | |
| "loss": 1.0158675909042358, | |
| "step": 286 | |
| }, | |
| { | |
| "epoch": 0.07671816728822589, | |
| "grad_norm": 1.015625, | |
| "learning_rate": 8e-05, | |
| "loss": 1.0666112899780273, | |
| "step": 288 | |
| }, | |
| { | |
| "epoch": 0.07725093233883858, | |
| "grad_norm": 1.03125, | |
| "learning_rate": 8e-05, | |
| "loss": 1.0512079000473022, | |
| "step": 290 | |
| }, | |
| { | |
| "epoch": 0.07778369738945125, | |
| "grad_norm": 1.0078125, | |
| "learning_rate": 8e-05, | |
| "loss": 1.0278513431549072, | |
| "step": 292 | |
| }, | |
| { | |
| "epoch": 0.07831646244006393, | |
| "grad_norm": 0.97265625, | |
| "learning_rate": 8e-05, | |
| "loss": 1.0547468662261963, | |
| "step": 294 | |
| }, | |
| { | |
| "epoch": 0.07884922749067662, | |
| "grad_norm": 0.94921875, | |
| "learning_rate": 8e-05, | |
| "loss": 0.9834574460983276, | |
| "step": 296 | |
| }, | |
| { | |
| "epoch": 0.07938199254128929, | |
| "grad_norm": 1.0234375, | |
| "learning_rate": 8e-05, | |
| "loss": 1.0703551769256592, | |
| "step": 298 | |
| }, | |
| { | |
| "epoch": 0.07991475759190197, | |
| "grad_norm": 0.97265625, | |
| "learning_rate": 8e-05, | |
| "loss": 1.0130079984664917, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 0.08044752264251465, | |
| "grad_norm": 0.9921875, | |
| "learning_rate": 8e-05, | |
| "loss": 1.0876761674880981, | |
| "step": 302 | |
| }, | |
| { | |
| "epoch": 0.08098028769312733, | |
| "grad_norm": 0.98828125, | |
| "learning_rate": 8e-05, | |
| "loss": 1.0426894426345825, | |
| "step": 304 | |
| }, | |
| { | |
| "epoch": 0.08151305274374002, | |
| "grad_norm": 0.97265625, | |
| "learning_rate": 8e-05, | |
| "loss": 1.0565141439437866, | |
| "step": 306 | |
| }, | |
| { | |
| "epoch": 0.08204581779435269, | |
| "grad_norm": 0.94921875, | |
| "learning_rate": 8e-05, | |
| "loss": 1.0379530191421509, | |
| "step": 308 | |
| }, | |
| { | |
| "epoch": 0.08257858284496537, | |
| "grad_norm": 0.99609375, | |
| "learning_rate": 8e-05, | |
| "loss": 1.0548206567764282, | |
| "step": 310 | |
| }, | |
| { | |
| "epoch": 0.08311134789557804, | |
| "grad_norm": 1.0234375, | |
| "learning_rate": 8e-05, | |
| "loss": 1.153540015220642, | |
| "step": 312 | |
| }, | |
| { | |
| "epoch": 0.08364411294619073, | |
| "grad_norm": 1.0625, | |
| "learning_rate": 8e-05, | |
| "loss": 1.063852310180664, | |
| "step": 314 | |
| }, | |
| { | |
| "epoch": 0.08417687799680341, | |
| "grad_norm": 0.92578125, | |
| "learning_rate": 8e-05, | |
| "loss": 1.0537505149841309, | |
| "step": 316 | |
| }, | |
| { | |
| "epoch": 0.08470964304741609, | |
| "grad_norm": 0.9375, | |
| "learning_rate": 8e-05, | |
| "loss": 1.0552769899368286, | |
| "step": 318 | |
| }, | |
| { | |
| "epoch": 0.08524240809802877, | |
| "grad_norm": 0.9375, | |
| "learning_rate": 8e-05, | |
| "loss": 1.0188905000686646, | |
| "step": 320 | |
| }, | |
| { | |
| "epoch": 0.08577517314864144, | |
| "grad_norm": 0.91015625, | |
| "learning_rate": 8e-05, | |
| "loss": 1.0366284847259521, | |
| "step": 322 | |
| }, | |
| { | |
| "epoch": 0.08630793819925413, | |
| "grad_norm": 0.9375, | |
| "learning_rate": 8e-05, | |
| "loss": 0.995755136013031, | |
| "step": 324 | |
| }, | |
| { | |
| "epoch": 0.08684070324986681, | |
| "grad_norm": 0.94140625, | |
| "learning_rate": 8e-05, | |
| "loss": 1.0196154117584229, | |
| "step": 326 | |
| }, | |
| { | |
| "epoch": 0.08737346830047948, | |
| "grad_norm": 0.9296875, | |
| "learning_rate": 8e-05, | |
| "loss": 0.9898626208305359, | |
| "step": 328 | |
| }, | |
| { | |
| "epoch": 0.08790623335109217, | |
| "grad_norm": 0.859375, | |
| "learning_rate": 8e-05, | |
| "loss": 0.9742222428321838, | |
| "step": 330 | |
| }, | |
| { | |
| "epoch": 0.08843899840170485, | |
| "grad_norm": 0.91015625, | |
| "learning_rate": 8e-05, | |
| "loss": 1.0032895803451538, | |
| "step": 332 | |
| }, | |
| { | |
| "epoch": 0.08897176345231753, | |
| "grad_norm": 0.9140625, | |
| "learning_rate": 8e-05, | |
| "loss": 1.019012212753296, | |
| "step": 334 | |
| }, | |
| { | |
| "epoch": 0.08950452850293021, | |
| "grad_norm": 0.9453125, | |
| "learning_rate": 8e-05, | |
| "loss": 1.0903642177581787, | |
| "step": 336 | |
| }, | |
| { | |
| "epoch": 0.09003729355354288, | |
| "grad_norm": 0.91796875, | |
| "learning_rate": 8e-05, | |
| "loss": 1.02820885181427, | |
| "step": 338 | |
| }, | |
| { | |
| "epoch": 0.09057005860415557, | |
| "grad_norm": 0.93359375, | |
| "learning_rate": 8e-05, | |
| "loss": 1.0357959270477295, | |
| "step": 340 | |
| }, | |
| { | |
| "epoch": 0.09110282365476825, | |
| "grad_norm": 0.94140625, | |
| "learning_rate": 8e-05, | |
| "loss": 1.0563883781433105, | |
| "step": 342 | |
| }, | |
| { | |
| "epoch": 0.09163558870538092, | |
| "grad_norm": 0.90625, | |
| "learning_rate": 8e-05, | |
| "loss": 0.9762277603149414, | |
| "step": 344 | |
| }, | |
| { | |
| "epoch": 0.09216835375599361, | |
| "grad_norm": 0.9140625, | |
| "learning_rate": 8e-05, | |
| "loss": 0.9926705360412598, | |
| "step": 346 | |
| }, | |
| { | |
| "epoch": 0.09270111880660628, | |
| "grad_norm": 0.92578125, | |
| "learning_rate": 8e-05, | |
| "loss": 1.028612494468689, | |
| "step": 348 | |
| }, | |
| { | |
| "epoch": 0.09323388385721897, | |
| "grad_norm": 0.90234375, | |
| "learning_rate": 8e-05, | |
| "loss": 1.0039349794387817, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 0.09376664890783165, | |
| "grad_norm": 0.8515625, | |
| "learning_rate": 8e-05, | |
| "loss": 0.9718811511993408, | |
| "step": 352 | |
| }, | |
| { | |
| "epoch": 0.09429941395844432, | |
| "grad_norm": 0.91015625, | |
| "learning_rate": 8e-05, | |
| "loss": 1.025659203529358, | |
| "step": 354 | |
| }, | |
| { | |
| "epoch": 0.09483217900905701, | |
| "grad_norm": 0.890625, | |
| "learning_rate": 8e-05, | |
| "loss": 1.0149027109146118, | |
| "step": 356 | |
| }, | |
| { | |
| "epoch": 0.09536494405966968, | |
| "grad_norm": 0.92578125, | |
| "learning_rate": 8e-05, | |
| "loss": 1.007585048675537, | |
| "step": 358 | |
| }, | |
| { | |
| "epoch": 0.09589770911028236, | |
| "grad_norm": 0.8828125, | |
| "learning_rate": 8e-05, | |
| "loss": 0.9930945634841919, | |
| "step": 360 | |
| }, | |
| { | |
| "epoch": 0.09643047416089505, | |
| "grad_norm": 0.8359375, | |
| "learning_rate": 8e-05, | |
| "loss": 0.9892737865447998, | |
| "step": 362 | |
| }, | |
| { | |
| "epoch": 0.09696323921150772, | |
| "grad_norm": 0.87890625, | |
| "learning_rate": 8e-05, | |
| "loss": 1.0519317388534546, | |
| "step": 364 | |
| }, | |
| { | |
| "epoch": 0.0974960042621204, | |
| "grad_norm": 0.9140625, | |
| "learning_rate": 8e-05, | |
| "loss": 1.0029200315475464, | |
| "step": 366 | |
| }, | |
| { | |
| "epoch": 0.09802876931273309, | |
| "grad_norm": 0.8828125, | |
| "learning_rate": 8e-05, | |
| "loss": 0.9893202781677246, | |
| "step": 368 | |
| }, | |
| { | |
| "epoch": 0.09856153436334576, | |
| "grad_norm": 0.84765625, | |
| "learning_rate": 8e-05, | |
| "loss": 0.9483737945556641, | |
| "step": 370 | |
| }, | |
| { | |
| "epoch": 0.09909429941395845, | |
| "grad_norm": 1.125, | |
| "learning_rate": 8e-05, | |
| "loss": 1.0160411596298218, | |
| "step": 372 | |
| }, | |
| { | |
| "epoch": 0.09962706446457112, | |
| "grad_norm": 0.88671875, | |
| "learning_rate": 8e-05, | |
| "loss": 0.9536669254302979, | |
| "step": 374 | |
| }, | |
| { | |
| "epoch": 0.1001598295151838, | |
| "grad_norm": 0.8984375, | |
| "learning_rate": 8e-05, | |
| "loss": 1.008516788482666, | |
| "step": 376 | |
| }, | |
| { | |
| "epoch": 0.10069259456579649, | |
| "grad_norm": 0.890625, | |
| "learning_rate": 8e-05, | |
| "loss": 0.9893296957015991, | |
| "step": 378 | |
| }, | |
| { | |
| "epoch": 0.10122535961640916, | |
| "grad_norm": 0.875, | |
| "learning_rate": 8e-05, | |
| "loss": 1.0012911558151245, | |
| "step": 380 | |
| }, | |
| { | |
| "epoch": 0.10175812466702185, | |
| "grad_norm": 0.84765625, | |
| "learning_rate": 8e-05, | |
| "loss": 0.9556794166564941, | |
| "step": 382 | |
| }, | |
| { | |
| "epoch": 0.10229088971763452, | |
| "grad_norm": 0.875, | |
| "learning_rate": 8e-05, | |
| "loss": 0.9937628507614136, | |
| "step": 384 | |
| }, | |
| { | |
| "epoch": 0.1028236547682472, | |
| "grad_norm": 0.86328125, | |
| "learning_rate": 8e-05, | |
| "loss": 1.0127745866775513, | |
| "step": 386 | |
| }, | |
| { | |
| "epoch": 0.10335641981885989, | |
| "grad_norm": 0.84375, | |
| "learning_rate": 8e-05, | |
| "loss": 0.983036458492279, | |
| "step": 388 | |
| }, | |
| { | |
| "epoch": 0.10388918486947256, | |
| "grad_norm": 0.921875, | |
| "learning_rate": 8e-05, | |
| "loss": 0.9836626648902893, | |
| "step": 390 | |
| }, | |
| { | |
| "epoch": 0.10442194992008524, | |
| "grad_norm": 0.85546875, | |
| "learning_rate": 8e-05, | |
| "loss": 0.9373552203178406, | |
| "step": 392 | |
| }, | |
| { | |
| "epoch": 0.10495471497069792, | |
| "grad_norm": 0.8515625, | |
| "learning_rate": 8e-05, | |
| "loss": 0.9097200036048889, | |
| "step": 394 | |
| }, | |
| { | |
| "epoch": 0.1054874800213106, | |
| "grad_norm": 0.828125, | |
| "learning_rate": 8e-05, | |
| "loss": 0.94362872838974, | |
| "step": 396 | |
| }, | |
| { | |
| "epoch": 0.10602024507192329, | |
| "grad_norm": 0.83203125, | |
| "learning_rate": 8e-05, | |
| "loss": 0.9865043759346008, | |
| "step": 398 | |
| }, | |
| { | |
| "epoch": 0.10655301012253596, | |
| "grad_norm": 0.90625, | |
| "learning_rate": 8e-05, | |
| "loss": 0.9668699502944946, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 0.10708577517314864, | |
| "grad_norm": 0.8828125, | |
| "learning_rate": 8e-05, | |
| "loss": 0.9932379722595215, | |
| "step": 402 | |
| }, | |
| { | |
| "epoch": 0.10761854022376133, | |
| "grad_norm": 0.86328125, | |
| "learning_rate": 8e-05, | |
| "loss": 0.9790946245193481, | |
| "step": 404 | |
| }, | |
| { | |
| "epoch": 0.108151305274374, | |
| "grad_norm": 0.8515625, | |
| "learning_rate": 8e-05, | |
| "loss": 0.9935732483863831, | |
| "step": 406 | |
| }, | |
| { | |
| "epoch": 0.10868407032498668, | |
| "grad_norm": 0.8515625, | |
| "learning_rate": 8e-05, | |
| "loss": 0.9672642946243286, | |
| "step": 408 | |
| }, | |
| { | |
| "epoch": 0.10921683537559936, | |
| "grad_norm": 0.828125, | |
| "learning_rate": 8e-05, | |
| "loss": 0.9297552108764648, | |
| "step": 410 | |
| }, | |
| { | |
| "epoch": 0.10974960042621204, | |
| "grad_norm": 0.8359375, | |
| "learning_rate": 8e-05, | |
| "loss": 0.9727266430854797, | |
| "step": 412 | |
| }, | |
| { | |
| "epoch": 0.11028236547682473, | |
| "grad_norm": 0.8359375, | |
| "learning_rate": 8e-05, | |
| "loss": 0.9725493788719177, | |
| "step": 414 | |
| }, | |
| { | |
| "epoch": 0.1108151305274374, | |
| "grad_norm": 0.86328125, | |
| "learning_rate": 8e-05, | |
| "loss": 0.969160258769989, | |
| "step": 416 | |
| }, | |
| { | |
| "epoch": 0.11134789557805008, | |
| "grad_norm": 0.8359375, | |
| "learning_rate": 8e-05, | |
| "loss": 0.987875759601593, | |
| "step": 418 | |
| }, | |
| { | |
| "epoch": 0.11188066062866275, | |
| "grad_norm": 0.83203125, | |
| "learning_rate": 8e-05, | |
| "loss": 0.939948320388794, | |
| "step": 420 | |
| }, | |
| { | |
| "epoch": 0.11241342567927544, | |
| "grad_norm": 0.82421875, | |
| "learning_rate": 8e-05, | |
| "loss": 0.980888307094574, | |
| "step": 422 | |
| }, | |
| { | |
| "epoch": 0.11294619072988812, | |
| "grad_norm": 0.82421875, | |
| "learning_rate": 8e-05, | |
| "loss": 0.9552009701728821, | |
| "step": 424 | |
| }, | |
| { | |
| "epoch": 0.1134789557805008, | |
| "grad_norm": 0.84375, | |
| "learning_rate": 8e-05, | |
| "loss": 0.965975821018219, | |
| "step": 426 | |
| }, | |
| { | |
| "epoch": 0.11401172083111348, | |
| "grad_norm": 0.83203125, | |
| "learning_rate": 8e-05, | |
| "loss": 0.9458581209182739, | |
| "step": 428 | |
| }, | |
| { | |
| "epoch": 0.11454448588172615, | |
| "grad_norm": 0.81640625, | |
| "learning_rate": 8e-05, | |
| "loss": 0.9712637066841125, | |
| "step": 430 | |
| }, | |
| { | |
| "epoch": 0.11507725093233884, | |
| "grad_norm": 0.79296875, | |
| "learning_rate": 8e-05, | |
| "loss": 0.957909345626831, | |
| "step": 432 | |
| }, | |
| { | |
| "epoch": 0.11561001598295152, | |
| "grad_norm": 0.8359375, | |
| "learning_rate": 8e-05, | |
| "loss": 0.9364346861839294, | |
| "step": 434 | |
| }, | |
| { | |
| "epoch": 0.1161427810335642, | |
| "grad_norm": 0.8046875, | |
| "learning_rate": 8e-05, | |
| "loss": 0.9434120655059814, | |
| "step": 436 | |
| }, | |
| { | |
| "epoch": 0.11667554608417688, | |
| "grad_norm": 0.83203125, | |
| "learning_rate": 8e-05, | |
| "loss": 0.9406663775444031, | |
| "step": 438 | |
| }, | |
| { | |
| "epoch": 0.11720831113478956, | |
| "grad_norm": 0.8125, | |
| "learning_rate": 8e-05, | |
| "loss": 0.9373142719268799, | |
| "step": 440 | |
| }, | |
| { | |
| "epoch": 0.11774107618540224, | |
| "grad_norm": 0.8203125, | |
| "learning_rate": 8e-05, | |
| "loss": 0.935702919960022, | |
| "step": 442 | |
| }, | |
| { | |
| "epoch": 0.11827384123601492, | |
| "grad_norm": 0.83203125, | |
| "learning_rate": 8e-05, | |
| "loss": 0.9485442638397217, | |
| "step": 444 | |
| }, | |
| { | |
| "epoch": 0.11880660628662759, | |
| "grad_norm": 0.80859375, | |
| "learning_rate": 8e-05, | |
| "loss": 0.9245492219924927, | |
| "step": 446 | |
| }, | |
| { | |
| "epoch": 0.11933937133724028, | |
| "grad_norm": 0.8203125, | |
| "learning_rate": 8e-05, | |
| "loss": 0.9705182909965515, | |
| "step": 448 | |
| }, | |
| { | |
| "epoch": 0.11987213638785296, | |
| "grad_norm": 0.84765625, | |
| "learning_rate": 8e-05, | |
| "loss": 0.9660132527351379, | |
| "step": 450 | |
| }, | |
| { | |
| "epoch": 0.12040490143846563, | |
| "grad_norm": 0.83203125, | |
| "learning_rate": 8e-05, | |
| "loss": 0.9636243581771851, | |
| "step": 452 | |
| }, | |
| { | |
| "epoch": 0.12093766648907832, | |
| "grad_norm": 0.79296875, | |
| "learning_rate": 8e-05, | |
| "loss": 0.912445604801178, | |
| "step": 454 | |
| }, | |
| { | |
| "epoch": 0.12147043153969099, | |
| "grad_norm": 0.8359375, | |
| "learning_rate": 8e-05, | |
| "loss": 0.9414998292922974, | |
| "step": 456 | |
| }, | |
| { | |
| "epoch": 0.12200319659030368, | |
| "grad_norm": 0.828125, | |
| "learning_rate": 8e-05, | |
| "loss": 0.9331598281860352, | |
| "step": 458 | |
| }, | |
| { | |
| "epoch": 0.12253596164091636, | |
| "grad_norm": 0.8125, | |
| "learning_rate": 8e-05, | |
| "loss": 0.9329249858856201, | |
| "step": 460 | |
| }, | |
| { | |
| "epoch": 0.12306872669152903, | |
| "grad_norm": 0.82421875, | |
| "learning_rate": 8e-05, | |
| "loss": 0.9538522958755493, | |
| "step": 462 | |
| }, | |
| { | |
| "epoch": 0.12360149174214172, | |
| "grad_norm": 0.8046875, | |
| "learning_rate": 8e-05, | |
| "loss": 0.9617863893508911, | |
| "step": 464 | |
| }, | |
| { | |
| "epoch": 0.1241342567927544, | |
| "grad_norm": 0.7890625, | |
| "learning_rate": 8e-05, | |
| "loss": 0.9089372754096985, | |
| "step": 466 | |
| }, | |
| { | |
| "epoch": 0.12466702184336707, | |
| "grad_norm": 0.8046875, | |
| "learning_rate": 8e-05, | |
| "loss": 0.9629489779472351, | |
| "step": 468 | |
| }, | |
| { | |
| "epoch": 0.12519978689397976, | |
| "grad_norm": 0.78515625, | |
| "learning_rate": 8e-05, | |
| "loss": 0.9058244824409485, | |
| "step": 470 | |
| }, | |
| { | |
| "epoch": 0.12573255194459243, | |
| "grad_norm": 0.76171875, | |
| "learning_rate": 8e-05, | |
| "loss": 0.9134584069252014, | |
| "step": 472 | |
| }, | |
| { | |
| "epoch": 0.1262653169952051, | |
| "grad_norm": 0.80078125, | |
| "learning_rate": 8e-05, | |
| "loss": 0.9098286628723145, | |
| "step": 474 | |
| }, | |
| { | |
| "epoch": 0.1267980820458178, | |
| "grad_norm": 0.79296875, | |
| "learning_rate": 8e-05, | |
| "loss": 0.9566776752471924, | |
| "step": 476 | |
| }, | |
| { | |
| "epoch": 0.12733084709643047, | |
| "grad_norm": 0.8046875, | |
| "learning_rate": 8e-05, | |
| "loss": 0.9700292944908142, | |
| "step": 478 | |
| }, | |
| { | |
| "epoch": 0.12786361214704314, | |
| "grad_norm": 0.80078125, | |
| "learning_rate": 8e-05, | |
| "loss": 0.9420150518417358, | |
| "step": 480 | |
| }, | |
| { | |
| "epoch": 0.12839637719765584, | |
| "grad_norm": 0.78125, | |
| "learning_rate": 8e-05, | |
| "loss": 0.9000596404075623, | |
| "step": 482 | |
| }, | |
| { | |
| "epoch": 0.1289291422482685, | |
| "grad_norm": 0.81640625, | |
| "learning_rate": 8e-05, | |
| "loss": 0.9281507730484009, | |
| "step": 484 | |
| }, | |
| { | |
| "epoch": 0.12946190729888118, | |
| "grad_norm": 0.84375, | |
| "learning_rate": 8e-05, | |
| "loss": 0.9546276330947876, | |
| "step": 486 | |
| }, | |
| { | |
| "epoch": 0.12999467234949388, | |
| "grad_norm": 0.796875, | |
| "learning_rate": 8e-05, | |
| "loss": 0.9615715742111206, | |
| "step": 488 | |
| }, | |
| { | |
| "epoch": 0.13052743740010656, | |
| "grad_norm": 0.8125, | |
| "learning_rate": 8e-05, | |
| "loss": 0.8883857131004333, | |
| "step": 490 | |
| }, | |
| { | |
| "epoch": 0.13106020245071923, | |
| "grad_norm": 0.796875, | |
| "learning_rate": 8e-05, | |
| "loss": 0.9202597737312317, | |
| "step": 492 | |
| }, | |
| { | |
| "epoch": 0.13159296750133193, | |
| "grad_norm": 0.83984375, | |
| "learning_rate": 8e-05, | |
| "loss": 0.9927231669425964, | |
| "step": 494 | |
| }, | |
| { | |
| "epoch": 0.1321257325519446, | |
| "grad_norm": 0.78515625, | |
| "learning_rate": 8e-05, | |
| "loss": 0.9516614675521851, | |
| "step": 496 | |
| }, | |
| { | |
| "epoch": 0.13265849760255727, | |
| "grad_norm": 0.80859375, | |
| "learning_rate": 8e-05, | |
| "loss": 0.973112940788269, | |
| "step": 498 | |
| }, | |
| { | |
| "epoch": 0.13319126265316994, | |
| "grad_norm": 0.796875, | |
| "learning_rate": 8e-05, | |
| "loss": 0.9146295189857483, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.13372402770378264, | |
| "grad_norm": 0.7734375, | |
| "learning_rate": 8e-05, | |
| "loss": 0.946043074131012, | |
| "step": 502 | |
| }, | |
| { | |
| "epoch": 0.1342567927543953, | |
| "grad_norm": 0.796875, | |
| "learning_rate": 8e-05, | |
| "loss": 0.9582048654556274, | |
| "step": 504 | |
| }, | |
| { | |
| "epoch": 0.13478955780500798, | |
| "grad_norm": 0.74609375, | |
| "learning_rate": 8e-05, | |
| "loss": 0.8958991169929504, | |
| "step": 506 | |
| }, | |
| { | |
| "epoch": 0.13532232285562068, | |
| "grad_norm": 0.8046875, | |
| "learning_rate": 8e-05, | |
| "loss": 0.9187520742416382, | |
| "step": 508 | |
| }, | |
| { | |
| "epoch": 0.13585508790623335, | |
| "grad_norm": 0.78125, | |
| "learning_rate": 8e-05, | |
| "loss": 0.9344346523284912, | |
| "step": 510 | |
| }, | |
| { | |
| "epoch": 0.13638785295684602, | |
| "grad_norm": 0.7578125, | |
| "learning_rate": 8e-05, | |
| "loss": 0.8789697289466858, | |
| "step": 512 | |
| }, | |
| { | |
| "epoch": 0.13692061800745872, | |
| "grad_norm": 0.79296875, | |
| "learning_rate": 8e-05, | |
| "loss": 0.9604220390319824, | |
| "step": 514 | |
| }, | |
| { | |
| "epoch": 0.1374533830580714, | |
| "grad_norm": 0.8203125, | |
| "learning_rate": 8e-05, | |
| "loss": 0.9376251101493835, | |
| "step": 516 | |
| }, | |
| { | |
| "epoch": 0.13798614810868406, | |
| "grad_norm": 0.8125, | |
| "learning_rate": 8e-05, | |
| "loss": 0.8872209787368774, | |
| "step": 518 | |
| }, | |
| { | |
| "epoch": 0.13851891315929676, | |
| "grad_norm": 0.76953125, | |
| "learning_rate": 8e-05, | |
| "loss": 0.935639500617981, | |
| "step": 520 | |
| }, | |
| { | |
| "epoch": 0.13905167820990944, | |
| "grad_norm": 0.8046875, | |
| "learning_rate": 8e-05, | |
| "loss": 0.9652010202407837, | |
| "step": 522 | |
| }, | |
| { | |
| "epoch": 0.1395844432605221, | |
| "grad_norm": 0.78515625, | |
| "learning_rate": 8e-05, | |
| "loss": 0.9051821231842041, | |
| "step": 524 | |
| }, | |
| { | |
| "epoch": 0.14011720831113478, | |
| "grad_norm": 0.796875, | |
| "learning_rate": 8e-05, | |
| "loss": 0.9186902046203613, | |
| "step": 526 | |
| }, | |
| { | |
| "epoch": 0.14064997336174748, | |
| "grad_norm": 0.81640625, | |
| "learning_rate": 8e-05, | |
| "loss": 0.8455410599708557, | |
| "step": 528 | |
| }, | |
| { | |
| "epoch": 0.14118273841236015, | |
| "grad_norm": 0.8203125, | |
| "learning_rate": 8e-05, | |
| "loss": 0.9377204775810242, | |
| "step": 530 | |
| }, | |
| { | |
| "epoch": 0.14171550346297282, | |
| "grad_norm": 0.84375, | |
| "learning_rate": 8e-05, | |
| "loss": 0.9178054332733154, | |
| "step": 532 | |
| }, | |
| { | |
| "epoch": 0.14224826851358552, | |
| "grad_norm": 0.8359375, | |
| "learning_rate": 8e-05, | |
| "loss": 0.8930121660232544, | |
| "step": 534 | |
| }, | |
| { | |
| "epoch": 0.1427810335641982, | |
| "grad_norm": 0.79296875, | |
| "learning_rate": 8e-05, | |
| "loss": 0.9158589839935303, | |
| "step": 536 | |
| }, | |
| { | |
| "epoch": 0.14331379861481086, | |
| "grad_norm": 0.77734375, | |
| "learning_rate": 8e-05, | |
| "loss": 0.925919234752655, | |
| "step": 538 | |
| }, | |
| { | |
| "epoch": 0.14384656366542356, | |
| "grad_norm": 0.796875, | |
| "learning_rate": 8e-05, | |
| "loss": 0.9652289152145386, | |
| "step": 540 | |
| }, | |
| { | |
| "epoch": 0.14437932871603623, | |
| "grad_norm": 0.85546875, | |
| "learning_rate": 8e-05, | |
| "loss": 0.9391449093818665, | |
| "step": 542 | |
| }, | |
| { | |
| "epoch": 0.1449120937666489, | |
| "grad_norm": 0.84375, | |
| "learning_rate": 8e-05, | |
| "loss": 0.9376974701881409, | |
| "step": 544 | |
| }, | |
| { | |
| "epoch": 0.14544485881726157, | |
| "grad_norm": 0.78515625, | |
| "learning_rate": 8e-05, | |
| "loss": 0.9146329164505005, | |
| "step": 546 | |
| }, | |
| { | |
| "epoch": 0.14597762386787427, | |
| "grad_norm": 0.7734375, | |
| "learning_rate": 8e-05, | |
| "loss": 0.935990571975708, | |
| "step": 548 | |
| }, | |
| { | |
| "epoch": 0.14651038891848694, | |
| "grad_norm": 0.83203125, | |
| "learning_rate": 8e-05, | |
| "loss": 0.9762868285179138, | |
| "step": 550 | |
| }, | |
| { | |
| "epoch": 0.14704315396909962, | |
| "grad_norm": 0.78515625, | |
| "learning_rate": 8e-05, | |
| "loss": 0.9521989822387695, | |
| "step": 552 | |
| }, | |
| { | |
| "epoch": 0.14757591901971231, | |
| "grad_norm": 0.76171875, | |
| "learning_rate": 8e-05, | |
| "loss": 0.9237980246543884, | |
| "step": 554 | |
| }, | |
| { | |
| "epoch": 0.148108684070325, | |
| "grad_norm": 0.76953125, | |
| "learning_rate": 8e-05, | |
| "loss": 0.8922250270843506, | |
| "step": 556 | |
| }, | |
| { | |
| "epoch": 0.14864144912093766, | |
| "grad_norm": 0.796875, | |
| "learning_rate": 8e-05, | |
| "loss": 0.9352413415908813, | |
| "step": 558 | |
| }, | |
| { | |
| "epoch": 0.14917421417155036, | |
| "grad_norm": 0.83203125, | |
| "learning_rate": 8e-05, | |
| "loss": 0.9463628530502319, | |
| "step": 560 | |
| }, | |
| { | |
| "epoch": 0.14970697922216303, | |
| "grad_norm": 0.81640625, | |
| "learning_rate": 8e-05, | |
| "loss": 0.8881433606147766, | |
| "step": 562 | |
| }, | |
| { | |
| "epoch": 0.1502397442727757, | |
| "grad_norm": 0.81640625, | |
| "learning_rate": 8e-05, | |
| "loss": 0.9072080254554749, | |
| "step": 564 | |
| }, | |
| { | |
| "epoch": 0.1507725093233884, | |
| "grad_norm": 0.7578125, | |
| "learning_rate": 8e-05, | |
| "loss": 0.900499701499939, | |
| "step": 566 | |
| }, | |
| { | |
| "epoch": 0.15130527437400107, | |
| "grad_norm": 0.75, | |
| "learning_rate": 8e-05, | |
| "loss": 0.9123456478118896, | |
| "step": 568 | |
| }, | |
| { | |
| "epoch": 0.15183803942461374, | |
| "grad_norm": 0.78515625, | |
| "learning_rate": 8e-05, | |
| "loss": 0.8801102638244629, | |
| "step": 570 | |
| }, | |
| { | |
| "epoch": 0.1523708044752264, | |
| "grad_norm": 0.78125, | |
| "learning_rate": 8e-05, | |
| "loss": 0.8868352174758911, | |
| "step": 572 | |
| }, | |
| { | |
| "epoch": 0.1529035695258391, | |
| "grad_norm": 0.75, | |
| "learning_rate": 8e-05, | |
| "loss": 0.8904232978820801, | |
| "step": 574 | |
| }, | |
| { | |
| "epoch": 0.15343633457645178, | |
| "grad_norm": 0.7890625, | |
| "learning_rate": 8e-05, | |
| "loss": 0.9351356625556946, | |
| "step": 576 | |
| }, | |
| { | |
| "epoch": 0.15396909962706445, | |
| "grad_norm": 0.8125, | |
| "learning_rate": 8e-05, | |
| "loss": 0.949609100818634, | |
| "step": 578 | |
| }, | |
| { | |
| "epoch": 0.15450186467767715, | |
| "grad_norm": 0.8046875, | |
| "learning_rate": 8e-05, | |
| "loss": 0.9633817076683044, | |
| "step": 580 | |
| }, | |
| { | |
| "epoch": 0.15503462972828982, | |
| "grad_norm": 0.765625, | |
| "learning_rate": 8e-05, | |
| "loss": 0.8968989253044128, | |
| "step": 582 | |
| }, | |
| { | |
| "epoch": 0.1555673947789025, | |
| "grad_norm": 0.7734375, | |
| "learning_rate": 8e-05, | |
| "loss": 0.9304310083389282, | |
| "step": 584 | |
| }, | |
| { | |
| "epoch": 0.1561001598295152, | |
| "grad_norm": 0.74609375, | |
| "learning_rate": 8e-05, | |
| "loss": 0.9069873690605164, | |
| "step": 586 | |
| }, | |
| { | |
| "epoch": 0.15663292488012787, | |
| "grad_norm": 0.8125, | |
| "learning_rate": 8e-05, | |
| "loss": 0.9126654863357544, | |
| "step": 588 | |
| }, | |
| { | |
| "epoch": 0.15716568993074054, | |
| "grad_norm": 0.7578125, | |
| "learning_rate": 8e-05, | |
| "loss": 0.8959242105484009, | |
| "step": 590 | |
| }, | |
| { | |
| "epoch": 0.15769845498135324, | |
| "grad_norm": 0.78125, | |
| "learning_rate": 8e-05, | |
| "loss": 0.8930934071540833, | |
| "step": 592 | |
| }, | |
| { | |
| "epoch": 0.1582312200319659, | |
| "grad_norm": 0.796875, | |
| "learning_rate": 8e-05, | |
| "loss": 0.9553401470184326, | |
| "step": 594 | |
| }, | |
| { | |
| "epoch": 0.15876398508257858, | |
| "grad_norm": 0.77734375, | |
| "learning_rate": 8e-05, | |
| "loss": 0.9127396941184998, | |
| "step": 596 | |
| }, | |
| { | |
| "epoch": 0.15929675013319125, | |
| "grad_norm": 0.7578125, | |
| "learning_rate": 8e-05, | |
| "loss": 0.8723542094230652, | |
| "step": 598 | |
| }, | |
| { | |
| "epoch": 0.15982951518380395, | |
| "grad_norm": 0.78515625, | |
| "learning_rate": 8e-05, | |
| "loss": 0.9096969366073608, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 0.16036228023441662, | |
| "grad_norm": 0.7734375, | |
| "learning_rate": 8e-05, | |
| "loss": 0.8800921440124512, | |
| "step": 602 | |
| }, | |
| { | |
| "epoch": 0.1608950452850293, | |
| "grad_norm": 0.80078125, | |
| "learning_rate": 8e-05, | |
| "loss": 0.9251409769058228, | |
| "step": 604 | |
| }, | |
| { | |
| "epoch": 0.161427810335642, | |
| "grad_norm": 0.796875, | |
| "learning_rate": 8e-05, | |
| "loss": 0.8803302049636841, | |
| "step": 606 | |
| }, | |
| { | |
| "epoch": 0.16196057538625466, | |
| "grad_norm": 0.7734375, | |
| "learning_rate": 8e-05, | |
| "loss": 0.8997635841369629, | |
| "step": 608 | |
| }, | |
| { | |
| "epoch": 0.16249334043686733, | |
| "grad_norm": 0.78125, | |
| "learning_rate": 8e-05, | |
| "loss": 0.9180471301078796, | |
| "step": 610 | |
| }, | |
| { | |
| "epoch": 0.16302610548748003, | |
| "grad_norm": 0.84375, | |
| "learning_rate": 8e-05, | |
| "loss": 0.9325738549232483, | |
| "step": 612 | |
| }, | |
| { | |
| "epoch": 0.1635588705380927, | |
| "grad_norm": 0.7890625, | |
| "learning_rate": 8e-05, | |
| "loss": 0.8572644591331482, | |
| "step": 614 | |
| }, | |
| { | |
| "epoch": 0.16409163558870538, | |
| "grad_norm": 0.7890625, | |
| "learning_rate": 8e-05, | |
| "loss": 0.9184677004814148, | |
| "step": 616 | |
| }, | |
| { | |
| "epoch": 0.16462440063931805, | |
| "grad_norm": 0.80078125, | |
| "learning_rate": 8e-05, | |
| "loss": 0.951015830039978, | |
| "step": 618 | |
| }, | |
| { | |
| "epoch": 0.16515716568993075, | |
| "grad_norm": 0.8046875, | |
| "learning_rate": 8e-05, | |
| "loss": 0.9111671447753906, | |
| "step": 620 | |
| }, | |
| { | |
| "epoch": 0.16568993074054342, | |
| "grad_norm": 0.80078125, | |
| "learning_rate": 8e-05, | |
| "loss": 0.9108637571334839, | |
| "step": 622 | |
| }, | |
| { | |
| "epoch": 0.1662226957911561, | |
| "grad_norm": 0.7734375, | |
| "learning_rate": 8e-05, | |
| "loss": 0.886138379573822, | |
| "step": 624 | |
| }, | |
| { | |
| "epoch": 0.1667554608417688, | |
| "grad_norm": 0.765625, | |
| "learning_rate": 8e-05, | |
| "loss": 0.9077087640762329, | |
| "step": 626 | |
| }, | |
| { | |
| "epoch": 0.16728822589238146, | |
| "grad_norm": 0.78515625, | |
| "learning_rate": 8e-05, | |
| "loss": 0.8817453384399414, | |
| "step": 628 | |
| }, | |
| { | |
| "epoch": 0.16782099094299413, | |
| "grad_norm": 0.83984375, | |
| "learning_rate": 8e-05, | |
| "loss": 0.9480854272842407, | |
| "step": 630 | |
| }, | |
| { | |
| "epoch": 0.16835375599360683, | |
| "grad_norm": 0.80078125, | |
| "learning_rate": 8e-05, | |
| "loss": 0.8651038408279419, | |
| "step": 632 | |
| }, | |
| { | |
| "epoch": 0.1688865210442195, | |
| "grad_norm": 0.80078125, | |
| "learning_rate": 8e-05, | |
| "loss": 0.8813058733940125, | |
| "step": 634 | |
| }, | |
| { | |
| "epoch": 0.16941928609483217, | |
| "grad_norm": 0.78515625, | |
| "learning_rate": 8e-05, | |
| "loss": 0.9414123296737671, | |
| "step": 636 | |
| }, | |
| { | |
| "epoch": 0.16995205114544487, | |
| "grad_norm": 0.7734375, | |
| "learning_rate": 8e-05, | |
| "loss": 0.8974052667617798, | |
| "step": 638 | |
| }, | |
| { | |
| "epoch": 0.17048481619605754, | |
| "grad_norm": 0.78515625, | |
| "learning_rate": 8e-05, | |
| "loss": 0.9421342015266418, | |
| "step": 640 | |
| }, | |
| { | |
| "epoch": 0.17101758124667021, | |
| "grad_norm": 0.75, | |
| "learning_rate": 8e-05, | |
| "loss": 0.8592175841331482, | |
| "step": 642 | |
| }, | |
| { | |
| "epoch": 0.17155034629728289, | |
| "grad_norm": 0.7578125, | |
| "learning_rate": 8e-05, | |
| "loss": 0.8520596027374268, | |
| "step": 644 | |
| }, | |
| { | |
| "epoch": 0.17208311134789558, | |
| "grad_norm": 0.80859375, | |
| "learning_rate": 8e-05, | |
| "loss": 0.8983196020126343, | |
| "step": 646 | |
| }, | |
| { | |
| "epoch": 0.17261587639850826, | |
| "grad_norm": 0.7578125, | |
| "learning_rate": 8e-05, | |
| "loss": 0.873570442199707, | |
| "step": 648 | |
| }, | |
| { | |
| "epoch": 0.17314864144912093, | |
| "grad_norm": 0.75390625, | |
| "learning_rate": 8e-05, | |
| "loss": 0.9159509539604187, | |
| "step": 650 | |
| }, | |
| { | |
| "epoch": 0.17368140649973363, | |
| "grad_norm": 0.78125, | |
| "learning_rate": 8e-05, | |
| "loss": 0.8728219270706177, | |
| "step": 652 | |
| }, | |
| { | |
| "epoch": 0.1742141715503463, | |
| "grad_norm": 0.7734375, | |
| "learning_rate": 8e-05, | |
| "loss": 0.9193546175956726, | |
| "step": 654 | |
| }, | |
| { | |
| "epoch": 0.17474693660095897, | |
| "grad_norm": 0.76953125, | |
| "learning_rate": 8e-05, | |
| "loss": 0.8523741364479065, | |
| "step": 656 | |
| }, | |
| { | |
| "epoch": 0.17527970165157167, | |
| "grad_norm": 0.76953125, | |
| "learning_rate": 8e-05, | |
| "loss": 0.9125362038612366, | |
| "step": 658 | |
| }, | |
| { | |
| "epoch": 0.17581246670218434, | |
| "grad_norm": 0.78125, | |
| "learning_rate": 8e-05, | |
| "loss": 0.9080352783203125, | |
| "step": 660 | |
| }, | |
| { | |
| "epoch": 0.176345231752797, | |
| "grad_norm": 0.73046875, | |
| "learning_rate": 8e-05, | |
| "loss": 0.8742219805717468, | |
| "step": 662 | |
| }, | |
| { | |
| "epoch": 0.1768779968034097, | |
| "grad_norm": 0.79296875, | |
| "learning_rate": 8e-05, | |
| "loss": 0.9006013870239258, | |
| "step": 664 | |
| }, | |
| { | |
| "epoch": 0.17741076185402238, | |
| "grad_norm": 0.7734375, | |
| "learning_rate": 8e-05, | |
| "loss": 0.8835248947143555, | |
| "step": 666 | |
| }, | |
| { | |
| "epoch": 0.17794352690463505, | |
| "grad_norm": 0.78125, | |
| "learning_rate": 8e-05, | |
| "loss": 0.8771501183509827, | |
| "step": 668 | |
| }, | |
| { | |
| "epoch": 0.17847629195524772, | |
| "grad_norm": 0.78515625, | |
| "learning_rate": 8e-05, | |
| "loss": 0.9349658489227295, | |
| "step": 670 | |
| }, | |
| { | |
| "epoch": 0.17900905700586042, | |
| "grad_norm": 0.73046875, | |
| "learning_rate": 8e-05, | |
| "loss": 0.903891921043396, | |
| "step": 672 | |
| }, | |
| { | |
| "epoch": 0.1795418220564731, | |
| "grad_norm": 0.76953125, | |
| "learning_rate": 8e-05, | |
| "loss": 0.905728816986084, | |
| "step": 674 | |
| }, | |
| { | |
| "epoch": 0.18007458710708577, | |
| "grad_norm": 0.75390625, | |
| "learning_rate": 8e-05, | |
| "loss": 0.8719464540481567, | |
| "step": 676 | |
| }, | |
| { | |
| "epoch": 0.18060735215769846, | |
| "grad_norm": 0.7421875, | |
| "learning_rate": 8e-05, | |
| "loss": 0.9010811448097229, | |
| "step": 678 | |
| }, | |
| { | |
| "epoch": 0.18114011720831114, | |
| "grad_norm": 0.75390625, | |
| "learning_rate": 8e-05, | |
| "loss": 0.8699101805686951, | |
| "step": 680 | |
| }, | |
| { | |
| "epoch": 0.1816728822589238, | |
| "grad_norm": 0.7421875, | |
| "learning_rate": 8e-05, | |
| "loss": 0.8923678994178772, | |
| "step": 682 | |
| }, | |
| { | |
| "epoch": 0.1822056473095365, | |
| "grad_norm": 0.76953125, | |
| "learning_rate": 8e-05, | |
| "loss": 0.8914515972137451, | |
| "step": 684 | |
| }, | |
| { | |
| "epoch": 0.18273841236014918, | |
| "grad_norm": 0.796875, | |
| "learning_rate": 8e-05, | |
| "loss": 0.8682945966720581, | |
| "step": 686 | |
| }, | |
| { | |
| "epoch": 0.18327117741076185, | |
| "grad_norm": 0.8203125, | |
| "learning_rate": 8e-05, | |
| "loss": 0.9265104532241821, | |
| "step": 688 | |
| }, | |
| { | |
| "epoch": 0.18380394246137455, | |
| "grad_norm": 0.7421875, | |
| "learning_rate": 8e-05, | |
| "loss": 0.9007678031921387, | |
| "step": 690 | |
| }, | |
| { | |
| "epoch": 0.18433670751198722, | |
| "grad_norm": 0.75, | |
| "learning_rate": 8e-05, | |
| "loss": 0.8766607642173767, | |
| "step": 692 | |
| }, | |
| { | |
| "epoch": 0.1848694725625999, | |
| "grad_norm": 0.78125, | |
| "learning_rate": 8e-05, | |
| "loss": 0.8730136752128601, | |
| "step": 694 | |
| }, | |
| { | |
| "epoch": 0.18540223761321256, | |
| "grad_norm": 0.78515625, | |
| "learning_rate": 8e-05, | |
| "loss": 0.9283718466758728, | |
| "step": 696 | |
| }, | |
| { | |
| "epoch": 0.18593500266382526, | |
| "grad_norm": 0.76953125, | |
| "learning_rate": 8e-05, | |
| "loss": 0.8665472865104675, | |
| "step": 698 | |
| }, | |
| { | |
| "epoch": 0.18646776771443793, | |
| "grad_norm": 0.7265625, | |
| "learning_rate": 8e-05, | |
| "loss": 0.8720895648002625, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 0.1870005327650506, | |
| "grad_norm": 0.7421875, | |
| "learning_rate": 8e-05, | |
| "loss": 0.8716840744018555, | |
| "step": 702 | |
| }, | |
| { | |
| "epoch": 0.1875332978156633, | |
| "grad_norm": 0.80078125, | |
| "learning_rate": 8e-05, | |
| "loss": 0.893147349357605, | |
| "step": 704 | |
| }, | |
| { | |
| "epoch": 0.18806606286627597, | |
| "grad_norm": 0.796875, | |
| "learning_rate": 8e-05, | |
| "loss": 0.8979114890098572, | |
| "step": 706 | |
| }, | |
| { | |
| "epoch": 0.18859882791688865, | |
| "grad_norm": 0.73046875, | |
| "learning_rate": 8e-05, | |
| "loss": 0.8585438132286072, | |
| "step": 708 | |
| }, | |
| { | |
| "epoch": 0.18913159296750134, | |
| "grad_norm": 0.7578125, | |
| "learning_rate": 8e-05, | |
| "loss": 0.9044113159179688, | |
| "step": 710 | |
| }, | |
| { | |
| "epoch": 0.18966435801811402, | |
| "grad_norm": 0.84765625, | |
| "learning_rate": 8e-05, | |
| "loss": 0.920305073261261, | |
| "step": 712 | |
| }, | |
| { | |
| "epoch": 0.1901971230687267, | |
| "grad_norm": 0.76171875, | |
| "learning_rate": 8e-05, | |
| "loss": 0.8698307275772095, | |
| "step": 714 | |
| }, | |
| { | |
| "epoch": 0.19072988811933936, | |
| "grad_norm": 0.7734375, | |
| "learning_rate": 8e-05, | |
| "loss": 0.8913177847862244, | |
| "step": 716 | |
| }, | |
| { | |
| "epoch": 0.19126265316995206, | |
| "grad_norm": 0.73046875, | |
| "learning_rate": 8e-05, | |
| "loss": 0.8886463642120361, | |
| "step": 718 | |
| }, | |
| { | |
| "epoch": 0.19179541822056473, | |
| "grad_norm": 0.7578125, | |
| "learning_rate": 8e-05, | |
| "loss": 0.8651049733161926, | |
| "step": 720 | |
| }, | |
| { | |
| "epoch": 0.1923281832711774, | |
| "grad_norm": 0.7421875, | |
| "learning_rate": 8e-05, | |
| "loss": 0.9027085900306702, | |
| "step": 722 | |
| }, | |
| { | |
| "epoch": 0.1928609483217901, | |
| "grad_norm": 0.71484375, | |
| "learning_rate": 8e-05, | |
| "loss": 0.8272101283073425, | |
| "step": 724 | |
| }, | |
| { | |
| "epoch": 0.19339371337240277, | |
| "grad_norm": 0.7421875, | |
| "learning_rate": 8e-05, | |
| "loss": 0.9042779207229614, | |
| "step": 726 | |
| }, | |
| { | |
| "epoch": 0.19392647842301544, | |
| "grad_norm": 0.71484375, | |
| "learning_rate": 8e-05, | |
| "loss": 0.8599862456321716, | |
| "step": 728 | |
| }, | |
| { | |
| "epoch": 0.19445924347362814, | |
| "grad_norm": 0.70703125, | |
| "learning_rate": 8e-05, | |
| "loss": 0.8881500959396362, | |
| "step": 730 | |
| }, | |
| { | |
| "epoch": 0.1949920085242408, | |
| "grad_norm": 0.7421875, | |
| "learning_rate": 8e-05, | |
| "loss": 0.8275444507598877, | |
| "step": 732 | |
| }, | |
| { | |
| "epoch": 0.19552477357485348, | |
| "grad_norm": 0.73046875, | |
| "learning_rate": 8e-05, | |
| "loss": 0.8692565560340881, | |
| "step": 734 | |
| }, | |
| { | |
| "epoch": 0.19605753862546618, | |
| "grad_norm": 0.74609375, | |
| "learning_rate": 8e-05, | |
| "loss": 0.9346656799316406, | |
| "step": 736 | |
| }, | |
| { | |
| "epoch": 0.19659030367607885, | |
| "grad_norm": 0.6953125, | |
| "learning_rate": 8e-05, | |
| "loss": 0.8541794419288635, | |
| "step": 738 | |
| }, | |
| { | |
| "epoch": 0.19712306872669152, | |
| "grad_norm": 0.76171875, | |
| "learning_rate": 8e-05, | |
| "loss": 0.8708451986312866, | |
| "step": 740 | |
| }, | |
| { | |
| "epoch": 0.1976558337773042, | |
| "grad_norm": 0.75, | |
| "learning_rate": 8e-05, | |
| "loss": 0.8439049124717712, | |
| "step": 742 | |
| }, | |
| { | |
| "epoch": 0.1981885988279169, | |
| "grad_norm": 0.7890625, | |
| "learning_rate": 8e-05, | |
| "loss": 0.8625826835632324, | |
| "step": 744 | |
| }, | |
| { | |
| "epoch": 0.19872136387852957, | |
| "grad_norm": 0.734375, | |
| "learning_rate": 8e-05, | |
| "loss": 0.9517384171485901, | |
| "step": 746 | |
| }, | |
| { | |
| "epoch": 0.19925412892914224, | |
| "grad_norm": 0.73828125, | |
| "learning_rate": 8e-05, | |
| "loss": 0.9006468057632446, | |
| "step": 748 | |
| }, | |
| { | |
| "epoch": 0.19978689397975494, | |
| "grad_norm": 0.75390625, | |
| "learning_rate": 8e-05, | |
| "loss": 0.9177417159080505, | |
| "step": 750 | |
| }, | |
| { | |
| "epoch": 0.2003196590303676, | |
| "grad_norm": 0.7421875, | |
| "learning_rate": 8e-05, | |
| "loss": 0.9008771181106567, | |
| "step": 752 | |
| }, | |
| { | |
| "epoch": 0.20085242408098028, | |
| "grad_norm": 0.7578125, | |
| "learning_rate": 8e-05, | |
| "loss": 0.8759271502494812, | |
| "step": 754 | |
| }, | |
| { | |
| "epoch": 0.20138518913159298, | |
| "grad_norm": 0.75390625, | |
| "learning_rate": 8e-05, | |
| "loss": 0.8917984962463379, | |
| "step": 756 | |
| }, | |
| { | |
| "epoch": 0.20191795418220565, | |
| "grad_norm": 0.7421875, | |
| "learning_rate": 8e-05, | |
| "loss": 0.8270426988601685, | |
| "step": 758 | |
| }, | |
| { | |
| "epoch": 0.20245071923281832, | |
| "grad_norm": 0.71484375, | |
| "learning_rate": 8e-05, | |
| "loss": 0.8944893479347229, | |
| "step": 760 | |
| }, | |
| { | |
| "epoch": 0.20298348428343102, | |
| "grad_norm": 0.72265625, | |
| "learning_rate": 8e-05, | |
| "loss": 0.8563660383224487, | |
| "step": 762 | |
| }, | |
| { | |
| "epoch": 0.2035162493340437, | |
| "grad_norm": 0.765625, | |
| "learning_rate": 8e-05, | |
| "loss": 0.9090912938117981, | |
| "step": 764 | |
| }, | |
| { | |
| "epoch": 0.20404901438465636, | |
| "grad_norm": 0.7265625, | |
| "learning_rate": 8e-05, | |
| "loss": 0.8511034250259399, | |
| "step": 766 | |
| }, | |
| { | |
| "epoch": 0.20458177943526903, | |
| "grad_norm": 0.76953125, | |
| "learning_rate": 8e-05, | |
| "loss": 0.8830511569976807, | |
| "step": 768 | |
| }, | |
| { | |
| "epoch": 0.20511454448588173, | |
| "grad_norm": 0.76953125, | |
| "learning_rate": 8e-05, | |
| "loss": 0.8611262440681458, | |
| "step": 770 | |
| }, | |
| { | |
| "epoch": 0.2056473095364944, | |
| "grad_norm": 0.7578125, | |
| "learning_rate": 8e-05, | |
| "loss": 0.8874700665473938, | |
| "step": 772 | |
| }, | |
| { | |
| "epoch": 0.20618007458710708, | |
| "grad_norm": 0.7265625, | |
| "learning_rate": 8e-05, | |
| "loss": 0.8730615377426147, | |
| "step": 774 | |
| }, | |
| { | |
| "epoch": 0.20671283963771978, | |
| "grad_norm": 0.74609375, | |
| "learning_rate": 8e-05, | |
| "loss": 0.8800356388092041, | |
| "step": 776 | |
| }, | |
| { | |
| "epoch": 0.20724560468833245, | |
| "grad_norm": 0.734375, | |
| "learning_rate": 8e-05, | |
| "loss": 0.8427203893661499, | |
| "step": 778 | |
| }, | |
| { | |
| "epoch": 0.20777836973894512, | |
| "grad_norm": 0.72265625, | |
| "learning_rate": 8e-05, | |
| "loss": 0.8761508464813232, | |
| "step": 780 | |
| }, | |
| { | |
| "epoch": 0.20831113478955782, | |
| "grad_norm": 0.71484375, | |
| "learning_rate": 8e-05, | |
| "loss": 0.8280084729194641, | |
| "step": 782 | |
| }, | |
| { | |
| "epoch": 0.2088438998401705, | |
| "grad_norm": 0.74609375, | |
| "learning_rate": 8e-05, | |
| "loss": 0.8750775456428528, | |
| "step": 784 | |
| }, | |
| { | |
| "epoch": 0.20937666489078316, | |
| "grad_norm": 0.73046875, | |
| "learning_rate": 8e-05, | |
| "loss": 0.8673681020736694, | |
| "step": 786 | |
| }, | |
| { | |
| "epoch": 0.20990942994139583, | |
| "grad_norm": 0.734375, | |
| "learning_rate": 8e-05, | |
| "loss": 0.8897008299827576, | |
| "step": 788 | |
| }, | |
| { | |
| "epoch": 0.21044219499200853, | |
| "grad_norm": 0.74609375, | |
| "learning_rate": 8e-05, | |
| "loss": 0.8720916509628296, | |
| "step": 790 | |
| }, | |
| { | |
| "epoch": 0.2109749600426212, | |
| "grad_norm": 0.69140625, | |
| "learning_rate": 8e-05, | |
| "loss": 0.8478310108184814, | |
| "step": 792 | |
| }, | |
| { | |
| "epoch": 0.21150772509323387, | |
| "grad_norm": 0.7265625, | |
| "learning_rate": 8e-05, | |
| "loss": 0.8465595841407776, | |
| "step": 794 | |
| }, | |
| { | |
| "epoch": 0.21204049014384657, | |
| "grad_norm": 0.765625, | |
| "learning_rate": 8e-05, | |
| "loss": 0.8860556483268738, | |
| "step": 796 | |
| }, | |
| { | |
| "epoch": 0.21257325519445924, | |
| "grad_norm": 0.7265625, | |
| "learning_rate": 8e-05, | |
| "loss": 0.8419367074966431, | |
| "step": 798 | |
| }, | |
| { | |
| "epoch": 0.21310602024507191, | |
| "grad_norm": 0.7265625, | |
| "learning_rate": 8e-05, | |
| "loss": 0.8694333434104919, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 0.2136387852956846, | |
| "grad_norm": 0.71484375, | |
| "learning_rate": 8e-05, | |
| "loss": 0.8959736227989197, | |
| "step": 802 | |
| }, | |
| { | |
| "epoch": 0.21417155034629728, | |
| "grad_norm": 0.7265625, | |
| "learning_rate": 8e-05, | |
| "loss": 0.8992412090301514, | |
| "step": 804 | |
| }, | |
| { | |
| "epoch": 0.21470431539690996, | |
| "grad_norm": 0.734375, | |
| "learning_rate": 8e-05, | |
| "loss": 0.8671194314956665, | |
| "step": 806 | |
| }, | |
| { | |
| "epoch": 0.21523708044752266, | |
| "grad_norm": 0.7109375, | |
| "learning_rate": 8e-05, | |
| "loss": 0.8547959327697754, | |
| "step": 808 | |
| }, | |
| { | |
| "epoch": 0.21576984549813533, | |
| "grad_norm": 0.703125, | |
| "learning_rate": 8e-05, | |
| "loss": 0.8758715987205505, | |
| "step": 810 | |
| }, | |
| { | |
| "epoch": 0.216302610548748, | |
| "grad_norm": 0.71875, | |
| "learning_rate": 8e-05, | |
| "loss": 0.8950058221817017, | |
| "step": 812 | |
| }, | |
| { | |
| "epoch": 0.21683537559936067, | |
| "grad_norm": 0.74609375, | |
| "learning_rate": 8e-05, | |
| "loss": 0.8568795323371887, | |
| "step": 814 | |
| }, | |
| { | |
| "epoch": 0.21736814064997337, | |
| "grad_norm": 0.734375, | |
| "learning_rate": 8e-05, | |
| "loss": 0.903777003288269, | |
| "step": 816 | |
| }, | |
| { | |
| "epoch": 0.21790090570058604, | |
| "grad_norm": 0.73828125, | |
| "learning_rate": 8e-05, | |
| "loss": 0.8380757570266724, | |
| "step": 818 | |
| }, | |
| { | |
| "epoch": 0.2184336707511987, | |
| "grad_norm": 0.72265625, | |
| "learning_rate": 8e-05, | |
| "loss": 0.851688802242279, | |
| "step": 820 | |
| }, | |
| { | |
| "epoch": 0.2189664358018114, | |
| "grad_norm": 0.71875, | |
| "learning_rate": 8e-05, | |
| "loss": 0.8432849645614624, | |
| "step": 822 | |
| }, | |
| { | |
| "epoch": 0.21949920085242408, | |
| "grad_norm": 0.765625, | |
| "learning_rate": 8e-05, | |
| "loss": 0.9238657355308533, | |
| "step": 824 | |
| }, | |
| { | |
| "epoch": 0.22003196590303675, | |
| "grad_norm": 0.71875, | |
| "learning_rate": 8e-05, | |
| "loss": 0.8367424607276917, | |
| "step": 826 | |
| }, | |
| { | |
| "epoch": 0.22056473095364945, | |
| "grad_norm": 0.7265625, | |
| "learning_rate": 8e-05, | |
| "loss": 0.8334730863571167, | |
| "step": 828 | |
| }, | |
| { | |
| "epoch": 0.22109749600426212, | |
| "grad_norm": 0.703125, | |
| "learning_rate": 8e-05, | |
| "loss": 0.8405196666717529, | |
| "step": 830 | |
| }, | |
| { | |
| "epoch": 0.2216302610548748, | |
| "grad_norm": 0.71484375, | |
| "learning_rate": 8e-05, | |
| "loss": 0.8685415983200073, | |
| "step": 832 | |
| }, | |
| { | |
| "epoch": 0.2221630261054875, | |
| "grad_norm": 0.71875, | |
| "learning_rate": 8e-05, | |
| "loss": 0.8226058483123779, | |
| "step": 834 | |
| }, | |
| { | |
| "epoch": 0.22269579115610016, | |
| "grad_norm": 0.734375, | |
| "learning_rate": 8e-05, | |
| "loss": 0.8686912059783936, | |
| "step": 836 | |
| }, | |
| { | |
| "epoch": 0.22322855620671284, | |
| "grad_norm": 0.72265625, | |
| "learning_rate": 8e-05, | |
| "loss": 0.8767325282096863, | |
| "step": 838 | |
| }, | |
| { | |
| "epoch": 0.2237613212573255, | |
| "grad_norm": 0.7265625, | |
| "learning_rate": 8e-05, | |
| "loss": 0.8393811583518982, | |
| "step": 840 | |
| }, | |
| { | |
| "epoch": 0.2242940863079382, | |
| "grad_norm": 0.7265625, | |
| "learning_rate": 8e-05, | |
| "loss": 0.8124199509620667, | |
| "step": 842 | |
| }, | |
| { | |
| "epoch": 0.22482685135855088, | |
| "grad_norm": 0.73046875, | |
| "learning_rate": 8e-05, | |
| "loss": 0.8643161654472351, | |
| "step": 844 | |
| }, | |
| { | |
| "epoch": 0.22535961640916355, | |
| "grad_norm": 0.72265625, | |
| "learning_rate": 8e-05, | |
| "loss": 0.8218422532081604, | |
| "step": 846 | |
| }, | |
| { | |
| "epoch": 0.22589238145977625, | |
| "grad_norm": 0.7578125, | |
| "learning_rate": 8e-05, | |
| "loss": 0.8444434404373169, | |
| "step": 848 | |
| }, | |
| { | |
| "epoch": 0.22642514651038892, | |
| "grad_norm": 0.75, | |
| "learning_rate": 8e-05, | |
| "loss": 0.9049234390258789, | |
| "step": 850 | |
| }, | |
| { | |
| "epoch": 0.2269579115610016, | |
| "grad_norm": 0.7265625, | |
| "learning_rate": 8e-05, | |
| "loss": 0.8443453311920166, | |
| "step": 852 | |
| }, | |
| { | |
| "epoch": 0.2274906766116143, | |
| "grad_norm": 0.7578125, | |
| "learning_rate": 8e-05, | |
| "loss": 0.8720162510871887, | |
| "step": 854 | |
| }, | |
| { | |
| "epoch": 0.22802344166222696, | |
| "grad_norm": 0.71875, | |
| "learning_rate": 8e-05, | |
| "loss": 0.8342218399047852, | |
| "step": 856 | |
| }, | |
| { | |
| "epoch": 0.22855620671283963, | |
| "grad_norm": 0.734375, | |
| "learning_rate": 8e-05, | |
| "loss": 0.816758394241333, | |
| "step": 858 | |
| }, | |
| { | |
| "epoch": 0.2290889717634523, | |
| "grad_norm": 0.70703125, | |
| "learning_rate": 8e-05, | |
| "loss": 0.834097683429718, | |
| "step": 860 | |
| }, | |
| { | |
| "epoch": 0.229621736814065, | |
| "grad_norm": 0.71484375, | |
| "learning_rate": 8e-05, | |
| "loss": 0.8916500806808472, | |
| "step": 862 | |
| }, | |
| { | |
| "epoch": 0.23015450186467767, | |
| "grad_norm": 0.71484375, | |
| "learning_rate": 8e-05, | |
| "loss": 0.8376821875572205, | |
| "step": 864 | |
| }, | |
| { | |
| "epoch": 0.23068726691529035, | |
| "grad_norm": 0.734375, | |
| "learning_rate": 8e-05, | |
| "loss": 0.8754009008407593, | |
| "step": 866 | |
| }, | |
| { | |
| "epoch": 0.23122003196590304, | |
| "grad_norm": 0.70703125, | |
| "learning_rate": 8e-05, | |
| "loss": 0.8596115708351135, | |
| "step": 868 | |
| }, | |
| { | |
| "epoch": 0.23175279701651572, | |
| "grad_norm": 0.69921875, | |
| "learning_rate": 8e-05, | |
| "loss": 0.8355474472045898, | |
| "step": 870 | |
| }, | |
| { | |
| "epoch": 0.2322855620671284, | |
| "grad_norm": 0.75390625, | |
| "learning_rate": 8e-05, | |
| "loss": 0.876761257648468, | |
| "step": 872 | |
| }, | |
| { | |
| "epoch": 0.2328183271177411, | |
| "grad_norm": 0.73046875, | |
| "learning_rate": 8e-05, | |
| "loss": 0.8948037624359131, | |
| "step": 874 | |
| }, | |
| { | |
| "epoch": 0.23335109216835376, | |
| "grad_norm": 0.7734375, | |
| "learning_rate": 8e-05, | |
| "loss": 0.8517124056816101, | |
| "step": 876 | |
| }, | |
| { | |
| "epoch": 0.23388385721896643, | |
| "grad_norm": 0.72265625, | |
| "learning_rate": 8e-05, | |
| "loss": 0.8235978484153748, | |
| "step": 878 | |
| }, | |
| { | |
| "epoch": 0.23441662226957913, | |
| "grad_norm": 0.72265625, | |
| "learning_rate": 8e-05, | |
| "loss": 0.8589498400688171, | |
| "step": 880 | |
| }, | |
| { | |
| "epoch": 0.2349493873201918, | |
| "grad_norm": 0.71484375, | |
| "learning_rate": 8e-05, | |
| "loss": 0.8652699589729309, | |
| "step": 882 | |
| }, | |
| { | |
| "epoch": 0.23548215237080447, | |
| "grad_norm": 0.72265625, | |
| "learning_rate": 8e-05, | |
| "loss": 0.8380342125892639, | |
| "step": 884 | |
| }, | |
| { | |
| "epoch": 0.23601491742141714, | |
| "grad_norm": 0.79296875, | |
| "learning_rate": 8e-05, | |
| "loss": 0.8523828387260437, | |
| "step": 886 | |
| }, | |
| { | |
| "epoch": 0.23654768247202984, | |
| "grad_norm": 0.7265625, | |
| "learning_rate": 8e-05, | |
| "loss": 0.9019739627838135, | |
| "step": 888 | |
| }, | |
| { | |
| "epoch": 0.2370804475226425, | |
| "grad_norm": 0.71484375, | |
| "learning_rate": 8e-05, | |
| "loss": 0.8879658579826355, | |
| "step": 890 | |
| }, | |
| { | |
| "epoch": 0.23761321257325518, | |
| "grad_norm": 0.71484375, | |
| "learning_rate": 8e-05, | |
| "loss": 0.8675874471664429, | |
| "step": 892 | |
| }, | |
| { | |
| "epoch": 0.23814597762386788, | |
| "grad_norm": 0.73828125, | |
| "learning_rate": 8e-05, | |
| "loss": 0.8754541873931885, | |
| "step": 894 | |
| }, | |
| { | |
| "epoch": 0.23867874267448055, | |
| "grad_norm": 0.71484375, | |
| "learning_rate": 8e-05, | |
| "loss": 0.8880608081817627, | |
| "step": 896 | |
| }, | |
| { | |
| "epoch": 0.23921150772509323, | |
| "grad_norm": 0.75390625, | |
| "learning_rate": 8e-05, | |
| "loss": 0.8680922985076904, | |
| "step": 898 | |
| }, | |
| { | |
| "epoch": 0.23974427277570592, | |
| "grad_norm": 0.7265625, | |
| "learning_rate": 8e-05, | |
| "loss": 0.8829432129859924, | |
| "step": 900 | |
| }, | |
| { | |
| "epoch": 0.2402770378263186, | |
| "grad_norm": 0.7109375, | |
| "learning_rate": 8e-05, | |
| "loss": 0.8691644072532654, | |
| "step": 902 | |
| }, | |
| { | |
| "epoch": 0.24080980287693127, | |
| "grad_norm": 0.73046875, | |
| "learning_rate": 8e-05, | |
| "loss": 0.8773077130317688, | |
| "step": 904 | |
| }, | |
| { | |
| "epoch": 0.24134256792754397, | |
| "grad_norm": 0.70703125, | |
| "learning_rate": 8e-05, | |
| "loss": 0.8335128426551819, | |
| "step": 906 | |
| }, | |
| { | |
| "epoch": 0.24187533297815664, | |
| "grad_norm": 0.703125, | |
| "learning_rate": 8e-05, | |
| "loss": 0.8632769584655762, | |
| "step": 908 | |
| }, | |
| { | |
| "epoch": 0.2424080980287693, | |
| "grad_norm": 0.6796875, | |
| "learning_rate": 8e-05, | |
| "loss": 0.8212178945541382, | |
| "step": 910 | |
| }, | |
| { | |
| "epoch": 0.24294086307938198, | |
| "grad_norm": 0.71875, | |
| "learning_rate": 8e-05, | |
| "loss": 0.8350695967674255, | |
| "step": 912 | |
| }, | |
| { | |
| "epoch": 0.24347362812999468, | |
| "grad_norm": 0.7109375, | |
| "learning_rate": 8e-05, | |
| "loss": 0.8406141996383667, | |
| "step": 914 | |
| }, | |
| { | |
| "epoch": 0.24400639318060735, | |
| "grad_norm": 0.7265625, | |
| "learning_rate": 8e-05, | |
| "loss": 0.8700868487358093, | |
| "step": 916 | |
| }, | |
| { | |
| "epoch": 0.24453915823122002, | |
| "grad_norm": 0.71484375, | |
| "learning_rate": 8e-05, | |
| "loss": 0.8683998584747314, | |
| "step": 918 | |
| }, | |
| { | |
| "epoch": 0.24507192328183272, | |
| "grad_norm": 0.76171875, | |
| "learning_rate": 8e-05, | |
| "loss": 0.8839133977890015, | |
| "step": 920 | |
| }, | |
| { | |
| "epoch": 0.2456046883324454, | |
| "grad_norm": 0.69921875, | |
| "learning_rate": 8e-05, | |
| "loss": 0.8943023085594177, | |
| "step": 922 | |
| }, | |
| { | |
| "epoch": 0.24613745338305806, | |
| "grad_norm": 0.70703125, | |
| "learning_rate": 8e-05, | |
| "loss": 0.8632996082305908, | |
| "step": 924 | |
| }, | |
| { | |
| "epoch": 0.24667021843367076, | |
| "grad_norm": 0.69921875, | |
| "learning_rate": 8e-05, | |
| "loss": 0.833712637424469, | |
| "step": 926 | |
| }, | |
| { | |
| "epoch": 0.24720298348428343, | |
| "grad_norm": 0.73046875, | |
| "learning_rate": 8e-05, | |
| "loss": 0.864801287651062, | |
| "step": 928 | |
| }, | |
| { | |
| "epoch": 0.2477357485348961, | |
| "grad_norm": 0.765625, | |
| "learning_rate": 8e-05, | |
| "loss": 0.866331160068512, | |
| "step": 930 | |
| }, | |
| { | |
| "epoch": 0.2482685135855088, | |
| "grad_norm": 0.671875, | |
| "learning_rate": 8e-05, | |
| "loss": 0.8194761872291565, | |
| "step": 932 | |
| }, | |
| { | |
| "epoch": 0.24880127863612148, | |
| "grad_norm": 0.73828125, | |
| "learning_rate": 8e-05, | |
| "loss": 0.8057902455329895, | |
| "step": 934 | |
| }, | |
| { | |
| "epoch": 0.24933404368673415, | |
| "grad_norm": 0.7109375, | |
| "learning_rate": 8e-05, | |
| "loss": 0.8366430997848511, | |
| "step": 936 | |
| }, | |
| { | |
| "epoch": 0.24986680873734682, | |
| "grad_norm": 0.71875, | |
| "learning_rate": 8e-05, | |
| "loss": 0.8721463084220886, | |
| "step": 938 | |
| }, | |
| { | |
| "epoch": 0.2503995737879595, | |
| "grad_norm": 0.69921875, | |
| "learning_rate": 8e-05, | |
| "loss": 0.867581844329834, | |
| "step": 940 | |
| }, | |
| { | |
| "epoch": 0.2509323388385722, | |
| "grad_norm": 0.71875, | |
| "learning_rate": 8e-05, | |
| "loss": 0.8661463856697083, | |
| "step": 942 | |
| }, | |
| { | |
| "epoch": 0.25146510388918486, | |
| "grad_norm": 0.6796875, | |
| "learning_rate": 8e-05, | |
| "loss": 0.8397172093391418, | |
| "step": 944 | |
| }, | |
| { | |
| "epoch": 0.25199786893979753, | |
| "grad_norm": 0.734375, | |
| "learning_rate": 8e-05, | |
| "loss": 0.8674642443656921, | |
| "step": 946 | |
| }, | |
| { | |
| "epoch": 0.2525306339904102, | |
| "grad_norm": 0.71484375, | |
| "learning_rate": 8e-05, | |
| "loss": 0.8149669766426086, | |
| "step": 948 | |
| }, | |
| { | |
| "epoch": 0.25306339904102293, | |
| "grad_norm": 0.70703125, | |
| "learning_rate": 8e-05, | |
| "loss": 0.8306043148040771, | |
| "step": 950 | |
| }, | |
| { | |
| "epoch": 0.2535961640916356, | |
| "grad_norm": 0.71484375, | |
| "learning_rate": 8e-05, | |
| "loss": 0.8840003609657288, | |
| "step": 952 | |
| }, | |
| { | |
| "epoch": 0.25412892914224827, | |
| "grad_norm": 0.75390625, | |
| "learning_rate": 8e-05, | |
| "loss": 0.8428335189819336, | |
| "step": 954 | |
| }, | |
| { | |
| "epoch": 0.25466169419286094, | |
| "grad_norm": 0.7109375, | |
| "learning_rate": 8e-05, | |
| "loss": 0.842282772064209, | |
| "step": 956 | |
| }, | |
| { | |
| "epoch": 0.2551944592434736, | |
| "grad_norm": 0.71875, | |
| "learning_rate": 8e-05, | |
| "loss": 0.8525648713111877, | |
| "step": 958 | |
| }, | |
| { | |
| "epoch": 0.2557272242940863, | |
| "grad_norm": 0.70703125, | |
| "learning_rate": 8e-05, | |
| "loss": 0.8552727699279785, | |
| "step": 960 | |
| }, | |
| { | |
| "epoch": 0.256259989344699, | |
| "grad_norm": 0.73046875, | |
| "learning_rate": 8e-05, | |
| "loss": 0.8829293251037598, | |
| "step": 962 | |
| }, | |
| { | |
| "epoch": 0.2567927543953117, | |
| "grad_norm": 0.7265625, | |
| "learning_rate": 8e-05, | |
| "loss": 0.8788723945617676, | |
| "step": 964 | |
| }, | |
| { | |
| "epoch": 0.25732551944592436, | |
| "grad_norm": 0.7890625, | |
| "learning_rate": 8e-05, | |
| "loss": 0.8775660395622253, | |
| "step": 966 | |
| }, | |
| { | |
| "epoch": 0.257858284496537, | |
| "grad_norm": 0.7265625, | |
| "learning_rate": 8e-05, | |
| "loss": 0.8680721521377563, | |
| "step": 968 | |
| }, | |
| { | |
| "epoch": 0.2583910495471497, | |
| "grad_norm": 0.69921875, | |
| "learning_rate": 8e-05, | |
| "loss": 0.8522012829780579, | |
| "step": 970 | |
| }, | |
| { | |
| "epoch": 0.25892381459776237, | |
| "grad_norm": 0.671875, | |
| "learning_rate": 8e-05, | |
| "loss": 0.8104643821716309, | |
| "step": 972 | |
| }, | |
| { | |
| "epoch": 0.25945657964837504, | |
| "grad_norm": 0.68359375, | |
| "learning_rate": 8e-05, | |
| "loss": 0.8393515348434448, | |
| "step": 974 | |
| }, | |
| { | |
| "epoch": 0.25998934469898777, | |
| "grad_norm": 0.71875, | |
| "learning_rate": 8e-05, | |
| "loss": 0.829097330570221, | |
| "step": 976 | |
| }, | |
| { | |
| "epoch": 0.26052210974960044, | |
| "grad_norm": 0.671875, | |
| "learning_rate": 8e-05, | |
| "loss": 0.8042383193969727, | |
| "step": 978 | |
| }, | |
| { | |
| "epoch": 0.2610548748002131, | |
| "grad_norm": 0.7109375, | |
| "learning_rate": 8e-05, | |
| "loss": 0.8093820214271545, | |
| "step": 980 | |
| }, | |
| { | |
| "epoch": 0.2615876398508258, | |
| "grad_norm": 0.6953125, | |
| "learning_rate": 8e-05, | |
| "loss": 0.8751801252365112, | |
| "step": 982 | |
| }, | |
| { | |
| "epoch": 0.26212040490143845, | |
| "grad_norm": 0.6875, | |
| "learning_rate": 8e-05, | |
| "loss": 0.7946058511734009, | |
| "step": 984 | |
| }, | |
| { | |
| "epoch": 0.2626531699520511, | |
| "grad_norm": 0.69140625, | |
| "learning_rate": 8e-05, | |
| "loss": 0.817328929901123, | |
| "step": 986 | |
| }, | |
| { | |
| "epoch": 0.26318593500266385, | |
| "grad_norm": 0.69140625, | |
| "learning_rate": 8e-05, | |
| "loss": 0.8307056427001953, | |
| "step": 988 | |
| }, | |
| { | |
| "epoch": 0.2637187000532765, | |
| "grad_norm": 0.734375, | |
| "learning_rate": 8e-05, | |
| "loss": 0.837894082069397, | |
| "step": 990 | |
| }, | |
| { | |
| "epoch": 0.2642514651038892, | |
| "grad_norm": 0.69921875, | |
| "learning_rate": 8e-05, | |
| "loss": 0.8467088341712952, | |
| "step": 992 | |
| }, | |
| { | |
| "epoch": 0.26478423015450187, | |
| "grad_norm": 0.6875, | |
| "learning_rate": 8e-05, | |
| "loss": 0.8036914467811584, | |
| "step": 994 | |
| }, | |
| { | |
| "epoch": 0.26531699520511454, | |
| "grad_norm": 0.69140625, | |
| "learning_rate": 8e-05, | |
| "loss": 0.819699764251709, | |
| "step": 996 | |
| }, | |
| { | |
| "epoch": 0.2658497602557272, | |
| "grad_norm": 0.67578125, | |
| "learning_rate": 8e-05, | |
| "loss": 0.8019229769706726, | |
| "step": 998 | |
| }, | |
| { | |
| "epoch": 0.2663825253063399, | |
| "grad_norm": 0.71875, | |
| "learning_rate": 8e-05, | |
| "loss": 0.8053907155990601, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 0.2669152903569526, | |
| "grad_norm": 0.7109375, | |
| "learning_rate": 8e-05, | |
| "loss": 0.8073758482933044, | |
| "step": 1002 | |
| }, | |
| { | |
| "epoch": 0.2674480554075653, | |
| "grad_norm": 0.73828125, | |
| "learning_rate": 8e-05, | |
| "loss": 0.828862190246582, | |
| "step": 1004 | |
| }, | |
| { | |
| "epoch": 0.26798082045817795, | |
| "grad_norm": 0.7265625, | |
| "learning_rate": 8e-05, | |
| "loss": 0.8768335580825806, | |
| "step": 1006 | |
| }, | |
| { | |
| "epoch": 0.2685135855087906, | |
| "grad_norm": 0.6953125, | |
| "learning_rate": 8e-05, | |
| "loss": 0.851819634437561, | |
| "step": 1008 | |
| }, | |
| { | |
| "epoch": 0.2690463505594033, | |
| "grad_norm": 0.6953125, | |
| "learning_rate": 8e-05, | |
| "loss": 0.812772274017334, | |
| "step": 1010 | |
| }, | |
| { | |
| "epoch": 0.26957911561001596, | |
| "grad_norm": 0.6953125, | |
| "learning_rate": 8e-05, | |
| "loss": 0.8672690987586975, | |
| "step": 1012 | |
| }, | |
| { | |
| "epoch": 0.2701118806606287, | |
| "grad_norm": 0.70703125, | |
| "learning_rate": 8e-05, | |
| "loss": 0.8474425077438354, | |
| "step": 1014 | |
| }, | |
| { | |
| "epoch": 0.27064464571124136, | |
| "grad_norm": 0.76171875, | |
| "learning_rate": 8e-05, | |
| "loss": 0.846489429473877, | |
| "step": 1016 | |
| }, | |
| { | |
| "epoch": 0.27117741076185403, | |
| "grad_norm": 0.67578125, | |
| "learning_rate": 8e-05, | |
| "loss": 0.8402537107467651, | |
| "step": 1018 | |
| }, | |
| { | |
| "epoch": 0.2717101758124667, | |
| "grad_norm": 0.72265625, | |
| "learning_rate": 8e-05, | |
| "loss": 0.873051643371582, | |
| "step": 1020 | |
| }, | |
| { | |
| "epoch": 0.2722429408630794, | |
| "grad_norm": 0.69140625, | |
| "learning_rate": 8e-05, | |
| "loss": 0.8155555129051208, | |
| "step": 1022 | |
| }, | |
| { | |
| "epoch": 0.27277570591369205, | |
| "grad_norm": 0.65625, | |
| "learning_rate": 8e-05, | |
| "loss": 0.7977849245071411, | |
| "step": 1024 | |
| }, | |
| { | |
| "epoch": 0.2733084709643047, | |
| "grad_norm": 0.6796875, | |
| "learning_rate": 8e-05, | |
| "loss": 0.8137617111206055, | |
| "step": 1026 | |
| }, | |
| { | |
| "epoch": 0.27384123601491744, | |
| "grad_norm": 0.6796875, | |
| "learning_rate": 8e-05, | |
| "loss": 0.848274827003479, | |
| "step": 1028 | |
| }, | |
| { | |
| "epoch": 0.2743740010655301, | |
| "grad_norm": 0.7265625, | |
| "learning_rate": 8e-05, | |
| "loss": 0.8711082339286804, | |
| "step": 1030 | |
| }, | |
| { | |
| "epoch": 0.2749067661161428, | |
| "grad_norm": 0.6875, | |
| "learning_rate": 8e-05, | |
| "loss": 0.8073972463607788, | |
| "step": 1032 | |
| }, | |
| { | |
| "epoch": 0.27543953116675546, | |
| "grad_norm": 0.70703125, | |
| "learning_rate": 8e-05, | |
| "loss": 0.8326395750045776, | |
| "step": 1034 | |
| }, | |
| { | |
| "epoch": 0.27597229621736813, | |
| "grad_norm": 0.68359375, | |
| "learning_rate": 8e-05, | |
| "loss": 0.855026125907898, | |
| "step": 1036 | |
| }, | |
| { | |
| "epoch": 0.2765050612679808, | |
| "grad_norm": 0.671875, | |
| "learning_rate": 8e-05, | |
| "loss": 0.7754218578338623, | |
| "step": 1038 | |
| }, | |
| { | |
| "epoch": 0.2770378263185935, | |
| "grad_norm": 0.703125, | |
| "learning_rate": 8e-05, | |
| "loss": 0.8070095181465149, | |
| "step": 1040 | |
| }, | |
| { | |
| "epoch": 0.2775705913692062, | |
| "grad_norm": 0.72265625, | |
| "learning_rate": 8e-05, | |
| "loss": 0.8382344245910645, | |
| "step": 1042 | |
| }, | |
| { | |
| "epoch": 0.27810335641981887, | |
| "grad_norm": 0.69921875, | |
| "learning_rate": 8e-05, | |
| "loss": 0.8222081661224365, | |
| "step": 1044 | |
| }, | |
| { | |
| "epoch": 0.27863612147043154, | |
| "grad_norm": 0.734375, | |
| "learning_rate": 8e-05, | |
| "loss": 0.8200312256813049, | |
| "step": 1046 | |
| }, | |
| { | |
| "epoch": 0.2791688865210442, | |
| "grad_norm": 0.71484375, | |
| "learning_rate": 8e-05, | |
| "loss": 0.8260998129844666, | |
| "step": 1048 | |
| }, | |
| { | |
| "epoch": 0.2797016515716569, | |
| "grad_norm": 0.734375, | |
| "learning_rate": 8e-05, | |
| "loss": 0.814910888671875, | |
| "step": 1050 | |
| }, | |
| { | |
| "epoch": 0.28023441662226956, | |
| "grad_norm": 0.73828125, | |
| "learning_rate": 8e-05, | |
| "loss": 0.8156729936599731, | |
| "step": 1052 | |
| }, | |
| { | |
| "epoch": 0.2807671816728823, | |
| "grad_norm": 0.703125, | |
| "learning_rate": 8e-05, | |
| "loss": 0.8278362154960632, | |
| "step": 1054 | |
| }, | |
| { | |
| "epoch": 0.28129994672349495, | |
| "grad_norm": 0.734375, | |
| "learning_rate": 8e-05, | |
| "loss": 0.8697912096977234, | |
| "step": 1056 | |
| }, | |
| { | |
| "epoch": 0.2818327117741076, | |
| "grad_norm": 0.70703125, | |
| "learning_rate": 8e-05, | |
| "loss": 0.8401728868484497, | |
| "step": 1058 | |
| }, | |
| { | |
| "epoch": 0.2823654768247203, | |
| "grad_norm": 0.6953125, | |
| "learning_rate": 8e-05, | |
| "loss": 0.8192448019981384, | |
| "step": 1060 | |
| }, | |
| { | |
| "epoch": 0.28289824187533297, | |
| "grad_norm": 0.68359375, | |
| "learning_rate": 8e-05, | |
| "loss": 0.8254504203796387, | |
| "step": 1062 | |
| }, | |
| { | |
| "epoch": 0.28343100692594564, | |
| "grad_norm": 0.7109375, | |
| "learning_rate": 8e-05, | |
| "loss": 0.8694897294044495, | |
| "step": 1064 | |
| }, | |
| { | |
| "epoch": 0.2839637719765583, | |
| "grad_norm": 0.68359375, | |
| "learning_rate": 8e-05, | |
| "loss": 0.8187917470932007, | |
| "step": 1066 | |
| }, | |
| { | |
| "epoch": 0.28449653702717104, | |
| "grad_norm": 0.703125, | |
| "learning_rate": 8e-05, | |
| "loss": 0.8559208512306213, | |
| "step": 1068 | |
| }, | |
| { | |
| "epoch": 0.2850293020777837, | |
| "grad_norm": 0.69921875, | |
| "learning_rate": 8e-05, | |
| "loss": 0.844135582447052, | |
| "step": 1070 | |
| }, | |
| { | |
| "epoch": 0.2855620671283964, | |
| "grad_norm": 0.6875, | |
| "learning_rate": 8e-05, | |
| "loss": 0.8289559483528137, | |
| "step": 1072 | |
| }, | |
| { | |
| "epoch": 0.28609483217900905, | |
| "grad_norm": 0.6953125, | |
| "learning_rate": 8e-05, | |
| "loss": 0.845726490020752, | |
| "step": 1074 | |
| }, | |
| { | |
| "epoch": 0.2866275972296217, | |
| "grad_norm": 0.703125, | |
| "learning_rate": 8e-05, | |
| "loss": 0.8291332125663757, | |
| "step": 1076 | |
| }, | |
| { | |
| "epoch": 0.2871603622802344, | |
| "grad_norm": 0.69921875, | |
| "learning_rate": 8e-05, | |
| "loss": 0.8118506073951721, | |
| "step": 1078 | |
| }, | |
| { | |
| "epoch": 0.2876931273308471, | |
| "grad_norm": 0.6953125, | |
| "learning_rate": 8e-05, | |
| "loss": 0.8339268565177917, | |
| "step": 1080 | |
| }, | |
| { | |
| "epoch": 0.2882258923814598, | |
| "grad_norm": 0.68359375, | |
| "learning_rate": 8e-05, | |
| "loss": 0.8273485898971558, | |
| "step": 1082 | |
| }, | |
| { | |
| "epoch": 0.28875865743207246, | |
| "grad_norm": 0.671875, | |
| "learning_rate": 8e-05, | |
| "loss": 0.8129878640174866, | |
| "step": 1084 | |
| }, | |
| { | |
| "epoch": 0.28929142248268513, | |
| "grad_norm": 0.68359375, | |
| "learning_rate": 8e-05, | |
| "loss": 0.8126479983329773, | |
| "step": 1086 | |
| }, | |
| { | |
| "epoch": 0.2898241875332978, | |
| "grad_norm": 0.6953125, | |
| "learning_rate": 8e-05, | |
| "loss": 0.8130111694335938, | |
| "step": 1088 | |
| }, | |
| { | |
| "epoch": 0.2903569525839105, | |
| "grad_norm": 0.68359375, | |
| "learning_rate": 8e-05, | |
| "loss": 0.8049681186676025, | |
| "step": 1090 | |
| }, | |
| { | |
| "epoch": 0.29088971763452315, | |
| "grad_norm": 0.69921875, | |
| "learning_rate": 8e-05, | |
| "loss": 0.8302510976791382, | |
| "step": 1092 | |
| }, | |
| { | |
| "epoch": 0.2914224826851359, | |
| "grad_norm": 0.68359375, | |
| "learning_rate": 8e-05, | |
| "loss": 0.7791358828544617, | |
| "step": 1094 | |
| }, | |
| { | |
| "epoch": 0.29195524773574855, | |
| "grad_norm": 0.6875, | |
| "learning_rate": 8e-05, | |
| "loss": 0.8203290700912476, | |
| "step": 1096 | |
| }, | |
| { | |
| "epoch": 0.2924880127863612, | |
| "grad_norm": 0.7265625, | |
| "learning_rate": 8e-05, | |
| "loss": 0.8215634822845459, | |
| "step": 1098 | |
| }, | |
| { | |
| "epoch": 0.2930207778369739, | |
| "grad_norm": 0.73046875, | |
| "learning_rate": 8e-05, | |
| "loss": 0.8419864177703857, | |
| "step": 1100 | |
| }, | |
| { | |
| "epoch": 0.29355354288758656, | |
| "grad_norm": 0.70703125, | |
| "learning_rate": 8e-05, | |
| "loss": 0.8286892175674438, | |
| "step": 1102 | |
| }, | |
| { | |
| "epoch": 0.29408630793819923, | |
| "grad_norm": 0.66796875, | |
| "learning_rate": 8e-05, | |
| "loss": 0.8043957948684692, | |
| "step": 1104 | |
| }, | |
| { | |
| "epoch": 0.29461907298881196, | |
| "grad_norm": 0.734375, | |
| "learning_rate": 8e-05, | |
| "loss": 0.8927145004272461, | |
| "step": 1106 | |
| }, | |
| { | |
| "epoch": 0.29515183803942463, | |
| "grad_norm": 0.6953125, | |
| "learning_rate": 8e-05, | |
| "loss": 0.8291870951652527, | |
| "step": 1108 | |
| }, | |
| { | |
| "epoch": 0.2956846030900373, | |
| "grad_norm": 0.6953125, | |
| "learning_rate": 8e-05, | |
| "loss": 0.8313156366348267, | |
| "step": 1110 | |
| }, | |
| { | |
| "epoch": 0.29621736814065, | |
| "grad_norm": 0.6953125, | |
| "learning_rate": 8e-05, | |
| "loss": 0.8501160144805908, | |
| "step": 1112 | |
| }, | |
| { | |
| "epoch": 0.29675013319126264, | |
| "grad_norm": 0.66015625, | |
| "learning_rate": 8e-05, | |
| "loss": 0.7889755368232727, | |
| "step": 1114 | |
| }, | |
| { | |
| "epoch": 0.2972828982418753, | |
| "grad_norm": 0.69140625, | |
| "learning_rate": 8e-05, | |
| "loss": 0.8166150450706482, | |
| "step": 1116 | |
| }, | |
| { | |
| "epoch": 0.297815663292488, | |
| "grad_norm": 0.7109375, | |
| "learning_rate": 8e-05, | |
| "loss": 0.8412237167358398, | |
| "step": 1118 | |
| }, | |
| { | |
| "epoch": 0.2983484283431007, | |
| "grad_norm": 0.703125, | |
| "learning_rate": 8e-05, | |
| "loss": 0.8289856910705566, | |
| "step": 1120 | |
| }, | |
| { | |
| "epoch": 0.2988811933937134, | |
| "grad_norm": 0.6875, | |
| "learning_rate": 8e-05, | |
| "loss": 0.8027371168136597, | |
| "step": 1122 | |
| }, | |
| { | |
| "epoch": 0.29941395844432606, | |
| "grad_norm": 0.69140625, | |
| "learning_rate": 8e-05, | |
| "loss": 0.8847401738166809, | |
| "step": 1124 | |
| }, | |
| { | |
| "epoch": 0.2999467234949387, | |
| "grad_norm": 0.6875, | |
| "learning_rate": 8e-05, | |
| "loss": 0.8147903680801392, | |
| "step": 1126 | |
| }, | |
| { | |
| "epoch": 0.3004794885455514, | |
| "grad_norm": 0.6875, | |
| "learning_rate": 8e-05, | |
| "loss": 0.8418508768081665, | |
| "step": 1128 | |
| }, | |
| { | |
| "epoch": 0.30101225359616407, | |
| "grad_norm": 0.68359375, | |
| "learning_rate": 8e-05, | |
| "loss": 0.8261881470680237, | |
| "step": 1130 | |
| }, | |
| { | |
| "epoch": 0.3015450186467768, | |
| "grad_norm": 0.67578125, | |
| "learning_rate": 8e-05, | |
| "loss": 0.8368499875068665, | |
| "step": 1132 | |
| }, | |
| { | |
| "epoch": 0.30207778369738947, | |
| "grad_norm": 0.703125, | |
| "learning_rate": 8e-05, | |
| "loss": 0.7928882837295532, | |
| "step": 1134 | |
| }, | |
| { | |
| "epoch": 0.30261054874800214, | |
| "grad_norm": 0.68359375, | |
| "learning_rate": 8e-05, | |
| "loss": 0.8569204211235046, | |
| "step": 1136 | |
| }, | |
| { | |
| "epoch": 0.3031433137986148, | |
| "grad_norm": 0.6796875, | |
| "learning_rate": 8e-05, | |
| "loss": 0.789159893989563, | |
| "step": 1138 | |
| }, | |
| { | |
| "epoch": 0.3036760788492275, | |
| "grad_norm": 0.6875, | |
| "learning_rate": 8e-05, | |
| "loss": 0.8144698143005371, | |
| "step": 1140 | |
| }, | |
| { | |
| "epoch": 0.30420884389984015, | |
| "grad_norm": 0.6796875, | |
| "learning_rate": 8e-05, | |
| "loss": 0.8477827310562134, | |
| "step": 1142 | |
| }, | |
| { | |
| "epoch": 0.3047416089504528, | |
| "grad_norm": 0.68359375, | |
| "learning_rate": 8e-05, | |
| "loss": 0.7713529467582703, | |
| "step": 1144 | |
| }, | |
| { | |
| "epoch": 0.30527437400106555, | |
| "grad_norm": 0.67578125, | |
| "learning_rate": 8e-05, | |
| "loss": 0.8129821419715881, | |
| "step": 1146 | |
| }, | |
| { | |
| "epoch": 0.3058071390516782, | |
| "grad_norm": 0.6875, | |
| "learning_rate": 8e-05, | |
| "loss": 0.8247785568237305, | |
| "step": 1148 | |
| }, | |
| { | |
| "epoch": 0.3063399041022909, | |
| "grad_norm": 0.72265625, | |
| "learning_rate": 8e-05, | |
| "loss": 0.8340808153152466, | |
| "step": 1150 | |
| }, | |
| { | |
| "epoch": 0.30687266915290357, | |
| "grad_norm": 0.671875, | |
| "learning_rate": 8e-05, | |
| "loss": 0.837786078453064, | |
| "step": 1152 | |
| }, | |
| { | |
| "epoch": 0.30740543420351624, | |
| "grad_norm": 0.68359375, | |
| "learning_rate": 8e-05, | |
| "loss": 0.8202642202377319, | |
| "step": 1154 | |
| }, | |
| { | |
| "epoch": 0.3079381992541289, | |
| "grad_norm": 0.68359375, | |
| "learning_rate": 8e-05, | |
| "loss": 0.7888924479484558, | |
| "step": 1156 | |
| }, | |
| { | |
| "epoch": 0.30847096430474163, | |
| "grad_norm": 0.7109375, | |
| "learning_rate": 8e-05, | |
| "loss": 0.8010126352310181, | |
| "step": 1158 | |
| }, | |
| { | |
| "epoch": 0.3090037293553543, | |
| "grad_norm": 0.6640625, | |
| "learning_rate": 8e-05, | |
| "loss": 0.8378207683563232, | |
| "step": 1160 | |
| }, | |
| { | |
| "epoch": 0.309536494405967, | |
| "grad_norm": 0.671875, | |
| "learning_rate": 8e-05, | |
| "loss": 0.7908037900924683, | |
| "step": 1162 | |
| }, | |
| { | |
| "epoch": 0.31006925945657965, | |
| "grad_norm": 0.6640625, | |
| "learning_rate": 8e-05, | |
| "loss": 0.793649435043335, | |
| "step": 1164 | |
| }, | |
| { | |
| "epoch": 0.3106020245071923, | |
| "grad_norm": 0.703125, | |
| "learning_rate": 8e-05, | |
| "loss": 0.8675748109817505, | |
| "step": 1166 | |
| }, | |
| { | |
| "epoch": 0.311134789557805, | |
| "grad_norm": 0.65234375, | |
| "learning_rate": 8e-05, | |
| "loss": 0.7880247831344604, | |
| "step": 1168 | |
| }, | |
| { | |
| "epoch": 0.31166755460841766, | |
| "grad_norm": 0.6875, | |
| "learning_rate": 8e-05, | |
| "loss": 0.8333700299263, | |
| "step": 1170 | |
| }, | |
| { | |
| "epoch": 0.3122003196590304, | |
| "grad_norm": 0.67578125, | |
| "learning_rate": 8e-05, | |
| "loss": 0.8213086724281311, | |
| "step": 1172 | |
| }, | |
| { | |
| "epoch": 0.31273308470964306, | |
| "grad_norm": 0.70703125, | |
| "learning_rate": 8e-05, | |
| "loss": 0.8234198689460754, | |
| "step": 1174 | |
| }, | |
| { | |
| "epoch": 0.31326584976025573, | |
| "grad_norm": 0.70703125, | |
| "learning_rate": 8e-05, | |
| "loss": 0.8723286390304565, | |
| "step": 1176 | |
| }, | |
| { | |
| "epoch": 0.3137986148108684, | |
| "grad_norm": 0.69921875, | |
| "learning_rate": 8e-05, | |
| "loss": 0.8310482501983643, | |
| "step": 1178 | |
| }, | |
| { | |
| "epoch": 0.3143313798614811, | |
| "grad_norm": 0.6875, | |
| "learning_rate": 8e-05, | |
| "loss": 0.8434503674507141, | |
| "step": 1180 | |
| }, | |
| { | |
| "epoch": 0.31486414491209375, | |
| "grad_norm": 0.6796875, | |
| "learning_rate": 8e-05, | |
| "loss": 0.8098523020744324, | |
| "step": 1182 | |
| }, | |
| { | |
| "epoch": 0.3153969099627065, | |
| "grad_norm": 0.7265625, | |
| "learning_rate": 8e-05, | |
| "loss": 0.8657939434051514, | |
| "step": 1184 | |
| }, | |
| { | |
| "epoch": 0.31592967501331914, | |
| "grad_norm": 0.703125, | |
| "learning_rate": 8e-05, | |
| "loss": 0.8621107339859009, | |
| "step": 1186 | |
| }, | |
| { | |
| "epoch": 0.3164624400639318, | |
| "grad_norm": 0.72265625, | |
| "learning_rate": 8e-05, | |
| "loss": 0.8273295760154724, | |
| "step": 1188 | |
| }, | |
| { | |
| "epoch": 0.3169952051145445, | |
| "grad_norm": 0.66015625, | |
| "learning_rate": 8e-05, | |
| "loss": 0.7910110354423523, | |
| "step": 1190 | |
| }, | |
| { | |
| "epoch": 0.31752797016515716, | |
| "grad_norm": 0.69921875, | |
| "learning_rate": 8e-05, | |
| "loss": 0.8113824129104614, | |
| "step": 1192 | |
| }, | |
| { | |
| "epoch": 0.31806073521576983, | |
| "grad_norm": 0.64453125, | |
| "learning_rate": 8e-05, | |
| "loss": 0.8246937394142151, | |
| "step": 1194 | |
| }, | |
| { | |
| "epoch": 0.3185935002663825, | |
| "grad_norm": 0.6953125, | |
| "learning_rate": 8e-05, | |
| "loss": 0.835896909236908, | |
| "step": 1196 | |
| }, | |
| { | |
| "epoch": 0.31912626531699523, | |
| "grad_norm": 0.69140625, | |
| "learning_rate": 8e-05, | |
| "loss": 0.8543767929077148, | |
| "step": 1198 | |
| }, | |
| { | |
| "epoch": 0.3196590303676079, | |
| "grad_norm": 0.67578125, | |
| "learning_rate": 8e-05, | |
| "loss": 0.7878560423851013, | |
| "step": 1200 | |
| }, | |
| { | |
| "epoch": 0.32019179541822057, | |
| "grad_norm": 0.6875, | |
| "learning_rate": 8e-05, | |
| "loss": 0.8405436277389526, | |
| "step": 1202 | |
| }, | |
| { | |
| "epoch": 0.32072456046883324, | |
| "grad_norm": 0.70703125, | |
| "learning_rate": 8e-05, | |
| "loss": 0.8454638719558716, | |
| "step": 1204 | |
| }, | |
| { | |
| "epoch": 0.3212573255194459, | |
| "grad_norm": 0.72265625, | |
| "learning_rate": 8e-05, | |
| "loss": 0.8819068074226379, | |
| "step": 1206 | |
| }, | |
| { | |
| "epoch": 0.3217900905700586, | |
| "grad_norm": 0.6796875, | |
| "learning_rate": 8e-05, | |
| "loss": 0.8613421320915222, | |
| "step": 1208 | |
| }, | |
| { | |
| "epoch": 0.3223228556206713, | |
| "grad_norm": 0.703125, | |
| "learning_rate": 8e-05, | |
| "loss": 0.84280925989151, | |
| "step": 1210 | |
| }, | |
| { | |
| "epoch": 0.322855620671284, | |
| "grad_norm": 0.73046875, | |
| "learning_rate": 8e-05, | |
| "loss": 0.8268940448760986, | |
| "step": 1212 | |
| }, | |
| { | |
| "epoch": 0.32338838572189665, | |
| "grad_norm": 0.69140625, | |
| "learning_rate": 8e-05, | |
| "loss": 0.8723543882369995, | |
| "step": 1214 | |
| }, | |
| { | |
| "epoch": 0.3239211507725093, | |
| "grad_norm": 0.6953125, | |
| "learning_rate": 8e-05, | |
| "loss": 0.8331593871116638, | |
| "step": 1216 | |
| }, | |
| { | |
| "epoch": 0.324453915823122, | |
| "grad_norm": 0.671875, | |
| "learning_rate": 8e-05, | |
| "loss": 0.8328378200531006, | |
| "step": 1218 | |
| }, | |
| { | |
| "epoch": 0.32498668087373467, | |
| "grad_norm": 0.69140625, | |
| "learning_rate": 8e-05, | |
| "loss": 0.8236236572265625, | |
| "step": 1220 | |
| }, | |
| { | |
| "epoch": 0.32551944592434734, | |
| "grad_norm": 0.69140625, | |
| "learning_rate": 8e-05, | |
| "loss": 0.9025675058364868, | |
| "step": 1222 | |
| }, | |
| { | |
| "epoch": 0.32605221097496007, | |
| "grad_norm": 0.67578125, | |
| "learning_rate": 8e-05, | |
| "loss": 0.8124939799308777, | |
| "step": 1224 | |
| }, | |
| { | |
| "epoch": 0.32658497602557274, | |
| "grad_norm": 0.66796875, | |
| "learning_rate": 8e-05, | |
| "loss": 0.8572587370872498, | |
| "step": 1226 | |
| }, | |
| { | |
| "epoch": 0.3271177410761854, | |
| "grad_norm": 0.6953125, | |
| "learning_rate": 8e-05, | |
| "loss": 0.7765132188796997, | |
| "step": 1228 | |
| }, | |
| { | |
| "epoch": 0.3276505061267981, | |
| "grad_norm": 0.68359375, | |
| "learning_rate": 8e-05, | |
| "loss": 0.8381130695343018, | |
| "step": 1230 | |
| }, | |
| { | |
| "epoch": 0.32818327117741075, | |
| "grad_norm": 0.67578125, | |
| "learning_rate": 8e-05, | |
| "loss": 0.8289612531661987, | |
| "step": 1232 | |
| }, | |
| { | |
| "epoch": 0.3287160362280234, | |
| "grad_norm": 0.66796875, | |
| "learning_rate": 8e-05, | |
| "loss": 0.8231470584869385, | |
| "step": 1234 | |
| }, | |
| { | |
| "epoch": 0.3292488012786361, | |
| "grad_norm": 0.65234375, | |
| "learning_rate": 8e-05, | |
| "loss": 0.8185766339302063, | |
| "step": 1236 | |
| }, | |
| { | |
| "epoch": 0.3297815663292488, | |
| "grad_norm": 0.66015625, | |
| "learning_rate": 8e-05, | |
| "loss": 0.809109091758728, | |
| "step": 1238 | |
| }, | |
| { | |
| "epoch": 0.3303143313798615, | |
| "grad_norm": 0.671875, | |
| "learning_rate": 8e-05, | |
| "loss": 0.8600226044654846, | |
| "step": 1240 | |
| }, | |
| { | |
| "epoch": 0.33084709643047416, | |
| "grad_norm": 0.66796875, | |
| "learning_rate": 8e-05, | |
| "loss": 0.8347437977790833, | |
| "step": 1242 | |
| }, | |
| { | |
| "epoch": 0.33137986148108683, | |
| "grad_norm": 0.71484375, | |
| "learning_rate": 8e-05, | |
| "loss": 0.8622264862060547, | |
| "step": 1244 | |
| }, | |
| { | |
| "epoch": 0.3319126265316995, | |
| "grad_norm": 0.67578125, | |
| "learning_rate": 8e-05, | |
| "loss": 0.8058921098709106, | |
| "step": 1246 | |
| }, | |
| { | |
| "epoch": 0.3324453915823122, | |
| "grad_norm": 0.66796875, | |
| "learning_rate": 8e-05, | |
| "loss": 0.8255428075790405, | |
| "step": 1248 | |
| }, | |
| { | |
| "epoch": 0.3329781566329249, | |
| "grad_norm": 0.65234375, | |
| "learning_rate": 8e-05, | |
| "loss": 0.8035192489624023, | |
| "step": 1250 | |
| }, | |
| { | |
| "epoch": 0.3335109216835376, | |
| "grad_norm": 0.67578125, | |
| "learning_rate": 8e-05, | |
| "loss": 0.8229770660400391, | |
| "step": 1252 | |
| }, | |
| { | |
| "epoch": 0.33404368673415025, | |
| "grad_norm": 0.67578125, | |
| "learning_rate": 8e-05, | |
| "loss": 0.8099672198295593, | |
| "step": 1254 | |
| }, | |
| { | |
| "epoch": 0.3345764517847629, | |
| "grad_norm": 0.69140625, | |
| "learning_rate": 8e-05, | |
| "loss": 0.8221847414970398, | |
| "step": 1256 | |
| }, | |
| { | |
| "epoch": 0.3351092168353756, | |
| "grad_norm": 0.6953125, | |
| "learning_rate": 8e-05, | |
| "loss": 0.8075480461120605, | |
| "step": 1258 | |
| }, | |
| { | |
| "epoch": 0.33564198188598826, | |
| "grad_norm": 0.70703125, | |
| "learning_rate": 8e-05, | |
| "loss": 0.8237086534500122, | |
| "step": 1260 | |
| }, | |
| { | |
| "epoch": 0.33617474693660093, | |
| "grad_norm": 0.69140625, | |
| "learning_rate": 8e-05, | |
| "loss": 0.8089755773544312, | |
| "step": 1262 | |
| }, | |
| { | |
| "epoch": 0.33670751198721366, | |
| "grad_norm": 0.68359375, | |
| "learning_rate": 8e-05, | |
| "loss": 0.8307473063468933, | |
| "step": 1264 | |
| }, | |
| { | |
| "epoch": 0.33724027703782633, | |
| "grad_norm": 0.6796875, | |
| "learning_rate": 8e-05, | |
| "loss": 0.8326480388641357, | |
| "step": 1266 | |
| }, | |
| { | |
| "epoch": 0.337773042088439, | |
| "grad_norm": 0.68359375, | |
| "learning_rate": 8e-05, | |
| "loss": 0.8556010723114014, | |
| "step": 1268 | |
| }, | |
| { | |
| "epoch": 0.3383058071390517, | |
| "grad_norm": 0.671875, | |
| "learning_rate": 8e-05, | |
| "loss": 0.8126214146614075, | |
| "step": 1270 | |
| }, | |
| { | |
| "epoch": 0.33883857218966434, | |
| "grad_norm": 0.6796875, | |
| "learning_rate": 8e-05, | |
| "loss": 0.8234031796455383, | |
| "step": 1272 | |
| }, | |
| { | |
| "epoch": 0.339371337240277, | |
| "grad_norm": 0.6484375, | |
| "learning_rate": 8e-05, | |
| "loss": 0.8191617727279663, | |
| "step": 1274 | |
| }, | |
| { | |
| "epoch": 0.33990410229088974, | |
| "grad_norm": 0.640625, | |
| "learning_rate": 8e-05, | |
| "loss": 0.7701093554496765, | |
| "step": 1276 | |
| }, | |
| { | |
| "epoch": 0.3404368673415024, | |
| "grad_norm": 0.6640625, | |
| "learning_rate": 8e-05, | |
| "loss": 0.82283616065979, | |
| "step": 1278 | |
| }, | |
| { | |
| "epoch": 0.3409696323921151, | |
| "grad_norm": 0.671875, | |
| "learning_rate": 8e-05, | |
| "loss": 0.8266280889511108, | |
| "step": 1280 | |
| }, | |
| { | |
| "epoch": 0.34150239744272776, | |
| "grad_norm": 0.6484375, | |
| "learning_rate": 8e-05, | |
| "loss": 0.7687116861343384, | |
| "step": 1282 | |
| }, | |
| { | |
| "epoch": 0.34203516249334043, | |
| "grad_norm": 0.65625, | |
| "learning_rate": 8e-05, | |
| "loss": 0.8157622218132019, | |
| "step": 1284 | |
| }, | |
| { | |
| "epoch": 0.3425679275439531, | |
| "grad_norm": 0.68359375, | |
| "learning_rate": 8e-05, | |
| "loss": 0.8259207606315613, | |
| "step": 1286 | |
| }, | |
| { | |
| "epoch": 0.34310069259456577, | |
| "grad_norm": 0.66015625, | |
| "learning_rate": 8e-05, | |
| "loss": 0.7940812110900879, | |
| "step": 1288 | |
| }, | |
| { | |
| "epoch": 0.3436334576451785, | |
| "grad_norm": 0.6796875, | |
| "learning_rate": 8e-05, | |
| "loss": 0.8644148707389832, | |
| "step": 1290 | |
| }, | |
| { | |
| "epoch": 0.34416622269579117, | |
| "grad_norm": 0.65234375, | |
| "learning_rate": 8e-05, | |
| "loss": 0.8318431973457336, | |
| "step": 1292 | |
| }, | |
| { | |
| "epoch": 0.34469898774640384, | |
| "grad_norm": 0.65234375, | |
| "learning_rate": 8e-05, | |
| "loss": 0.7810943126678467, | |
| "step": 1294 | |
| }, | |
| { | |
| "epoch": 0.3452317527970165, | |
| "grad_norm": 0.65625, | |
| "learning_rate": 8e-05, | |
| "loss": 0.8003625273704529, | |
| "step": 1296 | |
| }, | |
| { | |
| "epoch": 0.3457645178476292, | |
| "grad_norm": 0.65625, | |
| "learning_rate": 8e-05, | |
| "loss": 0.7984561920166016, | |
| "step": 1298 | |
| }, | |
| { | |
| "epoch": 0.34629728289824185, | |
| "grad_norm": 0.65625, | |
| "learning_rate": 8e-05, | |
| "loss": 0.8027920722961426, | |
| "step": 1300 | |
| }, | |
| { | |
| "epoch": 0.3468300479488546, | |
| "grad_norm": 0.71875, | |
| "learning_rate": 8e-05, | |
| "loss": 0.8382170796394348, | |
| "step": 1302 | |
| }, | |
| { | |
| "epoch": 0.34736281299946725, | |
| "grad_norm": 0.6953125, | |
| "learning_rate": 8e-05, | |
| "loss": 0.8113511800765991, | |
| "step": 1304 | |
| }, | |
| { | |
| "epoch": 0.3478955780500799, | |
| "grad_norm": 0.63671875, | |
| "learning_rate": 8e-05, | |
| "loss": 0.7773157358169556, | |
| "step": 1306 | |
| }, | |
| { | |
| "epoch": 0.3484283431006926, | |
| "grad_norm": 0.66796875, | |
| "learning_rate": 8e-05, | |
| "loss": 0.8348230719566345, | |
| "step": 1308 | |
| }, | |
| { | |
| "epoch": 0.34896110815130527, | |
| "grad_norm": 0.6796875, | |
| "learning_rate": 8e-05, | |
| "loss": 0.8246813416481018, | |
| "step": 1310 | |
| }, | |
| { | |
| "epoch": 0.34949387320191794, | |
| "grad_norm": 0.6484375, | |
| "learning_rate": 8e-05, | |
| "loss": 0.8007385730743408, | |
| "step": 1312 | |
| }, | |
| { | |
| "epoch": 0.3500266382525306, | |
| "grad_norm": 0.65234375, | |
| "learning_rate": 8e-05, | |
| "loss": 0.782619297504425, | |
| "step": 1314 | |
| }, | |
| { | |
| "epoch": 0.35055940330314334, | |
| "grad_norm": 0.67578125, | |
| "learning_rate": 8e-05, | |
| "loss": 0.8223733305931091, | |
| "step": 1316 | |
| }, | |
| { | |
| "epoch": 0.351092168353756, | |
| "grad_norm": 0.6640625, | |
| "learning_rate": 8e-05, | |
| "loss": 0.7972275018692017, | |
| "step": 1318 | |
| }, | |
| { | |
| "epoch": 0.3516249334043687, | |
| "grad_norm": 0.6640625, | |
| "learning_rate": 8e-05, | |
| "loss": 0.806035578250885, | |
| "step": 1320 | |
| }, | |
| { | |
| "epoch": 0.35215769845498135, | |
| "grad_norm": 0.67578125, | |
| "learning_rate": 8e-05, | |
| "loss": 0.8033435940742493, | |
| "step": 1322 | |
| }, | |
| { | |
| "epoch": 0.352690463505594, | |
| "grad_norm": 0.66796875, | |
| "learning_rate": 8e-05, | |
| "loss": 0.8439804315567017, | |
| "step": 1324 | |
| }, | |
| { | |
| "epoch": 0.3532232285562067, | |
| "grad_norm": 0.68359375, | |
| "learning_rate": 8e-05, | |
| "loss": 0.8247978091239929, | |
| "step": 1326 | |
| }, | |
| { | |
| "epoch": 0.3537559936068194, | |
| "grad_norm": 0.65625, | |
| "learning_rate": 8e-05, | |
| "loss": 0.7770576477050781, | |
| "step": 1328 | |
| }, | |
| { | |
| "epoch": 0.3542887586574321, | |
| "grad_norm": 0.66015625, | |
| "learning_rate": 8e-05, | |
| "loss": 0.8157473206520081, | |
| "step": 1330 | |
| }, | |
| { | |
| "epoch": 0.35482152370804476, | |
| "grad_norm": 0.6484375, | |
| "learning_rate": 8e-05, | |
| "loss": 0.8069726824760437, | |
| "step": 1332 | |
| }, | |
| { | |
| "epoch": 0.35535428875865743, | |
| "grad_norm": 0.6640625, | |
| "learning_rate": 8e-05, | |
| "loss": 0.8322383165359497, | |
| "step": 1334 | |
| }, | |
| { | |
| "epoch": 0.3558870538092701, | |
| "grad_norm": 0.6640625, | |
| "learning_rate": 8e-05, | |
| "loss": 0.797613799571991, | |
| "step": 1336 | |
| }, | |
| { | |
| "epoch": 0.3564198188598828, | |
| "grad_norm": 0.6875, | |
| "learning_rate": 8e-05, | |
| "loss": 0.8209842443466187, | |
| "step": 1338 | |
| }, | |
| { | |
| "epoch": 0.35695258391049545, | |
| "grad_norm": 0.65234375, | |
| "learning_rate": 8e-05, | |
| "loss": 0.8391934037208557, | |
| "step": 1340 | |
| }, | |
| { | |
| "epoch": 0.3574853489611082, | |
| "grad_norm": 0.6640625, | |
| "learning_rate": 8e-05, | |
| "loss": 0.8320241570472717, | |
| "step": 1342 | |
| }, | |
| { | |
| "epoch": 0.35801811401172084, | |
| "grad_norm": 0.6640625, | |
| "learning_rate": 8e-05, | |
| "loss": 0.8497931957244873, | |
| "step": 1344 | |
| }, | |
| { | |
| "epoch": 0.3585508790623335, | |
| "grad_norm": 0.64453125, | |
| "learning_rate": 8e-05, | |
| "loss": 0.7735468149185181, | |
| "step": 1346 | |
| }, | |
| { | |
| "epoch": 0.3590836441129462, | |
| "grad_norm": 0.66796875, | |
| "learning_rate": 8e-05, | |
| "loss": 0.7980416417121887, | |
| "step": 1348 | |
| }, | |
| { | |
| "epoch": 0.35961640916355886, | |
| "grad_norm": 0.63671875, | |
| "learning_rate": 8e-05, | |
| "loss": 0.7885520458221436, | |
| "step": 1350 | |
| }, | |
| { | |
| "epoch": 0.36014917421417153, | |
| "grad_norm": 0.64453125, | |
| "learning_rate": 8e-05, | |
| "loss": 0.786660373210907, | |
| "step": 1352 | |
| }, | |
| { | |
| "epoch": 0.36068193926478426, | |
| "grad_norm": 0.640625, | |
| "learning_rate": 8e-05, | |
| "loss": 0.7961907386779785, | |
| "step": 1354 | |
| }, | |
| { | |
| "epoch": 0.36121470431539693, | |
| "grad_norm": 0.65625, | |
| "learning_rate": 8e-05, | |
| "loss": 0.8075615763664246, | |
| "step": 1356 | |
| }, | |
| { | |
| "epoch": 0.3617474693660096, | |
| "grad_norm": 0.65625, | |
| "learning_rate": 8e-05, | |
| "loss": 0.7986862659454346, | |
| "step": 1358 | |
| }, | |
| { | |
| "epoch": 0.36228023441662227, | |
| "grad_norm": 0.6328125, | |
| "learning_rate": 8e-05, | |
| "loss": 0.7794106006622314, | |
| "step": 1360 | |
| }, | |
| { | |
| "epoch": 0.36281299946723494, | |
| "grad_norm": 0.6484375, | |
| "learning_rate": 8e-05, | |
| "loss": 0.8085299134254456, | |
| "step": 1362 | |
| }, | |
| { | |
| "epoch": 0.3633457645178476, | |
| "grad_norm": 0.65625, | |
| "learning_rate": 8e-05, | |
| "loss": 0.8172681927680969, | |
| "step": 1364 | |
| }, | |
| { | |
| "epoch": 0.3638785295684603, | |
| "grad_norm": 0.6484375, | |
| "learning_rate": 8e-05, | |
| "loss": 0.8157686591148376, | |
| "step": 1366 | |
| }, | |
| { | |
| "epoch": 0.364411294619073, | |
| "grad_norm": 0.64453125, | |
| "learning_rate": 8e-05, | |
| "loss": 0.8455065488815308, | |
| "step": 1368 | |
| }, | |
| { | |
| "epoch": 0.3649440596696857, | |
| "grad_norm": 0.66796875, | |
| "learning_rate": 8e-05, | |
| "loss": 0.8134093284606934, | |
| "step": 1370 | |
| }, | |
| { | |
| "epoch": 0.36547682472029835, | |
| "grad_norm": 0.609375, | |
| "learning_rate": 8e-05, | |
| "loss": 0.7692434787750244, | |
| "step": 1372 | |
| }, | |
| { | |
| "epoch": 0.366009589770911, | |
| "grad_norm": 0.65625, | |
| "learning_rate": 8e-05, | |
| "loss": 0.7843102812767029, | |
| "step": 1374 | |
| }, | |
| { | |
| "epoch": 0.3665423548215237, | |
| "grad_norm": 0.66796875, | |
| "learning_rate": 8e-05, | |
| "loss": 0.8318651914596558, | |
| "step": 1376 | |
| }, | |
| { | |
| "epoch": 0.36707511987213637, | |
| "grad_norm": 0.6640625, | |
| "learning_rate": 8e-05, | |
| "loss": 0.8075577616691589, | |
| "step": 1378 | |
| }, | |
| { | |
| "epoch": 0.3676078849227491, | |
| "grad_norm": 0.66015625, | |
| "learning_rate": 8e-05, | |
| "loss": 0.7955432534217834, | |
| "step": 1380 | |
| }, | |
| { | |
| "epoch": 0.36814064997336177, | |
| "grad_norm": 0.6484375, | |
| "learning_rate": 8e-05, | |
| "loss": 0.7789199352264404, | |
| "step": 1382 | |
| }, | |
| { | |
| "epoch": 0.36867341502397444, | |
| "grad_norm": 0.69140625, | |
| "learning_rate": 8e-05, | |
| "loss": 0.8257974982261658, | |
| "step": 1384 | |
| }, | |
| { | |
| "epoch": 0.3692061800745871, | |
| "grad_norm": 0.66015625, | |
| "learning_rate": 8e-05, | |
| "loss": 0.7887584567070007, | |
| "step": 1386 | |
| }, | |
| { | |
| "epoch": 0.3697389451251998, | |
| "grad_norm": 0.70703125, | |
| "learning_rate": 8e-05, | |
| "loss": 0.8510478734970093, | |
| "step": 1388 | |
| }, | |
| { | |
| "epoch": 0.37027171017581245, | |
| "grad_norm": 0.6875, | |
| "learning_rate": 8e-05, | |
| "loss": 0.8060406446456909, | |
| "step": 1390 | |
| }, | |
| { | |
| "epoch": 0.3708044752264251, | |
| "grad_norm": 0.65625, | |
| "learning_rate": 8e-05, | |
| "loss": 0.7955806851387024, | |
| "step": 1392 | |
| }, | |
| { | |
| "epoch": 0.37133724027703785, | |
| "grad_norm": 0.65234375, | |
| "learning_rate": 8e-05, | |
| "loss": 0.7971813082695007, | |
| "step": 1394 | |
| }, | |
| { | |
| "epoch": 0.3718700053276505, | |
| "grad_norm": 0.65625, | |
| "learning_rate": 8e-05, | |
| "loss": 0.7997479438781738, | |
| "step": 1396 | |
| }, | |
| { | |
| "epoch": 0.3724027703782632, | |
| "grad_norm": 0.66015625, | |
| "learning_rate": 8e-05, | |
| "loss": 0.8180658221244812, | |
| "step": 1398 | |
| }, | |
| { | |
| "epoch": 0.37293553542887586, | |
| "grad_norm": 0.65234375, | |
| "learning_rate": 8e-05, | |
| "loss": 0.790256142616272, | |
| "step": 1400 | |
| }, | |
| { | |
| "epoch": 0.37346830047948854, | |
| "grad_norm": 0.640625, | |
| "learning_rate": 8e-05, | |
| "loss": 0.7598444223403931, | |
| "step": 1402 | |
| }, | |
| { | |
| "epoch": 0.3740010655301012, | |
| "grad_norm": 0.671875, | |
| "learning_rate": 8e-05, | |
| "loss": 0.801741361618042, | |
| "step": 1404 | |
| }, | |
| { | |
| "epoch": 0.3745338305807139, | |
| "grad_norm": 0.65625, | |
| "learning_rate": 8e-05, | |
| "loss": 0.7819033861160278, | |
| "step": 1406 | |
| }, | |
| { | |
| "epoch": 0.3750665956313266, | |
| "grad_norm": 0.671875, | |
| "learning_rate": 8e-05, | |
| "loss": 0.8237056732177734, | |
| "step": 1408 | |
| }, | |
| { | |
| "epoch": 0.3755993606819393, | |
| "grad_norm": 0.68359375, | |
| "learning_rate": 8e-05, | |
| "loss": 0.8147417306900024, | |
| "step": 1410 | |
| }, | |
| { | |
| "epoch": 0.37613212573255195, | |
| "grad_norm": 0.66015625, | |
| "learning_rate": 8e-05, | |
| "loss": 0.7891062498092651, | |
| "step": 1412 | |
| }, | |
| { | |
| "epoch": 0.3766648907831646, | |
| "grad_norm": 0.65625, | |
| "learning_rate": 8e-05, | |
| "loss": 0.8018314838409424, | |
| "step": 1414 | |
| }, | |
| { | |
| "epoch": 0.3771976558337773, | |
| "grad_norm": 0.64453125, | |
| "learning_rate": 8e-05, | |
| "loss": 0.7935019135475159, | |
| "step": 1416 | |
| }, | |
| { | |
| "epoch": 0.37773042088438996, | |
| "grad_norm": 0.6953125, | |
| "learning_rate": 8e-05, | |
| "loss": 0.8369472622871399, | |
| "step": 1418 | |
| }, | |
| { | |
| "epoch": 0.3782631859350027, | |
| "grad_norm": 0.66796875, | |
| "learning_rate": 8e-05, | |
| "loss": 0.7935312986373901, | |
| "step": 1420 | |
| }, | |
| { | |
| "epoch": 0.37879595098561536, | |
| "grad_norm": 0.6484375, | |
| "learning_rate": 8e-05, | |
| "loss": 0.7909234166145325, | |
| "step": 1422 | |
| }, | |
| { | |
| "epoch": 0.37932871603622803, | |
| "grad_norm": 0.6640625, | |
| "learning_rate": 8e-05, | |
| "loss": 0.817375898361206, | |
| "step": 1424 | |
| }, | |
| { | |
| "epoch": 0.3798614810868407, | |
| "grad_norm": 0.640625, | |
| "learning_rate": 8e-05, | |
| "loss": 0.809626042842865, | |
| "step": 1426 | |
| }, | |
| { | |
| "epoch": 0.3803942461374534, | |
| "grad_norm": 0.640625, | |
| "learning_rate": 8e-05, | |
| "loss": 0.7614900469779968, | |
| "step": 1428 | |
| }, | |
| { | |
| "epoch": 0.38092701118806604, | |
| "grad_norm": 0.64453125, | |
| "learning_rate": 8e-05, | |
| "loss": 0.7885522842407227, | |
| "step": 1430 | |
| }, | |
| { | |
| "epoch": 0.3814597762386787, | |
| "grad_norm": 0.65234375, | |
| "learning_rate": 8e-05, | |
| "loss": 0.7955629825592041, | |
| "step": 1432 | |
| }, | |
| { | |
| "epoch": 0.38199254128929144, | |
| "grad_norm": 0.64453125, | |
| "learning_rate": 8e-05, | |
| "loss": 0.7892552614212036, | |
| "step": 1434 | |
| }, | |
| { | |
| "epoch": 0.3825253063399041, | |
| "grad_norm": 0.64453125, | |
| "learning_rate": 8e-05, | |
| "loss": 0.8095505833625793, | |
| "step": 1436 | |
| }, | |
| { | |
| "epoch": 0.3830580713905168, | |
| "grad_norm": 0.6171875, | |
| "learning_rate": 8e-05, | |
| "loss": 0.7699292302131653, | |
| "step": 1438 | |
| }, | |
| { | |
| "epoch": 0.38359083644112946, | |
| "grad_norm": 0.66015625, | |
| "learning_rate": 8e-05, | |
| "loss": 0.8210839629173279, | |
| "step": 1440 | |
| }, | |
| { | |
| "epoch": 0.38412360149174213, | |
| "grad_norm": 0.62890625, | |
| "learning_rate": 8e-05, | |
| "loss": 0.7994286417961121, | |
| "step": 1442 | |
| }, | |
| { | |
| "epoch": 0.3846563665423548, | |
| "grad_norm": 0.6484375, | |
| "learning_rate": 8e-05, | |
| "loss": 0.7771030068397522, | |
| "step": 1444 | |
| }, | |
| { | |
| "epoch": 0.3851891315929675, | |
| "grad_norm": 0.65234375, | |
| "learning_rate": 8e-05, | |
| "loss": 0.7784361839294434, | |
| "step": 1446 | |
| }, | |
| { | |
| "epoch": 0.3857218966435802, | |
| "grad_norm": 0.6328125, | |
| "learning_rate": 8e-05, | |
| "loss": 0.8078509569168091, | |
| "step": 1448 | |
| }, | |
| { | |
| "epoch": 0.38625466169419287, | |
| "grad_norm": 0.67578125, | |
| "learning_rate": 8e-05, | |
| "loss": 0.8055387735366821, | |
| "step": 1450 | |
| }, | |
| { | |
| "epoch": 0.38678742674480554, | |
| "grad_norm": 0.6484375, | |
| "learning_rate": 8e-05, | |
| "loss": 0.8143312931060791, | |
| "step": 1452 | |
| }, | |
| { | |
| "epoch": 0.3873201917954182, | |
| "grad_norm": 0.6484375, | |
| "learning_rate": 8e-05, | |
| "loss": 0.8212940096855164, | |
| "step": 1454 | |
| }, | |
| { | |
| "epoch": 0.3878529568460309, | |
| "grad_norm": 0.66015625, | |
| "learning_rate": 8e-05, | |
| "loss": 0.8249719738960266, | |
| "step": 1456 | |
| }, | |
| { | |
| "epoch": 0.38838572189664355, | |
| "grad_norm": 0.6484375, | |
| "learning_rate": 8e-05, | |
| "loss": 0.8218085765838623, | |
| "step": 1458 | |
| }, | |
| { | |
| "epoch": 0.3889184869472563, | |
| "grad_norm": 0.65234375, | |
| "learning_rate": 8e-05, | |
| "loss": 0.8255725502967834, | |
| "step": 1460 | |
| }, | |
| { | |
| "epoch": 0.38945125199786895, | |
| "grad_norm": 0.65625, | |
| "learning_rate": 8e-05, | |
| "loss": 0.7988225817680359, | |
| "step": 1462 | |
| }, | |
| { | |
| "epoch": 0.3899840170484816, | |
| "grad_norm": 0.63671875, | |
| "learning_rate": 8e-05, | |
| "loss": 0.7831338047981262, | |
| "step": 1464 | |
| }, | |
| { | |
| "epoch": 0.3905167820990943, | |
| "grad_norm": 0.671875, | |
| "learning_rate": 8e-05, | |
| "loss": 0.8661478757858276, | |
| "step": 1466 | |
| }, | |
| { | |
| "epoch": 0.39104954714970697, | |
| "grad_norm": 0.6328125, | |
| "learning_rate": 8e-05, | |
| "loss": 0.8106863498687744, | |
| "step": 1468 | |
| }, | |
| { | |
| "epoch": 0.39158231220031964, | |
| "grad_norm": 0.63671875, | |
| "learning_rate": 8e-05, | |
| "loss": 0.7969095706939697, | |
| "step": 1470 | |
| }, | |
| { | |
| "epoch": 0.39211507725093236, | |
| "grad_norm": 0.62109375, | |
| "learning_rate": 8e-05, | |
| "loss": 0.7830895781517029, | |
| "step": 1472 | |
| }, | |
| { | |
| "epoch": 0.39264784230154504, | |
| "grad_norm": 0.66015625, | |
| "learning_rate": 8e-05, | |
| "loss": 0.8268316984176636, | |
| "step": 1474 | |
| }, | |
| { | |
| "epoch": 0.3931806073521577, | |
| "grad_norm": 0.63671875, | |
| "learning_rate": 8e-05, | |
| "loss": 0.8054344654083252, | |
| "step": 1476 | |
| }, | |
| { | |
| "epoch": 0.3937133724027704, | |
| "grad_norm": 0.63671875, | |
| "learning_rate": 8e-05, | |
| "loss": 0.7860501408576965, | |
| "step": 1478 | |
| }, | |
| { | |
| "epoch": 0.39424613745338305, | |
| "grad_norm": 0.671875, | |
| "learning_rate": 8e-05, | |
| "loss": 0.8310704231262207, | |
| "step": 1480 | |
| }, | |
| { | |
| "epoch": 0.3947789025039957, | |
| "grad_norm": 0.640625, | |
| "learning_rate": 8e-05, | |
| "loss": 0.8038405179977417, | |
| "step": 1482 | |
| }, | |
| { | |
| "epoch": 0.3953116675546084, | |
| "grad_norm": 0.65625, | |
| "learning_rate": 8e-05, | |
| "loss": 0.7913386821746826, | |
| "step": 1484 | |
| }, | |
| { | |
| "epoch": 0.3958444326052211, | |
| "grad_norm": 0.625, | |
| "learning_rate": 8e-05, | |
| "loss": 0.7641775012016296, | |
| "step": 1486 | |
| }, | |
| { | |
| "epoch": 0.3963771976558338, | |
| "grad_norm": 0.64453125, | |
| "learning_rate": 8e-05, | |
| "loss": 0.8178999423980713, | |
| "step": 1488 | |
| }, | |
| { | |
| "epoch": 0.39690996270644646, | |
| "grad_norm": 0.67578125, | |
| "learning_rate": 8e-05, | |
| "loss": 0.8257794380187988, | |
| "step": 1490 | |
| }, | |
| { | |
| "epoch": 0.39744272775705913, | |
| "grad_norm": 0.65625, | |
| "learning_rate": 8e-05, | |
| "loss": 0.7903448343276978, | |
| "step": 1492 | |
| }, | |
| { | |
| "epoch": 0.3979754928076718, | |
| "grad_norm": 0.671875, | |
| "learning_rate": 8e-05, | |
| "loss": 0.7882867455482483, | |
| "step": 1494 | |
| }, | |
| { | |
| "epoch": 0.3985082578582845, | |
| "grad_norm": 0.640625, | |
| "learning_rate": 8e-05, | |
| "loss": 0.8316589593887329, | |
| "step": 1496 | |
| }, | |
| { | |
| "epoch": 0.3990410229088972, | |
| "grad_norm": 0.640625, | |
| "learning_rate": 8e-05, | |
| "loss": 0.8053879141807556, | |
| "step": 1498 | |
| }, | |
| { | |
| "epoch": 0.3995737879595099, | |
| "grad_norm": 0.6484375, | |
| "learning_rate": 8e-05, | |
| "loss": 0.8426420092582703, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 0.40010655301012255, | |
| "grad_norm": 0.65234375, | |
| "learning_rate": 8e-05, | |
| "loss": 0.8209067583084106, | |
| "step": 1502 | |
| }, | |
| { | |
| "epoch": 0.4006393180607352, | |
| "grad_norm": 0.6328125, | |
| "learning_rate": 8e-05, | |
| "loss": 0.8079086542129517, | |
| "step": 1504 | |
| }, | |
| { | |
| "epoch": 0.4011720831113479, | |
| "grad_norm": 0.6640625, | |
| "learning_rate": 8e-05, | |
| "loss": 0.808128297328949, | |
| "step": 1506 | |
| }, | |
| { | |
| "epoch": 0.40170484816196056, | |
| "grad_norm": 0.671875, | |
| "learning_rate": 8e-05, | |
| "loss": 0.8213729858398438, | |
| "step": 1508 | |
| }, | |
| { | |
| "epoch": 0.40223761321257323, | |
| "grad_norm": 0.6484375, | |
| "learning_rate": 8e-05, | |
| "loss": 0.8084886074066162, | |
| "step": 1510 | |
| }, | |
| { | |
| "epoch": 0.40277037826318596, | |
| "grad_norm": 0.6328125, | |
| "learning_rate": 8e-05, | |
| "loss": 0.8165018558502197, | |
| "step": 1512 | |
| }, | |
| { | |
| "epoch": 0.40330314331379863, | |
| "grad_norm": 0.6484375, | |
| "learning_rate": 8e-05, | |
| "loss": 0.8012227416038513, | |
| "step": 1514 | |
| }, | |
| { | |
| "epoch": 0.4038359083644113, | |
| "grad_norm": 0.66796875, | |
| "learning_rate": 8e-05, | |
| "loss": 0.8613946437835693, | |
| "step": 1516 | |
| }, | |
| { | |
| "epoch": 0.40436867341502397, | |
| "grad_norm": 0.6953125, | |
| "learning_rate": 8e-05, | |
| "loss": 0.8433681726455688, | |
| "step": 1518 | |
| }, | |
| { | |
| "epoch": 0.40490143846563664, | |
| "grad_norm": 0.6328125, | |
| "learning_rate": 8e-05, | |
| "loss": 0.779987096786499, | |
| "step": 1520 | |
| }, | |
| { | |
| "epoch": 0.4054342035162493, | |
| "grad_norm": 0.66796875, | |
| "learning_rate": 8e-05, | |
| "loss": 0.8048115968704224, | |
| "step": 1522 | |
| }, | |
| { | |
| "epoch": 0.40596696856686204, | |
| "grad_norm": 0.64453125, | |
| "learning_rate": 8e-05, | |
| "loss": 0.8666731715202332, | |
| "step": 1524 | |
| }, | |
| { | |
| "epoch": 0.4064997336174747, | |
| "grad_norm": 0.640625, | |
| "learning_rate": 8e-05, | |
| "loss": 0.747114360332489, | |
| "step": 1526 | |
| }, | |
| { | |
| "epoch": 0.4070324986680874, | |
| "grad_norm": 0.6484375, | |
| "learning_rate": 8e-05, | |
| "loss": 0.7566266059875488, | |
| "step": 1528 | |
| }, | |
| { | |
| "epoch": 0.40756526371870005, | |
| "grad_norm": 0.66015625, | |
| "learning_rate": 8e-05, | |
| "loss": 0.7844398617744446, | |
| "step": 1530 | |
| }, | |
| { | |
| "epoch": 0.4080980287693127, | |
| "grad_norm": 0.65234375, | |
| "learning_rate": 8e-05, | |
| "loss": 0.7681721448898315, | |
| "step": 1532 | |
| }, | |
| { | |
| "epoch": 0.4086307938199254, | |
| "grad_norm": 0.640625, | |
| "learning_rate": 8e-05, | |
| "loss": 0.7945184111595154, | |
| "step": 1534 | |
| }, | |
| { | |
| "epoch": 0.40916355887053807, | |
| "grad_norm": 0.62109375, | |
| "learning_rate": 8e-05, | |
| "loss": 0.7696878910064697, | |
| "step": 1536 | |
| }, | |
| { | |
| "epoch": 0.4096963239211508, | |
| "grad_norm": 0.65625, | |
| "learning_rate": 8e-05, | |
| "loss": 0.7923563718795776, | |
| "step": 1538 | |
| }, | |
| { | |
| "epoch": 0.41022908897176347, | |
| "grad_norm": 0.66796875, | |
| "learning_rate": 8e-05, | |
| "loss": 0.8100162744522095, | |
| "step": 1540 | |
| }, | |
| { | |
| "epoch": 0.41076185402237614, | |
| "grad_norm": 0.625, | |
| "learning_rate": 8e-05, | |
| "loss": 0.7867823243141174, | |
| "step": 1542 | |
| }, | |
| { | |
| "epoch": 0.4112946190729888, | |
| "grad_norm": 0.6484375, | |
| "learning_rate": 8e-05, | |
| "loss": 0.8097234964370728, | |
| "step": 1544 | |
| }, | |
| { | |
| "epoch": 0.4118273841236015, | |
| "grad_norm": 0.6484375, | |
| "learning_rate": 8e-05, | |
| "loss": 0.7911956310272217, | |
| "step": 1546 | |
| }, | |
| { | |
| "epoch": 0.41236014917421415, | |
| "grad_norm": 0.65234375, | |
| "learning_rate": 8e-05, | |
| "loss": 0.8348594903945923, | |
| "step": 1548 | |
| }, | |
| { | |
| "epoch": 0.4128929142248269, | |
| "grad_norm": 0.6328125, | |
| "learning_rate": 8e-05, | |
| "loss": 0.77206951379776, | |
| "step": 1550 | |
| }, | |
| { | |
| "epoch": 0.41342567927543955, | |
| "grad_norm": 0.6484375, | |
| "learning_rate": 8e-05, | |
| "loss": 0.8244647979736328, | |
| "step": 1552 | |
| }, | |
| { | |
| "epoch": 0.4139584443260522, | |
| "grad_norm": 0.6171875, | |
| "learning_rate": 8e-05, | |
| "loss": 0.7584129571914673, | |
| "step": 1554 | |
| }, | |
| { | |
| "epoch": 0.4144912093766649, | |
| "grad_norm": 0.64453125, | |
| "learning_rate": 8e-05, | |
| "loss": 0.7752898931503296, | |
| "step": 1556 | |
| }, | |
| { | |
| "epoch": 0.41502397442727756, | |
| "grad_norm": 0.65625, | |
| "learning_rate": 8e-05, | |
| "loss": 0.7845876812934875, | |
| "step": 1558 | |
| }, | |
| { | |
| "epoch": 0.41555673947789024, | |
| "grad_norm": 0.66015625, | |
| "learning_rate": 8e-05, | |
| "loss": 0.7884317636489868, | |
| "step": 1560 | |
| }, | |
| { | |
| "epoch": 0.4160895045285029, | |
| "grad_norm": 0.6796875, | |
| "learning_rate": 8e-05, | |
| "loss": 0.8328264951705933, | |
| "step": 1562 | |
| }, | |
| { | |
| "epoch": 0.41662226957911563, | |
| "grad_norm": 0.625, | |
| "learning_rate": 8e-05, | |
| "loss": 0.7535569071769714, | |
| "step": 1564 | |
| }, | |
| { | |
| "epoch": 0.4171550346297283, | |
| "grad_norm": 0.6484375, | |
| "learning_rate": 8e-05, | |
| "loss": 0.7909643650054932, | |
| "step": 1566 | |
| }, | |
| { | |
| "epoch": 0.417687799680341, | |
| "grad_norm": 0.703125, | |
| "learning_rate": 8e-05, | |
| "loss": 0.8720725774765015, | |
| "step": 1568 | |
| }, | |
| { | |
| "epoch": 0.41822056473095365, | |
| "grad_norm": 0.6484375, | |
| "learning_rate": 8e-05, | |
| "loss": 0.7731503844261169, | |
| "step": 1570 | |
| }, | |
| { | |
| "epoch": 0.4187533297815663, | |
| "grad_norm": 0.65625, | |
| "learning_rate": 8e-05, | |
| "loss": 0.7802464365959167, | |
| "step": 1572 | |
| }, | |
| { | |
| "epoch": 0.419286094832179, | |
| "grad_norm": 0.640625, | |
| "learning_rate": 8e-05, | |
| "loss": 0.7635596990585327, | |
| "step": 1574 | |
| }, | |
| { | |
| "epoch": 0.41981885988279166, | |
| "grad_norm": 0.66015625, | |
| "learning_rate": 8e-05, | |
| "loss": 0.7855473160743713, | |
| "step": 1576 | |
| }, | |
| { | |
| "epoch": 0.4203516249334044, | |
| "grad_norm": 0.69921875, | |
| "learning_rate": 8e-05, | |
| "loss": 0.7947031855583191, | |
| "step": 1578 | |
| }, | |
| { | |
| "epoch": 0.42088438998401706, | |
| "grad_norm": 0.6328125, | |
| "learning_rate": 8e-05, | |
| "loss": 0.8119519352912903, | |
| "step": 1580 | |
| }, | |
| { | |
| "epoch": 0.42141715503462973, | |
| "grad_norm": 0.64453125, | |
| "learning_rate": 8e-05, | |
| "loss": 0.7724003791809082, | |
| "step": 1582 | |
| }, | |
| { | |
| "epoch": 0.4219499200852424, | |
| "grad_norm": 0.6640625, | |
| "learning_rate": 8e-05, | |
| "loss": 0.8322865962982178, | |
| "step": 1584 | |
| }, | |
| { | |
| "epoch": 0.4224826851358551, | |
| "grad_norm": 0.63671875, | |
| "learning_rate": 8e-05, | |
| "loss": 0.7840743064880371, | |
| "step": 1586 | |
| }, | |
| { | |
| "epoch": 0.42301545018646775, | |
| "grad_norm": 0.64453125, | |
| "learning_rate": 8e-05, | |
| "loss": 0.7936784625053406, | |
| "step": 1588 | |
| }, | |
| { | |
| "epoch": 0.42354821523708047, | |
| "grad_norm": 0.640625, | |
| "learning_rate": 8e-05, | |
| "loss": 0.809872567653656, | |
| "step": 1590 | |
| }, | |
| { | |
| "epoch": 0.42408098028769314, | |
| "grad_norm": 0.6484375, | |
| "learning_rate": 8e-05, | |
| "loss": 0.8307843208312988, | |
| "step": 1592 | |
| }, | |
| { | |
| "epoch": 0.4246137453383058, | |
| "grad_norm": 0.640625, | |
| "learning_rate": 8e-05, | |
| "loss": 0.8011277914047241, | |
| "step": 1594 | |
| }, | |
| { | |
| "epoch": 0.4251465103889185, | |
| "grad_norm": 0.671875, | |
| "learning_rate": 8e-05, | |
| "loss": 0.8050786852836609, | |
| "step": 1596 | |
| }, | |
| { | |
| "epoch": 0.42567927543953116, | |
| "grad_norm": 0.62109375, | |
| "learning_rate": 8e-05, | |
| "loss": 0.7952936291694641, | |
| "step": 1598 | |
| }, | |
| { | |
| "epoch": 0.42621204049014383, | |
| "grad_norm": 0.625, | |
| "learning_rate": 8e-05, | |
| "loss": 0.7305450439453125, | |
| "step": 1600 | |
| }, | |
| { | |
| "epoch": 0.4267448055407565, | |
| "grad_norm": 0.6484375, | |
| "learning_rate": 8e-05, | |
| "loss": 0.7913362383842468, | |
| "step": 1602 | |
| }, | |
| { | |
| "epoch": 0.4272775705913692, | |
| "grad_norm": 0.625, | |
| "learning_rate": 8e-05, | |
| "loss": 0.7669799327850342, | |
| "step": 1604 | |
| }, | |
| { | |
| "epoch": 0.4278103356419819, | |
| "grad_norm": 0.65625, | |
| "learning_rate": 8e-05, | |
| "loss": 0.812600314617157, | |
| "step": 1606 | |
| }, | |
| { | |
| "epoch": 0.42834310069259457, | |
| "grad_norm": 0.64453125, | |
| "learning_rate": 8e-05, | |
| "loss": 0.8100326657295227, | |
| "step": 1608 | |
| }, | |
| { | |
| "epoch": 0.42887586574320724, | |
| "grad_norm": 0.64453125, | |
| "learning_rate": 8e-05, | |
| "loss": 0.7722445726394653, | |
| "step": 1610 | |
| }, | |
| { | |
| "epoch": 0.4294086307938199, | |
| "grad_norm": 0.68359375, | |
| "learning_rate": 8e-05, | |
| "loss": 0.7866500616073608, | |
| "step": 1612 | |
| }, | |
| { | |
| "epoch": 0.4299413958444326, | |
| "grad_norm": 0.66015625, | |
| "learning_rate": 8e-05, | |
| "loss": 0.7936035394668579, | |
| "step": 1614 | |
| }, | |
| { | |
| "epoch": 0.4304741608950453, | |
| "grad_norm": 0.66015625, | |
| "learning_rate": 8e-05, | |
| "loss": 0.8121829032897949, | |
| "step": 1616 | |
| }, | |
| { | |
| "epoch": 0.431006925945658, | |
| "grad_norm": 0.64453125, | |
| "learning_rate": 8e-05, | |
| "loss": 0.7777450680732727, | |
| "step": 1618 | |
| }, | |
| { | |
| "epoch": 0.43153969099627065, | |
| "grad_norm": 0.64453125, | |
| "learning_rate": 8e-05, | |
| "loss": 0.8149593472480774, | |
| "step": 1620 | |
| }, | |
| { | |
| "epoch": 0.4320724560468833, | |
| "grad_norm": 0.81640625, | |
| "learning_rate": 8e-05, | |
| "loss": 0.8239789605140686, | |
| "step": 1622 | |
| }, | |
| { | |
| "epoch": 0.432605221097496, | |
| "grad_norm": 0.671875, | |
| "learning_rate": 8e-05, | |
| "loss": 0.8340217471122742, | |
| "step": 1624 | |
| }, | |
| { | |
| "epoch": 0.43313798614810867, | |
| "grad_norm": 0.6796875, | |
| "learning_rate": 8e-05, | |
| "loss": 0.8046810030937195, | |
| "step": 1626 | |
| }, | |
| { | |
| "epoch": 0.43367075119872134, | |
| "grad_norm": 0.640625, | |
| "learning_rate": 8e-05, | |
| "loss": 0.7964731454849243, | |
| "step": 1628 | |
| }, | |
| { | |
| "epoch": 0.43420351624933406, | |
| "grad_norm": 0.65625, | |
| "learning_rate": 8e-05, | |
| "loss": 0.7805262207984924, | |
| "step": 1630 | |
| }, | |
| { | |
| "epoch": 0.43473628129994674, | |
| "grad_norm": 0.671875, | |
| "learning_rate": 8e-05, | |
| "loss": 0.8220688104629517, | |
| "step": 1632 | |
| }, | |
| { | |
| "epoch": 0.4352690463505594, | |
| "grad_norm": 0.6328125, | |
| "learning_rate": 8e-05, | |
| "loss": 0.7977862358093262, | |
| "step": 1634 | |
| }, | |
| { | |
| "epoch": 0.4358018114011721, | |
| "grad_norm": 0.640625, | |
| "learning_rate": 8e-05, | |
| "loss": 0.7609307765960693, | |
| "step": 1636 | |
| }, | |
| { | |
| "epoch": 0.43633457645178475, | |
| "grad_norm": 0.6796875, | |
| "learning_rate": 8e-05, | |
| "loss": 0.8068886399269104, | |
| "step": 1638 | |
| }, | |
| { | |
| "epoch": 0.4368673415023974, | |
| "grad_norm": 0.66796875, | |
| "learning_rate": 8e-05, | |
| "loss": 0.7697072625160217, | |
| "step": 1640 | |
| }, | |
| { | |
| "epoch": 0.43740010655301015, | |
| "grad_norm": 0.67578125, | |
| "learning_rate": 8e-05, | |
| "loss": 0.8072892427444458, | |
| "step": 1642 | |
| }, | |
| { | |
| "epoch": 0.4379328716036228, | |
| "grad_norm": 0.640625, | |
| "learning_rate": 8e-05, | |
| "loss": 0.7413228154182434, | |
| "step": 1644 | |
| }, | |
| { | |
| "epoch": 0.4384656366542355, | |
| "grad_norm": 0.62890625, | |
| "learning_rate": 8e-05, | |
| "loss": 0.7742043733596802, | |
| "step": 1646 | |
| }, | |
| { | |
| "epoch": 0.43899840170484816, | |
| "grad_norm": 0.62890625, | |
| "learning_rate": 8e-05, | |
| "loss": 0.7952238917350769, | |
| "step": 1648 | |
| }, | |
| { | |
| "epoch": 0.43953116675546083, | |
| "grad_norm": 0.6484375, | |
| "learning_rate": 8e-05, | |
| "loss": 0.7638510465621948, | |
| "step": 1650 | |
| }, | |
| { | |
| "epoch": 0.4400639318060735, | |
| "grad_norm": 0.67578125, | |
| "learning_rate": 8e-05, | |
| "loss": 0.7926915884017944, | |
| "step": 1652 | |
| }, | |
| { | |
| "epoch": 0.4405966968566862, | |
| "grad_norm": 0.64453125, | |
| "learning_rate": 8e-05, | |
| "loss": 0.7640718817710876, | |
| "step": 1654 | |
| }, | |
| { | |
| "epoch": 0.4411294619072989, | |
| "grad_norm": 0.65625, | |
| "learning_rate": 8e-05, | |
| "loss": 0.7931405305862427, | |
| "step": 1656 | |
| }, | |
| { | |
| "epoch": 0.4416622269579116, | |
| "grad_norm": 0.69921875, | |
| "learning_rate": 8e-05, | |
| "loss": 0.7784867882728577, | |
| "step": 1658 | |
| }, | |
| { | |
| "epoch": 0.44219499200852425, | |
| "grad_norm": 0.6640625, | |
| "learning_rate": 8e-05, | |
| "loss": 0.7957846522331238, | |
| "step": 1660 | |
| }, | |
| { | |
| "epoch": 0.4427277570591369, | |
| "grad_norm": 0.62890625, | |
| "learning_rate": 8e-05, | |
| "loss": 0.7507031559944153, | |
| "step": 1662 | |
| }, | |
| { | |
| "epoch": 0.4432605221097496, | |
| "grad_norm": 0.6484375, | |
| "learning_rate": 8e-05, | |
| "loss": 0.8128439784049988, | |
| "step": 1664 | |
| }, | |
| { | |
| "epoch": 0.44379328716036226, | |
| "grad_norm": 0.66796875, | |
| "learning_rate": 8e-05, | |
| "loss": 0.8259686231613159, | |
| "step": 1666 | |
| }, | |
| { | |
| "epoch": 0.444326052210975, | |
| "grad_norm": 0.6328125, | |
| "learning_rate": 8e-05, | |
| "loss": 0.8185787796974182, | |
| "step": 1668 | |
| }, | |
| { | |
| "epoch": 0.44485881726158766, | |
| "grad_norm": 0.6796875, | |
| "learning_rate": 8e-05, | |
| "loss": 0.8285982608795166, | |
| "step": 1670 | |
| }, | |
| { | |
| "epoch": 0.44539158231220033, | |
| "grad_norm": 0.70703125, | |
| "learning_rate": 8e-05, | |
| "loss": 0.7934709787368774, | |
| "step": 1672 | |
| }, | |
| { | |
| "epoch": 0.445924347362813, | |
| "grad_norm": 0.6796875, | |
| "learning_rate": 8e-05, | |
| "loss": 0.7914223670959473, | |
| "step": 1674 | |
| }, | |
| { | |
| "epoch": 0.44645711241342567, | |
| "grad_norm": 0.63671875, | |
| "learning_rate": 8e-05, | |
| "loss": 0.7982125878334045, | |
| "step": 1676 | |
| }, | |
| { | |
| "epoch": 0.44698987746403834, | |
| "grad_norm": 0.66015625, | |
| "learning_rate": 8e-05, | |
| "loss": 0.798332929611206, | |
| "step": 1678 | |
| }, | |
| { | |
| "epoch": 0.447522642514651, | |
| "grad_norm": 0.640625, | |
| "learning_rate": 8e-05, | |
| "loss": 0.8056020140647888, | |
| "step": 1680 | |
| }, | |
| { | |
| "epoch": 0.44805540756526374, | |
| "grad_norm": 0.63671875, | |
| "learning_rate": 8e-05, | |
| "loss": 0.7972841262817383, | |
| "step": 1682 | |
| }, | |
| { | |
| "epoch": 0.4485881726158764, | |
| "grad_norm": 0.68359375, | |
| "learning_rate": 8e-05, | |
| "loss": 0.841311514377594, | |
| "step": 1684 | |
| }, | |
| { | |
| "epoch": 0.4491209376664891, | |
| "grad_norm": 0.6328125, | |
| "learning_rate": 8e-05, | |
| "loss": 0.7941474914550781, | |
| "step": 1686 | |
| }, | |
| { | |
| "epoch": 0.44965370271710176, | |
| "grad_norm": 0.640625, | |
| "learning_rate": 8e-05, | |
| "loss": 0.7953973412513733, | |
| "step": 1688 | |
| }, | |
| { | |
| "epoch": 0.4501864677677144, | |
| "grad_norm": 0.6328125, | |
| "learning_rate": 8e-05, | |
| "loss": 0.8190003037452698, | |
| "step": 1690 | |
| }, | |
| { | |
| "epoch": 0.4507192328183271, | |
| "grad_norm": 0.6640625, | |
| "learning_rate": 8e-05, | |
| "loss": 0.8118422627449036, | |
| "step": 1692 | |
| }, | |
| { | |
| "epoch": 0.4512519978689398, | |
| "grad_norm": 0.6171875, | |
| "learning_rate": 8e-05, | |
| "loss": 0.7708790898323059, | |
| "step": 1694 | |
| }, | |
| { | |
| "epoch": 0.4517847629195525, | |
| "grad_norm": 0.6484375, | |
| "learning_rate": 8e-05, | |
| "loss": 0.7594013810157776, | |
| "step": 1696 | |
| }, | |
| { | |
| "epoch": 0.45231752797016517, | |
| "grad_norm": 0.66796875, | |
| "learning_rate": 8e-05, | |
| "loss": 0.8185669183731079, | |
| "step": 1698 | |
| }, | |
| { | |
| "epoch": 0.45285029302077784, | |
| "grad_norm": 0.63671875, | |
| "learning_rate": 8e-05, | |
| "loss": 0.8450738191604614, | |
| "step": 1700 | |
| }, | |
| { | |
| "epoch": 0.4533830580713905, | |
| "grad_norm": 0.65234375, | |
| "learning_rate": 8e-05, | |
| "loss": 0.8263086080551147, | |
| "step": 1702 | |
| }, | |
| { | |
| "epoch": 0.4539158231220032, | |
| "grad_norm": 0.6171875, | |
| "learning_rate": 8e-05, | |
| "loss": 0.793552041053772, | |
| "step": 1704 | |
| }, | |
| { | |
| "epoch": 0.45444858817261585, | |
| "grad_norm": 0.62890625, | |
| "learning_rate": 8e-05, | |
| "loss": 0.797087550163269, | |
| "step": 1706 | |
| }, | |
| { | |
| "epoch": 0.4549813532232286, | |
| "grad_norm": 0.6171875, | |
| "learning_rate": 8e-05, | |
| "loss": 0.7767654061317444, | |
| "step": 1708 | |
| }, | |
| { | |
| "epoch": 0.45551411827384125, | |
| "grad_norm": 0.64453125, | |
| "learning_rate": 8e-05, | |
| "loss": 0.7956861257553101, | |
| "step": 1710 | |
| }, | |
| { | |
| "epoch": 0.4560468833244539, | |
| "grad_norm": 0.6328125, | |
| "learning_rate": 8e-05, | |
| "loss": 0.7868641018867493, | |
| "step": 1712 | |
| }, | |
| { | |
| "epoch": 0.4565796483750666, | |
| "grad_norm": 0.62890625, | |
| "learning_rate": 8e-05, | |
| "loss": 0.8069348335266113, | |
| "step": 1714 | |
| }, | |
| { | |
| "epoch": 0.45711241342567926, | |
| "grad_norm": 0.625, | |
| "learning_rate": 8e-05, | |
| "loss": 0.8170084357261658, | |
| "step": 1716 | |
| }, | |
| { | |
| "epoch": 0.45764517847629194, | |
| "grad_norm": 0.625, | |
| "learning_rate": 8e-05, | |
| "loss": 0.7934258580207825, | |
| "step": 1718 | |
| }, | |
| { | |
| "epoch": 0.4581779435269046, | |
| "grad_norm": 0.64453125, | |
| "learning_rate": 8e-05, | |
| "loss": 0.8041369915008545, | |
| "step": 1720 | |
| }, | |
| { | |
| "epoch": 0.45871070857751733, | |
| "grad_norm": 0.609375, | |
| "learning_rate": 8e-05, | |
| "loss": 0.7930079102516174, | |
| "step": 1722 | |
| }, | |
| { | |
| "epoch": 0.45924347362813, | |
| "grad_norm": 0.6328125, | |
| "learning_rate": 8e-05, | |
| "loss": 0.7743818163871765, | |
| "step": 1724 | |
| }, | |
| { | |
| "epoch": 0.4597762386787427, | |
| "grad_norm": 0.6328125, | |
| "learning_rate": 8e-05, | |
| "loss": 0.7860970497131348, | |
| "step": 1726 | |
| }, | |
| { | |
| "epoch": 0.46030900372935535, | |
| "grad_norm": 0.62890625, | |
| "learning_rate": 8e-05, | |
| "loss": 0.7822363972663879, | |
| "step": 1728 | |
| }, | |
| { | |
| "epoch": 0.460841768779968, | |
| "grad_norm": 0.6484375, | |
| "learning_rate": 8e-05, | |
| "loss": 0.7959834933280945, | |
| "step": 1730 | |
| }, | |
| { | |
| "epoch": 0.4613745338305807, | |
| "grad_norm": 0.66796875, | |
| "learning_rate": 8e-05, | |
| "loss": 0.828339159488678, | |
| "step": 1732 | |
| }, | |
| { | |
| "epoch": 0.4619072988811934, | |
| "grad_norm": 0.6484375, | |
| "learning_rate": 8e-05, | |
| "loss": 0.799767017364502, | |
| "step": 1734 | |
| }, | |
| { | |
| "epoch": 0.4624400639318061, | |
| "grad_norm": 0.65234375, | |
| "learning_rate": 8e-05, | |
| "loss": 0.7834099531173706, | |
| "step": 1736 | |
| }, | |
| { | |
| "epoch": 0.46297282898241876, | |
| "grad_norm": 0.62109375, | |
| "learning_rate": 8e-05, | |
| "loss": 0.7832657694816589, | |
| "step": 1738 | |
| }, | |
| { | |
| "epoch": 0.46350559403303143, | |
| "grad_norm": 0.62109375, | |
| "learning_rate": 8e-05, | |
| "loss": 0.7735370993614197, | |
| "step": 1740 | |
| }, | |
| { | |
| "epoch": 0.4640383590836441, | |
| "grad_norm": 0.6171875, | |
| "learning_rate": 8e-05, | |
| "loss": 0.7704412341117859, | |
| "step": 1742 | |
| }, | |
| { | |
| "epoch": 0.4645711241342568, | |
| "grad_norm": 0.625, | |
| "learning_rate": 8e-05, | |
| "loss": 0.7932986617088318, | |
| "step": 1744 | |
| }, | |
| { | |
| "epoch": 0.46510388918486945, | |
| "grad_norm": 0.64453125, | |
| "learning_rate": 8e-05, | |
| "loss": 0.8292320966720581, | |
| "step": 1746 | |
| }, | |
| { | |
| "epoch": 0.4656366542354822, | |
| "grad_norm": 0.625, | |
| "learning_rate": 8e-05, | |
| "loss": 0.8028055429458618, | |
| "step": 1748 | |
| }, | |
| { | |
| "epoch": 0.46616941928609484, | |
| "grad_norm": 0.63671875, | |
| "learning_rate": 8e-05, | |
| "loss": 0.801913321018219, | |
| "step": 1750 | |
| }, | |
| { | |
| "epoch": 0.4667021843367075, | |
| "grad_norm": 0.625, | |
| "learning_rate": 8e-05, | |
| "loss": 0.7580520510673523, | |
| "step": 1752 | |
| }, | |
| { | |
| "epoch": 0.4672349493873202, | |
| "grad_norm": 0.625, | |
| "learning_rate": 8e-05, | |
| "loss": 0.7673873901367188, | |
| "step": 1754 | |
| }, | |
| { | |
| "epoch": 0.46776771443793286, | |
| "grad_norm": 0.609375, | |
| "learning_rate": 8e-05, | |
| "loss": 0.7917901873588562, | |
| "step": 1756 | |
| }, | |
| { | |
| "epoch": 0.46830047948854553, | |
| "grad_norm": 0.65234375, | |
| "learning_rate": 8e-05, | |
| "loss": 0.8057483434677124, | |
| "step": 1758 | |
| }, | |
| { | |
| "epoch": 0.46883324453915826, | |
| "grad_norm": 0.63671875, | |
| "learning_rate": 8e-05, | |
| "loss": 0.824272632598877, | |
| "step": 1760 | |
| }, | |
| { | |
| "epoch": 0.4693660095897709, | |
| "grad_norm": 0.609375, | |
| "learning_rate": 8e-05, | |
| "loss": 0.7674715518951416, | |
| "step": 1762 | |
| }, | |
| { | |
| "epoch": 0.4698987746403836, | |
| "grad_norm": 0.62890625, | |
| "learning_rate": 8e-05, | |
| "loss": 0.7918990254402161, | |
| "step": 1764 | |
| }, | |
| { | |
| "epoch": 0.47043153969099627, | |
| "grad_norm": 0.62109375, | |
| "learning_rate": 8e-05, | |
| "loss": 0.7527544498443604, | |
| "step": 1766 | |
| }, | |
| { | |
| "epoch": 0.47096430474160894, | |
| "grad_norm": 0.62109375, | |
| "learning_rate": 8e-05, | |
| "loss": 0.7316806316375732, | |
| "step": 1768 | |
| }, | |
| { | |
| "epoch": 0.4714970697922216, | |
| "grad_norm": 0.6328125, | |
| "learning_rate": 8e-05, | |
| "loss": 0.764977753162384, | |
| "step": 1770 | |
| }, | |
| { | |
| "epoch": 0.4720298348428343, | |
| "grad_norm": 0.62890625, | |
| "learning_rate": 8e-05, | |
| "loss": 0.754130482673645, | |
| "step": 1772 | |
| }, | |
| { | |
| "epoch": 0.472562599893447, | |
| "grad_norm": 0.65625, | |
| "learning_rate": 8e-05, | |
| "loss": 0.8153510689735413, | |
| "step": 1774 | |
| }, | |
| { | |
| "epoch": 0.4730953649440597, | |
| "grad_norm": 0.62890625, | |
| "learning_rate": 8e-05, | |
| "loss": 0.7621491551399231, | |
| "step": 1776 | |
| }, | |
| { | |
| "epoch": 0.47362812999467235, | |
| "grad_norm": 0.6328125, | |
| "learning_rate": 8e-05, | |
| "loss": 0.7979814410209656, | |
| "step": 1778 | |
| }, | |
| { | |
| "epoch": 0.474160895045285, | |
| "grad_norm": 0.63671875, | |
| "learning_rate": 8e-05, | |
| "loss": 0.7620680928230286, | |
| "step": 1780 | |
| }, | |
| { | |
| "epoch": 0.4746936600958977, | |
| "grad_norm": 0.66015625, | |
| "learning_rate": 8e-05, | |
| "loss": 0.8257867097854614, | |
| "step": 1782 | |
| }, | |
| { | |
| "epoch": 0.47522642514651037, | |
| "grad_norm": 0.6484375, | |
| "learning_rate": 8e-05, | |
| "loss": 0.8385603427886963, | |
| "step": 1784 | |
| }, | |
| { | |
| "epoch": 0.4757591901971231, | |
| "grad_norm": 0.62890625, | |
| "learning_rate": 8e-05, | |
| "loss": 0.7887386679649353, | |
| "step": 1786 | |
| }, | |
| { | |
| "epoch": 0.47629195524773577, | |
| "grad_norm": 0.66796875, | |
| "learning_rate": 8e-05, | |
| "loss": 0.815808892250061, | |
| "step": 1788 | |
| }, | |
| { | |
| "epoch": 0.47682472029834844, | |
| "grad_norm": 0.62109375, | |
| "learning_rate": 8e-05, | |
| "loss": 0.7848386764526367, | |
| "step": 1790 | |
| }, | |
| { | |
| "epoch": 0.4773574853489611, | |
| "grad_norm": 0.6796875, | |
| "learning_rate": 8e-05, | |
| "loss": 0.7899839878082275, | |
| "step": 1792 | |
| }, | |
| { | |
| "epoch": 0.4778902503995738, | |
| "grad_norm": 0.64453125, | |
| "learning_rate": 8e-05, | |
| "loss": 0.8273345828056335, | |
| "step": 1794 | |
| }, | |
| { | |
| "epoch": 0.47842301545018645, | |
| "grad_norm": 0.65625, | |
| "learning_rate": 8e-05, | |
| "loss": 0.7726816534996033, | |
| "step": 1796 | |
| }, | |
| { | |
| "epoch": 0.4789557805007991, | |
| "grad_norm": 0.65234375, | |
| "learning_rate": 8e-05, | |
| "loss": 0.8128560781478882, | |
| "step": 1798 | |
| }, | |
| { | |
| "epoch": 0.47948854555141185, | |
| "grad_norm": 0.62109375, | |
| "learning_rate": 8e-05, | |
| "loss": 0.7594364881515503, | |
| "step": 1800 | |
| }, | |
| { | |
| "epoch": 0.4800213106020245, | |
| "grad_norm": 0.6171875, | |
| "learning_rate": 8e-05, | |
| "loss": 0.7679142951965332, | |
| "step": 1802 | |
| }, | |
| { | |
| "epoch": 0.4805540756526372, | |
| "grad_norm": 0.73828125, | |
| "learning_rate": 8e-05, | |
| "loss": 0.7729013562202454, | |
| "step": 1804 | |
| }, | |
| { | |
| "epoch": 0.48108684070324986, | |
| "grad_norm": 0.640625, | |
| "learning_rate": 8e-05, | |
| "loss": 0.7576342821121216, | |
| "step": 1806 | |
| }, | |
| { | |
| "epoch": 0.48161960575386253, | |
| "grad_norm": 0.6328125, | |
| "learning_rate": 8e-05, | |
| "loss": 0.7645061612129211, | |
| "step": 1808 | |
| }, | |
| { | |
| "epoch": 0.4821523708044752, | |
| "grad_norm": 0.63671875, | |
| "learning_rate": 8e-05, | |
| "loss": 0.7803142666816711, | |
| "step": 1810 | |
| }, | |
| { | |
| "epoch": 0.48268513585508793, | |
| "grad_norm": 0.62890625, | |
| "learning_rate": 8e-05, | |
| "loss": 0.7762041687965393, | |
| "step": 1812 | |
| }, | |
| { | |
| "epoch": 0.4832179009057006, | |
| "grad_norm": 0.61328125, | |
| "learning_rate": 8e-05, | |
| "loss": 0.7839690446853638, | |
| "step": 1814 | |
| }, | |
| { | |
| "epoch": 0.4837506659563133, | |
| "grad_norm": 0.65234375, | |
| "learning_rate": 8e-05, | |
| "loss": 0.787205159664154, | |
| "step": 1816 | |
| }, | |
| { | |
| "epoch": 0.48428343100692595, | |
| "grad_norm": 0.6640625, | |
| "learning_rate": 8e-05, | |
| "loss": 0.8149699568748474, | |
| "step": 1818 | |
| }, | |
| { | |
| "epoch": 0.4848161960575386, | |
| "grad_norm": 0.63671875, | |
| "learning_rate": 8e-05, | |
| "loss": 0.7666029334068298, | |
| "step": 1820 | |
| }, | |
| { | |
| "epoch": 0.4853489611081513, | |
| "grad_norm": 0.6328125, | |
| "learning_rate": 8e-05, | |
| "loss": 0.8224028944969177, | |
| "step": 1822 | |
| }, | |
| { | |
| "epoch": 0.48588172615876396, | |
| "grad_norm": 0.6328125, | |
| "learning_rate": 8e-05, | |
| "loss": 0.7877684831619263, | |
| "step": 1824 | |
| }, | |
| { | |
| "epoch": 0.4864144912093767, | |
| "grad_norm": 0.625, | |
| "learning_rate": 8e-05, | |
| "loss": 0.7987911701202393, | |
| "step": 1826 | |
| }, | |
| { | |
| "epoch": 0.48694725625998936, | |
| "grad_norm": 0.62109375, | |
| "learning_rate": 8e-05, | |
| "loss": 0.759647011756897, | |
| "step": 1828 | |
| }, | |
| { | |
| "epoch": 0.48748002131060203, | |
| "grad_norm": 0.61328125, | |
| "learning_rate": 8e-05, | |
| "loss": 0.7700395584106445, | |
| "step": 1830 | |
| }, | |
| { | |
| "epoch": 0.4880127863612147, | |
| "grad_norm": 0.625, | |
| "learning_rate": 8e-05, | |
| "loss": 0.7666257619857788, | |
| "step": 1832 | |
| }, | |
| { | |
| "epoch": 0.4885455514118274, | |
| "grad_norm": 0.66015625, | |
| "learning_rate": 8e-05, | |
| "loss": 0.7913581132888794, | |
| "step": 1834 | |
| }, | |
| { | |
| "epoch": 0.48907831646244004, | |
| "grad_norm": 0.65625, | |
| "learning_rate": 8e-05, | |
| "loss": 0.7783340215682983, | |
| "step": 1836 | |
| }, | |
| { | |
| "epoch": 0.48961108151305277, | |
| "grad_norm": 0.65625, | |
| "learning_rate": 8e-05, | |
| "loss": 0.8159565925598145, | |
| "step": 1838 | |
| }, | |
| { | |
| "epoch": 0.49014384656366544, | |
| "grad_norm": 0.63671875, | |
| "learning_rate": 8e-05, | |
| "loss": 0.7737563252449036, | |
| "step": 1840 | |
| }, | |
| { | |
| "epoch": 0.4906766116142781, | |
| "grad_norm": 0.62890625, | |
| "learning_rate": 8e-05, | |
| "loss": 0.8198726177215576, | |
| "step": 1842 | |
| }, | |
| { | |
| "epoch": 0.4912093766648908, | |
| "grad_norm": 0.65234375, | |
| "learning_rate": 8e-05, | |
| "loss": 0.7712583541870117, | |
| "step": 1844 | |
| }, | |
| { | |
| "epoch": 0.49174214171550346, | |
| "grad_norm": 0.62109375, | |
| "learning_rate": 8e-05, | |
| "loss": 0.7808051705360413, | |
| "step": 1846 | |
| }, | |
| { | |
| "epoch": 0.4922749067661161, | |
| "grad_norm": 0.6328125, | |
| "learning_rate": 8e-05, | |
| "loss": 0.7701867818832397, | |
| "step": 1848 | |
| }, | |
| { | |
| "epoch": 0.4928076718167288, | |
| "grad_norm": 0.59765625, | |
| "learning_rate": 8e-05, | |
| "loss": 0.7609500288963318, | |
| "step": 1850 | |
| }, | |
| { | |
| "epoch": 0.4933404368673415, | |
| "grad_norm": 0.65234375, | |
| "learning_rate": 8e-05, | |
| "loss": 0.8407063484191895, | |
| "step": 1852 | |
| }, | |
| { | |
| "epoch": 0.4938732019179542, | |
| "grad_norm": 0.66015625, | |
| "learning_rate": 8e-05, | |
| "loss": 0.7863637208938599, | |
| "step": 1854 | |
| }, | |
| { | |
| "epoch": 0.49440596696856687, | |
| "grad_norm": 0.64453125, | |
| "learning_rate": 8e-05, | |
| "loss": 0.797248363494873, | |
| "step": 1856 | |
| }, | |
| { | |
| "epoch": 0.49493873201917954, | |
| "grad_norm": 0.6328125, | |
| "learning_rate": 8e-05, | |
| "loss": 0.7627251148223877, | |
| "step": 1858 | |
| }, | |
| { | |
| "epoch": 0.4954714970697922, | |
| "grad_norm": 0.65625, | |
| "learning_rate": 8e-05, | |
| "loss": 0.8115253448486328, | |
| "step": 1860 | |
| }, | |
| { | |
| "epoch": 0.4960042621204049, | |
| "grad_norm": 0.65234375, | |
| "learning_rate": 8e-05, | |
| "loss": 0.7831135988235474, | |
| "step": 1862 | |
| }, | |
| { | |
| "epoch": 0.4965370271710176, | |
| "grad_norm": 0.6171875, | |
| "learning_rate": 8e-05, | |
| "loss": 0.8069642782211304, | |
| "step": 1864 | |
| }, | |
| { | |
| "epoch": 0.4970697922216303, | |
| "grad_norm": 0.65234375, | |
| "learning_rate": 8e-05, | |
| "loss": 0.8169358372688293, | |
| "step": 1866 | |
| }, | |
| { | |
| "epoch": 0.49760255727224295, | |
| "grad_norm": 0.62109375, | |
| "learning_rate": 8e-05, | |
| "loss": 0.7873024344444275, | |
| "step": 1868 | |
| }, | |
| { | |
| "epoch": 0.4981353223228556, | |
| "grad_norm": 0.62890625, | |
| "learning_rate": 8e-05, | |
| "loss": 0.7919709086418152, | |
| "step": 1870 | |
| }, | |
| { | |
| "epoch": 0.4986680873734683, | |
| "grad_norm": 0.640625, | |
| "learning_rate": 8e-05, | |
| "loss": 0.7744743824005127, | |
| "step": 1872 | |
| }, | |
| { | |
| "epoch": 0.49920085242408097, | |
| "grad_norm": 0.62890625, | |
| "learning_rate": 8e-05, | |
| "loss": 0.8178368806838989, | |
| "step": 1874 | |
| }, | |
| { | |
| "epoch": 0.49973361747469364, | |
| "grad_norm": 0.65234375, | |
| "learning_rate": 8e-05, | |
| "loss": 0.7937871217727661, | |
| "step": 1876 | |
| }, | |
| { | |
| "epoch": 0.5002663825253063, | |
| "grad_norm": 0.65234375, | |
| "learning_rate": 8e-05, | |
| "loss": 0.7904363870620728, | |
| "step": 1878 | |
| }, | |
| { | |
| "epoch": 0.500799147575919, | |
| "grad_norm": 0.63671875, | |
| "learning_rate": 8e-05, | |
| "loss": 0.7489137053489685, | |
| "step": 1880 | |
| }, | |
| { | |
| "epoch": 0.5013319126265317, | |
| "grad_norm": 0.609375, | |
| "learning_rate": 8e-05, | |
| "loss": 0.8006702065467834, | |
| "step": 1882 | |
| }, | |
| { | |
| "epoch": 0.5018646776771444, | |
| "grad_norm": 0.66015625, | |
| "learning_rate": 8e-05, | |
| "loss": 0.7943602800369263, | |
| "step": 1884 | |
| }, | |
| { | |
| "epoch": 0.5023974427277571, | |
| "grad_norm": 0.64453125, | |
| "learning_rate": 8e-05, | |
| "loss": 0.8127607703208923, | |
| "step": 1886 | |
| }, | |
| { | |
| "epoch": 0.5029302077783697, | |
| "grad_norm": 0.62890625, | |
| "learning_rate": 8e-05, | |
| "loss": 0.7711650729179382, | |
| "step": 1888 | |
| }, | |
| { | |
| "epoch": 0.5034629728289824, | |
| "grad_norm": 0.625, | |
| "learning_rate": 8e-05, | |
| "loss": 0.8170063495635986, | |
| "step": 1890 | |
| }, | |
| { | |
| "epoch": 0.5039957378795951, | |
| "grad_norm": 0.64453125, | |
| "learning_rate": 8e-05, | |
| "loss": 0.7900512218475342, | |
| "step": 1892 | |
| }, | |
| { | |
| "epoch": 0.5045285029302078, | |
| "grad_norm": 0.625, | |
| "learning_rate": 8e-05, | |
| "loss": 0.789554238319397, | |
| "step": 1894 | |
| }, | |
| { | |
| "epoch": 0.5050612679808204, | |
| "grad_norm": 0.59765625, | |
| "learning_rate": 8e-05, | |
| "loss": 0.7663175463676453, | |
| "step": 1896 | |
| }, | |
| { | |
| "epoch": 0.5055940330314331, | |
| "grad_norm": 0.61328125, | |
| "learning_rate": 8e-05, | |
| "loss": 0.7394262552261353, | |
| "step": 1898 | |
| }, | |
| { | |
| "epoch": 0.5061267980820459, | |
| "grad_norm": 0.62109375, | |
| "learning_rate": 8e-05, | |
| "loss": 0.776930570602417, | |
| "step": 1900 | |
| }, | |
| { | |
| "epoch": 0.5066595631326585, | |
| "grad_norm": 0.625, | |
| "learning_rate": 8e-05, | |
| "loss": 0.7874533534049988, | |
| "step": 1902 | |
| }, | |
| { | |
| "epoch": 0.5071923281832712, | |
| "grad_norm": 0.61328125, | |
| "learning_rate": 8e-05, | |
| "loss": 0.780265212059021, | |
| "step": 1904 | |
| }, | |
| { | |
| "epoch": 0.5077250932338838, | |
| "grad_norm": 0.609375, | |
| "learning_rate": 8e-05, | |
| "loss": 0.7821959853172302, | |
| "step": 1906 | |
| }, | |
| { | |
| "epoch": 0.5082578582844965, | |
| "grad_norm": 0.625, | |
| "learning_rate": 8e-05, | |
| "loss": 0.7723431587219238, | |
| "step": 1908 | |
| }, | |
| { | |
| "epoch": 0.5087906233351093, | |
| "grad_norm": 0.64453125, | |
| "learning_rate": 8e-05, | |
| "loss": 0.8031184673309326, | |
| "step": 1910 | |
| }, | |
| { | |
| "epoch": 0.5093233883857219, | |
| "grad_norm": 0.62890625, | |
| "learning_rate": 8e-05, | |
| "loss": 0.7822156548500061, | |
| "step": 1912 | |
| }, | |
| { | |
| "epoch": 0.5098561534363346, | |
| "grad_norm": 0.625, | |
| "learning_rate": 8e-05, | |
| "loss": 0.7892681956291199, | |
| "step": 1914 | |
| }, | |
| { | |
| "epoch": 0.5103889184869472, | |
| "grad_norm": 0.63671875, | |
| "learning_rate": 8e-05, | |
| "loss": 0.7619911432266235, | |
| "step": 1916 | |
| }, | |
| { | |
| "epoch": 0.51092168353756, | |
| "grad_norm": 0.6171875, | |
| "learning_rate": 8e-05, | |
| "loss": 0.7405803203582764, | |
| "step": 1918 | |
| }, | |
| { | |
| "epoch": 0.5114544485881726, | |
| "grad_norm": 0.63671875, | |
| "learning_rate": 8e-05, | |
| "loss": 0.7802076935768127, | |
| "step": 1920 | |
| }, | |
| { | |
| "epoch": 0.5119872136387853, | |
| "grad_norm": 0.62890625, | |
| "learning_rate": 8e-05, | |
| "loss": 0.7818913459777832, | |
| "step": 1922 | |
| }, | |
| { | |
| "epoch": 0.512519978689398, | |
| "grad_norm": 0.63671875, | |
| "learning_rate": 8e-05, | |
| "loss": 0.7780552506446838, | |
| "step": 1924 | |
| }, | |
| { | |
| "epoch": 0.5130527437400106, | |
| "grad_norm": 0.640625, | |
| "learning_rate": 8e-05, | |
| "loss": 0.7681518793106079, | |
| "step": 1926 | |
| }, | |
| { | |
| "epoch": 0.5135855087906234, | |
| "grad_norm": 0.63671875, | |
| "learning_rate": 8e-05, | |
| "loss": 0.8236287236213684, | |
| "step": 1928 | |
| }, | |
| { | |
| "epoch": 0.514118273841236, | |
| "grad_norm": 0.59765625, | |
| "learning_rate": 8e-05, | |
| "loss": 0.7650191783905029, | |
| "step": 1930 | |
| }, | |
| { | |
| "epoch": 0.5146510388918487, | |
| "grad_norm": 0.62109375, | |
| "learning_rate": 8e-05, | |
| "loss": 0.7506141662597656, | |
| "step": 1932 | |
| }, | |
| { | |
| "epoch": 0.5151838039424613, | |
| "grad_norm": 0.61328125, | |
| "learning_rate": 8e-05, | |
| "loss": 0.761217474937439, | |
| "step": 1934 | |
| }, | |
| { | |
| "epoch": 0.515716568993074, | |
| "grad_norm": 0.65234375, | |
| "learning_rate": 8e-05, | |
| "loss": 0.8237653970718384, | |
| "step": 1936 | |
| }, | |
| { | |
| "epoch": 0.5162493340436868, | |
| "grad_norm": 0.60546875, | |
| "learning_rate": 8e-05, | |
| "loss": 0.7403720021247864, | |
| "step": 1938 | |
| }, | |
| { | |
| "epoch": 0.5167820990942994, | |
| "grad_norm": 0.6171875, | |
| "learning_rate": 8e-05, | |
| "loss": 0.7571833729743958, | |
| "step": 1940 | |
| }, | |
| { | |
| "epoch": 0.5173148641449121, | |
| "grad_norm": 0.640625, | |
| "learning_rate": 8e-05, | |
| "loss": 0.8090606927871704, | |
| "step": 1942 | |
| }, | |
| { | |
| "epoch": 0.5178476291955247, | |
| "grad_norm": 0.60546875, | |
| "learning_rate": 8e-05, | |
| "loss": 0.7799073457717896, | |
| "step": 1944 | |
| }, | |
| { | |
| "epoch": 0.5183803942461375, | |
| "grad_norm": 0.64453125, | |
| "learning_rate": 8e-05, | |
| "loss": 0.8075971007347107, | |
| "step": 1946 | |
| }, | |
| { | |
| "epoch": 0.5189131592967501, | |
| "grad_norm": 0.63671875, | |
| "learning_rate": 8e-05, | |
| "loss": 0.7910662889480591, | |
| "step": 1948 | |
| }, | |
| { | |
| "epoch": 0.5194459243473628, | |
| "grad_norm": 0.63671875, | |
| "learning_rate": 8e-05, | |
| "loss": 0.786179780960083, | |
| "step": 1950 | |
| }, | |
| { | |
| "epoch": 0.5199786893979755, | |
| "grad_norm": 0.625, | |
| "learning_rate": 8e-05, | |
| "loss": 0.7777940034866333, | |
| "step": 1952 | |
| }, | |
| { | |
| "epoch": 0.5205114544485882, | |
| "grad_norm": 0.609375, | |
| "learning_rate": 8e-05, | |
| "loss": 0.7531383633613586, | |
| "step": 1954 | |
| }, | |
| { | |
| "epoch": 0.5210442194992009, | |
| "grad_norm": 0.6171875, | |
| "learning_rate": 8e-05, | |
| "loss": 0.7762864828109741, | |
| "step": 1956 | |
| }, | |
| { | |
| "epoch": 0.5215769845498135, | |
| "grad_norm": 0.60546875, | |
| "learning_rate": 8e-05, | |
| "loss": 0.759125828742981, | |
| "step": 1958 | |
| }, | |
| { | |
| "epoch": 0.5221097496004262, | |
| "grad_norm": 0.6328125, | |
| "learning_rate": 8e-05, | |
| "loss": 0.7885076403617859, | |
| "step": 1960 | |
| }, | |
| { | |
| "epoch": 0.5226425146510388, | |
| "grad_norm": 0.62109375, | |
| "learning_rate": 8e-05, | |
| "loss": 0.7936964631080627, | |
| "step": 1962 | |
| }, | |
| { | |
| "epoch": 0.5231752797016516, | |
| "grad_norm": 0.62890625, | |
| "learning_rate": 8e-05, | |
| "loss": 0.7714090943336487, | |
| "step": 1964 | |
| }, | |
| { | |
| "epoch": 0.5237080447522643, | |
| "grad_norm": 0.62109375, | |
| "learning_rate": 8e-05, | |
| "loss": 0.7484626770019531, | |
| "step": 1966 | |
| }, | |
| { | |
| "epoch": 0.5242408098028769, | |
| "grad_norm": 0.61328125, | |
| "learning_rate": 8e-05, | |
| "loss": 0.7742045521736145, | |
| "step": 1968 | |
| }, | |
| { | |
| "epoch": 0.5247735748534896, | |
| "grad_norm": 0.640625, | |
| "learning_rate": 8e-05, | |
| "loss": 0.7953801155090332, | |
| "step": 1970 | |
| }, | |
| { | |
| "epoch": 0.5253063399041022, | |
| "grad_norm": 0.640625, | |
| "learning_rate": 8e-05, | |
| "loss": 0.7853913903236389, | |
| "step": 1972 | |
| }, | |
| { | |
| "epoch": 0.525839104954715, | |
| "grad_norm": 0.62109375, | |
| "learning_rate": 8e-05, | |
| "loss": 0.7650255560874939, | |
| "step": 1974 | |
| }, | |
| { | |
| "epoch": 0.5263718700053277, | |
| "grad_norm": 0.65234375, | |
| "learning_rate": 8e-05, | |
| "loss": 0.8097711801528931, | |
| "step": 1976 | |
| }, | |
| { | |
| "epoch": 0.5269046350559403, | |
| "grad_norm": 0.640625, | |
| "learning_rate": 8e-05, | |
| "loss": 0.7798804640769958, | |
| "step": 1978 | |
| }, | |
| { | |
| "epoch": 0.527437400106553, | |
| "grad_norm": 0.59765625, | |
| "learning_rate": 8e-05, | |
| "loss": 0.77159184217453, | |
| "step": 1980 | |
| }, | |
| { | |
| "epoch": 0.5279701651571657, | |
| "grad_norm": 0.6171875, | |
| "learning_rate": 8e-05, | |
| "loss": 0.7470179200172424, | |
| "step": 1982 | |
| }, | |
| { | |
| "epoch": 0.5285029302077784, | |
| "grad_norm": 0.62109375, | |
| "learning_rate": 8e-05, | |
| "loss": 0.7437633275985718, | |
| "step": 1984 | |
| }, | |
| { | |
| "epoch": 0.529035695258391, | |
| "grad_norm": 0.6484375, | |
| "learning_rate": 8e-05, | |
| "loss": 0.7707769274711609, | |
| "step": 1986 | |
| }, | |
| { | |
| "epoch": 0.5295684603090037, | |
| "grad_norm": 0.625, | |
| "learning_rate": 8e-05, | |
| "loss": 0.7968963384628296, | |
| "step": 1988 | |
| }, | |
| { | |
| "epoch": 0.5301012253596165, | |
| "grad_norm": 0.63671875, | |
| "learning_rate": 8e-05, | |
| "loss": 0.7605825662612915, | |
| "step": 1990 | |
| }, | |
| { | |
| "epoch": 0.5306339904102291, | |
| "grad_norm": 0.625, | |
| "learning_rate": 8e-05, | |
| "loss": 0.7688091993331909, | |
| "step": 1992 | |
| }, | |
| { | |
| "epoch": 0.5311667554608418, | |
| "grad_norm": 0.625, | |
| "learning_rate": 8e-05, | |
| "loss": 0.8013522028923035, | |
| "step": 1994 | |
| }, | |
| { | |
| "epoch": 0.5316995205114544, | |
| "grad_norm": 0.6953125, | |
| "learning_rate": 8e-05, | |
| "loss": 0.8307242393493652, | |
| "step": 1996 | |
| }, | |
| { | |
| "epoch": 0.5322322855620671, | |
| "grad_norm": 0.6328125, | |
| "learning_rate": 8e-05, | |
| "loss": 0.7533189058303833, | |
| "step": 1998 | |
| }, | |
| { | |
| "epoch": 0.5327650506126798, | |
| "grad_norm": 0.6328125, | |
| "learning_rate": 8e-05, | |
| "loss": 0.7661764621734619, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 0.5332978156632925, | |
| "grad_norm": 0.6484375, | |
| "learning_rate": 8e-05, | |
| "loss": 0.761687159538269, | |
| "step": 2002 | |
| }, | |
| { | |
| "epoch": 0.5338305807139052, | |
| "grad_norm": 0.625, | |
| "learning_rate": 8e-05, | |
| "loss": 0.7734599113464355, | |
| "step": 2004 | |
| }, | |
| { | |
| "epoch": 0.5343633457645178, | |
| "grad_norm": 0.609375, | |
| "learning_rate": 8e-05, | |
| "loss": 0.764024555683136, | |
| "step": 2006 | |
| }, | |
| { | |
| "epoch": 0.5348961108151306, | |
| "grad_norm": 0.62109375, | |
| "learning_rate": 8e-05, | |
| "loss": 0.7784127593040466, | |
| "step": 2008 | |
| }, | |
| { | |
| "epoch": 0.5354288758657432, | |
| "grad_norm": 0.609375, | |
| "learning_rate": 8e-05, | |
| "loss": 0.7688071131706238, | |
| "step": 2010 | |
| }, | |
| { | |
| "epoch": 0.5359616409163559, | |
| "grad_norm": 0.61328125, | |
| "learning_rate": 8e-05, | |
| "loss": 0.7830870151519775, | |
| "step": 2012 | |
| }, | |
| { | |
| "epoch": 0.5364944059669685, | |
| "grad_norm": 0.6171875, | |
| "learning_rate": 8e-05, | |
| "loss": 0.7614960074424744, | |
| "step": 2014 | |
| }, | |
| { | |
| "epoch": 0.5370271710175812, | |
| "grad_norm": 0.6328125, | |
| "learning_rate": 8e-05, | |
| "loss": 0.7830729484558105, | |
| "step": 2016 | |
| }, | |
| { | |
| "epoch": 0.537559936068194, | |
| "grad_norm": 0.60546875, | |
| "learning_rate": 8e-05, | |
| "loss": 0.7983683943748474, | |
| "step": 2018 | |
| }, | |
| { | |
| "epoch": 0.5380927011188066, | |
| "grad_norm": 0.6171875, | |
| "learning_rate": 8e-05, | |
| "loss": 0.7553051114082336, | |
| "step": 2020 | |
| }, | |
| { | |
| "epoch": 0.5386254661694193, | |
| "grad_norm": 0.609375, | |
| "learning_rate": 8e-05, | |
| "loss": 0.7692767977714539, | |
| "step": 2022 | |
| }, | |
| { | |
| "epoch": 0.5391582312200319, | |
| "grad_norm": 0.62890625, | |
| "learning_rate": 8e-05, | |
| "loss": 0.7751806974411011, | |
| "step": 2024 | |
| }, | |
| { | |
| "epoch": 0.5396909962706447, | |
| "grad_norm": 0.66015625, | |
| "learning_rate": 8e-05, | |
| "loss": 0.7655361294746399, | |
| "step": 2026 | |
| }, | |
| { | |
| "epoch": 0.5402237613212574, | |
| "grad_norm": 0.6171875, | |
| "learning_rate": 8e-05, | |
| "loss": 0.8146576285362244, | |
| "step": 2028 | |
| }, | |
| { | |
| "epoch": 0.54075652637187, | |
| "grad_norm": 0.6328125, | |
| "learning_rate": 8e-05, | |
| "loss": 0.7740441560745239, | |
| "step": 2030 | |
| }, | |
| { | |
| "epoch": 0.5412892914224827, | |
| "grad_norm": 0.609375, | |
| "learning_rate": 8e-05, | |
| "loss": 0.7796229124069214, | |
| "step": 2032 | |
| }, | |
| { | |
| "epoch": 0.5418220564730953, | |
| "grad_norm": 0.625, | |
| "learning_rate": 8e-05, | |
| "loss": 0.7617045044898987, | |
| "step": 2034 | |
| }, | |
| { | |
| "epoch": 0.5423548215237081, | |
| "grad_norm": 0.6171875, | |
| "learning_rate": 8e-05, | |
| "loss": 0.7547261714935303, | |
| "step": 2036 | |
| }, | |
| { | |
| "epoch": 0.5428875865743207, | |
| "grad_norm": 0.6484375, | |
| "learning_rate": 8e-05, | |
| "loss": 0.7878617644309998, | |
| "step": 2038 | |
| }, | |
| { | |
| "epoch": 0.5434203516249334, | |
| "grad_norm": 0.64453125, | |
| "learning_rate": 8e-05, | |
| "loss": 0.7670659422874451, | |
| "step": 2040 | |
| }, | |
| { | |
| "epoch": 0.5439531166755461, | |
| "grad_norm": 0.61328125, | |
| "learning_rate": 8e-05, | |
| "loss": 0.8015154600143433, | |
| "step": 2042 | |
| }, | |
| { | |
| "epoch": 0.5444858817261587, | |
| "grad_norm": 0.68359375, | |
| "learning_rate": 8e-05, | |
| "loss": 0.7678729295730591, | |
| "step": 2044 | |
| }, | |
| { | |
| "epoch": 0.5450186467767715, | |
| "grad_norm": 0.6328125, | |
| "learning_rate": 8e-05, | |
| "loss": 0.7684184312820435, | |
| "step": 2046 | |
| }, | |
| { | |
| "epoch": 0.5455514118273841, | |
| "grad_norm": 0.640625, | |
| "learning_rate": 8e-05, | |
| "loss": 0.7359906435012817, | |
| "step": 2048 | |
| }, | |
| { | |
| "epoch": 0.5460841768779968, | |
| "grad_norm": 0.62109375, | |
| "learning_rate": 8e-05, | |
| "loss": 0.7884916067123413, | |
| "step": 2050 | |
| }, | |
| { | |
| "epoch": 0.5466169419286094, | |
| "grad_norm": 0.6484375, | |
| "learning_rate": 8e-05, | |
| "loss": 0.8283618092536926, | |
| "step": 2052 | |
| }, | |
| { | |
| "epoch": 0.5471497069792222, | |
| "grad_norm": 0.62890625, | |
| "learning_rate": 8e-05, | |
| "loss": 0.7943402528762817, | |
| "step": 2054 | |
| }, | |
| { | |
| "epoch": 0.5476824720298349, | |
| "grad_norm": 0.62109375, | |
| "learning_rate": 8e-05, | |
| "loss": 0.7284975647926331, | |
| "step": 2056 | |
| }, | |
| { | |
| "epoch": 0.5482152370804475, | |
| "grad_norm": 0.625, | |
| "learning_rate": 8e-05, | |
| "loss": 0.7707847952842712, | |
| "step": 2058 | |
| }, | |
| { | |
| "epoch": 0.5487480021310602, | |
| "grad_norm": 0.6484375, | |
| "learning_rate": 8e-05, | |
| "loss": 0.8470367193222046, | |
| "step": 2060 | |
| }, | |
| { | |
| "epoch": 0.5492807671816728, | |
| "grad_norm": 0.64453125, | |
| "learning_rate": 8e-05, | |
| "loss": 0.8090894222259521, | |
| "step": 2062 | |
| }, | |
| { | |
| "epoch": 0.5498135322322856, | |
| "grad_norm": 0.59375, | |
| "learning_rate": 8e-05, | |
| "loss": 0.7707048058509827, | |
| "step": 2064 | |
| }, | |
| { | |
| "epoch": 0.5503462972828982, | |
| "grad_norm": 0.60546875, | |
| "learning_rate": 8e-05, | |
| "loss": 0.7647881507873535, | |
| "step": 2066 | |
| }, | |
| { | |
| "epoch": 0.5508790623335109, | |
| "grad_norm": 0.59765625, | |
| "learning_rate": 8e-05, | |
| "loss": 0.738248348236084, | |
| "step": 2068 | |
| }, | |
| { | |
| "epoch": 0.5514118273841236, | |
| "grad_norm": 0.6171875, | |
| "learning_rate": 8e-05, | |
| "loss": 0.7581315040588379, | |
| "step": 2070 | |
| }, | |
| { | |
| "epoch": 0.5519445924347363, | |
| "grad_norm": 0.6171875, | |
| "learning_rate": 8e-05, | |
| "loss": 0.8011127710342407, | |
| "step": 2072 | |
| }, | |
| { | |
| "epoch": 0.552477357485349, | |
| "grad_norm": 0.609375, | |
| "learning_rate": 8e-05, | |
| "loss": 0.7452203631401062, | |
| "step": 2074 | |
| }, | |
| { | |
| "epoch": 0.5530101225359616, | |
| "grad_norm": 0.59375, | |
| "learning_rate": 8e-05, | |
| "loss": 0.7685807943344116, | |
| "step": 2076 | |
| }, | |
| { | |
| "epoch": 0.5535428875865743, | |
| "grad_norm": 0.6796875, | |
| "learning_rate": 8e-05, | |
| "loss": 0.7906605005264282, | |
| "step": 2078 | |
| }, | |
| { | |
| "epoch": 0.554075652637187, | |
| "grad_norm": 0.6484375, | |
| "learning_rate": 8e-05, | |
| "loss": 0.8196179270744324, | |
| "step": 2080 | |
| }, | |
| { | |
| "epoch": 0.5546084176877997, | |
| "grad_norm": 0.63671875, | |
| "learning_rate": 8e-05, | |
| "loss": 0.782852292060852, | |
| "step": 2082 | |
| }, | |
| { | |
| "epoch": 0.5551411827384124, | |
| "grad_norm": 0.609375, | |
| "learning_rate": 8e-05, | |
| "loss": 0.7460805177688599, | |
| "step": 2084 | |
| }, | |
| { | |
| "epoch": 0.555673947789025, | |
| "grad_norm": 0.5859375, | |
| "learning_rate": 8e-05, | |
| "loss": 0.7984696626663208, | |
| "step": 2086 | |
| }, | |
| { | |
| "epoch": 0.5562067128396377, | |
| "grad_norm": 0.6171875, | |
| "learning_rate": 8e-05, | |
| "loss": 0.777360200881958, | |
| "step": 2088 | |
| }, | |
| { | |
| "epoch": 0.5567394778902504, | |
| "grad_norm": 0.625, | |
| "learning_rate": 8e-05, | |
| "loss": 0.7777242660522461, | |
| "step": 2090 | |
| }, | |
| { | |
| "epoch": 0.5572722429408631, | |
| "grad_norm": 0.59375, | |
| "learning_rate": 8e-05, | |
| "loss": 0.7538855671882629, | |
| "step": 2092 | |
| }, | |
| { | |
| "epoch": 0.5578050079914758, | |
| "grad_norm": 0.6328125, | |
| "learning_rate": 8e-05, | |
| "loss": 0.7709717750549316, | |
| "step": 2094 | |
| }, | |
| { | |
| "epoch": 0.5583377730420884, | |
| "grad_norm": 0.5859375, | |
| "learning_rate": 8e-05, | |
| "loss": 0.7503103613853455, | |
| "step": 2096 | |
| }, | |
| { | |
| "epoch": 0.5588705380927012, | |
| "grad_norm": 0.62890625, | |
| "learning_rate": 8e-05, | |
| "loss": 0.775667130947113, | |
| "step": 2098 | |
| }, | |
| { | |
| "epoch": 0.5594033031433138, | |
| "grad_norm": 0.62109375, | |
| "learning_rate": 8e-05, | |
| "loss": 0.7580850720405579, | |
| "step": 2100 | |
| }, | |
| { | |
| "epoch": 0.5599360681939265, | |
| "grad_norm": 0.609375, | |
| "learning_rate": 8e-05, | |
| "loss": 0.7868179678916931, | |
| "step": 2102 | |
| }, | |
| { | |
| "epoch": 0.5604688332445391, | |
| "grad_norm": 0.60546875, | |
| "learning_rate": 8e-05, | |
| "loss": 0.7456986308097839, | |
| "step": 2104 | |
| }, | |
| { | |
| "epoch": 0.5610015982951518, | |
| "grad_norm": 0.6015625, | |
| "learning_rate": 8e-05, | |
| "loss": 0.7889756560325623, | |
| "step": 2106 | |
| }, | |
| { | |
| "epoch": 0.5615343633457646, | |
| "grad_norm": 0.60546875, | |
| "learning_rate": 8e-05, | |
| "loss": 0.7284671664237976, | |
| "step": 2108 | |
| }, | |
| { | |
| "epoch": 0.5620671283963772, | |
| "grad_norm": 0.6015625, | |
| "learning_rate": 8e-05, | |
| "loss": 0.7758548259735107, | |
| "step": 2110 | |
| }, | |
| { | |
| "epoch": 0.5625998934469899, | |
| "grad_norm": 0.65625, | |
| "learning_rate": 8e-05, | |
| "loss": 0.7966741919517517, | |
| "step": 2112 | |
| }, | |
| { | |
| "epoch": 0.5631326584976025, | |
| "grad_norm": 0.62890625, | |
| "learning_rate": 8e-05, | |
| "loss": 0.8286765217781067, | |
| "step": 2114 | |
| }, | |
| { | |
| "epoch": 0.5636654235482152, | |
| "grad_norm": 0.640625, | |
| "learning_rate": 8e-05, | |
| "loss": 0.8168929815292358, | |
| "step": 2116 | |
| }, | |
| { | |
| "epoch": 0.5641981885988279, | |
| "grad_norm": 0.61328125, | |
| "learning_rate": 8e-05, | |
| "loss": 0.7689355611801147, | |
| "step": 2118 | |
| }, | |
| { | |
| "epoch": 0.5647309536494406, | |
| "grad_norm": 0.59375, | |
| "learning_rate": 8e-05, | |
| "loss": 0.7302696108818054, | |
| "step": 2120 | |
| }, | |
| { | |
| "epoch": 0.5652637187000533, | |
| "grad_norm": 0.59765625, | |
| "learning_rate": 8e-05, | |
| "loss": 0.759016752243042, | |
| "step": 2122 | |
| }, | |
| { | |
| "epoch": 0.5657964837506659, | |
| "grad_norm": 0.6171875, | |
| "learning_rate": 8e-05, | |
| "loss": 0.8112335801124573, | |
| "step": 2124 | |
| }, | |
| { | |
| "epoch": 0.5663292488012787, | |
| "grad_norm": 0.58984375, | |
| "learning_rate": 8e-05, | |
| "loss": 0.7880202531814575, | |
| "step": 2126 | |
| }, | |
| { | |
| "epoch": 0.5668620138518913, | |
| "grad_norm": 0.578125, | |
| "learning_rate": 8e-05, | |
| "loss": 0.7357752323150635, | |
| "step": 2128 | |
| }, | |
| { | |
| "epoch": 0.567394778902504, | |
| "grad_norm": 0.59375, | |
| "learning_rate": 8e-05, | |
| "loss": 0.8004807233810425, | |
| "step": 2130 | |
| }, | |
| { | |
| "epoch": 0.5679275439531166, | |
| "grad_norm": 0.58203125, | |
| "learning_rate": 8e-05, | |
| "loss": 0.7320253252983093, | |
| "step": 2132 | |
| }, | |
| { | |
| "epoch": 0.5684603090037293, | |
| "grad_norm": 0.58203125, | |
| "learning_rate": 8e-05, | |
| "loss": 0.7926676273345947, | |
| "step": 2134 | |
| }, | |
| { | |
| "epoch": 0.5689930740543421, | |
| "grad_norm": 0.578125, | |
| "learning_rate": 8e-05, | |
| "loss": 0.7315946817398071, | |
| "step": 2136 | |
| }, | |
| { | |
| "epoch": 0.5695258391049547, | |
| "grad_norm": 0.60546875, | |
| "learning_rate": 8e-05, | |
| "loss": 0.7578614950180054, | |
| "step": 2138 | |
| }, | |
| { | |
| "epoch": 0.5700586041555674, | |
| "grad_norm": 0.59375, | |
| "learning_rate": 8e-05, | |
| "loss": 0.759326696395874, | |
| "step": 2140 | |
| }, | |
| { | |
| "epoch": 0.57059136920618, | |
| "grad_norm": 0.6171875, | |
| "learning_rate": 8e-05, | |
| "loss": 0.7619845867156982, | |
| "step": 2142 | |
| }, | |
| { | |
| "epoch": 0.5711241342567928, | |
| "grad_norm": 0.69921875, | |
| "learning_rate": 8e-05, | |
| "loss": 0.7644577622413635, | |
| "step": 2144 | |
| }, | |
| { | |
| "epoch": 0.5716568993074055, | |
| "grad_norm": 0.640625, | |
| "learning_rate": 8e-05, | |
| "loss": 0.7847421765327454, | |
| "step": 2146 | |
| }, | |
| { | |
| "epoch": 0.5721896643580181, | |
| "grad_norm": 0.5859375, | |
| "learning_rate": 8e-05, | |
| "loss": 0.7709816098213196, | |
| "step": 2148 | |
| }, | |
| { | |
| "epoch": 0.5727224294086308, | |
| "grad_norm": 0.57421875, | |
| "learning_rate": 8e-05, | |
| "loss": 0.7508271932601929, | |
| "step": 2150 | |
| }, | |
| { | |
| "epoch": 0.5732551944592434, | |
| "grad_norm": 0.61328125, | |
| "learning_rate": 8e-05, | |
| "loss": 0.7665797472000122, | |
| "step": 2152 | |
| }, | |
| { | |
| "epoch": 0.5737879595098562, | |
| "grad_norm": 0.62109375, | |
| "learning_rate": 8e-05, | |
| "loss": 0.7920853495597839, | |
| "step": 2154 | |
| }, | |
| { | |
| "epoch": 0.5743207245604688, | |
| "grad_norm": 0.59765625, | |
| "learning_rate": 8e-05, | |
| "loss": 0.7717834711074829, | |
| "step": 2156 | |
| }, | |
| { | |
| "epoch": 0.5748534896110815, | |
| "grad_norm": 0.61328125, | |
| "learning_rate": 8e-05, | |
| "loss": 0.7623285055160522, | |
| "step": 2158 | |
| }, | |
| { | |
| "epoch": 0.5753862546616942, | |
| "grad_norm": 0.609375, | |
| "learning_rate": 8e-05, | |
| "loss": 0.7920824885368347, | |
| "step": 2160 | |
| }, | |
| { | |
| "epoch": 0.5759190197123069, | |
| "grad_norm": 0.60546875, | |
| "learning_rate": 8e-05, | |
| "loss": 0.7590711116790771, | |
| "step": 2162 | |
| }, | |
| { | |
| "epoch": 0.5764517847629196, | |
| "grad_norm": 0.60546875, | |
| "learning_rate": 8e-05, | |
| "loss": 0.7452772855758667, | |
| "step": 2164 | |
| }, | |
| { | |
| "epoch": 0.5769845498135322, | |
| "grad_norm": 0.60546875, | |
| "learning_rate": 8e-05, | |
| "loss": 0.7672356963157654, | |
| "step": 2166 | |
| }, | |
| { | |
| "epoch": 0.5775173148641449, | |
| "grad_norm": 0.59765625, | |
| "learning_rate": 8e-05, | |
| "loss": 0.797944188117981, | |
| "step": 2168 | |
| }, | |
| { | |
| "epoch": 0.5780500799147575, | |
| "grad_norm": 0.625, | |
| "learning_rate": 8e-05, | |
| "loss": 0.7617903351783752, | |
| "step": 2170 | |
| }, | |
| { | |
| "epoch": 0.5785828449653703, | |
| "grad_norm": 0.5859375, | |
| "learning_rate": 8e-05, | |
| "loss": 0.763174295425415, | |
| "step": 2172 | |
| }, | |
| { | |
| "epoch": 0.579115610015983, | |
| "grad_norm": 0.63671875, | |
| "learning_rate": 8e-05, | |
| "loss": 0.7957082390785217, | |
| "step": 2174 | |
| }, | |
| { | |
| "epoch": 0.5796483750665956, | |
| "grad_norm": 0.640625, | |
| "learning_rate": 8e-05, | |
| "loss": 0.7664093375205994, | |
| "step": 2176 | |
| }, | |
| { | |
| "epoch": 0.5801811401172083, | |
| "grad_norm": 0.61328125, | |
| "learning_rate": 8e-05, | |
| "loss": 0.7532491087913513, | |
| "step": 2178 | |
| }, | |
| { | |
| "epoch": 0.580713905167821, | |
| "grad_norm": 0.61328125, | |
| "learning_rate": 8e-05, | |
| "loss": 0.7795153856277466, | |
| "step": 2180 | |
| }, | |
| { | |
| "epoch": 0.5812466702184337, | |
| "grad_norm": 0.60546875, | |
| "learning_rate": 8e-05, | |
| "loss": 0.7416831254959106, | |
| "step": 2182 | |
| }, | |
| { | |
| "epoch": 0.5817794352690463, | |
| "grad_norm": 0.59765625, | |
| "learning_rate": 8e-05, | |
| "loss": 0.73810213804245, | |
| "step": 2184 | |
| }, | |
| { | |
| "epoch": 0.582312200319659, | |
| "grad_norm": 0.59375, | |
| "learning_rate": 8e-05, | |
| "loss": 0.7496551871299744, | |
| "step": 2186 | |
| }, | |
| { | |
| "epoch": 0.5828449653702718, | |
| "grad_norm": 0.65625, | |
| "learning_rate": 8e-05, | |
| "loss": 0.8033764958381653, | |
| "step": 2188 | |
| }, | |
| { | |
| "epoch": 0.5833777304208844, | |
| "grad_norm": 0.59765625, | |
| "learning_rate": 8e-05, | |
| "loss": 0.7733720541000366, | |
| "step": 2190 | |
| }, | |
| { | |
| "epoch": 0.5839104954714971, | |
| "grad_norm": 0.609375, | |
| "learning_rate": 8e-05, | |
| "loss": 0.8195459842681885, | |
| "step": 2192 | |
| }, | |
| { | |
| "epoch": 0.5844432605221097, | |
| "grad_norm": 0.6015625, | |
| "learning_rate": 8e-05, | |
| "loss": 0.7616481781005859, | |
| "step": 2194 | |
| }, | |
| { | |
| "epoch": 0.5849760255727224, | |
| "grad_norm": 0.59765625, | |
| "learning_rate": 8e-05, | |
| "loss": 0.7596360445022583, | |
| "step": 2196 | |
| }, | |
| { | |
| "epoch": 0.5855087906233352, | |
| "grad_norm": 0.5859375, | |
| "learning_rate": 8e-05, | |
| "loss": 0.7688573598861694, | |
| "step": 2198 | |
| }, | |
| { | |
| "epoch": 0.5860415556739478, | |
| "grad_norm": 0.61328125, | |
| "learning_rate": 8e-05, | |
| "loss": 0.8028122782707214, | |
| "step": 2200 | |
| }, | |
| { | |
| "epoch": 0.5865743207245605, | |
| "grad_norm": 0.6015625, | |
| "learning_rate": 8e-05, | |
| "loss": 0.7449119687080383, | |
| "step": 2202 | |
| }, | |
| { | |
| "epoch": 0.5871070857751731, | |
| "grad_norm": 0.63671875, | |
| "learning_rate": 8e-05, | |
| "loss": 0.7859646677970886, | |
| "step": 2204 | |
| }, | |
| { | |
| "epoch": 0.5876398508257858, | |
| "grad_norm": 0.6015625, | |
| "learning_rate": 8e-05, | |
| "loss": 0.7646852731704712, | |
| "step": 2206 | |
| }, | |
| { | |
| "epoch": 0.5881726158763985, | |
| "grad_norm": 0.609375, | |
| "learning_rate": 8e-05, | |
| "loss": 0.7665087580680847, | |
| "step": 2208 | |
| }, | |
| { | |
| "epoch": 0.5887053809270112, | |
| "grad_norm": 0.6484375, | |
| "learning_rate": 8e-05, | |
| "loss": 0.8172298669815063, | |
| "step": 2210 | |
| }, | |
| { | |
| "epoch": 0.5892381459776239, | |
| "grad_norm": 0.5859375, | |
| "learning_rate": 8e-05, | |
| "loss": 0.7623305916786194, | |
| "step": 2212 | |
| }, | |
| { | |
| "epoch": 0.5897709110282365, | |
| "grad_norm": 0.6015625, | |
| "learning_rate": 8e-05, | |
| "loss": 0.7520396709442139, | |
| "step": 2214 | |
| }, | |
| { | |
| "epoch": 0.5903036760788493, | |
| "grad_norm": 0.64453125, | |
| "learning_rate": 8e-05, | |
| "loss": 0.8028483390808105, | |
| "step": 2216 | |
| }, | |
| { | |
| "epoch": 0.5908364411294619, | |
| "grad_norm": 0.61328125, | |
| "learning_rate": 8e-05, | |
| "loss": 0.7868152260780334, | |
| "step": 2218 | |
| }, | |
| { | |
| "epoch": 0.5913692061800746, | |
| "grad_norm": 0.6171875, | |
| "learning_rate": 8e-05, | |
| "loss": 0.7606732249259949, | |
| "step": 2220 | |
| }, | |
| { | |
| "epoch": 0.5919019712306872, | |
| "grad_norm": 0.625, | |
| "learning_rate": 8e-05, | |
| "loss": 0.7934197783470154, | |
| "step": 2222 | |
| }, | |
| { | |
| "epoch": 0.5924347362813, | |
| "grad_norm": 0.6484375, | |
| "learning_rate": 8e-05, | |
| "loss": 0.7841813564300537, | |
| "step": 2224 | |
| }, | |
| { | |
| "epoch": 0.5929675013319127, | |
| "grad_norm": 0.58984375, | |
| "learning_rate": 8e-05, | |
| "loss": 0.7516508102416992, | |
| "step": 2226 | |
| }, | |
| { | |
| "epoch": 0.5935002663825253, | |
| "grad_norm": 0.66015625, | |
| "learning_rate": 8e-05, | |
| "loss": 0.810605525970459, | |
| "step": 2228 | |
| }, | |
| { | |
| "epoch": 0.594033031433138, | |
| "grad_norm": 0.6171875, | |
| "learning_rate": 8e-05, | |
| "loss": 0.7940796613693237, | |
| "step": 2230 | |
| }, | |
| { | |
| "epoch": 0.5945657964837506, | |
| "grad_norm": 0.61328125, | |
| "learning_rate": 8e-05, | |
| "loss": 0.7363699674606323, | |
| "step": 2232 | |
| }, | |
| { | |
| "epoch": 0.5950985615343634, | |
| "grad_norm": 0.62109375, | |
| "learning_rate": 8e-05, | |
| "loss": 0.7718603610992432, | |
| "step": 2234 | |
| }, | |
| { | |
| "epoch": 0.595631326584976, | |
| "grad_norm": 0.62890625, | |
| "learning_rate": 8e-05, | |
| "loss": 0.7635315656661987, | |
| "step": 2236 | |
| }, | |
| { | |
| "epoch": 0.5961640916355887, | |
| "grad_norm": 0.6640625, | |
| "learning_rate": 8e-05, | |
| "loss": 0.7487787008285522, | |
| "step": 2238 | |
| }, | |
| { | |
| "epoch": 0.5966968566862014, | |
| "grad_norm": 0.5859375, | |
| "learning_rate": 8e-05, | |
| "loss": 0.7251549959182739, | |
| "step": 2240 | |
| }, | |
| { | |
| "epoch": 0.597229621736814, | |
| "grad_norm": 0.640625, | |
| "learning_rate": 8e-05, | |
| "loss": 0.7601324319839478, | |
| "step": 2242 | |
| }, | |
| { | |
| "epoch": 0.5977623867874268, | |
| "grad_norm": 0.59765625, | |
| "learning_rate": 8e-05, | |
| "loss": 0.7800053954124451, | |
| "step": 2244 | |
| }, | |
| { | |
| "epoch": 0.5982951518380394, | |
| "grad_norm": 0.62109375, | |
| "learning_rate": 8e-05, | |
| "loss": 0.8023176789283752, | |
| "step": 2246 | |
| }, | |
| { | |
| "epoch": 0.5988279168886521, | |
| "grad_norm": 0.61328125, | |
| "learning_rate": 8e-05, | |
| "loss": 0.7815042734146118, | |
| "step": 2248 | |
| }, | |
| { | |
| "epoch": 0.5993606819392648, | |
| "grad_norm": 0.6015625, | |
| "learning_rate": 8e-05, | |
| "loss": 0.7613332271575928, | |
| "step": 2250 | |
| }, | |
| { | |
| "epoch": 0.5998934469898775, | |
| "grad_norm": 0.62109375, | |
| "learning_rate": 8e-05, | |
| "loss": 0.8300601243972778, | |
| "step": 2252 | |
| }, | |
| { | |
| "epoch": 0.6004262120404902, | |
| "grad_norm": 0.58984375, | |
| "learning_rate": 8e-05, | |
| "loss": 0.7365957498550415, | |
| "step": 2254 | |
| }, | |
| { | |
| "epoch": 0.6009589770911028, | |
| "grad_norm": 0.6328125, | |
| "learning_rate": 8e-05, | |
| "loss": 0.7763136625289917, | |
| "step": 2256 | |
| }, | |
| { | |
| "epoch": 0.6014917421417155, | |
| "grad_norm": 0.6328125, | |
| "learning_rate": 8e-05, | |
| "loss": 0.7749415636062622, | |
| "step": 2258 | |
| }, | |
| { | |
| "epoch": 0.6020245071923281, | |
| "grad_norm": 0.62109375, | |
| "learning_rate": 8e-05, | |
| "loss": 0.7965446710586548, | |
| "step": 2260 | |
| }, | |
| { | |
| "epoch": 0.6025572722429409, | |
| "grad_norm": 0.59765625, | |
| "learning_rate": 8e-05, | |
| "loss": 0.7543548345565796, | |
| "step": 2262 | |
| }, | |
| { | |
| "epoch": 0.6030900372935536, | |
| "grad_norm": 0.6015625, | |
| "learning_rate": 8e-05, | |
| "loss": 0.7496042251586914, | |
| "step": 2264 | |
| }, | |
| { | |
| "epoch": 0.6036228023441662, | |
| "grad_norm": 0.6015625, | |
| "learning_rate": 8e-05, | |
| "loss": 0.7888254523277283, | |
| "step": 2266 | |
| }, | |
| { | |
| "epoch": 0.6041555673947789, | |
| "grad_norm": 0.60546875, | |
| "learning_rate": 8e-05, | |
| "loss": 0.7962220311164856, | |
| "step": 2268 | |
| }, | |
| { | |
| "epoch": 0.6046883324453916, | |
| "grad_norm": 0.58203125, | |
| "learning_rate": 8e-05, | |
| "loss": 0.7448618412017822, | |
| "step": 2270 | |
| }, | |
| { | |
| "epoch": 0.6052210974960043, | |
| "grad_norm": 0.61328125, | |
| "learning_rate": 8e-05, | |
| "loss": 0.7599774599075317, | |
| "step": 2272 | |
| }, | |
| { | |
| "epoch": 0.6057538625466169, | |
| "grad_norm": 0.6015625, | |
| "learning_rate": 8e-05, | |
| "loss": 0.7902323007583618, | |
| "step": 2274 | |
| }, | |
| { | |
| "epoch": 0.6062866275972296, | |
| "grad_norm": 0.58203125, | |
| "learning_rate": 8e-05, | |
| "loss": 0.7423989176750183, | |
| "step": 2276 | |
| }, | |
| { | |
| "epoch": 0.6068193926478423, | |
| "grad_norm": 0.62109375, | |
| "learning_rate": 8e-05, | |
| "loss": 0.7656657099723816, | |
| "step": 2278 | |
| }, | |
| { | |
| "epoch": 0.607352157698455, | |
| "grad_norm": 0.6015625, | |
| "learning_rate": 8e-05, | |
| "loss": 0.7378427982330322, | |
| "step": 2280 | |
| }, | |
| { | |
| "epoch": 0.6078849227490677, | |
| "grad_norm": 0.63671875, | |
| "learning_rate": 8e-05, | |
| "loss": 0.7627711296081543, | |
| "step": 2282 | |
| }, | |
| { | |
| "epoch": 0.6084176877996803, | |
| "grad_norm": 0.6015625, | |
| "learning_rate": 8e-05, | |
| "loss": 0.7654937505722046, | |
| "step": 2284 | |
| }, | |
| { | |
| "epoch": 0.608950452850293, | |
| "grad_norm": 0.58984375, | |
| "learning_rate": 8e-05, | |
| "loss": 0.7615671157836914, | |
| "step": 2286 | |
| }, | |
| { | |
| "epoch": 0.6094832179009056, | |
| "grad_norm": 0.6171875, | |
| "learning_rate": 8e-05, | |
| "loss": 0.7753266096115112, | |
| "step": 2288 | |
| }, | |
| { | |
| "epoch": 0.6100159829515184, | |
| "grad_norm": 0.59765625, | |
| "learning_rate": 8e-05, | |
| "loss": 0.7688212990760803, | |
| "step": 2290 | |
| }, | |
| { | |
| "epoch": 0.6105487480021311, | |
| "grad_norm": 0.62890625, | |
| "learning_rate": 8e-05, | |
| "loss": 0.7590000033378601, | |
| "step": 2292 | |
| }, | |
| { | |
| "epoch": 0.6110815130527437, | |
| "grad_norm": 0.59375, | |
| "learning_rate": 8e-05, | |
| "loss": 0.7802637815475464, | |
| "step": 2294 | |
| }, | |
| { | |
| "epoch": 0.6116142781033564, | |
| "grad_norm": 0.59765625, | |
| "learning_rate": 8e-05, | |
| "loss": 0.7646292448043823, | |
| "step": 2296 | |
| }, | |
| { | |
| "epoch": 0.6121470431539691, | |
| "grad_norm": 0.59765625, | |
| "learning_rate": 8e-05, | |
| "loss": 0.7465856671333313, | |
| "step": 2298 | |
| }, | |
| { | |
| "epoch": 0.6126798082045818, | |
| "grad_norm": 0.58984375, | |
| "learning_rate": 8e-05, | |
| "loss": 0.755443811416626, | |
| "step": 2300 | |
| }, | |
| { | |
| "epoch": 0.6132125732551944, | |
| "grad_norm": 0.6171875, | |
| "learning_rate": 8e-05, | |
| "loss": 0.7676706910133362, | |
| "step": 2302 | |
| }, | |
| { | |
| "epoch": 0.6137453383058071, | |
| "grad_norm": 0.62109375, | |
| "learning_rate": 8e-05, | |
| "loss": 0.7980360388755798, | |
| "step": 2304 | |
| }, | |
| { | |
| "epoch": 0.6142781033564199, | |
| "grad_norm": 0.6171875, | |
| "learning_rate": 8e-05, | |
| "loss": 0.7766455411911011, | |
| "step": 2306 | |
| }, | |
| { | |
| "epoch": 0.6148108684070325, | |
| "grad_norm": 0.60546875, | |
| "learning_rate": 8e-05, | |
| "loss": 0.7732510566711426, | |
| "step": 2308 | |
| }, | |
| { | |
| "epoch": 0.6153436334576452, | |
| "grad_norm": 0.58203125, | |
| "learning_rate": 8e-05, | |
| "loss": 0.7538511157035828, | |
| "step": 2310 | |
| }, | |
| { | |
| "epoch": 0.6158763985082578, | |
| "grad_norm": 0.58984375, | |
| "learning_rate": 8e-05, | |
| "loss": 0.7400153875350952, | |
| "step": 2312 | |
| }, | |
| { | |
| "epoch": 0.6164091635588705, | |
| "grad_norm": 0.625, | |
| "learning_rate": 8e-05, | |
| "loss": 0.8004501461982727, | |
| "step": 2314 | |
| }, | |
| { | |
| "epoch": 0.6169419286094833, | |
| "grad_norm": 0.5703125, | |
| "learning_rate": 8e-05, | |
| "loss": 0.7622724771499634, | |
| "step": 2316 | |
| }, | |
| { | |
| "epoch": 0.6174746936600959, | |
| "grad_norm": 0.60546875, | |
| "learning_rate": 8e-05, | |
| "loss": 0.759125292301178, | |
| "step": 2318 | |
| }, | |
| { | |
| "epoch": 0.6180074587107086, | |
| "grad_norm": 0.58984375, | |
| "learning_rate": 8e-05, | |
| "loss": 0.7583044767379761, | |
| "step": 2320 | |
| }, | |
| { | |
| "epoch": 0.6185402237613212, | |
| "grad_norm": 0.6015625, | |
| "learning_rate": 8e-05, | |
| "loss": 0.8017464876174927, | |
| "step": 2322 | |
| }, | |
| { | |
| "epoch": 0.619072988811934, | |
| "grad_norm": 0.60546875, | |
| "learning_rate": 8e-05, | |
| "loss": 0.773422122001648, | |
| "step": 2324 | |
| }, | |
| { | |
| "epoch": 0.6196057538625466, | |
| "grad_norm": 0.60546875, | |
| "learning_rate": 8e-05, | |
| "loss": 0.782919704914093, | |
| "step": 2326 | |
| }, | |
| { | |
| "epoch": 0.6201385189131593, | |
| "grad_norm": 0.60546875, | |
| "learning_rate": 8e-05, | |
| "loss": 0.7374987602233887, | |
| "step": 2328 | |
| }, | |
| { | |
| "epoch": 0.620671283963772, | |
| "grad_norm": 0.6015625, | |
| "learning_rate": 8e-05, | |
| "loss": 0.7503113746643066, | |
| "step": 2330 | |
| }, | |
| { | |
| "epoch": 0.6212040490143846, | |
| "grad_norm": 0.59765625, | |
| "learning_rate": 8e-05, | |
| "loss": 0.7530639171600342, | |
| "step": 2332 | |
| }, | |
| { | |
| "epoch": 0.6217368140649974, | |
| "grad_norm": 0.60546875, | |
| "learning_rate": 8e-05, | |
| "loss": 0.8065166473388672, | |
| "step": 2334 | |
| }, | |
| { | |
| "epoch": 0.62226957911561, | |
| "grad_norm": 0.62890625, | |
| "learning_rate": 8e-05, | |
| "loss": 0.7694088816642761, | |
| "step": 2336 | |
| }, | |
| { | |
| "epoch": 0.6228023441662227, | |
| "grad_norm": 0.61328125, | |
| "learning_rate": 8e-05, | |
| "loss": 0.8152406215667725, | |
| "step": 2338 | |
| }, | |
| { | |
| "epoch": 0.6233351092168353, | |
| "grad_norm": 0.63671875, | |
| "learning_rate": 8e-05, | |
| "loss": 0.8167840242385864, | |
| "step": 2340 | |
| }, | |
| { | |
| "epoch": 0.623867874267448, | |
| "grad_norm": 0.62109375, | |
| "learning_rate": 8e-05, | |
| "loss": 0.748583972454071, | |
| "step": 2342 | |
| }, | |
| { | |
| "epoch": 0.6244006393180608, | |
| "grad_norm": 0.59375, | |
| "learning_rate": 8e-05, | |
| "loss": 0.7353124618530273, | |
| "step": 2344 | |
| }, | |
| { | |
| "epoch": 0.6249334043686734, | |
| "grad_norm": 0.59765625, | |
| "learning_rate": 8e-05, | |
| "loss": 0.7833389043807983, | |
| "step": 2346 | |
| }, | |
| { | |
| "epoch": 0.6254661694192861, | |
| "grad_norm": 0.5859375, | |
| "learning_rate": 8e-05, | |
| "loss": 0.7380432486534119, | |
| "step": 2348 | |
| }, | |
| { | |
| "epoch": 0.6259989344698987, | |
| "grad_norm": 0.640625, | |
| "learning_rate": 8e-05, | |
| "loss": 0.8244600892066956, | |
| "step": 2350 | |
| }, | |
| { | |
| "epoch": 0.6265316995205115, | |
| "grad_norm": 0.59375, | |
| "learning_rate": 8e-05, | |
| "loss": 0.7240447402000427, | |
| "step": 2352 | |
| }, | |
| { | |
| "epoch": 0.6270644645711241, | |
| "grad_norm": 0.59375, | |
| "learning_rate": 8e-05, | |
| "loss": 0.7850971221923828, | |
| "step": 2354 | |
| }, | |
| { | |
| "epoch": 0.6275972296217368, | |
| "grad_norm": 0.6015625, | |
| "learning_rate": 8e-05, | |
| "loss": 0.7789439558982849, | |
| "step": 2356 | |
| }, | |
| { | |
| "epoch": 0.6281299946723495, | |
| "grad_norm": 0.62109375, | |
| "learning_rate": 8e-05, | |
| "loss": 0.8035836815834045, | |
| "step": 2358 | |
| }, | |
| { | |
| "epoch": 0.6286627597229622, | |
| "grad_norm": 0.59375, | |
| "learning_rate": 8e-05, | |
| "loss": 0.7577446103096008, | |
| "step": 2360 | |
| }, | |
| { | |
| "epoch": 0.6291955247735749, | |
| "grad_norm": 0.61328125, | |
| "learning_rate": 8e-05, | |
| "loss": 0.8039902448654175, | |
| "step": 2362 | |
| }, | |
| { | |
| "epoch": 0.6297282898241875, | |
| "grad_norm": 0.6171875, | |
| "learning_rate": 8e-05, | |
| "loss": 0.7867581844329834, | |
| "step": 2364 | |
| }, | |
| { | |
| "epoch": 0.6302610548748002, | |
| "grad_norm": 0.61328125, | |
| "learning_rate": 8e-05, | |
| "loss": 0.7361651062965393, | |
| "step": 2366 | |
| }, | |
| { | |
| "epoch": 0.630793819925413, | |
| "grad_norm": 0.60546875, | |
| "learning_rate": 8e-05, | |
| "loss": 0.7749671339988708, | |
| "step": 2368 | |
| }, | |
| { | |
| "epoch": 0.6313265849760256, | |
| "grad_norm": 0.62109375, | |
| "learning_rate": 8e-05, | |
| "loss": 0.7684974670410156, | |
| "step": 2370 | |
| }, | |
| { | |
| "epoch": 0.6318593500266383, | |
| "grad_norm": 0.609375, | |
| "learning_rate": 8e-05, | |
| "loss": 0.7540404796600342, | |
| "step": 2372 | |
| }, | |
| { | |
| "epoch": 0.6323921150772509, | |
| "grad_norm": 0.609375, | |
| "learning_rate": 8e-05, | |
| "loss": 0.7764151096343994, | |
| "step": 2374 | |
| }, | |
| { | |
| "epoch": 0.6329248801278636, | |
| "grad_norm": 0.61328125, | |
| "learning_rate": 8e-05, | |
| "loss": 0.7495799660682678, | |
| "step": 2376 | |
| }, | |
| { | |
| "epoch": 0.6334576451784762, | |
| "grad_norm": 0.72265625, | |
| "learning_rate": 8e-05, | |
| "loss": 0.7766825556755066, | |
| "step": 2378 | |
| }, | |
| { | |
| "epoch": 0.633990410229089, | |
| "grad_norm": 0.640625, | |
| "learning_rate": 8e-05, | |
| "loss": 0.7477214336395264, | |
| "step": 2380 | |
| }, | |
| { | |
| "epoch": 0.6345231752797017, | |
| "grad_norm": 0.625, | |
| "learning_rate": 8e-05, | |
| "loss": 0.7461574077606201, | |
| "step": 2382 | |
| }, | |
| { | |
| "epoch": 0.6350559403303143, | |
| "grad_norm": 0.60546875, | |
| "learning_rate": 8e-05, | |
| "loss": 0.7609319090843201, | |
| "step": 2384 | |
| }, | |
| { | |
| "epoch": 0.635588705380927, | |
| "grad_norm": 0.6015625, | |
| "learning_rate": 8e-05, | |
| "loss": 0.7841947674751282, | |
| "step": 2386 | |
| }, | |
| { | |
| "epoch": 0.6361214704315397, | |
| "grad_norm": 0.609375, | |
| "learning_rate": 8e-05, | |
| "loss": 0.7920709848403931, | |
| "step": 2388 | |
| }, | |
| { | |
| "epoch": 0.6366542354821524, | |
| "grad_norm": 0.61328125, | |
| "learning_rate": 8e-05, | |
| "loss": 0.7804465889930725, | |
| "step": 2390 | |
| }, | |
| { | |
| "epoch": 0.637187000532765, | |
| "grad_norm": 0.57421875, | |
| "learning_rate": 8e-05, | |
| "loss": 0.7685946226119995, | |
| "step": 2392 | |
| }, | |
| { | |
| "epoch": 0.6377197655833777, | |
| "grad_norm": 0.57421875, | |
| "learning_rate": 8e-05, | |
| "loss": 0.7582971453666687, | |
| "step": 2394 | |
| }, | |
| { | |
| "epoch": 0.6382525306339905, | |
| "grad_norm": 0.62109375, | |
| "learning_rate": 8e-05, | |
| "loss": 0.7376336455345154, | |
| "step": 2396 | |
| }, | |
| { | |
| "epoch": 0.6387852956846031, | |
| "grad_norm": 0.6171875, | |
| "learning_rate": 8e-05, | |
| "loss": 0.7812549471855164, | |
| "step": 2398 | |
| }, | |
| { | |
| "epoch": 0.6393180607352158, | |
| "grad_norm": 0.59765625, | |
| "learning_rate": 8e-05, | |
| "loss": 0.7486171722412109, | |
| "step": 2400 | |
| }, | |
| { | |
| "epoch": 0.6398508257858284, | |
| "grad_norm": 0.56640625, | |
| "learning_rate": 8e-05, | |
| "loss": 0.7864870429039001, | |
| "step": 2402 | |
| }, | |
| { | |
| "epoch": 0.6403835908364411, | |
| "grad_norm": 0.60546875, | |
| "learning_rate": 8e-05, | |
| "loss": 0.7745766043663025, | |
| "step": 2404 | |
| }, | |
| { | |
| "epoch": 0.6409163558870538, | |
| "grad_norm": 0.56640625, | |
| "learning_rate": 8e-05, | |
| "loss": 0.7579283118247986, | |
| "step": 2406 | |
| }, | |
| { | |
| "epoch": 0.6414491209376665, | |
| "grad_norm": 0.6015625, | |
| "learning_rate": 8e-05, | |
| "loss": 0.7351487278938293, | |
| "step": 2408 | |
| }, | |
| { | |
| "epoch": 0.6419818859882792, | |
| "grad_norm": 0.58984375, | |
| "learning_rate": 8e-05, | |
| "loss": 0.8096469044685364, | |
| "step": 2410 | |
| }, | |
| { | |
| "epoch": 0.6425146510388918, | |
| "grad_norm": 0.58984375, | |
| "learning_rate": 8e-05, | |
| "loss": 0.7487743496894836, | |
| "step": 2412 | |
| }, | |
| { | |
| "epoch": 0.6430474160895046, | |
| "grad_norm": 0.6015625, | |
| "learning_rate": 8e-05, | |
| "loss": 0.7955554127693176, | |
| "step": 2414 | |
| }, | |
| { | |
| "epoch": 0.6435801811401172, | |
| "grad_norm": 0.61328125, | |
| "learning_rate": 8e-05, | |
| "loss": 0.7598539590835571, | |
| "step": 2416 | |
| }, | |
| { | |
| "epoch": 0.6441129461907299, | |
| "grad_norm": 0.59765625, | |
| "learning_rate": 8e-05, | |
| "loss": 0.794310986995697, | |
| "step": 2418 | |
| }, | |
| { | |
| "epoch": 0.6446457112413426, | |
| "grad_norm": 0.57421875, | |
| "learning_rate": 8e-05, | |
| "loss": 0.7372432947158813, | |
| "step": 2420 | |
| }, | |
| { | |
| "epoch": 0.6451784762919552, | |
| "grad_norm": 0.62890625, | |
| "learning_rate": 8e-05, | |
| "loss": 0.7485856413841248, | |
| "step": 2422 | |
| }, | |
| { | |
| "epoch": 0.645711241342568, | |
| "grad_norm": 0.59765625, | |
| "learning_rate": 8e-05, | |
| "loss": 0.7526128888130188, | |
| "step": 2424 | |
| }, | |
| { | |
| "epoch": 0.6462440063931806, | |
| "grad_norm": 0.609375, | |
| "learning_rate": 8e-05, | |
| "loss": 0.7520730495452881, | |
| "step": 2426 | |
| }, | |
| { | |
| "epoch": 0.6467767714437933, | |
| "grad_norm": 0.58203125, | |
| "learning_rate": 8e-05, | |
| "loss": 0.7413308024406433, | |
| "step": 2428 | |
| }, | |
| { | |
| "epoch": 0.6473095364944059, | |
| "grad_norm": 0.6171875, | |
| "learning_rate": 8e-05, | |
| "loss": 0.7861500382423401, | |
| "step": 2430 | |
| }, | |
| { | |
| "epoch": 0.6478423015450187, | |
| "grad_norm": 0.625, | |
| "learning_rate": 8e-05, | |
| "loss": 0.8049200773239136, | |
| "step": 2432 | |
| }, | |
| { | |
| "epoch": 0.6483750665956314, | |
| "grad_norm": 0.59375, | |
| "learning_rate": 8e-05, | |
| "loss": 0.7288910150527954, | |
| "step": 2434 | |
| }, | |
| { | |
| "epoch": 0.648907831646244, | |
| "grad_norm": 0.6015625, | |
| "learning_rate": 8e-05, | |
| "loss": 0.7674762606620789, | |
| "step": 2436 | |
| }, | |
| { | |
| "epoch": 0.6494405966968567, | |
| "grad_norm": 0.61328125, | |
| "learning_rate": 8e-05, | |
| "loss": 0.79649418592453, | |
| "step": 2438 | |
| }, | |
| { | |
| "epoch": 0.6499733617474693, | |
| "grad_norm": 0.5859375, | |
| "learning_rate": 8e-05, | |
| "loss": 0.7906514406204224, | |
| "step": 2440 | |
| }, | |
| { | |
| "epoch": 0.6505061267980821, | |
| "grad_norm": 0.58203125, | |
| "learning_rate": 8e-05, | |
| "loss": 0.7719740867614746, | |
| "step": 2442 | |
| }, | |
| { | |
| "epoch": 0.6510388918486947, | |
| "grad_norm": 0.61328125, | |
| "learning_rate": 8e-05, | |
| "loss": 0.8404486179351807, | |
| "step": 2444 | |
| }, | |
| { | |
| "epoch": 0.6515716568993074, | |
| "grad_norm": 0.6015625, | |
| "learning_rate": 8e-05, | |
| "loss": 0.7465118169784546, | |
| "step": 2446 | |
| }, | |
| { | |
| "epoch": 0.6521044219499201, | |
| "grad_norm": 0.609375, | |
| "learning_rate": 8e-05, | |
| "loss": 0.7870571613311768, | |
| "step": 2448 | |
| }, | |
| { | |
| "epoch": 0.6526371870005327, | |
| "grad_norm": 0.62890625, | |
| "learning_rate": 8e-05, | |
| "loss": 0.7906562089920044, | |
| "step": 2450 | |
| }, | |
| { | |
| "epoch": 0.6531699520511455, | |
| "grad_norm": 0.58984375, | |
| "learning_rate": 8e-05, | |
| "loss": 0.7247576713562012, | |
| "step": 2452 | |
| }, | |
| { | |
| "epoch": 0.6537027171017581, | |
| "grad_norm": 0.609375, | |
| "learning_rate": 8e-05, | |
| "loss": 0.7732218503952026, | |
| "step": 2454 | |
| }, | |
| { | |
| "epoch": 0.6542354821523708, | |
| "grad_norm": 0.5859375, | |
| "learning_rate": 8e-05, | |
| "loss": 0.7541797161102295, | |
| "step": 2456 | |
| }, | |
| { | |
| "epoch": 0.6547682472029834, | |
| "grad_norm": 0.625, | |
| "learning_rate": 8e-05, | |
| "loss": 0.7637116312980652, | |
| "step": 2458 | |
| }, | |
| { | |
| "epoch": 0.6553010122535962, | |
| "grad_norm": 0.609375, | |
| "learning_rate": 8e-05, | |
| "loss": 0.7784105539321899, | |
| "step": 2460 | |
| }, | |
| { | |
| "epoch": 0.6558337773042089, | |
| "grad_norm": 0.6328125, | |
| "learning_rate": 8e-05, | |
| "loss": 0.7532057166099548, | |
| "step": 2462 | |
| }, | |
| { | |
| "epoch": 0.6563665423548215, | |
| "grad_norm": 0.609375, | |
| "learning_rate": 8e-05, | |
| "loss": 0.7266465425491333, | |
| "step": 2464 | |
| }, | |
| { | |
| "epoch": 0.6568993074054342, | |
| "grad_norm": 0.58203125, | |
| "learning_rate": 8e-05, | |
| "loss": 0.7385075688362122, | |
| "step": 2466 | |
| }, | |
| { | |
| "epoch": 0.6574320724560468, | |
| "grad_norm": 0.609375, | |
| "learning_rate": 8e-05, | |
| "loss": 0.7806388139724731, | |
| "step": 2468 | |
| }, | |
| { | |
| "epoch": 0.6579648375066596, | |
| "grad_norm": 0.609375, | |
| "learning_rate": 8e-05, | |
| "loss": 0.7638847827911377, | |
| "step": 2470 | |
| }, | |
| { | |
| "epoch": 0.6584976025572722, | |
| "grad_norm": 0.59375, | |
| "learning_rate": 8e-05, | |
| "loss": 0.7466439604759216, | |
| "step": 2472 | |
| }, | |
| { | |
| "epoch": 0.6590303676078849, | |
| "grad_norm": 0.58984375, | |
| "learning_rate": 8e-05, | |
| "loss": 0.7079021334648132, | |
| "step": 2474 | |
| }, | |
| { | |
| "epoch": 0.6595631326584976, | |
| "grad_norm": 0.59765625, | |
| "learning_rate": 8e-05, | |
| "loss": 0.7956612706184387, | |
| "step": 2476 | |
| }, | |
| { | |
| "epoch": 0.6600958977091103, | |
| "grad_norm": 0.60546875, | |
| "learning_rate": 8e-05, | |
| "loss": 0.8124134540557861, | |
| "step": 2478 | |
| }, | |
| { | |
| "epoch": 0.660628662759723, | |
| "grad_norm": 0.5703125, | |
| "learning_rate": 7.999992767148906e-05, | |
| "loss": 0.7136578559875488, | |
| "step": 2480 | |
| }, | |
| { | |
| "epoch": 0.6611614278103356, | |
| "grad_norm": 0.609375, | |
| "learning_rate": 7.999934904497092e-05, | |
| "loss": 0.7554067373275757, | |
| "step": 2482 | |
| }, | |
| { | |
| "epoch": 0.6616941928609483, | |
| "grad_norm": 0.58203125, | |
| "learning_rate": 7.999819180030488e-05, | |
| "loss": 0.7867137789726257, | |
| "step": 2484 | |
| }, | |
| { | |
| "epoch": 0.662226957911561, | |
| "grad_norm": 0.62109375, | |
| "learning_rate": 7.999645595423128e-05, | |
| "loss": 0.8369832634925842, | |
| "step": 2486 | |
| }, | |
| { | |
| "epoch": 0.6627597229621737, | |
| "grad_norm": 0.58203125, | |
| "learning_rate": 7.999414153186031e-05, | |
| "loss": 0.745606005191803, | |
| "step": 2488 | |
| }, | |
| { | |
| "epoch": 0.6632924880127864, | |
| "grad_norm": 0.58203125, | |
| "learning_rate": 7.999124856667172e-05, | |
| "loss": 0.7783620357513428, | |
| "step": 2490 | |
| }, | |
| { | |
| "epoch": 0.663825253063399, | |
| "grad_norm": 0.62109375, | |
| "learning_rate": 7.998777710051422e-05, | |
| "loss": 0.7496659159660339, | |
| "step": 2492 | |
| }, | |
| { | |
| "epoch": 0.6643580181140117, | |
| "grad_norm": 0.58984375, | |
| "learning_rate": 7.998372718360495e-05, | |
| "loss": 0.735636293888092, | |
| "step": 2494 | |
| }, | |
| { | |
| "epoch": 0.6648907831646244, | |
| "grad_norm": 0.59765625, | |
| "learning_rate": 7.997909887452878e-05, | |
| "loss": 0.770561158657074, | |
| "step": 2496 | |
| }, | |
| { | |
| "epoch": 0.6654235482152371, | |
| "grad_norm": 0.61328125, | |
| "learning_rate": 7.997389224023737e-05, | |
| "loss": 0.7702518105506897, | |
| "step": 2498 | |
| }, | |
| { | |
| "epoch": 0.6659563132658498, | |
| "grad_norm": 0.62890625, | |
| "learning_rate": 7.996810735604828e-05, | |
| "loss": 0.8403434753417969, | |
| "step": 2500 | |
| }, | |
| { | |
| "epoch": 0.6664890783164624, | |
| "grad_norm": 0.5859375, | |
| "learning_rate": 7.996174430564384e-05, | |
| "loss": 0.775733232498169, | |
| "step": 2502 | |
| }, | |
| { | |
| "epoch": 0.6670218433670752, | |
| "grad_norm": 0.578125, | |
| "learning_rate": 7.995480318106997e-05, | |
| "loss": 0.7447243928909302, | |
| "step": 2504 | |
| }, | |
| { | |
| "epoch": 0.6675546084176878, | |
| "grad_norm": 0.60546875, | |
| "learning_rate": 7.99472840827348e-05, | |
| "loss": 0.7677955627441406, | |
| "step": 2506 | |
| }, | |
| { | |
| "epoch": 0.6680873734683005, | |
| "grad_norm": 0.58984375, | |
| "learning_rate": 7.99391871194073e-05, | |
| "loss": 0.7605108022689819, | |
| "step": 2508 | |
| }, | |
| { | |
| "epoch": 0.6686201385189131, | |
| "grad_norm": 0.60546875, | |
| "learning_rate": 7.993051240821559e-05, | |
| "loss": 0.7609809041023254, | |
| "step": 2510 | |
| }, | |
| { | |
| "epoch": 0.6691529035695258, | |
| "grad_norm": 0.58203125, | |
| "learning_rate": 7.992126007464537e-05, | |
| "loss": 0.7586944699287415, | |
| "step": 2512 | |
| }, | |
| { | |
| "epoch": 0.6696856686201386, | |
| "grad_norm": 0.6015625, | |
| "learning_rate": 7.991143025253801e-05, | |
| "loss": 0.7763959765434265, | |
| "step": 2514 | |
| }, | |
| { | |
| "epoch": 0.6702184336707512, | |
| "grad_norm": 0.6015625, | |
| "learning_rate": 7.990102308408867e-05, | |
| "loss": 0.7569335103034973, | |
| "step": 2516 | |
| }, | |
| { | |
| "epoch": 0.6707511987213639, | |
| "grad_norm": 0.59765625, | |
| "learning_rate": 7.98900387198442e-05, | |
| "loss": 0.7567064166069031, | |
| "step": 2518 | |
| }, | |
| { | |
| "epoch": 0.6712839637719765, | |
| "grad_norm": 0.62109375, | |
| "learning_rate": 7.9878477318701e-05, | |
| "loss": 0.8084724545478821, | |
| "step": 2520 | |
| }, | |
| { | |
| "epoch": 0.6718167288225892, | |
| "grad_norm": 0.59765625, | |
| "learning_rate": 7.98663390479027e-05, | |
| "loss": 0.748503565788269, | |
| "step": 2522 | |
| }, | |
| { | |
| "epoch": 0.6723494938732019, | |
| "grad_norm": 0.59765625, | |
| "learning_rate": 7.985362408303778e-05, | |
| "loss": 0.7644037008285522, | |
| "step": 2524 | |
| }, | |
| { | |
| "epoch": 0.6728822589238146, | |
| "grad_norm": 0.625, | |
| "learning_rate": 7.984033260803695e-05, | |
| "loss": 0.7853304743766785, | |
| "step": 2526 | |
| }, | |
| { | |
| "epoch": 0.6734150239744273, | |
| "grad_norm": 0.61328125, | |
| "learning_rate": 7.982646481517054e-05, | |
| "loss": 0.7597571015357971, | |
| "step": 2528 | |
| }, | |
| { | |
| "epoch": 0.6739477890250399, | |
| "grad_norm": 0.578125, | |
| "learning_rate": 7.981202090504576e-05, | |
| "loss": 0.7463767528533936, | |
| "step": 2530 | |
| }, | |
| { | |
| "epoch": 0.6744805540756527, | |
| "grad_norm": 0.60546875, | |
| "learning_rate": 7.97970010866037e-05, | |
| "loss": 0.7673113346099854, | |
| "step": 2532 | |
| }, | |
| { | |
| "epoch": 0.6750133191262653, | |
| "grad_norm": 0.5859375, | |
| "learning_rate": 7.97814055771164e-05, | |
| "loss": 0.7825121879577637, | |
| "step": 2534 | |
| }, | |
| { | |
| "epoch": 0.675546084176878, | |
| "grad_norm": 0.58984375, | |
| "learning_rate": 7.976523460218363e-05, | |
| "loss": 0.7622671723365784, | |
| "step": 2536 | |
| }, | |
| { | |
| "epoch": 0.6760788492274907, | |
| "grad_norm": 0.57421875, | |
| "learning_rate": 7.974848839572971e-05, | |
| "loss": 0.722598671913147, | |
| "step": 2538 | |
| }, | |
| { | |
| "epoch": 0.6766116142781033, | |
| "grad_norm": 0.60546875, | |
| "learning_rate": 7.973116720000005e-05, | |
| "loss": 0.7474793791770935, | |
| "step": 2540 | |
| }, | |
| { | |
| "epoch": 0.6771443793287161, | |
| "grad_norm": 0.59375, | |
| "learning_rate": 7.971327126555767e-05, | |
| "loss": 0.7257604598999023, | |
| "step": 2542 | |
| }, | |
| { | |
| "epoch": 0.6776771443793287, | |
| "grad_norm": 0.5859375, | |
| "learning_rate": 7.96948008512796e-05, | |
| "loss": 0.7461668252944946, | |
| "step": 2544 | |
| }, | |
| { | |
| "epoch": 0.6782099094299414, | |
| "grad_norm": 0.58984375, | |
| "learning_rate": 7.967575622435313e-05, | |
| "loss": 0.75556880235672, | |
| "step": 2546 | |
| }, | |
| { | |
| "epoch": 0.678742674480554, | |
| "grad_norm": 0.6015625, | |
| "learning_rate": 7.965613766027188e-05, | |
| "loss": 0.796931803226471, | |
| "step": 2548 | |
| }, | |
| { | |
| "epoch": 0.6792754395311668, | |
| "grad_norm": 0.59375, | |
| "learning_rate": 7.963594544283193e-05, | |
| "loss": 0.7720798850059509, | |
| "step": 2550 | |
| }, | |
| { | |
| "epoch": 0.6798082045817795, | |
| "grad_norm": 0.56640625, | |
| "learning_rate": 7.96151798641276e-05, | |
| "loss": 0.7493809461593628, | |
| "step": 2552 | |
| }, | |
| { | |
| "epoch": 0.6803409696323921, | |
| "grad_norm": 0.5859375, | |
| "learning_rate": 7.959384122454729e-05, | |
| "loss": 0.7640602588653564, | |
| "step": 2554 | |
| }, | |
| { | |
| "epoch": 0.6808737346830048, | |
| "grad_norm": 0.55859375, | |
| "learning_rate": 7.957192983276915e-05, | |
| "loss": 0.716975212097168, | |
| "step": 2556 | |
| }, | |
| { | |
| "epoch": 0.6814064997336174, | |
| "grad_norm": 0.59765625, | |
| "learning_rate": 7.954944600575654e-05, | |
| "loss": 0.7710456848144531, | |
| "step": 2558 | |
| }, | |
| { | |
| "epoch": 0.6819392647842302, | |
| "grad_norm": 0.62109375, | |
| "learning_rate": 7.952639006875353e-05, | |
| "loss": 0.7932459712028503, | |
| "step": 2560 | |
| }, | |
| { | |
| "epoch": 0.6824720298348428, | |
| "grad_norm": 0.578125, | |
| "learning_rate": 7.950276235528011e-05, | |
| "loss": 0.7565551400184631, | |
| "step": 2562 | |
| }, | |
| { | |
| "epoch": 0.6830047948854555, | |
| "grad_norm": 0.60546875, | |
| "learning_rate": 7.947856320712746e-05, | |
| "loss": 0.7231839895248413, | |
| "step": 2564 | |
| }, | |
| { | |
| "epoch": 0.6835375599360682, | |
| "grad_norm": 0.578125, | |
| "learning_rate": 7.945379297435294e-05, | |
| "loss": 0.7507690787315369, | |
| "step": 2566 | |
| }, | |
| { | |
| "epoch": 0.6840703249866809, | |
| "grad_norm": 0.60546875, | |
| "learning_rate": 7.942845201527501e-05, | |
| "loss": 0.7406589388847351, | |
| "step": 2568 | |
| }, | |
| { | |
| "epoch": 0.6846030900372936, | |
| "grad_norm": 0.5703125, | |
| "learning_rate": 7.940254069646813e-05, | |
| "loss": 0.7486617565155029, | |
| "step": 2570 | |
| }, | |
| { | |
| "epoch": 0.6851358550879062, | |
| "grad_norm": 0.58203125, | |
| "learning_rate": 7.937605939275736e-05, | |
| "loss": 0.7797908186912537, | |
| "step": 2572 | |
| }, | |
| { | |
| "epoch": 0.6856686201385189, | |
| "grad_norm": 0.62109375, | |
| "learning_rate": 7.934900848721304e-05, | |
| "loss": 0.8399383425712585, | |
| "step": 2574 | |
| }, | |
| { | |
| "epoch": 0.6862013851891315, | |
| "grad_norm": 0.58984375, | |
| "learning_rate": 7.932138837114512e-05, | |
| "loss": 0.7333605289459229, | |
| "step": 2576 | |
| }, | |
| { | |
| "epoch": 0.6867341502397443, | |
| "grad_norm": 0.6015625, | |
| "learning_rate": 7.929319944409765e-05, | |
| "loss": 0.776554524898529, | |
| "step": 2578 | |
| }, | |
| { | |
| "epoch": 0.687266915290357, | |
| "grad_norm": 0.5859375, | |
| "learning_rate": 7.926444211384286e-05, | |
| "loss": 0.7380905151367188, | |
| "step": 2580 | |
| }, | |
| { | |
| "epoch": 0.6877996803409696, | |
| "grad_norm": 0.56640625, | |
| "learning_rate": 7.923511679637534e-05, | |
| "loss": 0.7293626070022583, | |
| "step": 2582 | |
| }, | |
| { | |
| "epoch": 0.6883324453915823, | |
| "grad_norm": 0.58203125, | |
| "learning_rate": 7.920522391590604e-05, | |
| "loss": 0.7142640352249146, | |
| "step": 2584 | |
| }, | |
| { | |
| "epoch": 0.688865210442195, | |
| "grad_norm": 0.61328125, | |
| "learning_rate": 7.917476390485606e-05, | |
| "loss": 0.8078116178512573, | |
| "step": 2586 | |
| }, | |
| { | |
| "epoch": 0.6893979754928077, | |
| "grad_norm": 0.59765625, | |
| "learning_rate": 7.914373720385048e-05, | |
| "loss": 0.7499976754188538, | |
| "step": 2588 | |
| }, | |
| { | |
| "epoch": 0.6899307405434204, | |
| "grad_norm": 0.59375, | |
| "learning_rate": 7.911214426171187e-05, | |
| "loss": 0.7751161456108093, | |
| "step": 2590 | |
| }, | |
| { | |
| "epoch": 0.690463505594033, | |
| "grad_norm": 0.61328125, | |
| "learning_rate": 7.907998553545392e-05, | |
| "loss": 0.7775903344154358, | |
| "step": 2592 | |
| }, | |
| { | |
| "epoch": 0.6909962706446457, | |
| "grad_norm": 0.5859375, | |
| "learning_rate": 7.904726149027479e-05, | |
| "loss": 0.7565440535545349, | |
| "step": 2594 | |
| }, | |
| { | |
| "epoch": 0.6915290356952584, | |
| "grad_norm": 0.58984375, | |
| "learning_rate": 7.901397259955032e-05, | |
| "loss": 0.7655493021011353, | |
| "step": 2596 | |
| }, | |
| { | |
| "epoch": 0.6920618007458711, | |
| "grad_norm": 0.609375, | |
| "learning_rate": 7.898011934482725e-05, | |
| "loss": 0.7513885498046875, | |
| "step": 2598 | |
| }, | |
| { | |
| "epoch": 0.6925945657964837, | |
| "grad_norm": 0.6015625, | |
| "learning_rate": 7.894570221581627e-05, | |
| "loss": 0.7957122325897217, | |
| "step": 2600 | |
| }, | |
| { | |
| "epoch": 0.6931273308470964, | |
| "grad_norm": 0.61328125, | |
| "learning_rate": 7.891072171038483e-05, | |
| "loss": 0.752173125743866, | |
| "step": 2602 | |
| }, | |
| { | |
| "epoch": 0.6936600958977092, | |
| "grad_norm": 0.60546875, | |
| "learning_rate": 7.887517833455007e-05, | |
| "loss": 0.7955760359764099, | |
| "step": 2604 | |
| }, | |
| { | |
| "epoch": 0.6941928609483218, | |
| "grad_norm": 0.58984375, | |
| "learning_rate": 7.883907260247141e-05, | |
| "loss": 0.7562192678451538, | |
| "step": 2606 | |
| }, | |
| { | |
| "epoch": 0.6947256259989345, | |
| "grad_norm": 0.62109375, | |
| "learning_rate": 7.880240503644314e-05, | |
| "loss": 0.7903412580490112, | |
| "step": 2608 | |
| }, | |
| { | |
| "epoch": 0.6952583910495471, | |
| "grad_norm": 0.58984375, | |
| "learning_rate": 7.876517616688689e-05, | |
| "loss": 0.722710371017456, | |
| "step": 2610 | |
| }, | |
| { | |
| "epoch": 0.6957911561001598, | |
| "grad_norm": 0.59765625, | |
| "learning_rate": 7.872738653234387e-05, | |
| "loss": 0.7610102891921997, | |
| "step": 2612 | |
| }, | |
| { | |
| "epoch": 0.6963239211507725, | |
| "grad_norm": 0.58984375, | |
| "learning_rate": 7.868903667946723e-05, | |
| "loss": 0.7523242831230164, | |
| "step": 2614 | |
| }, | |
| { | |
| "epoch": 0.6968566862013852, | |
| "grad_norm": 0.60546875, | |
| "learning_rate": 7.865012716301399e-05, | |
| "loss": 0.7445037364959717, | |
| "step": 2616 | |
| }, | |
| { | |
| "epoch": 0.6973894512519979, | |
| "grad_norm": 0.578125, | |
| "learning_rate": 7.861065854583715e-05, | |
| "loss": 0.7576636075973511, | |
| "step": 2618 | |
| }, | |
| { | |
| "epoch": 0.6979222163026105, | |
| "grad_norm": 0.578125, | |
| "learning_rate": 7.857063139887742e-05, | |
| "loss": 0.7409178614616394, | |
| "step": 2620 | |
| }, | |
| { | |
| "epoch": 0.6984549813532233, | |
| "grad_norm": 0.59375, | |
| "learning_rate": 7.85300463011551e-05, | |
| "loss": 0.7388145923614502, | |
| "step": 2622 | |
| }, | |
| { | |
| "epoch": 0.6989877464038359, | |
| "grad_norm": 0.60546875, | |
| "learning_rate": 7.848890383976155e-05, | |
| "loss": 0.7784285545349121, | |
| "step": 2624 | |
| }, | |
| { | |
| "epoch": 0.6995205114544486, | |
| "grad_norm": 0.55859375, | |
| "learning_rate": 7.844720460985086e-05, | |
| "loss": 0.7128530144691467, | |
| "step": 2626 | |
| }, | |
| { | |
| "epoch": 0.7000532765050612, | |
| "grad_norm": 0.59375, | |
| "learning_rate": 7.840494921463112e-05, | |
| "loss": 0.7656941413879395, | |
| "step": 2628 | |
| }, | |
| { | |
| "epoch": 0.7005860415556739, | |
| "grad_norm": 0.609375, | |
| "learning_rate": 7.836213826535574e-05, | |
| "loss": 0.8078965544700623, | |
| "step": 2630 | |
| }, | |
| { | |
| "epoch": 0.7011188066062867, | |
| "grad_norm": 0.5703125, | |
| "learning_rate": 7.831877238131459e-05, | |
| "loss": 0.7390220165252686, | |
| "step": 2632 | |
| }, | |
| { | |
| "epoch": 0.7016515716568993, | |
| "grad_norm": 0.58203125, | |
| "learning_rate": 7.82748521898251e-05, | |
| "loss": 0.7566484212875366, | |
| "step": 2634 | |
| }, | |
| { | |
| "epoch": 0.702184336707512, | |
| "grad_norm": 0.58203125, | |
| "learning_rate": 7.823037832622307e-05, | |
| "loss": 0.7349902391433716, | |
| "step": 2636 | |
| }, | |
| { | |
| "epoch": 0.7027171017581246, | |
| "grad_norm": 0.57421875, | |
| "learning_rate": 7.818535143385359e-05, | |
| "loss": 0.7549850940704346, | |
| "step": 2638 | |
| }, | |
| { | |
| "epoch": 0.7032498668087374, | |
| "grad_norm": 0.625, | |
| "learning_rate": 7.81397721640617e-05, | |
| "loss": 0.7904667258262634, | |
| "step": 2640 | |
| }, | |
| { | |
| "epoch": 0.70378263185935, | |
| "grad_norm": 0.60546875, | |
| "learning_rate": 7.809364117618295e-05, | |
| "loss": 0.7925822734832764, | |
| "step": 2642 | |
| }, | |
| { | |
| "epoch": 0.7043153969099627, | |
| "grad_norm": 0.57421875, | |
| "learning_rate": 7.804695913753385e-05, | |
| "loss": 0.7261655926704407, | |
| "step": 2644 | |
| }, | |
| { | |
| "epoch": 0.7048481619605754, | |
| "grad_norm": 0.625, | |
| "learning_rate": 7.799972672340226e-05, | |
| "loss": 0.8227925896644592, | |
| "step": 2646 | |
| }, | |
| { | |
| "epoch": 0.705380927011188, | |
| "grad_norm": 0.56640625, | |
| "learning_rate": 7.795194461703763e-05, | |
| "loss": 0.6952813863754272, | |
| "step": 2648 | |
| }, | |
| { | |
| "epoch": 0.7059136920618008, | |
| "grad_norm": 0.5859375, | |
| "learning_rate": 7.790361350964101e-05, | |
| "loss": 0.7714648246765137, | |
| "step": 2650 | |
| }, | |
| { | |
| "epoch": 0.7064464571124134, | |
| "grad_norm": 0.5859375, | |
| "learning_rate": 7.78547341003552e-05, | |
| "loss": 0.756645143032074, | |
| "step": 2652 | |
| }, | |
| { | |
| "epoch": 0.7069792221630261, | |
| "grad_norm": 0.5859375, | |
| "learning_rate": 7.780530709625455e-05, | |
| "loss": 0.74300217628479, | |
| "step": 2654 | |
| }, | |
| { | |
| "epoch": 0.7075119872136388, | |
| "grad_norm": 0.58203125, | |
| "learning_rate": 7.775533321233471e-05, | |
| "loss": 0.7490818500518799, | |
| "step": 2656 | |
| }, | |
| { | |
| "epoch": 0.7080447522642515, | |
| "grad_norm": 0.578125, | |
| "learning_rate": 7.770481317150236e-05, | |
| "loss": 0.7314748167991638, | |
| "step": 2658 | |
| }, | |
| { | |
| "epoch": 0.7085775173148642, | |
| "grad_norm": 0.578125, | |
| "learning_rate": 7.765374770456471e-05, | |
| "loss": 0.7653273344039917, | |
| "step": 2660 | |
| }, | |
| { | |
| "epoch": 0.7091102823654768, | |
| "grad_norm": 0.58203125, | |
| "learning_rate": 7.760213755021892e-05, | |
| "loss": 0.784086287021637, | |
| "step": 2662 | |
| }, | |
| { | |
| "epoch": 0.7096430474160895, | |
| "grad_norm": 0.57421875, | |
| "learning_rate": 7.754998345504141e-05, | |
| "loss": 0.7511420249938965, | |
| "step": 2664 | |
| }, | |
| { | |
| "epoch": 0.7101758124667021, | |
| "grad_norm": 0.6171875, | |
| "learning_rate": 7.749728617347717e-05, | |
| "loss": 0.77214115858078, | |
| "step": 2666 | |
| }, | |
| { | |
| "epoch": 0.7107085775173149, | |
| "grad_norm": 0.64453125, | |
| "learning_rate": 7.744404646782866e-05, | |
| "loss": 0.7471661567687988, | |
| "step": 2668 | |
| }, | |
| { | |
| "epoch": 0.7112413425679276, | |
| "grad_norm": 0.5859375, | |
| "learning_rate": 7.739026510824489e-05, | |
| "loss": 0.8218797445297241, | |
| "step": 2670 | |
| }, | |
| { | |
| "epoch": 0.7117741076185402, | |
| "grad_norm": 0.58984375, | |
| "learning_rate": 7.73359428727103e-05, | |
| "loss": 0.7144700288772583, | |
| "step": 2672 | |
| }, | |
| { | |
| "epoch": 0.7123068726691529, | |
| "grad_norm": 0.58203125, | |
| "learning_rate": 7.72810805470335e-05, | |
| "loss": 0.7447347640991211, | |
| "step": 2674 | |
| }, | |
| { | |
| "epoch": 0.7128396377197656, | |
| "grad_norm": 0.58203125, | |
| "learning_rate": 7.722567892483579e-05, | |
| "loss": 0.7260941863059998, | |
| "step": 2676 | |
| }, | |
| { | |
| "epoch": 0.7133724027703783, | |
| "grad_norm": 0.59765625, | |
| "learning_rate": 7.716973880753982e-05, | |
| "loss": 0.7791821360588074, | |
| "step": 2678 | |
| }, | |
| { | |
| "epoch": 0.7139051678209909, | |
| "grad_norm": 0.57421875, | |
| "learning_rate": 7.711326100435796e-05, | |
| "loss": 0.7719851732254028, | |
| "step": 2680 | |
| }, | |
| { | |
| "epoch": 0.7144379328716036, | |
| "grad_norm": 0.5703125, | |
| "learning_rate": 7.70562463322805e-05, | |
| "loss": 0.7492231130599976, | |
| "step": 2682 | |
| }, | |
| { | |
| "epoch": 0.7149706979222163, | |
| "grad_norm": 0.5703125, | |
| "learning_rate": 7.699869561606403e-05, | |
| "loss": 0.7836228609085083, | |
| "step": 2684 | |
| }, | |
| { | |
| "epoch": 0.715503462972829, | |
| "grad_norm": 0.578125, | |
| "learning_rate": 7.694060968821927e-05, | |
| "loss": 0.7258511781692505, | |
| "step": 2686 | |
| }, | |
| { | |
| "epoch": 0.7160362280234417, | |
| "grad_norm": 0.59375, | |
| "learning_rate": 7.688198938899922e-05, | |
| "loss": 0.7510860562324524, | |
| "step": 2688 | |
| }, | |
| { | |
| "epoch": 0.7165689930740543, | |
| "grad_norm": 0.59765625, | |
| "learning_rate": 7.682283556638689e-05, | |
| "loss": 0.7514731884002686, | |
| "step": 2690 | |
| }, | |
| { | |
| "epoch": 0.717101758124667, | |
| "grad_norm": 0.59765625, | |
| "learning_rate": 7.676314907608309e-05, | |
| "loss": 0.7466673254966736, | |
| "step": 2692 | |
| }, | |
| { | |
| "epoch": 0.7176345231752796, | |
| "grad_norm": 0.58203125, | |
| "learning_rate": 7.670293078149403e-05, | |
| "loss": 0.7264184951782227, | |
| "step": 2694 | |
| }, | |
| { | |
| "epoch": 0.7181672882258924, | |
| "grad_norm": 0.59765625, | |
| "learning_rate": 7.664218155371884e-05, | |
| "loss": 0.7410547733306885, | |
| "step": 2696 | |
| }, | |
| { | |
| "epoch": 0.7187000532765051, | |
| "grad_norm": 0.6015625, | |
| "learning_rate": 7.658090227153697e-05, | |
| "loss": 0.7337695360183716, | |
| "step": 2698 | |
| }, | |
| { | |
| "epoch": 0.7192328183271177, | |
| "grad_norm": 0.55859375, | |
| "learning_rate": 7.651909382139545e-05, | |
| "loss": 0.7203134894371033, | |
| "step": 2700 | |
| }, | |
| { | |
| "epoch": 0.7197655833777304, | |
| "grad_norm": 0.58984375, | |
| "learning_rate": 7.645675709739614e-05, | |
| "loss": 0.752673327922821, | |
| "step": 2702 | |
| }, | |
| { | |
| "epoch": 0.7202983484283431, | |
| "grad_norm": 0.578125, | |
| "learning_rate": 7.639389300128266e-05, | |
| "loss": 0.7161597013473511, | |
| "step": 2704 | |
| }, | |
| { | |
| "epoch": 0.7208311134789558, | |
| "grad_norm": 0.6171875, | |
| "learning_rate": 7.633050244242752e-05, | |
| "loss": 0.7535909414291382, | |
| "step": 2706 | |
| }, | |
| { | |
| "epoch": 0.7213638785295685, | |
| "grad_norm": 0.59375, | |
| "learning_rate": 7.62665863378188e-05, | |
| "loss": 0.8101969957351685, | |
| "step": 2708 | |
| }, | |
| { | |
| "epoch": 0.7218966435801811, | |
| "grad_norm": 0.5859375, | |
| "learning_rate": 7.620214561204704e-05, | |
| "loss": 0.7484019994735718, | |
| "step": 2710 | |
| }, | |
| { | |
| "epoch": 0.7224294086307939, | |
| "grad_norm": 0.57421875, | |
| "learning_rate": 7.613718119729172e-05, | |
| "loss": 0.7134671211242676, | |
| "step": 2712 | |
| }, | |
| { | |
| "epoch": 0.7229621736814065, | |
| "grad_norm": 0.59375, | |
| "learning_rate": 7.607169403330786e-05, | |
| "loss": 0.7816528677940369, | |
| "step": 2714 | |
| }, | |
| { | |
| "epoch": 0.7234949387320192, | |
| "grad_norm": 0.61328125, | |
| "learning_rate": 7.600568506741243e-05, | |
| "loss": 0.8324891924858093, | |
| "step": 2716 | |
| }, | |
| { | |
| "epoch": 0.7240277037826318, | |
| "grad_norm": 0.56640625, | |
| "learning_rate": 7.593915525447062e-05, | |
| "loss": 0.7704975605010986, | |
| "step": 2718 | |
| }, | |
| { | |
| "epoch": 0.7245604688332445, | |
| "grad_norm": 0.6171875, | |
| "learning_rate": 7.5872105556882e-05, | |
| "loss": 0.8043801784515381, | |
| "step": 2720 | |
| }, | |
| { | |
| "epoch": 0.7250932338838573, | |
| "grad_norm": 0.58984375, | |
| "learning_rate": 7.580453694456664e-05, | |
| "loss": 0.7552354335784912, | |
| "step": 2722 | |
| }, | |
| { | |
| "epoch": 0.7256259989344699, | |
| "grad_norm": 0.6015625, | |
| "learning_rate": 7.57364503949511e-05, | |
| "loss": 0.7909277677536011, | |
| "step": 2724 | |
| }, | |
| { | |
| "epoch": 0.7261587639850826, | |
| "grad_norm": 0.5703125, | |
| "learning_rate": 7.566784689295425e-05, | |
| "loss": 0.7480258345603943, | |
| "step": 2726 | |
| }, | |
| { | |
| "epoch": 0.7266915290356952, | |
| "grad_norm": 0.58203125, | |
| "learning_rate": 7.5598727430973e-05, | |
| "loss": 0.7182474732398987, | |
| "step": 2728 | |
| }, | |
| { | |
| "epoch": 0.727224294086308, | |
| "grad_norm": 0.57421875, | |
| "learning_rate": 7.552909300886802e-05, | |
| "loss": 0.7433866858482361, | |
| "step": 2730 | |
| }, | |
| { | |
| "epoch": 0.7277570591369206, | |
| "grad_norm": 0.609375, | |
| "learning_rate": 7.545894463394918e-05, | |
| "loss": 0.825681209564209, | |
| "step": 2732 | |
| }, | |
| { | |
| "epoch": 0.7282898241875333, | |
| "grad_norm": 0.6015625, | |
| "learning_rate": 7.538828332096108e-05, | |
| "loss": 0.7488453388214111, | |
| "step": 2734 | |
| }, | |
| { | |
| "epoch": 0.728822589238146, | |
| "grad_norm": 0.5859375, | |
| "learning_rate": 7.531711009206831e-05, | |
| "loss": 0.7557936310768127, | |
| "step": 2736 | |
| }, | |
| { | |
| "epoch": 0.7293553542887586, | |
| "grad_norm": 0.5625, | |
| "learning_rate": 7.524542597684066e-05, | |
| "loss": 0.7335447072982788, | |
| "step": 2738 | |
| }, | |
| { | |
| "epoch": 0.7298881193393714, | |
| "grad_norm": 0.59375, | |
| "learning_rate": 7.517323201223829e-05, | |
| "loss": 0.7855934500694275, | |
| "step": 2740 | |
| }, | |
| { | |
| "epoch": 0.730420884389984, | |
| "grad_norm": 0.58203125, | |
| "learning_rate": 7.51005292425966e-05, | |
| "loss": 0.7879592180252075, | |
| "step": 2742 | |
| }, | |
| { | |
| "epoch": 0.7309536494405967, | |
| "grad_norm": 0.5625, | |
| "learning_rate": 7.502731871961126e-05, | |
| "loss": 0.7301309704780579, | |
| "step": 2744 | |
| }, | |
| { | |
| "epoch": 0.7314864144912093, | |
| "grad_norm": 0.56640625, | |
| "learning_rate": 7.495360150232298e-05, | |
| "loss": 0.7647414803504944, | |
| "step": 2746 | |
| }, | |
| { | |
| "epoch": 0.732019179541822, | |
| "grad_norm": 0.58203125, | |
| "learning_rate": 7.487937865710206e-05, | |
| "loss": 0.7254839539527893, | |
| "step": 2748 | |
| }, | |
| { | |
| "epoch": 0.7325519445924348, | |
| "grad_norm": 0.55078125, | |
| "learning_rate": 7.480465125763312e-05, | |
| "loss": 0.7045494318008423, | |
| "step": 2750 | |
| }, | |
| { | |
| "epoch": 0.7330847096430474, | |
| "grad_norm": 0.58984375, | |
| "learning_rate": 7.47294203848995e-05, | |
| "loss": 0.7956986427307129, | |
| "step": 2752 | |
| }, | |
| { | |
| "epoch": 0.7336174746936601, | |
| "grad_norm": 0.57421875, | |
| "learning_rate": 7.465368712716759e-05, | |
| "loss": 0.7624717950820923, | |
| "step": 2754 | |
| }, | |
| { | |
| "epoch": 0.7341502397442727, | |
| "grad_norm": 0.59765625, | |
| "learning_rate": 7.457745257997118e-05, | |
| "loss": 0.7222627401351929, | |
| "step": 2756 | |
| }, | |
| { | |
| "epoch": 0.7346830047948855, | |
| "grad_norm": 0.5859375, | |
| "learning_rate": 7.450071784609551e-05, | |
| "loss": 0.7368757724761963, | |
| "step": 2758 | |
| }, | |
| { | |
| "epoch": 0.7352157698454982, | |
| "grad_norm": 0.58203125, | |
| "learning_rate": 7.442348403556139e-05, | |
| "loss": 0.7786681652069092, | |
| "step": 2760 | |
| }, | |
| { | |
| "epoch": 0.7357485348961108, | |
| "grad_norm": 0.60546875, | |
| "learning_rate": 7.43457522656091e-05, | |
| "loss": 0.7508862018585205, | |
| "step": 2762 | |
| }, | |
| { | |
| "epoch": 0.7362812999467235, | |
| "grad_norm": 0.578125, | |
| "learning_rate": 7.42675236606823e-05, | |
| "loss": 0.7344875931739807, | |
| "step": 2764 | |
| }, | |
| { | |
| "epoch": 0.7368140649973361, | |
| "grad_norm": 0.5859375, | |
| "learning_rate": 7.418879935241162e-05, | |
| "loss": 0.7535064816474915, | |
| "step": 2766 | |
| }, | |
| { | |
| "epoch": 0.7373468300479489, | |
| "grad_norm": 0.59375, | |
| "learning_rate": 7.410958047959845e-05, | |
| "loss": 0.7149871587753296, | |
| "step": 2768 | |
| }, | |
| { | |
| "epoch": 0.7378795950985615, | |
| "grad_norm": 0.57421875, | |
| "learning_rate": 7.402986818819838e-05, | |
| "loss": 0.7133111953735352, | |
| "step": 2770 | |
| }, | |
| { | |
| "epoch": 0.7384123601491742, | |
| "grad_norm": 0.59765625, | |
| "learning_rate": 7.394966363130462e-05, | |
| "loss": 0.7914494276046753, | |
| "step": 2772 | |
| }, | |
| { | |
| "epoch": 0.738945125199787, | |
| "grad_norm": 0.58984375, | |
| "learning_rate": 7.386896796913137e-05, | |
| "loss": 0.7482845783233643, | |
| "step": 2774 | |
| }, | |
| { | |
| "epoch": 0.7394778902503996, | |
| "grad_norm": 0.60546875, | |
| "learning_rate": 7.3787782368997e-05, | |
| "loss": 0.732533872127533, | |
| "step": 2776 | |
| }, | |
| { | |
| "epoch": 0.7400106553010123, | |
| "grad_norm": 0.59375, | |
| "learning_rate": 7.370610800530713e-05, | |
| "loss": 0.7276294827461243, | |
| "step": 2778 | |
| }, | |
| { | |
| "epoch": 0.7405434203516249, | |
| "grad_norm": 0.60546875, | |
| "learning_rate": 7.362394605953773e-05, | |
| "loss": 0.7570236325263977, | |
| "step": 2780 | |
| }, | |
| { | |
| "epoch": 0.7410761854022376, | |
| "grad_norm": 0.59375, | |
| "learning_rate": 7.354129772021796e-05, | |
| "loss": 0.7760714292526245, | |
| "step": 2782 | |
| }, | |
| { | |
| "epoch": 0.7416089504528502, | |
| "grad_norm": 0.58203125, | |
| "learning_rate": 7.345816418291303e-05, | |
| "loss": 0.7801597118377686, | |
| "step": 2784 | |
| }, | |
| { | |
| "epoch": 0.742141715503463, | |
| "grad_norm": 0.5703125, | |
| "learning_rate": 7.337454665020682e-05, | |
| "loss": 0.7562087178230286, | |
| "step": 2786 | |
| }, | |
| { | |
| "epoch": 0.7426744805540757, | |
| "grad_norm": 0.58203125, | |
| "learning_rate": 7.329044633168455e-05, | |
| "loss": 0.7230586409568787, | |
| "step": 2788 | |
| }, | |
| { | |
| "epoch": 0.7432072456046883, | |
| "grad_norm": 0.6015625, | |
| "learning_rate": 7.320586444391531e-05, | |
| "loss": 0.7483820915222168, | |
| "step": 2790 | |
| }, | |
| { | |
| "epoch": 0.743740010655301, | |
| "grad_norm": 0.5703125, | |
| "learning_rate": 7.312080221043438e-05, | |
| "loss": 0.7314382791519165, | |
| "step": 2792 | |
| }, | |
| { | |
| "epoch": 0.7442727757059137, | |
| "grad_norm": 0.5546875, | |
| "learning_rate": 7.303526086172558e-05, | |
| "loss": 0.7445840239524841, | |
| "step": 2794 | |
| }, | |
| { | |
| "epoch": 0.7448055407565264, | |
| "grad_norm": 0.58203125, | |
| "learning_rate": 7.294924163520349e-05, | |
| "loss": 0.7696006298065186, | |
| "step": 2796 | |
| }, | |
| { | |
| "epoch": 0.745338305807139, | |
| "grad_norm": 0.578125, | |
| "learning_rate": 7.286274577519546e-05, | |
| "loss": 0.738504946231842, | |
| "step": 2798 | |
| }, | |
| { | |
| "epoch": 0.7458710708577517, | |
| "grad_norm": 0.58203125, | |
| "learning_rate": 7.277577453292373e-05, | |
| "loss": 0.7645745277404785, | |
| "step": 2800 | |
| }, | |
| { | |
| "epoch": 0.7464038359083645, | |
| "grad_norm": 0.59375, | |
| "learning_rate": 7.268832916648726e-05, | |
| "loss": 0.7574669718742371, | |
| "step": 2802 | |
| }, | |
| { | |
| "epoch": 0.7469366009589771, | |
| "grad_norm": 0.57421875, | |
| "learning_rate": 7.26004109408435e-05, | |
| "loss": 0.7300820350646973, | |
| "step": 2804 | |
| }, | |
| { | |
| "epoch": 0.7474693660095898, | |
| "grad_norm": 0.6015625, | |
| "learning_rate": 7.251202112779023e-05, | |
| "loss": 0.7510509490966797, | |
| "step": 2806 | |
| }, | |
| { | |
| "epoch": 0.7480021310602024, | |
| "grad_norm": 0.5859375, | |
| "learning_rate": 7.242316100594696e-05, | |
| "loss": 0.7628376483917236, | |
| "step": 2808 | |
| }, | |
| { | |
| "epoch": 0.7485348961108151, | |
| "grad_norm": 0.5703125, | |
| "learning_rate": 7.23338318607366e-05, | |
| "loss": 0.7346773147583008, | |
| "step": 2810 | |
| }, | |
| { | |
| "epoch": 0.7490676611614278, | |
| "grad_norm": 0.5703125, | |
| "learning_rate": 7.22440349843668e-05, | |
| "loss": 0.768202543258667, | |
| "step": 2812 | |
| }, | |
| { | |
| "epoch": 0.7496004262120405, | |
| "grad_norm": 0.61328125, | |
| "learning_rate": 7.215377167581123e-05, | |
| "loss": 0.7435672879219055, | |
| "step": 2814 | |
| }, | |
| { | |
| "epoch": 0.7501331912626532, | |
| "grad_norm": 0.5625, | |
| "learning_rate": 7.206304324079089e-05, | |
| "loss": 0.7180995941162109, | |
| "step": 2816 | |
| }, | |
| { | |
| "epoch": 0.7506659563132658, | |
| "grad_norm": 0.5859375, | |
| "learning_rate": 7.197185099175508e-05, | |
| "loss": 0.7368422746658325, | |
| "step": 2818 | |
| }, | |
| { | |
| "epoch": 0.7511987213638786, | |
| "grad_norm": 0.5859375, | |
| "learning_rate": 7.188019624786255e-05, | |
| "loss": 0.7638548612594604, | |
| "step": 2820 | |
| }, | |
| { | |
| "epoch": 0.7517314864144912, | |
| "grad_norm": 0.58984375, | |
| "learning_rate": 7.17880803349623e-05, | |
| "loss": 0.803455114364624, | |
| "step": 2822 | |
| }, | |
| { | |
| "epoch": 0.7522642514651039, | |
| "grad_norm": 0.5859375, | |
| "learning_rate": 7.169550458557453e-05, | |
| "loss": 0.7775712609291077, | |
| "step": 2824 | |
| }, | |
| { | |
| "epoch": 0.7527970165157166, | |
| "grad_norm": 0.5703125, | |
| "learning_rate": 7.160247033887121e-05, | |
| "loss": 0.7178272604942322, | |
| "step": 2826 | |
| }, | |
| { | |
| "epoch": 0.7533297815663292, | |
| "grad_norm": 0.609375, | |
| "learning_rate": 7.150897894065684e-05, | |
| "loss": 0.7626135349273682, | |
| "step": 2828 | |
| }, | |
| { | |
| "epoch": 0.753862546616942, | |
| "grad_norm": 0.58203125, | |
| "learning_rate": 7.141503174334894e-05, | |
| "loss": 0.7484996318817139, | |
| "step": 2830 | |
| }, | |
| { | |
| "epoch": 0.7543953116675546, | |
| "grad_norm": 0.58984375, | |
| "learning_rate": 7.132063010595843e-05, | |
| "loss": 0.7348583340644836, | |
| "step": 2832 | |
| }, | |
| { | |
| "epoch": 0.7549280767181673, | |
| "grad_norm": 0.58984375, | |
| "learning_rate": 7.122577539407009e-05, | |
| "loss": 0.7415651082992554, | |
| "step": 2834 | |
| }, | |
| { | |
| "epoch": 0.7554608417687799, | |
| "grad_norm": 0.59765625, | |
| "learning_rate": 7.113046897982265e-05, | |
| "loss": 0.7116685509681702, | |
| "step": 2836 | |
| }, | |
| { | |
| "epoch": 0.7559936068193926, | |
| "grad_norm": 0.58203125, | |
| "learning_rate": 7.103471224188908e-05, | |
| "loss": 0.7689422369003296, | |
| "step": 2838 | |
| }, | |
| { | |
| "epoch": 0.7565263718700054, | |
| "grad_norm": 0.58984375, | |
| "learning_rate": 7.093850656545659e-05, | |
| "loss": 0.7544586062431335, | |
| "step": 2840 | |
| }, | |
| { | |
| "epoch": 0.757059136920618, | |
| "grad_norm": 0.5625, | |
| "learning_rate": 7.084185334220658e-05, | |
| "loss": 0.7671671509742737, | |
| "step": 2842 | |
| }, | |
| { | |
| "epoch": 0.7575919019712307, | |
| "grad_norm": 0.578125, | |
| "learning_rate": 7.074475397029454e-05, | |
| "loss": 0.7252099514007568, | |
| "step": 2844 | |
| }, | |
| { | |
| "epoch": 0.7581246670218433, | |
| "grad_norm": 0.59765625, | |
| "learning_rate": 7.064720985432979e-05, | |
| "loss": 0.7508083581924438, | |
| "step": 2846 | |
| }, | |
| { | |
| "epoch": 0.7586574320724561, | |
| "grad_norm": 0.5703125, | |
| "learning_rate": 7.054922240535516e-05, | |
| "loss": 0.7430750727653503, | |
| "step": 2848 | |
| }, | |
| { | |
| "epoch": 0.7591901971230687, | |
| "grad_norm": 0.57421875, | |
| "learning_rate": 7.045079304082667e-05, | |
| "loss": 0.736114501953125, | |
| "step": 2850 | |
| }, | |
| { | |
| "epoch": 0.7597229621736814, | |
| "grad_norm": 0.55859375, | |
| "learning_rate": 7.035192318459288e-05, | |
| "loss": 0.7149706482887268, | |
| "step": 2852 | |
| }, | |
| { | |
| "epoch": 0.7602557272242941, | |
| "grad_norm": 0.5859375, | |
| "learning_rate": 7.02526142668744e-05, | |
| "loss": 0.7909661531448364, | |
| "step": 2854 | |
| }, | |
| { | |
| "epoch": 0.7607884922749067, | |
| "grad_norm": 0.60546875, | |
| "learning_rate": 7.015286772424316e-05, | |
| "loss": 0.7296648025512695, | |
| "step": 2856 | |
| }, | |
| { | |
| "epoch": 0.7613212573255195, | |
| "grad_norm": 0.5625, | |
| "learning_rate": 7.005268499960162e-05, | |
| "loss": 0.7293763160705566, | |
| "step": 2858 | |
| }, | |
| { | |
| "epoch": 0.7618540223761321, | |
| "grad_norm": 0.56640625, | |
| "learning_rate": 6.995206754216194e-05, | |
| "loss": 0.7414215207099915, | |
| "step": 2860 | |
| }, | |
| { | |
| "epoch": 0.7623867874267448, | |
| "grad_norm": 0.578125, | |
| "learning_rate": 6.985101680742499e-05, | |
| "loss": 0.7198176383972168, | |
| "step": 2862 | |
| }, | |
| { | |
| "epoch": 0.7629195524773574, | |
| "grad_norm": 0.5859375, | |
| "learning_rate": 6.974953425715926e-05, | |
| "loss": 0.7562193870544434, | |
| "step": 2864 | |
| }, | |
| { | |
| "epoch": 0.7634523175279702, | |
| "grad_norm": 0.6015625, | |
| "learning_rate": 6.96476213593798e-05, | |
| "loss": 0.7399710416793823, | |
| "step": 2866 | |
| }, | |
| { | |
| "epoch": 0.7639850825785829, | |
| "grad_norm": 0.55859375, | |
| "learning_rate": 6.95452795883269e-05, | |
| "loss": 0.739233672618866, | |
| "step": 2868 | |
| }, | |
| { | |
| "epoch": 0.7645178476291955, | |
| "grad_norm": 0.57421875, | |
| "learning_rate": 6.944251042444477e-05, | |
| "loss": 0.7336940765380859, | |
| "step": 2870 | |
| }, | |
| { | |
| "epoch": 0.7650506126798082, | |
| "grad_norm": 0.55859375, | |
| "learning_rate": 6.933931535436021e-05, | |
| "loss": 0.7608907222747803, | |
| "step": 2872 | |
| }, | |
| { | |
| "epoch": 0.7655833777304208, | |
| "grad_norm": 0.5625, | |
| "learning_rate": 6.923569587086103e-05, | |
| "loss": 0.747681736946106, | |
| "step": 2874 | |
| }, | |
| { | |
| "epoch": 0.7661161427810336, | |
| "grad_norm": 0.54296875, | |
| "learning_rate": 6.913165347287444e-05, | |
| "loss": 0.6818616390228271, | |
| "step": 2876 | |
| }, | |
| { | |
| "epoch": 0.7666489078316463, | |
| "grad_norm": 0.6015625, | |
| "learning_rate": 6.902718966544545e-05, | |
| "loss": 0.7375771403312683, | |
| "step": 2878 | |
| }, | |
| { | |
| "epoch": 0.7671816728822589, | |
| "grad_norm": 0.83203125, | |
| "learning_rate": 6.8922305959715e-05, | |
| "loss": 0.8159199357032776, | |
| "step": 2880 | |
| }, | |
| { | |
| "epoch": 0.7677144379328716, | |
| "grad_norm": 0.5703125, | |
| "learning_rate": 6.881700387289819e-05, | |
| "loss": 0.7084010243415833, | |
| "step": 2882 | |
| }, | |
| { | |
| "epoch": 0.7682472029834843, | |
| "grad_norm": 0.59765625, | |
| "learning_rate": 6.871128492826226e-05, | |
| "loss": 0.7337329387664795, | |
| "step": 2884 | |
| }, | |
| { | |
| "epoch": 0.768779968034097, | |
| "grad_norm": 0.56640625, | |
| "learning_rate": 6.860515065510459e-05, | |
| "loss": 0.7246623635292053, | |
| "step": 2886 | |
| }, | |
| { | |
| "epoch": 0.7693127330847096, | |
| "grad_norm": 0.625, | |
| "learning_rate": 6.849860258873059e-05, | |
| "loss": 0.7965453863143921, | |
| "step": 2888 | |
| }, | |
| { | |
| "epoch": 0.7698454981353223, | |
| "grad_norm": 0.5703125, | |
| "learning_rate": 6.839164227043146e-05, | |
| "loss": 0.7623465657234192, | |
| "step": 2890 | |
| }, | |
| { | |
| "epoch": 0.770378263185935, | |
| "grad_norm": 0.578125, | |
| "learning_rate": 6.828427124746191e-05, | |
| "loss": 0.7439296245574951, | |
| "step": 2892 | |
| }, | |
| { | |
| "epoch": 0.7709110282365477, | |
| "grad_norm": 0.546875, | |
| "learning_rate": 6.817649107301777e-05, | |
| "loss": 0.6967207193374634, | |
| "step": 2894 | |
| }, | |
| { | |
| "epoch": 0.7714437932871604, | |
| "grad_norm": 0.58203125, | |
| "learning_rate": 6.806830330621355e-05, | |
| "loss": 0.7421116828918457, | |
| "step": 2896 | |
| }, | |
| { | |
| "epoch": 0.771976558337773, | |
| "grad_norm": 0.6015625, | |
| "learning_rate": 6.795970951205984e-05, | |
| "loss": 0.7583564519882202, | |
| "step": 2898 | |
| }, | |
| { | |
| "epoch": 0.7725093233883857, | |
| "grad_norm": 0.578125, | |
| "learning_rate": 6.785071126144072e-05, | |
| "loss": 0.7536525130271912, | |
| "step": 2900 | |
| }, | |
| { | |
| "epoch": 0.7730420884389984, | |
| "grad_norm": 0.5859375, | |
| "learning_rate": 6.774131013109097e-05, | |
| "loss": 0.7408878803253174, | |
| "step": 2902 | |
| }, | |
| { | |
| "epoch": 0.7735748534896111, | |
| "grad_norm": 0.55859375, | |
| "learning_rate": 6.763150770357337e-05, | |
| "loss": 0.7107818126678467, | |
| "step": 2904 | |
| }, | |
| { | |
| "epoch": 0.7741076185402238, | |
| "grad_norm": 0.578125, | |
| "learning_rate": 6.752130556725567e-05, | |
| "loss": 0.7988401651382446, | |
| "step": 2906 | |
| }, | |
| { | |
| "epoch": 0.7746403835908364, | |
| "grad_norm": 0.56640625, | |
| "learning_rate": 6.741070531628771e-05, | |
| "loss": 0.7510604858398438, | |
| "step": 2908 | |
| }, | |
| { | |
| "epoch": 0.7751731486414492, | |
| "grad_norm": 0.58203125, | |
| "learning_rate": 6.729970855057835e-05, | |
| "loss": 0.6959093809127808, | |
| "step": 2910 | |
| }, | |
| { | |
| "epoch": 0.7757059136920618, | |
| "grad_norm": 0.57421875, | |
| "learning_rate": 6.718831687577228e-05, | |
| "loss": 0.7422518134117126, | |
| "step": 2912 | |
| }, | |
| { | |
| "epoch": 0.7762386787426745, | |
| "grad_norm": 0.58984375, | |
| "learning_rate": 6.707653190322687e-05, | |
| "loss": 0.7659637928009033, | |
| "step": 2914 | |
| }, | |
| { | |
| "epoch": 0.7767714437932871, | |
| "grad_norm": 0.57421875, | |
| "learning_rate": 6.696435524998875e-05, | |
| "loss": 0.7153394222259521, | |
| "step": 2916 | |
| }, | |
| { | |
| "epoch": 0.7773042088438998, | |
| "grad_norm": 0.5703125, | |
| "learning_rate": 6.685178853877052e-05, | |
| "loss": 0.7259315848350525, | |
| "step": 2918 | |
| }, | |
| { | |
| "epoch": 0.7778369738945126, | |
| "grad_norm": 0.5546875, | |
| "learning_rate": 6.673883339792723e-05, | |
| "loss": 0.7096306085586548, | |
| "step": 2920 | |
| }, | |
| { | |
| "epoch": 0.7783697389451252, | |
| "grad_norm": 0.57421875, | |
| "learning_rate": 6.662549146143281e-05, | |
| "loss": 0.7444663047790527, | |
| "step": 2922 | |
| }, | |
| { | |
| "epoch": 0.7789025039957379, | |
| "grad_norm": 0.58203125, | |
| "learning_rate": 6.651176436885651e-05, | |
| "loss": 0.7020083665847778, | |
| "step": 2924 | |
| }, | |
| { | |
| "epoch": 0.7794352690463505, | |
| "grad_norm": 0.58203125, | |
| "learning_rate": 6.639765376533909e-05, | |
| "loss": 0.7518250346183777, | |
| "step": 2926 | |
| }, | |
| { | |
| "epoch": 0.7799680340969632, | |
| "grad_norm": 0.6015625, | |
| "learning_rate": 6.628316130156904e-05, | |
| "loss": 0.7833393216133118, | |
| "step": 2928 | |
| }, | |
| { | |
| "epoch": 0.780500799147576, | |
| "grad_norm": 0.609375, | |
| "learning_rate": 6.616828863375877e-05, | |
| "loss": 0.7342422604560852, | |
| "step": 2930 | |
| }, | |
| { | |
| "epoch": 0.7810335641981886, | |
| "grad_norm": 0.546875, | |
| "learning_rate": 6.605303742362057e-05, | |
| "loss": 0.7364885807037354, | |
| "step": 2932 | |
| }, | |
| { | |
| "epoch": 0.7815663292488013, | |
| "grad_norm": 0.59375, | |
| "learning_rate": 6.593740933834262e-05, | |
| "loss": 0.7553122043609619, | |
| "step": 2934 | |
| }, | |
| { | |
| "epoch": 0.7820990942994139, | |
| "grad_norm": 0.57421875, | |
| "learning_rate": 6.582140605056484e-05, | |
| "loss": 0.7586118578910828, | |
| "step": 2936 | |
| }, | |
| { | |
| "epoch": 0.7826318593500267, | |
| "grad_norm": 0.5625, | |
| "learning_rate": 6.570502923835475e-05, | |
| "loss": 0.7087134718894958, | |
| "step": 2938 | |
| }, | |
| { | |
| "epoch": 0.7831646244006393, | |
| "grad_norm": 0.57421875, | |
| "learning_rate": 6.558828058518311e-05, | |
| "loss": 0.7354501485824585, | |
| "step": 2940 | |
| }, | |
| { | |
| "epoch": 0.783697389451252, | |
| "grad_norm": 0.58203125, | |
| "learning_rate": 6.547116177989967e-05, | |
| "loss": 0.7278033494949341, | |
| "step": 2942 | |
| }, | |
| { | |
| "epoch": 0.7842301545018647, | |
| "grad_norm": 0.578125, | |
| "learning_rate": 6.535367451670862e-05, | |
| "loss": 0.7207239270210266, | |
| "step": 2944 | |
| }, | |
| { | |
| "epoch": 0.7847629195524773, | |
| "grad_norm": 0.5859375, | |
| "learning_rate": 6.523582049514422e-05, | |
| "loss": 0.7300347685813904, | |
| "step": 2946 | |
| }, | |
| { | |
| "epoch": 0.7852956846030901, | |
| "grad_norm": 0.5546875, | |
| "learning_rate": 6.511760142004608e-05, | |
| "loss": 0.701433539390564, | |
| "step": 2948 | |
| }, | |
| { | |
| "epoch": 0.7858284496537027, | |
| "grad_norm": 0.55859375, | |
| "learning_rate": 6.49990190015346e-05, | |
| "loss": 0.7079622149467468, | |
| "step": 2950 | |
| }, | |
| { | |
| "epoch": 0.7863612147043154, | |
| "grad_norm": 0.56640625, | |
| "learning_rate": 6.488007495498619e-05, | |
| "loss": 0.7154868841171265, | |
| "step": 2952 | |
| }, | |
| { | |
| "epoch": 0.786893979754928, | |
| "grad_norm": 0.5546875, | |
| "learning_rate": 6.476077100100841e-05, | |
| "loss": 0.7213861346244812, | |
| "step": 2954 | |
| }, | |
| { | |
| "epoch": 0.7874267448055408, | |
| "grad_norm": 0.56640625, | |
| "learning_rate": 6.464110886541521e-05, | |
| "loss": 0.7333036661148071, | |
| "step": 2956 | |
| }, | |
| { | |
| "epoch": 0.7879595098561535, | |
| "grad_norm": 0.59375, | |
| "learning_rate": 6.452109027920183e-05, | |
| "loss": 0.727940559387207, | |
| "step": 2958 | |
| }, | |
| { | |
| "epoch": 0.7884922749067661, | |
| "grad_norm": 0.5703125, | |
| "learning_rate": 6.44007169785198e-05, | |
| "loss": 0.7133424282073975, | |
| "step": 2960 | |
| }, | |
| { | |
| "epoch": 0.7890250399573788, | |
| "grad_norm": 0.5625, | |
| "learning_rate": 6.427999070465191e-05, | |
| "loss": 0.7307575345039368, | |
| "step": 2962 | |
| }, | |
| { | |
| "epoch": 0.7895578050079914, | |
| "grad_norm": 0.57421875, | |
| "learning_rate": 6.415891320398688e-05, | |
| "loss": 0.7410464286804199, | |
| "step": 2964 | |
| }, | |
| { | |
| "epoch": 0.7900905700586042, | |
| "grad_norm": 0.5859375, | |
| "learning_rate": 6.403748622799418e-05, | |
| "loss": 0.7647122144699097, | |
| "step": 2966 | |
| }, | |
| { | |
| "epoch": 0.7906233351092168, | |
| "grad_norm": 0.54296875, | |
| "learning_rate": 6.391571153319872e-05, | |
| "loss": 0.6683281064033508, | |
| "step": 2968 | |
| }, | |
| { | |
| "epoch": 0.7911561001598295, | |
| "grad_norm": 0.56640625, | |
| "learning_rate": 6.379359088115537e-05, | |
| "loss": 0.7463474273681641, | |
| "step": 2970 | |
| }, | |
| { | |
| "epoch": 0.7916888652104422, | |
| "grad_norm": 0.54296875, | |
| "learning_rate": 6.36711260384235e-05, | |
| "loss": 0.6725707650184631, | |
| "step": 2972 | |
| }, | |
| { | |
| "epoch": 0.7922216302610549, | |
| "grad_norm": 0.57421875, | |
| "learning_rate": 6.354831877654147e-05, | |
| "loss": 0.7473057508468628, | |
| "step": 2974 | |
| }, | |
| { | |
| "epoch": 0.7927543953116676, | |
| "grad_norm": 0.58984375, | |
| "learning_rate": 6.342517087200094e-05, | |
| "loss": 0.7107257843017578, | |
| "step": 2976 | |
| }, | |
| { | |
| "epoch": 0.7932871603622802, | |
| "grad_norm": 0.5546875, | |
| "learning_rate": 6.330168410622123e-05, | |
| "loss": 0.7127653360366821, | |
| "step": 2978 | |
| }, | |
| { | |
| "epoch": 0.7938199254128929, | |
| "grad_norm": 0.55078125, | |
| "learning_rate": 6.317786026552347e-05, | |
| "loss": 0.6991822123527527, | |
| "step": 2980 | |
| }, | |
| { | |
| "epoch": 0.7943526904635055, | |
| "grad_norm": 0.55078125, | |
| "learning_rate": 6.305370114110487e-05, | |
| "loss": 0.7100861072540283, | |
| "step": 2982 | |
| }, | |
| { | |
| "epoch": 0.7948854555141183, | |
| "grad_norm": 0.56640625, | |
| "learning_rate": 6.292920852901272e-05, | |
| "loss": 0.7302361130714417, | |
| "step": 2984 | |
| }, | |
| { | |
| "epoch": 0.795418220564731, | |
| "grad_norm": 0.55078125, | |
| "learning_rate": 6.280438423011843e-05, | |
| "loss": 0.7081553936004639, | |
| "step": 2986 | |
| }, | |
| { | |
| "epoch": 0.7959509856153436, | |
| "grad_norm": 0.55859375, | |
| "learning_rate": 6.267923005009153e-05, | |
| "loss": 0.722525954246521, | |
| "step": 2988 | |
| }, | |
| { | |
| "epoch": 0.7964837506659563, | |
| "grad_norm": 0.578125, | |
| "learning_rate": 6.255374779937344e-05, | |
| "loss": 0.7379462718963623, | |
| "step": 2990 | |
| }, | |
| { | |
| "epoch": 0.797016515716569, | |
| "grad_norm": 0.546875, | |
| "learning_rate": 6.242793929315143e-05, | |
| "loss": 0.7418494820594788, | |
| "step": 2992 | |
| }, | |
| { | |
| "epoch": 0.7975492807671817, | |
| "grad_norm": 0.56640625, | |
| "learning_rate": 6.230180635133221e-05, | |
| "loss": 0.7134079933166504, | |
| "step": 2994 | |
| }, | |
| { | |
| "epoch": 0.7980820458177944, | |
| "grad_norm": 0.5546875, | |
| "learning_rate": 6.217535079851569e-05, | |
| "loss": 0.6956773996353149, | |
| "step": 2996 | |
| }, | |
| { | |
| "epoch": 0.798614810868407, | |
| "grad_norm": 0.54296875, | |
| "learning_rate": 6.204857446396862e-05, | |
| "loss": 0.6780136227607727, | |
| "step": 2998 | |
| }, | |
| { | |
| "epoch": 0.7991475759190197, | |
| "grad_norm": 0.54296875, | |
| "learning_rate": 6.192147918159803e-05, | |
| "loss": 0.6957566142082214, | |
| "step": 3000 | |
| }, | |
| { | |
| "epoch": 0.7996803409696324, | |
| "grad_norm": 0.55078125, | |
| "learning_rate": 6.179406678992476e-05, | |
| "loss": 0.705109179019928, | |
| "step": 3002 | |
| }, | |
| { | |
| "epoch": 0.8002131060202451, | |
| "grad_norm": 0.58203125, | |
| "learning_rate": 6.166633913205684e-05, | |
| "loss": 0.7465344071388245, | |
| "step": 3004 | |
| }, | |
| { | |
| "epoch": 0.8007458710708577, | |
| "grad_norm": 0.55859375, | |
| "learning_rate": 6.15382980556629e-05, | |
| "loss": 0.6974951028823853, | |
| "step": 3006 | |
| }, | |
| { | |
| "epoch": 0.8012786361214704, | |
| "grad_norm": 0.5625, | |
| "learning_rate": 6.140994541294529e-05, | |
| "loss": 0.7093472480773926, | |
| "step": 3008 | |
| }, | |
| { | |
| "epoch": 0.8018114011720832, | |
| "grad_norm": 0.55859375, | |
| "learning_rate": 6.128128306061347e-05, | |
| "loss": 0.8073316812515259, | |
| "step": 3010 | |
| }, | |
| { | |
| "epoch": 0.8023441662226958, | |
| "grad_norm": 0.5625, | |
| "learning_rate": 6.115231285985703e-05, | |
| "loss": 0.7252596616744995, | |
| "step": 3012 | |
| }, | |
| { | |
| "epoch": 0.8028769312733085, | |
| "grad_norm": 0.5625, | |
| "learning_rate": 6.102303667631878e-05, | |
| "loss": 0.7308025360107422, | |
| "step": 3014 | |
| }, | |
| { | |
| "epoch": 0.8034096963239211, | |
| "grad_norm": 0.578125, | |
| "learning_rate": 6.089345638006782e-05, | |
| "loss": 0.7695532441139221, | |
| "step": 3016 | |
| }, | |
| { | |
| "epoch": 0.8039424613745338, | |
| "grad_norm": 0.5703125, | |
| "learning_rate": 6.0763573845572434e-05, | |
| "loss": 0.7262691259384155, | |
| "step": 3018 | |
| }, | |
| { | |
| "epoch": 0.8044752264251465, | |
| "grad_norm": 0.5546875, | |
| "learning_rate": 6.0633390951672965e-05, | |
| "loss": 0.6978904008865356, | |
| "step": 3020 | |
| }, | |
| { | |
| "epoch": 0.8050079914757592, | |
| "grad_norm": 0.5546875, | |
| "learning_rate": 6.0502909581554706e-05, | |
| "loss": 0.6867971420288086, | |
| "step": 3022 | |
| }, | |
| { | |
| "epoch": 0.8055407565263719, | |
| "grad_norm": 0.57421875, | |
| "learning_rate": 6.037213162272056e-05, | |
| "loss": 0.7410815358161926, | |
| "step": 3024 | |
| }, | |
| { | |
| "epoch": 0.8060735215769845, | |
| "grad_norm": 0.58203125, | |
| "learning_rate": 6.0241058966963854e-05, | |
| "loss": 0.7607967257499695, | |
| "step": 3026 | |
| }, | |
| { | |
| "epoch": 0.8066062866275973, | |
| "grad_norm": 0.5859375, | |
| "learning_rate": 6.0109693510340867e-05, | |
| "loss": 0.7447317242622375, | |
| "step": 3028 | |
| }, | |
| { | |
| "epoch": 0.8071390516782099, | |
| "grad_norm": 0.55859375, | |
| "learning_rate": 5.997803715314345e-05, | |
| "loss": 0.7340399026870728, | |
| "step": 3030 | |
| }, | |
| { | |
| "epoch": 0.8076718167288226, | |
| "grad_norm": 0.5390625, | |
| "learning_rate": 5.984609179987155e-05, | |
| "loss": 0.6988064050674438, | |
| "step": 3032 | |
| }, | |
| { | |
| "epoch": 0.8082045817794352, | |
| "grad_norm": 0.5625, | |
| "learning_rate": 5.971385935920559e-05, | |
| "loss": 0.7076520323753357, | |
| "step": 3034 | |
| }, | |
| { | |
| "epoch": 0.8087373468300479, | |
| "grad_norm": 0.56640625, | |
| "learning_rate": 5.9581341743978986e-05, | |
| "loss": 0.7423697113990784, | |
| "step": 3036 | |
| }, | |
| { | |
| "epoch": 0.8092701118806607, | |
| "grad_norm": 0.5703125, | |
| "learning_rate": 5.944854087115035e-05, | |
| "loss": 0.7314884066581726, | |
| "step": 3038 | |
| }, | |
| { | |
| "epoch": 0.8098028769312733, | |
| "grad_norm": 0.57421875, | |
| "learning_rate": 5.931545866177581e-05, | |
| "loss": 0.7414923906326294, | |
| "step": 3040 | |
| }, | |
| { | |
| "epoch": 0.810335641981886, | |
| "grad_norm": 0.58203125, | |
| "learning_rate": 5.918209704098126e-05, | |
| "loss": 0.7651832103729248, | |
| "step": 3042 | |
| }, | |
| { | |
| "epoch": 0.8108684070324986, | |
| "grad_norm": 0.5625, | |
| "learning_rate": 5.904845793793442e-05, | |
| "loss": 0.7165493369102478, | |
| "step": 3044 | |
| }, | |
| { | |
| "epoch": 0.8114011720831114, | |
| "grad_norm": 0.57421875, | |
| "learning_rate": 5.891454328581702e-05, | |
| "loss": 0.7285482883453369, | |
| "step": 3046 | |
| }, | |
| { | |
| "epoch": 0.8119339371337241, | |
| "grad_norm": 0.546875, | |
| "learning_rate": 5.8780355021796774e-05, | |
| "loss": 0.739894449710846, | |
| "step": 3048 | |
| }, | |
| { | |
| "epoch": 0.8124667021843367, | |
| "grad_norm": 0.6015625, | |
| "learning_rate": 5.86458950869994e-05, | |
| "loss": 0.7955057621002197, | |
| "step": 3050 | |
| }, | |
| { | |
| "epoch": 0.8129994672349494, | |
| "grad_norm": 0.546875, | |
| "learning_rate": 5.8511165426480514e-05, | |
| "loss": 0.6887469291687012, | |
| "step": 3052 | |
| }, | |
| { | |
| "epoch": 0.813532232285562, | |
| "grad_norm": 0.56640625, | |
| "learning_rate": 5.8376167989197495e-05, | |
| "loss": 0.6830754280090332, | |
| "step": 3054 | |
| }, | |
| { | |
| "epoch": 0.8140649973361748, | |
| "grad_norm": 0.57421875, | |
| "learning_rate": 5.82409047279813e-05, | |
| "loss": 0.7223320007324219, | |
| "step": 3056 | |
| }, | |
| { | |
| "epoch": 0.8145977623867874, | |
| "grad_norm": 0.57421875, | |
| "learning_rate": 5.810537759950822e-05, | |
| "loss": 0.6974748969078064, | |
| "step": 3058 | |
| }, | |
| { | |
| "epoch": 0.8151305274374001, | |
| "grad_norm": 0.5703125, | |
| "learning_rate": 5.796958856427155e-05, | |
| "loss": 0.7329493761062622, | |
| "step": 3060 | |
| }, | |
| { | |
| "epoch": 0.8156632924880128, | |
| "grad_norm": 0.5390625, | |
| "learning_rate": 5.783353958655328e-05, | |
| "loss": 0.6891670823097229, | |
| "step": 3062 | |
| }, | |
| { | |
| "epoch": 0.8161960575386255, | |
| "grad_norm": 0.54296875, | |
| "learning_rate": 5.7697232634395614e-05, | |
| "loss": 0.6876566410064697, | |
| "step": 3064 | |
| }, | |
| { | |
| "epoch": 0.8167288225892382, | |
| "grad_norm": 0.56640625, | |
| "learning_rate": 5.756066967957253e-05, | |
| "loss": 0.7235361933708191, | |
| "step": 3066 | |
| }, | |
| { | |
| "epoch": 0.8172615876398508, | |
| "grad_norm": 0.54296875, | |
| "learning_rate": 5.74238526975613e-05, | |
| "loss": 0.7141427993774414, | |
| "step": 3068 | |
| }, | |
| { | |
| "epoch": 0.8177943526904635, | |
| "grad_norm": 0.54296875, | |
| "learning_rate": 5.728678366751382e-05, | |
| "loss": 0.6909295916557312, | |
| "step": 3070 | |
| }, | |
| { | |
| "epoch": 0.8183271177410761, | |
| "grad_norm": 0.5625, | |
| "learning_rate": 5.7149464572228104e-05, | |
| "loss": 0.7080371379852295, | |
| "step": 3072 | |
| }, | |
| { | |
| "epoch": 0.8188598827916889, | |
| "grad_norm": 0.54296875, | |
| "learning_rate": 5.7011897398119486e-05, | |
| "loss": 0.6966223120689392, | |
| "step": 3074 | |
| }, | |
| { | |
| "epoch": 0.8193926478423016, | |
| "grad_norm": 0.56640625, | |
| "learning_rate": 5.687408413519191e-05, | |
| "loss": 0.7194684743881226, | |
| "step": 3076 | |
| }, | |
| { | |
| "epoch": 0.8199254128929142, | |
| "grad_norm": 0.56640625, | |
| "learning_rate": 5.6736026777009206e-05, | |
| "loss": 0.7557689547538757, | |
| "step": 3078 | |
| }, | |
| { | |
| "epoch": 0.8204581779435269, | |
| "grad_norm": 0.5859375, | |
| "learning_rate": 5.6597727320666205e-05, | |
| "loss": 0.76923006772995, | |
| "step": 3080 | |
| }, | |
| { | |
| "epoch": 0.8209909429941395, | |
| "grad_norm": 0.5390625, | |
| "learning_rate": 5.645918776675985e-05, | |
| "loss": 0.7014768719673157, | |
| "step": 3082 | |
| }, | |
| { | |
| "epoch": 0.8215237080447523, | |
| "grad_norm": 0.55859375, | |
| "learning_rate": 5.632041011936025e-05, | |
| "loss": 0.7342231869697571, | |
| "step": 3084 | |
| }, | |
| { | |
| "epoch": 0.8220564730953649, | |
| "grad_norm": 0.57421875, | |
| "learning_rate": 5.6181396385981706e-05, | |
| "loss": 0.7379154562950134, | |
| "step": 3086 | |
| }, | |
| { | |
| "epoch": 0.8225892381459776, | |
| "grad_norm": 0.55859375, | |
| "learning_rate": 5.6042148577553665e-05, | |
| "loss": 0.7159122824668884, | |
| "step": 3088 | |
| }, | |
| { | |
| "epoch": 0.8231220031965903, | |
| "grad_norm": 0.54296875, | |
| "learning_rate": 5.590266870839165e-05, | |
| "loss": 0.7064898610115051, | |
| "step": 3090 | |
| }, | |
| { | |
| "epoch": 0.823654768247203, | |
| "grad_norm": 0.5546875, | |
| "learning_rate": 5.576295879616806e-05, | |
| "loss": 0.7205244898796082, | |
| "step": 3092 | |
| }, | |
| { | |
| "epoch": 0.8241875332978157, | |
| "grad_norm": 0.5546875, | |
| "learning_rate": 5.5623020861883075e-05, | |
| "loss": 0.7115920782089233, | |
| "step": 3094 | |
| }, | |
| { | |
| "epoch": 0.8247202983484283, | |
| "grad_norm": 0.5546875, | |
| "learning_rate": 5.5482856929835334e-05, | |
| "loss": 0.6948148012161255, | |
| "step": 3096 | |
| }, | |
| { | |
| "epoch": 0.825253063399041, | |
| "grad_norm": 0.5546875, | |
| "learning_rate": 5.534246902759269e-05, | |
| "loss": 0.7241220474243164, | |
| "step": 3098 | |
| }, | |
| { | |
| "epoch": 0.8257858284496538, | |
| "grad_norm": 0.5625, | |
| "learning_rate": 5.520185918596292e-05, | |
| "loss": 0.7225724458694458, | |
| "step": 3100 | |
| }, | |
| { | |
| "epoch": 0.8263185935002664, | |
| "grad_norm": 0.53125, | |
| "learning_rate": 5.506102943896426e-05, | |
| "loss": 0.712283730506897, | |
| "step": 3102 | |
| }, | |
| { | |
| "epoch": 0.8268513585508791, | |
| "grad_norm": 0.578125, | |
| "learning_rate": 5.4919981823796046e-05, | |
| "loss": 0.7341251373291016, | |
| "step": 3104 | |
| }, | |
| { | |
| "epoch": 0.8273841236014917, | |
| "grad_norm": 0.58984375, | |
| "learning_rate": 5.477871838080925e-05, | |
| "loss": 0.7807260155677795, | |
| "step": 3106 | |
| }, | |
| { | |
| "epoch": 0.8279168886521044, | |
| "grad_norm": 0.546875, | |
| "learning_rate": 5.46372411534769e-05, | |
| "loss": 0.7081162929534912, | |
| "step": 3108 | |
| }, | |
| { | |
| "epoch": 0.8284496537027171, | |
| "grad_norm": 0.55078125, | |
| "learning_rate": 5.449555218836459e-05, | |
| "loss": 0.7185850739479065, | |
| "step": 3110 | |
| }, | |
| { | |
| "epoch": 0.8289824187533298, | |
| "grad_norm": 0.55078125, | |
| "learning_rate": 5.435365353510083e-05, | |
| "loss": 0.7876030206680298, | |
| "step": 3112 | |
| }, | |
| { | |
| "epoch": 0.8295151838039425, | |
| "grad_norm": 0.54296875, | |
| "learning_rate": 5.421154724634743e-05, | |
| "loss": 0.673431396484375, | |
| "step": 3114 | |
| }, | |
| { | |
| "epoch": 0.8300479488545551, | |
| "grad_norm": 0.546875, | |
| "learning_rate": 5.40692353777698e-05, | |
| "loss": 0.6700567007064819, | |
| "step": 3116 | |
| }, | |
| { | |
| "epoch": 0.8305807139051679, | |
| "grad_norm": 0.55859375, | |
| "learning_rate": 5.3926719988007173e-05, | |
| "loss": 0.6965268850326538, | |
| "step": 3118 | |
| }, | |
| { | |
| "epoch": 0.8311134789557805, | |
| "grad_norm": 0.5546875, | |
| "learning_rate": 5.3784003138642855e-05, | |
| "loss": 0.7045586109161377, | |
| "step": 3120 | |
| }, | |
| { | |
| "epoch": 0.8316462440063932, | |
| "grad_norm": 0.57421875, | |
| "learning_rate": 5.364108689417444e-05, | |
| "loss": 0.6918834447860718, | |
| "step": 3122 | |
| }, | |
| { | |
| "epoch": 0.8321790090570058, | |
| "grad_norm": 0.55859375, | |
| "learning_rate": 5.3497973321983896e-05, | |
| "loss": 0.688016414642334, | |
| "step": 3124 | |
| }, | |
| { | |
| "epoch": 0.8327117741076185, | |
| "grad_norm": 0.546875, | |
| "learning_rate": 5.335466449230765e-05, | |
| "loss": 0.7234639525413513, | |
| "step": 3126 | |
| }, | |
| { | |
| "epoch": 0.8332445391582313, | |
| "grad_norm": 0.5859375, | |
| "learning_rate": 5.321116247820669e-05, | |
| "loss": 0.7776771187782288, | |
| "step": 3128 | |
| }, | |
| { | |
| "epoch": 0.8337773042088439, | |
| "grad_norm": 0.55859375, | |
| "learning_rate": 5.3067469355536525e-05, | |
| "loss": 0.6885461211204529, | |
| "step": 3130 | |
| }, | |
| { | |
| "epoch": 0.8343100692594566, | |
| "grad_norm": 0.5390625, | |
| "learning_rate": 5.29235872029172e-05, | |
| "loss": 0.6760199666023254, | |
| "step": 3132 | |
| }, | |
| { | |
| "epoch": 0.8348428343100692, | |
| "grad_norm": 0.55078125, | |
| "learning_rate": 5.277951810170322e-05, | |
| "loss": 0.6977653503417969, | |
| "step": 3134 | |
| }, | |
| { | |
| "epoch": 0.835375599360682, | |
| "grad_norm": 0.5390625, | |
| "learning_rate": 5.2635264135953385e-05, | |
| "loss": 0.6952658295631409, | |
| "step": 3136 | |
| }, | |
| { | |
| "epoch": 0.8359083644112946, | |
| "grad_norm": 0.5390625, | |
| "learning_rate": 5.2490827392400735e-05, | |
| "loss": 0.7002542614936829, | |
| "step": 3138 | |
| }, | |
| { | |
| "epoch": 0.8364411294619073, | |
| "grad_norm": 0.63671875, | |
| "learning_rate": 5.2346209960422295e-05, | |
| "loss": 0.7366476655006409, | |
| "step": 3140 | |
| }, | |
| { | |
| "epoch": 0.83697389451252, | |
| "grad_norm": 0.5546875, | |
| "learning_rate": 5.2201413932008865e-05, | |
| "loss": 0.7242977619171143, | |
| "step": 3142 | |
| }, | |
| { | |
| "epoch": 0.8375066595631326, | |
| "grad_norm": 0.55078125, | |
| "learning_rate": 5.20564414017348e-05, | |
| "loss": 0.6978318095207214, | |
| "step": 3144 | |
| }, | |
| { | |
| "epoch": 0.8380394246137454, | |
| "grad_norm": 0.546875, | |
| "learning_rate": 5.191129446672763e-05, | |
| "loss": 0.7196321487426758, | |
| "step": 3146 | |
| }, | |
| { | |
| "epoch": 0.838572189664358, | |
| "grad_norm": 0.55078125, | |
| "learning_rate": 5.1765975226637804e-05, | |
| "loss": 0.7309831380844116, | |
| "step": 3148 | |
| }, | |
| { | |
| "epoch": 0.8391049547149707, | |
| "grad_norm": 0.57421875, | |
| "learning_rate": 5.162048578360827e-05, | |
| "loss": 0.7818800210952759, | |
| "step": 3150 | |
| }, | |
| { | |
| "epoch": 0.8396377197655833, | |
| "grad_norm": 0.52734375, | |
| "learning_rate": 5.1474828242244085e-05, | |
| "loss": 0.6933904886245728, | |
| "step": 3152 | |
| }, | |
| { | |
| "epoch": 0.840170484816196, | |
| "grad_norm": 0.5546875, | |
| "learning_rate": 5.132900470958194e-05, | |
| "loss": 0.6861001253128052, | |
| "step": 3154 | |
| }, | |
| { | |
| "epoch": 0.8407032498668088, | |
| "grad_norm": 0.55859375, | |
| "learning_rate": 5.1183017295059734e-05, | |
| "loss": 0.7419685125350952, | |
| "step": 3156 | |
| }, | |
| { | |
| "epoch": 0.8412360149174214, | |
| "grad_norm": 0.546875, | |
| "learning_rate": 5.103686811048603e-05, | |
| "loss": 0.697303056716919, | |
| "step": 3158 | |
| }, | |
| { | |
| "epoch": 0.8417687799680341, | |
| "grad_norm": 0.5625, | |
| "learning_rate": 5.089055927000948e-05, | |
| "loss": 0.7192218899726868, | |
| "step": 3160 | |
| }, | |
| { | |
| "epoch": 0.8423015450186467, | |
| "grad_norm": 0.5625, | |
| "learning_rate": 5.07440928900883e-05, | |
| "loss": 0.7201210856437683, | |
| "step": 3162 | |
| }, | |
| { | |
| "epoch": 0.8428343100692595, | |
| "grad_norm": 0.5703125, | |
| "learning_rate": 5.059747108945958e-05, | |
| "loss": 0.7341133952140808, | |
| "step": 3164 | |
| }, | |
| { | |
| "epoch": 0.8433670751198722, | |
| "grad_norm": 0.55078125, | |
| "learning_rate": 5.045069598910873e-05, | |
| "loss": 0.7105435132980347, | |
| "step": 3166 | |
| }, | |
| { | |
| "epoch": 0.8438998401704848, | |
| "grad_norm": 0.5703125, | |
| "learning_rate": 5.030376971223872e-05, | |
| "loss": 0.7143466472625732, | |
| "step": 3168 | |
| }, | |
| { | |
| "epoch": 0.8444326052210975, | |
| "grad_norm": 0.54296875, | |
| "learning_rate": 5.015669438423939e-05, | |
| "loss": 0.6983811259269714, | |
| "step": 3170 | |
| }, | |
| { | |
| "epoch": 0.8449653702717101, | |
| "grad_norm": 0.5546875, | |
| "learning_rate": 5.00094721326567e-05, | |
| "loss": 0.6914249062538147, | |
| "step": 3172 | |
| }, | |
| { | |
| "epoch": 0.8454981353223229, | |
| "grad_norm": 0.53515625, | |
| "learning_rate": 4.9862105087161986e-05, | |
| "loss": 0.6947450041770935, | |
| "step": 3174 | |
| }, | |
| { | |
| "epoch": 0.8460309003729355, | |
| "grad_norm": 0.56640625, | |
| "learning_rate": 4.9714595379521094e-05, | |
| "loss": 0.7301425933837891, | |
| "step": 3176 | |
| }, | |
| { | |
| "epoch": 0.8465636654235482, | |
| "grad_norm": 0.60546875, | |
| "learning_rate": 4.956694514356363e-05, | |
| "loss": 0.7228060364723206, | |
| "step": 3178 | |
| }, | |
| { | |
| "epoch": 0.8470964304741609, | |
| "grad_norm": 0.55859375, | |
| "learning_rate": 4.9419156515151956e-05, | |
| "loss": 0.7128725051879883, | |
| "step": 3180 | |
| }, | |
| { | |
| "epoch": 0.8476291955247736, | |
| "grad_norm": 0.55859375, | |
| "learning_rate": 4.927123163215047e-05, | |
| "loss": 0.7226285934448242, | |
| "step": 3182 | |
| }, | |
| { | |
| "epoch": 0.8481619605753863, | |
| "grad_norm": 0.5625, | |
| "learning_rate": 4.9123172634394515e-05, | |
| "loss": 0.7291282415390015, | |
| "step": 3184 | |
| }, | |
| { | |
| "epoch": 0.8486947256259989, | |
| "grad_norm": 0.55078125, | |
| "learning_rate": 4.897498166365953e-05, | |
| "loss": 0.6915116310119629, | |
| "step": 3186 | |
| }, | |
| { | |
| "epoch": 0.8492274906766116, | |
| "grad_norm": 0.56640625, | |
| "learning_rate": 4.882666086363002e-05, | |
| "loss": 0.7105843424797058, | |
| "step": 3188 | |
| }, | |
| { | |
| "epoch": 0.8497602557272242, | |
| "grad_norm": 0.5546875, | |
| "learning_rate": 4.8678212379868585e-05, | |
| "loss": 0.7123350501060486, | |
| "step": 3190 | |
| }, | |
| { | |
| "epoch": 0.850293020777837, | |
| "grad_norm": 0.5703125, | |
| "learning_rate": 4.852963835978482e-05, | |
| "loss": 0.7145159244537354, | |
| "step": 3192 | |
| }, | |
| { | |
| "epoch": 0.8508257858284497, | |
| "grad_norm": 0.5625, | |
| "learning_rate": 4.838094095260432e-05, | |
| "loss": 0.7674680948257446, | |
| "step": 3194 | |
| }, | |
| { | |
| "epoch": 0.8513585508790623, | |
| "grad_norm": 0.5625, | |
| "learning_rate": 4.823212230933755e-05, | |
| "loss": 0.6895533204078674, | |
| "step": 3196 | |
| }, | |
| { | |
| "epoch": 0.851891315929675, | |
| "grad_norm": 0.546875, | |
| "learning_rate": 4.808318458274874e-05, | |
| "loss": 0.7199817299842834, | |
| "step": 3198 | |
| }, | |
| { | |
| "epoch": 0.8524240809802877, | |
| "grad_norm": 0.55078125, | |
| "learning_rate": 4.7934129927324717e-05, | |
| "loss": 0.6851661205291748, | |
| "step": 3200 | |
| }, | |
| { | |
| "epoch": 0.8529568460309004, | |
| "grad_norm": 0.53515625, | |
| "learning_rate": 4.778496049924381e-05, | |
| "loss": 0.6990004777908325, | |
| "step": 3202 | |
| }, | |
| { | |
| "epoch": 0.853489611081513, | |
| "grad_norm": 0.51953125, | |
| "learning_rate": 4.763567845634459e-05, | |
| "loss": 0.6694433689117432, | |
| "step": 3204 | |
| }, | |
| { | |
| "epoch": 0.8540223761321257, | |
| "grad_norm": 0.546875, | |
| "learning_rate": 4.748628595809466e-05, | |
| "loss": 0.7453095316886902, | |
| "step": 3206 | |
| }, | |
| { | |
| "epoch": 0.8545551411827385, | |
| "grad_norm": 0.53125, | |
| "learning_rate": 4.733678516555948e-05, | |
| "loss": 0.711460530757904, | |
| "step": 3208 | |
| }, | |
| { | |
| "epoch": 0.8550879062333511, | |
| "grad_norm": 0.5390625, | |
| "learning_rate": 4.718717824137102e-05, | |
| "loss": 0.6715887784957886, | |
| "step": 3210 | |
| }, | |
| { | |
| "epoch": 0.8556206712839638, | |
| "grad_norm": 0.55078125, | |
| "learning_rate": 4.703746734969653e-05, | |
| "loss": 0.704089343547821, | |
| "step": 3212 | |
| }, | |
| { | |
| "epoch": 0.8561534363345764, | |
| "grad_norm": 0.55859375, | |
| "learning_rate": 4.6887654656207255e-05, | |
| "loss": 0.6886122822761536, | |
| "step": 3214 | |
| }, | |
| { | |
| "epoch": 0.8566862013851891, | |
| "grad_norm": 0.578125, | |
| "learning_rate": 4.673774232804701e-05, | |
| "loss": 0.7097099423408508, | |
| "step": 3216 | |
| }, | |
| { | |
| "epoch": 0.8572189664358019, | |
| "grad_norm": 0.59765625, | |
| "learning_rate": 4.6587732533800945e-05, | |
| "loss": 0.7401700615882874, | |
| "step": 3218 | |
| }, | |
| { | |
| "epoch": 0.8577517314864145, | |
| "grad_norm": 0.55078125, | |
| "learning_rate": 4.64376274434641e-05, | |
| "loss": 0.7368103265762329, | |
| "step": 3220 | |
| }, | |
| { | |
| "epoch": 0.8582844965370272, | |
| "grad_norm": 0.546875, | |
| "learning_rate": 4.628742922841006e-05, | |
| "loss": 0.6967146396636963, | |
| "step": 3222 | |
| }, | |
| { | |
| "epoch": 0.8588172615876398, | |
| "grad_norm": 0.5546875, | |
| "learning_rate": 4.613714006135948e-05, | |
| "loss": 0.6933982968330383, | |
| "step": 3224 | |
| }, | |
| { | |
| "epoch": 0.8593500266382526, | |
| "grad_norm": 0.55078125, | |
| "learning_rate": 4.598676211634876e-05, | |
| "loss": 0.7536207437515259, | |
| "step": 3226 | |
| }, | |
| { | |
| "epoch": 0.8598827916888652, | |
| "grad_norm": 0.55078125, | |
| "learning_rate": 4.5836297568698475e-05, | |
| "loss": 0.7168260216712952, | |
| "step": 3228 | |
| }, | |
| { | |
| "epoch": 0.8604155567394779, | |
| "grad_norm": 0.5390625, | |
| "learning_rate": 4.568574859498201e-05, | |
| "loss": 0.723820686340332, | |
| "step": 3230 | |
| }, | |
| { | |
| "epoch": 0.8609483217900906, | |
| "grad_norm": 0.546875, | |
| "learning_rate": 4.553511737299401e-05, | |
| "loss": 0.6810100674629211, | |
| "step": 3232 | |
| }, | |
| { | |
| "epoch": 0.8614810868407032, | |
| "grad_norm": 0.55078125, | |
| "learning_rate": 4.5384406081718895e-05, | |
| "loss": 0.7171721458435059, | |
| "step": 3234 | |
| }, | |
| { | |
| "epoch": 0.862013851891316, | |
| "grad_norm": 0.546875, | |
| "learning_rate": 4.5233616901299364e-05, | |
| "loss": 0.7057915925979614, | |
| "step": 3236 | |
| }, | |
| { | |
| "epoch": 0.8625466169419286, | |
| "grad_norm": 0.55078125, | |
| "learning_rate": 4.508275201300482e-05, | |
| "loss": 0.6889278292655945, | |
| "step": 3238 | |
| }, | |
| { | |
| "epoch": 0.8630793819925413, | |
| "grad_norm": 0.54296875, | |
| "learning_rate": 4.493181359919983e-05, | |
| "loss": 0.72967928647995, | |
| "step": 3240 | |
| }, | |
| { | |
| "epoch": 0.8636121470431539, | |
| "grad_norm": 0.54296875, | |
| "learning_rate": 4.478080384331255e-05, | |
| "loss": 0.6688830852508545, | |
| "step": 3242 | |
| }, | |
| { | |
| "epoch": 0.8641449120937666, | |
| "grad_norm": 0.5546875, | |
| "learning_rate": 4.462972492980319e-05, | |
| "loss": 0.7294082045555115, | |
| "step": 3244 | |
| }, | |
| { | |
| "epoch": 0.8646776771443794, | |
| "grad_norm": 0.5625, | |
| "learning_rate": 4.4478579044132314e-05, | |
| "loss": 0.7415005564689636, | |
| "step": 3246 | |
| }, | |
| { | |
| "epoch": 0.865210442194992, | |
| "grad_norm": 0.5546875, | |
| "learning_rate": 4.432736837272935e-05, | |
| "loss": 0.730790376663208, | |
| "step": 3248 | |
| }, | |
| { | |
| "epoch": 0.8657432072456047, | |
| "grad_norm": 0.5625, | |
| "learning_rate": 4.417609510296082e-05, | |
| "loss": 0.713773787021637, | |
| "step": 3250 | |
| }, | |
| { | |
| "epoch": 0.8662759722962173, | |
| "grad_norm": 0.51953125, | |
| "learning_rate": 4.4024761423098845e-05, | |
| "loss": 0.6486693620681763, | |
| "step": 3252 | |
| }, | |
| { | |
| "epoch": 0.8668087373468301, | |
| "grad_norm": 0.53515625, | |
| "learning_rate": 4.387336952228937e-05, | |
| "loss": 0.692020058631897, | |
| "step": 3254 | |
| }, | |
| { | |
| "epoch": 0.8673415023974427, | |
| "grad_norm": 0.546875, | |
| "learning_rate": 4.372192159052058e-05, | |
| "loss": 0.7071133255958557, | |
| "step": 3256 | |
| }, | |
| { | |
| "epoch": 0.8678742674480554, | |
| "grad_norm": 0.546875, | |
| "learning_rate": 4.357041981859118e-05, | |
| "loss": 0.7335031032562256, | |
| "step": 3258 | |
| }, | |
| { | |
| "epoch": 0.8684070324986681, | |
| "grad_norm": 0.5625, | |
| "learning_rate": 4.3418866398078684e-05, | |
| "loss": 0.7153759002685547, | |
| "step": 3260 | |
| }, | |
| { | |
| "epoch": 0.8689397975492807, | |
| "grad_norm": 0.55859375, | |
| "learning_rate": 4.326726352130775e-05, | |
| "loss": 0.6825717091560364, | |
| "step": 3262 | |
| }, | |
| { | |
| "epoch": 0.8694725625998935, | |
| "grad_norm": 0.55078125, | |
| "learning_rate": 4.3115613381318485e-05, | |
| "loss": 0.700499415397644, | |
| "step": 3264 | |
| }, | |
| { | |
| "epoch": 0.8700053276505061, | |
| "grad_norm": 0.54296875, | |
| "learning_rate": 4.296391817183467e-05, | |
| "loss": 0.6975908279418945, | |
| "step": 3266 | |
| }, | |
| { | |
| "epoch": 0.8705380927011188, | |
| "grad_norm": 0.55859375, | |
| "learning_rate": 4.281218008723201e-05, | |
| "loss": 0.6818310618400574, | |
| "step": 3268 | |
| }, | |
| { | |
| "epoch": 0.8710708577517314, | |
| "grad_norm": 0.53515625, | |
| "learning_rate": 4.26604013225065e-05, | |
| "loss": 0.6989009976387024, | |
| "step": 3270 | |
| }, | |
| { | |
| "epoch": 0.8716036228023442, | |
| "grad_norm": 0.53515625, | |
| "learning_rate": 4.250858407324254e-05, | |
| "loss": 0.669248104095459, | |
| "step": 3272 | |
| }, | |
| { | |
| "epoch": 0.8721363878529569, | |
| "grad_norm": 0.5390625, | |
| "learning_rate": 4.235673053558127e-05, | |
| "loss": 0.7152054905891418, | |
| "step": 3274 | |
| }, | |
| { | |
| "epoch": 0.8726691529035695, | |
| "grad_norm": 0.52734375, | |
| "learning_rate": 4.220484290618876e-05, | |
| "loss": 0.6730162501335144, | |
| "step": 3276 | |
| }, | |
| { | |
| "epoch": 0.8732019179541822, | |
| "grad_norm": 0.5390625, | |
| "learning_rate": 4.205292338222423e-05, | |
| "loss": 0.7103643417358398, | |
| "step": 3278 | |
| }, | |
| { | |
| "epoch": 0.8737346830047948, | |
| "grad_norm": 0.55078125, | |
| "learning_rate": 4.190097416130828e-05, | |
| "loss": 0.7044172883033752, | |
| "step": 3280 | |
| }, | |
| { | |
| "epoch": 0.8742674480554076, | |
| "grad_norm": 0.546875, | |
| "learning_rate": 4.174899744149112e-05, | |
| "loss": 0.7182801365852356, | |
| "step": 3282 | |
| }, | |
| { | |
| "epoch": 0.8748002131060203, | |
| "grad_norm": 0.51953125, | |
| "learning_rate": 4.159699542122071e-05, | |
| "loss": 0.6786679029464722, | |
| "step": 3284 | |
| }, | |
| { | |
| "epoch": 0.8753329781566329, | |
| "grad_norm": 0.53515625, | |
| "learning_rate": 4.1444970299311016e-05, | |
| "loss": 0.6989539861679077, | |
| "step": 3286 | |
| }, | |
| { | |
| "epoch": 0.8758657432072456, | |
| "grad_norm": 0.55078125, | |
| "learning_rate": 4.129292427491021e-05, | |
| "loss": 0.6917098760604858, | |
| "step": 3288 | |
| }, | |
| { | |
| "epoch": 0.8763985082578583, | |
| "grad_norm": 0.54296875, | |
| "learning_rate": 4.1140859547468794e-05, | |
| "loss": 0.6877356171607971, | |
| "step": 3290 | |
| }, | |
| { | |
| "epoch": 0.876931273308471, | |
| "grad_norm": 0.58203125, | |
| "learning_rate": 4.098877831670785e-05, | |
| "loss": 0.749396026134491, | |
| "step": 3292 | |
| }, | |
| { | |
| "epoch": 0.8774640383590836, | |
| "grad_norm": 0.578125, | |
| "learning_rate": 4.083668278258717e-05, | |
| "loss": 0.7201125621795654, | |
| "step": 3294 | |
| }, | |
| { | |
| "epoch": 0.8779968034096963, | |
| "grad_norm": 0.5625, | |
| "learning_rate": 4.0684575145273474e-05, | |
| "loss": 0.7145978808403015, | |
| "step": 3296 | |
| }, | |
| { | |
| "epoch": 0.878529568460309, | |
| "grad_norm": 0.5703125, | |
| "learning_rate": 4.053245760510856e-05, | |
| "loss": 0.7227798104286194, | |
| "step": 3298 | |
| }, | |
| { | |
| "epoch": 0.8790623335109217, | |
| "grad_norm": 0.54296875, | |
| "learning_rate": 4.038033236257746e-05, | |
| "loss": 0.7105327248573303, | |
| "step": 3300 | |
| }, | |
| { | |
| "epoch": 0.8795950985615344, | |
| "grad_norm": 0.53125, | |
| "learning_rate": 4.0228201618276655e-05, | |
| "loss": 0.7208723425865173, | |
| "step": 3302 | |
| }, | |
| { | |
| "epoch": 0.880127863612147, | |
| "grad_norm": 0.5625, | |
| "learning_rate": 4.007606757288217e-05, | |
| "loss": 0.7228609323501587, | |
| "step": 3304 | |
| }, | |
| { | |
| "epoch": 0.8806606286627597, | |
| "grad_norm": 0.5546875, | |
| "learning_rate": 3.992393242711785e-05, | |
| "loss": 0.6760815382003784, | |
| "step": 3306 | |
| }, | |
| { | |
| "epoch": 0.8811933937133724, | |
| "grad_norm": 0.5390625, | |
| "learning_rate": 3.9771798381723365e-05, | |
| "loss": 0.6926905512809753, | |
| "step": 3308 | |
| }, | |
| { | |
| "epoch": 0.8817261587639851, | |
| "grad_norm": 0.5390625, | |
| "learning_rate": 3.9619667637422555e-05, | |
| "loss": 0.7134456038475037, | |
| "step": 3310 | |
| }, | |
| { | |
| "epoch": 0.8822589238145978, | |
| "grad_norm": 0.5546875, | |
| "learning_rate": 3.946754239489146e-05, | |
| "loss": 0.697494387626648, | |
| "step": 3312 | |
| }, | |
| { | |
| "epoch": 0.8827916888652104, | |
| "grad_norm": 0.53125, | |
| "learning_rate": 3.931542485472654e-05, | |
| "loss": 0.6928008198738098, | |
| "step": 3314 | |
| }, | |
| { | |
| "epoch": 0.8833244539158231, | |
| "grad_norm": 0.53515625, | |
| "learning_rate": 3.9163317217412844e-05, | |
| "loss": 0.6829655766487122, | |
| "step": 3316 | |
| }, | |
| { | |
| "epoch": 0.8838572189664358, | |
| "grad_norm": 0.55078125, | |
| "learning_rate": 3.901122168329217e-05, | |
| "loss": 0.6962534785270691, | |
| "step": 3318 | |
| }, | |
| { | |
| "epoch": 0.8843899840170485, | |
| "grad_norm": 0.5390625, | |
| "learning_rate": 3.8859140452531206e-05, | |
| "loss": 0.7147413492202759, | |
| "step": 3320 | |
| }, | |
| { | |
| "epoch": 0.8849227490676611, | |
| "grad_norm": 0.54296875, | |
| "learning_rate": 3.8707075725089794e-05, | |
| "loss": 0.7086056470870972, | |
| "step": 3322 | |
| }, | |
| { | |
| "epoch": 0.8854555141182738, | |
| "grad_norm": 0.5625, | |
| "learning_rate": 3.8555029700689e-05, | |
| "loss": 0.7095344066619873, | |
| "step": 3324 | |
| }, | |
| { | |
| "epoch": 0.8859882791688866, | |
| "grad_norm": 0.5234375, | |
| "learning_rate": 3.840300457877931e-05, | |
| "loss": 0.7212046980857849, | |
| "step": 3326 | |
| }, | |
| { | |
| "epoch": 0.8865210442194992, | |
| "grad_norm": 0.55078125, | |
| "learning_rate": 3.8251002558508896e-05, | |
| "loss": 0.7254408001899719, | |
| "step": 3328 | |
| }, | |
| { | |
| "epoch": 0.8870538092701119, | |
| "grad_norm": 0.546875, | |
| "learning_rate": 3.809902583869172e-05, | |
| "loss": 0.6854255795478821, | |
| "step": 3330 | |
| }, | |
| { | |
| "epoch": 0.8875865743207245, | |
| "grad_norm": 0.5703125, | |
| "learning_rate": 3.7947076617775785e-05, | |
| "loss": 0.7515878081321716, | |
| "step": 3332 | |
| }, | |
| { | |
| "epoch": 0.8881193393713372, | |
| "grad_norm": 0.5546875, | |
| "learning_rate": 3.779515709381125e-05, | |
| "loss": 0.7171257734298706, | |
| "step": 3334 | |
| }, | |
| { | |
| "epoch": 0.88865210442195, | |
| "grad_norm": 0.53125, | |
| "learning_rate": 3.7643269464418734e-05, | |
| "loss": 0.7486863136291504, | |
| "step": 3336 | |
| }, | |
| { | |
| "epoch": 0.8891848694725626, | |
| "grad_norm": 0.53515625, | |
| "learning_rate": 3.749141592675747e-05, | |
| "loss": 0.6408795118331909, | |
| "step": 3338 | |
| }, | |
| { | |
| "epoch": 0.8897176345231753, | |
| "grad_norm": 0.5234375, | |
| "learning_rate": 3.7339598677493515e-05, | |
| "loss": 0.6756150722503662, | |
| "step": 3340 | |
| }, | |
| { | |
| "epoch": 0.8902503995737879, | |
| "grad_norm": 0.5546875, | |
| "learning_rate": 3.7187819912768005e-05, | |
| "loss": 0.7228208780288696, | |
| "step": 3342 | |
| }, | |
| { | |
| "epoch": 0.8907831646244007, | |
| "grad_norm": 0.55078125, | |
| "learning_rate": 3.7036081828165353e-05, | |
| "loss": 0.6675580143928528, | |
| "step": 3344 | |
| }, | |
| { | |
| "epoch": 0.8913159296750133, | |
| "grad_norm": 0.546875, | |
| "learning_rate": 3.688438661868153e-05, | |
| "loss": 0.7148852944374084, | |
| "step": 3346 | |
| }, | |
| { | |
| "epoch": 0.891848694725626, | |
| "grad_norm": 0.56640625, | |
| "learning_rate": 3.673273647869226e-05, | |
| "loss": 0.7156222462654114, | |
| "step": 3348 | |
| }, | |
| { | |
| "epoch": 0.8923814597762387, | |
| "grad_norm": 0.58203125, | |
| "learning_rate": 3.6581133601921336e-05, | |
| "loss": 0.7250128388404846, | |
| "step": 3350 | |
| }, | |
| { | |
| "epoch": 0.8929142248268513, | |
| "grad_norm": 0.5390625, | |
| "learning_rate": 3.6429580181408836e-05, | |
| "loss": 0.6913713812828064, | |
| "step": 3352 | |
| }, | |
| { | |
| "epoch": 0.8934469898774641, | |
| "grad_norm": 0.546875, | |
| "learning_rate": 3.6278078409479424e-05, | |
| "loss": 0.7076290249824524, | |
| "step": 3354 | |
| }, | |
| { | |
| "epoch": 0.8939797549280767, | |
| "grad_norm": 0.5625, | |
| "learning_rate": 3.6126630477710634e-05, | |
| "loss": 0.7225173115730286, | |
| "step": 3356 | |
| }, | |
| { | |
| "epoch": 0.8945125199786894, | |
| "grad_norm": 0.5546875, | |
| "learning_rate": 3.5975238576901175e-05, | |
| "loss": 0.6932091116905212, | |
| "step": 3358 | |
| }, | |
| { | |
| "epoch": 0.895045285029302, | |
| "grad_norm": 0.53125, | |
| "learning_rate": 3.582390489703919e-05, | |
| "loss": 0.6957063674926758, | |
| "step": 3360 | |
| }, | |
| { | |
| "epoch": 0.8955780500799148, | |
| "grad_norm": 0.5234375, | |
| "learning_rate": 3.567263162727067e-05, | |
| "loss": 0.6680214405059814, | |
| "step": 3362 | |
| }, | |
| { | |
| "epoch": 0.8961108151305275, | |
| "grad_norm": 0.54296875, | |
| "learning_rate": 3.552142095586769e-05, | |
| "loss": 0.7048709988594055, | |
| "step": 3364 | |
| }, | |
| { | |
| "epoch": 0.8966435801811401, | |
| "grad_norm": 0.55078125, | |
| "learning_rate": 3.537027507019682e-05, | |
| "loss": 0.6847620606422424, | |
| "step": 3366 | |
| }, | |
| { | |
| "epoch": 0.8971763452317528, | |
| "grad_norm": 0.5703125, | |
| "learning_rate": 3.5219196156687454e-05, | |
| "loss": 0.7076305747032166, | |
| "step": 3368 | |
| }, | |
| { | |
| "epoch": 0.8977091102823654, | |
| "grad_norm": 0.52734375, | |
| "learning_rate": 3.506818640080018e-05, | |
| "loss": 0.7055946588516235, | |
| "step": 3370 | |
| }, | |
| { | |
| "epoch": 0.8982418753329782, | |
| "grad_norm": 0.546875, | |
| "learning_rate": 3.491724798699519e-05, | |
| "loss": 0.7180204391479492, | |
| "step": 3372 | |
| }, | |
| { | |
| "epoch": 0.8987746403835908, | |
| "grad_norm": 0.5234375, | |
| "learning_rate": 3.476638309870064e-05, | |
| "loss": 0.6561832427978516, | |
| "step": 3374 | |
| }, | |
| { | |
| "epoch": 0.8993074054342035, | |
| "grad_norm": 0.55859375, | |
| "learning_rate": 3.4615593918281126e-05, | |
| "loss": 0.702898383140564, | |
| "step": 3376 | |
| }, | |
| { | |
| "epoch": 0.8998401704848162, | |
| "grad_norm": 0.55859375, | |
| "learning_rate": 3.446488262700601e-05, | |
| "loss": 0.7005707621574402, | |
| "step": 3378 | |
| }, | |
| { | |
| "epoch": 0.9003729355354289, | |
| "grad_norm": 0.53125, | |
| "learning_rate": 3.431425140501801e-05, | |
| "loss": 0.6888461709022522, | |
| "step": 3380 | |
| }, | |
| { | |
| "epoch": 0.9009057005860416, | |
| "grad_norm": 0.55859375, | |
| "learning_rate": 3.416370243130154e-05, | |
| "loss": 0.7409310340881348, | |
| "step": 3382 | |
| }, | |
| { | |
| "epoch": 0.9014384656366542, | |
| "grad_norm": 0.54296875, | |
| "learning_rate": 3.4013237883651255e-05, | |
| "loss": 0.6963149309158325, | |
| "step": 3384 | |
| }, | |
| { | |
| "epoch": 0.9019712306872669, | |
| "grad_norm": 0.55078125, | |
| "learning_rate": 3.386285993864053e-05, | |
| "loss": 0.6961240172386169, | |
| "step": 3386 | |
| }, | |
| { | |
| "epoch": 0.9025039957378796, | |
| "grad_norm": 0.53515625, | |
| "learning_rate": 3.3712570771589956e-05, | |
| "loss": 0.6980938911437988, | |
| "step": 3388 | |
| }, | |
| { | |
| "epoch": 0.9030367607884923, | |
| "grad_norm": 0.55078125, | |
| "learning_rate": 3.35623725565359e-05, | |
| "loss": 0.7153114676475525, | |
| "step": 3390 | |
| }, | |
| { | |
| "epoch": 0.903569525839105, | |
| "grad_norm": 0.546875, | |
| "learning_rate": 3.341226746619906e-05, | |
| "loss": 0.7112528085708618, | |
| "step": 3392 | |
| }, | |
| { | |
| "epoch": 0.9041022908897176, | |
| "grad_norm": 0.54296875, | |
| "learning_rate": 3.326225767195301e-05, | |
| "loss": 0.6847183704376221, | |
| "step": 3394 | |
| }, | |
| { | |
| "epoch": 0.9046350559403303, | |
| "grad_norm": 0.52734375, | |
| "learning_rate": 3.3112345343792765e-05, | |
| "loss": 0.7189138531684875, | |
| "step": 3396 | |
| }, | |
| { | |
| "epoch": 0.905167820990943, | |
| "grad_norm": 0.546875, | |
| "learning_rate": 3.2962532650303476e-05, | |
| "loss": 0.6928481459617615, | |
| "step": 3398 | |
| }, | |
| { | |
| "epoch": 0.9057005860415557, | |
| "grad_norm": 0.53125, | |
| "learning_rate": 3.2812821758628995e-05, | |
| "loss": 0.6926461458206177, | |
| "step": 3400 | |
| }, | |
| { | |
| "epoch": 0.9062333510921684, | |
| "grad_norm": 0.5625, | |
| "learning_rate": 3.2663214834440536e-05, | |
| "loss": 0.7446824312210083, | |
| "step": 3402 | |
| }, | |
| { | |
| "epoch": 0.906766116142781, | |
| "grad_norm": 0.515625, | |
| "learning_rate": 3.2513714041905354e-05, | |
| "loss": 0.6290156245231628, | |
| "step": 3404 | |
| }, | |
| { | |
| "epoch": 0.9072988811933937, | |
| "grad_norm": 0.546875, | |
| "learning_rate": 3.2364321543655414e-05, | |
| "loss": 0.7160915732383728, | |
| "step": 3406 | |
| }, | |
| { | |
| "epoch": 0.9078316462440064, | |
| "grad_norm": 0.5234375, | |
| "learning_rate": 3.221503950075619e-05, | |
| "loss": 0.6737069487571716, | |
| "step": 3408 | |
| }, | |
| { | |
| "epoch": 0.9083644112946191, | |
| "grad_norm": 0.57421875, | |
| "learning_rate": 3.206587007267528e-05, | |
| "loss": 0.7074106931686401, | |
| "step": 3410 | |
| }, | |
| { | |
| "epoch": 0.9088971763452317, | |
| "grad_norm": 0.55859375, | |
| "learning_rate": 3.191681541725128e-05, | |
| "loss": 0.7356147766113281, | |
| "step": 3412 | |
| }, | |
| { | |
| "epoch": 0.9094299413958444, | |
| "grad_norm": 0.546875, | |
| "learning_rate": 3.176787769066247e-05, | |
| "loss": 0.7018274068832397, | |
| "step": 3414 | |
| }, | |
| { | |
| "epoch": 0.9099627064464572, | |
| "grad_norm": 0.5703125, | |
| "learning_rate": 3.161905904739569e-05, | |
| "loss": 0.7239270806312561, | |
| "step": 3416 | |
| }, | |
| { | |
| "epoch": 0.9104954714970698, | |
| "grad_norm": 0.53125, | |
| "learning_rate": 3.147036164021519e-05, | |
| "loss": 0.661172091960907, | |
| "step": 3418 | |
| }, | |
| { | |
| "epoch": 0.9110282365476825, | |
| "grad_norm": 0.52734375, | |
| "learning_rate": 3.1321787620131435e-05, | |
| "loss": 0.667536735534668, | |
| "step": 3420 | |
| }, | |
| { | |
| "epoch": 0.9115610015982951, | |
| "grad_norm": 0.515625, | |
| "learning_rate": 3.117333913636999e-05, | |
| "loss": 0.6705557703971863, | |
| "step": 3422 | |
| }, | |
| { | |
| "epoch": 0.9120937666489078, | |
| "grad_norm": 0.5390625, | |
| "learning_rate": 3.1025018336340484e-05, | |
| "loss": 0.6964682936668396, | |
| "step": 3424 | |
| }, | |
| { | |
| "epoch": 0.9126265316995205, | |
| "grad_norm": 0.5390625, | |
| "learning_rate": 3.08768273656055e-05, | |
| "loss": 0.7074109315872192, | |
| "step": 3426 | |
| }, | |
| { | |
| "epoch": 0.9131592967501332, | |
| "grad_norm": 0.53125, | |
| "learning_rate": 3.0728768367849545e-05, | |
| "loss": 0.6577396988868713, | |
| "step": 3428 | |
| }, | |
| { | |
| "epoch": 0.9136920618007459, | |
| "grad_norm": 0.54296875, | |
| "learning_rate": 3.058084348484806e-05, | |
| "loss": 0.6772574782371521, | |
| "step": 3430 | |
| }, | |
| { | |
| "epoch": 0.9142248268513585, | |
| "grad_norm": 0.52734375, | |
| "learning_rate": 3.0433054856436395e-05, | |
| "loss": 0.670283854007721, | |
| "step": 3432 | |
| }, | |
| { | |
| "epoch": 0.9147575919019713, | |
| "grad_norm": 0.546875, | |
| "learning_rate": 3.0285404620478912e-05, | |
| "loss": 0.6941147446632385, | |
| "step": 3434 | |
| }, | |
| { | |
| "epoch": 0.9152903569525839, | |
| "grad_norm": 0.53125, | |
| "learning_rate": 3.0137894912838027e-05, | |
| "loss": 0.652153730392456, | |
| "step": 3436 | |
| }, | |
| { | |
| "epoch": 0.9158231220031966, | |
| "grad_norm": 0.51953125, | |
| "learning_rate": 2.999052786734331e-05, | |
| "loss": 0.6745891571044922, | |
| "step": 3438 | |
| }, | |
| { | |
| "epoch": 0.9163558870538092, | |
| "grad_norm": 0.53125, | |
| "learning_rate": 2.9843305615760623e-05, | |
| "loss": 0.7068504095077515, | |
| "step": 3440 | |
| }, | |
| { | |
| "epoch": 0.9168886521044219, | |
| "grad_norm": 0.5546875, | |
| "learning_rate": 2.9696230287761288e-05, | |
| "loss": 0.699682354927063, | |
| "step": 3442 | |
| }, | |
| { | |
| "epoch": 0.9174214171550347, | |
| "grad_norm": 0.53515625, | |
| "learning_rate": 2.954930401089127e-05, | |
| "loss": 0.6988564729690552, | |
| "step": 3444 | |
| }, | |
| { | |
| "epoch": 0.9179541822056473, | |
| "grad_norm": 0.53125, | |
| "learning_rate": 2.9402528910540433e-05, | |
| "loss": 0.6744426488876343, | |
| "step": 3446 | |
| }, | |
| { | |
| "epoch": 0.91848694725626, | |
| "grad_norm": 0.5546875, | |
| "learning_rate": 2.9255907109911725e-05, | |
| "loss": 0.6920981407165527, | |
| "step": 3448 | |
| }, | |
| { | |
| "epoch": 0.9190197123068726, | |
| "grad_norm": 0.52734375, | |
| "learning_rate": 2.9109440729990533e-05, | |
| "loss": 0.7215517163276672, | |
| "step": 3450 | |
| }, | |
| { | |
| "epoch": 0.9195524773574854, | |
| "grad_norm": 0.53515625, | |
| "learning_rate": 2.8963131889513986e-05, | |
| "loss": 0.6962135434150696, | |
| "step": 3452 | |
| }, | |
| { | |
| "epoch": 0.9200852424080981, | |
| "grad_norm": 0.515625, | |
| "learning_rate": 2.8816982704940276e-05, | |
| "loss": 0.6650117635726929, | |
| "step": 3454 | |
| }, | |
| { | |
| "epoch": 0.9206180074587107, | |
| "grad_norm": 0.5546875, | |
| "learning_rate": 2.8670995290418077e-05, | |
| "loss": 0.7167315483093262, | |
| "step": 3456 | |
| }, | |
| { | |
| "epoch": 0.9211507725093234, | |
| "grad_norm": 0.5234375, | |
| "learning_rate": 2.8525171757755932e-05, | |
| "loss": 0.6390708684921265, | |
| "step": 3458 | |
| }, | |
| { | |
| "epoch": 0.921683537559936, | |
| "grad_norm": 0.5546875, | |
| "learning_rate": 2.837951421639174e-05, | |
| "loss": 0.7065415978431702, | |
| "step": 3460 | |
| }, | |
| { | |
| "epoch": 0.9222163026105488, | |
| "grad_norm": 0.5234375, | |
| "learning_rate": 2.8234024773362202e-05, | |
| "loss": 0.6877092719078064, | |
| "step": 3462 | |
| }, | |
| { | |
| "epoch": 0.9227490676611614, | |
| "grad_norm": 0.53125, | |
| "learning_rate": 2.8088705533272382e-05, | |
| "loss": 0.6896106600761414, | |
| "step": 3464 | |
| }, | |
| { | |
| "epoch": 0.9232818327117741, | |
| "grad_norm": 0.5546875, | |
| "learning_rate": 2.7943558598265218e-05, | |
| "loss": 0.7128964066505432, | |
| "step": 3466 | |
| }, | |
| { | |
| "epoch": 0.9238145977623868, | |
| "grad_norm": 0.515625, | |
| "learning_rate": 2.7798586067991142e-05, | |
| "loss": 0.683221697807312, | |
| "step": 3468 | |
| }, | |
| { | |
| "epoch": 0.9243473628129995, | |
| "grad_norm": 0.5390625, | |
| "learning_rate": 2.7653790039577725e-05, | |
| "loss": 0.6766422390937805, | |
| "step": 3470 | |
| }, | |
| { | |
| "epoch": 0.9248801278636122, | |
| "grad_norm": 0.55078125, | |
| "learning_rate": 2.750917260759928e-05, | |
| "loss": 0.6711747646331787, | |
| "step": 3472 | |
| }, | |
| { | |
| "epoch": 0.9254128929142248, | |
| "grad_norm": 0.5390625, | |
| "learning_rate": 2.7364735864046625e-05, | |
| "loss": 0.7293622493743896, | |
| "step": 3474 | |
| }, | |
| { | |
| "epoch": 0.9259456579648375, | |
| "grad_norm": 0.5234375, | |
| "learning_rate": 2.7220481898296793e-05, | |
| "loss": 0.65350741147995, | |
| "step": 3476 | |
| }, | |
| { | |
| "epoch": 0.9264784230154501, | |
| "grad_norm": 0.56640625, | |
| "learning_rate": 2.70764127970828e-05, | |
| "loss": 0.6858737468719482, | |
| "step": 3478 | |
| }, | |
| { | |
| "epoch": 0.9270111880660629, | |
| "grad_norm": 0.5234375, | |
| "learning_rate": 2.693253064446348e-05, | |
| "loss": 0.7053213715553284, | |
| "step": 3480 | |
| }, | |
| { | |
| "epoch": 0.9275439531166756, | |
| "grad_norm": 0.546875, | |
| "learning_rate": 2.678883752179333e-05, | |
| "loss": 0.6678943037986755, | |
| "step": 3482 | |
| }, | |
| { | |
| "epoch": 0.9280767181672882, | |
| "grad_norm": 0.54296875, | |
| "learning_rate": 2.664533550769236e-05, | |
| "loss": 0.6876240968704224, | |
| "step": 3484 | |
| }, | |
| { | |
| "epoch": 0.9286094832179009, | |
| "grad_norm": 0.54296875, | |
| "learning_rate": 2.6502026678016117e-05, | |
| "loss": 0.7180918455123901, | |
| "step": 3486 | |
| }, | |
| { | |
| "epoch": 0.9291422482685135, | |
| "grad_norm": 0.51953125, | |
| "learning_rate": 2.6358913105825564e-05, | |
| "loss": 0.6760578155517578, | |
| "step": 3488 | |
| }, | |
| { | |
| "epoch": 0.9296750133191263, | |
| "grad_norm": 0.5625, | |
| "learning_rate": 2.6215996861357152e-05, | |
| "loss": 0.7332901954650879, | |
| "step": 3490 | |
| }, | |
| { | |
| "epoch": 0.9302077783697389, | |
| "grad_norm": 0.53125, | |
| "learning_rate": 2.6073280011992833e-05, | |
| "loss": 0.6666563749313354, | |
| "step": 3492 | |
| }, | |
| { | |
| "epoch": 0.9307405434203516, | |
| "grad_norm": 0.5234375, | |
| "learning_rate": 2.59307646222302e-05, | |
| "loss": 0.664758026599884, | |
| "step": 3494 | |
| }, | |
| { | |
| "epoch": 0.9312733084709643, | |
| "grad_norm": 0.515625, | |
| "learning_rate": 2.5788452753652563e-05, | |
| "loss": 0.6465103030204773, | |
| "step": 3496 | |
| }, | |
| { | |
| "epoch": 0.931806073521577, | |
| "grad_norm": 0.515625, | |
| "learning_rate": 2.564634646489917e-05, | |
| "loss": 0.675029456615448, | |
| "step": 3498 | |
| }, | |
| { | |
| "epoch": 0.9323388385721897, | |
| "grad_norm": 0.5234375, | |
| "learning_rate": 2.5504447811635435e-05, | |
| "loss": 0.6932930946350098, | |
| "step": 3500 | |
| }, | |
| { | |
| "epoch": 0.9328716036228023, | |
| "grad_norm": 0.546875, | |
| "learning_rate": 2.536275884652312e-05, | |
| "loss": 0.7141386270523071, | |
| "step": 3502 | |
| }, | |
| { | |
| "epoch": 0.933404368673415, | |
| "grad_norm": 0.5390625, | |
| "learning_rate": 2.522128161919077e-05, | |
| "loss": 0.7156549692153931, | |
| "step": 3504 | |
| }, | |
| { | |
| "epoch": 0.9339371337240278, | |
| "grad_norm": 0.5390625, | |
| "learning_rate": 2.508001817620396e-05, | |
| "loss": 0.7093266844749451, | |
| "step": 3506 | |
| }, | |
| { | |
| "epoch": 0.9344698987746404, | |
| "grad_norm": 0.5390625, | |
| "learning_rate": 2.4938970561035753e-05, | |
| "loss": 0.6900848150253296, | |
| "step": 3508 | |
| }, | |
| { | |
| "epoch": 0.9350026638252531, | |
| "grad_norm": 0.5546875, | |
| "learning_rate": 2.479814081403709e-05, | |
| "loss": 0.684967041015625, | |
| "step": 3510 | |
| }, | |
| { | |
| "epoch": 0.9355354288758657, | |
| "grad_norm": 0.5390625, | |
| "learning_rate": 2.4657530972407316e-05, | |
| "loss": 0.6972053647041321, | |
| "step": 3512 | |
| }, | |
| { | |
| "epoch": 0.9360681939264784, | |
| "grad_norm": 0.55078125, | |
| "learning_rate": 2.4517143070164683e-05, | |
| "loss": 0.7469601631164551, | |
| "step": 3514 | |
| }, | |
| { | |
| "epoch": 0.9366009589770911, | |
| "grad_norm": 0.55078125, | |
| "learning_rate": 2.437697913811694e-05, | |
| "loss": 0.7062028050422668, | |
| "step": 3516 | |
| }, | |
| { | |
| "epoch": 0.9371337240277038, | |
| "grad_norm": 0.52734375, | |
| "learning_rate": 2.423704120383195e-05, | |
| "loss": 0.6981968879699707, | |
| "step": 3518 | |
| }, | |
| { | |
| "epoch": 0.9376664890783165, | |
| "grad_norm": 0.56640625, | |
| "learning_rate": 2.409733129160836e-05, | |
| "loss": 0.7789372205734253, | |
| "step": 3520 | |
| }, | |
| { | |
| "epoch": 0.9381992541289291, | |
| "grad_norm": 0.54296875, | |
| "learning_rate": 2.395785142244634e-05, | |
| "loss": 0.7303587198257446, | |
| "step": 3522 | |
| }, | |
| { | |
| "epoch": 0.9387320191795419, | |
| "grad_norm": 0.53515625, | |
| "learning_rate": 2.38186036140183e-05, | |
| "loss": 0.6874336004257202, | |
| "step": 3524 | |
| }, | |
| { | |
| "epoch": 0.9392647842301545, | |
| "grad_norm": 0.55859375, | |
| "learning_rate": 2.3679589880639758e-05, | |
| "loss": 0.736723005771637, | |
| "step": 3526 | |
| }, | |
| { | |
| "epoch": 0.9397975492807672, | |
| "grad_norm": 0.53515625, | |
| "learning_rate": 2.3540812233240154e-05, | |
| "loss": 0.6807746291160583, | |
| "step": 3528 | |
| }, | |
| { | |
| "epoch": 0.9403303143313798, | |
| "grad_norm": 0.5390625, | |
| "learning_rate": 2.3402272679333798e-05, | |
| "loss": 0.6877115964889526, | |
| "step": 3530 | |
| }, | |
| { | |
| "epoch": 0.9408630793819925, | |
| "grad_norm": 0.54296875, | |
| "learning_rate": 2.326397322299079e-05, | |
| "loss": 0.7108508348464966, | |
| "step": 3532 | |
| }, | |
| { | |
| "epoch": 0.9413958444326053, | |
| "grad_norm": 0.53515625, | |
| "learning_rate": 2.312591586480811e-05, | |
| "loss": 0.6459025740623474, | |
| "step": 3534 | |
| }, | |
| { | |
| "epoch": 0.9419286094832179, | |
| "grad_norm": 0.54296875, | |
| "learning_rate": 2.298810260188054e-05, | |
| "loss": 0.6610309481620789, | |
| "step": 3536 | |
| }, | |
| { | |
| "epoch": 0.9424613745338306, | |
| "grad_norm": 0.52734375, | |
| "learning_rate": 2.285053542777191e-05, | |
| "loss": 0.6491233706474304, | |
| "step": 3538 | |
| }, | |
| { | |
| "epoch": 0.9429941395844432, | |
| "grad_norm": 0.55078125, | |
| "learning_rate": 2.2713216332486187e-05, | |
| "loss": 0.7502667307853699, | |
| "step": 3540 | |
| }, | |
| { | |
| "epoch": 0.943526904635056, | |
| "grad_norm": 0.515625, | |
| "learning_rate": 2.257614730243872e-05, | |
| "loss": 0.6700000762939453, | |
| "step": 3542 | |
| }, | |
| { | |
| "epoch": 0.9440596696856686, | |
| "grad_norm": 0.53515625, | |
| "learning_rate": 2.2439330320427484e-05, | |
| "loss": 0.6700481176376343, | |
| "step": 3544 | |
| }, | |
| { | |
| "epoch": 0.9445924347362813, | |
| "grad_norm": 0.53515625, | |
| "learning_rate": 2.2302767365604403e-05, | |
| "loss": 0.682191789150238, | |
| "step": 3546 | |
| }, | |
| { | |
| "epoch": 0.945125199786894, | |
| "grad_norm": 0.5234375, | |
| "learning_rate": 2.2166460413446725e-05, | |
| "loss": 0.687681257724762, | |
| "step": 3548 | |
| }, | |
| { | |
| "epoch": 0.9456579648375066, | |
| "grad_norm": 0.53125, | |
| "learning_rate": 2.203041143572845e-05, | |
| "loss": 0.6589215993881226, | |
| "step": 3550 | |
| }, | |
| { | |
| "epoch": 0.9461907298881194, | |
| "grad_norm": 0.52734375, | |
| "learning_rate": 2.18946224004918e-05, | |
| "loss": 0.6683045029640198, | |
| "step": 3552 | |
| }, | |
| { | |
| "epoch": 0.946723494938732, | |
| "grad_norm": 0.5390625, | |
| "learning_rate": 2.175909527201872e-05, | |
| "loss": 0.695706307888031, | |
| "step": 3554 | |
| }, | |
| { | |
| "epoch": 0.9472562599893447, | |
| "grad_norm": 0.51953125, | |
| "learning_rate": 2.1623832010802525e-05, | |
| "loss": 0.6750278472900391, | |
| "step": 3556 | |
| }, | |
| { | |
| "epoch": 0.9477890250399574, | |
| "grad_norm": 0.546875, | |
| "learning_rate": 2.1488834573519506e-05, | |
| "loss": 0.7252264022827148, | |
| "step": 3558 | |
| }, | |
| { | |
| "epoch": 0.94832179009057, | |
| "grad_norm": 0.546875, | |
| "learning_rate": 2.1354104913000616e-05, | |
| "loss": 0.7190086841583252, | |
| "step": 3560 | |
| }, | |
| { | |
| "epoch": 0.9488545551411828, | |
| "grad_norm": 0.53515625, | |
| "learning_rate": 2.1219644978203246e-05, | |
| "loss": 0.6598690152168274, | |
| "step": 3562 | |
| }, | |
| { | |
| "epoch": 0.9493873201917954, | |
| "grad_norm": 0.54296875, | |
| "learning_rate": 2.1085456714183002e-05, | |
| "loss": 0.6947650909423828, | |
| "step": 3564 | |
| }, | |
| { | |
| "epoch": 0.9499200852424081, | |
| "grad_norm": 0.5234375, | |
| "learning_rate": 2.0951542062065596e-05, | |
| "loss": 0.6635608673095703, | |
| "step": 3566 | |
| }, | |
| { | |
| "epoch": 0.9504528502930207, | |
| "grad_norm": 0.5234375, | |
| "learning_rate": 2.0817902959018755e-05, | |
| "loss": 0.6340133547782898, | |
| "step": 3568 | |
| }, | |
| { | |
| "epoch": 0.9509856153436335, | |
| "grad_norm": 0.515625, | |
| "learning_rate": 2.068454133822419e-05, | |
| "loss": 0.696979284286499, | |
| "step": 3570 | |
| }, | |
| { | |
| "epoch": 0.9515183803942462, | |
| "grad_norm": 0.51171875, | |
| "learning_rate": 2.0551459128849662e-05, | |
| "loss": 0.7071898579597473, | |
| "step": 3572 | |
| }, | |
| { | |
| "epoch": 0.9520511454448588, | |
| "grad_norm": 0.5390625, | |
| "learning_rate": 2.041865825602102e-05, | |
| "loss": 0.6977174282073975, | |
| "step": 3574 | |
| }, | |
| { | |
| "epoch": 0.9525839104954715, | |
| "grad_norm": 0.52734375, | |
| "learning_rate": 2.0286140640794416e-05, | |
| "loss": 0.6723811030387878, | |
| "step": 3576 | |
| }, | |
| { | |
| "epoch": 0.9531166755460841, | |
| "grad_norm": 0.51953125, | |
| "learning_rate": 2.015390820012847e-05, | |
| "loss": 0.6628782153129578, | |
| "step": 3578 | |
| }, | |
| { | |
| "epoch": 0.9536494405966969, | |
| "grad_norm": 0.53125, | |
| "learning_rate": 2.0021962846856556e-05, | |
| "loss": 0.6885560154914856, | |
| "step": 3580 | |
| }, | |
| { | |
| "epoch": 0.9541822056473095, | |
| "grad_norm": 0.53125, | |
| "learning_rate": 1.989030648965914e-05, | |
| "loss": 0.6671193242073059, | |
| "step": 3582 | |
| }, | |
| { | |
| "epoch": 0.9547149706979222, | |
| "grad_norm": 0.52734375, | |
| "learning_rate": 1.975894103303615e-05, | |
| "loss": 0.6817159652709961, | |
| "step": 3584 | |
| }, | |
| { | |
| "epoch": 0.9552477357485349, | |
| "grad_norm": 0.5234375, | |
| "learning_rate": 1.962786837727944e-05, | |
| "loss": 0.6346890926361084, | |
| "step": 3586 | |
| }, | |
| { | |
| "epoch": 0.9557805007991476, | |
| "grad_norm": 0.53515625, | |
| "learning_rate": 1.949709041844532e-05, | |
| "loss": 0.6845383644104004, | |
| "step": 3588 | |
| }, | |
| { | |
| "epoch": 0.9563132658497603, | |
| "grad_norm": 0.5390625, | |
| "learning_rate": 1.936660904832705e-05, | |
| "loss": 0.7126829624176025, | |
| "step": 3590 | |
| }, | |
| { | |
| "epoch": 0.9568460309003729, | |
| "grad_norm": 0.515625, | |
| "learning_rate": 1.9236426154427583e-05, | |
| "loss": 0.647582471370697, | |
| "step": 3592 | |
| }, | |
| { | |
| "epoch": 0.9573787959509856, | |
| "grad_norm": 0.5234375, | |
| "learning_rate": 1.9106543619932188e-05, | |
| "loss": 0.6961623430252075, | |
| "step": 3594 | |
| }, | |
| { | |
| "epoch": 0.9579115610015982, | |
| "grad_norm": 0.5625, | |
| "learning_rate": 1.8976963323681227e-05, | |
| "loss": 0.7102996110916138, | |
| "step": 3596 | |
| }, | |
| { | |
| "epoch": 0.958444326052211, | |
| "grad_norm": 0.546875, | |
| "learning_rate": 1.8847687140142987e-05, | |
| "loss": 0.7002482414245605, | |
| "step": 3598 | |
| }, | |
| { | |
| "epoch": 0.9589770911028237, | |
| "grad_norm": 0.54296875, | |
| "learning_rate": 1.8718716939386543e-05, | |
| "loss": 0.6747076511383057, | |
| "step": 3600 | |
| }, | |
| { | |
| "epoch": 0.9595098561534363, | |
| "grad_norm": 0.5546875, | |
| "learning_rate": 1.8590054587054728e-05, | |
| "loss": 0.6580986380577087, | |
| "step": 3602 | |
| }, | |
| { | |
| "epoch": 0.960042621204049, | |
| "grad_norm": 0.51953125, | |
| "learning_rate": 1.8461701944337137e-05, | |
| "loss": 0.6556539535522461, | |
| "step": 3604 | |
| }, | |
| { | |
| "epoch": 0.9605753862546617, | |
| "grad_norm": 0.53125, | |
| "learning_rate": 1.8333660867943163e-05, | |
| "loss": 0.6604914665222168, | |
| "step": 3606 | |
| }, | |
| { | |
| "epoch": 0.9611081513052744, | |
| "grad_norm": 0.53125, | |
| "learning_rate": 1.820593321007525e-05, | |
| "loss": 0.6752309203147888, | |
| "step": 3608 | |
| }, | |
| { | |
| "epoch": 0.961640916355887, | |
| "grad_norm": 0.51171875, | |
| "learning_rate": 1.807852081840197e-05, | |
| "loss": 0.6571139097213745, | |
| "step": 3610 | |
| }, | |
| { | |
| "epoch": 0.9621736814064997, | |
| "grad_norm": 0.53515625, | |
| "learning_rate": 1.7951425536031374e-05, | |
| "loss": 0.6813245415687561, | |
| "step": 3612 | |
| }, | |
| { | |
| "epoch": 0.9627064464571125, | |
| "grad_norm": 0.51953125, | |
| "learning_rate": 1.7824649201484306e-05, | |
| "loss": 0.6753969788551331, | |
| "step": 3614 | |
| }, | |
| { | |
| "epoch": 0.9632392115077251, | |
| "grad_norm": 0.54296875, | |
| "learning_rate": 1.76981936486678e-05, | |
| "loss": 0.6771748065948486, | |
| "step": 3616 | |
| }, | |
| { | |
| "epoch": 0.9637719765583378, | |
| "grad_norm": 0.54296875, | |
| "learning_rate": 1.7572060706848576e-05, | |
| "loss": 0.7080951929092407, | |
| "step": 3618 | |
| }, | |
| { | |
| "epoch": 0.9643047416089504, | |
| "grad_norm": 0.53125, | |
| "learning_rate": 1.7446252200626555e-05, | |
| "loss": 0.6531881093978882, | |
| "step": 3620 | |
| }, | |
| { | |
| "epoch": 0.9648375066595631, | |
| "grad_norm": 0.51171875, | |
| "learning_rate": 1.732076994990849e-05, | |
| "loss": 0.6644502878189087, | |
| "step": 3622 | |
| }, | |
| { | |
| "epoch": 0.9653702717101759, | |
| "grad_norm": 0.51953125, | |
| "learning_rate": 1.719561576988158e-05, | |
| "loss": 0.6807780861854553, | |
| "step": 3624 | |
| }, | |
| { | |
| "epoch": 0.9659030367607885, | |
| "grad_norm": 0.53515625, | |
| "learning_rate": 1.7070791470987295e-05, | |
| "loss": 0.6746936440467834, | |
| "step": 3626 | |
| }, | |
| { | |
| "epoch": 0.9664358018114012, | |
| "grad_norm": 0.52734375, | |
| "learning_rate": 1.6946298858895144e-05, | |
| "loss": 0.7012004256248474, | |
| "step": 3628 | |
| }, | |
| { | |
| "epoch": 0.9669685668620138, | |
| "grad_norm": 0.52734375, | |
| "learning_rate": 1.6822139734476546e-05, | |
| "loss": 0.677057147026062, | |
| "step": 3630 | |
| }, | |
| { | |
| "epoch": 0.9675013319126265, | |
| "grad_norm": 0.51171875, | |
| "learning_rate": 1.6698315893778788e-05, | |
| "loss": 0.6767706871032715, | |
| "step": 3632 | |
| }, | |
| { | |
| "epoch": 0.9680340969632392, | |
| "grad_norm": 0.53125, | |
| "learning_rate": 1.6574829127999067e-05, | |
| "loss": 0.6860625743865967, | |
| "step": 3634 | |
| }, | |
| { | |
| "epoch": 0.9685668620138519, | |
| "grad_norm": 0.546875, | |
| "learning_rate": 1.645168122345854e-05, | |
| "loss": 0.6813229918479919, | |
| "step": 3636 | |
| }, | |
| { | |
| "epoch": 0.9690996270644646, | |
| "grad_norm": 0.54296875, | |
| "learning_rate": 1.6328873961576506e-05, | |
| "loss": 0.6505600214004517, | |
| "step": 3638 | |
| }, | |
| { | |
| "epoch": 0.9696323921150772, | |
| "grad_norm": 0.5390625, | |
| "learning_rate": 1.6206409118844654e-05, | |
| "loss": 0.6736657023429871, | |
| "step": 3640 | |
| }, | |
| { | |
| "epoch": 0.97016515716569, | |
| "grad_norm": 0.5390625, | |
| "learning_rate": 1.6084288466801295e-05, | |
| "loss": 0.7095986008644104, | |
| "step": 3642 | |
| }, | |
| { | |
| "epoch": 0.9706979222163026, | |
| "grad_norm": 0.51171875, | |
| "learning_rate": 1.5962513772005836e-05, | |
| "loss": 0.6710772514343262, | |
| "step": 3644 | |
| }, | |
| { | |
| "epoch": 0.9712306872669153, | |
| "grad_norm": 0.51953125, | |
| "learning_rate": 1.5841086796013142e-05, | |
| "loss": 0.6877874135971069, | |
| "step": 3646 | |
| }, | |
| { | |
| "epoch": 0.9717634523175279, | |
| "grad_norm": 0.55078125, | |
| "learning_rate": 1.5720009295348103e-05, | |
| "loss": 0.6922812461853027, | |
| "step": 3648 | |
| }, | |
| { | |
| "epoch": 0.9722962173681406, | |
| "grad_norm": 0.52734375, | |
| "learning_rate": 1.55992830214802e-05, | |
| "loss": 0.7035001516342163, | |
| "step": 3650 | |
| }, | |
| { | |
| "epoch": 0.9728289824187534, | |
| "grad_norm": 0.51953125, | |
| "learning_rate": 1.5478909720798187e-05, | |
| "loss": 0.6592362523078918, | |
| "step": 3652 | |
| }, | |
| { | |
| "epoch": 0.973361747469366, | |
| "grad_norm": 0.53125, | |
| "learning_rate": 1.5358891134584802e-05, | |
| "loss": 0.7107102274894714, | |
| "step": 3654 | |
| }, | |
| { | |
| "epoch": 0.9738945125199787, | |
| "grad_norm": 0.51953125, | |
| "learning_rate": 1.52392289989916e-05, | |
| "loss": 0.6652364730834961, | |
| "step": 3656 | |
| }, | |
| { | |
| "epoch": 0.9744272775705913, | |
| "grad_norm": 0.5390625, | |
| "learning_rate": 1.5119925045013832e-05, | |
| "loss": 0.6816696524620056, | |
| "step": 3658 | |
| }, | |
| { | |
| "epoch": 0.9749600426212041, | |
| "grad_norm": 0.53125, | |
| "learning_rate": 1.5000980998465409e-05, | |
| "loss": 0.6663632392883301, | |
| "step": 3660 | |
| }, | |
| { | |
| "epoch": 0.9754928076718167, | |
| "grad_norm": 0.53125, | |
| "learning_rate": 1.4882398579953928e-05, | |
| "loss": 0.6931334733963013, | |
| "step": 3662 | |
| }, | |
| { | |
| "epoch": 0.9760255727224294, | |
| "grad_norm": 0.53515625, | |
| "learning_rate": 1.4764179504855793e-05, | |
| "loss": 0.7170167565345764, | |
| "step": 3664 | |
| }, | |
| { | |
| "epoch": 0.9765583377730421, | |
| "grad_norm": 0.5078125, | |
| "learning_rate": 1.4646325483291386e-05, | |
| "loss": 0.6615394949913025, | |
| "step": 3666 | |
| }, | |
| { | |
| "epoch": 0.9770911028236547, | |
| "grad_norm": 0.5078125, | |
| "learning_rate": 1.4528838220100344e-05, | |
| "loss": 0.6396226286888123, | |
| "step": 3668 | |
| }, | |
| { | |
| "epoch": 0.9776238678742675, | |
| "grad_norm": 0.546875, | |
| "learning_rate": 1.4411719414816893e-05, | |
| "loss": 0.6949025988578796, | |
| "step": 3670 | |
| }, | |
| { | |
| "epoch": 0.9781566329248801, | |
| "grad_norm": 0.5234375, | |
| "learning_rate": 1.429497076164526e-05, | |
| "loss": 0.6923924684524536, | |
| "step": 3672 | |
| }, | |
| { | |
| "epoch": 0.9786893979754928, | |
| "grad_norm": 0.5390625, | |
| "learning_rate": 1.4178593949435162e-05, | |
| "loss": 0.7032251954078674, | |
| "step": 3674 | |
| }, | |
| { | |
| "epoch": 0.9792221630261055, | |
| "grad_norm": 0.5234375, | |
| "learning_rate": 1.40625906616574e-05, | |
| "loss": 0.6616812944412231, | |
| "step": 3676 | |
| }, | |
| { | |
| "epoch": 0.9797549280767182, | |
| "grad_norm": 0.52734375, | |
| "learning_rate": 1.3946962576379446e-05, | |
| "loss": 0.707070529460907, | |
| "step": 3678 | |
| }, | |
| { | |
| "epoch": 0.9802876931273309, | |
| "grad_norm": 0.53125, | |
| "learning_rate": 1.3831711366241244e-05, | |
| "loss": 0.6896175146102905, | |
| "step": 3680 | |
| }, | |
| { | |
| "epoch": 0.9808204581779435, | |
| "grad_norm": 0.51953125, | |
| "learning_rate": 1.3716838698430972e-05, | |
| "loss": 0.6856859922409058, | |
| "step": 3682 | |
| }, | |
| { | |
| "epoch": 0.9813532232285562, | |
| "grad_norm": 0.53125, | |
| "learning_rate": 1.3602346234660928e-05, | |
| "loss": 0.7103138566017151, | |
| "step": 3684 | |
| }, | |
| { | |
| "epoch": 0.9818859882791688, | |
| "grad_norm": 0.52734375, | |
| "learning_rate": 1.3488235631143498e-05, | |
| "loss": 0.6692371964454651, | |
| "step": 3686 | |
| }, | |
| { | |
| "epoch": 0.9824187533297816, | |
| "grad_norm": 0.53515625, | |
| "learning_rate": 1.3374508538567198e-05, | |
| "loss": 0.7444373965263367, | |
| "step": 3688 | |
| }, | |
| { | |
| "epoch": 0.9829515183803943, | |
| "grad_norm": 0.53125, | |
| "learning_rate": 1.326116660207279e-05, | |
| "loss": 0.705718994140625, | |
| "step": 3690 | |
| }, | |
| { | |
| "epoch": 0.9834842834310069, | |
| "grad_norm": 0.5078125, | |
| "learning_rate": 1.3148211461229497e-05, | |
| "loss": 0.6810398101806641, | |
| "step": 3692 | |
| }, | |
| { | |
| "epoch": 0.9840170484816196, | |
| "grad_norm": 0.54296875, | |
| "learning_rate": 1.3035644750011262e-05, | |
| "loss": 0.7048704624176025, | |
| "step": 3694 | |
| }, | |
| { | |
| "epoch": 0.9845498135322323, | |
| "grad_norm": 0.55078125, | |
| "learning_rate": 1.292346809677314e-05, | |
| "loss": 0.7215117812156677, | |
| "step": 3696 | |
| }, | |
| { | |
| "epoch": 0.985082578582845, | |
| "grad_norm": 0.5390625, | |
| "learning_rate": 1.2811683124227719e-05, | |
| "loss": 0.71157306432724, | |
| "step": 3698 | |
| }, | |
| { | |
| "epoch": 0.9856153436334576, | |
| "grad_norm": 0.54296875, | |
| "learning_rate": 1.270029144942166e-05, | |
| "loss": 0.6895602345466614, | |
| "step": 3700 | |
| }, | |
| { | |
| "epoch": 0.9861481086840703, | |
| "grad_norm": 0.53125, | |
| "learning_rate": 1.2589294683712302e-05, | |
| "loss": 0.6617645621299744, | |
| "step": 3702 | |
| }, | |
| { | |
| "epoch": 0.986680873734683, | |
| "grad_norm": 0.5234375, | |
| "learning_rate": 1.2478694432744342e-05, | |
| "loss": 0.6597418785095215, | |
| "step": 3704 | |
| }, | |
| { | |
| "epoch": 0.9872136387852957, | |
| "grad_norm": 0.5234375, | |
| "learning_rate": 1.2368492296426636e-05, | |
| "loss": 0.6748197078704834, | |
| "step": 3706 | |
| }, | |
| { | |
| "epoch": 0.9877464038359084, | |
| "grad_norm": 0.5390625, | |
| "learning_rate": 1.2258689868909021e-05, | |
| "loss": 0.6904255151748657, | |
| "step": 3708 | |
| }, | |
| { | |
| "epoch": 0.988279168886521, | |
| "grad_norm": 0.54296875, | |
| "learning_rate": 1.2149288738559295e-05, | |
| "loss": 0.6973739266395569, | |
| "step": 3710 | |
| }, | |
| { | |
| "epoch": 0.9888119339371337, | |
| "grad_norm": 0.53515625, | |
| "learning_rate": 1.2040290487940166e-05, | |
| "loss": 0.7195823192596436, | |
| "step": 3712 | |
| }, | |
| { | |
| "epoch": 0.9893446989877464, | |
| "grad_norm": 0.52734375, | |
| "learning_rate": 1.1931696693786461e-05, | |
| "loss": 0.6562846899032593, | |
| "step": 3714 | |
| }, | |
| { | |
| "epoch": 0.9898774640383591, | |
| "grad_norm": 0.51171875, | |
| "learning_rate": 1.1823508926982239e-05, | |
| "loss": 0.6495825052261353, | |
| "step": 3716 | |
| }, | |
| { | |
| "epoch": 0.9904102290889718, | |
| "grad_norm": 0.5234375, | |
| "learning_rate": 1.1715728752538103e-05, | |
| "loss": 0.7018332481384277, | |
| "step": 3718 | |
| }, | |
| { | |
| "epoch": 0.9909429941395844, | |
| "grad_norm": 0.51953125, | |
| "learning_rate": 1.1608357729568547e-05, | |
| "loss": 0.6511444449424744, | |
| "step": 3720 | |
| }, | |
| { | |
| "epoch": 0.9914757591901971, | |
| "grad_norm": 0.5234375, | |
| "learning_rate": 1.1501397411269415e-05, | |
| "loss": 0.6892027258872986, | |
| "step": 3722 | |
| }, | |
| { | |
| "epoch": 0.9920085242408098, | |
| "grad_norm": 0.53515625, | |
| "learning_rate": 1.1394849344895413e-05, | |
| "loss": 0.6872977018356323, | |
| "step": 3724 | |
| }, | |
| { | |
| "epoch": 0.9925412892914225, | |
| "grad_norm": 0.5546875, | |
| "learning_rate": 1.1288715071737743e-05, | |
| "loss": 0.7205137014389038, | |
| "step": 3726 | |
| }, | |
| { | |
| "epoch": 0.9930740543420352, | |
| "grad_norm": 0.5234375, | |
| "learning_rate": 1.1182996127101822e-05, | |
| "loss": 0.6928572058677673, | |
| "step": 3728 | |
| }, | |
| { | |
| "epoch": 0.9936068193926478, | |
| "grad_norm": 0.5078125, | |
| "learning_rate": 1.1077694040285008e-05, | |
| "loss": 0.6700318455696106, | |
| "step": 3730 | |
| }, | |
| { | |
| "epoch": 0.9941395844432606, | |
| "grad_norm": 0.55078125, | |
| "learning_rate": 1.0972810334554565e-05, | |
| "loss": 0.6770192384719849, | |
| "step": 3732 | |
| }, | |
| { | |
| "epoch": 0.9946723494938732, | |
| "grad_norm": 0.53515625, | |
| "learning_rate": 1.086834652712557e-05, | |
| "loss": 0.7142175436019897, | |
| "step": 3734 | |
| }, | |
| { | |
| "epoch": 0.9952051145444859, | |
| "grad_norm": 0.5234375, | |
| "learning_rate": 1.076430412913899e-05, | |
| "loss": 0.6775243282318115, | |
| "step": 3736 | |
| }, | |
| { | |
| "epoch": 0.9957378795950985, | |
| "grad_norm": 0.5234375, | |
| "learning_rate": 1.0660684645639808e-05, | |
| "loss": 0.6838144063949585, | |
| "step": 3738 | |
| }, | |
| { | |
| "epoch": 0.9962706446457112, | |
| "grad_norm": 0.51953125, | |
| "learning_rate": 1.055748957555525e-05, | |
| "loss": 0.6580877900123596, | |
| "step": 3740 | |
| }, | |
| { | |
| "epoch": 0.996803409696324, | |
| "grad_norm": 0.54296875, | |
| "learning_rate": 1.045472041167313e-05, | |
| "loss": 0.6969035267829895, | |
| "step": 3742 | |
| }, | |
| { | |
| "epoch": 0.9973361747469366, | |
| "grad_norm": 0.52734375, | |
| "learning_rate": 1.0352378640620211e-05, | |
| "loss": 0.683407187461853, | |
| "step": 3744 | |
| }, | |
| { | |
| "epoch": 0.9978689397975493, | |
| "grad_norm": 0.5078125, | |
| "learning_rate": 1.0250465742840743e-05, | |
| "loss": 0.6365366578102112, | |
| "step": 3746 | |
| }, | |
| { | |
| "epoch": 0.9984017048481619, | |
| "grad_norm": 0.54296875, | |
| "learning_rate": 1.0148983192575023e-05, | |
| "loss": 0.6633042097091675, | |
| "step": 3748 | |
| }, | |
| { | |
| "epoch": 0.9989344698987747, | |
| "grad_norm": 0.53515625, | |
| "learning_rate": 1.0047932457838066e-05, | |
| "loss": 0.6902580261230469, | |
| "step": 3750 | |
| }, | |
| { | |
| "epoch": 0.9994672349493873, | |
| "grad_norm": 0.51953125, | |
| "learning_rate": 9.947315000398392e-06, | |
| "loss": 0.6602581739425659, | |
| "step": 3752 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "grad_norm": 0.59375, | |
| "learning_rate": 9.847132275756857e-06, | |
| "loss": 0.7133287191390991, | |
| "step": 3754 | |
| }, | |
| { | |
| "epoch": 1.0005327650506126, | |
| "grad_norm": 0.6171875, | |
| "learning_rate": 9.74738573312561e-06, | |
| "loss": 0.554438054561615, | |
| "step": 3756 | |
| }, | |
| { | |
| "epoch": 1.0010655301012255, | |
| "grad_norm": 0.5859375, | |
| "learning_rate": 9.648076815407123e-06, | |
| "loss": 0.5593016147613525, | |
| "step": 3758 | |
| }, | |
| { | |
| "epoch": 1.001598295151838, | |
| "grad_norm": 0.60546875, | |
| "learning_rate": 9.549206959173331e-06, | |
| "loss": 0.5708969831466675, | |
| "step": 3760 | |
| }, | |
| { | |
| "epoch": 1.0021310602024507, | |
| "grad_norm": 0.56640625, | |
| "learning_rate": 9.45077759464485e-06, | |
| "loss": 0.5818780660629272, | |
| "step": 3762 | |
| }, | |
| { | |
| "epoch": 1.0026638252530633, | |
| "grad_norm": 0.58203125, | |
| "learning_rate": 9.352790145670237e-06, | |
| "loss": 0.5812588930130005, | |
| "step": 3764 | |
| }, | |
| { | |
| "epoch": 1.0031965903036761, | |
| "grad_norm": 0.53515625, | |
| "learning_rate": 9.255246029705476e-06, | |
| "loss": 0.6045463681221008, | |
| "step": 3766 | |
| }, | |
| { | |
| "epoch": 1.0037293553542888, | |
| "grad_norm": 0.51171875, | |
| "learning_rate": 9.158146657793429e-06, | |
| "loss": 0.5340660810470581, | |
| "step": 3768 | |
| }, | |
| { | |
| "epoch": 1.0042621204049014, | |
| "grad_norm": 0.51171875, | |
| "learning_rate": 9.061493434543425e-06, | |
| "loss": 0.569497287273407, | |
| "step": 3770 | |
| }, | |
| { | |
| "epoch": 1.0047948854555142, | |
| "grad_norm": 0.52734375, | |
| "learning_rate": 8.965287758110932e-06, | |
| "loss": 0.5447984337806702, | |
| "step": 3772 | |
| }, | |
| { | |
| "epoch": 1.0053276505061268, | |
| "grad_norm": 0.51953125, | |
| "learning_rate": 8.869531020177367e-06, | |
| "loss": 0.5706038475036621, | |
| "step": 3774 | |
| }, | |
| { | |
| "epoch": 1.0058604155567394, | |
| "grad_norm": 0.53515625, | |
| "learning_rate": 8.774224605929924e-06, | |
| "loss": 0.5389580130577087, | |
| "step": 3776 | |
| }, | |
| { | |
| "epoch": 1.006393180607352, | |
| "grad_norm": 0.515625, | |
| "learning_rate": 8.679369894041567e-06, | |
| "loss": 0.5501297116279602, | |
| "step": 3778 | |
| }, | |
| { | |
| "epoch": 1.006925945657965, | |
| "grad_norm": 0.51171875, | |
| "learning_rate": 8.584968256651067e-06, | |
| "loss": 0.5443601608276367, | |
| "step": 3780 | |
| }, | |
| { | |
| "epoch": 1.0074587107085775, | |
| "grad_norm": 0.53125, | |
| "learning_rate": 8.491021059343163e-06, | |
| "loss": 0.5650312304496765, | |
| "step": 3782 | |
| }, | |
| { | |
| "epoch": 1.0079914757591901, | |
| "grad_norm": 0.515625, | |
| "learning_rate": 8.397529661128799e-06, | |
| "loss": 0.5328672528266907, | |
| "step": 3784 | |
| }, | |
| { | |
| "epoch": 1.008524240809803, | |
| "grad_norm": 0.515625, | |
| "learning_rate": 8.30449541442548e-06, | |
| "loss": 0.5154542922973633, | |
| "step": 3786 | |
| }, | |
| { | |
| "epoch": 1.0090570058604156, | |
| "grad_norm": 0.51953125, | |
| "learning_rate": 8.211919665037697e-06, | |
| "loss": 0.5323178768157959, | |
| "step": 3788 | |
| }, | |
| { | |
| "epoch": 1.0095897709110282, | |
| "grad_norm": 0.578125, | |
| "learning_rate": 8.119803752137455e-06, | |
| "loss": 0.5955473184585571, | |
| "step": 3790 | |
| }, | |
| { | |
| "epoch": 1.0101225359616408, | |
| "grad_norm": 0.52734375, | |
| "learning_rate": 8.028149008244921e-06, | |
| "loss": 0.5868597030639648, | |
| "step": 3792 | |
| }, | |
| { | |
| "epoch": 1.0106553010122536, | |
| "grad_norm": 0.51953125, | |
| "learning_rate": 7.936956759209114e-06, | |
| "loss": 0.5995841026306152, | |
| "step": 3794 | |
| }, | |
| { | |
| "epoch": 1.0111880660628663, | |
| "grad_norm": 0.53125, | |
| "learning_rate": 7.846228324188767e-06, | |
| "loss": 0.5501196384429932, | |
| "step": 3796 | |
| }, | |
| { | |
| "epoch": 1.0117208311134789, | |
| "grad_norm": 0.53515625, | |
| "learning_rate": 7.755965015633217e-06, | |
| "loss": 0.5720456838607788, | |
| "step": 3798 | |
| }, | |
| { | |
| "epoch": 1.0122535961640917, | |
| "grad_norm": 0.53515625, | |
| "learning_rate": 7.66616813926341e-06, | |
| "loss": 0.5405896902084351, | |
| "step": 3800 | |
| }, | |
| { | |
| "epoch": 1.0127863612147043, | |
| "grad_norm": 0.5546875, | |
| "learning_rate": 7.57683899405305e-06, | |
| "loss": 0.6008697748184204, | |
| "step": 3802 | |
| }, | |
| { | |
| "epoch": 1.013319126265317, | |
| "grad_norm": 0.53515625, | |
| "learning_rate": 7.487978872209783e-06, | |
| "loss": 0.5538079738616943, | |
| "step": 3804 | |
| }, | |
| { | |
| "epoch": 1.0138518913159296, | |
| "grad_norm": 0.5234375, | |
| "learning_rate": 7.3995890591564975e-06, | |
| "loss": 0.551633894443512, | |
| "step": 3806 | |
| }, | |
| { | |
| "epoch": 1.0143846563665424, | |
| "grad_norm": 0.53125, | |
| "learning_rate": 7.311670833512763e-06, | |
| "loss": 0.5497746467590332, | |
| "step": 3808 | |
| }, | |
| { | |
| "epoch": 1.014917421417155, | |
| "grad_norm": 0.53125, | |
| "learning_rate": 7.224225467076284e-06, | |
| "loss": 0.5558388829231262, | |
| "step": 3810 | |
| }, | |
| { | |
| "epoch": 1.0154501864677676, | |
| "grad_norm": 0.53515625, | |
| "learning_rate": 7.137254224804557e-06, | |
| "loss": 0.5494096875190735, | |
| "step": 3812 | |
| }, | |
| { | |
| "epoch": 1.0159829515183805, | |
| "grad_norm": 0.52734375, | |
| "learning_rate": 7.050758364796531e-06, | |
| "loss": 0.5741044282913208, | |
| "step": 3814 | |
| }, | |
| { | |
| "epoch": 1.016515716568993, | |
| "grad_norm": 0.5390625, | |
| "learning_rate": 6.964739138274433e-06, | |
| "loss": 0.6052870154380798, | |
| "step": 3816 | |
| }, | |
| { | |
| "epoch": 1.0170484816196057, | |
| "grad_norm": 0.53125, | |
| "learning_rate": 6.879197789565632e-06, | |
| "loss": 0.5531472563743591, | |
| "step": 3818 | |
| }, | |
| { | |
| "epoch": 1.0175812466702185, | |
| "grad_norm": 0.5234375, | |
| "learning_rate": 6.794135556084698e-06, | |
| "loss": 0.5643627047538757, | |
| "step": 3820 | |
| }, | |
| { | |
| "epoch": 1.0181140117208312, | |
| "grad_norm": 0.53125, | |
| "learning_rate": 6.709553668315454e-06, | |
| "loss": 0.5206726789474487, | |
| "step": 3822 | |
| }, | |
| { | |
| "epoch": 1.0186467767714438, | |
| "grad_norm": 0.5390625, | |
| "learning_rate": 6.625453349793196e-06, | |
| "loss": 0.5452365279197693, | |
| "step": 3824 | |
| }, | |
| { | |
| "epoch": 1.0191795418220564, | |
| "grad_norm": 0.53515625, | |
| "learning_rate": 6.541835817086979e-06, | |
| "loss": 0.5786857604980469, | |
| "step": 3826 | |
| }, | |
| { | |
| "epoch": 1.0197123068726692, | |
| "grad_norm": 0.52734375, | |
| "learning_rate": 6.458702279782038e-06, | |
| "loss": 0.534633457660675, | |
| "step": 3828 | |
| }, | |
| { | |
| "epoch": 1.0202450719232818, | |
| "grad_norm": 0.55078125, | |
| "learning_rate": 6.376053940462279e-06, | |
| "loss": 0.5802006721496582, | |
| "step": 3830 | |
| }, | |
| { | |
| "epoch": 1.0207778369738945, | |
| "grad_norm": 0.53515625, | |
| "learning_rate": 6.293891994692876e-06, | |
| "loss": 0.5870469808578491, | |
| "step": 3832 | |
| }, | |
| { | |
| "epoch": 1.0213106020245073, | |
| "grad_norm": 0.515625, | |
| "learning_rate": 6.212217631003019e-06, | |
| "loss": 0.5592401027679443, | |
| "step": 3834 | |
| }, | |
| { | |
| "epoch": 1.02184336707512, | |
| "grad_norm": 0.5234375, | |
| "learning_rate": 6.1310320308686354e-06, | |
| "loss": 0.5553821921348572, | |
| "step": 3836 | |
| }, | |
| { | |
| "epoch": 1.0223761321257325, | |
| "grad_norm": 0.51953125, | |
| "learning_rate": 6.050336368695386e-06, | |
| "loss": 0.583335816860199, | |
| "step": 3838 | |
| }, | |
| { | |
| "epoch": 1.0229088971763451, | |
| "grad_norm": 0.515625, | |
| "learning_rate": 5.9701318118016296e-06, | |
| "loss": 0.563973069190979, | |
| "step": 3840 | |
| }, | |
| { | |
| "epoch": 1.023441662226958, | |
| "grad_norm": 0.53515625, | |
| "learning_rate": 5.8904195204015555e-06, | |
| "loss": 0.5525979995727539, | |
| "step": 3842 | |
| }, | |
| { | |
| "epoch": 1.0239744272775706, | |
| "grad_norm": 0.53515625, | |
| "learning_rate": 5.811200647588386e-06, | |
| "loss": 0.5597478747367859, | |
| "step": 3844 | |
| }, | |
| { | |
| "epoch": 1.0245071923281832, | |
| "grad_norm": 0.51953125, | |
| "learning_rate": 5.73247633931771e-06, | |
| "loss": 0.5630712509155273, | |
| "step": 3846 | |
| }, | |
| { | |
| "epoch": 1.025039957378796, | |
| "grad_norm": 0.5390625, | |
| "learning_rate": 5.6542477343908944e-06, | |
| "loss": 0.5952086448669434, | |
| "step": 3848 | |
| }, | |
| { | |
| "epoch": 1.0255727224294087, | |
| "grad_norm": 0.51171875, | |
| "learning_rate": 5.5765159644386265e-06, | |
| "loss": 0.5853996872901917, | |
| "step": 3850 | |
| }, | |
| { | |
| "epoch": 1.0261054874800213, | |
| "grad_norm": 0.53515625, | |
| "learning_rate": 5.499282153904508e-06, | |
| "loss": 0.5681400299072266, | |
| "step": 3852 | |
| }, | |
| { | |
| "epoch": 1.026638252530634, | |
| "grad_norm": 0.54296875, | |
| "learning_rate": 5.422547420028839e-06, | |
| "loss": 0.5616742968559265, | |
| "step": 3854 | |
| }, | |
| { | |
| "epoch": 1.0271710175812467, | |
| "grad_norm": 0.54296875, | |
| "learning_rate": 5.346312872832422e-06, | |
| "loss": 0.5645125508308411, | |
| "step": 3856 | |
| }, | |
| { | |
| "epoch": 1.0277037826318594, | |
| "grad_norm": 0.51171875, | |
| "learning_rate": 5.270579615100518e-06, | |
| "loss": 0.5648372769355774, | |
| "step": 3858 | |
| }, | |
| { | |
| "epoch": 1.028236547682472, | |
| "grad_norm": 0.53125, | |
| "learning_rate": 5.19534874236689e-06, | |
| "loss": 0.5514068603515625, | |
| "step": 3860 | |
| }, | |
| { | |
| "epoch": 1.0287693127330848, | |
| "grad_norm": 0.53125, | |
| "learning_rate": 5.120621342897951e-06, | |
| "loss": 0.5846351385116577, | |
| "step": 3862 | |
| }, | |
| { | |
| "epoch": 1.0293020777836974, | |
| "grad_norm": 0.52734375, | |
| "learning_rate": 5.046398497677034e-06, | |
| "loss": 0.5671142339706421, | |
| "step": 3864 | |
| }, | |
| { | |
| "epoch": 1.02983484283431, | |
| "grad_norm": 0.53515625, | |
| "learning_rate": 4.972681280388738e-06, | |
| "loss": 0.5745285153388977, | |
| "step": 3866 | |
| }, | |
| { | |
| "epoch": 1.0303676078849227, | |
| "grad_norm": 0.54296875, | |
| "learning_rate": 4.899470757403415e-06, | |
| "loss": 0.5890294313430786, | |
| "step": 3868 | |
| }, | |
| { | |
| "epoch": 1.0309003729355355, | |
| "grad_norm": 0.53515625, | |
| "learning_rate": 4.826767987761725e-06, | |
| "loss": 0.5851269364356995, | |
| "step": 3870 | |
| }, | |
| { | |
| "epoch": 1.031433137986148, | |
| "grad_norm": 0.5234375, | |
| "learning_rate": 4.754574023159335e-06, | |
| "loss": 0.5567840933799744, | |
| "step": 3872 | |
| }, | |
| { | |
| "epoch": 1.0319659030367607, | |
| "grad_norm": 0.5390625, | |
| "learning_rate": 4.682889907931696e-06, | |
| "loss": 0.5986261963844299, | |
| "step": 3874 | |
| }, | |
| { | |
| "epoch": 1.0324986680873736, | |
| "grad_norm": 0.53125, | |
| "learning_rate": 4.611716679038925e-06, | |
| "loss": 0.5585236549377441, | |
| "step": 3876 | |
| }, | |
| { | |
| "epoch": 1.0330314331379862, | |
| "grad_norm": 0.53125, | |
| "learning_rate": 4.5410553660508284e-06, | |
| "loss": 0.574200451374054, | |
| "step": 3878 | |
| }, | |
| { | |
| "epoch": 1.0335641981885988, | |
| "grad_norm": 0.52734375, | |
| "learning_rate": 4.470906991131991e-06, | |
| "loss": 0.5537621974945068, | |
| "step": 3880 | |
| }, | |
| { | |
| "epoch": 1.0340969632392114, | |
| "grad_norm": 0.54296875, | |
| "learning_rate": 4.401272569026995e-06, | |
| "loss": 0.5498485565185547, | |
| "step": 3882 | |
| }, | |
| { | |
| "epoch": 1.0346297282898242, | |
| "grad_norm": 0.5234375, | |
| "learning_rate": 4.332153107045747e-06, | |
| "loss": 0.5446688532829285, | |
| "step": 3884 | |
| }, | |
| { | |
| "epoch": 1.0351624933404369, | |
| "grad_norm": 0.53515625, | |
| "learning_rate": 4.263549605048898e-06, | |
| "loss": 0.5680376291275024, | |
| "step": 3886 | |
| }, | |
| { | |
| "epoch": 1.0356952583910495, | |
| "grad_norm": 0.51953125, | |
| "learning_rate": 4.195463055433364e-06, | |
| "loss": 0.5543019771575928, | |
| "step": 3888 | |
| }, | |
| { | |
| "epoch": 1.0362280234416623, | |
| "grad_norm": 0.5390625, | |
| "learning_rate": 4.1278944431180164e-06, | |
| "loss": 0.5992298126220703, | |
| "step": 3890 | |
| }, | |
| { | |
| "epoch": 1.036760788492275, | |
| "grad_norm": 0.53125, | |
| "learning_rate": 4.060844745529396e-06, | |
| "loss": 0.5606511235237122, | |
| "step": 3892 | |
| }, | |
| { | |
| "epoch": 1.0372935535428875, | |
| "grad_norm": 0.53125, | |
| "learning_rate": 3.994314932587573e-06, | |
| "loss": 0.5762649178504944, | |
| "step": 3894 | |
| }, | |
| { | |
| "epoch": 1.0378263185935002, | |
| "grad_norm": 0.5078125, | |
| "learning_rate": 3.928305966692145e-06, | |
| "loss": 0.5266860723495483, | |
| "step": 3896 | |
| }, | |
| { | |
| "epoch": 1.038359083644113, | |
| "grad_norm": 0.5234375, | |
| "learning_rate": 3.862818802708295e-06, | |
| "loss": 0.5364474058151245, | |
| "step": 3898 | |
| }, | |
| { | |
| "epoch": 1.0388918486947256, | |
| "grad_norm": 0.51953125, | |
| "learning_rate": 3.7978543879529704e-06, | |
| "loss": 0.5435348153114319, | |
| "step": 3900 | |
| }, | |
| { | |
| "epoch": 1.0394246137453382, | |
| "grad_norm": 0.53515625, | |
| "learning_rate": 3.7334136621812023e-06, | |
| "loss": 0.608924150466919, | |
| "step": 3902 | |
| }, | |
| { | |
| "epoch": 1.039957378795951, | |
| "grad_norm": 0.52734375, | |
| "learning_rate": 3.6694975575725012e-06, | |
| "loss": 0.5841650366783142, | |
| "step": 3904 | |
| }, | |
| { | |
| "epoch": 1.0404901438465637, | |
| "grad_norm": 0.52734375, | |
| "learning_rate": 3.606106998717351e-06, | |
| "loss": 0.5527678728103638, | |
| "step": 3906 | |
| }, | |
| { | |
| "epoch": 1.0410229088971763, | |
| "grad_norm": 0.546875, | |
| "learning_rate": 3.5432429026038784e-06, | |
| "loss": 0.5616254806518555, | |
| "step": 3908 | |
| }, | |
| { | |
| "epoch": 1.041555673947789, | |
| "grad_norm": 0.54296875, | |
| "learning_rate": 3.480906178604553e-06, | |
| "loss": 0.584281861782074, | |
| "step": 3910 | |
| }, | |
| { | |
| "epoch": 1.0420884389984018, | |
| "grad_norm": 0.55078125, | |
| "learning_rate": 3.419097728463041e-06, | |
| "loss": 0.5592966675758362, | |
| "step": 3912 | |
| }, | |
| { | |
| "epoch": 1.0426212040490144, | |
| "grad_norm": 0.5234375, | |
| "learning_rate": 3.3578184462811714e-06, | |
| "loss": 0.5340662598609924, | |
| "step": 3914 | |
| }, | |
| { | |
| "epoch": 1.043153969099627, | |
| "grad_norm": 0.53125, | |
| "learning_rate": 3.2970692185059837e-06, | |
| "loss": 0.5586187839508057, | |
| "step": 3916 | |
| }, | |
| { | |
| "epoch": 1.0436867341502398, | |
| "grad_norm": 0.53515625, | |
| "learning_rate": 3.236850923916919e-06, | |
| "loss": 0.5644587278366089, | |
| "step": 3918 | |
| }, | |
| { | |
| "epoch": 1.0442194992008524, | |
| "grad_norm": 0.52734375, | |
| "learning_rate": 3.177164433613116e-06, | |
| "loss": 0.5369923114776611, | |
| "step": 3920 | |
| }, | |
| { | |
| "epoch": 1.044752264251465, | |
| "grad_norm": 0.53125, | |
| "learning_rate": 3.1180106110007925e-06, | |
| "loss": 0.5914398431777954, | |
| "step": 3922 | |
| }, | |
| { | |
| "epoch": 1.045285029302078, | |
| "grad_norm": 0.5390625, | |
| "learning_rate": 3.0593903117807344e-06, | |
| "loss": 0.5819767117500305, | |
| "step": 3924 | |
| }, | |
| { | |
| "epoch": 1.0458177943526905, | |
| "grad_norm": 0.53125, | |
| "learning_rate": 3.001304383935981e-06, | |
| "loss": 0.5808528065681458, | |
| "step": 3926 | |
| }, | |
| { | |
| "epoch": 1.0463505594033031, | |
| "grad_norm": 0.52734375, | |
| "learning_rate": 2.9437536677194976e-06, | |
| "loss": 0.5389923453330994, | |
| "step": 3928 | |
| }, | |
| { | |
| "epoch": 1.0468833244539157, | |
| "grad_norm": 0.52734375, | |
| "learning_rate": 2.8867389956420645e-06, | |
| "loss": 0.572472333908081, | |
| "step": 3930 | |
| }, | |
| { | |
| "epoch": 1.0474160895045286, | |
| "grad_norm": 0.53515625, | |
| "learning_rate": 2.8302611924601884e-06, | |
| "loss": 0.5559939742088318, | |
| "step": 3932 | |
| }, | |
| { | |
| "epoch": 1.0479488545551412, | |
| "grad_norm": 0.5234375, | |
| "learning_rate": 2.7743210751642212e-06, | |
| "loss": 0.5386156439781189, | |
| "step": 3934 | |
| }, | |
| { | |
| "epoch": 1.0484816196057538, | |
| "grad_norm": 0.52734375, | |
| "learning_rate": 2.718919452966509e-06, | |
| "loss": 0.5663049221038818, | |
| "step": 3936 | |
| }, | |
| { | |
| "epoch": 1.0490143846563666, | |
| "grad_norm": 0.51953125, | |
| "learning_rate": 2.664057127289699e-06, | |
| "loss": 0.5892527103424072, | |
| "step": 3938 | |
| }, | |
| { | |
| "epoch": 1.0495471497069793, | |
| "grad_norm": 0.546875, | |
| "learning_rate": 2.6097348917551204e-06, | |
| "loss": 0.5835411548614502, | |
| "step": 3940 | |
| }, | |
| { | |
| "epoch": 1.0500799147575919, | |
| "grad_norm": 0.515625, | |
| "learning_rate": 2.55595353217136e-06, | |
| "loss": 0.5439558029174805, | |
| "step": 3942 | |
| }, | |
| { | |
| "epoch": 1.0506126798082045, | |
| "grad_norm": 0.53515625, | |
| "learning_rate": 2.502713826522838e-06, | |
| "loss": 0.5934330224990845, | |
| "step": 3944 | |
| }, | |
| { | |
| "epoch": 1.0511454448588173, | |
| "grad_norm": 0.55078125, | |
| "learning_rate": 2.450016544958591e-06, | |
| "loss": 0.5806512832641602, | |
| "step": 3946 | |
| }, | |
| { | |
| "epoch": 1.05167820990943, | |
| "grad_norm": 0.53515625, | |
| "learning_rate": 2.3978624497811033e-06, | |
| "loss": 0.5375804305076599, | |
| "step": 3948 | |
| }, | |
| { | |
| "epoch": 1.0522109749600426, | |
| "grad_norm": 0.5234375, | |
| "learning_rate": 2.3462522954353073e-06, | |
| "loss": 0.5656116604804993, | |
| "step": 3950 | |
| }, | |
| { | |
| "epoch": 1.0527437400106554, | |
| "grad_norm": 0.51953125, | |
| "learning_rate": 2.2951868284976485e-06, | |
| "loss": 0.5351519584655762, | |
| "step": 3952 | |
| }, | |
| { | |
| "epoch": 1.053276505061268, | |
| "grad_norm": 0.51171875, | |
| "learning_rate": 2.244666787665297e-06, | |
| "loss": 0.5698959827423096, | |
| "step": 3954 | |
| }, | |
| { | |
| "epoch": 1.0538092701118806, | |
| "grad_norm": 0.51171875, | |
| "learning_rate": 2.194692903745459e-06, | |
| "loss": 0.5653844475746155, | |
| "step": 3956 | |
| }, | |
| { | |
| "epoch": 1.0543420351624933, | |
| "grad_norm": 0.5078125, | |
| "learning_rate": 2.145265899644802e-06, | |
| "loss": 0.5527917742729187, | |
| "step": 3958 | |
| }, | |
| { | |
| "epoch": 1.054874800213106, | |
| "grad_norm": 0.53125, | |
| "learning_rate": 2.096386490358997e-06, | |
| "loss": 0.5695369243621826, | |
| "step": 3960 | |
| }, | |
| { | |
| "epoch": 1.0554075652637187, | |
| "grad_norm": 0.51953125, | |
| "learning_rate": 2.048055382962386e-06, | |
| "loss": 0.5575898289680481, | |
| "step": 3962 | |
| }, | |
| { | |
| "epoch": 1.0559403303143313, | |
| "grad_norm": 0.546875, | |
| "learning_rate": 2.0002732765977395e-06, | |
| "loss": 0.5826997756958008, | |
| "step": 3964 | |
| }, | |
| { | |
| "epoch": 1.0564730953649442, | |
| "grad_norm": 0.5390625, | |
| "learning_rate": 1.9530408624661624e-06, | |
| "loss": 0.628014087677002, | |
| "step": 3966 | |
| }, | |
| { | |
| "epoch": 1.0570058604155568, | |
| "grad_norm": 0.5390625, | |
| "learning_rate": 1.9063588238170627e-06, | |
| "loss": 0.5535210371017456, | |
| "step": 3968 | |
| }, | |
| { | |
| "epoch": 1.0575386254661694, | |
| "grad_norm": 0.546875, | |
| "learning_rate": 1.8602278359383063e-06, | |
| "loss": 0.5898425579071045, | |
| "step": 3970 | |
| }, | |
| { | |
| "epoch": 1.058071390516782, | |
| "grad_norm": 0.5234375, | |
| "learning_rate": 1.8146485661464153e-06, | |
| "loss": 0.556390643119812, | |
| "step": 3972 | |
| }, | |
| { | |
| "epoch": 1.0586041555673948, | |
| "grad_norm": 0.546875, | |
| "learning_rate": 1.769621673776949e-06, | |
| "loss": 0.5970685482025146, | |
| "step": 3974 | |
| }, | |
| { | |
| "epoch": 1.0591369206180075, | |
| "grad_norm": 0.51953125, | |
| "learning_rate": 1.7251478101749163e-06, | |
| "loss": 0.5681304335594177, | |
| "step": 3976 | |
| }, | |
| { | |
| "epoch": 1.05966968566862, | |
| "grad_norm": 0.52734375, | |
| "learning_rate": 1.6812276186854105e-06, | |
| "loss": 0.5456256866455078, | |
| "step": 3978 | |
| }, | |
| { | |
| "epoch": 1.060202450719233, | |
| "grad_norm": 0.53515625, | |
| "learning_rate": 1.6378617346442682e-06, | |
| "loss": 0.5776793956756592, | |
| "step": 3980 | |
| }, | |
| { | |
| "epoch": 1.0607352157698455, | |
| "grad_norm": 0.5234375, | |
| "learning_rate": 1.595050785368888e-06, | |
| "loss": 0.5277162194252014, | |
| "step": 3982 | |
| }, | |
| { | |
| "epoch": 1.0612679808204581, | |
| "grad_norm": 0.54296875, | |
| "learning_rate": 1.5527953901491466e-06, | |
| "loss": 0.5984737873077393, | |
| "step": 3984 | |
| }, | |
| { | |
| "epoch": 1.0618007458710708, | |
| "grad_norm": 0.546875, | |
| "learning_rate": 1.511096160238461e-06, | |
| "loss": 0.5542199611663818, | |
| "step": 3986 | |
| }, | |
| { | |
| "epoch": 1.0623335109216836, | |
| "grad_norm": 0.54296875, | |
| "learning_rate": 1.4699536988449193e-06, | |
| "loss": 0.5390311479568481, | |
| "step": 3988 | |
| }, | |
| { | |
| "epoch": 1.0628662759722962, | |
| "grad_norm": 0.5078125, | |
| "learning_rate": 1.4293686011225849e-06, | |
| "loss": 0.5391555428504944, | |
| "step": 3990 | |
| }, | |
| { | |
| "epoch": 1.0633990410229088, | |
| "grad_norm": 0.515625, | |
| "learning_rate": 1.3893414541628647e-06, | |
| "loss": 0.533847987651825, | |
| "step": 3992 | |
| }, | |
| { | |
| "epoch": 1.0639318060735217, | |
| "grad_norm": 0.5234375, | |
| "learning_rate": 1.3498728369860125e-06, | |
| "loss": 0.5452683568000793, | |
| "step": 3994 | |
| }, | |
| { | |
| "epoch": 1.0644645711241343, | |
| "grad_norm": 0.546875, | |
| "learning_rate": 1.310963320532781e-06, | |
| "loss": 0.5820298194885254, | |
| "step": 3996 | |
| }, | |
| { | |
| "epoch": 1.064997336174747, | |
| "grad_norm": 0.51953125, | |
| "learning_rate": 1.2726134676561385e-06, | |
| "loss": 0.5669692754745483, | |
| "step": 3998 | |
| }, | |
| { | |
| "epoch": 1.0655301012253595, | |
| "grad_norm": 0.546875, | |
| "learning_rate": 1.2348238331131346e-06, | |
| "loss": 0.5747166275978088, | |
| "step": 4000 | |
| }, | |
| { | |
| "epoch": 1.0660628662759724, | |
| "grad_norm": 0.54296875, | |
| "learning_rate": 1.1975949635568696e-06, | |
| "loss": 0.6048906445503235, | |
| "step": 4002 | |
| }, | |
| { | |
| "epoch": 1.066595631326585, | |
| "grad_norm": 0.53125, | |
| "learning_rate": 1.1609273975285995e-06, | |
| "loss": 0.5434789657592773, | |
| "step": 4004 | |
| }, | |
| { | |
| "epoch": 1.0671283963771976, | |
| "grad_norm": 0.5390625, | |
| "learning_rate": 1.1248216654499377e-06, | |
| "loss": 0.5791047215461731, | |
| "step": 4006 | |
| }, | |
| { | |
| "epoch": 1.0676611614278104, | |
| "grad_norm": 0.53515625, | |
| "learning_rate": 1.089278289615181e-06, | |
| "loss": 0.5469709038734436, | |
| "step": 4008 | |
| }, | |
| { | |
| "epoch": 1.068193926478423, | |
| "grad_norm": 0.53125, | |
| "learning_rate": 1.0542977841837465e-06, | |
| "loss": 0.547351598739624, | |
| "step": 4010 | |
| }, | |
| { | |
| "epoch": 1.0687266915290357, | |
| "grad_norm": 0.55859375, | |
| "learning_rate": 1.0198806551727557e-06, | |
| "loss": 0.6135736107826233, | |
| "step": 4012 | |
| }, | |
| { | |
| "epoch": 1.0692594565796485, | |
| "grad_norm": 0.51953125, | |
| "learning_rate": 9.860274004496939e-07, | |
| "loss": 0.5442401766777039, | |
| "step": 4014 | |
| }, | |
| { | |
| "epoch": 1.069792221630261, | |
| "grad_norm": 0.52734375, | |
| "learning_rate": 9.527385097252195e-07, | |
| "loss": 0.5563182234764099, | |
| "step": 4016 | |
| }, | |
| { | |
| "epoch": 1.0703249866808737, | |
| "grad_norm": 0.52734375, | |
| "learning_rate": 9.200144645460818e-07, | |
| "loss": 0.5793603658676147, | |
| "step": 4018 | |
| }, | |
| { | |
| "epoch": 1.0708577517314863, | |
| "grad_norm": 0.54296875, | |
| "learning_rate": 8.878557382881436e-07, | |
| "loss": 0.5915518999099731, | |
| "step": 4020 | |
| }, | |
| { | |
| "epoch": 1.0713905167820992, | |
| "grad_norm": 0.515625, | |
| "learning_rate": 8.56262796149534e-07, | |
| "loss": 0.5501149296760559, | |
| "step": 4022 | |
| }, | |
| { | |
| "epoch": 1.0719232818327118, | |
| "grad_norm": 0.5390625, | |
| "learning_rate": 8.252360951439375e-07, | |
| "loss": 0.5390005111694336, | |
| "step": 4024 | |
| }, | |
| { | |
| "epoch": 1.0724560468833244, | |
| "grad_norm": 0.5859375, | |
| "learning_rate": 7.947760840939688e-07, | |
| "loss": 0.5717595219612122, | |
| "step": 4026 | |
| }, | |
| { | |
| "epoch": 1.072988811933937, | |
| "grad_norm": 0.54296875, | |
| "learning_rate": 7.648832036246712e-07, | |
| "loss": 0.5640586614608765, | |
| "step": 4028 | |
| }, | |
| { | |
| "epoch": 1.0735215769845499, | |
| "grad_norm": 0.5234375, | |
| "learning_rate": 7.35557886157161e-07, | |
| "loss": 0.5609626173973083, | |
| "step": 4030 | |
| }, | |
| { | |
| "epoch": 1.0740543420351625, | |
| "grad_norm": 0.51953125, | |
| "learning_rate": 7.068005559023672e-07, | |
| "loss": 0.5454556941986084, | |
| "step": 4032 | |
| }, | |
| { | |
| "epoch": 1.074587107085775, | |
| "grad_norm": 0.546875, | |
| "learning_rate": 6.786116288548839e-07, | |
| "loss": 0.5526958107948303, | |
| "step": 4034 | |
| }, | |
| { | |
| "epoch": 1.075119872136388, | |
| "grad_norm": 0.5390625, | |
| "learning_rate": 6.509915127869714e-07, | |
| "loss": 0.5440658926963806, | |
| "step": 4036 | |
| }, | |
| { | |
| "epoch": 1.0756526371870005, | |
| "grad_norm": 0.53515625, | |
| "learning_rate": 6.239406072426413e-07, | |
| "loss": 0.5510826110839844, | |
| "step": 4038 | |
| }, | |
| { | |
| "epoch": 1.0761854022376132, | |
| "grad_norm": 0.52734375, | |
| "learning_rate": 5.974593035318777e-07, | |
| "loss": 0.5809528827667236, | |
| "step": 4040 | |
| }, | |
| { | |
| "epoch": 1.076718167288226, | |
| "grad_norm": 0.5390625, | |
| "learning_rate": 5.715479847249939e-07, | |
| "loss": 0.5985695123672485, | |
| "step": 4042 | |
| }, | |
| { | |
| "epoch": 1.0772509323388386, | |
| "grad_norm": 0.54296875, | |
| "learning_rate": 5.46207025647072e-07, | |
| "loss": 0.5612497329711914, | |
| "step": 4044 | |
| }, | |
| { | |
| "epoch": 1.0777836973894512, | |
| "grad_norm": 0.52734375, | |
| "learning_rate": 5.214367928725405e-07, | |
| "loss": 0.5707313418388367, | |
| "step": 4046 | |
| }, | |
| { | |
| "epoch": 1.0783164624400638, | |
| "grad_norm": 0.5234375, | |
| "learning_rate": 4.972376447198945e-07, | |
| "loss": 0.5503619313240051, | |
| "step": 4048 | |
| }, | |
| { | |
| "epoch": 1.0788492274906767, | |
| "grad_norm": 0.5234375, | |
| "learning_rate": 4.736099312464815e-07, | |
| "loss": 0.5270297527313232, | |
| "step": 4050 | |
| }, | |
| { | |
| "epoch": 1.0793819925412893, | |
| "grad_norm": 0.55078125, | |
| "learning_rate": 4.505539942434656e-07, | |
| "loss": 0.5827047824859619, | |
| "step": 4052 | |
| }, | |
| { | |
| "epoch": 1.079914757591902, | |
| "grad_norm": 0.52734375, | |
| "learning_rate": 4.280701672308585e-07, | |
| "loss": 0.5816717147827148, | |
| "step": 4054 | |
| }, | |
| { | |
| "epoch": 1.0804475226425145, | |
| "grad_norm": 0.5234375, | |
| "learning_rate": 4.061587754527141e-07, | |
| "loss": 0.574262261390686, | |
| "step": 4056 | |
| }, | |
| { | |
| "epoch": 1.0809802876931274, | |
| "grad_norm": 0.53125, | |
| "learning_rate": 3.84820135872408e-07, | |
| "loss": 0.5379737615585327, | |
| "step": 4058 | |
| }, | |
| { | |
| "epoch": 1.08151305274374, | |
| "grad_norm": 0.52734375, | |
| "learning_rate": 3.640545571680765e-07, | |
| "loss": 0.5664905905723572, | |
| "step": 4060 | |
| }, | |
| { | |
| "epoch": 1.0820458177943526, | |
| "grad_norm": 0.53125, | |
| "learning_rate": 3.438623397281227e-07, | |
| "loss": 0.582391619682312, | |
| "step": 4062 | |
| }, | |
| { | |
| "epoch": 1.0825785828449654, | |
| "grad_norm": 0.54296875, | |
| "learning_rate": 3.2424377564687745e-07, | |
| "loss": 0.6073220372200012, | |
| "step": 4064 | |
| }, | |
| { | |
| "epoch": 1.083111347895578, | |
| "grad_norm": 0.54296875, | |
| "learning_rate": 3.051991487203987e-07, | |
| "loss": 0.6066765189170837, | |
| "step": 4066 | |
| }, | |
| { | |
| "epoch": 1.0836441129461907, | |
| "grad_norm": 0.5234375, | |
| "learning_rate": 2.867287344423364e-07, | |
| "loss": 0.5733582973480225, | |
| "step": 4068 | |
| }, | |
| { | |
| "epoch": 1.0841768779968035, | |
| "grad_norm": 0.5546875, | |
| "learning_rate": 2.6883279999996294e-07, | |
| "loss": 0.5795176029205322, | |
| "step": 4070 | |
| }, | |
| { | |
| "epoch": 1.0847096430474161, | |
| "grad_norm": 0.5390625, | |
| "learning_rate": 2.5151160427029584e-07, | |
| "loss": 0.5567296743392944, | |
| "step": 4072 | |
| }, | |
| { | |
| "epoch": 1.0852424080980287, | |
| "grad_norm": 0.53125, | |
| "learning_rate": 2.3476539781637664e-07, | |
| "loss": 0.5603177547454834, | |
| "step": 4074 | |
| }, | |
| { | |
| "epoch": 1.0857751731486414, | |
| "grad_norm": 0.49609375, | |
| "learning_rate": 2.1859442288361567e-07, | |
| "loss": 0.5176109671592712, | |
| "step": 4076 | |
| }, | |
| { | |
| "epoch": 1.0863079381992542, | |
| "grad_norm": 0.54296875, | |
| "learning_rate": 2.0299891339630618e-07, | |
| "loss": 0.5881712436676025, | |
| "step": 4078 | |
| }, | |
| { | |
| "epoch": 1.0868407032498668, | |
| "grad_norm": 0.54296875, | |
| "learning_rate": 1.879790949542537e-07, | |
| "loss": 0.5769769549369812, | |
| "step": 4080 | |
| }, | |
| { | |
| "epoch": 1.0873734683004794, | |
| "grad_norm": 0.52734375, | |
| "learning_rate": 1.7353518482946308e-07, | |
| "loss": 0.5656998157501221, | |
| "step": 4082 | |
| }, | |
| { | |
| "epoch": 1.0879062333510923, | |
| "grad_norm": 0.51171875, | |
| "learning_rate": 1.596673919630609e-07, | |
| "loss": 0.5314844846725464, | |
| "step": 4084 | |
| }, | |
| { | |
| "epoch": 1.0884389984017049, | |
| "grad_norm": 0.546875, | |
| "learning_rate": 1.4637591696222697e-07, | |
| "loss": 0.5581560134887695, | |
| "step": 4086 | |
| }, | |
| { | |
| "epoch": 1.0889717634523175, | |
| "grad_norm": 0.515625, | |
| "learning_rate": 1.3366095209729868e-07, | |
| "loss": 0.5622378587722778, | |
| "step": 4088 | |
| }, | |
| { | |
| "epoch": 1.0895045285029301, | |
| "grad_norm": 0.51953125, | |
| "learning_rate": 1.215226812990089e-07, | |
| "loss": 0.5572565793991089, | |
| "step": 4090 | |
| }, | |
| { | |
| "epoch": 1.090037293553543, | |
| "grad_norm": 0.546875, | |
| "learning_rate": 1.0996128015581253e-07, | |
| "loss": 0.5689173340797424, | |
| "step": 4092 | |
| }, | |
| { | |
| "epoch": 1.0905700586041556, | |
| "grad_norm": 0.54296875, | |
| "learning_rate": 9.897691591134184e-08, | |
| "loss": 0.542863130569458, | |
| "step": 4094 | |
| }, | |
| { | |
| "epoch": 1.0911028236547682, | |
| "grad_norm": 0.51953125, | |
| "learning_rate": 8.856974746199954e-08, | |
| "loss": 0.5954520106315613, | |
| "step": 4096 | |
| }, | |
| { | |
| "epoch": 1.091635588705381, | |
| "grad_norm": 0.5078125, | |
| "learning_rate": 7.873992535463615e-08, | |
| "loss": 0.5493767857551575, | |
| "step": 4098 | |
| }, | |
| { | |
| "epoch": 1.0921683537559936, | |
| "grad_norm": 0.546875, | |
| "learning_rate": 6.9487591784414e-08, | |
| "loss": 0.550544023513794, | |
| "step": 4100 | |
| }, | |
| { | |
| "epoch": 1.0927011188066063, | |
| "grad_norm": 0.5078125, | |
| "learning_rate": 6.081288059271107e-08, | |
| "loss": 0.5349717736244202, | |
| "step": 4102 | |
| }, | |
| { | |
| "epoch": 1.0932338838572189, | |
| "grad_norm": 0.55078125, | |
| "learning_rate": 5.271591726520253e-08, | |
| "loss": 0.57042396068573, | |
| "step": 4104 | |
| }, | |
| { | |
| "epoch": 1.0937666489078317, | |
| "grad_norm": 0.5390625, | |
| "learning_rate": 4.519681893004002e-08, | |
| "loss": 0.5848683714866638, | |
| "step": 4106 | |
| }, | |
| { | |
| "epoch": 1.0942994139584443, | |
| "grad_norm": 0.52734375, | |
| "learning_rate": 3.825569435616405e-08, | |
| "loss": 0.6174777150154114, | |
| "step": 4108 | |
| }, | |
| { | |
| "epoch": 1.094832179009057, | |
| "grad_norm": 0.5390625, | |
| "learning_rate": 3.189264395172753e-08, | |
| "loss": 0.5845167636871338, | |
| "step": 4110 | |
| }, | |
| { | |
| "epoch": 1.0953649440596698, | |
| "grad_norm": 0.51953125, | |
| "learning_rate": 2.6107759762634687e-08, | |
| "loss": 0.5631780028343201, | |
| "step": 4112 | |
| }, | |
| { | |
| "epoch": 1.0958977091102824, | |
| "grad_norm": 0.52734375, | |
| "learning_rate": 2.090112547122658e-08, | |
| "loss": 0.551435649394989, | |
| "step": 4114 | |
| }, | |
| { | |
| "epoch": 1.096430474160895, | |
| "grad_norm": 0.515625, | |
| "learning_rate": 1.6272816395050962e-08, | |
| "loss": 0.5512660145759583, | |
| "step": 4116 | |
| }, | |
| { | |
| "epoch": 1.0969632392115076, | |
| "grad_norm": 0.5234375, | |
| "learning_rate": 1.2222899485792027e-08, | |
| "loss": 0.5918897986412048, | |
| "step": 4118 | |
| }, | |
| { | |
| "epoch": 1.0974960042621205, | |
| "grad_norm": 0.515625, | |
| "learning_rate": 8.751433328288982e-09, | |
| "loss": 0.5378780364990234, | |
| "step": 4120 | |
| }, | |
| { | |
| "epoch": 1.098028769312733, | |
| "grad_norm": 0.55859375, | |
| "learning_rate": 5.858468139687823e-09, | |
| "loss": 0.5922811627388, | |
| "step": 4122 | |
| }, | |
| { | |
| "epoch": 1.0985615343633457, | |
| "grad_norm": 0.51953125, | |
| "learning_rate": 3.544045768730797e-09, | |
| "loss": 0.5495621562004089, | |
| "step": 4124 | |
| }, | |
| { | |
| "epoch": 1.0990942994139585, | |
| "grad_norm": 0.5078125, | |
| "learning_rate": 1.8081996951258007e-09, | |
| "loss": 0.559400200843811, | |
| "step": 4126 | |
| }, | |
| { | |
| "epoch": 1.0996270644645711, | |
| "grad_norm": 0.5390625, | |
| "learning_rate": 6.509550290800803e-10, | |
| "loss": 0.6017919182777405, | |
| "step": 4128 | |
| }, | |
| { | |
| "epoch": 1.1001598295151838, | |
| "grad_norm": 0.53515625, | |
| "learning_rate": 7.23285109449634e-11, | |
| "loss": 0.6059576869010925, | |
| "step": 4130 | |
| }, | |
| { | |
| "epoch": 1.1001598295151838, | |
| "step": 4130, | |
| "total_flos": 3.5614889450215834e+18, | |
| "train_loss": 0.8152079602130677, | |
| "train_runtime": 10535.874, | |
| "train_samples_per_second": 9.406, | |
| "train_steps_per_second": 0.392 | |
| } | |
| ], | |
| "logging_steps": 2, | |
| "max_steps": 4130, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 2, | |
| "save_steps": 938, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": true | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 3.5614889450215834e+18, | |
| "train_batch_size": 6, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |