Text Generation
Transformers
Safetensors
qwen3
Generated from Trainer
unsloth
trl
sft
conversational
custom_code
text-generation-inference
Instructions to use Ba2han/model-sft-q2 with libraries, inference providers, notebooks, and local apps. Follow these links to get started.
- Libraries
- Transformers
How to use Ba2han/model-sft-q2 with Transformers:
# Use a pipeline as a high-level helper from transformers import pipeline pipe = pipeline("text-generation", model="Ba2han/model-sft-q2", trust_remote_code=True) messages = [ {"role": "user", "content": "Who are you?"}, ] pipe(messages)# Load model directly from transformers import AutoTokenizer, AutoModelForCausalLM tokenizer = AutoTokenizer.from_pretrained("Ba2han/model-sft-q2", trust_remote_code=True) model = AutoModelForCausalLM.from_pretrained("Ba2han/model-sft-q2", trust_remote_code=True) messages = [ {"role": "user", "content": "Who are you?"}, ] inputs = tokenizer.apply_chat_template( messages, add_generation_prompt=True, tokenize=True, return_dict=True, return_tensors="pt", ).to(model.device) outputs = model.generate(**inputs, max_new_tokens=40) print(tokenizer.decode(outputs[0][inputs["input_ids"].shape[-1]:])) - Notebooks
- Google Colab
- Kaggle
- Local Apps
- vLLM
How to use Ba2han/model-sft-q2 with vLLM:
Install from pip and serve model
# Install vLLM from pip: pip install vllm # Start the vLLM server: vllm serve "Ba2han/model-sft-q2" # Call the server using curl (OpenAI-compatible API): curl -X POST "http://localhost:8000/v1/chat/completions" \ -H "Content-Type: application/json" \ --data '{ "model": "Ba2han/model-sft-q2", "messages": [ { "role": "user", "content": "What is the capital of France?" } ] }'Use Docker
docker model run hf.co/Ba2han/model-sft-q2
- SGLang
How to use Ba2han/model-sft-q2 with SGLang:
Install from pip and serve model
# Install SGLang from pip: pip install sglang # Start the SGLang server: python3 -m sglang.launch_server \ --model-path "Ba2han/model-sft-q2" \ --host 0.0.0.0 \ --port 30000 # Call the server using curl (OpenAI-compatible API): curl -X POST "http://localhost:30000/v1/chat/completions" \ -H "Content-Type: application/json" \ --data '{ "model": "Ba2han/model-sft-q2", "messages": [ { "role": "user", "content": "What is the capital of France?" } ] }'Use Docker images
docker run --gpus all \ --shm-size 32g \ -p 30000:30000 \ -v ~/.cache/huggingface:/root/.cache/huggingface \ --env "HF_TOKEN=<secret>" \ --ipc=host \ lmsysorg/sglang:latest \ python3 -m sglang.launch_server \ --model-path "Ba2han/model-sft-q2" \ --host 0.0.0.0 \ --port 30000 # Call the server using curl (OpenAI-compatible API): curl -X POST "http://localhost:30000/v1/chat/completions" \ -H "Content-Type: application/json" \ --data '{ "model": "Ba2han/model-sft-q2", "messages": [ { "role": "user", "content": "What is the capital of France?" } ] }' - Unsloth Studio
How to use Ba2han/model-sft-q2 with Unsloth Studio:
Install Unsloth Studio (macOS, Linux, WSL)
curl -fsSL https://unsloth.ai/install.sh | sh # Run unsloth studio unsloth studio -H 0.0.0.0 -p 8888 # Then open http://localhost:8888 in your browser # Search for Ba2han/model-sft-q2 to start chatting
Install Unsloth Studio (Windows)
irm https://unsloth.ai/install.ps1 | iex # Run unsloth studio unsloth studio -H 0.0.0.0 -p 8888 # Then open http://localhost:8888 in your browser # Search for Ba2han/model-sft-q2 to start chatting
Using HuggingFace Spaces for Unsloth
# No setup required # Open https://huggingface.co/spaces/unsloth/studio in your browser # Search for Ba2han/model-sft-q2 to start chatting
Load model with FastModel
pip install unsloth from unsloth import FastModel model, tokenizer = FastModel.from_pretrained( model_name="Ba2han/model-sft-q2", max_seq_length=2048, ) - Docker Model Runner
How to use Ba2han/model-sft-q2 with Docker Model Runner:
docker model run hf.co/Ba2han/model-sft-q2
Training in progress, step 7043, checkpoint
Browse files
last-checkpoint/model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1049614696
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:830c6e82e873a112a37dd424f4f371fe20d8a3252d03b78bfd3e287c8de3d81a
|
| 3 |
size 1049614696
|
last-checkpoint/optimizer.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1372902609
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:4e32fd6dca63545237e61e1d298819978874abd9335a646c649a89010c736cef
|
| 3 |
size 1372902609
|
last-checkpoint/scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1465
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:c10a0c9be6ff9bdac8694187029e0ec06b326fd90a60892aa3c63f5e1914a58f
|
| 3 |
size 1465
|
last-checkpoint/trainer_state.json
CHANGED
|
@@ -2,9 +2,9 @@
|
|
| 2 |
"best_global_step": null,
|
| 3 |
"best_metric": null,
|
| 4 |
"best_model_checkpoint": null,
|
| 5 |
-
"epoch":
|
| 6 |
"eval_steps": 705,
|
| 7 |
-
"global_step":
|
| 8 |
"is_hyper_param_search": false,
|
| 9 |
"is_local_process_zero": true,
|
| 10 |
"is_world_process_zero": true,
|
|
@@ -22284,6 +22284,2457 @@
|
|
| 22284 |
"eval_samples_per_second": 55.127,
|
| 22285 |
"eval_steps_per_second": 6.892,
|
| 22286 |
"step": 6345
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 22287 |
}
|
| 22288 |
],
|
| 22289 |
"logging_steps": 2,
|
|
@@ -22298,12 +24749,12 @@
|
|
| 22298 |
"should_evaluate": false,
|
| 22299 |
"should_log": false,
|
| 22300 |
"should_save": true,
|
| 22301 |
-
"should_training_stop":
|
| 22302 |
},
|
| 22303 |
"attributes": {}
|
| 22304 |
}
|
| 22305 |
},
|
| 22306 |
-
"total_flos":
|
| 22307 |
"train_batch_size": 4,
|
| 22308 |
"trial_name": null,
|
| 22309 |
"trial_params": null
|
|
|
|
| 2 |
"best_global_step": null,
|
| 3 |
"best_metric": null,
|
| 4 |
"best_model_checkpoint": null,
|
| 5 |
+
"epoch": 1.0,
|
| 6 |
"eval_steps": 705,
|
| 7 |
+
"global_step": 7043,
|
| 8 |
"is_hyper_param_search": false,
|
| 9 |
"is_local_process_zero": true,
|
| 10 |
"is_world_process_zero": true,
|
|
|
|
| 22284 |
"eval_samples_per_second": 55.127,
|
| 22285 |
"eval_steps_per_second": 6.892,
|
| 22286 |
"step": 6345
|
| 22287 |
+
},
|
| 22288 |
+
{
|
| 22289 |
+
"epoch": 0.9011164557411384,
|
| 22290 |
+
"grad_norm": 0.1337890625,
|
| 22291 |
+
"learning_rate": 4.9663883488730087e-05,
|
| 22292 |
+
"loss": 1.6326470375061035,
|
| 22293 |
+
"step": 6346
|
| 22294 |
+
},
|
| 22295 |
+
{
|
| 22296 |
+
"epoch": 0.901400450842223,
|
| 22297 |
+
"grad_norm": 0.1259765625,
|
| 22298 |
+
"learning_rate": 4.9439813315402384e-05,
|
| 22299 |
+
"loss": 1.6485044956207275,
|
| 22300 |
+
"step": 6348
|
| 22301 |
+
},
|
| 22302 |
+
{
|
| 22303 |
+
"epoch": 0.9016844459433074,
|
| 22304 |
+
"grad_norm": 0.1201171875,
|
| 22305 |
+
"learning_rate": 4.9215754393194874e-05,
|
| 22306 |
+
"loss": 1.6287813186645508,
|
| 22307 |
+
"step": 6350
|
| 22308 |
+
},
|
| 22309 |
+
{
|
| 22310 |
+
"epoch": 0.901968441044392,
|
| 22311 |
+
"grad_norm": 0.1279296875,
|
| 22312 |
+
"learning_rate": 4.89917112222393e-05,
|
| 22313 |
+
"loss": 1.6801517009735107,
|
| 22314 |
+
"step": 6352
|
| 22315 |
+
},
|
| 22316 |
+
{
|
| 22317 |
+
"epoch": 0.9022524361454765,
|
| 22318 |
+
"grad_norm": 0.130859375,
|
| 22319 |
+
"learning_rate": 4.8767688302350956e-05,
|
| 22320 |
+
"loss": 1.6600592136383057,
|
| 22321 |
+
"step": 6354
|
| 22322 |
+
},
|
| 22323 |
+
{
|
| 22324 |
+
"epoch": 0.902536431246561,
|
| 22325 |
+
"grad_norm": 0.1220703125,
|
| 22326 |
+
"learning_rate": 4.8543690132938494e-05,
|
| 22327 |
+
"loss": 1.6464087963104248,
|
| 22328 |
+
"step": 6356
|
| 22329 |
+
},
|
| 22330 |
+
{
|
| 22331 |
+
"epoch": 0.9028204263476455,
|
| 22332 |
+
"grad_norm": 0.12890625,
|
| 22333 |
+
"learning_rate": 4.8319721212913436e-05,
|
| 22334 |
+
"loss": 1.6564900875091553,
|
| 22335 |
+
"step": 6358
|
| 22336 |
+
},
|
| 22337 |
+
{
|
| 22338 |
+
"epoch": 0.90310442144873,
|
| 22339 |
+
"grad_norm": 0.12890625,
|
| 22340 |
+
"learning_rate": 4.8095786040599844e-05,
|
| 22341 |
+
"loss": 1.6286182403564453,
|
| 22342 |
+
"step": 6360
|
| 22343 |
+
},
|
| 22344 |
+
{
|
| 22345 |
+
"epoch": 0.9033884165498145,
|
| 22346 |
+
"grad_norm": 0.1318359375,
|
| 22347 |
+
"learning_rate": 4.787188911364393e-05,
|
| 22348 |
+
"loss": 1.640902042388916,
|
| 22349 |
+
"step": 6362
|
| 22350 |
+
},
|
| 22351 |
+
{
|
| 22352 |
+
"epoch": 0.9036724116508991,
|
| 22353 |
+
"grad_norm": 0.12353515625,
|
| 22354 |
+
"learning_rate": 4.764803492892382e-05,
|
| 22355 |
+
"loss": 1.6655956506729126,
|
| 22356 |
+
"step": 6364
|
| 22357 |
+
},
|
| 22358 |
+
{
|
| 22359 |
+
"epoch": 0.9039564067519835,
|
| 22360 |
+
"grad_norm": 0.1337890625,
|
| 22361 |
+
"learning_rate": 4.742422798245916e-05,
|
| 22362 |
+
"loss": 1.629305362701416,
|
| 22363 |
+
"step": 6366
|
| 22364 |
+
},
|
| 22365 |
+
{
|
| 22366 |
+
"epoch": 0.9042404018530681,
|
| 22367 |
+
"grad_norm": 0.1220703125,
|
| 22368 |
+
"learning_rate": 4.720047276932084e-05,
|
| 22369 |
+
"loss": 1.6638224124908447,
|
| 22370 |
+
"step": 6368
|
| 22371 |
+
},
|
| 22372 |
+
{
|
| 22373 |
+
"epoch": 0.9045243969541525,
|
| 22374 |
+
"grad_norm": 0.1279296875,
|
| 22375 |
+
"learning_rate": 4.697677378354069e-05,
|
| 22376 |
+
"loss": 1.654644250869751,
|
| 22377 |
+
"step": 6370
|
| 22378 |
+
},
|
| 22379 |
+
{
|
| 22380 |
+
"epoch": 0.904808392055237,
|
| 22381 |
+
"grad_norm": 0.12109375,
|
| 22382 |
+
"learning_rate": 4.675313551802123e-05,
|
| 22383 |
+
"loss": 1.679664969444275,
|
| 22384 |
+
"step": 6372
|
| 22385 |
+
},
|
| 22386 |
+
{
|
| 22387 |
+
"epoch": 0.9050923871563216,
|
| 22388 |
+
"grad_norm": 0.1240234375,
|
| 22389 |
+
"learning_rate": 4.6529562464445484e-05,
|
| 22390 |
+
"loss": 1.6713364124298096,
|
| 22391 |
+
"step": 6374
|
| 22392 |
+
},
|
| 22393 |
+
{
|
| 22394 |
+
"epoch": 0.905376382257406,
|
| 22395 |
+
"grad_norm": 0.1220703125,
|
| 22396 |
+
"learning_rate": 4.630605911318667e-05,
|
| 22397 |
+
"loss": 1.6539726257324219,
|
| 22398 |
+
"step": 6376
|
| 22399 |
+
},
|
| 22400 |
+
{
|
| 22401 |
+
"epoch": 0.9056603773584906,
|
| 22402 |
+
"grad_norm": 0.12158203125,
|
| 22403 |
+
"learning_rate": 4.6082629953218086e-05,
|
| 22404 |
+
"loss": 1.682896375656128,
|
| 22405 |
+
"step": 6378
|
| 22406 |
+
},
|
| 22407 |
+
{
|
| 22408 |
+
"epoch": 0.905944372459575,
|
| 22409 |
+
"grad_norm": 0.12890625,
|
| 22410 |
+
"learning_rate": 4.585927947202294e-05,
|
| 22411 |
+
"loss": 1.6160575151443481,
|
| 22412 |
+
"step": 6380
|
| 22413 |
+
},
|
| 22414 |
+
{
|
| 22415 |
+
"epoch": 0.9062283675606596,
|
| 22416 |
+
"grad_norm": 0.1240234375,
|
| 22417 |
+
"learning_rate": 4.563601215550422e-05,
|
| 22418 |
+
"loss": 1.6658324003219604,
|
| 22419 |
+
"step": 6382
|
| 22420 |
+
},
|
| 22421 |
+
{
|
| 22422 |
+
"epoch": 0.9065123626617441,
|
| 22423 |
+
"grad_norm": 0.130859375,
|
| 22424 |
+
"learning_rate": 4.5412832487894495e-05,
|
| 22425 |
+
"loss": 1.6549348831176758,
|
| 22426 |
+
"step": 6384
|
| 22427 |
+
},
|
| 22428 |
+
{
|
| 22429 |
+
"epoch": 0.9067963577628286,
|
| 22430 |
+
"grad_norm": 0.12060546875,
|
| 22431 |
+
"learning_rate": 4.518974495166606e-05,
|
| 22432 |
+
"loss": 1.6667382717132568,
|
| 22433 |
+
"step": 6386
|
| 22434 |
+
},
|
| 22435 |
+
{
|
| 22436 |
+
"epoch": 0.9070803528639131,
|
| 22437 |
+
"grad_norm": 0.1279296875,
|
| 22438 |
+
"learning_rate": 4.496675402744072e-05,
|
| 22439 |
+
"loss": 1.6492877006530762,
|
| 22440 |
+
"step": 6388
|
| 22441 |
+
},
|
| 22442 |
+
{
|
| 22443 |
+
"epoch": 0.9073643479649977,
|
| 22444 |
+
"grad_norm": 0.140625,
|
| 22445 |
+
"learning_rate": 4.4743864193899914e-05,
|
| 22446 |
+
"loss": 1.688408374786377,
|
| 22447 |
+
"step": 6390
|
| 22448 |
+
},
|
| 22449 |
+
{
|
| 22450 |
+
"epoch": 0.9076483430660821,
|
| 22451 |
+
"grad_norm": 0.1318359375,
|
| 22452 |
+
"learning_rate": 4.452107992769463e-05,
|
| 22453 |
+
"loss": 1.6715737581253052,
|
| 22454 |
+
"step": 6392
|
| 22455 |
+
},
|
| 22456 |
+
{
|
| 22457 |
+
"epoch": 0.9079323381671666,
|
| 22458 |
+
"grad_norm": 0.1279296875,
|
| 22459 |
+
"learning_rate": 4.4298405703355685e-05,
|
| 22460 |
+
"loss": 1.6229228973388672,
|
| 22461 |
+
"step": 6394
|
| 22462 |
+
},
|
| 22463 |
+
{
|
| 22464 |
+
"epoch": 0.9082163332682511,
|
| 22465 |
+
"grad_norm": 0.1259765625,
|
| 22466 |
+
"learning_rate": 4.407584599320368e-05,
|
| 22467 |
+
"loss": 1.635927677154541,
|
| 22468 |
+
"step": 6396
|
| 22469 |
+
},
|
| 22470 |
+
{
|
| 22471 |
+
"epoch": 0.9085003283693356,
|
| 22472 |
+
"grad_norm": 0.126953125,
|
| 22473 |
+
"learning_rate": 4.385340526725928e-05,
|
| 22474 |
+
"loss": 1.6395500898361206,
|
| 22475 |
+
"step": 6398
|
| 22476 |
+
},
|
| 22477 |
+
{
|
| 22478 |
+
"epoch": 0.9087843234704202,
|
| 22479 |
+
"grad_norm": 0.1298828125,
|
| 22480 |
+
"learning_rate": 4.3631087993153375e-05,
|
| 22481 |
+
"loss": 1.68593168258667,
|
| 22482 |
+
"step": 6400
|
| 22483 |
+
},
|
| 22484 |
+
{
|
| 22485 |
+
"epoch": 0.9090683185715046,
|
| 22486 |
+
"grad_norm": 0.1298828125,
|
| 22487 |
+
"learning_rate": 4.3408898636037384e-05,
|
| 22488 |
+
"loss": 1.6664785146713257,
|
| 22489 |
+
"step": 6402
|
| 22490 |
+
},
|
| 22491 |
+
{
|
| 22492 |
+
"epoch": 0.9093523136725892,
|
| 22493 |
+
"grad_norm": 0.1259765625,
|
| 22494 |
+
"learning_rate": 4.318684165849359e-05,
|
| 22495 |
+
"loss": 1.6475862264633179,
|
| 22496 |
+
"step": 6404
|
| 22497 |
+
},
|
| 22498 |
+
{
|
| 22499 |
+
"epoch": 0.9096363087736736,
|
| 22500 |
+
"grad_norm": 0.1298828125,
|
| 22501 |
+
"learning_rate": 4.2964921520445466e-05,
|
| 22502 |
+
"loss": 1.6552307605743408,
|
| 22503 |
+
"step": 6406
|
| 22504 |
+
},
|
| 22505 |
+
{
|
| 22506 |
+
"epoch": 0.9099203038747582,
|
| 22507 |
+
"grad_norm": 0.130859375,
|
| 22508 |
+
"learning_rate": 4.274314267906809e-05,
|
| 22509 |
+
"loss": 1.6667717695236206,
|
| 22510 |
+
"step": 6408
|
| 22511 |
+
},
|
| 22512 |
+
{
|
| 22513 |
+
"epoch": 0.9102042989758427,
|
| 22514 |
+
"grad_norm": 0.1259765625,
|
| 22515 |
+
"learning_rate": 4.252150958869871e-05,
|
| 22516 |
+
"loss": 1.635591745376587,
|
| 22517 |
+
"step": 6410
|
| 22518 |
+
},
|
| 22519 |
+
{
|
| 22520 |
+
"epoch": 0.9104882940769272,
|
| 22521 |
+
"grad_norm": 0.12158203125,
|
| 22522 |
+
"learning_rate": 4.230002670074719e-05,
|
| 22523 |
+
"loss": 1.6460670232772827,
|
| 22524 |
+
"step": 6412
|
| 22525 |
+
},
|
| 22526 |
+
{
|
| 22527 |
+
"epoch": 0.9107722891780117,
|
| 22528 |
+
"grad_norm": 0.126953125,
|
| 22529 |
+
"learning_rate": 4.2078698463606674e-05,
|
| 22530 |
+
"loss": 1.6729345321655273,
|
| 22531 |
+
"step": 6414
|
| 22532 |
+
},
|
| 22533 |
+
{
|
| 22534 |
+
"epoch": 0.9110562842790962,
|
| 22535 |
+
"grad_norm": 0.1259765625,
|
| 22536 |
+
"learning_rate": 4.1857529322564136e-05,
|
| 22537 |
+
"loss": 1.683729648590088,
|
| 22538 |
+
"step": 6416
|
| 22539 |
+
},
|
| 22540 |
+
{
|
| 22541 |
+
"epoch": 0.9113402793801807,
|
| 22542 |
+
"grad_norm": 0.12109375,
|
| 22543 |
+
"learning_rate": 4.163652371971125e-05,
|
| 22544 |
+
"loss": 1.66416597366333,
|
| 22545 |
+
"step": 6418
|
| 22546 |
+
},
|
| 22547 |
+
{
|
| 22548 |
+
"epoch": 0.9116242744812652,
|
| 22549 |
+
"grad_norm": 0.125,
|
| 22550 |
+
"learning_rate": 4.141568609385504e-05,
|
| 22551 |
+
"loss": 1.673246145248413,
|
| 22552 |
+
"step": 6420
|
| 22553 |
+
},
|
| 22554 |
+
{
|
| 22555 |
+
"epoch": 0.9119082695823497,
|
| 22556 |
+
"grad_norm": 0.35546875,
|
| 22557 |
+
"learning_rate": 4.119502088042882e-05,
|
| 22558 |
+
"loss": 1.6377911567687988,
|
| 22559 |
+
"step": 6422
|
| 22560 |
+
},
|
| 22561 |
+
{
|
| 22562 |
+
"epoch": 0.9121922646834342,
|
| 22563 |
+
"grad_norm": 0.1240234375,
|
| 22564 |
+
"learning_rate": 4.097453251140305e-05,
|
| 22565 |
+
"loss": 1.6818692684173584,
|
| 22566 |
+
"step": 6424
|
| 22567 |
+
},
|
| 22568 |
+
{
|
| 22569 |
+
"epoch": 0.9124762597845187,
|
| 22570 |
+
"grad_norm": 0.123046875,
|
| 22571 |
+
"learning_rate": 4.0754225415196325e-05,
|
| 22572 |
+
"loss": 1.664668083190918,
|
| 22573 |
+
"step": 6426
|
| 22574 |
+
},
|
| 22575 |
+
{
|
| 22576 |
+
"epoch": 0.9127602548856032,
|
| 22577 |
+
"grad_norm": 0.1240234375,
|
| 22578 |
+
"learning_rate": 4.053410401658646e-05,
|
| 22579 |
+
"loss": 1.648174524307251,
|
| 22580 |
+
"step": 6428
|
| 22581 |
+
},
|
| 22582 |
+
{
|
| 22583 |
+
"epoch": 0.9130442499866878,
|
| 22584 |
+
"grad_norm": 0.12451171875,
|
| 22585 |
+
"learning_rate": 4.031417273662167e-05,
|
| 22586 |
+
"loss": 1.6480052471160889,
|
| 22587 |
+
"step": 6430
|
| 22588 |
+
},
|
| 22589 |
+
{
|
| 22590 |
+
"epoch": 0.9133282450877722,
|
| 22591 |
+
"grad_norm": 0.11767578125,
|
| 22592 |
+
"learning_rate": 4.00944359925316e-05,
|
| 22593 |
+
"loss": 1.61641263961792,
|
| 22594 |
+
"step": 6432
|
| 22595 |
+
},
|
| 22596 |
+
{
|
| 22597 |
+
"epoch": 0.9136122401888568,
|
| 22598 |
+
"grad_norm": 0.125,
|
| 22599 |
+
"learning_rate": 3.987489819763882e-05,
|
| 22600 |
+
"loss": 1.6644868850708008,
|
| 22601 |
+
"step": 6434
|
| 22602 |
+
},
|
| 22603 |
+
{
|
| 22604 |
+
"epoch": 0.9138962352899412,
|
| 22605 |
+
"grad_norm": 0.12890625,
|
| 22606 |
+
"learning_rate": 3.965556376127007e-05,
|
| 22607 |
+
"loss": 1.653461217880249,
|
| 22608 |
+
"step": 6436
|
| 22609 |
+
},
|
| 22610 |
+
{
|
| 22611 |
+
"epoch": 0.9141802303910258,
|
| 22612 |
+
"grad_norm": 0.126953125,
|
| 22613 |
+
"learning_rate": 3.943643708866764e-05,
|
| 22614 |
+
"loss": 1.662238359451294,
|
| 22615 |
+
"step": 6438
|
| 22616 |
+
},
|
| 22617 |
+
{
|
| 22618 |
+
"epoch": 0.9144642254921103,
|
| 22619 |
+
"grad_norm": 0.126953125,
|
| 22620 |
+
"learning_rate": 3.921752258090108e-05,
|
| 22621 |
+
"loss": 1.6478242874145508,
|
| 22622 |
+
"step": 6440
|
| 22623 |
+
},
|
| 22624 |
+
{
|
| 22625 |
+
"epoch": 0.9147482205931947,
|
| 22626 |
+
"grad_norm": 0.126953125,
|
| 22627 |
+
"learning_rate": 3.899882463477861e-05,
|
| 22628 |
+
"loss": 1.7027912139892578,
|
| 22629 |
+
"step": 6442
|
| 22630 |
+
},
|
| 22631 |
+
{
|
| 22632 |
+
"epoch": 0.9150322156942793,
|
| 22633 |
+
"grad_norm": 0.119140625,
|
| 22634 |
+
"learning_rate": 3.878034764275897e-05,
|
| 22635 |
+
"loss": 1.6240694522857666,
|
| 22636 |
+
"step": 6444
|
| 22637 |
+
},
|
| 22638 |
+
{
|
| 22639 |
+
"epoch": 0.9153162107953637,
|
| 22640 |
+
"grad_norm": 0.12158203125,
|
| 22641 |
+
"learning_rate": 3.856209599286305e-05,
|
| 22642 |
+
"loss": 1.63136887550354,
|
| 22643 |
+
"step": 6446
|
| 22644 |
+
},
|
| 22645 |
+
{
|
| 22646 |
+
"epoch": 0.9156002058964483,
|
| 22647 |
+
"grad_norm": 0.1259765625,
|
| 22648 |
+
"learning_rate": 3.83440740685859e-05,
|
| 22649 |
+
"loss": 1.6573362350463867,
|
| 22650 |
+
"step": 6448
|
| 22651 |
+
},
|
| 22652 |
+
{
|
| 22653 |
+
"epoch": 0.9158842009975328,
|
| 22654 |
+
"grad_norm": 0.11962890625,
|
| 22655 |
+
"learning_rate": 3.8126286248808586e-05,
|
| 22656 |
+
"loss": 1.67368745803833,
|
| 22657 |
+
"step": 6450
|
| 22658 |
+
},
|
| 22659 |
+
{
|
| 22660 |
+
"epoch": 0.9161681960986173,
|
| 22661 |
+
"grad_norm": 0.1318359375,
|
| 22662 |
+
"learning_rate": 3.790873690771032e-05,
|
| 22663 |
+
"loss": 1.6893939971923828,
|
| 22664 |
+
"step": 6452
|
| 22665 |
+
},
|
| 22666 |
+
{
|
| 22667 |
+
"epoch": 0.9164521911997018,
|
| 22668 |
+
"grad_norm": 0.1259765625,
|
| 22669 |
+
"learning_rate": 3.769143041468051e-05,
|
| 22670 |
+
"loss": 1.641585111618042,
|
| 22671 |
+
"step": 6454
|
| 22672 |
+
},
|
| 22673 |
+
{
|
| 22674 |
+
"epoch": 0.9167361863007863,
|
| 22675 |
+
"grad_norm": 0.123046875,
|
| 22676 |
+
"learning_rate": 3.747437113423111e-05,
|
| 22677 |
+
"loss": 1.647886872291565,
|
| 22678 |
+
"step": 6456
|
| 22679 |
+
},
|
| 22680 |
+
{
|
| 22681 |
+
"epoch": 0.9170201814018708,
|
| 22682 |
+
"grad_norm": 0.1279296875,
|
| 22683 |
+
"learning_rate": 3.72575634259089e-05,
|
| 22684 |
+
"loss": 1.667672872543335,
|
| 22685 |
+
"step": 6458
|
| 22686 |
+
},
|
| 22687 |
+
{
|
| 22688 |
+
"epoch": 0.9173041765029554,
|
| 22689 |
+
"grad_norm": 0.1201171875,
|
| 22690 |
+
"learning_rate": 3.7041011644207943e-05,
|
| 22691 |
+
"loss": 1.637958288192749,
|
| 22692 |
+
"step": 6460
|
| 22693 |
+
},
|
| 22694 |
+
{
|
| 22695 |
+
"epoch": 0.9175881716040398,
|
| 22696 |
+
"grad_norm": 0.1279296875,
|
| 22697 |
+
"learning_rate": 3.682472013848207e-05,
|
| 22698 |
+
"loss": 1.6648609638214111,
|
| 22699 |
+
"step": 6462
|
| 22700 |
+
},
|
| 22701 |
+
{
|
| 22702 |
+
"epoch": 0.9178721667051243,
|
| 22703 |
+
"grad_norm": 0.1279296875,
|
| 22704 |
+
"learning_rate": 3.6608693252857625e-05,
|
| 22705 |
+
"loss": 1.6248102188110352,
|
| 22706 |
+
"step": 6464
|
| 22707 |
+
},
|
| 22708 |
+
{
|
| 22709 |
+
"epoch": 0.9181561618062088,
|
| 22710 |
+
"grad_norm": 0.11865234375,
|
| 22711 |
+
"learning_rate": 3.639293532614616e-05,
|
| 22712 |
+
"loss": 1.664893627166748,
|
| 22713 |
+
"step": 6466
|
| 22714 |
+
},
|
| 22715 |
+
{
|
| 22716 |
+
"epoch": 0.9184401569072933,
|
| 22717 |
+
"grad_norm": 0.1357421875,
|
| 22718 |
+
"learning_rate": 3.617745069175732e-05,
|
| 22719 |
+
"loss": 1.6504709720611572,
|
| 22720 |
+
"step": 6468
|
| 22721 |
+
},
|
| 22722 |
+
{
|
| 22723 |
+
"epoch": 0.9187241520083779,
|
| 22724 |
+
"grad_norm": 0.125,
|
| 22725 |
+
"learning_rate": 3.5962243677611704e-05,
|
| 22726 |
+
"loss": 1.666931390762329,
|
| 22727 |
+
"step": 6470
|
| 22728 |
+
},
|
| 22729 |
+
{
|
| 22730 |
+
"epoch": 0.9190081471094623,
|
| 22731 |
+
"grad_norm": 0.1357421875,
|
| 22732 |
+
"learning_rate": 3.5747318606054105e-05,
|
| 22733 |
+
"loss": 1.6407499313354492,
|
| 22734 |
+
"step": 6472
|
| 22735 |
+
},
|
| 22736 |
+
{
|
| 22737 |
+
"epoch": 0.9192921422105469,
|
| 22738 |
+
"grad_norm": 0.12451171875,
|
| 22739 |
+
"learning_rate": 3.5532679793766584e-05,
|
| 22740 |
+
"loss": 1.66179621219635,
|
| 22741 |
+
"step": 6474
|
| 22742 |
+
},
|
| 22743 |
+
{
|
| 22744 |
+
"epoch": 0.9195761373116313,
|
| 22745 |
+
"grad_norm": 0.166015625,
|
| 22746 |
+
"learning_rate": 3.531833155168182e-05,
|
| 22747 |
+
"loss": 1.6688923835754395,
|
| 22748 |
+
"step": 6476
|
| 22749 |
+
},
|
| 22750 |
+
{
|
| 22751 |
+
"epoch": 0.9198601324127159,
|
| 22752 |
+
"grad_norm": 0.12451171875,
|
| 22753 |
+
"learning_rate": 3.510427818489643e-05,
|
| 22754 |
+
"loss": 1.6631419658660889,
|
| 22755 |
+
"step": 6478
|
| 22756 |
+
},
|
| 22757 |
+
{
|
| 22758 |
+
"epoch": 0.9201441275138004,
|
| 22759 |
+
"grad_norm": 0.130859375,
|
| 22760 |
+
"learning_rate": 3.48905239925847e-05,
|
| 22761 |
+
"loss": 1.6476298570632935,
|
| 22762 |
+
"step": 6480
|
| 22763 |
+
},
|
| 22764 |
+
{
|
| 22765 |
+
"epoch": 0.9204281226148849,
|
| 22766 |
+
"grad_norm": 0.1259765625,
|
| 22767 |
+
"learning_rate": 3.467707326791204e-05,
|
| 22768 |
+
"loss": 1.7067539691925049,
|
| 22769 |
+
"step": 6482
|
| 22770 |
+
},
|
| 22771 |
+
{
|
| 22772 |
+
"epoch": 0.9207121177159694,
|
| 22773 |
+
"grad_norm": 0.1279296875,
|
| 22774 |
+
"learning_rate": 3.4463930297948877e-05,
|
| 22775 |
+
"loss": 1.5974493026733398,
|
| 22776 |
+
"step": 6484
|
| 22777 |
+
},
|
| 22778 |
+
{
|
| 22779 |
+
"epoch": 0.920996112817054,
|
| 22780 |
+
"grad_norm": 0.123046875,
|
| 22781 |
+
"learning_rate": 3.4251099363584466e-05,
|
| 22782 |
+
"loss": 1.6740283966064453,
|
| 22783 |
+
"step": 6486
|
| 22784 |
+
},
|
| 22785 |
+
{
|
| 22786 |
+
"epoch": 0.9212801079181384,
|
| 22787 |
+
"grad_norm": 0.1259765625,
|
| 22788 |
+
"learning_rate": 3.4038584739440985e-05,
|
| 22789 |
+
"loss": 1.6813969612121582,
|
| 22790 |
+
"step": 6488
|
| 22791 |
+
},
|
| 22792 |
+
{
|
| 22793 |
+
"epoch": 0.9215641030192229,
|
| 22794 |
+
"grad_norm": 0.1318359375,
|
| 22795 |
+
"learning_rate": 3.3826390693787656e-05,
|
| 22796 |
+
"loss": 1.6607600450515747,
|
| 22797 |
+
"step": 6490
|
| 22798 |
+
},
|
| 22799 |
+
{
|
| 22800 |
+
"epoch": 0.9218480981203074,
|
| 22801 |
+
"grad_norm": 0.12451171875,
|
| 22802 |
+
"learning_rate": 3.3614521488455026e-05,
|
| 22803 |
+
"loss": 1.6704461574554443,
|
| 22804 |
+
"step": 6492
|
| 22805 |
+
},
|
| 22806 |
+
{
|
| 22807 |
+
"epoch": 0.9221320932213919,
|
| 22808 |
+
"grad_norm": 0.125,
|
| 22809 |
+
"learning_rate": 3.3402981378749285e-05,
|
| 22810 |
+
"loss": 1.6542103290557861,
|
| 22811 |
+
"step": 6494
|
| 22812 |
+
},
|
| 22813 |
+
{
|
| 22814 |
+
"epoch": 0.9224160883224765,
|
| 22815 |
+
"grad_norm": 0.1279296875,
|
| 22816 |
+
"learning_rate": 3.319177461336695e-05,
|
| 22817 |
+
"loss": 1.6618162393569946,
|
| 22818 |
+
"step": 6496
|
| 22819 |
+
},
|
| 22820 |
+
{
|
| 22821 |
+
"epoch": 0.9227000834235609,
|
| 22822 |
+
"grad_norm": 0.1279296875,
|
| 22823 |
+
"learning_rate": 3.298090543430944e-05,
|
| 22824 |
+
"loss": 1.670125961303711,
|
| 22825 |
+
"step": 6498
|
| 22826 |
+
},
|
| 22827 |
+
{
|
| 22828 |
+
"epoch": 0.9229840785246455,
|
| 22829 |
+
"grad_norm": 0.1298828125,
|
| 22830 |
+
"learning_rate": 3.277037807679783e-05,
|
| 22831 |
+
"loss": 1.6545251607894897,
|
| 22832 |
+
"step": 6500
|
| 22833 |
+
},
|
| 22834 |
+
{
|
| 22835 |
+
"epoch": 0.9232680736257299,
|
| 22836 |
+
"grad_norm": 0.12451171875,
|
| 22837 |
+
"learning_rate": 3.256019676918793e-05,
|
| 22838 |
+
"loss": 1.6476445198059082,
|
| 22839 |
+
"step": 6502
|
| 22840 |
+
},
|
| 22841 |
+
{
|
| 22842 |
+
"epoch": 0.9235520687268145,
|
| 22843 |
+
"grad_norm": 0.1259765625,
|
| 22844 |
+
"learning_rate": 3.235036573288525e-05,
|
| 22845 |
+
"loss": 1.679513931274414,
|
| 22846 |
+
"step": 6504
|
| 22847 |
+
},
|
| 22848 |
+
{
|
| 22849 |
+
"epoch": 0.923836063827899,
|
| 22850 |
+
"grad_norm": 0.12109375,
|
| 22851 |
+
"learning_rate": 3.2140889182260225e-05,
|
| 22852 |
+
"loss": 1.630507230758667,
|
| 22853 |
+
"step": 6506
|
| 22854 |
+
},
|
| 22855 |
+
{
|
| 22856 |
+
"epoch": 0.9241200589289835,
|
| 22857 |
+
"grad_norm": 0.12353515625,
|
| 22858 |
+
"learning_rate": 3.19317713245636e-05,
|
| 22859 |
+
"loss": 1.6423308849334717,
|
| 22860 |
+
"step": 6508
|
| 22861 |
+
},
|
| 22862 |
+
{
|
| 22863 |
+
"epoch": 0.924404054030068,
|
| 22864 |
+
"grad_norm": 0.1201171875,
|
| 22865 |
+
"learning_rate": 3.172301635984194e-05,
|
| 22866 |
+
"loss": 1.6178326606750488,
|
| 22867 |
+
"step": 6510
|
| 22868 |
+
},
|
| 22869 |
+
{
|
| 22870 |
+
"epoch": 0.9246880491311524,
|
| 22871 |
+
"grad_norm": 0.12353515625,
|
| 22872 |
+
"learning_rate": 3.1514628480853226e-05,
|
| 22873 |
+
"loss": 1.6421864032745361,
|
| 22874 |
+
"step": 6512
|
| 22875 |
+
},
|
| 22876 |
+
{
|
| 22877 |
+
"epoch": 0.924972044232237,
|
| 22878 |
+
"grad_norm": 0.130859375,
|
| 22879 |
+
"learning_rate": 3.13066118729827e-05,
|
| 22880 |
+
"loss": 1.6278722286224365,
|
| 22881 |
+
"step": 6514
|
| 22882 |
+
},
|
| 22883 |
+
{
|
| 22884 |
+
"epoch": 0.9252560393333215,
|
| 22885 |
+
"grad_norm": 0.1259765625,
|
| 22886 |
+
"learning_rate": 3.1098970714158746e-05,
|
| 22887 |
+
"loss": 1.6514774560928345,
|
| 22888 |
+
"step": 6516
|
| 22889 |
+
},
|
| 22890 |
+
{
|
| 22891 |
+
"epoch": 0.925540034434406,
|
| 22892 |
+
"grad_norm": 0.1259765625,
|
| 22893 |
+
"learning_rate": 3.0891709174769e-05,
|
| 22894 |
+
"loss": 1.678621768951416,
|
| 22895 |
+
"step": 6518
|
| 22896 |
+
},
|
| 22897 |
+
{
|
| 22898 |
+
"epoch": 0.9258240295354905,
|
| 22899 |
+
"grad_norm": 0.1298828125,
|
| 22900 |
+
"learning_rate": 3.068483141757665e-05,
|
| 22901 |
+
"loss": 1.6730953454971313,
|
| 22902 |
+
"step": 6520
|
| 22903 |
+
},
|
| 22904 |
+
{
|
| 22905 |
+
"epoch": 0.926108024636575,
|
| 22906 |
+
"grad_norm": 0.126953125,
|
| 22907 |
+
"learning_rate": 3.0478341597636762e-05,
|
| 22908 |
+
"loss": 1.6621979475021362,
|
| 22909 |
+
"step": 6522
|
| 22910 |
+
},
|
| 22911 |
+
{
|
| 22912 |
+
"epoch": 0.9263920197376595,
|
| 22913 |
+
"grad_norm": 0.1220703125,
|
| 22914 |
+
"learning_rate": 3.0272243862212756e-05,
|
| 22915 |
+
"loss": 1.6461114883422852,
|
| 22916 |
+
"step": 6524
|
| 22917 |
+
},
|
| 22918 |
+
{
|
| 22919 |
+
"epoch": 0.9266760148387441,
|
| 22920 |
+
"grad_norm": 0.1279296875,
|
| 22921 |
+
"learning_rate": 3.0066542350693317e-05,
|
| 22922 |
+
"loss": 1.6254409551620483,
|
| 22923 |
+
"step": 6526
|
| 22924 |
+
},
|
| 22925 |
+
{
|
| 22926 |
+
"epoch": 0.9269600099398285,
|
| 22927 |
+
"grad_norm": 0.1259765625,
|
| 22928 |
+
"learning_rate": 2.9861241194509048e-05,
|
| 22929 |
+
"loss": 1.6794788837432861,
|
| 22930 |
+
"step": 6528
|
| 22931 |
+
},
|
| 22932 |
+
{
|
| 22933 |
+
"epoch": 0.9272440050409131,
|
| 22934 |
+
"grad_norm": 0.123046875,
|
| 22935 |
+
"learning_rate": 2.9656344517049656e-05,
|
| 22936 |
+
"loss": 1.6605265140533447,
|
| 22937 |
+
"step": 6530
|
| 22938 |
+
},
|
| 22939 |
+
{
|
| 22940 |
+
"epoch": 0.9275280001419975,
|
| 22941 |
+
"grad_norm": 0.12158203125,
|
| 22942 |
+
"learning_rate": 2.945185643358096e-05,
|
| 22943 |
+
"loss": 1.6932629346847534,
|
| 22944 |
+
"step": 6532
|
| 22945 |
+
},
|
| 22946 |
+
{
|
| 22947 |
+
"epoch": 0.9278119952430821,
|
| 22948 |
+
"grad_norm": 0.1259765625,
|
| 22949 |
+
"learning_rate": 2.924778105116241e-05,
|
| 22950 |
+
"loss": 1.6501400470733643,
|
| 22951 |
+
"step": 6534
|
| 22952 |
+
},
|
| 22953 |
+
{
|
| 22954 |
+
"epoch": 0.9280959903441666,
|
| 22955 |
+
"grad_norm": 0.12060546875,
|
| 22956 |
+
"learning_rate": 2.90441224685645e-05,
|
| 22957 |
+
"loss": 1.6564494371414185,
|
| 22958 |
+
"step": 6536
|
| 22959 |
+
},
|
| 22960 |
+
{
|
| 22961 |
+
"epoch": 0.928379985445251,
|
| 22962 |
+
"grad_norm": 0.125,
|
| 22963 |
+
"learning_rate": 2.8840884776186515e-05,
|
| 22964 |
+
"loss": 1.657090425491333,
|
| 22965 |
+
"step": 6538
|
| 22966 |
+
},
|
| 22967 |
+
{
|
| 22968 |
+
"epoch": 0.9286639805463356,
|
| 22969 |
+
"grad_norm": 0.1240234375,
|
| 22970 |
+
"learning_rate": 2.863807205597422e-05,
|
| 22971 |
+
"loss": 1.6438353061676025,
|
| 22972 |
+
"step": 6540
|
| 22973 |
+
},
|
| 22974 |
+
{
|
| 22975 |
+
"epoch": 0.92894797564742,
|
| 22976 |
+
"grad_norm": 0.1259765625,
|
| 22977 |
+
"learning_rate": 2.8435688381338116e-05,
|
| 22978 |
+
"loss": 1.6713764667510986,
|
| 22979 |
+
"step": 6542
|
| 22980 |
+
},
|
| 22981 |
+
{
|
| 22982 |
+
"epoch": 0.9292319707485046,
|
| 22983 |
+
"grad_norm": 0.125,
|
| 22984 |
+
"learning_rate": 2.8233737817071405e-05,
|
| 22985 |
+
"loss": 1.639617681503296,
|
| 22986 |
+
"step": 6544
|
| 22987 |
+
},
|
| 22988 |
+
{
|
| 22989 |
+
"epoch": 0.9295159658495891,
|
| 22990 |
+
"grad_norm": 0.12060546875,
|
| 22991 |
+
"learning_rate": 2.8032224419268517e-05,
|
| 22992 |
+
"loss": 1.6151936054229736,
|
| 22993 |
+
"step": 6546
|
| 22994 |
+
},
|
| 22995 |
+
{
|
| 22996 |
+
"epoch": 0.9297999609506736,
|
| 22997 |
+
"grad_norm": 0.123046875,
|
| 22998 |
+
"learning_rate": 2.7831152235243517e-05,
|
| 22999 |
+
"loss": 1.6277703046798706,
|
| 23000 |
+
"step": 6548
|
| 23001 |
+
},
|
| 23002 |
+
{
|
| 23003 |
+
"epoch": 0.9300839560517581,
|
| 23004 |
+
"grad_norm": 0.1279296875,
|
| 23005 |
+
"learning_rate": 2.7630525303448897e-05,
|
| 23006 |
+
"loss": 1.6260011196136475,
|
| 23007 |
+
"step": 6550
|
| 23008 |
+
},
|
| 23009 |
+
{
|
| 23010 |
+
"epoch": 0.9303679511528427,
|
| 23011 |
+
"grad_norm": 0.1298828125,
|
| 23012 |
+
"learning_rate": 2.7430347653394418e-05,
|
| 23013 |
+
"loss": 1.6701017618179321,
|
| 23014 |
+
"step": 6552
|
| 23015 |
+
},
|
| 23016 |
+
{
|
| 23017 |
+
"epoch": 0.9306519462539271,
|
| 23018 |
+
"grad_norm": 0.126953125,
|
| 23019 |
+
"learning_rate": 2.7230623305566254e-05,
|
| 23020 |
+
"loss": 1.6378331184387207,
|
| 23021 |
+
"step": 6554
|
| 23022 |
+
},
|
| 23023 |
+
{
|
| 23024 |
+
"epoch": 0.9309359413550117,
|
| 23025 |
+
"grad_norm": 0.1318359375,
|
| 23026 |
+
"learning_rate": 2.7031356271346144e-05,
|
| 23027 |
+
"loss": 1.6505334377288818,
|
| 23028 |
+
"step": 6556
|
| 23029 |
+
},
|
| 23030 |
+
{
|
| 23031 |
+
"epoch": 0.9312199364560961,
|
| 23032 |
+
"grad_norm": 0.1298828125,
|
| 23033 |
+
"learning_rate": 2.683255055293084e-05,
|
| 23034 |
+
"loss": 1.6599491834640503,
|
| 23035 |
+
"step": 6558
|
| 23036 |
+
},
|
| 23037 |
+
{
|
| 23038 |
+
"epoch": 0.9315039315571806,
|
| 23039 |
+
"grad_norm": 0.1298828125,
|
| 23040 |
+
"learning_rate": 2.663421014325188e-05,
|
| 23041 |
+
"loss": 1.6618008613586426,
|
| 23042 |
+
"step": 6560
|
| 23043 |
+
},
|
| 23044 |
+
{
|
| 23045 |
+
"epoch": 0.9317879266582652,
|
| 23046 |
+
"grad_norm": 0.12109375,
|
| 23047 |
+
"learning_rate": 2.6436339025895084e-05,
|
| 23048 |
+
"loss": 1.6160664558410645,
|
| 23049 |
+
"step": 6562
|
| 23050 |
+
},
|
| 23051 |
+
{
|
| 23052 |
+
"epoch": 0.9320719217593496,
|
| 23053 |
+
"grad_norm": 0.12109375,
|
| 23054 |
+
"learning_rate": 2.6238941175020903e-05,
|
| 23055 |
+
"loss": 1.624788761138916,
|
| 23056 |
+
"step": 6564
|
| 23057 |
+
},
|
| 23058 |
+
{
|
| 23059 |
+
"epoch": 0.9323559168604342,
|
| 23060 |
+
"grad_norm": 0.1279296875,
|
| 23061 |
+
"learning_rate": 2.604202055528429e-05,
|
| 23062 |
+
"loss": 1.6408872604370117,
|
| 23063 |
+
"step": 6566
|
| 23064 |
+
},
|
| 23065 |
+
{
|
| 23066 |
+
"epoch": 0.9326399119615186,
|
| 23067 |
+
"grad_norm": 0.1259765625,
|
| 23068 |
+
"learning_rate": 2.5845581121755335e-05,
|
| 23069 |
+
"loss": 1.667233943939209,
|
| 23070 |
+
"step": 6568
|
| 23071 |
+
},
|
| 23072 |
+
{
|
| 23073 |
+
"epoch": 0.9329239070626032,
|
| 23074 |
+
"grad_norm": 0.1240234375,
|
| 23075 |
+
"learning_rate": 2.5649626819839555e-05,
|
| 23076 |
+
"loss": 1.6937549114227295,
|
| 23077 |
+
"step": 6570
|
| 23078 |
+
},
|
| 23079 |
+
{
|
| 23080 |
+
"epoch": 0.9332079021636877,
|
| 23081 |
+
"grad_norm": 0.125,
|
| 23082 |
+
"learning_rate": 2.545416158519891e-05,
|
| 23083 |
+
"loss": 1.657670497894287,
|
| 23084 |
+
"step": 6572
|
| 23085 |
+
},
|
| 23086 |
+
{
|
| 23087 |
+
"epoch": 0.9334918972647722,
|
| 23088 |
+
"grad_norm": 0.1240234375,
|
| 23089 |
+
"learning_rate": 2.5259189343672583e-05,
|
| 23090 |
+
"loss": 1.6195838451385498,
|
| 23091 |
+
"step": 6574
|
| 23092 |
+
},
|
| 23093 |
+
{
|
| 23094 |
+
"epoch": 0.9337758923658567,
|
| 23095 |
+
"grad_norm": 0.126953125,
|
| 23096 |
+
"learning_rate": 2.5064714011198252e-05,
|
| 23097 |
+
"loss": 1.6312012672424316,
|
| 23098 |
+
"step": 6576
|
| 23099 |
+
},
|
| 23100 |
+
{
|
| 23101 |
+
"epoch": 0.9340598874669412,
|
| 23102 |
+
"grad_norm": 0.1328125,
|
| 23103 |
+
"learning_rate": 2.487073949373332e-05,
|
| 23104 |
+
"loss": 1.6684744358062744,
|
| 23105 |
+
"step": 6578
|
| 23106 |
+
},
|
| 23107 |
+
{
|
| 23108 |
+
"epoch": 0.9343438825680257,
|
| 23109 |
+
"grad_norm": 0.126953125,
|
| 23110 |
+
"learning_rate": 2.4677269687176526e-05,
|
| 23111 |
+
"loss": 1.6751328706741333,
|
| 23112 |
+
"step": 6580
|
| 23113 |
+
},
|
| 23114 |
+
{
|
| 23115 |
+
"epoch": 0.9346278776691103,
|
| 23116 |
+
"grad_norm": 0.12353515625,
|
| 23117 |
+
"learning_rate": 2.4484308477289758e-05,
|
| 23118 |
+
"loss": 1.638372540473938,
|
| 23119 |
+
"step": 6582
|
| 23120 |
+
},
|
| 23121 |
+
{
|
| 23122 |
+
"epoch": 0.9349118727701947,
|
| 23123 |
+
"grad_norm": 0.1181640625,
|
| 23124 |
+
"learning_rate": 2.4291859739619888e-05,
|
| 23125 |
+
"loss": 1.662428379058838,
|
| 23126 |
+
"step": 6584
|
| 23127 |
+
},
|
| 23128 |
+
{
|
| 23129 |
+
"epoch": 0.9351958678712792,
|
| 23130 |
+
"grad_norm": 0.12353515625,
|
| 23131 |
+
"learning_rate": 2.409992733942102e-05,
|
| 23132 |
+
"loss": 1.6858851909637451,
|
| 23133 |
+
"step": 6586
|
| 23134 |
+
},
|
| 23135 |
+
{
|
| 23136 |
+
"epoch": 0.9354798629723637,
|
| 23137 |
+
"grad_norm": 0.12890625,
|
| 23138 |
+
"learning_rate": 2.3908515131576805e-05,
|
| 23139 |
+
"loss": 1.6546742916107178,
|
| 23140 |
+
"step": 6588
|
| 23141 |
+
},
|
| 23142 |
+
{
|
| 23143 |
+
"epoch": 0.9357638580734482,
|
| 23144 |
+
"grad_norm": 0.11962890625,
|
| 23145 |
+
"learning_rate": 2.3717626960523103e-05,
|
| 23146 |
+
"loss": 1.6390483379364014,
|
| 23147 |
+
"step": 6590
|
| 23148 |
+
},
|
| 23149 |
+
{
|
| 23150 |
+
"epoch": 0.9360478531745328,
|
| 23151 |
+
"grad_norm": 0.1328125,
|
| 23152 |
+
"learning_rate": 2.352726666017065e-05,
|
| 23153 |
+
"loss": 1.6461637020111084,
|
| 23154 |
+
"step": 6592
|
| 23155 |
+
},
|
| 23156 |
+
{
|
| 23157 |
+
"epoch": 0.9363318482756172,
|
| 23158 |
+
"grad_norm": 0.1162109375,
|
| 23159 |
+
"learning_rate": 2.3337438053828136e-05,
|
| 23160 |
+
"loss": 1.619539737701416,
|
| 23161 |
+
"step": 6594
|
| 23162 |
+
},
|
| 23163 |
+
{
|
| 23164 |
+
"epoch": 0.9366158433767018,
|
| 23165 |
+
"grad_norm": 0.126953125,
|
| 23166 |
+
"learning_rate": 2.3148144954125372e-05,
|
| 23167 |
+
"loss": 1.6508175134658813,
|
| 23168 |
+
"step": 6596
|
| 23169 |
+
},
|
| 23170 |
+
{
|
| 23171 |
+
"epoch": 0.9368998384777862,
|
| 23172 |
+
"grad_norm": 0.126953125,
|
| 23173 |
+
"learning_rate": 2.295939116293681e-05,
|
| 23174 |
+
"loss": 1.6342027187347412,
|
| 23175 |
+
"step": 6598
|
| 23176 |
+
},
|
| 23177 |
+
{
|
| 23178 |
+
"epoch": 0.9371838335788708,
|
| 23179 |
+
"grad_norm": 0.12109375,
|
| 23180 |
+
"learning_rate": 2.277118047130503e-05,
|
| 23181 |
+
"loss": 1.6193323135375977,
|
| 23182 |
+
"step": 6600
|
| 23183 |
+
},
|
| 23184 |
+
{
|
| 23185 |
+
"epoch": 0.9374678286799553,
|
| 23186 |
+
"grad_norm": 0.12109375,
|
| 23187 |
+
"learning_rate": 2.2583516659364716e-05,
|
| 23188 |
+
"loss": 1.6812199354171753,
|
| 23189 |
+
"step": 6602
|
| 23190 |
+
},
|
| 23191 |
+
{
|
| 23192 |
+
"epoch": 0.9377518237810398,
|
| 23193 |
+
"grad_norm": 0.1259765625,
|
| 23194 |
+
"learning_rate": 2.2396403496266676e-05,
|
| 23195 |
+
"loss": 1.6526546478271484,
|
| 23196 |
+
"step": 6604
|
| 23197 |
+
},
|
| 23198 |
+
{
|
| 23199 |
+
"epoch": 0.9380358188821243,
|
| 23200 |
+
"grad_norm": 0.1328125,
|
| 23201 |
+
"learning_rate": 2.220984474010221e-05,
|
| 23202 |
+
"loss": 1.6458194255828857,
|
| 23203 |
+
"step": 6606
|
| 23204 |
+
},
|
| 23205 |
+
{
|
| 23206 |
+
"epoch": 0.9383198139832087,
|
| 23207 |
+
"grad_norm": 0.1318359375,
|
| 23208 |
+
"learning_rate": 2.202384413782754e-05,
|
| 23209 |
+
"loss": 1.6680066585540771,
|
| 23210 |
+
"step": 6608
|
| 23211 |
+
},
|
| 23212 |
+
{
|
| 23213 |
+
"epoch": 0.9386038090842933,
|
| 23214 |
+
"grad_norm": 0.11962890625,
|
| 23215 |
+
"learning_rate": 2.183840542518857e-05,
|
| 23216 |
+
"loss": 1.6331427097320557,
|
| 23217 |
+
"step": 6610
|
| 23218 |
+
},
|
| 23219 |
+
{
|
| 23220 |
+
"epoch": 0.9388878041853778,
|
| 23221 |
+
"grad_norm": 0.126953125,
|
| 23222 |
+
"learning_rate": 2.1653532326645963e-05,
|
| 23223 |
+
"loss": 1.6482030153274536,
|
| 23224 |
+
"step": 6612
|
| 23225 |
+
},
|
| 23226 |
+
{
|
| 23227 |
+
"epoch": 0.9391717992864623,
|
| 23228 |
+
"grad_norm": 0.12890625,
|
| 23229 |
+
"learning_rate": 2.146922855530018e-05,
|
| 23230 |
+
"loss": 1.628589153289795,
|
| 23231 |
+
"step": 6614
|
| 23232 |
+
},
|
| 23233 |
+
{
|
| 23234 |
+
"epoch": 0.9394557943875468,
|
| 23235 |
+
"grad_norm": 0.125,
|
| 23236 |
+
"learning_rate": 2.1285497812817044e-05,
|
| 23237 |
+
"loss": 1.631730556488037,
|
| 23238 |
+
"step": 6616
|
| 23239 |
+
},
|
| 23240 |
+
{
|
| 23241 |
+
"epoch": 0.9397397894886313,
|
| 23242 |
+
"grad_norm": 0.12451171875,
|
| 23243 |
+
"learning_rate": 2.110234378935323e-05,
|
| 23244 |
+
"loss": 1.6600490808486938,
|
| 23245 |
+
"step": 6618
|
| 23246 |
+
},
|
| 23247 |
+
{
|
| 23248 |
+
"epoch": 0.9400237845897158,
|
| 23249 |
+
"grad_norm": 0.1259765625,
|
| 23250 |
+
"learning_rate": 2.091977016348235e-05,
|
| 23251 |
+
"loss": 1.7200943231582642,
|
| 23252 |
+
"step": 6620
|
| 23253 |
+
},
|
| 23254 |
+
{
|
| 23255 |
+
"epoch": 0.9403077796908004,
|
| 23256 |
+
"grad_norm": 0.1318359375,
|
| 23257 |
+
"learning_rate": 2.0737780602120914e-05,
|
| 23258 |
+
"loss": 1.6427900791168213,
|
| 23259 |
+
"step": 6622
|
| 23260 |
+
},
|
| 23261 |
+
{
|
| 23262 |
+
"epoch": 0.9405917747918848,
|
| 23263 |
+
"grad_norm": 0.1279296875,
|
| 23264 |
+
"learning_rate": 2.0556378760454747e-05,
|
| 23265 |
+
"loss": 1.6485931873321533,
|
| 23266 |
+
"step": 6624
|
| 23267 |
+
},
|
| 23268 |
+
{
|
| 23269 |
+
"epoch": 0.9408757698929694,
|
| 23270 |
+
"grad_norm": 0.138671875,
|
| 23271 |
+
"learning_rate": 2.037556828186555e-05,
|
| 23272 |
+
"loss": 1.6425774097442627,
|
| 23273 |
+
"step": 6626
|
| 23274 |
+
},
|
| 23275 |
+
{
|
| 23276 |
+
"epoch": 0.9411597649940538,
|
| 23277 |
+
"grad_norm": 0.1279296875,
|
| 23278 |
+
"learning_rate": 2.019535279785778e-05,
|
| 23279 |
+
"loss": 1.6571080684661865,
|
| 23280 |
+
"step": 6628
|
| 23281 |
+
},
|
| 23282 |
+
{
|
| 23283 |
+
"epoch": 0.9414437600951383,
|
| 23284 |
+
"grad_norm": 0.130859375,
|
| 23285 |
+
"learning_rate": 2.001573592798563e-05,
|
| 23286 |
+
"loss": 1.6546900272369385,
|
| 23287 |
+
"step": 6630
|
| 23288 |
+
},
|
| 23289 |
+
{
|
| 23290 |
+
"epoch": 0.9417277551962229,
|
| 23291 |
+
"grad_norm": 0.12353515625,
|
| 23292 |
+
"learning_rate": 1.9836721279780373e-05,
|
| 23293 |
+
"loss": 1.6568119525909424,
|
| 23294 |
+
"step": 6632
|
| 23295 |
+
},
|
| 23296 |
+
{
|
| 23297 |
+
"epoch": 0.9420117502973073,
|
| 23298 |
+
"grad_norm": 0.1181640625,
|
| 23299 |
+
"learning_rate": 1.965831244867795e-05,
|
| 23300 |
+
"loss": 1.6468884944915771,
|
| 23301 |
+
"step": 6634
|
| 23302 |
+
},
|
| 23303 |
+
{
|
| 23304 |
+
"epoch": 0.9422957453983919,
|
| 23305 |
+
"grad_norm": 0.12890625,
|
| 23306 |
+
"learning_rate": 1.948051301794666e-05,
|
| 23307 |
+
"loss": 1.6835911273956299,
|
| 23308 |
+
"step": 6636
|
| 23309 |
+
},
|
| 23310 |
+
{
|
| 23311 |
+
"epoch": 0.9425797404994763,
|
| 23312 |
+
"grad_norm": 0.123046875,
|
| 23313 |
+
"learning_rate": 1.9303326558615315e-05,
|
| 23314 |
+
"loss": 1.6839208602905273,
|
| 23315 |
+
"step": 6638
|
| 23316 |
+
},
|
| 23317 |
+
{
|
| 23318 |
+
"epoch": 0.9428637356005609,
|
| 23319 |
+
"grad_norm": 0.123046875,
|
| 23320 |
+
"learning_rate": 1.912675662940133e-05,
|
| 23321 |
+
"loss": 1.6442358493804932,
|
| 23322 |
+
"step": 6640
|
| 23323 |
+
},
|
| 23324 |
+
{
|
| 23325 |
+
"epoch": 0.9431477307016454,
|
| 23326 |
+
"grad_norm": 0.12109375,
|
| 23327 |
+
"learning_rate": 1.8950806776639503e-05,
|
| 23328 |
+
"loss": 1.6288275718688965,
|
| 23329 |
+
"step": 6642
|
| 23330 |
+
},
|
| 23331 |
+
{
|
| 23332 |
+
"epoch": 0.9434317258027299,
|
| 23333 |
+
"grad_norm": 0.12353515625,
|
| 23334 |
+
"learning_rate": 1.8775480534210526e-05,
|
| 23335 |
+
"loss": 1.6193206310272217,
|
| 23336 |
+
"step": 6644
|
| 23337 |
+
},
|
| 23338 |
+
{
|
| 23339 |
+
"epoch": 0.9437157209038144,
|
| 23340 |
+
"grad_norm": 0.125,
|
| 23341 |
+
"learning_rate": 1.8600781423470264e-05,
|
| 23342 |
+
"loss": 1.675943374633789,
|
| 23343 |
+
"step": 6646
|
| 23344 |
+
},
|
| 23345 |
+
{
|
| 23346 |
+
"epoch": 0.943999716004899,
|
| 23347 |
+
"grad_norm": 0.1220703125,
|
| 23348 |
+
"learning_rate": 1.842671295317875e-05,
|
| 23349 |
+
"loss": 1.6317131519317627,
|
| 23350 |
+
"step": 6648
|
| 23351 |
+
},
|
| 23352 |
+
{
|
| 23353 |
+
"epoch": 0.9442837111059834,
|
| 23354 |
+
"grad_norm": 0.12255859375,
|
| 23355 |
+
"learning_rate": 1.8253278619429994e-05,
|
| 23356 |
+
"loss": 1.642336368560791,
|
| 23357 |
+
"step": 6650
|
| 23358 |
+
},
|
| 23359 |
+
{
|
| 23360 |
+
"epoch": 0.944567706207068,
|
| 23361 |
+
"grad_norm": 0.1220703125,
|
| 23362 |
+
"learning_rate": 1.808048190558153e-05,
|
| 23363 |
+
"loss": 1.634922742843628,
|
| 23364 |
+
"step": 6652
|
| 23365 |
+
},
|
| 23366 |
+
{
|
| 23367 |
+
"epoch": 0.9448517013081524,
|
| 23368 |
+
"grad_norm": 0.13671875,
|
| 23369 |
+
"learning_rate": 1.7908326282184666e-05,
|
| 23370 |
+
"loss": 1.6575052738189697,
|
| 23371 |
+
"step": 6654
|
| 23372 |
+
},
|
| 23373 |
+
{
|
| 23374 |
+
"epoch": 0.9451356964092369,
|
| 23375 |
+
"grad_norm": 0.11962890625,
|
| 23376 |
+
"learning_rate": 1.7736815206914544e-05,
|
| 23377 |
+
"loss": 1.6284030675888062,
|
| 23378 |
+
"step": 6656
|
| 23379 |
+
},
|
| 23380 |
+
{
|
| 23381 |
+
"epoch": 0.9454196915103215,
|
| 23382 |
+
"grad_norm": 0.12255859375,
|
| 23383 |
+
"learning_rate": 1.7565952124500927e-05,
|
| 23384 |
+
"loss": 1.6644654273986816,
|
| 23385 |
+
"step": 6658
|
| 23386 |
+
},
|
| 23387 |
+
{
|
| 23388 |
+
"epoch": 0.9457036866114059,
|
| 23389 |
+
"grad_norm": 0.1220703125,
|
| 23390 |
+
"learning_rate": 1.739574046665884e-05,
|
| 23391 |
+
"loss": 1.625715732574463,
|
| 23392 |
+
"step": 6660
|
| 23393 |
+
},
|
| 23394 |
+
{
|
| 23395 |
+
"epoch": 0.9459876817124905,
|
| 23396 |
+
"grad_norm": 0.126953125,
|
| 23397 |
+
"learning_rate": 1.7226183652019767e-05,
|
| 23398 |
+
"loss": 1.6447319984436035,
|
| 23399 |
+
"step": 6662
|
| 23400 |
+
},
|
| 23401 |
+
{
|
| 23402 |
+
"epoch": 0.9462716768135749,
|
| 23403 |
+
"grad_norm": 0.1220703125,
|
| 23404 |
+
"learning_rate": 1.7057285086062897e-05,
|
| 23405 |
+
"loss": 1.6117002964019775,
|
| 23406 |
+
"step": 6664
|
| 23407 |
+
},
|
| 23408 |
+
{
|
| 23409 |
+
"epoch": 0.9465556719146595,
|
| 23410 |
+
"grad_norm": 0.12158203125,
|
| 23411 |
+
"learning_rate": 1.688904816104674e-05,
|
| 23412 |
+
"loss": 1.632535696029663,
|
| 23413 |
+
"step": 6666
|
| 23414 |
+
},
|
| 23415 |
+
{
|
| 23416 |
+
"epoch": 0.946839667015744,
|
| 23417 |
+
"grad_norm": 0.1279296875,
|
| 23418 |
+
"learning_rate": 1.67214762559411e-05,
|
| 23419 |
+
"loss": 1.6630303859710693,
|
| 23420 |
+
"step": 6668
|
| 23421 |
+
},
|
| 23422 |
+
{
|
| 23423 |
+
"epoch": 0.9471236621168285,
|
| 23424 |
+
"grad_norm": 0.1240234375,
|
| 23425 |
+
"learning_rate": 1.6554572736359052e-05,
|
| 23426 |
+
"loss": 1.6721594333648682,
|
| 23427 |
+
"step": 6670
|
| 23428 |
+
},
|
| 23429 |
+
{
|
| 23430 |
+
"epoch": 0.947407657217913,
|
| 23431 |
+
"grad_norm": 0.1318359375,
|
| 23432 |
+
"learning_rate": 1.6388340954489458e-05,
|
| 23433 |
+
"loss": 1.6387388706207275,
|
| 23434 |
+
"step": 6672
|
| 23435 |
+
},
|
| 23436 |
+
{
|
| 23437 |
+
"epoch": 0.9476916523189975,
|
| 23438 |
+
"grad_norm": 0.12890625,
|
| 23439 |
+
"learning_rate": 1.6222784249029566e-05,
|
| 23440 |
+
"loss": 1.6531705856323242,
|
| 23441 |
+
"step": 6674
|
| 23442 |
+
},
|
| 23443 |
+
{
|
| 23444 |
+
"epoch": 0.947975647420082,
|
| 23445 |
+
"grad_norm": 0.1220703125,
|
| 23446 |
+
"learning_rate": 1.6057905945118056e-05,
|
| 23447 |
+
"loss": 1.6726493835449219,
|
| 23448 |
+
"step": 6676
|
| 23449 |
+
},
|
| 23450 |
+
{
|
| 23451 |
+
"epoch": 0.9482596425211665,
|
| 23452 |
+
"grad_norm": 0.123046875,
|
| 23453 |
+
"learning_rate": 1.5893709354268143e-05,
|
| 23454 |
+
"loss": 1.6192290782928467,
|
| 23455 |
+
"step": 6678
|
| 23456 |
+
},
|
| 23457 |
+
{
|
| 23458 |
+
"epoch": 0.948543637622251,
|
| 23459 |
+
"grad_norm": 0.12353515625,
|
| 23460 |
+
"learning_rate": 1.5730197774301114e-05,
|
| 23461 |
+
"loss": 1.6476869583129883,
|
| 23462 |
+
"step": 6680
|
| 23463 |
+
},
|
| 23464 |
+
{
|
| 23465 |
+
"epoch": 0.9488276327233355,
|
| 23466 |
+
"grad_norm": 0.12255859375,
|
| 23467 |
+
"learning_rate": 1.5567374489280096e-05,
|
| 23468 |
+
"loss": 1.692678451538086,
|
| 23469 |
+
"step": 6682
|
| 23470 |
+
},
|
| 23471 |
+
{
|
| 23472 |
+
"epoch": 0.94911162782442,
|
| 23473 |
+
"grad_norm": 0.12255859375,
|
| 23474 |
+
"learning_rate": 1.5405242769444127e-05,
|
| 23475 |
+
"loss": 1.650191307067871,
|
| 23476 |
+
"step": 6684
|
| 23477 |
+
},
|
| 23478 |
+
{
|
| 23479 |
+
"epoch": 0.9493956229255045,
|
| 23480 |
+
"grad_norm": 0.125,
|
| 23481 |
+
"learning_rate": 1.5243805871142402e-05,
|
| 23482 |
+
"loss": 1.6335558891296387,
|
| 23483 |
+
"step": 6686
|
| 23484 |
+
},
|
| 23485 |
+
{
|
| 23486 |
+
"epoch": 0.9496796180265891,
|
| 23487 |
+
"grad_norm": 0.1201171875,
|
| 23488 |
+
"learning_rate": 1.5083067036768905e-05,
|
| 23489 |
+
"loss": 1.6644365787506104,
|
| 23490 |
+
"step": 6688
|
| 23491 |
+
},
|
| 23492 |
+
{
|
| 23493 |
+
"epoch": 0.9499636131276735,
|
| 23494 |
+
"grad_norm": 0.125,
|
| 23495 |
+
"learning_rate": 1.4923029494697348e-05,
|
| 23496 |
+
"loss": 1.640918493270874,
|
| 23497 |
+
"step": 6690
|
| 23498 |
+
},
|
| 23499 |
+
{
|
| 23500 |
+
"epoch": 0.9502476082287581,
|
| 23501 |
+
"grad_norm": 0.1328125,
|
| 23502 |
+
"learning_rate": 1.476369645921622e-05,
|
| 23503 |
+
"loss": 1.639884352684021,
|
| 23504 |
+
"step": 6692
|
| 23505 |
+
},
|
| 23506 |
+
{
|
| 23507 |
+
"epoch": 0.9505316033298425,
|
| 23508 |
+
"grad_norm": 0.126953125,
|
| 23509 |
+
"learning_rate": 1.4605071130464293e-05,
|
| 23510 |
+
"loss": 1.7039388418197632,
|
| 23511 |
+
"step": 6694
|
| 23512 |
+
},
|
| 23513 |
+
{
|
| 23514 |
+
"epoch": 0.9508155984309271,
|
| 23515 |
+
"grad_norm": 0.1259765625,
|
| 23516 |
+
"learning_rate": 1.4447156694366332e-05,
|
| 23517 |
+
"loss": 1.6638362407684326,
|
| 23518 |
+
"step": 6696
|
| 23519 |
+
},
|
| 23520 |
+
{
|
| 23521 |
+
"epoch": 0.9510995935320116,
|
| 23522 |
+
"grad_norm": 0.125,
|
| 23523 |
+
"learning_rate": 1.428995632256917e-05,
|
| 23524 |
+
"loss": 1.6702481508255005,
|
| 23525 |
+
"step": 6698
|
| 23526 |
+
},
|
| 23527 |
+
{
|
| 23528 |
+
"epoch": 0.9513835886330961,
|
| 23529 |
+
"grad_norm": 0.1220703125,
|
| 23530 |
+
"learning_rate": 1.4133473172377882e-05,
|
| 23531 |
+
"loss": 1.6522693634033203,
|
| 23532 |
+
"step": 6700
|
| 23533 |
+
},
|
| 23534 |
+
{
|
| 23535 |
+
"epoch": 0.9516675837341806,
|
| 23536 |
+
"grad_norm": 0.1259765625,
|
| 23537 |
+
"learning_rate": 1.3977710386692473e-05,
|
| 23538 |
+
"loss": 1.6674026250839233,
|
| 23539 |
+
"step": 6702
|
| 23540 |
+
},
|
| 23541 |
+
{
|
| 23542 |
+
"epoch": 0.951951578835265,
|
| 23543 |
+
"grad_norm": 0.125,
|
| 23544 |
+
"learning_rate": 1.38226710939447e-05,
|
| 23545 |
+
"loss": 1.6179451942443848,
|
| 23546 |
+
"step": 6704
|
| 23547 |
+
},
|
| 23548 |
+
{
|
| 23549 |
+
"epoch": 0.9522355739363496,
|
| 23550 |
+
"grad_norm": 0.12158203125,
|
| 23551 |
+
"learning_rate": 1.366835840803531e-05,
|
| 23552 |
+
"loss": 1.654510498046875,
|
| 23553 |
+
"step": 6706
|
| 23554 |
+
},
|
| 23555 |
+
{
|
| 23556 |
+
"epoch": 0.9525195690374341,
|
| 23557 |
+
"grad_norm": 0.1279296875,
|
| 23558 |
+
"learning_rate": 1.3514775428271393e-05,
|
| 23559 |
+
"loss": 1.673440933227539,
|
| 23560 |
+
"step": 6708
|
| 23561 |
+
},
|
| 23562 |
+
{
|
| 23563 |
+
"epoch": 0.9528035641385186,
|
| 23564 |
+
"grad_norm": 0.1171875,
|
| 23565 |
+
"learning_rate": 1.336192523930423e-05,
|
| 23566 |
+
"loss": 1.621158480644226,
|
| 23567 |
+
"step": 6710
|
| 23568 |
+
},
|
| 23569 |
+
{
|
| 23570 |
+
"epoch": 0.9530875592396031,
|
| 23571 |
+
"grad_norm": 0.1259765625,
|
| 23572 |
+
"learning_rate": 1.320981091106725e-05,
|
| 23573 |
+
"loss": 1.6313719749450684,
|
| 23574 |
+
"step": 6712
|
| 23575 |
+
},
|
| 23576 |
+
{
|
| 23577 |
+
"epoch": 0.9533715543406877,
|
| 23578 |
+
"grad_norm": 0.1220703125,
|
| 23579 |
+
"learning_rate": 1.3058435498714494e-05,
|
| 23580 |
+
"loss": 1.6824302673339844,
|
| 23581 |
+
"step": 6714
|
| 23582 |
+
},
|
| 23583 |
+
{
|
| 23584 |
+
"epoch": 0.9536555494417721,
|
| 23585 |
+
"grad_norm": 0.1318359375,
|
| 23586 |
+
"learning_rate": 1.2907802042559119e-05,
|
| 23587 |
+
"loss": 1.6434886455535889,
|
| 23588 |
+
"step": 6716
|
| 23589 |
+
},
|
| 23590 |
+
{
|
| 23591 |
+
"epoch": 0.9539395445428567,
|
| 23592 |
+
"grad_norm": 0.125,
|
| 23593 |
+
"learning_rate": 1.2757913568012408e-05,
|
| 23594 |
+
"loss": 1.6681139469146729,
|
| 23595 |
+
"step": 6718
|
| 23596 |
+
},
|
| 23597 |
+
{
|
| 23598 |
+
"epoch": 0.9542235396439411,
|
| 23599 |
+
"grad_norm": 0.123046875,
|
| 23600 |
+
"learning_rate": 1.2608773085523017e-05,
|
| 23601 |
+
"loss": 1.676089882850647,
|
| 23602 |
+
"step": 6720
|
| 23603 |
+
},
|
| 23604 |
+
{
|
| 23605 |
+
"epoch": 0.9545075347450257,
|
| 23606 |
+
"grad_norm": 0.12255859375,
|
| 23607 |
+
"learning_rate": 1.2460383590516456e-05,
|
| 23608 |
+
"loss": 1.653357744216919,
|
| 23609 |
+
"step": 6722
|
| 23610 |
+
},
|
| 23611 |
+
{
|
| 23612 |
+
"epoch": 0.9547915298461102,
|
| 23613 |
+
"grad_norm": 0.12353515625,
|
| 23614 |
+
"learning_rate": 1.2312748063335006e-05,
|
| 23615 |
+
"loss": 1.6455838680267334,
|
| 23616 |
+
"step": 6724
|
| 23617 |
+
},
|
| 23618 |
+
{
|
| 23619 |
+
"epoch": 0.9550755249471946,
|
| 23620 |
+
"grad_norm": 0.11669921875,
|
| 23621 |
+
"learning_rate": 1.2165869469177731e-05,
|
| 23622 |
+
"loss": 1.6417770385742188,
|
| 23623 |
+
"step": 6726
|
| 23624 |
+
},
|
| 23625 |
+
{
|
| 23626 |
+
"epoch": 0.9553595200482792,
|
| 23627 |
+
"grad_norm": 0.1201171875,
|
| 23628 |
+
"learning_rate": 1.2019750758041099e-05,
|
| 23629 |
+
"loss": 1.6262093782424927,
|
| 23630 |
+
"step": 6728
|
| 23631 |
+
},
|
| 23632 |
+
{
|
| 23633 |
+
"epoch": 0.9556435151493636,
|
| 23634 |
+
"grad_norm": 0.1357421875,
|
| 23635 |
+
"learning_rate": 1.187439486465956e-05,
|
| 23636 |
+
"loss": 1.6558501720428467,
|
| 23637 |
+
"step": 6730
|
| 23638 |
+
},
|
| 23639 |
+
{
|
| 23640 |
+
"epoch": 0.9559275102504482,
|
| 23641 |
+
"grad_norm": 0.1240234375,
|
| 23642 |
+
"learning_rate": 1.1729804708446768e-05,
|
| 23643 |
+
"loss": 1.6913039684295654,
|
| 23644 |
+
"step": 6732
|
| 23645 |
+
},
|
| 23646 |
+
{
|
| 23647 |
+
"epoch": 0.9562115053515327,
|
| 23648 |
+
"grad_norm": 0.126953125,
|
| 23649 |
+
"learning_rate": 1.158598319343675e-05,
|
| 23650 |
+
"loss": 1.6481530666351318,
|
| 23651 |
+
"step": 6734
|
| 23652 |
+
},
|
| 23653 |
+
{
|
| 23654 |
+
"epoch": 0.9564955004526172,
|
| 23655 |
+
"grad_norm": 0.12451171875,
|
| 23656 |
+
"learning_rate": 1.1442933208225786e-05,
|
| 23657 |
+
"loss": 1.654104471206665,
|
| 23658 |
+
"step": 6736
|
| 23659 |
+
},
|
| 23660 |
+
{
|
| 23661 |
+
"epoch": 0.9567794955537017,
|
| 23662 |
+
"grad_norm": 0.125,
|
| 23663 |
+
"learning_rate": 1.130065762591423e-05,
|
| 23664 |
+
"loss": 1.642432689666748,
|
| 23665 |
+
"step": 6738
|
| 23666 |
+
},
|
| 23667 |
+
{
|
| 23668 |
+
"epoch": 0.9570634906547862,
|
| 23669 |
+
"grad_norm": 0.125,
|
| 23670 |
+
"learning_rate": 1.1159159304048934e-05,
|
| 23671 |
+
"loss": 1.6536848545074463,
|
| 23672 |
+
"step": 6740
|
| 23673 |
+
},
|
| 23674 |
+
{
|
| 23675 |
+
"epoch": 0.9573474857558707,
|
| 23676 |
+
"grad_norm": 0.12109375,
|
| 23677 |
+
"learning_rate": 1.1018441084565728e-05,
|
| 23678 |
+
"loss": 1.6880589723587036,
|
| 23679 |
+
"step": 6742
|
| 23680 |
+
},
|
| 23681 |
+
{
|
| 23682 |
+
"epoch": 0.9576314808569553,
|
| 23683 |
+
"grad_norm": 0.1259765625,
|
| 23684 |
+
"learning_rate": 1.0878505793732452e-05,
|
| 23685 |
+
"loss": 1.6856951713562012,
|
| 23686 |
+
"step": 6744
|
| 23687 |
+
},
|
| 23688 |
+
{
|
| 23689 |
+
"epoch": 0.9579154759580397,
|
| 23690 |
+
"grad_norm": 0.12255859375,
|
| 23691 |
+
"learning_rate": 1.0739356242092108e-05,
|
| 23692 |
+
"loss": 1.6466474533081055,
|
| 23693 |
+
"step": 6746
|
| 23694 |
+
},
|
| 23695 |
+
{
|
| 23696 |
+
"epoch": 0.9581994710591242,
|
| 23697 |
+
"grad_norm": 0.12353515625,
|
| 23698 |
+
"learning_rate": 1.0600995224406462e-05,
|
| 23699 |
+
"loss": 1.6551868915557861,
|
| 23700 |
+
"step": 6748
|
| 23701 |
+
},
|
| 23702 |
+
{
|
| 23703 |
+
"epoch": 0.9584834661602087,
|
| 23704 |
+
"grad_norm": 0.1259765625,
|
| 23705 |
+
"learning_rate": 1.0463425519599934e-05,
|
| 23706 |
+
"loss": 1.6469042301177979,
|
| 23707 |
+
"step": 6750
|
| 23708 |
+
},
|
| 23709 |
+
{
|
| 23710 |
+
"epoch": 0.9587674612612932,
|
| 23711 |
+
"grad_norm": 0.12890625,
|
| 23712 |
+
"learning_rate": 1.0326649890703682e-05,
|
| 23713 |
+
"loss": 1.6663037538528442,
|
| 23714 |
+
"step": 6752
|
| 23715 |
+
},
|
| 23716 |
+
{
|
| 23717 |
+
"epoch": 0.9590514563623778,
|
| 23718 |
+
"grad_norm": 0.125,
|
| 23719 |
+
"learning_rate": 1.0190671084800257e-05,
|
| 23720 |
+
"loss": 1.6584124565124512,
|
| 23721 |
+
"step": 6754
|
| 23722 |
+
},
|
| 23723 |
+
{
|
| 23724 |
+
"epoch": 0.9593354514634622,
|
| 23725 |
+
"grad_norm": 0.11767578125,
|
| 23726 |
+
"learning_rate": 1.0055491832968239e-05,
|
| 23727 |
+
"loss": 1.6256663799285889,
|
| 23728 |
+
"step": 6756
|
| 23729 |
+
},
|
| 23730 |
+
{
|
| 23731 |
+
"epoch": 0.9596194465645468,
|
| 23732 |
+
"grad_norm": 0.12890625,
|
| 23733 |
+
"learning_rate": 9.921114850227597e-06,
|
| 23734 |
+
"loss": 1.6238288879394531,
|
| 23735 |
+
"step": 6758
|
| 23736 |
+
},
|
| 23737 |
+
{
|
| 23738 |
+
"epoch": 0.9599034416656312,
|
| 23739 |
+
"grad_norm": 0.125,
|
| 23740 |
+
"learning_rate": 9.78754283548498e-06,
|
| 23741 |
+
"loss": 1.6390256881713867,
|
| 23742 |
+
"step": 6760
|
| 23743 |
+
},
|
| 23744 |
+
{
|
| 23745 |
+
"epoch": 0.9601874367667158,
|
| 23746 |
+
"grad_norm": 0.1259765625,
|
| 23747 |
+
"learning_rate": 9.65477847147966e-06,
|
| 23748 |
+
"loss": 1.6369152069091797,
|
| 23749 |
+
"step": 6762
|
| 23750 |
+
},
|
| 23751 |
+
{
|
| 23752 |
+
"epoch": 0.9604714318678003,
|
| 23753 |
+
"grad_norm": 0.1279296875,
|
| 23754 |
+
"learning_rate": 9.522824424729459e-06,
|
| 23755 |
+
"loss": 1.6385613679885864,
|
| 23756 |
+
"step": 6764
|
| 23757 |
+
},
|
| 23758 |
+
{
|
| 23759 |
+
"epoch": 0.9607554269688848,
|
| 23760 |
+
"grad_norm": 0.12255859375,
|
| 23761 |
+
"learning_rate": 9.391683345477436e-06,
|
| 23762 |
+
"loss": 1.6186283826828003,
|
| 23763 |
+
"step": 6766
|
| 23764 |
+
},
|
| 23765 |
+
{
|
| 23766 |
+
"epoch": 0.9610394220699693,
|
| 23767 |
+
"grad_norm": 0.1279296875,
|
| 23768 |
+
"learning_rate": 9.261357867638436e-06,
|
| 23769 |
+
"loss": 1.6255123615264893,
|
| 23770 |
+
"step": 6768
|
| 23771 |
+
},
|
| 23772 |
+
{
|
| 23773 |
+
"epoch": 0.9613234171710539,
|
| 23774 |
+
"grad_norm": 0.119140625,
|
| 23775 |
+
"learning_rate": 9.131850608746351e-06,
|
| 23776 |
+
"loss": 1.6574430465698242,
|
| 23777 |
+
"step": 6770
|
| 23778 |
+
},
|
| 23779 |
+
{
|
| 23780 |
+
"epoch": 0.9616074122721383,
|
| 23781 |
+
"grad_norm": 0.12109375,
|
| 23782 |
+
"learning_rate": 9.00316416990143e-06,
|
| 23783 |
+
"loss": 1.6040034294128418,
|
| 23784 |
+
"step": 6772
|
| 23785 |
+
},
|
| 23786 |
+
{
|
| 23787 |
+
"epoch": 0.9618914073732228,
|
| 23788 |
+
"grad_norm": 0.1259765625,
|
| 23789 |
+
"learning_rate": 8.875301135718094e-06,
|
| 23790 |
+
"loss": 1.6074063777923584,
|
| 23791 |
+
"step": 6774
|
| 23792 |
+
},
|
| 23793 |
+
{
|
| 23794 |
+
"epoch": 0.9621754024743073,
|
| 23795 |
+
"grad_norm": 0.12060546875,
|
| 23796 |
+
"learning_rate": 8.748264074273065e-06,
|
| 23797 |
+
"loss": 1.6380449533462524,
|
| 23798 |
+
"step": 6776
|
| 23799 |
+
},
|
| 23800 |
+
{
|
| 23801 |
+
"epoch": 0.9624593975753918,
|
| 23802 |
+
"grad_norm": 0.123046875,
|
| 23803 |
+
"learning_rate": 8.622055537053692e-06,
|
| 23804 |
+
"loss": 1.6326104402542114,
|
| 23805 |
+
"step": 6778
|
| 23806 |
+
},
|
| 23807 |
+
{
|
| 23808 |
+
"epoch": 0.9627433926764764,
|
| 23809 |
+
"grad_norm": 0.125,
|
| 23810 |
+
"learning_rate": 8.496678058906765e-06,
|
| 23811 |
+
"loss": 1.6591897010803223,
|
| 23812 |
+
"step": 6780
|
| 23813 |
+
},
|
| 23814 |
+
{
|
| 23815 |
+
"epoch": 0.9630273877775608,
|
| 23816 |
+
"grad_norm": 0.126953125,
|
| 23817 |
+
"learning_rate": 8.372134157987588e-06,
|
| 23818 |
+
"loss": 1.6626744270324707,
|
| 23819 |
+
"step": 6782
|
| 23820 |
+
},
|
| 23821 |
+
{
|
| 23822 |
+
"epoch": 0.9633113828786454,
|
| 23823 |
+
"grad_norm": 0.1298828125,
|
| 23824 |
+
"learning_rate": 8.248426335709442e-06,
|
| 23825 |
+
"loss": 1.6613175868988037,
|
| 23826 |
+
"step": 6784
|
| 23827 |
+
},
|
| 23828 |
+
{
|
| 23829 |
+
"epoch": 0.9635953779797298,
|
| 23830 |
+
"grad_norm": 0.130859375,
|
| 23831 |
+
"learning_rate": 8.125557076693268e-06,
|
| 23832 |
+
"loss": 1.6723227500915527,
|
| 23833 |
+
"step": 6786
|
| 23834 |
+
},
|
| 23835 |
+
{
|
| 23836 |
+
"epoch": 0.9638793730808144,
|
| 23837 |
+
"grad_norm": 0.1220703125,
|
| 23838 |
+
"learning_rate": 8.003528848717828e-06,
|
| 23839 |
+
"loss": 1.6429615020751953,
|
| 23840 |
+
"step": 6788
|
| 23841 |
+
},
|
| 23842 |
+
{
|
| 23843 |
+
"epoch": 0.9641633681818988,
|
| 23844 |
+
"grad_norm": 0.12109375,
|
| 23845 |
+
"learning_rate": 7.882344102670108e-06,
|
| 23846 |
+
"loss": 1.6666431427001953,
|
| 23847 |
+
"step": 6790
|
| 23848 |
+
},
|
| 23849 |
+
{
|
| 23850 |
+
"epoch": 0.9644473632829834,
|
| 23851 |
+
"grad_norm": 0.125,
|
| 23852 |
+
"learning_rate": 7.762005272496137e-06,
|
| 23853 |
+
"loss": 1.6811373233795166,
|
| 23854 |
+
"step": 6792
|
| 23855 |
+
},
|
| 23856 |
+
{
|
| 23857 |
+
"epoch": 0.9647313583840679,
|
| 23858 |
+
"grad_norm": 0.12353515625,
|
| 23859 |
+
"learning_rate": 7.642514775152027e-06,
|
| 23860 |
+
"loss": 1.6308224201202393,
|
| 23861 |
+
"step": 6794
|
| 23862 |
+
},
|
| 23863 |
+
{
|
| 23864 |
+
"epoch": 0.9650153534851523,
|
| 23865 |
+
"grad_norm": 0.1162109375,
|
| 23866 |
+
"learning_rate": 7.5238750105555035e-06,
|
| 23867 |
+
"loss": 1.6672980785369873,
|
| 23868 |
+
"step": 6796
|
| 23869 |
+
},
|
| 23870 |
+
{
|
| 23871 |
+
"epoch": 0.9652993485862369,
|
| 23872 |
+
"grad_norm": 0.126953125,
|
| 23873 |
+
"learning_rate": 7.406088361537639e-06,
|
| 23874 |
+
"loss": 1.6616121530532837,
|
| 23875 |
+
"step": 6798
|
| 23876 |
+
},
|
| 23877 |
+
{
|
| 23878 |
+
"epoch": 0.9655833436873213,
|
| 23879 |
+
"grad_norm": 0.126953125,
|
| 23880 |
+
"learning_rate": 7.289157193795082e-06,
|
| 23881 |
+
"loss": 1.6759233474731445,
|
| 23882 |
+
"step": 6800
|
| 23883 |
+
},
|
| 23884 |
+
{
|
| 23885 |
+
"epoch": 0.9658673387884059,
|
| 23886 |
+
"grad_norm": 0.12353515625,
|
| 23887 |
+
"learning_rate": 7.173083855842444e-06,
|
| 23888 |
+
"loss": 1.6492140293121338,
|
| 23889 |
+
"step": 6802
|
| 23890 |
+
},
|
| 23891 |
+
{
|
| 23892 |
+
"epoch": 0.9661513338894904,
|
| 23893 |
+
"grad_norm": 0.1328125,
|
| 23894 |
+
"learning_rate": 7.057870678965189e-06,
|
| 23895 |
+
"loss": 1.6711063385009766,
|
| 23896 |
+
"step": 6804
|
| 23897 |
+
},
|
| 23898 |
+
{
|
| 23899 |
+
"epoch": 0.9664353289905749,
|
| 23900 |
+
"grad_norm": 0.12158203125,
|
| 23901 |
+
"learning_rate": 6.943519977172808e-06,
|
| 23902 |
+
"loss": 1.638947606086731,
|
| 23903 |
+
"step": 6806
|
| 23904 |
+
},
|
| 23905 |
+
{
|
| 23906 |
+
"epoch": 0.9667193240916594,
|
| 23907 |
+
"grad_norm": 0.12890625,
|
| 23908 |
+
"learning_rate": 6.830034047152328e-06,
|
| 23909 |
+
"loss": 1.679686188697815,
|
| 23910 |
+
"step": 6808
|
| 23911 |
+
},
|
| 23912 |
+
{
|
| 23913 |
+
"epoch": 0.967003319192744,
|
| 23914 |
+
"grad_norm": 0.125,
|
| 23915 |
+
"learning_rate": 6.7174151682221854e-06,
|
| 23916 |
+
"loss": 1.6446295976638794,
|
| 23917 |
+
"step": 6810
|
| 23918 |
+
},
|
| 23919 |
+
{
|
| 23920 |
+
"epoch": 0.9672873142938284,
|
| 23921 |
+
"grad_norm": 0.1240234375,
|
| 23922 |
+
"learning_rate": 6.6056656022864315e-06,
|
| 23923 |
+
"loss": 1.6103341579437256,
|
| 23924 |
+
"step": 6812
|
| 23925 |
+
},
|
| 23926 |
+
{
|
| 23927 |
+
"epoch": 0.967571309394913,
|
| 23928 |
+
"grad_norm": 0.1279296875,
|
| 23929 |
+
"learning_rate": 6.494787593789376e-06,
|
| 23930 |
+
"loss": 1.6539026498794556,
|
| 23931 |
+
"step": 6814
|
| 23932 |
+
},
|
| 23933 |
+
{
|
| 23934 |
+
"epoch": 0.9678553044959974,
|
| 23935 |
+
"grad_norm": 0.12890625,
|
| 23936 |
+
"learning_rate": 6.384783369670405e-06,
|
| 23937 |
+
"loss": 1.6685879230499268,
|
| 23938 |
+
"step": 6816
|
| 23939 |
+
},
|
| 23940 |
+
{
|
| 23941 |
+
"epoch": 0.968139299597082,
|
| 23942 |
+
"grad_norm": 0.12451171875,
|
| 23943 |
+
"learning_rate": 6.2756551393193195e-06,
|
| 23944 |
+
"loss": 1.661516785621643,
|
| 23945 |
+
"step": 6818
|
| 23946 |
+
},
|
| 23947 |
+
{
|
| 23948 |
+
"epoch": 0.9684232946981665,
|
| 23949 |
+
"grad_norm": 0.1298828125,
|
| 23950 |
+
"learning_rate": 6.16740509453193e-06,
|
| 23951 |
+
"loss": 1.7060528993606567,
|
| 23952 |
+
"step": 6820
|
| 23953 |
+
},
|
| 23954 |
+
{
|
| 23955 |
+
"epoch": 0.9687072897992509,
|
| 23956 |
+
"grad_norm": 0.1298828125,
|
| 23957 |
+
"learning_rate": 6.060035409466086e-06,
|
| 23958 |
+
"loss": 1.6504364013671875,
|
| 23959 |
+
"step": 6822
|
| 23960 |
+
},
|
| 23961 |
+
{
|
| 23962 |
+
"epoch": 0.9689912849003355,
|
| 23963 |
+
"grad_norm": 0.126953125,
|
| 23964 |
+
"learning_rate": 5.953548240597945e-06,
|
| 23965 |
+
"loss": 1.6967990398406982,
|
| 23966 |
+
"step": 6824
|
| 23967 |
+
},
|
| 23968 |
+
{
|
| 23969 |
+
"epoch": 0.9692752800014199,
|
| 23970 |
+
"grad_norm": 0.126953125,
|
| 23971 |
+
"learning_rate": 5.847945726678672e-06,
|
| 23972 |
+
"loss": 1.659487009048462,
|
| 23973 |
+
"step": 6826
|
| 23974 |
+
},
|
| 23975 |
+
{
|
| 23976 |
+
"epoch": 0.9695592751025045,
|
| 23977 |
+
"grad_norm": 0.126953125,
|
| 23978 |
+
"learning_rate": 5.743229988691551e-06,
|
| 23979 |
+
"loss": 1.6479309797286987,
|
| 23980 |
+
"step": 6828
|
| 23981 |
+
},
|
| 23982 |
+
{
|
| 23983 |
+
"epoch": 0.969843270203589,
|
| 23984 |
+
"grad_norm": 0.12060546875,
|
| 23985 |
+
"learning_rate": 5.6394031298092785e-06,
|
| 23986 |
+
"loss": 1.6425352096557617,
|
| 23987 |
+
"step": 6830
|
| 23988 |
+
},
|
| 23989 |
+
{
|
| 23990 |
+
"epoch": 0.9701272653046735,
|
| 23991 |
+
"grad_norm": 0.125,
|
| 23992 |
+
"learning_rate": 5.536467235351839e-06,
|
| 23993 |
+
"loss": 1.670881986618042,
|
| 23994 |
+
"step": 6832
|
| 23995 |
+
},
|
| 23996 |
+
{
|
| 23997 |
+
"epoch": 0.970411260405758,
|
| 23998 |
+
"grad_norm": 0.1279296875,
|
| 23999 |
+
"learning_rate": 5.434424372744473e-06,
|
| 24000 |
+
"loss": 1.6419832706451416,
|
| 24001 |
+
"step": 6834
|
| 24002 |
+
},
|
| 24003 |
+
{
|
| 24004 |
+
"epoch": 0.9706952555068425,
|
| 24005 |
+
"grad_norm": 0.123046875,
|
| 24006 |
+
"learning_rate": 5.333276591476338e-06,
|
| 24007 |
+
"loss": 1.6520084142684937,
|
| 24008 |
+
"step": 6836
|
| 24009 |
+
},
|
| 24010 |
+
{
|
| 24011 |
+
"epoch": 0.970979250607927,
|
| 24012 |
+
"grad_norm": 0.1298828125,
|
| 24013 |
+
"learning_rate": 5.233025923059182e-06,
|
| 24014 |
+
"loss": 1.6769835948944092,
|
| 24015 |
+
"step": 6838
|
| 24016 |
+
},
|
| 24017 |
+
{
|
| 24018 |
+
"epoch": 0.9712632457090116,
|
| 24019 |
+
"grad_norm": 0.1298828125,
|
| 24020 |
+
"learning_rate": 5.133674380986664e-06,
|
| 24021 |
+
"loss": 1.6453429460525513,
|
| 24022 |
+
"step": 6840
|
| 24023 |
+
},
|
| 24024 |
+
{
|
| 24025 |
+
"epoch": 0.971547240810096,
|
| 24026 |
+
"grad_norm": 0.1240234375,
|
| 24027 |
+
"learning_rate": 5.03522396069378e-06,
|
| 24028 |
+
"loss": 1.644869327545166,
|
| 24029 |
+
"step": 6842
|
| 24030 |
+
},
|
| 24031 |
+
{
|
| 24032 |
+
"epoch": 0.9718312359111805,
|
| 24033 |
+
"grad_norm": 0.125,
|
| 24034 |
+
"learning_rate": 4.9376766395169525e-06,
|
| 24035 |
+
"loss": 1.6352219581604004,
|
| 24036 |
+
"step": 6844
|
| 24037 |
+
},
|
| 24038 |
+
{
|
| 24039 |
+
"epoch": 0.972115231012265,
|
| 24040 |
+
"grad_norm": 0.125,
|
| 24041 |
+
"learning_rate": 4.841034376654158e-06,
|
| 24042 |
+
"loss": 1.6524848937988281,
|
| 24043 |
+
"step": 6846
|
| 24044 |
+
},
|
| 24045 |
+
{
|
| 24046 |
+
"epoch": 0.9723992261133495,
|
| 24047 |
+
"grad_norm": 0.1240234375,
|
| 24048 |
+
"learning_rate": 4.745299113125701e-06,
|
| 24049 |
+
"loss": 1.6345412731170654,
|
| 24050 |
+
"step": 6848
|
| 24051 |
+
},
|
| 24052 |
+
{
|
| 24053 |
+
"epoch": 0.9726832212144341,
|
| 24054 |
+
"grad_norm": 0.1298828125,
|
| 24055 |
+
"learning_rate": 4.650472771735137e-06,
|
| 24056 |
+
"loss": 1.6433501243591309,
|
| 24057 |
+
"step": 6850
|
| 24058 |
+
},
|
| 24059 |
+
{
|
| 24060 |
+
"epoch": 0.9729672163155185,
|
| 24061 |
+
"grad_norm": 0.1298828125,
|
| 24062 |
+
"learning_rate": 4.556557257030708e-06,
|
| 24063 |
+
"loss": 1.6776695251464844,
|
| 24064 |
+
"step": 6852
|
| 24065 |
+
},
|
| 24066 |
+
{
|
| 24067 |
+
"epoch": 0.9732512114166031,
|
| 24068 |
+
"grad_norm": 0.123046875,
|
| 24069 |
+
"learning_rate": 4.463554455267066e-06,
|
| 24070 |
+
"loss": 1.624751329421997,
|
| 24071 |
+
"step": 6854
|
| 24072 |
+
},
|
| 24073 |
+
{
|
| 24074 |
+
"epoch": 0.9735352065176875,
|
| 24075 |
+
"grad_norm": 0.1201171875,
|
| 24076 |
+
"learning_rate": 4.371466234367421e-06,
|
| 24077 |
+
"loss": 1.686977744102478,
|
| 24078 |
+
"step": 6856
|
| 24079 |
+
},
|
| 24080 |
+
{
|
| 24081 |
+
"epoch": 0.9738192016187721,
|
| 24082 |
+
"grad_norm": 0.11962890625,
|
| 24083 |
+
"learning_rate": 4.280294443885973e-06,
|
| 24084 |
+
"loss": 1.6380048990249634,
|
| 24085 |
+
"step": 6858
|
| 24086 |
+
},
|
| 24087 |
+
{
|
| 24088 |
+
"epoch": 0.9741031967198566,
|
| 24089 |
+
"grad_norm": 0.12353515625,
|
| 24090 |
+
"learning_rate": 4.190040914970805e-06,
|
| 24091 |
+
"loss": 1.6679741144180298,
|
| 24092 |
+
"step": 6860
|
| 24093 |
+
},
|
| 24094 |
+
{
|
| 24095 |
+
"epoch": 0.9743871918209411,
|
| 24096 |
+
"grad_norm": 0.12109375,
|
| 24097 |
+
"learning_rate": 4.100707460327108e-06,
|
| 24098 |
+
"loss": 1.6518080234527588,
|
| 24099 |
+
"step": 6862
|
| 24100 |
+
},
|
| 24101 |
+
{
|
| 24102 |
+
"epoch": 0.9746711869220256,
|
| 24103 |
+
"grad_norm": 0.123046875,
|
| 24104 |
+
"learning_rate": 4.01229587418071e-06,
|
| 24105 |
+
"loss": 1.6278109550476074,
|
| 24106 |
+
"step": 6864
|
| 24107 |
+
},
|
| 24108 |
+
{
|
| 24109 |
+
"epoch": 0.9749551820231102,
|
| 24110 |
+
"grad_norm": 0.12109375,
|
| 24111 |
+
"learning_rate": 3.924807932242142e-06,
|
| 24112 |
+
"loss": 1.6852149963378906,
|
| 24113 |
+
"step": 6866
|
| 24114 |
+
},
|
| 24115 |
+
{
|
| 24116 |
+
"epoch": 0.9752391771241946,
|
| 24117 |
+
"grad_norm": 0.1328125,
|
| 24118 |
+
"learning_rate": 3.8382453916708805e-06,
|
| 24119 |
+
"loss": 1.6273053884506226,
|
| 24120 |
+
"step": 6868
|
| 24121 |
+
},
|
| 24122 |
+
{
|
| 24123 |
+
"epoch": 0.9755231722252791,
|
| 24124 |
+
"grad_norm": 0.12451171875,
|
| 24125 |
+
"learning_rate": 3.7526099910401348e-06,
|
| 24126 |
+
"loss": 1.649459958076477,
|
| 24127 |
+
"step": 6870
|
| 24128 |
+
},
|
| 24129 |
+
{
|
| 24130 |
+
"epoch": 0.9758071673263636,
|
| 24131 |
+
"grad_norm": 0.125,
|
| 24132 |
+
"learning_rate": 3.6679034503018293e-06,
|
| 24133 |
+
"loss": 1.6260278224945068,
|
| 24134 |
+
"step": 6872
|
| 24135 |
+
},
|
| 24136 |
+
{
|
| 24137 |
+
"epoch": 0.9760911624274481,
|
| 24138 |
+
"grad_norm": 0.1220703125,
|
| 24139 |
+
"learning_rate": 3.5841274707521856e-06,
|
| 24140 |
+
"loss": 1.7036383152008057,
|
| 24141 |
+
"step": 6874
|
| 24142 |
+
},
|
| 24143 |
+
{
|
| 24144 |
+
"epoch": 0.9763751575285327,
|
| 24145 |
+
"grad_norm": 0.12158203125,
|
| 24146 |
+
"learning_rate": 3.501283734997418e-06,
|
| 24147 |
+
"loss": 1.6645114421844482,
|
| 24148 |
+
"step": 6876
|
| 24149 |
+
},
|
| 24150 |
+
{
|
| 24151 |
+
"epoch": 0.9766591526296171,
|
| 24152 |
+
"grad_norm": 0.1181640625,
|
| 24153 |
+
"learning_rate": 3.4193739069200703e-06,
|
| 24154 |
+
"loss": 1.6695365905761719,
|
| 24155 |
+
"step": 6878
|
| 24156 |
+
},
|
| 24157 |
+
{
|
| 24158 |
+
"epoch": 0.9769431477307017,
|
| 24159 |
+
"grad_norm": 0.125,
|
| 24160 |
+
"learning_rate": 3.338399631645489e-06,
|
| 24161 |
+
"loss": 1.66145920753479,
|
| 24162 |
+
"step": 6880
|
| 24163 |
+
},
|
| 24164 |
+
{
|
| 24165 |
+
"epoch": 0.9772271428317861,
|
| 24166 |
+
"grad_norm": 0.12353515625,
|
| 24167 |
+
"learning_rate": 3.2583625355088243e-06,
|
| 24168 |
+
"loss": 1.6386982202529907,
|
| 24169 |
+
"step": 6882
|
| 24170 |
+
},
|
| 24171 |
+
{
|
| 24172 |
+
"epoch": 0.9775111379328707,
|
| 24173 |
+
"grad_norm": 0.130859375,
|
| 24174 |
+
"learning_rate": 3.1792642260224047e-06,
|
| 24175 |
+
"loss": 1.6711777448654175,
|
| 24176 |
+
"step": 6884
|
| 24177 |
+
},
|
| 24178 |
+
{
|
| 24179 |
+
"epoch": 0.9777951330339552,
|
| 24180 |
+
"grad_norm": 0.123046875,
|
| 24181 |
+
"learning_rate": 3.101106291843381e-06,
|
| 24182 |
+
"loss": 1.6668962240219116,
|
| 24183 |
+
"step": 6886
|
| 24184 |
+
},
|
| 24185 |
+
{
|
| 24186 |
+
"epoch": 0.9780791281350397,
|
| 24187 |
+
"grad_norm": 0.12255859375,
|
| 24188 |
+
"learning_rate": 3.0238903027418553e-06,
|
| 24189 |
+
"loss": 1.6597087383270264,
|
| 24190 |
+
"step": 6888
|
| 24191 |
+
},
|
| 24192 |
+
{
|
| 24193 |
+
"epoch": 0.9783631232361242,
|
| 24194 |
+
"grad_norm": 0.12890625,
|
| 24195 |
+
"learning_rate": 2.9476178095693472e-06,
|
| 24196 |
+
"loss": 1.6730546951293945,
|
| 24197 |
+
"step": 6890
|
| 24198 |
+
},
|
| 24199 |
+
{
|
| 24200 |
+
"epoch": 0.9786471183372086,
|
| 24201 |
+
"grad_norm": 0.12353515625,
|
| 24202 |
+
"learning_rate": 2.872290344227668e-06,
|
| 24203 |
+
"loss": 1.6554884910583496,
|
| 24204 |
+
"step": 6892
|
| 24205 |
+
},
|
| 24206 |
+
{
|
| 24207 |
+
"epoch": 0.9789311134382932,
|
| 24208 |
+
"grad_norm": 0.126953125,
|
| 24209 |
+
"learning_rate": 2.7979094196381117e-06,
|
| 24210 |
+
"loss": 1.6670081615447998,
|
| 24211 |
+
"step": 6894
|
| 24212 |
+
},
|
| 24213 |
+
{
|
| 24214 |
+
"epoch": 0.9792151085393777,
|
| 24215 |
+
"grad_norm": 0.1259765625,
|
| 24216 |
+
"learning_rate": 2.724476529711084e-06,
|
| 24217 |
+
"loss": 1.6630990505218506,
|
| 24218 |
+
"step": 6896
|
| 24219 |
+
},
|
| 24220 |
+
{
|
| 24221 |
+
"epoch": 0.9794991036404622,
|
| 24222 |
+
"grad_norm": 0.125,
|
| 24223 |
+
"learning_rate": 2.6519931493161176e-06,
|
| 24224 |
+
"loss": 1.676001787185669,
|
| 24225 |
+
"step": 6898
|
| 24226 |
+
},
|
| 24227 |
+
{
|
| 24228 |
+
"epoch": 0.9797830987415467,
|
| 24229 |
+
"grad_norm": 0.126953125,
|
| 24230 |
+
"learning_rate": 2.580460734252238e-06,
|
| 24231 |
+
"loss": 1.660456895828247,
|
| 24232 |
+
"step": 6900
|
| 24233 |
+
},
|
| 24234 |
+
{
|
| 24235 |
+
"epoch": 0.9800670938426312,
|
| 24236 |
+
"grad_norm": 0.1259765625,
|
| 24237 |
+
"learning_rate": 2.509880721218705e-06,
|
| 24238 |
+
"loss": 1.6949119567871094,
|
| 24239 |
+
"step": 6902
|
| 24240 |
+
},
|
| 24241 |
+
{
|
| 24242 |
+
"epoch": 0.9803510889437157,
|
| 24243 |
+
"grad_norm": 0.12109375,
|
| 24244 |
+
"learning_rate": 2.4402545277861788e-06,
|
| 24245 |
+
"loss": 1.6519129276275635,
|
| 24246 |
+
"step": 6904
|
| 24247 |
+
},
|
| 24248 |
+
{
|
| 24249 |
+
"epoch": 0.9806350840448003,
|
| 24250 |
+
"grad_norm": 0.1279296875,
|
| 24251 |
+
"learning_rate": 2.371583552368245e-06,
|
| 24252 |
+
"loss": 1.6996619701385498,
|
| 24253 |
+
"step": 6906
|
| 24254 |
+
},
|
| 24255 |
+
{
|
| 24256 |
+
"epoch": 0.9809190791458847,
|
| 24257 |
+
"grad_norm": 0.125,
|
| 24258 |
+
"learning_rate": 2.3038691741933406e-06,
|
| 24259 |
+
"loss": 1.6510899066925049,
|
| 24260 |
+
"step": 6908
|
| 24261 |
+
},
|
| 24262 |
+
{
|
| 24263 |
+
"epoch": 0.9812030742469693,
|
| 24264 |
+
"grad_norm": 0.130859375,
|
| 24265 |
+
"learning_rate": 2.2371127532770155e-06,
|
| 24266 |
+
"loss": 1.666084885597229,
|
| 24267 |
+
"step": 6910
|
| 24268 |
+
},
|
| 24269 |
+
{
|
| 24270 |
+
"epoch": 0.9814870693480537,
|
| 24271 |
+
"grad_norm": 0.12060546875,
|
| 24272 |
+
"learning_rate": 2.171315630394649e-06,
|
| 24273 |
+
"loss": 1.6329777240753174,
|
| 24274 |
+
"step": 6912
|
| 24275 |
+
},
|
| 24276 |
+
{
|
| 24277 |
+
"epoch": 0.9817710644491382,
|
| 24278 |
+
"grad_norm": 0.12255859375,
|
| 24279 |
+
"learning_rate": 2.1064791270545225e-06,
|
| 24280 |
+
"loss": 1.6736146211624146,
|
| 24281 |
+
"step": 6914
|
| 24282 |
+
},
|
| 24283 |
+
{
|
| 24284 |
+
"epoch": 0.9820550595502228,
|
| 24285 |
+
"grad_norm": 0.12255859375,
|
| 24286 |
+
"learning_rate": 2.0426045454712493e-06,
|
| 24287 |
+
"loss": 1.6562269926071167,
|
| 24288 |
+
"step": 6916
|
| 24289 |
+
},
|
| 24290 |
+
{
|
| 24291 |
+
"epoch": 0.9823390546513072,
|
| 24292 |
+
"grad_norm": 0.1279296875,
|
| 24293 |
+
"learning_rate": 1.9796931685396647e-06,
|
| 24294 |
+
"loss": 1.6335300207138062,
|
| 24295 |
+
"step": 6918
|
| 24296 |
+
},
|
| 24297 |
+
{
|
| 24298 |
+
"epoch": 0.9826230497523918,
|
| 24299 |
+
"grad_norm": 0.12353515625,
|
| 24300 |
+
"learning_rate": 1.917746259808989e-06,
|
| 24301 |
+
"loss": 1.6502379179000854,
|
| 24302 |
+
"step": 6920
|
| 24303 |
+
},
|
| 24304 |
+
{
|
| 24305 |
+
"epoch": 0.9829070448534762,
|
| 24306 |
+
"grad_norm": 0.1240234375,
|
| 24307 |
+
"learning_rate": 1.8567650634575384e-06,
|
| 24308 |
+
"loss": 1.6750049591064453,
|
| 24309 |
+
"step": 6922
|
| 24310 |
+
},
|
| 24311 |
+
{
|
| 24312 |
+
"epoch": 0.9831910399545608,
|
| 24313 |
+
"grad_norm": 0.12451171875,
|
| 24314 |
+
"learning_rate": 1.7967508042676662e-06,
|
| 24315 |
+
"loss": 1.6218786239624023,
|
| 24316 |
+
"step": 6924
|
| 24317 |
+
},
|
| 24318 |
+
{
|
| 24319 |
+
"epoch": 0.9834750350556453,
|
| 24320 |
+
"grad_norm": 0.1298828125,
|
| 24321 |
+
"learning_rate": 1.7377046876011993e-06,
|
| 24322 |
+
"loss": 1.6702368259429932,
|
| 24323 |
+
"step": 6926
|
| 24324 |
+
},
|
| 24325 |
+
{
|
| 24326 |
+
"epoch": 0.9837590301567298,
|
| 24327 |
+
"grad_norm": 0.123046875,
|
| 24328 |
+
"learning_rate": 1.6796278993752024e-06,
|
| 24329 |
+
"loss": 1.6584724187850952,
|
| 24330 |
+
"step": 6928
|
| 24331 |
+
},
|
| 24332 |
+
{
|
| 24333 |
+
"epoch": 0.9840430252578143,
|
| 24334 |
+
"grad_norm": 0.1259765625,
|
| 24335 |
+
"learning_rate": 1.6225216060382075e-06,
|
| 24336 |
+
"loss": 1.6555862426757812,
|
| 24337 |
+
"step": 6930
|
| 24338 |
+
},
|
| 24339 |
+
{
|
| 24340 |
+
"epoch": 0.9843270203588989,
|
| 24341 |
+
"grad_norm": 0.12109375,
|
| 24342 |
+
"learning_rate": 1.566386954546728e-06,
|
| 24343 |
+
"loss": 1.6616029739379883,
|
| 24344 |
+
"step": 6932
|
| 24345 |
+
},
|
| 24346 |
+
{
|
| 24347 |
+
"epoch": 0.9846110154599833,
|
| 24348 |
+
"grad_norm": 0.12255859375,
|
| 24349 |
+
"learning_rate": 1.5112250723422427e-06,
|
| 24350 |
+
"loss": 1.6865534782409668,
|
| 24351 |
+
"step": 6934
|
| 24352 |
+
},
|
| 24353 |
+
{
|
| 24354 |
+
"epoch": 0.9848950105610679,
|
| 24355 |
+
"grad_norm": 0.1279296875,
|
| 24356 |
+
"learning_rate": 1.4570370673285815e-06,
|
| 24357 |
+
"loss": 1.656646728515625,
|
| 24358 |
+
"step": 6936
|
| 24359 |
+
},
|
| 24360 |
+
{
|
| 24361 |
+
"epoch": 0.9851790056621523,
|
| 24362 |
+
"grad_norm": 0.12451171875,
|
| 24363 |
+
"learning_rate": 1.4038240278496374e-06,
|
| 24364 |
+
"loss": 1.6809053421020508,
|
| 24365 |
+
"step": 6938
|
| 24366 |
+
},
|
| 24367 |
+
{
|
| 24368 |
+
"epoch": 0.9854630007632368,
|
| 24369 |
+
"grad_norm": 0.11962890625,
|
| 24370 |
+
"learning_rate": 1.351587022667522e-06,
|
| 24371 |
+
"loss": 1.6200101375579834,
|
| 24372 |
+
"step": 6940
|
| 24373 |
+
},
|
| 24374 |
+
{
|
| 24375 |
+
"epoch": 0.9857469958643214,
|
| 24376 |
+
"grad_norm": 0.12353515625,
|
| 24377 |
+
"learning_rate": 1.3003271009410844e-06,
|
| 24378 |
+
"loss": 1.6474997997283936,
|
| 24379 |
+
"step": 6942
|
| 24380 |
+
},
|
| 24381 |
+
{
|
| 24382 |
+
"epoch": 0.9860309909654058,
|
| 24383 |
+
"grad_norm": 0.12451171875,
|
| 24384 |
+
"learning_rate": 1.2500452922048766e-06,
|
| 24385 |
+
"loss": 1.6671321392059326,
|
| 24386 |
+
"step": 6944
|
| 24387 |
+
},
|
| 24388 |
+
{
|
| 24389 |
+
"epoch": 0.9863149860664904,
|
| 24390 |
+
"grad_norm": 0.12353515625,
|
| 24391 |
+
"learning_rate": 1.2007426063484261e-06,
|
| 24392 |
+
"loss": 1.6690073013305664,
|
| 24393 |
+
"step": 6946
|
| 24394 |
+
},
|
| 24395 |
+
{
|
| 24396 |
+
"epoch": 0.9865989811675748,
|
| 24397 |
+
"grad_norm": 0.1240234375,
|
| 24398 |
+
"learning_rate": 1.1524200335960134e-06,
|
| 24399 |
+
"loss": 1.6505852937698364,
|
| 24400 |
+
"step": 6948
|
| 24401 |
+
},
|
| 24402 |
+
{
|
| 24403 |
+
"epoch": 0.9868829762686594,
|
| 24404 |
+
"grad_norm": 0.12060546875,
|
| 24405 |
+
"learning_rate": 1.105078544486704e-06,
|
| 24406 |
+
"loss": 1.6218998432159424,
|
| 24407 |
+
"step": 6950
|
| 24408 |
+
},
|
| 24409 |
+
{
|
| 24410 |
+
"epoch": 0.9871669713697439,
|
| 24411 |
+
"grad_norm": 0.1298828125,
|
| 24412 |
+
"learning_rate": 1.058719089854937e-06,
|
| 24413 |
+
"loss": 1.7010353803634644,
|
| 24414 |
+
"step": 6952
|
| 24415 |
+
},
|
| 24416 |
+
{
|
| 24417 |
+
"epoch": 0.9874509664708284,
|
| 24418 |
+
"grad_norm": 0.12060546875,
|
| 24419 |
+
"learning_rate": 1.0133426008113678e-06,
|
| 24420 |
+
"loss": 1.6759577989578247,
|
| 24421 |
+
"step": 6954
|
| 24422 |
+
},
|
| 24423 |
+
{
|
| 24424 |
+
"epoch": 0.9877349615719129,
|
| 24425 |
+
"grad_norm": 0.1279296875,
|
| 24426 |
+
"learning_rate": 9.689499887242104e-07,
|
| 24427 |
+
"loss": 1.6455817222595215,
|
| 24428 |
+
"step": 6956
|
| 24429 |
+
},
|
| 24430 |
+
{
|
| 24431 |
+
"epoch": 0.9880189566729974,
|
| 24432 |
+
"grad_norm": 0.11865234375,
|
| 24433 |
+
"learning_rate": 9.255421452008916e-07,
|
| 24434 |
+
"loss": 1.6357256174087524,
|
| 24435 |
+
"step": 6958
|
| 24436 |
+
},
|
| 24437 |
+
{
|
| 24438 |
+
"epoch": 0.9883029517740819,
|
| 24439 |
+
"grad_norm": 0.12109375,
|
| 24440 |
+
"learning_rate": 8.831199420701763e-07,
|
| 24441 |
+
"loss": 1.6579055786132812,
|
| 24442 |
+
"step": 6960
|
| 24443 |
+
},
|
| 24444 |
+
{
|
| 24445 |
+
"epoch": 0.9885869468751664,
|
| 24446 |
+
"grad_norm": 0.130859375,
|
| 24447 |
+
"learning_rate": 8.416842313646478e-07,
|
| 24448 |
+
"loss": 1.6640031337738037,
|
| 24449 |
+
"step": 6962
|
| 24450 |
+
},
|
| 24451 |
+
{
|
| 24452 |
+
"epoch": 0.9888709419762509,
|
| 24453 |
+
"grad_norm": 0.123046875,
|
| 24454 |
+
"learning_rate": 8.012358453035939e-07,
|
| 24455 |
+
"loss": 1.6428769826889038,
|
| 24456 |
+
"step": 6964
|
| 24457 |
+
},
|
| 24458 |
+
{
|
| 24459 |
+
"epoch": 0.9891549370773354,
|
| 24460 |
+
"grad_norm": 0.1328125,
|
| 24461 |
+
"learning_rate": 7.617755962762874e-07,
|
| 24462 |
+
"loss": 1.6737720966339111,
|
| 24463 |
+
"step": 6966
|
| 24464 |
+
},
|
| 24465 |
+
{
|
| 24466 |
+
"epoch": 0.9894389321784199,
|
| 24467 |
+
"grad_norm": 0.12158203125,
|
| 24468 |
+
"learning_rate": 7.233042768256593e-07,
|
| 24469 |
+
"loss": 1.667173147201538,
|
| 24470 |
+
"step": 6968
|
| 24471 |
+
},
|
| 24472 |
+
{
|
| 24473 |
+
"epoch": 0.9897229272795044,
|
| 24474 |
+
"grad_norm": 0.1298828125,
|
| 24475 |
+
"learning_rate": 6.858226596324235e-07,
|
| 24476 |
+
"loss": 1.684898853302002,
|
| 24477 |
+
"step": 6970
|
| 24478 |
+
},
|
| 24479 |
+
{
|
| 24480 |
+
"epoch": 0.990006922380589,
|
| 24481 |
+
"grad_norm": 0.126953125,
|
| 24482 |
+
"learning_rate": 6.493314974995113e-07,
|
| 24483 |
+
"loss": 1.6259828805923462,
|
| 24484 |
+
"step": 6972
|
| 24485 |
+
},
|
| 24486 |
+
{
|
| 24487 |
+
"epoch": 0.9902909174816734,
|
| 24488 |
+
"grad_norm": 0.1298828125,
|
| 24489 |
+
"learning_rate": 6.138315233369663e-07,
|
| 24490 |
+
"loss": 1.6429195404052734,
|
| 24491 |
+
"step": 6974
|
| 24492 |
+
},
|
| 24493 |
+
{
|
| 24494 |
+
"epoch": 0.990574912582758,
|
| 24495 |
+
"grad_norm": 0.12060546875,
|
| 24496 |
+
"learning_rate": 5.7932345014724e-07,
|
| 24497 |
+
"loss": 1.6119306087493896,
|
| 24498 |
+
"step": 6976
|
| 24499 |
+
},
|
| 24500 |
+
{
|
| 24501 |
+
"epoch": 0.9908589076838424,
|
| 24502 |
+
"grad_norm": 0.125,
|
| 24503 |
+
"learning_rate": 5.458079710108533e-07,
|
| 24504 |
+
"loss": 1.6442238092422485,
|
| 24505 |
+
"step": 6978
|
| 24506 |
+
},
|
| 24507 |
+
{
|
| 24508 |
+
"epoch": 0.991142902784927,
|
| 24509 |
+
"grad_norm": 0.11572265625,
|
| 24510 |
+
"learning_rate": 5.132857590724794e-07,
|
| 24511 |
+
"loss": 1.6374033689498901,
|
| 24512 |
+
"step": 6980
|
| 24513 |
+
},
|
| 24514 |
+
{
|
| 24515 |
+
"epoch": 0.9914268978860115,
|
| 24516 |
+
"grad_norm": 0.1240234375,
|
| 24517 |
+
"learning_rate": 4.817574675274328e-07,
|
| 24518 |
+
"loss": 1.6524770259857178,
|
| 24519 |
+
"step": 6982
|
| 24520 |
+
},
|
| 24521 |
+
{
|
| 24522 |
+
"epoch": 0.991710892987096,
|
| 24523 |
+
"grad_norm": 0.123046875,
|
| 24524 |
+
"learning_rate": 4.512237296085353e-07,
|
| 24525 |
+
"loss": 1.6336588859558105,
|
| 24526 |
+
"step": 6984
|
| 24527 |
+
},
|
| 24528 |
+
{
|
| 24529 |
+
"epoch": 0.9919948880881805,
|
| 24530 |
+
"grad_norm": 0.12109375,
|
| 24531 |
+
"learning_rate": 4.2168515857341497e-07,
|
| 24532 |
+
"loss": 1.6137917041778564,
|
| 24533 |
+
"step": 6986
|
| 24534 |
+
},
|
| 24535 |
+
{
|
| 24536 |
+
"epoch": 0.9922788831892649,
|
| 24537 |
+
"grad_norm": 0.12158203125,
|
| 24538 |
+
"learning_rate": 3.9314234769217137e-07,
|
| 24539 |
+
"loss": 1.6510038375854492,
|
| 24540 |
+
"step": 6988
|
| 24541 |
+
},
|
| 24542 |
+
{
|
| 24543 |
+
"epoch": 0.9925628782903495,
|
| 24544 |
+
"grad_norm": 0.130859375,
|
| 24545 |
+
"learning_rate": 3.6559587023548e-07,
|
| 24546 |
+
"loss": 1.6569230556488037,
|
| 24547 |
+
"step": 6990
|
| 24548 |
+
},
|
| 24549 |
+
{
|
| 24550 |
+
"epoch": 0.992846873391434,
|
| 24551 |
+
"grad_norm": 0.12890625,
|
| 24552 |
+
"learning_rate": 3.3904627946305667e-07,
|
| 24553 |
+
"loss": 1.6170315742492676,
|
| 24554 |
+
"step": 6992
|
| 24555 |
+
},
|
| 24556 |
+
{
|
| 24557 |
+
"epoch": 0.9931308684925185,
|
| 24558 |
+
"grad_norm": 0.12353515625,
|
| 24559 |
+
"learning_rate": 3.1349410861256645e-07,
|
| 24560 |
+
"loss": 1.652906060218811,
|
| 24561 |
+
"step": 6994
|
| 24562 |
+
},
|
| 24563 |
+
{
|
| 24564 |
+
"epoch": 0.993414863593603,
|
| 24565 |
+
"grad_norm": 0.12890625,
|
| 24566 |
+
"learning_rate": 2.889398708888991e-07,
|
| 24567 |
+
"loss": 1.646234393119812,
|
| 24568 |
+
"step": 6996
|
| 24569 |
+
},
|
| 24570 |
+
{
|
| 24571 |
+
"epoch": 0.9936988586946875,
|
| 24572 |
+
"grad_norm": 0.1181640625,
|
| 24573 |
+
"learning_rate": 2.6538405945385483e-07,
|
| 24574 |
+
"loss": 1.6336615085601807,
|
| 24575 |
+
"step": 6998
|
| 24576 |
+
},
|
| 24577 |
+
{
|
| 24578 |
+
"epoch": 0.993982853795772,
|
| 24579 |
+
"grad_norm": 0.125,
|
| 24580 |
+
"learning_rate": 2.4282714741627464e-07,
|
| 24581 |
+
"loss": 1.643019199371338,
|
| 24582 |
+
"step": 7000
|
| 24583 |
+
},
|
| 24584 |
+
{
|
| 24585 |
+
"epoch": 0.9942668488968566,
|
| 24586 |
+
"grad_norm": 0.130859375,
|
| 24587 |
+
"learning_rate": 2.2126958782249796e-07,
|
| 24588 |
+
"loss": 1.679926872253418,
|
| 24589 |
+
"step": 7002
|
| 24590 |
+
},
|
| 24591 |
+
{
|
| 24592 |
+
"epoch": 0.994550843997941,
|
| 24593 |
+
"grad_norm": 0.1259765625,
|
| 24594 |
+
"learning_rate": 2.007118136472752e-07,
|
| 24595 |
+
"loss": 1.6487417221069336,
|
| 24596 |
+
"step": 7004
|
| 24597 |
+
},
|
| 24598 |
+
{
|
| 24599 |
+
"epoch": 0.9948348390990256,
|
| 24600 |
+
"grad_norm": 0.1220703125,
|
| 24601 |
+
"learning_rate": 1.8115423778509167e-07,
|
| 24602 |
+
"loss": 1.6569416522979736,
|
| 24603 |
+
"step": 7006
|
| 24604 |
+
},
|
| 24605 |
+
{
|
| 24606 |
+
"epoch": 0.99511883420011,
|
| 24607 |
+
"grad_norm": 0.125,
|
| 24608 |
+
"learning_rate": 1.6259725304184626e-07,
|
| 24609 |
+
"loss": 1.680594563484192,
|
| 24610 |
+
"step": 7008
|
| 24611 |
+
},
|
| 24612 |
+
{
|
| 24613 |
+
"epoch": 0.9954028293011945,
|
| 24614 |
+
"grad_norm": 0.1201171875,
|
| 24615 |
+
"learning_rate": 1.4504123212698006e-07,
|
| 24616 |
+
"loss": 1.6156156063079834,
|
| 24617 |
+
"step": 7010
|
| 24618 |
+
},
|
| 24619 |
+
{
|
| 24620 |
+
"epoch": 0.9956868244022791,
|
| 24621 |
+
"grad_norm": 0.12353515625,
|
| 24622 |
+
"learning_rate": 1.2848652764598789e-07,
|
| 24623 |
+
"loss": 1.6274839639663696,
|
| 24624 |
+
"step": 7012
|
| 24625 |
+
},
|
| 24626 |
+
{
|
| 24627 |
+
"epoch": 0.9959708195033635,
|
| 24628 |
+
"grad_norm": 0.12451171875,
|
| 24629 |
+
"learning_rate": 1.1293347209332395e-07,
|
| 24630 |
+
"loss": 1.6665003299713135,
|
| 24631 |
+
"step": 7014
|
| 24632 |
+
},
|
| 24633 |
+
{
|
| 24634 |
+
"epoch": 0.9962548146044481,
|
| 24635 |
+
"grad_norm": 0.130859375,
|
| 24636 |
+
"learning_rate": 9.838237784575155e-08,
|
| 24637 |
+
"loss": 1.6789369583129883,
|
| 24638 |
+
"step": 7016
|
| 24639 |
+
},
|
| 24640 |
+
{
|
| 24641 |
+
"epoch": 0.9965388097055325,
|
| 24642 |
+
"grad_norm": 0.1220703125,
|
| 24643 |
+
"learning_rate": 8.48335371560316e-08,
|
| 24644 |
+
"loss": 1.6686030626296997,
|
| 24645 |
+
"step": 7018
|
| 24646 |
+
},
|
| 24647 |
+
{
|
| 24648 |
+
"epoch": 0.9968228048066171,
|
| 24649 |
+
"grad_norm": 0.1201171875,
|
| 24650 |
+
"learning_rate": 7.228722214708272e-08,
|
| 24651 |
+
"loss": 1.6648080348968506,
|
| 24652 |
+
"step": 7020
|
| 24653 |
+
},
|
| 24654 |
+
{
|
| 24655 |
+
"epoch": 0.9971067999077016,
|
| 24656 |
+
"grad_norm": 0.12255859375,
|
| 24657 |
+
"learning_rate": 6.074368480650794e-08,
|
| 24658 |
+
"loss": 1.6471000909805298,
|
| 24659 |
+
"step": 7022
|
| 24660 |
+
},
|
| 24661 |
+
{
|
| 24662 |
+
"epoch": 0.9973907950087861,
|
| 24663 |
+
"grad_norm": 0.126953125,
|
| 24664 |
+
"learning_rate": 5.0203156981526446e-08,
|
| 24665 |
+
"loss": 1.6676064729690552,
|
| 24666 |
+
"step": 7024
|
| 24667 |
+
},
|
| 24668 |
+
{
|
| 24669 |
+
"epoch": 0.9976747901098706,
|
| 24670 |
+
"grad_norm": 0.126953125,
|
| 24671 |
+
"learning_rate": 4.066585037432735e-08,
|
| 24672 |
+
"loss": 1.6278235912322998,
|
| 24673 |
+
"step": 7026
|
| 24674 |
+
},
|
| 24675 |
+
{
|
| 24676 |
+
"epoch": 0.9979587852109552,
|
| 24677 |
+
"grad_norm": 0.125,
|
| 24678 |
+
"learning_rate": 3.213195653780643e-08,
|
| 24679 |
+
"loss": 1.662877082824707,
|
| 24680 |
+
"step": 7028
|
| 24681 |
+
},
|
| 24682 |
+
{
|
| 24683 |
+
"epoch": 0.9982427803120396,
|
| 24684 |
+
"grad_norm": 0.1298828125,
|
| 24685 |
+
"learning_rate": 2.4601646871724726e-08,
|
| 24686 |
+
"loss": 1.6766605377197266,
|
| 24687 |
+
"step": 7030
|
| 24688 |
+
},
|
| 24689 |
+
{
|
| 24690 |
+
"epoch": 0.9985267754131242,
|
| 24691 |
+
"grad_norm": 0.12109375,
|
| 24692 |
+
"learning_rate": 1.8075072619272436e-08,
|
| 24693 |
+
"loss": 1.6600494384765625,
|
| 24694 |
+
"step": 7032
|
| 24695 |
+
},
|
| 24696 |
+
{
|
| 24697 |
+
"epoch": 0.9988107705142086,
|
| 24698 |
+
"grad_norm": 0.1279296875,
|
| 24699 |
+
"learning_rate": 1.2552364864021337e-08,
|
| 24700 |
+
"loss": 1.6512842178344727,
|
| 24701 |
+
"step": 7034
|
| 24702 |
+
},
|
| 24703 |
+
{
|
| 24704 |
+
"epoch": 0.9990947656152931,
|
| 24705 |
+
"grad_norm": 0.12353515625,
|
| 24706 |
+
"learning_rate": 8.033634527288004e-09,
|
| 24707 |
+
"loss": 1.6618376970291138,
|
| 24708 |
+
"step": 7036
|
| 24709 |
+
},
|
| 24710 |
+
{
|
| 24711 |
+
"epoch": 0.9993787607163777,
|
| 24712 |
+
"grad_norm": 0.12255859375,
|
| 24713 |
+
"learning_rate": 4.518972365930019e-09,
|
| 24714 |
+
"loss": 1.6308016777038574,
|
| 24715 |
+
"step": 7038
|
| 24716 |
+
},
|
| 24717 |
+
{
|
| 24718 |
+
"epoch": 0.9996627558174621,
|
| 24719 |
+
"grad_norm": 0.1298828125,
|
| 24720 |
+
"learning_rate": 2.0084489704974563e-09,
|
| 24721 |
+
"loss": 1.6697489023208618,
|
| 24722 |
+
"step": 7040
|
| 24723 |
+
},
|
| 24724 |
+
{
|
| 24725 |
+
"epoch": 0.9999467509185467,
|
| 24726 |
+
"grad_norm": 0.126953125,
|
| 24727 |
+
"learning_rate": 5.021147638173407e-10,
|
| 24728 |
+
"loss": 1.6276886463165283,
|
| 24729 |
+
"step": 7042
|
| 24730 |
+
},
|
| 24731 |
+
{
|
| 24732 |
+
"epoch": 1.0,
|
| 24733 |
+
"eval_loss": 1.6529316902160645,
|
| 24734 |
+
"eval_runtime": 267.921,
|
| 24735 |
+
"eval_samples_per_second": 54.901,
|
| 24736 |
+
"eval_steps_per_second": 6.864,
|
| 24737 |
+
"step": 7043
|
| 24738 |
}
|
| 24739 |
],
|
| 24740 |
"logging_steps": 2,
|
|
|
|
| 24749 |
"should_evaluate": false,
|
| 24750 |
"should_log": false,
|
| 24751 |
"should_save": true,
|
| 24752 |
+
"should_training_stop": true
|
| 24753 |
},
|
| 24754 |
"attributes": {}
|
| 24755 |
}
|
| 24756 |
},
|
| 24757 |
+
"total_flos": 5.227389362626363e+18,
|
| 24758 |
"train_batch_size": 4,
|
| 24759 |
"trial_name": null,
|
| 24760 |
"trial_params": null
|