Upload folder using huggingface_hub
Browse files
This view is limited to 50 files because it contains too many changes.
See raw diff
- .gitattributes +30 -0
- Llama-2-13b-chat-hf/DomainBench/Agriculture/README.md +57 -0
- Llama-2-13b-chat-hf/DomainBench/Agriculture/adapter_config.json +29 -0
- Llama-2-13b-chat-hf/DomainBench/Agriculture/adapter_model.safetensors +3 -0
- Llama-2-13b-chat-hf/DomainBench/Agriculture/all_results.json +8 -0
- Llama-2-13b-chat-hf/DomainBench/Agriculture/logfile.txt +0 -0
- Llama-2-13b-chat-hf/DomainBench/Agriculture/special_tokens_map.json +24 -0
- Llama-2-13b-chat-hf/DomainBench/Agriculture/tokenizer.json +0 -0
- Llama-2-13b-chat-hf/DomainBench/Agriculture/tokenizer.model +3 -0
- Llama-2-13b-chat-hf/DomainBench/Agriculture/tokenizer_config.json +44 -0
- Llama-2-13b-chat-hf/DomainBench/Agriculture/train_results.json +8 -0
- Llama-2-13b-chat-hf/DomainBench/Agriculture/trainer_log.jsonl +501 -0
- Llama-2-13b-chat-hf/DomainBench/Agriculture/trainer_state.json +3542 -0
- Llama-2-13b-chat-hf/DomainBench/Agriculture/training_args.bin +3 -0
- Llama-2-13b-chat-hf/DomainBench/Agriculture/training_loss.png +0 -0
- Llama-2-13b-chat-hf/DomainBench/Finance/README.md +57 -0
- Llama-2-13b-chat-hf/DomainBench/Finance/adapter_config.json +29 -0
- Llama-2-13b-chat-hf/DomainBench/Finance/adapter_model.safetensors +3 -0
- Llama-2-13b-chat-hf/DomainBench/Finance/all_results.json +8 -0
- Llama-2-13b-chat-hf/DomainBench/Finance/logfile.txt +0 -0
- Llama-2-13b-chat-hf/DomainBench/Finance/special_tokens_map.json +24 -0
- Llama-2-13b-chat-hf/DomainBench/Finance/tokenizer.json +0 -0
- Llama-2-13b-chat-hf/DomainBench/Finance/tokenizer.model +3 -0
- Llama-2-13b-chat-hf/DomainBench/Finance/tokenizer_config.json +44 -0
- Llama-2-13b-chat-hf/DomainBench/Finance/train_results.json +8 -0
- Llama-2-13b-chat-hf/DomainBench/Finance/trainer_log.jsonl +501 -0
- Llama-2-13b-chat-hf/DomainBench/Finance/trainer_state.json +3542 -0
- Llama-2-13b-chat-hf/DomainBench/Finance/training_args.bin +3 -0
- Llama-2-13b-chat-hf/DomainBench/Finance/training_loss.png +0 -0
- Llama-2-13b-chat-hf/DomainBench/Geography/README.md +57 -0
- Llama-2-13b-chat-hf/DomainBench/Geography/adapter_config.json +29 -0
- Llama-2-13b-chat-hf/DomainBench/Geography/adapter_model.safetensors +3 -0
- Llama-2-13b-chat-hf/DomainBench/Geography/all_results.json +8 -0
- Llama-2-13b-chat-hf/DomainBench/Geography/logfile.txt +0 -0
- Llama-2-13b-chat-hf/DomainBench/Geography/special_tokens_map.json +24 -0
- Llama-2-13b-chat-hf/DomainBench/Geography/tokenizer.json +0 -0
- Llama-2-13b-chat-hf/DomainBench/Geography/tokenizer.model +3 -0
- Llama-2-13b-chat-hf/DomainBench/Geography/tokenizer_config.json +44 -0
- Llama-2-13b-chat-hf/DomainBench/Geography/train_results.json +8 -0
- Llama-2-13b-chat-hf/DomainBench/Geography/trainer_log.jsonl +501 -0
- Llama-2-13b-chat-hf/DomainBench/Geography/trainer_state.json +3542 -0
- Llama-2-13b-chat-hf/DomainBench/Geography/training_args.bin +3 -0
- Llama-2-13b-chat-hf/DomainBench/Geography/training_loss.png +0 -0
- Llama-2-13b-chat-hf/DomainBench/Medicine/README.md +57 -0
- Llama-2-13b-chat-hf/DomainBench/Medicine/adapter_config.json +29 -0
- Llama-2-13b-chat-hf/DomainBench/Medicine/adapter_model.safetensors +3 -0
- Llama-2-13b-chat-hf/DomainBench/Medicine/all_results.json +8 -0
- Llama-2-13b-chat-hf/DomainBench/Medicine/logfile.txt +0 -0
- Llama-2-13b-chat-hf/DomainBench/Medicine/special_tokens_map.json +24 -0
- Llama-2-13b-chat-hf/DomainBench/Medicine/tokenizer.json +0 -0
.gitattributes
CHANGED
|
@@ -33,3 +33,33 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
|
| 33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
| 34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
| 35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
| 34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
| 35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
| 36 |
+
Llama-3.2-3B-Instruct/DomainBench/Agriculture/tokenizer.json filter=lfs diff=lfs merge=lfs -text
|
| 37 |
+
Llama-3.2-3B-Instruct/DomainBench/Finance/tokenizer.json filter=lfs diff=lfs merge=lfs -text
|
| 38 |
+
Llama-3.2-3B-Instruct/DomainBench/Geography/tokenizer.json filter=lfs diff=lfs merge=lfs -text
|
| 39 |
+
Llama-3.2-3B-Instruct/DomainBench/Medicine/tokenizer.json filter=lfs diff=lfs merge=lfs -text
|
| 40 |
+
Llama-3.2-3B-Instruct/InstructionBench/Alpaca-GPT4/tokenizer.json filter=lfs diff=lfs merge=lfs -text
|
| 41 |
+
Llama-3.2-3B-Instruct/InstructionBench/Dolly/tokenizer.json filter=lfs diff=lfs merge=lfs -text
|
| 42 |
+
Llama-3.2-3B-Instruct/InstructionBench/InstructionWild/tokenizer.json filter=lfs diff=lfs merge=lfs -text
|
| 43 |
+
Llama-3.2-3B-Instruct/ReasoningBench/GSM8K/tokenizer.json filter=lfs diff=lfs merge=lfs -text
|
| 44 |
+
Llama-3.2-3B-Instruct/ReasoningBench/Logiqa/tokenizer.json filter=lfs diff=lfs merge=lfs -text
|
| 45 |
+
Llama-3.2-3B-Instruct/ReasoningBench/MetaMath/tokenizer.json filter=lfs diff=lfs merge=lfs -text
|
| 46 |
+
Meta-Llama-3-8B-Instruct/DomainBench/Agriculture/tokenizer.json filter=lfs diff=lfs merge=lfs -text
|
| 47 |
+
Meta-Llama-3-8B-Instruct/DomainBench/Finance/tokenizer.json filter=lfs diff=lfs merge=lfs -text
|
| 48 |
+
Meta-Llama-3-8B-Instruct/DomainBench/Geography/tokenizer.json filter=lfs diff=lfs merge=lfs -text
|
| 49 |
+
Meta-Llama-3-8B-Instruct/DomainBench/Medicine/tokenizer.json filter=lfs diff=lfs merge=lfs -text
|
| 50 |
+
Meta-Llama-3-8B-Instruct/InstructionBench/Alpaca-GPT4/tokenizer.json filter=lfs diff=lfs merge=lfs -text
|
| 51 |
+
Meta-Llama-3-8B-Instruct/InstructionBench/Dolly/tokenizer.json filter=lfs diff=lfs merge=lfs -text
|
| 52 |
+
Meta-Llama-3-8B-Instruct/InstructionBench/InstructionWild/tokenizer.json filter=lfs diff=lfs merge=lfs -text
|
| 53 |
+
Meta-Llama-3-8B-Instruct/ReasoningBench/GSM8K/tokenizer.json filter=lfs diff=lfs merge=lfs -text
|
| 54 |
+
Meta-Llama-3-8B-Instruct/ReasoningBench/Logiqa/tokenizer.json filter=lfs diff=lfs merge=lfs -text
|
| 55 |
+
Meta-Llama-3-8B-Instruct/ReasoningBench/MetaMath/tokenizer.json filter=lfs diff=lfs merge=lfs -text
|
| 56 |
+
Qwen2.5-7B-Instruct/DomainBench/Agriculture/tokenizer.json filter=lfs diff=lfs merge=lfs -text
|
| 57 |
+
Qwen2.5-7B-Instruct/DomainBench/Finance/tokenizer.json filter=lfs diff=lfs merge=lfs -text
|
| 58 |
+
Qwen2.5-7B-Instruct/DomainBench/Geography/tokenizer.json filter=lfs diff=lfs merge=lfs -text
|
| 59 |
+
Qwen2.5-7B-Instruct/DomainBench/Medicine/tokenizer.json filter=lfs diff=lfs merge=lfs -text
|
| 60 |
+
Qwen2.5-7B-Instruct/InstructionBench/Alpaca-GPT4/tokenizer.json filter=lfs diff=lfs merge=lfs -text
|
| 61 |
+
Qwen2.5-7B-Instruct/InstructionBench/Dolly/tokenizer.json filter=lfs diff=lfs merge=lfs -text
|
| 62 |
+
Qwen2.5-7B-Instruct/InstructionBench/InstructionWild/tokenizer.json filter=lfs diff=lfs merge=lfs -text
|
| 63 |
+
Qwen2.5-7B-Instruct/ReasoningBench/GSM8K/tokenizer.json filter=lfs diff=lfs merge=lfs -text
|
| 64 |
+
Qwen2.5-7B-Instruct/ReasoningBench/Logiqa/tokenizer.json filter=lfs diff=lfs merge=lfs -text
|
| 65 |
+
Qwen2.5-7B-Instruct/ReasoningBench/MetaMath/tokenizer.json filter=lfs diff=lfs merge=lfs -text
|
Llama-2-13b-chat-hf/DomainBench/Agriculture/README.md
ADDED
|
@@ -0,0 +1,57 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
---
|
| 2 |
+
library_name: peft
|
| 3 |
+
license: other
|
| 4 |
+
base_model: /hujinwu/LLM_Assemble/pretrain_model/Llama-2-13b-chat-hf
|
| 5 |
+
tags:
|
| 6 |
+
- llama-factory
|
| 7 |
+
- lora
|
| 8 |
+
- generated_from_trainer
|
| 9 |
+
model-index:
|
| 10 |
+
- name: threshold_3-lamb_0.1-lr_5e-5
|
| 11 |
+
results: []
|
| 12 |
+
---
|
| 13 |
+
|
| 14 |
+
<!-- This model card has been generated automatically according to the information the Trainer had access to. You
|
| 15 |
+
should probably proofread and complete it, then remove this comment. -->
|
| 16 |
+
|
| 17 |
+
# threshold_3-lamb_0.1-lr_5e-5
|
| 18 |
+
|
| 19 |
+
This model is a fine-tuned version of [/hujinwu/LLM_Assemble/pretrain_model/Llama-2-13b-chat-hf](https://huggingface.co//hujinwu/LLM_Assemble/pretrain_model/Llama-2-13b-chat-hf) on the agriculture dataset.
|
| 20 |
+
|
| 21 |
+
## Model description
|
| 22 |
+
|
| 23 |
+
More information needed
|
| 24 |
+
|
| 25 |
+
## Intended uses & limitations
|
| 26 |
+
|
| 27 |
+
More information needed
|
| 28 |
+
|
| 29 |
+
## Training and evaluation data
|
| 30 |
+
|
| 31 |
+
More information needed
|
| 32 |
+
|
| 33 |
+
## Training procedure
|
| 34 |
+
|
| 35 |
+
### Training hyperparameters
|
| 36 |
+
|
| 37 |
+
The following hyperparameters were used during training:
|
| 38 |
+
- learning_rate: 5e-05
|
| 39 |
+
- train_batch_size: 1
|
| 40 |
+
- eval_batch_size: 8
|
| 41 |
+
- seed: 42
|
| 42 |
+
- optimizer: Use adamw_torch with betas=(0.9,0.999) and epsilon=1e-08 and optimizer_args=No additional optimizer arguments
|
| 43 |
+
- lr_scheduler_type: cosine
|
| 44 |
+
- lr_scheduler_warmup_ratio: 0.1
|
| 45 |
+
- num_epochs: 1.0
|
| 46 |
+
|
| 47 |
+
### Training results
|
| 48 |
+
|
| 49 |
+
|
| 50 |
+
|
| 51 |
+
### Framework versions
|
| 52 |
+
|
| 53 |
+
- PEFT 0.12.0
|
| 54 |
+
- Transformers 4.46.1
|
| 55 |
+
- Pytorch 2.5.1+cu124
|
| 56 |
+
- Datasets 3.1.0
|
| 57 |
+
- Tokenizers 0.20.3
|
Llama-2-13b-chat-hf/DomainBench/Agriculture/adapter_config.json
ADDED
|
@@ -0,0 +1,29 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"alpha_pattern": {},
|
| 3 |
+
"auto_mapping": null,
|
| 4 |
+
"base_model_name_or_path": "/hujinwu/LLM_Assemble/pretrain_model/Llama-2-13b-chat-hf",
|
| 5 |
+
"bias": "none",
|
| 6 |
+
"fan_in_fan_out": false,
|
| 7 |
+
"inference_mode": true,
|
| 8 |
+
"init_lora_weights": true,
|
| 9 |
+
"layer_replication": null,
|
| 10 |
+
"layers_pattern": null,
|
| 11 |
+
"layers_to_transform": null,
|
| 12 |
+
"loftq_config": {},
|
| 13 |
+
"lora_alpha": 16,
|
| 14 |
+
"lora_dropout": 0.0,
|
| 15 |
+
"megatron_config": null,
|
| 16 |
+
"megatron_core": "megatron.core",
|
| 17 |
+
"modules_to_save": null,
|
| 18 |
+
"peft_type": "LORA",
|
| 19 |
+
"r": 8,
|
| 20 |
+
"rank_pattern": {},
|
| 21 |
+
"revision": null,
|
| 22 |
+
"target_modules": [
|
| 23 |
+
"q_proj",
|
| 24 |
+
"v_proj"
|
| 25 |
+
],
|
| 26 |
+
"task_type": "CAUSAL_LM",
|
| 27 |
+
"use_dora": false,
|
| 28 |
+
"use_rslora": false
|
| 29 |
+
}
|
Llama-2-13b-chat-hf/DomainBench/Agriculture/adapter_model.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:34823ac8f137597ed7fb597f267ca574184059387f87535c4f3c36cfc91c3fa6
|
| 3 |
+
size 26235704
|
Llama-2-13b-chat-hf/DomainBench/Agriculture/all_results.json
ADDED
|
@@ -0,0 +1,8 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"epoch": 1.0,
|
| 3 |
+
"total_flos": 9478520693637120.0,
|
| 4 |
+
"train_loss": 2.8943692499160765,
|
| 5 |
+
"train_runtime": 1526.3896,
|
| 6 |
+
"train_samples_per_second": 3.276,
|
| 7 |
+
"train_steps_per_second": 3.276
|
| 8 |
+
}
|
Llama-2-13b-chat-hf/DomainBench/Agriculture/logfile.txt
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
Llama-2-13b-chat-hf/DomainBench/Agriculture/special_tokens_map.json
ADDED
|
@@ -0,0 +1,24 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"bos_token": {
|
| 3 |
+
"content": "<s>",
|
| 4 |
+
"lstrip": false,
|
| 5 |
+
"normalized": false,
|
| 6 |
+
"rstrip": false,
|
| 7 |
+
"single_word": false
|
| 8 |
+
},
|
| 9 |
+
"eos_token": {
|
| 10 |
+
"content": "</s>",
|
| 11 |
+
"lstrip": false,
|
| 12 |
+
"normalized": false,
|
| 13 |
+
"rstrip": false,
|
| 14 |
+
"single_word": false
|
| 15 |
+
},
|
| 16 |
+
"pad_token": "</s>",
|
| 17 |
+
"unk_token": {
|
| 18 |
+
"content": "<unk>",
|
| 19 |
+
"lstrip": false,
|
| 20 |
+
"normalized": false,
|
| 21 |
+
"rstrip": false,
|
| 22 |
+
"single_word": false
|
| 23 |
+
}
|
| 24 |
+
}
|
Llama-2-13b-chat-hf/DomainBench/Agriculture/tokenizer.json
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
Llama-2-13b-chat-hf/DomainBench/Agriculture/tokenizer.model
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:9e556afd44213b6bd1be2b850ebbbd98f5481437a8021afaf58ee7fb1818d347
|
| 3 |
+
size 499723
|
Llama-2-13b-chat-hf/DomainBench/Agriculture/tokenizer_config.json
ADDED
|
@@ -0,0 +1,44 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"add_bos_token": true,
|
| 3 |
+
"add_eos_token": false,
|
| 4 |
+
"add_prefix_space": null,
|
| 5 |
+
"added_tokens_decoder": {
|
| 6 |
+
"0": {
|
| 7 |
+
"content": "<unk>",
|
| 8 |
+
"lstrip": false,
|
| 9 |
+
"normalized": false,
|
| 10 |
+
"rstrip": false,
|
| 11 |
+
"single_word": false,
|
| 12 |
+
"special": true
|
| 13 |
+
},
|
| 14 |
+
"1": {
|
| 15 |
+
"content": "<s>",
|
| 16 |
+
"lstrip": false,
|
| 17 |
+
"normalized": false,
|
| 18 |
+
"rstrip": false,
|
| 19 |
+
"single_word": false,
|
| 20 |
+
"special": true
|
| 21 |
+
},
|
| 22 |
+
"2": {
|
| 23 |
+
"content": "</s>",
|
| 24 |
+
"lstrip": false,
|
| 25 |
+
"normalized": false,
|
| 26 |
+
"rstrip": false,
|
| 27 |
+
"single_word": false,
|
| 28 |
+
"special": true
|
| 29 |
+
}
|
| 30 |
+
},
|
| 31 |
+
"bos_token": "<s>",
|
| 32 |
+
"chat_template": "{% if messages[0]['role'] == 'system' %}{% set loop_messages = messages[1:] %}{% set system_message = messages[0]['content'] %}{% else %}{% set loop_messages = messages %}{% endif %}{% for message in loop_messages %}{% set content = message['content'] %}{% if loop.index0 == 0 and system_message is defined %}{% set content = '<<SYS>>\n' + system_message + '\n<</SYS>>\n\n' + message['content'] %}{% endif %}{% if message['role'] == 'user' %}{{ '<s>' + '[INST] ' + content + ' [/INST]' }}{% elif message['role'] == 'assistant' %}{{ content + '</s>' }}{% endif %}{% endfor %}",
|
| 33 |
+
"clean_up_tokenization_spaces": false,
|
| 34 |
+
"eos_token": "</s>",
|
| 35 |
+
"legacy": false,
|
| 36 |
+
"model_max_length": 1000000000000000019884624838656,
|
| 37 |
+
"pad_token": "</s>",
|
| 38 |
+
"padding_side": "right",
|
| 39 |
+
"sp_model_kwargs": {},
|
| 40 |
+
"split_special_tokens": false,
|
| 41 |
+
"tokenizer_class": "LlamaTokenizer",
|
| 42 |
+
"unk_token": "<unk>",
|
| 43 |
+
"use_default_system_prompt": false
|
| 44 |
+
}
|
Llama-2-13b-chat-hf/DomainBench/Agriculture/train_results.json
ADDED
|
@@ -0,0 +1,8 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"epoch": 1.0,
|
| 3 |
+
"total_flos": 9478520693637120.0,
|
| 4 |
+
"train_loss": 2.8943692499160765,
|
| 5 |
+
"train_runtime": 1526.3896,
|
| 6 |
+
"train_samples_per_second": 3.276,
|
| 7 |
+
"train_steps_per_second": 3.276
|
| 8 |
+
}
|
Llama-2-13b-chat-hf/DomainBench/Agriculture/trainer_log.jsonl
ADDED
|
@@ -0,0 +1,501 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{"current_steps": 10, "total_steps": 5000, "loss": 10.8135, "lr": 1.0000000000000002e-06, "epoch": 0.002, "percentage": 0.2, "elapsed_time": "0:00:03", "remaining_time": "0:33:05"}
|
| 2 |
+
{"current_steps": 20, "total_steps": 5000, "loss": 8.4638, "lr": 2.0000000000000003e-06, "epoch": 0.004, "percentage": 0.4, "elapsed_time": "0:00:07", "remaining_time": "0:29:03"}
|
| 3 |
+
{"current_steps": 30, "total_steps": 5000, "loss": 15.4904, "lr": 3e-06, "epoch": 0.006, "percentage": 0.6, "elapsed_time": "0:00:10", "remaining_time": "0:27:39"}
|
| 4 |
+
{"current_steps": 40, "total_steps": 5000, "loss": 11.4875, "lr": 4.000000000000001e-06, "epoch": 0.008, "percentage": 0.8, "elapsed_time": "0:00:13", "remaining_time": "0:26:56"}
|
| 5 |
+
{"current_steps": 50, "total_steps": 5000, "loss": 14.2003, "lr": 5e-06, "epoch": 0.01, "percentage": 1.0, "elapsed_time": "0:00:16", "remaining_time": "0:26:26"}
|
| 6 |
+
{"current_steps": 60, "total_steps": 5000, "loss": 12.1374, "lr": 6e-06, "epoch": 0.012, "percentage": 1.2, "elapsed_time": "0:00:18", "remaining_time": "0:25:44"}
|
| 7 |
+
{"current_steps": 70, "total_steps": 5000, "loss": 11.6844, "lr": 7.000000000000001e-06, "epoch": 0.014, "percentage": 1.4, "elapsed_time": "0:00:21", "remaining_time": "0:25:36"}
|
| 8 |
+
{"current_steps": 80, "total_steps": 5000, "loss": 10.4387, "lr": 8.000000000000001e-06, "epoch": 0.016, "percentage": 1.6, "elapsed_time": "0:00:56", "remaining_time": "0:57:32"}
|
| 9 |
+
{"current_steps": 90, "total_steps": 5000, "loss": 5.5739, "lr": 9e-06, "epoch": 0.018, "percentage": 1.8, "elapsed_time": "0:00:59", "remaining_time": "0:53:49"}
|
| 10 |
+
{"current_steps": 100, "total_steps": 5000, "loss": 12.7118, "lr": 1e-05, "epoch": 0.02, "percentage": 2.0, "elapsed_time": "0:01:02", "remaining_time": "0:50:47"}
|
| 11 |
+
{"current_steps": 110, "total_steps": 5000, "loss": 15.3528, "lr": 1.1000000000000001e-05, "epoch": 0.022, "percentage": 2.2, "elapsed_time": "0:01:05", "remaining_time": "0:48:16"}
|
| 12 |
+
{"current_steps": 120, "total_steps": 5000, "loss": 14.2922, "lr": 1.2e-05, "epoch": 0.024, "percentage": 2.4, "elapsed_time": "0:01:08", "remaining_time": "0:46:11"}
|
| 13 |
+
{"current_steps": 130, "total_steps": 5000, "loss": 6.3563, "lr": 1.3000000000000001e-05, "epoch": 0.026, "percentage": 2.6, "elapsed_time": "0:01:11", "remaining_time": "0:44:26"}
|
| 14 |
+
{"current_steps": 140, "total_steps": 5000, "loss": 7.9494, "lr": 1.4000000000000001e-05, "epoch": 0.028, "percentage": 2.8, "elapsed_time": "0:01:14", "remaining_time": "0:42:51"}
|
| 15 |
+
{"current_steps": 150, "total_steps": 5000, "loss": 10.7366, "lr": 1.5e-05, "epoch": 0.03, "percentage": 3.0, "elapsed_time": "0:01:17", "remaining_time": "0:41:32"}
|
| 16 |
+
{"current_steps": 160, "total_steps": 5000, "loss": 4.7961, "lr": 1.6000000000000003e-05, "epoch": 0.032, "percentage": 3.2, "elapsed_time": "0:01:20", "remaining_time": "0:40:21"}
|
| 17 |
+
{"current_steps": 170, "total_steps": 5000, "loss": 2.8594, "lr": 1.7000000000000003e-05, "epoch": 0.034, "percentage": 3.4, "elapsed_time": "0:01:23", "remaining_time": "0:39:20"}
|
| 18 |
+
{"current_steps": 180, "total_steps": 5000, "loss": 4.2593, "lr": 1.8e-05, "epoch": 0.036, "percentage": 3.6, "elapsed_time": "0:01:26", "remaining_time": "0:38:24"}
|
| 19 |
+
{"current_steps": 190, "total_steps": 5000, "loss": 5.3202, "lr": 1.9e-05, "epoch": 0.038, "percentage": 3.8, "elapsed_time": "0:01:29", "remaining_time": "0:37:34"}
|
| 20 |
+
{"current_steps": 200, "total_steps": 5000, "loss": 8.7095, "lr": 2e-05, "epoch": 0.04, "percentage": 4.0, "elapsed_time": "0:01:31", "remaining_time": "0:36:47"}
|
| 21 |
+
{"current_steps": 210, "total_steps": 5000, "loss": 4.7786, "lr": 2.1e-05, "epoch": 0.042, "percentage": 4.2, "elapsed_time": "0:01:34", "remaining_time": "0:36:06"}
|
| 22 |
+
{"current_steps": 220, "total_steps": 5000, "loss": 2.5694, "lr": 2.2000000000000003e-05, "epoch": 0.044, "percentage": 4.4, "elapsed_time": "0:01:37", "remaining_time": "0:35:29"}
|
| 23 |
+
{"current_steps": 230, "total_steps": 5000, "loss": 4.3152, "lr": 2.3000000000000003e-05, "epoch": 0.046, "percentage": 4.6, "elapsed_time": "0:01:41", "remaining_time": "0:34:54"}
|
| 24 |
+
{"current_steps": 240, "total_steps": 5000, "loss": 4.0996, "lr": 2.4e-05, "epoch": 0.048, "percentage": 4.8, "elapsed_time": "0:01:43", "remaining_time": "0:34:22"}
|
| 25 |
+
{"current_steps": 250, "total_steps": 5000, "loss": 4.9146, "lr": 2.5e-05, "epoch": 0.05, "percentage": 5.0, "elapsed_time": "0:01:46", "remaining_time": "0:33:52"}
|
| 26 |
+
{"current_steps": 260, "total_steps": 5000, "loss": 1.8707, "lr": 2.6000000000000002e-05, "epoch": 0.052, "percentage": 5.2, "elapsed_time": "0:01:49", "remaining_time": "0:33:24"}
|
| 27 |
+
{"current_steps": 270, "total_steps": 5000, "loss": 3.1247, "lr": 2.7000000000000002e-05, "epoch": 0.054, "percentage": 5.4, "elapsed_time": "0:01:52", "remaining_time": "0:32:59"}
|
| 28 |
+
{"current_steps": 280, "total_steps": 5000, "loss": 3.8507, "lr": 2.8000000000000003e-05, "epoch": 0.056, "percentage": 5.6, "elapsed_time": "0:01:55", "remaining_time": "0:32:33"}
|
| 29 |
+
{"current_steps": 290, "total_steps": 5000, "loss": 2.8481, "lr": 2.9e-05, "epoch": 0.058, "percentage": 5.8, "elapsed_time": "0:01:58", "remaining_time": "0:32:10"}
|
| 30 |
+
{"current_steps": 300, "total_steps": 5000, "loss": 4.4567, "lr": 3e-05, "epoch": 0.06, "percentage": 6.0, "elapsed_time": "0:02:01", "remaining_time": "0:31:49"}
|
| 31 |
+
{"current_steps": 310, "total_steps": 5000, "loss": 3.544, "lr": 3.1e-05, "epoch": 0.062, "percentage": 6.2, "elapsed_time": "0:02:04", "remaining_time": "0:31:29"}
|
| 32 |
+
{"current_steps": 320, "total_steps": 5000, "loss": 2.028, "lr": 3.2000000000000005e-05, "epoch": 0.064, "percentage": 6.4, "elapsed_time": "0:02:07", "remaining_time": "0:31:09"}
|
| 33 |
+
{"current_steps": 330, "total_steps": 5000, "loss": 3.4244, "lr": 3.3e-05, "epoch": 0.066, "percentage": 6.6, "elapsed_time": "0:02:10", "remaining_time": "0:30:51"}
|
| 34 |
+
{"current_steps": 340, "total_steps": 5000, "loss": 5.216, "lr": 3.4000000000000007e-05, "epoch": 0.068, "percentage": 6.8, "elapsed_time": "0:02:13", "remaining_time": "0:30:33"}
|
| 35 |
+
{"current_steps": 350, "total_steps": 5000, "loss": 2.7441, "lr": 3.5e-05, "epoch": 0.07, "percentage": 7.0, "elapsed_time": "0:02:16", "remaining_time": "0:30:16"}
|
| 36 |
+
{"current_steps": 360, "total_steps": 5000, "loss": 2.6191, "lr": 3.6e-05, "epoch": 0.072, "percentage": 7.2, "elapsed_time": "0:02:19", "remaining_time": "0:30:01"}
|
| 37 |
+
{"current_steps": 370, "total_steps": 5000, "loss": 5.3131, "lr": 3.7e-05, "epoch": 0.074, "percentage": 7.4, "elapsed_time": "0:02:22", "remaining_time": "0:29:46"}
|
| 38 |
+
{"current_steps": 380, "total_steps": 5000, "loss": 5.2818, "lr": 3.8e-05, "epoch": 0.076, "percentage": 7.6, "elapsed_time": "0:02:25", "remaining_time": "0:29:31"}
|
| 39 |
+
{"current_steps": 390, "total_steps": 5000, "loss": 3.086, "lr": 3.9000000000000006e-05, "epoch": 0.078, "percentage": 7.8, "elapsed_time": "0:02:28", "remaining_time": "0:29:17"}
|
| 40 |
+
{"current_steps": 400, "total_steps": 5000, "loss": 2.6475, "lr": 4e-05, "epoch": 0.08, "percentage": 8.0, "elapsed_time": "0:02:31", "remaining_time": "0:29:03"}
|
| 41 |
+
{"current_steps": 410, "total_steps": 5000, "loss": 2.0594, "lr": 4.1e-05, "epoch": 0.082, "percentage": 8.2, "elapsed_time": "0:02:34", "remaining_time": "0:28:51"}
|
| 42 |
+
{"current_steps": 420, "total_steps": 5000, "loss": 3.5431, "lr": 4.2e-05, "epoch": 0.084, "percentage": 8.4, "elapsed_time": "0:02:37", "remaining_time": "0:28:38"}
|
| 43 |
+
{"current_steps": 430, "total_steps": 5000, "loss": 2.7867, "lr": 4.3e-05, "epoch": 0.086, "percentage": 8.6, "elapsed_time": "0:02:40", "remaining_time": "0:28:26"}
|
| 44 |
+
{"current_steps": 440, "total_steps": 5000, "loss": 6.2247, "lr": 4.4000000000000006e-05, "epoch": 0.088, "percentage": 8.8, "elapsed_time": "0:02:43", "remaining_time": "0:28:15"}
|
| 45 |
+
{"current_steps": 450, "total_steps": 5000, "loss": 1.8291, "lr": 4.5e-05, "epoch": 0.09, "percentage": 9.0, "elapsed_time": "0:02:46", "remaining_time": "0:28:04"}
|
| 46 |
+
{"current_steps": 460, "total_steps": 5000, "loss": 5.4416, "lr": 4.600000000000001e-05, "epoch": 0.092, "percentage": 9.2, "elapsed_time": "0:02:49", "remaining_time": "0:27:53"}
|
| 47 |
+
{"current_steps": 470, "total_steps": 5000, "loss": 6.0983, "lr": 4.7e-05, "epoch": 0.094, "percentage": 9.4, "elapsed_time": "0:02:52", "remaining_time": "0:27:43"}
|
| 48 |
+
{"current_steps": 480, "total_steps": 5000, "loss": 1.8501, "lr": 4.8e-05, "epoch": 0.096, "percentage": 9.6, "elapsed_time": "0:02:55", "remaining_time": "0:27:33"}
|
| 49 |
+
{"current_steps": 490, "total_steps": 5000, "loss": 1.5013, "lr": 4.9e-05, "epoch": 0.098, "percentage": 9.8, "elapsed_time": "0:02:58", "remaining_time": "0:27:23"}
|
| 50 |
+
{"current_steps": 500, "total_steps": 5000, "loss": 2.6987, "lr": 5e-05, "epoch": 0.1, "percentage": 10.0, "elapsed_time": "0:03:01", "remaining_time": "0:27:12"}
|
| 51 |
+
{"current_steps": 510, "total_steps": 5000, "loss": 2.268, "lr": 4.999939076763487e-05, "epoch": 0.102, "percentage": 10.2, "elapsed_time": "0:03:04", "remaining_time": "0:27:02"}
|
| 52 |
+
{"current_steps": 520, "total_steps": 5000, "loss": 2.1733, "lr": 4.999756310023261e-05, "epoch": 0.104, "percentage": 10.4, "elapsed_time": "0:03:07", "remaining_time": "0:26:53"}
|
| 53 |
+
{"current_steps": 530, "total_steps": 5000, "loss": 6.0941, "lr": 4.999451708687114e-05, "epoch": 0.106, "percentage": 10.6, "elapsed_time": "0:03:10", "remaining_time": "0:26:45"}
|
| 54 |
+
{"current_steps": 540, "total_steps": 5000, "loss": 5.3397, "lr": 4.999025287600886e-05, "epoch": 0.108, "percentage": 10.8, "elapsed_time": "0:03:13", "remaining_time": "0:26:36"}
|
| 55 |
+
{"current_steps": 550, "total_steps": 5000, "loss": 1.2973, "lr": 4.99847706754774e-05, "epoch": 0.11, "percentage": 11.0, "elapsed_time": "0:03:16", "remaining_time": "0:26:26"}
|
| 56 |
+
{"current_steps": 560, "total_steps": 5000, "loss": 2.9408, "lr": 4.997807075247146e-05, "epoch": 0.112, "percentage": 11.2, "elapsed_time": "0:03:19", "remaining_time": "0:26:18"}
|
| 57 |
+
{"current_steps": 570, "total_steps": 5000, "loss": 1.5613, "lr": 4.997015343353585e-05, "epoch": 0.114, "percentage": 11.4, "elapsed_time": "0:03:22", "remaining_time": "0:26:10"}
|
| 58 |
+
{"current_steps": 580, "total_steps": 5000, "loss": 1.5912, "lr": 4.996101910454953e-05, "epoch": 0.116, "percentage": 11.6, "elapsed_time": "0:03:25", "remaining_time": "0:26:02"}
|
| 59 |
+
{"current_steps": 590, "total_steps": 5000, "loss": 3.5306, "lr": 4.995066821070679e-05, "epoch": 0.118, "percentage": 11.8, "elapsed_time": "0:03:28", "remaining_time": "0:25:54"}
|
| 60 |
+
{"current_steps": 600, "total_steps": 5000, "loss": 2.1385, "lr": 4.993910125649561e-05, "epoch": 0.12, "percentage": 12.0, "elapsed_time": "0:03:31", "remaining_time": "0:25:47"}
|
| 61 |
+
{"current_steps": 610, "total_steps": 5000, "loss": 1.5452, "lr": 4.992631880567301e-05, "epoch": 0.122, "percentage": 12.2, "elapsed_time": "0:03:34", "remaining_time": "0:25:40"}
|
| 62 |
+
{"current_steps": 620, "total_steps": 5000, "loss": 3.0193, "lr": 4.991232148123761e-05, "epoch": 0.124, "percentage": 12.4, "elapsed_time": "0:03:37", "remaining_time": "0:25:33"}
|
| 63 |
+
{"current_steps": 630, "total_steps": 5000, "loss": 2.3063, "lr": 4.989710996539926e-05, "epoch": 0.126, "percentage": 12.6, "elapsed_time": "0:03:40", "remaining_time": "0:25:26"}
|
| 64 |
+
{"current_steps": 640, "total_steps": 5000, "loss": 2.2441, "lr": 4.988068499954578e-05, "epoch": 0.128, "percentage": 12.8, "elapsed_time": "0:03:42", "remaining_time": "0:25:19"}
|
| 65 |
+
{"current_steps": 650, "total_steps": 5000, "loss": 2.3075, "lr": 4.9863047384206835e-05, "epoch": 0.13, "percentage": 13.0, "elapsed_time": "0:03:45", "remaining_time": "0:25:12"}
|
| 66 |
+
{"current_steps": 660, "total_steps": 5000, "loss": 2.9721, "lr": 4.984419797901491e-05, "epoch": 0.132, "percentage": 13.2, "elapsed_time": "0:03:48", "remaining_time": "0:25:05"}
|
| 67 |
+
{"current_steps": 670, "total_steps": 5000, "loss": 2.7736, "lr": 4.982413770266342e-05, "epoch": 0.134, "percentage": 13.4, "elapsed_time": "0:03:51", "remaining_time": "0:24:59"}
|
| 68 |
+
{"current_steps": 680, "total_steps": 5000, "loss": 1.8991, "lr": 4.980286753286195e-05, "epoch": 0.136, "percentage": 13.6, "elapsed_time": "0:03:54", "remaining_time": "0:24:52"}
|
| 69 |
+
{"current_steps": 690, "total_steps": 5000, "loss": 2.2606, "lr": 4.978038850628854e-05, "epoch": 0.138, "percentage": 13.8, "elapsed_time": "0:03:58", "remaining_time": "0:24:46"}
|
| 70 |
+
{"current_steps": 700, "total_steps": 5000, "loss": 1.5491, "lr": 4.975670171853926e-05, "epoch": 0.14, "percentage": 14.0, "elapsed_time": "0:04:01", "remaining_time": "0:24:40"}
|
| 71 |
+
{"current_steps": 710, "total_steps": 5000, "loss": 2.2913, "lr": 4.9731808324074717e-05, "epoch": 0.142, "percentage": 14.2, "elapsed_time": "0:04:04", "remaining_time": "0:24:34"}
|
| 72 |
+
{"current_steps": 720, "total_steps": 5000, "loss": 2.6315, "lr": 4.9705709536163824e-05, "epoch": 0.144, "percentage": 14.4, "elapsed_time": "0:04:06", "remaining_time": "0:24:28"}
|
| 73 |
+
{"current_steps": 730, "total_steps": 5000, "loss": 2.5128, "lr": 4.96784066268247e-05, "epoch": 0.146, "percentage": 14.6, "elapsed_time": "0:04:09", "remaining_time": "0:24:22"}
|
| 74 |
+
{"current_steps": 740, "total_steps": 5000, "loss": 5.8145, "lr": 4.964990092676263e-05, "epoch": 0.148, "percentage": 14.8, "elapsed_time": "0:04:12", "remaining_time": "0:24:16"}
|
| 75 |
+
{"current_steps": 750, "total_steps": 5000, "loss": 2.3354, "lr": 4.962019382530521e-05, "epoch": 0.15, "percentage": 15.0, "elapsed_time": "0:04:15", "remaining_time": "0:24:10"}
|
| 76 |
+
{"current_steps": 760, "total_steps": 5000, "loss": 4.621, "lr": 4.9589286770334654e-05, "epoch": 0.152, "percentage": 15.2, "elapsed_time": "0:04:19", "remaining_time": "0:24:05"}
|
| 77 |
+
{"current_steps": 770, "total_steps": 5000, "loss": 2.7795, "lr": 4.9557181268217227e-05, "epoch": 0.154, "percentage": 15.4, "elapsed_time": "0:04:22", "remaining_time": "0:23:59"}
|
| 78 |
+
{"current_steps": 780, "total_steps": 5000, "loss": 3.0171, "lr": 4.952387888372979e-05, "epoch": 0.156, "percentage": 15.6, "elapsed_time": "0:04:25", "remaining_time": "0:23:54"}
|
| 79 |
+
{"current_steps": 790, "total_steps": 5000, "loss": 2.1028, "lr": 4.94893812399836e-05, "epoch": 0.158, "percentage": 15.8, "elapsed_time": "0:04:28", "remaining_time": "0:23:48"}
|
| 80 |
+
{"current_steps": 800, "total_steps": 5000, "loss": 13.4531, "lr": 4.9453690018345144e-05, "epoch": 0.16, "percentage": 16.0, "elapsed_time": "0:04:31", "remaining_time": "0:23:42"}
|
| 81 |
+
{"current_steps": 810, "total_steps": 5000, "loss": 2.0661, "lr": 4.94168069583542e-05, "epoch": 0.162, "percentage": 16.2, "elapsed_time": "0:04:33", "remaining_time": "0:23:37"}
|
| 82 |
+
{"current_steps": 820, "total_steps": 5000, "loss": 2.6598, "lr": 4.937873385763908e-05, "epoch": 0.164, "percentage": 16.4, "elapsed_time": "0:04:37", "remaining_time": "0:23:32"}
|
| 83 |
+
{"current_steps": 830, "total_steps": 5000, "loss": 2.58, "lr": 4.933947257182901e-05, "epoch": 0.166, "percentage": 16.6, "elapsed_time": "0:04:39", "remaining_time": "0:23:26"}
|
| 84 |
+
{"current_steps": 840, "total_steps": 5000, "loss": 2.9303, "lr": 4.929902501446366e-05, "epoch": 0.168, "percentage": 16.8, "elapsed_time": "0:04:42", "remaining_time": "0:23:21"}
|
| 85 |
+
{"current_steps": 850, "total_steps": 5000, "loss": 3.0212, "lr": 4.925739315689991e-05, "epoch": 0.17, "percentage": 17.0, "elapsed_time": "0:04:45", "remaining_time": "0:23:16"}
|
| 86 |
+
{"current_steps": 860, "total_steps": 5000, "loss": 4.8252, "lr": 4.9214579028215776e-05, "epoch": 0.172, "percentage": 17.2, "elapsed_time": "0:04:48", "remaining_time": "0:23:11"}
|
| 87 |
+
{"current_steps": 870, "total_steps": 5000, "loss": 2.5644, "lr": 4.917058471511149e-05, "epoch": 0.174, "percentage": 17.4, "elapsed_time": "0:04:51", "remaining_time": "0:23:05"}
|
| 88 |
+
{"current_steps": 880, "total_steps": 5000, "loss": 1.8755, "lr": 4.912541236180779e-05, "epoch": 0.176, "percentage": 17.6, "elapsed_time": "0:04:54", "remaining_time": "0:23:00"}
|
| 89 |
+
{"current_steps": 890, "total_steps": 5000, "loss": 0.7075, "lr": 4.907906416994146e-05, "epoch": 0.178, "percentage": 17.8, "elapsed_time": "0:04:58", "remaining_time": "0:22:56"}
|
| 90 |
+
{"current_steps": 900, "total_steps": 5000, "loss": 7.253, "lr": 4.9031542398457974e-05, "epoch": 0.18, "percentage": 18.0, "elapsed_time": "0:05:01", "remaining_time": "0:22:51"}
|
| 91 |
+
{"current_steps": 910, "total_steps": 5000, "loss": 3.3396, "lr": 4.898284936350144e-05, "epoch": 0.182, "percentage": 18.2, "elapsed_time": "0:05:04", "remaining_time": "0:22:47"}
|
| 92 |
+
{"current_steps": 920, "total_steps": 5000, "loss": 2.9798, "lr": 4.893298743830168e-05, "epoch": 0.184, "percentage": 18.4, "elapsed_time": "0:05:07", "remaining_time": "0:22:42"}
|
| 93 |
+
{"current_steps": 930, "total_steps": 5000, "loss": 1.153, "lr": 4.888195905305859e-05, "epoch": 0.186, "percentage": 18.6, "elapsed_time": "0:05:10", "remaining_time": "0:22:37"}
|
| 94 |
+
{"current_steps": 940, "total_steps": 5000, "loss": 3.1989, "lr": 4.882976669482367e-05, "epoch": 0.188, "percentage": 18.8, "elapsed_time": "0:05:13", "remaining_time": "0:22:32"}
|
| 95 |
+
{"current_steps": 950, "total_steps": 5000, "loss": 2.7478, "lr": 4.877641290737884e-05, "epoch": 0.19, "percentage": 19.0, "elapsed_time": "0:05:16", "remaining_time": "0:22:27"}
|
| 96 |
+
{"current_steps": 960, "total_steps": 5000, "loss": 2.09, "lr": 4.8721900291112415e-05, "epoch": 0.192, "percentage": 19.2, "elapsed_time": "0:05:19", "remaining_time": "0:22:23"}
|
| 97 |
+
{"current_steps": 970, "total_steps": 5000, "loss": 1.7634, "lr": 4.8666231502892415e-05, "epoch": 0.194, "percentage": 19.4, "elapsed_time": "0:05:22", "remaining_time": "0:22:18"}
|
| 98 |
+
{"current_steps": 980, "total_steps": 5000, "loss": 1.6288, "lr": 4.860940925593703e-05, "epoch": 0.196, "percentage": 19.6, "elapsed_time": "0:05:25", "remaining_time": "0:22:13"}
|
| 99 |
+
{"current_steps": 990, "total_steps": 5000, "loss": 2.9691, "lr": 4.855143631968242e-05, "epoch": 0.198, "percentage": 19.8, "elapsed_time": "0:05:28", "remaining_time": "0:22:09"}
|
| 100 |
+
{"current_steps": 1000, "total_steps": 5000, "loss": 3.5196, "lr": 4.849231551964771e-05, "epoch": 0.2, "percentage": 20.0, "elapsed_time": "0:05:31", "remaining_time": "0:22:04"}
|
| 101 |
+
{"current_steps": 1010, "total_steps": 5000, "loss": 3.1836, "lr": 4.843204973729729e-05, "epoch": 0.202, "percentage": 20.2, "elapsed_time": "0:05:34", "remaining_time": "0:21:59"}
|
| 102 |
+
{"current_steps": 1020, "total_steps": 5000, "loss": 3.1554, "lr": 4.837064190990036e-05, "epoch": 0.204, "percentage": 20.4, "elapsed_time": "0:05:37", "remaining_time": "0:21:55"}
|
| 103 |
+
{"current_steps": 1030, "total_steps": 5000, "loss": 3.0401, "lr": 4.830809503038781e-05, "epoch": 0.206, "percentage": 20.6, "elapsed_time": "0:05:39", "remaining_time": "0:21:49"}
|
| 104 |
+
{"current_steps": 1040, "total_steps": 5000, "loss": 4.8929, "lr": 4.8244412147206284e-05, "epoch": 0.208, "percentage": 20.8, "elapsed_time": "0:05:42", "remaining_time": "0:21:45"}
|
| 105 |
+
{"current_steps": 1050, "total_steps": 5000, "loss": 2.9868, "lr": 4.817959636416969e-05, "epoch": 0.21, "percentage": 21.0, "elapsed_time": "0:05:45", "remaining_time": "0:21:40"}
|
| 106 |
+
{"current_steps": 1060, "total_steps": 5000, "loss": 5.5685, "lr": 4.8113650840307834e-05, "epoch": 0.212, "percentage": 21.2, "elapsed_time": "0:05:48", "remaining_time": "0:21:36"}
|
| 107 |
+
{"current_steps": 1070, "total_steps": 5000, "loss": 2.2926, "lr": 4.8046578789712515e-05, "epoch": 0.214, "percentage": 21.4, "elapsed_time": "0:05:51", "remaining_time": "0:21:31"}
|
| 108 |
+
{"current_steps": 1080, "total_steps": 5000, "loss": 3.3832, "lr": 4.797838348138086e-05, "epoch": 0.216, "percentage": 21.6, "elapsed_time": "0:05:54", "remaining_time": "0:21:27"}
|
| 109 |
+
{"current_steps": 1090, "total_steps": 5000, "loss": 4.5766, "lr": 4.790906823905599e-05, "epoch": 0.218, "percentage": 21.8, "elapsed_time": "0:05:57", "remaining_time": "0:21:23"}
|
| 110 |
+
{"current_steps": 1100, "total_steps": 5000, "loss": 3.2696, "lr": 4.783863644106502e-05, "epoch": 0.22, "percentage": 22.0, "elapsed_time": "0:06:00", "remaining_time": "0:21:18"}
|
| 111 |
+
{"current_steps": 1110, "total_steps": 5000, "loss": 1.583, "lr": 4.776709152015443e-05, "epoch": 0.222, "percentage": 22.2, "elapsed_time": "0:06:03", "remaining_time": "0:21:14"}
|
| 112 |
+
{"current_steps": 1120, "total_steps": 5000, "loss": 2.2554, "lr": 4.769443696332272e-05, "epoch": 0.224, "percentage": 22.4, "elapsed_time": "0:06:06", "remaining_time": "0:21:10"}
|
| 113 |
+
{"current_steps": 1130, "total_steps": 5000, "loss": 1.6651, "lr": 4.762067631165049e-05, "epoch": 0.226, "percentage": 22.6, "elapsed_time": "0:06:09", "remaining_time": "0:21:06"}
|
| 114 |
+
{"current_steps": 1140, "total_steps": 5000, "loss": 3.1578, "lr": 4.754581316012785e-05, "epoch": 0.228, "percentage": 22.8, "elapsed_time": "0:06:12", "remaining_time": "0:21:01"}
|
| 115 |
+
{"current_steps": 1150, "total_steps": 5000, "loss": 2.4297, "lr": 4.7469851157479177e-05, "epoch": 0.23, "percentage": 23.0, "elapsed_time": "0:06:15", "remaining_time": "0:20:57"}
|
| 116 |
+
{"current_steps": 1160, "total_steps": 5000, "loss": 2.9167, "lr": 4.7392794005985326e-05, "epoch": 0.232, "percentage": 23.2, "elapsed_time": "0:06:18", "remaining_time": "0:20:53"}
|
| 117 |
+
{"current_steps": 1170, "total_steps": 5000, "loss": 4.5697, "lr": 4.731464546130314e-05, "epoch": 0.234, "percentage": 23.4, "elapsed_time": "0:06:21", "remaining_time": "0:20:49"}
|
| 118 |
+
{"current_steps": 1180, "total_steps": 5000, "loss": 2.1219, "lr": 4.723540933228244e-05, "epoch": 0.236, "percentage": 23.6, "elapsed_time": "0:06:24", "remaining_time": "0:20:45"}
|
| 119 |
+
{"current_steps": 1190, "total_steps": 5000, "loss": 2.4125, "lr": 4.715508948078037e-05, "epoch": 0.238, "percentage": 23.8, "elapsed_time": "0:06:27", "remaining_time": "0:20:41"}
|
| 120 |
+
{"current_steps": 1200, "total_steps": 5000, "loss": 4.1166, "lr": 4.707368982147318e-05, "epoch": 0.24, "percentage": 24.0, "elapsed_time": "0:06:30", "remaining_time": "0:20:37"}
|
| 121 |
+
{"current_steps": 1210, "total_steps": 5000, "loss": 3.1524, "lr": 4.6991214321665414e-05, "epoch": 0.242, "percentage": 24.2, "elapsed_time": "0:06:33", "remaining_time": "0:20:32"}
|
| 122 |
+
{"current_steps": 1220, "total_steps": 5000, "loss": 1.5237, "lr": 4.690766700109659e-05, "epoch": 0.244, "percentage": 24.4, "elapsed_time": "0:06:36", "remaining_time": "0:20:28"}
|
| 123 |
+
{"current_steps": 1230, "total_steps": 5000, "loss": 2.8092, "lr": 4.682305193174524e-05, "epoch": 0.246, "percentage": 24.6, "elapsed_time": "0:06:39", "remaining_time": "0:20:24"}
|
| 124 |
+
{"current_steps": 1240, "total_steps": 5000, "loss": 2.256, "lr": 4.6737373237630476e-05, "epoch": 0.248, "percentage": 24.8, "elapsed_time": "0:06:42", "remaining_time": "0:20:20"}
|
| 125 |
+
{"current_steps": 1250, "total_steps": 5000, "loss": 2.5601, "lr": 4.665063509461097e-05, "epoch": 0.25, "percentage": 25.0, "elapsed_time": "0:06:45", "remaining_time": "0:20:16"}
|
| 126 |
+
{"current_steps": 1260, "total_steps": 5000, "loss": 2.5502, "lr": 4.656284173018144e-05, "epoch": 0.252, "percentage": 25.2, "elapsed_time": "0:06:48", "remaining_time": "0:20:12"}
|
| 127 |
+
{"current_steps": 1270, "total_steps": 5000, "loss": 3.4447, "lr": 4.6473997423266614e-05, "epoch": 0.254, "percentage": 25.4, "elapsed_time": "0:06:51", "remaining_time": "0:20:07"}
|
| 128 |
+
{"current_steps": 1280, "total_steps": 5000, "loss": 1.6954, "lr": 4.638410650401267e-05, "epoch": 0.256, "percentage": 25.6, "elapsed_time": "0:06:54", "remaining_time": "0:20:03"}
|
| 129 |
+
{"current_steps": 1290, "total_steps": 5000, "loss": 2.353, "lr": 4.629317335357619e-05, "epoch": 0.258, "percentage": 25.8, "elapsed_time": "0:06:57", "remaining_time": "0:19:59"}
|
| 130 |
+
{"current_steps": 1300, "total_steps": 5000, "loss": 2.1544, "lr": 4.620120240391065e-05, "epoch": 0.26, "percentage": 26.0, "elapsed_time": "0:07:00", "remaining_time": "0:19:55"}
|
| 131 |
+
{"current_steps": 1310, "total_steps": 5000, "loss": 1.2159, "lr": 4.610819813755038e-05, "epoch": 0.262, "percentage": 26.2, "elapsed_time": "0:07:02", "remaining_time": "0:19:51"}
|
| 132 |
+
{"current_steps": 1320, "total_steps": 5000, "loss": 1.9003, "lr": 4.601416508739211e-05, "epoch": 0.264, "percentage": 26.4, "elapsed_time": "0:07:05", "remaining_time": "0:19:47"}
|
| 133 |
+
{"current_steps": 1330, "total_steps": 5000, "loss": 4.5354, "lr": 4.591910783647404e-05, "epoch": 0.266, "percentage": 26.6, "elapsed_time": "0:07:08", "remaining_time": "0:19:43"}
|
| 134 |
+
{"current_steps": 1340, "total_steps": 5000, "loss": 1.9724, "lr": 4.5823031017752485e-05, "epoch": 0.268, "percentage": 26.8, "elapsed_time": "0:07:11", "remaining_time": "0:19:39"}
|
| 135 |
+
{"current_steps": 1350, "total_steps": 5000, "loss": 2.4934, "lr": 4.572593931387604e-05, "epoch": 0.27, "percentage": 27.0, "elapsed_time": "0:07:14", "remaining_time": "0:19:35"}
|
| 136 |
+
{"current_steps": 1360, "total_steps": 5000, "loss": 2.1056, "lr": 4.562783745695738e-05, "epoch": 0.272, "percentage": 27.2, "elapsed_time": "0:07:17", "remaining_time": "0:19:31"}
|
| 137 |
+
{"current_steps": 1370, "total_steps": 5000, "loss": 1.5175, "lr": 4.5528730228342605e-05, "epoch": 0.274, "percentage": 27.4, "elapsed_time": "0:07:20", "remaining_time": "0:19:28"}
|
| 138 |
+
{"current_steps": 1380, "total_steps": 5000, "loss": 2.1733, "lr": 4.542862245837821e-05, "epoch": 0.276, "percentage": 27.6, "elapsed_time": "0:07:23", "remaining_time": "0:19:24"}
|
| 139 |
+
{"current_steps": 1390, "total_steps": 5000, "loss": 1.431, "lr": 4.532751902617569e-05, "epoch": 0.278, "percentage": 27.8, "elapsed_time": "0:07:26", "remaining_time": "0:19:20"}
|
| 140 |
+
{"current_steps": 1400, "total_steps": 5000, "loss": 1.5888, "lr": 4.522542485937369e-05, "epoch": 0.28, "percentage": 28.0, "elapsed_time": "0:07:29", "remaining_time": "0:19:16"}
|
| 141 |
+
{"current_steps": 1410, "total_steps": 5000, "loss": 2.84, "lr": 4.512234493389785e-05, "epoch": 0.282, "percentage": 28.2, "elapsed_time": "0:07:32", "remaining_time": "0:19:13"}
|
| 142 |
+
{"current_steps": 1420, "total_steps": 5000, "loss": 3.0874, "lr": 4.5018284273718336e-05, "epoch": 0.284, "percentage": 28.4, "elapsed_time": "0:07:35", "remaining_time": "0:19:09"}
|
| 143 |
+
{"current_steps": 1430, "total_steps": 5000, "loss": 1.6856, "lr": 4.491324795060491e-05, "epoch": 0.286, "percentage": 28.6, "elapsed_time": "0:07:39", "remaining_time": "0:19:05"}
|
| 144 |
+
{"current_steps": 1440, "total_steps": 5000, "loss": 3.0233, "lr": 4.480724108387977e-05, "epoch": 0.288, "percentage": 28.8, "elapsed_time": "0:07:42", "remaining_time": "0:19:02"}
|
| 145 |
+
{"current_steps": 1450, "total_steps": 5000, "loss": 2.6897, "lr": 4.4700268840168045e-05, "epoch": 0.29, "percentage": 29.0, "elapsed_time": "0:07:45", "remaining_time": "0:18:58"}
|
| 146 |
+
{"current_steps": 1460, "total_steps": 5000, "loss": 5.0716, "lr": 4.4592336433146e-05, "epoch": 0.292, "percentage": 29.2, "elapsed_time": "0:07:48", "remaining_time": "0:18:54"}
|
| 147 |
+
{"current_steps": 1470, "total_steps": 5000, "loss": 1.5289, "lr": 4.448344912328686e-05, "epoch": 0.294, "percentage": 29.4, "elapsed_time": "0:07:51", "remaining_time": "0:18:51"}
|
| 148 |
+
{"current_steps": 1480, "total_steps": 5000, "loss": 5.0471, "lr": 4.4373612217604496e-05, "epoch": 0.296, "percentage": 29.6, "elapsed_time": "0:07:54", "remaining_time": "0:18:47"}
|
| 149 |
+
{"current_steps": 1490, "total_steps": 5000, "loss": 1.6411, "lr": 4.426283106939474e-05, "epoch": 0.298, "percentage": 29.8, "elapsed_time": "0:07:57", "remaining_time": "0:18:43"}
|
| 150 |
+
{"current_steps": 1500, "total_steps": 5000, "loss": 3.0973, "lr": 4.415111107797445e-05, "epoch": 0.3, "percentage": 30.0, "elapsed_time": "0:08:00", "remaining_time": "0:18:40"}
|
| 151 |
+
{"current_steps": 1510, "total_steps": 5000, "loss": 2.0314, "lr": 4.403845768841842e-05, "epoch": 0.302, "percentage": 30.2, "elapsed_time": "0:08:03", "remaining_time": "0:18:36"}
|
| 152 |
+
{"current_steps": 1520, "total_steps": 5000, "loss": 5.9555, "lr": 4.3924876391293915e-05, "epoch": 0.304, "percentage": 30.4, "elapsed_time": "0:08:06", "remaining_time": "0:18:33"}
|
| 153 |
+
{"current_steps": 1530, "total_steps": 5000, "loss": 2.5155, "lr": 4.381037272239311e-05, "epoch": 0.306, "percentage": 30.6, "elapsed_time": "0:08:08", "remaining_time": "0:18:28"}
|
| 154 |
+
{"current_steps": 1540, "total_steps": 5000, "loss": 3.6459, "lr": 4.36949522624633e-05, "epoch": 0.308, "percentage": 30.8, "elapsed_time": "0:08:11", "remaining_time": "0:18:25"}
|
| 155 |
+
{"current_steps": 1550, "total_steps": 5000, "loss": 3.0688, "lr": 4.357862063693486e-05, "epoch": 0.31, "percentage": 31.0, "elapsed_time": "0:08:14", "remaining_time": "0:18:21"}
|
| 156 |
+
{"current_steps": 1560, "total_steps": 5000, "loss": 5.096, "lr": 4.3461383515647106e-05, "epoch": 0.312, "percentage": 31.2, "elapsed_time": "0:08:17", "remaining_time": "0:18:17"}
|
| 157 |
+
{"current_steps": 1570, "total_steps": 5000, "loss": 3.9776, "lr": 4.334324661257191e-05, "epoch": 0.314, "percentage": 31.4, "elapsed_time": "0:08:20", "remaining_time": "0:18:14"}
|
| 158 |
+
{"current_steps": 1580, "total_steps": 5000, "loss": 1.8869, "lr": 4.3224215685535294e-05, "epoch": 0.316, "percentage": 31.6, "elapsed_time": "0:08:23", "remaining_time": "0:18:10"}
|
| 159 |
+
{"current_steps": 1590, "total_steps": 5000, "loss": 2.1454, "lr": 4.3104296535936695e-05, "epoch": 0.318, "percentage": 31.8, "elapsed_time": "0:08:26", "remaining_time": "0:18:06"}
|
| 160 |
+
{"current_steps": 1600, "total_steps": 5000, "loss": 3.0165, "lr": 4.2983495008466276e-05, "epoch": 0.32, "percentage": 32.0, "elapsed_time": "0:08:29", "remaining_time": "0:18:03"}
|
| 161 |
+
{"current_steps": 1610, "total_steps": 5000, "loss": 2.9248, "lr": 4.2861816990820084e-05, "epoch": 0.322, "percentage": 32.2, "elapsed_time": "0:08:32", "remaining_time": "0:17:59"}
|
| 162 |
+
{"current_steps": 1620, "total_steps": 5000, "loss": 5.9823, "lr": 4.273926841341302e-05, "epoch": 0.324, "percentage": 32.4, "elapsed_time": "0:08:35", "remaining_time": "0:17:55"}
|
| 163 |
+
{"current_steps": 1630, "total_steps": 5000, "loss": 4.5845, "lr": 4.261585524908987e-05, "epoch": 0.326, "percentage": 32.6, "elapsed_time": "0:08:38", "remaining_time": "0:17:52"}
|
| 164 |
+
{"current_steps": 1640, "total_steps": 5000, "loss": 2.5418, "lr": 4.249158351283414e-05, "epoch": 0.328, "percentage": 32.8, "elapsed_time": "0:08:41", "remaining_time": "0:17:48"}
|
| 165 |
+
{"current_steps": 1650, "total_steps": 5000, "loss": 3.9026, "lr": 4.2366459261474933e-05, "epoch": 0.33, "percentage": 33.0, "elapsed_time": "0:08:44", "remaining_time": "0:17:45"}
|
| 166 |
+
{"current_steps": 1660, "total_steps": 5000, "loss": 3.7564, "lr": 4.224048859339175e-05, "epoch": 0.332, "percentage": 33.2, "elapsed_time": "0:08:47", "remaining_time": "0:17:41"}
|
| 167 |
+
{"current_steps": 1670, "total_steps": 5000, "loss": 1.7162, "lr": 4.211367764821722e-05, "epoch": 0.334, "percentage": 33.4, "elapsed_time": "0:08:50", "remaining_time": "0:17:37"}
|
| 168 |
+
{"current_steps": 1680, "total_steps": 5000, "loss": 3.2491, "lr": 4.198603260653792e-05, "epoch": 0.336, "percentage": 33.6, "elapsed_time": "0:08:53", "remaining_time": "0:17:34"}
|
| 169 |
+
{"current_steps": 1690, "total_steps": 5000, "loss": 1.595, "lr": 4.185755968959308e-05, "epoch": 0.338, "percentage": 33.8, "elapsed_time": "0:08:56", "remaining_time": "0:17:30"}
|
| 170 |
+
{"current_steps": 1700, "total_steps": 5000, "loss": 2.5093, "lr": 4.172826515897146e-05, "epoch": 0.34, "percentage": 34.0, "elapsed_time": "0:08:59", "remaining_time": "0:17:27"}
|
| 171 |
+
{"current_steps": 1710, "total_steps": 5000, "loss": 5.9477, "lr": 4.1598155316306044e-05, "epoch": 0.342, "percentage": 34.2, "elapsed_time": "0:09:02", "remaining_time": "0:17:23"}
|
| 172 |
+
{"current_steps": 1720, "total_steps": 5000, "loss": 2.6358, "lr": 4.146723650296701e-05, "epoch": 0.344, "percentage": 34.4, "elapsed_time": "0:09:05", "remaining_time": "0:17:20"}
|
| 173 |
+
{"current_steps": 1730, "total_steps": 5000, "loss": 1.4777, "lr": 4.133551509975264e-05, "epoch": 0.346, "percentage": 34.6, "elapsed_time": "0:09:08", "remaining_time": "0:17:16"}
|
| 174 |
+
{"current_steps": 1740, "total_steps": 5000, "loss": 2.132, "lr": 4.1202997526578276e-05, "epoch": 0.348, "percentage": 34.8, "elapsed_time": "0:09:11", "remaining_time": "0:17:13"}
|
| 175 |
+
{"current_steps": 1750, "total_steps": 5000, "loss": 1.4507, "lr": 4.1069690242163484e-05, "epoch": 0.35, "percentage": 35.0, "elapsed_time": "0:09:14", "remaining_time": "0:17:09"}
|
| 176 |
+
{"current_steps": 1760, "total_steps": 5000, "loss": 1.2368, "lr": 4.093559974371725e-05, "epoch": 0.352, "percentage": 35.2, "elapsed_time": "0:09:17", "remaining_time": "0:17:06"}
|
| 177 |
+
{"current_steps": 1770, "total_steps": 5000, "loss": 0.9967, "lr": 4.080073256662127e-05, "epoch": 0.354, "percentage": 35.4, "elapsed_time": "0:09:20", "remaining_time": "0:17:02"}
|
| 178 |
+
{"current_steps": 1780, "total_steps": 5000, "loss": 5.9177, "lr": 4.066509528411152e-05, "epoch": 0.356, "percentage": 35.6, "elapsed_time": "0:09:23", "remaining_time": "0:16:59"}
|
| 179 |
+
{"current_steps": 1790, "total_steps": 5000, "loss": 2.3273, "lr": 4.052869450695776e-05, "epoch": 0.358, "percentage": 35.8, "elapsed_time": "0:09:26", "remaining_time": "0:16:55"}
|
| 180 |
+
{"current_steps": 1800, "total_steps": 5000, "loss": 2.7299, "lr": 4.039153688314145e-05, "epoch": 0.36, "percentage": 36.0, "elapsed_time": "0:09:29", "remaining_time": "0:16:52"}
|
| 181 |
+
{"current_steps": 1810, "total_steps": 5000, "loss": 2.0494, "lr": 4.02536290975317e-05, "epoch": 0.362, "percentage": 36.2, "elapsed_time": "0:09:32", "remaining_time": "0:16:48"}
|
| 182 |
+
{"current_steps": 1820, "total_steps": 5000, "loss": 1.9595, "lr": 4.011497787155938e-05, "epoch": 0.364, "percentage": 36.4, "elapsed_time": "0:09:35", "remaining_time": "0:16:45"}
|
| 183 |
+
{"current_steps": 1830, "total_steps": 5000, "loss": 2.4506, "lr": 3.997558996288965e-05, "epoch": 0.366, "percentage": 36.6, "elapsed_time": "0:09:38", "remaining_time": "0:16:41"}
|
| 184 |
+
{"current_steps": 1840, "total_steps": 5000, "loss": 3.6176, "lr": 3.983547216509254e-05, "epoch": 0.368, "percentage": 36.8, "elapsed_time": "0:09:41", "remaining_time": "0:16:38"}
|
| 185 |
+
{"current_steps": 1850, "total_steps": 5000, "loss": 3.2954, "lr": 3.969463130731183e-05, "epoch": 0.37, "percentage": 37.0, "elapsed_time": "0:09:44", "remaining_time": "0:16:34"}
|
| 186 |
+
{"current_steps": 1860, "total_steps": 5000, "loss": 2.0271, "lr": 3.955307425393224e-05, "epoch": 0.372, "percentage": 37.2, "elapsed_time": "0:09:47", "remaining_time": "0:16:31"}
|
| 187 |
+
{"current_steps": 1870, "total_steps": 5000, "loss": 1.6445, "lr": 3.941080790424484e-05, "epoch": 0.374, "percentage": 37.4, "elapsed_time": "0:09:49", "remaining_time": "0:16:27"}
|
| 188 |
+
{"current_steps": 1880, "total_steps": 5000, "loss": 2.4891, "lr": 3.92678391921108e-05, "epoch": 0.376, "percentage": 37.6, "elapsed_time": "0:09:52", "remaining_time": "0:16:24"}
|
| 189 |
+
{"current_steps": 1890, "total_steps": 5000, "loss": 1.9795, "lr": 3.912417508562345e-05, "epoch": 0.378, "percentage": 37.8, "elapsed_time": "0:09:55", "remaining_time": "0:16:20"}
|
| 190 |
+
{"current_steps": 1900, "total_steps": 5000, "loss": 1.7931, "lr": 3.897982258676867e-05, "epoch": 0.38, "percentage": 38.0, "elapsed_time": "0:09:58", "remaining_time": "0:16:17"}
|
| 191 |
+
{"current_steps": 1910, "total_steps": 5000, "loss": 2.9522, "lr": 3.883478873108361e-05, "epoch": 0.382, "percentage": 38.2, "elapsed_time": "0:10:01", "remaining_time": "0:16:13"}
|
| 192 |
+
{"current_steps": 1920, "total_steps": 5000, "loss": 2.0274, "lr": 3.868908058731376e-05, "epoch": 0.384, "percentage": 38.4, "elapsed_time": "0:10:04", "remaining_time": "0:16:10"}
|
| 193 |
+
{"current_steps": 1930, "total_steps": 5000, "loss": 3.1124, "lr": 3.85427052570685e-05, "epoch": 0.386, "percentage": 38.6, "elapsed_time": "0:10:07", "remaining_time": "0:16:07"}
|
| 194 |
+
{"current_steps": 1940, "total_steps": 5000, "loss": 2.101, "lr": 3.8395669874474915e-05, "epoch": 0.388, "percentage": 38.8, "elapsed_time": "0:10:10", "remaining_time": "0:16:03"}
|
| 195 |
+
{"current_steps": 1950, "total_steps": 5000, "loss": 3.1344, "lr": 3.824798160583012e-05, "epoch": 0.39, "percentage": 39.0, "elapsed_time": "0:10:13", "remaining_time": "0:16:00"}
|
| 196 |
+
{"current_steps": 1960, "total_steps": 5000, "loss": 1.7248, "lr": 3.8099647649251986e-05, "epoch": 0.392, "percentage": 39.2, "elapsed_time": "0:10:16", "remaining_time": "0:15:56"}
|
| 197 |
+
{"current_steps": 1970, "total_steps": 5000, "loss": 1.9146, "lr": 3.795067523432826e-05, "epoch": 0.394, "percentage": 39.4, "elapsed_time": "0:10:19", "remaining_time": "0:15:53"}
|
| 198 |
+
{"current_steps": 1980, "total_steps": 5000, "loss": 2.3492, "lr": 3.780107162176429e-05, "epoch": 0.396, "percentage": 39.6, "elapsed_time": "0:10:22", "remaining_time": "0:15:49"}
|
| 199 |
+
{"current_steps": 1990, "total_steps": 5000, "loss": 1.5525, "lr": 3.765084410302909e-05, "epoch": 0.398, "percentage": 39.8, "elapsed_time": "0:10:25", "remaining_time": "0:15:46"}
|
| 200 |
+
{"current_steps": 2000, "total_steps": 5000, "loss": 2.8312, "lr": 3.7500000000000003e-05, "epoch": 0.4, "percentage": 40.0, "elapsed_time": "0:10:28", "remaining_time": "0:15:43"}
|
| 201 |
+
{"current_steps": 2010, "total_steps": 5000, "loss": 3.3568, "lr": 3.7348546664605777e-05, "epoch": 0.402, "percentage": 40.2, "elapsed_time": "0:10:31", "remaining_time": "0:15:39"}
|
| 202 |
+
{"current_steps": 2020, "total_steps": 5000, "loss": 1.1778, "lr": 3.719649147846832e-05, "epoch": 0.404, "percentage": 40.4, "elapsed_time": "0:10:34", "remaining_time": "0:15:36"}
|
| 203 |
+
{"current_steps": 2030, "total_steps": 5000, "loss": 2.3964, "lr": 3.704384185254288e-05, "epoch": 0.406, "percentage": 40.6, "elapsed_time": "0:10:37", "remaining_time": "0:15:33"}
|
| 204 |
+
{"current_steps": 2040, "total_steps": 5000, "loss": 2.0172, "lr": 3.689060522675689e-05, "epoch": 0.408, "percentage": 40.8, "elapsed_time": "0:10:40", "remaining_time": "0:15:29"}
|
| 205 |
+
{"current_steps": 2050, "total_steps": 5000, "loss": 2.1464, "lr": 3.673678906964727e-05, "epoch": 0.41, "percentage": 41.0, "elapsed_time": "0:10:43", "remaining_time": "0:15:26"}
|
| 206 |
+
{"current_steps": 2060, "total_steps": 5000, "loss": 2.1743, "lr": 3.6582400877996546e-05, "epoch": 0.412, "percentage": 41.2, "elapsed_time": "0:10:46", "remaining_time": "0:15:23"}
|
| 207 |
+
{"current_steps": 2070, "total_steps": 5000, "loss": 2.785, "lr": 3.642744817646736e-05, "epoch": 0.414, "percentage": 41.4, "elapsed_time": "0:10:49", "remaining_time": "0:15:19"}
|
| 208 |
+
{"current_steps": 2080, "total_steps": 5000, "loss": 1.5782, "lr": 3.627193851723577e-05, "epoch": 0.416, "percentage": 41.6, "elapsed_time": "0:10:52", "remaining_time": "0:15:16"}
|
| 209 |
+
{"current_steps": 2090, "total_steps": 5000, "loss": 3.6745, "lr": 3.611587947962319e-05, "epoch": 0.418, "percentage": 41.8, "elapsed_time": "0:10:55", "remaining_time": "0:15:13"}
|
| 210 |
+
{"current_steps": 2100, "total_steps": 5000, "loss": 1.7939, "lr": 3.5959278669726935e-05, "epoch": 0.42, "percentage": 42.0, "elapsed_time": "0:10:58", "remaining_time": "0:15:09"}
|
| 211 |
+
{"current_steps": 2110, "total_steps": 5000, "loss": 1.9243, "lr": 3.580214372004956e-05, "epoch": 0.422, "percentage": 42.2, "elapsed_time": "0:11:01", "remaining_time": "0:15:06"}
|
| 212 |
+
{"current_steps": 2120, "total_steps": 5000, "loss": 3.6028, "lr": 3.564448228912682e-05, "epoch": 0.424, "percentage": 42.4, "elapsed_time": "0:11:04", "remaining_time": "0:15:03"}
|
| 213 |
+
{"current_steps": 2130, "total_steps": 5000, "loss": 1.3939, "lr": 3.548630206115443e-05, "epoch": 0.426, "percentage": 42.6, "elapsed_time": "0:11:07", "remaining_time": "0:14:59"}
|
| 214 |
+
{"current_steps": 2140, "total_steps": 5000, "loss": 2.1193, "lr": 3.532761074561355e-05, "epoch": 0.428, "percentage": 42.8, "elapsed_time": "0:11:10", "remaining_time": "0:14:56"}
|
| 215 |
+
{"current_steps": 2150, "total_steps": 5000, "loss": 2.0047, "lr": 3.516841607689501e-05, "epoch": 0.43, "percentage": 43.0, "elapsed_time": "0:11:13", "remaining_time": "0:14:53"}
|
| 216 |
+
{"current_steps": 2160, "total_steps": 5000, "loss": 5.2666, "lr": 3.5008725813922386e-05, "epoch": 0.432, "percentage": 43.2, "elapsed_time": "0:11:16", "remaining_time": "0:14:49"}
|
| 217 |
+
{"current_steps": 2170, "total_steps": 5000, "loss": 1.8129, "lr": 3.484854773977378e-05, "epoch": 0.434, "percentage": 43.4, "elapsed_time": "0:11:19", "remaining_time": "0:14:46"}
|
| 218 |
+
{"current_steps": 2180, "total_steps": 5000, "loss": 1.3969, "lr": 3.4687889661302576e-05, "epoch": 0.436, "percentage": 43.6, "elapsed_time": "0:11:23", "remaining_time": "0:14:43"}
|
| 219 |
+
{"current_steps": 2190, "total_steps": 5000, "loss": 1.9788, "lr": 3.452675940875686e-05, "epoch": 0.438, "percentage": 43.8, "elapsed_time": "0:11:25", "remaining_time": "0:14:39"}
|
| 220 |
+
{"current_steps": 2200, "total_steps": 5000, "loss": 1.5522, "lr": 3.436516483539781e-05, "epoch": 0.44, "percentage": 44.0, "elapsed_time": "0:11:28", "remaining_time": "0:14:36"}
|
| 221 |
+
{"current_steps": 2210, "total_steps": 5000, "loss": 2.1951, "lr": 3.4203113817116957e-05, "epoch": 0.442, "percentage": 44.2, "elapsed_time": "0:11:31", "remaining_time": "0:14:33"}
|
| 222 |
+
{"current_steps": 2220, "total_steps": 5000, "loss": 5.2669, "lr": 3.4040614252052305e-05, "epoch": 0.444, "percentage": 44.4, "elapsed_time": "0:11:34", "remaining_time": "0:14:29"}
|
| 223 |
+
{"current_steps": 2230, "total_steps": 5000, "loss": 2.5184, "lr": 3.387767406020343e-05, "epoch": 0.446, "percentage": 44.6, "elapsed_time": "0:11:37", "remaining_time": "0:14:26"}
|
| 224 |
+
{"current_steps": 2240, "total_steps": 5000, "loss": 2.0463, "lr": 3.3714301183045385e-05, "epoch": 0.448, "percentage": 44.8, "elapsed_time": "0:11:40", "remaining_time": "0:14:23"}
|
| 225 |
+
{"current_steps": 2250, "total_steps": 5000, "loss": 2.9354, "lr": 3.355050358314172e-05, "epoch": 0.45, "percentage": 45.0, "elapsed_time": "0:11:43", "remaining_time": "0:14:20"}
|
| 226 |
+
{"current_steps": 2260, "total_steps": 5000, "loss": 2.1297, "lr": 3.338628924375638e-05, "epoch": 0.452, "percentage": 45.2, "elapsed_time": "0:11:46", "remaining_time": "0:14:16"}
|
| 227 |
+
{"current_steps": 2270, "total_steps": 5000, "loss": 3.2082, "lr": 3.322166616846458e-05, "epoch": 0.454, "percentage": 45.4, "elapsed_time": "0:11:49", "remaining_time": "0:14:13"}
|
| 228 |
+
{"current_steps": 2280, "total_steps": 5000, "loss": 2.0667, "lr": 3.305664238076278e-05, "epoch": 0.456, "percentage": 45.6, "elapsed_time": "0:11:52", "remaining_time": "0:14:10"}
|
| 229 |
+
{"current_steps": 2290, "total_steps": 5000, "loss": 2.4089, "lr": 3.289122592367757e-05, "epoch": 0.458, "percentage": 45.8, "elapsed_time": "0:11:55", "remaining_time": "0:14:06"}
|
| 230 |
+
{"current_steps": 2300, "total_steps": 5000, "loss": 2.0842, "lr": 3.272542485937369e-05, "epoch": 0.46, "percentage": 46.0, "elapsed_time": "0:11:58", "remaining_time": "0:14:03"}
|
| 231 |
+
{"current_steps": 2310, "total_steps": 5000, "loss": 3.9489, "lr": 3.2559247268761115e-05, "epoch": 0.462, "percentage": 46.2, "elapsed_time": "0:12:01", "remaining_time": "0:14:00"}
|
| 232 |
+
{"current_steps": 2320, "total_steps": 5000, "loss": 1.8036, "lr": 3.239270125110117e-05, "epoch": 0.464, "percentage": 46.4, "elapsed_time": "0:12:04", "remaining_time": "0:13:57"}
|
| 233 |
+
{"current_steps": 2330, "total_steps": 5000, "loss": 2.8004, "lr": 3.222579492361179e-05, "epoch": 0.466, "percentage": 46.6, "elapsed_time": "0:12:07", "remaining_time": "0:13:53"}
|
| 234 |
+
{"current_steps": 2340, "total_steps": 5000, "loss": 1.0807, "lr": 3.205853642107192e-05, "epoch": 0.468, "percentage": 46.8, "elapsed_time": "0:12:10", "remaining_time": "0:13:50"}
|
| 235 |
+
{"current_steps": 2350, "total_steps": 5000, "loss": 4.2218, "lr": 3.1890933895424976e-05, "epoch": 0.47, "percentage": 47.0, "elapsed_time": "0:12:13", "remaining_time": "0:13:47"}
|
| 236 |
+
{"current_steps": 2360, "total_steps": 5000, "loss": 1.9778, "lr": 3.172299551538164e-05, "epoch": 0.472, "percentage": 47.2, "elapsed_time": "0:12:16", "remaining_time": "0:13:44"}
|
| 237 |
+
{"current_steps": 2370, "total_steps": 5000, "loss": 2.7487, "lr": 3.155472946602162e-05, "epoch": 0.474, "percentage": 47.4, "elapsed_time": "0:12:19", "remaining_time": "0:13:40"}
|
| 238 |
+
{"current_steps": 2380, "total_steps": 5000, "loss": 2.199, "lr": 3.138614394839476e-05, "epoch": 0.476, "percentage": 47.6, "elapsed_time": "0:12:22", "remaining_time": "0:13:37"}
|
| 239 |
+
{"current_steps": 2390, "total_steps": 5000, "loss": 3.5763, "lr": 3.121724717912138e-05, "epoch": 0.478, "percentage": 47.8, "elapsed_time": "0:12:25", "remaining_time": "0:13:34"}
|
| 240 |
+
{"current_steps": 2400, "total_steps": 5000, "loss": 3.4331, "lr": 3.104804738999169e-05, "epoch": 0.48, "percentage": 48.0, "elapsed_time": "0:12:28", "remaining_time": "0:13:31"}
|
| 241 |
+
{"current_steps": 2410, "total_steps": 5000, "loss": 2.2419, "lr": 3.087855282756475e-05, "epoch": 0.482, "percentage": 48.2, "elapsed_time": "0:12:31", "remaining_time": "0:13:27"}
|
| 242 |
+
{"current_steps": 2420, "total_steps": 5000, "loss": 2.0476, "lr": 3.0708771752766394e-05, "epoch": 0.484, "percentage": 48.4, "elapsed_time": "0:12:34", "remaining_time": "0:13:24"}
|
| 243 |
+
{"current_steps": 2430, "total_steps": 5000, "loss": 1.3934, "lr": 3.053871244048669e-05, "epoch": 0.486, "percentage": 48.6, "elapsed_time": "0:12:37", "remaining_time": "0:13:21"}
|
| 244 |
+
{"current_steps": 2440, "total_steps": 5000, "loss": 2.7532, "lr": 3.0368383179176585e-05, "epoch": 0.488, "percentage": 48.8, "elapsed_time": "0:12:40", "remaining_time": "0:13:18"}
|
| 245 |
+
{"current_steps": 2450, "total_steps": 5000, "loss": 2.112, "lr": 3.0197792270443982e-05, "epoch": 0.49, "percentage": 49.0, "elapsed_time": "0:12:43", "remaining_time": "0:13:14"}
|
| 246 |
+
{"current_steps": 2460, "total_steps": 5000, "loss": 1.8119, "lr": 3.002694802864912e-05, "epoch": 0.492, "percentage": 49.2, "elapsed_time": "0:12:46", "remaining_time": "0:13:11"}
|
| 247 |
+
{"current_steps": 2470, "total_steps": 5000, "loss": 1.5163, "lr": 2.98558587804993e-05, "epoch": 0.494, "percentage": 49.4, "elapsed_time": "0:12:49", "remaining_time": "0:13:08"}
|
| 248 |
+
{"current_steps": 2480, "total_steps": 5000, "loss": 3.372, "lr": 2.9684532864643122e-05, "epoch": 0.496, "percentage": 49.6, "elapsed_time": "0:12:52", "remaining_time": "0:13:05"}
|
| 249 |
+
{"current_steps": 2490, "total_steps": 5000, "loss": 1.5534, "lr": 2.9512978631264006e-05, "epoch": 0.498, "percentage": 49.8, "elapsed_time": "0:12:55", "remaining_time": "0:13:01"}
|
| 250 |
+
{"current_steps": 2500, "total_steps": 5000, "loss": 1.8644, "lr": 2.9341204441673266e-05, "epoch": 0.5, "percentage": 50.0, "elapsed_time": "0:12:58", "remaining_time": "0:12:58"}
|
| 251 |
+
{"current_steps": 2510, "total_steps": 5000, "loss": 4.3985, "lr": 2.916921866790256e-05, "epoch": 0.502, "percentage": 50.2, "elapsed_time": "0:13:01", "remaining_time": "0:12:55"}
|
| 252 |
+
{"current_steps": 2520, "total_steps": 5000, "loss": 2.0158, "lr": 2.8997029692295874e-05, "epoch": 0.504, "percentage": 50.4, "elapsed_time": "0:13:04", "remaining_time": "0:12:51"}
|
| 253 |
+
{"current_steps": 2530, "total_steps": 5000, "loss": 1.3677, "lr": 2.8824645907100954e-05, "epoch": 0.506, "percentage": 50.6, "elapsed_time": "0:13:07", "remaining_time": "0:12:48"}
|
| 254 |
+
{"current_steps": 2540, "total_steps": 5000, "loss": 2.399, "lr": 2.8652075714060295e-05, "epoch": 0.508, "percentage": 50.8, "elapsed_time": "0:13:10", "remaining_time": "0:12:45"}
|
| 255 |
+
{"current_steps": 2550, "total_steps": 5000, "loss": 4.2765, "lr": 2.8479327524001636e-05, "epoch": 0.51, "percentage": 51.0, "elapsed_time": "0:13:13", "remaining_time": "0:12:42"}
|
| 256 |
+
{"current_steps": 2560, "total_steps": 5000, "loss": 3.2872, "lr": 2.8306409756428064e-05, "epoch": 0.512, "percentage": 51.2, "elapsed_time": "0:13:16", "remaining_time": "0:12:38"}
|
| 257 |
+
{"current_steps": 2570, "total_steps": 5000, "loss": 3.1682, "lr": 2.8133330839107608e-05, "epoch": 0.514, "percentage": 51.4, "elapsed_time": "0:13:19", "remaining_time": "0:12:35"}
|
| 258 |
+
{"current_steps": 2580, "total_steps": 5000, "loss": 3.3066, "lr": 2.7960099207662532e-05, "epoch": 0.516, "percentage": 51.6, "elapsed_time": "0:13:22", "remaining_time": "0:12:32"}
|
| 259 |
+
{"current_steps": 2590, "total_steps": 5000, "loss": 1.8911, "lr": 2.7786723305158136e-05, "epoch": 0.518, "percentage": 51.8, "elapsed_time": "0:13:25", "remaining_time": "0:12:29"}
|
| 260 |
+
{"current_steps": 2600, "total_steps": 5000, "loss": 1.7402, "lr": 2.761321158169134e-05, "epoch": 0.52, "percentage": 52.0, "elapsed_time": "0:13:28", "remaining_time": "0:12:26"}
|
| 261 |
+
{"current_steps": 2610, "total_steps": 5000, "loss": 1.1721, "lr": 2.7439572493978736e-05, "epoch": 0.522, "percentage": 52.2, "elapsed_time": "0:13:31", "remaining_time": "0:12:22"}
|
| 262 |
+
{"current_steps": 2620, "total_steps": 5000, "loss": 1.2482, "lr": 2.726581450494451e-05, "epoch": 0.524, "percentage": 52.4, "elapsed_time": "0:13:34", "remaining_time": "0:12:19"}
|
| 263 |
+
{"current_steps": 2630, "total_steps": 5000, "loss": 3.3641, "lr": 2.7091946083307896e-05, "epoch": 0.526, "percentage": 52.6, "elapsed_time": "0:13:37", "remaining_time": "0:12:16"}
|
| 264 |
+
{"current_steps": 2640, "total_steps": 5000, "loss": 1.9946, "lr": 2.6917975703170466e-05, "epoch": 0.528, "percentage": 52.8, "elapsed_time": "0:13:40", "remaining_time": "0:12:13"}
|
| 265 |
+
{"current_steps": 2650, "total_steps": 5000, "loss": 1.0218, "lr": 2.674391184360313e-05, "epoch": 0.53, "percentage": 53.0, "elapsed_time": "0:13:43", "remaining_time": "0:12:10"}
|
| 266 |
+
{"current_steps": 2660, "total_steps": 5000, "loss": 2.336, "lr": 2.656976298823284e-05, "epoch": 0.532, "percentage": 53.2, "elapsed_time": "0:13:46", "remaining_time": "0:12:06"}
|
| 267 |
+
{"current_steps": 2670, "total_steps": 5000, "loss": 4.6423, "lr": 2.6395537624829096e-05, "epoch": 0.534, "percentage": 53.4, "elapsed_time": "0:13:49", "remaining_time": "0:12:03"}
|
| 268 |
+
{"current_steps": 2680, "total_steps": 5000, "loss": 1.845, "lr": 2.6221244244890336e-05, "epoch": 0.536, "percentage": 53.6, "elapsed_time": "0:13:52", "remaining_time": "0:12:00"}
|
| 269 |
+
{"current_steps": 2690, "total_steps": 5000, "loss": 2.3117, "lr": 2.604689134322999e-05, "epoch": 0.538, "percentage": 53.8, "elapsed_time": "0:13:55", "remaining_time": "0:11:57"}
|
| 270 |
+
{"current_steps": 2700, "total_steps": 5000, "loss": 3.3991, "lr": 2.587248741756253e-05, "epoch": 0.54, "percentage": 54.0, "elapsed_time": "0:13:58", "remaining_time": "0:11:53"}
|
| 271 |
+
{"current_steps": 2710, "total_steps": 5000, "loss": 3.7802, "lr": 2.5698040968089225e-05, "epoch": 0.542, "percentage": 54.2, "elapsed_time": "0:14:01", "remaining_time": "0:11:50"}
|
| 272 |
+
{"current_steps": 2720, "total_steps": 5000, "loss": 1.4431, "lr": 2.5523560497083926e-05, "epoch": 0.544, "percentage": 54.4, "elapsed_time": "0:14:04", "remaining_time": "0:11:47"}
|
| 273 |
+
{"current_steps": 2730, "total_steps": 5000, "loss": 0.8369, "lr": 2.5349054508478637e-05, "epoch": 0.546, "percentage": 54.6, "elapsed_time": "0:14:07", "remaining_time": "0:11:44"}
|
| 274 |
+
{"current_steps": 2740, "total_steps": 5000, "loss": 2.658, "lr": 2.517453150744904e-05, "epoch": 0.548, "percentage": 54.8, "elapsed_time": "0:14:10", "remaining_time": "0:11:41"}
|
| 275 |
+
{"current_steps": 2750, "total_steps": 5000, "loss": 2.12, "lr": 2.5e-05, "epoch": 0.55, "percentage": 55.0, "elapsed_time": "0:14:13", "remaining_time": "0:11:37"}
|
| 276 |
+
{"current_steps": 2760, "total_steps": 5000, "loss": 2.1272, "lr": 2.4825468492550964e-05, "epoch": 0.552, "percentage": 55.2, "elapsed_time": "0:14:16", "remaining_time": "0:11:34"}
|
| 277 |
+
{"current_steps": 2770, "total_steps": 5000, "loss": 1.5502, "lr": 2.4650945491521372e-05, "epoch": 0.554, "percentage": 55.4, "elapsed_time": "0:14:18", "remaining_time": "0:11:31"}
|
| 278 |
+
{"current_steps": 2780, "total_steps": 5000, "loss": 1.4815, "lr": 2.447643950291608e-05, "epoch": 0.556, "percentage": 55.6, "elapsed_time": "0:14:21", "remaining_time": "0:11:28"}
|
| 279 |
+
{"current_steps": 2790, "total_steps": 5000, "loss": 1.3745, "lr": 2.4301959031910784e-05, "epoch": 0.558, "percentage": 55.8, "elapsed_time": "0:14:24", "remaining_time": "0:11:25"}
|
| 280 |
+
{"current_steps": 2800, "total_steps": 5000, "loss": 2.9765, "lr": 2.4127512582437485e-05, "epoch": 0.56, "percentage": 56.0, "elapsed_time": "0:14:28", "remaining_time": "0:11:22"}
|
| 281 |
+
{"current_steps": 2810, "total_steps": 5000, "loss": 1.2392, "lr": 2.3953108656770016e-05, "epoch": 0.562, "percentage": 56.2, "elapsed_time": "0:14:31", "remaining_time": "0:11:18"}
|
| 282 |
+
{"current_steps": 2820, "total_steps": 5000, "loss": 1.6202, "lr": 2.377875575510967e-05, "epoch": 0.564, "percentage": 56.4, "elapsed_time": "0:14:33", "remaining_time": "0:11:15"}
|
| 283 |
+
{"current_steps": 2830, "total_steps": 5000, "loss": 4.6898, "lr": 2.3604462375170906e-05, "epoch": 0.566, "percentage": 56.6, "elapsed_time": "0:14:36", "remaining_time": "0:11:12"}
|
| 284 |
+
{"current_steps": 2840, "total_steps": 5000, "loss": 1.3387, "lr": 2.3430237011767167e-05, "epoch": 0.568, "percentage": 56.8, "elapsed_time": "0:14:39", "remaining_time": "0:11:09"}
|
| 285 |
+
{"current_steps": 2850, "total_steps": 5000, "loss": 3.6984, "lr": 2.3256088156396868e-05, "epoch": 0.57, "percentage": 57.0, "elapsed_time": "0:14:42", "remaining_time": "0:11:06"}
|
| 286 |
+
{"current_steps": 2860, "total_steps": 5000, "loss": 1.874, "lr": 2.3082024296829536e-05, "epoch": 0.572, "percentage": 57.2, "elapsed_time": "0:14:45", "remaining_time": "0:11:02"}
|
| 287 |
+
{"current_steps": 2870, "total_steps": 5000, "loss": 1.7756, "lr": 2.2908053916692117e-05, "epoch": 0.574, "percentage": 57.4, "elapsed_time": "0:14:48", "remaining_time": "0:10:59"}
|
| 288 |
+
{"current_steps": 2880, "total_steps": 5000, "loss": 8.2453, "lr": 2.2734185495055503e-05, "epoch": 0.576, "percentage": 57.6, "elapsed_time": "0:14:51", "remaining_time": "0:10:56"}
|
| 289 |
+
{"current_steps": 2890, "total_steps": 5000, "loss": 1.6711, "lr": 2.2560427506021266e-05, "epoch": 0.578, "percentage": 57.8, "elapsed_time": "0:14:54", "remaining_time": "0:10:53"}
|
| 290 |
+
{"current_steps": 2900, "total_steps": 5000, "loss": 2.9633, "lr": 2.238678841830867e-05, "epoch": 0.58, "percentage": 58.0, "elapsed_time": "0:14:57", "remaining_time": "0:10:50"}
|
| 291 |
+
{"current_steps": 2910, "total_steps": 5000, "loss": 1.882, "lr": 2.2213276694841866e-05, "epoch": 0.582, "percentage": 58.2, "elapsed_time": "0:15:00", "remaining_time": "0:10:47"}
|
| 292 |
+
{"current_steps": 2920, "total_steps": 5000, "loss": 2.1105, "lr": 2.2039900792337474e-05, "epoch": 0.584, "percentage": 58.4, "elapsed_time": "0:15:03", "remaining_time": "0:10:43"}
|
| 293 |
+
{"current_steps": 2930, "total_steps": 5000, "loss": 2.285, "lr": 2.186666916089239e-05, "epoch": 0.586, "percentage": 58.6, "elapsed_time": "0:15:06", "remaining_time": "0:10:40"}
|
| 294 |
+
{"current_steps": 2940, "total_steps": 5000, "loss": 2.7095, "lr": 2.1693590243571938e-05, "epoch": 0.588, "percentage": 58.8, "elapsed_time": "0:15:09", "remaining_time": "0:10:37"}
|
| 295 |
+
{"current_steps": 2950, "total_steps": 5000, "loss": 1.4936, "lr": 2.1520672475998373e-05, "epoch": 0.59, "percentage": 59.0, "elapsed_time": "0:15:12", "remaining_time": "0:10:34"}
|
| 296 |
+
{"current_steps": 2960, "total_steps": 5000, "loss": 2.9807, "lr": 2.1347924285939714e-05, "epoch": 0.592, "percentage": 59.2, "elapsed_time": "0:15:15", "remaining_time": "0:10:31"}
|
| 297 |
+
{"current_steps": 2970, "total_steps": 5000, "loss": 1.8499, "lr": 2.117535409289905e-05, "epoch": 0.594, "percentage": 59.4, "elapsed_time": "0:15:18", "remaining_time": "0:10:28"}
|
| 298 |
+
{"current_steps": 2980, "total_steps": 5000, "loss": 2.6104, "lr": 2.1002970307704132e-05, "epoch": 0.596, "percentage": 59.6, "elapsed_time": "0:15:21", "remaining_time": "0:10:24"}
|
| 299 |
+
{"current_steps": 2990, "total_steps": 5000, "loss": 4.632, "lr": 2.0830781332097446e-05, "epoch": 0.598, "percentage": 59.8, "elapsed_time": "0:15:24", "remaining_time": "0:10:21"}
|
| 300 |
+
{"current_steps": 3000, "total_steps": 5000, "loss": 2.1349, "lr": 2.0658795558326743e-05, "epoch": 0.6, "percentage": 60.0, "elapsed_time": "0:15:27", "remaining_time": "0:10:18"}
|
| 301 |
+
{"current_steps": 3010, "total_steps": 5000, "loss": 2.881, "lr": 2.0487021368736003e-05, "epoch": 0.602, "percentage": 60.2, "elapsed_time": "0:15:30", "remaining_time": "0:10:15"}
|
| 302 |
+
{"current_steps": 3020, "total_steps": 5000, "loss": 2.999, "lr": 2.031546713535688e-05, "epoch": 0.604, "percentage": 60.4, "elapsed_time": "0:15:33", "remaining_time": "0:10:12"}
|
| 303 |
+
{"current_steps": 3030, "total_steps": 5000, "loss": 3.0427, "lr": 2.0144141219500705e-05, "epoch": 0.606, "percentage": 60.6, "elapsed_time": "0:15:36", "remaining_time": "0:10:09"}
|
| 304 |
+
{"current_steps": 3040, "total_steps": 5000, "loss": 2.5017, "lr": 1.9973051971350888e-05, "epoch": 0.608, "percentage": 60.8, "elapsed_time": "0:15:39", "remaining_time": "0:10:05"}
|
| 305 |
+
{"current_steps": 3050, "total_steps": 5000, "loss": 1.5811, "lr": 1.980220772955602e-05, "epoch": 0.61, "percentage": 61.0, "elapsed_time": "0:15:42", "remaining_time": "0:10:02"}
|
| 306 |
+
{"current_steps": 3060, "total_steps": 5000, "loss": 1.6409, "lr": 1.963161682082342e-05, "epoch": 0.612, "percentage": 61.2, "elapsed_time": "0:15:45", "remaining_time": "0:09:59"}
|
| 307 |
+
{"current_steps": 3070, "total_steps": 5000, "loss": 3.8443, "lr": 1.946128755951332e-05, "epoch": 0.614, "percentage": 61.4, "elapsed_time": "0:15:48", "remaining_time": "0:09:56"}
|
| 308 |
+
{"current_steps": 3080, "total_steps": 5000, "loss": 2.3784, "lr": 1.9291228247233605e-05, "epoch": 0.616, "percentage": 61.6, "elapsed_time": "0:15:51", "remaining_time": "0:09:53"}
|
| 309 |
+
{"current_steps": 3090, "total_steps": 5000, "loss": 1.9809, "lr": 1.912144717243525e-05, "epoch": 0.618, "percentage": 61.8, "elapsed_time": "0:15:54", "remaining_time": "0:09:50"}
|
| 310 |
+
{"current_steps": 3100, "total_steps": 5000, "loss": 1.5875, "lr": 1.895195261000831e-05, "epoch": 0.62, "percentage": 62.0, "elapsed_time": "0:15:57", "remaining_time": "0:09:46"}
|
| 311 |
+
{"current_steps": 3110, "total_steps": 5000, "loss": 2.2161, "lr": 1.8782752820878634e-05, "epoch": 0.622, "percentage": 62.2, "elapsed_time": "0:16:00", "remaining_time": "0:09:43"}
|
| 312 |
+
{"current_steps": 3120, "total_steps": 5000, "loss": 6.9032, "lr": 1.8613856051605243e-05, "epoch": 0.624, "percentage": 62.4, "elapsed_time": "0:16:03", "remaining_time": "0:09:40"}
|
| 313 |
+
{"current_steps": 3130, "total_steps": 5000, "loss": 2.9789, "lr": 1.8445270533978388e-05, "epoch": 0.626, "percentage": 62.6, "elapsed_time": "0:16:06", "remaining_time": "0:09:37"}
|
| 314 |
+
{"current_steps": 3140, "total_steps": 5000, "loss": 2.0478, "lr": 1.827700448461836e-05, "epoch": 0.628, "percentage": 62.8, "elapsed_time": "0:16:09", "remaining_time": "0:09:34"}
|
| 315 |
+
{"current_steps": 3150, "total_steps": 5000, "loss": 1.2295, "lr": 1.8109066104575023e-05, "epoch": 0.63, "percentage": 63.0, "elapsed_time": "0:16:12", "remaining_time": "0:09:31"}
|
| 316 |
+
{"current_steps": 3160, "total_steps": 5000, "loss": 2.6306, "lr": 1.7941463578928086e-05, "epoch": 0.632, "percentage": 63.2, "elapsed_time": "0:16:15", "remaining_time": "0:09:28"}
|
| 317 |
+
{"current_steps": 3170, "total_steps": 5000, "loss": 1.2663, "lr": 1.7774205076388206e-05, "epoch": 0.634, "percentage": 63.4, "elapsed_time": "0:16:18", "remaining_time": "0:09:24"}
|
| 318 |
+
{"current_steps": 3180, "total_steps": 5000, "loss": 3.5995, "lr": 1.7607298748898842e-05, "epoch": 0.636, "percentage": 63.6, "elapsed_time": "0:16:21", "remaining_time": "0:09:21"}
|
| 319 |
+
{"current_steps": 3190, "total_steps": 5000, "loss": 3.4226, "lr": 1.744075273123889e-05, "epoch": 0.638, "percentage": 63.8, "elapsed_time": "0:16:24", "remaining_time": "0:09:18"}
|
| 320 |
+
{"current_steps": 3200, "total_steps": 5000, "loss": 0.9354, "lr": 1.7274575140626318e-05, "epoch": 0.64, "percentage": 64.0, "elapsed_time": "0:16:27", "remaining_time": "0:09:15"}
|
| 321 |
+
{"current_steps": 3210, "total_steps": 5000, "loss": 4.6267, "lr": 1.7108774076322443e-05, "epoch": 0.642, "percentage": 64.2, "elapsed_time": "0:16:30", "remaining_time": "0:09:12"}
|
| 322 |
+
{"current_steps": 3220, "total_steps": 5000, "loss": 0.9114, "lr": 1.6943357619237226e-05, "epoch": 0.644, "percentage": 64.4, "elapsed_time": "0:16:33", "remaining_time": "0:09:09"}
|
| 323 |
+
{"current_steps": 3230, "total_steps": 5000, "loss": 2.5474, "lr": 1.677833383153542e-05, "epoch": 0.646, "percentage": 64.6, "elapsed_time": "0:16:36", "remaining_time": "0:09:06"}
|
| 324 |
+
{"current_steps": 3240, "total_steps": 5000, "loss": 2.7762, "lr": 1.6613710756243626e-05, "epoch": 0.648, "percentage": 64.8, "elapsed_time": "0:16:39", "remaining_time": "0:09:02"}
|
| 325 |
+
{"current_steps": 3250, "total_steps": 5000, "loss": 1.6468, "lr": 1.6449496416858284e-05, "epoch": 0.65, "percentage": 65.0, "elapsed_time": "0:16:42", "remaining_time": "0:08:59"}
|
| 326 |
+
{"current_steps": 3260, "total_steps": 5000, "loss": 1.2256, "lr": 1.6285698816954624e-05, "epoch": 0.652, "percentage": 65.2, "elapsed_time": "0:16:45", "remaining_time": "0:08:56"}
|
| 327 |
+
{"current_steps": 3270, "total_steps": 5000, "loss": 1.5563, "lr": 1.612232593979658e-05, "epoch": 0.654, "percentage": 65.4, "elapsed_time": "0:16:48", "remaining_time": "0:08:53"}
|
| 328 |
+
{"current_steps": 3280, "total_steps": 5000, "loss": 3.3846, "lr": 1.5959385747947698e-05, "epoch": 0.656, "percentage": 65.6, "elapsed_time": "0:16:51", "remaining_time": "0:08:50"}
|
| 329 |
+
{"current_steps": 3290, "total_steps": 5000, "loss": 2.2551, "lr": 1.5796886182883053e-05, "epoch": 0.658, "percentage": 65.8, "elapsed_time": "0:16:54", "remaining_time": "0:08:47"}
|
| 330 |
+
{"current_steps": 3300, "total_steps": 5000, "loss": 0.8103, "lr": 1.56348351646022e-05, "epoch": 0.66, "percentage": 66.0, "elapsed_time": "0:16:57", "remaining_time": "0:08:44"}
|
| 331 |
+
{"current_steps": 3310, "total_steps": 5000, "loss": 1.7983, "lr": 1.547324059124315e-05, "epoch": 0.662, "percentage": 66.2, "elapsed_time": "0:17:00", "remaining_time": "0:08:41"}
|
| 332 |
+
{"current_steps": 3320, "total_steps": 5000, "loss": 4.4483, "lr": 1.5312110338697426e-05, "epoch": 0.664, "percentage": 66.4, "elapsed_time": "0:17:03", "remaining_time": "0:08:37"}
|
| 333 |
+
{"current_steps": 3330, "total_steps": 5000, "loss": 6.7593, "lr": 1.5151452260226224e-05, "epoch": 0.666, "percentage": 66.6, "elapsed_time": "0:17:06", "remaining_time": "0:08:34"}
|
| 334 |
+
{"current_steps": 3340, "total_steps": 5000, "loss": 3.4548, "lr": 1.4991274186077632e-05, "epoch": 0.668, "percentage": 66.8, "elapsed_time": "0:17:09", "remaining_time": "0:08:31"}
|
| 335 |
+
{"current_steps": 3350, "total_steps": 5000, "loss": 2.0242, "lr": 1.4831583923104999e-05, "epoch": 0.67, "percentage": 67.0, "elapsed_time": "0:17:12", "remaining_time": "0:08:28"}
|
| 336 |
+
{"current_steps": 3360, "total_steps": 5000, "loss": 2.3468, "lr": 1.467238925438646e-05, "epoch": 0.672, "percentage": 67.2, "elapsed_time": "0:17:15", "remaining_time": "0:08:25"}
|
| 337 |
+
{"current_steps": 3370, "total_steps": 5000, "loss": 1.3924, "lr": 1.4513697938845572e-05, "epoch": 0.674, "percentage": 67.4, "elapsed_time": "0:17:18", "remaining_time": "0:08:22"}
|
| 338 |
+
{"current_steps": 3380, "total_steps": 5000, "loss": 2.3896, "lr": 1.4355517710873184e-05, "epoch": 0.676, "percentage": 67.6, "elapsed_time": "0:17:21", "remaining_time": "0:08:19"}
|
| 339 |
+
{"current_steps": 3390, "total_steps": 5000, "loss": 2.2237, "lr": 1.4197856279950438e-05, "epoch": 0.678, "percentage": 67.8, "elapsed_time": "0:17:24", "remaining_time": "0:08:16"}
|
| 340 |
+
{"current_steps": 3400, "total_steps": 5000, "loss": 6.2669, "lr": 1.4040721330273062e-05, "epoch": 0.68, "percentage": 68.0, "elapsed_time": "0:17:27", "remaining_time": "0:08:12"}
|
| 341 |
+
{"current_steps": 3410, "total_steps": 5000, "loss": 2.271, "lr": 1.388412052037682e-05, "epoch": 0.682, "percentage": 68.2, "elapsed_time": "0:17:30", "remaining_time": "0:08:09"}
|
| 342 |
+
{"current_steps": 3420, "total_steps": 5000, "loss": 1.7512, "lr": 1.3728061482764238e-05, "epoch": 0.684, "percentage": 68.4, "elapsed_time": "0:17:33", "remaining_time": "0:08:06"}
|
| 343 |
+
{"current_steps": 3430, "total_steps": 5000, "loss": 1.3673, "lr": 1.3572551823532654e-05, "epoch": 0.686, "percentage": 68.6, "elapsed_time": "0:17:36", "remaining_time": "0:08:03"}
|
| 344 |
+
{"current_steps": 3440, "total_steps": 5000, "loss": 3.8359, "lr": 1.3417599122003464e-05, "epoch": 0.688, "percentage": 68.8, "elapsed_time": "0:17:39", "remaining_time": "0:08:00"}
|
| 345 |
+
{"current_steps": 3450, "total_steps": 5000, "loss": 1.6534, "lr": 1.3263210930352737e-05, "epoch": 0.69, "percentage": 69.0, "elapsed_time": "0:17:42", "remaining_time": "0:07:57"}
|
| 346 |
+
{"current_steps": 3460, "total_steps": 5000, "loss": 0.955, "lr": 1.3109394773243117e-05, "epoch": 0.692, "percentage": 69.2, "elapsed_time": "0:17:45", "remaining_time": "0:07:54"}
|
| 347 |
+
{"current_steps": 3470, "total_steps": 5000, "loss": 1.9489, "lr": 1.2956158147457115e-05, "epoch": 0.694, "percentage": 69.4, "elapsed_time": "0:17:48", "remaining_time": "0:07:51"}
|
| 348 |
+
{"current_steps": 3480, "total_steps": 5000, "loss": 2.5898, "lr": 1.280350852153168e-05, "epoch": 0.696, "percentage": 69.6, "elapsed_time": "0:17:51", "remaining_time": "0:07:47"}
|
| 349 |
+
{"current_steps": 3490, "total_steps": 5000, "loss": 2.9695, "lr": 1.2651453335394231e-05, "epoch": 0.698, "percentage": 69.8, "elapsed_time": "0:17:54", "remaining_time": "0:07:44"}
|
| 350 |
+
{"current_steps": 3500, "total_steps": 5000, "loss": 1.8141, "lr": 1.2500000000000006e-05, "epoch": 0.7, "percentage": 70.0, "elapsed_time": "0:17:57", "remaining_time": "0:07:41"}
|
| 351 |
+
{"current_steps": 3510, "total_steps": 5000, "loss": 1.6577, "lr": 1.234915589697091e-05, "epoch": 0.702, "percentage": 70.2, "elapsed_time": "0:18:00", "remaining_time": "0:07:38"}
|
| 352 |
+
{"current_steps": 3520, "total_steps": 5000, "loss": 1.0784, "lr": 1.2198928378235716e-05, "epoch": 0.704, "percentage": 70.4, "elapsed_time": "0:18:03", "remaining_time": "0:07:35"}
|
| 353 |
+
{"current_steps": 3530, "total_steps": 5000, "loss": 1.4946, "lr": 1.2049324765671749e-05, "epoch": 0.706, "percentage": 70.6, "elapsed_time": "0:18:06", "remaining_time": "0:07:32"}
|
| 354 |
+
{"current_steps": 3540, "total_steps": 5000, "loss": 2.9905, "lr": 1.1900352350748026e-05, "epoch": 0.708, "percentage": 70.8, "elapsed_time": "0:18:09", "remaining_time": "0:07:29"}
|
| 355 |
+
{"current_steps": 3550, "total_steps": 5000, "loss": 2.2346, "lr": 1.175201839416988e-05, "epoch": 0.71, "percentage": 71.0, "elapsed_time": "0:18:12", "remaining_time": "0:07:26"}
|
| 356 |
+
{"current_steps": 3560, "total_steps": 5000, "loss": 1.6511, "lr": 1.1604330125525079e-05, "epoch": 0.712, "percentage": 71.2, "elapsed_time": "0:18:15", "remaining_time": "0:07:22"}
|
| 357 |
+
{"current_steps": 3570, "total_steps": 5000, "loss": 3.0413, "lr": 1.1457294742931507e-05, "epoch": 0.714, "percentage": 71.4, "elapsed_time": "0:18:18", "remaining_time": "0:07:19"}
|
| 358 |
+
{"current_steps": 3580, "total_steps": 5000, "loss": 2.2105, "lr": 1.1310919412686247e-05, "epoch": 0.716, "percentage": 71.6, "elapsed_time": "0:18:21", "remaining_time": "0:07:16"}
|
| 359 |
+
{"current_steps": 3590, "total_steps": 5000, "loss": 2.0821, "lr": 1.11652112689164e-05, "epoch": 0.718, "percentage": 71.8, "elapsed_time": "0:18:24", "remaining_time": "0:07:13"}
|
| 360 |
+
{"current_steps": 3600, "total_steps": 5000, "loss": 3.4402, "lr": 1.1020177413231334e-05, "epoch": 0.72, "percentage": 72.0, "elapsed_time": "0:18:27", "remaining_time": "0:07:10"}
|
| 361 |
+
{"current_steps": 3610, "total_steps": 5000, "loss": 1.5246, "lr": 1.0875824914376553e-05, "epoch": 0.722, "percentage": 72.2, "elapsed_time": "0:18:30", "remaining_time": "0:07:07"}
|
| 362 |
+
{"current_steps": 3620, "total_steps": 5000, "loss": 4.4784, "lr": 1.0732160807889211e-05, "epoch": 0.724, "percentage": 72.4, "elapsed_time": "0:18:33", "remaining_time": "0:07:04"}
|
| 363 |
+
{"current_steps": 3630, "total_steps": 5000, "loss": 2.8935, "lr": 1.058919209575517e-05, "epoch": 0.726, "percentage": 72.6, "elapsed_time": "0:18:36", "remaining_time": "0:07:01"}
|
| 364 |
+
{"current_steps": 3640, "total_steps": 5000, "loss": 1.4045, "lr": 1.0446925746067768e-05, "epoch": 0.728, "percentage": 72.8, "elapsed_time": "0:18:39", "remaining_time": "0:06:58"}
|
| 365 |
+
{"current_steps": 3650, "total_steps": 5000, "loss": 2.0619, "lr": 1.0305368692688174e-05, "epoch": 0.73, "percentage": 73.0, "elapsed_time": "0:18:42", "remaining_time": "0:06:55"}
|
| 366 |
+
{"current_steps": 3660, "total_steps": 5000, "loss": 1.8641, "lr": 1.0164527834907467e-05, "epoch": 0.732, "percentage": 73.2, "elapsed_time": "0:18:45", "remaining_time": "0:06:51"}
|
| 367 |
+
{"current_steps": 3670, "total_steps": 5000, "loss": 3.3477, "lr": 1.0024410037110357e-05, "epoch": 0.734, "percentage": 73.4, "elapsed_time": "0:18:48", "remaining_time": "0:06:48"}
|
| 368 |
+
{"current_steps": 3680, "total_steps": 5000, "loss": 1.6977, "lr": 9.88502212844063e-06, "epoch": 0.736, "percentage": 73.6, "elapsed_time": "0:18:50", "remaining_time": "0:06:45"}
|
| 369 |
+
{"current_steps": 3690, "total_steps": 5000, "loss": 2.4541, "lr": 9.746370902468311e-06, "epoch": 0.738, "percentage": 73.8, "elapsed_time": "0:18:53", "remaining_time": "0:06:42"}
|
| 370 |
+
{"current_steps": 3700, "total_steps": 5000, "loss": 1.0491, "lr": 9.608463116858542e-06, "epoch": 0.74, "percentage": 74.0, "elapsed_time": "0:18:56", "remaining_time": "0:06:39"}
|
| 371 |
+
{"current_steps": 3710, "total_steps": 5000, "loss": 1.7615, "lr": 9.471305493042243e-06, "epoch": 0.742, "percentage": 74.2, "elapsed_time": "0:18:59", "remaining_time": "0:06:36"}
|
| 372 |
+
{"current_steps": 3720, "total_steps": 5000, "loss": 1.2825, "lr": 9.334904715888495e-06, "epoch": 0.744, "percentage": 74.4, "elapsed_time": "0:19:02", "remaining_time": "0:06:33"}
|
| 373 |
+
{"current_steps": 3730, "total_steps": 5000, "loss": 2.0753, "lr": 9.199267433378727e-06, "epoch": 0.746, "percentage": 74.6, "elapsed_time": "0:19:05", "remaining_time": "0:06:30"}
|
| 374 |
+
{"current_steps": 3740, "total_steps": 5000, "loss": 2.5678, "lr": 9.064400256282757e-06, "epoch": 0.748, "percentage": 74.8, "elapsed_time": "0:19:08", "remaining_time": "0:06:27"}
|
| 375 |
+
{"current_steps": 3750, "total_steps": 5000, "loss": 1.0403, "lr": 8.930309757836517e-06, "epoch": 0.75, "percentage": 75.0, "elapsed_time": "0:19:11", "remaining_time": "0:06:23"}
|
| 376 |
+
{"current_steps": 3760, "total_steps": 5000, "loss": 2.1364, "lr": 8.797002473421728e-06, "epoch": 0.752, "percentage": 75.2, "elapsed_time": "0:19:14", "remaining_time": "0:06:20"}
|
| 377 |
+
{"current_steps": 3770, "total_steps": 5000, "loss": 1.4156, "lr": 8.664484900247363e-06, "epoch": 0.754, "percentage": 75.4, "elapsed_time": "0:19:17", "remaining_time": "0:06:17"}
|
| 378 |
+
{"current_steps": 3780, "total_steps": 5000, "loss": 2.463, "lr": 8.532763497032987e-06, "epoch": 0.756, "percentage": 75.6, "elapsed_time": "0:19:20", "remaining_time": "0:06:14"}
|
| 379 |
+
{"current_steps": 3790, "total_steps": 5000, "loss": 1.8851, "lr": 8.40184468369396e-06, "epoch": 0.758, "percentage": 75.8, "elapsed_time": "0:19:23", "remaining_time": "0:06:11"}
|
| 380 |
+
{"current_steps": 3800, "total_steps": 5000, "loss": 2.3422, "lr": 8.271734841028553e-06, "epoch": 0.76, "percentage": 76.0, "elapsed_time": "0:19:26", "remaining_time": "0:06:08"}
|
| 381 |
+
{"current_steps": 3810, "total_steps": 5000, "loss": 2.0351, "lr": 8.142440310406924e-06, "epoch": 0.762, "percentage": 76.2, "elapsed_time": "0:19:29", "remaining_time": "0:06:05"}
|
| 382 |
+
{"current_steps": 3820, "total_steps": 5000, "loss": 2.6205, "lr": 8.013967393462094e-06, "epoch": 0.764, "percentage": 76.4, "elapsed_time": "0:19:32", "remaining_time": "0:06:02"}
|
| 383 |
+
{"current_steps": 3830, "total_steps": 5000, "loss": 1.4258, "lr": 7.886322351782783e-06, "epoch": 0.766, "percentage": 76.6, "elapsed_time": "0:19:35", "remaining_time": "0:05:59"}
|
| 384 |
+
{"current_steps": 3840, "total_steps": 5000, "loss": 2.0398, "lr": 7.759511406608255e-06, "epoch": 0.768, "percentage": 76.8, "elapsed_time": "0:19:38", "remaining_time": "0:05:56"}
|
| 385 |
+
{"current_steps": 3850, "total_steps": 5000, "loss": 2.7082, "lr": 7.633540738525066e-06, "epoch": 0.77, "percentage": 77.0, "elapsed_time": "0:19:41", "remaining_time": "0:05:52"}
|
| 386 |
+
{"current_steps": 3860, "total_steps": 5000, "loss": 2.2596, "lr": 7.508416487165862e-06, "epoch": 0.772, "percentage": 77.2, "elapsed_time": "0:19:44", "remaining_time": "0:05:49"}
|
| 387 |
+
{"current_steps": 3870, "total_steps": 5000, "loss": 1.665, "lr": 7.384144750910133e-06, "epoch": 0.774, "percentage": 77.4, "elapsed_time": "0:19:47", "remaining_time": "0:05:46"}
|
| 388 |
+
{"current_steps": 3880, "total_steps": 5000, "loss": 3.3368, "lr": 7.260731586586983e-06, "epoch": 0.776, "percentage": 77.6, "elapsed_time": "0:19:50", "remaining_time": "0:05:43"}
|
| 389 |
+
{"current_steps": 3890, "total_steps": 5000, "loss": 2.165, "lr": 7.138183009179922e-06, "epoch": 0.778, "percentage": 77.8, "elapsed_time": "0:19:53", "remaining_time": "0:05:40"}
|
| 390 |
+
{"current_steps": 3900, "total_steps": 5000, "loss": 0.9795, "lr": 7.016504991533726e-06, "epoch": 0.78, "percentage": 78.0, "elapsed_time": "0:19:56", "remaining_time": "0:05:37"}
|
| 391 |
+
{"current_steps": 3910, "total_steps": 5000, "loss": 4.0774, "lr": 6.895703464063319e-06, "epoch": 0.782, "percentage": 78.2, "elapsed_time": "0:19:59", "remaining_time": "0:05:34"}
|
| 392 |
+
{"current_steps": 3920, "total_steps": 5000, "loss": 1.838, "lr": 6.775784314464717e-06, "epoch": 0.784, "percentage": 78.4, "elapsed_time": "0:20:02", "remaining_time": "0:05:31"}
|
| 393 |
+
{"current_steps": 3930, "total_steps": 5000, "loss": 2.2859, "lr": 6.656753387428089e-06, "epoch": 0.786, "percentage": 78.6, "elapsed_time": "0:20:05", "remaining_time": "0:05:28"}
|
| 394 |
+
{"current_steps": 3940, "total_steps": 5000, "loss": 4.2013, "lr": 6.538616484352902e-06, "epoch": 0.788, "percentage": 78.8, "elapsed_time": "0:20:08", "remaining_time": "0:05:25"}
|
| 395 |
+
{"current_steps": 3950, "total_steps": 5000, "loss": 3.8002, "lr": 6.421379363065142e-06, "epoch": 0.79, "percentage": 79.0, "elapsed_time": "0:20:11", "remaining_time": "0:05:22"}
|
| 396 |
+
{"current_steps": 3960, "total_steps": 5000, "loss": 2.5436, "lr": 6.305047737536707e-06, "epoch": 0.792, "percentage": 79.2, "elapsed_time": "0:20:14", "remaining_time": "0:05:18"}
|
| 397 |
+
{"current_steps": 3970, "total_steps": 5000, "loss": 2.4645, "lr": 6.189627277606894e-06, "epoch": 0.794, "percentage": 79.4, "elapsed_time": "0:20:17", "remaining_time": "0:05:15"}
|
| 398 |
+
{"current_steps": 3980, "total_steps": 5000, "loss": 1.1404, "lr": 6.075123608706093e-06, "epoch": 0.796, "percentage": 79.6, "elapsed_time": "0:20:20", "remaining_time": "0:05:12"}
|
| 399 |
+
{"current_steps": 3990, "total_steps": 5000, "loss": 4.2125, "lr": 5.961542311581586e-06, "epoch": 0.798, "percentage": 79.8, "elapsed_time": "0:20:23", "remaining_time": "0:05:09"}
|
| 400 |
+
{"current_steps": 4000, "total_steps": 5000, "loss": 1.4396, "lr": 5.848888922025553e-06, "epoch": 0.8, "percentage": 80.0, "elapsed_time": "0:20:26", "remaining_time": "0:05:06"}
|
| 401 |
+
{"current_steps": 4010, "total_steps": 5000, "loss": 2.321, "lr": 5.737168930605272e-06, "epoch": 0.802, "percentage": 80.2, "elapsed_time": "0:20:29", "remaining_time": "0:05:03"}
|
| 402 |
+
{"current_steps": 4020, "total_steps": 5000, "loss": 2.803, "lr": 5.626387782395512e-06, "epoch": 0.804, "percentage": 80.4, "elapsed_time": "0:20:32", "remaining_time": "0:05:00"}
|
| 403 |
+
{"current_steps": 4030, "total_steps": 5000, "loss": 1.024, "lr": 5.5165508767131415e-06, "epoch": 0.806, "percentage": 80.6, "elapsed_time": "0:20:35", "remaining_time": "0:04:57"}
|
| 404 |
+
{"current_steps": 4040, "total_steps": 5000, "loss": 2.4534, "lr": 5.4076635668540075e-06, "epoch": 0.808, "percentage": 80.8, "elapsed_time": "0:20:38", "remaining_time": "0:04:54"}
|
| 405 |
+
{"current_steps": 4050, "total_steps": 5000, "loss": 2.1075, "lr": 5.299731159831953e-06, "epoch": 0.81, "percentage": 81.0, "elapsed_time": "0:20:41", "remaining_time": "0:04:51"}
|
| 406 |
+
{"current_steps": 4060, "total_steps": 5000, "loss": 3.3703, "lr": 5.192758916120236e-06, "epoch": 0.812, "percentage": 81.2, "elapsed_time": "0:20:44", "remaining_time": "0:04:48"}
|
| 407 |
+
{"current_steps": 4070, "total_steps": 5000, "loss": 2.9377, "lr": 5.086752049395094e-06, "epoch": 0.814, "percentage": 81.4, "elapsed_time": "0:20:47", "remaining_time": "0:04:45"}
|
| 408 |
+
{"current_steps": 4080, "total_steps": 5000, "loss": 4.4126, "lr": 4.981715726281666e-06, "epoch": 0.816, "percentage": 81.6, "elapsed_time": "0:20:50", "remaining_time": "0:04:42"}
|
| 409 |
+
{"current_steps": 4090, "total_steps": 5000, "loss": 2.5896, "lr": 4.877655066102149e-06, "epoch": 0.818, "percentage": 81.8, "elapsed_time": "0:20:53", "remaining_time": "0:04:38"}
|
| 410 |
+
{"current_steps": 4100, "total_steps": 5000, "loss": 1.4516, "lr": 4.7745751406263165e-06, "epoch": 0.82, "percentage": 82.0, "elapsed_time": "0:20:56", "remaining_time": "0:04:35"}
|
| 411 |
+
{"current_steps": 4110, "total_steps": 5000, "loss": 1.3904, "lr": 4.672480973824311e-06, "epoch": 0.822, "percentage": 82.2, "elapsed_time": "0:20:59", "remaining_time": "0:04:32"}
|
| 412 |
+
{"current_steps": 4120, "total_steps": 5000, "loss": 1.6246, "lr": 4.571377541621788e-06, "epoch": 0.824, "percentage": 82.4, "elapsed_time": "0:21:02", "remaining_time": "0:04:29"}
|
| 413 |
+
{"current_steps": 4130, "total_steps": 5000, "loss": 3.125, "lr": 4.4712697716574e-06, "epoch": 0.826, "percentage": 82.6, "elapsed_time": "0:21:05", "remaining_time": "0:04:26"}
|
| 414 |
+
{"current_steps": 4140, "total_steps": 5000, "loss": 1.1379, "lr": 4.372162543042624e-06, "epoch": 0.828, "percentage": 82.8, "elapsed_time": "0:21:08", "remaining_time": "0:04:23"}
|
| 415 |
+
{"current_steps": 4150, "total_steps": 5000, "loss": 1.723, "lr": 4.274060686123959e-06, "epoch": 0.83, "percentage": 83.0, "elapsed_time": "0:21:11", "remaining_time": "0:04:20"}
|
| 416 |
+
{"current_steps": 4160, "total_steps": 5000, "loss": 2.2388, "lr": 4.176968982247514e-06, "epoch": 0.832, "percentage": 83.2, "elapsed_time": "0:21:14", "remaining_time": "0:04:17"}
|
| 417 |
+
{"current_steps": 4170, "total_steps": 5000, "loss": 1.7591, "lr": 4.08089216352596e-06, "epoch": 0.834, "percentage": 83.4, "elapsed_time": "0:21:17", "remaining_time": "0:04:14"}
|
| 418 |
+
{"current_steps": 4180, "total_steps": 5000, "loss": 0.9926, "lr": 3.985834912607894e-06, "epoch": 0.836, "percentage": 83.6, "elapsed_time": "0:21:20", "remaining_time": "0:04:11"}
|
| 419 |
+
{"current_steps": 4190, "total_steps": 5000, "loss": 2.8662, "lr": 3.891801862449629e-06, "epoch": 0.838, "percentage": 83.8, "elapsed_time": "0:21:23", "remaining_time": "0:04:08"}
|
| 420 |
+
{"current_steps": 4200, "total_steps": 5000, "loss": 2.1153, "lr": 3.798797596089351e-06, "epoch": 0.84, "percentage": 84.0, "elapsed_time": "0:21:26", "remaining_time": "0:04:05"}
|
| 421 |
+
{"current_steps": 4210, "total_steps": 5000, "loss": 1.9178, "lr": 3.7068266464238084e-06, "epoch": 0.842, "percentage": 84.2, "elapsed_time": "0:21:29", "remaining_time": "0:04:01"}
|
| 422 |
+
{"current_steps": 4220, "total_steps": 5000, "loss": 1.9438, "lr": 3.6158934959873353e-06, "epoch": 0.844, "percentage": 84.4, "elapsed_time": "0:21:32", "remaining_time": "0:03:58"}
|
| 423 |
+
{"current_steps": 4230, "total_steps": 5000, "loss": 2.5589, "lr": 3.5260025767333893e-06, "epoch": 0.846, "percentage": 84.6, "elapsed_time": "0:21:35", "remaining_time": "0:03:55"}
|
| 424 |
+
{"current_steps": 4240, "total_steps": 5000, "loss": 2.031, "lr": 3.4371582698185633e-06, "epoch": 0.848, "percentage": 84.8, "elapsed_time": "0:21:38", "remaining_time": "0:03:52"}
|
| 425 |
+
{"current_steps": 4250, "total_steps": 5000, "loss": 1.8701, "lr": 3.3493649053890326e-06, "epoch": 0.85, "percentage": 85.0, "elapsed_time": "0:21:41", "remaining_time": "0:03:49"}
|
| 426 |
+
{"current_steps": 4260, "total_steps": 5000, "loss": 2.4573, "lr": 3.262626762369525e-06, "epoch": 0.852, "percentage": 85.2, "elapsed_time": "0:21:44", "remaining_time": "0:03:46"}
|
| 427 |
+
{"current_steps": 4270, "total_steps": 5000, "loss": 1.5529, "lr": 3.176948068254762e-06, "epoch": 0.854, "percentage": 85.4, "elapsed_time": "0:21:47", "remaining_time": "0:03:43"}
|
| 428 |
+
{"current_steps": 4280, "total_steps": 5000, "loss": 4.1672, "lr": 3.092332998903416e-06, "epoch": 0.856, "percentage": 85.6, "elapsed_time": "0:21:50", "remaining_time": "0:03:40"}
|
| 429 |
+
{"current_steps": 4290, "total_steps": 5000, "loss": 2.5184, "lr": 3.0087856783345914e-06, "epoch": 0.858, "percentage": 85.8, "elapsed_time": "0:21:53", "remaining_time": "0:03:37"}
|
| 430 |
+
{"current_steps": 4300, "total_steps": 5000, "loss": 1.5187, "lr": 2.9263101785268254e-06, "epoch": 0.86, "percentage": 86.0, "elapsed_time": "0:21:56", "remaining_time": "0:03:34"}
|
| 431 |
+
{"current_steps": 4310, "total_steps": 5000, "loss": 2.3649, "lr": 2.8449105192196316e-06, "epoch": 0.862, "percentage": 86.2, "elapsed_time": "0:21:59", "remaining_time": "0:03:31"}
|
| 432 |
+
{"current_steps": 4320, "total_steps": 5000, "loss": 1.8013, "lr": 2.764590667717562e-06, "epoch": 0.864, "percentage": 86.4, "elapsed_time": "0:22:02", "remaining_time": "0:03:28"}
|
| 433 |
+
{"current_steps": 4330, "total_steps": 5000, "loss": 1.5805, "lr": 2.6853545386968606e-06, "epoch": 0.866, "percentage": 86.6, "elapsed_time": "0:22:05", "remaining_time": "0:03:25"}
|
| 434 |
+
{"current_steps": 4340, "total_steps": 5000, "loss": 1.8378, "lr": 2.6072059940146775e-06, "epoch": 0.868, "percentage": 86.8, "elapsed_time": "0:22:08", "remaining_time": "0:03:22"}
|
| 435 |
+
{"current_steps": 4350, "total_steps": 5000, "loss": 1.502, "lr": 2.5301488425208296e-06, "epoch": 0.87, "percentage": 87.0, "elapsed_time": "0:22:11", "remaining_time": "0:03:18"}
|
| 436 |
+
{"current_steps": 4360, "total_steps": 5000, "loss": 3.0615, "lr": 2.454186839872158e-06, "epoch": 0.872, "percentage": 87.2, "elapsed_time": "0:22:14", "remaining_time": "0:03:15"}
|
| 437 |
+
{"current_steps": 4370, "total_steps": 5000, "loss": 2.0572, "lr": 2.379323688349516e-06, "epoch": 0.874, "percentage": 87.4, "elapsed_time": "0:22:17", "remaining_time": "0:03:12"}
|
| 438 |
+
{"current_steps": 4380, "total_steps": 5000, "loss": 1.5945, "lr": 2.3055630366772856e-06, "epoch": 0.876, "percentage": 87.6, "elapsed_time": "0:22:20", "remaining_time": "0:03:09"}
|
| 439 |
+
{"current_steps": 4390, "total_steps": 5000, "loss": 1.6811, "lr": 2.2329084798455746e-06, "epoch": 0.878, "percentage": 87.8, "elapsed_time": "0:22:23", "remaining_time": "0:03:06"}
|
| 440 |
+
{"current_steps": 4400, "total_steps": 5000, "loss": 1.5655, "lr": 2.1613635589349756e-06, "epoch": 0.88, "percentage": 88.0, "elapsed_time": "0:22:26", "remaining_time": "0:03:03"}
|
| 441 |
+
{"current_steps": 4410, "total_steps": 5000, "loss": 2.1422, "lr": 2.0909317609440095e-06, "epoch": 0.882, "percentage": 88.2, "elapsed_time": "0:22:29", "remaining_time": "0:03:00"}
|
| 442 |
+
{"current_steps": 4420, "total_steps": 5000, "loss": 1.403, "lr": 2.0216165186191407e-06, "epoch": 0.884, "percentage": 88.4, "elapsed_time": "0:22:32", "remaining_time": "0:02:57"}
|
| 443 |
+
{"current_steps": 4430, "total_steps": 5000, "loss": 2.6121, "lr": 1.95342121028749e-06, "epoch": 0.886, "percentage": 88.6, "elapsed_time": "0:22:35", "remaining_time": "0:02:54"}
|
| 444 |
+
{"current_steps": 4440, "total_steps": 5000, "loss": 5.1337, "lr": 1.8863491596921745e-06, "epoch": 0.888, "percentage": 88.8, "elapsed_time": "0:22:38", "remaining_time": "0:02:51"}
|
| 445 |
+
{"current_steps": 4450, "total_steps": 5000, "loss": 2.1225, "lr": 1.8204036358303173e-06, "epoch": 0.89, "percentage": 89.0, "elapsed_time": "0:22:41", "remaining_time": "0:02:48"}
|
| 446 |
+
{"current_steps": 4460, "total_steps": 5000, "loss": 2.8199, "lr": 1.7555878527937164e-06, "epoch": 0.892, "percentage": 89.2, "elapsed_time": "0:22:44", "remaining_time": "0:02:45"}
|
| 447 |
+
{"current_steps": 4470, "total_steps": 5000, "loss": 2.1345, "lr": 1.6919049696121958e-06, "epoch": 0.894, "percentage": 89.4, "elapsed_time": "0:22:47", "remaining_time": "0:02:42"}
|
| 448 |
+
{"current_steps": 4480, "total_steps": 5000, "loss": 1.671, "lr": 1.629358090099639e-06, "epoch": 0.896, "percentage": 89.6, "elapsed_time": "0:22:50", "remaining_time": "0:02:39"}
|
| 449 |
+
{"current_steps": 4490, "total_steps": 5000, "loss": 2.7573, "lr": 1.5679502627027136e-06, "epoch": 0.898, "percentage": 89.8, "elapsed_time": "0:22:53", "remaining_time": "0:02:36"}
|
| 450 |
+
{"current_steps": 4500, "total_steps": 5000, "loss": 1.8508, "lr": 1.5076844803522922e-06, "epoch": 0.9, "percentage": 90.0, "elapsed_time": "0:22:56", "remaining_time": "0:02:32"}
|
| 451 |
+
{"current_steps": 4510, "total_steps": 5000, "loss": 4.521, "lr": 1.4485636803175829e-06, "epoch": 0.902, "percentage": 90.2, "elapsed_time": "0:22:59", "remaining_time": "0:02:29"}
|
| 452 |
+
{"current_steps": 4520, "total_steps": 5000, "loss": 3.2342, "lr": 1.3905907440629752e-06, "epoch": 0.904, "percentage": 90.4, "elapsed_time": "0:23:02", "remaining_time": "0:02:26"}
|
| 453 |
+
{"current_steps": 4530, "total_steps": 5000, "loss": 3.0516, "lr": 1.333768497107593e-06, "epoch": 0.906, "percentage": 90.6, "elapsed_time": "0:23:05", "remaining_time": "0:02:23"}
|
| 454 |
+
{"current_steps": 4540, "total_steps": 5000, "loss": 2.0561, "lr": 1.2780997088875869e-06, "epoch": 0.908, "percentage": 90.8, "elapsed_time": "0:23:08", "remaining_time": "0:02:20"}
|
| 455 |
+
{"current_steps": 4550, "total_steps": 5000, "loss": 1.5619, "lr": 1.2235870926211619e-06, "epoch": 0.91, "percentage": 91.0, "elapsed_time": "0:23:11", "remaining_time": "0:02:17"}
|
| 456 |
+
{"current_steps": 4560, "total_steps": 5000, "loss": 2.0134, "lr": 1.170233305176327e-06, "epoch": 0.912, "percentage": 91.2, "elapsed_time": "0:23:14", "remaining_time": "0:02:14"}
|
| 457 |
+
{"current_steps": 4570, "total_steps": 5000, "loss": 0.8128, "lr": 1.1180409469414094e-06, "epoch": 0.914, "percentage": 91.4, "elapsed_time": "0:23:17", "remaining_time": "0:02:11"}
|
| 458 |
+
{"current_steps": 4580, "total_steps": 5000, "loss": 2.5157, "lr": 1.067012561698319e-06, "epoch": 0.916, "percentage": 91.6, "elapsed_time": "0:23:20", "remaining_time": "0:02:08"}
|
| 459 |
+
{"current_steps": 4590, "total_steps": 5000, "loss": 1.7379, "lr": 1.0171506364985622e-06, "epoch": 0.918, "percentage": 91.8, "elapsed_time": "0:23:23", "remaining_time": "0:02:05"}
|
| 460 |
+
{"current_steps": 4600, "total_steps": 5000, "loss": 1.3243, "lr": 9.684576015420278e-07, "epoch": 0.92, "percentage": 92.0, "elapsed_time": "0:23:26", "remaining_time": "0:02:02"}
|
| 461 |
+
{"current_steps": 4610, "total_steps": 5000, "loss": 4.207, "lr": 9.209358300585474e-07, "epoch": 0.922, "percentage": 92.2, "elapsed_time": "0:23:29", "remaining_time": "0:01:59"}
|
| 462 |
+
{"current_steps": 4620, "total_steps": 5000, "loss": 2.8713, "lr": 8.745876381922147e-07, "epoch": 0.924, "percentage": 92.4, "elapsed_time": "0:23:32", "remaining_time": "0:01:56"}
|
| 463 |
+
{"current_steps": 4630, "total_steps": 5000, "loss": 2.2642, "lr": 8.294152848885157e-07, "epoch": 0.926, "percentage": 92.6, "elapsed_time": "0:23:35", "remaining_time": "0:01:53"}
|
| 464 |
+
{"current_steps": 4640, "total_steps": 5000, "loss": 3.0498, "lr": 7.854209717842231e-07, "epoch": 0.928, "percentage": 92.8, "elapsed_time": "0:23:38", "remaining_time": "0:01:50"}
|
| 465 |
+
{"current_steps": 4650, "total_steps": 5000, "loss": 3.1851, "lr": 7.426068431000882e-07, "epoch": 0.93, "percentage": 93.0, "elapsed_time": "0:23:41", "remaining_time": "0:01:47"}
|
| 466 |
+
{"current_steps": 4660, "total_steps": 5000, "loss": 1.5354, "lr": 7.009749855363456e-07, "epoch": 0.932, "percentage": 93.2, "elapsed_time": "0:23:44", "remaining_time": "0:01:43"}
|
| 467 |
+
{"current_steps": 4670, "total_steps": 5000, "loss": 1.7694, "lr": 6.605274281709928e-07, "epoch": 0.934, "percentage": 93.4, "elapsed_time": "0:23:47", "remaining_time": "0:01:40"}
|
| 468 |
+
{"current_steps": 4680, "total_steps": 5000, "loss": 1.4321, "lr": 6.212661423609184e-07, "epoch": 0.936, "percentage": 93.6, "elapsed_time": "0:23:50", "remaining_time": "0:01:37"}
|
| 469 |
+
{"current_steps": 4690, "total_steps": 5000, "loss": 1.4432, "lr": 5.83193041645802e-07, "epoch": 0.938, "percentage": 93.8, "elapsed_time": "0:23:53", "remaining_time": "0:01:34"}
|
| 470 |
+
{"current_steps": 4700, "total_steps": 5000, "loss": 1.7524, "lr": 5.463099816548579e-07, "epoch": 0.94, "percentage": 94.0, "elapsed_time": "0:23:56", "remaining_time": "0:01:31"}
|
| 471 |
+
{"current_steps": 4710, "total_steps": 5000, "loss": 3.8866, "lr": 5.106187600163987e-07, "epoch": 0.942, "percentage": 94.2, "elapsed_time": "0:23:59", "remaining_time": "0:01:28"}
|
| 472 |
+
{"current_steps": 4720, "total_steps": 5000, "loss": 1.618, "lr": 4.7612111627021175e-07, "epoch": 0.944, "percentage": 94.4, "elapsed_time": "0:24:02", "remaining_time": "0:01:25"}
|
| 473 |
+
{"current_steps": 4730, "total_steps": 5000, "loss": 2.4191, "lr": 4.4281873178278475e-07, "epoch": 0.946, "percentage": 94.6, "elapsed_time": "0:24:05", "remaining_time": "0:01:22"}
|
| 474 |
+
{"current_steps": 4740, "total_steps": 5000, "loss": 1.4241, "lr": 4.107132296653549e-07, "epoch": 0.948, "percentage": 94.8, "elapsed_time": "0:24:08", "remaining_time": "0:01:19"}
|
| 475 |
+
{"current_steps": 4750, "total_steps": 5000, "loss": 1.8877, "lr": 3.7980617469479953e-07, "epoch": 0.95, "percentage": 95.0, "elapsed_time": "0:24:11", "remaining_time": "0:01:16"}
|
| 476 |
+
{"current_steps": 4760, "total_steps": 5000, "loss": 1.4828, "lr": 3.5009907323737825e-07, "epoch": 0.952, "percentage": 95.2, "elapsed_time": "0:24:14", "remaining_time": "0:01:13"}
|
| 477 |
+
{"current_steps": 4770, "total_steps": 5000, "loss": 2.1333, "lr": 3.215933731753024e-07, "epoch": 0.954, "percentage": 95.4, "elapsed_time": "0:24:17", "remaining_time": "0:01:10"}
|
| 478 |
+
{"current_steps": 4780, "total_steps": 5000, "loss": 2.0255, "lr": 2.942904638361804e-07, "epoch": 0.956, "percentage": 95.6, "elapsed_time": "0:24:20", "remaining_time": "0:01:07"}
|
| 479 |
+
{"current_steps": 4790, "total_steps": 5000, "loss": 3.1469, "lr": 2.681916759252917e-07, "epoch": 0.958, "percentage": 95.8, "elapsed_time": "0:24:23", "remaining_time": "0:01:04"}
|
| 480 |
+
{"current_steps": 4800, "total_steps": 5000, "loss": 2.5756, "lr": 2.4329828146074095e-07, "epoch": 0.96, "percentage": 96.0, "elapsed_time": "0:24:26", "remaining_time": "0:01:01"}
|
| 481 |
+
{"current_steps": 4810, "total_steps": 5000, "loss": 2.2082, "lr": 2.1961149371145795e-07, "epoch": 0.962, "percentage": 96.2, "elapsed_time": "0:24:29", "remaining_time": "0:00:58"}
|
| 482 |
+
{"current_steps": 4820, "total_steps": 5000, "loss": 1.2087, "lr": 1.9713246713805588e-07, "epoch": 0.964, "percentage": 96.4, "elapsed_time": "0:24:32", "remaining_time": "0:00:54"}
|
| 483 |
+
{"current_steps": 4830, "total_steps": 5000, "loss": 3.4375, "lr": 1.7586229733657644e-07, "epoch": 0.966, "percentage": 96.6, "elapsed_time": "0:24:35", "remaining_time": "0:00:51"}
|
| 484 |
+
{"current_steps": 4840, "total_steps": 5000, "loss": 1.2341, "lr": 1.5580202098509077e-07, "epoch": 0.968, "percentage": 96.8, "elapsed_time": "0:24:38", "remaining_time": "0:00:48"}
|
| 485 |
+
{"current_steps": 4850, "total_steps": 5000, "loss": 1.7228, "lr": 1.3695261579316777e-07, "epoch": 0.97, "percentage": 97.0, "elapsed_time": "0:24:41", "remaining_time": "0:00:45"}
|
| 486 |
+
{"current_steps": 4860, "total_steps": 5000, "loss": 1.4849, "lr": 1.193150004542204e-07, "epoch": 0.972, "percentage": 97.2, "elapsed_time": "0:24:44", "remaining_time": "0:00:42"}
|
| 487 |
+
{"current_steps": 4870, "total_steps": 5000, "loss": 1.5513, "lr": 1.0289003460074165e-07, "epoch": 0.974, "percentage": 97.4, "elapsed_time": "0:24:47", "remaining_time": "0:00:39"}
|
| 488 |
+
{"current_steps": 4880, "total_steps": 5000, "loss": 1.3479, "lr": 8.767851876239074e-08, "epoch": 0.976, "percentage": 97.6, "elapsed_time": "0:24:50", "remaining_time": "0:00:36"}
|
| 489 |
+
{"current_steps": 4890, "total_steps": 5000, "loss": 2.6461, "lr": 7.368119432699383e-08, "epoch": 0.978, "percentage": 97.8, "elapsed_time": "0:24:53", "remaining_time": "0:00:33"}
|
| 490 |
+
{"current_steps": 4900, "total_steps": 5000, "loss": 1.5137, "lr": 6.089874350439506e-08, "epoch": 0.98, "percentage": 98.0, "elapsed_time": "0:24:56", "remaining_time": "0:00:30"}
|
| 491 |
+
{"current_steps": 4910, "total_steps": 5000, "loss": 2.9775, "lr": 4.9331789293211026e-08, "epoch": 0.982, "percentage": 98.2, "elapsed_time": "0:24:59", "remaining_time": "0:00:27"}
|
| 492 |
+
{"current_steps": 4920, "total_steps": 5000, "loss": 1.0868, "lr": 3.8980895450474455e-08, "epoch": 0.984, "percentage": 98.4, "elapsed_time": "0:25:02", "remaining_time": "0:00:24"}
|
| 493 |
+
{"current_steps": 4930, "total_steps": 5000, "loss": 2.3111, "lr": 2.9846566464150626e-08, "epoch": 0.986, "percentage": 98.6, "elapsed_time": "0:25:05", "remaining_time": "0:00:21"}
|
| 494 |
+
{"current_steps": 4940, "total_steps": 5000, "loss": 2.8528, "lr": 2.192924752854042e-08, "epoch": 0.988, "percentage": 98.8, "elapsed_time": "0:25:08", "remaining_time": "0:00:18"}
|
| 495 |
+
{"current_steps": 4950, "total_steps": 5000, "loss": 7.4089, "lr": 1.522932452260595e-08, "epoch": 0.99, "percentage": 99.0, "elapsed_time": "0:25:11", "remaining_time": "0:00:15"}
|
| 496 |
+
{"current_steps": 4960, "total_steps": 5000, "loss": 5.3353, "lr": 9.747123991141194e-09, "epoch": 0.992, "percentage": 99.2, "elapsed_time": "0:25:14", "remaining_time": "0:00:12"}
|
| 497 |
+
{"current_steps": 4970, "total_steps": 5000, "loss": 1.8168, "lr": 5.48291312886251e-09, "epoch": 0.994, "percentage": 99.4, "elapsed_time": "0:25:17", "remaining_time": "0:00:09"}
|
| 498 |
+
{"current_steps": 4980, "total_steps": 5000, "loss": 2.34, "lr": 2.4368997673940297e-09, "epoch": 0.996, "percentage": 99.6, "elapsed_time": "0:25:20", "remaining_time": "0:00:06"}
|
| 499 |
+
{"current_steps": 4990, "total_steps": 5000, "loss": 1.8474, "lr": 6.092323651313292e-10, "epoch": 0.998, "percentage": 99.8, "elapsed_time": "0:25:23", "remaining_time": "0:00:03"}
|
| 500 |
+
{"current_steps": 5000, "total_steps": 5000, "loss": 5.5777, "lr": 0.0, "epoch": 1.0, "percentage": 100.0, "elapsed_time": "0:25:26", "remaining_time": "0:00:00"}
|
| 501 |
+
{"current_steps": 5000, "total_steps": 5000, "epoch": 1.0, "percentage": 100.0, "elapsed_time": "0:25:26", "remaining_time": "0:00:00"}
|
Llama-2-13b-chat-hf/DomainBench/Agriculture/trainer_state.json
ADDED
|
@@ -0,0 +1,3542 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"best_metric": null,
|
| 3 |
+
"best_model_checkpoint": null,
|
| 4 |
+
"epoch": 1.0,
|
| 5 |
+
"eval_steps": 500,
|
| 6 |
+
"global_step": 5000,
|
| 7 |
+
"is_hyper_param_search": false,
|
| 8 |
+
"is_local_process_zero": true,
|
| 9 |
+
"is_world_process_zero": true,
|
| 10 |
+
"log_history": [
|
| 11 |
+
{
|
| 12 |
+
"epoch": 0.002,
|
| 13 |
+
"grad_norm": 1.010804533958435,
|
| 14 |
+
"learning_rate": 1.0000000000000002e-06,
|
| 15 |
+
"loss": 10.8135,
|
| 16 |
+
"step": 10
|
| 17 |
+
},
|
| 18 |
+
{
|
| 19 |
+
"epoch": 0.004,
|
| 20 |
+
"grad_norm": 5.864879131317139,
|
| 21 |
+
"learning_rate": 2.0000000000000003e-06,
|
| 22 |
+
"loss": 8.4638,
|
| 23 |
+
"step": 20
|
| 24 |
+
},
|
| 25 |
+
{
|
| 26 |
+
"epoch": 0.006,
|
| 27 |
+
"grad_norm": 2.968351364135742,
|
| 28 |
+
"learning_rate": 3e-06,
|
| 29 |
+
"loss": 15.4904,
|
| 30 |
+
"step": 30
|
| 31 |
+
},
|
| 32 |
+
{
|
| 33 |
+
"epoch": 0.008,
|
| 34 |
+
"grad_norm": 7.154600143432617,
|
| 35 |
+
"learning_rate": 4.000000000000001e-06,
|
| 36 |
+
"loss": 11.4875,
|
| 37 |
+
"step": 40
|
| 38 |
+
},
|
| 39 |
+
{
|
| 40 |
+
"epoch": 0.01,
|
| 41 |
+
"grad_norm": 2.5448992252349854,
|
| 42 |
+
"learning_rate": 5e-06,
|
| 43 |
+
"loss": 14.2003,
|
| 44 |
+
"step": 50
|
| 45 |
+
},
|
| 46 |
+
{
|
| 47 |
+
"epoch": 0.012,
|
| 48 |
+
"grad_norm": 3.4361155033111572,
|
| 49 |
+
"learning_rate": 6e-06,
|
| 50 |
+
"loss": 12.1374,
|
| 51 |
+
"step": 60
|
| 52 |
+
},
|
| 53 |
+
{
|
| 54 |
+
"epoch": 0.014,
|
| 55 |
+
"grad_norm": 6.9021172523498535,
|
| 56 |
+
"learning_rate": 7.000000000000001e-06,
|
| 57 |
+
"loss": 11.6844,
|
| 58 |
+
"step": 70
|
| 59 |
+
},
|
| 60 |
+
{
|
| 61 |
+
"epoch": 0.016,
|
| 62 |
+
"grad_norm": 0.4121188521385193,
|
| 63 |
+
"learning_rate": 8.000000000000001e-06,
|
| 64 |
+
"loss": 10.4387,
|
| 65 |
+
"step": 80
|
| 66 |
+
},
|
| 67 |
+
{
|
| 68 |
+
"epoch": 0.018,
|
| 69 |
+
"grad_norm": 2.346395492553711,
|
| 70 |
+
"learning_rate": 9e-06,
|
| 71 |
+
"loss": 5.5739,
|
| 72 |
+
"step": 90
|
| 73 |
+
},
|
| 74 |
+
{
|
| 75 |
+
"epoch": 0.02,
|
| 76 |
+
"grad_norm": 7.073041915893555,
|
| 77 |
+
"learning_rate": 1e-05,
|
| 78 |
+
"loss": 12.7118,
|
| 79 |
+
"step": 100
|
| 80 |
+
},
|
| 81 |
+
{
|
| 82 |
+
"epoch": 0.022,
|
| 83 |
+
"grad_norm": 3.0463881492614746,
|
| 84 |
+
"learning_rate": 1.1000000000000001e-05,
|
| 85 |
+
"loss": 15.3528,
|
| 86 |
+
"step": 110
|
| 87 |
+
},
|
| 88 |
+
{
|
| 89 |
+
"epoch": 0.024,
|
| 90 |
+
"grad_norm": 1.735839605331421,
|
| 91 |
+
"learning_rate": 1.2e-05,
|
| 92 |
+
"loss": 14.2922,
|
| 93 |
+
"step": 120
|
| 94 |
+
},
|
| 95 |
+
{
|
| 96 |
+
"epoch": 0.026,
|
| 97 |
+
"grad_norm": 0.4800063371658325,
|
| 98 |
+
"learning_rate": 1.3000000000000001e-05,
|
| 99 |
+
"loss": 6.3563,
|
| 100 |
+
"step": 130
|
| 101 |
+
},
|
| 102 |
+
{
|
| 103 |
+
"epoch": 0.028,
|
| 104 |
+
"grad_norm": 25.26578140258789,
|
| 105 |
+
"learning_rate": 1.4000000000000001e-05,
|
| 106 |
+
"loss": 7.9494,
|
| 107 |
+
"step": 140
|
| 108 |
+
},
|
| 109 |
+
{
|
| 110 |
+
"epoch": 0.03,
|
| 111 |
+
"grad_norm": 1.725184440612793,
|
| 112 |
+
"learning_rate": 1.5e-05,
|
| 113 |
+
"loss": 10.7366,
|
| 114 |
+
"step": 150
|
| 115 |
+
},
|
| 116 |
+
{
|
| 117 |
+
"epoch": 0.032,
|
| 118 |
+
"grad_norm": 5.55291223526001,
|
| 119 |
+
"learning_rate": 1.6000000000000003e-05,
|
| 120 |
+
"loss": 4.7961,
|
| 121 |
+
"step": 160
|
| 122 |
+
},
|
| 123 |
+
{
|
| 124 |
+
"epoch": 0.034,
|
| 125 |
+
"grad_norm": 0.3070022761821747,
|
| 126 |
+
"learning_rate": 1.7000000000000003e-05,
|
| 127 |
+
"loss": 2.8594,
|
| 128 |
+
"step": 170
|
| 129 |
+
},
|
| 130 |
+
{
|
| 131 |
+
"epoch": 0.036,
|
| 132 |
+
"grad_norm": 4.781795501708984,
|
| 133 |
+
"learning_rate": 1.8e-05,
|
| 134 |
+
"loss": 4.2593,
|
| 135 |
+
"step": 180
|
| 136 |
+
},
|
| 137 |
+
{
|
| 138 |
+
"epoch": 0.038,
|
| 139 |
+
"grad_norm": 7.462250232696533,
|
| 140 |
+
"learning_rate": 1.9e-05,
|
| 141 |
+
"loss": 5.3202,
|
| 142 |
+
"step": 190
|
| 143 |
+
},
|
| 144 |
+
{
|
| 145 |
+
"epoch": 0.04,
|
| 146 |
+
"grad_norm": 5.073458194732666,
|
| 147 |
+
"learning_rate": 2e-05,
|
| 148 |
+
"loss": 8.7095,
|
| 149 |
+
"step": 200
|
| 150 |
+
},
|
| 151 |
+
{
|
| 152 |
+
"epoch": 0.042,
|
| 153 |
+
"grad_norm": 10.127187728881836,
|
| 154 |
+
"learning_rate": 2.1e-05,
|
| 155 |
+
"loss": 4.7786,
|
| 156 |
+
"step": 210
|
| 157 |
+
},
|
| 158 |
+
{
|
| 159 |
+
"epoch": 0.044,
|
| 160 |
+
"grad_norm": 6.5535125732421875,
|
| 161 |
+
"learning_rate": 2.2000000000000003e-05,
|
| 162 |
+
"loss": 2.5694,
|
| 163 |
+
"step": 220
|
| 164 |
+
},
|
| 165 |
+
{
|
| 166 |
+
"epoch": 0.046,
|
| 167 |
+
"grad_norm": 5.452541828155518,
|
| 168 |
+
"learning_rate": 2.3000000000000003e-05,
|
| 169 |
+
"loss": 4.3152,
|
| 170 |
+
"step": 230
|
| 171 |
+
},
|
| 172 |
+
{
|
| 173 |
+
"epoch": 0.048,
|
| 174 |
+
"grad_norm": 1.630622386932373,
|
| 175 |
+
"learning_rate": 2.4e-05,
|
| 176 |
+
"loss": 4.0996,
|
| 177 |
+
"step": 240
|
| 178 |
+
},
|
| 179 |
+
{
|
| 180 |
+
"epoch": 0.05,
|
| 181 |
+
"grad_norm": 3.042468547821045,
|
| 182 |
+
"learning_rate": 2.5e-05,
|
| 183 |
+
"loss": 4.9146,
|
| 184 |
+
"step": 250
|
| 185 |
+
},
|
| 186 |
+
{
|
| 187 |
+
"epoch": 0.052,
|
| 188 |
+
"grad_norm": 2.458193063735962,
|
| 189 |
+
"learning_rate": 2.6000000000000002e-05,
|
| 190 |
+
"loss": 1.8707,
|
| 191 |
+
"step": 260
|
| 192 |
+
},
|
| 193 |
+
{
|
| 194 |
+
"epoch": 0.054,
|
| 195 |
+
"grad_norm": 1.6752468347549438,
|
| 196 |
+
"learning_rate": 2.7000000000000002e-05,
|
| 197 |
+
"loss": 3.1247,
|
| 198 |
+
"step": 270
|
| 199 |
+
},
|
| 200 |
+
{
|
| 201 |
+
"epoch": 0.056,
|
| 202 |
+
"grad_norm": 4.470595359802246,
|
| 203 |
+
"learning_rate": 2.8000000000000003e-05,
|
| 204 |
+
"loss": 3.8507,
|
| 205 |
+
"step": 280
|
| 206 |
+
},
|
| 207 |
+
{
|
| 208 |
+
"epoch": 0.058,
|
| 209 |
+
"grad_norm": 6.865239143371582,
|
| 210 |
+
"learning_rate": 2.9e-05,
|
| 211 |
+
"loss": 2.8481,
|
| 212 |
+
"step": 290
|
| 213 |
+
},
|
| 214 |
+
{
|
| 215 |
+
"epoch": 0.06,
|
| 216 |
+
"grad_norm": 20.699951171875,
|
| 217 |
+
"learning_rate": 3e-05,
|
| 218 |
+
"loss": 4.4567,
|
| 219 |
+
"step": 300
|
| 220 |
+
},
|
| 221 |
+
{
|
| 222 |
+
"epoch": 0.062,
|
| 223 |
+
"grad_norm": 8.376248359680176,
|
| 224 |
+
"learning_rate": 3.1e-05,
|
| 225 |
+
"loss": 3.544,
|
| 226 |
+
"step": 310
|
| 227 |
+
},
|
| 228 |
+
{
|
| 229 |
+
"epoch": 0.064,
|
| 230 |
+
"grad_norm": 0.6330814361572266,
|
| 231 |
+
"learning_rate": 3.2000000000000005e-05,
|
| 232 |
+
"loss": 2.028,
|
| 233 |
+
"step": 320
|
| 234 |
+
},
|
| 235 |
+
{
|
| 236 |
+
"epoch": 0.066,
|
| 237 |
+
"grad_norm": 1.892885684967041,
|
| 238 |
+
"learning_rate": 3.3e-05,
|
| 239 |
+
"loss": 3.4244,
|
| 240 |
+
"step": 330
|
| 241 |
+
},
|
| 242 |
+
{
|
| 243 |
+
"epoch": 0.068,
|
| 244 |
+
"grad_norm": 6.533657073974609,
|
| 245 |
+
"learning_rate": 3.4000000000000007e-05,
|
| 246 |
+
"loss": 5.216,
|
| 247 |
+
"step": 340
|
| 248 |
+
},
|
| 249 |
+
{
|
| 250 |
+
"epoch": 0.07,
|
| 251 |
+
"grad_norm": 1.1567013263702393,
|
| 252 |
+
"learning_rate": 3.5e-05,
|
| 253 |
+
"loss": 2.7441,
|
| 254 |
+
"step": 350
|
| 255 |
+
},
|
| 256 |
+
{
|
| 257 |
+
"epoch": 0.072,
|
| 258 |
+
"grad_norm": 5.744589328765869,
|
| 259 |
+
"learning_rate": 3.6e-05,
|
| 260 |
+
"loss": 2.6191,
|
| 261 |
+
"step": 360
|
| 262 |
+
},
|
| 263 |
+
{
|
| 264 |
+
"epoch": 0.074,
|
| 265 |
+
"grad_norm": 7.04871129989624,
|
| 266 |
+
"learning_rate": 3.7e-05,
|
| 267 |
+
"loss": 5.3131,
|
| 268 |
+
"step": 370
|
| 269 |
+
},
|
| 270 |
+
{
|
| 271 |
+
"epoch": 0.076,
|
| 272 |
+
"grad_norm": 13.293014526367188,
|
| 273 |
+
"learning_rate": 3.8e-05,
|
| 274 |
+
"loss": 5.2818,
|
| 275 |
+
"step": 380
|
| 276 |
+
},
|
| 277 |
+
{
|
| 278 |
+
"epoch": 0.078,
|
| 279 |
+
"grad_norm": 3.2866837978363037,
|
| 280 |
+
"learning_rate": 3.9000000000000006e-05,
|
| 281 |
+
"loss": 3.086,
|
| 282 |
+
"step": 390
|
| 283 |
+
},
|
| 284 |
+
{
|
| 285 |
+
"epoch": 0.08,
|
| 286 |
+
"grad_norm": 2.1428515911102295,
|
| 287 |
+
"learning_rate": 4e-05,
|
| 288 |
+
"loss": 2.6475,
|
| 289 |
+
"step": 400
|
| 290 |
+
},
|
| 291 |
+
{
|
| 292 |
+
"epoch": 0.082,
|
| 293 |
+
"grad_norm": 46.64889907836914,
|
| 294 |
+
"learning_rate": 4.1e-05,
|
| 295 |
+
"loss": 2.0594,
|
| 296 |
+
"step": 410
|
| 297 |
+
},
|
| 298 |
+
{
|
| 299 |
+
"epoch": 0.084,
|
| 300 |
+
"grad_norm": 2.84214448928833,
|
| 301 |
+
"learning_rate": 4.2e-05,
|
| 302 |
+
"loss": 3.5431,
|
| 303 |
+
"step": 420
|
| 304 |
+
},
|
| 305 |
+
{
|
| 306 |
+
"epoch": 0.086,
|
| 307 |
+
"grad_norm": 5.909653186798096,
|
| 308 |
+
"learning_rate": 4.3e-05,
|
| 309 |
+
"loss": 2.7867,
|
| 310 |
+
"step": 430
|
| 311 |
+
},
|
| 312 |
+
{
|
| 313 |
+
"epoch": 0.088,
|
| 314 |
+
"grad_norm": 4.650357246398926,
|
| 315 |
+
"learning_rate": 4.4000000000000006e-05,
|
| 316 |
+
"loss": 6.2247,
|
| 317 |
+
"step": 440
|
| 318 |
+
},
|
| 319 |
+
{
|
| 320 |
+
"epoch": 0.09,
|
| 321 |
+
"grad_norm": 5.519321918487549,
|
| 322 |
+
"learning_rate": 4.5e-05,
|
| 323 |
+
"loss": 1.8291,
|
| 324 |
+
"step": 450
|
| 325 |
+
},
|
| 326 |
+
{
|
| 327 |
+
"epoch": 0.092,
|
| 328 |
+
"grad_norm": 135.69471740722656,
|
| 329 |
+
"learning_rate": 4.600000000000001e-05,
|
| 330 |
+
"loss": 5.4416,
|
| 331 |
+
"step": 460
|
| 332 |
+
},
|
| 333 |
+
{
|
| 334 |
+
"epoch": 0.094,
|
| 335 |
+
"grad_norm": 4.472829341888428,
|
| 336 |
+
"learning_rate": 4.7e-05,
|
| 337 |
+
"loss": 6.0983,
|
| 338 |
+
"step": 470
|
| 339 |
+
},
|
| 340 |
+
{
|
| 341 |
+
"epoch": 0.096,
|
| 342 |
+
"grad_norm": 3.7479896545410156,
|
| 343 |
+
"learning_rate": 4.8e-05,
|
| 344 |
+
"loss": 1.8501,
|
| 345 |
+
"step": 480
|
| 346 |
+
},
|
| 347 |
+
{
|
| 348 |
+
"epoch": 0.098,
|
| 349 |
+
"grad_norm": 5.382004261016846,
|
| 350 |
+
"learning_rate": 4.9e-05,
|
| 351 |
+
"loss": 1.5013,
|
| 352 |
+
"step": 490
|
| 353 |
+
},
|
| 354 |
+
{
|
| 355 |
+
"epoch": 0.1,
|
| 356 |
+
"grad_norm": 16.679519653320312,
|
| 357 |
+
"learning_rate": 5e-05,
|
| 358 |
+
"loss": 2.6987,
|
| 359 |
+
"step": 500
|
| 360 |
+
},
|
| 361 |
+
{
|
| 362 |
+
"epoch": 0.102,
|
| 363 |
+
"grad_norm": 2.9823508262634277,
|
| 364 |
+
"learning_rate": 4.999939076763487e-05,
|
| 365 |
+
"loss": 2.268,
|
| 366 |
+
"step": 510
|
| 367 |
+
},
|
| 368 |
+
{
|
| 369 |
+
"epoch": 0.104,
|
| 370 |
+
"grad_norm": 5.970380783081055,
|
| 371 |
+
"learning_rate": 4.999756310023261e-05,
|
| 372 |
+
"loss": 2.1733,
|
| 373 |
+
"step": 520
|
| 374 |
+
},
|
| 375 |
+
{
|
| 376 |
+
"epoch": 0.106,
|
| 377 |
+
"grad_norm": 0.8665391206741333,
|
| 378 |
+
"learning_rate": 4.999451708687114e-05,
|
| 379 |
+
"loss": 6.0941,
|
| 380 |
+
"step": 530
|
| 381 |
+
},
|
| 382 |
+
{
|
| 383 |
+
"epoch": 0.108,
|
| 384 |
+
"grad_norm": 12.031403541564941,
|
| 385 |
+
"learning_rate": 4.999025287600886e-05,
|
| 386 |
+
"loss": 5.3397,
|
| 387 |
+
"step": 540
|
| 388 |
+
},
|
| 389 |
+
{
|
| 390 |
+
"epoch": 0.11,
|
| 391 |
+
"grad_norm": 6.884283542633057,
|
| 392 |
+
"learning_rate": 4.99847706754774e-05,
|
| 393 |
+
"loss": 1.2973,
|
| 394 |
+
"step": 550
|
| 395 |
+
},
|
| 396 |
+
{
|
| 397 |
+
"epoch": 0.112,
|
| 398 |
+
"grad_norm": 6.688343524932861,
|
| 399 |
+
"learning_rate": 4.997807075247146e-05,
|
| 400 |
+
"loss": 2.9408,
|
| 401 |
+
"step": 560
|
| 402 |
+
},
|
| 403 |
+
{
|
| 404 |
+
"epoch": 0.114,
|
| 405 |
+
"grad_norm": 2.265263319015503,
|
| 406 |
+
"learning_rate": 4.997015343353585e-05,
|
| 407 |
+
"loss": 1.5613,
|
| 408 |
+
"step": 570
|
| 409 |
+
},
|
| 410 |
+
{
|
| 411 |
+
"epoch": 0.116,
|
| 412 |
+
"grad_norm": 0.2830611765384674,
|
| 413 |
+
"learning_rate": 4.996101910454953e-05,
|
| 414 |
+
"loss": 1.5912,
|
| 415 |
+
"step": 580
|
| 416 |
+
},
|
| 417 |
+
{
|
| 418 |
+
"epoch": 0.118,
|
| 419 |
+
"grad_norm": 6.979793071746826,
|
| 420 |
+
"learning_rate": 4.995066821070679e-05,
|
| 421 |
+
"loss": 3.5306,
|
| 422 |
+
"step": 590
|
| 423 |
+
},
|
| 424 |
+
{
|
| 425 |
+
"epoch": 0.12,
|
| 426 |
+
"grad_norm": 17.920307159423828,
|
| 427 |
+
"learning_rate": 4.993910125649561e-05,
|
| 428 |
+
"loss": 2.1385,
|
| 429 |
+
"step": 600
|
| 430 |
+
},
|
| 431 |
+
{
|
| 432 |
+
"epoch": 0.122,
|
| 433 |
+
"grad_norm": 3.7927424907684326,
|
| 434 |
+
"learning_rate": 4.992631880567301e-05,
|
| 435 |
+
"loss": 1.5452,
|
| 436 |
+
"step": 610
|
| 437 |
+
},
|
| 438 |
+
{
|
| 439 |
+
"epoch": 0.124,
|
| 440 |
+
"grad_norm": 6.910824298858643,
|
| 441 |
+
"learning_rate": 4.991232148123761e-05,
|
| 442 |
+
"loss": 3.0193,
|
| 443 |
+
"step": 620
|
| 444 |
+
},
|
| 445 |
+
{
|
| 446 |
+
"epoch": 0.126,
|
| 447 |
+
"grad_norm": 6.665999412536621,
|
| 448 |
+
"learning_rate": 4.989710996539926e-05,
|
| 449 |
+
"loss": 2.3063,
|
| 450 |
+
"step": 630
|
| 451 |
+
},
|
| 452 |
+
{
|
| 453 |
+
"epoch": 0.128,
|
| 454 |
+
"grad_norm": 1.253774642944336,
|
| 455 |
+
"learning_rate": 4.988068499954578e-05,
|
| 456 |
+
"loss": 2.2441,
|
| 457 |
+
"step": 640
|
| 458 |
+
},
|
| 459 |
+
{
|
| 460 |
+
"epoch": 0.13,
|
| 461 |
+
"grad_norm": 1.61366605758667,
|
| 462 |
+
"learning_rate": 4.9863047384206835e-05,
|
| 463 |
+
"loss": 2.3075,
|
| 464 |
+
"step": 650
|
| 465 |
+
},
|
| 466 |
+
{
|
| 467 |
+
"epoch": 0.132,
|
| 468 |
+
"grad_norm": 0.7466872334480286,
|
| 469 |
+
"learning_rate": 4.984419797901491e-05,
|
| 470 |
+
"loss": 2.9721,
|
| 471 |
+
"step": 660
|
| 472 |
+
},
|
| 473 |
+
{
|
| 474 |
+
"epoch": 0.134,
|
| 475 |
+
"grad_norm": 27.337018966674805,
|
| 476 |
+
"learning_rate": 4.982413770266342e-05,
|
| 477 |
+
"loss": 2.7736,
|
| 478 |
+
"step": 670
|
| 479 |
+
},
|
| 480 |
+
{
|
| 481 |
+
"epoch": 0.136,
|
| 482 |
+
"grad_norm": 7.90925407409668,
|
| 483 |
+
"learning_rate": 4.980286753286195e-05,
|
| 484 |
+
"loss": 1.8991,
|
| 485 |
+
"step": 680
|
| 486 |
+
},
|
| 487 |
+
{
|
| 488 |
+
"epoch": 0.138,
|
| 489 |
+
"grad_norm": 1.1065661907196045,
|
| 490 |
+
"learning_rate": 4.978038850628854e-05,
|
| 491 |
+
"loss": 2.2606,
|
| 492 |
+
"step": 690
|
| 493 |
+
},
|
| 494 |
+
{
|
| 495 |
+
"epoch": 0.14,
|
| 496 |
+
"grad_norm": 1.1839981079101562,
|
| 497 |
+
"learning_rate": 4.975670171853926e-05,
|
| 498 |
+
"loss": 1.5491,
|
| 499 |
+
"step": 700
|
| 500 |
+
},
|
| 501 |
+
{
|
| 502 |
+
"epoch": 0.142,
|
| 503 |
+
"grad_norm": 4.375758171081543,
|
| 504 |
+
"learning_rate": 4.9731808324074717e-05,
|
| 505 |
+
"loss": 2.2913,
|
| 506 |
+
"step": 710
|
| 507 |
+
},
|
| 508 |
+
{
|
| 509 |
+
"epoch": 0.144,
|
| 510 |
+
"grad_norm": 6.079190731048584,
|
| 511 |
+
"learning_rate": 4.9705709536163824e-05,
|
| 512 |
+
"loss": 2.6315,
|
| 513 |
+
"step": 720
|
| 514 |
+
},
|
| 515 |
+
{
|
| 516 |
+
"epoch": 0.146,
|
| 517 |
+
"grad_norm": 9.416289329528809,
|
| 518 |
+
"learning_rate": 4.96784066268247e-05,
|
| 519 |
+
"loss": 2.5128,
|
| 520 |
+
"step": 730
|
| 521 |
+
},
|
| 522 |
+
{
|
| 523 |
+
"epoch": 0.148,
|
| 524 |
+
"grad_norm": 4.381228446960449,
|
| 525 |
+
"learning_rate": 4.964990092676263e-05,
|
| 526 |
+
"loss": 5.8145,
|
| 527 |
+
"step": 740
|
| 528 |
+
},
|
| 529 |
+
{
|
| 530 |
+
"epoch": 0.15,
|
| 531 |
+
"grad_norm": 1.1350055932998657,
|
| 532 |
+
"learning_rate": 4.962019382530521e-05,
|
| 533 |
+
"loss": 2.3354,
|
| 534 |
+
"step": 750
|
| 535 |
+
},
|
| 536 |
+
{
|
| 537 |
+
"epoch": 0.152,
|
| 538 |
+
"grad_norm": 3.0520100593566895,
|
| 539 |
+
"learning_rate": 4.9589286770334654e-05,
|
| 540 |
+
"loss": 4.621,
|
| 541 |
+
"step": 760
|
| 542 |
+
},
|
| 543 |
+
{
|
| 544 |
+
"epoch": 0.154,
|
| 545 |
+
"grad_norm": 10.430975914001465,
|
| 546 |
+
"learning_rate": 4.9557181268217227e-05,
|
| 547 |
+
"loss": 2.7795,
|
| 548 |
+
"step": 770
|
| 549 |
+
},
|
| 550 |
+
{
|
| 551 |
+
"epoch": 0.156,
|
| 552 |
+
"grad_norm": 1.6189144849777222,
|
| 553 |
+
"learning_rate": 4.952387888372979e-05,
|
| 554 |
+
"loss": 3.0171,
|
| 555 |
+
"step": 780
|
| 556 |
+
},
|
| 557 |
+
{
|
| 558 |
+
"epoch": 0.158,
|
| 559 |
+
"grad_norm": 14.97889518737793,
|
| 560 |
+
"learning_rate": 4.94893812399836e-05,
|
| 561 |
+
"loss": 2.1028,
|
| 562 |
+
"step": 790
|
| 563 |
+
},
|
| 564 |
+
{
|
| 565 |
+
"epoch": 0.16,
|
| 566 |
+
"grad_norm": 47.81957244873047,
|
| 567 |
+
"learning_rate": 4.9453690018345144e-05,
|
| 568 |
+
"loss": 13.4531,
|
| 569 |
+
"step": 800
|
| 570 |
+
},
|
| 571 |
+
{
|
| 572 |
+
"epoch": 0.162,
|
| 573 |
+
"grad_norm": 1.6978071928024292,
|
| 574 |
+
"learning_rate": 4.94168069583542e-05,
|
| 575 |
+
"loss": 2.0661,
|
| 576 |
+
"step": 810
|
| 577 |
+
},
|
| 578 |
+
{
|
| 579 |
+
"epoch": 0.164,
|
| 580 |
+
"grad_norm": 14.654315948486328,
|
| 581 |
+
"learning_rate": 4.937873385763908e-05,
|
| 582 |
+
"loss": 2.6598,
|
| 583 |
+
"step": 820
|
| 584 |
+
},
|
| 585 |
+
{
|
| 586 |
+
"epoch": 0.166,
|
| 587 |
+
"grad_norm": 9.5645112991333,
|
| 588 |
+
"learning_rate": 4.933947257182901e-05,
|
| 589 |
+
"loss": 2.58,
|
| 590 |
+
"step": 830
|
| 591 |
+
},
|
| 592 |
+
{
|
| 593 |
+
"epoch": 0.168,
|
| 594 |
+
"grad_norm": 6.204939365386963,
|
| 595 |
+
"learning_rate": 4.929902501446366e-05,
|
| 596 |
+
"loss": 2.9303,
|
| 597 |
+
"step": 840
|
| 598 |
+
},
|
| 599 |
+
{
|
| 600 |
+
"epoch": 0.17,
|
| 601 |
+
"grad_norm": 3.7468295097351074,
|
| 602 |
+
"learning_rate": 4.925739315689991e-05,
|
| 603 |
+
"loss": 3.0212,
|
| 604 |
+
"step": 850
|
| 605 |
+
},
|
| 606 |
+
{
|
| 607 |
+
"epoch": 0.172,
|
| 608 |
+
"grad_norm": 6.3915276527404785,
|
| 609 |
+
"learning_rate": 4.9214579028215776e-05,
|
| 610 |
+
"loss": 4.8252,
|
| 611 |
+
"step": 860
|
| 612 |
+
},
|
| 613 |
+
{
|
| 614 |
+
"epoch": 0.174,
|
| 615 |
+
"grad_norm": 4.012444019317627,
|
| 616 |
+
"learning_rate": 4.917058471511149e-05,
|
| 617 |
+
"loss": 2.5644,
|
| 618 |
+
"step": 870
|
| 619 |
+
},
|
| 620 |
+
{
|
| 621 |
+
"epoch": 0.176,
|
| 622 |
+
"grad_norm": 5.753321647644043,
|
| 623 |
+
"learning_rate": 4.912541236180779e-05,
|
| 624 |
+
"loss": 1.8755,
|
| 625 |
+
"step": 880
|
| 626 |
+
},
|
| 627 |
+
{
|
| 628 |
+
"epoch": 0.178,
|
| 629 |
+
"grad_norm": 1.3646568059921265,
|
| 630 |
+
"learning_rate": 4.907906416994146e-05,
|
| 631 |
+
"loss": 0.7075,
|
| 632 |
+
"step": 890
|
| 633 |
+
},
|
| 634 |
+
{
|
| 635 |
+
"epoch": 0.18,
|
| 636 |
+
"grad_norm": 7.185975551605225,
|
| 637 |
+
"learning_rate": 4.9031542398457974e-05,
|
| 638 |
+
"loss": 7.253,
|
| 639 |
+
"step": 900
|
| 640 |
+
},
|
| 641 |
+
{
|
| 642 |
+
"epoch": 0.182,
|
| 643 |
+
"grad_norm": 9.094167709350586,
|
| 644 |
+
"learning_rate": 4.898284936350144e-05,
|
| 645 |
+
"loss": 3.3396,
|
| 646 |
+
"step": 910
|
| 647 |
+
},
|
| 648 |
+
{
|
| 649 |
+
"epoch": 0.184,
|
| 650 |
+
"grad_norm": 2.7633776664733887,
|
| 651 |
+
"learning_rate": 4.893298743830168e-05,
|
| 652 |
+
"loss": 2.9798,
|
| 653 |
+
"step": 920
|
| 654 |
+
},
|
| 655 |
+
{
|
| 656 |
+
"epoch": 0.186,
|
| 657 |
+
"grad_norm": 6.744080543518066,
|
| 658 |
+
"learning_rate": 4.888195905305859e-05,
|
| 659 |
+
"loss": 1.153,
|
| 660 |
+
"step": 930
|
| 661 |
+
},
|
| 662 |
+
{
|
| 663 |
+
"epoch": 0.188,
|
| 664 |
+
"grad_norm": 55.569053649902344,
|
| 665 |
+
"learning_rate": 4.882976669482367e-05,
|
| 666 |
+
"loss": 3.1989,
|
| 667 |
+
"step": 940
|
| 668 |
+
},
|
| 669 |
+
{
|
| 670 |
+
"epoch": 0.19,
|
| 671 |
+
"grad_norm": 20.480836868286133,
|
| 672 |
+
"learning_rate": 4.877641290737884e-05,
|
| 673 |
+
"loss": 2.7478,
|
| 674 |
+
"step": 950
|
| 675 |
+
},
|
| 676 |
+
{
|
| 677 |
+
"epoch": 0.192,
|
| 678 |
+
"grad_norm": 3.698868751525879,
|
| 679 |
+
"learning_rate": 4.8721900291112415e-05,
|
| 680 |
+
"loss": 2.09,
|
| 681 |
+
"step": 960
|
| 682 |
+
},
|
| 683 |
+
{
|
| 684 |
+
"epoch": 0.194,
|
| 685 |
+
"grad_norm": 5.127801418304443,
|
| 686 |
+
"learning_rate": 4.8666231502892415e-05,
|
| 687 |
+
"loss": 1.7634,
|
| 688 |
+
"step": 970
|
| 689 |
+
},
|
| 690 |
+
{
|
| 691 |
+
"epoch": 0.196,
|
| 692 |
+
"grad_norm": 3.1002347469329834,
|
| 693 |
+
"learning_rate": 4.860940925593703e-05,
|
| 694 |
+
"loss": 1.6288,
|
| 695 |
+
"step": 980
|
| 696 |
+
},
|
| 697 |
+
{
|
| 698 |
+
"epoch": 0.198,
|
| 699 |
+
"grad_norm": 2.849256753921509,
|
| 700 |
+
"learning_rate": 4.855143631968242e-05,
|
| 701 |
+
"loss": 2.9691,
|
| 702 |
+
"step": 990
|
| 703 |
+
},
|
| 704 |
+
{
|
| 705 |
+
"epoch": 0.2,
|
| 706 |
+
"grad_norm": 15.38534164428711,
|
| 707 |
+
"learning_rate": 4.849231551964771e-05,
|
| 708 |
+
"loss": 3.5196,
|
| 709 |
+
"step": 1000
|
| 710 |
+
},
|
| 711 |
+
{
|
| 712 |
+
"epoch": 0.202,
|
| 713 |
+
"grad_norm": 6.950368881225586,
|
| 714 |
+
"learning_rate": 4.843204973729729e-05,
|
| 715 |
+
"loss": 3.1836,
|
| 716 |
+
"step": 1010
|
| 717 |
+
},
|
| 718 |
+
{
|
| 719 |
+
"epoch": 0.204,
|
| 720 |
+
"grad_norm": 19.31244659423828,
|
| 721 |
+
"learning_rate": 4.837064190990036e-05,
|
| 722 |
+
"loss": 3.1554,
|
| 723 |
+
"step": 1020
|
| 724 |
+
},
|
| 725 |
+
{
|
| 726 |
+
"epoch": 0.206,
|
| 727 |
+
"grad_norm": 52.91188430786133,
|
| 728 |
+
"learning_rate": 4.830809503038781e-05,
|
| 729 |
+
"loss": 3.0401,
|
| 730 |
+
"step": 1030
|
| 731 |
+
},
|
| 732 |
+
{
|
| 733 |
+
"epoch": 0.208,
|
| 734 |
+
"grad_norm": 3.394774913787842,
|
| 735 |
+
"learning_rate": 4.8244412147206284e-05,
|
| 736 |
+
"loss": 4.8929,
|
| 737 |
+
"step": 1040
|
| 738 |
+
},
|
| 739 |
+
{
|
| 740 |
+
"epoch": 0.21,
|
| 741 |
+
"grad_norm": 6.458067417144775,
|
| 742 |
+
"learning_rate": 4.817959636416969e-05,
|
| 743 |
+
"loss": 2.9868,
|
| 744 |
+
"step": 1050
|
| 745 |
+
},
|
| 746 |
+
{
|
| 747 |
+
"epoch": 0.212,
|
| 748 |
+
"grad_norm": 13.139763832092285,
|
| 749 |
+
"learning_rate": 4.8113650840307834e-05,
|
| 750 |
+
"loss": 5.5685,
|
| 751 |
+
"step": 1060
|
| 752 |
+
},
|
| 753 |
+
{
|
| 754 |
+
"epoch": 0.214,
|
| 755 |
+
"grad_norm": 7.955199718475342,
|
| 756 |
+
"learning_rate": 4.8046578789712515e-05,
|
| 757 |
+
"loss": 2.2926,
|
| 758 |
+
"step": 1070
|
| 759 |
+
},
|
| 760 |
+
{
|
| 761 |
+
"epoch": 0.216,
|
| 762 |
+
"grad_norm": 3.7902987003326416,
|
| 763 |
+
"learning_rate": 4.797838348138086e-05,
|
| 764 |
+
"loss": 3.3832,
|
| 765 |
+
"step": 1080
|
| 766 |
+
},
|
| 767 |
+
{
|
| 768 |
+
"epoch": 0.218,
|
| 769 |
+
"grad_norm": 7.483827590942383,
|
| 770 |
+
"learning_rate": 4.790906823905599e-05,
|
| 771 |
+
"loss": 4.5766,
|
| 772 |
+
"step": 1090
|
| 773 |
+
},
|
| 774 |
+
{
|
| 775 |
+
"epoch": 0.22,
|
| 776 |
+
"grad_norm": 2.907489538192749,
|
| 777 |
+
"learning_rate": 4.783863644106502e-05,
|
| 778 |
+
"loss": 3.2696,
|
| 779 |
+
"step": 1100
|
| 780 |
+
},
|
| 781 |
+
{
|
| 782 |
+
"epoch": 0.222,
|
| 783 |
+
"grad_norm": 2.127671003341675,
|
| 784 |
+
"learning_rate": 4.776709152015443e-05,
|
| 785 |
+
"loss": 1.583,
|
| 786 |
+
"step": 1110
|
| 787 |
+
},
|
| 788 |
+
{
|
| 789 |
+
"epoch": 0.224,
|
| 790 |
+
"grad_norm": 9.034016609191895,
|
| 791 |
+
"learning_rate": 4.769443696332272e-05,
|
| 792 |
+
"loss": 2.2554,
|
| 793 |
+
"step": 1120
|
| 794 |
+
},
|
| 795 |
+
{
|
| 796 |
+
"epoch": 0.226,
|
| 797 |
+
"grad_norm": 0.7481829524040222,
|
| 798 |
+
"learning_rate": 4.762067631165049e-05,
|
| 799 |
+
"loss": 1.6651,
|
| 800 |
+
"step": 1130
|
| 801 |
+
},
|
| 802 |
+
{
|
| 803 |
+
"epoch": 0.228,
|
| 804 |
+
"grad_norm": 19.638362884521484,
|
| 805 |
+
"learning_rate": 4.754581316012785e-05,
|
| 806 |
+
"loss": 3.1578,
|
| 807 |
+
"step": 1140
|
| 808 |
+
},
|
| 809 |
+
{
|
| 810 |
+
"epoch": 0.23,
|
| 811 |
+
"grad_norm": 2.9221441745758057,
|
| 812 |
+
"learning_rate": 4.7469851157479177e-05,
|
| 813 |
+
"loss": 2.4297,
|
| 814 |
+
"step": 1150
|
| 815 |
+
},
|
| 816 |
+
{
|
| 817 |
+
"epoch": 0.232,
|
| 818 |
+
"grad_norm": 13.748416900634766,
|
| 819 |
+
"learning_rate": 4.7392794005985326e-05,
|
| 820 |
+
"loss": 2.9167,
|
| 821 |
+
"step": 1160
|
| 822 |
+
},
|
| 823 |
+
{
|
| 824 |
+
"epoch": 0.234,
|
| 825 |
+
"grad_norm": 2.525977611541748,
|
| 826 |
+
"learning_rate": 4.731464546130314e-05,
|
| 827 |
+
"loss": 4.5697,
|
| 828 |
+
"step": 1170
|
| 829 |
+
},
|
| 830 |
+
{
|
| 831 |
+
"epoch": 0.236,
|
| 832 |
+
"grad_norm": 1.523427963256836,
|
| 833 |
+
"learning_rate": 4.723540933228244e-05,
|
| 834 |
+
"loss": 2.1219,
|
| 835 |
+
"step": 1180
|
| 836 |
+
},
|
| 837 |
+
{
|
| 838 |
+
"epoch": 0.238,
|
| 839 |
+
"grad_norm": 7.397847652435303,
|
| 840 |
+
"learning_rate": 4.715508948078037e-05,
|
| 841 |
+
"loss": 2.4125,
|
| 842 |
+
"step": 1190
|
| 843 |
+
},
|
| 844 |
+
{
|
| 845 |
+
"epoch": 0.24,
|
| 846 |
+
"grad_norm": 1.8037457466125488,
|
| 847 |
+
"learning_rate": 4.707368982147318e-05,
|
| 848 |
+
"loss": 4.1166,
|
| 849 |
+
"step": 1200
|
| 850 |
+
},
|
| 851 |
+
{
|
| 852 |
+
"epoch": 0.242,
|
| 853 |
+
"grad_norm": 2.4662930965423584,
|
| 854 |
+
"learning_rate": 4.6991214321665414e-05,
|
| 855 |
+
"loss": 3.1524,
|
| 856 |
+
"step": 1210
|
| 857 |
+
},
|
| 858 |
+
{
|
| 859 |
+
"epoch": 0.244,
|
| 860 |
+
"grad_norm": 5.675219535827637,
|
| 861 |
+
"learning_rate": 4.690766700109659e-05,
|
| 862 |
+
"loss": 1.5237,
|
| 863 |
+
"step": 1220
|
| 864 |
+
},
|
| 865 |
+
{
|
| 866 |
+
"epoch": 0.246,
|
| 867 |
+
"grad_norm": 11.182186126708984,
|
| 868 |
+
"learning_rate": 4.682305193174524e-05,
|
| 869 |
+
"loss": 2.8092,
|
| 870 |
+
"step": 1230
|
| 871 |
+
},
|
| 872 |
+
{
|
| 873 |
+
"epoch": 0.248,
|
| 874 |
+
"grad_norm": 13.100793838500977,
|
| 875 |
+
"learning_rate": 4.6737373237630476e-05,
|
| 876 |
+
"loss": 2.256,
|
| 877 |
+
"step": 1240
|
| 878 |
+
},
|
| 879 |
+
{
|
| 880 |
+
"epoch": 0.25,
|
| 881 |
+
"grad_norm": 0.881151556968689,
|
| 882 |
+
"learning_rate": 4.665063509461097e-05,
|
| 883 |
+
"loss": 2.5601,
|
| 884 |
+
"step": 1250
|
| 885 |
+
},
|
| 886 |
+
{
|
| 887 |
+
"epoch": 0.252,
|
| 888 |
+
"grad_norm": 1.001516342163086,
|
| 889 |
+
"learning_rate": 4.656284173018144e-05,
|
| 890 |
+
"loss": 2.5502,
|
| 891 |
+
"step": 1260
|
| 892 |
+
},
|
| 893 |
+
{
|
| 894 |
+
"epoch": 0.254,
|
| 895 |
+
"grad_norm": 2.2227511405944824,
|
| 896 |
+
"learning_rate": 4.6473997423266614e-05,
|
| 897 |
+
"loss": 3.4447,
|
| 898 |
+
"step": 1270
|
| 899 |
+
},
|
| 900 |
+
{
|
| 901 |
+
"epoch": 0.256,
|
| 902 |
+
"grad_norm": 3.1253716945648193,
|
| 903 |
+
"learning_rate": 4.638410650401267e-05,
|
| 904 |
+
"loss": 1.6954,
|
| 905 |
+
"step": 1280
|
| 906 |
+
},
|
| 907 |
+
{
|
| 908 |
+
"epoch": 0.258,
|
| 909 |
+
"grad_norm": 4.01163387298584,
|
| 910 |
+
"learning_rate": 4.629317335357619e-05,
|
| 911 |
+
"loss": 2.353,
|
| 912 |
+
"step": 1290
|
| 913 |
+
},
|
| 914 |
+
{
|
| 915 |
+
"epoch": 0.26,
|
| 916 |
+
"grad_norm": 14.949892044067383,
|
| 917 |
+
"learning_rate": 4.620120240391065e-05,
|
| 918 |
+
"loss": 2.1544,
|
| 919 |
+
"step": 1300
|
| 920 |
+
},
|
| 921 |
+
{
|
| 922 |
+
"epoch": 0.262,
|
| 923 |
+
"grad_norm": 14.187519073486328,
|
| 924 |
+
"learning_rate": 4.610819813755038e-05,
|
| 925 |
+
"loss": 1.2159,
|
| 926 |
+
"step": 1310
|
| 927 |
+
},
|
| 928 |
+
{
|
| 929 |
+
"epoch": 0.264,
|
| 930 |
+
"grad_norm": 0.4033137559890747,
|
| 931 |
+
"learning_rate": 4.601416508739211e-05,
|
| 932 |
+
"loss": 1.9003,
|
| 933 |
+
"step": 1320
|
| 934 |
+
},
|
| 935 |
+
{
|
| 936 |
+
"epoch": 0.266,
|
| 937 |
+
"grad_norm": 97.27405548095703,
|
| 938 |
+
"learning_rate": 4.591910783647404e-05,
|
| 939 |
+
"loss": 4.5354,
|
| 940 |
+
"step": 1330
|
| 941 |
+
},
|
| 942 |
+
{
|
| 943 |
+
"epoch": 0.268,
|
| 944 |
+
"grad_norm": 1.0602211952209473,
|
| 945 |
+
"learning_rate": 4.5823031017752485e-05,
|
| 946 |
+
"loss": 1.9724,
|
| 947 |
+
"step": 1340
|
| 948 |
+
},
|
| 949 |
+
{
|
| 950 |
+
"epoch": 0.27,
|
| 951 |
+
"grad_norm": 13.463448524475098,
|
| 952 |
+
"learning_rate": 4.572593931387604e-05,
|
| 953 |
+
"loss": 2.4934,
|
| 954 |
+
"step": 1350
|
| 955 |
+
},
|
| 956 |
+
{
|
| 957 |
+
"epoch": 0.272,
|
| 958 |
+
"grad_norm": 21.87057876586914,
|
| 959 |
+
"learning_rate": 4.562783745695738e-05,
|
| 960 |
+
"loss": 2.1056,
|
| 961 |
+
"step": 1360
|
| 962 |
+
},
|
| 963 |
+
{
|
| 964 |
+
"epoch": 0.274,
|
| 965 |
+
"grad_norm": 1.0790810585021973,
|
| 966 |
+
"learning_rate": 4.5528730228342605e-05,
|
| 967 |
+
"loss": 1.5175,
|
| 968 |
+
"step": 1370
|
| 969 |
+
},
|
| 970 |
+
{
|
| 971 |
+
"epoch": 0.276,
|
| 972 |
+
"grad_norm": 20.827247619628906,
|
| 973 |
+
"learning_rate": 4.542862245837821e-05,
|
| 974 |
+
"loss": 2.1733,
|
| 975 |
+
"step": 1380
|
| 976 |
+
},
|
| 977 |
+
{
|
| 978 |
+
"epoch": 0.278,
|
| 979 |
+
"grad_norm": 3.0733835697174072,
|
| 980 |
+
"learning_rate": 4.532751902617569e-05,
|
| 981 |
+
"loss": 1.431,
|
| 982 |
+
"step": 1390
|
| 983 |
+
},
|
| 984 |
+
{
|
| 985 |
+
"epoch": 0.28,
|
| 986 |
+
"grad_norm": 5.851264476776123,
|
| 987 |
+
"learning_rate": 4.522542485937369e-05,
|
| 988 |
+
"loss": 1.5888,
|
| 989 |
+
"step": 1400
|
| 990 |
+
},
|
| 991 |
+
{
|
| 992 |
+
"epoch": 0.282,
|
| 993 |
+
"grad_norm": 5.039645195007324,
|
| 994 |
+
"learning_rate": 4.512234493389785e-05,
|
| 995 |
+
"loss": 2.84,
|
| 996 |
+
"step": 1410
|
| 997 |
+
},
|
| 998 |
+
{
|
| 999 |
+
"epoch": 0.284,
|
| 1000 |
+
"grad_norm": 2.501115322113037,
|
| 1001 |
+
"learning_rate": 4.5018284273718336e-05,
|
| 1002 |
+
"loss": 3.0874,
|
| 1003 |
+
"step": 1420
|
| 1004 |
+
},
|
| 1005 |
+
{
|
| 1006 |
+
"epoch": 0.286,
|
| 1007 |
+
"grad_norm": 10.413125038146973,
|
| 1008 |
+
"learning_rate": 4.491324795060491e-05,
|
| 1009 |
+
"loss": 1.6856,
|
| 1010 |
+
"step": 1430
|
| 1011 |
+
},
|
| 1012 |
+
{
|
| 1013 |
+
"epoch": 0.288,
|
| 1014 |
+
"grad_norm": 4.448335647583008,
|
| 1015 |
+
"learning_rate": 4.480724108387977e-05,
|
| 1016 |
+
"loss": 3.0233,
|
| 1017 |
+
"step": 1440
|
| 1018 |
+
},
|
| 1019 |
+
{
|
| 1020 |
+
"epoch": 0.29,
|
| 1021 |
+
"grad_norm": 4.848165988922119,
|
| 1022 |
+
"learning_rate": 4.4700268840168045e-05,
|
| 1023 |
+
"loss": 2.6897,
|
| 1024 |
+
"step": 1450
|
| 1025 |
+
},
|
| 1026 |
+
{
|
| 1027 |
+
"epoch": 0.292,
|
| 1028 |
+
"grad_norm": 1.7471359968185425,
|
| 1029 |
+
"learning_rate": 4.4592336433146e-05,
|
| 1030 |
+
"loss": 5.0716,
|
| 1031 |
+
"step": 1460
|
| 1032 |
+
},
|
| 1033 |
+
{
|
| 1034 |
+
"epoch": 0.294,
|
| 1035 |
+
"grad_norm": 0.7191880345344543,
|
| 1036 |
+
"learning_rate": 4.448344912328686e-05,
|
| 1037 |
+
"loss": 1.5289,
|
| 1038 |
+
"step": 1470
|
| 1039 |
+
},
|
| 1040 |
+
{
|
| 1041 |
+
"epoch": 0.296,
|
| 1042 |
+
"grad_norm": 29.567447662353516,
|
| 1043 |
+
"learning_rate": 4.4373612217604496e-05,
|
| 1044 |
+
"loss": 5.0471,
|
| 1045 |
+
"step": 1480
|
| 1046 |
+
},
|
| 1047 |
+
{
|
| 1048 |
+
"epoch": 0.298,
|
| 1049 |
+
"grad_norm": 5.8198723793029785,
|
| 1050 |
+
"learning_rate": 4.426283106939474e-05,
|
| 1051 |
+
"loss": 1.6411,
|
| 1052 |
+
"step": 1490
|
| 1053 |
+
},
|
| 1054 |
+
{
|
| 1055 |
+
"epoch": 0.3,
|
| 1056 |
+
"grad_norm": 8.184475898742676,
|
| 1057 |
+
"learning_rate": 4.415111107797445e-05,
|
| 1058 |
+
"loss": 3.0973,
|
| 1059 |
+
"step": 1500
|
| 1060 |
+
},
|
| 1061 |
+
{
|
| 1062 |
+
"epoch": 0.302,
|
| 1063 |
+
"grad_norm": 14.548653602600098,
|
| 1064 |
+
"learning_rate": 4.403845768841842e-05,
|
| 1065 |
+
"loss": 2.0314,
|
| 1066 |
+
"step": 1510
|
| 1067 |
+
},
|
| 1068 |
+
{
|
| 1069 |
+
"epoch": 0.304,
|
| 1070 |
+
"grad_norm": 20.58685302734375,
|
| 1071 |
+
"learning_rate": 4.3924876391293915e-05,
|
| 1072 |
+
"loss": 5.9555,
|
| 1073 |
+
"step": 1520
|
| 1074 |
+
},
|
| 1075 |
+
{
|
| 1076 |
+
"epoch": 0.306,
|
| 1077 |
+
"grad_norm": 4.702794551849365,
|
| 1078 |
+
"learning_rate": 4.381037272239311e-05,
|
| 1079 |
+
"loss": 2.5155,
|
| 1080 |
+
"step": 1530
|
| 1081 |
+
},
|
| 1082 |
+
{
|
| 1083 |
+
"epoch": 0.308,
|
| 1084 |
+
"grad_norm": 2.8011796474456787,
|
| 1085 |
+
"learning_rate": 4.36949522624633e-05,
|
| 1086 |
+
"loss": 3.6459,
|
| 1087 |
+
"step": 1540
|
| 1088 |
+
},
|
| 1089 |
+
{
|
| 1090 |
+
"epoch": 0.31,
|
| 1091 |
+
"grad_norm": 2.2072603702545166,
|
| 1092 |
+
"learning_rate": 4.357862063693486e-05,
|
| 1093 |
+
"loss": 3.0688,
|
| 1094 |
+
"step": 1550
|
| 1095 |
+
},
|
| 1096 |
+
{
|
| 1097 |
+
"epoch": 0.312,
|
| 1098 |
+
"grad_norm": 8.792641639709473,
|
| 1099 |
+
"learning_rate": 4.3461383515647106e-05,
|
| 1100 |
+
"loss": 5.096,
|
| 1101 |
+
"step": 1560
|
| 1102 |
+
},
|
| 1103 |
+
{
|
| 1104 |
+
"epoch": 0.314,
|
| 1105 |
+
"grad_norm": 23.349695205688477,
|
| 1106 |
+
"learning_rate": 4.334324661257191e-05,
|
| 1107 |
+
"loss": 3.9776,
|
| 1108 |
+
"step": 1570
|
| 1109 |
+
},
|
| 1110 |
+
{
|
| 1111 |
+
"epoch": 0.316,
|
| 1112 |
+
"grad_norm": 4.662014484405518,
|
| 1113 |
+
"learning_rate": 4.3224215685535294e-05,
|
| 1114 |
+
"loss": 1.8869,
|
| 1115 |
+
"step": 1580
|
| 1116 |
+
},
|
| 1117 |
+
{
|
| 1118 |
+
"epoch": 0.318,
|
| 1119 |
+
"grad_norm": 5.489886283874512,
|
| 1120 |
+
"learning_rate": 4.3104296535936695e-05,
|
| 1121 |
+
"loss": 2.1454,
|
| 1122 |
+
"step": 1590
|
| 1123 |
+
},
|
| 1124 |
+
{
|
| 1125 |
+
"epoch": 0.32,
|
| 1126 |
+
"grad_norm": 8.950507164001465,
|
| 1127 |
+
"learning_rate": 4.2983495008466276e-05,
|
| 1128 |
+
"loss": 3.0165,
|
| 1129 |
+
"step": 1600
|
| 1130 |
+
},
|
| 1131 |
+
{
|
| 1132 |
+
"epoch": 0.322,
|
| 1133 |
+
"grad_norm": 39.95512390136719,
|
| 1134 |
+
"learning_rate": 4.2861816990820084e-05,
|
| 1135 |
+
"loss": 2.9248,
|
| 1136 |
+
"step": 1610
|
| 1137 |
+
},
|
| 1138 |
+
{
|
| 1139 |
+
"epoch": 0.324,
|
| 1140 |
+
"grad_norm": 101.46671295166016,
|
| 1141 |
+
"learning_rate": 4.273926841341302e-05,
|
| 1142 |
+
"loss": 5.9823,
|
| 1143 |
+
"step": 1620
|
| 1144 |
+
},
|
| 1145 |
+
{
|
| 1146 |
+
"epoch": 0.326,
|
| 1147 |
+
"grad_norm": 2.456148862838745,
|
| 1148 |
+
"learning_rate": 4.261585524908987e-05,
|
| 1149 |
+
"loss": 4.5845,
|
| 1150 |
+
"step": 1630
|
| 1151 |
+
},
|
| 1152 |
+
{
|
| 1153 |
+
"epoch": 0.328,
|
| 1154 |
+
"grad_norm": 11.807568550109863,
|
| 1155 |
+
"learning_rate": 4.249158351283414e-05,
|
| 1156 |
+
"loss": 2.5418,
|
| 1157 |
+
"step": 1640
|
| 1158 |
+
},
|
| 1159 |
+
{
|
| 1160 |
+
"epoch": 0.33,
|
| 1161 |
+
"grad_norm": 2.619940996170044,
|
| 1162 |
+
"learning_rate": 4.2366459261474933e-05,
|
| 1163 |
+
"loss": 3.9026,
|
| 1164 |
+
"step": 1650
|
| 1165 |
+
},
|
| 1166 |
+
{
|
| 1167 |
+
"epoch": 0.332,
|
| 1168 |
+
"grad_norm": 6.112611770629883,
|
| 1169 |
+
"learning_rate": 4.224048859339175e-05,
|
| 1170 |
+
"loss": 3.7564,
|
| 1171 |
+
"step": 1660
|
| 1172 |
+
},
|
| 1173 |
+
{
|
| 1174 |
+
"epoch": 0.334,
|
| 1175 |
+
"grad_norm": 5.871501922607422,
|
| 1176 |
+
"learning_rate": 4.211367764821722e-05,
|
| 1177 |
+
"loss": 1.7162,
|
| 1178 |
+
"step": 1670
|
| 1179 |
+
},
|
| 1180 |
+
{
|
| 1181 |
+
"epoch": 0.336,
|
| 1182 |
+
"grad_norm": 2.189948081970215,
|
| 1183 |
+
"learning_rate": 4.198603260653792e-05,
|
| 1184 |
+
"loss": 3.2491,
|
| 1185 |
+
"step": 1680
|
| 1186 |
+
},
|
| 1187 |
+
{
|
| 1188 |
+
"epoch": 0.338,
|
| 1189 |
+
"grad_norm": 22.6879825592041,
|
| 1190 |
+
"learning_rate": 4.185755968959308e-05,
|
| 1191 |
+
"loss": 1.595,
|
| 1192 |
+
"step": 1690
|
| 1193 |
+
},
|
| 1194 |
+
{
|
| 1195 |
+
"epoch": 0.34,
|
| 1196 |
+
"grad_norm": 15.22309684753418,
|
| 1197 |
+
"learning_rate": 4.172826515897146e-05,
|
| 1198 |
+
"loss": 2.5093,
|
| 1199 |
+
"step": 1700
|
| 1200 |
+
},
|
| 1201 |
+
{
|
| 1202 |
+
"epoch": 0.342,
|
| 1203 |
+
"grad_norm": 5.424519062042236,
|
| 1204 |
+
"learning_rate": 4.1598155316306044e-05,
|
| 1205 |
+
"loss": 5.9477,
|
| 1206 |
+
"step": 1710
|
| 1207 |
+
},
|
| 1208 |
+
{
|
| 1209 |
+
"epoch": 0.344,
|
| 1210 |
+
"grad_norm": 10.794046401977539,
|
| 1211 |
+
"learning_rate": 4.146723650296701e-05,
|
| 1212 |
+
"loss": 2.6358,
|
| 1213 |
+
"step": 1720
|
| 1214 |
+
},
|
| 1215 |
+
{
|
| 1216 |
+
"epoch": 0.346,
|
| 1217 |
+
"grad_norm": 3.659132719039917,
|
| 1218 |
+
"learning_rate": 4.133551509975264e-05,
|
| 1219 |
+
"loss": 1.4777,
|
| 1220 |
+
"step": 1730
|
| 1221 |
+
},
|
| 1222 |
+
{
|
| 1223 |
+
"epoch": 0.348,
|
| 1224 |
+
"grad_norm": 3.7974021434783936,
|
| 1225 |
+
"learning_rate": 4.1202997526578276e-05,
|
| 1226 |
+
"loss": 2.132,
|
| 1227 |
+
"step": 1740
|
| 1228 |
+
},
|
| 1229 |
+
{
|
| 1230 |
+
"epoch": 0.35,
|
| 1231 |
+
"grad_norm": 5.216747283935547,
|
| 1232 |
+
"learning_rate": 4.1069690242163484e-05,
|
| 1233 |
+
"loss": 1.4507,
|
| 1234 |
+
"step": 1750
|
| 1235 |
+
},
|
| 1236 |
+
{
|
| 1237 |
+
"epoch": 0.352,
|
| 1238 |
+
"grad_norm": 5.669449329376221,
|
| 1239 |
+
"learning_rate": 4.093559974371725e-05,
|
| 1240 |
+
"loss": 1.2368,
|
| 1241 |
+
"step": 1760
|
| 1242 |
+
},
|
| 1243 |
+
{
|
| 1244 |
+
"epoch": 0.354,
|
| 1245 |
+
"grad_norm": 3.354207992553711,
|
| 1246 |
+
"learning_rate": 4.080073256662127e-05,
|
| 1247 |
+
"loss": 0.9967,
|
| 1248 |
+
"step": 1770
|
| 1249 |
+
},
|
| 1250 |
+
{
|
| 1251 |
+
"epoch": 0.356,
|
| 1252 |
+
"grad_norm": 10.8993558883667,
|
| 1253 |
+
"learning_rate": 4.066509528411152e-05,
|
| 1254 |
+
"loss": 5.9177,
|
| 1255 |
+
"step": 1780
|
| 1256 |
+
},
|
| 1257 |
+
{
|
| 1258 |
+
"epoch": 0.358,
|
| 1259 |
+
"grad_norm": 13.498696327209473,
|
| 1260 |
+
"learning_rate": 4.052869450695776e-05,
|
| 1261 |
+
"loss": 2.3273,
|
| 1262 |
+
"step": 1790
|
| 1263 |
+
},
|
| 1264 |
+
{
|
| 1265 |
+
"epoch": 0.36,
|
| 1266 |
+
"grad_norm": 1.2119014263153076,
|
| 1267 |
+
"learning_rate": 4.039153688314145e-05,
|
| 1268 |
+
"loss": 2.7299,
|
| 1269 |
+
"step": 1800
|
| 1270 |
+
},
|
| 1271 |
+
{
|
| 1272 |
+
"epoch": 0.362,
|
| 1273 |
+
"grad_norm": 1.2092421054840088,
|
| 1274 |
+
"learning_rate": 4.02536290975317e-05,
|
| 1275 |
+
"loss": 2.0494,
|
| 1276 |
+
"step": 1810
|
| 1277 |
+
},
|
| 1278 |
+
{
|
| 1279 |
+
"epoch": 0.364,
|
| 1280 |
+
"grad_norm": 15.161354064941406,
|
| 1281 |
+
"learning_rate": 4.011497787155938e-05,
|
| 1282 |
+
"loss": 1.9595,
|
| 1283 |
+
"step": 1820
|
| 1284 |
+
},
|
| 1285 |
+
{
|
| 1286 |
+
"epoch": 0.366,
|
| 1287 |
+
"grad_norm": 13.400138854980469,
|
| 1288 |
+
"learning_rate": 3.997558996288965e-05,
|
| 1289 |
+
"loss": 2.4506,
|
| 1290 |
+
"step": 1830
|
| 1291 |
+
},
|
| 1292 |
+
{
|
| 1293 |
+
"epoch": 0.368,
|
| 1294 |
+
"grad_norm": 0.7680931687355042,
|
| 1295 |
+
"learning_rate": 3.983547216509254e-05,
|
| 1296 |
+
"loss": 3.6176,
|
| 1297 |
+
"step": 1840
|
| 1298 |
+
},
|
| 1299 |
+
{
|
| 1300 |
+
"epoch": 0.37,
|
| 1301 |
+
"grad_norm": 2.35080623626709,
|
| 1302 |
+
"learning_rate": 3.969463130731183e-05,
|
| 1303 |
+
"loss": 3.2954,
|
| 1304 |
+
"step": 1850
|
| 1305 |
+
},
|
| 1306 |
+
{
|
| 1307 |
+
"epoch": 0.372,
|
| 1308 |
+
"grad_norm": 0.0,
|
| 1309 |
+
"learning_rate": 3.955307425393224e-05,
|
| 1310 |
+
"loss": 2.0271,
|
| 1311 |
+
"step": 1860
|
| 1312 |
+
},
|
| 1313 |
+
{
|
| 1314 |
+
"epoch": 0.374,
|
| 1315 |
+
"grad_norm": 9.143167495727539,
|
| 1316 |
+
"learning_rate": 3.941080790424484e-05,
|
| 1317 |
+
"loss": 1.6445,
|
| 1318 |
+
"step": 1870
|
| 1319 |
+
},
|
| 1320 |
+
{
|
| 1321 |
+
"epoch": 0.376,
|
| 1322 |
+
"grad_norm": 9.658303260803223,
|
| 1323 |
+
"learning_rate": 3.92678391921108e-05,
|
| 1324 |
+
"loss": 2.4891,
|
| 1325 |
+
"step": 1880
|
| 1326 |
+
},
|
| 1327 |
+
{
|
| 1328 |
+
"epoch": 0.378,
|
| 1329 |
+
"grad_norm": 6.326284885406494,
|
| 1330 |
+
"learning_rate": 3.912417508562345e-05,
|
| 1331 |
+
"loss": 1.9795,
|
| 1332 |
+
"step": 1890
|
| 1333 |
+
},
|
| 1334 |
+
{
|
| 1335 |
+
"epoch": 0.38,
|
| 1336 |
+
"grad_norm": 1.721880555152893,
|
| 1337 |
+
"learning_rate": 3.897982258676867e-05,
|
| 1338 |
+
"loss": 1.7931,
|
| 1339 |
+
"step": 1900
|
| 1340 |
+
},
|
| 1341 |
+
{
|
| 1342 |
+
"epoch": 0.382,
|
| 1343 |
+
"grad_norm": 14.906951904296875,
|
| 1344 |
+
"learning_rate": 3.883478873108361e-05,
|
| 1345 |
+
"loss": 2.9522,
|
| 1346 |
+
"step": 1910
|
| 1347 |
+
},
|
| 1348 |
+
{
|
| 1349 |
+
"epoch": 0.384,
|
| 1350 |
+
"grad_norm": 3.197312593460083,
|
| 1351 |
+
"learning_rate": 3.868908058731376e-05,
|
| 1352 |
+
"loss": 2.0274,
|
| 1353 |
+
"step": 1920
|
| 1354 |
+
},
|
| 1355 |
+
{
|
| 1356 |
+
"epoch": 0.386,
|
| 1357 |
+
"grad_norm": 12.083334922790527,
|
| 1358 |
+
"learning_rate": 3.85427052570685e-05,
|
| 1359 |
+
"loss": 3.1124,
|
| 1360 |
+
"step": 1930
|
| 1361 |
+
},
|
| 1362 |
+
{
|
| 1363 |
+
"epoch": 0.388,
|
| 1364 |
+
"grad_norm": 7.249095439910889,
|
| 1365 |
+
"learning_rate": 3.8395669874474915e-05,
|
| 1366 |
+
"loss": 2.101,
|
| 1367 |
+
"step": 1940
|
| 1368 |
+
},
|
| 1369 |
+
{
|
| 1370 |
+
"epoch": 0.39,
|
| 1371 |
+
"grad_norm": 1.5133755207061768,
|
| 1372 |
+
"learning_rate": 3.824798160583012e-05,
|
| 1373 |
+
"loss": 3.1344,
|
| 1374 |
+
"step": 1950
|
| 1375 |
+
},
|
| 1376 |
+
{
|
| 1377 |
+
"epoch": 0.392,
|
| 1378 |
+
"grad_norm": 1.0909286737442017,
|
| 1379 |
+
"learning_rate": 3.8099647649251986e-05,
|
| 1380 |
+
"loss": 1.7248,
|
| 1381 |
+
"step": 1960
|
| 1382 |
+
},
|
| 1383 |
+
{
|
| 1384 |
+
"epoch": 0.394,
|
| 1385 |
+
"grad_norm": 1.1263455152511597,
|
| 1386 |
+
"learning_rate": 3.795067523432826e-05,
|
| 1387 |
+
"loss": 1.9146,
|
| 1388 |
+
"step": 1970
|
| 1389 |
+
},
|
| 1390 |
+
{
|
| 1391 |
+
"epoch": 0.396,
|
| 1392 |
+
"grad_norm": 5.309441089630127,
|
| 1393 |
+
"learning_rate": 3.780107162176429e-05,
|
| 1394 |
+
"loss": 2.3492,
|
| 1395 |
+
"step": 1980
|
| 1396 |
+
},
|
| 1397 |
+
{
|
| 1398 |
+
"epoch": 0.398,
|
| 1399 |
+
"grad_norm": 5.851804733276367,
|
| 1400 |
+
"learning_rate": 3.765084410302909e-05,
|
| 1401 |
+
"loss": 1.5525,
|
| 1402 |
+
"step": 1990
|
| 1403 |
+
},
|
| 1404 |
+
{
|
| 1405 |
+
"epoch": 0.4,
|
| 1406 |
+
"grad_norm": 6.578305244445801,
|
| 1407 |
+
"learning_rate": 3.7500000000000003e-05,
|
| 1408 |
+
"loss": 2.8312,
|
| 1409 |
+
"step": 2000
|
| 1410 |
+
},
|
| 1411 |
+
{
|
| 1412 |
+
"epoch": 0.402,
|
| 1413 |
+
"grad_norm": 8.410406112670898,
|
| 1414 |
+
"learning_rate": 3.7348546664605777e-05,
|
| 1415 |
+
"loss": 3.3568,
|
| 1416 |
+
"step": 2010
|
| 1417 |
+
},
|
| 1418 |
+
{
|
| 1419 |
+
"epoch": 0.404,
|
| 1420 |
+
"grad_norm": 1.6206369400024414,
|
| 1421 |
+
"learning_rate": 3.719649147846832e-05,
|
| 1422 |
+
"loss": 1.1778,
|
| 1423 |
+
"step": 2020
|
| 1424 |
+
},
|
| 1425 |
+
{
|
| 1426 |
+
"epoch": 0.406,
|
| 1427 |
+
"grad_norm": 3.7382287979125977,
|
| 1428 |
+
"learning_rate": 3.704384185254288e-05,
|
| 1429 |
+
"loss": 2.3964,
|
| 1430 |
+
"step": 2030
|
| 1431 |
+
},
|
| 1432 |
+
{
|
| 1433 |
+
"epoch": 0.408,
|
| 1434 |
+
"grad_norm": 2.0517022609710693,
|
| 1435 |
+
"learning_rate": 3.689060522675689e-05,
|
| 1436 |
+
"loss": 2.0172,
|
| 1437 |
+
"step": 2040
|
| 1438 |
+
},
|
| 1439 |
+
{
|
| 1440 |
+
"epoch": 0.41,
|
| 1441 |
+
"grad_norm": 2.8966128826141357,
|
| 1442 |
+
"learning_rate": 3.673678906964727e-05,
|
| 1443 |
+
"loss": 2.1464,
|
| 1444 |
+
"step": 2050
|
| 1445 |
+
},
|
| 1446 |
+
{
|
| 1447 |
+
"epoch": 0.412,
|
| 1448 |
+
"grad_norm": 0.9577096104621887,
|
| 1449 |
+
"learning_rate": 3.6582400877996546e-05,
|
| 1450 |
+
"loss": 2.1743,
|
| 1451 |
+
"step": 2060
|
| 1452 |
+
},
|
| 1453 |
+
{
|
| 1454 |
+
"epoch": 0.414,
|
| 1455 |
+
"grad_norm": 2.7587995529174805,
|
| 1456 |
+
"learning_rate": 3.642744817646736e-05,
|
| 1457 |
+
"loss": 2.785,
|
| 1458 |
+
"step": 2070
|
| 1459 |
+
},
|
| 1460 |
+
{
|
| 1461 |
+
"epoch": 0.416,
|
| 1462 |
+
"grad_norm": 6.39242696762085,
|
| 1463 |
+
"learning_rate": 3.627193851723577e-05,
|
| 1464 |
+
"loss": 1.5782,
|
| 1465 |
+
"step": 2080
|
| 1466 |
+
},
|
| 1467 |
+
{
|
| 1468 |
+
"epoch": 0.418,
|
| 1469 |
+
"grad_norm": 19.898326873779297,
|
| 1470 |
+
"learning_rate": 3.611587947962319e-05,
|
| 1471 |
+
"loss": 3.6745,
|
| 1472 |
+
"step": 2090
|
| 1473 |
+
},
|
| 1474 |
+
{
|
| 1475 |
+
"epoch": 0.42,
|
| 1476 |
+
"grad_norm": 10.48559856414795,
|
| 1477 |
+
"learning_rate": 3.5959278669726935e-05,
|
| 1478 |
+
"loss": 1.7939,
|
| 1479 |
+
"step": 2100
|
| 1480 |
+
},
|
| 1481 |
+
{
|
| 1482 |
+
"epoch": 0.422,
|
| 1483 |
+
"grad_norm": 7.97528076171875,
|
| 1484 |
+
"learning_rate": 3.580214372004956e-05,
|
| 1485 |
+
"loss": 1.9243,
|
| 1486 |
+
"step": 2110
|
| 1487 |
+
},
|
| 1488 |
+
{
|
| 1489 |
+
"epoch": 0.424,
|
| 1490 |
+
"grad_norm": 4.453253269195557,
|
| 1491 |
+
"learning_rate": 3.564448228912682e-05,
|
| 1492 |
+
"loss": 3.6028,
|
| 1493 |
+
"step": 2120
|
| 1494 |
+
},
|
| 1495 |
+
{
|
| 1496 |
+
"epoch": 0.426,
|
| 1497 |
+
"grad_norm": 3.2717721462249756,
|
| 1498 |
+
"learning_rate": 3.548630206115443e-05,
|
| 1499 |
+
"loss": 1.3939,
|
| 1500 |
+
"step": 2130
|
| 1501 |
+
},
|
| 1502 |
+
{
|
| 1503 |
+
"epoch": 0.428,
|
| 1504 |
+
"grad_norm": 5.499331951141357,
|
| 1505 |
+
"learning_rate": 3.532761074561355e-05,
|
| 1506 |
+
"loss": 2.1193,
|
| 1507 |
+
"step": 2140
|
| 1508 |
+
},
|
| 1509 |
+
{
|
| 1510 |
+
"epoch": 0.43,
|
| 1511 |
+
"grad_norm": 2.989884853363037,
|
| 1512 |
+
"learning_rate": 3.516841607689501e-05,
|
| 1513 |
+
"loss": 2.0047,
|
| 1514 |
+
"step": 2150
|
| 1515 |
+
},
|
| 1516 |
+
{
|
| 1517 |
+
"epoch": 0.432,
|
| 1518 |
+
"grad_norm": 25.216196060180664,
|
| 1519 |
+
"learning_rate": 3.5008725813922386e-05,
|
| 1520 |
+
"loss": 5.2666,
|
| 1521 |
+
"step": 2160
|
| 1522 |
+
},
|
| 1523 |
+
{
|
| 1524 |
+
"epoch": 0.434,
|
| 1525 |
+
"grad_norm": 2.533149003982544,
|
| 1526 |
+
"learning_rate": 3.484854773977378e-05,
|
| 1527 |
+
"loss": 1.8129,
|
| 1528 |
+
"step": 2170
|
| 1529 |
+
},
|
| 1530 |
+
{
|
| 1531 |
+
"epoch": 0.436,
|
| 1532 |
+
"grad_norm": 1.6373289823532104,
|
| 1533 |
+
"learning_rate": 3.4687889661302576e-05,
|
| 1534 |
+
"loss": 1.3969,
|
| 1535 |
+
"step": 2180
|
| 1536 |
+
},
|
| 1537 |
+
{
|
| 1538 |
+
"epoch": 0.438,
|
| 1539 |
+
"grad_norm": 10.936837196350098,
|
| 1540 |
+
"learning_rate": 3.452675940875686e-05,
|
| 1541 |
+
"loss": 1.9788,
|
| 1542 |
+
"step": 2190
|
| 1543 |
+
},
|
| 1544 |
+
{
|
| 1545 |
+
"epoch": 0.44,
|
| 1546 |
+
"grad_norm": 5.747529983520508,
|
| 1547 |
+
"learning_rate": 3.436516483539781e-05,
|
| 1548 |
+
"loss": 1.5522,
|
| 1549 |
+
"step": 2200
|
| 1550 |
+
},
|
| 1551 |
+
{
|
| 1552 |
+
"epoch": 0.442,
|
| 1553 |
+
"grad_norm": 22.60832405090332,
|
| 1554 |
+
"learning_rate": 3.4203113817116957e-05,
|
| 1555 |
+
"loss": 2.1951,
|
| 1556 |
+
"step": 2210
|
| 1557 |
+
},
|
| 1558 |
+
{
|
| 1559 |
+
"epoch": 0.444,
|
| 1560 |
+
"grad_norm": 6.1106953620910645,
|
| 1561 |
+
"learning_rate": 3.4040614252052305e-05,
|
| 1562 |
+
"loss": 5.2669,
|
| 1563 |
+
"step": 2220
|
| 1564 |
+
},
|
| 1565 |
+
{
|
| 1566 |
+
"epoch": 0.446,
|
| 1567 |
+
"grad_norm": 3.707664728164673,
|
| 1568 |
+
"learning_rate": 3.387767406020343e-05,
|
| 1569 |
+
"loss": 2.5184,
|
| 1570 |
+
"step": 2230
|
| 1571 |
+
},
|
| 1572 |
+
{
|
| 1573 |
+
"epoch": 0.448,
|
| 1574 |
+
"grad_norm": 5.510468006134033,
|
| 1575 |
+
"learning_rate": 3.3714301183045385e-05,
|
| 1576 |
+
"loss": 2.0463,
|
| 1577 |
+
"step": 2240
|
| 1578 |
+
},
|
| 1579 |
+
{
|
| 1580 |
+
"epoch": 0.45,
|
| 1581 |
+
"grad_norm": 43.166866302490234,
|
| 1582 |
+
"learning_rate": 3.355050358314172e-05,
|
| 1583 |
+
"loss": 2.9354,
|
| 1584 |
+
"step": 2250
|
| 1585 |
+
},
|
| 1586 |
+
{
|
| 1587 |
+
"epoch": 0.452,
|
| 1588 |
+
"grad_norm": 0.45324602723121643,
|
| 1589 |
+
"learning_rate": 3.338628924375638e-05,
|
| 1590 |
+
"loss": 2.1297,
|
| 1591 |
+
"step": 2260
|
| 1592 |
+
},
|
| 1593 |
+
{
|
| 1594 |
+
"epoch": 0.454,
|
| 1595 |
+
"grad_norm": 8.361921310424805,
|
| 1596 |
+
"learning_rate": 3.322166616846458e-05,
|
| 1597 |
+
"loss": 3.2082,
|
| 1598 |
+
"step": 2270
|
| 1599 |
+
},
|
| 1600 |
+
{
|
| 1601 |
+
"epoch": 0.456,
|
| 1602 |
+
"grad_norm": 1.730605125427246,
|
| 1603 |
+
"learning_rate": 3.305664238076278e-05,
|
| 1604 |
+
"loss": 2.0667,
|
| 1605 |
+
"step": 2280
|
| 1606 |
+
},
|
| 1607 |
+
{
|
| 1608 |
+
"epoch": 0.458,
|
| 1609 |
+
"grad_norm": 13.287188529968262,
|
| 1610 |
+
"learning_rate": 3.289122592367757e-05,
|
| 1611 |
+
"loss": 2.4089,
|
| 1612 |
+
"step": 2290
|
| 1613 |
+
},
|
| 1614 |
+
{
|
| 1615 |
+
"epoch": 0.46,
|
| 1616 |
+
"grad_norm": 10.110196113586426,
|
| 1617 |
+
"learning_rate": 3.272542485937369e-05,
|
| 1618 |
+
"loss": 2.0842,
|
| 1619 |
+
"step": 2300
|
| 1620 |
+
},
|
| 1621 |
+
{
|
| 1622 |
+
"epoch": 0.462,
|
| 1623 |
+
"grad_norm": 13.76339340209961,
|
| 1624 |
+
"learning_rate": 3.2559247268761115e-05,
|
| 1625 |
+
"loss": 3.9489,
|
| 1626 |
+
"step": 2310
|
| 1627 |
+
},
|
| 1628 |
+
{
|
| 1629 |
+
"epoch": 0.464,
|
| 1630 |
+
"grad_norm": 8.27099895477295,
|
| 1631 |
+
"learning_rate": 3.239270125110117e-05,
|
| 1632 |
+
"loss": 1.8036,
|
| 1633 |
+
"step": 2320
|
| 1634 |
+
},
|
| 1635 |
+
{
|
| 1636 |
+
"epoch": 0.466,
|
| 1637 |
+
"grad_norm": 1.385971188545227,
|
| 1638 |
+
"learning_rate": 3.222579492361179e-05,
|
| 1639 |
+
"loss": 2.8004,
|
| 1640 |
+
"step": 2330
|
| 1641 |
+
},
|
| 1642 |
+
{
|
| 1643 |
+
"epoch": 0.468,
|
| 1644 |
+
"grad_norm": 10.99479866027832,
|
| 1645 |
+
"learning_rate": 3.205853642107192e-05,
|
| 1646 |
+
"loss": 1.0807,
|
| 1647 |
+
"step": 2340
|
| 1648 |
+
},
|
| 1649 |
+
{
|
| 1650 |
+
"epoch": 0.47,
|
| 1651 |
+
"grad_norm": 7.162081718444824,
|
| 1652 |
+
"learning_rate": 3.1890933895424976e-05,
|
| 1653 |
+
"loss": 4.2218,
|
| 1654 |
+
"step": 2350
|
| 1655 |
+
},
|
| 1656 |
+
{
|
| 1657 |
+
"epoch": 0.472,
|
| 1658 |
+
"grad_norm": 3.080836057662964,
|
| 1659 |
+
"learning_rate": 3.172299551538164e-05,
|
| 1660 |
+
"loss": 1.9778,
|
| 1661 |
+
"step": 2360
|
| 1662 |
+
},
|
| 1663 |
+
{
|
| 1664 |
+
"epoch": 0.474,
|
| 1665 |
+
"grad_norm": 1.8797277212142944,
|
| 1666 |
+
"learning_rate": 3.155472946602162e-05,
|
| 1667 |
+
"loss": 2.7487,
|
| 1668 |
+
"step": 2370
|
| 1669 |
+
},
|
| 1670 |
+
{
|
| 1671 |
+
"epoch": 0.476,
|
| 1672 |
+
"grad_norm": 7.540584087371826,
|
| 1673 |
+
"learning_rate": 3.138614394839476e-05,
|
| 1674 |
+
"loss": 2.199,
|
| 1675 |
+
"step": 2380
|
| 1676 |
+
},
|
| 1677 |
+
{
|
| 1678 |
+
"epoch": 0.478,
|
| 1679 |
+
"grad_norm": 5.368736743927002,
|
| 1680 |
+
"learning_rate": 3.121724717912138e-05,
|
| 1681 |
+
"loss": 3.5763,
|
| 1682 |
+
"step": 2390
|
| 1683 |
+
},
|
| 1684 |
+
{
|
| 1685 |
+
"epoch": 0.48,
|
| 1686 |
+
"grad_norm": 8.255654335021973,
|
| 1687 |
+
"learning_rate": 3.104804738999169e-05,
|
| 1688 |
+
"loss": 3.4331,
|
| 1689 |
+
"step": 2400
|
| 1690 |
+
},
|
| 1691 |
+
{
|
| 1692 |
+
"epoch": 0.482,
|
| 1693 |
+
"grad_norm": 7.196650981903076,
|
| 1694 |
+
"learning_rate": 3.087855282756475e-05,
|
| 1695 |
+
"loss": 2.2419,
|
| 1696 |
+
"step": 2410
|
| 1697 |
+
},
|
| 1698 |
+
{
|
| 1699 |
+
"epoch": 0.484,
|
| 1700 |
+
"grad_norm": 3.529343843460083,
|
| 1701 |
+
"learning_rate": 3.0708771752766394e-05,
|
| 1702 |
+
"loss": 2.0476,
|
| 1703 |
+
"step": 2420
|
| 1704 |
+
},
|
| 1705 |
+
{
|
| 1706 |
+
"epoch": 0.486,
|
| 1707 |
+
"grad_norm": 5.323751926422119,
|
| 1708 |
+
"learning_rate": 3.053871244048669e-05,
|
| 1709 |
+
"loss": 1.3934,
|
| 1710 |
+
"step": 2430
|
| 1711 |
+
},
|
| 1712 |
+
{
|
| 1713 |
+
"epoch": 0.488,
|
| 1714 |
+
"grad_norm": 53.67280197143555,
|
| 1715 |
+
"learning_rate": 3.0368383179176585e-05,
|
| 1716 |
+
"loss": 2.7532,
|
| 1717 |
+
"step": 2440
|
| 1718 |
+
},
|
| 1719 |
+
{
|
| 1720 |
+
"epoch": 0.49,
|
| 1721 |
+
"grad_norm": 21.385303497314453,
|
| 1722 |
+
"learning_rate": 3.0197792270443982e-05,
|
| 1723 |
+
"loss": 2.112,
|
| 1724 |
+
"step": 2450
|
| 1725 |
+
},
|
| 1726 |
+
{
|
| 1727 |
+
"epoch": 0.492,
|
| 1728 |
+
"grad_norm": 5.010990142822266,
|
| 1729 |
+
"learning_rate": 3.002694802864912e-05,
|
| 1730 |
+
"loss": 1.8119,
|
| 1731 |
+
"step": 2460
|
| 1732 |
+
},
|
| 1733 |
+
{
|
| 1734 |
+
"epoch": 0.494,
|
| 1735 |
+
"grad_norm": 7.916762351989746,
|
| 1736 |
+
"learning_rate": 2.98558587804993e-05,
|
| 1737 |
+
"loss": 1.5163,
|
| 1738 |
+
"step": 2470
|
| 1739 |
+
},
|
| 1740 |
+
{
|
| 1741 |
+
"epoch": 0.496,
|
| 1742 |
+
"grad_norm": 34.273319244384766,
|
| 1743 |
+
"learning_rate": 2.9684532864643122e-05,
|
| 1744 |
+
"loss": 3.372,
|
| 1745 |
+
"step": 2480
|
| 1746 |
+
},
|
| 1747 |
+
{
|
| 1748 |
+
"epoch": 0.498,
|
| 1749 |
+
"grad_norm": 3.292635440826416,
|
| 1750 |
+
"learning_rate": 2.9512978631264006e-05,
|
| 1751 |
+
"loss": 1.5534,
|
| 1752 |
+
"step": 2490
|
| 1753 |
+
},
|
| 1754 |
+
{
|
| 1755 |
+
"epoch": 0.5,
|
| 1756 |
+
"grad_norm": 9.055399894714355,
|
| 1757 |
+
"learning_rate": 2.9341204441673266e-05,
|
| 1758 |
+
"loss": 1.8644,
|
| 1759 |
+
"step": 2500
|
| 1760 |
+
},
|
| 1761 |
+
{
|
| 1762 |
+
"epoch": 0.502,
|
| 1763 |
+
"grad_norm": 51.29086685180664,
|
| 1764 |
+
"learning_rate": 2.916921866790256e-05,
|
| 1765 |
+
"loss": 4.3985,
|
| 1766 |
+
"step": 2510
|
| 1767 |
+
},
|
| 1768 |
+
{
|
| 1769 |
+
"epoch": 0.504,
|
| 1770 |
+
"grad_norm": 9.632088661193848,
|
| 1771 |
+
"learning_rate": 2.8997029692295874e-05,
|
| 1772 |
+
"loss": 2.0158,
|
| 1773 |
+
"step": 2520
|
| 1774 |
+
},
|
| 1775 |
+
{
|
| 1776 |
+
"epoch": 0.506,
|
| 1777 |
+
"grad_norm": 31.112043380737305,
|
| 1778 |
+
"learning_rate": 2.8824645907100954e-05,
|
| 1779 |
+
"loss": 1.3677,
|
| 1780 |
+
"step": 2530
|
| 1781 |
+
},
|
| 1782 |
+
{
|
| 1783 |
+
"epoch": 0.508,
|
| 1784 |
+
"grad_norm": 21.64225959777832,
|
| 1785 |
+
"learning_rate": 2.8652075714060295e-05,
|
| 1786 |
+
"loss": 2.399,
|
| 1787 |
+
"step": 2540
|
| 1788 |
+
},
|
| 1789 |
+
{
|
| 1790 |
+
"epoch": 0.51,
|
| 1791 |
+
"grad_norm": 1.216705322265625,
|
| 1792 |
+
"learning_rate": 2.8479327524001636e-05,
|
| 1793 |
+
"loss": 4.2765,
|
| 1794 |
+
"step": 2550
|
| 1795 |
+
},
|
| 1796 |
+
{
|
| 1797 |
+
"epoch": 0.512,
|
| 1798 |
+
"grad_norm": 12.95290470123291,
|
| 1799 |
+
"learning_rate": 2.8306409756428064e-05,
|
| 1800 |
+
"loss": 3.2872,
|
| 1801 |
+
"step": 2560
|
| 1802 |
+
},
|
| 1803 |
+
{
|
| 1804 |
+
"epoch": 0.514,
|
| 1805 |
+
"grad_norm": 7.114419460296631,
|
| 1806 |
+
"learning_rate": 2.8133330839107608e-05,
|
| 1807 |
+
"loss": 3.1682,
|
| 1808 |
+
"step": 2570
|
| 1809 |
+
},
|
| 1810 |
+
{
|
| 1811 |
+
"epoch": 0.516,
|
| 1812 |
+
"grad_norm": 111.07962036132812,
|
| 1813 |
+
"learning_rate": 2.7960099207662532e-05,
|
| 1814 |
+
"loss": 3.3066,
|
| 1815 |
+
"step": 2580
|
| 1816 |
+
},
|
| 1817 |
+
{
|
| 1818 |
+
"epoch": 0.518,
|
| 1819 |
+
"grad_norm": 10.673909187316895,
|
| 1820 |
+
"learning_rate": 2.7786723305158136e-05,
|
| 1821 |
+
"loss": 1.8911,
|
| 1822 |
+
"step": 2590
|
| 1823 |
+
},
|
| 1824 |
+
{
|
| 1825 |
+
"epoch": 0.52,
|
| 1826 |
+
"grad_norm": 1.493356466293335,
|
| 1827 |
+
"learning_rate": 2.761321158169134e-05,
|
| 1828 |
+
"loss": 1.7402,
|
| 1829 |
+
"step": 2600
|
| 1830 |
+
},
|
| 1831 |
+
{
|
| 1832 |
+
"epoch": 0.522,
|
| 1833 |
+
"grad_norm": 1.4890353679656982,
|
| 1834 |
+
"learning_rate": 2.7439572493978736e-05,
|
| 1835 |
+
"loss": 1.1721,
|
| 1836 |
+
"step": 2610
|
| 1837 |
+
},
|
| 1838 |
+
{
|
| 1839 |
+
"epoch": 0.524,
|
| 1840 |
+
"grad_norm": 1.4277186393737793,
|
| 1841 |
+
"learning_rate": 2.726581450494451e-05,
|
| 1842 |
+
"loss": 1.2482,
|
| 1843 |
+
"step": 2620
|
| 1844 |
+
},
|
| 1845 |
+
{
|
| 1846 |
+
"epoch": 0.526,
|
| 1847 |
+
"grad_norm": 2.4278600215911865,
|
| 1848 |
+
"learning_rate": 2.7091946083307896e-05,
|
| 1849 |
+
"loss": 3.3641,
|
| 1850 |
+
"step": 2630
|
| 1851 |
+
},
|
| 1852 |
+
{
|
| 1853 |
+
"epoch": 0.528,
|
| 1854 |
+
"grad_norm": 3.5468785762786865,
|
| 1855 |
+
"learning_rate": 2.6917975703170466e-05,
|
| 1856 |
+
"loss": 1.9946,
|
| 1857 |
+
"step": 2640
|
| 1858 |
+
},
|
| 1859 |
+
{
|
| 1860 |
+
"epoch": 0.53,
|
| 1861 |
+
"grad_norm": 1.9078953266143799,
|
| 1862 |
+
"learning_rate": 2.674391184360313e-05,
|
| 1863 |
+
"loss": 1.0218,
|
| 1864 |
+
"step": 2650
|
| 1865 |
+
},
|
| 1866 |
+
{
|
| 1867 |
+
"epoch": 0.532,
|
| 1868 |
+
"grad_norm": 5.883788108825684,
|
| 1869 |
+
"learning_rate": 2.656976298823284e-05,
|
| 1870 |
+
"loss": 2.336,
|
| 1871 |
+
"step": 2660
|
| 1872 |
+
},
|
| 1873 |
+
{
|
| 1874 |
+
"epoch": 0.534,
|
| 1875 |
+
"grad_norm": 58.67000961303711,
|
| 1876 |
+
"learning_rate": 2.6395537624829096e-05,
|
| 1877 |
+
"loss": 4.6423,
|
| 1878 |
+
"step": 2670
|
| 1879 |
+
},
|
| 1880 |
+
{
|
| 1881 |
+
"epoch": 0.536,
|
| 1882 |
+
"grad_norm": 8.353065490722656,
|
| 1883 |
+
"learning_rate": 2.6221244244890336e-05,
|
| 1884 |
+
"loss": 1.845,
|
| 1885 |
+
"step": 2680
|
| 1886 |
+
},
|
| 1887 |
+
{
|
| 1888 |
+
"epoch": 0.538,
|
| 1889 |
+
"grad_norm": 2.636930227279663,
|
| 1890 |
+
"learning_rate": 2.604689134322999e-05,
|
| 1891 |
+
"loss": 2.3117,
|
| 1892 |
+
"step": 2690
|
| 1893 |
+
},
|
| 1894 |
+
{
|
| 1895 |
+
"epoch": 0.54,
|
| 1896 |
+
"grad_norm": 21.744890213012695,
|
| 1897 |
+
"learning_rate": 2.587248741756253e-05,
|
| 1898 |
+
"loss": 3.3991,
|
| 1899 |
+
"step": 2700
|
| 1900 |
+
},
|
| 1901 |
+
{
|
| 1902 |
+
"epoch": 0.542,
|
| 1903 |
+
"grad_norm": 203.36412048339844,
|
| 1904 |
+
"learning_rate": 2.5698040968089225e-05,
|
| 1905 |
+
"loss": 3.7802,
|
| 1906 |
+
"step": 2710
|
| 1907 |
+
},
|
| 1908 |
+
{
|
| 1909 |
+
"epoch": 0.544,
|
| 1910 |
+
"grad_norm": 1.4143257141113281,
|
| 1911 |
+
"learning_rate": 2.5523560497083926e-05,
|
| 1912 |
+
"loss": 1.4431,
|
| 1913 |
+
"step": 2720
|
| 1914 |
+
},
|
| 1915 |
+
{
|
| 1916 |
+
"epoch": 0.546,
|
| 1917 |
+
"grad_norm": 0.6071110367774963,
|
| 1918 |
+
"learning_rate": 2.5349054508478637e-05,
|
| 1919 |
+
"loss": 0.8369,
|
| 1920 |
+
"step": 2730
|
| 1921 |
+
},
|
| 1922 |
+
{
|
| 1923 |
+
"epoch": 0.548,
|
| 1924 |
+
"grad_norm": 10.470783233642578,
|
| 1925 |
+
"learning_rate": 2.517453150744904e-05,
|
| 1926 |
+
"loss": 2.658,
|
| 1927 |
+
"step": 2740
|
| 1928 |
+
},
|
| 1929 |
+
{
|
| 1930 |
+
"epoch": 0.55,
|
| 1931 |
+
"grad_norm": 14.130455017089844,
|
| 1932 |
+
"learning_rate": 2.5e-05,
|
| 1933 |
+
"loss": 2.12,
|
| 1934 |
+
"step": 2750
|
| 1935 |
+
},
|
| 1936 |
+
{
|
| 1937 |
+
"epoch": 0.552,
|
| 1938 |
+
"grad_norm": 1.0785049200057983,
|
| 1939 |
+
"learning_rate": 2.4825468492550964e-05,
|
| 1940 |
+
"loss": 2.1272,
|
| 1941 |
+
"step": 2760
|
| 1942 |
+
},
|
| 1943 |
+
{
|
| 1944 |
+
"epoch": 0.554,
|
| 1945 |
+
"grad_norm": 3.327045440673828,
|
| 1946 |
+
"learning_rate": 2.4650945491521372e-05,
|
| 1947 |
+
"loss": 1.5502,
|
| 1948 |
+
"step": 2770
|
| 1949 |
+
},
|
| 1950 |
+
{
|
| 1951 |
+
"epoch": 0.556,
|
| 1952 |
+
"grad_norm": 27.231857299804688,
|
| 1953 |
+
"learning_rate": 2.447643950291608e-05,
|
| 1954 |
+
"loss": 1.4815,
|
| 1955 |
+
"step": 2780
|
| 1956 |
+
},
|
| 1957 |
+
{
|
| 1958 |
+
"epoch": 0.558,
|
| 1959 |
+
"grad_norm": 22.837984085083008,
|
| 1960 |
+
"learning_rate": 2.4301959031910784e-05,
|
| 1961 |
+
"loss": 1.3745,
|
| 1962 |
+
"step": 2790
|
| 1963 |
+
},
|
| 1964 |
+
{
|
| 1965 |
+
"epoch": 0.56,
|
| 1966 |
+
"grad_norm": 5.593969821929932,
|
| 1967 |
+
"learning_rate": 2.4127512582437485e-05,
|
| 1968 |
+
"loss": 2.9765,
|
| 1969 |
+
"step": 2800
|
| 1970 |
+
},
|
| 1971 |
+
{
|
| 1972 |
+
"epoch": 0.562,
|
| 1973 |
+
"grad_norm": 6.5615644454956055,
|
| 1974 |
+
"learning_rate": 2.3953108656770016e-05,
|
| 1975 |
+
"loss": 1.2392,
|
| 1976 |
+
"step": 2810
|
| 1977 |
+
},
|
| 1978 |
+
{
|
| 1979 |
+
"epoch": 0.564,
|
| 1980 |
+
"grad_norm": 8.840109825134277,
|
| 1981 |
+
"learning_rate": 2.377875575510967e-05,
|
| 1982 |
+
"loss": 1.6202,
|
| 1983 |
+
"step": 2820
|
| 1984 |
+
},
|
| 1985 |
+
{
|
| 1986 |
+
"epoch": 0.566,
|
| 1987 |
+
"grad_norm": 18.031835556030273,
|
| 1988 |
+
"learning_rate": 2.3604462375170906e-05,
|
| 1989 |
+
"loss": 4.6898,
|
| 1990 |
+
"step": 2830
|
| 1991 |
+
},
|
| 1992 |
+
{
|
| 1993 |
+
"epoch": 0.568,
|
| 1994 |
+
"grad_norm": 5.239192008972168,
|
| 1995 |
+
"learning_rate": 2.3430237011767167e-05,
|
| 1996 |
+
"loss": 1.3387,
|
| 1997 |
+
"step": 2840
|
| 1998 |
+
},
|
| 1999 |
+
{
|
| 2000 |
+
"epoch": 0.57,
|
| 2001 |
+
"grad_norm": 5.165606498718262,
|
| 2002 |
+
"learning_rate": 2.3256088156396868e-05,
|
| 2003 |
+
"loss": 3.6984,
|
| 2004 |
+
"step": 2850
|
| 2005 |
+
},
|
| 2006 |
+
{
|
| 2007 |
+
"epoch": 0.572,
|
| 2008 |
+
"grad_norm": 11.281644821166992,
|
| 2009 |
+
"learning_rate": 2.3082024296829536e-05,
|
| 2010 |
+
"loss": 1.874,
|
| 2011 |
+
"step": 2860
|
| 2012 |
+
},
|
| 2013 |
+
{
|
| 2014 |
+
"epoch": 0.574,
|
| 2015 |
+
"grad_norm": 31.151283264160156,
|
| 2016 |
+
"learning_rate": 2.2908053916692117e-05,
|
| 2017 |
+
"loss": 1.7756,
|
| 2018 |
+
"step": 2870
|
| 2019 |
+
},
|
| 2020 |
+
{
|
| 2021 |
+
"epoch": 0.576,
|
| 2022 |
+
"grad_norm": 18.31266212463379,
|
| 2023 |
+
"learning_rate": 2.2734185495055503e-05,
|
| 2024 |
+
"loss": 8.2453,
|
| 2025 |
+
"step": 2880
|
| 2026 |
+
},
|
| 2027 |
+
{
|
| 2028 |
+
"epoch": 0.578,
|
| 2029 |
+
"grad_norm": 19.712526321411133,
|
| 2030 |
+
"learning_rate": 2.2560427506021266e-05,
|
| 2031 |
+
"loss": 1.6711,
|
| 2032 |
+
"step": 2890
|
| 2033 |
+
},
|
| 2034 |
+
{
|
| 2035 |
+
"epoch": 0.58,
|
| 2036 |
+
"grad_norm": 12.909187316894531,
|
| 2037 |
+
"learning_rate": 2.238678841830867e-05,
|
| 2038 |
+
"loss": 2.9633,
|
| 2039 |
+
"step": 2900
|
| 2040 |
+
},
|
| 2041 |
+
{
|
| 2042 |
+
"epoch": 0.582,
|
| 2043 |
+
"grad_norm": 3.159407615661621,
|
| 2044 |
+
"learning_rate": 2.2213276694841866e-05,
|
| 2045 |
+
"loss": 1.882,
|
| 2046 |
+
"step": 2910
|
| 2047 |
+
},
|
| 2048 |
+
{
|
| 2049 |
+
"epoch": 0.584,
|
| 2050 |
+
"grad_norm": 20.577537536621094,
|
| 2051 |
+
"learning_rate": 2.2039900792337474e-05,
|
| 2052 |
+
"loss": 2.1105,
|
| 2053 |
+
"step": 2920
|
| 2054 |
+
},
|
| 2055 |
+
{
|
| 2056 |
+
"epoch": 0.586,
|
| 2057 |
+
"grad_norm": 43.844261169433594,
|
| 2058 |
+
"learning_rate": 2.186666916089239e-05,
|
| 2059 |
+
"loss": 2.285,
|
| 2060 |
+
"step": 2930
|
| 2061 |
+
},
|
| 2062 |
+
{
|
| 2063 |
+
"epoch": 0.588,
|
| 2064 |
+
"grad_norm": 10.760743141174316,
|
| 2065 |
+
"learning_rate": 2.1693590243571938e-05,
|
| 2066 |
+
"loss": 2.7095,
|
| 2067 |
+
"step": 2940
|
| 2068 |
+
},
|
| 2069 |
+
{
|
| 2070 |
+
"epoch": 0.59,
|
| 2071 |
+
"grad_norm": 8.60970401763916,
|
| 2072 |
+
"learning_rate": 2.1520672475998373e-05,
|
| 2073 |
+
"loss": 1.4936,
|
| 2074 |
+
"step": 2950
|
| 2075 |
+
},
|
| 2076 |
+
{
|
| 2077 |
+
"epoch": 0.592,
|
| 2078 |
+
"grad_norm": 4.47432804107666,
|
| 2079 |
+
"learning_rate": 2.1347924285939714e-05,
|
| 2080 |
+
"loss": 2.9807,
|
| 2081 |
+
"step": 2960
|
| 2082 |
+
},
|
| 2083 |
+
{
|
| 2084 |
+
"epoch": 0.594,
|
| 2085 |
+
"grad_norm": 14.855293273925781,
|
| 2086 |
+
"learning_rate": 2.117535409289905e-05,
|
| 2087 |
+
"loss": 1.8499,
|
| 2088 |
+
"step": 2970
|
| 2089 |
+
},
|
| 2090 |
+
{
|
| 2091 |
+
"epoch": 0.596,
|
| 2092 |
+
"grad_norm": 5.818737506866455,
|
| 2093 |
+
"learning_rate": 2.1002970307704132e-05,
|
| 2094 |
+
"loss": 2.6104,
|
| 2095 |
+
"step": 2980
|
| 2096 |
+
},
|
| 2097 |
+
{
|
| 2098 |
+
"epoch": 0.598,
|
| 2099 |
+
"grad_norm": 56.345890045166016,
|
| 2100 |
+
"learning_rate": 2.0830781332097446e-05,
|
| 2101 |
+
"loss": 4.632,
|
| 2102 |
+
"step": 2990
|
| 2103 |
+
},
|
| 2104 |
+
{
|
| 2105 |
+
"epoch": 0.6,
|
| 2106 |
+
"grad_norm": 0.7071281671524048,
|
| 2107 |
+
"learning_rate": 2.0658795558326743e-05,
|
| 2108 |
+
"loss": 2.1349,
|
| 2109 |
+
"step": 3000
|
| 2110 |
+
},
|
| 2111 |
+
{
|
| 2112 |
+
"epoch": 0.602,
|
| 2113 |
+
"grad_norm": 15.646844863891602,
|
| 2114 |
+
"learning_rate": 2.0487021368736003e-05,
|
| 2115 |
+
"loss": 2.881,
|
| 2116 |
+
"step": 3010
|
| 2117 |
+
},
|
| 2118 |
+
{
|
| 2119 |
+
"epoch": 0.604,
|
| 2120 |
+
"grad_norm": 54.35161209106445,
|
| 2121 |
+
"learning_rate": 2.031546713535688e-05,
|
| 2122 |
+
"loss": 2.999,
|
| 2123 |
+
"step": 3020
|
| 2124 |
+
},
|
| 2125 |
+
{
|
| 2126 |
+
"epoch": 0.606,
|
| 2127 |
+
"grad_norm": 4.890777587890625,
|
| 2128 |
+
"learning_rate": 2.0144141219500705e-05,
|
| 2129 |
+
"loss": 3.0427,
|
| 2130 |
+
"step": 3030
|
| 2131 |
+
},
|
| 2132 |
+
{
|
| 2133 |
+
"epoch": 0.608,
|
| 2134 |
+
"grad_norm": 2.275003433227539,
|
| 2135 |
+
"learning_rate": 1.9973051971350888e-05,
|
| 2136 |
+
"loss": 2.5017,
|
| 2137 |
+
"step": 3040
|
| 2138 |
+
},
|
| 2139 |
+
{
|
| 2140 |
+
"epoch": 0.61,
|
| 2141 |
+
"grad_norm": 3.7734315395355225,
|
| 2142 |
+
"learning_rate": 1.980220772955602e-05,
|
| 2143 |
+
"loss": 1.5811,
|
| 2144 |
+
"step": 3050
|
| 2145 |
+
},
|
| 2146 |
+
{
|
| 2147 |
+
"epoch": 0.612,
|
| 2148 |
+
"grad_norm": 4.304708957672119,
|
| 2149 |
+
"learning_rate": 1.963161682082342e-05,
|
| 2150 |
+
"loss": 1.6409,
|
| 2151 |
+
"step": 3060
|
| 2152 |
+
},
|
| 2153 |
+
{
|
| 2154 |
+
"epoch": 0.614,
|
| 2155 |
+
"grad_norm": 4.046875953674316,
|
| 2156 |
+
"learning_rate": 1.946128755951332e-05,
|
| 2157 |
+
"loss": 3.8443,
|
| 2158 |
+
"step": 3070
|
| 2159 |
+
},
|
| 2160 |
+
{
|
| 2161 |
+
"epoch": 0.616,
|
| 2162 |
+
"grad_norm": 33.650970458984375,
|
| 2163 |
+
"learning_rate": 1.9291228247233605e-05,
|
| 2164 |
+
"loss": 2.3784,
|
| 2165 |
+
"step": 3080
|
| 2166 |
+
},
|
| 2167 |
+
{
|
| 2168 |
+
"epoch": 0.618,
|
| 2169 |
+
"grad_norm": 40.188297271728516,
|
| 2170 |
+
"learning_rate": 1.912144717243525e-05,
|
| 2171 |
+
"loss": 1.9809,
|
| 2172 |
+
"step": 3090
|
| 2173 |
+
},
|
| 2174 |
+
{
|
| 2175 |
+
"epoch": 0.62,
|
| 2176 |
+
"grad_norm": 16.514888763427734,
|
| 2177 |
+
"learning_rate": 1.895195261000831e-05,
|
| 2178 |
+
"loss": 1.5875,
|
| 2179 |
+
"step": 3100
|
| 2180 |
+
},
|
| 2181 |
+
{
|
| 2182 |
+
"epoch": 0.622,
|
| 2183 |
+
"grad_norm": 8.940461158752441,
|
| 2184 |
+
"learning_rate": 1.8782752820878634e-05,
|
| 2185 |
+
"loss": 2.2161,
|
| 2186 |
+
"step": 3110
|
| 2187 |
+
},
|
| 2188 |
+
{
|
| 2189 |
+
"epoch": 0.624,
|
| 2190 |
+
"grad_norm": 12.641722679138184,
|
| 2191 |
+
"learning_rate": 1.8613856051605243e-05,
|
| 2192 |
+
"loss": 6.9032,
|
| 2193 |
+
"step": 3120
|
| 2194 |
+
},
|
| 2195 |
+
{
|
| 2196 |
+
"epoch": 0.626,
|
| 2197 |
+
"grad_norm": 77.18262481689453,
|
| 2198 |
+
"learning_rate": 1.8445270533978388e-05,
|
| 2199 |
+
"loss": 2.9789,
|
| 2200 |
+
"step": 3130
|
| 2201 |
+
},
|
| 2202 |
+
{
|
| 2203 |
+
"epoch": 0.628,
|
| 2204 |
+
"grad_norm": 9.005846977233887,
|
| 2205 |
+
"learning_rate": 1.827700448461836e-05,
|
| 2206 |
+
"loss": 2.0478,
|
| 2207 |
+
"step": 3140
|
| 2208 |
+
},
|
| 2209 |
+
{
|
| 2210 |
+
"epoch": 0.63,
|
| 2211 |
+
"grad_norm": 14.015524864196777,
|
| 2212 |
+
"learning_rate": 1.8109066104575023e-05,
|
| 2213 |
+
"loss": 1.2295,
|
| 2214 |
+
"step": 3150
|
| 2215 |
+
},
|
| 2216 |
+
{
|
| 2217 |
+
"epoch": 0.632,
|
| 2218 |
+
"grad_norm": 28.657386779785156,
|
| 2219 |
+
"learning_rate": 1.7941463578928086e-05,
|
| 2220 |
+
"loss": 2.6306,
|
| 2221 |
+
"step": 3160
|
| 2222 |
+
},
|
| 2223 |
+
{
|
| 2224 |
+
"epoch": 0.634,
|
| 2225 |
+
"grad_norm": 3.889622926712036,
|
| 2226 |
+
"learning_rate": 1.7774205076388206e-05,
|
| 2227 |
+
"loss": 1.2663,
|
| 2228 |
+
"step": 3170
|
| 2229 |
+
},
|
| 2230 |
+
{
|
| 2231 |
+
"epoch": 0.636,
|
| 2232 |
+
"grad_norm": 5.531691551208496,
|
| 2233 |
+
"learning_rate": 1.7607298748898842e-05,
|
| 2234 |
+
"loss": 3.5995,
|
| 2235 |
+
"step": 3180
|
| 2236 |
+
},
|
| 2237 |
+
{
|
| 2238 |
+
"epoch": 0.638,
|
| 2239 |
+
"grad_norm": 2.6832261085510254,
|
| 2240 |
+
"learning_rate": 1.744075273123889e-05,
|
| 2241 |
+
"loss": 3.4226,
|
| 2242 |
+
"step": 3190
|
| 2243 |
+
},
|
| 2244 |
+
{
|
| 2245 |
+
"epoch": 0.64,
|
| 2246 |
+
"grad_norm": 4.261538982391357,
|
| 2247 |
+
"learning_rate": 1.7274575140626318e-05,
|
| 2248 |
+
"loss": 0.9354,
|
| 2249 |
+
"step": 3200
|
| 2250 |
+
},
|
| 2251 |
+
{
|
| 2252 |
+
"epoch": 0.642,
|
| 2253 |
+
"grad_norm": 8.686302185058594,
|
| 2254 |
+
"learning_rate": 1.7108774076322443e-05,
|
| 2255 |
+
"loss": 4.6267,
|
| 2256 |
+
"step": 3210
|
| 2257 |
+
},
|
| 2258 |
+
{
|
| 2259 |
+
"epoch": 0.644,
|
| 2260 |
+
"grad_norm": 13.585872650146484,
|
| 2261 |
+
"learning_rate": 1.6943357619237226e-05,
|
| 2262 |
+
"loss": 0.9114,
|
| 2263 |
+
"step": 3220
|
| 2264 |
+
},
|
| 2265 |
+
{
|
| 2266 |
+
"epoch": 0.646,
|
| 2267 |
+
"grad_norm": 1.3698618412017822,
|
| 2268 |
+
"learning_rate": 1.677833383153542e-05,
|
| 2269 |
+
"loss": 2.5474,
|
| 2270 |
+
"step": 3230
|
| 2271 |
+
},
|
| 2272 |
+
{
|
| 2273 |
+
"epoch": 0.648,
|
| 2274 |
+
"grad_norm": 19.42194175720215,
|
| 2275 |
+
"learning_rate": 1.6613710756243626e-05,
|
| 2276 |
+
"loss": 2.7762,
|
| 2277 |
+
"step": 3240
|
| 2278 |
+
},
|
| 2279 |
+
{
|
| 2280 |
+
"epoch": 0.65,
|
| 2281 |
+
"grad_norm": 13.380922317504883,
|
| 2282 |
+
"learning_rate": 1.6449496416858284e-05,
|
| 2283 |
+
"loss": 1.6468,
|
| 2284 |
+
"step": 3250
|
| 2285 |
+
},
|
| 2286 |
+
{
|
| 2287 |
+
"epoch": 0.652,
|
| 2288 |
+
"grad_norm": 10.786489486694336,
|
| 2289 |
+
"learning_rate": 1.6285698816954624e-05,
|
| 2290 |
+
"loss": 1.2256,
|
| 2291 |
+
"step": 3260
|
| 2292 |
+
},
|
| 2293 |
+
{
|
| 2294 |
+
"epoch": 0.654,
|
| 2295 |
+
"grad_norm": 5.827413558959961,
|
| 2296 |
+
"learning_rate": 1.612232593979658e-05,
|
| 2297 |
+
"loss": 1.5563,
|
| 2298 |
+
"step": 3270
|
| 2299 |
+
},
|
| 2300 |
+
{
|
| 2301 |
+
"epoch": 0.656,
|
| 2302 |
+
"grad_norm": 13.82107162475586,
|
| 2303 |
+
"learning_rate": 1.5959385747947698e-05,
|
| 2304 |
+
"loss": 3.3846,
|
| 2305 |
+
"step": 3280
|
| 2306 |
+
},
|
| 2307 |
+
{
|
| 2308 |
+
"epoch": 0.658,
|
| 2309 |
+
"grad_norm": 22.237295150756836,
|
| 2310 |
+
"learning_rate": 1.5796886182883053e-05,
|
| 2311 |
+
"loss": 2.2551,
|
| 2312 |
+
"step": 3290
|
| 2313 |
+
},
|
| 2314 |
+
{
|
| 2315 |
+
"epoch": 0.66,
|
| 2316 |
+
"grad_norm": 5.554327011108398,
|
| 2317 |
+
"learning_rate": 1.56348351646022e-05,
|
| 2318 |
+
"loss": 0.8103,
|
| 2319 |
+
"step": 3300
|
| 2320 |
+
},
|
| 2321 |
+
{
|
| 2322 |
+
"epoch": 0.662,
|
| 2323 |
+
"grad_norm": 20.308881759643555,
|
| 2324 |
+
"learning_rate": 1.547324059124315e-05,
|
| 2325 |
+
"loss": 1.7983,
|
| 2326 |
+
"step": 3310
|
| 2327 |
+
},
|
| 2328 |
+
{
|
| 2329 |
+
"epoch": 0.664,
|
| 2330 |
+
"grad_norm": 10.945833206176758,
|
| 2331 |
+
"learning_rate": 1.5312110338697426e-05,
|
| 2332 |
+
"loss": 4.4483,
|
| 2333 |
+
"step": 3320
|
| 2334 |
+
},
|
| 2335 |
+
{
|
| 2336 |
+
"epoch": 0.666,
|
| 2337 |
+
"grad_norm": 2.27091121673584,
|
| 2338 |
+
"learning_rate": 1.5151452260226224e-05,
|
| 2339 |
+
"loss": 6.7593,
|
| 2340 |
+
"step": 3330
|
| 2341 |
+
},
|
| 2342 |
+
{
|
| 2343 |
+
"epoch": 0.668,
|
| 2344 |
+
"grad_norm": 74.13166809082031,
|
| 2345 |
+
"learning_rate": 1.4991274186077632e-05,
|
| 2346 |
+
"loss": 3.4548,
|
| 2347 |
+
"step": 3340
|
| 2348 |
+
},
|
| 2349 |
+
{
|
| 2350 |
+
"epoch": 0.67,
|
| 2351 |
+
"grad_norm": 39.529685974121094,
|
| 2352 |
+
"learning_rate": 1.4831583923104999e-05,
|
| 2353 |
+
"loss": 2.0242,
|
| 2354 |
+
"step": 3350
|
| 2355 |
+
},
|
| 2356 |
+
{
|
| 2357 |
+
"epoch": 0.672,
|
| 2358 |
+
"grad_norm": 11.965998649597168,
|
| 2359 |
+
"learning_rate": 1.467238925438646e-05,
|
| 2360 |
+
"loss": 2.3468,
|
| 2361 |
+
"step": 3360
|
| 2362 |
+
},
|
| 2363 |
+
{
|
| 2364 |
+
"epoch": 0.674,
|
| 2365 |
+
"grad_norm": 4.220920085906982,
|
| 2366 |
+
"learning_rate": 1.4513697938845572e-05,
|
| 2367 |
+
"loss": 1.3924,
|
| 2368 |
+
"step": 3370
|
| 2369 |
+
},
|
| 2370 |
+
{
|
| 2371 |
+
"epoch": 0.676,
|
| 2372 |
+
"grad_norm": 48.23358154296875,
|
| 2373 |
+
"learning_rate": 1.4355517710873184e-05,
|
| 2374 |
+
"loss": 2.3896,
|
| 2375 |
+
"step": 3380
|
| 2376 |
+
},
|
| 2377 |
+
{
|
| 2378 |
+
"epoch": 0.678,
|
| 2379 |
+
"grad_norm": 13.424652099609375,
|
| 2380 |
+
"learning_rate": 1.4197856279950438e-05,
|
| 2381 |
+
"loss": 2.2237,
|
| 2382 |
+
"step": 3390
|
| 2383 |
+
},
|
| 2384 |
+
{
|
| 2385 |
+
"epoch": 0.68,
|
| 2386 |
+
"grad_norm": 137.2168426513672,
|
| 2387 |
+
"learning_rate": 1.4040721330273062e-05,
|
| 2388 |
+
"loss": 6.2669,
|
| 2389 |
+
"step": 3400
|
| 2390 |
+
},
|
| 2391 |
+
{
|
| 2392 |
+
"epoch": 0.682,
|
| 2393 |
+
"grad_norm": 28.708620071411133,
|
| 2394 |
+
"learning_rate": 1.388412052037682e-05,
|
| 2395 |
+
"loss": 2.271,
|
| 2396 |
+
"step": 3410
|
| 2397 |
+
},
|
| 2398 |
+
{
|
| 2399 |
+
"epoch": 0.684,
|
| 2400 |
+
"grad_norm": 29.713703155517578,
|
| 2401 |
+
"learning_rate": 1.3728061482764238e-05,
|
| 2402 |
+
"loss": 1.7512,
|
| 2403 |
+
"step": 3420
|
| 2404 |
+
},
|
| 2405 |
+
{
|
| 2406 |
+
"epoch": 0.686,
|
| 2407 |
+
"grad_norm": 8.746112823486328,
|
| 2408 |
+
"learning_rate": 1.3572551823532654e-05,
|
| 2409 |
+
"loss": 1.3673,
|
| 2410 |
+
"step": 3430
|
| 2411 |
+
},
|
| 2412 |
+
{
|
| 2413 |
+
"epoch": 0.688,
|
| 2414 |
+
"grad_norm": 4.915830612182617,
|
| 2415 |
+
"learning_rate": 1.3417599122003464e-05,
|
| 2416 |
+
"loss": 3.8359,
|
| 2417 |
+
"step": 3440
|
| 2418 |
+
},
|
| 2419 |
+
{
|
| 2420 |
+
"epoch": 0.69,
|
| 2421 |
+
"grad_norm": 36.02322769165039,
|
| 2422 |
+
"learning_rate": 1.3263210930352737e-05,
|
| 2423 |
+
"loss": 1.6534,
|
| 2424 |
+
"step": 3450
|
| 2425 |
+
},
|
| 2426 |
+
{
|
| 2427 |
+
"epoch": 0.692,
|
| 2428 |
+
"grad_norm": 4.923043727874756,
|
| 2429 |
+
"learning_rate": 1.3109394773243117e-05,
|
| 2430 |
+
"loss": 0.955,
|
| 2431 |
+
"step": 3460
|
| 2432 |
+
},
|
| 2433 |
+
{
|
| 2434 |
+
"epoch": 0.694,
|
| 2435 |
+
"grad_norm": 8.513254165649414,
|
| 2436 |
+
"learning_rate": 1.2956158147457115e-05,
|
| 2437 |
+
"loss": 1.9489,
|
| 2438 |
+
"step": 3470
|
| 2439 |
+
},
|
| 2440 |
+
{
|
| 2441 |
+
"epoch": 0.696,
|
| 2442 |
+
"grad_norm": 5.320379257202148,
|
| 2443 |
+
"learning_rate": 1.280350852153168e-05,
|
| 2444 |
+
"loss": 2.5898,
|
| 2445 |
+
"step": 3480
|
| 2446 |
+
},
|
| 2447 |
+
{
|
| 2448 |
+
"epoch": 0.698,
|
| 2449 |
+
"grad_norm": 17.299158096313477,
|
| 2450 |
+
"learning_rate": 1.2651453335394231e-05,
|
| 2451 |
+
"loss": 2.9695,
|
| 2452 |
+
"step": 3490
|
| 2453 |
+
},
|
| 2454 |
+
{
|
| 2455 |
+
"epoch": 0.7,
|
| 2456 |
+
"grad_norm": 5.057638168334961,
|
| 2457 |
+
"learning_rate": 1.2500000000000006e-05,
|
| 2458 |
+
"loss": 1.8141,
|
| 2459 |
+
"step": 3500
|
| 2460 |
+
},
|
| 2461 |
+
{
|
| 2462 |
+
"epoch": 0.702,
|
| 2463 |
+
"grad_norm": 11.23135757446289,
|
| 2464 |
+
"learning_rate": 1.234915589697091e-05,
|
| 2465 |
+
"loss": 1.6577,
|
| 2466 |
+
"step": 3510
|
| 2467 |
+
},
|
| 2468 |
+
{
|
| 2469 |
+
"epoch": 0.704,
|
| 2470 |
+
"grad_norm": 13.365480422973633,
|
| 2471 |
+
"learning_rate": 1.2198928378235716e-05,
|
| 2472 |
+
"loss": 1.0784,
|
| 2473 |
+
"step": 3520
|
| 2474 |
+
},
|
| 2475 |
+
{
|
| 2476 |
+
"epoch": 0.706,
|
| 2477 |
+
"grad_norm": 4.888202667236328,
|
| 2478 |
+
"learning_rate": 1.2049324765671749e-05,
|
| 2479 |
+
"loss": 1.4946,
|
| 2480 |
+
"step": 3530
|
| 2481 |
+
},
|
| 2482 |
+
{
|
| 2483 |
+
"epoch": 0.708,
|
| 2484 |
+
"grad_norm": 11.178568840026855,
|
| 2485 |
+
"learning_rate": 1.1900352350748026e-05,
|
| 2486 |
+
"loss": 2.9905,
|
| 2487 |
+
"step": 3540
|
| 2488 |
+
},
|
| 2489 |
+
{
|
| 2490 |
+
"epoch": 0.71,
|
| 2491 |
+
"grad_norm": 1.779919981956482,
|
| 2492 |
+
"learning_rate": 1.175201839416988e-05,
|
| 2493 |
+
"loss": 2.2346,
|
| 2494 |
+
"step": 3550
|
| 2495 |
+
},
|
| 2496 |
+
{
|
| 2497 |
+
"epoch": 0.712,
|
| 2498 |
+
"grad_norm": 11.946904182434082,
|
| 2499 |
+
"learning_rate": 1.1604330125525079e-05,
|
| 2500 |
+
"loss": 1.6511,
|
| 2501 |
+
"step": 3560
|
| 2502 |
+
},
|
| 2503 |
+
{
|
| 2504 |
+
"epoch": 0.714,
|
| 2505 |
+
"grad_norm": 27.550771713256836,
|
| 2506 |
+
"learning_rate": 1.1457294742931507e-05,
|
| 2507 |
+
"loss": 3.0413,
|
| 2508 |
+
"step": 3570
|
| 2509 |
+
},
|
| 2510 |
+
{
|
| 2511 |
+
"epoch": 0.716,
|
| 2512 |
+
"grad_norm": 41.15061950683594,
|
| 2513 |
+
"learning_rate": 1.1310919412686247e-05,
|
| 2514 |
+
"loss": 2.2105,
|
| 2515 |
+
"step": 3580
|
| 2516 |
+
},
|
| 2517 |
+
{
|
| 2518 |
+
"epoch": 0.718,
|
| 2519 |
+
"grad_norm": 0.8456729650497437,
|
| 2520 |
+
"learning_rate": 1.11652112689164e-05,
|
| 2521 |
+
"loss": 2.0821,
|
| 2522 |
+
"step": 3590
|
| 2523 |
+
},
|
| 2524 |
+
{
|
| 2525 |
+
"epoch": 0.72,
|
| 2526 |
+
"grad_norm": 19.061296463012695,
|
| 2527 |
+
"learning_rate": 1.1020177413231334e-05,
|
| 2528 |
+
"loss": 3.4402,
|
| 2529 |
+
"step": 3600
|
| 2530 |
+
},
|
| 2531 |
+
{
|
| 2532 |
+
"epoch": 0.722,
|
| 2533 |
+
"grad_norm": 11.271340370178223,
|
| 2534 |
+
"learning_rate": 1.0875824914376553e-05,
|
| 2535 |
+
"loss": 1.5246,
|
| 2536 |
+
"step": 3610
|
| 2537 |
+
},
|
| 2538 |
+
{
|
| 2539 |
+
"epoch": 0.724,
|
| 2540 |
+
"grad_norm": 63.74707794189453,
|
| 2541 |
+
"learning_rate": 1.0732160807889211e-05,
|
| 2542 |
+
"loss": 4.4784,
|
| 2543 |
+
"step": 3620
|
| 2544 |
+
},
|
| 2545 |
+
{
|
| 2546 |
+
"epoch": 0.726,
|
| 2547 |
+
"grad_norm": 0.7974810600280762,
|
| 2548 |
+
"learning_rate": 1.058919209575517e-05,
|
| 2549 |
+
"loss": 2.8935,
|
| 2550 |
+
"step": 3630
|
| 2551 |
+
},
|
| 2552 |
+
{
|
| 2553 |
+
"epoch": 0.728,
|
| 2554 |
+
"grad_norm": 17.613325119018555,
|
| 2555 |
+
"learning_rate": 1.0446925746067768e-05,
|
| 2556 |
+
"loss": 1.4045,
|
| 2557 |
+
"step": 3640
|
| 2558 |
+
},
|
| 2559 |
+
{
|
| 2560 |
+
"epoch": 0.73,
|
| 2561 |
+
"grad_norm": 16.592836380004883,
|
| 2562 |
+
"learning_rate": 1.0305368692688174e-05,
|
| 2563 |
+
"loss": 2.0619,
|
| 2564 |
+
"step": 3650
|
| 2565 |
+
},
|
| 2566 |
+
{
|
| 2567 |
+
"epoch": 0.732,
|
| 2568 |
+
"grad_norm": 8.460766792297363,
|
| 2569 |
+
"learning_rate": 1.0164527834907467e-05,
|
| 2570 |
+
"loss": 1.8641,
|
| 2571 |
+
"step": 3660
|
| 2572 |
+
},
|
| 2573 |
+
{
|
| 2574 |
+
"epoch": 0.734,
|
| 2575 |
+
"grad_norm": 1.6616703271865845,
|
| 2576 |
+
"learning_rate": 1.0024410037110357e-05,
|
| 2577 |
+
"loss": 3.3477,
|
| 2578 |
+
"step": 3670
|
| 2579 |
+
},
|
| 2580 |
+
{
|
| 2581 |
+
"epoch": 0.736,
|
| 2582 |
+
"grad_norm": 14.18490982055664,
|
| 2583 |
+
"learning_rate": 9.88502212844063e-06,
|
| 2584 |
+
"loss": 1.6977,
|
| 2585 |
+
"step": 3680
|
| 2586 |
+
},
|
| 2587 |
+
{
|
| 2588 |
+
"epoch": 0.738,
|
| 2589 |
+
"grad_norm": 19.63976287841797,
|
| 2590 |
+
"learning_rate": 9.746370902468311e-06,
|
| 2591 |
+
"loss": 2.4541,
|
| 2592 |
+
"step": 3690
|
| 2593 |
+
},
|
| 2594 |
+
{
|
| 2595 |
+
"epoch": 0.74,
|
| 2596 |
+
"grad_norm": 5.758197784423828,
|
| 2597 |
+
"learning_rate": 9.608463116858542e-06,
|
| 2598 |
+
"loss": 1.0491,
|
| 2599 |
+
"step": 3700
|
| 2600 |
+
},
|
| 2601 |
+
{
|
| 2602 |
+
"epoch": 0.742,
|
| 2603 |
+
"grad_norm": 11.879212379455566,
|
| 2604 |
+
"learning_rate": 9.471305493042243e-06,
|
| 2605 |
+
"loss": 1.7615,
|
| 2606 |
+
"step": 3710
|
| 2607 |
+
},
|
| 2608 |
+
{
|
| 2609 |
+
"epoch": 0.744,
|
| 2610 |
+
"grad_norm": 14.958152770996094,
|
| 2611 |
+
"learning_rate": 9.334904715888495e-06,
|
| 2612 |
+
"loss": 1.2825,
|
| 2613 |
+
"step": 3720
|
| 2614 |
+
},
|
| 2615 |
+
{
|
| 2616 |
+
"epoch": 0.746,
|
| 2617 |
+
"grad_norm": 10.86933422088623,
|
| 2618 |
+
"learning_rate": 9.199267433378727e-06,
|
| 2619 |
+
"loss": 2.0753,
|
| 2620 |
+
"step": 3730
|
| 2621 |
+
},
|
| 2622 |
+
{
|
| 2623 |
+
"epoch": 0.748,
|
| 2624 |
+
"grad_norm": 3.0762341022491455,
|
| 2625 |
+
"learning_rate": 9.064400256282757e-06,
|
| 2626 |
+
"loss": 2.5678,
|
| 2627 |
+
"step": 3740
|
| 2628 |
+
},
|
| 2629 |
+
{
|
| 2630 |
+
"epoch": 0.75,
|
| 2631 |
+
"grad_norm": 3.98964262008667,
|
| 2632 |
+
"learning_rate": 8.930309757836517e-06,
|
| 2633 |
+
"loss": 1.0403,
|
| 2634 |
+
"step": 3750
|
| 2635 |
+
},
|
| 2636 |
+
{
|
| 2637 |
+
"epoch": 0.752,
|
| 2638 |
+
"grad_norm": 32.30311584472656,
|
| 2639 |
+
"learning_rate": 8.797002473421728e-06,
|
| 2640 |
+
"loss": 2.1364,
|
| 2641 |
+
"step": 3760
|
| 2642 |
+
},
|
| 2643 |
+
{
|
| 2644 |
+
"epoch": 0.754,
|
| 2645 |
+
"grad_norm": 15.094679832458496,
|
| 2646 |
+
"learning_rate": 8.664484900247363e-06,
|
| 2647 |
+
"loss": 1.4156,
|
| 2648 |
+
"step": 3770
|
| 2649 |
+
},
|
| 2650 |
+
{
|
| 2651 |
+
"epoch": 0.756,
|
| 2652 |
+
"grad_norm": 20.249778747558594,
|
| 2653 |
+
"learning_rate": 8.532763497032987e-06,
|
| 2654 |
+
"loss": 2.463,
|
| 2655 |
+
"step": 3780
|
| 2656 |
+
},
|
| 2657 |
+
{
|
| 2658 |
+
"epoch": 0.758,
|
| 2659 |
+
"grad_norm": 8.843160629272461,
|
| 2660 |
+
"learning_rate": 8.40184468369396e-06,
|
| 2661 |
+
"loss": 1.8851,
|
| 2662 |
+
"step": 3790
|
| 2663 |
+
},
|
| 2664 |
+
{
|
| 2665 |
+
"epoch": 0.76,
|
| 2666 |
+
"grad_norm": 13.01088809967041,
|
| 2667 |
+
"learning_rate": 8.271734841028553e-06,
|
| 2668 |
+
"loss": 2.3422,
|
| 2669 |
+
"step": 3800
|
| 2670 |
+
},
|
| 2671 |
+
{
|
| 2672 |
+
"epoch": 0.762,
|
| 2673 |
+
"grad_norm": 30.352092742919922,
|
| 2674 |
+
"learning_rate": 8.142440310406924e-06,
|
| 2675 |
+
"loss": 2.0351,
|
| 2676 |
+
"step": 3810
|
| 2677 |
+
},
|
| 2678 |
+
{
|
| 2679 |
+
"epoch": 0.764,
|
| 2680 |
+
"grad_norm": 17.028221130371094,
|
| 2681 |
+
"learning_rate": 8.013967393462094e-06,
|
| 2682 |
+
"loss": 2.6205,
|
| 2683 |
+
"step": 3820
|
| 2684 |
+
},
|
| 2685 |
+
{
|
| 2686 |
+
"epoch": 0.766,
|
| 2687 |
+
"grad_norm": 1.9819483757019043,
|
| 2688 |
+
"learning_rate": 7.886322351782783e-06,
|
| 2689 |
+
"loss": 1.4258,
|
| 2690 |
+
"step": 3830
|
| 2691 |
+
},
|
| 2692 |
+
{
|
| 2693 |
+
"epoch": 0.768,
|
| 2694 |
+
"grad_norm": 2.398263931274414,
|
| 2695 |
+
"learning_rate": 7.759511406608255e-06,
|
| 2696 |
+
"loss": 2.0398,
|
| 2697 |
+
"step": 3840
|
| 2698 |
+
},
|
| 2699 |
+
{
|
| 2700 |
+
"epoch": 0.77,
|
| 2701 |
+
"grad_norm": 6.930059432983398,
|
| 2702 |
+
"learning_rate": 7.633540738525066e-06,
|
| 2703 |
+
"loss": 2.7082,
|
| 2704 |
+
"step": 3850
|
| 2705 |
+
},
|
| 2706 |
+
{
|
| 2707 |
+
"epoch": 0.772,
|
| 2708 |
+
"grad_norm": 56.60074996948242,
|
| 2709 |
+
"learning_rate": 7.508416487165862e-06,
|
| 2710 |
+
"loss": 2.2596,
|
| 2711 |
+
"step": 3860
|
| 2712 |
+
},
|
| 2713 |
+
{
|
| 2714 |
+
"epoch": 0.774,
|
| 2715 |
+
"grad_norm": 6.612997531890869,
|
| 2716 |
+
"learning_rate": 7.384144750910133e-06,
|
| 2717 |
+
"loss": 1.665,
|
| 2718 |
+
"step": 3870
|
| 2719 |
+
},
|
| 2720 |
+
{
|
| 2721 |
+
"epoch": 0.776,
|
| 2722 |
+
"grad_norm": 1.0754458904266357,
|
| 2723 |
+
"learning_rate": 7.260731586586983e-06,
|
| 2724 |
+
"loss": 3.3368,
|
| 2725 |
+
"step": 3880
|
| 2726 |
+
},
|
| 2727 |
+
{
|
| 2728 |
+
"epoch": 0.778,
|
| 2729 |
+
"grad_norm": 7.085332870483398,
|
| 2730 |
+
"learning_rate": 7.138183009179922e-06,
|
| 2731 |
+
"loss": 2.165,
|
| 2732 |
+
"step": 3890
|
| 2733 |
+
},
|
| 2734 |
+
{
|
| 2735 |
+
"epoch": 0.78,
|
| 2736 |
+
"grad_norm": 5.756954669952393,
|
| 2737 |
+
"learning_rate": 7.016504991533726e-06,
|
| 2738 |
+
"loss": 0.9795,
|
| 2739 |
+
"step": 3900
|
| 2740 |
+
},
|
| 2741 |
+
{
|
| 2742 |
+
"epoch": 0.782,
|
| 2743 |
+
"grad_norm": 290.0068664550781,
|
| 2744 |
+
"learning_rate": 6.895703464063319e-06,
|
| 2745 |
+
"loss": 4.0774,
|
| 2746 |
+
"step": 3910
|
| 2747 |
+
},
|
| 2748 |
+
{
|
| 2749 |
+
"epoch": 0.784,
|
| 2750 |
+
"grad_norm": 18.028898239135742,
|
| 2751 |
+
"learning_rate": 6.775784314464717e-06,
|
| 2752 |
+
"loss": 1.838,
|
| 2753 |
+
"step": 3920
|
| 2754 |
+
},
|
| 2755 |
+
{
|
| 2756 |
+
"epoch": 0.786,
|
| 2757 |
+
"grad_norm": 30.23217010498047,
|
| 2758 |
+
"learning_rate": 6.656753387428089e-06,
|
| 2759 |
+
"loss": 2.2859,
|
| 2760 |
+
"step": 3930
|
| 2761 |
+
},
|
| 2762 |
+
{
|
| 2763 |
+
"epoch": 0.788,
|
| 2764 |
+
"grad_norm": 6.802043437957764,
|
| 2765 |
+
"learning_rate": 6.538616484352902e-06,
|
| 2766 |
+
"loss": 4.2013,
|
| 2767 |
+
"step": 3940
|
| 2768 |
+
},
|
| 2769 |
+
{
|
| 2770 |
+
"epoch": 0.79,
|
| 2771 |
+
"grad_norm": 21.070383071899414,
|
| 2772 |
+
"learning_rate": 6.421379363065142e-06,
|
| 2773 |
+
"loss": 3.8002,
|
| 2774 |
+
"step": 3950
|
| 2775 |
+
},
|
| 2776 |
+
{
|
| 2777 |
+
"epoch": 0.792,
|
| 2778 |
+
"grad_norm": 59.72300720214844,
|
| 2779 |
+
"learning_rate": 6.305047737536707e-06,
|
| 2780 |
+
"loss": 2.5436,
|
| 2781 |
+
"step": 3960
|
| 2782 |
+
},
|
| 2783 |
+
{
|
| 2784 |
+
"epoch": 0.794,
|
| 2785 |
+
"grad_norm": 3.4971981048583984,
|
| 2786 |
+
"learning_rate": 6.189627277606894e-06,
|
| 2787 |
+
"loss": 2.4645,
|
| 2788 |
+
"step": 3970
|
| 2789 |
+
},
|
| 2790 |
+
{
|
| 2791 |
+
"epoch": 0.796,
|
| 2792 |
+
"grad_norm": 10.910285949707031,
|
| 2793 |
+
"learning_rate": 6.075123608706093e-06,
|
| 2794 |
+
"loss": 1.1404,
|
| 2795 |
+
"step": 3980
|
| 2796 |
+
},
|
| 2797 |
+
{
|
| 2798 |
+
"epoch": 0.798,
|
| 2799 |
+
"grad_norm": 223.6168212890625,
|
| 2800 |
+
"learning_rate": 5.961542311581586e-06,
|
| 2801 |
+
"loss": 4.2125,
|
| 2802 |
+
"step": 3990
|
| 2803 |
+
},
|
| 2804 |
+
{
|
| 2805 |
+
"epoch": 0.8,
|
| 2806 |
+
"grad_norm": 15.95744514465332,
|
| 2807 |
+
"learning_rate": 5.848888922025553e-06,
|
| 2808 |
+
"loss": 1.4396,
|
| 2809 |
+
"step": 4000
|
| 2810 |
+
},
|
| 2811 |
+
{
|
| 2812 |
+
"epoch": 0.802,
|
| 2813 |
+
"grad_norm": 1.5669488906860352,
|
| 2814 |
+
"learning_rate": 5.737168930605272e-06,
|
| 2815 |
+
"loss": 2.321,
|
| 2816 |
+
"step": 4010
|
| 2817 |
+
},
|
| 2818 |
+
{
|
| 2819 |
+
"epoch": 0.804,
|
| 2820 |
+
"grad_norm": 20.125688552856445,
|
| 2821 |
+
"learning_rate": 5.626387782395512e-06,
|
| 2822 |
+
"loss": 2.803,
|
| 2823 |
+
"step": 4020
|
| 2824 |
+
},
|
| 2825 |
+
{
|
| 2826 |
+
"epoch": 0.806,
|
| 2827 |
+
"grad_norm": 9.791519165039062,
|
| 2828 |
+
"learning_rate": 5.5165508767131415e-06,
|
| 2829 |
+
"loss": 1.024,
|
| 2830 |
+
"step": 4030
|
| 2831 |
+
},
|
| 2832 |
+
{
|
| 2833 |
+
"epoch": 0.808,
|
| 2834 |
+
"grad_norm": 4.828093528747559,
|
| 2835 |
+
"learning_rate": 5.4076635668540075e-06,
|
| 2836 |
+
"loss": 2.4534,
|
| 2837 |
+
"step": 4040
|
| 2838 |
+
},
|
| 2839 |
+
{
|
| 2840 |
+
"epoch": 0.81,
|
| 2841 |
+
"grad_norm": 5.493026256561279,
|
| 2842 |
+
"learning_rate": 5.299731159831953e-06,
|
| 2843 |
+
"loss": 2.1075,
|
| 2844 |
+
"step": 4050
|
| 2845 |
+
},
|
| 2846 |
+
{
|
| 2847 |
+
"epoch": 0.812,
|
| 2848 |
+
"grad_norm": 8.821996688842773,
|
| 2849 |
+
"learning_rate": 5.192758916120236e-06,
|
| 2850 |
+
"loss": 3.3703,
|
| 2851 |
+
"step": 4060
|
| 2852 |
+
},
|
| 2853 |
+
{
|
| 2854 |
+
"epoch": 0.814,
|
| 2855 |
+
"grad_norm": 8.212584495544434,
|
| 2856 |
+
"learning_rate": 5.086752049395094e-06,
|
| 2857 |
+
"loss": 2.9377,
|
| 2858 |
+
"step": 4070
|
| 2859 |
+
},
|
| 2860 |
+
{
|
| 2861 |
+
"epoch": 0.816,
|
| 2862 |
+
"grad_norm": 14.955241203308105,
|
| 2863 |
+
"learning_rate": 4.981715726281666e-06,
|
| 2864 |
+
"loss": 4.4126,
|
| 2865 |
+
"step": 4080
|
| 2866 |
+
},
|
| 2867 |
+
{
|
| 2868 |
+
"epoch": 0.818,
|
| 2869 |
+
"grad_norm": 8.878729820251465,
|
| 2870 |
+
"learning_rate": 4.877655066102149e-06,
|
| 2871 |
+
"loss": 2.5896,
|
| 2872 |
+
"step": 4090
|
| 2873 |
+
},
|
| 2874 |
+
{
|
| 2875 |
+
"epoch": 0.82,
|
| 2876 |
+
"grad_norm": 11.291478157043457,
|
| 2877 |
+
"learning_rate": 4.7745751406263165e-06,
|
| 2878 |
+
"loss": 1.4516,
|
| 2879 |
+
"step": 4100
|
| 2880 |
+
},
|
| 2881 |
+
{
|
| 2882 |
+
"epoch": 0.822,
|
| 2883 |
+
"grad_norm": 3.0645530223846436,
|
| 2884 |
+
"learning_rate": 4.672480973824311e-06,
|
| 2885 |
+
"loss": 1.3904,
|
| 2886 |
+
"step": 4110
|
| 2887 |
+
},
|
| 2888 |
+
{
|
| 2889 |
+
"epoch": 0.824,
|
| 2890 |
+
"grad_norm": 10.652810096740723,
|
| 2891 |
+
"learning_rate": 4.571377541621788e-06,
|
| 2892 |
+
"loss": 1.6246,
|
| 2893 |
+
"step": 4120
|
| 2894 |
+
},
|
| 2895 |
+
{
|
| 2896 |
+
"epoch": 0.826,
|
| 2897 |
+
"grad_norm": 50.55589294433594,
|
| 2898 |
+
"learning_rate": 4.4712697716574e-06,
|
| 2899 |
+
"loss": 3.125,
|
| 2900 |
+
"step": 4130
|
| 2901 |
+
},
|
| 2902 |
+
{
|
| 2903 |
+
"epoch": 0.828,
|
| 2904 |
+
"grad_norm": 10.360865592956543,
|
| 2905 |
+
"learning_rate": 4.372162543042624e-06,
|
| 2906 |
+
"loss": 1.1379,
|
| 2907 |
+
"step": 4140
|
| 2908 |
+
},
|
| 2909 |
+
{
|
| 2910 |
+
"epoch": 0.83,
|
| 2911 |
+
"grad_norm": 11.648319244384766,
|
| 2912 |
+
"learning_rate": 4.274060686123959e-06,
|
| 2913 |
+
"loss": 1.723,
|
| 2914 |
+
"step": 4150
|
| 2915 |
+
},
|
| 2916 |
+
{
|
| 2917 |
+
"epoch": 0.832,
|
| 2918 |
+
"grad_norm": 5.083891868591309,
|
| 2919 |
+
"learning_rate": 4.176968982247514e-06,
|
| 2920 |
+
"loss": 2.2388,
|
| 2921 |
+
"step": 4160
|
| 2922 |
+
},
|
| 2923 |
+
{
|
| 2924 |
+
"epoch": 0.834,
|
| 2925 |
+
"grad_norm": 39.73847961425781,
|
| 2926 |
+
"learning_rate": 4.08089216352596e-06,
|
| 2927 |
+
"loss": 1.7591,
|
| 2928 |
+
"step": 4170
|
| 2929 |
+
},
|
| 2930 |
+
{
|
| 2931 |
+
"epoch": 0.836,
|
| 2932 |
+
"grad_norm": 2.994002342224121,
|
| 2933 |
+
"learning_rate": 3.985834912607894e-06,
|
| 2934 |
+
"loss": 0.9926,
|
| 2935 |
+
"step": 4180
|
| 2936 |
+
},
|
| 2937 |
+
{
|
| 2938 |
+
"epoch": 0.838,
|
| 2939 |
+
"grad_norm": 21.860036849975586,
|
| 2940 |
+
"learning_rate": 3.891801862449629e-06,
|
| 2941 |
+
"loss": 2.8662,
|
| 2942 |
+
"step": 4190
|
| 2943 |
+
},
|
| 2944 |
+
{
|
| 2945 |
+
"epoch": 0.84,
|
| 2946 |
+
"grad_norm": 23.886770248413086,
|
| 2947 |
+
"learning_rate": 3.798797596089351e-06,
|
| 2948 |
+
"loss": 2.1153,
|
| 2949 |
+
"step": 4200
|
| 2950 |
+
},
|
| 2951 |
+
{
|
| 2952 |
+
"epoch": 0.842,
|
| 2953 |
+
"grad_norm": 29.538490295410156,
|
| 2954 |
+
"learning_rate": 3.7068266464238084e-06,
|
| 2955 |
+
"loss": 1.9178,
|
| 2956 |
+
"step": 4210
|
| 2957 |
+
},
|
| 2958 |
+
{
|
| 2959 |
+
"epoch": 0.844,
|
| 2960 |
+
"grad_norm": 1.1373424530029297,
|
| 2961 |
+
"learning_rate": 3.6158934959873353e-06,
|
| 2962 |
+
"loss": 1.9438,
|
| 2963 |
+
"step": 4220
|
| 2964 |
+
},
|
| 2965 |
+
{
|
| 2966 |
+
"epoch": 0.846,
|
| 2967 |
+
"grad_norm": 27.889780044555664,
|
| 2968 |
+
"learning_rate": 3.5260025767333893e-06,
|
| 2969 |
+
"loss": 2.5589,
|
| 2970 |
+
"step": 4230
|
| 2971 |
+
},
|
| 2972 |
+
{
|
| 2973 |
+
"epoch": 0.848,
|
| 2974 |
+
"grad_norm": 12.20418930053711,
|
| 2975 |
+
"learning_rate": 3.4371582698185633e-06,
|
| 2976 |
+
"loss": 2.031,
|
| 2977 |
+
"step": 4240
|
| 2978 |
+
},
|
| 2979 |
+
{
|
| 2980 |
+
"epoch": 0.85,
|
| 2981 |
+
"grad_norm": 9.347545623779297,
|
| 2982 |
+
"learning_rate": 3.3493649053890326e-06,
|
| 2983 |
+
"loss": 1.8701,
|
| 2984 |
+
"step": 4250
|
| 2985 |
+
},
|
| 2986 |
+
{
|
| 2987 |
+
"epoch": 0.852,
|
| 2988 |
+
"grad_norm": 39.1044921875,
|
| 2989 |
+
"learning_rate": 3.262626762369525e-06,
|
| 2990 |
+
"loss": 2.4573,
|
| 2991 |
+
"step": 4260
|
| 2992 |
+
},
|
| 2993 |
+
{
|
| 2994 |
+
"epoch": 0.854,
|
| 2995 |
+
"grad_norm": 8.244641304016113,
|
| 2996 |
+
"learning_rate": 3.176948068254762e-06,
|
| 2997 |
+
"loss": 1.5529,
|
| 2998 |
+
"step": 4270
|
| 2999 |
+
},
|
| 3000 |
+
{
|
| 3001 |
+
"epoch": 0.856,
|
| 3002 |
+
"grad_norm": 9.417642593383789,
|
| 3003 |
+
"learning_rate": 3.092332998903416e-06,
|
| 3004 |
+
"loss": 4.1672,
|
| 3005 |
+
"step": 4280
|
| 3006 |
+
},
|
| 3007 |
+
{
|
| 3008 |
+
"epoch": 0.858,
|
| 3009 |
+
"grad_norm": 4.196715354919434,
|
| 3010 |
+
"learning_rate": 3.0087856783345914e-06,
|
| 3011 |
+
"loss": 2.5184,
|
| 3012 |
+
"step": 4290
|
| 3013 |
+
},
|
| 3014 |
+
{
|
| 3015 |
+
"epoch": 0.86,
|
| 3016 |
+
"grad_norm": 5.61329460144043,
|
| 3017 |
+
"learning_rate": 2.9263101785268254e-06,
|
| 3018 |
+
"loss": 1.5187,
|
| 3019 |
+
"step": 4300
|
| 3020 |
+
},
|
| 3021 |
+
{
|
| 3022 |
+
"epoch": 0.862,
|
| 3023 |
+
"grad_norm": 1.2130039930343628,
|
| 3024 |
+
"learning_rate": 2.8449105192196316e-06,
|
| 3025 |
+
"loss": 2.3649,
|
| 3026 |
+
"step": 4310
|
| 3027 |
+
},
|
| 3028 |
+
{
|
| 3029 |
+
"epoch": 0.864,
|
| 3030 |
+
"grad_norm": 26.6439151763916,
|
| 3031 |
+
"learning_rate": 2.764590667717562e-06,
|
| 3032 |
+
"loss": 1.8013,
|
| 3033 |
+
"step": 4320
|
| 3034 |
+
},
|
| 3035 |
+
{
|
| 3036 |
+
"epoch": 0.866,
|
| 3037 |
+
"grad_norm": 10.56583309173584,
|
| 3038 |
+
"learning_rate": 2.6853545386968606e-06,
|
| 3039 |
+
"loss": 1.5805,
|
| 3040 |
+
"step": 4330
|
| 3041 |
+
},
|
| 3042 |
+
{
|
| 3043 |
+
"epoch": 0.868,
|
| 3044 |
+
"grad_norm": 17.812593460083008,
|
| 3045 |
+
"learning_rate": 2.6072059940146775e-06,
|
| 3046 |
+
"loss": 1.8378,
|
| 3047 |
+
"step": 4340
|
| 3048 |
+
},
|
| 3049 |
+
{
|
| 3050 |
+
"epoch": 0.87,
|
| 3051 |
+
"grad_norm": 11.021294593811035,
|
| 3052 |
+
"learning_rate": 2.5301488425208296e-06,
|
| 3053 |
+
"loss": 1.502,
|
| 3054 |
+
"step": 4350
|
| 3055 |
+
},
|
| 3056 |
+
{
|
| 3057 |
+
"epoch": 0.872,
|
| 3058 |
+
"grad_norm": 7.252746105194092,
|
| 3059 |
+
"learning_rate": 2.454186839872158e-06,
|
| 3060 |
+
"loss": 3.0615,
|
| 3061 |
+
"step": 4360
|
| 3062 |
+
},
|
| 3063 |
+
{
|
| 3064 |
+
"epoch": 0.874,
|
| 3065 |
+
"grad_norm": 6.52194356918335,
|
| 3066 |
+
"learning_rate": 2.379323688349516e-06,
|
| 3067 |
+
"loss": 2.0572,
|
| 3068 |
+
"step": 4370
|
| 3069 |
+
},
|
| 3070 |
+
{
|
| 3071 |
+
"epoch": 0.876,
|
| 3072 |
+
"grad_norm": 25.462242126464844,
|
| 3073 |
+
"learning_rate": 2.3055630366772856e-06,
|
| 3074 |
+
"loss": 1.5945,
|
| 3075 |
+
"step": 4380
|
| 3076 |
+
},
|
| 3077 |
+
{
|
| 3078 |
+
"epoch": 0.878,
|
| 3079 |
+
"grad_norm": 1.2246159315109253,
|
| 3080 |
+
"learning_rate": 2.2329084798455746e-06,
|
| 3081 |
+
"loss": 1.6811,
|
| 3082 |
+
"step": 4390
|
| 3083 |
+
},
|
| 3084 |
+
{
|
| 3085 |
+
"epoch": 0.88,
|
| 3086 |
+
"grad_norm": 27.265029907226562,
|
| 3087 |
+
"learning_rate": 2.1613635589349756e-06,
|
| 3088 |
+
"loss": 1.5655,
|
| 3089 |
+
"step": 4400
|
| 3090 |
+
},
|
| 3091 |
+
{
|
| 3092 |
+
"epoch": 0.882,
|
| 3093 |
+
"grad_norm": 19.661325454711914,
|
| 3094 |
+
"learning_rate": 2.0909317609440095e-06,
|
| 3095 |
+
"loss": 2.1422,
|
| 3096 |
+
"step": 4410
|
| 3097 |
+
},
|
| 3098 |
+
{
|
| 3099 |
+
"epoch": 0.884,
|
| 3100 |
+
"grad_norm": 4.112105846405029,
|
| 3101 |
+
"learning_rate": 2.0216165186191407e-06,
|
| 3102 |
+
"loss": 1.403,
|
| 3103 |
+
"step": 4420
|
| 3104 |
+
},
|
| 3105 |
+
{
|
| 3106 |
+
"epoch": 0.886,
|
| 3107 |
+
"grad_norm": 22.394611358642578,
|
| 3108 |
+
"learning_rate": 1.95342121028749e-06,
|
| 3109 |
+
"loss": 2.6121,
|
| 3110 |
+
"step": 4430
|
| 3111 |
+
},
|
| 3112 |
+
{
|
| 3113 |
+
"epoch": 0.888,
|
| 3114 |
+
"grad_norm": 6.7292561531066895,
|
| 3115 |
+
"learning_rate": 1.8863491596921745e-06,
|
| 3116 |
+
"loss": 5.1337,
|
| 3117 |
+
"step": 4440
|
| 3118 |
+
},
|
| 3119 |
+
{
|
| 3120 |
+
"epoch": 0.89,
|
| 3121 |
+
"grad_norm": 9.215736389160156,
|
| 3122 |
+
"learning_rate": 1.8204036358303173e-06,
|
| 3123 |
+
"loss": 2.1225,
|
| 3124 |
+
"step": 4450
|
| 3125 |
+
},
|
| 3126 |
+
{
|
| 3127 |
+
"epoch": 0.892,
|
| 3128 |
+
"grad_norm": 44.11046600341797,
|
| 3129 |
+
"learning_rate": 1.7555878527937164e-06,
|
| 3130 |
+
"loss": 2.8199,
|
| 3131 |
+
"step": 4460
|
| 3132 |
+
},
|
| 3133 |
+
{
|
| 3134 |
+
"epoch": 0.894,
|
| 3135 |
+
"grad_norm": 45.898719787597656,
|
| 3136 |
+
"learning_rate": 1.6919049696121958e-06,
|
| 3137 |
+
"loss": 2.1345,
|
| 3138 |
+
"step": 4470
|
| 3139 |
+
},
|
| 3140 |
+
{
|
| 3141 |
+
"epoch": 0.896,
|
| 3142 |
+
"grad_norm": 6.135681629180908,
|
| 3143 |
+
"learning_rate": 1.629358090099639e-06,
|
| 3144 |
+
"loss": 1.671,
|
| 3145 |
+
"step": 4480
|
| 3146 |
+
},
|
| 3147 |
+
{
|
| 3148 |
+
"epoch": 0.898,
|
| 3149 |
+
"grad_norm": 88.45669555664062,
|
| 3150 |
+
"learning_rate": 1.5679502627027136e-06,
|
| 3151 |
+
"loss": 2.7573,
|
| 3152 |
+
"step": 4490
|
| 3153 |
+
},
|
| 3154 |
+
{
|
| 3155 |
+
"epoch": 0.9,
|
| 3156 |
+
"grad_norm": 8.329882621765137,
|
| 3157 |
+
"learning_rate": 1.5076844803522922e-06,
|
| 3158 |
+
"loss": 1.8508,
|
| 3159 |
+
"step": 4500
|
| 3160 |
+
},
|
| 3161 |
+
{
|
| 3162 |
+
"epoch": 0.902,
|
| 3163 |
+
"grad_norm": 19.587743759155273,
|
| 3164 |
+
"learning_rate": 1.4485636803175829e-06,
|
| 3165 |
+
"loss": 4.521,
|
| 3166 |
+
"step": 4510
|
| 3167 |
+
},
|
| 3168 |
+
{
|
| 3169 |
+
"epoch": 0.904,
|
| 3170 |
+
"grad_norm": 30.500099182128906,
|
| 3171 |
+
"learning_rate": 1.3905907440629752e-06,
|
| 3172 |
+
"loss": 3.2342,
|
| 3173 |
+
"step": 4520
|
| 3174 |
+
},
|
| 3175 |
+
{
|
| 3176 |
+
"epoch": 0.906,
|
| 3177 |
+
"grad_norm": 13.65441608428955,
|
| 3178 |
+
"learning_rate": 1.333768497107593e-06,
|
| 3179 |
+
"loss": 3.0516,
|
| 3180 |
+
"step": 4530
|
| 3181 |
+
},
|
| 3182 |
+
{
|
| 3183 |
+
"epoch": 0.908,
|
| 3184 |
+
"grad_norm": 39.75031280517578,
|
| 3185 |
+
"learning_rate": 1.2780997088875869e-06,
|
| 3186 |
+
"loss": 2.0561,
|
| 3187 |
+
"step": 4540
|
| 3188 |
+
},
|
| 3189 |
+
{
|
| 3190 |
+
"epoch": 0.91,
|
| 3191 |
+
"grad_norm": 45.91644287109375,
|
| 3192 |
+
"learning_rate": 1.2235870926211619e-06,
|
| 3193 |
+
"loss": 1.5619,
|
| 3194 |
+
"step": 4550
|
| 3195 |
+
},
|
| 3196 |
+
{
|
| 3197 |
+
"epoch": 0.912,
|
| 3198 |
+
"grad_norm": 5.88566255569458,
|
| 3199 |
+
"learning_rate": 1.170233305176327e-06,
|
| 3200 |
+
"loss": 2.0134,
|
| 3201 |
+
"step": 4560
|
| 3202 |
+
},
|
| 3203 |
+
{
|
| 3204 |
+
"epoch": 0.914,
|
| 3205 |
+
"grad_norm": 5.401540279388428,
|
| 3206 |
+
"learning_rate": 1.1180409469414094e-06,
|
| 3207 |
+
"loss": 0.8128,
|
| 3208 |
+
"step": 4570
|
| 3209 |
+
},
|
| 3210 |
+
{
|
| 3211 |
+
"epoch": 0.916,
|
| 3212 |
+
"grad_norm": 30.60863494873047,
|
| 3213 |
+
"learning_rate": 1.067012561698319e-06,
|
| 3214 |
+
"loss": 2.5157,
|
| 3215 |
+
"step": 4580
|
| 3216 |
+
},
|
| 3217 |
+
{
|
| 3218 |
+
"epoch": 0.918,
|
| 3219 |
+
"grad_norm": 2.0508158206939697,
|
| 3220 |
+
"learning_rate": 1.0171506364985622e-06,
|
| 3221 |
+
"loss": 1.7379,
|
| 3222 |
+
"step": 4590
|
| 3223 |
+
},
|
| 3224 |
+
{
|
| 3225 |
+
"epoch": 0.92,
|
| 3226 |
+
"grad_norm": 11.00378704071045,
|
| 3227 |
+
"learning_rate": 9.684576015420278e-07,
|
| 3228 |
+
"loss": 1.3243,
|
| 3229 |
+
"step": 4600
|
| 3230 |
+
},
|
| 3231 |
+
{
|
| 3232 |
+
"epoch": 0.922,
|
| 3233 |
+
"grad_norm": 68.18540954589844,
|
| 3234 |
+
"learning_rate": 9.209358300585474e-07,
|
| 3235 |
+
"loss": 4.207,
|
| 3236 |
+
"step": 4610
|
| 3237 |
+
},
|
| 3238 |
+
{
|
| 3239 |
+
"epoch": 0.924,
|
| 3240 |
+
"grad_norm": 11.006647109985352,
|
| 3241 |
+
"learning_rate": 8.745876381922147e-07,
|
| 3242 |
+
"loss": 2.8713,
|
| 3243 |
+
"step": 4620
|
| 3244 |
+
},
|
| 3245 |
+
{
|
| 3246 |
+
"epoch": 0.926,
|
| 3247 |
+
"grad_norm": 3.3466615676879883,
|
| 3248 |
+
"learning_rate": 8.294152848885157e-07,
|
| 3249 |
+
"loss": 2.2642,
|
| 3250 |
+
"step": 4630
|
| 3251 |
+
},
|
| 3252 |
+
{
|
| 3253 |
+
"epoch": 0.928,
|
| 3254 |
+
"grad_norm": 1.4981013536453247,
|
| 3255 |
+
"learning_rate": 7.854209717842231e-07,
|
| 3256 |
+
"loss": 3.0498,
|
| 3257 |
+
"step": 4640
|
| 3258 |
+
},
|
| 3259 |
+
{
|
| 3260 |
+
"epoch": 0.93,
|
| 3261 |
+
"grad_norm": 9.377291679382324,
|
| 3262 |
+
"learning_rate": 7.426068431000882e-07,
|
| 3263 |
+
"loss": 3.1851,
|
| 3264 |
+
"step": 4650
|
| 3265 |
+
},
|
| 3266 |
+
{
|
| 3267 |
+
"epoch": 0.932,
|
| 3268 |
+
"grad_norm": 32.81710433959961,
|
| 3269 |
+
"learning_rate": 7.009749855363456e-07,
|
| 3270 |
+
"loss": 1.5354,
|
| 3271 |
+
"step": 4660
|
| 3272 |
+
},
|
| 3273 |
+
{
|
| 3274 |
+
"epoch": 0.934,
|
| 3275 |
+
"grad_norm": 13.120279312133789,
|
| 3276 |
+
"learning_rate": 6.605274281709928e-07,
|
| 3277 |
+
"loss": 1.7694,
|
| 3278 |
+
"step": 4670
|
| 3279 |
+
},
|
| 3280 |
+
{
|
| 3281 |
+
"epoch": 0.936,
|
| 3282 |
+
"grad_norm": 3.324674606323242,
|
| 3283 |
+
"learning_rate": 6.212661423609184e-07,
|
| 3284 |
+
"loss": 1.4321,
|
| 3285 |
+
"step": 4680
|
| 3286 |
+
},
|
| 3287 |
+
{
|
| 3288 |
+
"epoch": 0.938,
|
| 3289 |
+
"grad_norm": 2.2350542545318604,
|
| 3290 |
+
"learning_rate": 5.83193041645802e-07,
|
| 3291 |
+
"loss": 1.4432,
|
| 3292 |
+
"step": 4690
|
| 3293 |
+
},
|
| 3294 |
+
{
|
| 3295 |
+
"epoch": 0.94,
|
| 3296 |
+
"grad_norm": 51.64103317260742,
|
| 3297 |
+
"learning_rate": 5.463099816548579e-07,
|
| 3298 |
+
"loss": 1.7524,
|
| 3299 |
+
"step": 4700
|
| 3300 |
+
},
|
| 3301 |
+
{
|
| 3302 |
+
"epoch": 0.942,
|
| 3303 |
+
"grad_norm": 20.108779907226562,
|
| 3304 |
+
"learning_rate": 5.106187600163987e-07,
|
| 3305 |
+
"loss": 3.8866,
|
| 3306 |
+
"step": 4710
|
| 3307 |
+
},
|
| 3308 |
+
{
|
| 3309 |
+
"epoch": 0.944,
|
| 3310 |
+
"grad_norm": 25.551082611083984,
|
| 3311 |
+
"learning_rate": 4.7612111627021175e-07,
|
| 3312 |
+
"loss": 1.618,
|
| 3313 |
+
"step": 4720
|
| 3314 |
+
},
|
| 3315 |
+
{
|
| 3316 |
+
"epoch": 0.946,
|
| 3317 |
+
"grad_norm": 5.8722639083862305,
|
| 3318 |
+
"learning_rate": 4.4281873178278475e-07,
|
| 3319 |
+
"loss": 2.4191,
|
| 3320 |
+
"step": 4730
|
| 3321 |
+
},
|
| 3322 |
+
{
|
| 3323 |
+
"epoch": 0.948,
|
| 3324 |
+
"grad_norm": 5.5632500648498535,
|
| 3325 |
+
"learning_rate": 4.107132296653549e-07,
|
| 3326 |
+
"loss": 1.4241,
|
| 3327 |
+
"step": 4740
|
| 3328 |
+
},
|
| 3329 |
+
{
|
| 3330 |
+
"epoch": 0.95,
|
| 3331 |
+
"grad_norm": 66.90338897705078,
|
| 3332 |
+
"learning_rate": 3.7980617469479953e-07,
|
| 3333 |
+
"loss": 1.8877,
|
| 3334 |
+
"step": 4750
|
| 3335 |
+
},
|
| 3336 |
+
{
|
| 3337 |
+
"epoch": 0.952,
|
| 3338 |
+
"grad_norm": 27.994674682617188,
|
| 3339 |
+
"learning_rate": 3.5009907323737825e-07,
|
| 3340 |
+
"loss": 1.4828,
|
| 3341 |
+
"step": 4760
|
| 3342 |
+
},
|
| 3343 |
+
{
|
| 3344 |
+
"epoch": 0.954,
|
| 3345 |
+
"grad_norm": 16.3344669342041,
|
| 3346 |
+
"learning_rate": 3.215933731753024e-07,
|
| 3347 |
+
"loss": 2.1333,
|
| 3348 |
+
"step": 4770
|
| 3349 |
+
},
|
| 3350 |
+
{
|
| 3351 |
+
"epoch": 0.956,
|
| 3352 |
+
"grad_norm": 2.000822067260742,
|
| 3353 |
+
"learning_rate": 2.942904638361804e-07,
|
| 3354 |
+
"loss": 2.0255,
|
| 3355 |
+
"step": 4780
|
| 3356 |
+
},
|
| 3357 |
+
{
|
| 3358 |
+
"epoch": 0.958,
|
| 3359 |
+
"grad_norm": 84.14417266845703,
|
| 3360 |
+
"learning_rate": 2.681916759252917e-07,
|
| 3361 |
+
"loss": 3.1469,
|
| 3362 |
+
"step": 4790
|
| 3363 |
+
},
|
| 3364 |
+
{
|
| 3365 |
+
"epoch": 0.96,
|
| 3366 |
+
"grad_norm": 2.5054540634155273,
|
| 3367 |
+
"learning_rate": 2.4329828146074095e-07,
|
| 3368 |
+
"loss": 2.5756,
|
| 3369 |
+
"step": 4800
|
| 3370 |
+
},
|
| 3371 |
+
{
|
| 3372 |
+
"epoch": 0.962,
|
| 3373 |
+
"grad_norm": 18.779565811157227,
|
| 3374 |
+
"learning_rate": 2.1961149371145795e-07,
|
| 3375 |
+
"loss": 2.2082,
|
| 3376 |
+
"step": 4810
|
| 3377 |
+
},
|
| 3378 |
+
{
|
| 3379 |
+
"epoch": 0.964,
|
| 3380 |
+
"grad_norm": 10.171392440795898,
|
| 3381 |
+
"learning_rate": 1.9713246713805588e-07,
|
| 3382 |
+
"loss": 1.2087,
|
| 3383 |
+
"step": 4820
|
| 3384 |
+
},
|
| 3385 |
+
{
|
| 3386 |
+
"epoch": 0.966,
|
| 3387 |
+
"grad_norm": 12.274552345275879,
|
| 3388 |
+
"learning_rate": 1.7586229733657644e-07,
|
| 3389 |
+
"loss": 3.4375,
|
| 3390 |
+
"step": 4830
|
| 3391 |
+
},
|
| 3392 |
+
{
|
| 3393 |
+
"epoch": 0.968,
|
| 3394 |
+
"grad_norm": 13.97036075592041,
|
| 3395 |
+
"learning_rate": 1.5580202098509077e-07,
|
| 3396 |
+
"loss": 1.2341,
|
| 3397 |
+
"step": 4840
|
| 3398 |
+
},
|
| 3399 |
+
{
|
| 3400 |
+
"epoch": 0.97,
|
| 3401 |
+
"grad_norm": 4.010193824768066,
|
| 3402 |
+
"learning_rate": 1.3695261579316777e-07,
|
| 3403 |
+
"loss": 1.7228,
|
| 3404 |
+
"step": 4850
|
| 3405 |
+
},
|
| 3406 |
+
{
|
| 3407 |
+
"epoch": 0.972,
|
| 3408 |
+
"grad_norm": 19.15084457397461,
|
| 3409 |
+
"learning_rate": 1.193150004542204e-07,
|
| 3410 |
+
"loss": 1.4849,
|
| 3411 |
+
"step": 4860
|
| 3412 |
+
},
|
| 3413 |
+
{
|
| 3414 |
+
"epoch": 0.974,
|
| 3415 |
+
"grad_norm": 4.279737949371338,
|
| 3416 |
+
"learning_rate": 1.0289003460074165e-07,
|
| 3417 |
+
"loss": 1.5513,
|
| 3418 |
+
"step": 4870
|
| 3419 |
+
},
|
| 3420 |
+
{
|
| 3421 |
+
"epoch": 0.976,
|
| 3422 |
+
"grad_norm": 3.084926128387451,
|
| 3423 |
+
"learning_rate": 8.767851876239074e-08,
|
| 3424 |
+
"loss": 1.3479,
|
| 3425 |
+
"step": 4880
|
| 3426 |
+
},
|
| 3427 |
+
{
|
| 3428 |
+
"epoch": 0.978,
|
| 3429 |
+
"grad_norm": 5.715677738189697,
|
| 3430 |
+
"learning_rate": 7.368119432699383e-08,
|
| 3431 |
+
"loss": 2.6461,
|
| 3432 |
+
"step": 4890
|
| 3433 |
+
},
|
| 3434 |
+
{
|
| 3435 |
+
"epoch": 0.98,
|
| 3436 |
+
"grad_norm": 72.04022216796875,
|
| 3437 |
+
"learning_rate": 6.089874350439506e-08,
|
| 3438 |
+
"loss": 1.5137,
|
| 3439 |
+
"step": 4900
|
| 3440 |
+
},
|
| 3441 |
+
{
|
| 3442 |
+
"epoch": 0.982,
|
| 3443 |
+
"grad_norm": 1.6261266469955444,
|
| 3444 |
+
"learning_rate": 4.9331789293211026e-08,
|
| 3445 |
+
"loss": 2.9775,
|
| 3446 |
+
"step": 4910
|
| 3447 |
+
},
|
| 3448 |
+
{
|
| 3449 |
+
"epoch": 0.984,
|
| 3450 |
+
"grad_norm": 3.387927532196045,
|
| 3451 |
+
"learning_rate": 3.8980895450474455e-08,
|
| 3452 |
+
"loss": 1.0868,
|
| 3453 |
+
"step": 4920
|
| 3454 |
+
},
|
| 3455 |
+
{
|
| 3456 |
+
"epoch": 0.986,
|
| 3457 |
+
"grad_norm": 73.5595932006836,
|
| 3458 |
+
"learning_rate": 2.9846566464150626e-08,
|
| 3459 |
+
"loss": 2.3111,
|
| 3460 |
+
"step": 4930
|
| 3461 |
+
},
|
| 3462 |
+
{
|
| 3463 |
+
"epoch": 0.988,
|
| 3464 |
+
"grad_norm": 8.196198463439941,
|
| 3465 |
+
"learning_rate": 2.192924752854042e-08,
|
| 3466 |
+
"loss": 2.8528,
|
| 3467 |
+
"step": 4940
|
| 3468 |
+
},
|
| 3469 |
+
{
|
| 3470 |
+
"epoch": 0.99,
|
| 3471 |
+
"grad_norm": 10.610214233398438,
|
| 3472 |
+
"learning_rate": 1.522932452260595e-08,
|
| 3473 |
+
"loss": 7.4089,
|
| 3474 |
+
"step": 4950
|
| 3475 |
+
},
|
| 3476 |
+
{
|
| 3477 |
+
"epoch": 0.992,
|
| 3478 |
+
"grad_norm": 2.894430160522461,
|
| 3479 |
+
"learning_rate": 9.747123991141194e-09,
|
| 3480 |
+
"loss": 5.3353,
|
| 3481 |
+
"step": 4960
|
| 3482 |
+
},
|
| 3483 |
+
{
|
| 3484 |
+
"epoch": 0.994,
|
| 3485 |
+
"grad_norm": 8.657221794128418,
|
| 3486 |
+
"learning_rate": 5.48291312886251e-09,
|
| 3487 |
+
"loss": 1.8168,
|
| 3488 |
+
"step": 4970
|
| 3489 |
+
},
|
| 3490 |
+
{
|
| 3491 |
+
"epoch": 0.996,
|
| 3492 |
+
"grad_norm": 2.366279363632202,
|
| 3493 |
+
"learning_rate": 2.4368997673940297e-09,
|
| 3494 |
+
"loss": 2.34,
|
| 3495 |
+
"step": 4980
|
| 3496 |
+
},
|
| 3497 |
+
{
|
| 3498 |
+
"epoch": 0.998,
|
| 3499 |
+
"grad_norm": 5.894517421722412,
|
| 3500 |
+
"learning_rate": 6.092323651313292e-10,
|
| 3501 |
+
"loss": 1.8474,
|
| 3502 |
+
"step": 4990
|
| 3503 |
+
},
|
| 3504 |
+
{
|
| 3505 |
+
"epoch": 1.0,
|
| 3506 |
+
"grad_norm": 27.153657913208008,
|
| 3507 |
+
"learning_rate": 0.0,
|
| 3508 |
+
"loss": 5.5777,
|
| 3509 |
+
"step": 5000
|
| 3510 |
+
},
|
| 3511 |
+
{
|
| 3512 |
+
"epoch": 1.0,
|
| 3513 |
+
"step": 5000,
|
| 3514 |
+
"total_flos": 9478520693637120.0,
|
| 3515 |
+
"train_loss": 2.8943692499160765,
|
| 3516 |
+
"train_runtime": 1526.3896,
|
| 3517 |
+
"train_samples_per_second": 3.276,
|
| 3518 |
+
"train_steps_per_second": 3.276
|
| 3519 |
+
}
|
| 3520 |
+
],
|
| 3521 |
+
"logging_steps": 10,
|
| 3522 |
+
"max_steps": 5000,
|
| 3523 |
+
"num_input_tokens_seen": 0,
|
| 3524 |
+
"num_train_epochs": 1,
|
| 3525 |
+
"save_steps": 4000,
|
| 3526 |
+
"stateful_callbacks": {
|
| 3527 |
+
"TrainerControl": {
|
| 3528 |
+
"args": {
|
| 3529 |
+
"should_epoch_stop": false,
|
| 3530 |
+
"should_evaluate": false,
|
| 3531 |
+
"should_log": false,
|
| 3532 |
+
"should_save": true,
|
| 3533 |
+
"should_training_stop": true
|
| 3534 |
+
},
|
| 3535 |
+
"attributes": {}
|
| 3536 |
+
}
|
| 3537 |
+
},
|
| 3538 |
+
"total_flos": 9478520693637120.0,
|
| 3539 |
+
"train_batch_size": 1,
|
| 3540 |
+
"trial_name": null,
|
| 3541 |
+
"trial_params": null
|
| 3542 |
+
}
|
Llama-2-13b-chat-hf/DomainBench/Agriculture/training_args.bin
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:23a0f0ef63025bf3610f24845041b3ec2e067c7bef4a95b8c5cb40cf4406b62d
|
| 3 |
+
size 5432
|
Llama-2-13b-chat-hf/DomainBench/Agriculture/training_loss.png
ADDED
|
Llama-2-13b-chat-hf/DomainBench/Finance/README.md
ADDED
|
@@ -0,0 +1,57 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
---
|
| 2 |
+
library_name: peft
|
| 3 |
+
license: other
|
| 4 |
+
base_model: /hujinwu/LLM_Assemble/pretrain_model/Llama-2-13b-chat-hf
|
| 5 |
+
tags:
|
| 6 |
+
- llama-factory
|
| 7 |
+
- lora
|
| 8 |
+
- generated_from_trainer
|
| 9 |
+
model-index:
|
| 10 |
+
- name: threshold_3-lamb_0.1-lr_5e-5
|
| 11 |
+
results: []
|
| 12 |
+
---
|
| 13 |
+
|
| 14 |
+
<!-- This model card has been generated automatically according to the information the Trainer had access to. You
|
| 15 |
+
should probably proofread and complete it, then remove this comment. -->
|
| 16 |
+
|
| 17 |
+
# threshold_3-lamb_0.1-lr_5e-5
|
| 18 |
+
|
| 19 |
+
This model is a fine-tuned version of [/hujinwu/LLM_Assemble/pretrain_model/Llama-2-13b-chat-hf](https://huggingface.co//hujinwu/LLM_Assemble/pretrain_model/Llama-2-13b-chat-hf) on the wealth dataset.
|
| 20 |
+
|
| 21 |
+
## Model description
|
| 22 |
+
|
| 23 |
+
More information needed
|
| 24 |
+
|
| 25 |
+
## Intended uses & limitations
|
| 26 |
+
|
| 27 |
+
More information needed
|
| 28 |
+
|
| 29 |
+
## Training and evaluation data
|
| 30 |
+
|
| 31 |
+
More information needed
|
| 32 |
+
|
| 33 |
+
## Training procedure
|
| 34 |
+
|
| 35 |
+
### Training hyperparameters
|
| 36 |
+
|
| 37 |
+
The following hyperparameters were used during training:
|
| 38 |
+
- learning_rate: 5e-05
|
| 39 |
+
- train_batch_size: 1
|
| 40 |
+
- eval_batch_size: 8
|
| 41 |
+
- seed: 42
|
| 42 |
+
- optimizer: Use adamw_torch with betas=(0.9,0.999) and epsilon=1e-08 and optimizer_args=No additional optimizer arguments
|
| 43 |
+
- lr_scheduler_type: cosine
|
| 44 |
+
- lr_scheduler_warmup_ratio: 0.1
|
| 45 |
+
- num_epochs: 1.0
|
| 46 |
+
|
| 47 |
+
### Training results
|
| 48 |
+
|
| 49 |
+
|
| 50 |
+
|
| 51 |
+
### Framework versions
|
| 52 |
+
|
| 53 |
+
- PEFT 0.12.0
|
| 54 |
+
- Transformers 4.46.1
|
| 55 |
+
- Pytorch 2.5.1+cu124
|
| 56 |
+
- Datasets 3.1.0
|
| 57 |
+
- Tokenizers 0.20.3
|
Llama-2-13b-chat-hf/DomainBench/Finance/adapter_config.json
ADDED
|
@@ -0,0 +1,29 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"alpha_pattern": {},
|
| 3 |
+
"auto_mapping": null,
|
| 4 |
+
"base_model_name_or_path": "/hujinwu/LLM_Assemble/pretrain_model/Llama-2-13b-chat-hf",
|
| 5 |
+
"bias": "none",
|
| 6 |
+
"fan_in_fan_out": false,
|
| 7 |
+
"inference_mode": true,
|
| 8 |
+
"init_lora_weights": true,
|
| 9 |
+
"layer_replication": null,
|
| 10 |
+
"layers_pattern": null,
|
| 11 |
+
"layers_to_transform": null,
|
| 12 |
+
"loftq_config": {},
|
| 13 |
+
"lora_alpha": 16,
|
| 14 |
+
"lora_dropout": 0.0,
|
| 15 |
+
"megatron_config": null,
|
| 16 |
+
"megatron_core": "megatron.core",
|
| 17 |
+
"modules_to_save": null,
|
| 18 |
+
"peft_type": "LORA",
|
| 19 |
+
"r": 8,
|
| 20 |
+
"rank_pattern": {},
|
| 21 |
+
"revision": null,
|
| 22 |
+
"target_modules": [
|
| 23 |
+
"q_proj",
|
| 24 |
+
"v_proj"
|
| 25 |
+
],
|
| 26 |
+
"task_type": "CAUSAL_LM",
|
| 27 |
+
"use_dora": false,
|
| 28 |
+
"use_rslora": false
|
| 29 |
+
}
|
Llama-2-13b-chat-hf/DomainBench/Finance/adapter_model.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:fa42a9d633b1cd3d1ecf795c7e679526bed7c191f6e0a95154fca04b2d6fe75d
|
| 3 |
+
size 26235704
|
Llama-2-13b-chat-hf/DomainBench/Finance/all_results.json
ADDED
|
@@ -0,0 +1,8 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"epoch": 1.0,
|
| 3 |
+
"total_flos": 1.151346780094464e+16,
|
| 4 |
+
"train_loss": 1.6451873833656312,
|
| 5 |
+
"train_runtime": 1464.1065,
|
| 6 |
+
"train_samples_per_second": 3.415,
|
| 7 |
+
"train_steps_per_second": 3.415
|
| 8 |
+
}
|
Llama-2-13b-chat-hf/DomainBench/Finance/logfile.txt
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
Llama-2-13b-chat-hf/DomainBench/Finance/special_tokens_map.json
ADDED
|
@@ -0,0 +1,24 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"bos_token": {
|
| 3 |
+
"content": "<s>",
|
| 4 |
+
"lstrip": false,
|
| 5 |
+
"normalized": false,
|
| 6 |
+
"rstrip": false,
|
| 7 |
+
"single_word": false
|
| 8 |
+
},
|
| 9 |
+
"eos_token": {
|
| 10 |
+
"content": "</s>",
|
| 11 |
+
"lstrip": false,
|
| 12 |
+
"normalized": false,
|
| 13 |
+
"rstrip": false,
|
| 14 |
+
"single_word": false
|
| 15 |
+
},
|
| 16 |
+
"pad_token": "</s>",
|
| 17 |
+
"unk_token": {
|
| 18 |
+
"content": "<unk>",
|
| 19 |
+
"lstrip": false,
|
| 20 |
+
"normalized": false,
|
| 21 |
+
"rstrip": false,
|
| 22 |
+
"single_word": false
|
| 23 |
+
}
|
| 24 |
+
}
|
Llama-2-13b-chat-hf/DomainBench/Finance/tokenizer.json
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
Llama-2-13b-chat-hf/DomainBench/Finance/tokenizer.model
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:9e556afd44213b6bd1be2b850ebbbd98f5481437a8021afaf58ee7fb1818d347
|
| 3 |
+
size 499723
|
Llama-2-13b-chat-hf/DomainBench/Finance/tokenizer_config.json
ADDED
|
@@ -0,0 +1,44 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"add_bos_token": true,
|
| 3 |
+
"add_eos_token": false,
|
| 4 |
+
"add_prefix_space": null,
|
| 5 |
+
"added_tokens_decoder": {
|
| 6 |
+
"0": {
|
| 7 |
+
"content": "<unk>",
|
| 8 |
+
"lstrip": false,
|
| 9 |
+
"normalized": false,
|
| 10 |
+
"rstrip": false,
|
| 11 |
+
"single_word": false,
|
| 12 |
+
"special": true
|
| 13 |
+
},
|
| 14 |
+
"1": {
|
| 15 |
+
"content": "<s>",
|
| 16 |
+
"lstrip": false,
|
| 17 |
+
"normalized": false,
|
| 18 |
+
"rstrip": false,
|
| 19 |
+
"single_word": false,
|
| 20 |
+
"special": true
|
| 21 |
+
},
|
| 22 |
+
"2": {
|
| 23 |
+
"content": "</s>",
|
| 24 |
+
"lstrip": false,
|
| 25 |
+
"normalized": false,
|
| 26 |
+
"rstrip": false,
|
| 27 |
+
"single_word": false,
|
| 28 |
+
"special": true
|
| 29 |
+
}
|
| 30 |
+
},
|
| 31 |
+
"bos_token": "<s>",
|
| 32 |
+
"chat_template": "{% if messages[0]['role'] == 'system' %}{% set loop_messages = messages[1:] %}{% set system_message = messages[0]['content'] %}{% else %}{% set loop_messages = messages %}{% endif %}{% for message in loop_messages %}{% set content = message['content'] %}{% if loop.index0 == 0 and system_message is defined %}{% set content = '<<SYS>>\n' + system_message + '\n<</SYS>>\n\n' + message['content'] %}{% endif %}{% if message['role'] == 'user' %}{{ '<s>' + '[INST] ' + content + ' [/INST]' }}{% elif message['role'] == 'assistant' %}{{ content + '</s>' }}{% endif %}{% endfor %}",
|
| 33 |
+
"clean_up_tokenization_spaces": false,
|
| 34 |
+
"eos_token": "</s>",
|
| 35 |
+
"legacy": false,
|
| 36 |
+
"model_max_length": 1000000000000000019884624838656,
|
| 37 |
+
"pad_token": "</s>",
|
| 38 |
+
"padding_side": "right",
|
| 39 |
+
"sp_model_kwargs": {},
|
| 40 |
+
"split_special_tokens": false,
|
| 41 |
+
"tokenizer_class": "LlamaTokenizer",
|
| 42 |
+
"unk_token": "<unk>",
|
| 43 |
+
"use_default_system_prompt": false
|
| 44 |
+
}
|
Llama-2-13b-chat-hf/DomainBench/Finance/train_results.json
ADDED
|
@@ -0,0 +1,8 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"epoch": 1.0,
|
| 3 |
+
"total_flos": 1.151346780094464e+16,
|
| 4 |
+
"train_loss": 1.6451873833656312,
|
| 5 |
+
"train_runtime": 1464.1065,
|
| 6 |
+
"train_samples_per_second": 3.415,
|
| 7 |
+
"train_steps_per_second": 3.415
|
| 8 |
+
}
|
Llama-2-13b-chat-hf/DomainBench/Finance/trainer_log.jsonl
ADDED
|
@@ -0,0 +1,501 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{"current_steps": 10, "total_steps": 5000, "loss": 3.5335, "lr": 1.0000000000000002e-06, "epoch": 0.002, "percentage": 0.2, "elapsed_time": "0:00:04", "remaining_time": "0:33:19"}
|
| 2 |
+
{"current_steps": 20, "total_steps": 5000, "loss": 5.5042, "lr": 2.0000000000000003e-06, "epoch": 0.004, "percentage": 0.4, "elapsed_time": "0:00:06", "remaining_time": "0:28:52"}
|
| 3 |
+
{"current_steps": 30, "total_steps": 5000, "loss": 2.9337, "lr": 3e-06, "epoch": 0.006, "percentage": 0.6, "elapsed_time": "0:00:09", "remaining_time": "0:27:30"}
|
| 4 |
+
{"current_steps": 40, "total_steps": 5000, "loss": 8.5725, "lr": 4.000000000000001e-06, "epoch": 0.008, "percentage": 0.8, "elapsed_time": "0:00:12", "remaining_time": "0:26:44"}
|
| 5 |
+
{"current_steps": 50, "total_steps": 5000, "loss": 2.2742, "lr": 5e-06, "epoch": 0.01, "percentage": 1.0, "elapsed_time": "0:00:15", "remaining_time": "0:25:30"}
|
| 6 |
+
{"current_steps": 60, "total_steps": 5000, "loss": 4.5745, "lr": 6e-06, "epoch": 0.012, "percentage": 1.2, "elapsed_time": "0:00:17", "remaining_time": "0:24:16"}
|
| 7 |
+
{"current_steps": 70, "total_steps": 5000, "loss": 3.9419, "lr": 7.000000000000001e-06, "epoch": 0.014, "percentage": 1.4, "elapsed_time": "0:00:20", "remaining_time": "0:24:13"}
|
| 8 |
+
{"current_steps": 80, "total_steps": 5000, "loss": 2.2781, "lr": 8.000000000000001e-06, "epoch": 0.016, "percentage": 1.6, "elapsed_time": "0:00:23", "remaining_time": "0:23:59"}
|
| 9 |
+
{"current_steps": 90, "total_steps": 5000, "loss": 6.3897, "lr": 9e-06, "epoch": 0.018, "percentage": 1.8, "elapsed_time": "0:00:53", "remaining_time": "0:48:21"}
|
| 10 |
+
{"current_steps": 100, "total_steps": 5000, "loss": 3.6837, "lr": 1e-05, "epoch": 0.02, "percentage": 2.0, "elapsed_time": "0:00:55", "remaining_time": "0:45:42"}
|
| 11 |
+
{"current_steps": 110, "total_steps": 5000, "loss": 1.3266, "lr": 1.1000000000000001e-05, "epoch": 0.022, "percentage": 2.2, "elapsed_time": "0:00:58", "remaining_time": "0:43:20"}
|
| 12 |
+
{"current_steps": 120, "total_steps": 5000, "loss": 6.1833, "lr": 1.2e-05, "epoch": 0.024, "percentage": 2.4, "elapsed_time": "0:01:01", "remaining_time": "0:41:40"}
|
| 13 |
+
{"current_steps": 130, "total_steps": 5000, "loss": 2.6712, "lr": 1.3000000000000001e-05, "epoch": 0.026, "percentage": 2.6, "elapsed_time": "0:01:04", "remaining_time": "0:40:16"}
|
| 14 |
+
{"current_steps": 140, "total_steps": 5000, "loss": 2.5445, "lr": 1.4000000000000001e-05, "epoch": 0.028, "percentage": 2.8, "elapsed_time": "0:01:07", "remaining_time": "0:39:03"}
|
| 15 |
+
{"current_steps": 150, "total_steps": 5000, "loss": 2.7276, "lr": 1.5e-05, "epoch": 0.03, "percentage": 3.0, "elapsed_time": "0:01:10", "remaining_time": "0:37:59"}
|
| 16 |
+
{"current_steps": 160, "total_steps": 5000, "loss": 1.7329, "lr": 1.6000000000000003e-05, "epoch": 0.032, "percentage": 3.2, "elapsed_time": "0:01:12", "remaining_time": "0:36:48"}
|
| 17 |
+
{"current_steps": 170, "total_steps": 5000, "loss": 1.916, "lr": 1.7000000000000003e-05, "epoch": 0.034, "percentage": 3.4, "elapsed_time": "0:01:15", "remaining_time": "0:35:52"}
|
| 18 |
+
{"current_steps": 180, "total_steps": 5000, "loss": 6.3988, "lr": 1.8e-05, "epoch": 0.036, "percentage": 3.6, "elapsed_time": "0:01:18", "remaining_time": "0:35:01"}
|
| 19 |
+
{"current_steps": 190, "total_steps": 5000, "loss": 2.1053, "lr": 1.9e-05, "epoch": 0.038, "percentage": 3.8, "elapsed_time": "0:01:21", "remaining_time": "0:34:23"}
|
| 20 |
+
{"current_steps": 200, "total_steps": 5000, "loss": 0.798, "lr": 2e-05, "epoch": 0.04, "percentage": 4.0, "elapsed_time": "0:01:24", "remaining_time": "0:33:37"}
|
| 21 |
+
{"current_steps": 210, "total_steps": 5000, "loss": 2.0661, "lr": 2.1e-05, "epoch": 0.042, "percentage": 4.2, "elapsed_time": "0:01:26", "remaining_time": "0:32:59"}
|
| 22 |
+
{"current_steps": 220, "total_steps": 5000, "loss": 2.4783, "lr": 2.2000000000000003e-05, "epoch": 0.044, "percentage": 4.4, "elapsed_time": "0:01:29", "remaining_time": "0:32:26"}
|
| 23 |
+
{"current_steps": 230, "total_steps": 5000, "loss": 0.7402, "lr": 2.3000000000000003e-05, "epoch": 0.046, "percentage": 4.6, "elapsed_time": "0:01:32", "remaining_time": "0:31:58"}
|
| 24 |
+
{"current_steps": 240, "total_steps": 5000, "loss": 2.5115, "lr": 2.4e-05, "epoch": 0.048, "percentage": 4.8, "elapsed_time": "0:01:35", "remaining_time": "0:31:36"}
|
| 25 |
+
{"current_steps": 250, "total_steps": 5000, "loss": 2.13, "lr": 2.5e-05, "epoch": 0.05, "percentage": 5.0, "elapsed_time": "0:01:38", "remaining_time": "0:31:11"}
|
| 26 |
+
{"current_steps": 260, "total_steps": 5000, "loss": 1.6962, "lr": 2.6000000000000002e-05, "epoch": 0.052, "percentage": 5.2, "elapsed_time": "0:01:41", "remaining_time": "0:30:50"}
|
| 27 |
+
{"current_steps": 270, "total_steps": 5000, "loss": 2.8705, "lr": 2.7000000000000002e-05, "epoch": 0.054, "percentage": 5.4, "elapsed_time": "0:01:44", "remaining_time": "0:30:31"}
|
| 28 |
+
{"current_steps": 280, "total_steps": 5000, "loss": 2.9734, "lr": 2.8000000000000003e-05, "epoch": 0.056, "percentage": 5.6, "elapsed_time": "0:01:46", "remaining_time": "0:30:02"}
|
| 29 |
+
{"current_steps": 290, "total_steps": 5000, "loss": 1.7669, "lr": 2.9e-05, "epoch": 0.058, "percentage": 5.8, "elapsed_time": "0:01:49", "remaining_time": "0:29:46"}
|
| 30 |
+
{"current_steps": 300, "total_steps": 5000, "loss": 2.1499, "lr": 3e-05, "epoch": 0.06, "percentage": 6.0, "elapsed_time": "0:01:53", "remaining_time": "0:29:30"}
|
| 31 |
+
{"current_steps": 310, "total_steps": 5000, "loss": 1.6354, "lr": 3.1e-05, "epoch": 0.062, "percentage": 6.2, "elapsed_time": "0:01:55", "remaining_time": "0:29:11"}
|
| 32 |
+
{"current_steps": 320, "total_steps": 5000, "loss": 2.1457, "lr": 3.2000000000000005e-05, "epoch": 0.064, "percentage": 6.4, "elapsed_time": "0:01:58", "remaining_time": "0:28:53"}
|
| 33 |
+
{"current_steps": 330, "total_steps": 5000, "loss": 3.0738, "lr": 3.3e-05, "epoch": 0.066, "percentage": 6.6, "elapsed_time": "0:02:01", "remaining_time": "0:28:39"}
|
| 34 |
+
{"current_steps": 340, "total_steps": 5000, "loss": 2.4357, "lr": 3.4000000000000007e-05, "epoch": 0.068, "percentage": 6.8, "elapsed_time": "0:02:04", "remaining_time": "0:28:26"}
|
| 35 |
+
{"current_steps": 350, "total_steps": 5000, "loss": 1.7431, "lr": 3.5e-05, "epoch": 0.07, "percentage": 7.0, "elapsed_time": "0:02:07", "remaining_time": "0:28:10"}
|
| 36 |
+
{"current_steps": 360, "total_steps": 5000, "loss": 1.7527, "lr": 3.6e-05, "epoch": 0.072, "percentage": 7.2, "elapsed_time": "0:02:10", "remaining_time": "0:27:59"}
|
| 37 |
+
{"current_steps": 370, "total_steps": 5000, "loss": 6.1666, "lr": 3.7e-05, "epoch": 0.074, "percentage": 7.4, "elapsed_time": "0:02:13", "remaining_time": "0:27:48"}
|
| 38 |
+
{"current_steps": 380, "total_steps": 5000, "loss": 0.5917, "lr": 3.8e-05, "epoch": 0.076, "percentage": 7.6, "elapsed_time": "0:02:16", "remaining_time": "0:27:35"}
|
| 39 |
+
{"current_steps": 390, "total_steps": 5000, "loss": 1.5061, "lr": 3.9000000000000006e-05, "epoch": 0.078, "percentage": 7.8, "elapsed_time": "0:02:18", "remaining_time": "0:27:18"}
|
| 40 |
+
{"current_steps": 400, "total_steps": 5000, "loss": 1.5694, "lr": 4e-05, "epoch": 0.08, "percentage": 8.0, "elapsed_time": "0:02:21", "remaining_time": "0:27:08"}
|
| 41 |
+
{"current_steps": 410, "total_steps": 5000, "loss": 1.4762, "lr": 4.1e-05, "epoch": 0.082, "percentage": 8.2, "elapsed_time": "0:02:24", "remaining_time": "0:26:59"}
|
| 42 |
+
{"current_steps": 420, "total_steps": 5000, "loss": 1.0468, "lr": 4.2e-05, "epoch": 0.084, "percentage": 8.4, "elapsed_time": "0:02:27", "remaining_time": "0:26:47"}
|
| 43 |
+
{"current_steps": 430, "total_steps": 5000, "loss": 7.1942, "lr": 4.3e-05, "epoch": 0.086, "percentage": 8.6, "elapsed_time": "0:02:30", "remaining_time": "0:26:35"}
|
| 44 |
+
{"current_steps": 440, "total_steps": 5000, "loss": 2.706, "lr": 4.4000000000000006e-05, "epoch": 0.088, "percentage": 8.8, "elapsed_time": "0:02:33", "remaining_time": "0:26:26"}
|
| 45 |
+
{"current_steps": 450, "total_steps": 5000, "loss": 1.8559, "lr": 4.5e-05, "epoch": 0.09, "percentage": 9.0, "elapsed_time": "0:02:35", "remaining_time": "0:26:15"}
|
| 46 |
+
{"current_steps": 460, "total_steps": 5000, "loss": 1.2105, "lr": 4.600000000000001e-05, "epoch": 0.092, "percentage": 9.2, "elapsed_time": "0:02:38", "remaining_time": "0:26:07"}
|
| 47 |
+
{"current_steps": 470, "total_steps": 5000, "loss": 2.8524, "lr": 4.7e-05, "epoch": 0.094, "percentage": 9.4, "elapsed_time": "0:02:41", "remaining_time": "0:25:57"}
|
| 48 |
+
{"current_steps": 480, "total_steps": 5000, "loss": 2.0881, "lr": 4.8e-05, "epoch": 0.096, "percentage": 9.6, "elapsed_time": "0:02:44", "remaining_time": "0:25:47"}
|
| 49 |
+
{"current_steps": 490, "total_steps": 5000, "loss": 2.4156, "lr": 4.9e-05, "epoch": 0.098, "percentage": 9.8, "elapsed_time": "0:02:47", "remaining_time": "0:25:40"}
|
| 50 |
+
{"current_steps": 500, "total_steps": 5000, "loss": 0.9531, "lr": 5e-05, "epoch": 0.1, "percentage": 10.0, "elapsed_time": "0:02:50", "remaining_time": "0:25:31"}
|
| 51 |
+
{"current_steps": 510, "total_steps": 5000, "loss": 3.9691, "lr": 4.999939076763487e-05, "epoch": 0.102, "percentage": 10.2, "elapsed_time": "0:02:52", "remaining_time": "0:25:22"}
|
| 52 |
+
{"current_steps": 520, "total_steps": 5000, "loss": 0.8777, "lr": 4.999756310023261e-05, "epoch": 0.104, "percentage": 10.4, "elapsed_time": "0:02:55", "remaining_time": "0:25:11"}
|
| 53 |
+
{"current_steps": 530, "total_steps": 5000, "loss": 0.9434, "lr": 4.999451708687114e-05, "epoch": 0.106, "percentage": 10.6, "elapsed_time": "0:02:58", "remaining_time": "0:25:03"}
|
| 54 |
+
{"current_steps": 540, "total_steps": 5000, "loss": 1.5592, "lr": 4.999025287600886e-05, "epoch": 0.108, "percentage": 10.8, "elapsed_time": "0:03:01", "remaining_time": "0:24:55"}
|
| 55 |
+
{"current_steps": 550, "total_steps": 5000, "loss": 0.6868, "lr": 4.99847706754774e-05, "epoch": 0.11, "percentage": 11.0, "elapsed_time": "0:03:04", "remaining_time": "0:24:50"}
|
| 56 |
+
{"current_steps": 560, "total_steps": 5000, "loss": 0.956, "lr": 4.997807075247146e-05, "epoch": 0.112, "percentage": 11.2, "elapsed_time": "0:03:07", "remaining_time": "0:24:44"}
|
| 57 |
+
{"current_steps": 570, "total_steps": 5000, "loss": 0.8766, "lr": 4.997015343353585e-05, "epoch": 0.114, "percentage": 11.4, "elapsed_time": "0:03:10", "remaining_time": "0:24:36"}
|
| 58 |
+
{"current_steps": 580, "total_steps": 5000, "loss": 0.5423, "lr": 4.996101910454953e-05, "epoch": 0.116, "percentage": 11.6, "elapsed_time": "0:03:13", "remaining_time": "0:24:30"}
|
| 59 |
+
{"current_steps": 590, "total_steps": 5000, "loss": 1.7762, "lr": 4.995066821070679e-05, "epoch": 0.118, "percentage": 11.8, "elapsed_time": "0:03:15", "remaining_time": "0:24:22"}
|
| 60 |
+
{"current_steps": 600, "total_steps": 5000, "loss": 2.294, "lr": 4.993910125649561e-05, "epoch": 0.12, "percentage": 12.0, "elapsed_time": "0:03:18", "remaining_time": "0:24:16"}
|
| 61 |
+
{"current_steps": 610, "total_steps": 5000, "loss": 0.4881, "lr": 4.992631880567301e-05, "epoch": 0.122, "percentage": 12.2, "elapsed_time": "0:03:21", "remaining_time": "0:24:11"}
|
| 62 |
+
{"current_steps": 620, "total_steps": 5000, "loss": 3.3744, "lr": 4.991232148123761e-05, "epoch": 0.124, "percentage": 12.4, "elapsed_time": "0:03:24", "remaining_time": "0:24:05"}
|
| 63 |
+
{"current_steps": 630, "total_steps": 5000, "loss": 1.2847, "lr": 4.989710996539926e-05, "epoch": 0.126, "percentage": 12.6, "elapsed_time": "0:03:27", "remaining_time": "0:24:00"}
|
| 64 |
+
{"current_steps": 640, "total_steps": 5000, "loss": 0.9029, "lr": 4.988068499954578e-05, "epoch": 0.128, "percentage": 12.8, "elapsed_time": "0:03:30", "remaining_time": "0:23:53"}
|
| 65 |
+
{"current_steps": 650, "total_steps": 5000, "loss": 1.3799, "lr": 4.9863047384206835e-05, "epoch": 0.13, "percentage": 13.0, "elapsed_time": "0:03:33", "remaining_time": "0:23:46"}
|
| 66 |
+
{"current_steps": 660, "total_steps": 5000, "loss": 3.7841, "lr": 4.984419797901491e-05, "epoch": 0.132, "percentage": 13.2, "elapsed_time": "0:03:36", "remaining_time": "0:23:41"}
|
| 67 |
+
{"current_steps": 670, "total_steps": 5000, "loss": 0.9186, "lr": 4.982413770266342e-05, "epoch": 0.134, "percentage": 13.4, "elapsed_time": "0:03:39", "remaining_time": "0:23:36"}
|
| 68 |
+
{"current_steps": 680, "total_steps": 5000, "loss": 1.4738, "lr": 4.980286753286195e-05, "epoch": 0.136, "percentage": 13.6, "elapsed_time": "0:03:41", "remaining_time": "0:23:28"}
|
| 69 |
+
{"current_steps": 690, "total_steps": 5000, "loss": 3.8651, "lr": 4.978038850628854e-05, "epoch": 0.138, "percentage": 13.8, "elapsed_time": "0:03:44", "remaining_time": "0:23:22"}
|
| 70 |
+
{"current_steps": 700, "total_steps": 5000, "loss": 1.1881, "lr": 4.975670171853926e-05, "epoch": 0.14, "percentage": 14.0, "elapsed_time": "0:03:47", "remaining_time": "0:23:16"}
|
| 71 |
+
{"current_steps": 710, "total_steps": 5000, "loss": 0.8245, "lr": 4.9731808324074717e-05, "epoch": 0.142, "percentage": 14.2, "elapsed_time": "0:03:50", "remaining_time": "0:23:10"}
|
| 72 |
+
{"current_steps": 720, "total_steps": 5000, "loss": 1.2642, "lr": 4.9705709536163824e-05, "epoch": 0.144, "percentage": 14.4, "elapsed_time": "0:03:53", "remaining_time": "0:23:05"}
|
| 73 |
+
{"current_steps": 730, "total_steps": 5000, "loss": 1.6256, "lr": 4.96784066268247e-05, "epoch": 0.146, "percentage": 14.6, "elapsed_time": "0:03:56", "remaining_time": "0:23:00"}
|
| 74 |
+
{"current_steps": 740, "total_steps": 5000, "loss": 9.4718, "lr": 4.964990092676263e-05, "epoch": 0.148, "percentage": 14.8, "elapsed_time": "0:03:59", "remaining_time": "0:22:56"}
|
| 75 |
+
{"current_steps": 750, "total_steps": 5000, "loss": 1.6593, "lr": 4.962019382530521e-05, "epoch": 0.15, "percentage": 15.0, "elapsed_time": "0:04:02", "remaining_time": "0:22:51"}
|
| 76 |
+
{"current_steps": 760, "total_steps": 5000, "loss": 1.4136, "lr": 4.9589286770334654e-05, "epoch": 0.152, "percentage": 15.2, "elapsed_time": "0:04:05", "remaining_time": "0:22:46"}
|
| 77 |
+
{"current_steps": 770, "total_steps": 5000, "loss": 2.5935, "lr": 4.9557181268217227e-05, "epoch": 0.154, "percentage": 15.4, "elapsed_time": "0:04:07", "remaining_time": "0:22:41"}
|
| 78 |
+
{"current_steps": 780, "total_steps": 5000, "loss": 2.0708, "lr": 4.952387888372979e-05, "epoch": 0.156, "percentage": 15.6, "elapsed_time": "0:04:10", "remaining_time": "0:22:34"}
|
| 79 |
+
{"current_steps": 790, "total_steps": 5000, "loss": 0.8737, "lr": 4.94893812399836e-05, "epoch": 0.158, "percentage": 15.8, "elapsed_time": "0:04:13", "remaining_time": "0:22:28"}
|
| 80 |
+
{"current_steps": 800, "total_steps": 5000, "loss": 2.0691, "lr": 4.9453690018345144e-05, "epoch": 0.16, "percentage": 16.0, "elapsed_time": "0:04:15", "remaining_time": "0:22:23"}
|
| 81 |
+
{"current_steps": 810, "total_steps": 5000, "loss": 0.7324, "lr": 4.94168069583542e-05, "epoch": 0.162, "percentage": 16.2, "elapsed_time": "0:04:18", "remaining_time": "0:22:19"}
|
| 82 |
+
{"current_steps": 820, "total_steps": 5000, "loss": 0.577, "lr": 4.937873385763908e-05, "epoch": 0.164, "percentage": 16.4, "elapsed_time": "0:04:21", "remaining_time": "0:22:11"}
|
| 83 |
+
{"current_steps": 830, "total_steps": 5000, "loss": 0.6428, "lr": 4.933947257182901e-05, "epoch": 0.166, "percentage": 16.6, "elapsed_time": "0:04:23", "remaining_time": "0:22:04"}
|
| 84 |
+
{"current_steps": 840, "total_steps": 5000, "loss": 1.4208, "lr": 4.929902501446366e-05, "epoch": 0.168, "percentage": 16.8, "elapsed_time": "0:04:26", "remaining_time": "0:22:00"}
|
| 85 |
+
{"current_steps": 850, "total_steps": 5000, "loss": 1.1485, "lr": 4.925739315689991e-05, "epoch": 0.17, "percentage": 17.0, "elapsed_time": "0:04:29", "remaining_time": "0:21:55"}
|
| 86 |
+
{"current_steps": 860, "total_steps": 5000, "loss": 0.8729, "lr": 4.9214579028215776e-05, "epoch": 0.172, "percentage": 17.2, "elapsed_time": "0:04:32", "remaining_time": "0:21:51"}
|
| 87 |
+
{"current_steps": 870, "total_steps": 5000, "loss": 2.3371, "lr": 4.917058471511149e-05, "epoch": 0.174, "percentage": 17.4, "elapsed_time": "0:04:35", "remaining_time": "0:21:46"}
|
| 88 |
+
{"current_steps": 880, "total_steps": 5000, "loss": 0.6887, "lr": 4.912541236180779e-05, "epoch": 0.176, "percentage": 17.6, "elapsed_time": "0:04:38", "remaining_time": "0:21:42"}
|
| 89 |
+
{"current_steps": 890, "total_steps": 5000, "loss": 0.8769, "lr": 4.907906416994146e-05, "epoch": 0.178, "percentage": 17.8, "elapsed_time": "0:04:40", "remaining_time": "0:21:37"}
|
| 90 |
+
{"current_steps": 900, "total_steps": 5000, "loss": 2.4827, "lr": 4.9031542398457974e-05, "epoch": 0.18, "percentage": 18.0, "elapsed_time": "0:04:43", "remaining_time": "0:21:33"}
|
| 91 |
+
{"current_steps": 910, "total_steps": 5000, "loss": 0.5722, "lr": 4.898284936350144e-05, "epoch": 0.182, "percentage": 18.2, "elapsed_time": "0:04:46", "remaining_time": "0:21:28"}
|
| 92 |
+
{"current_steps": 920, "total_steps": 5000, "loss": 1.3822, "lr": 4.893298743830168e-05, "epoch": 0.184, "percentage": 18.4, "elapsed_time": "0:04:49", "remaining_time": "0:21:24"}
|
| 93 |
+
{"current_steps": 930, "total_steps": 5000, "loss": 0.7233, "lr": 4.888195905305859e-05, "epoch": 0.186, "percentage": 18.6, "elapsed_time": "0:04:52", "remaining_time": "0:21:20"}
|
| 94 |
+
{"current_steps": 940, "total_steps": 5000, "loss": 9.3579, "lr": 4.882976669482367e-05, "epoch": 0.188, "percentage": 18.8, "elapsed_time": "0:04:55", "remaining_time": "0:21:15"}
|
| 95 |
+
{"current_steps": 950, "total_steps": 5000, "loss": 2.2926, "lr": 4.877641290737884e-05, "epoch": 0.19, "percentage": 19.0, "elapsed_time": "0:04:58", "remaining_time": "0:21:10"}
|
| 96 |
+
{"current_steps": 960, "total_steps": 5000, "loss": 0.8563, "lr": 4.8721900291112415e-05, "epoch": 0.192, "percentage": 19.2, "elapsed_time": "0:05:00", "remaining_time": "0:21:06"}
|
| 97 |
+
{"current_steps": 970, "total_steps": 5000, "loss": 2.229, "lr": 4.8666231502892415e-05, "epoch": 0.194, "percentage": 19.4, "elapsed_time": "0:05:03", "remaining_time": "0:21:02"}
|
| 98 |
+
{"current_steps": 980, "total_steps": 5000, "loss": 1.7804, "lr": 4.860940925593703e-05, "epoch": 0.196, "percentage": 19.6, "elapsed_time": "0:05:06", "remaining_time": "0:20:58"}
|
| 99 |
+
{"current_steps": 990, "total_steps": 5000, "loss": 0.826, "lr": 4.855143631968242e-05, "epoch": 0.198, "percentage": 19.8, "elapsed_time": "0:05:09", "remaining_time": "0:20:55"}
|
| 100 |
+
{"current_steps": 1000, "total_steps": 5000, "loss": 1.746, "lr": 4.849231551964771e-05, "epoch": 0.2, "percentage": 20.0, "elapsed_time": "0:05:12", "remaining_time": "0:20:51"}
|
| 101 |
+
{"current_steps": 1010, "total_steps": 5000, "loss": 2.6019, "lr": 4.843204973729729e-05, "epoch": 0.202, "percentage": 20.2, "elapsed_time": "0:05:15", "remaining_time": "0:20:47"}
|
| 102 |
+
{"current_steps": 1020, "total_steps": 5000, "loss": 1.5425, "lr": 4.837064190990036e-05, "epoch": 0.204, "percentage": 20.4, "elapsed_time": "0:05:18", "remaining_time": "0:20:43"}
|
| 103 |
+
{"current_steps": 1030, "total_steps": 5000, "loss": 1.2792, "lr": 4.830809503038781e-05, "epoch": 0.206, "percentage": 20.6, "elapsed_time": "0:05:21", "remaining_time": "0:20:39"}
|
| 104 |
+
{"current_steps": 1040, "total_steps": 5000, "loss": 0.9529, "lr": 4.8244412147206284e-05, "epoch": 0.208, "percentage": 20.8, "elapsed_time": "0:05:24", "remaining_time": "0:20:35"}
|
| 105 |
+
{"current_steps": 1050, "total_steps": 5000, "loss": 1.3505, "lr": 4.817959636416969e-05, "epoch": 0.21, "percentage": 21.0, "elapsed_time": "0:05:26", "remaining_time": "0:20:29"}
|
| 106 |
+
{"current_steps": 1060, "total_steps": 5000, "loss": 0.9163, "lr": 4.8113650840307834e-05, "epoch": 0.212, "percentage": 21.2, "elapsed_time": "0:05:29", "remaining_time": "0:20:26"}
|
| 107 |
+
{"current_steps": 1070, "total_steps": 5000, "loss": 1.228, "lr": 4.8046578789712515e-05, "epoch": 0.214, "percentage": 21.4, "elapsed_time": "0:05:32", "remaining_time": "0:20:22"}
|
| 108 |
+
{"current_steps": 1080, "total_steps": 5000, "loss": 1.1117, "lr": 4.797838348138086e-05, "epoch": 0.216, "percentage": 21.6, "elapsed_time": "0:05:35", "remaining_time": "0:20:19"}
|
| 109 |
+
{"current_steps": 1090, "total_steps": 5000, "loss": 1.7063, "lr": 4.790906823905599e-05, "epoch": 0.218, "percentage": 21.8, "elapsed_time": "0:05:38", "remaining_time": "0:20:14"}
|
| 110 |
+
{"current_steps": 1100, "total_steps": 5000, "loss": 0.9846, "lr": 4.783863644106502e-05, "epoch": 0.22, "percentage": 22.0, "elapsed_time": "0:05:41", "remaining_time": "0:20:10"}
|
| 111 |
+
{"current_steps": 1110, "total_steps": 5000, "loss": 0.9996, "lr": 4.776709152015443e-05, "epoch": 0.222, "percentage": 22.2, "elapsed_time": "0:05:44", "remaining_time": "0:20:07"}
|
| 112 |
+
{"current_steps": 1120, "total_steps": 5000, "loss": 1.5269, "lr": 4.769443696332272e-05, "epoch": 0.224, "percentage": 22.4, "elapsed_time": "0:05:47", "remaining_time": "0:20:03"}
|
| 113 |
+
{"current_steps": 1130, "total_steps": 5000, "loss": 1.7736, "lr": 4.762067631165049e-05, "epoch": 0.226, "percentage": 22.6, "elapsed_time": "0:05:50", "remaining_time": "0:19:59"}
|
| 114 |
+
{"current_steps": 1140, "total_steps": 5000, "loss": 1.0233, "lr": 4.754581316012785e-05, "epoch": 0.228, "percentage": 22.8, "elapsed_time": "0:05:53", "remaining_time": "0:19:56"}
|
| 115 |
+
{"current_steps": 1150, "total_steps": 5000, "loss": 1.3813, "lr": 4.7469851157479177e-05, "epoch": 0.23, "percentage": 23.0, "elapsed_time": "0:05:56", "remaining_time": "0:19:53"}
|
| 116 |
+
{"current_steps": 1160, "total_steps": 5000, "loss": 0.6272, "lr": 4.7392794005985326e-05, "epoch": 0.232, "percentage": 23.2, "elapsed_time": "0:05:59", "remaining_time": "0:19:48"}
|
| 117 |
+
{"current_steps": 1170, "total_steps": 5000, "loss": 2.541, "lr": 4.731464546130314e-05, "epoch": 0.234, "percentage": 23.4, "elapsed_time": "0:06:02", "remaining_time": "0:19:45"}
|
| 118 |
+
{"current_steps": 1180, "total_steps": 5000, "loss": 1.0791, "lr": 4.723540933228244e-05, "epoch": 0.236, "percentage": 23.6, "elapsed_time": "0:06:05", "remaining_time": "0:19:41"}
|
| 119 |
+
{"current_steps": 1190, "total_steps": 5000, "loss": 1.5918, "lr": 4.715508948078037e-05, "epoch": 0.238, "percentage": 23.8, "elapsed_time": "0:06:08", "remaining_time": "0:19:38"}
|
| 120 |
+
{"current_steps": 1200, "total_steps": 5000, "loss": 1.3247, "lr": 4.707368982147318e-05, "epoch": 0.24, "percentage": 24.0, "elapsed_time": "0:06:10", "remaining_time": "0:19:33"}
|
| 121 |
+
{"current_steps": 1210, "total_steps": 5000, "loss": 1.0801, "lr": 4.6991214321665414e-05, "epoch": 0.242, "percentage": 24.2, "elapsed_time": "0:06:13", "remaining_time": "0:19:29"}
|
| 122 |
+
{"current_steps": 1220, "total_steps": 5000, "loss": 2.2492, "lr": 4.690766700109659e-05, "epoch": 0.244, "percentage": 24.4, "elapsed_time": "0:06:16", "remaining_time": "0:19:25"}
|
| 123 |
+
{"current_steps": 1230, "total_steps": 5000, "loss": 0.9898, "lr": 4.682305193174524e-05, "epoch": 0.246, "percentage": 24.6, "elapsed_time": "0:06:19", "remaining_time": "0:19:22"}
|
| 124 |
+
{"current_steps": 1240, "total_steps": 5000, "loss": 0.8907, "lr": 4.6737373237630476e-05, "epoch": 0.248, "percentage": 24.8, "elapsed_time": "0:06:22", "remaining_time": "0:19:18"}
|
| 125 |
+
{"current_steps": 1250, "total_steps": 5000, "loss": 0.7756, "lr": 4.665063509461097e-05, "epoch": 0.25, "percentage": 25.0, "elapsed_time": "0:06:24", "remaining_time": "0:19:13"}
|
| 126 |
+
{"current_steps": 1260, "total_steps": 5000, "loss": 0.6585, "lr": 4.656284173018144e-05, "epoch": 0.252, "percentage": 25.2, "elapsed_time": "0:06:27", "remaining_time": "0:19:09"}
|
| 127 |
+
{"current_steps": 1270, "total_steps": 5000, "loss": 1.2712, "lr": 4.6473997423266614e-05, "epoch": 0.254, "percentage": 25.4, "elapsed_time": "0:06:30", "remaining_time": "0:19:06"}
|
| 128 |
+
{"current_steps": 1280, "total_steps": 5000, "loss": 1.2611, "lr": 4.638410650401267e-05, "epoch": 0.256, "percentage": 25.6, "elapsed_time": "0:06:33", "remaining_time": "0:19:03"}
|
| 129 |
+
{"current_steps": 1290, "total_steps": 5000, "loss": 1.0479, "lr": 4.629317335357619e-05, "epoch": 0.258, "percentage": 25.8, "elapsed_time": "0:06:36", "remaining_time": "0:19:00"}
|
| 130 |
+
{"current_steps": 1300, "total_steps": 5000, "loss": 1.6104, "lr": 4.620120240391065e-05, "epoch": 0.26, "percentage": 26.0, "elapsed_time": "0:06:39", "remaining_time": "0:18:56"}
|
| 131 |
+
{"current_steps": 1310, "total_steps": 5000, "loss": 1.3264, "lr": 4.610819813755038e-05, "epoch": 0.262, "percentage": 26.2, "elapsed_time": "0:06:42", "remaining_time": "0:18:52"}
|
| 132 |
+
{"current_steps": 1320, "total_steps": 5000, "loss": 1.1883, "lr": 4.601416508739211e-05, "epoch": 0.264, "percentage": 26.4, "elapsed_time": "0:06:45", "remaining_time": "0:18:49"}
|
| 133 |
+
{"current_steps": 1330, "total_steps": 5000, "loss": 1.4327, "lr": 4.591910783647404e-05, "epoch": 0.266, "percentage": 26.6, "elapsed_time": "0:06:48", "remaining_time": "0:18:46"}
|
| 134 |
+
{"current_steps": 1340, "total_steps": 5000, "loss": 1.3558, "lr": 4.5823031017752485e-05, "epoch": 0.268, "percentage": 26.8, "elapsed_time": "0:06:50", "remaining_time": "0:18:42"}
|
| 135 |
+
{"current_steps": 1350, "total_steps": 5000, "loss": 2.6668, "lr": 4.572593931387604e-05, "epoch": 0.27, "percentage": 27.0, "elapsed_time": "0:06:53", "remaining_time": "0:18:38"}
|
| 136 |
+
{"current_steps": 1360, "total_steps": 5000, "loss": 0.4225, "lr": 4.562783745695738e-05, "epoch": 0.272, "percentage": 27.2, "elapsed_time": "0:06:56", "remaining_time": "0:18:33"}
|
| 137 |
+
{"current_steps": 1370, "total_steps": 5000, "loss": 1.1548, "lr": 4.5528730228342605e-05, "epoch": 0.274, "percentage": 27.4, "elapsed_time": "0:06:59", "remaining_time": "0:18:30"}
|
| 138 |
+
{"current_steps": 1380, "total_steps": 5000, "loss": 0.9096, "lr": 4.542862245837821e-05, "epoch": 0.276, "percentage": 27.6, "elapsed_time": "0:07:01", "remaining_time": "0:18:26"}
|
| 139 |
+
{"current_steps": 1390, "total_steps": 5000, "loss": 1.3681, "lr": 4.532751902617569e-05, "epoch": 0.278, "percentage": 27.8, "elapsed_time": "0:07:04", "remaining_time": "0:18:23"}
|
| 140 |
+
{"current_steps": 1400, "total_steps": 5000, "loss": 1.286, "lr": 4.522542485937369e-05, "epoch": 0.28, "percentage": 28.0, "elapsed_time": "0:07:07", "remaining_time": "0:18:19"}
|
| 141 |
+
{"current_steps": 1410, "total_steps": 5000, "loss": 1.0204, "lr": 4.512234493389785e-05, "epoch": 0.282, "percentage": 28.2, "elapsed_time": "0:07:10", "remaining_time": "0:18:15"}
|
| 142 |
+
{"current_steps": 1420, "total_steps": 5000, "loss": 1.083, "lr": 4.5018284273718336e-05, "epoch": 0.284, "percentage": 28.4, "elapsed_time": "0:07:12", "remaining_time": "0:18:11"}
|
| 143 |
+
{"current_steps": 1430, "total_steps": 5000, "loss": 0.5139, "lr": 4.491324795060491e-05, "epoch": 0.286, "percentage": 28.6, "elapsed_time": "0:07:16", "remaining_time": "0:18:08"}
|
| 144 |
+
{"current_steps": 1440, "total_steps": 5000, "loss": 1.591, "lr": 4.480724108387977e-05, "epoch": 0.288, "percentage": 28.8, "elapsed_time": "0:07:18", "remaining_time": "0:18:04"}
|
| 145 |
+
{"current_steps": 1450, "total_steps": 5000, "loss": 0.4862, "lr": 4.4700268840168045e-05, "epoch": 0.29, "percentage": 29.0, "elapsed_time": "0:07:21", "remaining_time": "0:18:01"}
|
| 146 |
+
{"current_steps": 1460, "total_steps": 5000, "loss": 0.7162, "lr": 4.4592336433146e-05, "epoch": 0.292, "percentage": 29.2, "elapsed_time": "0:07:24", "remaining_time": "0:17:58"}
|
| 147 |
+
{"current_steps": 1470, "total_steps": 5000, "loss": 1.6884, "lr": 4.448344912328686e-05, "epoch": 0.294, "percentage": 29.4, "elapsed_time": "0:07:27", "remaining_time": "0:17:54"}
|
| 148 |
+
{"current_steps": 1480, "total_steps": 5000, "loss": 0.9289, "lr": 4.4373612217604496e-05, "epoch": 0.296, "percentage": 29.6, "elapsed_time": "0:07:30", "remaining_time": "0:17:51"}
|
| 149 |
+
{"current_steps": 1490, "total_steps": 5000, "loss": 1.546, "lr": 4.426283106939474e-05, "epoch": 0.298, "percentage": 29.8, "elapsed_time": "0:07:33", "remaining_time": "0:17:47"}
|
| 150 |
+
{"current_steps": 1500, "total_steps": 5000, "loss": 0.9777, "lr": 4.415111107797445e-05, "epoch": 0.3, "percentage": 30.0, "elapsed_time": "0:07:36", "remaining_time": "0:17:44"}
|
| 151 |
+
{"current_steps": 1510, "total_steps": 5000, "loss": 0.6637, "lr": 4.403845768841842e-05, "epoch": 0.302, "percentage": 30.2, "elapsed_time": "0:07:39", "remaining_time": "0:17:41"}
|
| 152 |
+
{"current_steps": 1520, "total_steps": 5000, "loss": 1.7305, "lr": 4.3924876391293915e-05, "epoch": 0.304, "percentage": 30.4, "elapsed_time": "0:07:42", "remaining_time": "0:17:38"}
|
| 153 |
+
{"current_steps": 1530, "total_steps": 5000, "loss": 0.5603, "lr": 4.381037272239311e-05, "epoch": 0.306, "percentage": 30.6, "elapsed_time": "0:07:44", "remaining_time": "0:17:34"}
|
| 154 |
+
{"current_steps": 1540, "total_steps": 5000, "loss": 3.2643, "lr": 4.36949522624633e-05, "epoch": 0.308, "percentage": 30.8, "elapsed_time": "0:07:47", "remaining_time": "0:17:31"}
|
| 155 |
+
{"current_steps": 1550, "total_steps": 5000, "loss": 3.7458, "lr": 4.357862063693486e-05, "epoch": 0.31, "percentage": 31.0, "elapsed_time": "0:07:50", "remaining_time": "0:17:27"}
|
| 156 |
+
{"current_steps": 1560, "total_steps": 5000, "loss": 1.9472, "lr": 4.3461383515647106e-05, "epoch": 0.312, "percentage": 31.2, "elapsed_time": "0:07:53", "remaining_time": "0:17:23"}
|
| 157 |
+
{"current_steps": 1570, "total_steps": 5000, "loss": 1.0152, "lr": 4.334324661257191e-05, "epoch": 0.314, "percentage": 31.4, "elapsed_time": "0:07:56", "remaining_time": "0:17:20"}
|
| 158 |
+
{"current_steps": 1580, "total_steps": 5000, "loss": 0.9588, "lr": 4.3224215685535294e-05, "epoch": 0.316, "percentage": 31.6, "elapsed_time": "0:07:59", "remaining_time": "0:17:17"}
|
| 159 |
+
{"current_steps": 1590, "total_steps": 5000, "loss": 1.1131, "lr": 4.3104296535936695e-05, "epoch": 0.318, "percentage": 31.8, "elapsed_time": "0:08:01", "remaining_time": "0:17:13"}
|
| 160 |
+
{"current_steps": 1600, "total_steps": 5000, "loss": 2.4659, "lr": 4.2983495008466276e-05, "epoch": 0.32, "percentage": 32.0, "elapsed_time": "0:08:04", "remaining_time": "0:17:09"}
|
| 161 |
+
{"current_steps": 1610, "total_steps": 5000, "loss": 0.8816, "lr": 4.2861816990820084e-05, "epoch": 0.322, "percentage": 32.2, "elapsed_time": "0:08:07", "remaining_time": "0:17:06"}
|
| 162 |
+
{"current_steps": 1620, "total_steps": 5000, "loss": 0.519, "lr": 4.273926841341302e-05, "epoch": 0.324, "percentage": 32.4, "elapsed_time": "0:08:10", "remaining_time": "0:17:03"}
|
| 163 |
+
{"current_steps": 1630, "total_steps": 5000, "loss": 2.3195, "lr": 4.261585524908987e-05, "epoch": 0.326, "percentage": 32.6, "elapsed_time": "0:08:13", "remaining_time": "0:16:59"}
|
| 164 |
+
{"current_steps": 1640, "total_steps": 5000, "loss": 3.6418, "lr": 4.249158351283414e-05, "epoch": 0.328, "percentage": 32.8, "elapsed_time": "0:08:16", "remaining_time": "0:16:56"}
|
| 165 |
+
{"current_steps": 1650, "total_steps": 5000, "loss": 2.1609, "lr": 4.2366459261474933e-05, "epoch": 0.33, "percentage": 33.0, "elapsed_time": "0:08:19", "remaining_time": "0:16:53"}
|
| 166 |
+
{"current_steps": 1660, "total_steps": 5000, "loss": 1.4675, "lr": 4.224048859339175e-05, "epoch": 0.332, "percentage": 33.2, "elapsed_time": "0:08:21", "remaining_time": "0:16:49"}
|
| 167 |
+
{"current_steps": 1670, "total_steps": 5000, "loss": 1.3251, "lr": 4.211367764821722e-05, "epoch": 0.334, "percentage": 33.4, "elapsed_time": "0:08:24", "remaining_time": "0:16:46"}
|
| 168 |
+
{"current_steps": 1680, "total_steps": 5000, "loss": 1.0805, "lr": 4.198603260653792e-05, "epoch": 0.336, "percentage": 33.6, "elapsed_time": "0:08:27", "remaining_time": "0:16:43"}
|
| 169 |
+
{"current_steps": 1690, "total_steps": 5000, "loss": 0.5501, "lr": 4.185755968959308e-05, "epoch": 0.338, "percentage": 33.8, "elapsed_time": "0:08:30", "remaining_time": "0:16:39"}
|
| 170 |
+
{"current_steps": 1700, "total_steps": 5000, "loss": 0.7312, "lr": 4.172826515897146e-05, "epoch": 0.34, "percentage": 34.0, "elapsed_time": "0:08:33", "remaining_time": "0:16:36"}
|
| 171 |
+
{"current_steps": 1710, "total_steps": 5000, "loss": 0.5805, "lr": 4.1598155316306044e-05, "epoch": 0.342, "percentage": 34.2, "elapsed_time": "0:08:36", "remaining_time": "0:16:33"}
|
| 172 |
+
{"current_steps": 1720, "total_steps": 5000, "loss": 1.9512, "lr": 4.146723650296701e-05, "epoch": 0.344, "percentage": 34.4, "elapsed_time": "0:08:39", "remaining_time": "0:16:30"}
|
| 173 |
+
{"current_steps": 1730, "total_steps": 5000, "loss": 1.5697, "lr": 4.133551509975264e-05, "epoch": 0.346, "percentage": 34.6, "elapsed_time": "0:08:42", "remaining_time": "0:16:27"}
|
| 174 |
+
{"current_steps": 1740, "total_steps": 5000, "loss": 0.8957, "lr": 4.1202997526578276e-05, "epoch": 0.348, "percentage": 34.8, "elapsed_time": "0:08:45", "remaining_time": "0:16:23"}
|
| 175 |
+
{"current_steps": 1750, "total_steps": 5000, "loss": 0.489, "lr": 4.1069690242163484e-05, "epoch": 0.35, "percentage": 35.0, "elapsed_time": "0:08:47", "remaining_time": "0:16:20"}
|
| 176 |
+
{"current_steps": 1760, "total_steps": 5000, "loss": 2.1314, "lr": 4.093559974371725e-05, "epoch": 0.352, "percentage": 35.2, "elapsed_time": "0:08:50", "remaining_time": "0:16:16"}
|
| 177 |
+
{"current_steps": 1770, "total_steps": 5000, "loss": 3.0047, "lr": 4.080073256662127e-05, "epoch": 0.354, "percentage": 35.4, "elapsed_time": "0:08:53", "remaining_time": "0:16:13"}
|
| 178 |
+
{"current_steps": 1780, "total_steps": 5000, "loss": 1.2667, "lr": 4.066509528411152e-05, "epoch": 0.356, "percentage": 35.6, "elapsed_time": "0:08:56", "remaining_time": "0:16:10"}
|
| 179 |
+
{"current_steps": 1790, "total_steps": 5000, "loss": 0.9325, "lr": 4.052869450695776e-05, "epoch": 0.358, "percentage": 35.8, "elapsed_time": "0:08:59", "remaining_time": "0:16:06"}
|
| 180 |
+
{"current_steps": 1800, "total_steps": 5000, "loss": 2.1198, "lr": 4.039153688314145e-05, "epoch": 0.36, "percentage": 36.0, "elapsed_time": "0:09:02", "remaining_time": "0:16:03"}
|
| 181 |
+
{"current_steps": 1810, "total_steps": 5000, "loss": 0.809, "lr": 4.02536290975317e-05, "epoch": 0.362, "percentage": 36.2, "elapsed_time": "0:09:04", "remaining_time": "0:16:00"}
|
| 182 |
+
{"current_steps": 1820, "total_steps": 5000, "loss": 0.9603, "lr": 4.011497787155938e-05, "epoch": 0.364, "percentage": 36.4, "elapsed_time": "0:09:07", "remaining_time": "0:15:56"}
|
| 183 |
+
{"current_steps": 1830, "total_steps": 5000, "loss": 2.463, "lr": 3.997558996288965e-05, "epoch": 0.366, "percentage": 36.6, "elapsed_time": "0:09:10", "remaining_time": "0:15:53"}
|
| 184 |
+
{"current_steps": 1840, "total_steps": 5000, "loss": 0.6611, "lr": 3.983547216509254e-05, "epoch": 0.368, "percentage": 36.8, "elapsed_time": "0:09:13", "remaining_time": "0:15:50"}
|
| 185 |
+
{"current_steps": 1850, "total_steps": 5000, "loss": 0.8496, "lr": 3.969463130731183e-05, "epoch": 0.37, "percentage": 37.0, "elapsed_time": "0:09:15", "remaining_time": "0:15:46"}
|
| 186 |
+
{"current_steps": 1860, "total_steps": 5000, "loss": 2.0151, "lr": 3.955307425393224e-05, "epoch": 0.372, "percentage": 37.2, "elapsed_time": "0:09:18", "remaining_time": "0:15:43"}
|
| 187 |
+
{"current_steps": 1870, "total_steps": 5000, "loss": 1.1714, "lr": 3.941080790424484e-05, "epoch": 0.374, "percentage": 37.4, "elapsed_time": "0:09:21", "remaining_time": "0:15:40"}
|
| 188 |
+
{"current_steps": 1880, "total_steps": 5000, "loss": 2.1127, "lr": 3.92678391921108e-05, "epoch": 0.376, "percentage": 37.6, "elapsed_time": "0:09:24", "remaining_time": "0:15:37"}
|
| 189 |
+
{"current_steps": 1890, "total_steps": 5000, "loss": 3.0312, "lr": 3.912417508562345e-05, "epoch": 0.378, "percentage": 37.8, "elapsed_time": "0:09:27", "remaining_time": "0:15:34"}
|
| 190 |
+
{"current_steps": 1900, "total_steps": 5000, "loss": 1.052, "lr": 3.897982258676867e-05, "epoch": 0.38, "percentage": 38.0, "elapsed_time": "0:09:30", "remaining_time": "0:15:30"}
|
| 191 |
+
{"current_steps": 1910, "total_steps": 5000, "loss": 1.2737, "lr": 3.883478873108361e-05, "epoch": 0.382, "percentage": 38.2, "elapsed_time": "0:09:33", "remaining_time": "0:15:27"}
|
| 192 |
+
{"current_steps": 1920, "total_steps": 5000, "loss": 0.487, "lr": 3.868908058731376e-05, "epoch": 0.384, "percentage": 38.4, "elapsed_time": "0:09:36", "remaining_time": "0:15:24"}
|
| 193 |
+
{"current_steps": 1930, "total_steps": 5000, "loss": 1.1498, "lr": 3.85427052570685e-05, "epoch": 0.386, "percentage": 38.6, "elapsed_time": "0:09:39", "remaining_time": "0:15:21"}
|
| 194 |
+
{"current_steps": 1940, "total_steps": 5000, "loss": 1.3501, "lr": 3.8395669874474915e-05, "epoch": 0.388, "percentage": 38.8, "elapsed_time": "0:09:42", "remaining_time": "0:15:18"}
|
| 195 |
+
{"current_steps": 1950, "total_steps": 5000, "loss": 0.695, "lr": 3.824798160583012e-05, "epoch": 0.39, "percentage": 39.0, "elapsed_time": "0:09:45", "remaining_time": "0:15:15"}
|
| 196 |
+
{"current_steps": 1960, "total_steps": 5000, "loss": 1.1411, "lr": 3.8099647649251986e-05, "epoch": 0.392, "percentage": 39.2, "elapsed_time": "0:09:48", "remaining_time": "0:15:12"}
|
| 197 |
+
{"current_steps": 1970, "total_steps": 5000, "loss": 1.3583, "lr": 3.795067523432826e-05, "epoch": 0.394, "percentage": 39.4, "elapsed_time": "0:09:51", "remaining_time": "0:15:09"}
|
| 198 |
+
{"current_steps": 1980, "total_steps": 5000, "loss": 1.43, "lr": 3.780107162176429e-05, "epoch": 0.396, "percentage": 39.6, "elapsed_time": "0:09:54", "remaining_time": "0:15:06"}
|
| 199 |
+
{"current_steps": 1990, "total_steps": 5000, "loss": 2.6459, "lr": 3.765084410302909e-05, "epoch": 0.398, "percentage": 39.8, "elapsed_time": "0:09:56", "remaining_time": "0:15:02"}
|
| 200 |
+
{"current_steps": 2000, "total_steps": 5000, "loss": 2.1941, "lr": 3.7500000000000003e-05, "epoch": 0.4, "percentage": 40.0, "elapsed_time": "0:09:59", "remaining_time": "0:14:59"}
|
| 201 |
+
{"current_steps": 2010, "total_steps": 5000, "loss": 1.1788, "lr": 3.7348546664605777e-05, "epoch": 0.402, "percentage": 40.2, "elapsed_time": "0:10:02", "remaining_time": "0:14:56"}
|
| 202 |
+
{"current_steps": 2020, "total_steps": 5000, "loss": 0.6889, "lr": 3.719649147846832e-05, "epoch": 0.404, "percentage": 40.4, "elapsed_time": "0:10:05", "remaining_time": "0:14:53"}
|
| 203 |
+
{"current_steps": 2030, "total_steps": 5000, "loss": 1.0726, "lr": 3.704384185254288e-05, "epoch": 0.406, "percentage": 40.6, "elapsed_time": "0:10:08", "remaining_time": "0:14:50"}
|
| 204 |
+
{"current_steps": 2040, "total_steps": 5000, "loss": 1.1677, "lr": 3.689060522675689e-05, "epoch": 0.408, "percentage": 40.8, "elapsed_time": "0:10:11", "remaining_time": "0:14:47"}
|
| 205 |
+
{"current_steps": 2050, "total_steps": 5000, "loss": 1.1147, "lr": 3.673678906964727e-05, "epoch": 0.41, "percentage": 41.0, "elapsed_time": "0:10:14", "remaining_time": "0:14:44"}
|
| 206 |
+
{"current_steps": 2060, "total_steps": 5000, "loss": 0.9126, "lr": 3.6582400877996546e-05, "epoch": 0.412, "percentage": 41.2, "elapsed_time": "0:10:17", "remaining_time": "0:14:41"}
|
| 207 |
+
{"current_steps": 2070, "total_steps": 5000, "loss": 2.0398, "lr": 3.642744817646736e-05, "epoch": 0.414, "percentage": 41.4, "elapsed_time": "0:10:20", "remaining_time": "0:14:38"}
|
| 208 |
+
{"current_steps": 2080, "total_steps": 5000, "loss": 1.3157, "lr": 3.627193851723577e-05, "epoch": 0.416, "percentage": 41.6, "elapsed_time": "0:10:23", "remaining_time": "0:14:35"}
|
| 209 |
+
{"current_steps": 2090, "total_steps": 5000, "loss": 0.8404, "lr": 3.611587947962319e-05, "epoch": 0.418, "percentage": 41.8, "elapsed_time": "0:10:26", "remaining_time": "0:14:31"}
|
| 210 |
+
{"current_steps": 2100, "total_steps": 5000, "loss": 1.571, "lr": 3.5959278669726935e-05, "epoch": 0.42, "percentage": 42.0, "elapsed_time": "0:10:28", "remaining_time": "0:14:28"}
|
| 211 |
+
{"current_steps": 2110, "total_steps": 5000, "loss": 1.7026, "lr": 3.580214372004956e-05, "epoch": 0.422, "percentage": 42.2, "elapsed_time": "0:10:31", "remaining_time": "0:14:25"}
|
| 212 |
+
{"current_steps": 2120, "total_steps": 5000, "loss": 0.7502, "lr": 3.564448228912682e-05, "epoch": 0.424, "percentage": 42.4, "elapsed_time": "0:10:34", "remaining_time": "0:14:22"}
|
| 213 |
+
{"current_steps": 2130, "total_steps": 5000, "loss": 0.7163, "lr": 3.548630206115443e-05, "epoch": 0.426, "percentage": 42.6, "elapsed_time": "0:10:37", "remaining_time": "0:14:18"}
|
| 214 |
+
{"current_steps": 2140, "total_steps": 5000, "loss": 1.2891, "lr": 3.532761074561355e-05, "epoch": 0.428, "percentage": 42.8, "elapsed_time": "0:10:40", "remaining_time": "0:14:15"}
|
| 215 |
+
{"current_steps": 2150, "total_steps": 5000, "loss": 2.0099, "lr": 3.516841607689501e-05, "epoch": 0.43, "percentage": 43.0, "elapsed_time": "0:10:42", "remaining_time": "0:14:11"}
|
| 216 |
+
{"current_steps": 2160, "total_steps": 5000, "loss": 2.6718, "lr": 3.5008725813922386e-05, "epoch": 0.432, "percentage": 43.2, "elapsed_time": "0:10:45", "remaining_time": "0:14:08"}
|
| 217 |
+
{"current_steps": 2170, "total_steps": 5000, "loss": 3.9758, "lr": 3.484854773977378e-05, "epoch": 0.434, "percentage": 43.4, "elapsed_time": "0:10:48", "remaining_time": "0:14:05"}
|
| 218 |
+
{"current_steps": 2180, "total_steps": 5000, "loss": 4.9642, "lr": 3.4687889661302576e-05, "epoch": 0.436, "percentage": 43.6, "elapsed_time": "0:10:51", "remaining_time": "0:14:02"}
|
| 219 |
+
{"current_steps": 2190, "total_steps": 5000, "loss": 1.3115, "lr": 3.452675940875686e-05, "epoch": 0.438, "percentage": 43.8, "elapsed_time": "0:10:54", "remaining_time": "0:13:59"}
|
| 220 |
+
{"current_steps": 2200, "total_steps": 5000, "loss": 2.1822, "lr": 3.436516483539781e-05, "epoch": 0.44, "percentage": 44.0, "elapsed_time": "0:10:56", "remaining_time": "0:13:55"}
|
| 221 |
+
{"current_steps": 2210, "total_steps": 5000, "loss": 0.6386, "lr": 3.4203113817116957e-05, "epoch": 0.442, "percentage": 44.2, "elapsed_time": "0:10:59", "remaining_time": "0:13:52"}
|
| 222 |
+
{"current_steps": 2220, "total_steps": 5000, "loss": 0.6365, "lr": 3.4040614252052305e-05, "epoch": 0.444, "percentage": 44.4, "elapsed_time": "0:11:02", "remaining_time": "0:13:49"}
|
| 223 |
+
{"current_steps": 2230, "total_steps": 5000, "loss": 0.897, "lr": 3.387767406020343e-05, "epoch": 0.446, "percentage": 44.6, "elapsed_time": "0:11:05", "remaining_time": "0:13:46"}
|
| 224 |
+
{"current_steps": 2240, "total_steps": 5000, "loss": 1.4772, "lr": 3.3714301183045385e-05, "epoch": 0.448, "percentage": 44.8, "elapsed_time": "0:11:08", "remaining_time": "0:13:43"}
|
| 225 |
+
{"current_steps": 2250, "total_steps": 5000, "loss": 0.6316, "lr": 3.355050358314172e-05, "epoch": 0.45, "percentage": 45.0, "elapsed_time": "0:11:11", "remaining_time": "0:13:40"}
|
| 226 |
+
{"current_steps": 2260, "total_steps": 5000, "loss": 0.5972, "lr": 3.338628924375638e-05, "epoch": 0.452, "percentage": 45.2, "elapsed_time": "0:11:14", "remaining_time": "0:13:37"}
|
| 227 |
+
{"current_steps": 2270, "total_steps": 5000, "loss": 0.7165, "lr": 3.322166616846458e-05, "epoch": 0.454, "percentage": 45.4, "elapsed_time": "0:11:16", "remaining_time": "0:13:34"}
|
| 228 |
+
{"current_steps": 2280, "total_steps": 5000, "loss": 1.8711, "lr": 3.305664238076278e-05, "epoch": 0.456, "percentage": 45.6, "elapsed_time": "0:11:19", "remaining_time": "0:13:31"}
|
| 229 |
+
{"current_steps": 2290, "total_steps": 5000, "loss": 0.9576, "lr": 3.289122592367757e-05, "epoch": 0.458, "percentage": 45.8, "elapsed_time": "0:11:23", "remaining_time": "0:13:28"}
|
| 230 |
+
{"current_steps": 2300, "total_steps": 5000, "loss": 1.4457, "lr": 3.272542485937369e-05, "epoch": 0.46, "percentage": 46.0, "elapsed_time": "0:11:25", "remaining_time": "0:13:25"}
|
| 231 |
+
{"current_steps": 2310, "total_steps": 5000, "loss": 0.9553, "lr": 3.2559247268761115e-05, "epoch": 0.462, "percentage": 46.2, "elapsed_time": "0:11:29", "remaining_time": "0:13:22"}
|
| 232 |
+
{"current_steps": 2320, "total_steps": 5000, "loss": 1.3703, "lr": 3.239270125110117e-05, "epoch": 0.464, "percentage": 46.4, "elapsed_time": "0:11:31", "remaining_time": "0:13:19"}
|
| 233 |
+
{"current_steps": 2330, "total_steps": 5000, "loss": 1.9259, "lr": 3.222579492361179e-05, "epoch": 0.466, "percentage": 46.6, "elapsed_time": "0:11:34", "remaining_time": "0:13:16"}
|
| 234 |
+
{"current_steps": 2340, "total_steps": 5000, "loss": 0.9021, "lr": 3.205853642107192e-05, "epoch": 0.468, "percentage": 46.8, "elapsed_time": "0:11:37", "remaining_time": "0:13:13"}
|
| 235 |
+
{"current_steps": 2350, "total_steps": 5000, "loss": 2.2195, "lr": 3.1890933895424976e-05, "epoch": 0.47, "percentage": 47.0, "elapsed_time": "0:11:40", "remaining_time": "0:13:10"}
|
| 236 |
+
{"current_steps": 2360, "total_steps": 5000, "loss": 0.724, "lr": 3.172299551538164e-05, "epoch": 0.472, "percentage": 47.2, "elapsed_time": "0:11:43", "remaining_time": "0:13:07"}
|
| 237 |
+
{"current_steps": 2370, "total_steps": 5000, "loss": 2.0286, "lr": 3.155472946602162e-05, "epoch": 0.474, "percentage": 47.4, "elapsed_time": "0:11:46", "remaining_time": "0:13:04"}
|
| 238 |
+
{"current_steps": 2380, "total_steps": 5000, "loss": 0.9387, "lr": 3.138614394839476e-05, "epoch": 0.476, "percentage": 47.6, "elapsed_time": "0:11:49", "remaining_time": "0:13:01"}
|
| 239 |
+
{"current_steps": 2390, "total_steps": 5000, "loss": 0.8844, "lr": 3.121724717912138e-05, "epoch": 0.478, "percentage": 47.8, "elapsed_time": "0:11:52", "remaining_time": "0:12:58"}
|
| 240 |
+
{"current_steps": 2400, "total_steps": 5000, "loss": 0.8334, "lr": 3.104804738999169e-05, "epoch": 0.48, "percentage": 48.0, "elapsed_time": "0:11:55", "remaining_time": "0:12:55"}
|
| 241 |
+
{"current_steps": 2410, "total_steps": 5000, "loss": 0.7062, "lr": 3.087855282756475e-05, "epoch": 0.482, "percentage": 48.2, "elapsed_time": "0:11:58", "remaining_time": "0:12:52"}
|
| 242 |
+
{"current_steps": 2420, "total_steps": 5000, "loss": 1.1329, "lr": 3.0708771752766394e-05, "epoch": 0.484, "percentage": 48.4, "elapsed_time": "0:12:01", "remaining_time": "0:12:49"}
|
| 243 |
+
{"current_steps": 2430, "total_steps": 5000, "loss": 3.1218, "lr": 3.053871244048669e-05, "epoch": 0.486, "percentage": 48.6, "elapsed_time": "0:12:04", "remaining_time": "0:12:46"}
|
| 244 |
+
{"current_steps": 2440, "total_steps": 5000, "loss": 1.795, "lr": 3.0368383179176585e-05, "epoch": 0.488, "percentage": 48.8, "elapsed_time": "0:12:07", "remaining_time": "0:12:43"}
|
| 245 |
+
{"current_steps": 2450, "total_steps": 5000, "loss": 1.3635, "lr": 3.0197792270443982e-05, "epoch": 0.49, "percentage": 49.0, "elapsed_time": "0:12:10", "remaining_time": "0:12:39"}
|
| 246 |
+
{"current_steps": 2460, "total_steps": 5000, "loss": 1.5214, "lr": 3.002694802864912e-05, "epoch": 0.492, "percentage": 49.2, "elapsed_time": "0:12:12", "remaining_time": "0:12:36"}
|
| 247 |
+
{"current_steps": 2470, "total_steps": 5000, "loss": 1.1378, "lr": 2.98558587804993e-05, "epoch": 0.494, "percentage": 49.4, "elapsed_time": "0:12:15", "remaining_time": "0:12:33"}
|
| 248 |
+
{"current_steps": 2480, "total_steps": 5000, "loss": 1.3969, "lr": 2.9684532864643122e-05, "epoch": 0.496, "percentage": 49.6, "elapsed_time": "0:12:18", "remaining_time": "0:12:30"}
|
| 249 |
+
{"current_steps": 2490, "total_steps": 5000, "loss": 0.7386, "lr": 2.9512978631264006e-05, "epoch": 0.498, "percentage": 49.8, "elapsed_time": "0:12:21", "remaining_time": "0:12:27"}
|
| 250 |
+
{"current_steps": 2500, "total_steps": 5000, "loss": 1.2025, "lr": 2.9341204441673266e-05, "epoch": 0.5, "percentage": 50.0, "elapsed_time": "0:12:24", "remaining_time": "0:12:24"}
|
| 251 |
+
{"current_steps": 2510, "total_steps": 5000, "loss": 0.8712, "lr": 2.916921866790256e-05, "epoch": 0.502, "percentage": 50.2, "elapsed_time": "0:12:27", "remaining_time": "0:12:21"}
|
| 252 |
+
{"current_steps": 2520, "total_steps": 5000, "loss": 2.7228, "lr": 2.8997029692295874e-05, "epoch": 0.504, "percentage": 50.4, "elapsed_time": "0:12:30", "remaining_time": "0:12:18"}
|
| 253 |
+
{"current_steps": 2530, "total_steps": 5000, "loss": 1.6352, "lr": 2.8824645907100954e-05, "epoch": 0.506, "percentage": 50.6, "elapsed_time": "0:12:33", "remaining_time": "0:12:15"}
|
| 254 |
+
{"current_steps": 2540, "total_steps": 5000, "loss": 0.7708, "lr": 2.8652075714060295e-05, "epoch": 0.508, "percentage": 50.8, "elapsed_time": "0:12:36", "remaining_time": "0:12:12"}
|
| 255 |
+
{"current_steps": 2550, "total_steps": 5000, "loss": 1.7158, "lr": 2.8479327524001636e-05, "epoch": 0.51, "percentage": 51.0, "elapsed_time": "0:12:38", "remaining_time": "0:12:09"}
|
| 256 |
+
{"current_steps": 2560, "total_steps": 5000, "loss": 1.9952, "lr": 2.8306409756428064e-05, "epoch": 0.512, "percentage": 51.2, "elapsed_time": "0:12:41", "remaining_time": "0:12:05"}
|
| 257 |
+
{"current_steps": 2570, "total_steps": 5000, "loss": 1.7763, "lr": 2.8133330839107608e-05, "epoch": 0.514, "percentage": 51.4, "elapsed_time": "0:12:44", "remaining_time": "0:12:02"}
|
| 258 |
+
{"current_steps": 2580, "total_steps": 5000, "loss": 1.1427, "lr": 2.7960099207662532e-05, "epoch": 0.516, "percentage": 51.6, "elapsed_time": "0:12:47", "remaining_time": "0:11:59"}
|
| 259 |
+
{"current_steps": 2590, "total_steps": 5000, "loss": 1.3634, "lr": 2.7786723305158136e-05, "epoch": 0.518, "percentage": 51.8, "elapsed_time": "0:12:50", "remaining_time": "0:11:57"}
|
| 260 |
+
{"current_steps": 2600, "total_steps": 5000, "loss": 1.1372, "lr": 2.761321158169134e-05, "epoch": 0.52, "percentage": 52.0, "elapsed_time": "0:12:53", "remaining_time": "0:11:53"}
|
| 261 |
+
{"current_steps": 2610, "total_steps": 5000, "loss": 0.6111, "lr": 2.7439572493978736e-05, "epoch": 0.522, "percentage": 52.2, "elapsed_time": "0:12:56", "remaining_time": "0:11:50"}
|
| 262 |
+
{"current_steps": 2620, "total_steps": 5000, "loss": 1.1249, "lr": 2.726581450494451e-05, "epoch": 0.524, "percentage": 52.4, "elapsed_time": "0:12:59", "remaining_time": "0:11:47"}
|
| 263 |
+
{"current_steps": 2630, "total_steps": 5000, "loss": 4.3159, "lr": 2.7091946083307896e-05, "epoch": 0.526, "percentage": 52.6, "elapsed_time": "0:13:02", "remaining_time": "0:11:44"}
|
| 264 |
+
{"current_steps": 2640, "total_steps": 5000, "loss": 0.9282, "lr": 2.6917975703170466e-05, "epoch": 0.528, "percentage": 52.8, "elapsed_time": "0:13:04", "remaining_time": "0:11:41"}
|
| 265 |
+
{"current_steps": 2650, "total_steps": 5000, "loss": 1.3395, "lr": 2.674391184360313e-05, "epoch": 0.53, "percentage": 53.0, "elapsed_time": "0:13:07", "remaining_time": "0:11:38"}
|
| 266 |
+
{"current_steps": 2660, "total_steps": 5000, "loss": 0.7275, "lr": 2.656976298823284e-05, "epoch": 0.532, "percentage": 53.2, "elapsed_time": "0:13:10", "remaining_time": "0:11:35"}
|
| 267 |
+
{"current_steps": 2670, "total_steps": 5000, "loss": 5.1828, "lr": 2.6395537624829096e-05, "epoch": 0.534, "percentage": 53.4, "elapsed_time": "0:13:13", "remaining_time": "0:11:32"}
|
| 268 |
+
{"current_steps": 2680, "total_steps": 5000, "loss": 1.1841, "lr": 2.6221244244890336e-05, "epoch": 0.536, "percentage": 53.6, "elapsed_time": "0:13:15", "remaining_time": "0:11:28"}
|
| 269 |
+
{"current_steps": 2690, "total_steps": 5000, "loss": 2.1432, "lr": 2.604689134322999e-05, "epoch": 0.538, "percentage": 53.8, "elapsed_time": "0:13:18", "remaining_time": "0:11:26"}
|
| 270 |
+
{"current_steps": 2700, "total_steps": 5000, "loss": 1.1397, "lr": 2.587248741756253e-05, "epoch": 0.54, "percentage": 54.0, "elapsed_time": "0:13:21", "remaining_time": "0:11:23"}
|
| 271 |
+
{"current_steps": 2710, "total_steps": 5000, "loss": 1.8865, "lr": 2.5698040968089225e-05, "epoch": 0.542, "percentage": 54.2, "elapsed_time": "0:13:24", "remaining_time": "0:11:20"}
|
| 272 |
+
{"current_steps": 2720, "total_steps": 5000, "loss": 1.7895, "lr": 2.5523560497083926e-05, "epoch": 0.544, "percentage": 54.4, "elapsed_time": "0:13:28", "remaining_time": "0:11:17"}
|
| 273 |
+
{"current_steps": 2730, "total_steps": 5000, "loss": 0.5246, "lr": 2.5349054508478637e-05, "epoch": 0.546, "percentage": 54.6, "elapsed_time": "0:13:30", "remaining_time": "0:11:13"}
|
| 274 |
+
{"current_steps": 2740, "total_steps": 5000, "loss": 2.2288, "lr": 2.517453150744904e-05, "epoch": 0.548, "percentage": 54.8, "elapsed_time": "0:13:33", "remaining_time": "0:11:10"}
|
| 275 |
+
{"current_steps": 2750, "total_steps": 5000, "loss": 1.1424, "lr": 2.5e-05, "epoch": 0.55, "percentage": 55.0, "elapsed_time": "0:13:36", "remaining_time": "0:11:07"}
|
| 276 |
+
{"current_steps": 2760, "total_steps": 5000, "loss": 0.5168, "lr": 2.4825468492550964e-05, "epoch": 0.552, "percentage": 55.2, "elapsed_time": "0:13:38", "remaining_time": "0:11:04"}
|
| 277 |
+
{"current_steps": 2770, "total_steps": 5000, "loss": 1.6828, "lr": 2.4650945491521372e-05, "epoch": 0.554, "percentage": 55.4, "elapsed_time": "0:13:41", "remaining_time": "0:11:01"}
|
| 278 |
+
{"current_steps": 2780, "total_steps": 5000, "loss": 1.3219, "lr": 2.447643950291608e-05, "epoch": 0.556, "percentage": 55.6, "elapsed_time": "0:13:44", "remaining_time": "0:10:58"}
|
| 279 |
+
{"current_steps": 2790, "total_steps": 5000, "loss": 1.511, "lr": 2.4301959031910784e-05, "epoch": 0.558, "percentage": 55.8, "elapsed_time": "0:13:47", "remaining_time": "0:10:55"}
|
| 280 |
+
{"current_steps": 2800, "total_steps": 5000, "loss": 1.5983, "lr": 2.4127512582437485e-05, "epoch": 0.56, "percentage": 56.0, "elapsed_time": "0:13:50", "remaining_time": "0:10:52"}
|
| 281 |
+
{"current_steps": 2810, "total_steps": 5000, "loss": 0.7233, "lr": 2.3953108656770016e-05, "epoch": 0.562, "percentage": 56.2, "elapsed_time": "0:13:53", "remaining_time": "0:10:49"}
|
| 282 |
+
{"current_steps": 2820, "total_steps": 5000, "loss": 1.2128, "lr": 2.377875575510967e-05, "epoch": 0.564, "percentage": 56.4, "elapsed_time": "0:13:56", "remaining_time": "0:10:46"}
|
| 283 |
+
{"current_steps": 2830, "total_steps": 5000, "loss": 1.3444, "lr": 2.3604462375170906e-05, "epoch": 0.566, "percentage": 56.6, "elapsed_time": "0:13:59", "remaining_time": "0:10:43"}
|
| 284 |
+
{"current_steps": 2840, "total_steps": 5000, "loss": 0.6756, "lr": 2.3430237011767167e-05, "epoch": 0.568, "percentage": 56.8, "elapsed_time": "0:14:02", "remaining_time": "0:10:41"}
|
| 285 |
+
{"current_steps": 2850, "total_steps": 5000, "loss": 2.1279, "lr": 2.3256088156396868e-05, "epoch": 0.57, "percentage": 57.0, "elapsed_time": "0:14:05", "remaining_time": "0:10:37"}
|
| 286 |
+
{"current_steps": 2860, "total_steps": 5000, "loss": 1.2081, "lr": 2.3082024296829536e-05, "epoch": 0.572, "percentage": 57.2, "elapsed_time": "0:14:08", "remaining_time": "0:10:34"}
|
| 287 |
+
{"current_steps": 2870, "total_steps": 5000, "loss": 1.6913, "lr": 2.2908053916692117e-05, "epoch": 0.574, "percentage": 57.4, "elapsed_time": "0:14:11", "remaining_time": "0:10:32"}
|
| 288 |
+
{"current_steps": 2880, "total_steps": 5000, "loss": 0.7114, "lr": 2.2734185495055503e-05, "epoch": 0.576, "percentage": 57.6, "elapsed_time": "0:14:14", "remaining_time": "0:10:29"}
|
| 289 |
+
{"current_steps": 2890, "total_steps": 5000, "loss": 1.5252, "lr": 2.2560427506021266e-05, "epoch": 0.578, "percentage": 57.8, "elapsed_time": "0:14:17", "remaining_time": "0:10:26"}
|
| 290 |
+
{"current_steps": 2900, "total_steps": 5000, "loss": 1.2513, "lr": 2.238678841830867e-05, "epoch": 0.58, "percentage": 58.0, "elapsed_time": "0:14:20", "remaining_time": "0:10:23"}
|
| 291 |
+
{"current_steps": 2910, "total_steps": 5000, "loss": 1.5359, "lr": 2.2213276694841866e-05, "epoch": 0.582, "percentage": 58.2, "elapsed_time": "0:14:23", "remaining_time": "0:10:20"}
|
| 292 |
+
{"current_steps": 2920, "total_steps": 5000, "loss": 1.3664, "lr": 2.2039900792337474e-05, "epoch": 0.584, "percentage": 58.4, "elapsed_time": "0:14:26", "remaining_time": "0:10:17"}
|
| 293 |
+
{"current_steps": 2930, "total_steps": 5000, "loss": 1.102, "lr": 2.186666916089239e-05, "epoch": 0.586, "percentage": 58.6, "elapsed_time": "0:14:29", "remaining_time": "0:10:14"}
|
| 294 |
+
{"current_steps": 2940, "total_steps": 5000, "loss": 1.5992, "lr": 2.1693590243571938e-05, "epoch": 0.588, "percentage": 58.8, "elapsed_time": "0:14:32", "remaining_time": "0:10:11"}
|
| 295 |
+
{"current_steps": 2950, "total_steps": 5000, "loss": 0.8194, "lr": 2.1520672475998373e-05, "epoch": 0.59, "percentage": 59.0, "elapsed_time": "0:14:35", "remaining_time": "0:10:08"}
|
| 296 |
+
{"current_steps": 2960, "total_steps": 5000, "loss": 0.8753, "lr": 2.1347924285939714e-05, "epoch": 0.592, "percentage": 59.2, "elapsed_time": "0:14:38", "remaining_time": "0:10:05"}
|
| 297 |
+
{"current_steps": 2970, "total_steps": 5000, "loss": 1.8568, "lr": 2.117535409289905e-05, "epoch": 0.594, "percentage": 59.4, "elapsed_time": "0:14:41", "remaining_time": "0:10:02"}
|
| 298 |
+
{"current_steps": 2980, "total_steps": 5000, "loss": 2.0932, "lr": 2.1002970307704132e-05, "epoch": 0.596, "percentage": 59.6, "elapsed_time": "0:14:44", "remaining_time": "0:09:59"}
|
| 299 |
+
{"current_steps": 2990, "total_steps": 5000, "loss": 0.6447, "lr": 2.0830781332097446e-05, "epoch": 0.598, "percentage": 59.8, "elapsed_time": "0:14:47", "remaining_time": "0:09:56"}
|
| 300 |
+
{"current_steps": 3000, "total_steps": 5000, "loss": 0.8866, "lr": 2.0658795558326743e-05, "epoch": 0.6, "percentage": 60.0, "elapsed_time": "0:14:50", "remaining_time": "0:09:53"}
|
| 301 |
+
{"current_steps": 3010, "total_steps": 5000, "loss": 2.3415, "lr": 2.0487021368736003e-05, "epoch": 0.602, "percentage": 60.2, "elapsed_time": "0:14:52", "remaining_time": "0:09:50"}
|
| 302 |
+
{"current_steps": 3020, "total_steps": 5000, "loss": 1.244, "lr": 2.031546713535688e-05, "epoch": 0.604, "percentage": 60.4, "elapsed_time": "0:14:55", "remaining_time": "0:09:47"}
|
| 303 |
+
{"current_steps": 3030, "total_steps": 5000, "loss": 2.6138, "lr": 2.0144141219500705e-05, "epoch": 0.606, "percentage": 60.6, "elapsed_time": "0:14:58", "remaining_time": "0:09:44"}
|
| 304 |
+
{"current_steps": 3040, "total_steps": 5000, "loss": 0.8315, "lr": 1.9973051971350888e-05, "epoch": 0.608, "percentage": 60.8, "elapsed_time": "0:15:01", "remaining_time": "0:09:41"}
|
| 305 |
+
{"current_steps": 3050, "total_steps": 5000, "loss": 4.5689, "lr": 1.980220772955602e-05, "epoch": 0.61, "percentage": 61.0, "elapsed_time": "0:15:04", "remaining_time": "0:09:38"}
|
| 306 |
+
{"current_steps": 3060, "total_steps": 5000, "loss": 1.2894, "lr": 1.963161682082342e-05, "epoch": 0.612, "percentage": 61.2, "elapsed_time": "0:15:07", "remaining_time": "0:09:35"}
|
| 307 |
+
{"current_steps": 3070, "total_steps": 5000, "loss": 1.1867, "lr": 1.946128755951332e-05, "epoch": 0.614, "percentage": 61.4, "elapsed_time": "0:15:10", "remaining_time": "0:09:32"}
|
| 308 |
+
{"current_steps": 3080, "total_steps": 5000, "loss": 1.8681, "lr": 1.9291228247233605e-05, "epoch": 0.616, "percentage": 61.6, "elapsed_time": "0:15:13", "remaining_time": "0:09:29"}
|
| 309 |
+
{"current_steps": 3090, "total_steps": 5000, "loss": 1.1927, "lr": 1.912144717243525e-05, "epoch": 0.618, "percentage": 61.8, "elapsed_time": "0:15:16", "remaining_time": "0:09:26"}
|
| 310 |
+
{"current_steps": 3100, "total_steps": 5000, "loss": 1.6265, "lr": 1.895195261000831e-05, "epoch": 0.62, "percentage": 62.0, "elapsed_time": "0:15:19", "remaining_time": "0:09:23"}
|
| 311 |
+
{"current_steps": 3110, "total_steps": 5000, "loss": 1.3608, "lr": 1.8782752820878634e-05, "epoch": 0.622, "percentage": 62.2, "elapsed_time": "0:15:21", "remaining_time": "0:09:20"}
|
| 312 |
+
{"current_steps": 3120, "total_steps": 5000, "loss": 1.4472, "lr": 1.8613856051605243e-05, "epoch": 0.624, "percentage": 62.4, "elapsed_time": "0:15:24", "remaining_time": "0:09:17"}
|
| 313 |
+
{"current_steps": 3130, "total_steps": 5000, "loss": 0.6093, "lr": 1.8445270533978388e-05, "epoch": 0.626, "percentage": 62.6, "elapsed_time": "0:15:27", "remaining_time": "0:09:14"}
|
| 314 |
+
{"current_steps": 3140, "total_steps": 5000, "loss": 1.3848, "lr": 1.827700448461836e-05, "epoch": 0.628, "percentage": 62.8, "elapsed_time": "0:15:30", "remaining_time": "0:09:11"}
|
| 315 |
+
{"current_steps": 3150, "total_steps": 5000, "loss": 1.1096, "lr": 1.8109066104575023e-05, "epoch": 0.63, "percentage": 63.0, "elapsed_time": "0:15:33", "remaining_time": "0:09:08"}
|
| 316 |
+
{"current_steps": 3160, "total_steps": 5000, "loss": 0.8174, "lr": 1.7941463578928086e-05, "epoch": 0.632, "percentage": 63.2, "elapsed_time": "0:15:35", "remaining_time": "0:09:04"}
|
| 317 |
+
{"current_steps": 3170, "total_steps": 5000, "loss": 1.5061, "lr": 1.7774205076388206e-05, "epoch": 0.634, "percentage": 63.4, "elapsed_time": "0:15:38", "remaining_time": "0:09:01"}
|
| 318 |
+
{"current_steps": 3180, "total_steps": 5000, "loss": 0.7052, "lr": 1.7607298748898842e-05, "epoch": 0.636, "percentage": 63.6, "elapsed_time": "0:15:41", "remaining_time": "0:08:58"}
|
| 319 |
+
{"current_steps": 3190, "total_steps": 5000, "loss": 2.0225, "lr": 1.744075273123889e-05, "epoch": 0.638, "percentage": 63.8, "elapsed_time": "0:15:44", "remaining_time": "0:08:55"}
|
| 320 |
+
{"current_steps": 3200, "total_steps": 5000, "loss": 2.2687, "lr": 1.7274575140626318e-05, "epoch": 0.64, "percentage": 64.0, "elapsed_time": "0:15:47", "remaining_time": "0:08:52"}
|
| 321 |
+
{"current_steps": 3210, "total_steps": 5000, "loss": 0.6663, "lr": 1.7108774076322443e-05, "epoch": 0.642, "percentage": 64.2, "elapsed_time": "0:15:50", "remaining_time": "0:08:49"}
|
| 322 |
+
{"current_steps": 3220, "total_steps": 5000, "loss": 1.259, "lr": 1.6943357619237226e-05, "epoch": 0.644, "percentage": 64.4, "elapsed_time": "0:15:53", "remaining_time": "0:08:47"}
|
| 323 |
+
{"current_steps": 3230, "total_steps": 5000, "loss": 1.1647, "lr": 1.677833383153542e-05, "epoch": 0.646, "percentage": 64.6, "elapsed_time": "0:15:56", "remaining_time": "0:08:44"}
|
| 324 |
+
{"current_steps": 3240, "total_steps": 5000, "loss": 0.9512, "lr": 1.6613710756243626e-05, "epoch": 0.648, "percentage": 64.8, "elapsed_time": "0:15:59", "remaining_time": "0:08:40"}
|
| 325 |
+
{"current_steps": 3250, "total_steps": 5000, "loss": 3.6506, "lr": 1.6449496416858284e-05, "epoch": 0.65, "percentage": 65.0, "elapsed_time": "0:16:02", "remaining_time": "0:08:38"}
|
| 326 |
+
{"current_steps": 3260, "total_steps": 5000, "loss": 1.8059, "lr": 1.6285698816954624e-05, "epoch": 0.652, "percentage": 65.2, "elapsed_time": "0:16:05", "remaining_time": "0:08:35"}
|
| 327 |
+
{"current_steps": 3270, "total_steps": 5000, "loss": 1.8584, "lr": 1.612232593979658e-05, "epoch": 0.654, "percentage": 65.4, "elapsed_time": "0:16:07", "remaining_time": "0:08:32"}
|
| 328 |
+
{"current_steps": 3280, "total_steps": 5000, "loss": 0.8049, "lr": 1.5959385747947698e-05, "epoch": 0.656, "percentage": 65.6, "elapsed_time": "0:16:10", "remaining_time": "0:08:28"}
|
| 329 |
+
{"current_steps": 3290, "total_steps": 5000, "loss": 1.5368, "lr": 1.5796886182883053e-05, "epoch": 0.658, "percentage": 65.8, "elapsed_time": "0:16:13", "remaining_time": "0:08:26"}
|
| 330 |
+
{"current_steps": 3300, "total_steps": 5000, "loss": 1.5826, "lr": 1.56348351646022e-05, "epoch": 0.66, "percentage": 66.0, "elapsed_time": "0:16:16", "remaining_time": "0:08:22"}
|
| 331 |
+
{"current_steps": 3310, "total_steps": 5000, "loss": 0.8753, "lr": 1.547324059124315e-05, "epoch": 0.662, "percentage": 66.2, "elapsed_time": "0:16:19", "remaining_time": "0:08:20"}
|
| 332 |
+
{"current_steps": 3320, "total_steps": 5000, "loss": 1.7769, "lr": 1.5312110338697426e-05, "epoch": 0.664, "percentage": 66.4, "elapsed_time": "0:16:22", "remaining_time": "0:08:16"}
|
| 333 |
+
{"current_steps": 3330, "total_steps": 5000, "loss": 3.1034, "lr": 1.5151452260226224e-05, "epoch": 0.666, "percentage": 66.6, "elapsed_time": "0:16:24", "remaining_time": "0:08:13"}
|
| 334 |
+
{"current_steps": 3340, "total_steps": 5000, "loss": 2.3354, "lr": 1.4991274186077632e-05, "epoch": 0.668, "percentage": 66.8, "elapsed_time": "0:16:27", "remaining_time": "0:08:10"}
|
| 335 |
+
{"current_steps": 3350, "total_steps": 5000, "loss": 0.603, "lr": 1.4831583923104999e-05, "epoch": 0.67, "percentage": 67.0, "elapsed_time": "0:16:30", "remaining_time": "0:08:07"}
|
| 336 |
+
{"current_steps": 3360, "total_steps": 5000, "loss": 0.6025, "lr": 1.467238925438646e-05, "epoch": 0.672, "percentage": 67.2, "elapsed_time": "0:16:33", "remaining_time": "0:08:05"}
|
| 337 |
+
{"current_steps": 3370, "total_steps": 5000, "loss": 0.9105, "lr": 1.4513697938845572e-05, "epoch": 0.674, "percentage": 67.4, "elapsed_time": "0:16:36", "remaining_time": "0:08:01"}
|
| 338 |
+
{"current_steps": 3380, "total_steps": 5000, "loss": 0.8274, "lr": 1.4355517710873184e-05, "epoch": 0.676, "percentage": 67.6, "elapsed_time": "0:16:39", "remaining_time": "0:07:59"}
|
| 339 |
+
{"current_steps": 3390, "total_steps": 5000, "loss": 1.5935, "lr": 1.4197856279950438e-05, "epoch": 0.678, "percentage": 67.8, "elapsed_time": "0:16:42", "remaining_time": "0:07:56"}
|
| 340 |
+
{"current_steps": 3400, "total_steps": 5000, "loss": 0.7384, "lr": 1.4040721330273062e-05, "epoch": 0.68, "percentage": 68.0, "elapsed_time": "0:16:44", "remaining_time": "0:07:52"}
|
| 341 |
+
{"current_steps": 3410, "total_steps": 5000, "loss": 1.8132, "lr": 1.388412052037682e-05, "epoch": 0.682, "percentage": 68.2, "elapsed_time": "0:16:47", "remaining_time": "0:07:49"}
|
| 342 |
+
{"current_steps": 3420, "total_steps": 5000, "loss": 0.7159, "lr": 1.3728061482764238e-05, "epoch": 0.684, "percentage": 68.4, "elapsed_time": "0:16:50", "remaining_time": "0:07:46"}
|
| 343 |
+
{"current_steps": 3430, "total_steps": 5000, "loss": 0.8062, "lr": 1.3572551823532654e-05, "epoch": 0.686, "percentage": 68.6, "elapsed_time": "0:16:53", "remaining_time": "0:07:44"}
|
| 344 |
+
{"current_steps": 3440, "total_steps": 5000, "loss": 1.4223, "lr": 1.3417599122003464e-05, "epoch": 0.688, "percentage": 68.8, "elapsed_time": "0:16:56", "remaining_time": "0:07:41"}
|
| 345 |
+
{"current_steps": 3450, "total_steps": 5000, "loss": 0.8253, "lr": 1.3263210930352737e-05, "epoch": 0.69, "percentage": 69.0, "elapsed_time": "0:16:59", "remaining_time": "0:07:38"}
|
| 346 |
+
{"current_steps": 3460, "total_steps": 5000, "loss": 1.6049, "lr": 1.3109394773243117e-05, "epoch": 0.692, "percentage": 69.2, "elapsed_time": "0:17:02", "remaining_time": "0:07:35"}
|
| 347 |
+
{"current_steps": 3470, "total_steps": 5000, "loss": 0.6334, "lr": 1.2956158147457115e-05, "epoch": 0.694, "percentage": 69.4, "elapsed_time": "0:17:04", "remaining_time": "0:07:31"}
|
| 348 |
+
{"current_steps": 3480, "total_steps": 5000, "loss": 1.605, "lr": 1.280350852153168e-05, "epoch": 0.696, "percentage": 69.6, "elapsed_time": "0:17:07", "remaining_time": "0:07:29"}
|
| 349 |
+
{"current_steps": 3490, "total_steps": 5000, "loss": 1.3822, "lr": 1.2651453335394231e-05, "epoch": 0.698, "percentage": 69.8, "elapsed_time": "0:17:10", "remaining_time": "0:07:26"}
|
| 350 |
+
{"current_steps": 3500, "total_steps": 5000, "loss": 1.154, "lr": 1.2500000000000006e-05, "epoch": 0.7, "percentage": 70.0, "elapsed_time": "0:17:13", "remaining_time": "0:07:23"}
|
| 351 |
+
{"current_steps": 3510, "total_steps": 5000, "loss": 1.3894, "lr": 1.234915589697091e-05, "epoch": 0.702, "percentage": 70.2, "elapsed_time": "0:17:16", "remaining_time": "0:07:20"}
|
| 352 |
+
{"current_steps": 3520, "total_steps": 5000, "loss": 0.8913, "lr": 1.2198928378235716e-05, "epoch": 0.704, "percentage": 70.4, "elapsed_time": "0:17:19", "remaining_time": "0:07:17"}
|
| 353 |
+
{"current_steps": 3530, "total_steps": 5000, "loss": 1.4518, "lr": 1.2049324765671749e-05, "epoch": 0.706, "percentage": 70.6, "elapsed_time": "0:17:22", "remaining_time": "0:07:14"}
|
| 354 |
+
{"current_steps": 3540, "total_steps": 5000, "loss": 2.2489, "lr": 1.1900352350748026e-05, "epoch": 0.708, "percentage": 70.8, "elapsed_time": "0:17:25", "remaining_time": "0:07:11"}
|
| 355 |
+
{"current_steps": 3550, "total_steps": 5000, "loss": 0.8946, "lr": 1.175201839416988e-05, "epoch": 0.71, "percentage": 71.0, "elapsed_time": "0:17:28", "remaining_time": "0:07:08"}
|
| 356 |
+
{"current_steps": 3560, "total_steps": 5000, "loss": 0.5814, "lr": 1.1604330125525079e-05, "epoch": 0.712, "percentage": 71.2, "elapsed_time": "0:17:30", "remaining_time": "0:07:04"}
|
| 357 |
+
{"current_steps": 3570, "total_steps": 5000, "loss": 4.9924, "lr": 1.1457294742931507e-05, "epoch": 0.714, "percentage": 71.4, "elapsed_time": "0:17:33", "remaining_time": "0:07:02"}
|
| 358 |
+
{"current_steps": 3580, "total_steps": 5000, "loss": 0.9913, "lr": 1.1310919412686247e-05, "epoch": 0.716, "percentage": 71.6, "elapsed_time": "0:17:36", "remaining_time": "0:06:59"}
|
| 359 |
+
{"current_steps": 3590, "total_steps": 5000, "loss": 4.148, "lr": 1.11652112689164e-05, "epoch": 0.718, "percentage": 71.8, "elapsed_time": "0:17:39", "remaining_time": "0:06:56"}
|
| 360 |
+
{"current_steps": 3600, "total_steps": 5000, "loss": 1.184, "lr": 1.1020177413231334e-05, "epoch": 0.72, "percentage": 72.0, "elapsed_time": "0:17:42", "remaining_time": "0:06:53"}
|
| 361 |
+
{"current_steps": 3610, "total_steps": 5000, "loss": 0.8565, "lr": 1.0875824914376553e-05, "epoch": 0.722, "percentage": 72.2, "elapsed_time": "0:17:45", "remaining_time": "0:06:50"}
|
| 362 |
+
{"current_steps": 3620, "total_steps": 5000, "loss": 1.6703, "lr": 1.0732160807889211e-05, "epoch": 0.724, "percentage": 72.4, "elapsed_time": "0:17:47", "remaining_time": "0:06:47"}
|
| 363 |
+
{"current_steps": 3630, "total_steps": 5000, "loss": 1.2277, "lr": 1.058919209575517e-05, "epoch": 0.726, "percentage": 72.6, "elapsed_time": "0:17:51", "remaining_time": "0:06:44"}
|
| 364 |
+
{"current_steps": 3640, "total_steps": 5000, "loss": 0.7438, "lr": 1.0446925746067768e-05, "epoch": 0.728, "percentage": 72.8, "elapsed_time": "0:17:54", "remaining_time": "0:06:41"}
|
| 365 |
+
{"current_steps": 3650, "total_steps": 5000, "loss": 3.7347, "lr": 1.0305368692688174e-05, "epoch": 0.73, "percentage": 73.0, "elapsed_time": "0:17:57", "remaining_time": "0:06:38"}
|
| 366 |
+
{"current_steps": 3660, "total_steps": 5000, "loss": 1.8496, "lr": 1.0164527834907467e-05, "epoch": 0.732, "percentage": 73.2, "elapsed_time": "0:17:59", "remaining_time": "0:06:35"}
|
| 367 |
+
{"current_steps": 3670, "total_steps": 5000, "loss": 1.2388, "lr": 1.0024410037110357e-05, "epoch": 0.734, "percentage": 73.4, "elapsed_time": "0:18:02", "remaining_time": "0:06:32"}
|
| 368 |
+
{"current_steps": 3680, "total_steps": 5000, "loss": 5.6336, "lr": 9.88502212844063e-06, "epoch": 0.736, "percentage": 73.6, "elapsed_time": "0:18:05", "remaining_time": "0:06:29"}
|
| 369 |
+
{"current_steps": 3690, "total_steps": 5000, "loss": 1.434, "lr": 9.746370902468311e-06, "epoch": 0.738, "percentage": 73.8, "elapsed_time": "0:18:08", "remaining_time": "0:06:26"}
|
| 370 |
+
{"current_steps": 3700, "total_steps": 5000, "loss": 2.9898, "lr": 9.608463116858542e-06, "epoch": 0.74, "percentage": 74.0, "elapsed_time": "0:18:10", "remaining_time": "0:06:23"}
|
| 371 |
+
{"current_steps": 3710, "total_steps": 5000, "loss": 0.9298, "lr": 9.471305493042243e-06, "epoch": 0.742, "percentage": 74.2, "elapsed_time": "0:18:13", "remaining_time": "0:06:20"}
|
| 372 |
+
{"current_steps": 3720, "total_steps": 5000, "loss": 2.3826, "lr": 9.334904715888495e-06, "epoch": 0.744, "percentage": 74.4, "elapsed_time": "0:18:16", "remaining_time": "0:06:17"}
|
| 373 |
+
{"current_steps": 3730, "total_steps": 5000, "loss": 0.8495, "lr": 9.199267433378727e-06, "epoch": 0.746, "percentage": 74.6, "elapsed_time": "0:18:18", "remaining_time": "0:06:14"}
|
| 374 |
+
{"current_steps": 3740, "total_steps": 5000, "loss": 0.6518, "lr": 9.064400256282757e-06, "epoch": 0.748, "percentage": 74.8, "elapsed_time": "0:18:21", "remaining_time": "0:06:11"}
|
| 375 |
+
{"current_steps": 3750, "total_steps": 5000, "loss": 0.8714, "lr": 8.930309757836517e-06, "epoch": 0.75, "percentage": 75.0, "elapsed_time": "0:18:24", "remaining_time": "0:06:08"}
|
| 376 |
+
{"current_steps": 3760, "total_steps": 5000, "loss": 1.378, "lr": 8.797002473421728e-06, "epoch": 0.752, "percentage": 75.2, "elapsed_time": "0:18:27", "remaining_time": "0:06:05"}
|
| 377 |
+
{"current_steps": 3770, "total_steps": 5000, "loss": 1.1048, "lr": 8.664484900247363e-06, "epoch": 0.754, "percentage": 75.4, "elapsed_time": "0:18:30", "remaining_time": "0:06:02"}
|
| 378 |
+
{"current_steps": 3780, "total_steps": 5000, "loss": 0.9402, "lr": 8.532763497032987e-06, "epoch": 0.756, "percentage": 75.6, "elapsed_time": "0:18:33", "remaining_time": "0:05:59"}
|
| 379 |
+
{"current_steps": 3790, "total_steps": 5000, "loss": 1.3253, "lr": 8.40184468369396e-06, "epoch": 0.758, "percentage": 75.8, "elapsed_time": "0:18:36", "remaining_time": "0:05:56"}
|
| 380 |
+
{"current_steps": 3800, "total_steps": 5000, "loss": 1.0062, "lr": 8.271734841028553e-06, "epoch": 0.76, "percentage": 76.0, "elapsed_time": "0:18:39", "remaining_time": "0:05:53"}
|
| 381 |
+
{"current_steps": 3810, "total_steps": 5000, "loss": 0.9064, "lr": 8.142440310406924e-06, "epoch": 0.762, "percentage": 76.2, "elapsed_time": "0:18:42", "remaining_time": "0:05:50"}
|
| 382 |
+
{"current_steps": 3820, "total_steps": 5000, "loss": 3.918, "lr": 8.013967393462094e-06, "epoch": 0.764, "percentage": 76.4, "elapsed_time": "0:18:45", "remaining_time": "0:05:47"}
|
| 383 |
+
{"current_steps": 3830, "total_steps": 5000, "loss": 0.4079, "lr": 7.886322351782783e-06, "epoch": 0.766, "percentage": 76.6, "elapsed_time": "0:18:48", "remaining_time": "0:05:44"}
|
| 384 |
+
{"current_steps": 3840, "total_steps": 5000, "loss": 0.8509, "lr": 7.759511406608255e-06, "epoch": 0.768, "percentage": 76.8, "elapsed_time": "0:18:51", "remaining_time": "0:05:41"}
|
| 385 |
+
{"current_steps": 3850, "total_steps": 5000, "loss": 1.9055, "lr": 7.633540738525066e-06, "epoch": 0.77, "percentage": 77.0, "elapsed_time": "0:18:53", "remaining_time": "0:05:38"}
|
| 386 |
+
{"current_steps": 3860, "total_steps": 5000, "loss": 6.2419, "lr": 7.508416487165862e-06, "epoch": 0.772, "percentage": 77.2, "elapsed_time": "0:18:56", "remaining_time": "0:05:35"}
|
| 387 |
+
{"current_steps": 3870, "total_steps": 5000, "loss": 1.1085, "lr": 7.384144750910133e-06, "epoch": 0.774, "percentage": 77.4, "elapsed_time": "0:18:59", "remaining_time": "0:05:32"}
|
| 388 |
+
{"current_steps": 3880, "total_steps": 5000, "loss": 4.1298, "lr": 7.260731586586983e-06, "epoch": 0.776, "percentage": 77.6, "elapsed_time": "0:19:02", "remaining_time": "0:05:29"}
|
| 389 |
+
{"current_steps": 3890, "total_steps": 5000, "loss": 0.7264, "lr": 7.138183009179922e-06, "epoch": 0.778, "percentage": 77.8, "elapsed_time": "0:19:05", "remaining_time": "0:05:26"}
|
| 390 |
+
{"current_steps": 3900, "total_steps": 5000, "loss": 1.0637, "lr": 7.016504991533726e-06, "epoch": 0.78, "percentage": 78.0, "elapsed_time": "0:19:08", "remaining_time": "0:05:23"}
|
| 391 |
+
{"current_steps": 3910, "total_steps": 5000, "loss": 1.2957, "lr": 6.895703464063319e-06, "epoch": 0.782, "percentage": 78.2, "elapsed_time": "0:19:11", "remaining_time": "0:05:20"}
|
| 392 |
+
{"current_steps": 3920, "total_steps": 5000, "loss": 0.4924, "lr": 6.775784314464717e-06, "epoch": 0.784, "percentage": 78.4, "elapsed_time": "0:19:13", "remaining_time": "0:05:17"}
|
| 393 |
+
{"current_steps": 3930, "total_steps": 5000, "loss": 1.0074, "lr": 6.656753387428089e-06, "epoch": 0.786, "percentage": 78.6, "elapsed_time": "0:19:16", "remaining_time": "0:05:14"}
|
| 394 |
+
{"current_steps": 3940, "total_steps": 5000, "loss": 1.7825, "lr": 6.538616484352902e-06, "epoch": 0.788, "percentage": 78.8, "elapsed_time": "0:19:19", "remaining_time": "0:05:11"}
|
| 395 |
+
{"current_steps": 3950, "total_steps": 5000, "loss": 1.2334, "lr": 6.421379363065142e-06, "epoch": 0.79, "percentage": 79.0, "elapsed_time": "0:19:22", "remaining_time": "0:05:08"}
|
| 396 |
+
{"current_steps": 3960, "total_steps": 5000, "loss": 1.9249, "lr": 6.305047737536707e-06, "epoch": 0.792, "percentage": 79.2, "elapsed_time": "0:19:24", "remaining_time": "0:05:05"}
|
| 397 |
+
{"current_steps": 3970, "total_steps": 5000, "loss": 1.4351, "lr": 6.189627277606894e-06, "epoch": 0.794, "percentage": 79.4, "elapsed_time": "0:19:27", "remaining_time": "0:05:02"}
|
| 398 |
+
{"current_steps": 3980, "total_steps": 5000, "loss": 1.6044, "lr": 6.075123608706093e-06, "epoch": 0.796, "percentage": 79.6, "elapsed_time": "0:19:30", "remaining_time": "0:04:59"}
|
| 399 |
+
{"current_steps": 3990, "total_steps": 5000, "loss": 1.7348, "lr": 5.961542311581586e-06, "epoch": 0.798, "percentage": 79.8, "elapsed_time": "0:19:33", "remaining_time": "0:04:57"}
|
| 400 |
+
{"current_steps": 4000, "total_steps": 5000, "loss": 12.6939, "lr": 5.848888922025553e-06, "epoch": 0.8, "percentage": 80.0, "elapsed_time": "0:19:36", "remaining_time": "0:04:54"}
|
| 401 |
+
{"current_steps": 4010, "total_steps": 5000, "loss": 0.8231, "lr": 5.737168930605272e-06, "epoch": 0.802, "percentage": 80.2, "elapsed_time": "0:19:39", "remaining_time": "0:04:51"}
|
| 402 |
+
{"current_steps": 4020, "total_steps": 5000, "loss": 2.239, "lr": 5.626387782395512e-06, "epoch": 0.804, "percentage": 80.4, "elapsed_time": "0:19:42", "remaining_time": "0:04:48"}
|
| 403 |
+
{"current_steps": 4030, "total_steps": 5000, "loss": 3.1218, "lr": 5.5165508767131415e-06, "epoch": 0.806, "percentage": 80.6, "elapsed_time": "0:19:45", "remaining_time": "0:04:45"}
|
| 404 |
+
{"current_steps": 4040, "total_steps": 5000, "loss": 0.5524, "lr": 5.4076635668540075e-06, "epoch": 0.808, "percentage": 80.8, "elapsed_time": "0:19:48", "remaining_time": "0:04:42"}
|
| 405 |
+
{"current_steps": 4050, "total_steps": 5000, "loss": 2.4598, "lr": 5.299731159831953e-06, "epoch": 0.81, "percentage": 81.0, "elapsed_time": "0:19:50", "remaining_time": "0:04:39"}
|
| 406 |
+
{"current_steps": 4060, "total_steps": 5000, "loss": 0.634, "lr": 5.192758916120236e-06, "epoch": 0.812, "percentage": 81.2, "elapsed_time": "0:19:53", "remaining_time": "0:04:36"}
|
| 407 |
+
{"current_steps": 4070, "total_steps": 5000, "loss": 0.8943, "lr": 5.086752049395094e-06, "epoch": 0.814, "percentage": 81.4, "elapsed_time": "0:19:56", "remaining_time": "0:04:33"}
|
| 408 |
+
{"current_steps": 4080, "total_steps": 5000, "loss": 1.1776, "lr": 4.981715726281666e-06, "epoch": 0.816, "percentage": 81.6, "elapsed_time": "0:19:59", "remaining_time": "0:04:30"}
|
| 409 |
+
{"current_steps": 4090, "total_steps": 5000, "loss": 0.918, "lr": 4.877655066102149e-06, "epoch": 0.818, "percentage": 81.8, "elapsed_time": "0:20:02", "remaining_time": "0:04:27"}
|
| 410 |
+
{"current_steps": 4100, "total_steps": 5000, "loss": 0.3834, "lr": 4.7745751406263165e-06, "epoch": 0.82, "percentage": 82.0, "elapsed_time": "0:20:04", "remaining_time": "0:04:24"}
|
| 411 |
+
{"current_steps": 4110, "total_steps": 5000, "loss": 1.0476, "lr": 4.672480973824311e-06, "epoch": 0.822, "percentage": 82.2, "elapsed_time": "0:20:07", "remaining_time": "0:04:21"}
|
| 412 |
+
{"current_steps": 4120, "total_steps": 5000, "loss": 0.7036, "lr": 4.571377541621788e-06, "epoch": 0.824, "percentage": 82.4, "elapsed_time": "0:20:10", "remaining_time": "0:04:18"}
|
| 413 |
+
{"current_steps": 4130, "total_steps": 5000, "loss": 1.4096, "lr": 4.4712697716574e-06, "epoch": 0.826, "percentage": 82.6, "elapsed_time": "0:20:13", "remaining_time": "0:04:15"}
|
| 414 |
+
{"current_steps": 4140, "total_steps": 5000, "loss": 1.2209, "lr": 4.372162543042624e-06, "epoch": 0.828, "percentage": 82.8, "elapsed_time": "0:20:15", "remaining_time": "0:04:12"}
|
| 415 |
+
{"current_steps": 4150, "total_steps": 5000, "loss": 1.4758, "lr": 4.274060686123959e-06, "epoch": 0.83, "percentage": 83.0, "elapsed_time": "0:20:18", "remaining_time": "0:04:09"}
|
| 416 |
+
{"current_steps": 4160, "total_steps": 5000, "loss": 1.546, "lr": 4.176968982247514e-06, "epoch": 0.832, "percentage": 83.2, "elapsed_time": "0:20:21", "remaining_time": "0:04:06"}
|
| 417 |
+
{"current_steps": 4170, "total_steps": 5000, "loss": 0.8864, "lr": 4.08089216352596e-06, "epoch": 0.834, "percentage": 83.4, "elapsed_time": "0:20:24", "remaining_time": "0:04:03"}
|
| 418 |
+
{"current_steps": 4180, "total_steps": 5000, "loss": 1.4541, "lr": 3.985834912607894e-06, "epoch": 0.836, "percentage": 83.6, "elapsed_time": "0:20:27", "remaining_time": "0:04:00"}
|
| 419 |
+
{"current_steps": 4190, "total_steps": 5000, "loss": 0.6756, "lr": 3.891801862449629e-06, "epoch": 0.838, "percentage": 83.8, "elapsed_time": "0:20:30", "remaining_time": "0:03:57"}
|
| 420 |
+
{"current_steps": 4200, "total_steps": 5000, "loss": 1.3881, "lr": 3.798797596089351e-06, "epoch": 0.84, "percentage": 84.0, "elapsed_time": "0:20:33", "remaining_time": "0:03:54"}
|
| 421 |
+
{"current_steps": 4210, "total_steps": 5000, "loss": 2.3349, "lr": 3.7068266464238084e-06, "epoch": 0.842, "percentage": 84.2, "elapsed_time": "0:20:36", "remaining_time": "0:03:52"}
|
| 422 |
+
{"current_steps": 4220, "total_steps": 5000, "loss": 0.7973, "lr": 3.6158934959873353e-06, "epoch": 0.844, "percentage": 84.4, "elapsed_time": "0:20:39", "remaining_time": "0:03:49"}
|
| 423 |
+
{"current_steps": 4230, "total_steps": 5000, "loss": 1.1455, "lr": 3.5260025767333893e-06, "epoch": 0.846, "percentage": 84.6, "elapsed_time": "0:20:42", "remaining_time": "0:03:46"}
|
| 424 |
+
{"current_steps": 4240, "total_steps": 5000, "loss": 2.3828, "lr": 3.4371582698185633e-06, "epoch": 0.848, "percentage": 84.8, "elapsed_time": "0:20:45", "remaining_time": "0:03:43"}
|
| 425 |
+
{"current_steps": 4250, "total_steps": 5000, "loss": 0.8389, "lr": 3.3493649053890326e-06, "epoch": 0.85, "percentage": 85.0, "elapsed_time": "0:20:48", "remaining_time": "0:03:40"}
|
| 426 |
+
{"current_steps": 4260, "total_steps": 5000, "loss": 0.7594, "lr": 3.262626762369525e-06, "epoch": 0.852, "percentage": 85.2, "elapsed_time": "0:20:51", "remaining_time": "0:03:37"}
|
| 427 |
+
{"current_steps": 4270, "total_steps": 5000, "loss": 0.8006, "lr": 3.176948068254762e-06, "epoch": 0.854, "percentage": 85.4, "elapsed_time": "0:20:53", "remaining_time": "0:03:34"}
|
| 428 |
+
{"current_steps": 4280, "total_steps": 5000, "loss": 1.4222, "lr": 3.092332998903416e-06, "epoch": 0.856, "percentage": 85.6, "elapsed_time": "0:20:56", "remaining_time": "0:03:31"}
|
| 429 |
+
{"current_steps": 4290, "total_steps": 5000, "loss": 2.5477, "lr": 3.0087856783345914e-06, "epoch": 0.858, "percentage": 85.8, "elapsed_time": "0:20:59", "remaining_time": "0:03:28"}
|
| 430 |
+
{"current_steps": 4300, "total_steps": 5000, "loss": 2.2044, "lr": 2.9263101785268254e-06, "epoch": 0.86, "percentage": 86.0, "elapsed_time": "0:21:02", "remaining_time": "0:03:25"}
|
| 431 |
+
{"current_steps": 4310, "total_steps": 5000, "loss": 1.5267, "lr": 2.8449105192196316e-06, "epoch": 0.862, "percentage": 86.2, "elapsed_time": "0:21:04", "remaining_time": "0:03:22"}
|
| 432 |
+
{"current_steps": 4320, "total_steps": 5000, "loss": 0.6063, "lr": 2.764590667717562e-06, "epoch": 0.864, "percentage": 86.4, "elapsed_time": "0:21:07", "remaining_time": "0:03:19"}
|
| 433 |
+
{"current_steps": 4330, "total_steps": 5000, "loss": 2.6636, "lr": 2.6853545386968606e-06, "epoch": 0.866, "percentage": 86.6, "elapsed_time": "0:21:11", "remaining_time": "0:03:16"}
|
| 434 |
+
{"current_steps": 4340, "total_steps": 5000, "loss": 0.9864, "lr": 2.6072059940146775e-06, "epoch": 0.868, "percentage": 86.8, "elapsed_time": "0:21:13", "remaining_time": "0:03:13"}
|
| 435 |
+
{"current_steps": 4350, "total_steps": 5000, "loss": 2.3756, "lr": 2.5301488425208296e-06, "epoch": 0.87, "percentage": 87.0, "elapsed_time": "0:21:16", "remaining_time": "0:03:10"}
|
| 436 |
+
{"current_steps": 4360, "total_steps": 5000, "loss": 0.962, "lr": 2.454186839872158e-06, "epoch": 0.872, "percentage": 87.2, "elapsed_time": "0:21:19", "remaining_time": "0:03:07"}
|
| 437 |
+
{"current_steps": 4370, "total_steps": 5000, "loss": 0.8765, "lr": 2.379323688349516e-06, "epoch": 0.874, "percentage": 87.4, "elapsed_time": "0:21:22", "remaining_time": "0:03:04"}
|
| 438 |
+
{"current_steps": 4380, "total_steps": 5000, "loss": 1.3187, "lr": 2.3055630366772856e-06, "epoch": 0.876, "percentage": 87.6, "elapsed_time": "0:21:24", "remaining_time": "0:03:01"}
|
| 439 |
+
{"current_steps": 4390, "total_steps": 5000, "loss": 0.996, "lr": 2.2329084798455746e-06, "epoch": 0.878, "percentage": 87.8, "elapsed_time": "0:21:28", "remaining_time": "0:02:58"}
|
| 440 |
+
{"current_steps": 4400, "total_steps": 5000, "loss": 1.1203, "lr": 2.1613635589349756e-06, "epoch": 0.88, "percentage": 88.0, "elapsed_time": "0:21:30", "remaining_time": "0:02:56"}
|
| 441 |
+
{"current_steps": 4410, "total_steps": 5000, "loss": 1.5124, "lr": 2.0909317609440095e-06, "epoch": 0.882, "percentage": 88.2, "elapsed_time": "0:21:34", "remaining_time": "0:02:53"}
|
| 442 |
+
{"current_steps": 4420, "total_steps": 5000, "loss": 0.5098, "lr": 2.0216165186191407e-06, "epoch": 0.884, "percentage": 88.4, "elapsed_time": "0:21:36", "remaining_time": "0:02:50"}
|
| 443 |
+
{"current_steps": 4430, "total_steps": 5000, "loss": 0.9764, "lr": 1.95342121028749e-06, "epoch": 0.886, "percentage": 88.6, "elapsed_time": "0:21:39", "remaining_time": "0:02:47"}
|
| 444 |
+
{"current_steps": 4440, "total_steps": 5000, "loss": 1.6156, "lr": 1.8863491596921745e-06, "epoch": 0.888, "percentage": 88.8, "elapsed_time": "0:21:42", "remaining_time": "0:02:44"}
|
| 445 |
+
{"current_steps": 4450, "total_steps": 5000, "loss": 1.3607, "lr": 1.8204036358303173e-06, "epoch": 0.89, "percentage": 89.0, "elapsed_time": "0:21:45", "remaining_time": "0:02:41"}
|
| 446 |
+
{"current_steps": 4460, "total_steps": 5000, "loss": 1.291, "lr": 1.7555878527937164e-06, "epoch": 0.892, "percentage": 89.2, "elapsed_time": "0:21:48", "remaining_time": "0:02:38"}
|
| 447 |
+
{"current_steps": 4470, "total_steps": 5000, "loss": 0.6693, "lr": 1.6919049696121958e-06, "epoch": 0.894, "percentage": 89.4, "elapsed_time": "0:21:51", "remaining_time": "0:02:35"}
|
| 448 |
+
{"current_steps": 4480, "total_steps": 5000, "loss": 1.8125, "lr": 1.629358090099639e-06, "epoch": 0.896, "percentage": 89.6, "elapsed_time": "0:21:54", "remaining_time": "0:02:32"}
|
| 449 |
+
{"current_steps": 4490, "total_steps": 5000, "loss": 1.3824, "lr": 1.5679502627027136e-06, "epoch": 0.898, "percentage": 89.8, "elapsed_time": "0:21:57", "remaining_time": "0:02:29"}
|
| 450 |
+
{"current_steps": 4500, "total_steps": 5000, "loss": 1.466, "lr": 1.5076844803522922e-06, "epoch": 0.9, "percentage": 90.0, "elapsed_time": "0:22:00", "remaining_time": "0:02:26"}
|
| 451 |
+
{"current_steps": 4510, "total_steps": 5000, "loss": 0.8571, "lr": 1.4485636803175829e-06, "epoch": 0.902, "percentage": 90.2, "elapsed_time": "0:22:03", "remaining_time": "0:02:23"}
|
| 452 |
+
{"current_steps": 4520, "total_steps": 5000, "loss": 1.0712, "lr": 1.3905907440629752e-06, "epoch": 0.904, "percentage": 90.4, "elapsed_time": "0:22:06", "remaining_time": "0:02:20"}
|
| 453 |
+
{"current_steps": 4530, "total_steps": 5000, "loss": 1.328, "lr": 1.333768497107593e-06, "epoch": 0.906, "percentage": 90.6, "elapsed_time": "0:22:09", "remaining_time": "0:02:17"}
|
| 454 |
+
{"current_steps": 4540, "total_steps": 5000, "loss": 3.858, "lr": 1.2780997088875869e-06, "epoch": 0.908, "percentage": 90.8, "elapsed_time": "0:22:11", "remaining_time": "0:02:14"}
|
| 455 |
+
{"current_steps": 4550, "total_steps": 5000, "loss": 1.3761, "lr": 1.2235870926211619e-06, "epoch": 0.91, "percentage": 91.0, "elapsed_time": "0:22:14", "remaining_time": "0:02:12"}
|
| 456 |
+
{"current_steps": 4560, "total_steps": 5000, "loss": 0.9014, "lr": 1.170233305176327e-06, "epoch": 0.912, "percentage": 91.2, "elapsed_time": "0:22:17", "remaining_time": "0:02:09"}
|
| 457 |
+
{"current_steps": 4570, "total_steps": 5000, "loss": 1.2054, "lr": 1.1180409469414094e-06, "epoch": 0.914, "percentage": 91.4, "elapsed_time": "0:22:20", "remaining_time": "0:02:06"}
|
| 458 |
+
{"current_steps": 4580, "total_steps": 5000, "loss": 1.1962, "lr": 1.067012561698319e-06, "epoch": 0.916, "percentage": 91.6, "elapsed_time": "0:22:23", "remaining_time": "0:02:03"}
|
| 459 |
+
{"current_steps": 4590, "total_steps": 5000, "loss": 1.1866, "lr": 1.0171506364985622e-06, "epoch": 0.918, "percentage": 91.8, "elapsed_time": "0:22:26", "remaining_time": "0:02:00"}
|
| 460 |
+
{"current_steps": 4600, "total_steps": 5000, "loss": 1.4084, "lr": 9.684576015420278e-07, "epoch": 0.92, "percentage": 92.0, "elapsed_time": "0:22:29", "remaining_time": "0:01:57"}
|
| 461 |
+
{"current_steps": 4610, "total_steps": 5000, "loss": 0.4785, "lr": 9.209358300585474e-07, "epoch": 0.922, "percentage": 92.2, "elapsed_time": "0:22:32", "remaining_time": "0:01:54"}
|
| 462 |
+
{"current_steps": 4620, "total_steps": 5000, "loss": 1.0671, "lr": 8.745876381922147e-07, "epoch": 0.924, "percentage": 92.4, "elapsed_time": "0:22:35", "remaining_time": "0:01:51"}
|
| 463 |
+
{"current_steps": 4630, "total_steps": 5000, "loss": 3.2901, "lr": 8.294152848885157e-07, "epoch": 0.926, "percentage": 92.6, "elapsed_time": "0:22:37", "remaining_time": "0:01:48"}
|
| 464 |
+
{"current_steps": 4640, "total_steps": 5000, "loss": 1.0359, "lr": 7.854209717842231e-07, "epoch": 0.928, "percentage": 92.8, "elapsed_time": "0:22:40", "remaining_time": "0:01:45"}
|
| 465 |
+
{"current_steps": 4650, "total_steps": 5000, "loss": 0.5115, "lr": 7.426068431000882e-07, "epoch": 0.93, "percentage": 93.0, "elapsed_time": "0:22:43", "remaining_time": "0:01:42"}
|
| 466 |
+
{"current_steps": 4660, "total_steps": 5000, "loss": 1.0589, "lr": 7.009749855363456e-07, "epoch": 0.932, "percentage": 93.2, "elapsed_time": "0:22:46", "remaining_time": "0:01:39"}
|
| 467 |
+
{"current_steps": 4670, "total_steps": 5000, "loss": 0.3984, "lr": 6.605274281709928e-07, "epoch": 0.934, "percentage": 93.4, "elapsed_time": "0:22:49", "remaining_time": "0:01:36"}
|
| 468 |
+
{"current_steps": 4680, "total_steps": 5000, "loss": 2.5647, "lr": 6.212661423609184e-07, "epoch": 0.936, "percentage": 93.6, "elapsed_time": "0:22:52", "remaining_time": "0:01:33"}
|
| 469 |
+
{"current_steps": 4690, "total_steps": 5000, "loss": 0.93, "lr": 5.83193041645802e-07, "epoch": 0.938, "percentage": 93.8, "elapsed_time": "0:22:54", "remaining_time": "0:01:30"}
|
| 470 |
+
{"current_steps": 4700, "total_steps": 5000, "loss": 1.4605, "lr": 5.463099816548579e-07, "epoch": 0.94, "percentage": 94.0, "elapsed_time": "0:22:58", "remaining_time": "0:01:27"}
|
| 471 |
+
{"current_steps": 4710, "total_steps": 5000, "loss": 0.7053, "lr": 5.106187600163987e-07, "epoch": 0.942, "percentage": 94.2, "elapsed_time": "0:23:00", "remaining_time": "0:01:25"}
|
| 472 |
+
{"current_steps": 4720, "total_steps": 5000, "loss": 1.9524, "lr": 4.7612111627021175e-07, "epoch": 0.944, "percentage": 94.4, "elapsed_time": "0:23:03", "remaining_time": "0:01:22"}
|
| 473 |
+
{"current_steps": 4730, "total_steps": 5000, "loss": 2.0867, "lr": 4.4281873178278475e-07, "epoch": 0.946, "percentage": 94.6, "elapsed_time": "0:23:06", "remaining_time": "0:01:19"}
|
| 474 |
+
{"current_steps": 4740, "total_steps": 5000, "loss": 0.6371, "lr": 4.107132296653549e-07, "epoch": 0.948, "percentage": 94.8, "elapsed_time": "0:23:09", "remaining_time": "0:01:16"}
|
| 475 |
+
{"current_steps": 4750, "total_steps": 5000, "loss": 0.5299, "lr": 3.7980617469479953e-07, "epoch": 0.95, "percentage": 95.0, "elapsed_time": "0:23:12", "remaining_time": "0:01:13"}
|
| 476 |
+
{"current_steps": 4760, "total_steps": 5000, "loss": 4.1515, "lr": 3.5009907323737825e-07, "epoch": 0.952, "percentage": 95.2, "elapsed_time": "0:23:14", "remaining_time": "0:01:10"}
|
| 477 |
+
{"current_steps": 4770, "total_steps": 5000, "loss": 0.762, "lr": 3.215933731753024e-07, "epoch": 0.954, "percentage": 95.4, "elapsed_time": "0:23:17", "remaining_time": "0:01:07"}
|
| 478 |
+
{"current_steps": 4780, "total_steps": 5000, "loss": 0.9055, "lr": 2.942904638361804e-07, "epoch": 0.956, "percentage": 95.6, "elapsed_time": "0:23:20", "remaining_time": "0:01:04"}
|
| 479 |
+
{"current_steps": 4790, "total_steps": 5000, "loss": 1.026, "lr": 2.681916759252917e-07, "epoch": 0.958, "percentage": 95.8, "elapsed_time": "0:23:23", "remaining_time": "0:01:01"}
|
| 480 |
+
{"current_steps": 4800, "total_steps": 5000, "loss": 1.9403, "lr": 2.4329828146074095e-07, "epoch": 0.96, "percentage": 96.0, "elapsed_time": "0:23:26", "remaining_time": "0:00:58"}
|
| 481 |
+
{"current_steps": 4810, "total_steps": 5000, "loss": 1.8509, "lr": 2.1961149371145795e-07, "epoch": 0.962, "percentage": 96.2, "elapsed_time": "0:23:29", "remaining_time": "0:00:55"}
|
| 482 |
+
{"current_steps": 4820, "total_steps": 5000, "loss": 1.5779, "lr": 1.9713246713805588e-07, "epoch": 0.964, "percentage": 96.4, "elapsed_time": "0:23:32", "remaining_time": "0:00:52"}
|
| 483 |
+
{"current_steps": 4830, "total_steps": 5000, "loss": 1.2824, "lr": 1.7586229733657644e-07, "epoch": 0.966, "percentage": 96.6, "elapsed_time": "0:23:34", "remaining_time": "0:00:49"}
|
| 484 |
+
{"current_steps": 4840, "total_steps": 5000, "loss": 1.8272, "lr": 1.5580202098509077e-07, "epoch": 0.968, "percentage": 96.8, "elapsed_time": "0:23:37", "remaining_time": "0:00:46"}
|
| 485 |
+
{"current_steps": 4850, "total_steps": 5000, "loss": 1.7201, "lr": 1.3695261579316777e-07, "epoch": 0.97, "percentage": 97.0, "elapsed_time": "0:23:40", "remaining_time": "0:00:43"}
|
| 486 |
+
{"current_steps": 4860, "total_steps": 5000, "loss": 1.2317, "lr": 1.193150004542204e-07, "epoch": 0.972, "percentage": 97.2, "elapsed_time": "0:23:42", "remaining_time": "0:00:40"}
|
| 487 |
+
{"current_steps": 4870, "total_steps": 5000, "loss": 1.2935, "lr": 1.0289003460074165e-07, "epoch": 0.974, "percentage": 97.4, "elapsed_time": "0:23:45", "remaining_time": "0:00:38"}
|
| 488 |
+
{"current_steps": 4880, "total_steps": 5000, "loss": 1.2833, "lr": 8.767851876239074e-08, "epoch": 0.976, "percentage": 97.6, "elapsed_time": "0:23:48", "remaining_time": "0:00:35"}
|
| 489 |
+
{"current_steps": 4890, "total_steps": 5000, "loss": 1.021, "lr": 7.368119432699383e-08, "epoch": 0.978, "percentage": 97.8, "elapsed_time": "0:23:51", "remaining_time": "0:00:32"}
|
| 490 |
+
{"current_steps": 4900, "total_steps": 5000, "loss": 1.1247, "lr": 6.089874350439506e-08, "epoch": 0.98, "percentage": 98.0, "elapsed_time": "0:23:54", "remaining_time": "0:00:29"}
|
| 491 |
+
{"current_steps": 4910, "total_steps": 5000, "loss": 1.2268, "lr": 4.9331789293211026e-08, "epoch": 0.982, "percentage": 98.2, "elapsed_time": "0:23:57", "remaining_time": "0:00:26"}
|
| 492 |
+
{"current_steps": 4920, "total_steps": 5000, "loss": 4.2583, "lr": 3.8980895450474455e-08, "epoch": 0.984, "percentage": 98.4, "elapsed_time": "0:24:00", "remaining_time": "0:00:23"}
|
| 493 |
+
{"current_steps": 4930, "total_steps": 5000, "loss": 0.7995, "lr": 2.9846566464150626e-08, "epoch": 0.986, "percentage": 98.6, "elapsed_time": "0:24:03", "remaining_time": "0:00:20"}
|
| 494 |
+
{"current_steps": 4940, "total_steps": 5000, "loss": 1.3818, "lr": 2.192924752854042e-08, "epoch": 0.988, "percentage": 98.8, "elapsed_time": "0:24:06", "remaining_time": "0:00:17"}
|
| 495 |
+
{"current_steps": 4950, "total_steps": 5000, "loss": 2.6165, "lr": 1.522932452260595e-08, "epoch": 0.99, "percentage": 99.0, "elapsed_time": "0:24:09", "remaining_time": "0:00:14"}
|
| 496 |
+
{"current_steps": 4960, "total_steps": 5000, "loss": 1.7912, "lr": 9.747123991141194e-09, "epoch": 0.992, "percentage": 99.2, "elapsed_time": "0:24:12", "remaining_time": "0:00:11"}
|
| 497 |
+
{"current_steps": 4970, "total_steps": 5000, "loss": 1.421, "lr": 5.48291312886251e-09, "epoch": 0.994, "percentage": 99.4, "elapsed_time": "0:24:15", "remaining_time": "0:00:08"}
|
| 498 |
+
{"current_steps": 4980, "total_steps": 5000, "loss": 1.2866, "lr": 2.4368997673940297e-09, "epoch": 0.996, "percentage": 99.6, "elapsed_time": "0:24:18", "remaining_time": "0:00:05"}
|
| 499 |
+
{"current_steps": 4990, "total_steps": 5000, "loss": 0.6754, "lr": 6.092323651313292e-10, "epoch": 0.998, "percentage": 99.8, "elapsed_time": "0:24:20", "remaining_time": "0:00:02"}
|
| 500 |
+
{"current_steps": 5000, "total_steps": 5000, "loss": 1.1699, "lr": 0.0, "epoch": 1.0, "percentage": 100.0, "elapsed_time": "0:24:23", "remaining_time": "0:00:00"}
|
| 501 |
+
{"current_steps": 5000, "total_steps": 5000, "epoch": 1.0, "percentage": 100.0, "elapsed_time": "0:24:24", "remaining_time": "0:00:00"}
|
Llama-2-13b-chat-hf/DomainBench/Finance/trainer_state.json
ADDED
|
@@ -0,0 +1,3542 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"best_metric": null,
|
| 3 |
+
"best_model_checkpoint": null,
|
| 4 |
+
"epoch": 1.0,
|
| 5 |
+
"eval_steps": 500,
|
| 6 |
+
"global_step": 5000,
|
| 7 |
+
"is_hyper_param_search": false,
|
| 8 |
+
"is_local_process_zero": true,
|
| 9 |
+
"is_world_process_zero": true,
|
| 10 |
+
"log_history": [
|
| 11 |
+
{
|
| 12 |
+
"epoch": 0.002,
|
| 13 |
+
"grad_norm": 1.1971019506454468,
|
| 14 |
+
"learning_rate": 1.0000000000000002e-06,
|
| 15 |
+
"loss": 3.5335,
|
| 16 |
+
"step": 10
|
| 17 |
+
},
|
| 18 |
+
{
|
| 19 |
+
"epoch": 0.004,
|
| 20 |
+
"grad_norm": 0.6988129019737244,
|
| 21 |
+
"learning_rate": 2.0000000000000003e-06,
|
| 22 |
+
"loss": 5.5042,
|
| 23 |
+
"step": 20
|
| 24 |
+
},
|
| 25 |
+
{
|
| 26 |
+
"epoch": 0.006,
|
| 27 |
+
"grad_norm": 0.7433498501777649,
|
| 28 |
+
"learning_rate": 3e-06,
|
| 29 |
+
"loss": 2.9337,
|
| 30 |
+
"step": 30
|
| 31 |
+
},
|
| 32 |
+
{
|
| 33 |
+
"epoch": 0.008,
|
| 34 |
+
"grad_norm": 0.6900968551635742,
|
| 35 |
+
"learning_rate": 4.000000000000001e-06,
|
| 36 |
+
"loss": 8.5725,
|
| 37 |
+
"step": 40
|
| 38 |
+
},
|
| 39 |
+
{
|
| 40 |
+
"epoch": 0.01,
|
| 41 |
+
"grad_norm": 0.0,
|
| 42 |
+
"learning_rate": 5e-06,
|
| 43 |
+
"loss": 2.2742,
|
| 44 |
+
"step": 50
|
| 45 |
+
},
|
| 46 |
+
{
|
| 47 |
+
"epoch": 0.012,
|
| 48 |
+
"grad_norm": 0.980725884437561,
|
| 49 |
+
"learning_rate": 6e-06,
|
| 50 |
+
"loss": 4.5745,
|
| 51 |
+
"step": 60
|
| 52 |
+
},
|
| 53 |
+
{
|
| 54 |
+
"epoch": 0.014,
|
| 55 |
+
"grad_norm": 1.36703360080719,
|
| 56 |
+
"learning_rate": 7.000000000000001e-06,
|
| 57 |
+
"loss": 3.9419,
|
| 58 |
+
"step": 70
|
| 59 |
+
},
|
| 60 |
+
{
|
| 61 |
+
"epoch": 0.016,
|
| 62 |
+
"grad_norm": 1.5291451215744019,
|
| 63 |
+
"learning_rate": 8.000000000000001e-06,
|
| 64 |
+
"loss": 2.2781,
|
| 65 |
+
"step": 80
|
| 66 |
+
},
|
| 67 |
+
{
|
| 68 |
+
"epoch": 0.018,
|
| 69 |
+
"grad_norm": 0.7842139601707458,
|
| 70 |
+
"learning_rate": 9e-06,
|
| 71 |
+
"loss": 6.3897,
|
| 72 |
+
"step": 90
|
| 73 |
+
},
|
| 74 |
+
{
|
| 75 |
+
"epoch": 0.02,
|
| 76 |
+
"grad_norm": 0.620259702205658,
|
| 77 |
+
"learning_rate": 1e-05,
|
| 78 |
+
"loss": 3.6837,
|
| 79 |
+
"step": 100
|
| 80 |
+
},
|
| 81 |
+
{
|
| 82 |
+
"epoch": 0.022,
|
| 83 |
+
"grad_norm": 0.0,
|
| 84 |
+
"learning_rate": 1.1000000000000001e-05,
|
| 85 |
+
"loss": 1.3266,
|
| 86 |
+
"step": 110
|
| 87 |
+
},
|
| 88 |
+
{
|
| 89 |
+
"epoch": 0.024,
|
| 90 |
+
"grad_norm": 0.2774917483329773,
|
| 91 |
+
"learning_rate": 1.2e-05,
|
| 92 |
+
"loss": 6.1833,
|
| 93 |
+
"step": 120
|
| 94 |
+
},
|
| 95 |
+
{
|
| 96 |
+
"epoch": 0.026,
|
| 97 |
+
"grad_norm": 1.9525136947631836,
|
| 98 |
+
"learning_rate": 1.3000000000000001e-05,
|
| 99 |
+
"loss": 2.6712,
|
| 100 |
+
"step": 130
|
| 101 |
+
},
|
| 102 |
+
{
|
| 103 |
+
"epoch": 0.028,
|
| 104 |
+
"grad_norm": 3.779365062713623,
|
| 105 |
+
"learning_rate": 1.4000000000000001e-05,
|
| 106 |
+
"loss": 2.5445,
|
| 107 |
+
"step": 140
|
| 108 |
+
},
|
| 109 |
+
{
|
| 110 |
+
"epoch": 0.03,
|
| 111 |
+
"grad_norm": 1.3333756923675537,
|
| 112 |
+
"learning_rate": 1.5e-05,
|
| 113 |
+
"loss": 2.7276,
|
| 114 |
+
"step": 150
|
| 115 |
+
},
|
| 116 |
+
{
|
| 117 |
+
"epoch": 0.032,
|
| 118 |
+
"grad_norm": 0.9332829713821411,
|
| 119 |
+
"learning_rate": 1.6000000000000003e-05,
|
| 120 |
+
"loss": 1.7329,
|
| 121 |
+
"step": 160
|
| 122 |
+
},
|
| 123 |
+
{
|
| 124 |
+
"epoch": 0.034,
|
| 125 |
+
"grad_norm": 1.1419305801391602,
|
| 126 |
+
"learning_rate": 1.7000000000000003e-05,
|
| 127 |
+
"loss": 1.916,
|
| 128 |
+
"step": 170
|
| 129 |
+
},
|
| 130 |
+
{
|
| 131 |
+
"epoch": 0.036,
|
| 132 |
+
"grad_norm": 1.4852521419525146,
|
| 133 |
+
"learning_rate": 1.8e-05,
|
| 134 |
+
"loss": 6.3988,
|
| 135 |
+
"step": 180
|
| 136 |
+
},
|
| 137 |
+
{
|
| 138 |
+
"epoch": 0.038,
|
| 139 |
+
"grad_norm": 0.8550328016281128,
|
| 140 |
+
"learning_rate": 1.9e-05,
|
| 141 |
+
"loss": 2.1053,
|
| 142 |
+
"step": 190
|
| 143 |
+
},
|
| 144 |
+
{
|
| 145 |
+
"epoch": 0.04,
|
| 146 |
+
"grad_norm": 2.170074939727783,
|
| 147 |
+
"learning_rate": 2e-05,
|
| 148 |
+
"loss": 0.798,
|
| 149 |
+
"step": 200
|
| 150 |
+
},
|
| 151 |
+
{
|
| 152 |
+
"epoch": 0.042,
|
| 153 |
+
"grad_norm": 0.7900694012641907,
|
| 154 |
+
"learning_rate": 2.1e-05,
|
| 155 |
+
"loss": 2.0661,
|
| 156 |
+
"step": 210
|
| 157 |
+
},
|
| 158 |
+
{
|
| 159 |
+
"epoch": 0.044,
|
| 160 |
+
"grad_norm": 0.0,
|
| 161 |
+
"learning_rate": 2.2000000000000003e-05,
|
| 162 |
+
"loss": 2.4783,
|
| 163 |
+
"step": 220
|
| 164 |
+
},
|
| 165 |
+
{
|
| 166 |
+
"epoch": 0.046,
|
| 167 |
+
"grad_norm": 0.0,
|
| 168 |
+
"learning_rate": 2.3000000000000003e-05,
|
| 169 |
+
"loss": 0.7402,
|
| 170 |
+
"step": 230
|
| 171 |
+
},
|
| 172 |
+
{
|
| 173 |
+
"epoch": 0.048,
|
| 174 |
+
"grad_norm": 0.35598117113113403,
|
| 175 |
+
"learning_rate": 2.4e-05,
|
| 176 |
+
"loss": 2.5115,
|
| 177 |
+
"step": 240
|
| 178 |
+
},
|
| 179 |
+
{
|
| 180 |
+
"epoch": 0.05,
|
| 181 |
+
"grad_norm": 5.627530574798584,
|
| 182 |
+
"learning_rate": 2.5e-05,
|
| 183 |
+
"loss": 2.13,
|
| 184 |
+
"step": 250
|
| 185 |
+
},
|
| 186 |
+
{
|
| 187 |
+
"epoch": 0.052,
|
| 188 |
+
"grad_norm": 3.1692419052124023,
|
| 189 |
+
"learning_rate": 2.6000000000000002e-05,
|
| 190 |
+
"loss": 1.6962,
|
| 191 |
+
"step": 260
|
| 192 |
+
},
|
| 193 |
+
{
|
| 194 |
+
"epoch": 0.054,
|
| 195 |
+
"grad_norm": 5.671966075897217,
|
| 196 |
+
"learning_rate": 2.7000000000000002e-05,
|
| 197 |
+
"loss": 2.8705,
|
| 198 |
+
"step": 270
|
| 199 |
+
},
|
| 200 |
+
{
|
| 201 |
+
"epoch": 0.056,
|
| 202 |
+
"grad_norm": 0.8001578450202942,
|
| 203 |
+
"learning_rate": 2.8000000000000003e-05,
|
| 204 |
+
"loss": 2.9734,
|
| 205 |
+
"step": 280
|
| 206 |
+
},
|
| 207 |
+
{
|
| 208 |
+
"epoch": 0.058,
|
| 209 |
+
"grad_norm": 0.21718740463256836,
|
| 210 |
+
"learning_rate": 2.9e-05,
|
| 211 |
+
"loss": 1.7669,
|
| 212 |
+
"step": 290
|
| 213 |
+
},
|
| 214 |
+
{
|
| 215 |
+
"epoch": 0.06,
|
| 216 |
+
"grad_norm": 1.2626160383224487,
|
| 217 |
+
"learning_rate": 3e-05,
|
| 218 |
+
"loss": 2.1499,
|
| 219 |
+
"step": 300
|
| 220 |
+
},
|
| 221 |
+
{
|
| 222 |
+
"epoch": 0.062,
|
| 223 |
+
"grad_norm": 0.0,
|
| 224 |
+
"learning_rate": 3.1e-05,
|
| 225 |
+
"loss": 1.6354,
|
| 226 |
+
"step": 310
|
| 227 |
+
},
|
| 228 |
+
{
|
| 229 |
+
"epoch": 0.064,
|
| 230 |
+
"grad_norm": 3.380746603012085,
|
| 231 |
+
"learning_rate": 3.2000000000000005e-05,
|
| 232 |
+
"loss": 2.1457,
|
| 233 |
+
"step": 320
|
| 234 |
+
},
|
| 235 |
+
{
|
| 236 |
+
"epoch": 0.066,
|
| 237 |
+
"grad_norm": 0.2737733721733093,
|
| 238 |
+
"learning_rate": 3.3e-05,
|
| 239 |
+
"loss": 3.0738,
|
| 240 |
+
"step": 330
|
| 241 |
+
},
|
| 242 |
+
{
|
| 243 |
+
"epoch": 0.068,
|
| 244 |
+
"grad_norm": 0.797559380531311,
|
| 245 |
+
"learning_rate": 3.4000000000000007e-05,
|
| 246 |
+
"loss": 2.4357,
|
| 247 |
+
"step": 340
|
| 248 |
+
},
|
| 249 |
+
{
|
| 250 |
+
"epoch": 0.07,
|
| 251 |
+
"grad_norm": 1.7590610980987549,
|
| 252 |
+
"learning_rate": 3.5e-05,
|
| 253 |
+
"loss": 1.7431,
|
| 254 |
+
"step": 350
|
| 255 |
+
},
|
| 256 |
+
{
|
| 257 |
+
"epoch": 0.072,
|
| 258 |
+
"grad_norm": 0.7487906217575073,
|
| 259 |
+
"learning_rate": 3.6e-05,
|
| 260 |
+
"loss": 1.7527,
|
| 261 |
+
"step": 360
|
| 262 |
+
},
|
| 263 |
+
{
|
| 264 |
+
"epoch": 0.074,
|
| 265 |
+
"grad_norm": 0.6480693221092224,
|
| 266 |
+
"learning_rate": 3.7e-05,
|
| 267 |
+
"loss": 6.1666,
|
| 268 |
+
"step": 370
|
| 269 |
+
},
|
| 270 |
+
{
|
| 271 |
+
"epoch": 0.076,
|
| 272 |
+
"grad_norm": 0.8775593638420105,
|
| 273 |
+
"learning_rate": 3.8e-05,
|
| 274 |
+
"loss": 0.5917,
|
| 275 |
+
"step": 380
|
| 276 |
+
},
|
| 277 |
+
{
|
| 278 |
+
"epoch": 0.078,
|
| 279 |
+
"grad_norm": 5.630661964416504,
|
| 280 |
+
"learning_rate": 3.9000000000000006e-05,
|
| 281 |
+
"loss": 1.5061,
|
| 282 |
+
"step": 390
|
| 283 |
+
},
|
| 284 |
+
{
|
| 285 |
+
"epoch": 0.08,
|
| 286 |
+
"grad_norm": 0.7237743735313416,
|
| 287 |
+
"learning_rate": 4e-05,
|
| 288 |
+
"loss": 1.5694,
|
| 289 |
+
"step": 400
|
| 290 |
+
},
|
| 291 |
+
{
|
| 292 |
+
"epoch": 0.082,
|
| 293 |
+
"grad_norm": 1.8499999046325684,
|
| 294 |
+
"learning_rate": 4.1e-05,
|
| 295 |
+
"loss": 1.4762,
|
| 296 |
+
"step": 410
|
| 297 |
+
},
|
| 298 |
+
{
|
| 299 |
+
"epoch": 0.084,
|
| 300 |
+
"grad_norm": 3.2792017459869385,
|
| 301 |
+
"learning_rate": 4.2e-05,
|
| 302 |
+
"loss": 1.0468,
|
| 303 |
+
"step": 420
|
| 304 |
+
},
|
| 305 |
+
{
|
| 306 |
+
"epoch": 0.086,
|
| 307 |
+
"grad_norm": 1.644893765449524,
|
| 308 |
+
"learning_rate": 4.3e-05,
|
| 309 |
+
"loss": 7.1942,
|
| 310 |
+
"step": 430
|
| 311 |
+
},
|
| 312 |
+
{
|
| 313 |
+
"epoch": 0.088,
|
| 314 |
+
"grad_norm": 3.8417279720306396,
|
| 315 |
+
"learning_rate": 4.4000000000000006e-05,
|
| 316 |
+
"loss": 2.706,
|
| 317 |
+
"step": 440
|
| 318 |
+
},
|
| 319 |
+
{
|
| 320 |
+
"epoch": 0.09,
|
| 321 |
+
"grad_norm": 9.292684555053711,
|
| 322 |
+
"learning_rate": 4.5e-05,
|
| 323 |
+
"loss": 1.8559,
|
| 324 |
+
"step": 450
|
| 325 |
+
},
|
| 326 |
+
{
|
| 327 |
+
"epoch": 0.092,
|
| 328 |
+
"grad_norm": 8.052363395690918,
|
| 329 |
+
"learning_rate": 4.600000000000001e-05,
|
| 330 |
+
"loss": 1.2105,
|
| 331 |
+
"step": 460
|
| 332 |
+
},
|
| 333 |
+
{
|
| 334 |
+
"epoch": 0.094,
|
| 335 |
+
"grad_norm": 2.8528406620025635,
|
| 336 |
+
"learning_rate": 4.7e-05,
|
| 337 |
+
"loss": 2.8524,
|
| 338 |
+
"step": 470
|
| 339 |
+
},
|
| 340 |
+
{
|
| 341 |
+
"epoch": 0.096,
|
| 342 |
+
"grad_norm": 1.0747981071472168,
|
| 343 |
+
"learning_rate": 4.8e-05,
|
| 344 |
+
"loss": 2.0881,
|
| 345 |
+
"step": 480
|
| 346 |
+
},
|
| 347 |
+
{
|
| 348 |
+
"epoch": 0.098,
|
| 349 |
+
"grad_norm": 1.3833454847335815,
|
| 350 |
+
"learning_rate": 4.9e-05,
|
| 351 |
+
"loss": 2.4156,
|
| 352 |
+
"step": 490
|
| 353 |
+
},
|
| 354 |
+
{
|
| 355 |
+
"epoch": 0.1,
|
| 356 |
+
"grad_norm": 0.5067809820175171,
|
| 357 |
+
"learning_rate": 5e-05,
|
| 358 |
+
"loss": 0.9531,
|
| 359 |
+
"step": 500
|
| 360 |
+
},
|
| 361 |
+
{
|
| 362 |
+
"epoch": 0.102,
|
| 363 |
+
"grad_norm": 1.2673537731170654,
|
| 364 |
+
"learning_rate": 4.999939076763487e-05,
|
| 365 |
+
"loss": 3.9691,
|
| 366 |
+
"step": 510
|
| 367 |
+
},
|
| 368 |
+
{
|
| 369 |
+
"epoch": 0.104,
|
| 370 |
+
"grad_norm": 0.0,
|
| 371 |
+
"learning_rate": 4.999756310023261e-05,
|
| 372 |
+
"loss": 0.8777,
|
| 373 |
+
"step": 520
|
| 374 |
+
},
|
| 375 |
+
{
|
| 376 |
+
"epoch": 0.106,
|
| 377 |
+
"grad_norm": 0.7635637521743774,
|
| 378 |
+
"learning_rate": 4.999451708687114e-05,
|
| 379 |
+
"loss": 0.9434,
|
| 380 |
+
"step": 530
|
| 381 |
+
},
|
| 382 |
+
{
|
| 383 |
+
"epoch": 0.108,
|
| 384 |
+
"grad_norm": 12.323412895202637,
|
| 385 |
+
"learning_rate": 4.999025287600886e-05,
|
| 386 |
+
"loss": 1.5592,
|
| 387 |
+
"step": 540
|
| 388 |
+
},
|
| 389 |
+
{
|
| 390 |
+
"epoch": 0.11,
|
| 391 |
+
"grad_norm": 0.7043775916099548,
|
| 392 |
+
"learning_rate": 4.99847706754774e-05,
|
| 393 |
+
"loss": 0.6868,
|
| 394 |
+
"step": 550
|
| 395 |
+
},
|
| 396 |
+
{
|
| 397 |
+
"epoch": 0.112,
|
| 398 |
+
"grad_norm": 2.084545612335205,
|
| 399 |
+
"learning_rate": 4.997807075247146e-05,
|
| 400 |
+
"loss": 0.956,
|
| 401 |
+
"step": 560
|
| 402 |
+
},
|
| 403 |
+
{
|
| 404 |
+
"epoch": 0.114,
|
| 405 |
+
"grad_norm": 1.2179222106933594,
|
| 406 |
+
"learning_rate": 4.997015343353585e-05,
|
| 407 |
+
"loss": 0.8766,
|
| 408 |
+
"step": 570
|
| 409 |
+
},
|
| 410 |
+
{
|
| 411 |
+
"epoch": 0.116,
|
| 412 |
+
"grad_norm": 0.2723235785961151,
|
| 413 |
+
"learning_rate": 4.996101910454953e-05,
|
| 414 |
+
"loss": 0.5423,
|
| 415 |
+
"step": 580
|
| 416 |
+
},
|
| 417 |
+
{
|
| 418 |
+
"epoch": 0.118,
|
| 419 |
+
"grad_norm": 19.425006866455078,
|
| 420 |
+
"learning_rate": 4.995066821070679e-05,
|
| 421 |
+
"loss": 1.7762,
|
| 422 |
+
"step": 590
|
| 423 |
+
},
|
| 424 |
+
{
|
| 425 |
+
"epoch": 0.12,
|
| 426 |
+
"grad_norm": 7.556680679321289,
|
| 427 |
+
"learning_rate": 4.993910125649561e-05,
|
| 428 |
+
"loss": 2.294,
|
| 429 |
+
"step": 600
|
| 430 |
+
},
|
| 431 |
+
{
|
| 432 |
+
"epoch": 0.122,
|
| 433 |
+
"grad_norm": 1.423615574836731,
|
| 434 |
+
"learning_rate": 4.992631880567301e-05,
|
| 435 |
+
"loss": 0.4881,
|
| 436 |
+
"step": 610
|
| 437 |
+
},
|
| 438 |
+
{
|
| 439 |
+
"epoch": 0.124,
|
| 440 |
+
"grad_norm": 0.8066297769546509,
|
| 441 |
+
"learning_rate": 4.991232148123761e-05,
|
| 442 |
+
"loss": 3.3744,
|
| 443 |
+
"step": 620
|
| 444 |
+
},
|
| 445 |
+
{
|
| 446 |
+
"epoch": 0.126,
|
| 447 |
+
"grad_norm": 1.3097137212753296,
|
| 448 |
+
"learning_rate": 4.989710996539926e-05,
|
| 449 |
+
"loss": 1.2847,
|
| 450 |
+
"step": 630
|
| 451 |
+
},
|
| 452 |
+
{
|
| 453 |
+
"epoch": 0.128,
|
| 454 |
+
"grad_norm": 0.45432719588279724,
|
| 455 |
+
"learning_rate": 4.988068499954578e-05,
|
| 456 |
+
"loss": 0.9029,
|
| 457 |
+
"step": 640
|
| 458 |
+
},
|
| 459 |
+
{
|
| 460 |
+
"epoch": 0.13,
|
| 461 |
+
"grad_norm": 0.6234528422355652,
|
| 462 |
+
"learning_rate": 4.9863047384206835e-05,
|
| 463 |
+
"loss": 1.3799,
|
| 464 |
+
"step": 650
|
| 465 |
+
},
|
| 466 |
+
{
|
| 467 |
+
"epoch": 0.132,
|
| 468 |
+
"grad_norm": 5.1088385581970215,
|
| 469 |
+
"learning_rate": 4.984419797901491e-05,
|
| 470 |
+
"loss": 3.7841,
|
| 471 |
+
"step": 660
|
| 472 |
+
},
|
| 473 |
+
{
|
| 474 |
+
"epoch": 0.134,
|
| 475 |
+
"grad_norm": 1.5142585039138794,
|
| 476 |
+
"learning_rate": 4.982413770266342e-05,
|
| 477 |
+
"loss": 0.9186,
|
| 478 |
+
"step": 670
|
| 479 |
+
},
|
| 480 |
+
{
|
| 481 |
+
"epoch": 0.136,
|
| 482 |
+
"grad_norm": 1.2981692552566528,
|
| 483 |
+
"learning_rate": 4.980286753286195e-05,
|
| 484 |
+
"loss": 1.4738,
|
| 485 |
+
"step": 680
|
| 486 |
+
},
|
| 487 |
+
{
|
| 488 |
+
"epoch": 0.138,
|
| 489 |
+
"grad_norm": 12.405978202819824,
|
| 490 |
+
"learning_rate": 4.978038850628854e-05,
|
| 491 |
+
"loss": 3.8651,
|
| 492 |
+
"step": 690
|
| 493 |
+
},
|
| 494 |
+
{
|
| 495 |
+
"epoch": 0.14,
|
| 496 |
+
"grad_norm": 3.6295487880706787,
|
| 497 |
+
"learning_rate": 4.975670171853926e-05,
|
| 498 |
+
"loss": 1.1881,
|
| 499 |
+
"step": 700
|
| 500 |
+
},
|
| 501 |
+
{
|
| 502 |
+
"epoch": 0.142,
|
| 503 |
+
"grad_norm": 1.0809906721115112,
|
| 504 |
+
"learning_rate": 4.9731808324074717e-05,
|
| 505 |
+
"loss": 0.8245,
|
| 506 |
+
"step": 710
|
| 507 |
+
},
|
| 508 |
+
{
|
| 509 |
+
"epoch": 0.144,
|
| 510 |
+
"grad_norm": 3.7995641231536865,
|
| 511 |
+
"learning_rate": 4.9705709536163824e-05,
|
| 512 |
+
"loss": 1.2642,
|
| 513 |
+
"step": 720
|
| 514 |
+
},
|
| 515 |
+
{
|
| 516 |
+
"epoch": 0.146,
|
| 517 |
+
"grad_norm": 0.8331463932991028,
|
| 518 |
+
"learning_rate": 4.96784066268247e-05,
|
| 519 |
+
"loss": 1.6256,
|
| 520 |
+
"step": 730
|
| 521 |
+
},
|
| 522 |
+
{
|
| 523 |
+
"epoch": 0.148,
|
| 524 |
+
"grad_norm": 5.477302551269531,
|
| 525 |
+
"learning_rate": 4.964990092676263e-05,
|
| 526 |
+
"loss": 9.4718,
|
| 527 |
+
"step": 740
|
| 528 |
+
},
|
| 529 |
+
{
|
| 530 |
+
"epoch": 0.15,
|
| 531 |
+
"grad_norm": 2.345902681350708,
|
| 532 |
+
"learning_rate": 4.962019382530521e-05,
|
| 533 |
+
"loss": 1.6593,
|
| 534 |
+
"step": 750
|
| 535 |
+
},
|
| 536 |
+
{
|
| 537 |
+
"epoch": 0.152,
|
| 538 |
+
"grad_norm": 1.1139212846755981,
|
| 539 |
+
"learning_rate": 4.9589286770334654e-05,
|
| 540 |
+
"loss": 1.4136,
|
| 541 |
+
"step": 760
|
| 542 |
+
},
|
| 543 |
+
{
|
| 544 |
+
"epoch": 0.154,
|
| 545 |
+
"grad_norm": 1.1984540224075317,
|
| 546 |
+
"learning_rate": 4.9557181268217227e-05,
|
| 547 |
+
"loss": 2.5935,
|
| 548 |
+
"step": 770
|
| 549 |
+
},
|
| 550 |
+
{
|
| 551 |
+
"epoch": 0.156,
|
| 552 |
+
"grad_norm": 1.6878119707107544,
|
| 553 |
+
"learning_rate": 4.952387888372979e-05,
|
| 554 |
+
"loss": 2.0708,
|
| 555 |
+
"step": 780
|
| 556 |
+
},
|
| 557 |
+
{
|
| 558 |
+
"epoch": 0.158,
|
| 559 |
+
"grad_norm": 0.7853636145591736,
|
| 560 |
+
"learning_rate": 4.94893812399836e-05,
|
| 561 |
+
"loss": 0.8737,
|
| 562 |
+
"step": 790
|
| 563 |
+
},
|
| 564 |
+
{
|
| 565 |
+
"epoch": 0.16,
|
| 566 |
+
"grad_norm": 0.6999472379684448,
|
| 567 |
+
"learning_rate": 4.9453690018345144e-05,
|
| 568 |
+
"loss": 2.0691,
|
| 569 |
+
"step": 800
|
| 570 |
+
},
|
| 571 |
+
{
|
| 572 |
+
"epoch": 0.162,
|
| 573 |
+
"grad_norm": 5.1682329177856445,
|
| 574 |
+
"learning_rate": 4.94168069583542e-05,
|
| 575 |
+
"loss": 0.7324,
|
| 576 |
+
"step": 810
|
| 577 |
+
},
|
| 578 |
+
{
|
| 579 |
+
"epoch": 0.164,
|
| 580 |
+
"grad_norm": 0.0,
|
| 581 |
+
"learning_rate": 4.937873385763908e-05,
|
| 582 |
+
"loss": 0.577,
|
| 583 |
+
"step": 820
|
| 584 |
+
},
|
| 585 |
+
{
|
| 586 |
+
"epoch": 0.166,
|
| 587 |
+
"grad_norm": 0.8014002442359924,
|
| 588 |
+
"learning_rate": 4.933947257182901e-05,
|
| 589 |
+
"loss": 0.6428,
|
| 590 |
+
"step": 830
|
| 591 |
+
},
|
| 592 |
+
{
|
| 593 |
+
"epoch": 0.168,
|
| 594 |
+
"grad_norm": 1.618863821029663,
|
| 595 |
+
"learning_rate": 4.929902501446366e-05,
|
| 596 |
+
"loss": 1.4208,
|
| 597 |
+
"step": 840
|
| 598 |
+
},
|
| 599 |
+
{
|
| 600 |
+
"epoch": 0.17,
|
| 601 |
+
"grad_norm": 8.870545387268066,
|
| 602 |
+
"learning_rate": 4.925739315689991e-05,
|
| 603 |
+
"loss": 1.1485,
|
| 604 |
+
"step": 850
|
| 605 |
+
},
|
| 606 |
+
{
|
| 607 |
+
"epoch": 0.172,
|
| 608 |
+
"grad_norm": 0.5625079274177551,
|
| 609 |
+
"learning_rate": 4.9214579028215776e-05,
|
| 610 |
+
"loss": 0.8729,
|
| 611 |
+
"step": 860
|
| 612 |
+
},
|
| 613 |
+
{
|
| 614 |
+
"epoch": 0.174,
|
| 615 |
+
"grad_norm": 1.9488248825073242,
|
| 616 |
+
"learning_rate": 4.917058471511149e-05,
|
| 617 |
+
"loss": 2.3371,
|
| 618 |
+
"step": 870
|
| 619 |
+
},
|
| 620 |
+
{
|
| 621 |
+
"epoch": 0.176,
|
| 622 |
+
"grad_norm": 1.2681325674057007,
|
| 623 |
+
"learning_rate": 4.912541236180779e-05,
|
| 624 |
+
"loss": 0.6887,
|
| 625 |
+
"step": 880
|
| 626 |
+
},
|
| 627 |
+
{
|
| 628 |
+
"epoch": 0.178,
|
| 629 |
+
"grad_norm": 1.7612295150756836,
|
| 630 |
+
"learning_rate": 4.907906416994146e-05,
|
| 631 |
+
"loss": 0.8769,
|
| 632 |
+
"step": 890
|
| 633 |
+
},
|
| 634 |
+
{
|
| 635 |
+
"epoch": 0.18,
|
| 636 |
+
"grad_norm": 0.9437585473060608,
|
| 637 |
+
"learning_rate": 4.9031542398457974e-05,
|
| 638 |
+
"loss": 2.4827,
|
| 639 |
+
"step": 900
|
| 640 |
+
},
|
| 641 |
+
{
|
| 642 |
+
"epoch": 0.182,
|
| 643 |
+
"grad_norm": 0.5411396026611328,
|
| 644 |
+
"learning_rate": 4.898284936350144e-05,
|
| 645 |
+
"loss": 0.5722,
|
| 646 |
+
"step": 910
|
| 647 |
+
},
|
| 648 |
+
{
|
| 649 |
+
"epoch": 0.184,
|
| 650 |
+
"grad_norm": 1.9333531856536865,
|
| 651 |
+
"learning_rate": 4.893298743830168e-05,
|
| 652 |
+
"loss": 1.3822,
|
| 653 |
+
"step": 920
|
| 654 |
+
},
|
| 655 |
+
{
|
| 656 |
+
"epoch": 0.186,
|
| 657 |
+
"grad_norm": 1.7342445850372314,
|
| 658 |
+
"learning_rate": 4.888195905305859e-05,
|
| 659 |
+
"loss": 0.7233,
|
| 660 |
+
"step": 930
|
| 661 |
+
},
|
| 662 |
+
{
|
| 663 |
+
"epoch": 0.188,
|
| 664 |
+
"grad_norm": 2.7781763076782227,
|
| 665 |
+
"learning_rate": 4.882976669482367e-05,
|
| 666 |
+
"loss": 9.3579,
|
| 667 |
+
"step": 940
|
| 668 |
+
},
|
| 669 |
+
{
|
| 670 |
+
"epoch": 0.19,
|
| 671 |
+
"grad_norm": 11.274718284606934,
|
| 672 |
+
"learning_rate": 4.877641290737884e-05,
|
| 673 |
+
"loss": 2.2926,
|
| 674 |
+
"step": 950
|
| 675 |
+
},
|
| 676 |
+
{
|
| 677 |
+
"epoch": 0.192,
|
| 678 |
+
"grad_norm": 2.7516400814056396,
|
| 679 |
+
"learning_rate": 4.8721900291112415e-05,
|
| 680 |
+
"loss": 0.8563,
|
| 681 |
+
"step": 960
|
| 682 |
+
},
|
| 683 |
+
{
|
| 684 |
+
"epoch": 0.194,
|
| 685 |
+
"grad_norm": 2.3136308193206787,
|
| 686 |
+
"learning_rate": 4.8666231502892415e-05,
|
| 687 |
+
"loss": 2.229,
|
| 688 |
+
"step": 970
|
| 689 |
+
},
|
| 690 |
+
{
|
| 691 |
+
"epoch": 0.196,
|
| 692 |
+
"grad_norm": 2.1585183143615723,
|
| 693 |
+
"learning_rate": 4.860940925593703e-05,
|
| 694 |
+
"loss": 1.7804,
|
| 695 |
+
"step": 980
|
| 696 |
+
},
|
| 697 |
+
{
|
| 698 |
+
"epoch": 0.198,
|
| 699 |
+
"grad_norm": 2.338162899017334,
|
| 700 |
+
"learning_rate": 4.855143631968242e-05,
|
| 701 |
+
"loss": 0.826,
|
| 702 |
+
"step": 990
|
| 703 |
+
},
|
| 704 |
+
{
|
| 705 |
+
"epoch": 0.2,
|
| 706 |
+
"grad_norm": 1.9523894786834717,
|
| 707 |
+
"learning_rate": 4.849231551964771e-05,
|
| 708 |
+
"loss": 1.746,
|
| 709 |
+
"step": 1000
|
| 710 |
+
},
|
| 711 |
+
{
|
| 712 |
+
"epoch": 0.202,
|
| 713 |
+
"grad_norm": 1.1914769411087036,
|
| 714 |
+
"learning_rate": 4.843204973729729e-05,
|
| 715 |
+
"loss": 2.6019,
|
| 716 |
+
"step": 1010
|
| 717 |
+
},
|
| 718 |
+
{
|
| 719 |
+
"epoch": 0.204,
|
| 720 |
+
"grad_norm": 0.8508822917938232,
|
| 721 |
+
"learning_rate": 4.837064190990036e-05,
|
| 722 |
+
"loss": 1.5425,
|
| 723 |
+
"step": 1020
|
| 724 |
+
},
|
| 725 |
+
{
|
| 726 |
+
"epoch": 0.206,
|
| 727 |
+
"grad_norm": 1.5970311164855957,
|
| 728 |
+
"learning_rate": 4.830809503038781e-05,
|
| 729 |
+
"loss": 1.2792,
|
| 730 |
+
"step": 1030
|
| 731 |
+
},
|
| 732 |
+
{
|
| 733 |
+
"epoch": 0.208,
|
| 734 |
+
"grad_norm": 1.7772223949432373,
|
| 735 |
+
"learning_rate": 4.8244412147206284e-05,
|
| 736 |
+
"loss": 0.9529,
|
| 737 |
+
"step": 1040
|
| 738 |
+
},
|
| 739 |
+
{
|
| 740 |
+
"epoch": 0.21,
|
| 741 |
+
"grad_norm": 2.8910939693450928,
|
| 742 |
+
"learning_rate": 4.817959636416969e-05,
|
| 743 |
+
"loss": 1.3505,
|
| 744 |
+
"step": 1050
|
| 745 |
+
},
|
| 746 |
+
{
|
| 747 |
+
"epoch": 0.212,
|
| 748 |
+
"grad_norm": 1.4733608961105347,
|
| 749 |
+
"learning_rate": 4.8113650840307834e-05,
|
| 750 |
+
"loss": 0.9163,
|
| 751 |
+
"step": 1060
|
| 752 |
+
},
|
| 753 |
+
{
|
| 754 |
+
"epoch": 0.214,
|
| 755 |
+
"grad_norm": 0.32765626907348633,
|
| 756 |
+
"learning_rate": 4.8046578789712515e-05,
|
| 757 |
+
"loss": 1.228,
|
| 758 |
+
"step": 1070
|
| 759 |
+
},
|
| 760 |
+
{
|
| 761 |
+
"epoch": 0.216,
|
| 762 |
+
"grad_norm": 9.417176246643066,
|
| 763 |
+
"learning_rate": 4.797838348138086e-05,
|
| 764 |
+
"loss": 1.1117,
|
| 765 |
+
"step": 1080
|
| 766 |
+
},
|
| 767 |
+
{
|
| 768 |
+
"epoch": 0.218,
|
| 769 |
+
"grad_norm": 0.6818392872810364,
|
| 770 |
+
"learning_rate": 4.790906823905599e-05,
|
| 771 |
+
"loss": 1.7063,
|
| 772 |
+
"step": 1090
|
| 773 |
+
},
|
| 774 |
+
{
|
| 775 |
+
"epoch": 0.22,
|
| 776 |
+
"grad_norm": 0.9097204208374023,
|
| 777 |
+
"learning_rate": 4.783863644106502e-05,
|
| 778 |
+
"loss": 0.9846,
|
| 779 |
+
"step": 1100
|
| 780 |
+
},
|
| 781 |
+
{
|
| 782 |
+
"epoch": 0.222,
|
| 783 |
+
"grad_norm": 2.9016330242156982,
|
| 784 |
+
"learning_rate": 4.776709152015443e-05,
|
| 785 |
+
"loss": 0.9996,
|
| 786 |
+
"step": 1110
|
| 787 |
+
},
|
| 788 |
+
{
|
| 789 |
+
"epoch": 0.224,
|
| 790 |
+
"grad_norm": 2.820084810256958,
|
| 791 |
+
"learning_rate": 4.769443696332272e-05,
|
| 792 |
+
"loss": 1.5269,
|
| 793 |
+
"step": 1120
|
| 794 |
+
},
|
| 795 |
+
{
|
| 796 |
+
"epoch": 0.226,
|
| 797 |
+
"grad_norm": 0.6218633651733398,
|
| 798 |
+
"learning_rate": 4.762067631165049e-05,
|
| 799 |
+
"loss": 1.7736,
|
| 800 |
+
"step": 1130
|
| 801 |
+
},
|
| 802 |
+
{
|
| 803 |
+
"epoch": 0.228,
|
| 804 |
+
"grad_norm": 9.925037384033203,
|
| 805 |
+
"learning_rate": 4.754581316012785e-05,
|
| 806 |
+
"loss": 1.0233,
|
| 807 |
+
"step": 1140
|
| 808 |
+
},
|
| 809 |
+
{
|
| 810 |
+
"epoch": 0.23,
|
| 811 |
+
"grad_norm": 11.37203598022461,
|
| 812 |
+
"learning_rate": 4.7469851157479177e-05,
|
| 813 |
+
"loss": 1.3813,
|
| 814 |
+
"step": 1150
|
| 815 |
+
},
|
| 816 |
+
{
|
| 817 |
+
"epoch": 0.232,
|
| 818 |
+
"grad_norm": 0.0,
|
| 819 |
+
"learning_rate": 4.7392794005985326e-05,
|
| 820 |
+
"loss": 0.6272,
|
| 821 |
+
"step": 1160
|
| 822 |
+
},
|
| 823 |
+
{
|
| 824 |
+
"epoch": 0.234,
|
| 825 |
+
"grad_norm": 0.4212048351764679,
|
| 826 |
+
"learning_rate": 4.731464546130314e-05,
|
| 827 |
+
"loss": 2.541,
|
| 828 |
+
"step": 1170
|
| 829 |
+
},
|
| 830 |
+
{
|
| 831 |
+
"epoch": 0.236,
|
| 832 |
+
"grad_norm": 0.8374232053756714,
|
| 833 |
+
"learning_rate": 4.723540933228244e-05,
|
| 834 |
+
"loss": 1.0791,
|
| 835 |
+
"step": 1180
|
| 836 |
+
},
|
| 837 |
+
{
|
| 838 |
+
"epoch": 0.238,
|
| 839 |
+
"grad_norm": 3.9064877033233643,
|
| 840 |
+
"learning_rate": 4.715508948078037e-05,
|
| 841 |
+
"loss": 1.5918,
|
| 842 |
+
"step": 1190
|
| 843 |
+
},
|
| 844 |
+
{
|
| 845 |
+
"epoch": 0.24,
|
| 846 |
+
"grad_norm": 1.2058210372924805,
|
| 847 |
+
"learning_rate": 4.707368982147318e-05,
|
| 848 |
+
"loss": 1.3247,
|
| 849 |
+
"step": 1200
|
| 850 |
+
},
|
| 851 |
+
{
|
| 852 |
+
"epoch": 0.242,
|
| 853 |
+
"grad_norm": 1.6068898439407349,
|
| 854 |
+
"learning_rate": 4.6991214321665414e-05,
|
| 855 |
+
"loss": 1.0801,
|
| 856 |
+
"step": 1210
|
| 857 |
+
},
|
| 858 |
+
{
|
| 859 |
+
"epoch": 0.244,
|
| 860 |
+
"grad_norm": 0.9633479118347168,
|
| 861 |
+
"learning_rate": 4.690766700109659e-05,
|
| 862 |
+
"loss": 2.2492,
|
| 863 |
+
"step": 1220
|
| 864 |
+
},
|
| 865 |
+
{
|
| 866 |
+
"epoch": 0.246,
|
| 867 |
+
"grad_norm": 3.4308974742889404,
|
| 868 |
+
"learning_rate": 4.682305193174524e-05,
|
| 869 |
+
"loss": 0.9898,
|
| 870 |
+
"step": 1230
|
| 871 |
+
},
|
| 872 |
+
{
|
| 873 |
+
"epoch": 0.248,
|
| 874 |
+
"grad_norm": 2.3105175495147705,
|
| 875 |
+
"learning_rate": 4.6737373237630476e-05,
|
| 876 |
+
"loss": 0.8907,
|
| 877 |
+
"step": 1240
|
| 878 |
+
},
|
| 879 |
+
{
|
| 880 |
+
"epoch": 0.25,
|
| 881 |
+
"grad_norm": 1.836309790611267,
|
| 882 |
+
"learning_rate": 4.665063509461097e-05,
|
| 883 |
+
"loss": 0.7756,
|
| 884 |
+
"step": 1250
|
| 885 |
+
},
|
| 886 |
+
{
|
| 887 |
+
"epoch": 0.252,
|
| 888 |
+
"grad_norm": 2.3956916332244873,
|
| 889 |
+
"learning_rate": 4.656284173018144e-05,
|
| 890 |
+
"loss": 0.6585,
|
| 891 |
+
"step": 1260
|
| 892 |
+
},
|
| 893 |
+
{
|
| 894 |
+
"epoch": 0.254,
|
| 895 |
+
"grad_norm": 1.4388060569763184,
|
| 896 |
+
"learning_rate": 4.6473997423266614e-05,
|
| 897 |
+
"loss": 1.2712,
|
| 898 |
+
"step": 1270
|
| 899 |
+
},
|
| 900 |
+
{
|
| 901 |
+
"epoch": 0.256,
|
| 902 |
+
"grad_norm": 0.7336168885231018,
|
| 903 |
+
"learning_rate": 4.638410650401267e-05,
|
| 904 |
+
"loss": 1.2611,
|
| 905 |
+
"step": 1280
|
| 906 |
+
},
|
| 907 |
+
{
|
| 908 |
+
"epoch": 0.258,
|
| 909 |
+
"grad_norm": 0.7995616793632507,
|
| 910 |
+
"learning_rate": 4.629317335357619e-05,
|
| 911 |
+
"loss": 1.0479,
|
| 912 |
+
"step": 1290
|
| 913 |
+
},
|
| 914 |
+
{
|
| 915 |
+
"epoch": 0.26,
|
| 916 |
+
"grad_norm": 1.2063746452331543,
|
| 917 |
+
"learning_rate": 4.620120240391065e-05,
|
| 918 |
+
"loss": 1.6104,
|
| 919 |
+
"step": 1300
|
| 920 |
+
},
|
| 921 |
+
{
|
| 922 |
+
"epoch": 0.262,
|
| 923 |
+
"grad_norm": 0.9148932695388794,
|
| 924 |
+
"learning_rate": 4.610819813755038e-05,
|
| 925 |
+
"loss": 1.3264,
|
| 926 |
+
"step": 1310
|
| 927 |
+
},
|
| 928 |
+
{
|
| 929 |
+
"epoch": 0.264,
|
| 930 |
+
"grad_norm": 11.944299697875977,
|
| 931 |
+
"learning_rate": 4.601416508739211e-05,
|
| 932 |
+
"loss": 1.1883,
|
| 933 |
+
"step": 1320
|
| 934 |
+
},
|
| 935 |
+
{
|
| 936 |
+
"epoch": 0.266,
|
| 937 |
+
"grad_norm": 6.062456130981445,
|
| 938 |
+
"learning_rate": 4.591910783647404e-05,
|
| 939 |
+
"loss": 1.4327,
|
| 940 |
+
"step": 1330
|
| 941 |
+
},
|
| 942 |
+
{
|
| 943 |
+
"epoch": 0.268,
|
| 944 |
+
"grad_norm": 8.919118881225586,
|
| 945 |
+
"learning_rate": 4.5823031017752485e-05,
|
| 946 |
+
"loss": 1.3558,
|
| 947 |
+
"step": 1340
|
| 948 |
+
},
|
| 949 |
+
{
|
| 950 |
+
"epoch": 0.27,
|
| 951 |
+
"grad_norm": 0.7179931402206421,
|
| 952 |
+
"learning_rate": 4.572593931387604e-05,
|
| 953 |
+
"loss": 2.6668,
|
| 954 |
+
"step": 1350
|
| 955 |
+
},
|
| 956 |
+
{
|
| 957 |
+
"epoch": 0.272,
|
| 958 |
+
"grad_norm": 1.1414953470230103,
|
| 959 |
+
"learning_rate": 4.562783745695738e-05,
|
| 960 |
+
"loss": 0.4225,
|
| 961 |
+
"step": 1360
|
| 962 |
+
},
|
| 963 |
+
{
|
| 964 |
+
"epoch": 0.274,
|
| 965 |
+
"grad_norm": 2.28456449508667,
|
| 966 |
+
"learning_rate": 4.5528730228342605e-05,
|
| 967 |
+
"loss": 1.1548,
|
| 968 |
+
"step": 1370
|
| 969 |
+
},
|
| 970 |
+
{
|
| 971 |
+
"epoch": 0.276,
|
| 972 |
+
"grad_norm": 4.372885227203369,
|
| 973 |
+
"learning_rate": 4.542862245837821e-05,
|
| 974 |
+
"loss": 0.9096,
|
| 975 |
+
"step": 1380
|
| 976 |
+
},
|
| 977 |
+
{
|
| 978 |
+
"epoch": 0.278,
|
| 979 |
+
"grad_norm": 0.26524534821510315,
|
| 980 |
+
"learning_rate": 4.532751902617569e-05,
|
| 981 |
+
"loss": 1.3681,
|
| 982 |
+
"step": 1390
|
| 983 |
+
},
|
| 984 |
+
{
|
| 985 |
+
"epoch": 0.28,
|
| 986 |
+
"grad_norm": 1.4623117446899414,
|
| 987 |
+
"learning_rate": 4.522542485937369e-05,
|
| 988 |
+
"loss": 1.286,
|
| 989 |
+
"step": 1400
|
| 990 |
+
},
|
| 991 |
+
{
|
| 992 |
+
"epoch": 0.282,
|
| 993 |
+
"grad_norm": 1.503664255142212,
|
| 994 |
+
"learning_rate": 4.512234493389785e-05,
|
| 995 |
+
"loss": 1.0204,
|
| 996 |
+
"step": 1410
|
| 997 |
+
},
|
| 998 |
+
{
|
| 999 |
+
"epoch": 0.284,
|
| 1000 |
+
"grad_norm": 0.5991083979606628,
|
| 1001 |
+
"learning_rate": 4.5018284273718336e-05,
|
| 1002 |
+
"loss": 1.083,
|
| 1003 |
+
"step": 1420
|
| 1004 |
+
},
|
| 1005 |
+
{
|
| 1006 |
+
"epoch": 0.286,
|
| 1007 |
+
"grad_norm": 0.6346163749694824,
|
| 1008 |
+
"learning_rate": 4.491324795060491e-05,
|
| 1009 |
+
"loss": 0.5139,
|
| 1010 |
+
"step": 1430
|
| 1011 |
+
},
|
| 1012 |
+
{
|
| 1013 |
+
"epoch": 0.288,
|
| 1014 |
+
"grad_norm": 1.6709283590316772,
|
| 1015 |
+
"learning_rate": 4.480724108387977e-05,
|
| 1016 |
+
"loss": 1.591,
|
| 1017 |
+
"step": 1440
|
| 1018 |
+
},
|
| 1019 |
+
{
|
| 1020 |
+
"epoch": 0.29,
|
| 1021 |
+
"grad_norm": 0.49724531173706055,
|
| 1022 |
+
"learning_rate": 4.4700268840168045e-05,
|
| 1023 |
+
"loss": 0.4862,
|
| 1024 |
+
"step": 1450
|
| 1025 |
+
},
|
| 1026 |
+
{
|
| 1027 |
+
"epoch": 0.292,
|
| 1028 |
+
"grad_norm": 3.7753849029541016,
|
| 1029 |
+
"learning_rate": 4.4592336433146e-05,
|
| 1030 |
+
"loss": 0.7162,
|
| 1031 |
+
"step": 1460
|
| 1032 |
+
},
|
| 1033 |
+
{
|
| 1034 |
+
"epoch": 0.294,
|
| 1035 |
+
"grad_norm": 1.118366003036499,
|
| 1036 |
+
"learning_rate": 4.448344912328686e-05,
|
| 1037 |
+
"loss": 1.6884,
|
| 1038 |
+
"step": 1470
|
| 1039 |
+
},
|
| 1040 |
+
{
|
| 1041 |
+
"epoch": 0.296,
|
| 1042 |
+
"grad_norm": 0.3718686103820801,
|
| 1043 |
+
"learning_rate": 4.4373612217604496e-05,
|
| 1044 |
+
"loss": 0.9289,
|
| 1045 |
+
"step": 1480
|
| 1046 |
+
},
|
| 1047 |
+
{
|
| 1048 |
+
"epoch": 0.298,
|
| 1049 |
+
"grad_norm": 2.443821668624878,
|
| 1050 |
+
"learning_rate": 4.426283106939474e-05,
|
| 1051 |
+
"loss": 1.546,
|
| 1052 |
+
"step": 1490
|
| 1053 |
+
},
|
| 1054 |
+
{
|
| 1055 |
+
"epoch": 0.3,
|
| 1056 |
+
"grad_norm": 1.6871048212051392,
|
| 1057 |
+
"learning_rate": 4.415111107797445e-05,
|
| 1058 |
+
"loss": 0.9777,
|
| 1059 |
+
"step": 1500
|
| 1060 |
+
},
|
| 1061 |
+
{
|
| 1062 |
+
"epoch": 0.302,
|
| 1063 |
+
"grad_norm": 1.2891165018081665,
|
| 1064 |
+
"learning_rate": 4.403845768841842e-05,
|
| 1065 |
+
"loss": 0.6637,
|
| 1066 |
+
"step": 1510
|
| 1067 |
+
},
|
| 1068 |
+
{
|
| 1069 |
+
"epoch": 0.304,
|
| 1070 |
+
"grad_norm": 1.3466380834579468,
|
| 1071 |
+
"learning_rate": 4.3924876391293915e-05,
|
| 1072 |
+
"loss": 1.7305,
|
| 1073 |
+
"step": 1520
|
| 1074 |
+
},
|
| 1075 |
+
{
|
| 1076 |
+
"epoch": 0.306,
|
| 1077 |
+
"grad_norm": 1.7780977487564087,
|
| 1078 |
+
"learning_rate": 4.381037272239311e-05,
|
| 1079 |
+
"loss": 0.5603,
|
| 1080 |
+
"step": 1530
|
| 1081 |
+
},
|
| 1082 |
+
{
|
| 1083 |
+
"epoch": 0.308,
|
| 1084 |
+
"grad_norm": 0.5368999242782593,
|
| 1085 |
+
"learning_rate": 4.36949522624633e-05,
|
| 1086 |
+
"loss": 3.2643,
|
| 1087 |
+
"step": 1540
|
| 1088 |
+
},
|
| 1089 |
+
{
|
| 1090 |
+
"epoch": 0.31,
|
| 1091 |
+
"grad_norm": 0.0,
|
| 1092 |
+
"learning_rate": 4.357862063693486e-05,
|
| 1093 |
+
"loss": 3.7458,
|
| 1094 |
+
"step": 1550
|
| 1095 |
+
},
|
| 1096 |
+
{
|
| 1097 |
+
"epoch": 0.312,
|
| 1098 |
+
"grad_norm": 0.3356248736381531,
|
| 1099 |
+
"learning_rate": 4.3461383515647106e-05,
|
| 1100 |
+
"loss": 1.9472,
|
| 1101 |
+
"step": 1560
|
| 1102 |
+
},
|
| 1103 |
+
{
|
| 1104 |
+
"epoch": 0.314,
|
| 1105 |
+
"grad_norm": 0.4538246989250183,
|
| 1106 |
+
"learning_rate": 4.334324661257191e-05,
|
| 1107 |
+
"loss": 1.0152,
|
| 1108 |
+
"step": 1570
|
| 1109 |
+
},
|
| 1110 |
+
{
|
| 1111 |
+
"epoch": 0.316,
|
| 1112 |
+
"grad_norm": 0.0,
|
| 1113 |
+
"learning_rate": 4.3224215685535294e-05,
|
| 1114 |
+
"loss": 0.9588,
|
| 1115 |
+
"step": 1580
|
| 1116 |
+
},
|
| 1117 |
+
{
|
| 1118 |
+
"epoch": 0.318,
|
| 1119 |
+
"grad_norm": 4.520866394042969,
|
| 1120 |
+
"learning_rate": 4.3104296535936695e-05,
|
| 1121 |
+
"loss": 1.1131,
|
| 1122 |
+
"step": 1590
|
| 1123 |
+
},
|
| 1124 |
+
{
|
| 1125 |
+
"epoch": 0.32,
|
| 1126 |
+
"grad_norm": 1.177220106124878,
|
| 1127 |
+
"learning_rate": 4.2983495008466276e-05,
|
| 1128 |
+
"loss": 2.4659,
|
| 1129 |
+
"step": 1600
|
| 1130 |
+
},
|
| 1131 |
+
{
|
| 1132 |
+
"epoch": 0.322,
|
| 1133 |
+
"grad_norm": 1.685763955116272,
|
| 1134 |
+
"learning_rate": 4.2861816990820084e-05,
|
| 1135 |
+
"loss": 0.8816,
|
| 1136 |
+
"step": 1610
|
| 1137 |
+
},
|
| 1138 |
+
{
|
| 1139 |
+
"epoch": 0.324,
|
| 1140 |
+
"grad_norm": 0.0,
|
| 1141 |
+
"learning_rate": 4.273926841341302e-05,
|
| 1142 |
+
"loss": 0.519,
|
| 1143 |
+
"step": 1620
|
| 1144 |
+
},
|
| 1145 |
+
{
|
| 1146 |
+
"epoch": 0.326,
|
| 1147 |
+
"grad_norm": 0.0,
|
| 1148 |
+
"learning_rate": 4.261585524908987e-05,
|
| 1149 |
+
"loss": 2.3195,
|
| 1150 |
+
"step": 1630
|
| 1151 |
+
},
|
| 1152 |
+
{
|
| 1153 |
+
"epoch": 0.328,
|
| 1154 |
+
"grad_norm": 3.9909987449645996,
|
| 1155 |
+
"learning_rate": 4.249158351283414e-05,
|
| 1156 |
+
"loss": 3.6418,
|
| 1157 |
+
"step": 1640
|
| 1158 |
+
},
|
| 1159 |
+
{
|
| 1160 |
+
"epoch": 0.33,
|
| 1161 |
+
"grad_norm": 2.534325361251831,
|
| 1162 |
+
"learning_rate": 4.2366459261474933e-05,
|
| 1163 |
+
"loss": 2.1609,
|
| 1164 |
+
"step": 1650
|
| 1165 |
+
},
|
| 1166 |
+
{
|
| 1167 |
+
"epoch": 0.332,
|
| 1168 |
+
"grad_norm": 1.4876514673233032,
|
| 1169 |
+
"learning_rate": 4.224048859339175e-05,
|
| 1170 |
+
"loss": 1.4675,
|
| 1171 |
+
"step": 1660
|
| 1172 |
+
},
|
| 1173 |
+
{
|
| 1174 |
+
"epoch": 0.334,
|
| 1175 |
+
"grad_norm": 8.85960578918457,
|
| 1176 |
+
"learning_rate": 4.211367764821722e-05,
|
| 1177 |
+
"loss": 1.3251,
|
| 1178 |
+
"step": 1670
|
| 1179 |
+
},
|
| 1180 |
+
{
|
| 1181 |
+
"epoch": 0.336,
|
| 1182 |
+
"grad_norm": 0.4190084636211395,
|
| 1183 |
+
"learning_rate": 4.198603260653792e-05,
|
| 1184 |
+
"loss": 1.0805,
|
| 1185 |
+
"step": 1680
|
| 1186 |
+
},
|
| 1187 |
+
{
|
| 1188 |
+
"epoch": 0.338,
|
| 1189 |
+
"grad_norm": 1.5515415668487549,
|
| 1190 |
+
"learning_rate": 4.185755968959308e-05,
|
| 1191 |
+
"loss": 0.5501,
|
| 1192 |
+
"step": 1690
|
| 1193 |
+
},
|
| 1194 |
+
{
|
| 1195 |
+
"epoch": 0.34,
|
| 1196 |
+
"grad_norm": 2.4281444549560547,
|
| 1197 |
+
"learning_rate": 4.172826515897146e-05,
|
| 1198 |
+
"loss": 0.7312,
|
| 1199 |
+
"step": 1700
|
| 1200 |
+
},
|
| 1201 |
+
{
|
| 1202 |
+
"epoch": 0.342,
|
| 1203 |
+
"grad_norm": 0.6593759655952454,
|
| 1204 |
+
"learning_rate": 4.1598155316306044e-05,
|
| 1205 |
+
"loss": 0.5805,
|
| 1206 |
+
"step": 1710
|
| 1207 |
+
},
|
| 1208 |
+
{
|
| 1209 |
+
"epoch": 0.344,
|
| 1210 |
+
"grad_norm": 1.126512050628662,
|
| 1211 |
+
"learning_rate": 4.146723650296701e-05,
|
| 1212 |
+
"loss": 1.9512,
|
| 1213 |
+
"step": 1720
|
| 1214 |
+
},
|
| 1215 |
+
{
|
| 1216 |
+
"epoch": 0.346,
|
| 1217 |
+
"grad_norm": 0.6861071586608887,
|
| 1218 |
+
"learning_rate": 4.133551509975264e-05,
|
| 1219 |
+
"loss": 1.5697,
|
| 1220 |
+
"step": 1730
|
| 1221 |
+
},
|
| 1222 |
+
{
|
| 1223 |
+
"epoch": 0.348,
|
| 1224 |
+
"grad_norm": 0.5916430354118347,
|
| 1225 |
+
"learning_rate": 4.1202997526578276e-05,
|
| 1226 |
+
"loss": 0.8957,
|
| 1227 |
+
"step": 1740
|
| 1228 |
+
},
|
| 1229 |
+
{
|
| 1230 |
+
"epoch": 0.35,
|
| 1231 |
+
"grad_norm": 0.6755764484405518,
|
| 1232 |
+
"learning_rate": 4.1069690242163484e-05,
|
| 1233 |
+
"loss": 0.489,
|
| 1234 |
+
"step": 1750
|
| 1235 |
+
},
|
| 1236 |
+
{
|
| 1237 |
+
"epoch": 0.352,
|
| 1238 |
+
"grad_norm": 0.0,
|
| 1239 |
+
"learning_rate": 4.093559974371725e-05,
|
| 1240 |
+
"loss": 2.1314,
|
| 1241 |
+
"step": 1760
|
| 1242 |
+
},
|
| 1243 |
+
{
|
| 1244 |
+
"epoch": 0.354,
|
| 1245 |
+
"grad_norm": 0.4118390381336212,
|
| 1246 |
+
"learning_rate": 4.080073256662127e-05,
|
| 1247 |
+
"loss": 3.0047,
|
| 1248 |
+
"step": 1770
|
| 1249 |
+
},
|
| 1250 |
+
{
|
| 1251 |
+
"epoch": 0.356,
|
| 1252 |
+
"grad_norm": 5.131174564361572,
|
| 1253 |
+
"learning_rate": 4.066509528411152e-05,
|
| 1254 |
+
"loss": 1.2667,
|
| 1255 |
+
"step": 1780
|
| 1256 |
+
},
|
| 1257 |
+
{
|
| 1258 |
+
"epoch": 0.358,
|
| 1259 |
+
"grad_norm": 0.9545110464096069,
|
| 1260 |
+
"learning_rate": 4.052869450695776e-05,
|
| 1261 |
+
"loss": 0.9325,
|
| 1262 |
+
"step": 1790
|
| 1263 |
+
},
|
| 1264 |
+
{
|
| 1265 |
+
"epoch": 0.36,
|
| 1266 |
+
"grad_norm": 0.9308171272277832,
|
| 1267 |
+
"learning_rate": 4.039153688314145e-05,
|
| 1268 |
+
"loss": 2.1198,
|
| 1269 |
+
"step": 1800
|
| 1270 |
+
},
|
| 1271 |
+
{
|
| 1272 |
+
"epoch": 0.362,
|
| 1273 |
+
"grad_norm": 1.648389220237732,
|
| 1274 |
+
"learning_rate": 4.02536290975317e-05,
|
| 1275 |
+
"loss": 0.809,
|
| 1276 |
+
"step": 1810
|
| 1277 |
+
},
|
| 1278 |
+
{
|
| 1279 |
+
"epoch": 0.364,
|
| 1280 |
+
"grad_norm": 1.658455729484558,
|
| 1281 |
+
"learning_rate": 4.011497787155938e-05,
|
| 1282 |
+
"loss": 0.9603,
|
| 1283 |
+
"step": 1820
|
| 1284 |
+
},
|
| 1285 |
+
{
|
| 1286 |
+
"epoch": 0.366,
|
| 1287 |
+
"grad_norm": 7.657645225524902,
|
| 1288 |
+
"learning_rate": 3.997558996288965e-05,
|
| 1289 |
+
"loss": 2.463,
|
| 1290 |
+
"step": 1830
|
| 1291 |
+
},
|
| 1292 |
+
{
|
| 1293 |
+
"epoch": 0.368,
|
| 1294 |
+
"grad_norm": 1.101691484451294,
|
| 1295 |
+
"learning_rate": 3.983547216509254e-05,
|
| 1296 |
+
"loss": 0.6611,
|
| 1297 |
+
"step": 1840
|
| 1298 |
+
},
|
| 1299 |
+
{
|
| 1300 |
+
"epoch": 0.37,
|
| 1301 |
+
"grad_norm": 0.0,
|
| 1302 |
+
"learning_rate": 3.969463130731183e-05,
|
| 1303 |
+
"loss": 0.8496,
|
| 1304 |
+
"step": 1850
|
| 1305 |
+
},
|
| 1306 |
+
{
|
| 1307 |
+
"epoch": 0.372,
|
| 1308 |
+
"grad_norm": 1.1520494222640991,
|
| 1309 |
+
"learning_rate": 3.955307425393224e-05,
|
| 1310 |
+
"loss": 2.0151,
|
| 1311 |
+
"step": 1860
|
| 1312 |
+
},
|
| 1313 |
+
{
|
| 1314 |
+
"epoch": 0.374,
|
| 1315 |
+
"grad_norm": 0.7042891383171082,
|
| 1316 |
+
"learning_rate": 3.941080790424484e-05,
|
| 1317 |
+
"loss": 1.1714,
|
| 1318 |
+
"step": 1870
|
| 1319 |
+
},
|
| 1320 |
+
{
|
| 1321 |
+
"epoch": 0.376,
|
| 1322 |
+
"grad_norm": 1.9343875646591187,
|
| 1323 |
+
"learning_rate": 3.92678391921108e-05,
|
| 1324 |
+
"loss": 2.1127,
|
| 1325 |
+
"step": 1880
|
| 1326 |
+
},
|
| 1327 |
+
{
|
| 1328 |
+
"epoch": 0.378,
|
| 1329 |
+
"grad_norm": 3.507776975631714,
|
| 1330 |
+
"learning_rate": 3.912417508562345e-05,
|
| 1331 |
+
"loss": 3.0312,
|
| 1332 |
+
"step": 1890
|
| 1333 |
+
},
|
| 1334 |
+
{
|
| 1335 |
+
"epoch": 0.38,
|
| 1336 |
+
"grad_norm": 1.134413480758667,
|
| 1337 |
+
"learning_rate": 3.897982258676867e-05,
|
| 1338 |
+
"loss": 1.052,
|
| 1339 |
+
"step": 1900
|
| 1340 |
+
},
|
| 1341 |
+
{
|
| 1342 |
+
"epoch": 0.382,
|
| 1343 |
+
"grad_norm": 1.6466419696807861,
|
| 1344 |
+
"learning_rate": 3.883478873108361e-05,
|
| 1345 |
+
"loss": 1.2737,
|
| 1346 |
+
"step": 1910
|
| 1347 |
+
},
|
| 1348 |
+
{
|
| 1349 |
+
"epoch": 0.384,
|
| 1350 |
+
"grad_norm": 0.9162160158157349,
|
| 1351 |
+
"learning_rate": 3.868908058731376e-05,
|
| 1352 |
+
"loss": 0.487,
|
| 1353 |
+
"step": 1920
|
| 1354 |
+
},
|
| 1355 |
+
{
|
| 1356 |
+
"epoch": 0.386,
|
| 1357 |
+
"grad_norm": 1.0865650177001953,
|
| 1358 |
+
"learning_rate": 3.85427052570685e-05,
|
| 1359 |
+
"loss": 1.1498,
|
| 1360 |
+
"step": 1930
|
| 1361 |
+
},
|
| 1362 |
+
{
|
| 1363 |
+
"epoch": 0.388,
|
| 1364 |
+
"grad_norm": 2.519901990890503,
|
| 1365 |
+
"learning_rate": 3.8395669874474915e-05,
|
| 1366 |
+
"loss": 1.3501,
|
| 1367 |
+
"step": 1940
|
| 1368 |
+
},
|
| 1369 |
+
{
|
| 1370 |
+
"epoch": 0.39,
|
| 1371 |
+
"grad_norm": 1.382398247718811,
|
| 1372 |
+
"learning_rate": 3.824798160583012e-05,
|
| 1373 |
+
"loss": 0.695,
|
| 1374 |
+
"step": 1950
|
| 1375 |
+
},
|
| 1376 |
+
{
|
| 1377 |
+
"epoch": 0.392,
|
| 1378 |
+
"grad_norm": 3.123533010482788,
|
| 1379 |
+
"learning_rate": 3.8099647649251986e-05,
|
| 1380 |
+
"loss": 1.1411,
|
| 1381 |
+
"step": 1960
|
| 1382 |
+
},
|
| 1383 |
+
{
|
| 1384 |
+
"epoch": 0.394,
|
| 1385 |
+
"grad_norm": 0.35227781534194946,
|
| 1386 |
+
"learning_rate": 3.795067523432826e-05,
|
| 1387 |
+
"loss": 1.3583,
|
| 1388 |
+
"step": 1970
|
| 1389 |
+
},
|
| 1390 |
+
{
|
| 1391 |
+
"epoch": 0.396,
|
| 1392 |
+
"grad_norm": 14.265485763549805,
|
| 1393 |
+
"learning_rate": 3.780107162176429e-05,
|
| 1394 |
+
"loss": 1.43,
|
| 1395 |
+
"step": 1980
|
| 1396 |
+
},
|
| 1397 |
+
{
|
| 1398 |
+
"epoch": 0.398,
|
| 1399 |
+
"grad_norm": 0.9283212423324585,
|
| 1400 |
+
"learning_rate": 3.765084410302909e-05,
|
| 1401 |
+
"loss": 2.6459,
|
| 1402 |
+
"step": 1990
|
| 1403 |
+
},
|
| 1404 |
+
{
|
| 1405 |
+
"epoch": 0.4,
|
| 1406 |
+
"grad_norm": 1.3758940696716309,
|
| 1407 |
+
"learning_rate": 3.7500000000000003e-05,
|
| 1408 |
+
"loss": 2.1941,
|
| 1409 |
+
"step": 2000
|
| 1410 |
+
},
|
| 1411 |
+
{
|
| 1412 |
+
"epoch": 0.402,
|
| 1413 |
+
"grad_norm": 0.39502596855163574,
|
| 1414 |
+
"learning_rate": 3.7348546664605777e-05,
|
| 1415 |
+
"loss": 1.1788,
|
| 1416 |
+
"step": 2010
|
| 1417 |
+
},
|
| 1418 |
+
{
|
| 1419 |
+
"epoch": 0.404,
|
| 1420 |
+
"grad_norm": 2.504502534866333,
|
| 1421 |
+
"learning_rate": 3.719649147846832e-05,
|
| 1422 |
+
"loss": 0.6889,
|
| 1423 |
+
"step": 2020
|
| 1424 |
+
},
|
| 1425 |
+
{
|
| 1426 |
+
"epoch": 0.406,
|
| 1427 |
+
"grad_norm": 0.4152251183986664,
|
| 1428 |
+
"learning_rate": 3.704384185254288e-05,
|
| 1429 |
+
"loss": 1.0726,
|
| 1430 |
+
"step": 2030
|
| 1431 |
+
},
|
| 1432 |
+
{
|
| 1433 |
+
"epoch": 0.408,
|
| 1434 |
+
"grad_norm": 2.400519371032715,
|
| 1435 |
+
"learning_rate": 3.689060522675689e-05,
|
| 1436 |
+
"loss": 1.1677,
|
| 1437 |
+
"step": 2040
|
| 1438 |
+
},
|
| 1439 |
+
{
|
| 1440 |
+
"epoch": 0.41,
|
| 1441 |
+
"grad_norm": 1.742875337600708,
|
| 1442 |
+
"learning_rate": 3.673678906964727e-05,
|
| 1443 |
+
"loss": 1.1147,
|
| 1444 |
+
"step": 2050
|
| 1445 |
+
},
|
| 1446 |
+
{
|
| 1447 |
+
"epoch": 0.412,
|
| 1448 |
+
"grad_norm": 1.1500080823898315,
|
| 1449 |
+
"learning_rate": 3.6582400877996546e-05,
|
| 1450 |
+
"loss": 0.9126,
|
| 1451 |
+
"step": 2060
|
| 1452 |
+
},
|
| 1453 |
+
{
|
| 1454 |
+
"epoch": 0.414,
|
| 1455 |
+
"grad_norm": 19.241657257080078,
|
| 1456 |
+
"learning_rate": 3.642744817646736e-05,
|
| 1457 |
+
"loss": 2.0398,
|
| 1458 |
+
"step": 2070
|
| 1459 |
+
},
|
| 1460 |
+
{
|
| 1461 |
+
"epoch": 0.416,
|
| 1462 |
+
"grad_norm": 0.7482590675354004,
|
| 1463 |
+
"learning_rate": 3.627193851723577e-05,
|
| 1464 |
+
"loss": 1.3157,
|
| 1465 |
+
"step": 2080
|
| 1466 |
+
},
|
| 1467 |
+
{
|
| 1468 |
+
"epoch": 0.418,
|
| 1469 |
+
"grad_norm": 1.0819116830825806,
|
| 1470 |
+
"learning_rate": 3.611587947962319e-05,
|
| 1471 |
+
"loss": 0.8404,
|
| 1472 |
+
"step": 2090
|
| 1473 |
+
},
|
| 1474 |
+
{
|
| 1475 |
+
"epoch": 0.42,
|
| 1476 |
+
"grad_norm": 0.6795836687088013,
|
| 1477 |
+
"learning_rate": 3.5959278669726935e-05,
|
| 1478 |
+
"loss": 1.571,
|
| 1479 |
+
"step": 2100
|
| 1480 |
+
},
|
| 1481 |
+
{
|
| 1482 |
+
"epoch": 0.422,
|
| 1483 |
+
"grad_norm": 1.1088377237319946,
|
| 1484 |
+
"learning_rate": 3.580214372004956e-05,
|
| 1485 |
+
"loss": 1.7026,
|
| 1486 |
+
"step": 2110
|
| 1487 |
+
},
|
| 1488 |
+
{
|
| 1489 |
+
"epoch": 0.424,
|
| 1490 |
+
"grad_norm": 3.5846972465515137,
|
| 1491 |
+
"learning_rate": 3.564448228912682e-05,
|
| 1492 |
+
"loss": 0.7502,
|
| 1493 |
+
"step": 2120
|
| 1494 |
+
},
|
| 1495 |
+
{
|
| 1496 |
+
"epoch": 0.426,
|
| 1497 |
+
"grad_norm": 0.6942911148071289,
|
| 1498 |
+
"learning_rate": 3.548630206115443e-05,
|
| 1499 |
+
"loss": 0.7163,
|
| 1500 |
+
"step": 2130
|
| 1501 |
+
},
|
| 1502 |
+
{
|
| 1503 |
+
"epoch": 0.428,
|
| 1504 |
+
"grad_norm": 1.9937869310379028,
|
| 1505 |
+
"learning_rate": 3.532761074561355e-05,
|
| 1506 |
+
"loss": 1.2891,
|
| 1507 |
+
"step": 2140
|
| 1508 |
+
},
|
| 1509 |
+
{
|
| 1510 |
+
"epoch": 0.43,
|
| 1511 |
+
"grad_norm": 0.8418222665786743,
|
| 1512 |
+
"learning_rate": 3.516841607689501e-05,
|
| 1513 |
+
"loss": 2.0099,
|
| 1514 |
+
"step": 2150
|
| 1515 |
+
},
|
| 1516 |
+
{
|
| 1517 |
+
"epoch": 0.432,
|
| 1518 |
+
"grad_norm": 6.1483893394470215,
|
| 1519 |
+
"learning_rate": 3.5008725813922386e-05,
|
| 1520 |
+
"loss": 2.6718,
|
| 1521 |
+
"step": 2160
|
| 1522 |
+
},
|
| 1523 |
+
{
|
| 1524 |
+
"epoch": 0.434,
|
| 1525 |
+
"grad_norm": 2.0732738971710205,
|
| 1526 |
+
"learning_rate": 3.484854773977378e-05,
|
| 1527 |
+
"loss": 3.9758,
|
| 1528 |
+
"step": 2170
|
| 1529 |
+
},
|
| 1530 |
+
{
|
| 1531 |
+
"epoch": 0.436,
|
| 1532 |
+
"grad_norm": 1.1674740314483643,
|
| 1533 |
+
"learning_rate": 3.4687889661302576e-05,
|
| 1534 |
+
"loss": 4.9642,
|
| 1535 |
+
"step": 2180
|
| 1536 |
+
},
|
| 1537 |
+
{
|
| 1538 |
+
"epoch": 0.438,
|
| 1539 |
+
"grad_norm": 0.39993491768836975,
|
| 1540 |
+
"learning_rate": 3.452675940875686e-05,
|
| 1541 |
+
"loss": 1.3115,
|
| 1542 |
+
"step": 2190
|
| 1543 |
+
},
|
| 1544 |
+
{
|
| 1545 |
+
"epoch": 0.44,
|
| 1546 |
+
"grad_norm": 0.35298460721969604,
|
| 1547 |
+
"learning_rate": 3.436516483539781e-05,
|
| 1548 |
+
"loss": 2.1822,
|
| 1549 |
+
"step": 2200
|
| 1550 |
+
},
|
| 1551 |
+
{
|
| 1552 |
+
"epoch": 0.442,
|
| 1553 |
+
"grad_norm": 1.8225891590118408,
|
| 1554 |
+
"learning_rate": 3.4203113817116957e-05,
|
| 1555 |
+
"loss": 0.6386,
|
| 1556 |
+
"step": 2210
|
| 1557 |
+
},
|
| 1558 |
+
{
|
| 1559 |
+
"epoch": 0.444,
|
| 1560 |
+
"grad_norm": 0.0,
|
| 1561 |
+
"learning_rate": 3.4040614252052305e-05,
|
| 1562 |
+
"loss": 0.6365,
|
| 1563 |
+
"step": 2220
|
| 1564 |
+
},
|
| 1565 |
+
{
|
| 1566 |
+
"epoch": 0.446,
|
| 1567 |
+
"grad_norm": 3.6752185821533203,
|
| 1568 |
+
"learning_rate": 3.387767406020343e-05,
|
| 1569 |
+
"loss": 0.897,
|
| 1570 |
+
"step": 2230
|
| 1571 |
+
},
|
| 1572 |
+
{
|
| 1573 |
+
"epoch": 0.448,
|
| 1574 |
+
"grad_norm": 0.5872332453727722,
|
| 1575 |
+
"learning_rate": 3.3714301183045385e-05,
|
| 1576 |
+
"loss": 1.4772,
|
| 1577 |
+
"step": 2240
|
| 1578 |
+
},
|
| 1579 |
+
{
|
| 1580 |
+
"epoch": 0.45,
|
| 1581 |
+
"grad_norm": 0.3744696080684662,
|
| 1582 |
+
"learning_rate": 3.355050358314172e-05,
|
| 1583 |
+
"loss": 0.6316,
|
| 1584 |
+
"step": 2250
|
| 1585 |
+
},
|
| 1586 |
+
{
|
| 1587 |
+
"epoch": 0.452,
|
| 1588 |
+
"grad_norm": 1.4100755453109741,
|
| 1589 |
+
"learning_rate": 3.338628924375638e-05,
|
| 1590 |
+
"loss": 0.5972,
|
| 1591 |
+
"step": 2260
|
| 1592 |
+
},
|
| 1593 |
+
{
|
| 1594 |
+
"epoch": 0.454,
|
| 1595 |
+
"grad_norm": 0.578823447227478,
|
| 1596 |
+
"learning_rate": 3.322166616846458e-05,
|
| 1597 |
+
"loss": 0.7165,
|
| 1598 |
+
"step": 2270
|
| 1599 |
+
},
|
| 1600 |
+
{
|
| 1601 |
+
"epoch": 0.456,
|
| 1602 |
+
"grad_norm": 0.9964269995689392,
|
| 1603 |
+
"learning_rate": 3.305664238076278e-05,
|
| 1604 |
+
"loss": 1.8711,
|
| 1605 |
+
"step": 2280
|
| 1606 |
+
},
|
| 1607 |
+
{
|
| 1608 |
+
"epoch": 0.458,
|
| 1609 |
+
"grad_norm": 0.8978599905967712,
|
| 1610 |
+
"learning_rate": 3.289122592367757e-05,
|
| 1611 |
+
"loss": 0.9576,
|
| 1612 |
+
"step": 2290
|
| 1613 |
+
},
|
| 1614 |
+
{
|
| 1615 |
+
"epoch": 0.46,
|
| 1616 |
+
"grad_norm": 0.8042961359024048,
|
| 1617 |
+
"learning_rate": 3.272542485937369e-05,
|
| 1618 |
+
"loss": 1.4457,
|
| 1619 |
+
"step": 2300
|
| 1620 |
+
},
|
| 1621 |
+
{
|
| 1622 |
+
"epoch": 0.462,
|
| 1623 |
+
"grad_norm": 4.5866875648498535,
|
| 1624 |
+
"learning_rate": 3.2559247268761115e-05,
|
| 1625 |
+
"loss": 0.9553,
|
| 1626 |
+
"step": 2310
|
| 1627 |
+
},
|
| 1628 |
+
{
|
| 1629 |
+
"epoch": 0.464,
|
| 1630 |
+
"grad_norm": 0.3632621765136719,
|
| 1631 |
+
"learning_rate": 3.239270125110117e-05,
|
| 1632 |
+
"loss": 1.3703,
|
| 1633 |
+
"step": 2320
|
| 1634 |
+
},
|
| 1635 |
+
{
|
| 1636 |
+
"epoch": 0.466,
|
| 1637 |
+
"grad_norm": 0.7157305479049683,
|
| 1638 |
+
"learning_rate": 3.222579492361179e-05,
|
| 1639 |
+
"loss": 1.9259,
|
| 1640 |
+
"step": 2330
|
| 1641 |
+
},
|
| 1642 |
+
{
|
| 1643 |
+
"epoch": 0.468,
|
| 1644 |
+
"grad_norm": 1.0667433738708496,
|
| 1645 |
+
"learning_rate": 3.205853642107192e-05,
|
| 1646 |
+
"loss": 0.9021,
|
| 1647 |
+
"step": 2340
|
| 1648 |
+
},
|
| 1649 |
+
{
|
| 1650 |
+
"epoch": 0.47,
|
| 1651 |
+
"grad_norm": 1.5877084732055664,
|
| 1652 |
+
"learning_rate": 3.1890933895424976e-05,
|
| 1653 |
+
"loss": 2.2195,
|
| 1654 |
+
"step": 2350
|
| 1655 |
+
},
|
| 1656 |
+
{
|
| 1657 |
+
"epoch": 0.472,
|
| 1658 |
+
"grad_norm": 1.945678472518921,
|
| 1659 |
+
"learning_rate": 3.172299551538164e-05,
|
| 1660 |
+
"loss": 0.724,
|
| 1661 |
+
"step": 2360
|
| 1662 |
+
},
|
| 1663 |
+
{
|
| 1664 |
+
"epoch": 0.474,
|
| 1665 |
+
"grad_norm": 0.0,
|
| 1666 |
+
"learning_rate": 3.155472946602162e-05,
|
| 1667 |
+
"loss": 2.0286,
|
| 1668 |
+
"step": 2370
|
| 1669 |
+
},
|
| 1670 |
+
{
|
| 1671 |
+
"epoch": 0.476,
|
| 1672 |
+
"grad_norm": 7.778709411621094,
|
| 1673 |
+
"learning_rate": 3.138614394839476e-05,
|
| 1674 |
+
"loss": 0.9387,
|
| 1675 |
+
"step": 2380
|
| 1676 |
+
},
|
| 1677 |
+
{
|
| 1678 |
+
"epoch": 0.478,
|
| 1679 |
+
"grad_norm": 0.7516165971755981,
|
| 1680 |
+
"learning_rate": 3.121724717912138e-05,
|
| 1681 |
+
"loss": 0.8844,
|
| 1682 |
+
"step": 2390
|
| 1683 |
+
},
|
| 1684 |
+
{
|
| 1685 |
+
"epoch": 0.48,
|
| 1686 |
+
"grad_norm": 3.163661241531372,
|
| 1687 |
+
"learning_rate": 3.104804738999169e-05,
|
| 1688 |
+
"loss": 0.8334,
|
| 1689 |
+
"step": 2400
|
| 1690 |
+
},
|
| 1691 |
+
{
|
| 1692 |
+
"epoch": 0.482,
|
| 1693 |
+
"grad_norm": 1.4817134141921997,
|
| 1694 |
+
"learning_rate": 3.087855282756475e-05,
|
| 1695 |
+
"loss": 0.7062,
|
| 1696 |
+
"step": 2410
|
| 1697 |
+
},
|
| 1698 |
+
{
|
| 1699 |
+
"epoch": 0.484,
|
| 1700 |
+
"grad_norm": 1.825772762298584,
|
| 1701 |
+
"learning_rate": 3.0708771752766394e-05,
|
| 1702 |
+
"loss": 1.1329,
|
| 1703 |
+
"step": 2420
|
| 1704 |
+
},
|
| 1705 |
+
{
|
| 1706 |
+
"epoch": 0.486,
|
| 1707 |
+
"grad_norm": 0.3289898931980133,
|
| 1708 |
+
"learning_rate": 3.053871244048669e-05,
|
| 1709 |
+
"loss": 3.1218,
|
| 1710 |
+
"step": 2430
|
| 1711 |
+
},
|
| 1712 |
+
{
|
| 1713 |
+
"epoch": 0.488,
|
| 1714 |
+
"grad_norm": 1.0258142948150635,
|
| 1715 |
+
"learning_rate": 3.0368383179176585e-05,
|
| 1716 |
+
"loss": 1.795,
|
| 1717 |
+
"step": 2440
|
| 1718 |
+
},
|
| 1719 |
+
{
|
| 1720 |
+
"epoch": 0.49,
|
| 1721 |
+
"grad_norm": 4.297471523284912,
|
| 1722 |
+
"learning_rate": 3.0197792270443982e-05,
|
| 1723 |
+
"loss": 1.3635,
|
| 1724 |
+
"step": 2450
|
| 1725 |
+
},
|
| 1726 |
+
{
|
| 1727 |
+
"epoch": 0.492,
|
| 1728 |
+
"grad_norm": 0.9445592761039734,
|
| 1729 |
+
"learning_rate": 3.002694802864912e-05,
|
| 1730 |
+
"loss": 1.5214,
|
| 1731 |
+
"step": 2460
|
| 1732 |
+
},
|
| 1733 |
+
{
|
| 1734 |
+
"epoch": 0.494,
|
| 1735 |
+
"grad_norm": 3.7605855464935303,
|
| 1736 |
+
"learning_rate": 2.98558587804993e-05,
|
| 1737 |
+
"loss": 1.1378,
|
| 1738 |
+
"step": 2470
|
| 1739 |
+
},
|
| 1740 |
+
{
|
| 1741 |
+
"epoch": 0.496,
|
| 1742 |
+
"grad_norm": 1.3011572360992432,
|
| 1743 |
+
"learning_rate": 2.9684532864643122e-05,
|
| 1744 |
+
"loss": 1.3969,
|
| 1745 |
+
"step": 2480
|
| 1746 |
+
},
|
| 1747 |
+
{
|
| 1748 |
+
"epoch": 0.498,
|
| 1749 |
+
"grad_norm": 1.2740352153778076,
|
| 1750 |
+
"learning_rate": 2.9512978631264006e-05,
|
| 1751 |
+
"loss": 0.7386,
|
| 1752 |
+
"step": 2490
|
| 1753 |
+
},
|
| 1754 |
+
{
|
| 1755 |
+
"epoch": 0.5,
|
| 1756 |
+
"grad_norm": 0.6233336329460144,
|
| 1757 |
+
"learning_rate": 2.9341204441673266e-05,
|
| 1758 |
+
"loss": 1.2025,
|
| 1759 |
+
"step": 2500
|
| 1760 |
+
},
|
| 1761 |
+
{
|
| 1762 |
+
"epoch": 0.502,
|
| 1763 |
+
"grad_norm": 4.989047050476074,
|
| 1764 |
+
"learning_rate": 2.916921866790256e-05,
|
| 1765 |
+
"loss": 0.8712,
|
| 1766 |
+
"step": 2510
|
| 1767 |
+
},
|
| 1768 |
+
{
|
| 1769 |
+
"epoch": 0.504,
|
| 1770 |
+
"grad_norm": 2.683638095855713,
|
| 1771 |
+
"learning_rate": 2.8997029692295874e-05,
|
| 1772 |
+
"loss": 2.7228,
|
| 1773 |
+
"step": 2520
|
| 1774 |
+
},
|
| 1775 |
+
{
|
| 1776 |
+
"epoch": 0.506,
|
| 1777 |
+
"grad_norm": 1.5171597003936768,
|
| 1778 |
+
"learning_rate": 2.8824645907100954e-05,
|
| 1779 |
+
"loss": 1.6352,
|
| 1780 |
+
"step": 2530
|
| 1781 |
+
},
|
| 1782 |
+
{
|
| 1783 |
+
"epoch": 0.508,
|
| 1784 |
+
"grad_norm": 0.639072835445404,
|
| 1785 |
+
"learning_rate": 2.8652075714060295e-05,
|
| 1786 |
+
"loss": 0.7708,
|
| 1787 |
+
"step": 2540
|
| 1788 |
+
},
|
| 1789 |
+
{
|
| 1790 |
+
"epoch": 0.51,
|
| 1791 |
+
"grad_norm": 0.4355096220970154,
|
| 1792 |
+
"learning_rate": 2.8479327524001636e-05,
|
| 1793 |
+
"loss": 1.7158,
|
| 1794 |
+
"step": 2550
|
| 1795 |
+
},
|
| 1796 |
+
{
|
| 1797 |
+
"epoch": 0.512,
|
| 1798 |
+
"grad_norm": 5.390672206878662,
|
| 1799 |
+
"learning_rate": 2.8306409756428064e-05,
|
| 1800 |
+
"loss": 1.9952,
|
| 1801 |
+
"step": 2560
|
| 1802 |
+
},
|
| 1803 |
+
{
|
| 1804 |
+
"epoch": 0.514,
|
| 1805 |
+
"grad_norm": 1.1911485195159912,
|
| 1806 |
+
"learning_rate": 2.8133330839107608e-05,
|
| 1807 |
+
"loss": 1.7763,
|
| 1808 |
+
"step": 2570
|
| 1809 |
+
},
|
| 1810 |
+
{
|
| 1811 |
+
"epoch": 0.516,
|
| 1812 |
+
"grad_norm": 1.0947262048721313,
|
| 1813 |
+
"learning_rate": 2.7960099207662532e-05,
|
| 1814 |
+
"loss": 1.1427,
|
| 1815 |
+
"step": 2580
|
| 1816 |
+
},
|
| 1817 |
+
{
|
| 1818 |
+
"epoch": 0.518,
|
| 1819 |
+
"grad_norm": 0.9010633230209351,
|
| 1820 |
+
"learning_rate": 2.7786723305158136e-05,
|
| 1821 |
+
"loss": 1.3634,
|
| 1822 |
+
"step": 2590
|
| 1823 |
+
},
|
| 1824 |
+
{
|
| 1825 |
+
"epoch": 0.52,
|
| 1826 |
+
"grad_norm": 3.9178924560546875,
|
| 1827 |
+
"learning_rate": 2.761321158169134e-05,
|
| 1828 |
+
"loss": 1.1372,
|
| 1829 |
+
"step": 2600
|
| 1830 |
+
},
|
| 1831 |
+
{
|
| 1832 |
+
"epoch": 0.522,
|
| 1833 |
+
"grad_norm": 1.1135903596878052,
|
| 1834 |
+
"learning_rate": 2.7439572493978736e-05,
|
| 1835 |
+
"loss": 0.6111,
|
| 1836 |
+
"step": 2610
|
| 1837 |
+
},
|
| 1838 |
+
{
|
| 1839 |
+
"epoch": 0.524,
|
| 1840 |
+
"grad_norm": 2.4544811248779297,
|
| 1841 |
+
"learning_rate": 2.726581450494451e-05,
|
| 1842 |
+
"loss": 1.1249,
|
| 1843 |
+
"step": 2620
|
| 1844 |
+
},
|
| 1845 |
+
{
|
| 1846 |
+
"epoch": 0.526,
|
| 1847 |
+
"grad_norm": 0.459357887506485,
|
| 1848 |
+
"learning_rate": 2.7091946083307896e-05,
|
| 1849 |
+
"loss": 4.3159,
|
| 1850 |
+
"step": 2630
|
| 1851 |
+
},
|
| 1852 |
+
{
|
| 1853 |
+
"epoch": 0.528,
|
| 1854 |
+
"grad_norm": 2.8313136100769043,
|
| 1855 |
+
"learning_rate": 2.6917975703170466e-05,
|
| 1856 |
+
"loss": 0.9282,
|
| 1857 |
+
"step": 2640
|
| 1858 |
+
},
|
| 1859 |
+
{
|
| 1860 |
+
"epoch": 0.53,
|
| 1861 |
+
"grad_norm": 3.777279853820801,
|
| 1862 |
+
"learning_rate": 2.674391184360313e-05,
|
| 1863 |
+
"loss": 1.3395,
|
| 1864 |
+
"step": 2650
|
| 1865 |
+
},
|
| 1866 |
+
{
|
| 1867 |
+
"epoch": 0.532,
|
| 1868 |
+
"grad_norm": 3.277451992034912,
|
| 1869 |
+
"learning_rate": 2.656976298823284e-05,
|
| 1870 |
+
"loss": 0.7275,
|
| 1871 |
+
"step": 2660
|
| 1872 |
+
},
|
| 1873 |
+
{
|
| 1874 |
+
"epoch": 0.534,
|
| 1875 |
+
"grad_norm": 0.6467308402061462,
|
| 1876 |
+
"learning_rate": 2.6395537624829096e-05,
|
| 1877 |
+
"loss": 5.1828,
|
| 1878 |
+
"step": 2670
|
| 1879 |
+
},
|
| 1880 |
+
{
|
| 1881 |
+
"epoch": 0.536,
|
| 1882 |
+
"grad_norm": 0.0,
|
| 1883 |
+
"learning_rate": 2.6221244244890336e-05,
|
| 1884 |
+
"loss": 1.1841,
|
| 1885 |
+
"step": 2680
|
| 1886 |
+
},
|
| 1887 |
+
{
|
| 1888 |
+
"epoch": 0.538,
|
| 1889 |
+
"grad_norm": 0.5595866441726685,
|
| 1890 |
+
"learning_rate": 2.604689134322999e-05,
|
| 1891 |
+
"loss": 2.1432,
|
| 1892 |
+
"step": 2690
|
| 1893 |
+
},
|
| 1894 |
+
{
|
| 1895 |
+
"epoch": 0.54,
|
| 1896 |
+
"grad_norm": 2.23224139213562,
|
| 1897 |
+
"learning_rate": 2.587248741756253e-05,
|
| 1898 |
+
"loss": 1.1397,
|
| 1899 |
+
"step": 2700
|
| 1900 |
+
},
|
| 1901 |
+
{
|
| 1902 |
+
"epoch": 0.542,
|
| 1903 |
+
"grad_norm": 14.135408401489258,
|
| 1904 |
+
"learning_rate": 2.5698040968089225e-05,
|
| 1905 |
+
"loss": 1.8865,
|
| 1906 |
+
"step": 2710
|
| 1907 |
+
},
|
| 1908 |
+
{
|
| 1909 |
+
"epoch": 0.544,
|
| 1910 |
+
"grad_norm": 3.3315963745117188,
|
| 1911 |
+
"learning_rate": 2.5523560497083926e-05,
|
| 1912 |
+
"loss": 1.7895,
|
| 1913 |
+
"step": 2720
|
| 1914 |
+
},
|
| 1915 |
+
{
|
| 1916 |
+
"epoch": 0.546,
|
| 1917 |
+
"grad_norm": 0.6247251629829407,
|
| 1918 |
+
"learning_rate": 2.5349054508478637e-05,
|
| 1919 |
+
"loss": 0.5246,
|
| 1920 |
+
"step": 2730
|
| 1921 |
+
},
|
| 1922 |
+
{
|
| 1923 |
+
"epoch": 0.548,
|
| 1924 |
+
"grad_norm": 1.406416654586792,
|
| 1925 |
+
"learning_rate": 2.517453150744904e-05,
|
| 1926 |
+
"loss": 2.2288,
|
| 1927 |
+
"step": 2740
|
| 1928 |
+
},
|
| 1929 |
+
{
|
| 1930 |
+
"epoch": 0.55,
|
| 1931 |
+
"grad_norm": 8.639768600463867,
|
| 1932 |
+
"learning_rate": 2.5e-05,
|
| 1933 |
+
"loss": 1.1424,
|
| 1934 |
+
"step": 2750
|
| 1935 |
+
},
|
| 1936 |
+
{
|
| 1937 |
+
"epoch": 0.552,
|
| 1938 |
+
"grad_norm": 1.132100224494934,
|
| 1939 |
+
"learning_rate": 2.4825468492550964e-05,
|
| 1940 |
+
"loss": 0.5168,
|
| 1941 |
+
"step": 2760
|
| 1942 |
+
},
|
| 1943 |
+
{
|
| 1944 |
+
"epoch": 0.554,
|
| 1945 |
+
"grad_norm": 0.6441847681999207,
|
| 1946 |
+
"learning_rate": 2.4650945491521372e-05,
|
| 1947 |
+
"loss": 1.6828,
|
| 1948 |
+
"step": 2770
|
| 1949 |
+
},
|
| 1950 |
+
{
|
| 1951 |
+
"epoch": 0.556,
|
| 1952 |
+
"grad_norm": 0.38985392451286316,
|
| 1953 |
+
"learning_rate": 2.447643950291608e-05,
|
| 1954 |
+
"loss": 1.3219,
|
| 1955 |
+
"step": 2780
|
| 1956 |
+
},
|
| 1957 |
+
{
|
| 1958 |
+
"epoch": 0.558,
|
| 1959 |
+
"grad_norm": 6.931550025939941,
|
| 1960 |
+
"learning_rate": 2.4301959031910784e-05,
|
| 1961 |
+
"loss": 1.511,
|
| 1962 |
+
"step": 2790
|
| 1963 |
+
},
|
| 1964 |
+
{
|
| 1965 |
+
"epoch": 0.56,
|
| 1966 |
+
"grad_norm": 2.3524391651153564,
|
| 1967 |
+
"learning_rate": 2.4127512582437485e-05,
|
| 1968 |
+
"loss": 1.5983,
|
| 1969 |
+
"step": 2800
|
| 1970 |
+
},
|
| 1971 |
+
{
|
| 1972 |
+
"epoch": 0.562,
|
| 1973 |
+
"grad_norm": 2.679114580154419,
|
| 1974 |
+
"learning_rate": 2.3953108656770016e-05,
|
| 1975 |
+
"loss": 0.7233,
|
| 1976 |
+
"step": 2810
|
| 1977 |
+
},
|
| 1978 |
+
{
|
| 1979 |
+
"epoch": 0.564,
|
| 1980 |
+
"grad_norm": 4.362655162811279,
|
| 1981 |
+
"learning_rate": 2.377875575510967e-05,
|
| 1982 |
+
"loss": 1.2128,
|
| 1983 |
+
"step": 2820
|
| 1984 |
+
},
|
| 1985 |
+
{
|
| 1986 |
+
"epoch": 0.566,
|
| 1987 |
+
"grad_norm": 1.256465196609497,
|
| 1988 |
+
"learning_rate": 2.3604462375170906e-05,
|
| 1989 |
+
"loss": 1.3444,
|
| 1990 |
+
"step": 2830
|
| 1991 |
+
},
|
| 1992 |
+
{
|
| 1993 |
+
"epoch": 0.568,
|
| 1994 |
+
"grad_norm": 1.7675756216049194,
|
| 1995 |
+
"learning_rate": 2.3430237011767167e-05,
|
| 1996 |
+
"loss": 0.6756,
|
| 1997 |
+
"step": 2840
|
| 1998 |
+
},
|
| 1999 |
+
{
|
| 2000 |
+
"epoch": 0.57,
|
| 2001 |
+
"grad_norm": 0.9454560875892639,
|
| 2002 |
+
"learning_rate": 2.3256088156396868e-05,
|
| 2003 |
+
"loss": 2.1279,
|
| 2004 |
+
"step": 2850
|
| 2005 |
+
},
|
| 2006 |
+
{
|
| 2007 |
+
"epoch": 0.572,
|
| 2008 |
+
"grad_norm": 1.4755468368530273,
|
| 2009 |
+
"learning_rate": 2.3082024296829536e-05,
|
| 2010 |
+
"loss": 1.2081,
|
| 2011 |
+
"step": 2860
|
| 2012 |
+
},
|
| 2013 |
+
{
|
| 2014 |
+
"epoch": 0.574,
|
| 2015 |
+
"grad_norm": 0.39675667881965637,
|
| 2016 |
+
"learning_rate": 2.2908053916692117e-05,
|
| 2017 |
+
"loss": 1.6913,
|
| 2018 |
+
"step": 2870
|
| 2019 |
+
},
|
| 2020 |
+
{
|
| 2021 |
+
"epoch": 0.576,
|
| 2022 |
+
"grad_norm": 3.55375337600708,
|
| 2023 |
+
"learning_rate": 2.2734185495055503e-05,
|
| 2024 |
+
"loss": 0.7114,
|
| 2025 |
+
"step": 2880
|
| 2026 |
+
},
|
| 2027 |
+
{
|
| 2028 |
+
"epoch": 0.578,
|
| 2029 |
+
"grad_norm": 1.0292774438858032,
|
| 2030 |
+
"learning_rate": 2.2560427506021266e-05,
|
| 2031 |
+
"loss": 1.5252,
|
| 2032 |
+
"step": 2890
|
| 2033 |
+
},
|
| 2034 |
+
{
|
| 2035 |
+
"epoch": 0.58,
|
| 2036 |
+
"grad_norm": 0.8896195888519287,
|
| 2037 |
+
"learning_rate": 2.238678841830867e-05,
|
| 2038 |
+
"loss": 1.2513,
|
| 2039 |
+
"step": 2900
|
| 2040 |
+
},
|
| 2041 |
+
{
|
| 2042 |
+
"epoch": 0.582,
|
| 2043 |
+
"grad_norm": 4.262208461761475,
|
| 2044 |
+
"learning_rate": 2.2213276694841866e-05,
|
| 2045 |
+
"loss": 1.5359,
|
| 2046 |
+
"step": 2910
|
| 2047 |
+
},
|
| 2048 |
+
{
|
| 2049 |
+
"epoch": 0.584,
|
| 2050 |
+
"grad_norm": 0.8088265061378479,
|
| 2051 |
+
"learning_rate": 2.2039900792337474e-05,
|
| 2052 |
+
"loss": 1.3664,
|
| 2053 |
+
"step": 2920
|
| 2054 |
+
},
|
| 2055 |
+
{
|
| 2056 |
+
"epoch": 0.586,
|
| 2057 |
+
"grad_norm": 2.1337993144989014,
|
| 2058 |
+
"learning_rate": 2.186666916089239e-05,
|
| 2059 |
+
"loss": 1.102,
|
| 2060 |
+
"step": 2930
|
| 2061 |
+
},
|
| 2062 |
+
{
|
| 2063 |
+
"epoch": 0.588,
|
| 2064 |
+
"grad_norm": 2.935187816619873,
|
| 2065 |
+
"learning_rate": 2.1693590243571938e-05,
|
| 2066 |
+
"loss": 1.5992,
|
| 2067 |
+
"step": 2940
|
| 2068 |
+
},
|
| 2069 |
+
{
|
| 2070 |
+
"epoch": 0.59,
|
| 2071 |
+
"grad_norm": 0.628711998462677,
|
| 2072 |
+
"learning_rate": 2.1520672475998373e-05,
|
| 2073 |
+
"loss": 0.8194,
|
| 2074 |
+
"step": 2950
|
| 2075 |
+
},
|
| 2076 |
+
{
|
| 2077 |
+
"epoch": 0.592,
|
| 2078 |
+
"grad_norm": 0.5109361410140991,
|
| 2079 |
+
"learning_rate": 2.1347924285939714e-05,
|
| 2080 |
+
"loss": 0.8753,
|
| 2081 |
+
"step": 2960
|
| 2082 |
+
},
|
| 2083 |
+
{
|
| 2084 |
+
"epoch": 0.594,
|
| 2085 |
+
"grad_norm": 2.374326229095459,
|
| 2086 |
+
"learning_rate": 2.117535409289905e-05,
|
| 2087 |
+
"loss": 1.8568,
|
| 2088 |
+
"step": 2970
|
| 2089 |
+
},
|
| 2090 |
+
{
|
| 2091 |
+
"epoch": 0.596,
|
| 2092 |
+
"grad_norm": 2.674623966217041,
|
| 2093 |
+
"learning_rate": 2.1002970307704132e-05,
|
| 2094 |
+
"loss": 2.0932,
|
| 2095 |
+
"step": 2980
|
| 2096 |
+
},
|
| 2097 |
+
{
|
| 2098 |
+
"epoch": 0.598,
|
| 2099 |
+
"grad_norm": 0.7172428369522095,
|
| 2100 |
+
"learning_rate": 2.0830781332097446e-05,
|
| 2101 |
+
"loss": 0.6447,
|
| 2102 |
+
"step": 2990
|
| 2103 |
+
},
|
| 2104 |
+
{
|
| 2105 |
+
"epoch": 0.6,
|
| 2106 |
+
"grad_norm": 2.8360252380371094,
|
| 2107 |
+
"learning_rate": 2.0658795558326743e-05,
|
| 2108 |
+
"loss": 0.8866,
|
| 2109 |
+
"step": 3000
|
| 2110 |
+
},
|
| 2111 |
+
{
|
| 2112 |
+
"epoch": 0.602,
|
| 2113 |
+
"grad_norm": 1.225557804107666,
|
| 2114 |
+
"learning_rate": 2.0487021368736003e-05,
|
| 2115 |
+
"loss": 2.3415,
|
| 2116 |
+
"step": 3010
|
| 2117 |
+
},
|
| 2118 |
+
{
|
| 2119 |
+
"epoch": 0.604,
|
| 2120 |
+
"grad_norm": 0.8167237043380737,
|
| 2121 |
+
"learning_rate": 2.031546713535688e-05,
|
| 2122 |
+
"loss": 1.244,
|
| 2123 |
+
"step": 3020
|
| 2124 |
+
},
|
| 2125 |
+
{
|
| 2126 |
+
"epoch": 0.606,
|
| 2127 |
+
"grad_norm": 9.67261791229248,
|
| 2128 |
+
"learning_rate": 2.0144141219500705e-05,
|
| 2129 |
+
"loss": 2.6138,
|
| 2130 |
+
"step": 3030
|
| 2131 |
+
},
|
| 2132 |
+
{
|
| 2133 |
+
"epoch": 0.608,
|
| 2134 |
+
"grad_norm": 0.6742550730705261,
|
| 2135 |
+
"learning_rate": 1.9973051971350888e-05,
|
| 2136 |
+
"loss": 0.8315,
|
| 2137 |
+
"step": 3040
|
| 2138 |
+
},
|
| 2139 |
+
{
|
| 2140 |
+
"epoch": 0.61,
|
| 2141 |
+
"grad_norm": 18.961151123046875,
|
| 2142 |
+
"learning_rate": 1.980220772955602e-05,
|
| 2143 |
+
"loss": 4.5689,
|
| 2144 |
+
"step": 3050
|
| 2145 |
+
},
|
| 2146 |
+
{
|
| 2147 |
+
"epoch": 0.612,
|
| 2148 |
+
"grad_norm": 5.60457181930542,
|
| 2149 |
+
"learning_rate": 1.963161682082342e-05,
|
| 2150 |
+
"loss": 1.2894,
|
| 2151 |
+
"step": 3060
|
| 2152 |
+
},
|
| 2153 |
+
{
|
| 2154 |
+
"epoch": 0.614,
|
| 2155 |
+
"grad_norm": 1.3066905736923218,
|
| 2156 |
+
"learning_rate": 1.946128755951332e-05,
|
| 2157 |
+
"loss": 1.1867,
|
| 2158 |
+
"step": 3070
|
| 2159 |
+
},
|
| 2160 |
+
{
|
| 2161 |
+
"epoch": 0.616,
|
| 2162 |
+
"grad_norm": 3.874143362045288,
|
| 2163 |
+
"learning_rate": 1.9291228247233605e-05,
|
| 2164 |
+
"loss": 1.8681,
|
| 2165 |
+
"step": 3080
|
| 2166 |
+
},
|
| 2167 |
+
{
|
| 2168 |
+
"epoch": 0.618,
|
| 2169 |
+
"grad_norm": 2.0997185707092285,
|
| 2170 |
+
"learning_rate": 1.912144717243525e-05,
|
| 2171 |
+
"loss": 1.1927,
|
| 2172 |
+
"step": 3090
|
| 2173 |
+
},
|
| 2174 |
+
{
|
| 2175 |
+
"epoch": 0.62,
|
| 2176 |
+
"grad_norm": 3.868537664413452,
|
| 2177 |
+
"learning_rate": 1.895195261000831e-05,
|
| 2178 |
+
"loss": 1.6265,
|
| 2179 |
+
"step": 3100
|
| 2180 |
+
},
|
| 2181 |
+
{
|
| 2182 |
+
"epoch": 0.622,
|
| 2183 |
+
"grad_norm": 0.4815676808357239,
|
| 2184 |
+
"learning_rate": 1.8782752820878634e-05,
|
| 2185 |
+
"loss": 1.3608,
|
| 2186 |
+
"step": 3110
|
| 2187 |
+
},
|
| 2188 |
+
{
|
| 2189 |
+
"epoch": 0.624,
|
| 2190 |
+
"grad_norm": 4.783424377441406,
|
| 2191 |
+
"learning_rate": 1.8613856051605243e-05,
|
| 2192 |
+
"loss": 1.4472,
|
| 2193 |
+
"step": 3120
|
| 2194 |
+
},
|
| 2195 |
+
{
|
| 2196 |
+
"epoch": 0.626,
|
| 2197 |
+
"grad_norm": 0.5334388017654419,
|
| 2198 |
+
"learning_rate": 1.8445270533978388e-05,
|
| 2199 |
+
"loss": 0.6093,
|
| 2200 |
+
"step": 3130
|
| 2201 |
+
},
|
| 2202 |
+
{
|
| 2203 |
+
"epoch": 0.628,
|
| 2204 |
+
"grad_norm": 2.9873673915863037,
|
| 2205 |
+
"learning_rate": 1.827700448461836e-05,
|
| 2206 |
+
"loss": 1.3848,
|
| 2207 |
+
"step": 3140
|
| 2208 |
+
},
|
| 2209 |
+
{
|
| 2210 |
+
"epoch": 0.63,
|
| 2211 |
+
"grad_norm": 1.5935121774673462,
|
| 2212 |
+
"learning_rate": 1.8109066104575023e-05,
|
| 2213 |
+
"loss": 1.1096,
|
| 2214 |
+
"step": 3150
|
| 2215 |
+
},
|
| 2216 |
+
{
|
| 2217 |
+
"epoch": 0.632,
|
| 2218 |
+
"grad_norm": 0.0,
|
| 2219 |
+
"learning_rate": 1.7941463578928086e-05,
|
| 2220 |
+
"loss": 0.8174,
|
| 2221 |
+
"step": 3160
|
| 2222 |
+
},
|
| 2223 |
+
{
|
| 2224 |
+
"epoch": 0.634,
|
| 2225 |
+
"grad_norm": 0.5703982710838318,
|
| 2226 |
+
"learning_rate": 1.7774205076388206e-05,
|
| 2227 |
+
"loss": 1.5061,
|
| 2228 |
+
"step": 3170
|
| 2229 |
+
},
|
| 2230 |
+
{
|
| 2231 |
+
"epoch": 0.636,
|
| 2232 |
+
"grad_norm": 0.0,
|
| 2233 |
+
"learning_rate": 1.7607298748898842e-05,
|
| 2234 |
+
"loss": 0.7052,
|
| 2235 |
+
"step": 3180
|
| 2236 |
+
},
|
| 2237 |
+
{
|
| 2238 |
+
"epoch": 0.638,
|
| 2239 |
+
"grad_norm": 3.1492154598236084,
|
| 2240 |
+
"learning_rate": 1.744075273123889e-05,
|
| 2241 |
+
"loss": 2.0225,
|
| 2242 |
+
"step": 3190
|
| 2243 |
+
},
|
| 2244 |
+
{
|
| 2245 |
+
"epoch": 0.64,
|
| 2246 |
+
"grad_norm": 6.029577255249023,
|
| 2247 |
+
"learning_rate": 1.7274575140626318e-05,
|
| 2248 |
+
"loss": 2.2687,
|
| 2249 |
+
"step": 3200
|
| 2250 |
+
},
|
| 2251 |
+
{
|
| 2252 |
+
"epoch": 0.642,
|
| 2253 |
+
"grad_norm": 0.691862940788269,
|
| 2254 |
+
"learning_rate": 1.7108774076322443e-05,
|
| 2255 |
+
"loss": 0.6663,
|
| 2256 |
+
"step": 3210
|
| 2257 |
+
},
|
| 2258 |
+
{
|
| 2259 |
+
"epoch": 0.644,
|
| 2260 |
+
"grad_norm": 6.771577835083008,
|
| 2261 |
+
"learning_rate": 1.6943357619237226e-05,
|
| 2262 |
+
"loss": 1.259,
|
| 2263 |
+
"step": 3220
|
| 2264 |
+
},
|
| 2265 |
+
{
|
| 2266 |
+
"epoch": 0.646,
|
| 2267 |
+
"grad_norm": 1.3319804668426514,
|
| 2268 |
+
"learning_rate": 1.677833383153542e-05,
|
| 2269 |
+
"loss": 1.1647,
|
| 2270 |
+
"step": 3230
|
| 2271 |
+
},
|
| 2272 |
+
{
|
| 2273 |
+
"epoch": 0.648,
|
| 2274 |
+
"grad_norm": 3.718688488006592,
|
| 2275 |
+
"learning_rate": 1.6613710756243626e-05,
|
| 2276 |
+
"loss": 0.9512,
|
| 2277 |
+
"step": 3240
|
| 2278 |
+
},
|
| 2279 |
+
{
|
| 2280 |
+
"epoch": 0.65,
|
| 2281 |
+
"grad_norm": 1.9150598049163818,
|
| 2282 |
+
"learning_rate": 1.6449496416858284e-05,
|
| 2283 |
+
"loss": 3.6506,
|
| 2284 |
+
"step": 3250
|
| 2285 |
+
},
|
| 2286 |
+
{
|
| 2287 |
+
"epoch": 0.652,
|
| 2288 |
+
"grad_norm": 2.3984363079071045,
|
| 2289 |
+
"learning_rate": 1.6285698816954624e-05,
|
| 2290 |
+
"loss": 1.8059,
|
| 2291 |
+
"step": 3260
|
| 2292 |
+
},
|
| 2293 |
+
{
|
| 2294 |
+
"epoch": 0.654,
|
| 2295 |
+
"grad_norm": 1.429632544517517,
|
| 2296 |
+
"learning_rate": 1.612232593979658e-05,
|
| 2297 |
+
"loss": 1.8584,
|
| 2298 |
+
"step": 3270
|
| 2299 |
+
},
|
| 2300 |
+
{
|
| 2301 |
+
"epoch": 0.656,
|
| 2302 |
+
"grad_norm": 0.3058028817176819,
|
| 2303 |
+
"learning_rate": 1.5959385747947698e-05,
|
| 2304 |
+
"loss": 0.8049,
|
| 2305 |
+
"step": 3280
|
| 2306 |
+
},
|
| 2307 |
+
{
|
| 2308 |
+
"epoch": 0.658,
|
| 2309 |
+
"grad_norm": 2.075374126434326,
|
| 2310 |
+
"learning_rate": 1.5796886182883053e-05,
|
| 2311 |
+
"loss": 1.5368,
|
| 2312 |
+
"step": 3290
|
| 2313 |
+
},
|
| 2314 |
+
{
|
| 2315 |
+
"epoch": 0.66,
|
| 2316 |
+
"grad_norm": 1.1144261360168457,
|
| 2317 |
+
"learning_rate": 1.56348351646022e-05,
|
| 2318 |
+
"loss": 1.5826,
|
| 2319 |
+
"step": 3300
|
| 2320 |
+
},
|
| 2321 |
+
{
|
| 2322 |
+
"epoch": 0.662,
|
| 2323 |
+
"grad_norm": 1.5412263870239258,
|
| 2324 |
+
"learning_rate": 1.547324059124315e-05,
|
| 2325 |
+
"loss": 0.8753,
|
| 2326 |
+
"step": 3310
|
| 2327 |
+
},
|
| 2328 |
+
{
|
| 2329 |
+
"epoch": 0.664,
|
| 2330 |
+
"grad_norm": 0.0,
|
| 2331 |
+
"learning_rate": 1.5312110338697426e-05,
|
| 2332 |
+
"loss": 1.7769,
|
| 2333 |
+
"step": 3320
|
| 2334 |
+
},
|
| 2335 |
+
{
|
| 2336 |
+
"epoch": 0.666,
|
| 2337 |
+
"grad_norm": 0.9992184042930603,
|
| 2338 |
+
"learning_rate": 1.5151452260226224e-05,
|
| 2339 |
+
"loss": 3.1034,
|
| 2340 |
+
"step": 3330
|
| 2341 |
+
},
|
| 2342 |
+
{
|
| 2343 |
+
"epoch": 0.668,
|
| 2344 |
+
"grad_norm": 26.014408111572266,
|
| 2345 |
+
"learning_rate": 1.4991274186077632e-05,
|
| 2346 |
+
"loss": 2.3354,
|
| 2347 |
+
"step": 3340
|
| 2348 |
+
},
|
| 2349 |
+
{
|
| 2350 |
+
"epoch": 0.67,
|
| 2351 |
+
"grad_norm": 0.9760955572128296,
|
| 2352 |
+
"learning_rate": 1.4831583923104999e-05,
|
| 2353 |
+
"loss": 0.603,
|
| 2354 |
+
"step": 3350
|
| 2355 |
+
},
|
| 2356 |
+
{
|
| 2357 |
+
"epoch": 0.672,
|
| 2358 |
+
"grad_norm": 1.7530325651168823,
|
| 2359 |
+
"learning_rate": 1.467238925438646e-05,
|
| 2360 |
+
"loss": 0.6025,
|
| 2361 |
+
"step": 3360
|
| 2362 |
+
},
|
| 2363 |
+
{
|
| 2364 |
+
"epoch": 0.674,
|
| 2365 |
+
"grad_norm": 1.2257040739059448,
|
| 2366 |
+
"learning_rate": 1.4513697938845572e-05,
|
| 2367 |
+
"loss": 0.9105,
|
| 2368 |
+
"step": 3370
|
| 2369 |
+
},
|
| 2370 |
+
{
|
| 2371 |
+
"epoch": 0.676,
|
| 2372 |
+
"grad_norm": 0.3643525540828705,
|
| 2373 |
+
"learning_rate": 1.4355517710873184e-05,
|
| 2374 |
+
"loss": 0.8274,
|
| 2375 |
+
"step": 3380
|
| 2376 |
+
},
|
| 2377 |
+
{
|
| 2378 |
+
"epoch": 0.678,
|
| 2379 |
+
"grad_norm": 1.6802215576171875,
|
| 2380 |
+
"learning_rate": 1.4197856279950438e-05,
|
| 2381 |
+
"loss": 1.5935,
|
| 2382 |
+
"step": 3390
|
| 2383 |
+
},
|
| 2384 |
+
{
|
| 2385 |
+
"epoch": 0.68,
|
| 2386 |
+
"grad_norm": 1.3920577764511108,
|
| 2387 |
+
"learning_rate": 1.4040721330273062e-05,
|
| 2388 |
+
"loss": 0.7384,
|
| 2389 |
+
"step": 3400
|
| 2390 |
+
},
|
| 2391 |
+
{
|
| 2392 |
+
"epoch": 0.682,
|
| 2393 |
+
"grad_norm": 0.8468830585479736,
|
| 2394 |
+
"learning_rate": 1.388412052037682e-05,
|
| 2395 |
+
"loss": 1.8132,
|
| 2396 |
+
"step": 3410
|
| 2397 |
+
},
|
| 2398 |
+
{
|
| 2399 |
+
"epoch": 0.684,
|
| 2400 |
+
"grad_norm": 0.4457054138183594,
|
| 2401 |
+
"learning_rate": 1.3728061482764238e-05,
|
| 2402 |
+
"loss": 0.7159,
|
| 2403 |
+
"step": 3420
|
| 2404 |
+
},
|
| 2405 |
+
{
|
| 2406 |
+
"epoch": 0.686,
|
| 2407 |
+
"grad_norm": 0.8082422018051147,
|
| 2408 |
+
"learning_rate": 1.3572551823532654e-05,
|
| 2409 |
+
"loss": 0.8062,
|
| 2410 |
+
"step": 3430
|
| 2411 |
+
},
|
| 2412 |
+
{
|
| 2413 |
+
"epoch": 0.688,
|
| 2414 |
+
"grad_norm": 3.860785722732544,
|
| 2415 |
+
"learning_rate": 1.3417599122003464e-05,
|
| 2416 |
+
"loss": 1.4223,
|
| 2417 |
+
"step": 3440
|
| 2418 |
+
},
|
| 2419 |
+
{
|
| 2420 |
+
"epoch": 0.69,
|
| 2421 |
+
"grad_norm": 1.3153111934661865,
|
| 2422 |
+
"learning_rate": 1.3263210930352737e-05,
|
| 2423 |
+
"loss": 0.8253,
|
| 2424 |
+
"step": 3450
|
| 2425 |
+
},
|
| 2426 |
+
{
|
| 2427 |
+
"epoch": 0.692,
|
| 2428 |
+
"grad_norm": 0.49992579221725464,
|
| 2429 |
+
"learning_rate": 1.3109394773243117e-05,
|
| 2430 |
+
"loss": 1.6049,
|
| 2431 |
+
"step": 3460
|
| 2432 |
+
},
|
| 2433 |
+
{
|
| 2434 |
+
"epoch": 0.694,
|
| 2435 |
+
"grad_norm": 1.5094339847564697,
|
| 2436 |
+
"learning_rate": 1.2956158147457115e-05,
|
| 2437 |
+
"loss": 0.6334,
|
| 2438 |
+
"step": 3470
|
| 2439 |
+
},
|
| 2440 |
+
{
|
| 2441 |
+
"epoch": 0.696,
|
| 2442 |
+
"grad_norm": 1.6272149085998535,
|
| 2443 |
+
"learning_rate": 1.280350852153168e-05,
|
| 2444 |
+
"loss": 1.605,
|
| 2445 |
+
"step": 3480
|
| 2446 |
+
},
|
| 2447 |
+
{
|
| 2448 |
+
"epoch": 0.698,
|
| 2449 |
+
"grad_norm": 0.6700888276100159,
|
| 2450 |
+
"learning_rate": 1.2651453335394231e-05,
|
| 2451 |
+
"loss": 1.3822,
|
| 2452 |
+
"step": 3490
|
| 2453 |
+
},
|
| 2454 |
+
{
|
| 2455 |
+
"epoch": 0.7,
|
| 2456 |
+
"grad_norm": 2.492363691329956,
|
| 2457 |
+
"learning_rate": 1.2500000000000006e-05,
|
| 2458 |
+
"loss": 1.154,
|
| 2459 |
+
"step": 3500
|
| 2460 |
+
},
|
| 2461 |
+
{
|
| 2462 |
+
"epoch": 0.702,
|
| 2463 |
+
"grad_norm": 2.3340728282928467,
|
| 2464 |
+
"learning_rate": 1.234915589697091e-05,
|
| 2465 |
+
"loss": 1.3894,
|
| 2466 |
+
"step": 3510
|
| 2467 |
+
},
|
| 2468 |
+
{
|
| 2469 |
+
"epoch": 0.704,
|
| 2470 |
+
"grad_norm": 0.0,
|
| 2471 |
+
"learning_rate": 1.2198928378235716e-05,
|
| 2472 |
+
"loss": 0.8913,
|
| 2473 |
+
"step": 3520
|
| 2474 |
+
},
|
| 2475 |
+
{
|
| 2476 |
+
"epoch": 0.706,
|
| 2477 |
+
"grad_norm": 2.37203311920166,
|
| 2478 |
+
"learning_rate": 1.2049324765671749e-05,
|
| 2479 |
+
"loss": 1.4518,
|
| 2480 |
+
"step": 3530
|
| 2481 |
+
},
|
| 2482 |
+
{
|
| 2483 |
+
"epoch": 0.708,
|
| 2484 |
+
"grad_norm": 0.4668540954589844,
|
| 2485 |
+
"learning_rate": 1.1900352350748026e-05,
|
| 2486 |
+
"loss": 2.2489,
|
| 2487 |
+
"step": 3540
|
| 2488 |
+
},
|
| 2489 |
+
{
|
| 2490 |
+
"epoch": 0.71,
|
| 2491 |
+
"grad_norm": 7.209711074829102,
|
| 2492 |
+
"learning_rate": 1.175201839416988e-05,
|
| 2493 |
+
"loss": 0.8946,
|
| 2494 |
+
"step": 3550
|
| 2495 |
+
},
|
| 2496 |
+
{
|
| 2497 |
+
"epoch": 0.712,
|
| 2498 |
+
"grad_norm": 0.0,
|
| 2499 |
+
"learning_rate": 1.1604330125525079e-05,
|
| 2500 |
+
"loss": 0.5814,
|
| 2501 |
+
"step": 3560
|
| 2502 |
+
},
|
| 2503 |
+
{
|
| 2504 |
+
"epoch": 0.714,
|
| 2505 |
+
"grad_norm": 1.9763480424880981,
|
| 2506 |
+
"learning_rate": 1.1457294742931507e-05,
|
| 2507 |
+
"loss": 4.9924,
|
| 2508 |
+
"step": 3570
|
| 2509 |
+
},
|
| 2510 |
+
{
|
| 2511 |
+
"epoch": 0.716,
|
| 2512 |
+
"grad_norm": 4.834700107574463,
|
| 2513 |
+
"learning_rate": 1.1310919412686247e-05,
|
| 2514 |
+
"loss": 0.9913,
|
| 2515 |
+
"step": 3580
|
| 2516 |
+
},
|
| 2517 |
+
{
|
| 2518 |
+
"epoch": 0.718,
|
| 2519 |
+
"grad_norm": 5.4178876876831055,
|
| 2520 |
+
"learning_rate": 1.11652112689164e-05,
|
| 2521 |
+
"loss": 4.148,
|
| 2522 |
+
"step": 3590
|
| 2523 |
+
},
|
| 2524 |
+
{
|
| 2525 |
+
"epoch": 0.72,
|
| 2526 |
+
"grad_norm": 0.0,
|
| 2527 |
+
"learning_rate": 1.1020177413231334e-05,
|
| 2528 |
+
"loss": 1.184,
|
| 2529 |
+
"step": 3600
|
| 2530 |
+
},
|
| 2531 |
+
{
|
| 2532 |
+
"epoch": 0.722,
|
| 2533 |
+
"grad_norm": 5.075471878051758,
|
| 2534 |
+
"learning_rate": 1.0875824914376553e-05,
|
| 2535 |
+
"loss": 0.8565,
|
| 2536 |
+
"step": 3610
|
| 2537 |
+
},
|
| 2538 |
+
{
|
| 2539 |
+
"epoch": 0.724,
|
| 2540 |
+
"grad_norm": 1.6416356563568115,
|
| 2541 |
+
"learning_rate": 1.0732160807889211e-05,
|
| 2542 |
+
"loss": 1.6703,
|
| 2543 |
+
"step": 3620
|
| 2544 |
+
},
|
| 2545 |
+
{
|
| 2546 |
+
"epoch": 0.726,
|
| 2547 |
+
"grad_norm": 0.3667546808719635,
|
| 2548 |
+
"learning_rate": 1.058919209575517e-05,
|
| 2549 |
+
"loss": 1.2277,
|
| 2550 |
+
"step": 3630
|
| 2551 |
+
},
|
| 2552 |
+
{
|
| 2553 |
+
"epoch": 0.728,
|
| 2554 |
+
"grad_norm": 0.44018077850341797,
|
| 2555 |
+
"learning_rate": 1.0446925746067768e-05,
|
| 2556 |
+
"loss": 0.7438,
|
| 2557 |
+
"step": 3640
|
| 2558 |
+
},
|
| 2559 |
+
{
|
| 2560 |
+
"epoch": 0.73,
|
| 2561 |
+
"grad_norm": 0.9236971735954285,
|
| 2562 |
+
"learning_rate": 1.0305368692688174e-05,
|
| 2563 |
+
"loss": 3.7347,
|
| 2564 |
+
"step": 3650
|
| 2565 |
+
},
|
| 2566 |
+
{
|
| 2567 |
+
"epoch": 0.732,
|
| 2568 |
+
"grad_norm": 0.0,
|
| 2569 |
+
"learning_rate": 1.0164527834907467e-05,
|
| 2570 |
+
"loss": 1.8496,
|
| 2571 |
+
"step": 3660
|
| 2572 |
+
},
|
| 2573 |
+
{
|
| 2574 |
+
"epoch": 0.734,
|
| 2575 |
+
"grad_norm": 0.9189653396606445,
|
| 2576 |
+
"learning_rate": 1.0024410037110357e-05,
|
| 2577 |
+
"loss": 1.2388,
|
| 2578 |
+
"step": 3670
|
| 2579 |
+
},
|
| 2580 |
+
{
|
| 2581 |
+
"epoch": 0.736,
|
| 2582 |
+
"grad_norm": 2.626263380050659,
|
| 2583 |
+
"learning_rate": 9.88502212844063e-06,
|
| 2584 |
+
"loss": 5.6336,
|
| 2585 |
+
"step": 3680
|
| 2586 |
+
},
|
| 2587 |
+
{
|
| 2588 |
+
"epoch": 0.738,
|
| 2589 |
+
"grad_norm": 6.583788871765137,
|
| 2590 |
+
"learning_rate": 9.746370902468311e-06,
|
| 2591 |
+
"loss": 1.434,
|
| 2592 |
+
"step": 3690
|
| 2593 |
+
},
|
| 2594 |
+
{
|
| 2595 |
+
"epoch": 0.74,
|
| 2596 |
+
"grad_norm": 0.29883110523223877,
|
| 2597 |
+
"learning_rate": 9.608463116858542e-06,
|
| 2598 |
+
"loss": 2.9898,
|
| 2599 |
+
"step": 3700
|
| 2600 |
+
},
|
| 2601 |
+
{
|
| 2602 |
+
"epoch": 0.742,
|
| 2603 |
+
"grad_norm": 1.0266231298446655,
|
| 2604 |
+
"learning_rate": 9.471305493042243e-06,
|
| 2605 |
+
"loss": 0.9298,
|
| 2606 |
+
"step": 3710
|
| 2607 |
+
},
|
| 2608 |
+
{
|
| 2609 |
+
"epoch": 0.744,
|
| 2610 |
+
"grad_norm": 0.641528844833374,
|
| 2611 |
+
"learning_rate": 9.334904715888495e-06,
|
| 2612 |
+
"loss": 2.3826,
|
| 2613 |
+
"step": 3720
|
| 2614 |
+
},
|
| 2615 |
+
{
|
| 2616 |
+
"epoch": 0.746,
|
| 2617 |
+
"grad_norm": 0.8033006191253662,
|
| 2618 |
+
"learning_rate": 9.199267433378727e-06,
|
| 2619 |
+
"loss": 0.8495,
|
| 2620 |
+
"step": 3730
|
| 2621 |
+
},
|
| 2622 |
+
{
|
| 2623 |
+
"epoch": 0.748,
|
| 2624 |
+
"grad_norm": 2.0159640312194824,
|
| 2625 |
+
"learning_rate": 9.064400256282757e-06,
|
| 2626 |
+
"loss": 0.6518,
|
| 2627 |
+
"step": 3740
|
| 2628 |
+
},
|
| 2629 |
+
{
|
| 2630 |
+
"epoch": 0.75,
|
| 2631 |
+
"grad_norm": 3.390242099761963,
|
| 2632 |
+
"learning_rate": 8.930309757836517e-06,
|
| 2633 |
+
"loss": 0.8714,
|
| 2634 |
+
"step": 3750
|
| 2635 |
+
},
|
| 2636 |
+
{
|
| 2637 |
+
"epoch": 0.752,
|
| 2638 |
+
"grad_norm": 2.8902716636657715,
|
| 2639 |
+
"learning_rate": 8.797002473421728e-06,
|
| 2640 |
+
"loss": 1.378,
|
| 2641 |
+
"step": 3760
|
| 2642 |
+
},
|
| 2643 |
+
{
|
| 2644 |
+
"epoch": 0.754,
|
| 2645 |
+
"grad_norm": 0.5341398119926453,
|
| 2646 |
+
"learning_rate": 8.664484900247363e-06,
|
| 2647 |
+
"loss": 1.1048,
|
| 2648 |
+
"step": 3770
|
| 2649 |
+
},
|
| 2650 |
+
{
|
| 2651 |
+
"epoch": 0.756,
|
| 2652 |
+
"grad_norm": 1.0644313097000122,
|
| 2653 |
+
"learning_rate": 8.532763497032987e-06,
|
| 2654 |
+
"loss": 0.9402,
|
| 2655 |
+
"step": 3780
|
| 2656 |
+
},
|
| 2657 |
+
{
|
| 2658 |
+
"epoch": 0.758,
|
| 2659 |
+
"grad_norm": 0.3949926197528839,
|
| 2660 |
+
"learning_rate": 8.40184468369396e-06,
|
| 2661 |
+
"loss": 1.3253,
|
| 2662 |
+
"step": 3790
|
| 2663 |
+
},
|
| 2664 |
+
{
|
| 2665 |
+
"epoch": 0.76,
|
| 2666 |
+
"grad_norm": 7.875147819519043,
|
| 2667 |
+
"learning_rate": 8.271734841028553e-06,
|
| 2668 |
+
"loss": 1.0062,
|
| 2669 |
+
"step": 3800
|
| 2670 |
+
},
|
| 2671 |
+
{
|
| 2672 |
+
"epoch": 0.762,
|
| 2673 |
+
"grad_norm": 0.3310488760471344,
|
| 2674 |
+
"learning_rate": 8.142440310406924e-06,
|
| 2675 |
+
"loss": 0.9064,
|
| 2676 |
+
"step": 3810
|
| 2677 |
+
},
|
| 2678 |
+
{
|
| 2679 |
+
"epoch": 0.764,
|
| 2680 |
+
"grad_norm": 3.954543352127075,
|
| 2681 |
+
"learning_rate": 8.013967393462094e-06,
|
| 2682 |
+
"loss": 3.918,
|
| 2683 |
+
"step": 3820
|
| 2684 |
+
},
|
| 2685 |
+
{
|
| 2686 |
+
"epoch": 0.766,
|
| 2687 |
+
"grad_norm": 0.5071019530296326,
|
| 2688 |
+
"learning_rate": 7.886322351782783e-06,
|
| 2689 |
+
"loss": 0.4079,
|
| 2690 |
+
"step": 3830
|
| 2691 |
+
},
|
| 2692 |
+
{
|
| 2693 |
+
"epoch": 0.768,
|
| 2694 |
+
"grad_norm": 1.4938267469406128,
|
| 2695 |
+
"learning_rate": 7.759511406608255e-06,
|
| 2696 |
+
"loss": 0.8509,
|
| 2697 |
+
"step": 3840
|
| 2698 |
+
},
|
| 2699 |
+
{
|
| 2700 |
+
"epoch": 0.77,
|
| 2701 |
+
"grad_norm": 0.6899465322494507,
|
| 2702 |
+
"learning_rate": 7.633540738525066e-06,
|
| 2703 |
+
"loss": 1.9055,
|
| 2704 |
+
"step": 3850
|
| 2705 |
+
},
|
| 2706 |
+
{
|
| 2707 |
+
"epoch": 0.772,
|
| 2708 |
+
"grad_norm": 59.65831756591797,
|
| 2709 |
+
"learning_rate": 7.508416487165862e-06,
|
| 2710 |
+
"loss": 6.2419,
|
| 2711 |
+
"step": 3860
|
| 2712 |
+
},
|
| 2713 |
+
{
|
| 2714 |
+
"epoch": 0.774,
|
| 2715 |
+
"grad_norm": 0.5461062788963318,
|
| 2716 |
+
"learning_rate": 7.384144750910133e-06,
|
| 2717 |
+
"loss": 1.1085,
|
| 2718 |
+
"step": 3870
|
| 2719 |
+
},
|
| 2720 |
+
{
|
| 2721 |
+
"epoch": 0.776,
|
| 2722 |
+
"grad_norm": 0.7306953072547913,
|
| 2723 |
+
"learning_rate": 7.260731586586983e-06,
|
| 2724 |
+
"loss": 4.1298,
|
| 2725 |
+
"step": 3880
|
| 2726 |
+
},
|
| 2727 |
+
{
|
| 2728 |
+
"epoch": 0.778,
|
| 2729 |
+
"grad_norm": 1.150995135307312,
|
| 2730 |
+
"learning_rate": 7.138183009179922e-06,
|
| 2731 |
+
"loss": 0.7264,
|
| 2732 |
+
"step": 3890
|
| 2733 |
+
},
|
| 2734 |
+
{
|
| 2735 |
+
"epoch": 0.78,
|
| 2736 |
+
"grad_norm": 6.218198299407959,
|
| 2737 |
+
"learning_rate": 7.016504991533726e-06,
|
| 2738 |
+
"loss": 1.0637,
|
| 2739 |
+
"step": 3900
|
| 2740 |
+
},
|
| 2741 |
+
{
|
| 2742 |
+
"epoch": 0.782,
|
| 2743 |
+
"grad_norm": 1.58785080909729,
|
| 2744 |
+
"learning_rate": 6.895703464063319e-06,
|
| 2745 |
+
"loss": 1.2957,
|
| 2746 |
+
"step": 3910
|
| 2747 |
+
},
|
| 2748 |
+
{
|
| 2749 |
+
"epoch": 0.784,
|
| 2750 |
+
"grad_norm": 0.5615648627281189,
|
| 2751 |
+
"learning_rate": 6.775784314464717e-06,
|
| 2752 |
+
"loss": 0.4924,
|
| 2753 |
+
"step": 3920
|
| 2754 |
+
},
|
| 2755 |
+
{
|
| 2756 |
+
"epoch": 0.786,
|
| 2757 |
+
"grad_norm": 0.8472492098808289,
|
| 2758 |
+
"learning_rate": 6.656753387428089e-06,
|
| 2759 |
+
"loss": 1.0074,
|
| 2760 |
+
"step": 3930
|
| 2761 |
+
},
|
| 2762 |
+
{
|
| 2763 |
+
"epoch": 0.788,
|
| 2764 |
+
"grad_norm": 1.1373908519744873,
|
| 2765 |
+
"learning_rate": 6.538616484352902e-06,
|
| 2766 |
+
"loss": 1.7825,
|
| 2767 |
+
"step": 3940
|
| 2768 |
+
},
|
| 2769 |
+
{
|
| 2770 |
+
"epoch": 0.79,
|
| 2771 |
+
"grad_norm": 1.4689478874206543,
|
| 2772 |
+
"learning_rate": 6.421379363065142e-06,
|
| 2773 |
+
"loss": 1.2334,
|
| 2774 |
+
"step": 3950
|
| 2775 |
+
},
|
| 2776 |
+
{
|
| 2777 |
+
"epoch": 0.792,
|
| 2778 |
+
"grad_norm": 4.063680171966553,
|
| 2779 |
+
"learning_rate": 6.305047737536707e-06,
|
| 2780 |
+
"loss": 1.9249,
|
| 2781 |
+
"step": 3960
|
| 2782 |
+
},
|
| 2783 |
+
{
|
| 2784 |
+
"epoch": 0.794,
|
| 2785 |
+
"grad_norm": 3.548961877822876,
|
| 2786 |
+
"learning_rate": 6.189627277606894e-06,
|
| 2787 |
+
"loss": 1.4351,
|
| 2788 |
+
"step": 3970
|
| 2789 |
+
},
|
| 2790 |
+
{
|
| 2791 |
+
"epoch": 0.796,
|
| 2792 |
+
"grad_norm": 0.8903224468231201,
|
| 2793 |
+
"learning_rate": 6.075123608706093e-06,
|
| 2794 |
+
"loss": 1.6044,
|
| 2795 |
+
"step": 3980
|
| 2796 |
+
},
|
| 2797 |
+
{
|
| 2798 |
+
"epoch": 0.798,
|
| 2799 |
+
"grad_norm": 1.5656462907791138,
|
| 2800 |
+
"learning_rate": 5.961542311581586e-06,
|
| 2801 |
+
"loss": 1.7348,
|
| 2802 |
+
"step": 3990
|
| 2803 |
+
},
|
| 2804 |
+
{
|
| 2805 |
+
"epoch": 0.8,
|
| 2806 |
+
"grad_norm": 0.4290873110294342,
|
| 2807 |
+
"learning_rate": 5.848888922025553e-06,
|
| 2808 |
+
"loss": 12.6939,
|
| 2809 |
+
"step": 4000
|
| 2810 |
+
},
|
| 2811 |
+
{
|
| 2812 |
+
"epoch": 0.802,
|
| 2813 |
+
"grad_norm": 7.812740325927734,
|
| 2814 |
+
"learning_rate": 5.737168930605272e-06,
|
| 2815 |
+
"loss": 0.8231,
|
| 2816 |
+
"step": 4010
|
| 2817 |
+
},
|
| 2818 |
+
{
|
| 2819 |
+
"epoch": 0.804,
|
| 2820 |
+
"grad_norm": 0.8808218240737915,
|
| 2821 |
+
"learning_rate": 5.626387782395512e-06,
|
| 2822 |
+
"loss": 2.239,
|
| 2823 |
+
"step": 4020
|
| 2824 |
+
},
|
| 2825 |
+
{
|
| 2826 |
+
"epoch": 0.806,
|
| 2827 |
+
"grad_norm": 5.278824806213379,
|
| 2828 |
+
"learning_rate": 5.5165508767131415e-06,
|
| 2829 |
+
"loss": 3.1218,
|
| 2830 |
+
"step": 4030
|
| 2831 |
+
},
|
| 2832 |
+
{
|
| 2833 |
+
"epoch": 0.808,
|
| 2834 |
+
"grad_norm": 1.9946264028549194,
|
| 2835 |
+
"learning_rate": 5.4076635668540075e-06,
|
| 2836 |
+
"loss": 0.5524,
|
| 2837 |
+
"step": 4040
|
| 2838 |
+
},
|
| 2839 |
+
{
|
| 2840 |
+
"epoch": 0.81,
|
| 2841 |
+
"grad_norm": 2.5950546264648438,
|
| 2842 |
+
"learning_rate": 5.299731159831953e-06,
|
| 2843 |
+
"loss": 2.4598,
|
| 2844 |
+
"step": 4050
|
| 2845 |
+
},
|
| 2846 |
+
{
|
| 2847 |
+
"epoch": 0.812,
|
| 2848 |
+
"grad_norm": 0.0,
|
| 2849 |
+
"learning_rate": 5.192758916120236e-06,
|
| 2850 |
+
"loss": 0.634,
|
| 2851 |
+
"step": 4060
|
| 2852 |
+
},
|
| 2853 |
+
{
|
| 2854 |
+
"epoch": 0.814,
|
| 2855 |
+
"grad_norm": 0.4671136438846588,
|
| 2856 |
+
"learning_rate": 5.086752049395094e-06,
|
| 2857 |
+
"loss": 0.8943,
|
| 2858 |
+
"step": 4070
|
| 2859 |
+
},
|
| 2860 |
+
{
|
| 2861 |
+
"epoch": 0.816,
|
| 2862 |
+
"grad_norm": 3.0124940872192383,
|
| 2863 |
+
"learning_rate": 4.981715726281666e-06,
|
| 2864 |
+
"loss": 1.1776,
|
| 2865 |
+
"step": 4080
|
| 2866 |
+
},
|
| 2867 |
+
{
|
| 2868 |
+
"epoch": 0.818,
|
| 2869 |
+
"grad_norm": 1.0693814754486084,
|
| 2870 |
+
"learning_rate": 4.877655066102149e-06,
|
| 2871 |
+
"loss": 0.918,
|
| 2872 |
+
"step": 4090
|
| 2873 |
+
},
|
| 2874 |
+
{
|
| 2875 |
+
"epoch": 0.82,
|
| 2876 |
+
"grad_norm": 0.39503878355026245,
|
| 2877 |
+
"learning_rate": 4.7745751406263165e-06,
|
| 2878 |
+
"loss": 0.3834,
|
| 2879 |
+
"step": 4100
|
| 2880 |
+
},
|
| 2881 |
+
{
|
| 2882 |
+
"epoch": 0.822,
|
| 2883 |
+
"grad_norm": 2.3926539421081543,
|
| 2884 |
+
"learning_rate": 4.672480973824311e-06,
|
| 2885 |
+
"loss": 1.0476,
|
| 2886 |
+
"step": 4110
|
| 2887 |
+
},
|
| 2888 |
+
{
|
| 2889 |
+
"epoch": 0.824,
|
| 2890 |
+
"grad_norm": 2.5929503440856934,
|
| 2891 |
+
"learning_rate": 4.571377541621788e-06,
|
| 2892 |
+
"loss": 0.7036,
|
| 2893 |
+
"step": 4120
|
| 2894 |
+
},
|
| 2895 |
+
{
|
| 2896 |
+
"epoch": 0.826,
|
| 2897 |
+
"grad_norm": 1.2581967115402222,
|
| 2898 |
+
"learning_rate": 4.4712697716574e-06,
|
| 2899 |
+
"loss": 1.4096,
|
| 2900 |
+
"step": 4130
|
| 2901 |
+
},
|
| 2902 |
+
{
|
| 2903 |
+
"epoch": 0.828,
|
| 2904 |
+
"grad_norm": 3.9008257389068604,
|
| 2905 |
+
"learning_rate": 4.372162543042624e-06,
|
| 2906 |
+
"loss": 1.2209,
|
| 2907 |
+
"step": 4140
|
| 2908 |
+
},
|
| 2909 |
+
{
|
| 2910 |
+
"epoch": 0.83,
|
| 2911 |
+
"grad_norm": 0.9294935464859009,
|
| 2912 |
+
"learning_rate": 4.274060686123959e-06,
|
| 2913 |
+
"loss": 1.4758,
|
| 2914 |
+
"step": 4150
|
| 2915 |
+
},
|
| 2916 |
+
{
|
| 2917 |
+
"epoch": 0.832,
|
| 2918 |
+
"grad_norm": 0.4707418382167816,
|
| 2919 |
+
"learning_rate": 4.176968982247514e-06,
|
| 2920 |
+
"loss": 1.546,
|
| 2921 |
+
"step": 4160
|
| 2922 |
+
},
|
| 2923 |
+
{
|
| 2924 |
+
"epoch": 0.834,
|
| 2925 |
+
"grad_norm": 0.9732871055603027,
|
| 2926 |
+
"learning_rate": 4.08089216352596e-06,
|
| 2927 |
+
"loss": 0.8864,
|
| 2928 |
+
"step": 4170
|
| 2929 |
+
},
|
| 2930 |
+
{
|
| 2931 |
+
"epoch": 0.836,
|
| 2932 |
+
"grad_norm": 1.237377405166626,
|
| 2933 |
+
"learning_rate": 3.985834912607894e-06,
|
| 2934 |
+
"loss": 1.4541,
|
| 2935 |
+
"step": 4180
|
| 2936 |
+
},
|
| 2937 |
+
{
|
| 2938 |
+
"epoch": 0.838,
|
| 2939 |
+
"grad_norm": 1.330290675163269,
|
| 2940 |
+
"learning_rate": 3.891801862449629e-06,
|
| 2941 |
+
"loss": 0.6756,
|
| 2942 |
+
"step": 4190
|
| 2943 |
+
},
|
| 2944 |
+
{
|
| 2945 |
+
"epoch": 0.84,
|
| 2946 |
+
"grad_norm": 0.49262121319770813,
|
| 2947 |
+
"learning_rate": 3.798797596089351e-06,
|
| 2948 |
+
"loss": 1.3881,
|
| 2949 |
+
"step": 4200
|
| 2950 |
+
},
|
| 2951 |
+
{
|
| 2952 |
+
"epoch": 0.842,
|
| 2953 |
+
"grad_norm": 4.391107082366943,
|
| 2954 |
+
"learning_rate": 3.7068266464238084e-06,
|
| 2955 |
+
"loss": 2.3349,
|
| 2956 |
+
"step": 4210
|
| 2957 |
+
},
|
| 2958 |
+
{
|
| 2959 |
+
"epoch": 0.844,
|
| 2960 |
+
"grad_norm": 1.4304258823394775,
|
| 2961 |
+
"learning_rate": 3.6158934959873353e-06,
|
| 2962 |
+
"loss": 0.7973,
|
| 2963 |
+
"step": 4220
|
| 2964 |
+
},
|
| 2965 |
+
{
|
| 2966 |
+
"epoch": 0.846,
|
| 2967 |
+
"grad_norm": 1.131519079208374,
|
| 2968 |
+
"learning_rate": 3.5260025767333893e-06,
|
| 2969 |
+
"loss": 1.1455,
|
| 2970 |
+
"step": 4230
|
| 2971 |
+
},
|
| 2972 |
+
{
|
| 2973 |
+
"epoch": 0.848,
|
| 2974 |
+
"grad_norm": 4.368002414703369,
|
| 2975 |
+
"learning_rate": 3.4371582698185633e-06,
|
| 2976 |
+
"loss": 2.3828,
|
| 2977 |
+
"step": 4240
|
| 2978 |
+
},
|
| 2979 |
+
{
|
| 2980 |
+
"epoch": 0.85,
|
| 2981 |
+
"grad_norm": 3.4001667499542236,
|
| 2982 |
+
"learning_rate": 3.3493649053890326e-06,
|
| 2983 |
+
"loss": 0.8389,
|
| 2984 |
+
"step": 4250
|
| 2985 |
+
},
|
| 2986 |
+
{
|
| 2987 |
+
"epoch": 0.852,
|
| 2988 |
+
"grad_norm": 1.048996090888977,
|
| 2989 |
+
"learning_rate": 3.262626762369525e-06,
|
| 2990 |
+
"loss": 0.7594,
|
| 2991 |
+
"step": 4260
|
| 2992 |
+
},
|
| 2993 |
+
{
|
| 2994 |
+
"epoch": 0.854,
|
| 2995 |
+
"grad_norm": 0.4330098330974579,
|
| 2996 |
+
"learning_rate": 3.176948068254762e-06,
|
| 2997 |
+
"loss": 0.8006,
|
| 2998 |
+
"step": 4270
|
| 2999 |
+
},
|
| 3000 |
+
{
|
| 3001 |
+
"epoch": 0.856,
|
| 3002 |
+
"grad_norm": 5.37194299697876,
|
| 3003 |
+
"learning_rate": 3.092332998903416e-06,
|
| 3004 |
+
"loss": 1.4222,
|
| 3005 |
+
"step": 4280
|
| 3006 |
+
},
|
| 3007 |
+
{
|
| 3008 |
+
"epoch": 0.858,
|
| 3009 |
+
"grad_norm": 2.6493430137634277,
|
| 3010 |
+
"learning_rate": 3.0087856783345914e-06,
|
| 3011 |
+
"loss": 2.5477,
|
| 3012 |
+
"step": 4290
|
| 3013 |
+
},
|
| 3014 |
+
{
|
| 3015 |
+
"epoch": 0.86,
|
| 3016 |
+
"grad_norm": 1.8400111198425293,
|
| 3017 |
+
"learning_rate": 2.9263101785268254e-06,
|
| 3018 |
+
"loss": 2.2044,
|
| 3019 |
+
"step": 4300
|
| 3020 |
+
},
|
| 3021 |
+
{
|
| 3022 |
+
"epoch": 0.862,
|
| 3023 |
+
"grad_norm": 18.55029296875,
|
| 3024 |
+
"learning_rate": 2.8449105192196316e-06,
|
| 3025 |
+
"loss": 1.5267,
|
| 3026 |
+
"step": 4310
|
| 3027 |
+
},
|
| 3028 |
+
{
|
| 3029 |
+
"epoch": 0.864,
|
| 3030 |
+
"grad_norm": 1.1221182346343994,
|
| 3031 |
+
"learning_rate": 2.764590667717562e-06,
|
| 3032 |
+
"loss": 0.6063,
|
| 3033 |
+
"step": 4320
|
| 3034 |
+
},
|
| 3035 |
+
{
|
| 3036 |
+
"epoch": 0.866,
|
| 3037 |
+
"grad_norm": 1.4529755115509033,
|
| 3038 |
+
"learning_rate": 2.6853545386968606e-06,
|
| 3039 |
+
"loss": 2.6636,
|
| 3040 |
+
"step": 4330
|
| 3041 |
+
},
|
| 3042 |
+
{
|
| 3043 |
+
"epoch": 0.868,
|
| 3044 |
+
"grad_norm": 0.7516627907752991,
|
| 3045 |
+
"learning_rate": 2.6072059940146775e-06,
|
| 3046 |
+
"loss": 0.9864,
|
| 3047 |
+
"step": 4340
|
| 3048 |
+
},
|
| 3049 |
+
{
|
| 3050 |
+
"epoch": 0.87,
|
| 3051 |
+
"grad_norm": 2.175539970397949,
|
| 3052 |
+
"learning_rate": 2.5301488425208296e-06,
|
| 3053 |
+
"loss": 2.3756,
|
| 3054 |
+
"step": 4350
|
| 3055 |
+
},
|
| 3056 |
+
{
|
| 3057 |
+
"epoch": 0.872,
|
| 3058 |
+
"grad_norm": 3.53667950630188,
|
| 3059 |
+
"learning_rate": 2.454186839872158e-06,
|
| 3060 |
+
"loss": 0.962,
|
| 3061 |
+
"step": 4360
|
| 3062 |
+
},
|
| 3063 |
+
{
|
| 3064 |
+
"epoch": 0.874,
|
| 3065 |
+
"grad_norm": 1.7746353149414062,
|
| 3066 |
+
"learning_rate": 2.379323688349516e-06,
|
| 3067 |
+
"loss": 0.8765,
|
| 3068 |
+
"step": 4370
|
| 3069 |
+
},
|
| 3070 |
+
{
|
| 3071 |
+
"epoch": 0.876,
|
| 3072 |
+
"grad_norm": 2.2868964672088623,
|
| 3073 |
+
"learning_rate": 2.3055630366772856e-06,
|
| 3074 |
+
"loss": 1.3187,
|
| 3075 |
+
"step": 4380
|
| 3076 |
+
},
|
| 3077 |
+
{
|
| 3078 |
+
"epoch": 0.878,
|
| 3079 |
+
"grad_norm": 2.6441867351531982,
|
| 3080 |
+
"learning_rate": 2.2329084798455746e-06,
|
| 3081 |
+
"loss": 0.996,
|
| 3082 |
+
"step": 4390
|
| 3083 |
+
},
|
| 3084 |
+
{
|
| 3085 |
+
"epoch": 0.88,
|
| 3086 |
+
"grad_norm": 0.8521894216537476,
|
| 3087 |
+
"learning_rate": 2.1613635589349756e-06,
|
| 3088 |
+
"loss": 1.1203,
|
| 3089 |
+
"step": 4400
|
| 3090 |
+
},
|
| 3091 |
+
{
|
| 3092 |
+
"epoch": 0.882,
|
| 3093 |
+
"grad_norm": 2.128058433532715,
|
| 3094 |
+
"learning_rate": 2.0909317609440095e-06,
|
| 3095 |
+
"loss": 1.5124,
|
| 3096 |
+
"step": 4410
|
| 3097 |
+
},
|
| 3098 |
+
{
|
| 3099 |
+
"epoch": 0.884,
|
| 3100 |
+
"grad_norm": 1.3996164798736572,
|
| 3101 |
+
"learning_rate": 2.0216165186191407e-06,
|
| 3102 |
+
"loss": 0.5098,
|
| 3103 |
+
"step": 4420
|
| 3104 |
+
},
|
| 3105 |
+
{
|
| 3106 |
+
"epoch": 0.886,
|
| 3107 |
+
"grad_norm": 0.6261931657791138,
|
| 3108 |
+
"learning_rate": 1.95342121028749e-06,
|
| 3109 |
+
"loss": 0.9764,
|
| 3110 |
+
"step": 4430
|
| 3111 |
+
},
|
| 3112 |
+
{
|
| 3113 |
+
"epoch": 0.888,
|
| 3114 |
+
"grad_norm": 2.9988150596618652,
|
| 3115 |
+
"learning_rate": 1.8863491596921745e-06,
|
| 3116 |
+
"loss": 1.6156,
|
| 3117 |
+
"step": 4440
|
| 3118 |
+
},
|
| 3119 |
+
{
|
| 3120 |
+
"epoch": 0.89,
|
| 3121 |
+
"grad_norm": 0.7288640141487122,
|
| 3122 |
+
"learning_rate": 1.8204036358303173e-06,
|
| 3123 |
+
"loss": 1.3607,
|
| 3124 |
+
"step": 4450
|
| 3125 |
+
},
|
| 3126 |
+
{
|
| 3127 |
+
"epoch": 0.892,
|
| 3128 |
+
"grad_norm": 0.7692397832870483,
|
| 3129 |
+
"learning_rate": 1.7555878527937164e-06,
|
| 3130 |
+
"loss": 1.291,
|
| 3131 |
+
"step": 4460
|
| 3132 |
+
},
|
| 3133 |
+
{
|
| 3134 |
+
"epoch": 0.894,
|
| 3135 |
+
"grad_norm": 1.286188006401062,
|
| 3136 |
+
"learning_rate": 1.6919049696121958e-06,
|
| 3137 |
+
"loss": 0.6693,
|
| 3138 |
+
"step": 4470
|
| 3139 |
+
},
|
| 3140 |
+
{
|
| 3141 |
+
"epoch": 0.896,
|
| 3142 |
+
"grad_norm": 0.9771424531936646,
|
| 3143 |
+
"learning_rate": 1.629358090099639e-06,
|
| 3144 |
+
"loss": 1.8125,
|
| 3145 |
+
"step": 4480
|
| 3146 |
+
},
|
| 3147 |
+
{
|
| 3148 |
+
"epoch": 0.898,
|
| 3149 |
+
"grad_norm": 2.4204814434051514,
|
| 3150 |
+
"learning_rate": 1.5679502627027136e-06,
|
| 3151 |
+
"loss": 1.3824,
|
| 3152 |
+
"step": 4490
|
| 3153 |
+
},
|
| 3154 |
+
{
|
| 3155 |
+
"epoch": 0.9,
|
| 3156 |
+
"grad_norm": 4.12672758102417,
|
| 3157 |
+
"learning_rate": 1.5076844803522922e-06,
|
| 3158 |
+
"loss": 1.466,
|
| 3159 |
+
"step": 4500
|
| 3160 |
+
},
|
| 3161 |
+
{
|
| 3162 |
+
"epoch": 0.902,
|
| 3163 |
+
"grad_norm": 2.546496868133545,
|
| 3164 |
+
"learning_rate": 1.4485636803175829e-06,
|
| 3165 |
+
"loss": 0.8571,
|
| 3166 |
+
"step": 4510
|
| 3167 |
+
},
|
| 3168 |
+
{
|
| 3169 |
+
"epoch": 0.904,
|
| 3170 |
+
"grad_norm": 1.0487430095672607,
|
| 3171 |
+
"learning_rate": 1.3905907440629752e-06,
|
| 3172 |
+
"loss": 1.0712,
|
| 3173 |
+
"step": 4520
|
| 3174 |
+
},
|
| 3175 |
+
{
|
| 3176 |
+
"epoch": 0.906,
|
| 3177 |
+
"grad_norm": 1.2966355085372925,
|
| 3178 |
+
"learning_rate": 1.333768497107593e-06,
|
| 3179 |
+
"loss": 1.328,
|
| 3180 |
+
"step": 4530
|
| 3181 |
+
},
|
| 3182 |
+
{
|
| 3183 |
+
"epoch": 0.908,
|
| 3184 |
+
"grad_norm": 0.441129595041275,
|
| 3185 |
+
"learning_rate": 1.2780997088875869e-06,
|
| 3186 |
+
"loss": 3.858,
|
| 3187 |
+
"step": 4540
|
| 3188 |
+
},
|
| 3189 |
+
{
|
| 3190 |
+
"epoch": 0.91,
|
| 3191 |
+
"grad_norm": 1.228864073753357,
|
| 3192 |
+
"learning_rate": 1.2235870926211619e-06,
|
| 3193 |
+
"loss": 1.3761,
|
| 3194 |
+
"step": 4550
|
| 3195 |
+
},
|
| 3196 |
+
{
|
| 3197 |
+
"epoch": 0.912,
|
| 3198 |
+
"grad_norm": 0.6363465189933777,
|
| 3199 |
+
"learning_rate": 1.170233305176327e-06,
|
| 3200 |
+
"loss": 0.9014,
|
| 3201 |
+
"step": 4560
|
| 3202 |
+
},
|
| 3203 |
+
{
|
| 3204 |
+
"epoch": 0.914,
|
| 3205 |
+
"grad_norm": 1.6291348934173584,
|
| 3206 |
+
"learning_rate": 1.1180409469414094e-06,
|
| 3207 |
+
"loss": 1.2054,
|
| 3208 |
+
"step": 4570
|
| 3209 |
+
},
|
| 3210 |
+
{
|
| 3211 |
+
"epoch": 0.916,
|
| 3212 |
+
"grad_norm": 0.9478392601013184,
|
| 3213 |
+
"learning_rate": 1.067012561698319e-06,
|
| 3214 |
+
"loss": 1.1962,
|
| 3215 |
+
"step": 4580
|
| 3216 |
+
},
|
| 3217 |
+
{
|
| 3218 |
+
"epoch": 0.918,
|
| 3219 |
+
"grad_norm": 0.8788161277770996,
|
| 3220 |
+
"learning_rate": 1.0171506364985622e-06,
|
| 3221 |
+
"loss": 1.1866,
|
| 3222 |
+
"step": 4590
|
| 3223 |
+
},
|
| 3224 |
+
{
|
| 3225 |
+
"epoch": 0.92,
|
| 3226 |
+
"grad_norm": 7.388576984405518,
|
| 3227 |
+
"learning_rate": 9.684576015420278e-07,
|
| 3228 |
+
"loss": 1.4084,
|
| 3229 |
+
"step": 4600
|
| 3230 |
+
},
|
| 3231 |
+
{
|
| 3232 |
+
"epoch": 0.922,
|
| 3233 |
+
"grad_norm": 0.774747908115387,
|
| 3234 |
+
"learning_rate": 9.209358300585474e-07,
|
| 3235 |
+
"loss": 0.4785,
|
| 3236 |
+
"step": 4610
|
| 3237 |
+
},
|
| 3238 |
+
{
|
| 3239 |
+
"epoch": 0.924,
|
| 3240 |
+
"grad_norm": 3.6744463443756104,
|
| 3241 |
+
"learning_rate": 8.745876381922147e-07,
|
| 3242 |
+
"loss": 1.0671,
|
| 3243 |
+
"step": 4620
|
| 3244 |
+
},
|
| 3245 |
+
{
|
| 3246 |
+
"epoch": 0.926,
|
| 3247 |
+
"grad_norm": 15.845490455627441,
|
| 3248 |
+
"learning_rate": 8.294152848885157e-07,
|
| 3249 |
+
"loss": 3.2901,
|
| 3250 |
+
"step": 4630
|
| 3251 |
+
},
|
| 3252 |
+
{
|
| 3253 |
+
"epoch": 0.928,
|
| 3254 |
+
"grad_norm": 0.889503002166748,
|
| 3255 |
+
"learning_rate": 7.854209717842231e-07,
|
| 3256 |
+
"loss": 1.0359,
|
| 3257 |
+
"step": 4640
|
| 3258 |
+
},
|
| 3259 |
+
{
|
| 3260 |
+
"epoch": 0.93,
|
| 3261 |
+
"grad_norm": 0.6462771892547607,
|
| 3262 |
+
"learning_rate": 7.426068431000882e-07,
|
| 3263 |
+
"loss": 0.5115,
|
| 3264 |
+
"step": 4650
|
| 3265 |
+
},
|
| 3266 |
+
{
|
| 3267 |
+
"epoch": 0.932,
|
| 3268 |
+
"grad_norm": 1.1371968984603882,
|
| 3269 |
+
"learning_rate": 7.009749855363456e-07,
|
| 3270 |
+
"loss": 1.0589,
|
| 3271 |
+
"step": 4660
|
| 3272 |
+
},
|
| 3273 |
+
{
|
| 3274 |
+
"epoch": 0.934,
|
| 3275 |
+
"grad_norm": 0.5716176629066467,
|
| 3276 |
+
"learning_rate": 6.605274281709928e-07,
|
| 3277 |
+
"loss": 0.3984,
|
| 3278 |
+
"step": 4670
|
| 3279 |
+
},
|
| 3280 |
+
{
|
| 3281 |
+
"epoch": 0.936,
|
| 3282 |
+
"grad_norm": 3.242506742477417,
|
| 3283 |
+
"learning_rate": 6.212661423609184e-07,
|
| 3284 |
+
"loss": 2.5647,
|
| 3285 |
+
"step": 4680
|
| 3286 |
+
},
|
| 3287 |
+
{
|
| 3288 |
+
"epoch": 0.938,
|
| 3289 |
+
"grad_norm": 1.7465883493423462,
|
| 3290 |
+
"learning_rate": 5.83193041645802e-07,
|
| 3291 |
+
"loss": 0.93,
|
| 3292 |
+
"step": 4690
|
| 3293 |
+
},
|
| 3294 |
+
{
|
| 3295 |
+
"epoch": 0.94,
|
| 3296 |
+
"grad_norm": 5.38319730758667,
|
| 3297 |
+
"learning_rate": 5.463099816548579e-07,
|
| 3298 |
+
"loss": 1.4605,
|
| 3299 |
+
"step": 4700
|
| 3300 |
+
},
|
| 3301 |
+
{
|
| 3302 |
+
"epoch": 0.942,
|
| 3303 |
+
"grad_norm": 0.0,
|
| 3304 |
+
"learning_rate": 5.106187600163987e-07,
|
| 3305 |
+
"loss": 0.7053,
|
| 3306 |
+
"step": 4710
|
| 3307 |
+
},
|
| 3308 |
+
{
|
| 3309 |
+
"epoch": 0.944,
|
| 3310 |
+
"grad_norm": 16.922529220581055,
|
| 3311 |
+
"learning_rate": 4.7612111627021175e-07,
|
| 3312 |
+
"loss": 1.9524,
|
| 3313 |
+
"step": 4720
|
| 3314 |
+
},
|
| 3315 |
+
{
|
| 3316 |
+
"epoch": 0.946,
|
| 3317 |
+
"grad_norm": 0.44225987792015076,
|
| 3318 |
+
"learning_rate": 4.4281873178278475e-07,
|
| 3319 |
+
"loss": 2.0867,
|
| 3320 |
+
"step": 4730
|
| 3321 |
+
},
|
| 3322 |
+
{
|
| 3323 |
+
"epoch": 0.948,
|
| 3324 |
+
"grad_norm": 4.4445719718933105,
|
| 3325 |
+
"learning_rate": 4.107132296653549e-07,
|
| 3326 |
+
"loss": 0.6371,
|
| 3327 |
+
"step": 4740
|
| 3328 |
+
},
|
| 3329 |
+
{
|
| 3330 |
+
"epoch": 0.95,
|
| 3331 |
+
"grad_norm": 1.2280045747756958,
|
| 3332 |
+
"learning_rate": 3.7980617469479953e-07,
|
| 3333 |
+
"loss": 0.5299,
|
| 3334 |
+
"step": 4750
|
| 3335 |
+
},
|
| 3336 |
+
{
|
| 3337 |
+
"epoch": 0.952,
|
| 3338 |
+
"grad_norm": 2.0964748859405518,
|
| 3339 |
+
"learning_rate": 3.5009907323737825e-07,
|
| 3340 |
+
"loss": 4.1515,
|
| 3341 |
+
"step": 4760
|
| 3342 |
+
},
|
| 3343 |
+
{
|
| 3344 |
+
"epoch": 0.954,
|
| 3345 |
+
"grad_norm": 0.37474894523620605,
|
| 3346 |
+
"learning_rate": 3.215933731753024e-07,
|
| 3347 |
+
"loss": 0.762,
|
| 3348 |
+
"step": 4770
|
| 3349 |
+
},
|
| 3350 |
+
{
|
| 3351 |
+
"epoch": 0.956,
|
| 3352 |
+
"grad_norm": 0.0,
|
| 3353 |
+
"learning_rate": 2.942904638361804e-07,
|
| 3354 |
+
"loss": 0.9055,
|
| 3355 |
+
"step": 4780
|
| 3356 |
+
},
|
| 3357 |
+
{
|
| 3358 |
+
"epoch": 0.958,
|
| 3359 |
+
"grad_norm": 2.7244997024536133,
|
| 3360 |
+
"learning_rate": 2.681916759252917e-07,
|
| 3361 |
+
"loss": 1.026,
|
| 3362 |
+
"step": 4790
|
| 3363 |
+
},
|
| 3364 |
+
{
|
| 3365 |
+
"epoch": 0.96,
|
| 3366 |
+
"grad_norm": 0.0,
|
| 3367 |
+
"learning_rate": 2.4329828146074095e-07,
|
| 3368 |
+
"loss": 1.9403,
|
| 3369 |
+
"step": 4800
|
| 3370 |
+
},
|
| 3371 |
+
{
|
| 3372 |
+
"epoch": 0.962,
|
| 3373 |
+
"grad_norm": 11.823533058166504,
|
| 3374 |
+
"learning_rate": 2.1961149371145795e-07,
|
| 3375 |
+
"loss": 1.8509,
|
| 3376 |
+
"step": 4810
|
| 3377 |
+
},
|
| 3378 |
+
{
|
| 3379 |
+
"epoch": 0.964,
|
| 3380 |
+
"grad_norm": 0.9294369220733643,
|
| 3381 |
+
"learning_rate": 1.9713246713805588e-07,
|
| 3382 |
+
"loss": 1.5779,
|
| 3383 |
+
"step": 4820
|
| 3384 |
+
},
|
| 3385 |
+
{
|
| 3386 |
+
"epoch": 0.966,
|
| 3387 |
+
"grad_norm": 1.7021973133087158,
|
| 3388 |
+
"learning_rate": 1.7586229733657644e-07,
|
| 3389 |
+
"loss": 1.2824,
|
| 3390 |
+
"step": 4830
|
| 3391 |
+
},
|
| 3392 |
+
{
|
| 3393 |
+
"epoch": 0.968,
|
| 3394 |
+
"grad_norm": 0.9092425107955933,
|
| 3395 |
+
"learning_rate": 1.5580202098509077e-07,
|
| 3396 |
+
"loss": 1.8272,
|
| 3397 |
+
"step": 4840
|
| 3398 |
+
},
|
| 3399 |
+
{
|
| 3400 |
+
"epoch": 0.97,
|
| 3401 |
+
"grad_norm": 0.22959110140800476,
|
| 3402 |
+
"learning_rate": 1.3695261579316777e-07,
|
| 3403 |
+
"loss": 1.7201,
|
| 3404 |
+
"step": 4850
|
| 3405 |
+
},
|
| 3406 |
+
{
|
| 3407 |
+
"epoch": 0.972,
|
| 3408 |
+
"grad_norm": 1.6746439933776855,
|
| 3409 |
+
"learning_rate": 1.193150004542204e-07,
|
| 3410 |
+
"loss": 1.2317,
|
| 3411 |
+
"step": 4860
|
| 3412 |
+
},
|
| 3413 |
+
{
|
| 3414 |
+
"epoch": 0.974,
|
| 3415 |
+
"grad_norm": 4.792318344116211,
|
| 3416 |
+
"learning_rate": 1.0289003460074165e-07,
|
| 3417 |
+
"loss": 1.2935,
|
| 3418 |
+
"step": 4870
|
| 3419 |
+
},
|
| 3420 |
+
{
|
| 3421 |
+
"epoch": 0.976,
|
| 3422 |
+
"grad_norm": 0.7605292797088623,
|
| 3423 |
+
"learning_rate": 8.767851876239074e-08,
|
| 3424 |
+
"loss": 1.2833,
|
| 3425 |
+
"step": 4880
|
| 3426 |
+
},
|
| 3427 |
+
{
|
| 3428 |
+
"epoch": 0.978,
|
| 3429 |
+
"grad_norm": 0.4360625147819519,
|
| 3430 |
+
"learning_rate": 7.368119432699383e-08,
|
| 3431 |
+
"loss": 1.021,
|
| 3432 |
+
"step": 4890
|
| 3433 |
+
},
|
| 3434 |
+
{
|
| 3435 |
+
"epoch": 0.98,
|
| 3436 |
+
"grad_norm": 0.5451242923736572,
|
| 3437 |
+
"learning_rate": 6.089874350439506e-08,
|
| 3438 |
+
"loss": 1.1247,
|
| 3439 |
+
"step": 4900
|
| 3440 |
+
},
|
| 3441 |
+
{
|
| 3442 |
+
"epoch": 0.982,
|
| 3443 |
+
"grad_norm": 0.9947279691696167,
|
| 3444 |
+
"learning_rate": 4.9331789293211026e-08,
|
| 3445 |
+
"loss": 1.2268,
|
| 3446 |
+
"step": 4910
|
| 3447 |
+
},
|
| 3448 |
+
{
|
| 3449 |
+
"epoch": 0.984,
|
| 3450 |
+
"grad_norm": 9.412683486938477,
|
| 3451 |
+
"learning_rate": 3.8980895450474455e-08,
|
| 3452 |
+
"loss": 4.2583,
|
| 3453 |
+
"step": 4920
|
| 3454 |
+
},
|
| 3455 |
+
{
|
| 3456 |
+
"epoch": 0.986,
|
| 3457 |
+
"grad_norm": 0.6823949217796326,
|
| 3458 |
+
"learning_rate": 2.9846566464150626e-08,
|
| 3459 |
+
"loss": 0.7995,
|
| 3460 |
+
"step": 4930
|
| 3461 |
+
},
|
| 3462 |
+
{
|
| 3463 |
+
"epoch": 0.988,
|
| 3464 |
+
"grad_norm": 1.5976966619491577,
|
| 3465 |
+
"learning_rate": 2.192924752854042e-08,
|
| 3466 |
+
"loss": 1.3818,
|
| 3467 |
+
"step": 4940
|
| 3468 |
+
},
|
| 3469 |
+
{
|
| 3470 |
+
"epoch": 0.99,
|
| 3471 |
+
"grad_norm": 17.52108383178711,
|
| 3472 |
+
"learning_rate": 1.522932452260595e-08,
|
| 3473 |
+
"loss": 2.6165,
|
| 3474 |
+
"step": 4950
|
| 3475 |
+
},
|
| 3476 |
+
{
|
| 3477 |
+
"epoch": 0.992,
|
| 3478 |
+
"grad_norm": 3.4375789165496826,
|
| 3479 |
+
"learning_rate": 9.747123991141194e-09,
|
| 3480 |
+
"loss": 1.7912,
|
| 3481 |
+
"step": 4960
|
| 3482 |
+
},
|
| 3483 |
+
{
|
| 3484 |
+
"epoch": 0.994,
|
| 3485 |
+
"grad_norm": 1.8169946670532227,
|
| 3486 |
+
"learning_rate": 5.48291312886251e-09,
|
| 3487 |
+
"loss": 1.421,
|
| 3488 |
+
"step": 4970
|
| 3489 |
+
},
|
| 3490 |
+
{
|
| 3491 |
+
"epoch": 0.996,
|
| 3492 |
+
"grad_norm": 0.7665383815765381,
|
| 3493 |
+
"learning_rate": 2.4368997673940297e-09,
|
| 3494 |
+
"loss": 1.2866,
|
| 3495 |
+
"step": 4980
|
| 3496 |
+
},
|
| 3497 |
+
{
|
| 3498 |
+
"epoch": 0.998,
|
| 3499 |
+
"grad_norm": 0.4504777491092682,
|
| 3500 |
+
"learning_rate": 6.092323651313292e-10,
|
| 3501 |
+
"loss": 0.6754,
|
| 3502 |
+
"step": 4990
|
| 3503 |
+
},
|
| 3504 |
+
{
|
| 3505 |
+
"epoch": 1.0,
|
| 3506 |
+
"grad_norm": 0.5862205028533936,
|
| 3507 |
+
"learning_rate": 0.0,
|
| 3508 |
+
"loss": 1.1699,
|
| 3509 |
+
"step": 5000
|
| 3510 |
+
},
|
| 3511 |
+
{
|
| 3512 |
+
"epoch": 1.0,
|
| 3513 |
+
"step": 5000,
|
| 3514 |
+
"total_flos": 1.151346780094464e+16,
|
| 3515 |
+
"train_loss": 1.6451873833656312,
|
| 3516 |
+
"train_runtime": 1464.1065,
|
| 3517 |
+
"train_samples_per_second": 3.415,
|
| 3518 |
+
"train_steps_per_second": 3.415
|
| 3519 |
+
}
|
| 3520 |
+
],
|
| 3521 |
+
"logging_steps": 10,
|
| 3522 |
+
"max_steps": 5000,
|
| 3523 |
+
"num_input_tokens_seen": 0,
|
| 3524 |
+
"num_train_epochs": 1,
|
| 3525 |
+
"save_steps": 4000,
|
| 3526 |
+
"stateful_callbacks": {
|
| 3527 |
+
"TrainerControl": {
|
| 3528 |
+
"args": {
|
| 3529 |
+
"should_epoch_stop": false,
|
| 3530 |
+
"should_evaluate": false,
|
| 3531 |
+
"should_log": false,
|
| 3532 |
+
"should_save": true,
|
| 3533 |
+
"should_training_stop": true
|
| 3534 |
+
},
|
| 3535 |
+
"attributes": {}
|
| 3536 |
+
}
|
| 3537 |
+
},
|
| 3538 |
+
"total_flos": 1.151346780094464e+16,
|
| 3539 |
+
"train_batch_size": 1,
|
| 3540 |
+
"trial_name": null,
|
| 3541 |
+
"trial_params": null
|
| 3542 |
+
}
|
Llama-2-13b-chat-hf/DomainBench/Finance/training_args.bin
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:7f9f7a5ac6abcf0832ff55273a20894e8519bd6572cf76a25d10dc91b646f33c
|
| 3 |
+
size 5432
|
Llama-2-13b-chat-hf/DomainBench/Finance/training_loss.png
ADDED
|
Llama-2-13b-chat-hf/DomainBench/Geography/README.md
ADDED
|
@@ -0,0 +1,57 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
---
|
| 2 |
+
library_name: peft
|
| 3 |
+
license: other
|
| 4 |
+
base_model: /hujinwu/LLM_Assemble/pretrain_model/Llama-2-13b-chat-hf
|
| 5 |
+
tags:
|
| 6 |
+
- llama-factory
|
| 7 |
+
- lora
|
| 8 |
+
- generated_from_trainer
|
| 9 |
+
model-index:
|
| 10 |
+
- name: threshold_3-lamb_0.1-lr_5e-5
|
| 11 |
+
results: []
|
| 12 |
+
---
|
| 13 |
+
|
| 14 |
+
<!-- This model card has been generated automatically according to the information the Trainer had access to. You
|
| 15 |
+
should probably proofread and complete it, then remove this comment. -->
|
| 16 |
+
|
| 17 |
+
# threshold_3-lamb_0.1-lr_5e-5
|
| 18 |
+
|
| 19 |
+
This model is a fine-tuned version of [/hujinwu/LLM_Assemble/pretrain_model/Llama-2-13b-chat-hf](https://huggingface.co//hujinwu/LLM_Assemble/pretrain_model/Llama-2-13b-chat-hf) on the geosignal dataset.
|
| 20 |
+
|
| 21 |
+
## Model description
|
| 22 |
+
|
| 23 |
+
More information needed
|
| 24 |
+
|
| 25 |
+
## Intended uses & limitations
|
| 26 |
+
|
| 27 |
+
More information needed
|
| 28 |
+
|
| 29 |
+
## Training and evaluation data
|
| 30 |
+
|
| 31 |
+
More information needed
|
| 32 |
+
|
| 33 |
+
## Training procedure
|
| 34 |
+
|
| 35 |
+
### Training hyperparameters
|
| 36 |
+
|
| 37 |
+
The following hyperparameters were used during training:
|
| 38 |
+
- learning_rate: 5e-05
|
| 39 |
+
- train_batch_size: 1
|
| 40 |
+
- eval_batch_size: 8
|
| 41 |
+
- seed: 42
|
| 42 |
+
- optimizer: Use adamw_torch with betas=(0.9,0.999) and epsilon=1e-08 and optimizer_args=No additional optimizer arguments
|
| 43 |
+
- lr_scheduler_type: cosine
|
| 44 |
+
- lr_scheduler_warmup_ratio: 0.1
|
| 45 |
+
- num_epochs: 1.0
|
| 46 |
+
|
| 47 |
+
### Training results
|
| 48 |
+
|
| 49 |
+
|
| 50 |
+
|
| 51 |
+
### Framework versions
|
| 52 |
+
|
| 53 |
+
- PEFT 0.12.0
|
| 54 |
+
- Transformers 4.46.1
|
| 55 |
+
- Pytorch 2.5.1+cu124
|
| 56 |
+
- Datasets 3.1.0
|
| 57 |
+
- Tokenizers 0.20.3
|
Llama-2-13b-chat-hf/DomainBench/Geography/adapter_config.json
ADDED
|
@@ -0,0 +1,29 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"alpha_pattern": {},
|
| 3 |
+
"auto_mapping": null,
|
| 4 |
+
"base_model_name_or_path": "/hujinwu/LLM_Assemble/pretrain_model/Llama-2-13b-chat-hf",
|
| 5 |
+
"bias": "none",
|
| 6 |
+
"fan_in_fan_out": false,
|
| 7 |
+
"inference_mode": true,
|
| 8 |
+
"init_lora_weights": true,
|
| 9 |
+
"layer_replication": null,
|
| 10 |
+
"layers_pattern": null,
|
| 11 |
+
"layers_to_transform": null,
|
| 12 |
+
"loftq_config": {},
|
| 13 |
+
"lora_alpha": 16,
|
| 14 |
+
"lora_dropout": 0.0,
|
| 15 |
+
"megatron_config": null,
|
| 16 |
+
"megatron_core": "megatron.core",
|
| 17 |
+
"modules_to_save": null,
|
| 18 |
+
"peft_type": "LORA",
|
| 19 |
+
"r": 8,
|
| 20 |
+
"rank_pattern": {},
|
| 21 |
+
"revision": null,
|
| 22 |
+
"target_modules": [
|
| 23 |
+
"q_proj",
|
| 24 |
+
"v_proj"
|
| 25 |
+
],
|
| 26 |
+
"task_type": "CAUSAL_LM",
|
| 27 |
+
"use_dora": false,
|
| 28 |
+
"use_rslora": false
|
| 29 |
+
}
|
Llama-2-13b-chat-hf/DomainBench/Geography/adapter_model.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:0893ead1d969d0b9cb57398624cdedc62eb1cde6eed3b8cdec11d83f405a8a67
|
| 3 |
+
size 26235704
|
Llama-2-13b-chat-hf/DomainBench/Geography/all_results.json
ADDED
|
@@ -0,0 +1,8 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"epoch": 1.0,
|
| 3 |
+
"total_flos": 3.424475897929728e+16,
|
| 4 |
+
"train_loss": 1.0425229248046874,
|
| 5 |
+
"train_runtime": 1241.3169,
|
| 6 |
+
"train_samples_per_second": 4.028,
|
| 7 |
+
"train_steps_per_second": 4.028
|
| 8 |
+
}
|
Llama-2-13b-chat-hf/DomainBench/Geography/logfile.txt
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
Llama-2-13b-chat-hf/DomainBench/Geography/special_tokens_map.json
ADDED
|
@@ -0,0 +1,24 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"bos_token": {
|
| 3 |
+
"content": "<s>",
|
| 4 |
+
"lstrip": false,
|
| 5 |
+
"normalized": false,
|
| 6 |
+
"rstrip": false,
|
| 7 |
+
"single_word": false
|
| 8 |
+
},
|
| 9 |
+
"eos_token": {
|
| 10 |
+
"content": "</s>",
|
| 11 |
+
"lstrip": false,
|
| 12 |
+
"normalized": false,
|
| 13 |
+
"rstrip": false,
|
| 14 |
+
"single_word": false
|
| 15 |
+
},
|
| 16 |
+
"pad_token": "</s>",
|
| 17 |
+
"unk_token": {
|
| 18 |
+
"content": "<unk>",
|
| 19 |
+
"lstrip": false,
|
| 20 |
+
"normalized": false,
|
| 21 |
+
"rstrip": false,
|
| 22 |
+
"single_word": false
|
| 23 |
+
}
|
| 24 |
+
}
|
Llama-2-13b-chat-hf/DomainBench/Geography/tokenizer.json
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
Llama-2-13b-chat-hf/DomainBench/Geography/tokenizer.model
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:9e556afd44213b6bd1be2b850ebbbd98f5481437a8021afaf58ee7fb1818d347
|
| 3 |
+
size 499723
|
Llama-2-13b-chat-hf/DomainBench/Geography/tokenizer_config.json
ADDED
|
@@ -0,0 +1,44 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"add_bos_token": true,
|
| 3 |
+
"add_eos_token": false,
|
| 4 |
+
"add_prefix_space": null,
|
| 5 |
+
"added_tokens_decoder": {
|
| 6 |
+
"0": {
|
| 7 |
+
"content": "<unk>",
|
| 8 |
+
"lstrip": false,
|
| 9 |
+
"normalized": false,
|
| 10 |
+
"rstrip": false,
|
| 11 |
+
"single_word": false,
|
| 12 |
+
"special": true
|
| 13 |
+
},
|
| 14 |
+
"1": {
|
| 15 |
+
"content": "<s>",
|
| 16 |
+
"lstrip": false,
|
| 17 |
+
"normalized": false,
|
| 18 |
+
"rstrip": false,
|
| 19 |
+
"single_word": false,
|
| 20 |
+
"special": true
|
| 21 |
+
},
|
| 22 |
+
"2": {
|
| 23 |
+
"content": "</s>",
|
| 24 |
+
"lstrip": false,
|
| 25 |
+
"normalized": false,
|
| 26 |
+
"rstrip": false,
|
| 27 |
+
"single_word": false,
|
| 28 |
+
"special": true
|
| 29 |
+
}
|
| 30 |
+
},
|
| 31 |
+
"bos_token": "<s>",
|
| 32 |
+
"chat_template": "{% if messages[0]['role'] == 'system' %}{% set loop_messages = messages[1:] %}{% set system_message = messages[0]['content'] %}{% else %}{% set loop_messages = messages %}{% endif %}{% for message in loop_messages %}{% set content = message['content'] %}{% if loop.index0 == 0 and system_message is defined %}{% set content = '<<SYS>>\n' + system_message + '\n<</SYS>>\n\n' + message['content'] %}{% endif %}{% if message['role'] == 'user' %}{{ '<s>' + '[INST] ' + content + ' [/INST]' }}{% elif message['role'] == 'assistant' %}{{ content + '</s>' }}{% endif %}{% endfor %}",
|
| 33 |
+
"clean_up_tokenization_spaces": false,
|
| 34 |
+
"eos_token": "</s>",
|
| 35 |
+
"legacy": false,
|
| 36 |
+
"model_max_length": 1000000000000000019884624838656,
|
| 37 |
+
"pad_token": "</s>",
|
| 38 |
+
"padding_side": "right",
|
| 39 |
+
"sp_model_kwargs": {},
|
| 40 |
+
"split_special_tokens": false,
|
| 41 |
+
"tokenizer_class": "LlamaTokenizer",
|
| 42 |
+
"unk_token": "<unk>",
|
| 43 |
+
"use_default_system_prompt": false
|
| 44 |
+
}
|
Llama-2-13b-chat-hf/DomainBench/Geography/train_results.json
ADDED
|
@@ -0,0 +1,8 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"epoch": 1.0,
|
| 3 |
+
"total_flos": 3.424475897929728e+16,
|
| 4 |
+
"train_loss": 1.0425229248046874,
|
| 5 |
+
"train_runtime": 1241.3169,
|
| 6 |
+
"train_samples_per_second": 4.028,
|
| 7 |
+
"train_steps_per_second": 4.028
|
| 8 |
+
}
|
Llama-2-13b-chat-hf/DomainBench/Geography/trainer_log.jsonl
ADDED
|
@@ -0,0 +1,501 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{"current_steps": 10, "total_steps": 5000, "loss": 1.5419, "lr": 1.0000000000000002e-06, "epoch": 0.002, "percentage": 0.2, "elapsed_time": "0:00:03", "remaining_time": "0:29:48"}
|
| 2 |
+
{"current_steps": 20, "total_steps": 5000, "loss": 2.7315, "lr": 2.0000000000000003e-06, "epoch": 0.004, "percentage": 0.4, "elapsed_time": "0:00:06", "remaining_time": "0:25:41"}
|
| 3 |
+
{"current_steps": 30, "total_steps": 5000, "loss": 1.8219, "lr": 3e-06, "epoch": 0.006, "percentage": 0.6, "elapsed_time": "0:00:08", "remaining_time": "0:23:27"}
|
| 4 |
+
{"current_steps": 40, "total_steps": 5000, "loss": 1.5216, "lr": 4.000000000000001e-06, "epoch": 0.008, "percentage": 0.8, "elapsed_time": "0:00:11", "remaining_time": "0:24:19"}
|
| 5 |
+
{"current_steps": 50, "total_steps": 5000, "loss": 5.3329, "lr": 5e-06, "epoch": 0.01, "percentage": 1.0, "elapsed_time": "0:00:14", "remaining_time": "0:24:26"}
|
| 6 |
+
{"current_steps": 60, "total_steps": 5000, "loss": 2.0144, "lr": 6e-06, "epoch": 0.012, "percentage": 1.2, "elapsed_time": "0:00:17", "remaining_time": "0:24:17"}
|
| 7 |
+
{"current_steps": 70, "total_steps": 5000, "loss": 2.6128, "lr": 7.000000000000001e-06, "epoch": 0.014, "percentage": 1.4, "elapsed_time": "0:00:20", "remaining_time": "0:24:05"}
|
| 8 |
+
{"current_steps": 80, "total_steps": 5000, "loss": 2.3414, "lr": 8.000000000000001e-06, "epoch": 0.016, "percentage": 1.6, "elapsed_time": "0:00:23", "remaining_time": "0:23:57"}
|
| 9 |
+
{"current_steps": 90, "total_steps": 5000, "loss": 3.2327, "lr": 9e-06, "epoch": 0.018, "percentage": 1.8, "elapsed_time": "0:00:26", "remaining_time": "0:23:42"}
|
| 10 |
+
{"current_steps": 100, "total_steps": 5000, "loss": 2.7985, "lr": 1e-05, "epoch": 0.02, "percentage": 2.0, "elapsed_time": "0:00:29", "remaining_time": "0:23:45"}
|
| 11 |
+
{"current_steps": 110, "total_steps": 5000, "loss": 2.397, "lr": 1.1000000000000001e-05, "epoch": 0.022, "percentage": 2.2, "elapsed_time": "0:00:31", "remaining_time": "0:23:33"}
|
| 12 |
+
{"current_steps": 120, "total_steps": 5000, "loss": 1.0396, "lr": 1.2e-05, "epoch": 0.024, "percentage": 2.4, "elapsed_time": "0:00:34", "remaining_time": "0:23:19"}
|
| 13 |
+
{"current_steps": 130, "total_steps": 5000, "loss": 3.2413, "lr": 1.3000000000000001e-05, "epoch": 0.026, "percentage": 2.6, "elapsed_time": "0:00:37", "remaining_time": "0:23:06"}
|
| 14 |
+
{"current_steps": 140, "total_steps": 5000, "loss": 3.3462, "lr": 1.4000000000000001e-05, "epoch": 0.028, "percentage": 2.8, "elapsed_time": "0:00:39", "remaining_time": "0:22:46"}
|
| 15 |
+
{"current_steps": 150, "total_steps": 5000, "loss": 1.3302, "lr": 1.5e-05, "epoch": 0.03, "percentage": 3.0, "elapsed_time": "0:00:41", "remaining_time": "0:22:08"}
|
| 16 |
+
{"current_steps": 160, "total_steps": 5000, "loss": 1.2748, "lr": 1.6000000000000003e-05, "epoch": 0.032, "percentage": 3.2, "elapsed_time": "0:00:42", "remaining_time": "0:21:36"}
|
| 17 |
+
{"current_steps": 170, "total_steps": 5000, "loss": 3.3206, "lr": 1.7000000000000003e-05, "epoch": 0.034, "percentage": 3.4, "elapsed_time": "0:00:45", "remaining_time": "0:21:29"}
|
| 18 |
+
{"current_steps": 180, "total_steps": 5000, "loss": 1.3943, "lr": 1.8e-05, "epoch": 0.036, "percentage": 3.6, "elapsed_time": "0:00:47", "remaining_time": "0:21:16"}
|
| 19 |
+
{"current_steps": 190, "total_steps": 5000, "loss": 1.2942, "lr": 1.9e-05, "epoch": 0.038, "percentage": 3.8, "elapsed_time": "0:00:49", "remaining_time": "0:21:03"}
|
| 20 |
+
{"current_steps": 200, "total_steps": 5000, "loss": 1.4252, "lr": 2e-05, "epoch": 0.04, "percentage": 4.0, "elapsed_time": "0:00:52", "remaining_time": "0:20:53"}
|
| 21 |
+
{"current_steps": 210, "total_steps": 5000, "loss": 1.6539, "lr": 2.1e-05, "epoch": 0.042, "percentage": 4.2, "elapsed_time": "0:00:54", "remaining_time": "0:20:41"}
|
| 22 |
+
{"current_steps": 220, "total_steps": 5000, "loss": 1.8091, "lr": 2.2000000000000003e-05, "epoch": 0.044, "percentage": 4.4, "elapsed_time": "0:00:57", "remaining_time": "0:20:45"}
|
| 23 |
+
{"current_steps": 230, "total_steps": 5000, "loss": 1.2866, "lr": 2.3000000000000003e-05, "epoch": 0.046, "percentage": 4.6, "elapsed_time": "0:01:00", "remaining_time": "0:20:53"}
|
| 24 |
+
{"current_steps": 240, "total_steps": 5000, "loss": 1.7432, "lr": 2.4e-05, "epoch": 0.048, "percentage": 4.8, "elapsed_time": "0:01:03", "remaining_time": "0:20:56"}
|
| 25 |
+
{"current_steps": 250, "total_steps": 5000, "loss": 1.6918, "lr": 2.5e-05, "epoch": 0.05, "percentage": 5.0, "elapsed_time": "0:01:06", "remaining_time": "0:20:59"}
|
| 26 |
+
{"current_steps": 260, "total_steps": 5000, "loss": 0.9121, "lr": 2.6000000000000002e-05, "epoch": 0.052, "percentage": 5.2, "elapsed_time": "0:01:08", "remaining_time": "0:20:56"}
|
| 27 |
+
{"current_steps": 270, "total_steps": 5000, "loss": 0.6088, "lr": 2.7000000000000002e-05, "epoch": 0.054, "percentage": 5.4, "elapsed_time": "0:01:11", "remaining_time": "0:20:53"}
|
| 28 |
+
{"current_steps": 280, "total_steps": 5000, "loss": 0.6236, "lr": 2.8000000000000003e-05, "epoch": 0.056, "percentage": 5.6, "elapsed_time": "0:01:13", "remaining_time": "0:20:40"}
|
| 29 |
+
{"current_steps": 290, "total_steps": 5000, "loss": 0.847, "lr": 2.9e-05, "epoch": 0.058, "percentage": 5.8, "elapsed_time": "0:01:16", "remaining_time": "0:20:40"}
|
| 30 |
+
{"current_steps": 300, "total_steps": 5000, "loss": 0.9911, "lr": 3e-05, "epoch": 0.06, "percentage": 6.0, "elapsed_time": "0:01:18", "remaining_time": "0:20:28"}
|
| 31 |
+
{"current_steps": 310, "total_steps": 5000, "loss": 1.4213, "lr": 3.1e-05, "epoch": 0.062, "percentage": 6.2, "elapsed_time": "0:01:21", "remaining_time": "0:20:26"}
|
| 32 |
+
{"current_steps": 320, "total_steps": 5000, "loss": 0.436, "lr": 3.2000000000000005e-05, "epoch": 0.064, "percentage": 6.4, "elapsed_time": "0:01:23", "remaining_time": "0:20:15"}
|
| 33 |
+
{"current_steps": 330, "total_steps": 5000, "loss": 1.0218, "lr": 3.3e-05, "epoch": 0.066, "percentage": 6.6, "elapsed_time": "0:01:25", "remaining_time": "0:20:16"}
|
| 34 |
+
{"current_steps": 340, "total_steps": 5000, "loss": 1.2908, "lr": 3.4000000000000007e-05, "epoch": 0.068, "percentage": 6.8, "elapsed_time": "0:01:27", "remaining_time": "0:20:02"}
|
| 35 |
+
{"current_steps": 350, "total_steps": 5000, "loss": 1.0009, "lr": 3.5e-05, "epoch": 0.07, "percentage": 7.0, "elapsed_time": "0:01:30", "remaining_time": "0:19:56"}
|
| 36 |
+
{"current_steps": 360, "total_steps": 5000, "loss": 0.6865, "lr": 3.6e-05, "epoch": 0.072, "percentage": 7.2, "elapsed_time": "0:01:32", "remaining_time": "0:19:50"}
|
| 37 |
+
{"current_steps": 370, "total_steps": 5000, "loss": 0.9106, "lr": 3.7e-05, "epoch": 0.074, "percentage": 7.4, "elapsed_time": "0:01:34", "remaining_time": "0:19:42"}
|
| 38 |
+
{"current_steps": 380, "total_steps": 5000, "loss": 1.4635, "lr": 3.8e-05, "epoch": 0.076, "percentage": 7.6, "elapsed_time": "0:01:37", "remaining_time": "0:19:40"}
|
| 39 |
+
{"current_steps": 390, "total_steps": 5000, "loss": 1.3782, "lr": 3.9000000000000006e-05, "epoch": 0.078, "percentage": 7.8, "elapsed_time": "0:01:39", "remaining_time": "0:19:38"}
|
| 40 |
+
{"current_steps": 400, "total_steps": 5000, "loss": 0.8778, "lr": 4e-05, "epoch": 0.08, "percentage": 8.0, "elapsed_time": "0:01:42", "remaining_time": "0:19:33"}
|
| 41 |
+
{"current_steps": 410, "total_steps": 5000, "loss": 0.8344, "lr": 4.1e-05, "epoch": 0.082, "percentage": 8.2, "elapsed_time": "0:01:44", "remaining_time": "0:19:25"}
|
| 42 |
+
{"current_steps": 420, "total_steps": 5000, "loss": 0.5733, "lr": 4.2e-05, "epoch": 0.084, "percentage": 8.4, "elapsed_time": "0:01:46", "remaining_time": "0:19:22"}
|
| 43 |
+
{"current_steps": 430, "total_steps": 5000, "loss": 0.6683, "lr": 4.3e-05, "epoch": 0.086, "percentage": 8.6, "elapsed_time": "0:01:48", "remaining_time": "0:19:17"}
|
| 44 |
+
{"current_steps": 440, "total_steps": 5000, "loss": 3.1046, "lr": 4.4000000000000006e-05, "epoch": 0.088, "percentage": 8.8, "elapsed_time": "0:01:50", "remaining_time": "0:19:09"}
|
| 45 |
+
{"current_steps": 450, "total_steps": 5000, "loss": 0.981, "lr": 4.5e-05, "epoch": 0.09, "percentage": 9.0, "elapsed_time": "0:01:53", "remaining_time": "0:19:11"}
|
| 46 |
+
{"current_steps": 460, "total_steps": 5000, "loss": 2.1118, "lr": 4.600000000000001e-05, "epoch": 0.092, "percentage": 9.2, "elapsed_time": "0:01:56", "remaining_time": "0:19:09"}
|
| 47 |
+
{"current_steps": 470, "total_steps": 5000, "loss": 0.3037, "lr": 4.7e-05, "epoch": 0.094, "percentage": 9.4, "elapsed_time": "0:01:58", "remaining_time": "0:18:58"}
|
| 48 |
+
{"current_steps": 480, "total_steps": 5000, "loss": 1.2638, "lr": 4.8e-05, "epoch": 0.096, "percentage": 9.6, "elapsed_time": "0:02:00", "remaining_time": "0:18:53"}
|
| 49 |
+
{"current_steps": 490, "total_steps": 5000, "loss": 2.3644, "lr": 4.9e-05, "epoch": 0.098, "percentage": 9.8, "elapsed_time": "0:02:02", "remaining_time": "0:18:49"}
|
| 50 |
+
{"current_steps": 500, "total_steps": 5000, "loss": 0.8317, "lr": 5e-05, "epoch": 0.1, "percentage": 10.0, "elapsed_time": "0:02:05", "remaining_time": "0:18:51"}
|
| 51 |
+
{"current_steps": 510, "total_steps": 5000, "loss": 2.1864, "lr": 4.999939076763487e-05, "epoch": 0.102, "percentage": 10.2, "elapsed_time": "0:02:08", "remaining_time": "0:18:47"}
|
| 52 |
+
{"current_steps": 520, "total_steps": 5000, "loss": 1.3502, "lr": 4.999756310023261e-05, "epoch": 0.104, "percentage": 10.4, "elapsed_time": "0:02:10", "remaining_time": "0:18:48"}
|
| 53 |
+
{"current_steps": 530, "total_steps": 5000, "loss": 1.161, "lr": 4.999451708687114e-05, "epoch": 0.106, "percentage": 10.6, "elapsed_time": "0:02:13", "remaining_time": "0:18:46"}
|
| 54 |
+
{"current_steps": 540, "total_steps": 5000, "loss": 0.7291, "lr": 4.999025287600886e-05, "epoch": 0.108, "percentage": 10.8, "elapsed_time": "0:02:16", "remaining_time": "0:18:45"}
|
| 55 |
+
{"current_steps": 550, "total_steps": 5000, "loss": 0.4316, "lr": 4.99847706754774e-05, "epoch": 0.11, "percentage": 11.0, "elapsed_time": "0:02:18", "remaining_time": "0:18:40"}
|
| 56 |
+
{"current_steps": 560, "total_steps": 5000, "loss": 1.2009, "lr": 4.997807075247146e-05, "epoch": 0.112, "percentage": 11.2, "elapsed_time": "0:02:21", "remaining_time": "0:18:42"}
|
| 57 |
+
{"current_steps": 570, "total_steps": 5000, "loss": 0.5649, "lr": 4.997015343353585e-05, "epoch": 0.114, "percentage": 11.4, "elapsed_time": "0:02:24", "remaining_time": "0:18:39"}
|
| 58 |
+
{"current_steps": 580, "total_steps": 5000, "loss": 0.4128, "lr": 4.996101910454953e-05, "epoch": 0.116, "percentage": 11.6, "elapsed_time": "0:02:25", "remaining_time": "0:18:32"}
|
| 59 |
+
{"current_steps": 590, "total_steps": 5000, "loss": 0.977, "lr": 4.995066821070679e-05, "epoch": 0.118, "percentage": 11.8, "elapsed_time": "0:02:28", "remaining_time": "0:18:28"}
|
| 60 |
+
{"current_steps": 600, "total_steps": 5000, "loss": 1.3527, "lr": 4.993910125649561e-05, "epoch": 0.12, "percentage": 12.0, "elapsed_time": "0:02:30", "remaining_time": "0:18:26"}
|
| 61 |
+
{"current_steps": 610, "total_steps": 5000, "loss": 0.6522, "lr": 4.992631880567301e-05, "epoch": 0.122, "percentage": 12.2, "elapsed_time": "0:02:33", "remaining_time": "0:18:25"}
|
| 62 |
+
{"current_steps": 620, "total_steps": 5000, "loss": 0.7698, "lr": 4.991232148123761e-05, "epoch": 0.124, "percentage": 12.4, "elapsed_time": "0:02:35", "remaining_time": "0:18:20"}
|
| 63 |
+
{"current_steps": 630, "total_steps": 5000, "loss": 0.9953, "lr": 4.989710996539926e-05, "epoch": 0.126, "percentage": 12.6, "elapsed_time": "0:02:38", "remaining_time": "0:18:17"}
|
| 64 |
+
{"current_steps": 640, "total_steps": 5000, "loss": 0.8877, "lr": 4.988068499954578e-05, "epoch": 0.128, "percentage": 12.8, "elapsed_time": "0:02:41", "remaining_time": "0:18:16"}
|
| 65 |
+
{"current_steps": 650, "total_steps": 5000, "loss": 0.564, "lr": 4.9863047384206835e-05, "epoch": 0.13, "percentage": 13.0, "elapsed_time": "0:02:44", "remaining_time": "0:18:18"}
|
| 66 |
+
{"current_steps": 660, "total_steps": 5000, "loss": 0.451, "lr": 4.984419797901491e-05, "epoch": 0.132, "percentage": 13.2, "elapsed_time": "0:02:46", "remaining_time": "0:18:13"}
|
| 67 |
+
{"current_steps": 670, "total_steps": 5000, "loss": 1.5067, "lr": 4.982413770266342e-05, "epoch": 0.134, "percentage": 13.4, "elapsed_time": "0:02:48", "remaining_time": "0:18:06"}
|
| 68 |
+
{"current_steps": 680, "total_steps": 5000, "loss": 1.6702, "lr": 4.980286753286195e-05, "epoch": 0.136, "percentage": 13.6, "elapsed_time": "0:02:50", "remaining_time": "0:18:06"}
|
| 69 |
+
{"current_steps": 690, "total_steps": 5000, "loss": 0.7115, "lr": 4.978038850628854e-05, "epoch": 0.138, "percentage": 13.8, "elapsed_time": "0:02:53", "remaining_time": "0:18:03"}
|
| 70 |
+
{"current_steps": 700, "total_steps": 5000, "loss": 0.9633, "lr": 4.975670171853926e-05, "epoch": 0.14, "percentage": 14.0, "elapsed_time": "0:02:55", "remaining_time": "0:17:59"}
|
| 71 |
+
{"current_steps": 710, "total_steps": 5000, "loss": 1.1906, "lr": 4.9731808324074717e-05, "epoch": 0.142, "percentage": 14.2, "elapsed_time": "0:02:58", "remaining_time": "0:17:57"}
|
| 72 |
+
{"current_steps": 720, "total_steps": 5000, "loss": 1.7433, "lr": 4.9705709536163824e-05, "epoch": 0.144, "percentage": 14.4, "elapsed_time": "0:03:01", "remaining_time": "0:17:56"}
|
| 73 |
+
{"current_steps": 730, "total_steps": 5000, "loss": 0.483, "lr": 4.96784066268247e-05, "epoch": 0.146, "percentage": 14.6, "elapsed_time": "0:03:03", "remaining_time": "0:17:54"}
|
| 74 |
+
{"current_steps": 740, "total_steps": 5000, "loss": 1.0321, "lr": 4.964990092676263e-05, "epoch": 0.148, "percentage": 14.8, "elapsed_time": "0:03:06", "remaining_time": "0:17:52"}
|
| 75 |
+
{"current_steps": 750, "total_steps": 5000, "loss": 1.0468, "lr": 4.962019382530521e-05, "epoch": 0.15, "percentage": 15.0, "elapsed_time": "0:03:08", "remaining_time": "0:17:48"}
|
| 76 |
+
{"current_steps": 760, "total_steps": 5000, "loss": 0.5741, "lr": 4.9589286770334654e-05, "epoch": 0.152, "percentage": 15.2, "elapsed_time": "0:03:10", "remaining_time": "0:17:44"}
|
| 77 |
+
{"current_steps": 770, "total_steps": 5000, "loss": 0.8734, "lr": 4.9557181268217227e-05, "epoch": 0.154, "percentage": 15.4, "elapsed_time": "0:03:13", "remaining_time": "0:17:40"}
|
| 78 |
+
{"current_steps": 780, "total_steps": 5000, "loss": 2.3025, "lr": 4.952387888372979e-05, "epoch": 0.156, "percentage": 15.6, "elapsed_time": "0:03:15", "remaining_time": "0:17:37"}
|
| 79 |
+
{"current_steps": 790, "total_steps": 5000, "loss": 1.3188, "lr": 4.94893812399836e-05, "epoch": 0.158, "percentage": 15.8, "elapsed_time": "0:03:17", "remaining_time": "0:17:34"}
|
| 80 |
+
{"current_steps": 800, "total_steps": 5000, "loss": 2.0348, "lr": 4.9453690018345144e-05, "epoch": 0.16, "percentage": 16.0, "elapsed_time": "0:03:20", "remaining_time": "0:17:31"}
|
| 81 |
+
{"current_steps": 810, "total_steps": 5000, "loss": 1.8253, "lr": 4.94168069583542e-05, "epoch": 0.162, "percentage": 16.2, "elapsed_time": "0:03:22", "remaining_time": "0:17:29"}
|
| 82 |
+
{"current_steps": 820, "total_steps": 5000, "loss": 0.7857, "lr": 4.937873385763908e-05, "epoch": 0.164, "percentage": 16.4, "elapsed_time": "0:03:25", "remaining_time": "0:17:26"}
|
| 83 |
+
{"current_steps": 830, "total_steps": 5000, "loss": 1.6698, "lr": 4.933947257182901e-05, "epoch": 0.166, "percentage": 16.6, "elapsed_time": "0:03:27", "remaining_time": "0:17:23"}
|
| 84 |
+
{"current_steps": 840, "total_steps": 5000, "loss": 1.3913, "lr": 4.929902501446366e-05, "epoch": 0.168, "percentage": 16.8, "elapsed_time": "0:03:30", "remaining_time": "0:17:23"}
|
| 85 |
+
{"current_steps": 850, "total_steps": 5000, "loss": 1.0676, "lr": 4.925739315689991e-05, "epoch": 0.17, "percentage": 17.0, "elapsed_time": "0:03:33", "remaining_time": "0:17:21"}
|
| 86 |
+
{"current_steps": 860, "total_steps": 5000, "loss": 1.6685, "lr": 4.9214579028215776e-05, "epoch": 0.172, "percentage": 17.2, "elapsed_time": "0:03:35", "remaining_time": "0:17:18"}
|
| 87 |
+
{"current_steps": 870, "total_steps": 5000, "loss": 1.4978, "lr": 4.917058471511149e-05, "epoch": 0.174, "percentage": 17.4, "elapsed_time": "0:03:38", "remaining_time": "0:17:17"}
|
| 88 |
+
{"current_steps": 880, "total_steps": 5000, "loss": 8.4068, "lr": 4.912541236180779e-05, "epoch": 0.176, "percentage": 17.6, "elapsed_time": "0:03:41", "remaining_time": "0:17:14"}
|
| 89 |
+
{"current_steps": 890, "total_steps": 5000, "loss": 1.0415, "lr": 4.907906416994146e-05, "epoch": 0.178, "percentage": 17.8, "elapsed_time": "0:03:43", "remaining_time": "0:17:11"}
|
| 90 |
+
{"current_steps": 900, "total_steps": 5000, "loss": 1.7801, "lr": 4.9031542398457974e-05, "epoch": 0.18, "percentage": 18.0, "elapsed_time": "0:03:46", "remaining_time": "0:17:11"}
|
| 91 |
+
{"current_steps": 910, "total_steps": 5000, "loss": 0.4855, "lr": 4.898284936350144e-05, "epoch": 0.182, "percentage": 18.2, "elapsed_time": "0:03:48", "remaining_time": "0:17:06"}
|
| 92 |
+
{"current_steps": 920, "total_steps": 5000, "loss": 0.6015, "lr": 4.893298743830168e-05, "epoch": 0.184, "percentage": 18.4, "elapsed_time": "0:03:50", "remaining_time": "0:17:03"}
|
| 93 |
+
{"current_steps": 930, "total_steps": 5000, "loss": 0.7006, "lr": 4.888195905305859e-05, "epoch": 0.186, "percentage": 18.6, "elapsed_time": "0:03:53", "remaining_time": "0:16:59"}
|
| 94 |
+
{"current_steps": 940, "total_steps": 5000, "loss": 0.3036, "lr": 4.882976669482367e-05, "epoch": 0.188, "percentage": 18.8, "elapsed_time": "0:03:54", "remaining_time": "0:16:54"}
|
| 95 |
+
{"current_steps": 950, "total_steps": 5000, "loss": 0.2464, "lr": 4.877641290737884e-05, "epoch": 0.19, "percentage": 19.0, "elapsed_time": "0:03:56", "remaining_time": "0:16:47"}
|
| 96 |
+
{"current_steps": 960, "total_steps": 5000, "loss": 0.4622, "lr": 4.8721900291112415e-05, "epoch": 0.192, "percentage": 19.2, "elapsed_time": "0:03:58", "remaining_time": "0:16:44"}
|
| 97 |
+
{"current_steps": 970, "total_steps": 5000, "loss": 0.5846, "lr": 4.8666231502892415e-05, "epoch": 0.194, "percentage": 19.4, "elapsed_time": "0:04:01", "remaining_time": "0:16:41"}
|
| 98 |
+
{"current_steps": 980, "total_steps": 5000, "loss": 0.5897, "lr": 4.860940925593703e-05, "epoch": 0.196, "percentage": 19.6, "elapsed_time": "0:04:03", "remaining_time": "0:16:39"}
|
| 99 |
+
{"current_steps": 990, "total_steps": 5000, "loss": 0.6564, "lr": 4.855143631968242e-05, "epoch": 0.198, "percentage": 19.8, "elapsed_time": "0:04:05", "remaining_time": "0:16:34"}
|
| 100 |
+
{"current_steps": 1000, "total_steps": 5000, "loss": 0.6761, "lr": 4.849231551964771e-05, "epoch": 0.2, "percentage": 20.0, "elapsed_time": "0:04:07", "remaining_time": "0:16:31"}
|
| 101 |
+
{"current_steps": 1010, "total_steps": 5000, "loss": 0.9705, "lr": 4.843204973729729e-05, "epoch": 0.202, "percentage": 20.2, "elapsed_time": "0:04:10", "remaining_time": "0:16:29"}
|
| 102 |
+
{"current_steps": 1020, "total_steps": 5000, "loss": 0.6534, "lr": 4.837064190990036e-05, "epoch": 0.204, "percentage": 20.4, "elapsed_time": "0:04:12", "remaining_time": "0:16:27"}
|
| 103 |
+
{"current_steps": 1030, "total_steps": 5000, "loss": 1.8363, "lr": 4.830809503038781e-05, "epoch": 0.206, "percentage": 20.6, "elapsed_time": "0:04:16", "remaining_time": "0:16:27"}
|
| 104 |
+
{"current_steps": 1040, "total_steps": 5000, "loss": 1.5076, "lr": 4.8244412147206284e-05, "epoch": 0.208, "percentage": 20.8, "elapsed_time": "0:04:17", "remaining_time": "0:16:22"}
|
| 105 |
+
{"current_steps": 1050, "total_steps": 5000, "loss": 0.9317, "lr": 4.817959636416969e-05, "epoch": 0.21, "percentage": 21.0, "elapsed_time": "0:04:20", "remaining_time": "0:16:20"}
|
| 106 |
+
{"current_steps": 1060, "total_steps": 5000, "loss": 1.2908, "lr": 4.8113650840307834e-05, "epoch": 0.212, "percentage": 21.2, "elapsed_time": "0:04:23", "remaining_time": "0:16:20"}
|
| 107 |
+
{"current_steps": 1070, "total_steps": 5000, "loss": 0.5742, "lr": 4.8046578789712515e-05, "epoch": 0.214, "percentage": 21.4, "elapsed_time": "0:04:26", "remaining_time": "0:16:19"}
|
| 108 |
+
{"current_steps": 1080, "total_steps": 5000, "loss": 1.2138, "lr": 4.797838348138086e-05, "epoch": 0.216, "percentage": 21.6, "elapsed_time": "0:04:28", "remaining_time": "0:16:14"}
|
| 109 |
+
{"current_steps": 1090, "total_steps": 5000, "loss": 1.9534, "lr": 4.790906823905599e-05, "epoch": 0.218, "percentage": 21.8, "elapsed_time": "0:04:31", "remaining_time": "0:16:13"}
|
| 110 |
+
{"current_steps": 1100, "total_steps": 5000, "loss": 1.2392, "lr": 4.783863644106502e-05, "epoch": 0.22, "percentage": 22.0, "elapsed_time": "0:04:33", "remaining_time": "0:16:10"}
|
| 111 |
+
{"current_steps": 1110, "total_steps": 5000, "loss": 0.7664, "lr": 4.776709152015443e-05, "epoch": 0.222, "percentage": 22.2, "elapsed_time": "0:04:35", "remaining_time": "0:16:05"}
|
| 112 |
+
{"current_steps": 1120, "total_steps": 5000, "loss": 0.6328, "lr": 4.769443696332272e-05, "epoch": 0.224, "percentage": 22.4, "elapsed_time": "0:04:37", "remaining_time": "0:16:00"}
|
| 113 |
+
{"current_steps": 1130, "total_steps": 5000, "loss": 1.3397, "lr": 4.762067631165049e-05, "epoch": 0.226, "percentage": 22.6, "elapsed_time": "0:04:39", "remaining_time": "0:15:56"}
|
| 114 |
+
{"current_steps": 1140, "total_steps": 5000, "loss": 0.5316, "lr": 4.754581316012785e-05, "epoch": 0.228, "percentage": 22.8, "elapsed_time": "0:04:41", "remaining_time": "0:15:52"}
|
| 115 |
+
{"current_steps": 1150, "total_steps": 5000, "loss": 1.7526, "lr": 4.7469851157479177e-05, "epoch": 0.23, "percentage": 23.0, "elapsed_time": "0:04:43", "remaining_time": "0:15:49"}
|
| 116 |
+
{"current_steps": 1160, "total_steps": 5000, "loss": 1.6985, "lr": 4.7392794005985326e-05, "epoch": 0.232, "percentage": 23.2, "elapsed_time": "0:04:46", "remaining_time": "0:15:49"}
|
| 117 |
+
{"current_steps": 1170, "total_steps": 5000, "loss": 1.7021, "lr": 4.731464546130314e-05, "epoch": 0.234, "percentage": 23.4, "elapsed_time": "0:04:49", "remaining_time": "0:15:47"}
|
| 118 |
+
{"current_steps": 1180, "total_steps": 5000, "loss": 0.6692, "lr": 4.723540933228244e-05, "epoch": 0.236, "percentage": 23.6, "elapsed_time": "0:04:52", "remaining_time": "0:15:45"}
|
| 119 |
+
{"current_steps": 1190, "total_steps": 5000, "loss": 0.8183, "lr": 4.715508948078037e-05, "epoch": 0.238, "percentage": 23.8, "elapsed_time": "0:04:54", "remaining_time": "0:15:43"}
|
| 120 |
+
{"current_steps": 1200, "total_steps": 5000, "loss": 0.7391, "lr": 4.707368982147318e-05, "epoch": 0.24, "percentage": 24.0, "elapsed_time": "0:04:56", "remaining_time": "0:15:39"}
|
| 121 |
+
{"current_steps": 1210, "total_steps": 5000, "loss": 1.0601, "lr": 4.6991214321665414e-05, "epoch": 0.242, "percentage": 24.2, "elapsed_time": "0:04:58", "remaining_time": "0:15:36"}
|
| 122 |
+
{"current_steps": 1220, "total_steps": 5000, "loss": 0.6689, "lr": 4.690766700109659e-05, "epoch": 0.244, "percentage": 24.4, "elapsed_time": "0:05:01", "remaining_time": "0:15:34"}
|
| 123 |
+
{"current_steps": 1230, "total_steps": 5000, "loss": 1.2384, "lr": 4.682305193174524e-05, "epoch": 0.246, "percentage": 24.6, "elapsed_time": "0:05:03", "remaining_time": "0:15:31"}
|
| 124 |
+
{"current_steps": 1240, "total_steps": 5000, "loss": 0.5366, "lr": 4.6737373237630476e-05, "epoch": 0.248, "percentage": 24.8, "elapsed_time": "0:05:05", "remaining_time": "0:15:27"}
|
| 125 |
+
{"current_steps": 1250, "total_steps": 5000, "loss": 0.9924, "lr": 4.665063509461097e-05, "epoch": 0.25, "percentage": 25.0, "elapsed_time": "0:05:08", "remaining_time": "0:15:24"}
|
| 126 |
+
{"current_steps": 1260, "total_steps": 5000, "loss": 1.1548, "lr": 4.656284173018144e-05, "epoch": 0.252, "percentage": 25.2, "elapsed_time": "0:05:10", "remaining_time": "0:15:21"}
|
| 127 |
+
{"current_steps": 1270, "total_steps": 5000, "loss": 0.798, "lr": 4.6473997423266614e-05, "epoch": 0.254, "percentage": 25.4, "elapsed_time": "0:05:12", "remaining_time": "0:15:18"}
|
| 128 |
+
{"current_steps": 1280, "total_steps": 5000, "loss": 0.8444, "lr": 4.638410650401267e-05, "epoch": 0.256, "percentage": 25.6, "elapsed_time": "0:05:15", "remaining_time": "0:15:18"}
|
| 129 |
+
{"current_steps": 1290, "total_steps": 5000, "loss": 1.4516, "lr": 4.629317335357619e-05, "epoch": 0.258, "percentage": 25.8, "elapsed_time": "0:05:18", "remaining_time": "0:15:16"}
|
| 130 |
+
{"current_steps": 1300, "total_steps": 5000, "loss": 0.4612, "lr": 4.620120240391065e-05, "epoch": 0.26, "percentage": 26.0, "elapsed_time": "0:05:21", "remaining_time": "0:15:13"}
|
| 131 |
+
{"current_steps": 1310, "total_steps": 5000, "loss": 0.8674, "lr": 4.610819813755038e-05, "epoch": 0.262, "percentage": 26.2, "elapsed_time": "0:05:23", "remaining_time": "0:15:10"}
|
| 132 |
+
{"current_steps": 1320, "total_steps": 5000, "loss": 0.8115, "lr": 4.601416508739211e-05, "epoch": 0.264, "percentage": 26.4, "elapsed_time": "0:05:26", "remaining_time": "0:15:09"}
|
| 133 |
+
{"current_steps": 1330, "total_steps": 5000, "loss": 0.4957, "lr": 4.591910783647404e-05, "epoch": 0.266, "percentage": 26.6, "elapsed_time": "0:05:29", "remaining_time": "0:15:07"}
|
| 134 |
+
{"current_steps": 1340, "total_steps": 5000, "loss": 0.862, "lr": 4.5823031017752485e-05, "epoch": 0.268, "percentage": 26.8, "elapsed_time": "0:05:31", "remaining_time": "0:15:04"}
|
| 135 |
+
{"current_steps": 1350, "total_steps": 5000, "loss": 0.2812, "lr": 4.572593931387604e-05, "epoch": 0.27, "percentage": 27.0, "elapsed_time": "0:05:32", "remaining_time": "0:14:59"}
|
| 136 |
+
{"current_steps": 1360, "total_steps": 5000, "loss": 2.1906, "lr": 4.562783745695738e-05, "epoch": 0.272, "percentage": 27.2, "elapsed_time": "0:05:35", "remaining_time": "0:14:57"}
|
| 137 |
+
{"current_steps": 1370, "total_steps": 5000, "loss": 0.9072, "lr": 4.5528730228342605e-05, "epoch": 0.274, "percentage": 27.4, "elapsed_time": "0:05:37", "remaining_time": "0:14:54"}
|
| 138 |
+
{"current_steps": 1380, "total_steps": 5000, "loss": 0.5203, "lr": 4.542862245837821e-05, "epoch": 0.276, "percentage": 27.6, "elapsed_time": "0:05:39", "remaining_time": "0:14:51"}
|
| 139 |
+
{"current_steps": 1390, "total_steps": 5000, "loss": 0.8603, "lr": 4.532751902617569e-05, "epoch": 0.278, "percentage": 27.8, "elapsed_time": "0:05:42", "remaining_time": "0:14:48"}
|
| 140 |
+
{"current_steps": 1400, "total_steps": 5000, "loss": 0.7643, "lr": 4.522542485937369e-05, "epoch": 0.28, "percentage": 28.0, "elapsed_time": "0:05:44", "remaining_time": "0:14:45"}
|
| 141 |
+
{"current_steps": 1410, "total_steps": 5000, "loss": 0.8807, "lr": 4.512234493389785e-05, "epoch": 0.282, "percentage": 28.2, "elapsed_time": "0:05:47", "remaining_time": "0:14:45"}
|
| 142 |
+
{"current_steps": 1420, "total_steps": 5000, "loss": 0.8509, "lr": 4.5018284273718336e-05, "epoch": 0.284, "percentage": 28.4, "elapsed_time": "0:05:50", "remaining_time": "0:14:43"}
|
| 143 |
+
{"current_steps": 1430, "total_steps": 5000, "loss": 1.7827, "lr": 4.491324795060491e-05, "epoch": 0.286, "percentage": 28.6, "elapsed_time": "0:05:53", "remaining_time": "0:14:41"}
|
| 144 |
+
{"current_steps": 1440, "total_steps": 5000, "loss": 0.7381, "lr": 4.480724108387977e-05, "epoch": 0.288, "percentage": 28.8, "elapsed_time": "0:05:56", "remaining_time": "0:14:40"}
|
| 145 |
+
{"current_steps": 1450, "total_steps": 5000, "loss": 1.691, "lr": 4.4700268840168045e-05, "epoch": 0.29, "percentage": 29.0, "elapsed_time": "0:05:58", "remaining_time": "0:14:38"}
|
| 146 |
+
{"current_steps": 1460, "total_steps": 5000, "loss": 2.0687, "lr": 4.4592336433146e-05, "epoch": 0.292, "percentage": 29.2, "elapsed_time": "0:06:01", "remaining_time": "0:14:35"}
|
| 147 |
+
{"current_steps": 1470, "total_steps": 5000, "loss": 1.8049, "lr": 4.448344912328686e-05, "epoch": 0.294, "percentage": 29.4, "elapsed_time": "0:06:04", "remaining_time": "0:14:34"}
|
| 148 |
+
{"current_steps": 1480, "total_steps": 5000, "loss": 0.8578, "lr": 4.4373612217604496e-05, "epoch": 0.296, "percentage": 29.6, "elapsed_time": "0:06:05", "remaining_time": "0:14:30"}
|
| 149 |
+
{"current_steps": 1490, "total_steps": 5000, "loss": 1.169, "lr": 4.426283106939474e-05, "epoch": 0.298, "percentage": 29.8, "elapsed_time": "0:06:08", "remaining_time": "0:14:28"}
|
| 150 |
+
{"current_steps": 1500, "total_steps": 5000, "loss": 0.3027, "lr": 4.415111107797445e-05, "epoch": 0.3, "percentage": 30.0, "elapsed_time": "0:06:11", "remaining_time": "0:14:26"}
|
| 151 |
+
{"current_steps": 1510, "total_steps": 5000, "loss": 1.0326, "lr": 4.403845768841842e-05, "epoch": 0.302, "percentage": 30.2, "elapsed_time": "0:06:14", "remaining_time": "0:14:24"}
|
| 152 |
+
{"current_steps": 1520, "total_steps": 5000, "loss": 1.0785, "lr": 4.3924876391293915e-05, "epoch": 0.304, "percentage": 30.4, "elapsed_time": "0:06:15", "remaining_time": "0:14:20"}
|
| 153 |
+
{"current_steps": 1530, "total_steps": 5000, "loss": 1.4215, "lr": 4.381037272239311e-05, "epoch": 0.306, "percentage": 30.6, "elapsed_time": "0:06:17", "remaining_time": "0:14:17"}
|
| 154 |
+
{"current_steps": 1540, "total_steps": 5000, "loss": 0.4891, "lr": 4.36949522624633e-05, "epoch": 0.308, "percentage": 30.8, "elapsed_time": "0:06:20", "remaining_time": "0:14:15"}
|
| 155 |
+
{"current_steps": 1550, "total_steps": 5000, "loss": 1.3672, "lr": 4.357862063693486e-05, "epoch": 0.31, "percentage": 31.0, "elapsed_time": "0:06:22", "remaining_time": "0:14:12"}
|
| 156 |
+
{"current_steps": 1560, "total_steps": 5000, "loss": 1.0202, "lr": 4.3461383515647106e-05, "epoch": 0.312, "percentage": 31.2, "elapsed_time": "0:06:25", "remaining_time": "0:14:10"}
|
| 157 |
+
{"current_steps": 1570, "total_steps": 5000, "loss": 0.9313, "lr": 4.334324661257191e-05, "epoch": 0.314, "percentage": 31.4, "elapsed_time": "0:06:28", "remaining_time": "0:14:07"}
|
| 158 |
+
{"current_steps": 1580, "total_steps": 5000, "loss": 0.4453, "lr": 4.3224215685535294e-05, "epoch": 0.316, "percentage": 31.6, "elapsed_time": "0:06:30", "remaining_time": "0:14:04"}
|
| 159 |
+
{"current_steps": 1590, "total_steps": 5000, "loss": 1.694, "lr": 4.3104296535936695e-05, "epoch": 0.318, "percentage": 31.8, "elapsed_time": "0:06:33", "remaining_time": "0:14:03"}
|
| 160 |
+
{"current_steps": 1600, "total_steps": 5000, "loss": 1.4264, "lr": 4.2983495008466276e-05, "epoch": 0.32, "percentage": 32.0, "elapsed_time": "0:06:36", "remaining_time": "0:14:01"}
|
| 161 |
+
{"current_steps": 1610, "total_steps": 5000, "loss": 0.5798, "lr": 4.2861816990820084e-05, "epoch": 0.322, "percentage": 32.2, "elapsed_time": "0:06:38", "remaining_time": "0:14:00"}
|
| 162 |
+
{"current_steps": 1620, "total_steps": 5000, "loss": 0.6301, "lr": 4.273926841341302e-05, "epoch": 0.324, "percentage": 32.4, "elapsed_time": "0:06:41", "remaining_time": "0:13:58"}
|
| 163 |
+
{"current_steps": 1630, "total_steps": 5000, "loss": 0.9712, "lr": 4.261585524908987e-05, "epoch": 0.326, "percentage": 32.6, "elapsed_time": "0:06:44", "remaining_time": "0:13:55"}
|
| 164 |
+
{"current_steps": 1640, "total_steps": 5000, "loss": 0.7751, "lr": 4.249158351283414e-05, "epoch": 0.328, "percentage": 32.8, "elapsed_time": "0:06:46", "remaining_time": "0:13:52"}
|
| 165 |
+
{"current_steps": 1650, "total_steps": 5000, "loss": 0.5724, "lr": 4.2366459261474933e-05, "epoch": 0.33, "percentage": 33.0, "elapsed_time": "0:06:48", "remaining_time": "0:13:48"}
|
| 166 |
+
{"current_steps": 1660, "total_steps": 5000, "loss": 0.9216, "lr": 4.224048859339175e-05, "epoch": 0.332, "percentage": 33.2, "elapsed_time": "0:06:50", "remaining_time": "0:13:45"}
|
| 167 |
+
{"current_steps": 1670, "total_steps": 5000, "loss": 0.5461, "lr": 4.211367764821722e-05, "epoch": 0.334, "percentage": 33.4, "elapsed_time": "0:06:52", "remaining_time": "0:13:42"}
|
| 168 |
+
{"current_steps": 1680, "total_steps": 5000, "loss": 1.0833, "lr": 4.198603260653792e-05, "epoch": 0.336, "percentage": 33.6, "elapsed_time": "0:06:55", "remaining_time": "0:13:40"}
|
| 169 |
+
{"current_steps": 1690, "total_steps": 5000, "loss": 1.6468, "lr": 4.185755968959308e-05, "epoch": 0.338, "percentage": 33.8, "elapsed_time": "0:06:58", "remaining_time": "0:13:38"}
|
| 170 |
+
{"current_steps": 1700, "total_steps": 5000, "loss": 0.6583, "lr": 4.172826515897146e-05, "epoch": 0.34, "percentage": 34.0, "elapsed_time": "0:07:00", "remaining_time": "0:13:35"}
|
| 171 |
+
{"current_steps": 1710, "total_steps": 5000, "loss": 1.1721, "lr": 4.1598155316306044e-05, "epoch": 0.342, "percentage": 34.2, "elapsed_time": "0:07:02", "remaining_time": "0:13:33"}
|
| 172 |
+
{"current_steps": 1720, "total_steps": 5000, "loss": 0.8588, "lr": 4.146723650296701e-05, "epoch": 0.344, "percentage": 34.4, "elapsed_time": "0:07:05", "remaining_time": "0:13:30"}
|
| 173 |
+
{"current_steps": 1730, "total_steps": 5000, "loss": 0.4645, "lr": 4.133551509975264e-05, "epoch": 0.346, "percentage": 34.6, "elapsed_time": "0:07:07", "remaining_time": "0:13:28"}
|
| 174 |
+
{"current_steps": 1740, "total_steps": 5000, "loss": 1.4741, "lr": 4.1202997526578276e-05, "epoch": 0.348, "percentage": 34.8, "elapsed_time": "0:07:10", "remaining_time": "0:13:26"}
|
| 175 |
+
{"current_steps": 1750, "total_steps": 5000, "loss": 0.9873, "lr": 4.1069690242163484e-05, "epoch": 0.35, "percentage": 35.0, "elapsed_time": "0:07:12", "remaining_time": "0:13:23"}
|
| 176 |
+
{"current_steps": 1760, "total_steps": 5000, "loss": 0.6202, "lr": 4.093559974371725e-05, "epoch": 0.352, "percentage": 35.2, "elapsed_time": "0:07:15", "remaining_time": "0:13:22"}
|
| 177 |
+
{"current_steps": 1770, "total_steps": 5000, "loss": 0.7872, "lr": 4.080073256662127e-05, "epoch": 0.354, "percentage": 35.4, "elapsed_time": "0:07:18", "remaining_time": "0:13:20"}
|
| 178 |
+
{"current_steps": 1780, "total_steps": 5000, "loss": 1.9155, "lr": 4.066509528411152e-05, "epoch": 0.356, "percentage": 35.6, "elapsed_time": "0:07:21", "remaining_time": "0:13:18"}
|
| 179 |
+
{"current_steps": 1790, "total_steps": 5000, "loss": 0.5979, "lr": 4.052869450695776e-05, "epoch": 0.358, "percentage": 35.8, "elapsed_time": "0:07:23", "remaining_time": "0:13:15"}
|
| 180 |
+
{"current_steps": 1800, "total_steps": 5000, "loss": 0.8478, "lr": 4.039153688314145e-05, "epoch": 0.36, "percentage": 36.0, "elapsed_time": "0:07:26", "remaining_time": "0:13:13"}
|
| 181 |
+
{"current_steps": 1810, "total_steps": 5000, "loss": 0.623, "lr": 4.02536290975317e-05, "epoch": 0.362, "percentage": 36.2, "elapsed_time": "0:07:28", "remaining_time": "0:13:10"}
|
| 182 |
+
{"current_steps": 1820, "total_steps": 5000, "loss": 1.4658, "lr": 4.011497787155938e-05, "epoch": 0.364, "percentage": 36.4, "elapsed_time": "0:07:30", "remaining_time": "0:13:07"}
|
| 183 |
+
{"current_steps": 1830, "total_steps": 5000, "loss": 1.9824, "lr": 3.997558996288965e-05, "epoch": 0.366, "percentage": 36.6, "elapsed_time": "0:07:33", "remaining_time": "0:13:04"}
|
| 184 |
+
{"current_steps": 1840, "total_steps": 5000, "loss": 0.626, "lr": 3.983547216509254e-05, "epoch": 0.368, "percentage": 36.8, "elapsed_time": "0:07:35", "remaining_time": "0:13:02"}
|
| 185 |
+
{"current_steps": 1850, "total_steps": 5000, "loss": 0.5819, "lr": 3.969463130731183e-05, "epoch": 0.37, "percentage": 37.0, "elapsed_time": "0:07:37", "remaining_time": "0:12:59"}
|
| 186 |
+
{"current_steps": 1860, "total_steps": 5000, "loss": 0.7553, "lr": 3.955307425393224e-05, "epoch": 0.372, "percentage": 37.2, "elapsed_time": "0:07:40", "remaining_time": "0:12:57"}
|
| 187 |
+
{"current_steps": 1870, "total_steps": 5000, "loss": 1.3426, "lr": 3.941080790424484e-05, "epoch": 0.374, "percentage": 37.4, "elapsed_time": "0:07:42", "remaining_time": "0:12:54"}
|
| 188 |
+
{"current_steps": 1880, "total_steps": 5000, "loss": 0.8386, "lr": 3.92678391921108e-05, "epoch": 0.376, "percentage": 37.6, "elapsed_time": "0:07:45", "remaining_time": "0:12:52"}
|
| 189 |
+
{"current_steps": 1890, "total_steps": 5000, "loss": 0.5088, "lr": 3.912417508562345e-05, "epoch": 0.378, "percentage": 37.8, "elapsed_time": "0:07:47", "remaining_time": "0:12:50"}
|
| 190 |
+
{"current_steps": 1900, "total_steps": 5000, "loss": 2.6207, "lr": 3.897982258676867e-05, "epoch": 0.38, "percentage": 38.0, "elapsed_time": "0:07:50", "remaining_time": "0:12:47"}
|
| 191 |
+
{"current_steps": 1910, "total_steps": 5000, "loss": 0.8114, "lr": 3.883478873108361e-05, "epoch": 0.382, "percentage": 38.2, "elapsed_time": "0:07:53", "remaining_time": "0:12:46"}
|
| 192 |
+
{"current_steps": 1920, "total_steps": 5000, "loss": 0.489, "lr": 3.868908058731376e-05, "epoch": 0.384, "percentage": 38.4, "elapsed_time": "0:07:55", "remaining_time": "0:12:43"}
|
| 193 |
+
{"current_steps": 1930, "total_steps": 5000, "loss": 0.6035, "lr": 3.85427052570685e-05, "epoch": 0.386, "percentage": 38.6, "elapsed_time": "0:07:58", "remaining_time": "0:12:40"}
|
| 194 |
+
{"current_steps": 1940, "total_steps": 5000, "loss": 0.9305, "lr": 3.8395669874474915e-05, "epoch": 0.388, "percentage": 38.8, "elapsed_time": "0:08:00", "remaining_time": "0:12:38"}
|
| 195 |
+
{"current_steps": 1950, "total_steps": 5000, "loss": 0.5244, "lr": 3.824798160583012e-05, "epoch": 0.39, "percentage": 39.0, "elapsed_time": "0:08:02", "remaining_time": "0:12:34"}
|
| 196 |
+
{"current_steps": 1960, "total_steps": 5000, "loss": 1.4947, "lr": 3.8099647649251986e-05, "epoch": 0.392, "percentage": 39.2, "elapsed_time": "0:08:05", "remaining_time": "0:12:32"}
|
| 197 |
+
{"current_steps": 1970, "total_steps": 5000, "loss": 0.6931, "lr": 3.795067523432826e-05, "epoch": 0.394, "percentage": 39.4, "elapsed_time": "0:08:07", "remaining_time": "0:12:30"}
|
| 198 |
+
{"current_steps": 1980, "total_steps": 5000, "loss": 0.8547, "lr": 3.780107162176429e-05, "epoch": 0.396, "percentage": 39.6, "elapsed_time": "0:08:09", "remaining_time": "0:12:27"}
|
| 199 |
+
{"current_steps": 1990, "total_steps": 5000, "loss": 2.4946, "lr": 3.765084410302909e-05, "epoch": 0.398, "percentage": 39.8, "elapsed_time": "0:08:12", "remaining_time": "0:12:25"}
|
| 200 |
+
{"current_steps": 2000, "total_steps": 5000, "loss": 1.2444, "lr": 3.7500000000000003e-05, "epoch": 0.4, "percentage": 40.0, "elapsed_time": "0:08:15", "remaining_time": "0:12:23"}
|
| 201 |
+
{"current_steps": 2010, "total_steps": 5000, "loss": 1.2603, "lr": 3.7348546664605777e-05, "epoch": 0.402, "percentage": 40.2, "elapsed_time": "0:08:18", "remaining_time": "0:12:21"}
|
| 202 |
+
{"current_steps": 2020, "total_steps": 5000, "loss": 0.5348, "lr": 3.719649147846832e-05, "epoch": 0.404, "percentage": 40.4, "elapsed_time": "0:08:20", "remaining_time": "0:12:19"}
|
| 203 |
+
{"current_steps": 2030, "total_steps": 5000, "loss": 0.6968, "lr": 3.704384185254288e-05, "epoch": 0.406, "percentage": 40.6, "elapsed_time": "0:08:23", "remaining_time": "0:12:16"}
|
| 204 |
+
{"current_steps": 2040, "total_steps": 5000, "loss": 2.8761, "lr": 3.689060522675689e-05, "epoch": 0.408, "percentage": 40.8, "elapsed_time": "0:08:25", "remaining_time": "0:12:13"}
|
| 205 |
+
{"current_steps": 2050, "total_steps": 5000, "loss": 1.6509, "lr": 3.673678906964727e-05, "epoch": 0.41, "percentage": 41.0, "elapsed_time": "0:08:27", "remaining_time": "0:12:10"}
|
| 206 |
+
{"current_steps": 2060, "total_steps": 5000, "loss": 0.8742, "lr": 3.6582400877996546e-05, "epoch": 0.412, "percentage": 41.2, "elapsed_time": "0:08:30", "remaining_time": "0:12:08"}
|
| 207 |
+
{"current_steps": 2070, "total_steps": 5000, "loss": 1.241, "lr": 3.642744817646736e-05, "epoch": 0.414, "percentage": 41.4, "elapsed_time": "0:08:33", "remaining_time": "0:12:06"}
|
| 208 |
+
{"current_steps": 2080, "total_steps": 5000, "loss": 0.6697, "lr": 3.627193851723577e-05, "epoch": 0.416, "percentage": 41.6, "elapsed_time": "0:08:36", "remaining_time": "0:12:04"}
|
| 209 |
+
{"current_steps": 2090, "total_steps": 5000, "loss": 0.8601, "lr": 3.611587947962319e-05, "epoch": 0.418, "percentage": 41.8, "elapsed_time": "0:08:38", "remaining_time": "0:12:01"}
|
| 210 |
+
{"current_steps": 2100, "total_steps": 5000, "loss": 1.6495, "lr": 3.5959278669726935e-05, "epoch": 0.42, "percentage": 42.0, "elapsed_time": "0:08:41", "remaining_time": "0:11:59"}
|
| 211 |
+
{"current_steps": 2110, "total_steps": 5000, "loss": 1.1695, "lr": 3.580214372004956e-05, "epoch": 0.422, "percentage": 42.2, "elapsed_time": "0:08:44", "remaining_time": "0:11:57"}
|
| 212 |
+
{"current_steps": 2120, "total_steps": 5000, "loss": 2.1528, "lr": 3.564448228912682e-05, "epoch": 0.424, "percentage": 42.4, "elapsed_time": "0:08:46", "remaining_time": "0:11:54"}
|
| 213 |
+
{"current_steps": 2130, "total_steps": 5000, "loss": 0.7449, "lr": 3.548630206115443e-05, "epoch": 0.426, "percentage": 42.6, "elapsed_time": "0:08:48", "remaining_time": "0:11:52"}
|
| 214 |
+
{"current_steps": 2140, "total_steps": 5000, "loss": 0.7268, "lr": 3.532761074561355e-05, "epoch": 0.428, "percentage": 42.8, "elapsed_time": "0:08:51", "remaining_time": "0:11:49"}
|
| 215 |
+
{"current_steps": 2150, "total_steps": 5000, "loss": 0.7794, "lr": 3.516841607689501e-05, "epoch": 0.43, "percentage": 43.0, "elapsed_time": "0:08:53", "remaining_time": "0:11:47"}
|
| 216 |
+
{"current_steps": 2160, "total_steps": 5000, "loss": 0.727, "lr": 3.5008725813922386e-05, "epoch": 0.432, "percentage": 43.2, "elapsed_time": "0:08:56", "remaining_time": "0:11:45"}
|
| 217 |
+
{"current_steps": 2170, "total_steps": 5000, "loss": 2.1652, "lr": 3.484854773977378e-05, "epoch": 0.434, "percentage": 43.4, "elapsed_time": "0:08:59", "remaining_time": "0:11:43"}
|
| 218 |
+
{"current_steps": 2180, "total_steps": 5000, "loss": 0.4095, "lr": 3.4687889661302576e-05, "epoch": 0.436, "percentage": 43.6, "elapsed_time": "0:09:02", "remaining_time": "0:11:41"}
|
| 219 |
+
{"current_steps": 2190, "total_steps": 5000, "loss": 0.8675, "lr": 3.452675940875686e-05, "epoch": 0.438, "percentage": 43.8, "elapsed_time": "0:09:04", "remaining_time": "0:11:38"}
|
| 220 |
+
{"current_steps": 2200, "total_steps": 5000, "loss": 0.8658, "lr": 3.436516483539781e-05, "epoch": 0.44, "percentage": 44.0, "elapsed_time": "0:09:07", "remaining_time": "0:11:36"}
|
| 221 |
+
{"current_steps": 2210, "total_steps": 5000, "loss": 0.3739, "lr": 3.4203113817116957e-05, "epoch": 0.442, "percentage": 44.2, "elapsed_time": "0:09:09", "remaining_time": "0:11:33"}
|
| 222 |
+
{"current_steps": 2220, "total_steps": 5000, "loss": 0.8909, "lr": 3.4040614252052305e-05, "epoch": 0.444, "percentage": 44.4, "elapsed_time": "0:09:12", "remaining_time": "0:11:31"}
|
| 223 |
+
{"current_steps": 2230, "total_steps": 5000, "loss": 1.4721, "lr": 3.387767406020343e-05, "epoch": 0.446, "percentage": 44.6, "elapsed_time": "0:09:15", "remaining_time": "0:11:29"}
|
| 224 |
+
{"current_steps": 2240, "total_steps": 5000, "loss": 1.001, "lr": 3.3714301183045385e-05, "epoch": 0.448, "percentage": 44.8, "elapsed_time": "0:09:18", "remaining_time": "0:11:27"}
|
| 225 |
+
{"current_steps": 2250, "total_steps": 5000, "loss": 0.7794, "lr": 3.355050358314172e-05, "epoch": 0.45, "percentage": 45.0, "elapsed_time": "0:09:20", "remaining_time": "0:11:25"}
|
| 226 |
+
{"current_steps": 2260, "total_steps": 5000, "loss": 0.3814, "lr": 3.338628924375638e-05, "epoch": 0.452, "percentage": 45.2, "elapsed_time": "0:09:22", "remaining_time": "0:11:22"}
|
| 227 |
+
{"current_steps": 2270, "total_steps": 5000, "loss": 0.5824, "lr": 3.322166616846458e-05, "epoch": 0.454, "percentage": 45.4, "elapsed_time": "0:09:25", "remaining_time": "0:11:20"}
|
| 228 |
+
{"current_steps": 2280, "total_steps": 5000, "loss": 1.3243, "lr": 3.305664238076278e-05, "epoch": 0.456, "percentage": 45.6, "elapsed_time": "0:09:28", "remaining_time": "0:11:17"}
|
| 229 |
+
{"current_steps": 2290, "total_steps": 5000, "loss": 0.4481, "lr": 3.289122592367757e-05, "epoch": 0.458, "percentage": 45.8, "elapsed_time": "0:09:30", "remaining_time": "0:11:14"}
|
| 230 |
+
{"current_steps": 2300, "total_steps": 5000, "loss": 0.5601, "lr": 3.272542485937369e-05, "epoch": 0.46, "percentage": 46.0, "elapsed_time": "0:09:32", "remaining_time": "0:11:12"}
|
| 231 |
+
{"current_steps": 2310, "total_steps": 5000, "loss": 0.6695, "lr": 3.2559247268761115e-05, "epoch": 0.462, "percentage": 46.2, "elapsed_time": "0:09:34", "remaining_time": "0:11:09"}
|
| 232 |
+
{"current_steps": 2320, "total_steps": 5000, "loss": 0.9525, "lr": 3.239270125110117e-05, "epoch": 0.464, "percentage": 46.4, "elapsed_time": "0:09:37", "remaining_time": "0:11:07"}
|
| 233 |
+
{"current_steps": 2330, "total_steps": 5000, "loss": 0.5488, "lr": 3.222579492361179e-05, "epoch": 0.466, "percentage": 46.6, "elapsed_time": "0:09:39", "remaining_time": "0:11:04"}
|
| 234 |
+
{"current_steps": 2340, "total_steps": 5000, "loss": 0.8862, "lr": 3.205853642107192e-05, "epoch": 0.468, "percentage": 46.8, "elapsed_time": "0:09:41", "remaining_time": "0:11:01"}
|
| 235 |
+
{"current_steps": 2350, "total_steps": 5000, "loss": 1.1218, "lr": 3.1890933895424976e-05, "epoch": 0.47, "percentage": 47.0, "elapsed_time": "0:09:44", "remaining_time": "0:10:58"}
|
| 236 |
+
{"current_steps": 2360, "total_steps": 5000, "loss": 1.4365, "lr": 3.172299551538164e-05, "epoch": 0.472, "percentage": 47.2, "elapsed_time": "0:09:47", "remaining_time": "0:10:56"}
|
| 237 |
+
{"current_steps": 2370, "total_steps": 5000, "loss": 1.4355, "lr": 3.155472946602162e-05, "epoch": 0.474, "percentage": 47.4, "elapsed_time": "0:09:49", "remaining_time": "0:10:54"}
|
| 238 |
+
{"current_steps": 2380, "total_steps": 5000, "loss": 1.6462, "lr": 3.138614394839476e-05, "epoch": 0.476, "percentage": 47.6, "elapsed_time": "0:09:52", "remaining_time": "0:10:52"}
|
| 239 |
+
{"current_steps": 2390, "total_steps": 5000, "loss": 0.6112, "lr": 3.121724717912138e-05, "epoch": 0.478, "percentage": 47.8, "elapsed_time": "0:09:54", "remaining_time": "0:10:48"}
|
| 240 |
+
{"current_steps": 2400, "total_steps": 5000, "loss": 0.2338, "lr": 3.104804738999169e-05, "epoch": 0.48, "percentage": 48.0, "elapsed_time": "0:09:56", "remaining_time": "0:10:45"}
|
| 241 |
+
{"current_steps": 2410, "total_steps": 5000, "loss": 0.5969, "lr": 3.087855282756475e-05, "epoch": 0.482, "percentage": 48.2, "elapsed_time": "0:09:59", "remaining_time": "0:10:44"}
|
| 242 |
+
{"current_steps": 2420, "total_steps": 5000, "loss": 0.8462, "lr": 3.0708771752766394e-05, "epoch": 0.484, "percentage": 48.4, "elapsed_time": "0:10:01", "remaining_time": "0:10:41"}
|
| 243 |
+
{"current_steps": 2430, "total_steps": 5000, "loss": 0.4652, "lr": 3.053871244048669e-05, "epoch": 0.486, "percentage": 48.6, "elapsed_time": "0:10:04", "remaining_time": "0:10:38"}
|
| 244 |
+
{"current_steps": 2440, "total_steps": 5000, "loss": 0.212, "lr": 3.0368383179176585e-05, "epoch": 0.488, "percentage": 48.8, "elapsed_time": "0:10:06", "remaining_time": "0:10:36"}
|
| 245 |
+
{"current_steps": 2450, "total_steps": 5000, "loss": 0.697, "lr": 3.0197792270443982e-05, "epoch": 0.49, "percentage": 49.0, "elapsed_time": "0:10:09", "remaining_time": "0:10:34"}
|
| 246 |
+
{"current_steps": 2460, "total_steps": 5000, "loss": 1.3351, "lr": 3.002694802864912e-05, "epoch": 0.492, "percentage": 49.2, "elapsed_time": "0:10:12", "remaining_time": "0:10:32"}
|
| 247 |
+
{"current_steps": 2470, "total_steps": 5000, "loss": 1.8147, "lr": 2.98558587804993e-05, "epoch": 0.494, "percentage": 49.4, "elapsed_time": "0:10:15", "remaining_time": "0:10:29"}
|
| 248 |
+
{"current_steps": 2480, "total_steps": 5000, "loss": 0.9978, "lr": 2.9684532864643122e-05, "epoch": 0.496, "percentage": 49.6, "elapsed_time": "0:10:18", "remaining_time": "0:10:28"}
|
| 249 |
+
{"current_steps": 2490, "total_steps": 5000, "loss": 1.6129, "lr": 2.9512978631264006e-05, "epoch": 0.498, "percentage": 49.8, "elapsed_time": "0:10:20", "remaining_time": "0:10:25"}
|
| 250 |
+
{"current_steps": 2500, "total_steps": 5000, "loss": 0.745, "lr": 2.9341204441673266e-05, "epoch": 0.5, "percentage": 50.0, "elapsed_time": "0:10:23", "remaining_time": "0:10:23"}
|
| 251 |
+
{"current_steps": 2510, "total_steps": 5000, "loss": 0.5694, "lr": 2.916921866790256e-05, "epoch": 0.502, "percentage": 50.2, "elapsed_time": "0:10:25", "remaining_time": "0:10:20"}
|
| 252 |
+
{"current_steps": 2520, "total_steps": 5000, "loss": 0.453, "lr": 2.8997029692295874e-05, "epoch": 0.504, "percentage": 50.4, "elapsed_time": "0:10:27", "remaining_time": "0:10:17"}
|
| 253 |
+
{"current_steps": 2530, "total_steps": 5000, "loss": 0.7539, "lr": 2.8824645907100954e-05, "epoch": 0.506, "percentage": 50.6, "elapsed_time": "0:10:29", "remaining_time": "0:10:14"}
|
| 254 |
+
{"current_steps": 2540, "total_steps": 5000, "loss": 0.5247, "lr": 2.8652075714060295e-05, "epoch": 0.508, "percentage": 50.8, "elapsed_time": "0:10:31", "remaining_time": "0:10:11"}
|
| 255 |
+
{"current_steps": 2550, "total_steps": 5000, "loss": 0.7769, "lr": 2.8479327524001636e-05, "epoch": 0.51, "percentage": 51.0, "elapsed_time": "0:10:33", "remaining_time": "0:10:08"}
|
| 256 |
+
{"current_steps": 2560, "total_steps": 5000, "loss": 0.7204, "lr": 2.8306409756428064e-05, "epoch": 0.512, "percentage": 51.2, "elapsed_time": "0:10:36", "remaining_time": "0:10:06"}
|
| 257 |
+
{"current_steps": 2570, "total_steps": 5000, "loss": 0.9135, "lr": 2.8133330839107608e-05, "epoch": 0.514, "percentage": 51.4, "elapsed_time": "0:10:39", "remaining_time": "0:10:04"}
|
| 258 |
+
{"current_steps": 2580, "total_steps": 5000, "loss": 0.5892, "lr": 2.7960099207662532e-05, "epoch": 0.516, "percentage": 51.6, "elapsed_time": "0:10:41", "remaining_time": "0:10:01"}
|
| 259 |
+
{"current_steps": 2590, "total_steps": 5000, "loss": 0.5568, "lr": 2.7786723305158136e-05, "epoch": 0.518, "percentage": 51.8, "elapsed_time": "0:10:43", "remaining_time": "0:09:58"}
|
| 260 |
+
{"current_steps": 2600, "total_steps": 5000, "loss": 1.3712, "lr": 2.761321158169134e-05, "epoch": 0.52, "percentage": 52.0, "elapsed_time": "0:10:46", "remaining_time": "0:09:56"}
|
| 261 |
+
{"current_steps": 2610, "total_steps": 5000, "loss": 0.9695, "lr": 2.7439572493978736e-05, "epoch": 0.522, "percentage": 52.2, "elapsed_time": "0:10:48", "remaining_time": "0:09:54"}
|
| 262 |
+
{"current_steps": 2620, "total_steps": 5000, "loss": 0.7138, "lr": 2.726581450494451e-05, "epoch": 0.524, "percentage": 52.4, "elapsed_time": "0:10:51", "remaining_time": "0:09:51"}
|
| 263 |
+
{"current_steps": 2630, "total_steps": 5000, "loss": 1.0675, "lr": 2.7091946083307896e-05, "epoch": 0.526, "percentage": 52.6, "elapsed_time": "0:10:53", "remaining_time": "0:09:48"}
|
| 264 |
+
{"current_steps": 2640, "total_steps": 5000, "loss": 0.8781, "lr": 2.6917975703170466e-05, "epoch": 0.528, "percentage": 52.8, "elapsed_time": "0:10:55", "remaining_time": "0:09:46"}
|
| 265 |
+
{"current_steps": 2650, "total_steps": 5000, "loss": 0.5354, "lr": 2.674391184360313e-05, "epoch": 0.53, "percentage": 53.0, "elapsed_time": "0:10:58", "remaining_time": "0:09:43"}
|
| 266 |
+
{"current_steps": 2660, "total_steps": 5000, "loss": 0.456, "lr": 2.656976298823284e-05, "epoch": 0.532, "percentage": 53.2, "elapsed_time": "0:11:00", "remaining_time": "0:09:41"}
|
| 267 |
+
{"current_steps": 2670, "total_steps": 5000, "loss": 2.0864, "lr": 2.6395537624829096e-05, "epoch": 0.534, "percentage": 53.4, "elapsed_time": "0:11:02", "remaining_time": "0:09:38"}
|
| 268 |
+
{"current_steps": 2680, "total_steps": 5000, "loss": 0.8204, "lr": 2.6221244244890336e-05, "epoch": 0.536, "percentage": 53.6, "elapsed_time": "0:11:05", "remaining_time": "0:09:35"}
|
| 269 |
+
{"current_steps": 2690, "total_steps": 5000, "loss": 0.6519, "lr": 2.604689134322999e-05, "epoch": 0.538, "percentage": 53.8, "elapsed_time": "0:11:06", "remaining_time": "0:09:32"}
|
| 270 |
+
{"current_steps": 2700, "total_steps": 5000, "loss": 0.7875, "lr": 2.587248741756253e-05, "epoch": 0.54, "percentage": 54.0, "elapsed_time": "0:11:08", "remaining_time": "0:09:29"}
|
| 271 |
+
{"current_steps": 2710, "total_steps": 5000, "loss": 1.4801, "lr": 2.5698040968089225e-05, "epoch": 0.542, "percentage": 54.2, "elapsed_time": "0:11:11", "remaining_time": "0:09:27"}
|
| 272 |
+
{"current_steps": 2720, "total_steps": 5000, "loss": 0.9097, "lr": 2.5523560497083926e-05, "epoch": 0.544, "percentage": 54.4, "elapsed_time": "0:11:14", "remaining_time": "0:09:25"}
|
| 273 |
+
{"current_steps": 2730, "total_steps": 5000, "loss": 0.5698, "lr": 2.5349054508478637e-05, "epoch": 0.546, "percentage": 54.6, "elapsed_time": "0:11:16", "remaining_time": "0:09:22"}
|
| 274 |
+
{"current_steps": 2740, "total_steps": 5000, "loss": 0.4508, "lr": 2.517453150744904e-05, "epoch": 0.548, "percentage": 54.8, "elapsed_time": "0:11:17", "remaining_time": "0:09:19"}
|
| 275 |
+
{"current_steps": 2750, "total_steps": 5000, "loss": 0.4544, "lr": 2.5e-05, "epoch": 0.55, "percentage": 55.0, "elapsed_time": "0:11:20", "remaining_time": "0:09:16"}
|
| 276 |
+
{"current_steps": 2760, "total_steps": 5000, "loss": 1.0367, "lr": 2.4825468492550964e-05, "epoch": 0.552, "percentage": 55.2, "elapsed_time": "0:11:23", "remaining_time": "0:09:14"}
|
| 277 |
+
{"current_steps": 2770, "total_steps": 5000, "loss": 5.9188, "lr": 2.4650945491521372e-05, "epoch": 0.554, "percentage": 55.4, "elapsed_time": "0:11:25", "remaining_time": "0:09:11"}
|
| 278 |
+
{"current_steps": 2780, "total_steps": 5000, "loss": 0.784, "lr": 2.447643950291608e-05, "epoch": 0.556, "percentage": 55.6, "elapsed_time": "0:11:28", "remaining_time": "0:09:09"}
|
| 279 |
+
{"current_steps": 2790, "total_steps": 5000, "loss": 1.3575, "lr": 2.4301959031910784e-05, "epoch": 0.558, "percentage": 55.8, "elapsed_time": "0:11:31", "remaining_time": "0:09:07"}
|
| 280 |
+
{"current_steps": 2800, "total_steps": 5000, "loss": 1.2835, "lr": 2.4127512582437485e-05, "epoch": 0.56, "percentage": 56.0, "elapsed_time": "0:11:33", "remaining_time": "0:09:04"}
|
| 281 |
+
{"current_steps": 2810, "total_steps": 5000, "loss": 2.173, "lr": 2.3953108656770016e-05, "epoch": 0.562, "percentage": 56.2, "elapsed_time": "0:11:35", "remaining_time": "0:09:02"}
|
| 282 |
+
{"current_steps": 2820, "total_steps": 5000, "loss": 0.354, "lr": 2.377875575510967e-05, "epoch": 0.564, "percentage": 56.4, "elapsed_time": "0:11:38", "remaining_time": "0:08:59"}
|
| 283 |
+
{"current_steps": 2830, "total_steps": 5000, "loss": 0.8905, "lr": 2.3604462375170906e-05, "epoch": 0.566, "percentage": 56.6, "elapsed_time": "0:11:41", "remaining_time": "0:08:57"}
|
| 284 |
+
{"current_steps": 2840, "total_steps": 5000, "loss": 0.9289, "lr": 2.3430237011767167e-05, "epoch": 0.568, "percentage": 56.8, "elapsed_time": "0:11:44", "remaining_time": "0:08:55"}
|
| 285 |
+
{"current_steps": 2850, "total_steps": 5000, "loss": 0.512, "lr": 2.3256088156396868e-05, "epoch": 0.57, "percentage": 57.0, "elapsed_time": "0:11:46", "remaining_time": "0:08:52"}
|
| 286 |
+
{"current_steps": 2860, "total_steps": 5000, "loss": 0.6724, "lr": 2.3082024296829536e-05, "epoch": 0.572, "percentage": 57.2, "elapsed_time": "0:11:49", "remaining_time": "0:08:50"}
|
| 287 |
+
{"current_steps": 2870, "total_steps": 5000, "loss": 0.7876, "lr": 2.2908053916692117e-05, "epoch": 0.574, "percentage": 57.4, "elapsed_time": "0:11:51", "remaining_time": "0:08:48"}
|
| 288 |
+
{"current_steps": 2880, "total_steps": 5000, "loss": 1.3706, "lr": 2.2734185495055503e-05, "epoch": 0.576, "percentage": 57.6, "elapsed_time": "0:11:54", "remaining_time": "0:08:45"}
|
| 289 |
+
{"current_steps": 2890, "total_steps": 5000, "loss": 1.0091, "lr": 2.2560427506021266e-05, "epoch": 0.578, "percentage": 57.8, "elapsed_time": "0:11:57", "remaining_time": "0:08:43"}
|
| 290 |
+
{"current_steps": 2900, "total_steps": 5000, "loss": 0.4665, "lr": 2.238678841830867e-05, "epoch": 0.58, "percentage": 58.0, "elapsed_time": "0:12:00", "remaining_time": "0:08:41"}
|
| 291 |
+
{"current_steps": 2910, "total_steps": 5000, "loss": 1.412, "lr": 2.2213276694841866e-05, "epoch": 0.582, "percentage": 58.2, "elapsed_time": "0:12:02", "remaining_time": "0:08:38"}
|
| 292 |
+
{"current_steps": 2920, "total_steps": 5000, "loss": 1.3582, "lr": 2.2039900792337474e-05, "epoch": 0.584, "percentage": 58.4, "elapsed_time": "0:12:04", "remaining_time": "0:08:36"}
|
| 293 |
+
{"current_steps": 2930, "total_steps": 5000, "loss": 0.3297, "lr": 2.186666916089239e-05, "epoch": 0.586, "percentage": 58.6, "elapsed_time": "0:12:06", "remaining_time": "0:08:33"}
|
| 294 |
+
{"current_steps": 2940, "total_steps": 5000, "loss": 0.5486, "lr": 2.1693590243571938e-05, "epoch": 0.588, "percentage": 58.8, "elapsed_time": "0:12:08", "remaining_time": "0:08:30"}
|
| 295 |
+
{"current_steps": 2950, "total_steps": 5000, "loss": 0.341, "lr": 2.1520672475998373e-05, "epoch": 0.59, "percentage": 59.0, "elapsed_time": "0:12:11", "remaining_time": "0:08:28"}
|
| 296 |
+
{"current_steps": 2960, "total_steps": 5000, "loss": 1.8438, "lr": 2.1347924285939714e-05, "epoch": 0.592, "percentage": 59.2, "elapsed_time": "0:12:14", "remaining_time": "0:08:26"}
|
| 297 |
+
{"current_steps": 2970, "total_steps": 5000, "loss": 1.0661, "lr": 2.117535409289905e-05, "epoch": 0.594, "percentage": 59.4, "elapsed_time": "0:12:17", "remaining_time": "0:08:23"}
|
| 298 |
+
{"current_steps": 2980, "total_steps": 5000, "loss": 0.5445, "lr": 2.1002970307704132e-05, "epoch": 0.596, "percentage": 59.6, "elapsed_time": "0:12:19", "remaining_time": "0:08:21"}
|
| 299 |
+
{"current_steps": 2990, "total_steps": 5000, "loss": 0.735, "lr": 2.0830781332097446e-05, "epoch": 0.598, "percentage": 59.8, "elapsed_time": "0:12:22", "remaining_time": "0:08:18"}
|
| 300 |
+
{"current_steps": 3000, "total_steps": 5000, "loss": 1.2389, "lr": 2.0658795558326743e-05, "epoch": 0.6, "percentage": 60.0, "elapsed_time": "0:12:24", "remaining_time": "0:08:16"}
|
| 301 |
+
{"current_steps": 3010, "total_steps": 5000, "loss": 0.6412, "lr": 2.0487021368736003e-05, "epoch": 0.602, "percentage": 60.2, "elapsed_time": "0:12:27", "remaining_time": "0:08:14"}
|
| 302 |
+
{"current_steps": 3020, "total_steps": 5000, "loss": 0.6114, "lr": 2.031546713535688e-05, "epoch": 0.604, "percentage": 60.4, "elapsed_time": "0:12:30", "remaining_time": "0:08:11"}
|
| 303 |
+
{"current_steps": 3030, "total_steps": 5000, "loss": 0.846, "lr": 2.0144141219500705e-05, "epoch": 0.606, "percentage": 60.6, "elapsed_time": "0:12:32", "remaining_time": "0:08:09"}
|
| 304 |
+
{"current_steps": 3040, "total_steps": 5000, "loss": 1.1015, "lr": 1.9973051971350888e-05, "epoch": 0.608, "percentage": 60.8, "elapsed_time": "0:12:35", "remaining_time": "0:08:07"}
|
| 305 |
+
{"current_steps": 3050, "total_steps": 5000, "loss": 0.2744, "lr": 1.980220772955602e-05, "epoch": 0.61, "percentage": 61.0, "elapsed_time": "0:12:37", "remaining_time": "0:08:04"}
|
| 306 |
+
{"current_steps": 3060, "total_steps": 5000, "loss": 1.048, "lr": 1.963161682082342e-05, "epoch": 0.612, "percentage": 61.2, "elapsed_time": "0:12:40", "remaining_time": "0:08:02"}
|
| 307 |
+
{"current_steps": 3070, "total_steps": 5000, "loss": 1.5106, "lr": 1.946128755951332e-05, "epoch": 0.614, "percentage": 61.4, "elapsed_time": "0:12:43", "remaining_time": "0:07:59"}
|
| 308 |
+
{"current_steps": 3080, "total_steps": 5000, "loss": 1.3313, "lr": 1.9291228247233605e-05, "epoch": 0.616, "percentage": 61.6, "elapsed_time": "0:12:45", "remaining_time": "0:07:57"}
|
| 309 |
+
{"current_steps": 3090, "total_steps": 5000, "loss": 1.1983, "lr": 1.912144717243525e-05, "epoch": 0.618, "percentage": 61.8, "elapsed_time": "0:12:48", "remaining_time": "0:07:55"}
|
| 310 |
+
{"current_steps": 3100, "total_steps": 5000, "loss": 0.8728, "lr": 1.895195261000831e-05, "epoch": 0.62, "percentage": 62.0, "elapsed_time": "0:12:51", "remaining_time": "0:07:52"}
|
| 311 |
+
{"current_steps": 3110, "total_steps": 5000, "loss": 0.6307, "lr": 1.8782752820878634e-05, "epoch": 0.622, "percentage": 62.2, "elapsed_time": "0:12:53", "remaining_time": "0:07:50"}
|
| 312 |
+
{"current_steps": 3120, "total_steps": 5000, "loss": 0.7477, "lr": 1.8613856051605243e-05, "epoch": 0.624, "percentage": 62.4, "elapsed_time": "0:12:56", "remaining_time": "0:07:47"}
|
| 313 |
+
{"current_steps": 3130, "total_steps": 5000, "loss": 1.0535, "lr": 1.8445270533978388e-05, "epoch": 0.626, "percentage": 62.6, "elapsed_time": "0:12:58", "remaining_time": "0:07:45"}
|
| 314 |
+
{"current_steps": 3140, "total_steps": 5000, "loss": 1.0675, "lr": 1.827700448461836e-05, "epoch": 0.628, "percentage": 62.8, "elapsed_time": "0:13:01", "remaining_time": "0:07:43"}
|
| 315 |
+
{"current_steps": 3150, "total_steps": 5000, "loss": 1.6361, "lr": 1.8109066104575023e-05, "epoch": 0.63, "percentage": 63.0, "elapsed_time": "0:13:03", "remaining_time": "0:07:40"}
|
| 316 |
+
{"current_steps": 3160, "total_steps": 5000, "loss": 1.1624, "lr": 1.7941463578928086e-05, "epoch": 0.632, "percentage": 63.2, "elapsed_time": "0:13:06", "remaining_time": "0:07:38"}
|
| 317 |
+
{"current_steps": 3170, "total_steps": 5000, "loss": 0.8855, "lr": 1.7774205076388206e-05, "epoch": 0.634, "percentage": 63.4, "elapsed_time": "0:13:08", "remaining_time": "0:07:35"}
|
| 318 |
+
{"current_steps": 3180, "total_steps": 5000, "loss": 1.0965, "lr": 1.7607298748898842e-05, "epoch": 0.636, "percentage": 63.6, "elapsed_time": "0:13:11", "remaining_time": "0:07:33"}
|
| 319 |
+
{"current_steps": 3190, "total_steps": 5000, "loss": 0.3191, "lr": 1.744075273123889e-05, "epoch": 0.638, "percentage": 63.8, "elapsed_time": "0:13:13", "remaining_time": "0:07:30"}
|
| 320 |
+
{"current_steps": 3200, "total_steps": 5000, "loss": 0.6535, "lr": 1.7274575140626318e-05, "epoch": 0.64, "percentage": 64.0, "elapsed_time": "0:13:15", "remaining_time": "0:07:27"}
|
| 321 |
+
{"current_steps": 3210, "total_steps": 5000, "loss": 0.4069, "lr": 1.7108774076322443e-05, "epoch": 0.642, "percentage": 64.2, "elapsed_time": "0:13:16", "remaining_time": "0:07:24"}
|
| 322 |
+
{"current_steps": 3220, "total_steps": 5000, "loss": 0.5898, "lr": 1.6943357619237226e-05, "epoch": 0.644, "percentage": 64.4, "elapsed_time": "0:13:18", "remaining_time": "0:07:21"}
|
| 323 |
+
{"current_steps": 3230, "total_steps": 5000, "loss": 0.3429, "lr": 1.677833383153542e-05, "epoch": 0.646, "percentage": 64.6, "elapsed_time": "0:13:21", "remaining_time": "0:07:19"}
|
| 324 |
+
{"current_steps": 3240, "total_steps": 5000, "loss": 0.6403, "lr": 1.6613710756243626e-05, "epoch": 0.648, "percentage": 64.8, "elapsed_time": "0:13:23", "remaining_time": "0:07:16"}
|
| 325 |
+
{"current_steps": 3250, "total_steps": 5000, "loss": 1.1898, "lr": 1.6449496416858284e-05, "epoch": 0.65, "percentage": 65.0, "elapsed_time": "0:13:26", "remaining_time": "0:07:14"}
|
| 326 |
+
{"current_steps": 3260, "total_steps": 5000, "loss": 0.4049, "lr": 1.6285698816954624e-05, "epoch": 0.652, "percentage": 65.2, "elapsed_time": "0:13:28", "remaining_time": "0:07:11"}
|
| 327 |
+
{"current_steps": 3270, "total_steps": 5000, "loss": 0.6081, "lr": 1.612232593979658e-05, "epoch": 0.654, "percentage": 65.4, "elapsed_time": "0:13:30", "remaining_time": "0:07:09"}
|
| 328 |
+
{"current_steps": 3280, "total_steps": 5000, "loss": 0.8107, "lr": 1.5959385747947698e-05, "epoch": 0.656, "percentage": 65.6, "elapsed_time": "0:13:33", "remaining_time": "0:07:06"}
|
| 329 |
+
{"current_steps": 3290, "total_steps": 5000, "loss": 0.6606, "lr": 1.5796886182883053e-05, "epoch": 0.658, "percentage": 65.8, "elapsed_time": "0:13:35", "remaining_time": "0:07:03"}
|
| 330 |
+
{"current_steps": 3300, "total_steps": 5000, "loss": 0.4767, "lr": 1.56348351646022e-05, "epoch": 0.66, "percentage": 66.0, "elapsed_time": "0:13:37", "remaining_time": "0:07:01"}
|
| 331 |
+
{"current_steps": 3310, "total_steps": 5000, "loss": 0.988, "lr": 1.547324059124315e-05, "epoch": 0.662, "percentage": 66.2, "elapsed_time": "0:13:39", "remaining_time": "0:06:58"}
|
| 332 |
+
{"current_steps": 3320, "total_steps": 5000, "loss": 1.3237, "lr": 1.5312110338697426e-05, "epoch": 0.664, "percentage": 66.4, "elapsed_time": "0:13:42", "remaining_time": "0:06:56"}
|
| 333 |
+
{"current_steps": 3330, "total_steps": 5000, "loss": 0.4173, "lr": 1.5151452260226224e-05, "epoch": 0.666, "percentage": 66.6, "elapsed_time": "0:13:44", "remaining_time": "0:06:53"}
|
| 334 |
+
{"current_steps": 3340, "total_steps": 5000, "loss": 0.7819, "lr": 1.4991274186077632e-05, "epoch": 0.668, "percentage": 66.8, "elapsed_time": "0:13:47", "remaining_time": "0:06:51"}
|
| 335 |
+
{"current_steps": 3350, "total_steps": 5000, "loss": 0.334, "lr": 1.4831583923104999e-05, "epoch": 0.67, "percentage": 67.0, "elapsed_time": "0:13:48", "remaining_time": "0:06:48"}
|
| 336 |
+
{"current_steps": 3360, "total_steps": 5000, "loss": 0.8675, "lr": 1.467238925438646e-05, "epoch": 0.672, "percentage": 67.2, "elapsed_time": "0:13:51", "remaining_time": "0:06:46"}
|
| 337 |
+
{"current_steps": 3370, "total_steps": 5000, "loss": 0.6056, "lr": 1.4513697938845572e-05, "epoch": 0.674, "percentage": 67.4, "elapsed_time": "0:13:54", "remaining_time": "0:06:43"}
|
| 338 |
+
{"current_steps": 3380, "total_steps": 5000, "loss": 0.2135, "lr": 1.4355517710873184e-05, "epoch": 0.676, "percentage": 67.6, "elapsed_time": "0:13:56", "remaining_time": "0:06:41"}
|
| 339 |
+
{"current_steps": 3390, "total_steps": 5000, "loss": 1.0051, "lr": 1.4197856279950438e-05, "epoch": 0.678, "percentage": 67.8, "elapsed_time": "0:13:59", "remaining_time": "0:06:38"}
|
| 340 |
+
{"current_steps": 3400, "total_steps": 5000, "loss": 0.4415, "lr": 1.4040721330273062e-05, "epoch": 0.68, "percentage": 68.0, "elapsed_time": "0:14:02", "remaining_time": "0:06:36"}
|
| 341 |
+
{"current_steps": 3410, "total_steps": 5000, "loss": 0.625, "lr": 1.388412052037682e-05, "epoch": 0.682, "percentage": 68.2, "elapsed_time": "0:14:05", "remaining_time": "0:06:34"}
|
| 342 |
+
{"current_steps": 3420, "total_steps": 5000, "loss": 1.0217, "lr": 1.3728061482764238e-05, "epoch": 0.684, "percentage": 68.4, "elapsed_time": "0:14:08", "remaining_time": "0:06:31"}
|
| 343 |
+
{"current_steps": 3430, "total_steps": 5000, "loss": 1.101, "lr": 1.3572551823532654e-05, "epoch": 0.686, "percentage": 68.6, "elapsed_time": "0:14:10", "remaining_time": "0:06:29"}
|
| 344 |
+
{"current_steps": 3440, "total_steps": 5000, "loss": 0.9139, "lr": 1.3417599122003464e-05, "epoch": 0.688, "percentage": 68.8, "elapsed_time": "0:14:13", "remaining_time": "0:06:27"}
|
| 345 |
+
{"current_steps": 3450, "total_steps": 5000, "loss": 0.7426, "lr": 1.3263210930352737e-05, "epoch": 0.69, "percentage": 69.0, "elapsed_time": "0:14:16", "remaining_time": "0:06:24"}
|
| 346 |
+
{"current_steps": 3460, "total_steps": 5000, "loss": 1.0954, "lr": 1.3109394773243117e-05, "epoch": 0.692, "percentage": 69.2, "elapsed_time": "0:14:18", "remaining_time": "0:06:22"}
|
| 347 |
+
{"current_steps": 3470, "total_steps": 5000, "loss": 0.6788, "lr": 1.2956158147457115e-05, "epoch": 0.694, "percentage": 69.4, "elapsed_time": "0:14:21", "remaining_time": "0:06:19"}
|
| 348 |
+
{"current_steps": 3480, "total_steps": 5000, "loss": 0.8617, "lr": 1.280350852153168e-05, "epoch": 0.696, "percentage": 69.6, "elapsed_time": "0:14:24", "remaining_time": "0:06:17"}
|
| 349 |
+
{"current_steps": 3490, "total_steps": 5000, "loss": 0.653, "lr": 1.2651453335394231e-05, "epoch": 0.698, "percentage": 69.8, "elapsed_time": "0:14:27", "remaining_time": "0:06:15"}
|
| 350 |
+
{"current_steps": 3500, "total_steps": 5000, "loss": 0.8003, "lr": 1.2500000000000006e-05, "epoch": 0.7, "percentage": 70.0, "elapsed_time": "0:14:29", "remaining_time": "0:06:12"}
|
| 351 |
+
{"current_steps": 3510, "total_steps": 5000, "loss": 0.5579, "lr": 1.234915589697091e-05, "epoch": 0.702, "percentage": 70.2, "elapsed_time": "0:14:31", "remaining_time": "0:06:10"}
|
| 352 |
+
{"current_steps": 3520, "total_steps": 5000, "loss": 1.5354, "lr": 1.2198928378235716e-05, "epoch": 0.704, "percentage": 70.4, "elapsed_time": "0:14:34", "remaining_time": "0:06:07"}
|
| 353 |
+
{"current_steps": 3530, "total_steps": 5000, "loss": 1.6175, "lr": 1.2049324765671749e-05, "epoch": 0.706, "percentage": 70.6, "elapsed_time": "0:14:37", "remaining_time": "0:06:05"}
|
| 354 |
+
{"current_steps": 3540, "total_steps": 5000, "loss": 0.4771, "lr": 1.1900352350748026e-05, "epoch": 0.708, "percentage": 70.8, "elapsed_time": "0:14:39", "remaining_time": "0:06:02"}
|
| 355 |
+
{"current_steps": 3550, "total_steps": 5000, "loss": 0.779, "lr": 1.175201839416988e-05, "epoch": 0.71, "percentage": 71.0, "elapsed_time": "0:14:42", "remaining_time": "0:06:00"}
|
| 356 |
+
{"current_steps": 3560, "total_steps": 5000, "loss": 1.1478, "lr": 1.1604330125525079e-05, "epoch": 0.712, "percentage": 71.2, "elapsed_time": "0:14:44", "remaining_time": "0:05:57"}
|
| 357 |
+
{"current_steps": 3570, "total_steps": 5000, "loss": 0.7484, "lr": 1.1457294742931507e-05, "epoch": 0.714, "percentage": 71.4, "elapsed_time": "0:14:47", "remaining_time": "0:05:55"}
|
| 358 |
+
{"current_steps": 3580, "total_steps": 5000, "loss": 1.0581, "lr": 1.1310919412686247e-05, "epoch": 0.716, "percentage": 71.6, "elapsed_time": "0:14:49", "remaining_time": "0:05:52"}
|
| 359 |
+
{"current_steps": 3590, "total_steps": 5000, "loss": 0.989, "lr": 1.11652112689164e-05, "epoch": 0.718, "percentage": 71.8, "elapsed_time": "0:14:51", "remaining_time": "0:05:50"}
|
| 360 |
+
{"current_steps": 3600, "total_steps": 5000, "loss": 1.5538, "lr": 1.1020177413231334e-05, "epoch": 0.72, "percentage": 72.0, "elapsed_time": "0:14:53", "remaining_time": "0:05:47"}
|
| 361 |
+
{"current_steps": 3610, "total_steps": 5000, "loss": 0.9328, "lr": 1.0875824914376553e-05, "epoch": 0.722, "percentage": 72.2, "elapsed_time": "0:14:55", "remaining_time": "0:05:44"}
|
| 362 |
+
{"current_steps": 3620, "total_steps": 5000, "loss": 1.4623, "lr": 1.0732160807889211e-05, "epoch": 0.724, "percentage": 72.4, "elapsed_time": "0:14:58", "remaining_time": "0:05:42"}
|
| 363 |
+
{"current_steps": 3630, "total_steps": 5000, "loss": 1.0274, "lr": 1.058919209575517e-05, "epoch": 0.726, "percentage": 72.6, "elapsed_time": "0:15:00", "remaining_time": "0:05:40"}
|
| 364 |
+
{"current_steps": 3640, "total_steps": 5000, "loss": 0.5183, "lr": 1.0446925746067768e-05, "epoch": 0.728, "percentage": 72.8, "elapsed_time": "0:15:03", "remaining_time": "0:05:37"}
|
| 365 |
+
{"current_steps": 3650, "total_steps": 5000, "loss": 0.8523, "lr": 1.0305368692688174e-05, "epoch": 0.73, "percentage": 73.0, "elapsed_time": "0:15:05", "remaining_time": "0:05:34"}
|
| 366 |
+
{"current_steps": 3660, "total_steps": 5000, "loss": 0.9334, "lr": 1.0164527834907467e-05, "epoch": 0.732, "percentage": 73.2, "elapsed_time": "0:15:07", "remaining_time": "0:05:32"}
|
| 367 |
+
{"current_steps": 3670, "total_steps": 5000, "loss": 1.2435, "lr": 1.0024410037110357e-05, "epoch": 0.734, "percentage": 73.4, "elapsed_time": "0:15:09", "remaining_time": "0:05:29"}
|
| 368 |
+
{"current_steps": 3680, "total_steps": 5000, "loss": 1.5611, "lr": 9.88502212844063e-06, "epoch": 0.736, "percentage": 73.6, "elapsed_time": "0:15:12", "remaining_time": "0:05:27"}
|
| 369 |
+
{"current_steps": 3690, "total_steps": 5000, "loss": 0.9649, "lr": 9.746370902468311e-06, "epoch": 0.738, "percentage": 73.8, "elapsed_time": "0:15:14", "remaining_time": "0:05:24"}
|
| 370 |
+
{"current_steps": 3700, "total_steps": 5000, "loss": 0.6053, "lr": 9.608463116858542e-06, "epoch": 0.74, "percentage": 74.0, "elapsed_time": "0:15:17", "remaining_time": "0:05:22"}
|
| 371 |
+
{"current_steps": 3710, "total_steps": 5000, "loss": 0.6377, "lr": 9.471305493042243e-06, "epoch": 0.742, "percentage": 74.2, "elapsed_time": "0:15:19", "remaining_time": "0:05:19"}
|
| 372 |
+
{"current_steps": 3720, "total_steps": 5000, "loss": 1.0698, "lr": 9.334904715888495e-06, "epoch": 0.744, "percentage": 74.4, "elapsed_time": "0:15:21", "remaining_time": "0:05:17"}
|
| 373 |
+
{"current_steps": 3730, "total_steps": 5000, "loss": 0.9858, "lr": 9.199267433378727e-06, "epoch": 0.746, "percentage": 74.6, "elapsed_time": "0:15:24", "remaining_time": "0:05:14"}
|
| 374 |
+
{"current_steps": 3740, "total_steps": 5000, "loss": 0.3602, "lr": 9.064400256282757e-06, "epoch": 0.748, "percentage": 74.8, "elapsed_time": "0:15:26", "remaining_time": "0:05:12"}
|
| 375 |
+
{"current_steps": 3750, "total_steps": 5000, "loss": 0.6672, "lr": 8.930309757836517e-06, "epoch": 0.75, "percentage": 75.0, "elapsed_time": "0:15:28", "remaining_time": "0:05:09"}
|
| 376 |
+
{"current_steps": 3760, "total_steps": 5000, "loss": 0.5627, "lr": 8.797002473421728e-06, "epoch": 0.752, "percentage": 75.2, "elapsed_time": "0:15:30", "remaining_time": "0:05:06"}
|
| 377 |
+
{"current_steps": 3770, "total_steps": 5000, "loss": 1.0572, "lr": 8.664484900247363e-06, "epoch": 0.754, "percentage": 75.4, "elapsed_time": "0:15:33", "remaining_time": "0:05:04"}
|
| 378 |
+
{"current_steps": 3780, "total_steps": 5000, "loss": 1.2507, "lr": 8.532763497032987e-06, "epoch": 0.756, "percentage": 75.6, "elapsed_time": "0:15:36", "remaining_time": "0:05:02"}
|
| 379 |
+
{"current_steps": 3790, "total_steps": 5000, "loss": 0.7735, "lr": 8.40184468369396e-06, "epoch": 0.758, "percentage": 75.8, "elapsed_time": "0:15:39", "remaining_time": "0:04:59"}
|
| 380 |
+
{"current_steps": 3800, "total_steps": 5000, "loss": 1.298, "lr": 8.271734841028553e-06, "epoch": 0.76, "percentage": 76.0, "elapsed_time": "0:15:42", "remaining_time": "0:04:57"}
|
| 381 |
+
{"current_steps": 3810, "total_steps": 5000, "loss": 0.7805, "lr": 8.142440310406924e-06, "epoch": 0.762, "percentage": 76.2, "elapsed_time": "0:15:45", "remaining_time": "0:04:55"}
|
| 382 |
+
{"current_steps": 3820, "total_steps": 5000, "loss": 0.7498, "lr": 8.013967393462094e-06, "epoch": 0.764, "percentage": 76.4, "elapsed_time": "0:15:47", "remaining_time": "0:04:52"}
|
| 383 |
+
{"current_steps": 3830, "total_steps": 5000, "loss": 1.1696, "lr": 7.886322351782783e-06, "epoch": 0.766, "percentage": 76.6, "elapsed_time": "0:15:50", "remaining_time": "0:04:50"}
|
| 384 |
+
{"current_steps": 3840, "total_steps": 5000, "loss": 0.6709, "lr": 7.759511406608255e-06, "epoch": 0.768, "percentage": 76.8, "elapsed_time": "0:15:52", "remaining_time": "0:04:47"}
|
| 385 |
+
{"current_steps": 3850, "total_steps": 5000, "loss": 0.9733, "lr": 7.633540738525066e-06, "epoch": 0.77, "percentage": 77.0, "elapsed_time": "0:15:55", "remaining_time": "0:04:45"}
|
| 386 |
+
{"current_steps": 3860, "total_steps": 5000, "loss": 0.6711, "lr": 7.508416487165862e-06, "epoch": 0.772, "percentage": 77.2, "elapsed_time": "0:15:57", "remaining_time": "0:04:42"}
|
| 387 |
+
{"current_steps": 3870, "total_steps": 5000, "loss": 0.7134, "lr": 7.384144750910133e-06, "epoch": 0.774, "percentage": 77.4, "elapsed_time": "0:16:00", "remaining_time": "0:04:40"}
|
| 388 |
+
{"current_steps": 3880, "total_steps": 5000, "loss": 1.0323, "lr": 7.260731586586983e-06, "epoch": 0.776, "percentage": 77.6, "elapsed_time": "0:16:02", "remaining_time": "0:04:37"}
|
| 389 |
+
{"current_steps": 3890, "total_steps": 5000, "loss": 0.4097, "lr": 7.138183009179922e-06, "epoch": 0.778, "percentage": 77.8, "elapsed_time": "0:16:04", "remaining_time": "0:04:35"}
|
| 390 |
+
{"current_steps": 3900, "total_steps": 5000, "loss": 1.0325, "lr": 7.016504991533726e-06, "epoch": 0.78, "percentage": 78.0, "elapsed_time": "0:16:07", "remaining_time": "0:04:32"}
|
| 391 |
+
{"current_steps": 3910, "total_steps": 5000, "loss": 0.2871, "lr": 6.895703464063319e-06, "epoch": 0.782, "percentage": 78.2, "elapsed_time": "0:16:10", "remaining_time": "0:04:30"}
|
| 392 |
+
{"current_steps": 3920, "total_steps": 5000, "loss": 0.8634, "lr": 6.775784314464717e-06, "epoch": 0.784, "percentage": 78.4, "elapsed_time": "0:16:12", "remaining_time": "0:04:27"}
|
| 393 |
+
{"current_steps": 3930, "total_steps": 5000, "loss": 1.8368, "lr": 6.656753387428089e-06, "epoch": 0.786, "percentage": 78.6, "elapsed_time": "0:16:15", "remaining_time": "0:04:25"}
|
| 394 |
+
{"current_steps": 3940, "total_steps": 5000, "loss": 0.5746, "lr": 6.538616484352902e-06, "epoch": 0.788, "percentage": 78.8, "elapsed_time": "0:16:17", "remaining_time": "0:04:23"}
|
| 395 |
+
{"current_steps": 3950, "total_steps": 5000, "loss": 2.333, "lr": 6.421379363065142e-06, "epoch": 0.79, "percentage": 79.0, "elapsed_time": "0:16:20", "remaining_time": "0:04:20"}
|
| 396 |
+
{"current_steps": 3960, "total_steps": 5000, "loss": 0.393, "lr": 6.305047737536707e-06, "epoch": 0.792, "percentage": 79.2, "elapsed_time": "0:16:22", "remaining_time": "0:04:18"}
|
| 397 |
+
{"current_steps": 3970, "total_steps": 5000, "loss": 1.0324, "lr": 6.189627277606894e-06, "epoch": 0.794, "percentage": 79.4, "elapsed_time": "0:16:25", "remaining_time": "0:04:15"}
|
| 398 |
+
{"current_steps": 3980, "total_steps": 5000, "loss": 0.9407, "lr": 6.075123608706093e-06, "epoch": 0.796, "percentage": 79.6, "elapsed_time": "0:16:27", "remaining_time": "0:04:13"}
|
| 399 |
+
{"current_steps": 3990, "total_steps": 5000, "loss": 1.2251, "lr": 5.961542311581586e-06, "epoch": 0.798, "percentage": 79.8, "elapsed_time": "0:16:30", "remaining_time": "0:04:10"}
|
| 400 |
+
{"current_steps": 4000, "total_steps": 5000, "loss": 0.635, "lr": 5.848888922025553e-06, "epoch": 0.8, "percentage": 80.0, "elapsed_time": "0:16:32", "remaining_time": "0:04:08"}
|
| 401 |
+
{"current_steps": 4010, "total_steps": 5000, "loss": 0.3039, "lr": 5.737168930605272e-06, "epoch": 0.802, "percentage": 80.2, "elapsed_time": "0:16:35", "remaining_time": "0:04:05"}
|
| 402 |
+
{"current_steps": 4020, "total_steps": 5000, "loss": 1.0606, "lr": 5.626387782395512e-06, "epoch": 0.804, "percentage": 80.4, "elapsed_time": "0:16:38", "remaining_time": "0:04:03"}
|
| 403 |
+
{"current_steps": 4030, "total_steps": 5000, "loss": 2.3978, "lr": 5.5165508767131415e-06, "epoch": 0.806, "percentage": 80.6, "elapsed_time": "0:16:40", "remaining_time": "0:04:00"}
|
| 404 |
+
{"current_steps": 4040, "total_steps": 5000, "loss": 1.7771, "lr": 5.4076635668540075e-06, "epoch": 0.808, "percentage": 80.8, "elapsed_time": "0:16:42", "remaining_time": "0:03:58"}
|
| 405 |
+
{"current_steps": 4050, "total_steps": 5000, "loss": 1.5544, "lr": 5.299731159831953e-06, "epoch": 0.81, "percentage": 81.0, "elapsed_time": "0:16:45", "remaining_time": "0:03:55"}
|
| 406 |
+
{"current_steps": 4060, "total_steps": 5000, "loss": 0.574, "lr": 5.192758916120236e-06, "epoch": 0.812, "percentage": 81.2, "elapsed_time": "0:16:48", "remaining_time": "0:03:53"}
|
| 407 |
+
{"current_steps": 4070, "total_steps": 5000, "loss": 0.9646, "lr": 5.086752049395094e-06, "epoch": 0.814, "percentage": 81.4, "elapsed_time": "0:16:50", "remaining_time": "0:03:50"}
|
| 408 |
+
{"current_steps": 4080, "total_steps": 5000, "loss": 0.422, "lr": 4.981715726281666e-06, "epoch": 0.816, "percentage": 81.6, "elapsed_time": "0:16:52", "remaining_time": "0:03:48"}
|
| 409 |
+
{"current_steps": 4090, "total_steps": 5000, "loss": 0.6738, "lr": 4.877655066102149e-06, "epoch": 0.818, "percentage": 81.8, "elapsed_time": "0:16:54", "remaining_time": "0:03:45"}
|
| 410 |
+
{"current_steps": 4100, "total_steps": 5000, "loss": 0.1991, "lr": 4.7745751406263165e-06, "epoch": 0.82, "percentage": 82.0, "elapsed_time": "0:16:57", "remaining_time": "0:03:43"}
|
| 411 |
+
{"current_steps": 4110, "total_steps": 5000, "loss": 0.7171, "lr": 4.672480973824311e-06, "epoch": 0.822, "percentage": 82.2, "elapsed_time": "0:17:00", "remaining_time": "0:03:40"}
|
| 412 |
+
{"current_steps": 4120, "total_steps": 5000, "loss": 0.7478, "lr": 4.571377541621788e-06, "epoch": 0.824, "percentage": 82.4, "elapsed_time": "0:17:02", "remaining_time": "0:03:38"}
|
| 413 |
+
{"current_steps": 4130, "total_steps": 5000, "loss": 0.6985, "lr": 4.4712697716574e-06, "epoch": 0.826, "percentage": 82.6, "elapsed_time": "0:17:05", "remaining_time": "0:03:35"}
|
| 414 |
+
{"current_steps": 4140, "total_steps": 5000, "loss": 0.7893, "lr": 4.372162543042624e-06, "epoch": 0.828, "percentage": 82.8, "elapsed_time": "0:17:07", "remaining_time": "0:03:33"}
|
| 415 |
+
{"current_steps": 4150, "total_steps": 5000, "loss": 0.8127, "lr": 4.274060686123959e-06, "epoch": 0.83, "percentage": 83.0, "elapsed_time": "0:17:10", "remaining_time": "0:03:31"}
|
| 416 |
+
{"current_steps": 4160, "total_steps": 5000, "loss": 0.5259, "lr": 4.176968982247514e-06, "epoch": 0.832, "percentage": 83.2, "elapsed_time": "0:17:13", "remaining_time": "0:03:28"}
|
| 417 |
+
{"current_steps": 4170, "total_steps": 5000, "loss": 0.9835, "lr": 4.08089216352596e-06, "epoch": 0.834, "percentage": 83.4, "elapsed_time": "0:17:15", "remaining_time": "0:03:26"}
|
| 418 |
+
{"current_steps": 4180, "total_steps": 5000, "loss": 0.6508, "lr": 3.985834912607894e-06, "epoch": 0.836, "percentage": 83.6, "elapsed_time": "0:17:17", "remaining_time": "0:03:23"}
|
| 419 |
+
{"current_steps": 4190, "total_steps": 5000, "loss": 0.437, "lr": 3.891801862449629e-06, "epoch": 0.838, "percentage": 83.8, "elapsed_time": "0:17:19", "remaining_time": "0:03:20"}
|
| 420 |
+
{"current_steps": 4200, "total_steps": 5000, "loss": 1.6048, "lr": 3.798797596089351e-06, "epoch": 0.84, "percentage": 84.0, "elapsed_time": "0:17:22", "remaining_time": "0:03:18"}
|
| 421 |
+
{"current_steps": 4210, "total_steps": 5000, "loss": 0.8315, "lr": 3.7068266464238084e-06, "epoch": 0.842, "percentage": 84.2, "elapsed_time": "0:17:25", "remaining_time": "0:03:16"}
|
| 422 |
+
{"current_steps": 4220, "total_steps": 5000, "loss": 0.3853, "lr": 3.6158934959873353e-06, "epoch": 0.844, "percentage": 84.4, "elapsed_time": "0:17:26", "remaining_time": "0:03:13"}
|
| 423 |
+
{"current_steps": 4230, "total_steps": 5000, "loss": 1.3448, "lr": 3.5260025767333893e-06, "epoch": 0.846, "percentage": 84.6, "elapsed_time": "0:17:29", "remaining_time": "0:03:11"}
|
| 424 |
+
{"current_steps": 4240, "total_steps": 5000, "loss": 0.2405, "lr": 3.4371582698185633e-06, "epoch": 0.848, "percentage": 84.8, "elapsed_time": "0:17:31", "remaining_time": "0:03:08"}
|
| 425 |
+
{"current_steps": 4250, "total_steps": 5000, "loss": 0.8112, "lr": 3.3493649053890326e-06, "epoch": 0.85, "percentage": 85.0, "elapsed_time": "0:17:34", "remaining_time": "0:03:06"}
|
| 426 |
+
{"current_steps": 4260, "total_steps": 5000, "loss": 1.3852, "lr": 3.262626762369525e-06, "epoch": 0.852, "percentage": 85.2, "elapsed_time": "0:17:36", "remaining_time": "0:03:03"}
|
| 427 |
+
{"current_steps": 4270, "total_steps": 5000, "loss": 0.3188, "lr": 3.176948068254762e-06, "epoch": 0.854, "percentage": 85.4, "elapsed_time": "0:17:38", "remaining_time": "0:03:01"}
|
| 428 |
+
{"current_steps": 4280, "total_steps": 5000, "loss": 0.4712, "lr": 3.092332998903416e-06, "epoch": 0.856, "percentage": 85.6, "elapsed_time": "0:17:41", "remaining_time": "0:02:58"}
|
| 429 |
+
{"current_steps": 4290, "total_steps": 5000, "loss": 0.4124, "lr": 3.0087856783345914e-06, "epoch": 0.858, "percentage": 85.8, "elapsed_time": "0:17:43", "remaining_time": "0:02:56"}
|
| 430 |
+
{"current_steps": 4300, "total_steps": 5000, "loss": 1.4114, "lr": 2.9263101785268254e-06, "epoch": 0.86, "percentage": 86.0, "elapsed_time": "0:17:46", "remaining_time": "0:02:53"}
|
| 431 |
+
{"current_steps": 4310, "total_steps": 5000, "loss": 1.5525, "lr": 2.8449105192196316e-06, "epoch": 0.862, "percentage": 86.2, "elapsed_time": "0:17:49", "remaining_time": "0:02:51"}
|
| 432 |
+
{"current_steps": 4320, "total_steps": 5000, "loss": 1.1899, "lr": 2.764590667717562e-06, "epoch": 0.864, "percentage": 86.4, "elapsed_time": "0:17:51", "remaining_time": "0:02:48"}
|
| 433 |
+
{"current_steps": 4330, "total_steps": 5000, "loss": 0.5857, "lr": 2.6853545386968606e-06, "epoch": 0.866, "percentage": 86.6, "elapsed_time": "0:17:53", "remaining_time": "0:02:46"}
|
| 434 |
+
{"current_steps": 4340, "total_steps": 5000, "loss": 1.6246, "lr": 2.6072059940146775e-06, "epoch": 0.868, "percentage": 86.8, "elapsed_time": "0:17:56", "remaining_time": "0:02:43"}
|
| 435 |
+
{"current_steps": 4350, "total_steps": 5000, "loss": 0.2832, "lr": 2.5301488425208296e-06, "epoch": 0.87, "percentage": 87.0, "elapsed_time": "0:17:58", "remaining_time": "0:02:41"}
|
| 436 |
+
{"current_steps": 4360, "total_steps": 5000, "loss": 1.5246, "lr": 2.454186839872158e-06, "epoch": 0.872, "percentage": 87.2, "elapsed_time": "0:18:01", "remaining_time": "0:02:38"}
|
| 437 |
+
{"current_steps": 4370, "total_steps": 5000, "loss": 1.5678, "lr": 2.379323688349516e-06, "epoch": 0.874, "percentage": 87.4, "elapsed_time": "0:18:03", "remaining_time": "0:02:36"}
|
| 438 |
+
{"current_steps": 4380, "total_steps": 5000, "loss": 1.3038, "lr": 2.3055630366772856e-06, "epoch": 0.876, "percentage": 87.6, "elapsed_time": "0:18:05", "remaining_time": "0:02:33"}
|
| 439 |
+
{"current_steps": 4390, "total_steps": 5000, "loss": 0.7802, "lr": 2.2329084798455746e-06, "epoch": 0.878, "percentage": 87.8, "elapsed_time": "0:18:08", "remaining_time": "0:02:31"}
|
| 440 |
+
{"current_steps": 4400, "total_steps": 5000, "loss": 1.1187, "lr": 2.1613635589349756e-06, "epoch": 0.88, "percentage": 88.0, "elapsed_time": "0:18:10", "remaining_time": "0:02:28"}
|
| 441 |
+
{"current_steps": 4410, "total_steps": 5000, "loss": 1.7619, "lr": 2.0909317609440095e-06, "epoch": 0.882, "percentage": 88.2, "elapsed_time": "0:18:13", "remaining_time": "0:02:26"}
|
| 442 |
+
{"current_steps": 4420, "total_steps": 5000, "loss": 1.2435, "lr": 2.0216165186191407e-06, "epoch": 0.884, "percentage": 88.4, "elapsed_time": "0:18:16", "remaining_time": "0:02:23"}
|
| 443 |
+
{"current_steps": 4430, "total_steps": 5000, "loss": 1.7134, "lr": 1.95342121028749e-06, "epoch": 0.886, "percentage": 88.6, "elapsed_time": "0:18:19", "remaining_time": "0:02:21"}
|
| 444 |
+
{"current_steps": 4440, "total_steps": 5000, "loss": 0.8158, "lr": 1.8863491596921745e-06, "epoch": 0.888, "percentage": 88.8, "elapsed_time": "0:18:22", "remaining_time": "0:02:19"}
|
| 445 |
+
{"current_steps": 4450, "total_steps": 5000, "loss": 0.7814, "lr": 1.8204036358303173e-06, "epoch": 0.89, "percentage": 89.0, "elapsed_time": "0:18:25", "remaining_time": "0:02:16"}
|
| 446 |
+
{"current_steps": 4460, "total_steps": 5000, "loss": 0.6062, "lr": 1.7555878527937164e-06, "epoch": 0.892, "percentage": 89.2, "elapsed_time": "0:18:27", "remaining_time": "0:02:14"}
|
| 447 |
+
{"current_steps": 4470, "total_steps": 5000, "loss": 1.6605, "lr": 1.6919049696121958e-06, "epoch": 0.894, "percentage": 89.4, "elapsed_time": "0:18:29", "remaining_time": "0:02:11"}
|
| 448 |
+
{"current_steps": 4480, "total_steps": 5000, "loss": 0.6406, "lr": 1.629358090099639e-06, "epoch": 0.896, "percentage": 89.6, "elapsed_time": "0:18:31", "remaining_time": "0:02:09"}
|
| 449 |
+
{"current_steps": 4490, "total_steps": 5000, "loss": 0.8206, "lr": 1.5679502627027136e-06, "epoch": 0.898, "percentage": 89.8, "elapsed_time": "0:18:33", "remaining_time": "0:02:06"}
|
| 450 |
+
{"current_steps": 4500, "total_steps": 5000, "loss": 0.3695, "lr": 1.5076844803522922e-06, "epoch": 0.9, "percentage": 90.0, "elapsed_time": "0:18:35", "remaining_time": "0:02:03"}
|
| 451 |
+
{"current_steps": 4510, "total_steps": 5000, "loss": 1.0353, "lr": 1.4485636803175829e-06, "epoch": 0.902, "percentage": 90.2, "elapsed_time": "0:18:38", "remaining_time": "0:02:01"}
|
| 452 |
+
{"current_steps": 4520, "total_steps": 5000, "loss": 0.8934, "lr": 1.3905907440629752e-06, "epoch": 0.904, "percentage": 90.4, "elapsed_time": "0:18:41", "remaining_time": "0:01:59"}
|
| 453 |
+
{"current_steps": 4530, "total_steps": 5000, "loss": 0.5448, "lr": 1.333768497107593e-06, "epoch": 0.906, "percentage": 90.6, "elapsed_time": "0:18:44", "remaining_time": "0:01:56"}
|
| 454 |
+
{"current_steps": 4540, "total_steps": 5000, "loss": 0.6171, "lr": 1.2780997088875869e-06, "epoch": 0.908, "percentage": 90.8, "elapsed_time": "0:18:46", "remaining_time": "0:01:54"}
|
| 455 |
+
{"current_steps": 4550, "total_steps": 5000, "loss": 0.8105, "lr": 1.2235870926211619e-06, "epoch": 0.91, "percentage": 91.0, "elapsed_time": "0:18:49", "remaining_time": "0:01:51"}
|
| 456 |
+
{"current_steps": 4560, "total_steps": 5000, "loss": 0.6286, "lr": 1.170233305176327e-06, "epoch": 0.912, "percentage": 91.2, "elapsed_time": "0:18:51", "remaining_time": "0:01:49"}
|
| 457 |
+
{"current_steps": 4570, "total_steps": 5000, "loss": 1.37, "lr": 1.1180409469414094e-06, "epoch": 0.914, "percentage": 91.4, "elapsed_time": "0:18:54", "remaining_time": "0:01:46"}
|
| 458 |
+
{"current_steps": 4580, "total_steps": 5000, "loss": 0.6792, "lr": 1.067012561698319e-06, "epoch": 0.916, "percentage": 91.6, "elapsed_time": "0:18:55", "remaining_time": "0:01:44"}
|
| 459 |
+
{"current_steps": 4590, "total_steps": 5000, "loss": 3.2842, "lr": 1.0171506364985622e-06, "epoch": 0.918, "percentage": 91.8, "elapsed_time": "0:18:58", "remaining_time": "0:01:41"}
|
| 460 |
+
{"current_steps": 4600, "total_steps": 5000, "loss": 0.7875, "lr": 9.684576015420278e-07, "epoch": 0.92, "percentage": 92.0, "elapsed_time": "0:19:01", "remaining_time": "0:01:39"}
|
| 461 |
+
{"current_steps": 4610, "total_steps": 5000, "loss": 0.7768, "lr": 9.209358300585474e-07, "epoch": 0.922, "percentage": 92.2, "elapsed_time": "0:19:03", "remaining_time": "0:01:36"}
|
| 462 |
+
{"current_steps": 4620, "total_steps": 5000, "loss": 0.9518, "lr": 8.745876381922147e-07, "epoch": 0.924, "percentage": 92.4, "elapsed_time": "0:19:06", "remaining_time": "0:01:34"}
|
| 463 |
+
{"current_steps": 4630, "total_steps": 5000, "loss": 0.4208, "lr": 8.294152848885157e-07, "epoch": 0.926, "percentage": 92.6, "elapsed_time": "0:19:09", "remaining_time": "0:01:31"}
|
| 464 |
+
{"current_steps": 4640, "total_steps": 5000, "loss": 0.2602, "lr": 7.854209717842231e-07, "epoch": 0.928, "percentage": 92.8, "elapsed_time": "0:19:11", "remaining_time": "0:01:29"}
|
| 465 |
+
{"current_steps": 4650, "total_steps": 5000, "loss": 0.8418, "lr": 7.426068431000882e-07, "epoch": 0.93, "percentage": 93.0, "elapsed_time": "0:19:14", "remaining_time": "0:01:26"}
|
| 466 |
+
{"current_steps": 4660, "total_steps": 5000, "loss": 0.4949, "lr": 7.009749855363456e-07, "epoch": 0.932, "percentage": 93.2, "elapsed_time": "0:19:16", "remaining_time": "0:01:24"}
|
| 467 |
+
{"current_steps": 4670, "total_steps": 5000, "loss": 0.5919, "lr": 6.605274281709928e-07, "epoch": 0.934, "percentage": 93.4, "elapsed_time": "0:19:19", "remaining_time": "0:01:21"}
|
| 468 |
+
{"current_steps": 4680, "total_steps": 5000, "loss": 0.9009, "lr": 6.212661423609184e-07, "epoch": 0.936, "percentage": 93.6, "elapsed_time": "0:19:21", "remaining_time": "0:01:19"}
|
| 469 |
+
{"current_steps": 4690, "total_steps": 5000, "loss": 0.4119, "lr": 5.83193041645802e-07, "epoch": 0.938, "percentage": 93.8, "elapsed_time": "0:19:23", "remaining_time": "0:01:16"}
|
| 470 |
+
{"current_steps": 4700, "total_steps": 5000, "loss": 0.4794, "lr": 5.463099816548579e-07, "epoch": 0.94, "percentage": 94.0, "elapsed_time": "0:19:25", "remaining_time": "0:01:14"}
|
| 471 |
+
{"current_steps": 4710, "total_steps": 5000, "loss": 0.8312, "lr": 5.106187600163987e-07, "epoch": 0.942, "percentage": 94.2, "elapsed_time": "0:19:27", "remaining_time": "0:01:11"}
|
| 472 |
+
{"current_steps": 4720, "total_steps": 5000, "loss": 1.0177, "lr": 4.7612111627021175e-07, "epoch": 0.944, "percentage": 94.4, "elapsed_time": "0:19:30", "remaining_time": "0:01:09"}
|
| 473 |
+
{"current_steps": 4730, "total_steps": 5000, "loss": 0.7843, "lr": 4.4281873178278475e-07, "epoch": 0.946, "percentage": 94.6, "elapsed_time": "0:19:32", "remaining_time": "0:01:06"}
|
| 474 |
+
{"current_steps": 4740, "total_steps": 5000, "loss": 0.7314, "lr": 4.107132296653549e-07, "epoch": 0.948, "percentage": 94.8, "elapsed_time": "0:19:35", "remaining_time": "0:01:04"}
|
| 475 |
+
{"current_steps": 4750, "total_steps": 5000, "loss": 0.5168, "lr": 3.7980617469479953e-07, "epoch": 0.95, "percentage": 95.0, "elapsed_time": "0:19:37", "remaining_time": "0:01:01"}
|
| 476 |
+
{"current_steps": 4760, "total_steps": 5000, "loss": 1.4444, "lr": 3.5009907323737825e-07, "epoch": 0.952, "percentage": 95.2, "elapsed_time": "0:19:40", "remaining_time": "0:00:59"}
|
| 477 |
+
{"current_steps": 4770, "total_steps": 5000, "loss": 1.1199, "lr": 3.215933731753024e-07, "epoch": 0.954, "percentage": 95.4, "elapsed_time": "0:19:42", "remaining_time": "0:00:57"}
|
| 478 |
+
{"current_steps": 4780, "total_steps": 5000, "loss": 0.5519, "lr": 2.942904638361804e-07, "epoch": 0.956, "percentage": 95.6, "elapsed_time": "0:19:45", "remaining_time": "0:00:54"}
|
| 479 |
+
{"current_steps": 4790, "total_steps": 5000, "loss": 0.9452, "lr": 2.681916759252917e-07, "epoch": 0.958, "percentage": 95.8, "elapsed_time": "0:19:47", "remaining_time": "0:00:52"}
|
| 480 |
+
{"current_steps": 4800, "total_steps": 5000, "loss": 3.2854, "lr": 2.4329828146074095e-07, "epoch": 0.96, "percentage": 96.0, "elapsed_time": "0:19:49", "remaining_time": "0:00:49"}
|
| 481 |
+
{"current_steps": 4810, "total_steps": 5000, "loss": 0.5169, "lr": 2.1961149371145795e-07, "epoch": 0.962, "percentage": 96.2, "elapsed_time": "0:19:51", "remaining_time": "0:00:47"}
|
| 482 |
+
{"current_steps": 4820, "total_steps": 5000, "loss": 0.8747, "lr": 1.9713246713805588e-07, "epoch": 0.964, "percentage": 96.4, "elapsed_time": "0:19:53", "remaining_time": "0:00:44"}
|
| 483 |
+
{"current_steps": 4830, "total_steps": 5000, "loss": 1.0667, "lr": 1.7586229733657644e-07, "epoch": 0.966, "percentage": 96.6, "elapsed_time": "0:19:56", "remaining_time": "0:00:42"}
|
| 484 |
+
{"current_steps": 4840, "total_steps": 5000, "loss": 1.4877, "lr": 1.5580202098509077e-07, "epoch": 0.968, "percentage": 96.8, "elapsed_time": "0:19:59", "remaining_time": "0:00:39"}
|
| 485 |
+
{"current_steps": 4850, "total_steps": 5000, "loss": 0.9893, "lr": 1.3695261579316777e-07, "epoch": 0.97, "percentage": 97.0, "elapsed_time": "0:20:01", "remaining_time": "0:00:37"}
|
| 486 |
+
{"current_steps": 4860, "total_steps": 5000, "loss": 0.9399, "lr": 1.193150004542204e-07, "epoch": 0.972, "percentage": 97.2, "elapsed_time": "0:20:04", "remaining_time": "0:00:34"}
|
| 487 |
+
{"current_steps": 4870, "total_steps": 5000, "loss": 0.4823, "lr": 1.0289003460074165e-07, "epoch": 0.974, "percentage": 97.4, "elapsed_time": "0:20:07", "remaining_time": "0:00:32"}
|
| 488 |
+
{"current_steps": 4880, "total_steps": 5000, "loss": 0.999, "lr": 8.767851876239074e-08, "epoch": 0.976, "percentage": 97.6, "elapsed_time": "0:20:10", "remaining_time": "0:00:29"}
|
| 489 |
+
{"current_steps": 4890, "total_steps": 5000, "loss": 0.6337, "lr": 7.368119432699383e-08, "epoch": 0.978, "percentage": 97.8, "elapsed_time": "0:20:13", "remaining_time": "0:00:27"}
|
| 490 |
+
{"current_steps": 4900, "total_steps": 5000, "loss": 0.5041, "lr": 6.089874350439506e-08, "epoch": 0.98, "percentage": 98.0, "elapsed_time": "0:20:16", "remaining_time": "0:00:24"}
|
| 491 |
+
{"current_steps": 4910, "total_steps": 5000, "loss": 0.9211, "lr": 4.9331789293211026e-08, "epoch": 0.982, "percentage": 98.2, "elapsed_time": "0:20:18", "remaining_time": "0:00:22"}
|
| 492 |
+
{"current_steps": 4920, "total_steps": 5000, "loss": 0.5071, "lr": 3.8980895450474455e-08, "epoch": 0.984, "percentage": 98.4, "elapsed_time": "0:20:20", "remaining_time": "0:00:19"}
|
| 493 |
+
{"current_steps": 4930, "total_steps": 5000, "loss": 0.7782, "lr": 2.9846566464150626e-08, "epoch": 0.986, "percentage": 98.6, "elapsed_time": "0:20:23", "remaining_time": "0:00:17"}
|
| 494 |
+
{"current_steps": 4940, "total_steps": 5000, "loss": 0.9379, "lr": 2.192924752854042e-08, "epoch": 0.988, "percentage": 98.8, "elapsed_time": "0:20:25", "remaining_time": "0:00:14"}
|
| 495 |
+
{"current_steps": 4950, "total_steps": 5000, "loss": 0.552, "lr": 1.522932452260595e-08, "epoch": 0.99, "percentage": 99.0, "elapsed_time": "0:20:28", "remaining_time": "0:00:12"}
|
| 496 |
+
{"current_steps": 4960, "total_steps": 5000, "loss": 0.5151, "lr": 9.747123991141194e-09, "epoch": 0.992, "percentage": 99.2, "elapsed_time": "0:20:30", "remaining_time": "0:00:09"}
|
| 497 |
+
{"current_steps": 4970, "total_steps": 5000, "loss": 0.7505, "lr": 5.48291312886251e-09, "epoch": 0.994, "percentage": 99.4, "elapsed_time": "0:20:32", "remaining_time": "0:00:07"}
|
| 498 |
+
{"current_steps": 4980, "total_steps": 5000, "loss": 0.8377, "lr": 2.4368997673940297e-09, "epoch": 0.996, "percentage": 99.6, "elapsed_time": "0:20:35", "remaining_time": "0:00:04"}
|
| 499 |
+
{"current_steps": 4990, "total_steps": 5000, "loss": 1.0276, "lr": 6.092323651313292e-10, "epoch": 0.998, "percentage": 99.8, "elapsed_time": "0:20:38", "remaining_time": "0:00:02"}
|
| 500 |
+
{"current_steps": 5000, "total_steps": 5000, "loss": 0.4942, "lr": 0.0, "epoch": 1.0, "percentage": 100.0, "elapsed_time": "0:20:41", "remaining_time": "0:00:00"}
|
| 501 |
+
{"current_steps": 5000, "total_steps": 5000, "epoch": 1.0, "percentage": 100.0, "elapsed_time": "0:20:41", "remaining_time": "0:00:00"}
|
Llama-2-13b-chat-hf/DomainBench/Geography/trainer_state.json
ADDED
|
@@ -0,0 +1,3542 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"best_metric": null,
|
| 3 |
+
"best_model_checkpoint": null,
|
| 4 |
+
"epoch": 1.0,
|
| 5 |
+
"eval_steps": 500,
|
| 6 |
+
"global_step": 5000,
|
| 7 |
+
"is_hyper_param_search": false,
|
| 8 |
+
"is_local_process_zero": true,
|
| 9 |
+
"is_world_process_zero": true,
|
| 10 |
+
"log_history": [
|
| 11 |
+
{
|
| 12 |
+
"epoch": 0.002,
|
| 13 |
+
"grad_norm": 0.12749333679676056,
|
| 14 |
+
"learning_rate": 1.0000000000000002e-06,
|
| 15 |
+
"loss": 1.5419,
|
| 16 |
+
"step": 10
|
| 17 |
+
},
|
| 18 |
+
{
|
| 19 |
+
"epoch": 0.004,
|
| 20 |
+
"grad_norm": 0.4542064964771271,
|
| 21 |
+
"learning_rate": 2.0000000000000003e-06,
|
| 22 |
+
"loss": 2.7315,
|
| 23 |
+
"step": 20
|
| 24 |
+
},
|
| 25 |
+
{
|
| 26 |
+
"epoch": 0.006,
|
| 27 |
+
"grad_norm": 0.33891889452934265,
|
| 28 |
+
"learning_rate": 3e-06,
|
| 29 |
+
"loss": 1.8219,
|
| 30 |
+
"step": 30
|
| 31 |
+
},
|
| 32 |
+
{
|
| 33 |
+
"epoch": 0.008,
|
| 34 |
+
"grad_norm": 0.26358193159103394,
|
| 35 |
+
"learning_rate": 4.000000000000001e-06,
|
| 36 |
+
"loss": 1.5216,
|
| 37 |
+
"step": 40
|
| 38 |
+
},
|
| 39 |
+
{
|
| 40 |
+
"epoch": 0.01,
|
| 41 |
+
"grad_norm": 0.5897868275642395,
|
| 42 |
+
"learning_rate": 5e-06,
|
| 43 |
+
"loss": 5.3329,
|
| 44 |
+
"step": 50
|
| 45 |
+
},
|
| 46 |
+
{
|
| 47 |
+
"epoch": 0.012,
|
| 48 |
+
"grad_norm": 0.07071671634912491,
|
| 49 |
+
"learning_rate": 6e-06,
|
| 50 |
+
"loss": 2.0144,
|
| 51 |
+
"step": 60
|
| 52 |
+
},
|
| 53 |
+
{
|
| 54 |
+
"epoch": 0.014,
|
| 55 |
+
"grad_norm": 1.6330965757369995,
|
| 56 |
+
"learning_rate": 7.000000000000001e-06,
|
| 57 |
+
"loss": 2.6128,
|
| 58 |
+
"step": 70
|
| 59 |
+
},
|
| 60 |
+
{
|
| 61 |
+
"epoch": 0.016,
|
| 62 |
+
"grad_norm": 3.5290110111236572,
|
| 63 |
+
"learning_rate": 8.000000000000001e-06,
|
| 64 |
+
"loss": 2.3414,
|
| 65 |
+
"step": 80
|
| 66 |
+
},
|
| 67 |
+
{
|
| 68 |
+
"epoch": 0.018,
|
| 69 |
+
"grad_norm": 0.41117197275161743,
|
| 70 |
+
"learning_rate": 9e-06,
|
| 71 |
+
"loss": 3.2327,
|
| 72 |
+
"step": 90
|
| 73 |
+
},
|
| 74 |
+
{
|
| 75 |
+
"epoch": 0.02,
|
| 76 |
+
"grad_norm": 0.8093858957290649,
|
| 77 |
+
"learning_rate": 1e-05,
|
| 78 |
+
"loss": 2.7985,
|
| 79 |
+
"step": 100
|
| 80 |
+
},
|
| 81 |
+
{
|
| 82 |
+
"epoch": 0.022,
|
| 83 |
+
"grad_norm": 6.350818157196045,
|
| 84 |
+
"learning_rate": 1.1000000000000001e-05,
|
| 85 |
+
"loss": 2.397,
|
| 86 |
+
"step": 110
|
| 87 |
+
},
|
| 88 |
+
{
|
| 89 |
+
"epoch": 0.024,
|
| 90 |
+
"grad_norm": 0.4250973165035248,
|
| 91 |
+
"learning_rate": 1.2e-05,
|
| 92 |
+
"loss": 1.0396,
|
| 93 |
+
"step": 120
|
| 94 |
+
},
|
| 95 |
+
{
|
| 96 |
+
"epoch": 0.026,
|
| 97 |
+
"grad_norm": 1.2124313116073608,
|
| 98 |
+
"learning_rate": 1.3000000000000001e-05,
|
| 99 |
+
"loss": 3.2413,
|
| 100 |
+
"step": 130
|
| 101 |
+
},
|
| 102 |
+
{
|
| 103 |
+
"epoch": 0.028,
|
| 104 |
+
"grad_norm": 0.0,
|
| 105 |
+
"learning_rate": 1.4000000000000001e-05,
|
| 106 |
+
"loss": 3.3462,
|
| 107 |
+
"step": 140
|
| 108 |
+
},
|
| 109 |
+
{
|
| 110 |
+
"epoch": 0.03,
|
| 111 |
+
"grad_norm": 1.524579644203186,
|
| 112 |
+
"learning_rate": 1.5e-05,
|
| 113 |
+
"loss": 1.3302,
|
| 114 |
+
"step": 150
|
| 115 |
+
},
|
| 116 |
+
{
|
| 117 |
+
"epoch": 0.032,
|
| 118 |
+
"grad_norm": 4.701014518737793,
|
| 119 |
+
"learning_rate": 1.6000000000000003e-05,
|
| 120 |
+
"loss": 1.2748,
|
| 121 |
+
"step": 160
|
| 122 |
+
},
|
| 123 |
+
{
|
| 124 |
+
"epoch": 0.034,
|
| 125 |
+
"grad_norm": 1.1295222043991089,
|
| 126 |
+
"learning_rate": 1.7000000000000003e-05,
|
| 127 |
+
"loss": 3.3206,
|
| 128 |
+
"step": 170
|
| 129 |
+
},
|
| 130 |
+
{
|
| 131 |
+
"epoch": 0.036,
|
| 132 |
+
"grad_norm": 4.18114709854126,
|
| 133 |
+
"learning_rate": 1.8e-05,
|
| 134 |
+
"loss": 1.3943,
|
| 135 |
+
"step": 180
|
| 136 |
+
},
|
| 137 |
+
{
|
| 138 |
+
"epoch": 0.038,
|
| 139 |
+
"grad_norm": 0.0,
|
| 140 |
+
"learning_rate": 1.9e-05,
|
| 141 |
+
"loss": 1.2942,
|
| 142 |
+
"step": 190
|
| 143 |
+
},
|
| 144 |
+
{
|
| 145 |
+
"epoch": 0.04,
|
| 146 |
+
"grad_norm": 0.5731304883956909,
|
| 147 |
+
"learning_rate": 2e-05,
|
| 148 |
+
"loss": 1.4252,
|
| 149 |
+
"step": 200
|
| 150 |
+
},
|
| 151 |
+
{
|
| 152 |
+
"epoch": 0.042,
|
| 153 |
+
"grad_norm": 0.0,
|
| 154 |
+
"learning_rate": 2.1e-05,
|
| 155 |
+
"loss": 1.6539,
|
| 156 |
+
"step": 210
|
| 157 |
+
},
|
| 158 |
+
{
|
| 159 |
+
"epoch": 0.044,
|
| 160 |
+
"grad_norm": 0.9085258841514587,
|
| 161 |
+
"learning_rate": 2.2000000000000003e-05,
|
| 162 |
+
"loss": 1.8091,
|
| 163 |
+
"step": 220
|
| 164 |
+
},
|
| 165 |
+
{
|
| 166 |
+
"epoch": 0.046,
|
| 167 |
+
"grad_norm": 0.22193272411823273,
|
| 168 |
+
"learning_rate": 2.3000000000000003e-05,
|
| 169 |
+
"loss": 1.2866,
|
| 170 |
+
"step": 230
|
| 171 |
+
},
|
| 172 |
+
{
|
| 173 |
+
"epoch": 0.048,
|
| 174 |
+
"grad_norm": 0.32045862078666687,
|
| 175 |
+
"learning_rate": 2.4e-05,
|
| 176 |
+
"loss": 1.7432,
|
| 177 |
+
"step": 240
|
| 178 |
+
},
|
| 179 |
+
{
|
| 180 |
+
"epoch": 0.05,
|
| 181 |
+
"grad_norm": 0.15331393480300903,
|
| 182 |
+
"learning_rate": 2.5e-05,
|
| 183 |
+
"loss": 1.6918,
|
| 184 |
+
"step": 250
|
| 185 |
+
},
|
| 186 |
+
{
|
| 187 |
+
"epoch": 0.052,
|
| 188 |
+
"grad_norm": 0.13029654324054718,
|
| 189 |
+
"learning_rate": 2.6000000000000002e-05,
|
| 190 |
+
"loss": 0.9121,
|
| 191 |
+
"step": 260
|
| 192 |
+
},
|
| 193 |
+
{
|
| 194 |
+
"epoch": 0.054,
|
| 195 |
+
"grad_norm": 0.27324891090393066,
|
| 196 |
+
"learning_rate": 2.7000000000000002e-05,
|
| 197 |
+
"loss": 0.6088,
|
| 198 |
+
"step": 270
|
| 199 |
+
},
|
| 200 |
+
{
|
| 201 |
+
"epoch": 0.056,
|
| 202 |
+
"grad_norm": 0.3138855993747711,
|
| 203 |
+
"learning_rate": 2.8000000000000003e-05,
|
| 204 |
+
"loss": 0.6236,
|
| 205 |
+
"step": 280
|
| 206 |
+
},
|
| 207 |
+
{
|
| 208 |
+
"epoch": 0.058,
|
| 209 |
+
"grad_norm": 0.0,
|
| 210 |
+
"learning_rate": 2.9e-05,
|
| 211 |
+
"loss": 0.847,
|
| 212 |
+
"step": 290
|
| 213 |
+
},
|
| 214 |
+
{
|
| 215 |
+
"epoch": 0.06,
|
| 216 |
+
"grad_norm": 0.5300617218017578,
|
| 217 |
+
"learning_rate": 3e-05,
|
| 218 |
+
"loss": 0.9911,
|
| 219 |
+
"step": 300
|
| 220 |
+
},
|
| 221 |
+
{
|
| 222 |
+
"epoch": 0.062,
|
| 223 |
+
"grad_norm": 0.41367462277412415,
|
| 224 |
+
"learning_rate": 3.1e-05,
|
| 225 |
+
"loss": 1.4213,
|
| 226 |
+
"step": 310
|
| 227 |
+
},
|
| 228 |
+
{
|
| 229 |
+
"epoch": 0.064,
|
| 230 |
+
"grad_norm": 0.0,
|
| 231 |
+
"learning_rate": 3.2000000000000005e-05,
|
| 232 |
+
"loss": 0.436,
|
| 233 |
+
"step": 320
|
| 234 |
+
},
|
| 235 |
+
{
|
| 236 |
+
"epoch": 0.066,
|
| 237 |
+
"grad_norm": 0.4312784671783447,
|
| 238 |
+
"learning_rate": 3.3e-05,
|
| 239 |
+
"loss": 1.0218,
|
| 240 |
+
"step": 330
|
| 241 |
+
},
|
| 242 |
+
{
|
| 243 |
+
"epoch": 0.068,
|
| 244 |
+
"grad_norm": 2.3603317737579346,
|
| 245 |
+
"learning_rate": 3.4000000000000007e-05,
|
| 246 |
+
"loss": 1.2908,
|
| 247 |
+
"step": 340
|
| 248 |
+
},
|
| 249 |
+
{
|
| 250 |
+
"epoch": 0.07,
|
| 251 |
+
"grad_norm": 0.43904104828834534,
|
| 252 |
+
"learning_rate": 3.5e-05,
|
| 253 |
+
"loss": 1.0009,
|
| 254 |
+
"step": 350
|
| 255 |
+
},
|
| 256 |
+
{
|
| 257 |
+
"epoch": 0.072,
|
| 258 |
+
"grad_norm": 0.23096807301044464,
|
| 259 |
+
"learning_rate": 3.6e-05,
|
| 260 |
+
"loss": 0.6865,
|
| 261 |
+
"step": 360
|
| 262 |
+
},
|
| 263 |
+
{
|
| 264 |
+
"epoch": 0.074,
|
| 265 |
+
"grad_norm": 1.1962610483169556,
|
| 266 |
+
"learning_rate": 3.7e-05,
|
| 267 |
+
"loss": 0.9106,
|
| 268 |
+
"step": 370
|
| 269 |
+
},
|
| 270 |
+
{
|
| 271 |
+
"epoch": 0.076,
|
| 272 |
+
"grad_norm": 0.38495856523513794,
|
| 273 |
+
"learning_rate": 3.8e-05,
|
| 274 |
+
"loss": 1.4635,
|
| 275 |
+
"step": 380
|
| 276 |
+
},
|
| 277 |
+
{
|
| 278 |
+
"epoch": 0.078,
|
| 279 |
+
"grad_norm": 1.5584137439727783,
|
| 280 |
+
"learning_rate": 3.9000000000000006e-05,
|
| 281 |
+
"loss": 1.3782,
|
| 282 |
+
"step": 390
|
| 283 |
+
},
|
| 284 |
+
{
|
| 285 |
+
"epoch": 0.08,
|
| 286 |
+
"grad_norm": 2.283113956451416,
|
| 287 |
+
"learning_rate": 4e-05,
|
| 288 |
+
"loss": 0.8778,
|
| 289 |
+
"step": 400
|
| 290 |
+
},
|
| 291 |
+
{
|
| 292 |
+
"epoch": 0.082,
|
| 293 |
+
"grad_norm": 0.4340592324733734,
|
| 294 |
+
"learning_rate": 4.1e-05,
|
| 295 |
+
"loss": 0.8344,
|
| 296 |
+
"step": 410
|
| 297 |
+
},
|
| 298 |
+
{
|
| 299 |
+
"epoch": 0.084,
|
| 300 |
+
"grad_norm": 0.7569859623908997,
|
| 301 |
+
"learning_rate": 4.2e-05,
|
| 302 |
+
"loss": 0.5733,
|
| 303 |
+
"step": 420
|
| 304 |
+
},
|
| 305 |
+
{
|
| 306 |
+
"epoch": 0.086,
|
| 307 |
+
"grad_norm": 0.17662915587425232,
|
| 308 |
+
"learning_rate": 4.3e-05,
|
| 309 |
+
"loss": 0.6683,
|
| 310 |
+
"step": 430
|
| 311 |
+
},
|
| 312 |
+
{
|
| 313 |
+
"epoch": 0.088,
|
| 314 |
+
"grad_norm": 0.0,
|
| 315 |
+
"learning_rate": 4.4000000000000006e-05,
|
| 316 |
+
"loss": 3.1046,
|
| 317 |
+
"step": 440
|
| 318 |
+
},
|
| 319 |
+
{
|
| 320 |
+
"epoch": 0.09,
|
| 321 |
+
"grad_norm": 0.09121380746364594,
|
| 322 |
+
"learning_rate": 4.5e-05,
|
| 323 |
+
"loss": 0.981,
|
| 324 |
+
"step": 450
|
| 325 |
+
},
|
| 326 |
+
{
|
| 327 |
+
"epoch": 0.092,
|
| 328 |
+
"grad_norm": 5.895723342895508,
|
| 329 |
+
"learning_rate": 4.600000000000001e-05,
|
| 330 |
+
"loss": 2.1118,
|
| 331 |
+
"step": 460
|
| 332 |
+
},
|
| 333 |
+
{
|
| 334 |
+
"epoch": 0.094,
|
| 335 |
+
"grad_norm": 0.0,
|
| 336 |
+
"learning_rate": 4.7e-05,
|
| 337 |
+
"loss": 0.3037,
|
| 338 |
+
"step": 470
|
| 339 |
+
},
|
| 340 |
+
{
|
| 341 |
+
"epoch": 0.096,
|
| 342 |
+
"grad_norm": 2.7312169075012207,
|
| 343 |
+
"learning_rate": 4.8e-05,
|
| 344 |
+
"loss": 1.2638,
|
| 345 |
+
"step": 480
|
| 346 |
+
},
|
| 347 |
+
{
|
| 348 |
+
"epoch": 0.098,
|
| 349 |
+
"grad_norm": 39.255226135253906,
|
| 350 |
+
"learning_rate": 4.9e-05,
|
| 351 |
+
"loss": 2.3644,
|
| 352 |
+
"step": 490
|
| 353 |
+
},
|
| 354 |
+
{
|
| 355 |
+
"epoch": 0.1,
|
| 356 |
+
"grad_norm": 1.8758808374404907,
|
| 357 |
+
"learning_rate": 5e-05,
|
| 358 |
+
"loss": 0.8317,
|
| 359 |
+
"step": 500
|
| 360 |
+
},
|
| 361 |
+
{
|
| 362 |
+
"epoch": 0.102,
|
| 363 |
+
"grad_norm": 14.16323184967041,
|
| 364 |
+
"learning_rate": 4.999939076763487e-05,
|
| 365 |
+
"loss": 2.1864,
|
| 366 |
+
"step": 510
|
| 367 |
+
},
|
| 368 |
+
{
|
| 369 |
+
"epoch": 0.104,
|
| 370 |
+
"grad_norm": 0.8826745748519897,
|
| 371 |
+
"learning_rate": 4.999756310023261e-05,
|
| 372 |
+
"loss": 1.3502,
|
| 373 |
+
"step": 520
|
| 374 |
+
},
|
| 375 |
+
{
|
| 376 |
+
"epoch": 0.106,
|
| 377 |
+
"grad_norm": 1.1676051616668701,
|
| 378 |
+
"learning_rate": 4.999451708687114e-05,
|
| 379 |
+
"loss": 1.161,
|
| 380 |
+
"step": 530
|
| 381 |
+
},
|
| 382 |
+
{
|
| 383 |
+
"epoch": 0.108,
|
| 384 |
+
"grad_norm": 1.338937759399414,
|
| 385 |
+
"learning_rate": 4.999025287600886e-05,
|
| 386 |
+
"loss": 0.7291,
|
| 387 |
+
"step": 540
|
| 388 |
+
},
|
| 389 |
+
{
|
| 390 |
+
"epoch": 0.11,
|
| 391 |
+
"grad_norm": 0.0,
|
| 392 |
+
"learning_rate": 4.99847706754774e-05,
|
| 393 |
+
"loss": 0.4316,
|
| 394 |
+
"step": 550
|
| 395 |
+
},
|
| 396 |
+
{
|
| 397 |
+
"epoch": 0.112,
|
| 398 |
+
"grad_norm": 1.1105668544769287,
|
| 399 |
+
"learning_rate": 4.997807075247146e-05,
|
| 400 |
+
"loss": 1.2009,
|
| 401 |
+
"step": 560
|
| 402 |
+
},
|
| 403 |
+
{
|
| 404 |
+
"epoch": 0.114,
|
| 405 |
+
"grad_norm": 4.630878448486328,
|
| 406 |
+
"learning_rate": 4.997015343353585e-05,
|
| 407 |
+
"loss": 0.5649,
|
| 408 |
+
"step": 570
|
| 409 |
+
},
|
| 410 |
+
{
|
| 411 |
+
"epoch": 0.116,
|
| 412 |
+
"grad_norm": 0.0,
|
| 413 |
+
"learning_rate": 4.996101910454953e-05,
|
| 414 |
+
"loss": 0.4128,
|
| 415 |
+
"step": 580
|
| 416 |
+
},
|
| 417 |
+
{
|
| 418 |
+
"epoch": 0.118,
|
| 419 |
+
"grad_norm": 3.0937798023223877,
|
| 420 |
+
"learning_rate": 4.995066821070679e-05,
|
| 421 |
+
"loss": 0.977,
|
| 422 |
+
"step": 590
|
| 423 |
+
},
|
| 424 |
+
{
|
| 425 |
+
"epoch": 0.12,
|
| 426 |
+
"grad_norm": 0.31115856766700745,
|
| 427 |
+
"learning_rate": 4.993910125649561e-05,
|
| 428 |
+
"loss": 1.3527,
|
| 429 |
+
"step": 600
|
| 430 |
+
},
|
| 431 |
+
{
|
| 432 |
+
"epoch": 0.122,
|
| 433 |
+
"grad_norm": 2.9121241569519043,
|
| 434 |
+
"learning_rate": 4.992631880567301e-05,
|
| 435 |
+
"loss": 0.6522,
|
| 436 |
+
"step": 610
|
| 437 |
+
},
|
| 438 |
+
{
|
| 439 |
+
"epoch": 0.124,
|
| 440 |
+
"grad_norm": 0.5493115782737732,
|
| 441 |
+
"learning_rate": 4.991232148123761e-05,
|
| 442 |
+
"loss": 0.7698,
|
| 443 |
+
"step": 620
|
| 444 |
+
},
|
| 445 |
+
{
|
| 446 |
+
"epoch": 0.126,
|
| 447 |
+
"grad_norm": 0.0,
|
| 448 |
+
"learning_rate": 4.989710996539926e-05,
|
| 449 |
+
"loss": 0.9953,
|
| 450 |
+
"step": 630
|
| 451 |
+
},
|
| 452 |
+
{
|
| 453 |
+
"epoch": 0.128,
|
| 454 |
+
"grad_norm": 31.73255729675293,
|
| 455 |
+
"learning_rate": 4.988068499954578e-05,
|
| 456 |
+
"loss": 0.8877,
|
| 457 |
+
"step": 640
|
| 458 |
+
},
|
| 459 |
+
{
|
| 460 |
+
"epoch": 0.13,
|
| 461 |
+
"grad_norm": 0.3843584954738617,
|
| 462 |
+
"learning_rate": 4.9863047384206835e-05,
|
| 463 |
+
"loss": 0.564,
|
| 464 |
+
"step": 650
|
| 465 |
+
},
|
| 466 |
+
{
|
| 467 |
+
"epoch": 0.132,
|
| 468 |
+
"grad_norm": 0.27858206629753113,
|
| 469 |
+
"learning_rate": 4.984419797901491e-05,
|
| 470 |
+
"loss": 0.451,
|
| 471 |
+
"step": 660
|
| 472 |
+
},
|
| 473 |
+
{
|
| 474 |
+
"epoch": 0.134,
|
| 475 |
+
"grad_norm": 0.0,
|
| 476 |
+
"learning_rate": 4.982413770266342e-05,
|
| 477 |
+
"loss": 1.5067,
|
| 478 |
+
"step": 670
|
| 479 |
+
},
|
| 480 |
+
{
|
| 481 |
+
"epoch": 0.136,
|
| 482 |
+
"grad_norm": 1.5187193155288696,
|
| 483 |
+
"learning_rate": 4.980286753286195e-05,
|
| 484 |
+
"loss": 1.6702,
|
| 485 |
+
"step": 680
|
| 486 |
+
},
|
| 487 |
+
{
|
| 488 |
+
"epoch": 0.138,
|
| 489 |
+
"grad_norm": 0.8326213359832764,
|
| 490 |
+
"learning_rate": 4.978038850628854e-05,
|
| 491 |
+
"loss": 0.7115,
|
| 492 |
+
"step": 690
|
| 493 |
+
},
|
| 494 |
+
{
|
| 495 |
+
"epoch": 0.14,
|
| 496 |
+
"grad_norm": 7.2340898513793945,
|
| 497 |
+
"learning_rate": 4.975670171853926e-05,
|
| 498 |
+
"loss": 0.9633,
|
| 499 |
+
"step": 700
|
| 500 |
+
},
|
| 501 |
+
{
|
| 502 |
+
"epoch": 0.142,
|
| 503 |
+
"grad_norm": 1.582497477531433,
|
| 504 |
+
"learning_rate": 4.9731808324074717e-05,
|
| 505 |
+
"loss": 1.1906,
|
| 506 |
+
"step": 710
|
| 507 |
+
},
|
| 508 |
+
{
|
| 509 |
+
"epoch": 0.144,
|
| 510 |
+
"grad_norm": 2.172502040863037,
|
| 511 |
+
"learning_rate": 4.9705709536163824e-05,
|
| 512 |
+
"loss": 1.7433,
|
| 513 |
+
"step": 720
|
| 514 |
+
},
|
| 515 |
+
{
|
| 516 |
+
"epoch": 0.146,
|
| 517 |
+
"grad_norm": 1.8554096221923828,
|
| 518 |
+
"learning_rate": 4.96784066268247e-05,
|
| 519 |
+
"loss": 0.483,
|
| 520 |
+
"step": 730
|
| 521 |
+
},
|
| 522 |
+
{
|
| 523 |
+
"epoch": 0.148,
|
| 524 |
+
"grad_norm": 2.1049275398254395,
|
| 525 |
+
"learning_rate": 4.964990092676263e-05,
|
| 526 |
+
"loss": 1.0321,
|
| 527 |
+
"step": 740
|
| 528 |
+
},
|
| 529 |
+
{
|
| 530 |
+
"epoch": 0.15,
|
| 531 |
+
"grad_norm": 2.521188735961914,
|
| 532 |
+
"learning_rate": 4.962019382530521e-05,
|
| 533 |
+
"loss": 1.0468,
|
| 534 |
+
"step": 750
|
| 535 |
+
},
|
| 536 |
+
{
|
| 537 |
+
"epoch": 0.152,
|
| 538 |
+
"grad_norm": 0.0,
|
| 539 |
+
"learning_rate": 4.9589286770334654e-05,
|
| 540 |
+
"loss": 0.5741,
|
| 541 |
+
"step": 760
|
| 542 |
+
},
|
| 543 |
+
{
|
| 544 |
+
"epoch": 0.154,
|
| 545 |
+
"grad_norm": 0.0,
|
| 546 |
+
"learning_rate": 4.9557181268217227e-05,
|
| 547 |
+
"loss": 0.8734,
|
| 548 |
+
"step": 770
|
| 549 |
+
},
|
| 550 |
+
{
|
| 551 |
+
"epoch": 0.156,
|
| 552 |
+
"grad_norm": 0.5660704970359802,
|
| 553 |
+
"learning_rate": 4.952387888372979e-05,
|
| 554 |
+
"loss": 2.3025,
|
| 555 |
+
"step": 780
|
| 556 |
+
},
|
| 557 |
+
{
|
| 558 |
+
"epoch": 0.158,
|
| 559 |
+
"grad_norm": 0.5353797078132629,
|
| 560 |
+
"learning_rate": 4.94893812399836e-05,
|
| 561 |
+
"loss": 1.3188,
|
| 562 |
+
"step": 790
|
| 563 |
+
},
|
| 564 |
+
{
|
| 565 |
+
"epoch": 0.16,
|
| 566 |
+
"grad_norm": 0.0,
|
| 567 |
+
"learning_rate": 4.9453690018345144e-05,
|
| 568 |
+
"loss": 2.0348,
|
| 569 |
+
"step": 800
|
| 570 |
+
},
|
| 571 |
+
{
|
| 572 |
+
"epoch": 0.162,
|
| 573 |
+
"grad_norm": 0.0,
|
| 574 |
+
"learning_rate": 4.94168069583542e-05,
|
| 575 |
+
"loss": 1.8253,
|
| 576 |
+
"step": 810
|
| 577 |
+
},
|
| 578 |
+
{
|
| 579 |
+
"epoch": 0.164,
|
| 580 |
+
"grad_norm": 1.0347248315811157,
|
| 581 |
+
"learning_rate": 4.937873385763908e-05,
|
| 582 |
+
"loss": 0.7857,
|
| 583 |
+
"step": 820
|
| 584 |
+
},
|
| 585 |
+
{
|
| 586 |
+
"epoch": 0.166,
|
| 587 |
+
"grad_norm": 0.0,
|
| 588 |
+
"learning_rate": 4.933947257182901e-05,
|
| 589 |
+
"loss": 1.6698,
|
| 590 |
+
"step": 830
|
| 591 |
+
},
|
| 592 |
+
{
|
| 593 |
+
"epoch": 0.168,
|
| 594 |
+
"grad_norm": 0.3763255774974823,
|
| 595 |
+
"learning_rate": 4.929902501446366e-05,
|
| 596 |
+
"loss": 1.3913,
|
| 597 |
+
"step": 840
|
| 598 |
+
},
|
| 599 |
+
{
|
| 600 |
+
"epoch": 0.17,
|
| 601 |
+
"grad_norm": 4.2643327713012695,
|
| 602 |
+
"learning_rate": 4.925739315689991e-05,
|
| 603 |
+
"loss": 1.0676,
|
| 604 |
+
"step": 850
|
| 605 |
+
},
|
| 606 |
+
{
|
| 607 |
+
"epoch": 0.172,
|
| 608 |
+
"grad_norm": 9.34708023071289,
|
| 609 |
+
"learning_rate": 4.9214579028215776e-05,
|
| 610 |
+
"loss": 1.6685,
|
| 611 |
+
"step": 860
|
| 612 |
+
},
|
| 613 |
+
{
|
| 614 |
+
"epoch": 0.174,
|
| 615 |
+
"grad_norm": 0.23915188014507294,
|
| 616 |
+
"learning_rate": 4.917058471511149e-05,
|
| 617 |
+
"loss": 1.4978,
|
| 618 |
+
"step": 870
|
| 619 |
+
},
|
| 620 |
+
{
|
| 621 |
+
"epoch": 0.176,
|
| 622 |
+
"grad_norm": 0.6251460909843445,
|
| 623 |
+
"learning_rate": 4.912541236180779e-05,
|
| 624 |
+
"loss": 8.4068,
|
| 625 |
+
"step": 880
|
| 626 |
+
},
|
| 627 |
+
{
|
| 628 |
+
"epoch": 0.178,
|
| 629 |
+
"grad_norm": 6.69281005859375,
|
| 630 |
+
"learning_rate": 4.907906416994146e-05,
|
| 631 |
+
"loss": 1.0415,
|
| 632 |
+
"step": 890
|
| 633 |
+
},
|
| 634 |
+
{
|
| 635 |
+
"epoch": 0.18,
|
| 636 |
+
"grad_norm": 0.18685762584209442,
|
| 637 |
+
"learning_rate": 4.9031542398457974e-05,
|
| 638 |
+
"loss": 1.7801,
|
| 639 |
+
"step": 900
|
| 640 |
+
},
|
| 641 |
+
{
|
| 642 |
+
"epoch": 0.182,
|
| 643 |
+
"grad_norm": 0.0,
|
| 644 |
+
"learning_rate": 4.898284936350144e-05,
|
| 645 |
+
"loss": 0.4855,
|
| 646 |
+
"step": 910
|
| 647 |
+
},
|
| 648 |
+
{
|
| 649 |
+
"epoch": 0.184,
|
| 650 |
+
"grad_norm": 0.500586986541748,
|
| 651 |
+
"learning_rate": 4.893298743830168e-05,
|
| 652 |
+
"loss": 0.6015,
|
| 653 |
+
"step": 920
|
| 654 |
+
},
|
| 655 |
+
{
|
| 656 |
+
"epoch": 0.186,
|
| 657 |
+
"grad_norm": 0.0,
|
| 658 |
+
"learning_rate": 4.888195905305859e-05,
|
| 659 |
+
"loss": 0.7006,
|
| 660 |
+
"step": 930
|
| 661 |
+
},
|
| 662 |
+
{
|
| 663 |
+
"epoch": 0.188,
|
| 664 |
+
"grad_norm": 0.9140529036521912,
|
| 665 |
+
"learning_rate": 4.882976669482367e-05,
|
| 666 |
+
"loss": 0.3036,
|
| 667 |
+
"step": 940
|
| 668 |
+
},
|
| 669 |
+
{
|
| 670 |
+
"epoch": 0.19,
|
| 671 |
+
"grad_norm": 0.0,
|
| 672 |
+
"learning_rate": 4.877641290737884e-05,
|
| 673 |
+
"loss": 0.2464,
|
| 674 |
+
"step": 950
|
| 675 |
+
},
|
| 676 |
+
{
|
| 677 |
+
"epoch": 0.192,
|
| 678 |
+
"grad_norm": 0.0,
|
| 679 |
+
"learning_rate": 4.8721900291112415e-05,
|
| 680 |
+
"loss": 0.4622,
|
| 681 |
+
"step": 960
|
| 682 |
+
},
|
| 683 |
+
{
|
| 684 |
+
"epoch": 0.194,
|
| 685 |
+
"grad_norm": 0.4619162082672119,
|
| 686 |
+
"learning_rate": 4.8666231502892415e-05,
|
| 687 |
+
"loss": 0.5846,
|
| 688 |
+
"step": 970
|
| 689 |
+
},
|
| 690 |
+
{
|
| 691 |
+
"epoch": 0.196,
|
| 692 |
+
"grad_norm": 0.39642179012298584,
|
| 693 |
+
"learning_rate": 4.860940925593703e-05,
|
| 694 |
+
"loss": 0.5897,
|
| 695 |
+
"step": 980
|
| 696 |
+
},
|
| 697 |
+
{
|
| 698 |
+
"epoch": 0.198,
|
| 699 |
+
"grad_norm": 0.0,
|
| 700 |
+
"learning_rate": 4.855143631968242e-05,
|
| 701 |
+
"loss": 0.6564,
|
| 702 |
+
"step": 990
|
| 703 |
+
},
|
| 704 |
+
{
|
| 705 |
+
"epoch": 0.2,
|
| 706 |
+
"grad_norm": 3.3878490924835205,
|
| 707 |
+
"learning_rate": 4.849231551964771e-05,
|
| 708 |
+
"loss": 0.6761,
|
| 709 |
+
"step": 1000
|
| 710 |
+
},
|
| 711 |
+
{
|
| 712 |
+
"epoch": 0.202,
|
| 713 |
+
"grad_norm": 1.3075069189071655,
|
| 714 |
+
"learning_rate": 4.843204973729729e-05,
|
| 715 |
+
"loss": 0.9705,
|
| 716 |
+
"step": 1010
|
| 717 |
+
},
|
| 718 |
+
{
|
| 719 |
+
"epoch": 0.204,
|
| 720 |
+
"grad_norm": 1.7418557405471802,
|
| 721 |
+
"learning_rate": 4.837064190990036e-05,
|
| 722 |
+
"loss": 0.6534,
|
| 723 |
+
"step": 1020
|
| 724 |
+
},
|
| 725 |
+
{
|
| 726 |
+
"epoch": 0.206,
|
| 727 |
+
"grad_norm": 0.7898538708686829,
|
| 728 |
+
"learning_rate": 4.830809503038781e-05,
|
| 729 |
+
"loss": 1.8363,
|
| 730 |
+
"step": 1030
|
| 731 |
+
},
|
| 732 |
+
{
|
| 733 |
+
"epoch": 0.208,
|
| 734 |
+
"grad_norm": 6.782915115356445,
|
| 735 |
+
"learning_rate": 4.8244412147206284e-05,
|
| 736 |
+
"loss": 1.5076,
|
| 737 |
+
"step": 1040
|
| 738 |
+
},
|
| 739 |
+
{
|
| 740 |
+
"epoch": 0.21,
|
| 741 |
+
"grad_norm": 0.13786576688289642,
|
| 742 |
+
"learning_rate": 4.817959636416969e-05,
|
| 743 |
+
"loss": 0.9317,
|
| 744 |
+
"step": 1050
|
| 745 |
+
},
|
| 746 |
+
{
|
| 747 |
+
"epoch": 0.212,
|
| 748 |
+
"grad_norm": 1.196662425994873,
|
| 749 |
+
"learning_rate": 4.8113650840307834e-05,
|
| 750 |
+
"loss": 1.2908,
|
| 751 |
+
"step": 1060
|
| 752 |
+
},
|
| 753 |
+
{
|
| 754 |
+
"epoch": 0.214,
|
| 755 |
+
"grad_norm": 3.733067750930786,
|
| 756 |
+
"learning_rate": 4.8046578789712515e-05,
|
| 757 |
+
"loss": 0.5742,
|
| 758 |
+
"step": 1070
|
| 759 |
+
},
|
| 760 |
+
{
|
| 761 |
+
"epoch": 0.216,
|
| 762 |
+
"grad_norm": 0.0,
|
| 763 |
+
"learning_rate": 4.797838348138086e-05,
|
| 764 |
+
"loss": 1.2138,
|
| 765 |
+
"step": 1080
|
| 766 |
+
},
|
| 767 |
+
{
|
| 768 |
+
"epoch": 0.218,
|
| 769 |
+
"grad_norm": 10.585489273071289,
|
| 770 |
+
"learning_rate": 4.790906823905599e-05,
|
| 771 |
+
"loss": 1.9534,
|
| 772 |
+
"step": 1090
|
| 773 |
+
},
|
| 774 |
+
{
|
| 775 |
+
"epoch": 0.22,
|
| 776 |
+
"grad_norm": 3.255509853363037,
|
| 777 |
+
"learning_rate": 4.783863644106502e-05,
|
| 778 |
+
"loss": 1.2392,
|
| 779 |
+
"step": 1100
|
| 780 |
+
},
|
| 781 |
+
{
|
| 782 |
+
"epoch": 0.222,
|
| 783 |
+
"grad_norm": 1.0870699882507324,
|
| 784 |
+
"learning_rate": 4.776709152015443e-05,
|
| 785 |
+
"loss": 0.7664,
|
| 786 |
+
"step": 1110
|
| 787 |
+
},
|
| 788 |
+
{
|
| 789 |
+
"epoch": 0.224,
|
| 790 |
+
"grad_norm": 0.0,
|
| 791 |
+
"learning_rate": 4.769443696332272e-05,
|
| 792 |
+
"loss": 0.6328,
|
| 793 |
+
"step": 1120
|
| 794 |
+
},
|
| 795 |
+
{
|
| 796 |
+
"epoch": 0.226,
|
| 797 |
+
"grad_norm": 12.5580472946167,
|
| 798 |
+
"learning_rate": 4.762067631165049e-05,
|
| 799 |
+
"loss": 1.3397,
|
| 800 |
+
"step": 1130
|
| 801 |
+
},
|
| 802 |
+
{
|
| 803 |
+
"epoch": 0.228,
|
| 804 |
+
"grad_norm": 4.46583890914917,
|
| 805 |
+
"learning_rate": 4.754581316012785e-05,
|
| 806 |
+
"loss": 0.5316,
|
| 807 |
+
"step": 1140
|
| 808 |
+
},
|
| 809 |
+
{
|
| 810 |
+
"epoch": 0.23,
|
| 811 |
+
"grad_norm": 0.9223441481590271,
|
| 812 |
+
"learning_rate": 4.7469851157479177e-05,
|
| 813 |
+
"loss": 1.7526,
|
| 814 |
+
"step": 1150
|
| 815 |
+
},
|
| 816 |
+
{
|
| 817 |
+
"epoch": 0.232,
|
| 818 |
+
"grad_norm": 0.2915666997432709,
|
| 819 |
+
"learning_rate": 4.7392794005985326e-05,
|
| 820 |
+
"loss": 1.6985,
|
| 821 |
+
"step": 1160
|
| 822 |
+
},
|
| 823 |
+
{
|
| 824 |
+
"epoch": 0.234,
|
| 825 |
+
"grad_norm": 0.0,
|
| 826 |
+
"learning_rate": 4.731464546130314e-05,
|
| 827 |
+
"loss": 1.7021,
|
| 828 |
+
"step": 1170
|
| 829 |
+
},
|
| 830 |
+
{
|
| 831 |
+
"epoch": 0.236,
|
| 832 |
+
"grad_norm": 3.659092426300049,
|
| 833 |
+
"learning_rate": 4.723540933228244e-05,
|
| 834 |
+
"loss": 0.6692,
|
| 835 |
+
"step": 1180
|
| 836 |
+
},
|
| 837 |
+
{
|
| 838 |
+
"epoch": 0.238,
|
| 839 |
+
"grad_norm": 2.8215394020080566,
|
| 840 |
+
"learning_rate": 4.715508948078037e-05,
|
| 841 |
+
"loss": 0.8183,
|
| 842 |
+
"step": 1190
|
| 843 |
+
},
|
| 844 |
+
{
|
| 845 |
+
"epoch": 0.24,
|
| 846 |
+
"grad_norm": 1.1959506273269653,
|
| 847 |
+
"learning_rate": 4.707368982147318e-05,
|
| 848 |
+
"loss": 0.7391,
|
| 849 |
+
"step": 1200
|
| 850 |
+
},
|
| 851 |
+
{
|
| 852 |
+
"epoch": 0.242,
|
| 853 |
+
"grad_norm": 2.793842315673828,
|
| 854 |
+
"learning_rate": 4.6991214321665414e-05,
|
| 855 |
+
"loss": 1.0601,
|
| 856 |
+
"step": 1210
|
| 857 |
+
},
|
| 858 |
+
{
|
| 859 |
+
"epoch": 0.244,
|
| 860 |
+
"grad_norm": 0.8359806537628174,
|
| 861 |
+
"learning_rate": 4.690766700109659e-05,
|
| 862 |
+
"loss": 0.6689,
|
| 863 |
+
"step": 1220
|
| 864 |
+
},
|
| 865 |
+
{
|
| 866 |
+
"epoch": 0.246,
|
| 867 |
+
"grad_norm": 2.5736119747161865,
|
| 868 |
+
"learning_rate": 4.682305193174524e-05,
|
| 869 |
+
"loss": 1.2384,
|
| 870 |
+
"step": 1230
|
| 871 |
+
},
|
| 872 |
+
{
|
| 873 |
+
"epoch": 0.248,
|
| 874 |
+
"grad_norm": 0.0,
|
| 875 |
+
"learning_rate": 4.6737373237630476e-05,
|
| 876 |
+
"loss": 0.5366,
|
| 877 |
+
"step": 1240
|
| 878 |
+
},
|
| 879 |
+
{
|
| 880 |
+
"epoch": 0.25,
|
| 881 |
+
"grad_norm": 0.0,
|
| 882 |
+
"learning_rate": 4.665063509461097e-05,
|
| 883 |
+
"loss": 0.9924,
|
| 884 |
+
"step": 1250
|
| 885 |
+
},
|
| 886 |
+
{
|
| 887 |
+
"epoch": 0.252,
|
| 888 |
+
"grad_norm": 2.5116872787475586,
|
| 889 |
+
"learning_rate": 4.656284173018144e-05,
|
| 890 |
+
"loss": 1.1548,
|
| 891 |
+
"step": 1260
|
| 892 |
+
},
|
| 893 |
+
{
|
| 894 |
+
"epoch": 0.254,
|
| 895 |
+
"grad_norm": 0.1771220713853836,
|
| 896 |
+
"learning_rate": 4.6473997423266614e-05,
|
| 897 |
+
"loss": 0.798,
|
| 898 |
+
"step": 1270
|
| 899 |
+
},
|
| 900 |
+
{
|
| 901 |
+
"epoch": 0.256,
|
| 902 |
+
"grad_norm": 3.1786563396453857,
|
| 903 |
+
"learning_rate": 4.638410650401267e-05,
|
| 904 |
+
"loss": 0.8444,
|
| 905 |
+
"step": 1280
|
| 906 |
+
},
|
| 907 |
+
{
|
| 908 |
+
"epoch": 0.258,
|
| 909 |
+
"grad_norm": 2.526992082595825,
|
| 910 |
+
"learning_rate": 4.629317335357619e-05,
|
| 911 |
+
"loss": 1.4516,
|
| 912 |
+
"step": 1290
|
| 913 |
+
},
|
| 914 |
+
{
|
| 915 |
+
"epoch": 0.26,
|
| 916 |
+
"grad_norm": 1.599918007850647,
|
| 917 |
+
"learning_rate": 4.620120240391065e-05,
|
| 918 |
+
"loss": 0.4612,
|
| 919 |
+
"step": 1300
|
| 920 |
+
},
|
| 921 |
+
{
|
| 922 |
+
"epoch": 0.262,
|
| 923 |
+
"grad_norm": 0.9092065691947937,
|
| 924 |
+
"learning_rate": 4.610819813755038e-05,
|
| 925 |
+
"loss": 0.8674,
|
| 926 |
+
"step": 1310
|
| 927 |
+
},
|
| 928 |
+
{
|
| 929 |
+
"epoch": 0.264,
|
| 930 |
+
"grad_norm": 1.3519505262374878,
|
| 931 |
+
"learning_rate": 4.601416508739211e-05,
|
| 932 |
+
"loss": 0.8115,
|
| 933 |
+
"step": 1320
|
| 934 |
+
},
|
| 935 |
+
{
|
| 936 |
+
"epoch": 0.266,
|
| 937 |
+
"grad_norm": 2.8794472217559814,
|
| 938 |
+
"learning_rate": 4.591910783647404e-05,
|
| 939 |
+
"loss": 0.4957,
|
| 940 |
+
"step": 1330
|
| 941 |
+
},
|
| 942 |
+
{
|
| 943 |
+
"epoch": 0.268,
|
| 944 |
+
"grad_norm": 3.0845656394958496,
|
| 945 |
+
"learning_rate": 4.5823031017752485e-05,
|
| 946 |
+
"loss": 0.862,
|
| 947 |
+
"step": 1340
|
| 948 |
+
},
|
| 949 |
+
{
|
| 950 |
+
"epoch": 0.27,
|
| 951 |
+
"grad_norm": 0.0,
|
| 952 |
+
"learning_rate": 4.572593931387604e-05,
|
| 953 |
+
"loss": 0.2812,
|
| 954 |
+
"step": 1350
|
| 955 |
+
},
|
| 956 |
+
{
|
| 957 |
+
"epoch": 0.272,
|
| 958 |
+
"grad_norm": 0.0,
|
| 959 |
+
"learning_rate": 4.562783745695738e-05,
|
| 960 |
+
"loss": 2.1906,
|
| 961 |
+
"step": 1360
|
| 962 |
+
},
|
| 963 |
+
{
|
| 964 |
+
"epoch": 0.274,
|
| 965 |
+
"grad_norm": 8.815956115722656,
|
| 966 |
+
"learning_rate": 4.5528730228342605e-05,
|
| 967 |
+
"loss": 0.9072,
|
| 968 |
+
"step": 1370
|
| 969 |
+
},
|
| 970 |
+
{
|
| 971 |
+
"epoch": 0.276,
|
| 972 |
+
"grad_norm": 1.1590182781219482,
|
| 973 |
+
"learning_rate": 4.542862245837821e-05,
|
| 974 |
+
"loss": 0.5203,
|
| 975 |
+
"step": 1380
|
| 976 |
+
},
|
| 977 |
+
{
|
| 978 |
+
"epoch": 0.278,
|
| 979 |
+
"grad_norm": 3.897277593612671,
|
| 980 |
+
"learning_rate": 4.532751902617569e-05,
|
| 981 |
+
"loss": 0.8603,
|
| 982 |
+
"step": 1390
|
| 983 |
+
},
|
| 984 |
+
{
|
| 985 |
+
"epoch": 0.28,
|
| 986 |
+
"grad_norm": 7.185044288635254,
|
| 987 |
+
"learning_rate": 4.522542485937369e-05,
|
| 988 |
+
"loss": 0.7643,
|
| 989 |
+
"step": 1400
|
| 990 |
+
},
|
| 991 |
+
{
|
| 992 |
+
"epoch": 0.282,
|
| 993 |
+
"grad_norm": 2.3547818660736084,
|
| 994 |
+
"learning_rate": 4.512234493389785e-05,
|
| 995 |
+
"loss": 0.8807,
|
| 996 |
+
"step": 1410
|
| 997 |
+
},
|
| 998 |
+
{
|
| 999 |
+
"epoch": 0.284,
|
| 1000 |
+
"grad_norm": 4.687290668487549,
|
| 1001 |
+
"learning_rate": 4.5018284273718336e-05,
|
| 1002 |
+
"loss": 0.8509,
|
| 1003 |
+
"step": 1420
|
| 1004 |
+
},
|
| 1005 |
+
{
|
| 1006 |
+
"epoch": 0.286,
|
| 1007 |
+
"grad_norm": 0.5399876832962036,
|
| 1008 |
+
"learning_rate": 4.491324795060491e-05,
|
| 1009 |
+
"loss": 1.7827,
|
| 1010 |
+
"step": 1430
|
| 1011 |
+
},
|
| 1012 |
+
{
|
| 1013 |
+
"epoch": 0.288,
|
| 1014 |
+
"grad_norm": 3.768984794616699,
|
| 1015 |
+
"learning_rate": 4.480724108387977e-05,
|
| 1016 |
+
"loss": 0.7381,
|
| 1017 |
+
"step": 1440
|
| 1018 |
+
},
|
| 1019 |
+
{
|
| 1020 |
+
"epoch": 0.29,
|
| 1021 |
+
"grad_norm": 4.747011184692383,
|
| 1022 |
+
"learning_rate": 4.4700268840168045e-05,
|
| 1023 |
+
"loss": 1.691,
|
| 1024 |
+
"step": 1450
|
| 1025 |
+
},
|
| 1026 |
+
{
|
| 1027 |
+
"epoch": 0.292,
|
| 1028 |
+
"grad_norm": 1.8903411626815796,
|
| 1029 |
+
"learning_rate": 4.4592336433146e-05,
|
| 1030 |
+
"loss": 2.0687,
|
| 1031 |
+
"step": 1460
|
| 1032 |
+
},
|
| 1033 |
+
{
|
| 1034 |
+
"epoch": 0.294,
|
| 1035 |
+
"grad_norm": 2.1586012840270996,
|
| 1036 |
+
"learning_rate": 4.448344912328686e-05,
|
| 1037 |
+
"loss": 1.8049,
|
| 1038 |
+
"step": 1470
|
| 1039 |
+
},
|
| 1040 |
+
{
|
| 1041 |
+
"epoch": 0.296,
|
| 1042 |
+
"grad_norm": 0.43952643871307373,
|
| 1043 |
+
"learning_rate": 4.4373612217604496e-05,
|
| 1044 |
+
"loss": 0.8578,
|
| 1045 |
+
"step": 1480
|
| 1046 |
+
},
|
| 1047 |
+
{
|
| 1048 |
+
"epoch": 0.298,
|
| 1049 |
+
"grad_norm": 0.4807486832141876,
|
| 1050 |
+
"learning_rate": 4.426283106939474e-05,
|
| 1051 |
+
"loss": 1.169,
|
| 1052 |
+
"step": 1490
|
| 1053 |
+
},
|
| 1054 |
+
{
|
| 1055 |
+
"epoch": 0.3,
|
| 1056 |
+
"grad_norm": 0.5909303426742554,
|
| 1057 |
+
"learning_rate": 4.415111107797445e-05,
|
| 1058 |
+
"loss": 0.3027,
|
| 1059 |
+
"step": 1500
|
| 1060 |
+
},
|
| 1061 |
+
{
|
| 1062 |
+
"epoch": 0.302,
|
| 1063 |
+
"grad_norm": 7.744114398956299,
|
| 1064 |
+
"learning_rate": 4.403845768841842e-05,
|
| 1065 |
+
"loss": 1.0326,
|
| 1066 |
+
"step": 1510
|
| 1067 |
+
},
|
| 1068 |
+
{
|
| 1069 |
+
"epoch": 0.304,
|
| 1070 |
+
"grad_norm": 0.0,
|
| 1071 |
+
"learning_rate": 4.3924876391293915e-05,
|
| 1072 |
+
"loss": 1.0785,
|
| 1073 |
+
"step": 1520
|
| 1074 |
+
},
|
| 1075 |
+
{
|
| 1076 |
+
"epoch": 0.306,
|
| 1077 |
+
"grad_norm": 11.178783416748047,
|
| 1078 |
+
"learning_rate": 4.381037272239311e-05,
|
| 1079 |
+
"loss": 1.4215,
|
| 1080 |
+
"step": 1530
|
| 1081 |
+
},
|
| 1082 |
+
{
|
| 1083 |
+
"epoch": 0.308,
|
| 1084 |
+
"grad_norm": 0.0,
|
| 1085 |
+
"learning_rate": 4.36949522624633e-05,
|
| 1086 |
+
"loss": 0.4891,
|
| 1087 |
+
"step": 1540
|
| 1088 |
+
},
|
| 1089 |
+
{
|
| 1090 |
+
"epoch": 0.31,
|
| 1091 |
+
"grad_norm": 0.0,
|
| 1092 |
+
"learning_rate": 4.357862063693486e-05,
|
| 1093 |
+
"loss": 1.3672,
|
| 1094 |
+
"step": 1550
|
| 1095 |
+
},
|
| 1096 |
+
{
|
| 1097 |
+
"epoch": 0.312,
|
| 1098 |
+
"grad_norm": 8.229108810424805,
|
| 1099 |
+
"learning_rate": 4.3461383515647106e-05,
|
| 1100 |
+
"loss": 1.0202,
|
| 1101 |
+
"step": 1560
|
| 1102 |
+
},
|
| 1103 |
+
{
|
| 1104 |
+
"epoch": 0.314,
|
| 1105 |
+
"grad_norm": 1.5745594501495361,
|
| 1106 |
+
"learning_rate": 4.334324661257191e-05,
|
| 1107 |
+
"loss": 0.9313,
|
| 1108 |
+
"step": 1570
|
| 1109 |
+
},
|
| 1110 |
+
{
|
| 1111 |
+
"epoch": 0.316,
|
| 1112 |
+
"grad_norm": 2.392512559890747,
|
| 1113 |
+
"learning_rate": 4.3224215685535294e-05,
|
| 1114 |
+
"loss": 0.4453,
|
| 1115 |
+
"step": 1580
|
| 1116 |
+
},
|
| 1117 |
+
{
|
| 1118 |
+
"epoch": 0.318,
|
| 1119 |
+
"grad_norm": 0.0,
|
| 1120 |
+
"learning_rate": 4.3104296535936695e-05,
|
| 1121 |
+
"loss": 1.694,
|
| 1122 |
+
"step": 1590
|
| 1123 |
+
},
|
| 1124 |
+
{
|
| 1125 |
+
"epoch": 0.32,
|
| 1126 |
+
"grad_norm": 0.782818615436554,
|
| 1127 |
+
"learning_rate": 4.2983495008466276e-05,
|
| 1128 |
+
"loss": 1.4264,
|
| 1129 |
+
"step": 1600
|
| 1130 |
+
},
|
| 1131 |
+
{
|
| 1132 |
+
"epoch": 0.322,
|
| 1133 |
+
"grad_norm": 2.611419916152954,
|
| 1134 |
+
"learning_rate": 4.2861816990820084e-05,
|
| 1135 |
+
"loss": 0.5798,
|
| 1136 |
+
"step": 1610
|
| 1137 |
+
},
|
| 1138 |
+
{
|
| 1139 |
+
"epoch": 0.324,
|
| 1140 |
+
"grad_norm": 1.1155253648757935,
|
| 1141 |
+
"learning_rate": 4.273926841341302e-05,
|
| 1142 |
+
"loss": 0.6301,
|
| 1143 |
+
"step": 1620
|
| 1144 |
+
},
|
| 1145 |
+
{
|
| 1146 |
+
"epoch": 0.326,
|
| 1147 |
+
"grad_norm": 3.934415102005005,
|
| 1148 |
+
"learning_rate": 4.261585524908987e-05,
|
| 1149 |
+
"loss": 0.9712,
|
| 1150 |
+
"step": 1630
|
| 1151 |
+
},
|
| 1152 |
+
{
|
| 1153 |
+
"epoch": 0.328,
|
| 1154 |
+
"grad_norm": 3.1011881828308105,
|
| 1155 |
+
"learning_rate": 4.249158351283414e-05,
|
| 1156 |
+
"loss": 0.7751,
|
| 1157 |
+
"step": 1640
|
| 1158 |
+
},
|
| 1159 |
+
{
|
| 1160 |
+
"epoch": 0.33,
|
| 1161 |
+
"grad_norm": 5.468195915222168,
|
| 1162 |
+
"learning_rate": 4.2366459261474933e-05,
|
| 1163 |
+
"loss": 0.5724,
|
| 1164 |
+
"step": 1650
|
| 1165 |
+
},
|
| 1166 |
+
{
|
| 1167 |
+
"epoch": 0.332,
|
| 1168 |
+
"grad_norm": 0.4597141742706299,
|
| 1169 |
+
"learning_rate": 4.224048859339175e-05,
|
| 1170 |
+
"loss": 0.9216,
|
| 1171 |
+
"step": 1660
|
| 1172 |
+
},
|
| 1173 |
+
{
|
| 1174 |
+
"epoch": 0.334,
|
| 1175 |
+
"grad_norm": 0.0,
|
| 1176 |
+
"learning_rate": 4.211367764821722e-05,
|
| 1177 |
+
"loss": 0.5461,
|
| 1178 |
+
"step": 1670
|
| 1179 |
+
},
|
| 1180 |
+
{
|
| 1181 |
+
"epoch": 0.336,
|
| 1182 |
+
"grad_norm": 1.2237460613250732,
|
| 1183 |
+
"learning_rate": 4.198603260653792e-05,
|
| 1184 |
+
"loss": 1.0833,
|
| 1185 |
+
"step": 1680
|
| 1186 |
+
},
|
| 1187 |
+
{
|
| 1188 |
+
"epoch": 0.338,
|
| 1189 |
+
"grad_norm": 28.4909725189209,
|
| 1190 |
+
"learning_rate": 4.185755968959308e-05,
|
| 1191 |
+
"loss": 1.6468,
|
| 1192 |
+
"step": 1690
|
| 1193 |
+
},
|
| 1194 |
+
{
|
| 1195 |
+
"epoch": 0.34,
|
| 1196 |
+
"grad_norm": 0.7980864644050598,
|
| 1197 |
+
"learning_rate": 4.172826515897146e-05,
|
| 1198 |
+
"loss": 0.6583,
|
| 1199 |
+
"step": 1700
|
| 1200 |
+
},
|
| 1201 |
+
{
|
| 1202 |
+
"epoch": 0.342,
|
| 1203 |
+
"grad_norm": 1.208788275718689,
|
| 1204 |
+
"learning_rate": 4.1598155316306044e-05,
|
| 1205 |
+
"loss": 1.1721,
|
| 1206 |
+
"step": 1710
|
| 1207 |
+
},
|
| 1208 |
+
{
|
| 1209 |
+
"epoch": 0.344,
|
| 1210 |
+
"grad_norm": 0.0,
|
| 1211 |
+
"learning_rate": 4.146723650296701e-05,
|
| 1212 |
+
"loss": 0.8588,
|
| 1213 |
+
"step": 1720
|
| 1214 |
+
},
|
| 1215 |
+
{
|
| 1216 |
+
"epoch": 0.346,
|
| 1217 |
+
"grad_norm": 0.0,
|
| 1218 |
+
"learning_rate": 4.133551509975264e-05,
|
| 1219 |
+
"loss": 0.4645,
|
| 1220 |
+
"step": 1730
|
| 1221 |
+
},
|
| 1222 |
+
{
|
| 1223 |
+
"epoch": 0.348,
|
| 1224 |
+
"grad_norm": 0.0,
|
| 1225 |
+
"learning_rate": 4.1202997526578276e-05,
|
| 1226 |
+
"loss": 1.4741,
|
| 1227 |
+
"step": 1740
|
| 1228 |
+
},
|
| 1229 |
+
{
|
| 1230 |
+
"epoch": 0.35,
|
| 1231 |
+
"grad_norm": 0.33904269337654114,
|
| 1232 |
+
"learning_rate": 4.1069690242163484e-05,
|
| 1233 |
+
"loss": 0.9873,
|
| 1234 |
+
"step": 1750
|
| 1235 |
+
},
|
| 1236 |
+
{
|
| 1237 |
+
"epoch": 0.352,
|
| 1238 |
+
"grad_norm": 0.7592562437057495,
|
| 1239 |
+
"learning_rate": 4.093559974371725e-05,
|
| 1240 |
+
"loss": 0.6202,
|
| 1241 |
+
"step": 1760
|
| 1242 |
+
},
|
| 1243 |
+
{
|
| 1244 |
+
"epoch": 0.354,
|
| 1245 |
+
"grad_norm": 0.0,
|
| 1246 |
+
"learning_rate": 4.080073256662127e-05,
|
| 1247 |
+
"loss": 0.7872,
|
| 1248 |
+
"step": 1770
|
| 1249 |
+
},
|
| 1250 |
+
{
|
| 1251 |
+
"epoch": 0.356,
|
| 1252 |
+
"grad_norm": 0.42382943630218506,
|
| 1253 |
+
"learning_rate": 4.066509528411152e-05,
|
| 1254 |
+
"loss": 1.9155,
|
| 1255 |
+
"step": 1780
|
| 1256 |
+
},
|
| 1257 |
+
{
|
| 1258 |
+
"epoch": 0.358,
|
| 1259 |
+
"grad_norm": 0.7281541228294373,
|
| 1260 |
+
"learning_rate": 4.052869450695776e-05,
|
| 1261 |
+
"loss": 0.5979,
|
| 1262 |
+
"step": 1790
|
| 1263 |
+
},
|
| 1264 |
+
{
|
| 1265 |
+
"epoch": 0.36,
|
| 1266 |
+
"grad_norm": 2.018998146057129,
|
| 1267 |
+
"learning_rate": 4.039153688314145e-05,
|
| 1268 |
+
"loss": 0.8478,
|
| 1269 |
+
"step": 1800
|
| 1270 |
+
},
|
| 1271 |
+
{
|
| 1272 |
+
"epoch": 0.362,
|
| 1273 |
+
"grad_norm": 0.34330451488494873,
|
| 1274 |
+
"learning_rate": 4.02536290975317e-05,
|
| 1275 |
+
"loss": 0.623,
|
| 1276 |
+
"step": 1810
|
| 1277 |
+
},
|
| 1278 |
+
{
|
| 1279 |
+
"epoch": 0.364,
|
| 1280 |
+
"grad_norm": 0.0,
|
| 1281 |
+
"learning_rate": 4.011497787155938e-05,
|
| 1282 |
+
"loss": 1.4658,
|
| 1283 |
+
"step": 1820
|
| 1284 |
+
},
|
| 1285 |
+
{
|
| 1286 |
+
"epoch": 0.366,
|
| 1287 |
+
"grad_norm": 0.0,
|
| 1288 |
+
"learning_rate": 3.997558996288965e-05,
|
| 1289 |
+
"loss": 1.9824,
|
| 1290 |
+
"step": 1830
|
| 1291 |
+
},
|
| 1292 |
+
{
|
| 1293 |
+
"epoch": 0.368,
|
| 1294 |
+
"grad_norm": 0.0,
|
| 1295 |
+
"learning_rate": 3.983547216509254e-05,
|
| 1296 |
+
"loss": 0.626,
|
| 1297 |
+
"step": 1840
|
| 1298 |
+
},
|
| 1299 |
+
{
|
| 1300 |
+
"epoch": 0.37,
|
| 1301 |
+
"grad_norm": 0.4369350075721741,
|
| 1302 |
+
"learning_rate": 3.969463130731183e-05,
|
| 1303 |
+
"loss": 0.5819,
|
| 1304 |
+
"step": 1850
|
| 1305 |
+
},
|
| 1306 |
+
{
|
| 1307 |
+
"epoch": 0.372,
|
| 1308 |
+
"grad_norm": 0.36127737164497375,
|
| 1309 |
+
"learning_rate": 3.955307425393224e-05,
|
| 1310 |
+
"loss": 0.7553,
|
| 1311 |
+
"step": 1860
|
| 1312 |
+
},
|
| 1313 |
+
{
|
| 1314 |
+
"epoch": 0.374,
|
| 1315 |
+
"grad_norm": 19.533721923828125,
|
| 1316 |
+
"learning_rate": 3.941080790424484e-05,
|
| 1317 |
+
"loss": 1.3426,
|
| 1318 |
+
"step": 1870
|
| 1319 |
+
},
|
| 1320 |
+
{
|
| 1321 |
+
"epoch": 0.376,
|
| 1322 |
+
"grad_norm": 1.5646860599517822,
|
| 1323 |
+
"learning_rate": 3.92678391921108e-05,
|
| 1324 |
+
"loss": 0.8386,
|
| 1325 |
+
"step": 1880
|
| 1326 |
+
},
|
| 1327 |
+
{
|
| 1328 |
+
"epoch": 0.378,
|
| 1329 |
+
"grad_norm": 0.8158350586891174,
|
| 1330 |
+
"learning_rate": 3.912417508562345e-05,
|
| 1331 |
+
"loss": 0.5088,
|
| 1332 |
+
"step": 1890
|
| 1333 |
+
},
|
| 1334 |
+
{
|
| 1335 |
+
"epoch": 0.38,
|
| 1336 |
+
"grad_norm": 9.138282775878906,
|
| 1337 |
+
"learning_rate": 3.897982258676867e-05,
|
| 1338 |
+
"loss": 2.6207,
|
| 1339 |
+
"step": 1900
|
| 1340 |
+
},
|
| 1341 |
+
{
|
| 1342 |
+
"epoch": 0.382,
|
| 1343 |
+
"grad_norm": 1.7073054313659668,
|
| 1344 |
+
"learning_rate": 3.883478873108361e-05,
|
| 1345 |
+
"loss": 0.8114,
|
| 1346 |
+
"step": 1910
|
| 1347 |
+
},
|
| 1348 |
+
{
|
| 1349 |
+
"epoch": 0.384,
|
| 1350 |
+
"grad_norm": 0.0,
|
| 1351 |
+
"learning_rate": 3.868908058731376e-05,
|
| 1352 |
+
"loss": 0.489,
|
| 1353 |
+
"step": 1920
|
| 1354 |
+
},
|
| 1355 |
+
{
|
| 1356 |
+
"epoch": 0.386,
|
| 1357 |
+
"grad_norm": 0.0,
|
| 1358 |
+
"learning_rate": 3.85427052570685e-05,
|
| 1359 |
+
"loss": 0.6035,
|
| 1360 |
+
"step": 1930
|
| 1361 |
+
},
|
| 1362 |
+
{
|
| 1363 |
+
"epoch": 0.388,
|
| 1364 |
+
"grad_norm": 0.5438594222068787,
|
| 1365 |
+
"learning_rate": 3.8395669874474915e-05,
|
| 1366 |
+
"loss": 0.9305,
|
| 1367 |
+
"step": 1940
|
| 1368 |
+
},
|
| 1369 |
+
{
|
| 1370 |
+
"epoch": 0.39,
|
| 1371 |
+
"grad_norm": 0.0,
|
| 1372 |
+
"learning_rate": 3.824798160583012e-05,
|
| 1373 |
+
"loss": 0.5244,
|
| 1374 |
+
"step": 1950
|
| 1375 |
+
},
|
| 1376 |
+
{
|
| 1377 |
+
"epoch": 0.392,
|
| 1378 |
+
"grad_norm": 0.6433933973312378,
|
| 1379 |
+
"learning_rate": 3.8099647649251986e-05,
|
| 1380 |
+
"loss": 1.4947,
|
| 1381 |
+
"step": 1960
|
| 1382 |
+
},
|
| 1383 |
+
{
|
| 1384 |
+
"epoch": 0.394,
|
| 1385 |
+
"grad_norm": 1.229801058769226,
|
| 1386 |
+
"learning_rate": 3.795067523432826e-05,
|
| 1387 |
+
"loss": 0.6931,
|
| 1388 |
+
"step": 1970
|
| 1389 |
+
},
|
| 1390 |
+
{
|
| 1391 |
+
"epoch": 0.396,
|
| 1392 |
+
"grad_norm": 4.736255645751953,
|
| 1393 |
+
"learning_rate": 3.780107162176429e-05,
|
| 1394 |
+
"loss": 0.8547,
|
| 1395 |
+
"step": 1980
|
| 1396 |
+
},
|
| 1397 |
+
{
|
| 1398 |
+
"epoch": 0.398,
|
| 1399 |
+
"grad_norm": 11.904961585998535,
|
| 1400 |
+
"learning_rate": 3.765084410302909e-05,
|
| 1401 |
+
"loss": 2.4946,
|
| 1402 |
+
"step": 1990
|
| 1403 |
+
},
|
| 1404 |
+
{
|
| 1405 |
+
"epoch": 0.4,
|
| 1406 |
+
"grad_norm": 6.879239082336426,
|
| 1407 |
+
"learning_rate": 3.7500000000000003e-05,
|
| 1408 |
+
"loss": 1.2444,
|
| 1409 |
+
"step": 2000
|
| 1410 |
+
},
|
| 1411 |
+
{
|
| 1412 |
+
"epoch": 0.402,
|
| 1413 |
+
"grad_norm": 0.3526920676231384,
|
| 1414 |
+
"learning_rate": 3.7348546664605777e-05,
|
| 1415 |
+
"loss": 1.2603,
|
| 1416 |
+
"step": 2010
|
| 1417 |
+
},
|
| 1418 |
+
{
|
| 1419 |
+
"epoch": 0.404,
|
| 1420 |
+
"grad_norm": 0.6010252237319946,
|
| 1421 |
+
"learning_rate": 3.719649147846832e-05,
|
| 1422 |
+
"loss": 0.5348,
|
| 1423 |
+
"step": 2020
|
| 1424 |
+
},
|
| 1425 |
+
{
|
| 1426 |
+
"epoch": 0.406,
|
| 1427 |
+
"grad_norm": 2.7081878185272217,
|
| 1428 |
+
"learning_rate": 3.704384185254288e-05,
|
| 1429 |
+
"loss": 0.6968,
|
| 1430 |
+
"step": 2030
|
| 1431 |
+
},
|
| 1432 |
+
{
|
| 1433 |
+
"epoch": 0.408,
|
| 1434 |
+
"grad_norm": 79.92549133300781,
|
| 1435 |
+
"learning_rate": 3.689060522675689e-05,
|
| 1436 |
+
"loss": 2.8761,
|
| 1437 |
+
"step": 2040
|
| 1438 |
+
},
|
| 1439 |
+
{
|
| 1440 |
+
"epoch": 0.41,
|
| 1441 |
+
"grad_norm": 1.650651216506958,
|
| 1442 |
+
"learning_rate": 3.673678906964727e-05,
|
| 1443 |
+
"loss": 1.6509,
|
| 1444 |
+
"step": 2050
|
| 1445 |
+
},
|
| 1446 |
+
{
|
| 1447 |
+
"epoch": 0.412,
|
| 1448 |
+
"grad_norm": 7.615381240844727,
|
| 1449 |
+
"learning_rate": 3.6582400877996546e-05,
|
| 1450 |
+
"loss": 0.8742,
|
| 1451 |
+
"step": 2060
|
| 1452 |
+
},
|
| 1453 |
+
{
|
| 1454 |
+
"epoch": 0.414,
|
| 1455 |
+
"grad_norm": 1.0772336721420288,
|
| 1456 |
+
"learning_rate": 3.642744817646736e-05,
|
| 1457 |
+
"loss": 1.241,
|
| 1458 |
+
"step": 2070
|
| 1459 |
+
},
|
| 1460 |
+
{
|
| 1461 |
+
"epoch": 0.416,
|
| 1462 |
+
"grad_norm": 0.4477096498012543,
|
| 1463 |
+
"learning_rate": 3.627193851723577e-05,
|
| 1464 |
+
"loss": 0.6697,
|
| 1465 |
+
"step": 2080
|
| 1466 |
+
},
|
| 1467 |
+
{
|
| 1468 |
+
"epoch": 0.418,
|
| 1469 |
+
"grad_norm": 6.794370174407959,
|
| 1470 |
+
"learning_rate": 3.611587947962319e-05,
|
| 1471 |
+
"loss": 0.8601,
|
| 1472 |
+
"step": 2090
|
| 1473 |
+
},
|
| 1474 |
+
{
|
| 1475 |
+
"epoch": 0.42,
|
| 1476 |
+
"grad_norm": 0.0,
|
| 1477 |
+
"learning_rate": 3.5959278669726935e-05,
|
| 1478 |
+
"loss": 1.6495,
|
| 1479 |
+
"step": 2100
|
| 1480 |
+
},
|
| 1481 |
+
{
|
| 1482 |
+
"epoch": 0.422,
|
| 1483 |
+
"grad_norm": 6.150529384613037,
|
| 1484 |
+
"learning_rate": 3.580214372004956e-05,
|
| 1485 |
+
"loss": 1.1695,
|
| 1486 |
+
"step": 2110
|
| 1487 |
+
},
|
| 1488 |
+
{
|
| 1489 |
+
"epoch": 0.424,
|
| 1490 |
+
"grad_norm": 1.6611673831939697,
|
| 1491 |
+
"learning_rate": 3.564448228912682e-05,
|
| 1492 |
+
"loss": 2.1528,
|
| 1493 |
+
"step": 2120
|
| 1494 |
+
},
|
| 1495 |
+
{
|
| 1496 |
+
"epoch": 0.426,
|
| 1497 |
+
"grad_norm": 20.955806732177734,
|
| 1498 |
+
"learning_rate": 3.548630206115443e-05,
|
| 1499 |
+
"loss": 0.7449,
|
| 1500 |
+
"step": 2130
|
| 1501 |
+
},
|
| 1502 |
+
{
|
| 1503 |
+
"epoch": 0.428,
|
| 1504 |
+
"grad_norm": 11.42070198059082,
|
| 1505 |
+
"learning_rate": 3.532761074561355e-05,
|
| 1506 |
+
"loss": 0.7268,
|
| 1507 |
+
"step": 2140
|
| 1508 |
+
},
|
| 1509 |
+
{
|
| 1510 |
+
"epoch": 0.43,
|
| 1511 |
+
"grad_norm": 3.5283825397491455,
|
| 1512 |
+
"learning_rate": 3.516841607689501e-05,
|
| 1513 |
+
"loss": 0.7794,
|
| 1514 |
+
"step": 2150
|
| 1515 |
+
},
|
| 1516 |
+
{
|
| 1517 |
+
"epoch": 0.432,
|
| 1518 |
+
"grad_norm": 1.6688228845596313,
|
| 1519 |
+
"learning_rate": 3.5008725813922386e-05,
|
| 1520 |
+
"loss": 0.727,
|
| 1521 |
+
"step": 2160
|
| 1522 |
+
},
|
| 1523 |
+
{
|
| 1524 |
+
"epoch": 0.434,
|
| 1525 |
+
"grad_norm": 2.2399706840515137,
|
| 1526 |
+
"learning_rate": 3.484854773977378e-05,
|
| 1527 |
+
"loss": 2.1652,
|
| 1528 |
+
"step": 2170
|
| 1529 |
+
},
|
| 1530 |
+
{
|
| 1531 |
+
"epoch": 0.436,
|
| 1532 |
+
"grad_norm": 1.4572434425354004,
|
| 1533 |
+
"learning_rate": 3.4687889661302576e-05,
|
| 1534 |
+
"loss": 0.4095,
|
| 1535 |
+
"step": 2180
|
| 1536 |
+
},
|
| 1537 |
+
{
|
| 1538 |
+
"epoch": 0.438,
|
| 1539 |
+
"grad_norm": 9.194727897644043,
|
| 1540 |
+
"learning_rate": 3.452675940875686e-05,
|
| 1541 |
+
"loss": 0.8675,
|
| 1542 |
+
"step": 2190
|
| 1543 |
+
},
|
| 1544 |
+
{
|
| 1545 |
+
"epoch": 0.44,
|
| 1546 |
+
"grad_norm": 6.2736053466796875,
|
| 1547 |
+
"learning_rate": 3.436516483539781e-05,
|
| 1548 |
+
"loss": 0.8658,
|
| 1549 |
+
"step": 2200
|
| 1550 |
+
},
|
| 1551 |
+
{
|
| 1552 |
+
"epoch": 0.442,
|
| 1553 |
+
"grad_norm": 0.8128998875617981,
|
| 1554 |
+
"learning_rate": 3.4203113817116957e-05,
|
| 1555 |
+
"loss": 0.3739,
|
| 1556 |
+
"step": 2210
|
| 1557 |
+
},
|
| 1558 |
+
{
|
| 1559 |
+
"epoch": 0.444,
|
| 1560 |
+
"grad_norm": 3.398008346557617,
|
| 1561 |
+
"learning_rate": 3.4040614252052305e-05,
|
| 1562 |
+
"loss": 0.8909,
|
| 1563 |
+
"step": 2220
|
| 1564 |
+
},
|
| 1565 |
+
{
|
| 1566 |
+
"epoch": 0.446,
|
| 1567 |
+
"grad_norm": 0.40850016474723816,
|
| 1568 |
+
"learning_rate": 3.387767406020343e-05,
|
| 1569 |
+
"loss": 1.4721,
|
| 1570 |
+
"step": 2230
|
| 1571 |
+
},
|
| 1572 |
+
{
|
| 1573 |
+
"epoch": 0.448,
|
| 1574 |
+
"grad_norm": 6.545433521270752,
|
| 1575 |
+
"learning_rate": 3.3714301183045385e-05,
|
| 1576 |
+
"loss": 1.001,
|
| 1577 |
+
"step": 2240
|
| 1578 |
+
},
|
| 1579 |
+
{
|
| 1580 |
+
"epoch": 0.45,
|
| 1581 |
+
"grad_norm": 1.5666799545288086,
|
| 1582 |
+
"learning_rate": 3.355050358314172e-05,
|
| 1583 |
+
"loss": 0.7794,
|
| 1584 |
+
"step": 2250
|
| 1585 |
+
},
|
| 1586 |
+
{
|
| 1587 |
+
"epoch": 0.452,
|
| 1588 |
+
"grad_norm": 3.0852999687194824,
|
| 1589 |
+
"learning_rate": 3.338628924375638e-05,
|
| 1590 |
+
"loss": 0.3814,
|
| 1591 |
+
"step": 2260
|
| 1592 |
+
},
|
| 1593 |
+
{
|
| 1594 |
+
"epoch": 0.454,
|
| 1595 |
+
"grad_norm": 0.48218706250190735,
|
| 1596 |
+
"learning_rate": 3.322166616846458e-05,
|
| 1597 |
+
"loss": 0.5824,
|
| 1598 |
+
"step": 2270
|
| 1599 |
+
},
|
| 1600 |
+
{
|
| 1601 |
+
"epoch": 0.456,
|
| 1602 |
+
"grad_norm": 1.3498566150665283,
|
| 1603 |
+
"learning_rate": 3.305664238076278e-05,
|
| 1604 |
+
"loss": 1.3243,
|
| 1605 |
+
"step": 2280
|
| 1606 |
+
},
|
| 1607 |
+
{
|
| 1608 |
+
"epoch": 0.458,
|
| 1609 |
+
"grad_norm": 0.0,
|
| 1610 |
+
"learning_rate": 3.289122592367757e-05,
|
| 1611 |
+
"loss": 0.4481,
|
| 1612 |
+
"step": 2290
|
| 1613 |
+
},
|
| 1614 |
+
{
|
| 1615 |
+
"epoch": 0.46,
|
| 1616 |
+
"grad_norm": 0.0,
|
| 1617 |
+
"learning_rate": 3.272542485937369e-05,
|
| 1618 |
+
"loss": 0.5601,
|
| 1619 |
+
"step": 2300
|
| 1620 |
+
},
|
| 1621 |
+
{
|
| 1622 |
+
"epoch": 0.462,
|
| 1623 |
+
"grad_norm": 0.0,
|
| 1624 |
+
"learning_rate": 3.2559247268761115e-05,
|
| 1625 |
+
"loss": 0.6695,
|
| 1626 |
+
"step": 2310
|
| 1627 |
+
},
|
| 1628 |
+
{
|
| 1629 |
+
"epoch": 0.464,
|
| 1630 |
+
"grad_norm": 11.921672821044922,
|
| 1631 |
+
"learning_rate": 3.239270125110117e-05,
|
| 1632 |
+
"loss": 0.9525,
|
| 1633 |
+
"step": 2320
|
| 1634 |
+
},
|
| 1635 |
+
{
|
| 1636 |
+
"epoch": 0.466,
|
| 1637 |
+
"grad_norm": 1.5229754447937012,
|
| 1638 |
+
"learning_rate": 3.222579492361179e-05,
|
| 1639 |
+
"loss": 0.5488,
|
| 1640 |
+
"step": 2330
|
| 1641 |
+
},
|
| 1642 |
+
{
|
| 1643 |
+
"epoch": 0.468,
|
| 1644 |
+
"grad_norm": 12.575560569763184,
|
| 1645 |
+
"learning_rate": 3.205853642107192e-05,
|
| 1646 |
+
"loss": 0.8862,
|
| 1647 |
+
"step": 2340
|
| 1648 |
+
},
|
| 1649 |
+
{
|
| 1650 |
+
"epoch": 0.47,
|
| 1651 |
+
"grad_norm": 0.0,
|
| 1652 |
+
"learning_rate": 3.1890933895424976e-05,
|
| 1653 |
+
"loss": 1.1218,
|
| 1654 |
+
"step": 2350
|
| 1655 |
+
},
|
| 1656 |
+
{
|
| 1657 |
+
"epoch": 0.472,
|
| 1658 |
+
"grad_norm": 0.77229905128479,
|
| 1659 |
+
"learning_rate": 3.172299551538164e-05,
|
| 1660 |
+
"loss": 1.4365,
|
| 1661 |
+
"step": 2360
|
| 1662 |
+
},
|
| 1663 |
+
{
|
| 1664 |
+
"epoch": 0.474,
|
| 1665 |
+
"grad_norm": 0.7523584961891174,
|
| 1666 |
+
"learning_rate": 3.155472946602162e-05,
|
| 1667 |
+
"loss": 1.4355,
|
| 1668 |
+
"step": 2370
|
| 1669 |
+
},
|
| 1670 |
+
{
|
| 1671 |
+
"epoch": 0.476,
|
| 1672 |
+
"grad_norm": 2.284735918045044,
|
| 1673 |
+
"learning_rate": 3.138614394839476e-05,
|
| 1674 |
+
"loss": 1.6462,
|
| 1675 |
+
"step": 2380
|
| 1676 |
+
},
|
| 1677 |
+
{
|
| 1678 |
+
"epoch": 0.478,
|
| 1679 |
+
"grad_norm": 0.0,
|
| 1680 |
+
"learning_rate": 3.121724717912138e-05,
|
| 1681 |
+
"loss": 0.6112,
|
| 1682 |
+
"step": 2390
|
| 1683 |
+
},
|
| 1684 |
+
{
|
| 1685 |
+
"epoch": 0.48,
|
| 1686 |
+
"grad_norm": 1.5213804244995117,
|
| 1687 |
+
"learning_rate": 3.104804738999169e-05,
|
| 1688 |
+
"loss": 0.2338,
|
| 1689 |
+
"step": 2400
|
| 1690 |
+
},
|
| 1691 |
+
{
|
| 1692 |
+
"epoch": 0.482,
|
| 1693 |
+
"grad_norm": 4.481227874755859,
|
| 1694 |
+
"learning_rate": 3.087855282756475e-05,
|
| 1695 |
+
"loss": 0.5969,
|
| 1696 |
+
"step": 2410
|
| 1697 |
+
},
|
| 1698 |
+
{
|
| 1699 |
+
"epoch": 0.484,
|
| 1700 |
+
"grad_norm": 2.4558937549591064,
|
| 1701 |
+
"learning_rate": 3.0708771752766394e-05,
|
| 1702 |
+
"loss": 0.8462,
|
| 1703 |
+
"step": 2420
|
| 1704 |
+
},
|
| 1705 |
+
{
|
| 1706 |
+
"epoch": 0.486,
|
| 1707 |
+
"grad_norm": 4.428265571594238,
|
| 1708 |
+
"learning_rate": 3.053871244048669e-05,
|
| 1709 |
+
"loss": 0.4652,
|
| 1710 |
+
"step": 2430
|
| 1711 |
+
},
|
| 1712 |
+
{
|
| 1713 |
+
"epoch": 0.488,
|
| 1714 |
+
"grad_norm": 0.458535760641098,
|
| 1715 |
+
"learning_rate": 3.0368383179176585e-05,
|
| 1716 |
+
"loss": 0.212,
|
| 1717 |
+
"step": 2440
|
| 1718 |
+
},
|
| 1719 |
+
{
|
| 1720 |
+
"epoch": 0.49,
|
| 1721 |
+
"grad_norm": 0.5157924294471741,
|
| 1722 |
+
"learning_rate": 3.0197792270443982e-05,
|
| 1723 |
+
"loss": 0.697,
|
| 1724 |
+
"step": 2450
|
| 1725 |
+
},
|
| 1726 |
+
{
|
| 1727 |
+
"epoch": 0.492,
|
| 1728 |
+
"grad_norm": 0.37859025597572327,
|
| 1729 |
+
"learning_rate": 3.002694802864912e-05,
|
| 1730 |
+
"loss": 1.3351,
|
| 1731 |
+
"step": 2460
|
| 1732 |
+
},
|
| 1733 |
+
{
|
| 1734 |
+
"epoch": 0.494,
|
| 1735 |
+
"grad_norm": 4.0266242027282715,
|
| 1736 |
+
"learning_rate": 2.98558587804993e-05,
|
| 1737 |
+
"loss": 1.8147,
|
| 1738 |
+
"step": 2470
|
| 1739 |
+
},
|
| 1740 |
+
{
|
| 1741 |
+
"epoch": 0.496,
|
| 1742 |
+
"grad_norm": 9.819830894470215,
|
| 1743 |
+
"learning_rate": 2.9684532864643122e-05,
|
| 1744 |
+
"loss": 0.9978,
|
| 1745 |
+
"step": 2480
|
| 1746 |
+
},
|
| 1747 |
+
{
|
| 1748 |
+
"epoch": 0.498,
|
| 1749 |
+
"grad_norm": 10.189305305480957,
|
| 1750 |
+
"learning_rate": 2.9512978631264006e-05,
|
| 1751 |
+
"loss": 1.6129,
|
| 1752 |
+
"step": 2490
|
| 1753 |
+
},
|
| 1754 |
+
{
|
| 1755 |
+
"epoch": 0.5,
|
| 1756 |
+
"grad_norm": 0.588488757610321,
|
| 1757 |
+
"learning_rate": 2.9341204441673266e-05,
|
| 1758 |
+
"loss": 0.745,
|
| 1759 |
+
"step": 2500
|
| 1760 |
+
},
|
| 1761 |
+
{
|
| 1762 |
+
"epoch": 0.502,
|
| 1763 |
+
"grad_norm": 0.0,
|
| 1764 |
+
"learning_rate": 2.916921866790256e-05,
|
| 1765 |
+
"loss": 0.5694,
|
| 1766 |
+
"step": 2510
|
| 1767 |
+
},
|
| 1768 |
+
{
|
| 1769 |
+
"epoch": 0.504,
|
| 1770 |
+
"grad_norm": 1.1335722208023071,
|
| 1771 |
+
"learning_rate": 2.8997029692295874e-05,
|
| 1772 |
+
"loss": 0.453,
|
| 1773 |
+
"step": 2520
|
| 1774 |
+
},
|
| 1775 |
+
{
|
| 1776 |
+
"epoch": 0.506,
|
| 1777 |
+
"grad_norm": 3.6554768085479736,
|
| 1778 |
+
"learning_rate": 2.8824645907100954e-05,
|
| 1779 |
+
"loss": 0.7539,
|
| 1780 |
+
"step": 2530
|
| 1781 |
+
},
|
| 1782 |
+
{
|
| 1783 |
+
"epoch": 0.508,
|
| 1784 |
+
"grad_norm": 3.770594596862793,
|
| 1785 |
+
"learning_rate": 2.8652075714060295e-05,
|
| 1786 |
+
"loss": 0.5247,
|
| 1787 |
+
"step": 2540
|
| 1788 |
+
},
|
| 1789 |
+
{
|
| 1790 |
+
"epoch": 0.51,
|
| 1791 |
+
"grad_norm": 0.0,
|
| 1792 |
+
"learning_rate": 2.8479327524001636e-05,
|
| 1793 |
+
"loss": 0.7769,
|
| 1794 |
+
"step": 2550
|
| 1795 |
+
},
|
| 1796 |
+
{
|
| 1797 |
+
"epoch": 0.512,
|
| 1798 |
+
"grad_norm": 1.4518766403198242,
|
| 1799 |
+
"learning_rate": 2.8306409756428064e-05,
|
| 1800 |
+
"loss": 0.7204,
|
| 1801 |
+
"step": 2560
|
| 1802 |
+
},
|
| 1803 |
+
{
|
| 1804 |
+
"epoch": 0.514,
|
| 1805 |
+
"grad_norm": 5.560041427612305,
|
| 1806 |
+
"learning_rate": 2.8133330839107608e-05,
|
| 1807 |
+
"loss": 0.9135,
|
| 1808 |
+
"step": 2570
|
| 1809 |
+
},
|
| 1810 |
+
{
|
| 1811 |
+
"epoch": 0.516,
|
| 1812 |
+
"grad_norm": 0.0,
|
| 1813 |
+
"learning_rate": 2.7960099207662532e-05,
|
| 1814 |
+
"loss": 0.5892,
|
| 1815 |
+
"step": 2580
|
| 1816 |
+
},
|
| 1817 |
+
{
|
| 1818 |
+
"epoch": 0.518,
|
| 1819 |
+
"grad_norm": 7.593757152557373,
|
| 1820 |
+
"learning_rate": 2.7786723305158136e-05,
|
| 1821 |
+
"loss": 0.5568,
|
| 1822 |
+
"step": 2590
|
| 1823 |
+
},
|
| 1824 |
+
{
|
| 1825 |
+
"epoch": 0.52,
|
| 1826 |
+
"grad_norm": 1.4705710411071777,
|
| 1827 |
+
"learning_rate": 2.761321158169134e-05,
|
| 1828 |
+
"loss": 1.3712,
|
| 1829 |
+
"step": 2600
|
| 1830 |
+
},
|
| 1831 |
+
{
|
| 1832 |
+
"epoch": 0.522,
|
| 1833 |
+
"grad_norm": 2.1466057300567627,
|
| 1834 |
+
"learning_rate": 2.7439572493978736e-05,
|
| 1835 |
+
"loss": 0.9695,
|
| 1836 |
+
"step": 2610
|
| 1837 |
+
},
|
| 1838 |
+
{
|
| 1839 |
+
"epoch": 0.524,
|
| 1840 |
+
"grad_norm": 5.329553604125977,
|
| 1841 |
+
"learning_rate": 2.726581450494451e-05,
|
| 1842 |
+
"loss": 0.7138,
|
| 1843 |
+
"step": 2620
|
| 1844 |
+
},
|
| 1845 |
+
{
|
| 1846 |
+
"epoch": 0.526,
|
| 1847 |
+
"grad_norm": 3.9968855381011963,
|
| 1848 |
+
"learning_rate": 2.7091946083307896e-05,
|
| 1849 |
+
"loss": 1.0675,
|
| 1850 |
+
"step": 2630
|
| 1851 |
+
},
|
| 1852 |
+
{
|
| 1853 |
+
"epoch": 0.528,
|
| 1854 |
+
"grad_norm": 4.9435553550720215,
|
| 1855 |
+
"learning_rate": 2.6917975703170466e-05,
|
| 1856 |
+
"loss": 0.8781,
|
| 1857 |
+
"step": 2640
|
| 1858 |
+
},
|
| 1859 |
+
{
|
| 1860 |
+
"epoch": 0.53,
|
| 1861 |
+
"grad_norm": 1.5722167491912842,
|
| 1862 |
+
"learning_rate": 2.674391184360313e-05,
|
| 1863 |
+
"loss": 0.5354,
|
| 1864 |
+
"step": 2650
|
| 1865 |
+
},
|
| 1866 |
+
{
|
| 1867 |
+
"epoch": 0.532,
|
| 1868 |
+
"grad_norm": 0.7694377303123474,
|
| 1869 |
+
"learning_rate": 2.656976298823284e-05,
|
| 1870 |
+
"loss": 0.456,
|
| 1871 |
+
"step": 2660
|
| 1872 |
+
},
|
| 1873 |
+
{
|
| 1874 |
+
"epoch": 0.534,
|
| 1875 |
+
"grad_norm": 0.0,
|
| 1876 |
+
"learning_rate": 2.6395537624829096e-05,
|
| 1877 |
+
"loss": 2.0864,
|
| 1878 |
+
"step": 2670
|
| 1879 |
+
},
|
| 1880 |
+
{
|
| 1881 |
+
"epoch": 0.536,
|
| 1882 |
+
"grad_norm": 0.564000129699707,
|
| 1883 |
+
"learning_rate": 2.6221244244890336e-05,
|
| 1884 |
+
"loss": 0.8204,
|
| 1885 |
+
"step": 2680
|
| 1886 |
+
},
|
| 1887 |
+
{
|
| 1888 |
+
"epoch": 0.538,
|
| 1889 |
+
"grad_norm": 1.3732776641845703,
|
| 1890 |
+
"learning_rate": 2.604689134322999e-05,
|
| 1891 |
+
"loss": 0.6519,
|
| 1892 |
+
"step": 2690
|
| 1893 |
+
},
|
| 1894 |
+
{
|
| 1895 |
+
"epoch": 0.54,
|
| 1896 |
+
"grad_norm": 9.038244247436523,
|
| 1897 |
+
"learning_rate": 2.587248741756253e-05,
|
| 1898 |
+
"loss": 0.7875,
|
| 1899 |
+
"step": 2700
|
| 1900 |
+
},
|
| 1901 |
+
{
|
| 1902 |
+
"epoch": 0.542,
|
| 1903 |
+
"grad_norm": 0.0,
|
| 1904 |
+
"learning_rate": 2.5698040968089225e-05,
|
| 1905 |
+
"loss": 1.4801,
|
| 1906 |
+
"step": 2710
|
| 1907 |
+
},
|
| 1908 |
+
{
|
| 1909 |
+
"epoch": 0.544,
|
| 1910 |
+
"grad_norm": 6.459538459777832,
|
| 1911 |
+
"learning_rate": 2.5523560497083926e-05,
|
| 1912 |
+
"loss": 0.9097,
|
| 1913 |
+
"step": 2720
|
| 1914 |
+
},
|
| 1915 |
+
{
|
| 1916 |
+
"epoch": 0.546,
|
| 1917 |
+
"grad_norm": 1.4535608291625977,
|
| 1918 |
+
"learning_rate": 2.5349054508478637e-05,
|
| 1919 |
+
"loss": 0.5698,
|
| 1920 |
+
"step": 2730
|
| 1921 |
+
},
|
| 1922 |
+
{
|
| 1923 |
+
"epoch": 0.548,
|
| 1924 |
+
"grad_norm": 1.2716917991638184,
|
| 1925 |
+
"learning_rate": 2.517453150744904e-05,
|
| 1926 |
+
"loss": 0.4508,
|
| 1927 |
+
"step": 2740
|
| 1928 |
+
},
|
| 1929 |
+
{
|
| 1930 |
+
"epoch": 0.55,
|
| 1931 |
+
"grad_norm": 0.0,
|
| 1932 |
+
"learning_rate": 2.5e-05,
|
| 1933 |
+
"loss": 0.4544,
|
| 1934 |
+
"step": 2750
|
| 1935 |
+
},
|
| 1936 |
+
{
|
| 1937 |
+
"epoch": 0.552,
|
| 1938 |
+
"grad_norm": 2.410973310470581,
|
| 1939 |
+
"learning_rate": 2.4825468492550964e-05,
|
| 1940 |
+
"loss": 1.0367,
|
| 1941 |
+
"step": 2760
|
| 1942 |
+
},
|
| 1943 |
+
{
|
| 1944 |
+
"epoch": 0.554,
|
| 1945 |
+
"grad_norm": 171.79568481445312,
|
| 1946 |
+
"learning_rate": 2.4650945491521372e-05,
|
| 1947 |
+
"loss": 5.9188,
|
| 1948 |
+
"step": 2770
|
| 1949 |
+
},
|
| 1950 |
+
{
|
| 1951 |
+
"epoch": 0.556,
|
| 1952 |
+
"grad_norm": 0.4065113365650177,
|
| 1953 |
+
"learning_rate": 2.447643950291608e-05,
|
| 1954 |
+
"loss": 0.784,
|
| 1955 |
+
"step": 2780
|
| 1956 |
+
},
|
| 1957 |
+
{
|
| 1958 |
+
"epoch": 0.558,
|
| 1959 |
+
"grad_norm": 8.022871017456055,
|
| 1960 |
+
"learning_rate": 2.4301959031910784e-05,
|
| 1961 |
+
"loss": 1.3575,
|
| 1962 |
+
"step": 2790
|
| 1963 |
+
},
|
| 1964 |
+
{
|
| 1965 |
+
"epoch": 0.56,
|
| 1966 |
+
"grad_norm": 0.0,
|
| 1967 |
+
"learning_rate": 2.4127512582437485e-05,
|
| 1968 |
+
"loss": 1.2835,
|
| 1969 |
+
"step": 2800
|
| 1970 |
+
},
|
| 1971 |
+
{
|
| 1972 |
+
"epoch": 0.562,
|
| 1973 |
+
"grad_norm": 27.728992462158203,
|
| 1974 |
+
"learning_rate": 2.3953108656770016e-05,
|
| 1975 |
+
"loss": 2.173,
|
| 1976 |
+
"step": 2810
|
| 1977 |
+
},
|
| 1978 |
+
{
|
| 1979 |
+
"epoch": 0.564,
|
| 1980 |
+
"grad_norm": 0.0,
|
| 1981 |
+
"learning_rate": 2.377875575510967e-05,
|
| 1982 |
+
"loss": 0.354,
|
| 1983 |
+
"step": 2820
|
| 1984 |
+
},
|
| 1985 |
+
{
|
| 1986 |
+
"epoch": 0.566,
|
| 1987 |
+
"grad_norm": 0.1998305767774582,
|
| 1988 |
+
"learning_rate": 2.3604462375170906e-05,
|
| 1989 |
+
"loss": 0.8905,
|
| 1990 |
+
"step": 2830
|
| 1991 |
+
},
|
| 1992 |
+
{
|
| 1993 |
+
"epoch": 0.568,
|
| 1994 |
+
"grad_norm": 2.29780650138855,
|
| 1995 |
+
"learning_rate": 2.3430237011767167e-05,
|
| 1996 |
+
"loss": 0.9289,
|
| 1997 |
+
"step": 2840
|
| 1998 |
+
},
|
| 1999 |
+
{
|
| 2000 |
+
"epoch": 0.57,
|
| 2001 |
+
"grad_norm": 0.0,
|
| 2002 |
+
"learning_rate": 2.3256088156396868e-05,
|
| 2003 |
+
"loss": 0.512,
|
| 2004 |
+
"step": 2850
|
| 2005 |
+
},
|
| 2006 |
+
{
|
| 2007 |
+
"epoch": 0.572,
|
| 2008 |
+
"grad_norm": 0.30271175503730774,
|
| 2009 |
+
"learning_rate": 2.3082024296829536e-05,
|
| 2010 |
+
"loss": 0.6724,
|
| 2011 |
+
"step": 2860
|
| 2012 |
+
},
|
| 2013 |
+
{
|
| 2014 |
+
"epoch": 0.574,
|
| 2015 |
+
"grad_norm": 16.384737014770508,
|
| 2016 |
+
"learning_rate": 2.2908053916692117e-05,
|
| 2017 |
+
"loss": 0.7876,
|
| 2018 |
+
"step": 2870
|
| 2019 |
+
},
|
| 2020 |
+
{
|
| 2021 |
+
"epoch": 0.576,
|
| 2022 |
+
"grad_norm": 0.38019096851348877,
|
| 2023 |
+
"learning_rate": 2.2734185495055503e-05,
|
| 2024 |
+
"loss": 1.3706,
|
| 2025 |
+
"step": 2880
|
| 2026 |
+
},
|
| 2027 |
+
{
|
| 2028 |
+
"epoch": 0.578,
|
| 2029 |
+
"grad_norm": 0.6822488903999329,
|
| 2030 |
+
"learning_rate": 2.2560427506021266e-05,
|
| 2031 |
+
"loss": 1.0091,
|
| 2032 |
+
"step": 2890
|
| 2033 |
+
},
|
| 2034 |
+
{
|
| 2035 |
+
"epoch": 0.58,
|
| 2036 |
+
"grad_norm": 6.04833984375,
|
| 2037 |
+
"learning_rate": 2.238678841830867e-05,
|
| 2038 |
+
"loss": 0.4665,
|
| 2039 |
+
"step": 2900
|
| 2040 |
+
},
|
| 2041 |
+
{
|
| 2042 |
+
"epoch": 0.582,
|
| 2043 |
+
"grad_norm": 24.140220642089844,
|
| 2044 |
+
"learning_rate": 2.2213276694841866e-05,
|
| 2045 |
+
"loss": 1.412,
|
| 2046 |
+
"step": 2910
|
| 2047 |
+
},
|
| 2048 |
+
{
|
| 2049 |
+
"epoch": 0.584,
|
| 2050 |
+
"grad_norm": 0.0,
|
| 2051 |
+
"learning_rate": 2.2039900792337474e-05,
|
| 2052 |
+
"loss": 1.3582,
|
| 2053 |
+
"step": 2920
|
| 2054 |
+
},
|
| 2055 |
+
{
|
| 2056 |
+
"epoch": 0.586,
|
| 2057 |
+
"grad_norm": 29.037202835083008,
|
| 2058 |
+
"learning_rate": 2.186666916089239e-05,
|
| 2059 |
+
"loss": 0.3297,
|
| 2060 |
+
"step": 2930
|
| 2061 |
+
},
|
| 2062 |
+
{
|
| 2063 |
+
"epoch": 0.588,
|
| 2064 |
+
"grad_norm": 3.5834364891052246,
|
| 2065 |
+
"learning_rate": 2.1693590243571938e-05,
|
| 2066 |
+
"loss": 0.5486,
|
| 2067 |
+
"step": 2940
|
| 2068 |
+
},
|
| 2069 |
+
{
|
| 2070 |
+
"epoch": 0.59,
|
| 2071 |
+
"grad_norm": 0.7133229970932007,
|
| 2072 |
+
"learning_rate": 2.1520672475998373e-05,
|
| 2073 |
+
"loss": 0.341,
|
| 2074 |
+
"step": 2950
|
| 2075 |
+
},
|
| 2076 |
+
{
|
| 2077 |
+
"epoch": 0.592,
|
| 2078 |
+
"grad_norm": 22.81307601928711,
|
| 2079 |
+
"learning_rate": 2.1347924285939714e-05,
|
| 2080 |
+
"loss": 1.8438,
|
| 2081 |
+
"step": 2960
|
| 2082 |
+
},
|
| 2083 |
+
{
|
| 2084 |
+
"epoch": 0.594,
|
| 2085 |
+
"grad_norm": 8.678629875183105,
|
| 2086 |
+
"learning_rate": 2.117535409289905e-05,
|
| 2087 |
+
"loss": 1.0661,
|
| 2088 |
+
"step": 2970
|
| 2089 |
+
},
|
| 2090 |
+
{
|
| 2091 |
+
"epoch": 0.596,
|
| 2092 |
+
"grad_norm": 0.36412814259529114,
|
| 2093 |
+
"learning_rate": 2.1002970307704132e-05,
|
| 2094 |
+
"loss": 0.5445,
|
| 2095 |
+
"step": 2980
|
| 2096 |
+
},
|
| 2097 |
+
{
|
| 2098 |
+
"epoch": 0.598,
|
| 2099 |
+
"grad_norm": 1.426496148109436,
|
| 2100 |
+
"learning_rate": 2.0830781332097446e-05,
|
| 2101 |
+
"loss": 0.735,
|
| 2102 |
+
"step": 2990
|
| 2103 |
+
},
|
| 2104 |
+
{
|
| 2105 |
+
"epoch": 0.6,
|
| 2106 |
+
"grad_norm": 1.3443537950515747,
|
| 2107 |
+
"learning_rate": 2.0658795558326743e-05,
|
| 2108 |
+
"loss": 1.2389,
|
| 2109 |
+
"step": 3000
|
| 2110 |
+
},
|
| 2111 |
+
{
|
| 2112 |
+
"epoch": 0.602,
|
| 2113 |
+
"grad_norm": 0.4442681670188904,
|
| 2114 |
+
"learning_rate": 2.0487021368736003e-05,
|
| 2115 |
+
"loss": 0.6412,
|
| 2116 |
+
"step": 3010
|
| 2117 |
+
},
|
| 2118 |
+
{
|
| 2119 |
+
"epoch": 0.604,
|
| 2120 |
+
"grad_norm": 9.767306327819824,
|
| 2121 |
+
"learning_rate": 2.031546713535688e-05,
|
| 2122 |
+
"loss": 0.6114,
|
| 2123 |
+
"step": 3020
|
| 2124 |
+
},
|
| 2125 |
+
{
|
| 2126 |
+
"epoch": 0.606,
|
| 2127 |
+
"grad_norm": 0.5233049392700195,
|
| 2128 |
+
"learning_rate": 2.0144141219500705e-05,
|
| 2129 |
+
"loss": 0.846,
|
| 2130 |
+
"step": 3030
|
| 2131 |
+
},
|
| 2132 |
+
{
|
| 2133 |
+
"epoch": 0.608,
|
| 2134 |
+
"grad_norm": 7.53090763092041,
|
| 2135 |
+
"learning_rate": 1.9973051971350888e-05,
|
| 2136 |
+
"loss": 1.1015,
|
| 2137 |
+
"step": 3040
|
| 2138 |
+
},
|
| 2139 |
+
{
|
| 2140 |
+
"epoch": 0.61,
|
| 2141 |
+
"grad_norm": 0.44011208415031433,
|
| 2142 |
+
"learning_rate": 1.980220772955602e-05,
|
| 2143 |
+
"loss": 0.2744,
|
| 2144 |
+
"step": 3050
|
| 2145 |
+
},
|
| 2146 |
+
{
|
| 2147 |
+
"epoch": 0.612,
|
| 2148 |
+
"grad_norm": 0.6516274213790894,
|
| 2149 |
+
"learning_rate": 1.963161682082342e-05,
|
| 2150 |
+
"loss": 1.048,
|
| 2151 |
+
"step": 3060
|
| 2152 |
+
},
|
| 2153 |
+
{
|
| 2154 |
+
"epoch": 0.614,
|
| 2155 |
+
"grad_norm": 14.277541160583496,
|
| 2156 |
+
"learning_rate": 1.946128755951332e-05,
|
| 2157 |
+
"loss": 1.5106,
|
| 2158 |
+
"step": 3070
|
| 2159 |
+
},
|
| 2160 |
+
{
|
| 2161 |
+
"epoch": 0.616,
|
| 2162 |
+
"grad_norm": 1.5146019458770752,
|
| 2163 |
+
"learning_rate": 1.9291228247233605e-05,
|
| 2164 |
+
"loss": 1.3313,
|
| 2165 |
+
"step": 3080
|
| 2166 |
+
},
|
| 2167 |
+
{
|
| 2168 |
+
"epoch": 0.618,
|
| 2169 |
+
"grad_norm": 6.09342098236084,
|
| 2170 |
+
"learning_rate": 1.912144717243525e-05,
|
| 2171 |
+
"loss": 1.1983,
|
| 2172 |
+
"step": 3090
|
| 2173 |
+
},
|
| 2174 |
+
{
|
| 2175 |
+
"epoch": 0.62,
|
| 2176 |
+
"grad_norm": 0.4981762170791626,
|
| 2177 |
+
"learning_rate": 1.895195261000831e-05,
|
| 2178 |
+
"loss": 0.8728,
|
| 2179 |
+
"step": 3100
|
| 2180 |
+
},
|
| 2181 |
+
{
|
| 2182 |
+
"epoch": 0.622,
|
| 2183 |
+
"grad_norm": 0.0,
|
| 2184 |
+
"learning_rate": 1.8782752820878634e-05,
|
| 2185 |
+
"loss": 0.6307,
|
| 2186 |
+
"step": 3110
|
| 2187 |
+
},
|
| 2188 |
+
{
|
| 2189 |
+
"epoch": 0.624,
|
| 2190 |
+
"grad_norm": 3.658278226852417,
|
| 2191 |
+
"learning_rate": 1.8613856051605243e-05,
|
| 2192 |
+
"loss": 0.7477,
|
| 2193 |
+
"step": 3120
|
| 2194 |
+
},
|
| 2195 |
+
{
|
| 2196 |
+
"epoch": 0.626,
|
| 2197 |
+
"grad_norm": 2.42810320854187,
|
| 2198 |
+
"learning_rate": 1.8445270533978388e-05,
|
| 2199 |
+
"loss": 1.0535,
|
| 2200 |
+
"step": 3130
|
| 2201 |
+
},
|
| 2202 |
+
{
|
| 2203 |
+
"epoch": 0.628,
|
| 2204 |
+
"grad_norm": 1.3915554285049438,
|
| 2205 |
+
"learning_rate": 1.827700448461836e-05,
|
| 2206 |
+
"loss": 1.0675,
|
| 2207 |
+
"step": 3140
|
| 2208 |
+
},
|
| 2209 |
+
{
|
| 2210 |
+
"epoch": 0.63,
|
| 2211 |
+
"grad_norm": 0.0,
|
| 2212 |
+
"learning_rate": 1.8109066104575023e-05,
|
| 2213 |
+
"loss": 1.6361,
|
| 2214 |
+
"step": 3150
|
| 2215 |
+
},
|
| 2216 |
+
{
|
| 2217 |
+
"epoch": 0.632,
|
| 2218 |
+
"grad_norm": 5.701220989227295,
|
| 2219 |
+
"learning_rate": 1.7941463578928086e-05,
|
| 2220 |
+
"loss": 1.1624,
|
| 2221 |
+
"step": 3160
|
| 2222 |
+
},
|
| 2223 |
+
{
|
| 2224 |
+
"epoch": 0.634,
|
| 2225 |
+
"grad_norm": 1.6050679683685303,
|
| 2226 |
+
"learning_rate": 1.7774205076388206e-05,
|
| 2227 |
+
"loss": 0.8855,
|
| 2228 |
+
"step": 3170
|
| 2229 |
+
},
|
| 2230 |
+
{
|
| 2231 |
+
"epoch": 0.636,
|
| 2232 |
+
"grad_norm": 6.965709209442139,
|
| 2233 |
+
"learning_rate": 1.7607298748898842e-05,
|
| 2234 |
+
"loss": 1.0965,
|
| 2235 |
+
"step": 3180
|
| 2236 |
+
},
|
| 2237 |
+
{
|
| 2238 |
+
"epoch": 0.638,
|
| 2239 |
+
"grad_norm": 4.4240570068359375,
|
| 2240 |
+
"learning_rate": 1.744075273123889e-05,
|
| 2241 |
+
"loss": 0.3191,
|
| 2242 |
+
"step": 3190
|
| 2243 |
+
},
|
| 2244 |
+
{
|
| 2245 |
+
"epoch": 0.64,
|
| 2246 |
+
"grad_norm": 0.0,
|
| 2247 |
+
"learning_rate": 1.7274575140626318e-05,
|
| 2248 |
+
"loss": 0.6535,
|
| 2249 |
+
"step": 3200
|
| 2250 |
+
},
|
| 2251 |
+
{
|
| 2252 |
+
"epoch": 0.642,
|
| 2253 |
+
"grad_norm": 0.0,
|
| 2254 |
+
"learning_rate": 1.7108774076322443e-05,
|
| 2255 |
+
"loss": 0.4069,
|
| 2256 |
+
"step": 3210
|
| 2257 |
+
},
|
| 2258 |
+
{
|
| 2259 |
+
"epoch": 0.644,
|
| 2260 |
+
"grad_norm": 0.47515714168548584,
|
| 2261 |
+
"learning_rate": 1.6943357619237226e-05,
|
| 2262 |
+
"loss": 0.5898,
|
| 2263 |
+
"step": 3220
|
| 2264 |
+
},
|
| 2265 |
+
{
|
| 2266 |
+
"epoch": 0.646,
|
| 2267 |
+
"grad_norm": 0.7474708557128906,
|
| 2268 |
+
"learning_rate": 1.677833383153542e-05,
|
| 2269 |
+
"loss": 0.3429,
|
| 2270 |
+
"step": 3230
|
| 2271 |
+
},
|
| 2272 |
+
{
|
| 2273 |
+
"epoch": 0.648,
|
| 2274 |
+
"grad_norm": 1.0888911485671997,
|
| 2275 |
+
"learning_rate": 1.6613710756243626e-05,
|
| 2276 |
+
"loss": 0.6403,
|
| 2277 |
+
"step": 3240
|
| 2278 |
+
},
|
| 2279 |
+
{
|
| 2280 |
+
"epoch": 0.65,
|
| 2281 |
+
"grad_norm": 0.7075658440589905,
|
| 2282 |
+
"learning_rate": 1.6449496416858284e-05,
|
| 2283 |
+
"loss": 1.1898,
|
| 2284 |
+
"step": 3250
|
| 2285 |
+
},
|
| 2286 |
+
{
|
| 2287 |
+
"epoch": 0.652,
|
| 2288 |
+
"grad_norm": 5.701323509216309,
|
| 2289 |
+
"learning_rate": 1.6285698816954624e-05,
|
| 2290 |
+
"loss": 0.4049,
|
| 2291 |
+
"step": 3260
|
| 2292 |
+
},
|
| 2293 |
+
{
|
| 2294 |
+
"epoch": 0.654,
|
| 2295 |
+
"grad_norm": 14.747136116027832,
|
| 2296 |
+
"learning_rate": 1.612232593979658e-05,
|
| 2297 |
+
"loss": 0.6081,
|
| 2298 |
+
"step": 3270
|
| 2299 |
+
},
|
| 2300 |
+
{
|
| 2301 |
+
"epoch": 0.656,
|
| 2302 |
+
"grad_norm": 0.0,
|
| 2303 |
+
"learning_rate": 1.5959385747947698e-05,
|
| 2304 |
+
"loss": 0.8107,
|
| 2305 |
+
"step": 3280
|
| 2306 |
+
},
|
| 2307 |
+
{
|
| 2308 |
+
"epoch": 0.658,
|
| 2309 |
+
"grad_norm": 0.0,
|
| 2310 |
+
"learning_rate": 1.5796886182883053e-05,
|
| 2311 |
+
"loss": 0.6606,
|
| 2312 |
+
"step": 3290
|
| 2313 |
+
},
|
| 2314 |
+
{
|
| 2315 |
+
"epoch": 0.66,
|
| 2316 |
+
"grad_norm": 0.8380840420722961,
|
| 2317 |
+
"learning_rate": 1.56348351646022e-05,
|
| 2318 |
+
"loss": 0.4767,
|
| 2319 |
+
"step": 3300
|
| 2320 |
+
},
|
| 2321 |
+
{
|
| 2322 |
+
"epoch": 0.662,
|
| 2323 |
+
"grad_norm": 0.0,
|
| 2324 |
+
"learning_rate": 1.547324059124315e-05,
|
| 2325 |
+
"loss": 0.988,
|
| 2326 |
+
"step": 3310
|
| 2327 |
+
},
|
| 2328 |
+
{
|
| 2329 |
+
"epoch": 0.664,
|
| 2330 |
+
"grad_norm": 0.0,
|
| 2331 |
+
"learning_rate": 1.5312110338697426e-05,
|
| 2332 |
+
"loss": 1.3237,
|
| 2333 |
+
"step": 3320
|
| 2334 |
+
},
|
| 2335 |
+
{
|
| 2336 |
+
"epoch": 0.666,
|
| 2337 |
+
"grad_norm": 3.07537841796875,
|
| 2338 |
+
"learning_rate": 1.5151452260226224e-05,
|
| 2339 |
+
"loss": 0.4173,
|
| 2340 |
+
"step": 3330
|
| 2341 |
+
},
|
| 2342 |
+
{
|
| 2343 |
+
"epoch": 0.668,
|
| 2344 |
+
"grad_norm": 2.070822238922119,
|
| 2345 |
+
"learning_rate": 1.4991274186077632e-05,
|
| 2346 |
+
"loss": 0.7819,
|
| 2347 |
+
"step": 3340
|
| 2348 |
+
},
|
| 2349 |
+
{
|
| 2350 |
+
"epoch": 0.67,
|
| 2351 |
+
"grad_norm": 0.3668426275253296,
|
| 2352 |
+
"learning_rate": 1.4831583923104999e-05,
|
| 2353 |
+
"loss": 0.334,
|
| 2354 |
+
"step": 3350
|
| 2355 |
+
},
|
| 2356 |
+
{
|
| 2357 |
+
"epoch": 0.672,
|
| 2358 |
+
"grad_norm": 1.416359543800354,
|
| 2359 |
+
"learning_rate": 1.467238925438646e-05,
|
| 2360 |
+
"loss": 0.8675,
|
| 2361 |
+
"step": 3360
|
| 2362 |
+
},
|
| 2363 |
+
{
|
| 2364 |
+
"epoch": 0.674,
|
| 2365 |
+
"grad_norm": 0.936389148235321,
|
| 2366 |
+
"learning_rate": 1.4513697938845572e-05,
|
| 2367 |
+
"loss": 0.6056,
|
| 2368 |
+
"step": 3370
|
| 2369 |
+
},
|
| 2370 |
+
{
|
| 2371 |
+
"epoch": 0.676,
|
| 2372 |
+
"grad_norm": 0.4283387362957001,
|
| 2373 |
+
"learning_rate": 1.4355517710873184e-05,
|
| 2374 |
+
"loss": 0.2135,
|
| 2375 |
+
"step": 3380
|
| 2376 |
+
},
|
| 2377 |
+
{
|
| 2378 |
+
"epoch": 0.678,
|
| 2379 |
+
"grad_norm": 9.048284530639648,
|
| 2380 |
+
"learning_rate": 1.4197856279950438e-05,
|
| 2381 |
+
"loss": 1.0051,
|
| 2382 |
+
"step": 3390
|
| 2383 |
+
},
|
| 2384 |
+
{
|
| 2385 |
+
"epoch": 0.68,
|
| 2386 |
+
"grad_norm": 4.453339099884033,
|
| 2387 |
+
"learning_rate": 1.4040721330273062e-05,
|
| 2388 |
+
"loss": 0.4415,
|
| 2389 |
+
"step": 3400
|
| 2390 |
+
},
|
| 2391 |
+
{
|
| 2392 |
+
"epoch": 0.682,
|
| 2393 |
+
"grad_norm": 0.0,
|
| 2394 |
+
"learning_rate": 1.388412052037682e-05,
|
| 2395 |
+
"loss": 0.625,
|
| 2396 |
+
"step": 3410
|
| 2397 |
+
},
|
| 2398 |
+
{
|
| 2399 |
+
"epoch": 0.684,
|
| 2400 |
+
"grad_norm": 2.6629364490509033,
|
| 2401 |
+
"learning_rate": 1.3728061482764238e-05,
|
| 2402 |
+
"loss": 1.0217,
|
| 2403 |
+
"step": 3420
|
| 2404 |
+
},
|
| 2405 |
+
{
|
| 2406 |
+
"epoch": 0.686,
|
| 2407 |
+
"grad_norm": 0.0,
|
| 2408 |
+
"learning_rate": 1.3572551823532654e-05,
|
| 2409 |
+
"loss": 1.101,
|
| 2410 |
+
"step": 3430
|
| 2411 |
+
},
|
| 2412 |
+
{
|
| 2413 |
+
"epoch": 0.688,
|
| 2414 |
+
"grad_norm": 3.9471435546875,
|
| 2415 |
+
"learning_rate": 1.3417599122003464e-05,
|
| 2416 |
+
"loss": 0.9139,
|
| 2417 |
+
"step": 3440
|
| 2418 |
+
},
|
| 2419 |
+
{
|
| 2420 |
+
"epoch": 0.69,
|
| 2421 |
+
"grad_norm": 0.805842936038971,
|
| 2422 |
+
"learning_rate": 1.3263210930352737e-05,
|
| 2423 |
+
"loss": 0.7426,
|
| 2424 |
+
"step": 3450
|
| 2425 |
+
},
|
| 2426 |
+
{
|
| 2427 |
+
"epoch": 0.692,
|
| 2428 |
+
"grad_norm": 0.8995339274406433,
|
| 2429 |
+
"learning_rate": 1.3109394773243117e-05,
|
| 2430 |
+
"loss": 1.0954,
|
| 2431 |
+
"step": 3460
|
| 2432 |
+
},
|
| 2433 |
+
{
|
| 2434 |
+
"epoch": 0.694,
|
| 2435 |
+
"grad_norm": 0.29068702459335327,
|
| 2436 |
+
"learning_rate": 1.2956158147457115e-05,
|
| 2437 |
+
"loss": 0.6788,
|
| 2438 |
+
"step": 3470
|
| 2439 |
+
},
|
| 2440 |
+
{
|
| 2441 |
+
"epoch": 0.696,
|
| 2442 |
+
"grad_norm": 0.0,
|
| 2443 |
+
"learning_rate": 1.280350852153168e-05,
|
| 2444 |
+
"loss": 0.8617,
|
| 2445 |
+
"step": 3480
|
| 2446 |
+
},
|
| 2447 |
+
{
|
| 2448 |
+
"epoch": 0.698,
|
| 2449 |
+
"grad_norm": 0.6072128415107727,
|
| 2450 |
+
"learning_rate": 1.2651453335394231e-05,
|
| 2451 |
+
"loss": 0.653,
|
| 2452 |
+
"step": 3490
|
| 2453 |
+
},
|
| 2454 |
+
{
|
| 2455 |
+
"epoch": 0.7,
|
| 2456 |
+
"grad_norm": 0.0,
|
| 2457 |
+
"learning_rate": 1.2500000000000006e-05,
|
| 2458 |
+
"loss": 0.8003,
|
| 2459 |
+
"step": 3500
|
| 2460 |
+
},
|
| 2461 |
+
{
|
| 2462 |
+
"epoch": 0.702,
|
| 2463 |
+
"grad_norm": 0.8060992360115051,
|
| 2464 |
+
"learning_rate": 1.234915589697091e-05,
|
| 2465 |
+
"loss": 0.5579,
|
| 2466 |
+
"step": 3510
|
| 2467 |
+
},
|
| 2468 |
+
{
|
| 2469 |
+
"epoch": 0.704,
|
| 2470 |
+
"grad_norm": 0.0,
|
| 2471 |
+
"learning_rate": 1.2198928378235716e-05,
|
| 2472 |
+
"loss": 1.5354,
|
| 2473 |
+
"step": 3520
|
| 2474 |
+
},
|
| 2475 |
+
{
|
| 2476 |
+
"epoch": 0.706,
|
| 2477 |
+
"grad_norm": 13.488487243652344,
|
| 2478 |
+
"learning_rate": 1.2049324765671749e-05,
|
| 2479 |
+
"loss": 1.6175,
|
| 2480 |
+
"step": 3530
|
| 2481 |
+
},
|
| 2482 |
+
{
|
| 2483 |
+
"epoch": 0.708,
|
| 2484 |
+
"grad_norm": 1.7564411163330078,
|
| 2485 |
+
"learning_rate": 1.1900352350748026e-05,
|
| 2486 |
+
"loss": 0.4771,
|
| 2487 |
+
"step": 3540
|
| 2488 |
+
},
|
| 2489 |
+
{
|
| 2490 |
+
"epoch": 0.71,
|
| 2491 |
+
"grad_norm": 0.0,
|
| 2492 |
+
"learning_rate": 1.175201839416988e-05,
|
| 2493 |
+
"loss": 0.779,
|
| 2494 |
+
"step": 3550
|
| 2495 |
+
},
|
| 2496 |
+
{
|
| 2497 |
+
"epoch": 0.712,
|
| 2498 |
+
"grad_norm": 25.30704689025879,
|
| 2499 |
+
"learning_rate": 1.1604330125525079e-05,
|
| 2500 |
+
"loss": 1.1478,
|
| 2501 |
+
"step": 3560
|
| 2502 |
+
},
|
| 2503 |
+
{
|
| 2504 |
+
"epoch": 0.714,
|
| 2505 |
+
"grad_norm": 0.45064303278923035,
|
| 2506 |
+
"learning_rate": 1.1457294742931507e-05,
|
| 2507 |
+
"loss": 0.7484,
|
| 2508 |
+
"step": 3570
|
| 2509 |
+
},
|
| 2510 |
+
{
|
| 2511 |
+
"epoch": 0.716,
|
| 2512 |
+
"grad_norm": 0.0,
|
| 2513 |
+
"learning_rate": 1.1310919412686247e-05,
|
| 2514 |
+
"loss": 1.0581,
|
| 2515 |
+
"step": 3580
|
| 2516 |
+
},
|
| 2517 |
+
{
|
| 2518 |
+
"epoch": 0.718,
|
| 2519 |
+
"grad_norm": 0.0,
|
| 2520 |
+
"learning_rate": 1.11652112689164e-05,
|
| 2521 |
+
"loss": 0.989,
|
| 2522 |
+
"step": 3590
|
| 2523 |
+
},
|
| 2524 |
+
{
|
| 2525 |
+
"epoch": 0.72,
|
| 2526 |
+
"grad_norm": 0.0,
|
| 2527 |
+
"learning_rate": 1.1020177413231334e-05,
|
| 2528 |
+
"loss": 1.5538,
|
| 2529 |
+
"step": 3600
|
| 2530 |
+
},
|
| 2531 |
+
{
|
| 2532 |
+
"epoch": 0.722,
|
| 2533 |
+
"grad_norm": 0.0,
|
| 2534 |
+
"learning_rate": 1.0875824914376553e-05,
|
| 2535 |
+
"loss": 0.9328,
|
| 2536 |
+
"step": 3610
|
| 2537 |
+
},
|
| 2538 |
+
{
|
| 2539 |
+
"epoch": 0.724,
|
| 2540 |
+
"grad_norm": 2.3531455993652344,
|
| 2541 |
+
"learning_rate": 1.0732160807889211e-05,
|
| 2542 |
+
"loss": 1.4623,
|
| 2543 |
+
"step": 3620
|
| 2544 |
+
},
|
| 2545 |
+
{
|
| 2546 |
+
"epoch": 0.726,
|
| 2547 |
+
"grad_norm": 0.0,
|
| 2548 |
+
"learning_rate": 1.058919209575517e-05,
|
| 2549 |
+
"loss": 1.0274,
|
| 2550 |
+
"step": 3630
|
| 2551 |
+
},
|
| 2552 |
+
{
|
| 2553 |
+
"epoch": 0.728,
|
| 2554 |
+
"grad_norm": 2.038518190383911,
|
| 2555 |
+
"learning_rate": 1.0446925746067768e-05,
|
| 2556 |
+
"loss": 0.5183,
|
| 2557 |
+
"step": 3640
|
| 2558 |
+
},
|
| 2559 |
+
{
|
| 2560 |
+
"epoch": 0.73,
|
| 2561 |
+
"grad_norm": 0.0,
|
| 2562 |
+
"learning_rate": 1.0305368692688174e-05,
|
| 2563 |
+
"loss": 0.8523,
|
| 2564 |
+
"step": 3650
|
| 2565 |
+
},
|
| 2566 |
+
{
|
| 2567 |
+
"epoch": 0.732,
|
| 2568 |
+
"grad_norm": 2.5623738765716553,
|
| 2569 |
+
"learning_rate": 1.0164527834907467e-05,
|
| 2570 |
+
"loss": 0.9334,
|
| 2571 |
+
"step": 3660
|
| 2572 |
+
},
|
| 2573 |
+
{
|
| 2574 |
+
"epoch": 0.734,
|
| 2575 |
+
"grad_norm": 0.0,
|
| 2576 |
+
"learning_rate": 1.0024410037110357e-05,
|
| 2577 |
+
"loss": 1.2435,
|
| 2578 |
+
"step": 3670
|
| 2579 |
+
},
|
| 2580 |
+
{
|
| 2581 |
+
"epoch": 0.736,
|
| 2582 |
+
"grad_norm": 0.3360690474510193,
|
| 2583 |
+
"learning_rate": 9.88502212844063e-06,
|
| 2584 |
+
"loss": 1.5611,
|
| 2585 |
+
"step": 3680
|
| 2586 |
+
},
|
| 2587 |
+
{
|
| 2588 |
+
"epoch": 0.738,
|
| 2589 |
+
"grad_norm": 3.1597137451171875,
|
| 2590 |
+
"learning_rate": 9.746370902468311e-06,
|
| 2591 |
+
"loss": 0.9649,
|
| 2592 |
+
"step": 3690
|
| 2593 |
+
},
|
| 2594 |
+
{
|
| 2595 |
+
"epoch": 0.74,
|
| 2596 |
+
"grad_norm": 0.0,
|
| 2597 |
+
"learning_rate": 9.608463116858542e-06,
|
| 2598 |
+
"loss": 0.6053,
|
| 2599 |
+
"step": 3700
|
| 2600 |
+
},
|
| 2601 |
+
{
|
| 2602 |
+
"epoch": 0.742,
|
| 2603 |
+
"grad_norm": 5.861716270446777,
|
| 2604 |
+
"learning_rate": 9.471305493042243e-06,
|
| 2605 |
+
"loss": 0.6377,
|
| 2606 |
+
"step": 3710
|
| 2607 |
+
},
|
| 2608 |
+
{
|
| 2609 |
+
"epoch": 0.744,
|
| 2610 |
+
"grad_norm": 1.0426483154296875,
|
| 2611 |
+
"learning_rate": 9.334904715888495e-06,
|
| 2612 |
+
"loss": 1.0698,
|
| 2613 |
+
"step": 3720
|
| 2614 |
+
},
|
| 2615 |
+
{
|
| 2616 |
+
"epoch": 0.746,
|
| 2617 |
+
"grad_norm": 3.7929494380950928,
|
| 2618 |
+
"learning_rate": 9.199267433378727e-06,
|
| 2619 |
+
"loss": 0.9858,
|
| 2620 |
+
"step": 3730
|
| 2621 |
+
},
|
| 2622 |
+
{
|
| 2623 |
+
"epoch": 0.748,
|
| 2624 |
+
"grad_norm": 0.8633703589439392,
|
| 2625 |
+
"learning_rate": 9.064400256282757e-06,
|
| 2626 |
+
"loss": 0.3602,
|
| 2627 |
+
"step": 3740
|
| 2628 |
+
},
|
| 2629 |
+
{
|
| 2630 |
+
"epoch": 0.75,
|
| 2631 |
+
"grad_norm": 2.713400363922119,
|
| 2632 |
+
"learning_rate": 8.930309757836517e-06,
|
| 2633 |
+
"loss": 0.6672,
|
| 2634 |
+
"step": 3750
|
| 2635 |
+
},
|
| 2636 |
+
{
|
| 2637 |
+
"epoch": 0.752,
|
| 2638 |
+
"grad_norm": 0.759688138961792,
|
| 2639 |
+
"learning_rate": 8.797002473421728e-06,
|
| 2640 |
+
"loss": 0.5627,
|
| 2641 |
+
"step": 3760
|
| 2642 |
+
},
|
| 2643 |
+
{
|
| 2644 |
+
"epoch": 0.754,
|
| 2645 |
+
"grad_norm": 1.423932671546936,
|
| 2646 |
+
"learning_rate": 8.664484900247363e-06,
|
| 2647 |
+
"loss": 1.0572,
|
| 2648 |
+
"step": 3770
|
| 2649 |
+
},
|
| 2650 |
+
{
|
| 2651 |
+
"epoch": 0.756,
|
| 2652 |
+
"grad_norm": 10.111916542053223,
|
| 2653 |
+
"learning_rate": 8.532763497032987e-06,
|
| 2654 |
+
"loss": 1.2507,
|
| 2655 |
+
"step": 3780
|
| 2656 |
+
},
|
| 2657 |
+
{
|
| 2658 |
+
"epoch": 0.758,
|
| 2659 |
+
"grad_norm": 0.9725751876831055,
|
| 2660 |
+
"learning_rate": 8.40184468369396e-06,
|
| 2661 |
+
"loss": 0.7735,
|
| 2662 |
+
"step": 3790
|
| 2663 |
+
},
|
| 2664 |
+
{
|
| 2665 |
+
"epoch": 0.76,
|
| 2666 |
+
"grad_norm": 16.589933395385742,
|
| 2667 |
+
"learning_rate": 8.271734841028553e-06,
|
| 2668 |
+
"loss": 1.298,
|
| 2669 |
+
"step": 3800
|
| 2670 |
+
},
|
| 2671 |
+
{
|
| 2672 |
+
"epoch": 0.762,
|
| 2673 |
+
"grad_norm": 1.6179834604263306,
|
| 2674 |
+
"learning_rate": 8.142440310406924e-06,
|
| 2675 |
+
"loss": 0.7805,
|
| 2676 |
+
"step": 3810
|
| 2677 |
+
},
|
| 2678 |
+
{
|
| 2679 |
+
"epoch": 0.764,
|
| 2680 |
+
"grad_norm": 0.0,
|
| 2681 |
+
"learning_rate": 8.013967393462094e-06,
|
| 2682 |
+
"loss": 0.7498,
|
| 2683 |
+
"step": 3820
|
| 2684 |
+
},
|
| 2685 |
+
{
|
| 2686 |
+
"epoch": 0.766,
|
| 2687 |
+
"grad_norm": 0.0,
|
| 2688 |
+
"learning_rate": 7.886322351782783e-06,
|
| 2689 |
+
"loss": 1.1696,
|
| 2690 |
+
"step": 3830
|
| 2691 |
+
},
|
| 2692 |
+
{
|
| 2693 |
+
"epoch": 0.768,
|
| 2694 |
+
"grad_norm": 0.0,
|
| 2695 |
+
"learning_rate": 7.759511406608255e-06,
|
| 2696 |
+
"loss": 0.6709,
|
| 2697 |
+
"step": 3840
|
| 2698 |
+
},
|
| 2699 |
+
{
|
| 2700 |
+
"epoch": 0.77,
|
| 2701 |
+
"grad_norm": 0.7475419044494629,
|
| 2702 |
+
"learning_rate": 7.633540738525066e-06,
|
| 2703 |
+
"loss": 0.9733,
|
| 2704 |
+
"step": 3850
|
| 2705 |
+
},
|
| 2706 |
+
{
|
| 2707 |
+
"epoch": 0.772,
|
| 2708 |
+
"grad_norm": 4.253176689147949,
|
| 2709 |
+
"learning_rate": 7.508416487165862e-06,
|
| 2710 |
+
"loss": 0.6711,
|
| 2711 |
+
"step": 3860
|
| 2712 |
+
},
|
| 2713 |
+
{
|
| 2714 |
+
"epoch": 0.774,
|
| 2715 |
+
"grad_norm": 9.327080726623535,
|
| 2716 |
+
"learning_rate": 7.384144750910133e-06,
|
| 2717 |
+
"loss": 0.7134,
|
| 2718 |
+
"step": 3870
|
| 2719 |
+
},
|
| 2720 |
+
{
|
| 2721 |
+
"epoch": 0.776,
|
| 2722 |
+
"grad_norm": 2.4807772636413574,
|
| 2723 |
+
"learning_rate": 7.260731586586983e-06,
|
| 2724 |
+
"loss": 1.0323,
|
| 2725 |
+
"step": 3880
|
| 2726 |
+
},
|
| 2727 |
+
{
|
| 2728 |
+
"epoch": 0.778,
|
| 2729 |
+
"grad_norm": 0.0,
|
| 2730 |
+
"learning_rate": 7.138183009179922e-06,
|
| 2731 |
+
"loss": 0.4097,
|
| 2732 |
+
"step": 3890
|
| 2733 |
+
},
|
| 2734 |
+
{
|
| 2735 |
+
"epoch": 0.78,
|
| 2736 |
+
"grad_norm": 1.1434834003448486,
|
| 2737 |
+
"learning_rate": 7.016504991533726e-06,
|
| 2738 |
+
"loss": 1.0325,
|
| 2739 |
+
"step": 3900
|
| 2740 |
+
},
|
| 2741 |
+
{
|
| 2742 |
+
"epoch": 0.782,
|
| 2743 |
+
"grad_norm": 0.7977542281150818,
|
| 2744 |
+
"learning_rate": 6.895703464063319e-06,
|
| 2745 |
+
"loss": 0.2871,
|
| 2746 |
+
"step": 3910
|
| 2747 |
+
},
|
| 2748 |
+
{
|
| 2749 |
+
"epoch": 0.784,
|
| 2750 |
+
"grad_norm": 3.6492695808410645,
|
| 2751 |
+
"learning_rate": 6.775784314464717e-06,
|
| 2752 |
+
"loss": 0.8634,
|
| 2753 |
+
"step": 3920
|
| 2754 |
+
},
|
| 2755 |
+
{
|
| 2756 |
+
"epoch": 0.786,
|
| 2757 |
+
"grad_norm": 0.0,
|
| 2758 |
+
"learning_rate": 6.656753387428089e-06,
|
| 2759 |
+
"loss": 1.8368,
|
| 2760 |
+
"step": 3930
|
| 2761 |
+
},
|
| 2762 |
+
{
|
| 2763 |
+
"epoch": 0.788,
|
| 2764 |
+
"grad_norm": 1.5954694747924805,
|
| 2765 |
+
"learning_rate": 6.538616484352902e-06,
|
| 2766 |
+
"loss": 0.5746,
|
| 2767 |
+
"step": 3940
|
| 2768 |
+
},
|
| 2769 |
+
{
|
| 2770 |
+
"epoch": 0.79,
|
| 2771 |
+
"grad_norm": 0.0,
|
| 2772 |
+
"learning_rate": 6.421379363065142e-06,
|
| 2773 |
+
"loss": 2.333,
|
| 2774 |
+
"step": 3950
|
| 2775 |
+
},
|
| 2776 |
+
{
|
| 2777 |
+
"epoch": 0.792,
|
| 2778 |
+
"grad_norm": 0.48572126030921936,
|
| 2779 |
+
"learning_rate": 6.305047737536707e-06,
|
| 2780 |
+
"loss": 0.393,
|
| 2781 |
+
"step": 3960
|
| 2782 |
+
},
|
| 2783 |
+
{
|
| 2784 |
+
"epoch": 0.794,
|
| 2785 |
+
"grad_norm": 0.7762076258659363,
|
| 2786 |
+
"learning_rate": 6.189627277606894e-06,
|
| 2787 |
+
"loss": 1.0324,
|
| 2788 |
+
"step": 3970
|
| 2789 |
+
},
|
| 2790 |
+
{
|
| 2791 |
+
"epoch": 0.796,
|
| 2792 |
+
"grad_norm": 0.4900152385234833,
|
| 2793 |
+
"learning_rate": 6.075123608706093e-06,
|
| 2794 |
+
"loss": 0.9407,
|
| 2795 |
+
"step": 3980
|
| 2796 |
+
},
|
| 2797 |
+
{
|
| 2798 |
+
"epoch": 0.798,
|
| 2799 |
+
"grad_norm": 4.45159912109375,
|
| 2800 |
+
"learning_rate": 5.961542311581586e-06,
|
| 2801 |
+
"loss": 1.2251,
|
| 2802 |
+
"step": 3990
|
| 2803 |
+
},
|
| 2804 |
+
{
|
| 2805 |
+
"epoch": 0.8,
|
| 2806 |
+
"grad_norm": 0.44430792331695557,
|
| 2807 |
+
"learning_rate": 5.848888922025553e-06,
|
| 2808 |
+
"loss": 0.635,
|
| 2809 |
+
"step": 4000
|
| 2810 |
+
},
|
| 2811 |
+
{
|
| 2812 |
+
"epoch": 0.802,
|
| 2813 |
+
"grad_norm": 0.338540256023407,
|
| 2814 |
+
"learning_rate": 5.737168930605272e-06,
|
| 2815 |
+
"loss": 0.3039,
|
| 2816 |
+
"step": 4010
|
| 2817 |
+
},
|
| 2818 |
+
{
|
| 2819 |
+
"epoch": 0.804,
|
| 2820 |
+
"grad_norm": 0.0,
|
| 2821 |
+
"learning_rate": 5.626387782395512e-06,
|
| 2822 |
+
"loss": 1.0606,
|
| 2823 |
+
"step": 4020
|
| 2824 |
+
},
|
| 2825 |
+
{
|
| 2826 |
+
"epoch": 0.806,
|
| 2827 |
+
"grad_norm": 47.97066879272461,
|
| 2828 |
+
"learning_rate": 5.5165508767131415e-06,
|
| 2829 |
+
"loss": 2.3978,
|
| 2830 |
+
"step": 4030
|
| 2831 |
+
},
|
| 2832 |
+
{
|
| 2833 |
+
"epoch": 0.808,
|
| 2834 |
+
"grad_norm": 17.087448120117188,
|
| 2835 |
+
"learning_rate": 5.4076635668540075e-06,
|
| 2836 |
+
"loss": 1.7771,
|
| 2837 |
+
"step": 4040
|
| 2838 |
+
},
|
| 2839 |
+
{
|
| 2840 |
+
"epoch": 0.81,
|
| 2841 |
+
"grad_norm": 0.0,
|
| 2842 |
+
"learning_rate": 5.299731159831953e-06,
|
| 2843 |
+
"loss": 1.5544,
|
| 2844 |
+
"step": 4050
|
| 2845 |
+
},
|
| 2846 |
+
{
|
| 2847 |
+
"epoch": 0.812,
|
| 2848 |
+
"grad_norm": 3.899841785430908,
|
| 2849 |
+
"learning_rate": 5.192758916120236e-06,
|
| 2850 |
+
"loss": 0.574,
|
| 2851 |
+
"step": 4060
|
| 2852 |
+
},
|
| 2853 |
+
{
|
| 2854 |
+
"epoch": 0.814,
|
| 2855 |
+
"grad_norm": 0.0,
|
| 2856 |
+
"learning_rate": 5.086752049395094e-06,
|
| 2857 |
+
"loss": 0.9646,
|
| 2858 |
+
"step": 4070
|
| 2859 |
+
},
|
| 2860 |
+
{
|
| 2861 |
+
"epoch": 0.816,
|
| 2862 |
+
"grad_norm": 0.9200013279914856,
|
| 2863 |
+
"learning_rate": 4.981715726281666e-06,
|
| 2864 |
+
"loss": 0.422,
|
| 2865 |
+
"step": 4080
|
| 2866 |
+
},
|
| 2867 |
+
{
|
| 2868 |
+
"epoch": 0.818,
|
| 2869 |
+
"grad_norm": 10.240484237670898,
|
| 2870 |
+
"learning_rate": 4.877655066102149e-06,
|
| 2871 |
+
"loss": 0.6738,
|
| 2872 |
+
"step": 4090
|
| 2873 |
+
},
|
| 2874 |
+
{
|
| 2875 |
+
"epoch": 0.82,
|
| 2876 |
+
"grad_norm": 0.5230764746665955,
|
| 2877 |
+
"learning_rate": 4.7745751406263165e-06,
|
| 2878 |
+
"loss": 0.1991,
|
| 2879 |
+
"step": 4100
|
| 2880 |
+
},
|
| 2881 |
+
{
|
| 2882 |
+
"epoch": 0.822,
|
| 2883 |
+
"grad_norm": 9.851215362548828,
|
| 2884 |
+
"learning_rate": 4.672480973824311e-06,
|
| 2885 |
+
"loss": 0.7171,
|
| 2886 |
+
"step": 4110
|
| 2887 |
+
},
|
| 2888 |
+
{
|
| 2889 |
+
"epoch": 0.824,
|
| 2890 |
+
"grad_norm": 3.019394874572754,
|
| 2891 |
+
"learning_rate": 4.571377541621788e-06,
|
| 2892 |
+
"loss": 0.7478,
|
| 2893 |
+
"step": 4120
|
| 2894 |
+
},
|
| 2895 |
+
{
|
| 2896 |
+
"epoch": 0.826,
|
| 2897 |
+
"grad_norm": 0.4723840355873108,
|
| 2898 |
+
"learning_rate": 4.4712697716574e-06,
|
| 2899 |
+
"loss": 0.6985,
|
| 2900 |
+
"step": 4130
|
| 2901 |
+
},
|
| 2902 |
+
{
|
| 2903 |
+
"epoch": 0.828,
|
| 2904 |
+
"grad_norm": 1.5289415121078491,
|
| 2905 |
+
"learning_rate": 4.372162543042624e-06,
|
| 2906 |
+
"loss": 0.7893,
|
| 2907 |
+
"step": 4140
|
| 2908 |
+
},
|
| 2909 |
+
{
|
| 2910 |
+
"epoch": 0.83,
|
| 2911 |
+
"grad_norm": 1.3816901445388794,
|
| 2912 |
+
"learning_rate": 4.274060686123959e-06,
|
| 2913 |
+
"loss": 0.8127,
|
| 2914 |
+
"step": 4150
|
| 2915 |
+
},
|
| 2916 |
+
{
|
| 2917 |
+
"epoch": 0.832,
|
| 2918 |
+
"grad_norm": 0.4731523394584656,
|
| 2919 |
+
"learning_rate": 4.176968982247514e-06,
|
| 2920 |
+
"loss": 0.5259,
|
| 2921 |
+
"step": 4160
|
| 2922 |
+
},
|
| 2923 |
+
{
|
| 2924 |
+
"epoch": 0.834,
|
| 2925 |
+
"grad_norm": 19.804990768432617,
|
| 2926 |
+
"learning_rate": 4.08089216352596e-06,
|
| 2927 |
+
"loss": 0.9835,
|
| 2928 |
+
"step": 4170
|
| 2929 |
+
},
|
| 2930 |
+
{
|
| 2931 |
+
"epoch": 0.836,
|
| 2932 |
+
"grad_norm": 14.022736549377441,
|
| 2933 |
+
"learning_rate": 3.985834912607894e-06,
|
| 2934 |
+
"loss": 0.6508,
|
| 2935 |
+
"step": 4180
|
| 2936 |
+
},
|
| 2937 |
+
{
|
| 2938 |
+
"epoch": 0.838,
|
| 2939 |
+
"grad_norm": 5.0765485763549805,
|
| 2940 |
+
"learning_rate": 3.891801862449629e-06,
|
| 2941 |
+
"loss": 0.437,
|
| 2942 |
+
"step": 4190
|
| 2943 |
+
},
|
| 2944 |
+
{
|
| 2945 |
+
"epoch": 0.84,
|
| 2946 |
+
"grad_norm": 13.543389320373535,
|
| 2947 |
+
"learning_rate": 3.798797596089351e-06,
|
| 2948 |
+
"loss": 1.6048,
|
| 2949 |
+
"step": 4200
|
| 2950 |
+
},
|
| 2951 |
+
{
|
| 2952 |
+
"epoch": 0.842,
|
| 2953 |
+
"grad_norm": 3.4361746311187744,
|
| 2954 |
+
"learning_rate": 3.7068266464238084e-06,
|
| 2955 |
+
"loss": 0.8315,
|
| 2956 |
+
"step": 4210
|
| 2957 |
+
},
|
| 2958 |
+
{
|
| 2959 |
+
"epoch": 0.844,
|
| 2960 |
+
"grad_norm": 0.3679335415363312,
|
| 2961 |
+
"learning_rate": 3.6158934959873353e-06,
|
| 2962 |
+
"loss": 0.3853,
|
| 2963 |
+
"step": 4220
|
| 2964 |
+
},
|
| 2965 |
+
{
|
| 2966 |
+
"epoch": 0.846,
|
| 2967 |
+
"grad_norm": 0.0,
|
| 2968 |
+
"learning_rate": 3.5260025767333893e-06,
|
| 2969 |
+
"loss": 1.3448,
|
| 2970 |
+
"step": 4230
|
| 2971 |
+
},
|
| 2972 |
+
{
|
| 2973 |
+
"epoch": 0.848,
|
| 2974 |
+
"grad_norm": 4.070678234100342,
|
| 2975 |
+
"learning_rate": 3.4371582698185633e-06,
|
| 2976 |
+
"loss": 0.2405,
|
| 2977 |
+
"step": 4240
|
| 2978 |
+
},
|
| 2979 |
+
{
|
| 2980 |
+
"epoch": 0.85,
|
| 2981 |
+
"grad_norm": 0.0,
|
| 2982 |
+
"learning_rate": 3.3493649053890326e-06,
|
| 2983 |
+
"loss": 0.8112,
|
| 2984 |
+
"step": 4250
|
| 2985 |
+
},
|
| 2986 |
+
{
|
| 2987 |
+
"epoch": 0.852,
|
| 2988 |
+
"grad_norm": 3.341031551361084,
|
| 2989 |
+
"learning_rate": 3.262626762369525e-06,
|
| 2990 |
+
"loss": 1.3852,
|
| 2991 |
+
"step": 4260
|
| 2992 |
+
},
|
| 2993 |
+
{
|
| 2994 |
+
"epoch": 0.854,
|
| 2995 |
+
"grad_norm": 0.310798317193985,
|
| 2996 |
+
"learning_rate": 3.176948068254762e-06,
|
| 2997 |
+
"loss": 0.3188,
|
| 2998 |
+
"step": 4270
|
| 2999 |
+
},
|
| 3000 |
+
{
|
| 3001 |
+
"epoch": 0.856,
|
| 3002 |
+
"grad_norm": 6.217565536499023,
|
| 3003 |
+
"learning_rate": 3.092332998903416e-06,
|
| 3004 |
+
"loss": 0.4712,
|
| 3005 |
+
"step": 4280
|
| 3006 |
+
},
|
| 3007 |
+
{
|
| 3008 |
+
"epoch": 0.858,
|
| 3009 |
+
"grad_norm": 0.0,
|
| 3010 |
+
"learning_rate": 3.0087856783345914e-06,
|
| 3011 |
+
"loss": 0.4124,
|
| 3012 |
+
"step": 4290
|
| 3013 |
+
},
|
| 3014 |
+
{
|
| 3015 |
+
"epoch": 0.86,
|
| 3016 |
+
"grad_norm": 1.9965441226959229,
|
| 3017 |
+
"learning_rate": 2.9263101785268254e-06,
|
| 3018 |
+
"loss": 1.4114,
|
| 3019 |
+
"step": 4300
|
| 3020 |
+
},
|
| 3021 |
+
{
|
| 3022 |
+
"epoch": 0.862,
|
| 3023 |
+
"grad_norm": 28.95586395263672,
|
| 3024 |
+
"learning_rate": 2.8449105192196316e-06,
|
| 3025 |
+
"loss": 1.5525,
|
| 3026 |
+
"step": 4310
|
| 3027 |
+
},
|
| 3028 |
+
{
|
| 3029 |
+
"epoch": 0.864,
|
| 3030 |
+
"grad_norm": 0.0,
|
| 3031 |
+
"learning_rate": 2.764590667717562e-06,
|
| 3032 |
+
"loss": 1.1899,
|
| 3033 |
+
"step": 4320
|
| 3034 |
+
},
|
| 3035 |
+
{
|
| 3036 |
+
"epoch": 0.866,
|
| 3037 |
+
"grad_norm": 3.072131872177124,
|
| 3038 |
+
"learning_rate": 2.6853545386968606e-06,
|
| 3039 |
+
"loss": 0.5857,
|
| 3040 |
+
"step": 4330
|
| 3041 |
+
},
|
| 3042 |
+
{
|
| 3043 |
+
"epoch": 0.868,
|
| 3044 |
+
"grad_norm": 0.0,
|
| 3045 |
+
"learning_rate": 2.6072059940146775e-06,
|
| 3046 |
+
"loss": 1.6246,
|
| 3047 |
+
"step": 4340
|
| 3048 |
+
},
|
| 3049 |
+
{
|
| 3050 |
+
"epoch": 0.87,
|
| 3051 |
+
"grad_norm": 0.0,
|
| 3052 |
+
"learning_rate": 2.5301488425208296e-06,
|
| 3053 |
+
"loss": 0.2832,
|
| 3054 |
+
"step": 4350
|
| 3055 |
+
},
|
| 3056 |
+
{
|
| 3057 |
+
"epoch": 0.872,
|
| 3058 |
+
"grad_norm": 1.8508862257003784,
|
| 3059 |
+
"learning_rate": 2.454186839872158e-06,
|
| 3060 |
+
"loss": 1.5246,
|
| 3061 |
+
"step": 4360
|
| 3062 |
+
},
|
| 3063 |
+
{
|
| 3064 |
+
"epoch": 0.874,
|
| 3065 |
+
"grad_norm": 2.0368053913116455,
|
| 3066 |
+
"learning_rate": 2.379323688349516e-06,
|
| 3067 |
+
"loss": 1.5678,
|
| 3068 |
+
"step": 4370
|
| 3069 |
+
},
|
| 3070 |
+
{
|
| 3071 |
+
"epoch": 0.876,
|
| 3072 |
+
"grad_norm": 11.109952926635742,
|
| 3073 |
+
"learning_rate": 2.3055630366772856e-06,
|
| 3074 |
+
"loss": 1.3038,
|
| 3075 |
+
"step": 4380
|
| 3076 |
+
},
|
| 3077 |
+
{
|
| 3078 |
+
"epoch": 0.878,
|
| 3079 |
+
"grad_norm": 0.0,
|
| 3080 |
+
"learning_rate": 2.2329084798455746e-06,
|
| 3081 |
+
"loss": 0.7802,
|
| 3082 |
+
"step": 4390
|
| 3083 |
+
},
|
| 3084 |
+
{
|
| 3085 |
+
"epoch": 0.88,
|
| 3086 |
+
"grad_norm": 5.772270202636719,
|
| 3087 |
+
"learning_rate": 2.1613635589349756e-06,
|
| 3088 |
+
"loss": 1.1187,
|
| 3089 |
+
"step": 4400
|
| 3090 |
+
},
|
| 3091 |
+
{
|
| 3092 |
+
"epoch": 0.882,
|
| 3093 |
+
"grad_norm": 0.8582921028137207,
|
| 3094 |
+
"learning_rate": 2.0909317609440095e-06,
|
| 3095 |
+
"loss": 1.7619,
|
| 3096 |
+
"step": 4410
|
| 3097 |
+
},
|
| 3098 |
+
{
|
| 3099 |
+
"epoch": 0.884,
|
| 3100 |
+
"grad_norm": 5.193258285522461,
|
| 3101 |
+
"learning_rate": 2.0216165186191407e-06,
|
| 3102 |
+
"loss": 1.2435,
|
| 3103 |
+
"step": 4420
|
| 3104 |
+
},
|
| 3105 |
+
{
|
| 3106 |
+
"epoch": 0.886,
|
| 3107 |
+
"grad_norm": 5.076511383056641,
|
| 3108 |
+
"learning_rate": 1.95342121028749e-06,
|
| 3109 |
+
"loss": 1.7134,
|
| 3110 |
+
"step": 4430
|
| 3111 |
+
},
|
| 3112 |
+
{
|
| 3113 |
+
"epoch": 0.888,
|
| 3114 |
+
"grad_norm": 0.6807665228843689,
|
| 3115 |
+
"learning_rate": 1.8863491596921745e-06,
|
| 3116 |
+
"loss": 0.8158,
|
| 3117 |
+
"step": 4440
|
| 3118 |
+
},
|
| 3119 |
+
{
|
| 3120 |
+
"epoch": 0.89,
|
| 3121 |
+
"grad_norm": 4.553281784057617,
|
| 3122 |
+
"learning_rate": 1.8204036358303173e-06,
|
| 3123 |
+
"loss": 0.7814,
|
| 3124 |
+
"step": 4450
|
| 3125 |
+
},
|
| 3126 |
+
{
|
| 3127 |
+
"epoch": 0.892,
|
| 3128 |
+
"grad_norm": 8.79820728302002,
|
| 3129 |
+
"learning_rate": 1.7555878527937164e-06,
|
| 3130 |
+
"loss": 0.6062,
|
| 3131 |
+
"step": 4460
|
| 3132 |
+
},
|
| 3133 |
+
{
|
| 3134 |
+
"epoch": 0.894,
|
| 3135 |
+
"grad_norm": 0.0,
|
| 3136 |
+
"learning_rate": 1.6919049696121958e-06,
|
| 3137 |
+
"loss": 1.6605,
|
| 3138 |
+
"step": 4470
|
| 3139 |
+
},
|
| 3140 |
+
{
|
| 3141 |
+
"epoch": 0.896,
|
| 3142 |
+
"grad_norm": 1.9376107454299927,
|
| 3143 |
+
"learning_rate": 1.629358090099639e-06,
|
| 3144 |
+
"loss": 0.6406,
|
| 3145 |
+
"step": 4480
|
| 3146 |
+
},
|
| 3147 |
+
{
|
| 3148 |
+
"epoch": 0.898,
|
| 3149 |
+
"grad_norm": 0.6032451391220093,
|
| 3150 |
+
"learning_rate": 1.5679502627027136e-06,
|
| 3151 |
+
"loss": 0.8206,
|
| 3152 |
+
"step": 4490
|
| 3153 |
+
},
|
| 3154 |
+
{
|
| 3155 |
+
"epoch": 0.9,
|
| 3156 |
+
"grad_norm": 6.2024078369140625,
|
| 3157 |
+
"learning_rate": 1.5076844803522922e-06,
|
| 3158 |
+
"loss": 0.3695,
|
| 3159 |
+
"step": 4500
|
| 3160 |
+
},
|
| 3161 |
+
{
|
| 3162 |
+
"epoch": 0.902,
|
| 3163 |
+
"grad_norm": 1.7813353538513184,
|
| 3164 |
+
"learning_rate": 1.4485636803175829e-06,
|
| 3165 |
+
"loss": 1.0353,
|
| 3166 |
+
"step": 4510
|
| 3167 |
+
},
|
| 3168 |
+
{
|
| 3169 |
+
"epoch": 0.904,
|
| 3170 |
+
"grad_norm": 0.858911395072937,
|
| 3171 |
+
"learning_rate": 1.3905907440629752e-06,
|
| 3172 |
+
"loss": 0.8934,
|
| 3173 |
+
"step": 4520
|
| 3174 |
+
},
|
| 3175 |
+
{
|
| 3176 |
+
"epoch": 0.906,
|
| 3177 |
+
"grad_norm": 2.2456307411193848,
|
| 3178 |
+
"learning_rate": 1.333768497107593e-06,
|
| 3179 |
+
"loss": 0.5448,
|
| 3180 |
+
"step": 4530
|
| 3181 |
+
},
|
| 3182 |
+
{
|
| 3183 |
+
"epoch": 0.908,
|
| 3184 |
+
"grad_norm": 6.542331695556641,
|
| 3185 |
+
"learning_rate": 1.2780997088875869e-06,
|
| 3186 |
+
"loss": 0.6171,
|
| 3187 |
+
"step": 4540
|
| 3188 |
+
},
|
| 3189 |
+
{
|
| 3190 |
+
"epoch": 0.91,
|
| 3191 |
+
"grad_norm": 0.3684110641479492,
|
| 3192 |
+
"learning_rate": 1.2235870926211619e-06,
|
| 3193 |
+
"loss": 0.8105,
|
| 3194 |
+
"step": 4550
|
| 3195 |
+
},
|
| 3196 |
+
{
|
| 3197 |
+
"epoch": 0.912,
|
| 3198 |
+
"grad_norm": 1.4687561988830566,
|
| 3199 |
+
"learning_rate": 1.170233305176327e-06,
|
| 3200 |
+
"loss": 0.6286,
|
| 3201 |
+
"step": 4560
|
| 3202 |
+
},
|
| 3203 |
+
{
|
| 3204 |
+
"epoch": 0.914,
|
| 3205 |
+
"grad_norm": 4.056073188781738,
|
| 3206 |
+
"learning_rate": 1.1180409469414094e-06,
|
| 3207 |
+
"loss": 1.37,
|
| 3208 |
+
"step": 4570
|
| 3209 |
+
},
|
| 3210 |
+
{
|
| 3211 |
+
"epoch": 0.916,
|
| 3212 |
+
"grad_norm": 0.0,
|
| 3213 |
+
"learning_rate": 1.067012561698319e-06,
|
| 3214 |
+
"loss": 0.6792,
|
| 3215 |
+
"step": 4580
|
| 3216 |
+
},
|
| 3217 |
+
{
|
| 3218 |
+
"epoch": 0.918,
|
| 3219 |
+
"grad_norm": 10.166934967041016,
|
| 3220 |
+
"learning_rate": 1.0171506364985622e-06,
|
| 3221 |
+
"loss": 3.2842,
|
| 3222 |
+
"step": 4590
|
| 3223 |
+
},
|
| 3224 |
+
{
|
| 3225 |
+
"epoch": 0.92,
|
| 3226 |
+
"grad_norm": 2.7631049156188965,
|
| 3227 |
+
"learning_rate": 9.684576015420278e-07,
|
| 3228 |
+
"loss": 0.7875,
|
| 3229 |
+
"step": 4600
|
| 3230 |
+
},
|
| 3231 |
+
{
|
| 3232 |
+
"epoch": 0.922,
|
| 3233 |
+
"grad_norm": 1.1183360815048218,
|
| 3234 |
+
"learning_rate": 9.209358300585474e-07,
|
| 3235 |
+
"loss": 0.7768,
|
| 3236 |
+
"step": 4610
|
| 3237 |
+
},
|
| 3238 |
+
{
|
| 3239 |
+
"epoch": 0.924,
|
| 3240 |
+
"grad_norm": 0.0,
|
| 3241 |
+
"learning_rate": 8.745876381922147e-07,
|
| 3242 |
+
"loss": 0.9518,
|
| 3243 |
+
"step": 4620
|
| 3244 |
+
},
|
| 3245 |
+
{
|
| 3246 |
+
"epoch": 0.926,
|
| 3247 |
+
"grad_norm": 0.7455951571464539,
|
| 3248 |
+
"learning_rate": 8.294152848885157e-07,
|
| 3249 |
+
"loss": 0.4208,
|
| 3250 |
+
"step": 4630
|
| 3251 |
+
},
|
| 3252 |
+
{
|
| 3253 |
+
"epoch": 0.928,
|
| 3254 |
+
"grad_norm": 0.0,
|
| 3255 |
+
"learning_rate": 7.854209717842231e-07,
|
| 3256 |
+
"loss": 0.2602,
|
| 3257 |
+
"step": 4640
|
| 3258 |
+
},
|
| 3259 |
+
{
|
| 3260 |
+
"epoch": 0.93,
|
| 3261 |
+
"grad_norm": 0.605595052242279,
|
| 3262 |
+
"learning_rate": 7.426068431000882e-07,
|
| 3263 |
+
"loss": 0.8418,
|
| 3264 |
+
"step": 4650
|
| 3265 |
+
},
|
| 3266 |
+
{
|
| 3267 |
+
"epoch": 0.932,
|
| 3268 |
+
"grad_norm": 7.5212554931640625,
|
| 3269 |
+
"learning_rate": 7.009749855363456e-07,
|
| 3270 |
+
"loss": 0.4949,
|
| 3271 |
+
"step": 4660
|
| 3272 |
+
},
|
| 3273 |
+
{
|
| 3274 |
+
"epoch": 0.934,
|
| 3275 |
+
"grad_norm": 0.0,
|
| 3276 |
+
"learning_rate": 6.605274281709928e-07,
|
| 3277 |
+
"loss": 0.5919,
|
| 3278 |
+
"step": 4670
|
| 3279 |
+
},
|
| 3280 |
+
{
|
| 3281 |
+
"epoch": 0.936,
|
| 3282 |
+
"grad_norm": 0.0,
|
| 3283 |
+
"learning_rate": 6.212661423609184e-07,
|
| 3284 |
+
"loss": 0.9009,
|
| 3285 |
+
"step": 4680
|
| 3286 |
+
},
|
| 3287 |
+
{
|
| 3288 |
+
"epoch": 0.938,
|
| 3289 |
+
"grad_norm": 0.7921484708786011,
|
| 3290 |
+
"learning_rate": 5.83193041645802e-07,
|
| 3291 |
+
"loss": 0.4119,
|
| 3292 |
+
"step": 4690
|
| 3293 |
+
},
|
| 3294 |
+
{
|
| 3295 |
+
"epoch": 0.94,
|
| 3296 |
+
"grad_norm": 0.0,
|
| 3297 |
+
"learning_rate": 5.463099816548579e-07,
|
| 3298 |
+
"loss": 0.4794,
|
| 3299 |
+
"step": 4700
|
| 3300 |
+
},
|
| 3301 |
+
{
|
| 3302 |
+
"epoch": 0.942,
|
| 3303 |
+
"grad_norm": 1.862557053565979,
|
| 3304 |
+
"learning_rate": 5.106187600163987e-07,
|
| 3305 |
+
"loss": 0.8312,
|
| 3306 |
+
"step": 4710
|
| 3307 |
+
},
|
| 3308 |
+
{
|
| 3309 |
+
"epoch": 0.944,
|
| 3310 |
+
"grad_norm": 1.6036162376403809,
|
| 3311 |
+
"learning_rate": 4.7612111627021175e-07,
|
| 3312 |
+
"loss": 1.0177,
|
| 3313 |
+
"step": 4720
|
| 3314 |
+
},
|
| 3315 |
+
{
|
| 3316 |
+
"epoch": 0.946,
|
| 3317 |
+
"grad_norm": 0.8620813488960266,
|
| 3318 |
+
"learning_rate": 4.4281873178278475e-07,
|
| 3319 |
+
"loss": 0.7843,
|
| 3320 |
+
"step": 4730
|
| 3321 |
+
},
|
| 3322 |
+
{
|
| 3323 |
+
"epoch": 0.948,
|
| 3324 |
+
"grad_norm": 0.0,
|
| 3325 |
+
"learning_rate": 4.107132296653549e-07,
|
| 3326 |
+
"loss": 0.7314,
|
| 3327 |
+
"step": 4740
|
| 3328 |
+
},
|
| 3329 |
+
{
|
| 3330 |
+
"epoch": 0.95,
|
| 3331 |
+
"grad_norm": 2.933771848678589,
|
| 3332 |
+
"learning_rate": 3.7980617469479953e-07,
|
| 3333 |
+
"loss": 0.5168,
|
| 3334 |
+
"step": 4750
|
| 3335 |
+
},
|
| 3336 |
+
{
|
| 3337 |
+
"epoch": 0.952,
|
| 3338 |
+
"grad_norm": 0.0,
|
| 3339 |
+
"learning_rate": 3.5009907323737825e-07,
|
| 3340 |
+
"loss": 1.4444,
|
| 3341 |
+
"step": 4760
|
| 3342 |
+
},
|
| 3343 |
+
{
|
| 3344 |
+
"epoch": 0.954,
|
| 3345 |
+
"grad_norm": 19.85219955444336,
|
| 3346 |
+
"learning_rate": 3.215933731753024e-07,
|
| 3347 |
+
"loss": 1.1199,
|
| 3348 |
+
"step": 4770
|
| 3349 |
+
},
|
| 3350 |
+
{
|
| 3351 |
+
"epoch": 0.956,
|
| 3352 |
+
"grad_norm": 0.0,
|
| 3353 |
+
"learning_rate": 2.942904638361804e-07,
|
| 3354 |
+
"loss": 0.5519,
|
| 3355 |
+
"step": 4780
|
| 3356 |
+
},
|
| 3357 |
+
{
|
| 3358 |
+
"epoch": 0.958,
|
| 3359 |
+
"grad_norm": 1.450451135635376,
|
| 3360 |
+
"learning_rate": 2.681916759252917e-07,
|
| 3361 |
+
"loss": 0.9452,
|
| 3362 |
+
"step": 4790
|
| 3363 |
+
},
|
| 3364 |
+
{
|
| 3365 |
+
"epoch": 0.96,
|
| 3366 |
+
"grad_norm": 0.0,
|
| 3367 |
+
"learning_rate": 2.4329828146074095e-07,
|
| 3368 |
+
"loss": 3.2854,
|
| 3369 |
+
"step": 4800
|
| 3370 |
+
},
|
| 3371 |
+
{
|
| 3372 |
+
"epoch": 0.962,
|
| 3373 |
+
"grad_norm": 0.326992005109787,
|
| 3374 |
+
"learning_rate": 2.1961149371145795e-07,
|
| 3375 |
+
"loss": 0.5169,
|
| 3376 |
+
"step": 4810
|
| 3377 |
+
},
|
| 3378 |
+
{
|
| 3379 |
+
"epoch": 0.964,
|
| 3380 |
+
"grad_norm": 0.0,
|
| 3381 |
+
"learning_rate": 1.9713246713805588e-07,
|
| 3382 |
+
"loss": 0.8747,
|
| 3383 |
+
"step": 4820
|
| 3384 |
+
},
|
| 3385 |
+
{
|
| 3386 |
+
"epoch": 0.966,
|
| 3387 |
+
"grad_norm": 3.3700320720672607,
|
| 3388 |
+
"learning_rate": 1.7586229733657644e-07,
|
| 3389 |
+
"loss": 1.0667,
|
| 3390 |
+
"step": 4830
|
| 3391 |
+
},
|
| 3392 |
+
{
|
| 3393 |
+
"epoch": 0.968,
|
| 3394 |
+
"grad_norm": 9.731518745422363,
|
| 3395 |
+
"learning_rate": 1.5580202098509077e-07,
|
| 3396 |
+
"loss": 1.4877,
|
| 3397 |
+
"step": 4840
|
| 3398 |
+
},
|
| 3399 |
+
{
|
| 3400 |
+
"epoch": 0.97,
|
| 3401 |
+
"grad_norm": 0.9191303253173828,
|
| 3402 |
+
"learning_rate": 1.3695261579316777e-07,
|
| 3403 |
+
"loss": 0.9893,
|
| 3404 |
+
"step": 4850
|
| 3405 |
+
},
|
| 3406 |
+
{
|
| 3407 |
+
"epoch": 0.972,
|
| 3408 |
+
"grad_norm": 0.6930148005485535,
|
| 3409 |
+
"learning_rate": 1.193150004542204e-07,
|
| 3410 |
+
"loss": 0.9399,
|
| 3411 |
+
"step": 4860
|
| 3412 |
+
},
|
| 3413 |
+
{
|
| 3414 |
+
"epoch": 0.974,
|
| 3415 |
+
"grad_norm": 1.6349282264709473,
|
| 3416 |
+
"learning_rate": 1.0289003460074165e-07,
|
| 3417 |
+
"loss": 0.4823,
|
| 3418 |
+
"step": 4870
|
| 3419 |
+
},
|
| 3420 |
+
{
|
| 3421 |
+
"epoch": 0.976,
|
| 3422 |
+
"grad_norm": 0.8186390399932861,
|
| 3423 |
+
"learning_rate": 8.767851876239074e-08,
|
| 3424 |
+
"loss": 0.999,
|
| 3425 |
+
"step": 4880
|
| 3426 |
+
},
|
| 3427 |
+
{
|
| 3428 |
+
"epoch": 0.978,
|
| 3429 |
+
"grad_norm": 0.3777938485145569,
|
| 3430 |
+
"learning_rate": 7.368119432699383e-08,
|
| 3431 |
+
"loss": 0.6337,
|
| 3432 |
+
"step": 4890
|
| 3433 |
+
},
|
| 3434 |
+
{
|
| 3435 |
+
"epoch": 0.98,
|
| 3436 |
+
"grad_norm": 1.0768874883651733,
|
| 3437 |
+
"learning_rate": 6.089874350439506e-08,
|
| 3438 |
+
"loss": 0.5041,
|
| 3439 |
+
"step": 4900
|
| 3440 |
+
},
|
| 3441 |
+
{
|
| 3442 |
+
"epoch": 0.982,
|
| 3443 |
+
"grad_norm": 1.119691252708435,
|
| 3444 |
+
"learning_rate": 4.9331789293211026e-08,
|
| 3445 |
+
"loss": 0.9211,
|
| 3446 |
+
"step": 4910
|
| 3447 |
+
},
|
| 3448 |
+
{
|
| 3449 |
+
"epoch": 0.984,
|
| 3450 |
+
"grad_norm": 0.404674768447876,
|
| 3451 |
+
"learning_rate": 3.8980895450474455e-08,
|
| 3452 |
+
"loss": 0.5071,
|
| 3453 |
+
"step": 4920
|
| 3454 |
+
},
|
| 3455 |
+
{
|
| 3456 |
+
"epoch": 0.986,
|
| 3457 |
+
"grad_norm": 15.42659854888916,
|
| 3458 |
+
"learning_rate": 2.9846566464150626e-08,
|
| 3459 |
+
"loss": 0.7782,
|
| 3460 |
+
"step": 4930
|
| 3461 |
+
},
|
| 3462 |
+
{
|
| 3463 |
+
"epoch": 0.988,
|
| 3464 |
+
"grad_norm": 0.0,
|
| 3465 |
+
"learning_rate": 2.192924752854042e-08,
|
| 3466 |
+
"loss": 0.9379,
|
| 3467 |
+
"step": 4940
|
| 3468 |
+
},
|
| 3469 |
+
{
|
| 3470 |
+
"epoch": 0.99,
|
| 3471 |
+
"grad_norm": 0.2315896451473236,
|
| 3472 |
+
"learning_rate": 1.522932452260595e-08,
|
| 3473 |
+
"loss": 0.552,
|
| 3474 |
+
"step": 4950
|
| 3475 |
+
},
|
| 3476 |
+
{
|
| 3477 |
+
"epoch": 0.992,
|
| 3478 |
+
"grad_norm": 9.166641235351562,
|
| 3479 |
+
"learning_rate": 9.747123991141194e-09,
|
| 3480 |
+
"loss": 0.5151,
|
| 3481 |
+
"step": 4960
|
| 3482 |
+
},
|
| 3483 |
+
{
|
| 3484 |
+
"epoch": 0.994,
|
| 3485 |
+
"grad_norm": 2.209789752960205,
|
| 3486 |
+
"learning_rate": 5.48291312886251e-09,
|
| 3487 |
+
"loss": 0.7505,
|
| 3488 |
+
"step": 4970
|
| 3489 |
+
},
|
| 3490 |
+
{
|
| 3491 |
+
"epoch": 0.996,
|
| 3492 |
+
"grad_norm": 0.7653072476387024,
|
| 3493 |
+
"learning_rate": 2.4368997673940297e-09,
|
| 3494 |
+
"loss": 0.8377,
|
| 3495 |
+
"step": 4980
|
| 3496 |
+
},
|
| 3497 |
+
{
|
| 3498 |
+
"epoch": 0.998,
|
| 3499 |
+
"grad_norm": 1.3366658687591553,
|
| 3500 |
+
"learning_rate": 6.092323651313292e-10,
|
| 3501 |
+
"loss": 1.0276,
|
| 3502 |
+
"step": 4990
|
| 3503 |
+
},
|
| 3504 |
+
{
|
| 3505 |
+
"epoch": 1.0,
|
| 3506 |
+
"grad_norm": 0.0,
|
| 3507 |
+
"learning_rate": 0.0,
|
| 3508 |
+
"loss": 0.4942,
|
| 3509 |
+
"step": 5000
|
| 3510 |
+
},
|
| 3511 |
+
{
|
| 3512 |
+
"epoch": 1.0,
|
| 3513 |
+
"step": 5000,
|
| 3514 |
+
"total_flos": 3.424475897929728e+16,
|
| 3515 |
+
"train_loss": 1.0425229248046874,
|
| 3516 |
+
"train_runtime": 1241.3169,
|
| 3517 |
+
"train_samples_per_second": 4.028,
|
| 3518 |
+
"train_steps_per_second": 4.028
|
| 3519 |
+
}
|
| 3520 |
+
],
|
| 3521 |
+
"logging_steps": 10,
|
| 3522 |
+
"max_steps": 5000,
|
| 3523 |
+
"num_input_tokens_seen": 0,
|
| 3524 |
+
"num_train_epochs": 1,
|
| 3525 |
+
"save_steps": 4000,
|
| 3526 |
+
"stateful_callbacks": {
|
| 3527 |
+
"TrainerControl": {
|
| 3528 |
+
"args": {
|
| 3529 |
+
"should_epoch_stop": false,
|
| 3530 |
+
"should_evaluate": false,
|
| 3531 |
+
"should_log": false,
|
| 3532 |
+
"should_save": true,
|
| 3533 |
+
"should_training_stop": true
|
| 3534 |
+
},
|
| 3535 |
+
"attributes": {}
|
| 3536 |
+
}
|
| 3537 |
+
},
|
| 3538 |
+
"total_flos": 3.424475897929728e+16,
|
| 3539 |
+
"train_batch_size": 1,
|
| 3540 |
+
"trial_name": null,
|
| 3541 |
+
"trial_params": null
|
| 3542 |
+
}
|
Llama-2-13b-chat-hf/DomainBench/Geography/training_args.bin
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:37b0537f42057321aef63580537318f00e41ce6b989422434a03bdf8f6e599f3
|
| 3 |
+
size 5432
|
Llama-2-13b-chat-hf/DomainBench/Geography/training_loss.png
ADDED
|
Llama-2-13b-chat-hf/DomainBench/Medicine/README.md
ADDED
|
@@ -0,0 +1,57 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
---
|
| 2 |
+
library_name: peft
|
| 3 |
+
license: other
|
| 4 |
+
base_model: /hujinwu/LLM_Assemble/pretrain_model/Llama-2-13b-chat-hf
|
| 5 |
+
tags:
|
| 6 |
+
- llama-factory
|
| 7 |
+
- lora
|
| 8 |
+
- generated_from_trainer
|
| 9 |
+
model-index:
|
| 10 |
+
- name: threshold_3-lamb_0.1-lr_5e-5
|
| 11 |
+
results: []
|
| 12 |
+
---
|
| 13 |
+
|
| 14 |
+
<!-- This model card has been generated automatically according to the information the Trainer had access to. You
|
| 15 |
+
should probably proofread and complete it, then remove this comment. -->
|
| 16 |
+
|
| 17 |
+
# threshold_3-lamb_0.1-lr_5e-5
|
| 18 |
+
|
| 19 |
+
This model is a fine-tuned version of [/hujinwu/LLM_Assemble/pretrain_model/Llama-2-13b-chat-hf](https://huggingface.co//hujinwu/LLM_Assemble/pretrain_model/Llama-2-13b-chat-hf) on the gen_med_gpt dataset.
|
| 20 |
+
|
| 21 |
+
## Model description
|
| 22 |
+
|
| 23 |
+
More information needed
|
| 24 |
+
|
| 25 |
+
## Intended uses & limitations
|
| 26 |
+
|
| 27 |
+
More information needed
|
| 28 |
+
|
| 29 |
+
## Training and evaluation data
|
| 30 |
+
|
| 31 |
+
More information needed
|
| 32 |
+
|
| 33 |
+
## Training procedure
|
| 34 |
+
|
| 35 |
+
### Training hyperparameters
|
| 36 |
+
|
| 37 |
+
The following hyperparameters were used during training:
|
| 38 |
+
- learning_rate: 5e-05
|
| 39 |
+
- train_batch_size: 1
|
| 40 |
+
- eval_batch_size: 8
|
| 41 |
+
- seed: 42
|
| 42 |
+
- optimizer: Use adamw_torch with betas=(0.9,0.999) and epsilon=1e-08 and optimizer_args=No additional optimizer arguments
|
| 43 |
+
- lr_scheduler_type: cosine
|
| 44 |
+
- lr_scheduler_warmup_ratio: 0.1
|
| 45 |
+
- num_epochs: 1.0
|
| 46 |
+
|
| 47 |
+
### Training results
|
| 48 |
+
|
| 49 |
+
|
| 50 |
+
|
| 51 |
+
### Framework versions
|
| 52 |
+
|
| 53 |
+
- PEFT 0.12.0
|
| 54 |
+
- Transformers 4.46.1
|
| 55 |
+
- Pytorch 2.5.1+cu124
|
| 56 |
+
- Datasets 3.1.0
|
| 57 |
+
- Tokenizers 0.20.3
|
Llama-2-13b-chat-hf/DomainBench/Medicine/adapter_config.json
ADDED
|
@@ -0,0 +1,29 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"alpha_pattern": {},
|
| 3 |
+
"auto_mapping": null,
|
| 4 |
+
"base_model_name_or_path": "/hujinwu/LLM_Assemble/pretrain_model/Llama-2-13b-chat-hf",
|
| 5 |
+
"bias": "none",
|
| 6 |
+
"fan_in_fan_out": false,
|
| 7 |
+
"inference_mode": true,
|
| 8 |
+
"init_lora_weights": true,
|
| 9 |
+
"layer_replication": null,
|
| 10 |
+
"layers_pattern": null,
|
| 11 |
+
"layers_to_transform": null,
|
| 12 |
+
"loftq_config": {},
|
| 13 |
+
"lora_alpha": 16,
|
| 14 |
+
"lora_dropout": 0.0,
|
| 15 |
+
"megatron_config": null,
|
| 16 |
+
"megatron_core": "megatron.core",
|
| 17 |
+
"modules_to_save": null,
|
| 18 |
+
"peft_type": "LORA",
|
| 19 |
+
"r": 8,
|
| 20 |
+
"rank_pattern": {},
|
| 21 |
+
"revision": null,
|
| 22 |
+
"target_modules": [
|
| 23 |
+
"v_proj",
|
| 24 |
+
"q_proj"
|
| 25 |
+
],
|
| 26 |
+
"task_type": "CAUSAL_LM",
|
| 27 |
+
"use_dora": false,
|
| 28 |
+
"use_rslora": false
|
| 29 |
+
}
|
Llama-2-13b-chat-hf/DomainBench/Medicine/adapter_model.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:90dbf183e9007b63219a2bb1e84166f60b2129f15acf2dda914b4a80058873de
|
| 3 |
+
size 26235704
|
Llama-2-13b-chat-hf/DomainBench/Medicine/all_results.json
ADDED
|
@@ -0,0 +1,8 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"epoch": 1.0,
|
| 3 |
+
"total_flos": 2.556923075960832e+16,
|
| 4 |
+
"train_loss": 0.03917525251507759,
|
| 5 |
+
"train_runtime": 757.0692,
|
| 6 |
+
"train_samples_per_second": 6.604,
|
| 7 |
+
"train_steps_per_second": 6.604
|
| 8 |
+
}
|
Llama-2-13b-chat-hf/DomainBench/Medicine/logfile.txt
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
Llama-2-13b-chat-hf/DomainBench/Medicine/special_tokens_map.json
ADDED
|
@@ -0,0 +1,24 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"bos_token": {
|
| 3 |
+
"content": "<s>",
|
| 4 |
+
"lstrip": false,
|
| 5 |
+
"normalized": false,
|
| 6 |
+
"rstrip": false,
|
| 7 |
+
"single_word": false
|
| 8 |
+
},
|
| 9 |
+
"eos_token": {
|
| 10 |
+
"content": "</s>",
|
| 11 |
+
"lstrip": false,
|
| 12 |
+
"normalized": false,
|
| 13 |
+
"rstrip": false,
|
| 14 |
+
"single_word": false
|
| 15 |
+
},
|
| 16 |
+
"pad_token": "</s>",
|
| 17 |
+
"unk_token": {
|
| 18 |
+
"content": "<unk>",
|
| 19 |
+
"lstrip": false,
|
| 20 |
+
"normalized": false,
|
| 21 |
+
"rstrip": false,
|
| 22 |
+
"single_word": false
|
| 23 |
+
}
|
| 24 |
+
}
|
Llama-2-13b-chat-hf/DomainBench/Medicine/tokenizer.json
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|