Farouk committed on
Commit · f8b60c0
Parent(s): 0eb810b
Training in progress, step 6200
- adapter_config.json +4 -4
- adapter_model.bin +1 -1
- all_results.json +8 -8
- checkpoint-4200/adapter_model/adapter_model/README.md +24 -0
- checkpoint-4200/adapter_model/adapter_model/adapter_model.bin +1 -1
- checkpoint-6200/README.md +20 -0
- checkpoint-6200/adapter_config.json +26 -0
- checkpoint-6200/adapter_model.bin +3 -0
- checkpoint-6200/added_tokens.json +3 -0
- checkpoint-6200/optimizer.pt +3 -0
- checkpoint-6200/rng_state.pth +3 -0
- checkpoint-6200/scheduler.pt +3 -0
- checkpoint-6200/special_tokens_map.json +6 -0
- checkpoint-6200/tokenizer.model +3 -0
- checkpoint-6200/tokenizer_config.json +35 -0
- checkpoint-6200/trainer_state.json +0 -0
- checkpoint-6200/training_args.bin +3 -0
- eval_results.json +4 -4
- metrics.json +1 -1
- train_results.json +5 -5
- trainer_state.json +3666 -3
- training_args.bin +1 -1
adapter_config.json CHANGED
@@ -14,13 +14,13 @@
     "r": 64,
     "revision": null,
     "target_modules": [
-        "o_proj",
-        "k_proj",
-        "down_proj",
         "gate_proj",
+        "down_proj",
+        "q_proj",
         "up_proj",
         "v_proj",
-        "
+        "k_proj",
+        "o_proj"
     ],
     "task_type": "CAUSAL_LM"
 }
adapter_model.bin CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:89e3e86e88a2a473616d28f379b3735697c068cbf1c5d7c8fe7b56148a37a0af
 size 871609293
all_results.json CHANGED
@@ -1,11 +1,11 @@
 {
-    "epoch": 0.
+    "epoch": 0.05,
     "eval_loss": 6.335043907165527,
-    "eval_runtime": 21.
-    "eval_samples_per_second": 2.
-    "eval_steps_per_second": 1.
-    "train_loss": 0.
-    "train_runtime":
-    "train_samples_per_second": 17.
-    "train_steps_per_second": 17.
+    "eval_runtime": 21.7341,
+    "eval_samples_per_second": 2.301,
+    "eval_steps_per_second": 1.15,
+    "train_loss": 0.5874443841576577,
+    "train_runtime": 1725.6374,
+    "train_samples_per_second": 17.385,
+    "train_steps_per_second": 17.385
 }
checkpoint-4200/adapter_model/adapter_model/README.md CHANGED
@@ -114,6 +114,28 @@ The following `bitsandbytes` quantization config was used during training:
 - bnb_4bit_use_double_quant: True
 - bnb_4bit_compute_dtype: bfloat16
 
+The following `bitsandbytes` quantization config was used during training:
+- load_in_8bit: False
+- load_in_4bit: True
+- llm_int8_threshold: 6.0
+- llm_int8_skip_modules: None
+- llm_int8_enable_fp32_cpu_offload: False
+- llm_int8_has_fp16_weight: False
+- bnb_4bit_quant_type: nf4
+- bnb_4bit_use_double_quant: True
+- bnb_4bit_compute_dtype: bfloat16
+
+The following `bitsandbytes` quantization config was used during training:
+- load_in_8bit: False
+- load_in_4bit: True
+- llm_int8_threshold: 6.0
+- llm_int8_skip_modules: None
+- llm_int8_enable_fp32_cpu_offload: False
+- llm_int8_has_fp16_weight: False
+- bnb_4bit_quant_type: nf4
+- bnb_4bit_use_double_quant: True
+- bnb_4bit_compute_dtype: bfloat16
+
 The following `bitsandbytes` quantization config was used during training:
 - load_in_8bit: False
 - load_in_4bit: True
@@ -136,5 +158,7 @@
 - PEFT 0.4.0
 - PEFT 0.4.0
 - PEFT 0.4.0
+- PEFT 0.4.0
+- PEFT 0.4.0
 
 - PEFT 0.4.0
checkpoint-4200/adapter_model/adapter_model/adapter_model.bin CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:ff18c40f9b3c9fb20f1c95d4dff151244eba09eee79ae11c6121cc23181c2442
 size 871609293
checkpoint-6200/README.md ADDED
@@ -0,0 +1,20 @@
+---
+library_name: peft
+---
+## Training procedure
+
+
+The following `bitsandbytes` quantization config was used during training:
+- load_in_8bit: False
+- load_in_4bit: True
+- llm_int8_threshold: 6.0
+- llm_int8_skip_modules: None
+- llm_int8_enable_fp32_cpu_offload: False
+- llm_int8_has_fp16_weight: False
+- bnb_4bit_quant_type: nf4
+- bnb_4bit_use_double_quant: True
+- bnb_4bit_compute_dtype: bfloat16
+### Framework versions
+
+
+- PEFT 0.4.0
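The README above records the bitsandbytes settings the run was quantized with. As a point of reference only, the short sketch below writes that same configuration as a transformers BitsAndBytesConfig; the actual training script is not part of this commit, so treat the snippet as an assumption rather than the repository's code.

# Sketch: the quantization config from the README, expressed as a
# transformers BitsAndBytesConfig (assumed usage, not code from this repo).
import torch
from transformers import BitsAndBytesConfig

bnb_config = BitsAndBytesConfig(
    load_in_8bit=False,
    load_in_4bit=True,
    llm_int8_threshold=6.0,
    llm_int8_skip_modules=None,
    llm_int8_enable_fp32_cpu_offload=False,
    llm_int8_has_fp16_weight=False,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_use_double_quant=True,
    bnb_4bit_compute_dtype=torch.bfloat16,
)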
checkpoint-6200/adapter_config.json ADDED
@@ -0,0 +1,26 @@
+{
+    "auto_mapping": null,
+    "base_model_name_or_path": "codellama/CodeLlama-34b-Python-hf",
+    "bias": "none",
+    "fan_in_fan_out": false,
+    "inference_mode": true,
+    "init_lora_weights": true,
+    "layers_pattern": null,
+    "layers_to_transform": null,
+    "lora_alpha": 16.0,
+    "lora_dropout": 0.1,
+    "modules_to_save": null,
+    "peft_type": "LORA",
+    "r": 64,
+    "revision": null,
+    "target_modules": [
+        "gate_proj",
+        "down_proj",
+        "q_proj",
+        "up_proj",
+        "v_proj",
+        "k_proj",
+        "o_proj"
+    ],
+    "task_type": "CAUSAL_LM"
+}
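adapter_config.json describes a rank-64 LoRA adapter (lora_alpha 16, dropout 0.1) over codellama/CodeLlama-34b-Python-hf that targets all seven attention and MLP projections. Below is a minimal sketch of attaching such a checkpoint to the base model with the PEFT library; the directory name, dtype, and device placement are assumptions for illustration, not code shipped in this commit.

# Sketch: attach the LoRA adapter from this checkpoint to its base model.
# Paths and dtype are assumptions; the run's own scripts are not in this commit.
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from peft import PeftModel

adapter_dir = "checkpoint-6200"  # holds adapter_config.json and adapter_model.bin

base = AutoModelForCausalLM.from_pretrained(
    "codellama/CodeLlama-34b-Python-hf",  # base_model_name_or_path from the config
    torch_dtype=torch.bfloat16,
    device_map="auto",
)
tokenizer = AutoTokenizer.from_pretrained(adapter_dir)

# PEFT reads adapter_config.json (r=64, lora_alpha=16, target_modules=[...])
# and injects LoRA layers into the named projections before loading the weights.
model = PeftModel.from_pretrained(base, adapter_dir)
model.eval()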
checkpoint-6200/adapter_model.bin ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:89e3e86e88a2a473616d28f379b3735697c068cbf1c5d7c8fe7b56148a37a0af
+size 871609293
checkpoint-6200/added_tokens.json ADDED
@@ -0,0 +1,3 @@
+{
+    "[PAD]": 32000
+}
checkpoint-6200/optimizer.pt ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:6f915a3be0537ed39b74e6b133c9652eb8040c1f61e027bd380f53fc1de4740e
+size 873872799
checkpoint-6200/rng_state.pth ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d56947d85c4236c819c7e5ba1f3020a0c401b4caa051580cac172d0a50c72119
+size 14511
checkpoint-6200/scheduler.pt ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:81248501833af563175f43c1d681185643b8411cee1fb1e631b8687c465eb2e3
+size 627
checkpoint-6200/special_tokens_map.json ADDED
@@ -0,0 +1,6 @@
+{
+    "bos_token": "<s>",
+    "eos_token": "</s>",
+    "pad_token": "[PAD]",
+    "unk_token": "<unk>"
+}
checkpoint-6200/tokenizer.model ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9e556afd44213b6bd1be2b850ebbbd98f5481437a8021afaf58ee7fb1818d347
+size 499723
checkpoint-6200/tokenizer_config.json ADDED
@@ -0,0 +1,35 @@
+{
+    "add_bos_token": true,
+    "add_eos_token": false,
+    "bos_token": {
+        "__type": "AddedToken",
+        "content": "<s>",
+        "lstrip": false,
+        "normalized": true,
+        "rstrip": false,
+        "single_word": false
+    },
+    "clean_up_tokenization_spaces": false,
+    "eos_token": {
+        "__type": "AddedToken",
+        "content": "</s>",
+        "lstrip": false,
+        "normalized": true,
+        "rstrip": false,
+        "single_word": false
+    },
+    "legacy": null,
+    "model_max_length": 1000000000000000019884624838656,
+    "pad_token": null,
+    "padding_side": "right",
+    "sp_model_kwargs": {},
+    "tokenizer_class": "LlamaTokenizer",
+    "unk_token": {
+        "__type": "AddedToken",
+        "content": "<unk>",
+        "lstrip": false,
+        "normalized": true,
+        "rstrip": false,
+        "single_word": false
+    }
+}
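tokenizer_config.json leaves pad_token as null, while added_tokens.json and special_tokens_map.json in the same checkpoint register "[PAD]" at id 32000. The sketch below shows how that typically resolves when the tokenizer is reloaded from the checkpoint directory; the explicit fallback branch is an assumption for illustration, not code from this repository.

# Sketch: reload the checkpoint tokenizer and confirm the [PAD] token.
# The explicit fallback is illustrative only.
from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("checkpoint-6200")

if tokenizer.pad_token is None:
    # special_tokens_map.json normally resolves this; add it explicitly otherwise.
    tokenizer.add_special_tokens({"pad_token": "[PAD]"})

print(tokenizer.pad_token, tokenizer.pad_token_id)  # expected: [PAD] 32000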
checkpoint-6200/trainer_state.json ADDED
The diff for this file is too large to render. See raw diff.
checkpoint-6200/training_args.bin ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:85783faab59f5f6d8bcf691e35bb86cff435e22f3fa9169bf4e56c0239c8d7e4
+size 6011
eval_results.json CHANGED
@@ -1,7 +1,7 @@
 {
-    "epoch": 0.
+    "epoch": 0.05,
     "eval_loss": 6.335043907165527,
-    "eval_runtime": 21.
-    "eval_samples_per_second": 2.
-    "eval_steps_per_second": 1.
+    "eval_runtime": 21.7341,
+    "eval_samples_per_second": 2.301,
+    "eval_steps_per_second": 1.15
 }
metrics.json CHANGED
@@ -1 +1 @@
-{"run_name": "codellama34b_unnatural", "train_runtime":
+{"run_name": "codellama34b_unnatural", "train_runtime": 1725.6374, "train_samples_per_second": 17.385, "train_steps_per_second": 17.385, "train_loss": 0.5874443841576577, "epoch": 0.05, "eval_loss": 6.335043907165527, "eval_runtime": 21.7341, "eval_samples_per_second": 2.301, "eval_steps_per_second": 1.15}
train_results.json CHANGED
@@ -1,7 +1,7 @@
 {
-    "epoch": 0.
-    "train_loss": 0.
-    "train_runtime":
-    "train_samples_per_second": 17.
-    "train_steps_per_second": 17.
+    "epoch": 0.05,
+    "train_loss": 0.5874443841576577,
+    "train_runtime": 1725.6374,
+    "train_samples_per_second": 17.385,
+    "train_steps_per_second": 17.385
 }
trainer_state.json CHANGED

@@ -1,8 +1,8 @@
 {
     "best_metric": 6.335043907165527,
     "best_model_checkpoint": "./output_v2/34bCodellama_CodeLlama-34b-Python-hf_unnatural-instructions_standardized/checkpoint-4200",
-    "epoch": 0.
-    "global_step":
     "is_hyper_param_search": false,
     "is_local_process_zero": true,
     "is_world_process_zero": true,

@@ -32928,11 +32928,3674 @@
     "train_runtime": 1748.3273,
     "train_samples_per_second": 17.159,
     "train_steps_per_second": 17.159
     }
     ],
     "max_steps": 30000,
     "num_train_epochs": 1,
-    "total_flos":
     "trial_name": null,
     "trial_params": null
 }

 {
     "best_metric": 6.335043907165527,
     "best_model_checkpoint": "./output_v2/34bCodellama_CodeLlama-34b-Python-hf_unnatural-instructions_standardized/checkpoint-4200",
+    "epoch": 0.045833015048506606,
+    "global_step": 6000,
     "is_hyper_param_search": false,
     "is_local_process_zero": true,
     "is_world_process_zero": true,

     "train_runtime": 1748.3273,
     "train_samples_per_second": 17.159,
     "train_steps_per_second": 17.159
| 32931 |
+
},
|
| 32932 |
+
{
|
| 32933 |
+
"epoch": 0.04,
|
| 32934 |
+
"learning_rate": 0.0004,
|
| 32935 |
+
"loss": 8.7161,
|
| 32936 |
+
"step": 5401
|
| 32937 |
+
},
|
| 32938 |
+
{
|
| 32939 |
+
"epoch": 0.04,
|
| 32940 |
+
"learning_rate": 0.0004,
|
| 32941 |
+
"loss": 7.6554,
|
| 32942 |
+
"step": 5402
|
| 32943 |
+
},
|
| 32944 |
+
{
|
| 32945 |
+
"epoch": 0.04,
|
| 32946 |
+
"learning_rate": 0.0004,
|
| 32947 |
+
"loss": 6.7865,
|
| 32948 |
+
"step": 5403
|
| 32949 |
+
},
|
| 32950 |
+
{
|
| 32951 |
+
"epoch": 0.04,
|
| 32952 |
+
"learning_rate": 0.0004,
|
| 32953 |
+
"loss": 8.4439,
|
| 32954 |
+
"step": 5404
|
| 32955 |
+
},
|
| 32956 |
+
{
|
| 32957 |
+
"epoch": 0.04,
|
| 32958 |
+
"learning_rate": 0.0004,
|
| 32959 |
+
"loss": 7.0393,
|
| 32960 |
+
"step": 5405
|
| 32961 |
+
},
|
| 32962 |
+
{
|
| 32963 |
+
"epoch": 0.04,
|
| 32964 |
+
"learning_rate": 0.0004,
|
| 32965 |
+
"loss": 7.3437,
|
| 32966 |
+
"step": 5406
|
| 32967 |
+
},
|
| 32968 |
+
{
|
| 32969 |
+
"epoch": 0.04,
|
| 32970 |
+
"learning_rate": 0.0004,
|
| 32971 |
+
"loss": 8.6367,
|
| 32972 |
+
"step": 5407
|
| 32973 |
+
},
|
| 32974 |
+
{
|
| 32975 |
+
"epoch": 0.04,
|
| 32976 |
+
"learning_rate": 0.0004,
|
| 32977 |
+
"loss": 7.3527,
|
| 32978 |
+
"step": 5408
|
| 32979 |
+
},
|
| 32980 |
+
{
|
| 32981 |
+
"epoch": 0.04,
|
| 32982 |
+
"learning_rate": 0.0004,
|
| 32983 |
+
"loss": 7.4897,
|
| 32984 |
+
"step": 5409
|
| 32985 |
+
},
|
| 32986 |
+
{
|
| 32987 |
+
"epoch": 0.04,
|
| 32988 |
+
"learning_rate": 0.0004,
|
| 32989 |
+
"loss": 6.9515,
|
| 32990 |
+
"step": 5410
|
| 32991 |
+
},
|
| 32992 |
+
{
|
| 32993 |
+
"epoch": 0.04,
|
| 32994 |
+
"learning_rate": 0.0004,
|
| 32995 |
+
"loss": 6.6737,
|
| 32996 |
+
"step": 5411
|
| 32997 |
+
},
|
| 32998 |
+
{
|
| 32999 |
+
"epoch": 0.04,
|
| 33000 |
+
"learning_rate": 0.0004,
|
| 33001 |
+
"loss": 4.9802,
|
| 33002 |
+
"step": 5412
|
| 33003 |
+
},
|
| 33004 |
+
{
|
| 33005 |
+
"epoch": 0.04,
|
| 33006 |
+
"learning_rate": 0.0004,
|
| 33007 |
+
"loss": 6.9681,
|
| 33008 |
+
"step": 5413
|
| 33009 |
+
},
|
| 33010 |
+
{
|
| 33011 |
+
"epoch": 0.04,
|
| 33012 |
+
"learning_rate": 0.0004,
|
| 33013 |
+
"loss": 5.0998,
|
| 33014 |
+
"step": 5414
|
| 33015 |
+
},
|
| 33016 |
+
{
|
| 33017 |
+
"epoch": 0.04,
|
| 33018 |
+
"learning_rate": 0.0004,
|
| 33019 |
+
"loss": 5.0552,
|
| 33020 |
+
"step": 5415
|
| 33021 |
+
},
|
| 33022 |
+
{
|
| 33023 |
+
"epoch": 0.04,
|
| 33024 |
+
"learning_rate": 0.0004,
|
| 33025 |
+
"loss": 3.956,
|
| 33026 |
+
"step": 5416
|
| 33027 |
+
},
|
| 33028 |
+
{
|
| 33029 |
+
"epoch": 0.04,
|
| 33030 |
+
"learning_rate": 0.0004,
|
| 33031 |
+
"loss": 7.4181,
|
| 33032 |
+
"step": 5417
|
| 33033 |
+
},
|
| 33034 |
+
{
|
| 33035 |
+
"epoch": 0.04,
|
| 33036 |
+
"learning_rate": 0.0004,
|
| 33037 |
+
"loss": 4.7352,
|
| 33038 |
+
"step": 5418
|
| 33039 |
+
},
|
| 33040 |
+
{
|
| 33041 |
+
"epoch": 0.04,
|
| 33042 |
+
"learning_rate": 0.0004,
|
| 33043 |
+
"loss": 6.7415,
|
| 33044 |
+
"step": 5419
|
| 33045 |
+
},
|
| 33046 |
+
{
|
| 33047 |
+
"epoch": 0.04,
|
| 33048 |
+
"learning_rate": 0.0004,
|
| 33049 |
+
"loss": 4.289,
|
| 33050 |
+
"step": 5420
|
| 33051 |
+
},
|
| 33052 |
+
{
|
| 33053 |
+
"epoch": 0.04,
|
| 33054 |
+
"learning_rate": 0.0004,
|
| 33055 |
+
"loss": 7.1359,
|
| 33056 |
+
"step": 5421
|
| 33057 |
+
},
|
| 33058 |
+
{
|
| 33059 |
+
"epoch": 0.04,
|
| 33060 |
+
"learning_rate": 0.0004,
|
| 33061 |
+
"loss": 3.1149,
|
| 33062 |
+
"step": 5422
|
| 33063 |
+
},
|
| 33064 |
+
{
|
| 33065 |
+
"epoch": 0.04,
|
| 33066 |
+
"learning_rate": 0.0004,
|
| 33067 |
+
"loss": 8.371,
|
| 33068 |
+
"step": 5423
|
| 33069 |
+
},
|
| 33070 |
+
{
|
| 33071 |
+
"epoch": 0.04,
|
| 33072 |
+
"learning_rate": 0.0004,
|
| 33073 |
+
"loss": 2.7619,
|
| 33074 |
+
"step": 5424
|
| 33075 |
+
},
|
| 33076 |
+
{
|
| 33077 |
+
"epoch": 0.04,
|
| 33078 |
+
"learning_rate": 0.0004,
|
| 33079 |
+
"loss": 5.3229,
|
| 33080 |
+
"step": 5425
|
| 33081 |
+
},
|
| 33082 |
+
{
|
| 33083 |
+
"epoch": 0.04,
|
| 33084 |
+
"learning_rate": 0.0004,
|
| 33085 |
+
"loss": 2.5643,
|
| 33086 |
+
"step": 5426
|
| 33087 |
+
},
|
| 33088 |
+
{
|
| 33089 |
+
"epoch": 0.04,
|
| 33090 |
+
"learning_rate": 0.0004,
|
| 33091 |
+
"loss": 6.8752,
|
| 33092 |
+
"step": 5427
|
| 33093 |
+
},
|
| 33094 |
+
{
|
| 33095 |
+
"epoch": 0.04,
|
| 33096 |
+
"learning_rate": 0.0004,
|
| 33097 |
+
"loss": 3.8785,
|
| 33098 |
+
"step": 5428
|
| 33099 |
+
},
|
| 33100 |
+
{
|
| 33101 |
+
"epoch": 0.04,
|
| 33102 |
+
"learning_rate": 0.0004,
|
| 33103 |
+
"loss": 5.06,
|
| 33104 |
+
"step": 5429
|
| 33105 |
+
},
|
| 33106 |
+
{
|
| 33107 |
+
"epoch": 0.04,
|
| 33108 |
+
"learning_rate": 0.0004,
|
| 33109 |
+
"loss": 6.7646,
|
| 33110 |
+
"step": 5430
|
| 33111 |
+
},
|
| 33112 |
+
{
|
| 33113 |
+
"epoch": 0.04,
|
| 33114 |
+
"learning_rate": 0.0004,
|
| 33115 |
+
"loss": 3.774,
|
| 33116 |
+
"step": 5431
|
| 33117 |
+
},
|
| 33118 |
+
{
|
| 33119 |
+
"epoch": 0.04,
|
| 33120 |
+
"learning_rate": 0.0004,
|
| 33121 |
+
"loss": 2.777,
|
| 33122 |
+
"step": 5432
|
| 33123 |
+
},
|
| 33124 |
+
{
|
| 33125 |
+
"epoch": 0.04,
|
| 33126 |
+
"learning_rate": 0.0004,
|
| 33127 |
+
"loss": 7.2203,
|
| 33128 |
+
"step": 5433
|
| 33129 |
+
},
|
| 33130 |
+
{
|
| 33131 |
+
"epoch": 0.04,
|
| 33132 |
+
"learning_rate": 0.0004,
|
| 33133 |
+
"loss": 3.7283,
|
| 33134 |
+
"step": 5434
|
| 33135 |
+
},
|
| 33136 |
+
{
|
| 33137 |
+
"epoch": 0.04,
|
| 33138 |
+
"learning_rate": 0.0004,
|
| 33139 |
+
"loss": 7.4205,
|
| 33140 |
+
"step": 5435
|
| 33141 |
+
},
|
| 33142 |
+
{
|
| 33143 |
+
"epoch": 0.04,
|
| 33144 |
+
"learning_rate": 0.0004,
|
| 33145 |
+
"loss": 3.7002,
|
| 33146 |
+
"step": 5436
|
| 33147 |
+
},
|
| 33148 |
+
{
|
| 33149 |
+
"epoch": 0.04,
|
| 33150 |
+
"learning_rate": 0.0004,
|
| 33151 |
+
"loss": 3.32,
|
| 33152 |
+
"step": 5437
|
| 33153 |
+
},
|
| 33154 |
+
{
|
| 33155 |
+
"epoch": 0.04,
|
| 33156 |
+
"learning_rate": 0.0004,
|
| 33157 |
+
"loss": 7.5895,
|
| 33158 |
+
"step": 5438
|
| 33159 |
+
},
|
| 33160 |
+
{
|
| 33161 |
+
"epoch": 0.04,
|
| 33162 |
+
"learning_rate": 0.0004,
|
| 33163 |
+
"loss": 2.5335,
|
| 33164 |
+
"step": 5439
|
| 33165 |
+
},
|
| 33166 |
+
{
|
| 33167 |
+
"epoch": 0.04,
|
| 33168 |
+
"learning_rate": 0.0004,
|
| 33169 |
+
"loss": 2.5173,
|
| 33170 |
+
"step": 5440
|
| 33171 |
+
},
|
| 33172 |
+
{
|
| 33173 |
+
"epoch": 0.04,
|
| 33174 |
+
"learning_rate": 0.0004,
|
| 33175 |
+
"loss": 7.3841,
|
| 33176 |
+
"step": 5441
|
| 33177 |
+
},
|
| 33178 |
+
{
|
| 33179 |
+
"epoch": 0.04,
|
| 33180 |
+
"learning_rate": 0.0004,
|
| 33181 |
+
"loss": 6.1075,
|
| 33182 |
+
"step": 5442
|
| 33183 |
+
},
|
| 33184 |
+
{
|
| 33185 |
+
"epoch": 0.04,
|
| 33186 |
+
"learning_rate": 0.0004,
|
| 33187 |
+
"loss": 8.4645,
|
| 33188 |
+
"step": 5443
|
| 33189 |
+
},
|
| 33190 |
+
{
|
| 33191 |
+
"epoch": 0.04,
|
| 33192 |
+
"learning_rate": 0.0004,
|
| 33193 |
+
"loss": 2.5685,
|
| 33194 |
+
"step": 5444
|
| 33195 |
+
},
|
| 33196 |
+
{
|
| 33197 |
+
"epoch": 0.04,
|
| 33198 |
+
"learning_rate": 0.0004,
|
| 33199 |
+
"loss": 3.2423,
|
| 33200 |
+
"step": 5445
|
| 33201 |
+
},
|
| 33202 |
+
{
|
| 33203 |
+
"epoch": 0.04,
|
| 33204 |
+
"learning_rate": 0.0004,
|
| 33205 |
+
"loss": 8.3062,
|
| 33206 |
+
"step": 5446
|
| 33207 |
+
},
|
| 33208 |
+
{
|
| 33209 |
+
"epoch": 0.04,
|
| 33210 |
+
"learning_rate": 0.0004,
|
| 33211 |
+
"loss": 4.195,
|
| 33212 |
+
"step": 5447
|
| 33213 |
+
},
|
| 33214 |
+
{
|
| 33215 |
+
"epoch": 0.04,
|
| 33216 |
+
"learning_rate": 0.0004,
|
| 33217 |
+
"loss": 4.1215,
|
| 33218 |
+
"step": 5448
|
| 33219 |
+
},
|
| 33220 |
+
{
|
| 33221 |
+
"epoch": 0.04,
|
| 33222 |
+
"learning_rate": 0.0004,
|
| 33223 |
+
"loss": 3.7096,
|
| 33224 |
+
"step": 5449
|
| 33225 |
+
},
|
| 33226 |
+
{
|
| 33227 |
+
"epoch": 0.04,
|
| 33228 |
+
"learning_rate": 0.0004,
|
| 33229 |
+
"loss": 4.8696,
|
| 33230 |
+
"step": 5450
|
| 33231 |
+
},
|
| 33232 |
+
{
|
| 33233 |
+
"epoch": 0.04,
|
| 33234 |
+
"learning_rate": 0.0004,
|
| 33235 |
+
"loss": 3.0856,
|
| 33236 |
+
"step": 5451
|
| 33237 |
+
},
|
| 33238 |
+
{
|
| 33239 |
+
"epoch": 0.04,
|
| 33240 |
+
"learning_rate": 0.0004,
|
| 33241 |
+
"loss": 8.131,
|
| 33242 |
+
"step": 5452
|
| 33243 |
+
},
|
| 33244 |
+
{
|
| 33245 |
+
"epoch": 0.04,
|
| 33246 |
+
"learning_rate": 0.0004,
|
| 33247 |
+
"loss": 6.9663,
|
| 33248 |
+
"step": 5453
|
| 33249 |
+
},
|
| 33250 |
+
{
|
| 33251 |
+
"epoch": 0.04,
|
| 33252 |
+
"learning_rate": 0.0004,
|
| 33253 |
+
"loss": 6.8655,
|
| 33254 |
+
"step": 5454
|
| 33255 |
+
},
|
| 33256 |
+
{
|
| 33257 |
+
"epoch": 0.04,
|
| 33258 |
+
"learning_rate": 0.0004,
|
| 33259 |
+
"loss": 7.4411,
|
| 33260 |
+
"step": 5455
|
| 33261 |
+
},
|
| 33262 |
+
{
|
| 33263 |
+
"epoch": 0.04,
|
| 33264 |
+
"learning_rate": 0.0004,
|
| 33265 |
+
"loss": 7.33,
|
| 33266 |
+
"step": 5456
|
| 33267 |
+
},
|
| 33268 |
+
{
|
| 33269 |
+
"epoch": 0.04,
|
| 33270 |
+
"learning_rate": 0.0004,
|
| 33271 |
+
"loss": 7.4933,
|
| 33272 |
+
"step": 5457
|
| 33273 |
+
},
|
| 33274 |
+
{
|
| 33275 |
+
"epoch": 0.04,
|
| 33276 |
+
"learning_rate": 0.0004,
|
| 33277 |
+
"loss": 3.6501,
|
| 33278 |
+
"step": 5458
|
| 33279 |
+
},
|
| 33280 |
+
{
|
| 33281 |
+
"epoch": 0.04,
|
| 33282 |
+
"learning_rate": 0.0004,
|
| 33283 |
+
"loss": 6.9743,
|
| 33284 |
+
"step": 5459
|
| 33285 |
+
},
|
| 33286 |
+
{
|
| 33287 |
+
"epoch": 0.04,
|
| 33288 |
+
"learning_rate": 0.0004,
|
| 33289 |
+
"loss": 9.029,
|
| 33290 |
+
"step": 5460
|
| 33291 |
+
},
|
| 33292 |
+
{
|
| 33293 |
+
"epoch": 0.04,
|
| 33294 |
+
"learning_rate": 0.0004,
|
| 33295 |
+
"loss": 6.8042,
|
| 33296 |
+
"step": 5461
|
| 33297 |
+
},
|
| 33298 |
+
{
|
| 33299 |
+
"epoch": 0.04,
|
| 33300 |
+
"learning_rate": 0.0004,
|
| 33301 |
+
"loss": 10.3662,
|
| 33302 |
+
"step": 5462
|
| 33303 |
+
},
|
| 33304 |
+
{
|
| 33305 |
+
"epoch": 0.04,
|
| 33306 |
+
"learning_rate": 0.0004,
|
| 33307 |
+
"loss": 6.5808,
|
| 33308 |
+
"step": 5463
|
| 33309 |
+
},
|
| 33310 |
+
{
|
| 33311 |
+
"epoch": 0.04,
|
| 33312 |
+
"learning_rate": 0.0004,
|
| 33313 |
+
"loss": 7.422,
|
| 33314 |
+
"step": 5464
|
| 33315 |
+
},
|
| 33316 |
+
{
|
| 33317 |
+
"epoch": 0.04,
|
| 33318 |
+
"learning_rate": 0.0004,
|
| 33319 |
+
"loss": 7.5076,
|
| 33320 |
+
"step": 5465
|
| 33321 |
+
},
|
| 33322 |
+
{
|
| 33323 |
+
"epoch": 0.04,
|
| 33324 |
+
"learning_rate": 0.0004,
|
| 33325 |
+
"loss": 6.0352,
|
| 33326 |
+
"step": 5466
|
| 33327 |
+
},
|
| 33328 |
+
{
|
| 33329 |
+
"epoch": 0.04,
|
| 33330 |
+
"learning_rate": 0.0004,
|
| 33331 |
+
"loss": 2.7639,
|
| 33332 |
+
"step": 5467
|
| 33333 |
+
},
|
| 33334 |
+
{
|
| 33335 |
+
"epoch": 0.04,
|
| 33336 |
+
"learning_rate": 0.0004,
|
| 33337 |
+
"loss": 4.9244,
|
| 33338 |
+
"step": 5468
|
| 33339 |
+
},
|
| 33340 |
+
{
|
| 33341 |
+
"epoch": 0.04,
|
| 33342 |
+
"learning_rate": 0.0004,
|
| 33343 |
+
"loss": 6.7408,
|
| 33344 |
+
"step": 5469
|
| 33345 |
+
},
|
| 33346 |
+
{
|
| 33347 |
+
"epoch": 0.04,
|
| 33348 |
+
"learning_rate": 0.0004,
|
| 33349 |
+
"loss": 4.7444,
|
| 33350 |
+
"step": 5470
|
| 33351 |
+
},
|
| 33352 |
+
{
|
| 33353 |
+
"epoch": 0.04,
|
| 33354 |
+
"learning_rate": 0.0004,
|
| 33355 |
+
"loss": 8.3459,
|
| 33356 |
+
"step": 5471
|
| 33357 |
+
},
|
| 33358 |
+
{
|
| 33359 |
+
"epoch": 0.04,
|
| 33360 |
+
"learning_rate": 0.0004,
|
| 33361 |
+
"loss": 6.9678,
|
| 33362 |
+
"step": 5472
|
| 33363 |
+
},
|
| 33364 |
+
{
|
| 33365 |
+
"epoch": 0.04,
|
| 33366 |
+
"learning_rate": 0.0004,
|
| 33367 |
+
"loss": 8.1263,
|
| 33368 |
+
"step": 5473
|
| 33369 |
+
},
|
| 33370 |
+
{
|
| 33371 |
+
"epoch": 0.04,
|
| 33372 |
+
"learning_rate": 0.0004,
|
| 33373 |
+
"loss": 6.1176,
|
| 33374 |
+
"step": 5474
|
| 33375 |
+
},
|
| 33376 |
+
{
|
| 33377 |
+
"epoch": 0.04,
|
| 33378 |
+
"learning_rate": 0.0004,
|
| 33379 |
+
"loss": 5.2127,
|
| 33380 |
+
"step": 5475
|
| 33381 |
+
},
|
| 33382 |
+
{
|
| 33383 |
+
"epoch": 0.04,
|
| 33384 |
+
"learning_rate": 0.0004,
|
| 33385 |
+
"loss": 3.1435,
|
| 33386 |
+
"step": 5476
|
| 33387 |
+
},
|
| 33388 |
+
{
|
| 33389 |
+
"epoch": 0.04,
|
| 33390 |
+
"learning_rate": 0.0004,
|
| 33391 |
+
"loss": 5.8836,
|
| 33392 |
+
"step": 5477
|
| 33393 |
+
},
|
| 33394 |
+
{
|
| 33395 |
+
"epoch": 0.04,
|
| 33396 |
+
"learning_rate": 0.0004,
|
| 33397 |
+
"loss": 2.7154,
|
| 33398 |
+
"step": 5478
|
| 33399 |
+
},
|
| 33400 |
+
{
|
| 33401 |
+
"epoch": 0.04,
|
| 33402 |
+
"learning_rate": 0.0004,
|
| 33403 |
+
"loss": 7.6181,
|
| 33404 |
+
"step": 5479
|
| 33405 |
+
},
|
| 33406 |
+
{
|
| 33407 |
+
"epoch": 0.04,
|
| 33408 |
+
"learning_rate": 0.0004,
|
| 33409 |
+
"loss": 3.5132,
|
| 33410 |
+
"step": 5480
|
| 33411 |
+
},
|
| 33412 |
+
{
|
| 33413 |
+
"epoch": 0.04,
|
| 33414 |
+
"learning_rate": 0.0004,
|
| 33415 |
+
"loss": 5.9472,
|
| 33416 |
+
"step": 5481
|
| 33417 |
+
},
|
| 33418 |
+
{
|
| 33419 |
+
"epoch": 0.04,
|
| 33420 |
+
"learning_rate": 0.0004,
|
| 33421 |
+
"loss": 2.7316,
|
| 33422 |
+
"step": 5482
|
| 33423 |
+
},
|
| 33424 |
+
{
|
| 33425 |
+
"epoch": 0.04,
|
| 33426 |
+
"learning_rate": 0.0004,
|
| 33427 |
+
"loss": 4.4713,
|
| 33428 |
+
"step": 5483
|
| 33429 |
+
},
|
| 33430 |
+
{
|
| 33431 |
+
"epoch": 0.04,
|
| 33432 |
+
"learning_rate": 0.0004,
|
| 33433 |
+
"loss": 8.4081,
|
| 33434 |
+
"step": 5484
|
| 33435 |
+
},
|
| 33436 |
+
{
|
| 33437 |
+
"epoch": 0.04,
|
| 33438 |
+
"learning_rate": 0.0004,
|
| 33439 |
+
"loss": 2.5906,
|
| 33440 |
+
"step": 5485
|
| 33441 |
+
},
|
| 33442 |
+
{
|
| 33443 |
+
"epoch": 0.04,
|
| 33444 |
+
"learning_rate": 0.0004,
|
| 33445 |
+
"loss": 7.8309,
|
| 33446 |
+
"step": 5486
|
| 33447 |
+
},
|
| 33448 |
+
{
|
| 33449 |
+
"epoch": 0.04,
|
| 33450 |
+
"learning_rate": 0.0004,
|
| 33451 |
+
"loss": 2.5541,
|
| 33452 |
+
"step": 5487
|
| 33453 |
+
},
|
| 33454 |
+
{
|
| 33455 |
+
"epoch": 0.04,
|
| 33456 |
+
"learning_rate": 0.0004,
|
| 33457 |
+
"loss": 2.6686,
|
| 33458 |
+
"step": 5488
|
| 33459 |
+
},
|
| 33460 |
+
{
|
| 33461 |
+
"epoch": 0.04,
|
| 33462 |
+
"learning_rate": 0.0004,
|
| 33463 |
+
"loss": 2.5044,
|
| 33464 |
+
"step": 5489
|
| 33465 |
+
},
|
| 33466 |
+
{
|
| 33467 |
+
"epoch": 0.04,
|
| 33468 |
+
"learning_rate": 0.0004,
|
| 33469 |
+
"loss": 6.8598,
|
| 33470 |
+
"step": 5490
|
| 33471 |
+
},
|
| 33472 |
+
{
|
| 33473 |
+
"epoch": 0.04,
|
| 33474 |
+
"learning_rate": 0.0004,
|
| 33475 |
+
"loss": 8.1069,
|
| 33476 |
+
"step": 5491
|
| 33477 |
+
},
|
| 33478 |
+
{
|
| 33479 |
+
"epoch": 0.04,
|
| 33480 |
+
"learning_rate": 0.0004,
|
| 33481 |
+
"loss": 9.3975,
|
| 33482 |
+
"step": 5492
|
| 33483 |
+
},
|
| 33484 |
+
{
|
| 33485 |
+
"epoch": 0.04,
|
| 33486 |
+
"learning_rate": 0.0004,
|
| 33487 |
+
"loss": 6.7921,
|
| 33488 |
+
"step": 5493
|
| 33489 |
+
},
|
| 33490 |
+
{
|
| 33491 |
+
"epoch": 0.04,
|
| 33492 |
+
"learning_rate": 0.0004,
|
| 33493 |
+
"loss": 5.8833,
|
| 33494 |
+
"step": 5494
|
| 33495 |
+
},
|
| 33496 |
+
{
|
| 33497 |
+
"epoch": 0.04,
|
| 33498 |
+
"learning_rate": 0.0004,
|
| 33499 |
+
"loss": 5.4129,
|
| 33500 |
+
"step": 5495
|
| 33501 |
+
},
|
| 33502 |
+
{
|
| 33503 |
+
"epoch": 0.04,
|
| 33504 |
+
"learning_rate": 0.0004,
|
| 33505 |
+
"loss": 5.6771,
|
| 33506 |
+
"step": 5496
|
| 33507 |
+
},
|
| 33508 |
+
{
|
| 33509 |
+
"epoch": 0.04,
|
| 33510 |
+
"learning_rate": 0.0004,
|
| 33511 |
+
"loss": 6.3949,
|
| 33512 |
+
"step": 5497
|
| 33513 |
+
},
|
| 33514 |
+
{
|
| 33515 |
+
"epoch": 0.04,
|
| 33516 |
+
"learning_rate": 0.0004,
|
| 33517 |
+
"loss": 7.5032,
|
| 33518 |
+
"step": 5498
|
| 33519 |
+
},
|
| 33520 |
+
{
|
| 33521 |
+
"epoch": 0.04,
|
| 33522 |
+
"learning_rate": 0.0004,
|
| 33523 |
+
"loss": 2.963,
|
| 33524 |
+
"step": 5499
|
| 33525 |
+
},
|
| 33526 |
+
{
|
| 33527 |
+
"epoch": 0.04,
|
| 33528 |
+
"learning_rate": 0.0004,
|
| 33529 |
+
"loss": 3.4149,
|
| 33530 |
+
"step": 5500
|
| 33531 |
+
},
|
| 33532 |
+
{
|
| 33533 |
+
"epoch": 0.04,
|
| 33534 |
+
"learning_rate": 0.0004,
|
| 33535 |
+
"loss": 4.0817,
|
| 33536 |
+
"step": 5501
|
| 33537 |
+
},
|
| 33538 |
+
{
|
| 33539 |
+
"epoch": 0.04,
|
| 33540 |
+
"learning_rate": 0.0004,
|
| 33541 |
+
"loss": 8.606,
|
| 33542 |
+
"step": 5502
|
| 33543 |
+
},
|
| 33544 |
+
{
|
| 33545 |
+
"epoch": 0.04,
|
| 33546 |
+
"learning_rate": 0.0004,
|
| 33547 |
+
"loss": 8.112,
|
| 33548 |
+
"step": 5503
|
| 33549 |
+
},
|
| 33550 |
+
{
|
| 33551 |
+
"epoch": 0.04,
|
| 33552 |
+
"learning_rate": 0.0004,
|
| 33553 |
+
"loss": 9.0323,
|
| 33554 |
+
"step": 5504
|
| 33555 |
+
},
|
| 33556 |
+
{
|
| 33557 |
+
"epoch": 0.04,
|
| 33558 |
+
"learning_rate": 0.0004,
|
| 33559 |
+
"loss": 5.0102,
|
| 33560 |
+
"step": 5505
|
| 33561 |
+
},
|
| 33562 |
+
{
|
| 33563 |
+
"epoch": 0.04,
|
| 33564 |
+
"learning_rate": 0.0004,
|
| 33565 |
+
"loss": 6.9195,
|
| 33566 |
+
"step": 5506
|
| 33567 |
+
},
|
| 33568 |
+
{
|
| 33569 |
+
"epoch": 0.04,
|
| 33570 |
+
"learning_rate": 0.0004,
|
| 33571 |
+
"loss": 8.0544,
|
| 33572 |
+
"step": 5507
|
| 33573 |
+
},
|
| 33574 |
+
{
|
| 33575 |
+
"epoch": 0.04,
|
| 33576 |
+
"learning_rate": 0.0004,
|
| 33577 |
+
"loss": 5.7615,
|
| 33578 |
+
"step": 5508
|
| 33579 |
+
},
|
| 33580 |
+
{
|
| 33581 |
+
"epoch": 0.04,
|
| 33582 |
+
"learning_rate": 0.0004,
|
| 33583 |
+
"loss": 6.9108,
|
| 33584 |
+
"step": 5509
|
| 33585 |
+
},
|
| 33586 |
+
{
|
| 33587 |
+
"epoch": 0.04,
|
| 33588 |
+
"learning_rate": 0.0004,
|
| 33589 |
+
"loss": 7.2846,
|
| 33590 |
+
"step": 5510
|
| 33591 |
+
},
|
| 33592 |
+
{
|
| 33593 |
+
"epoch": 0.04,
|
| 33594 |
+
"learning_rate": 0.0004,
|
| 33595 |
+
"loss": 8.1243,
|
| 33596 |
+
"step": 5511
|
| 33597 |
+
},
|
| 33598 |
+
{
|
| 33599 |
+
"epoch": 0.04,
|
| 33600 |
+
"learning_rate": 0.0004,
|
| 33601 |
+
"loss": 7.0416,
|
| 33602 |
+
"step": 5512
|
| 33603 |
+
},
|
| 33604 |
+
{
|
| 33605 |
+
"epoch": 0.04,
|
| 33606 |
+
"learning_rate": 0.0004,
|
| 33607 |
+
"loss": 5.9334,
|
| 33608 |
+
"step": 5513
|
| 33609 |
+
},
|
| 33610 |
+
{
|
| 33611 |
+
"epoch": 0.04,
|
| 33612 |
+
"learning_rate": 0.0004,
|
| 33613 |
+
"loss": 6.7127,
|
| 33614 |
+
"step": 5514
|
| 33615 |
+
},
|
| 33616 |
+
{
|
| 33617 |
+
"epoch": 0.04,
|
| 33618 |
+
"learning_rate": 0.0004,
|
| 33619 |
+
"loss": 3.2506,
|
| 33620 |
+
"step": 5515
|
| 33621 |
+
},
|
| 33622 |
+
{
|
| 33623 |
+
"epoch": 0.04,
|
| 33624 |
+
"learning_rate": 0.0004,
|
| 33625 |
+
"loss": 9.5912,
|
| 33626 |
+
"step": 5516
|
| 33627 |
+
},
|
| 33628 |
+
{
|
| 33629 |
+
"epoch": 0.04,
|
| 33630 |
+
"learning_rate": 0.0004,
|
| 33631 |
+
"loss": 3.1955,
|
| 33632 |
+
"step": 5517
|
| 33633 |
+
},
|
| 33634 |
+
{
|
| 33635 |
+
"epoch": 0.04,
|
| 33636 |
+
"learning_rate": 0.0004,
|
| 33637 |
+
"loss": 5.3704,
|
| 33638 |
+
"step": 5518
|
| 33639 |
+
},
{
    "epoch": 0.04,
    "learning_rate": 0.0004,
    "loss": 5.775,
    "step": 5519
},
Entries of the same shape follow for every step up to 5600, all at epoch 0.04 with the learning rate held at 0.0004 and the per-step training loss swinging between roughly 2.1 and 10.8; at step 5600 the periodic evaluation results are also logged:
{
    "epoch": 0.04,
    "eval_loss": 6.424686431884766,
    "eval_runtime": 22.4149,
    "eval_samples_per_second": 2.231,
    "eval_steps_per_second": 1.115,
    "step": 5600
},
{
    "epoch": 0.04,
    "mmlu_eval_accuracy": 0.2525477994227994,
    "mmlu_eval_accuracy_abstract_algebra": 0.18181818181818182,
    "mmlu_eval_accuracy_anatomy": 0.07142857142857142,
    "mmlu_eval_accuracy_astronomy": 0.3125,
    "mmlu_eval_accuracy_business_ethics": 0.4444444444444444,
    "mmlu_loss": 3.697023420333862,
    "step": 5600
},
Per-step training entries then continue in the same form through step 5800 (still epoch 0.04, learning rate 0.0004, loss between roughly 1.9 and 10.4), followed by the next evaluation at step 5800:
{
    "epoch": 0.04,
    "eval_loss": 6.396474838256836,
    "eval_runtime": 22.3993,
    "eval_samples_per_second": 2.232,
    "eval_steps_per_second": 1.116,
    "step": 5800
},
{
    "epoch": 0.04,
    "mmlu_eval_accuracy": 0.2525477994227994,
    "mmlu_eval_accuracy_abstract_algebra": 0.18181818181818182,
    "mmlu_eval_accuracy_anatomy": 0.07142857142857142,
    "mmlu_eval_accuracy_astronomy": 0.3125,
    "mmlu_eval_accuracy_business_ethics": 0.4444444444444444,
    "mmlu_loss": 3.9258560848236086,
    "step": 5800
},
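For working with these records directly, a minimal sketch in Python follows; it assumes the entries shown here sit in a checkpoint's trainer_state.json under the Trainer's usual log_history key, neither of which is visible in this diff, so treat both the path and the key as assumptions.

import json

# Minimal sketch (assumed path and key): split the logged records shown above
# into per-step training entries and periodic eval entries.
with open("checkpoint-6200/trainer_state.json") as f:  # assumed checkpoint path
    state = json.load(f)

log = state["log_history"]  # assumed standard Trainer key holding these entries
train = [e for e in log if "loss" in e and "eval_loss" not in e]
evals = [e for e in log if "eval_loss" in e]

print(len(train), "train entries,", len(evals), "eval entries")
print("last train loss:", train[-1]["step"], train[-1]["loss"])
print("eval losses:", [(e["step"], e["eval_loss"]) for e in evals])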
Training entries of the same form then run on through step 5913, with the logged epoch ticking over from 0.04 to 0.05 at step 5891, the learning rate unchanged at 0.0004, and the per-step loss still oscillating between roughly 2.1 and 9.1.
|
| 36046 |
+
{
|
| 36047 |
+
"epoch": 0.05,
|
| 36048 |
+
"learning_rate": 0.0004,
|
| 36049 |
+
"loss": 9.1289,
|
| 36050 |
+
"step": 5914
|
| 36051 |
+
},
|
| 36052 |
+
{
|
| 36053 |
+
"epoch": 0.05,
|
| 36054 |
+
"learning_rate": 0.0004,
|
| 36055 |
+
"loss": 4.0564,
|
| 36056 |
+
"step": 5915
|
| 36057 |
+
},
|
| 36058 |
+
{
|
| 36059 |
+
"epoch": 0.05,
|
| 36060 |
+
"learning_rate": 0.0004,
|
| 36061 |
+
"loss": 7.5045,
|
| 36062 |
+
"step": 5916
|
| 36063 |
+
},
|
| 36064 |
+
{
|
| 36065 |
+
"epoch": 0.05,
|
| 36066 |
+
"learning_rate": 0.0004,
|
| 36067 |
+
"loss": 6.6193,
|
| 36068 |
+
"step": 5917
|
| 36069 |
+
},
|
| 36070 |
+
{
|
| 36071 |
+
"epoch": 0.05,
|
| 36072 |
+
"learning_rate": 0.0004,
|
| 36073 |
+
"loss": 2.9347,
|
| 36074 |
+
"step": 5918
|
| 36075 |
+
},
|
| 36076 |
+
{
|
| 36077 |
+
"epoch": 0.05,
|
| 36078 |
+
"learning_rate": 0.0004,
|
| 36079 |
+
"loss": 7.276,
|
| 36080 |
+
"step": 5919
|
| 36081 |
+
},
|
| 36082 |
+
{
|
| 36083 |
+
"epoch": 0.05,
|
| 36084 |
+
"learning_rate": 0.0004,
|
| 36085 |
+
"loss": 6.0243,
|
| 36086 |
+
"step": 5920
|
| 36087 |
+
},
|
| 36088 |
+
{
|
| 36089 |
+
"epoch": 0.05,
|
| 36090 |
+
"learning_rate": 0.0004,
|
| 36091 |
+
"loss": 8.8889,
|
| 36092 |
+
"step": 5921
|
| 36093 |
+
},
|
| 36094 |
+
{
|
| 36095 |
+
"epoch": 0.05,
|
| 36096 |
+
"learning_rate": 0.0004,
|
| 36097 |
+
"loss": 4.8016,
|
| 36098 |
+
"step": 5922
|
| 36099 |
+
},
|
| 36100 |
+
{
|
| 36101 |
+
"epoch": 0.05,
|
| 36102 |
+
"learning_rate": 0.0004,
|
| 36103 |
+
"loss": 7.6244,
|
| 36104 |
+
"step": 5923
|
| 36105 |
+
},
|
| 36106 |
+
{
|
| 36107 |
+
"epoch": 0.05,
|
| 36108 |
+
"learning_rate": 0.0004,
|
| 36109 |
+
"loss": 4.6548,
|
| 36110 |
+
"step": 5924
|
| 36111 |
+
},
|
| 36112 |
+
{
|
| 36113 |
+
"epoch": 0.05,
|
| 36114 |
+
"learning_rate": 0.0004,
|
| 36115 |
+
"loss": 5.446,
|
| 36116 |
+
"step": 5925
|
| 36117 |
+
},
|
| 36118 |
+
{
|
| 36119 |
+
"epoch": 0.05,
|
| 36120 |
+
"learning_rate": 0.0004,
|
| 36121 |
+
"loss": 3.0701,
|
| 36122 |
+
"step": 5926
|
| 36123 |
+
},
|
| 36124 |
+
{
|
| 36125 |
+
"epoch": 0.05,
|
| 36126 |
+
"learning_rate": 0.0004,
|
| 36127 |
+
"loss": 3.6489,
|
| 36128 |
+
"step": 5927
|
| 36129 |
+
},
|
| 36130 |
+
{
|
| 36131 |
+
"epoch": 0.05,
|
| 36132 |
+
"learning_rate": 0.0004,
|
| 36133 |
+
"loss": 6.8636,
|
| 36134 |
+
"step": 5928
|
| 36135 |
+
},
|
| 36136 |
+
{
|
| 36137 |
+
"epoch": 0.05,
|
| 36138 |
+
"learning_rate": 0.0004,
|
| 36139 |
+
"loss": 7.3796,
|
| 36140 |
+
"step": 5929
|
| 36141 |
+
},
|
| 36142 |
+
{
|
| 36143 |
+
"epoch": 0.05,
|
| 36144 |
+
"learning_rate": 0.0004,
|
| 36145 |
+
"loss": 6.3366,
|
| 36146 |
+
"step": 5930
|
| 36147 |
+
},
|
| 36148 |
+
{
|
| 36149 |
+
"epoch": 0.05,
|
| 36150 |
+
"learning_rate": 0.0004,
|
| 36151 |
+
"loss": 7.4844,
|
| 36152 |
+
"step": 5931
|
| 36153 |
+
},
|
| 36154 |
+
{
|
| 36155 |
+
"epoch": 0.05,
|
| 36156 |
+
"learning_rate": 0.0004,
|
| 36157 |
+
"loss": 5.5549,
|
| 36158 |
+
"step": 5932
|
| 36159 |
+
},
|
| 36160 |
+
{
|
| 36161 |
+
"epoch": 0.05,
|
| 36162 |
+
"learning_rate": 0.0004,
|
| 36163 |
+
"loss": 7.976,
|
| 36164 |
+
"step": 5933
|
| 36165 |
+
},
|
| 36166 |
+
{
|
| 36167 |
+
"epoch": 0.05,
|
| 36168 |
+
"learning_rate": 0.0004,
|
| 36169 |
+
"loss": 7.0844,
|
| 36170 |
+
"step": 5934
|
| 36171 |
+
},
|
| 36172 |
+
{
|
| 36173 |
+
"epoch": 0.05,
|
| 36174 |
+
"learning_rate": 0.0004,
|
| 36175 |
+
"loss": 3.5849,
|
| 36176 |
+
"step": 5935
|
| 36177 |
+
},
|
| 36178 |
+
{
|
| 36179 |
+
"epoch": 0.05,
|
| 36180 |
+
"learning_rate": 0.0004,
|
| 36181 |
+
"loss": 6.5648,
|
| 36182 |
+
"step": 5936
|
| 36183 |
+
},
|
| 36184 |
+
{
|
| 36185 |
+
"epoch": 0.05,
|
| 36186 |
+
"learning_rate": 0.0004,
|
| 36187 |
+
"loss": 6.8267,
|
| 36188 |
+
"step": 5937
|
| 36189 |
+
},
|
| 36190 |
+
{
|
| 36191 |
+
"epoch": 0.05,
|
| 36192 |
+
"learning_rate": 0.0004,
|
| 36193 |
+
"loss": 4.0671,
|
| 36194 |
+
"step": 5938
|
| 36195 |
+
},
|
| 36196 |
+
{
|
| 36197 |
+
"epoch": 0.05,
|
| 36198 |
+
"learning_rate": 0.0004,
|
| 36199 |
+
"loss": 5.9199,
|
| 36200 |
+
"step": 5939
|
| 36201 |
+
},
|
| 36202 |
+
{
|
| 36203 |
+
"epoch": 0.05,
|
| 36204 |
+
"learning_rate": 0.0004,
|
| 36205 |
+
"loss": 6.7518,
|
| 36206 |
+
"step": 5940
|
| 36207 |
+
},
|
| 36208 |
+
{
|
| 36209 |
+
"epoch": 0.05,
|
| 36210 |
+
"learning_rate": 0.0004,
|
| 36211 |
+
"loss": 2.9931,
|
| 36212 |
+
"step": 5941
|
| 36213 |
+
},
|
| 36214 |
+
{
|
| 36215 |
+
"epoch": 0.05,
|
| 36216 |
+
"learning_rate": 0.0004,
|
| 36217 |
+
"loss": 4.1515,
|
| 36218 |
+
"step": 5942
|
| 36219 |
+
},
|
| 36220 |
+
{
|
| 36221 |
+
"epoch": 0.05,
|
| 36222 |
+
"learning_rate": 0.0004,
|
| 36223 |
+
"loss": 5.4225,
|
| 36224 |
+
"step": 5943
|
| 36225 |
+
},
|
| 36226 |
+
{
|
| 36227 |
+
"epoch": 0.05,
|
| 36228 |
+
"learning_rate": 0.0004,
|
| 36229 |
+
"loss": 4.7662,
|
| 36230 |
+
"step": 5944
|
| 36231 |
+
},
|
| 36232 |
+
{
|
| 36233 |
+
"epoch": 0.05,
|
| 36234 |
+
"learning_rate": 0.0004,
|
| 36235 |
+
"loss": 4.7916,
|
| 36236 |
+
"step": 5945
|
| 36237 |
+
},
|
| 36238 |
+
{
|
| 36239 |
+
"epoch": 0.05,
|
| 36240 |
+
"learning_rate": 0.0004,
|
| 36241 |
+
"loss": 5.6711,
|
| 36242 |
+
"step": 5946
|
| 36243 |
+
},
|
| 36244 |
+
{
|
| 36245 |
+
"epoch": 0.05,
|
| 36246 |
+
"learning_rate": 0.0004,
|
| 36247 |
+
"loss": 6.4338,
|
| 36248 |
+
"step": 5947
|
| 36249 |
+
},
|
| 36250 |
+
{
|
| 36251 |
+
"epoch": 0.05,
|
| 36252 |
+
"learning_rate": 0.0004,
|
| 36253 |
+
"loss": 6.1612,
|
| 36254 |
+
"step": 5948
|
| 36255 |
+
},
|
| 36256 |
+
{
|
| 36257 |
+
"epoch": 0.05,
|
| 36258 |
+
"learning_rate": 0.0004,
|
| 36259 |
+
"loss": 4.3135,
|
| 36260 |
+
"step": 5949
|
| 36261 |
+
},
|
| 36262 |
+
{
|
| 36263 |
+
"epoch": 0.05,
|
| 36264 |
+
"learning_rate": 0.0004,
|
| 36265 |
+
"loss": 5.6296,
|
| 36266 |
+
"step": 5950
|
| 36267 |
+
},
|
| 36268 |
+
{
|
| 36269 |
+
"epoch": 0.05,
|
| 36270 |
+
"learning_rate": 0.0004,
|
| 36271 |
+
"loss": 8.2795,
|
| 36272 |
+
"step": 5951
|
| 36273 |
+
},
|
| 36274 |
+
{
|
| 36275 |
+
"epoch": 0.05,
|
| 36276 |
+
"learning_rate": 0.0004,
|
| 36277 |
+
"loss": 7.2667,
|
| 36278 |
+
"step": 5952
|
| 36279 |
+
},
|
| 36280 |
+
{
|
| 36281 |
+
"epoch": 0.05,
|
| 36282 |
+
"learning_rate": 0.0004,
|
| 36283 |
+
"loss": 4.4897,
|
| 36284 |
+
"step": 5953
|
| 36285 |
+
},
|
| 36286 |
+
{
|
| 36287 |
+
"epoch": 0.05,
|
| 36288 |
+
"learning_rate": 0.0004,
|
| 36289 |
+
"loss": 3.9241,
|
| 36290 |
+
"step": 5954
|
| 36291 |
+
},
|
| 36292 |
+
{
|
| 36293 |
+
"epoch": 0.05,
|
| 36294 |
+
"learning_rate": 0.0004,
|
| 36295 |
+
"loss": 7.776,
|
| 36296 |
+
"step": 5955
|
| 36297 |
+
},
|
| 36298 |
+
{
|
| 36299 |
+
"epoch": 0.05,
|
| 36300 |
+
"learning_rate": 0.0004,
|
| 36301 |
+
"loss": 7.3649,
|
| 36302 |
+
"step": 5956
|
| 36303 |
+
},
|
| 36304 |
+
{
|
| 36305 |
+
"epoch": 0.05,
|
| 36306 |
+
"learning_rate": 0.0004,
|
| 36307 |
+
"loss": 2.6375,
|
| 36308 |
+
"step": 5957
|
| 36309 |
+
},
|
| 36310 |
+
{
|
| 36311 |
+
"epoch": 0.05,
|
| 36312 |
+
"learning_rate": 0.0004,
|
| 36313 |
+
"loss": 9.0611,
|
| 36314 |
+
"step": 5958
|
| 36315 |
+
},
|
| 36316 |
+
{
|
| 36317 |
+
"epoch": 0.05,
|
| 36318 |
+
"learning_rate": 0.0004,
|
| 36319 |
+
"loss": 6.7652,
|
| 36320 |
+
"step": 5959
|
| 36321 |
+
},
|
| 36322 |
+
{
|
| 36323 |
+
"epoch": 0.05,
|
| 36324 |
+
"learning_rate": 0.0004,
|
| 36325 |
+
"loss": 8.7396,
|
| 36326 |
+
"step": 5960
|
| 36327 |
+
},
|
| 36328 |
+
{
|
| 36329 |
+
"epoch": 0.05,
|
| 36330 |
+
"learning_rate": 0.0004,
|
| 36331 |
+
"loss": 7.8184,
|
| 36332 |
+
"step": 5961
|
| 36333 |
+
},
|
| 36334 |
+
{
|
| 36335 |
+
"epoch": 0.05,
|
| 36336 |
+
"learning_rate": 0.0004,
|
| 36337 |
+
"loss": 6.9717,
|
| 36338 |
+
"step": 5962
|
| 36339 |
+
},
|
| 36340 |
+
{
|
| 36341 |
+
"epoch": 0.05,
|
| 36342 |
+
"learning_rate": 0.0004,
|
| 36343 |
+
"loss": 6.7367,
|
| 36344 |
+
"step": 5963
|
| 36345 |
+
},
|
| 36346 |
+
{
|
| 36347 |
+
"epoch": 0.05,
|
| 36348 |
+
"learning_rate": 0.0004,
|
| 36349 |
+
"loss": 5.3137,
|
| 36350 |
+
"step": 5964
|
| 36351 |
+
},
|
| 36352 |
+
{
|
| 36353 |
+
"epoch": 0.05,
|
| 36354 |
+
"learning_rate": 0.0004,
|
| 36355 |
+
"loss": 7.5619,
|
| 36356 |
+
"step": 5965
|
| 36357 |
+
},
|
| 36358 |
+
{
|
| 36359 |
+
"epoch": 0.05,
|
| 36360 |
+
"learning_rate": 0.0004,
|
| 36361 |
+
"loss": 5.5172,
|
| 36362 |
+
"step": 5966
|
| 36363 |
+
},
|
| 36364 |
+
{
|
| 36365 |
+
"epoch": 0.05,
|
| 36366 |
+
"learning_rate": 0.0004,
|
| 36367 |
+
"loss": 7.5568,
|
| 36368 |
+
"step": 5967
|
| 36369 |
+
},
|
| 36370 |
+
{
|
| 36371 |
+
"epoch": 0.05,
|
| 36372 |
+
"learning_rate": 0.0004,
|
| 36373 |
+
"loss": 8.1321,
|
| 36374 |
+
"step": 5968
|
| 36375 |
+
},
|
| 36376 |
+
{
|
| 36377 |
+
"epoch": 0.05,
|
| 36378 |
+
"learning_rate": 0.0004,
|
| 36379 |
+
"loss": 8.8486,
|
| 36380 |
+
"step": 5969
|
| 36381 |
+
},
|
| 36382 |
+
{
|
| 36383 |
+
"epoch": 0.05,
|
| 36384 |
+
"learning_rate": 0.0004,
|
| 36385 |
+
"loss": 3.6196,
|
| 36386 |
+
"step": 5970
|
| 36387 |
+
},
|
| 36388 |
+
{
|
| 36389 |
+
"epoch": 0.05,
|
| 36390 |
+
"learning_rate": 0.0004,
|
| 36391 |
+
"loss": 7.7649,
|
| 36392 |
+
"step": 5971
|
| 36393 |
+
},
|
| 36394 |
+
{
|
| 36395 |
+
"epoch": 0.05,
|
| 36396 |
+
"learning_rate": 0.0004,
|
| 36397 |
+
"loss": 8.096,
|
| 36398 |
+
"step": 5972
|
| 36399 |
+
},
|
| 36400 |
+
{
|
| 36401 |
+
"epoch": 0.05,
|
| 36402 |
+
"learning_rate": 0.0004,
|
| 36403 |
+
"loss": 3.2377,
|
| 36404 |
+
"step": 5973
|
| 36405 |
+
},
|
| 36406 |
+
{
|
| 36407 |
+
"epoch": 0.05,
|
| 36408 |
+
"learning_rate": 0.0004,
|
| 36409 |
+
"loss": 7.9327,
|
| 36410 |
+
"step": 5974
|
| 36411 |
+
},
|
| 36412 |
+
{
|
| 36413 |
+
"epoch": 0.05,
|
| 36414 |
+
"learning_rate": 0.0004,
|
| 36415 |
+
"loss": 3.0676,
|
| 36416 |
+
"step": 5975
|
| 36417 |
+
},
|
| 36418 |
+
{
|
| 36419 |
+
"epoch": 0.05,
|
| 36420 |
+
"learning_rate": 0.0004,
|
| 36421 |
+
"loss": 6.9014,
|
| 36422 |
+
"step": 5976
|
| 36423 |
+
},
|
| 36424 |
+
{
|
| 36425 |
+
"epoch": 0.05,
|
| 36426 |
+
"learning_rate": 0.0004,
|
| 36427 |
+
"loss": 7.9241,
|
| 36428 |
+
"step": 5977
|
| 36429 |
+
},
|
| 36430 |
+
{
|
| 36431 |
+
"epoch": 0.05,
|
| 36432 |
+
"learning_rate": 0.0004,
|
| 36433 |
+
"loss": 12.1662,
|
| 36434 |
+
"step": 5978
|
| 36435 |
+
},
|
| 36436 |
+
{
|
| 36437 |
+
"epoch": 0.05,
|
| 36438 |
+
"learning_rate": 0.0004,
|
| 36439 |
+
"loss": 2.9906,
|
| 36440 |
+
"step": 5979
|
| 36441 |
+
},
|
| 36442 |
+
{
|
| 36443 |
+
"epoch": 0.05,
|
| 36444 |
+
"learning_rate": 0.0004,
|
| 36445 |
+
"loss": 4.6138,
|
| 36446 |
+
"step": 5980
|
| 36447 |
+
},
|
| 36448 |
+
{
|
| 36449 |
+
"epoch": 0.05,
|
| 36450 |
+
"learning_rate": 0.0004,
|
| 36451 |
+
"loss": 2.8328,
|
| 36452 |
+
"step": 5981
|
| 36453 |
+
},
|
| 36454 |
+
{
|
| 36455 |
+
"epoch": 0.05,
|
| 36456 |
+
"learning_rate": 0.0004,
|
| 36457 |
+
"loss": 2.6569,
|
| 36458 |
+
"step": 5982
|
| 36459 |
+
},
|
| 36460 |
+
{
|
| 36461 |
+
"epoch": 0.05,
|
| 36462 |
+
"learning_rate": 0.0004,
|
| 36463 |
+
"loss": 6.6642,
|
| 36464 |
+
"step": 5983
|
| 36465 |
+
},
|
| 36466 |
+
{
|
| 36467 |
+
"epoch": 0.05,
|
| 36468 |
+
"learning_rate": 0.0004,
|
| 36469 |
+
"loss": 4.8701,
|
| 36470 |
+
"step": 5984
|
| 36471 |
+
},
|
| 36472 |
+
{
|
| 36473 |
+
"epoch": 0.05,
|
| 36474 |
+
"learning_rate": 0.0004,
|
| 36475 |
+
"loss": 2.4972,
|
| 36476 |
+
"step": 5985
|
| 36477 |
+
},
|
| 36478 |
+
{
|
| 36479 |
+
"epoch": 0.05,
|
| 36480 |
+
"learning_rate": 0.0004,
|
| 36481 |
+
"loss": 3.1518,
|
| 36482 |
+
"step": 5986
|
| 36483 |
+
},
|
| 36484 |
+
{
|
| 36485 |
+
"epoch": 0.05,
|
| 36486 |
+
"learning_rate": 0.0004,
|
| 36487 |
+
"loss": 7.1437,
|
| 36488 |
+
"step": 5987
|
| 36489 |
+
},
|
| 36490 |
+
{
|
| 36491 |
+
"epoch": 0.05,
|
| 36492 |
+
"learning_rate": 0.0004,
|
| 36493 |
+
"loss": 6.2173,
|
| 36494 |
+
"step": 5988
|
| 36495 |
+
},
|
| 36496 |
+
{
|
| 36497 |
+
"epoch": 0.05,
|
| 36498 |
+
"learning_rate": 0.0004,
|
| 36499 |
+
"loss": 6.7305,
|
| 36500 |
+
"step": 5989
|
| 36501 |
+
},
|
| 36502 |
+
{
|
| 36503 |
+
"epoch": 0.05,
|
| 36504 |
+
"learning_rate": 0.0004,
|
| 36505 |
+
"loss": 7.6896,
|
| 36506 |
+
"step": 5990
|
| 36507 |
+
},
|
| 36508 |
+
{
|
| 36509 |
+
"epoch": 0.05,
|
| 36510 |
+
"learning_rate": 0.0004,
|
| 36511 |
+
"loss": 7.5627,
|
| 36512 |
+
"step": 5991
|
| 36513 |
+
},
|
| 36514 |
+
{
|
| 36515 |
+
"epoch": 0.05,
|
| 36516 |
+
"learning_rate": 0.0004,
|
| 36517 |
+
"loss": 4.5204,
|
| 36518 |
+
"step": 5992
|
| 36519 |
+
},
|
| 36520 |
+
{
|
| 36521 |
+
"epoch": 0.05,
|
| 36522 |
+
"learning_rate": 0.0004,
|
| 36523 |
+
"loss": 5.9454,
|
| 36524 |
+
"step": 5993
|
| 36525 |
+
},
|
| 36526 |
+
{
|
| 36527 |
+
"epoch": 0.05,
|
| 36528 |
+
"learning_rate": 0.0004,
|
| 36529 |
+
"loss": 6.4362,
|
| 36530 |
+
"step": 5994
|
| 36531 |
+
},
|
| 36532 |
+
{
|
| 36533 |
+
"epoch": 0.05,
|
| 36534 |
+
"learning_rate": 0.0004,
|
| 36535 |
+
"loss": 4.0131,
|
| 36536 |
+
"step": 5995
|
| 36537 |
+
},
|
| 36538 |
+
{
|
| 36539 |
+
"epoch": 0.05,
|
| 36540 |
+
"learning_rate": 0.0004,
|
| 36541 |
+
"loss": 6.1399,
|
| 36542 |
+
"step": 5996
|
| 36543 |
+
},
|
| 36544 |
+
{
|
| 36545 |
+
"epoch": 0.05,
|
| 36546 |
+
"learning_rate": 0.0004,
|
| 36547 |
+
"loss": 7.666,
|
| 36548 |
+
"step": 5997
|
| 36549 |
+
},
|
| 36550 |
+
{
|
| 36551 |
+
"epoch": 0.05,
|
| 36552 |
+
"learning_rate": 0.0004,
|
| 36553 |
+
"loss": 8.962,
|
| 36554 |
+
"step": 5998
|
| 36555 |
+
},
|
| 36556 |
+
{
|
| 36557 |
+
"epoch": 0.05,
|
| 36558 |
+
"learning_rate": 0.0004,
|
| 36559 |
+
"loss": 3.4282,
|
| 36560 |
+
"step": 5999
|
| 36561 |
+
},
|
| 36562 |
+
{
|
| 36563 |
+
"epoch": 0.05,
|
| 36564 |
+
"learning_rate": 0.0004,
|
| 36565 |
+
"loss": 3.7265,
|
| 36566 |
+
"step": 6000
|
| 36567 |
+
},
|
| 36568 |
+
{
|
| 36569 |
+
"epoch": 0.05,
|
| 36570 |
+
"eval_loss": 6.473691463470459,
|
| 36571 |
+
"eval_runtime": 22.3658,
|
| 36572 |
+
"eval_samples_per_second": 2.236,
|
| 36573 |
+
"eval_steps_per_second": 1.118,
|
| 36574 |
+
"step": 6000
|
| 36575 |
+
},
|
| 36576 |
+
{
|
| 36577 |
+
"epoch": 0.05,
|
| 36578 |
+
"mmlu_eval_accuracy": 0.2525477994227994,
|
| 36579 |
+
"mmlu_eval_accuracy_abstract_algebra": 0.18181818181818182,
|
| 36580 |
+
"mmlu_eval_accuracy_anatomy": 0.07142857142857142,
|
| 36581 |
+
"mmlu_eval_accuracy_astronomy": 0.3125,
|
| 36582 |
+
"mmlu_eval_accuracy_business_ethics": 0.4444444444444444,
|
| 36583 |
+
"mmlu_loss": 3.9286953735351564,
|
| 36584 |
+
"step": 6000
|
| 36585 |
+
},
|
| 36586 |
+
{
|
| 36587 |
+
"epoch": 0.05,
|
| 36588 |
+
"step": 6000,
|
| 36589 |
+
"total_flos": 9.88792958631936e+16,
|
| 36590 |
+
"train_loss": 0.5874443841576577,
|
| 36591 |
+
"train_runtime": 1725.6374,
|
| 36592 |
+
"train_samples_per_second": 17.385,
|
| 36593 |
+
"train_steps_per_second": 17.385
|
| 36594 |
}
|
| 36595 |
],
|
| 36596 |
"max_steps": 30000,
|
| 36597 |
"num_train_epochs": 1,
|
| 36598 |
+
"total_flos": 9.88792958631936e+16,
|
| 36599 |
"trial_name": null,
|
| 36600 |
"trial_params": null
|
| 36601 |
}
|
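The trainer_state.json entries added above are the per-step training logs written by the Trainer: each record carries the epoch, the (constant) learning rate of 0.0004, the raw loss, and the step, while the blocks at step 6000 add eval_loss, the MMLU accuracy fields, and the final runtime summary. The raw per-step losses are noisy (roughly 2.3 to 12.2 over steps 5869-6000), so a smoothed view is usually more informative. A minimal sketch of reading this file and smoothing the curve, assuming a local copy at checkpoint-6200/trainer_state.json with the "log_history" layout shown in the diff:

import json

# Load the trainer state saved with the checkpoint (the path is an assumption).
with open("checkpoint-6200/trainer_state.json") as f:
    state = json.load(f)

# Keep only per-step training records; eval, MMLU, and summary records use other keys.
train_logs = [e for e in state["log_history"] if "loss" in e]

def moving_average(values, window=50):
    # Trailing moving average; the window is shorter at the start of the run.
    out = []
    for i in range(len(values)):
        start = max(0, i - window + 1)
        chunk = values[start : i + 1]
        out.append(sum(chunk) / len(chunk))
    return out

steps = [e["step"] for e in train_logs]
losses = [e["loss"] for e in train_logs]
smoothed = moving_average(losses)

print(f"{len(train_logs)} logged steps, last step {steps[-1]}")
print(f"last raw loss {losses[-1]:.4f}, smoothed {smoothed[-1]:.4f}")

The same log_history list also holds the step-6000 records with eval_loss and the mmlu_eval_accuracy_* fields, which can be pulled out with an analogous filter.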
training_args.bin
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 6011
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:85783faab59f5f6d8bcf691e35bb86cff435e22f3fa9169bf4e56c0239c8d7e4
|
| 3 |
size 6011
|
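training_args.bin is tracked with Git LFS, so the change above only rewrites the three-line pointer file (spec version, oid sha256, size); the binary itself is stored separately and fetched on download. A minimal sketch of checking a downloaded copy against the new pointer's oid, assuming the file has already been pulled into the working directory (the path is an assumption):

import hashlib

# New oid taken from the pointer file shown above.
expected = "85783faab59f5f6d8bcf691e35bb86cff435e22f3fa9169bf4e56c0239c8d7e4"

sha = hashlib.sha256()
with open("training_args.bin", "rb") as f:
    # Hash the file in 1 MiB chunks to keep memory use flat.
    for chunk in iter(lambda: f.read(1 << 20), b""):
        sha.update(chunk)

print("sha256 matches pointer:", sha.hexdigest() == expected)

Git LFS uses the plain SHA-256 of the file contents as the object id, so a match here confirms the local file corresponds to the pointer committed in this revision.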