Farouk commited on
Commit ·
fbd1944
1
Parent(s): f277292
Training in progress, step 5600
Browse files- adapter_config.json +4 -4
- adapter_model.bin +1 -1
- all_results.json +7 -7
- checkpoint-4200/adapter_model/adapter_model/README.md +24 -0
- checkpoint-4200/adapter_model/adapter_model/adapter_model.bin +1 -1
- checkpoint-5600/README.md +20 -0
- checkpoint-5600/adapter_config.json +26 -0
- checkpoint-5600/adapter_model.bin +3 -0
- checkpoint-5600/added_tokens.json +3 -0
- checkpoint-5600/optimizer.pt +3 -0
- checkpoint-5600/rng_state.pth +3 -0
- checkpoint-5600/scheduler.pt +3 -0
- checkpoint-5600/special_tokens_map.json +6 -0
- checkpoint-5600/tokenizer.model +3 -0
- checkpoint-5600/tokenizer_config.json +35 -0
- checkpoint-5600/trainer_state.json +0 -0
- checkpoint-5600/training_args.bin +3 -0
- eval_results.json +3 -3
- metrics.json +1 -1
- train_results.json +4 -4
- trainer_state.json +3666 -3
- training_args.bin +1 -1
adapter_config.json
CHANGED
|
@@ -14,13 +14,13 @@
|
|
| 14 |
"r": 64,
|
| 15 |
"revision": null,
|
| 16 |
"target_modules": [
|
| 17 |
-
"
|
|
|
|
| 18 |
"down_proj",
|
| 19 |
-
"q_proj",
|
| 20 |
"gate_proj",
|
| 21 |
-
"o_proj",
|
| 22 |
"up_proj",
|
| 23 |
-
"
|
|
|
|
| 24 |
],
|
| 25 |
"task_type": "CAUSAL_LM"
|
| 26 |
}
|
|
|
|
| 14 |
"r": 64,
|
| 15 |
"revision": null,
|
| 16 |
"target_modules": [
|
| 17 |
+
"o_proj",
|
| 18 |
+
"k_proj",
|
| 19 |
"down_proj",
|
|
|
|
| 20 |
"gate_proj",
|
|
|
|
| 21 |
"up_proj",
|
| 22 |
+
"v_proj",
|
| 23 |
+
"q_proj"
|
| 24 |
],
|
| 25 |
"task_type": "CAUSAL_LM"
|
| 26 |
}
|
adapter_model.bin
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 871609293
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:2a81912e202e5ef1e0abe63acfe2600dcb02f3768a37b847ef40a41ebbb64f69
|
| 3 |
size 871609293
|
all_results.json
CHANGED
|
@@ -1,11 +1,11 @@
|
|
| 1 |
{
|
| 2 |
"epoch": 0.04,
|
| 3 |
"eval_loss": 6.335043907165527,
|
| 4 |
-
"eval_runtime": 21.
|
| 5 |
-
"eval_samples_per_second": 2.
|
| 6 |
-
"eval_steps_per_second": 1.
|
| 7 |
-
"train_loss":
|
| 8 |
-
"train_runtime":
|
| 9 |
-
"train_samples_per_second":
|
| 10 |
-
"train_steps_per_second":
|
| 11 |
}
|
|
|
|
| 1 |
{
|
| 2 |
"epoch": 0.04,
|
| 3 |
"eval_loss": 6.335043907165527,
|
| 4 |
+
"eval_runtime": 21.6378,
|
| 5 |
+
"eval_samples_per_second": 2.311,
|
| 6 |
+
"eval_steps_per_second": 1.155,
|
| 7 |
+
"train_loss": 0.6445872698006807,
|
| 8 |
+
"train_runtime": 1748.3273,
|
| 9 |
+
"train_samples_per_second": 17.159,
|
| 10 |
+
"train_steps_per_second": 17.159
|
| 11 |
}
|
checkpoint-4200/adapter_model/adapter_model/README.md
CHANGED
|
@@ -70,6 +70,28 @@ The following `bitsandbytes` quantization config was used during training:
|
|
| 70 |
- bnb_4bit_use_double_quant: True
|
| 71 |
- bnb_4bit_compute_dtype: bfloat16
|
| 72 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 73 |
The following `bitsandbytes` quantization config was used during training:
|
| 74 |
- load_in_8bit: False
|
| 75 |
- load_in_4bit: True
|
|
@@ -88,5 +110,7 @@ The following `bitsandbytes` quantization config was used during training:
|
|
| 88 |
- PEFT 0.4.0
|
| 89 |
- PEFT 0.4.0
|
| 90 |
- PEFT 0.4.0
|
|
|
|
|
|
|
| 91 |
|
| 92 |
- PEFT 0.4.0
|
|
|
|
| 70 |
- bnb_4bit_use_double_quant: True
|
| 71 |
- bnb_4bit_compute_dtype: bfloat16
|
| 72 |
|
| 73 |
+
The following `bitsandbytes` quantization config was used during training:
|
| 74 |
+
- load_in_8bit: False
|
| 75 |
+
- load_in_4bit: True
|
| 76 |
+
- llm_int8_threshold: 6.0
|
| 77 |
+
- llm_int8_skip_modules: None
|
| 78 |
+
- llm_int8_enable_fp32_cpu_offload: False
|
| 79 |
+
- llm_int8_has_fp16_weight: False
|
| 80 |
+
- bnb_4bit_quant_type: nf4
|
| 81 |
+
- bnb_4bit_use_double_quant: True
|
| 82 |
+
- bnb_4bit_compute_dtype: bfloat16
|
| 83 |
+
|
| 84 |
+
The following `bitsandbytes` quantization config was used during training:
|
| 85 |
+
- load_in_8bit: False
|
| 86 |
+
- load_in_4bit: True
|
| 87 |
+
- llm_int8_threshold: 6.0
|
| 88 |
+
- llm_int8_skip_modules: None
|
| 89 |
+
- llm_int8_enable_fp32_cpu_offload: False
|
| 90 |
+
- llm_int8_has_fp16_weight: False
|
| 91 |
+
- bnb_4bit_quant_type: nf4
|
| 92 |
+
- bnb_4bit_use_double_quant: True
|
| 93 |
+
- bnb_4bit_compute_dtype: bfloat16
|
| 94 |
+
|
| 95 |
The following `bitsandbytes` quantization config was used during training:
|
| 96 |
- load_in_8bit: False
|
| 97 |
- load_in_4bit: True
|
|
|
|
| 110 |
- PEFT 0.4.0
|
| 111 |
- PEFT 0.4.0
|
| 112 |
- PEFT 0.4.0
|
| 113 |
+
- PEFT 0.4.0
|
| 114 |
+
- PEFT 0.4.0
|
| 115 |
|
| 116 |
- PEFT 0.4.0
|
checkpoint-4200/adapter_model/adapter_model/adapter_model.bin
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 871609293
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:ff18c40f9b3c9fb20f1c95d4dff151244eba09eee79ae11c6121cc23181c2442
|
| 3 |
size 871609293
|
checkpoint-5600/README.md
ADDED
|
@@ -0,0 +1,20 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
---
|
| 2 |
+
library_name: peft
|
| 3 |
+
---
|
| 4 |
+
## Training procedure
|
| 5 |
+
|
| 6 |
+
|
| 7 |
+
The following `bitsandbytes` quantization config was used during training:
|
| 8 |
+
- load_in_8bit: False
|
| 9 |
+
- load_in_4bit: True
|
| 10 |
+
- llm_int8_threshold: 6.0
|
| 11 |
+
- llm_int8_skip_modules: None
|
| 12 |
+
- llm_int8_enable_fp32_cpu_offload: False
|
| 13 |
+
- llm_int8_has_fp16_weight: False
|
| 14 |
+
- bnb_4bit_quant_type: nf4
|
| 15 |
+
- bnb_4bit_use_double_quant: True
|
| 16 |
+
- bnb_4bit_compute_dtype: bfloat16
|
| 17 |
+
### Framework versions
|
| 18 |
+
|
| 19 |
+
|
| 20 |
+
- PEFT 0.4.0
|
checkpoint-5600/adapter_config.json
ADDED
|
@@ -0,0 +1,26 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"auto_mapping": null,
|
| 3 |
+
"base_model_name_or_path": "codellama/CodeLlama-34b-Python-hf",
|
| 4 |
+
"bias": "none",
|
| 5 |
+
"fan_in_fan_out": false,
|
| 6 |
+
"inference_mode": true,
|
| 7 |
+
"init_lora_weights": true,
|
| 8 |
+
"layers_pattern": null,
|
| 9 |
+
"layers_to_transform": null,
|
| 10 |
+
"lora_alpha": 16.0,
|
| 11 |
+
"lora_dropout": 0.1,
|
| 12 |
+
"modules_to_save": null,
|
| 13 |
+
"peft_type": "LORA",
|
| 14 |
+
"r": 64,
|
| 15 |
+
"revision": null,
|
| 16 |
+
"target_modules": [
|
| 17 |
+
"o_proj",
|
| 18 |
+
"k_proj",
|
| 19 |
+
"down_proj",
|
| 20 |
+
"gate_proj",
|
| 21 |
+
"up_proj",
|
| 22 |
+
"v_proj",
|
| 23 |
+
"q_proj"
|
| 24 |
+
],
|
| 25 |
+
"task_type": "CAUSAL_LM"
|
| 26 |
+
}
|
checkpoint-5600/adapter_model.bin
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:2a81912e202e5ef1e0abe63acfe2600dcb02f3768a37b847ef40a41ebbb64f69
|
| 3 |
+
size 871609293
|
checkpoint-5600/added_tokens.json
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"[PAD]": 32000
|
| 3 |
+
}
|
checkpoint-5600/optimizer.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:9f1ce7f0dd170bfeebb821db5f0cfcca98b3957b20a9caeefcb11d959a230f9e
|
| 3 |
+
size 873872799
|
checkpoint-5600/rng_state.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:1eacbdddf8408ff496013b66ade44228149b42f2f803cd158b398d7288028823
|
| 3 |
+
size 14511
|
checkpoint-5600/scheduler.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:81248501833af563175f43c1d681185643b8411cee1fb1e631b8687c465eb2e3
|
| 3 |
+
size 627
|
checkpoint-5600/special_tokens_map.json
ADDED
|
@@ -0,0 +1,6 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"bos_token": "<s>",
|
| 3 |
+
"eos_token": "</s>",
|
| 4 |
+
"pad_token": "[PAD]",
|
| 5 |
+
"unk_token": "<unk>"
|
| 6 |
+
}
|
checkpoint-5600/tokenizer.model
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:9e556afd44213b6bd1be2b850ebbbd98f5481437a8021afaf58ee7fb1818d347
|
| 3 |
+
size 499723
|
checkpoint-5600/tokenizer_config.json
ADDED
|
@@ -0,0 +1,35 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"add_bos_token": true,
|
| 3 |
+
"add_eos_token": false,
|
| 4 |
+
"bos_token": {
|
| 5 |
+
"__type": "AddedToken",
|
| 6 |
+
"content": "<s>",
|
| 7 |
+
"lstrip": false,
|
| 8 |
+
"normalized": true,
|
| 9 |
+
"rstrip": false,
|
| 10 |
+
"single_word": false
|
| 11 |
+
},
|
| 12 |
+
"clean_up_tokenization_spaces": false,
|
| 13 |
+
"eos_token": {
|
| 14 |
+
"__type": "AddedToken",
|
| 15 |
+
"content": "</s>",
|
| 16 |
+
"lstrip": false,
|
| 17 |
+
"normalized": true,
|
| 18 |
+
"rstrip": false,
|
| 19 |
+
"single_word": false
|
| 20 |
+
},
|
| 21 |
+
"legacy": null,
|
| 22 |
+
"model_max_length": 1000000000000000019884624838656,
|
| 23 |
+
"pad_token": null,
|
| 24 |
+
"padding_side": "right",
|
| 25 |
+
"sp_model_kwargs": {},
|
| 26 |
+
"tokenizer_class": "LlamaTokenizer",
|
| 27 |
+
"unk_token": {
|
| 28 |
+
"__type": "AddedToken",
|
| 29 |
+
"content": "<unk>",
|
| 30 |
+
"lstrip": false,
|
| 31 |
+
"normalized": true,
|
| 32 |
+
"rstrip": false,
|
| 33 |
+
"single_word": false
|
| 34 |
+
}
|
| 35 |
+
}
|
checkpoint-5600/trainer_state.json
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
checkpoint-5600/training_args.bin
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:fe7d931ebfbcece1009124b9eae98d1a465edd703240c0655ee9bb17db395973
|
| 3 |
+
size 6011
|
eval_results.json
CHANGED
|
@@ -1,7 +1,7 @@
|
|
| 1 |
{
|
| 2 |
"epoch": 0.04,
|
| 3 |
"eval_loss": 6.335043907165527,
|
| 4 |
-
"eval_runtime": 21.
|
| 5 |
-
"eval_samples_per_second": 2.
|
| 6 |
-
"eval_steps_per_second": 1.
|
| 7 |
}
|
|
|
|
| 1 |
{
|
| 2 |
"epoch": 0.04,
|
| 3 |
"eval_loss": 6.335043907165527,
|
| 4 |
+
"eval_runtime": 21.6378,
|
| 5 |
+
"eval_samples_per_second": 2.311,
|
| 6 |
+
"eval_steps_per_second": 1.155
|
| 7 |
}
|
metrics.json
CHANGED
|
@@ -1 +1 @@
|
|
| 1 |
-
{"run_name": "codellama34b_unnatural", "train_runtime":
|
|
|
|
| 1 |
+
{"run_name": "codellama34b_unnatural", "train_runtime": 1748.3273, "train_samples_per_second": 17.159, "train_steps_per_second": 17.159, "train_loss": 0.6445872698006807, "epoch": 0.04, "eval_loss": 6.335043907165527, "eval_runtime": 21.6378, "eval_samples_per_second": 2.311, "eval_steps_per_second": 1.155}
|
train_results.json
CHANGED
|
@@ -1,7 +1,7 @@
|
|
| 1 |
{
|
| 2 |
"epoch": 0.04,
|
| 3 |
-
"train_loss":
|
| 4 |
-
"train_runtime":
|
| 5 |
-
"train_samples_per_second":
|
| 6 |
-
"train_steps_per_second":
|
| 7 |
}
|
|
|
|
| 1 |
{
|
| 2 |
"epoch": 0.04,
|
| 3 |
+
"train_loss": 0.6445872698006807,
|
| 4 |
+
"train_runtime": 1748.3273,
|
| 5 |
+
"train_samples_per_second": 17.159,
|
| 6 |
+
"train_steps_per_second": 17.159
|
| 7 |
}
|
trainer_state.json
CHANGED
|
@@ -1,8 +1,8 @@
|
|
| 1 |
{
|
| 2 |
"best_metric": 6.335043907165527,
|
| 3 |
"best_model_checkpoint": "./output_v2/34bCodellama_CodeLlama-34b-Python-hf_unnatural-instructions_standardized/checkpoint-4200",
|
| 4 |
-
"epoch": 0.
|
| 5 |
-
"global_step":
|
| 6 |
"is_hyper_param_search": false,
|
| 7 |
"is_local_process_zero": true,
|
| 8 |
"is_world_process_zero": true,
|
|
@@ -29265,11 +29265,3674 @@
|
|
| 29265 |
"train_runtime": 2886.6567,
|
| 29266 |
"train_samples_per_second": 10.393,
|
| 29267 |
"train_steps_per_second": 10.393
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 29268 |
}
|
| 29269 |
],
|
| 29270 |
"max_steps": 30000,
|
| 29271 |
"num_train_epochs": 1,
|
| 29272 |
-
"total_flos":
|
| 29273 |
"trial_name": null,
|
| 29274 |
"trial_params": null
|
| 29275 |
}
|
|
|
|
| 1 |
{
|
| 2 |
"best_metric": 6.335043907165527,
|
| 3 |
"best_model_checkpoint": "./output_v2/34bCodellama_CodeLlama-34b-Python-hf_unnatural-instructions_standardized/checkpoint-4200",
|
| 4 |
+
"epoch": 0.04124971354365595,
|
| 5 |
+
"global_step": 5400,
|
| 6 |
"is_hyper_param_search": false,
|
| 7 |
"is_local_process_zero": true,
|
| 8 |
"is_world_process_zero": true,
|
|
|
|
| 29265 |
"train_runtime": 2886.6567,
|
| 29266 |
"train_samples_per_second": 10.393,
|
| 29267 |
"train_steps_per_second": 10.393
|
| 29268 |
+
},
|
| 29269 |
+
{
|
| 29270 |
+
"epoch": 0.04,
|
| 29271 |
+
"learning_rate": 0.0004,
|
| 29272 |
+
"loss": 8.196,
|
| 29273 |
+
"step": 4801
|
| 29274 |
+
},
|
| 29275 |
+
{
|
| 29276 |
+
"epoch": 0.04,
|
| 29277 |
+
"learning_rate": 0.0004,
|
| 29278 |
+
"loss": 7.4766,
|
| 29279 |
+
"step": 4802
|
| 29280 |
+
},
|
| 29281 |
+
{
|
| 29282 |
+
"epoch": 0.04,
|
| 29283 |
+
"learning_rate": 0.0004,
|
| 29284 |
+
"loss": 7.5177,
|
| 29285 |
+
"step": 4803
|
| 29286 |
+
},
|
| 29287 |
+
{
|
| 29288 |
+
"epoch": 0.04,
|
| 29289 |
+
"learning_rate": 0.0004,
|
| 29290 |
+
"loss": 7.6057,
|
| 29291 |
+
"step": 4804
|
| 29292 |
+
},
|
| 29293 |
+
{
|
| 29294 |
+
"epoch": 0.04,
|
| 29295 |
+
"learning_rate": 0.0004,
|
| 29296 |
+
"loss": 7.9972,
|
| 29297 |
+
"step": 4805
|
| 29298 |
+
},
|
| 29299 |
+
{
|
| 29300 |
+
"epoch": 0.04,
|
| 29301 |
+
"learning_rate": 0.0004,
|
| 29302 |
+
"loss": 8.3521,
|
| 29303 |
+
"step": 4806
|
| 29304 |
+
},
|
| 29305 |
+
{
|
| 29306 |
+
"epoch": 0.04,
|
| 29307 |
+
"learning_rate": 0.0004,
|
| 29308 |
+
"loss": 7.9037,
|
| 29309 |
+
"step": 4807
|
| 29310 |
+
},
|
| 29311 |
+
{
|
| 29312 |
+
"epoch": 0.04,
|
| 29313 |
+
"learning_rate": 0.0004,
|
| 29314 |
+
"loss": 8.967,
|
| 29315 |
+
"step": 4808
|
| 29316 |
+
},
|
| 29317 |
+
{
|
| 29318 |
+
"epoch": 0.04,
|
| 29319 |
+
"learning_rate": 0.0004,
|
| 29320 |
+
"loss": 6.2405,
|
| 29321 |
+
"step": 4809
|
| 29322 |
+
},
|
| 29323 |
+
{
|
| 29324 |
+
"epoch": 0.04,
|
| 29325 |
+
"learning_rate": 0.0004,
|
| 29326 |
+
"loss": 8.5253,
|
| 29327 |
+
"step": 4810
|
| 29328 |
+
},
|
| 29329 |
+
{
|
| 29330 |
+
"epoch": 0.04,
|
| 29331 |
+
"learning_rate": 0.0004,
|
| 29332 |
+
"loss": 4.2458,
|
| 29333 |
+
"step": 4811
|
| 29334 |
+
},
|
| 29335 |
+
{
|
| 29336 |
+
"epoch": 0.04,
|
| 29337 |
+
"learning_rate": 0.0004,
|
| 29338 |
+
"loss": 7.179,
|
| 29339 |
+
"step": 4812
|
| 29340 |
+
},
|
| 29341 |
+
{
|
| 29342 |
+
"epoch": 0.04,
|
| 29343 |
+
"learning_rate": 0.0004,
|
| 29344 |
+
"loss": 7.6969,
|
| 29345 |
+
"step": 4813
|
| 29346 |
+
},
|
| 29347 |
+
{
|
| 29348 |
+
"epoch": 0.04,
|
| 29349 |
+
"learning_rate": 0.0004,
|
| 29350 |
+
"loss": 9.2289,
|
| 29351 |
+
"step": 4814
|
| 29352 |
+
},
|
| 29353 |
+
{
|
| 29354 |
+
"epoch": 0.04,
|
| 29355 |
+
"learning_rate": 0.0004,
|
| 29356 |
+
"loss": 7.0946,
|
| 29357 |
+
"step": 4815
|
| 29358 |
+
},
|
| 29359 |
+
{
|
| 29360 |
+
"epoch": 0.04,
|
| 29361 |
+
"learning_rate": 0.0004,
|
| 29362 |
+
"loss": 7.9045,
|
| 29363 |
+
"step": 4816
|
| 29364 |
+
},
|
| 29365 |
+
{
|
| 29366 |
+
"epoch": 0.04,
|
| 29367 |
+
"learning_rate": 0.0004,
|
| 29368 |
+
"loss": 9.6952,
|
| 29369 |
+
"step": 4817
|
| 29370 |
+
},
|
| 29371 |
+
{
|
| 29372 |
+
"epoch": 0.04,
|
| 29373 |
+
"learning_rate": 0.0004,
|
| 29374 |
+
"loss": 6.7265,
|
| 29375 |
+
"step": 4818
|
| 29376 |
+
},
|
| 29377 |
+
{
|
| 29378 |
+
"epoch": 0.04,
|
| 29379 |
+
"learning_rate": 0.0004,
|
| 29380 |
+
"loss": 2.9574,
|
| 29381 |
+
"step": 4819
|
| 29382 |
+
},
|
| 29383 |
+
{
|
| 29384 |
+
"epoch": 0.04,
|
| 29385 |
+
"learning_rate": 0.0004,
|
| 29386 |
+
"loss": 7.3774,
|
| 29387 |
+
"step": 4820
|
| 29388 |
+
},
|
| 29389 |
+
{
|
| 29390 |
+
"epoch": 0.04,
|
| 29391 |
+
"learning_rate": 0.0004,
|
| 29392 |
+
"loss": 6.7837,
|
| 29393 |
+
"step": 4821
|
| 29394 |
+
},
|
| 29395 |
+
{
|
| 29396 |
+
"epoch": 0.04,
|
| 29397 |
+
"learning_rate": 0.0004,
|
| 29398 |
+
"loss": 3.3796,
|
| 29399 |
+
"step": 4822
|
| 29400 |
+
},
|
| 29401 |
+
{
|
| 29402 |
+
"epoch": 0.04,
|
| 29403 |
+
"learning_rate": 0.0004,
|
| 29404 |
+
"loss": 6.4443,
|
| 29405 |
+
"step": 4823
|
| 29406 |
+
},
|
| 29407 |
+
{
|
| 29408 |
+
"epoch": 0.04,
|
| 29409 |
+
"learning_rate": 0.0004,
|
| 29410 |
+
"loss": 8.7734,
|
| 29411 |
+
"step": 4824
|
| 29412 |
+
},
|
| 29413 |
+
{
|
| 29414 |
+
"epoch": 0.04,
|
| 29415 |
+
"learning_rate": 0.0004,
|
| 29416 |
+
"loss": 9.5535,
|
| 29417 |
+
"step": 4825
|
| 29418 |
+
},
|
| 29419 |
+
{
|
| 29420 |
+
"epoch": 0.04,
|
| 29421 |
+
"learning_rate": 0.0004,
|
| 29422 |
+
"loss": 6.1014,
|
| 29423 |
+
"step": 4826
|
| 29424 |
+
},
|
| 29425 |
+
{
|
| 29426 |
+
"epoch": 0.04,
|
| 29427 |
+
"learning_rate": 0.0004,
|
| 29428 |
+
"loss": 3.5574,
|
| 29429 |
+
"step": 4827
|
| 29430 |
+
},
|
| 29431 |
+
{
|
| 29432 |
+
"epoch": 0.04,
|
| 29433 |
+
"learning_rate": 0.0004,
|
| 29434 |
+
"loss": 4.5114,
|
| 29435 |
+
"step": 4828
|
| 29436 |
+
},
|
| 29437 |
+
{
|
| 29438 |
+
"epoch": 0.04,
|
| 29439 |
+
"learning_rate": 0.0004,
|
| 29440 |
+
"loss": 3.431,
|
| 29441 |
+
"step": 4829
|
| 29442 |
+
},
|
| 29443 |
+
{
|
| 29444 |
+
"epoch": 0.04,
|
| 29445 |
+
"learning_rate": 0.0004,
|
| 29446 |
+
"loss": 7.8042,
|
| 29447 |
+
"step": 4830
|
| 29448 |
+
},
|
| 29449 |
+
{
|
| 29450 |
+
"epoch": 0.04,
|
| 29451 |
+
"learning_rate": 0.0004,
|
| 29452 |
+
"loss": 2.4997,
|
| 29453 |
+
"step": 4831
|
| 29454 |
+
},
|
| 29455 |
+
{
|
| 29456 |
+
"epoch": 0.04,
|
| 29457 |
+
"learning_rate": 0.0004,
|
| 29458 |
+
"loss": 7.9027,
|
| 29459 |
+
"step": 4832
|
| 29460 |
+
},
|
| 29461 |
+
{
|
| 29462 |
+
"epoch": 0.04,
|
| 29463 |
+
"learning_rate": 0.0004,
|
| 29464 |
+
"loss": 4.7126,
|
| 29465 |
+
"step": 4833
|
| 29466 |
+
},
|
| 29467 |
+
{
|
| 29468 |
+
"epoch": 0.04,
|
| 29469 |
+
"learning_rate": 0.0004,
|
| 29470 |
+
"loss": 2.8638,
|
| 29471 |
+
"step": 4834
|
| 29472 |
+
},
|
| 29473 |
+
{
|
| 29474 |
+
"epoch": 0.04,
|
| 29475 |
+
"learning_rate": 0.0004,
|
| 29476 |
+
"loss": 6.4997,
|
| 29477 |
+
"step": 4835
|
| 29478 |
+
},
|
| 29479 |
+
{
|
| 29480 |
+
"epoch": 0.04,
|
| 29481 |
+
"learning_rate": 0.0004,
|
| 29482 |
+
"loss": 4.8501,
|
| 29483 |
+
"step": 4836
|
| 29484 |
+
},
|
| 29485 |
+
{
|
| 29486 |
+
"epoch": 0.04,
|
| 29487 |
+
"learning_rate": 0.0004,
|
| 29488 |
+
"loss": 2.6346,
|
| 29489 |
+
"step": 4837
|
| 29490 |
+
},
|
| 29491 |
+
{
|
| 29492 |
+
"epoch": 0.04,
|
| 29493 |
+
"learning_rate": 0.0004,
|
| 29494 |
+
"loss": 2.8403,
|
| 29495 |
+
"step": 4838
|
| 29496 |
+
},
|
| 29497 |
+
{
|
| 29498 |
+
"epoch": 0.04,
|
| 29499 |
+
"learning_rate": 0.0004,
|
| 29500 |
+
"loss": 6.8362,
|
| 29501 |
+
"step": 4839
|
| 29502 |
+
},
|
| 29503 |
+
{
|
| 29504 |
+
"epoch": 0.04,
|
| 29505 |
+
"learning_rate": 0.0004,
|
| 29506 |
+
"loss": 2.8393,
|
| 29507 |
+
"step": 4840
|
| 29508 |
+
},
|
| 29509 |
+
{
|
| 29510 |
+
"epoch": 0.04,
|
| 29511 |
+
"learning_rate": 0.0004,
|
| 29512 |
+
"loss": 2.6428,
|
| 29513 |
+
"step": 4841
|
| 29514 |
+
},
|
| 29515 |
+
{
|
| 29516 |
+
"epoch": 0.04,
|
| 29517 |
+
"learning_rate": 0.0004,
|
| 29518 |
+
"loss": 5.9946,
|
| 29519 |
+
"step": 4842
|
| 29520 |
+
},
|
| 29521 |
+
{
|
| 29522 |
+
"epoch": 0.04,
|
| 29523 |
+
"learning_rate": 0.0004,
|
| 29524 |
+
"loss": 4.3163,
|
| 29525 |
+
"step": 4843
|
| 29526 |
+
},
|
| 29527 |
+
{
|
| 29528 |
+
"epoch": 0.04,
|
| 29529 |
+
"learning_rate": 0.0004,
|
| 29530 |
+
"loss": 6.9659,
|
| 29531 |
+
"step": 4844
|
| 29532 |
+
},
|
| 29533 |
+
{
|
| 29534 |
+
"epoch": 0.04,
|
| 29535 |
+
"learning_rate": 0.0004,
|
| 29536 |
+
"loss": 6.5787,
|
| 29537 |
+
"step": 4845
|
| 29538 |
+
},
|
| 29539 |
+
{
|
| 29540 |
+
"epoch": 0.04,
|
| 29541 |
+
"learning_rate": 0.0004,
|
| 29542 |
+
"loss": 9.0435,
|
| 29543 |
+
"step": 4846
|
| 29544 |
+
},
|
| 29545 |
+
{
|
| 29546 |
+
"epoch": 0.04,
|
| 29547 |
+
"learning_rate": 0.0004,
|
| 29548 |
+
"loss": 5.6627,
|
| 29549 |
+
"step": 4847
|
| 29550 |
+
},
|
| 29551 |
+
{
|
| 29552 |
+
"epoch": 0.04,
|
| 29553 |
+
"learning_rate": 0.0004,
|
| 29554 |
+
"loss": 7.0435,
|
| 29555 |
+
"step": 4848
|
| 29556 |
+
},
|
| 29557 |
+
{
|
| 29558 |
+
"epoch": 0.04,
|
| 29559 |
+
"learning_rate": 0.0004,
|
| 29560 |
+
"loss": 7.584,
|
| 29561 |
+
"step": 4849
|
| 29562 |
+
},
|
| 29563 |
+
{
|
| 29564 |
+
"epoch": 0.04,
|
| 29565 |
+
"learning_rate": 0.0004,
|
| 29566 |
+
"loss": 5.5761,
|
| 29567 |
+
"step": 4850
|
| 29568 |
+
},
|
| 29569 |
+
{
|
| 29570 |
+
"epoch": 0.04,
|
| 29571 |
+
"learning_rate": 0.0004,
|
| 29572 |
+
"loss": 8.0644,
|
| 29573 |
+
"step": 4851
|
| 29574 |
+
},
|
| 29575 |
+
{
|
| 29576 |
+
"epoch": 0.04,
|
| 29577 |
+
"learning_rate": 0.0004,
|
| 29578 |
+
"loss": 6.7897,
|
| 29579 |
+
"step": 4852
|
| 29580 |
+
},
|
| 29581 |
+
{
|
| 29582 |
+
"epoch": 0.04,
|
| 29583 |
+
"learning_rate": 0.0004,
|
| 29584 |
+
"loss": 7.7933,
|
| 29585 |
+
"step": 4853
|
| 29586 |
+
},
|
| 29587 |
+
{
|
| 29588 |
+
"epoch": 0.04,
|
| 29589 |
+
"learning_rate": 0.0004,
|
| 29590 |
+
"loss": 8.0918,
|
| 29591 |
+
"step": 4854
|
| 29592 |
+
},
|
| 29593 |
+
{
|
| 29594 |
+
"epoch": 0.04,
|
| 29595 |
+
"learning_rate": 0.0004,
|
| 29596 |
+
"loss": 8.1191,
|
| 29597 |
+
"step": 4855
|
| 29598 |
+
},
|
| 29599 |
+
{
|
| 29600 |
+
"epoch": 0.04,
|
| 29601 |
+
"learning_rate": 0.0004,
|
| 29602 |
+
"loss": 5.6498,
|
| 29603 |
+
"step": 4856
|
| 29604 |
+
},
|
| 29605 |
+
{
|
| 29606 |
+
"epoch": 0.04,
|
| 29607 |
+
"learning_rate": 0.0004,
|
| 29608 |
+
"loss": 3.1834,
|
| 29609 |
+
"step": 4857
|
| 29610 |
+
},
|
| 29611 |
+
{
|
| 29612 |
+
"epoch": 0.04,
|
| 29613 |
+
"learning_rate": 0.0004,
|
| 29614 |
+
"loss": 7.5713,
|
| 29615 |
+
"step": 4858
|
| 29616 |
+
},
|
| 29617 |
+
{
|
| 29618 |
+
"epoch": 0.04,
|
| 29619 |
+
"learning_rate": 0.0004,
|
| 29620 |
+
"loss": 6.643,
|
| 29621 |
+
"step": 4859
|
| 29622 |
+
},
|
| 29623 |
+
{
|
| 29624 |
+
"epoch": 0.04,
|
| 29625 |
+
"learning_rate": 0.0004,
|
| 29626 |
+
"loss": 6.0051,
|
| 29627 |
+
"step": 4860
|
| 29628 |
+
},
|
| 29629 |
+
{
|
| 29630 |
+
"epoch": 0.04,
|
| 29631 |
+
"learning_rate": 0.0004,
|
| 29632 |
+
"loss": 5.3192,
|
| 29633 |
+
"step": 4861
|
| 29634 |
+
},
|
| 29635 |
+
{
|
| 29636 |
+
"epoch": 0.04,
|
| 29637 |
+
"learning_rate": 0.0004,
|
| 29638 |
+
"loss": 6.6787,
|
| 29639 |
+
"step": 4862
|
| 29640 |
+
},
|
| 29641 |
+
{
|
| 29642 |
+
"epoch": 0.04,
|
| 29643 |
+
"learning_rate": 0.0004,
|
| 29644 |
+
"loss": 6.1336,
|
| 29645 |
+
"step": 4863
|
| 29646 |
+
},
|
| 29647 |
+
{
|
| 29648 |
+
"epoch": 0.04,
|
| 29649 |
+
"learning_rate": 0.0004,
|
| 29650 |
+
"loss": 4.1196,
|
| 29651 |
+
"step": 4864
|
| 29652 |
+
},
|
| 29653 |
+
{
|
| 29654 |
+
"epoch": 0.04,
|
| 29655 |
+
"learning_rate": 0.0004,
|
| 29656 |
+
"loss": 7.7662,
|
| 29657 |
+
"step": 4865
|
| 29658 |
+
},
|
| 29659 |
+
{
|
| 29660 |
+
"epoch": 0.04,
|
| 29661 |
+
"learning_rate": 0.0004,
|
| 29662 |
+
"loss": 6.5099,
|
| 29663 |
+
"step": 4866
|
| 29664 |
+
},
|
| 29665 |
+
{
|
| 29666 |
+
"epoch": 0.04,
|
| 29667 |
+
"learning_rate": 0.0004,
|
| 29668 |
+
"loss": 6.4698,
|
| 29669 |
+
"step": 4867
|
| 29670 |
+
},
|
| 29671 |
+
{
|
| 29672 |
+
"epoch": 0.04,
|
| 29673 |
+
"learning_rate": 0.0004,
|
| 29674 |
+
"loss": 2.9245,
|
| 29675 |
+
"step": 4868
|
| 29676 |
+
},
|
| 29677 |
+
{
|
| 29678 |
+
"epoch": 0.04,
|
| 29679 |
+
"learning_rate": 0.0004,
|
| 29680 |
+
"loss": 3.0627,
|
| 29681 |
+
"step": 4869
|
| 29682 |
+
},
|
| 29683 |
+
{
|
| 29684 |
+
"epoch": 0.04,
|
| 29685 |
+
"learning_rate": 0.0004,
|
| 29686 |
+
"loss": 4.4951,
|
| 29687 |
+
"step": 4870
|
| 29688 |
+
},
|
| 29689 |
+
{
|
| 29690 |
+
"epoch": 0.04,
|
| 29691 |
+
"learning_rate": 0.0004,
|
| 29692 |
+
"loss": 9.7325,
|
| 29693 |
+
"step": 4871
|
| 29694 |
+
},
|
| 29695 |
+
{
|
| 29696 |
+
"epoch": 0.04,
|
| 29697 |
+
"learning_rate": 0.0004,
|
| 29698 |
+
"loss": 5.3552,
|
| 29699 |
+
"step": 4872
|
| 29700 |
+
},
|
| 29701 |
+
{
|
| 29702 |
+
"epoch": 0.04,
|
| 29703 |
+
"learning_rate": 0.0004,
|
| 29704 |
+
"loss": 4.0359,
|
| 29705 |
+
"step": 4873
|
| 29706 |
+
},
|
| 29707 |
+
{
|
| 29708 |
+
"epoch": 0.04,
|
| 29709 |
+
"learning_rate": 0.0004,
|
| 29710 |
+
"loss": 5.259,
|
| 29711 |
+
"step": 4874
|
| 29712 |
+
},
|
| 29713 |
+
{
|
| 29714 |
+
"epoch": 0.04,
|
| 29715 |
+
"learning_rate": 0.0004,
|
| 29716 |
+
"loss": 7.39,
|
| 29717 |
+
"step": 4875
|
| 29718 |
+
},
|
| 29719 |
+
{
|
| 29720 |
+
"epoch": 0.04,
|
| 29721 |
+
"learning_rate": 0.0004,
|
| 29722 |
+
"loss": 5.2283,
|
| 29723 |
+
"step": 4876
|
| 29724 |
+
},
|
| 29725 |
+
{
|
| 29726 |
+
"epoch": 0.04,
|
| 29727 |
+
"learning_rate": 0.0004,
|
| 29728 |
+
"loss": 7.2338,
|
| 29729 |
+
"step": 4877
|
| 29730 |
+
},
|
| 29731 |
+
{
|
| 29732 |
+
"epoch": 0.04,
|
| 29733 |
+
"learning_rate": 0.0004,
|
| 29734 |
+
"loss": 4.5157,
|
| 29735 |
+
"step": 4878
|
| 29736 |
+
},
|
| 29737 |
+
{
|
| 29738 |
+
"epoch": 0.04,
|
| 29739 |
+
"learning_rate": 0.0004,
|
| 29740 |
+
"loss": 6.7822,
|
| 29741 |
+
"step": 4879
|
| 29742 |
+
},
|
| 29743 |
+
{
|
| 29744 |
+
"epoch": 0.04,
|
| 29745 |
+
"learning_rate": 0.0004,
|
| 29746 |
+
"loss": 5.4882,
|
| 29747 |
+
"step": 4880
|
| 29748 |
+
},
|
| 29749 |
+
{
|
| 29750 |
+
"epoch": 0.04,
|
| 29751 |
+
"learning_rate": 0.0004,
|
| 29752 |
+
"loss": 2.9394,
|
| 29753 |
+
"step": 4881
|
| 29754 |
+
},
|
| 29755 |
+
{
|
| 29756 |
+
"epoch": 0.04,
|
| 29757 |
+
"learning_rate": 0.0004,
|
| 29758 |
+
"loss": 9.7844,
|
| 29759 |
+
"step": 4882
|
| 29760 |
+
},
|
| 29761 |
+
{
|
| 29762 |
+
"epoch": 0.04,
|
| 29763 |
+
"learning_rate": 0.0004,
|
| 29764 |
+
"loss": 4.0808,
|
| 29765 |
+
"step": 4883
|
| 29766 |
+
},
|
| 29767 |
+
{
|
| 29768 |
+
"epoch": 0.04,
|
| 29769 |
+
"learning_rate": 0.0004,
|
| 29770 |
+
"loss": 5.4349,
|
| 29771 |
+
"step": 4884
|
| 29772 |
+
},
|
| 29773 |
+
{
|
| 29774 |
+
"epoch": 0.04,
|
| 29775 |
+
"learning_rate": 0.0004,
|
| 29776 |
+
"loss": 4.7747,
|
| 29777 |
+
"step": 4885
|
| 29778 |
+
},
|
| 29779 |
+
{
|
| 29780 |
+
"epoch": 0.04,
|
| 29781 |
+
"learning_rate": 0.0004,
|
| 29782 |
+
"loss": 3.2778,
|
| 29783 |
+
"step": 4886
|
| 29784 |
+
},
|
| 29785 |
+
{
|
| 29786 |
+
"epoch": 0.04,
|
| 29787 |
+
"learning_rate": 0.0004,
|
| 29788 |
+
"loss": 7.9457,
|
| 29789 |
+
"step": 4887
|
| 29790 |
+
},
|
| 29791 |
+
{
|
| 29792 |
+
"epoch": 0.04,
|
| 29793 |
+
"learning_rate": 0.0004,
|
| 29794 |
+
"loss": 3.196,
|
| 29795 |
+
"step": 4888
|
| 29796 |
+
},
|
| 29797 |
+
{
|
| 29798 |
+
"epoch": 0.04,
|
| 29799 |
+
"learning_rate": 0.0004,
|
| 29800 |
+
"loss": 4.2576,
|
| 29801 |
+
"step": 4889
|
| 29802 |
+
},
|
| 29803 |
+
{
|
| 29804 |
+
"epoch": 0.04,
|
| 29805 |
+
"learning_rate": 0.0004,
|
| 29806 |
+
"loss": 6.3978,
|
| 29807 |
+
"step": 4890
|
| 29808 |
+
},
|
| 29809 |
+
{
|
| 29810 |
+
"epoch": 0.04,
|
| 29811 |
+
"learning_rate": 0.0004,
|
| 29812 |
+
"loss": 7.4963,
|
| 29813 |
+
"step": 4891
|
| 29814 |
+
},
|
| 29815 |
+
{
|
| 29816 |
+
"epoch": 0.04,
|
| 29817 |
+
"learning_rate": 0.0004,
|
| 29818 |
+
"loss": 8.8915,
|
| 29819 |
+
"step": 4892
|
| 29820 |
+
},
|
| 29821 |
+
{
|
| 29822 |
+
"epoch": 0.04,
|
| 29823 |
+
"learning_rate": 0.0004,
|
| 29824 |
+
"loss": 2.8344,
|
| 29825 |
+
"step": 4893
|
| 29826 |
+
},
|
| 29827 |
+
{
|
| 29828 |
+
"epoch": 0.04,
|
| 29829 |
+
"learning_rate": 0.0004,
|
| 29830 |
+
"loss": 6.5248,
|
| 29831 |
+
"step": 4894
|
| 29832 |
+
},
|
| 29833 |
+
{
|
| 29834 |
+
"epoch": 0.04,
|
| 29835 |
+
"learning_rate": 0.0004,
|
| 29836 |
+
"loss": 2.9729,
|
| 29837 |
+
"step": 4895
|
| 29838 |
+
},
|
| 29839 |
+
{
|
| 29840 |
+
"epoch": 0.04,
|
| 29841 |
+
"learning_rate": 0.0004,
|
| 29842 |
+
"loss": 2.7504,
|
| 29843 |
+
"step": 4896
|
| 29844 |
+
},
|
| 29845 |
+
{
|
| 29846 |
+
"epoch": 0.04,
|
| 29847 |
+
"learning_rate": 0.0004,
|
| 29848 |
+
"loss": 4.4288,
|
| 29849 |
+
"step": 4897
|
| 29850 |
+
},
|
| 29851 |
+
{
|
| 29852 |
+
"epoch": 0.04,
|
| 29853 |
+
"learning_rate": 0.0004,
|
| 29854 |
+
"loss": 5.173,
|
| 29855 |
+
"step": 4898
|
| 29856 |
+
},
|
| 29857 |
+
{
|
| 29858 |
+
"epoch": 0.04,
|
| 29859 |
+
"learning_rate": 0.0004,
|
| 29860 |
+
"loss": 2.6288,
|
| 29861 |
+
"step": 4899
|
| 29862 |
+
},
|
| 29863 |
+
{
|
| 29864 |
+
"epoch": 0.04,
|
| 29865 |
+
"learning_rate": 0.0004,
|
| 29866 |
+
"loss": 7.3934,
|
| 29867 |
+
"step": 4900
|
| 29868 |
+
},
|
| 29869 |
+
{
|
| 29870 |
+
"epoch": 0.04,
|
| 29871 |
+
"learning_rate": 0.0004,
|
| 29872 |
+
"loss": 2.8056,
|
| 29873 |
+
"step": 4901
|
| 29874 |
+
},
|
| 29875 |
+
{
|
| 29876 |
+
"epoch": 0.04,
|
| 29877 |
+
"learning_rate": 0.0004,
|
| 29878 |
+
"loss": 6.7523,
|
| 29879 |
+
"step": 4902
|
| 29880 |
+
},
|
| 29881 |
+
{
|
| 29882 |
+
"epoch": 0.04,
|
| 29883 |
+
"learning_rate": 0.0004,
|
| 29884 |
+
"loss": 7.6066,
|
| 29885 |
+
"step": 4903
|
| 29886 |
+
},
|
| 29887 |
+
{
|
| 29888 |
+
"epoch": 0.04,
|
| 29889 |
+
"learning_rate": 0.0004,
|
| 29890 |
+
"loss": 8.6161,
|
| 29891 |
+
"step": 4904
|
| 29892 |
+
},
|
| 29893 |
+
{
|
| 29894 |
+
"epoch": 0.04,
|
| 29895 |
+
"learning_rate": 0.0004,
|
| 29896 |
+
"loss": 7.8099,
|
| 29897 |
+
"step": 4905
|
| 29898 |
+
},
|
| 29899 |
+
{
|
| 29900 |
+
"epoch": 0.04,
|
| 29901 |
+
"learning_rate": 0.0004,
|
| 29902 |
+
"loss": 5.2048,
|
| 29903 |
+
"step": 4906
|
| 29904 |
+
},
|
| 29905 |
+
{
|
| 29906 |
+
"epoch": 0.04,
|
| 29907 |
+
"learning_rate": 0.0004,
|
| 29908 |
+
"loss": 2.6112,
|
| 29909 |
+
"step": 4907
|
| 29910 |
+
},
|
| 29911 |
+
{
|
| 29912 |
+
"epoch": 0.04,
|
| 29913 |
+
"learning_rate": 0.0004,
|
| 29914 |
+
"loss": 8.5394,
|
| 29915 |
+
"step": 4908
|
| 29916 |
+
},
|
| 29917 |
+
{
|
| 29918 |
+
"epoch": 0.04,
|
| 29919 |
+
"learning_rate": 0.0004,
|
| 29920 |
+
"loss": 7.9661,
|
| 29921 |
+
"step": 4909
|
| 29922 |
+
},
|
| 29923 |
+
{
|
| 29924 |
+
"epoch": 0.04,
|
| 29925 |
+
"learning_rate": 0.0004,
|
| 29926 |
+
"loss": 7.1945,
|
| 29927 |
+
"step": 4910
|
| 29928 |
+
},
|
| 29929 |
+
{
|
| 29930 |
+
"epoch": 0.04,
|
| 29931 |
+
"learning_rate": 0.0004,
|
| 29932 |
+
"loss": 7.1823,
|
| 29933 |
+
"step": 4911
|
| 29934 |
+
},
|
| 29935 |
+
{
|
| 29936 |
+
"epoch": 0.04,
|
| 29937 |
+
"learning_rate": 0.0004,
|
| 29938 |
+
"loss": 6.5774,
|
| 29939 |
+
"step": 4912
|
| 29940 |
+
},
|
| 29941 |
+
{
|
| 29942 |
+
"epoch": 0.04,
|
| 29943 |
+
"learning_rate": 0.0004,
|
| 29944 |
+
"loss": 7.3444,
|
| 29945 |
+
"step": 4913
|
| 29946 |
+
},
|
| 29947 |
+
{
|
| 29948 |
+
"epoch": 0.04,
|
| 29949 |
+
"learning_rate": 0.0004,
|
| 29950 |
+
"loss": 7.8732,
|
| 29951 |
+
"step": 4914
|
| 29952 |
+
},
|
| 29953 |
+
{
|
| 29954 |
+
"epoch": 0.04,
|
| 29955 |
+
"learning_rate": 0.0004,
|
| 29956 |
+
"loss": 3.5685,
|
| 29957 |
+
"step": 4915
|
| 29958 |
+
},
|
| 29959 |
+
{
|
| 29960 |
+
"epoch": 0.04,
|
| 29961 |
+
"learning_rate": 0.0004,
|
| 29962 |
+
"loss": 4.6944,
|
| 29963 |
+
"step": 4916
|
| 29964 |
+
},
|
| 29965 |
+
{
|
| 29966 |
+
"epoch": 0.04,
|
| 29967 |
+
"learning_rate": 0.0004,
|
| 29968 |
+
"loss": 8.9668,
|
| 29969 |
+
"step": 4917
|
| 29970 |
+
},
|
| 29971 |
+
{
|
| 29972 |
+
"epoch": 0.04,
|
| 29973 |
+
"learning_rate": 0.0004,
|
| 29974 |
+
"loss": 2.9854,
|
| 29975 |
+
"step": 4918
|
| 29976 |
+
},
|
| 29977 |
+
{
|
| 29978 |
+
"epoch": 0.04,
|
| 29979 |
+
"learning_rate": 0.0004,
|
| 29980 |
+
"loss": 9.0986,
|
| 29981 |
+
"step": 4919
|
| 29982 |
+
},
|
| 29983 |
+
{
|
| 29984 |
+
"epoch": 0.04,
|
| 29985 |
+
"learning_rate": 0.0004,
|
| 29986 |
+
"loss": 6.8546,
|
| 29987 |
+
"step": 4920
|
| 29988 |
+
},
|
| 29989 |
+
{
|
| 29990 |
+
"epoch": 0.04,
|
| 29991 |
+
"learning_rate": 0.0004,
|
| 29992 |
+
"loss": 2.658,
|
| 29993 |
+
"step": 4921
|
| 29994 |
+
},
|
| 29995 |
+
{
|
| 29996 |
+
"epoch": 0.04,
|
| 29997 |
+
"learning_rate": 0.0004,
|
| 29998 |
+
"loss": 2.8595,
|
| 29999 |
+
"step": 4922
|
| 30000 |
+
},
|
| 30001 |
+
{
|
| 30002 |
+
"epoch": 0.04,
|
| 30003 |
+
"learning_rate": 0.0004,
|
| 30004 |
+
"loss": 6.3526,
|
| 30005 |
+
"step": 4923
|
| 30006 |
+
},
|
| 30007 |
+
{
|
| 30008 |
+
"epoch": 0.04,
|
| 30009 |
+
"learning_rate": 0.0004,
|
| 30010 |
+
"loss": 6.6612,
|
| 30011 |
+
"step": 4924
|
| 30012 |
+
},
|
| 30013 |
+
{
|
| 30014 |
+
"epoch": 0.04,
|
| 30015 |
+
"learning_rate": 0.0004,
|
| 30016 |
+
"loss": 6.4798,
|
| 30017 |
+
"step": 4925
|
| 30018 |
+
},
|
| 30019 |
+
{
|
| 30020 |
+
"epoch": 0.04,
|
| 30021 |
+
"learning_rate": 0.0004,
|
| 30022 |
+
"loss": 6.0779,
|
| 30023 |
+
"step": 4926
|
| 30024 |
+
},
|
| 30025 |
+
{
|
| 30026 |
+
"epoch": 0.04,
|
| 30027 |
+
"learning_rate": 0.0004,
|
| 30028 |
+
"loss": 2.8211,
|
| 30029 |
+
"step": 4927
|
| 30030 |
+
},
|
| 30031 |
+
{
|
| 30032 |
+
"epoch": 0.04,
|
| 30033 |
+
"learning_rate": 0.0004,
|
| 30034 |
+
"loss": 7.9007,
|
| 30035 |
+
"step": 4928
|
| 30036 |
+
},
|
| 30037 |
+
{
|
| 30038 |
+
"epoch": 0.04,
|
| 30039 |
+
"learning_rate": 0.0004,
|
| 30040 |
+
"loss": 2.5789,
|
| 30041 |
+
"step": 4929
|
| 30042 |
+
},
|
| 30043 |
+
{
|
| 30044 |
+
"epoch": 0.04,
|
| 30045 |
+
"learning_rate": 0.0004,
|
| 30046 |
+
"loss": 8.0357,
|
| 30047 |
+
"step": 4930
|
| 30048 |
+
},
|
| 30049 |
+
{
|
| 30050 |
+
"epoch": 0.04,
|
| 30051 |
+
"learning_rate": 0.0004,
|
| 30052 |
+
"loss": 6.8846,
|
| 30053 |
+
"step": 4931
|
| 30054 |
+
},
|
| 30055 |
+
{
|
| 30056 |
+
"epoch": 0.04,
|
| 30057 |
+
"learning_rate": 0.0004,
|
| 30058 |
+
"loss": 5.7409,
|
| 30059 |
+
"step": 4932
|
| 30060 |
+
},
|
| 30061 |
+
{
|
| 30062 |
+
"epoch": 0.04,
|
| 30063 |
+
"learning_rate": 0.0004,
|
| 30064 |
+
"loss": 8.4081,
|
| 30065 |
+
"step": 4933
|
| 30066 |
+
},
|
| 30067 |
+
{
|
| 30068 |
+
"epoch": 0.04,
|
| 30069 |
+
"learning_rate": 0.0004,
|
| 30070 |
+
"loss": 7.3187,
|
| 30071 |
+
"step": 4934
|
| 30072 |
+
},
|
| 30073 |
+
{
|
| 30074 |
+
"epoch": 0.04,
|
| 30075 |
+
"learning_rate": 0.0004,
|
| 30076 |
+
"loss": 8.1926,
|
| 30077 |
+
"step": 4935
|
| 30078 |
+
},
|
| 30079 |
+
{
|
| 30080 |
+
"epoch": 0.04,
|
| 30081 |
+
"learning_rate": 0.0004,
|
| 30082 |
+
"loss": 8.2912,
|
| 30083 |
+
"step": 4936
|
| 30084 |
+
},
|
| 30085 |
+
{
|
| 30086 |
+
"epoch": 0.04,
|
| 30087 |
+
"learning_rate": 0.0004,
|
| 30088 |
+
"loss": 6.6701,
|
| 30089 |
+
"step": 4937
|
| 30090 |
+
},
|
| 30091 |
+
{
|
| 30092 |
+
"epoch": 0.04,
|
| 30093 |
+
"learning_rate": 0.0004,
|
| 30094 |
+
"loss": 4.8162,
|
| 30095 |
+
"step": 4938
|
| 30096 |
+
},
|
| 30097 |
+
{
|
| 30098 |
+
"epoch": 0.04,
|
| 30099 |
+
"learning_rate": 0.0004,
|
| 30100 |
+
"loss": 2.7585,
|
| 30101 |
+
"step": 4939
|
| 30102 |
+
},
|
| 30103 |
+
{
|
| 30104 |
+
"epoch": 0.04,
|
| 30105 |
+
"learning_rate": 0.0004,
|
| 30106 |
+
"loss": 6.6232,
|
| 30107 |
+
"step": 4940
|
| 30108 |
+
},
|
| 30109 |
+
{
|
| 30110 |
+
"epoch": 0.04,
|
| 30111 |
+
"learning_rate": 0.0004,
|
| 30112 |
+
"loss": 7.9613,
|
| 30113 |
+
"step": 4941
|
| 30114 |
+
},
|
| 30115 |
+
{
|
| 30116 |
+
"epoch": 0.04,
|
| 30117 |
+
"learning_rate": 0.0004,
|
| 30118 |
+
"loss": 3.954,
|
| 30119 |
+
"step": 4942
|
| 30120 |
+
},
|
| 30121 |
+
{
|
| 30122 |
+
"epoch": 0.04,
|
| 30123 |
+
"learning_rate": 0.0004,
|
| 30124 |
+
"loss": 2.7287,
|
| 30125 |
+
"step": 4943
|
| 30126 |
+
},
|
| 30127 |
+
{
|
| 30128 |
+
"epoch": 0.04,
|
| 30129 |
+
"learning_rate": 0.0004,
|
| 30130 |
+
"loss": 4.6305,
|
| 30131 |
+
"step": 4944
|
| 30132 |
+
},
|
| 30133 |
+
{
|
| 30134 |
+
"epoch": 0.04,
|
| 30135 |
+
"learning_rate": 0.0004,
|
| 30136 |
+
"loss": 2.6932,
|
| 30137 |
+
"step": 4945
|
| 30138 |
+
},
|
| 30139 |
+
{
|
| 30140 |
+
"epoch": 0.04,
|
| 30141 |
+
"learning_rate": 0.0004,
|
| 30142 |
+
"loss": 2.6798,
|
| 30143 |
+
"step": 4946
|
| 30144 |
+
},
|
| 30145 |
+
{
|
| 30146 |
+
"epoch": 0.04,
|
| 30147 |
+
"learning_rate": 0.0004,
|
| 30148 |
+
"loss": 3.6665,
|
| 30149 |
+
"step": 4947
|
| 30150 |
+
},
|
| 30151 |
+
{
|
| 30152 |
+
"epoch": 0.04,
|
| 30153 |
+
"learning_rate": 0.0004,
|
| 30154 |
+
"loss": 6.1462,
|
| 30155 |
+
"step": 4948
|
| 30156 |
+
},
|
| 30157 |
+
{
|
| 30158 |
+
"epoch": 0.04,
|
| 30159 |
+
"learning_rate": 0.0004,
|
| 30160 |
+
"loss": 4.0676,
|
| 30161 |
+
"step": 4949
|
| 30162 |
+
},
|
| 30163 |
+
{
|
| 30164 |
+
"epoch": 0.04,
|
| 30165 |
+
"learning_rate": 0.0004,
|
| 30166 |
+
"loss": 4.2834,
|
| 30167 |
+
"step": 4950
|
| 30168 |
+
},
|
| 30169 |
+
{
|
| 30170 |
+
"epoch": 0.04,
|
| 30171 |
+
"learning_rate": 0.0004,
|
| 30172 |
+
"loss": 5.8273,
|
| 30173 |
+
"step": 4951
|
| 30174 |
+
},
|
| 30175 |
+
{
|
| 30176 |
+
"epoch": 0.04,
|
| 30177 |
+
"learning_rate": 0.0004,
|
| 30178 |
+
"loss": 7.635,
|
| 30179 |
+
"step": 4952
|
| 30180 |
+
},
|
| 30181 |
+
{
|
| 30182 |
+
"epoch": 0.04,
|
| 30183 |
+
"learning_rate": 0.0004,
|
| 30184 |
+
"loss": 8.9245,
|
| 30185 |
+
"step": 4953
|
| 30186 |
+
},
|
| 30187 |
+
{
|
| 30188 |
+
"epoch": 0.04,
|
| 30189 |
+
"learning_rate": 0.0004,
|
| 30190 |
+
"loss": 8.5401,
|
| 30191 |
+
"step": 4954
|
| 30192 |
+
},
|
| 30193 |
+
{
|
| 30194 |
+
"epoch": 0.04,
|
| 30195 |
+
"learning_rate": 0.0004,
|
| 30196 |
+
"loss": 8.2944,
|
| 30197 |
+
"step": 4955
|
| 30198 |
+
},
|
| 30199 |
+
{
|
| 30200 |
+
"epoch": 0.04,
|
| 30201 |
+
"learning_rate": 0.0004,
|
| 30202 |
+
"loss": 6.6151,
|
| 30203 |
+
"step": 4956
|
| 30204 |
+
},
|
| 30205 |
+
{
|
| 30206 |
+
"epoch": 0.04,
|
| 30207 |
+
"learning_rate": 0.0004,
|
| 30208 |
+
"loss": 4.3668,
|
| 30209 |
+
"step": 4957
|
| 30210 |
+
},
|
| 30211 |
+
{
|
| 30212 |
+
"epoch": 0.04,
|
| 30213 |
+
"learning_rate": 0.0004,
|
| 30214 |
+
"loss": 7.4506,
|
| 30215 |
+
"step": 4958
|
| 30216 |
+
},
|
| 30217 |
+
{
|
| 30218 |
+
"epoch": 0.04,
|
| 30219 |
+
"learning_rate": 0.0004,
|
| 30220 |
+
"loss": 7.8919,
|
| 30221 |
+
"step": 4959
|
| 30222 |
+
},
|
| 30223 |
+
{
|
| 30224 |
+
"epoch": 0.04,
|
| 30225 |
+
"learning_rate": 0.0004,
|
| 30226 |
+
"loss": 8.7462,
|
| 30227 |
+
"step": 4960
|
| 30228 |
+
},
|
| 30229 |
+
{
|
| 30230 |
+
"epoch": 0.04,
|
| 30231 |
+
"learning_rate": 0.0004,
|
| 30232 |
+
"loss": 5.8915,
|
| 30233 |
+
"step": 4961
|
| 30234 |
+
},
|
| 30235 |
+
{
|
| 30236 |
+
"epoch": 0.04,
|
| 30237 |
+
"learning_rate": 0.0004,
|
| 30238 |
+
"loss": 7.4696,
|
| 30239 |
+
"step": 4962
|
| 30240 |
+
},
|
| 30241 |
+
{
|
| 30242 |
+
"epoch": 0.04,
|
| 30243 |
+
"learning_rate": 0.0004,
|
| 30244 |
+
"loss": 7.0112,
|
| 30245 |
+
"step": 4963
|
| 30246 |
+
},
|
| 30247 |
+
{
|
| 30248 |
+
"epoch": 0.04,
|
| 30249 |
+
"learning_rate": 0.0004,
|
| 30250 |
+
"loss": 8.1888,
|
| 30251 |
+
"step": 4964
|
| 30252 |
+
},
|
| 30253 |
+
{
|
| 30254 |
+
"epoch": 0.04,
|
| 30255 |
+
"learning_rate": 0.0004,
|
| 30256 |
+
"loss": 7.1465,
|
| 30257 |
+
"step": 4965
|
| 30258 |
+
},
|
| 30259 |
+
{
|
| 30260 |
+
"epoch": 0.04,
|
| 30261 |
+
"learning_rate": 0.0004,
|
| 30262 |
+
"loss": 3.6028,
|
| 30263 |
+
"step": 4966
|
| 30264 |
+
},
|
| 30265 |
+
{
|
| 30266 |
+
"epoch": 0.04,
|
| 30267 |
+
"learning_rate": 0.0004,
|
| 30268 |
+
"loss": 3.279,
|
| 30269 |
+
"step": 4967
|
| 30270 |
+
},
|
| 30271 |
+
{
|
| 30272 |
+
"epoch": 0.04,
|
| 30273 |
+
"learning_rate": 0.0004,
|
| 30274 |
+
"loss": 6.4619,
|
| 30275 |
+
"step": 4968
|
| 30276 |
+
},
|
| 30277 |
+
{
|
| 30278 |
+
"epoch": 0.04,
|
| 30279 |
+
"learning_rate": 0.0004,
|
| 30280 |
+
"loss": 6.7617,
|
| 30281 |
+
"step": 4969
|
| 30282 |
+
},
|
| 30283 |
+
{
|
| 30284 |
+
"epoch": 0.04,
|
| 30285 |
+
"learning_rate": 0.0004,
|
| 30286 |
+
"loss": 8.0521,
|
| 30287 |
+
"step": 4970
|
| 30288 |
+
},
|
| 30289 |
+
{
|
| 30290 |
+
"epoch": 0.04,
|
| 30291 |
+
"learning_rate": 0.0004,
|
| 30292 |
+
"loss": 3.9583,
|
| 30293 |
+
"step": 4971
|
| 30294 |
+
},
|
| 30295 |
+
{
|
| 30296 |
+
"epoch": 0.04,
|
| 30297 |
+
"learning_rate": 0.0004,
|
| 30298 |
+
"loss": 8.5725,
|
| 30299 |
+
"step": 4972
|
| 30300 |
+
},
|
| 30301 |
+
{
|
| 30302 |
+
"epoch": 0.04,
|
| 30303 |
+
"learning_rate": 0.0004,
|
| 30304 |
+
"loss": 6.3248,
|
| 30305 |
+
"step": 4973
|
| 30306 |
+
},
|
| 30307 |
+
{
|
| 30308 |
+
"epoch": 0.04,
|
| 30309 |
+
"learning_rate": 0.0004,
|
| 30310 |
+
"loss": 2.9984,
|
| 30311 |
+
"step": 4974
|
| 30312 |
+
},
|
| 30313 |
+
{
|
| 30314 |
+
"epoch": 0.04,
|
| 30315 |
+
"learning_rate": 0.0004,
|
| 30316 |
+
"loss": 5.7955,
|
| 30317 |
+
"step": 4975
|
| 30318 |
+
},
|
| 30319 |
+
{
|
| 30320 |
+
"epoch": 0.04,
|
| 30321 |
+
"learning_rate": 0.0004,
|
| 30322 |
+
"loss": 5.4351,
|
| 30323 |
+
"step": 4976
|
| 30324 |
+
},
|
| 30325 |
+
{
|
| 30326 |
+
"epoch": 0.04,
|
| 30327 |
+
"learning_rate": 0.0004,
|
| 30328 |
+
"loss": 3.5412,
|
| 30329 |
+
"step": 4977
|
| 30330 |
+
},
|
| 30331 |
+
{
|
| 30332 |
+
"epoch": 0.04,
|
| 30333 |
+
"learning_rate": 0.0004,
|
| 30334 |
+
"loss": 9.4986,
|
| 30335 |
+
"step": 4978
|
| 30336 |
+
},
|
| 30337 |
+
{
|
| 30338 |
+
"epoch": 0.04,
|
| 30339 |
+
"learning_rate": 0.0004,
|
| 30340 |
+
"loss": 3.4686,
|
| 30341 |
+
"step": 4979
|
| 30342 |
+
},
|
| 30343 |
+
{
|
| 30344 |
+
"epoch": 0.04,
|
| 30345 |
+
"learning_rate": 0.0004,
|
| 30346 |
+
"loss": 6.3709,
|
| 30347 |
+
"step": 4980
|
| 30348 |
+
},
|
| 30349 |
+
{
|
| 30350 |
+
"epoch": 0.04,
|
| 30351 |
+
"learning_rate": 0.0004,
|
| 30352 |
+
"loss": 3.4326,
|
| 30353 |
+
"step": 4981
|
| 30354 |
+
},
|
| 30355 |
+
{
|
| 30356 |
+
"epoch": 0.04,
|
| 30357 |
+
"learning_rate": 0.0004,
|
| 30358 |
+
"loss": 5.3118,
|
| 30359 |
+
"step": 4982
|
| 30360 |
+
},
|
| 30361 |
+
{
|
| 30362 |
+
"epoch": 0.04,
|
| 30363 |
+
"learning_rate": 0.0004,
|
| 30364 |
+
"loss": 6.2933,
|
| 30365 |
+
"step": 4983
|
| 30366 |
+
},
|
| 30367 |
+
{
|
| 30368 |
+
"epoch": 0.04,
|
| 30369 |
+
"learning_rate": 0.0004,
|
| 30370 |
+
"loss": 7.2728,
|
| 30371 |
+
"step": 4984
|
| 30372 |
+
},
|
| 30373 |
+
{
|
| 30374 |
+
"epoch": 0.04,
|
| 30375 |
+
"learning_rate": 0.0004,
|
| 30376 |
+
"loss": 5.5518,
|
| 30377 |
+
"step": 4985
|
| 30378 |
+
},
|
| 30379 |
+
{
|
| 30380 |
+
"epoch": 0.04,
|
| 30381 |
+
"learning_rate": 0.0004,
|
| 30382 |
+
"loss": 5.8085,
|
| 30383 |
+
"step": 4986
|
| 30384 |
+
},
|
| 30385 |
+
{
|
| 30386 |
+
"epoch": 0.04,
|
| 30387 |
+
"learning_rate": 0.0004,
|
| 30388 |
+
"loss": 6.024,
|
| 30389 |
+
"step": 4987
|
| 30390 |
+
},
|
| 30391 |
+
{
|
| 30392 |
+
"epoch": 0.04,
|
| 30393 |
+
"learning_rate": 0.0004,
|
| 30394 |
+
"loss": 6.7633,
|
| 30395 |
+
"step": 4988
|
| 30396 |
+
},
|
| 30397 |
+
{
|
| 30398 |
+
"epoch": 0.04,
|
| 30399 |
+
"learning_rate": 0.0004,
|
| 30400 |
+
"loss": 3.9099,
|
| 30401 |
+
"step": 4989
|
| 30402 |
+
},
|
| 30403 |
+
{
|
| 30404 |
+
"epoch": 0.04,
|
| 30405 |
+
"learning_rate": 0.0004,
|
| 30406 |
+
"loss": 3.0304,
|
| 30407 |
+
"step": 4990
|
| 30408 |
+
},
|
| 30409 |
+
{
|
| 30410 |
+
"epoch": 0.04,
|
| 30411 |
+
"learning_rate": 0.0004,
|
| 30412 |
+
"loss": 5.939,
|
| 30413 |
+
"step": 4991
|
| 30414 |
+
},
|
| 30415 |
+
{
|
| 30416 |
+
"epoch": 0.04,
|
| 30417 |
+
"learning_rate": 0.0004,
|
| 30418 |
+
"loss": 3.1024,
|
| 30419 |
+
"step": 4992
|
| 30420 |
+
},
|
| 30421 |
+
{
|
| 30422 |
+
"epoch": 0.04,
|
| 30423 |
+
"learning_rate": 0.0004,
|
| 30424 |
+
"loss": 3.2432,
|
| 30425 |
+
"step": 4993
|
| 30426 |
+
},
|
| 30427 |
+
{
|
| 30428 |
+
"epoch": 0.04,
|
| 30429 |
+
"learning_rate": 0.0004,
|
| 30430 |
+
"loss": 6.9213,
|
| 30431 |
+
"step": 4994
|
| 30432 |
+
},
|
| 30433 |
+
{
|
| 30434 |
+
"epoch": 0.04,
|
| 30435 |
+
"learning_rate": 0.0004,
|
| 30436 |
+
"loss": 4.6644,
|
| 30437 |
+
"step": 4995
|
| 30438 |
+
},
|
| 30439 |
+
{
|
| 30440 |
+
"epoch": 0.04,
|
| 30441 |
+
"learning_rate": 0.0004,
|
| 30442 |
+
"loss": 5.9821,
|
| 30443 |
+
"step": 4996
|
| 30444 |
+
},
|
| 30445 |
+
{
|
| 30446 |
+
"epoch": 0.04,
|
| 30447 |
+
"learning_rate": 0.0004,
|
| 30448 |
+
"loss": 4.9677,
|
| 30449 |
+
"step": 4997
|
| 30450 |
+
},
|
| 30451 |
+
{
|
| 30452 |
+
"epoch": 0.04,
|
| 30453 |
+
"learning_rate": 0.0004,
|
| 30454 |
+
"loss": 6.7992,
|
| 30455 |
+
"step": 4998
|
| 30456 |
+
},
|
| 30457 |
+
{
|
| 30458 |
+
"epoch": 0.04,
|
| 30459 |
+
"learning_rate": 0.0004,
|
| 30460 |
+
"loss": 3.2743,
|
| 30461 |
+
"step": 4999
|
| 30462 |
+
},
|
| 30463 |
+
{
|
| 30464 |
+
"epoch": 0.04,
|
| 30465 |
+
"learning_rate": 0.0004,
|
| 30466 |
+
"loss": 4.5054,
|
| 30467 |
+
"step": 5000
|
| 30468 |
+
},
|
| 30469 |
+
{
|
| 30470 |
+
"epoch": 0.04,
|
| 30471 |
+
"eval_loss": 6.463876247406006,
|
| 30472 |
+
"eval_runtime": 22.4171,
|
| 30473 |
+
"eval_samples_per_second": 2.23,
|
| 30474 |
+
"eval_steps_per_second": 1.115,
|
| 30475 |
+
"step": 5000
|
| 30476 |
+
},
|
| 30477 |
+
{
|
| 30478 |
+
"epoch": 0.04,
|
| 30479 |
+
"mmlu_eval_accuracy": 0.2525477994227994,
|
| 30480 |
+
"mmlu_eval_accuracy_abstract_algebra": 0.18181818181818182,
|
| 30481 |
+
"mmlu_eval_accuracy_anatomy": 0.07142857142857142,
|
| 30482 |
+
"mmlu_eval_accuracy_astronomy": 0.3125,
|
| 30483 |
+
"mmlu_eval_accuracy_business_ethics": 0.4444444444444444,
|
| 30484 |
+
"mmlu_loss": 3.4964506435394287,
|
| 30485 |
+
"step": 5000
|
| 30486 |
+
},
|
| 30487 |
+
{
|
| 30488 |
+
"epoch": 0.04,
|
| 30489 |
+
"learning_rate": 0.0004,
|
| 30490 |
+
"loss": 7.8989,
|
| 30491 |
+
"step": 5001
|
| 30492 |
+
},
|
| 30493 |
+
{
|
| 30494 |
+
"epoch": 0.04,
|
| 30495 |
+
"learning_rate": 0.0004,
|
| 30496 |
+
"loss": 8.045,
|
| 30497 |
+
"step": 5002
|
| 30498 |
+
},
|
| 30499 |
+
{
|
| 30500 |
+
"epoch": 0.04,
|
| 30501 |
+
"learning_rate": 0.0004,
|
| 30502 |
+
"loss": 8.2012,
|
| 30503 |
+
"step": 5003
|
| 30504 |
+
},
|
| 30505 |
+
{
|
| 30506 |
+
"epoch": 0.04,
|
| 30507 |
+
"learning_rate": 0.0004,
|
| 30508 |
+
"loss": 7.5305,
|
| 30509 |
+
"step": 5004
|
| 30510 |
+
},
|
| 30511 |
+
{
|
| 30512 |
+
"epoch": 0.04,
|
| 30513 |
+
"learning_rate": 0.0004,
|
| 30514 |
+
"loss": 7.2522,
|
| 30515 |
+
"step": 5005
|
| 30516 |
+
},
|
| 30517 |
+
{
|
| 30518 |
+
"epoch": 0.04,
|
| 30519 |
+
"learning_rate": 0.0004,
|
| 30520 |
+
"loss": 7.5176,
|
| 30521 |
+
"step": 5006
|
| 30522 |
+
},
|
| 30523 |
+
{
|
| 30524 |
+
"epoch": 0.04,
|
| 30525 |
+
"learning_rate": 0.0004,
|
| 30526 |
+
"loss": 6.9333,
|
| 30527 |
+
"step": 5007
|
| 30528 |
+
},
|
| 30529 |
+
{
|
| 30530 |
+
"epoch": 0.04,
|
| 30531 |
+
"learning_rate": 0.0004,
|
| 30532 |
+
"loss": 5.6115,
|
| 30533 |
+
"step": 5008
|
| 30534 |
+
},
|
| 30535 |
+
{
|
| 30536 |
+
"epoch": 0.04,
|
| 30537 |
+
"learning_rate": 0.0004,
|
| 30538 |
+
"loss": 7.2527,
|
| 30539 |
+
"step": 5009
|
| 30540 |
+
},
|
| 30541 |
+
{
|
| 30542 |
+
"epoch": 0.04,
|
| 30543 |
+
"learning_rate": 0.0004,
|
| 30544 |
+
"loss": 7.3046,
|
| 30545 |
+
"step": 5010
|
| 30546 |
+
},
|
| 30547 |
+
{
|
| 30548 |
+
"epoch": 0.04,
|
| 30549 |
+
"learning_rate": 0.0004,
|
| 30550 |
+
"loss": 6.2746,
|
| 30551 |
+
"step": 5011
|
| 30552 |
+
},
|
| 30553 |
+
{
|
| 30554 |
+
"epoch": 0.04,
|
| 30555 |
+
"learning_rate": 0.0004,
|
| 30556 |
+
"loss": 5.8078,
|
| 30557 |
+
"step": 5012
|
| 30558 |
+
},
|
| 30559 |
+
{
|
| 30560 |
+
"epoch": 0.04,
|
| 30561 |
+
"learning_rate": 0.0004,
|
| 30562 |
+
"loss": 5.0053,
|
| 30563 |
+
"step": 5013
|
| 30564 |
+
},
|
| 30565 |
+
{
|
| 30566 |
+
"epoch": 0.04,
|
| 30567 |
+
"learning_rate": 0.0004,
|
| 30568 |
+
"loss": 8.3415,
|
| 30569 |
+
"step": 5014
|
| 30570 |
+
},
|
| 30571 |
+
{
|
| 30572 |
+
"epoch": 0.04,
|
| 30573 |
+
"learning_rate": 0.0004,
|
| 30574 |
+
"loss": 8.1199,
|
| 30575 |
+
"step": 5015
|
| 30576 |
+
},
|
| 30577 |
+
{
|
| 30578 |
+
"epoch": 0.04,
|
| 30579 |
+
"learning_rate": 0.0004,
|
| 30580 |
+
"loss": 5.6292,
|
| 30581 |
+
"step": 5016
|
| 30582 |
+
},
|
| 30583 |
+
{
|
| 30584 |
+
"epoch": 0.04,
|
| 30585 |
+
"learning_rate": 0.0004,
|
| 30586 |
+
"loss": 3.5195,
|
| 30587 |
+
"step": 5017
|
| 30588 |
+
},
|
| 30589 |
+
{
|
| 30590 |
+
"epoch": 0.04,
|
| 30591 |
+
"learning_rate": 0.0004,
|
| 30592 |
+
"loss": 7.2367,
|
| 30593 |
+
"step": 5018
|
| 30594 |
+
},
|
| 30595 |
+
{
|
| 30596 |
+
"epoch": 0.04,
|
| 30597 |
+
"learning_rate": 0.0004,
|
| 30598 |
+
"loss": 4.219,
|
| 30599 |
+
"step": 5019
|
| 30600 |
+
},
|
| 30601 |
+
{
|
| 30602 |
+
"epoch": 0.04,
|
| 30603 |
+
"learning_rate": 0.0004,
|
| 30604 |
+
"loss": 3.4376,
|
| 30605 |
+
"step": 5020
|
| 30606 |
+
},
|
| 30607 |
+
{
|
| 30608 |
+
"epoch": 0.04,
|
| 30609 |
+
"learning_rate": 0.0004,
|
| 30610 |
+
"loss": 4.1413,
|
| 30611 |
+
"step": 5021
|
| 30612 |
+
},
|
| 30613 |
+
{
|
| 30614 |
+
"epoch": 0.04,
|
| 30615 |
+
"learning_rate": 0.0004,
|
| 30616 |
+
"loss": 5.7102,
|
| 30617 |
+
"step": 5022
|
| 30618 |
+
},
|
| 30619 |
+
{
|
| 30620 |
+
"epoch": 0.04,
|
| 30621 |
+
"learning_rate": 0.0004,
|
| 30622 |
+
"loss": 3.3297,
|
| 30623 |
+
"step": 5023
|
| 30624 |
+
},
|
| 30625 |
+
{
|
| 30626 |
+
"epoch": 0.04,
|
| 30627 |
+
"learning_rate": 0.0004,
|
| 30628 |
+
"loss": 8.9923,
|
| 30629 |
+
"step": 5024
|
| 30630 |
+
},
|
| 30631 |
+
{
|
| 30632 |
+
"epoch": 0.04,
|
| 30633 |
+
"learning_rate": 0.0004,
|
| 30634 |
+
"loss": 3.0071,
|
| 30635 |
+
"step": 5025
|
| 30636 |
+
},
|
| 30637 |
+
{
|
| 30638 |
+
"epoch": 0.04,
|
| 30639 |
+
"learning_rate": 0.0004,
|
| 30640 |
+
"loss": 7.351,
|
| 30641 |
+
"step": 5026
|
| 30642 |
+
},
|
| 30643 |
+
{
|
| 30644 |
+
"epoch": 0.04,
|
| 30645 |
+
"learning_rate": 0.0004,
|
| 30646 |
+
"loss": 2.824,
|
| 30647 |
+
"step": 5027
|
| 30648 |
+
},
|
| 30649 |
+
{
|
| 30650 |
+
"epoch": 0.04,
|
| 30651 |
+
"learning_rate": 0.0004,
|
| 30652 |
+
"loss": 7.5031,
|
| 30653 |
+
"step": 5028
|
| 30654 |
+
},
|
| 30655 |
+
{
|
| 30656 |
+
"epoch": 0.04,
|
| 30657 |
+
"learning_rate": 0.0004,
|
| 30658 |
+
"loss": 7.7362,
|
| 30659 |
+
"step": 5029
|
| 30660 |
+
},
|
| 30661 |
+
{
|
| 30662 |
+
"epoch": 0.04,
|
| 30663 |
+
"learning_rate": 0.0004,
|
| 30664 |
+
"loss": 5.5291,
|
| 30665 |
+
"step": 5030
|
| 30666 |
+
},
|
| 30667 |
+
{
|
| 30668 |
+
"epoch": 0.04,
|
| 30669 |
+
"learning_rate": 0.0004,
|
| 30670 |
+
"loss": 6.2754,
|
| 30671 |
+
"step": 5031
|
| 30672 |
+
},
|
| 30673 |
+
{
|
| 30674 |
+
"epoch": 0.04,
|
| 30675 |
+
"learning_rate": 0.0004,
|
| 30676 |
+
"loss": 3.1831,
|
| 30677 |
+
"step": 5032
|
| 30678 |
+
},
|
| 30679 |
+
{
|
| 30680 |
+
"epoch": 0.04,
|
| 30681 |
+
"learning_rate": 0.0004,
|
| 30682 |
+
"loss": 7.1838,
|
| 30683 |
+
"step": 5033
|
| 30684 |
+
},
|
| 30685 |
+
{
|
| 30686 |
+
"epoch": 0.04,
|
| 30687 |
+
"learning_rate": 0.0004,
|
| 30688 |
+
"loss": 5.2428,
|
| 30689 |
+
"step": 5034
|
| 30690 |
+
},
|
| 30691 |
+
{
|
| 30692 |
+
"epoch": 0.04,
|
| 30693 |
+
"learning_rate": 0.0004,
|
| 30694 |
+
"loss": 3.0482,
|
| 30695 |
+
"step": 5035
|
| 30696 |
+
},
|
| 30697 |
+
{
|
| 30698 |
+
"epoch": 0.04,
|
| 30699 |
+
"learning_rate": 0.0004,
|
| 30700 |
+
"loss": 5.2135,
|
| 30701 |
+
"step": 5036
|
| 30702 |
+
},
|
| 30703 |
+
{
|
| 30704 |
+
"epoch": 0.04,
|
| 30705 |
+
"learning_rate": 0.0004,
|
| 30706 |
+
"loss": 6.2257,
|
| 30707 |
+
"step": 5037
|
| 30708 |
+
},
|
| 30709 |
+
{
|
| 30710 |
+
"epoch": 0.04,
|
| 30711 |
+
"learning_rate": 0.0004,
|
| 30712 |
+
"loss": 6.7514,
|
| 30713 |
+
"step": 5038
|
| 30714 |
+
},
|
| 30715 |
+
{
|
| 30716 |
+
"epoch": 0.04,
|
| 30717 |
+
"learning_rate": 0.0004,
|
| 30718 |
+
"loss": 5.9855,
|
| 30719 |
+
"step": 5039
|
| 30720 |
+
},
|
| 30721 |
+
{
|
| 30722 |
+
"epoch": 0.04,
|
| 30723 |
+
"learning_rate": 0.0004,
|
| 30724 |
+
"loss": 2.9738,
|
| 30725 |
+
"step": 5040
|
| 30726 |
+
},
|
| 30727 |
+
{
|
| 30728 |
+
"epoch": 0.04,
|
| 30729 |
+
"learning_rate": 0.0004,
|
| 30730 |
+
"loss": 3.3993,
|
| 30731 |
+
"step": 5041
|
| 30732 |
+
},
|
| 30733 |
+
{
|
| 30734 |
+
"epoch": 0.04,
|
| 30735 |
+
"learning_rate": 0.0004,
|
| 30736 |
+
"loss": 8.35,
|
| 30737 |
+
"step": 5042
|
| 30738 |
+
},
|
| 30739 |
+
{
|
| 30740 |
+
"epoch": 0.04,
|
| 30741 |
+
"learning_rate": 0.0004,
|
| 30742 |
+
"loss": 7.1362,
|
| 30743 |
+
"step": 5043
|
| 30744 |
+
},
|
| 30745 |
+
{
|
| 30746 |
+
"epoch": 0.04,
|
| 30747 |
+
"learning_rate": 0.0004,
|
| 30748 |
+
"loss": 3.2576,
|
| 30749 |
+
"step": 5044
|
| 30750 |
+
},
|
| 30751 |
+
{
|
| 30752 |
+
"epoch": 0.04,
|
| 30753 |
+
"learning_rate": 0.0004,
|
| 30754 |
+
"loss": 4.0811,
|
| 30755 |
+
"step": 5045
|
| 30756 |
+
},
|
| 30757 |
+
{
|
| 30758 |
+
"epoch": 0.04,
|
| 30759 |
+
"learning_rate": 0.0004,
|
| 30760 |
+
"loss": 5.1524,
|
| 30761 |
+
"step": 5046
|
| 30762 |
+
},
|
| 30763 |
+
{
|
| 30764 |
+
"epoch": 0.04,
|
| 30765 |
+
"learning_rate": 0.0004,
|
| 30766 |
+
"loss": 3.3487,
|
| 30767 |
+
"step": 5047
|
| 30768 |
+
},
|
| 30769 |
+
{
|
| 30770 |
+
"epoch": 0.04,
|
| 30771 |
+
"learning_rate": 0.0004,
|
| 30772 |
+
"loss": 3.6397,
|
| 30773 |
+
"step": 5048
|
| 30774 |
+
},
|
| 30775 |
+
{
|
| 30776 |
+
"epoch": 0.04,
|
| 30777 |
+
"learning_rate": 0.0004,
|
| 30778 |
+
"loss": 3.5974,
|
| 30779 |
+
"step": 5049
|
| 30780 |
+
},
|
| 30781 |
+
{
|
| 30782 |
+
"epoch": 0.04,
|
| 30783 |
+
"learning_rate": 0.0004,
|
| 30784 |
+
"loss": 3.3061,
|
| 30785 |
+
"step": 5050
|
| 30786 |
+
},
|
| 30787 |
+
{
|
| 30788 |
+
"epoch": 0.04,
|
| 30789 |
+
"learning_rate": 0.0004,
|
| 30790 |
+
"loss": 8.1425,
|
| 30791 |
+
"step": 5051
|
| 30792 |
+
},
|
| 30793 |
+
{
|
| 30794 |
+
"epoch": 0.04,
|
| 30795 |
+
"learning_rate": 0.0004,
|
| 30796 |
+
"loss": 7.7089,
|
| 30797 |
+
"step": 5052
|
| 30798 |
+
},
|
| 30799 |
+
{
|
| 30800 |
+
"epoch": 0.04,
|
| 30801 |
+
"learning_rate": 0.0004,
|
| 30802 |
+
"loss": 7.2659,
|
| 30803 |
+
"step": 5053
|
| 30804 |
+
},
|
| 30805 |
+
{
|
| 30806 |
+
"epoch": 0.04,
|
| 30807 |
+
"learning_rate": 0.0004,
|
| 30808 |
+
"loss": 8.8699,
|
| 30809 |
+
"step": 5054
|
| 30810 |
+
},
|
| 30811 |
+
{
|
| 30812 |
+
"epoch": 0.04,
|
| 30813 |
+
"learning_rate": 0.0004,
|
| 30814 |
+
"loss": 6.561,
|
| 30815 |
+
"step": 5055
|
| 30816 |
+
},
|
| 30817 |
+
{
|
| 30818 |
+
"epoch": 0.04,
|
| 30819 |
+
"learning_rate": 0.0004,
|
| 30820 |
+
"loss": 8.8095,
|
| 30821 |
+
"step": 5056
|
| 30822 |
+
},
|
| 30823 |
+
{
|
| 30824 |
+
"epoch": 0.04,
|
| 30825 |
+
"learning_rate": 0.0004,
|
| 30826 |
+
"loss": 8.5513,
|
| 30827 |
+
"step": 5057
|
| 30828 |
+
},
|
| 30829 |
+
{
|
| 30830 |
+
"epoch": 0.04,
|
| 30831 |
+
"learning_rate": 0.0004,
|
| 30832 |
+
"loss": 5.8696,
|
| 30833 |
+
"step": 5058
|
| 30834 |
+
},
|
| 30835 |
+
{
|
| 30836 |
+
"epoch": 0.04,
|
| 30837 |
+
"learning_rate": 0.0004,
|
| 30838 |
+
"loss": 7.397,
|
| 30839 |
+
"step": 5059
|
| 30840 |
+
},
|
| 30841 |
+
{
|
| 30842 |
+
"epoch": 0.04,
|
| 30843 |
+
"learning_rate": 0.0004,
|
| 30844 |
+
"loss": 7.3762,
|
| 30845 |
+
"step": 5060
|
| 30846 |
+
},
|
| 30847 |
+
{
|
| 30848 |
+
"epoch": 0.04,
|
| 30849 |
+
"learning_rate": 0.0004,
|
| 30850 |
+
"loss": 7.1008,
|
| 30851 |
+
"step": 5061
|
| 30852 |
+
},
|
| 30853 |
+
{
|
| 30854 |
+
"epoch": 0.04,
|
| 30855 |
+
"learning_rate": 0.0004,
|
| 30856 |
+
"loss": 6.4717,
|
| 30857 |
+
"step": 5062
|
| 30858 |
+
},
|
| 30859 |
+
{
|
| 30860 |
+
"epoch": 0.04,
|
| 30861 |
+
"learning_rate": 0.0004,
|
| 30862 |
+
"loss": 6.8093,
|
| 30863 |
+
"step": 5063
|
| 30864 |
+
},
|
| 30865 |
+
{
|
| 30866 |
+
"epoch": 0.04,
|
| 30867 |
+
"learning_rate": 0.0004,
|
| 30868 |
+
"loss": 3.9448,
|
| 30869 |
+
"step": 5064
|
| 30870 |
+
},
|
| 30871 |
+
{
|
| 30872 |
+
"epoch": 0.04,
|
| 30873 |
+
"learning_rate": 0.0004,
|
| 30874 |
+
"loss": 3.4344,
|
| 30875 |
+
"step": 5065
|
| 30876 |
+
},
|
| 30877 |
+
{
|
| 30878 |
+
"epoch": 0.04,
|
| 30879 |
+
"learning_rate": 0.0004,
|
| 30880 |
+
"loss": 7.6244,
|
| 30881 |
+
"step": 5066
|
| 30882 |
+
},
|
| 30883 |
+
{
|
| 30884 |
+
"epoch": 0.04,
|
| 30885 |
+
"learning_rate": 0.0004,
|
| 30886 |
+
"loss": 3.1716,
|
| 30887 |
+
"step": 5067
|
| 30888 |
+
},
|
| 30889 |
+
{
|
| 30890 |
+
"epoch": 0.04,
|
| 30891 |
+
"learning_rate": 0.0004,
|
| 30892 |
+
"loss": 3.664,
|
| 30893 |
+
"step": 5068
|
| 30894 |
+
},
|
| 30895 |
+
{
|
| 30896 |
+
"epoch": 0.04,
|
| 30897 |
+
"learning_rate": 0.0004,
|
| 30898 |
+
"loss": 6.798,
|
| 30899 |
+
"step": 5069
|
| 30900 |
+
},
|
| 30901 |
+
{
|
| 30902 |
+
"epoch": 0.04,
|
| 30903 |
+
"learning_rate": 0.0004,
|
| 30904 |
+
"loss": 7.5492,
|
| 30905 |
+
"step": 5070
|
| 30906 |
+
},
|
| 30907 |
+
{
|
| 30908 |
+
"epoch": 0.04,
|
| 30909 |
+
"learning_rate": 0.0004,
|
| 30910 |
+
"loss": 6.4213,
|
| 30911 |
+
"step": 5071
|
| 30912 |
+
},
|
| 30913 |
+
{
|
| 30914 |
+
"epoch": 0.04,
|
| 30915 |
+
"learning_rate": 0.0004,
|
| 30916 |
+
"loss": 7.4969,
|
| 30917 |
+
"step": 5072
|
| 30918 |
+
},
|
| 30919 |
+
{
|
| 30920 |
+
"epoch": 0.04,
|
| 30921 |
+
"learning_rate": 0.0004,
|
| 30922 |
+
"loss": 4.6795,
|
| 30923 |
+
"step": 5073
|
| 30924 |
+
},
|
| 30925 |
+
{
|
| 30926 |
+
"epoch": 0.04,
|
| 30927 |
+
"learning_rate": 0.0004,
|
| 30928 |
+
"loss": 3.4509,
|
| 30929 |
+
"step": 5074
|
| 30930 |
+
},
|
| 30931 |
+
{
|
| 30932 |
+
"epoch": 0.04,
|
| 30933 |
+
"learning_rate": 0.0004,
|
| 30934 |
+
"loss": 3.7005,
|
| 30935 |
+
"step": 5075
|
| 30936 |
+
},
|
| 30937 |
+
{
|
| 30938 |
+
"epoch": 0.04,
|
| 30939 |
+
"learning_rate": 0.0004,
|
| 30940 |
+
"loss": 3.0754,
|
| 30941 |
+
"step": 5076
|
| 30942 |
+
},
|
| 30943 |
+
{
|
| 30944 |
+
"epoch": 0.04,
|
| 30945 |
+
"learning_rate": 0.0004,
|
| 30946 |
+
"loss": 3.0044,
|
| 30947 |
+
"step": 5077
|
| 30948 |
+
},
|
| 30949 |
+
{
|
| 30950 |
+
"epoch": 0.04,
|
| 30951 |
+
"learning_rate": 0.0004,
|
| 30952 |
+
"loss": 5.8357,
|
| 30953 |
+
"step": 5078
|
| 30954 |
+
},
|
| 30955 |
+
{
|
| 30956 |
+
"epoch": 0.04,
|
| 30957 |
+
"learning_rate": 0.0004,
|
| 30958 |
+
"loss": 6.0447,
|
| 30959 |
+
"step": 5079
|
| 30960 |
+
},
|
| 30961 |
+
{
|
| 30962 |
+
"epoch": 0.04,
|
| 30963 |
+
"learning_rate": 0.0004,
|
| 30964 |
+
"loss": 3.0046,
|
| 30965 |
+
"step": 5080
|
| 30966 |
+
},
|
| 30967 |
+
{
|
| 30968 |
+
"epoch": 0.04,
|
| 30969 |
+
"learning_rate": 0.0004,
|
| 30970 |
+
"loss": 7.3184,
|
| 30971 |
+
"step": 5081
|
| 30972 |
+
},
|
| 30973 |
+
{
|
| 30974 |
+
"epoch": 0.04,
|
| 30975 |
+
"learning_rate": 0.0004,
|
| 30976 |
+
"loss": 9.1669,
|
| 30977 |
+
"step": 5082
|
| 30978 |
+
},
|
| 30979 |
+
{
|
| 30980 |
+
"epoch": 0.04,
|
| 30981 |
+
"learning_rate": 0.0004,
|
| 30982 |
+
"loss": 2.7666,
|
| 30983 |
+
"step": 5083
|
| 30984 |
+
},
|
| 30985 |
+
{
|
| 30986 |
+
"epoch": 0.04,
|
| 30987 |
+
"learning_rate": 0.0004,
|
| 30988 |
+
"loss": 3.6633,
|
| 30989 |
+
"step": 5084
|
| 30990 |
+
},
|
| 30991 |
+
{
|
| 30992 |
+
"epoch": 0.04,
|
| 30993 |
+
"learning_rate": 0.0004,
|
| 30994 |
+
"loss": 2.5884,
|
| 30995 |
+
"step": 5085
|
| 30996 |
+
},
|
| 30997 |
+
{
|
| 30998 |
+
"epoch": 0.04,
|
| 30999 |
+
"learning_rate": 0.0004,
|
| 31000 |
+
"loss": 7.572,
|
| 31001 |
+
"step": 5086
|
| 31002 |
+
},
|
| 31003 |
+
{
|
| 31004 |
+
"epoch": 0.04,
|
| 31005 |
+
"learning_rate": 0.0004,
|
| 31006 |
+
"loss": 3.7835,
|
| 31007 |
+
"step": 5087
|
| 31008 |
+
},
|
| 31009 |
+
{
|
| 31010 |
+
"epoch": 0.04,
|
| 31011 |
+
"learning_rate": 0.0004,
|
| 31012 |
+
"loss": 9.58,
|
| 31013 |
+
"step": 5088
|
| 31014 |
+
},
|
| 31015 |
+
{
|
| 31016 |
+
"epoch": 0.04,
|
| 31017 |
+
"learning_rate": 0.0004,
|
| 31018 |
+
"loss": 1.8841,
|
| 31019 |
+
"step": 5089
|
| 31020 |
+
},
|
| 31021 |
+
{
|
| 31022 |
+
"epoch": 0.04,
|
| 31023 |
+
"learning_rate": 0.0004,
|
| 31024 |
+
"loss": 1.8848,
|
| 31025 |
+
"step": 5090
|
| 31026 |
+
},
|
| 31027 |
+
{
|
| 31028 |
+
"epoch": 0.04,
|
| 31029 |
+
"learning_rate": 0.0004,
|
| 31030 |
+
"loss": 2.2856,
|
| 31031 |
+
"step": 5091
|
| 31032 |
+
},
|
| 31033 |
+
{
|
| 31034 |
+
"epoch": 0.04,
|
| 31035 |
+
"learning_rate": 0.0004,
|
| 31036 |
+
"loss": 5.4633,
|
| 31037 |
+
"step": 5092
|
| 31038 |
+
},
|
| 31039 |
+
{
|
| 31040 |
+
"epoch": 0.04,
|
| 31041 |
+
"learning_rate": 0.0004,
|
| 31042 |
+
"loss": 6.082,
|
| 31043 |
+
"step": 5093
|
| 31044 |
+
},
|
| 31045 |
+
{
|
| 31046 |
+
"epoch": 0.04,
|
| 31047 |
+
"learning_rate": 0.0004,
|
| 31048 |
+
"loss": 6.8496,
|
| 31049 |
+
"step": 5094
|
| 31050 |
+
},
|
| 31051 |
+
{
|
| 31052 |
+
"epoch": 0.04,
|
| 31053 |
+
"learning_rate": 0.0004,
|
| 31054 |
+
"loss": 6.2212,
|
| 31055 |
+
"step": 5095
|
| 31056 |
+
},
|
| 31057 |
+
{
|
| 31058 |
+
"epoch": 0.04,
|
| 31059 |
+
"learning_rate": 0.0004,
|
| 31060 |
+
"loss": 6.5972,
|
| 31061 |
+
"step": 5096
|
| 31062 |
+
},
|
| 31063 |
+
{
|
| 31064 |
+
"epoch": 0.04,
|
| 31065 |
+
"learning_rate": 0.0004,
|
| 31066 |
+
"loss": 6.9355,
|
| 31067 |
+
"step": 5097
|
| 31068 |
+
},
|
| 31069 |
+
{
|
| 31070 |
+
"epoch": 0.04,
|
| 31071 |
+
"learning_rate": 0.0004,
|
| 31072 |
+
"loss": 7.5645,
|
| 31073 |
+
"step": 5098
|
| 31074 |
+
},
|
| 31075 |
+
{
|
| 31076 |
+
"epoch": 0.04,
|
| 31077 |
+
"learning_rate": 0.0004,
|
| 31078 |
+
"loss": 5.944,
|
| 31079 |
+
"step": 5099
|
| 31080 |
+
},
|
| 31081 |
+
{
|
| 31082 |
+
"epoch": 0.04,
|
| 31083 |
+
"learning_rate": 0.0004,
|
| 31084 |
+
"loss": 1.7678,
|
| 31085 |
+
"step": 5100
|
| 31086 |
+
},
|
| 31087 |
+
{
|
| 31088 |
+
"epoch": 0.04,
|
| 31089 |
+
"learning_rate": 0.0004,
|
| 31090 |
+
"loss": 2.294,
|
| 31091 |
+
"step": 5101
|
| 31092 |
+
},
|
| 31093 |
+
{
|
| 31094 |
+
"epoch": 0.04,
|
| 31095 |
+
"learning_rate": 0.0004,
|
| 31096 |
+
"loss": 8.9452,
|
| 31097 |
+
"step": 5102
|
| 31098 |
+
},
|
| 31099 |
+
{
|
| 31100 |
+
"epoch": 0.04,
|
| 31101 |
+
"learning_rate": 0.0004,
|
| 31102 |
+
"loss": 7.2751,
|
| 31103 |
+
"step": 5103
|
| 31104 |
+
},
|
| 31105 |
+
{
|
| 31106 |
+
"epoch": 0.04,
|
| 31107 |
+
"learning_rate": 0.0004,
|
| 31108 |
+
"loss": 7.2882,
|
| 31109 |
+
"step": 5104
|
| 31110 |
+
},
|
| 31111 |
+
{
|
| 31112 |
+
"epoch": 0.04,
|
| 31113 |
+
"learning_rate": 0.0004,
|
| 31114 |
+
"loss": 8.3101,
|
| 31115 |
+
"step": 5105
|
| 31116 |
+
},
|
| 31117 |
+
{
|
| 31118 |
+
"epoch": 0.04,
|
| 31119 |
+
"learning_rate": 0.0004,
|
| 31120 |
+
"loss": 8.1729,
|
| 31121 |
+
"step": 5106
|
| 31122 |
+
},
|
| 31123 |
+
{
|
| 31124 |
+
"epoch": 0.04,
|
| 31125 |
+
"learning_rate": 0.0004,
|
| 31126 |
+
"loss": 4.7164,
|
| 31127 |
+
"step": 5107
|
| 31128 |
+
},
|
| 31129 |
+
{
|
| 31130 |
+
"epoch": 0.04,
|
| 31131 |
+
"learning_rate": 0.0004,
|
| 31132 |
+
"loss": 6.9774,
|
| 31133 |
+
"step": 5108
|
| 31134 |
+
},
|
| 31135 |
+
{
|
| 31136 |
+
"epoch": 0.04,
|
| 31137 |
+
"learning_rate": 0.0004,
|
| 31138 |
+
"loss": 8.5206,
|
| 31139 |
+
"step": 5109
|
| 31140 |
+
},
|
| 31141 |
+
{
|
| 31142 |
+
"epoch": 0.04,
|
| 31143 |
+
"learning_rate": 0.0004,
|
| 31144 |
+
"loss": 7.961,
|
| 31145 |
+
"step": 5110
|
| 31146 |
+
},
|
| 31147 |
+
{
|
| 31148 |
+
"epoch": 0.04,
|
| 31149 |
+
"learning_rate": 0.0004,
|
| 31150 |
+
"loss": 2.5247,
|
| 31151 |
+
"step": 5111
|
| 31152 |
+
},
|
| 31153 |
+
{
|
| 31154 |
+
"epoch": 0.04,
|
| 31155 |
+
"learning_rate": 0.0004,
|
| 31156 |
+
"loss": 6.9292,
|
| 31157 |
+
"step": 5112
|
| 31158 |
+
},
|
| 31159 |
+
{
|
| 31160 |
+
"epoch": 0.04,
|
| 31161 |
+
"learning_rate": 0.0004,
|
| 31162 |
+
"loss": 7.3455,
|
| 31163 |
+
"step": 5113
|
| 31164 |
+
},
|
| 31165 |
+
{
|
| 31166 |
+
"epoch": 0.04,
|
| 31167 |
+
"learning_rate": 0.0004,
|
| 31168 |
+
"loss": 8.483,
|
| 31169 |
+
"step": 5114
|
| 31170 |
+
},
|
| 31171 |
+
{
|
| 31172 |
+
"epoch": 0.04,
|
| 31173 |
+
"learning_rate": 0.0004,
|
| 31174 |
+
"loss": 6.5356,
|
| 31175 |
+
"step": 5115
|
| 31176 |
+
},
|
| 31177 |
+
{
|
| 31178 |
+
"epoch": 0.04,
|
| 31179 |
+
"learning_rate": 0.0004,
|
| 31180 |
+
"loss": 8.7652,
|
| 31181 |
+
"step": 5116
|
| 31182 |
+
},
|
| 31183 |
+
{
|
| 31184 |
+
"epoch": 0.04,
|
| 31185 |
+
"learning_rate": 0.0004,
|
| 31186 |
+
"loss": 7.6761,
|
| 31187 |
+
"step": 5117
|
| 31188 |
+
},
|
| 31189 |
+
{
|
| 31190 |
+
"epoch": 0.04,
|
| 31191 |
+
"learning_rate": 0.0004,
|
| 31192 |
+
"loss": 3.3517,
|
| 31193 |
+
"step": 5118
|
| 31194 |
+
},
|
| 31195 |
+
{
|
| 31196 |
+
"epoch": 0.04,
|
| 31197 |
+
"learning_rate": 0.0004,
|
| 31198 |
+
"loss": 3.6645,
|
| 31199 |
+
"step": 5119
|
| 31200 |
+
},
|
| 31201 |
+
{
|
| 31202 |
+
"epoch": 0.04,
|
| 31203 |
+
"learning_rate": 0.0004,
|
| 31204 |
+
"loss": 8.1988,
|
| 31205 |
+
"step": 5120
|
| 31206 |
+
},
|
| 31207 |
+
{
|
| 31208 |
+
"epoch": 0.04,
|
| 31209 |
+
"learning_rate": 0.0004,
|
| 31210 |
+
"loss": 7.292,
|
| 31211 |
+
"step": 5121
|
| 31212 |
+
},
|
| 31213 |
+
{
|
| 31214 |
+
"epoch": 0.04,
|
| 31215 |
+
"learning_rate": 0.0004,
|
| 31216 |
+
"loss": 7.7002,
|
| 31217 |
+
"step": 5122
|
| 31218 |
+
},
|
| 31219 |
+
{
|
| 31220 |
+
"epoch": 0.04,
|
| 31221 |
+
"learning_rate": 0.0004,
|
| 31222 |
+
"loss": 5.1236,
|
| 31223 |
+
"step": 5123
|
| 31224 |
+
},
|
| 31225 |
+
{
|
| 31226 |
+
"epoch": 0.04,
|
| 31227 |
+
"learning_rate": 0.0004,
|
| 31228 |
+
"loss": 4.2389,
|
| 31229 |
+
"step": 5124
|
| 31230 |
+
},
|
| 31231 |
+
{
|
| 31232 |
+
"epoch": 0.04,
|
| 31233 |
+
"learning_rate": 0.0004,
|
| 31234 |
+
"loss": 5.5483,
|
| 31235 |
+
"step": 5125
|
| 31236 |
+
},
|
| 31237 |
+
{
|
| 31238 |
+
"epoch": 0.04,
|
| 31239 |
+
"learning_rate": 0.0004,
|
| 31240 |
+
"loss": 6.6833,
|
| 31241 |
+
"step": 5126
|
| 31242 |
+
},
|
| 31243 |
+
{
|
| 31244 |
+
"epoch": 0.04,
|
| 31245 |
+
"learning_rate": 0.0004,
|
| 31246 |
+
"loss": 7.1315,
|
| 31247 |
+
"step": 5127
|
| 31248 |
+
},
|
| 31249 |
+
{
|
| 31250 |
+
"epoch": 0.04,
|
| 31251 |
+
"learning_rate": 0.0004,
|
| 31252 |
+
"loss": 3.8323,
|
| 31253 |
+
"step": 5128
|
| 31254 |
+
},
|
| 31255 |
+
{
|
| 31256 |
+
"epoch": 0.04,
|
| 31257 |
+
"learning_rate": 0.0004,
|
| 31258 |
+
"loss": 2.7692,
|
| 31259 |
+
"step": 5129
|
| 31260 |
+
},
|
| 31261 |
+
{
|
| 31262 |
+
"epoch": 0.04,
|
| 31263 |
+
"learning_rate": 0.0004,
|
| 31264 |
+
"loss": 6.637,
|
| 31265 |
+
"step": 5130
|
| 31266 |
+
},
|
| 31267 |
+
{
|
| 31268 |
+
"epoch": 0.04,
|
| 31269 |
+
"learning_rate": 0.0004,
|
| 31270 |
+
"loss": 4.0574,
|
| 31271 |
+
"step": 5131
|
| 31272 |
+
},
|
| 31273 |
+
{
|
| 31274 |
+
"epoch": 0.04,
|
| 31275 |
+
"learning_rate": 0.0004,
|
| 31276 |
+
"loss": 3.1885,
|
| 31277 |
+
"step": 5132
|
| 31278 |
+
},
|
| 31279 |
+
{
|
| 31280 |
+
"epoch": 0.04,
|
| 31281 |
+
"learning_rate": 0.0004,
|
| 31282 |
+
"loss": 3.2203,
|
| 31283 |
+
"step": 5133
|
| 31284 |
+
},
|
| 31285 |
+
{
|
| 31286 |
+
"epoch": 0.04,
|
| 31287 |
+
"learning_rate": 0.0004,
|
| 31288 |
+
"loss": 3.0885,
|
| 31289 |
+
"step": 5134
|
| 31290 |
+
},
|
| 31291 |
+
{
|
| 31292 |
+
"epoch": 0.04,
|
| 31293 |
+
"learning_rate": 0.0004,
|
| 31294 |
+
"loss": 8.2705,
|
| 31295 |
+
"step": 5135
|
| 31296 |
+
},
|
| 31297 |
+
{
|
| 31298 |
+
"epoch": 0.04,
|
| 31299 |
+
"learning_rate": 0.0004,
|
| 31300 |
+
"loss": 7.3963,
|
| 31301 |
+
"step": 5136
|
| 31302 |
+
},
|
| 31303 |
+
{
|
| 31304 |
+
"epoch": 0.04,
|
| 31305 |
+
"learning_rate": 0.0004,
|
| 31306 |
+
"loss": 6.9949,
|
| 31307 |
+
"step": 5137
|
| 31308 |
+
},
|
| 31309 |
+
{
|
| 31310 |
+
"epoch": 0.04,
|
| 31311 |
+
"learning_rate": 0.0004,
|
| 31312 |
+
"loss": 5.7339,
|
| 31313 |
+
"step": 5138
|
| 31314 |
+
},
|
| 31315 |
+
{
|
| 31316 |
+
"epoch": 0.04,
|
| 31317 |
+
"learning_rate": 0.0004,
|
| 31318 |
+
"loss": 5.041,
|
| 31319 |
+
"step": 5139
|
| 31320 |
+
},
|
| 31321 |
+
{
|
| 31322 |
+
"epoch": 0.04,
|
| 31323 |
+
"learning_rate": 0.0004,
|
| 31324 |
+
"loss": 7.3292,
|
| 31325 |
+
"step": 5140
|
| 31326 |
+
},
|
| 31327 |
+
{
|
| 31328 |
+
"epoch": 0.04,
|
| 31329 |
+
"learning_rate": 0.0004,
|
| 31330 |
+
"loss": 2.7841,
|
| 31331 |
+
"step": 5141
|
| 31332 |
+
},
|
| 31333 |
+
{
|
| 31334 |
+
"epoch": 0.04,
|
| 31335 |
+
"learning_rate": 0.0004,
|
| 31336 |
+
"loss": 2.5847,
|
| 31337 |
+
"step": 5142
|
| 31338 |
+
},
|
| 31339 |
+
{
|
| 31340 |
+
"epoch": 0.04,
|
| 31341 |
+
"learning_rate": 0.0004,
|
| 31342 |
+
"loss": 3.3698,
|
| 31343 |
+
"step": 5143
|
| 31344 |
+
},
|
| 31345 |
+
{
|
| 31346 |
+
"epoch": 0.04,
|
| 31347 |
+
"learning_rate": 0.0004,
|
| 31348 |
+
"loss": 6.2052,
|
| 31349 |
+
"step": 5144
|
| 31350 |
+
},
|
| 31351 |
+
{
|
| 31352 |
+
"epoch": 0.04,
|
| 31353 |
+
"learning_rate": 0.0004,
|
| 31354 |
+
"loss": 4.8951,
|
| 31355 |
+
"step": 5145
|
| 31356 |
+
},
|
| 31357 |
+
{
|
| 31358 |
+
"epoch": 0.04,
|
| 31359 |
+
"learning_rate": 0.0004,
|
| 31360 |
+
"loss": 5.9602,
|
| 31361 |
+
"step": 5146
|
| 31362 |
+
},
|
| 31363 |
+
{
|
| 31364 |
+
"epoch": 0.04,
|
| 31365 |
+
"learning_rate": 0.0004,
|
| 31366 |
+
"loss": 2.2329,
|
| 31367 |
+
"step": 5147
|
| 31368 |
+
},
|
| 31369 |
+
{
|
| 31370 |
+
"epoch": 0.04,
|
| 31371 |
+
"learning_rate": 0.0004,
|
| 31372 |
+
"loss": 8.0463,
|
| 31373 |
+
"step": 5148
|
| 31374 |
+
},
|
| 31375 |
+
{
|
| 31376 |
+
"epoch": 0.04,
|
| 31377 |
+
"learning_rate": 0.0004,
|
| 31378 |
+
"loss": 4.4032,
|
| 31379 |
+
"step": 5149
|
| 31380 |
+
},
|
| 31381 |
+
{
|
| 31382 |
+
"epoch": 0.04,
|
| 31383 |
+
"learning_rate": 0.0004,
|
| 31384 |
+
"loss": 3.3965,
|
| 31385 |
+
"step": 5150
|
| 31386 |
+
},
|
| 31387 |
+
{
|
| 31388 |
+
"epoch": 0.04,
|
| 31389 |
+
"learning_rate": 0.0004,
|
| 31390 |
+
"loss": 8.4988,
|
| 31391 |
+
"step": 5151
|
| 31392 |
+
},
|
| 31393 |
+
{
|
| 31394 |
+
"epoch": 0.04,
|
| 31395 |
+
"learning_rate": 0.0004,
|
| 31396 |
+
"loss": 8.2164,
|
| 31397 |
+
"step": 5152
|
| 31398 |
+
},
|
| 31399 |
+
{
|
| 31400 |
+
"epoch": 0.04,
|
| 31401 |
+
"learning_rate": 0.0004,
|
| 31402 |
+
"loss": 7.1181,
|
| 31403 |
+
"step": 5153
|
| 31404 |
+
},
|
| 31405 |
+
{
|
| 31406 |
+
"epoch": 0.04,
|
| 31407 |
+
"learning_rate": 0.0004,
|
| 31408 |
+
"loss": 7.6035,
|
| 31409 |
+
"step": 5154
|
| 31410 |
+
},
|
| 31411 |
+
{
|
| 31412 |
+
"epoch": 0.04,
|
| 31413 |
+
"learning_rate": 0.0004,
|
| 31414 |
+
"loss": 4.8853,
|
| 31415 |
+
"step": 5155
|
| 31416 |
+
},
|
| 31417 |
+
{
|
| 31418 |
+
"epoch": 0.04,
|
| 31419 |
+
"learning_rate": 0.0004,
|
| 31420 |
+
"loss": 8.9761,
|
| 31421 |
+
"step": 5156
|
| 31422 |
+
},
|
| 31423 |
+
{
|
| 31424 |
+
"epoch": 0.04,
|
| 31425 |
+
"learning_rate": 0.0004,
|
| 31426 |
+
"loss": 7.6907,
|
| 31427 |
+
"step": 5157
|
| 31428 |
+
},
|
| 31429 |
+
{
|
| 31430 |
+
"epoch": 0.04,
|
| 31431 |
+
"learning_rate": 0.0004,
|
| 31432 |
+
"loss": 4.5017,
|
| 31433 |
+
"step": 5158
|
| 31434 |
+
},
|
| 31435 |
+
{
|
| 31436 |
+
"epoch": 0.04,
|
| 31437 |
+
"learning_rate": 0.0004,
|
| 31438 |
+
"loss": 4.3164,
|
| 31439 |
+
"step": 5159
|
| 31440 |
+
},
|
| 31441 |
+
{
|
| 31442 |
+
"epoch": 0.04,
|
| 31443 |
+
"learning_rate": 0.0004,
|
| 31444 |
+
"loss": 6.7913,
|
| 31445 |
+
"step": 5160
|
| 31446 |
+
},
|
| 31447 |
+
{
|
| 31448 |
+
"epoch": 0.04,
|
| 31449 |
+
"learning_rate": 0.0004,
|
| 31450 |
+
"loss": 8.4106,
|
| 31451 |
+
"step": 5161
|
| 31452 |
+
},
|
| 31453 |
+
{
|
| 31454 |
+
"epoch": 0.04,
|
| 31455 |
+
"learning_rate": 0.0004,
|
| 31456 |
+
"loss": 6.4332,
|
| 31457 |
+
"step": 5162
|
| 31458 |
+
},
|
| 31459 |
+
{
|
| 31460 |
+
"epoch": 0.04,
|
| 31461 |
+
"learning_rate": 0.0004,
|
| 31462 |
+
"loss": 7.8098,
|
| 31463 |
+
"step": 5163
|
| 31464 |
+
},
|
| 31465 |
+
{
|
| 31466 |
+
"epoch": 0.04,
|
| 31467 |
+
"learning_rate": 0.0004,
|
| 31468 |
+
"loss": 6.2833,
|
| 31469 |
+
"step": 5164
|
| 31470 |
+
},
|
| 31471 |
+
{
|
| 31472 |
+
"epoch": 0.04,
|
| 31473 |
+
"learning_rate": 0.0004,
|
| 31474 |
+
"loss": 7.7168,
|
| 31475 |
+
"step": 5165
|
| 31476 |
+
},
|
| 31477 |
+
{
|
| 31478 |
+
"epoch": 0.04,
|
| 31479 |
+
"learning_rate": 0.0004,
|
| 31480 |
+
"loss": 8.851,
|
| 31481 |
+
"step": 5166
|
| 31482 |
+
},
|
| 31483 |
+
{
|
| 31484 |
+
"epoch": 0.04,
|
| 31485 |
+
"learning_rate": 0.0004,
|
| 31486 |
+
"loss": 4.8072,
|
| 31487 |
+
"step": 5167
|
| 31488 |
+
},
|
| 31489 |
+
{
|
| 31490 |
+
"epoch": 0.04,
|
| 31491 |
+
"learning_rate": 0.0004,
|
| 31492 |
+
"loss": 6.9745,
|
| 31493 |
+
"step": 5168
|
| 31494 |
+
},
|
| 31495 |
+
{
|
| 31496 |
+
"epoch": 0.04,
|
| 31497 |
+
"learning_rate": 0.0004,
|
| 31498 |
+
"loss": 6.5735,
|
| 31499 |
+
"step": 5169
|
| 31500 |
+
},
|
| 31501 |
+
{
|
| 31502 |
+
"epoch": 0.04,
|
| 31503 |
+
"learning_rate": 0.0004,
|
| 31504 |
+
"loss": 7.771,
|
| 31505 |
+
"step": 5170
|
| 31506 |
+
},
|
| 31507 |
+
{
|
| 31508 |
+
"epoch": 0.04,
|
| 31509 |
+
"learning_rate": 0.0004,
|
| 31510 |
+
"loss": 6.9335,
|
| 31511 |
+
"step": 5171
|
| 31512 |
+
},
|
| 31513 |
+
{
|
| 31514 |
+
"epoch": 0.04,
|
| 31515 |
+
"learning_rate": 0.0004,
|
| 31516 |
+
"loss": 6.856,
|
| 31517 |
+
"step": 5172
|
| 31518 |
+
},
|
| 31519 |
+
{
|
| 31520 |
+
"epoch": 0.04,
|
| 31521 |
+
"learning_rate": 0.0004,
|
| 31522 |
+
"loss": 6.4643,
|
| 31523 |
+
"step": 5173
|
| 31524 |
+
},
|
| 31525 |
+
{
|
| 31526 |
+
"epoch": 0.04,
|
| 31527 |
+
"learning_rate": 0.0004,
|
| 31528 |
+
"loss": 7.2565,
|
| 31529 |
+
"step": 5174
|
| 31530 |
+
},
|
| 31531 |
+
{
|
| 31532 |
+
"epoch": 0.04,
|
| 31533 |
+
"learning_rate": 0.0004,
|
| 31534 |
+
"loss": 8.0499,
|
| 31535 |
+
"step": 5175
|
| 31536 |
+
},
|
| 31537 |
+
{
|
| 31538 |
+
"epoch": 0.04,
|
| 31539 |
+
"learning_rate": 0.0004,
|
| 31540 |
+
"loss": 4.8558,
|
| 31541 |
+
"step": 5176
|
| 31542 |
+
},
|
| 31543 |
+
{
|
| 31544 |
+
"epoch": 0.04,
|
| 31545 |
+
"learning_rate": 0.0004,
|
| 31546 |
+
"loss": 3.99,
|
| 31547 |
+
"step": 5177
|
| 31548 |
+
},
|
| 31549 |
+
{
|
| 31550 |
+
"epoch": 0.04,
|
| 31551 |
+
"learning_rate": 0.0004,
|
| 31552 |
+
"loss": 3.7183,
|
| 31553 |
+
"step": 5178
|
| 31554 |
+
},
|
| 31555 |
+
{
|
| 31556 |
+
"epoch": 0.04,
|
| 31557 |
+
"learning_rate": 0.0004,
|
| 31558 |
+
"loss": 5.2353,
|
| 31559 |
+
"step": 5179
|
| 31560 |
+
},
|
| 31561 |
+
{
|
| 31562 |
+
"epoch": 0.04,
|
| 31563 |
+
"learning_rate": 0.0004,
|
| 31564 |
+
"loss": 8.1037,
|
| 31565 |
+
"step": 5180
|
| 31566 |
+
},
|
| 31567 |
+
{
|
| 31568 |
+
"epoch": 0.04,
|
| 31569 |
+
"learning_rate": 0.0004,
|
| 31570 |
+
"loss": 4.4882,
|
| 31571 |
+
"step": 5181
|
| 31572 |
+
},
|
| 31573 |
+
{
|
| 31574 |
+
"epoch": 0.04,
|
| 31575 |
+
"learning_rate": 0.0004,
|
| 31576 |
+
"loss": 6.2169,
|
| 31577 |
+
"step": 5182
|
| 31578 |
+
},
|
| 31579 |
+
{
|
| 31580 |
+
"epoch": 0.04,
|
| 31581 |
+
"learning_rate": 0.0004,
|
| 31582 |
+
"loss": 7.01,
|
| 31583 |
+
"step": 5183
|
| 31584 |
+
},
|
| 31585 |
+
{
|
| 31586 |
+
"epoch": 0.04,
|
| 31587 |
+
"learning_rate": 0.0004,
|
| 31588 |
+
"loss": 8.2869,
|
| 31589 |
+
"step": 5184
|
| 31590 |
+
},
|
| 31591 |
+
{
|
| 31592 |
+
"epoch": 0.04,
|
| 31593 |
+
"learning_rate": 0.0004,
|
| 31594 |
+
"loss": 3.5804,
|
| 31595 |
+
"step": 5185
|
| 31596 |
+
},
|
| 31597 |
+
{
|
| 31598 |
+
"epoch": 0.04,
|
| 31599 |
+
"learning_rate": 0.0004,
|
| 31600 |
+
"loss": 5.3033,
|
| 31601 |
+
"step": 5186
|
| 31602 |
+
},
|
| 31603 |
+
{
|
| 31604 |
+
"epoch": 0.04,
|
| 31605 |
+
"learning_rate": 0.0004,
|
| 31606 |
+
"loss": 4.1612,
|
| 31607 |
+
"step": 5187
|
| 31608 |
+
},
|
| 31609 |
+
{
|
| 31610 |
+
"epoch": 0.04,
|
| 31611 |
+
"learning_rate": 0.0004,
|
| 31612 |
+
"loss": 6.9619,
|
| 31613 |
+
"step": 5188
|
| 31614 |
+
},
|
| 31615 |
+
{
|
| 31616 |
+
"epoch": 0.04,
|
| 31617 |
+
"learning_rate": 0.0004,
|
| 31618 |
+
"loss": 5.3567,
|
| 31619 |
+
"step": 5189
|
| 31620 |
+
},
|
| 31621 |
+
{
|
| 31622 |
+
"epoch": 0.04,
|
| 31623 |
+
"learning_rate": 0.0004,
|
| 31624 |
+
"loss": 3.2493,
|
| 31625 |
+
"step": 5190
|
| 31626 |
+
},
|
| 31627 |
+
{
|
| 31628 |
+
"epoch": 0.04,
|
| 31629 |
+
"learning_rate": 0.0004,
|
| 31630 |
+
"loss": 7.3546,
|
| 31631 |
+
"step": 5191
|
| 31632 |
+
},
|
| 31633 |
+
{
|
| 31634 |
+
"epoch": 0.04,
|
| 31635 |
+
"learning_rate": 0.0004,
|
| 31636 |
+
"loss": 6.3477,
|
| 31637 |
+
"step": 5192
|
| 31638 |
+
},
|
| 31639 |
+
{
|
| 31640 |
+
"epoch": 0.04,
|
| 31641 |
+
"learning_rate": 0.0004,
|
| 31642 |
+
"loss": 6.7189,
|
| 31643 |
+
"step": 5193
|
| 31644 |
+
},
|
| 31645 |
+
{
|
| 31646 |
+
"epoch": 0.04,
|
| 31647 |
+
"learning_rate": 0.0004,
|
| 31648 |
+
"loss": 3.956,
|
| 31649 |
+
"step": 5194
|
| 31650 |
+
},
|
| 31651 |
+
{
|
| 31652 |
+
"epoch": 0.04,
|
| 31653 |
+
"learning_rate": 0.0004,
|
| 31654 |
+
"loss": 5.3166,
|
| 31655 |
+
"step": 5195
|
| 31656 |
+
},
|
| 31657 |
+
{
|
| 31658 |
+
"epoch": 0.04,
|
| 31659 |
+
"learning_rate": 0.0004,
|
| 31660 |
+
"loss": 6.0115,
|
| 31661 |
+
"step": 5196
|
| 31662 |
+
},
|
| 31663 |
+
{
|
| 31664 |
+
"epoch": 0.04,
|
| 31665 |
+
"learning_rate": 0.0004,
|
| 31666 |
+
"loss": 3.3418,
|
| 31667 |
+
"step": 5197
|
| 31668 |
+
},
|
| 31669 |
+
{
|
| 31670 |
+
"epoch": 0.04,
|
| 31671 |
+
"learning_rate": 0.0004,
|
| 31672 |
+
"loss": 3.1107,
|
| 31673 |
+
"step": 5198
|
| 31674 |
+
},
|
| 31675 |
+
{
|
| 31676 |
+
"epoch": 0.04,
|
| 31677 |
+
"learning_rate": 0.0004,
|
| 31678 |
+
"loss": 6.1123,
|
| 31679 |
+
"step": 5199
|
| 31680 |
+
},
|
| 31681 |
+
{
|
| 31682 |
+
"epoch": 0.04,
|
| 31683 |
+
"learning_rate": 0.0004,
|
| 31684 |
+
"loss": 3.6152,
|
| 31685 |
+
"step": 5200
|
| 31686 |
+
},
|
| 31687 |
+
{
|
| 31688 |
+
"epoch": 0.04,
|
| 31689 |
+
"eval_loss": 6.378727912902832,
|
| 31690 |
+
"eval_runtime": 22.4659,
|
| 31691 |
+
"eval_samples_per_second": 2.226,
|
| 31692 |
+
"eval_steps_per_second": 1.113,
|
| 31693 |
+
"step": 5200
|
| 31694 |
+
},
|
| 31695 |
+
{
|
| 31696 |
+
"epoch": 0.04,
|
| 31697 |
+
"mmlu_eval_accuracy": 0.32602813852813856,
|
| 31698 |
+
"mmlu_eval_accuracy_abstract_algebra": 0.36363636363636365,
|
| 31699 |
+
"mmlu_eval_accuracy_anatomy": 0.35714285714285715,
|
| 31700 |
+
"mmlu_eval_accuracy_astronomy": 0.25,
|
| 31701 |
+
"mmlu_eval_accuracy_business_ethics": 0.3333333333333333,
|
| 31702 |
+
"mmlu_loss": 3.6307257652282714,
|
| 31703 |
+
"step": 5200
|
| 31704 |
+
},
|
| 31705 |
+
{
|
| 31706 |
+
"epoch": 0.04,
|
| 31707 |
+
"learning_rate": 0.0004,
|
| 31708 |
+
"loss": 8.3892,
|
| 31709 |
+
"step": 5201
|
| 31710 |
+
},
|
| 31711 |
+
{
|
| 31712 |
+
"epoch": 0.04,
|
| 31713 |
+
"learning_rate": 0.0004,
|
| 31714 |
+
"loss": 8.2349,
|
| 31715 |
+
"step": 5202
|
| 31716 |
+
},
|
| 31717 |
+
{
|
| 31718 |
+
"epoch": 0.04,
|
| 31719 |
+
"learning_rate": 0.0004,
|
| 31720 |
+
"loss": 7.5333,
|
| 31721 |
+
"step": 5203
|
| 31722 |
+
},
|
| 31723 |
+
{
|
| 31724 |
+
"epoch": 0.04,
|
| 31725 |
+
"learning_rate": 0.0004,
|
| 31726 |
+
"loss": 6.145,
|
| 31727 |
+
"step": 5204
|
| 31728 |
+
},
|
| 31729 |
+
{
|
| 31730 |
+
"epoch": 0.04,
|
| 31731 |
+
"learning_rate": 0.0004,
|
| 31732 |
+
"loss": 6.4543,
|
| 31733 |
+
"step": 5205
|
| 31734 |
+
},
|
| 31735 |
+
{
|
| 31736 |
+
"epoch": 0.04,
|
| 31737 |
+
"learning_rate": 0.0004,
|
| 31738 |
+
"loss": 2.7512,
|
| 31739 |
+
"step": 5206
|
| 31740 |
+
},
|
| 31741 |
+
{
|
| 31742 |
+
"epoch": 0.04,
|
| 31743 |
+
"learning_rate": 0.0004,
|
| 31744 |
+
"loss": 8.44,
|
| 31745 |
+
"step": 5207
|
| 31746 |
+
},
|
| 31747 |
+
{
|
| 31748 |
+
"epoch": 0.04,
|
| 31749 |
+
"learning_rate": 0.0004,
|
| 31750 |
+
"loss": 3.7399,
|
| 31751 |
+
"step": 5208
|
| 31752 |
+
},
|
| 31753 |
+
{
|
| 31754 |
+
"epoch": 0.04,
|
| 31755 |
+
"learning_rate": 0.0004,
|
| 31756 |
+
"loss": 6.133,
|
| 31757 |
+
"step": 5209
|
| 31758 |
+
},
|
| 31759 |
+
{
|
| 31760 |
+
"epoch": 0.04,
|
| 31761 |
+
"learning_rate": 0.0004,
|
| 31762 |
+
"loss": 5.1594,
|
| 31763 |
+
"step": 5210
|
| 31764 |
+
},
|
| 31765 |
+
{
|
| 31766 |
+
"epoch": 0.04,
|
| 31767 |
+
"learning_rate": 0.0004,
|
| 31768 |
+
"loss": 2.6307,
|
| 31769 |
+
"step": 5211
|
| 31770 |
+
},
|
| 31771 |
+
{
|
| 31772 |
+
"epoch": 0.04,
|
| 31773 |
+
"learning_rate": 0.0004,
|
| 31774 |
+
"loss": 3.4796,
|
| 31775 |
+
"step": 5212
|
| 31776 |
+
},
|
| 31777 |
+
{
|
| 31778 |
+
"epoch": 0.04,
|
| 31779 |
+
"learning_rate": 0.0004,
|
| 31780 |
+
"loss": 4.2767,
|
| 31781 |
+
"step": 5213
|
| 31782 |
+
},
|
| 31783 |
+
{
|
| 31784 |
+
"epoch": 0.04,
|
| 31785 |
+
"learning_rate": 0.0004,
|
| 31786 |
+
"loss": 8.9843,
|
| 31787 |
+
"step": 5214
|
| 31788 |
+
},
|
| 31789 |
+
{
|
| 31790 |
+
"epoch": 0.04,
|
| 31791 |
+
"learning_rate": 0.0004,
|
| 31792 |
+
"loss": 7.0799,
|
| 31793 |
+
"step": 5215
|
| 31794 |
+
},
|
| 31795 |
+
{
|
| 31796 |
+
"epoch": 0.04,
|
| 31797 |
+
"learning_rate": 0.0004,
|
| 31798 |
+
"loss": 7.1612,
|
| 31799 |
+
"step": 5216
|
| 31800 |
+
},
|
| 31801 |
+
{
|
| 31802 |
+
"epoch": 0.04,
|
| 31803 |
+
"learning_rate": 0.0004,
|
| 31804 |
+
"loss": 3.2503,
|
| 31805 |
+
"step": 5217
|
| 31806 |
+
},
|
| 31807 |
+
{
|
| 31808 |
+
"epoch": 0.04,
|
| 31809 |
+
"learning_rate": 0.0004,
|
| 31810 |
+
"loss": 4.4808,
|
| 31811 |
+
"step": 5218
|
| 31812 |
+
},
|
| 31813 |
+
{
|
| 31814 |
+
"epoch": 0.04,
|
| 31815 |
+
"learning_rate": 0.0004,
|
| 31816 |
+
"loss": 3.0658,
|
| 31817 |
+
"step": 5219
|
| 31818 |
+
},
|
| 31819 |
+
{
|
| 31820 |
+
"epoch": 0.04,
|
| 31821 |
+
"learning_rate": 0.0004,
|
| 31822 |
+
"loss": 8.8345,
|
| 31823 |
+
"step": 5220
|
| 31824 |
+
},
|
| 31825 |
+
{
|
| 31826 |
+
"epoch": 0.04,
|
| 31827 |
+
"learning_rate": 0.0004,
|
| 31828 |
+
"loss": 2.2569,
|
| 31829 |
+
"step": 5221
|
| 31830 |
+
},
|
| 31831 |
+
{
|
| 31832 |
+
"epoch": 0.04,
|
| 31833 |
+
"learning_rate": 0.0004,
|
| 31834 |
+
"loss": 6.3815,
|
| 31835 |
+
"step": 5222
|
| 31836 |
+
},
|
| 31837 |
+
{
|
| 31838 |
+
"epoch": 0.04,
|
| 31839 |
+
"learning_rate": 0.0004,
|
| 31840 |
+
"loss": 3.381,
|
| 31841 |
+
"step": 5223
|
| 31842 |
+
},
|
| 31843 |
+
{
|
| 31844 |
+
"epoch": 0.04,
|
| 31845 |
+
"learning_rate": 0.0004,
|
| 31846 |
+
"loss": 7.2786,
|
| 31847 |
+
"step": 5224
|
| 31848 |
+
},
|
| 31849 |
+
{
|
| 31850 |
+
"epoch": 0.04,
|
| 31851 |
+
"learning_rate": 0.0004,
|
| 31852 |
+
"loss": 8.1709,
|
| 31853 |
+
"step": 5225
|
| 31854 |
+
},
|
| 31855 |
+
{
|
| 31856 |
+
"epoch": 0.04,
|
| 31857 |
+
"learning_rate": 0.0004,
|
| 31858 |
+
"loss": 2.5537,
|
| 31859 |
+
"step": 5226
|
| 31860 |
+
},
|
| 31861 |
+
{
|
| 31862 |
+
"epoch": 0.04,
|
| 31863 |
+
"learning_rate": 0.0004,
|
| 31864 |
+
"loss": 6.498,
|
| 31865 |
+
"step": 5227
|
| 31866 |
+
},
|
| 31867 |
+
{
|
| 31868 |
+
"epoch": 0.04,
|
| 31869 |
+
"learning_rate": 0.0004,
|
| 31870 |
+
"loss": 6.9914,
|
| 31871 |
+
"step": 5228
|
| 31872 |
+
},
|
| 31873 |
+
{
|
| 31874 |
+
"epoch": 0.04,
|
| 31875 |
+
"learning_rate": 0.0004,
|
| 31876 |
+
"loss": 3.6207,
|
| 31877 |
+
"step": 5229
|
| 31878 |
+
},
|
| 31879 |
+
{
|
| 31880 |
+
"epoch": 0.04,
|
| 31881 |
+
"learning_rate": 0.0004,
|
| 31882 |
+
"loss": 5.1154,
|
| 31883 |
+
"step": 5230
|
| 31884 |
+
},
|
| 31885 |
+
{
|
| 31886 |
+
"epoch": 0.04,
|
| 31887 |
+
"learning_rate": 0.0004,
|
| 31888 |
+
"loss": 7.7722,
|
| 31889 |
+
"step": 5231
|
| 31890 |
+
},
|
| 31891 |
+
{
|
| 31892 |
+
"epoch": 0.04,
|
| 31893 |
+
"learning_rate": 0.0004,
|
| 31894 |
+
"loss": 3.0188,
|
| 31895 |
+
"step": 5232
|
| 31896 |
+
},
|
| 31897 |
+
{
|
| 31898 |
+
"epoch": 0.04,
|
| 31899 |
+
"learning_rate": 0.0004,
|
| 31900 |
+
"loss": 2.133,
|
| 31901 |
+
"step": 5233
|
| 31902 |
+
},
|
| 31903 |
+
{
|
| 31904 |
+
"epoch": 0.04,
|
| 31905 |
+
"learning_rate": 0.0004,
|
| 31906 |
+
"loss": 3.7835,
|
| 31907 |
+
"step": 5234
|
| 31908 |
+
},
|
| 31909 |
+
{
|
| 31910 |
+
"epoch": 0.04,
|
| 31911 |
+
"learning_rate": 0.0004,
|
| 31912 |
+
"loss": 2.1327,
|
| 31913 |
+
"step": 5235
|
| 31914 |
+
},
|
| 31915 |
+
{
|
| 31916 |
+
"epoch": 0.04,
|
| 31917 |
+
"learning_rate": 0.0004,
|
| 31918 |
+
"loss": 6.9416,
|
| 31919 |
+
"step": 5236
|
| 31920 |
+
},
|
| 31921 |
+
{
|
| 31922 |
+
"epoch": 0.04,
|
| 31923 |
+
"learning_rate": 0.0004,
|
| 31924 |
+
"loss": 7.1057,
|
| 31925 |
+
"step": 5237
|
| 31926 |
+
},
|
| 31927 |
+
{
|
| 31928 |
+
"epoch": 0.04,
|
| 31929 |
+
"learning_rate": 0.0004,
|
| 31930 |
+
"loss": 3.5148,
|
| 31931 |
+
"step": 5238
|
| 31932 |
+
},
|
| 31933 |
+
{
|
| 31934 |
+
"epoch": 0.04,
|
| 31935 |
+
"learning_rate": 0.0004,
|
| 31936 |
+
"loss": 3.8436,
|
| 31937 |
+
"step": 5239
|
| 31938 |
+
},
|
| 31939 |
+
{
|
| 31940 |
+
"epoch": 0.04,
|
| 31941 |
+
"learning_rate": 0.0004,
|
| 31942 |
+
"loss": 4.763,
|
| 31943 |
+
"step": 5240
|
| 31944 |
+
},
|
| 31945 |
+
{
|
| 31946 |
+
"epoch": 0.04,
|
| 31947 |
+
"learning_rate": 0.0004,
|
| 31948 |
+
"loss": 4.7498,
|
| 31949 |
+
"step": 5241
|
| 31950 |
+
},
|
| 31951 |
+
{
|
| 31952 |
+
"epoch": 0.04,
|
| 31953 |
+
"learning_rate": 0.0004,
|
| 31954 |
+
"loss": 6.7862,
|
| 31955 |
+
"step": 5242
|
| 31956 |
+
},
|
| 31957 |
+
{
|
| 31958 |
+
"epoch": 0.04,
|
| 31959 |
+
"learning_rate": 0.0004,
|
| 31960 |
+
"loss": 6.9326,
|
| 31961 |
+
"step": 5243
|
| 31962 |
+
},
|
| 31963 |
+
{
|
| 31964 |
+
"epoch": 0.04,
|
| 31965 |
+
"learning_rate": 0.0004,
|
| 31966 |
+
"loss": 2.1277,
|
| 31967 |
+
"step": 5244
|
| 31968 |
+
},
|
| 31969 |
+
{
|
| 31970 |
+
"epoch": 0.04,
|
| 31971 |
+
"learning_rate": 0.0004,
|
| 31972 |
+
"loss": 6.5697,
|
| 31973 |
+
"step": 5245
|
| 31974 |
+
},
|
| 31975 |
+
{
|
| 31976 |
+
"epoch": 0.04,
|
| 31977 |
+
"learning_rate": 0.0004,
|
| 31978 |
+
"loss": 2.0789,
|
| 31979 |
+
"step": 5246
|
| 31980 |
+
},
|
| 31981 |
+
{
|
| 31982 |
+
"epoch": 0.04,
|
| 31983 |
+
"learning_rate": 0.0004,
|
| 31984 |
+
"loss": 2.6917,
|
| 31985 |
+
"step": 5247
|
| 31986 |
+
},
|
| 31987 |
+
{
|
| 31988 |
+
"epoch": 0.04,
|
| 31989 |
+
"learning_rate": 0.0004,
|
| 31990 |
+
"loss": 2.5857,
|
| 31991 |
+
"step": 5248
|
| 31992 |
+
},
|
| 31993 |
+
{
|
| 31994 |
+
"epoch": 0.04,
|
| 31995 |
+
"learning_rate": 0.0004,
|
| 31996 |
+
"loss": 2.5904,
|
| 31997 |
+
"step": 5249
|
| 31998 |
+
},
|
| 31999 |
+
{
|
| 32000 |
+
"epoch": 0.04,
|
| 32001 |
+
"learning_rate": 0.0004,
|
| 32002 |
+
"loss": 3.6245,
|
| 32003 |
+
"step": 5250
|
| 32004 |
+
},
|
| 32005 |
+
{
|
| 32006 |
+
"epoch": 0.04,
|
| 32007 |
+
"learning_rate": 0.0004,
|
| 32008 |
+
"loss": 6.5449,
|
| 32009 |
+
"step": 5251
|
| 32010 |
+
},
|
| 32011 |
+
{
|
| 32012 |
+
"epoch": 0.04,
|
| 32013 |
+
"learning_rate": 0.0004,
|
| 32014 |
+
"loss": 9.9082,
|
| 32015 |
+
"step": 5252
|
| 32016 |
+
},
|
| 32017 |
+
{
|
| 32018 |
+
"epoch": 0.04,
|
| 32019 |
+
"learning_rate": 0.0004,
|
| 32020 |
+
"loss": 7.9662,
|
| 32021 |
+
"step": 5253
|
| 32022 |
+
},
|
| 32023 |
+
{
|
| 32024 |
+
"epoch": 0.04,
|
| 32025 |
+
"learning_rate": 0.0004,
|
| 32026 |
+
"loss": 7.733,
|
| 32027 |
+
"step": 5254
|
| 32028 |
+
},
|
| 32029 |
+
{
|
| 32030 |
+
"epoch": 0.04,
|
| 32031 |
+
"learning_rate": 0.0004,
|
| 32032 |
+
"loss": 7.1067,
|
| 32033 |
+
"step": 5255
|
| 32034 |
+
},
|
| 32035 |
+
{
|
| 32036 |
+
"epoch": 0.04,
|
| 32037 |
+
"learning_rate": 0.0004,
|
| 32038 |
+
"loss": 5.9188,
|
| 32039 |
+
"step": 5256
|
| 32040 |
+
},
|
| 32041 |
+
{
|
| 32042 |
+
"epoch": 0.04,
|
| 32043 |
+
"learning_rate": 0.0004,
|
| 32044 |
+
"loss": 4.5555,
|
| 32045 |
+
"step": 5257
|
| 32046 |
+
},
|
| 32047 |
+
{
|
| 32048 |
+
"epoch": 0.04,
|
| 32049 |
+
"learning_rate": 0.0004,
|
| 32050 |
+
"loss": 6.7376,
|
| 32051 |
+
"step": 5258
|
| 32052 |
+
},
|
| 32053 |
+
{
|
| 32054 |
+
"epoch": 0.04,
|
| 32055 |
+
"learning_rate": 0.0004,
|
| 32056 |
+
"loss": 9.3653,
|
| 32057 |
+
"step": 5259
|
| 32058 |
+
},
|
| 32059 |
+
{
|
| 32060 |
+
"epoch": 0.04,
|
| 32061 |
+
"learning_rate": 0.0004,
|
| 32062 |
+
"loss": 5.7456,
|
| 32063 |
+
"step": 5260
|
| 32064 |
+
},
|
| 32065 |
+
{
|
| 32066 |
+
"epoch": 0.04,
|
| 32067 |
+
"learning_rate": 0.0004,
|
| 32068 |
+
"loss": 6.1382,
|
| 32069 |
+
"step": 5261
|
| 32070 |
+
},
|
| 32071 |
+
{
|
| 32072 |
+
"epoch": 0.04,
|
| 32073 |
+
"learning_rate": 0.0004,
|
| 32074 |
+
"loss": 2.472,
|
| 32075 |
+
"step": 5262
|
| 32076 |
+
},
|
| 32077 |
+
{
|
| 32078 |
+
"epoch": 0.04,
|
| 32079 |
+
"learning_rate": 0.0004,
|
| 32080 |
+
"loss": 7.0488,
|
| 32081 |
+
"step": 5263
|
| 32082 |
+
},
|
| 32083 |
+
{
|
| 32084 |
+
"epoch": 0.04,
|
| 32085 |
+
"learning_rate": 0.0004,
|
| 32086 |
+
"loss": 7.4769,
|
| 32087 |
+
"step": 5264
|
| 32088 |
+
},
|
| 32089 |
+
{
|
| 32090 |
+
"epoch": 0.04,
|
| 32091 |
+
"learning_rate": 0.0004,
|
| 32092 |
+
"loss": 4.5465,
|
| 32093 |
+
"step": 5265
|
| 32094 |
+
},
|
| 32095 |
+
{
|
| 32096 |
+
"epoch": 0.04,
|
| 32097 |
+
"learning_rate": 0.0004,
|
| 32098 |
+
"loss": 2.8687,
|
| 32099 |
+
"step": 5266
|
| 32100 |
+
},
|
| 32101 |
+
{
|
| 32102 |
+
"epoch": 0.04,
|
| 32103 |
+
"learning_rate": 0.0004,
|
| 32104 |
+
"loss": 6.535,
|
| 32105 |
+
"step": 5267
|
| 32106 |
+
},
|
| 32107 |
+
{
|
| 32108 |
+
"epoch": 0.04,
|
| 32109 |
+
"learning_rate": 0.0004,
|
| 32110 |
+
"loss": 3.935,
|
| 32111 |
+
"step": 5268
|
| 32112 |
+
},
|
| 32113 |
+
{
|
| 32114 |
+
"epoch": 0.04,
|
| 32115 |
+
"learning_rate": 0.0004,
|
| 32116 |
+
"loss": 6.2081,
|
| 32117 |
+
"step": 5269
|
| 32118 |
+
},
|
| 32119 |
+
{
|
| 32120 |
+
"epoch": 0.04,
|
| 32121 |
+
"learning_rate": 0.0004,
|
| 32122 |
+
"loss": 3.5528,
|
| 32123 |
+
"step": 5270
|
| 32124 |
+
},
|
| 32125 |
+
{
|
| 32126 |
+
"epoch": 0.04,
|
| 32127 |
+
"learning_rate": 0.0004,
|
| 32128 |
+
"loss": 5.2201,
|
| 32129 |
+
"step": 5271
|
| 32130 |
+
},
|
| 32131 |
+
{
|
| 32132 |
+
"epoch": 0.04,
|
| 32133 |
+
"learning_rate": 0.0004,
|
| 32134 |
+
"loss": 6.3348,
|
| 32135 |
+
"step": 5272
|
| 32136 |
+
},
|
| 32137 |
+
{
|
| 32138 |
+
"epoch": 0.04,
|
| 32139 |
+
"learning_rate": 0.0004,
|
| 32140 |
+
"loss": 6.8958,
|
| 32141 |
+
"step": 5273
|
| 32142 |
+
},
|
| 32143 |
+
{
|
| 32144 |
+
"epoch": 0.04,
|
| 32145 |
+
"learning_rate": 0.0004,
|
| 32146 |
+
"loss": 6.2687,
|
| 32147 |
+
"step": 5274
|
| 32148 |
+
},
|
| 32149 |
+
{
|
| 32150 |
+
"epoch": 0.04,
|
| 32151 |
+
"learning_rate": 0.0004,
|
| 32152 |
+
"loss": 4.2481,
|
| 32153 |
+
"step": 5275
|
| 32154 |
+
},
|
| 32155 |
+
{
|
| 32156 |
+
"epoch": 0.04,
|
| 32157 |
+
"learning_rate": 0.0004,
|
| 32158 |
+
"loss": 3.1491,
|
| 32159 |
+
"step": 5276
|
| 32160 |
+
},
|
| 32161 |
+
{
|
| 32162 |
+
"epoch": 0.04,
|
| 32163 |
+
"learning_rate": 0.0004,
|
| 32164 |
+
"loss": 2.9855,
|
| 32165 |
+
"step": 5277
|
| 32166 |
+
},
|
| 32167 |
+
{
|
| 32168 |
+
"epoch": 0.04,
|
| 32169 |
+
"learning_rate": 0.0004,
|
| 32170 |
+
"loss": 6.7815,
|
| 32171 |
+
"step": 5278
|
| 32172 |
+
},
|
| 32173 |
+
{
|
| 32174 |
+
"epoch": 0.04,
|
| 32175 |
+
"learning_rate": 0.0004,
|
| 32176 |
+
"loss": 8.0858,
|
| 32177 |
+
"step": 5279
|
| 32178 |
+
},
|
| 32179 |
+
{
|
| 32180 |
+
"epoch": 0.04,
|
| 32181 |
+
"learning_rate": 0.0004,
|
| 32182 |
+
"loss": 5.8508,
|
| 32183 |
+
"step": 5280
|
| 32184 |
+
},
|
| 32185 |
+
{
|
| 32186 |
+
"epoch": 0.04,
|
| 32187 |
+
"learning_rate": 0.0004,
|
| 32188 |
+
"loss": 6.1981,
|
| 32189 |
+
"step": 5281
|
| 32190 |
+
},
|
| 32191 |
+
{
|
| 32192 |
+
"epoch": 0.04,
|
| 32193 |
+
"learning_rate": 0.0004,
|
| 32194 |
+
"loss": 6.0052,
|
| 32195 |
+
"step": 5282
|
| 32196 |
+
},
|
| 32197 |
+
{
|
| 32198 |
+
"epoch": 0.04,
|
| 32199 |
+
"learning_rate": 0.0004,
|
| 32200 |
+
"loss": 6.6153,
|
| 32201 |
+
"step": 5283
|
| 32202 |
+
},
|
| 32203 |
+
{
|
| 32204 |
+
"epoch": 0.04,
|
| 32205 |
+
"learning_rate": 0.0004,
|
| 32206 |
+
"loss": 3.3998,
|
| 32207 |
+
"step": 5284
|
| 32208 |
+
},
|
| 32209 |
+
{
|
| 32210 |
+
"epoch": 0.04,
|
| 32211 |
+
"learning_rate": 0.0004,
|
| 32212 |
+
"loss": 7.9997,
|
| 32213 |
+
"step": 5285
|
| 32214 |
+
},
|
| 32215 |
+
{
|
| 32216 |
+
"epoch": 0.04,
|
| 32217 |
+
"learning_rate": 0.0004,
|
| 32218 |
+
"loss": 4.3768,
|
| 32219 |
+
"step": 5286
|
| 32220 |
+
},
|
| 32221 |
+
{
|
| 32222 |
+
"epoch": 0.04,
|
| 32223 |
+
"learning_rate": 0.0004,
|
| 32224 |
+
"loss": 7.3477,
|
| 32225 |
+
"step": 5287
|
| 32226 |
+
},
|
| 32227 |
+
{
|
| 32228 |
+
"epoch": 0.04,
|
| 32229 |
+
"learning_rate": 0.0004,
|
| 32230 |
+
"loss": 3.5558,
|
| 32231 |
+
"step": 5288
|
| 32232 |
+
},
|
| 32233 |
+
{
|
| 32234 |
+
"epoch": 0.04,
|
| 32235 |
+
"learning_rate": 0.0004,
|
| 32236 |
+
"loss": 5.4668,
|
| 32237 |
+
"step": 5289
|
| 32238 |
+
},
|
| 32239 |
+
{
|
| 32240 |
+
"epoch": 0.04,
|
| 32241 |
+
"learning_rate": 0.0004,
|
| 32242 |
+
"loss": 6.571,
|
| 32243 |
+
"step": 5290
|
| 32244 |
+
},
|
| 32245 |
+
{
|
| 32246 |
+
"epoch": 0.04,
|
| 32247 |
+
"learning_rate": 0.0004,
|
| 32248 |
+
"loss": 4.8902,
|
| 32249 |
+
"step": 5291
|
| 32250 |
+
},
|
| 32251 |
+
{
|
| 32252 |
+
"epoch": 0.04,
|
| 32253 |
+
"learning_rate": 0.0004,
|
| 32254 |
+
"loss": 8.3882,
|
| 32255 |
+
"step": 5292
|
| 32256 |
+
},
|
| 32257 |
+
{
|
| 32258 |
+
"epoch": 0.04,
|
| 32259 |
+
"learning_rate": 0.0004,
|
| 32260 |
+
"loss": 2.6453,
|
| 32261 |
+
"step": 5293
|
| 32262 |
+
},
|
| 32263 |
+
{
|
| 32264 |
+
"epoch": 0.04,
|
| 32265 |
+
"learning_rate": 0.0004,
|
| 32266 |
+
"loss": 3.0346,
|
| 32267 |
+
"step": 5294
|
| 32268 |
+
},
|
| 32269 |
+
{
|
| 32270 |
+
"epoch": 0.04,
|
| 32271 |
+
"learning_rate": 0.0004,
|
| 32272 |
+
"loss": 6.6004,
|
| 32273 |
+
"step": 5295
|
| 32274 |
+
},
|
| 32275 |
+
{
|
| 32276 |
+
"epoch": 0.04,
|
| 32277 |
+
"learning_rate": 0.0004,
|
| 32278 |
+
"loss": 3.2908,
|
| 32279 |
+
"step": 5296
|
| 32280 |
+
},
|
| 32281 |
+
{
|
| 32282 |
+
"epoch": 0.04,
|
| 32283 |
+
"learning_rate": 0.0004,
|
| 32284 |
+
"loss": 5.8072,
|
| 32285 |
+
"step": 5297
|
| 32286 |
+
},
|
| 32287 |
+
{
|
| 32288 |
+
"epoch": 0.04,
|
| 32289 |
+
"learning_rate": 0.0004,
|
| 32290 |
+
"loss": 5.5529,
|
| 32291 |
+
"step": 5298
|
| 32292 |
+
},
|
| 32293 |
+
{
|
| 32294 |
+
"epoch": 0.04,
|
| 32295 |
+
"learning_rate": 0.0004,
|
| 32296 |
+
"loss": 2.8983,
|
| 32297 |
+
"step": 5299
|
| 32298 |
+
},
|
| 32299 |
+
{
|
| 32300 |
+
"epoch": 0.04,
|
| 32301 |
+
"learning_rate": 0.0004,
|
| 32302 |
+
"loss": 5.3365,
|
| 32303 |
+
"step": 5300
|
| 32304 |
+
},
|
| 32305 |
+
{
|
| 32306 |
+
"epoch": 0.04,
|
| 32307 |
+
"learning_rate": 0.0004,
|
| 32308 |
+
"loss": 7.7748,
|
| 32309 |
+
"step": 5301
|
| 32310 |
+
},
|
| 32311 |
+
{
|
| 32312 |
+
"epoch": 0.04,
|
| 32313 |
+
"learning_rate": 0.0004,
|
| 32314 |
+
"loss": 8.1817,
|
| 32315 |
+
"step": 5302
|
| 32316 |
+
},
|
| 32317 |
+
{
|
| 32318 |
+
"epoch": 0.04,
|
| 32319 |
+
"learning_rate": 0.0004,
|
| 32320 |
+
"loss": 4.1362,
|
| 32321 |
+
"step": 5303
|
| 32322 |
+
},
|
| 32323 |
+
{
|
| 32324 |
+
"epoch": 0.04,
|
| 32325 |
+
"learning_rate": 0.0004,
|
| 32326 |
+
"loss": 7.4656,
|
| 32327 |
+
"step": 5304
|
| 32328 |
+
},
|
| 32329 |
+
{
|
| 32330 |
+
"epoch": 0.04,
|
| 32331 |
+
"learning_rate": 0.0004,
|
| 32332 |
+
"loss": 8.1376,
|
| 32333 |
+
"step": 5305
|
| 32334 |
+
},
|
| 32335 |
+
{
|
| 32336 |
+
"epoch": 0.04,
|
| 32337 |
+
"learning_rate": 0.0004,
|
| 32338 |
+
"loss": 8.9722,
|
| 32339 |
+
"step": 5306
|
| 32340 |
+
},
|
| 32341 |
+
{
|
| 32342 |
+
"epoch": 0.04,
|
| 32343 |
+
"learning_rate": 0.0004,
|
| 32344 |
+
"loss": 6.6619,
|
| 32345 |
+
"step": 5307
|
| 32346 |
+
},
|
| 32347 |
+
{
|
| 32348 |
+
"epoch": 0.04,
|
| 32349 |
+
"learning_rate": 0.0004,
|
| 32350 |
+
"loss": 7.3412,
|
| 32351 |
+
"step": 5308
|
| 32352 |
+
},
|
| 32353 |
+
{
|
| 32354 |
+
"epoch": 0.04,
|
| 32355 |
+
"learning_rate": 0.0004,
|
| 32356 |
+
"loss": 2.8955,
|
| 32357 |
+
"step": 5309
|
| 32358 |
+
},
|
| 32359 |
+
{
|
| 32360 |
+
"epoch": 0.04,
|
| 32361 |
+
"learning_rate": 0.0004,
|
| 32362 |
+
"loss": 8.0063,
|
| 32363 |
+
"step": 5310
|
| 32364 |
+
},
|
| 32365 |
+
{
|
| 32366 |
+
"epoch": 0.04,
|
| 32367 |
+
"learning_rate": 0.0004,
|
| 32368 |
+
"loss": 2.7085,
|
| 32369 |
+
"step": 5311
|
| 32370 |
+
},
|
| 32371 |
+
{
|
| 32372 |
+
"epoch": 0.04,
|
| 32373 |
+
"learning_rate": 0.0004,
|
| 32374 |
+
"loss": 7.3925,
|
| 32375 |
+
"step": 5312
|
| 32376 |
+
},
|
| 32377 |
+
{
|
| 32378 |
+
"epoch": 0.04,
|
| 32379 |
+
"learning_rate": 0.0004,
|
| 32380 |
+
"loss": 2.8739,
|
| 32381 |
+
"step": 5313
|
| 32382 |
+
},
|
| 32383 |
+
{
|
| 32384 |
+
"epoch": 0.04,
|
| 32385 |
+
"learning_rate": 0.0004,
|
| 32386 |
+
"loss": 5.2402,
|
| 32387 |
+
"step": 5314
|
| 32388 |
+
},
|
| 32389 |
+
{
|
| 32390 |
+
"epoch": 0.04,
|
| 32391 |
+
"learning_rate": 0.0004,
|
| 32392 |
+
"loss": 8.3129,
|
| 32393 |
+
"step": 5315
|
| 32394 |
+
},
|
| 32395 |
+
{
|
| 32396 |
+
"epoch": 0.04,
|
| 32397 |
+
"learning_rate": 0.0004,
|
| 32398 |
+
"loss": 7.4129,
|
| 32399 |
+
"step": 5316
|
| 32400 |
+
},
|
| 32401 |
+
{
|
| 32402 |
+
"epoch": 0.04,
|
| 32403 |
+
"learning_rate": 0.0004,
|
| 32404 |
+
"loss": 7.722,
|
| 32405 |
+
"step": 5317
|
| 32406 |
+
},
|
| 32407 |
+
{
|
| 32408 |
+
"epoch": 0.04,
|
| 32409 |
+
"learning_rate": 0.0004,
|
| 32410 |
+
"loss": 8.2092,
|
| 32411 |
+
"step": 5318
|
| 32412 |
+
},
|
| 32413 |
+
{
|
| 32414 |
+
"epoch": 0.04,
|
| 32415 |
+
"learning_rate": 0.0004,
|
| 32416 |
+
"loss": 5.2358,
|
| 32417 |
+
"step": 5319
|
| 32418 |
+
},
|
| 32419 |
+
{
|
| 32420 |
+
"epoch": 0.04,
|
| 32421 |
+
"learning_rate": 0.0004,
|
| 32422 |
+
"loss": 7.9978,
|
| 32423 |
+
"step": 5320
|
| 32424 |
+
},
|
| 32425 |
+
{
|
| 32426 |
+
"epoch": 0.04,
|
| 32427 |
+
"learning_rate": 0.0004,
|
| 32428 |
+
"loss": 7.9746,
|
| 32429 |
+
"step": 5321
|
| 32430 |
+
},
|
| 32431 |
+
{
|
| 32432 |
+
"epoch": 0.04,
|
| 32433 |
+
"learning_rate": 0.0004,
|
| 32434 |
+
"loss": 4.1538,
|
| 32435 |
+
"step": 5322
|
| 32436 |
+
},
|
| 32437 |
+
{
|
| 32438 |
+
"epoch": 0.04,
|
| 32439 |
+
"learning_rate": 0.0004,
|
| 32440 |
+
"loss": 6.4998,
|
| 32441 |
+
"step": 5323
|
| 32442 |
+
},
|
| 32443 |
+
{
|
| 32444 |
+
"epoch": 0.04,
|
| 32445 |
+
"learning_rate": 0.0004,
|
| 32446 |
+
"loss": 3.8847,
|
| 32447 |
+
"step": 5324
|
| 32448 |
+
},
|
| 32449 |
+
{
|
| 32450 |
+
"epoch": 0.04,
|
| 32451 |
+
"learning_rate": 0.0004,
|
| 32452 |
+
"loss": 6.3631,
|
| 32453 |
+
"step": 5325
|
| 32454 |
+
},
|
| 32455 |
+
{
|
| 32456 |
+
"epoch": 0.04,
|
| 32457 |
+
"learning_rate": 0.0004,
|
| 32458 |
+
"loss": 5.1982,
|
| 32459 |
+
"step": 5326
|
| 32460 |
+
},
|
| 32461 |
+
{
|
| 32462 |
+
"epoch": 0.04,
|
| 32463 |
+
"learning_rate": 0.0004,
|
| 32464 |
+
"loss": 3.6708,
|
| 32465 |
+
"step": 5327
|
| 32466 |
+
},
|
| 32467 |
+
{
|
| 32468 |
+
"epoch": 0.04,
|
| 32469 |
+
"learning_rate": 0.0004,
|
| 32470 |
+
"loss": 5.3822,
|
| 32471 |
+
"step": 5328
|
| 32472 |
+
},
|
| 32473 |
+
{
|
| 32474 |
+
"epoch": 0.04,
|
| 32475 |
+
"learning_rate": 0.0004,
|
| 32476 |
+
"loss": 9.2081,
|
| 32477 |
+
"step": 5329
|
| 32478 |
+
},
|
| 32479 |
+
{
|
| 32480 |
+
"epoch": 0.04,
|
| 32481 |
+
"learning_rate": 0.0004,
|
| 32482 |
+
"loss": 2.4944,
|
| 32483 |
+
"step": 5330
|
| 32484 |
+
},
|
| 32485 |
+
{
|
| 32486 |
+
"epoch": 0.04,
|
| 32487 |
+
"learning_rate": 0.0004,
|
| 32488 |
+
"loss": 4.5158,
|
| 32489 |
+
"step": 5331
|
| 32490 |
+
},
|
| 32491 |
+
{
|
| 32492 |
+
"epoch": 0.04,
|
| 32493 |
+
"learning_rate": 0.0004,
|
| 32494 |
+
"loss": 3.287,
|
| 32495 |
+
"step": 5332
|
| 32496 |
+
},
|
| 32497 |
+
{
|
| 32498 |
+
"epoch": 0.04,
|
| 32499 |
+
"learning_rate": 0.0004,
|
| 32500 |
+
"loss": 6.0359,
|
| 32501 |
+
"step": 5333
|
| 32502 |
+
},
|
| 32503 |
+
{
|
| 32504 |
+
"epoch": 0.04,
|
| 32505 |
+
"learning_rate": 0.0004,
|
| 32506 |
+
"loss": 5.2941,
|
| 32507 |
+
"step": 5334
|
| 32508 |
+
},
|
| 32509 |
+
{
|
| 32510 |
+
"epoch": 0.04,
|
| 32511 |
+
"learning_rate": 0.0004,
|
| 32512 |
+
"loss": 6.0545,
|
| 32513 |
+
"step": 5335
|
| 32514 |
+
},
|
| 32515 |
+
{
|
| 32516 |
+
"epoch": 0.04,
|
| 32517 |
+
"learning_rate": 0.0004,
|
| 32518 |
+
"loss": 5.9831,
|
| 32519 |
+
"step": 5336
|
| 32520 |
+
},
|
| 32521 |
+
{
|
| 32522 |
+
"epoch": 0.04,
|
| 32523 |
+
"learning_rate": 0.0004,
|
| 32524 |
+
"loss": 5.0593,
|
| 32525 |
+
"step": 5337
|
| 32526 |
+
},
|
| 32527 |
+
{
|
| 32528 |
+
"epoch": 0.04,
|
| 32529 |
+
"learning_rate": 0.0004,
|
| 32530 |
+
"loss": 2.3721,
|
| 32531 |
+
"step": 5338
|
| 32532 |
+
},
|
| 32533 |
+
{
|
| 32534 |
+
"epoch": 0.04,
|
| 32535 |
+
"learning_rate": 0.0004,
|
| 32536 |
+
"loss": 2.6548,
|
| 32537 |
+
"step": 5339
|
| 32538 |
+
},
|
| 32539 |
+
{
|
| 32540 |
+
"epoch": 0.04,
|
| 32541 |
+
"learning_rate": 0.0004,
|
| 32542 |
+
"loss": 7.4947,
|
| 32543 |
+
"step": 5340
|
| 32544 |
+
},
|
| 32545 |
+
{
|
| 32546 |
+
"epoch": 0.04,
|
| 32547 |
+
"learning_rate": 0.0004,
|
| 32548 |
+
"loss": 5.9871,
|
| 32549 |
+
"step": 5341
|
| 32550 |
+
},
|
| 32551 |
+
{
|
| 32552 |
+
"epoch": 0.04,
|
| 32553 |
+
"learning_rate": 0.0004,
|
| 32554 |
+
"loss": 5.8511,
|
| 32555 |
+
"step": 5342
|
| 32556 |
+
},
|
| 32557 |
+
{
|
| 32558 |
+
"epoch": 0.04,
|
| 32559 |
+
"learning_rate": 0.0004,
|
| 32560 |
+
"loss": 5.0414,
|
| 32561 |
+
"step": 5343
|
| 32562 |
+
},
|
| 32563 |
+
{
|
| 32564 |
+
"epoch": 0.04,
|
| 32565 |
+
"learning_rate": 0.0004,
|
| 32566 |
+
"loss": 4.5975,
|
| 32567 |
+
"step": 5344
|
| 32568 |
+
},
|
| 32569 |
+
{
|
| 32570 |
+
"epoch": 0.04,
|
| 32571 |
+
"learning_rate": 0.0004,
|
| 32572 |
+
"loss": 4.9401,
|
| 32573 |
+
"step": 5345
|
| 32574 |
+
},
|
| 32575 |
+
{
|
| 32576 |
+
"epoch": 0.04,
|
| 32577 |
+
"learning_rate": 0.0004,
|
| 32578 |
+
"loss": 6.1735,
|
| 32579 |
+
"step": 5346
|
| 32580 |
+
},
|
| 32581 |
+
{
|
| 32582 |
+
"epoch": 0.04,
|
| 32583 |
+
"learning_rate": 0.0004,
|
| 32584 |
+
"loss": 8.225,
|
| 32585 |
+
"step": 5347
|
| 32586 |
+
},
|
| 32587 |
+
{
|
| 32588 |
+
"epoch": 0.04,
|
| 32589 |
+
"learning_rate": 0.0004,
|
| 32590 |
+
"loss": 2.3719,
|
| 32591 |
+
"step": 5348
|
| 32592 |
+
},
|
| 32593 |
+
{
|
| 32594 |
+
"epoch": 0.04,
|
| 32595 |
+
"learning_rate": 0.0004,
|
| 32596 |
+
"loss": 7.0078,
|
| 32597 |
+
"step": 5349
|
| 32598 |
+
},
|
| 32599 |
+
{
|
| 32600 |
+
"epoch": 0.04,
|
| 32601 |
+
"learning_rate": 0.0004,
|
| 32602 |
+
"loss": 7.067,
|
| 32603 |
+
"step": 5350
|
| 32604 |
+
},
|
| 32605 |
+
{
|
| 32606 |
+
"epoch": 0.04,
|
| 32607 |
+
"learning_rate": 0.0004,
|
| 32608 |
+
"loss": 6.2358,
|
| 32609 |
+
"step": 5351
|
| 32610 |
+
},
|
| 32611 |
+
{
|
| 32612 |
+
"epoch": 0.04,
|
| 32613 |
+
"learning_rate": 0.0004,
|
| 32614 |
+
"loss": 7.7797,
|
| 32615 |
+
"step": 5352
|
| 32616 |
+
},
|
| 32617 |
+
{
|
| 32618 |
+
"epoch": 0.04,
|
| 32619 |
+
"learning_rate": 0.0004,
|
| 32620 |
+
"loss": 7.3039,
|
| 32621 |
+
"step": 5353
|
| 32622 |
+
},
|
| 32623 |
+
{
|
| 32624 |
+
"epoch": 0.04,
|
| 32625 |
+
"learning_rate": 0.0004,
|
| 32626 |
+
"loss": 7.7088,
|
| 32627 |
+
"step": 5354
|
| 32628 |
+
},
|
| 32629 |
+
{
|
| 32630 |
+
"epoch": 0.04,
|
| 32631 |
+
"learning_rate": 0.0004,
|
| 32632 |
+
"loss": 5.8908,
|
| 32633 |
+
"step": 5355
|
| 32634 |
+
},
|
| 32635 |
+
{
|
| 32636 |
+
"epoch": 0.04,
|
| 32637 |
+
"learning_rate": 0.0004,
|
| 32638 |
+
"loss": 6.053,
|
| 32639 |
+
"step": 5356
|
| 32640 |
+
},
|
| 32641 |
+
{
|
| 32642 |
+
"epoch": 0.04,
|
| 32643 |
+
"learning_rate": 0.0004,
|
| 32644 |
+
"loss": 8.1785,
|
| 32645 |
+
"step": 5357
|
| 32646 |
+
},
|
| 32647 |
+
{
|
| 32648 |
+
"epoch": 0.04,
|
| 32649 |
+
"learning_rate": 0.0004,
|
| 32650 |
+
"loss": 8.9685,
|
| 32651 |
+
"step": 5358
|
| 32652 |
+
},
|
| 32653 |
+
{
|
| 32654 |
+
"epoch": 0.04,
|
| 32655 |
+
"learning_rate": 0.0004,
|
| 32656 |
+
"loss": 3.5938,
|
| 32657 |
+
"step": 5359
|
| 32658 |
+
},
|
| 32659 |
+
{
|
| 32660 |
+
"epoch": 0.04,
|
| 32661 |
+
"learning_rate": 0.0004,
|
| 32662 |
+
"loss": 3.55,
|
| 32663 |
+
"step": 5360
|
| 32664 |
+
},
|
| 32665 |
+
{
|
| 32666 |
+
"epoch": 0.04,
|
| 32667 |
+
"learning_rate": 0.0004,
|
| 32668 |
+
"loss": 8.9066,
|
| 32669 |
+
"step": 5361
|
| 32670 |
+
},
|
| 32671 |
+
{
|
| 32672 |
+
"epoch": 0.04,
|
| 32673 |
+
"learning_rate": 0.0004,
|
| 32674 |
+
"loss": 7.1162,
|
| 32675 |
+
"step": 5362
|
| 32676 |
+
},
|
| 32677 |
+
{
|
| 32678 |
+
"epoch": 0.04,
|
| 32679 |
+
"learning_rate": 0.0004,
|
| 32680 |
+
"loss": 4.3855,
|
| 32681 |
+
"step": 5363
|
| 32682 |
+
},
|
| 32683 |
+
{
|
| 32684 |
+
"epoch": 0.04,
|
| 32685 |
+
"learning_rate": 0.0004,
|
| 32686 |
+
"loss": 7.3739,
|
| 32687 |
+
"step": 5364
|
| 32688 |
+
},
|
| 32689 |
+
{
|
| 32690 |
+
"epoch": 0.04,
|
| 32691 |
+
"learning_rate": 0.0004,
|
| 32692 |
+
"loss": 4.6735,
|
| 32693 |
+
"step": 5365
|
| 32694 |
+
},
|
| 32695 |
+
{
|
| 32696 |
+
"epoch": 0.04,
|
| 32697 |
+
"learning_rate": 0.0004,
|
| 32698 |
+
"loss": 7.4358,
|
| 32699 |
+
"step": 5366
|
| 32700 |
+
},
|
| 32701 |
+
{
|
| 32702 |
+
"epoch": 0.04,
|
| 32703 |
+
"learning_rate": 0.0004,
|
| 32704 |
+
"loss": 8.0145,
|
| 32705 |
+
"step": 5367
|
| 32706 |
+
},
|
| 32707 |
+
{
|
| 32708 |
+
"epoch": 0.04,
|
| 32709 |
+
"learning_rate": 0.0004,
|
| 32710 |
+
"loss": 6.3903,
|
| 32711 |
+
"step": 5368
|
| 32712 |
+
},
|
| 32713 |
+
{
|
| 32714 |
+
"epoch": 0.04,
|
| 32715 |
+
"learning_rate": 0.0004,
|
| 32716 |
+
"loss": 6.7513,
|
| 32717 |
+
"step": 5369
|
| 32718 |
+
},
|
| 32719 |
+
{
|
| 32720 |
+
"epoch": 0.04,
|
| 32721 |
+
"learning_rate": 0.0004,
|
| 32722 |
+
"loss": 6.3265,
|
| 32723 |
+
"step": 5370
|
| 32724 |
+
},
|
| 32725 |
+
{
|
| 32726 |
+
"epoch": 0.04,
|
| 32727 |
+
"learning_rate": 0.0004,
|
| 32728 |
+
"loss": 6.0655,
|
| 32729 |
+
"step": 5371
|
| 32730 |
+
},
|
| 32731 |
+
{
|
| 32732 |
+
"epoch": 0.04,
|
| 32733 |
+
"learning_rate": 0.0004,
|
| 32734 |
+
"loss": 5.046,
|
| 32735 |
+
"step": 5372
|
| 32736 |
+
},
|
| 32737 |
+
{
|
| 32738 |
+
"epoch": 0.04,
|
| 32739 |
+
"learning_rate": 0.0004,
|
| 32740 |
+
"loss": 5.9508,
|
| 32741 |
+
"step": 5373
|
| 32742 |
+
},
|
| 32743 |
+
{
|
| 32744 |
+
"epoch": 0.04,
|
| 32745 |
+
"learning_rate": 0.0004,
|
| 32746 |
+
"loss": 4.7946,
|
| 32747 |
+
"step": 5374
|
| 32748 |
+
},
|
| 32749 |
+
{
|
| 32750 |
+
"epoch": 0.04,
|
| 32751 |
+
"learning_rate": 0.0004,
|
| 32752 |
+
"loss": 6.4541,
|
| 32753 |
+
"step": 5375
|
| 32754 |
+
},
|
| 32755 |
+
{
|
| 32756 |
+
"epoch": 0.04,
|
| 32757 |
+
"learning_rate": 0.0004,
|
| 32758 |
+
"loss": 6.6098,
|
| 32759 |
+
"step": 5376
|
| 32760 |
+
},
|
| 32761 |
+
{
|
| 32762 |
+
"epoch": 0.04,
|
| 32763 |
+
"learning_rate": 0.0004,
|
| 32764 |
+
"loss": 4.124,
|
| 32765 |
+
"step": 5377
|
| 32766 |
+
},
|
| 32767 |
+
{
|
| 32768 |
+
"epoch": 0.04,
|
| 32769 |
+
"learning_rate": 0.0004,
|
| 32770 |
+
"loss": 6.539,
|
| 32771 |
+
"step": 5378
|
| 32772 |
+
},
|
| 32773 |
+
{
|
| 32774 |
+
"epoch": 0.04,
|
| 32775 |
+
"learning_rate": 0.0004,
|
| 32776 |
+
"loss": 7.8777,
|
| 32777 |
+
"step": 5379
|
| 32778 |
+
},
|
| 32779 |
+
{
|
| 32780 |
+
"epoch": 0.04,
|
| 32781 |
+
"learning_rate": 0.0004,
|
| 32782 |
+
"loss": 6.6315,
|
| 32783 |
+
"step": 5380
|
| 32784 |
+
},
|
| 32785 |
+
{
|
| 32786 |
+
"epoch": 0.04,
|
| 32787 |
+
"learning_rate": 0.0004,
|
| 32788 |
+
"loss": 7.1006,
|
| 32789 |
+
"step": 5381
|
| 32790 |
+
},
|
| 32791 |
+
{
|
| 32792 |
+
"epoch": 0.04,
|
| 32793 |
+
"learning_rate": 0.0004,
|
| 32794 |
+
"loss": 5.1972,
|
| 32795 |
+
"step": 5382
|
| 32796 |
+
},
|
| 32797 |
+
{
|
| 32798 |
+
"epoch": 0.04,
|
| 32799 |
+
"learning_rate": 0.0004,
|
| 32800 |
+
"loss": 8.1427,
|
| 32801 |
+
"step": 5383
|
| 32802 |
+
},
|
| 32803 |
+
{
|
| 32804 |
+
"epoch": 0.04,
|
| 32805 |
+
"learning_rate": 0.0004,
|
| 32806 |
+
"loss": 6.1585,
|
| 32807 |
+
"step": 5384
|
| 32808 |
+
},
|
| 32809 |
+
{
|
| 32810 |
+
"epoch": 0.04,
|
| 32811 |
+
"learning_rate": 0.0004,
|
| 32812 |
+
"loss": 2.6632,
|
| 32813 |
+
"step": 5385
|
| 32814 |
+
},
|
| 32815 |
+
{
|
| 32816 |
+
"epoch": 0.04,
|
| 32817 |
+
"learning_rate": 0.0004,
|
| 32818 |
+
"loss": 3.2398,
|
| 32819 |
+
"step": 5386
|
| 32820 |
+
},
|
| 32821 |
+
{
|
| 32822 |
+
"epoch": 0.04,
|
| 32823 |
+
"learning_rate": 0.0004,
|
| 32824 |
+
"loss": 2.3961,
|
| 32825 |
+
"step": 5387
|
| 32826 |
+
},
|
| 32827 |
+
{
|
| 32828 |
+
"epoch": 0.04,
|
| 32829 |
+
"learning_rate": 0.0004,
|
| 32830 |
+
"loss": 4.7233,
|
| 32831 |
+
"step": 5388
|
| 32832 |
+
},
|
| 32833 |
+
{
|
| 32834 |
+
"epoch": 0.04,
|
| 32835 |
+
"learning_rate": 0.0004,
|
| 32836 |
+
"loss": 7.0959,
|
| 32837 |
+
"step": 5389
|
| 32838 |
+
},
|
| 32839 |
+
{
|
| 32840 |
+
"epoch": 0.04,
|
| 32841 |
+
"learning_rate": 0.0004,
|
| 32842 |
+
"loss": 5.5001,
|
| 32843 |
+
"step": 5390
|
| 32844 |
+
},
|
| 32845 |
+
{
|
| 32846 |
+
"epoch": 0.04,
|
| 32847 |
+
"learning_rate": 0.0004,
|
| 32848 |
+
"loss": 3.0294,
|
| 32849 |
+
"step": 5391
|
| 32850 |
+
},
|
| 32851 |
+
{
|
| 32852 |
+
"epoch": 0.04,
|
| 32853 |
+
"learning_rate": 0.0004,
|
| 32854 |
+
"loss": 7.0155,
|
| 32855 |
+
"step": 5392
|
| 32856 |
+
},
|
| 32857 |
+
{
|
| 32858 |
+
"epoch": 0.04,
|
| 32859 |
+
"learning_rate": 0.0004,
|
| 32860 |
+
"loss": 3.2199,
|
| 32861 |
+
"step": 5393
|
| 32862 |
+
},
|
| 32863 |
+
{
|
| 32864 |
+
"epoch": 0.04,
|
| 32865 |
+
"learning_rate": 0.0004,
|
| 32866 |
+
"loss": 3.3572,
|
| 32867 |
+
"step": 5394
|
| 32868 |
+
},
|
| 32869 |
+
{
|
| 32870 |
+
"epoch": 0.04,
|
| 32871 |
+
"learning_rate": 0.0004,
|
| 32872 |
+
"loss": 7.7692,
|
| 32873 |
+
"step": 5395
|
| 32874 |
+
},
|
| 32875 |
+
{
|
| 32876 |
+
"epoch": 0.04,
|
| 32877 |
+
"learning_rate": 0.0004,
|
| 32878 |
+
"loss": 7.6206,
|
| 32879 |
+
"step": 5396
|
| 32880 |
+
},
|
| 32881 |
+
{
|
| 32882 |
+
"epoch": 0.04,
|
| 32883 |
+
"learning_rate": 0.0004,
|
| 32884 |
+
"loss": 4.5941,
|
| 32885 |
+
"step": 5397
|
| 32886 |
+
},
|
| 32887 |
+
{
|
| 32888 |
+
"epoch": 0.04,
|
| 32889 |
+
"learning_rate": 0.0004,
|
| 32890 |
+
"loss": 3.6666,
|
| 32891 |
+
"step": 5398
|
| 32892 |
+
},
|
| 32893 |
+
{
|
| 32894 |
+
"epoch": 0.04,
|
| 32895 |
+
"learning_rate": 0.0004,
|
| 32896 |
+
"loss": 2.2717,
|
| 32897 |
+
"step": 5399
|
| 32898 |
+
},
|
| 32899 |
+
{
|
| 32900 |
+
"epoch": 0.04,
|
| 32901 |
+
"learning_rate": 0.0004,
|
| 32902 |
+
"loss": 4.9048,
|
| 32903 |
+
"step": 5400
|
| 32904 |
+
},
|
| 32905 |
+
{
|
| 32906 |
+
"epoch": 0.04,
|
| 32907 |
+
"eval_loss": 6.526280403137207,
|
| 32908 |
+
"eval_runtime": 22.3472,
|
| 32909 |
+
"eval_samples_per_second": 2.237,
|
| 32910 |
+
"eval_steps_per_second": 1.119,
|
| 32911 |
+
"step": 5400
|
| 32912 |
+
},
|
| 32913 |
+
{
|
| 32914 |
+
"epoch": 0.04,
|
| 32915 |
+
"mmlu_eval_accuracy": 0.2525477994227994,
|
| 32916 |
+
"mmlu_eval_accuracy_abstract_algebra": 0.18181818181818182,
|
| 32917 |
+
"mmlu_eval_accuracy_anatomy": 0.07142857142857142,
|
| 32918 |
+
"mmlu_eval_accuracy_astronomy": 0.3125,
|
| 32919 |
+
"mmlu_eval_accuracy_business_ethics": 0.4444444444444444,
|
| 32920 |
+
"mmlu_loss": 3.8057762241363524,
|
| 32921 |
+
"step": 5400
|
| 32922 |
+
},
|
| 32923 |
+
{
|
| 32924 |
+
"epoch": 0.04,
|
| 32925 |
+
"step": 5400,
|
| 32926 |
+
"total_flos": 8.918950910784307e+16,
|
| 32927 |
+
"train_loss": 0.6445872698006807,
|
| 32928 |
+
"train_runtime": 1748.3273,
|
| 32929 |
+
"train_samples_per_second": 17.159,
|
| 32930 |
+
"train_steps_per_second": 17.159
|
| 32931 |
}
|
| 32932 |
],
|
| 32933 |
"max_steps": 30000,
|
| 32934 |
"num_train_epochs": 1,
|
| 32935 |
+
"total_flos": 8.918950910784307e+16,
|
| 32936 |
"trial_name": null,
|
| 32937 |
"trial_params": null
|
| 32938 |
}
|
training_args.bin
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 6011
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:fe7d931ebfbcece1009124b9eae98d1a465edd703240c0655ee9bb17db395973
|
| 3 |
size 6011
|